[Edu-sig] [edupython] Python in Education Advocacy Article

Toby Donaldson tjd at sfu.ca
Tue Mar 27 00:51:13 CEST 2007


Michael,

On the sigcse list there was recently a coding challenge
(http://www.cs.duke.edu/csed/code) that asked for solutions to a word
frequency problem in various languages. I believe the author is
planning to eventually list all the solutions received (entries came
in many different languages); that could make for some interesting
comparisons.

I wrote one solution in Python, and one in Java, which I give below;
needless to say, I found the Python version far easier to write.

Toby

-------------------------------------------------------------------------------------------------------------------------------
import sys

if __name__ == '__main__':
   # get the path from the command-line
   fname = sys.argv[1]

   # read in the file as a list of tokens
   tokens = [tok.strip().lower() for tok in open(fname, 'r').read().split()]

   # calculate the frequency of each token
   freq = {}
   for tok in tokens:
       if tok in freq:
           freq[tok] += 1
       else:
           freq[tok] = 1

   # Sort the list in highest frequency to lowest frequency,
   # with ties sorted by lexicographics order of the words.
   # Uses the Python sort-decorate-unsort idiom. We sort by
   # the negation of the frequency values to get the proper
   # ordering.

   lst = [(-freq[tok], tok) for tok in freq]  # decorate
   lst.sort()                                 # sort
   lst = [(-freq, tok) for freq, tok in lst]  # undecorate

   # print the results
   for freq, tok in lst:
       print '%s\t%s' % (freq, tok)

-------------------------------------------------------------------------------------------------------------------------------

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Scanner;
import java.util.TreeMap;
import java.util.TreeSet;

public class Puzzle {

       public static void main(String[] args) throws IOException {
               // get the file to process
               String fname = args[0];

               Scanner sc = new Scanner(new File(fname));

               // initialize the map for the words and counts;
               // a TreeMap is always ordered by keys
               TreeMap<String, Integer> map = new TreeMap<String, Integer>();

               // process the file a line at a time
               while (sc.hasNextLine()) {
                       // chop each line into its constituent tokens
                       // "\\s+" is a regular expression that matches
one or more
                       // whitespace characters
                       String[] tokens = sc.nextLine().split("\\s+");

                       // make all the strings lower case, and remove
any excess whitespace
                       for (int i = 0; i < tokens.length; ++i) {
                               tokens[i] = tokens[i].toLowerCase().trim();
                       }

                       // add each token to the map
                       for (String tok : tokens) {
                               if (map.containsKey(tok)) {
                                       map.put(tok, map.get(tok) + 1);
                               } else {
                                       map.put(tok, 1);
                               }
                       }
               }

               // remove the empty string if it is present
               map.remove("");

               // sort the data by storing each word that occurs the
same number of
               // times in a TreeMap of sets keyed by count; TreeSet stores its
               // values in sorted order
               TreeMap<Integer, TreeSet<String>> sortMap = new TreeMap<Integer,
TreeSet<String>>();
               for (String tok : map.keySet()) {
                       int count = map.get(tok);
                       if (sortMap.containsKey(count)) {
                               TreeSet<String> arr = sortMap.get(count);
                               arr.add(tok);
                               sortMap.put(count, arr);
                       } else {
                               TreeSet<String> arr = new TreeSet<String>();
                               arr.add(tok);
                               sortMap.put(count, arr);
                       }
               }

               // print the data

               // first reverse the keys to print data in the proper order
               ArrayList<Integer> idx = new ArrayList<Integer>();
               idx.addAll(sortMap.keySet());
               Collections.reverse(idx);

               // print it to stdout
               for (Integer key : idx) {
                       TreeSet<String> toks = sortMap.get(key);
                       for (String t : toks) {
                               System.out.printf("%s\t%s\n", key, t);
                       }
               }
       }
}


More information about the Edu-sig mailing list