[Tutor] MapReduce
Kent Johnson
kent37 at tds.net
Tue Feb 6 12:12:49 CET 2007
Steve Nelson wrote:
> On 2/5/07, Kent Johnson <kent37 at tds.net> wrote:
>> You can also do this operation easily with dicts (not tested!):
>
> Thank you - code now complete and tests passing. Would appreciate
> comments / criticisms. I did wonder if I should create a UrlAnalyser
> Class rather than have hanging methods:
NOOOOOOOO! Python is not Java! There is nothing wrong with a "hanging
method". In Python we call them functions. :-)
The code looks fine to me.
Kent
>
> #!/usr/bin/python
> import unittest
>
> def myMap(data, search):
> """Take list of tuples of record number and url accessed and return
> list of tuples keyed by url with record number as value if search is
> in the url"""
> return [(value, key) for key, value in data if search in value]
>
> def myGroup(data):
> """Take list of tuples keyed by url with record number as value, and
> group together same urls with list of record numbers as value."""
> groups = {}
> for value, index in data:
> groups.setdefault(value, []).append(index)
> return sorted(groups.items())
>
> def myReduce(data):
> """Process list of tuples of url and record number list and return
> list of url and frequency of occurrence."""
> return [(value, len(occurences)) for value, occurences in data]
>
> class UnitTests(unittest.TestCase):
> """Do not taunt unit tests."""
>
> def setUp(self):
> pass
>
> def tearDown(self):
> pass
>
> def testMapper(self):
> """Produce set of intermediate key value pairs, with record
> content as key and record number as value, if a condition is met."""
> test_pairs = [(1, 'a'), (2, 'b'), (3, 'c'), (4, 'a'), (5, 'd')]
> intermediate_list = [('a', 1), ('a', 4)]
> self.assertEqual(myMap(test_pairs, "a"), intermediate_list)
>
> def testGrouper(self):
> """Group occurrences of a record together:
>
> [('fred', 1), ('jim', 2), ('bill', 3), ('jim', 4)] -> [('bill', [3]),
> ('fred', [1]), ('jim', [2, 4])]"""
>
> test_list = [('fred', 1), ('jim', 2), ('bill', 3), ('jim', 4)]
> grouped_list = [('bill', [3]), ('fred', [1]), ('jim', [2, 4])]
> self.assertEqual(myGroup(test_list), grouped_list)
>
> def testReduce(self):
> """Aggregate results of map and group functions to produce value
> and frequency."""
> test_intermediate = [('bill', [3]), ('fred', [1]), ('jim', [2, 4])]
> test_summary = [('bill', 1), ('fred', 1), ('jim', 2)]
> self.assertEqual(myReduce(test_intermediate), test_summary)
>
> def doTests():
> """Run our test suite"""
> suite = unittest.makeSuite(UnitTests,'test')
> runner = unittest.TextTestRunner()
> result = runner.run(suite)
> return result
>
> def main():
> """Main program here"""
> print "Analysing URL data:\n"
> url_data = [(1, 'http://www.beer.com'), (2,
> 'http://www.ban-beer.com'), (3, 'http://www.bbc.co.uk'), (4,
> 'http://www.beer.com'), (5, 'http://wwww.kernel.org')]
> print myReduce(myGroup(myMap(url_data, "beer")))
>
> if __name__ == "__main__":
> result = doTests()
> if result.wasSuccessful():
> main()
> else:
> print "Error - check test output."
>
>
More information about the Tutor
mailing list