From edfb4704efe9bdb2dc6022fb4b665c62a1525a2c Mon Sep 17 00:00:00 2001 From: Bo Bayles Date: Tue, 20 Feb 2018 06:04:55 -0600 Subject: [PATCH] Add map_reduce function (#196) * Add map_reduce function * Note return type * keyfunc won't be None * Correct typo in docstring --- docs/api.rst | 1 + more_itertools/more.py | 68 +++++++++++++++++++++++++++++++ more_itertools/tests/test_more.py | 35 +++++++++++++++- 3 files changed, 103 insertions(+), 1 deletion(-) diff --git a/docs/api.rst b/docs/api.rst index b1c0632..914e5bc 100644 --- a/docs/api.rst +++ b/docs/api.rst @@ -129,6 +129,7 @@ These tools return summarized or aggregated data from an iterable. .. autofunction:: consecutive_groups(iterable, ordering=lambda x: x) .. autofunction:: exactly_n(iterable, n, predicate=bool) .. autoclass:: run_length +.. autofunction:: map_reduce ---- diff --git a/more_itertools/more.py b/more_itertools/more.py index 21a1497..c4b08ae 100644 --- a/more_itertools/more.py +++ b/more_itertools/more.py @@ -51,6 +51,7 @@ __all__ = [ 'locate', 'lstrip', 'make_decorator', + 'map_reduce', 'numeric_range', 'one', 'padded', @@ -1983,3 +1984,70 @@ def make_decorator(wrapping_func, result_index=0): return outer_wrapper return decorator + + +def map_reduce(iterable, keyfunc, valuefunc=None, reducefunc=None): + """Return a dictionary that maps the items in *iterable* to categories + defined by *keyfunc*, transforms them with *valuefunc*, and + then summarizes them by category with *reducefunc*. + + *valuefunc* defaults to the identity function if it is unspecified. 
+ If *reducefunc* is unspecified, no summarization takes place: + + >>> keyfunc = lambda x: x.upper() + >>> result = map_reduce('abbccc', keyfunc) + >>> sorted(result.items()) + [('A', ['a']), ('B', ['b', 'b']), ('C', ['c', 'c', 'c'])] + + Specifying *valuefunc* transforms the categorized items: + + >>> keyfunc = lambda x: x.upper() + >>> valuefunc = lambda x: 1 + >>> result = map_reduce('abbccc', keyfunc, valuefunc) + >>> sorted(result.items()) + [('A', [1]), ('B', [1, 1]), ('C', [1, 1, 1])] + + Specifying *reducefunc* summarizes the categorized items: + + >>> keyfunc = lambda x: x.upper() + >>> valuefunc = lambda x: 1 + >>> reducefunc = sum + >>> result = map_reduce('abbccc', keyfunc, valuefunc, reducefunc) + >>> sorted(result.items()) + [('A', 1), ('B', 2), ('C', 3)] + + You may want to filter the input iterable before applying the map/reduce + procedure: + + >>> all_items = range(30) + >>> items = [x for x in all_items if 10 <= x <= 20] # Filter + >>> keyfunc = lambda x: x % 2 # Evens map to 0; odds to 1 + >>> categories = map_reduce(items, keyfunc=keyfunc) + >>> sorted(categories.items()) + [(0, [10, 12, 14, 16, 18, 20]), (1, [11, 13, 15, 17, 19])] + >>> summaries = map_reduce(items, keyfunc=keyfunc, reducefunc=sum) + >>> sorted(summaries.items()) + [(0, 90), (1, 75)] + + Note that all items in the iterable are gathered into a list before the + summarization step, which may require significant storage. + + The returned object is a :obj:`collections.defaultdict` with the + ``default_factory`` set to ``None``, such that it behaves like a normal + dictionary. 
+ + """ + valuefunc = (lambda x: x) if (valuefunc is None) else valuefunc + + ret = defaultdict(list) + for item in iterable: + key = keyfunc(item) + value = valuefunc(item) + ret[key].append(value) + + if reducefunc is not None: + for key, value_list in ret.items(): + ret[key] = reducefunc(value_list) + + ret.default_factory = None + return ret diff --git a/more_itertools/tests/test_more.py b/more_itertools/tests/test_more.py index d8ada9c..0ac4abd 100644 --- a/more_itertools/tests/test_more.py +++ b/more_itertools/tests/test_more.py @@ -15,7 +15,7 @@ from itertools import ( product, repeat, ) -from operator import add, itemgetter +from operator import add, mul, itemgetter from unittest import TestCase from six.moves import filter, map, range, zip @@ -1792,3 +1792,36 @@ class MakeDecoratorTests(TestCase): it.seek(0) self.assertEqual(list(it), ['0', '1', '2', '3', '4']) + + +class MapReduceTests(TestCase): + def test_default(self): + iterable = (str(x) for x in range(5)) + keyfunc = lambda x: int(x) // 2 + actual = sorted(mi.map_reduce(iterable, keyfunc).items()) + expected = [(0, ['0', '1']), (1, ['2', '3']), (2, ['4'])] + self.assertEqual(actual, expected) + + def test_valuefunc(self): + iterable = (str(x) for x in range(5)) + keyfunc = lambda x: int(x) // 2 + valuefunc = int + actual = sorted(mi.map_reduce(iterable, keyfunc, valuefunc).items()) + expected = [(0, [0, 1]), (1, [2, 3]), (2, [4])] + self.assertEqual(actual, expected) + + def test_reducefunc(self): + iterable = (str(x) for x in range(5)) + keyfunc = lambda x: int(x) // 2 + valuefunc = int + reducefunc = lambda value_list: reduce(mul, value_list, 1) + actual = sorted( + mi.map_reduce(iterable, keyfunc, valuefunc, reducefunc).items() + ) + expected = [(0, 0), (1, 6), (2, 4)] + self.assertEqual(actual, expected) + + def test_ret(self): + d = mi.map_reduce([1, 0, 2, 0, 1, 0], bool) + self.assertEqual(d, {False: [0, 0, 0], True: [1, 2, 1]}) + self.assertRaises(KeyError, lambda: d[None].append(1)) -- 
2.39.0.windows.2