python-more-itertools/0003-Add-map_reduce-function-196.patch
zhang-liang-pengkun 8352412e6d Add map_reduce function
Signed-off-by: zhang-liang-pengkun <zhangliangpengkun@xfusion.com>
2023-12-22 17:25:51 +08:00

167 lines
5.6 KiB
Diff

From edfb4704efe9bdb2dc6022fb4b665c62a1525a2c Mon Sep 17 00:00:00 2001
From: Bo Bayles <bbayles@gmail.com>
Date: Tue, 20 Feb 2018 06:04:55 -0600
Subject: [PATCH] Add map_reduce function (#196)
* Add map_reduce function
* Note return type
* keyfunc won't be None
* Correct typo in docstring
---
docs/api.rst | 1 +
more_itertools/more.py | 68 +++++++++++++++++++++++++++++++
more_itertools/tests/test_more.py | 35 +++++++++++++++-
3 files changed, 103 insertions(+), 1 deletion(-)
diff --git a/docs/api.rst b/docs/api.rst
index b1c0632..914e5bc 100644
--- a/docs/api.rst
+++ b/docs/api.rst
@@ -129,6 +129,7 @@ These tools return summarized or aggregated data from an iterable.
.. autofunction:: consecutive_groups(iterable, ordering=lambda x: x)
.. autofunction:: exactly_n(iterable, n, predicate=bool)
.. autoclass:: run_length
+.. autofunction:: map_reduce
----
diff --git a/more_itertools/more.py b/more_itertools/more.py
index 21a1497..c4b08ae 100644
--- a/more_itertools/more.py
+++ b/more_itertools/more.py
@@ -51,6 +51,7 @@ __all__ = [
'locate',
'lstrip',
'make_decorator',
+ 'map_reduce',
'numeric_range',
'one',
'padded',
@@ -1983,3 +1984,70 @@ def make_decorator(wrapping_func, result_index=0):
return outer_wrapper
return decorator
+
+
+def map_reduce(iterable, keyfunc, valuefunc=None, reducefunc=None):
+ """Return a dictionary that maps the items in *iterable* to categories
+ defined by *keyfunc*, transforms them with *valuefunc*, and
+ then summarizes them by category with *reducefunc*.
+
+ *valuefunc* defaults to the identity function if it is unspecified.
+ If *reducefunc* is unspecified, no summarization takes place:
+
+ >>> keyfunc = lambda x: x.upper()
+ >>> result = map_reduce('abbccc', keyfunc)
+ >>> sorted(result.items())
+ [('A', ['a']), ('B', ['b', 'b']), ('C', ['c', 'c', 'c'])]
+
+ Specifying *valuefunc* transforms the categorized items:
+
+ >>> keyfunc = lambda x: x.upper()
+ >>> valuefunc = lambda x: 1
+ >>> result = map_reduce('abbccc', keyfunc, valuefunc)
+ >>> sorted(result.items())
+ [('A', [1]), ('B', [1, 1]), ('C', [1, 1, 1])]
+
+ Specifying *reducefunc* summarizes the categorized items:
+
+ >>> keyfunc = lambda x: x.upper()
+ >>> valuefunc = lambda x: 1
+ >>> reducefunc = sum
+ >>> result = map_reduce('abbccc', keyfunc, valuefunc, reducefunc)
+ >>> sorted(result.items())
+ [('A', 1), ('B', 2), ('C', 3)]
+
+ You may want to filter the input iterable before applying the map/reduce
+ procedure:
+
+ >>> all_items = range(30)
+ >>> items = [x for x in all_items if 10 <= x <= 20] # Filter
+ >>> keyfunc = lambda x: x % 2 # Evens map to 0; odds to 1
+ >>> categories = map_reduce(items, keyfunc=keyfunc)
+ >>> sorted(categories.items())
+ [(0, [10, 12, 14, 16, 18, 20]), (1, [11, 13, 15, 17, 19])]
+ >>> summaries = map_reduce(items, keyfunc=keyfunc, reducefunc=sum)
+ >>> sorted(summaries.items())
+ [(0, 90), (1, 75)]
+
+ Note that all items in the iterable are gathered into a list before the
+ summarization step, which may require significant storage.
+
+ The returned object is a :obj:`collections.defaultdict` with the
+ ``default_factory`` set to ``None``, such that it behaves like a normal
+ dictionary.
+
+ """
+ valuefunc = (lambda x: x) if (valuefunc is None) else valuefunc
+
+ ret = defaultdict(list)
+ for item in iterable:
+ key = keyfunc(item)
+ value = valuefunc(item)
+ ret[key].append(value)
+
+ if reducefunc is not None:
+ for key, value_list in ret.items():
+ ret[key] = reducefunc(value_list)
+
+ ret.default_factory = None
+ return ret
diff --git a/more_itertools/tests/test_more.py b/more_itertools/tests/test_more.py
index d8ada9c..0ac4abd 100644
--- a/more_itertools/tests/test_more.py
+++ b/more_itertools/tests/test_more.py
@@ -15,7 +15,7 @@ from itertools import (
product,
repeat,
)
-from operator import add, itemgetter
+from operator import add, mul, itemgetter
from unittest import TestCase
from six.moves import filter, map, range, zip
@@ -1792,3 +1792,36 @@ class MakeDecoratorTests(TestCase):
it.seek(0)
self.assertEqual(list(it), ['0', '1', '2', '3', '4'])
+
+
+class MapReduceTests(TestCase):
+ def test_default(self):
+ iterable = (str(x) for x in range(5))
+ keyfunc = lambda x: int(x) // 2
+ actual = sorted(mi.map_reduce(iterable, keyfunc).items())
+ expected = [(0, ['0', '1']), (1, ['2', '3']), (2, ['4'])]
+ self.assertEqual(actual, expected)
+
+ def test_valuefunc(self):
+ iterable = (str(x) for x in range(5))
+ keyfunc = lambda x: int(x) // 2
+ valuefunc = int
+ actual = sorted(mi.map_reduce(iterable, keyfunc, valuefunc).items())
+ expected = [(0, [0, 1]), (1, [2, 3]), (2, [4])]
+ self.assertEqual(actual, expected)
+
+ def test_reducefunc(self):
+ iterable = (str(x) for x in range(5))
+ keyfunc = lambda x: int(x) // 2
+ valuefunc = int
+ reducefunc = lambda value_list: reduce(mul, value_list, 1)
+ actual = sorted(
+ mi.map_reduce(iterable, keyfunc, valuefunc, reducefunc).items()
+ )
+ expected = [(0, 0), (1, 6), (2, 4)]
+ self.assertEqual(actual, expected)
+
+ def test_ret(self):
+ d = mi.map_reduce([1, 0, 2, 0, 1, 0], bool)
+ self.assertEqual(d, {False: [0, 0, 0], True: [1, 2, 1]})
+ self.assertRaises(KeyError, lambda: d[None].append(1))
--
2.39.0.windows.2