[Python-checkins] r68789 - in python/trunk: Doc/library/collections.rst Lib/collections.py Lib/test/test_collections.py

raymond.hettinger python-checkins at python.org
Tue Jan 20 02:19:26 CET 2009


Author: raymond.hettinger
Date: Tue Jan 20 02:19:26 2009
New Revision: 68789
Log:
Build-outs for Counter() class:
* Constructor and update() support keyword args (like their dict counterparts).
* The 'del' statement no longer raises KeyError for missing values.
* Add multiset operations: __add__, __sub__, __and__, __or__.
Modified:
 python/trunk/Doc/library/collections.rst
 python/trunk/Lib/collections.py
 python/trunk/Lib/test/test_collections.py
Modified: python/trunk/Doc/library/collections.rst
==============================================================================
--- python/trunk/Doc/library/collections.rst	(original)
+++ python/trunk/Doc/library/collections.rst	Tue Jan 20 02:19:26 2009
@@ -189,6 +189,7 @@
 >>> c = Counter() # a new, empty counter
 >>> c = Counter('gallahad') # a new counter from an iterable
 >>> c = Counter({'red': 4, 'blue': 2}) # a new counter from a mapping
+ >>> c = Counter(spam=8, eggs=1) # a new counter from keyword args
 
 The returned object has a dictionary style interface except that it returns
 a zero count for missing items (instead of raising a :exc:`KeyError` like a
@@ -219,7 +220,7 @@
 Elements are returned in arbitrary order. If an element's count has been
 set to zero or a negative number, :meth:`elements` will ignore it.
 
- >>> c = Counter({'a': 4, 'b': 2, 'd': 0, 'e': -2})
+ >>> c = Counter(a=4, b=2, c=0, d=-2)
 >>> list(c.elements())
 ['a', 'a', 'a', 'a', 'b', 'b']
 
@@ -244,10 +245,10 @@
 
 .. method:: update([iterable-or-mapping])
 
- Like :meth:`dict.update` but adds-in counts instead of replacing them.
-
 Elements are counted from an *iterable* or added-in from another
- *mapping* (or counter)::
+ *mapping* (or counter). Like :meth:`dict.update` but adds-in counts
+ instead of replacing them, and the *iterable* is expected to be a
+ sequence of elements, not a sequence of ``(key, value)`` pairs::
 
 >>> c = Counter('which')
 >>> c.update('witch') # add elements from another iterable
@@ -267,6 +268,34 @@
 Counter(dict(list_of_pairs)) # convert from a list of (elem, cnt) pairs
 c.most_common()[:-n:-1] # n least common elements
 
+Several multiset mathematical operations are provided for combining
+:class:`Counter` objects. Multisets are like regular sets but are allowed to
+contain repeated elements (with counts of one or more). Addition and
+subtraction combine counters by adding or subtracting the counts of
+corresponding elements. Intersection and union return the minimum and maximum
+of corresponding counts::
+
+ >>> c = Counter({'a': 3, 'b': 1})
+ >>> d = Counter({'a': 1, 'b': 2})
+ >>> c + d # add two counters together: c[x] + d[x]
+ Counter({'a': 4, 'b': 3})
+ >>> c - d # subtract (keeping only positive counts)
+ Counter({'a': 2})
+ >>> c & d # intersection: min(c[x], d[x])
+ Counter({'a': 1, 'b': 1})
+ >>> c | d # union: max(c[x], d[x])
+ Counter({'a': 3, 'b': 2})
+
+All four multiset operations produce only positive counts (negative and zero
+results are skipped). If inputs include negative counts, addition will sum
+both counts and then exclude non-positive results. The other three operations
+are undefined for negative inputs::
+
+ >>> e = Counter(a=8, b=-2, c=0)
+ >>> e += Counter() # remove zero and negative counts
+ >>> e
+ Counter({'a': 8})
+
 **References**:
 
 * Wikipedia entry for `Multisets <http://en.wikipedia.org/wiki/Multiset>`_
Modified: python/trunk/Lib/collections.py
==============================================================================
--- python/trunk/Lib/collections.py	(original)
+++ python/trunk/Lib/collections.py	Tue Jan 20 02:19:26 2009
@@ -10,7 +10,7 @@
 from keyword import iskeyword as _iskeyword
 import sys as _sys
 import heapq as _heapq
-from itertools import repeat as _repeat, chain as _chain, starmap as _starmap
+from itertools import repeat as _repeat, chain as _chain, starmap as _starmap, ifilter as _ifilter
 
 ########################################################################
 ### namedtuple #######################################################
@@ -167,7 +167,7 @@
 # http://code.activestate.com/recipes/259174/
 # Knuth, TAOCP Vol. II section 4.6.3
 
- def __init__(self, iterable=None):
+ def __init__(self, iterable=None, **kwds):
 '''Create a new, empty Counter object. And if given, count elements
 from an input iterable. Or, initialize the count from another mapping
 of elements to their counts.
@@ -175,9 +175,10 @@
 >>> c = Counter() # a new, empty counter
 >>> c = Counter('gallahad') # a new counter from an iterable
 >>> c = Counter({'a': 4, 'b': 2}) # a new counter from a mapping
+ >>> c = Counter(a=4, b=2) # a new counter from keyword args
 
 '''
- self.update(iterable)
+ self.update(iterable, **kwds)
 
 def __missing__(self, key):
 'The count of elements not in the Counter is zero.'
@@ -228,7 +229,7 @@
 raise NotImplementedError(
 'Counter.fromkeys() is undefined. Use Counter(iterable) instead.')
 
- def update(self, iterable=None):
+ def update(self, iterable=None, **kwds):
 '''Like dict.update() but add counts instead of replacing them.
 
 Source can be an iterable, a dictionary, or another Counter instance.
@@ -245,10 +246,8 @@
 # replace behavior results in the some of original untouched counts
 # being mixed-in with all of the other counts for a mismash that
 # doesn't have a straight-forward interpretation in most counting
- # contexts. Instead, we look to Knuth for suggested operations on
- # multisets and implement the union-add operation discussed in
- # TAOCP Volume II section 4.6.3 exercise 19. The Wikipedia entry for
- # multisets calls that operation a sum or join.
+ # contexts. Instead, we implement straight-addition. Both the inputs
+ # and outputs are allowed to contain zero and negative counts.
 
 if iterable is not None:
 if isinstance(iterable, Mapping):
@@ -257,17 +256,101 @@
 else:
 for elem in iterable:
 self[elem] += 1
+ if kwds:
+ self.update(kwds)
 
 def copy(self):
 'Like dict.copy() but returns a Counter instance instead of a dict.'
 return Counter(self)
 
+ def __delitem__(self, elem):
+ 'Like dict.__delitem__() but does not raise KeyError for missing values.'
+ if elem in self:
+ dict.__delitem__(self, elem)
+
 def __repr__(self):
 if not self:
 return '%s()' % self.__class__.__name__
 items = ', '.join(map('%r: %r'.__mod__, self.most_common()))
 return '%s({%s})' % (self.__class__.__name__, items)
 
+ # Multiset-style mathematical operations discussed in:
+ # Knuth TAOCP Volume II section 4.6.3 exercise 19
+ # and at http://en.wikipedia.org/wiki/Multiset
+ #
+ # Results are undefined when inputs contain negative counts.
+ # Outputs guaranteed to only include positive counts.
+ #
+ # To strip negative and zero counts, add-in an empty counter:
+ # c += Counter()
+
+ def __add__(self, other):
+ '''Add counts from two counters.
+
+ >>> Counter('abbb') + Counter('bcc')
+ Counter({'b': 4, 'c': 2, 'a': 1})
+
+ '''
+ if not isinstance(other, Counter):
+ return NotImplemented
+ result = Counter()
+ for elem in set(self) | set(other):
+ newcount = self[elem] + other[elem]
+ if newcount > 0:
+ result[elem] = newcount
+ return result
+
+ def __sub__(self, other):
+ ''' Subtract count, but keep only results with positive counts.
+
+ >>> Counter('abbbc') - Counter('bccd')
+ Counter({'b': 2, 'a': 1})
+
+ '''
+ if not isinstance(other, Counter):
+ return NotImplemented
+ result = Counter()
+ for elem, count in self.iteritems():
+ newcount = count - other[elem]
+ if newcount > 0:
+ result[elem] = newcount
+ return result
+
+ def __or__(self, other):
+ '''Union is the maximum of value in either of the input counters.
+
+ >>> Counter('abbb') | Counter('bcc')
+ Counter({'b': 3, 'c': 2, 'a': 1})
+
+ '''
+ if not isinstance(other, Counter):
+ return NotImplemented
+ _max = max
+ result = Counter()
+ for elem in set(self) | set(other):
+ newcount = _max(self[elem], other[elem])
+ if newcount > 0:
+ result[elem] = newcount
+ return result
+
+ def __and__(self, other):
+ ''' Intersection is the minimum of corresponding counts.
+
+ >>> Counter('abbb') & Counter('bcc')
+ Counter({'b': 1})
+
+ '''
+ if not isinstance(other, Counter):
+ return NotImplemented
+ _min = min
+ result = Counter()
+ if len(self) < len(other):
+ self, other = other, self
+ for elem in _ifilter(self.__contains__, other):
+ newcount = _min(self[elem], other[elem])
+ if newcount > 0:
+ result[elem] = newcount
+ return result
 
 
 if __name__ == '__main__':
Modified: python/trunk/Lib/test/test_collections.py
==============================================================================
--- python/trunk/Lib/test/test_collections.py	(original)
+++ python/trunk/Lib/test/test_collections.py	Tue Jan 20 02:19:26 2009
@@ -2,6 +2,8 @@
 from test import test_support
 from collections import namedtuple, Counter, Mapping
 import pickle, cPickle, copy
+from random import randrange
+import operator
 from collections import Hashable, Iterable, Iterator
 from collections import Sized, Container, Callable
 from collections import Set, MutableSet
@@ -350,6 +352,8 @@
 
 def test_basics(self):
 c = Counter('abcaba')
+ self.assertEqual(c, Counter({'a':3 , 'b': 2, 'c': 1}))
+ self.assertEqual(c, Counter(a=3, b=2, c=1))
 self.assert_(isinstance(c, dict))
 self.assert_(isinstance(c, Mapping))
 self.assert_(issubclass(Counter, dict))
@@ -379,6 +383,7 @@
 c['a'] += 1 # increment an existing value
 c['b'] -= 2 # sub existing value to zero
 del c['c'] # remove an entry
+ del c['c'] # make sure that del doesn't raise KeyError
 c['d'] -= 2 # sub from a missing value
 c['e'] = -5 # directly assign a missing value
 c['f'] += 4 # add to a missing value
@@ -394,7 +399,8 @@
 self.assertEqual(repr(c), 'Counter()')
 self.assertRaises(NotImplementedError, Counter.fromkeys, 'abc')
 self.assertRaises(TypeError, hash, c)
- c.update(dict(a=5, b=3, c=1))
+ c.update(dict(a=5, b=3))
+ c.update(c=1)
 c.update(Counter('a' * 50 + 'b' * 30))
 c.update() # test case with no args
 c.__init__('a' * 500 + 'b' * 300)
@@ -442,6 +448,43 @@
 self.assertEqual(dict(Counter(s)), dict(Counter(s).items()))
 self.assertEqual(set(Counter(s)), set(s))
 
+ def test_multiset_operations(self):
+ # Verify that adding a zero counter will strip zeros and negatives
+ c = Counter(a=10, b=-2, c=0) + Counter()
+ self.assertEqual(dict(c), dict(a=10))
+
+ elements = 'abcd'
+ for i in range(1000):
+ # test random pairs of multisets
+ p = Counter(dict((elem, randrange(-2,4)) for elem in elements))
+ q = Counter(dict((elem, randrange(-2,4)) for elem in elements))
+ for counterop, numberop, defneg in [
+ (Counter.__add__, lambda x, y: x+y if x+y>0 else 0, True),
+ (Counter.__sub__, lambda x, y: x-y if x-y>0 else 0, False),
+ (Counter.__or__, max, False),
+ (Counter.__and__, min, False),
+ ]:
+ result = counterop(p, q)
+ for x in elements:
+ # all except __add__ are undefined for negative inputs
+ if defneg or (p[x] >= 0 and q[x] >= 0):
+ self.assertEqual(numberop(p[x], q[x]), result[x])
+ # verify that results exclude non-positive counts
+ self.assert_(x>0 for x in result.values())
+
+ elements = 'abcdef'
+ for i in range(100):
+ # verify that random multisets with no repeats are exactly like sets
+ p = Counter(dict((elem, randrange(0, 2)) for elem in elements))
+ q = Counter(dict((elem, randrange(0, 2)) for elem in elements))
+ for counterop, setop in [
+ (Counter.__sub__, set.__sub__),
+ (Counter.__or__, set.__or__),
+ (Counter.__and__, set.__and__),
+ ]:
+ counter_result = counterop(p, q)
+ set_result = setop(set(p.elements()), set(q.elements()))
+ self.assertEqual(counter_result, dict.fromkeys(set_result, 1))
 
 import doctest, collections
 


More information about the Python-checkins mailing list

AltStyle によって変換されたページ (->オリジナル) /