[Python-checkins] r84576 - in python/branches/py3k: Doc/library/random.rst Lib/random.py Lib/test/test_random.py Misc/NEWS

Tue Sep 7 06:44:52 CEST 2010

Author: raymond.hettinger
Date: Tue Sep  7 06:44:52 2010
New Revision: 84576

Log:
Issues #7889, #9025 and #9379:  Improvements to the random module.

Modified:
   python/branches/py3k/Doc/library/random.rst
   python/branches/py3k/Lib/random.py
   python/branches/py3k/Lib/test/test_random.py
   python/branches/py3k/Misc/NEWS

Modified: python/branches/py3k/Doc/library/random.rst
==============================================================================

--- python/branches/py3k/Doc/library/random.rst	(original)
+++ python/branches/py3k/Doc/library/random.rst	Tue Sep  7 06:44:52 2010
@@ -98,6 +98,13 @@
    equivalent to ``choice(range(start, stop, step))``, but doesn't actually build a
    range object.
 
+   The positional argument pattern matches that of :func:`range`.  Keyword arguments
+   should not be used because the function may use them in unexpected ways.
+
+   .. versionchanged:: 3.2
+      :meth:`randrange` is more sophisticated about producing equally distributed
+      values.  Formerly it used a style like ``int(random()*n)`` which could produce
+      slightly uneven distributions.
 
 .. function:: randint(a, b)
 

Modified: python/branches/py3k/Lib/random.py
==============================================================================
--- python/branches/py3k/Lib/random.py	(original)
+++ python/branches/py3k/Lib/random.py	Tue Sep  7 06:44:52 2010
@@ -161,7 +161,7 @@
 
 ## -------------------- integer methods  -------------------
 
-    def randrange(self, start, stop=None, step=1, int=int, maxwidth=1<<BPF):
+    def randrange(self, start, stop=None, step=1, int=int):
         """Choose a random item from range(start, stop[, step]).
 
         This fixes the problem with randint() which includes the
@@ -177,9 +177,7 @@
             raise ValueError("non-integer arg 1 for randrange()")
         if stop is None:
             if istart > 0:
-                if istart >= maxwidth:
-                    return self._randbelow(istart)
-                return int(self.random() * istart)
+                return self._randbelow(istart)
             raise ValueError("empty range for randrange()")
 
         # stop argument supplied.
@@ -201,9 +199,7 @@
             # a long, but we're supposed to return an int (for backward
             # compatibility).
 
-            if width >= maxwidth:
-                return int(istart + self._randbelow(width))
-            return int(istart + int(self.random()*width))
+            return int(istart + self._randbelow(width))
         if step == 1:
             raise ValueError("empty range for randrange() (%d,%d, %d)" % (istart, istop, width))
 
@@ -221,9 +217,7 @@
         if n <= 0:
             raise ValueError("empty range for randrange()")
 
-        if n >= maxwidth:
-            return istart + istep*self._randbelow(n)
-        return istart + istep*int(self.random() * n)
+        return istart + istep*self._randbelow(n)
 
     def randint(self, a, b):
         """Return random integer in range [a, b], including both end points.
@@ -231,7 +225,7 @@
 
         return self.randrange(a, b+1)
 
-    def _randbelow(self, n, _log=_log, int=int, _maxwidth=1<<BPF,
+    def _randbelow(self, n, int=int, _maxwidth=1<<BPF, type=type,
                    _Method=_MethodType, _BuiltinMethod=_BuiltinMethodType):
         """Return a random int in the range [0,n)
 
@@ -248,8 +242,8 @@
             # has not been overridden or if a new getrandbits() was supplied.
             # This assures that the two methods correspond.
             if type(self.random) is _BuiltinMethod or type(getrandbits) is _Method:
-                k = int(1.00001 + _log(n-1, 2.0))   # 2**k > n-1 > 2**(k-2)
-                r = getrandbits(k)
+                k = n.bit_length()  # don't use (n-1) here because n can be 1
+                r = getrandbits(k)  # 0 <= r < 2**k
                 while r >= n:
                     r = getrandbits(k)
                 return r
@@ -262,7 +256,7 @@
 
     def choice(self, seq):
         """Choose a random element from a non-empty sequence."""
-        return seq[int(self.random() * len(seq))]  # raises IndexError if seq is empty
+        return seq[self._randbelow(len(seq))]   # raises IndexError if seq is empty
 
     def shuffle(self, x, random=None, int=int):
         """x, random=random.random -> shuffle list x in place; return None.
@@ -272,11 +266,15 @@
         """
 
         if random is None:
-            random = self.random
-        for i in reversed(range(1, len(x))):
-            # pick an element in x[:i+1] with which to exchange x[i]
-            j = int(random() * (i+1))
-            x[i], x[j] = x[j], x[i]
+            for i in reversed(range(1, len(x))):
+                # pick an element in x[:i+1] with which to exchange x[i]
+                j = self._randbelow(i+1)
+                x[i], x[j] = x[j], x[i]
+        else:
+            for i in reversed(range(1, len(x))):
+                # pick an element in x[:i+1] with which to exchange x[i]
+                j = int(random() * (i+1))
+                x[i], x[j] = x[j], x[i]
 
     def sample(self, population, k):
         """Chooses k unique random elements from a population sequence or set.
@@ -314,7 +312,6 @@
         n = len(population)
         if not 0 <= k <= n:
             raise ValueError("Sample larger than population")
-        _int = int
         result = [None] * k
         setsize = 21        # size of a small set minus size of an empty list
         if k > 5:
@@ -323,16 +320,16 @@
             # An n-length list is smaller than a k-length set
             pool = list(population)
             for i in range(k):         # invariant:  non-selected at [0,n-i)
-                j = _int(random() * (n-i))
+                j = self._randbelow(n-i)
                 result[i] = pool[j]
                 pool[j] = pool[n-i-1]   # move non-selected item into vacancy
         else:
             selected = set()
             selected_add = selected.add
             for i in range(k):
-                j = _int(random() * n)
+                j = self._randbelow(n)
                 while j in selected:
-                    j = _int(random() * n)
+                    j = self._randbelow(n)
                 selected_add(j)
                 result[i] = population[j]
         return result

Modified: python/branches/py3k/Lib/test/test_random.py
==============================================================================
--- python/branches/py3k/Lib/test/test_random.py	(original)
+++ python/branches/py3k/Lib/test/test_random.py	Tue Sep  7 06:44:52 2010
@@ -121,7 +121,15 @@
             f = open(support.findfile(file),"rb")
             r = pickle.load(f)
             f.close()
-            self.assertEqual(r.randrange(1000), value)
+            self.assertEqual(int(r.random()*1000), value)
+
+    def test_bug_9025(self):
+        # Had problem with an uneven distribution in int(n*random())
+        # Verify the fix by checking that distributions fall within expectations.
+        n = 100000
+        randrange = self.gen.randrange
+        k = sum(randrange(6755399441055744) % 3 == 2 for i in range(n))
+        self.assertTrue(0.30 < k/n < .37, (k/n))
 
 class SystemRandom_TestBasicOps(TestBasicOps):
     gen = random.SystemRandom()

Modified: python/branches/py3k/Misc/NEWS
==============================================================================
--- python/branches/py3k/Misc/NEWS	(original)
+++ python/branches/py3k/Misc/NEWS	Tue Sep  7 06:44:52 2010
@@ -19,7 +19,16 @@
     across versions and which parts are subject to change.
 
   * Update the seed() method to use all of the bits in a string
-    instead of just the hash value.
+    instead of just the hash value.  This makes better use of the
+    seed value and assures the seeding is platform independent.
+    Issue #7889.
+
+  * Improved the random()-->integer algorithm used in choice(),
+    shuffle(), sample(), randrange(), and randint().  Formerly, it
+    used int(n*random()) which has a slight bias whenever n is not
+    a power of two.  Issue #9025.
+
+  * Improved documentation of arguments to randrange().  Issue #9379.
 
 - collections.OrderedDict now supports a new method for repositioning
   keys to either end.