[Python-checkins] python/dist/src/Lib random.py,1.37,1.38
rhettinger@users.sourceforge.net
rhettinger@users.sourceforge.net
Wed, 13 Nov 2002 07:26:40 -0800
Update of /cvsroot/python/python/dist/src/Lib
In directory usw-pr-cvs1:/tmp/cvs-serv11660
Modified Files:
random.py
Log Message:
Improved clarity and thoroughness of docstring.
Added design notes in comments.
Used better variable names.
Eliminated the unsavory "pool[-k:]" which was an aspiring bug (for k==0).
Used if/else to show the two algorithms in parallel style.
Added one more test assertion.
Index: random.py
===================================================================
RCS file: /cvsroot/python/python/dist/src/Lib/random.py,v
retrieving revision 1.37
retrieving revision 1.38
diff -C2 -d -r1.37 -r1.38
*** random.py 13 Nov 2002 13:25:46 -0000 1.37
--- random.py 13 Nov 2002 15:26:37 -0000 1.38
***************
*** 378,390 ****
"""Chooses k unique random elements from a population sequence.
! Returns a new list containing elements from the population. The
! list itself is in random order so that all sub-slices are also
! random samples. The original sequence is left undisturbed.
! If the population has repeated elements, then each occurrence is
! a possible selection in the sample.
! If indices are needed for a large population, use xrange as an
! argument: sample(xrange(10000000), 60)
Optional arg random is a 0-argument function returning a random
--- 378,394 ----
"""Chooses k unique random elements from a population sequence.
! Returns a new list containing elements from the population while
! leaving the original population unchanged. The resulting list is
! in selection order so that all sub-slices will also be valid random
! samples. This allows raffle winners (the sample) to be partitioned
! into grand prize and second place winners (the subslices).
! Members of the population need not be hashable or unique. If the
! population contains repeats, then each occurrence is a possible
! selection in the sample.
! To choose a sample in a range of integers, use xrange as an argument.
! This is especially fast and space efficient for sampling from a
! large population: sample(xrange(10000000), 60)
Optional arg random is a 0-argument function returning a random
***************
*** 392,395 ****
--- 396,414 ----
"""
+ # Sampling without replacement entails tracking either potential
+ # selections (the pool) or previous selections.
+
+ # Pools are stored in lists which provide __getitem__ for selection
+ # and provide a way to remove selections. But each list.remove()
+ # rebuilds the entire list, so it is better to rearrange the list,
+ # placing non-selected elements at the head of the list. Tracking
+ # the selection pool is only space efficient with small populations.
+
+ # Previous selections are stored in dictionaries which provide
+ # __contains__ for detecting repeat selections. Discarding repeats
+ # is efficient unless most of the population has already been chosen.
+ # So, tracking selections is useful when sample sizes are much
+ # smaller than the total population.
+
n = len(population)
if not 0 <= k <= n:
***************
*** 397,414 ****
if random is None:
random = self.random
if n < 6 * k: # if n len list takes less space than a k len dict
! pool = list(population)
! for i in xrange(n-1, n-k-1, -1):
! j = int(random() * (i+1))
! pool[i], pool[j] = pool[j], pool[i]
! return pool[-k:]
! inorder = [None] * k
! selections = {}
! for i in xrange(k):
! j = int(random() * n)
! while j in selections:
j = int(random() * n)
! selections[j] = inorder[i] = population[j]
! return inorder # return selections in the order they were picked
## -------------------- real-valued distributions -------------------
--- 416,434 ----
if random is None:
random = self.random
+ result = [None] * k
if n < 6 * k: # if n len list takes less space than a k len dict
! pool = list(population) # track potential selections
! for i in xrange(k):
! j = int(random() * (n-i)) # non-selected at [0,n-i)
! result[i] = pool[j] # save selected element
! pool[j] = pool[n-i-1] # non-selected to head of list
! else:
! selected = {} # track previous selections
! for i in xrange(k):
j = int(random() * n)
! while j in selected: # discard and replace repeats
! j = int(random() * n)
! result[i] = selected[j] = population[j]
! return result # return selections in the order they were picked
## -------------------- real-valued distributions -------------------
***************
*** 757,760 ****
--- 777,781 ----
s = sample(population, k)
assert len(dict([(elem,True) for elem in s])) == len(s) == k
+ assert None not in s
def _sample_generator(n, k):
***************
*** 788,792 ****
_test_generator(N, '_sample_generator(50, 5)') # expected s.d.: 14.4
_test_generator(N, '_sample_generator(50, 45)') # expected s.d.: 14.4
! _test_sample(1000)
# Test jumpahead.
--- 809,813 ----
_test_generator(N, '_sample_generator(50, 5)') # expected s.d.: 14.4
_test_generator(N, '_sample_generator(50, 45)') # expected s.d.: 14.4
! _test_sample(500)
# Test jumpahead.