Strange behavior

Virgil Stokes vs at it.uu.se
Thu Aug 16 10:31:47 EDT 2012


On 16-Aug-2012 15:02, Peter Otten wrote:
> Virgil Stokes wrote:
>
>>>> def testFunc(startingList):
>>>> xOnlyList = []; j = -1
>>>> for xl in startingList:
>>>> if (xl[0] == 'x'):
>>> That's going to fail in the starting list contains an empty string. Use
>>> xl.startswith('x') instead.
>> Yes, but this was by design (tacitly assumed that startingList was both a
>> list and non-empty).
> You missunderstood it will fail if the list contains an empty string, not if
> the list itself is empty:
>
>>>> words = ["alpha", "", "xgamma"]
>>>> [word for word in words if word[0] == "x"]
> Traceback (most recent call last):
>    File "<stdin>", line 1, in <module>
> IndexError: string index out of range
>
> The startswith() version:
>
>>>> [word for word in words if word.startswith("x")]
> ['xgamma']
>
> Also possible:
>
>>>> [word for word in words if word[:1] == "x"]
> ['xgamma']
>
>> def testFunc1(startingList):
>>       '''
>>         Algorithm-1
>>         Note:
>>           One should check for an empty startingList before
>>           calling testFunc1 -- If this possibility exists!
>>       '''
>>       return([x for x in startingList if x[0] == 'x'],
>>              [x for x in startingList if x[0] != 'x'])
>>   
>>
>> I would be interested in seeing code that is faster than algorithm-1
> In pure Python? Perhaps the messy variant:
>
> def test_func(words):
>      nox = []
>      append = nox.append
>      withx = [x for x in words if x[0] == 'x' or append(x)]
>      return withx, nox
>
>
Very nice Peter,

Here are the new results for timing with your method added (algorithm-3).

Method
	average (sd) time in seconds
algorithm-1 (list comprehension)
	0.11774 (0.002968)
algorithm-2 (S. D'Aprano)
	0.17573 (0.003385)
algorithm-2A (modified S. D'Aprano)
	0.18116 (0.003081)
algorithm-3 (improved list comprehension)
	0.06639 (0.001728)


Algorithm-3 is 43% faster than algorithm-1.  Again, the code used to obtain 
these results is attached.

Thanks Peter for your contribution
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.python.org/pipermail/python-list/attachments/20120816/6358ffd7/attachment.html>
-------------- next part --------------
'''
  Purpose: Time four different algorithms for the same task
  
  Author: V. Stokes (vs at it.uu.se, 2012-08-16 (15:46), 2012-08-16)
  Refs:
   python-list at python.org list
    * Strange behavior, 14-Aug-2012 17:38, light1quark at gmail.com
    * Re: Strange behavior, 14-Aug-2012 21:40, Stokes, Virgil
    * Re: Strange behavior, 15-Aug-2012 02:19, Steven D'Aprano
    * Re: Strange behavior, 16-Aug-2012 15:02, Peter Otten
  
  Notes:
   1. The mean and standard deviation over the runs (MC simulations)
      are estimated using recursive equations.
   2. A seed (syd) is used with the RNG for repeatability. Each run is 
      started with a new seed to force the generation of independent 
      random sequences.
   3. Warning! No checks are made on the parameters passed to
      the functions (this was by design).
   4. No effort has been made to make this code elegant. My focus was
      to make the code clear and easy to understand.
   5. This was executed on a Windows Vista 32-bit platform with Python 2.6.6
       Processor: Intel(R) core(TM)2 Duo CPU E8500 at 3.16GHz 3.17GHz
   6. The estimated time to completion is displayed after each run.    
      
'''
import random as random
import math as math
from time import clock # clock gives good resolution on MS Windows

def testFunc1(startingList): 
    '''
      Algorithm-1
      Note: 
        One should check for an empty startingList before 
        calling testFunc1 -- If this possibility exists!
    '''
    return([x for x in startingList if x[0] == 'x'], 
           [x for x in startingList if x[0] != 'x'])

def testFunc2(words):
    '''
      Algorithm-2
    '''
    words_with_x = []
    words_without_x = []
    for word in words:
        if word.startswith('x'):
            words_with_x.append(word)
        else:
            words_without_x.append(word)
    words[:] = words_without_x  # slice assignment
    return words_with_x

def testFunc2A(words):
    '''
      Algorithm-2A
    '''
    words_with_x = []
    words_without_x = []
    for word in words:
        if word.startswith('x'):
            words_with_x.append(word)
        else:
            words_without_x.append(word)
    #words[:] = words_without_x  # slice assignment
    return words_with_x, words_without_x

def testFunc3(words):
    '''
      Algorithm-3 (from: Peter Otten) 
    '''
    nox = []
    append = nox.append
    withx = [x for x in words if x[0] == 'x' or append(x)]
    return withx, nox


def genStrList(NChar,NStrng,Alph,leadChr):
    '''
     Purpose: Generate a list of NStrng elements with each element a string
              of length NChar and constrained such that approx. 50% of the 
              strings will begin with the character (symbol) leadChr
       Inputs: 
           NChar -- number of characters in each element
          NStrng -- number of elements in list (strgList)
            Alph -- list of characters to be used (must contain the leadChr)
         leadChr -- leading character for strings to be generated from Alph
       Otputs:
        strgList -- list with NString strings of NChar characters (from Alph) in each                    
    '''
    NSym = len(Alph)
    strgList = []
    for i in range(NStrng): 
        if random.random() < 0.5: 
            char = leadChr
        else: 
            while True:
                indx = random.randint(0,NSym-1)            
                char = Alph[indx]
                if char != leadChr: break
            
        for j in range(1,NChar):
            indx = random.randint(0,NSym-1)            
            char = char + Alph[indx]
            
        strgList.append(char)   
        
    return strgList

#++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alph = ['a','b','c','d','e','f','g','h','i','j','k','l','m',
        'n','o','p','q','r','s','t','u','v','w','x','y','z']

NChar = 4         # 4 chars per string
NStrng = 100000   # 100,000 strings in list 
NRuns = 100       # 100 random lists (number of MC simulations)

# For quick testing
#NChar = 4        # 4 chars per string
#NStrng = 10000   # 10,000 strings in list 
#NRuns = 20       # 20 random lists

syd = 1071       # initial seed for RNG (for repeatability)

printRunningMean = False

mu1,mu2,mu3,mu4,mug = 0.0,0.0,0.0,0.0,0.0
x1,x2,x3,x4 = 0.0,0.0,0.0,0.0
print '%5d runs with lists containing %7d elements' %(NRuns,NStrng)
for irun in range(NRuns):
    tic = clock()
    k = irun + 1
    print ' run - %2d ...' %k
    syd += 19    # new seed for each run
    
    random.seed(syd)
    #print 'Genrating list of %7d strings...' %NStrng
    strgList = genStrList(NChar,NStrng,Alph,'x')   
    
    t0 = clock()
    xOnlyList,strgList = testFunc1(strgList) # algorithm-1
    t1 = clock()    
    
    #print 'Length of xOnlyList = %7d' %len(xOnlyList)
    #print ' Length of strgList = %7d' %len(strgList)    
    etime = t1 - t0
    delta = etime - mu1
    mu1 += delta/k
    if printRunningMean:
        print '  algorithm-1 -- %9.5f' %mu1
    x1 += delta*(etime-mu1)
    
    #print
    random.seed(syd)
    #print 'Genrating list of %7d strings...' %NStrng
    strgList = genStrList(NChar,NStrng,Alph,'x')
    
    t0 = clock()
    xOnlyList = testFunc2(strgList)          # algorithm-2   
    t1 = clock()
    
    #print 'Length of xOnlyList = %7d' %len(xOnlyList)
    #print ' Length of strgList = %7d' %len(strgList)    
    etime = t1 - t0
    delta = etime - mu2
    mu2 += delta/k
    if printRunningMean:
        print '  algorithm-2 -- %9.5f' %mu2
    x2 += delta*(etime-mu2)    
    
    random.seed(syd)    
    #print 'Genrating list of %7d strings...' %NStrng
    strgList = genStrList(NChar,NStrng,Alph,'x')
    
    t0 = clock()
    xOnlyList,strgList = testFunc2A(strgList)          # algorithm-2A
    t1 = clock()
    
    #print 'Length of xOnlyList = %7d' %len(xOnlyList)
    #print ' Length of strgList = %7d' %len(strgList)    
    etime = t1 - t0
    delta = etime - mu3
    mu3 += delta/k
    if printRunningMean:
        print '  algorithm-2A -- %9.5f' %mu3
    x3 += delta*(etime-mu3)
    
    random.seed(syd)    
    #print 'Genrating list of %7d strings...' %NStrng
    strgList = genStrList(NChar,NStrng,Alph,'x')
        
    t0 = clock()
    xOnlyList,strgList = testFunc3(strgList)          # algorithm-3
    t1 = clock()
        
    #print 'Length of xOnlyList = %7d' %len(xOnlyList)
    #print ' Length of strgList = %7d' %len(strgList)    
    etime = t1 - t0
    delta = etime - mu4
    mu4 += delta/k
    if printRunningMean:
        print '  algorithm-3 -- %9.5f' %mu4
    x4 += delta*(etime-mu4)    
    
    toc = clock()
    loopTime = toc - tic
    delta = loopTime - mug
    mug += delta/k
    runsR = NRuns - k
    estToComplete = mug*runsR
    print '   ETTC = %8.2f seconds' %estToComplete
    

print
print '        Timing in seconds'
print ' ************************************'
print '    method     |     avg time (sd)'
print ' ------------------------------------'
print ' algorithm-1   | %9.5f (%8.6f)' %(mu1,math.sqrt(x1/NRuns))
print ' algorithm-2   | %9.5f (%8.6f)' %(mu2,math.sqrt(x2/NRuns))
print ' algorithm-2A  | %9.5f (%8.6f)' %(mu3,math.sqrt(x3/NRuns))
print ' algorithm-3   | %9.5f (%8.6f)' %(mu4,math.sqrt(x4/NRuns))

print
print "Th-tha-tha-that's all folks!"


More information about the Python-list mailing list