Does Python allow access to some of the implementation details?

Sat Jan 7 08:05:18 EST 2006

mensanator at aol.com wrote:
> Claudio Grondi wrote:
> 
>>Martin v. Löwis wrote:
>>
>>>You can get somewhat faster in Python than your code if you avoid
>>>producing new long objects all the time, and do the task in chunks of 30
>>>bits.
>>
>>It would be nice if you could explain why you consider chunks of 30 bits
>>to be superior e.g. to chunks of 32 bits?
>>
>>
>>>write a C file that is a Python module
>>
>>If I understand you right, there is no way to get direct access to
>>internal representation of Python objects by Python script language
>>means provided in the standard Python 2.4.2 distribution.
>>So the answer to the question in the subject is : NO (valid for what is
>>  provided in the standard Python distribution, but not valid when
>>considering to write an own extension module in C)
> 
> 
> No need to re-invent the wheel. That extension has already been
> written. The GMPY module has a full suite of bit-functions:
> 
> digits(...)
>     digits(x[,base]): returns Python string representing x in the
>     given base (2 to 36, default 10 if omitted or 0); leading '-'
>     present if x<0, but no leading '+' if x>=0. x must be an mpz,
>     or else gets coerced into one.
That's nice function. A pity it's not in the standard Python distro as 
the inversion of the int() operation.
What I am also looking for is a conversion to base 256 (i.e where the 
full byte is used and the string and the integer have the same actual 
content if on appropriate endian machine), which would make the bit 
extraction comparable easy and effective as the i.__hex__() based method.
Using digits() instead of the code you have provided speeds the whole 
thing up two times (see attached code for details), but still is 
i.__hex__() the best way to go and could be improved probably only by 
direct conversion to base 256 or even higher base as e.g. 2**16 or 2**32.

Claudio

import time

# dctLstOfBitsVsCharOfNibble is a dictionary with a key beeing one 
character
# string representing hexadecimal value of a nibble and a value beeing a 
list
# of bits of the nibble where the lowest bit is stored at index 0 of the 
list.
# i.e.
dctLstOfBitsVsCharOfNibble = {
   '0':[0, 0, 0, 0],
   '1':[0, 0, 0, 1],
   '2':[0, 0, 1, 0],
   '3':[0, 0, 1, 1],
   '4':[0, 1, 0, 0],
   '5':[0, 1, 0, 1],
   '6':[0, 1, 1, 0],
   '7':[0, 1, 1, 1],
   '8':[1, 0, 0, 0],
   '9':[1, 0, 0, 1],
   'A':[1, 0, 1, 0],
   'B':[1, 0, 1, 1],
   'C':[1, 1, 0, 0],
   'D':[1, 1, 0, 1],
   'E':[1, 1, 1, 0],
   'F':[1, 1, 1, 1]
}
# The dctLstOfBitsVsCharOfNibble dictionary can be generated by 
following code:
# dctLstOfBitsVsCharOfNibble = {}
# for intNibbleValue in range(0, 16):
#   lstBitReprOfCurrNibble=[]
#   for indxOfBit in range(0,4):
#     lstBitReprOfCurrNibble.append(intNibbleValue>>indxOfBit&0x01)
#   #:for
#   lstBitReprOfCurrNibble.reverse()
# 
dctLstOfBitsVsCharOfNibble['%X'%(intNibbleValue,)]=lstBitReprOfCurrNibble
# #:for
n = 0
NoOf32bitChunks   = 0
lstBitsBitwiseAnd = []
lstBitsModulo     = []
lstViaBitChunks   = []
lstBitsViaHex     = []
lstBitsGMPY       = []
timeBitwiseAnd    = -1
timeModulo        = -1
timeBitsViaHex    = -1
timeViaBitChunks  = -1
timeGMPY          = -1

bitChunkSize      = 32 # must be <= 32

while timeBitwiseAnd <= timeBitsViaHex + 0.001:

   n = (n<<32) + 0x12345678

   NoOf32bitChunks += 1

   i = n
   lstBitsModulo = []
   start = time.clock()
   while i:
     lstBitsModulo.append(i%2)
     i=i>>1
   timeModulo = time.clock()-start

   i = n
   lstBitsBitwiseAnd = []
   start = time.clock()
   while i:
     lstBitsBitwiseAnd.append(i&0x01)
     i=i>>1
   timeBitwiseAnd = time.clock()-start

   i = n
   lstViaBitChunks = []
   bitFilter    = 0
   for dummy in range(bitChunkSize):
     bitFilter = (bitFilter<<1)+1
   #:for

   start = time.clock()
   done = False
   while i:
     i1 = int(i & bitFilter) # int() converts here a long-integer to integer
     i >>= bitChunkSize
     if i == 0: done = True
     for k in xrange(bitChunkSize):
       lstViaBitChunks.append(i1 & 1)
       i1 >>= 1
       if done and i1==0:
         # if this is the top word, and no bits are left, we are done
         break
       #:if
     #:for
   #:while
   timeViaBitChunks = time.clock()-start

   i = n
   # lstBitsViaHex = []
   start = time.clock()
   strHexOf_i = i.__hex__()[2:]
   if strHexOf_i[-1]=='L':
     strHexOf_i=strHexOf_i[0:-1]
   #:if
   intNoOfLeadingZeroBits = 0
   lstBitsOfFirstNibble = dctLstOfBitsVsCharOfNibble[strHexOf_i[0]]
   while not lstBitsOfFirstNibble[intNoOfLeadingZeroBits] and 
intNoOfLeadingZeroBits < 4:
     intNoOfLeadingZeroBits += 1
   #:while
   if intNoOfLeadingZeroBits == 4:
     lstBitsViaHex = []
   else:
     lstBitsViaHex = lstBitsOfFirstNibble[intNoOfLeadingZeroBits:]
   #:if
   for indxToStrHexOf_i in range(1,len(strHexOf_i)):
     lstBitsViaHex += 
dctLstOfBitsVsCharOfNibble[strHexOf_i[indxToStrHexOf_i]]
   #:for
   lstBitsViaHex.reverse()
   timeBitsViaHex = time.clock()-start

   import gmpy
   lstBitsGMPY = []
   # start = time.clock()
   # i = gmpy.mpz(n)
   # f = gmpy.scan1(i,0)
   # if f==0:
   #   lstBitsGMPY.append(1)
   #   k = 1
   #   f = gmpy.scan1(i,1)
   # else:
   #   k = 0
   # while f:
   #   while k<f:
   #     lstBitsGMPY.append(0)
   #     k += 1
   #   lstBitsGMPY.append(1)
   #   k += 1
   #   f = gmpy.scan1(i,f+1)
   # timeGMPY = time.clock()-start
   start = time.clock()
   i = gmpy.mpz(n)
   strBitsOf_i = gmpy.digits(i,2)
   for char in strBitsOf_i:
     if char=='0':
       lstBitsGMPY.append(0)
     else:
       lstBitsGMPY.append(1)
   #:for
   lstBitsGMPY.reverse()
   timeGMPY = time.clock()-start

#:while

if(   lstBitsBitwiseAnd == lstBitsModulo
   and lstBitsBitwiseAnd == lstBitsViaHex
   and lstBitsBitwiseAnd == lstViaBitChunks
   and lstBitsBitwiseAnd == lstBitsGMPY):
   print
   print '  Seconds for bit extraction from integer with %i hex-digits 
when: '%(NoOf32bitChunks*8,)
   print
   print '      looping over i>>1;i&0x01 = %f '%(timeBitwiseAnd,)
   print '      ^-- but in %i bit chunks = %f '%(bitChunkSize, 
timeViaBitChunks)
   print '      looping over i>>1;i%%2    = %f '%(timeModulo,)
   print '      using i.__hex__() repr.  = %f '%(timeBitsViaHex,)
   print '      using gmpy.digits(i,2)   = %f '%(timeGMPY,)
   print
   print '  Bits extraction from long integers with number of 
hexadecimal digits '
   print '      > %i '%(NoOf32bitChunks*8,)
   print '  is at least 1 ms faster with i.__hex__() then with 
i>>1;i&0x01 method.'
   print
   print '  Modulo division (%2) is always slower than bitwise-and (&0x01).'