Does Python allow access to some of the implementation details?

Claudio Grondi claudio.grondi at freenet.de
Sat Jan 7 13:12:55 EST 2006


Paul Rubin wrote:
> Claudio Grondi <claudio.grondi at freenet.de> writes:
> 
>>The question is if Python allows somehow access to the bytes of the
>>representation of a long integer or integer in computers memory?
> 
> 
> No it doesn't, and that's a good thing, since the internal
> representation is a little bit surprising (it stores 15 bits of the
> long int in each 16-bit word of the representation, to make the
> arithmetic routines  a little simpler).
> 
> 
>>If it were possible to 'tell' the %2 operation to operate
>>only on one short of the integer number representation there will be
>>probably no difference in speed. Is there a way to do this efficiently
>>in Python like it is possible in C when using pointers and recasting?
> 
> 
> The usual trick to get at the contents of a long is to use hex
> conversion and maybe the array module (untested):
> 
>    a = '%x' % n
>    if len(a) % 2 == 1: 
>       a = '0' + a
>    s = array.array('B', binascii.unhexlify(a))
> 
> s now gives you the individual bytes of n.

This is the _fastest_ approach I have seen so far (updated code attached).
Thank you!

Claudio

import time

# dctLstOfBitsVsCharOfNibble is a dictionary with a key beeing one 
character
# string representing hexadecimal value of a nibble and a value beeing a 
list
# of bits of the nibble where the lowest bit is stored at index 0 of the 
list.
# i.e.
dctLstOfBitsVsCharOfNibble = {
   '0':[0, 0, 0, 0],
   '1':[0, 0, 0, 1],
   '2':[0, 0, 1, 0],
   '3':[0, 0, 1, 1],
   '4':[0, 1, 0, 0],
   '5':[0, 1, 0, 1],
   '6':[0, 1, 1, 0],
   '7':[0, 1, 1, 1],
   '8':[1, 0, 0, 0],
   '9':[1, 0, 0, 1],
   'A':[1, 0, 1, 0],
   'B':[1, 0, 1, 1],
   'C':[1, 1, 0, 0],
   'D':[1, 1, 0, 1],
   'E':[1, 1, 1, 0],
   'F':[1, 1, 1, 1]
}
# The dctLstOfBitsVsCharOfNibble dictionary can be also generated by 
following code:
# dctLstOfBitsVsCharOfNibble = {}
# for intNibbleValue in range(0, 16):
#   lstBitReprOfCurrNibble=[]
#   for indxOfBit in range(0,4):
#     lstBitReprOfCurrNibble.append(intNibbleValue>>indxOfBit&0x01)
#   #:for
#   lstBitReprOfCurrNibble.reverse()
# 
dctLstOfBitsVsCharOfNibble['%X'%(intNibbleValue,)]=lstBitReprOfCurrNibble
# #:for

dctLstOfBitsVsOrdOfChar = {}
for ordOfChar in range(256):
   dctLstOfBitsVsOrdOfChar[ordOfChar] = 
dctLstOfBitsVsCharOfNibble['%X'%(ordOfChar>>4,)] + 
dctLstOfBitsVsCharOfNibble['%X'%(ordOfChar&0x0F,)]
#:for
# what gives: {
# 0x00:[0, 0, 0, 0, 0, 0, 0, 0],
# 0x01:[0, 0, 0, 0, 0, 0, 0, 1],
# 0x02:[0, 0, 0, 0, 0, 0, 1, 0],
#  ...
# 0xFD:[1, 1, 1, 1, 1, 1, 0, 1],
# 0xFE:[1, 1, 1, 1, 1, 1, 1, 0],
# 0xFF:[1, 1, 1, 1, 1, 1, 1, 1]
#              }

n = 0
NoOf32bitChunks    = 0
lstBitsBitwiseAnd  = []
lstBitsModulo      = []
lstViaBitChunks    = []
lstBitsViaHex      = []
lstBitsGMPY        = []
lstViaHexAndArray  = []
timeBitwiseAnd     = -1
timeModulo         = -1
timeBitsViaHex     = -1
timeViaBitChunks   = -1
timeGMPY           = -1
timeViaHexAndArray = -1

bitChunkSize      = 32 # must be <= 32

while timeBitwiseAnd <= timeBitsViaHex + 0.001:

   n = (n<<32) + 0x12345678

   NoOf32bitChunks += 1

   i = n
   lstBitsModulo = []
   start = time.clock()
   while i:
     lstBitsModulo.append(i%2)
     i=i>>1
   timeModulo = time.clock()-start

   i = n
   lstBitsBitwiseAnd = []
   start = time.clock()
   while i:
     lstBitsBitwiseAnd.append(i&0x01)
     i=i>>1
   timeBitwiseAnd = time.clock()-start

   i = n
   lstViaBitChunks = []
   bitFilter    = 0
   for dummy in range(bitChunkSize):
     bitFilter = (bitFilter<<1)+1
   #:for

   start = time.clock()
   done = False
   while i:
     i1 = int(i & bitFilter) # int() converts here a long-integer to integer
     i >>= bitChunkSize
     if i == 0: done = True
     for k in xrange(bitChunkSize):
       lstViaBitChunks.append(i1 & 1)
       i1 >>= 1
       if done and i1==0:
         # if this is the top word, and no bits are left, we are done
         break
       #:if
     #:for
   #:while
   timeViaBitChunks = time.clock()-start

   i = n
   # lstBitsViaHex = []
   start = time.clock()
   strHexOf_i = i.__hex__()[2:]
   if strHexOf_i[-1]=='L':
     strHexOf_i=strHexOf_i[0:-1]
   #:if
   intNoOfLeadingZeroBits = 0
   lstBitsOfFirstNibble = dctLstOfBitsVsCharOfNibble[strHexOf_i[0]]
   while not lstBitsOfFirstNibble[intNoOfLeadingZeroBits] and 
intNoOfLeadingZeroBits < 4:
     intNoOfLeadingZeroBits += 1
   #:while
   if intNoOfLeadingZeroBits == 4:
     lstBitsViaHex = []
   else:
     lstBitsViaHex = lstBitsOfFirstNibble[intNoOfLeadingZeroBits:]
   #:if
   for indxToStrHexOf_i in range(1,len(strHexOf_i)):
     lstBitsViaHex += 
dctLstOfBitsVsCharOfNibble[strHexOf_i[indxToStrHexOf_i]]
   #:for
   lstBitsViaHex.reverse()
   timeBitsViaHex = time.clock()-start

   i = n
   lstViaHexAndArray = []
   import array, binascii
   start = time.clock()
   strHexOf_i = i.__hex__()[2:]
   if strHexOf_i[-1]=='L':
     strHexOf_i=strHexOf_i[0:-1]
   #:if
   for item in array.array('B', binascii.unhexlify(strHexOf_i)):
     lstViaHexAndArray += dctLstOfBitsVsOrdOfChar[item]
   #:for
   intNoOfLeadingZeroBits = 0
   while not lstViaHexAndArray[intNoOfLeadingZeroBits] and 
intNoOfLeadingZeroBits < 4:
     intNoOfLeadingZeroBits += 1
   #:while
   if intNoOfLeadingZeroBits == 4:
     lstViaHexAndArray = []
   else:
     lstViaHexAndArray = lstViaHexAndArray[intNoOfLeadingZeroBits:]
   #:if
   lstViaHexAndArray.reverse()
   timeViaHexAndArray = time.clock()-start

   import gmpy
   lstBitsGMPY = []
   # start = time.clock()
   # i = gmpy.mpz(n)
   # f = gmpy.scan1(i,0)
   # if f==0:
   #   lstBitsGMPY.append(1)
   #   k = 1
   #   f = gmpy.scan1(i,1)
   # else:
   #   k = 0
   # while f:
   #   while k<f:
   #     lstBitsGMPY.append(0)
   #     k += 1
   #   lstBitsGMPY.append(1)
   #   k += 1
   #   f = gmpy.scan1(i,f+1)
   # timeGMPY = time.clock()-start
   start = time.clock()
   i = gmpy.mpz(n)
   strBitsOf_i = gmpy.digits(i,2)
   for char in strBitsOf_i:
     if char=='0':
       lstBitsGMPY.append(0)
     else:
       lstBitsGMPY.append(1)
   #:for
   lstBitsGMPY.reverse()
   timeGMPY = time.clock()-start

#:while

if(   lstBitsBitwiseAnd == lstBitsModulo
   and lstBitsBitwiseAnd == lstBitsViaHex
   and lstBitsBitwiseAnd == lstViaBitChunks
   and lstBitsBitwiseAnd == lstBitsGMPY
   and lstBitsBitwiseAnd == lstViaHexAndArray):
   print
   print '  Seconds for bit extraction from integer with %i hex-digits 
when: '%(NoOf32bitChunks*8,)
   print
   print '      looping over i>>1;i&0x01     = %f '%(timeBitwiseAnd,)
   print '      ^-- but in %i bit chunks     = %f '%(bitChunkSize, 
timeViaBitChunks)
   print '      looping over i>>1;i%%2        = %f '%(timeModulo,)
   print '      using i.__hex__() repr.      = %f '%(timeBitsViaHex,)
   print '      using __hex__,array,binascii = %f '%(timeViaHexAndArray,)
   print '      using gmpy.digits(i,2)       = %f '%(timeGMPY,)
   print
   print '  Bits extraction from long integers with number of 
hexadecimal digits '
   print '      > %i '%(NoOf32bitChunks*8,)
   print '  is at least 1 ms faster with i.__hex__() then with 
i>>1;i&0x01 method.'
   print
   print '  Modulo division (%2) is always slower than bitwise-and (&0x01).'



More information about the Python-list mailing list