Does Python allow access to some of the implementation details?
Claudio Grondi
claudio.grondi at freenet.de
Sat Jan 7 13:12:55 EST 2006
Paul Rubin wrote:
> Claudio Grondi <claudio.grondi at freenet.de> writes:
>
>>The question is if Python allows somehow access to the bytes of the
>>representation of a long integer or integer in computers memory?
>
>
> No it doesn't, and that's a good thing, since the internal
> representation is a little bit surprising (it stores 15 bits of the
> long int in each 16-bit word of the representation, to make the
> arithmetic routines a little simpler).
>
>
>>If it were possible to 'tell' the %2 operation to operate
>>only on one short of the integer number representation there will be
>>probably no difference in speed. Is there a way to do this efficiently
>>in Python like it is possible in C when using pointers and recasting?
>
>
> The usual trick to get at the contents of a long is to use hex
> conversion and maybe the array module (untested):
>
> a = '%x' % n
> if len(a) % 2 == 1:
> a = '0' + a
> s = array.array('B', binascii.unhexlify(a))
>
> s now gives you the individual bytes of n.
This is the _fastest_ approach I have seen so far (updated code attached).
Thank you!
Claudio
import time
# dctLstOfBitsVsCharOfNibble is a dictionary with a key beeing one
character
# string representing hexadecimal value of a nibble and a value beeing a
list
# of bits of the nibble where the lowest bit is stored at index 0 of the
list.
# i.e.
dctLstOfBitsVsCharOfNibble = {
'0':[0, 0, 0, 0],
'1':[0, 0, 0, 1],
'2':[0, 0, 1, 0],
'3':[0, 0, 1, 1],
'4':[0, 1, 0, 0],
'5':[0, 1, 0, 1],
'6':[0, 1, 1, 0],
'7':[0, 1, 1, 1],
'8':[1, 0, 0, 0],
'9':[1, 0, 0, 1],
'A':[1, 0, 1, 0],
'B':[1, 0, 1, 1],
'C':[1, 1, 0, 0],
'D':[1, 1, 0, 1],
'E':[1, 1, 1, 0],
'F':[1, 1, 1, 1]
}
# The dctLstOfBitsVsCharOfNibble dictionary can be also generated by
following code:
# dctLstOfBitsVsCharOfNibble = {}
# for intNibbleValue in range(0, 16):
# lstBitReprOfCurrNibble=[]
# for indxOfBit in range(0,4):
# lstBitReprOfCurrNibble.append(intNibbleValue>>indxOfBit&0x01)
# #:for
# lstBitReprOfCurrNibble.reverse()
#
dctLstOfBitsVsCharOfNibble['%X'%(intNibbleValue,)]=lstBitReprOfCurrNibble
# #:for
dctLstOfBitsVsOrdOfChar = {}
for ordOfChar in range(256):
dctLstOfBitsVsOrdOfChar[ordOfChar] =
dctLstOfBitsVsCharOfNibble['%X'%(ordOfChar>>4,)] +
dctLstOfBitsVsCharOfNibble['%X'%(ordOfChar&0x0F,)]
#:for
# what gives: {
# 0x00:[0, 0, 0, 0, 0, 0, 0, 0],
# 0x01:[0, 0, 0, 0, 0, 0, 0, 1],
# 0x02:[0, 0, 0, 0, 0, 0, 1, 0],
# ...
# 0xFD:[1, 1, 1, 1, 1, 1, 0, 1],
# 0xFE:[1, 1, 1, 1, 1, 1, 1, 0],
# 0xFF:[1, 1, 1, 1, 1, 1, 1, 1]
# }
n = 0
NoOf32bitChunks = 0
lstBitsBitwiseAnd = []
lstBitsModulo = []
lstViaBitChunks = []
lstBitsViaHex = []
lstBitsGMPY = []
lstViaHexAndArray = []
timeBitwiseAnd = -1
timeModulo = -1
timeBitsViaHex = -1
timeViaBitChunks = -1
timeGMPY = -1
timeViaHexAndArray = -1
bitChunkSize = 32 # must be <= 32
while timeBitwiseAnd <= timeBitsViaHex + 0.001:
n = (n<<32) + 0x12345678
NoOf32bitChunks += 1
i = n
lstBitsModulo = []
start = time.clock()
while i:
lstBitsModulo.append(i%2)
i=i>>1
timeModulo = time.clock()-start
i = n
lstBitsBitwiseAnd = []
start = time.clock()
while i:
lstBitsBitwiseAnd.append(i&0x01)
i=i>>1
timeBitwiseAnd = time.clock()-start
i = n
lstViaBitChunks = []
bitFilter = 0
for dummy in range(bitChunkSize):
bitFilter = (bitFilter<<1)+1
#:for
start = time.clock()
done = False
while i:
i1 = int(i & bitFilter) # int() converts here a long-integer to integer
i >>= bitChunkSize
if i == 0: done = True
for k in xrange(bitChunkSize):
lstViaBitChunks.append(i1 & 1)
i1 >>= 1
if done and i1==0:
# if this is the top word, and no bits are left, we are done
break
#:if
#:for
#:while
timeViaBitChunks = time.clock()-start
i = n
# lstBitsViaHex = []
start = time.clock()
strHexOf_i = i.__hex__()[2:]
if strHexOf_i[-1]=='L':
strHexOf_i=strHexOf_i[0:-1]
#:if
intNoOfLeadingZeroBits = 0
lstBitsOfFirstNibble = dctLstOfBitsVsCharOfNibble[strHexOf_i[0]]
while not lstBitsOfFirstNibble[intNoOfLeadingZeroBits] and
intNoOfLeadingZeroBits < 4:
intNoOfLeadingZeroBits += 1
#:while
if intNoOfLeadingZeroBits == 4:
lstBitsViaHex = []
else:
lstBitsViaHex = lstBitsOfFirstNibble[intNoOfLeadingZeroBits:]
#:if
for indxToStrHexOf_i in range(1,len(strHexOf_i)):
lstBitsViaHex +=
dctLstOfBitsVsCharOfNibble[strHexOf_i[indxToStrHexOf_i]]
#:for
lstBitsViaHex.reverse()
timeBitsViaHex = time.clock()-start
i = n
lstViaHexAndArray = []
import array, binascii
start = time.clock()
strHexOf_i = i.__hex__()[2:]
if strHexOf_i[-1]=='L':
strHexOf_i=strHexOf_i[0:-1]
#:if
for item in array.array('B', binascii.unhexlify(strHexOf_i)):
lstViaHexAndArray += dctLstOfBitsVsOrdOfChar[item]
#:for
intNoOfLeadingZeroBits = 0
while not lstViaHexAndArray[intNoOfLeadingZeroBits] and
intNoOfLeadingZeroBits < 4:
intNoOfLeadingZeroBits += 1
#:while
if intNoOfLeadingZeroBits == 4:
lstViaHexAndArray = []
else:
lstViaHexAndArray = lstViaHexAndArray[intNoOfLeadingZeroBits:]
#:if
lstViaHexAndArray.reverse()
timeViaHexAndArray = time.clock()-start
import gmpy
lstBitsGMPY = []
# start = time.clock()
# i = gmpy.mpz(n)
# f = gmpy.scan1(i,0)
# if f==0:
# lstBitsGMPY.append(1)
# k = 1
# f = gmpy.scan1(i,1)
# else:
# k = 0
# while f:
# while k<f:
# lstBitsGMPY.append(0)
# k += 1
# lstBitsGMPY.append(1)
# k += 1
# f = gmpy.scan1(i,f+1)
# timeGMPY = time.clock()-start
start = time.clock()
i = gmpy.mpz(n)
strBitsOf_i = gmpy.digits(i,2)
for char in strBitsOf_i:
if char=='0':
lstBitsGMPY.append(0)
else:
lstBitsGMPY.append(1)
#:for
lstBitsGMPY.reverse()
timeGMPY = time.clock()-start
#:while
if( lstBitsBitwiseAnd == lstBitsModulo
and lstBitsBitwiseAnd == lstBitsViaHex
and lstBitsBitwiseAnd == lstViaBitChunks
and lstBitsBitwiseAnd == lstBitsGMPY
and lstBitsBitwiseAnd == lstViaHexAndArray):
print
print ' Seconds for bit extraction from integer with %i hex-digits
when: '%(NoOf32bitChunks*8,)
print
print ' looping over i>>1;i&0x01 = %f '%(timeBitwiseAnd,)
print ' ^-- but in %i bit chunks = %f '%(bitChunkSize,
timeViaBitChunks)
print ' looping over i>>1;i%%2 = %f '%(timeModulo,)
print ' using i.__hex__() repr. = %f '%(timeBitsViaHex,)
print ' using __hex__,array,binascii = %f '%(timeViaHexAndArray,)
print ' using gmpy.digits(i,2) = %f '%(timeGMPY,)
print
print ' Bits extraction from long integers with number of
hexadecimal digits '
print ' > %i '%(NoOf32bitChunks*8,)
print ' is at least 1 ms faster with i.__hex__() then with
i>>1;i&0x01 method.'
print
print ' Modulo division (%2) is always slower than bitwise-and (&0x01).'
More information about the Python-list
mailing list