Does Python allow access to some of the implementation details?
Claudio Grondi
claudio.grondi at freenet.de
Sat Jan 7 08:05:18 EST 2006
mensanator at aol.com wrote:
> Claudio Grondi wrote:
>
>>Martin v. Löwis wrote:
>>
>>>You can get somewhat faster in Python than your code if you avoid
>>>producing new long objects all the time, and do the task in chunks of 30
>>>bits.
>>
>>It would be nice if you could explain why you consider chunks of 30 bits
>>to be superior e.g. to chunks of 32 bits?
>>
>>
>>>write a C file that is a Python module
>>
>>If I understand you right, there is no way to get direct access to
>>internal representation of Python objects by Python script language
>>means provided in the standard Python 2.4.2 distribution.
>>So the answer to the question in the subject is : NO (valid for what is
>> provided in the standard Python distribution, but not valid when
>>considering to write an own extension module in C)
>
>
> No need to re-invent the wheel. That extension has already been
> written. The GMPY module has a full suite of bit-functions:
>
> digits(...)
> digits(x[,base]): returns Python string representing x in the
> given base (2 to 36, default 10 if omitted or 0); leading '-'
> present if x<0, but no leading '+' if x>=0. x must be an mpz,
> or else gets coerced into one.
That's nice function. A pity it's not in the standard Python distro as
the inversion of the int() operation.
What I am also looking for is a conversion to base 256 (i.e where the
full byte is used and the string and the integer have the same actual
content if on appropriate endian machine), which would make the bit
extraction comparable easy and effective as the i.__hex__() based method.
Using digits() instead of the code you have provided speeds the whole
thing up two times (see attached code for details), but still is
i.__hex__() the best way to go and could be improved probably only by
direct conversion to base 256 or even higher base as e.g. 2**16 or 2**32.
Claudio
import time
# dctLstOfBitsVsCharOfNibble is a dictionary with a key beeing one
character
# string representing hexadecimal value of a nibble and a value beeing a
list
# of bits of the nibble where the lowest bit is stored at index 0 of the
list.
# i.e.
dctLstOfBitsVsCharOfNibble = {
'0':[0, 0, 0, 0],
'1':[0, 0, 0, 1],
'2':[0, 0, 1, 0],
'3':[0, 0, 1, 1],
'4':[0, 1, 0, 0],
'5':[0, 1, 0, 1],
'6':[0, 1, 1, 0],
'7':[0, 1, 1, 1],
'8':[1, 0, 0, 0],
'9':[1, 0, 0, 1],
'A':[1, 0, 1, 0],
'B':[1, 0, 1, 1],
'C':[1, 1, 0, 0],
'D':[1, 1, 0, 1],
'E':[1, 1, 1, 0],
'F':[1, 1, 1, 1]
}
# The dctLstOfBitsVsCharOfNibble dictionary can be generated by
following code:
# dctLstOfBitsVsCharOfNibble = {}
# for intNibbleValue in range(0, 16):
# lstBitReprOfCurrNibble=[]
# for indxOfBit in range(0,4):
# lstBitReprOfCurrNibble.append(intNibbleValue>>indxOfBit&0x01)
# #:for
# lstBitReprOfCurrNibble.reverse()
#
dctLstOfBitsVsCharOfNibble['%X'%(intNibbleValue,)]=lstBitReprOfCurrNibble
# #:for
n = 0
NoOf32bitChunks = 0
lstBitsBitwiseAnd = []
lstBitsModulo = []
lstViaBitChunks = []
lstBitsViaHex = []
lstBitsGMPY = []
timeBitwiseAnd = -1
timeModulo = -1
timeBitsViaHex = -1
timeViaBitChunks = -1
timeGMPY = -1
bitChunkSize = 32 # must be <= 32
while timeBitwiseAnd <= timeBitsViaHex + 0.001:
n = (n<<32) + 0x12345678
NoOf32bitChunks += 1
i = n
lstBitsModulo = []
start = time.clock()
while i:
lstBitsModulo.append(i%2)
i=i>>1
timeModulo = time.clock()-start
i = n
lstBitsBitwiseAnd = []
start = time.clock()
while i:
lstBitsBitwiseAnd.append(i&0x01)
i=i>>1
timeBitwiseAnd = time.clock()-start
i = n
lstViaBitChunks = []
bitFilter = 0
for dummy in range(bitChunkSize):
bitFilter = (bitFilter<<1)+1
#:for
start = time.clock()
done = False
while i:
i1 = int(i & bitFilter) # int() converts here a long-integer to integer
i >>= bitChunkSize
if i == 0: done = True
for k in xrange(bitChunkSize):
lstViaBitChunks.append(i1 & 1)
i1 >>= 1
if done and i1==0:
# if this is the top word, and no bits are left, we are done
break
#:if
#:for
#:while
timeViaBitChunks = time.clock()-start
i = n
# lstBitsViaHex = []
start = time.clock()
strHexOf_i = i.__hex__()[2:]
if strHexOf_i[-1]=='L':
strHexOf_i=strHexOf_i[0:-1]
#:if
intNoOfLeadingZeroBits = 0
lstBitsOfFirstNibble = dctLstOfBitsVsCharOfNibble[strHexOf_i[0]]
while not lstBitsOfFirstNibble[intNoOfLeadingZeroBits] and
intNoOfLeadingZeroBits < 4:
intNoOfLeadingZeroBits += 1
#:while
if intNoOfLeadingZeroBits == 4:
lstBitsViaHex = []
else:
lstBitsViaHex = lstBitsOfFirstNibble[intNoOfLeadingZeroBits:]
#:if
for indxToStrHexOf_i in range(1,len(strHexOf_i)):
lstBitsViaHex +=
dctLstOfBitsVsCharOfNibble[strHexOf_i[indxToStrHexOf_i]]
#:for
lstBitsViaHex.reverse()
timeBitsViaHex = time.clock()-start
import gmpy
lstBitsGMPY = []
# start = time.clock()
# i = gmpy.mpz(n)
# f = gmpy.scan1(i,0)
# if f==0:
# lstBitsGMPY.append(1)
# k = 1
# f = gmpy.scan1(i,1)
# else:
# k = 0
# while f:
# while k<f:
# lstBitsGMPY.append(0)
# k += 1
# lstBitsGMPY.append(1)
# k += 1
# f = gmpy.scan1(i,f+1)
# timeGMPY = time.clock()-start
start = time.clock()
i = gmpy.mpz(n)
strBitsOf_i = gmpy.digits(i,2)
for char in strBitsOf_i:
if char=='0':
lstBitsGMPY.append(0)
else:
lstBitsGMPY.append(1)
#:for
lstBitsGMPY.reverse()
timeGMPY = time.clock()-start
#:while
if( lstBitsBitwiseAnd == lstBitsModulo
and lstBitsBitwiseAnd == lstBitsViaHex
and lstBitsBitwiseAnd == lstViaBitChunks
and lstBitsBitwiseAnd == lstBitsGMPY):
print
print ' Seconds for bit extraction from integer with %i hex-digits
when: '%(NoOf32bitChunks*8,)
print
print ' looping over i>>1;i&0x01 = %f '%(timeBitwiseAnd,)
print ' ^-- but in %i bit chunks = %f '%(bitChunkSize,
timeViaBitChunks)
print ' looping over i>>1;i%%2 = %f '%(timeModulo,)
print ' using i.__hex__() repr. = %f '%(timeBitsViaHex,)
print ' using gmpy.digits(i,2) = %f '%(timeGMPY,)
print
print ' Bits extraction from long integers with number of
hexadecimal digits '
print ' > %i '%(NoOf32bitChunks*8,)
print ' is at least 1 ms faster with i.__hex__() then with
i>>1;i&0x01 method.'
print
print ' Modulo division (%2) is always slower than bitwise-and (&0x01).'
More information about the Python-list
mailing list