[pypy-commit] pypy py3k: Implement array._array_reconstructor: used with pickle protocol 3, allows
amauryfa
noreply at buildbot.pypy.org
Mon Nov 12 01:37:17 CET 2012
Author: Amaury Forgeot d'Arc <amauryfa at gmail.com>
Branch: py3k
Changeset: r58830:9f61bab5651f
Date: 2012-11-11 22:51 +0100
http://bitbucket.org/pypy/pypy/changeset/9f61bab5651f/
Log: Implement array._array_reconstructor: used with pickle protocol 3,
allows a machine-independent pickling of array objects.
diff --git a/pypy/module/array/__init__.py b/pypy/module/array/__init__.py
--- a/pypy/module/array/__init__.py
+++ b/pypy/module/array/__init__.py
@@ -11,6 +11,7 @@
interpleveldefs = {
'array': 'interp_array.W_ArrayBase',
'ArrayType': 'interp_array.W_ArrayBase',
+ '_array_reconstructor': 'reconstructor.array_reconstructor',
}
appleveldefs = {
diff --git a/pypy/module/array/interp_array.py b/pypy/module/array/interp_array.py
--- a/pypy/module/array/interp_array.py
+++ b/pypy/module/array/interp_array.py
@@ -72,7 +72,7 @@
array_fromfile = SMM('fromfile', 3)
array_buffer_info = SMM('buffer_info', 1)
-array_reduce = SMM('__reduce__', 1)
+array_reduce_ex = SMM('__reduce_ex__', 2)
array_copy = SMM('__copy__', 1)
array_byteswap = SMM('byteswap', 1)
@@ -124,6 +124,10 @@
# hint for the annotator: track individual constant instances
return True
+    def is_integer_type(self):
+        """Tell whether items of this type code are unwrapped as integers.
+
+        True when the unwrap method name is 'int_w' or 'bigint_w'.
+        """
+        return self.unwrap == 'int_w' or self.unwrap == 'bigint_w'
+
+
types = {
'u': TypeCode(lltype.UniChar, 'unicode_w'),
'b': TypeCode(rffi.SIGNEDCHAR, 'int_w', True, True),
@@ -639,17 +643,41 @@
w_len = space.wrap(self.len)
return space.newtuple([w_ptr, w_len])
- def array_reduce__Array(space, self):
- if self.len > 0:
- w_s = array_tobytes__Array(space, self)
- args = [space.wrap(mytype.typecode), w_s]
- else:
- args = [space.wrap(mytype.typecode)]
+ def array_reduce_ex__Array_ANY(space, self, w_protocol):
+ protocol = space.int_w(w_protocol)
try:
- dct = space.getattr(self, space.wrap('__dict__'))
+ w_dict = space.getattr(self, space.wrap('__dict__'))
except OperationError:
- dct = space.w_None
- return space.newtuple([space.type(self), space.newtuple(args), dct])
+ w_dict = space.w_None
+ from pypy.module.array import reconstructor
+ mformat_code = reconstructor.typecode_to_mformat_code(mytype.typecode)
+ if protocol < 3 or mformat_code == reconstructor.UNKNOWN_FORMAT:
+ # Convert the array to a list if we got something weird
+ # (e.g., non-IEEE floats), or we are pickling the array
+ # using a Python 2.x compatible protocol.
+ #
+ # A list representation is required for the Python 2.x
+ # compatible pickle protocols, since Python 2's str objects
+ # are unpickled as unicode by Python 3. An array pickled
+ # from its memory representation therefore could not be
+ # unpickled by Python 3, unless we resorted to ugly hacks
+ # such as coercing unicode objects to bytes in
+ # array_reconstructor.
+ w_list = array_tolist__Array(space, self)
+ return space.newtuple([
+ space.type(self),
+ space.newtuple([space.wrap(mytype.typecode), w_list]),
+ w_dict])
+
+ w_bytes = array_tobytes__Array(space, self)
+ w_array_reconstructor = space.fromcache(State).w_array_reconstructor
+ return space.newtuple([
+ w_array_reconstructor,
+ space.newtuple([space.type(self),
+ space.wrap(mytype.typecode),
+ space.wrap(mformat_code),
+ w_bytes]),
+ w_dict])
def array_copy__Array(space, self):
w_a = mytype.w_class(self.space)
@@ -709,4 +737,10 @@
for mytype in types.values():
make_array(mytype)
-register_all(locals(), globals())
+
+class State:
+    """Holder for the wrapped ``array._array_reconstructor`` callable.
+
+    Built from an object space; array_reduce_ex__Array_ANY() fetches it
+    with ``space.fromcache(State)`` and reads ``w_array_reconstructor``.
+    """
+    def __init__(self, space):
+        # Look the callable up on the built-in ``array`` module so that
+        # __reduce_ex__ can hand it to pickle as the constructor.
+        w_module = space.getbuiltinmodule('array')
+        self.w_array_reconstructor = space.getattr(
+            w_module, space.wrap("_array_reconstructor"))
+
diff --git a/pypy/module/array/reconstructor.py b/pypy/module/array/reconstructor.py
new file mode 100644
--- /dev/null
+++ b/pypy/module/array/reconstructor.py
@@ -0,0 +1,193 @@
+# array._array_reconstructor is a special constructor used when
+# unpickling an array. It provides a portable way to rebuild an array
+# from its memory representation.
+import sys
+from pypy.interpreter.gateway import unwrap_spec
+from pypy.interpreter.error import OperationError
+from pypy.interpreter.argument import Arguments
+from pypy.rlib import runicode, rbigint
+from pypy.rlib.rstruct import ieee
+from pypy.rpython.lltypesystem import rffi
+
+from pypy.module.array import interp_array
+
+# Machine format codes describing how array items are laid out in a
+# pickled byte string.  For every multi-byte format the big-endian code
+# is the little-endian code + 1, and for integers the signed code is the
+# unsigned code + 2; typecode_to_mformat_code() relies on this layout.
+UNKNOWN_FORMAT = -1
+UNSIGNED_INT8 = 0
+SIGNED_INT8 = 1
+UNSIGNED_INT16_LE = 2
+UNSIGNED_INT16_BE = 3
+SIGNED_INT16_LE = 4
+SIGNED_INT16_BE = 5
+UNSIGNED_INT32_LE = 6
+UNSIGNED_INT32_BE = 7
+SIGNED_INT32_LE = 8
+SIGNED_INT32_BE = 9
+UNSIGNED_INT64_LE = 10
+UNSIGNED_INT64_BE = 11
+SIGNED_INT64_LE = 12
+SIGNED_INT64_BE = 13
+IEEE_754_FLOAT_LE = 14
+IEEE_754_FLOAT_BE = 15
+IEEE_754_DOUBLE_LE = 16
+IEEE_754_DOUBLE_BE = 17
+UTF16_LE = 18
+UTF16_BE = 19
+UTF32_LE = 20
+UTF32_BE = 21
+
+# True on big-endian hosts; added (as 0 or 1) to a *_LE code to select
+# the code matching the host byte order.
+IS_BIG_ENDIAN = sys.byteorder == 'big'
+
+class MachineFormat(object):
+    """Layout of one item for a given machine format code.
+
+    bytes      -- size of a single item, in bytes
+    signed     -- True for signed integer formats
+    big_endian -- True when items are stored in big-endian byte order
+    """
+    def __init__(self, bytes, signed, big_endian):
+        self.bytes = bytes
+        self.signed = signed
+        self.big_endian = big_endian
+
+# Item layout for each machine format code, keyed by the code itself.
+# NOTE(review): the UTF16_* entries give 4 bytes and the UTF32_* entries
+# 8 bytes; array_reconstructor() decodes those formats through codecs and
+# never reads their descriptors -- confirm whether these sizes are intended.
+format_descriptors = {
+    UNSIGNED_INT8: MachineFormat(1, False, False),
+    SIGNED_INT8: MachineFormat(1, True, False),
+    UNSIGNED_INT16_LE: MachineFormat(2, False, False),
+    UNSIGNED_INT16_BE: MachineFormat(2, False, True),
+    SIGNED_INT16_LE: MachineFormat(2, True, False),
+    SIGNED_INT16_BE: MachineFormat(2, True, True),
+    UNSIGNED_INT32_LE: MachineFormat(4, False, False),
+    UNSIGNED_INT32_BE: MachineFormat(4, False, True),
+    SIGNED_INT32_LE: MachineFormat(4, True, False),
+    SIGNED_INT32_BE: MachineFormat(4, True, True),
+    UNSIGNED_INT64_LE: MachineFormat(8, False, False),
+    UNSIGNED_INT64_BE: MachineFormat(8, False, True),
+    SIGNED_INT64_LE: MachineFormat(8, True, False),
+    SIGNED_INT64_BE: MachineFormat(8, True, True),
+    IEEE_754_FLOAT_LE: MachineFormat(4, False, False),
+    IEEE_754_FLOAT_BE: MachineFormat(4, False, True),
+    IEEE_754_DOUBLE_LE: MachineFormat(8, False, False),
+    IEEE_754_DOUBLE_BE: MachineFormat(8, False, True),
+    UTF16_LE: MachineFormat(4, False, False),
+    UTF16_BE: MachineFormat(4, False, True),
+    UTF32_LE: MachineFormat(8, False, False),
+    UTF32_BE: MachineFormat(8, False, True),
+}
+# Smallest and largest known codes; array_reconstructor() uses them to
+# range-check the machine format code it receives.
+MACHINE_FORMAT_CODE_MIN = min(format_descriptors)
+MACHINE_FORMAT_CODE_MAX = max(format_descriptors)
+
+
+@unwrap_spec(typecode=str, mformat_code=int)
+def array_reconstructor(space, w_cls, typecode, mformat_code, w_items):
+    """Rebuild an array from a byte string in a given machine format.
+
+    space        -- the object space
+    w_cls        -- class handed to interp_array.w_array() to instantiate
+    typecode     -- requested array type code
+    mformat_code -- machine format code describing the layout of w_items
+    w_items      -- wrapped object holding the raw items (read with
+                    space.bytes_w() or decoded on the slow path)
+
+    Returns the result of interp_array.w_array().  For integer formats
+    decoded on the slow path, the array may be built with a different
+    integer typecode whose size and signedness match the machine format.
+
+    Raises an app-level ValueError for an unknown typecode or for a
+    machine format code outside the known range.
+    """
+    # Fast path: machine format code corresponds to the
+    # platform-independent typecode.
+    # NOTE(review): this test runs before the typecode is validated; an
+    # unknown typecode maps to UNKNOWN_FORMAT, so passing that code as
+    # well leaves the validation to interp_array.w_array().
+    if mformat_code == typecode_to_mformat_code(typecode):
+        return interp_array.w_array(
+            space, w_cls, typecode, Arguments(space, [w_items]))
+
+    if typecode not in interp_array.types:
+        raise OperationError(space.w_ValueError,
+                             space.wrap("invalid type code"))
+    if (mformat_code < MACHINE_FORMAT_CODE_MIN or
+        mformat_code > MACHINE_FORMAT_CODE_MAX):
+        raise OperationError(space.w_ValueError,
+                             space.wrap("invalid machine format code"))
+
+    # Slow path: Decode the byte string according to the given machine
+    # format code. This occurs when the computer unpickling the array
+    # object is architecturally different from the one that pickled
+    # the array.
+    if (mformat_code == IEEE_754_FLOAT_LE or
+        mformat_code == IEEE_754_FLOAT_BE or
+        mformat_code == IEEE_754_DOUBLE_LE or
+        mformat_code == IEEE_754_DOUBLE_BE):
+
+        # Unpack one float per descr.bytes-sized slice of the string.
+        memstr = space.bytes_w(w_items)
+        descr = format_descriptors[mformat_code]
+        converted_items = [
+            space.wrap(ieee.unpack_float(
+                    memstr[i:i+descr.bytes],
+                    descr.big_endian))
+            for i in range(0, len(memstr), descr.bytes)]
+        w_converted_items = space.newlist(converted_items)
+
+    elif mformat_code == UTF16_LE:
+        w_converted_items = space.call_method(
+            w_items, "decode", space.wrap("utf-16-le"))
+    elif mformat_code == UTF16_BE:
+        w_converted_items = space.call_method(
+            w_items, "decode", space.wrap("utf-16-be"))
+    elif mformat_code == UTF32_LE:
+        w_converted_items = space.call_method(
+            w_items, "decode", space.wrap("utf-32-le"))
+    elif mformat_code == UTF32_BE:
+        w_converted_items = space.call_method(
+            w_items, "decode", space.wrap("utf-32-be"))
+    else:
+        descr = format_descriptors[mformat_code]
+        # If possible, try to pack array's items using a data type
+        # that fits better. This may result in an array with narrower
+        # or wider elements.
+        #
+        # For example, if a 32-bit machine pickles a L-code array of
+        # unsigned longs, then the array will be unpickled by 64-bit
+        # machine as an I-code array of unsigned ints.
+        #
+        # XXX: Is it possible to write a unit test for this?
+        for tc in interp_array.unroll_typecodes:
+            typecode_descr = interp_array.types[tc]
+            if (typecode_descr.is_integer_type() and
+                typecode_descr.bytes == descr.bytes and
+                typecode_descr.signed == descr.signed):
+                typecode = tc
+                break
+
+        # Decode each descr.bytes-sized slice as one integer, honouring
+        # the byte order and signedness of the machine format.
+        memstr = space.bytes_w(w_items)
+        converted_items = [
+            space.newlong_from_rbigint(rbigint.rbigint.frombytes(
+                    memstr[i:i+descr.bytes],
+                    descr.big_endian and 'big' or 'little',
+                    descr.signed))
+            for i in range(0, len(memstr), descr.bytes)]
+        w_converted_items = space.newlist(converted_items)
+
+    return interp_array.w_array(
+        space, w_cls, typecode, Arguments(space, [w_converted_items]))
+
+def typecode_to_mformat_code(typecode):
+    """Return the machine format code of `typecode` on this platform.
+
+    Byte order comes from IS_BIG_ENDIAN, the unicode width from
+    runicode.MAXUNICODE and integer sizes from rffi.sizeof().  Returns
+    UNKNOWN_FORMAT for a typecode not handled here, or for an integer
+    type whose size is not 2, 4 or 8 bytes.
+    """
+    intsize = 0
+    if typecode == 'b':
+        return SIGNED_INT8
+    elif typecode == 'B':
+        return UNSIGNED_INT8
+    elif typecode == 'u':
+        if runicode.MAXUNICODE == 0xffff:
+            return UTF16_LE + IS_BIG_ENDIAN
+        else:
+            return UTF32_LE + IS_BIG_ENDIAN
+    elif typecode == 'f':
+        return IEEE_754_FLOAT_LE + IS_BIG_ENDIAN
+    elif typecode == 'd':
+        return IEEE_754_DOUBLE_LE + IS_BIG_ENDIAN
+    # Integers
+    elif typecode == 'h':
+        intsize = rffi.sizeof(rffi.SHORT)
+        is_signed = True
+    elif typecode == 'H':
+        intsize = rffi.sizeof(rffi.SHORT)
+        is_signed = False
+    elif typecode == 'i':
+        intsize = rffi.sizeof(rffi.INT)
+        is_signed = True
+    elif typecode == 'I':
+        intsize = rffi.sizeof(rffi.INT)
+        is_signed = False
+    elif typecode == 'l':
+        intsize = rffi.sizeof(rffi.LONG)
+        is_signed = True
+    elif typecode == 'L':
+        intsize = rffi.sizeof(rffi.LONG)
+        is_signed = False
+    else:
+        return UNKNOWN_FORMAT
+    # Start from the unsigned little-endian code of the right width, then
+    # add 1 for big-endian hosts and 2 for signed types.
+    if intsize == 2:
+        return UNSIGNED_INT16_LE + IS_BIG_ENDIAN + (2 * is_signed)
+    elif intsize == 4:
+        return UNSIGNED_INT32_LE + IS_BIG_ENDIAN + (2 * is_signed)
+    elif intsize == 8:
+        return UNSIGNED_INT64_LE + IS_BIG_ENDIAN + (2 * is_signed)
+    return UNKNOWN_FORMAT
diff --git a/pypy/module/array/test/test_array.py b/pypy/module/array/test/test_array.py
--- a/pypy/module/array/test/test_array.py
+++ b/pypy/module/array/test/test_array.py
@@ -524,18 +524,18 @@
def test_reduce(self):
import pickle
a = self.array('i', [1, 2, 3])
- s = pickle.dumps(a, 1)
+ s = pickle.dumps(a)
b = pickle.loads(s)
assert a == b
a = self.array('l')
- s = pickle.dumps(a, 1)
+ s = pickle.dumps(a)
b = pickle.loads(s)
assert len(b) == 0 and b.typecode == 'l'
a = self.array('i', [1, 2, 4])
i = iter(a)
- #raises(TypeError, pickle.dumps, i, 1)
+ #raises(TypeError, pickle.dumps, i)
def test_copy_swap(self):
a = self.array('i', [1, 2, 3])
@@ -920,3 +920,130 @@
spaceconfig = AppTestArray.spaceconfig.copy()
spaceconfig['objspace.std.builtinshortcut'] = True
+
+class AppTestArrayReconstructor:
+    """App-level tests for array._array_reconstructor."""
+    spaceconfig = dict(usemodules=('array', 'struct'))
+
+    def test_error(self):
+        """Invalid arguments are rejected with TypeError or ValueError."""
+        import array
+        array_reconstructor = array._array_reconstructor
+        UNKNOWN_FORMAT = -1
+        raises(TypeError, array_reconstructor,
+               "", "b", 0, b"")
+        raises(TypeError, array_reconstructor,
+               str, "b", 0, b"")
+        raises(TypeError, array_reconstructor,
+               array.array, "b", '', b"")
+        raises(TypeError, array_reconstructor,
+               array.array, "b", 0, "")
+        raises(ValueError, array_reconstructor,
+               array.array, "?", 0, b"")
+        raises(ValueError, array_reconstructor,
+               array.array, "b", UNKNOWN_FORMAT, b"")
+        raises(ValueError, array_reconstructor,
+               array.array, "b", 22, b"")
+        raises(ValueError, array_reconstructor,
+               array.array, "d", 16, b"a")
+
+    def test_numbers(self):
+        """Packed integers and IEEE floats decode to the expected arrays.
+
+        Each test case lists the typecodes to try, the machine format
+        code, the struct format used to pack the values, and the values.
+        """
+        import array, struct
+        array_reconstructor = array._array_reconstructor
+        UNSIGNED_INT8 = 0
+        SIGNED_INT8 = 1
+        UNSIGNED_INT16_LE = 2
+        UNSIGNED_INT16_BE = 3
+        SIGNED_INT16_LE = 4
+        SIGNED_INT16_BE = 5
+        UNSIGNED_INT32_LE = 6
+        UNSIGNED_INT32_BE = 7
+        SIGNED_INT32_LE = 8
+        SIGNED_INT32_BE = 9
+        UNSIGNED_INT64_LE = 10
+        UNSIGNED_INT64_BE = 11
+        SIGNED_INT64_LE = 12
+        SIGNED_INT64_BE = 13
+        IEEE_754_FLOAT_LE = 14
+        IEEE_754_FLOAT_BE = 15
+        IEEE_754_DOUBLE_LE = 16
+        IEEE_754_DOUBLE_BE = 17
+        testcases = (
+            (['B', 'H', 'I', 'L'], UNSIGNED_INT8, '=BBBB',
+             [0x80, 0x7f, 0, 0xff]),
+            (['b', 'h', 'i', 'l'], SIGNED_INT8, '=bbb',
+             [-0x80, 0x7f, 0]),
+            (['H', 'I', 'L'], UNSIGNED_INT16_LE, '<HHHH',
+             [0x8000, 0x7fff, 0, 0xffff]),
+            (['H', 'I', 'L'], UNSIGNED_INT16_BE, '>HHHH',
+             [0x8000, 0x7fff, 0, 0xffff]),
+            (['h', 'i', 'l'], SIGNED_INT16_LE, '<hhh',
+             [-0x8000, 0x7fff, 0]),
+            (['h', 'i', 'l'], SIGNED_INT16_BE, '>hhh',
+             [-0x8000, 0x7fff, 0]),
+            (['I', 'L'], UNSIGNED_INT32_LE, '<IIII',
+             [1<<31, (1<<31)-1, 0, (1<<32)-1]),
+            (['I', 'L'], UNSIGNED_INT32_BE, '>IIII',
+             [1<<31, (1<<31)-1, 0, (1<<32)-1]),
+            (['i', 'l'], SIGNED_INT32_LE, '<iii',
+             [-1<<31, (1<<31)-1, 0]),
+            (['i', 'l'], SIGNED_INT32_BE, '>iii',
+             [-1<<31, (1<<31)-1, 0]),
+            (['L'], UNSIGNED_INT64_LE, '<QQQQ',
+             [1<<31, (1<<31)-1, 0, (1<<32)-1]),
+            (['L'], UNSIGNED_INT64_BE, '>QQQQ',
+             [1<<31, (1<<31)-1, 0, (1<<32)-1]),
+            (['l'], SIGNED_INT64_LE, '<qqq',
+             [-1<<31, (1<<31)-1, 0]),
+            (['l'], SIGNED_INT64_BE, '>qqq',
+             [-1<<31, (1<<31)-1, 0]),
+            # The following tests for INT64 will raise an OverflowError
+            # when run on a 32-bit machine. The tests are simply skipped
+            # in that case.
+            (['L'], UNSIGNED_INT64_LE, '<QQQQ',
+             [1<<63, (1<<63)-1, 0, (1<<64)-1]),
+            (['L'], UNSIGNED_INT64_BE, '>QQQQ',
+             [1<<63, (1<<63)-1, 0, (1<<64)-1]),
+            (['l'], SIGNED_INT64_LE, '<qqq',
+             [-1<<63, (1<<63)-1, 0]),
+            (['l'], SIGNED_INT64_BE, '>qqq',
+             [-1<<63, (1<<63)-1, 0]),
+            (['f'], IEEE_754_FLOAT_LE, '<ffff',
+             [16711938.0, float('inf'), float('-inf'), -0.0]),
+            (['f'], IEEE_754_FLOAT_BE, '>ffff',
+             [16711938.0, float('inf'), float('-inf'), -0.0]),
+            (['d'], IEEE_754_DOUBLE_LE, '<dddd',
+             [9006104071832581.0, float('inf'), float('-inf'), -0.0]),
+            (['d'], IEEE_754_DOUBLE_BE, '>dddd',
+             [9006104071832581.0, float('inf'), float('-inf'), -0.0])
+        )
+        for testcase in testcases:
+            valid_typecodes, mformat_code, struct_fmt, values = testcase
+            arraystr = struct.pack(struct_fmt, *values)
+            for typecode in valid_typecodes:
+                try:
+                    a = array.array(typecode, values)
+                except OverflowError:
+                    continue  # Skip this test case.
+                b = array_reconstructor(
+                    array.array, typecode, mformat_code, arraystr)
+                assert a == b
+
+    def test_unicode(self):
+        """UTF-16 and UTF-32 byte strings decode to the same 'u' array."""
+        import array
+        array_reconstructor = array._array_reconstructor
+        UTF16_LE = 18
+        UTF16_BE = 19
+        UTF32_LE = 20
+        UTF32_BE = 21
+        teststr = "Bonne Journ\xe9e \U0002030a\U00020347"
+        testcases = (
+            (UTF16_LE, "UTF-16-LE"),
+            (UTF16_BE, "UTF-16-BE"),
+            (UTF32_LE, "UTF-32-LE"),
+            (UTF32_BE, "UTF-32-BE")
+        )
+        for testcase in testcases:
+            mformat_code, encoding = testcase
+            a = array.array('u', teststr)
+            b = array_reconstructor(
+                array.array, 'u', mformat_code, teststr.encode(encoding))
+            assert a == b
More information about the pypy-commit
mailing list