[pypy-commit] lang-smalltalk default: Refactor image version identification

krono noreply at buildbot.pypy.org
Wed Feb 20 16:15:55 CET 2013


Author: Tobias Pape <tobias at netshed.de>
Branch: 
Changeset: r75:072c0a42c109
Date: 2013-02-20 16:06 +0100
http://bitbucket.org/pypy/lang-smalltalk/changeset/072c0a42c109/

Log:	Refactor image version identification

diff --git a/spyvm/squeakimage.py b/spyvm/squeakimage.py
--- a/spyvm/squeakimage.py
+++ b/spyvm/squeakimage.py
@@ -6,19 +6,39 @@
 
 from rpython.rlib import objectmodel
 
-def chrs2int(b):
+def chrs2int(b, unsigned):
     assert len(b) == 4
     first = ord(b[0]) # big endian
-    if first & 0x80 != 0:
-        first = first - 0x100
-    return first << 24 | ord(b[1]) << 16 | ord(b[2]) << 8 | ord(b[3])
+    if not unsigned:
+        if first & 0x80 != 0:
+            first = first - 0x100
+    return (first << 24 | ord(b[1]) << 16 | ord(b[2]) << 8 | ord(b[3]))
 
-def swapped_chrs2int(b):
+def swapped_chrs2int(b, unsigned):
     assert len(b) == 4
     first = ord(b[3]) # little endian
-    if first & 0x80 != 0:
-        first = first - 0x100
-    return first << 24 | ord(b[2]) << 16 | ord(b[1]) << 8 | ord(b[0])
+    if not unsigned:
+        if first & 0x80 != 0:
+            first = first - 0x100
+    return (first << 24 | ord(b[2]) << 16 | ord(b[1]) << 8 | ord(b[0]))
+
+def chrs2long(b, unsigned):
+    assert len(b) == 8
+    first = ord(b[0]) # big endian
+    if not unsigned:
+        if first & 0x80 != 0:
+            first = first - 0x100
+    return (      first << 56 | ord(b[1]) << 48 | ord(b[2]) << 40 | ord(b[3]) << 32
+            | ord(b[4]) << 24 | ord(b[5]) << 16 | ord(b[6]) <<  8 | ord(b[7])      )
+
+def swapped_chrs2long(b, unsigned):
+    assert len(b) == 8
+    first = ord(b[7]) # little endian
+    if not unsigned:
+        if first & 0x80 != 0:
+            first = first - 0x100
+    return (      first << 56 | ord(b[6]) << 48 | ord(b[5]) << 40 | ord(b[4]) << 32
+            | ord(b[3]) << 24 | ord(b[2]) << 16 | ord(b[1]) <<  8 | ord(b[0])      )
 
 
 # ____________________________________________________________
@@ -32,24 +52,51 @@
             self.data = inputfile.read()
         finally:
             inputfile.close()
-        self.swap = False
-        self.pos = 0
-        self.count = 0
+        self.reset()
 
     def peek(self):
         if self.pos >= len(self.data):
             raise IndexError
-        if self.swap:
-            return swapped_chrs2int( self.data[self.pos:self.pos+4] )
+        data_peek = self.data[self.pos:self.pos + self.word_size]
+        if self.use_long_read:
+            if self.swap:
+                return swapped_chrs2long(data_peek, False)
+            else:
+                return chrs2long(data_peek, False)
         else:
-            return chrs2int( self.data[self.pos:self.pos+4] )
+            if self.swap:
+                return swapped_chrs2int(data_peek, False)
+            else:
+                return chrs2int(data_peek, False)
+
+    def peek_unsigned(self):
+        if self.pos >= len(self.data):
+            raise IndexError
+        data_peek = self.data[self.pos:self.pos + self.word_size]
+        if self.use_long_read:
+            if self.swap:
+                return swapped_chrs2long(data_peek, True)
+            else:
+                return chrs2long(data_peek, True)
+        else:
+            if self.swap:
+                return swapped_chrs2int(data_peek, True)
+            else:
+                return chrs2int(data_peek, True)
+    
 
     def next(self):
         integer = self.peek()
-        self.pos += 4
-        self.count += 4
+        self.pos += self.word_size
+        self.count += self.word_size
         return integer
 
+    def reset(self):
+        self.swap = False
+        self.pos = 0
+        self.count = 0
+        self.be_32bit()
+
     def reset_count(self):
         self.count = 0
 
@@ -59,25 +106,114 @@
         self.pos += jump
         self.count += jump
 
+    def skipwords(self, jump):
+        self.skipbytes(jump * self.word_size)
+        assert (self.pos + jump) <= len(self.data)
+        self.pos += jump
+        self.count += jump
+
+
+    def length(self):
+        return len(self.data)
+
     def close(self):
         pass # already closed
 
+    def be_64bit(self):
+        self.word_size = 8
+        self.use_long_read = True
+
+    def be_32bit(self):
+        self.word_size = 4
+        self.use_long_read = False
 
 
 class CorruptImageError(Exception):
     pass
 
+class UnsupportedImageError(Exception):
+    pass
+
 # ____________________________________________________________
 
-# XXX hack to read Cog images.
-# TODO implement Cog float byte reversal
-SUPPORTED_VERSIONS = [6502, 6505]
+class ImageVersion(object):
+
+    def __init__(self, magic, is_big_endian, is_64bit, has_closures, has_floats_reversed):
+        self.magic = magic
+        self.is_big_endian = is_big_endian
+        self.is_64bit = is_64bit
+        self.has_closures = has_closures
+        self.has_floats_reversed = has_floats_reversed
+
+image_versions = {
+    0x00001966:         ImageVersion(6502,  True,  False, False, False),
+    0x66190000:         ImageVersion(6502,  False, False, False, False),
+    0x00001968:         ImageVersion(6504,  True,  False, True,  False),
+    0x68190000:         ImageVersion(6504,  False, False, True,  False),
+    0x00001969:         ImageVersion(6505,  True,  False, True,  True ),
+    0x69190000:         ImageVersion(6505,  False, False, True,  True ),
+    0x000109A0:         ImageVersion(68000, True,  True,  False, False),
+    0xA009010000000000: ImageVersion(68000, False, True,  False, False),
+    0x00000000000109A2: ImageVersion(68002, True,  True,  True,  False),
+    0xA209010000000000: ImageVersion(68002, False, True,  True,  False),
+    0x00000000000109A3: ImageVersion(68003, True,  True,  True,  True ),
+    0xA309010000000000: ImageVersion(68003, False, True,  True,  True ),
+}
+
+    
+def version(magic):
+    ver = image_versions.get(magic, None)
+    if ver is None:
+        raise CorruptImageError
+    # if ver.is_64bit or ver.has_floats_reversed:
+    #     raise UnsupportedImageError
+    return ver
+
+possible_image_offset = 512
+
+def version_from_stream(stream):
+    # 32 bit
+    try:
+        return version(stream.peek_unsigned())
+    except CorruptImageError as e:
+        if stream.length() > possible_image_offset + 4:
+            stream.skipbytes(possible_image_offset)
+            try:
+                return version(stream.peek_unsigned())
+            except CorruptImageError:
+                pass # raise original error
+        # 64 bit
+        stream.reset()
+        stream.be_64bit()
+        try:
+            v = version(stream.peek_unsigned())
+            assert v.is_64bit
+            return v
+        except CorruptImageError as e:
+            if stream.length() > possible_image_offset + 4:
+                stream.skipbytes(possible_image_offset)
+                try:
+                    v = version(stream.peek_unsigned())
+                    assert v.is_64bit
+                    return v
+                except CorruptImageError:
+                    pass # raise original error
+        raise
+
+    
+    
+def reader_for_image(space, stream):
+    ver = version_from_stream(stream)
+    if not ver.is_big_endian:
+        stream.swap = True
+    return ImageReader(space, stream, ver)
 
 class ImageReader(object):
     
-    def __init__(self, space, stream):
+    def __init__(self, space, stream, version):
         self.space = space
         self.stream = stream
+        self.version = version
         # dictionary mapping old address to chunk object
         self.chunks = {}
         self.chunklist = []
@@ -94,15 +230,13 @@
         self.init_w_objects()
         self.fillin_w_objects()
 
+    def read_version(self):
+        # 1 word version
+        magic = self.stream.next()
+        assert self.version.magic == magic
+
     def read_header(self):
-        # 1 word version
-        version = self.stream.peek()
-        if version not in SUPPORTED_VERSIONS:
-            self.stream.swap = True
-            version = self.stream.peek()
-            if version not in SUPPORTED_VERSIONS:
-                raise CorruptImageError
-        version = self.stream.next()
+        self.read_version()
         #------
         # 1 word headersize
         headersize = self.stream.next()
@@ -118,8 +252,7 @@
         print "savedwindowssize", savedwindowssize
         fullscreenflag = self.stream.next()
         extravmmemory = self.stream.next()
-        # we called 9 times next, 1 word = 4 byte
-        self.stream.skipbytes(headersize - (9 * 4))
+        self.stream.skipbytes(headersize - self.stream.pos)
 
     def read_body(self):
         import sys
diff --git a/spyvm/test/test_miniimage.py b/spyvm/test/test_miniimage.py
--- a/spyvm/test/test_miniimage.py
+++ b/spyvm/test/test_miniimage.py
@@ -21,7 +21,7 @@
     module.space = space
     
 def open_miniimage(space):
-    return squeakimage.ImageReader(space, squeakimage.Stream(mini_image.open()))
+    return squeakimage.reader_for_image(space, squeakimage.Stream(mini_image.open()))
 
 def get_reader():
     return reader
diff --git a/spyvm/test/test_squeakimage.py b/spyvm/test/test_squeakimage.py
--- a/spyvm/test/test_squeakimage.py
+++ b/spyvm/test/test_squeakimage.py
@@ -1,15 +1,16 @@
 import py
 from spyvm import squeakimage
-from spyvm.squeakimage import chrs2int
+from spyvm.squeakimage import chrs2int, chrs2long, swapped_chrs2long
 from spyvm import objspace
 
+from struct import pack
+
 space = objspace.ObjSpace()
 
 # ----- helpers ----------------------------------------------
 
 def ints2str(*ints):
-    import struct
-    return struct.pack(">" + "i" * len(ints), *ints)
+    return pack(">" + "i" * len(ints), *ints)
 
 def joinbits(values, lengths):
     result = 0
@@ -18,21 +19,37 @@
         result += each
     return result   
 
-def imagereader_mock(string):
+def imagestream_mock(string):
     import StringIO
     f = StringIO.StringIO(string)
-    stream = squeakimage.Stream(f)
-    return squeakimage.ImageReader(space, stream)
+    return squeakimage.Stream(f)
 
+def imagereader_mock(string):
+    stream = imagestream_mock(string)
+    return squeakimage.reader_for_image(space, stream)
+
+
+SIMPLE_VERSION_HEADER = pack(">i", 6502)
+SIMPLE_VERSION_HEADER_LE = pack("<i", 6502)
 
 # ----- tests ------------------------------------------------
 
 def test_chrs2int():
-    assert 1 == chrs2int('\x00\x00\x00\x01')
-    assert -1 == chrs2int('\xFF\xFF\xFF\xFF')
+    assert 1 == chrs2int('\x00\x00\x00\x01', False)
+    assert -1 == chrs2int('\xFF\xFF\xFF\xFF', False)
+    assert 1 == chrs2int('\x00\x00\x00\x01', True)
+    assert 0xFFFFFFFF == chrs2int('\xFF\xFF\xFF\xFF', True)
+
+def test_chrs2long():
+    assert 1 == chrs2long('\x00\x00\x00\x00\x00\x00\x00\x01', False)
+    assert -1 == chrs2long('\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF', False)
+    assert 1 == chrs2long('\x00\x00\x00\x00\x00\x00\x00\x01', True)
+    assert 0xFFFFFFFFFFFFFFFF == chrs2long('\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF', True)
+    assert 68002 == chrs2long(pack(">Q", 68002), False)
+    assert 68002 == swapped_chrs2long(pack("<Q", 68002), False)
 
 def test_stream():
-    stream = imagereader_mock('\x00\x00\x19\x66').stream
+    stream = imagestream_mock(SIMPLE_VERSION_HEADER)
     n = stream.peek()
     assert n == 6502 
     n = stream.next()
@@ -40,14 +57,14 @@
     py.test.raises(IndexError, lambda: stream.next())
     
 def test_stream_swap():
-    stream = imagereader_mock('\x66\x19\x00\x00').stream
+    stream = imagestream_mock('\x66\x19\x00\x00')
     stream.swap = True
     first = stream.next()
     assert first == 6502 
     py.test.raises(IndexError, lambda: stream.next())
     
 def test_stream_many():
-    stream = imagereader_mock('\x00\x00\x19\x66' * 5).stream
+    stream = imagestream_mock(SIMPLE_VERSION_HEADER * 5)
     for each in range(5):
         first = stream.peek()
         assert first == 6502 
@@ -56,14 +73,14 @@
     py.test.raises(IndexError, lambda: stream.next())
     
 def test_stream_skipbytes():
-    stream = imagereader_mock('\xFF\xFF\xFF\x00\x00\x19\x66').stream
+    stream = imagestream_mock('\xFF\xFF\xFF' + SIMPLE_VERSION_HEADER)
     stream.skipbytes(3)
     value = stream.next()
     assert value == 6502 
     py.test.raises(IndexError, lambda: stream.next())
         
 def test_stream_count():
-    stream = imagereader_mock('\xFF' * 20).stream
+    stream = imagestream_mock('\xFF' * 20)
     stream.next()
     stream.next()
     stream.reset_count()
@@ -85,41 +102,125 @@
     
 def test_ints2str():
     assert "\x00\x00\x00\x02" == ints2str(2)       
-    assert '\x00\x00\x19\x66\x00\x00\x00\x02' == ints2str(6502,2)
+    assert SIMPLE_VERSION_HEADER + '\x00\x00\x00\x02' == ints2str(6502,2)
     
 def test_freeblock():
-    r = imagereader_mock("\x00\x00\x00\x02")
+    r = imagereader_mock(SIMPLE_VERSION_HEADER + "\x00\x00\x00\x02")
+    r.read_version()
     py.test.raises(squeakimage.CorruptImageError, lambda: r.read_object())
 
 def test_1wordobjectheader():
     s = ints2str(joinbits([3, 1, 2, 3, 4], [2,6,4,5,12]))
-    r = imagereader_mock(s)
-    assert (squeakimage.ImageChunk(space, 1, 2, 3, 4), 0) == r.read_1wordobjectheader()
+    r = imagereader_mock(SIMPLE_VERSION_HEADER + s)
+    r.read_version()
+    l = len(SIMPLE_VERSION_HEADER)
+    assert (squeakimage.ImageChunk(space, 1, 2, 3, 4), 0 + l) == r.read_1wordobjectheader()
 
 def test_1wordobjectheader2():
     s = ints2str(joinbits([3, 1, 2, 3, 4], [2,6,4,5,12]))
-    r = imagereader_mock(s * 3)
-    assert (squeakimage.ImageChunk(space, 1, 2, 3, 4), 0) == r.read_1wordobjectheader()
-    assert (squeakimage.ImageChunk(space, 1, 2, 3, 4), 4) == r.read_1wordobjectheader()
-    assert (squeakimage.ImageChunk(space, 1, 2, 3, 4), 8) == r.read_1wordobjectheader()
+    r = imagereader_mock(SIMPLE_VERSION_HEADER + (s * 3))
+    r.read_version()
+    l = len(SIMPLE_VERSION_HEADER)
+    assert (squeakimage.ImageChunk(space, 1, 2, 3, 4), 0 + l) == r.read_1wordobjectheader()
+    assert (squeakimage.ImageChunk(space, 1, 2, 3, 4), 4 + l) == r.read_1wordobjectheader()
+    assert (squeakimage.ImageChunk(space, 1, 2, 3, 4), 8 + l) == r.read_1wordobjectheader()
 
 def test_2wordobjectheader():
     s = ints2str(4200 + 1, joinbits([1, 1, 2, 3, 4], [2,6,4,5,12]))
-    r = imagereader_mock(s)
-    assert (squeakimage.ImageChunk(space, 1, 2, 4200, 4), 4) == r.read_2wordobjectheader()
+    r = imagereader_mock(SIMPLE_VERSION_HEADER + s)
+    r.read_version()
+    l = len(SIMPLE_VERSION_HEADER)
+    assert (squeakimage.ImageChunk(space, 1, 2, 4200, 4), 4 + l) == r.read_2wordobjectheader()
 
 def test_3wordobjectheader():
     s = ints2str(1701 << 2, 4200 + 0, joinbits([0, 1, 2, 3, 4], [2,6,4,5,12]))
-    r = imagereader_mock(s)
-    assert (squeakimage.ImageChunk(space, 1701, 2, 4200, 4), 8) == r.read_3wordobjectheader()
+    r = imagereader_mock(SIMPLE_VERSION_HEADER + s)
+    r.read_version()
+    l = len(SIMPLE_VERSION_HEADER)
+    assert (squeakimage.ImageChunk(space, 1701, 2, 4200, 4), 8 + l) == r.read_3wordobjectheader()
     
 def test_read3wordheaderobject():
     size = 42
     s = ints2str(size << 2, 4200 + 0, joinbits([0, 1, 2, 3, 4], [2,6,4,5,12]))
-    r = imagereader_mock(s + '\x00\x00\x19\x66' * (size - 1))
+    r = imagereader_mock(SIMPLE_VERSION_HEADER + s + SIMPLE_VERSION_HEADER * (size - 1))
+    r.read_version()
+    l = len(SIMPLE_VERSION_HEADER)
     chunk, pos = r.read_object()
     chunk0 = squeakimage.ImageChunk(space, size, 2, 4200, 4)
     chunk0.data = [6502] * (size - 1)
-    assert pos == 8
+    assert pos == 8 + l
     assert chunk0 == chunk
     
+def test_simple_image():
+    word_size = 4
+    header_size = 16 * word_size
+
+    image_1 = (SIMPLE_VERSION_HEADER     # 1
+               + pack(">i", header_size) # 2 64 byte header
+               + pack(">i", 0)           # 3 no body
+               + pack(">i", 0)           # 4 old base address unset
+               + pack(">i", 0)           # 5 no spl objs array
+               + "\x12\x34\x56\x78"      # 6 last hash
+               + pack(">h", 480)         # 7 window 480 height
+               +     pack(">h", 640)     #   window 640 width
+               + pack(">i", 0)           # 8 not fullscreen
+               + pack(">i", 0)           # 9 no extra memory
+               + ("\x00" * (header_size - (9 * word_size))))
+    r = imagereader_mock(image_1)
+    # does not raise
+    r.read_header()
+    assert r.stream.pos == len(image_1)
+
+    image_2 = (SIMPLE_VERSION_HEADER_LE  # 1
+               + pack("<i", header_size) # 2 64 byte header
+               + pack("<i", 0)           # 3 no body
+               + pack("<i", 0)           # 4 old base address unset
+               + pack("<i", 0)           # 5 no spl objs array
+               + "\x12\x34\x56\x78"      # 6 last hash
+               + pack("<h", 480)         # 7 window 480 height
+               +     pack("<h", 640)     #   window 640 width
+               + pack("<i", 0)           # 8 not fullscreen
+               + pack("<i", 0)           # 9 no extra memory
+               + ("\x00" * (header_size - (9 * word_size))))
+    r = imagereader_mock(image_2)
+    # does not raise
+    r.read_header()
+    assert r.stream.pos == len(image_2)
+
+def test_simple_image64():
+    word_size = 8
+    header_size = 16 * word_size
+
+    image_1 = (pack(">Q", 68002)         # 1 version
+               + pack(">q", header_size) # 2 128 byte header
+               + pack(">q", 0)           # 3 no body
+               + pack(">q", 0)           # 4 old base address unset
+               + pack(">q", 0)           # 5 no spl objs array
+               + ("\x12\x34\x56\x78" * 2)# 6 last hash
+               + pack(">H", 480)         # 7 window 480 height
+               +     pack(">H", 640)     #   window 640 width
+               +     pack(">i", 0)       #   pad
+               + pack(">q", 0)           # 8 not fullscreen
+               + pack(">q", 0)           # 9 no extra memory
+               + ("\x00" * (header_size - (9 * word_size))))
+    r = imagereader_mock(image_1)
+    # does not raise
+    r.read_header()
+    assert r.stream.pos == len(image_1)
+
+    image_2 = (pack("<Q", 68002)         # 1 version
+               + pack("<q", header_size) # 2 128 byte header
+               + pack("<q", 0)           # 3 no body
+               + pack("<q", 0)           # 4 old base address unset
+               + pack("<q", 0)           # 5 no spl objs array
+               + ("\x12\x34\x56\x78" * 2)# 6 last hash
+               + pack("<H", 480)         # 7 window 480 height
+               +     pack("<H", 640)     #   window 640 width
+               +     pack(">i", 0)       #   pad
+               + pack(">q", 0)           # 8 not fullscreen
+               + pack("<q", 0)           # 9 no extra memory
+               + ("\x00" * (header_size - (9 * word_size))))
+    r = imagereader_mock(image_2)
+    # does not raise
+    r.read_header()
+    assert r.stream.pos == len(image_2)
diff --git a/spyvm/tool/analyseimage.py b/spyvm/tool/analyseimage.py
--- a/spyvm/tool/analyseimage.py
+++ b/spyvm/tool/analyseimage.py
@@ -11,10 +11,10 @@
 minitest_image = image_dir.join('minitest.image')
 
 def get_miniimage(space):
-    return squeakimage.ImageReader(space, squeakimage.Stream(mini_image.open()))
+    return squeakimage.reader_for_image(space, squeakimage.Stream(mini_image.open()))
 
 def get_minitestimage(space):
-    return squeakimage.ImageReader(space, squeakimage.Stream(minitest_image.open()))
+    return squeakimage.reader_for_image(space, squeakimage.Stream(minitest_image.open()))
 
 def create_image(space, image_reader):
     image_reader.initialize()
diff --git a/targetimageloadingsmalltalk.py b/targetimageloadingsmalltalk.py
--- a/targetimageloadingsmalltalk.py
+++ b/targetimageloadingsmalltalk.py
@@ -45,7 +45,7 @@
     else:
         print "usage:", argv[0], "<image name>"
         return -1
-    reader = squeakimage.ImageReader(space, squeakimage.Stream(DummyFile(filename)))
+    reader = squeakimage.reader_for_image(space, squeakimage.Stream(DummyFile(filename)))
     reader.initialize()
     image = squeakimage.SqueakImage()
     image.from_reader(space, reader)


More information about the pypy-commit mailing list