[pypy-commit] lang-smalltalk default: Refactor image version identification
krono
noreply at buildbot.pypy.org
Wed Feb 20 16:15:55 CET 2013
Author: Tobias Pape <tobias at netshed.de>
Branch:
Changeset: r75:072c0a42c109
Date: 2013-02-20 16:06 +0100
http://bitbucket.org/pypy/lang-smalltalk/changeset/072c0a42c109/
Log: Refactor image version identification
diff --git a/spyvm/squeakimage.py b/spyvm/squeakimage.py
--- a/spyvm/squeakimage.py
+++ b/spyvm/squeakimage.py
@@ -6,19 +6,39 @@
from rpython.rlib import objectmodel
-def chrs2int(b):
+def chrs2int(b, unsigned):
assert len(b) == 4
first = ord(b[0]) # big endian
- if first & 0x80 != 0:
- first = first - 0x100
- return first << 24 | ord(b[1]) << 16 | ord(b[2]) << 8 | ord(b[3])
+ if not unsigned:
+ if first & 0x80 != 0:
+ first = first - 0x100
+ return (first << 24 | ord(b[1]) << 16 | ord(b[2]) << 8 | ord(b[3]))
-def swapped_chrs2int(b):
+def swapped_chrs2int(b, unsigned):
assert len(b) == 4
first = ord(b[3]) # little endian
- if first & 0x80 != 0:
- first = first - 0x100
- return first << 24 | ord(b[2]) << 16 | ord(b[1]) << 8 | ord(b[0])
+ if not unsigned:
+ if first & 0x80 != 0:
+ first = first - 0x100
+ return (first << 24 | ord(b[2]) << 16 | ord(b[1]) << 8 | ord(b[0]))
+
+def chrs2long(b, unsigned):
+ assert len(b) == 8
+ first = ord(b[0]) # big endian
+ if not unsigned:
+ if first & 0x80 != 0:
+ first = first - 0x100
+ return ( first << 56 | ord(b[1]) << 48 | ord(b[2]) << 40 | ord(b[3]) << 32
+ | ord(b[4]) << 24 | ord(b[5]) << 16 | ord(b[6]) << 8 | ord(b[7]) )
+
+def swapped_chrs2long(b, unsigned):
+ assert len(b) == 8
+ first = ord(b[7]) # little endian
+ if not unsigned:
+ if first & 0x80 != 0:
+ first = first - 0x100
+ return ( first << 56 | ord(b[6]) << 48 | ord(b[5]) << 40 | ord(b[4]) << 32
+ | ord(b[3]) << 24 | ord(b[2]) << 16 | ord(b[1]) << 8 | ord(b[0]) )
# ____________________________________________________________
@@ -32,24 +52,51 @@
self.data = inputfile.read()
finally:
inputfile.close()
- self.swap = False
- self.pos = 0
- self.count = 0
+ self.reset()
def peek(self):
if self.pos >= len(self.data):
raise IndexError
- if self.swap:
- return swapped_chrs2int( self.data[self.pos:self.pos+4] )
+ data_peek = self.data[self.pos:self.pos + self.word_size]
+ if self.use_long_read:
+ if self.swap:
+ return swapped_chrs2long(data_peek, False)
+ else:
+ return chrs2long(data_peek, False)
else:
- return chrs2int( self.data[self.pos:self.pos+4] )
+ if self.swap:
+ return swapped_chrs2int(data_peek, False)
+ else:
+ return chrs2int(data_peek, False)
+
+ def peek_unsigned(self):
+ if self.pos >= len(self.data):
+ raise IndexError
+ data_peek = self.data[self.pos:self.pos + self.word_size]
+ if self.use_long_read:
+ if self.swap:
+ return swapped_chrs2long(data_peek, True)
+ else:
+ return chrs2long(data_peek, True)
+ else:
+ if self.swap:
+ return swapped_chrs2int(data_peek, True)
+ else:
+ return chrs2int(data_peek, True)
+
def next(self):
integer = self.peek()
- self.pos += 4
- self.count += 4
+ self.pos += self.word_size
+ self.count += self.word_size
return integer
+ def reset(self):
+ self.swap = False
+ self.pos = 0
+ self.count = 0
+ self.be_32bit()
+
def reset_count(self):
self.count = 0
@@ -59,25 +106,114 @@
self.pos += jump
self.count += jump
+ def skipwords(self, jump):
+ self.skipbytes(jump * self.word_size)
+ assert (self.pos + jump) <= len(self.data)
+ self.pos += jump
+ self.count += jump
+
+
+ def length(self):
+ return len(self.data)
+
def close(self):
pass # already closed
+ def be_64bit(self):
+ self.word_size = 8
+ self.use_long_read = True
+
+ def be_32bit(self):
+ self.word_size = 4
+ self.use_long_read = False
class CorruptImageError(Exception):
pass
+class UnsupportedImageError(Exception):
+ pass
+
# ____________________________________________________________
-# XXX hack to read Cog images.
-# TODO implement Cog float byte reversal
-SUPPORTED_VERSIONS = [6502, 6505]
+class ImageVersion(object):
+
+ def __init__(self, magic, is_big_endian, is_64bit, has_closures, has_floats_reversed):
+ self.magic = magic
+ self.is_big_endian = is_big_endian
+ self.is_64bit = is_64bit
+ self.has_closures = has_closures
+ self.has_floats_reversed = has_floats_reversed
+
+image_versions = {
+ 0x00001966: ImageVersion(6502, True, False, False, False),
+ 0x66190000: ImageVersion(6502, False, False, False, False),
+ 0x00001968: ImageVersion(6504, True, False, True, False),
+ 0x68190000: ImageVersion(6504, False, False, True, False),
+ 0x00001969: ImageVersion(6505, True, False, True, True ),
+ 0x69190000: ImageVersion(6505, False, False, True, True ),
+ 0x000109A0: ImageVersion(68000, True, True, False, False),
+ 0xA009010000000000: ImageVersion(68000, False, True, False, False),
+ 0x00000000000109A2: ImageVersion(68002, True, True, True, False),
+ 0xA209010000000000: ImageVersion(68002, False, True, True, False),
+ 0x00000000000109A3: ImageVersion(68003, True, True, True, True ),
+ 0xA309010000000000: ImageVersion(68003, False, True, True, True ),
+}
+
+
+def version(magic):
+ ver = image_versions.get(magic, None)
+ if ver is None:
+ raise CorruptImageError
+ # if ver.is_64bit or ver.has_floats_reversed:
+ # raise UnsupportedImageError
+ return ver
+
+possible_image_offset = 512
+
+def version_from_stream(stream):
+ # 32 bit
+ try:
+ return version(stream.peek_unsigned())
+ except CorruptImageError as e:
+ if stream.length() > possible_image_offset + 4:
+ stream.skipbytes(possible_image_offset)
+ try:
+ return version(stream.peek_unsigned())
+ except CorruptImageError:
+ pass # raise original error
+ # 64 bit
+ stream.reset()
+ stream.be_64bit()
+ try:
+ v = version(stream.peek_unsigned())
+ assert v.is_64bit
+ return v
+ except CorruptImageError as e:
+ if stream.length() > possible_image_offset + 4:
+ stream.skipbytes(possible_image_offset)
+ try:
+ v = version(stream.peek_unsigned())
+ assert v.is_64bit
+ return v
+ except CorruptImageError:
+ pass # raise original error
+ raise
+
+
+
+def reader_for_image(space, stream):
+ ver = version_from_stream(stream)
+ if not ver.is_big_endian:
+ stream.swap = True
+ return ImageReader(space, stream, ver)
class ImageReader(object):
- def __init__(self, space, stream):
+ def __init__(self, space, stream, version):
self.space = space
self.stream = stream
+ self.version = version
# dictionary mapping old address to chunk object
self.chunks = {}
self.chunklist = []
@@ -94,15 +230,13 @@
self.init_w_objects()
self.fillin_w_objects()
+ def read_version(self):
+ # 1 word version
+ magic = self.stream.next()
+ assert self.version.magic == magic
+
def read_header(self):
- # 1 word version
- version = self.stream.peek()
- if version not in SUPPORTED_VERSIONS:
- self.stream.swap = True
- version = self.stream.peek()
- if version not in SUPPORTED_VERSIONS:
- raise CorruptImageError
- version = self.stream.next()
+ self.read_version()
#------
# 1 word headersize
headersize = self.stream.next()
@@ -118,8 +252,7 @@
print "savedwindowssize", savedwindowssize
fullscreenflag = self.stream.next()
extravmmemory = self.stream.next()
- # we called 9 times next, 1 word = 4 byte
- self.stream.skipbytes(headersize - (9 * 4))
+ self.stream.skipbytes(headersize - self.stream.pos)
def read_body(self):
import sys
diff --git a/spyvm/test/test_miniimage.py b/spyvm/test/test_miniimage.py
--- a/spyvm/test/test_miniimage.py
+++ b/spyvm/test/test_miniimage.py
@@ -21,7 +21,7 @@
module.space = space
def open_miniimage(space):
- return squeakimage.ImageReader(space, squeakimage.Stream(mini_image.open()))
+ return squeakimage.reader_for_image(space, squeakimage.Stream(mini_image.open()))
def get_reader():
return reader
diff --git a/spyvm/test/test_squeakimage.py b/spyvm/test/test_squeakimage.py
--- a/spyvm/test/test_squeakimage.py
+++ b/spyvm/test/test_squeakimage.py
@@ -1,15 +1,16 @@
import py
from spyvm import squeakimage
-from spyvm.squeakimage import chrs2int
+from spyvm.squeakimage import chrs2int, chrs2long, swapped_chrs2long
from spyvm import objspace
+from struct import pack
+
space = objspace.ObjSpace()
# ----- helpers ----------------------------------------------
def ints2str(*ints):
- import struct
- return struct.pack(">" + "i" * len(ints), *ints)
+ return pack(">" + "i" * len(ints), *ints)
def joinbits(values, lengths):
result = 0
@@ -18,21 +19,37 @@
result += each
return result
-def imagereader_mock(string):
+def imagestream_mock(string):
import StringIO
f = StringIO.StringIO(string)
- stream = squeakimage.Stream(f)
- return squeakimage.ImageReader(space, stream)
+ return squeakimage.Stream(f)
+def imagereader_mock(string):
+ stream = imagestream_mock(string)
+ return squeakimage.reader_for_image(space, stream)
+
+
+SIMPLE_VERSION_HEADER = pack(">i", 6502)
+SIMPLE_VERSION_HEADER_LE = pack("<i", 6502)
# ----- tests ------------------------------------------------
def test_chrs2int():
- assert 1 == chrs2int('\x00\x00\x00\x01')
- assert -1 == chrs2int('\xFF\xFF\xFF\xFF')
+ assert 1 == chrs2int('\x00\x00\x00\x01', False)
+ assert -1 == chrs2int('\xFF\xFF\xFF\xFF', False)
+ assert 1 == chrs2int('\x00\x00\x00\x01', True)
+ assert 0xFFFFFFFF == chrs2int('\xFF\xFF\xFF\xFF', True)
+
+def test_chrs2long():
+ assert 1 == chrs2long('\x00\x00\x00\x00\x00\x00\x00\x01', False)
+ assert -1 == chrs2long('\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF', False)
+ assert 1 == chrs2long('\x00\x00\x00\x00\x00\x00\x00\x01', True)
+ assert 0xFFFFFFFFFFFFFFFF == chrs2long('\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF', True)
+ assert 68002 == chrs2long(pack(">Q", 68002), False)
+ assert 68002 == swapped_chrs2long(pack("<Q", 68002), False)
def test_stream():
- stream = imagereader_mock('\x00\x00\x19\x66').stream
+ stream = imagestream_mock(SIMPLE_VERSION_HEADER)
n = stream.peek()
assert n == 6502
n = stream.next()
@@ -40,14 +57,14 @@
py.test.raises(IndexError, lambda: stream.next())
def test_stream_swap():
- stream = imagereader_mock('\x66\x19\x00\x00').stream
+ stream = imagestream_mock('\x66\x19\x00\x00')
stream.swap = True
first = stream.next()
assert first == 6502
py.test.raises(IndexError, lambda: stream.next())
def test_stream_many():
- stream = imagereader_mock('\x00\x00\x19\x66' * 5).stream
+ stream = imagestream_mock(SIMPLE_VERSION_HEADER * 5)
for each in range(5):
first = stream.peek()
assert first == 6502
@@ -56,14 +73,14 @@
py.test.raises(IndexError, lambda: stream.next())
def test_stream_skipbytes():
- stream = imagereader_mock('\xFF\xFF\xFF\x00\x00\x19\x66').stream
+ stream = imagestream_mock('\xFF\xFF\xFF' + SIMPLE_VERSION_HEADER)
stream.skipbytes(3)
value = stream.next()
assert value == 6502
py.test.raises(IndexError, lambda: stream.next())
def test_stream_count():
- stream = imagereader_mock('\xFF' * 20).stream
+ stream = imagestream_mock('\xFF' * 20)
stream.next()
stream.next()
stream.reset_count()
@@ -85,41 +102,125 @@
def test_ints2str():
assert "\x00\x00\x00\x02" == ints2str(2)
- assert '\x00\x00\x19\x66\x00\x00\x00\x02' == ints2str(6502,2)
+ assert SIMPLE_VERSION_HEADER + '\x00\x00\x00\x02' == ints2str(6502,2)
def test_freeblock():
- r = imagereader_mock("\x00\x00\x00\x02")
+ r = imagereader_mock(SIMPLE_VERSION_HEADER + "\x00\x00\x00\x02")
+ r.read_version()
py.test.raises(squeakimage.CorruptImageError, lambda: r.read_object())
def test_1wordobjectheader():
s = ints2str(joinbits([3, 1, 2, 3, 4], [2,6,4,5,12]))
- r = imagereader_mock(s)
- assert (squeakimage.ImageChunk(space, 1, 2, 3, 4), 0) == r.read_1wordobjectheader()
+ r = imagereader_mock(SIMPLE_VERSION_HEADER + s)
+ r.read_version()
+ l = len(SIMPLE_VERSION_HEADER)
+ assert (squeakimage.ImageChunk(space, 1, 2, 3, 4), 0 + l) == r.read_1wordobjectheader()
def test_1wordobjectheader2():
s = ints2str(joinbits([3, 1, 2, 3, 4], [2,6,4,5,12]))
- r = imagereader_mock(s * 3)
- assert (squeakimage.ImageChunk(space, 1, 2, 3, 4), 0) == r.read_1wordobjectheader()
- assert (squeakimage.ImageChunk(space, 1, 2, 3, 4), 4) == r.read_1wordobjectheader()
- assert (squeakimage.ImageChunk(space, 1, 2, 3, 4), 8) == r.read_1wordobjectheader()
+ r = imagereader_mock(SIMPLE_VERSION_HEADER + (s * 3))
+ r.read_version()
+ l = len(SIMPLE_VERSION_HEADER)
+ assert (squeakimage.ImageChunk(space, 1, 2, 3, 4), 0 + l) == r.read_1wordobjectheader()
+ assert (squeakimage.ImageChunk(space, 1, 2, 3, 4), 4 + l) == r.read_1wordobjectheader()
+ assert (squeakimage.ImageChunk(space, 1, 2, 3, 4), 8 + l) == r.read_1wordobjectheader()
def test_2wordobjectheader():
s = ints2str(4200 + 1, joinbits([1, 1, 2, 3, 4], [2,6,4,5,12]))
- r = imagereader_mock(s)
- assert (squeakimage.ImageChunk(space, 1, 2, 4200, 4), 4) == r.read_2wordobjectheader()
+ r = imagereader_mock(SIMPLE_VERSION_HEADER + s)
+ r.read_version()
+ l = len(SIMPLE_VERSION_HEADER)
+ assert (squeakimage.ImageChunk(space, 1, 2, 4200, 4), 4 + l) == r.read_2wordobjectheader()
def test_3wordobjectheader():
s = ints2str(1701 << 2, 4200 + 0, joinbits([0, 1, 2, 3, 4], [2,6,4,5,12]))
- r = imagereader_mock(s)
- assert (squeakimage.ImageChunk(space, 1701, 2, 4200, 4), 8) == r.read_3wordobjectheader()
+ r = imagereader_mock(SIMPLE_VERSION_HEADER + s)
+ r.read_version()
+ l = len(SIMPLE_VERSION_HEADER)
+ assert (squeakimage.ImageChunk(space, 1701, 2, 4200, 4), 8 + l) == r.read_3wordobjectheader()
def test_read3wordheaderobject():
size = 42
s = ints2str(size << 2, 4200 + 0, joinbits([0, 1, 2, 3, 4], [2,6,4,5,12]))
- r = imagereader_mock(s + '\x00\x00\x19\x66' * (size - 1))
+ r = imagereader_mock(SIMPLE_VERSION_HEADER + s + SIMPLE_VERSION_HEADER * (size - 1))
+ r.read_version()
+ l = len(SIMPLE_VERSION_HEADER)
chunk, pos = r.read_object()
chunk0 = squeakimage.ImageChunk(space, size, 2, 4200, 4)
chunk0.data = [6502] * (size - 1)
- assert pos == 8
+ assert pos == 8 + l
assert chunk0 == chunk
+def test_simple_image():
+ word_size = 4
+ header_size = 16 * word_size
+
+ image_1 = (SIMPLE_VERSION_HEADER # 1
+ + pack(">i", header_size) # 2 64 byte header
+ + pack(">i", 0) # 3 no body
+ + pack(">i", 0) # 4 old base addresss unset
+ + pack(">i", 0) # 5 no spl objs array
+ + "\x12\x34\x56\x78" # 6 last hash
+ + pack(">h", 480) # 7 window 480 height
+ + pack(">h", 640) # window 640 width
+ + pack(">i", 0) # 8 not fullscreen
+ + pack(">i", 0) # 9 no extra memory
+ + ("\x00" * (header_size - (9 * word_size))))
+ r = imagereader_mock(image_1)
+ # does not raise
+ r.read_header()
+ assert r.stream.pos == len(image_1)
+
+ image_2 = (SIMPLE_VERSION_HEADER_LE # 1
+ + pack("<i", header_size) # 2 64 byte header
+ + pack("<i", 0) # 3 no body
+ + pack("<i", 0) # 4 old base addresss unset
+ + pack("<i", 0) # 5 no spl objs array
+ + "\x12\x34\x56\x78" # 6 last hash
+ + pack("<h", 480) # 7 window 480 height
+ + pack("<h", 640) # window 640 width
+ + pack("<i", 0) # 8 not fullscreen
+ + pack("<i", 0) # 9 no extra memory
+ + ("\x00" * (header_size - (9 * word_size))))
+ r = imagereader_mock(image_2)
+ # does not raise
+ r.read_header()
+ assert r.stream.pos == len(image_2)
+
+def test_simple_image64():
+ word_size = 8
+ header_size = 16 * word_size
+
+ image_1 = (pack(">Q", 68002) # 1 version
+ + pack(">q", header_size) # 2 64 byte header
+ + pack(">q", 0) # 3 no body
+ + pack(">q", 0) # 4 old base addresss unset
+ + pack(">q", 0) # 5 no spl objs array
+ + ("\x12\x34\x56\x78" * 2)# 6 last hash
+ + pack(">H", 480) # 7 window 480 height
+ + pack(">H", 640) # window 640 width
+ + pack(">i", 0) # pad
+ + pack(">q", 0) # 8 not fullscreen
+ + pack(">q", 0) # 9 no extra memory
+ + ("\x00" * (header_size - (9 * word_size))))
+ r = imagereader_mock(image_1)
+ # does not raise
+ r.read_header()
+ assert r.stream.pos == len(image_1)
+
+ image_2 = (pack("<Q", 68002) # 1 version
+ + pack("<q", header_size) # 2 64 byte header
+ + pack("<q", 0) # 3 no body
+ + pack("<q", 0) # 4 old base addresss unset
+ + pack("<q", 0) # 5 no spl objs array
+ + ("\x12\x34\x56\x78" * 2)# 6 last hash
+ + pack("<H", 480) # 7 window 480 height
+ + pack("<H", 640) # window 640 width
+ + pack(">i", 0) # pad
+ + pack(">q", 0) # 8 not fullscreen
+ + pack("<q", 0) # 9 no extra memory
+ + ("\x00" * (header_size - (9 * word_size))))
+ r = imagereader_mock(image_2)
+ # does not raise
+ r.read_header()
+ assert r.stream.pos == len(image_2)
diff --git a/spyvm/tool/analyseimage.py b/spyvm/tool/analyseimage.py
--- a/spyvm/tool/analyseimage.py
+++ b/spyvm/tool/analyseimage.py
@@ -11,10 +11,10 @@
minitest_image = image_dir.join('minitest.image')
def get_miniimage(space):
- return squeakimage.ImageReader(space, squeakimage.Stream(mini_image.open()))
+ return squeakimage.reader_for_image(space, squeakimage.Stream(mini_image.open()))
def get_minitestimage(space):
- return squeakimage.ImageReader(space, squeakimage.Stream(minitest_image.open()))
+ return squeakimage.reader_for_image(space, squeakimage.Stream(minitest_image.open()))
def create_image(space, image_reader):
image_reader.initialize()
diff --git a/targetimageloadingsmalltalk.py b/targetimageloadingsmalltalk.py
--- a/targetimageloadingsmalltalk.py
+++ b/targetimageloadingsmalltalk.py
@@ -45,7 +45,7 @@
else:
print "usage:", argv[0], "<image name>"
return -1
- reader = squeakimage.ImageReader(space, squeakimage.Stream(DummyFile(filename)))
+ reader = squeakimage.reader_for_image(space, squeakimage.Stream(DummyFile(filename)))
reader.initialize()
image = squeakimage.SqueakImage()
image.from_reader(space, reader)
More information about the pypy-commit mailing list