[pypy-commit] lang-smalltalk storage-cleanups: More cleanups in squeakimage.py and stream.py

anton_gulenko noreply at buildbot.pypy.org
Wed Aug 6 13:38:35 CEST 2014


Author: Anton Gulenko <anton.gulenko at googlemail.com>
Branch: storage-cleanups
Changeset: r1020:2bf5b77ee6b7
Date: 2014-08-06 13:07 +0200
http://bitbucket.org/pypy/lang-smalltalk/changeset/2bf5b77ee6b7/

Log:	More cleanups in squeakimage.py and stream.py

diff --git a/spyvm/error.py b/spyvm/error.py
--- a/spyvm/error.py
+++ b/spyvm/error.py
@@ -42,3 +42,6 @@
     _attrs_ = ["msg"]
     def __init__(self, msg):
         self.msg = msg
+
+class CorruptImageError(Exit):
+    pass
diff --git a/spyvm/primitives.py b/spyvm/primitives.py
--- a/spyvm/primitives.py
+++ b/spyvm/primitives.py
@@ -711,7 +711,7 @@
     height = interp.space.unwrap_int(w_rcvr.fetch(interp.space, 2))
     depth = interp.space.unwrap_int(w_rcvr.fetch(interp.space, 3))
     hotpt = wrapper.PointWrapper(interp.space, w_rcvr.fetch(interp.space, 4))
-    if not interp.image.is_modern:
+    if not interp.image.version.is_modern:
         display.SDLCursor.set(
             w_bitmap.words,
             width,
diff --git a/spyvm/squeakimage.py b/spyvm/squeakimage.py
--- a/spyvm/squeakimage.py
+++ b/spyvm/squeakimage.py
@@ -1,5 +1,5 @@
 import os, time
-from spyvm import constants, model, util
+from spyvm import constants, model, util, error
 from spyvm.util import stream
 from spyvm.util.bitmanipulation import splitter
 from rpython.rlib import objectmodel
@@ -19,20 +19,22 @@
 # The image data can optionally start after this fixed offset.
 POSSIBLE_IMAGE_OFFSET = 512
 
-class CorruptImageError(Exception):
-    pass
-
-class UnsupportedImageError(Exception):
-    pass
-
 class ImageVersion(object):
-
+    
     def __init__(self, magic, is_big_endian, is_64bit, has_closures, has_floats_reversed):
         self.magic = magic
         self.is_big_endian = is_big_endian
         self.is_64bit = is_64bit
         self.has_closures = has_closures
         self.has_floats_reversed = has_floats_reversed
+        self.is_modern = magic > 6502
+    
+    def configure_stream(self, stream):
+        stream.big_endian = self.is_big_endian
+        if self.is_64bit:
+            stream.be_64bit()
+        else:
+            stream.be_32bit()
 
 image_versions = {
     0x00001966:         ImageVersion(6502,  True,  False, False, False),
@@ -51,91 +53,71 @@
     -0x5cf6ff0000000000:ImageVersion(68003, False, True,  True,  True ), # 0xA309010000000000
 }
 
-def version(magic):
-    ver = image_versions.get(magic, None)
-    if ver is None:
-        raise CorruptImageError
-    # if ver.is_64bit or ver.has_floats_reversed:
-    #     raise UnsupportedImageError
-    return ver
-
-def version_from_stream(stream):
-    # 32 bit
-    try:
-        return version(stream.peek())
-    except CorruptImageError as e:
-        if stream.length() > POSSIBLE_IMAGE_OFFSET + 4:
-            stream.skipbytes(POSSIBLE_IMAGE_OFFSET)
-            try:
-                return version(stream.peek())
-            except CorruptImageError:
-                pass # raise original error
-        # 64 bit
-        stream.reset()
-        stream.be_64bit()
-        try:
-            v = version(stream.peek())
-            assert v.is_64bit
-            return v
-        except CorruptImageError as e:
-            if stream.length() > POSSIBLE_IMAGE_OFFSET + 4:
-                stream.skipbytes(POSSIBLE_IMAGE_OFFSET)
-                try:
-                    v = version(stream.peek())
-                    assert v.is_64bit
-                    return v
-                except CorruptImageError:
-                    pass # raise original error
-        raise
-
-
 # ____________________________________________________________
 #
 # Parser classes for Squeak image format.
 
-def reader_for_image(space, stream):
-    ver = version_from_stream(stream)
-    if not ver.is_big_endian:
-        stream.swap = True
-    return ImageReader(space, stream, ver)
-
-def parse_image(space, stream):
-    image_reader = reader_for_image(space, stream)
-    image_reader.read_all()
-    return SqueakImage(space, image_reader)
-
 class ImageReader(object):
-
-    def __init__(self, space, stream, version):
+    
+    _attrs_ = [ "space", "stream", "version",
+        "chunks", # Dictionary mapping old address to chunk object
+        "chunklist", # Flat list of all read chunks
+        "intcache", # Cached instances of SmallInteger
+        "lastWindowSize"
+    ]
+    
+    def __init__(self, space, stream):
         self.space = space
         self.stream = stream
-        self.version = version
-        self.is_modern = self.version.magic > 6502
-        # dictionary mapping old address to chunk object
+        self.version = None
         self.chunks = {}
         self.chunklist = []
-        # cache wrapper integers
         self.intcache = {}
-
         self.lastWindowSize = 0
-
+    
+    def create_image(self):
+        self.read_all()
+        return SqueakImage(self)
+    
+    def log_progress(self, progress, char):
+        if progress % 1000 == 0:
+            os.write(2, char)
+    
     def read_all(self):
         self.read_header()
         self.read_body()
         self.init_compactclassesarray()
-        # until here, the chunks are generated
+        # All chunks are read, now convert them to real objects.
         self.init_g_objects()
+        self.assign_prebuilt_constants()
         self.init_w_objects()
         self.fillin_w_objects()
 
+    def try_read_version(self):
+        version = image_versions.get(self.stream.next(), None)
+        if version:
+            return version
+        self.stream.reset()
+        if self.stream.length() > POSSIBLE_IMAGE_OFFSET + 4:
+            self.stream.skipbytes(POSSIBLE_IMAGE_OFFSET)
+            version = image_versions.get(self.stream.next(), None)
+            if not version:
+                self.stream.reset()
+            return version
+
     def read_version(self):
-        # 1 word version
-        magic = self.stream.next()
-        assert self.version.magic == magic
-
+        version = self.try_read_version()
+        if not version:
+            # Try 64 bit
+            self.stream.be_64bit()
+            version = self.try_read_version()
+            if not version:
+                raise error.CorruptImageError("Illegal version magic.")
+        version.configure_stream(self.stream)
+        self.version = version
+    
     def read_header(self):
         self.read_version()
-        #------
         # 1 word headersize
         headersize = self.stream.next()
         # 1 word size of the full image
@@ -146,82 +128,21 @@
         self.specialobjectspointer = self.stream.next()
         # 1 word last used hash
         lasthash = self.stream.next()
-        self.lastWindowSize = savedwindowssize = self.stream.next()
-        # print "savedwindowssize: ", savedwindowssize >> 16, "@", savedwindowssize & 0xffff
+        self.lastWindowSize = self.stream.next()
         fullscreenflag = self.stream.next()
         extravmmemory = self.stream.next()
         self.stream.skipbytes(headersize - self.stream.pos)
-
+    
     def read_body(self):
         self.stream.reset_count()
         while self.stream.count < self.endofmemory:
             chunk, pos = self.read_object()
-            if len(self.chunklist) % 1000 == 0: os.write(2,'#')
+            self.log_progress(len(self.chunklist), '#')
             self.chunklist.append(chunk)
             self.chunks[pos + self.oldbaseaddress] = chunk
         self.stream.close()
-        self.swap = self.stream.swap #save for later
-        self.stream = None
         return self.chunklist # return for testing
 
-    def init_g_objects(self):
-        for chunk in self.chunks.itervalues():
-            chunk.as_g_object(self) # initialized g_object
-
-    def init_w_objects(self):
-        self.assign_prebuilt_constants()
-        for chunk in self.chunks.itervalues():
-            chunk.g_object.init_w_object()
-
-    def assign_prebuilt_constants(self):
-        # Assign classes and objects that in special objects array that are already created.
-        self._assign_prebuilt_constants(constants.objects_in_special_object_table, self.space.objtable)
-        if not self.is_modern:
-            classtable = {}
-            for name, so_index in self.space.classtable.items():
-                # In non-modern images (pre 4.0), there was no BlockClosure class.
-                if not name == "BlockClosure":
-                    classtable[name] = so_index
-        else:
-            classtable = self.space.classtable
-        self._assign_prebuilt_constants(constants.classes_in_special_object_table, classtable)
-
-    def _assign_prebuilt_constants(self, names_and_indices, prebuilt_objects):
-        for name, so_index in names_and_indices.items():
-            name = "w_" + name
-            if name in prebuilt_objects:
-                w_object = prebuilt_objects[name]
-                if self.special_object(so_index).w_object is None:
-                    self.special_object(so_index).w_object = w_object
-                else:
-                    if not self.special_object(0).w_object.is_nil(self.space):
-                       raise Warning('Object found in multiple places in the special objects array')
-    
-    def special_object(self, index):
-        special = self.chunks[self.specialobjectspointer].g_object.pointers
-        return special[index]
-
-    def fillin_w_objects(self):
-        self.filledin_objects = 0
-        for chunk in self.chunks.itervalues():
-            chunk.g_object.fillin(self.space)
-
-    def print_object_filledin(self):
-        self.filledin_objects = self.filledin_objects + 1
-        if self.filledin_objects % 1000 == 0:
-            os.write(2,'%')
-    
-    def init_compactclassesarray(self):
-        """ from the blue book (CompiledMethod Symbol Array PseudoContext LargePositiveInteger nil MethodDictionary Association Point Rectangle nil TranslatedMethod BlockContext MethodContext nil nil nil nil nil nil nil nil nil nil nil nil nil nil nil nil nil ) """
-        special = self.chunks[self.specialobjectspointer]
-        assert special.size > 24 #at least
-        assert special.format == 2
-        # squeak-specific: compact classes array
-        chunk = self.chunks[special.data[COMPACT_CLASSES_ARRAY]]
-        assert len(chunk.data) == 31
-        assert chunk.format == 2
-        self.compactclasses = [self.chunks[pointer] for pointer in chunk.data]
-
     def read_object(self):
         kind = self.stream.peek() & 3 # 2 bits
         if kind == 0: # 00 bits
@@ -231,7 +152,7 @@
         elif kind == 3: # 11 bits
             chunk, pos = self.read_1wordobjectheader()
         else: # 10 bits
-            raise CorruptImageError("Unused block not allowed in image")
+            raise error.CorruptImageError("Unused block not allowed in image")
         size = chunk.size
         chunk.data = [self.stream.next()
                      for _ in range(size - 1)] #size-1, excluding header
@@ -258,15 +179,71 @@
         kind, _, format, _, idhash = splitter[2,6,4,5,12](self.stream.next())
         assert kind == 0
         return ImageChunk(self.space, size, format, classid, idhash), self.stream.count - 4
+    
+    def init_compactclassesarray(self):
+        """ from the blue book (CompiledMethod Symbol Array PseudoContext LargePositiveInteger nil MethodDictionary Association Point Rectangle nil TranslatedMethod BlockContext MethodContext nil nil nil nil nil nil nil nil nil nil nil nil nil nil nil nil nil ) """
+        special = self.chunks[self.specialobjectspointer]
+        assert special.size > 24 #at least
+        assert special.format == 2
+        # squeak-specific: compact classes array
+        chunk = self.chunks[special.data[COMPACT_CLASSES_ARRAY]]
+        assert len(chunk.data) == 31
+        assert chunk.format == 2
+        self.compactclasses = [self.chunks[pointer] for pointer in chunk.data]
+    
+    def init_g_objects(self):
+        for chunk in self.chunks.itervalues():
+            chunk.as_g_object(self) # initialized g_object
+
+    def assign_prebuilt_constants(self):
+        # Assign the already-created (prebuilt) classes and objects that appear in the special objects array.
+        self._assign_prebuilt_constants(constants.objects_in_special_object_table, self.space.objtable)
+        if not self.version.is_modern:
+            classtable = {}
+            for name, so_index in self.space.classtable.items():
+                # In non-modern images (pre 4.0), there was no BlockClosure class.
+                if not name == "BlockClosure":
+                    classtable[name] = so_index
+        else:
+            classtable = self.space.classtable
+        self._assign_prebuilt_constants(constants.classes_in_special_object_table, classtable)
+
+    def _assign_prebuilt_constants(self, names_and_indices, prebuilt_objects):
+        for name, so_index in names_and_indices.items():
+            name = "w_" + name
+            if name in prebuilt_objects:
+                w_object = prebuilt_objects[name]
+                if self.special_object(so_index).w_object is None:
+                    self.special_object(so_index).w_object = w_object
+                else:
+                    if not self.special_object(0).w_object.is_nil(self.space):
+                       raise Warning('Object found in multiple places in the special objects array')
+    
+    def special_object(self, index):
+        special = self.chunks[self.specialobjectspointer].g_object.pointers
+        return special[index]
+    
+    def init_w_objects(self):
+        for chunk in self.chunks.itervalues():
+            chunk.g_object.init_w_object()
+
+    def fillin_w_objects(self):
+        self.filledin_objects = 0
+        for chunk in self.chunks.itervalues():
+            chunk.g_object.fillin(self.space)
+
+    def log_object_filledin(self):
+        self.filledin_objects = self.filledin_objects + 1
+        self.log_progress(self.filledin_objects, '%')
 
 
 # ____________________________________________________________
 
 class SqueakImage(object):
-    _immutable_fields_ = ["w_asSymbol", "w_simulateCopyBits", "version",
-                          "is_modern", "startup_time"]
+    _immutable_fields_ = ["w_asSymbol", "w_simulateCopyBits", "version", "startup_time"]
 
-    def __init__(self, space, reader):
+    def __init__(self, reader):
+        space = reader.space
         self.special_objects = [g_object.w_object for g_object in
                                 reader.chunks[reader.specialobjectspointer]
                                 .g_object.pointers]
@@ -275,7 +252,6 @@
         self.w_simulateCopyBits = self.find_symbol(space, reader, "simulateCopyBits")
         self.lastWindowSize = reader.lastWindowSize
         self.version = reader.version
-        self.is_modern = reader.is_modern
         self.run_spy_hacks(space)
         self.startup_time = time.time()
 
@@ -423,7 +399,7 @@
             if self.ispointers():
                 self.w_object = objectmodel.instantiate(model.W_PointersObject)
             elif self.format == 5:
-                raise CorruptImageError("Unknown format 5")
+                raise error.CorruptImageError("Unknown format 5")
             elif self.isfloat():
                 self.w_object = objectmodel.instantiate(model.W_Float)
             elif self.is32bitlargepositiveinteger():
@@ -431,7 +407,7 @@
             elif self.iswords():
                 self.w_object = objectmodel.instantiate(model.W_WordsObject)
             elif self.format == 7:
-                raise CorruptImageError("Unknown format 7, no 64-bit support yet :-)")
+                raise error.CorruptImageError("Unknown format 7, no 64-bit support yet :-)")
             elif self.isbytes():
                 self.w_object = objectmodel.instantiate(model.W_BytesObject)
             elif self.iscompiledmethod():
@@ -442,18 +418,18 @@
 
     def get_bytes(self):
         bytes = []
-        if self.reader.swap:
+        if self.reader.version.is_big_endian:
+            for each in self.chunk.data:
+                bytes.append(chr((each >> 24) & 0xff))
+                bytes.append(chr((each >> 16) & 0xff))
+                bytes.append(chr((each >> 8) & 0xff))
+                bytes.append(chr((each >> 0) & 0xff))
+        else:
             for each in self.chunk.data:
                 bytes.append(chr((each >> 0) & 0xff))
                 bytes.append(chr((each >> 8) & 0xff))
                 bytes.append(chr((each >> 16) & 0xff))
                 bytes.append(chr((each >> 24) & 0xff))
-        else:
-            for each in self.chunk.data:
-                bytes.append(chr((each >> 24) & 0xff))
-                bytes.append(chr((each >> 16) & 0xff))
-                bytes.append(chr((each >> 8) & 0xff))
-                bytes.append(chr((each >> 0) & 0xff))
         stop = len(bytes) - (self.format & 3)
         assert stop >= 0
         return bytes[:stop] # omit odd bytes
@@ -462,14 +438,14 @@
         from rpython.rlib.rarithmetic import r_uint
         words = [r_uint(x) for x in self.chunk.data]
         if required_len != -1 and len(words) != required_len:
-            raise CorruptImageError("Expected %d words, got %d" % (required_len, len(words)))
+            raise error.CorruptImageError("Expected %d words, got %d" % (required_len, len(words)))
         return words
 
     def fillin(self, space):
         if not self.filled_in:
             self.filled_in = True
             self.w_object.fillin(space, self)
-            self.reader.print_object_filledin()
+            self.reader.log_object_filledin()
         
     def get_g_pointers(self):
         assert self.pointers is not None
diff --git a/spyvm/test/test_squeakimage.py b/spyvm/test/test_squeakimage.py
--- a/spyvm/test/test_squeakimage.py
+++ b/spyvm/test/test_squeakimage.py
@@ -1,8 +1,7 @@
 import py, StringIO, sys
 from struct import pack
-from spyvm import squeakimage
+from spyvm import squeakimage, error
 from spyvm.util.stream import chrs2int, chrs2long, swapped_chrs2long
-from spyvm import objspace
 from .util import create_space, copy_to_module, cleanup_module
 
 def setup_module():
@@ -30,7 +29,7 @@
 
 def imagereader_mock(string):
     stream = imagestream_mock(string)
-    return squeakimage.reader_for_image(space, stream)
+    return squeakimage.ImageReader(space, stream)
 
 SIMPLE_VERSION_HEADER = pack(">i", 6502)
 SIMPLE_VERSION_HEADER_LE = pack("<i", 6502)
@@ -55,9 +54,9 @@
     assert n == 6502 
     py.test.raises(IndexError, lambda: stream.next())
     
-def test_stream_swap():
+def test_stream_little_endian():
     stream = imagestream_mock('\x66\x19\x00\x00')
-    stream.swap = True
+    stream.big_endian = False
     first = stream.next()
     assert first == 6502 
     py.test.raises(IndexError, lambda: stream.next())
@@ -106,7 +105,7 @@
 def test_freeblock():
     r = imagereader_mock(SIMPLE_VERSION_HEADER + "\x00\x00\x00\x02")
     r.read_version()
-    py.test.raises(squeakimage.CorruptImageError, lambda: r.read_object())
+    py.test.raises(error.CorruptImageError, lambda: r.read_object())
 
 def test_1wordobjectheader():
     s = ints2str(joinbits([3, 1, 2, 3, 4], [2,6,4,5,12]))
diff --git a/spyvm/test/util.py b/spyvm/test/util.py
--- a/spyvm/test/util.py
+++ b/spyvm/test/util.py
@@ -8,15 +8,16 @@
 
 image_dir = py.path.local(__file__).dirpath().dirpath().dirpath('images')
 
+def image_stream(imagefilename):
+    return squeakimage.Stream(filename=str(image_dir.join(imagefilename).strpath))
+
 def open_reader(space, imagefilename):
-    stream = squeakimage.Stream(filename=str(image_dir.join(imagefilename).strpath))
-    return squeakimage.reader_for_image(space, stream)
+    return squeakimage.ImageReader(space, image_stream(imagefilename))
 
 def read_image(image_filename, bootstrap = bootstrap_by_default):
     space = create_space(bootstrap)
     reader = open_reader(space, image_filename)
-    reader.read_all()
-    image = squeakimage.SqueakImage(space, reader)
+    image = reader.create_image()
     interp = TestInterpreter(space, image)
     return space, interp, image, reader
 
diff --git a/spyvm/util/stream.py b/spyvm/util/stream.py
--- a/spyvm/util/stream.py
+++ b/spyvm/util/stream.py
@@ -59,15 +59,15 @@
             raise IndexError
         data_peek = self.data[self.pos:self.pos + self.word_size]
         if self.use_long_read:
-            if self.swap:
+            if self.big_endian:
+                return chrs2long(data_peek)
+            else:
                 return swapped_chrs2long(data_peek)
+        else:
+            if self.big_endian:
+                return chrs2int(data_peek)
             else:
-                return chrs2long(data_peek)
-        else:
-            if self.swap:
                 return swapped_chrs2int(data_peek)
-            else:
-                return chrs2int(data_peek)
 
     def next(self):
         integer = self.peek()
@@ -76,7 +76,7 @@
         return integer
 
     def reset(self):
-        self.swap = False
+        self.big_endian = True
         self.pos = 0
         self.count = 0
         self.be_32bit()
diff --git a/targetimageloadingsmalltalk.py b/targetimageloadingsmalltalk.py
--- a/targetimageloadingsmalltalk.py
+++ b/targetimageloadingsmalltalk.py
@@ -159,7 +159,7 @@
         return 1
     
     # Load & prepare image and environment
-    image = squeakimage.parse_image(space, stream)
+    image = squeakimage.ImageReader(space, stream).create_image()
     interp = interpreter.Interpreter(space, image,
                 trace=trace, trace_important=trace_important,
                 evented=not poll, interrupts=interrupts)
diff --git a/targettinybenchsmalltalk.py b/targettinybenchsmalltalk.py
--- a/targettinybenchsmalltalk.py
+++ b/targettinybenchsmalltalk.py
@@ -17,7 +17,8 @@
 
 def setup():
     space = objspace.ObjSpace()
-    image = squeakimage.parse_image(space, Stream(filename=imagefile))
+    stream = squeakimage.Stream(filename=imagefile)
+    image = squeakimage.ImageReader(space, stream).create_image()
     interp = interpreter.Interpreter(space, image)
     w_selector = interp.perform(space.wrap_string("loopTest"), "asSymbol")
     w_object = model.W_SmallInteger(0)


More information about the pypy-commit mailing list