[pypy-commit] pypy faster-rstruct: move str_storage_getitem into its own module, as it's no longer based on rawstorage. Refactor, and re-implement based on casting. Add JIT support for force_cast between GC objects and the specific case of getinteriorfield we need. Add enough support to the llgraph backend to run the tests

antocuni noreply at buildbot.pypy.org
Wed Nov 18 02:17:24 EST 2015


Author: Antonio Cuni <anto.cuni at gmail.com>
Branch: faster-rstruct
Changeset: r80736:2294ad1228f5
Date: 2015-11-18 07:21 +0100
http://bitbucket.org/pypy/pypy/changeset/2294ad1228f5/

Log:	move str_storage_getitem into its own module, as it's no longer
	based on rawstorage. Refactor, and re-implement based on casting.
	Add JIT support for force_cast between GC objects and the specific
	case of getinteriorfield we need. Add enough support to the llgraph
	backend to run the tests

diff --git a/rpython/jit/backend/llgraph/runner.py b/rpython/jit/backend/llgraph/runner.py
--- a/rpython/jit/backend/llgraph/runner.py
+++ b/rpython/jit/backend/llgraph/runner.py
@@ -638,9 +638,18 @@
         return array.getlength()
 
     def bh_getarrayitem_gc(self, a, index, descr):
-        a = support.cast_arg(lltype.Ptr(descr.A), a)
+        assert index >= 0
+        if descr.A is descr.OUTERA:
+            a = support.cast_arg(lltype.Ptr(descr.A), a)
+        else:
+            # we use rffi.cast instead of support.cast_arg because the types
+            # might not be "compatible" enough from the lltype point of
+            # view. In particular, this happens when we use
+            # str_storage_getitem, in which an rpy_string is cast to
+            # rpy_string_as_Signed (or similar)
+            a = rffi.cast(lltype.Ptr(descr.OUTERA), a)
+            a = getattr(a, descr.OUTERA._arrayfld)
         array = a._obj
-        assert index >= 0
         return support.cast_result(descr.A.OF, array.getitem(index))
 
     bh_getarrayitem_gc_pure_i = bh_getarrayitem_gc
diff --git a/rpython/jit/codewriter/jtransform.py b/rpython/jit/codewriter/jtransform.py
--- a/rpython/jit/codewriter/jtransform.py
+++ b/rpython/jit/codewriter/jtransform.py
@@ -1008,12 +1008,11 @@
             return SpaceOperation('getarrayitem_gc_i',
                                   [op.args[0], v_index, bytearraydescr],
                                   op.result)
-        else:
+        elif op.result.concretetype is lltype.Void:
+            return
+        elif isinstance(op.args[0].concretetype.TO, lltype.GcArray):
+            # special-case 1: GcArray of Struct
             v_inst, v_index, c_field = op.args
-            if op.result.concretetype is lltype.Void:
-                return
-            # only GcArray of Struct supported
-            assert isinstance(v_inst.concretetype.TO, lltype.GcArray)
             STRUCT = v_inst.concretetype.TO.OF
             assert isinstance(STRUCT, lltype.Struct)
             descr = self.cpu.interiorfielddescrof(v_inst.concretetype.TO,
@@ -1022,6 +1021,16 @@
             kind = getkind(op.result.concretetype)[0]
             return SpaceOperation('getinteriorfield_gc_%s' % kind, args,
                                   op.result)
+        elif isinstance(op.args[0].concretetype.TO, lltype.GcStruct):
+            # special-case 2: GcStruct with Array field
+            v_inst, c_field, v_index = op.args
+            STRUCT = v_inst.concretetype.TO
+            ARRAY = getattr(STRUCT, c_field.value)
+            assert isinstance(ARRAY, lltype.Array)
+            arraydescr = self.cpu.arraydescrof(STRUCT)
+            return SpaceOperation('getarrayitem_gc_i',
+                                  [op.args[0], v_index, arraydescr],
+                                  op.result)
 
     def rewrite_op_setinteriorfield(self, op):
         assert len(op.args) == 4
@@ -1130,10 +1139,13 @@
     def rewrite_op_force_cast(self, op):
         v_arg = op.args[0]
         v_result = op.result
-        assert not self._is_gc(v_arg)
-
         if v_arg.concretetype == v_result.concretetype:
             return
+        elif self._is_gc(v_arg) and self._is_gc(v_result):
+            # cast from GC to GC is always fine
+            return
+        else:
+            assert not self._is_gc(v_arg)
 
         float_arg = v_arg.concretetype in [lltype.Float, lltype.SingleFloat]
         float_res = v_result.concretetype in [lltype.Float, lltype.SingleFloat]
diff --git a/rpython/jit/metainterp/test/test_rawmem.py b/rpython/jit/metainterp/test/test_rawmem.py
--- a/rpython/jit/metainterp/test/test_rawmem.py
+++ b/rpython/jit/metainterp/test/test_rawmem.py
@@ -108,18 +108,17 @@
 
     def test_str_storage_int(self):
         import struct
-        data = struct.pack('q', 42)
+        data = struct.pack('qq', 42, 100)
         def f():
-            res = str_storage_getitem(lltype.Signed, data, 0)
-            return res
+            a = str_storage_getitem(lltype.Signed, data, 0)
+            b = str_storage_getitem(lltype.Signed, data, 8)
+            return a+b
         res = self.interp_operations(f, [])
-        assert res == 42
-        import pdb;pdb.set_trace()
-        self.check_operations_history({'call_i': 1, 'guard_no_exception': 1,
-                                       'call_n': 1,
-                                       'raw_store': 1, 'raw_load_i': 1,
+        assert res == 142
+        self.check_operations_history({'getarrayitem_gc_i': 2,
+                                       'int_add': 1,
                                        'finish': 1})
-        self.metainterp.staticdata.stats.check_resops({'finish': 1}, omit_finish=False)
+
 
 
 class TestRawMem(RawMemTests, LLJitMixin):
diff --git a/rpython/rlib/rawstorage.py b/rpython/rlib/rawstorage.py
--- a/rpython/rlib/rawstorage.py
+++ b/rpython/rlib/rawstorage.py
@@ -6,8 +6,7 @@
 from rpython.rlib import rgc
 from rpython.rlib.rgc import lltype_is_gc
 from rpython.rlib.objectmodel import specialize
-from rpython.rtyper.lltypesystem.rstr import STR, _get_raw_str_buf
-from rpython.rtyper.annlowlevel import llstr
+from rpython.rlib.strstorage import str_storage_getitem
 
 RAW_STORAGE = rffi.CCHARP.TO
 RAW_STORAGE_PTR = rffi.CCHARP
@@ -44,30 +43,6 @@
     lltype.free(storage, flavor='raw', track_allocation=track_allocation)
 
 
-@rgc.no_collect
-@specialize.ll()
-def str_storage_getitem(TP, s, index):
-    lls = llstr(s)
-    # from here, no GC operations can happen
-    buf = _get_raw_str_buf(STR, lls, 0)
-    storage = rffi.cast(RAW_STORAGE_PTR, buf)
-    res = raw_storage_getitem(TP, storage, index)
-    # end of "no GC" section
-    keepalive_until_here(lls)
-    return res
-
-@rgc.no_collect
-@specialize.ll()
-def str_storage_getitem_unaligned(TP, s, index):
-    lls = llstr(s)
-    # from here, no GC operations can happen
-    buf = _get_raw_str_buf(STR, lls, 0)
-    storage = rffi.cast(RAW_STORAGE_PTR, buf)
-    res = raw_storage_getitem_unaligned(TP, storage, index)
-    # end of "no GC" section
-    keepalive_until_here(lls)
-    return res
-
 # ____________________________________________________________
 #
 # Support for possibly-unaligned accesses
diff --git a/rpython/rlib/strstorage.py b/rpython/rlib/strstorage.py
new file mode 100644
--- /dev/null
+++ b/rpython/rlib/strstorage.py
@@ -0,0 +1,55 @@
+# Support for str_storage: i.e., reading primitive types out of RPython strings
+#
+# There are various possible ways to implement it, however not all of them are
+# easily supported by the JIT:
+#
+#   1. use _get_raw_str_buf and cast the chars buffer to RAW_STORAGE_PTR: this
+#      works well without the JIT, but the cast to RAW_STORAGE_PTR needs to
+#      happen inside a short "no GC" section (like the one in
+#      rstr.py:copy_string_contents), which has no chance to work during
+#      tracing
+#
+#   2. use llop.raw_load: despite the name, llop.raw_load DOES support reading
+#      from GC pointers. However:
+#
+#        a. we would like to use a CompositeOffset as the offset (using the
+#           same logic as in rstr.py:_get_raw_str_buf), but this is not (yet)
+#           supported before translation: it works only if you pass an actual
+#           integer
+#
+#        b. raw_load from a GC pointer is not (yet) supported by the
+#           JIT. There are plans to introduce a gc_load operation: once it
+#           is available, we could fix the issue above and actually use it to
+#           implement str_storage_getitem
+#
+#   3. the actual solution: cast rpy_string to a GcStruct which has the very
+#      same layout, with the only difference that its 'chars' field is no
+#      longer an Array(Char) but e.g. an Array(Signed). Then, we just need to
+#      read the appropriate index into the array
+
+from rpython.rtyper.lltypesystem import lltype, rffi, llmemory
+from rpython.rtyper.lltypesystem.rstr import STR, _get_raw_str_buf
+from rpython.rtyper.annlowlevel import llstr
+from rpython.rlib.objectmodel import specialize
+from rpython.rlib import rgc
+
+@specialize.memo()
+def rpy_string_as_type(TP):
+    # sanity check that STR is actually what we think it is
+    assert STR._flds == {
+        'hash': lltype.Signed,
+        'chars': lltype.Array(lltype.Char, hints={'immutable': True})
+        }
+    STR_AS_TP = lltype.GcStruct('rpy_string_as_%s' % TP,
+                                ('hash',  lltype.Signed),
+                                ('chars', lltype.Array(TP, hints={'immutable': True})))
+    return STR_AS_TP
+
+@rgc.no_collect
+@specialize.ll()
+def str_storage_getitem(TP, s, index):
+    STR_AS_TP = rpy_string_as_type(TP)
+    lls = llstr(s)
+    str_as_tp = rffi.cast(lltype.Ptr(STR_AS_TP), lls)
+    index = index / rffi.sizeof(TP)
+    return str_as_tp.chars[index]
diff --git a/rpython/rlib/test/test_rawstorage.py b/rpython/rlib/test/test_rawstorage.py
--- a/rpython/rlib/test/test_rawstorage.py
+++ b/rpython/rlib/test/test_rawstorage.py
@@ -5,7 +5,7 @@
 from rpython.rlib.rawstorage import alloc_raw_storage, free_raw_storage,\
      raw_storage_setitem, raw_storage_getitem, AlignmentError,\
      raw_storage_setitem_unaligned, raw_storage_getitem_unaligned,\
-     str_storage_getitem, str_storage_getitem_unaligned
+     str_storage_getitem
 from rpython.rtyper.test.tool import BaseRtypingTest
 from rpython.translator.c.test.test_genc import compile
 
@@ -44,16 +44,6 @@
     res = str_storage_getitem(lltype.Float, buf, size*2)
     assert res == 123.0
 
-def test_untranslated_str_storage_unaligned(monkeypatch):
-    import struct
-    monkeypatch.setattr(rawstorage, 'misaligned_is_fine', False)
-    buf = 'foo' + struct.pack('@ll', 42, 43)
-    size = struct.calcsize('@l')
-    res = str_storage_getitem_unaligned(lltype.Signed, buf, 3)
-    assert res == 42
-    res = str_storage_getitem_unaligned(lltype.Signed, buf, size+3)
-    assert res == 43
-
 class TestRawStorage(BaseRtypingTest):
 
     def test_storage_int(self):


More information about the pypy-commit mailing list