[pypy-svn] pypy numpy-exp: in-progress. Start implementing vector operations. It's a bit annoying because

Tue Mar 22 22:04:45 CET 2011

Author: Maciej Fijalkowski <fijall at gmail.com>
Branch: numpy-exp
Changeset: r42850:e583fdf3b8b1
Date: 2011-03-22 15:03 -0600
http://bitbucket.org/pypy/pypy/changeset/e583fdf3b8b1/

Log:	In progress. Start implementing vector operations. It's a bit
	annoying because we don't (can't?) have a type representing an SSE
	vector while not jitted, so a bit of a dance is required.

diff --git a/pypy/jit/metainterp/history.py b/pypy/jit/metainterp/history.py
--- a/pypy/jit/metainterp/history.py
+++ b/pypy/jit/metainterp/history.py
@@ -15,6 +15,7 @@
 INT   = 'i'
 REF   = 'r'
 FLOAT = 'f'
+VECTOR = 'F'
 HOLE  = '_'
 VOID  = 'v'
 
@@ -508,6 +509,9 @@
     def forget_value(self):
         raise NotImplementedError
 
+class BoxVector(Box):  # NOTE(review): no 'type' set here; presumably VECTOR -- confirm
+    _attrs_ = ()  # empty, unlike BoxInt's ('value',)
+
 class BoxInt(Box):
     type = INT
     _attrs_ = ('value',)

diff --git a/pypy/jit/backend/llgraph/llimpl.py b/pypy/jit/backend/llgraph/llimpl.py
--- a/pypy/jit/backend/llgraph/llimpl.py
+++ b/pypy/jit/backend/llgraph/llimpl.py
@@ -161,7 +161,6 @@
     'force_token'     : ((), 'int'),
     'call_may_force'  : (('int', 'varargs'), 'intorptr'),
     'guard_not_forced': ((), None),
-    'sse_float_add'   : (('int', 'int', 'int', 'int'), None),
 }
 
 # ____________________________________________________________
@@ -736,12 +735,6 @@
 
     op_getarrayitem_raw_pure = op_getarrayitem_raw
 
-    def op_sse_float_add(self, arraydescr, array1, array2, arrayres,
-                         index):
-        from pypy.jit.metainterp.blackhole import BlackholeInterpreter
-        return BlackholeInterpreter.bhimpl_sse_float_add.im_func(self.cpu,
-               arraydescr, array1, array2, arrayres, index)
-
     def op_getfield_gc(self, fielddescr, struct):
         if fielddescr.typeinfo == REF:
             return do_getfield_gc_ptr(struct, fielddescr.ofs)

diff --git a/pypy/jit/metainterp/pyjitpl.py b/pypy/jit/metainterp/pyjitpl.py
--- a/pypy/jit/metainterp/pyjitpl.py
+++ b/pypy/jit/metainterp/pyjitpl.py
@@ -394,11 +394,6 @@
     opimpl_getarrayitem_gc_r = _opimpl_getarrayitem_gc_any
     opimpl_getarrayitem_gc_f = _opimpl_getarrayitem_gc_any
 
-    @arguments("descr", "box", "box", "box", "box")
-    def opimpl_sse_float_add(self, arraydescr, array1, array2, arrayres, index):
-        return self.execute_with_descr(rop.SSE_FLOAT_ADD, arraydescr, array1,
-                                       array2, arrayres, index)
-
     @arguments("box", "descr", "box")
     def _opimpl_getarrayitem_raw_any(self, arraybox, arraydescr, indexbox):
         return self.execute_with_descr(rop.GETARRAYITEM_RAW,

diff --git a/pypy/jit/codewriter/test/test_jtransform.py b/pypy/jit/codewriter/test/test_jtransform.py
--- a/pypy/jit/codewriter/test/test_jtransform.py
+++ b/pypy/jit/codewriter/test/test_jtransform.py
@@ -947,3 +947,7 @@
     assert op1.args[1] == 'calldescr-%d' % effectinfo.EffectInfo.OS_ARRAYCOPY
     assert op1.args[2] == ListOfKind('int', [v3, v4, v5])
     assert op1.args[3] == ListOfKind('ref', [v1, v2])
+
+def test_vector_ops():  # in-progress stub: only builds TP, asserts nothing yet
+    TP = lltype.Array(lltype.Float, hints={'nolength': True})
+    

diff --git a/pypy/rpython/lltypesystem/opimpl.py b/pypy/rpython/lltypesystem/opimpl.py
--- a/pypy/rpython/lltypesystem/opimpl.py
+++ b/pypy/rpython/lltypesystem/opimpl.py
@@ -578,10 +578,6 @@
 def op_shrink_array(array, smallersize):
     return False
 
-def op_sse_float_add(arr1, arr2, arr_res, index):
-    arr_res[index] = arr1[index] + arr2[index]
-    arr_res[index + 1] = arr1[index + 1] + arr2[index + 1]
-
 # ____________________________________________________________
 
 def get_op_impl(opname):

diff --git a/pypy/rlib/rvector.py b/pypy/rlib/rvector.py
new file mode 100644
--- /dev/null
+++ b/pypy/rlib/rvector.py
@@ -0,0 +1,41 @@
+
+from pypy.rpython.extregistry import ExtRegistryEntry
+
+class VectorContainer(object):
+    """ Class that is a container for multiple float/int objects.
+    Can be represented at jit-level by a single register, like xmm
+    on x86 architecture
+    """
+
+class FloatVectorContainer(VectorContainer):
+    """ A container for two float values, stored as v1 and v2.
+    """
+    def __init__(self, val1, val2):
+        self.v1 = val1
+        self.v2 = val2
+
+    def __repr__(self):
+        return '<FloatVector %f %f>' % (self.v1, self.v2)
+
+def vector_float_read(arr, index):
+    """ Read arr[index] and arr[index + 1] into a FloatVectorContainer.
+    """
+    return FloatVectorContainer(arr[index], arr[index + 1])
+vector_float_read.oopspec = 'vector_float_read(arr, index)'
+
+def vector_float_write(arr, index, container):
+    """ Store container.v1 and container.v2 into arr[index] and
+    arr[index + 1].
+    """
+    arr[index] = container.v1
+    arr[index + 1] = container.v2
+# the oopspec name must be one of the names accepted by
+# _handle_vector_op in jit/codewriter/jtransform.py
+vector_float_write.oopspec = 'vector_float_write(arr, index, container)'
+
+def vector_float_add(left, right):
+    """ Return a new FloatVectorContainer holding the element-wise sum
+    of left and right.
+    """
+    return FloatVectorContainer(left.v1 + right.v1, left.v2 + right.v2)
+vector_float_add.oopspec = 'vector_float_add(left, right)'

diff --git a/pypy/jit/metainterp/executor.py b/pypy/jit/metainterp/executor.py
--- a/pypy/jit/metainterp/executor.py
+++ b/pypy/jit/metainterp/executor.py
@@ -125,17 +125,6 @@
     else:
         cpu.bh_setarrayitem_raw_i(arraydescr, array, index, itembox.getint())
 
-def do_sse_float_add(cpu, _, array1, array2, arrayres, indexbox, arraydescr):
-    onebox = do_getarrayitem_raw(cpu, _, array1, indexbox, arraydescr)
-    twobox = do_getarrayitem_raw(cpu, _, array2, indexbox, arraydescr)
-    res = onebox.getfloat() + twobox.getfloat()
-    do_setarrayitem_raw(cpu, _, arrayres, indexbox, BoxFloat(res), arraydescr)
-    indexbox = BoxInt(indexbox.getint() + 1)
-    onebox = do_getarrayitem_raw(cpu, _, array1, indexbox, arraydescr)
-    twobox = do_getarrayitem_raw(cpu, _, array2, indexbox, arraydescr)
-    res = onebox.getfloat() + twobox.getfloat()
-    do_setarrayitem_raw(cpu, _, arrayres, indexbox, BoxFloat(res), arraydescr)
-
 def do_getfield_gc(cpu, _, structbox, fielddescr):
     struct = structbox.getref_base()
     if fielddescr.is_pointer_field():

diff --git a/pypy/jit/metainterp/blackhole.py b/pypy/jit/metainterp/blackhole.py
--- a/pypy/jit/metainterp/blackhole.py
+++ b/pypy/jit/metainterp/blackhole.py
@@ -1069,15 +1069,6 @@
     def bhimpl_setarrayitem_raw_f(cpu, array, arraydescr, index, newvalue):
         cpu.bh_setarrayitem_raw_f(arraydescr, array, index, newvalue)
 
-    @arguments("cpu", "d", "i", "i", "i", "i")
-    def bhimpl_sse_float_add(cpu, arraydescr, array1, array2, array_res, index):
-        one = cpu.bh_getarrayitem_raw_f(arraydescr, array1, index)
-        two = cpu.bh_getarrayitem_raw_f(arraydescr, array2, index)
-        cpu.bh_setarrayitem_raw_f(arraydescr, array_res, index, one + two)
-        one = cpu.bh_getarrayitem_raw_f(arraydescr, array1, index + 1)
-        two = cpu.bh_getarrayitem_raw_f(arraydescr, array2, index + 1)
-        cpu.bh_setarrayitem_raw_f(arraydescr, array_res, index + 1, one + two)
-
     # note, there is no 'r' here, since it can't happen
 
     @arguments("cpu", "r", "d", returns="i")

diff --git a/pypy/rpython/lltypesystem/lloperation.py b/pypy/rpython/lltypesystem/lloperation.py
--- a/pypy/rpython/lltypesystem/lloperation.py
+++ b/pypy/rpython/lltypesystem/lloperation.py
@@ -441,10 +441,6 @@
     'get_write_barrier_from_array_failing_case': LLOp(sideeffects=False),
     'gc_get_type_info_group': LLOp(sideeffects=False),
 
-    # __________ vectorization ops _______
-
-    'sse_float_add': LLOp(canrun=True),
-
     # __________ GC operations __________
 
     'gc__collect':          LLOp(canunwindgc=True),

diff --git a/pypy/jit/codewriter/jtransform.py b/pypy/jit/codewriter/jtransform.py
--- a/pypy/jit/codewriter/jtransform.py
+++ b/pypy/jit/codewriter/jtransform.py
@@ -351,6 +351,8 @@
             prepare = self._handle_jit_call
         elif oopspec_name.startswith('libffi_'):
             prepare = self._handle_libffi_call
+        elif oopspec_name.startswith('vector_'):
+            prepare = self._handle_vector_op
         else:
             prepare = self.prepare_builtin_call
         try:
@@ -476,14 +478,6 @@
         return self._do_builtin_call(op, 'raw_free', [op.args[0]],
                                      extra = (ARRAY,), extrakey = ARRAY)
 
-    def rewrite_op_sse_float_add(self, op):
-        ARRAY = op.args[0].concretetype.TO
-        arraydescr = self.cpu.arraydescrof(ARRAY)
-        kind = getkind(op.result.concretetype)
-        assert kind == 'void'
-        return SpaceOperation('sse_float_add',
-                              [arraydescr] + op.args, op.result)
-
     def rewrite_op_getarrayitem(self, op):
         ARRAY = op.args[0].concretetype.TO
         if self._array_of_voids(ARRAY):
@@ -1359,6 +1353,17 @@
             assert False, 'unsupported oopspec: %s' % oopspec_name
         return self._handle_oopspec_call(op, args, oopspecindex, extraeffect)
 
+    # ----------
+    # vector ops
+
+    def _handle_vector_op(self, op, oopspec_name, args):
+        # NOTE(review): 'args' is unused and op.args is forwarded -- confirm.
+        supported = ('vector_float_read', 'vector_float_write',
+                     'vector_float_add')
+        if oopspec_name not in supported:
+            raise NotSupported(oopspec_name)
+        return SpaceOperation(oopspec_name, op.args, op.result)
+
     def rewrite_op_jit_force_virtual(self, op):
         return self._do_builtin_call(op)
 

diff --git a/pypy/rlib/test/test_rvector.py b/pypy/rlib/test/test_rvector.py
new file mode 100644
--- /dev/null
+++ b/pypy/rlib/test/test_rvector.py
@@ -0,0 +1,61 @@
+
+from pypy.rlib.rvector import (vector_float_read, vector_float_write,
+                               vector_float_add)
+from pypy.rpython.lltypesystem import lltype
+from pypy.rpython.test.test_llinterp import interpret
+
+TP = lltype.Array(lltype.Float, hints={'nolength': True})
+
+class TestRVector(object):
+    def test_direct_add(self):
+        # call the helpers directly on raw lltype arrays
+        a = lltype.malloc(TP, 16, flavor='raw')
+        b = lltype.malloc(TP, 16, flavor='raw')
+        res = lltype.malloc(TP, 16, flavor='raw')
+        try:
+            a[0] = 1.2
+            a[1] = 1.3
+            b[0] = 0.1
+            b[1] = 0.3
+            a[10] = 8.3
+            a[11] = 8.1
+            b[10] = 7.8
+            b[11] = 7.6
+            f1 = vector_float_read(a, 0)
+            f2 = vector_float_read(b, 0)
+            vector_float_write(res, 2, vector_float_add(f1, f2))
+            assert res[2] == 1.2 + 0.1
+            assert res[3] == 1.3 + 0.3
+            f1 = vector_float_read(a, 10)
+            f2 = vector_float_read(b, 10)
+            vector_float_write(res, 8, vector_float_add(f1, f2))
+            assert res[8] == 8.3 + 7.8
+            assert res[9] == 8.1 + 7.6
+        finally:
+            # free the raw arrays even if an assert above fails
+            lltype.free(a, flavor='raw')
+            lltype.free(b, flavor='raw')
+            lltype.free(res, flavor='raw')
+
+    def test_interpret(self):
+        # same addition, run through interpret() and compared with a direct call
+        def f():
+            a = lltype.malloc(TP, 16, flavor='raw')
+            b = lltype.malloc(TP, 16, flavor='raw')
+            res = lltype.malloc(TP, 16, flavor='raw')
+            try:
+                a[0] = 1.2
+                a[1] = 1.3
+                b[0] = 0.1
+                b[1] = 0.3
+                f1 = vector_float_read(a, 0)
+                f2 = vector_float_read(b, 0)
+                vector_float_write(res, 8, vector_float_add(f1, f2))
+                return res[8] * 100 + res[9]
+            finally:
+                lltype.free(a, flavor='raw')
+                lltype.free(b, flavor='raw')
+                lltype.free(res, flavor='raw')
+
+        res = interpret(f, [])
+        assert res == f()

diff --git a/pypy/jit/metainterp/test/test_optimizeopt.py b/pypy/jit/metainterp/test/test_optimizeopt.py
--- a/pypy/jit/metainterp/test/test_optimizeopt.py
+++ b/pypy/jit/metainterp/test/test_optimizeopt.py
@@ -5515,9 +5515,6 @@
         # not obvious, because of the exception UnicodeDecodeError that
         # can be raised by ll_str2unicode()
 
-
-
-
 ##class TestOOtype(OptimizeOptTest, OOtypeMixin):
 
 ##    def test_instanceof(self):

diff --git a/pypy/jit/metainterp/resoperation.py b/pypy/jit/metainterp/resoperation.py
--- a/pypy/jit/metainterp/resoperation.py
+++ b/pypy/jit/metainterp/resoperation.py
@@ -466,7 +466,6 @@
     'SETARRAYITEM_RAW/3d',
     'SETFIELD_GC/2d',
     'SETFIELD_RAW/2d',
-    'SSE_FLOAT_ADD/4d',
     'STRSETITEM/3',
     'UNICODESETITEM/3',
     #'RUNTIMENEW/1',     # ootype operation