[pypy-commit] pypy vecopt: cvtpd2dq packs ints to the lower quadword. that is why it did not work, starting to rethink the conversion function

Tue May 19 16:59:45 CEST 2015

Author: Richard Plangger <rich at pasra.at>
Branch: vecopt
Changeset: r77391:43ebe5044bd1
Date: 2015-05-19 11:33 +0200
http://bitbucket.org/pypy/pypy/changeset/43ebe5044bd1/

Log:	cvtpd2dq packs ints to the lower quadword. that is why it did not
	work, starting to rethink the conversion function

diff --git a/pypy/module/micronumpy/compile.py b/pypy/module/micronumpy/compile.py
--- a/pypy/module/micronumpy/compile.py
+++ b/pypy/module/micronumpy/compile.py
@@ -324,6 +324,15 @@
         return W_TypeObject(w_obj.typedef.name)
 
     def call_function(self, tp, w_dtype, *args):
+        if tp is self.w_float:
+            if isinstance(w_dtype, boxes.W_Float64Box):
+                return FloatObject(float(w_dtype.value))
+            if isinstance(w_dtype, boxes.W_Float32Box):
+                return FloatObject(float(w_dtype.value))
+            if isinstance(w_dtype, boxes.W_Int64Box):
+                return FloatObject(float(int(w_dtype.value)))
+            if isinstance(w_dtype, boxes.W_Int32Box):
+                return FloatObject(float(int(w_dtype.value)))
         return w_dtype
 
     def call_method(self, w_obj, s, *args):
@@ -534,9 +543,9 @@
             if isinstance(w_res, boxes.W_Float32Box):
                 print "access", w_lhs, "[", w_rhs.intval, "] => ", float(w_res.value)
             if isinstance(w_res, boxes.W_Int64Box):
-                print "access", w_lhs, "[", w_rhs.intval, "] => ", float(int(w_res.value))
+                print "access", w_lhs, "[", w_rhs.intval, "] => ", int(w_res.value)
             if isinstance(w_res, boxes.W_Int32Box):
-                print "access", w_lhs, "[", w_rhs.intval, "] => ", float(int(w_res.value))
+                print "access", w_lhs, "[", w_rhs.intval, "] => ", int(w_res.value)
         else:
             raise NotImplementedError
         if (not isinstance(w_res, W_NDimArray) and
diff --git a/pypy/module/micronumpy/test/test_zjit.py b/pypy/module/micronumpy/test/test_zjit.py
--- a/pypy/module/micronumpy/test/test_zjit.py
+++ b/pypy/module/micronumpy/test/test_zjit.py
@@ -107,6 +107,35 @@
         retval = self.interp.eval_graph(self.graph, [i])
         return retval
 
+    def define_float32_copy():
+        return """
+        a = astype(|30|, float32)
+        x1 = a -> 7
+        x2 = a -> 8
+        x3 = a -> 9
+        x4 = a -> 10
+        r = x1 + x2 + x3 + x4
+        r
+        """
+    def test_float32_copy(self):
+        result = self.run("float32_copy")
+        assert int(result) == 7+8+9+10
+        self.check_vectorized(1, 1)
+
+    def define_int32_copy():
+        return """
+        a = astype(|30|, int32)
+        x1 = a -> 7
+        x2 = a -> 8
+        x3 = a -> 9
+        x4 = a -> 10
+        x1 + x2 + x3 + x4
+        """
+    def test_int32_copy(self):
+        result = self.run("int32_copy")
+        assert int(result) == 7+8+9+10
+        self.check_vectorized(1, 1)
+
     def define_float32_add():
         return """
         a = astype(|30|, float32)
@@ -175,7 +204,8 @@
         x2 = b -> 8
         x3 = b -> 9
         x4 = b -> 10
-        x1 + x2 + x3 + x4
+        r = x1 + x2 + x3 + x4
+        r
         """
         #return """
         #a = astype(|30|, int32)
@@ -192,25 +222,6 @@
         assert int(result) == 7+1+8+1+9+1+10+1
         self.check_vectorized(1, 1)
 
-    def define_int32_copy():
-        return """
-        a = astype(|30|, float32)
-        x1 = a -> 7
-        x2 = a -> 8
-        x3 = a -> 9
-        x4 = a -> 10
-        x5 = a -> 11
-        x6 = a -> 12
-        x7 = a -> 13
-        x8 = a -> 14
-        x9 = a -> 15
-        x1 + x2 + x3 + x4
-        """
-    def test_int32_copy(self):
-        result = self.run("int32_copy")
-        assert int(result) == 7+8+9+10
-        self.check_vectorized(1, 1)
-
 
     def define_pow():
         return """
diff --git a/rpython/jit/backend/x86/assembler.py b/rpython/jit/backend/x86/assembler.py
--- a/rpython/jit/backend/x86/assembler.py
+++ b/rpython/jit/backend/x86/assembler.py
@@ -2565,11 +2565,13 @@
             # is there a better sequence to move them?
             scratch = X86_64_SCRATCH_REG.value
             #print resloc, "[0] <- int32(", srcloc, "[0])"
+            #66 48 0f 7e c0     movq   %xmm0,%rax
             print resloc, "[1] <- int32(", srcloc, "[1])"
+            #self.mc.MOVDQ(scratch, srcloc)
             #self.mc.PEXTRQ_rxi(scratch, srcloc.value, 0)
             #self.mc.PINSRD_xri(resloc.value, scratch, 0)
-            self.mc.PEXTRQ_rxi(scratch, srcloc.value, 1)
-            self.mc.PINSRD_xri(resloc.value, scratch, 1)
+            #self.mc.PEXTRQ_rxi(scratch, srcloc.value, 1)
+            #self.mc.PINSRD_xri(resloc.value, scratch, 1)
         else:
             py.test.set_trace()
             raise NotImplementedError("sign ext missing")
diff --git a/rpython/jit/metainterp/optimizeopt/vectorize.py b/rpython/jit/metainterp/optimizeopt/vectorize.py
--- a/rpython/jit/metainterp/optimizeopt/vectorize.py
+++ b/rpython/jit/metainterp/optimizeopt/vectorize.py
@@ -814,8 +814,6 @@
                 continue
             new_box = tgt_box.clonebox()
             new_box.item_count += src_box.item_count
-            if opnum == rop.VEC_FLOAT_PACK:
-                py.test.set_trace()
             op = ResOperation(opnum, [tgt_box, src_box, ConstInt(i),
                                       ConstInt(src_box.item_count)], new_box)
             self.preamble_ops.append(op)