[pypy-commit] pypy default: Use custom assembler for divisions of 128 bits by 64 bits with a result
arigo
pypy.commits at gmail.com
Sun Jun 26 16:21:04 EDT 2016
Author: Armin Rigo <arigo at tunes.org>
Branch:
Changeset: r85386:800377eb1f02
Date: 2016-06-26 15:37 +0200
http://bitbucket.org/pypy/pypy/changeset/800377eb1f02/
Log: Use custom assembler for divisions of 128 bits by 64 bits with a
result that fits 64 bits. It's hard to get this effect automatically
while writing C.
diff --git a/rpython/rlib/rbigint.py b/rpython/rlib/rbigint.py
--- a/rpython/rlib/rbigint.py
+++ b/rpython/rlib/rbigint.py
@@ -1827,6 +1827,8 @@
Divide bigint pin by non-zero digit n, storing quotient
in pout, and returning the remainder. It's OK for pin == pout on entry.
"""
+ from rpython.rtyper.lltypesystem.lloperation import llop
+
rem = _widen_digit(0)
assert n > 0 and n <= MASK
if not size:
@@ -1834,7 +1836,7 @@
size -= 1
while size >= 0:
rem = (rem << SHIFT) | pin.widedigit(size)
- hi = rem // n
+ hi = llop.long2_floordiv(lltype.Signed, rem, n)
pout.setdigit(size, hi)
rem -= hi * n
size -= 1
@@ -1924,6 +1926,7 @@
z._normalize()
return z
_muladd1._annspecialcase_ = "specialize:argtype(2)"
+
def _v_lshift(z, a, m, d):
""" Shift digit vector a[0:m] d bits left, with 0 <= d < SHIFT. Put
* result in z[0:m], and return the d bits shifted out of the top.
@@ -1961,6 +1964,8 @@
def _x_divrem(v1, w1):
""" Unsigned bigint division with remainder -- the algorithm """
+ from rpython.rtyper.lltypesystem.lloperation import llop
+
size_v = v1.numdigits()
size_w = w1.numdigits()
assert size_v >= size_w and size_w > 1
@@ -1991,6 +1996,7 @@
assert k > 0
a = rbigint([NULLDIGIT] * k, 1, k)
+ wm1s = w.digit(abs(size_w-1))
wm1 = w.widedigit(abs(size_w-1))
wm2 = w.widedigit(abs(size_w-2))
@@ -2008,7 +2014,7 @@
vtop = v.widedigit(j)
assert vtop <= wm1
vv = (vtop << SHIFT) | v.widedigit(abs(j-1))
- q = vv / wm1
+ q = llop.long2_floordiv(lltype.Signed, vv, wm1s)
r = vv - wm1 * q
while wm2 * q > ((r << SHIFT) | v.widedigit(abs(j-2))):
q -= 1
diff --git a/rpython/rtyper/lltypesystem/lloperation.py b/rpython/rtyper/lltypesystem/lloperation.py
--- a/rpython/rtyper/lltypesystem/lloperation.py
+++ b/rpython/rtyper/lltypesystem/lloperation.py
@@ -319,6 +319,9 @@
'lllong_rshift': LLOp(canfold=True), # args (r_longlonglong, int)
'lllong_xor': LLOp(canfold=True),
+ 'long2_floordiv': LLOp(canfold=True), # (double-r_long, int) => int
+ # (all integers signed)
+
'cast_primitive': LLOp(canfold=True),
'cast_bool_to_int': LLOp(canfold=True),
'cast_bool_to_uint': LLOp(canfold=True),
diff --git a/rpython/rtyper/lltypesystem/opimpl.py b/rpython/rtyper/lltypesystem/opimpl.py
--- a/rpython/rtyper/lltypesystem/opimpl.py
+++ b/rpython/rtyper/lltypesystem/opimpl.py
@@ -16,7 +16,7 @@
'bool': True, 'is_true':True}
# global synonyms for some types
-from rpython.rlib.rarithmetic import intmask
+from rpython.rlib.rarithmetic import intmask, base_int
from rpython.rlib.rarithmetic import r_int, r_uint, r_longlong, r_ulonglong, r_longlonglong
from rpython.rtyper.lltypesystem.llmemory import AddressAsInt
@@ -733,6 +733,16 @@
assert isinstance(x, bool)
return x
+def op_long2_floordiv(x, y):  # Python-level implementation of the 'long2_floordiv' operation
+ if lltype.typeOf(x) != lltype.Signed:  # dividend is not a plain word-sized Signed
+ assert isinstance(x, base_int)
+ assert x.BITS == 2 * r_int.BITS  # dividend must be exactly twice as wide as r_int
+ assert x.SIGNED
+ assert lltype.typeOf(y) is lltype.Signed  # divisor must have lltype Signed
+ result = int(x) // y  # NOTE(review): '//' floors, the x86 'idiv' backend truncates toward zero; they differ for negative operands -- confirm callers only pass non-negative values
+ assert result == intmask(result), "overflow in long2_floordiv"  # quotient must be unchanged by intmask()
+ return result
+
# ____________________________________________________________
def get_op_impl(opname):
diff --git a/rpython/translator/c/src/asm_gcc_x86.h b/rpython/translator/c/src/asm_gcc_x86.h
--- a/rpython/translator/c/src/asm_gcc_x86.h
+++ b/rpython/translator/c/src/asm_gcc_x86.h
@@ -106,3 +106,11 @@
#define PYPY_X86_CHECK_SSE2_DEFINED
RPY_EXTERN void pypy_x86_check_sse2(void);
#endif
+
+
+#undef OP_LONG2_FLOORDIV
+/* assumes that 'y' and 'r' fit in a signed word,
+ but 'x' takes up to two words */
+#define OP_LONG2_FLOORDIV(x, y, r) \
+ __asm__("idiv %1" : "=a"(r) : \
+ "r"((long)y), "A"((long long)x));
diff --git a/rpython/translator/c/src/asm_gcc_x86_64.h b/rpython/translator/c/src/asm_gcc_x86_64.h
--- a/rpython/translator/c/src/asm_gcc_x86_64.h
+++ b/rpython/translator/c/src/asm_gcc_x86_64.h
@@ -6,3 +6,10 @@
asm volatile("rdtsc" : "=a"(_rax), "=d"(_rdx)); \
val = (_rdx << 32) | _rax; \
} while (0)
+
+#undef OP_LONG2_FLOORDIV
+/* assumes that 'y' and 'r' fit in a signed word,
+ but 'x' takes up to two words */
+#define OP_LONG2_FLOORDIV(x, y, r) \
+ __asm__("idiv %1" : "=a"(r) : \
+ "r"((long)y), "a"((long)x), "d"((long)((x >> 32) >> 32)))
diff --git a/rpython/translator/c/src/int.h b/rpython/translator/c/src/int.h
--- a/rpython/translator/c/src/int.h
+++ b/rpython/translator/c/src/int.h
@@ -135,6 +135,7 @@
#define OP_LLONG_FLOORDIV(x,y,r) r = (x) / (y)
#define OP_ULLONG_FLOORDIV(x,y,r) r = (x) / (y)
#define OP_LLLONG_FLOORDIV(x,y,r) r = (x) / (y)
+#define OP_LONG2_FLOORDIV(x,y,r) r = (x) / (y)
/* modulus */
diff --git a/rpython/translator/c/test/test_lltyped.py b/rpython/translator/c/test/test_lltyped.py
--- a/rpython/translator/c/test/test_lltyped.py
+++ b/rpython/translator/c/test/test_lltyped.py
@@ -1,6 +1,7 @@
-import py
+import py, sys, random
from rpython.rtyper.lltypesystem.lltype import *
from rpython.rtyper.lltypesystem import rffi
+from rpython.rtyper.lltypesystem.lloperation import llop
from rpython.translator.c.test.test_genc import compile
from rpython.tool.sourcetools import func_with_new_name
@@ -1023,3 +1024,27 @@
assert fn(r_longlong(1)) == True
assert fn(r_longlong(256)) == True
assert fn(r_longlong(2**32)) == True
+
+ def test_long2_floordiv(self):  # checks llop.long2_floordiv on functions built with self.getcompiled()
+ def f(a, b):
+ return llop.long2_floordiv(Signed, a, b)
+ fn = self.getcompiled(f, [int, int])
+ assert fn(100, 3) == 33  # dividend given as a plain int
+ #
+ if sys.maxint > 2**32:  # word size larger than 32 bits
+ HUGE = getattr(rffi, '__INT128_T', None)  # None when rffi has no '__INT128_T' attribute
+ bits = 128
+ else:
+ HUGE = SignedLongLong
+ bits = 64
+ if HUGE is not None:  # the double-width case needs a double-width type
+ def f(a, b, c):
+ ab = (rffi.cast(HUGE, a) << (bits//2)) | b  # a becomes the high half, b the low half
+ return llop.long2_floordiv(Signed, ab, c)
+ fn = self.getcompiled(f, [int, int, int])
+ for i in range(100):
+ a = random.randrange(0, 10)
+ b = random.randrange(0, sys.maxint+1)
+ c = random.randrange(2*a+2, 25)  # c >= 2*(a+1) keeps the quotient below 2**(bits//2 - 1)
+ print a, b, c  # prints the randomly chosen inputs
+ assert fn(a, b, c) == ((a << (bits//2)) | b) // c  # expected value computed with Python's unbounded ints
More information about the pypy-commit
mailing list