[pypy-commit] pypy default: CPython has a special case for ``long("string", power-of-two-base)`` to

arigo pypy.commits at gmail.com
Tue Jan 5 05:59:10 EST 2016


Author: Armin Rigo <arigo at tunes.org>
Branch: 
Changeset: r81572:ef530201647c
Date: 2016-01-05 11:58 +0100
http://bitbucket.org/pypy/pypy/changeset/ef530201647c/

Log:	CPython has a special case for ``long("string", power-of-two-base)``
	to avoid quadratic time. It is used by pickling, notably.

diff --git a/pypy/objspace/std/test/test_longobject.py b/pypy/objspace/std/test/test_longobject.py
--- a/pypy/objspace/std/test/test_longobject.py
+++ b/pypy/objspace/std/test/test_longobject.py
@@ -358,3 +358,10 @@
         assert 3L.__coerce__(4L) == (3L, 4L)
         assert 3L.__coerce__(4) == (3, 4)
         assert 3L.__coerce__(object()) == NotImplemented
+
+    def test_linear_long_base_16(self):
+        # never finishes if long(_, 16) is not linear-time
+        size = 100000
+        n = "5" + "0" * size
+        expected = 5 << (size * 4)
+        assert long(n, 16) == expected
diff --git a/rpython/rlib/rbigint.py b/rpython/rlib/rbigint.py
--- a/rpython/rlib/rbigint.py
+++ b/rpython/rlib/rbigint.py
@@ -2794,8 +2794,10 @@
 
 def parse_digit_string(parser):
     # helper for fromstr
+    base = parser.base
+    if (base & (base - 1)) == 0:
+        return parse_string_from_binary_base(parser)
     a = rbigint()
-    base = parser.base
     digitmax = BASE_MAX[base]
     tens, dig = 1, 0
     while True:
@@ -2811,3 +2813,50 @@
             tens *= base
     a.sign *= parser.sign
     return a
+
+def parse_string_from_binary_base(parser):
+    # The point to this routine is that it takes time linear in the number of
+    # string characters.
+    base = parser.base
+    if   base ==  2: bits_per_char = 1
+    elif base ==  4: bits_per_char = 2
+    elif base ==  8: bits_per_char = 3
+    elif base == 16: bits_per_char = 4
+    elif base == 32: bits_per_char = 5
+    else:
+        raise AssertionError
+
+    # n <- total number of bits needed, while moving 'parser' to the end
+    n = 0
+    while parser.next_digit() >= 0:
+        n += 1
+
+    # b <- number of Python digits needed, = ceiling(n/SHIFT). */
+    try:
+        b = ovfcheck(n * bits_per_char)
+        b = ovfcheck(b + (SHIFT - 1))
+    except OverflowError:
+        raise ParseStringError("long string too large to convert")
+    b = (b // SHIFT) or 1
+    z = rbigint([NULLDIGIT] * b, sign=parser.sign)
+
+    # Read string from right, and fill in long from left; i.e.,
+    # from least to most significant in both.
+    accum = _widen_digit(0)
+    bits_in_accum = 0
+    pdigit = 0
+    for _ in range(n):
+        k = parser.prev_digit()
+        accum |= _widen_digit(k) << bits_in_accum
+        bits_in_accum += bits_per_char
+        if bits_in_accum >= SHIFT:
+            z.setdigit(pdigit, accum)
+            pdigit += 1
+            assert pdigit <= b
+            accum >>= SHIFT
+            bits_in_accum -= SHIFT
+
+    if bits_in_accum:
+        z.setdigit(pdigit, accum)
+    z._normalize()
+    return z
diff --git a/rpython/rlib/rstring.py b/rpython/rlib/rstring.py
--- a/rpython/rlib/rstring.py
+++ b/rpython/rlib/rstring.py
@@ -485,6 +485,24 @@
         else:
             return -1
 
+    def prev_digit(self):
+        # After exhausting all n digits in next_digit(), you can walk them
+        # again in reverse order by calling prev_digit() exactly n times
+        i = self.i - 1
+        assert i >= 0
+        self.i = i
+        c = self.s[i]
+        digit = ord(c)
+        if '0' <= c <= '9':
+            digit -= ord('0')
+        elif 'A' <= c <= 'Z':
+            digit = (digit - ord('A')) + 10
+        elif 'a' <= c <= 'z':
+            digit = (digit - ord('a')) + 10
+        else:
+            raise AssertionError
+        return digit
+
 # -------------- public API ---------------------------------
 
 INIT_SIZE = 100 # XXX tweak
diff --git a/rpython/rlib/test/test_rbigint.py b/rpython/rlib/test/test_rbigint.py
--- a/rpython/rlib/test/test_rbigint.py
+++ b/rpython/rlib/test/test_rbigint.py
@@ -825,7 +825,19 @@
             def __init__(self, base, sign, digits):
                 self.base = base
                 self.sign = sign
-                self.next_digit = iter(digits + [-1]).next
+                self.i = 0
+                self._digits = digits
+            def next_digit(self):
+                i = self.i
+                if i == len(self._digits):
+                    return -1
+                self.i = i + 1
+                return self._digits[i]
+            def prev_digit(self):
+                i = self.i - 1
+                assert i >= 0
+                self.i = i
+                return self._digits[i]
         x = parse_digit_string(Parser(10, 1, [6]))
         assert x.eq(rbigint.fromint(6))
         x = parse_digit_string(Parser(10, 1, [6, 2, 3]))
@@ -847,6 +859,16 @@
         x = parse_digit_string(Parser(7, -1, [0, 0, 0]))
         assert x.tobool() is False
 
+        for base in [2, 4, 8, 16, 32]:
+            for inp in [[0], [1], [1, 0], [0, 1], [1, 0, 1], [1, 0, 0, 1],
+                        [1, 0, 0, base-1, 0, 1], [base-1, 1, 0, 0, 0, 1, 0],
+                        [base-1]]:
+                inp = inp * 97
+                x = parse_digit_string(Parser(base, -1, inp))
+                num = sum(inp[i] * (base ** (len(inp)-1-i))
+                          for i in range(len(inp)))
+                assert x.eq(rbigint.fromlong(-num))
+
 
 BASE = 2 ** SHIFT
 


More information about the pypy-commit mailing list