[pypy-commit] pypy default: CPython has a special case for ``long("string", power-of-two-base)`` to avoid quadratic time
arigo
pypy.commits at gmail.com
Tue Jan 5 05:59:10 EST 2016
Author: Armin Rigo <arigo at tunes.org>
Branch:
Changeset: r81572:ef530201647c
Date: 2016-01-05 11:58 +0100
http://bitbucket.org/pypy/pypy/changeset/ef530201647c/
Log: CPython has a special case for ``long("string", power-of-two-base)``
to avoid quadratic time. It is used by pickling, notably.
diff --git a/pypy/objspace/std/test/test_longobject.py b/pypy/objspace/std/test/test_longobject.py
--- a/pypy/objspace/std/test/test_longobject.py
+++ b/pypy/objspace/std/test/test_longobject.py
@@ -358,3 +358,10 @@
assert 3L.__coerce__(4L) == (3L, 4L)
assert 3L.__coerce__(4) == (3, 4)
assert 3L.__coerce__(object()) == NotImplemented
+
+ def test_linear_long_base_16(self):
+ # never finishes if long(_, 16) is not linear-time
+ size = 100000
+ n = "5" + "0" * size
+ expected = 5 << (size * 4)
+ assert long(n, 16) == expected
diff --git a/rpython/rlib/rbigint.py b/rpython/rlib/rbigint.py
--- a/rpython/rlib/rbigint.py
+++ b/rpython/rlib/rbigint.py
@@ -2794,8 +2794,10 @@
def parse_digit_string(parser):
# helper for fromstr
+ base = parser.base
+ if (base & (base - 1)) == 0:
+ return parse_string_from_binary_base(parser)
a = rbigint()
- base = parser.base
digitmax = BASE_MAX[base]
tens, dig = 1, 0
while True:
@@ -2811,3 +2813,50 @@
tens *= base
a.sign *= parser.sign
return a
+
+def parse_string_from_binary_base(parser):
+ # The point of this routine is that it takes time linear in the number of
+ # string characters.
+ base = parser.base
+ if base == 2: bits_per_char = 1
+ elif base == 4: bits_per_char = 2
+ elif base == 8: bits_per_char = 3
+ elif base == 16: bits_per_char = 4
+ elif base == 32: bits_per_char = 5
+ else:
+ raise AssertionError
+
+ # n <- total number of digit characters, while moving 'parser' to the end
+ n = 0
+ while parser.next_digit() >= 0:
+ n += 1
+
+ # b <- number of bigint digits needed, = ceiling(n * bits_per_char / SHIFT).
+ try:
+ b = ovfcheck(n * bits_per_char)
+ b = ovfcheck(b + (SHIFT - 1))
+ except OverflowError:
+ raise ParseStringError("long string too large to convert")
+ b = (b // SHIFT) or 1
+ z = rbigint([NULLDIGIT] * b, sign=parser.sign)
+
+ # Read string from right, and fill in long from left; i.e.,
+ # from least to most significant in both.
+ accum = _widen_digit(0)
+ bits_in_accum = 0
+ pdigit = 0
+ for _ in range(n):
+ k = parser.prev_digit()
+ accum |= _widen_digit(k) << bits_in_accum
+ bits_in_accum += bits_per_char
+ if bits_in_accum >= SHIFT:
+ z.setdigit(pdigit, accum)
+ pdigit += 1
+ assert pdigit <= b
+ accum >>= SHIFT
+ bits_in_accum -= SHIFT
+
+ if bits_in_accum:
+ z.setdigit(pdigit, accum)
+ z._normalize()
+ return z
diff --git a/rpython/rlib/rstring.py b/rpython/rlib/rstring.py
--- a/rpython/rlib/rstring.py
+++ b/rpython/rlib/rstring.py
@@ -485,6 +485,24 @@
else:
return -1
+ def prev_digit(self):
+ # After exhausting all n digits in next_digit(), you can walk them
+ # again in reverse order by calling prev_digit() exactly n times
+ i = self.i - 1
+ assert i >= 0
+ self.i = i
+ c = self.s[i]
+ digit = ord(c)
+ if '0' <= c <= '9':
+ digit -= ord('0')
+ elif 'A' <= c <= 'Z':
+ digit = (digit - ord('A')) + 10
+ elif 'a' <= c <= 'z':
+ digit = (digit - ord('a')) + 10
+ else:
+ raise AssertionError
+ return digit
+
# -------------- public API ---------------------------------
INIT_SIZE = 100 # XXX tweak
diff --git a/rpython/rlib/test/test_rbigint.py b/rpython/rlib/test/test_rbigint.py
--- a/rpython/rlib/test/test_rbigint.py
+++ b/rpython/rlib/test/test_rbigint.py
@@ -825,7 +825,19 @@
def __init__(self, base, sign, digits):
self.base = base
self.sign = sign
- self.next_digit = iter(digits + [-1]).next
+ self.i = 0
+ self._digits = digits
+ def next_digit(self):
+ i = self.i
+ if i == len(self._digits):
+ return -1
+ self.i = i + 1
+ return self._digits[i]
+ def prev_digit(self):
+ i = self.i - 1
+ assert i >= 0
+ self.i = i
+ return self._digits[i]
x = parse_digit_string(Parser(10, 1, [6]))
assert x.eq(rbigint.fromint(6))
x = parse_digit_string(Parser(10, 1, [6, 2, 3]))
@@ -847,6 +859,16 @@
x = parse_digit_string(Parser(7, -1, [0, 0, 0]))
assert x.tobool() is False
+ for base in [2, 4, 8, 16, 32]:
+ for inp in [[0], [1], [1, 0], [0, 1], [1, 0, 1], [1, 0, 0, 1],
+ [1, 0, 0, base-1, 0, 1], [base-1, 1, 0, 0, 0, 1, 0],
+ [base-1]]:
+ inp = inp * 97
+ x = parse_digit_string(Parser(base, -1, inp))
+ num = sum(inp[i] * (base ** (len(inp)-1-i))
+ for i in range(len(inp)))
+ assert x.eq(rbigint.fromlong(-num))
+
BASE = 2 ** SHIFT
More information about the pypy-commit
mailing list