[pypy-commit] pypy default: move code from strutil to rstring, rarithmetic, rfloat and rbigint.

cfbolz noreply at buildbot.pypy.org
Thu Jun 27 16:44:53 CEST 2013


Author: Carl Friedrich Bolz <cfbolz at gmx.de>
Branch: 
Changeset: r65038:c1fe611caf63
Date: 2013-06-27 14:26 +0200
http://bitbucket.org/pypy/pypy/changeset/c1fe611caf63/

Log:	move code from strutil to rstring, rarithmetic, rfloat and rbigint.

diff --git a/pypy/module/_csv/interp_reader.py b/pypy/module/_csv/interp_reader.py
--- a/pypy/module/_csv/interp_reader.py
+++ b/pypy/module/_csv/interp_reader.py
@@ -41,8 +41,8 @@
     def save_field(self, field_builder):
         field = field_builder.build()
         if self.numeric_field:
-            from pypy.objspace.std.strutil import ParseStringError
-            from pypy.objspace.std.strutil import string_to_float
+            from rpython.rlib.rstring import ParseStringError
+            from rpython.rlib.rfloat import string_to_float
             self.numeric_field = False
             try:
                 ff = string_to_float(field)
diff --git a/pypy/module/micronumpy/interp_support.py b/pypy/module/micronumpy/interp_support.py
--- a/pypy/module/micronumpy/interp_support.py
+++ b/pypy/module/micronumpy/interp_support.py
@@ -2,7 +2,7 @@
 from pypy.interpreter.gateway import unwrap_spec, WrappedDefault
 from rpython.rtyper.lltypesystem import lltype, rffi
 from pypy.module.micronumpy import interp_dtype, loop
-from pypy.objspace.std.strutil import strip_spaces
+from rpython.rlib.rstring import strip_spaces
 from rpython.rlib.rarithmetic import maxint
 from pypy.module.micronumpy.base import W_NDimArray
 
diff --git a/pypy/objspace/std/complextype.py b/pypy/objspace/std/complextype.py
--- a/pypy/objspace/std/complextype.py
+++ b/pypy/objspace/std/complextype.py
@@ -1,10 +1,11 @@
 from pypy.interpreter.gateway import interp2app, unwrap_spec, WrappedDefault
 from pypy.interpreter.error import OperationError, operationerrfmt
 from pypy.objspace.std.register_all import register_all
-from pypy.objspace.std.strutil import string_to_float, ParseStringError
 from pypy.objspace.std.noneobject import W_NoneObject
 from pypy.objspace.std.stdtypedef import GetSetProperty, StdTypeDef
 from pypy.objspace.std.stdtypedef import StdObjSpaceMultiMethod
+from rpython.rlib.rfloat import string_to_float
+from rpython.rlib.rstring import ParseStringError
 
 # ERRORCODES
 
diff --git a/pypy/objspace/std/floattype.py b/pypy/objspace/std/floattype.py
--- a/pypy/objspace/std/floattype.py
+++ b/pypy/objspace/std/floattype.py
@@ -8,10 +8,9 @@
 from pypy.interpreter.error import OperationError
 from pypy.objspace.std.register_all import register_all
 from pypy.objspace.std.stdtypedef import StdTypeDef, SMM
-from pypy.objspace.std.strutil import ParseStringError
-from pypy.objspace.std.strutil import string_to_float
 from pypy.objspace.std.model import W_Object
 from rpython.rlib.rbigint import rbigint
+from rpython.rlib.rstring import ParseStringError
 
 
 float_as_integer_ratio = SMM("as_integer_ratio", 1)
@@ -41,7 +40,7 @@
           space.isinstance_w(w_value, space.w_bytearray)):
         strvalue = space.bufferstr_w(w_value)
         try:
-            value = string_to_float(strvalue)
+            value = rfloat.string_to_float(strvalue)
         except ParseStringError, e:
             raise OperationError(space.w_ValueError,
                                  space.wrap(e.msg))
@@ -49,7 +48,7 @@
         from unicodeobject import unicode_to_decimal_w
         strvalue = unicode_to_decimal_w(space, w_value)
         try:
-            value = string_to_float(strvalue)
+            value = rfloat.string_to_float(strvalue)
         except ParseStringError, e:
             raise OperationError(space.w_ValueError,
                                  space.wrap(e.msg))
diff --git a/pypy/objspace/std/inttype.py b/pypy/objspace/std/inttype.py
--- a/pypy/objspace/std/inttype.py
+++ b/pypy/objspace/std/inttype.py
@@ -5,13 +5,11 @@
 from pypy.interpreter.buffer import Buffer
 from pypy.objspace.std.register_all import register_all
 from pypy.objspace.std.stdtypedef import StdTypeDef, SMM
-from pypy.objspace.std.strutil import (string_to_int, string_to_bigint,
-                                       ParseStringError,
-                                       ParseStringOverflowError)
 from pypy.objspace.std.model import W_Object
-from rpython.rlib.rarithmetic import r_uint
+from rpython.rlib.rarithmetic import r_uint, string_to_int
 from rpython.rlib.objectmodel import instantiate
 from rpython.rlib.rbigint import rbigint
+from rpython.rlib.rstring import ParseStringError, ParseStringOverflowError
 
 # ____________________________________________________________
 
@@ -78,12 +76,11 @@
 def retry_to_w_long(space, parser, base=0):
     parser.rewind()
     try:
-        bigint = string_to_bigint(None, base=base, parser=parser)
+        bigint = rbigint.fromstr(None, base=base, parser=parser)
     except ParseStringError, e:
         raise OperationError(space.w_ValueError,
                              space.wrap(e.msg))
-    from pypy.objspace.std.longobject import newlong
-    return newlong(space, bigint)
+    return space.newlong_from_rbigint(bigint)
 
 @unwrap_spec(w_x = WrappedDefault(0))
 def descr__new__(space, w_inttype, w_x, w_base=None):
diff --git a/pypy/objspace/std/longtype.py b/pypy/objspace/std/longtype.py
--- a/pypy/objspace/std/longtype.py
+++ b/pypy/objspace/std/longtype.py
@@ -4,7 +4,7 @@
      interpindirect2app
 from pypy.objspace.std.model import W_Object
 from pypy.objspace.std.stdtypedef import StdTypeDef
-from pypy.objspace.std.strutil import string_to_bigint, ParseStringError
+from rpython.rlib.rstring import ParseStringError
 from rpython.rlib.rbigint import rbigint
 
 def descr_conjugate(space, w_int):
@@ -66,7 +66,7 @@
 
 def string_to_w_long(space, w_longtype, s, base=10):
     try:
-        bigint = string_to_bigint(s, base)
+        bigint = rbigint.fromstr(s, base)
     except ParseStringError, e:
         raise OperationError(space.w_ValueError,
                              space.wrap(e.msg))
diff --git a/pypy/objspace/std/strutil.py b/pypy/objspace/std/strutil.py
deleted file mode 100644
--- a/pypy/objspace/std/strutil.py
+++ /dev/null
@@ -1,188 +0,0 @@
-"""
-Pure Python implementation of string utilities.
-"""
-
-from rpython.rlib.rarithmetic import ovfcheck
-from rpython.rlib.rfloat import rstring_to_float, INFINITY, NAN
-from rpython.rlib.rbigint import rbigint, parse_digit_string
-from pypy.interpreter.error import OperationError
-import math
-
-# XXX factor more functions out of stringobject.py.
-# This module is independent from PyPy.
-
-def strip_spaces(s):
-    # XXX this is not locale-dependent
-    p = 0
-    q = len(s)
-    while p < q and s[p] in ' \f\n\r\t\v':
-        p += 1
-    while p < q and s[q-1] in ' \f\n\r\t\v':
-        q -= 1
-    assert q >= p     # annotator hint, don't remove
-    return s[p:q]
-
-class ParseStringError(Exception):
-    def __init__(self, msg):
-        self.msg = msg
-
-class ParseStringOverflowError(Exception):
-    def __init__(self, parser):
-        self.parser = parser
-
-# iterator-like class
-class NumberStringParser:
-
-    def error(self):
-        raise ParseStringError("invalid literal for %s() with base %d: '%s'" %
-                               (self.fname, self.original_base, self.literal))
-
-    def __init__(self, s, literal, base, fname):
-        self.literal = literal
-        self.fname = fname
-        sign = 1
-        if s.startswith('-'):
-            sign = -1
-            s = strip_spaces(s[1:])
-        elif s.startswith('+'):
-            s = strip_spaces(s[1:])
-        self.sign = sign
-        self.original_base = base
-
-        if base == 0:
-            if s.startswith('0x') or s.startswith('0X'):
-                base = 16
-            elif s.startswith('0b') or s.startswith('0B'):
-                base = 2
-            elif s.startswith('0'): # also covers the '0o' case
-                base = 8
-            else:
-                base = 10
-        elif base < 2 or base > 36:
-            raise ParseStringError, "%s() base must be >= 2 and <= 36" % (fname,)
-        self.base = base
-
-        if base == 16 and (s.startswith('0x') or s.startswith('0X')):
-            s = s[2:]
-        if base == 8 and (s.startswith('0o') or s.startswith('0O')):
-            s = s[2:]
-        if base == 2 and (s.startswith('0b') or s.startswith('0B')):
-            s = s[2:]
-        if not s:
-            self.error()
-        self.s = s
-        self.n = len(s)
-        self.i = 0
-
-    def rewind(self):
-        self.i = 0
-
-    def next_digit(self): # -1 => exhausted
-        if self.i < self.n:
-            c = self.s[self.i]
-            digit = ord(c)
-            if '0' <= c <= '9':
-                digit -= ord('0')
-            elif 'A' <= c <= 'Z':
-                digit = (digit - ord('A')) + 10
-            elif 'a' <= c <= 'z':
-                digit = (digit - ord('a')) + 10
-            else:
-                self.error()
-            if digit >= self.base:
-                self.error()
-            self.i += 1
-            return digit
-        else:
-            return -1
-
-def string_to_int(s, base=10):
-    """Utility to converts a string to an integer.
-    If base is 0, the proper base is guessed based on the leading
-    characters of 's'.  Raises ParseStringError in case of error.
-    Raises ParseStringOverflowError in case the result does not fit.
-    """
-    s = literal = strip_spaces(s)
-    p = NumberStringParser(s, literal, base, 'int')
-    base = p.base
-    result = 0
-    while True:
-        digit = p.next_digit()
-        if digit == -1:
-            return result
-
-        if p.sign == -1:
-            digit = -digit
-
-        try:
-            result = ovfcheck(result * base)
-            result = ovfcheck(result + digit)
-        except OverflowError:
-            raise ParseStringOverflowError(p)
-
-def string_to_bigint(s, base=10, parser=None):
-    """As string_to_int(), but ignores an optional 'l' or 'L' suffix
-    and returns an rbigint."""
-    if parser is None:
-        s = literal = strip_spaces(s)
-        if (s.endswith('l') or s.endswith('L')) and base < 22:
-            # in base 22 and above, 'L' is a valid digit!  try: long('L',22)
-            s = s[:-1]
-        p = NumberStringParser(s, literal, base, 'long')
-    else:
-        p = parser
-    return parse_digit_string(p)
-
-# Tim's comment:
-# 57 bits are more than needed in any case.
-# to allow for some rounding, we take one
-# digit more.
-
-# In the PyPy case, we can compute everything at compile time:
-# XXX move this stuff to some central place, it is now also
-# in _float_formatting.
-
-def calc_mantissa_bits():
-    bits = 1 # I know it is almost always 53, but let it compute...
-    while 1:
-        pattern = (1L << bits) - 1
-        comp = long(float(pattern))
-        if comp != pattern:
-            return bits - 1
-        bits += 1
-
-MANTISSA_BITS = calc_mantissa_bits()
-del calc_mantissa_bits
-MANTISSA_DIGITS = len(str( (1L << MANTISSA_BITS)-1 )) + 1
-
-def string_to_float(s):
-    """
-    Conversion of string to float.
-    This version tries to only raise on invalid literals.
-    Overflows should be converted to infinity whenever possible.
-
-    Expects an unwrapped string and return an unwrapped float.
-    """
-
-    s = strip_spaces(s)
-
-    if not s:
-        raise ParseStringError("empty string for float()")
-
-
-    low = s.lower()
-    if low == "-inf" or low == "-infinity":
-        return -INFINITY
-    elif low == "inf" or low == "+inf":
-        return INFINITY
-    elif low == "infinity" or low == "+infinity":
-        return INFINITY
-    elif low == "nan" or low == "+nan":
-        return NAN
-    elif low == "-nan":
-        return -NAN
-
-    try:
-        return rstring_to_float(s)
-    except ValueError:
-        raise ParseStringError("invalid literal for float(): '%s'" % s)
diff --git a/pypy/objspace/std/test/test_strutil.py b/pypy/objspace/std/test/test_strutil.py
deleted file mode 100644
--- a/pypy/objspace/std/test/test_strutil.py
+++ /dev/null
@@ -1,189 +0,0 @@
-import py, random
-from pypy.objspace.std.strutil import *
-from pypy.interpreter.error import OperationError
-
-
-class TestStrUtil:
-
-    def test_string_to_int(self):
-        space = self.space
-        cases = [('0', 0),
-                 ('1', 1),
-                 ('9', 9),
-                 ('10', 10),
-                 ('09', 9),
-                 ('0000101', 101),    # not octal unless base 0 or 8
-                 ('5123', 5123),
-                 (' 0', 0),
-                 ('0  ', 0),
-                 (' \t \n   32313  \f  \v   \r  \n\r    ', 32313),
-                 ('+12', 12),
-                 ('-5', -5),
-                 ('- 5', -5),
-                 ('+ 5', 5),
-                 ('  -123456789 ', -123456789),
-                 ]
-        for s, expected in cases:
-            assert string_to_int(s) == expected
-            assert string_to_bigint(s).tolong() == expected
-
-    def test_string_to_int_base(self):
-        space = self.space        
-        cases = [('111', 2, 7),
-                 ('010', 2, 2),
-                 ('102', 3, 11),
-                 ('103', 4, 19),
-                 ('107', 8, 71),
-                 ('109', 10, 109),
-                 ('10A', 11, 131),
-                 ('10a', 11, 131),
-                 ('10f', 16, 271),
-                 ('10F', 16, 271),
-                 ('0x10f', 16, 271),
-                 ('0x10F', 16, 271),
-                 ('10z', 36, 1331),
-                 ('10Z', 36, 1331),
-                 ('12',   0, 12),
-                 ('015',  0, 13),
-                 ('0x10', 0, 16),
-                 ('0XE',  0, 14),
-                 ('0',    0, 0),
-                 ('0b11', 2, 3),
-                 ('0B10', 2, 2),
-                 ('0o77', 8, 63),
-                 ]
-        for s, base, expected in cases:
-            assert string_to_int(s, base) == expected
-            assert string_to_int('+'+s, base) == expected
-            assert string_to_int('-'+s, base) == -expected
-            assert string_to_int(s+'\n', base) == expected
-            assert string_to_int('  +'+s, base) == expected
-            assert string_to_int('-'+s+'  ', base) == -expected
-
-    def test_string_to_int_error(self):
-        space = self.space
-        cases = ['0x123',    # must use base 0 or 16
-                 ' 0X12 ',
-                 '0b01',
-                 '0o01',
-                 '',
-                 '++12',
-                 '+-12',
-                 '-+12',
-                 '--12',
-                 '12a6',
-                 '12A6',
-                 'f',
-                 'Z',
-                 '.',
-                 '@',
-                 ]
-        for s in cases:
-            raises(ParseStringError, string_to_int, s)
-            raises(ParseStringError, string_to_int, '  '+s)
-            raises(ParseStringError, string_to_int, s+'  ')
-            raises(ParseStringError, string_to_int, '+'+s)
-            raises(ParseStringError, string_to_int, '-'+s)
-        raises(ParseStringError, string_to_int, '0x', 16)
-        raises(ParseStringError, string_to_int, '-0x', 16)
-
-        exc = raises(ParseStringError, string_to_int, '')
-        assert exc.value.msg == "invalid literal for int() with base 10: ''"
-        exc = raises(ParseStringError, string_to_int, '', 0)
-        assert exc.value.msg == "invalid literal for int() with base 0: ''"
-
-    def test_string_to_int_overflow(self):
-        import sys
-        space = self.space
-        raises(ParseStringOverflowError, string_to_int,
-               str(sys.maxint*17))
-
-    def test_string_to_int_not_overflow(self):
-        import sys
-        for x in [-sys.maxint-1, sys.maxint]:
-            y = string_to_int(str(x))
-            assert y == x
-
-    def test_string_to_int_base_error(self):
-        space = self.space
-        cases = [('1', 1),
-                 ('1', 37),
-                 ('a', 0),
-                 ('9', 9),
-                 ('0x123', 7),
-                 ('145cdf', 15),
-                 ('12', 37),
-                 ('12', 98172),
-                 ('12', -1),
-                 ('12', -908),
-                 ('12.3', 10),
-                 ('12.3', 13),
-                 ('12.3', 16),
-                 ]
-        for s, base in cases:
-            raises(ParseStringError, string_to_int, s, base)
-            raises(ParseStringError, string_to_int, '  '+s, base)
-            raises(ParseStringError, string_to_int, s+'  ', base)
-            raises(ParseStringError, string_to_int, '+'+s, base)
-            raises(ParseStringError, string_to_int, '-'+s, base)
-
-    def test_string_to_bigint(self):
-        assert string_to_bigint('123L').tolong() == 123
-        assert string_to_bigint('123L  ').tolong() == 123
-        raises(ParseStringError, string_to_bigint, 'L')
-        raises(ParseStringError, string_to_bigint, 'L  ')
-        assert string_to_bigint('123L', 4).tolong() == 27
-        assert string_to_bigint('123L', 30).tolong() == 27000 + 1800 + 90 + 21
-        assert string_to_bigint('123L', 22).tolong() == 10648 + 968 + 66 + 21
-        assert string_to_bigint('123L', 21).tolong() == 441 + 42 + 3
-        assert string_to_bigint('1891234174197319').tolong() == 1891234174197319
-
-    def test_string_to_float(self):
-        assert string_to_float('0') == 0.0
-        assert string_to_float('1') == 1.0
-        assert string_to_float('-1.5') == -1.5
-        assert string_to_float('1.5E2') == 150.0
-        assert string_to_float('2.5E-1') == 0.25
-        assert string_to_float('1e1111111111111') == float('1e1111111111111')
-        assert string_to_float('1e-1111111111111') == float('1e-1111111111111')
-        assert string_to_float('-1e1111111111111') == float('-1e1111111111111')
-        assert string_to_float('-1e-1111111111111') == float('-1e-1111111111111')
-        assert string_to_float('1e111111111111111111111') == float('1e111111111111111111111')
-        assert string_to_float('1e-111111111111111111111') == float('1e-111111111111111111111')
-        assert string_to_float('-1e111111111111111111111') == float('-1e111111111111111111111')
-        assert string_to_float('-1e-111111111111111111111') == float('-1e-111111111111111111111')
-
-        valid_parts = [['', '  ', ' \f\n\r\t\v'],
-                       ['', '+', '-'],
-                       ['00', '90', '.5', '2.4', '3.', '0.07',
-                        '12.3489749871982471987198371293717398256187563298638726'
-                        '2187362820947193247129871083561249818451804287437824015'
-                        '013816418758104762348932657836583048761487632840726386'],
-                       ['', 'e0', 'E+1', 'E-01', 'E42'],
-                       ['', '  ', ' \f\n\r\t\v'],
-                       ]
-        invalid_parts = [['#'],
-                         ['++', '+-', '-+', '--'],
-                         ['', '1.2.3', '.', '5..6'],
-                         ['E+', 'E-', 'e', 'e++', 'E++2'],
-                         ['#'],
-                         ]
-        for part0 in valid_parts[0]:
-            for part1 in valid_parts[1]:
-                for part2 in valid_parts[2]:
-                    for part3 in valid_parts[3]:
-                        for part4 in valid_parts[4]:
-                            s = part0+part1+part2+part3+part4
-                            assert (abs(string_to_float(s) - float(s)) <=
-                                    1E-13 * abs(float(s)))
-
-        for j in range(len(invalid_parts)):
-            for invalid in invalid_parts[j]:
-                for i in range(20):
-                    parts = [random.choice(lst) for lst in valid_parts]
-                    parts[j] = invalid
-                    s = ''.join(parts)
-                    print repr(s)
-                    if s.strip(): # empty s raises OperationError directly
-                        py.test.raises(ParseStringError, string_to_float, s)
-        py.test.raises(ParseStringError, string_to_float, "")
diff --git a/rpython/rlib/rarithmetic.py b/rpython/rlib/rarithmetic.py
--- a/rpython/rlib/rarithmetic.py
+++ b/rpython/rlib/rarithmetic.py
@@ -668,3 +668,36 @@
     if T == lltype.Float:
         return longlong2float(rffi.cast(rffi.LONGLONG, res))
     return rffi.cast(T, res)
+
+
+# String parsing support
+# ---------------------------
+
+def string_to_int(s, base=10):
+    """Utility to convert a string to an integer.
+    If base is 0, the proper base is guessed based on the leading
+    characters of 's'.  Raises ParseStringError in case of error.
+    Raises ParseStringOverflowError in case the result does not fit.
+    """
+    from rpython.rlib.rstring import NumberStringParser, \
+        ParseStringOverflowError, \
+        ParseStringError, strip_spaces
+    s = literal = strip_spaces(s)
+    p = NumberStringParser(s, literal, base, 'int')
+    base = p.base
+    result = 0
+    while True:
+        digit = p.next_digit()
+        if digit == -1:
+            return result
+
+        if p.sign == -1:
+            digit = -digit
+
+        try:
+            result = ovfcheck(result * base)
+            result = ovfcheck(result + digit)
+        except OverflowError:
+            raise ParseStringOverflowError(p)
+
+
diff --git a/rpython/rlib/rbigint.py b/rpython/rlib/rbigint.py
--- a/rpython/rlib/rbigint.py
+++ b/rpython/rlib/rbigint.py
@@ -252,6 +252,21 @@
 
     @staticmethod
     @jit.elidable
+    def fromstr(s, base=0, parser=None):
+        """As string_to_int(), but ignores an optional 'l' or 'L' suffix
+        and returns an rbigint."""
+        from rpython.rlib.rstring import NumberStringParser, \
+            strip_spaces
+        if parser is None:
+            s = literal = strip_spaces(s)
+            if (s.endswith('l') or s.endswith('L')) and base < 22:
+                # in base 22 and above, 'L' is a valid digit!  try: long('L',22)
+                s = s[:-1]
+            parser = NumberStringParser(s, literal, base, 'long')
+        return parse_digit_string(parser)
+
+    @staticmethod
+    @jit.elidable
     def frombytes(s, byteorder, signed):
         if byteorder not in ('big', 'little'):
             raise InvalidEndiannessError()
@@ -2289,7 +2304,7 @@
     return a
 
 def parse_digit_string(parser):
-    # helper for objspace.std.strutil
+    # helper for fromstr
     a = rbigint()
     base = parser.base
     digitmax = BASE_MAX[base]
diff --git a/rpython/rlib/rfloat.py b/rpython/rlib/rfloat.py
--- a/rpython/rlib/rfloat.py
+++ b/rpython/rlib/rfloat.py
@@ -27,6 +27,39 @@
 
 globals().update(rffi_platform.configure(CConfig))
 
+def string_to_float(s):
+    """
+    Conversion of string to float.
+    This version tries to only raise on invalid literals.
+    Overflows should be converted to infinity whenever possible.
+
+    Expects an unwrapped string and returns an unwrapped float.
+    """
+    from rpython.rlib.rstring import strip_spaces, ParseStringError
+
+    s = strip_spaces(s)
+
+    if not s:
+        raise ParseStringError("empty string for float()")
+
+
+    low = s.lower()
+    if low == "-inf" or low == "-infinity":
+        return -INFINITY
+    elif low == "inf" or low == "+inf":
+        return INFINITY
+    elif low == "infinity" or low == "+infinity":
+        return INFINITY
+    elif low == "nan" or low == "+nan":
+        return NAN
+    elif low == "-nan":
+        return -NAN
+
+    try:
+        return rstring_to_float(s)
+    except ValueError:
+        raise ParseStringError("invalid literal for float(): '%s'" % s)
+
 def rstring_to_float(s):
     return rstring_to_float_impl(s)
 
diff --git a/rpython/rlib/rstring.py b/rpython/rlib/rstring.py
--- a/rpython/rlib/rstring.py
+++ b/rpython/rlib/rstring.py
@@ -186,6 +186,92 @@
             return False
     return True
 
+# -------------- numeric parsing support --------------------
+
+def strip_spaces(s):
+    # XXX this is not locale-dependent
+    p = 0
+    q = len(s)
+    while p < q and s[p] in ' \f\n\r\t\v':
+        p += 1
+    while p < q and s[q-1] in ' \f\n\r\t\v':
+        q -= 1
+    assert q >= p     # annotator hint, don't remove
+    return s[p:q]
+
+class ParseStringError(Exception):
+    def __init__(self, msg):
+        self.msg = msg
+
+class ParseStringOverflowError(Exception):
+    def __init__(self, parser):
+        self.parser = parser
+
+# iterator-like class
+class NumberStringParser:
+
+    def error(self):
+        raise ParseStringError("invalid literal for %s() with base %d: '%s'" %
+                               (self.fname, self.original_base, self.literal))
+
+    def __init__(self, s, literal, base, fname):
+        self.literal = literal
+        self.fname = fname
+        sign = 1
+        if s.startswith('-'):
+            sign = -1
+            s = strip_spaces(s[1:])
+        elif s.startswith('+'):
+            s = strip_spaces(s[1:])
+        self.sign = sign
+        self.original_base = base
+
+        if base == 0:
+            if s.startswith('0x') or s.startswith('0X'):
+                base = 16
+            elif s.startswith('0b') or s.startswith('0B'):
+                base = 2
+            elif s.startswith('0'): # also covers the '0o' case
+                base = 8
+            else:
+                base = 10
+        elif base < 2 or base > 36:
+            raise ParseStringError, "%s() base must be >= 2 and <= 36" % (fname,)
+        self.base = base
+
+        if base == 16 and (s.startswith('0x') or s.startswith('0X')):
+            s = s[2:]
+        if base == 8 and (s.startswith('0o') or s.startswith('0O')):
+            s = s[2:]
+        if base == 2 and (s.startswith('0b') or s.startswith('0B')):
+            s = s[2:]
+        if not s:
+            self.error()
+        self.s = s
+        self.n = len(s)
+        self.i = 0
+
+    def rewind(self):
+        self.i = 0
+
+    def next_digit(self): # -1 => exhausted
+        if self.i < self.n:
+            c = self.s[self.i]
+            digit = ord(c)
+            if '0' <= c <= '9':
+                digit -= ord('0')
+            elif 'A' <= c <= 'Z':
+                digit = (digit - ord('A')) + 10
+            elif 'a' <= c <= 'z':
+                digit = (digit - ord('a')) + 10
+            else:
+                self.error()
+            if digit >= self.base:
+                self.error()
+            self.i += 1
+            return digit
+        else:
+            return -1
 
 # -------------- public API ---------------------------------
 
diff --git a/rpython/rlib/test/test_rarithmetic.py b/rpython/rlib/test/test_rarithmetic.py
--- a/rpython/rlib/test/test_rarithmetic.py
+++ b/rpython/rlib/test/test_rarithmetic.py
@@ -1,6 +1,7 @@
 from rpython.rtyper.test.tool import BaseRtypingTest, LLRtypeMixin, OORtypeMixin
 from rpython.rtyper.test.test_llinterp import interpret
 from rpython.rlib.rarithmetic import *
+from rpython.rlib.rstring import ParseStringError, ParseStringOverflowError
 import sys
 import py
 
@@ -412,3 +413,123 @@
 
 def test_byteswap_interpret():
     interpret(test_byteswap, [])
+
+
+class TestStringToInt:
+
+    def test_string_to_int(self):
+        cases = [('0', 0),
+                 ('1', 1),
+                 ('9', 9),
+                 ('10', 10),
+                 ('09', 9),
+                 ('0000101', 101),    # not octal unless base 0 or 8
+                 ('5123', 5123),
+                 (' 0', 0),
+                 ('0  ', 0),
+                 (' \t \n   32313  \f  \v   \r  \n\r    ', 32313),
+                 ('+12', 12),
+                 ('-5', -5),
+                 ('- 5', -5),
+                 ('+ 5', 5),
+                 ('  -123456789 ', -123456789),
+                 ]
+        for s, expected in cases:
+            assert string_to_int(s) == expected
+            #assert string_to_bigint(s).tolong() == expected
+
+    def test_string_to_int_base(self):
+        cases = [('111', 2, 7),
+                 ('010', 2, 2),
+                 ('102', 3, 11),
+                 ('103', 4, 19),
+                 ('107', 8, 71),
+                 ('109', 10, 109),
+                 ('10A', 11, 131),
+                 ('10a', 11, 131),
+                 ('10f', 16, 271),
+                 ('10F', 16, 271),
+                 ('0x10f', 16, 271),
+                 ('0x10F', 16, 271),
+                 ('10z', 36, 1331),
+                 ('10Z', 36, 1331),
+                 ('12',   0, 12),
+                 ('015',  0, 13),
+                 ('0x10', 0, 16),
+                 ('0XE',  0, 14),
+                 ('0',    0, 0),
+                 ('0b11', 2, 3),
+                 ('0B10', 2, 2),
+                 ('0o77', 8, 63),
+                 ]
+        for s, base, expected in cases:
+            assert string_to_int(s, base) == expected
+            assert string_to_int('+'+s, base) == expected
+            assert string_to_int('-'+s, base) == -expected
+            assert string_to_int(s+'\n', base) == expected
+            assert string_to_int('  +'+s, base) == expected
+            assert string_to_int('-'+s+'  ', base) == -expected
+
+    def test_string_to_int_error(self):
+        cases = ['0x123',    # must use base 0 or 16
+                 ' 0X12 ',
+                 '0b01',
+                 '0o01',
+                 '',
+                 '++12',
+                 '+-12',
+                 '-+12',
+                 '--12',
+                 '12a6',
+                 '12A6',
+                 'f',
+                 'Z',
+                 '.',
+                 '@',
+                 ]
+        for s in cases:
+            py.test.raises(ParseStringError, string_to_int, s)
+            py.test.raises(ParseStringError, string_to_int, '  '+s)
+            py.test.raises(ParseStringError, string_to_int, s+'  ')
+            py.test.raises(ParseStringError, string_to_int, '+'+s)
+            py.test.raises(ParseStringError, string_to_int, '-'+s)
+        py.test.raises(ParseStringError, string_to_int, '0x', 16)
+        py.test.raises(ParseStringError, string_to_int, '-0x', 16)
+
+        exc = py.test.raises(ParseStringError, string_to_int, '')
+        assert exc.value.msg == "invalid literal for int() with base 10: ''"
+        exc = py.test.raises(ParseStringError, string_to_int, '', 0)
+        assert exc.value.msg == "invalid literal for int() with base 0: ''"
+
+    def test_string_to_int_overflow(self):
+        import sys
+        py.test.raises(ParseStringOverflowError, string_to_int,
+               str(sys.maxint*17))
+
+    def test_string_to_int_not_overflow(self):
+        import sys
+        for x in [-sys.maxint-1, sys.maxint]:
+            y = string_to_int(str(x))
+            assert y == x
+
+    def test_string_to_int_base_error(self):
+        cases = [('1', 1),
+                 ('1', 37),
+                 ('a', 0),
+                 ('9', 9),
+                 ('0x123', 7),
+                 ('145cdf', 15),
+                 ('12', 37),
+                 ('12', 98172),
+                 ('12', -1),
+                 ('12', -908),
+                 ('12.3', 10),
+                 ('12.3', 13),
+                 ('12.3', 16),
+                 ]
+        for s, base in cases:
+            py.test.raises(ParseStringError, string_to_int, s, base)
+            py.test.raises(ParseStringError, string_to_int, '  '+s, base)
+            py.test.raises(ParseStringError, string_to_int, s+'  ', base)
+            py.test.raises(ParseStringError, string_to_int, '+'+s, base)
+            py.test.raises(ParseStringError, string_to_int, '-'+s, base)
diff --git a/rpython/rlib/test/test_rbigint.py b/rpython/rlib/test/test_rbigint.py
--- a/rpython/rlib/test/test_rbigint.py
+++ b/rpython/rlib/test/test_rbigint.py
@@ -210,6 +210,18 @@
         assert x.tolong() == 0
         assert x.tobool() is False
 
+    def test_fromstr(self):  # rbigint.fromstr: 'L' handling, explicit bases, a long literal
+        from rpython.rlib.rstring import ParseStringError
+        assert rbigint.fromstr('123L').tolong() == 123  # trailing 'L' does not change the value
+        assert rbigint.fromstr('123L  ').tolong() == 123  # same with trailing spaces after the 'L'
+        py.test.raises(ParseStringError, rbigint.fromstr, 'L')  # an 'L' with no digits is an error
+        py.test.raises(ParseStringError, rbigint.fromstr, 'L  ')
+        assert rbigint.fromstr('123L', 4).tolong() == 27  # 1*16 + 2*4 + 3: the 'L' is not counted in base 4
+        assert rbigint.fromstr('123L', 30).tolong() == 27000 + 1800 + 90 + 21  # base 30: 'L' counts as the digit 21
+        assert rbigint.fromstr('123L', 22).tolong() == 10648 + 968 + 66 + 21  # base 22: 'L' is still the digit 21
+        assert rbigint.fromstr('123L', 21).tolong() == 441 + 42 + 3  # base 21: 21 is no longer a digit, so 'L' is not counted
+        assert rbigint.fromstr('1891234174197319').tolong() == 1891234174197319  # 16-digit decimal literal
+
     def test_add(self):
         x = 123456789123456789000000L
         y = 123858582373821923936744221L
diff --git a/rpython/rlib/test/test_rfloat.py b/rpython/rlib/test/test_rfloat.py
--- a/rpython/rlib/test/test_rfloat.py
+++ b/rpython/rlib/test/test_rfloat.py
@@ -7,6 +7,7 @@
 from rpython.rlib.rfloat import round_double
 from rpython.rlib.rfloat import erf, erfc, gamma, lgamma, isnan
 from rpython.rlib.rfloat import ulps_check, acc_check
+from rpython.rlib.rfloat import string_to_float
 from rpython.rlib.rbigint import rbigint
 
 def test_copysign():
@@ -232,3 +233,57 @@
     assert f(10.0) == 362880.0
     assert f(1720.0) == -42
     assert f(172.0) == -42
+
+
+
+def test_string_to_float():  # compares string_to_float with builtin float() and checks rejected input
+    from rpython.rlib.rstring import ParseStringError
+    import random  # NOTE(review): never seeded here, so the random cases differ between runs
+    assert string_to_float('0') == 0.0
+    assert string_to_float('1') == 1.0
+    assert string_to_float('-1.5') == -1.5
+    assert string_to_float('1.5E2') == 150.0
+    assert string_to_float('2.5E-1') == 0.25
+    assert string_to_float('1e1111111111111') == float('1e1111111111111')  # huge exponents must agree with builtin float()
+    assert string_to_float('1e-1111111111111') == float('1e-1111111111111')
+    assert string_to_float('-1e1111111111111') == float('-1e1111111111111')
+    assert string_to_float('-1e-1111111111111') == float('-1e-1111111111111')
+    assert string_to_float('1e111111111111111111111') == float('1e111111111111111111111')  # same checks with even longer exponents
+    assert string_to_float('1e-111111111111111111111') == float('1e-111111111111111111111')
+    assert string_to_float('-1e111111111111111111111') == float('-1e111111111111111111111')
+    assert string_to_float('-1e-111111111111111111111') == float('-1e-111111111111111111111')
+
+    valid_parts = [['', '  ', ' \f\n\r\t\v'],  # slot 0: leading whitespace
+                   ['', '+', '-'],  # slot 1: sign
+                   ['00', '90', '.5', '2.4', '3.', '0.07',  # slot 2: digits, incl. one very long literal
+                    '12.3489749871982471987198371293717398256187563298638726'
+                    '2187362820947193247129871083561249818451804287437824015'
+                    '013816418758104762348932657836583048761487632840726386'],
+                   ['', 'e0', 'E+1', 'E-01', 'E42'],  # slot 3: exponent
+                   ['', '  ', ' \f\n\r\t\v'],  # slot 4: trailing whitespace
+                   ]
+    invalid_parts = [['#'],  # same five slots, each holding fragments that must be rejected
+                     ['++', '+-', '-+', '--'],
+                     ['', '1.2.3', '.', '5..6'],
+                     ['E+', 'E-', 'e', 'e++', 'E++2'],
+                     ['#'],
+                     ]
+    for part0 in valid_parts[0]:  # exhaustively try every combination of valid fragments
+        for part1 in valid_parts[1]:
+            for part2 in valid_parts[2]:
+                for part3 in valid_parts[3]:
+                    for part4 in valid_parts[4]:
+                        s = part0+part1+part2+part3+part4
+                        assert (abs(string_to_float(s) - float(s)) <=
+                                1E-13 * abs(float(s)))  # relative tolerance against builtin float()
+
+    for j in range(len(invalid_parts)):  # j is the slot that gets corrupted
+        for invalid in invalid_parts[j]:
+            for i in range(20):  # 20 random valid surroundings per invalid fragment
+                parts = [random.choice(lst) for lst in valid_parts]
+                parts[j] = invalid  # replace exactly one slot with the invalid fragment
+                s = ''.join(parts)
+                print repr(s)  # echoes the generated input
+                if s.strip(): # blank s is skipped; "" is checked after the loop. NOTE(review): earlier remark "empty s raises OperationError directly" looks stale
+                    py.test.raises(ParseStringError, string_to_float, s)
+    py.test.raises(ParseStringError, string_to_float, "")  # the empty string must raise ParseStringError


More information about the pypy-commit mailing list