[pypy-svn] r74793 - in pypy/trunk/pypy/objspace/std: . test

Wed May 26 21:31:17 CEST 2010

Author: fijal
Date: Wed May 26 21:31:15 2010
New Revision: 74793

Modified:
   pypy/trunk/pypy/objspace/std/floattype.py
   pypy/trunk/pypy/objspace/std/strutil.py
   pypy/trunk/pypy/objspace/std/test/test_floatobject.py
   pypy/trunk/pypy/objspace/std/test/test_strutil.py
Log:
* Fix issue 534 by special-casing "-inf", "inf", "nan" and "-nan" (not sure
  if that's enough, though) when doing float(string)
* Remove unused code (the comment said "for reference", but I don't see a point)
* Run the tests against the code that we actually use instead of testing
  unused functions


Modified: pypy/trunk/pypy/objspace/std/floattype.py
==============================================================================

--- pypy/trunk/pypy/objspace/std/floattype.py	(original)
+++ pypy/trunk/pypy/objspace/std/floattype.py	Wed May 26 21:31:15 2010
@@ -1,21 +1,16 @@
 from pypy.interpreter import gateway
 from pypy.interpreter.error import OperationError
 from pypy.objspace.std.stdtypedef import StdTypeDef
-from pypy.objspace.std.strutil import string_to_float, ParseStringError
+from pypy.objspace.std.strutil import ParseStringError
 from pypy.objspace.std.strutil import interp_string_to_float
 
-USE_NEW_S2F = True
-
 def descr__new__(space, w_floattype, w_x=0.0):
     from pypy.objspace.std.floatobject import W_FloatObject
     w_value = w_x     # 'x' is the keyword argument name in CPython
     if space.is_true(space.isinstance(w_value, space.w_str)):
         strvalue = space.str_w(w_value)
         try:
-            if USE_NEW_S2F:
-                value = interp_string_to_float(space, strvalue)
-            else:
-                value = string_to_float(strvalue)
+            value = interp_string_to_float(space, strvalue)
         except ParseStringError, e:
             raise OperationError(space.w_ValueError,
                                  space.wrap(e.msg))
@@ -26,10 +21,7 @@
             from unicodeobject import unicode_to_decimal_w
         strvalue = unicode_to_decimal_w(space, w_value)
         try:
-            if USE_NEW_S2F:
-                value = interp_string_to_float(space, strvalue)
-            else:
-                value = string_to_float(strvalue)
+            value = interp_string_to_float(space, strvalue)
         except ParseStringError, e:
             raise OperationError(space.w_ValueError,
                                  space.wrap(e.msg))

Modified: pypy/trunk/pypy/objspace/std/strutil.py
==============================================================================
--- pypy/trunk/pypy/objspace/std/strutil.py	(original)
+++ pypy/trunk/pypy/objspace/std/strutil.py	Wed May 26 21:31:15 2010
@@ -2,7 +2,8 @@
 Pure Python implementation of string utilities.
 """
 
-from pypy.rlib.rarithmetic import ovfcheck, break_up_float, parts_to_float
+from pypy.rlib.rarithmetic import ovfcheck, break_up_float, parts_to_float,\
+     INFINITY, NAN
 from pypy.interpreter.error import OperationError
 import math
 
@@ -138,33 +139,6 @@
             return w_result
         w_result = space.add(space.mul(w_result,w_base), space.newlong(digit))
 
-def string_to_float(s):
-    """
-    Conversion of string to float.
-    This version tries to only raise on invalid literals.
-    Overflows should be converted to infinity whenever possible.
-    """
-
-    s = strip_spaces(s)
-
-    if not s:
-        raise ParseStringError("empty string for float()")
-
-    # 1) parse the string into pieces.
-    try:
-        sign, before_point, after_point, exponent = break_up_float(s)
-    except ValueError:
-        raise ParseStringError("invalid literal for float()")
-    
-    if not before_point and not after_point:
-        raise ParseStringError("invalid literal for float()")
-
-    try:
-        return parts_to_float(sign, before_point, after_point, exponent)
-    except ValueError:
-        raise ParseStringError("invalid literal for float()")
-
-
 # Tim's comment:
 # 57 bits are more than needed in any case.
 # to allow for some rounding, we take one
@@ -187,140 +161,6 @@
 del calc_mantissa_bits
 MANTISSA_DIGITS = len(str( (1L << MANTISSA_BITS)-1 )) + 1
 
-# we keep this version for reference.
-def applevel_string_to_float(s):
-    """
-    Conversion of string to float.
-    This version tries to only raise on invalid literals.
-    Overflows should be converted to infinity whenever possible.
-    """
-    # this version was triggered by Python 2.4 which adds
-    # a test that breaks on overflow.
-    # XXX The test still breaks for a different reason:
-    # float must implement rich comparisons, where comparison
-    # between infinity and a too large long does not overflow!
-
-    # The problem:
-    # there can be extreme notations of floats which are not
-    # infinity.
-    # For instance, this works in CPython:
-    # float('1' + '0'*1000 + 'e-1000')
-    # should evaluate to 1.0.
-    # note: float('1' + '0'*10000 + 'e-10000')
-    # does not work in CPython, but PyPy can do it, now.
-
-    # The idea:
-    # in order to compensate between very long digit strings
-    # and extreme exponent numbers, we try to avoid overflows
-    # by adjusting the exponent by the number of mantissa
-    # digits. For simplicity, all computations are done in
-    # long math.
-
-    # The plan:
-    # 1) parse the string into pieces.
-    # 2) pre-calculate digit exponent dexp.
-    # 3) truncate and adjust dexp.
-    # 4) compute the exponent and truncate to +-400.
-    # 5) compute the value using long math and proper rounding.
-
-    # Positive results:
-    # The algorithm appears appears to produce correct round-trip
-    # values for the perfect input of _float_formatting.
-    # Note:
-    # XXX: the builtin rounding of long->float does not work, correctly.
-    # Ask Tim Peters for the reasons why no correct rounding is done.
-    # XXX: limitations:
-    # - It is possibly not too efficient.
-    # - Really optimum results need a more sophisticated algorithm
-    #   like Bellerophon from William D. Clinger, cf.
-    #   http://citeseer.csail.mit.edu/clinger90how.html
-    
-    s = strip_spaces(s)
-
-    if not s:
-        raise ParseStringError("empty string for float()")
-
-    # 1) parse the string into pieces.
-    try:
-        sign, before_point, after_point, exponent = break_up_float(s)
-    except ValueError:
-        raise ParseStringError("invalid literal for float()")
-        
-    digits = before_point + after_point
-    if digits:
-        raise ParseStringError("invalid literal for float()")
-
-    # 2) pre-calculate digit exponent dexp.
-    dexp = len(before_point)
-
-    # 3) truncate and adjust dexp.
-    p = 0
-    plim = dexp + len(after_point)
-    while p < plim and digits[p] == '0':
-        p += 1
-        dexp -= 1
-    digits = digits[p : p + MANTISSA_DIGITS]
-    p = len(digits) - 1
-    while p >= 0 and digits[p] == '0':
-        p -= 1
-    dexp -= p + 1
-    digits = digits[:p+1]
-    if len(digits) == 0:
-        digits = '0'
-
-    # 4) compute the exponent and truncate to +-400
-    if not exponent:
-        exponent = '0'
-    e = long(exponent) + dexp
-    if e >= 400:
-        e = 400
-    elif e <= -400:
-        e = -400
-
-    # 5) compute the value using long math and proper rounding.
-    lr = long(digits)
-    if e >= 0:
-        bits = 0
-        m = lr * 10L ** e
-    else:
-        # compute a sufficiently large scale
-        prec = MANTISSA_DIGITS * 2 + 22 # 128, maybe
-        bits = - (int(math.ceil(-e / math.log10(2.0) - 1e-10)) + prec)
-        scale = 2L ** -bits
-        pten = 10L ** -e
-        m = (lr * scale) // pten
-
-    # we now have a fairly large mantissa.
-    # Shift it and round the last bit.
-
-    # first estimate the bits and do a big shift
-    if m:
-        mbits = int(math.ceil(math.log(m, 2) - 1e-10))
-        needed = MANTISSA_BITS
-        if mbits > needed:
-            if mbits > needed+1:
-                shifted = mbits - (needed+1)
-                m >>= shifted
-                bits += shifted
-            # do the rounding
-            bits += 1
-            m = (m >> 1) + (m & 1)
-
-    try:
-        r = math.ldexp(m, bits)
-    except OverflowError:
-        r = 1e200 * 1e200 # produce inf, hopefully
-
-    if sign == '-':
-        r = -r
-
-    return r
-
-
-# the "real" implementation.
-# for comments, see above.
-# XXX probably this very specific thing should go into longobject?
-
 def interp_string_to_float(space, s):
     """
     Conversion of string to float.
@@ -336,6 +176,15 @@
         raise OperationError(space.w_ValueError, space.wrap(
             "empty string for float()"))
 
+    
+    low = s.lower()
+    if low == "-inf":
+        return -INFINITY
+    elif low == "inf":
+        return INFINITY
+    elif low == "nan" or low == "-nan":
+        return NAN
+
     # 1) parse the string into pieces.
     try:
         sign, before_point, after_point, exponent = break_up_float(s)
@@ -432,7 +281,7 @@
         if r == 2*r and r != 0.0:
             raise OverflowError
     except OverflowError:
-        r = 1e200 * 1e200 # produce inf, hopefully
+        r = INFINITY
 
     if sign == '-':
         r = -r

Modified: pypy/trunk/pypy/objspace/std/test/test_floatobject.py
==============================================================================
--- pypy/trunk/pypy/objspace/std/test/test_floatobject.py	(original)
+++ pypy/trunk/pypy/objspace/std/test/test_floatobject.py	Wed May 26 21:31:15 2010
@@ -92,6 +92,9 @@
     def test_float_string(self):
         assert 42 == float("42")
         assert 42.25 == float("42.25")
+        assert str(float("inf")).startswith("inf")
+        assert str(float("-INf")).startswith("-inf")
+        assert str(float("-nAn")).startswith("nan")
 
     def test_float_unicode(self):
         # u00A0 and u2000 are some kind of spaces

Modified: pypy/trunk/pypy/objspace/std/test/test_strutil.py
==============================================================================
--- pypy/trunk/pypy/objspace/std/test/test_strutil.py	(original)
+++ pypy/trunk/pypy/objspace/std/test/test_strutil.py	Wed May 26 21:31:15 2010
@@ -1,5 +1,6 @@
 import py, random
 from pypy.objspace.std.strutil import *
+from pypy.interpreter.error import OperationError
 
 import py
 
@@ -132,50 +133,53 @@
         assert string_to_w_long(space, '123L', 21).longval() == 441 + 42 + 3
         assert string_to_w_long(space, '1891234174197319').longval() == 1891234174197319
 
-def test_string_to_float():
-    assert string_to_float('0') == 0.0
-    assert string_to_float('1') == 1.0
-    assert string_to_float('-1.5') == -1.5
-    assert string_to_float('1.5E2') == 150.0
-    assert string_to_float('2.5E-1') == 0.25
-    assert string_to_float('1e1111111111111') == float('1e1111111111111')
-    assert string_to_float('1e-1111111111111') == float('1e-1111111111111')
-    assert string_to_float('-1e1111111111111') == float('-1e1111111111111')
-    assert string_to_float('-1e-1111111111111') == float('-1e-1111111111111')
-    assert string_to_float('1e111111111111111111111') == float('1e111111111111111111111')
-    assert string_to_float('1e-111111111111111111111') == float('1e-111111111111111111111')
-    assert string_to_float('-1e111111111111111111111') == float('-1e111111111111111111111')
-    assert string_to_float('-1e-111111111111111111111') == float('-1e-111111111111111111111')
-
-    valid_parts = [['', '  ', ' \f\n\r\t\v'],
-                   ['', '+', '-'],
-                   ['00', '90', '.5', '2.4', '3.', '0.07',
-                    '12.3489749871982471987198371293717398256187563298638726'
-                    '2187362820947193247129871083561249818451804287437824015'
-                    '013816418758104762348932657836583048761487632840726386'],
-                   ['', 'e0', 'E+1', 'E-01', 'E42'],
-                   ['', '  ', ' \f\n\r\t\v'],
-                   ]
-    invalid_parts = [['#'],
-                     ['++', '+-', '-+', '--'],
-                     ['', '1.2.3', '.', '5..6'],
-                     ['E+', 'E-', 'e', 'e++', 'E++2'],
-                     ['#'],
-                     ]
-    for part0 in valid_parts[0]:
-        for part1 in valid_parts[1]:
-            for part2 in valid_parts[2]:
-                for part3 in valid_parts[3]:
-                    for part4 in valid_parts[4]:
-                        s = part0+part1+part2+part3+part4
-                        assert (abs(string_to_float(s) - float(s)) <=
-                                1E-13 * abs(float(s)))
-
-    for j in range(len(invalid_parts)):
-        for invalid in invalid_parts[j]:
-            for i in range(20):
-                parts = [random.choice(lst) for lst in valid_parts]
-                parts[j] = invalid
-                s = ''.join(parts)
-                print repr(s)
-                py.test.raises(ParseStringError, string_to_float, s)
+    def test_string_to_float(self):
+        def string_to_float(x):
+            return interp_string_to_float(self.space, x)
+        assert string_to_float('0') == 0.0
+        assert string_to_float('1') == 1.0
+        assert string_to_float('-1.5') == -1.5
+        assert string_to_float('1.5E2') == 150.0
+        assert string_to_float('2.5E-1') == 0.25
+        assert string_to_float('1e1111111111111') == float('1e1111111111111')
+        assert string_to_float('1e-1111111111111') == float('1e-1111111111111')
+        assert string_to_float('-1e1111111111111') == float('-1e1111111111111')
+        assert string_to_float('-1e-1111111111111') == float('-1e-1111111111111')
+        assert string_to_float('1e111111111111111111111') == float('1e111111111111111111111')
+        assert string_to_float('1e-111111111111111111111') == float('1e-111111111111111111111')
+        assert string_to_float('-1e111111111111111111111') == float('-1e111111111111111111111')
+        assert string_to_float('-1e-111111111111111111111') == float('-1e-111111111111111111111')
+
+        valid_parts = [['', '  ', ' \f\n\r\t\v'],
+                       ['', '+', '-'],
+                       ['00', '90', '.5', '2.4', '3.', '0.07',
+                        '12.3489749871982471987198371293717398256187563298638726'
+                        '2187362820947193247129871083561249818451804287437824015'
+                        '013816418758104762348932657836583048761487632840726386'],
+                       ['', 'e0', 'E+1', 'E-01', 'E42'],
+                       ['', '  ', ' \f\n\r\t\v'],
+                       ]
+        invalid_parts = [['#'],
+                         ['++', '+-', '-+', '--'],
+                         ['', '1.2.3', '.', '5..6'],
+                         ['E+', 'E-', 'e', 'e++', 'E++2'],
+                         ['#'],
+                         ]
+        for part0 in valid_parts[0]:
+            for part1 in valid_parts[1]:
+                for part2 in valid_parts[2]:
+                    for part3 in valid_parts[3]:
+                        for part4 in valid_parts[4]:
+                            s = part0+part1+part2+part3+part4
+                            assert (abs(string_to_float(s) - float(s)) <=
+                                    1E-13 * abs(float(s)))
+
+        for j in range(len(invalid_parts)):
+            for invalid in invalid_parts[j]:
+                for i in range(20):
+                    parts = [random.choice(lst) for lst in valid_parts]
+                    parts[j] = invalid
+                    s = ''.join(parts)
+                    print repr(s)
+                    if s.strip(): # empty s raises OperationError directly
+                        py.test.raises(ParseStringError, string_to_float, s)