[pypy-commit] pypy py3k: hg merge default

Wed Jul 18 16:19:05 CEST 2012

Author: Antonio Cuni <anto.cuni at gmail.com>
Branch: py3k
Changeset: r56162:47325d047a97
Date: 2012-07-18 16:15 +0200
http://bitbucket.org/pypy/pypy/changeset/47325d047a97/

Log:	hg merge default

diff --git a/pypy/annotation/binaryop.py b/pypy/annotation/binaryop.py
--- a/pypy/annotation/binaryop.py
+++ b/pypy/annotation/binaryop.py
@@ -7,7 +7,7 @@
 from pypy.tool.pairtype import pair, pairtype
 from pypy.annotation.model import SomeObject, SomeInteger, SomeBool, s_Bool
 from pypy.annotation.model import SomeString, SomeChar, SomeList, SomeDict
-from pypy.annotation.model import SomeUnicodeCodePoint
+from pypy.annotation.model import SomeUnicodeCodePoint, SomeStringOrUnicode
 from pypy.annotation.model import SomeTuple, SomeImpossibleValue, s_ImpossibleValue
 from pypy.annotation.model import SomeInstance, SomeBuiltin, SomeIterator
 from pypy.annotation.model import SomePBC, SomeFloat, s_None
@@ -471,30 +471,37 @@
             "string formatting mixing strings and unicode not supported")
 
 
-class __extend__(pairtype(SomeString, SomeTuple)):
-    def mod((str, s_tuple)):
+class __extend__(pairtype(SomeString, SomeTuple),
+                 pairtype(SomeUnicodeString, SomeTuple)):
+    def mod((s_string, s_tuple)):
+        is_string = isinstance(s_string, SomeString)
+        is_unicode = isinstance(s_string, SomeUnicodeString)
+        assert is_string or is_unicode
         for s_item in s_tuple.items:
-            if isinstance(s_item, (SomeUnicodeCodePoint, SomeUnicodeString)):
+            if (is_unicode and isinstance(s_item, (SomeChar, SomeString)) or
+                is_string and isinstance(s_item, (SomeUnicodeCodePoint,
+                                                  SomeUnicodeString))):
                 raise NotImplementedError(
                     "string formatting mixing strings and unicode not supported")
-        getbookkeeper().count('strformat', str, s_tuple)
-        no_nul = str.no_nul
+        getbookkeeper().count('strformat', s_string, s_tuple)
+        no_nul = s_string.no_nul
         for s_item in s_tuple.items:
             if isinstance(s_item, SomeFloat):
                 pass   # or s_item is a subclass, like SomeInteger
-            elif isinstance(s_item, SomeString) and s_item.no_nul:
+            elif isinstance(s_item, SomeStringOrUnicode) and s_item.no_nul:
                 pass
             else:
                 no_nul = False
                 break
-        return SomeString(no_nul=no_nul)
+        return s_string.__class__(no_nul=no_nul)
 
 
-class __extend__(pairtype(SomeString, SomeObject)):
+class __extend__(pairtype(SomeString, SomeObject),
+                 pairtype(SomeUnicodeString, SomeObject)):
 
-    def mod((str, args)):
-        getbookkeeper().count('strformat', str, args)
-        return SomeString()
+    def mod((s_string, args)):
+        getbookkeeper().count('strformat', s_string, args)
+        return s_string.__class__()
 
 class __extend__(pairtype(SomeFloat, SomeFloat)):
     
diff --git a/pypy/annotation/test/test_annrpython.py b/pypy/annotation/test/test_annrpython.py
--- a/pypy/annotation/test/test_annrpython.py
+++ b/pypy/annotation/test/test_annrpython.py
@@ -3389,6 +3389,22 @@
         s = a.build_types(f, [str])
         assert isinstance(s, annmodel.SomeString)
 
+    def test_unicodeformatting(self):
+        def f(x):
+            return u'%s' % x
+
+        a = self.RPythonAnnotator()
+        s = a.build_types(f, [unicode])
+        assert isinstance(s, annmodel.SomeUnicodeString)
+
+    def test_unicodeformatting_tuple(self):
+        def f(x):
+            return u'%s' % (x,)
+
+        a = self.RPythonAnnotator()
+        s = a.build_types(f, [unicode])
+        assert isinstance(s, annmodel.SomeUnicodeString)
+
 
     def test_negative_slice(self):
         def f(s, e):
diff --git a/pypy/doc/coding-guide.rst b/pypy/doc/coding-guide.rst
--- a/pypy/doc/coding-guide.rst
+++ b/pypy/doc/coding-guide.rst
@@ -255,7 +255,12 @@
   code if the translator can prove that they are non-negative.  When
   slicing a string it is necessary to prove that the slice start and
   stop indexes are non-negative. There is no implicit str-to-unicode cast
-  anywhere.
+  anywhere. Simple string formatting using the ``%`` operator works, as long
+  as the format string is known at translation time; the only supported
+  formatting specifiers are ``%s``, ``%d``, ``%x``, ``%o``, ``%f``, plus
+  ``%r`` but only for user-defined instances. Modifiers such as conversion
+  flags, precision, length etc. are not supported. Moreover, it is forbidden
+  to mix unicode and strings when formatting.
 
 **tuples**
 
diff --git a/pypy/objspace/std/strutil.py b/pypy/objspace/std/strutil.py
--- a/pypy/objspace/std/strutil.py
+++ b/pypy/objspace/std/strutil.py
@@ -193,4 +193,4 @@
     try:
         return rstring_to_float(mystring)
     except ValueError:
-        raise ParseStringError(u"invalid literal for float()")
+        raise ParseStringError(u"invalid literal for float(): '%s'" % s)
diff --git a/pypy/objspace/std/test/test_floatobject.py b/pypy/objspace/std/test/test_floatobject.py
--- a/pypy/objspace/std/test/test_floatobject.py
+++ b/pypy/objspace/std/test/test_floatobject.py
@@ -441,6 +441,14 @@
         b = A(5).real
         assert type(b) is float
 
+    def test_invalid_literal_message(self):
+        try:
+            float('abcdef')
+        except ValueError as e:
+            assert 'abcdef' in e.message
+        else:
+            assert False, 'did not raise'
+
     def test_float_from_unicode(self):
         s = '\U0001D7CF\U0001D7CE.4' # &#120783;&#120782;.4
         assert float(s) == 10.4
diff --git a/pypy/rlib/objectmodel.py b/pypy/rlib/objectmodel.py
--- a/pypy/rlib/objectmodel.py
+++ b/pypy/rlib/objectmodel.py
@@ -112,7 +112,9 @@
     """ Decorate a function with forcing of RPython-level types on arguments.
     None means no enforcing.
 
-    XXX shouldn't we also add asserts in function body?
+    When not translated, the type of the actual arguments are checked against
+    the enforced types every time the function is called. You can disable the
+    typechecking by passing ``typecheck=False`` to @enforceargs.
     """
     typecheck = kwds.pop('typecheck', True)
     if kwds:
diff --git a/pypy/rlib/test/test_objectmodel.py b/pypy/rlib/test/test_objectmodel.py
--- a/pypy/rlib/test/test_objectmodel.py
+++ b/pypy/rlib/test/test_objectmodel.py
@@ -429,6 +429,7 @@
     exc = py.test.raises(TypeError, "f(1, 2, 3)")
     assert exc.value.message == "f argument number 2 must be of type <type 'str'>"
     py.test.raises(TypeError, "f('hello', 'world', 3)")
+    
 
 def test_enforceargs_defaults():
     @enforceargs(int, int)
diff --git a/pypy/rpython/lltypesystem/rstr.py b/pypy/rpython/lltypesystem/rstr.py
--- a/pypy/rpython/lltypesystem/rstr.py
+++ b/pypy/rpython/lltypesystem/rstr.py
@@ -1,5 +1,6 @@
 from weakref import WeakValueDictionary
 from pypy.tool.pairtype import pairtype
+from pypy.annotation import model as annmodel
 from pypy.rpython.error import TyperError
 from pypy.rlib.objectmodel import malloc_zero_filled, we_are_translated
 from pypy.rlib.objectmodel import _hash_string, enforceargs
@@ -169,6 +170,13 @@
         return result
 
     @jit.elidable
+    def ll_unicode(self, s):
+        if s:
+            return s
+        else:
+            return self.convert_const(u'None')
+
+    @jit.elidable
     def ll_encode_latin1(self, s):
         length = len(s.chars)
         result = mallocstr(length)
@@ -962,13 +970,18 @@
     def do_stringformat(cls, hop, sourcevarsrepr):
         s_str = hop.args_s[0]
         assert s_str.is_constant()
+        is_unicode = isinstance(s_str, annmodel.SomeUnicodeString)
+        if is_unicode:
+            TEMPBUF = TEMP_UNICODE
+        else:
+            TEMPBUF = TEMP
         s = s_str.const
         things = cls.parse_fmt_string(s)
         size = inputconst(Signed, len(things)) # could be unsigned?
-        cTEMP = inputconst(Void, TEMP)
+        cTEMP = inputconst(Void, TEMPBUF)
         cflags = inputconst(Void, {'flavor': 'gc'})
         vtemp = hop.genop("malloc_varsize", [cTEMP, cflags, size],
-                          resulttype=Ptr(TEMP))
+                          resulttype=Ptr(TEMPBUF))
 
         argsiter = iter(sourcevarsrepr)
 
@@ -979,7 +992,13 @@
                 vitem, r_arg = argsiter.next()
                 if not hasattr(r_arg, 'll_str'):
                     raise TyperError("ll_str unsupported for: %r" % r_arg)
-                if code == 's' or (code == 'r' and isinstance(r_arg, InstanceRepr)):
+                if code == 's':
+                    if is_unicode:
+                        # only UniCharRepr and UnicodeRepr has it so far
+                        vchunk = hop.gendirectcall(r_arg.ll_unicode, vitem)
+                    else:
+                        vchunk = hop.gendirectcall(r_arg.ll_str, vitem)
+                elif code == 'r' and isinstance(r_arg, InstanceRepr):
                     vchunk = hop.gendirectcall(r_arg.ll_str, vitem)
                 elif code == 'd':
                     assert isinstance(r_arg, IntegerRepr)
@@ -999,9 +1018,17 @@
                 else:
                     raise TyperError, "%%%s is not RPython" % (code, )
             else:
-                from pypy.rpython.lltypesystem.rstr import string_repr
-                vchunk = inputconst(string_repr, thing)
+                from pypy.rpython.lltypesystem.rstr import string_repr, unicode_repr
+                if is_unicode:
+                    vchunk = inputconst(unicode_repr, thing)
+                else:
+                    vchunk = inputconst(string_repr, thing)
             i = inputconst(Signed, i)
+            if is_unicode and vchunk.concretetype != Ptr(UNICODE):
+                # if we are here, one of the ll_str.* functions returned some
+                # STR, so we convert it to unicode. It's a bit suboptimal
+                # because we do one extra copy.
+                vchunk = hop.gendirectcall(cls.ll_str2unicode, vchunk)
             hop.genop('setarrayitem', [vtemp, i, vchunk])
 
         hop.exception_cannot_occur()   # to ignore the ZeroDivisionError of '%'
@@ -1009,6 +1036,7 @@
     do_stringformat = classmethod(do_stringformat)
 
 TEMP = GcArray(Ptr(STR))
+TEMP_UNICODE = GcArray(Ptr(UNICODE))
 
 # ____________________________________________________________
 
diff --git a/pypy/rpython/ootypesystem/rstr.py b/pypy/rpython/ootypesystem/rstr.py
--- a/pypy/rpython/ootypesystem/rstr.py
+++ b/pypy/rpython/ootypesystem/rstr.py
@@ -1,4 +1,5 @@
 from pypy.tool.pairtype import pairtype
+from pypy.annotation import model as annmodel
 from pypy.rlib.rarithmetic import ovfcheck
 from pypy.rpython.error import TyperError
 from pypy.rpython.rstr import AbstractStringRepr,AbstractCharRepr,\
@@ -79,6 +80,12 @@
             sb.ll_append_char(cast_primitive(Char, c))
         return sb.ll_build()
 
+    def ll_unicode(self, s):
+        if s:
+            return s
+        else:
+            return self.convert_const(u'None')
+
     def ll_encode_latin1(self, value):
         sb = ootype.new(ootype.StringBuilder)
         length = value.ll_strlen()
@@ -312,6 +319,7 @@
         string_repr = hop.rtyper.type_system.rstr.string_repr
         s_str = hop.args_s[0]
         assert s_str.is_constant()
+        is_unicode = isinstance(s_str, annmodel.SomeUnicodeString)
         s = s_str.const
 
         c_append = hop.inputconst(ootype.Void, 'll_append')
@@ -320,8 +328,15 @@
         c8 = hop.inputconst(ootype.Signed, 8)
         c10 = hop.inputconst(ootype.Signed, 10)
         c16 = hop.inputconst(ootype.Signed, 16)
-        c_StringBuilder = hop.inputconst(ootype.Void, ootype.StringBuilder)
-        v_buf = hop.genop("new", [c_StringBuilder], resulttype=ootype.StringBuilder)
+        if is_unicode:
+            StringBuilder = ootype.UnicodeBuilder
+            RESULT = ootype.Unicode
+        else:
+            StringBuilder = ootype.StringBuilder
+            RESULT = ootype.String
+            
+        c_StringBuilder = hop.inputconst(ootype.Void, StringBuilder)
+        v_buf = hop.genop("new", [c_StringBuilder], resulttype=StringBuilder)
 
         things = cls.parse_fmt_string(s)
         argsiter = iter(sourcevarsrepr)
@@ -331,7 +346,12 @@
                 vitem, r_arg = argsiter.next()
                 if not hasattr(r_arg, 'll_str'):
                     raise TyperError("ll_str unsupported for: %r" % r_arg)
-                if code == 's' or (code == 'r' and isinstance(r_arg, InstanceRepr)):
+                if code == 's':
+                    if is_unicode:
+                        vchunk = hop.gendirectcall(r_arg.ll_unicode, vitem)
+                    else:
+                        vchunk = hop.gendirectcall(r_arg.ll_str, vitem)
+                elif code == 'r' and isinstance(r_arg, InstanceRepr):
                     vchunk = hop.gendirectcall(r_arg.ll_str, vitem)
                 elif code == 'd':
                     assert isinstance(r_arg, IntegerRepr)
@@ -348,13 +368,19 @@
                 else:
                     raise TyperError, "%%%s is not RPython" % (code, )
             else:
-                vchunk = hop.inputconst(string_repr, thing)
-            #i = inputconst(Signed, i)
-            #hop.genop('setarrayitem', [vtemp, i, vchunk])
+                if is_unicode:
+                    vchunk = hop.inputconst(unicode_repr, thing)
+                else:
+                    vchunk = hop.inputconst(string_repr, thing)
+            if is_unicode and vchunk.concretetype != ootype.Unicode:
+                # if we are here, one of the ll_str.* functions returned some
+                # STR, so we convert it to unicode. It's a bit suboptimal
+                # because we do one extra copy.
+                vchunk = hop.gendirectcall(cls.ll_str2unicode, vchunk)
             hop.genop('oosend', [c_append, v_buf, vchunk], resulttype=ootype.Void)
 
         hop.exception_cannot_occur()   # to ignore the ZeroDivisionError of '%'
-        return hop.genop('oosend', [c_build, v_buf], resulttype=ootype.String)
+        return hop.genop('oosend', [c_build, v_buf], resulttype=RESULT)
     do_stringformat = classmethod(do_stringformat)
 
 
diff --git a/pypy/rpython/rpbc.py b/pypy/rpython/rpbc.py
--- a/pypy/rpython/rpbc.py
+++ b/pypy/rpython/rpbc.py
@@ -11,7 +11,7 @@
         mangle, inputdesc, warning, impossible_repr
 from pypy.rpython import rclass
 from pypy.rpython import robject
-from pypy.rpython.annlowlevel import llstr
+from pypy.rpython.annlowlevel import llstr, llunicode
 
 from pypy.rpython import callparse
 
diff --git a/pypy/rpython/rstr.py b/pypy/rpython/rstr.py
--- a/pypy/rpython/rstr.py
+++ b/pypy/rpython/rstr.py
@@ -483,6 +483,8 @@
         # xxx suboptimal, maybe
         return str(unicode(ch))
 
+    def ll_unicode(self, ch):
+        return unicode(ch)
 
 class __extend__(AbstractCharRepr,
                  AbstractUniCharRepr):
diff --git a/pypy/rpython/test/test_runicode.py b/pypy/rpython/test/test_runicode.py
--- a/pypy/rpython/test/test_runicode.py
+++ b/pypy/rpython/test/test_runicode.py
@@ -1,3 +1,4 @@
+# -*- encoding: utf-8 -*-
 
 from pypy.rpython.lltypesystem.lltype import malloc
 from pypy.rpython.lltypesystem.rstr import LLHelpers, UNICODE
@@ -194,7 +195,16 @@
         assert self.interpret(fn, [u'(']) == False
         assert self.interpret(fn, [u'\u1058']) == False
         assert self.interpret(fn, [u'X']) == True
-    
+
+    def test_strformat_unicode_arg(self):
+        const = self.const
+        def percentS(s):
+            return const("before %s after") % (s,)
+        #
+        res = self.interpret(percentS, [const(u'&#224;')])
+        assert self.ll_to_string(res) == const(u'before &#224; after')
+        #
+
     def unsupported(self):
         py.test.skip("not supported")
 
@@ -202,12 +212,6 @@
     test_upper = unsupported
     test_lower = unsupported
     test_splitlines = unsupported
-    test_strformat = unsupported
-    test_strformat_instance = unsupported
-    test_strformat_nontuple = unsupported
-    test_percentformat_instance = unsupported
-    test_percentformat_tuple = unsupported
-    test_percentformat_list = unsupported
     test_int = unsupported
     test_int_valueerror = unsupported
     test_float = unsupported