[pypy-svn] r12607 - in pypy/branch/non-fake-unicode/pypy: lib objspace/std

ac at codespeak.net ac at codespeak.net
Fri May 20 10:56:03 CEST 2005


Author: ac
Date: Fri May 20 10:56:03 2005
New Revision: 12607

Modified:
   pypy/branch/non-fake-unicode/pypy/lib/_formatting.py
   pypy/branch/non-fake-unicode/pypy/objspace/std/unicodeobject.py
Log:
Make string formatting handle unicode as well.

Modified: pypy/branch/non-fake-unicode/pypy/lib/_formatting.py
==============================================================================
--- pypy/branch/non-fake-unicode/pypy/lib/_formatting.py	(original)
+++ pypy/branch/non-fake-unicode/pypy/lib/_formatting.py	Fri May 20 10:56:03 2005
@@ -6,6 +6,7 @@
 # (1) rounding isn't always right (see comments in _float_formatting).
 # (2) something goes wrong in the f_alt case of %g handling.
 # (3) it's really, really slow.
+import sys
 
 class _Flags(object):
     def __repr__(self):
@@ -323,7 +324,7 @@
         return self.std_wp(v)
 
 
-format_registry = {
+str_format_registry = {
     'd':IntFormatter,
     'i':IntFormatter,
     'o':OctFormatter,
@@ -344,6 +345,50 @@
     # doesn't consume a value.
     '%':funcFormatter(lambda x:'%'),
     }
+    
+class UnicodeStringFormatter(Formatter):  # formatter registered for u's' in unicode_format_registry
+    def format(self):
+        if isinstance(self.value, unicode):
+            return self.std_wp(self.value)  # unicode values are handed to std_wp unchanged
+        return self.std_wp(str(self.value))  # any other value goes through str() first
+
+class UnicodeCharFormatter(Formatter):  # formatter registered for u'c' in unicode_format_registry
+    def format(self):
+        if isinstance(self.value, unicode):
+            v = self.value
+            if len(v) != 1:  # a unicode argument must be exactly one character long
+                raise TypeError, "%c requires int or unicode char"
+        else:
+            i = maybe_int(self.value)  # maybe_int is defined outside this hunk; presumably coerces to int -- confirm
+            if not 0 <= i <= sys.maxunicode:  # accepted range is 0..sys.maxunicode inclusive
+                raise OverflowError("OverflowError: unsigned byte "  # NOTE(review): text repeats the exception name and
+                                    "integer is greater than maximum")  # says "byte" although the bound is sys.maxunicode
+            v = unichr(i)
+        self.prec = None  # precision is cleared before std_wp; presumably so the character is not truncated -- confirm
+        return self.std_wp(v)
+
+unicode_format_registry = {  # conversion character -> formatter; chosen by format() when do_unicode is true
+    u'd':IntFormatter,
+    u'i':IntFormatter,
+    u'o':OctFormatter,
+    u'u':IntFormatter,  # same formatter as u'd' and u'i'
+    u'x':HexFormatter,
+    u'X':HexFormatter,
+    u'e':FloatEFormatter,
+    u'E':FloatEFormatter,
+    u'f':FloatFFormatter,
+    u'F':FloatFFormatter,
+    u'g':FloatGFormatter,
+    u'G':FloatGFormatter,
+    u'c':UnicodeCharFormatter,  # unicode-specific entry
+    u's':UnicodeStringFormatter,  # unicode-specific entry
+    u'r':funcFormatter(repr),
+    # this *can* get accessed, by e.g. '%()4%'%{'':1}.
+    # The usual %% case has to be handled specially as it
+    # doesn't consume a value.
+    u'%':funcFormatter(lambda x:u'%'),  # ignores its argument and yields a literal u'%'
+    }
+    
 
 del funcFormatter # don't irritate flow space
 
@@ -374,7 +419,12 @@
             return self.fmt[i:j]
 
 
-def format(fmt, values, valuedict=None):
+def format(fmt, values, valuedict=None, do_unicode=False):
+    if do_unicode:
+        format_registry = unicode_format_registry
+    else:
+        format_registry = str_format_registry
+        
     fmtiter = FmtIter(fmt)
     valueiter = iter(values)
     r = []
@@ -407,5 +457,7 @@
         if valuedict is None:
             raise TypeError('not all arguments converted '
                             'during string formatting')
+    if do_unicode:
+        return u''.join(r)
     return ''.join(r)
 

Modified: pypy/branch/non-fake-unicode/pypy/objspace/std/unicodeobject.py
==============================================================================
--- pypy/branch/non-fake-unicode/pypy/objspace/std/unicodeobject.py	(original)
+++ pypy/branch/non-fake-unicode/pypy/objspace/std/unicodeobject.py	Fri May 20 10:56:03 2005
@@ -766,21 +766,13 @@
                 raise TypeError("character mapping must return integer, None or unicode")
     return ''.join(result)
 
-def unicode_to_str(val):
-    if isinstance(val, unicode):
-        return val.encode("utf-8")
-    return val
-
 def mod__Unicode_ANY(format, values):
-    format = format.encode("utf-8")
+    import _formatting
     if isinstance(values, tuple):
-        values = tuple([unicode_to_str(val) for val in values])
-    elif hasattr(values, 'keys'):
-        values = dict([(key, unicode_to_str(val)) for key, val in values.iteritems()])
-    else:
-        values = unicode_to_str(values)
-    return unicode(format % values, "utf-8")
-
+        return _formatting.format(format, values, None, do_unicode=True)
+    if hasattr(values, 'keys'):
+        return _formatting.format(format, (values,), values, do_unicode=True)
+    return _formatting.format(format, (values,), None, do_unicode=True)
 ''')
 unicode_expandtabs__Unicode_ANY = app.interphook('unicode_expandtabs__Unicode_ANY')
 unicode_translate__Unicode_ANY = app.interphook('unicode_translate__Unicode_ANY')



More information about the Pypy-commit mailing list