[pypy-commit] pypy utf8-unicode2: Fixed translation

Tue Aug 5 10:59:54 CEST 2014

Author: Tyler Wade <wayedt at gmail.com>
Branch: utf8-unicode2
Changeset: r72697:1ae88439bc79
Date: 2014-08-05 02:54 -0500
http://bitbucket.org/pypy/pypy/changeset/1ae88439bc79/

Log:	Fixed translation

diff --git a/pypy/interpreter/utf8.py b/pypy/interpreter/utf8.py
--- a/pypy/interpreter/utf8.py
+++ b/pypy/interpreter/utf8.py
@@ -53,6 +53,7 @@
 
 @specialize.argtype(0)
 def ORD(s, pos):
+    assert s is not None
     if isinstance(s, Utf8Str):
         return utf8ord(s, pos)
     else:
@@ -76,8 +77,17 @@
     else:
         return s1 != s2
 
+@specialize.argtype(0, 1)
+def LT(s1, s2):
+    assert s1 is not None
+    if isinstance(s1, Utf8Str):
+        return s1.__lt__(s2)
+    else:
+        return s1 < s2
+
 @specialize.argtype(0)
 def ADD(s1, s2):
+    assert s1 is not None
     if isinstance(s1, Utf8Str):
         return s1.__add__(s2)
     else:
@@ -85,14 +95,17 @@
 
 @specialize.argtype(0)
 def MUL(s1, s2):
+    assert s1 is not None
     if isinstance(s1, Utf8Str):
         return s1.__mul__(s2)
     else:
+        assert not isinstance(s1, Utf8Str)
         return s1 * s2
 
 @specialize.argtype(0, 1)
 def IN(s1, s2):
-    if isinstance(s1, Utf8Str):
+    assert s1 is not None
+    if isinstance(s2, Utf8Str):
         return s2.__contains__(s1)
     else:
         return s1 in s2
@@ -468,6 +481,7 @@
                     break
             return Utf8Str(self.bytes.join([s.bytes for s in other]), is_ascii)
         else:
+            assert isinstance(other[0], str)
             return Utf8Str(self.bytes.join([s for s in other]))
     join._annspecialcase_ = 'specialize:arglistitemtype(1)'
 
@@ -652,9 +666,6 @@
                 raise ValueError("Invalid unicode codepoint > 0x10FFFF.")
             self._length += 1
         elif isinstance(c, str):
-            # TODO: Remove this check?
-            if len(c) == 1:
-                assert ord(c) < 128
             self._builder.append(c)
 
             # XXX The assumption here is that the bytes being appended are
diff --git a/pypy/module/_io/interp_stringio.py b/pypy/module/_io/interp_stringio.py
--- a/pypy/module/_io/interp_stringio.py
+++ b/pypy/module/_io/interp_stringio.py
@@ -1,6 +1,7 @@
 from pypy.interpreter.error import OperationError, oefmt
 from pypy.interpreter.typedef import (
     TypeDef, generic_new_descr, GetSetProperty)
+from pypy.interpreter import utf8
 from pypy.interpreter.utf8 import Utf8Str, utf8ord
 from pypy.interpreter.gateway import interp2app, unwrap_spec, WrappedDefault
 from pypy.module._io.interp_textio import W_TextIOBase, W_IncrementalNewlineDecoder
@@ -28,7 +29,9 @@
             newline = space.unicode_w(w_newline)
 
         if (newline is not None and len(newline) != 0 and
-            newline not in (Utf8Str('\n'), Utf8Str('\r\n'), Utf8Str('\r'))):
+             utf8.NE(newline, Utf8Str('\n')) and
+             utf8.NE(newline, Utf8Str('\r\n')) and
+             utf8.NE(newline, Utf8Str('\r'))):
             # Not using oefmt() because I don't know how to ues it
             # with unicode
             raise OperationError(space.w_ValueError,
diff --git a/pypy/module/_io/interp_textio.py b/pypy/module/_io/interp_textio.py
--- a/pypy/module/_io/interp_textio.py
+++ b/pypy/module/_io/interp_textio.py
@@ -521,7 +521,6 @@
         if self.decoded_chars_used > 0 or size < available:
             start = self.decoded_chars_used
             end = self.decoded_chars_used + size
-            assert start >= 0
             assert end >= 0
             chars = self.decoded_chars[start:end]
         else:
diff --git a/pypy/objspace/std/listobject.py b/pypy/objspace/std/listobject.py
--- a/pypy/objspace/std/listobject.py
+++ b/pypy/objspace/std/listobject.py
@@ -16,6 +16,7 @@
     interp2app)
 from pypy.interpreter.generator import GeneratorIterator
 from pypy.interpreter.signature import Signature
+from pypy.interpreter import utf8
 from pypy.objspace.std import slicetype
 from pypy.objspace.std.bytesobject import W_BytesObject
 from pypy.objspace.std.floatobject import W_FloatObject
@@ -1807,7 +1808,7 @@
 
 class UnicodeSort(UnicodeBaseTimSort):
     def lt(self, a, b):
-        return a < b
+        return utf8.LT(a, b)
 
 
 class CustomCompareSort(SimpleSort):
diff --git a/pypy/objspace/std/stringmethods.py b/pypy/objspace/std/stringmethods.py
--- a/pypy/objspace/std/stringmethods.py
+++ b/pypy/objspace/std/stringmethods.py
@@ -663,11 +663,11 @@
         rpos = len(value)
 
         if left:
-            while lpos < rpos and value[lpos] in chars:
+            while lpos < rpos and utf8.IN(value[lpos], chars):
                 lpos += 1
 
         if right:
-            while rpos > lpos and value[rpos - 1] in chars:
+            while rpos > lpos and utf8.IN(value[rpos - 1], chars):
                 rpos -= 1
 
         assert rpos >= lpos    # annotator hint, don't remove