[pypy-commit] pypy unicode-utf8: more consistency

fijal pypy.commits at gmail.com
Sun Feb 26 17:42:20 EST 2017


Author: fijal
Branch: unicode-utf8
Changeset: r90372:cab3a879c6cb
Date: 2017-02-26 22:44 +0100
http://bitbucket.org/pypy/pypy/changeset/cab3a879c6cb/

Log:	more consistency

diff --git a/pypy/interpreter/unicodehelper.py b/pypy/interpreter/unicodehelper.py
--- a/pypy/interpreter/unicodehelper.py
+++ b/pypy/interpreter/unicodehelper.py
@@ -86,10 +86,10 @@
 
 def str_decode_ascii(s, slen, errors, final, errorhandler):
     try:
-        rutf8.check_ascii(s)
-        return s
+        rutf8.check_ascii(s, slen)
+        return s, slen, len(s)
     except rutf8.AsciiCheckError:
-        return rutf8.str_decode_ascii(s, errors, errorhandler)
+        return rutf8.str_decode_ascii(s, slen, errors, errorhandler)
 
 # XXX wrappers, think about speed
 
diff --git a/pypy/objspace/std/unicodeobject.py b/pypy/objspace/std/unicodeobject.py
--- a/pypy/objspace/std/unicodeobject.py
+++ b/pypy/objspace/std/unicodeobject.py
@@ -5,9 +5,7 @@
     enforceargs, newlist_hint)
 from rpython.rlib.buffer import StringBuffer
 from rpython.rlib.rstring import StringBuilder, split, rsplit, UnicodeBuilder
-from rpython.rlib.runicode import (
-    make_unicode_escape_function, str_decode_ascii, str_decode_utf_8,
-    unicode_encode_ascii, unicode_encode_utf_8, fast_str_decode_ascii)
+from rpython.rlib.runicode import make_unicode_escape_function
 from rpython.rlib import rutf8, jit
 
 from pypy.interpreter import unicodehelper
@@ -24,7 +22,7 @@
 
 __all__ = ['W_UnicodeObject', 'wrapunicode', 'plain_str2unicode',
            'encode_object', 'decode_object', 'unicode_from_object',
-           'utf8_from_string', 'unicode_to_decimal_w']
+           'unicode_from_string', 'unicode_to_decimal_w']
 
 
 class W_UnicodeObject(W_Root):
diff --git a/rpython/rlib/rutf8.py b/rpython/rlib/rutf8.py
--- a/rpython/rlib/rutf8.py
+++ b/rpython/rlib/rutf8.py
@@ -69,8 +69,6 @@
 def codepoint_at_pos(code, pos):
     """ Give a codepoint in code at pos - assumes valid utf8, no checking!
     """
-    import pdb
-    pdb.set_trace()
     ordch1 = ord(code[pos])
     if ordch1 < 0x80:
         return ordch1
@@ -103,8 +101,10 @@
     def __init__(self, pos):
         self.pos = pos
 
-def check_ascii(s):
-    for i in range(0, len(s)):
+def check_ascii(s, size=-1):
+    if size == -1:
+        size = len(s)
+    for i in range(0, size):
         if ord(s[i]) & 0x80:
             raise AsciiCheckError(i)
 
@@ -123,9 +123,8 @@
         pos = next_codepoint_pos(s, pos)
     return res.build()
 
-def str_decode_ascii(s, errors, errorhandler):
+def str_decode_ascii(s, size, errors, errorhandler):
     # ASCII is equivalent to the first 128 ordinals in Unicode.
-    size = len(s)
     result = StringBuilder(size)
     pos = 0
     while pos < size:


More information about the pypy-commit mailing list