[pypy-commit] pypy unicode-utf8: more consistency
fijal
pypy.commits at gmail.com
Sun Feb 26 17:42:20 EST 2017
Author: fijal
Branch: unicode-utf8
Changeset: r90372:cab3a879c6cb
Date: 2017-02-26 22:44 +0100
http://bitbucket.org/pypy/pypy/changeset/cab3a879c6cb/
Log: more consistency
diff --git a/pypy/interpreter/unicodehelper.py b/pypy/interpreter/unicodehelper.py
--- a/pypy/interpreter/unicodehelper.py
+++ b/pypy/interpreter/unicodehelper.py
@@ -86,10 +86,10 @@
def str_decode_ascii(s, slen, errors, final, errorhandler):
try:
- rutf8.check_ascii(s)
- return s
+ rutf8.check_ascii(s, slen)
+ return s, slen, len(s)
except rutf8.AsciiCheckError:
- return rutf8.str_decode_ascii(s, errors, errorhandler)
+ return rutf8.str_decode_ascii(s, slen, errors, errorhandler)
# XXX wrappers, think about speed
diff --git a/pypy/objspace/std/unicodeobject.py b/pypy/objspace/std/unicodeobject.py
--- a/pypy/objspace/std/unicodeobject.py
+++ b/pypy/objspace/std/unicodeobject.py
@@ -5,9 +5,7 @@
enforceargs, newlist_hint)
from rpython.rlib.buffer import StringBuffer
from rpython.rlib.rstring import StringBuilder, split, rsplit, UnicodeBuilder
-from rpython.rlib.runicode import (
- make_unicode_escape_function, str_decode_ascii, str_decode_utf_8,
- unicode_encode_ascii, unicode_encode_utf_8, fast_str_decode_ascii)
+from rpython.rlib.runicode import make_unicode_escape_function
from rpython.rlib import rutf8, jit
from pypy.interpreter import unicodehelper
@@ -24,7 +22,7 @@
__all__ = ['W_UnicodeObject', 'wrapunicode', 'plain_str2unicode',
'encode_object', 'decode_object', 'unicode_from_object',
- 'utf8_from_string', 'unicode_to_decimal_w']
+ 'unicode_from_string', 'unicode_to_decimal_w']
class W_UnicodeObject(W_Root):
diff --git a/rpython/rlib/rutf8.py b/rpython/rlib/rutf8.py
--- a/rpython/rlib/rutf8.py
+++ b/rpython/rlib/rutf8.py
@@ -69,8 +69,6 @@
def codepoint_at_pos(code, pos):
""" Give a codepoint in code at pos - assumes valid utf8, no checking!
"""
- import pdb
- pdb.set_trace()
ordch1 = ord(code[pos])
if ordch1 < 0x80:
return ordch1
@@ -103,8 +101,10 @@
def __init__(self, pos):
self.pos = pos
-def check_ascii(s):
- for i in range(0, len(s)):
+def check_ascii(s, size=-1):
+ if size == -1:
+ size = len(s)
+ for i in range(0, size):
if ord(s[i]) & 0x80:
raise AsciiCheckError(i)
@@ -123,9 +123,8 @@
pos = next_codepoint_pos(s, pos)
return res.build()
-def str_decode_ascii(s, errors, errorhandler):
+def str_decode_ascii(s, size, errors, errorhandler):
# ASCII is equivalent to the first 128 ordinals in Unicode.
- size = len(s)
result = StringBuilder(size)
pos = 0
while pos < size:
More information about the pypy-commit mailing list