[pypy-commit] pypy unicode-utf8: Fixes
arigo
pypy.commits at gmail.com
Thu Aug 24 09:20:30 EDT 2017
Author: Armin Rigo <arigo at tunes.org>
Branch: unicode-utf8
Changeset: r92253:07a16357501d
Date: 2017-08-24 15:19 +0200
http://bitbucket.org/pypy/pypy/changeset/07a16357501d/
Log: Fixes
diff --git a/pypy/module/_codecs/interp_codecs.py b/pypy/module/_codecs/interp_codecs.py
--- a/pypy/module/_codecs/interp_codecs.py
+++ b/pypy/module/_codecs/interp_codecs.py
@@ -375,7 +375,7 @@
def make_encoder_wrapper(name):
rname = "utf8_encode_%s" % (name.replace("_encode", ""), )
@unwrap_spec(utf8='utf8', errors='text_or_none')
- def wrap_encoder(space, utf8, utf8len, errors="strict"):
+ def wrap_encoder(space, utf8, errors="strict"):
from pypy.interpreter import unicodehelper
XXX
@@ -446,7 +446,8 @@
# utf-8 functions are not regular, because we have to pass
# "allow_surrogates=True"
@unwrap_spec(utf8='utf8', errors='text_or_none')
-def utf_8_encode(space, utf8, utf8len, errors="strict"):
+def utf_8_encode(space, utf8, errors="strict"):
+ XXXX
return space.newtuple([space.newbytes(utf8), space.newint(utf8len)])
#@unwrap_spec(uni=unicode, errors='text_or_none')
#def utf_8_encode(space, uni, errors="strict"):
@@ -472,29 +473,17 @@
state = space.fromcache(CodecState)
# call the fast version for checking
try:
- consumed, lgt = rutf8.str_check_utf8(string, len(string), final)
- except rutf8.Utf8CheckError as e:
- if errors == 'strict':
- # just raise
- state.decode_error_handler(errors, 'utf8', e.msg, string,
- e.startpos, e.endpos)
- assert False, "raises"
- # XXX do the way aroun runicode - we can optimize it later if we
+ lgt = rutf8.check_utf8(string)
+ except rutf8.CheckError as e:
+ # XXX do the way around runicode - we can optimize it later if we
# decide we care about obscure cases
res, consumed, lgt = unicodehelper.str_decode_utf8(string, len(string),
errors, final, state.decode_error_handler)
return space.newtuple([space.newutf8(res, lgt),
- space.newint(consumed)])
- #result, consumed = runicode.str_decode_utf_8_impl(
- # string, len(string), errors,
- # final, state.decode_error_handler,
- # allow_surrogates=True)
- if final or consumed == len(string):
+ space.newint(consumed)])
+ else:
return space.newtuple([space.newutf8(string, lgt),
- space.newint(consumed)])
-
- return space.newtuple([space.newutf8(string[:consumed], lgt),
- space.newint(consumed)])
+ space.newint(len(string))])
@unwrap_spec(data='bufferstr', errors='text_or_none', byteorder=int,
w_final=WrappedDefault(False))
@@ -639,8 +628,9 @@
return space.newtuple([space.newunicode(result), space.newint(consumed)])
@unwrap_spec(utf8='utf8', errors='text_or_none')
-def charmap_encode(space, utf8, utf8len, errors="strict", w_mapping=None):
+def charmap_encode(space, utf8, errors="strict", w_mapping=None):
from pypy.interpreter.unicodehelper import EncodeWrapper
+ XXXXX
if errors is None:
errors = 'strict'
@@ -658,8 +648,9 @@
@unwrap_spec(chars='utf8')
-def charmap_build(space, chars, charslen):
+def charmap_build(space, chars):
# XXX CPython sometimes uses a three-level trie
+ XXXXXX
w_charmap = space.newdict()
pos = 0
num = 0
More information about the pypy-commit mailing list