[pypy-commit] pypy unicode-utf8: fixes
fijal
pypy.commits at gmail.com
Sun Feb 26 17:42:22 EST 2017
Author: fijal
Branch: unicode-utf8
Changeset: r90373:77af71423e68
Date: 2017-02-26 23:41 +0100
http://bitbucket.org/pypy/pypy/changeset/77af71423e68/
Log: fixes
diff --git a/pypy/module/_codecs/interp_codecs.py b/pypy/module/_codecs/interp_codecs.py
--- a/pypy/module/_codecs/interp_codecs.py
+++ b/pypy/module/_codecs/interp_codecs.py
@@ -604,6 +604,8 @@
@unwrap_spec(string='bufferstr', errors='str_or_None')
def charmap_decode(space, string, errors="strict", w_mapping=None):
+ from pypy.interpreter.unicodehelper import DecodeWrapper
+
if errors is None:
errors = 'strict'
if len(string) == 0:
@@ -618,12 +620,13 @@
state = space.fromcache(CodecState)
result, consumed = runicode.str_decode_charmap(
string, len(string), errors,
- final, state.decode_error_handler, mapping)
+ final, DecodeWrapper(state.decode_error_handler).handle, mapping)
return space.newtuple([space.newunicode(result), space.newint(consumed)])
@unwrap_spec(utf8='utf8', errors='str_or_None')
def charmap_encode(space, utf8, utf8len, errors="strict", w_mapping=None):
- xxx
+ from pypy.interpreter.unicodehelper import EncodeWrapper
+
if errors is None:
errors = 'strict'
if space.is_none(w_mapping):
@@ -632,20 +635,24 @@
mapping = Charmap_Encode(space, w_mapping)
state = space.fromcache(CodecState)
+ uni = utf8.decode('utf8')
result = runicode.unicode_encode_charmap(
uni, len(uni), errors,
- state.encode_error_handler, mapping)
+ EncodeWrapper(state.encode_error_handler).handle, mapping)
return space.newtuple([space.newbytes(result), space.newint(len(uni))])
@unwrap_spec(chars='utf8')
def charmap_build(space, chars, charslen):
# XXX CPython sometimes uses a three-level trie
- xxx
w_charmap = space.newdict()
- for num in range(len(chars)):
- elem = chars[num]
- space.setitem(w_charmap, space.newint(ord(elem)), space.newint(num))
+ pos = 0
+ num = 0
+ while num < charslen:
+ w_char = space.newint(rutf8.codepoint_at_pos(chars, pos))
+ space.setitem(w_charmap, w_char, space.newint(num))
+ pos = rutf8.next_codepoint_pos(chars, pos)
+ num += 1
return w_charmap
# ____________________________________________________________
@@ -690,6 +697,8 @@
@unwrap_spec(errors='str_or_None')
def unicode_internal_decode(space, w_string, errors="strict"):
+ from pypy.interpreter.unicodehelper import DecodeWrapper
+
if errors is None:
errors = 'strict'
# special case for this codec: unicodes are returned as is
@@ -705,7 +714,7 @@
state = space.fromcache(CodecState)
result, consumed = runicode.str_decode_unicode_internal(
string, len(string), errors,
- final, state.decode_error_handler)
+ final, DecodeWrapper(state.decode_error_handler).handle)
return space.newtuple([space.newunicode(result), space.newint(consumed)])
# ____________________________________________________________
diff --git a/rpython/rlib/runicode.py b/rpython/rlib/runicode.py
--- a/rpython/rlib/runicode.py
+++ b/rpython/rlib/runicode.py
@@ -1166,8 +1166,8 @@
c = mapping.get(ch, ERROR_CHAR)
if c == ERROR_CHAR:
r, pos = errorhandler(errors, "charmap",
- "character maps to <undefined>",
- s, pos, pos + 1)
+ "character maps to <undefined>",
+ s, pos, pos + 1)
result.append(r)
continue
result.append(c)
More information about the pypy-commit mailing list