[pypy-commit] pypy unicode-utf8-py3: disallow encoding with surrogates, occurred in pyparsing
mattip
pypy.commits at gmail.com
Sat Aug 4 18:01:38 EDT 2018
Author: Matti Picus <matti.picus at gmail.com>
Branch: unicode-utf8-py3
Changeset: r94942:d5aecbf7948c
Date: 2018-08-04 14:59 -0700
http://bitbucket.org/pypy/pypy/changeset/d5aecbf7948c/
Log: disallow encoding with surrogates, occurred in pyparsing
diff --git a/pypy/objspace/std/unicodeobject.py b/pypy/objspace/std/unicodeobject.py
--- a/pypy/objspace/std/unicodeobject.py
+++ b/pypy/objspace/std/unicodeobject.py
@@ -1185,6 +1185,14 @@
def encode_object(space, w_object, encoding, errors):
utf8 = space.utf8_w(w_object)
+ # TODO: refactor unnatural use of error handlers here,
+ # we should make a single pass over the utf8 str
+ pos = rutf8.surrogate_in_utf8(utf8)
+ if pos >= 0:
+ eh = unicodehelper.encode_error_handler(space)
+ eh(None, "utf8", "surrogates not allowed", utf8,
+ pos, pos + 1)
+ assert False, "always raises"
if errors is None or errors == 'strict':
if encoding is None or encoding == 'utf-8':
#if rutf8.has_surrogates(utf8):
More information about the pypy-commit
mailing list