[pypy-commit] pypy default: expand the comments
arigo
pypy.commits at gmail.com
Sat Dec 10 09:40:06 EST 2016
Author: Armin Rigo <arigo at tunes.org>
Branch:
Changeset: r88995:e8b1d9913039
Date: 2016-12-10 15:39 +0100
http://bitbucket.org/pypy/pypy/changeset/e8b1d9913039/
Log: expand the comments
diff --git a/pypy/interpreter/unicodehelper.py b/pypy/interpreter/unicodehelper.py
--- a/pypy/interpreter/unicodehelper.py
+++ b/pypy/interpreter/unicodehelper.py
@@ -51,6 +51,10 @@
return result
def decode_utf8(space, string):
+ # Surrogates are accepted and not treated specially at all.
+ # If there happen to be two 3-byte sequences encoding a pair of surrogates,
+ # you still get two surrogate unicode characters in the result.
+ # These are the Python2 rules; Python3 differs.
result, consumed = runicode.str_decode_utf_8(
string, len(string), "strict",
final=True, errorhandler=decode_error_handler(space),
@@ -59,10 +63,9 @@
def encode_utf8(space, uni):
# Note that this function never raises UnicodeEncodeError,
- # since surrogate pairs are allowed.
- # This is not the case with Python3.
- # Also, note that the two characters \d800\dc00 are considered as
- # a paired surrogate, and turn into a single 4-byte utf8 char.
+ # since surrogates are allowed, either paired or lone.
+ # A surrogate pair is treated as the non-BMP character
+ # it stands for. These are the Python2 rules; Python3 differs.
return runicode.unicode_encode_utf_8(
uni, len(uni), "strict",
errorhandler=raise_unicode_exception_encode,
More information about the pypy-commit
mailing list