[pypy-commit] pypy default: also handle surrogates when hosted on a narrow build

pjenvey noreply at buildbot.pypy.org
Tue Oct 14 02:31:49 CEST 2014


Author: Philip Jenvey <pjenvey at underboss.org>
Branch: 
Changeset: r73939:44eef64a93d7
Date: 2014-05-22 17:26 -0700
http://bitbucket.org/pypy/pypy/changeset/44eef64a93d7/

Log:	Also handle surrogate pairs when the host Python is a narrow build
	(sys.maxunicode == 0xFFFF); grafted from
	556155656b471613725c28fec6602117c714f661.

diff --git a/rpython/rlib/runicode.py b/rpython/rlib/runicode.py
--- a/rpython/rlib/runicode.py
+++ b/rpython/rlib/runicode.py
@@ -13,6 +13,7 @@
     MAXUNICODE = 0xffff
     allow_surrogate_by_default = True
 
+NARROW_HOST = not we_are_translated() and sys.maxunicode == 0xFFFF
 BYTEORDER = sys.byteorder
 
 # python 2.7 has a preview of py3k behavior, so those functions
@@ -65,7 +66,7 @@
 
 if MAXUNICODE > 0xFFFF:
     def code_to_unichr(code):
-        if not we_are_translated() and sys.maxunicode == 0xFFFF:
+        if NARROW_HOST:
             # Host CPython is narrow build, generate surrogates
             return unichr_returns_surrogate(code)
         else:
@@ -336,7 +337,8 @@
                         ch2 = ord(s[pos])
                         # Check for low surrogate and combine the two to
                         # form a UCS4 value
-                        if ((allow_surrogates or MAXUNICODE < 65536) and
+                        if ((allow_surrogates or MAXUNICODE < 65536
+                             or NARROW_HOST) and
                             ch <= 0xDBFF and 0xDC00 <= ch2 <= 0xDFFF):
                             ch3 = ((ch - 0xD800) << 10 | (ch2 - 0xDC00)) + 0x10000
                             pos += 1
@@ -1349,8 +1351,7 @@
 
             # The following logic is enabled only if MAXUNICODE == 0xffff, or
             # for testing on top of a host Python where sys.maxunicode == 0xffff
-            if ((MAXUNICODE < 65536 or
-                    (not we_are_translated() and sys.maxunicode < 65536))
+            if ((MAXUNICODE < 65536 or NARROW_HOST)
                 and 0xD800 <= oc < 0xDC00 and pos + 1 < size):
                 # Map UTF-16 surrogate pairs to Unicode \UXXXXXXXX escapes
                 pos += 1


More information about the pypy-commit mailing list