[pypy-commit] pypy fastjson: handle surrogate pairs
antocuni
noreply at buildbot.pypy.org
Tue Jun 25 19:01:45 CEST 2013
Author: Antonio Cuni <anto.cuni at gmail.com>
Branch: fastjson
Changeset: r64979:72939f271dd6
Date: 2013-06-25 18:37 +0200
http://bitbucket.org/pypy/pypy/changeset/72939f271dd6/
Log: handle surrogate pairs
diff --git a/pypy/module/_fastjson/interp_decoder.py b/pypy/module/_fastjson/interp_decoder.py
--- a/pypy/module/_fastjson/interp_decoder.py
+++ b/pypy/module/_fastjson/interp_decoder.py
@@ -311,16 +311,29 @@
i += 4
hexdigits = self.getslice(start, i)
try:
- uchr = unichr(int(hexdigits, 16))
+ val = int(hexdigits, 16)
+ if val & 0xfc00 == 0xd800:
+ # surrogate pair
+ val = self.decode_surrogate_pair(i, val)
+ i += 6
except ValueError:
self._raise("Invalid \uXXXX escape (char %d)", i-1)
return # help the annotator to know that we'll never go beyond
# this point
#
+ uchr = unichr(val)
utf8_ch = unicodehelper.encode_utf8(self.space, uchr)
builder.append(utf8_ch)
return i
+ def decode_surrogate_pair(self, i, highsurr):
+ if self.ll_chars[i] != '\\' or self.ll_chars[i+1] != 'u':
+ self._raise("Unpaired high surrogate at char %d", i)
+ i += 2
+ hexdigits = self.getslice(i, i+4)
+ lowsurr = int(hexdigits, 16) # the possible ValueError is caught by the caller
+ return 0x10000 + (((highsurr - 0xd800) << 10) | (lowsurr - 0xdc00))
+
def loads(space, w_s):
if space.isinstance_w(w_s, space.w_unicode):
diff --git a/pypy/module/_fastjson/test/test__fastjson.py b/pypy/module/_fastjson/test/test__fastjson.py
--- a/pypy/module/_fastjson/test/test__fastjson.py
+++ b/pypy/module/_fastjson/test/test__fastjson.py
@@ -149,3 +149,8 @@
raises(ValueError, "_fastjson.loads('[1: 2]')")
raises(ValueError, "_fastjson.loads('[1, 2')")
+ def test_big_unicode_decode(self):
+ import _fastjson
+ expected = u'z\U0001d120x'
+ res = _fastjson.loads('"z\\ud834\\udd20x"')
+ assert res == expected
More information about the pypy-commit
mailing list