[pypy-commit] pypy unicode-utf8: start working on pypyjson
fijal
pypy.commits at gmail.com
Fri Nov 24 04:04:41 EST 2017
Author: fijal
Branch: unicode-utf8
Changeset: r93155:109fd5f5d4eb
Date: 2017-11-23 20:52 +0100
http://bitbucket.org/pypy/pypy/changeset/109fd5f5d4eb/
Log: start working on pypyjson
diff --git a/pypy/interpreter/baseobjspace.py b/pypy/interpreter/baseobjspace.py
--- a/pypy/interpreter/baseobjspace.py
+++ b/pypy/interpreter/baseobjspace.py
@@ -1760,10 +1760,6 @@
def utf8_w(self, w_obj):
return w_obj.utf8_w(self)
- def unicode_w(self, w_obj):
- # XXX: kill me!
- return w_obj.utf8_w(self).decode('utf-8')
-
def convert_to_w_unicode(self, w_obj):
return w_obj.convert_to_w_unicode(self)
diff --git a/pypy/module/_pypyjson/interp_decoder.py b/pypy/module/_pypyjson/interp_decoder.py
--- a/pypy/module/_pypyjson/interp_decoder.py
+++ b/pypy/module/_pypyjson/interp_decoder.py
@@ -1,7 +1,7 @@
import sys
from rpython.rlib.rstring import StringBuilder
from rpython.rlib.objectmodel import specialize, always_inline, r_dict
-from rpython.rlib import rfloat, runicode
+from rpython.rlib import rfloat, runicode, rutf8
from rpython.rtyper.lltypesystem import lltype, rffi
from pypy.interpreter.error import oefmt
from pypy.interpreter import unicodehelper
@@ -19,29 +19,6 @@
return 0.0
return x * NEG_POW_10[exp]
-def strslice2unicode_latin1(s, start, end):
- """
- Convert s[start:end] to unicode. s is supposed to be an RPython string
- encoded in latin-1, which means that the numeric value of each char is the
- same as the corresponding unicode code point.
-
- Internally it's implemented at the level of low-level helpers, to avoid
- the extra copy we would need if we take the actual slice first.
-
- No bound checking is done, use carefully.
- """
- from rpython.rtyper.annlowlevel import llstr, hlunicode
- from rpython.rtyper.lltypesystem.rstr import malloc, UNICODE
- from rpython.rtyper.lltypesystem.lltype import cast_primitive, UniChar
- length = end-start
- ll_s = llstr(s)
- ll_res = malloc(UNICODE, length)
- ll_res.hash = 0
- for i in range(length):
- ch = ll_s.chars[start+i]
- ll_res.chars[i] = cast_primitive(UniChar, ch)
- return hlunicode(ll_res)
-
def slice_eq(a, b):
(ll_chars1, start1, length1, _) = a
(ll_chars2, start2, length2, _) = b
@@ -312,8 +289,7 @@
bits |= ord(ch)
if ch == '"':
self.pos = i
- return self.space.newunicode(
- self._create_string(start, i - 1, bits))
+ return self._create_string(start, i - 1, bits)
elif ch == '\\' or ch < '\x20':
self.pos = i-1
return self.decode_string_escaped(start)
@@ -322,12 +298,15 @@
if bits & 0x80:
# the 8th bit is set, it's an utf8 string
content_utf8 = self.getslice(start, end)
- return unicodehelper.decode_utf8(self.space, content_utf8)
+ lgt, flag = unicodehelper.check_utf8_or_raise(self.space,
+ content_utf8)
+ return self.space.newutf8(content_utf8, lgt, flag)
else:
# ascii only, fast path (ascii is a strict subset of
# latin1, and we already checked that all the chars are <
# 128)
- return strslice2unicode_latin1(self.s, start, end)
+ return self.space.newutf8(self.getslice(start, end),
+ end - start, rutf8.FLAG_ASCII)
def decode_string_escaped(self, start):
i = self.pos
@@ -340,9 +319,10 @@
i += 1
if ch == '"':
content_utf8 = builder.build()
- content_unicode = unicodehelper.decode_utf8(self.space, content_utf8)
+ lgt, f = unicodehelper.check_utf8_or_raise(self.space,
+ content_utf8)
self.pos = i
- return self.space.newunicode(content_unicode)
+ return self.space.newutf8(content_utf8, lgt, f)
elif ch == '\\':
i = self.decode_escape_sequence(i, builder)
elif ch < '\x20':
diff --git a/pypy/module/_pypyjson/test/test__pypyjson.py b/pypy/module/_pypyjson/test/test__pypyjson.py
--- a/pypy/module/_pypyjson/test/test__pypyjson.py
+++ b/pypy/module/_pypyjson/test/test__pypyjson.py
@@ -10,10 +10,14 @@
assert dec.skip_whitespace(8) == len(s)
dec.close()
+class FakeSpace(object):
+ def newutf8(self, s, l, f):
+ return s
+
def test_decode_key():
s1 = "123" * 100
s = ' "%s" "%s" ' % (s1, s1)
- dec = JSONDecoder('fake space', s)
+ dec = JSONDecoder(FakeSpace(), s)
assert dec.pos == 0
x = dec.decode_key(0)
assert x == s1
diff --git a/pypy/objspace/std/objspace.py b/pypy/objspace/std/objspace.py
--- a/pypy/objspace/std/objspace.py
+++ b/pypy/objspace/std/objspace.py
@@ -367,23 +367,10 @@
assert isinstance(utf8s, str)
return W_UnicodeObject(utf8s, length, flag)
- def new_from_utf8(self, utf8s):
- # XXX: kill me!
- assert isinstance(utf8s, str)
- length, flag = rutf8.check_utf8(utf8s, True)
- return W_UnicodeObject(utf8s, length, flag)
-
def newfilename(self, s):
assert isinstance(s, str) # on pypy3, this decodes the byte string
return W_BytesObject(s) # with the filesystem encoding
- def newunicode(self, unistr):
- # XXX: kill me!
- assert isinstance(unistr, unicode)
- utf8s = unistr.encode("utf-8")
- length, flag = rutf8.check_utf8(utf8s, True)
- return self.newutf8(utf8s, length, flag)
-
def type(self, w_obj):
jit.promote(w_obj.__class__)
return w_obj.getclass(self)
More information about the pypy-commit
mailing list