[pypy-commit] pypy faster-json: Step one - remove irrelevant clutter, mostly CPython optimizations.
fijal
noreply at buildbot.pypy.org
Wed Oct 19 13:16:05 CEST 2011
Author: Maciej Fijalkowski <fijall at gmail.com>
Branch: faster-json
Changeset: r48226:4c970cf4ece9
Date: 2011-10-19 13:11 +0200
http://bitbucket.org/pypy/pypy/changeset/4c970cf4ece9/
Log: Step one - remove irrelevant clutter, mostly CPython optimizations.
diff --git a/lib-python/modified-2.7/json/encoder.py b/lib-python/modified-2.7/json/encoder.py
--- a/lib-python/modified-2.7/json/encoder.py
+++ b/lib-python/modified-2.7/json/encoder.py
@@ -2,15 +2,6 @@
"""
import re
-try:
- from _json import encode_basestring_ascii as c_encode_basestring_ascii
-except ImportError:
- c_encode_basestring_ascii = None
-try:
- from _json import make_encoder as c_make_encoder
-except ImportError:
- c_make_encoder = None
-
ESCAPE = re.compile(r'[\x00-\x1f\\"\b\f\n\r\t]')
ESCAPE_ASCII = re.compile(r'([\\"]|[^\ -~])')
HAS_UTF8 = re.compile(r'[\x80-\xff]')
@@ -24,8 +15,8 @@
'\t': '\\t',
}
for i in range(0x20):
- ESCAPE_DCT.setdefault(chr(i), '\\u{0:04x}'.format(i))
- #ESCAPE_DCT.setdefault(chr(i), '\\u%04x' % (i,))
+ #ESCAPE_DCT.setdefault(chr(i), '\\u{0:04x}'.format(i))
+ ESCAPE_DCT.setdefault(chr(i), '\\u%04x' % (i,))
# Assume this produces an infinity on all machines (probably not guaranteed)
INFINITY = float('1e66666')
@@ -39,8 +30,7 @@
return ESCAPE_DCT[match.group(0)]
return '"' + ESCAPE.sub(replace, s) + '"'
-
-def py_encode_basestring_ascii(s):
+def encode_basestring_ascii(s):
"""Return an ASCII-only JSON representation of a Python string
"""
@@ -63,10 +53,8 @@
return '\\u{0:04x}\\u{1:04x}'.format(s1, s2)
#return '\\u%04x\\u%04x' % (s1, s2)
return '"' + str(ESCAPE_ASCII.sub(replace, s)) + '"'
-
-
-encode_basestring_ascii = (
- c_encode_basestring_ascii or py_encode_basestring_ascii)
+py_encode_basestring_ascii = encode_basestring_ascii
+c_encode_basestring_ascii = None
class JSONEncoder(object):
"""Extensible JSON <http://json.org> encoder for Python data structures.
@@ -147,6 +135,10 @@
self.skipkeys = skipkeys
self.ensure_ascii = ensure_ascii
+ if ensure_ascii:
+ self.encoder = encode_basestring_ascii
+ else:
+ self.encoder = encode_basestring
self.check_circular = check_circular
self.allow_nan = allow_nan
self.sort_keys = sort_keys
@@ -197,7 +189,7 @@
return encode_basestring(o)
# This doesn't pass the iterator directly to ''.join() because the
# exceptions aren't as detailed. The list call should be roughly
- # equivalent to the PySequence_Fast that ''.join() would do.
+ # equivalent to the PySequence_Fast that ''.join() would do.
chunks = self.iterencode(o, _one_shot=True)
if not isinstance(chunks, (list, tuple)):
chunks = list(chunks)
@@ -227,59 +219,30 @@
o = o.decode(_encoding)
return _orig_encoder(o)
- def floatstr(o, allow_nan=self.allow_nan,
- _repr=FLOAT_REPR, _inf=INFINITY, _neginf=-INFINITY):
- # Check for specials. Note that this type of test is processor
- # and/or platform-specific, so do tests which don't depend on the
- # internals.
+ return self._iterencode(o, markers, 0)
- if o != o:
- text = 'NaN'
- elif o == _inf:
- text = 'Infinity'
- elif o == _neginf:
- text = '-Infinity'
- else:
- return _repr(o)
+ def _floatstr(self, o):
+ # Check for specials. Note that this type of test is processor
+ # and/or platform-specific, so do tests which don't depend on the
+ # internals.
- if not allow_nan:
- raise ValueError(
- "Out of range float values are not JSON compliant: " +
- repr(o))
+ if o != o:
+ text = 'NaN'
+ elif o == INFINITY:
+ text = 'Infinity'
+ elif o == -INFINITY:
+ text = '-Infinity'
+ else:
+ return FLOAT_REPR(o)
- return text
+ if not self.allow_nan:
+ raise ValueError(
+ "Out of range float values are not JSON compliant: " +
+ repr(o))
+ return text
- if (_one_shot and c_make_encoder is not None
- and not self.indent and not self.sort_keys):
- _iterencode = c_make_encoder(
- markers, self.default, _encoder, self.indent,
- self.key_separator, self.item_separator, self.sort_keys,
- self.skipkeys, self.allow_nan)
- else:
- _iterencode = _make_iterencode(
- markers, self.default, _encoder, self.indent, floatstr,
- self.key_separator, self.item_separator, self.sort_keys,
- self.skipkeys, _one_shot)
- return _iterencode(o, 0)
-
-def _make_iterencode(markers, _default, _encoder, _indent, _floatstr,
- _key_separator, _item_separator, _sort_keys, _skipkeys, _one_shot,
- ## HACK: hand-optimized bytecode; turn globals into locals
- ValueError=ValueError,
- basestring=basestring,
- dict=dict,
- float=float,
- id=id,
- int=int,
- isinstance=isinstance,
- list=list,
- long=long,
- str=str,
- tuple=tuple,
- ):
-
- def _iterencode_list(lst, _current_indent_level):
+ def _iterencode_list(self, lst, markers, _current_indent_level):
if not lst:
yield '[]'
return
@@ -289,14 +252,15 @@
raise ValueError("Circular reference detected")
markers[markerid] = lst
buf = '['
- if _indent is not None:
+ if self.indent is not None:
_current_indent_level += 1
- newline_indent = '\n' + (' ' * (_indent * _current_indent_level))
- separator = _item_separator + newline_indent
+ newline_indent = '\n' + (' ' * (self.indent *
+ _current_indent_level))
+ separator = self.item_separator + newline_indent
buf += newline_indent
else:
newline_indent = None
- separator = _item_separator
+ separator = self.item_separator
first = True
for value in lst:
if first:
@@ -304,7 +268,7 @@
else:
buf = separator
if isinstance(value, basestring):
- yield buf + _encoder(value)
+ yield buf + self.encoder(value)
elif value is None:
yield buf + 'null'
elif value is True:
@@ -314,25 +278,28 @@
elif isinstance(value, (int, long)):
yield buf + str(value)
elif isinstance(value, float):
- yield buf + _floatstr(value)
+ yield buf + self._floatstr(value)
else:
yield buf
if isinstance(value, (list, tuple)):
- chunks = _iterencode_list(value, _current_indent_level)
+ chunks = self._iterencode_list(value, markers,
+ _current_indent_level)
elif isinstance(value, dict):
- chunks = _iterencode_dict(value, _current_indent_level)
+ chunks = self._iterencode_dict(value, markers,
+ _current_indent_level)
else:
- chunks = _iterencode(value, _current_indent_level)
+ chunks = self._iterencode(value, markers,
+ _current_indent_level)
for chunk in chunks:
yield chunk
if newline_indent is not None:
_current_indent_level -= 1
- yield '\n' + (' ' * (_indent * _current_indent_level))
+ yield '\n' + (' ' * (self.indent * _current_indent_level))
yield ']'
if markers is not None:
del markers[markerid]
- def _iterencode_dict(dct, _current_indent_level):
+ def _iterencode_dict(self, dct, markers, _current_indent_level):
if not dct:
yield '{}'
return
@@ -342,16 +309,17 @@
raise ValueError("Circular reference detected")
markers[markerid] = dct
yield '{'
- if _indent is not None:
+ if self.indent is not None:
_current_indent_level += 1
- newline_indent = '\n' + (' ' * (_indent * _current_indent_level))
- item_separator = _item_separator + newline_indent
+ newline_indent = '\n' + (' ' * (self.indent *
+ _current_indent_level))
+ item_separator = self.item_separator + newline_indent
yield newline_indent
else:
newline_indent = None
- item_separator = _item_separator
+ item_separator = self.item_separator
first = True
- if _sort_keys:
+ if self.sort_keys:
items = sorted(dct.items(), key=lambda kv: kv[0])
else:
items = dct.iteritems()
@@ -361,7 +329,7 @@
# JavaScript is weakly typed for these, so it makes sense to
# also allow them. Many encoders seem to do something like this.
elif isinstance(key, float):
- key = _floatstr(key)
+ key = self._floatstr(key)
elif key is True:
key = 'true'
elif key is False:
@@ -370,7 +338,7 @@
key = 'null'
elif isinstance(key, (int, long)):
key = str(key)
- elif _skipkeys:
+ elif self.skipkeys:
continue
else:
raise TypeError("key " + repr(key) + " is not a string")
@@ -378,10 +346,10 @@
first = False
else:
yield item_separator
- yield _encoder(key)
- yield _key_separator
+ yield self.encoder(key)
+ yield self.key_separator
if isinstance(value, basestring):
- yield _encoder(value)
+ yield self.encoder(value)
elif value is None:
yield 'null'
elif value is True:
@@ -391,26 +359,29 @@
elif isinstance(value, (int, long)):
yield str(value)
elif isinstance(value, float):
- yield _floatstr(value)
+ yield self._floatstr(value)
else:
if isinstance(value, (list, tuple)):
- chunks = _iterencode_list(value, _current_indent_level)
+ chunks = self._iterencode_list(value, markers,
+ _current_indent_level)
elif isinstance(value, dict):
- chunks = _iterencode_dict(value, _current_indent_level)
+ chunks = self._iterencode_dict(value, markers,
+ _current_indent_level)
else:
- chunks = _iterencode(value, _current_indent_level)
+ chunks = self._iterencode(value, markers,
+ _current_indent_level)
for chunk in chunks:
yield chunk
if newline_indent is not None:
_current_indent_level -= 1
- yield '\n' + (' ' * (_indent * _current_indent_level))
+ yield '\n' + (' ' * (self.indent * _current_indent_level))
yield '}'
if markers is not None:
del markers[markerid]
- def _iterencode(o, _current_indent_level):
+ def _iterencode(self, o, markers, _current_indent_level):
if isinstance(o, basestring):
- yield _encoder(o)
+ yield self.encoder(o)
elif o is None:
yield 'null'
elif o is True:
@@ -420,12 +391,14 @@
elif isinstance(o, (int, long)):
yield str(o)
elif isinstance(o, float):
- yield _floatstr(o)
+ yield self._floatstr(o)
elif isinstance(o, (list, tuple)):
- for chunk in _iterencode_list(o, _current_indent_level):
+ for chunk in self._iterencode_list(o, markers,
+ _current_indent_level):
yield chunk
elif isinstance(o, dict):
- for chunk in _iterencode_dict(o, _current_indent_level):
+ for chunk in self._iterencode_dict(o, markers,
+ _current_indent_level):
yield chunk
else:
if markers is not None:
@@ -433,10 +406,10 @@
if markerid in markers:
raise ValueError("Circular reference detected")
markers[markerid] = o
- o = _default(o)
- for chunk in _iterencode(o, _current_indent_level):
+ o = self.default(o)
+ for chunk in self._iterencode(o, markers,
+ _current_indent_level):
yield chunk
if markers is not None:
del markers[markerid]
- return _iterencode
More information about the pypy-commit
mailing list