[pypy-commit] pypy json-decoder-maps: more comments, some simplifications
cfbolz
pypy.commits at gmail.com
Wed Jun 5 16:00:03 EDT 2019
Author: Carl Friedrich Bolz-Tereick <cfbolz at gmx.de>
Branch: json-decoder-maps
Changeset: r96761:7876645a7f18
Date: 2019-06-05 21:54 +0200
http://bitbucket.org/pypy/pypy/changeset/7876645a7f18/
Log: more comments, some simplifications
diff --git a/pypy/module/_pypyjson/interp_decoder.py b/pypy/module/_pypyjson/interp_decoder.py
--- a/pypy/module/_pypyjson/interp_decoder.py
+++ b/pypy/module/_pypyjson/interp_decoder.py
@@ -26,6 +26,8 @@
class IntCache(object):
+ """ A cache for wrapped ints between START and END """
+
START = -10
END = 256
@@ -89,7 +91,9 @@
# otherwise convert them to dicts (see .close())
self.unclear_objects = []
- self.scratch = [[None] * self.DEFAULT_SIZE_SCRATCH] # list of scratch space
+ # this is a freelist of lists that store the decoded value of an
+ # object, before they get copied into the eventual dict
+ self.scratch = [[None] * self.DEFAULT_SIZE_SCRATCH]
def close(self):
@@ -116,11 +120,13 @@
break
return i
- def decode_any(self, i):
+ def decode_any(self, i, contextmap=None):
+ """ Decode an object at position i. Optionally pass a contextmap, if
+ the value is decoded as the value of a dict. """
i = self.skip_whitespace(i)
ch = self.ll_chars[i]
if ch == '"':
- return self.decode_string(i+1)
+ return self.decode_string(i+1, contextmap)
elif ch == '[':
return self.decode_array(i+1)
elif ch == '{':
@@ -276,13 +282,8 @@
self._raise("Unexpected '%s' when decoding object (char %d)",
ch, i)
- def decode_surrogate_pair(self, i, highsurr):
- """ uppon enter the following must hold:
- chars[i] == "\\" and chars[i+1] == "u"
- """
- # the possible ValueError is caught by the caller
-
def decode_array(self, i):
+ """ Decode a list. i must be after the opening '[' """
w_list = self.space.newlist([])
start = i
i = self.skip_whitespace(start)
@@ -308,13 +309,6 @@
self._raise("Unexpected '%s' when decoding array (char %d)",
ch, i-1)
- def decode_any_context(self, i, context):
- i = self.skip_whitespace(i)
- ch = self.ll_chars[i]
- if ch == '"':
- return self.decode_string(i+1, context)
- return self.decode_any(i)
-
def decode_object(self, i):
start = i
@@ -338,7 +332,7 @@
self._raise("No ':' found at char %d", i)
i += 1
- w_value = self.decode_any_context(i, currmap)
+ w_value = self.decode_any(i, currmap)
if nextindex == len(values_w): # full
values_w = values_w + [None] * len(values_w) # double
@@ -527,7 +521,10 @@
return i
- def decode_string(self, i, context=None):
+ def decode_string(self, i, contextmap=None):
+ """ Decode a string at position i (which is right after the opening ").
+ Optionally pass a contextmap, if the value is decoded as the value of a
+ dict. """
ll_chars = self.ll_chars
start = i
ch = ll_chars[i]
@@ -536,9 +533,9 @@
return self.w_empty_string # surprisingly common
cache = True
- if context is not None:
- context.decoded_strings += 1
- if not context.should_cache():
+ if contextmap is not None:
+ contextmap.decoded_strings += 1
+ if not contextmap.should_cache_strings():
cache = False
if len(self.s) < self.MIN_SIZE_FOR_STRING_CACHE:
cache = False
@@ -568,8 +565,8 @@
w_res = self._create_string_wrapped(start, i, nonascii)
# only add *some* strings to the cache, because keeping them all is
# way too expensive
- if ((context is not None and
- context.decoded_strings < self.STRING_CACHE_EVALUATION_SIZE) or
+ if ((contextmap is not None and
+ contextmap.decoded_strings < self.STRING_CACHE_EVALUATION_SIZE) or
strhash in self.lru_cache):
entry = CacheEntry(
self.getslice(start, start + length), w_res)
@@ -581,8 +578,8 @@
if not entry.compare(ll_chars, start, length):
# collision! hopefully rare
return self._create_string_wrapped(start, i, nonascii)
- if context is not None:
- context.cache_hits += 1
+ if contextmap is not None:
+ contextmap.cache_hits += 1
return entry.w_uni
def decode_key_map(self, i, currmap):
@@ -801,6 +798,7 @@
return next
def change_number_of_leaves(self, difference):
+ """ add difference to .number_of_leaves of self and its parents """
if not difference:
return
parent = self
@@ -974,8 +972,8 @@
MapBase._check_invariants(self)
def mark_useful(self, terminator):
- # mark self as useful, and also the most commonly instantiated
- # children, recursively
+ """ mark self as useful, and also the most commonly instantiated
+ children, recursively """
was_fringe = self.state == MapBase.FRINGE
assert self.state in (MapBase.FRINGE, MapBase.PRELIMINARY)
self.state = MapBase.USEFUL
@@ -1018,12 +1016,20 @@
return self.state == MapBase.USEFUL
def average_instantiation(self):
+ """ the number of instantiations, divided by the number of leaves. we
+ want to favor nodes that have either a high instantiation count, or few
+ leaves below it. """
return self.instantiation_count / float(self.number_of_leaves)
def is_useful(self):
return self.average_instantiation() > self.USEFUL_THRESHOLD
- def should_cache(self):
+ def should_cache_strings(self):
+ """ return whether strings parsed in the context of this map should be
+ cached. """
+ # we should cache if either we've seen few strings so far (less than
+ # STRING_CACHE_EVALUATION_SIZE), or if we've seen many, and the cache
+ # hit rate has been high enough
return not (self.decoded_strings > JSONDecoder.STRING_CACHE_EVALUATION_SIZE and
self.cache_hits * JSONDecoder.STRING_CACHE_USEFULNESS_FACTOR < self.decoded_strings)
More information about the pypy-commit
mailing list