[pypy-commit] pypy default: don't do one to two dict lookups per parsed character
cfbolz
noreply at buildbot.pypy.org
Fri Oct 30 13:32:34 EDT 2015
Author: Carl Friedrich Bolz <cfbolz at gmx.de>
Branch:
Changeset: r80490:986b29dd1c0b
Date: 2015-10-30 18:30 +0100
http://bitbucket.org/pypy/pypy/changeset/986b29dd1c0b/
Log: don't do one to two dict lookups per parsed character
this is done by computing a different representation of the state
transition table (a big string, instead of many dictionaries). A bit
of a "just-because" commit.
diff --git a/pypy/interpreter/pyparser/automata.py b/pypy/interpreter/pyparser/automata.py
--- a/pypy/interpreter/pyparser/automata.py
+++ b/pypy/interpreter/pyparser/automata.py
@@ -22,27 +22,61 @@
# PYPY Modification : removed all automata functions (any, maybe,
# newArcPair, etc.)
+ERROR_STATE = chr(255)
+
class DFA:
# ____________________________________________________________
def __init__(self, states, accepts, start = 0):
- self.states = states
+ """ NOT_RPYTHON """
+ assert len(states) < 255 # no support for huge amounts of states
+ # construct string for looking up state transitions
+ string_states = [] * len(states)
+ # compute maximum
+ maximum = 0
+ for state in states:
+ for key in state:
+ if key == DEFAULT:
+ continue
+ maximum = max(ord(key), maximum)
+ self.max_char = maximum + 1
+
+ defaults = []
+ for i, state in enumerate(states):
+ default = ERROR_STATE
+ if DEFAULT in state:
+ default = chr(state[DEFAULT])
+ defaults.append(default)
+ string_state = [default] * self.max_char
+ for key, value in state.iteritems():
+ if key == DEFAULT:
+ continue
+ assert len(key) == 1
+ assert ord(key) < self.max_char
+ string_state[ord(key)] = chr(value)
+ string_states.extend(string_state)
+ self.states = "".join(string_states)
+ self.defaults = "".join(defaults)
self.accepts = accepts
self.start = start
# ____________________________________________________________
- def recognize (self, inVec, pos = 0): # greedy = True
+
+ def _next_state(self, item, crntState):
+ if ord(item) >= self.max_char:
+ return self.defaults[crntState]
+ else:
+ return self.states[crntState * self.max_char + ord(item)]
+
+ def recognize(self, inVec, pos = 0):
crntState = self.start
lastAccept = False
i = pos
for i in range(pos, len(inVec)):
item = inVec[i]
- # arcMap, accept = self.states[crntState]
- arcMap = self.states[crntState]
accept = self.accepts[crntState]
- if item in arcMap:
- crntState = arcMap[item]
- elif DEFAULT in arcMap:
- crntState = arcMap[DEFAULT]
+ crntState = self._next_state(item, crntState)
+ if crntState != ERROR_STATE:
+ pass
elif accept:
return i
elif lastAccept:
@@ -51,6 +85,7 @@
return i - 1
else:
return -1
+ crntState = ord(crntState)
lastAccept = accept
# if self.states[crntState][1]:
if self.accepts[crntState]:
@@ -63,24 +98,20 @@
# ______________________________________________________________________
class NonGreedyDFA (DFA):
- def recognize (self, inVec, pos = 0):
+
+ def recognize(self, inVec, pos = 0):
crntState = self.start
i = pos
for i in range(pos, len(inVec)):
item = inVec[i]
- # arcMap, accept = self.states[crntState]
- arcMap = self.states[crntState]
accept = self.accepts[crntState]
if accept:
return i
- elif item in arcMap:
- crntState = arcMap[item]
- elif DEFAULT in arcMap:
- crntState = arcMap[DEFAULT]
- else:
+ crntState = self._next_state(item, crntState)
+ if crntState == ERROR_STATE:
return -1
+ crntState = ord(crntState)
i += 1
- # if self.states[crntState][1]:
if self.accepts[crntState]:
return i
else:
diff --git a/pypy/interpreter/pyparser/test/test_automata.py b/pypy/interpreter/pyparser/test/test_automata.py
new file mode 100644
--- /dev/null
+++ b/pypy/interpreter/pyparser/test/test_automata.py
@@ -0,0 +1,12 @@
+from pypy.interpreter.pyparser.automata import DFA, DEFAULT
+
+def test_states():
+ d = DFA([{"\x00": 1}, {"\x01": 0}], [False, True])
+ assert d.states == "\x01\xff\xff\x00"
+ assert d.defaults == "\xff\xff"
+ assert d.max_char == 2
+
+ d = DFA([{"\x00": 1}, {DEFAULT: 0}], [False, True])
+ assert d.states == "\x01\x00"
+ assert d.defaults == "\xff\x00"
+ assert d.max_char == 1
More information about the pypy-commit
mailing list