[pypy-commit] pypy pyparser-improvements: store the first set as 32-char string, as opposed to a dict
cfbolz
pypy.commits at gmail.com
Mon Mar 12 12:52:50 EDT 2018
Author: Carl Friedrich Bolz-Tereick <cfbolz at gmx.de>
Branch: pyparser-improvements
Changeset: r93979:2221844101cc
Date: 2018-03-12 17:40 +0100
http://bitbucket.org/pypy/pypy/changeset/2221844101cc/
Log: store the first set as 32-char string, as opposed to a dict
diff --git a/pypy/interpreter/pyparser/parser.py b/pypy/interpreter/pyparser/parser.py
--- a/pypy/interpreter/pyparser/parser.py
+++ b/pypy/interpreter/pyparser/parser.py
@@ -1,6 +1,7 @@
"""
A CPython inspired RPython parser.
"""
+from rpython.rlib.objectmodel import not_rpython
class Grammar(object):
@@ -45,8 +46,23 @@
def __init__(self, symbol_id, states, first):
self.symbol_id = symbol_id
self.states = states
- self.first = first
+ self.first = self._first_to_string(first)
+ def could_match_token(self, label_index):
+ pos = label_index >> 3
+ bit = 1 << (label_index & 0b111)
+ return bool(ord(self.first[label_index >> 3]) & bit)
+
+ @staticmethod
+ @not_rpython
+ def _first_to_string(first):
+ l = sorted(first.keys())
+ b = bytearray(32)
+ for label_index in l:
+ pos = label_index >> 3
+ bit = 1 << (label_index & 0b111)
+ b[pos] |= bit
+ return str(b)
class Node(object):
@@ -259,7 +275,7 @@
elif sym_id >= 256:
sub_node_dfa = self.grammar.dfas[sym_id - 256]
# Check if this token can start a child node.
- if label_index in sub_node_dfa.first:
+ if sub_node_dfa.could_match_token(label_index):
self.push(sub_node_dfa, next_state, sym_id, lineno,
column)
break
diff --git a/pypy/interpreter/pyparser/test/test_parser.py b/pypy/interpreter/pyparser/test/test_parser.py
--- a/pypy/interpreter/pyparser/test/test_parser.py
+++ b/pypy/interpreter/pyparser/test/test_parser.py
@@ -7,6 +7,12 @@
from pypy.interpreter.pyparser.test.test_metaparser import MyGrammar
+def test_char_set():
+ first = {5: None, 9: None, 100: None, 255:None}
+ p = parser.DFA(None, None, first)
+ for i in range(256):
+ assert p.could_match_token(i) == (i in first)
+
class SimpleParser(parser.Parser):
def parse(self, input):
More information about the pypy-commit
mailing list