[pyparsing] make sure entire string was parsed
Steven Bethard
steven.bethard at gmail.com
Sat Sep 10 19:46:40 EDT 2005
How do I make sure that my entire string was parsed when I call a
pyparsing element's parseString method? Here's a dramatically
simplified version of my problem:
py> import pyparsing as pp
py> match = pp.Word(pp.nums)
py> def parse_num(s, loc, toks):
...     n, = toks
...     return int(n) + 10
...
py> match.setParseAction(parse_num)
W:(0123...)
py> match.parseString('121abc')
([131], {})
I want to know (somehow) that when I called match.parseString(), there
was some of the string left over (in this case, 'abc') after the parse
was complete. How can I do this? (I don't think I can do character
counting; all my internal setParseAction() functions return non-strings).
STeVe
P.S. FWIW, I've included the real code below. I need to throw an
exception when I call the parseString method of cls._root_node or
cls._root_nodes and the entire string is not consumed.
----------------------------------------------------------------------
# Character classes used by the tag and word tokens.
# NOTE(review): the two-argument str.translate(table, deletechars) form is
# Python 2 only. _id_trans is defined outside this excerpt -- presumably an
# identity table, so each call simply deletes the listed characters from
# the printable set; confirm against its definition.
printables_trans = _pp.printables.translate
# word characters: everything except the node parentheses
word_chars = printables_trans(_id_trans, '()')
# syntactic-tag characters: additionally exclude the '-' and '=' separators
syn_tag_chars = printables_trans(_id_trans, '()-=')
# function-tag characters: additionally exclude digits
func_tag_chars = printables_trans(_id_trans, '()-=0123456789')
# Basic tag components. leaveWhitespace() turns off pyparsing's default
# skipping of leading whitespace for that element.
sep = _pp.Literal('-').leaveWhitespace()
alt_sep = _pp.Literal('=').leaveWhitespace()
# a word wrapped in '-' separators (e.g. '-NONE-', the value get_tag tests
# for), joined by Combine into a single token
special_word = _pp.Combine(sep + _pp.Word(syn_tag_chars) + sep)
# either separator, matched but dropped from the results
supp_sep = (alt_sep | sep).suppress()
syn_word = _pp.Word(syn_tag_chars).leaveWhitespace()
func_word = _pp.Word(func_tag_chars).leaveWhitespace()
id_word = _pp.Word(_pp.nums).leaveWhitespace()
# The different tag types. The results names set here ('tag', 'funcs',
# 'id') are the keys that get_tag reads.
special_tag = special_word.setResultsName('tag')
syn_tag = syn_word.setResultsName('tag')
# zero or more separator-prefixed function words
func_tags = _pp.ZeroOrMore(supp_sep + func_word)
func_tags = func_tags.setResultsName('funcs')
# an optional separator-prefixed numeric id
id_tag = _pp.Optional(supp_sep + id_word).setResultsName('id')
# '|' tries special_tag first, then the syntactic tag with its optional
# function tags and id
tags = special_tag | (syn_tag + func_tags + id_tag)
def get_tag(orig_string, tokens_start, tokens):
    """Parse action for ``tags``: collapse the named results into one dict.

    Args:
        orig_string: the string being parsed (unused).
        tokens_start: location of the match in ``orig_string`` (unused).
        tokens: the matched results; anything ``dict()`` accepts, carrying
            a required ``'tag'`` entry and optional ``'funcs'`` and
            ``'id'`` entries.

    Returns:
        A one-element list holding a dict with the keys ``tag`` (``None``
        for the special ``'-NONE-'`` tag), ``functions`` (a list, empty
        when no ``'funcs'`` entry is present) and ``id`` (``None`` when
        absent).

    Raises:
        KeyError: if ``tokens`` has no ``'tag'`` entry.
    """
    # Convert once to a plain dict; the caller's results are never modified.
    named = dict(tokens)
    tag = named['tag']
    if tag == '-NONE-':
        tag = None
    functions = list(named.get('funcs', []))
    # Local is called node_id so the builtin id() is not shadowed; the key
    # in the returned dict is still 'id'.
    node_id = named.get('id')
    return [dict(tag=tag, functions=functions, id=node_id)]
# run get_tag on every match of the tags expression
tags.setParseAction(get_tag)
# node parentheses: matched but dropped from the results
start = _pp.Literal('(').suppress()
end = _pp.Literal(')').suppress()
# words: a run of word_chars, stored under the results name 'word'
word = _pp.Word(word_chars).setResultsName('word')
# leaf nodes: a tag optionally followed by a single word
leaf_node = tags + _pp.Optional(word)
def get_leaf_node(orig_string, tokens_start, tokens):
    """Parse action for ``leaf_node``: build a node from a tag and a word.

    Args:
        orig_string: the string being parsed (unused).
        tokens_start: location of the match in ``orig_string`` (unused).
        tokens: either ``[tag_dict]`` or ``[tag_dict, word]``, where
            ``tag_dict`` is the dict produced by ``get_tag``.

    Returns:
        ``cls(word=..., **tag_dict)``, with ``word`` set to ``None`` when
        no word was matched. ``cls`` comes from the enclosing scope, which
        is outside this excerpt.

    Raises:
        ValueError: if ``tokens`` holds neither one nor two items.
    """
    try:
        tag_dict, word = tokens
    except ValueError:
        # Only the tag matched; the optional word was absent.
        tag_dict, = tokens
        word = None
    else:
        # Kept out of the try body so an error raised by _unescape itself
        # propagates unchanged instead of being mistaken for a missing word.
        word = cls._unescape(word)
    return cls(word=word, **tag_dict)
# run get_leaf_node on every match of leaf_node
leaf_node.setParseAction(get_leaf_node)
# node, recursive: Forward() is a placeholder that lets branch_node refer
# to node before node's own grammar has been assigned
node = _pp.Forward()
# branch nodes: a tag followed by one or more child nodes
branch_node = tags + _pp.OneOrMore(node)
def get_branch_node(orig_string, tokens_start, tokens):
    """Parse action for ``branch_node``: build a node that has children.

    Args:
        orig_string: the string being parsed (unused).
        tokens_start: location of the match in ``orig_string`` (unused).
        tokens: the tag dict produced by ``get_tag`` followed by the
            already-built child nodes.

    Returns:
        ``cls(children=..., **tag_dict)``. ``cls`` comes from the enclosing
        scope, which is outside this excerpt.
    """
    child_nodes = tokens[1:]
    tag_dict = tokens[0]
    return cls(children=child_nodes, **tag_dict)
# run get_branch_node on every match of branch_node
branch_node.setParseAction(get_branch_node)
# node, recursive: '+' binds tighter than '<<', so the whole parenthesized
# sequence is what gets assigned to the Forward. branch_node is tried
# before leaf_node.
node << start + (branch_node | leaf_node) + end
# root node may have additional parentheses; '+' binds tighter than '|',
# so this reads as node | (start + node + end)
cls._root_node = node | start + node + end
# one or more root nodes in sequence
# NOTE(review): neither expression is anchored to the end of the input, so
# parseString() succeeds even when trailing text is left unparsed.
# Appending _pp.StringEnd() to the expression handed to parseString() is
# the usual pyparsing way to reject leftovers -- confirm against the
# installed version.
cls._root_nodes = _pp.OneOrMore(cls._root_node)
More information about the Python-list
mailing list