[pyparsing] make sure entire string was parsed

Steven Bethard steven.bethard at gmail.com
Sat Sep 10 19:46:40 EDT 2005


How do I make sure that my entire string was parsed when I call a 
pyparsing element's parseString method?  Here's a dramatically 
simplified version of my problem:

py> import pyparsing as pp
py> match = pp.Word(pp.nums)
py> def parse_num(s, loc, toks):
...     n, = toks
...     return int(n) + 10
...
py> match.setParseAction(parse_num)
W:(0123...)
py> match.parseString('121abc')
([131], {})

I want to know (somehow) that when I called match.parseString(), some 
of the string was left over (in this case, 'abc') after the parse was 
complete.  How can I do this?  (I don't think I can count characters in 
the results, because all of my internal setParseAction() functions 
return non-strings.)

STeVe

P.S.  FWIW, I've included the real code below.  I need to throw an 
exception when I call the parseString method of cls._root_node or 
cls._root_nodes and the entire string is not consumed.

----------------------------------------------------------------------
# Character classes.  Each one is produced by calling
# ``_pp.printables.translate`` with ``_id_trans`` and a string of characters.
# NOTE(review): presumably ``_id_trans`` is an identity table, so the second
# argument lists the characters removed from ``printables`` (Python 2
# ``str.translate(table, deletechars)``) -- confirm against its definition.
printables_trans = _pp.printables.translate
word_chars = printables_trans(_id_trans, '()')
syn_tag_chars = printables_trans(_id_trans, '()-=')
func_tag_chars = printables_trans(_id_trans, '()-=0123456789')

# Basic tag components.
# ``sep`` matches '-' and ``alt_sep`` matches '='; ``supp_sep`` accepts either
# one and suppresses it from the results.  ``special_word`` combines
# '-' + word + '-' into a single token (get_tag below checks for '-NONE-').
# The separators and the tag/id words all call leaveWhitespace().
sep = _pp.Literal('-').leaveWhitespace()
alt_sep = _pp.Literal('=').leaveWhitespace()
special_word = _pp.Combine(sep + _pp.Word(syn_tag_chars) + sep)
supp_sep = (alt_sep | sep).suppress()
syn_word = _pp.Word(syn_tag_chars).leaveWhitespace()
func_word = _pp.Word(func_tag_chars).leaveWhitespace()
id_word = _pp.Word(_pp.nums).leaveWhitespace()

# The different tag types.  Results are published under the names 'tag',
# 'funcs' and 'id', which are the keys get_tag reads.
special_tag = special_word.setResultsName('tag')
syn_tag = syn_word.setResultsName('tag')
# Zero or more "<separator><function word>" pairs, collected as 'funcs'.
func_tags = _pp.ZeroOrMore(supp_sep + func_word)
func_tags = func_tags.setResultsName('funcs')
# Optional trailing "<separator><digits>", collected as 'id'.
id_tag = _pp.Optional(supp_sep + id_word).setResultsName('id')
# A tag is either a special tag on its own, or a syntactic tag followed by
# its function tags and optional id.
tags = special_tag | (syn_tag + func_tags + id_tag)
def get_tag(orig_string, tokens_start, tokens):
    """Turn the named results of a tag match into a one-element list.

    ``tokens`` is converted with ``dict()``; the caller's object is not
    modified.  The resulting mapping must contain ``'tag'`` (``KeyError``
    otherwise) and may contain ``'funcs'`` and ``'id'``.  ``orig_string``
    and ``tokens_start`` are accepted but not used.

    Returns ``[{'tag': ..., 'functions': [...], 'id': ...}]`` where the
    literal tag ``'-NONE-'`` is reported as ``None``, ``'functions'`` is a
    fresh list (empty when no ``'funcs'`` entry exists) and ``'id'`` is
    ``None`` when absent.
    """
    fields = dict(tokens)
    raw_tag = fields['tag']
    # The special '-NONE-' tag stands for "no tag".
    tag = None if raw_tag == '-NONE-' else raw_tag
    functions = list(fields.get('funcs', []))
    node_id = fields.get('id', None)
    return [{'tag': tag, 'functions': functions, 'id': node_id}]
# Replace a matched tag's tokens with the single dict built by get_tag.
tags.setParseAction(get_tag)

# node parentheses -- matched but suppressed from the results
start = _pp.Literal('(').suppress()
end = _pp.Literal(')').suppress()

# words -- a run of ``word_chars`` characters, published under the name 'word'
word = _pp.Word(word_chars).setResultsName('word')

# leaf nodes -- a tag followed by an optional word
leaf_node = tags + _pp.Optional(word)
def get_leaf_node(orig_string, tokens_start, tokens):
    """Build a leaf ``cls`` instance from the tokens of a leaf-node match.

    ``tokens`` unpacks to either ``(tag_dict, word)`` or ``(tag_dict,)``.
    When a word is present it is passed through ``cls._unescape``;
    otherwise the node is created with ``word=None``.  ``orig_string``
    and ``tokens_start`` are accepted but not used.

    Returns ``cls(word=word, **tag_dict)``.
    """
    try:
        tag_dict, word = tokens
    except ValueError:
        # Unpacking into two names failed: only the tag dict is present.
        tag_dict, = tokens
        word = None
    else:
        # Unescape outside the ``try`` so that a ValueError raised by
        # ``cls._unescape`` propagates unchanged instead of being mistaken
        # for an unpacking failure and masked by a second, unrelated error.
        word = cls._unescape(word)
    return cls(word=word, **tag_dict)
# Replace a matched leaf's tokens with the object built by get_leaf_node.
leaf_node.setParseAction(get_leaf_node)

# node, recursive -- forward declaration; its definition is supplied later
# with ``node << ...`` once branch_node exists
node = _pp.Forward()

# branch nodes -- a tag followed by one or more child nodes
branch_node = tags + _pp.OneOrMore(node)
def get_branch_node(orig_string, tokens_start, tokens):
    """Build a branch ``cls`` instance from the tokens of a branch match.

    Everything after the first token is passed as ``children``; the first
    token is expanded into keyword arguments.  ``orig_string`` and
    ``tokens_start`` are accepted but not used.
    """
    child_nodes = tokens[1:]
    tag_fields = tokens[0]
    return cls(children=child_nodes, **tag_fields)
# Replace a matched branch's tokens with the object built by get_branch_node.
branch_node.setParseAction(get_branch_node)

# node, recursive -- '+' binds tighter than '<<', so this assigns the whole
# "( (branch | leaf) )" sequence as the definition of ``node``
node << start + (branch_node | leaf_node) + end

# root node may have additional parentheses -- '+' binds tighter than '|', so
# this is ``node | (start + node + end)``
cls._root_node = node | start + node + end
# one or more root nodes in sequence
cls._root_nodes = _pp.OneOrMore(cls._root_node)
# NOTE(review): neither expression ends with an end-of-input check, so
# parseString() on them can succeed while leaving trailing text unparsed.
# Presumably following the expression with pyparsing's StringEnd() at the
# call site would turn that into a parse error -- confirm before relying on it.



More information about the Python-list mailing list