Regex for nested {}

Diez B.Roggisch deets at
Thu Jul 28 12:37:52 EDT 2005

Chris <c <at>> writes:
> is something like that possible?

No. Not with "pure" regexes. The reason is that the theory behind them doesn't
allow them to detect syntactic constructs like a^n b^n (n copies of a followed
by exactly n copies of b), with a={ and b=} in your case.
What you need is a "real" parser - usually one uses regexes to split the
string into so-called tokens. Then either a handwritten parser or a
parser-generator is used.

The following is a simple implementation based on the spark parser generator:

import spark

class ParsingError(Exception):
    """Error raised when the scanner or the parser rejects the input.

    Deriving from ``Exception`` makes the class usable with ``raise`` and
    ``except`` on every Python version (plain classes can no longer be
    raised on Python 3).

    Attributes:
        msg: Human-readable description of the problem, or ``None``.
    """

    def __init__(self, msg = None):
        # Forward the message so ``args`` is populated like any other exception.
        Exception.__init__(self, msg)
        self.msg = msg

    def __repr__(self):
        return "Parsing Error: %s" % self.msg

    # Printing an exception goes through str(); keep it identical to repr()
    # so the printed text stays "Parsing Error: <msg>".
    __str__ = __repr__
class Token:
    """A lexical token: a ``type`` plus an optional ``attr`` payload.

    A token compares equal to anything its ``type`` compares equal to, so a
    token can be matched directly against a plain string such as ``'word'``
    or ``'{'``.

    Args:
        type: The token category (shadows the builtin, but the name is part
            of the keyword interface used by callers).
        attr: Optional value attached to the token, e.g. the matched text.
    """

    def __init__(self, type, attr=None):
        self.type = type
        self.attr = attr

    def __cmp__(self, o):
        # Only ever invoked by Python 2, where the builtin cmp() exists.
        return cmp(self.type, o)

    # Python 3 ignores __cmp__, so provide the rich comparisons explicitly;
    # they give the same answers as the three-way comparison above.
    def __eq__(self, o):
        return self.type == o

    def __ne__(self, o):
        return not self.type == o

    def __hash__(self):
        # Keep hashing consistent with equality (equal to its type string).
        return hash(self.type)

    def __repr__(self):
        # Show the payload when there is one, otherwise the category.
        return self.attr or self.type

class Scanner(spark.GenericScanner):
    """Tokenizer that turns the input text into a list of ``Token`` objects.

    Each ``t_*`` method carries its regular expression as the method
    docstring, so those docstrings are functional and must not be reworded.

    NOTE(review): the archived listing lost several method bodies and regex
    docstrings.  The body of ``__init__``, the regexes of ``t_word``,
    ``t_comment`` and ``t_default``, the ``self.rv.append`` calls and the
    ``t_default`` error message are reconstructions - confirm against the
    original post before relying on them.
    """

    def __init__(self):
        spark.GenericScanner.__init__(self)

    def error(self, s, pos):
        raise ParsingError("Lexical error at %i" % pos)

    def tokenize(self, input):
        # The t_* callbacks accumulate tokens in self.rv during the base scan.
        self.rv = []
        spark.GenericScanner.tokenize(self, input)
        return self.rv

    def t_whitespace(self, s):
        r' \s+ '
        # Whitespace only separates tokens; nothing is emitted for it.

    def t_word(self, s):
        r' \w+ '
        self.rv.append(Token(type='word', attr=s))

    def t_parentheses(self, s):
        r' \{| \}'
        # The brace character itself serves as the token type.
        self.rv.append(Token(type=s))

    def t_comment(self, s):
        r' \#[^\n]* '
        # Comments are discarded.

    def t_default(self, s):
        r' ( . | \n )+ '
        # Anything no other rule matched is a lexical error.
        raise ParsingError("Lexical error: unexpected input %r" % s)

class Parser(spark.GenericParser):
    """Parser for a sequence of ``word { ... }`` entries that may nest.

    The grammar rules live in the docstrings of the ``p_*`` methods, so
    those docstrings are functional and must contain only the rule text.
    The result of a parse is a list of ``(word, nested_list)`` tuples.
    """

    def __init__(self, start='expr'):
        spark.GenericParser.__init__(self, start)

    def p_expr_1(self, args):
        ''' expr ::= word block expr '''
        # Pair the word with its block and prepend it to the remaining entries.
        return [(args[0], args[1])] + args[2]

    def p_expr_2(self, args):
        ''' expr ::= '''
        # Empty production: ends the sequence.
        return []

    def p_block(self, args):
        ''' block ::= { expr } '''
        # Drop the surrounding braces and keep only the inner expression.
        return args[1]

    def error(self, token):
        raise ParsingError("Syntax error at or near `%s' token" % token)

def scan(input):
    """Tokenize ``input`` with a fresh ``Scanner``.

    Returns:
        The token list produced by ``Scanner.tokenize``, or ``None`` when a
        ``ParsingError`` was raised (the error is printed instead).
    """
    scanner = Scanner()
    try:
        return scanner.tokenize(input)
    except ParsingError as e:
        # Best-effort reporting: show the problem and fall through to None.
        print(e)

def parse(tokens):
    """Run a freshly constructed ``Parser`` over ``tokens`` and return its result."""
    parser = Parser()
    result = parser.parse(tokens)
    return result

# Sample input: one block with two nested blocks, followed by a flat block.
# NOTE(review): the closing brace of ``outer`` and the closing triple quote
# were missing from the archived listing and have been restored.
text = """
        outer {
                inner1 {  }
                inner2 {  }
        }
        simple {  }
"""

def main():
    """Scan and parse the module-level sample ``text`` and print the result."""
    tokens = scan(text)
    if tokens is None:
        # scan() prints the lexical error and returns nothing in that case;
        # there is nothing to hand to the parser.
        return
    # Single-argument print(...) prints the same text on Python 2 and 3.
    print(parse(tokens))

# Run the demo only when executed as a script, not when imported.
if __name__ == "__main__":
    main()

More information about the Python-list mailing list