[Python-checkins] r52919 - sandbox/trunk/2to3/fix_apply.py sandbox/trunk/2to3/fix_has_key.py sandbox/trunk/2to3/fix_print.py sandbox/trunk/2to3/pytree.py sandbox/trunk/2to3/tests.py
guido.van.rossum
python-checkins at python.org
Tue Dec 5 18:54:06 CET 2006
Author: guido.van.rossum
Date: Tue Dec 5 18:54:04 2006
New Revision: 52919
Added:
sandbox/trunk/2to3/tests.py (contents, props changed)
Modified:
sandbox/trunk/2to3/fix_apply.py
sandbox/trunk/2to3/fix_has_key.py
sandbox/trunk/2to3/fix_print.py
sandbox/trunk/2to3/pytree.py
Log:
Much better wildcard pattern matching.
And some unit tests.
Modified: sandbox/trunk/2to3/fix_apply.py
==============================================================================
--- sandbox/trunk/2to3/fix_apply.py (original)
+++ sandbox/trunk/2to3/fix_apply.py Tue Dec 5 18:54:04 2006
@@ -96,6 +96,9 @@
pytree.Leaf(token.DOUBLESTAR, "**"),
l_args[2]])
l_newargs[-2].set_prefix(" ")
+ for n in l_newargs:
+ if n.parent is not None:
+ n.replace(None) # Force parent to None
n_arglist.replace(pytree.Node(syms.arglist, l_newargs))
# XXX Sometimes we can be cleverer, e.g. apply(f, (x, y) + t)
# can be translated into f(x, y, *t) instead of f(*(x, y) + t)
Modified: sandbox/trunk/2to3/fix_has_key.py
==============================================================================
--- sandbox/trunk/2to3/fix_has_key.py (original)
+++ sandbox/trunk/2to3/fix_has_key.py Tue Dec 5 18:54:04 2006
@@ -51,7 +51,6 @@
# Sample nodes
n_star = pytree.Leaf(token.STAR, "*")
n_comma = pytree.Leaf(token.COMMA, ",")
-n_in = pytree.Leaf(token.NAME, "in", context=(" ", (0, 0)))
# Tree matching patterns
p_has_key = pytree.NodePattern(syms.trailer,
@@ -100,6 +99,12 @@
# Change "X.has_key(Y)" into "Y in X"
arg.set_prefix(nodes[0].get_prefix())
nodes[0].set_prefix(" ")
+ if arg.parent is not None:
+ arg.replace(None)
+ n_in = pytree.Leaf(token.NAME, "in", context=(" ", (0, 0)))
+ for node in nodes[:i]:
+ if node.parent is not None:
+ node.replace(None)
new = pytree.Node(syms.comparison,
(arg, n_in, pytree.Node(syms.power, nodes[:i])))
# XXX Sometimes we need to parenthesize arg or new. Later.
Modified: sandbox/trunk/2to3/fix_print.py
==============================================================================
--- sandbox/trunk/2to3/fix_print.py (original)
+++ sandbox/trunk/2to3/fix_print.py Tue Dec 5 18:54:04 2006
@@ -97,6 +97,9 @@
if file is not None:
add_kwarg(l_args, "file", file)
if l_args:
+ for n in l_args:
+ if n.parent is not None:
+ n.replace(None) # Force parent to None
n_arglist = pytree.Node(syms.arglist, l_args)
else:
n_arglist = None
@@ -112,6 +115,8 @@
def add_kwarg(l_nodes, s_kwd, n_expr):
# XXX All this prefix-setting may lose comments (though rarely)
n_expr.set_prefix("")
+ if n_expr.parent is not None:
+ n_expr.replace(None) # Force parent to None
n_argument = pytree.Node(syms.argument,
(pytree.Leaf(token.NAME, s_kwd),
pytree.Leaf(token.EQUAL, "="),
Modified: sandbox/trunk/2to3/pytree.py
==============================================================================
--- sandbox/trunk/2to3/pytree.py (original)
+++ sandbox/trunk/2to3/pytree.py Tue Dec 5 18:54:04 2006
@@ -118,6 +118,7 @@
self.type = type
self.children = tuple(children)
for ch in self.children:
+ assert ch.parent is None, str(ch)
ch.parent = self
def __repr__(self):
@@ -226,6 +227,13 @@
It looks for a specific node type (token or symbol), and
optionally for a specific content.
+
+ This is an abstract base class. There are three concrete
+ subclasses:
+
+ - LeafPattern matches a single leaf node;
+ - NodePattern matches a single node (usually non-leaf);
+ - WildcardPattern matches a sequence of nodes of variable length.
"""
# Defaults for instance variables
@@ -238,13 +246,21 @@
assert cls is not BasePattern, "Cannot instantiate BasePattern"
return object.__new__(cls, *args, **kwds)
+ def __repr__(self):
+ return "%s(%s)" % (self.__class__.__name__,
+ ", ".join("%s=%r" % (x, getattr(self, x))
+ for x in ("type", "content", "name")
+ if getattr(self, x) is not None))
+
def match(self, node, results=None):
- """Does that node match this pattern?
+ """Does this pattern exactly match a node?
Returns True if it matches, False if not.
If results is not None, it must be a dict which will be
updated with the nodes matching named subpatterns.
+
+ Default implementation for non-wildcard patterns.
"""
if self.type is not None and node.type != self.type:
return False
@@ -256,41 +272,56 @@
return False
if r:
results.update(r)
- if results is not None and self.name is not None:
+ if results is not None and self.name:
results[self.name] = node
return True
+ def match_seq(self, nodes, results=None):
+ """Does this pattern exactly match a sequence of nodes?
-class NodePattern(BasePattern):
+ Default implementation for non-wildcard patterns.
+ """
+ if len(nodes) != 1:
+ return False
+ return self.match(nodes[0], results)
+
+ def generate_matches(self, nodes):
+ """Generator yielding all matches for this pattern.
+
+ Default implementation for non-wildcard patterns.
+ """
+ r = {}
+ if nodes and self.match(nodes[0], r):
+ yield 1, r
- wildcards = False
+
+class LeafPattern(BasePattern):
def __init__(self, type=None, content=None, name=None):
- """Constructor. Takes optional type, content, and name.
+ """Initializer. Takes optional type, content, and name.
- The type, if given, must be a symbol type (>= 256).
+ The type, if given must be a token type (< 256). If not given,
+ this matches any *leaf* node; the content may still be required.
- The content, if given, must be a sequence of Patterns that
- must match the node's children exactly.
+ The content, if given, must be a string.
If a name is given, the matching node is stored in the results
dict under that key.
"""
if type is not None:
- assert type >= 256, type
- else:
- assert content is None, repr(content)
+ assert 0 <= type < 256, type
if content is not None:
- assert not isinstance(content, basestring), repr(content)
- content = tuple(content)
- for i, item in enumerate(content):
- assert isinstance(item, BasePattern), (i, item)
- if isinstance(item, WildcardPattern):
- self.wildcards = True
+ assert isinstance(content, basestring), repr(content)
self.type = type
self.content = content
self.name = name
+ def match(self, node, results=None):
+ """Override match() to insist on a leaf node."""
+ if not isinstance(node, Leaf):
+ return False
+ return BasePattern.match(self, node, results)
+
def _submatch(self, node, results=None):
"""Match the pattern's content to the node's children.
@@ -303,63 +334,38 @@
When returning False, the results dict may still be updated.
"""
- if self.wildcards:
- return _wcmatch(self.content, node.children, results)
- if len(self.content) != len(node.children):
- return False
- for subpattern, child in zip(self.content, node.children):
- if not subpattern.match(child, results):
- return False
- return True
-
-
-def _wcmatch(patterns, nodes, results):
- if not patterns:
- if nodes:
- return False
- return True
- pat = patterns[0]
- if isinstance(pat, WildcardPattern):
- for i in xrange(pat.min):
- if i >= len(nodes) or not pat.match(nodes[i], None):
- return False
- for i in xrange(pat.min, pat.max):
- # Shortest match wins
- r = {}
- if _wcmatch(patterns[1:], nodes[i:], r):
- if pat.name:
- r[pat.name] = nodes[:i]
- if results is not None:
- results.update(r)
- return True
- return False
- else:
- if not nodes:
- return False
- r = {}
- if pat.match(nodes[0], r):
- if _wcmatch(patterns[1:], nodes[1:], r):
- if results is not None:
- results.update(r)
- return True
- return False
+ return self.content == node.value
-class LeafPattern(BasePattern):
+class NodePattern(BasePattern):
- def __init__(self, type, content=None, name=None):
- """Constructor. Takes a type, optional content, and optional name.
+ wildcards = False
- The type must be a token type (< 256).
+ def __init__(self, type=None, content=None, name=None):
+ """Initializer. Takes optional type, content, and name.
- The content, if given, must be a string.
+ The type, if given, must be a symbol type (>= 256). If the
+ type is None, the content must be None, and this matches *any*
+ single node (leaf or not).
+
+ The content, if not None, must be a sequence of Patterns that
+ must match the node's children exactly. If the content is
+ given, the type must not be None.
If a name is given, the matching node is stored in the results
dict under that key.
"""
- assert type < 256, type
+ if type is not None:
+ assert type >= 256, type
+ else:
+ assert content is None, repr(content)
if content is not None:
- assert isinstance(content, basestring), repr(content)
+ assert not isinstance(content, basestring), repr(content)
+ content = tuple(content)
+ for i, item in enumerate(content):
+ assert isinstance(item, BasePattern), (i, item)
+ if isinstance(item, WildcardPattern):
+ self.wildcards = True
self.type = type
self.content = content
self.name = name
@@ -376,19 +382,138 @@
When returning False, the results dict may still be updated.
"""
- return self.content == node.value
+ if self.wildcards:
+ for c, r in generate_matches(self.content, node.children):
+ if c == len(node.children):
+ if results is not None:
+ results.update(r)
+ return True
+ return False
+ if len(self.content) != len(node.children):
+ return False
+ for subpattern, child in zip(self.content, node.children):
+ if not subpattern.match(child, results):
+ return False
+ return True
-class WildcardPattern(NodePattern):
+class WildcardPattern(BasePattern):
- """A wildcard pattern can match zero or more nodes.
+ """A wildcard pattern can match zero or more nodes."""
- The matching must be done by special-casing in NodePattern._submatch().
- """
+ def __init__(self, content=None, min=0, max=sys.maxint, name=None):
+ """Initializer.
- def __init__(self, type=None, content=None, name=None,
- min=0, max=sys.maxint):
- """Constructor."""
- NodePattern.__init__(self, type=type, content=content, name=name)
+ Args:
+ content: optional sequence of subsequences of patterns;
+ if absent, matches one node;
+ if present, each subsequence is an alternative [*]
+ min: optinal minumum number of times to match, default 0
+ max: optional maximum number of times tro match, default sys.maxint
+ name: optional name assigned to this match
+
+ [*] Thus, if content is [[a, b, c], [d, e], [f, g, h]] this is
+ equivalent to (a b c | d e | f g h); if content is None,
+ this is equivalent to '.' in regular expression terms.
+ The min and max parameters work as follows:
+ min=0, max=maxint: .*
+ min=1, max=maxint: .+
+ min=0, max=1: .?
+ min=1, max=1: .
+ If content is not None, replace the dot with the parenthesized
+ list of alternatives, e.g. (a b c | d e | f g h)*
+ """
+ assert 0 <= min <= max <= sys.maxint, (min, max)
+ if content is not None:
+ content = tuple(map(tuple, content)) # Protect against alterations
+ # Check sanity of alternatives
+ assert len(content), repr(content) # Can't have zero alternatives
+ for alt in content:
+ assert len(alt), repr(alt) # Can have empty alternatives
+ self.content = content
self.min = min
self.max = max
+ self.name = name
+
+ def match(self, node, results=None):
+ """Does this pattern exactly match a node?"""
+ return self.match_seq([node], results)
+
+ def match_seq(self, nodes, results=None):
+ """Does this pattern exactly match a sequence of nodes?"""
+ for c, r in self.generate_matches(nodes):
+ if c == len(nodes):
+ if results is not None:
+ results.update(r)
+ if self.name:
+ results[self.name] = tuple(nodes)
+ return True
+ return False
+
+ def generate_matches(self, nodes):
+ """Generator yielding matches for a sequence of nodes.
+
+ Args:
+ nodes: sequence of nodes
+
+ Yields:
+ (count, results) tuples where:
+ count: the match comprises nodes[:count];
+ results: dict containing named submatches.
+ """
+ if self.content is None:
+ # Shortcut for special case (see __init__.__doc__)
+ for count in xrange(self.min, 1 + min(len(nodes), self.max)):
+ r = {}
+ if self.name:
+ r[self.name] = nodes[:count]
+ yield count, r
+ else:
+ for count, r in self._recursive_matches(nodes, 0):
+ if self.name:
+ r[self.name] = nodes[:count]
+ yield count, r
+
+ def _recursive_matches(self, nodes, count):
+ """Helper to recursively yield the matches."""
+ assert self.content is not None
+ if count >= self.min:
+ r = {}
+ if self.name:
+ r[self.name] = nodes[:0]
+ yield 0, r
+ if count < self.max:
+ for alt in self.content:
+ for c0, r0 in generate_matches(alt, nodes):
+ for c1, r1 in self._recursive_matches(nodes[c0:], count+1):
+ r = {}
+ r.update(r0)
+ r.update(r1)
+ yield c0 + c1, r
+
+
+def generate_matches(patterns, nodes):
+ """Generator yielding matches for a sequence of patterns and nodes.
+
+ Args:
+ patterns: a sequence of patterns
+ nodes: a sequence of nodes
+
+ Yields:
+ (count, results) tuples where:
+ count: the entire sequence of patterns matches nodes[:count];
+ results: dict containing named submatches.
+ """
+ if not patterns:
+ yield 0, {}
+ else:
+ p, rest = patterns[0], patterns[1:]
+ for c0, r0 in p.generate_matches(nodes):
+ if not rest:
+ yield c0, r0
+ else:
+ for c1, r1 in generate_matches(rest, nodes[c0:]):
+ r = {}
+ r.update(r0)
+ r.update(r1)
+ yield c0 + c1, r
Added: sandbox/trunk/2to3/tests.py
==============================================================================
--- (empty file)
+++ sandbox/trunk/2to3/tests.py Tue Dec 5 18:54:04 2006
@@ -0,0 +1,237 @@
+#!/usr/bin/env python2.5
+# Copyright 2006 Google, Inc. All Rights Reserved.
+# Licensed to PSF under a Contributor Agreement.
+
+"""Unit tests for pytree.py.
+
+NOTE: Please *don't* add doc strings to individual test methods!
+In verbose mode, printing of the module, class and method name is much
+more helpful than printing of (the first line of) the docstring,
+especially when debugging a test.
+"""
+
+# Python imports
+import unittest
+
+# Local imports (XXX should become a package)
+import pytree
+
+
+class TestNodes(unittest.TestCase):
+
+ """Unit tests for nodes (Base, Leaf, Node)."""
+
+ def testBaseCantConstruct(self):
+ if __debug__:
+ # Test that instantiating Base() raises an AssertionError
+ self.assertRaises(AssertionError, pytree.Base)
+
+ def testLeaf(self):
+ l1 = pytree.Leaf(100, "foo")
+ self.assertEqual(l1.type, 100)
+ self.assertEqual(l1.value, "foo")
+
+ def testLeafRepr(self):
+ l1 = pytree.Leaf(100, "foo")
+ self.assertEqual(repr(l1), "Leaf(100, 'foo')")
+
+ def testLeafStr(self):
+ l1 = pytree.Leaf(100, "foo")
+ self.assertEqual(str(l1), "foo")
+ l2 = pytree.Leaf(100, "foo", context=(" ", (10, 1)))
+ self.assertEqual(str(l2), " foo")
+
+ def testLeafEq(self):
+ l1 = pytree.Leaf(100, "foo")
+ l2 = pytree.Leaf(100, "foo", context=(" ", (1, 0)))
+ self.assertEqual(l1, l2)
+ l3 = pytree.Leaf(101, "foo")
+ l4 = pytree.Leaf(100, "bar")
+ self.assertNotEqual(l1, l3)
+ self.assertNotEqual(l1, l4)
+
+ def testLeafPrefix(self):
+ l1 = pytree.Leaf(100, "foo")
+ self.assertEqual(l1.get_prefix(), "")
+ l1.set_prefix(" ##\n\n")
+ self.assertEqual(l1.get_prefix(), " ##\n\n")
+
+ def testNode(self):
+ l1 = pytree.Leaf(100, "foo")
+ l2 = pytree.Leaf(200, "bar")
+ n1 = pytree.Node(1000, [l1, l2])
+ self.assertEqual(n1.type, 1000)
+ self.assertEqual(n1.children, (l1, l2))
+
+ def testNodeRepr(self):
+ l1 = pytree.Leaf(100, "foo")
+ l2 = pytree.Leaf(100, "bar", context=(" ", (1, 0)))
+ n1 = pytree.Node(1000, [l1, l2])
+ self.assertEqual(repr(n1),
+ "Node(1000, (%s, %s))" % (repr(l1), repr(l2)))
+
+ def testNodeStr(self):
+ l1 = pytree.Leaf(100, "foo")
+ l2 = pytree.Leaf(100, "bar", context=(" ", (1, 0)))
+ n1 = pytree.Node(1000, [l1, l2])
+ self.assertEqual(str(n1), "foo bar")
+
+ def testNodePrefix(self):
+ l1 = pytree.Leaf(100, "foo")
+ self.assertEqual(l1.get_prefix(), "")
+ n1 = pytree.Node(1000, [l1])
+ self.assertEqual(n1.get_prefix(), "")
+ n1.set_prefix(" ")
+ self.assertEqual(n1.get_prefix(), " ")
+ self.assertEqual(l1.get_prefix(), " ")
+
+ def testNodeEq(self):
+ n1 = pytree.Node(1000, ())
+ n2 = pytree.Node(1000, [], context=(" ", (1, 0)))
+ self.assertEqual(n1, n2)
+ n3 = pytree.Node(1001, ())
+ self.assertNotEqual(n1, n3)
+
+ def testNodeEqRecursive(self):
+ l1 = pytree.Leaf(100, "foo")
+ l2 = pytree.Leaf(100, "foo")
+ n1 = pytree.Node(1000, [l1])
+ n2 = pytree.Node(1000, [l2])
+ self.assertEqual(n1, n2)
+ l3 = pytree.Leaf(100, "bar")
+ n3 = pytree.Node(1000, [l3])
+ self.assertNotEqual(n1, n3)
+
+ def testReplace(self):
+ l1 = pytree.Leaf(100, "foo")
+ l2 = pytree.Leaf(100, "+")
+ l3 = pytree.Leaf(100, "bar")
+ n1 = pytree.Node(1000, [l1, l2, l3])
+ self.assertEqual(n1.children, (l1, l2, l3))
+ l2new = pytree.Leaf(100, "-")
+ l2.replace(l2new)
+ self.assertEqual(n1.children, (l1, l2new, l3))
+
+ def testConvert(self):
+ # XXX
+ pass
+
+
+class TestPatterns(unittest.TestCase):
+
+ """Unit tests for tree matching patterns."""
+
+ def testBasicPatterns(self):
+ # Build a tree
+ l1 = pytree.Leaf(100, "foo")
+ l2 = pytree.Leaf(100, "bar")
+ l3 = pytree.Leaf(100, "foo")
+ n1 = pytree.Node(1000, [l1, l2])
+ n2 = pytree.Node(1000, [l3])
+ root = pytree.Node(1000, [n1, n2])
+ # Build a pattern matching a leaf
+ pl = pytree.LeafPattern(100, "foo", name="pl")
+ r = {}
+ self.assertEqual(pl.match(root, results=r), False)
+ self.assertEqual(r, {})
+ self.assertEqual(pl.match(n1, results=r), False)
+ self.assertEqual(r, {})
+ self.assertEqual(pl.match(n2, results=r), False)
+ self.assertEqual(r, {})
+ self.assertEqual(pl.match(l1, results=r), True)
+ self.assertEqual(r, {"pl": l1})
+ r = {}
+ self.assertEqual(pl.match(l2, results=r), False)
+ self.assertEqual(r, {})
+ # Build a pattern matching a node
+ pn = pytree.NodePattern(1000, [pl], name="pn")
+ self.assertEqual(pn.match(root, results=r), False)
+ self.assertEqual(r, {})
+ self.assertEqual(pn.match(n1, results=r), False)
+ self.assertEqual(r, {})
+ self.assertEqual(pn.match(n2, results=r), True)
+ self.assertEqual(r, {"pn": n2, "pl": l3})
+ r = {}
+ self.assertEqual(pn.match(l1, results=r), False)
+ self.assertEqual(r, {})
+ self.assertEqual(pn.match(l2, results=r), False)
+ self.assertEqual(r, {})
+
+ def testWildcardPatterns(self):
+ # Build a tree for testing
+ l1 = pytree.Leaf(100, "foo")
+ l2 = pytree.Leaf(100, "bar")
+ l3 = pytree.Leaf(100, "foo")
+ n1 = pytree.Node(1000, [l1, l2])
+ n2 = pytree.Node(1000, [l3])
+ root = pytree.Node(1000, [n1, n2])
+ # Build a pattern
+ pl = pytree.LeafPattern(100, "foo", name="pl")
+ pn = pytree.NodePattern(1000, [pl], name="pn")
+ pw = pytree.WildcardPattern([[pn], [pl, pl]], name="pw")
+ r = {}
+ self.assertEqual(pw.match_seq([root], r), False)
+ self.assertEqual(r, {})
+ self.assertEqual(pw.match_seq([n1], r), False)
+ self.assertEqual(r, {})
+ self.assertEqual(pw.match_seq([n2], r), True)
+ # These are easier to debug
+ self.assertEqual(sorted(r.keys()), ["pl", "pn", "pw"])
+ self.assertEqual(r["pl"], l1)
+ self.assertEqual(r["pn"], n2)
+ self.assertEqual(r["pw"], (n2,))
+ # But this is equivalent
+ self.assertEqual(r, {"pl": l1, "pn": n2, "pw": (n2,)})
+ r = {}
+ self.assertEqual(pw.match_seq([l1, l3], r), True)
+ self.assertEqual(r, {"pl": l3, "pw": (l1, l3)})
+ self.assert_(r["pl"] is l3)
+ r = {}
+
+ def testGenerateMatches(self):
+ la = pytree.Leaf(1, "a")
+ lb = pytree.Leaf(1, "b")
+ lc = pytree.Leaf(1, "c")
+ ld = pytree.Leaf(1, "d")
+ le = pytree.Leaf(1, "e")
+ lf = pytree.Leaf(1, "f")
+ leaves = [la, lb, lc, ld, le, lf]
+ root = pytree.Node(1000, leaves)
+ pa = pytree.LeafPattern(1, "a", "pa")
+ pb = pytree.LeafPattern(1, "b", "pb")
+ pc = pytree.LeafPattern(1, "c", "pc")
+ pd = pytree.LeafPattern(1, "d", "pd")
+ pe = pytree.LeafPattern(1, "e", "pe")
+ pf = pytree.LeafPattern(1, "f", "pf")
+ pw = pytree.WildcardPattern([[pa, pb, pc], [pd, pe], [pa, pb], [pc, pd], [pe, pf]],
+ min=1, max=4, name="pw")
+ self.assertEqual([x[0] for x in pw.generate_matches(leaves)], [3, 5, 2, 4, 6])
+ pr = pytree.NodePattern(type=1000, content=[pw], name="pr")
+ matches = list(pr.generate_matches([root]))
+ self.assertEqual(len(matches), 1)
+ c, r = matches[0]
+ self.assertEqual(c, 1)
+ self.assertEqual(str(r["pr"]), "abcdef")
+ self.assertEqual(r["pw"], (la, lb, lc, ld, le, lf))
+ for c in "abcdef":
+ self.assertEqual(r["p" + c], pytree.Leaf(1, c))
+
+ def testHasKeyExample(self):
+ pattern = pytree.NodePattern(331,
+ (pytree.LeafPattern(7),
+ pytree.WildcardPattern(name="args"),
+ pytree.LeafPattern(8)))
+ l1 = pytree.Leaf(7, "(")
+ l2 = pytree.Leaf(3, "x")
+ l3 = pytree.Leaf(8, ")")
+ node = pytree.Node(331, (l1, l2, l3))
+ r = {}
+ self.assert_(pattern.match(node, r))
+ self.assertEqual(r["args"], (l2,))
+
+
+if __name__ == "__main__":
+ import sys
+ if not sys.argv[1:]:
+ sys.argv.append("-v")
+ unittest.main()
More information about the Python-checkins
mailing list