[Python-checkins] r52895 - sandbox/trunk/2to3/fix_has_key.py sandbox/trunk/2to3/pytree.py

guido.van.rossum python-checkins at python.org
Fri Dec 1 21:26:53 CET 2006


Author: guido.van.rossum
Date: Fri Dec  1 21:26:53 2006
New Revision: 52895

Modified:
   sandbox/trunk/2to3/fix_has_key.py
   sandbox/trunk/2to3/pytree.py
Log:
Add a pattern matching capability.  Only one example updated to use it so far.

(It needs more powerful wildcards -- it has the equivalent of the
regex '.' but not of '.*'.)


Modified: sandbox/trunk/2to3/fix_has_key.py
==============================================================================
--- sandbox/trunk/2to3/fix_has_key.py	(original)
+++ sandbox/trunk/2to3/fix_has_key.py	Fri Dec  1 21:26:53 2006
@@ -49,20 +49,23 @@
         visit(child, func)
 
 # Sample nodes
-n_dot = pytree.Leaf(token.DOT, ".")
-n_has_key = pytree.Leaf(token.NAME, "has_key")
-n_trailer_has_key = pytree.Node(syms.trailer, (n_dot, n_has_key))
-n_lpar = pytree.Leaf(token.LPAR, "(")
 n_star = pytree.Leaf(token.STAR, "*")
 n_comma = pytree.Leaf(token.COMMA, ",")
 n_in = pytree.Leaf(token.NAME, "in", context=(" ", (0, 0)))
 
-import pdb
+# Tree matching patterns: a ".has_key" trailer and a "(<args>)" trailer
+p_has_key = pytree.NodePattern(syms.trailer,
+                               (pytree.LeafPattern(token.DOT),
+                                pytree.LeafPattern(token.NAME, "has_key")))
+p_trailer_args = pytree.NodePattern(syms.trailer,
+                                    (pytree.LeafPattern(token.LPAR),
+                                     pytree.NodePattern(name="args"),  # any node
+                                     pytree.LeafPattern(token.RPAR)))
+
 
 def fix_has_key(node):
-    if node != n_trailer_has_key:
+    if not p_has_key.match(node):
         return
-    # XXX Could use more DOM manipulation primitives and matching operations
     parent = node.parent
     nodes = parent.children
     for i, n in enumerate(nodes):
@@ -76,14 +79,10 @@
     if len(nodes) != i+2:
         return # Too much follows ".has_key", e.g. ".has_key(x).blah"
     next = nodes[i+1]
-    if next.type != syms.trailer:
-        return # ".has_key" not followed by another trailer
-    next_children = next.children
-    if next_children[0] != n_lpar:
-        return # ".has_key" not followed by "(...)"
-    if len(next_children) != 3:
-        return # ".has_key" followed by "()"
-    argsnode = next_children[1]
+    results = {}
+    if not p_trailer_args.match(next, results):
+        return
+    argsnode = results["args"]
     arg = argsnode
     if argsnode.type == syms.arglist:
         args = argsnode.children

Modified: sandbox/trunk/2to3/pytree.py
==============================================================================
--- sandbox/trunk/2to3/pytree.py	(original)
+++ sandbox/trunk/2to3/pytree.py	Fri Dec  1 21:26:53 2006
@@ -6,7 +6,7 @@
 This is a very concrete parse tree; we need to keep every token and
 even the comments and whitespace between tokens.
 
-A node may be a subnode of at most one parent.
+There's also a matching pattern implementation here.
 """
 
 __author__ = "Guido van Rossum <guido at python.org>"
@@ -18,6 +18,8 @@
 
     This provides some default functionality and boilerplate using the
     template pattern.
+
+    A node may be a subnode of at most one parent.
     """
 
     # Default values for instance variables
@@ -214,3 +216,124 @@
         return Node(type, children, context=context)
     else:
         return Leaf(type, value, context=context)
+
+
+class BasePattern(object):
+
+    """A pattern is a tree matching pattern.
+
+    It looks for a specific node type (token or symbol), and
+    optionally for a specific content.
+    """
+
+    # Defaults for instance variables
+    type = None     # Node type (token if < 256, symbol if >= 256)
+    content = None  # Optional content matching pattern
+    name = None     # Optional name used to store match in results dict
+
+    def __new__(cls, *args, **kwds):
+        """Constructor that prevents BasePattern from being instantiated."""
+        assert cls is not BasePattern, "Cannot instantiate BasePattern"
+        return object.__new__(cls)  # object.__new__ takes no extra args
+
+    def match(self, node, results=None):
+        """Does this node match the pattern?
+
+        Returns True if it matches, False if not.
+
+        If results is not None, it must be a dict which will be
+        updated with the nodes matching named subpatterns.
+        """
+        if self.type is not None and node.type != self.type:
+            return False
+        if self.content is not None:
+            r = None  # scratch dict: a failed submatch leaves results alone
+            if results is not None:
+                r = {}
+            if not self._submatch(node, r):
+                return False
+            if r:
+                results.update(r)
+        if results is not None and self.name is not None:
+            results[self.name] = node
+        return True
+
+
+class NodePattern(BasePattern):
+
+    def __init__(self, type=None, content=None, name=None):
+        """Constructor.  Takes optional type, content, and name.
+
+        The type, if given, must be a symbol type (>= 256).
+
+        The content, if given, must be a sequence of Patterns that
+        must match the node's children exactly.
+
+        If a name is given, the matching node is stored in the results
+        dict under that key.
+        """
+        if type is not None:
+            assert type >= 256, type
+        else:  # no type: wildcard for any node, so no content allowed
+            assert content is None, repr(content)
+        if content is not None:
+            assert not isinstance(content, basestring), repr(content)
+            content = tuple(content)
+            for i, item in enumerate(content):
+                assert isinstance(item, BasePattern), (i, item)
+        self.type = type
+        self.content = content
+        self.name = name
+
+    def _submatch(self, node, results=None):
+        """Match the pattern's content to the node's children.
+
+        This assumes the node type matches and self.content is not None.
+
+        Returns True if it matches, False if not.
+
+        If results is not None, it must be a dict which will be
+        updated with the nodes matching named subpatterns.
+
+        When returning False, the results dict may still be updated.
+        """
+        if len(self.content) != len(node.children):  # one pattern per child
+            return False
+        for subpattern, child in zip(self.content, node.children):
+            if not subpattern.match(child, results):
+                return False
+        return True
+
+
+class LeafPattern(BasePattern):
+
+    def __init__(self, type, content=None, name=None):
+        """Constructor.  Takes a type, optional content, and optional name.
+
+        The type must be a token type (< 256).
+
+        The content, if given, must be a string.
+
+        If a name is given, the matching node is stored in the results
+        dict under that key.
+        """
+        assert type < 256, type
+        if content is not None:
+            assert isinstance(content, basestring), repr(content)
+        self.type = type
+        self.content = content
+        self.name = name
+
+    def _submatch(self, node, results=None):
+        """Match the pattern's content to the node's value.
+
+        This assumes the node type matches and self.content is not None.
+
+        Returns True if it matches, False if not.
+
+        The results argument is accepted to match the signature of
+        NodePattern._submatch.
+
+        A leaf pattern has no subpatterns, so results is never updated.
+        """
+        return self.content == node.value


More information about the Python-checkins mailing list