[Python-checkins] r84375 - in sandbox/trunk/2to3/lib2to3: btm_matcher.py btm_utils.py fixer_base.py pytree.py refactor.py tests/test_pytree.py

george.boutsioukis python-checkins at python.org
Tue Aug 31 15:38:53 CEST 2010


Author: george.boutsioukis
Date: Tue Aug 31 15:38:53 2010
New Revision: 84375

Log:
Idiomatic code changes & stylistic issues fixed in the BottomMatcher module. Thanks to Benjamin Peterson for taking the time to review the code.



Modified:
   sandbox/trunk/2to3/lib2to3/btm_matcher.py
   sandbox/trunk/2to3/lib2to3/btm_utils.py
   sandbox/trunk/2to3/lib2to3/fixer_base.py
   sandbox/trunk/2to3/lib2to3/pytree.py
   sandbox/trunk/2to3/lib2to3/refactor.py
   sandbox/trunk/2to3/lib2to3/tests/test_pytree.py

Modified: sandbox/trunk/2to3/lib2to3/btm_matcher.py
==============================================================================
--- sandbox/trunk/2to3/lib2to3/btm_matcher.py	(original)
+++ sandbox/trunk/2to3/lib2to3/btm_matcher.py	Tue Aug 31 15:38:53 2010
@@ -8,23 +8,21 @@
 __author__ = "George Boutsioukis <gboutsioukis at gmail.com>"
 
 import logging
-from .btm_utils import *
+import itertools
+import pytree
+from collections import defaultdict
+
+from .btm_utils import reduce_tree
 
 class BMNode(object):
     """Class for a node of the Aho-Corasick automaton used in matching"""
-    last_id = 0
+    count = itertools.count()
     def __init__(self):
         self.transition_table = {}
         self.fixers = []
-        self.id = BMNode.new_id()
+        self.id = next(BMNode.count)
         self.content = ''
 
-    @classmethod
-    def new_id(cls):
-        new_id = cls.last_id
-        cls.last_id += 1
-        return new_id
-    
 class BottomMatcher(object):
     """The main matcher class. After instantiating the patterns should
     be added using the add_fixer method"""
@@ -54,7 +52,7 @@
         if not pattern:
             #print("empty pattern")
             return [start]
-        if type(pattern[0]) is tuple:
+        if isinstance(pattern[0], tuple):
             #alternatives
             #print("alternatives")
             match_nodes = []
@@ -67,20 +65,20 @@
             return match_nodes
         else:
             #single token
-                #not last
-                if pattern[0] not in start.transition_table.keys():
-                    #transition did not exist, create new
-                    next_node = BMNode()
-                    start.transition_table[pattern[0]] = next_node
-                else:
-                    #transition exists already, follow
-                    next_node = start.transition_table[pattern[0]]
-                    
-                if pattern[1:]:
-                    end_nodes = self.add(pattern[1:], start=next_node)
-                else:
-                    end_nodes = [next_node]
-                return end_nodes
+            #not last
+            if pattern[0] not in start.transition_table:
+                #transition did not exist, create new
+                next_node = BMNode()
+                start.transition_table[pattern[0]] = next_node
+            else:
+                #transition exists already, follow
+                next_node = start.transition_table[pattern[0]]
+
+            if pattern[1:]:
+                end_nodes = self.add(pattern[1:], start=next_node)
+            else:
+                end_nodes = [next_node]
+            return end_nodes
 
     def run(self, leaves):
         """The main interface with the bottom matcher. The tree is
@@ -99,14 +97,14 @@
            A dictionary of node matches with fixers as the keys
         """
         current_ac_node = self.root
-        results = {}
+        results = defaultdict(list)
         for leaf in leaves:
             current_ast_node = leaf
-            while(current_ast_node):
+            while current_ast_node:
                 current_ast_node.was_checked = True
                 for child in current_ast_node.children:
                     # multiple statements, recheck
-                    if hasattr(child, "value") and child.value==';':
+                    if isinstance(child, pytree.Leaf) and child.value == u";":
                         current_ast_node.was_checked = False
                         break
                 if current_ast_node.type == 1:
@@ -115,24 +113,24 @@
                 else:
                     node_token = current_ast_node.type
 
-                if node_token in current_ac_node.transition_table.keys():
+                if node_token in current_ac_node.transition_table:
                     #token matches
                     current_ac_node = current_ac_node.transition_table[node_token]
                     for fixer in current_ac_node.fixers:
-                        if not fixer in results.keys():
+                        if not fixer in results:
                             results[fixer] = []
                         results[fixer].append(current_ast_node)
 
                 else:
                     #matching failed, reset automaton
                     current_ac_node = self.root
-                    if current_ast_node.parent is not None \
-                           and current_ast_node.parent.was_checked:
+                    if (current_ast_node.parent is not None
+                        and current_ast_node.parent.was_checked):
                         #the rest of the tree upwards has been checked, next leaf
                         break
 
                     #recheck the rejected node once from the root
-                    if node_token in current_ac_node.transition_table.keys():
+                    if node_token in current_ac_node.transition_table:
                         #token matches
                         current_ac_node = current_ac_node.transition_table[node_token]
                         for fixer in current_ac_node.fixers:

Modified: sandbox/trunk/2to3/lib2to3/btm_utils.py
==============================================================================
--- sandbox/trunk/2to3/lib2to3/btm_utils.py	(original)
+++ sandbox/trunk/2to3/lib2to3/btm_utils.py	Tue Aug 31 15:38:53 2010
@@ -4,10 +4,10 @@
 from .pgen2 import grammar, token
 from .pygram import pattern_symbols, python_symbols
 
-syms = pattern_symbols.__dict__
-pysyms = python_symbols.__dict__
+syms = pattern_symbols
+pysyms = python_symbols
 tokens = grammar.opmap
-token_labels = token.__dict__
+token_labels = token
 
 TYPE_ANY = -1
 TYPE_ALTERNATIVES = -2
@@ -18,19 +18,17 @@
     pattern tree during the conversion to sets of leaf-to-root
     subpatterns"""
     
-    def __init__(self, type=None, name=None, times=1):
+    def __init__(self, type=None, name=None):
         self.type = type
         self.name = name
-        self.times = times
         self.children = []
         self.leaf = False
         self.parent = None
         self.alternatives = []
-        self.alternatives_number = None
         self.group = []
 
     def __repr__(self):
-        return str(self.type) + ' ' + str(self.name) + ' ' + str(self.times)
+        return str(self.type) + ' ' + str(self.name)
 
     def leaf_to_root(self):
         """Internal method. Returns a characteristic path of the
@@ -65,7 +63,7 @@
                     subp = None
                     break
 
-            if node.type == token_labels['NAME'] and node.name:
+            if node.type == token_labels.NAME and node.name:
                 #in case of type=name, use the name instead
                 subp.append(node.name)
             else:
@@ -113,17 +111,16 @@
     
     new_node = None
     #switch on the node type
-    if node.type == syms['Matcher']:
-        
+    if node.type == syms.Matcher:
         #skip
-        new_node = reduce_tree(node.children[0])
+        node = node.children[0]
 
-    elif node.type == syms['Alternatives']:
+    if node.type == syms.Alternatives  :
         #2 cases
-        if len(node.children)<=2:
+        if len(node.children) <= 2:
             #just a single 'Alternative', skip this node
             new_node = reduce_tree(node.children[0], parent)
-        elif len(node.children)>2:
+        else:
             #real alternatives
             new_node = MinNode(type=TYPE_ALTERNATIVES)
             #skip odd children('|' tokens)
@@ -133,10 +130,8 @@
                 reduced = reduce_tree(child, new_node)
                 if reduced is not None:
                     new_node.children.append(reduced)
-        else:
-            raise Exception
-    elif node.type == syms['Alternative']:
-        if len(node.children)>1:
+    elif node.type == syms.Alternative:
+        if len(node.children) > 1:
             
             new_node = MinNode(type=TYPE_GROUP)
             for child in node.children:
@@ -150,17 +145,17 @@
         else:
             new_node = reduce_tree(node.children[0], parent)
 
-    elif node.type == syms['Unit']:
-        if hasattr(node.children[0], "value") and \
-               node.children[0].value == '(':
+    elif node.type == syms.Unit:
+        if (isinstance(node.children[0], pytree.Leaf) and
+            node.children[0].value == '('):
             #skip parentheses
             return reduce_tree(node.children[1], parent)
-        if (hasattr(node.children[0], "value") and \
-               node.children[0].value == '[') \
-               or \
-               (len(node.children)>1 and \
-               hasattr(node.children[1], "value") and \
-               node.children[1].value == '['):
+        if ((isinstance(node.children[0], pytree.Leaf) and
+               node.children[0].value == '[')
+               or
+               (len(node.children)>1 and
+               hasattr(node.children[1], "value") and
+               node.children[1].value == '[')):
             #skip whole unit if its optional
             return None
             
@@ -172,13 +167,13 @@
         has_variable_name = False
 
         for child in node.children:
-            if child.type == syms['Details']:
+            if child.type == syms.Details:
                 leaf = False
                 details_node = child
-            elif child.type == syms['Repeater']:
+            elif child.type == syms.Repeater:
                 has_repeater = True
                 repeater_node = child
-            elif child.type == syms['Alternatives']:
+            elif child.type == syms.Alternatives:
                 alternatives_node = child
             if hasattr(child, 'value') and child.value == '=': # variable name
                 has_variable_name = True
@@ -194,25 +189,25 @@
             name_leaf = node.children[0]
 
         #set node type
-        if name_leaf.type == token_labels['NAME']:
+        if name_leaf.type == token_labels.NAME:
             #(python) non-name or wildcard
             if name_leaf.value == 'any':
                 new_node = MinNode(type=TYPE_ANY)
             else:
-                if name_leaf.value in token_labels:
-                    new_node = MinNode(type=token_labels[name_leaf.value])
+                if hasattr(token_labels, name_leaf.value):
+                    new_node = MinNode(type=getattr(token_labels, name_leaf.value))
                 else:
-                    new_node = MinNode(type=pysyms[name_leaf.value])
+                    new_node = MinNode(type=getattr(pysyms, name_leaf.value))
                     
-        elif name_leaf.type == token_labels['STRING']:
+        elif name_leaf.type == token_labels.STRING:
             #(python) name or character; remove the apostrophes from
             #the string value
-            name = name_leaf.value[1:][:-1]
+            name = name_leaf.value.strip("'")
             if name in tokens:
                 new_node = MinNode(type=tokens[name])
             else:
-                new_node = MinNode(type=token_labels['NAME'], name=name)
-        elif name_leaf.type == syms['Alternatives']:
+                new_node = MinNode(type=token_labels.NAME, name=name)
+        elif name_leaf.type == syms.Alternatives:
             new_node = reduce_tree(alternatives_node, parent)
 
         #handle repeaters
@@ -225,11 +220,12 @@
                 pass
             else:
                 #TODO: handle {min, max} repeaters
+                raise NotImplementedError
                 pass
 
         #add children
         if details_node and new_node is not None:
-            for child in details_node.children[1:][:-1]:
+            for child in details_node.children[1:-1]:
                 #skip '<', '>' markers
                 reduced = reduce_tree(child, new_node)
                 if reduced is not None:
@@ -244,9 +240,9 @@
     Current order used is:
     names > common_names > common_chars
     """
-    if type(subpatterns) is not list:
+    if not isinstance(subpatterns, list):
         return subpatterns
-    if type(subpatterns) is list and len(subpatterns)==1:
+    if len(subpatterns)==1:
         return subpatterns[0]
 
     # first pick out the ones containing variable names
@@ -258,10 +254,10 @@
     for subpattern in subpatterns:
         if any(rec_test(subpattern, lambda x: type(x) is str)):
             if any(rec_test(subpattern,
-                            lambda x: type(x) is str and x in common_chars)):
+                            lambda x: isinstance(x, str) and x in common_chars)):
                 subpatterns_with_common_chars.append(subpattern)
             elif any(rec_test(subpattern,
-                              lambda x: type(x) is str and x in common_names)):
+                              lambda x: isinstance(x, str) and x in common_names)):
                 subpatterns_with_common_names.append(subpattern)
                 
             else:
@@ -274,13 +270,13 @@
     elif subpatterns_with_common_chars:
         subpatterns = subpatterns_with_common_chars
     # of the remaining subpatterns pick out the longest one
-    return sorted(subpatterns, key=len, reverse=True)[0]
+    return max(subpatterns, key=len)
 
 def rec_test(sequence, test_func):
     """Tests test_func on all items of sequence and items of included
     sub-iterables"""
     for x in sequence:
-        if type(x) is list or type(x) is tuple:
+        if isinstance(x, (list, tuple)):
             for y in rec_test(x, test_func):
                 yield y
         else:

Modified: sandbox/trunk/2to3/lib2to3/fixer_base.py
==============================================================================
--- sandbox/trunk/2to3/lib2to3/fixer_base.py	(original)
+++ sandbox/trunk/2to3/lib2to3/fixer_base.py	Tue Aug 31 15:38:53 2010
@@ -65,8 +65,9 @@
         self.{pattern,PATTERN} in .match().
         """
         if self.PATTERN is not None:
-            self.pattern, self.pattern_tree = \
-                PatternCompiler().compile_pattern(self.PATTERN, with_tree=True)
+            PC = PatternCompiler()
+            self.pattern, self.pattern_tree = PC.compile_pattern(self.PATTERN,
+                                                                 with_tree=True)
 
     def set_filename(self, filename):
         """Set the filename, and a logger derived from it.

Modified: sandbox/trunk/2to3/lib2to3/pytree.py
==============================================================================
--- sandbox/trunk/2to3/lib2to3/pytree.py	(original)
+++ sandbox/trunk/2to3/lib2to3/pytree.py	Tue Aug 31 15:38:53 2010
@@ -216,21 +216,12 @@
         for child in self.children:
             for x in child.leaves():
                 yield x
-        if type(self) is Leaf:
-            yield self
-
-    def to_root(self):
-        yield self
-        if self.parent:
-            for p in self.parent.to_root():
-                yield p
 
     def depth(self):
         if self.parent is None:
             return 0
         return 1 + self.parent.depth()
             
-
     def get_suffix(self):
         """
         Return the string immediately following the invocant node. This is
@@ -252,7 +243,7 @@
     def __init__(self,type, children,
                  context=None,
                  prefix=None,
-                 fixers_applied=[]):
+                 fixers_applied=None):
         """
         Initializer.
 
@@ -269,7 +260,10 @@
             ch.parent = self
         if prefix is not None:
             self.prefix = prefix
-        self.fixers_applied = fixers_applied[:]
+        if fixers_applied:
+            self.fixers_applied = fixers_applied[:]
+        else:
+            self.fixers_applied = None
     
     def __repr__(self):
         """Return a canonical string representation."""
@@ -308,7 +302,7 @@
         """Return a pre-order iterator for the tree."""
         yield self
         for child in self.children:
-            for node in child.post_order():
+            for node in child.pre_order():
                 yield node
 
     def _prefix_getter(self):
@@ -409,6 +403,9 @@
                     (self.prefix, (self.lineno, self.column)),
                     fixers_applied=self.fixers_applied)
 
+    def leaves(self):
+        yield self
+
     def post_order(self):
         """Return a post-order iterator for the tree."""
         yield self

Modified: sandbox/trunk/2to3/lib2to3/refactor.py
==============================================================================
--- sandbox/trunk/2to3/lib2to3/refactor.py	(original)
+++ sandbox/trunk/2to3/lib2to3/refactor.py	Tue Aug 31 15:38:53 2010
@@ -422,9 +422,9 @@
         # obtain a set of candidate nodes
         match_set = self.BM.run(tree.leaves())
 
-        while any(list(match_set.values())):
+        while any(match_set.values()):
             for fixer in self.BM.fixers:
-                if fixer in match_set.keys() and match_set[fixer]:
+                if fixer in match_set and match_set[fixer]:
                     #sort by depth; apply fixers from bottom(of the AST) to top
                     match_set[fixer].sort(key=pytree.Base.depth, reverse=True)
 
@@ -444,7 +444,7 @@
                             # previous transformation ; skip
                             continue
                         
-                        if fixer in node.fixers_applied:
+                        if node.fixers_applied and fixer in node.fixers_applied:
                             # do not apply the same fixer again
                             continue
 
@@ -458,14 +458,17 @@
                                 for node in new.post_order():
                                     # do not apply the fixer again to
                                     # this or any subnode
+                                    if not node.fixers_applied:
+                                        node.fixers_applied = []
                                     node.fixers_applied.append(fixer)
 
                                 # update the original match set for
                                 # the added code
                                 new_matches = self.BM.run(new.leaves())
-                                for fxr in new_matches.keys():
-                                    if not fxr in list(match_set.keys()):
+                                for fxr in new_matches:
+                                    if not fxr in match_set:
                                         match_set[fxr]=[]
+                                        
                                     match_set[fxr].extend(new_matches[fxr])
                                     
         for fixer in chain(self.pre_order, self.post_order):

Modified: sandbox/trunk/2to3/lib2to3/tests/test_pytree.py
==============================================================================
--- sandbox/trunk/2to3/lib2to3/tests/test_pytree.py	(original)
+++ sandbox/trunk/2to3/lib2to3/tests/test_pytree.py	Tue Aug 31 15:38:53 2010
@@ -178,6 +178,27 @@
         self.assertEqual(str(n1), "foo**bar")
         self.assertTrue(isinstance(n1.children, list))
 
+    def test_leaves(self):
+        l1 = pytree.Leaf(100, "foo")
+        l2 = pytree.Leaf(100, "bar")
+        l3 = pytree.Leaf(100, "fooey")
+        n2 = pytree.Node(1000, [l1, l2])
+        n3 = pytree.Node(1000, [l3])
+        n1 = pytree.Node(1000, [n2, n3])
+
+        self.assertEqual(list(n1.leaves()), [l1, l2, l3])
+
+    def test_depth(self):
+        l1 = pytree.Leaf(100, "foo")
+        l2 = pytree.Leaf(100, "bar")
+        n2 = pytree.Node(1000, [l1, l2])
+        n3 = pytree.Node(1000, [])
+        n1 = pytree.Node(1000, [n2, n3])
+
+        self.assertEqual(l1.depth(), 2)
+        self.assertEqual(n3.depth(), 1)
+        self.assertEqual(n1.depth(), 0)
+        
     def test_post_order(self):
         l1 = pytree.Leaf(100, "foo")
         l2 = pytree.Leaf(100, "bar")


More information about the Python-checkins mailing list