[Python-checkins] r52868 - in sandbox/trunk/2to3: README example.py fix_has_key.py pgen2/driver.py pgen2/parse.py pgen2/test.py play.py pynode.py

guido.van.rossum python-checkins at python.org
Thu Nov 30 07:15:55 CET 2006


Author: guido.van.rossum
Date: Thu Nov 30 07:15:54 2006
New Revision: 52868

Added:
   sandbox/trunk/2to3/README   (contents, props changed)
   sandbox/trunk/2to3/fix_has_key.py   (contents, props changed)
Modified:
   sandbox/trunk/2to3/example.py
   sandbox/trunk/2to3/pgen2/driver.py
   sandbox/trunk/2to3/pgen2/parse.py
   sandbox/trunk/2to3/pgen2/test.py
   sandbox/trunk/2to3/play.py   (contents, props changed)
   sandbox/trunk/2to3/pynode.py
Log:
Got a working 'has_key' refactoring, on a par with ../refactor/has_key.py.


Added: sandbox/trunk/2to3/README
==============================================================================
--- (empty file)
+++ sandbox/trunk/2to3/README	Thu Nov 30 07:15:54 2006
@@ -0,0 +1,21 @@
+A refactoring tool for converting Python 2.x code to 3.0.
+
+This is a prototype!
+
+Files:
+
+play.py        - test program
+pynode.py      - parse tree nodes for Python grammar
+tokenize.py    - modified version of stdlib tokenize.py (1)
+fix_has_key.py - refactoring tool changing 'x.has_key(y)' into 'y in x'
+example.py     - example input
+Grammar.txt    - Grammar input (same as Grammar/Grammar in Python 2.5)
+Grammar.pickle - Pickled grammar tables
+pgen2/         - Parser generator and driver (2)
+
+Notes:
+
+(1) tokenize.py was modified to yield a NL pseudo-token for backslash
+    continuations, so the original source can be reproduced exactly.
+(2) pgen2 was developed while I was at Elemental Security.  It was
+    modified to suit the needs of this refactoring tool.

Modified: sandbox/trunk/2to3/example.py
==============================================================================
--- sandbox/trunk/2to3/example.py	(original)
+++ sandbox/trunk/2to3/example.py	Thu Nov 30 07:15:54 2006
@@ -1,8 +1,8 @@
 #!/usr/bin/python
-"""Docstring.
-"""
-print (12 + # Hello
-            # world
+"""Docstring."""
+
+d = {"x": 42}
+if d.has_key("x") or d.has_key("y"):
+    print d["x"]
 
-       12)
 # This is the last line.

Added: sandbox/trunk/2to3/fix_has_key.py
==============================================================================
--- (empty file)
+++ sandbox/trunk/2to3/fix_has_key.py	Thu Nov 30 07:15:54 2006
@@ -0,0 +1,109 @@
+#!/usr/bin/env python2.5
+# Copyright 2006 Python Software Foundation. All Rights Reserved.
+
+"""Refactoring tool: change 'x.has_key(y)' into 'y in x'."""
+
+__author__ = "Guido van Rossum <guido at python.org>"
+
+# Python imports
+import os
+import sys
+import logging
+
+import pgen2
+from pgen2 import driver
+
+import pynode
+
+logging.basicConfig(level=logging.WARN)
+
+def main():
+    args = sys.argv[1:] or ["example.py"]
+
+    gr = driver.load_grammar("Grammar.txt")
+    dr = driver.Driver(gr, convert=pynode.convert)
+
+    for fn in args:
+        print "Parsing", fn
+        tree = dr.parse_file(fn)
+        tree.set_parents()
+        refactor(tree)
+        diff(fn, tree)
+
+def refactor(tree):
+    visit(tree, fix_has_key)
+
+def visit(node, func):
+    func(node)
+    for child in node.get_children():
+        visit(child, func)
+
+# Sample nodes
+_context = ("", (0, 0))
+n_dot = pynode.Token(_context, ".")
+n_has_key = pynode.Name(_context, "has_key")
+n_trailer_has_key = pynode.trailer(_context, n_dot, n_has_key)
+n_lpar = pynode.Token(_context, "(")
+n_star = pynode.Token(_context, "*")
+n_comma = pynode.Token(_context, ",")
+n_in = pynode.Token((" ", (0, 0)), "in")
+
+def fix_has_key(node):
+    if node != n_trailer_has_key:
+        return
+    # XXX Could use more DOM manipulation primitives and matching operations
+    parent = node.parent
+    nodes = parent.get_children()
+    for i, n in enumerate(nodes):
+        if n is node:
+            break
+    else:
+        print "Can't find node in parent?!"
+        return
+    if i+1 >= len(nodes):
+        return # Nothing follows ".has_key"
+    if len(nodes) != i+2:
+        return # Too much follows ".has_key", e.g. ".has_key(x).blah"
+    next = nodes[i+1]
+    if not isinstance(next, pynode.trailer):
+        return # ".has_key" not followed by another trailer
+    next_children = next.get_children()
+    if next_children[0] != n_lpar:
+        return # ".has_key" not followed by "(...)"
+    if len(next_children) != 3:
+        return # ".has_key" followed by "()"
+    argsnode = next_children[1]
+    arg = argsnode
+    if isinstance(argsnode, pynode.arglist):
+        args = argsnode.get_children()
+        if len(args) > 2:
+            return # Too many arguments
+        if len(args) == 2:
+            if args[0] == n_star:
+                return # .has_key(*foo) -- you've gotta be kidding!
+            if args[1] != n_comma:
+                return # Only .has_key(foo,) expected
+        arg = args[0]
+    # Change "X.has_key(Y)" into "Y in X"
+    arg.set_prefix(nodes[0].get_prefix())
+    nodes[0].set_prefix(" ")
+    new = pynode.comparison(_context,
+                            arg,
+                            n_in,
+                            pynode.power(_context, *nodes[:i]))
+    # XXX Sometimes we need to parenthesize arg or new.  Later.
+    parent.parent.replace(parent, new)
+
+def diff(fn, tree):
+    f = open("@", "w")
+    try:
+        f.write(str(tree))
+    finally:
+        f.close()
+    try:
+        return os.system("diff -u %s @" % fn)
+    finally:
+        os.remove("@")
+
+if __name__ == "__main__":
+    main()

Modified: sandbox/trunk/2to3/pgen2/driver.py
==============================================================================
--- sandbox/trunk/2to3/pgen2/driver.py	(original)
+++ sandbox/trunk/2to3/pgen2/driver.py	Thu Nov 30 07:15:54 2006
@@ -22,7 +22,6 @@
 
 # Pgen imports
 from pgen2 import parse
-from pgen2 import astnode
 from pgen2 import grammar
 
 class Driver(object):

Modified: sandbox/trunk/2to3/pgen2/parse.py
==============================================================================
--- sandbox/trunk/2to3/pgen2/parse.py	(original)
+++ sandbox/trunk/2to3/pgen2/parse.py	Thu Nov 30 07:15:54 2006
@@ -80,9 +80,7 @@
         symbols, and None for tokens.
 
         An abstract syntax tree node may be anything; this is entirely
-        up to the converter function.  For example, it can be an
-        instance of a subclass of the astnode.Node class (see the
-        astnode module).
+        up to the converter function.
 
         """
         self.grammar = grammar

Modified: sandbox/trunk/2to3/pgen2/test.py
==============================================================================
--- sandbox/trunk/2to3/pgen2/test.py	(original)
+++ sandbox/trunk/2to3/pgen2/test.py	Thu Nov 30 07:15:54 2006
@@ -1,6 +1,9 @@
 # Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
 # Licensed to PSF under a Contributor Agreement.
 
+# Modifications:
+# Copyright 2006 Python Software Foundation. All Rights Reserved.
+
 def test():
     import sys
     sys.path[0] = ".."
@@ -11,7 +14,8 @@
     finally:
         f.close()
     sample = "year<=1989 ? ('Modula-3' + ABC) ** 2 : Python"
-    tree = driver.parse_string(sample, True)
+    dr = driver.Driver(driver.load_grammar())
+    tree = dr.parse_string(sample, True)
     print tree
 
 if __name__ == "__main__":

Modified: sandbox/trunk/2to3/play.py
==============================================================================
--- sandbox/trunk/2to3/play.py	(original)
+++ sandbox/trunk/2to3/play.py	Thu Nov 30 07:15:54 2006
@@ -1,9 +1,7 @@
 #!/usr/bin/env python2.5
 # Copyright 2006 Python Software Foundation. All Rights Reserved.
 
-"""XXX."""
-
-##from __future__ import with_statement
+"""Main program for testing the infrastructure."""
 
 __author__ = "Guido van Rossum <guido at python.org>"
 
@@ -19,22 +17,17 @@
 
 logging.basicConfig(level=logging.WARN)
 
-def diff(fn, tree):
-    f = open("@", "w")
-    try:
-        f.write(str(tree))
-    finally:
-        f.close()
-    return os.system("diff -u %s @" % fn)
-
 def main():
     gr = driver.load_grammar("Grammar.txt")
     dr = driver.Driver(gr, convert=pynode.convert)
 
     tree = dr.parse_file("example.py", debug=True)
+    tree.set_parents()
     sys.stdout.write(str(tree))
     return # Comment out to run the complete test suite below
 
+    problems = []
+
     # Process every imported module
     for name in sys.modules:
         mod = sys.modules[name]
@@ -47,7 +40,8 @@
             continue
         print >>sys.stderr, "Parsing", fn
         tree = dr.parse_file(fn, debug=True)
-        diff(fn, tree)
+        if diff(fn, tree):
+            problems.append(fn)
 
     # Process every single module on sys.path (but not in packages)
     for dir in sys.path:
@@ -66,7 +60,27 @@
             except pgen2.parse.ParseError, err:
                 print "ParseError:", err
             else:
-                diff(fn, tree)
+                if diff(fn, tree):
+                    problems.append(fn)
+
+    # Show summary of problem files
+    if not problems:
+        print "No problems.  Congratulations!"
+    else:
+        print "Problems in following files:"
+        for fn in problems:
+            print "***", fn
+
+def diff(fn, tree):
+    f = open("@", "w")
+    try:
+        f.write(str(tree))
+    finally:
+        f.close()
+    try:
+        return os.system("diff -u %s @" % fn)
+    finally:
+        os.remove("@")
 
 if __name__ == "__main__":
     main()

Modified: sandbox/trunk/2to3/pynode.py
==============================================================================
--- sandbox/trunk/2to3/pynode.py	(original)
+++ sandbox/trunk/2to3/pynode.py	Thu Nov 30 07:15:54 2006
@@ -29,15 +29,17 @@
 logger = logging.getLogger()
 
 class Node(object):
+
+    # XXX Should refactor this so that there are only two kinds of nodes,
+    # Terminal and Nonterminal; each with subclasses to match the grammar
+    # or perhaps just storing the node type in a slot.
+
     """Abstract base class for all nodes.
 
-    This has no attributes except a context slot which holds the line
-    number (or more detailed context info).  In the future this might
-    change this to several slots (e.g. filename, lineno, column, or
-    even filename, start_lineno, start_column, end_lineno,
-    end_column).  The context is only referenced by two places: the
-    part of the code that sticks it in, and the part of the code that
-    reports errors.
+    This has no attributes except a context slot which holds context
+    info (a tuple of the form (prefix, (lineno, column))), and a
+    parent slot, which is not set by default but can be set to the
+    parent node later.
 
     In order to reduce the amount of boilerplate code, the context is
     argument is handled by __new__ rather than __init__.  There are
@@ -46,7 +48,7 @@
 
     """
 
-    __slots__ = ["context"]
+    __slots__ = ["context", "parent"]
 
     def __new__(cls, context, *rest):
         assert cls not in (Node, Nonterminal, Terminal, Constant)
@@ -54,6 +56,14 @@
         obj.context = context
         return obj
 
+    def get_children(self):
+        return ()
+
+    def set_parents(self, parent=None):
+        self.parent = parent
+        for child in self.get_children():
+            child.set_parents(self)
+
     _stretch = False # Set to true to stretch the repr() vertically
 
     def __repr__(self, repr_arg=repr):
@@ -101,6 +111,28 @@
     def __str__(self):
         return self.__repr__(repr_arg=str)
 
+    def __eq__(self, other):
+        if self.__class__ is not other.__class__:
+            return NotImplemented
+        return self.eq(other)
+
+    def __ne__(self, other):
+        result = self.__eq__(other)
+        if result is not NotImplemented:
+            result = not result
+        return result
+
+    def eq(self, other):
+        assert self.__class__ is other.__class__
+        return self.get_children() == other.get_children()
+
+    def set_prefix(self, new_prefix):
+        old_prefix, rest = self.context
+        self.context = (new_prefix, rest)
+
+    def get_prefix(self):
+        return self.context[0]
+
 class Nonterminal(Node):
     """Abstract base class for nonterminal symbols.
 
@@ -134,7 +166,7 @@
             return nodes[0]
         else:
             obj = Nonterminal.__new__(cls, context)
-            obj.initseries(nodes)
+            obj.init_series(nodes)
             return obj
 
 class Constant(Terminal):
@@ -152,9 +184,12 @@
         self.repr = repr
 
     def __str__(self):
-        prefix, start = self.context
+        prefix, (lineno, column) = self.context
         return prefix + self.repr
 
+    def eq(self, other):
+        return self.repr == other.repr
+
 # Node classes for terminal symbols
 
 class Token(Constant):
@@ -186,6 +221,9 @@
         prefix, start = self.context
         return prefix + self.name
 
+    def eq(self, other):
+        return self.name == other.name
+
 class Number(Constant):
     """Numeric constant.
 
@@ -207,12 +245,28 @@
 # Nodes and factory functions for Python grammar
 
 class GenericSeries(Series):
+
     __slots__ = ["nodes"]
-    def initseries(self, nodes):
+
+    def init_series(self, nodes):
         self.nodes = nodes
+
+    def get_children(self):
+        return self.nodes
+
     def __str__(self):
         return "".join(map(str, self.nodes))
 
+    def replace(self, old, new):
+        self.nodes = tuple((new if n is old else n) for n in self.nodes)
+
+    def set_prefix(self, new_prefix):
+        Series.set_prefix(self, new_prefix)
+        self.nodes[0].set_prefix(new_prefix)
+
+    def get_prefix(self):
+        return self.nodes[0].get_prefix()
+
 class atom(GenericSeries):
     __slots__ = []
 


More information about the Python-checkins mailing list