[pypy-svn] r51642 - in pypy/dist/pypy/rlib/parsing: . test
jared.grubb at codespeak.net
jared.grubb at codespeak.net
Tue Feb 19 17:25:39 CET 2008
Author: jared.grubb
Date: Tue Feb 19 17:25:38 2008
New Revision: 51642
Modified:
pypy/dist/pypy/rlib/parsing/lexer.py
pypy/dist/pypy/rlib/parsing/test/test_lexer.py
Log:
rlib.parsing.lexer: add copy() methods, add a few comments, minor code changes; test_lexer: add tests for copy() methods
Modified: pypy/dist/pypy/rlib/parsing/lexer.py
==============================================================================
--- pypy/dist/pypy/rlib/parsing/lexer.py (original)
+++ pypy/dist/pypy/rlib/parsing/lexer.py Tue Feb 19 17:25:38 2008
@@ -8,6 +8,9 @@
self.source = source
self.source_pos = source_pos
+ def copy(self):
+ return Token(self.name, self.source, self.source_pos)
+
def __eq__(self, other):
# for testing only
return self.__dict__ == other.__dict__
@@ -20,10 +23,14 @@
return "Token(%r, %r, %r)" % (self.name, self.source, self.source_pos)
class SourcePos(object):
+ """An object to record position in source code."""
def __init__(self, i, lineno, columnno):
- self.i = i
- self.lineno = lineno
- self.columnno = columnno
+ self.i = i # index in source string
+ self.lineno = lineno # line number in source
+ self.columnno = columnno # column in line
+
+ def copy(self):
+ return SourcePos(self.i, self.lineno, self.columnno)
def __eq__(self, other):
# for testing only
@@ -46,7 +53,6 @@
self.automaton.optimize() # XXX not sure whether this is a good idea
if ignore is None:
ignore = []
- self.ignore = []
for ign in ignore:
assert ign in names
self.ignore = dict.fromkeys(ignore)
@@ -57,8 +63,8 @@
self.ignore, eof)
def tokenize(self, text, eof=False):
- r = LexingDFARunner(self.matcher, self.automaton, text,
- self.ignore, eof)
+ """Return a list of Token's from text."""
+ r = self.get_runner(text, eof)
result = []
while 1:
try:
@@ -105,27 +111,26 @@
def find_next_token(self):
while 1:
self.state = 0
- i = self.last_matched_index + 1
- start = i
+ start = self.last_matched_index + 1
assert start >= 0
- if i == len(self.text):
- if self.eof:
- self.last_matched_index += 1
- return self.make_token(i, -1, "", eof=True)
- else:
- raise StopIteration
- if i >= len(self.text) + 1:
+
+ # Handle end of file situation
+ if start == len(self.text) and self.eof:
+ self.last_matched_index += 1
+ return self.make_token(start, -1, "", eof=True)
+ elif start >= len(self.text):
raise StopIteration
- i = self.inner_loop(i)
+
+ i = self.inner_loop(start)
if i < 0:
i = ~i
- if start == self.last_matched_index + 1:
+ stop = self.last_matched_index + 1
+ assert stop >= 0
+ if start == stop:
source_pos = SourcePos(i - 1, self.lineno, self.columnno)
raise deterministic.LexerError(self.text, self.state,
source_pos)
- stop = self.last_matched_index + 1
- assert stop >= 0
- source = self.text[start: stop]
+ source = self.text[start:stop]
result = self.make_token(start, self.last_matched_index, source)
self.adjust_position(source)
if self.ignore_token(self.last_matched_state):
@@ -146,10 +151,10 @@
raise deterministic.LexerError(self.text, self.state, source_pos)
def adjust_position(self, token):
- lineno = self.lineno
- columnno = self.columnno
- self.lineno += token.count("\n")
- if lineno == self.lineno:
+ """Update the line# and col# as a result of this token."""
+ newlines = token.count("\n")
+ self.lineno += newlines
+ if newlines==0:
self.columnno += len(token)
else:
self.columnno = token.rfind("\n")
Modified: pypy/dist/pypy/rlib/parsing/test/test_lexer.py
==============================================================================
--- pypy/dist/pypy/rlib/parsing/test/test_lexer.py (original)
+++ pypy/dist/pypy/rlib/parsing/test/test_lexer.py Tue Feb 19 17:25:38 2008
@@ -3,7 +3,6 @@
from pypy.rlib.parsing.regex import *
from pypy.rlib.parsing import deterministic
-
class TestDirectLexer(object):
def get_lexer(self, rexs, names, ignore=None):
return Lexer(rexs, names, ignore)
@@ -133,3 +132,27 @@
tok = runner.find_next_token()
assert tok.name == "WHITE"
py.test.raises(deterministic.LexerError, runner.find_next_token)
+
+class TestSourcePos(object):
+ def test_copy(self):
+ base = SourcePos(1, 2, 3)
+ attributes = {'i':4, 'lineno': 5, 'columnno': 6}
+ for attr, new_val in attributes.iteritems():
+ copy = base.copy()
+ assert base==copy
+ setattr(copy, attr, new_val) # change one attribute
+ assert base!=copy
+
+class TestToken(object):
+ def test_copy(self):
+ base = Token('test', 'spource', SourcePos(1,2,3))
+ attributes = {'name': 'xxx', 'source': 'yyy', 'source_pos': SourcePos(4,5,6)}
+ for attr, new_val in attributes.iteritems():
+ copy = base.copy()
+ assert base==copy
+ setattr(copy, attr, new_val) # change one attribute
+ assert base!=copy
+ # copy() is not deep... verify this.
+ copy = base.copy()
+ copy.source_pos.i = 0 # changes base too
+ assert base==copy
More information about the Pypy-commit
mailing list