[pypy-svn] r51642 - in pypy/dist/pypy/rlib/parsing: . test
jared.grubb at codespeak.net
jared.grubb at codespeak.net
Tue Feb 19 17:25:39 CET 2008
Author: jared.grubb
Date: Tue Feb 19 17:25:38 2008
New Revision: 51642
Modified:
pypy/dist/pypy/rlib/parsing/lexer.py
pypy/dist/pypy/rlib/parsing/test/test_lexer.py
Log:
rlib.parsing.lexer: add copy() methods, add a few comments, minor code changes; test_lexer: add tests for copy() methods
Modified: pypy/dist/pypy/rlib/parsing/lexer.py
==============================================================================
--- pypy/dist/pypy/rlib/parsing/lexer.py (original)
+++ pypy/dist/pypy/rlib/parsing/lexer.py Tue Feb 19 17:25:38 2008
@@ -8,6 +8,9 @@
self.source = source
self.source_pos = source_pos
+ def copy(self):
+ return Token(self.name, self.source, self.source_pos)
+
def __eq__(self, other):
# for testing only
return self.__dict__ == other.__dict__
@@ -20,10 +23,14 @@
return "Token(%r, %r, %r)" % (self.name, self.source, self.source_pos)
class SourcePos(object):
+ """An object to record position in source code."""
def __init__(self, i, lineno, columnno):
- self.i = i
- self.lineno = lineno
- self.columnno = columnno
+ self.i = i # index in source string
+ self.lineno = lineno # line number in source
+ self.columnno = columnno # column in line
+
+ def copy(self):
+ return SourcePos(self.i, self.lineno, self.columnno)
def __eq__(self, other):
# for testing only
@@ -46,7 +53,6 @@
self.automaton.optimize() # XXX not sure whether this is a good idea
if ignore is None:
ignore = []
- self.ignore = []
for ign in ignore:
assert ign in names
self.ignore = dict.fromkeys(ignore)
@@ -57,8 +63,8 @@
self.ignore, eof)
def tokenize(self, text, eof=False):
- r = LexingDFARunner(self.matcher, self.automaton, text,
- self.ignore, eof)
+ """Return a list of Token's from text."""
+ r = self.get_runner(text, eof)
result = []
while 1:
try:
@@ -105,27 +111,26 @@
def find_next_token(self):
while 1:
self.state = 0
- i = self.last_matched_index + 1
- start = i
+ start = self.last_matched_index + 1
assert start >= 0
- if i == len(self.text):
- if self.eof:
- self.last_matched_index += 1
- return self.make_token(i, -1, "", eof=True)
- else:
- raise StopIteration
- if i >= len(self.text) + 1:
+
+ # Handle end of file situation
+ if start == len(self.text) and self.eof:
+ self.last_matched_index += 1
+ return self.make_token(start, -1, "", eof=True)
+ elif start >= len(self.text):
raise StopIteration
- i = self.inner_loop(i)
+
+ i = self.inner_loop(start)
if i < 0:
i = ~i
- if start == self.last_matched_index + 1:
+ stop = self.last_matched_index + 1
+ assert stop >= 0
+ if start == stop:
source_pos = SourcePos(i - 1, self.lineno, self.columnno)
raise deterministic.LexerError(self.text, self.state,
source_pos)
- stop = self.last_matched_index + 1
- assert stop >= 0
- source = self.text[start: stop]
+ source = self.text[start:stop]
result = self.make_token(start, self.last_matched_index, source)
self.adjust_position(source)
if self.ignore_token(self.last_matched_state):
@@ -146,10 +151,10 @@
raise deterministic.LexerError(self.text, self.state, source_pos)
def adjust_position(self, token):
- lineno = self.lineno
- columnno = self.columnno
- self.lineno += token.count("\n")
- if lineno == self.lineno:
+ """Update the line# and col# as a result of this token."""
+ newlines = token.count("\n")
+ self.lineno += newlines
+ if newlines==0:
self.columnno += len(token)
else:
self.columnno = token.rfind("\n")
Modified: pypy/dist/pypy/rlib/parsing/test/test_lexer.py
==============================================================================
--- pypy/dist/pypy/rlib/parsing/test/test_lexer.py (original)
+++ pypy/dist/pypy/rlib/parsing/test/test_lexer.py Tue Feb 19 17:25:38 2008
@@ -3,7 +3,6 @@
from pypy.rlib.parsing.regex import *
from pypy.rlib.parsing import deterministic
-
class TestDirectLexer(object):
def get_lexer(self, rexs, names, ignore=None):
return Lexer(rexs, names, ignore)
@@ -133,3 +132,27 @@
tok = runner.find_next_token()
assert tok.name == "WHITE"
py.test.raises(deterministic.LexerError, runner.find_next_token)
+
+class TestSourcePos(object):
+ def test_copy(self):
+ base = SourcePos(1, 2, 3)
+ attributes = {'i':4, 'lineno': 5, 'columnno': 6}
+ for attr, new_val in attributes.iteritems():
+ copy = base.copy()
+ assert base==copy
+ setattr(copy, attr, new_val) # change one attribute
+ assert base!=copy
+
+class TestToken(object):
+ def test_copy(self):
+ base = Token('test', 'spource', SourcePos(1,2,3))
+ attributes = {'name': 'xxx', 'source': 'yyy', 'source_pos': SourcePos(4,5,6)}
+ for attr, new_val in attributes.iteritems():
+ copy = base.copy()
+ assert base==copy
+ setattr(copy, attr, new_val) # change one attribute
+ assert base!=copy
+ # copy() is not deep... verify this.
+ copy = base.copy()
+ copy.source_pos.i = 0 # changes base too
+ assert base==copy
More information about the Pypy-commit
mailing list