[pypy-svn] r45330 - in pypy/dist/pypy/interpreter/pyparser: . test
jacob at codespeak.net
jacob at codespeak.net
Wed Jul 25 19:44:49 CEST 2007
Author: jacob
Date: Wed Jul 25 19:44:49 2007
New Revision: 45330
Added:
pypy/dist/pypy/interpreter/pyparser/future.py
pypy/dist/pypy/interpreter/pyparser/test/test_futureautomaton.py
Log:
Added a new mechanism for discovering __future__ options. It is as of yet
still unused.
Added: pypy/dist/pypy/interpreter/pyparser/future.py
==============================================================================
--- (empty file)
+++ pypy/dist/pypy/interpreter/pyparser/future.py Wed Jul 25 19:44:49 2007
@@ -0,0 +1,249 @@
+"""
+This automaton is designed to be invoked on a Python source string
+before the real parser starts working, in order to find all legal
+'from __future__ import blah'. As soon as something is encountered that
+would prevent more future imports, the analysis is aborted.
+The resulting legal futures are available in self.flags after the
+pass has ended.
+
+Invocation is through getFutures(src), which returns a bit field of flags,
+one bit for each correct future import that was found.
+
+The flags can then be used to set up the parser.
+All error detection is left to the parser.
+
+The reason we are not using the regular lexer/parser toolchain is that
+we do not want the overhead of generating tokens for entire files just
+to find information that resides in the first few lines of the file.
+Neither do we require sane error messages, as this job is handled by
+the parser.
+
+To make the parsing fast, especially when the module is translated to C,
+the code has been written in a very serial fashion, using an almost
+assembler-like style. A further speedup could be achieved by replacing
+the "in" comparisons with explicit numeric comparisons.
+"""
+
+from pypy.interpreter.astcompiler.consts import CO_GENERATOR_ALLOWED, \
+ CO_FUTURE_DIVISION, CO_FUTURE_WITH_STATEMENT
+
+def getFutures(source):
+    """
+    Return the bitwise OR of the CO_* flags for the future imports
+    found at the top of source.
+
+    The scan ends either normally, with a DoneException, or with an
+    IndexError when the automaton reads past the end of source; the
+    flags collected up to that point are returned in every case.
+    """
+    futures = FutureAutomaton(source)
+    try:
+        futures.start()
+    except (IndexError, DoneException):
+        # Both exceptions only signal "stop scanning"; nothing to report.
+        pass
+    return futures.flags
+
+class DoneException(Exception):
+    """Raised by FutureAutomaton to abort the scan of the source."""
+
+# Characters that separate tokens within a line; line endings are
+# tested for separately by the automaton.
+whitespace = ' \t\f'
+# Characters that may start an identifier ('Z' was previously missing).
+letters = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz_'
+# Characters that may continue an identifier.
+alphanumerics = letters + '1234567890'
+
+class FutureAutomaton(object):
+    """
+    Scanner that collects the flags of the ``from __future__ import``
+    statements at the top of a module.
+
+    A future statement must appear near the top of the module.
+    The only lines that can appear before a future statement are:
+
+        * the module docstring (if any),
+        * comments,
+        * blank lines, and
+        * other future statements.
+
+    The features recognized by Python 2.5 are "generators",
+    "division", "nested_scopes" and "with_statement".
+    "generators" and "nested_scopes" are redundant in 2.5 because
+    they are always enabled.
+
+    This module parses the input until it encounters something that is
+    not recognized as a valid future statement or something that may
+    precede a future statement.
+
+    The scan stops by returning normally, by raising DoneException, or
+    by indexing past the end of the string (IndexError); callers are
+    expected to catch the latter two, as getFutures() does.  The flags
+    found so far are accumulated in self.flags.
+
+    NOTE(review): start() and the consume* methods call each other
+    recursively, so the call depth grows with the number of leading
+    comment, blank and future-import lines.
+    """
+
+    def __init__(self, string):
+        self.s = string                 # source text being scanned
+        self.end = len(string)          # one past the last valid index
+        self.pos = 0                    # index of the next unread character
+        # True once a docstring or a 'from' statement has been seen
+        self.docstringConsumed = False
+        self.flags = 0                  # bitwise OR of the CO_* flags found
+
+    def start(self):
+        """Dispatch on the first character of a statement."""
+        c = self.s[self.pos]
+        if c in ["'", '"'] and not self.docstringConsumed:
+            self.consumeDocstring()
+        elif c in whitespace or c in ['\r', '\n']:
+            # A bare line ending is an empty line too; without this a
+            # blank line at the top of the file would end the scan.
+            self.consumeEmptyLine()
+        elif c == '#':
+            self.consumeComment()
+        elif c == 'f':
+            self.consumeFrom()
+        else:
+            return
+
+    def consumeDocstring(self):
+        """Skip a string literal that starts at the current position."""
+        self.docstringConsumed = True
+        endchar = self.s[self.pos]
+        if (self.s[self.pos+1] == endchar and
+            self.s[self.pos+2] == endchar):
+            self.pos += 3
+            while 1: # Deal with a triple quoted docstring
+                c = self.s[self.pos]
+                if c == '\\':
+                    # Skip the escaped character so that an escaped
+                    # quote cannot be taken for the closing quotes.
+                    self.pos += 2
+                elif c != endchar:
+                    self.pos += 1
+                else:
+                    self.pos += 1
+                    if (self.s[self.pos] == endchar and
+                        self.s[self.pos+1] == endchar):
+                        self.pos += 2
+                        self.consumeEmptyLine()
+                        break
+
+        else: # Deal with a single quoted docstring
+            self.pos += 1
+            while 1:
+                c = self.s[self.pos]
+                self.pos += 1
+                if c == endchar:
+                    self.consumeEmptyLine()
+                    return
+                elif c == '\\':
+                    # Skip the escaped character; an escaped '\r\n'
+                    # pair counts as a single line continuation.
+                    escaped = self.s[self.pos]
+                    self.pos += 1
+                    if escaped == '\r' and self.s[self.pos] == '\n':
+                        self.pos += 1
+                elif c in ['\r', '\n']:
+                    # Syntax error
+                    return
+
+    def consumeEmptyLine(self):
+        """
+        Called when the remainder of the line can only contain whitespace
+        and comments.
+        """
+        while self.s[self.pos] in whitespace:
+            self.pos += 1
+        c = self.s[self.pos]
+        if c == '#':
+            self.consumeComment()
+        elif c == ';':
+            self.pos += 1
+            self.consumeWhitespace()
+            self.start()
+        elif c in ['\r', '\n']:
+            self.pos += 1
+            # Treat '\r\n' as one line ending
+            if c == '\r' and self.s[self.pos] == '\n':
+                self.pos += 1
+            self.start()
+
+    def consumeComment(self):
+        """Skip from the '#' at the current position to the line end."""
+        self.pos += 1
+        while self.s[self.pos] not in ['\r', '\n']:
+            self.pos += 1
+        self.consumeEmptyLine()
+
+    def consumeFrom(self):
+        """
+        Parse a 'from __future__ import ...' statement.
+
+        Called with the current position on an 'f'.  Returns silently
+        when the word is not 'from' and raises DoneException when the
+        statement is some other kind of import.
+        """
+        self.pos += 1
+        p = self.pos
+        s = self.s
+        if s[p] == 'r' and s[p+1] == 'o' and s[p+2] == 'm':
+            self.docstringConsumed = True
+            self.pos += 3
+            self.consumeMandatoryWhitespace()
+            if self.s[self.pos:self.pos+10] != '__future__':
+                raise DoneException
+            self.pos += 10
+            self.consumeMandatoryWhitespace()
+            if self.s[self.pos:self.pos+6] != 'import':
+                raise DoneException
+            self.pos += 6
+            self.consumeWhitespace()
+            if self.s[self.pos] == '(':
+                self.pos += 1
+                # Line breaks are allowed anywhere inside the parentheses
+                self.consumeWhitespace(True)
+                self.setFlag(self.getName(True))
+                self.getMore(parenList=True)
+            else:
+                self.setFlag(self.getName())
+                self.getMore()
+            self.consumeEmptyLine()
+        else:
+            return
+
+    def consumeMandatoryWhitespace(self):
+        """Skip whitespace; raise DoneException if there is none."""
+        if self.s[self.pos] not in whitespace + '\\':
+            raise DoneException
+        self.consumeWhitespace()
+
+    def consumeWhitespace(self, newlineOk=False):
+        """
+        Skip whitespace and backslash line continuations.
+
+        With newlineOk set, plain line endings are skipped as well; this
+        is used inside a parenthesized import list.  A backslash that is
+        not followed by a line ending raises DoneException.
+        """
+        while 1:
+            c = self.s[self.pos]
+            if c in whitespace:
+                self.pos += 1
+            elif newlineOk and c in ['\r', '\n']:
+                self.pos += 1
+            elif c == '\\':
+                self.pos += 1
+                c = self.s[self.pos]
+                if c == '\n':
+                    self.pos += 1
+                elif c == '\r':
+                    self.pos += 1
+                    if self.s[self.pos] == '\n':
+                        self.pos += 1
+                else:
+                    raise DoneException
+            else:
+                return
+
+    def getName(self, newlineOk=False):
+        """
+        Return the identifier at the current position and skip the
+        whitespace that follows it.
+
+        Raises DoneException when no identifier starts here.  newlineOk
+        is passed on to consumeWhitespace().
+        """
+        if self.s[self.pos] not in letters:
+            raise DoneException
+        p = self.pos
+        self.pos += 1
+        # Stop at the end of the source instead of indexing past it, so
+        # that a name ending the file is still returned to the caller.
+        while self.pos < self.end and self.s[self.pos] in alphanumerics:
+            self.pos += 1
+        name = self.s[p:self.pos]
+        try:
+            self.consumeWhitespace(newlineOk)
+        except IndexError:
+            # The source ended after the name; the next read by the
+            # caller raises IndexError again and ends the scan.
+            pass
+        return name
+
+    def getMore(self, parenList=False):
+        """
+        Parse what follows a feature name: an optional 'as' alias and
+        any further comma separated features.  With parenList set, a
+        closing parenthesis ends the list.
+        """
+        if parenList and self.s[self.pos] == ')':
+            self.pos += 1
+            return
+
+        if (self.s[self.pos] == 'a' and
+            self.s[self.pos+1] == 's' and
+            self.s[self.pos+2] in whitespace):
+            self.getName(parenList)     # the 'as' keyword itself
+            self.getName(parenList)     # the alias, which sets no flag
+            self.getMore(parenList=parenList)
+            return
+        elif self.s[self.pos] != ',':
+            return
+        else:
+            self.pos += 1
+            self.consumeWhitespace(parenList)
+            if parenList and self.s[self.pos] == ')':
+                self.pos += 1
+                return # Handles trailing comma inside parenthesis
+            self.setFlag(self.getName(parenList))
+            self.getMore(parenList=parenList)
+
+    def setFlag(self, feature):
+        """
+        Add the compiler flag of the named feature to self.flags.
+        Features without a flag here (such as nested_scopes) are ignored.
+        """
+        if feature == "division":
+            self.flags |= CO_FUTURE_DIVISION
+        elif feature == "generators":
+            self.flags |= CO_GENERATOR_ALLOWED
+        elif feature == "with_statement":
+            self.flags |= CO_FUTURE_WITH_STATEMENT
+
Added: pypy/dist/pypy/interpreter/pyparser/test/test_futureautomaton.py
==============================================================================
--- (empty file)
+++ pypy/dist/pypy/interpreter/pyparser/test/test_futureautomaton.py Wed Jul 25 19:44:49 2007
@@ -0,0 +1,124 @@
+import pypy.interpreter.pyparser.future as future
+from pypy.interpreter.astcompiler.consts import CO_GENERATOR_ALLOWED, \
+ CO_FUTURE_DIVISION, CO_FUTURE_WITH_STATEMENT
+
+def run(s):
+    """
+    Run a FutureAutomaton over s and return the automaton.
+
+    The automaton stops by returning, by raising DoneException or by
+    indexing past the end of s; both exceptions are swallowed here so
+    that the tests can inspect f.pos and f.flags afterwards.
+    """
+    f = future.FutureAutomaton(s)
+    try:
+        f.start()
+    except (IndexError, future.DoneException):
+        # A tuple is required: "except IndexError, future.DoneException"
+        # catches only IndexError and binds the caught instance to
+        # future.DoneException.
+        pass
+    return f
+
+def test_docstring():
+    """A one-line docstring is consumed up to the end of the input."""
+    source = '"Docstring"\n'
+    automaton = run(source)
+    assert automaton.pos == len(source)
+
+def test_comment():
+    """A comment line, even one ending in ';', is consumed completely."""
+    source = '# A comment about nothing ;\n'
+    automaton = run(source)
+    assert automaton.pos == len(source)
+
+def test_tripledocstring():
+    """A triple quoted docstring spanning several lines is consumed."""
+    source = '''""" This is a
+docstring with line
+breaks in it. It even has a \n"""
+'''
+    automaton = run(source)
+    assert automaton.pos == len(source)
+
+def test_empty_line():
+    """Lines holding only spaces, tabs and form feeds are consumed."""
+    source = ' \t \f \n \n'
+    automaton = run(source)
+    assert automaton.pos == len(source)
+
+def test_from():
+    """A single future import sets exactly its own flag."""
+    source = 'from __future__ import division\n'
+    automaton = run(source)
+    assert automaton.pos == len(source)
+    assert automaton.flags == CO_FUTURE_DIVISION
+
+def test_froms():
+    """Comma separated features on one line each set their flag."""
+    source = 'from __future__ import division, generators, with_statement\n'
+    expected = (CO_FUTURE_DIVISION |
+                CO_GENERATOR_ALLOWED |
+                CO_FUTURE_WITH_STATEMENT)
+    automaton = run(source)
+    assert automaton.pos == len(source)
+    assert automaton.flags == expected
+
+def test_from_as():
+    """An 'as' alias is skipped and the feature flag is still set."""
+    source = 'from __future__ import division as b\n'
+    automaton = run(source)
+    assert automaton.pos == len(source)
+    assert automaton.flags == CO_FUTURE_DIVISION
+
+def test_froms_as():
+    """Several aliased features on one line each set their flag."""
+    source = 'from __future__ import division as b, generators as c\n'
+    expected = CO_FUTURE_DIVISION | CO_GENERATOR_ALLOWED
+    automaton = run(source)
+    assert automaton.pos == len(source)
+    assert automaton.flags == expected
+
+def test_from_paren():
+    """A parenthesized list with a single feature is accepted."""
+    source = 'from __future__ import (division)\n'
+    automaton = run(source)
+    assert automaton.pos == len(source)
+    assert automaton.flags == CO_FUTURE_DIVISION
+
+def test_froms_paren():
+    """A parenthesized list with two features sets both flags."""
+    source = 'from __future__ import (division, generators)\n'
+    expected = CO_FUTURE_DIVISION | CO_GENERATOR_ALLOWED
+    automaton = run(source)
+    assert automaton.pos == len(source)
+    assert automaton.flags == expected
+
+def test_froms_paren_as():
+    """Aliases and a trailing comma inside parentheses are accepted."""
+    source = 'from __future__ import (division as b, generators,)\n'
+    expected = CO_FUTURE_DIVISION | CO_GENERATOR_ALLOWED
+    automaton = run(source)
+    assert automaton.pos == len(source)
+    assert automaton.flags == expected
+
+def test_multiline():
+    """Docstring, comments and two import lines are all processed."""
+    source = '"abc" #def\n #ghi\nfrom __future__ import (division as b, generators,)\nfrom __future__ import with_statement\n'
+    expected = (CO_FUTURE_DIVISION |
+                CO_GENERATOR_ALLOWED |
+                CO_FUTURE_WITH_STATEMENT)
+    automaton = run(source)
+    assert automaton.pos == len(source)
+    assert automaton.flags == expected
+
+def test_windows_style_lineendings():
+    """The multi-line source is handled with '\\r\\n' line endings."""
+    source = '"abc" #def\r\n #ghi\r\nfrom __future__ import (division as b, generators,)\r\nfrom __future__ import with_statement\r\n'
+    expected = (CO_FUTURE_DIVISION |
+                CO_GENERATOR_ALLOWED |
+                CO_FUTURE_WITH_STATEMENT)
+    automaton = run(source)
+    assert automaton.pos == len(source)
+    assert automaton.flags == expected
+
+def test_mac_style_lineendings():
+    """The multi-line source is handled with bare '\\r' line endings."""
+    source = '"abc" #def\r #ghi\rfrom __future__ import (division as b, generators,)\rfrom __future__ import with_statement\r'
+    expected = (CO_FUTURE_DIVISION |
+                CO_GENERATOR_ALLOWED |
+                CO_FUTURE_WITH_STATEMENT)
+    automaton = run(source)
+    assert automaton.pos == len(source)
+    assert automaton.flags == expected
+
+def test_semicolon():
+    """Two future imports separated by ';' on one line are both seen."""
+    source = '"abc" #def\n #ghi\nfrom __future__ import (division as b, generators,); from __future__ import with_statement\n'
+    expected = (CO_FUTURE_DIVISION |
+                CO_GENERATOR_ALLOWED |
+                CO_FUTURE_WITH_STATEMENT)
+    automaton = run(source)
+    assert automaton.pos == len(source)
+    assert automaton.flags == expected
+
+def test_full_chain():
+    """getFutures() returns the combined flags for the whole source."""
+    source = '"abc" #def\n #ghi\nfrom __future__ import (division as b, generators,); from __future__ import with_statement\n'
+    expected = (CO_FUTURE_DIVISION |
+                CO_GENERATOR_ALLOWED |
+                CO_FUTURE_WITH_STATEMENT)
+    found = future.getFutures(source)
+    assert found == expected
+
+def test_intervening_code():
+    """A future import placed after an ordinary import sets no flag."""
+    source = 'from __future__ import (division as b, generators,)\nfrom sys import modules\nfrom __future__ import with_statement\n'
+    found = future.getFutures(source)
+    assert found & CO_FUTURE_WITH_STATEMENT == 0
More information about the Pypy-commit
mailing list