[Python-checkins] r76232 - in python/branches/py3k: Doc/library/functions.rst Lib/test/test_codeop.py Lib/test/test_compile.py Lib/test/test_parser.py Lib/test/test_pep263.py Parser/parsetok.c Parser/tokenizer.c Parser/tokenizer.h
benjamin.peterson
python-checkins at python.org
Fri Nov 13 01:18:00 CET 2009
Author: benjamin.peterson
Date: Fri Nov 13 01:17:59 2009
New Revision: 76232
Log:
Merged revisions 76230 via svnmerge from
svn+ssh://pythondev@svn.python.org/python/trunk
........
r76230 | benjamin.peterson | 2009-11-12 17:39:44 -0600 (Thu, 12 Nov 2009) | 2 lines
fix several compile() issues by translating newlines in the tokenizer
........
Modified:
python/branches/py3k/ (props changed)
python/branches/py3k/Doc/library/functions.rst
python/branches/py3k/Lib/test/test_codeop.py
python/branches/py3k/Lib/test/test_compile.py
python/branches/py3k/Lib/test/test_parser.py
python/branches/py3k/Lib/test/test_pep263.py
python/branches/py3k/Parser/parsetok.c
python/branches/py3k/Parser/tokenizer.c
python/branches/py3k/Parser/tokenizer.h
Modified: python/branches/py3k/Doc/library/functions.rst
==============================================================================
--- python/branches/py3k/Doc/library/functions.rst (original)
+++ python/branches/py3k/Doc/library/functions.rst Fri Nov 13 01:17:59 2009
@@ -176,11 +176,15 @@
.. note::
- When compiling a string with multi-line statements, line endings must be
- represented by a single newline character (``'\n'``), and the input must
- be terminated by at least one newline character. If line endings are
- represented by ``'\r\n'``, use :meth:`str.replace` to change them into
- ``'\n'``.
+ When compiling a string with multi-line statements in ``'single'`` or
+ ``'eval'`` mode, input must be terminated by at least one newline
+ character. This is to facilitate detection of incomplete and complete
+ statements in the :mod:`code` module.
+
+
+ .. versionchanged:: 3.2
+ Allowed use of Windows and Mac newlines. Also input in ``'exec'`` mode
+ does not have to end in a newline anymore.
.. function:: complex([real[, imag]])
Modified: python/branches/py3k/Lib/test/test_codeop.py
==============================================================================
--- python/branches/py3k/Lib/test/test_codeop.py (original)
+++ python/branches/py3k/Lib/test/test_codeop.py Fri Nov 13 01:17:59 2009
@@ -295,10 +295,6 @@
self.assertNotEquals(compile_command("a = 1\n", "abc").co_filename,
compile("a = 1\n", "def", 'single').co_filename)
- def test_no_universal_newlines(self):
- code = compile_command("'\rfoo\r'", symbol='eval')
- self.assertEqual(eval(code), '\rfoo\r')
-
def test_main():
run_unittest(CodeopTests)
Modified: python/branches/py3k/Lib/test/test_compile.py
==============================================================================
--- python/branches/py3k/Lib/test/test_compile.py (original)
+++ python/branches/py3k/Lib/test/test_compile.py Fri Nov 13 01:17:59 2009
@@ -5,6 +5,19 @@
class TestSpecifics(unittest.TestCase):
+ def test_no_ending_newline(self):
+ compile("hi", "<test>", "exec")
+ compile("hi\r", "<test>", "exec")
+
+ def test_empty(self):
+ compile("", "<test>", "exec")
+
+ def test_other_newlines(self):
+ compile("\r\n", "<test>", "exec")
+ compile("\r", "<test>", "exec")
+ compile("hi\r\nstuff\r\ndef f():\n pass\r", "<test>", "exec")
+ compile("this_is\rreally_old_mac\rdef f():\n pass", "<test>", "exec")
+
def test_debug_assignment(self):
# catch assignments to __debug__
self.assertRaises(SyntaxError, compile, '__debug__ = 1', '?', 'single')
Modified: python/branches/py3k/Lib/test/test_parser.py
==============================================================================
--- python/branches/py3k/Lib/test/test_parser.py (original)
+++ python/branches/py3k/Lib/test/test_parser.py Fri Nov 13 01:17:59 2009
@@ -237,9 +237,9 @@
(14, '+', 2, 13),
(2, '1', 2, 15),
(4, '', 2, 16),
- (6, '', 2, -1),
- (4, '', 2, -1),
- (0, '', 2, -1)],
+ (6, '', 3, -1),
+ (4, '', 3, -1),
+ (0, '', 3, -1)],
terminals)
def test_extended_unpacking(self):
Modified: python/branches/py3k/Lib/test/test_pep263.py
==============================================================================
--- python/branches/py3k/Lib/test/test_pep263.py (original)
+++ python/branches/py3k/Lib/test/test_pep263.py Fri Nov 13 01:17:59 2009
@@ -26,7 +26,7 @@
try:
compile(b"# coding: cp932\nprint '\x94\x4e'", "dummy", "exec")
except SyntaxError as v:
- self.assertEquals(v.text, "print '\u5e74'")
+ self.assertEquals(v.text, "print '\u5e74'\n")
else:
self.fail()
Modified: python/branches/py3k/Parser/parsetok.c
==============================================================================
--- python/branches/py3k/Parser/parsetok.c (original)
+++ python/branches/py3k/Parser/parsetok.c Fri Nov 13 01:17:59 2009
@@ -46,13 +46,14 @@
perrdetail *err_ret, int *flags)
{
struct tok_state *tok;
+ int exec_input = start == file_input;
initerr(err_ret, filename);
if (*flags & PyPARSE_IGNORE_COOKIE)
- tok = PyTokenizer_FromUTF8(s);
+ tok = PyTokenizer_FromUTF8(s, exec_input);
else
- tok = PyTokenizer_FromString(s);
+ tok = PyTokenizer_FromString(s, exec_input);
if (tok == NULL) {
err_ret->error = PyErr_Occurred() ? E_DECODE : E_NOMEM;
return NULL;
Modified: python/branches/py3k/Parser/tokenizer.c
==============================================================================
--- python/branches/py3k/Parser/tokenizer.c (original)
+++ python/branches/py3k/Parser/tokenizer.c Fri Nov 13 01:17:59 2009
@@ -119,6 +119,7 @@
tok->buf = tok->cur = tok->end = tok->inp = tok->start = NULL;
tok->done = E_OK;
tok->fp = NULL;
+ tok->input = NULL;
tok->tabsize = TABSIZE;
tok->indent = 0;
tok->indstack[0] = 0;
@@ -145,6 +146,17 @@
return tok;
}
+static char *
+new_string(const char *s, Py_ssize_t len)
+{
+ char* result = (char *)PyMem_MALLOC(len + 1);
+ if (result != NULL) {
+ memcpy(result, s, len);
+ result[len] = '\0';
+ }
+ return result;
+}
+
#ifdef PGEN
static char *
@@ -159,10 +171,10 @@
return feof(tok->fp);
}
-static const char *
-decode_str(const char *str, struct tok_state *tok)
+static char *
+decode_str(const char *str, int exec_input, struct tok_state *tok)
{
- return str;
+ return new_string(str, strlen(str));
}
#else /* PGEN */
@@ -177,16 +189,6 @@
return NULL; /* as if it were EOF */
}
-static char *
-new_string(const char *s, Py_ssize_t len)
-{
- char* result = (char *)PyMem_MALLOC(len + 1);
- if (result != NULL) {
- memcpy(result, s, len);
- result[len] = '\0';
- }
- return result;
-}
static char *
get_normal_name(char *s) /* for utf-8 and latin-1 */
@@ -635,17 +637,63 @@
return utf8;
}
+
+static char *
+translate_newlines(const char *s, int exec_input, struct tok_state *tok) {
+ int skip_next_lf = 0, length = strlen(s), final_length;
+ char *buf, *current;
+ char c;
+ buf = PyMem_MALLOC(length + 2);
+ if (buf == NULL) {
+ tok->done = E_NOMEM;
+ return NULL;
+ }
+ for (current = buf; (c = *s++);) {
+ if (skip_next_lf) {
+ skip_next_lf = 0;
+ if (c == '\n') {
+ c = *s;
+ s++;
+ if (!c)
+ break;
+ }
+ }
+ if (c == '\r') {
+ skip_next_lf = 1;
+ c = '\n';
+ }
+ *current = c;
+ current++;
+ }
+ /* If this is exec input, add a newline to the end of the file if
+ there isn't one already. */
+ if (exec_input && *current != '\n') {
+ *current = '\n';
+ current++;
+ }
+ *current = '\0';
+ final_length = current - buf;
+ if (final_length < length && final_length)
+ /* should never fail */
+ buf = PyMem_REALLOC(buf, final_length + 1);
+ return buf;
+}
+
/* Decode a byte string STR for use as the buffer of TOK.
Look for encoding declarations inside STR, and record them
inside TOK. */
static const char *
-decode_str(const char *str, struct tok_state *tok)
+decode_str(const char *input, int single, struct tok_state *tok)
{
PyObject* utf8 = NULL;
+ const char *str;
const char *s;
const char *newl[2] = {NULL, NULL};
int lineno = 0;
+ tok->input = str = translate_newlines(input, single, tok);
+ if (str == NULL)
+ return NULL;
tok->enc = NULL;
tok->str = str;
if (!check_bom(buf_getc, buf_ungetc, buf_setreadl, tok))
@@ -696,12 +744,12 @@
/* Set up tokenizer for string */
struct tok_state *
-PyTokenizer_FromString(const char *str)
+PyTokenizer_FromString(const char *str, int exec_input)
{
struct tok_state *tok = tok_new();
if (tok == NULL)
return NULL;
- str = (char *)decode_str(str, tok);
+ str = (char *)decode_str(str, exec_input, tok);
if (str == NULL) {
PyTokenizer_Free(tok);
return NULL;
@@ -713,11 +761,18 @@
}
struct tok_state *
-PyTokenizer_FromUTF8(const char *str)
+PyTokenizer_FromUTF8(const char *str, int exec_input)
{
struct tok_state *tok = tok_new();
if (tok == NULL)
return NULL;
+#ifndef PGEN
+ tok->input = str = translate_newlines(str, exec_input, tok);
+#endif
+ if (str == NULL) {
+ PyTokenizer_Free(tok);
+ return NULL;
+ }
tok->decoding_state = STATE_RAW;
tok->read_coding_spec = 1;
tok->enc = NULL;
@@ -734,7 +789,6 @@
return tok;
}
-
/* Set up tokenizer for file */
struct tok_state *
@@ -780,6 +834,8 @@
#endif
if (tok->fp != NULL && tok->buf != NULL)
PyMem_FREE(tok->buf);
+ if (tok->input)
+ PyMem_FREE((char *)tok->input);
PyMem_FREE(tok);
}
Modified: python/branches/py3k/Parser/tokenizer.h
==============================================================================
--- python/branches/py3k/Parser/tokenizer.h (original)
+++ python/branches/py3k/Parser/tokenizer.h Fri Nov 13 01:17:59 2009
@@ -58,10 +58,11 @@
#endif
const char* enc; /* Encoding for the current str. */
const char* str;
+ const char* input; /* Tokenizer's newline translated copy of the string. */
};
-extern struct tok_state *PyTokenizer_FromString(const char *);
-extern struct tok_state *PyTokenizer_FromUTF8(const char *);
+extern struct tok_state *PyTokenizer_FromString(const char *, int);
+extern struct tok_state *PyTokenizer_FromUTF8(const char *, int);
extern struct tok_state *PyTokenizer_FromFile(FILE *, char*,
char *, char *);
extern void PyTokenizer_Free(struct tok_state *);
More information about the Python-checkins mailing list