[Jython-checkins] jython: Use Java codec throughout parser (not sometimes a Python one), see issue #2123.
jeff.allen
jython-checkins at python.org
Wed Jun 11 00:59:42 CEST 2014
http://hg.python.org/jython/rev/eef3fcffc58a
changeset: 7288:eef3fcffc58a
user: Jeff Allen <ja.py at farowl.co.uk>
date: Tue Jun 10 23:40:28 2014 +0100
summary:
Use Java codec throughout parser (not sometimes a Python one), see issue #2123.
This allows at least some programs to run when the console encoding is one
for which Jython has no Python codec.
files:
src/org/python/antlr/GrammarActions.java | 71 ++++++-----
src/org/python/core/ParserFacade.java | 22 ++-
2 files changed, 53 insertions(+), 40 deletions(-)
diff --git a/src/org/python/antlr/GrammarActions.java b/src/org/python/antlr/GrammarActions.java
--- a/src/org/python/antlr/GrammarActions.java
+++ b/src/org/python/antlr/GrammarActions.java
@@ -1,28 +1,18 @@
package org.python.antlr;
+import java.math.BigInteger;
+import java.nio.ByteBuffer;
+import java.nio.charset.Charset;
+import java.util.ArrayList;
+import java.util.Iterator;
+import java.util.List;
+
import org.antlr.runtime.Token;
-
-import org.python.core.Py;
-import org.python.core.PyComplex;
-import org.python.core.PyFloat;
-import org.python.core.PyInteger;
-import org.python.core.PyLong;
-import org.python.core.PyString;
-import org.python.core.PyUnicode;
-import org.python.core.codecs;
-import org.python.antlr.ast.alias;
-import org.python.antlr.ast.arguments;
-import org.python.antlr.ast.boolopType;
-import org.python.antlr.ast.cmpopType;
-import org.python.antlr.ast.expr_contextType;
-import org.python.antlr.ast.operatorType;
-import org.python.antlr.ast.unaryopType;
-import org.python.antlr.ast.Context;
-import org.python.antlr.ast.keyword;
import org.python.antlr.ast.Attribute;
import org.python.antlr.ast.BinOp;
import org.python.antlr.ast.BoolOp;
import org.python.antlr.ast.Call;
+import org.python.antlr.ast.Context;
import org.python.antlr.ast.DictComp;
import org.python.antlr.ast.ExtSlice;
import org.python.antlr.ast.For;
@@ -34,26 +24,38 @@
import org.python.antlr.ast.ListComp;
import org.python.antlr.ast.Name;
import org.python.antlr.ast.Num;
+import org.python.antlr.ast.Repr;
+import org.python.antlr.ast.SetComp;
import org.python.antlr.ast.Slice;
+import org.python.antlr.ast.Str;
import org.python.antlr.ast.TryExcept;
import org.python.antlr.ast.TryFinally;
import org.python.antlr.ast.Tuple;
-import org.python.antlr.ast.Repr;
-import org.python.antlr.ast.SetComp;
-import org.python.antlr.ast.Str;
import org.python.antlr.ast.UnaryOp;
import org.python.antlr.ast.While;
import org.python.antlr.ast.With;
import org.python.antlr.ast.Yield;
+import org.python.antlr.ast.alias;
+import org.python.antlr.ast.arguments;
+import org.python.antlr.ast.boolopType;
+import org.python.antlr.ast.cmpopType;
+import org.python.antlr.ast.expr_contextType;
+import org.python.antlr.ast.keyword;
+import org.python.antlr.ast.operatorType;
+import org.python.antlr.ast.unaryopType;
import org.python.antlr.base.excepthandler;
import org.python.antlr.base.expr;
import org.python.antlr.base.slice;
import org.python.antlr.base.stmt;
-
-import java.math.BigInteger;
-import java.util.ArrayList;
-import java.util.Iterator;
-import java.util.List;
+import org.python.core.Py;
+import org.python.core.PyComplex;
+import org.python.core.PyFloat;
+import org.python.core.PyInteger;
+import org.python.core.PyLong;
+import org.python.core.PyString;
+import org.python.core.PyUnicode;
+import org.python.core.codecs;
+import org.python.core.util.StringUtil;
public class GrammarActions {
private ErrorHandler errorHandler = null;
@@ -183,7 +185,7 @@
}
return result;
}
-
+
List<stmt> makeElse(List elseSuite, PythonTree elif) {
if (elseSuite != null) {
return castStmts(elseSuite);
@@ -425,8 +427,9 @@
}
int ndigits = s.length();
int i=0;
- while (i < ndigits && s.charAt(i) == '0')
+ while (i < ndigits && s.charAt(i) == '0') {
i++;
+ }
if ((ndigits - i) > 11) {
return Py.newLong(new BigInteger(s, radix));
}
@@ -449,7 +452,7 @@
String getString() {
return s;
}
-
+
boolean isUnicode() {
return unicode;
}
@@ -511,8 +514,10 @@
// XXX: No need to re-encode when the encoding is iso-8859-1, but ParserFacade
// needs to normalize the encoding name
if (!ustring && encoding != null) {
- // str with a specified encoding: first re-encode back out
- string = new PyUnicode(string.substring(start, end)).encode(encoding);
+ // The parser used a non-latin encoding: re-encode chars to bytes.
+ Charset cs = Charset.forName(encoding);
+ ByteBuffer decoded = cs.encode(string.substring(start, end));
+ string = StringUtil.fromBytes(decoded);
if (!raw) {
// Handle escapes in non-raw strs
string = PyString.decode_UnicodeEscape(string, 0, string.length(), "strict",
@@ -744,7 +749,7 @@
}
return result;
}
-
+
BoolOp makeBoolOp(Token t, PythonTree left, boolopType op, List right) {
List values = new ArrayList();
values.add(left);
@@ -780,7 +785,7 @@
}
return result;
}
-
+
slice castSlice(Object o) {
if (o instanceof slice) {
return (slice)o;
diff --git a/src/org/python/core/ParserFacade.java b/src/org/python/core/ParserFacade.java
--- a/src/org/python/core/ParserFacade.java
+++ b/src/org/python/core/ParserFacade.java
@@ -9,6 +9,7 @@
import java.io.InputStreamReader;
import java.io.Reader;
import java.io.StringReader;
+import java.nio.ByteBuffer;
import java.nio.charset.CharacterCodingException;
import java.nio.charset.Charset;
import java.nio.charset.CharsetDecoder;
@@ -54,8 +55,10 @@
return text;
}
if (reader.encoding != null) {
- // restore the original encoding
- text = new PyUnicode(text).encode(reader.encoding);
+ // The parser used a non-latin encoding: re-encode chars to bytes.
+ Charset cs = Charset.forName(reader.encoding);
+ ByteBuffer decoded = cs.encode(text);
+ text = StringUtil.fromBytes(decoded);
}
return text + "\n";
} catch (IOException ioe) {
@@ -100,8 +103,9 @@
+ reader.encoding + "'";
}
throw Py.SyntaxError(msg);
+ } else {
+ return Py.JavaError(t);
}
- else return Py.JavaError(t);
}
/**
@@ -121,7 +125,9 @@
return parse(bufReader, CompileMode.eval, filename, cflags);
} catch (Throwable t) {
if (bufReader == null)
+ {
throw Py.JavaError(t); // can't do any more
+ }
try {
// then, try parsing as a module
bufReader.reset();
@@ -169,7 +175,7 @@
close(bufReader);
}
}
-
+
public static mod parse(InputStream stream,
CompileMode kind,
String filename,
@@ -268,11 +274,12 @@
throws IOException {
cflags.source_is_utf8 = true;
cflags.encoding = "utf-8";
-
+
BufferedReader bufferedReader = new BufferedReader(reader);
bufferedReader.mark(MARK_LIMIT);
- if (findEncoding(bufferedReader) != null)
+ if (findEncoding(bufferedReader) != null) {
throw new ParseException("encoding declaration in Unicode string");
+ }
bufferedReader.reset();
return new ExpectedEncodingBufferedReader(bufferedReader, null);
@@ -345,8 +352,9 @@
CompilerFlags cflags,
String filename)
throws IOException {
- if (cflags.source_is_utf8)
+ if (cflags.source_is_utf8) {
return prepBufReader(new StringReader(string), cflags, filename);
+ }
byte[] stringBytes = StringUtil.toBytes(string);
return prepBufReader(new ByteArrayInputStream(stringBytes), cflags, filename, true, false);
--
Repository URL: http://hg.python.org/jython
More information about the Jython-checkins
mailing list