[Jython-checkins] jython: Use Java codec throughout parser (not sometimes a Python one), see issue #2123.

Wed Jun 11 00:59:42 CEST 2014

http://hg.python.org/jython/rev/eef3fcffc58a
changeset:   7288:eef3fcffc58a
user:        Jeff Allen <ja.py at farowl.co.uk>
date:        Tue Jun 10 23:40:28 2014 +0100
summary:
  Use a Java codec throughout the parser (instead of sometimes a Python one); see issue #2123.
This allows at least some programs to run where the console encoding is one
that Jython does not support with a Python codec.

files:
  src/org/python/antlr/GrammarActions.java |  71 ++++++-----
  src/org/python/core/ParserFacade.java    |  22 ++-
  2 files changed, 53 insertions(+), 40 deletions(-)

diff --git a/src/org/python/antlr/GrammarActions.java b/src/org/python/antlr/GrammarActions.java
--- a/src/org/python/antlr/GrammarActions.java
+++ b/src/org/python/antlr/GrammarActions.java
@@ -1,28 +1,18 @@
 package org.python.antlr;
 
+import java.math.BigInteger;
+import java.nio.ByteBuffer;
+import java.nio.charset.Charset;
+import java.util.ArrayList;
+import java.util.Iterator;
+import java.util.List;
+
 import org.antlr.runtime.Token;
-
-import org.python.core.Py;
-import org.python.core.PyComplex;
-import org.python.core.PyFloat;
-import org.python.core.PyInteger;
-import org.python.core.PyLong;
-import org.python.core.PyString;
-import org.python.core.PyUnicode;
-import org.python.core.codecs;
-import org.python.antlr.ast.alias;
-import org.python.antlr.ast.arguments;
-import org.python.antlr.ast.boolopType;
-import org.python.antlr.ast.cmpopType;
-import org.python.antlr.ast.expr_contextType;
-import org.python.antlr.ast.operatorType;
-import org.python.antlr.ast.unaryopType;
-import org.python.antlr.ast.Context;
-import org.python.antlr.ast.keyword;
 import org.python.antlr.ast.Attribute;
 import org.python.antlr.ast.BinOp;
 import org.python.antlr.ast.BoolOp;
 import org.python.antlr.ast.Call;
+import org.python.antlr.ast.Context;
 import org.python.antlr.ast.DictComp;
 import org.python.antlr.ast.ExtSlice;
 import org.python.antlr.ast.For;
@@ -34,26 +24,38 @@
 import org.python.antlr.ast.ListComp;
 import org.python.antlr.ast.Name;
 import org.python.antlr.ast.Num;
+import org.python.antlr.ast.Repr;
+import org.python.antlr.ast.SetComp;
 import org.python.antlr.ast.Slice;
+import org.python.antlr.ast.Str;
 import org.python.antlr.ast.TryExcept;
 import org.python.antlr.ast.TryFinally;
 import org.python.antlr.ast.Tuple;
-import org.python.antlr.ast.Repr;
-import org.python.antlr.ast.SetComp;
-import org.python.antlr.ast.Str;
 import org.python.antlr.ast.UnaryOp;
 import org.python.antlr.ast.While;
 import org.python.antlr.ast.With;
 import org.python.antlr.ast.Yield;
+import org.python.antlr.ast.alias;
+import org.python.antlr.ast.arguments;
+import org.python.antlr.ast.boolopType;
+import org.python.antlr.ast.cmpopType;
+import org.python.antlr.ast.expr_contextType;
+import org.python.antlr.ast.keyword;
+import org.python.antlr.ast.operatorType;
+import org.python.antlr.ast.unaryopType;
 import org.python.antlr.base.excepthandler;
 import org.python.antlr.base.expr;
 import org.python.antlr.base.slice;
 import org.python.antlr.base.stmt;
-
-import java.math.BigInteger;
-import java.util.ArrayList;
-import java.util.Iterator;
-import java.util.List;
+import org.python.core.Py;
+import org.python.core.PyComplex;
+import org.python.core.PyFloat;
+import org.python.core.PyInteger;
+import org.python.core.PyLong;
+import org.python.core.PyString;
+import org.python.core.PyUnicode;
+import org.python.core.codecs;
+import org.python.core.util.StringUtil;
 
 public class GrammarActions {
     private ErrorHandler errorHandler = null;
@@ -183,7 +185,7 @@
         }
         return result;
     }
-    
+
     List<stmt> makeElse(List elseSuite, PythonTree elif) {
         if (elseSuite != null) {
             return castStmts(elseSuite);
@@ -425,8 +427,9 @@
         }
         int ndigits = s.length();
         int i=0;
-        while (i < ndigits && s.charAt(i) == '0')
+        while (i < ndigits && s.charAt(i) == '0') {
             i++;
+        }
         if ((ndigits - i) > 11) {
             return Py.newLong(new BigInteger(s, radix));
         }
@@ -449,7 +452,7 @@
         String getString() {
             return s;
         }
-        
+
         boolean isUnicode() {
             return unicode;
         }
@@ -511,8 +514,10 @@
         // XXX: No need to re-encode when the encoding is iso-8859-1, but ParserFacade
         // needs to normalize the encoding name
         if (!ustring && encoding != null) {
-            // str with a specified encoding: first re-encode back out
-            string = new PyUnicode(string.substring(start, end)).encode(encoding);
+            // The parser used a non-latin encoding: re-encode chars to bytes.
+            Charset cs = Charset.forName(encoding);
+            ByteBuffer decoded = cs.encode(string.substring(start, end));
+            string = StringUtil.fromBytes(decoded);
             if (!raw) {
                 // Handle escapes in non-raw strs
                 string = PyString.decode_UnicodeEscape(string, 0, string.length(), "strict",
@@ -744,7 +749,7 @@
         }
         return result;
     }
-    
+
     BoolOp makeBoolOp(Token t, PythonTree left, boolopType op, List right) {
         List values = new ArrayList();
         values.add(left);
@@ -780,7 +785,7 @@
         }
         return result;
     }
- 
+
     slice castSlice(Object o) {
         if (o instanceof slice) {
             return (slice)o;
diff --git a/src/org/python/core/ParserFacade.java b/src/org/python/core/ParserFacade.java
--- a/src/org/python/core/ParserFacade.java
+++ b/src/org/python/core/ParserFacade.java
@@ -9,6 +9,7 @@
 import java.io.InputStreamReader;
 import java.io.Reader;
 import java.io.StringReader;
+import java.nio.ByteBuffer;
 import java.nio.charset.CharacterCodingException;
 import java.nio.charset.Charset;
 import java.nio.charset.CharsetDecoder;
@@ -54,8 +55,10 @@
                 return text;
             }
             if (reader.encoding != null) {
-                // restore the original encoding
-                text = new PyUnicode(text).encode(reader.encoding);
+                // The parser used a non-latin encoding: re-encode chars to bytes.
+                Charset cs = Charset.forName(reader.encoding);
+                ByteBuffer decoded = cs.encode(text);
+                text = StringUtil.fromBytes(decoded);
             }
             return text + "\n";
         } catch (IOException ioe) {
@@ -100,8 +103,9 @@
                         + reader.encoding + "'";
             }
             throw Py.SyntaxError(msg);
+        } else {
+            return Py.JavaError(t);
         }
-        else return Py.JavaError(t);
     }
 
     /**
@@ -121,7 +125,9 @@
             return parse(bufReader, CompileMode.eval, filename, cflags);
         } catch (Throwable t) {
             if (bufReader == null)
+             {
                 throw Py.JavaError(t); // can't do any more
+            }
             try {
                 // then, try parsing as a module
                 bufReader.reset();
@@ -169,7 +175,7 @@
             close(bufReader);
         }
     }
-    
+
     public static mod parse(InputStream stream,
                                 CompileMode kind,
                                 String filename,
@@ -268,11 +274,12 @@
         throws IOException {
         cflags.source_is_utf8 = true;
         cflags.encoding = "utf-8";
-        
+
         BufferedReader bufferedReader = new BufferedReader(reader);
         bufferedReader.mark(MARK_LIMIT);
-        if (findEncoding(bufferedReader) != null)
+        if (findEncoding(bufferedReader) != null) {
             throw new ParseException("encoding declaration in Unicode string");
+        }
         bufferedReader.reset();
 
         return new ExpectedEncodingBufferedReader(bufferedReader, null);
@@ -345,8 +352,9 @@
             CompilerFlags cflags,
             String filename)
             throws IOException {
-        if (cflags.source_is_utf8)
+        if (cflags.source_is_utf8) {
             return prepBufReader(new StringReader(string), cflags, filename);
+        }
 
         byte[] stringBytes = StringUtil.toBytes(string);
         return prepBufReader(new ByteArrayInputStream(stringBytes), cflags, filename, true, false);

-- 
Repository URL: http://hg.python.org/jython