[Jython-checkins] jython: Upgrade ANTLR to 3.5.2

jeff.allen jython-checkins at python.org
Fri Dec 28 10:42:04 EST 2018


https://hg.python.org/jython/rev/547c523f81c7
changeset:   8206:547c523f81c7
user:        Jeff Allen <ja.py at farowl.co.uk>
date:        Thu Dec 27 11:01:53 2018 +0000
summary:
  Upgrade ANTLR to 3.5.2

The aim is to get rid of the stack dumps that ANTLR 3.1.3 produces
running on Java 8.

This requires some tweaks to the grammar actions since ANTLR 3.5.2
generates List<Object> where it used to have a raw List. Also, the
ANTLR Lexer now returns real EOF tokens that CommonTokenStream
buffers, which we are careful not to consume in PythonTokenSource in
nextToken() or insertImaginaryIndentDedentTokens(). A minimum of
change restores passing behaviour. Some irregularities noticed during
debugging are left for later work.

files:
  build.xml                                   |    9 +-
  extlibs/antlr-3.1.3.jar                     |  Bin 
  extlibs/antlr-complete-3.5.2.jar            |  Bin 
  extlibs/antlr-runtime-3.1.3.jar             |  Bin 
  extlibs/antlr-runtime-3.5.2.jar             |  Bin 
  grammar/Python.g                            |   83 ++++++---
  grammar/PythonPartial.g                     |   31 ++-
  src/org/python/antlr/GrammarActions.java    |   10 +-
  src/org/python/antlr/PythonTokenSource.java |   23 +-
  src/org/python/indexer/demos/Styler.java    |    2 +-
  10 files changed, 97 insertions(+), 61 deletions(-)


diff --git a/build.xml b/build.xml
--- a/build.xml
+++ b/build.xml
@@ -144,9 +144,8 @@
             <pathelement path="${oracle.jar}" />
             <pathelement path="${extlibs.dir}/mysql-connector-java-5.1.42-bin.jar" />
             <pathelement path="${extlibs.dir}/postgresql-42.1.1.jre7.jar" />
-            <!-- pin to Antlr 3.1.3 until we upgrade parsing -->
-            <pathelement path="${extlibs.dir}/antlr-3.1.3.jar" />
-            <pathelement path="${extlibs.dir}/stringtemplate-3.2.1.jar" />
+            <!-- pin to Antlr 3 until we upgrade parsing -->
+            <pathelement path="${extlibs.dir}/antlr-complete-3.5.2.jar" />
             <pathelement path="${extlibs.dir}/commons-compress-1.14.jar"/>
             <pathelement path="${extlibs.dir}/asm-7.0.jar" />
             <pathelement path="${extlibs.dir}/asm-commons-7.0.jar" />
@@ -547,8 +546,8 @@
         <taskdef name="jarjar" classname="com.tonicsystems.jarjar.JarJarTask" classpath="extlibs/jarjar-1.4.jar"/>
         <jarjar destfile="${dist.dir}/${jython.deploy.jar}">
             <zipfileset src="${dist.dir}/${jython.dev.jar}"/>
-            <!-- pin to Antlr 3.1.3 until we upgrade parsing -->
-            <zipfileset src="extlibs/antlr-runtime-3.1.3.jar"/>
+            <!-- pin to Antlr 3 until we upgrade parsing -->
+            <zipfileset src="extlibs/antlr-runtime-3.5.2.jar"/>
             <rule pattern="org.antlr.runtime.**" result="org.python.antlr.runtime. at 1"/>
             <zipfileset src="extlibs/asm-7.0.jar"/>
             <zipfileset src="extlibs/asm-commons-7.0.jar"/>
diff --git a/extlibs/antlr-3.1.3.jar b/extlibs/antlr-3.1.3.jar
deleted file mode 100644
index 0ec52f864ebf29a8d3ba8fa21364c3ef687c7fdd..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
GIT binary patch
[stripped]
diff --git a/extlibs/antlr-complete-3.5.2.jar b/extlibs/antlr-complete-3.5.2.jar
new file mode 100644
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..260de7634889880167e5045e404774f1d3680b04
GIT binary patch
[stripped]
diff --git a/extlibs/antlr-runtime-3.1.3.jar b/extlibs/antlr-runtime-3.1.3.jar
deleted file mode 100644
index b0a9ea69f5c29097145a48c26e127972920db440..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
GIT binary patch
[stripped]
diff --git a/extlibs/antlr-runtime-3.5.2.jar b/extlibs/antlr-runtime-3.5.2.jar
new file mode 100644
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..d48e3e867968c7fe114e45168afb71c9a3cdf124
GIT binary patch
[stripped]
diff --git a/grammar/Python.g b/grammar/Python.g
--- a/grammar/Python.g
+++ b/grammar/Python.g
@@ -239,12 +239,15 @@
     }
 
     /**
-     *  Taken directly from antlr's Lexer.java -- needs to be re-integrated every time
-     *  we upgrade from Antlr (need to consider a Lexer subclass, though the issue would
-     *  remain).
+     * The text of this is mostly taken directly from ANTLR's Lexer.java,
+     * and ought to track changes there each time we get a new version,
+     * ... if there are any after 3.5.2. Also in PythonPartial.g.
      */
+    @Override
     public Token nextToken() {
+        // -- begin Jython addition
         startPos = getCharPositionInLine();
+        // -- end Jython addition
         while (true) {
             state.token = null;
             state.channel = Token.DEFAULT_CHANNEL;
@@ -253,10 +256,12 @@
             state.tokenStartLine = input.getLine();
             state.text = null;
             if ( input.LA(1)==CharStream.EOF ) {
+                // -- begin Jython addition
                 if (implicitLineJoiningLevel > 0) {
                     eofWhileNested = true;
                 }
-                return Token.EOF_TOKEN;
+                // -- end Jython addition
+                return getEOFToken();
             }
             try {
                 mTokens();
@@ -267,21 +272,30 @@
                     continue;
                 }
                 return state.token;
+                // -- begin Jython addition
             } catch (NoViableAltException nva) {
                 reportError(nva);
                 errorHandler.recover(this, nva); // throw out current char and try again
             } catch (FailedPredicateException fp) {
-                //XXX: added this for failed STRINGPART -- the FailedPredicateException
-                //     hides a NoViableAltException.  This should be the only
-                //     FailedPredicateException that gets thrown by the lexer.
+                // Added this for failed STRINGPART -- the FailedPredicateException
+                // hides a NoViableAltException. This should be the only
+                // FailedPredicateException that gets thrown by the lexer.
                 reportError(fp);
                 errorHandler.recover(this, fp); // throw out current char and try again
+                // -- end Jython addition
+            } catch (MismatchedRangeException re) {
+                reportError(re);
+                // matchRange() routine has already called recover()
+            } catch (MismatchedTokenException re) {
+                reportError(re);
+                // match() routine has already called recover()
             } catch (RecognitionException re) {
                 reportError(re);
-                // match() routine has already called recover()
+                recover(re); // throw out current char and try again
             }
         }
     }
+
     @Override
     public void displayRecognitionError(String[] tokenNames, RecognitionException e) {
         //Do nothing. We will handle error display elsewhere.
@@ -995,10 +1009,9 @@
 //import_as_names: import_as_name (',' import_as_name)* [',']
 import_as_names
     returns [List<alias> atypes]
-    : n+=import_as_name (COMMA! n+=import_as_name)*
-    {
-        $atypes = $n;
-    }
+    @init{$atypes = new ArrayList<alias>();}
+    : n=import_as_name {$atypes.add($n.atype);}
+        (COMMA! n=import_as_name {$atypes.add($n.atype);})*
     ;
 
 //import_as_name: NAME [('as' | NAME) NAME]
@@ -1029,32 +1042,31 @@
 //dotted_as_names: dotted_as_name (',' dotted_as_name)*
 dotted_as_names
     returns [List<alias> atypes]
-    : d+=dotted_as_name (COMMA! d+=dotted_as_name)*
-    {
-        $atypes = $d;
-    }
+    @init{$atypes = new ArrayList<alias>();}
+    : d=dotted_as_name {$atypes.add($d.atype);}
+        (COMMA! d=dotted_as_name {$atypes.add($d.atype);})*
     ;
 
 //dotted_name: NAME ('.' NAME)*
 dotted_name
     returns [List<Name> names]
-    : NAME (DOT dn+=attr)*
-    {
-        $names = actions.makeDottedName($NAME, $dn);
-    }
+    @init{List<PythonTree> dnList = new ArrayList<>();}
+    : NAME (DOT dn=attr {dnList.add($dn.tree);})*
+        {$names = actions.makeDottedName($NAME, dnList);}
     ;
 
 //global_stmt: 'global' NAME (',' NAME)*
 global_stmt
 @init {
     stmt stype = null;
+    List<Token> names = new ArrayList<>();
 }
 @after {
    $global_stmt.tree = stype;
 }
-    : GLOBAL n+=NAME (COMMA n+=NAME)*
+    : GLOBAL n=NAME {names.add($n);} (COMMA n=NAME {names.add($n);})*
       {
-          stype = new Global($GLOBAL, actions.makeNames($n), actions.makeNameNodes($n));
+          stype = new Global($GLOBAL, actions.makeNames(names), actions.makeNameNodes(names));
       }
     ;
 
@@ -1185,14 +1197,17 @@
 try_stmt
 @init {
     stmt stype = null;
+    List<excepthandler> exceptClauses = new ArrayList<>();
 }
 @after {
    $try_stmt.tree = stype;
 }
     : TRY COLON trysuite=suite[!$suite.isEmpty() && $suite::continueIllegal]
-      ( e+=except_clause+ (ORELSE COLON elsesuite=suite[!$suite.isEmpty() && $suite::continueIllegal])? (FINALLY COLON finalsuite=suite[true])?
+      ( (e=except_clause {exceptClauses.add((excepthandler)$e.tree);})+ 
+            (ORELSE COLON elsesuite=suite[!$suite.isEmpty() && $suite::continueIllegal])?
+            (FINALLY COLON finalsuite=suite[true])?
         {
-            stype = actions.makeTryExcept($TRY, $trysuite.stypes, $e, $elsesuite.stypes, $finalsuite.stypes);
+            stype = actions.makeTryExcept($TRY, $trysuite.stypes, exceptClauses, $elsesuite.stypes, $finalsuite.stypes);
         }
       | FINALLY COLON finalsuite=suite[true]
         {
@@ -1205,14 +1220,15 @@
 with_stmt
 @init {
     stmt stype = null;
+    List<With> withList = new ArrayList<>();
 }
 @after {
    $with_stmt.tree = stype;
 }
-    : WITH w+=with_item (options {greedy=true;}:COMMA w+=with_item)* COLON suite[false]
-      {
-          stype = actions.makeWith($WITH, $w, $suite.stypes);
-      }
+    : WITH w=with_item {withList.add((With)$w.tree);}
+        (options {greedy=true;}:COMMA w=with_item {withList.add((With)$w.tree);})*
+        COLON suite[false]
+        {stype = actions.makeWith($WITH, withList, $suite.stypes);}
     ;
 
 //with_item: test ['as' expr]
@@ -1252,7 +1268,7 @@
 
 //suite: simple_stmt | NEWLINE INDENT stmt+ DEDENT
 suite
-    [boolean fromFinally] returns [List stypes]
+    [boolean fromFinally] returns [List<stmt> stypes]
 scope {
     boolean continueIllegal;
 }
@@ -1966,10 +1982,11 @@
 //Needed as an exprlist that does not produce tuples for del_stmt.
 del_list
     returns [List<expr> etypes]
-    : e+=expr[expr_contextType.Del] (options {k=2;}: COMMA e+=expr[expr_contextType.Del])* (COMMA)?
-      {
-          $etypes = actions.makeDeleteList($e);
-      }
+    @init{List<PythonTree> exprList = new ArrayList<>();}
+    : e=expr[expr_contextType.Del] {exprList.add($e.tree);}
+        (options {k=2;}: COMMA e=expr[expr_contextType.Del]
+            {exprList.add($e.tree);})* (COMMA)?
+        {$etypes = actions.makeDeleteList(exprList);}
     ;
 
 //testlist: test (',' test)* [',']
diff --git a/grammar/PythonPartial.g b/grammar/PythonPartial.g
--- a/grammar/PythonPartial.g
+++ b/grammar/PythonPartial.g
@@ -116,12 +116,15 @@
     }
 
     /**
-     *  Taken directly from antlr's Lexer.java -- needs to be re-integrated every time
-     *  we upgrade from Antlr (need to consider a Lexer subclass, though the issue would
-     *  remain).
+     * The text of this is mostly taken directly from ANTLR's Lexer.java,
+     * and ought to track changes there each time we get a new version,
+     * ... if there are any after 3.5.2. Also in PythonPartial.g.
      */
+    @Override
     public Token nextToken() {
+        // -- begin Jython addition
         startPos = getCharPositionInLine();
+        // -- end Jython addition
         while (true) {
             state.token = null;
             state.channel = Token.DEFAULT_CHANNEL;
@@ -130,10 +133,12 @@
             state.tokenStartLine = input.getLine();
             state.text = null;
             if ( input.LA(1)==CharStream.EOF ) {
+                // -- begin Jython addition
                 if (implicitLineJoiningLevel > 0) {
                     eofWhileNested = true;
                 }
-                return Token.EOF_TOKEN;
+                // -- end Jython addition
+                return getEOFToken();
             }
             try {
                 mTokens();
@@ -144,18 +149,28 @@
                     continue;
                 }
                 return state.token;
+                // -- begin Jython addition
             } catch (NoViableAltException nva) {
                 errorHandler.reportError(this, nva);
                 errorHandler.recover(this, nva); // throw out current char and try again
             } catch (FailedPredicateException fp) {
-                //XXX: added this for failed STRINGPART -- the FailedPredicateException
-                //     hides a NoViableAltException.  This should be the only
-                //     FailedPredicateException that gets thrown by the lexer.
+                // Added this for failed STRINGPART -- the FailedPredicateException
+                // hides a NoViableAltException. This should be the only
+                // FailedPredicateException that gets thrown by the lexer.
                 errorHandler.reportError(this, fp);
                 errorHandler.recover(this, fp); // throw out current char and try again
+                // -- end Jython addition
+            } catch (MismatchedRangeException re) {
+                reportError(re);
+                // matchRange() routine has already called recover()
+            } catch (MismatchedTokenException re) {
+                reportError(re);
+                // match() routine has already called recover()
             } catch (RecognitionException re) {
+                // -- Jython replaces: reportError(this, re) with:
                 errorHandler.reportError(this, re);
-                // match() routine has already called recover()
+                // -- end Jython replacement
+                recover(re); // throw out current char and try again
             }
         }
     }
diff --git a/src/org/python/antlr/GrammarActions.java b/src/org/python/antlr/GrammarActions.java
--- a/src/org/python/antlr/GrammarActions.java
+++ b/src/org/python/antlr/GrammarActions.java
@@ -129,10 +129,10 @@
         return result;
     }
 
-    List<String> makeNames(List names) {
+    List<String> makeNames(List<Token> names) {
         List<String> s = new ArrayList<String>();
-        for(int i=0;i<names.size();i++) {
-            s.add(((Token)names.get(i)).getText());
+        for(Token name : names) {
+            s.add(name.getText());
         }
         return s;
     }
@@ -146,8 +146,8 @@
 
     List<Name> makeNameNodes(List<Token> names) {
         List<Name> s = new ArrayList<Name>();
-        for (int i=0; i<names.size(); i++) {
-            s.add(makeNameNode(names.get(i)));
+        for(Token name : names) {
+            s.add(makeNameNode(name));
         }
         return s;
     }
diff --git a/src/org/python/antlr/PythonTokenSource.java b/src/org/python/antlr/PythonTokenSource.java
--- a/src/org/python/antlr/PythonTokenSource.java
+++ b/src/org/python/antlr/PythonTokenSource.java
@@ -135,8 +135,10 @@
         // if something in queue, just remove and return it
         if (tokens.size() > 0) {
             Token t = tokens.firstElement();
-            tokens.removeElementAt(0);
-            //System.out.println(filename + t);
+            if (t.getType() != Token.EOF) { // EOF stops further insertImaginaryIndentDedentTokens
+                tokens.removeElementAt(0);
+            }
+            // System.out.println(filename + t);
             return t;
         }
 
@@ -165,7 +167,6 @@
 
     protected void insertImaginaryIndentDedentTokens() {
         Token t = stream.LT(1);
-        stream.consume();
 
         if (t.getType() == Token.EOF) {
             Token prev = stream.LT(-1);
@@ -187,13 +188,12 @@
         } else if (t.getType() == PythonLexer.NEWLINE) {
             // save NEWLINE in the queue
             //System.out.println("found newline: "+t+" stack is "+stackString());
-            enqueueHiddens(t);
-            tokens.addElement(t);
+            enqueue(t);
             Token newline = t;
+            stream.consume();
 
             // grab first token of next line
             t = stream.LT(1);
-            stream.consume();
 
             List<Token> commentedNewlines = enqueueHiddens(t);
 
@@ -204,13 +204,15 @@
                 cpos = -1; // pretend EOF always happens at left edge
             }
             else if (t.getType() == PythonLexer.LEADING_WS) {
+                stream.consume();
                 Token next = stream.LT(1);
                 if (next != null && next.getType() == Token.EOF) {
-                    stream.consume();
                     return;
                 } else {
                     cpos = t.getText().length();
                 }
+            } else {
+                stream.consume();
             }
 
             //System.out.println("next token is: "+t);
@@ -241,9 +243,10 @@
 
         } else {
             enqueue(t);
+            stream.consume();
         }
     }
-    
+
     private void enqueue(Token t) {
         enqueueHiddens(t);
         tokens.addElement(t);
@@ -276,7 +279,9 @@
                 }
             }
         }
-        List<Token> hiddenTokens = stream.getTokens(lastTokenAddedIndex + 1,t.getTokenIndex() - 1);
+
+        List<? extends Token> hiddenTokens =
+                stream.getTokens(lastTokenAddedIndex + 1, t.getTokenIndex() - 1);
         if (hiddenTokens != null) {
             tokens.addAll(hiddenTokens);
         }
diff --git a/src/org/python/indexer/demos/Styler.java b/src/org/python/indexer/demos/Styler.java
--- a/src/org/python/indexer/demos/Styler.java
+++ b/src/org/python/indexer/demos/Styler.java
@@ -151,7 +151,7 @@
             });
 
         Token tok;
-        while ((tok = lex.nextToken()) != Token.EOF_TOKEN) {
+        while ((tok = lex.nextToken()).getType() != Token.EOF) {
             switch (tok.getType()) {
                 case PythonLexer.STRING: {
                     int beg = ((CommonToken)tok).getStartIndex();

-- 
Repository URL: https://hg.python.org/jython


More information about the Jython-checkins mailing list