[Jython-checkins] jython: Conform to Python 2 byte and unicode definitions of white space.

jeff.allen jython-checkins at python.org
Fri Oct 26 14:13:27 EDT 2018


https://hg.python.org/jython/rev/a1f68d091a1c
changeset:   8190:a1f68d091a1c
user:        Jeff Allen <ja.py at farowl.co.uk>
date:        Thu Oct 25 23:32:11 2018 +0100
summary:
  Conform to Python 2 byte and unicode definitions of white space.

Jython traditionally accepted Java's definition of "white space", but in fact
Java has several definitions, and they evolve with Unicode versions. This
change set brings Jython into line with Python 2 in str, unicode and the re
module. It requires that we no longer share some code between PyString and
PyUnicode. (It also cleans up some trailing white space in SRE_STATE.java.)

files:
  Lib/test/test_bytes_jy.py                 |    2 +-
  Lib/test/test_unicode_jy.py               |   13 +-
  src/org/python/core/PyString.java         |   82 ++--
  src/org/python/core/PyUnicode.java        |  166 +++++++++-
  src/org/python/modules/sre/SRE_STATE.java |   60 +-
  5 files changed, 235 insertions(+), 88 deletions(-)


diff --git a/Lib/test/test_bytes_jy.py b/Lib/test/test_bytes_jy.py
--- a/Lib/test/test_bytes_jy.py
+++ b/Lib/test/test_bytes_jy.py
@@ -70,7 +70,7 @@
     LOWER = b'\xe0\xe7\xe9\xff' # Uppercase in Latin-1 but not ascii
     UPPER = b'\xc0\xc7\xc9\xdd' # Lowercase in Latin-1 but not ascii
     DIGIT = b'\xb9\xb2\xb3'     # sup 1, 2, 3: numeric in Python (not Java)
-    SPACE = b'\x85\xa0'         # NEXT LINE, NBSP: space in Python (not Java)
+    SPACE = b'\x85\xa0'         # NEXT LINE, NBSP: space in unicode (not in str/bytes)
 
     def test_isalpha(self):
         for c in self.UPPER + self.LOWER:
diff --git a/Lib/test/test_unicode_jy.py b/Lib/test/test_unicode_jy.py
--- a/Lib/test/test_unicode_jy.py
+++ b/Lib/test/test_unicode_jy.py
@@ -854,7 +854,7 @@
 
 
 class UnicodeSpaceTest(unittest.TestCase):
-    # Test classification of characters as whitespace (some Jython divergence)
+    # Test classification of characters as whitespace (strictly as observed in CPython)
 
     def checkequal(self, expected, obj, methodname, *args):
         "check that object.method() returns expected result"
@@ -863,15 +863,10 @@
         self.assertEqual(expected, realresult, grumble)
         # print grumble, 'x' if realresult != expected else '.'
 
-    # The set of Unicode characters that are spaces according to CPython 2.7.8
-    SPACE = u'\t\n\x0b\x0c\r\x1c\x1d\x1e\x1f\x20\x85\xa0\u1680\u180e' + \
-            u'\u2000\u2001\u2002\u2003\u2004\u2005\u2006\u2007\u2008\u2009\u200a' + \
+    # The set of Unicode characters that are spaces according to CPython 2.7.15
+    SPACE = u'\t\n\x0b\x0c\r\x1c\x1d\x1e\x1f\x20\x85\xa0\u1680\u180e' \
+            u'\u2000\u2001\u2002\u2003\u2004\u2005\u2006\u2007\u2008\u2009\u200a' \
             u'\u2028\u2029\u202f\u205f\u3000'
-    if test_support.is_jython:
-        # Not whitespace in Jython based on java.lang.Character.isWhitespace.
-        # This test documents the divergence, until we decide to remove it.
-        for c in u'\x85\xa0\u2007\u202f':
-            SPACE = SPACE.replace(c, u'')
 
     def test_isspace(self):
         for c in self.SPACE:
diff --git a/src/org/python/core/PyString.java b/src/org/python/core/PyString.java
--- a/src/org/python/core/PyString.java
+++ b/src/org/python/core/PyString.java
@@ -762,7 +762,7 @@
 
         if (c == Character.TYPE || c == Character.class) {
             if (getString().length() == 1) {
-                return new Character(getString().charAt(0));
+                return getString().charAt(0);
             }
         }
 
@@ -1233,16 +1233,15 @@
      * @return a new String, stripped of the whitespace characters/bytes
      */
     protected final String _strip() {
-        String s = getString();
         // Rightmost non-whitespace
-        int right = _stripRight(s);
+        int right = _findRight();
         if (right < 0) {
             // They're all whitespace
             return "";
         } else {
             // Leftmost non-whitespace character: right known not to be a whitespace
-            int left = _stripLeft(s, right);
-            return s.substring(left, right + 1);
+            int left = _findLeft(right);
+            return getString().substring(left, right + 1);
         }
     }
 
@@ -1264,16 +1263,15 @@
             // Divert to the whitespace version
             return _strip();
         } else {
-            String s = getString();
             // Rightmost non-matching character
-            int right = _stripRight(s, stripChars);
+            int right = _findRight(stripChars);
             if (right < 0) {
                 // They all match
                 return "";
             } else {
                 // Leftmost non-matching character: right is known not to match
-                int left = _stripLeft(s, stripChars, right);
-                return s.substring(left, right + 1);
+                int left = _findLeft(stripChars, right);
+                return getString().substring(left, right + 1);
             }
         }
     }
@@ -1281,13 +1279,13 @@
     /**
      * Helper for <code>strip</code>, <code>lstrip</code> implementation, when stripping whitespace.
      *
-     * @param s string to search (only <code>s[0:right]</code> is searched).
      * @param right rightmost extent of string search
-     * @return index of lefttmost non-whitespace character or <code>right</code> if they all are.
+     * @return index of leftmost non-whitespace character or <code>right</code> if they all are.
      */
-    private static final int _stripLeft(String s, int right) {
+    protected int _findLeft(int right) {
+        String s = getString();
         for (int left = 0; left < right; left++) {
-            if (!Character.isWhitespace(s.charAt(left))) {
+            if (!BaseBytes.isspace((byte) s.charAt(left))) {
                 return left;
             }
         }
@@ -1298,13 +1296,13 @@
      * Helper for <code>strip</code>, <code>lstrip</code> implementation, when stripping specified
      * characters.
      *
-     * @param s string to search (only <code>s[0:right]</code> is searched).
      * @param stripChars specifies set of characters to strip
      * @param right rightmost extent of string search
      * @return index of leftmost character not in <code>stripChars</code> or <code>right</code> if
      *         they all are.
      */
-    private static final int _stripLeft(String s, String stripChars, int right) {
+    private int _findLeft(String stripChars, int right) {
+        String s = getString();
         for (int left = 0; left < right; left++) {
             if (stripChars.indexOf(s.charAt(left)) < 0) {
                 return left;
@@ -1316,12 +1314,12 @@
     /**
      * Helper for <code>strip</code>, <code>rstrip</code> implementation, when stripping whitespace.
      *
-     * @param s string to search.
      * @return index of rightmost non-whitespace character or -1 if they all are.
      */
-    private static final int _stripRight(String s) {
+    protected int _findRight() {
+        String s = getString();
         for (int right = s.length(); --right >= 0;) {
-            if (!Character.isWhitespace(s.charAt(right))) {
+            if (!BaseBytes.isspace((byte) s.charAt(right))) {
                 return right;
             }
         }
@@ -1332,11 +1330,11 @@
      * Helper for <code>strip</code>, <code>rstrip</code> implementation, when stripping specified
      * characters.
      *
-     * @param s string to search.
      * @param stripChars specifies set of characters to strip
      * @return index of rightmost character not in <code>stripChars</code> or -1 if they all are.
      */
-    private static final int _stripRight(String s, String stripChars) {
+    private int _findRight(String stripChars) {
+        String s = getString();
         for (int right = s.length(); --right >= 0;) {
             if (stripChars.indexOf(s.charAt(right)) < 0) {
                 return right;
@@ -1405,7 +1403,7 @@
     protected final String _lstrip() {
         String s = getString();
         // Leftmost non-whitespace character: cannot exceed length
-        int left = _stripLeft(s, s.length());
+        int left = _findLeft(s.length());
         return s.substring(left);
     }
 
@@ -1429,7 +1427,7 @@
         } else {
             String s = getString();
             // Leftmost matching character: cannot exceed length
-            int left = _stripLeft(s, stripChars, s.length());
+            int left = _findLeft(stripChars, s.length());
             return s.substring(left);
         }
     }
@@ -1492,15 +1490,14 @@
      * @return a new String, stripped of the whitespace characters/bytes
      */
     protected final String _rstrip() {
-        String s = getString();
         // Rightmost non-whitespace
-        int right = _stripRight(s);
+        int right = _findRight();
         if (right < 0) {
             // They're all whitespace
             return "";
         } else {
             // Substring up to and including this rightmost non-whitespace
-            return s.substring(0, right + 1);
+            return getString().substring(0, right + 1);
         }
     }
 
@@ -1522,11 +1519,10 @@
             // Divert to the whitespace version
             return _rstrip();
         } else {
-            String s = getString();
             // Rightmost non-matching character
-            int right = _stripRight(s, stripChars);
+            int right = _findRight(stripChars);
             // Substring up to and including this rightmost non-matching character (or "")
-            return s.substring(0, right + 1);
+            return getString().substring(0, right + 1);
         }
     }
 
@@ -1631,16 +1627,15 @@
     }
 
     /**
-     * Helper function for <code>.split</code>, in <code>str</code> and <code>unicode</code>,
-     * splitting on white space and returning a list of the separated parts. If there are more than
-     * <code>maxsplit</code> feasible the last element of the list is the remainder of the original
-     * (this) string. The split sections will be {@link PyUnicode} if this object is a
-     * <code>PyUnicode</code>.
+     * Helper function for <code>.split</code>, in <code>str</code> and (when overridden) in
+     * <code>unicode</code>, splitting on white space and returning a list of the separated parts.
+     * If there are more than <code>maxsplit</code> feasible splits the last element of the list is
+     * the remainder of the original (this) string.
      *
      * @param maxsplit limit on the number of splits (if >=0)
      * @return <code>PyList</code> of split sections
      */
-    private PyList splitfields(int maxsplit) {
+    protected PyList splitfields(int maxsplit) {
         /*
          * Result built here is a list of split parts, exactly as required for s.split(None,
          * maxsplit). If there are to be n splits, there will be n+1 elements in L.
@@ -1660,7 +1655,7 @@
 
             // Find the next occurrence of non-whitespace
             while (start < length) {
-                if (!Character.isWhitespace(s.charAt(start))) {
+                if (!BaseBytes.isspace((byte) s.charAt(start))) {
                     // Break leaving start pointing at non-whitespace
                     break;
                 }
@@ -1678,7 +1673,7 @@
             } else {
                 // The next segment runs up to the next next whitespace or end
                 for (index = start; index < length; index++) {
-                    if (Character.isWhitespace(s.charAt(index))) {
+                    if (BaseBytes.isspace((byte) s.charAt(index))) {
                         // Break leaving index pointing at whitespace
                         break;
                     }
@@ -1883,16 +1878,15 @@
     }
 
     /**
-     * Helper function for <code>.rsplit</code>, in <code>str</code> and <code>unicode</code>,
-     * splitting on white space and returning a list of the separated parts. If there are more than
-     * <code>maxsplit</code> feasible the first element of the list is the remainder of the original
-     * (this) string. The split sections will be {@link PyUnicode} if this object is a
-     * <code>PyUnicode</code>.
+     * Helper function for <code>.rsplit</code>, in <code>str</code> and (when overridden) in
+     * <code>unicode</code>, splitting on white space and returning a list of the separated parts.
+     * If there are more than <code>maxsplit</code> feasible splits the first element of the list is
+     * the remainder of the original (this) string.
      *
      * @param maxsplit limit on the number of splits (if >=0)
      * @return <code>PyList</code> of split sections
      */
-    private PyList rsplitfields(int maxsplit) {
+    protected PyList rsplitfields(int maxsplit) {
         /*
          * Result built here (in reverse) is a list of split parts, exactly as required for
          * s.rsplit(None, maxsplit). If there are to be n splits, there will be n+1 elements.
@@ -1912,7 +1906,7 @@
 
             // Find the next occurrence of non-whitespace (working leftwards)
             while (end >= 0) {
-                if (!Character.isWhitespace(s.charAt(end))) {
+                if (!BaseBytes.isspace((byte) s.charAt(end))) {
                     // Break leaving end pointing at non-whitespace
                     break;
                 }
@@ -1930,7 +1924,7 @@
             } else {
                 // The next segment runs back to the next next whitespace or beginning
                 for (index = end; index >= 0; --index) {
-                    if (Character.isWhitespace(s.charAt(index))) {
+                    if (BaseBytes.isspace((byte) s.charAt(index))) {
                         // Break leaving index pointing at whitespace
                         break;
                     }
diff --git a/src/org/python/core/PyUnicode.java b/src/org/python/core/PyUnicode.java
--- a/src/org/python/core/PyUnicode.java
+++ b/src/org/python/core/PyUnicode.java
@@ -1209,6 +1209,15 @@
         return new PyUnicode(buffer);
     }
 
+    /** Define what characters are to be treated as a space according to Python 2. */
+    private static boolean isPythonSpace(int ch) {
+        // Use the Java built-in methods as far as possible
+        return Character.isWhitespace(ch)    // catches the ASCII spaces and some others
+                || Character.isSpaceChar(ch) // catches remaining Unicode spaces
+                || ch == 0x0085  // NEXT LINE (not a space in Java)
+                || ch == 0x180e; // MONGOLIAN VOWEL SEPARATOR (not a space in Java 9+ or Python 3)
+    }
+
     private static class StripIterator implements Iterator<Integer> {
 
         private final Iterator<Integer> iter;
@@ -1231,7 +1240,7 @@
             } else {
                 while (iter.hasNext()) {
                     int codePoint = iter.next();
-                    if (!Character.isWhitespace(codePoint)) {
+                    if (!isPythonSpace(codePoint)) {
                         lookahead = codePoint;
                         return;
                     }
@@ -1350,6 +1359,30 @@
                 new StripIterator(sep, new ReversedIterator<>(newSubsequenceIterator()))));
     }
 
+    /** {@inheritDoc} */
+    @Override
+    protected int _findLeft(int right) {
+        String s = getString();
+        for (int left = 0; left < right; left++) {
+            if (!isPythonSpace(s.charAt(left))) {
+                return left;
+            }
+        }
+        return right;
+    }
+
+    /** {@inheritDoc} */
+    @Override
+    protected int _findRight() {
+        String s = getString();
+        for (int right = s.length(); --right >= 0;) {
+            if (!isPythonSpace(s.charAt(right))) {
+                return right;
+            }
+        }
+        return -1;
+    }
+
     @Override
     public PyTuple partition(PyObject sep) {
         return unicode_partition(sep);
@@ -1418,7 +1451,7 @@
 
             while (iter.hasNext()) {
                 int codepoint = iter.next();
-                if (Character.isWhitespace(codepoint)) {
+                if (isPythonSpace(codepoint)) {
                     completeSeparator = true;
                     if (!atBeginning) {
                         inSeparator = true;
@@ -1624,6 +1657,67 @@
         }
     }
 
+    /**
+     * {@inheritDoc} The split sections will be {@link PyUnicode} and use the Python
+     * <code>unicode</code> definition of "space".
+     */
+    @Override
+    protected PyList splitfields(int maxsplit) {
+        /*
+         * Result built here is a list of split parts, exactly as required for s.split(None,
+         * maxsplit). If there are to be n splits, there will be n+1 elements in L.
+         */
+        PyList list = new PyList();
+
+        String s = getString();
+        int length = s.length(), start = 0, splits = 0, index;
+
+        if (maxsplit < 0) {
+            // Make all possible splits: there can't be more than:
+            maxsplit = length;
+        }
+
+        // start is always the first character not consumed into a piece on the list
+        while (start < length) {
+
+            // Find the next occurrence of non-whitespace
+            while (start < length) {
+                if (!isPythonSpace(s.charAt(start))) {
+                    // Break leaving start pointing at non-whitespace
+                    break;
+                }
+                start++;
+            }
+
+            if (start >= length) {
+                // Only found whitespace so there is no next segment
+                break;
+
+            } else if (splits >= maxsplit) {
+                // The next segment is the last and contains all characters up to the end
+                index = length;
+
+            } else {
+                // The next segment runs up to the next next whitespace or end
+                for (index = start; index < length; index++) {
+                    if (isPythonSpace(s.charAt(index))) {
+                        // Break leaving index pointing at whitespace
+                        break;
+                    }
+                }
+            }
+
+            // Make a piece from start up to index
+            list.append(fromSubstring(start, index));
+            splits++;
+
+            // Start next segment search at that point
+            start = index;
+        }
+
+        return list;
+    }
+
     @ExposedMethod(defaults = {"null", "-1"}, doc = BuiltinDocs.unicode_rsplit_doc)
     final PyList unicode_rsplit(PyObject sepObj, int maxsplit) {
         String sep = coerceToString(sepObj, true);
@@ -1634,6 +1728,68 @@
         }
     }
 
+    /**
+     * {@inheritDoc} The split sections will be {@link PyUnicode} and use the Python
+     * <code>unicode</code> definition of "space".
+     */
+    @Override
+    protected PyList rsplitfields(int maxsplit) {
+        /*
+         * Result built here (in reverse) is a list of split parts, exactly as required for
+         * s.rsplit(None, maxsplit). If there are to be n splits, there will be n+1 elements.
+         */
+        PyList list = new PyList();
+
+        String s = getString();
+        int length = s.length(), end = length - 1, splits = 0, index;
+
+        if (maxsplit < 0) {
+            // Make all possible splits: there can't be more than:
+            maxsplit = length;
+        }
+
+        // end is always the rightmost character not consumed into a piece on the list
+        while (end >= 0) {
+
+            // Find the next occurrence of non-whitespace (working leftwards)
+            while (end >= 0) {
+                if (!isPythonSpace(s.charAt(end))) {
+                    // Break leaving end pointing at non-whitespace
+                    break;
+                }
+                --end;
+            }
+
+            if (end < 0) {
+                // Only found whitespace so there is no next segment
+                break;
+
+            } else if (splits >= maxsplit) {
+                // The next segment is the last and contains all characters back to the beginning
+                index = -1;
+
+            } else {
+                // The next segment runs back to the next next whitespace or beginning
+                for (index = end; index >= 0; --index) {
+                    if (isPythonSpace(s.charAt(index))) {
+                        // Break leaving index pointing at whitespace
+                        break;
+                    }
+                }
+            }
+
+            // Make a piece from index+1 start up to end+1
+            list.append(fromSubstring(index + 1, end + 1));
+            splits++;
+
+            // Start next segment search at that point
+            end = index;
+        }
+
+        list.reverse();
+        return list;
+    }
+
     @ExposedMethod(defaults = "false", doc = BuiltinDocs.unicode___getslice___doc)
     final PyList unicode_splitlines(boolean keepends) {
         return new PyList(new LineSplitIterator(keepends));
@@ -2089,7 +2245,7 @@
             return false;
         }
         for (Iterator<Integer> iter = newSubsequenceIterator(); iter.hasNext();) {
-            if (!Character.isWhitespace(iter.next())) {
+            if (!isPythonSpace(iter.next())) {
                 return false;
             }
         }
@@ -2190,7 +2346,7 @@
         int i = 0;
         for (Iterator<Integer> iter = newSubsequenceIterator(); iter.hasNext(); i++) {
             int codePoint = iter.next();
-            if (Character.isWhitespace(codePoint)) {
+            if (isPythonSpace(codePoint)) {
                 sb.append(' ');
                 continue;
             }
@@ -2221,7 +2377,7 @@
         StringBuilder sb = new StringBuilder();
         for (int i = 0; i < getString().length(); i++) {
             char ch = getString().charAt(i);
-            if (Character.isWhitespace(ch)) {
+            if (isPythonSpace(ch)) {
                 sb.append(' ');
                 continue;
             }
diff --git a/src/org/python/modules/sre/SRE_STATE.java b/src/org/python/modules/sre/SRE_STATE.java
--- a/src/org/python/modules/sre/SRE_STATE.java
+++ b/src/org/python/modules/sre/SRE_STATE.java
@@ -25,16 +25,16 @@
 import org.python.core.PyString;
 
 public class SRE_STATE {
-    
+
     /*
      * Generated from Python-2.4.5 like 'python headerToJava.py < Modules/sre_constants.h'
-     * where headerToJava.py contains the following code 
+     * where headerToJava.py contains the following code
 import sys
 for line in sys.stdin:
    if line.startswith('#define'):
        line = line.replace('#define', 'public static final int').strip()
        segs = line.split(' ')
-       print '%s = %s;' % (' '.join(segs[:-1]), segs[-1])                                                          
+       print '%s = %s;' % (' '.join(segs[:-1]), segs[-1])
      */
     //BEGIN generated code
     public static final int SRE_MAGIC = 20031017;
@@ -114,7 +114,7 @@
 
     //From here we're including things from _sre.c in the order they're defined there
     public static final int USE_RECURSION_LIMIT = 5000;
-    
+
     /* error codes */
     public static final int SRE_ERROR_ILLEGAL = -1;
     public static final int SRE_ERROR_STATE   = -2;
@@ -194,7 +194,7 @@
             return false;
         }
     }
-    
+
     final boolean sre_category(int category, int ch) {
         switch (category) {
 
@@ -230,9 +230,11 @@
             return !Character.isDigit(ch);
 
         case SRE_CATEGORY_UNI_SPACE:
-            return Character.isSpaceChar(ch) || Character.isWhitespace(ch) || ch == 0x0085;
+            return Character.isSpaceChar(ch) || Character.isWhitespace(ch) ||
+                    ch == 0x0085 || ch == 0x180e;
         case SRE_CATEGORY_UNI_NOT_SPACE:
-            return !(Character.isSpaceChar(ch) || Character.isWhitespace(ch) || ch == 0x0085);
+            return !(Character.isSpaceChar(ch) || Character.isWhitespace(ch) ||
+                    ch == 0x0085 || ch == 0x180e);
 
         case SRE_CATEGORY_UNI_WORD:
             return Character.isLetterOrDigit(ch) || ch == '_';
@@ -293,7 +295,7 @@
     }
 
     private void mark_restore(int lo, int hi, int mark_stack_base) {
-        
+
         if (hi <= lo)
             return;
 
@@ -305,7 +307,7 @@
 
         System.arraycopy(mark_stack, this.mark_stack_base, mark, lo, size);
     }
-    
+
     final boolean SRE_AT(int ptr, int at) {
         /* check if pointer is at given position. */
 
@@ -376,7 +378,7 @@
             case SRE_OP_FAILURE:
 //                TRACE(setidx, ch, "CHARSET FAILURE");
                 return !ok;
-                
+
             case SRE_OP_LITERAL:
 //                TRACE(setidx, ch, "CHARSET LITERAL " + set[setidx]);
                 /* <LITERAL> <code> */
@@ -384,7 +386,7 @@
                     return ok;
                 setidx++;
                 break;
-                
+
             case SRE_OP_CATEGORY:
                 /* <CATEGORY> <code> */
 //                TRACE(setidx, ch, "CHARSET CHARSET " + set[setidx]);
@@ -400,13 +402,13 @@
 //                            (set[setidx + (ch >> 4)] & (1 << (ch & 15))) != 0)
 //                    return ok;
 //                setidx += 16;
-                
+
                 /* <CHARSET> <bitmap> (32 bits per code word) */
                 if (ch < 256 && (set[setidx + (ch >> 5)] & (1 << (ch & 31))) != 0)
                     return ok;
                 setidx += 8;
                 break;
-                
+
             case SRE_OP_RANGE:
                 /* <RANGE> <lower> <upper> */
 //                TRACE(setidx, ch, "CHARSET RANGE " + set[setidx] + " " + set[setidx+1]);
@@ -419,11 +421,11 @@
 //                TRACE(setidx, ch, "CHARSET NEGATE");
                 ok = !ok;
                 break;
-                
+
             case SRE_OP_BIGCHARSET:
                 /* <BIGCHARSET> <blockcount> <256 blockindices> <blocks> */
 //                TRACE(setidx, ch, "CHARSET BIGCHARSET ");
-                
+
 //                count = *(set++);
 //                if (!(ch & ~65535))
 //                    block = ((unsigned char*)set)[ch >> 8];
@@ -434,7 +436,7 @@
 //                    (set[block*8 + ((ch & 255)>>5)] & (1 << (ch & 31))))
 //                    return ok;
 //                set += count*8;
-  
+
                 int count = set[setidx++];
                 int block;
                 if (ch < 65536)
@@ -444,7 +446,7 @@
                 setidx += 64;
                 if (block >= 0 && (set[setidx + block*8 + ((ch & 255)>>5)] & (1 << (ch & 31))) != 0)
                     return ok;
-                setidx += count * 8; 
+                setidx += count * 8;
                 break;
 
             default:
@@ -455,7 +457,7 @@
             }
         }
     }
-    
+
     private int SRE_COUNT(int[] pattern, int pidx, int maxcount, int level) {
         int chr;
         int ptr = this.ptr;
@@ -474,7 +476,7 @@
             while (ptr < end && SRE_CHARSET(pattern, pidx + 2, str[ptr]))
                 ptr++;
             break;
-            
+
         case SRE_OP_ANY:
             /* repeated dot wildcard. */
 //            TRACE(pidx, ptr, "COUNT ANY");
@@ -600,7 +602,7 @@
                 pidx++;
                 ptr++;
                 break;
-               
+
             case SRE_OP_SUCCESS:
                 /* end of pattern */
 //                TRACE(pidx, ptr, "SUCCESS");
@@ -758,7 +760,7 @@
                 }
                 lastmark = this.lastmark;
                 lastindex = this.lastindex;
-                
+
                 if (pattern[pidx + pattern[pidx]] == SRE_OP_LITERAL) {
                     /* tail starts with a literal. skip positions where
                        the rest of the pattern cannot possibly match */
@@ -796,7 +798,7 @@
                     }
                 }
                 return 0;
-                
+
             case SRE_OP_MIN_REPEAT_ONE:
                 /* match repeated sequence (minimizing regexp) */
 
@@ -962,7 +964,7 @@
                     this.ptr = ptr;
                     return 0;
                 }
-                
+
                 lastmark = this.lastmark;
                 lastindex = this.lastindex;
 
@@ -989,7 +991,7 @@
                 this.ptr = ptr;
                 return 0;
 
-                
+
             case SRE_OP_GROUPREF:
                 /* match backreference */
                 i = pattern[pidx];
@@ -1023,7 +1025,7 @@
                 }
                 pidx++;
                 break;
-                
+
             case SRE_OP_GROUPREF_EXISTS:
                 i = pattern[pidx];
 //                TRACE(pidx, ptr, "GROUPREF_EXISTS " + i);
@@ -1035,7 +1037,7 @@
                 }
                 pidx += 2;
                 break;
-                
+
             case SRE_OP_ASSERT:
                 /* assert subpattern */
                 /* args: <skip> <back> <pattern> */
@@ -1064,7 +1066,7 @@
                 }
                 pidx += pattern[pidx];
                 break;
-                
+
             case SRE_OP_FAILURE:
                 /* immediate failure */
 //                TRACE(pidx, ptr, "FAILURE");
@@ -1087,7 +1089,7 @@
             this.lastindex = lastindex;
         }
     }
-    
+
     int SRE_SEARCH(int[] pattern, int pidx) {
         int ptr = this.start;
         int end = this.end;
@@ -1328,7 +1330,7 @@
     }
 
     // XXX - this is not UTF-16 compliant; also depends on whether from PyString or PyUnicode
-    
+
     String getslice(int index, String string, boolean empty) {
         int i, j;
 

-- 
Repository URL: https://hg.python.org/jython


More information about the Jython-checkins mailing list