From jython-checkins at python.org  Tue Nov 21 17:39:08 2017
From: jython-checkins at python.org (jeff.allen)
Date: Tue, 21 Nov 2017 22:39:08 +0000
Subject: [Jython-checkins] =?utf-8?q?jython=3A_Add_a_note_to_jython=2Epy_?=
 =?utf-8?q?on_how_to_regenerate_jython=2Eexe?=
Message-ID: <20171121223907.85344.5815D349B8685987@mg.python.org>

https://hg.python.org/jython/rev/1503edec030b
changeset:   8136:1503edec030b
user:        Jeff Allen <ja.py at farowl.co.uk>
date:        Mon Oct 23 21:48:11 2017 +0100
summary:
  Add a note to jython.py on how to regenerate jython.exe

This is a follow-up to fixing #2607 and #2620, intended to make
a regression less likely. jython.exe was regenerated following the
instructions in the devguide as a test.

files:
  src/shell/jython.exe |  Bin 
  src/shell/jython.py  |   15 ++++++++++-----
  2 files changed, 10 insertions(+), 5 deletions(-)


diff --git a/src/shell/jython.exe b/src/shell/jython.exe
index 9f1235faa2ff2480db1215f775a32a056e82b7fc..8a4abc7af726b5b52902677b534479dcc23c2361
GIT binary patch
[stripped]
diff --git a/src/shell/jython.py b/src/shell/jython.py
--- a/src/shell/jython.py
+++ b/src/shell/jython.py
@@ -1,11 +1,16 @@
 #!/usr/bin/env python2.7 -E
 # -*- coding: utf-8 -*-
 
-# Launch script for Jython. It may be wrapped as an executable with
-# tools like PyInstaller, creating jython.exe, or run directly. The
-# installer will make this the default launcher under the name
-# bin/jython if CPython 2.7 is available with the above shebang
-# invocation.
+# Launch script for Jython. It may be run directly (note the shebang line), but
+# importantly it supplies jython.exe, the launcher we use on Windows.
+#
+# Each time this file changes, we must regenerate an executable with
+# PyInstaller, using the command:
+#
+#    pyinstaller --onefile jython.py
+#
+# This is best done in a virtual environment (more about this in the Jython
+# Developers' Guide).
 
 import glob
 import inspect

-- 
Repository URL: https://hg.python.org/jython

From jython-checkins at python.org  Tue Nov 21 17:39:09 2017
From: jython-checkins at python.org (jeff.allen)
Date: Tue, 21 Nov 2017 22:39:09 +0000
Subject: [Jython-checkins] =?utf-8?q?jython=3A_Formatting_only_=28PyUnico?=
 =?utf-8?q?de=2C_PyString=29?=
Message-ID: <20171121223907.85259.89105040FA4FC332@mg.python.org>

https://hg.python.org/jython/rev/862e65475e3b
changeset:   8137:862e65475e3b
user:        Jeff Allen <ja.py at farowl.co.uk>
date:        Wed Nov 01 22:14:06 2017 +0000
summary:
  Formatting only (PyUnicode, PyString)

files:
  src/org/python/core/PyString.java  |  191 ++++++++--------
  src/org/python/core/PyUnicode.java |   47 ++--
  2 files changed, 118 insertions(+), 120 deletions(-)


diff --git a/src/org/python/core/PyString.java b/src/org/python/core/PyString.java
--- a/src/org/python/core/PyString.java
+++ b/src/org/python/core/PyString.java
@@ -7,7 +7,6 @@
 import java.util.ArrayList;
 import java.util.Collection;
 import java.util.List;
-import java.util.Locale;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 
@@ -168,7 +167,7 @@
             S = S.__str__();
             if (S instanceof PyUnicode) {
                 // Encoding will raise UnicodeEncodeError if not 7-bit clean.
-                str = codecs.encode((PyUnicode)S, null, null);
+                str = codecs.encode((PyUnicode) S, null, null);
             } else {
                 // Must be str/bytes, and should be 8-bit clean already.
                 str = S.toString();
@@ -349,7 +348,7 @@
             // Escape quotes and backslash
             if ((use_quotes && ch == quote) || ch == '\\') {
                 v.append('\\');
-                v.append((char)ch);
+                v.append((char) ch);
                 continue;
             }
             /* Map UTF-16 surrogate pairs to Unicode \UXXXXXXXX escapes */
@@ -397,7 +396,7 @@
                 v.append(hexdigit[(ch >> 4) & 0xf]);
                 v.append(hexdigit[ch & 0xf]);
             } else {/* Copy everything else as-is */
-                v.append((char)ch);
+                v.append((char) ch);
             }
         }
 
@@ -432,7 +431,7 @@
             }
             ch = str.charAt(s++);
             switch (ch) {
-            /* \x escapes */
+                /* \x escapes */
                 case '\n':
                     break;
                 case '\\':
@@ -482,7 +481,7 @@
                         }
                         x = (x << 3) + Character.digit(ch, 8);
                     }
-                    v.append((char)x);
+                    v.append((char) x);
                     break;
                 case 'x':
                     s = hexescape(v, errors, 2, s, str, end, "truncated \\xXX");
@@ -516,7 +515,7 @@
                     if (pucnHash == null) {
                         PyObject mod = imp.importName("ucnhash", true);
                         mod = mod.__call__();
-                        pucnHash = (ucnhashAPI)mod.__tojava__(Object.class);
+                        pucnHash = (ucnhashAPI) mod.__tojava__(Object.class);
                         if (pucnHash.getCchMax() < 0) {
                             throw Py.UnicodeError("Unicode names not loaded");
                         }
@@ -564,8 +563,8 @@
     private static int hexescape(StringBuilder partialDecode, String errors, int digits,
             int hexDigitStart, String str, int size, String errorMessage) {
         if (hexDigitStart + digits > size) {
-            return codecs.insertReplacementAndGetResume(partialDecode, errors, "unicodeescape",
-                    str, hexDigitStart - 2, size, errorMessage);
+            return codecs.insertReplacementAndGetResume(partialDecode, errors, "unicodeescape", str,
+                    hexDigitStart - 2, size, errorMessage);
         }
         int i = 0;
         int x = 0;
@@ -588,8 +587,8 @@
         if (storeUnicodeCharacter(x, partialDecode)) {
             return hexDigitStart + i;
         } else {
-            return codecs.insertReplacementAndGetResume(partialDecode, errors, "unicodeescape",
-                    str, hexDigitStart - 2, hexDigitStart + i + 1, "illegal Unicode character");
+            return codecs.insertReplacementAndGetResume(partialDecode, errors, "unicodeescape", str,
+                    hexDigitStart - 2, hexDigitStart + i + 1, "illegal Unicode character");
         }
     }
 
@@ -630,7 +629,7 @@
             return -2;
         }
 
-        int c = getString().compareTo(((PyString)other).getString());
+        int c = getString().compareTo(((PyString) other).getString());
         return c < 0 ? -1 : c > 0 ? 1 : 0;
     }
 
@@ -749,12 +748,11 @@
     @Override
     public Object __tojava__(Class<?> c) {
         if (c.isAssignableFrom(String.class)) {
-            /* If c is a CharSequence we assume the caller is prepared
-             * to get maybe not an actual String. In that case we avoid
-             * conversion so the caller can do special stuff with the
-             * returned PyString or PyUnicode or whatever.
-             * (If c is Object.class, the caller usually expects to get
-             * actually a String)
+            /*
+             * If c is a CharSequence we assume the caller is prepared to get maybe not an actual
+             * String. In that case we avoid conversion so the caller can do special stuff with the
+             * returned PyString or PyUnicode or whatever. (If c is Object.class, the caller usually
+             * expects to get actually a String)
              */
             return c == CharSequence.class ? this : getString();
         }
@@ -854,10 +852,10 @@
     private static String asUTF16StringOrNull(PyObject obj) {
         if (obj instanceof PyString) {
             // str or unicode object: go directly to the String
-            return ((PyString)obj).getString();
+            return ((PyString) obj).getString();
         } else if (obj instanceof BufferProtocol) {
             // Other object with buffer API: briefly access the buffer
-            try (PyBuffer buf = ((BufferProtocol)obj).getBuffer(PyBUF.FULL_RO)) {
+            try (PyBuffer buf = ((BufferProtocol) obj).getBuffer(PyBUF.FULL_RO)) {
                 return buf.toString();
             }
         } else {
@@ -963,7 +961,7 @@
             count = 0;
         }
         int s = getString().length();
-        if ((long)s * count > Integer.MAX_VALUE) {
+        if ((long) s * count > Integer.MAX_VALUE) {
             // Since Strings store their data in an array, we can't make one
             // longer than Integer.MAX_VALUE. Without this check we get
             // NegativeArraySize exceptions when we create the array on the
@@ -1104,13 +1102,13 @@
         if (n == 1) {
             // Special-case single byte string
             char c = s.charAt(0);
-            return _isupper(c) ? String.valueOf((char)(c ^ SWAP_CASE)) : s;
+            return _isupper(c) ? String.valueOf((char) (c ^ SWAP_CASE)) : s;
         } else {
             // Copy chars to buffer, converting to lower-case.
             char[] buf = new char[n];
             for (int i = 0; i < n; i++) {
                 char c = s.charAt(i);
-                buf[i] = _isupper(c) ? (char)(c ^ SWAP_CASE) : c;
+                buf[i] = _isupper(c) ? (char) (c ^ SWAP_CASE) : c;
             }
             return new String(buf);
         }
@@ -1127,13 +1125,13 @@
         if (n == 1) {
             // Special-case single byte string
             char c = s.charAt(0);
-            return _islower(c) ? String.valueOf((char)(c ^ SWAP_CASE)) : s;
+            return _islower(c) ? String.valueOf((char) (c ^ SWAP_CASE)) : s;
         } else {
             // Copy chars to buffer, converting to upper-case.
             char[] buf = new char[n];
             for (int i = 0; i < n; i++) {
                 char c = s.charAt(i);
-                buf[i] = _islower(c) ? (char)(c ^ SWAP_CASE) : c;
+                buf[i] = _islower(c) ? (char) (c ^ SWAP_CASE) : c;
             }
             return new String(buf);
         }
@@ -1154,12 +1152,12 @@
                 if (previous_is_cased) {
                     // Should be lower case
                     if (_isupper(ch)) {
-                        chars[i] = (char)(ch ^ SWAP_CASE);
+                        chars[i] = (char) (ch ^ SWAP_CASE);
                     }
                 } else {
                     // Should be upper case
                     if (_islower(ch)) {
-                        chars[i] = (char)(ch ^ SWAP_CASE);
+                        chars[i] = (char) (ch ^ SWAP_CASE);
                     }
                 }
                 // And this was a letter
@@ -1183,13 +1181,13 @@
         if (n == 1) {
             // Special-case single byte string
             char c = s.charAt(0);
-            return _isalpha(c) ? String.valueOf((char)(c ^ SWAP_CASE)) : s;
+            return _isalpha(c) ? String.valueOf((char) (c ^ SWAP_CASE)) : s;
         } else {
             // Copy chars to buffer, converting lower to upper case, upper to lower case.
             char[] buf = new char[n];
             for (int i = 0; i < n; i++) {
                 char c = s.charAt(i);
-                buf[i] = _isalpha(c) ? (char)(c ^ SWAP_CASE) : c;
+                buf[i] = _isalpha(c) ? (char) (c ^ SWAP_CASE) : c;
             }
             return new String(buf);
         }
@@ -1236,7 +1234,7 @@
     final PyObject str_strip(PyObject chars) {
         if (chars instanceof PyUnicode) {
             // Promote the problem to a Unicode one
-            return ((PyUnicode)decode()).unicode_strip(chars);
+            return ((PyUnicode) decode()).unicode_strip(chars);
         } else {
             // It ought to be None, null, some kind of bytes with the buffer API.
             String stripChars = asStringNullOrError(chars, "strip");
@@ -1406,7 +1404,7 @@
     final PyObject str_lstrip(PyObject chars) {
         if (chars instanceof PyUnicode) {
             // Promote the problem to a Unicode one
-            return ((PyUnicode)decode()).unicode_lstrip(chars);
+            return ((PyUnicode) decode()).unicode_lstrip(chars);
         } else {
             // It ought to be None, null, some kind of bytes with the buffer API.
             String stripChars = asStringNullOrError(chars, "lstrip");
@@ -1495,7 +1493,7 @@
     final PyObject str_rstrip(PyObject chars) {
         if (chars instanceof PyUnicode) {
             // Promote the problem to a Unicode one
-            return ((PyUnicode)decode()).unicode_rstrip(chars);
+            return ((PyUnicode) decode()).unicode_rstrip(chars);
         } else {
             // It ought to be None, null, some kind of bytes with the buffer API.
             String stripChars = asStringNullOrError(chars, "rstrip");
@@ -1616,7 +1614,7 @@
     final PyList str_split(PyObject sepObj, int maxsplit) {
         if (sepObj instanceof PyUnicode) {
             // Promote the problem to a Unicode one
-            return ((PyUnicode)decode()).unicode_split(sepObj, maxsplit);
+            return ((PyUnicode) decode()).unicode_split(sepObj, maxsplit);
         } else {
             // It ought to be None, null, some kind of bytes with the buffer API.
             String sep = asStringNullOrError(sepObj, "split");
@@ -1867,7 +1865,7 @@
     final PyList str_rsplit(PyObject sepObj, int maxsplit) {
         if (sepObj instanceof PyUnicode) {
             // Promote the problem to a Unicode one
-            return ((PyUnicode)decode()).unicode_rsplit(sepObj, maxsplit);
+            return ((PyUnicode) decode()).unicode_rsplit(sepObj, maxsplit);
         } else {
             // It ought to be None, null, some kind of bytes with the buffer API.
             String sep = asStringNullOrError(sepObj, "rsplit");
@@ -2066,8 +2064,8 @@
 
             int index = getString().indexOf(sep);
             if (index != -1) {
-                return new PyTuple(fromSubstring(0, index), sepObj, fromSubstring(
-                        index + sep.length(), getString().length()));
+                return new PyTuple(fromSubstring(0, index), sepObj,
+                        fromSubstring(index + sep.length(), getString().length()));
             } else {
                 return new PyTuple(this, Py.EmptyString, Py.EmptyString);
             }
@@ -2088,8 +2086,8 @@
 
         int index = str.indexOf(sep);
         if (index != -1) {
-            return new PyTuple(strObj.fromSubstring(0, index), sepObj, strObj.fromSubstring(index
-                    + sep.length(), str.length()));
+            return new PyTuple(strObj.fromSubstring(0, index), sepObj,
+                    strObj.fromSubstring(index + sep.length(), str.length()));
         } else {
             PyUnicode emptyUnicode = Py.newUnicode("");
             return new PyTuple(this, emptyUnicode, emptyUnicode);
@@ -2125,8 +2123,8 @@
 
             int index = getString().lastIndexOf(sep);
             if (index != -1) {
-                return new PyTuple(fromSubstring(0, index), sepObj, fromSubstring(
-                        index + sep.length(), getString().length()));
+                return new PyTuple(fromSubstring(0, index), sepObj,
+                        fromSubstring(index + sep.length(), getString().length()));
             } else {
                 return new PyTuple(Py.EmptyString, Py.EmptyString, this);
             }
@@ -2147,8 +2145,8 @@
 
         int index = str.lastIndexOf(sep);
         if (index != -1) {
-            return new PyTuple(strObj.fromSubstring(0, index), sepObj, strObj.fromSubstring(index
-                    + sep.length(), str.length()));
+            return new PyTuple(strObj.fromSubstring(0, index), sepObj,
+                    strObj.fromSubstring(index + sep.length(), str.length()));
         } else {
             PyUnicode emptyUnicode = Py.newUnicode("");
             return new PyTuple(emptyUnicode, emptyUnicode, this);
@@ -2420,7 +2418,7 @@
     final int str_count(PyObject subObj, PyObject start, PyObject end) {
         if (subObj instanceof PyUnicode) {
             // Promote the problem to a Unicode one
-            return ((PyUnicode)decode()).unicode_count(subObj, start, end);
+            return ((PyUnicode) decode()).unicode_count(subObj, start, end);
         } else {
             // It ought to be some kind of bytes with the buffer API.
             String sub = asStringOrError(subObj);
@@ -2535,7 +2533,7 @@
     final int str_find(PyObject subObj, PyObject start, PyObject end) {
         if (subObj instanceof PyUnicode) {
             // Promote the problem to a Unicode one
-            return ((PyUnicode)decode()).unicode_find(subObj, start, end);
+            return ((PyUnicode) decode()).unicode_find(subObj, start, end);
         } else {
             // It ought to be some kind of bytes with the buffer API.
             String sub = asStringOrError(subObj);
@@ -2640,7 +2638,7 @@
     final int str_rfind(PyObject subObj, PyObject start, PyObject end) {
         if (subObj instanceof PyUnicode) {
             // Promote the problem to a Unicode one
-            return ((PyUnicode)decode()).unicode_rfind(subObj, start, end);
+            return ((PyUnicode) decode()).unicode_rfind(subObj, start, end);
         } else {
             // It ought to be some kind of bytes with the buffer API.
             String sub = asStringOrError(subObj);
@@ -2948,7 +2946,8 @@
         }
 
         // if the base >= 22, then an 'l' or 'L' is a digit!
-        if (isLong && base < 22 && e > b && (str.charAt(e - 1) == 'L' || str.charAt(e - 1) == 'l')) {
+        if (isLong && base < 22 && e > b
+                && (str.charAt(e - 1) == 'L' || str.charAt(e - 1) == 'l')) {
             e--;
         }
 
@@ -2982,11 +2981,11 @@
             }
             return bi.intValue();
         } catch (NumberFormatException exc) {
-            throw Py.ValueError("invalid literal for int() with base " + base + ": '" + getString()
-                    + "'");
+            throw Py.ValueError(
+                    "invalid literal for int() with base " + base + ": '" + getString() + "'");
         } catch (StringIndexOutOfBoundsException exc) {
-            throw Py.ValueError("invalid literal for int() with base " + base + ": '" + getString()
-                    + "'");
+            throw Py.ValueError(
+                    "invalid literal for int() with base " + base + ": '" + getString() + "'");
         }
     }
 
@@ -3011,12 +3010,12 @@
                 throw Py.UnicodeEncodeError("decimal", "codec can't encode character", 0, 0,
                         "invalid decimal Unicode string");
             } else {
-                throw Py.ValueError("invalid literal for long() with base " + base + ": '"
-                        + getString() + "'");
+                throw Py.ValueError(
+                        "invalid literal for long() with base " + base + ": '" + getString() + "'");
             }
         } catch (StringIndexOutOfBoundsException exc) {
-            throw Py.ValueError("invalid literal for long() with base " + base + ": '"
-                    + getString() + "'");
+            throw Py.ValueError(
+                    "invalid literal for long() with base " + base + ": '" + getString() + "'");
         }
     }
 
@@ -3131,7 +3130,7 @@
     @ExposedMethod(defaults = "8", doc = BuiltinDocs.str_expandtabs_doc)
     final String str_expandtabs(int tabsize) {
         String s = getString();
-        StringBuilder buf = new StringBuilder((int)(s.length() * 1.5));
+        StringBuilder buf = new StringBuilder((int) (s.length() * 1.5));
         char[] chars = s.toCharArray();
         int n = chars.length;
         int position = 0;
@@ -3169,11 +3168,11 @@
             char[] buf = new char[n];
             // At least one byte: if lower convert to upper case.
             char c = s.charAt(0);
-            buf[0] = _islower(c) ? (char)(c ^ SWAP_CASE) : c;
+            buf[0] = _islower(c) ? (char) (c ^ SWAP_CASE) : c;
             // Copy the rest, converting to lower case.
             for (int i = 1; i < n; i++) {
                 c = s.charAt(i);
-                buf[i] = _isupper(c) ? (char)(c ^ SWAP_CASE) : c;
+                buf[i] = _isupper(c) ? (char) (c ^ SWAP_CASE) : c;
             }
             return new String(buf);
         }
@@ -3211,7 +3210,7 @@
     final PyString str_replace(PyObject oldPieceObj, PyObject newPieceObj, int count) {
         if (oldPieceObj instanceof PyUnicode || newPieceObj instanceof PyUnicode) {
             // Promote the problem to a Unicode one
-            return ((PyUnicode)decode()).unicode_replace(oldPieceObj, newPieceObj, count);
+            return ((PyUnicode) decode()).unicode_replace(oldPieceObj, newPieceObj, count);
         } else {
             // Neither is a PyUnicode: both ought to be some kind of bytes with the buffer API.
             String oldPiece = asStringOrError(oldPieceObj);
@@ -3283,7 +3282,7 @@
         if (seqLen == 1) {
             item = seq.pyget(0);
             if (item.getType() == PyString.TYPE || item.getType() == PyUnicode.TYPE) {
-                return (PyString)item;
+                return (PyString) item;
             }
         }
 
@@ -3309,20 +3308,20 @@
             if (i != 0) {
                 size += sepLen;
             }
-            size += ((PyString)item).getString().length();
+            size += ((PyString) item).getString().length();
             if (size > Integer.MAX_VALUE) {
                 throw Py.OverflowError("join() result is too long for a Python string");
             }
         }
 
         // Catenate everything
-        StringBuilder buf = new StringBuilder((int)size);
+        StringBuilder buf = new StringBuilder((int) size);
         for (i = 0; i < seqLen; i++) {
             item = seq.pyget(i);
             if (i != 0) {
                 buf.append(getString());
             }
-            buf.append(((PyString)item).getString());
+            buf.append(((PyString) item).getString());
         }
         return new PyString(buf.toString(), true); // Guaranteed to be byte-like
     }
@@ -3345,7 +3344,7 @@
         if (seqLen == 1) {
             item = seq.pyget(0);
             if (item.getType() == PyUnicode.TYPE) {
-                return (PyUnicode)item;
+                return (PyUnicode) item;
             }
         }
 
@@ -3354,7 +3353,7 @@
             if (this instanceof PyUnicode) {
                 sep = getString();
             } else {
-                sep = ((PyUnicode)decode()).getString();
+                sep = ((PyUnicode) decode()).getString();
                 // In case decode()'s codec mutated seq
                 seqLen = seq.__len__();
             }
@@ -3369,15 +3368,16 @@
             item = seq.pyget(i);
             // Convert item to Unicode
             if (!(item instanceof PyString)) {
-                throw Py.TypeError(String.format("sequence item %d: expected string or Unicode,"
-                        + " %.80s found", i, item.getType().fastGetName()));
+                throw Py.TypeError(String.format(
+                        "sequence item %d: expected string or Unicode," + " %.80s found", i,
+                        item.getType().fastGetName()));
             }
             if (!(item instanceof PyUnicode)) {
-                item = ((PyString)item).decode();
+                item = ((PyString) item).decode();
                 // In case decode()'s codec mutated seq
                 seqLen = seq.__len__();
             }
-            itemString = ((PyUnicode)item).getString();
+            itemString = ((PyUnicode) item).getString();
 
             if (i != 0) {
                 size += sepLen;
@@ -3450,7 +3450,7 @@
 
         } else {
             // Loop will return true if this slice starts with any prefix in the tuple
-            for (PyObject prefixObj : ((PyTuple)prefix).getArray()) {
+            for (PyObject prefixObj : ((PyTuple) prefix).getArray()) {
                 // It ought to be PyUnicode or some kind of bytes with the buffer API.
                 String s = asUTF16StringOrError(prefixObj);
                 // If s is non-BMP, and this is a PyString (bytes), result will correctly be false.
@@ -3521,7 +3521,7 @@
 
         } else {
             // Loop will return true if this slice ends with any suffix in the tuple
-            for (PyObject suffixObj : ((PyTuple)suffix).getArray()) {
+            for (PyObject suffixObj : ((PyTuple) suffix).getArray()) {
                 // It ought to be PyUnicode or some kind of bytes with the buffer API.
                 String s = asUTF16StringOrError(suffixObj);
                 // If s is non-BMP, and this is a PyString (bytes), result will correctly be false.
@@ -3725,7 +3725,7 @@
 
     private boolean _islower(char ch) {
         if (ch < 256) {
-            return BaseBytes.islower((byte)ch);
+            return BaseBytes.islower((byte) ch);
         } else {
             // This is an internal error. Really, the test should be unnecessary.
             throw new java.lang.IllegalArgumentException("non-byte character in PyString");
@@ -3760,7 +3760,7 @@
 
     private boolean _isupper(char ch) {
         if (ch < 256) {
-            return BaseBytes.isupper((byte)ch);
+            return BaseBytes.isupper((byte) ch);
         } else {
             // This is an internal error. Really, the test should be unnecessary.
             throw new java.lang.IllegalArgumentException("non-byte character in PyString");
@@ -3791,7 +3791,7 @@
 
     private boolean _isalpha(char ch) {
         if (ch < 256) {
-            return BaseBytes.isalpha((byte)ch);
+            return BaseBytes.isalpha((byte) ch);
         } else {
             // This is an internal error. Really, the test should be unnecessary.
             throw new java.lang.IllegalArgumentException("non-byte character in PyString");
@@ -3823,7 +3823,7 @@
     private boolean _isalnum(char ch) {
         // This is now entirely compatible with CPython, as long as only bytes are stored.
         if (ch < 256) {
-            return BaseBytes.isalnum((byte)ch);
+            return BaseBytes.isalnum((byte) ch);
         } else {
             // This is an internal error. Really, the test should be unnecessary.
             throw new java.lang.IllegalArgumentException("non-byte character in PyString");
@@ -3868,7 +3868,7 @@
 
     private boolean _isdigit(char ch) {
         if (ch < 256) {
-            return BaseBytes.isdigit((byte)ch);
+            return BaseBytes.isdigit((byte) ch);
         } else {
             // This is an internal error. Really, the test should be unnecessary.
             throw new java.lang.IllegalArgumentException("non-byte character in PyString");
@@ -3945,7 +3945,7 @@
 
     private boolean _isspace(char ch) {
         if (ch < 256) {
-            return BaseBytes.isspace((byte)ch);
+            return BaseBytes.isspace((byte) ch);
         } else {
             // This is an internal error. Really, the test should be unnecessary.
             throw new java.lang.IllegalArgumentException("non-byte character in PyString");
@@ -4088,7 +4088,7 @@
                 // Check for "{}".format(u"abc")
                 if (fieldObj instanceof PyUnicode && !(this instanceof PyUnicode)) {
                     // Down-convert to PyString, at the risk of raising UnicodeEncodingError
-                    fieldObj = ((PyUnicode)fieldObj).__str__();
+                    fieldObj = ((PyUnicode) fieldObj).__str__();
                 }
 
                 // The format_spec may be simple, or contained nested replacement fields.
@@ -4156,11 +4156,11 @@
             Object key = chunk.value;
             if (chunk.is_attr) {
                 // key must be a String
-                obj = obj.__getattr__((String)key);
+                obj = obj.__getattr__((String) key);
             } else {
                 if (key instanceof Integer) {
                     // Can this happen?
-                    obj = obj.__getitem__(((Integer)key).intValue());
+                    obj = obj.__getitem__(((Integer) key).intValue());
                 } else {
                     obj = obj.__getitem__(new PyString(key.toString()));
                 }
@@ -4412,7 +4412,7 @@
         if (c == '*') {
             PyObject o = getarg();
             if (o instanceof PyInteger) {
-                return ((PyInteger)o).getValue();
+                return ((PyInteger) o).getValue();
             }
             throw Py.TypeError("* wants int");
         } else {
@@ -4533,7 +4533,7 @@
         if (arg instanceof PyUnicode) {
             // arg is already acceptable.
             needUnicode = true;
-            return (PyUnicode)arg;
+            return (PyUnicode) arg;
 
         } else if (needUnicode) {
             // The string being built is unicode, so we need that version of the arg.
@@ -4541,7 +4541,7 @@
 
         } else if (arg instanceof PyString) {
             // The string being built is not unicode, so arg is already acceptable.
-            return (PyString)arg;
+            return (PyString) arg;
 
         } else {
             // The string being built is not unicode, so use __str__ to get a PyString.
@@ -4572,11 +4572,10 @@
         } else {
             // Not a tuple, but possibly still some kind of container: use special argIndex values.
             argIndex = -1;
-            if (args instanceof AbstractDict
-                    || (!(args instanceof PySequence) &&
-                    // See issue 2511: __getitem__ should be looked up directly in the dict, rather
-                    // than going through another __getattr__ call. We achieve this by using
-                    // object___findattr__ instead of generic __findattr__.
+            if (args instanceof AbstractDict || (!(args instanceof PySequence) &&
+            // See issue 2511: __getitem__ should be looked up directly in the dict, rather
+            // than going through another __getattr__ call. We achieve this by using
+            // object___findattr__ instead of generic __findattr__.
                     args.object___findattr__("__getitem__".intern()) != null)) {
                 dict = args;
                 argIndex = -3;
@@ -4775,7 +4774,7 @@
                                 needUnicode = true;
                                 fi.setBytes(false);
                             }
-                            fi.format(((PyString)arg).getString().codePointAt(0));
+                            fi.format(((PyString) arg).getString().codePointAt(0));
                         }
 
                     } else {
@@ -4784,14 +4783,14 @@
 
                         // We have to check what we got back.
                         if (argAsNumber instanceof PyInteger) {
-                            fi.format(((PyInteger)argAsNumber).getValue());
+                            fi.format(((PyInteger) argAsNumber).getValue());
                         } else if (argAsNumber instanceof PyLong) {
-                            fi.format(((PyLong)argAsNumber).getValue());
+                            fi.format(((PyLong) argAsNumber).getValue());
                         } else {
                             // It couldn't be converted, raise the error here
-                            throw Py.TypeError("%" + spec.type
-                                    + " format: a number is required, not "
-                                    + arg.getType().fastGetName());
+                            throw Py.TypeError(
+                                    "%" + spec.type + " format: a number is required, not "
+                                            + arg.getType().fastGetName());
                         }
                     }
 
@@ -4814,11 +4813,11 @@
 
                     // We have to check what we got back..
                     if (argAsFloat instanceof PyFloat) {
-                        ff.format(((PyFloat)argAsFloat).getValue());
+                        ff.format(((PyFloat) argAsFloat).getValue());
                     } else {
                         // It couldn't be converted, raise the error here
-                        throw Py.TypeError("float argument required, not "
-                                + arg.getType().fastGetName());
+                        throw Py.TypeError(
+                                "float argument required, not " + arg.getType().fastGetName());
                     }
 
                     break;
diff --git a/src/org/python/core/PyUnicode.java b/src/org/python/core/PyUnicode.java
--- a/src/org/python/core/PyUnicode.java
+++ b/src/org/python/core/PyUnicode.java
@@ -8,8 +8,6 @@
 import java.util.List;
 import java.util.Set;
 
-import com.google.common.base.CharMatcher;
-
 import org.python.core.stringlib.FieldNameIterator;
 import org.python.core.stringlib.MarkupIterator;
 import org.python.expose.ExposedMethod;
@@ -19,6 +17,8 @@
 import org.python.modules._codecs;
 import org.python.util.Generic;
 
+import com.google.common.base.CharMatcher;
+
 /**
  * a builtin python unicode string.
  */
@@ -592,9 +592,8 @@
     @ExposedNew
     final static PyObject unicode_new(PyNewWrapper new_, boolean init, PyType subtype,
             PyObject[] args, String[] keywords) {
-        ArgParser ap =
-                new ArgParser("unicode", args, keywords, new String[] {"string", "encoding",
-                        "errors"}, 0);
+        ArgParser ap = new ArgParser("unicode", args, keywords,
+                new String[] {"string", "encoding", "errors"}, 0);
         PyObject S = ap.getPyObject(0, null);
         String encoding = checkEncoding(ap.getString(1, null));
         String errors = checkEncoding(ap.getString(2, null));
@@ -603,15 +602,15 @@
                 return new PyUnicode("");
             }
             if (S instanceof PyUnicode) {
-                return new PyUnicode(((PyUnicode)S).getString());
+                return new PyUnicode(((PyUnicode) S).getString());
             }
             if (S instanceof PyString) {
                 if (S.getType() != PyString.TYPE && encoding == null && errors == null) {
                     return S.__unicode__();
                 }
-                PyObject decoded = codecs.decode((PyString)S, encoding, errors);
+                PyObject decoded = codecs.decode((PyString) S, encoding, errors);
                 if (decoded instanceof PyUnicode) {
-                    return new PyUnicode((PyUnicode)decoded);
+                    return new PyUnicode((PyUnicode) decoded);
                 } else {
                     throw Py.TypeError("decoder did not return an unicode object (type="
                             + decoded.getType().fastGetName() + ")");
@@ -623,7 +622,7 @@
                 return new PyUnicodeDerived(subtype, Py.EmptyString);
             }
             if (S instanceof PyUnicode) {
-                return new PyUnicodeDerived(subtype, (PyUnicode)S);
+                return new PyUnicodeDerived(subtype, (PyUnicode) S);
             } else {
                 return new PyUnicodeDerived(subtype, S.__str__());
             }
@@ -910,12 +909,12 @@
      */
     private PyUnicode coerceToUnicode(PyObject o) {
         if (o instanceof PyUnicode) {
-            return (PyUnicode)o;
+            return (PyUnicode) o;
         } else if (o instanceof PyString) {
-            return new PyUnicode(((PyString)o).getString(), true);
+            return new PyUnicode(((PyString) o).getString(), true);
         } else if (o instanceof BufferProtocol) {
             // PyByteArray, PyMemoryView, Py2kBuffer ...
-            try (PyBuffer buf = ((BufferProtocol)o).getBuffer(PyBUF.FULL_RO)) {
+            try (PyBuffer buf = ((BufferProtocol) o).getBuffer(PyBUF.FULL_RO)) {
                 return new PyUnicode(buf.toString(), true);
             }
         } else {
@@ -969,9 +968,9 @@
     final PyObject unicode___add__(PyObject other) {
         PyUnicode otherUnicode;
         if (other instanceof PyUnicode) {
-            otherUnicode = (PyUnicode)other;
+            otherUnicode = (PyUnicode) other;
         } else if (other instanceof PyString) {
-            otherUnicode = (PyUnicode)((PyString)other).decode();
+            otherUnicode = (PyUnicode) ((PyString) other).decode();
         } else {
             return null;
         }
@@ -1094,9 +1093,9 @@
         if (o == null) {
             return null;
         } else if (o instanceof PyUnicode) {
-            return (PyUnicode)o;
+            return (PyUnicode) o;
         } else if (o instanceof PyString) {
-            return new PyUnicode(((PyString)o).decode().toString());
+            return new PyUnicode(((PyString) o).decode().toString());
         } else if (o == Py.None) {
             return null;
         } else {
@@ -1121,8 +1120,8 @@
         }
 
         // Not basic plane: have to do real Unicode
-        return new PyUnicode(new ReversedIterator<Integer>(new StripIterator(sep, new ReversedIterator<>(
-                new StripIterator(sep, newSubsequenceIterator())))));
+        return new PyUnicode(new ReversedIterator<Integer>(new StripIterator(sep,
+                new ReversedIterator<>(new StripIterator(sep, newSubsequenceIterator())))));
     }
 
     @ExposedMethod(defaults = "null", doc = BuiltinDocs.unicode_lstrip_doc)
@@ -1162,8 +1161,8 @@
         }
 
         // Not basic plane: have to do real Unicode
-        return new PyUnicode(new ReversedIterator<Integer>(new StripIterator(sep,
-                new ReversedIterator<>(newSubsequenceIterator()))));
+        return new PyUnicode(new ReversedIterator<Integer>(
+                new StripIterator(sep, new ReversedIterator<>(newSubsequenceIterator()))));
     }
 
     @Override
@@ -1484,8 +1483,8 @@
         }
         int[] indices = super.translateIndices(start, end); // do not convert to utf-16 indices.
         int count = 0;
-        for (Iterator<Integer> mainIter = newSubsequenceIterator(indices[0], indices[1], 1); mainIter
-                .hasNext();) {
+        for (Iterator<Integer> mainIter =
+                newSubsequenceIterator(indices[0], indices[1], 1); mainIter.hasNext();) {
             int matched = sub.getCodePointCount();
             for (Iterator<Integer> subIter = sub.newSubsequenceIterator(); mainIter.hasNext()
                     && subIter.hasNext();) {
@@ -1661,7 +1660,7 @@
                 SplitIterator iter = newSplitIterator(oldPiece, count);
                 int numSplits = 0;
                 while (iter.hasNext()) {
-                    buffer.append(((PyUnicode)iter.next()).getString());
+                    buffer.append(((PyUnicode) iter.next()).getString());
                     if (iter.hasNext()) {
                         buffer.append(newPiece.getString());
                     }
@@ -1750,7 +1749,7 @@
         for (Iterator<Integer> iter = newSubsequenceIterator(); iter.hasNext();) {
             int codePoint = iter.next();
             if (!(Character.isLetterOrDigit(codePoint) || //
-            Character.getType(codePoint) == Character.LETTER_NUMBER)) {
+                    Character.getType(codePoint) == Character.LETTER_NUMBER)) {
                 return false;
             }
         }

-- 
Repository URL: https://hg.python.org/jython

From jython-checkins at python.org  Tue Nov 21 17:39:10 2017
From: jython-checkins at python.org (jeff.allen)
Date: Tue, 21 Nov 2017 22:39:10 +0000
Subject: [Jython-checkins] =?utf-8?q?jython=3A_Respect_default_encoding_w?=
 =?utf-8?q?hen_coercing_str_to_unicode_=28addresses_=232638=29=2E?=
Message-ID: <20171121223908.45462.A964147CB55D5E15@mg.python.org>

https://hg.python.org/jython/rev/78482073e91f
changeset:   8138:78482073e91f
user:        Jeff Allen <ja.py at farowl.co.uk>
date:        Sun Nov 19 08:32:18 2017 +0000
summary:
  Respect default encoding when coercing str to unicode (addresses #2638).

This change corrects the implicit ascii or latin-1 assumption made when
accepting arguments in PyUnicode and PyString methods, adds tests to
test_unicode_jy, and makes small consequential changes to other modules.

The general effect is to allow, and decode, any byte buffer where a str
is acceptable. This is more liberal than CPython, except in __eq__ and
__ne__ which reproduce CPython limitations. It is not certain we have to
do that. Further change is needed to support all comparison operations.

files:
  Lib/test/test_bytes.py                  |    4 +-
  Lib/test/test_concat_jy.py              |    7 +-
  Lib/test/test_import_jy.py              |   10 +-
  Lib/test/test_unicode_jy.py             |  274 ++++++++++
  src/org/python/core/PyShadowString.java |    4 +-
  src/org/python/core/PyString.java       |  244 +++++---
  src/org/python/core/PyUnicode.java      |  319 +++++++++--
  src/org/python/core/__builtin__.java    |   17 +-
  src/org/python/core/codecs.java         |    2 +-
  src/org/python/core/imp.java            |   22 +-
  10 files changed, 726 insertions(+), 177 deletions(-)


diff --git a/Lib/test/test_bytes.py b/Lib/test/test_bytes.py
--- a/Lib/test/test_bytes.py
+++ b/Lib/test/test_bytes.py
@@ -217,7 +217,9 @@
         self.assertEqual(b1 + bytes(b"def"), b"abcdef")
         self.assertEqual(bytes(b"def") + b1, b"defabc")
         self.assertRaises(TypeError, lambda: b1 + u"def")
-        self.assertRaises(TypeError, lambda: u"abc" + b2)
+        # Jython treats unicode + bytearray the same way as unicode + str
+        #self.assertRaises(TypeError, lambda: u"abc" + b2)
+        self.assertEqual(u"def" + b1, u"defabc") # OK in Jython
 
     def test_repeat(self):
         for b in b"abc", self.type2test(b"abc"):
diff --git a/Lib/test/test_concat_jy.py b/Lib/test/test_concat_jy.py
--- a/Lib/test/test_concat_jy.py
+++ b/Lib/test/test_concat_jy.py
@@ -23,11 +23,10 @@
                     resType = unicode
                 res = a.__add__(b)
                 self.assertEquals(type(res), resType,
-                                  '%r is a %s, not a %s' % (res, type(res),
-                                                            resType))
+                    '%r + %r -> %r is a %s, not a %s' % 
+                        (type(a), type(b), res, type(res), resType))
                 self.assertEquals(res, 'ab',
-                                  '%r (%s) != %r (%s)' % (res, type(res), 'ab',
-                                                 str))
+                    '%r (%s) != %r (%s)' % (res, type(res), 'ab', str))
 
 
 class StrUnicodeConcatOverridesTestCase(unittest.TestCase):
diff --git a/Lib/test/test_import_jy.py b/Lib/test/test_import_jy.py
--- a/Lib/test/test_import_jy.py
+++ b/Lib/test/test_import_jy.py
@@ -219,11 +219,17 @@
 
 class UnicodeNamesTestCase(unittest.TestCase):
 
+    def test_import_non_ascii_module(self):
+        module = "m?d?l?"
+        with self.assertRaises(ImportError) as cm:
+            __import__(module)
+
     def test_import_unicode_module(self):
+        module = u"m?d?l?"
         with self.assertRaises(UnicodeEncodeError) as cm:
-            __import__("m?d?l?")
+            __import__(module)
         self.assertEqual(cm.exception.encoding, "ascii")
-        self.assertEqual(cm.exception.object, "m?d?l?")
+        self.assertEqual(cm.exception.object, module)
         self.assertEqual(cm.exception.reason, "ordinal not in range(128)")
 
 
diff --git a/Lib/test/test_unicode_jy.py b/Lib/test/test_unicode_jy.py
--- a/Lib/test/test_unicode_jy.py
+++ b/Lib/test/test_unicode_jy.py
@@ -900,6 +900,277 @@
             self.assertEqual(2, len(s.split()), "no split made in " + repr(s))
             self.assertEqual(2, len(s.rsplit()), "no rsplit made in " + repr(s))
 
+class EncodingContext(object):
+    """Context manager to save and restore the encoding.
+
+    Use like this:
+
+        with EncodingContext("utf-8"):
+            self.assertEqual("'caf\xc3\xa9'", u"'caf\xe9'")
+    """
+
+    def __init__(self, encoding):
+        if not hasattr(sys, "setdefaultencoding"):
+            reload(sys)
+        self.original_encoding = sys.getdefaultencoding()
+        sys.setdefaultencoding(encoding)
+
+    def __enter__(self):
+        return self
+
+    def __exit__(self, *ignore_exc):
+        sys.setdefaultencoding(self.original_encoding)
+
+
+class DefaultDecodingTestCase(unittest.TestCase):
+    # Test use of default encoding to coerce str to unicode
+
+    def test_add(self):
+        ref = u'caf? cr?me'
+        s1 = ref[:4].encode(self.encoding)
+        s2 = ref[4:].encode(self.encoding)
+        with EncodingContext(self.encoding):
+            self.assertEqual( s1 + ref[4:], ref)
+            self.assertEqual( ref[:4] + s2, ref)
+
+    def test_in(self):
+        ref = u'caf? cr?me'
+        with EncodingContext(self.encoding):
+            self.assertTrue(u'?'.encode(self.encoding) in ref)
+            self.assertTrue(u'f?'.encode(self.encoding) in ref)
+            # Fails if the string is interpreted as code points.
+            if self.encoding !=  'latin-1':
+                self.assertFalse('\xc3\xa9' in u'caf\xc3\xa9')
+
+    def test_eq(self):
+        ref = u'caf? cr?me'
+        b = ref.encode(self.encoding)
+        with EncodingContext(self.encoding):
+            self.assertTrue(ref == b)
+            self.assertTrue(b == ref)
+
+    def test_ne(self):
+        with EncodingContext(self.encoding):
+            # Fails if the string is interpreted as code points.
+            if self.encoding !=  'latin-1':
+                self.assertFalse(u'caf\xc3\xa9'== 'caf\xc3\xa9')
+                self.assertFalse('caf\xc3\xa9' == u'caf\xc3\xa9')
+
+    def test_count(self):
+        ref = u'Le caf? des f?es ?gar?es'
+        with EncodingContext(self.encoding):
+            self.assertEqual(ref.count(u'?'.encode(self.encoding)), 4)
+            self.assertEqual(ref.count(u'f?'.encode(self.encoding)), 2)
+
+    def test_endswith(self):
+        # Set up the test using unicode values and indices
+        ref = u'caf? cr?me'
+        s, u, v = ref[-4:], u'?m?', u'??e'
+        # Encode all this
+        enc = ref.encode(self.encoding)
+        u1, v1 = u.encode(self.encoding), v.encode(self.encoding)
+        s1 = s.encode(self.encoding)
+
+        with EncodingContext(self.encoding):
+            # Test with single argument
+            self.assertFalse(ref.endswith(v1))
+            self.assertTrue(ref.endswith(s1))
+            # Test with a mixed tuple as the argument
+            self.assertFalse(ref.endswith((u1, u, v1, v)))
+            self.assertTrue(ref.endswith((u1, s1, v1)))
+            self.assertTrue(ref.endswith((u1, u, s1, v1, v)))
+            self.assertFalse(enc.endswith((u1, v1, u, v)))
+            self.assertTrue(enc.endswith((u, s, v)))
+            self.assertTrue(enc.endswith((u1, u, s, v1, v)))
+
+    def test_endswith_slice(self):
+        # Set up the test using unicode values and indices
+        ref = u'?Un caf? cr?me??'
+        if len(u'??'.encode(self.encoding))!=2 and not test_support.is_jython:
+            # CPython fails on str.startswith(unicode, int, int) as it passes
+            # byte indices to unicode.startswith(unicode, int, int) unchanged.
+            # It only works if ? and ? encode to single bytes. Easier test:
+            ref = u'"Un caf? cr?me?"'
+        a, b = 4, -2
+        s, u, v = ref[b-4:b], u'?m?', u'??e'
+        # Encode all this, including the indices
+        enc = ref.encode(self.encoding)
+        u1, v1 = u.encode(self.encoding), v.encode(self.encoding)
+        a1 = len(ref[:a].encode(self.encoding))
+        b1 = - len(ref[b:].encode(self.encoding))
+        s1 = s.encode(self.encoding)
+
+        with EncodingContext(self.encoding):
+            # Test the assumption on which the test is based
+            self.assertEqual(ref[a:b], enc[a1:b1])
+            # Test slice with single argument
+            self.assertFalse(ref.endswith(v1, a, b))
+            self.assertTrue(ref.endswith(s1, a, b))
+            self.assertFalse(enc.endswith(v1, a1, b1))
+            self.assertTrue(enc.endswith(s, a1, b1))
+            # CPython would pass:
+            #self.assertTrue(enc.endswith(s, a, b))
+            # Test slice with a mixed tuple as the argument
+            self.assertFalse(ref.endswith((u1, u, v1, v), a, b))
+            self.assertTrue(ref.endswith((u1, s1, v1), a, b))
+            self.assertTrue(ref.endswith((u1, u, s1, v1, v), a, b))
+            self.assertFalse(enc.endswith((u1, v1, u, v), a1, b1))
+            self.assertTrue(enc.endswith((u, s, v), a1, b1))
+            self.assertTrue(enc.endswith((u1, u, s, v1, v), a1, b1))
+            # CPython would pass:
+            #self.assertTrue(enc.endswith((u, s, v), a, b))
+            #self.assertTrue(enc.endswith((u1, u, s, v1, v), a, b))
+
+    def test_find(self):
+        ref = u'caf? cr?me'
+        sub = u'?'.encode(self.encoding)
+        with EncodingContext(self.encoding):
+            self.assertEqual(ref.find(sub), 7)
+
+    def test_index(self):
+        ref = u'caf? cr?me'
+        sub = u'?'.encode(self.encoding)
+        with EncodingContext(self.encoding):
+            self.assertEqual(ref.index(sub), 7)
+
+    def test_lstrip(self):
+        ref = u"??????du bl? ?"
+        sep = u'???'.encode(self.encoding)
+        with EncodingContext(self.encoding):
+            self.assertEqual(ref.lstrip(sep), u"du bl? ?")
+
+    def test_partition(self):
+        ref = u"Des f?es h?b?t?es."
+        sep1 = u'?'.encode(self.encoding)
+        sep2 = u'?es'.encode(self.encoding)
+        with EncodingContext(self.encoding):
+            self.assertEqual(ref.partition(sep1), (u"Des f", u"?", u"es h?b?t?es."))
+            self.assertEqual(ref.partition(sep2), (u"Des f", u"?es", u" h?b?t?es."))
+
+    def test_replace(self):
+        ref = u"?t?."
+        a = u'?'.encode(self.encoding)
+        b = u'?'.encode(self.encoding)
+        with EncodingContext(self.encoding):
+            self.assertEqual(ref.replace(a, b), u"?t?.")
+            self.assertEqual(ref.replace(b, a), u"?t?.")
+
+    def test_rfind(self):
+        ref = u'caf? cr?me'
+        sub = u'?'.encode(self.encoding)
+        with EncodingContext(self.encoding):
+            self.assertEqual(ref.rfind(sub), 3)
+
+    def test_rindex(self):
+        ref = u'caf? cr?me'
+        sub = u'?'.encode(self.encoding)
+        with EncodingContext(self.encoding):
+            self.assertEqual(ref.index(sub), 3)
+
+    def test_rpartition(self):
+        ref = u"Des f?es h?b?t?es."
+        sep1 = u'?'.encode(self.encoding)
+        sep2 = u'?es'.encode(self.encoding)
+        with EncodingContext(self.encoding):
+            self.assertEqual(ref.rpartition(sep1), (u"Des f?es h?b?t", u"?", u"es."))
+            self.assertEqual(ref.rpartition(sep2), (u"Des f?es h?b?t", u"?es", u"."))
+
+    def test_rsplit(self):
+        ref = u"Des f?es h?b?t?es."
+        sep1 = u'?'.encode(self.encoding)
+        sep2 = u'?es'.encode(self.encoding)
+        with EncodingContext(self.encoding):
+            self.assertEqual(ref.rsplit(sep1, 3), [u"Des f?es h", u"b", u"t", u"es."])
+            self.assertEqual(ref.rsplit(sep2), [u"Des f", u" h?b?t", u"."])
+
+    def test_rstrip(self):
+        ref = u"? du bl???????"
+        sep = u'???'.encode(self.encoding)
+        with EncodingContext(self.encoding):
+            self.assertEqual(ref.rstrip(sep), u"? du bl?")
+
+    def test_split(self):
+        ref = u"Des f?es h?b?t?es."
+        sep1 = u'?'.encode(self.encoding)
+        sep2 = u'?es'.encode(self.encoding)
+        with EncodingContext(self.encoding):
+            self.assertEqual(ref.split(sep1, 3), [u"Des f", u"es h", u"b", u"t?es."])
+            self.assertEqual(ref.split(sep2), [u"Des f", u" h?b?t", u"."])
+
+    def test_startsswith(self):
+        # Set up the test using unicode values and indices
+        ref = u'caf? cr?me'
+        s, u, v = ref[:4], u'?af', u'caf?'
+        # Encode all this
+        enc = ref.encode(self.encoding)
+        u1, v1 = u.encode(self.encoding), v.encode(self.encoding)
+        s1 = s.encode(self.encoding)
+
+        with EncodingContext(self.encoding):
+            self.assertFalse(ref.startswith(v1))
+            self.assertTrue(ref.startswith(enc[:5]))
+            # Test with a mixed tuple as the argument
+            self.assertFalse(ref.startswith((u1, u, v1, v)))
+            self.assertTrue(ref.startswith((u1, enc[:5], v1)))
+            self.assertTrue(ref.startswith((u1, u, enc[:5], v1, v)))
+            self.assertFalse(enc.startswith((u1, v1, u, v)))
+            self.assertTrue(enc.startswith((u, ref[:4], v)))
+            self.assertTrue(enc.startswith((u1, u, ref[:4], v1, v)))
+
+    def test_startsswith_slice(self):
+        # Set up the test using unicode values and indices
+        ref = u'?Un caf? cr?me??'
+        if len(u'??'.encode(self.encoding))!=2 and not test_support.is_jython:
+            # CPython fails on str.startswith(unicode, int, int) as it passes
+            # byte indices to unicode.startswith(unicode, int, int) unchanged.
+            # It only works if ? and ? encode to single bytes. Easier test:
+            ref = u'"Un caf? cr?me?"'
+        a, b = 4, -2
+        s, u, v = ref[a:a+4], u'?af', u'caf?'
+        # Encode all this, including the indices
+        enc = ref.encode(self.encoding)
+        u1, v1 = u.encode(self.encoding), v.encode(self.encoding)
+        a1 = len(ref[:a].encode(self.encoding))
+        b1 = - len(ref[b:].encode(self.encoding))
+        s1 = s.encode(self.encoding)
+
+        with EncodingContext(self.encoding):
+            # Test the assumption on which the test is based
+            self.assertEqual(ref[a:b], enc[a1:b1])
+            # Test slice with single argument
+            self.assertFalse(ref.startswith(v, a, b))
+            self.assertTrue(ref.startswith(s1, a, b))
+            self.assertFalse(enc.startswith(v1, a1, b1))
+            self.assertTrue(enc.startswith(s, a1, b1))
+            # CPython would pass:
+            #self.assertTrue(enc.startswith(s, a, b))
+            # Test slice with a mixed tuple as the argument
+            self.assertFalse(ref.startswith((u1, u, v1, v), a, b))
+            self.assertTrue(ref.startswith((u1, s1, v1), a, b))
+            self.assertTrue(ref.startswith((u1, u, s1, v1, v), a, b))
+            self.assertFalse(enc.startswith((u1, v1, u, v), a1, b1))
+            self.assertTrue(enc.startswith((u, s, v), a1, b1))
+            self.assertTrue(enc.startswith((u1, u, s, v1, v), a1, b1))
+            # CPython would pass:
+            #self.assertTrue(enc.startswith((u, s, v), a, b))
+            #self.assertTrue(enc.startswith((u1, u, s, v1, v), a, b))
+
+    def test_strip(self):
+        ref = u"??????du bl???????"
+        sep = u'???'.encode(self.encoding)
+        with EncodingContext(self.encoding):
+            self.assertEqual(ref.strip(sep), u"du bl?")
+
+
+class DefaultDecodingLatin1(DefaultDecodingTestCase):
+    encoding = "latin-1"
+
+class DefaultDecodingUTF8(DefaultDecodingTestCase):
+    encoding = "utf-8"
+
+class DefaultDecodingCp850(DefaultDecodingTestCase):
+    encoding = "cp850"
+
 
 def test_main():
     test_support.run_unittest(
@@ -910,6 +1181,9 @@
                 UnicodeFormatStrTest,
                 StringModuleUnicodeTest,
                 UnicodeSpaceTest,
+                DefaultDecodingLatin1,
+                DefaultDecodingUTF8,
+                DefaultDecodingCp850,
             )
 
 
diff --git a/src/org/python/core/PyShadowString.java b/src/org/python/core/PyShadowString.java
--- a/src/org/python/core/PyShadowString.java
+++ b/src/org/python/core/PyShadowString.java
@@ -251,7 +251,7 @@
 
         if (!(prefix instanceof PyTuple)) {
             // It ought to be PyUnicode or some kind of bytes with the buffer API.
-            String s = asUTF16StringOrError(prefix);
+            String s = asU16BytesOrError(prefix);
             // If s is non-BMP, and this is a PyString (bytes), result will correctly be false.
             return sliceLen >= s.length() &&
                     (getString().startsWith(s, start) || shadow.startsWith(s, start));
@@ -259,7 +259,7 @@
             // Loop will return true if this slice starts with any prefix in the tuple
             for (PyObject prefixObj : ((PyTuple)prefix).getArray()) {
                 // It ought to be PyUnicode or some kind of bytes with the buffer API.
-                String s = asUTF16StringOrError(prefixObj);
+                String s = asU16BytesOrError(prefixObj);
                 // If s is non-BMP, and this is a PyString (bytes), result will correctly be false.
                 if (sliceLen >= s.length() &&
                         (getString().startsWith(s, start) || shadow.startsWith(s, start))) {
diff --git a/src/org/python/core/PyString.java b/src/org/python/core/PyString.java
--- a/src/org/python/core/PyString.java
+++ b/src/org/python/core/PyString.java
@@ -5,6 +5,7 @@
 import java.lang.ref.SoftReference;
 import java.math.BigInteger;
 import java.util.ArrayList;
+import java.util.Arrays;
 import java.util.Collection;
 import java.util.List;
 import java.util.regex.Matcher;
@@ -81,6 +82,10 @@
         this(TYPE, buffer.toString());
     }
 
+    PyString(PyBuffer buffer) {
+        this(TYPE, buffer.toString());
+    }
+
     /**
      * Local-use constructor in which the client is allowed to guarantee that the
      * <code>String</code> argument contains only characters in the byte range. We do not then
@@ -260,7 +265,7 @@
 
     @Override
     public PyUnicode __unicode__() {
-        return new PyUnicode(this);
+        return new PyUnicode(this);  // Decodes with default codec.
     }
 
     @Override
@@ -720,8 +725,9 @@
         return getString().compareTo(s) >= 0 ? Py.True : Py.False;
     }
 
+    /** Interpret the object as a Java String representing bytes or return <code>null</code>. */
     private static String coerce(PyObject o) {
-        if (o instanceof PyString) {
+        if (o instanceof PyString && !(o instanceof PyUnicode)) {
             return o.toString();
         }
         return null;
@@ -841,17 +847,19 @@
     }
 
     /**
-     * Return a String equivalent to the argument. This is a helper function to those methods that
-     * accept any byte array type (any object that supports a one-dimensional byte buffer), or
-     * accept a <code>unicode</code> argument which they interpret from its UTF-16 encoded form (the
-     * internal representation returned by {@link PyUnicode#getString()}).
+     * Return a Java <code>String</code> that is the Jython-internal equivalent of the byte-like
+     * argument (a <code>str</code> or any object that supports a one-dimensional byte buffer). If
+     * the argument is not acceptable (this includes a <code>unicode</code> argument) return null.
      *
      * @param obj to coerce to a String
      * @return coerced value or <code>null</code> if it can't be
      */
-    private static String asUTF16StringOrNull(PyObject obj) {
+    private static String asU16BytesOrNull(PyObject obj) {
         if (obj instanceof PyString) {
-            // str or unicode object: go directly to the String
+            if (obj instanceof PyUnicode) {
+                return null;
+            }
+            // str but not unicode object: go directly to the String
             return ((PyString) obj).getString();
         } else if (obj instanceof BufferProtocol) {
             // Other object with buffer API: briefly access the buffer
@@ -869,23 +877,11 @@
      * <b>not</b> a <code>unicode</code>.
      *
      * @param obj to coerce to a String
-     * @return coerced value or <code>null</code> if it can't be (including <code>unicode</code>)
-     */
-    private static String asStringOrNull(PyObject obj) {
-        return (obj instanceof PyUnicode) ? null : asUTF16StringOrNull(obj);
-    }
-
-    /**
-     * Return a String equivalent to the argument. This is a helper function to those methods that
-     * accept any byte array type (any object that supports a one-dimensional byte buffer), but
-     * <b>not</b> a <code>unicode</code>.
-     *
-     * @param obj to coerce to a String
      * @return coerced value
      * @throws PyException if the coercion fails (including <code>unicode</code>)
      */
-    private static String asStringOrError(PyObject obj) throws PyException {
-        String ret = (obj instanceof PyUnicode) ? null : asUTF16StringOrNull(obj);
+    protected static String asU16BytesOrError(PyObject obj) throws PyException {
+        String ret = asU16BytesOrNull(obj);
         if (ret != null) {
             return ret;
         } else {
@@ -906,12 +902,11 @@
      * @return coerced value or null
      * @throws PyException if the coercion fails (including <code>unicode</code>)
      */
-    private static String asStringNullOrError(PyObject obj, String name) throws PyException {
-
+    private static String asU16BytesNullOrError(PyObject obj, String name) throws PyException {
         if (obj == null || obj == Py.None) {
             return null;
         } else {
-            String ret = (obj instanceof PyUnicode) ? null : asUTF16StringOrNull(obj);
+            String ret = asU16BytesOrNull(obj);
             if (ret != null) {
                 return ret;
             } else if (name == null) {
@@ -924,26 +919,6 @@
         }
     }
 
-    /**
-     * Return a String equivalent to the argument according to the calling conventions of the
-     * certain methods of <code>str</code>. Those methods accept as a byte string anything bearing
-     * the buffer interface, or accept a <code>unicode</code> argument which they interpret from its
-     * UTF-16 encoded form (the internal representation returned by {@link PyUnicode#getString()}).
-     *
-     * @param obj to coerce to a String
-     * @return coerced value
-     * @throws PyException if the coercion fails
-     */
-    protected static String asUTF16StringOrError(PyObject obj) {
-        // PyUnicode accepted here. Care required in the client if obj is not basic plane.
-        String ret = asUTF16StringOrNull(obj);
-        if (ret != null) {
-            return ret;
-        } else {
-            throw Py.TypeError("expected str, bytearray, unicode or buffer compatible object");
-        }
-    }
-
     @Override
     public boolean __contains__(PyObject o) {
         return str___contains__(o);
@@ -951,8 +926,15 @@
 
     @ExposedMethod(doc = BuiltinDocs.str___contains___doc)
     final boolean str___contains__(PyObject o) {
-        String other = asUTF16StringOrError(o);
-        return getString().indexOf(other) >= 0;
+        String other = asU16BytesOrNull(o);
+        if (other != null) {
+            return getString().indexOf(other) >= 0;
+        } else if (o instanceof PyUnicode) {
+            return decode().__contains__(o);
+        } else {
+            throw Py.TypeError("'in <string>' requires string as left operand, not "
+                    + (o == null ? Py.None : o).getType().fastGetName());
+        }
     }
 
     @Override
@@ -1014,12 +996,12 @@
     @ExposedMethod(type = MethodType.BINARY, doc = BuiltinDocs.str___add___doc)
     final PyObject str___add__(PyObject other) {
         // Expect other to be some kind of byte-like object.
-        String otherStr = asStringOrNull(other);
+        String otherStr = asU16BytesOrNull(other);
         if (otherStr != null) {
             // Yes it is: concatenate as strings, which are guaranteed byte-like.
             return new PyString(getString().concat(otherStr), true);
         } else if (other instanceof PyUnicode) {
-            // Convert self to PyUnicode and escalate the problem
+            // Escalate the problem to PyUnicode
             return decode().__add__(other);
         } else {
             // Allow PyObject._basic_add to pick up the pieces or raise informative error
@@ -1237,7 +1219,7 @@
             return ((PyUnicode) decode()).unicode_strip(chars);
         } else {
             // It ought to be None, null, some kind of bytes with the buffer API.
-            String stripChars = asStringNullOrError(chars, "strip");
+            String stripChars = asU16BytesNullOrError(chars, "strip");
             // Strip specified characters or whitespace if stripChars == null
             return new PyString(_strip(stripChars), true);
         }
@@ -1407,7 +1389,7 @@
             return ((PyUnicode) decode()).unicode_lstrip(chars);
         } else {
             // It ought to be None, null, some kind of bytes with the buffer API.
-            String stripChars = asStringNullOrError(chars, "lstrip");
+            String stripChars = asU16BytesNullOrError(chars, "lstrip");
             // Strip specified characters or whitespace if stripChars == null
             return new PyString(_lstrip(stripChars), true);
         }
@@ -1496,7 +1478,7 @@
             return ((PyUnicode) decode()).unicode_rstrip(chars);
         } else {
             // It ought to be None, null, some kind of bytes with the buffer API.
-            String stripChars = asStringNullOrError(chars, "rstrip");
+            String stripChars = asU16BytesNullOrError(chars, "rstrip");
             // Strip specified characters or whitespace if stripChars == null
             return new PyString(_rstrip(stripChars), true);
         }
@@ -1617,7 +1599,7 @@
             return ((PyUnicode) decode()).unicode_split(sepObj, maxsplit);
         } else {
             // It ought to be None, null, some kind of bytes with the buffer API.
-            String sep = asStringNullOrError(sepObj, "split");
+            String sep = asU16BytesNullOrError(sepObj, "split");
             // Split on specified string or whitespace if sep == null
             return _split(sep, maxsplit);
         }
@@ -1868,7 +1850,7 @@
             return ((PyUnicode) decode()).unicode_rsplit(sepObj, maxsplit);
         } else {
             // It ought to be None, null, some kind of bytes with the buffer API.
-            String sep = asStringNullOrError(sepObj, "rsplit");
+            String sep = asU16BytesNullOrError(sepObj, "rsplit");
             // Split on specified string or whitespace if sep == null
             return _rsplit(sep, maxsplit);
         }
@@ -2056,7 +2038,7 @@
 
         } else {
             // It ought to be some kind of bytes with the buffer API.
-            String sep = asStringOrError(sepObj);
+            String sep = asU16BytesOrError(sepObj);
 
             if (sep.length() == 0) {
                 throw Py.ValueError("empty separator");
@@ -2115,7 +2097,7 @@
 
         } else {
             // It ought to be some kind of bytes with the buffer API.
-            String sep = asStringOrError(sepObj);
+            String sep = asU16BytesOrError(sepObj);
 
             if (sep.length() == 0) {
                 throw Py.ValueError("empty separator");
@@ -2418,10 +2400,10 @@
     final int str_count(PyObject subObj, PyObject start, PyObject end) {
         if (subObj instanceof PyUnicode) {
             // Promote the problem to a Unicode one
-            return ((PyUnicode) decode()).unicode_count(subObj, start, end);
+            return asUnicode(start, end).unicode_count(subObj, null, null);
         } else {
             // It ought to be some kind of bytes with the buffer API.
-            String sub = asStringOrError(subObj);
+            String sub = asU16BytesOrError(subObj);
             return _count(sub, start, end);
         }
     }
@@ -2533,10 +2515,11 @@
     final int str_find(PyObject subObj, PyObject start, PyObject end) {
         if (subObj instanceof PyUnicode) {
             // Promote the problem to a Unicode one
+            // XXX Questionable: return is a Unicode character index not byte index
             return ((PyUnicode) decode()).unicode_find(subObj, start, end);
         } else {
-            // It ought to be some kind of bytes with the buffer API.
-            String sub = asStringOrError(subObj);
+            // It ought to be a bytes-like object.
+            String sub = asU16BytesOrError(subObj);
             return _find(sub, start, end);
         }
     }
@@ -2641,7 +2624,7 @@
             return ((PyUnicode) decode()).unicode_rfind(subObj, start, end);
         } else {
             // It ought to be some kind of bytes with the buffer API.
-            String sub = asStringOrError(subObj);
+            String sub = asU16BytesOrError(subObj);
             return _rfind(sub, start, end);
         }
     }
@@ -3213,8 +3196,8 @@
             return ((PyUnicode) decode()).unicode_replace(oldPieceObj, newPieceObj, count);
         } else {
             // Neither is a PyUnicode: both ought to be some kind of bytes with the buffer API.
-            String oldPiece = asStringOrError(oldPieceObj);
-            String newPiece = asStringOrError(newPieceObj);
+            String oldPiece = asU16BytesOrError(oldPieceObj);
+            String newPiece = asU16BytesOrError(newPieceObj);
             return _replace(oldPiece, newPiece, count);
         }
     }
@@ -3401,7 +3384,7 @@
      *         <code>false</code>.
      */
     public boolean startswith(PyObject prefix) {
-        return str_startswith(prefix, null, null);
+        return startswith(prefix, null, null);
     }
 
     /**
@@ -3416,7 +3399,7 @@
      *         <code>false</code>.
      */
     public boolean startswith(PyObject prefix, PyObject start) {
-        return str_startswith(prefix, start, null);
+        return startswith(prefix, start, null);
     }
 
     /**
@@ -3438,28 +3421,49 @@
 
     @ExposedMethod(defaults = {"null", "null"}, doc = BuiltinDocs.str_startswith_doc)
     final boolean str_startswith(PyObject prefix, PyObject startObj, PyObject endObj) {
+
         int[] indices = translateIndices(startObj, endObj);
         int start = indices[0];
         int sliceLen = indices[1] - start;
 
         if (!(prefix instanceof PyTuple)) {
-            // It ought to be PyUnicode or some kind of bytes with the buffer API.
-            String s = asUTF16StringOrError(prefix);
-            // If s is non-BMP, and this is a PyString (bytes), result will correctly be false.
-            return sliceLen >= s.length() && getString().startsWith(s, start);
+            if (prefix instanceof PyUnicode) {
+                // Promote to a unicode problem on the decoded slice
+                return asUnicode(startObj, endObj).unicode_startswith(prefix, null, null);
+            } else {
+                // It ought to be a bytes-like object.
+                String s = asU16BytesOrError(prefix);
+                return sliceLen >= s.length() && getString().startsWith(s, start);
+            }
 
         } else {
-            // Loop will return true if this slice starts with any prefix in the tuple
-            for (PyObject prefixObj : ((PyTuple) prefix).getArray()) {
-                // It ought to be PyUnicode or some kind of bytes with the buffer API.
-                String s = asUTF16StringOrError(prefixObj);
-                // If s is non-BMP, and this is a PyString (bytes), result will correctly be false.
-                if (sliceLen >= s.length() && getString().startsWith(s, start)) {
-                    return true;
+            // It's a tuple so we have to iterate through the members.
+            PyObject[] prefixes = ((PyTuple) prefix).getArray();
+            String string = getString();
+
+            // Test with only the bytes prefixes first and save the unicode ones
+            int unicodeCount = 0;
+            for (PyObject o : prefixes) {
+                if (o instanceof PyUnicode) {
+                    // Pack the unicode prefixes to the start of the array without trying them
+                    prefixes[unicodeCount++] = o;
+                } else {
+                    // It ought to be a bytes-like object.
+                    String s = asU16BytesOrError(o);
+                    if (sliceLen >= s.length() && string.startsWith(s, start)) {
+                        return true;
+                    }
                 }
             }
-            // None matched
-            return false;
+
+            if (unicodeCount == 0) {
+                // Only bytes prefixes given and nothing matched
+                return false;
+            } else {
+                // There were unicode prefixes: test the decoded slice for them.
+                PyTuple t = new PyTuple(Arrays.copyOf(prefixes, unicodeCount));
+                return asUnicode(startObj, endObj).unicode_startswith(t, null, null);
+            }
         }
     }
 
@@ -3472,7 +3476,7 @@
      *         <code>false</code>.
      */
     public boolean endswith(PyObject suffix) {
-        return str_endswith(suffix, null, null);
+        return endswith(suffix, null, null);
     }
 
     /**
@@ -3487,7 +3491,7 @@
      *         <code>false</code>.
      */
     public boolean endswith(PyObject suffix, PyObject start) {
-        return str_endswith(suffix, start, null);
+        return endswith(suffix, start, null);
     }
 
     /**
@@ -3511,26 +3515,45 @@
     final boolean str_endswith(PyObject suffix, PyObject startObj, PyObject endObj) {
 
         int[] indices = translateIndices(startObj, endObj);
-        String substr = getString().substring(indices[0], indices[1]);
 
         if (!(suffix instanceof PyTuple)) {
-            // It ought to be PyUnicode or some kind of bytes with the buffer API.
-            String s = asUTF16StringOrError(suffix);
-            // If s is non-BMP, and this is a PyString (bytes), result will correctly be false.
-            return substr.endsWith(s);
+            if (suffix instanceof PyUnicode) {
+                // Promote to a unicode problem on the decoded slice
+                return asUnicode(startObj, endObj).unicode_endswith(suffix, null, null);
+            } else {
+                // It ought to be a bytes-like object.
+                String s = asU16BytesOrError(suffix);
+                return getString().substring(indices[0], indices[1]).endsWith(s);
+            }
 
         } else {
-            // Loop will return true if this slice ends with any suffix in the tuple
-            for (PyObject suffixObj : ((PyTuple) suffix).getArray()) {
-                // It ought to be PyUnicode or some kind of bytes with the buffer API.
-                String s = asUTF16StringOrError(suffixObj);
-                // If s is non-BMP, and this is a PyString (bytes), result will correctly be false.
-                if (substr.endsWith(s)) {
-                    return true;
+            // It's a tuple so we have to iterate through the members.
+            PyObject[] suffixes = ((PyTuple) suffix).getArray();
+            String string = getString().substring(indices[0], indices[1]);
+
+            // Test with only the bytes suffixes first and save the unicode ones
+            int unicodeCount = 0;
+            for (PyObject o : suffixes) {
+                if (o instanceof PyUnicode) {
+                    // Pack the unicode suffixes to the start of the array without trying them
+                    suffixes[unicodeCount++] = o;
+                } else {
+                    // It ought to be a bytes-like object.
+                    String s = asU16BytesOrError(o);
+                    if (string.endsWith(s)) {
+                        return true;
+                    }
                 }
             }
-            // None matched
-            return false;
+
+            if (unicodeCount == 0) {
+                // Only bytes suffixes given and nothing matched
+                return false;
+            } else {
+                // There were unicode suffixes: test the decoded slice for them.
+                PyTuple t = new PyTuple(Arrays.copyOf(suffixes, unicodeCount));
+                return asUnicode(startObj, endObj).unicode_endswith(t, null, null);
+            }
         }
     }
 
@@ -3655,8 +3678,8 @@
     @ExposedMethod(defaults = {"null", "null"}, doc = BuiltinDocs.str_translate_doc)
     final String str_translate(PyObject tableObj, PyObject deletecharsObj) {
         // Accept anythiong withthe buffer API or null
-        String table = asStringNullOrError(tableObj, null);
-        String deletechars = asStringNullOrError(deletecharsObj, null);
+        String table = asU16BytesNullOrError(tableObj, null);
+        String deletechars = asU16BytesNullOrError(deletecharsObj, null);
         return _translate(table, deletechars);
     }
 
@@ -4317,6 +4340,35 @@
     public CharSequence subSequence(int start, int end) {
         return string.subSequence(start, end);
     }
+
+    /**
+     * Decode this <code>str</code> object to a <code>unicode</code>, like
+     * <code>__unicode__()</code> but without the possibility it will be overridden.
+     *
+     * @return this as a <code>unicode</code> using the default encoding.
+     */
+    private PyUnicode asUnicode() {
+        return new PyUnicode(this);
+    }
+
+    /**
+     * Decode a slice of this <code>str</code> object to a <code>unicode</code>, using Python slice
+     * semantics and the default encoding. This supports the many library methods that accept
+     * slicing as part of the API, in the case where the calculation must be promoted due to a
+     * <code>unicode</code> argument.
+     *
+     * @param startObj start index (or <code>null</code> or <code>None</code>)
+     * @param endObj end index (or <code>null</code> or <code>None</code>)
+     * @return the slice as a <code>unicode</code> using the default encoding.
+     */
+    private PyUnicode asUnicode(PyObject startObj, PyObject endObj) {
+        if (startObj == null && endObj == null) {
+            return asUnicode();
+        } else {
+            int[] indices = translateIndices(startObj, endObj);
+            return new PyUnicode(fromSubstring(indices[0], indices[1]));
+        }
+    }
 }
 
 
diff --git a/src/org/python/core/PyUnicode.java b/src/org/python/core/PyUnicode.java
--- a/src/org/python/core/PyUnicode.java
+++ b/src/org/python/core/PyUnicode.java
@@ -715,19 +715,48 @@
         return createInstance(buffer.toString());
     }
 
-    @ExposedMethod(type = MethodType.CMP, doc = BuiltinDocs.unicode___getslice___doc)
+    @ExposedMethod(type = MethodType.CMP)
     final int unicode___cmp__(PyObject other) {
+        // XXX needs proper coercion like __eq__, then UTF-32 code point order :(
         return str___cmp__(other);
     }
 
-    @ExposedMethod(type = MethodType.BINARY, doc = BuiltinDocs.unicode___getslice___doc)
-    final PyObject unicode___eq__(PyObject other) {
-        return str___eq__(other);
+    @Override
+    public PyObject __eq__(PyObject other) {
+        return unicode___eq__(other);
     }
 
-    @ExposedMethod(type = MethodType.BINARY, doc = BuiltinDocs.unicode___getslice___doc)
+    @ExposedMethod(type = MethodType.BINARY, doc = BuiltinDocs.unicode___eq___doc)
+    final PyObject unicode___eq__(PyObject other) {
+        try {
+            String s = coerceForComparison(other);
+            if (s == null) {
+                return null;
+            }
+            return getString().equals(s) ? Py.True : Py.False;
+        } catch (PyException e) {
+            // Decoding failed: treat as unequal
+            return Py.False;
+        }
+    }
+
+    @Override
+    public PyObject __ne__(PyObject other) {
+        return unicode___ne__(other);
+    }
+
+    @ExposedMethod(type = MethodType.BINARY, doc = BuiltinDocs.unicode___ne___doc)
     final PyObject unicode___ne__(PyObject other) {
-        return str___ne__(other);
+        try {
+            String s = coerceForComparison(other);
+            if (s == null) {
+                return null;
+            }
+            return getString().equals(s) ? Py.False : Py.True;
+        } catch (PyException e) {
+            // Decoding failed: treat as unequal
+            return Py.True;
+        }
     }
 
     @ExposedMethod(doc = BuiltinDocs.unicode___hash___doc)
@@ -900,53 +929,156 @@
     }
 
     /**
-     * Helper used many times to "coerce" a method argument into a <code>PyUnicode</code> (which it
-     * may already be). A <code>null</code> or incoercible argument will raise a
-     * <code>TypeError</code>.
+     * Interpret the object as a Java <code>String</code> representing characters as UTF-16, or
+     * return <code>null</code> if the type does not admit this conversion. From a
+     * <code>PyUnicode</code> we return its internal string. A byte argument is decoded with the
+     * default encoding.
+     *
+     * @param o the object to coerce
+     * @return an equivalent <code>String</code>
+     */
+    private static String coerceToStringOrNull(PyObject o) {
+        if (o instanceof PyUnicode) {
+            return ((PyUnicode) o).getString();
+        } else if (o instanceof PyString) {
+            return ((PyString) o).decode().toString();
+        } else if (o instanceof BufferProtocol) {
+            // PyByteArray, PyMemoryView, Py2kBuffer ...
+            // We ought to be able to call codecs.decode on o but see Issue #2164
+            try (PyBuffer buf = ((BufferProtocol) o).getBuffer(PyBUF.FULL_RO)) {
+                PyString s = new PyString(buf);
+                // For any sensible codec, the return is unicode and toString() is getString().
+                return s.decode().toString();
+            }
+        } else {
+            // o is some type not allowed:
+            return null;
+        }
+    }
+
+    /**
+     * Interpret the object as a Java <code>String</code> for use in comparison. The return
+     * represents characters as UTF-16. From a <code>PyUnicode</code> we return its internal string.
+     * A <code>str</code> or <code>buffer</code> argument is decoded with the default encoding.
+     * Equivalent to {@link #coerceToStringOrNull(PyObject)} allowing only the types supported in
+     * (C)Python <code>unicode.__eq__</code>.
+     *
+     * @param o the object to coerce
+     * @return an equivalent <code>String</code>
+     */
+    private static String coerceForComparison(PyObject o) {
+        if (o instanceof PyUnicode) {
+            return ((PyUnicode) o).getString();
+        } else if (o instanceof PyString) {
+            return ((PyString) o).decode().toString();
+        } else if (o instanceof Py2kBuffer) {
+            // We ought to be able to call codecs.decode on o but see Issue #2164
+            try (PyBuffer buf = ((BufferProtocol) o).getBuffer(PyBUF.FULL_RO)) {
+                PyString s = new PyString(buf);
+                // For any sensible codec, the return is unicode and toString() is getString().
+                return s.decode().toString();
+            }
+        } else {
+            // o is some type not allowed:
+            return null;
+        }
+    }
+
+    /**
+     * Interpret the object as a Java <code>String</code> representing characters as UTF-16, or
+     * raise an error if the type does not admit this conversion. A byte argument is decoded with
+     * the default encoding.
+     *
+     * @param o the object to coerce
+     * @return an equivalent <code>String</code> (and never <code>null</code>)
+     */
+    private static String coerceToString(PyObject o) {
+        String s = coerceToStringOrNull(o);
+        if (s == null) {
+            throw errorCoercingToUnicode(o);
+        }
+        return s;
+    }
+
+    /**
+     * Interpret the object as a Java <code>String</code> representing characters as UTF-16, or
+     * optionally as <code>null</code> (for a <code>null</code> or <code>None</code> argument if the
+     * second argument is <code>true</code>). Raise an error if the type does not admit this
+     * conversion.
+     *
+     * @param o the object to coerce
+     * @param allowNullArgument iff <code>true</code> allow a null or <code>None</code> argument
+     * @return an equivalent <code>String</code> or <code>null</code>
+     */
+    private static String coerceToString(PyObject o, boolean allowNullArgument) {
+        if (allowNullArgument && (o == null || o == Py.None)) {
+            return null;
+        } else {
+            return coerceToString(o);
+        }
+    }
+
+    /** Construct exception "coercing to Unicode: ..." */
+    private static PyException errorCoercingToUnicode(PyObject o) {
+        return Py.TypeError("coercing to Unicode: need string or buffer, "
+                + (o == null ? Py.None : o).getType().fastGetName() + " found");
+    }
+
+    /**
+     * Interpret the object as a <code>PyUnicode</code>, or return <code>null</code> if the type
+     * does not admit this conversion. From a <code>PyUnicode</code> we return itself. A byte
+     * argument is decoded with the default encoding.
      *
      * @param o the object to coerce
      * @return an equivalent <code>PyUnicode</code> (or o itself)
      */
-    private PyUnicode coerceToUnicode(PyObject o) {
+    private static PyUnicode coerceToUnicodeOrNull(PyObject o) {
         if (o instanceof PyUnicode) {
             return (PyUnicode) o;
         } else if (o instanceof PyString) {
-            return new PyUnicode(((PyString) o).getString(), true);
+            // For any sensible codec, the return here is unicode.
+            PyObject u = ((PyString) o).decode();
+            return (u instanceof PyUnicode) ? (PyUnicode) u : new PyUnicode(o.toString());
         } else if (o instanceof BufferProtocol) {
             // PyByteArray, PyMemoryView, Py2kBuffer ...
+            // We ought to be able to call codecs.decode on o but see Issue #2164
             try (PyBuffer buf = ((BufferProtocol) o).getBuffer(PyBUF.FULL_RO)) {
-                return new PyUnicode(buf.toString(), true);
+                PyString s = new PyString(buf);
+                // For any sensible codec, the return is unicode and toString() is getString().
+                PyObject u = s.decode();
+                return (u instanceof PyUnicode) ? (PyUnicode) u : new PyUnicode(o.toString());
             }
         } else {
             // o is some type not allowed:
-            if (o == null) {
-                // Do something safe and approximately right
-                o = Py.None;
-            }
-            throw Py.TypeError("coercing to Unicode: need string or buffer, "
-                    + o.getType().fastGetName() + " found");
+            return null;
         }
     }
 
     /**
-     * Helper used many times to "coerce" a method argument into a <code>PyUnicode</code> (which it
-     * may already be). A <code>null</code> argument or a <code>PyNone</code> causes
-     * <code>null</code> to be returned.
+     * Interpret the object as a <code>PyUnicode</code>, or raise a <code>TypeError</code> if the
+     * type does not admit this conversion. From a <code>PyUnicode</code> we return itself. A byte
+     * argument is decoded with the default encoding.
      *
      * @param o the object to coerce
-     * @return an equivalent <code>PyUnicode</code> (or o itself, or <code>null</code>)
+     * @return an equivalent <code>PyUnicode</code> (or o itself)
      */
-    private PyUnicode coerceToUnicodeOrNull(PyObject o) {
-        if (o == null || o == Py.None) {
-            return null;
-        } else {
-            return coerceToUnicode(o);
+    private static PyUnicode coerceToUnicode(PyObject o) {
+        PyUnicode u = coerceToUnicodeOrNull(o);
+        if (u == null) {
+            throw errorCoercingToUnicode(o);
         }
+        return u;
+    }
+
+    @Override
+    public boolean __contains__(PyObject o) {
+        return unicode___contains__(o);
     }
 
     @ExposedMethod(doc = BuiltinDocs.unicode___contains___doc)
     final boolean unicode___contains__(PyObject o) {
-        return str___contains__(o);
+        String other = coerceToString(o);
+        return getString().indexOf(other) >= 0;
     }
 
     @ExposedMethod(type = MethodType.BINARY, doc = BuiltinDocs.unicode___getslice___doc)
@@ -966,15 +1098,9 @@
 
     @ExposedMethod(type = MethodType.BINARY, doc = BuiltinDocs.unicode___getslice___doc)
     final PyObject unicode___add__(PyObject other) {
-        PyUnicode otherUnicode;
-        if (other instanceof PyUnicode) {
-            otherUnicode = (PyUnicode) other;
-        } else if (other instanceof PyString) {
-            otherUnicode = (PyUnicode) ((PyString) other).decode();
-        } else {
-            return null;
-        }
-        return new PyUnicode(getString().concat(otherUnicode.getString()));
+        // Interpret other as a Java String
+        String s = coerceToStringOrNull(other);
+        return s == null ? null : new PyUnicode(getString().concat(s));
     }
 
     @ExposedMethod(doc = BuiltinDocs.unicode_lower_doc)
@@ -1077,25 +1203,25 @@
         }
     }
 
-    // compliance requires that we need to support a bit of inconsistency
-    // compared to other coercion used
+    // Compliance requires a bit of inconsistency with other coercions used.
     /**
      * Helper used in <code>.strip()</code> to "coerce" a method argument into a
      * <code>PyUnicode</code> (which it may already be). A <code>null</code> argument or a
      * <code>PyNone</code> causes <code>null</code> to be returned. A buffer type is not acceptable
      * to (Unicode) <code>.strip()</code>. This is the difference from
-     * {@link #coerceToUnicodeOrNull(PyObject)}.
+     * {@link #coerceToUnicode(PyObject, boolean)}.
      *
      * @param o the object to coerce
      * @return an equivalent <code>PyUnicode</code> (or o itself, or <code>null</code>)
      */
-    private PyUnicode coerceStripSepToUnicode(PyObject o) {
+    private static PyUnicode coerceStripSepToUnicode(PyObject o) {
         if (o == null) {
             return null;
         } else if (o instanceof PyUnicode) {
             return (PyUnicode) o;
         } else if (o instanceof PyString) {
-            return new PyUnicode(((PyString) o).decode().toString());
+            PyObject u = ((PyString) o).decode();
+            return (u instanceof PyUnicode) ? (PyUnicode) u : new PyUnicode(u.toString());
         } else if (o == Py.None) {
             return null;
         } else {
@@ -1431,9 +1557,9 @@
 
     @ExposedMethod(defaults = {"null", "-1"}, doc = BuiltinDocs.unicode_split_doc)
     final PyList unicode_split(PyObject sepObj, int maxsplit) {
-        PyUnicode sep = coerceToUnicodeOrNull(sepObj);
+        String sep = coerceToString(sepObj, true);
         if (sep != null) {
-            return _split(sep.getString(), maxsplit);
+            return _split(sep, maxsplit);
         } else {
             return _split(null, maxsplit);
         }
@@ -1441,9 +1567,9 @@
 
     @ExposedMethod(defaults = {"null", "-1"}, doc = BuiltinDocs.unicode_rsplit_doc)
     final PyList unicode_rsplit(PyObject sepObj, int maxsplit) {
-        PyUnicode sep = coerceToUnicodeOrNull(sepObj);
+        String sep = coerceToString(sepObj, true);
         if (sep != null) {
-            return _rsplit(sep.getString(), maxsplit);
+            return _rsplit(sep, maxsplit);
         } else {
             return _rsplit(null, maxsplit);
         }
@@ -1452,7 +1578,6 @@
     @ExposedMethod(defaults = "false", doc = BuiltinDocs.unicode___getslice___doc)
     final PyList unicode_splitlines(boolean keepends) {
         return new PyList(new LineSplitIterator(keepends));
-
     }
 
     @Override
@@ -1463,16 +1588,16 @@
 
     @ExposedMethod(defaults = {"null", "null"}, doc = BuiltinDocs.unicode_index_doc)
     final int unicode_index(PyObject subObj, PyObject start, PyObject end) {
-        final PyUnicode sub = coerceToUnicode(subObj);
-        // Now use the mechanics of the PyString on the UTF-16 of the PyUnicode.
-        return checkIndex(_find(sub.getString(), start, end));
+        final String sub = coerceToString(subObj);
+        // Now use the mechanics of the PyString on the UTF-16.
+        return checkIndex(_find(sub, start, end));
     }
 
     @ExposedMethod(defaults = {"null", "null"}, doc = BuiltinDocs.unicode_index_doc)
     final int unicode_rindex(PyObject subObj, PyObject start, PyObject end) {
-        final PyUnicode sub = coerceToUnicode(subObj);
-        // Now use the mechanics of the PyString on the UTF-16 of the PyUnicode.
-        return checkIndex(_rfind(sub.getString(), start, end));
+        final String sub = coerceToString(subObj);
+        // Now use the mechanics of the PyString on the UTF-16.
+        return checkIndex(_rfind(sub, start, end));
     }
 
     @ExposedMethod(defaults = {"null", "null"}, doc = BuiltinDocs.unicode_count_doc)
@@ -1492,7 +1617,6 @@
                     break;
                 }
                 matched--;
-
             }
             if (matched == 0) {
                 count++;
@@ -1503,13 +1627,13 @@
 
     @ExposedMethod(defaults = {"null", "null"}, doc = BuiltinDocs.unicode_find_doc)
     final int unicode_find(PyObject subObj, PyObject start, PyObject end) {
-        int found = _find(coerceToUnicode(subObj).getString(), start, end);
+        int found = _find(coerceToString(subObj), start, end);
         return found < 0 ? -1 : translator.codePointIndex(found);
     }
 
     @ExposedMethod(defaults = {"null", "null"}, doc = BuiltinDocs.unicode_rfind_doc)
     final int unicode_rfind(PyObject subObj, PyObject start, PyObject end) {
-        int found = _rfind(coerceToUnicode(subObj).getString(), start, end);
+        int found = _rfind(coerceToString(subObj), start, end);
         return found < 0 ? -1 : translator.codePointIndex(found);
     }
 
@@ -1685,14 +1809,89 @@
         return unicodeJoin(seq);
     }
 
+    /**
+     * Equivalent to the Python <code>unicode.startswith</code> method, testing whether a string
+     * starts with a specified prefix, where a sub-range is specified by <code>[start:end]</code>.
+     * Arguments <code>start</code> and <code>end</code> are interpreted as in slice notation, with
+     * null or {@link Py#None} representing "missing". <code>prefix</code> can also be a tuple of
+     * prefixes to look for.
+     *
+     * @param prefix string to check for (or a <code>PyTuple</code> of them).
+     * @param start start of slice.
+     * @param end end of slice.
+     * @return <code>true</code> if this string slice starts with a specified prefix, otherwise
+     *         <code>false</code>.
+     */
+    @Override
+    public boolean startswith(PyObject prefix, PyObject start, PyObject end) {
+        return unicode_startswith(prefix, start, end);
+    }
+
     @ExposedMethod(defaults = {"null", "null"}, doc = BuiltinDocs.unicode_startswith_doc)
-    final boolean unicode_startswith(PyObject prefix, PyObject start, PyObject end) {
-        return str_startswith(prefix, start, end);
+    final boolean unicode_startswith(PyObject prefix, PyObject startObj, PyObject endObj) {
+        int[] indices = translateIndices(startObj, endObj);
+        int start = indices[0];
+        int sliceLen = indices[1] - start;
+
+        if (!(prefix instanceof PyTuple)) {
+            // It ought to be PyUnicode or some kind of bytes with the buffer API to decode.
+            String s = coerceToString(prefix);
+            return sliceLen >= s.length() && getString().startsWith(s, start);
+
+        } else {
+            // Loop will return true if this slice starts with any prefix in the tuple
+            for (PyObject prefixObj : ((PyTuple) prefix).getArray()) {
+                // It ought to be PyUnicode or some kind of bytes with the buffer API.
+                String s = coerceToString(prefixObj);
+                if (sliceLen >= s.length() && getString().startsWith(s, start)) {
+                    return true;
+                }
+            }
+            // None matched
+            return false;
+        }
+    }
+
+    /**
+     * Equivalent to the Python <code>unicode.endswith</code> method, testing whether a string ends
+     * with a specified suffix, where a sub-range is specified by <code>[start:end]</code>.
+     * Arguments <code>start</code> and <code>end</code> are interpreted as in slice notation, with
+     * null or {@link Py#None} representing "missing". <code>suffix</code> can also be a tuple of
+     * suffixes to look for.
+     *
+     * @param suffix string to check for (or a <code>PyTuple</code> of them).
+     * @param start start of slice.
+     * @param end end of slice.
+     * @return <code>true</code> if this string slice ends with a specified suffix, otherwise
+     *         <code>false</code>.
+     */
+    @Override
+    public boolean endswith(PyObject suffix, PyObject start, PyObject end) {
+        return unicode_endswith(suffix, start, end);
     }
 
     @ExposedMethod(defaults = {"null", "null"}, doc = BuiltinDocs.unicode_endswith_doc)
-    final boolean unicode_endswith(PyObject suffix, PyObject start, PyObject end) {
-        return str_endswith(suffix, start, end);
+    final boolean unicode_endswith(PyObject suffix, PyObject startObj, PyObject endObj) {
+        int[] indices = translateIndices(startObj, endObj);
+        String substr = getString().substring(indices[0], indices[1]);
+
+        if (!(suffix instanceof PyTuple)) {
+            // It ought to be PyUnicode or some kind of bytes with the buffer API.
+            String s = coerceToString(suffix);
+            return substr.endsWith(s);
+
+        } else {
+            // Loop will return true if this slice ends with any suffix in the tuple
+            for (PyObject suffixObj : ((PyTuple) suffix).getArray()) {
+                // It ought to be PyUnicode or some kind of bytes with the buffer API.
+                String s = coerceToString(suffixObj);
+                if (substr.endsWith(s)) {
+                    return true;
+                }
+            }
+            // None matched
+            return false;
+        }
     }
 
     @ExposedMethod(doc = BuiltinDocs.unicode_translate_doc)
diff --git a/src/org/python/core/__builtin__.java b/src/org/python/core/__builtin__.java
--- a/src/org/python/core/__builtin__.java
+++ b/src/org/python/core/__builtin__.java
@@ -1267,17 +1267,22 @@
               "is the number of parent directories to search relative to the current module.");
     }
 
+    private static final String[] ARGS = {"name", "globals", "locals", "fromlist", "level"};
+
     @Override
     public PyObject __call__(PyObject args[], String keywords[]) {
-        ArgParser ap = new ArgParser("__import__", args, keywords,
-                                     new String[] {"name", "globals", "locals", "fromlist",
-                                                   "level"},
-                                     1);
-        String module = ap.getString(0);
+        ArgParser ap = new ArgParser("__import__", args, keywords, ARGS, 1);
+        PyObject module = ap.getPyObject(0);
+        String name;
+        if (module instanceof PyUnicode) {
+            name = ((PyUnicode) module).encode("ascii").toString();
+        } else {
+            name = ap.getString(0);
+        }
         PyObject globals = ap.getPyObject(1, null);
         PyObject fromlist = ap.getPyObject(3, Py.EmptyTuple);
         int level = ap.getInt(4, imp.DEFAULT_LEVEL);
-        return imp.importName(module.intern(), fromlist == Py.None || fromlist.__len__() == 0,
+        return imp.importName(name.intern(), fromlist == Py.None || fromlist.__len__() == 0,
                               globals, fromlist, level);
     }
 }
diff --git a/src/org/python/core/codecs.java b/src/org/python/core/codecs.java
--- a/src/org/python/core/codecs.java
+++ b/src/org/python/core/codecs.java
@@ -126,7 +126,7 @@
     }
 
     private static PyUnicode wrapDecodeResult(String result) {
-        return new PyUnicode(result, true);
+        return new PyUnicode(result);
     }
 
     /**
diff --git a/src/org/python/core/imp.java b/src/org/python/core/imp.java
--- a/src/org/python/core/imp.java
+++ b/src/org/python/core/imp.java
@@ -412,7 +412,7 @@
      * moduleLocation should be the full uri for c.
      */
     public static PyObject createFromCode(String name, PyCode c, String moduleLocation) {
-        PyUnicode.checkEncoding(name);
+        checkName(name);
         PyModule module = addModule(name);
 
         PyBaseCode code = null;
@@ -585,7 +585,7 @@
     }
 
     static PyObject loadFromLoader(PyObject importer, String name) {
-        PyUnicode.checkEncoding(name);
+        checkName(name);
         PyObject load_module = importer.__getattr__("load_module");
         ReentrantLock importLock = Py.getSystemState().getImportLock();
         importLock.lock();
@@ -714,7 +714,7 @@
      * @return the loaded module
      */
     public static PyObject load(String name) {
-        PyUnicode.checkEncoding(name);
+        checkName(name);
         ReentrantLock importLock = Py.getSystemState().getImportLock();
         importLock.lock();
         try {
@@ -986,6 +986,18 @@
         }
     }
 
+    /**
+     * Guard on module names supplied as an argument: reject any character above U+00FF (so not
+     * strictly ASCII). The parser guarantees the name from an import statement is a valid identifier.
+     */
+    private static void checkName(String name) {
+        for (int i = name.length(); i > 0;) {
+            if (name.charAt(--i) > 255) {
+                throw Py.ImportError("No module named " + name);
+            }
+        }
+    }
+
     private static void ensureFromList(PyObject mod, PyObject fromlist, String name) {
         ensureFromList(mod, fromlist, name, false);
     }
@@ -1029,7 +1041,7 @@
      */
     public static PyObject importName(String name, boolean top) {
         checkNotFile(name);
-        PyUnicode.checkEncoding(name);
+        checkName(name);
         ReentrantLock importLock = Py.getSystemState().getImportLock();
         importLock.lock();
         try {
@@ -1050,7 +1062,7 @@
     public static PyObject importName(String name, boolean top, PyObject modDict,
             PyObject fromlist, int level) {
         checkNotFile(name);
-        PyUnicode.checkEncoding(name);
+        checkName(name);
         ReentrantLock importLock = Py.getSystemState().getImportLock();
         importLock.lock();
         try {

-- 
Repository URL: https://hg.python.org/jython

From jython-checkins at python.org  Tue Nov 21 17:39:10 2017
From: jython-checkins at python.org (jeff.allen)
Date: Tue, 21 Nov 2017 22:39:10 +0000
Subject: [Jython-checkins] =?utf-8?q?jython=3A_Mixed_comparison_=28unicod?=
 =?utf-8?q?e=2C_str=29_respects_default_encoding_=28fixes_=232638=29=2E?=
Message-ID: <20171121223909.73572.6FA802C02238A4C9@mg.python.org>

https://hg.python.org/jython/rev/f71e0b2cfaf7
changeset:   8139:f71e0b2cfaf7
user:        Jeff Allen <ja.py at farowl.co.uk>
date:        Sun Nov 19 19:56:33 2017 +0000
summary:
  Mixed comparison (unicode, str) respects default encoding (fixes #2638).

PyUnicode is given its own implementation of the rich comparison
operators rather than inheriting that of PyString (which treats
encoded bytes as latin-1 characters). Corresponding tests are added to
test_unicode_jy, where coverage of other byte types is also improved.

files:
  Lib/test/test_unicode_jy.py        |  466 +++++++++++-----
  src/org/python/core/PyUnicode.java |   73 ++-
  2 files changed, 383 insertions(+), 156 deletions(-)


diff --git a/Lib/test/test_unicode_jy.py b/Lib/test/test_unicode_jy.py
--- a/Lib/test/test_unicode_jy.py
+++ b/Lib/test/test_unicode_jy.py
@@ -122,8 +122,8 @@
         self.assertRaises(UnicodeDecodeError, '???'.join, [u'foo', u'bar'])
 
     def test_file_encoding(self):
-        '''Ensure file writing doesn't attempt to encode things by default and reading doesn't
-        decode things by default.  This was jython's behavior prior to 2.2.1'''
+        # Ensure file writing doesn't attempt to encode things by default and reading doesn't
+        # decode things by default.  This was jython's behavior prior to 2.2.1
         EURO_SIGN = u"\u20ac"
         try:
             EURO_SIGN.encode()
@@ -852,6 +852,7 @@
         self.assertRaises(ValueError, fmt.format, u"{0}", 10, 20, i=100)
         self.assertRaises(ValueError, fmt.format, u"{i}", 10, 20, i=100)
 
+
 class UnicodeSpaceTest(unittest.TestCase):
     # Test classification of characters as whitespace (some Jython divergence)
 
@@ -900,6 +901,7 @@
             self.assertEqual(2, len(s.split()), "no split made in " + repr(s))
             self.assertEqual(2, len(s.rsplit()), "no rsplit made in " + repr(s))
 
+
 class EncodingContext(object):
     """Context manager to save and restore the encoding.
 
@@ -923,70 +925,196 @@
 
 
 class DefaultDecodingTestCase(unittest.TestCase):
-    # Test use of default encoding to coerce str to unicode
+    # Test use of default encoding to coerce byte-like data to unicode
+
+    BYTE_TYPES = (str, buffer, bytearray, memoryview)
+    BYTE_TYPES_COMPARE = (str, buffer) # Restricted as for CPython __eq__ etc.
+
+    if not test_support.is_jython:
+        # CPython restricts the acceptable byte-like types by context
+        BYTE_TYPES = (str, buffer)
+        BYTE_TYPES_COMPARE = (str, buffer)
+
+    # Operators
 
     def test_add(self):
+        cs = self.encoding
         ref = u'caf? cr?me'
-        s1 = ref[:4].encode(self.encoding)
-        s2 = ref[4:].encode(self.encoding)
-        with EncodingContext(self.encoding):
+        s1 = ref[:4].encode(cs)
+        s2 = ref[4:].encode(cs)
+        with EncodingContext(cs):
+            for B in self.BYTE_TYPES:
+                #print B,
+                b2 = B(s2)
+                self.assertEqual( ref[:4] + b2, ref)
+            # Really we're testing that str promotes. Other Bs may not.
             self.assertEqual( s1 + ref[4:], ref)
-            self.assertEqual( ref[:4] + s2, ref)
 
     def test_in(self):
+        cs = self.encoding
         ref = u'caf? cr?me'
-        with EncodingContext(self.encoding):
-            self.assertTrue(u'?'.encode(self.encoding) in ref)
-            self.assertTrue(u'f?'.encode(self.encoding) in ref)
-            # Fails if the string is interpreted as code points.
-            if self.encoding !=  'latin-1':
-                self.assertFalse('\xc3\xa9' in u'caf\xc3\xa9')
+        with EncodingContext(cs):
+            for B in self.BYTE_TYPES:
+                #print B,
+                self.assertTrue(B(u'?'.encode(cs)) in ref)
+                self.assertTrue(B(u'f?'.encode(cs)) in ref)
+                # Fails if the string is interpreted as code points.
+                if cs !=  'latin-1':
+                    self.assertFalse(B('\xc3\xa9') in u'caf\xc3\xa9')
 
     def test_eq(self):
-        ref = u'caf? cr?me'
-        b = ref.encode(self.encoding)
-        with EncodingContext(self.encoding):
-            self.assertTrue(ref == b)
-            self.assertTrue(b == ref)
+        cs = self.encoding
+        u = u"Un caf\xe9 cr\xe8me."
+        # Derive a string such that u1 != u and the encoded versions s, s1
+        u1 = u.replace('cr', 'm')
+        s, s1 = u.encode(cs), u1.encode(cs)
+        with EncodingContext(cs):
+            for B in self.BYTE_TYPES_COMPARE:
+                #print B,
+                b, b1 = B(s), B(s1)
+                self.assertTrue (u == b )
+                self.assertTrue (b == u )
+                self.assertFalse(u == b1)
+                self.assertFalse(b1== u )
+                # Check not implicitly comparing as latin-1.
+                if cs !=  'latin-1':
+                    b = B('caf\xc3\xa9')
+                    self.assertFalse(u'caf\xc3\xa9'== b)
+                    self.assertFalse(b == u'caf\xc3\xa9')
 
     def test_ne(self):
-        with EncodingContext(self.encoding):
-            # Fails if the string is interpreted as code points.
-            if self.encoding !=  'latin-1':
-                self.assertFalse(u'caf\xc3\xa9'== 'caf\xc3\xa9')
-                self.assertFalse('caf\xc3\xa9' == u'caf\xc3\xa9')
+        cs = self.encoding
+        u = u"Un caf\xe9 cr\xe8me."
+        # Derive a string such that u1 != u and the encoded versions s, s1
+        u1 = u.replace('cr', 'm')
+        s, s1 = u.encode(cs), u1.encode(cs)
+        with EncodingContext(cs):
+            for B in self.BYTE_TYPES_COMPARE:
+                #print B,
+                b, b1 = B(s), B(s1)
+                self.assertTrue (u != b1)
+                self.assertTrue (b != u1)
+                self.assertFalse(u != b )
+                self.assertFalse(b != u )
+                # Check not implicitly comparing as latin-1.
+                if cs !=  'latin-1':
+                    b = B('caf\xc3\xa9')
+                    self.assertTrue(u'caf\xc3\xa9'!= b)
+                    self.assertTrue(b != u'caf\xc3\xa9')
+
+    def test_lt(self):
+        cs = self.encoding
+        u = u"Un caf\xe9 cr\xe8me."
+        # Derive strings such that u0 < u < u1 and their encodings
+        u0 = u.replace('cr', 'Cr')
+        u1 = u.replace('.', '?')
+        s0, s, s1 = u0.encode(cs), u.encode(cs), u1.encode(cs)
+        with EncodingContext(cs):
+            for B in self.BYTE_TYPES_COMPARE:
+                #print B,
+                b0, b, b1 = B(s0), B(s), B(s1)
+                self.assertTrue (b0 < u )
+                self.assertFalse(b  < u )
+                self.assertFalse(b1 < u )
+                self.assertFalse(u  < b0)
+                self.assertFalse(u  < b )
+                self.assertTrue (u  < b1)
+
+    def test_le(self):
+        cs = self.encoding
+        u = u"Un caf\xe9 cr\xe8me."
+        # Derive strings such that u0 < u < u1 and their encodings
+        u0 = u.replace('cr', 'Cr')
+        u1 = u.replace('.', '?')
+        s0, s, s1 = u0.encode(cs), u.encode(cs), u1.encode(cs)
+        with EncodingContext(cs):
+            for B in self.BYTE_TYPES_COMPARE:
+                #print B,
+                b0, b, b1 = B(s0), B(s), B(s1)
+                self.assertTrue (b0 <= u )
+                self.assertTrue (b  <= u )
+                self.assertFalse(b1 <= u )
+                self.assertFalse(u  <= b0)
+                self.assertTrue (u  <= b )
+                self.assertTrue (u  <= b1)
+
+    def test_gt(self):
+        cs = self.encoding
+        u = u"Un caf\xe9 cr\xe8me."
+        # Derive strings such that u0 < u < u1 and their encodings
+        u0 = u.replace('cr', 'Cr')
+        u1 = u.replace('.', '?')
+        s0, s, s1 = u0.encode(cs), u.encode(cs), u1.encode(cs)
+        with EncodingContext(cs):
+            for B in self.BYTE_TYPES_COMPARE:
+                #print B,
+                b0, b, b1 = B(s0), B(s), B(s1)
+                self.assertTrue (b1 > u )
+                self.assertFalse(b  > u )
+                self.assertFalse(b0 > u )
+                self.assertFalse(u  > b1)
+                self.assertFalse(u  > b )
+                self.assertTrue (u  > b0)
+
+    def test_ge(self):
+        cs = self.encoding
+        u = u"Un caf\xe9 cr\xe8me."
+        # Derive strings such that u0 < u < u1 and their encodings
+        u0 = u.replace('cr', 'Cr')
+        u1 = u.replace('.', '?')
+        s0, s, s1 = u0.encode(cs), u.encode(cs), u1.encode(cs)
+        with EncodingContext(cs):
+            for B in self.BYTE_TYPES_COMPARE:
+                #print B,
+                b0, b, b1 = B(s0), B(s), B(s1)
+                self.assertTrue (b1 >= u )
+                self.assertTrue (b  >= u )
+                self.assertFalse(b0 >= u )
+                self.assertFalse(u  >= b1)
+                self.assertTrue (u  >= b )
+                self.assertTrue (u  >= b0)
+
+
+    # Methods
 
     def test_count(self):
+        cs = self.encoding
         ref = u'Le caf? des f?es ?gar?es'
-        with EncodingContext(self.encoding):
-            self.assertEqual(ref.count(u'?'.encode(self.encoding)), 4)
-            self.assertEqual(ref.count(u'f?'.encode(self.encoding)), 2)
+        with EncodingContext(cs):
+            for B in self.BYTE_TYPES:
+                #print B,
+                self.assertEqual(ref.count(B(u'?'.encode(cs))), 4)
+                self.assertEqual(ref.count(B(u'f?'.encode(cs))), 2)
 
     def test_endswith(self):
+        cs = self.encoding
         # Set up the test using unicode values and indices
         ref = u'caf? cr?me'
         s, u, v = ref[-4:], u'?m?', u'??e'
         # Encode all this
-        enc = ref.encode(self.encoding)
-        u1, v1 = u.encode(self.encoding), v.encode(self.encoding)
-        s1 = s.encode(self.encoding)
+        enc = ref.encode(cs)
+        s1, u1, v1 = s.encode(cs), u.encode(cs), v.encode(cs)
 
-        with EncodingContext(self.encoding):
-            # Test with single argument
-            self.assertFalse(ref.endswith(v1))
-            self.assertTrue(ref.endswith(s1))
-            # Test with a mixed tuple as the argument
-            self.assertFalse(ref.endswith((u1, u, v1, v)))
-            self.assertTrue(ref.endswith((u1, s1, v1)))
-            self.assertTrue(ref.endswith((u1, u, s1, v1, v)))
-            self.assertFalse(enc.endswith((u1, v1, u, v)))
-            self.assertTrue(enc.endswith((u, s, v)))
-            self.assertTrue(enc.endswith((u1, u, s, v1, v)))
+        with EncodingContext(cs):
+            for B in self.BYTE_TYPES:
+                #print B,
+                sb, ub, vb = B(s1), B(u1), B(v1)
+                # Test with single argument
+                self.assertFalse(ref.endswith(vb))
+                self.assertTrue(ref.endswith(sb))
+                # Test with a mixed tuple as the argument
+                self.assertFalse(ref.endswith((ub, u, vb, v)))
+                self.assertTrue(ref.endswith((ub, sb, vb)))
+                self.assertTrue(ref.endswith((ub, u, sb, vb, v)))
+                self.assertFalse(enc.endswith((ub, vb, u, v)))
+                self.assertTrue(enc.endswith((u, s, v)))
+                self.assertTrue(enc.endswith((ub, u, s, vb, v)))
 
     def test_endswith_slice(self):
+        cs = self.encoding
         # Set up the test using unicode values and indices
         ref = u'?Un caf? cr?me??'
-        if len(u'??'.encode(self.encoding))!=2 and not test_support.is_jython:
+        if len(u'??'.encode(cs))!=2 and not test_support.is_jython:
             # CPython fails on str.startswith(unicode, int, int) as it passes
             # byte indices to unicode.startswith(unicode, int, int) unchanged.
             # It only works if ? and ? encode to single bytes. Easier test:
@@ -994,133 +1122,170 @@
         a, b = 4, -2
         s, u, v = ref[b-4:b], u'?m?', u'??e'
         # Encode all this, including the indices
-        enc = ref.encode(self.encoding)
-        u1, v1 = u.encode(self.encoding), v.encode(self.encoding)
-        a1 = len(ref[:a].encode(self.encoding))
-        b1 = - len(ref[b:].encode(self.encoding))
-        s1 = s.encode(self.encoding)
+        enc = ref.encode(cs)
+        u1, v1 = u.encode(cs), v.encode(cs)
+        a1 = len(ref[:a].encode(cs))
+        b1 = - len(ref[b:].encode(cs))
+        s1 = s.encode(cs)
 
-        with EncodingContext(self.encoding):
-            # Test the assumption on which the test is based
-            self.assertEqual(ref[a:b], enc[a1:b1])
-            # Test slice with single argument
-            self.assertFalse(ref.endswith(v1, a, b))
-            self.assertTrue(ref.endswith(s1, a, b))
-            self.assertFalse(enc.endswith(v1, a1, b1))
-            self.assertTrue(enc.endswith(s, a1, b1))
-            # CPython would pass:
-            #self.assertTrue(enc.endswith(s, a, b))
-            # Test slice with a mixed tuple as the argument
-            self.assertFalse(ref.endswith((u1, u, v1, v), a, b))
-            self.assertTrue(ref.endswith((u1, s1, v1), a, b))
-            self.assertTrue(ref.endswith((u1, u, s1, v1, v), a, b))
-            self.assertFalse(enc.endswith((u1, v1, u, v), a1, b1))
-            self.assertTrue(enc.endswith((u, s, v), a1, b1))
-            self.assertTrue(enc.endswith((u1, u, s, v1, v), a1, b1))
-            # CPython would pass:
-            #self.assertTrue(enc.endswith((u, s, v), a, b))
-            #self.assertTrue(enc.endswith((u1, u, s, v1, v), a, b))
+        with EncodingContext(cs):
+            for B in self.BYTE_TYPES:
+                #print B,
+                sb, ub, vb = B(s1), B(u1), B(v1)
+                # Test the assumption on which the test is based
+                self.assertEqual(ref[a:b], enc[a1:b1])
+                # Test slice with single argument
+                self.assertFalse(ref.endswith(vb, a, b))
+                self.assertTrue(ref.endswith(sb, a, b))
+                self.assertFalse(enc.endswith(vb, a1, b1))
+                self.assertTrue(enc.endswith(s, a1, b1))
+                # CPython would pass:
+                #self.assertTrue(enc.endswith(s, a, b))
+                # Test slice with a mixed tuple as the argument
+                self.assertFalse(ref.endswith((ub, u, vb, v), a, b))
+                self.assertTrue(ref.endswith((ub, sb, vb), a, b))
+                self.assertTrue(ref.endswith((ub, u, sb, vb, v), a, b))
+                self.assertFalse(enc.endswith((ub, vb, u, v), a1, b1))
+                self.assertTrue(enc.endswith((u, s, v), a1, b1))
+                self.assertTrue(enc.endswith((ub, u, s, vb, v), a1, b1))
+                # CPython would pass:
+                #self.assertTrue(enc.endswith((u, s, v), a, b))
+                #self.assertTrue(enc.endswith((ub, u, s, vb, v), a, b))
 
     def test_find(self):
+        cs = self.encoding
         ref = u'caf? cr?me'
-        sub = u'?'.encode(self.encoding)
-        with EncodingContext(self.encoding):
-            self.assertEqual(ref.find(sub), 7)
+        sub = u'?'.encode(cs)
+        with EncodingContext(cs):
+            for B in self.BYTE_TYPES:
+                #print B,
+                self.assertEqual(ref.find(B(sub)), 7)
 
     def test_index(self):
+        cs = self.encoding
         ref = u'caf? cr?me'
-        sub = u'?'.encode(self.encoding)
-        with EncodingContext(self.encoding):
-            self.assertEqual(ref.index(sub), 7)
+        sub = u'?'.encode(cs)
+        with EncodingContext(cs):
+            for B in self.BYTE_TYPES:
+                #print B,
+                self.assertEqual(ref.index(B(sub)), 7)
 
     def test_lstrip(self):
+        cs = self.encoding
         ref = u"??????du bl? ?"
-        sep = u'???'.encode(self.encoding)
-        with EncodingContext(self.encoding):
+        sep = u'???'.encode(cs)
+        with EncodingContext(cs):
             self.assertEqual(ref.lstrip(sep), u"du bl? ?")
 
     def test_partition(self):
+        cs = self.encoding
         ref = u"Des f?es h?b?t?es."
-        sep1 = u'?'.encode(self.encoding)
-        sep2 = u'?es'.encode(self.encoding)
-        with EncodingContext(self.encoding):
-            self.assertEqual(ref.partition(sep1), (u"Des f", u"?", u"es h?b?t?es."))
-            self.assertEqual(ref.partition(sep2), (u"Des f", u"?es", u" h?b?t?es."))
+        sep1 = u'?'.encode(cs)
+        sep2 = u'?es'.encode(cs)
+        with EncodingContext(cs):
+            for B in self.BYTE_TYPES:
+                #print B,
+                self.assertEqual(ref.partition(B(sep1)), (u"Des f", u"?", u"es h?b?t?es."))
+                self.assertEqual(ref.partition(B(sep2)), (u"Des f", u"?es", u" h?b?t?es."))
 
     def test_replace(self):
+        cs = self.encoding
         ref = u"?t?."
-        a = u'?'.encode(self.encoding)
-        b = u'?'.encode(self.encoding)
-        with EncodingContext(self.encoding):
-            self.assertEqual(ref.replace(a, b), u"?t?.")
-            self.assertEqual(ref.replace(b, a), u"?t?.")
+        a = u'?'.encode(cs)
+        b = u'?'.encode(cs)
+        with EncodingContext(cs):
+            for B in self.BYTE_TYPES:
+                #print B,
+                self.assertEqual(ref.replace(B(a), B(b)), u"?t?.")
+                self.assertEqual(ref.replace(B(b), B(a)), u"?t?.")
 
     def test_rfind(self):
+        cs = self.encoding
         ref = u'caf? cr?me'
-        sub = u'?'.encode(self.encoding)
-        with EncodingContext(self.encoding):
-            self.assertEqual(ref.rfind(sub), 3)
+        sub = u'?'.encode(cs)
+        with EncodingContext(cs):
+            for B in self.BYTE_TYPES:
+                #print B,
+                self.assertEqual(ref.rfind(B(sub)), 3)
 
     def test_rindex(self):
+        cs = self.encoding
         ref = u'caf? cr?me'
-        sub = u'?'.encode(self.encoding)
-        with EncodingContext(self.encoding):
-            self.assertEqual(ref.index(sub), 3)
+        sub = u'?'.encode(cs)
+        with EncodingContext(cs):
+            for B in self.BYTE_TYPES:
+                #print B,
+                self.assertEqual(ref.index(B(sub)), 3)
 
     def test_rpartition(self):
+        cs = self.encoding
         ref = u"Des f?es h?b?t?es."
-        sep1 = u'?'.encode(self.encoding)
-        sep2 = u'?es'.encode(self.encoding)
-        with EncodingContext(self.encoding):
-            self.assertEqual(ref.rpartition(sep1), (u"Des f?es h?b?t", u"?", u"es."))
-            self.assertEqual(ref.rpartition(sep2), (u"Des f?es h?b?t", u"?es", u"."))
+        sep1 = u'?'.encode(cs)
+        sep2 = u'?es'.encode(cs)
+        with EncodingContext(cs):
+            for B in self.BYTE_TYPES:
+                #print B,
+                self.assertEqual(ref.rpartition(B(sep1)), (u"Des f?es h?b?t", u"?", u"es."))
+                self.assertEqual(ref.rpartition(B(sep2)), (u"Des f?es h?b?t", u"?es", u"."))
 
     def test_rsplit(self):
+        cs = self.encoding
         ref = u"Des f?es h?b?t?es."
-        sep1 = u'?'.encode(self.encoding)
-        sep2 = u'?es'.encode(self.encoding)
-        with EncodingContext(self.encoding):
-            self.assertEqual(ref.rsplit(sep1, 3), [u"Des f?es h", u"b", u"t", u"es."])
-            self.assertEqual(ref.rsplit(sep2), [u"Des f", u" h?b?t", u"."])
+        sep1 = u'?'.encode(cs)
+        sep2 = u'?es'.encode(cs)
+        with EncodingContext(cs):
+            for B in self.BYTE_TYPES:
+                #print B,
+                self.assertEqual(ref.rsplit(B(sep1), 3), [u"Des f?es h", u"b", u"t", u"es."])
+                self.assertEqual(ref.rsplit(B(sep2)), [u"Des f", u" h?b?t", u"."])
 
     def test_rstrip(self):
+        cs = self.encoding
         ref = u"? du bl???????"
-        sep = u'???'.encode(self.encoding)
-        with EncodingContext(self.encoding):
+        sep = u'???'.encode(cs)
+        with EncodingContext(cs):
             self.assertEqual(ref.rstrip(sep), u"? du bl?")
 
     def test_split(self):
+        cs = self.encoding
         ref = u"Des f?es h?b?t?es."
-        sep1 = u'?'.encode(self.encoding)
-        sep2 = u'?es'.encode(self.encoding)
-        with EncodingContext(self.encoding):
-            self.assertEqual(ref.split(sep1, 3), [u"Des f", u"es h", u"b", u"t?es."])
-            self.assertEqual(ref.split(sep2), [u"Des f", u" h?b?t", u"."])
+        sep1 = u'?'.encode(cs)
+        sep2 = u'?es'.encode(cs)
+        with EncodingContext(cs):
+            for B in self.BYTE_TYPES:
+                #print B,
+                self.assertEqual(ref.split(B(sep1), 3), [u"Des f", u"es h", u"b", u"t?es."])
+                self.assertEqual(ref.split(B(sep2)), [u"Des f", u" h?b?t", u"."])
 
     def test_startsswith(self):
+        cs = self.encoding
         # Set up the test using unicode values and indices
         ref = u'caf? cr?me'
         s, u, v = ref[:4], u'?af', u'caf?'
         # Encode all this
-        enc = ref.encode(self.encoding)
-        u1, v1 = u.encode(self.encoding), v.encode(self.encoding)
-        s1 = s.encode(self.encoding)
+        enc = ref.encode(cs)
+        u1, v1 = u.encode(cs), v.encode(cs)
+        s1 = s.encode(cs)
 
-        with EncodingContext(self.encoding):
-            self.assertFalse(ref.startswith(v1))
-            self.assertTrue(ref.startswith(enc[:5]))
-            # Test with a mixed tuple as the argument
-            self.assertFalse(ref.startswith((u1, u, v1, v)))
-            self.assertTrue(ref.startswith((u1, enc[:5], v1)))
-            self.assertTrue(ref.startswith((u1, u, enc[:5], v1, v)))
-            self.assertFalse(enc.startswith((u1, v1, u, v)))
-            self.assertTrue(enc.startswith((u, ref[:4], v)))
-            self.assertTrue(enc.startswith((u1, u, ref[:4], v1, v)))
+        with EncodingContext(cs):
+            for B in self.BYTE_TYPES:
+                #print B,
+                sb, ub, vb, b5 = B(s1), B(u1), B(v1), B(enc[:5])
+                self.assertFalse(ref.startswith(vb))
+                self.assertTrue(ref.startswith(b5))
+                # Test with a mixed tuple as the argument
+                self.assertFalse(ref.startswith((ub, u, vb, v)))
+                self.assertTrue(ref.startswith((ub, b5, vb)))
+                self.assertTrue(ref.startswith((ub, u, b5, vb, v)))
+                self.assertFalse(enc.startswith((ub, vb, u, v)))
+                self.assertTrue(enc.startswith((u, ref[:4], v)))
+                self.assertTrue(enc.startswith((ub, u, ref[:4], vb, v)))
 
     def test_startsswith_slice(self):
+        cs = self.encoding
         # Set up the test using unicode values and indices
         ref = u'?Un caf? cr?me??'
-        if len(u'??'.encode(self.encoding))!=2 and not test_support.is_jython:
+        if len(u'??'.encode(cs))!=2 and not test_support.is_jython:
             # CPython fails on str.startswith(unicode, int, int) as it passes
             # byte indices to unicode.startswith(unicode, int, int) unchanged.
             # It only works if ? and ? encode to single bytes. Easier test:
@@ -1128,40 +1293,43 @@
         a, b = 4, -2
         s, u, v = ref[a:a+4], u'?af', u'caf?'
         # Encode all this, including the indices
-        enc = ref.encode(self.encoding)
-        u1, v1 = u.encode(self.encoding), v.encode(self.encoding)
-        a1 = len(ref[:a].encode(self.encoding))
-        b1 = - len(ref[b:].encode(self.encoding))
-        s1 = s.encode(self.encoding)
+        enc = ref.encode(cs)
+        u1, v1 = u.encode(cs), v.encode(cs)
+        a1 = len(ref[:a].encode(cs))
+        b1 = - len(ref[b:].encode(cs))
+        s1 = s.encode(cs)
 
-        with EncodingContext(self.encoding):
-            # Test the assumption on which the test is based
-            self.assertEqual(ref[a:b], enc[a1:b1])
-            # Test slice with single argument
-            self.assertFalse(ref.startswith(v, a, b))
-            self.assertTrue(ref.startswith(s1, a, b))
-            self.assertFalse(enc.startswith(v1, a1, b1))
-            self.assertTrue(enc.startswith(s, a1, b1))
-            # CPython would pass:
-            #self.assertTrue(enc.startswith(s, a, b))
-            # Test slice with a mixed tuple as the argument
-            self.assertFalse(ref.startswith((u1, u, v1, v), a, b))
-            self.assertTrue(ref.startswith((u1, s1, v1), a, b))
-            self.assertTrue(ref.startswith((u1, u, s1, v1, v), a, b))
-            self.assertFalse(enc.startswith((u1, v1, u, v), a1, b1))
-            self.assertTrue(enc.startswith((u, s, v), a1, b1))
-            self.assertTrue(enc.startswith((u1, u, s, v1, v), a1, b1))
-            # CPython would pass:
-            #self.assertTrue(enc.startswith((u, s, v), a, b))
-            #self.assertTrue(enc.startswith((u1, u, s, v1, v), a, b))
+        with EncodingContext(cs):
+            for B in self.BYTE_TYPES:
+                #print B,
+                sb, ub, vb = B(s1), B(u1), B(v1)
+                # Test the assumption on which the test is based
+                self.assertEqual(ref[a:b], enc[a1:b1])
+                # Test slice with single argument
+                self.assertFalse(ref.startswith(v, a, b))
+                self.assertTrue(ref.startswith(sb, a, b))
+                self.assertFalse(enc.startswith(vb, a1, b1))
+                self.assertTrue(enc.startswith(s, a1, b1))
+                # CPython would pass:
+                #self.assertTrue(enc.startswith(s, a, b))
+                # Test slice with a mixed tuple as the argument
+                self.assertFalse(ref.startswith((ub, u, vb, v), a, b))
+                self.assertTrue(ref.startswith((ub, sb, vb), a, b))
+                self.assertTrue(ref.startswith((ub, u, sb, vb, v), a, b))
+                self.assertFalse(enc.startswith((ub, vb, u, v), a1, b1))
+                self.assertTrue(enc.startswith((u, s, v), a1, b1))
+                self.assertTrue(enc.startswith((ub, u, s, vb, v), a1, b1))
+                # CPython would pass:
+                #self.assertTrue(enc.startswith((u, s, v), a, b))
+                #self.assertTrue(enc.startswith((ub, u, s, vb, v), a, b))
 
     def test_strip(self):
+        cs = self.encoding
         ref = u"??????du bl???????"
-        sep = u'???'.encode(self.encoding)
-        with EncodingContext(self.encoding):
+        sep = u'???'.encode(cs)
+        with EncodingContext(cs):
             self.assertEqual(ref.strip(sep), u"du bl?")
 
-
 class DefaultDecodingLatin1(DefaultDecodingTestCase):
     encoding = "latin-1"
 
diff --git a/src/org/python/core/PyUnicode.java b/src/org/python/core/PyUnicode.java
--- a/src/org/python/core/PyUnicode.java
+++ b/src/org/python/core/PyUnicode.java
@@ -759,6 +759,62 @@
         }
     }
 
+    @Override
+    public PyObject __lt__(PyObject other) {
+        return unicode___lt__(other);
+    }
+
+    @ExposedMethod(type = MethodType.BINARY, doc = BuiltinDocs.unicode___lt___doc)
+    final PyObject unicode___lt__(PyObject other) {
+        String s = coerceForComparison(other);
+        if (s == null) {
+            return null;
+        }
+        return getString().compareTo(s) < 0 ? Py.True : Py.False;
+    }
+
+    @Override
+    public PyObject __le__(PyObject other) {
+        return unicode___le__(other);
+    }
+
+    @ExposedMethod(type = MethodType.BINARY, doc = BuiltinDocs.unicode___le___doc)
+    final PyObject unicode___le__(PyObject other) {
+        String s = coerceForComparison(other);
+        if (s == null) {
+            return null;
+        }
+        return getString().compareTo(s) <= 0 ? Py.True : Py.False;
+    }
+
+    @Override
+    public PyObject __gt__(PyObject other) {
+        return unicode___gt__(other);
+    }
+
+    @ExposedMethod(type = MethodType.BINARY, doc = BuiltinDocs.unicode___gt___doc)
+    final PyObject unicode___gt__(PyObject other) {
+        String s = coerceForComparison(other);
+        if (s == null) {
+            return null;
+        }
+        return getString().compareTo(s) > 0 ? Py.True : Py.False;
+    }
+
+    @Override
+    public PyObject __ge__(PyObject other) {
+        return unicode___ge__(other);
+    }
+
+    @ExposedMethod(type = MethodType.BINARY, doc = BuiltinDocs.unicode___ge___doc)
+    final PyObject unicode___ge__(PyObject other) {
+        String s = coerceForComparison(other);
+        if (s == null) {
+            return null;
+        }
+        return getString().compareTo(s) >= 0 ? Py.True : Py.False;
+    }
+
     @ExposedMethod(doc = BuiltinDocs.unicode___hash___doc)
     final int unicode___hash__() {
         return str___hash__();
@@ -960,8 +1016,10 @@
      * Interpret the object as a Java <code>String</code> for use in comparison. The return
      * represents characters as UTF-16. From a <code>PyUnicode</code> we return its internal string.
      * A <code>str</code> and <code>buffer</code> argument is decoded with the default encoding.
-     * Equivalent to {@link #coerceToStringOrNull(PyObject)} allowing only the types supported in
-     * (C)Python <code>unicode.__eq__</code>.
+     * <p>
+     * This method could be replaced by {@link #coerceToStringOrNull(PyObject)} if we were content
+     * to allowing a wider range of types to be supported in comparison operations than (C)Python
+     * <code>unicode.__eq__</code>.
      *
      * @param o the object to coerce
      * @return an equivalent <code>String</code>
@@ -1212,9 +1270,10 @@
      * {@link #coerceToUnicode(PyObject, boolean)}.
      *
      * @param o the object to coerce
+     * @param name of method
      * @return an equivalent <code>PyUnicode</code> (or o itself, or <code>null</code>)
      */
-    private static PyUnicode coerceStripSepToUnicode(PyObject o) {
+    private static PyUnicode coerceStripSepToUnicode(PyObject o, String name) {
         if (o == null) {
             return null;
         } else if (o instanceof PyUnicode) {
@@ -1225,14 +1284,14 @@
         } else if (o == Py.None) {
             return null;
         } else {
-            throw Py.TypeError("strip arg must be None, unicode or str");
+            throw Py.TypeError(name + " arg must be None, unicode or str");
         }
     }
 
     @ExposedMethod(defaults = "null", doc = BuiltinDocs.unicode_strip_doc)
     final PyObject unicode_strip(PyObject sepObj) {
 
-        PyUnicode sep = coerceStripSepToUnicode(sepObj);
+        PyUnicode sep = coerceStripSepToUnicode(sepObj, "strip");
 
         if (isBasicPlane()) {
             // this contains only basic plane characters
@@ -1253,7 +1312,7 @@
     @ExposedMethod(defaults = "null", doc = BuiltinDocs.unicode_lstrip_doc)
     final PyObject unicode_lstrip(PyObject sepObj) {
 
-        PyUnicode sep = coerceStripSepToUnicode(sepObj);
+        PyUnicode sep = coerceStripSepToUnicode(sepObj, "lstrip");
 
         if (isBasicPlane()) {
             // this contains only basic plane characters
@@ -1273,7 +1332,7 @@
     @ExposedMethod(defaults = "null", doc = BuiltinDocs.unicode_rstrip_doc)
     final PyObject unicode_rstrip(PyObject sepObj) {
 
-        PyUnicode sep = coerceStripSepToUnicode(sepObj);
+        PyUnicode sep = coerceStripSepToUnicode(sepObj, "rstrip");
 
         if (isBasicPlane()) {
             // this contains only basic plane characters

-- 
Repository URL: https://hg.python.org/jython

From jython-checkins at python.org  Tue Nov 21 17:39:12 2017
From: jython-checkins at python.org (jeff.allen)
Date: Tue, 21 Nov 2017 22:39:12 +0000
Subject: [Jython-checkins] =?utf-8?q?jython=3A_Accept_unicode_arguments_a?=
 =?utf-8?q?t_a_csv=2Ewriter_=28fixes_=232632=29=2E?=
Message-ID: <20171121223910.73736.142DE4EBB7A31077@mg.python.org>

https://hg.python.org/jython/rev/08978c4d1ab0
changeset:   8140:08978c4d1ab0
user:        Jeff Allen <ja.py at farowl.co.uk>
date:        Tue Nov 21 19:37:02 2017 +0000
summary:
  Accept unicode arguments at a csv.writer (fixes #2632).

The CPython csv.writer accepts unicode strings and encodes them using
the current default encoding. This is not documented, but we can easily
reproduce the behaviour, which is relied on by some users. A simple
test_csv_jy is added for UTF-8 default. We hide sys.setdefaultencoding
again after use since this otherwise causes test_site to fail. The same
fault is corrected, where it was latent in test_unicode_jy.

files:
  Lib/test/test_csv_jy.py                    |  96 ++++++++++
  Lib/test/test_unicode_jy.py                |   8 +-
  src/org/python/modules/_csv/PyDialect.java |  33 +-
  src/org/python/modules/_csv/PyWriter.java  |  48 ++--
  4 files changed, 145 insertions(+), 40 deletions(-)


diff --git a/Lib/test/test_csv_jy.py b/Lib/test/test_csv_jy.py
new file mode 100644
--- /dev/null
+++ b/Lib/test/test_csv_jy.py
@@ -0,0 +1,96 @@
+# -*- coding: utf-8 -*-
+# Copyright (C) 2017 Jython Developers
+
+# Additional csv module unit tests for Jython
+
+import csv
+import io
+import sys
+from tempfile import TemporaryFile
+from test import test_support
+import unittest
+
+# This test has been adapted from Python 3 test_csv.TestUnicode. In Python 3,
+# the csv module supports Unicode directly. In Python 2, it does not, except
+# that it is transparent to byte data. Many tools, however, accept UTF-8
+# encoded text in a CSV file.
+#
+class EncodingContext(object):
+    """Context manager to save and restore the encoding.
+
+    Use like this:
+
+        with EncodingContext("utf-8"):
+            self.assertEqual("'caf\xc3\xa9'", u"'caf\xe9'")
+    """
+
+    def __init__(self, encoding):
+        if not hasattr(sys, "setdefaultencoding"):
+            reload(sys)
+        self.original_encoding = sys.getdefaultencoding()
+        sys.setdefaultencoding(encoding)
+
+    def __enter__(self):
+        return self
+
+    def __exit__(self, *ignore_exc):
+        sys.setdefaultencoding(self.original_encoding)
+
+class TestUnicode(unittest.TestCase):
+
+    names = [u"Martin von Löwis",
+             u"Marc André Lemburg",
+             u"Guido van Rossum",
+             u"François Pinard",
+             u"????"]
+
+    def test_decode_read(self):
+        # The user code receives byte data and takes care of the decoding
+        with TemporaryFile("w+b") as fileobj:
+            line = u",".join(self.names) + u"\r\n"
+            fileobj.write(line.encode('utf-8'))
+            fileobj.seek(0)
+            reader = csv.reader(fileobj)
+            # The reader yields rows of byte strings that decode to the data
+            table = [[e.decode('utf-8') for e in row] for row in reader]
+            self.assertEqual(table, [self.names])
+
+    def test_encode_write(self):
+        # The user encodes unicode objects to byte data that csv writes
+        with TemporaryFile("w+b") as fileobj:
+            writer = csv.writer(fileobj)
+            # We present a row of encoded strings to the writer
+            writer.writerow([n.encode('utf-8') for n in self.names])
+            # We expect the file contents to be the UTF-8 of the csv data
+            expected = u",".join(self.names) + u"\r\n"
+            fileobj.seek(0)
+            self.assertEqual(fileobj.read().decode('utf-8'), expected)
+
+    def test_unicode_write(self):
+        # The user supplies unicode data that csv.writer default-encodes
+        # (undocumented feature relied upon by client code).
+        # See Issue #2632  https://github.com/jythontools/jython/issues/90
+        with TemporaryFile("w+b") as fileobj:
+            with EncodingContext('utf-8'):
+                writer = csv.writer(fileobj)
+                # We present a row of unicode strings to the writer
+                writer.writerow(self.names)
+                # We expect the file contents to be the UTF-8 of the csv data
+                expected = u",".join(self.names) + u"\r\n"
+                fileobj.seek(0)
+                self.assertEqual(fileobj.read().decode(), expected)
+
+
+def test_main():
+    # We'll be enabling sys.setdefaultencoding so remember to disable
+    had_set = hasattr(sys, "setdefaultencoding")
+    try:
+        test_support.run_unittest(
+            TestUnicode,
+        )
+    finally:
+        if not had_set:
+            delattr(sys, "setdefaultencoding")
+
+if __name__ == "__main__":
+    test_main()
diff --git a/Lib/test/test_unicode_jy.py b/Lib/test/test_unicode_jy.py
--- a/Lib/test/test_unicode_jy.py
+++ b/Lib/test/test_unicode_jy.py
@@ -1341,7 +1341,10 @@
 
 
 def test_main():
-    test_support.run_unittest(
+    # We'll be enabling sys.setdefaultencoding so remember to disable
+    had_set = hasattr(sys, "setdefaultencoding")
+    try:
+        test_support.run_unittest(
                 UnicodeTestCase,
                 UnicodeIndexMixTest,
                 UnicodeFormatTestCase,
@@ -1353,6 +1356,9 @@
                 DefaultDecodingUTF8,
                 DefaultDecodingCp850,
             )
+    finally:
+        if not had_set:
+            delattr(sys, "setdefaultencoding")
 
 
 if __name__ == "__main__":
diff --git a/src/org/python/modules/_csv/PyDialect.java b/src/org/python/modules/_csv/PyDialect.java
--- a/src/org/python/modules/_csv/PyDialect.java
+++ b/src/org/python/modules/_csv/PyDialect.java
@@ -1,4 +1,4 @@
-/* Copyright (c) Jython Developers */
+/* Copyright (c)2017 Jython Developers */
 package org.python.modules._csv;
 
 import org.python.core.ArgParser;
@@ -9,6 +9,7 @@
 import org.python.core.PyObject;
 import org.python.core.PyString;
 import org.python.core.PyType;
+import org.python.core.PyUnicode;
 import org.python.core.Untraversable;
 import org.python.expose.ExposedDelete;
 import org.python.expose.ExposedGet;
@@ -153,17 +154,21 @@
     private static char toChar(String name, PyObject src, char dflt) {
         if (src == null) {
             return dflt;
-        }
-        boolean isStr = Py.isInstance(src, PyString.TYPE);
-        if (src == Py.None || isStr && src.__len__() == 0) {
+        } else if (src == Py.None) {
             return '\0';
-        } else if (!isStr || src.__len__() != 1) {
-            throw Py.TypeError(String.format("\"%s\" must be an 1-character string", name));
+        } else if (src instanceof PyString) {
+            String s = (src instanceof PyUnicode) ? ((PyUnicode) src).encode() : src.toString();
+            if (s.length() == 0) {
+                return '\0';
+            } else if (s.length() == 1) {
+                return s.charAt(0);
+            }
         }
-        return src.toString().charAt(0);
+        // This is only going to work for BMP strings because of the char return type
+        throw Py.TypeError(String.format("\"%s\" must be a 1-character string", name));
     }
 
-    private static int toInt(String name, PyObject src, int dflt) {
+       private static int toInt(String name, PyObject src, int dflt) {
         if (src == null) {
             return dflt;
         }
@@ -176,14 +181,14 @@
     private static String toStr(String name, PyObject src, String dflt) {
         if (src == null) {
             return dflt;
-        }
-        if (src == Py.None) {
+        } else if (src == Py.None) {
             return null;
+        } else if (src instanceof PyUnicode) {
+            return ((PyUnicode) src).encode().toString();
+        } else if (src instanceof PyString) {
+            return src.toString();
         }
-        if (!(src instanceof PyBaseString)) {
-            throw Py.TypeError(String.format("\"%s\" must be an string", name));
-        }
-        return src.toString();
+        throw Py.TypeError(String.format("\"%s\" must be a string", name));
     }
 
     @ExposedGet(name = "escapechar")
diff --git a/src/org/python/modules/_csv/PyWriter.java b/src/org/python/modules/_csv/PyWriter.java
--- a/src/org/python/modules/_csv/PyWriter.java
+++ b/src/org/python/modules/_csv/PyWriter.java
@@ -1,4 +1,4 @@
-/* Copyright (c) Jython Developers */
+/* Copyright (c)2017 Jython Developers */
 package org.python.modules._csv;
 
 import org.python.core.Py;
@@ -7,6 +7,7 @@
 import org.python.core.PyObject;
 import org.python.core.PyString;
 import org.python.core.PyType;
+import org.python.core.PyUnicode;
 import org.python.core.Traverseproc;
 import org.python.core.Visitproc;
 import org.python.expose.ExposedType;
@@ -21,11 +22,9 @@
 @ExposedType(name = "_csv.writer", doc = PyWriter.writer_doc)
 public class PyWriter extends PyObject implements Traverseproc {
 
-    public static final String writer_doc =
-    "CSV writer\n" +
-    "\n" +
-    "Writer objects are responsible for generating tabular data\n" +
-    "in CSV format from sequence input.\n";
+    public static final String writer_doc = "CSV writer\n\n"//
+            + "Writer objects are responsible for generating tabular data\n"
+            + "in CSV format from sequence input.\n";
 
     public static final PyType TYPE = PyType.fromClass(PyWriter.class);
 
@@ -53,11 +52,10 @@
         this.dialect = dialect;
     }
 
-    public static PyString __doc__writerows = Py.newString(
-            "writerows(sequence of sequences)\n" +
-            "\n" +
-            "Construct and write a series of sequences to a csv file.  Non-string\n" +
-            "elements will be converted to string.");
+    public static PyString __doc__writerows = Py.newString(//
+            "writerows(sequence of sequences)\n\n"
+            + "Construct and write a series of sequences to a csv file.  Non-string\n"
+            + "elements will be converted to string.");
 
     public void writerows(PyObject seqseq) {
         writer_writerows(seqseq);
@@ -82,12 +80,10 @@
         }
     }
 
-    public static PyString __doc__writerow = Py.newString(
-            "writerow(sequence)\n" +
-            "\n" +
-            "Construct and write a CSV record from a sequence of fields.  Non-string\n" +
-            "elements will be converted to string."
-            );
+    public static PyString __doc__writerow = Py.newString(//
+            "writerow(sequence)\n\n"
+            + "Construct and write a CSV record from a sequence of fields.  Non-string\n"
+            + "elements will be converted to string.");
 
     public boolean writerow(PyObject seq) {
         return writer_writerow(seq);
@@ -134,14 +130,17 @@
                     quoted = false;
             }
 
-            if (field instanceof PyString) {
+            if (field instanceof PyUnicode) {
+                // Unicode fields get the default encoding (must yield U16 bytes).
+                append_ok = join_append(((PyString) field).encode(), len == 1);
+            } else if (field instanceof PyString) {
+                // Not unicode, so must be U16 bytes.
                 append_ok = join_append(field.toString(), len == 1);
             } else if (field == Py.None) {
                 append_ok = join_append("", len == 1);
             } else {
                 PyObject str;
-                //XXX: in 3.x this check can go away and we can just always use
-                //     __str__
+                // XXX: in 3.x this check can go away and we can just always use __str__
                 if (field.getClass() == PyFloat.class) {
                     str = field.__repr__();
                 } else {
@@ -195,9 +194,9 @@
     }
 
     /**
-     * This method behaves differently depending on the value of copy_phase: if copy_phase
-     * is false, then the method determines the new record length. If copy_phase is true
-     * then the new field is appended to the record.
+     * This method behaves differently depending on the value of copy_phase: if copy_phase is false,
+     * then the method determines the new record length. If copy_phase is true then the new field is
+     * appended to the record.
      */
     private int join_append_data(String field, boolean quote_empty, boolean copy_phase) {
         int i;
@@ -225,7 +224,7 @@
                 break;
             }
             if (c == dialect.delimiter || c == dialect.escapechar || c == dialect.quotechar
-                || dialect.lineterminator.indexOf(c) > -1) {
+                    || dialect.lineterminator.indexOf(c) > -1) {
                 if (dialect.quoting == QuoteStyle.QUOTE_NONE) {
                     want_escape = true;
                 } else {
@@ -282,7 +281,6 @@
         rec_len++;
     }
 
-
     /* Traverseproc implementation */
     @Override
     public int traverse(Visitproc visit, Object arg) {

-- 
Repository URL: https://hg.python.org/jython

From jython-checkins at python.org  Thu Nov 23 13:12:38 2017
From: jython-checkins at python.org (jeff.allen)
Date: Thu, 23 Nov 2017 18:12:38 +0000
Subject: [Jython-checkins] =?utf-8?q?jython=3A_Added_tag_v2=2E7=2E2a1_for?=
 =?utf-8?q?_changeset_dfc49bafbe79?=
Message-ID: <20171123181156.66881.68987DC9FA21C87F@mg.python.org>

https://hg.python.org/jython/rev/fb0952d97b20
changeset:   8142:fb0952d97b20
user:        Jeff Allen <ja.py at farowl.co.uk>
date:        Thu Nov 23 18:10:51 2017 +0000
summary:
  Added tag v2.7.2a1 for changeset dfc49bafbe79

files:
  .hgtags |  1 +
  1 files changed, 1 insertions(+), 0 deletions(-)


diff --git a/.hgtags b/.hgtags
--- a/.hgtags
+++ b/.hgtags
@@ -109,3 +109,4 @@
 a5a06c9efdb6dd361d5f5c5c1ef07c2ac802e2e0 v2.7.1rc3
 b6e989b788d563b8ecb0c0458ab486fca8d128d6 v2.7.1rc3
 dd7e191d4c90d9f5d5fe8f0840f186697ecf272a v2.7.1
+dfc49bafbe79566bd54c8d417829e001ff2316ea v2.7.2a1

-- 
Repository URL: https://hg.python.org/jython

From jython-checkins at python.org  Thu Nov 23 13:12:50 2017
From: jython-checkins at python.org (jeff.allen)
Date: Thu, 23 Nov 2017 18:12:50 +0000
Subject: [Jython-checkins] =?utf-8?q?jython=3A_Update_version_reported_to?=
 =?utf-8?q?_2=2E7=2E2a1?=
Message-ID: <20171123181156.66542.11914A562FC65202@mg.python.org>

https://hg.python.org/jython/rev/dfc49bafbe79
changeset:   8141:dfc49bafbe79
tag:         v2.7.2a1
user:        Jeff Allen <ja.py at farowl.co.uk>
date:        Thu Nov 23 18:08:23 2017 +0000
summary:
  Update version reported to 2.7.2a1

files:
  NEWS       |   5 ++++-
  README.txt |  19 +++++++------------
  build.xml  |  10 +++++-----
  3 files changed, 16 insertions(+), 18 deletions(-)


diff --git a/NEWS b/NEWS
--- a/NEWS
+++ b/NEWS
@@ -2,8 +2,11 @@
 
 For more details, please see https://hg.python.org/jython
 
-Development tip
+Jython 2.7.2a1
   Bugs fixed
+    - [ 2632 ] Handle unicode data appropriately in csv module
+    - [ 2638 ] str not default-decoded in str-unicode operations
+    - [ 2622 ] json dumps error (use of AbstractDict)
     - [ 2607, 2620 ] Error loading Python DLL (error code 14001)
     - [ 2612 ] NPE while trying to load class
     - [ 2609 ] PyType.fromClass publication race (discovered in strptime and re)
diff --git a/README.txt b/README.txt
--- a/README.txt
+++ b/README.txt
@@ -1,8 +1,8 @@
 Jython: Python for the Java Platform
 
-Welcome to Jython 2.7.1!
+Welcome to Jython 2.7.2a1.
 
-This is the final release of the 2.7.1 version of Jython. Along with
+This is an alpha release of the 2.7.2 version of Jython. Along with
 language and runtime compatibility with CPython 2.7, Jython 2.7
 provides substantial support of the Python ecosystem. This includes
 built-in support of pip/setuptools (you can use with bin/pip) and a
@@ -31,13 +31,8 @@
 
 See ACKNOWLEDGMENTS for details about Jython's copyright, license,
 contributors, and mailing lists; and NEWS for detailed release notes,
-including bugs fixed, backwards breaking changes, and new
-features. Thanks go to Google for sponsoring Stefan Richthofer for the
-Google Summer of Code; there are so many others to thank, but Stefan's
-work proved instrumental for getting 2.7.1 out, all in preparation for
-his actual work on JyNI for the summer of 2017
-(http://jyni.org/). Motivation helps! We also deeply thank all who
-contribute to Jython, including - but not limited to - bug reports,
-patches, pull requests, documentation changes, support emails, and
-fantastic conversation on Freenode at #jython. Join us there for your
-questions and answers!
+including bugs fixed, backwards breaking changes, and new features. We
+sincerely thank all who contribute to Jython, including - but not
+limited to - bug reports, patches, pull requests, documentation
+changes, support emails, and fantastic conversation on Freenode at
+#jython. Join us there for your questions and answers!
diff --git a/build.xml b/build.xml
--- a/build.xml
+++ b/build.xml
@@ -84,15 +84,15 @@
         <property name="PY_RELEASE_LEVEL_SNAPSHOT" value="170"/> <!-- 0xAA -->
 
         <!-- The current version info -->
-        <property name="jython.version" value="2.7.1"/>
-        <property name="jython.version.noplus" value="2.7.1"/>
+        <property name="jython.version" value="2.7.2a1"/>
+        <property name="jython.version.noplus" value="2.7.2a1"/>
         <property name="jython.major_version" value="2"/>
         <property name="jython.minor_version" value="7"/>
-        <property name="jython.micro_version" value="1"/>
-        <property name="jython.release_level" value="${PY_RELEASE_LEVEL_FINAL}"/>
+        <property name="jython.micro_version" value="2"/>
+        <property name="jython.release_level" value="${PY_RELEASE_LEVEL_ALPHA}"/>
         <!-- Usually zero, only used for alpha, beta and candidate versions
              where it must be greater than zero. -->
-        <property name="jython.release_serial" value="0"/>
+        <property name="jython.release_serial" value="1"/>
         <property name="jython.java.version" value="1.7"/>
 
         <condition property="do.snapshot.build">

-- 
Repository URL: https://hg.python.org/jython

From jython-checkins at python.org  Thu Nov 23 13:32:22 2017
From: jython-checkins at python.org (jeff.allen)
Date: Thu, 23 Nov 2017 18:32:22 +0000
Subject: [Jython-checkins] =?utf-8?q?jython=3A_Identify_as_2=2E7=2E2a1+?=
Message-ID: <20171123183217.45101.14C71771D63386D7@mg.python.org>

https://hg.python.org/jython/rev/dd42e3dc8b05
changeset:   8143:dd42e3dc8b05
user:        Jeff Allen <ja.py at farowl.co.uk>
date:        Thu Nov 23 18:31:14 2017 +0000
summary:
  Identify as 2.7.2a1+

I *think* this is what we do to identify that the current (dev) version
is 2.7.2a1 plus some changes, not the one released as 2.7.2a1.

files:
  build.xml |  2 +-
  1 files changed, 1 insertions(+), 1 deletions(-)


diff --git a/build.xml b/build.xml
--- a/build.xml
+++ b/build.xml
@@ -84,7 +84,7 @@
         <property name="PY_RELEASE_LEVEL_SNAPSHOT" value="170"/> <!-- 0xAA -->
 
         <!-- The current version info -->
-        <property name="jython.version" value="2.7.2a1"/>
+        <property name="jython.version" value="2.7.2a1+"/>
         <property name="jython.version.noplus" value="2.7.2a1"/>
         <property name="jython.major_version" value="2"/>
         <property name="jython.minor_version" value="7"/>

-- 
Repository URL: https://hg.python.org/jython

From jython-checkins at python.org  Sun Nov 26 03:07:59 2017
From: jython-checkins at python.org (jeff.allen)
Date: Sun, 26 Nov 2017 08:07:59 +0000
Subject: [Jython-checkins] =?utf-8?q?jython=3A_Add_a_test_for_PyShadowStr?=
 =?utf-8?q?ing_matching?=
Message-ID: <20171126080659.74384.148FBACAF27D897E@mg.python.org>

https://hg.python.org/jython/rev/30a70b0ac355
changeset:   8144:30a70b0ac355
user:        Jeff Allen <ja.py at farowl.co.uk>
date:        Sun Nov 26 07:16:26 2017 +0000
summary:
  Add a test for PyShadowString matching

files:
  Lib/test/test_shadowstr_jy.py |  120 ++++++++++++++++++++++
  1 files changed, 120 insertions(+), 0 deletions(-)


diff --git a/Lib/test/test_shadowstr_jy.py b/Lib/test/test_shadowstr_jy.py
new file mode 100644
--- /dev/null
+++ b/Lib/test/test_shadowstr_jy.py
@@ -0,0 +1,120 @@
+# Made for Jython
+
+# Tests of built-in type shadowstr
+
+import os
+import sys
+from test import string_tests
+from test.test_support import run_unittest, is_jython
+from test.test_str import StrTest
+import unittest
+
+from org.python.core import PyShadowString
+
+# Ideally we would test shadowstr is a str but the tests need to sub-class it
+#
+# class StrTestCase( # Should pass all tests for str
+#     string_tests.CommonTest,
+#     string_tests.MixinStrUnicodeUserStringTest,
+#     string_tests.MixinStrUserStringTest,
+#     string_tests.MixinStrUnicodeTest,
+#     ):
+#
+#     type2test = PyShadowString
+
+
+class ShadowStrTestCase(unittest.TestCase):
+
+    def setUp(self):
+        self.ss = PyShadowString("hello", "bonjour")
+
+    def check_first_eq(self):
+        self.assertTrue(self.ss == "hello")
+        self.assertFalse(self.ss == "bonjour")
+
+    def check_both_eq(self):
+        self.assertTrue(self.ss == "hello")
+        self.assertTrue(self.ss == "bonjour")
+
+    def test_eq(self):
+        # Test recognition unconditionally
+        self.check_first_eq()
+        self.ss.addtarget(None) # match any
+        self.check_both_eq()
+
+    def test_eq_class(self):
+        # Test recognition of class context only
+        self.check_first_eq()
+        # The Java class of a python module may be <module>$py
+        self.ss.addtarget(r"test\.test_shadowstr_jy\$py") # class only
+        # Or it may be org.python.pycode._pyx<n>
+        self.ss.addtarget(r"org\.python\.pycode\._pyx\d+") # class only
+        self.check_both_eq()
+
+    def test_eq_method(self):
+        # Test recognition of method context only
+        self.check_first_eq()
+        # The Java method name of a python function is name$<n>
+        self.ss.addtarget(None, r"test_eq_method\$\d+") # method only
+        self.check_both_eq()
+
+    def test_eq_class_method(self):
+        # Test recognition of class and method context
+        self.check_first_eq()
+        # Match this method in this module
+        self.ss.addtarget(r"test\.test_shadowstr_jy\$py", # class
+                          r"test_eq_class_method\$\d+") # method
+        self.ss.addtarget(r"org\.python\.pycode\._pyx\d+", # class
+                          r"test_eq_class_method\$\d+") # method
+        self.check_both_eq()
+
+    def check_first_startswith(self):
+        self.assertTrue(self.ss.startswith("hel"))
+        self.assertFalse(self.ss.startswith("bon"))
+
+    def check_both_startswith(self):
+        self.assertTrue(self.ss.startswith("hel"))
+        self.assertTrue(self.ss.startswith("bon"))
+
+    def test_startswith(self):
+        # Test recognition unconditionally
+        self.check_first_startswith()
+        self.ss.addtarget(None) # match any
+        self.check_both_startswith()
+
+    def test_startswith_class(self):
+        # Test recognition of class context only
+        self.check_first_startswith()
+        # The Java class of a python module may be <module>$py
+        self.ss.addtarget(r"test\.test_shadowstr_jy\$py") # class only
+        # Or it may be org.python.pycode._pyx<n>
+        self.ss.addtarget(r"org\.python\.pycode\._pyx\d+") # class only
+        self.check_both_startswith()
+
+    def test_startswith_method(self):
+        # Test recognition of method context only
+        self.check_first_startswith()
+        # The Java method name of a python function is name$<n>
+        self.ss.addtarget(None, r"test_startswith_method\$\d+") # method only
+        self.check_both_startswith()
+
+    def test_startswith_class_method(self):
+        # Test recognition of class and method context
+        self.check_first_startswith()
+        # Match this method in this module
+        self.ss.addtarget(r"test\.test_shadowstr_jy\$py", # class
+                          r"test_startswith_class_method\$\d+") # method
+        self.ss.addtarget(r"org\.python\.pycode\._pyx\d+", # class
+                          r"test_startswith_class_method\$\d+") # method
+        self.check_both_startswith()
+
+
+def test_main():
+    run_unittest(
+            #StrTestCase,
+            ShadowStrTestCase,
+        )
+
+
+if __name__ == "__main__":
+    test_main()

-- 
Repository URL: https://hg.python.org/jython

From jython-checkins at python.org  Sun Nov 26 13:10:07 2017
From: jython-checkins at python.org (jeff.allen)
Date: Sun, 26 Nov 2017 18:10:07 +0000
Subject: [Jython-checkins] =?utf-8?q?jython=3A_Correct_use_of_encoding_in?=
 =?utf-8?q?_test=5Fos=5Fjy_test_of_getcwd=2E_Fixes_=232646=2E?=
Message-ID: <20171126181006.66292.F4BA18626E486DC2@mg.python.org>

https://hg.python.org/jython/rev/320911f1aeba
changeset:   8145:320911f1aeba
user:        Jeff Allen <ja.py at farowl.co.uk>
date:        Sun Nov 26 17:19:57 2017 +0000
summary:
  Correct use of encoding in test_os_jy test of getcwd. Fixes #2646.

All information is in the FS encoding, so decode() was spurious, and
only passed because of a false conception of == between str and unicode,
corrected in #2630. We re-enable the test for Windows, as it keeps
catching us out, add a similar test for getcwdu, and beef up comments.

files:
  Lib/test/regrtest.py   |   2 +-
  Lib/test/test_os_jy.py |  26 ++++++++++++++++++++------
  2 files changed, 21 insertions(+), 7 deletions(-)


diff --git a/Lib/test/regrtest.py b/Lib/test/regrtest.py
--- a/Lib/test/regrtest.py
+++ b/Lib/test/regrtest.py
@@ -1370,7 +1370,7 @@
     'java.nt':     # Expected to fail on Windows
         """
         test_mailbox           # fails miserably and ruins other tests
-        test_os_jy             # Locale tests fail on Cygwin (but not Windows)
+        # test_os_jy             # Locale tests fail on Cygwin (but not Windows)
         # test_popen             # Passes, but see http://bugs.python.org/issue1559298
         test_select_new        # Hangs (Windows), though ok run singly
         test_urllib2           # file not on local host (likely Windows only)
diff --git a/Lib/test/test_os_jy.py b/Lib/test/test_os_jy.py
--- a/Lib/test/test_os_jy.py
+++ b/Lib/test/test_os_jy.py
@@ -231,16 +231,30 @@
                             'sys.stdout.write(os.getenv("TEST_HOME"))'],
                     stdout=subprocess.PIPE,
                     env=newenv)
-            # Decode with default encoding utf-8 (because ... ?)
+            # Decode with FS encoding used by subprocess communication
             self.assertEqual(p.stdout.read().decode('utf-8'), expected)
 
     def test_getcwd(self):
         with test_support.temp_cwd(name=u"tempcwd-中文") as temp_cwd:
-            p = subprocess.Popen([sys.executable, "-c",
-                                  'import sys,os;' \
-                                  'sys.stdout.write(os.getcwd().encode("utf-8"))'],
-                                 stdout=subprocess.PIPE)
-            self.assertEqual(p.stdout.read().decode("utf-8"), temp_cwd)
+            # os.getcwd reports the working directory as an FS-encoded str,
+            # which is also the encoding used in subprocess communication.
+            p = subprocess.Popen([
+                    sys.executable, "-c",
+                    'import sys,os;' \
+                    'sys.stdout.write(os.getcwd())'],
+                stdout=subprocess.PIPE)
+            self.assertEqual(p.stdout.read(), temp_cwd)
+
+    def test_getcwdu(self):
+        with test_support.temp_cwd(name=u"tempcwd-中文") as temp_cwd:
+            # os.getcwdu reports the working directory as unicode,
+            # which must be encoded for subprocess communication.
+            p = subprocess.Popen([
+                    sys.executable, "-c",
+                    'import sys,os;' \
+                    'sys.stdout.write(os.getcwdu().encode(sys.getfilesystemencoding()))'],
+                stdout=subprocess.PIPE)
+            self.assertEqual(p.stdout.read(), temp_cwd)
 
     def test_listdir(self):
         # It is hard to avoid Unicode paths on systems like OS X. Use relative

-- 
Repository URL: https://hg.python.org/jython