From jython-checkins at python.org Wed Sep 2 01:37:42 2015 From: jython-checkins at python.org (jim.baker) Date: Tue, 01 Sep 2015 23:37:42 +0000 Subject: [Jython-checkins] =?utf-8?q?jython=3A_Next_method_resolution_for_?= =?utf-8?q?super_on_=5F=5Finit=5F=5F_should_call_the?= Message-ID: <20150901233742.24929.32437@psf.io> https://hg.python.org/jython/rev/406ca703d7d3 changeset: 7718:406ca703d7d3 user: Darjus Loktevic date: Tue Sep 01 17:37:34 2015 -0600 summary: Next method resolution for super on __init__ should call the corresponding Java constructor when the MRO contains a Java type proxy. Fixes http://bugs.jython.org/issue2375 files: Lib/test/test_java_subclasses.py | 21 ++++++++++++++++++++ src/org/python/core/PyType.java | 4 ++- 2 files changed, 24 insertions(+), 1 deletions(-) diff --git a/Lib/test/test_java_subclasses.py b/Lib/test/test_java_subclasses.py --- a/Lib/test/test_java_subclasses.py +++ b/Lib/test/test_java_subclasses.py @@ -14,6 +14,7 @@ from java.awt import Color, Component, Dimension, Rectangle from javax.swing import ComboBoxModel, ListModel from javax.swing.table import AbstractTableModel +from javax.swing.tree import DefaultTreeModel, DefaultMutableTreeNode from org.python.tests import BeanInterface, Callbacker, Coercions, OwnMethodCaller from javatests import ( @@ -625,6 +626,25 @@ self.assertFalse(thread.isAlive()) +class OldAndNewStyleInitSuperTest(unittest.TestCase): + """ + http://bugs.jython.org/issue2375 + """ + + def test_new_style_init(self): + class AModel(DefaultTreeModel): + def __init__(self): + super(AModel, self).__init__(DefaultMutableTreeNode()) + + AModel() + + def test_old_style_init(self): + class AModel(DefaultTreeModel): + def __init__(self): + DefaultTreeModel.__init__(self, DefaultMutableTreeNode()) + + AModel() + def test_main(): test_support.run_unittest( InterfaceTest, @@ -636,6 +656,7 @@ MetaClassTest, AbstractMethodTest, SuperIsSuperTest, + OldAndNewStyleInitSuperTest, HierarchyTest, ChooseCorrectToJavaTest) 
diff --git a/src/org/python/core/PyType.java b/src/org/python/core/PyType.java --- a/src/org/python/core/PyType.java +++ b/src/org/python/core/PyType.java @@ -1303,7 +1303,9 @@ // So break out of this infinite loop by ignoring this entry for super purposes. // The use of super__ parallels the workaround seen in PyReflectedFunction // Fixes http://bugs.jython.org/issue1540 - if(!name.startsWith("super__")) { + // Also ignore this if we're doing super during __init__ as we want it to behave + // Fixes http://bugs.jython.org/issue2375 + if(name != "__init__" && !name.startsWith("super__")) { lookupName = "super__" + name; } else { lookupName = name; -- Repository URL: https://hg.python.org/jython From jython-checkins at python.org Thu Sep 3 14:43:24 2015 From: jython-checkins at python.org (stefan.richthofer) Date: Thu, 03 Sep 2015 12:43:24 +0000 Subject: [Jython-checkins] =?utf-8?q?jython=3A_Moved_getJarFileName=28=29_?= =?utf-8?q?to_public_API_and_solved_Issue_2386=2E?= Message-ID: <20150903124321.101496.77806@psf.io> https://hg.python.org/jython/rev/9bb29fb686d2 changeset: 7719:9bb29fb686d2 user: Stefan Richthofer date: Thu Sep 03 14:42:46 2015 +0200 summary: Moved getJarFileName() to public API and solved Issue 2386. files: Lib/distutils/sysconfig.py | 16 +- lib-python/2.7/distutils/command/build.py | 6 +- src/org/python/core/Py.java | 155 +++++++++- src/org/python/core/PySystemState.java | 66 +---- tests/java/org/python/core/PySystemStateTest.java | 20 +- 5 files changed, 181 insertions(+), 82 deletions(-) diff --git a/Lib/distutils/sysconfig.py b/Lib/distutils/sysconfig.py --- a/Lib/distutils/sysconfig.py +++ b/Lib/distutils/sysconfig.py @@ -15,6 +15,7 @@ import re import string import sys +from org.python.core import Py from distutils.errors import DistutilsPlatformError @@ -25,7 +26,14 @@ # Path to the base directory of the project. On Windows the binary may # live in project/PCBuild9. 
If we're dealing with an x64 Windows build, # it'll live in project/PCbuild/amd64. -project_base = os.path.dirname(os.path.realpath(sys.executable)) + +def getJythonBinDir(): + if not sys.executable is None: + return os.path.dirname(os.path.realpath(sys.executable)) + else: + return Py.getDefaultBinDir() + +project_base = getJythonBinDir() if os.name == "nt" and "pcbuild" in project_base[-8:].lower(): project_base = os.path.abspath(os.path.join(project_base, os.path.pardir)) # PC/VS7.1 @@ -74,7 +82,7 @@ if os.name == "posix": if python_build: - buildir = os.path.dirname(os.path.realpath(sys.executable)) + buildir = getJythonBinDir() if plat_specific: # python.h is located in the buildir inc_dir = buildir @@ -222,7 +230,7 @@ def get_makefile_filename(): """Return full pathname of installed Makefile from the Python build.""" if python_build: - return os.path.join(os.path.dirname(os.path.realpath(sys.executable)), + return os.path.join(getJythonBinDir(), "Makefile") lib_dir = get_python_lib(plat_specific=1, standard_lib=1) return os.path.join(lib_dir, "config", "Makefile") @@ -460,7 +468,7 @@ g['SO'] = '.pyd' g['EXE'] = ".exe" g['VERSION'] = get_python_version().replace(".", "") - g['BINDIR'] = os.path.dirname(os.path.realpath(sys.executable)) + g['BINDIR'] = getJythonBinDir() global _config_vars _config_vars = g diff --git a/lib-python/2.7/distutils/command/build.py b/lib-python/2.7/distutils/command/build.py --- a/lib-python/2.7/distutils/command/build.py +++ b/lib-python/2.7/distutils/command/build.py @@ -115,7 +115,11 @@ 'scripts-' + sys.version[0:3]) if self.executable is None: - self.executable = os.path.normpath(sys.executable) + if not sys.executable is None: + self.executable = os.path.normpath(sys.executable) + else: + from org.python.core import Py + self.executable = Py.getDefaultExecutableName() def run(self): # Run all relevant sub-commands. 
This will be some subset of: diff --git a/src/org/python/core/Py.java b/src/org/python/core/Py.java --- a/src/org/python/core/Py.java +++ b/src/org/python/core/Py.java @@ -15,6 +15,8 @@ import java.io.StreamCorruptedException; import java.lang.reflect.InvocationTargetException; import java.lang.reflect.Method; +import java.net.URL; +import java.net.URLDecoder; import java.sql.Date; import java.sql.Time; import java.sql.Timestamp; @@ -29,8 +31,8 @@ import jnr.constants.platform.Errno; import jnr.posix.POSIX; import jnr.posix.POSIXFactory; +import jnr.posix.util.Platform; -import jnr.posix.util.Platform; import org.python.antlr.base.mod; import org.python.core.adapter.ClassicPyObjectAdapter; import org.python.core.adapter.ExtensiblePyObjectAdapter; @@ -2301,6 +2303,155 @@ } return objs.toArray(Py.EmptyObjects); } + + /** + * Infers the usual Jython executable name from the position of the + * jar-file returned by {@link #getJarFileName()} by replacing the + * file name with "bin/jython". This is intended as an easy fallback + * for cases where {@code sys.executable} is {@code None} due to + * direct launching via the java executable.
+ * Note that this does not necessarily return the actual executable, + * but instead infers the place where it is usually expected to be. + * Use {@code sys.executable} to get the actual executable (may be + * {@code None}. + * + * In contrast to {@link #getJarFileName()} and + * {@link #getJarFileNameFromURL(java.net.URL)} this method returns + * the path using system-specific separator characters. + * + * @return usual Jython-executable as absolute path + */ + public static String getDefaultExecutableName() { + return getDefaultBinDir()+File.separator+( + Platform.IS_WINDOWS ? "jython.exe" : "jython"); + } + + /** + * Infers the usual Jython bin-dir from the position of the jar-file + * returned by {@link #getJarFileName()} byr replacing the file name + * with "bin". This is intended as an easy fallback for cases where + * {@code sys.executable} is {@code null} due to direct launching via + * the java executable.
+ * Note that this does not necessarily return the actual bin-directory, + * but instead infers the place where it is usually expected to be. + * + * In contrast to {@link #getJarFileName()} and + * {@link #getJarFileNameFromURL(java.net.URL)} this method returns + * the path using system-specific separator characters. + * + * @return usual Jython bin-dir as absolute path + */ + public static String getDefaultBinDir() { + String jar = _getJarFileName(); + if (File.separatorChar != '/') { + jar = jar.replace('/', File.separatorChar); + } + int start = 0; + if (Platform.IS_WINDOWS && jar.startsWith(File.separator)) { + ++start; + } + return jar.substring(start, jar.lastIndexOf(File.separatorChar)+1)+"bin"; + } + + /** + * Utility-method to obtain the name (including absolute path) of the currently used + * jython-jar-file. Usually this is jython.jar, but can also be jython-dev.jar or + * jython-standalone.jar or something custom. + * + * @return the full name of the jar file containing this class, null + * if not available. + */ + public static String getJarFileName() { + String jar = _getJarFileName(); + if (File.separatorChar != '/') { + jar = jar.replace('/', File.separatorChar); + } + int start = 0; + if (Platform.IS_WINDOWS && jar.startsWith(File.separator)) { + ++start; + } + return jar.substring(start); + } + + /** + * Utility-method to obtain the name (including absolute path) of the currently used + * jython-jar-file. Usually this is jython.jar, but can also be jython-dev.jar or + * jython-standalone.jar or something custom. + * + * Note that it does not use system-specific seperator-chars, but always + * '/'. Also, on windows it might prepend a '/' before the drive-letter. (Is this a bug?) + * + * @return the full name of the jar file containing this class, null + * if not available. 
+ */ + protected static String _getJarFileName() { + Class thisClass = Py.class; + String fullClassName = thisClass.getName(); + String className = fullClassName.substring(fullClassName.lastIndexOf(".") + 1); + URL url = thisClass.getResource(className + ".class"); + return getJarFileNameFromURL(url); + } + + /**exclusively used by {@link #getJarFileNameFromURL(java.net.URL)}.*/ + private static final String JAR_URL_PREFIX = "jar:file:"; + /**exclusively used by {@link #getJarFileNameFromURL(java.net.URL)}.*/ + private static final String JAR_SEPARATOR = "!"; + /**exclusively used by {@link #getJarFileNameFromURL(java.net.URL)}.*/ + private static final String VFSZIP_PREFIX = "vfszip:"; + /**exclusively used by {@link #getJarFileNameFromURL(java.net.URL)}.*/ + private static final String VFS_PREFIX = "vfs:"; + + /** + * Converts a url that points to a jar-file to the actual jar-file name. + * Note that it does not use system-specific seperator-chars, but always + * '/'. Also, on windows it might prepend a '/' before the drive-letter. 
+ */ + public static String getJarFileNameFromURL(URL url) { + String jarFileName = null; + if (url != null) { + try { + // escape plus signs, since the URLDecoder would turn them into spaces + final String plus = "\\+"; + final String escapedPlus = "__ppluss__"; + String rawUrl = url.toString(); + rawUrl = rawUrl.replaceAll(plus, escapedPlus); + String urlString = URLDecoder.decode(rawUrl, "UTF-8"); + urlString = urlString.replaceAll(escapedPlus, plus); + int jarSeparatorIndex = urlString.lastIndexOf(JAR_SEPARATOR); + if (urlString.startsWith(JAR_URL_PREFIX) && jarSeparatorIndex > 0) { + // jar:file:/install_dir/jython.jar!/org/python/core/PySystemState.class + jarFileName = urlString.substring(JAR_URL_PREFIX.length(), jarSeparatorIndex); + } else if (urlString.startsWith(VFSZIP_PREFIX)) { + // vfszip:/some/path/jython.jar/org/python/core/PySystemState.class + final String path = Py.class.getName().replace('.', '/'); + int jarIndex = urlString.indexOf(".jar/".concat(path)); + if (jarIndex > 0) { + jarIndex += 4; + int start = VFSZIP_PREFIX.length(); + if (Platform.IS_WINDOWS) { + // vfszip:/C:/some/path/jython.jar/org/python/core/PySystemState.class + start++; + } + jarFileName = urlString.substring(start, jarIndex); + } + } else if (urlString.startsWith(VFS_PREFIX)) { + // vfs:/some/path/jython.jar/org/python/core/PySystemState.class + final String path = Py.class.getName().replace('.', '/'); + int jarIndex = urlString.indexOf(".jar/".concat(path)); + if (jarIndex > 0) { + jarIndex += 4; + int start = VFS_PREFIX.length(); + if (Platform.IS_WINDOWS) { + // vfs:/C:/some/path/jython.jar/org/python/core/PySystemState.class + start++; + } + jarFileName = urlString.substring(start, jarIndex); + } + } + } catch (Exception e) {} + } + return jarFileName; + } } class FixedFileWrapper extends StdoutWrapper { @@ -2407,7 +2558,7 @@ } /** - * A function object wrapper for a java method which comply with the + * A function object wrapper for a java method that complies with 
the * PyArgsKeywordsCall standard. */ @Untraversable diff --git a/src/org/python/core/PySystemState.java b/src/org/python/core/PySystemState.java --- a/src/org/python/core/PySystemState.java +++ b/src/org/python/core/PySystemState.java @@ -64,11 +64,6 @@ public static final String JYTHON_JAR = "jython.jar"; public static final String JYTHON_DEV_JAR = "jython-dev.jar"; - private static final String JAR_URL_PREFIX = "jar:file:"; - private static final String JAR_SEPARATOR = "!"; - private static final String VFSZIP_PREFIX = "vfszip:"; - private static final String VFS_PREFIX = "vfs:"; - public static final PyString version = new PyString(Version.getVersion()); public static final PyTuple subversion = new PyTuple(new PyString("Jython"), Py.newString(""), @@ -1036,7 +1031,7 @@ initialized = true; Py.setAdapter(adapter); boolean standalone = false; - String jarFileName = getJarFileName(); + String jarFileName = Py._getJarFileName(); if (jarFileName != null) { standalone = isStandalone(jarFileName); } @@ -1343,65 +1338,6 @@ return standalone; } - /** - * @return the full name of the jar file containing this class, null if not - * available. 
- */ - private static String getJarFileName() { - Class thisClass = PySystemState.class; - String fullClassName = thisClass.getName(); - String className = fullClassName.substring(fullClassName.lastIndexOf(".") + 1); - URL url = thisClass.getResource(className + ".class"); - return getJarFileNameFromURL(url); - } - - protected static String getJarFileNameFromURL(URL url) { - String jarFileName = null; - if (url != null) { - try { - // escape plus signs, since the URLDecoder would turn them into spaces - final String plus = "\\+"; - final String escapedPlus = "__ppluss__"; - String rawUrl = url.toString(); - rawUrl = rawUrl.replaceAll(plus, escapedPlus); - String urlString = URLDecoder.decode(rawUrl, "UTF-8"); - urlString = urlString.replaceAll(escapedPlus, plus); - int jarSeparatorIndex = urlString.lastIndexOf(JAR_SEPARATOR); - if (urlString.startsWith(JAR_URL_PREFIX) && jarSeparatorIndex > 0) { - // jar:file:/install_dir/jython.jar!/org/python/core/PySystemState.class - jarFileName = urlString.substring(JAR_URL_PREFIX.length(), jarSeparatorIndex); - } else if (urlString.startsWith(VFSZIP_PREFIX)) { - // vfszip:/some/path/jython.jar/org/python/core/PySystemState.class - final String path = PySystemState.class.getName().replace('.', '/'); - int jarIndex = urlString.indexOf(".jar/".concat(path)); - if (jarIndex > 0) { - jarIndex += 4; - int start = VFSZIP_PREFIX.length(); - if (Platform.IS_WINDOWS) { - // vfszip:/C:/some/path/jython.jar/org/python/core/PySystemState.class - start++; - } - jarFileName = urlString.substring(start, jarIndex); - } - } else if (urlString.startsWith(VFS_PREFIX)) { - // vfs:/some/path/jython.jar/org/python/core/PySystemState.class - final String path = PySystemState.class.getName().replace('.', '/'); - int jarIndex = urlString.indexOf(".jar/".concat(path)); - if (jarIndex > 0) { - jarIndex += 4; - int start = VFS_PREFIX.length(); - if (Platform.IS_WINDOWS) { - // vfs:/C:/some/path/jython.jar/org/python/core/PySystemState.class - start++; - 
} - jarFileName = urlString.substring(start, jarIndex); - } - } - } catch (Exception e) {} - } - return jarFileName; - } - private static void addPaths(PyList path, String pypath) { StringTokenizer tok = new StringTokenizer(pypath, java.io.File.pathSeparator); while (tok.hasMoreTokens()) { diff --git a/tests/java/org/python/core/PySystemStateTest.java b/tests/java/org/python/core/PySystemStateTest.java --- a/tests/java/org/python/core/PySystemStateTest.java +++ b/tests/java/org/python/core/PySystemStateTest.java @@ -13,19 +13,19 @@ public void testGetJarFileNameFromURL() throws Exception { // null - assertNull(PySystemState.getJarFileNameFromURL(null)); + assertNull(Py.getJarFileNameFromURL(null)); // plain jar url String urlString = "jar:file:/some_dir/some.jar!/a/package/with/A.class"; URL url = new URL(urlString); - assertEquals("/some_dir/some.jar", PySystemState.getJarFileNameFromURL(url)); + assertEquals("/some_dir/some.jar", Py.getJarFileNameFromURL(url)); // jar url to decode urlString = "jar:file:/some%20dir/some.jar!/a/package/with/A.class"; url = new URL(urlString); - assertEquals("/some dir/some.jar", PySystemState.getJarFileNameFromURL(url)); + assertEquals("/some dir/some.jar", Py.getJarFileNameFromURL(url)); // jar url with + signs to escape urlString = "jar:file:/some+dir/some.jar!/a/package/with/A.class"; url = new URL(urlString); - assertEquals("/some+dir/some.jar", PySystemState.getJarFileNameFromURL(url)); + assertEquals("/some+dir/some.jar", Py.getJarFileNameFromURL(url)); } public void testGetJarFileNameFromURL_jboss() throws Exception { @@ -41,33 +41,33 @@ url = new URL(protocol, host, port, file, handler); // tests with jboss on windows gave URL's like this: assertEquals("vfszip:/C:/some_dir/some.jar/org/python/core/PySystemState.class", url.toString()); - assertEquals("C:/some_dir/some.jar", PySystemState.getJarFileNameFromURL(url)); + assertEquals("C:/some_dir/some.jar", Py.getJarFileNameFromURL(url)); // jboss url to decode file = 
"/C:/some%20dir/some.jar/org/python/core/PySystemState.class"; url = new URL(protocol, host, port, file, handler); assertEquals("vfszip:/C:/some%20dir/some.jar/org/python/core/PySystemState.class", url.toString()); - assertEquals("C:/some dir/some.jar", PySystemState.getJarFileNameFromURL(url)); + assertEquals("C:/some dir/some.jar", Py.getJarFileNameFromURL(url)); // jboss url with + to escape file = "/C:/some+dir/some.jar/org/python/core/PySystemState.class"; url = new URL(protocol, host, port, file, handler); assertEquals("vfszip:/C:/some+dir/some.jar/org/python/core/PySystemState.class", url.toString()); - assertEquals("C:/some+dir/some.jar", PySystemState.getJarFileNameFromURL(url)); + assertEquals("C:/some+dir/some.jar", Py.getJarFileNameFromURL(url)); } else { // plain jboss url file = "/some_dir/some.jar/org/python/core/PySystemState.class"; url = new URL(protocol, host, port, file, handler); assertEquals("vfszip:/some_dir/some.jar/org/python/core/PySystemState.class", url.toString()); - assertEquals("/some_dir/some.jar", PySystemState.getJarFileNameFromURL(url)); + assertEquals("/some_dir/some.jar", Py.getJarFileNameFromURL(url)); // jboss url to decode file = "/some%20dir/some.jar/org/python/core/PySystemState.class"; url = new URL(protocol, host, port, file, handler); assertEquals("vfszip:/some%20dir/some.jar/org/python/core/PySystemState.class", url.toString()); - assertEquals("/some dir/some.jar", PySystemState.getJarFileNameFromURL(url)); + assertEquals("/some dir/some.jar", Py.getJarFileNameFromURL(url)); // jboss url with + to escape file = "/some+dir/some.jar/org/python/core/PySystemState.class"; url = new URL(protocol, host, port, file, handler); assertEquals("vfszip:/some+dir/some.jar/org/python/core/PySystemState.class", url.toString()); - assertEquals("/some+dir/some.jar", PySystemState.getJarFileNameFromURL(url)); + assertEquals("/some+dir/some.jar", Py.getJarFileNameFromURL(url)); } } -- Repository URL: https://hg.python.org/jython From 
jython-checkins at python.org Thu Sep 3 15:13:52 2015 From: jython-checkins at python.org (stefan.richthofer) Date: Thu, 03 Sep 2015 13:13:52 +0000 Subject: [Jython-checkins] =?utf-8?q?jython=3A_Fixed_modification_in_pytho?= =?utf-8?q?n-lib_from_last_commit=2E?= Message-ID: <20150903131341.66848.87842@psf.io> https://hg.python.org/jython/rev/01c9a4a0681b changeset: 7720:01c9a4a0681b user: Stefan Richthofer date: Thu Sep 03 15:13:32 2015 +0200 summary: Fixed modification in python-lib from last commit. files: Lib/distutils/command/build.py | 151 ++++++++++ lib-python/2.7/distutils/command/build.py | 6 +- 2 files changed, 152 insertions(+), 5 deletions(-) diff --git a/Lib/distutils/command/build.py b/Lib/distutils/command/build.py new file mode 100644 --- /dev/null +++ b/Lib/distutils/command/build.py @@ -0,0 +1,151 @@ +"""distutils.command.build + +Implements the Distutils 'build' command.""" + +__revision__ = "$Id$" + +import sys, os + +from distutils.util import get_platform +from distutils.core import Command +from distutils.errors import DistutilsOptionError + +def show_compilers(): + from distutils.ccompiler import show_compilers + show_compilers() + +class build(Command): + + description = "build everything needed to install" + + user_options = [ + ('build-base=', 'b', + "base directory for build library"), + ('build-purelib=', None, + "build directory for platform-neutral distributions"), + ('build-platlib=', None, + "build directory for platform-specific distributions"), + ('build-lib=', None, + "build directory for all distribution (defaults to either " + + "build-purelib or build-platlib"), + ('build-scripts=', None, + "build directory for scripts"), + ('build-temp=', 't', + "temporary build directory"), + ('plat-name=', 'p', + "platform name to build for, if supported " + "(default: %s)" % get_platform()), + ('compiler=', 'c', + "specify the compiler type"), + ('debug', 'g', + "compile extensions and libraries with debugging information"), + ('force', 
'f', + "forcibly build everything (ignore file timestamps)"), + ('executable=', 'e', + "specify final destination interpreter path (build.py)"), + ] + + boolean_options = ['debug', 'force'] + + help_options = [ + ('help-compiler', None, + "list available compilers", show_compilers), + ] + + def initialize_options(self): + self.build_base = 'build' + # these are decided only after 'build_base' has its final value + # (unless overridden by the user or client) + self.build_purelib = None + self.build_platlib = None + self.build_lib = None + self.build_temp = None + self.build_scripts = None + self.compiler = None + self.plat_name = None + self.debug = None + self.force = 0 + self.executable = None + + def finalize_options(self): + if self.plat_name is None: + self.plat_name = get_platform() + else: + # plat-name only supported for windows (other platforms are + # supported via ./configure flags, if at all). Avoid misleading + # other platforms. + if os.name != 'nt': + raise DistutilsOptionError( + "--plat-name only supported on Windows (try " + "using './configure --help' on your platform)") + + plat_specifier = ".%s-%s" % (self.plat_name, sys.version[0:3]) + + # Make it so Python 2.x and Python 2.x with --with-pydebug don't + # share the same build directories. Doing so confuses the build + # process for C modules + if hasattr(sys, 'gettotalrefcount'): + plat_specifier += '-pydebug' + + # 'build_purelib' and 'build_platlib' just default to 'lib' and + # 'lib.' under the base build directory. We only use one of + # them for a given distribution, though -- + if self.build_purelib is None: + self.build_purelib = os.path.join(self.build_base, 'lib') + if self.build_platlib is None: + self.build_platlib = os.path.join(self.build_base, + 'lib' + plat_specifier) + + # 'build_lib' is the actual directory that we will use for this + # particular module distribution -- if user didn't supply it, pick + # one of 'build_purelib' or 'build_platlib'. 
+ if self.build_lib is None: + if self.distribution.ext_modules: + self.build_lib = self.build_platlib + else: + self.build_lib = self.build_purelib + + # 'build_temp' -- temporary directory for compiler turds, + # "build/temp." + if self.build_temp is None: + self.build_temp = os.path.join(self.build_base, + 'temp' + plat_specifier) + if self.build_scripts is None: + self.build_scripts = os.path.join(self.build_base, + 'scripts-' + sys.version[0:3]) + + if self.executable is None: + if not sys.executable is None: + self.executable = os.path.normpath(sys.executable) + else: + from org.python.core import Py + self.executable = Py.getDefaultExecutableName() + + def run(self): + # Run all relevant sub-commands. This will be some subset of: + # - build_py - pure Python modules + # - build_clib - standalone C libraries + # - build_ext - Python extensions + # - build_scripts - (Python) scripts + for cmd_name in self.get_sub_commands(): + self.run_command(cmd_name) + + # -- Predicates for the sub-command list --------------------------- + + def has_pure_modules (self): + return self.distribution.has_pure_modules() + + def has_c_libraries (self): + return self.distribution.has_c_libraries() + + def has_ext_modules (self): + return self.distribution.has_ext_modules() + + def has_scripts (self): + return self.distribution.has_scripts() + + sub_commands = [('build_py', has_pure_modules), + ('build_clib', has_c_libraries), + ('build_ext', has_ext_modules), + ('build_scripts', has_scripts), + ] diff --git a/lib-python/2.7/distutils/command/build.py b/lib-python/2.7/distutils/command/build.py --- a/lib-python/2.7/distutils/command/build.py +++ b/lib-python/2.7/distutils/command/build.py @@ -115,11 +115,7 @@ 'scripts-' + sys.version[0:3]) if self.executable is None: - if not sys.executable is None: - self.executable = os.path.normpath(sys.executable) - else: - from org.python.core import Py - self.executable = Py.getDefaultExecutableName() + self.executable = 
os.path.normpath(sys.executable) def run(self): # Run all relevant sub-commands. This will be some subset of: -- Repository URL: https://hg.python.org/jython From jython-checkins at python.org Fri Sep 4 06:41:05 2015 From: jython-checkins at python.org (jim.baker) Date: Fri, 04 Sep 2015 04:41:05 +0000 Subject: [Jython-checkins] =?utf-8?q?jython=3A_Various_minor_socket_improv?= =?utf-8?q?ements_and_bug_fixes?= Message-ID: <20150904044102.114789.70702@psf.io> https://hg.python.org/jython/rev/b3b82ef080a9 changeset: 7721:b3b82ef080a9 user: Jim Baker date: Thu Sep 03 22:40:26 2015 -0600 summary: Various minor socket improvements and bug fixes socket.socket is now a class and can be subclassed (fixes #2366), protocol defaults to 0 (fixes #2374), exports SocketType (fixes #2383); SSLSocket.sendto no longer directly raises errno.EPROTO, which is missing on Windows, but instead conforms to CPython semantics of ignoring destination address (fixes #2315). Improved stability of test_socket. Upgraded Netty jars to 4.0.31. 
files: Lib/_socket.py | 11 +- Lib/socket.py | 2 +- Lib/ssl.py | 4 +- Lib/test/test_socket.py | 66 ++++++++++- build.xml | 20 +- extlibs/netty-buffer-4.0.25.Final.jar | Bin extlibs/netty-buffer-4.0.31.Final.jar | Bin extlibs/netty-codec-4.0.25.Final.jar | Bin extlibs/netty-codec-4.0.31.Final.jar | Bin extlibs/netty-common-4.0.25.Final.jar | Bin extlibs/netty-common-4.0.31.Final.jar | Bin extlibs/netty-handler-4.0.25.Final.jar | Bin extlibs/netty-handler-4.0.31.Final.jar | Bin extlibs/netty-transport-4.0.25.Final.jar | Bin extlibs/netty-transport-4.0.31.Final.jar | Bin 15 files changed, 79 insertions(+), 24 deletions(-) diff --git a/Lib/_socket.py b/Lib/_socket.py --- a/Lib/_socket.py +++ b/Lib/_socket.py @@ -702,11 +702,11 @@ class _realsocket(object): - def __init__(self, family=None, type=None, proto=None): + def __init__(self, family=None, type=None, proto=0): # FIXME verify args are correct self.family = family self.type = type - if proto is None: + if not proto: if type == SOCK_STREAM: proto = IPPROTO_TCP elif type == SOCK_DGRAM: @@ -1309,8 +1309,7 @@ __doc__ = _realsocket.__doc__ - - def __init__(self, family=AF_INET, type=SOCK_STREAM, proto=0, _sock=None): + def __init__(self, family=AF_INET, type=SOCK_STREAM, proto=None, _sock=None): if _sock is None: _sock = _realsocket(family, type, proto) self._sock = _sock @@ -1453,10 +1452,6 @@ # EXPORTED constructors -def socket(family=None, type=None, proto=None): - return _socketobject(family, type, proto) - - def select(rlist, wlist, xlist, timeout=None): for lst in (rlist, wlist, xlist): if not isinstance(lst, Iterable): diff --git a/Lib/socket.py b/Lib/socket.py --- a/Lib/socket.py +++ b/Lib/socket.py @@ -1,7 +1,7 @@ # dispatches to _socket for actual implementation from _socket import ( - socket, error, herror, gaierror, timeout, has_ipv6, + socket, SocketType, error, herror, gaierror, timeout, has_ipv6, create_connection, diff --git a/Lib/ssl.py b/Lib/ssl.py --- a/Lib/ssl.py +++ b/Lib/ssl.py @@ -189,7 +189,9 @@ 
return self.sock.recv_into(buffer, nbytes, flags) def sendto(self, string, arg1, arg2=None): - raise socket_error(errno.EPROTO) + # as observed on CPython, sendto when wrapped ignores the + # destination address, thereby behaving just like send + return self.sock.send(string) def close(self): self.sock.close() diff --git a/Lib/test/test_socket.py b/Lib/test/test_socket.py --- a/Lib/test/test_socket.py +++ b/Lib/test/test_socket.py @@ -94,7 +94,7 @@ Note, the server setup function cannot call any blocking functions that rely on the client thread during setup, - unless serverExplicityReady() is called just before + unless serverExplicitReady() is called just before the blocking call (such as in setting up a client/server connection and performing the accept() in setUp(). """ @@ -656,6 +656,9 @@ sock.close() self.assertRaises(socket.error, sock.send, "spam") + def testSocketTypeAvailable(self): + self.assertIs(socket.socket, socket.SocketType) + class IPAddressTests(unittest.TestCase): def testValidIpV4Addresses(self): @@ -1372,7 +1375,6 @@ rfds, wfds, xfds = select.select([self.cli], [self.cli], [], 0.1) if rfds or wfds or xfds: break - self.failUnless(self.cli in wfds) try: self.cli.send(MSG) except socket.error: @@ -2549,13 +2551,67 @@ try: self.s.getpeername() except socket.error, se: - # FIXME Apparently Netty's doesn't set remoteAddress, even if connected, for datagram channels - # so we may have to shadow + # FIXME Apparently Netty doesn't set remoteAddress, + # even if connected, for datagram channels so we may + # have to shadow self.fail("getpeername() on connected UDP socket should not have raised socket.error") self.failUnlessEqual(self.s.getpeername(), self._udp_peer.getsockname()) finally: self._udp_peer.close() +class ConfigurableClientSocketTest(SocketTCPTest, ThreadableTest): + + # Too bad we are not using cooperative multiple inheritance - + # **super is super**, after all! 
So this means we currently have + # a bit of code duplication with respect to other unit tests. May + # want to refactor these unit tests accordingly at some point. + + def config_client(self): + raise NotImplementedError("subclassing unit tests must define") + + def __init__(self, methodName='runTest'): + SocketTCPTest.__init__(self, methodName=methodName) + ThreadableTest.__init__(self) + + def setUp(self): + SocketTCPTest.setUp(self) + # Indicate explicitly we're ready for the client thread to + # proceed and then perform the blocking call to accept + self.serverExplicitReady() + self.cli_conn, _ = self.serv.accept() + + def clientSetUp(self): + self.cli = self.config_client() + self.cli.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) + self.cli.connect((self.HOST, self.PORT)) + self.serv_conn = self.cli + + def clientTearDown(self): + self.cli.close() + self.cli = None + ThreadableTest.clientTearDown(self) + + def testRecv(self): + # Testing large receive over TCP + msg = self.cli_conn.recv(1024) + self.assertEqual(msg, MSG) + + def _testRecv(self): + self.serv_conn.send(MSG) + +class ProtocolCanBeZeroTest(ConfigurableClientSocketTest): + + def config_client(self): + return socket.socket(socket.AF_INET, socket.SOCK_STREAM, 0) + +class SocketClassCanBeSubclassed(ConfigurableClientSocketTest): + + def config_client(self): + class MySocket(socket.socket): + pass + return MySocket() + + def test_main(): tests = [ GeneralModuleTests, @@ -2590,6 +2646,8 @@ TestGetSockAndPeerNameTCPClient, TestGetSockAndPeerNameTCPServer, TestGetSockAndPeerNameUDP, + ProtocolCanBeZeroTest, + SocketClassCanBeSubclassed ] if hasattr(socket, "socketpair"): diff --git a/build.xml b/build.xml --- a/build.xml +++ b/build.xml @@ -178,11 +178,11 @@ - - - - - + + + + + @@ -589,15 +589,15 @@ - + - + - + - + - + diff --git a/extlibs/netty-buffer-4.0.25.Final.jar b/extlibs/netty-buffer-4.0.25.Final.jar deleted file mode 100644 index 
de8fa8e44ed46356174579086a545f043d0d225c..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 GIT binary patch [stripped] diff --git a/extlibs/netty-buffer-4.0.31.Final.jar b/extlibs/netty-buffer-4.0.31.Final.jar new file mode 100644 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..759696378f5a57652b4db83462043bc5bca8b542 GIT binary patch [stripped] diff --git a/extlibs/netty-codec-4.0.25.Final.jar b/extlibs/netty-codec-4.0.25.Final.jar deleted file mode 100644 index f1a618c8f1c02025ad202d53a3d9894f703abf5d..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 GIT binary patch [stripped] diff --git a/extlibs/netty-codec-4.0.31.Final.jar b/extlibs/netty-codec-4.0.31.Final.jar new file mode 100644 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..a943e6bf7361835342c8637c8db3588e87d70e19 GIT binary patch [stripped] diff --git a/extlibs/netty-common-4.0.25.Final.jar b/extlibs/netty-common-4.0.25.Final.jar deleted file mode 100644 index f23daacc18d360fa782bf5bb355034da77408cf5..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 GIT binary patch [stripped] diff --git a/extlibs/netty-common-4.0.31.Final.jar b/extlibs/netty-common-4.0.31.Final.jar new file mode 100644 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..ed507f2d323abfbd593442e32d9ecbd2ace149b4 GIT binary patch [stripped] diff --git a/extlibs/netty-handler-4.0.25.Final.jar b/extlibs/netty-handler-4.0.25.Final.jar deleted file mode 100644 index b1c61f2ce65544cc6b9643c9cbe931882f0450b2..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 GIT binary patch [stripped] diff --git a/extlibs/netty-handler-4.0.31.Final.jar b/extlibs/netty-handler-4.0.31.Final.jar new file mode 100644 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..1a002fb26d8b46e7e132eabf952ba127d9ac2bb3 GIT binary patch [stripped] diff --git a/extlibs/netty-transport-4.0.25.Final.jar b/extlibs/netty-transport-4.0.25.Final.jar deleted file mode 100644 index 2edc97363251cb88ed2eae2d44bea1a4e43602fb..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 GIT binary patch [stripped] diff --git 
a/extlibs/netty-transport-4.0.31.Final.jar b/extlibs/netty-transport-4.0.31.Final.jar new file mode 100644 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..87881ca0beac4e5c5aecb099acffd4d673922d55 GIT binary patch [stripped] -- Repository URL: https://hg.python.org/jython From jython-checkins at python.org Fri Sep 4 18:32:54 2015 From: jython-checkins at python.org (stefan.richthofer) Date: Fri, 04 Sep 2015 16:32:54 +0000 Subject: [Jython-checkins] =?utf-8?q?jython=3A_Improved_usability_of_Jytho?= =?utf-8?q?n_from_Java-side_by_adding_some_utility-methods_to?= Message-ID: <20150904163236.115151.32544@psf.io> https://hg.python.org/jython/rev/9e204456051d changeset: 7722:9e204456051d user: Stefan Richthofer date: Fri Sep 04 18:32:25 2015 +0200 summary: Improved usability of Jython from Java-side by adding some utility-methods to org.python.core.Py. These simplify the use of Python-constructors from Java-side. files: src/org/python/core/Py.java | 202 ++++++++++++++++++++++++ 1 files changed, 202 insertions(+), 0 deletions(-) diff --git a/src/org/python/core/Py.java b/src/org/python/core/Py.java --- a/src/org/python/core/Py.java +++ b/src/org/python/core/Py.java @@ -22,7 +22,9 @@ import java.sql.Timestamp; import java.util.ArrayList; import java.util.Calendar; +import java.util.HashMap; import java.util.List; +import java.util.Map; import java.util.Set; import com.google.common.base.CharMatcher; @@ -2452,6 +2454,206 @@ } return jarFileName; } + +//------------------------contructor-section--------------------------- + static class py2JyClassCacheItem { + List> interfaces; + List pyClasses; + + public py2JyClassCacheItem(Class initClass, PyObject initPyClass) { + if (!initClass.isInterface()) throw + new IllegalArgumentException("cls must be an interface."); + interfaces = new ArrayList<>(1); + pyClasses = new ArrayList<>(1); + interfaces.add(initClass); + pyClasses.add(initPyClass); + } + + public PyObject get(Class cls) { + for (int i = 0; i < interfaces.size(); ++i) { 
+ if (cls.isAssignableFrom(interfaces.get(i))) + return pyClasses.get(i); + } + return null; + } + + public void add(Class cls, PyObject pyCls) { + if (!cls.isInterface()) throw + new IllegalArgumentException("cls must be an interface."); + interfaces.add(0, cls); + for (int i = interfaces.size()-1; i > 0; --i) { + if (interfaces.get(i).isAssignableFrom(cls)) + interfaces.remove(i); + } + pyClasses.add(pyCls); + } + } + + protected static Map py2JyClassCache = new HashMap<>(); + + protected static PyObject ensureInterface(PyObject cls, Class interfce) { + PyObject pjc = PyType.fromClass(interfce); + if (Py.isSubClass(cls, pjc)) { + return cls; + } + PyObject[] bases = {cls, pjc}; + return Py.makeClass(interfce.getName(), bases, new PyStringMap()); + } + + /** + * Returns a Python-class that extends {@code cls} and {@code interfce}. + * If {@code cls} already extends {@code interfce}, simply {@code cls} + * is returned. Otherwise a new class is created (if not yet cached). + * It caches such classes and only creates a new one if no appropriate + * class was cached yet. + * + * @return a Python-class that extends {@code cls} and {@code interfce} + */ + public static PyObject javaPyClass(PyObject cls, Class interfce) { + py2JyClassCacheItem cacheItem = py2JyClassCache.get(cls); + PyObject result; + if (cacheItem == null) { + result = ensureInterface(cls, interfce); + cacheItem = new py2JyClassCacheItem(interfce, result); + py2JyClassCache.put(cls, cacheItem); + } else { + result = cacheItem.get(interfce); + if (result == null) { + result = ensureInterface(cls, interfce); + cacheItem.add(interfce, result); + } + } + return result; + } + + /** + * This method is a compact helper to access Python-constructors from Java. + * It creates an instance of {@code cls} and retruns it in form of + * {@code jcls}, which must be an interface. This method even works if + * {@code cls} does not extend {@code jcls} in Python-code. 
In that case, + * it uses {@link #javaPyClass(PyObject, Class)} to create an appropriate + * class on the fly.
+ * It automatically converts {@code args} to {@link org.python.core.PyObject}s.
+ * For keyword-support use + * {@link #newJavaObject(PyObject, Class, String[], Object...)}. + * + * {@see #newJavaObject(PyObject, Class, PyObject[], String[])} + * {@see #newJavaObject(PyObject, Class, String[], Object...)} + * {@see #newJavaObject(PyModule, Class, Object...)} + * {@see #newJavaObject(PyModule, Class, String[], Object...)} + * + * @param cls - the class to be instantiated + * @param jcls - the Java-type to be returned + * @param args are automatically converted to Jython-PyObjects + * @return an instance of cls in form of the interface jcls + */ + @SuppressWarnings("unchecked") + public static T newJavaObject(PyObject cls, Class jcls, Object... args) { + PyObject cls2 = javaPyClass(cls, jcls); + PyObject resultPy = cls2.__call__(Py.javas2pys(args)); + return (T) resultPy.__tojava__(jcls); + } + + /** + * This method is a compact helper to access Python-constructors from Java. + * It creates an instance of {@code cls} and returns it in form of + * {@code jcls}, which must be an interface. This method even works if + * {@code cls} does not extend {@code jcls} in Python-code. In that case, + * it uses {@link #javaPyClass(PyObject, Class)} to create an appropriate + * class on the fly.
+ * {@code keywords} are applied to the last {@code args} in the list. + * + * {@see #newJavaObject(PyObject, Class, Object...)} + * {@see #newJavaObject(PyObject, Class, String[], Object...)} + * {@see #newJavaObject(PyModule, Class, Object...)} + * {@see #newJavaObject(PyModule, Class, String[], Object...)} + * + * @param cls - the class to be instantiated + * @param jcls - the Java-type to be returned + * @param keywords are applied to the last args + * @param args for the Python-class constructor + * @return an instance of cls in form of the interface jcls + */ + @SuppressWarnings("unchecked") + public static T newJavaObject(PyObject cls, Class jcls, PyObject[] args, String[] keywords) { + PyObject cls2 = javaPyClass(cls, jcls); + PyObject resultPy = cls2.__call__(args, keywords); + return (T) resultPy.__tojava__(jcls); + } + + /** + * This method is a compact helper to access Python-constructors from Java. + * It creates an instance of {@code cls} and returns it in form of + * {@code jcls}, which must be an interface. This method even works if + * {@code cls} does not extend {@code jcls} in Python-code. In that case, + * it uses {@link #javaPyClass(PyObject, Class)} to create an appropriate + * class on the fly.
+ * It automatically converts {@code args} to {@link org.python.core.PyObject}s.
+ * {@code keywords} are applied to the last {@code args} in the list. + * + * {@see #newJavaObject(PyObject, Class, PyObject[], String[])} + * {@see #newJavaObject(PyObject, Class, Object...)} + * {@see #newJavaObject(PyModule, Class, Object...)} + * {@see #newJavaObject(PyModule, Class, String[], Object...)} + * + * @param cls - the class to be instantiated + * @param jcls - the Java-type to be returned + * @param keywords are applied to the last args + * @param args are automatically converted to Jython-PyObjects + * @return an instance of cls in form of the interface jcls + */ + @SuppressWarnings("unchecked") + public static T newJavaObject(PyObject cls, Class jcls, String[] keywords, Object... args) { + PyObject cls2 = javaPyClass(cls, jcls); + PyObject resultPy = cls2.__call__(Py.javas2pys(args), keywords); + return (T) resultPy.__tojava__(jcls); + } + + /** + * Works like {@link #newJavaObject(PyObject, Class, Object...)}, but looks + * up the Python-class in the module-dict using the interface-name, i.e. + * {@code jcls.getSimpleName()}.
+ * For keywords-support use {@link #newJavaObject(PyModule, Class, String[], Object...)}. + * + * {@see #newJavaObject(PyModule, Class, String[], Object...)} + * {@see #newJavaObject(PyObject, Class, PyObject[], String[])} + * {@see #newJavaObject(PyObject, Class, Object...)} + * {@see #newJavaObject(PyObject, Class, String[], Object...)} + * + * @param module the module containing the desired class + * @param jcls Java-type of the desired class, must have the same name + * @param args constructor-arguments + * @return a new instance of the desired class + */ + @SuppressWarnings("unchecked") + public static T newJavaObject(PyModule module, Class jcls, Object... args) { + PyObject cls = module.__getattr__(jcls.getSimpleName().intern()); + return newJavaObject(cls, jcls, args); + } + + /** + * Works like {@link #newJavaObject(PyObject, Class, String[], Object...)}, but looks + * up the Python-class in the module-dict using the interface-name, i.e. + * {@code jcls.getSimpleName()}.
+ * {@code keywordss} are applied to the last {@code args} in the list. + * + * {@see #newJavaObject(PyModule, Class, Object...)} + * {@see #newJavaObject(PyObject, Class, PyObject[], String[])} + * {@see #newJavaObject(PyObject, Class, Object...)} + * {@see #newJavaObject(PyObject, Class, String[], Object...)} + * + * @param module the module containing the desired class + * @param jcls Java-type of the desired class, must have the same name + * @param keywords are applied to the last {@code args} in the list + * @param args constructor-arguments + * @return a new instance of the desired class + */ + @SuppressWarnings("unchecked") + public static T newJavaObject(PyModule module, Class jcls, String[] keywords, Object... args) { + PyObject cls = module.__getattr__(jcls.getSimpleName().intern()); + return newJavaObject(cls, jcls, keywords, args); + } +//----------------end of constructor-section------------------ } class FixedFileWrapper extends StdoutWrapper { -- Repository URL: https://hg.python.org/jython From jython-checkins at python.org Sat Sep 5 02:45:03 2015 From: jython-checkins at python.org (stefan.richthofer) Date: Sat, 05 Sep 2015 00:45:03 +0000 Subject: [Jython-checkins] =?utf-8?q?jython=3A_fixed_that_Py=2EgetJarFileN?= =?utf-8?q?ameFromURL_prepends_a_slash_before_drive-letter_on?= Message-ID: <20150905004503.66848.55770@psf.io> https://hg.python.org/jython/rev/850f5980a2e3 changeset: 7723:850f5980a2e3 user: Stefan Richthofer date: Sat Sep 05 02:44:50 2015 +0200 summary: fixed that Py.getJarFileNameFromURL prepends a slash before drive-letter on windows. 
files: src/org/python/core/Py.java | 26 +++++++++--------------- 1 files changed, 10 insertions(+), 16 deletions(-) diff --git a/src/org/python/core/Py.java b/src/org/python/core/Py.java --- a/src/org/python/core/Py.java +++ b/src/org/python/core/Py.java @@ -2348,11 +2348,7 @@ if (File.separatorChar != '/') { jar = jar.replace('/', File.separatorChar); } - int start = 0; - if (Platform.IS_WINDOWS && jar.startsWith(File.separator)) { - ++start; - } - return jar.substring(start, jar.lastIndexOf(File.separatorChar)+1)+"bin"; + return jar.substring(0, jar.lastIndexOf(File.separatorChar)+1)+"bin"; } /** @@ -2368,11 +2364,7 @@ if (File.separatorChar != '/') { jar = jar.replace('/', File.separatorChar); } - int start = 0; - if (Platform.IS_WINDOWS && jar.startsWith(File.separator)) { - ++start; - } - return jar.substring(start); + return jar; } /** @@ -2380,13 +2372,12 @@ * jython-jar-file. Usually this is jython.jar, but can also be jython-dev.jar or * jython-standalone.jar or something custom. * - * Note that it does not use system-specific seperator-chars, but always - * '/'. Also, on windows it might prepend a '/' before the drive-letter. (Is this a bug?) + * Note that it does not use system-specific seperator-chars, but always '/'. * * @return the full name of the jar file containing this class, null * if not available. */ - protected static String _getJarFileName() { + public static String _getJarFileName() { Class thisClass = Py.class; String fullClassName = thisClass.getName(); String className = fullClassName.substring(fullClassName.lastIndexOf(".") + 1); @@ -2405,8 +2396,7 @@ /** * Converts a url that points to a jar-file to the actual jar-file name. - * Note that it does not use system-specific seperator-chars, but always - * '/'. Also, on windows it might prepend a '/' before the drive-letter. + * Note that it does not use system-specific seperator-chars, but always '/'. 
*/ public static String getJarFileNameFromURL(URL url) { String jarFileName = null; @@ -2422,7 +2412,11 @@ int jarSeparatorIndex = urlString.lastIndexOf(JAR_SEPARATOR); if (urlString.startsWith(JAR_URL_PREFIX) && jarSeparatorIndex > 0) { // jar:file:/install_dir/jython.jar!/org/python/core/PySystemState.class - jarFileName = urlString.substring(JAR_URL_PREFIX.length(), jarSeparatorIndex); + int start = JAR_URL_PREFIX.length(); + if (Platform.IS_WINDOWS) { + start++; + } + jarFileName = urlString.substring(start, jarSeparatorIndex); } else if (urlString.startsWith(VFSZIP_PREFIX)) { // vfszip:/some/path/jython.jar/org/python/core/PySystemState.class final String path = Py.class.getName().replace('.', '/'); -- Repository URL: https://hg.python.org/jython From jython-checkins at python.org Sat Sep 5 02:56:22 2015 From: jython-checkins at python.org (stefan.richthofer) Date: Sat, 05 Sep 2015 00:56:22 +0000 Subject: [Jython-checkins] =?utf-8?q?jython=3A_fixed_some_issue_in_Py-cons?= =?utf-8?q?tructor_support-code_from_two_commits_before=2E?= Message-ID: <20150905005622.14865.42047@psf.io> https://hg.python.org/jython/rev/45a9d8f613b9 changeset: 7724:45a9d8f613b9 user: Stefan Richthofer date: Sat Sep 05 02:56:13 2015 +0200 summary: fixed some issue in Py-constructor support-code from two commits before. 
(occurred to me during some review) files: src/org/python/core/Py.java | 6 ++++-- 1 files changed, 4 insertions(+), 2 deletions(-) diff --git a/src/org/python/core/Py.java b/src/org/python/core/Py.java --- a/src/org/python/core/Py.java +++ b/src/org/python/core/Py.java @@ -2475,11 +2475,13 @@ if (!cls.isInterface()) throw new IllegalArgumentException("cls must be an interface."); interfaces.add(0, cls); + pyClasses.add(0, pyCls); for (int i = interfaces.size()-1; i > 0; --i) { - if (interfaces.get(i).isAssignableFrom(cls)) + if (interfaces.get(i).isAssignableFrom(cls)) { interfaces.remove(i); + pyClasses.remove(i); + } } - pyClasses.add(pyCls); } } -- Repository URL: https://hg.python.org/jython From jython-checkins at python.org Fri Sep 11 00:58:40 2015 From: jython-checkins at python.org (jeff.allen) Date: Thu, 10 Sep 2015 22:58:40 +0000 Subject: [Jython-checkins] =?utf-8?q?jython=3A_bytearray_character_operati?= =?utf-8?q?ons_based_on_ctypes_table_for_speed=2E?= Message-ID: <20150910225840.11268.50253@psf.io> https://hg.python.org/jython/rev/8cfb9a81c632 changeset: 7727:8cfb9a81c632 user: Jeff Allen date: Wed Sep 09 21:09:25 2015 +0100 summary: bytearray character operations based on ctypes table for speed. Continues remedy for #2364, but following benchmark timing of alternative loops. files: src/org/python/core/BaseBytes.java | 226 +++++++++++----- 1 files changed, 156 insertions(+), 70 deletions(-) diff --git a/src/org/python/core/BaseBytes.java b/src/org/python/core/BaseBytes.java --- a/src/org/python/core/BaseBytes.java +++ b/src/org/python/core/BaseBytes.java @@ -3089,39 +3089,63 @@ // Character class operations // + + // Bit to twiddle (XOR) for lowercase letter to uppercase and vice-versa. 
+ private static final int SWAP_CASE = 0x20; + + // Bit masks and sets to use with the byte classification table + private static final byte UPPER = 0b1; + private static final byte LOWER = 0b10; + private static final byte DIGIT = 0b100; + private static final byte SPACE = 0b1000; + private static final byte ALPHA = UPPER | LOWER; + private static final byte ALNUM = ALPHA | DIGIT; + + // Character (byte) classification table. + private static final byte[] ctype = new byte[256]; + static { + for (int c = 'A'; c <= 'Z'; c++) { + ctype[0x80 + c] = UPPER; + ctype[0x80 + SWAP_CASE + c] = LOWER; + } + for (int c = '0'; c <= '9'; c++) { + ctype[0x80 + c] = DIGIT; + } + for (char c : " \t\n\u000b\f\r".toCharArray()) { + ctype[0x80 + c] = SPACE; + } + } + /** @return 'A'<= b <='Z'. */ - static final boolean isupper(int b) { - return ((b - 'A') & 0xff) < 26; + static final boolean isupper(byte b) { + return (ctype[0x80 + b] & UPPER) != 0; } /** @return 'a'<= b <='z'. */ - static final boolean islower(int b) { - return ((b - 'a') & 0xff) < 26; + static final boolean islower(byte b) { + return (ctype[0x80 + b] & LOWER) != 0; } /** @return 'A'<= b <='Z' or 'a'<= b <='z'. */ - static final boolean isalpha(int b) { - return (((b | 0x20) - 'a') & 0xff) < 26; // b|0x20 maps A to a, Z to z, etc. + static final boolean isalpha(byte b) { + return (ctype[0x80 + b] & ALPHA) != 0; } /** @return '0'<= b <='9'. */ - static final boolean isdigit(int b) { - return ((b - '0') & 0xff) < 10; + static final boolean isdigit(byte b) { + return (ctype[0x80 + b] & DIGIT) != 0; } /** @return 'A'<= b <='Z' or 'a'<= b <='z' or '0'<= b <='9'. 
*/ - static final boolean isalnum(int b) { - return isalpha(b) || isdigit(b); + static final boolean isalnum(byte b) { + return (ctype[0x80 + b] & ALNUM) != 0; } /** @return b in ' \t\n\v\f\r' */ - static final boolean isspace(int b) { - return b == ' ' || ((b - '\t') & 0xff) < 5; + static final boolean isspace(byte b) { + return (ctype[0x80 + b] & SPACE) != 0; } - /** Bit to twiddle (XOR) for lowercase letter to uppercase and vice-versa */ - private final int SWAP_CASE = 0x20; - /** * Java API equivalent of Python isalnum(). This method treats the bytes as * US-ASCII code points. @@ -3140,11 +3164,19 @@ * least one byte, false otherwise. */ final boolean basebytes_isalnum() { - int i; - // Work backwards through the bytes, stopping early if the test is false. - for (i = size - 1; i >= 0 && isalnum(storage[offset + i]); --i) {} - // Result is true if we reached the beginning (and there were some bytes) - return i < 0 && size > 0; + if (size == 1) { + // Special case strings of length one (i.e. characters) + return isalnum(storage[offset]); + } else { + // Work through the bytes, stopping early if the test is false. + for (int i = 0; i < size; i++) { + if (!isalnum(storage[offset + i])) { + return false; + } + } + // Result is true if we reached the end (and there were some bytes) + return size > 0; + } } /** @@ -3165,11 +3197,19 @@ * otherwise */ final boolean basebytes_isalpha() { - int i; - // Work backwards through the bytes, stopping early if the test is false. - for (i = size - 1; i >= 0 && isalpha(storage[offset + i]); --i) {} - // Result is true if we reached the beginning (and there were some bytes) - return i < 0 && size > 0; + if (size == 1) { + // Special case strings of length one (i.e. characters) + return isalpha(storage[offset]); + } else { + // Work through the bytes, stopping early if the test is false. 
+ for (int i = 0; i < size; i++) { + if (!isalpha(storage[offset + i])) { + return false; + } + } + // Result is true if we reached the end (and there were some bytes) + return size > 0; + } } /** @@ -3190,11 +3230,19 @@ * byte, false otherwise. */ final boolean basebytes_isdigit() { - int i; - // Work backwards through the bytes, stopping early if the test is false. - for (i = size - 1; i >= 0 && isdigit(storage[offset + i]); --i) {} - // Result is true if we reached the beginning (and there were some bytes) - return i < 0 && size > 0; + if (size == 1) { + // Special case strings of length one (i.e. characters) + return isdigit(storage[offset]); + } else { + // Work through the bytes, stopping early if the test is false. + for (int i = 0; i < size; i++) { + if (!isdigit(storage[offset + i])) { + return false; + } + } + // Result is true if we reached the end (and there were some bytes) + return size > 0; + } } /** @@ -3215,20 +3263,35 @@ * there is at least one cased byte, false otherwise. */ final boolean basebytes_islower() { - boolean hasCased = false; - // Test the bytes - for (int i = 0; i < size; i++) { - int c = byteAt(i); - if (isupper(c)) { + if (size == 1) { + // Special case strings of length one (i.e. characters) + return islower(storage[offset]); + + } else { + int i; + byte c = 0; + // Test the bytes until a cased byte is encountered + for (i = 0; i < size; i++) { + if (isalpha(c = storage[offset + i])) { + break; + } + } + + if (i == size || isupper(c)) { + // We reached the end without finding a cased byte, or it was upper case. return false; - } else if (hasCased) { - continue; // Don't need to keep checking for cased characters - } else if (islower(c)) { - hasCased = true; } + + // Continue to end or until an upper case byte is encountered + for (i = i + 1; i < size; i++) { + if (isupper(storage[offset + i])) { + return false; + } + } + + // Found no upper case bytes, and at least one lower case byte. 
+ return true; } - // Found no upper case bytes, but did we find any cased bytes at all? - return hasCased; } /** @@ -3249,11 +3312,19 @@ * there is at least one byte, false otherwise. */ final boolean basebytes_isspace() { - int i; - // Work backwards through the bytes, stopping early if the test is false. - for (i = size - 1; i >= 0 && isspace(storage[offset + i]); --i) {} - // Result is true if we reached the beginning (and there were some bytes) - return i < 0 && size > 0; + if (size == 1) { + // Special case strings of length one (i.e. characters) + return isspace(storage[offset]); + } else { + // Work through the bytes, stopping early if the test is false. + for (int i = 0; i < size; i++) { + if (!isspace(storage[offset + i])) { + return false; + } + } + // Result is true if we reached the end (and there were some bytes) + return size > 0; + } } /** @@ -3283,7 +3354,7 @@ // 2 = in a word (hence have have seen cased character) for (int i = 0; i < size; i++) { - int c = byteAt(i); + byte c = storage[offset+i]; if (isupper(c)) { if (state == 2) { // Violation: can't continue a word in upper case @@ -3326,20 +3397,35 @@ * there is at least one cased byte, false otherwise. */ final boolean basebytes_isupper() { - boolean hasCased = false; - // Test the bytes - for (int i = 0; i < size; i++) { - int c = byteAt(i); - if (islower(c)) { + if (size == 1) { + // Special case strings of length one (i.e. characters) + return isupper(storage[offset]); + + } else { + int i; + byte c = 0; + // Test the bytes until a cased byte is encountered + for (i = 0; i < size; i++) { + if (isalpha(c = storage[offset + i])) { + break; + } + } + + if (i == size || islower(c)) { + // We reached the end without finding a cased byte, or it was lower case. 
return false; - } else if (hasCased) { - continue; // Don't need to keep checking for cased characters - } else if (isupper(c)) { - hasCased = true; } + + // Continue to end or until a lower case byte is encountered + for (i = i + 1; i < size; i++) { + if (islower(storage[offset + i])) { + return false; + } + } + + // Found no lower case bytes, and at least one upper case byte. + return true; } - // Found no lower case bytes, but did we find any cased bytes at all? - return hasCased; } // @@ -3370,21 +3456,21 @@ if (size > 0) { // Treat first character - int c = byteAt(0); + byte c = storage[offset]; if (islower(c)) { c ^= SWAP_CASE; // 'a' -> 'A', etc. } // Put the adjusted character in the output as a byte - builder.append((byte)c); + builder.append(c); // Treat the rest for (int i = 1; i < size; i++) { - c = byteAt(i); + c = storage[offset+i]; if (isupper(c)) { c ^= SWAP_CASE; // 'A' -> 'a', etc. } // Put the adjusted character in the output as a byte - builder.append((byte)c); + builder.append(c); } } @@ -3413,12 +3499,12 @@ Builder builder = getBuilder(size); for (int i = 0; i < size; i++) { - int c = byteAt(i); + byte c = storage[offset+i]; if (isupper(c)) { c ^= SWAP_CASE; // 'A' -> 'a', etc. } // Put the adjusted character in the output as a byte - builder.append((byte)c); + builder.append(c); } return builder.getResult(); @@ -3446,12 +3532,12 @@ Builder builder = getBuilder(size); for (int i = 0; i < size; i++) { - int c = byteAt(i); + byte c = storage[offset+i]; if (isalpha(c)) { c ^= SWAP_CASE; // 'a' -> 'A', 'A' -> 'a', etc. } // Put the adjusted character in the output as a byte - builder.append((byte)c); + builder.append(c); } return builder.getResult(); @@ -3484,7 +3570,7 @@ boolean inWord = false; // We begin, not in a word (sequence of cased characters) for (int i = 0; i < size; i++) { - int c = byteAt(i); + byte c = storage[offset+i]; if (!inWord) { // When we are not in a word ... 
@@ -3504,7 +3590,7 @@ } } // Put the adjusted character in the output as a byte - builder.append((byte)c); + builder.append(c); } return builder.getResult(); } @@ -3532,12 +3618,12 @@ Builder builder = getBuilder(size); for (int i = 0; i < size; i++) { - int c = byteAt(i); + byte c = storage[offset+i]; if (islower(c)) { c ^= SWAP_CASE; // 'a' -> 'A' etc. } // Put the adjusted character in the output as a byte - builder.append((byte)c); + builder.append(c); } return builder.getResult(); -- Repository URL: https://hg.python.org/jython From jython-checkins at python.org Fri Sep 11 00:58:40 2015 From: jython-checkins at python.org (jeff.allen) Date: Thu, 10 Sep 2015 22:58:40 +0000 Subject: [Jython-checkins] =?utf-8?q?jython=3A_Remove_unicode_reliance_on_?= =?utf-8?q?str/bytes_where_character_traits_differ=2E?= Message-ID: <20150910225840.15722.36625@psf.io> https://hg.python.org/jython/rev/a8e0825ab780 changeset: 7728:a8e0825ab780 user: Jeff Allen date: Thu Sep 10 07:54:29 2015 +0100 summary: Remove unicode reliance on str/bytes where character traits differ. Prior to this change, PyUnicode relied on methods from PyString, when the string was basic plane, that classify characters (e.g. isalpha()). This assumed PyString made a Unicode interpretation of the String. When #2364 is fixed in PyString, this assumption will be incorrect. 
files: src/org/python/core/PyUnicode.java | 140 ++++++++-------- 1 files changed, 71 insertions(+), 69 deletions(-) diff --git a/src/org/python/core/PyUnicode.java b/src/org/python/core/PyUnicode.java --- a/src/org/python/core/PyUnicode.java +++ b/src/org/python/core/PyUnicode.java @@ -9,6 +9,7 @@ import java.util.Set; import com.google.common.base.CharMatcher; + import org.python.core.stringlib.FieldNameIterator; import org.python.core.stringlib.MarkupIterator; import org.python.expose.ExposedMethod; @@ -582,7 +583,9 @@ } public static String checkEncoding(String s) { - if (s == null || CharMatcher.ASCII.matchesAllOf(s)) { return s; } + if (s == null || CharMatcher.ASCII.matchesAllOf(s)) { + return s; + } return codecs.PyUnicode_EncodeASCII(s, s.length(), null); } @@ -739,19 +742,21 @@ return Py.makeCharacter(codepoint, true); } + @Override public int getInt(int i) { return getString().codePointAt(translator.utf16Index(i)); } - private class SubsequenceIteratorImpl implements Iterator { + /** + * An iterator returning code points from this array, for use when not basic plane. 
+ */ + private class SubsequenceIteratorImpl extends SubsequenceIteratorBasic { - private int current, k, stop, step; + private int k; // UTF-16 index (of current) SubsequenceIteratorImpl(int start, int stop, int step) { - current = start; + super(start, stop, step); k = translator.utf16Index(current); - this.stop = stop; - this.step = step; } SubsequenceIteratorImpl() { @@ -759,22 +764,7 @@ } @Override - public boolean hasNext() { - return current < stop; - } - - @Override - public Object next() { - int codePoint = nextCodePoint(); - current += 1; - for (int j = 1; j < step && hasNext(); j++) { - nextCodePoint(); - current += 1; - } - return codePoint; - } - - private int nextCodePoint() { + protected int nextCodePoint() { int U; int W1 = getString().charAt(k); if (W1 >= 0xD800 && W1 < 0xDC00) { @@ -785,8 +775,45 @@ U = W1; k += 1; } + current += 1; return U; } + } + + /** + * An iterator returning code points from this array, for use when basic plane. + */ + private class SubsequenceIteratorBasic implements Iterator { + + protected int current, stop, step; // Character indexes + + SubsequenceIteratorBasic(int start, int stop, int step) { + current = start; + this.stop = stop; + this.step = step; + } + + SubsequenceIteratorBasic() { + this(0, getCodePointCount(), 1); + } + + @Override + public boolean hasNext() { + return current < stop; + } + + @Override + public Integer next() { + int codePoint = nextCodePoint(); + for (int j = 1; j < step && hasNext(); j++) { + nextCodePoint(); + } + return codePoint; + } + + protected int nextCodePoint() { + return getString().charAt(current++); + } @Override public void remove() { @@ -845,16 +872,31 @@ } // XXX: Parameterize SubsequenceIteratorImpl and friends (and make them Iterable) + /** Get an iterator over the code point sequence. 
*/ public Iterator newSubsequenceIterator() { - return new SubsequenceIteratorImpl(); + if (isBasicPlane()) { + return new SubsequenceIteratorBasic(); + } else { + return new SubsequenceIteratorImpl(); + } } + /** Get an iterator over a slice of the code point sequence. */ public Iterator newSubsequenceIterator(int start, int stop, int step) { - if (step < 0) { - return new SteppedIterator(step * -1, new ReversedIterator(new SubsequenceIteratorImpl( - stop + 1, start + 1, 1))); + if (isBasicPlane()) { + if (step < 0) { + return new SteppedIterator(step * -1, new ReversedIterator( + new SubsequenceIteratorBasic(stop + 1, start + 1, 1))); + } else { + return new SubsequenceIteratorBasic(start, stop, step); + } } else { - return new SubsequenceIteratorImpl(start, stop, step); + if (step < 0) { + return new SteppedIterator(step * -1, new ReversedIterator( + new SubsequenceIteratorImpl(stop + 1, start + 1, 1))); + } else { + return new SubsequenceIteratorImpl(start, stop, step); + } } } @@ -948,9 +990,6 @@ @ExposedMethod(doc = BuiltinDocs.unicode_title_doc) final PyObject unicode_title() { - if (isBasicPlane()) { - return new PyUnicode(str_title()); - } StringBuilder buffer = new StringBuilder(getString().length()); boolean previous_is_cased = false; for (Iterator iter = newSubsequenceIterator(); iter.hasNext();) { @@ -973,9 +1012,6 @@ @ExposedMethod(doc = BuiltinDocs.unicode_swapcase_doc) final PyObject unicode_swapcase() { - if (isBasicPlane()) { - return new PyUnicode(str_swapcase()); - } StringBuilder buffer = new StringBuilder(getString().length()); for (Iterator iter = newSubsequenceIterator(); iter.hasNext();) { int codePoint = iter.next(); @@ -1416,9 +1452,6 @@ @ExposedMethod(defaults = "false", doc = BuiltinDocs.unicode___getslice___doc) final PyList unicode_splitlines(boolean keepends) { - if (isBasicPlane()) { - return str_splitlines(keepends); - } return new PyList(new LineSplitIterator(keepends)); } @@ -1582,9 +1615,6 @@ if (getString().length() == 0) { 
return this; } - if (isBasicPlane()) { - return new PyUnicode(str_capitalize()); - } StringBuilder buffer = new StringBuilder(getString().length()); boolean first = true; for (Iterator iter = newSubsequenceIterator(); iter.hasNext();) { @@ -1671,13 +1701,8 @@ return _codecs.translateCharmap(this, "ignore", table); } - // these tests need to be UTF-16 aware because they are character-by-character tests, - // so we can only use equivalent str_XXX tests if we are in basic plane @ExposedMethod(doc = BuiltinDocs.unicode_islower_doc) final boolean unicode_islower() { - if (isBasicPlane()) { - return str_islower(); - } boolean cased = false; for (Iterator iter = newSubsequenceIterator(); iter.hasNext();) { int codepoint = iter.next(); @@ -1692,9 +1717,6 @@ @ExposedMethod(doc = BuiltinDocs.unicode_isupper_doc) final boolean unicode_isupper() { - if (isBasicPlane()) { - return str_isupper(); - } boolean cased = false; for (Iterator iter = newSubsequenceIterator(); iter.hasNext();) { int codepoint = iter.next(); @@ -1709,9 +1731,6 @@ @ExposedMethod(doc = BuiltinDocs.unicode_isalpha_doc) final boolean unicode_isalpha() { - if (isBasicPlane()) { - return str_isalpha(); - } if (getCodePointCount() == 0) { return false; } @@ -1725,15 +1744,13 @@ @ExposedMethod(doc = BuiltinDocs.unicode_isalnum_doc) final boolean unicode_isalnum() { - if (isBasicPlane()) { - return str_isalnum(); - } if (getCodePointCount() == 0) { return false; } for (Iterator iter = newSubsequenceIterator(); iter.hasNext();) { int codePoint = iter.next(); - if (!(Character.isLetterOrDigit(codePoint) || Character.getType(codePoint) == Character.LETTER_NUMBER)) { + if (!(Character.isLetterOrDigit(codePoint) || // + Character.getType(codePoint) == Character.LETTER_NUMBER)) { return false; } } @@ -1742,9 +1759,6 @@ @ExposedMethod(doc = BuiltinDocs.unicode_isdecimal_doc) final boolean unicode_isdecimal() { - if (isBasicPlane()) { - return str_isdecimal(); - } if (getCodePointCount() == 0) { return false; } @@ 
-1758,9 +1772,6 @@ @ExposedMethod(doc = BuiltinDocs.unicode_isdigit_doc) final boolean unicode_isdigit() { - if (isBasicPlane()) { - return str_isdigit(); - } if (getCodePointCount() == 0) { return false; } @@ -1774,9 +1785,6 @@ @ExposedMethod(doc = BuiltinDocs.unicode_isnumeric_doc) final boolean unicode_isnumeric() { - if (isBasicPlane()) { - return str_isnumeric(); - } if (getCodePointCount() == 0) { return false; } @@ -1792,9 +1800,6 @@ @ExposedMethod(doc = BuiltinDocs.unicode_istitle_doc) final boolean unicode_istitle() { - if (isBasicPlane()) { - return str_istitle(); - } if (getCodePointCount() == 0) { return false; } @@ -1823,9 +1828,6 @@ @ExposedMethod(doc = BuiltinDocs.unicode_isspace_doc) final boolean unicode_isspace() { - if (isBasicPlane()) { - return str_isspace(); - } if (getCodePointCount() == 0) { return false; } -- Repository URL: https://hg.python.org/jython From jython-checkins at python.org Fri Sep 11 00:58:40 2015 From: jython-checkins at python.org (jeff.allen) Date: Thu, 10 Sep 2015 22:58:40 +0000 Subject: [Jython-checkins] =?utf-8?q?jython=3A_str_character_operations_be?= =?utf-8?q?come_ASCII=2E_Completes_fix_for_=232364=2E?= Message-ID: <20150910225840.101484.85674@psf.io> https://hg.python.org/jython/rev/a77dad1d7050 changeset: 7729:a77dad1d7050 user: Jeff Allen date: Thu Sep 10 23:14:16 2015 +0100 summary: str character operations become ASCII. Completes fix for #2364. PyString isalpha, islower, isdigit and so on now use character classification methods from BaseBytes, resulting in a pure ASCII interpretation. (Possibly leaves some Unicode-ness in other methods.) Tests are in place for non-byte characters, just in case. files: NEWS | 1 + src/org/python/core/PyString.java | 264 ++++++++--------- 2 files changed, 125 insertions(+), 140 deletions(-) diff --git a/NEWS b/NEWS --- a/NEWS +++ b/NEWS @@ -10,6 +10,7 @@ - [ 2158, 2259 ] Fixed behaviour of relative from ... 
import * - [ 1879 ] -m command now executes scripts from inside a jar file - [ 2058 ] ClasspathPyImporter implements PEP 302 get_data (and others) + - [ 2364 ] bytearray and str: isalpha(), isupper() etc. now match Python 2 Jython 2.7 same as 2.7rc3 diff --git a/src/org/python/core/PyString.java b/src/org/python/core/PyString.java --- a/src/org/python/core/PyString.java +++ b/src/org/python/core/PyString.java @@ -6,10 +6,10 @@ import java.math.BigInteger; import java.util.ArrayList; import java.util.Collection; +import java.util.List; +import java.util.Locale; import java.util.regex.Matcher; import java.util.regex.Pattern; -import java.util.List; -import java.util.Locale; import org.python.core.buffer.BaseBuffer; import org.python.core.buffer.SimpleStringBuffer; @@ -2662,8 +2662,8 @@ * Return the (lazily) compiled regular expression for a Python complex number. This is used * within the regular expression patterns that define a priori acceptable strings in the complex * constructors. The expression contributes five named capture groups a, b, x, y and j. x and y - * are the two floats encountered, and if j is present, one of them is the imaginary part. - * a and b are the optional parentheses. They must either both be present or both omitted. + * are the two floats encountered, and if j is present, one of them is the imaginary part. a and + * b are the optional parentheses. They must either both be present or both omitted. */ private static synchronized Pattern getComplexPattern() { if (complexPattern == null) { @@ -3602,77 +3602,99 @@ @ExposedMethod(doc = BuiltinDocs.str_islower_doc) final boolean str_islower() { - int n = getString().length(); - - /* Shortcut for single character strings */ + String s = getString(); + int n = s.length(); + if (n == 1) { - return Character.isLowerCase(getString().charAt(0)); + // Special case single character strings. 
+ return _islower(s.charAt(0)); } boolean cased = false; for (int i = 0; i < n; i++) { - char ch = getString().charAt(i); - - if (Character.isUpperCase(ch) || Character.isTitleCase(ch)) { + char ch = s.charAt(i); + if (_isupper(ch)) { return false; - } else if (!cased && Character.isLowerCase(ch)) { + } else if (!cased && _islower(ch)) { cased = true; } } return cased; } + private boolean _islower(char ch) { + if (ch < 256) { + return BaseBytes.islower((byte)ch); + } else { + // This is an internal error. Really, the test should be unnecessary. + throw new java.lang.IllegalArgumentException("non-byte character in PyString"); + } + } + public boolean isupper() { return str_isupper(); } @ExposedMethod(doc = BuiltinDocs.str_isupper_doc) final boolean str_isupper() { - int n = getString().length(); - - /* Shortcut for single character strings */ + String s = getString(); + int n = s.length(); + if (n == 1) { - return Character.isUpperCase(getString().charAt(0)); + // Special case single character strings. + return _isupper(s.charAt(0)); } boolean cased = false; for (int i = 0; i < n; i++) { - char ch = getString().charAt(i); - - if (Character.isLowerCase(ch) || Character.isTitleCase(ch)) { + char ch = s.charAt(i); + if (_islower(ch)) { return false; - } else if (!cased && Character.isUpperCase(ch)) { + } else if (!cased && _isupper(ch)) { cased = true; } } return cased; } + private boolean _isupper(char ch) { + if (ch < 256) { + return BaseBytes.isupper((byte)ch); + } else { + // This is an internal error. Really, the test should be unnecessary. 
+ throw new java.lang.IllegalArgumentException("non-byte character in PyString"); + } + } + public boolean isalpha() { return str_isalpha(); } @ExposedMethod(doc = BuiltinDocs.str_isalpha_doc) final boolean str_isalpha() { - int n = getString().length(); - - /* Shortcut for single character strings */ + String s = getString(); + int n = s.length(); + if (n == 1) { - return Character.isLetter(getString().charAt(0)); + // Special case single character strings. + return _isalpha(s.charAt(0)); } - if (n == 0) { - return false; - } - for (int i = 0; i < n; i++) { - char ch = getString().charAt(i); - - if (!Character.isLetter(ch)) { + if (!_isalpha(s.charAt(i))) { return false; } } - return true; + return n > 0; + } + + private boolean _isalpha(char ch) { + if (ch < 256) { + return BaseBytes.isalpha((byte)ch); + } else { + // This is an internal error. Really, the test should be unnecessary. + throw new java.lang.IllegalArgumentException("non-byte character in PyString"); + } } public boolean isalnum() { @@ -3681,33 +3703,30 @@ @ExposedMethod(doc = BuiltinDocs.str_isalnum_doc) final boolean str_isalnum() { - int n = getString().length(); - - /* Shortcut for single character strings */ + String s = getString(); + int n = s.length(); + if (n == 1) { - return _isalnum(getString().charAt(0)); + // Special case single character strings. + return _isalnum(s.charAt(0)); } - if (n == 0) { - return false; - } - for (int i = 0; i < n; i++) { - char ch = getString().charAt(i); - - if (!_isalnum(ch)) { + if (!_isalnum(s.charAt(i))) { return false; } } - return true; + return n > 0; } private boolean _isalnum(char ch) { - // This can ever be entirely compatible with CPython. In CPython - // The type is not used, the numeric property is determined from - // the presense of digit, decimal or numeric fields. These fields - // are not available in exactly the same way in java. 
- return Character.isLetterOrDigit(ch) || Character.getType(ch) == Character.LETTER_NUMBER; + // This is now entirely compatible with CPython, as long as only bytes are stored. + if (ch < 256) { + return BaseBytes.isalnum((byte)ch); + } else { + // This is an internal error. Really, the test should be unnecessary. + throw new java.lang.IllegalArgumentException("non-byte character in PyString"); + } } public boolean isdecimal() { @@ -3715,59 +3734,44 @@ } @ExposedMethod(doc = BuiltinDocs.unicode_isdecimal_doc) - final boolean str_isdecimal() { - int n = getString().length(); - - /* Shortcut for single character strings */ + final boolean str_isdecimal() { // XXX this ought not to exist in str (in Python 2) + return str_isdigit(); + } + + private boolean _isdecimal(char ch) { + // See the comment in _isalnum. Here it is even worse. + return Character.getType(ch) == Character.DECIMAL_DIGIT_NUMBER; + } + + public boolean isdigit() { + return str_isdigit(); + } + + @ExposedMethod(doc = BuiltinDocs.str_isdigit_doc) + final boolean str_isdigit() { + String s = getString(); + int n = s.length(); + if (n == 1) { - char ch = getString().charAt(0); - return _isdecimal(ch); + // Special case single character strings. + return _isdigit(s.charAt(0)); } - if (n == 0) { - return false; - } - for (int i = 0; i < n; i++) { - char ch = getString().charAt(i); - - if (!_isdecimal(ch)) { + if (!_isdigit(s.charAt(i))) { return false; } } - return true; - } - - private boolean _isdecimal(char ch) { - // See the comment in _isalnum. Here it is even worse. 
- return Character.getType(ch) == Character.DECIMAL_DIGIT_NUMBER; - } - - public boolean isdigit() { - return str_isdigit(); - } - - @ExposedMethod(doc = BuiltinDocs.str_isdigit_doc) - final boolean str_isdigit() { - int n = getString().length(); - - /* Shortcut for single character strings */ - if (n == 1) { - return Character.isDigit(getString().charAt(0)); + return n > 0; + } + + private boolean _isdigit(char ch) { + if (ch < 256) { + return BaseBytes.isdigit((byte)ch); + } else { + // This is an internal error. Really, the test should be unnecessary. + throw new java.lang.IllegalArgumentException("non-byte character in PyString"); } - - if (n == 0) { - return false; - } - - for (int i = 0; i < n; i++) { - char ch = getString().charAt(i); - - if (!Character.isDigit(ch)) { - return false; - } - } - return true; } public boolean isnumeric() { @@ -3775,31 +3779,8 @@ } @ExposedMethod(doc = BuiltinDocs.unicode_isnumeric_doc) - final boolean str_isnumeric() { - int n = getString().length(); - - /* Shortcut for single character strings */ - if (n == 1) { - return _isnumeric(getString().charAt(0)); - } - - if (n == 0) { - return false; - } - - for (int i = 0; i < n; i++) { - char ch = getString().charAt(i); - if (!_isnumeric(ch)) { - return false; - } - } - return true; - } - - private boolean _isnumeric(char ch) { - int type = Character.getType(ch); - return type == Character.DECIMAL_DIGIT_NUMBER || type == Character.LETTER_NUMBER - || type == Character.OTHER_NUMBER; + final boolean str_isnumeric() { // XXX this ought not to exist in str (in Python 2) + return str_isdigit(); } public boolean istitle() { @@ -3808,26 +3789,25 @@ @ExposedMethod(doc = BuiltinDocs.str_istitle_doc) final boolean str_istitle() { - int n = getString().length(); - - /* Shortcut for single character strings */ + String s = getString(); + int n = s.length(); + if (n == 1) { - return Character.isTitleCase(getString().charAt(0)) - || Character.isUpperCase(getString().charAt(0)); + // Special case 
single character strings. + return _isupper(s.charAt(0)); } boolean cased = false; boolean previous_is_cased = false; for (int i = 0; i < n; i++) { - char ch = getString().charAt(i); - - if (Character.isUpperCase(ch) || Character.isTitleCase(ch)) { + char ch = s.charAt(i); + if (_isupper(ch)) { if (previous_is_cased) { return false; } previous_is_cased = true; cased = true; - } else if (Character.isLowerCase(ch)) { + } else if (_islower(ch)) { if (!previous_is_cased) { return false; } @@ -3846,25 +3826,29 @@ @ExposedMethod(doc = BuiltinDocs.str_isspace_doc) final boolean str_isspace() { - int n = getString().length(); - - /* Shortcut for single character strings */ + String s = getString(); + int n = s.length(); + if (n == 1) { - return Character.isWhitespace(getString().charAt(0)); + // Special case single character strings. + return _isspace(s.charAt(0)); } - if (n == 0) { - return false; - } - for (int i = 0; i < n; i++) { - char ch = getString().charAt(i); - - if (!Character.isWhitespace(ch)) { + if (!_isspace(s.charAt(i))) { return false; } } - return true; + return n > 0; + } + + private boolean _isspace(char ch) { + if (ch < 256) { + return BaseBytes.isspace((byte)ch); + } else { + // This is an internal error. Really, the test should be unnecessary. + throw new java.lang.IllegalArgumentException("non-byte character in PyString"); + } } public boolean isunicode() { -- Repository URL: https://hg.python.org/jython From jython-checkins at python.org Fri Sep 11 00:58:40 2015 From: jython-checkins at python.org (jeff.allen) Date: Thu, 10 Sep 2015 22:58:40 +0000 Subject: [Jython-checkins] =?utf-8?q?jython=3A_bytearray_character_operati?= =?utf-8?q?ons_become_ASCII=2E_Partly_addresses_=232364=2E?= Message-ID: <20150910225840.11264.28565@psf.io> https://hg.python.org/jython/rev/50082331db8d changeset: 7726:50082331db8d user: Jeff Allen date: Thu Sep 03 22:14:09 2015 +0100 summary: bytearray character operations become ASCII. Partly addresses #2364. 
Adds isalpha, islower, isdigit and so on to BaseBytes and uses them in place of Unicode operations. bytearray now conforms to clarified Python language specification and CPython behaviour. Javadoc and regression tests amended. (Corresponding bytes/str change pending.) files: Lib/test/test_bytes.py | 18 +- Lib/test/test_bytes_jy.py | 63 ++++ src/org/python/core/BaseBytes.java | 261 ++++++++-------- 3 files changed, 190 insertions(+), 152 deletions(-) diff --git a/Lib/test/test_bytes.py b/Lib/test/test_bytes.py --- a/Lib/test/test_bytes.py +++ b/Lib/test/test_bytes.py @@ -357,11 +357,7 @@ self.assertEqual(b.split(None, 2), [b'arf', b'barf']) for b in (b'a\x1Cb', b'a\x1Db', b'a\x1Eb', b'a\x1Fb'): b = self.type2test(b) - if not test.test_support.is_jython: - self.assertEqual(b.split(), [b]) - else: - # \x1c .. \x1f are whitespace Jython (which follows Java) - self.assertEqual(b.split(), [b'a', b'b']) + self.assertEqual(b.split(), [b]) self.assertEqual(self.type2test(b' a bb c ').split(None, 0), [b'a bb c ']) self.assertEqual(self.type2test(b' a bb c ').split(None, 1), [b'a', b'bb c ']) self.assertEqual(self.type2test(b' a bb c ').split(None, 2), [b'a', b'bb', b'c ']) @@ -372,11 +368,7 @@ def test_split_unicodewhitespace(self): b = self.type2test(b"\x09\x0A\x0B\x0C\x0D\x1C\x1D\x1E\x1F") - if not test.test_support.is_jython: - self.assertEqual(b.split(), [b'\x1c\x1d\x1e\x1f']) - else: - # \x1c .. \x1f are whitespace Jython - self.assertEqual(b.split(), []) + self.assertEqual(b.split(), [b'\x1c\x1d\x1e\x1f']) def test_rsplit(self): b = self.type2test(b'mississippi') @@ -401,11 +393,7 @@ def test_rsplit_unicodewhitespace(self): b = self.type2test(b"\x09\x0A\x0B\x0C\x0D\x1C\x1D\x1E\x1F") - if not test.test_support.is_jython: - self.assertEqual(b.rsplit(), [b'\x1c\x1d\x1e\x1f']) - else: - # \x1c .. 
\x1f are whitespace Jython - self.assertEqual(b.rsplit(), []) + self.assertEqual(b.rsplit(), [b'\x1c\x1d\x1e\x1f']) def test_partition(self): b = self.type2test(b'mississippi') diff --git a/Lib/test/test_bytes_jy.py b/Lib/test/test_bytes_jy.py --- a/Lib/test/test_bytes_jy.py +++ b/Lib/test/test_bytes_jy.py @@ -51,6 +51,69 @@ for n in range(-1, 3) : irepeat_export(b'', n) + # The following test_is* tests supplement string_tests for non-ascii examples. + # The principle is to choose some character codes that are letters, digits + # or spaces in Unicode but not in ASCII and check they are *not* categorised + # as such in a byte context. + + def checkequal(self, expected, obj, methodname, *args): + "check that object.method() returns expected result" + for B in (bytearray,): # (bytes, bytearray): + obj = B(obj) + realresult = getattr(obj, methodname)() + grumble = "%r.%s() returned %s" % (obj, methodname, realresult) + self.assertIs(expected, realresult, grumble) + # print grumble, 'x' if realresult != expected else '.' 
+ + LOWER = b'\xe0\xe7\xe9\xff' # Uppercase in Latin-1 but not ascii + UPPER = b'\xc0\xc7\xc9\xdd' # Lowercase in Latin-1 but not ascii + DIGIT = b'\xb9\xb2\xb3' # sup 1, 2, 3: numeric in Python (not Java) + SPACE = b'\x85\xa0' # NEXT LINE, NBSP: space in Python (not Java) + + def test_isalpha(self): + for c in self.UPPER + self.LOWER: + self.checkequal(False, c, 'isalpha') + self.checkequal(False, b'a' + c + b'Z', 'isalpha') + + def test_isdigit(self): + for c in self.DIGIT: + self.checkequal(False, c, 'isdigit') + self.checkequal(False, b'1' + c + b'3', 'isdigit') + + def test_islower(self): + for c in self.LOWER: + self.checkequal(False, c, 'islower') + for c in self.UPPER: + self.checkequal(True, b'a' + c + b'z', 'islower') + + def test_isupper(self): + for c in self.UPPER: + self.checkequal(False, c, 'isupper') + for c in self.LOWER: + self.checkequal(True, b'A' + c + b'Z', 'isupper') + + def test_isspace(self): + for c in self.SPACE: + self.checkequal(False, c, 'isspace') + self.checkequal(False, b'\t' + c + b' ', 'isspace') + + def test_isalnum(self): + for c in self.UPPER + self.LOWER + self.DIGIT: + self.checkequal(False, c, 'isalnum') + self.checkequal(False, b'a' + c + b'3', 'isalnum') + + def test_istitle(self): + for c in self.UPPER: + # c should be an un-cased character (effectively a space) + self.checkequal(False, c, 'istitle') + self.checkequal(True, b'A' + c + b'Titlecased Line', 'istitle') + self.checkequal(True, b'A' + c + b' Titlecased Line', 'istitle') + self.checkequal(True, b'A ' + c + b'Titlecased Line', 'istitle') + for c in self.LOWER: + # c should be an un-cased character (effectively a space) + self.checkequal(True, b'A' + c + b'Titlecased Line', 'istitle') + self.checkequal(True, b'A ' + c + b' Titlecased Line', 'istitle') + def test_main(): test.test_support.run_unittest( diff --git a/src/org/python/core/BaseBytes.java b/src/org/python/core/BaseBytes.java --- a/src/org/python/core/BaseBytes.java +++ 
b/src/org/python/core/BaseBytes.java @@ -1737,9 +1737,9 @@ */ protected int lstripIndex() { int limit = offset + size; - // Run up the storage until non-whitespace (or hit end)t + // Run up the storage until non-whitespace (or hit end) for (int left = offset; left < limit; left++) { - if (!Character.isWhitespace(storage[left] & 0xff)) { + if (!isspace(storage[left])) { return left - offset; } } @@ -1777,7 +1777,7 @@ protected int rstripIndex() { // Run down the storage until next is non-whitespace (or hit start) for (int right = offset + size; right > offset; --right) { - if (!Character.isWhitespace(storage[right - 1] & 0xff)) { + if (!isspace(storage[right - 1])) { return right - offset; } } @@ -2604,7 +2604,7 @@ // Scan backwards over trailing whitespace for (q = offset + size; q > offset; --q) { - if (!Character.isWhitespace(storage[q - 1] & 0xff)) { + if (!isspace(storage[q - 1])) { break; } } @@ -2617,7 +2617,7 @@ // Delimit the word whose last byte is storage[q-1] // Skip p backwards over the non-whitespace for (p = q; p > offset; --p) { - if (Character.isWhitespace(storage[p - 1] & 0xff)) { + if (isspace(storage[p - 1])) { break; } } @@ -2626,7 +2626,7 @@ result.add(0, word); // Skip q backwards over the whitespace for (q = p; q > offset; --q) { - if (!Character.isWhitespace(storage[q - 1] & 0xff)) { + if (!isspace(storage[q - 1])) { break; } } @@ -2795,7 +2795,7 @@ int p, q; // Indexes of unsplit text and whitespace // Scan over leading whitespace - for (p = offset; p < limit && Character.isWhitespace(storage[p] & 0xff); p++) { + for (p = offset; p < limit && isspace(storage[p]); p++) { ; // continue } @@ -2807,13 +2807,13 @@ // Delimit a word at p // storage[p] is not whitespace or at the limit: it is the start of a word // Skip q over the non-whitespace at p - for (q = p; q < limit && !Character.isWhitespace(storage[q] & 0xff); q++) { + for (q = p; q < limit && !isspace(storage[q]); q++) { ; // continue } // storage[q] is whitespace or it is at the limit 
result.append(getslice(p - offset, q - offset)); // Skip p over the whitespace at q - for (p = q; p < limit && Character.isWhitespace(storage[p] & 0xff); p++) { + for (p = q; p < limit && isspace(storage[p]); p++) { ; // continue } } @@ -3089,11 +3089,44 @@ // Character class operations // + /** @return 'A'<= b <='Z'. */ + static final boolean isupper(int b) { + return ((b - 'A') & 0xff) < 26; + } + + /** @return 'a'<= b <='z'. */ + static final boolean islower(int b) { + return ((b - 'a') & 0xff) < 26; + } + + /** @return 'A'<= b <='Z' or 'a'<= b <='z'. */ + static final boolean isalpha(int b) { + return (((b | 0x20) - 'a') & 0xff) < 26; // b|0x20 maps A to a, Z to z, etc. + } + + /** @return '0'<= b <='9'. */ + static final boolean isdigit(int b) { + return ((b - '0') & 0xff) < 10; + } + + /** @return 'A'<= b <='Z' or 'a'<= b <='z' or '0'<= b <='9'. */ + static final boolean isalnum(int b) { + return isalpha(b) || isdigit(b); + } + + /** @return b in ' \t\n\v\f\r' */ + static final boolean isspace(int b) { + return b == ' ' || ((b - '\t') & 0xff) < 5; + } + + /** Bit to twiddle (XOR) for lowercase letter to uppercase and vice-versa */ + private final int SWAP_CASE = 0x20; + /** - * Java API equivalent of Python isalnum(). This method treats the bytes as Unicode - * pont codes and is consistent with Java's {@link Character#isLetterOrDigit(char)}. + * Java API equivalent of Python isalnum(). This method treats the bytes as + * US-ASCII code points. * - * @return true if all bytes in the array are point codes for alphanumerics and there is at + * @return true if all bytes in the array are code points for alphanumerics and there is at * least one byte, false otherwise. */ public boolean isalnum() { @@ -3103,27 +3136,20 @@ /** * Ready-to-expose implementation of Python isalnum(). 
* - * @return true if all bytes in the array are point codes for alphanumerics and there is at + * @return true if all bytes in the array are code points for alphanumerics and there is at * least one byte, false otherwise. */ final boolean basebytes_isalnum() { - if (size <= 0) { - // Treat empty string as special case - return false; - } else { - // Test the bytes - for (int i = 0; i < size; i++) { - if (!Character.isLetterOrDigit(charAt(i))) { - return false; - } - } - return true; - } + int i; + // Work backwards through the bytes, stopping early if the test is false. + for (i = size - 1; i >= 0 && isalnum(storage[offset + i]); --i) {} + // Result is true if we reached the beginning (and there were some bytes) + return i < 0 && size > 0; } /** - * Java API equivalent of Python isalpha(). This method treats the bytes as Unicode - * pont codes and is consistent with Java's {@link Character#isLetter(char)}. + * Java API equivalent of Python isalpha(). This method treats the bytes as + * US-ASCII code points. * * @return true if all bytes in the array are alphabetic and there is at least one byte, false * otherwise @@ -3139,25 +3165,18 @@ * otherwise */ final boolean basebytes_isalpha() { - if (size <= 0) { - // Treat empty string as special case - return false; - } else { - // Test the bytes - for (int i = 0; i < size; i++) { - if (!Character.isLetter(charAt(i))) { - return false; - } - } - return true; - } + int i; + // Work backwards through the bytes, stopping early if the test is false. + for (i = size - 1; i >= 0 && isalpha(storage[offset + i]); --i) {} + // Result is true if we reached the beginning (and there were some bytes) + return i < 0 && size > 0; } /** - * Java API equivalent of Python isdigit(). This method treats the bytes as Unicode - * pont codes and is consistent with Java's {@link Character#isDigit(char)}. + * Java API equivalent of Python isdigit(). This method treats the bytes as + * US-ASCII code points. 
* - * @return true if all bytes in the array are point codes for digits and there is at least one + * @return true if all bytes in the array are code points for digits and there is at least one * byte, false otherwise. */ public boolean isdigit() { @@ -3167,29 +3186,22 @@ /** * Ready-to-expose implementation of Python isdigit(). * - * @return true if all bytes in the array are point codes for digits and there is at least one + * @return true if all bytes in the array are code points for digits and there is at least one * byte, false otherwise. */ final boolean basebytes_isdigit() { - if (size <= 0) { - // Treat empty string as special case - return false; - } else { - // Test the bytes - for (int i = 0; i < size; i++) { - if (!Character.isDigit(charAt(i))) { - return false; - } - } - return true; - } + int i; + // Work backwards through the bytes, stopping early if the test is false. + for (i = size - 1; i >= 0 && isdigit(storage[offset + i]); --i) {} + // Result is true if we reached the beginning (and there were some bytes) + return i < 0 && size > 0; } /** - * Java API equivalent of Python islower(). This method treats the bytes as Unicode - * pont codes and is consistent with Java's {@link Character#isLowerCase(char)}. + * Java API equivalent of Python islower(). This method treats the bytes as + * US-ASCII code points. * - * @return true if all cased bytes in the array are point codes for lowercase characters and + * @return true if all cased bytes in the array are code points for lowercase characters and * there is at least one cased byte, false otherwise. */ public boolean islower() { @@ -3199,19 +3211,19 @@ /** * Ready-to-expose implementation of Python islower(). * - * @return true if all cased bytes in the array are point codes for lowercase characters and + * @return true if all cased bytes in the array are code points for lowercase characters and * there is at least one cased byte, false otherwise. 
*/ final boolean basebytes_islower() { boolean hasCased = false; // Test the bytes for (int i = 0; i < size; i++) { - char c = charAt(i); - if (Character.isUpperCase(c)) { + int c = byteAt(i); + if (isupper(c)) { return false; } else if (hasCased) { continue; // Don't need to keep checking for cased characters - } else if (Character.isLowerCase(c)) { + } else if (islower(c)) { hasCased = true; } } @@ -3220,10 +3232,10 @@ } /** - * Java API equivalent of Python isspace(). This method treats the bytes as Unicode - * pont codes and is consistent with Java's {@link Character#isWhitespace(char)}. + * Java API equivalent of Python isspace(). This method treats the bytes as + * US-ASCII code points. * - * @return true if all the bytes in the array are point codes for whitespace characters and + * @return true if all the bytes in the array are code points for whitespace characters and * there is at least one byte, false otherwise. */ public boolean isspace() { @@ -3233,28 +3245,20 @@ /** * Ready-to-expose implementation of Python isspace(). * - * @return true if all the bytes in the array are point codes for whitespace characters and + * @return true if all the bytes in the array are code points for whitespace characters and * there is at least one byte, false otherwise. */ final boolean basebytes_isspace() { - if (size <= 0) { - // Treat empty string as special case - return false; - } else { - // Test the bytes - for (int i = 0; i < size; i++) { - if (!Character.isWhitespace(charAt(i))) { - return false; - } - } - return true; - } + int i; + // Work backwards through the bytes, stopping early if the test is false. + for (i = size - 1; i >= 0 && isspace(storage[offset + i]); --i) {} + // Result is true if we reached the beginning (and there were some bytes) + return i < 0 && size > 0; } /** - * Java API equivalent of Python istitle(). 
This method treats the bytes as Unicode - * pont codes and is consistent with Java's {@link Character#isUpperCase(char)} and - * {@link Character#isLowerCase(char)}. + * Java API equivalent of Python istitle(). This method treats the bytes as + * US-ASCII code points. * * @return true if the string is a titlecased string and there is at least one cased byte, for * example uppercase characters may only follow uncased bytes and lowercase characters @@ -3279,8 +3283,8 @@ // 2 = in a word (hence have have seen cased character) for (int i = 0; i < size; i++) { - char c = charAt(i); - if (Character.isUpperCase(c)) { + int c = byteAt(i); + if (isupper(c)) { if (state == 2) { // Violation: can't continue a word in upper case return false; @@ -3288,7 +3292,7 @@ // Validly in a word state = 2; } - } else if (Character.isLowerCase(c)) { + } else if (islower(c)) { if (state != 2) { // Violation: can't start a word in lower case return false; @@ -3305,10 +3309,10 @@ } /** - * Java API equivalent of Python isupper(). This method treats the bytes as Unicode - * pont codes and is consistent with Java's {@link Character#isUpperCase(char)}. + * Java API equivalent of Python isupper(). This method treats the bytes as + * US-ASCII code points. * - * @return true if all cased bytes in the array are point codes for uppercase characters and + * @return true if all cased bytes in the array are code points for uppercase characters and * there is at least one cased byte, false otherwise. */ public boolean isupper() { @@ -3318,19 +3322,19 @@ /** * Ready-to-expose implementation of Python isupper(). * - * @return true if all cased bytes in the array are point codes for uppercase characters and + * @return true if all cased bytes in the array are code points for uppercase characters and * there is at least one cased byte, false otherwise. 
*/ final boolean basebytes_isupper() { boolean hasCased = false; // Test the bytes for (int i = 0; i < size; i++) { - char c = charAt(i); - if (Character.isLowerCase(c)) { + int c = byteAt(i); + if (islower(c)) { return false; } else if (hasCased) { continue; // Don't need to keep checking for cased characters - } else if (Character.isUpperCase(c)) { + } else if (isupper(c)) { hasCased = true; } } @@ -3344,9 +3348,8 @@ /** * Java API equivalent of Python capitalize(). This method treats the bytes as - * Unicode pont codes and is consistent with Java's {@link Character#toUpperCase(char)} and - * {@link Character#toLowerCase(char)}. The BaseBytes returned by this method has - * the same actual type as this/self. + * US-ASCII code points. The BaseBytes returned by this method has the same actual + * type as this/self. * * @return a copy of the array with its first character capitalized and the rest lowercased. */ @@ -3367,18 +3370,18 @@ if (size > 0) { // Treat first character - char c = charAt(0); - if (Character.isLowerCase(c)) { - c = Character.toUpperCase(c); + int c = byteAt(0); + if (islower(c)) { + c ^= SWAP_CASE; // 'a' -> 'A', etc. } // Put the adjusted character in the output as a byte builder.append((byte)c); // Treat the rest for (int i = 1; i < size; i++) { - c = charAt(i); - if (Character.isUpperCase(c)) { - c = Character.toLowerCase(c); + c = byteAt(i); + if (isupper(c)) { + c ^= SWAP_CASE; // 'A' -> 'a', etc. } // Put the adjusted character in the output as a byte builder.append((byte)c); @@ -3389,9 +3392,8 @@ } /** - * Java API equivalent of Python lower(). This method treats the bytes as Unicode - * pont codes and is consistent with Java's {@link Character#toLowerCase(char)}. The - * BaseBytes returned by this method has the same actual type as + * Java API equivalent of Python lower(). This method treats the bytes as US-ASCII + * code points. The BaseBytes returned by this method has the same actual type as * this/self. 
* * @return a copy of the array with all the cased characters converted to lowercase. @@ -3411,9 +3413,9 @@ Builder builder = getBuilder(size); for (int i = 0; i < size; i++) { - char c = charAt(i); - if (Character.isUpperCase(c)) { - c = Character.toLowerCase(c); + int c = byteAt(i); + if (isupper(c)) { + c ^= SWAP_CASE; // 'A' -> 'a', etc. } // Put the adjusted character in the output as a byte builder.append((byte)c); @@ -3424,9 +3426,8 @@ /** * Java API equivalent of Python swapcase(). This method treats the bytes as - * Unicode pont codes and is consistent with Java's {@link Character#toUpperCase(char)} and - * {@link Character#toLowerCase(char)}. The BaseBytes returned by this method has - * the same actual type as this/self. + * US-ASCII code points. The BaseBytes returned by this method has the same actual + * type as this/self. * * @return a copy of the array with uppercase characters converted to lowercase and vice versa. */ @@ -3445,11 +3446,9 @@ Builder builder = getBuilder(size); for (int i = 0; i < size; i++) { - char c = charAt(i); - if (Character.isUpperCase(c)) { - c = Character.toLowerCase(c); - } else if (Character.isLowerCase(c)) { - c = Character.toUpperCase(c); + int c = byteAt(i); + if (isalpha(c)) { + c ^= SWAP_CASE; // 'a' -> 'A', 'A' -> 'a', etc. } // Put the adjusted character in the output as a byte builder.append((byte)c); @@ -3485,22 +3484,22 @@ boolean inWord = false; // We begin, not in a word (sequence of cased characters) for (int i = 0; i < size; i++) { - char c = charAt(i); + int c = byteAt(i); if (!inWord) { // When we are not in a word ... - if (Character.isLowerCase(c)) { - c = Character.toUpperCase(c); // ... a lowercase letter must be upcased + if (islower(c)) { + c ^= SWAP_CASE; // ... a lowercase letter must be upcased inWord = true; // and it starts a word. - } else if (Character.isUpperCase(c)) { + } else if (isupper(c)) { inWord = true; // ... 
an uppercase letter just starts the word } } else { // When we are in a word ... - if (Character.isUpperCase(c)) { - c = Character.toLowerCase(c); // ... an uppercase letter must be downcased - } else if (!Character.isLowerCase(c)) { + if (isupper(c)) { + c ^= SWAP_CASE; // ... an uppercase letter must be downcased + } else if (!islower(c)) { inWord = false; // ... and a non-letter ends the word } } @@ -3533,9 +3532,9 @@ Builder builder = getBuilder(size); for (int i = 0; i < size; i++) { - char c = charAt(i); - if (Character.isLowerCase(c)) { - c = Character.toUpperCase(c); + int c = byteAt(i); + if (islower(c)) { + c ^= SWAP_CASE; // 'a' -> 'A' etc. } // Put the adjusted character in the output as a byte builder.append((byte)c); @@ -3575,18 +3574,6 @@ } /** - * Return the Python byte (in range 0 to 255 inclusive) at the given index, interpreted as an - * unsigned point code, without checking the index. - * - * @param index of value in byte array - * @return the char value at the index - * @throws IndexOutOfBoundsException if outside storage array - */ - private final char charAt(int index) throws IndexOutOfBoundsException { - return (char)(0xff & storage[index + offset]); - } - - /** * Helper to implement {@link #repeat(int)}. Use something like: * *
@@ -3638,7 +3625,7 @@
 
     /**
      * Almost ready-to-expose Python __repr__(), based on treating the bytes as point
-     * codes. The value added by this method is conversion of non-printing point codes to
+     * codes. The value added by this method is conversion of non-printing code points to
      * hexadecimal escapes in printable ASCII, and bracketed by the given before and after strings.
      * These are used to get the required presentation:
      *

-- 
Repository URL: https://hg.python.org/jython

From jython-checkins at python.org  Fri Sep 11 00:58:41 2015
From: jython-checkins at python.org (jeff.allen)
Date: Thu, 10 Sep 2015 22:58:41 +0000
Subject: [Jython-checkins] =?utf-8?q?jython=3A_Dead_code_removed_from_PyMo?=
	=?utf-8?q?dule=2E?=
Message-ID: <20150910225840.17967.48873@psf.io>

https://hg.python.org/jython/rev/57fb3b499c0d
changeset:   7725:57fb3b499c0d
parent:      7717:b2c98eeaa744
user:        Jeff Allen 
date:        Tue Sep 01 21:34:21 2015 +0100
summary:
  Dead code removed from PyModule.

Discussion in #1423 refers.

files:
  src/org/python/core/PyModule.java |  8 ++------
  1 files changed, 2 insertions(+), 6 deletions(-)


diff --git a/src/org/python/core/PyModule.java b/src/org/python/core/PyModule.java
--- a/src/org/python/core/PyModule.java
+++ b/src/org/python/core/PyModule.java
@@ -106,15 +106,11 @@
         PyObject modules = Py.getSystemState().modules;
         PyObject attr = modules.__finditem__(fullName);
 
-        if (path == Py.None) {
-            // XXX: disabled
-            //attr = imp.loadFromClassLoader(fullName,
-            //                               Py.getSystemState().getClassLoader());
-        } else if (path instanceof PyList) {
+        if (path instanceof PyList) {
             if (attr == null) {
                 attr = imp.find_module(name, fullName, (PyList)path);
             }
-        } else {
+        } else if (path != Py.None) {
             throw Py.TypeError("__path__ must be list or None");
         }
 

-- 
Repository URL: https://hg.python.org/jython

From jython-checkins at python.org  Fri Sep 11 00:58:40 2015
From: jython-checkins at python.org (jeff.allen)
Date: Thu, 10 Sep 2015 22:58:40 +0000
Subject: [Jython-checkins] =?utf-8?q?jython_=28merge_default_-=3E_default?=
	=?utf-8?q?=29=3A_Merge_=232364_fixes?=
Message-ID: <20150910225840.15716.26939@psf.io>

https://hg.python.org/jython/rev/7d55b82d5842
changeset:   7730:7d55b82d5842
parent:      7724:45a9d8f613b9
parent:      7729:a77dad1d7050
user:        Jeff Allen 
date:        Thu Sep 10 23:56:36 2015 +0100
summary:
  Merge #2364 fixes

files:
  Lib/test/test_bytes.py             |   18 +-
  Lib/test/test_bytes_jy.py          |   63 +++
  NEWS                               |    1 +
  src/org/python/core/BaseBytes.java |  343 ++++++++++------
  src/org/python/core/PyModule.java  |    8 +-
  src/org/python/core/PyString.java  |  264 ++++++------
  src/org/python/core/PyUnicode.java |  140 +++---
  7 files changed, 472 insertions(+), 365 deletions(-)


diff --git a/Lib/test/test_bytes.py b/Lib/test/test_bytes.py
--- a/Lib/test/test_bytes.py
+++ b/Lib/test/test_bytes.py
@@ -357,11 +357,7 @@
             self.assertEqual(b.split(None, 2), [b'arf', b'barf'])
         for b in (b'a\x1Cb', b'a\x1Db', b'a\x1Eb', b'a\x1Fb'):
             b = self.type2test(b)
-            if not test.test_support.is_jython:
-                self.assertEqual(b.split(), [b])
-            else:
-                # \x1c .. \x1f are whitespace Jython (which follows Java)
-                self.assertEqual(b.split(), [b'a', b'b'])
+            self.assertEqual(b.split(), [b])
         self.assertEqual(self.type2test(b'  a  bb  c  ').split(None, 0), [b'a  bb  c  '])
         self.assertEqual(self.type2test(b'  a  bb  c  ').split(None, 1), [b'a', b'bb  c  '])
         self.assertEqual(self.type2test(b'  a  bb  c  ').split(None, 2), [b'a', b'bb', b'c  '])
@@ -372,11 +368,7 @@
 
     def test_split_unicodewhitespace(self):
         b = self.type2test(b"\x09\x0A\x0B\x0C\x0D\x1C\x1D\x1E\x1F")
-        if not test.test_support.is_jython:
-            self.assertEqual(b.split(), [b'\x1c\x1d\x1e\x1f'])
-        else:
-            # \x1c .. \x1f are whitespace Jython
-            self.assertEqual(b.split(), [])
+        self.assertEqual(b.split(), [b'\x1c\x1d\x1e\x1f'])
 
     def test_rsplit(self):
         b = self.type2test(b'mississippi')
@@ -401,11 +393,7 @@
 
     def test_rsplit_unicodewhitespace(self):
         b = self.type2test(b"\x09\x0A\x0B\x0C\x0D\x1C\x1D\x1E\x1F")
-        if not test.test_support.is_jython:
-            self.assertEqual(b.rsplit(), [b'\x1c\x1d\x1e\x1f'])
-        else:
-            # \x1c .. \x1f are whitespace Jython
-            self.assertEqual(b.rsplit(), [])
+        self.assertEqual(b.rsplit(), [b'\x1c\x1d\x1e\x1f'])
 
     def test_partition(self):
         b = self.type2test(b'mississippi')
diff --git a/Lib/test/test_bytes_jy.py b/Lib/test/test_bytes_jy.py
--- a/Lib/test/test_bytes_jy.py
+++ b/Lib/test/test_bytes_jy.py
@@ -51,6 +51,69 @@
         for n in range(-1, 3) :
             irepeat_export(b'', n)
 
+    # The following test_is* tests supplement string_tests for non-ascii examples.
+    # The principle is to choose some character codes that are letters, digits
+    # or spaces in Unicode but not in ASCII and check they are *not* categorised
+    # as such in a byte context.
+
+    def checkequal(self, expected, obj, methodname, *args):
+        "check that object.method() returns expected result"
+        for B in (bytearray,): # (bytes, bytearray):
+            obj = B(obj)
+            realresult = getattr(obj, methodname)()
+            grumble = "%r.%s() returned %s" % (obj, methodname, realresult)
+            self.assertIs(expected, realresult, grumble)
+            # print grumble, 'x' if realresult != expected else '.'
+
+    LOWER = b'\xe0\xe7\xe9\xff' # Lowercase in Latin-1 but not ascii
+    UPPER = b'\xc0\xc7\xc9\xdd' # Uppercase in Latin-1 but not ascii
+    DIGIT = b'\xb9\xb2\xb3'     # sup 1, 2, 3: numeric in Python (not Java)
+    SPACE = b'\x85\xa0'         # NEXT LINE, NBSP: space in Python (not Java)
+
+    def test_isalpha(self):
+        for c in self.UPPER + self.LOWER:
+            self.checkequal(False, c, 'isalpha')
+            self.checkequal(False, b'a' + c + b'Z', 'isalpha')
+
+    def test_isdigit(self):
+        for c in self.DIGIT:
+            self.checkequal(False, c, 'isdigit')
+            self.checkequal(False, b'1' + c + b'3', 'isdigit')
+
+    def test_islower(self):
+        for c in self.LOWER:
+            self.checkequal(False, c, 'islower')
+        for c in self.UPPER:
+            self.checkequal(True, b'a' + c + b'z', 'islower')
+
+    def test_isupper(self):
+        for c in self.UPPER:
+            self.checkequal(False, c, 'isupper')
+        for c in self.LOWER:
+            self.checkequal(True, b'A' + c + b'Z', 'isupper')
+
+    def test_isspace(self):
+        for c in self.SPACE:
+            self.checkequal(False, c, 'isspace')
+            self.checkequal(False, b'\t' + c + b' ', 'isspace')
+
+    def test_isalnum(self):
+        for c in self.UPPER + self.LOWER + self.DIGIT:
+            self.checkequal(False, c, 'isalnum')
+            self.checkequal(False, b'a' + c + b'3', 'isalnum')
+
+    def test_istitle(self):
+        for c in self.UPPER:
+            # c should be an un-cased character (effectively a space)
+            self.checkequal(False, c, 'istitle')
+            self.checkequal(True, b'A' + c + b'Titlecased Line', 'istitle')
+            self.checkequal(True, b'A' + c + b' Titlecased Line', 'istitle')
+            self.checkequal(True, b'A ' + c + b'Titlecased Line', 'istitle')
+        for c in self.LOWER:
+            # c should be an un-cased character (effectively a space)
+            self.checkequal(True, b'A' + c + b'Titlecased Line', 'istitle')
+            self.checkequal(True, b'A ' + c + b' Titlecased Line', 'istitle')
+
 
 def test_main():
     test.test_support.run_unittest(
diff --git a/NEWS b/NEWS
--- a/NEWS
+++ b/NEWS
@@ -10,6 +10,7 @@
    - [ 2158, 2259 ] Fixed behaviour of relative from ... import *
    - [ 1879 ] -m command now executes scripts from inside a jar file 
    - [ 2058 ] ClasspathPyImporter implements PEP 302 get_data (and others)
+   - [ 2364 ] bytearray and str: isalpha(), isupper() etc. now match Python 2
 
 Jython 2.7
   same as 2.7rc3
diff --git a/src/org/python/core/BaseBytes.java b/src/org/python/core/BaseBytes.java
--- a/src/org/python/core/BaseBytes.java
+++ b/src/org/python/core/BaseBytes.java
@@ -1737,9 +1737,9 @@
      */
     protected int lstripIndex() {
         int limit = offset + size;
-        // Run up the storage until non-whitespace (or hit end)t
+        // Run up the storage until non-whitespace (or hit end)
         for (int left = offset; left < limit; left++) {
-            if (!Character.isWhitespace(storage[left] & 0xff)) {
+            if (!isspace(storage[left])) {
                 return left - offset;
             }
         }
@@ -1777,7 +1777,7 @@
     protected int rstripIndex() {
         // Run down the storage until next is non-whitespace (or hit start)
         for (int right = offset + size; right > offset; --right) {
-            if (!Character.isWhitespace(storage[right - 1] & 0xff)) {
+            if (!isspace(storage[right - 1])) {
                 return right - offset;
             }
         }
@@ -2604,7 +2604,7 @@
 
         // Scan backwards over trailing whitespace
         for (q = offset + size; q > offset; --q) {
-            if (!Character.isWhitespace(storage[q - 1] & 0xff)) {
+            if (!isspace(storage[q - 1])) {
                 break;
             }
         }
@@ -2617,7 +2617,7 @@
             // Delimit the word whose last byte is storage[q-1]
             // Skip p backwards over the non-whitespace
             for (p = q; p > offset; --p) {
-                if (Character.isWhitespace(storage[p - 1] & 0xff)) {
+                if (isspace(storage[p - 1])) {
                     break;
                 }
             }
@@ -2626,7 +2626,7 @@
             result.add(0, word);
             // Skip q backwards over the whitespace
             for (q = p; q > offset; --q) {
-                if (!Character.isWhitespace(storage[q - 1] & 0xff)) {
+                if (!isspace(storage[q - 1])) {
                     break;
                 }
             }
@@ -2795,7 +2795,7 @@
         int p, q; // Indexes of unsplit text and whitespace
 
         // Scan over leading whitespace
-        for (p = offset; p < limit && Character.isWhitespace(storage[p] & 0xff); p++) {
+        for (p = offset; p < limit && isspace(storage[p]); p++) {
             ; // continue
         }
 
@@ -2807,13 +2807,13 @@
             // Delimit a word at p
             // storage[p] is not whitespace or at the limit: it is the start of a word
             // Skip q over the non-whitespace at p
-            for (q = p; q < limit && !Character.isWhitespace(storage[q] & 0xff); q++) {
+            for (q = p; q < limit && !isspace(storage[q]); q++) {
                 ; // continue
             }
             // storage[q] is whitespace or it is at the limit
             result.append(getslice(p - offset, q - offset));
             // Skip p over the whitespace at q
-            for (p = q; p < limit && Character.isWhitespace(storage[p] & 0xff); p++) {
+            for (p = q; p < limit && isspace(storage[p]); p++) {
                 ; // continue
             }
         }
@@ -3089,11 +3089,68 @@
     // Character class operations
     //
 
+
+    // Bit to twiddle (XOR) for lowercase letter to uppercase and vice-versa.
+    private static final int SWAP_CASE = 0x20;
+
+    // Bit masks and sets to use with the byte classification table
+    private static final byte UPPER = 0b1;
+    private static final byte LOWER = 0b10;
+    private static final byte DIGIT = 0b100;
+    private static final byte SPACE = 0b1000;
+    private static final byte ALPHA = UPPER | LOWER;
+    private static final byte ALNUM = ALPHA | DIGIT;
+
+    // Character (byte) classification table.
+    private static final byte[] ctype = new byte[256];
+    static {
+        for (int c = 'A'; c <= 'Z'; c++) {
+            ctype[0x80 + c] = UPPER;
+            ctype[0x80 + SWAP_CASE + c] = LOWER;
+        }
+        for (int c = '0'; c <= '9'; c++) {
+            ctype[0x80 + c] = DIGIT;
+        }
+        for (char c : " \t\n\u000b\f\r".toCharArray()) {
+            ctype[0x80 + c] = SPACE;
+        }
+    }
+
+    /** @return 'A'<= b <='Z'. */
+    static final boolean isupper(byte b) {
+        return (ctype[0x80 + b] & UPPER) != 0;
+    }
+
+    /** @return 'a'<= b <='z'. */
+    static final boolean islower(byte b) {
+        return (ctype[0x80 + b] & LOWER) != 0;
+    }
+
+    /** @return 'A'<= b <='Z' or 'a'<= b <='z'. */
+    static final boolean isalpha(byte b) {
+        return (ctype[0x80 + b] & ALPHA) != 0;
+    }
+
+    /** @return '0'<= b <='9'. */
+    static final boolean isdigit(byte b) {
+        return (ctype[0x80 + b] & DIGIT) != 0;
+    }
+
+    /** @return 'A'<= b <='Z' or 'a'<= b <='z' or '0'<= b <='9'. */
+    static final boolean isalnum(byte b) {
+        return (ctype[0x80 + b] & ALNUM) != 0;
+    }
+
+    /** @return b in ' \t\n\v\f\r' */
+    static final boolean isspace(byte b) {
+        return (ctype[0x80 + b] & SPACE) != 0;
+    }
+
     /**
-     * Java API equivalent of Python isalnum(). This method treats the bytes as Unicode
-     * pont codes and is consistent with Java's {@link Character#isLetterOrDigit(char)}.
+     * Java API equivalent of Python isalnum(). This method treats the bytes as
+     * US-ASCII code points.
      *
-     * @return true if all bytes in the array are point codes for alphanumerics and there is at
+     * @return true if all bytes in the array are code points for alphanumerics and there is at
      *         least one byte, false otherwise.
      */
     public boolean isalnum() {
@@ -3103,27 +3160,28 @@
     /**
      * Ready-to-expose implementation of Python isalnum().
      *
-     * @return true if all bytes in the array are point codes for alphanumerics and there is at
+     * @return true if all bytes in the array are code points for alphanumerics and there is at
      *         least one byte, false otherwise.
      */
     final boolean basebytes_isalnum() {
-        if (size <= 0) {
-            // Treat empty string as special case
-            return false;
+        if (size == 1) {
+            // Special case strings of length one (i.e. characters)
+            return isalnum(storage[offset]);
         } else {
-            // Test the bytes
+            // Work through the bytes, stopping early if the test is false.
             for (int i = 0; i < size; i++) {
-                if (!Character.isLetterOrDigit(charAt(i))) {
+                if (!isalnum(storage[offset + i])) {
                     return false;
                 }
             }
-            return true;
+            // Result is true if we reached the end (and there were some bytes)
+            return size > 0;
         }
     }
 
     /**
-     * Java API equivalent of Python isalpha(). This method treats the bytes as Unicode
-     * pont codes and is consistent with Java's {@link Character#isLetter(char)}.
+     * Java API equivalent of Python isalpha(). This method treats the bytes as
+     * US-ASCII code points.
      *
      * @return true if all bytes in the array are alphabetic and there is at least one byte, false
      *         otherwise
@@ -3139,25 +3197,26 @@
      *         otherwise
      */
     final boolean basebytes_isalpha() {
-        if (size <= 0) {
-            // Treat empty string as special case
-            return false;
+        if (size == 1) {
+            // Special case strings of length one (i.e. characters)
+            return isalpha(storage[offset]);
         } else {
-            // Test the bytes
+            // Work through the bytes, stopping early if the test is false.
             for (int i = 0; i < size; i++) {
-                if (!Character.isLetter(charAt(i))) {
+                if (!isalpha(storage[offset + i])) {
                     return false;
                 }
             }
-            return true;
+            // Result is true if we reached the end (and there were some bytes)
+            return size > 0;
         }
     }
 
     /**
-     * Java API equivalent of Python isdigit(). This method treats the bytes as Unicode
-     * pont codes and is consistent with Java's {@link Character#isDigit(char)}.
+     * Java API equivalent of Python isdigit(). This method treats the bytes as
+     * US-ASCII code points.
      *
-     * @return true if all bytes in the array are point codes for digits and there is at least one
+     * @return true if all bytes in the array are code points for digits and there is at least one
      *         byte, false otherwise.
      */
     public boolean isdigit() {
@@ -3167,29 +3226,30 @@
     /**
      * Ready-to-expose implementation of Python isdigit().
      *
-     * @return true if all bytes in the array are point codes for digits and there is at least one
+     * @return true if all bytes in the array are code points for digits and there is at least one
      *         byte, false otherwise.
      */
     final boolean basebytes_isdigit() {
-        if (size <= 0) {
-            // Treat empty string as special case
-            return false;
+        if (size == 1) {
+            // Special case strings of length one (i.e. characters)
+            return isdigit(storage[offset]);
         } else {
-            // Test the bytes
+            // Work through the bytes, stopping early if the test is false.
             for (int i = 0; i < size; i++) {
-                if (!Character.isDigit(charAt(i))) {
+                if (!isdigit(storage[offset + i])) {
                     return false;
                 }
             }
-            return true;
+            // Result is true if we reached the end (and there were some bytes)
+            return size > 0;
         }
     }
 
     /**
-     * Java API equivalent of Python islower(). This method treats the bytes as Unicode
-     * pont codes and is consistent with Java's {@link Character#isLowerCase(char)}.
+     * Java API equivalent of Python islower(). This method treats the bytes as
+     * US-ASCII code points.
      *
-     * @return true if all cased bytes in the array are point codes for lowercase characters and
+     * @return true if all cased bytes in the array are code points for lowercase characters and
      *         there is at least one cased byte, false otherwise.
      */
     public boolean islower() {
@@ -3199,31 +3259,46 @@
     /**
      * Ready-to-expose implementation of Python islower().
      *
-     * @return true if all cased bytes in the array are point codes for lowercase characters and
+     * @return true if all cased bytes in the array are code points for lowercase characters and
      *         there is at least one cased byte, false otherwise.
      */
     final boolean basebytes_islower() {
-        boolean hasCased = false;
-        // Test the bytes
-        for (int i = 0; i < size; i++) {
-            char c = charAt(i);
-            if (Character.isUpperCase(c)) {
+        if (size == 1) {
+            // Special case strings of length one (i.e. characters)
+            return islower(storage[offset]);
+
+        } else {
+            int i;
+            byte c = 0;
+            // Test the bytes until a cased byte is encountered
+            for (i = 0; i < size; i++) {
+                if (isalpha(c = storage[offset + i])) {
+                    break;
+                }
+            }
+
+            if (i == size || isupper(c)) {
+                // We reached the end without finding a cased byte, or it was upper case.
                 return false;
-            } else if (hasCased) {
-                continue;   // Don't need to keep checking for cased characters
-            } else if (Character.isLowerCase(c)) {
-                hasCased = true;
             }
+
+            // Continue to end or until an upper case byte is encountered
+            for (i = i + 1; i < size; i++) {
+                if (isupper(storage[offset + i])) {
+                    return false;
+                }
+            }
+
+            // Found no upper case bytes, and at least one lower case byte.
+            return true;
         }
-        // Found no upper case bytes, but did we find any cased bytes at all?
-        return hasCased;
     }
 
     /**
-     * Java API equivalent of Python isspace(). This method treats the bytes as Unicode
-     * pont codes and is consistent with Java's {@link Character#isWhitespace(char)}.
+     * Java API equivalent of Python isspace(). This method treats the bytes as
+     * US-ASCII code points.
      *
-     * @return true if all the bytes in the array are point codes for whitespace characters and
+     * @return true if all the bytes in the array are code points for whitespace characters and
      *         there is at least one byte, false otherwise.
      */
     public boolean isspace() {
@@ -3233,28 +3308,28 @@
     /**
      * Ready-to-expose implementation of Python isspace().
      *
-     * @return true if all the bytes in the array are point codes for whitespace characters and
+     * @return true if all the bytes in the array are code points for whitespace characters and
      *         there is at least one byte, false otherwise.
      */
     final boolean basebytes_isspace() {
-        if (size <= 0) {
-            // Treat empty string as special case
-            return false;
+        if (size == 1) {
+            // Special case strings of length one (i.e. characters)
+            return isspace(storage[offset]);
         } else {
-            // Test the bytes
+            // Work through the bytes, stopping early if the test is false.
             for (int i = 0; i < size; i++) {
-                if (!Character.isWhitespace(charAt(i))) {
+                if (!isspace(storage[offset + i])) {
                     return false;
                 }
             }
-            return true;
+            // Result is true if we reached the end (and there were some bytes)
+            return size > 0;
         }
     }
 
     /**
-     * Java API equivalent of Python istitle(). This method treats the bytes as Unicode
-     * pont codes and is consistent with Java's {@link Character#isUpperCase(char)} and
-     * {@link Character#isLowerCase(char)}.
+     * Java API equivalent of Python istitle(). This method treats the bytes as
+     * US-ASCII code points.
      *
      * @return true if the string is a titlecased string and there is at least one cased byte, for
      *         example uppercase characters may only follow uncased bytes and lowercase characters
@@ -3279,8 +3354,8 @@
         // 2 = in a word (hence have have seen cased character)
 
         for (int i = 0; i < size; i++) {
-            char c = charAt(i);
-            if (Character.isUpperCase(c)) {
+            byte c = storage[offset+i];
+            if (isupper(c)) {
                 if (state == 2) {
                     // Violation: can't continue a word in upper case
                     return false;
@@ -3288,7 +3363,7 @@
                     // Validly in a word
                     state = 2;
                 }
-            } else if (Character.isLowerCase(c)) {
+            } else if (islower(c)) {
                 if (state != 2) {
                     // Violation: can't start a word in lower case
                     return false;
@@ -3305,10 +3380,10 @@
     }
 
     /**
-     * Java API equivalent of Python isupper(). This method treats the bytes as Unicode
-     * pont codes and is consistent with Java's {@link Character#isUpperCase(char)}.
+     * Java API equivalent of Python isupper(). This method treats the bytes as
+     * US-ASCII code points.
      *
-     * @return true if all cased bytes in the array are point codes for uppercase characters and
+     * @return true if all cased bytes in the array are code points for uppercase characters and
      *         there is at least one cased byte, false otherwise.
      */
     public boolean isupper() {
@@ -3318,24 +3393,39 @@
     /**
      * Ready-to-expose implementation of Python isupper().
      *
-     * @return true if all cased bytes in the array are point codes for uppercase characters and
+     * @return true if all cased bytes in the array are code points for uppercase characters and
      *         there is at least one cased byte, false otherwise.
      */
     final boolean basebytes_isupper() {
-        boolean hasCased = false;
-        // Test the bytes
-        for (int i = 0; i < size; i++) {
-            char c = charAt(i);
-            if (Character.isLowerCase(c)) {
+        if (size == 1) {
+            // Special case strings of length one (i.e. characters)
+            return isupper(storage[offset]);
+
+        } else {
+            int i;
+            byte c = 0;
+            // Test the bytes until a cased byte is encountered
+            for (i = 0; i < size; i++) {
+                if (isalpha(c = storage[offset + i])) {
+                    break;
+                }
+            }
+
+            if (i == size || islower(c)) {
+                // We reached the end without finding a cased byte, or it was lower case.
                 return false;
-            } else if (hasCased) {
-                continue;   // Don't need to keep checking for cased characters
-            } else if (Character.isUpperCase(c)) {
-                hasCased = true;
             }
+
+            // Continue to end or until a lower case byte is encountered
+            for (i = i + 1; i < size; i++) {
+                if (islower(storage[offset + i])) {
+                    return false;
+                }
+            }
+
+            // Found no lower case bytes, and at least one upper case byte.
+            return true;
         }
-        // Found no lower case bytes, but did we find any cased bytes at all?
-        return hasCased;
     }
 
     //
@@ -3344,9 +3434,8 @@
 
     /**
      * Java API equivalent of Python capitalize(). This method treats the bytes as
-     * Unicode pont codes and is consistent with Java's {@link Character#toUpperCase(char)} and
-     * {@link Character#toLowerCase(char)}. The BaseBytes returned by this method has
-     * the same actual type as this/self.
+     * US-ASCII code points. The BaseBytes returned by this method has the same actual
+     * type as this/self.
      *
      * @return a copy of the array with its first character capitalized and the rest lowercased.
      */
@@ -3367,21 +3456,21 @@
 
         if (size > 0) {
             // Treat first character
-            char c = charAt(0);
-            if (Character.isLowerCase(c)) {
-                c = Character.toUpperCase(c);
+            byte c = storage[offset];
+            if (islower(c)) {
+                c ^= SWAP_CASE;         // 'a' -> 'A', etc.
             }
             // Put the adjusted character in the output as a byte
-            builder.append((byte)c);
+            builder.append(c);
 
             // Treat the rest
             for (int i = 1; i < size; i++) {
-                c = charAt(i);
-                if (Character.isUpperCase(c)) {
-                    c = Character.toLowerCase(c);
+                c = storage[offset+i];
+                if (isupper(c)) {
+                    c ^= SWAP_CASE;     // 'A' -> 'a', etc.
                 }
                 // Put the adjusted character in the output as a byte
-                builder.append((byte)c);
+                builder.append(c);
             }
         }
 
@@ -3389,9 +3478,8 @@
     }
 
     /**
-     * Java API equivalent of Python lower(). This method treats the bytes as Unicode
-     * pont codes and is consistent with Java's {@link Character#toLowerCase(char)}. The
-     * BaseBytes returned by this method has the same actual type as
+     * Java API equivalent of Python lower(). This method treats the bytes as US-ASCII
+     * code points. The BaseBytes returned by this method has the same actual type as
      * this/self.
      *
      * @return a copy of the array with all the cased characters converted to lowercase.
@@ -3411,12 +3499,12 @@
         Builder builder = getBuilder(size);
 
         for (int i = 0; i < size; i++) {
-            char c = charAt(i);
-            if (Character.isUpperCase(c)) {
-                c = Character.toLowerCase(c);
+            byte c = storage[offset+i];
+            if (isupper(c)) {
+                c ^= SWAP_CASE;     // 'A' -> 'a', etc.
             }
             // Put the adjusted character in the output as a byte
-            builder.append((byte)c);
+            builder.append(c);
         }
 
         return builder.getResult();
@@ -3424,9 +3512,8 @@
 
     /**
      * Java API equivalent of Python swapcase(). This method treats the bytes as
-     * Unicode pont codes and is consistent with Java's {@link Character#toUpperCase(char)} and
-     * {@link Character#toLowerCase(char)}. The BaseBytes returned by this method has
-     * the same actual type as this/self.
+     * US-ASCII code points. The BaseBytes returned by this method has the same actual
+     * type as this/self.
      *
      * @return a copy of the array with uppercase characters converted to lowercase and vice versa.
      */
@@ -3445,14 +3532,12 @@
         Builder builder = getBuilder(size);
 
         for (int i = 0; i < size; i++) {
-            char c = charAt(i);
-            if (Character.isUpperCase(c)) {
-                c = Character.toLowerCase(c);
-            } else if (Character.isLowerCase(c)) {
-                c = Character.toUpperCase(c);
+            byte c = storage[offset+i];
+            if (isalpha(c)) {
+                c ^= SWAP_CASE;     // 'a' -> 'A', 'A' -> 'a', etc.
             }
             // Put the adjusted character in the output as a byte
-            builder.append((byte)c);
+            builder.append(c);
         }
 
         return builder.getResult();
@@ -3485,27 +3570,27 @@
         boolean inWord = false; // We begin, not in a word (sequence of cased characters)
 
         for (int i = 0; i < size; i++) {
-            char c = charAt(i);
+            byte c = storage[offset+i];
 
             if (!inWord) {
                 // When we are not in a word ...
-                if (Character.isLowerCase(c)) {
-                    c = Character.toUpperCase(c);   // ... a lowercase letter must be upcased
+                if (islower(c)) {
+                    c ^= SWAP_CASE;                 // ... a lowercase letter must be upcased
                     inWord = true;                  // and it starts a word.
-                } else if (Character.isUpperCase(c)) {
+                } else if (isupper(c)) {
                     inWord = true;                  // ... an uppercase letter just starts the word
                 }
 
             } else {
                 // When we are in a word ...
-                if (Character.isUpperCase(c)) {
-                    c = Character.toLowerCase(c);   // ... an uppercase letter must be downcased
-                } else if (!Character.isLowerCase(c)) {
+                if (isupper(c)) {
+                    c ^= SWAP_CASE;                 // ... an uppercase letter must be downcased
+                } else if (!islower(c)) {
                     inWord = false;                 // ... and a non-letter ends the word
                 }
             }
             // Put the adjusted character in the output as a byte
-            builder.append((byte)c);
+            builder.append(c);
         }
         return builder.getResult();
     }
@@ -3533,12 +3618,12 @@
         Builder builder = getBuilder(size);
 
         for (int i = 0; i < size; i++) {
-            char c = charAt(i);
-            if (Character.isLowerCase(c)) {
-                c = Character.toUpperCase(c);
+            byte c = storage[offset+i];
+            if (islower(c)) {
+                c ^= SWAP_CASE;     // 'a' -> 'A' etc.
             }
             // Put the adjusted character in the output as a byte
-            builder.append((byte)c);
+            builder.append(c);
         }
 
         return builder.getResult();
@@ -3575,18 +3660,6 @@
     }
 
     /**
-     * Return the Python byte (in range 0 to 255 inclusive) at the given index, interpreted as an
-     * unsigned point code, without checking the index.
-     *
-     * @param index of value in byte array
-     * @return the char value at the index
-     * @throws IndexOutOfBoundsException if outside storage array
-     */
-    private final char charAt(int index) throws IndexOutOfBoundsException {
-        return (char)(0xff & storage[index + offset]);
-    }
-
-    /**
      * Helper to implement {@link #repeat(int)}. Use something like:
      *
      * 
@@ -3638,7 +3711,7 @@
 
     /**
      * Almost ready-to-expose Python __repr__(), based on treating the bytes as point
-     * codes. The value added by this method is conversion of non-printing point codes to
+     * codes. The value added by this method is conversion of non-printing code points to
      * hexadecimal escapes in printable ASCII, and bracketed by the given before and after strings.
      * These are used to get the required presentation:
      *
diff --git a/src/org/python/core/PyModule.java b/src/org/python/core/PyModule.java
--- a/src/org/python/core/PyModule.java
+++ b/src/org/python/core/PyModule.java
@@ -106,15 +106,11 @@
         PyObject modules = Py.getSystemState().modules;
         PyObject attr = modules.__finditem__(fullName);
 
-        if (path == Py.None) {
-            // XXX: disabled
-            //attr = imp.loadFromClassLoader(fullName,
-            //                               Py.getSystemState().getClassLoader());
-        } else if (path instanceof PyList) {
+        if (path instanceof PyList) {
             if (attr == null) {
                 attr = imp.find_module(name, fullName, (PyList)path);
             }
-        } else {
+        } else if (path != Py.None) {
             throw Py.TypeError("__path__ must be list or None");
         }
 
diff --git a/src/org/python/core/PyString.java b/src/org/python/core/PyString.java
--- a/src/org/python/core/PyString.java
+++ b/src/org/python/core/PyString.java
@@ -6,10 +6,10 @@
 import java.math.BigInteger;
 import java.util.ArrayList;
 import java.util.Collection;
+import java.util.List;
+import java.util.Locale;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
-import java.util.List;
-import java.util.Locale;
 
 import org.python.core.buffer.BaseBuffer;
 import org.python.core.buffer.SimpleStringBuffer;
@@ -2662,8 +2662,8 @@
      * Return the (lazily) compiled regular expression for a Python complex number. This is used
      * within the regular expression patterns that define a priori acceptable strings in the complex
      * constructors. The expression contributes five named capture groups a, b, x, y and j. x and y
-     * are the two floats encountered, and if j is present, one of them is the imaginary part.
-     * a and b are the optional parentheses. They must either both be present or both omitted.
+     * are the two floats encountered, and if j is present, one of them is the imaginary part. a and
+     * b are the optional parentheses. They must either both be present or both omitted.
      */
     private static synchronized Pattern getComplexPattern() {
         if (complexPattern == null) {
@@ -3602,77 +3602,99 @@
 
     @ExposedMethod(doc = BuiltinDocs.str_islower_doc)
     final boolean str_islower() {
-        int n = getString().length();
-
-        /* Shortcut for single character strings */
+        String s = getString();
+        int n = s.length();
+
         if (n == 1) {
-            return Character.isLowerCase(getString().charAt(0));
+            // Special case single character strings.
+            return _islower(s.charAt(0));
         }
 
         boolean cased = false;
         for (int i = 0; i < n; i++) {
-            char ch = getString().charAt(i);
-
-            if (Character.isUpperCase(ch) || Character.isTitleCase(ch)) {
+            char ch = s.charAt(i);
+            if (_isupper(ch)) {
                 return false;
-            } else if (!cased && Character.isLowerCase(ch)) {
+            } else if (!cased && _islower(ch)) {
                 cased = true;
             }
         }
         return cased;
     }
 
+    private boolean _islower(char ch) {
+        if (ch < 256) {
+            return BaseBytes.islower((byte)ch);
+        } else {
+            // This is an internal error. Really, the test should be unnecessary.
+            throw new java.lang.IllegalArgumentException("non-byte character in PyString");
+        }
+    }
+
     public boolean isupper() {
         return str_isupper();
     }
 
     @ExposedMethod(doc = BuiltinDocs.str_isupper_doc)
     final boolean str_isupper() {
-        int n = getString().length();
-
-        /* Shortcut for single character strings */
+        String s = getString();
+        int n = s.length();
+
         if (n == 1) {
-            return Character.isUpperCase(getString().charAt(0));
+            // Special case single character strings.
+            return _isupper(s.charAt(0));
         }
 
         boolean cased = false;
         for (int i = 0; i < n; i++) {
-            char ch = getString().charAt(i);
-
-            if (Character.isLowerCase(ch) || Character.isTitleCase(ch)) {
+            char ch = s.charAt(i);
+            if (_islower(ch)) {
                 return false;
-            } else if (!cased && Character.isUpperCase(ch)) {
+            } else if (!cased && _isupper(ch)) {
                 cased = true;
             }
         }
         return cased;
     }
 
+    private boolean _isupper(char ch) {
+        if (ch < 256) {
+            return BaseBytes.isupper((byte)ch);
+        } else {
+            // This is an internal error. Really, the test should be unnecessary.
+            throw new java.lang.IllegalArgumentException("non-byte character in PyString");
+        }
+    }
+
     public boolean isalpha() {
         return str_isalpha();
     }
 
     @ExposedMethod(doc = BuiltinDocs.str_isalpha_doc)
     final boolean str_isalpha() {
-        int n = getString().length();
-
-        /* Shortcut for single character strings */
+        String s = getString();
+        int n = s.length();
+
         if (n == 1) {
-            return Character.isLetter(getString().charAt(0));
+            // Special case single character strings.
+            return _isalpha(s.charAt(0));
         }
 
-        if (n == 0) {
-            return false;
-        }
-
         for (int i = 0; i < n; i++) {
-            char ch = getString().charAt(i);
-
-            if (!Character.isLetter(ch)) {
+            if (!_isalpha(s.charAt(i))) {
                 return false;
             }
         }
-        return true;
+        return n > 0;
+    }
+
+    private boolean _isalpha(char ch) {
+        if (ch < 256) {
+            return BaseBytes.isalpha((byte)ch);
+        } else {
+            // This is an internal error. Really, the test should be unnecessary.
+            throw new java.lang.IllegalArgumentException("non-byte character in PyString");
+        }
     }
 
     public boolean isalnum() {
@@ -3681,33 +3703,30 @@
 
     @ExposedMethod(doc = BuiltinDocs.str_isalnum_doc)
     final boolean str_isalnum() {
-        int n = getString().length();
-
-        /* Shortcut for single character strings */
+        String s = getString();
+        int n = s.length();
+
         if (n == 1) {
-            return _isalnum(getString().charAt(0));
+            // Special case single character strings.
+            return _isalnum(s.charAt(0));
         }
 
-        if (n == 0) {
-            return false;
-        }
-
         for (int i = 0; i < n; i++) {
-            char ch = getString().charAt(i);
-
-            if (!_isalnum(ch)) {
+            if (!_isalnum(s.charAt(i))) {
                 return false;
             }
         }
-        return true;
+        return n > 0;
     }
 
     private boolean _isalnum(char ch) {
-        // This can ever be entirely compatible with CPython. In CPython
-        // The type is not used, the numeric property is determined from
-        // the presense of digit, decimal or numeric fields. These fields
-        // are not available in exactly the same way in java.
-        return Character.isLetterOrDigit(ch) || Character.getType(ch) == Character.LETTER_NUMBER;
+        // This is now entirely compatible with CPython, as long as only bytes are stored.
+        if (ch < 256) {
+            return BaseBytes.isalnum((byte)ch);
+        } else {
+            // This is an internal error. Really, the test should be unnecessary.
+            throw new java.lang.IllegalArgumentException("non-byte character in PyString");
+        }
     }
 
     public boolean isdecimal() {
@@ -3715,59 +3734,44 @@
     }
 
     @ExposedMethod(doc = BuiltinDocs.unicode_isdecimal_doc)
-    final boolean str_isdecimal() {
-        int n = getString().length();
-
-        /* Shortcut for single character strings */
+    final boolean str_isdecimal() { // XXX this ought not to exist in str (in Python 2)
+        return str_isdigit();
+    }
+
+    private boolean _isdecimal(char ch) {
+        // See the comment in _isalnum. Here it is even worse.
+        return Character.getType(ch) == Character.DECIMAL_DIGIT_NUMBER;
+    }
+
+    public boolean isdigit() {
+        return str_isdigit();
+    }
+
+    @ExposedMethod(doc = BuiltinDocs.str_isdigit_doc)
+    final boolean str_isdigit() {
+        String s = getString();
+        int n = s.length();
+
         if (n == 1) {
-            char ch = getString().charAt(0);
-            return _isdecimal(ch);
+            // Special case single character strings.
+            return _isdigit(s.charAt(0));
         }
 
-        if (n == 0) {
-            return false;
-        }
-
         for (int i = 0; i < n; i++) {
-            char ch = getString().charAt(i);
-
-            if (!_isdecimal(ch)) {
+            if (!_isdigit(s.charAt(i))) {
                 return false;
             }
         }
-        return true;
-    }
-
-    private boolean _isdecimal(char ch) {
-        // See the comment in _isalnum. Here it is even worse.
-        return Character.getType(ch) == Character.DECIMAL_DIGIT_NUMBER;
-    }
-
-    public boolean isdigit() {
-        return str_isdigit();
-    }
-
-    @ExposedMethod(doc = BuiltinDocs.str_isdigit_doc)
-    final boolean str_isdigit() {
-        int n = getString().length();
-
-        /* Shortcut for single character strings */
-        if (n == 1) {
-            return Character.isDigit(getString().charAt(0));
+        return n > 0;
+    }
+
+    private boolean _isdigit(char ch) {
+        if (ch < 256) {
+            return BaseBytes.isdigit((byte)ch);
+        } else {
+            // This is an internal error. Really, the test should be unnecessary.
+            throw new java.lang.IllegalArgumentException("non-byte character in PyString");
         }
-
-        if (n == 0) {
-            return false;
-        }
-
-        for (int i = 0; i < n; i++) {
-            char ch = getString().charAt(i);
-
-            if (!Character.isDigit(ch)) {
-                return false;
-            }
-        }
-        return true;
     }
 
     public boolean isnumeric() {
@@ -3775,31 +3779,8 @@
     }
 
     @ExposedMethod(doc = BuiltinDocs.unicode_isnumeric_doc)
-    final boolean str_isnumeric() {
-        int n = getString().length();
-
-        /* Shortcut for single character strings */
-        if (n == 1) {
-            return _isnumeric(getString().charAt(0));
-        }
-
-        if (n == 0) {
-            return false;
-        }
-
-        for (int i = 0; i < n; i++) {
-            char ch = getString().charAt(i);
-            if (!_isnumeric(ch)) {
-                return false;
-            }
-        }
-        return true;
-    }
-
-    private boolean _isnumeric(char ch) {
-        int type = Character.getType(ch);
-        return type == Character.DECIMAL_DIGIT_NUMBER || type == Character.LETTER_NUMBER
-                || type == Character.OTHER_NUMBER;
+    final boolean str_isnumeric() { // XXX this ought not to exist in str (in Python 2)
+        return str_isdigit();
     }
 
     public boolean istitle() {
@@ -3808,26 +3789,25 @@
 
     @ExposedMethod(doc = BuiltinDocs.str_istitle_doc)
     final boolean str_istitle() {
-        int n = getString().length();
-
-        /* Shortcut for single character strings */
+        String s = getString();
+        int n = s.length();
+
         if (n == 1) {
-            return Character.isTitleCase(getString().charAt(0))
-                    || Character.isUpperCase(getString().charAt(0));
+            // Special case single character strings.
+            return _isupper(s.charAt(0));
         }
 
         boolean cased = false;
         boolean previous_is_cased = false;
         for (int i = 0; i < n; i++) {
-            char ch = getString().charAt(i);
-
-            if (Character.isUpperCase(ch) || Character.isTitleCase(ch)) {
+            char ch = s.charAt(i);
+            if (_isupper(ch)) {
                 if (previous_is_cased) {
                     return false;
                 }
                 previous_is_cased = true;
                 cased = true;
-            } else if (Character.isLowerCase(ch)) {
+            } else if (_islower(ch)) {
                 if (!previous_is_cased) {
                     return false;
                 }
@@ -3846,25 +3826,29 @@
 
     @ExposedMethod(doc = BuiltinDocs.str_isspace_doc)
     final boolean str_isspace() {
-        int n = getString().length();
-
-        /* Shortcut for single character strings */
+        String s = getString();
+        int n = s.length();
+
         if (n == 1) {
-            return Character.isWhitespace(getString().charAt(0));
+            // Special case single character strings.
+            return _isspace(s.charAt(0));
         }
 
-        if (n == 0) {
-            return false;
-        }
-
         for (int i = 0; i < n; i++) {
-            char ch = getString().charAt(i);
-
-            if (!Character.isWhitespace(ch)) {
+            if (!_isspace(s.charAt(i))) {
                 return false;
             }
         }
-        return true;
+        return n > 0;
+    }
+
+    private boolean _isspace(char ch) {
+        if (ch < 256) {
+            return BaseBytes.isspace((byte)ch);
+        } else {
+            // This is an internal error. Really, the test should be unnecessary.
+            throw new java.lang.IllegalArgumentException("non-byte character in PyString");
+        }
     }
 
     public boolean isunicode() {
diff --git a/src/org/python/core/PyUnicode.java b/src/org/python/core/PyUnicode.java
--- a/src/org/python/core/PyUnicode.java
+++ b/src/org/python/core/PyUnicode.java
@@ -9,6 +9,7 @@
 import java.util.Set;
 
 import com.google.common.base.CharMatcher;
+
 import org.python.core.stringlib.FieldNameIterator;
 import org.python.core.stringlib.MarkupIterator;
 import org.python.expose.ExposedMethod;
@@ -582,7 +583,9 @@
     }
 
     public static String checkEncoding(String s) {
-        if (s == null || CharMatcher.ASCII.matchesAllOf(s)) { return s; }
+        if (s == null || CharMatcher.ASCII.matchesAllOf(s)) {
+            return s;
+        }
         return codecs.PyUnicode_EncodeASCII(s, s.length(), null);
     }
 
@@ -739,19 +742,21 @@
         return Py.makeCharacter(codepoint, true);
     }
 
+    @Override
     public int getInt(int i) {
         return getString().codePointAt(translator.utf16Index(i));
     }
 
-    private class SubsequenceIteratorImpl implements Iterator {
+    /**
+     * An iterator returning code points from this array, for use when not basic plane.
+     */
+    private class SubsequenceIteratorImpl extends SubsequenceIteratorBasic {
 
-        private int current, k, stop, step;
+        private int k; // UTF-16 index (of current)
 
         SubsequenceIteratorImpl(int start, int stop, int step) {
-            current = start;
+            super(start, stop, step);
             k = translator.utf16Index(current);
-            this.stop = stop;
-            this.step = step;
         }
 
         SubsequenceIteratorImpl() {
@@ -759,22 +764,7 @@
         }
 
         @Override
-        public boolean hasNext() {
-            return current < stop;
-        }
-
-        @Override
-        public Object next() {
-            int codePoint = nextCodePoint();
-            current += 1;
-            for (int j = 1; j < step && hasNext(); j++) {
-                nextCodePoint();
-                current += 1;
-            }
-            return codePoint;
-        }
-
-        private int nextCodePoint() {
+        protected int nextCodePoint() {
             int U;
             int W1 = getString().charAt(k);
             if (W1 >= 0xD800 && W1 < 0xDC00) {
@@ -785,8 +775,45 @@
                 U = W1;
                 k += 1;
             }
+            current += 1;
             return U;
         }
+    }
+
+    /**
+     * An iterator returning code points from this array, for use when basic plane.
+     */
+    private class SubsequenceIteratorBasic implements Iterator {
+
+        protected int current, stop, step; // Character indexes
+
+        SubsequenceIteratorBasic(int start, int stop, int step) {
+            current = start;
+            this.stop = stop;
+            this.step = step;
+        }
+
+        SubsequenceIteratorBasic() {
+            this(0, getCodePointCount(), 1);
+        }
+
+        @Override
+        public boolean hasNext() {
+            return current < stop;
+        }
+
+        @Override
+        public Integer next() {
+            int codePoint = nextCodePoint();
+            for (int j = 1; j < step && hasNext(); j++) {
+                nextCodePoint();
+            }
+            return codePoint;
+        }
+
+        protected int nextCodePoint() {
+            return getString().charAt(current++);
+        }
 
         @Override
         public void remove() {
@@ -845,16 +872,31 @@
     }
 
     // XXX: Parameterize SubsequenceIteratorImpl and friends (and make them Iterable)
+    /** Get an iterator over the code point sequence. */
     public Iterator newSubsequenceIterator() {
-        return new SubsequenceIteratorImpl();
+        if (isBasicPlane()) {
+            return new SubsequenceIteratorBasic();
+        } else {
+            return new SubsequenceIteratorImpl();
+        }
     }
 
+    /** Get an iterator over a slice of the code point sequence. */
     public Iterator newSubsequenceIterator(int start, int stop, int step) {
-        if (step < 0) {
-            return new SteppedIterator(step * -1, new ReversedIterator(new SubsequenceIteratorImpl(
-                    stop + 1, start + 1, 1)));
+        if (isBasicPlane()) {
+            if (step < 0) {
+                return new SteppedIterator(step * -1, new ReversedIterator(
+                        new SubsequenceIteratorBasic(stop + 1, start + 1, 1)));
+            } else {
+                return new SubsequenceIteratorBasic(start, stop, step);
+            }
         } else {
-            return new SubsequenceIteratorImpl(start, stop, step);
+            if (step < 0) {
+                return new SteppedIterator(step * -1, new ReversedIterator(
+                        new SubsequenceIteratorImpl(stop + 1, start + 1, 1)));
+            } else {
+                return new SubsequenceIteratorImpl(start, stop, step);
+            }
         }
     }
 
@@ -948,9 +990,6 @@
 
     @ExposedMethod(doc = BuiltinDocs.unicode_title_doc)
     final PyObject unicode_title() {
-        if (isBasicPlane()) {
-            return new PyUnicode(str_title());
-        }
         StringBuilder buffer = new StringBuilder(getString().length());
         boolean previous_is_cased = false;
         for (Iterator iter = newSubsequenceIterator(); iter.hasNext();) {
@@ -973,9 +1012,6 @@
 
     @ExposedMethod(doc = BuiltinDocs.unicode_swapcase_doc)
     final PyObject unicode_swapcase() {
-        if (isBasicPlane()) {
-            return new PyUnicode(str_swapcase());
-        }
         StringBuilder buffer = new StringBuilder(getString().length());
         for (Iterator iter = newSubsequenceIterator(); iter.hasNext();) {
             int codePoint = iter.next();
@@ -1416,9 +1452,6 @@
 
     @ExposedMethod(defaults = "false", doc = BuiltinDocs.unicode___getslice___doc)
     final PyList unicode_splitlines(boolean keepends) {
-        if (isBasicPlane()) {
-            return str_splitlines(keepends);
-        }
         return new PyList(new LineSplitIterator(keepends));
 
     }
@@ -1582,9 +1615,6 @@
         if (getString().length() == 0) {
             return this;
         }
-        if (isBasicPlane()) {
-            return new PyUnicode(str_capitalize());
-        }
         StringBuilder buffer = new StringBuilder(getString().length());
         boolean first = true;
         for (Iterator iter = newSubsequenceIterator(); iter.hasNext();) {
@@ -1671,13 +1701,8 @@
         return _codecs.translateCharmap(this, "ignore", table);
     }
 
-    // these tests need to be UTF-16 aware because they are character-by-character tests,
-    // so we can only use equivalent str_XXX tests if we are in basic plane
     @ExposedMethod(doc = BuiltinDocs.unicode_islower_doc)
     final boolean unicode_islower() {
-        if (isBasicPlane()) {
-            return str_islower();
-        }
         boolean cased = false;
         for (Iterator iter = newSubsequenceIterator(); iter.hasNext();) {
             int codepoint = iter.next();
@@ -1692,9 +1717,6 @@
 
     @ExposedMethod(doc = BuiltinDocs.unicode_isupper_doc)
     final boolean unicode_isupper() {
-        if (isBasicPlane()) {
-            return str_isupper();
-        }
         boolean cased = false;
         for (Iterator iter = newSubsequenceIterator(); iter.hasNext();) {
             int codepoint = iter.next();
@@ -1709,9 +1731,6 @@
 
     @ExposedMethod(doc = BuiltinDocs.unicode_isalpha_doc)
     final boolean unicode_isalpha() {
-        if (isBasicPlane()) {
-            return str_isalpha();
-        }
         if (getCodePointCount() == 0) {
             return false;
         }
@@ -1725,15 +1744,13 @@
 
     @ExposedMethod(doc = BuiltinDocs.unicode_isalnum_doc)
     final boolean unicode_isalnum() {
-        if (isBasicPlane()) {
-            return str_isalnum();
-        }
         if (getCodePointCount() == 0) {
             return false;
         }
         for (Iterator iter = newSubsequenceIterator(); iter.hasNext();) {
             int codePoint = iter.next();
-            if (!(Character.isLetterOrDigit(codePoint) || Character.getType(codePoint) == Character.LETTER_NUMBER)) {
+            if (!(Character.isLetterOrDigit(codePoint) || //
+            Character.getType(codePoint) == Character.LETTER_NUMBER)) {
                 return false;
             }
         }
@@ -1742,9 +1759,6 @@
 
     @ExposedMethod(doc = BuiltinDocs.unicode_isdecimal_doc)
     final boolean unicode_isdecimal() {
-        if (isBasicPlane()) {
-            return str_isdecimal();
-        }
         if (getCodePointCount() == 0) {
             return false;
         }
@@ -1758,9 +1772,6 @@
 
     @ExposedMethod(doc = BuiltinDocs.unicode_isdigit_doc)
     final boolean unicode_isdigit() {
-        if (isBasicPlane()) {
-            return str_isdigit();
-        }
         if (getCodePointCount() == 0) {
             return false;
         }
@@ -1774,9 +1785,6 @@
 
     @ExposedMethod(doc = BuiltinDocs.unicode_isnumeric_doc)
     final boolean unicode_isnumeric() {
-        if (isBasicPlane()) {
-            return str_isnumeric();
-        }
         if (getCodePointCount() == 0) {
             return false;
         }
@@ -1792,9 +1800,6 @@
 
     @ExposedMethod(doc = BuiltinDocs.unicode_istitle_doc)
     final boolean unicode_istitle() {
-        if (isBasicPlane()) {
-            return str_istitle();
-        }
         if (getCodePointCount() == 0) {
             return false;
         }
@@ -1823,9 +1828,6 @@
 
     @ExposedMethod(doc = BuiltinDocs.unicode_isspace_doc)
     final boolean unicode_isspace() {
-        if (isBasicPlane()) {
-            return str_isspace();
-        }
         if (getCodePointCount() == 0) {
             return false;
         }

-- 
Repository URL: https://hg.python.org/jython

From jython-checkins at python.org  Fri Sep 11 03:41:22 2015
From: jython-checkins at python.org (frank.wierzbicki)
Date: Fri, 11 Sep 2015 01:41:22 +0000
Subject: [Jython-checkins] =?utf-8?q?jython=3A_Added_tag_v2=2E7=2E1b1_for_?=
	=?utf-8?q?changeset_2f0b46abbe31?=
Message-ID: <20150911014122.68869.81474@psf.io>

https://hg.python.org/jython/rev/f93af43e0baf
changeset:   7733:f93af43e0baf
user:        Frank Wierzbicki 
date:        Fri Sep 11 01:41:03 2015 +0000
summary:
  Added tag v2.7.1b1 for changeset 2f0b46abbe31

files:
  .hgtags |  2 ++
  1 files changed, 2 insertions(+), 0 deletions(-)


diff --git a/.hgtags b/.hgtags
--- a/.hgtags
+++ b/.hgtags
@@ -95,3 +95,5 @@
 3a7bb3fb3338964c70d3c734294bfc0f6bc37a3a v2.7.0
 77e0e7c87bd15bad165cebdf8ac88dbae623f339 v2.7.0
 7d55b82d5842bcf602e4835b9ddbedf46da83bfa v2.7.1b1
+7d55b82d5842bcf602e4835b9ddbedf46da83bfa v2.7.1b1
+2f0b46abbe31275edc3257f54f8222fd37752d65 v2.7.1b1

-- 
Repository URL: https://hg.python.org/jython

From jython-checkins at python.org  Fri Sep 11 03:41:21 2015
From: jython-checkins at python.org (frank.wierzbicki)
Date: Fri, 11 Sep 2015 01:41:21 +0000
Subject: [Jython-checkins] =?utf-8?q?jython=3A_Updates_for_2=2E7=2E1_beta1?=
	=?utf-8?q?_release=2E?=
Message-ID: <20150911014121.14885.17412@psf.io>

https://hg.python.org/jython/rev/2f0b46abbe31
changeset:   7732:2f0b46abbe31
tag:         v2.7.1b1
user:        Frank Wierzbicki 
date:        Fri Sep 11 01:40:35 2015 +0000
summary:
  Updates for 2.7.1 beta1 release.

files:
  README.txt |   8 ++++----
  build.xml  |  12 ++++++------
  2 files changed, 10 insertions(+), 10 deletions(-)


diff --git a/README.txt b/README.txt
--- a/README.txt
+++ b/README.txt
@@ -1,10 +1,10 @@
 Jython: Python for the Java Platform
 
-Welcome to Jython 2.7.0!
+Welcome to Jython 2.7.1 beta 1!
 
-This is the final release of the 2.7.0 version of Jython. Along with language
-and runtime compatibility with CPython 2.7.0, Jython 2.7 provides substantial
-support of the Python ecosystem. This includes built-in support of
+This is the first beta release of the 2.7.1 version of Jython. Along with
+language and runtime compatibility with CPython 2.7.1, Jython 2.7 provides
+substantial support of the Python ecosystem. This includes built-in support of
 pip/setuptools (you can use with bin/pip) and a native launcher for Windows
 (bin/jython.exe), with the implication that you can finally install Jython
 scripts on Windows.
diff --git a/build.xml b/build.xml
--- a/build.xml
+++ b/build.xml
@@ -84,12 +84,12 @@
          
 
         
-        
-        
+        
+        
         
         
-        
-        
+        
+        
         
         
@@ -412,8 +412,8 @@
     
         
-        
+        
         
             =======================
             --------------------------

-- 
Repository URL: https://hg.python.org/jython

From jython-checkins at python.org  Fri Sep 11 03:41:22 2015
From: jython-checkins at python.org (frank.wierzbicki)
Date: Fri, 11 Sep 2015 01:41:22 +0000
Subject: [Jython-checkins] =?utf-8?q?jython=3A_Added_tag_v2=2E7=2E1b1_for_?=
	=?utf-8?q?changeset_7d55b82d5842?=
Message-ID: <20150911014121.66862.79862@psf.io>

https://hg.python.org/jython/rev/85f81a1b9d6b
changeset:   7731:85f81a1b9d6b
user:        Frank Wierzbicki 
date:        Fri Sep 11 01:38:47 2015 +0000
summary:
  Added tag v2.7.1b1 for changeset 7d55b82d5842

files:
  .hgtags |  1 +
  1 files changed, 1 insertions(+), 0 deletions(-)


diff --git a/.hgtags b/.hgtags
--- a/.hgtags
+++ b/.hgtags
@@ -94,3 +94,4 @@
 3a7bb3fb3338964c70d3c734294bfc0f6bc37a3a v2.7.0
 3a7bb3fb3338964c70d3c734294bfc0f6bc37a3a v2.7.0
 77e0e7c87bd15bad165cebdf8ac88dbae623f339 v2.7.0
+7d55b82d5842bcf602e4835b9ddbedf46da83bfa v2.7.1b1

-- 
Repository URL: https://hg.python.org/jython

From jython-checkins at python.org  Fri Sep 11 03:43:22 2015
From: jython-checkins at python.org (frank.wierzbicki)
Date: Fri, 11 Sep 2015 01:43:22 +0000
Subject: [Jython-checkins] =?utf-8?q?jython=3A_Remove_+_from_version=2E?=
Message-ID: <20150911014322.15734.19297@psf.io>

https://hg.python.org/jython/rev/44403bccae21
changeset:   7734:44403bccae21
user:        Frank Wierzbicki 
date:        Fri Sep 11 01:43:18 2015 +0000
summary:
  Remove + from version.

files:
  build.xml |  2 +-
  1 files changed, 1 insertions(+), 1 deletions(-)


diff --git a/build.xml b/build.xml
--- a/build.xml
+++ b/build.xml
@@ -84,7 +84,7 @@
          
 
         
-        
+        
         
         
         

-- 
Repository URL: https://hg.python.org/jython

From jython-checkins at python.org  Fri Sep 11 03:43:31 2015
From: jython-checkins at python.org (frank.wierzbicki)
Date: Fri, 11 Sep 2015 01:43:31 +0000
Subject: [Jython-checkins] =?utf-8?q?jython=3A_Added_tag_v2=2E7=2E1b1_for_?=
	=?utf-8?q?changeset_44403bccae21?=
Message-ID: <20150911014331.66872.29644@psf.io>

https://hg.python.org/jython/rev/7ebdc6c80d55
changeset:   7735:7ebdc6c80d55
user:        Frank Wierzbicki 
date:        Fri Sep 11 01:43:27 2015 +0000
summary:
  Added tag v2.7.1b1 for changeset 44403bccae21

files:
  .hgtags |  2 ++
  1 files changed, 2 insertions(+), 0 deletions(-)


diff --git a/.hgtags b/.hgtags
--- a/.hgtags
+++ b/.hgtags
@@ -97,3 +97,5 @@
 7d55b82d5842bcf602e4835b9ddbedf46da83bfa v2.7.1b1
 7d55b82d5842bcf602e4835b9ddbedf46da83bfa v2.7.1b1
 2f0b46abbe31275edc3257f54f8222fd37752d65 v2.7.1b1
+2f0b46abbe31275edc3257f54f8222fd37752d65 v2.7.1b1
+44403bccae2163186f8ab46b4a544a48e137bba6 v2.7.1b1

-- 
Repository URL: https://hg.python.org/jython

From jython-checkins at python.org  Sun Sep 27 21:45:39 2015
From: jython-checkins at python.org (stefan.richthofer)
Date: Sun, 27 Sep 2015 19:45:39 +0000
Subject: [Jython-checkins] =?utf-8?q?jython=3A_JyNI-related_work_on_GC-sup?=
	=?utf-8?q?port=2E?=
Message-ID: <20150927194538.82646.87408@psf.io>

https://hg.python.org/jython/rev/a32410e8df69
changeset:   7736:a32410e8df69
user:        Stefan Richthofer 
date:        Sun Sep 27 21:45:18 2015 +0200
summary:
  JyNI-related work on GC-support.

files:
  src/org/python/core/finalization/FinalizablePyObject.java |   2 -
  src/org/python/modules/_weakref/GlobalRef.java            |   5 +-
  src/org/python/modules/gc.java                            |  54 ++++++++-
  3 files changed, 48 insertions(+), 13 deletions(-)


diff --git a/src/org/python/core/finalization/FinalizablePyObject.java b/src/org/python/core/finalization/FinalizablePyObject.java
--- a/src/org/python/core/finalization/FinalizablePyObject.java
+++ b/src/org/python/core/finalization/FinalizablePyObject.java
@@ -1,7 +1,5 @@
 package org.python.core.finalization;
 
-import org.python.core.JyAttribute;
-
 /**
  * 

  * This interface allows {@code PyObject}s to have finalizers.
diff --git a/src/org/python/modules/_weakref/GlobalRef.java b/src/org/python/modules/_weakref/GlobalRef.java
--- a/src/org/python/modules/_weakref/GlobalRef.java
+++ b/src/org/python/modules/_weakref/GlobalRef.java
@@ -63,6 +63,9 @@
     private static ConcurrentMap objects = Generic.concurrentMap();
     private static List delayedCallbacks;
 
+    /*
+     * Consider to make this protected, so use of newInstance is enforced.
+     */
     public GlobalRef(PyObject object) {
         super(object, referenceQueue);
         hashCode = System.identityHashCode(object);
@@ -247,7 +250,7 @@
             objects.put(newRef, ref);
             JyAttribute.setAttr(object, JyAttribute.WEAK_REF_ATTR, ref);
         } else {
-            // We clear the not-needed Global ref so that it won't
+            // We clear the not-needed GlobalRef so that it won't
             // pop up in ref-reaper thread's activity.
             newRef.clear();
             newRef.cleared = true;
diff --git a/src/org/python/modules/gc.java b/src/org/python/modules/gc.java
--- a/src/org/python/modules/gc.java
+++ b/src/org/python/modules/gc.java
@@ -1093,15 +1093,16 @@
                 DelayedFinalizationProcess.defaultInstance) != -1) {
             suspendDelayedFinalization();
         }
-        if (delayedWeakrefCallbacksEnabled()) {
-            if (GlobalRef.hasDelayedCallbacks()) {
-                Thread dlcProcess = new Thread() {
-                    public void run() {
-                        GlobalRef.processDelayedCallbacks();
-                    }
-                };
-                dlcProcess.start();
-            }
+        if (!delayedWeakrefCallbacksEnabled() &&
+                GlobalRef.hasDelayedCallbacks()) {
+            // If delayed callbacks were turned off, we process remaining
+            // queued callbacks immediately (but in a new thread though):
+            Thread dlcProcess = new Thread() {
+                public void run() {
+                    GlobalRef.processDelayedCallbacks();
+                }
+            };
+            dlcProcess.start();
         }
     }
 
@@ -1394,8 +1395,41 @@
     }
 
     public static void notifyPreFinalization() {
+        long callTime = System.currentTimeMillis();
+/*
+ * This section is experimental and kept for further investigation. In theory, it can
+ * prevent potential problems in JyNI-gc, if a gc-run overlaps the previous run's
+ * post-finalization phase. However it currently breaks gc-tests, so is out-commented
+ * so far. In practical sense, JyNI's gc-support also works fine without it so far.
+ */
+//        if (postFinalizationPending) {
+//            if ((gcFlags & VERBOSE_COLLECT) != 0) {
+//                writeDebug("gc", "waiting for pending post-finalization process.");
+//            }
+//            /* It is important to have the while (which is actually an "if" since the
+//             * InterruptedException is very unlikely to occur) *inside* the synchronized
+//             * block. Otherwise the notification might come just between the check and the wait,
+//             * causing an endless waiting. This is no pure academic consideration, but was
+//             * actually observed to happen from time to time, especially on faster systems.
+//             */
+//            synchronized(PostFinalizationProcessor.class) {
+//                while (postFinalizationPending) {
+//                    try {
+//                        PostFinalizationProcessor.class.wait();
+//                    } catch (InterruptedException ie3) {}
+//                }
+//            }
+//            if ((gcFlags & VERBOSE_COLLECT) != 0) {
+//                writeDebug("gc", "post-finalization finished.");
+//            }
+//        }
+//        /*
+//         * Increment of openFinalizeCount must not happen before waiting for pending
+//         * post-finalization process is done. Otherwise PostFinalizationProcessor can
+//         * be waiting for a neutral openFinalizeCount, causing a deadlock.
+//         */
         ++openFinalizeCount;
-        if (System.currentTimeMillis() - postFinalizationTimestamp
+        if (callTime - postFinalizationTimestamp
                 < postFinalizationTimeOut) {
             return;
         }

-- 
Repository URL: https://hg.python.org/jython