From jython-checkins at python.org Tue Jan 1 18:58:21 2019 From: jython-checkins at python.org (jeff.allen) Date: Tue, 01 Jan 2019 23:58:21 +0000 Subject: [Jython-checkins] =?utf-8?q?jython=3A_Tolerate_localisation_and_?= =?utf-8?q?non-ascii_strings_in_os=2Euname=28=29=2E_Fixes_=232726=2E?= Message-ID: <20190101235821.1.1D73B924D590F516@mg.python.org> https://hg.python.org/jython/rev/9f020857adce changeset: 8211:9f020857adce user: Jeff Allen date: Tue Jan 01 12:21:30 2019 +0000 summary: Tolerate localisation and non-ascii strings in os.uname(). Fixes #2726. files: Lib/test/test_os_jy.py | 12 ++++ NEWS | 1 + src/org/python/core/PySystemState.java | 26 ++++----- src/org/python/modules/posix/PosixModule.java | 8 +- 4 files changed, 28 insertions(+), 19 deletions(-) diff --git a/Lib/test/test_os_jy.py b/Lib/test/test_os_jy.py --- a/Lib/test/test_os_jy.py +++ b/Lib/test/test_os_jy.py @@ -322,6 +322,18 @@ "File %r (%r) should be testable for existence" % (f, entry_path)) + def test_uname(self): + # Test that os.uname returns a tuple of (arbitrary) strings. + # uname failed on on a Chinese localised system (see + # https://bugs.jython.org/issue2726). This test really needs to + # run in that environment or it passes too easily. 
+ result = os.uname() + # (sysname, nodename, release, version, machine) + self.assertEqual(type(result), tuple) + self.assertEqual(len(result), 5) + for s in result: self.assertEqual(type(s), str) + + class LocaleTestCase(unittest.TestCase): def get_installed_locales(self, codes, msg=None): diff --git a/NEWS b/NEWS --- a/NEWS +++ b/NEWS @@ -5,6 +5,7 @@ Development tip Bugs fixed + - [ 2726 ] os.uname() throws IllegalArgumentException on Windows (Chinese localisation) - [ 2719 ] Divergence of __str__ and __repr__ from CPython - [ 2714 ] Locale and java version failures in test_os_jy - [ GH-111 ] Proper processing of gzip trailer without resubmission diff --git a/src/org/python/core/PySystemState.java b/src/org/python/core/PySystemState.java --- a/src/org/python/core/PySystemState.java +++ b/src/org/python/core/PySystemState.java @@ -1772,30 +1772,26 @@ /** * Attempt to find the OS version. The mechanism on Windows is to extract it from the result of - * cmd.exe /C ver, and otherwise (assumed Unix-like OS) to use - * uname -v. + * {@code cmd.exe /C ver}, and otherwise (assumed Unix-like OS) to use {@code uname -v}. */ public static String getSystemVersionString() { if (System.getProperty("os.name").startsWith("Windows")) { - String ver = getCommandResult("cmd.exe", "/c", "ver"); - int start = ver.toLowerCase().indexOf("version "); - if (start != -1) { - start += 8; - int end = ver.length(); - if (ver.endsWith("]")) { - --end; - } - ver = ver.substring(start, end); - } - return ver; + // Windows ver command returns a string similar to: + // "Microsoft Windows [Version 10.0.10586]" + // "Microsoft Windows XP [Version 5.1.2600]" + // "Microsoft Windows [?? 10.0.17134.472]" + // We match the dots and digits within square brackets. + Pattern p = Pattern.compile("\\[.* ([\\d.]+)\\]"); + Matcher m = p.matcher(getCommandResult("cmd.exe", "/c", "ver")); + return m.find() ? 
m.group(1) : ""; } else { return getCommandResult("uname", "-v"); } } /** - * Run a command as a sub-process and return as the result the first line of output that consist - * of more than white space. It returns "" on any kind of error. + * Run a command as a sub-process and return as the result the first line of output that + * consists of more than white space. It returns "" on any kind of error. * * @param command as strings (as for ProcessBuilder) * @return the first line with content, or "" diff --git a/src/org/python/modules/posix/PosixModule.java b/src/org/python/modules/posix/PosixModule.java --- a/src/org/python/modules/posix/PosixModule.java +++ b/src/org/python/modules/posix/PosixModule.java @@ -1167,11 +1167,11 @@ } PyObject[] vals = { - Py.newString(sysname), + Py.fileSystemEncode(sysname), Py.fileSystemEncode(uname_nodename), - Py.newString(sysrelease), - Py.newString(uname_sysver), - Py.newString(uname_machine) + Py.fileSystemEncode(sysrelease), + Py.fileSystemEncode(uname_sysver), + Py.fileSystemEncode(uname_machine) }; uname_cache = new PyTuple(vals, false); return uname_cache; -- Repository URL: https://hg.python.org/jython From jython-checkins at python.org Tue Jan 1 18:58:22 2019 From: jython-checkins at python.org (jeff.allen) Date: Tue, 01 Jan 2019 23:58:22 +0000 Subject: [Jython-checkins] =?utf-8?q?jython=3A_Refactoring_around_os=2Eun?= =?utf-8?q?ame=28=29_to_extract_common_code=2E?= Message-ID: <20190101235822.1.BCD69E74C54DD60C@mg.python.org> https://hg.python.org/jython/rev/4bbacf481833 changeset: 8212:4bbacf481833 user: Jeff Allen date: Tue Jan 01 23:14:03 2019 +0000 summary: Refactoring around os.uname() to extract common code. Use Py.getCommandResult and Py.getenv, the latter so that os.environ is consulted to pick up changes. Also a slight tweak to the logic allows us to pass test_uname_win32_ARCHITEW6432 in test_platform, if that test is enabled and uname_cache disabled (temporarily). 
Top-level classes hiding in Hider.java are made nested to placate IDE linters. files: Lib/test/test_platform.py | 1 + src/org/python/core/PrePy.java | 33 + src/org/python/core/PySystemState.java | 34 +- src/org/python/modules/posix/Hider.java | 37 +- src/org/python/modules/posix/PosixModule.java | 227 +++------ 5 files changed, 137 insertions(+), 195 deletions(-) diff --git a/Lib/test/test_platform.py b/Lib/test/test_platform.py --- a/Lib/test/test_platform.py +++ b/Lib/test/test_platform.py @@ -136,6 +136,7 @@ self.assertTrue(any(res)) @unittest.skipUnless(sys.platform.startswith('win'), "windows only test") + # One may enable this for Jython, but must disable the cache in os.uname() to pass. def test_uname_win32_ARCHITEW6432(self): # Issue 7860: make sure we get architecture from the correct variable # on 64 bit Windows: if PROCESSOR_ARCHITEW6432 exists we should be diff --git a/src/org/python/core/PrePy.java b/src/org/python/core/PrePy.java --- a/src/org/python/core/PrePy.java +++ b/src/org/python/core/PrePy.java @@ -264,4 +264,37 @@ } return url; } + + /** + * Run a command as a sub-process and return as the result the first line of output that + * consists of more than white space. It returns "" on any kind of error. + * + * @param command as strings (as for ProcessBuilder) + * @return the first line with content, or "" + */ + public static String getCommandResult(String... command) { + String result = "", line = null; + ProcessBuilder pb = new ProcessBuilder(command); + try { + Process p = pb.start(); + java.io.BufferedReader br = + new java.io.BufferedReader(new java.io.InputStreamReader(p.getInputStream())); + // We read to the end-of-stream in case the sub-process cannot end cleanly without. + while ((line = br.readLine()) != null) { + if (line.length() > 0 && result.length() == 0) { + // This is the first line with content (maybe). + result = line.trim(); + } + } + br.close(); + // Now we wait for the sub-process to terminate nicely. 
+ if (p.waitFor() != 0) { + // Bad exit status: don't take the result. + result = ""; + } + } catch (IOException | InterruptedException | SecurityException e) { + result = ""; + } + return result; + } } diff --git a/src/org/python/core/PySystemState.java b/src/org/python/core/PySystemState.java --- a/src/org/python/core/PySystemState.java +++ b/src/org/python/core/PySystemState.java @@ -996,7 +996,7 @@ } else if (os != null && os.startsWith("Windows")) { // Go via the Windows code page built-in command "chcp". - String output = getCommandResult("cmd", "/c", "chcp"); + String output = Py.getCommandResult("cmd", "/c", "chcp"); /* * The output will be like "Active code page: 850" or maybe "Aktive Codepage: 1252." or * "?? ?? ???: 949". Assume the first number with 2 or more digits is the code page. @@ -1009,7 +1009,7 @@ } else { // Try a Unix-like "locale charmap". - String output = getCommandResult("locale", "charmap"); + String output = Py.getCommandResult("locale", "charmap"); // The result of "locale charmap" is just the charmap name ~ Charset or codec name. if (output.length() > 0) { return output; @@ -1782,10 +1782,10 @@ // "Microsoft Windows [?? 10.0.17134.472]" // We match the dots and digits within square brackets. Pattern p = Pattern.compile("\\[.* ([\\d.]+)\\]"); - Matcher m = p.matcher(getCommandResult("cmd.exe", "/c", "ver")); + Matcher m = p.matcher(Py.getCommandResult("cmd.exe", "/c", "ver")); return m.find() ? m.group(1) : ""; } else { - return getCommandResult("uname", "-v"); + return Py.getCommandResult("uname", "-v"); } } @@ -1795,31 +1795,11 @@ * * @param command as strings (as for ProcessBuilder) * @return the first line with content, or "" + * @deprecated Use {@link Py#getCommandResult(String...)} instead */ + @Deprecated private static String getCommandResult(String... 
command) { - String result = "", line = null; - ProcessBuilder pb = new ProcessBuilder(command); - try { - Process p = pb.start(); - java.io.BufferedReader br = - new java.io.BufferedReader(new java.io.InputStreamReader(p.getInputStream())); - // We read to the end-of-stream in case the sub-process cannot end cleanly without. - while ((line = br.readLine()) != null) { - if (line.length() > 0 && result.length() == 0) { - // This is the first line with content (maybe). - result = line.trim(); - } - } - br.close(); - // Now we wait for the sub-process to terminate nicely. - if (p.waitFor() != 0) { - // Bad exit status: don't take the result. - result = ""; - } - } catch (IOException | InterruptedException | SecurityException e) { - result = ""; - } - return result; + return PrePy.getCommandResult(command); } /* Traverseproc implementation */ diff --git a/src/org/python/modules/posix/Hider.java b/src/org/python/modules/posix/Hider.java --- a/src/org/python/modules/posix/Hider.java +++ b/src/org/python/modules/posix/Hider.java @@ -50,27 +50,24 @@ } return false; } -} -/** - * Tags PosixModule methods as hidden on the specified OS or PosixImpl. - */ -@Retention(RetentionPolicy.RUNTIME) -@Target(ElementType.METHOD) -@interface Hide { + /** Tags PosixModule methods as hidden on the specified OS or PosixImpl. */ + @Retention(RetentionPolicy.RUNTIME) + @Target(ElementType.METHOD) + @interface Hide { - /** Hide method on these OSes. */ - OS[] value() default {}; + /** Hide method on these OSes. */ + OS[] value() default {}; - /** - * @Hide(posixImpl = PosixImpl.JAVA) hides the method from Python when the POSIX - * library isn't native. The default NOT_APPLICABLE means the POSIX implementation - * doesn't matter. - */ - PosixImpl posixImpl() default PosixImpl.NOT_APPLICABLE; + /** + * @Hide(posixImpl = PosixImpl.JAVA) hides the method from Python when the POSIX + * library isn't native. The default NOT_APPLICABLE means the POSIX implementation + * doesn't matter.
+ */ + PosixImpl posixImpl() default PosixImpl.NOT_APPLICABLE; + } + + /** The type of underlying POSIX library implementation (native or not). */ + enum PosixImpl {NOT_APPLICABLE, NATIVE, JAVA}; + } - -/** - * The type of underlying POSIX library implementation (native or not). - */ -enum PosixImpl {NOT_APPLICABLE, NATIVE, JAVA}; diff --git a/src/org/python/modules/posix/PosixModule.java b/src/org/python/modules/posix/PosixModule.java --- a/src/org/python/modules/posix/PosixModule.java +++ b/src/org/python/modules/posix/PosixModule.java @@ -343,7 +343,7 @@ public static PyString __doc__chown = new PyString( "chown(path, uid, gid)\n\n" + "Change the owner and group id of path to the numeric uid and gid."); - @Hide(OS.NT) + @Hider.Hide(OS.NT) public static void chown(PyObject path, int uid, int gid) { if (posix.chown(absolutePath(path).toString(), uid, gid) < 0) { throw errorFromErrno(path); @@ -362,7 +362,7 @@ } } - @Hide(OS.NT) + @Hider.Hide(OS.NT) public static void closerange(PyObject fd_lowObj, PyObject fd_highObj) { int fd_low = getFD(fd_lowObj).getIntFD(false); int fd_high = getFD(fd_highObj).getIntFD(false); @@ -424,7 +424,7 @@ "fdatasync(fildes)\n\n" + "force write of file with filedescriptor to disk.\n" + "does not force update of metadata."); - @Hide(OS.NT) + @Hider.Hide(OS.NT) public static void fdatasync(PyObject fd) { Object javaobj = fd.__tojava__(RawIOBase.class); if (javaobj != Py.NoConversion) { @@ -501,7 +501,7 @@ public static PyString __doc__getegid = new PyString( "getegid() -> egid\n\n" + "Return the current process's effective group id."); - @Hide(OS.NT) + @Hider.Hide(OS.NT) public static int getegid() { return posix.getegid(); } @@ -509,7 +509,7 @@ public static PyString __doc__geteuid = new PyString( "geteuid() -> euid\n\n" + "Return the current process's effective user id."); - @Hide(OS.NT) + @Hider.Hide(OS.NT) public static int geteuid() { return posix.geteuid(); } @@ -517,7 +517,7 @@ public static PyString __doc__getgid = new PyString( 
"getgid() -> gid\n\n" + "Return the current process's group id."); - @Hide(value=OS.NT, posixImpl = PosixImpl.JAVA) + @Hider.Hide(value=OS.NT, posixImpl = Hider.PosixImpl.JAVA) public static int getgid() { return posix.getgid(); } @@ -525,7 +525,7 @@ public static PyString __doc__getlogin = new PyString( "getlogin() -> string\n\n" + "Return the actual login name."); - @Hide(value=OS.NT, posixImpl = PosixImpl.JAVA) + @Hider.Hide(value=OS.NT, posixImpl = Hider.PosixImpl.JAVA) public static PyObject getlogin() { String login = posix.getlogin(); if (login == null) { @@ -539,7 +539,7 @@ public static PyString __doc__getppid = new PyString( "getppid() -> ppid\n\n" + "Return the parent's process id."); - @Hide(value=OS.NT, posixImpl = PosixImpl.JAVA) + @Hider.Hide(value=OS.NT, posixImpl = Hider.PosixImpl.JAVA) public static int getppid() { return posix.getppid(); } @@ -547,7 +547,7 @@ public static PyString __doc__getuid = new PyString( "getuid() -> uid\n\n" + "Return the current process's user id."); - @Hide(value=OS.NT, posixImpl = PosixImpl.JAVA) + @Hider.Hide(value=OS.NT, posixImpl = Hider.PosixImpl.JAVA) public static int getuid() { return posix.getuid(); } @@ -556,7 +556,7 @@ "getpid() -> pid\n\n" + "Return the current process id"); - @Hide(posixImpl = PosixImpl.JAVA) + @Hider.Hide(posixImpl = Hider.PosixImpl.JAVA) public static int getpid() { return posix.getpid(); } @@ -564,7 +564,7 @@ public static PyString __doc__getpgrp = new PyString( "getpgrp() -> pgrp\n\n" + "Return the current process group id."); - @Hide(value=OS.NT, posixImpl = PosixImpl.JAVA) + @Hider.Hide(value=OS.NT, posixImpl = Hider.PosixImpl.JAVA) public static int getpgrp() { return posix.getpgrp(); } @@ -575,7 +575,7 @@ "isatty(fd) -> bool\n\n" + "Return True if the file descriptor 'fd' is an open file descriptor\n" + "connected to the slave end of a terminal."); - @Hide(posixImpl = PosixImpl.JAVA) + @Hider.Hide(posixImpl = Hider.PosixImpl.JAVA) public static boolean isatty(PyObject fdObj) { 
Object tojava = fdObj.__tojava__(IOBase.class); if (tojava != Py.NoConversion) { @@ -609,7 +609,7 @@ public static PyString __doc__kill = new PyString( "kill(pid, sig)\n\n" + "Kill a process with a signal."); - @Hide(value=OS.NT, posixImpl = PosixImpl.JAVA) + @Hider.Hide(value=OS.NT, posixImpl = Hider.PosixImpl.JAVA) public static void kill(int pid, int sig) { if (posix.kill(pid, sig) < 0) { throw errorFromErrno(); @@ -620,7 +620,7 @@ "lchmod(path, mode)\n\n" + "Change the access permissions of a file. If path is a symlink, this\n" + "affects the link itself rather than the target."); - @Hide(value=OS.NT, posixImpl = PosixImpl.JAVA) + @Hider.Hide(value=OS.NT, posixImpl = Hider.PosixImpl.JAVA) public static void lchmod(PyObject path, int mode) { if (posix.lchmod(absolutePath(path).toString(), mode) < 0) { throw errorFromErrno(path); @@ -631,7 +631,7 @@ "lchown(path, uid, gid)\n\n" + "Change the owner and group id of path to the numeric uid and gid.\n" + "This function will not follow symbolic links."); - @Hide(value=OS.NT, posixImpl = PosixImpl.JAVA) + @Hider.Hide(value=OS.NT, posixImpl = Hider.PosixImpl.JAVA) public static void lchown(PyObject path, int uid, int gid) { if (posix.lchown(absolutePath(path).toString(), uid, gid) < 0) { throw errorFromErrno(path); @@ -642,7 +642,7 @@ "link(src, dst)\n\n" + "Create a hard link to a file."); - @Hide(OS.NT) + @Hider.Hide(OS.NT) public static void link(PyObject src, PyObject dst) { try { Files.createLink(Paths.get(asPath(dst)), Paths.get(asPath(src))); @@ -857,7 +857,7 @@ public static PyString __doc__readlink = new PyString( "readlink(path) -> path\n\n" + "Return a string representing the path to which the symbolic link points."); - @Hide(OS.NT) + @Hider.Hide(OS.NT) public static PyString readlink(PyObject path) { try { return Py.newStringOrUnicode(path, Files.readSymbolicLink(absolutePath(path)).toString()); @@ -908,7 +908,7 @@ public static PyString __doc__setpgrp = new PyString( "setpgrp()\n\n" + "Make this process a 
session leader."); - @Hide(value=OS.NT, posixImpl = PosixImpl.JAVA) + @Hider.Hide(value=OS.NT, posixImpl = Hider.PosixImpl.JAVA) public static void setpgrp() { if (posix.setpgrp(0, 0) < 0) { throw errorFromErrno(); @@ -918,7 +918,7 @@ public static PyString __doc__setsid = new PyString( "setsid()\n\n" + "Call the system call setsid()."); - @Hide(value=OS.NT, posixImpl = PosixImpl.JAVA) + @Hider.Hide(value=OS.NT, posixImpl = Hider.PosixImpl.JAVA) public static void setsid() { if (posix.setsid() < 0) { throw errorFromErrno(); @@ -946,7 +946,7 @@ "symlink(src, dst)\n\n" + "Create a symbolic link pointing to src named dst."); - @Hide(OS.NT) + @Hider.Hide(OS.NT) public static void symlink(PyObject src, PyObject dst) { try { Files.createSymbolicLink(Paths.get(asPath(dst)), Paths.get(asPath(src))); @@ -967,7 +967,7 @@ "times() -> (utime, stime, cutime, cstime, elapsed_time)\n\n" + "Return a tuple of floating point numbers indicating process times."); - @Hide(posixImpl = PosixImpl.JAVA) + @Hider.Hide(posixImpl = Hider.PosixImpl.JAVA) public static PyTuple times() { Times times = posix.times(); long CLK_TCK = Sysconf._SC_CLK_TCK.longValue(); @@ -983,7 +983,7 @@ public static PyString __doc__umask = new PyString( "umask(new_mask) -> old_mask\n\n" + "Set the current numeric umask and return the previous umask."); - @Hide(posixImpl = PosixImpl.JAVA) + @Hider.Hide(posixImpl = Hider.PosixImpl.JAVA) public static int umask(int mask) { return posix.umask(mask); } @@ -1042,138 +1042,69 @@ * @return PyTuple containing sysname, nodename, release, version, machine */ public static PyTuple uname() { - if (uname_cache != null) { - return uname_cache; - } - String sysname = System.getProperty("os.name"); - String sysrelease; - boolean win; - if (sysname.equals("Mac OS X")) { - sysname = "Darwin"; - win = false; - try { - Process p = Runtime.getRuntime().exec("uname -r"); - java.io.BufferedReader br = new java.io.BufferedReader( - new java.io.InputStreamReader(p.getInputStream())); - 
sysrelease = br.readLine(); - // to end the process sanely in case we deal with some - // implementation that emits additional new-lines: - while (br.readLine() != null) { - ; - } - br.close(); - if (p.waitFor() != 0) { - sysrelease = ""; - } - } catch (Exception e) { - sysrelease = ""; + if (uname_cache == null) { + // First call: have to construct the result. + String sysname = System.getProperty("os.name"); + String sysrelease, nodename, machine; + boolean win = false; + + if (sysname.equals("Mac OS X")) { + sysname = "Darwin"; + sysrelease = Py.getCommandResult("uname", "-r"); + } else if (sysname.startsWith("Windows")) { + sysrelease = sysname.length() > 7 ? sysname.substring(8) + : System.getProperty("os.version", ""); + sysname = "Windows"; + win = true; + } else { + sysrelease = System.getProperty("os.version", ""); } - } else { - win = sysname.startsWith("Windows"); - if (win) { - sysrelease = sysname.length() > 7 ? sysname.substring(8) : - System.getProperty("os.version"); - sysname = "Windows"; - } else { - sysrelease = System.getProperty("os.version"); - } - } - String uname_nodename; - try { - uname_nodename = java.net.InetAddress.getLocalHost().getHostName(); - } catch (Exception e) { - // Do nothing to leverage fallback - uname_nodename = null; - } - if (uname_nodename == null && win) { - uname_nodename = System.getenv("USERDOMAIN"); - } - if (uname_nodename == null) { try { - Process p = Runtime.getRuntime().exec( - win ? 
"hostname" : "uname -n"); - java.io.BufferedReader br = new java.io.BufferedReader( - new java.io.InputStreamReader(p.getInputStream())); - uname_nodename = br.readLine(); - // to end the process sanely in case we deal with some - // implementation that emits additional new-lines: - while (br.readLine() != null) { - ; - } - br.close(); - if (p.waitFor() != 0) { - uname_nodename = ""; - } + nodename = java.net.InetAddress.getLocalHost().getHostName(); } catch (Exception e) { - uname_nodename = ""; - } - } - - String uname_sysver = PySystemState.getSystemVersionString(); - - String uname_machine; - try { - if (win) { - String machine = System.getenv("PROCESSOR_ARCHITECTURE"); - if (machine.equals("x86")) { - // maybe 32-bit process running on 64 bit machine - machine = System.getenv("PROCESSOR_ARCHITEW6432"); - } - // if machine == null it's actually a 32-bit machine - uname_machine = machine == null ? "x86" : machine; -// We refrain from this normalization in order to match platform.uname behavior on Windows: -/* if (machine == null) { - uname_machine = "i686"; - } else if (machine.equals("AMD64") || machine.equals("EM64T")) { - uname_machine = "x86_64"; - } else if (machine.equals("IA64")) { - uname_machine = "ia64"; + // If that fails, try the shell. 
+ if (win) { + nodename = Py.getenv("USERDOMAIN", ""); + if (nodename.isEmpty()) { + nodename = Py.getCommandResult("hostname"); + } } else { - uname_machine = machine.toLowerCase(); - } */ - } else { - Process p = Runtime.getRuntime().exec("uname -m"); - java.io.BufferedReader br = new java.io.BufferedReader( - new java.io.InputStreamReader(p.getInputStream())); - uname_machine = br.readLine(); - // to end the process sanely in case we deal with some - // implementation that emits additional new-lines: - while (br.readLine() != null) { - ; - } - br.close(); - if (p.waitFor() != 0) { - // To leverage os.arch-fallback: - uname_machine = null; + nodename = Py.getCommandResult("uname", "-n"); } } - } catch (Exception e) { - // To leverage os.arch-fallback: - uname_machine = null; + + String sysver = PySystemState.getSystemVersionString(); + + if (win) { + // Check if 32-bit process on a 64 bit machine (compare platform.py) + machine = Py.getenv("PROCESSOR_ARCHITEW6432", ""); + if (machine.isEmpty()) { + // Otherwise, this contains the value (or we default to null) + machine = Py.getenv("PROCESSOR_ARCHITECTURE", ""); + } + } else { + machine = Py.getCommandResult("uname", "-m"); + } + + if (machine.isEmpty()) { + machine = System.getProperty("os.arch", ""); + if (machine.equals("amd64")) { + // 64-bit processor presents as x86_64 on Linux and AMD64 on Windows. + machine = win ? 
"AMD64" : "x86_64"; + } else if (machine.equals("x86")) { + machine = "i686"; + } + } + + uname_cache = new PyTuple(new PyObject[] { + Py.fileSystemEncode(sysname), + Py.fileSystemEncode(nodename), + Py.fileSystemEncode(sysrelease), + Py.fileSystemEncode(sysver), + Py.fileSystemEncode(machine)}, + false); } - if (uname_machine == null) { - String machine = System.getProperty("os.arch"); - if (machine == null) { - uname_machine = ""; - } else if (machine.equals("amd64")) { - // Normalize the common amd64-case to x86_64: - uname_machine = "x86_64"; - } else if (machine.equals("x86")) { - uname_machine = "i686"; - } else { - uname_machine = machine; - } - } - - PyObject[] vals = { - Py.fileSystemEncode(sysname), - Py.fileSystemEncode(uname_nodename), - Py.fileSystemEncode(sysrelease), - Py.fileSystemEncode(uname_sysver), - Py.fileSystemEncode(uname_machine) - }; - uname_cache = new PyTuple(vals, false); return uname_cache; } @@ -1251,7 +1182,7 @@ public static PyString __doc__wait = new PyString( "wait() -> (pid, status)\n\n" + "Wait for completion of a child process."); - @Hide(value=OS.NT, posixImpl = PosixImpl.JAVA) + @Hider.Hide(value=OS.NT, posixImpl = Hider.PosixImpl.JAVA) public static PyObject wait$() { int[] status = new int[1]; int pid = posix.wait(status); @@ -1264,7 +1195,7 @@ public static PyString __doc__waitpid = new PyString( "wait() -> (pid, status)\n\n" + "Wait for completion of a child process."); - @Hide(posixImpl = PosixImpl.JAVA) + @Hider.Hide(posixImpl = Hider.PosixImpl.JAVA) public static PyObject waitpid(int pid, int options) { int[] status = new int[1]; pid = posix.waitpid(pid, status, options); -- Repository URL: https://hg.python.org/jython From jython-checkins at python.org Sun Jan 6 04:31:33 2019 From: jython-checkins at python.org (jeff.allen) Date: Sun, 06 Jan 2019 09:31:33 +0000 Subject: [Jython-checkins] =?utf-8?q?jython=3A_Enable_test=5Flarge=5Fmeth?= =?utf-8?q?od=5Fbytecode=5Fjy_in_regrtest_=28for_less_than_Java_9=29=2E?= 
Message-ID: <20190106093133.1.0A244EE0F3DFC511@mg.python.org> https://hg.python.org/jython/rev/291b0e7b0d55 changeset: 8213:291b0e7b0d55 user: Jeff Allen date: Sat Jan 05 05:07:10 2019 +0000 summary: Enable test_large_method_bytecode_jy in regrtest (for less than Java 9). Give test_large_method_bytecode_jy a test_main so that it runs under regrtest. Also, the update to ASM 7.0 offers us a better way to detect oversized code in Module.java. files: Lib/test/test_large_method_bytecode_jy.py | 11 ++++- src/org/python/compiler/Module.java | 23 +++------- 2 files changed, 17 insertions(+), 17 deletions(-) diff --git a/Lib/test/test_large_method_bytecode_jy.py b/Lib/test/test_large_method_bytecode_jy.py --- a/Lib/test/test_large_method_bytecode_jy.py +++ b/Lib/test/test_large_method_bytecode_jy.py @@ -16,6 +16,7 @@ import unittest from test import test_support +@unittest.skipIf(test_support.get_java_version() >= (9,), "Fails on Java 9+, see #2663") class large_method_tests(unittest.TestCase): '''Tests some oversized functions and methods. ''' @@ -52,6 +53,7 @@ ''' self.assertEqual(large_methods.small_function(), 'small 10') +@unittest.skipIf(test_support.get_java_version() >= (9,), "Fails on Java 9+, issue #2663") class large_module_tests(unittest.TestCase): '''Tests a module with oversized main-code. So the whole module is represented as a single PyBytecode object.
@@ -82,6 +84,11 @@ def test_large_module_small_func(self): self.assertEqual(large_module.small_function(), 'small 10') +def test_main(): + test_support.run_unittest( + large_method_tests, + large_module_tests + ) + if __name__ == "__main__": - unittest.main() - + test_main() diff --git a/src/org/python/compiler/Module.java b/src/org/python/compiler/Module.java --- a/src/org/python/compiler/Module.java +++ b/src/org/python/compiler/Module.java @@ -21,6 +21,7 @@ import javax.xml.bind.DatatypeConverter; import org.objectweb.asm.Label; +import org.objectweb.asm.MethodTooLargeException; import org.objectweb.asm.Opcodes; import org.objectweb.asm.Type; import org.python.antlr.ParseException; @@ -631,7 +632,7 @@ for (i = 0; i < labels.length; i++) { labels[i] = new Label(); } - + // Get index for function to call code.iload(1); code.tableswitch(0, labels.length - 1, def, labels); @@ -840,7 +841,7 @@ } } } - + private static String serializePyBytecode(PyBytecode btcode) throws java.io.IOException { // For some reason we cannot do this using _marshal: /* @@ -871,7 +872,7 @@ } private static final int maxLiteral = 65535; - + /** * This method stores Python-Bytecode in String literals. * While Java supports rather long strings, constrained only by @@ -930,15 +931,14 @@ Module module = new Module(name, filename, linenumbers, mtime); _module_init(node, module, printResults, cflags); module.write(ostream); - } catch (RuntimeException re) { - if (re.getMessage() != null && re.getMessage().equals("Method code too large!")) { + } catch (MethodTooLargeException re) { PyBytecode btcode = loadPyBytecode(filename, true); int thresh = 22000; // No idea, how to determine at this point if a method is oversized, so we just try // a threshold regarding Python code-length, while JVM restriction is actually about // Java bytecode length. Anyway; given that code-lengths are strongly related, this // should work well enough. 
- + while (true) { // Always enjoy to write a line like this :) try { List largest_m_codes = new ArrayList<>(); @@ -993,12 +993,8 @@ _module_init(node, module, printResults, cflags); module.write(ostream); break; - } catch (RuntimeException e) { - if (re.getMessage() == null || !e.getMessage().equals("Method code too large!")) { - throw e; - } else { - thresh -= 100; - } + } catch (MethodTooLargeException e) { + thresh -= 100; } if (thresh == 10000) { /* This value should be well feasible by JVM-bytecode, so something else must be wrong. */ @@ -1007,9 +1003,6 @@ "\nby PyBytecode-approach:\n"+filename); } } - } else { - throw re; - } } } -- Repository URL: https://hg.python.org/jython From jython-checkins at python.org Sun Jan 6 04:31:34 2019 From: jython-checkins at python.org (jeff.allen) Date: Sun, 06 Jan 2019 09:31:34 +0000 Subject: [Jython-checkins] =?utf-8?q?jython=3A_Implement_our_own_limited_?= =?utf-8?q?base64_encode/decode=2E_=28Fixes_=232663=29?= Message-ID: <20190106093134.1.0B0DF43C08997027@mg.python.org> https://hg.python.org/jython/rev/79cd4168b63d changeset: 8214:79cd4168b63d user: Jeff Allen date: Sun Jan 06 09:02:01 2019 +0000 summary: Implement our own limited base64 encode/decode. (Fixes #2663) The DatatypeConverter class we used to solve a problem with large modules was removed at Java 9 SE but the replacement Base64 class does not exist in Java 7 which we'd like to support. Also tidies some formatting and comments. 
files: Lib/test/test_large_method_bytecode_jy.py | 2 - NEWS | 1 + src/org/python/compiler/Module.java | 290 ++++++--- src/org/python/core/BytecodeLoader.java | 83 ++- 4 files changed, 270 insertions(+), 106 deletions(-) diff --git a/Lib/test/test_large_method_bytecode_jy.py b/Lib/test/test_large_method_bytecode_jy.py --- a/Lib/test/test_large_method_bytecode_jy.py +++ b/Lib/test/test_large_method_bytecode_jy.py @@ -16,7 +16,6 @@ import unittest from test import test_support -@unittest.skipIf(test_support.get_java_version() >= (9,), "Fails on Java 9+, see #2663") class large_method_tests(unittest.TestCase): '''Tests some oversized functions and methods. ''' @@ -53,7 +52,6 @@ ''' self.assertEqual(large_methods.small_function(), 'small 10') -@unittest.skipIf(test_support.get_java_version() >= (9,), "Fails on Java 9+, issue #2663") class large_module_tests(unittest.TestCase): '''Tests a module with oversized main-code. So the whole module is represented as a single PyBytecode object. diff --git a/NEWS b/NEWS --- a/NEWS +++ b/NEWS @@ -5,6 +5,7 @@ Development tip Bugs fixed + - [ 2663 ] Remove dependency on javax.xml.bind.DatatypeConverter - [ 2726 ] os.uname() throws IllegalArgumentException on Windows (Chinese localisation) - [ 2719 ] Divergence of __str__ and __repr__ from CPython - [ 2714 ] Locale and java version failures in test_os_jy diff --git a/src/org/python/compiler/Module.java b/src/org/python/compiler/Module.java --- a/src/org/python/compiler/Module.java +++ b/src/org/python/compiler/Module.java @@ -858,151 +858,239 @@ bytes not directly suitable as String-values. cStringIO does not use Base64 or something, but rather supports only string-compatible data. */ - // so we use Java-reflection... + // so we use Java-serialization...
// serialize the object ByteArrayOutputStream bo = new ByteArrayOutputStream(); ObjectOutputStream so = new ObjectOutputStream(bo); so.writeObject(btcode); so.flush(); - String code_str = DatatypeConverter.printBase64Binary(bo.toByteArray()); + // From Java 8 use: String code_str = Base64.getEncoder().encodeToString(bo.toByteArray()); + String code_str = base64encodeToString(bo.toByteArray()); so.close(); bo.close(); return code_str; } + /** + * Implement a simplified base64 encoding compatible with the decoding in BytecodeLoader. This + * encoder adds no '=' padding or line-breaks. equivalent to + * {@code binascii.b2a_base64(bytes).rstrip('=\n')}. + * + * @param data to encode + * @return the string encoding the data + */ + private static String base64encodeToString(byte[] data) { + + final int N = data.length; + int tail = N % 3; + + StringBuilder chars = new StringBuilder(((N / 3) + 1) * 4); + + // Process bytes in blocks of three + int b = 0, quantum; + while (b <= N - 3) { + // Process [b:b+3] + quantum = ((data[b++] & 0xff) << 16) + ((data[b++] & 0xff) << 8) + (data[b++] & 0xff); + chars.append(base64enc[quantum >> 18]); + chars.append(base64enc[(quantum >> 12) & 0x3f]); + chars.append(base64enc[(quantum >> 6) & 0x3f]); + chars.append(base64enc[quantum & 0x3f]); + } + + // Process the tail bytes + if (tail >= 1) { + quantum = ((data[b++] & 0xff) << 8); + if (tail == 2) { + quantum += data[b++] & 0xff; + } + chars.append(base64enc[quantum >> 10]); + chars.append(base64enc[(quantum >> 4) & 0x3f]); + if (tail == 2) { + chars.append(base64enc[(quantum << 2) & 0x3f]); + } + } + + return chars.toString(); + } + + /** Look-up table for {@link #base64encodeToString(byte[])}. */ + private static final char[] base64enc = + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/".toCharArray(); + private static final int maxLiteral = 65535; /** - * This method stores Python-Bytecode in String literals. 
- * While Java supports rather long strings, constrained only by - * int-addressing of arrays, it supports only up to 65535 characters - * in literals (not sure how escape-sequences are counted). - * To circumvent this limitation, the code is automatically splitted - * into several literals with the following naming-scheme. - * - * - The marker-interface 'ContainsPyBytecode' indicates that a class - * contains (static final) literals of the following scheme: - * - a prefix of '___' indicates a bytecode-containing string literal - * - a number indicating the number of parts follows - * - '0_' indicates that no splitting occurred - * - otherwise another number follows, naming the index of the literal - * - indexing starts at 0 - * + * This method stores Base64 encoded Python byte code in one or more String literals. + *

+ * While Java String objects are limited only by the address range of arrays, the class file + * standard only supports literals representable in at most 65535 bytes of modified UTF-8. This + * method is used only with base64 Strings (therefore ASCII without nulls) and so each character + * occupies exactly 1 byte in the class file after encoding to UTF-8. + *

+ * To work within the 65535 byte limitation, the {@code code_str} is split into several literals + * with the following naming-scheme: + *

    + *
  • The marker-interface 'ContainsPyBytecode' indicates that a class contains (static final) + * literals of the following scheme: + *
  • a prefix of '___' indicates a bytecode-containing string literal + *
  • a number indicating the number of parts follows + *
  • '0_' indicates that no splitting occurred + *
  • otherwise another number follows, naming the index of the literal + *
  • indexing starts at 0 + *
* Examples: - * ___0_method1 contains bytecode for method1 - * ___2_0_method2 contains first part of method2's bytecode - * ___2_1_method2 contains second part of method2's bytecode - * - * Note that this approach is provisional. In future, Jython might contain - * the bytecode directly as bytecode-objects. The current approach was - * feasible with far less complicated JVM bytecode-manipulation, but needs - * special treatment after class-loading. + *
    + *
  • {@code ___0_method1} contains bytecode for method1 + *
  • {@code ___2_0_method2} contains first part of method2's bytecode + *
  • {@code ___2_1_method2} contains second part of method2's bytecode + *
+ * Note that this approach is provisional. In future, Jython might contain the bytecode directly + * as bytecode-objects. The current approach was feasible with far less complicated JVM + * bytecode-manipulation, but needs special treatment after class-loading. + * + * @param name of the method or function being generated + * @param code_str Base64 encoded CPython byte code + * @param module currently being defined as a class file + * @throws java.io.IOException */ private static void insert_code_str_to_classfile(String name, String code_str, Module module) throws java.io.IOException { - // We might need to split the code into several literals. - if (code_str.length() > maxLiteral) { - int splits = code_str.length()/maxLiteral; - if (code_str.length()%maxLiteral > 0) { + if (code_str.length() <= maxLiteral) { + // This can go as a single literal + module.classfile.addFinalStringLiteral("___0_" + name, code_str); + } else { + // We need to split the code into several literals. + int splits = code_str.length() / maxLiteral; + if (code_str.length() % maxLiteral > 0) { ++splits; } int pos = 0, i = 0; - for (; pos+maxLiteral <= code_str.length(); ++i) { - module.classfile.addFinalStringLiteral( - "___"+splits+"_"+i+"_"+name, - code_str.substring(pos, pos+maxLiteral)); + for (; pos + maxLiteral <= code_str.length(); ++i) { + module.classfile.addFinalStringLiteral("___" + splits + "_" + i + "_" + name, + code_str.substring(pos, pos + maxLiteral)); pos += maxLiteral; } if (i < splits) { - module.classfile.addFinalStringLiteral( - "___"+splits+"_"+i+"_"+name, + module.classfile.addFinalStringLiteral("___" + splits + "_" + i + "_" + name, code_str.substring(pos)); } - } else { - module.classfile.addFinalStringLiteral("___0_"+name, code_str); } } + /** + * Create and write a Python module as a Java class file. 
+ * + * @param node AST of the module to write + * @param ostream stream onto which to write it + * @param name + * @param filename + * @param linenumbers + * @param printResults + * @param cflags + * @param mtime + * @throws Exception + */ public static void compile(mod node, OutputStream ostream, String name, String filename, boolean linenumbers, boolean printResults, CompilerFlags cflags, long mtime) throws Exception { + try { Module module = new Module(name, filename, linenumbers, mtime); _module_init(node, module, printResults, cflags); module.write(ostream); + } catch (MethodTooLargeException re) { - PyBytecode btcode = loadPyBytecode(filename, true); - int thresh = 22000; - // No idea, how to determine at this point if a method is oversized, so we just try - // a threshold regarding Python code-length, while JVM restriction is actually about - // Java bytecode length. Anyway; given that code-lengths are strongly related, this - // should work well enough. + PyBytecode btcode = loadPyBytecode(filename, true); + int thresh = 22000; + /* + * No idea, how to determine at this point if a method is oversized, so we just try a + * threshold regarding Python code-length, while JVM restriction is actually about Java + * bytecode length. Anyway; given that code-lengths are strongly related, this should + * work well enough. + */ + while (true) { // Always enjoy to write a line like this :) + try { + List largest_m_codes = new ArrayList<>(); + Stack buffer = new Stack<>(); + // HashSet allCodes = new HashSet<>(); + buffer.push(btcode); + // allCodes.add(btcode); - while (true) { // Always enjoy to write a line like this :) - try { - List largest_m_codes = new ArrayList<>(); - Stack buffer = new Stack<>(); - //HashSet allCodes = new HashSet<>(); - buffer.push(btcode); - //allCodes.add(btcode); - while (!buffer.isEmpty()) { - // Probably this cannot yield cycles, so cycle-proof stuff - // is out-commented for now. 
(everything regarding 'allCodes') - PyBytecode bcode = buffer.pop(); - if (bcode.co_code.length > thresh) { - largest_m_codes.add(bcode); - } else { - // If a function needs to be represented as CPython bytecode, we create - // all inner PyCode-items (classes, functions, methods) also as CPython - // bytecode implicitly, so no need to look at them individually. - // Maybe we can later optimize this such that inner methods can be - // JVM-bytecode as well (if not oversized themselves). - for (PyObject item: bcode.co_consts) { - if (item instanceof PyBytecode /*&& !allCodes.contains(item)*/) { - PyBytecode mpbc = (PyBytecode) item; - buffer.push(mpbc); - //allCodes.add(mpbc); - } + while (!buffer.isEmpty()) { + /* + * Probably this cannot yield cycles, so cycle-proof stuff is out-commented + * for now. (everything regarding 'allCodes') + */ + PyBytecode bcode = buffer.pop(); + if (bcode.co_code.length > thresh) { + largest_m_codes.add(bcode); + } else { + /* + * If a function needs to be represented as CPython bytecode, we create + * all inner PyCode-items (classes, functions, methods) also as CPython + * bytecode implicitly, so no need to look at them individually. Maybe + * we can later optimize this such that inner methods can be + * JVM-bytecode as well (if not oversized themselves). 
+ */ + for (PyObject item : bcode.co_consts) { + if (item instanceof PyBytecode /* && !allCodes.contains(item) */) { + PyBytecode mpbc = (PyBytecode) item; + buffer.push(mpbc); + // allCodes.add(mpbc); } } } - Module module = new Module(name, filename, linenumbers, mtime); - module.oversized_methods = new Hashtable<>(largest_m_codes.size()); - int ov_id = 0; - String name_id; - for (PyBytecode largest_m_code: largest_m_codes) { - if (!PyCodeConstant.isJavaIdentifier(largest_m_code.co_name)) { - name_id = "f$_"+ov_id++; - } else { - name_id = largest_m_code.co_name+"$_"+ov_id++; - } - if (largest_m_code.co_name.equals("")) { - // In Jython's opinion module begins at line 0 - // (while CPython reports line 1) - module.oversized_methods.put( - largest_m_code.co_name+0, name_id); - } else { - module.oversized_methods.put( - largest_m_code.co_name+largest_m_code.co_firstlineno, name_id); - } - String code_str = serializePyBytecode(largest_m_code); - insert_code_str_to_classfile(name_id, code_str, module); + } + + Module module = new Module(name, filename, linenumbers, mtime); + + module.oversized_methods = new Hashtable<>(largest_m_codes.size()); + int ov_id = 0; + String name_id; + + for (PyBytecode largest_m_code : largest_m_codes) { + if (!PyCodeConstant.isJavaIdentifier(largest_m_code.co_name)) { + name_id = "f$_" + ov_id++; + } else { + name_id = largest_m_code.co_name + "$_" + ov_id++; + } + if (largest_m_code.co_name.equals("")) { + /* + * In Jython's opinion module begins at line 0 (while CPython reports + * line 1) + */ + module.oversized_methods.put(largest_m_code.co_name + 0, name_id); + } else { + module.oversized_methods.put( + largest_m_code.co_name + largest_m_code.co_firstlineno, + name_id); } - module.classfile.addInterface(p(org.python.core.ContainsPyBytecode.class)); - _module_init(node, module, printResults, cflags); - module.write(ostream); - break; - } catch (MethodTooLargeException e) { - thresh -= 100; + + String code_str = 
serializePyBytecode(largest_m_code); + insert_code_str_to_classfile(name_id, code_str, module); } - if (thresh == 10000) { /* This value should be well feasible by JVM-bytecode, - so something else must be wrong. */ - throw new RuntimeException( - "For unknown reason, too large method code couldn't be resolved"+ - "\nby PyBytecode-approach:\n"+filename); - } + + module.classfile.addInterface(p(org.python.core.ContainsPyBytecode.class)); + + _module_init(node, module, printResults, cflags); + module.write(ostream); + + break; + + } catch (MethodTooLargeException e) { + thresh -= 1000; } + if (thresh < 10000) { + /* + * This value should be well feasible by JVM-bytecode, so something else must be + * wrong. + */ + throw new RuntimeException( + "For unknown reason, too large method code couldn't be resolved" + + "\nby PyBytecode-approach:\n" + filename); + } + } } } diff --git a/src/org/python/core/BytecodeLoader.java b/src/org/python/core/BytecodeLoader.java --- a/src/org/python/core/BytecodeLoader.java +++ b/src/org/python/core/BytecodeLoader.java @@ -77,9 +77,9 @@ } private static PyCode parseSerializedCode(String code_str) - throws IOException, ClassNotFoundException - { - byte[] b = DatatypeConverter.parseBase64Binary(code_str); + throws IOException, ClassNotFoundException { + // From Java 8 use: byte[] b = Base64.getDecoder().decode(code_str); + byte[] b = base64decode(code_str); ByteArrayInputStream bi = new ByteArrayInputStream(b); ObjectInputStream si = new ObjectInputStream(bi); PyBytecode meth_code = (PyBytecode) si.readObject(); @@ -89,6 +89,83 @@ } /** + * Implement a restricted form of base64 decoding compatible with the encoding in Module. This + * decoder treats characters outside the set of 64 necessary to encode data as errors, including + * the pad "=". As a result, the length of the argument exactly determines the size of array + * returned. 
+ * + * @param src to decode + * @return a new byte array + * @throws IllegalArgumentException if src has an invalid character or impossible length. + */ + private static byte[] base64decode(String src) throws IllegalArgumentException { + + // Length L is a multiple of 4 plus 0, 2 or 3 tail characters (bearing 0, 8, or 16 bits) + final int L = src.length(); + final int tail = L % 4; // 0 to 3 where 1 (an extra 6 bits) is invalid. + if (tail == 1) { + throw new IllegalArgumentException("Input length invalid (4n+1)"); + } + + // src encodes exactly this many bytes: + final int N = (L / 4) * 3 + (tail > 0 ? tail - 1 : 0); + byte[] data = new byte[N]; + + // Work through src in blocks of 4 + int s = 0, b = 0, quantum; + while (s <= L - 4) { + // Process src[s:s+4] + quantum = (base64CharToBits(src.charAt(s++)) << 18) + + (base64CharToBits(src.charAt(s++)) << 12) + + (base64CharToBits(src.charAt(s++)) << 6) + base64CharToBits(src.charAt(s++)); + data[b++] = (byte) (quantum >> 16); + data[b++] = (byte) (quantum >> 8); + data[b++] = (byte) quantum; + } + + // Now deal with 2 or 3 tail characters, generating one or two bytes. + if (tail >= 2) { + // Repeat the loop body, but everything is 8 bits to the right. + quantum = (base64CharToBits(src.charAt(s++)) << 10) + + (base64CharToBits(src.charAt(s++)) << 4); + data[b++] = (byte) (quantum >> 8); + if (tail == 3) { + quantum += (base64CharToBits(src.charAt(s++)) >> 2); + data[b++] = (byte) quantum; + } + } + + return data; + } + + /** + * Helper for {@link #base64decode(String)}, converting one character. 
+ * @param c to convert + * @return value 0..63 + * @throws IllegalArgumentException if not a base64 character + */ + private static int base64CharToBits(char c) throws IllegalArgumentException { + if (c >= 'a') { + if (c <= 'z') { + return c - ('a' - 26); + } + } else if (c >= 'A') { + if (c <= 'Z') { + return c - 'A'; + } + } else if (c >= '0') { + if (c <= '9') { + return c + (52 - '0'); + } + } else if (c == '+') { + return 62; + } else if (c == '/') { + return 63; + } + throw new IllegalArgumentException("Invalid character " + c); + } + + /** * This method looks for Python-Bytecode stored in String literals. * While Java supports rather long strings, constrained only by * int-addressing of arrays, it supports only up to 65535 characters -- Repository URL: https://hg.python.org/jython From jython-checkins at python.org Tue Jan 8 01:18:14 2019 From: jython-checkins at python.org (jeff.allen) Date: Tue, 08 Jan 2019 06:18:14 +0000 Subject: [Jython-checkins] =?utf-8?q?jython=3A_Remove_mention_of_python27?= =?utf-8?q?=2Edll_that_prevented_installation_on_Linux=2E?= Message-ID: <20190108061814.1.A76BADF74FD42314@mg.python.org> https://hg.python.org/jython/rev/a8026585b3c2 changeset: 8215:a8026585b3c2 user: Jeff Allen date: Mon Jan 07 21:51:03 2019 +0000 summary: Remove mention of python27.dll that prevented installation on Linux. jython.exe embeds its own CPython since #d638b2c5, so python27.dll was removed, but we missed the mention in the Linux installer. 
files: installer/src/java/org/python/util/install/StartScriptGenerator.java | 1 - 1 files changed, 0 insertions(+), 1 deletions(-) diff --git a/installer/src/java/org/python/util/install/StartScriptGenerator.java b/installer/src/java/org/python/util/install/StartScriptGenerator.java --- a/installer/src/java/org/python/util/install/StartScriptGenerator.java +++ b/installer/src/java/org/python/util/install/StartScriptGenerator.java @@ -81,7 +81,6 @@ } Files.delete(bindir.resolve("jython.py")); Files.delete(bindir.resolve("jython.exe")); - Files.delete(bindir.resolve("python27.dll")); Files.setPosixFilePermissions(bindir.resolve("jython"), PosixFilePermissions.fromString("rwxr-xr-x")); // 0755 } -- Repository URL: https://hg.python.org/jython From jython-checkins at python.org Tue Jan 8 17:13:26 2019 From: jython-checkins at python.org (jeff.allen) Date: Tue, 08 Jan 2019 22:13:26 +0000 Subject: [Jython-checkins] =?utf-8?q?jython=3A_=28Trivial=29_Remove_2_unu?= =?utf-8?q?sed_imports=2E_Now_builds_on_Java_11=2E?= Message-ID: <20190108221326.1.00F3E0D9197AF8F7@mg.python.org> https://hg.python.org/jython/rev/64a1c3aff9ed changeset: 8216:64a1c3aff9ed user: Jeff Allen date: Tue Jan 08 22:09:47 2019 +0000 summary: (Trivial) Remove 2 unused imports. Now builds on Java 11. 
files: src/org/python/compiler/Module.java | 1 - src/org/python/core/BytecodeLoader.java | 1 - 2 files changed, 0 insertions(+), 2 deletions(-) diff --git a/src/org/python/compiler/Module.java b/src/org/python/compiler/Module.java --- a/src/org/python/compiler/Module.java +++ b/src/org/python/compiler/Module.java @@ -18,7 +18,6 @@ import java.util.Hashtable; import java.util.Stack; import java.util.List; -import javax.xml.bind.DatatypeConverter; import org.objectweb.asm.Label; import org.objectweb.asm.MethodTooLargeException; diff --git a/src/org/python/core/BytecodeLoader.java b/src/org/python/core/BytecodeLoader.java --- a/src/org/python/core/BytecodeLoader.java +++ b/src/org/python/core/BytecodeLoader.java @@ -8,7 +8,6 @@ import java.io.ByteArrayInputStream; import java.io.IOException; import java.io.ObjectInputStream; -import javax.xml.bind.DatatypeConverter; import org.objectweb.asm.ClassReader; import org.python.util.Generic; -- Repository URL: https://hg.python.org/jython From jython-checkins at python.org Wed Jan 9 17:31:13 2019 From: jython-checkins at python.org (jeff.allen) Date: Wed, 09 Jan 2019 22:31:13 +0000 Subject: [Jython-checkins] =?utf-8?q?jython=3A_Move_all_ASM_API_reference?= =?utf-8?q?s_to_ASM7=2E?= Message-ID: <20190109223113.1.0DDF04EBC99AEB14@mg.python.org> https://hg.python.org/jython/rev/37365325d253 changeset: 8217:37365325d253 user: Jeff Allen date: Wed Jan 09 17:28:41 2019 +0000 summary: Move all ASM API references to ASM7. This addresses a failure to recognise java.util.regex.Pattern and Matcher as public when scanning packages. Changes elsewhere are for consistency. 
files: src/org/python/compiler/Code.java | 2 +- src/org/python/core/AnnotationReader.java | 4 ++-- src/org/python/core/packagecache/PackageManager.java | 2 +- src/org/python/expose/generate/ExposedFieldFinder.java | 2 +- src/org/python/expose/generate/ExposedMethodFinder.java | 2 +- src/org/python/expose/generate/ExposedTypeProcessor.java | 4 ++-- src/org/python/expose/generate/RestrictiveAnnotationVisitor.java | 2 +- src/org/python/modules/jffi/SkinnyMethodAdapter.java | 2 +- tests/java/org/python/expose/generate/ExposeMethodFinderTest.java | 2 +- 9 files changed, 11 insertions(+), 11 deletions(-) diff --git a/src/org/python/compiler/Code.java b/src/org/python/compiler/Code.java --- a/src/org/python/compiler/Code.java +++ b/src/org/python/compiler/Code.java @@ -22,7 +22,7 @@ //XXX: I'd really like to get sig and access out of here since MethodVistitor // should already have this information. public Code(MethodVisitor mv, String sig, int access) { - super(ASM5); + super(ASM7); this.mv = mv; this.sig = sig; nlocals = -sigSize(sig, false); diff --git a/src/org/python/core/AnnotationReader.java b/src/org/python/core/AnnotationReader.java --- a/src/org/python/core/AnnotationReader.java +++ b/src/org/python/core/AnnotationReader.java @@ -36,7 +36,7 @@ * @throws IOException - if the classfile is malformed. 
*/ public AnnotationReader(byte[] data) throws IOException { - super(Opcodes.ASM5); + super(Opcodes.ASM7); ClassReader r; try { r = new ClassReader(data); @@ -52,7 +52,7 @@ nextVisitIsVersion = desc.equals("Lorg/python/compiler/APIVersion;"); nextVisitIsMTime = desc.equals("Lorg/python/compiler/MTime;"); nextVisitIsFilename = desc.equals("Lorg/python/compiler/Filename;"); - return new AnnotationVisitor(Opcodes.ASM5) { + return new AnnotationVisitor(Opcodes.ASM7) { public void visit(String name, Object value) { if (nextVisitIsVersion) { diff --git a/src/org/python/core/packagecache/PackageManager.java b/src/org/python/core/packagecache/PackageManager.java --- a/src/org/python/core/packagecache/PackageManager.java +++ b/src/org/python/core/packagecache/PackageManager.java @@ -180,7 +180,7 @@ private int class_access; public AccessVisitor() throws IOException { - super(Opcodes.ASM5); + super(Opcodes.ASM7); } @Override diff --git a/src/org/python/expose/generate/ExposedFieldFinder.java b/src/org/python/expose/generate/ExposedFieldFinder.java --- a/src/org/python/expose/generate/ExposedFieldFinder.java +++ b/src/org/python/expose/generate/ExposedFieldFinder.java @@ -14,7 +14,7 @@ private String doc; public ExposedFieldFinder(String name, FieldVisitor delegate) { - super(Opcodes.ASM5); + super(Opcodes.ASM7); fieldName = name; this.delegate = delegate; } diff --git a/src/org/python/expose/generate/ExposedMethodFinder.java b/src/org/python/expose/generate/ExposedMethodFinder.java --- a/src/org/python/expose/generate/ExposedMethodFinder.java +++ b/src/org/python/expose/generate/ExposedMethodFinder.java @@ -35,7 +35,7 @@ String desc, String[] exceptions, MethodVisitor delegate) { - super(Opcodes.ASM5, delegate); + super(Opcodes.ASM7, delegate); this.typeName = typeName; this.onType = onType; this.access = access; diff --git a/src/org/python/expose/generate/ExposedTypeProcessor.java b/src/org/python/expose/generate/ExposedTypeProcessor.java --- 
a/src/org/python/expose/generate/ExposedTypeProcessor.java +++ b/src/org/python/expose/generate/ExposedTypeProcessor.java @@ -114,7 +114,7 @@ private boolean generatedStaticBlock; private TypeProcessor(ClassVisitor cv) { - super(Opcodes.ASM5, cv); + super(Opcodes.ASM7, cv); } @Override @@ -228,7 +228,7 @@ desc, signature, exceptions); - return new MethodVisitor(Opcodes.ASM5, passthroughVisitor) { + return new MethodVisitor(Opcodes.ASM7, passthroughVisitor) { @Override public void visitCode() { diff --git a/src/org/python/expose/generate/RestrictiveAnnotationVisitor.java b/src/org/python/expose/generate/RestrictiveAnnotationVisitor.java --- a/src/org/python/expose/generate/RestrictiveAnnotationVisitor.java +++ b/src/org/python/expose/generate/RestrictiveAnnotationVisitor.java @@ -10,7 +10,7 @@ public class RestrictiveAnnotationVisitor extends AnnotationVisitor { public RestrictiveAnnotationVisitor() { - super(Opcodes.ASM5); + super(Opcodes.ASM7); } public AnnotationVisitor visitAnnotation(String name, String desc) { diff --git a/src/org/python/modules/jffi/SkinnyMethodAdapter.java b/src/org/python/modules/jffi/SkinnyMethodAdapter.java --- a/src/org/python/modules/jffi/SkinnyMethodAdapter.java +++ b/src/org/python/modules/jffi/SkinnyMethodAdapter.java @@ -31,7 +31,7 @@ private ClassVisitor cv; public SkinnyMethodAdapter(ClassVisitor cv, int flags, String name, String signature, String something, String[] exceptions) { - super(ASM4); + super(ASM7); setMethodVisitor(cv.visitMethod(flags, name, signature, something, exceptions)); this.cv = cv; this.name = name; diff --git a/tests/java/org/python/expose/generate/ExposeMethodFinderTest.java b/tests/java/org/python/expose/generate/ExposeMethodFinderTest.java --- a/tests/java/org/python/expose/generate/ExposeMethodFinderTest.java +++ b/tests/java/org/python/expose/generate/ExposeMethodFinderTest.java @@ -16,7 +16,7 @@ methodName, descriptor, null, - new MethodVisitor(Opcodes.ASM4) {}) { + new MethodVisitor(Opcodes.ASM7) {}) 
{ @Override public void handleResult(InstanceMethodExposer exposer) { -- Repository URL: https://hg.python.org/jython From jython-checkins at python.org Sat Jan 12 08:26:19 2019 From: jython-checkins at python.org (jeff.allen) Date: Sat, 12 Jan 2019 13:26:19 +0000 Subject: [Jython-checkins] =?utf-8?q?jython=3A_Scan_the_whole_Java_runtim?= =?utf-8?q?e_filesystem_for_packages_=28fixes_=232362=29=2E?= Message-ID: <20190112132619.1.ABAAC68F37765956@mg.python.org> https://hg.python.org/jython/rev/7fd811df6d33 changeset: 8218:7fd811df6d33 user: Jeff Allen date: Sat Jan 12 12:23:05 2019 +0000 summary: Scan the whole Java runtime filesystem for packages (fixes #2362). It does not seem necessary to provide the user with a way to filter modules to be included. A fuller implementation later should allow for a module path. files: registry | 17 +- src/org/python/core/packagecache/CachedJarsPackageManager.java | 49 ++++-- src/org/python/core/packagecache/PathPackageManager.java | 4 +- src/org/python/core/packagecache/SysPackageManager.java | 79 +++++++-- 4 files changed, 98 insertions(+), 51 deletions(-) diff --git a/registry b/registry --- a/registry +++ b/registry @@ -10,10 +10,8 @@ #python.path = d:\\python20\\lib # Set the directory to use for caches (currently just package information) -# This directory should be writable by the user -# If this is an absolute path it is used as given -# Otherwise it is interpreted relative to sys.prefix -# (typically the directory of this file) +# This directory should be writable by the user. If this is an absolute path it is used as given, +# otherwise it is interpreted relative to sys.prefix (typically the directory of this file). python.cachedir = cachedir # Setting this property to true disables the package scan for the cachedir. @@ -22,15 +20,12 @@ # Properties to check for initializing and updating the package cache # Values shown here are those hard-coded in Jython's cache manager. 
-# Treat JARs on the classpath and (before Java 9) in the JRE as a source of Python packages. -#python.packages.paths = java.class.path, sun.boot.class.path # before Java 9 +# Treat JARs on the classpath and (up to Java 8) in the JRE as a source of Python packages. +#python.packages.paths = java.class.path, sun.boot.class.path # up to Java 8 #python.packages.paths = java.class.path # from Java 9 # Treat installed optional (Java) packages as source of Python packages (before Java 9) -#python.packages.directories = java.ext.dirs # before Java 9 +#python.packages.directories = java.ext.dirs # up to Java 8 #python.packages.directories # undefined from Java 9 -# Treat the following Java modules as sources of of Python packages (from Java 9) -#python.packages.modules = java.base, java.desktop, java.logging, java.se, java.sql, java.xml - # Set verbosity to error, warning, message, comment, or debug # for varying levels of informative messages from Jython. Normally @@ -39,7 +34,7 @@ # Jython ships with a JLine console (http://jline.sourceforge.net/) out of the # box. This is selected by default in the Jython command-line application -# (org.python.util,jython) if you do not define python.console to be another +# (org.python.util.jython) if you do not define python.console to be another # class on the command line. Alternatively, you could set python.console here, # but be aware that this will also affect the console in applications that # embed a PythonInterpreter, or use Jython as a JSR-223 script engine. 
diff --git a/src/org/python/core/packagecache/CachedJarsPackageManager.java b/src/org/python/core/packagecache/CachedJarsPackageManager.java --- a/src/org/python/core/packagecache/CachedJarsPackageManager.java +++ b/src/org/python/core/packagecache/CachedJarsPackageManager.java @@ -261,14 +261,18 @@ } /** - * Create (or ensure we have) a {@link PyJavaPackage}, for each package in a jar specified by a - * file or URL, descending from {@link PackageManager#topLevelPackage} in this - * {@link PackageManager} instance. Ensure that the class list in each package is updated with - * the classes this JAR supplies to it. This information may be from a previously cached account - * of the JAR, if the last-modified time of the JAR matches a cached value. Otherwise, it will - * be obtained by inspecting the JAR, and a new cache will be written (if requested). Eventually - * updated info is (re-)cached if param cache is true. Persistent cache storage access goes - * through {@link #inOpenCacheFile(String)} and {@link #outCreateCacheFile(JarXEntry, boolean)}. + * Create (or ensure we have) a {@link PyJavaPackage}, descending from + * {@link PackageManager#topLevelPackage} in this {@link PackageManager} instance, for each + * package in a jar specified by a file or URL. Ensure that the class list in each package is + * updated with the classes this JAR supplies to it. + * + * The information concerning packages in the JAR and the classes they contain, may be read from + * from a previously cached account of the JAR, if the last-modified time of the JAR matches a + * cached value. If it is not read from a cache, it will be obtained by inspecting the JAR, and + * a new cache will be written (if requested). + * + * Access to persistent cache storage goes through {@link #inOpenCacheFile(String)} and + * {@link #outCreateCacheFile(JarXEntry, boolean)}. 
* * @param jarurl identifying the JAR if {@code jarfile} is {@code null} * @param jarfile identifying the JAR @@ -276,13 +280,17 @@ */ private void addJarToPackages(URL jarurl, File jarfile, boolean writeCache) { try { + // We try to read the cache (for this jar) if caching is in operation. boolean readCache = this.index != null; + // We write a cache if caching is in operation AND writing has been requested. writeCache &= readCache; + URLConnection jarconn = null; boolean localfile = true; + // If a local JAR file was not given directly in jarfile, try to find one from the URL. if (jarfile == null) { - // We were not given a File, so the URL must be reliable (but maybe not a file) + // We were not given a File, so the URL must be reliable (but may not be a file) jarconn = jarurl.openConnection(); // The following comment may be out of date. Also 2 reasons or just the bug? /* @@ -296,12 +304,13 @@ jarfilename = jarfilename.replace('/', File.separatorChar); jarfile = new File(jarfilename); } else { + // We can't find a local file localfile = false; } } - // Claimed JAR does not exist. Silently ignore. if (localfile && !jarfile.exists()) { + // Local JAR file claimed or deduced does not exist. Silently ignore. return; } @@ -527,9 +536,10 @@ } } - /** Scan a module from the modular JVM, creating package objects. */ - protected void addModule(Path modulePath) { + /** Scan a Java module, creating package objects. */ + protected void addModuleToPackages(Path modulePath) { try { + comment("reading packages from " + modulePath); Map packages = getModularPackages(modulePath); addPackages(packages, modulePath.toUri().toString()); } catch (IOException ioe) { @@ -543,9 +553,14 @@ * (surviving) classes in two lists: accessible and inaccessible, which is judged according to * {@link #filterByAccess(String, int)}. The returned map is from package to these two lists, * now comma-separated lists, with an '@' between them. 
+ * + * @param modulePath up to and including the name of the module + * @return map from packages to classes + * @throws IOException */ private Map getModularPackages(Path modulePath) throws IOException { + final int M = modulePath.getNameCount(); final Map modPackages = Generic.map(); FileVisitor visitor = new SimpleFileVisitor() { @@ -555,18 +570,18 @@ throws IOException { //System.out.println(" visitFile:" + file); - // file has at least 4 parts /modules/[module]/ ... /[name].class + // file starts with modulePath, then has package & class: / ... /[name].class int n = file.getNameCount(); // Apply name and access tests and conditionally add to modPackages String fileName = file.getFileName().toString(); - if (fileName.endsWith(".class") && n > 3) { + if (fileName.endsWith(".class") && n > M + 1) { // Split off the bare class name String className = fileName.substring(0, fileName.length() - 6); // Check acceptable name: in practice, this is used to ignore inner classes. if (!filterByName(className, false)) { - // File this class by name against the package - String packageName = file.subpath(2, n - 1).toString().replace('/', '.'); + // Parts M to n-1 define the package of this class + String packageName = file.subpath(M, n - 1).toString().replace('/', '.'); ClassList classes = modPackages.get(packageName); if (classes == null) { @@ -575,7 +590,7 @@ modPackages.put(packageName, classes); } - // Put the class on the right list + // Put the class on the accessible or inaccessible list try (InputStream c = Files.newInputStream(file, StandardOpenOption.READ)) { int access = checkAccess(c); if ((access != -1) && !filterByAccess(fileName, access)) { diff --git a/src/org/python/core/packagecache/PathPackageManager.java b/src/org/python/core/packagecache/PathPackageManager.java --- a/src/org/python/core/packagecache/PathPackageManager.java +++ b/src/org/python/core/packagecache/PathPackageManager.java @@ -191,8 +191,8 @@ } /** - * Scan a path that may be a mixture of 
directory and JAR specifiers, and within each path entry - * index the packages. Calls {@link #addDirectory} if a path entry refers to a dir, + * Scan a Java class-path that may be a mixture of directory and JAR specifiers, and within each + * path entry index the packages. Calls {@link #addDirectory} if a path entry refers to a dir, * {@link #addJarToPackages(java.io.File, boolean)} with param cache true if the path entry * refers to a jar. */ diff --git a/src/org/python/core/packagecache/SysPackageManager.java b/src/org/python/core/packagecache/SysPackageManager.java --- a/src/org/python/core/packagecache/SysPackageManager.java +++ b/src/org/python/core/packagecache/SysPackageManager.java @@ -9,11 +9,17 @@ import org.python.core.PySystemState; import java.io.File; +import java.io.IOException; import java.net.URI; import java.nio.file.FileSystem; import java.nio.file.FileSystems; +import java.nio.file.FileVisitResult; +import java.nio.file.FileVisitor; +import java.nio.file.Files; import java.nio.file.Path; import java.nio.file.ProviderNotFoundException; +import java.nio.file.SimpleFileVisitor; +import java.nio.file.attribute.BasicFileAttributes; import java.util.Properties; import java.util.Set; import java.util.StringTokenizer; @@ -67,7 +73,7 @@ /** * Index the contents of every JAR or ZIP in a directory. * - * @param jdir direcory containing some JAR or ZIP files + * @param jdir directory containing some JAR or ZIP files * @param cache * @param saveCache */ @@ -107,30 +113,61 @@ } /** + * Index the packages in every module in a directory. Entries in the directory that are not modules + * (do not contain a {@code module-info.class}) are ignored. Only modules exploded on the file system of this path are (currently) supported, + * and at the time of writing, we only use this method on the {@code jrt:} file system. 
+ * + * @param moduleDir directory containing some modules + */ + private void addModuleDir(final Path moduleDir) { + try { + // Walk the directory tree with this visitor + FileVisitor visitor = new SimpleFileVisitor() { + + @Override + public FileVisitResult preVisitDirectory(Path dir, BasicFileAttributes attrs) { + // System.out.println(dir); + if (dir.equals(moduleDir)) { + // Ignore this, it's just the root) + } else if (Files.exists(dir.resolve("module-info.class"))) { + // dir is a module: scan packages from it. + addModuleToPackages(dir); + return FileVisitResult.SKIP_SUBTREE; + } + return FileVisitResult.CONTINUE; + } + }; + + Files.walkFileTree(moduleDir, visitor); + + } catch (IOException e) { + warning("error enumerating Java modules in " + moduleDir + ": " + e.getMessage()); + } + } + + /** * Walk the packages found in paths specified indirectly through the given {@code Properties} - * object, which in practice is the Jython registry. + * object. * - * @param registry + * @param registry in practice, the Jython registry */ private void findAllPackages(Properties registry) { - String defaultPaths = "java.class.path"; - String defaultDirectories = ""; - String defaultModules = "java.base,java.desktop,java.logging,java.se,java.sql,java.xml"; - + /* + * Packages in the Java runtime environment are enumerated in the jrt file system (from Java + * 9 onwards), or in JARs and directories designated by the properties + * sun.boot.class.path and java.ext.dirs (up to Java 8). + */ + String defaultClassPaths, defaultDirectories; try { // Support for the modular JVM (particular packages). - // XXX This may not be our final approach: maybe enumerate all the packages instead? 
- Set modules = - split(registry.getProperty("python.packages.modules", defaultModules)); FileSystem jrtfs = FileSystems.getFileSystem(URI.create("jrt:/")); - for (String moduleName : modules) { - Path modulePath = jrtfs.getPath("/modules/" + moduleName); - addModule(modulePath); - } + addModuleDir(jrtfs.getPath("/modules/")); + defaultClassPaths = "java.class.path"; + defaultDirectories = ""; } catch (ProviderNotFoundException e) { // Running on a JVM before Java 9: add boot class path and optional extensions. - defaultPaths = "java.class.path,sun.boot.class.path"; + defaultClassPaths = "java.class.path,sun.boot.class.path"; defaultDirectories = "java.ext.dirs"; } @@ -139,13 +176,13 @@ * string. The default setting causes directories and JARs on the classpath and in the JRE * (before Java 9) to be sources of Python packages. */ - Set paths = split(registry.getProperty("python.packages.paths", defaultPaths)); - for (String name : paths) { - // Each property is a path string containing directories - String path = registry.getProperty(name); - if (path != null) { + Set cps = split(registry.getProperty("python.packages.paths", defaultClassPaths)); + for (String name : cps) { + // Each property is a class-path string containing JARS and directories + String classPath = registry.getProperty(name); + if (classPath != null) { // Each path may be a mixture of directory and JAR specifiers (source of packages) - addClassPath(path); + addClassPath(classPath); } } -- Repository URL: https://hg.python.org/jython