From jython-checkins at python.org Tue Dec 24 06:10:09 2019 From: jython-checkins at python.org (jeff.allen) Date: Tue, 24 Dec 2019 11:10:09 +0000 Subject: [Jython-checkins] =?utf-8?q?jython=3A_Add_sign-on_log_message_to?= =?utf-8?q?_PyServlet?= Message-ID: <20191224111009.1.08EE97A64741EE52@mg.python.org> https://hg.python.org/jython/rev/84ad2ab25bca changeset: 8312:84ad2ab25bca user: Jeff Allen date: Wed Dec 18 18:02:29 2019 +0000 summary: Add sign-on log message to PyServlet A latent bug (maybe) where PrePy.getEffectiveLoggingLevel causes an NPE, and revealed in experimentation, is also treated. files: src/org/python/Version.java | 4 +- src/org/python/core/PrePy.java | 27 +++++++------ src/org/python/core/PySystemState.java | 8 ++-- src/org/python/util/PyServlet.java | 28 ++++++++++--- src/org/python/util/jython.java | 4 +- 5 files changed, 44 insertions(+), 27 deletions(-) diff --git a/src/org/python/Version.java b/src/org/python/Version.java --- a/src/org/python/Version.java +++ b/src/org/python/Version.java @@ -121,7 +121,7 @@ * Describe the current Java VM. */ public static String getVM() { - return String.format("\n[%s (%s)]", System.getProperty("java.vm.name"), + return String.format("[%s (%s)]", System.getProperty("java.vm.name"), System.getProperty("java.vm.vendor")); } @@ -130,7 +130,7 @@ * the Java VM). */ public static String getVersion() { - return String.format("%.80s (%.80s) %.80s", PY_VERSION, getBuildInfo(), getVM()); + return String.format("%.80s (%.80s)\n%.80s", PY_VERSION, getBuildInfo(), getVM()); } public static Set getDefaultCodeFlags() { diff --git a/src/org/python/core/PrePy.java b/src/org/python/core/PrePy.java --- a/src/org/python/core/PrePy.java +++ b/src/org/python/core/PrePy.java @@ -106,14 +106,15 @@ /** * Convenience function to get the effective level of a given Logger, looking up the parent - * chain. + * chain. If the root logger is reached without an explicit level set, assume + * {@code Level.INFO}. 
*/ private static Level getEffectiveLoggingLevel(Logger logger) { - Level level; - while ((level = logger.getLevel()) == null) { + Level level = null; + while (logger != null && (level = logger.getLevel()) == null) { logger = logger.getParent(); } - return level; + return level != null ? level : Level.INFO; } /** Convenience function to get the effective level of Logger "org.python". */ @@ -125,7 +126,7 @@ * Used by {@link #maybeWrite(Level, String)}, the terminus of all verbosity-based logging * calls, to detect changes made directly to {@link Options#verbose}. */ - private static int savedVerbosity = Py.MESSAGE; + private static int savedVerbosity = MESSAGE; /** * Set the level of the Jython logger "org.python" using the standard {@code java.util.logging} @@ -368,12 +369,12 @@ * then be opened using {@code jarFile = new JarFile(jarFileName)}. The path to the JAR is * returned. If the JAR is accessed by another mechanism ({@code http:} say) this will fail. *

- * The JBoss URL must be a reference to exactly - * {@code vfs:<JAR>/org/python/core/PySystemState.class}, or the same thing using the - * {@code vfszip:} protocol, where <JAR> stands for the absolute path to the Jython JAR in - * VFS. There is no "!/" marker: in JBoss VFS a JAR is treated just like a directory and can no - * longer be opened as a JAR. The method essentially just swaps a VFS protocol for the Java - * {@code file:} protocol. The path returned will be correct only if this naive swap is valid. + * The JBoss URL must be a reference to exactly {@code vfs:<JAR>/org/python/core/PrePy.class}, + * or the same thing using the {@code vfszip:} protocol, where <JAR> stands for the + * absolute path to the Jython JAR in VFS. There is no "!/" marker: in JBoss VFS a JAR is + * treated just like a directory and can no longer be opened as a JAR. The method essentially + * just swaps a VFS protocol for the Java {@code file:} protocol. The path returned will be + * correct only if this naive swap is valid. 
* * @param url into the JAR * @return the file path or {@code null} in the event of a detectable error @@ -396,9 +397,9 @@ case "vfs": case "vfszip": - // path is /some/path/some-jython.jar/org/python/core/PySystemState.class + // path is /some/path/some-jython.jar/org/python/core/PrePy.class String path = url.getPath(); - final String target = ".jar/" + Py.class.getName().replace('.', '/'); + final String target = ".jar/org/python/core/PrePy.class"; int jarIndex = path.indexOf(target); if (jarIndex > 0) { // path contains the target class in a JAR, so make a file URL for it diff --git a/src/org/python/core/PySystemState.java b/src/org/python/core/PySystemState.java --- a/src/org/python/core/PySystemState.java +++ b/src/org/python/core/PySystemState.java @@ -30,6 +30,7 @@ import java.util.concurrent.locks.ReentrantLock; import java.util.jar.JarEntry; import java.util.jar.JarFile; +import java.util.logging.Level; import java.util.logging.Logger; import java.util.regex.Matcher; import java.util.regex.Pattern; @@ -59,7 +60,7 @@ public class PySystemState extends PyObject implements AutoCloseable, ClassDictInit, Closeable, Traverseproc { - protected static final Logger logger = Logger.getLogger("org.python"); + private static final Logger logger = Logger.getLogger("org.python.core"); protected static final String CACHEDIR_DEFAULT_NAME = "cachedir"; @@ -94,10 +95,8 @@ public final static PyTuple _mercurial = new PyTuple(Py.newString("Jython"), Py.newString(Version.getHGIdentifier()), Py.newString(Version.getHGVersion())); - /** - * The copyright notice for this release. - */ + /** The copyright notice for this release. 
*/ public static final PyObject copyright = Py.newString("Copyright (c) 2000-2017 Jython Developers.\n" + "All rights reserved.\n\n" + "Copyright (c) 2000 BeOpen.com.\n" + "All Rights Reserved.\n\n" @@ -251,6 +250,7 @@ __dict__.__setitem__("displayhook", __displayhook__); __dict__.__setitem__("excepthook", __excepthook__); + logger.config("sys module instance created"); } public static void classDictInit(PyObject dict) { diff --git a/src/org/python/util/PyServlet.java b/src/org/python/util/PyServlet.java --- a/src/org/python/util/PyServlet.java +++ b/src/org/python/util/PyServlet.java @@ -5,6 +5,8 @@ import java.util.Enumeration; import java.util.Map; import java.util.Properties; +import java.util.logging.Level; +import java.util.logging.Logger; import java.util.regex.Matcher; import java.util.regex.Pattern; @@ -15,12 +17,12 @@ import javax.servlet.http.HttpServlet; import javax.servlet.http.HttpServletRequest; +import org.python.Version; import org.python.core.PrePy; import org.python.core.Py; import org.python.core.PyException; import org.python.core.PyObject; import org.python.core.PyString; -import org.python.core.PyStringMap; import org.python.core.PySystemState; /** @@ -63,26 +65,35 @@ */ public class PyServlet extends HttpServlet { + protected static final Logger logger = Logger.getLogger("org.python.servlet"); + public static final String SKIP_INIT_NAME = "skip_jython_initialization"; protected static final String INIT_ATTR = "__jython_initialized__"; @Override public void init() { + logger.log(Level.INFO, "Jython {0} servlet {1}", + new Object[] {Version.PY_VERSION, getServletName()}); + + // Config parameters Properties props = new Properties(); - // Config parameters Enumeration e = getInitParameterNames(); while (e.hasMoreElements()) { String name = (String)e.nextElement(); props.put(name, getInitParameter(name)); } + boolean initialize = getServletConfig().getInitParameter(SKIP_INIT_NAME) != null; + if (getServletContext().getAttribute(INIT_ATTR) != 
null) { if (initialize) { - System.err.println("Jython has already been initialized in this context, not " - + "initializing for " + getServletName() + ". Add " + SKIP_INIT_NAME - + " to as an init param to this servlet's configuration to indicate this " - + "is expected."); + logger.log(Level.WARNING, // + "Jython has already been initialized in this context." + + " Not initializing for ''{0}''." + + " Add {1} as an init param to this servlet''s configuration" + + " to indicate this is expected.", + new Object[] {getServletName(), SKIP_INIT_NAME}); } } else if (initialize) { init(props, getServletContext()); @@ -96,9 +107,11 @@ * context, the system state initialization code only runs once. */ protected static void init(Properties props, ServletContext context) { + String rootPath = getRootPath(context); context.setAttribute(INIT_ATTR, true); Properties baseProps = PrePy.getSystemProperties(); + // Context parameters Enumeration e = context.getInitParameterNames(); while (e.hasMoreElements()) { @@ -109,6 +122,7 @@ && baseProps.getProperty("python.home") == null) { props.put("python.home", rootPath + "WEB-INF" + File.separator + "lib"); } + PySystemState.initialize(baseProps, props, new String[0]); PySystemState.add_package("javax.servlet"); PySystemState.add_package("javax.servlet.http"); @@ -237,7 +251,7 @@ public HttpServlet servlet; CacheEntry(HttpServlet servlet, long date) { - this.servlet= servlet; + this.servlet = servlet; this.date = date; } } diff --git a/src/org/python/util/jython.java b/src/org/python/util/jython.java --- a/src/org/python/util/jython.java +++ b/src/org/python/util/jython.java @@ -166,7 +166,9 @@ /** * Try to set the format for SimpleFormatter if no other mechanism has been provided, and - * security allows it. + * security allows it. Note that the absolute fall-back format is: + * {@code "%1$tb %1$td, %1$tY %1$tl:%1$tM:%1$tS %1$Tp %2$s%n%4$s: %5$s%6$s%n"}, + * defined ultimately in {@code sun.util.logging.LoggingSupport}. 
* * @param format to set for {@code java.util.logging.SimpleFormatter} * @throws SecurityException if not allowed to read or set necessary properties. -- Repository URL: https://hg.python.org/jython From jython-checkins at python.org Tue Dec 24 06:10:10 2019 From: jython-checkins at python.org (jeff.allen) Date: Tue, 24 Dec 2019 11:10:10 +0000 Subject: [Jython-checkins] =?utf-8?q?jython=3A_Allow_other_threads_to_com?= =?utf-8?q?plete_after_main_exits_=28fixes_=232836=29=2E?= Message-ID: <20191224111010.1.A71EAF67DB65AB39@mg.python.org> https://hg.python.org/jython/rev/069db4761b37 changeset: 8314:069db4761b37 user: Jeff Allen date: Sun Dec 22 16:33:35 2019 +0000 summary: Allow other threads to complete after main exits (fixes #2836). We leave exiting to the JVM when a main Python program was run and ended with good status. files: NEWS | 1 + src/org/python/util/jython.java | 13 ++++++++++--- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/NEWS b/NEWS --- a/NEWS +++ b/NEWS @@ -9,6 +9,7 @@ Jython 2.7.2b3 Bugs fixed - [ 2820 ] Import fails with UnicodeDecodeError if sys.path contains invalid UTF-8 bytes + - [ 2836 ] Java Swing library works only in interactive jython session Jython 2.7.2b2 Bugs fixed diff --git a/src/org/python/util/jython.java b/src/org/python/util/jython.java --- a/src/org/python/util/jython.java +++ b/src/org/python/util/jython.java @@ -674,9 +674,16 @@ } } - // Shut down in a tidy way - interp.cleanup(); - exit(sts); + /* + * If we arrive here then we ran some Python code. It is possible that threads we started + * are still running, so if the status is currently good, just return into the JVM. (This + * exits with good status if nothing goes wrong subsequently.) + */ + if (sts != Status.OK) { + // Something went wrong running Python code: shut down in a tidy way. 
+ interp.cleanup(); + exit(sts); + } } /** -- Repository URL: https://hg.python.org/jython From jython-checkins at python.org Tue Dec 24 06:10:12 2019 From: jython-checkins at python.org (jeff.allen) Date: Tue, 24 Dec 2019 11:10:12 +0000 Subject: [Jython-checkins] =?utf-8?q?jython=3A_Naming=2C_data_type_and_fo?= =?utf-8?q?rmatting_changes_to_binascii=2Ejava?= Message-ID: <20191224111012.1.B0E2430DC2B58A71@mg.python.org> https://hg.python.org/jython/rev/37639a141a66 changeset: 8316:37639a141a66 user: Jeff Allen date: Tue Dec 24 07:40:57 2019 +0000 summary: Naming, data type and formatting changes to binascii.java There are no functional changes, but some re-ordering and renaming, and some dead statics are removed. The choice of data types is reconsidered to reduce masking and casting. files: src/org/python/modules/binascii.java | 1069 ++++++------- 1 files changed, 479 insertions(+), 590 deletions(-) diff --git a/src/org/python/modules/binascii.java b/src/org/python/modules/binascii.java --- a/src/org/python/modules/binascii.java +++ b/src/org/python/modules/binascii.java @@ -1,17 +1,14 @@ /* * Copyright 2019 Jython Developers + * * Original conversion from CPython source copyright 1998 Finn Bock. * - * This program contains material copyrighted by: - * Copyright (c) 1991, 1992, 1993, 1994 by Stichting Mathematisch Centrum, - * Amsterdam, The Netherlands. + * This program contains material copyrighted by: Copyright (c) 1991, 1992, 1993, 1994 by Stichting + * Mathematisch Centrum, Amsterdam, The Netherlands. 
*/ package org.python.modules; - -import java.util.regex.Pattern; - import org.python.core.ArgParser; import org.python.core.BufferProtocol; import org.python.core.Py; @@ -24,105 +21,73 @@ import org.python.core.PyTuple; import org.python.core.PyUnicode; import org.python.core.buffer.SimpleStringBuffer; -import org.python.core.util.StringUtil; /** - * The binascii.java module contains a number of methods to convert - * between binary and various ASCII-encoded binary - * representations. Normally, you will not use these modules directly but - * use wrapper modules like uu or - * hexbin instead, this module solely - * exists because bit-manipulation of large amounts of data is slow in - * Python. + * The binascii.java module contains a number of methods to convert between binary and + * various ASCII-encoded binary representations. Normally, you will not use these modules directly + * but use wrapper modules like uu or hexbin instead, this module solely exists + * because bit-manipulation of large amounts of data is slow in Python. * *

* The binascii.java module defines the following functions: * *

a2b_uu (string): - * Convert a single line of uuencoded data back to binary and return the - * binary data. Lines normally contain 45 (binary) bytes, except for the - * last line. Line data may be followed by whitespace. - *; Convert a single line of uuencoded data back to binary and return the binary data. Lines + * normally contain 45 (binary) bytes, except for the last line. Line data may be followed by + * whitespace.
b2a_uu (data): - * Convert binary data to a line of ASCII characters, the return value - * is the converted line, including a newline char. The length of - * data should be at most 45. - *; Convert binary data to a line of ASCII characters, the return value is the converted line, + * including a newline char. The length of data should be at most 45.
a2b_base64 (string): - * Convert a block of base64 data back to binary and return the - * binary data. More than one line may be passed at a time. - *; Convert a block of base64 data back to binary and return the binary data. More than one line + * may be passed at a time.
b2a_base64 (data): - * Convert binary data to a line of ASCII characters in base64 coding. - * The return value is the converted line, including a newline char. - * The length of data should be at most 57 to adhere to the base64 - * standard. - *; Convert binary data to a line of ASCII characters in base64 coding. The return value is the + * converted line, including a newline char. The length of data should be at most 57 to + * adhere to the base64 standard.
a2b_hqx (string): - * Convert binhex4 formatted ASCII data to binary, without doing - * RLE-decompression. The string should contain a complete number of - * binary bytes, or (in case of the last portion of the binhex4 data) - * have the remaining bits zero. - *; Convert binhex4 formatted ASCII data to binary, without doing RLE-decompression. The string + * should contain a complete number of binary bytes, or (in case of the last portion of the binhex4 + * data) have the remaining bits zero.
rledecode_hqx (data): - * Perform RLE-decompression on the data, as per the binhex4 - * standard. The algorithm uses 0x90 after a byte as a repeat - * indicator, followed by a count. A count of 0 specifies a byte - * value of 0x90. The routine returns the decompressed data, - * unless data input data ends in an orphaned repeat indicator, in which - * case the Incomplete exception is raised. - *; Perform RLE-decompression on the data, as per the binhex4 standard. The algorithm uses + * 0x90 after a byte as a repeat indicator, followed by a count. A count of 0 + * specifies a byte value of 0x90. The routine returns the decompressed data, unless data + * input data ends in an orphaned repeat indicator, in which case the Incomplete exception + * is raised.
rlecode_hqx (data): - * Perform binhex4 style RLE-compression on data and return the - * result. - *; Perform binhex4 style RLE-compression on data and return the result.
b2a_hqx (data): - * Perform hexbin4 binary-to-ASCII translation and return the - * resulting string. The argument should already be RLE-coded, and have a - * length divisible by 3 (except possibly the last fragment). - *; Perform hexbin4 binary-to-ASCII translation and return the resulting string. The argument + * should already be RLE-coded, and have a length divisible by 3 (except possibly the last + * fragment).
crc_hqx (data, crc): - * Compute the binhex4 crc value of data, starting with an initial - * crc and returning the result. - *; Compute the binhex4 crc value of data, starting with an initial crc and + * returning the result.
Error: - * Exception raised on errors. These are usually programming errors. - *; Exception raised on errors. These are usually programming errors.
Incomplete: - * Exception raised on incomplete data. These are usually not programming - * errors, but may be handled by reading a little more data and trying - * again. - *; Exception raised on incomplete data. These are usually not programming errors, but may be + * handled by reading a little more data and trying again.

* - The module is a line-by-line conversion of the original binasciimodule.c - * written by Jack Jansen, except that all mistakes and errors are my own. + * The module is a line-by-line conversion of the original binasciimodule.c written by Jack Jansen, + * except that all mistakes and errors are my own. * * @author Finn Bock, bckfnn at pipmail.dknet.dk * @version binascii.java,v 1.6 1999/02/20 11:37:07 fb Exp - + * */ public class binascii { @@ -130,8 +95,8 @@ public static final PyObject Error = Py.makeClass("Error", Py.Exception, exceptionNamespace()); - public static final PyObject Incomplete = Py.makeClass("Incomplete", Py.Exception, - exceptionNamespace()); + public static final PyObject Incomplete = + Py.makeClass("Incomplete", Py.Exception, exceptionNamespace()); public static PyObject exceptionNamespace() { PyObject dict = new PyStringMap(); @@ -142,12 +107,12 @@ // hqx lookup table, ascii->binary. private static char RUNCHAR = 0x90; - private static short DONE = 0x7F; - private static short SKIP = 0x7E; - private static short FAIL = 0x7D; + private static byte DONE = 0x7F; + private static byte SKIP = 0x7E; + private static byte FAIL = 0x7D; //@formatter:off - private static short[] table_a2b_hqx = { + private static byte[] table_a2b_hqx = { /* ^@ ^A ^B ^C ^D ^E ^F ^G */ /* 0*/ FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, /* \b \t \n ^K ^L \r ^N ^O */ @@ -199,13 +164,11 @@ }; //@formatter:on - private static byte[] table_b2a_hqx = - StringUtil.toBytes("!\"#$%&'()*+,-012345689@ABCDEFGHIJKLMNPQRSTUVXYZ[`abcdefhijklmpqr"); - - + private static char[] table_b2a_hqx = + "!\"#$%&'()*+,-012345689@ABCDEFGHIJKLMNPQRSTUVXYZ[`abcdefhijklmpqr".toCharArray(); //@formatter:off - private static short table_a2b_base64[] = { + private static byte table_a2b_base64[] = { -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,62, -1,-1,-1,63, @@ -222,9 +185,8 @@ /* Max binary chunk 
size */ private static int BASE64_MAXBIN = Integer.MAX_VALUE / 2 - 3; - private static byte[] table_b2a_base64 = - StringUtil.toBytes("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"); - + private static char[] table_b2a_base64 = + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/".toCharArray(); //@formatter:off private static int[] crctab_hqx = { @@ -263,283 +225,259 @@ }; //@formatter:on - - - public static PyString __doc__a2b_uu = new PyString( - "(ascii) -> bin. Decode a line of uuencoded data" - ); - + public static PyString __doc__a2b_uu = + new PyString("(ascii) -> bin. Decode a line of uuencoded data"); /** - * Convert a single line of uuencoded data back to binary and return the - * binary data. Lines normally contain 45 (binary) bytes, except for the - * last line. Line data may be followed by whitespace. + * Convert a single line of uuencoded data back to binary and return the binary data. Lines + * normally contain 45 (binary) bytes, except for the last line. Line data may be followed by + * whitespace. 
*/ - public static PyString a2b_uu(PyObject bp) { - int leftbits = 0; - int leftchar = 0; + public static PyString a2b_uu(PyObject text) { - StringBuilder bin_data = new StringBuilder(); - - try (PyBuffer ascii_data = getTextBuffer(bp)) { - if (ascii_data.getLen() == 0) { + try (PyBuffer textBuf = getByteBuffer(text)) { + int textLen = textBuf.getLen(); + if (textLen == 0) { return new PyString(""); } - char this_ch; - int i; + StringBuilder dataBuf = new StringBuilder(); - int ascii_len = ascii_data.getLen()-1; + int bits = 0; // store bits not yet emitted (max 12 bits) + int bitCount = 0; // how many (valid) bits waiting + int index = 0; - int bin_len = (ascii_data.intAt(0) - ' ') & 077; + int dataExpected = (textBuf.intAt(0) - ' ') & 077; + textLen -= 1; - for (i = 0; bin_len > 0 && ascii_len > 0; i++, ascii_len--) { - this_ch = (char) ascii_data.intAt(i+1); - if (this_ch == '\n' || this_ch == '\r' || ascii_len <= 0) { - // Whitespace. Assume some spaces got eaten at - // end-of-line. (We check this later) - this_ch = 0; + for (; dataExpected > 0 && textLen > 0; index++, textLen--) { + int ch = textBuf.intAt(index + 1); + int sixBits; + + if (ch == '\n' || ch == '\r' || textLen <= 0) { + // Whitespace. Assume some spaces got eaten at end-of-line. + // (We check this later.) + sixBits = 0; } else { - // Check the character for legality - // The 64 in stead of the expected 63 is because - // there are a few uuencodes out there that use - // '@' as zero instead of space. - if ( this_ch < ' ' || this_ch > (' ' + 64)) { + /* + * Check the character for legality The 64 instead of the expected 63 is because + * there are a few uuencodes out there that use '@' as zero instead of space. + */ + if (ch < ' ' || ch > (' ' + 64)) { throw new PyException(Error, "Illegal char"); } - this_ch = (char)((this_ch - ' ') & 077); + sixBits = (ch - ' ') & 0x3f; } - // Shift it in on the low end, and see if there's - // a byte ready for output. 
- leftchar = (leftchar << 6) | (this_ch); - leftbits += 6; - if (leftbits >= 8) { - leftbits -= 8; - bin_data.append((char)((leftchar >> leftbits) & 0xff)); - leftchar &= ((1 << leftbits) - 1); - bin_len--; + + // Shift it in on the low end, and see if there's a byte ready for output. + bits = (bits << 6) | sixBits; + bitCount += 6; + if (bitCount >= 8) { + bitCount -= 8; + int b = (bits >> bitCount) & 0xff; + dataBuf.append((char) b); // byte + bits &= (1 << bitCount) - 1; + dataExpected--; } } - // Finally, check that if there's anything left on the line - // that it's whitespace only. - while (ascii_len-- > 0) { - this_ch = (char) ascii_data.intAt(++i); + // Finally, check that anything left on the line is white space. + while (textLen-- > 0) { + int ch = textBuf.intAt(++index); // Extra '@' may be written as padding in some cases - if (this_ch != ' ' && this_ch != '@' && - this_ch != '\n' && this_ch != '\r') { + if (ch != ' ' && ch != '@' && ch != '\n' && ch != '\r') { throw new PyException(Error, "Trailing garbage"); } } // finally, if we haven't decoded enough stuff, fill it up with zeros - for (; i < bin_len; i++) { - bin_data.append((char) 0); + for (; index < dataExpected; index++) { + dataBuf.append((char) 0); } - return new PyString(bin_data.toString()); + return new PyString(dataBuf.toString()); } catch (ClassCastException e) { - throw argMustBeBytes("a2b_uu", bp); + throw argMustBeBytes("a2b_uu", text); } } - public static PyString __doc__b2a_uu = new PyString( - "(bin) -> ascii. Uuencode line of data" - ); - + public static PyString __doc__b2a_uu = new PyString("(bin) -> ascii. Uuencode line of data"); /** - * Convert binary data to a line of ASCII characters, the return value - * is the converted line, including a newline char. The length of - * data should be at most 45. + * Convert binary data to a line of ASCII characters, the return value is the converted line, + * including a newline char. The length of data should be at most 45. 
*/ - public static PyString b2a_uu(PyObject bp) { - int leftbits = 0; - char this_ch; - int leftchar = 0; + public static PyString b2a_uu(PyObject data) { - try (PyBuffer bin_data = getTextBuffer(bp)) { + try (PyBuffer dataBuf = getByteBuffer(data)) { - int bin_len = bin_data.getLen(); - if (bin_len > 45) { + int dataLen = dataBuf.getLen(); + if (dataLen > 45) { // The 45 is a limit that appears in all uuencode's throw new PyException(Error, "At most 45 bytes at once"); } - // Each 3 bytes in (rounded up) produces 4 characters out. - int ascii_len = 4 * ((bin_len + 2) / 3); - // Plus a 1 byte length and '\n' - StringBuilder ascii_data = new StringBuilder(ascii_len + 2); - // Store the length */ - ascii_data.append((char)(' ' + (bin_len & 077))); + // Each 3 bytes (rounded up) produce 4 characters, plus a 1 byte length and '\n' + StringBuilder textBuf = new StringBuilder(4 * ((dataLen + 2) / 3) + 2); + int bitCount = 0; + int bits = 0; + + // Store the length + textBuf.append((char) (' ' + (dataLen & 077))); - for (int i = 0; bin_len > 0 || leftbits != 0; i++, bin_len--) { + for (int i = 0; dataLen > 0 || bitCount != 0; i++, dataLen--) { // Shift the data (or padding) into our buffer - if (bin_len > 0) { - leftchar = (leftchar << 8) | (char) bin_data.intAt(i); + if (dataLen > 0) { + bits = (bits << 8) | dataBuf.intAt(i); } else { - leftchar <<= 8; + bits <<= 8; } - leftbits += 8; + bitCount += 8; // See if there are 6-bit groups ready - while (leftbits >= 6) { - this_ch = (char)((leftchar >> (leftbits-6)) & 0x3f); - leftbits -= 6; - ascii_data.append((char)(this_ch + ' ')); + while (bitCount >= 6) { + bitCount -= 6; + int sixBits = (bits >> bitCount) & 0x3f; + textBuf.append((char) (sixBits + ' ')); } } - ascii_data.append('\n'); // Append a courtesy newline - return new PyString(ascii_data.toString()); + textBuf.append('\n'); // Append a courtesy newline + return new PyString(textBuf.toString()); } catch (ClassCastException e) { - throw argMustBeBytes("b2a_uu", 
bp); + throw argMustBeBytes("b2a_uu", data); } } + /** Finds & returns the (num+1)th valid character for base64, or -1 if none. */ private static int binascii_find_valid(PyBuffer b, int offset, int num) { int blen = b.getLen() - offset; - - /* Finds & returns the (num+1)th - ** valid character for base64, or -1 if none. - */ - int ret = -1; while ((blen > 0) && (ret == -1)) { int c = b.intAt(offset); - short b64val = table_a2b_base64[c & 0x7f]; - if (((c <= 0x7f) && (b64val != -1)) ) { + byte b64val = table_a2b_base64[c & 0x7f]; + if (((c <= 0x7f) && (b64val != -1))) { if (num == 0) { ret = c; } num--; } - offset++; blen--; } return ret; } - - - public static PyString __doc__a2b_base64 = new PyString( - "(ascii) -> bin. Decode a line of base64 data" - ); + public static PyString __doc__a2b_base64 = + new PyString("(ascii) -> bin. Decode a line of base64 data"); /** - * Convert a block of base64 data back to binary and return the - * binary data. More than one line may be passed at a time. + * Convert a block of base64 data back to binary and return the binary data. More than one line + * may be passed at a time. */ - public static PyString a2b_base64(PyObject bp) { - int leftbits = 0; - char this_ch; - int leftchar = 0; - int quad_pos = 0; + public static PyString a2b_base64(PyObject text) { - try (PyBuffer ascii_data = getTextBuffer(bp)) { - int ascii_len = ascii_data.getLen(); - - int bin_len = 0; + try (PyBuffer textBuf = getByteBuffer(text)) { + int textLen = textBuf.getLen(); - // Every 4 characters (rounded up) maps to 3 bytes - StringBuilder bin_data = new StringBuilder(3 * ((ascii_len + 3) / 4)); + // Every 4 characters (rounded up) map to 3 bytes. (Or fewer, if there are extras.) + int dataLen = 3 * ((textLen + 3) / 4); + // These characters will represent bytes, in the usual Jython 2 way. 
+ StringBuilder dataBuf = new StringBuilder(dataLen); + int bits = 0; // store bits not yet emitted (max 12 bits) + int bitCount = 0; // how many (valid) bits waiting + int quad_pos = 0; - for (int i = 0; ascii_len > 0; ascii_len--, i++) { + for (int i = 0; textLen > 0; textLen--, i++) { // Skip some punctuation - this_ch = (char) ascii_data.intAt(i); - if (this_ch > 0x7F || this_ch == '\r' || this_ch == '\n' || this_ch == ' ') { + int ch = textBuf.intAt(i); + if (ch > 0x7F || ch == '\r' || ch == '\n' || ch == ' ') { continue; - } + + } else - if (this_ch == BASE64_PAD) { + if (ch == BASE64_PAD) { if (quad_pos < 2 || (quad_pos == 2 - && binascii_find_valid(ascii_data, i, 1) != BASE64_PAD)) { + && binascii_find_valid(textBuf, i, 1) != BASE64_PAD)) { continue; } else { - // A pad sequence means no more input. - // We've already interpreted the data + // A pad sequence means no more input. We've already interpreted the data // from the quad at this point. - leftbits = 0; + bitCount = 0; break; } - } + } else { - short this_v = table_a2b_base64[this_ch]; - if (this_v == -1) { - continue; - } + int sixBits = table_a2b_base64[ch]; + if (sixBits == -1) { + continue; + } - // Shift it in on the low end, and see if there's - // a byte ready for output. - quad_pos = (quad_pos + 1) & 0x03; - leftchar = (leftchar << 6) | (this_v); - leftbits += 6; - if (leftbits >= 8) { - leftbits -= 8; - bin_data.append((char) ((leftchar >> leftbits) & 0xff)); - bin_len++; - leftchar &= ((1 << leftbits) - 1); + // Shift it in on the low end, and see if there's a byte ready for output. 
+ quad_pos = (quad_pos + 1) & 0x03; + bits = (bits << 6) | sixBits; + bitCount += 6; + if (bitCount >= 8) { + bitCount -= 8; + dataBuf.append((char) ((bits >> bitCount) & 0xff)); // byte + // Erase the bits we emitted + bits &= (1 << bitCount) - 1; + } } } // Check that no bits are left - if (leftbits != 0) { + if (bitCount != 0) { throw new PyException(Error, "Incorrect padding"); } - return new PyString(bin_data.toString()); + return new PyString(dataBuf.toString()); } catch (ClassCastException e) { - throw argMustBeBytes("a2b_base64", bp); + throw argMustBeBytes("a2b_base64", text); } } - - public static PyString __doc__b2a_base64 = new PyString( - "(bin) -> ascii. Base64-code line of data" - ); - + public static PyString __doc__b2a_base64 = + new PyString("(bin) -> ascii. Base64-code line of data"); /** - * Convert binary data to a line of ASCII characters in base64 coding. - * The return value is the converted line, including a newline char. + * Convert binary data to a line of ASCII characters in base64 coding. The return value is the + * converted line, including a newline char. 
*/ - public static PyString b2a_base64(PyObject bp) { - int leftbits = 0; // how many bits waiting - char this_ch; - int leftchar = 0; // store bits not yet emitted (max 12 bits) + public static PyString b2a_base64(PyObject data) { - try (PyBuffer bin_data = getTextBuffer(bp)) { - int bin_len = bin_data.getLen(); - if (bin_len > BASE64_MAXBIN) { + try (PyBuffer dataBuf = getByteBuffer(data)) { + int dataLen = dataBuf.getLen(); + if (dataLen > BASE64_MAXBIN) { throw new PyException(Error, "Too much data for base64 line"); } // Every 3 bytes (rounded up) maps to 4 characters (and there's a newline) - StringBuilder ascii_data = new StringBuilder(4 * ((bin_len + 2) / 3) + 1); + StringBuilder ascii_data = new StringBuilder(4 * ((dataLen + 2) / 3) + 1); + int bits = 0; // store bits not yet emitted (max 14 bits) + int bitCount = 0; // how many (valid) bits waiting - for (int i = 0; bin_len > 0; bin_len--, i++) { + for (int i = 0; i < dataLen; i++) { // Shift the data into our buffer - leftchar = (leftchar << 8) | (char) bin_data.intAt(i); // charAt(i); - leftbits += 8; + bits = (bits << 8) | dataBuf.intAt(i); + bitCount += 8; - // See if there are 6-bit groups ready - while (leftbits >= 6) { - this_ch = (char) ((leftchar >> (leftbits - 6)) & 0x3f); - leftbits -= 6; - ascii_data.append((char) table_b2a_base64[this_ch]); + // While there are 6-bit groups available, emit them as characters. 
+ while (bitCount >= 6) { + bitCount -= 6; + ascii_data.append(table_b2a_base64[(bits >> bitCount) & 0x3f]); } } // Emit the balance of bits and append a newline - if (leftbits == 2) { - ascii_data.append((char) table_b2a_base64[(leftchar & 3) << 4]); + if (bitCount == 2) { + ascii_data.append(table_b2a_base64[(bits & 3) << 4]); ascii_data.append(BASE64_PAD); ascii_data.append(BASE64_PAD); - } else if (leftbits == 4) { - ascii_data.append((char) table_b2a_base64[(leftchar & 0xf) << 2]); + } else if (bitCount == 4) { + ascii_data.append(table_b2a_base64[(bits & 0xf) << 2]); ascii_data.append(BASE64_PAD); } ascii_data.append('\n'); // Append a courtesy newline @@ -547,373 +485,342 @@ return new PyString(ascii_data.toString()); } catch (ClassCastException e) { - throw argMustBeBytes("b2a_base64", bp); + throw argMustBeBytes("b2a_base64", data); } } - public static PyString __doc__a2b_hqx = new PyString( - "ascii -> bin, done. Decode .hqx coding" - ); + public static PyString __doc__a2b_hqx = new PyString("ascii -> bin, done. Decode .hqx coding"); /** - * Convert binhex4 formatted ASCII data to binary, without doing - * RLE-decompression. The string should contain a complete number of - * binary bytes, or (in case of the last portion of the binhex4 data) - * have the remaining bits zero. + * Convert binhex4 formatted ASCII data to binary, without doing RLE-decompression. The string + * should contain a complete number of binary bytes, or (in case of the last portion of the + * binhex4 data) have the remaining bits zero. 
*/ - public static PyTuple a2b_hqx(PyObject bp) { - int leftbits = 0; - char this_ch; - int leftchar = 0; - boolean done = false; + public static PyTuple a2b_hqx(PyObject text) { - try (PyBuffer ascii_data = getTextBuffer(bp)) { + try (PyBuffer textBuf = getByteBuffer(text)) { - int len = ascii_data.getLen(); - StringBuilder bin_data = new StringBuilder(); + int textLen = textBuf.getLen(); + StringBuilder dataBuf = new StringBuilder(); + int bitCount = 0; + int bits = 0; + boolean done = false; - for (int i = 0; len > 0; len--, i++) { + for (int i = 0; i < textLen; i++) { // Get the byte and look it up - this_ch = (char) table_a2b_hqx[ascii_data.intAt(i)]; - if (this_ch == SKIP) { + byte b = table_a2b_hqx[textBuf.intAt(i)]; + + if (b == SKIP) { continue; - } - if (this_ch == FAIL) { + + } else if (b == FAIL) { throw new PyException(Error, "Illegal char"); - } - if (this_ch == DONE) { + + } else if (b == DONE) { // The terminating colon done = true; break; - } - // Shift it into the buffer and see if any bytes are ready - leftchar = (leftchar << 6) | (this_ch); - leftbits += 6; - if (leftbits >= 8) { - leftbits -= 8; - bin_data.append((char) ((leftchar >> leftbits) & 0xff)); - leftchar &= ((1 << leftbits) - 1); + } else { + // Shift it into the buffer and see if any bytes are ready + bits = (bits << 6) | b; + bitCount += 6; + if (bitCount >= 8) { + bitCount -= 8; + dataBuf.append((char) ((bits >> bitCount) & 0xff)); // byte + bits &= (1 << bitCount) - 1; + } } } - if (leftbits != 0 && !done) { + if (bitCount != 0 && !done) { throw new PyException(Incomplete, "String has incomplete number of bytes"); } - return new PyTuple(new PyString(bin_data.toString()), Py.newInteger(done ? 1 : 0)); + return new PyTuple(new PyString(dataBuf.toString()), Py.newInteger(done ? 
1 : 0)); } catch (ClassCastException e) { - throw argMustBeBytes("a2b_hqx", bp); + throw argMustBeBytes("a2b_hqx", text); } } - public static PyString __doc__rlecode_hqx = new PyString( - "Binhex RLE-code binary data" - ); + public static PyString __doc__rlecode_hqx = new PyString("Binhex RLE-code binary data"); + + /** Perform binhex4 style RLE-compression on data and return the result. */ + static public PyString rlecode_hqx(PyObject data) { - /** - * Perform binhex4 style RLE-compression on data and return the - * result. - */ - static public PyString rlecode_hqx(PyObject bp) { + try (PyBuffer inBuf = getByteBuffer(data)) { + int len = inBuf.getLen(); + StringBuilder outBuf = new StringBuilder(); - try (PyBuffer in_data = getTextBuffer(bp)) { + for (int in = 0; in < len; in++) { + char ch = (char) inBuf.intAt(in); - int len = in_data.getLen(); - StringBuilder out_data = new StringBuilder(); - - for (int in=0; in < len; in++) { - char ch = (char) in_data.intAt(in); if (ch == RUNCHAR) { // RUNCHAR. Escape it. - out_data.append(RUNCHAR); - out_data.append((char) 0); + outBuf.append(RUNCHAR); + outBuf.append((char) 0); + } else { // Check how many following are the same int inend; - for (inend=in+1; inend < len && - (char) in_data.intAt(inend) == ch && - inend < in+255; inend++) { - ; - } + for (inend = in + 1; inend < len && ((char) inBuf.intAt(inend)) == ch + && inend < in + 255; inend++) { /* nothing */ } if (inend - in > 3) { // More than 3 in a row. Output RLE. - out_data.append(ch); - out_data.append(RUNCHAR); - out_data.append((char) (inend-in)); - in = inend-1; + outBuf.append(ch); + outBuf.append(RUNCHAR); + outBuf.append((char) (inend - in)); + in = inend - 1; } else { // Less than 3. 
Output the byte itself - out_data.append(ch); + outBuf.append(ch); } } } - return new PyString(out_data.toString()); + + return new PyString(outBuf.toString()); + } catch (ClassCastException e) { - throw argMustBeBytes("rlecode_hqx", bp); + throw argMustBeBytes("rlecode_hqx", data); } } - - public static PyString __doc__b2a_hqx = new PyString( - "Encode .hqx data" - ); + public static PyString __doc__b2a_hqx = new PyString("Encode .hqx data"); /** - * Perform hexbin4 binary-to-ASCII translation and return the - * resulting string. The argument should already be RLE-coded, and have a - * length divisible by 3 (except possibly the last fragment). + * Perform hexbin4 binary-to-ASCII translation and return the resulting string. The argument + * should already be RLE-coded, and have a length divisible by 3 (except possibly the last + * fragment). */ - public static PyString b2a_hqx(PyObject bp) { - int leftbits = 0; - char this_ch; - int leftchar = 0; + public static PyString b2a_hqx(PyObject data) { - try (PyBuffer bin_data = getTextBuffer(bp)) { + try (PyBuffer dataBuf = getByteBuffer(data)) { + int len = dataBuf.getLen(); - int len = bin_data.getLen(); - StringBuilder ascii_data = new StringBuilder(); + StringBuilder textBuf = new StringBuilder(); + int bits = 0; + int bitCount = 0; for (int i = 0; len > 0; len--, i++) { // Shift into our buffer, and output any 6bits ready - leftchar = (leftchar << 8) | (char) bin_data.intAt(i); - leftbits += 8; - while (leftbits >= 6) { - this_ch = (char) ((leftchar >> (leftbits - 6)) & 0x3f); - leftbits -= 6; - ascii_data.append((char) table_b2a_hqx[this_ch]); + bits = (bits << 8) | (char) dataBuf.intAt(i); + bitCount += 8; + while (bitCount >= 6) { + bitCount -= 6; + textBuf.append(table_b2a_hqx[(bits >> bitCount) & 0x3f]); } } // Output a possible runt byte - if (leftbits != 0) { - leftchar <<= (6 - leftbits); - ascii_data.append((char) table_b2a_hqx[leftchar & 0x3f]); + if (bitCount != 0) { + bits <<= (6 - bitCount); + 
textBuf.append(table_b2a_hqx[bits & 0x3f]); } - return new PyString(ascii_data.toString()); + return new PyString(textBuf.toString()); } catch (ClassCastException e) { - throw argMustBeBytes("b2a_hqx", bp); + throw argMustBeBytes("b2a_hqx", data); } } - public static PyString __doc__rledecode_hqx = new PyString( - "Decode hexbin RLE-coded string" - ); - + public static PyString __doc__rledecode_hqx = new PyString("Decode hexbin RLE-coded string"); /** - * Perform RLE-decompression on the data, as per the binhex4 - * standard. The algorithm uses 0x90 after a byte as a repeat - * indicator, followed by a count. A count of 0 specifies a byte - * value of 0x90. The routine returns the decompressed data, - * unless data input data ends in an orphaned repeat indicator, in which - * case the Incomplete exception is raised. + * Perform RLE-decompression on the data, as per the binhex4 standard. The algorithm uses + * 0x90 after a byte as a repeat indicator, followed by a count. A count of 0 + * specifies a byte value of 0x90. The routine returns the decompressed data, unless + * data input data ends in an orphaned repeat indicator, in which case the Incomplete + * exception is raised. */ - static public PyString rledecode_hqx(PyObject bp) { - char in_byte, in_repeat; + static public PyString rledecode_hqx(PyObject data) { - try (PyBuffer in_data = getTextBuffer(bp)) { - int in_len = in_data.getLen(); - int i = 0; - - StringBuilder out_data = new StringBuilder(); + try (PyBuffer inBuf = getByteBuffer(data)) { + int inLen = inBuf.getLen(); + int index = 0; // Empty string is a special case - if (in_len == 0) { + if (inLen == 0) { return Py.EmptyString; } + // Pretty much throughout, we use a char to store a byte :( + StringBuilder outBuf = new StringBuilder(); + // Handle first byte separately (since we have to get angry // in case of an orphaned RLE code). 
- if (--in_len < 0) { + if (--inLen < 0) { throw new PyException(Incomplete); } - in_byte = (char) in_data.intAt(i++); + char outByte = (char) inBuf.intAt(index++); - if (in_byte == RUNCHAR) { - if (--in_len < 0) { + if (outByte == RUNCHAR) { + if (--inLen < 0) { throw new PyException(Incomplete); } - in_repeat = (char) in_data.intAt(i++); + int in_repeat = inBuf.intAt(index++); if (in_repeat != 0) { // Note Error, not Incomplete (which is at the end // of the string only). This is a programmer error. throw new PyException(Error, "Orphaned RLE code at start"); } - out_data.append(RUNCHAR); + outBuf.append(RUNCHAR); } else { - out_data.append(in_byte); + outBuf.append(outByte); } - while (in_len > 0) { - if (--in_len < 0) { + while (inLen > 0) { + if (--inLen < 0) { throw new PyException(Incomplete); } - in_byte = (char) in_data.intAt(i++); + outByte = (char) inBuf.intAt(index++); - if (in_byte == RUNCHAR) { - if (--in_len < 0) { + if (outByte == RUNCHAR) { + if (--inLen < 0) { throw new PyException(Incomplete); } - in_repeat = (char) in_data.intAt(i++); + int in_repeat = inBuf.intAt(index++); if (in_repeat == 0) { // Just an escaped RUNCHAR value - out_data.append(RUNCHAR); + outBuf.append(RUNCHAR); } else { // Pick up value and output a sequence of it - in_byte = out_data.charAt(out_data.length()-1); + outByte = outBuf.charAt(outBuf.length() - 1); while (--in_repeat > 0) { - out_data.append(in_byte); + outBuf.append(outByte); } } } else { // Normal byte - out_data.append(in_byte); + outBuf.append(outByte); } } - return new PyString(out_data.toString()); + return new PyString(outBuf.toString()); } catch (ClassCastException e) { - throw argMustBeBytes("rledecode_hqx", bp); + throw argMustBeBytes("rledecode_hqx", data); } } - public static PyString __doc__crc_hqx = new PyString( - "(data, oldcrc) -> newcrc. Compute hqx CRC incrementally" - ); - + public static PyString __doc__crc_hqx = + new PyString("(data, oldcrc) -> newcrc. 
Compute hqx CRC incrementally"); /** - * Compute the binhex4 crc value of data, starting with an initial - * crc and returning the result. + * Compute the binhex4 crc value of data, starting with an initial crc and + * returning the result. */ - public static int crc_hqx(PyObject bp, int crc) { - try (PyBuffer bin_data = getTextBuffer(bp)) { - int len = bin_data.getLen(); - int i = 0; - - while(len-- > 0) { - crc=((crc<<8)&0xff00) ^ - crctab_hqx[((crc>>8)&0xff)^ (char) bin_data.intAt(i++)]; + public static int crc_hqx(PyObject data, int crc) { + try (PyBuffer buf = getByteBuffer(data)) { + int len = buf.getLen(); + for (int i = 0; i < len; i++) { + crc = ((crc << 8) & 0xff00) ^ crctab_hqx[((crc >> 8) & 0xff) ^ buf.intAt(i)]; } return crc; - } catch (ClassCastException e) { - throw argMustBeBytes("crc_hqx", bp); + throw argMustBeBytes("crc_hqx", data); } } - - -//@formatter:off -static long[] crc_32_tab = new long[] { -0x00000000L, 0x77073096L, 0xee0e612cL, 0x990951baL, 0x076dc419L, -0x706af48fL, 0xe963a535L, 0x9e6495a3L, 0x0edb8832L, 0x79dcb8a4L, -0xe0d5e91eL, 0x97d2d988L, 0x09b64c2bL, 0x7eb17cbdL, 0xe7b82d07L, -0x90bf1d91L, 0x1db71064L, 0x6ab020f2L, 0xf3b97148L, 0x84be41deL, -0x1adad47dL, 0x6ddde4ebL, 0xf4d4b551L, 0x83d385c7L, 0x136c9856L, -0x646ba8c0L, 0xfd62f97aL, 0x8a65c9ecL, 0x14015c4fL, 0x63066cd9L, -0xfa0f3d63L, 0x8d080df5L, 0x3b6e20c8L, 0x4c69105eL, 0xd56041e4L, -0xa2677172L, 0x3c03e4d1L, 0x4b04d447L, 0xd20d85fdL, 0xa50ab56bL, -0x35b5a8faL, 0x42b2986cL, 0xdbbbc9d6L, 0xacbcf940L, 0x32d86ce3L, -0x45df5c75L, 0xdcd60dcfL, 0xabd13d59L, 0x26d930acL, 0x51de003aL, -0xc8d75180L, 0xbfd06116L, 0x21b4f4b5L, 0x56b3c423L, 0xcfba9599L, -0xb8bda50fL, 0x2802b89eL, 0x5f058808L, 0xc60cd9b2L, 0xb10be924L, -0x2f6f7c87L, 0x58684c11L, 0xc1611dabL, 0xb6662d3dL, 0x76dc4190L, -0x01db7106L, 0x98d220bcL, 0xefd5102aL, 0x71b18589L, 0x06b6b51fL, -0x9fbfe4a5L, 0xe8b8d433L, 0x7807c9a2L, 0x0f00f934L, 0x9609a88eL, -0xe10e9818L, 0x7f6a0dbbL, 0x086d3d2dL, 0x91646c97L, 0xe6635c01L, 
-0x6b6b51f4L, 0x1c6c6162L, 0x856530d8L, 0xf262004eL, 0x6c0695edL, -0x1b01a57bL, 0x8208f4c1L, 0xf50fc457L, 0x65b0d9c6L, 0x12b7e950L, -0x8bbeb8eaL, 0xfcb9887cL, 0x62dd1ddfL, 0x15da2d49L, 0x8cd37cf3L, -0xfbd44c65L, 0x4db26158L, 0x3ab551ceL, 0xa3bc0074L, 0xd4bb30e2L, -0x4adfa541L, 0x3dd895d7L, 0xa4d1c46dL, 0xd3d6f4fbL, 0x4369e96aL, -0x346ed9fcL, 0xad678846L, 0xda60b8d0L, 0x44042d73L, 0x33031de5L, -0xaa0a4c5fL, 0xdd0d7cc9L, 0x5005713cL, 0x270241aaL, 0xbe0b1010L, -0xc90c2086L, 0x5768b525L, 0x206f85b3L, 0xb966d409L, 0xce61e49fL, -0x5edef90eL, 0x29d9c998L, 0xb0d09822L, 0xc7d7a8b4L, 0x59b33d17L, -0x2eb40d81L, 0xb7bd5c3bL, 0xc0ba6cadL, 0xedb88320L, 0x9abfb3b6L, -0x03b6e20cL, 0x74b1d29aL, 0xead54739L, 0x9dd277afL, 0x04db2615L, -0x73dc1683L, 0xe3630b12L, 0x94643b84L, 0x0d6d6a3eL, 0x7a6a5aa8L, -0xe40ecf0bL, 0x9309ff9dL, 0x0a00ae27L, 0x7d079eb1L, 0xf00f9344L, -0x8708a3d2L, 0x1e01f268L, 0x6906c2feL, 0xf762575dL, 0x806567cbL, -0x196c3671L, 0x6e6b06e7L, 0xfed41b76L, 0x89d32be0L, 0x10da7a5aL, -0x67dd4accL, 0xf9b9df6fL, 0x8ebeeff9L, 0x17b7be43L, 0x60b08ed5L, -0xd6d6a3e8L, 0xa1d1937eL, 0x38d8c2c4L, 0x4fdff252L, 0xd1bb67f1L, -0xa6bc5767L, 0x3fb506ddL, 0x48b2364bL, 0xd80d2bdaL, 0xaf0a1b4cL, -0x36034af6L, 0x41047a60L, 0xdf60efc3L, 0xa867df55L, 0x316e8eefL, -0x4669be79L, 0xcb61b38cL, 0xbc66831aL, 0x256fd2a0L, 0x5268e236L, -0xcc0c7795L, 0xbb0b4703L, 0x220216b9L, 0x5505262fL, 0xc5ba3bbeL, -0xb2bd0b28L, 0x2bb45a92L, 0x5cb36a04L, 0xc2d7ffa7L, 0xb5d0cf31L, -0x2cd99e8bL, 0x5bdeae1dL, 0x9b64c2b0L, 0xec63f226L, 0x756aa39cL, -0x026d930aL, 0x9c0906a9L, 0xeb0e363fL, 0x72076785L, 0x05005713L, -0x95bf4a82L, 0xe2b87a14L, 0x7bb12baeL, 0x0cb61b38L, 0x92d28e9bL, -0xe5d5be0dL, 0x7cdcefb7L, 0x0bdbdf21L, 0x86d3d2d4L, 0xf1d4e242L, -0x68ddb3f8L, 0x1fda836eL, 0x81be16cdL, 0xf6b9265bL, 0x6fb077e1L, -0x18b74777L, 0x88085ae6L, 0xff0f6a70L, 0x66063bcaL, 0x11010b5cL, -0x8f659effL, 0xf862ae69L, 0x616bffd3L, 0x166ccf45L, 0xa00ae278L, -0xd70dd2eeL, 0x4e048354L, 0x3903b3c2L, 0xa7672661L, 0xd06016f7L, -0x4969474dL, 
0x3e6e77dbL, 0xaed16a4aL, 0xd9d65adcL, 0x40df0b66L, -0x37d83bf0L, 0xa9bcae53L, 0xdebb9ec5L, 0x47b2cf7fL, 0x30b5ffe9L, -0xbdbdf21cL, 0xcabac28aL, 0x53b39330L, 0x24b4a3a6L, 0xbad03605L, -0xcdd70693L, 0x54de5729L, 0x23d967bfL, 0xb3667a2eL, 0xc4614ab8L, -0x5d681b02L, 0x2a6f2b94L, 0xb40bbe37L, 0xc30c8ea1L, 0x5a05df1bL, -0x2d02ef8dL -}; -//@formatter:on + //@formatter:off + static int[] crc_32_tab = new int[] { + 0x00000000, 0x77073096, 0xee0e612c, 0x990951ba, 0x076dc419, + 0x706af48f, 0xe963a535, 0x9e6495a3, 0x0edb8832, 0x79dcb8a4, + 0xe0d5e91e, 0x97d2d988, 0x09b64c2b, 0x7eb17cbd, 0xe7b82d07, + 0x90bf1d91, 0x1db71064, 0x6ab020f2, 0xf3b97148, 0x84be41de, + 0x1adad47d, 0x6ddde4eb, 0xf4d4b551, 0x83d385c7, 0x136c9856, + 0x646ba8c0, 0xfd62f97a, 0x8a65c9ec, 0x14015c4f, 0x63066cd9, + 0xfa0f3d63, 0x8d080df5, 0x3b6e20c8, 0x4c69105e, 0xd56041e4, + 0xa2677172, 0x3c03e4d1, 0x4b04d447, 0xd20d85fd, 0xa50ab56b, + 0x35b5a8fa, 0x42b2986c, 0xdbbbc9d6, 0xacbcf940, 0x32d86ce3, + 0x45df5c75, 0xdcd60dcf, 0xabd13d59, 0x26d930ac, 0x51de003a, + 0xc8d75180, 0xbfd06116, 0x21b4f4b5, 0x56b3c423, 0xcfba9599, + 0xb8bda50f, 0x2802b89e, 0x5f058808, 0xc60cd9b2, 0xb10be924, + 0x2f6f7c87, 0x58684c11, 0xc1611dab, 0xb6662d3d, 0x76dc4190, + 0x01db7106, 0x98d220bc, 0xefd5102a, 0x71b18589, 0x06b6b51f, + 0x9fbfe4a5, 0xe8b8d433, 0x7807c9a2, 0x0f00f934, 0x9609a88e, + 0xe10e9818, 0x7f6a0dbb, 0x086d3d2d, 0x91646c97, 0xe6635c01, + 0x6b6b51f4, 0x1c6c6162, 0x856530d8, 0xf262004e, 0x6c0695ed, + 0x1b01a57b, 0x8208f4c1, 0xf50fc457, 0x65b0d9c6, 0x12b7e950, + 0x8bbeb8ea, 0xfcb9887c, 0x62dd1ddf, 0x15da2d49, 0x8cd37cf3, + 0xfbd44c65, 0x4db26158, 0x3ab551ce, 0xa3bc0074, 0xd4bb30e2, + 0x4adfa541, 0x3dd895d7, 0xa4d1c46d, 0xd3d6f4fb, 0x4369e96a, + 0x346ed9fc, 0xad678846, 0xda60b8d0, 0x44042d73, 0x33031de5, + 0xaa0a4c5f, 0xdd0d7cc9, 0x5005713c, 0x270241aa, 0xbe0b1010, + 0xc90c2086, 0x5768b525, 0x206f85b3, 0xb966d409, 0xce61e49f, + 0x5edef90e, 0x29d9c998, 0xb0d09822, 0xc7d7a8b4, 0x59b33d17, + 0x2eb40d81, 0xb7bd5c3b, 0xc0ba6cad, 
0xedb88320, 0x9abfb3b6, + 0x03b6e20c, 0x74b1d29a, 0xead54739, 0x9dd277af, 0x04db2615, + 0x73dc1683, 0xe3630b12, 0x94643b84, 0x0d6d6a3e, 0x7a6a5aa8, + 0xe40ecf0b, 0x9309ff9d, 0x0a00ae27, 0x7d079eb1, 0xf00f9344, + 0x8708a3d2, 0x1e01f268, 0x6906c2fe, 0xf762575d, 0x806567cb, + 0x196c3671, 0x6e6b06e7, 0xfed41b76, 0x89d32be0, 0x10da7a5a, + 0x67dd4acc, 0xf9b9df6f, 0x8ebeeff9, 0x17b7be43, 0x60b08ed5, + 0xd6d6a3e8, 0xa1d1937e, 0x38d8c2c4, 0x4fdff252, 0xd1bb67f1, + 0xa6bc5767, 0x3fb506dd, 0x48b2364b, 0xd80d2bda, 0xaf0a1b4c, + 0x36034af6, 0x41047a60, 0xdf60efc3, 0xa867df55, 0x316e8eef, + 0x4669be79, 0xcb61b38c, 0xbc66831a, 0x256fd2a0, 0x5268e236, + 0xcc0c7795, 0xbb0b4703, 0x220216b9, 0x5505262f, 0xc5ba3bbe, + 0xb2bd0b28, 0x2bb45a92, 0x5cb36a04, 0xc2d7ffa7, 0xb5d0cf31, + 0x2cd99e8b, 0x5bdeae1d, 0x9b64c2b0, 0xec63f226, 0x756aa39c, + 0x026d930a, 0x9c0906a9, 0xeb0e363f, 0x72076785, 0x05005713, + 0x95bf4a82, 0xe2b87a14, 0x7bb12bae, 0x0cb61b38, 0x92d28e9b, + 0xe5d5be0d, 0x7cdcefb7, 0x0bdbdf21, 0x86d3d2d4, 0xf1d4e242, + 0x68ddb3f8, 0x1fda836e, 0x81be16cd, 0xf6b9265b, 0x6fb077e1, + 0x18b74777, 0x88085ae6, 0xff0f6a70, 0x66063bca, 0x11010b5c, + 0x8f659eff, 0xf862ae69, 0x616bffd3, 0x166ccf45, 0xa00ae278, + 0xd70dd2ee, 0x4e048354, 0x3903b3c2, 0xa7672661, 0xd06016f7, + 0x4969474d, 0x3e6e77db, 0xaed16a4a, 0xd9d65adc, 0x40df0b66, + 0x37d83bf0, 0xa9bcae53, 0xdebb9ec5, 0x47b2cf7f, 0x30b5ffe9, + 0xbdbdf21c, 0xcabac28a, 0x53b39330, 0x24b4a3a6, 0xbad03605, + 0xcdd70693, 0x54de5729, 0x23d967bf, 0xb3667a2e, 0xc4614ab8, + 0x5d681b02, 0x2a6f2b94, 0xb40bbe37, 0xc30c8ea1, 0x5a05df1b, + 0x2d02ef8d + }; + //@formatter:on public static int crc32(PyObject bp) { return crc32(bp, 0); } - public static int crc32(PyObject bp, long crc) { + public static int crc32(PyObject data, long long_crc) { - crc &= 0xFFFFFFFFL; - crc = crc ^ 0xFFFFFFFFL; + int crc = ~(int) long_crc; - try (PyBuffer bin_data = getTextBuffer(bp)) { - int len = bin_data.getLen(); + try (PyBuffer dataBuf = getByteBuffer(data)) { + int len = 
dataBuf.getLen(); for (int i = 0; i < len; i++) { - char ch = (char) bin_data.intAt(i); - crc = (int)crc_32_tab[(int) ((crc ^ ch) & 0xffL)] ^ (crc >> 8); - /* Note: (crc >> 8) MUST zero fill on left */ - crc &= 0xFFFFFFFFL; + int b = dataBuf.intAt(i); + crc = crc_32_tab[(crc ^ b) & 0xff] ^ (crc >>> 8); + /* Note: (crc >> 8) MUST zero fill on left */ } + return ~crc; + } catch (ClassCastException e) { - throw argMustBeBytes("crc32", bp); + throw argMustBeBytes("crc32", data); } - if (crc >= 0x80000000) { - return -(int)(crc+1 & 0xFFFFFFFF); - } else { - return (int)(crc & 0xFFFFFFFF); - } } private static char[] hexdigit = "0123456789abcdef".toCharArray(); - public static PyString __doc__b2a_hex = new PyString( - "b2a_hex(data) -> s; Hexadecimal representation of binary data.\n" + - "\n" + - "This function is also available as \"hexlify()\"." - ); + public static PyString __doc__b2a_hex = + new PyString("b2a_hex(data) -> s; Hexadecimal representation of binary data.\n" + "\n" + + "This function is also available as \"hexlify()\"."); - public static PyString b2a_hex(PyObject bp) { + public static PyString b2a_hex(PyObject data) { - try (PyBuffer argbuf = getTextBuffer(bp)) { + try (PyBuffer dataBuf = getByteBuffer(data)) { - int arglen = argbuf.getLen(); - StringBuilder retbuf = new StringBuilder(arglen * 2); + int dataLen = dataBuf.getLen(); + StringBuilder retbuf = new StringBuilder(dataLen * 2); - /* make hex version of string, taken from shamodule.c */ - for (int i = 0; i < arglen; i++) { - char ch = (char) argbuf.intAt(i); + // make hex version of string, taken from shamodule.c + for (int i = 0; i < dataLen; i++) { + int ch = dataBuf.intAt(i); retbuf.append(hexdigit[(ch >>> 4) & 0xF]); retbuf.append(hexdigit[ch & 0xF]); } @@ -921,7 +828,7 @@ return new PyString(retbuf.toString()); } catch (ClassCastException e) { - throw argMustBeBytes("b2a_hex", bp); + throw argMustBeBytes("b2a_hex", data); } } @@ -929,32 +836,29 @@ return b2a_hex(argbuf); } + public static 
PyString a2b_hex$doc = + new PyString("a2b_hex(hexstr) -> s; Binary data of hexadecimal representation.\n" + "\n" + + "hexstr must contain an even number of hex digits " + + "(upper or lower case).\n" + + "This function is also available as \"unhexlify()\""); - public static PyString a2b_hex$doc = new PyString( - "a2b_hex(hexstr) -> s; Binary data of hexadecimal representation.\n" + - "\n" + - "hexstr must contain an even number of hex digits "+ - "(upper or lower case).\n"+ - "This function is also available as \"unhexlify()\"" - ); + public static PyString a2b_hex(PyObject hexstr) { - public static PyString a2b_hex(PyObject bp) { + try (PyBuffer buf = getByteBuffer(hexstr)) { - try (PyBuffer argbuf = getTextBuffer(bp)) { - - int arglen = argbuf.getLen(); - StringBuilder retbuf = new StringBuilder(arglen / 2); + int bufLen = buf.getLen(); + StringBuilder retbuf = new StringBuilder(bufLen / 2); /* * XXX What should we do about strings with an odd length? Should we add an implicit * leading zero, or a trailing zero? For now, raise an exception. 
*/ - if (arglen % 2 != 0) { + if (bufLen % 2 != 0) { throw Py.TypeError("Odd-length string"); } - for (int i = 0; i < arglen; i += 2) { - int top = Character.digit(argbuf.intAt(i), 16); - int bot = Character.digit(argbuf.intAt(i + 1), 16); + for (int i = 0; i < bufLen; i += 2) { + int top = Character.digit(buf.intAt(i), 16); + int bot = Character.digit(buf.intAt(i + 1), 16); if (top == -1 || bot == -1) { throw Py.TypeError("Non-hexadecimal digit found"); } @@ -964,7 +868,7 @@ return new PyString(retbuf.toString()); } catch (ClassCastException e) { - throw argMustBeBytes("a2b_hex", bp); + throw argMustBeBytes("a2b_hex", hexstr); } } @@ -974,33 +878,28 @@ final private static char[] upper_hexdigit = "0123456789ABCDEF".toCharArray(); - private static StringBuilder qpEscape(StringBuilder sb, char c) - { - sb.append('='); + private static StringBuilder qpEscape(StringBuilder sb, char c) { + sb.append('='); sb.append(upper_hexdigit[(c >>> 4) & 0xF]); sb.append(upper_hexdigit[c & 0xF]); return sb; } - final private static Pattern UNDERSCORE = Pattern.compile("_"); - final public static PyString __doc__a2b_qp = new PyString("Decode a string of qp-encoded data"); - public static boolean getIntFlagAsBool(ArgParser ap, int index, int dflt, String errMsg) { - boolean val; + private static boolean getIntFlagAsBool(ArgParser ap, int index, int dflt, String errMsg) { try { - val = ap.getInt(index, dflt) != 0; + boolean val = ap.getInt(index, dflt) != 0; + return val; } catch (PyException e) { - if (e.match(Py.AttributeError) || e.match(Py.ValueError)) { + if (e.match(Py.AttributeError) || e.match(Py.ValueError)) { throw Py.TypeError(errMsg); } - throw e; + throw e; } - return val; } - public static PyString a2b_qp(PyObject[] arg, String[] kws) - { + public static PyString a2b_qp(PyObject[] arg, String[] kws) { ArgParser ap = new ArgParser("a2b_qp", arg, kws, new String[] {"s", "header"}); PyObject bp = ap.getPyObject(0); @@ -1008,66 +907,66 @@ StringBuilder sb = new 
StringBuilder(); boolean header = getIntFlagAsBool(ap, 1, 0, "an integer is required"); - try (PyBuffer ascii_data = getTextBuffer((PyObject)bp)) { - for (int i=0, m=ascii_data.getLen(); i= '0' && c <= '9' || c >= 'A' && c <= 'F') && i < m) { + char nc = (char) ascii_data.intAt(i++); + if ((nc >= '0' && nc <= '9' || nc >= 'A' && nc <= 'F')) { + sb.append((char) (Character.digit(c, 16) << 4 + | Character.digit(nc, 16))); + } else { + sb.append('=').append(c).append(nc); } - } else { - sb.append(c); + } else if (c != '\n') { + sb.append('=').append(c); + } } + } else { + sb.append(c); + } } - return new PyString(sb.toString()); + return new PyString(sb.toString()); } catch (ClassCastException e) { throw argMustBeBytes("a2b_qp", bp); } } - final private static Pattern RN_TO_N = Pattern.compile("\r\n"); - final private static Pattern N_TO_RN = Pattern.compile("(? s;\n" - + "Encode a string using quoted-printable encoding.\n\n" - + "On encoding, when istext is set, newlines are not encoded, and white\n" - + "space at end of lines is. When istext is not set, \r and \n (CR/LF) are\n" - + "both encoded. When quotetabs is set, space and tabs are encoded."); + final public static PyString __doc__b2a_qp = + new PyString("b2a_qp(data, quotetabs=0, istext=1, header=0) -> s;\n" + + "Encode a string using quoted-printable encoding.\n\n" + + "On encoding, when istext is set, newlines are not encoded, and white\n" + + "space at end of lines is. When istext is not set, \r and \n (CR/LF) are\n" + + "both encoded. 
When quotetabs is set, space and tabs are encoded."); public static PyString b2a_qp(PyObject[] arg, String[] kws) { - ArgParser ap = new ArgParser("b2a_qp", arg, kws, new String[] {"s", "quotetabs", "istext", "header"}); + ArgParser ap = new ArgParser("b2a_qp", arg, kws, + new String[] {"s", "quotetabs", "istext", "header"}); boolean quotetabs = getIntFlagAsBool(ap, 1, 0, "an integer is required"); boolean istext = getIntFlagAsBool(ap, 2, 1, "an integer is required"); boolean header = getIntFlagAsBool(ap, 3, 0, "an integer is required"); - PyObject bp = ap.getPyObject(0); + PyObject data = ap.getPyObject(0); - try (PyBuffer bin_data = getTextBuffer(bp)) { + try (PyBuffer dataBuf = getByteBuffer(data)) { - int datalen = bin_data.getLen(); - StringBuilder sb = new StringBuilder(datalen); + int dataLen = dataBuf.getLen(); + StringBuilder sb = new StringBuilder(dataLen); String lineEnd = "\n"; // Work out if line endings should be crlf. - for (int i = 0, m = bin_data.getLen(); i < m; i++) { - if ('\n' == bin_data.intAt(i)) { - if (i > 0 && '\r' == bin_data.intAt(i-1)) { + for (int i = 0, m = dataBuf.getLen(); i < m; i++) { + if ('\n' == dataBuf.intAt(i)) { + if (i > 0 && '\r' == dataBuf.intAt(i - 1)) { lineEnd = "\r\n"; } break; @@ -1078,21 +977,17 @@ int MAXLINESIZE = 76; int in = 0; - while (in < datalen) { - char ch = (char) bin_data.intAt(in); - if ((ch > 126) || - (ch == '=') || - (header && ch == '_') || - ((ch == '.') && (count == 0) && - ((in+1 == datalen) || (char) bin_data.intAt(in+1) == '\n' || (char) bin_data.intAt(in+1) == '\r')) || - (!istext && ((ch == '\r') || (ch == '\n'))) || - ((ch == '\t' || ch == ' ') && (in + 1 == datalen)) || - ((ch < 33) && - (ch != '\r') && (ch != '\n') && - (quotetabs || - (!quotetabs && ((ch != '\t') && (ch != ' ')))))) - { - if ((count + 3 )>= MAXLINESIZE) { + while (in < dataLen) { + char ch = (char) dataBuf.intAt(in); + if ((ch > 126) || (ch == '=') || (header && ch == '_') + || ((ch == '.') && (count == 0) + && ((in + 1 
== dataLen) || (char) dataBuf.intAt(in + 1) == '\n' + || (char) dataBuf.intAt(in + 1) == '\r')) + || (!istext && ((ch == '\r') || (ch == '\n'))) + || ((ch == '\t' || ch == ' ') && (in + 1 == dataLen)) + || ((ch < 33) && (ch != '\r') && (ch != '\n') + && (quotetabs || (!quotetabs && ((ch != '\t') && (ch != ' ')))))) { + if ((count + 3) >= MAXLINESIZE) { sb.append('='); sb.append(lineEnd); count = 0; @@ -1100,43 +995,37 @@ qpEscape(sb, ch); in++; count += 3; - } - else { - if (istext && - ((ch == '\n') || - ((in+1 < datalen) && (ch == '\r') && - (bin_data.intAt(in+1) == '\n')))) - { + } else { + if (istext && ((ch == '\n') || ((in + 1 < dataLen) && (ch == '\r') + && (dataBuf.intAt(in + 1) == '\n')))) { count = 0; - /* Protect against whitespace on end of line */ + // Protect against whitespace on end of line int out = sb.length(); - if (out > 0 && ((sb.charAt(out-1) == ' ') || (sb.charAt(out-1) == '\t'))) { - ch = sb.charAt(out-1); - sb.setLength(out-1); - qpEscape(sb, ch); - } + if (out > 0 + && ((sb.charAt(out - 1) == ' ') || (sb.charAt(out - 1) == '\t'))) { + ch = sb.charAt(out - 1); + sb.setLength(out - 1); + qpEscape(sb, ch); + } - sb.append(lineEnd); - if (ch == '\r') { - in+=2; - } else { - in++; - } - } - else { - if ((in + 1 != datalen) && - ((char) bin_data.intAt(in+1) != '\n') && - (count + 1) >= MAXLINESIZE) { + sb.append(lineEnd); + if (ch == '\r') { + in += 2; + } else { + in++; + } + } else { + if ((in + 1 != dataLen) && ((char) dataBuf.intAt(in + 1) != '\n') + && (count + 1) >= MAXLINESIZE) { sb.append('='); sb.append(lineEnd); count = 0; - } + } count++; if (header && ch == ' ') { sb.append('_'); in++; - } - else { + } else { sb.append(ch); in++; } @@ -1147,7 +1036,7 @@ return new PyString(sb.toString()); } catch (ClassCastException e) { - throw argMustBeBytes("b2a_qp", bp); + throw argMustBeBytes("b2a_qp", data); } } @@ -1162,7 +1051,7 @@ * @return a byte-buffer view of argument (or default decoding if {@code unicode}) * @throws ClassCastException 
where the text object does not implement the buffer protocol */ - private static PyBuffer getTextBuffer(PyObject text) throws ClassCastException { + private static PyBuffer getByteBuffer(PyObject text) throws ClassCastException { if (text instanceof PyUnicode) { String s = ((PyUnicode) text).encode(); return new SimpleStringBuffer(PyBUF.SIMPLE, null, s); -- Repository URL: https://hg.python.org/jython From jython-checkins at python.org Tue Dec 24 06:10:11 2019 From: jython-checkins at python.org (jeff.allen) Date: Tue, 24 Dec 2019 11:10:11 +0000 Subject: [Jython-checkins] =?utf-8?q?jython=3A_Tolerate_unicode_arguments?= =?utf-8?q?_to_binascii_methods_=28fixes_=232826=29=2E?= Message-ID: <20191224111011.1.72B48290DA62D63A@mg.python.org> https://hg.python.org/jython/rev/7fe475b0fea2 changeset: 8315:7fe475b0fea2 user: Jeff Allen date: Mon Dec 23 17:51:51 2019 +0000 summary: Tolerate unicode arguments to binascii methods (fixes #2826). Where a unicode argument is given, it is interpreted as bytes through the default encoding. A test suite is added (missing from CPython 2) to extend test_binascii to unicode arguments (ASCII decoding only). There is some tidying up in binascii.java, but more is needed. files: Lib/test/test_binascii_jy.py | 72 +++++ NEWS | 1 + src/org/python/modules/binascii.java | 197 +++++++------- 3 files changed, 169 insertions(+), 101 deletions(-) diff --git a/Lib/test/test_binascii_jy.py b/Lib/test/test_binascii_jy.py new file mode 100644 --- /dev/null +++ b/Lib/test/test_binascii_jy.py @@ -0,0 +1,72 @@ +"""Test unicode handling in the binascii Java module.""" + +from test import test_support +from test.test_binascii import BinASCIITest +import unittest +import binascii + + +class UnicodeBinASCIITest(BinASCIITest): + + type2test = unicode + + # Create binary test data, but only 7-bit data to survive implicit unicode to str conversion. 
+ rawdata = "The quick brown fox jumps over the lazy dog.\r\n" + rawdata += "".join(map(chr, xrange(128))) + rawdata += "\r\nHello world.\n" + + def test_base64invalid(self): + # Test base64 with random invalid characters sprinkled throughout. + # This is a copy of BinASCIITest.test_base64invalid with 256 changed to 128 where we + # generate "fillers". + + # Creating the modified test reveals a latent bug in the test as written, which is that the + # padding character "=" is/was inserted as a filler. In the original test, the location of + # that is harmless. With the change 256 to 128, it causes early termination of the + # a2b_base64 conversion (both CPython and Jython). We therefore make padding a valid + # character, excluding it from the fillers. + + MAX_BASE64 = 57 + lines = [] + for i in range(0, len(self.data), MAX_BASE64): + b = self.type2test(self.rawdata[i:i+MAX_BASE64]) + a = binascii.b2a_base64(b) + lines.append(a) + + fillers = "" + valid = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789+/" + valid += "=" # pad character also valid + for i in xrange(128): # not 256 as in BinASCIITest.test_base64invalid + c = chr(i) + if c not in valid: + fillers += c + + def addnoise(line): + noise = fillers + ratio = len(line) // len(noise) + res = "" + while line and noise: + if len(line) // len(noise) > ratio: + c, line = line[0], line[1:] + else: + c, noise = noise[0], noise[1:] + res += c + return res + noise + line + + res = "" + for line in map(addnoise, lines): + a = self.type2test(line) + b = binascii.a2b_base64(a) + res += b + self.assertEqual(res, self.rawdata) + + # Test base64 with just invalid characters, which should return + # empty strings. TBD: shouldn't it raise an exception instead ? 
+ self.assertEqual(binascii.a2b_base64(self.type2test(fillers)), '') + + +def test_main(): + test_support.run_unittest(UnicodeBinASCIITest) + +if __name__ == "__main__": + test_main() diff --git a/NEWS b/NEWS --- a/NEWS +++ b/NEWS @@ -9,6 +9,7 @@ Jython 2.7.2b3 Bugs fixed - [ 2820 ] Import fails with UnicodeDecodeError if sys.path contains invalid UTF-8 bytes + - [ 2826 ] Unicode hex string decode failure - [ 2836 ] Java Swing library works only in interactive jython session Jython 2.7.2b2 diff --git a/src/org/python/modules/binascii.java b/src/org/python/modules/binascii.java --- a/src/org/python/modules/binascii.java +++ b/src/org/python/modules/binascii.java @@ -1,5 +1,6 @@ /* - * Copyright 1998 Finn Bock. + * Copyright 2019 Jython Developers + * Original conversion from CPython source copyright 1998 Finn Bock. * * This program contains material copyrighted by: * Copyright (c) 1991, 1992, 1993, 1994 by Stichting Mathematisch Centrum, @@ -145,6 +146,7 @@ private static short SKIP = 0x7E; private static short FAIL = 0x7D; + //@formatter:off private static short[] table_a2b_hqx = { /* ^@ ^A ^B ^C ^D ^E ^F ^G */ /* 0*/ FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, @@ -195,13 +197,14 @@ FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, }; + //@formatter:on private static byte[] table_b2a_hqx = StringUtil.toBytes("!\"#$%&'()*+,-012345689 at ABCDEFGHIJKLMNPQRSTUVXYZ[`abcdefhijklmpqr"); - + //@formatter:off private static short table_a2b_base64[] = { -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, @@ -212,6 +215,7 @@ -1,26,27,28, 29,30,31,32, 33,34,35,36, 37,38,39,40, 41,42,43,44, 45,46,47,48, 49,50,51,-1, -1,-1,-1,-1 }; + //@formatter:on private static char BASE64_PAD = '='; @@ -222,7 +226,7 @@ StringUtil.toBytes("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"); - + //@formatter:off private static int[] crctab_hqx = { 0x0000, 0x1021, 0x2042, 
0x3063, 0x4084, 0x50a5, 0x60c6, 0x70e7, 0x8108, 0x9129, 0xa14a, 0xb16b, 0xc18c, 0xd1ad, 0xe1ce, 0xf1ef, @@ -257,6 +261,7 @@ 0xef1f, 0xff3e, 0xcf5d, 0xdf7c, 0xaf9b, 0xbfba, 0x8fd9, 0x9ff8, 0x6e17, 0x7e36, 0x4e55, 0x5e74, 0x2e93, 0x3eb2, 0x0ed1, 0x1ef0, }; + //@formatter:on @@ -270,7 +275,7 @@ * binary data. Lines normally contain 45 (binary) bytes, except for the * last line. Line data may be followed by whitespace. */ - public static PyString a2b_uu(BufferProtocol bp) { + public static PyString a2b_uu(PyObject bp) { int leftbits = 0; int leftchar = 0; @@ -349,20 +354,22 @@ * is the converted line, including a newline char. The length of * data should be at most 45. */ - public static PyString b2a_uu(BufferProtocol bp) { + public static PyString b2a_uu(PyObject bp) { int leftbits = 0; char this_ch; int leftchar = 0; - PyBuffer bin_data = bp.getBuffer(PyBUF.SIMPLE); + try (PyBuffer bin_data = getTextBuffer(bp)) { - StringBuilder ascii_data = new StringBuilder(); - try { int bin_len = bin_data.getLen(); if (bin_len > 45) { // The 45 is a limit that appears in all uuencode's throw new PyException(Error, "At most 45 bytes at once"); } + // Each 3 bytes in (rounded up) produces 4 characters out. 
+ int ascii_len = 4 * ((bin_len + 2) / 3); + // Plus a 1 byte length and '\n' + StringBuilder ascii_data = new StringBuilder(ascii_len + 2); // Store the length */ ascii_data.append((char)(' ' + (bin_len & 077))); @@ -383,15 +390,15 @@ ascii_data.append((char)(this_ch + ' ')); } } - } finally { - bin_data.release(); + + ascii_data.append('\n'); // Append a courtesy newline + return new PyString(ascii_data.toString()); + + } catch (ClassCastException e) { + throw argMustBeBytes("b2a_uu", bp); } - ascii_data.append('\n'); // Append a courtesy newline - - return new PyString(ascii_data.toString()); } - private static int binascii_find_valid(PyBuffer b, int offset, int num) { int blen = b.getLen() - offset; @@ -427,7 +434,7 @@ * Convert a block of base64 data back to binary and return the * binary data. More than one line may be passed at a time. */ - public static PyString a2b_base64(BufferProtocol bp) { + public static PyString a2b_base64(PyObject bp) { int leftbits = 0; char this_ch; int leftchar = 0; @@ -437,7 +444,9 @@ int ascii_len = ascii_data.getLen(); int bin_len = 0; - StringBuilder bin_data = new StringBuilder(); + + // Every 4 characters (rounded up) maps to 3 bytes + StringBuilder bin_data = new StringBuilder(3 * ((ascii_len + 3) / 4)); for (int i = 0; ascii_len > 0; ascii_len--, i++) { // Skip some punctuation @@ -498,18 +507,18 @@ * Convert binary data to a line of ASCII characters in base64 coding. * The return value is the converted line, including a newline char. 
*/ - public static PyString b2a_base64(BufferProtocol bp) { - int leftbits = 0; + public static PyString b2a_base64(PyObject bp) { + int leftbits = 0; // how many bits waiting char this_ch; - int leftchar = 0; - - StringBuilder ascii_data = new StringBuilder(); + int leftchar = 0; // store bits not yet emitted (max 12 bits) try (PyBuffer bin_data = getTextBuffer(bp)) { int bin_len = bin_data.getLen(); if (bin_len > BASE64_MAXBIN) { throw new PyException(Error, "Too much data for base64 line"); } + // Every 3 bytes (rounded up) maps to 4 characters (and there's a newline) + StringBuilder ascii_data = new StringBuilder(4 * ((bin_len + 2) / 3) + 1); for (int i = 0; bin_len > 0; bin_len--, i++) { // Shift the data into our buffer @@ -524,6 +533,7 @@ } } + // Emit the balance of bits and append a newline if (leftbits == 2) { ascii_data.append((char) table_b2a_base64[(leftchar & 3) << 4]); ascii_data.append(BASE64_PAD); @@ -551,7 +561,7 @@ * binary bytes, or (in case of the last portion of the binhex4 data) * have the remaining bits zero. */ - public static PyTuple a2b_hqx(BufferProtocol bp) { + public static PyTuple a2b_hqx(PyObject bp) { int leftbits = 0; char this_ch; int leftchar = 0; @@ -606,13 +616,13 @@ * Perform binhex4 style RLE-compression on data and return the * result. 
*/ - static public PyString rlecode_hqx(BufferProtocol bp) { - PyBuffer in_data = bp.getBuffer(PyBUF.SIMPLE); - int len = in_data.getLen(); + static public PyString rlecode_hqx(PyObject bp) { + + try (PyBuffer in_data = getTextBuffer(bp)) { - StringBuilder out_data = new StringBuilder(); + int len = in_data.getLen(); + StringBuilder out_data = new StringBuilder(); - try { for (int in=0; in < len; in++) { char ch = (char) in_data.intAt(in); if (ch == RUNCHAR) { @@ -639,10 +649,10 @@ } } } - } finally { - in_data.release(); + return new PyString(out_data.toString()); + } catch (ClassCastException e) { + throw argMustBeBytes("rlecode_hqx", bp); } - return new PyString(out_data.toString()); } @@ -655,7 +665,7 @@ * resulting string. The argument should already be RLE-coded, and have a * length divisible by 3 (except possibly the last fragment). */ - public static PyString b2a_hqx(BufferProtocol bp) { + public static PyString b2a_hqx(PyObject bp) { int leftbits = 0; char this_ch; int leftchar = 0; @@ -702,21 +712,20 @@ * unless data input data ends in an orphaned repeat indicator, in which * case the Incomplete exception is raised. */ - static public PyString rledecode_hqx(BufferProtocol bp) { + static public PyString rledecode_hqx(PyObject bp) { char in_byte, in_repeat; - PyBuffer in_data = bp.getBuffer(PyBUF.SIMPLE); - int in_len = in_data.getLen(); - int i = 0; + try (PyBuffer in_data = getTextBuffer(bp)) { + int in_len = in_data.getLen(); + int i = 0; - StringBuilder out_data = new StringBuilder(); - try { + StringBuilder out_data = new StringBuilder(); + // Empty string is a special case if (in_len == 0) { return Py.EmptyString; } - // Handle first byte separately (since we have to get angry // in case of an orphaned RLE code). 
if (--in_len < 0) { @@ -767,14 +776,14 @@ out_data.append(in_byte); } } - } finally { - in_data.release(); + + return new PyString(out_data.toString()); + + } catch (ClassCastException e) { + throw argMustBeBytes("rledecode_hqx", bp); } - return new PyString(out_data.toString()); } - - public static PyString __doc__crc_hqx = new PyString( "(data, oldcrc) -> newcrc. Compute hqx CRC incrementally" ); @@ -784,26 +793,25 @@ * Compute the binhex4 crc value of data, starting with an initial * crc and returning the result. */ - public static int crc_hqx(BufferProtocol bp, int crc) { - PyBuffer bin_data = bp.getBuffer(PyBUF.SIMPLE); - int len = bin_data.getLen(); - int i = 0; + public static int crc_hqx(PyObject bp, int crc) { + try (PyBuffer bin_data = getTextBuffer(bp)) { + int len = bin_data.getLen(); + int i = 0; - try { while(len-- > 0) { crc=((crc<<8)&0xff00) ^ crctab_hqx[((crc>>8)&0xff)^ (char) bin_data.intAt(i++)]; } - } finally { - bin_data.release(); + return crc; + + } catch (ClassCastException e) { + throw argMustBeBytes("crc_hqx", bp); } - - return crc; } - +//@formatter:off static long[] crc_32_tab = new long[] { 0x00000000L, 0x77073096L, 0xee0e612cL, 0x990951baL, 0x076dc419L, 0x706af48fL, 0xe963a535L, 0x9e6495a3L, 0x0edb8832L, 0x79dcb8a4L, @@ -858,27 +866,29 @@ 0x5d681b02L, 0x2a6f2b94L, 0xb40bbe37L, 0xc30c8ea1L, 0x5a05df1bL, 0x2d02ef8dL }; +//@formatter:on - public static int crc32(BufferProtocol bp) { + public static int crc32(PyObject bp) { return crc32(bp, 0); } - public static int crc32(BufferProtocol bp, long crc) { - PyBuffer bin_data = bp.getBuffer(PyBUF.SIMPLE); - int len = bin_data.getLen(); + public static int crc32(PyObject bp, long crc) { crc &= 0xFFFFFFFFL; crc = crc ^ 0xFFFFFFFFL; - try { + + try (PyBuffer bin_data = getTextBuffer(bp)) { + int len = bin_data.getLen(); for (int i = 0; i < len; i++) { char ch = (char) bin_data.intAt(i); crc = (int)crc_32_tab[(int) ((crc ^ ch) & 0xffL)] ^ (crc >> 8); /* Note: (crc >> 8) MUST zero fill on left */ 
crc &= 0xFFFFFFFFL; } - } finally { - bin_data.release(); + } catch (ClassCastException e) { + throw argMustBeBytes("crc32", bp); } + if (crc >= 0x80000000) { return -(int)(crc+1 & 0xFFFFFFFF); } else { @@ -886,7 +896,6 @@ } } - private static char[] hexdigit = "0123456789abcdef".toCharArray(); public static PyString __doc__b2a_hex = new PyString( @@ -895,7 +904,7 @@ "This function is also available as \"hexlify()\"." ); - public static PyString b2a_hex(BufferProtocol bp) { + public static PyString b2a_hex(PyObject bp) { try (PyBuffer argbuf = getTextBuffer(bp)) { @@ -916,7 +925,7 @@ } } - public static PyString hexlify(BufferProtocol argbuf) { + public static PyString hexlify(PyObject argbuf) { return b2a_hex(argbuf); } @@ -929,9 +938,9 @@ "This function is also available as \"unhexlify()\"" ); - public static PyString a2b_hex(BufferProtocol bp) { + public static PyString a2b_hex(PyObject bp) { - try (PyBuffer argbuf = bp.getBuffer(PyBUF.SIMPLE)) { + try (PyBuffer argbuf = getTextBuffer(bp)) { int arglen = argbuf.getLen(); StringBuilder retbuf = new StringBuilder(arglen / 2); @@ -959,7 +968,7 @@ } } - public static PyString unhexlify(BufferProtocol argbuf) { + public static PyString unhexlify(PyObject argbuf) { return a2b_hex(argbuf); } @@ -994,20 +1003,12 @@ { ArgParser ap = new ArgParser("a2b_qp", arg, kws, new String[] {"s", "header"}); - PyObject pyObject = ap.getPyObject(0); - BufferProtocol bp; - if (pyObject instanceof BufferProtocol) { - bp = (BufferProtocol) pyObject; - } else { - throw Py.TypeError("expected something conforming to the buffer protocol, got " - + pyObject.getType().fastGetName()); - } + PyObject bp = ap.getPyObject(0); StringBuilder sb = new StringBuilder(); boolean header = getIntFlagAsBool(ap, 1, 0, "an integer is required"); - PyBuffer ascii_data = bp.getBuffer(PyBUF.SIMPLE); - try { + try (PyBuffer ascii_data = getTextBuffer((PyObject)bp)) { for (int i=0, m=ascii_data.getLen(); i https://hg.python.org/jython/rev/3e46a80390fb 
changeset: 8313:3e46a80390fb user: Jeff Allen date: Sat Dec 21 19:52:20 2019 +0000 summary: Tolerate undecodable bytes sys.path elements (fixes #2820). We treat these as import failures, and walk on down the sys.path. files: NEWS | 1 + src/org/python/core/PyNullImporter.java | 5 +- src/org/python/core/SyspathJavaLoader.java | 6 +- src/org/python/core/imp.java | 57 ++++++++- src/org/python/core/packagecache/PathPackageManager.java | 39 +++--- src/org/python/modules/zipimport/zipimporter.java | 3 +- 6 files changed, 77 insertions(+), 34 deletions(-) diff --git a/NEWS b/NEWS --- a/NEWS +++ b/NEWS @@ -8,6 +8,7 @@ Jython 2.7.2b3 Bugs fixed + - [ 2820 ] Import fails with UnicodeDecodeError if sys.path contains invalid UTF-8 bytes Jython 2.7.2b2 Bugs fixed diff --git a/src/org/python/core/PyNullImporter.java b/src/org/python/core/PyNullImporter.java --- a/src/org/python/core/PyNullImporter.java +++ b/src/org/python/core/PyNullImporter.java @@ -20,11 +20,10 @@ public PyNullImporter(PyObject pathObj) { super(); - String pathStr = Py.fileSystemDecode(pathObj); + String pathStr = imp.fileSystemDecode(pathObj); if (pathStr.equals("")) { throw Py.ImportError("empty pathname"); - } - if (isDir(pathStr)) { + } else if (isDir(pathStr)) { throw Py.ImportError("existing directory: " + pathStr); } } diff --git a/src/org/python/core/SyspathJavaLoader.java b/src/org/python/core/SyspathJavaLoader.java --- a/src/org/python/core/SyspathJavaLoader.java +++ b/src/org/python/core/SyspathJavaLoader.java @@ -113,11 +113,11 @@ byte[] buffer; PyObject entry = replacePathItem(sys, i, path); if (entry instanceof SyspathArchive) { - SyspathArchive archive = (SyspathArchive)entry; + SyspathArchive archive = (SyspathArchive) entry; buffer = getBytesFromArchive(archive, name); } else { - String dir = Py.fileSystemDecode(entry); - buffer = getBytesFromDir(dir, name); + String dir = imp.fileSystemDecode(entry, false); + buffer = dir != null ? 
getBytesFromDir(dir, name) : null; } if (buffer != null) { definePackageForClass(name); diff --git a/src/org/python/core/imp.java b/src/org/python/core/imp.java --- a/src/org/python/core/imp.java +++ b/src/org/python/core/imp.java @@ -1,10 +1,6 @@ // Copyright (c) Corporation for National Research Initiatives package org.python.core; -import org.python.compiler.Module; -import org.python.core.util.FileUtil; -import org.python.core.util.PlatformUtil; - import java.io.ByteArrayOutputStream; import java.io.File; import java.io.FileInputStream; @@ -13,8 +9,12 @@ import java.io.InputStream; import java.util.Map; import java.util.concurrent.locks.ReentrantLock; +import java.util.logging.Level; import java.util.logging.Logger; -import java.util.logging.Level; + +import org.python.compiler.Module; +import org.python.core.util.FileUtil; +import org.python.core.util.PlatformUtil; /** * Utility functions for "import" support. @@ -744,7 +744,7 @@ // Note the path here may be sys.path or the search path of a Python package. path = path == null ? sys.path : path; - for (int i = 0; i < path.__len__(); i++) { + for (int i = 0; ret == null && i < path.__len__(); i++) { PyObject p = path.__getitem__(i); // Is there a path-specific importer? PyObject importer = getPathImporter(sys.path_importer_cache, sys.path_hooks, p); @@ -757,9 +757,9 @@ } } // p could be a unicode or bytes object (in the file system encoding) - ret = loadFromSource(sys, name, moduleName, Py.fileSystemDecode(p)); - if (ret != null) { - return ret; + String pathElement = fileSystemDecode(p, false); + if (pathElement != null) { + ret = loadFromSource(sys, name, moduleName, pathElement); } } @@ -1388,6 +1388,45 @@ } /** + * A wrapper for {@link Py#fileSystemDecode(PyObject)} for project internal use within + * the import mechanism to convert decoding errors that occur during import to either + * {@code null} or {@link Py#ImportError(String)} calls (and a log message), which usually + * results in quiet failure. 
+ * + * @param p assumed to be a (partial) file path + * @param raiseImportError if true and {@code p} cannot be decoded raise {@code ImportError}. + * @return String form of the object {@code p} (or {@code null}). + */ + public static String fileSystemDecode(PyObject p, boolean raiseImportError) { + try { + return Py.fileSystemDecode(p); + } catch (PyException e) { + if (e.match(Py.UnicodeDecodeError)) { + // p is bytes we cannot convert to a String using the FS encoding + if (raiseImportError) { + logger.log(Level.CONFIG, "Cannot decode path entry {0}", p.__repr__()); + throw Py.ImportError("cannot decode"); + } + return null; + } else { + // Any other kind of exception continues as itself + throw e; + } + } + } + + /** + * For project internal use, equivalent to {@code fileSystemDecode(p, true)} (see + * {@link #fileSystemDecode(PyObject, boolean)}). + * + * @param p assumed to be a (partial) file path + * @return String form of the object {@code p}. + */ + public static String fileSystemDecode(PyObject p) { + return fileSystemDecode(p, true); + } + + /** * Ensure that the items mentioned in the from-list of an import are actually present, even if * they are modules we have not imported yet. 
* diff --git a/src/org/python/core/packagecache/PathPackageManager.java b/src/org/python/core/packagecache/PathPackageManager.java --- a/src/org/python/core/packagecache/PathPackageManager.java +++ b/src/org/python/core/packagecache/PathPackageManager.java @@ -11,6 +11,7 @@ import java.util.logging.Level; import org.python.core.Py; +import org.python.core.PyException; import org.python.core.PyJavaPackage; import org.python.core.PyList; import org.python.core.PyObject; @@ -41,28 +42,30 @@ for (int i = 0; i < path.__len__(); i++) { - // Each entry in the path may be byte-encoded or unicode PyObject entry = path.pyget(i); - String dir = Py.fileSystemDecode(entry); - File f = new RelativeFile(dir, child); - try { - if (f.isDirectory() && imp.caseok(f, name)) { - /* - * f is a directory matching the package name. This directory is considered to - * define a package if it contains no Python (source or compiled), or contains a - * Java .class file (not compiled from Python). - */ - PackageExistsFileFilter m = new PackageExistsFileFilter(); - f.listFiles(m); - boolean exists = m.packageExists(); - if (exists) { - logger.log(Level.CONFIG, "# trying {0}", f.getAbsolutePath()); + // Each entry in the path may be byte-encoded or unicode + String dir = imp.fileSystemDecode(entry, false); + if (dir != null) { + File f = new RelativeFile(dir, child); + try { + if (f.isDirectory() && imp.caseok(f, name)) { + /* + * f is a directory matching the package name. This directory is considered + * to define a package if it contains no Python (source or compiled), or + * contains a Java .class file (not compiled from Python). 
+ */ + PackageExistsFileFilter m = new PackageExistsFileFilter(); + f.listFiles(m); + boolean exists = m.packageExists(); + if (exists) { + logger.log(Level.CONFIG, "# trying {0}", f.getAbsolutePath()); + } + return exists; } - return exists; + } catch (SecurityException se) { + return false; } - } catch (SecurityException se) { - return false; } } return false; diff --git a/src/org/python/modules/zipimport/zipimporter.java b/src/org/python/modules/zipimport/zipimporter.java --- a/src/org/python/modules/zipimport/zipimporter.java +++ b/src/org/python/modules/zipimport/zipimporter.java @@ -25,6 +25,7 @@ import org.python.core.PyUnicode; import org.python.core.Traverseproc; import org.python.core.Visitproc; +import org.python.core.imp; import org.python.core.util.FileUtil; import org.python.core.util.StringUtil; import org.python.core.util.importer; @@ -90,7 +91,7 @@ @ExposedMethod final void zipimporter___init__(PyObject[] args, String[] kwds) { ArgParser ap = new ArgParser("__init__", args, kwds, new String[] {"path"}); - String path = Py.fileSystemDecode(ap.getPyObject(0)); + String path = imp.fileSystemDecode(ap.getPyObject(0)); zipimporter___init__(path); } -- Repository URL: https://hg.python.org/jython From jython-checkins at python.org Tue Dec 24 06:10:13 2019 From: jython-checkins at python.org (jeff.allen) Date: Tue, 24 Dec 2019 11:10:13 +0000 Subject: [Jython-checkins] =?utf-8?q?jython=3A_Fix_regression_in_PySystem?= =?utf-8?q?StateTest_caused_by_change_to_PrePy?= Message-ID: <20191224111013.1.5F6F8622423BC0A2@mg.python.org> https://hg.python.org/jython/rev/05b258a6573c changeset: 8317:05b258a6573c user: Jeff Allen date: Tue Dec 24 09:43:29 2019 +0000 summary: Fix regression in PySystemStateTest caused by change to PrePy A change to PrePy.getJarFileNameFromURL(URL) in cset:159c277c4a80 broke the test. getJarFileNameFromURL now no longer insists on a particular class as target (so we can avoid e.g. waking the type system). 
Also, let's test it with non-ascii file names. files: src/org/python/core/PrePy.java | 36 +++-- tests/java/org/python/core/PySystemStateTest.java | 56 ++++++--- 2 files changed, 60 insertions(+), 32 deletions(-) diff --git a/src/org/python/core/PrePy.java b/src/org/python/core/PrePy.java --- a/src/org/python/core/PrePy.java +++ b/src/org/python/core/PrePy.java @@ -13,6 +13,8 @@ import java.util.Properties; import java.util.logging.Level; import java.util.logging.Logger; +import java.util.regex.Matcher; +import java.util.regex.Pattern; import jnr.posix.util.Platform; @@ -361,20 +363,25 @@ } /** - * Return the path in the file system (as a string) of a JAR located by a URL. Three protocols - * are supported, Java JAR-file protocol, and two JBoss protocols "vfs" and "vfszip". + * Return the path in the file system (as a string) of a JAR located using the URL of a class + * file that it contains. Classes in Java can be asked for the URL of their associated resources + * (including their own class definition), and so the caller of this method may present such a + * URL as the basis for locating the JAR from which it came. + *

+ * Three protocols are supported, Java JAR-file protocol, and two JBoss protocols "vfs" and + * "vfszip". *

* The JAR-file protocol URL, which must be a {@code jar:file:} reference to a contained element * (that is, it has a "!/" part) is able to identify an actual JAR in a file system that may * then be opened using {@code jarFile = new JarFile(jarFileName)}. The path to the JAR is * returned. If the JAR is accessed by another mechanism ({@code http:} say) this will fail. *

- * The JBoss URL must be a reference to exactly {@code vfs:<JAR>/org/python/core/PrePy.class}, - * or the same thing using the {@code vfszip:} protocol, where <JAR> stands for the - * absolute path to the Jython JAR in VFS. There is no "!/" marker: in JBoss VFS a JAR is - * treated just like a directory and can no longer be opened as a JAR. The method essentially - * just swaps a VFS protocol for the Java {@code file:} protocol. The path returned will be - * correct only if this naive swap is valid. + * The JBoss URL must be a reference to a class in {@code vfs:<JAR>/org/python/core/}, or the + * same thing using the {@code vfszip:} protocol, where <JAR> stands for the absolute path + * to the Jython JAR in VFS. There is no "!/" marker: in JBoss VFS a JAR is treated just like a + * directory and can no longer be opened as a JAR. The method essentially just swaps a VFS + * protocol for the Java {@code file:} protocol. The path returned will be correct only if this + * naive swap is valid. * * @param url into the JAR * @return the file path or {@code null} in the event of a detectable error @@ -397,13 +404,14 @@ case "vfs": case "vfszip": - // path is /some/path/some-jython.jar/org/python/core/PrePy.class + // path is /some/path/some-jython.jar/org/python/core/some-name.class String path = url.getPath(); - final String target = ".jar/org/python/core/PrePy.class"; - int jarIndex = path.indexOf(target); - if (jarIndex > 0) { - // path contains the target class in a JAR, so make a file URL for it - fileURI = new URL("file:" + path.substring(0, jarIndex + 4)).toURI(); + Pattern p = Pattern.compile("/([^./]+\\.jar)/org/python/core/\\w+.class"); + Matcher m = p.matcher(path); + if (m.find()) { + // path contains the target class in a JAR (named in group 1). + // Make a file URL from all the text up to the end of group 1. 
+ fileURI = new URL("file:" + path.substring(0, m.end(1))).toURI(); } break; diff --git a/tests/java/org/python/core/PySystemStateTest.java b/tests/java/org/python/core/PySystemStateTest.java --- a/tests/java/org/python/core/PySystemStateTest.java +++ b/tests/java/org/python/core/PySystemStateTest.java @@ -115,41 +115,61 @@ final String host = ""; final int port = -1; final URLStreamHandler handler = new TestJBossURLStreamHandler(); + // Test with any class file in org.python.core + final String classPart = "/org/python/core/PySystemState.class"; String file; URL url; if (Platform.IS_WINDOWS) { // plain jboss url - file = "/C:/some_dir/some.jar/org/python/core/PySystemState.class"; + file = "/C:/some_dir/some.jar" + classPart; url = new URL(protocol, host, port, file, handler); // tests with jboss on windows gave URL's like this: - assertEquals("vfszip:/C:/some_dir/some.jar/org/python/core/PySystemState.class", url.toString()); - assertEquals("C:\\some_dir\\some.jar", Py.getJarFileNameFromURL(url)); + assertEquals("vfszip:/C:/some_dir/some.jar" + classPart, url.toString()); + String result = Py.getJarFileNameFromURL(url); + assertEquals("C:\\some_dir\\some.jar", result); // jboss url to decode - file = "/C:/some%20dir/some.jar/org/python/core/PySystemState.class"; + file = "/C:/some%20dir/some.jar" + classPart; url = new URL(protocol, host, port, file, handler); - assertEquals("vfszip:/C:/some%20dir/some.jar/org/python/core/PySystemState.class", url.toString()); - assertEquals("C:\\some dir\\some.jar", Py.getJarFileNameFromURL(url)); + assertEquals("vfszip:/C:/some%20dir/some.jar" + classPart, url.toString()); + result = Py.getJarFileNameFromURL(url); + assertEquals("C:\\some dir\\some.jar", result); // jboss url with + to escape - file = "/C:/some+dir/some.jar/org/python/core/PySystemState.class"; + file = "/C:/some+dir/some.jar" + classPart; url = new URL(protocol, host, port, file, handler); - 
assertEquals("vfszip:/C:/some+dir/some.jar/org/python/core/PySystemState.class", url.toString()); - assertEquals("C:\\some+dir\\some.jar", Py.getJarFileNameFromURL(url)); + assertEquals("vfszip:/C:/some+dir/some.jar" + classPart, url.toString()); + result = Py.getJarFileNameFromURL(url); + assertEquals("C:\\some+dir\\some.jar", result); + // jboss url with challenging JAR name (assume will be provided RFC-2396 encoded) + file = "/C:/n%c3%a5gon/katalog/r%c3%a4tt.jar" + classPart; + url = new URL(protocol, host, port, file, handler); + assertEquals("vfszip:/C:/n%c3%a5gon/katalog/r%c3%a4tt.jar" + classPart, url.toString()); + result = Py.getJarFileNameFromURL(url); + assertEquals("C:\\n?gon\\katalog\\r?tt.jar", result); } else { // plain jboss url - file = "/some_dir/some.jar/org/python/core/PySystemState.class"; + file = "/some_dir/some.jar" + classPart; url = new URL(protocol, host, port, file, handler); - assertEquals("vfszip:/some_dir/some.jar/org/python/core/PySystemState.class", url.toString()); - assertEquals("/some_dir/some.jar", Py.getJarFileNameFromURL(url)); + assertEquals("vfszip:/some_dir/some.jar" + classPart, url.toString()); + String result = Py.getJarFileNameFromURL(url); + assertEquals("/some_dir/some.jar", result); // jboss url to decode - file = "/some%20dir/some.jar/org/python/core/PySystemState.class"; + file = "/some dir/some.jar" + classPart; url = new URL(protocol, host, port, file, handler); - assertEquals("vfszip:/some%20dir/some.jar/org/python/core/PySystemState.class", url.toString()); - assertEquals("/some dir/some.jar", Py.getJarFileNameFromURL(url)); + assertEquals("vfszip:/some%20dir/some.jar" + classPart, url.toString()); + result = Py.getJarFileNameFromURL(url); + assertEquals("/some dir/some.jar", result); // jboss url with + to escape - file = "/some+dir/some.jar/org/python/core/PySystemState.class"; + file = "/some+dir/some.jar" + classPart; url = new URL(protocol, host, port, file, handler); - 
assertEquals("vfszip:/some+dir/some.jar/org/python/core/PySystemState.class", url.toString()); - assertEquals("/some+dir/some.jar", Py.getJarFileNameFromURL(url)); + assertEquals("vfszip:/some+dir/some.jar" + classPart, url.toString()); + result = Py.getJarFileNameFromURL(url); + assertEquals("/some+dir/some.jar", result); + // jboss url with challenging JAR name (assume will be provided RFC-2396 encoded) + file = "/n%c3%a5gon/katalog/r%c3%a4tt.jar" + classPart; + url = new URL(protocol, host, port, file, handler); + assertEquals("vfszip:/n%c3%a5gon/katalog/r%c3%a4tt.jar" + classPart, url.toString()); + result = Py.getJarFileNameFromURL(url); + assertEquals("/n?gon/katalog/r?tt.jar", result); } } -- Repository URL: https://hg.python.org/jython From jython-checkins at python.org Fri Dec 27 18:22:50 2019 From: jython-checkins at python.org (jeff.allen) Date: Fri, 27 Dec 2019 23:22:50 +0000 Subject: [Jython-checkins] =?utf-8?q?jython=3A_Add_JUnit_test_for_concurr?= =?utf-8?q?ent_interpreters=2E?= Message-ID: <20191227232250.1.3D29342B105212CE@mg.python.org> https://hg.python.org/jython/rev/e306270a4771 changeset: 8318:e306270a4771 user: Jeff Allen date: Fri Dec 27 23:11:52 2019 +0000 summary: Add JUnit test for concurrent interpreters. This is to investigate issue #2834, but is not a fix. It provides a failing test (except that the failing assertions are commented out). These assert that importing the same class definition to interpreters with distinct class loaders results in distinct Python type objects and Java classes. In fact, the first interpreter ends up sharing them with all subsequent ones through the common package manager. 
files: tests/java/org/python/core/ConcurrentTypeTest.java | 415 ++++++++++ 1 files changed, 415 insertions(+), 0 deletions(-) diff --git a/tests/java/org/python/core/ConcurrentTypeTest.java b/tests/java/org/python/core/ConcurrentTypeTest.java new file mode 100644 --- /dev/null +++ b/tests/java/org/python/core/ConcurrentTypeTest.java @@ -0,0 +1,415 @@ +// Copyright (c)2019 Jython Developers +// Licensed to the PSF under a Contributor Agreement +package org.python.core; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; + +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.io.OutputStream; +import java.net.URI; +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; + +import javax.tools.FileObject; +import javax.tools.ForwardingJavaFileManager; +import javax.tools.JavaCompiler; +import javax.tools.JavaCompiler.CompilationTask; +import javax.tools.JavaFileManager; +import javax.tools.JavaFileObject; +import javax.tools.JavaFileObject.Kind; +import javax.tools.SimpleJavaFileObject; +import javax.tools.StandardJavaFileManager; +import javax.tools.ToolProvider; + +import org.junit.Test; +import org.python.util.PythonInterpreter; + +/** + * Unit test exercising the import and type system from concurrent threads. Over the years, Jython + * has experienced multiple issues with respect to the management of types and import, in the + * presence of multiple threads and interpreters. It seems difficult to get this right. + *

+ * The initial version of this unit test was created in response to + * issue 2834. + */ +public class ConcurrentTypeTest { + + private static int RUNNERS = 30; + + static { + // Do not need site.py for test: makes more complicated in IDE. + Options.importSite = false; + } + + private abstract static class ScriptRunner implements Runnable { + + final String script; + final Thread thread; + final PyStringMap globals = Py.newStringMap(); + /** Sub-class constructor must assign the configured interpreter. */ + protected PythonInterpreter interp; + + ScriptRunner(String script) { + this.script = script; + this.thread = new Thread(this); + } + + @Override + public void run() { + interp.exec(script); + } + } + + /** + * Test concurrency when importing the same Java class where the interpreters all share a + * {@code PySystemState}. + */ + @Test + public void testSharedState() { + + // Make all the runners in advance. + List runners = new ArrayList<>(RUNNERS); + for (int i = 0; i < RUNNERS; i++) { + runners.add(new SharedStateRunner(javaImportScript)); + } + + // Start the runners then let all of them finish (or fail). + awaitAll(runners); + + // Check status of every thread + for (SharedStateRunner r : runners) { + PyObject status = r.globals.__finditem__("status"); + assertTrue("status not set to an int", status instanceof PyInteger); + assertEquals(((PyInteger) status).asInt(), 1); + } + } + + /** + * Script to import all names from a Java class for {@link #testSharedState()} and + * {@link #testSeparateState()}. 
+ */ + //@formatter:off + static final String javaImportScript = String.join("\n", new String[] { + "from javax.swing.text.Utilities import *", + "try:", + " f = getNextWord", + " status = 1", + "except Exception:", + " status = 0" + }); + //@formatter:on + + /** + * Each instance of this type has its own interpreter, but they all share the same (default) + * {@code PySystemState} + */ + private static class SharedStateRunner extends ScriptRunner { + + SharedStateRunner(String script) { + super(script); + this.interp = new PythonInterpreter(globals); + } + } + + /** + * Test concurrency when importing the same Java class where the interpreters all have their own + * {@code PySystemState}. + */ + @Test + public void testSeparateState() { + + // Make all the runners in advance. + List runners = new ArrayList<>(RUNNERS); + for (int i = 0; i < RUNNERS; i++) { + runners.add(new SeparateStateRunner(javaImportScript)); + } + + // Start the runners then let all of them finish (or fail). + awaitAll(runners); + + // Check status of every thread + for (SeparateStateRunner r : runners) { + PyObject status = r.globals.__finditem__("status"); + assertTrue("status not set to an int", status instanceof PyInteger); + assertEquals(((PyInteger) status).asInt(), 1); + } + } + + /** + * Each instance of this type has its own {@code PySystemState}, as well as its own interpreter. + */ + private static class SeparateStateRunner extends ScriptRunner { + + final PySystemState sys = new PySystemState(); + + SeparateStateRunner(String script) { + super(script); + this.interp = new PythonInterpreter(globals, sys); + } + + } + + /** + * Test concurrency when importing the same Java class where the interpreters all have their own + * {@code ClassLoader}. In this variant we import * from Foo, and test the static members. 
+ */ + @Test + public void testSeparateLoader() { + + // Compile the Java source and cache it in this file manager: */ + ClassCacheFileManager fileManager = getClassCacheFileManager(loadedJava, "Foo"); + + // Make all the runners in advance, primed with the same script. + List runners = new ArrayList<>(RUNNERS); + for (int i = 0; i < RUNNERS; i++) { + runners.add(new SeparateLoaderRunner(loaderScript, fileManager.newClassLoader())); + } + + // Start the runners then let all of them finish (or fail). + awaitAll(runners); + + // Check status of every thread + for (SeparateLoaderRunner r : runners) { + PyObject staticConstant = r.globals.__finditem__("staticConstant"); + assertEquals(staticConstant.asInt(), 42); + PyObject x = r.globals.__finditem__("x"); + assertEquals(x.asInt(), 42); + } + } + + /** + * A class defined in Java that is compiled as part of the tests {@link #testSeparateLoader()} + * and {@link #testSeparateLoader2()} and made available to Jython through a sp[ecific class + * loader. See {@link ClassCacheFileManager}. + */ + //@formatter:off + static final String loadedJava = String.join("\n", new String[] { + "package thin.air;", + "public class Foo {", + " public static final int staticConstant = 42;", + " public String member = \"forty-two\";", + " public static int staticMethod() { return 42; }", + " public String method() { return member; }", + "}" + }); + //@formatter:on + + /** + * Script to import all names from a Java class conjured from thin air (via class loader), usaed + * by {@link #testSeparateLoader()}. + */ + //@formatter:off + static final String loaderScript = String.join("\n", new String[] { + "from thin.air.Foo import *", + "x = staticMethod()" + }); + //@formatter:on + + /** + * Test concurrency when importing the same Java class where the interpreters all have their own + * {@code ClassLoader}. In this variant we import Foo, and instantiate one to test the instance + * members. 
+ */ + @Test + public void testSeparateLoader2() { + + // Compile the Java source and cache it in this file manager: */ + ClassCacheFileManager fileManager = getClassCacheFileManager(loadedJava, "Foo"); + + // Make all the runners in advance, primed with the same script. + List runners = new ArrayList<>(RUNNERS); + for (int i = 0; i < RUNNERS; i++) { + runners.add(new SeparateLoaderRunner(loaderScript2, fileManager.newClassLoader())); + } + + // Start the runners then let all of them finish (or fail). + awaitAll(runners); + + // Check status of every thread + Set