[Jython-checkins] jython: Tolerate undecodable bytes sys.path elements (fixes #2820).

jeff.allen jython-checkins at python.org
Tue Dec 24 06:10:10 EST 2019


https://hg.python.org/jython/rev/3e46a80390fb
changeset:   8313:3e46a80390fb
user:        Jeff Allen <ja.py at farowl.co.uk>
date:        Sat Dec 21 19:52:20 2019 +0000
summary:
  Tolerate undecodable bytes sys.path elements (fixes #2820).

We treat these as import failures, and walk on down the sys.path.

files:
  NEWS                                                     |   1 +
  src/org/python/core/PyNullImporter.java                  |   5 +-
  src/org/python/core/SyspathJavaLoader.java               |   6 +-
  src/org/python/core/imp.java                             |  57 ++++++++-
  src/org/python/core/packagecache/PathPackageManager.java |  39 +++---
  src/org/python/modules/zipimport/zipimporter.java        |   3 +-
  6 files changed, 77 insertions(+), 34 deletions(-)


diff --git a/NEWS b/NEWS
--- a/NEWS
+++ b/NEWS
@@ -8,6 +8,7 @@
 
 Jython 2.7.2b3
   Bugs fixed
+    - [ 2820 ] Import fails with UnicodeDecodeError if sys.path contains invalid UTF-8 bytes
 
 Jython 2.7.2b2
   Bugs fixed
diff --git a/src/org/python/core/PyNullImporter.java b/src/org/python/core/PyNullImporter.java
--- a/src/org/python/core/PyNullImporter.java
+++ b/src/org/python/core/PyNullImporter.java
@@ -20,11 +20,10 @@
 
     public PyNullImporter(PyObject pathObj) {
         super();
-        String pathStr = Py.fileSystemDecode(pathObj);
+        String pathStr = imp.fileSystemDecode(pathObj);
         if (pathStr.equals("")) {
             throw Py.ImportError("empty pathname");
-        }
-        if (isDir(pathStr)) {
+        } else if (isDir(pathStr)) {
             throw Py.ImportError("existing directory: " + pathStr);
         }
     }
diff --git a/src/org/python/core/SyspathJavaLoader.java b/src/org/python/core/SyspathJavaLoader.java
--- a/src/org/python/core/SyspathJavaLoader.java
+++ b/src/org/python/core/SyspathJavaLoader.java
@@ -113,11 +113,11 @@
         	byte[] buffer;
             PyObject entry = replacePathItem(sys, i, path);
             if (entry instanceof SyspathArchive) {
-                SyspathArchive archive = (SyspathArchive)entry;
+                SyspathArchive archive = (SyspathArchive) entry;
                 buffer = getBytesFromArchive(archive, name);
             } else {
-                String dir = Py.fileSystemDecode(entry);
-                buffer = getBytesFromDir(dir, name);
+                String dir = imp.fileSystemDecode(entry, false);
+                buffer = dir != null ? getBytesFromDir(dir, name) : null;
             }
             if (buffer != null) {
             	definePackageForClass(name);
diff --git a/src/org/python/core/imp.java b/src/org/python/core/imp.java
--- a/src/org/python/core/imp.java
+++ b/src/org/python/core/imp.java
@@ -1,10 +1,6 @@
 // Copyright (c) Corporation for National Research Initiatives
 package org.python.core;
 
-import org.python.compiler.Module;
-import org.python.core.util.FileUtil;
-import org.python.core.util.PlatformUtil;
-
 import java.io.ByteArrayOutputStream;
 import java.io.File;
 import java.io.FileInputStream;
@@ -13,8 +9,12 @@
 import java.io.InputStream;
 import java.util.Map;
 import java.util.concurrent.locks.ReentrantLock;
+import java.util.logging.Level;
 import java.util.logging.Logger;
-import java.util.logging.Level;
+
+import org.python.compiler.Module;
+import org.python.core.util.FileUtil;
+import org.python.core.util.PlatformUtil;
 
 /**
  * Utility functions for "import" support.
@@ -744,7 +744,7 @@
 
         // Note the path here may be sys.path or the search path of a Python package.
         path = path == null ? sys.path : path;
-        for (int i = 0; i < path.__len__(); i++) {
+        for (int i = 0; ret == null && i < path.__len__(); i++) {
             PyObject p = path.__getitem__(i);
             // Is there a path-specific importer?
             PyObject importer = getPathImporter(sys.path_importer_cache, sys.path_hooks, p);
@@ -757,9 +757,9 @@
                 }
             }
             // p could be a unicode or bytes object (in the file system encoding)
-            ret = loadFromSource(sys, name, moduleName, Py.fileSystemDecode(p));
-            if (ret != null) {
-                return ret;
+            String pathElement = fileSystemDecode(p, false);
+            if (pathElement != null) {
+                ret = loadFromSource(sys, name, moduleName, pathElement);
             }
         }
 
@@ -1388,6 +1388,45 @@
     }
 
     /**
+     * A wrapper for {@link Py#fileSystemDecode(PyObject)} for <b>project internal use</b> within
+     * the import mechanism to convert decoding errors that occur during import to either
+     * {@code null} or {@link Py#ImportError(String)} calls (and a log message), which usually
+     * results in quiet failure.
+     *
+     * @param p assumed to be a (partial) file path
+     * @param raiseImportError if true and {@code p} cannot be decoded raise {@code ImportError}.
+     * @return String form of the object {@code p} (or {@code null}).
+     */
+    public static String fileSystemDecode(PyObject p, boolean raiseImportError) {
+        try {
+            return Py.fileSystemDecode(p);
+        } catch (PyException e) {
+            if (e.match(Py.UnicodeDecodeError)) {
+                // p is bytes we cannot convert to a String using the FS encoding
+                if (raiseImportError) {
+                    logger.log(Level.CONFIG, "Cannot decode path entry {0}", p.__repr__());
+                    throw Py.ImportError("cannot decode");
+                }
+                return null;
+            } else {
+                // Any other kind of exception continues as itself
+                throw e;
+            }
+        }
+    }
+
+    /**
+     * For <b>project internal use</b>, equivalent to {@code fileSystemDecode(p, true)} (see
+     * {@link #fileSystemDecode(PyObject, boolean)}).
+     *
+     * @param p assumed to be a (partial) file path
+     * @return String form of the object {@code p}.
+     */
+    public static String fileSystemDecode(PyObject p) {
+        return fileSystemDecode(p, true);
+    }
+
+    /**
      * Ensure that the items mentioned in the from-list of an import are actually present, even if
      * they are modules we have not imported yet.
      *
diff --git a/src/org/python/core/packagecache/PathPackageManager.java b/src/org/python/core/packagecache/PathPackageManager.java
--- a/src/org/python/core/packagecache/PathPackageManager.java
+++ b/src/org/python/core/packagecache/PathPackageManager.java
@@ -11,6 +11,7 @@
 import java.util.logging.Level;
 
 import org.python.core.Py;
+import org.python.core.PyException;
 import org.python.core.PyJavaPackage;
 import org.python.core.PyList;
 import org.python.core.PyObject;
@@ -41,28 +42,30 @@
 
         for (int i = 0; i < path.__len__(); i++) {
 
-            // Each entry in the path may be byte-encoded or unicode
             PyObject entry = path.pyget(i);
-            String dir = Py.fileSystemDecode(entry);
-            File f = new RelativeFile(dir, child);
 
-            try {
-                if (f.isDirectory() && imp.caseok(f, name)) {
-                    /*
-                     * f is a directory matching the package name. This directory is considered to
-                     * define a package if it contains no Python (source or compiled), or contains a
-                     * Java .class file (not compiled from Python).
-                     */
-                    PackageExistsFileFilter m = new PackageExistsFileFilter();
-                    f.listFiles(m);
-                    boolean exists = m.packageExists();
-                    if (exists) {
-                        logger.log(Level.CONFIG, "# trying {0}", f.getAbsolutePath());
+            // Each entry in the path may be byte-encoded or unicode
+            String dir = imp.fileSystemDecode(entry, false);
+            if (dir != null) {
+                File f = new RelativeFile(dir, child);
+                try {
+                    if (f.isDirectory() && imp.caseok(f, name)) {
+                        /*
+                         * f is a directory matching the package name. This directory is considered
+                         * to define a package if it contains no Python (source or compiled), or
+                         * contains a Java .class file (not compiled from Python).
+                         */
+                        PackageExistsFileFilter m = new PackageExistsFileFilter();
+                        f.listFiles(m);
+                        boolean exists = m.packageExists();
+                        if (exists) {
+                            logger.log(Level.CONFIG, "# trying {0}", f.getAbsolutePath());
+                        }
+                        return exists;
                     }
-                    return exists;
+                } catch (SecurityException se) {
+                    return false;
                 }
-            } catch (SecurityException se) {
-                return false;
             }
         }
         return false;
diff --git a/src/org/python/modules/zipimport/zipimporter.java b/src/org/python/modules/zipimport/zipimporter.java
--- a/src/org/python/modules/zipimport/zipimporter.java
+++ b/src/org/python/modules/zipimport/zipimporter.java
@@ -25,6 +25,7 @@
 import org.python.core.PyUnicode;
 import org.python.core.Traverseproc;
 import org.python.core.Visitproc;
+import org.python.core.imp;
 import org.python.core.util.FileUtil;
 import org.python.core.util.StringUtil;
 import org.python.core.util.importer;
@@ -90,7 +91,7 @@
     @ExposedMethod
     final void zipimporter___init__(PyObject[] args, String[] kwds) {
         ArgParser ap = new ArgParser("__init__", args, kwds, new String[] {"path"});
-        String path = Py.fileSystemDecode(ap.getPyObject(0));
+        String path = imp.fileSystemDecode(ap.getPyObject(0));
         zipimporter___init__(path);
     }
 

-- 
Repository URL: https://hg.python.org/jython


More information about the Jython-checkins mailing list