[Jython-checkins] jython: Use UTF-8 for file paths expressed in bytes.

jeff.allen jython-checkins at python.org
Sun May 21 05:06:52 EDT 2017


https://hg.python.org/jython/rev/1888a0b15f81
changeset:   8084:1888a0b15f81
user:        Jeff Allen <ja.py at farowl.co.uk>
date:        Thu Apr 20 23:20:46 2017 +0100
summary:
  Use UTF-8 for file paths expressed in bytes.

This fairly extensive change regularises the approach to file and path names in
the interests of handling non-ASCII paths correctly. See the notes to issue #2356.
The consequential changes are not yet finished, but committing the work so far
helps keep it manageable. regrtest runs with 24 failed tests.

files:
  CPythonLib.includes                           |    1 +
  Lib/ntpath.py                                 |  560 ----------
  Lib/subprocess.py                             |   38 +-
  src/org/python/core/Py.java                   |  134 ++-
  src/org/python/core/PyBytecode.java           |    9 +-
  src/org/python/core/PyFile.java               |    4 -
  src/org/python/core/PyNullImporter.java       |   13 +-
  src/org/python/core/PySystemState.java        |   53 +-
  src/org/python/core/PyTableCode.java          |    6 +-
  src/org/python/core/StdoutWrapper.java        |    3 +-
  src/org/python/core/imp.java                  |   13 +-
  src/org/python/core/io/FileIO.java            |   10 +-
  src/org/python/modules/_imp.java              |   30 +-
  src/org/python/modules/posix/PosixModule.java |   18 +-
  14 files changed, 224 insertions(+), 668 deletions(-)


diff --git a/CPythonLib.includes b/CPythonLib.includes
--- a/CPythonLib.includes
+++ b/CPythonLib.includes
@@ -110,6 +110,7 @@
 netrc.py
 nntplib.py
 numbers.py
+ntpath.py
 nturl2path.py
 opcode.py
 optparse.py
diff --git a/Lib/ntpath.py b/Lib/ntpath.py
deleted file mode 100644
--- a/Lib/ntpath.py
+++ /dev/null
@@ -1,560 +0,0 @@
-# Module 'ntpath' -- common operations on WinNT/Win95 pathnames
-"""Common pathname manipulations, WindowsNT/95 version.
-
-Instead of importing this module directly, import os and refer to this
-module as os.path.
-"""
-
-import os
-import sys
-import stat
-import genericpath
-import warnings
-
-from genericpath import *
-
-__all__ = ["normcase","isabs","join","splitdrive","split","splitext",
-           "basename","dirname","commonprefix","getsize","getmtime",
-           "getatime","getctime", "islink","exists","lexists","isdir","isfile",
-           "ismount","walk","expanduser","expandvars","normpath","abspath",
-           "splitunc","curdir","pardir","sep","pathsep","defpath","altsep",
-           "extsep","devnull","realpath","supports_unicode_filenames","relpath"]
-
-# strings representing various path-related bits and pieces
-curdir = '.'
-pardir = '..'
-extsep = '.'
-sep = '\\'
-pathsep = ';'
-altsep = '/'
-defpath = '.;C:\\bin'
-if 'ce' in sys.builtin_module_names:
-    defpath = '\\Windows'
-elif 'os2' in sys.builtin_module_names:
-    # OS/2 w/ VACPP
-    altsep = '/'
-devnull = 'nul'
-
-# Normalize the case of a pathname and map slashes to backslashes.
-# Other normalizations (such as optimizing '../' away) are not done
-# (this is done by normpath).
-
-def normcase(s):
-    """Normalize case of pathname.
-
-    Makes all characters lowercase and all slashes into backslashes."""
-    return s.replace("/", "\\").lower()
-
-
-# Return whether a path is absolute.
-# Trivial in Posix, harder on the Mac or MS-DOS.
-# For DOS it is absolute if it starts with a slash or backslash (current
-# volume), or if a pathname after the volume letter and colon / UNC resource
-# starts with a slash or backslash.
-
-def isabs(s):
-    """Test whether a path is absolute"""
-    s = splitdrive(s)[1]
-    return s != '' and s[:1] in '/\\'
-
-
-# Join two (or more) paths.
-
-def join(a, *p):
-    """Join two or more pathname components, inserting "\\" as needed.
-    If any component is an absolute path, all previous path components
-    will be discarded."""
-    path = a
-    for b in p:
-        b_wins = 0  # set to 1 iff b makes path irrelevant
-        if path == "":
-            b_wins = 1
-
-        elif isabs(b):
-            # This probably wipes out path so far.  However, it's more
-            # complicated if path begins with a drive letter:
-            #     1. join('c:', '/a') == 'c:/a'
-            #     2. join('c:/', '/a') == 'c:/a'
-            # But
-            #     3. join('c:/a', '/b') == '/b'
-            #     4. join('c:', 'd:/') = 'd:/'
-            #     5. join('c:/', 'd:/') = 'd:/'
-            if path[1:2] != ":" or b[1:2] == ":":
-                # Path doesn't start with a drive letter, or cases 4 and 5.
-                b_wins = 1
-
-            # Else path has a drive letter, and b doesn't but is absolute.
-            elif len(path) > 3 or (len(path) == 3 and
-                                   path[-1] not in "/\\"):
-                # case 3
-                b_wins = 1
-
-        if b_wins:
-            path = b
-        else:
-            # Join, and ensure there's a separator.
-            assert len(path) > 0
-            if path[-1] in "/\\":
-                if b and b[0] in "/\\":
-                    path += b[1:]
-                else:
-                    path += b
-            elif path[-1] == ":":
-                path += b
-            elif b:
-                if b[0] in "/\\":
-                    path += b
-                else:
-                    path += "\\" + b
-            else:
-                # path is not empty and does not end with a backslash,
-                # but b is empty; since, e.g., split('a/') produces
-                # ('a', ''), it's best if join() adds a backslash in
-                # this case.
-                path += '\\'
-
-    return path
-
-
-# Split a path in a drive specification (a drive letter followed by a
-# colon) and the path specification.
-# It is always true that drivespec + pathspec == p
-def splitdrive(p):
-    """Split a pathname into drive and path specifiers. Returns a 2-tuple
-"(drive,path)";  either part may be empty"""
-    if p[1:2] == ':':
-        return p[0:2], p[2:]
-    return '', p
-
-
-# Parse UNC paths
-def splitunc(p):
-    """Split a pathname into UNC mount point and relative path specifiers.
-
-    Return a 2-tuple (unc, rest); either part may be empty.
-    If unc is not empty, it has the form '//host/mount' (or similar
-    using backslashes).  unc+rest is always the input path.
-    Paths containing drive letters never have an UNC part.
-    """
-    if p[1:2] == ':':
-        return '', p # Drive letter present
-    firstTwo = p[0:2]
-    if firstTwo == '//' or firstTwo == '\\\\':
-        # is a UNC path:
-        # vvvvvvvvvvvvvvvvvvvv equivalent to drive letter
-        # \\machine\mountpoint\directories...
-        #           directory ^^^^^^^^^^^^^^^
-        normp = normcase(p)
-        index = normp.find('\\', 2)
-        if index == -1:
-            ##raise RuntimeError, 'illegal UNC path: "' + p + '"'
-            return ("", p)
-        index = normp.find('\\', index + 1)
-        if index == -1:
-            index = len(p)
-        return p[:index], p[index:]
-    return '', p
-
-
-# Split a path in head (everything up to the last '/') and tail (the
-# rest).  After the trailing '/' is stripped, the invariant
-# join(head, tail) == p holds.
-# The resulting head won't end in '/' unless it is the root.
-
-def split(p):
-    """Split a pathname.
-
-    Return tuple (head, tail) where tail is everything after the final slash.
-    Either part may be empty."""
-
-    d, p = splitdrive(p)
-    # set i to index beyond p's last slash
-    i = len(p)
-    while i and p[i-1] not in '/\\':
-        i = i - 1
-    head, tail = p[:i], p[i:]  # now tail has no slashes
-    # remove trailing slashes from head, unless it's all slashes
-    head2 = head
-    while head2 and head2[-1] in '/\\':
-        head2 = head2[:-1]
-    head = head2 or head
-    return d + head, tail
-
-
-# Split a path in root and extension.
-# The extension is everything starting at the last dot in the last
-# pathname component; the root is everything before that.
-# It is always true that root + ext == p.
-
-def splitext(p):
-    return genericpath._splitext(p, sep, altsep, extsep)
-splitext.__doc__ = genericpath._splitext.__doc__
-
-
-# Return the tail (basename) part of a path.
-
-def basename(p):
-    """Returns the final component of a pathname"""
-    return split(p)[1]
-
-
-# Return the head (dirname) part of a path.
-
-def dirname(p):
-    """Returns the directory component of a pathname"""
-    return split(p)[0]
-
-# Is a path a symbolic link?
-# This will always return false on systems where posix.lstat doesn't exist.
-
-def islink(path):
-    """Test for symbolic link.
-    On WindowsNT/95 and OS/2 always returns false
-    """
-    return False
-
-# alias exists to lexists
-lexists = exists
-
-# Is a path a mount point?  Either a root (with or without drive letter)
-# or an UNC path with at most a / or \ after the mount point.
-
-def ismount(path):
-    """Test whether a path is a mount point (defined as root of drive)"""
-    unc, rest = splitunc(path)
-    if unc:
-        return rest in ("", "/", "\\")
-    p = splitdrive(path)[1]
-    return len(p) == 1 and p[0] in '/\\'
-
-
-# Directory tree walk.
-# For each directory under top (including top itself, but excluding
-# '.' and '..'), func(arg, dirname, filenames) is called, where
-# dirname is the name of the directory and filenames is the list
-# of files (and subdirectories etc.) in the directory.
-# The func may modify the filenames list, to implement a filter,
-# or to impose a different order of visiting.
-
-def walk(top, func, arg):
-    """Directory tree walk with callback function.
-
-    For each directory in the directory tree rooted at top (including top
-    itself, but excluding '.' and '..'), call func(arg, dirname, fnames).
-    dirname is the name of the directory, and fnames a list of the names of
-    the files and subdirectories in dirname (excluding '.' and '..').  func
-    may modify the fnames list in-place (e.g. via del or slice assignment),
-    and walk will only recurse into the subdirectories whose names remain in
-    fnames; this can be used to implement a filter, or to impose a specific
-    order of visiting.  No semantics are defined for, or required of, arg,
-    beyond that arg is always passed to func.  It can be used, e.g., to pass
-    a filename pattern, or a mutable object designed to accumulate
-    statistics.  Passing None for arg is common."""
-    warnings.warnpy3k("In 3.x, os.path.walk is removed in favor of os.walk.",
-                      stacklevel=2)
-    try:
-        names = os.listdir(top)
-    except os.error:
-        return
-    func(arg, top, names)
-    for name in names:
-        name = join(top, name)
-        if isdir(name):
-            walk(name, func, arg)
-
-
-# Expand paths beginning with '~' or '~user'.
-# '~' means $HOME; '~user' means that user's home directory.
-# If the path doesn't begin with '~', or if the user or $HOME is unknown,
-# the path is returned unchanged (leaving error reporting to whatever
-# function is called with the expanded path as argument).
-# See also module 'glob' for expansion of *, ? and [...] in pathnames.
-# (A function should also be defined to do full *sh-style environment
-# variable expansion.)
-
-def expanduser(path):
-    """Expand ~ and ~user constructs.
-
-    If user or $HOME is unknown, do nothing."""
-    if path[:1] != '~':
-        return path
-    i, n = 1, len(path)
-    while i < n and path[i] not in '/\\':
-        i = i + 1
-
-    if 'HOME' in os.environ:
-        userhome = os.environ['HOME']
-    elif 'USERPROFILE' in os.environ:
-        userhome = os.environ['USERPROFILE']
-    elif not 'HOMEPATH' in os.environ:
-        return path
-    else:
-        try:
-            drive = os.environ['HOMEDRIVE']
-        except KeyError:
-            drive = ''
-        userhome = join(drive, os.environ['HOMEPATH'])
-
-    if i != 1: #~user
-        userhome = join(dirname(userhome), path[1:i])
-
-    return userhome + path[i:]
-
-
-# Expand paths containing shell variable substitutions.
-# The following rules apply:
-#       - no expansion within single quotes
-#       - '$$' is translated into '$'
-#       - '%%' is translated into '%' if '%%' are not seen in %var1%%var2%
-#       - ${varname} is accepted.
-#       - $varname is accepted.
-#       - %varname% is accepted.
-#       - varnames can be made out of letters, digits and the characters '_-'
-#         (though is not verifed in the ${varname} and %varname% cases)
-# XXX With COMMAND.COM you can use any characters in a variable name,
-# XXX except '^|<>='.
-
-def expandvars(path):
-    """Expand shell variables of the forms $var, ${var} and %var%.
-
-    Unknown variables are left unchanged."""
-    if '$' not in path and '%' not in path:
-        return path
-    import string
-    varchars = string.ascii_letters + string.digits + '_-'
-    res = ''
-    index = 0
-    pathlen = len(path)
-    while index < pathlen:
-        c = path[index]
-        if c == '\'':   # no expansion within single quotes
-            path = path[index + 1:]
-            pathlen = len(path)
-            try:
-                index = path.index('\'')
-                res = res + '\'' + path[:index + 1]
-            except ValueError:
-                res = res + path
-                index = pathlen - 1
-        elif c == '%':  # variable or '%'
-            if path[index + 1:index + 2] == '%':
-                res = res + c
-                index = index + 1
-            else:
-                path = path[index+1:]
-                pathlen = len(path)
-                try:
-                    index = path.index('%')
-                except ValueError:
-                    res = res + '%' + path
-                    index = pathlen - 1
-                else:
-                    var = path[:index]
-                    if var in os.environ:
-                        res = res + os.environ[var]
-                    else:
-                        res = res + '%' + var + '%'
-        elif c == '$':  # variable or '$$'
-            if path[index + 1:index + 2] == '$':
-                res = res + c
-                index = index + 1
-            elif path[index + 1:index + 2] == '{':
-                path = path[index+2:]
-                pathlen = len(path)
-                try:
-                    index = path.index('}')
-                    var = path[:index]
-                    if var in os.environ:
-                        res = res + os.environ[var]
-                    else:
-                        res = res + '${' + var + '}'
-                except ValueError:
-                    res = res + '${' + path
-                    index = pathlen - 1
-            else:
-                var = ''
-                index = index + 1
-                c = path[index:index + 1]
-                while c != '' and c in varchars:
-                    var = var + c
-                    index = index + 1
-                    c = path[index:index + 1]
-                if var in os.environ:
-                    res = res + os.environ[var]
-                else:
-                    res = res + '$' + var
-                if c != '':
-                    index = index - 1
-        else:
-            res = res + c
-        index = index + 1
-    return res
-
-
-# Normalize a path, e.g. A//B, A/./B and A/foo/../B all become A\B.
-# Previously, this function also truncated pathnames to 8+3 format,
-# but as this module is called "ntpath", that's obviously wrong!
-
-def normpath(path):
-    """Normalize path, eliminating double slashes, etc."""
-    # Preserve unicode (if path is unicode)
-    backslash, dot = (u'\\', u'.') if isinstance(path, unicode) else ('\\', '.')
-    if path.startswith(('\\\\.\\', '\\\\?\\')):
-        # in the case of paths with these prefixes:
-        # \\.\ -> device names
-        # \\?\ -> literal paths
-        # do not do any normalization, but return the path unchanged
-        return path
-    path = path.replace("/", "\\")
-    prefix, path = splitdrive(path)
-    # We need to be careful here. If the prefix is empty, and the path starts
-    # with a backslash, it could either be an absolute path on the current
-    # drive (\dir1\dir2\file) or a UNC filename (\\server\mount\dir1\file). It
-    # is therefore imperative NOT to collapse multiple backslashes blindly in
-    # that case.
-    # The code below preserves multiple backslashes when there is no drive
-    # letter. This means that the invalid filename \\\a\b is preserved
-    # unchanged, where a\\\b is normalised to a\b. It's not clear that there
-    # is any better behaviour for such edge cases.
-    if prefix == '':
-        # No drive letter - preserve initial backslashes
-        while path[:1] == "\\":
-            prefix = prefix + backslash
-            path = path[1:]
-    else:
-        # We have a drive letter - collapse initial backslashes
-        if path.startswith("\\"):
-            prefix = prefix + backslash
-            path = path.lstrip("\\")
-    comps = path.split("\\")
-    i = 0
-    while i < len(comps):
-        if comps[i] in ('.', ''):
-            del comps[i]
-        elif comps[i] == '..':
-            if i > 0 and comps[i-1] != '..':
-                del comps[i-1:i+1]
-                i -= 1
-            elif i == 0 and prefix.endswith("\\"):
-                del comps[i]
-            else:
-                i += 1
-        else:
-            i += 1
-    # If the path is now empty, substitute '.'
-    if not prefix and not comps:
-        comps.append(dot)
-    return prefix + backslash.join(comps)
-
-
-# Return an absolute path.
-try:
-    from nt import _getfullpathname
-
-except ImportError: # no built-in nt module - maybe it's Jython ;)
-
-    if os._name == 'nt' :
-        # on Windows so Java version of sys deals in NT paths
-        def abspath(path):
-            """Return the absolute version of a path."""
-            try:
-                if isinstance(path, unicode):
-                    # Result must be unicode
-                    if path:
-                        path = sys.getPath(path)
-                    else:
-                        # Empty path must return current working directory
-                        path = os.getcwdu()
-                else:
-                    # Result must be bytes
-                    if path:
-                        path = sys.getPath(path).encode('latin-1')
-                    else:
-                        # Empty path must return current working directory
-                        path = os.getcwd()
-            except EnvironmentError:
-                 pass # Bad path - return unchanged.
-            return normpath(path)
-
-    else:
-        # not running on Windows - mock up something sensible
-        def abspath(path):
-            """Return the absolute version of a path."""
-            try:
-                if isinstance(path, unicode):
-                    # Result must be unicode
-                    if path:
-                        path = join(os.getcwdu(), path)
-                    else:
-                        # Empty path must return current working directory
-                        path = os.getcwdu()
-                else:
-                    # Result must be bytes
-                    if path:
-                        path = join(os.getcwd(), path)
-                    else:
-                        # Empty path must return current working directory
-                        path = os.getcwd()
-            except EnvironmentError:
-                 pass # Bad path - return unchanged.
-            return normpath(path)
-
-else:  # use native Windows method on Windows
-    def abspath(path):
-        """Return the absolute version of a path."""
-
-        if path: # Empty path must return current working directory.
-            try:
-                path = _getfullpathname(path)
-            except WindowsError:
-                pass # Bad path - return unchanged.
-        elif isinstance(path, unicode):
-            path = os.getcwdu()
-        else:
-            path = os.getcwd()
-        return normpath(path)
-
-# realpath is a no-op on systems without islink support
-realpath = abspath
-# Win9x family and earlier have no Unicode filename support.
-supports_unicode_filenames = (hasattr(sys, "getwindowsversion") and
-                              sys.getwindowsversion()[3] >= 2)
-
-def _abspath_split(path):
-    abs = abspath(normpath(path))
-    prefix, rest = splitunc(abs)
-    is_unc = bool(prefix)
-    if not is_unc:
-        prefix, rest = splitdrive(abs)
-    return is_unc, prefix, [x for x in rest.split(sep) if x]
-
-def relpath(path, start=curdir):
-    """Return a relative version of a path"""
-
-    if not path:
-        raise ValueError("no path specified")
-
-    start_is_unc, start_prefix, start_list = _abspath_split(start)
-    path_is_unc, path_prefix, path_list = _abspath_split(path)
-
-    if path_is_unc ^ start_is_unc:
-        raise ValueError("Cannot mix UNC and non-UNC paths (%s and %s)"
-                                                            % (path, start))
-    if path_prefix.lower() != start_prefix.lower():
-        if path_is_unc:
-            raise ValueError("path is on UNC root %s, start on UNC root %s"
-                                                % (path_prefix, start_prefix))
-        else:
-            raise ValueError("path is on drive %s, start on drive %s"
-                                                % (path_prefix, start_prefix))
-    # Work out how much of the filepath is shared by start and path.
-    i = 0
-    for e1, e2 in zip(start_list, path_list):
-        if e1.lower() != e2.lower():
-            break
-        i += 1
-
-    rel_list = [pardir] * (len(start_list)-i) + path_list[i:]
-    if not rel_list:
-        return curdir
-    return join(*rel_list)
diff --git a/Lib/subprocess.py b/Lib/subprocess.py
--- a/Lib/subprocess.py
+++ b/Lib/subprocess.py
@@ -438,6 +438,7 @@
     import java.nio.ByteBuffer
     import org.python.core.io.RawIOBase
     import org.python.core.io.StreamIO
+    from org.python.core.Py import fileSystemDecode
 else:
     import select
     _has_poll = hasattr(select, 'poll')
@@ -779,7 +780,7 @@
         maintain those byte values (which may be butchered as
         Strings) for the subprocess if they haven't been modified.
         """
-        # Determine what's safe to merge
+        # Determine what's necessary to merge (new or different)
         merge_env = dict((key, value) for key, value in env.iteritems()
                          if key not in builder_env or
                          builder_env.get(key) != value)
@@ -789,8 +790,10 @@
         for entry in entries:
             if entry.getKey() not in env:
                 entries.remove()
-
-        builder_env.putAll(merge_env)
+        # add anything new or different in env
+        for key, value in merge_env.iteritems():
+            # If the new value is bytes, assume it to be FS-encoded
+            builder_env.put(key, fileSystemDecode(value))
 
 
 class Popen(object):
@@ -1308,9 +1311,6 @@
                 args = _cmdline2listimpl(args)
             else:
                 args = list(args)
-                # NOTE: CPython posix (execv) will str() any unicode
-                # args first, maybe we should do the same on
-                # posix. Windows passes unicode through, however
                 if any(not isinstance(arg, (str, unicode)) for arg in args):
                     raise TypeError('args must contain only strings')
             args = _escape_args(args)
@@ -1321,6 +1321,11 @@
             if executable is not None:
                 args[0] = executable
 
+            # NOTE: CPython posix (execv) will FS-encode any unicode args, but
+            # pass on bytes unchanged, because that's what the system expects.
+            # Java expects unicode, so we do the converse: leave unicode
+            # unchanged but FS-decode any supplied as bytes.
+            args = [fileSystemDecode(arg) for arg in args]
             builder = java.lang.ProcessBuilder(args)
 
             if stdin is None:
@@ -1330,16 +1335,20 @@
             if stderr is None:
                 builder.redirectError(java.lang.ProcessBuilder.Redirect.INHERIT)
 
-            # os.environ may be inherited for compatibility with CPython
+            # os.environ may be inherited for compatibility with CPython.
+            # Elements taken from os.environ are FS-decoded to unicode.
             _setup_env(dict(os.environ if env is None else env),
                        builder.environment())
 
+            # The current working directory must also be unicode.
             if cwd is None:
-                cwd = os.getcwd()
-            elif not os.path.exists(cwd):
-                raise OSError(errno.ENOENT, os.strerror(errno.ENOENT), cwd)
-            elif not os.path.isdir(cwd):
-                raise OSError(errno.ENOTDIR, os.strerror(errno.ENOTDIR), cwd)
+                cwd = os.getcwdu()
+            else:
+                cwd = fileSystemDecode(cwd)
+                if not os.path.exists(cwd):
+                    raise OSError(errno.ENOENT, os.strerror(errno.ENOENT), cwd)
+                elif not os.path.isdir(cwd):
+                    raise OSError(errno.ENOTDIR, os.strerror(errno.ENOTDIR), cwd)
             builder.directory(java.io.File(cwd))
 
             # Let Java manage redirection of stderr to stdout (it's more
@@ -1890,9 +1899,10 @@
     args = _cmdline2listimpl(command)
     args = _escape_args(args)
     args = _shell_command + args
-    cwd = os.getcwd()
+    cwd = os.getcwdu()
 
-
+    # Python supplies FS-encoded arguments while Java expects String
+    args = [fileSystemDecode(arg) for arg in args]
 
     builder = java.lang.ProcessBuilder(args)
     builder.directory(java.io.File(cwd))
diff --git a/src/org/python/core/Py.java b/src/org/python/core/Py.java
--- a/src/org/python/core/Py.java
+++ b/src/org/python/core/Py.java
@@ -84,6 +84,7 @@
             throw new StreamCorruptedException("unknown singleton: " + which);
         }
     }
+
     /* Holds the singleton None and Ellipsis objects */
     /** The singleton None Python object **/
     public final static PyObject None = new PyNone();
@@ -222,6 +223,10 @@
         return new PyException(Py.IOError, args);
     }
 
+    public static PyException IOError(Constant errno, String filename) {
+        return new PyException(Py.IOError, Py.fileSystemEncode(filename)); // XXX newStringOrUnicode?
+    }
+
     public static PyException IOError(Constant errno, PyObject filename) {
         int value = errno.intValue();
         PyObject args = new PyTuple(Py.newInteger(value), PosixModule.strerror(value), filename);
@@ -683,6 +688,103 @@
         }
     }
 
+    /**
+     * Return a file name or path as Unicode (Java UTF-16 <code>String</code>), decoded if necessary
+     * from a Python <code>bytes</code> object, using the file system encoding. In Jython, this
+     * encoding is UTF-8, irrespective of the OS platform. This method is comparable with Python 3
+     * <code>os.fsdecode</code>, but for Java use, in places such as the <code>os</code> module. If
+     * the argument is not a <code>PyUnicode</code>, it will be decoded using the nominal Jython
+     * file system encoding. If the argument <i>is</i> a <code>PyUnicode</code>, its
+     * <code>String</code> is returned.
+     *
+     * @param filename as <code>bytes</code> to decode, or already as <code>unicode</code>
+     * @return unicode version of path
+     */
+    public static String fileSystemDecode(PyString filename) {
+        String s = filename.getString();
+        if (filename instanceof PyUnicode || CharMatcher.ascii().matchesAllOf(s)) {
+            // Already decoded (unicode), or bytes usable directly as ASCII
+            return s;
+        } else {
+            // It's bytes, so must decode properly
+            assert "utf-8".equals(PySystemState.FILE_SYSTEM_ENCODING.toString());
+            return codecs.PyUnicode_DecodeUTF8(s, null);
+        }
+    }
+
+    /**
+     * As {@link #fileSystemDecode(PyString)} but raising <code>TypeError</code> if not a
+     * <code>str</code> or <code>unicode</code>.
+     *
+     * @param filename as <code>bytes</code> to decode, or already as <code>unicode</code>
+     * @return unicode version of the file name
+     */
+    public static String fileSystemDecode(PyObject filename) {
+        if (filename instanceof PyString) {
+            return fileSystemDecode((PyString)filename);
+        } else
+            throw Py.TypeError(String.format("coercing to Unicode: need string, %s type found",
+                    filename.getType().fastGetName()));
+    }
+
+    /**
+     * Return a PyString object we can use as a file name or file path in places where Python
+     * expects a <code>bytes</code> (that is a <code>str</code>) object in the file system encoding.
+     * In Jython, this encoding is UTF-8, irrespective of the OS platform.
+     * <p>
+     * This is subtly different from CPython's use of "file system encoding", which tracks the
+     * platform's choice so that OS services may be called that have a bytes interface. Jython's
+     * interaction with the OS occurs via Java using String arguments representing Unicode values,
+     * so we have no need to match the encoding actually chosen by the platform (e.g. 'mbcs' on
+     * Windows). Rather we need a nominal Jython file system encoding, for use where the standard
+     * library forces byte paths on us (in Python 2). There is no reason for this choice to vary
+     * with OS platform. Methods receiving paths as <code>bytes</code> will
+     * {@link #fileSystemDecode(PyString)} them again for Java.
+     *
+     * @param filename as <code>unicode</code> to encode, or already as <code>bytes</code>
+     * @return encoded bytes version of path
+     */
+    public static PyString fileSystemEncode(String filename) {
+        if (CharMatcher.ascii().matchesAllOf(filename)) {
+            // Just wrap it as US-ASCII is a subset of the file system encoding
+            return Py.newString(filename);
+        } else {
+            // It's not just US-ASCII, so must encode properly
+            assert "utf-8".equals(PySystemState.FILE_SYSTEM_ENCODING.toString());
+            return Py.newString(codecs.PyUnicode_EncodeUTF8(filename, null));
+        }
+    }
+
+    /**
+     * Return a PyString object we can use as a file name or file path in places where Python
+     * expects a <code>bytes</code> (that is, <code>str</code>) object in the file system encoding.
+     * In Jython, this encoding is UTF-8, irrespective of the OS platform. This method is comparable
+     * with Python 3 <code>os.fsencode</code>. If the argument is a PyString, it is returned
+     * unchanged. If the argument is a PyUnicode, it is converted to a <code>bytes</code> using the
+     * nominal Jython file system encoding.
+     *
+     * @param filename as <code>unicode</code> to encode, or already as <code>bytes</code>
+     * @return encoded bytes version of path
+     */
+    public static PyString fileSystemEncode(PyString filename) {
+        return (filename instanceof PyUnicode) ? fileSystemEncode(filename.getString()) : filename;
+    }
+
+    /**
+     * Convert a <code>PyList</code> path to a list of Java <code>String</code> objects decoded from
+     * the path elements to strings guaranteed usable in the Java API.
+     *
+     * @param path a Python search path
+     * @return equivalent Java list
+     */
+    private static List<String> fileSystemDecode(PyList path) {
+        List<String> list = new ArrayList<>(path.__len__());
+        for (PyObject filename : path.getList()) {
+            list.add(fileSystemDecode(filename));
+        }
+        return list;
+    }
+
     public static PyStringMap newStringMap() {
         // enable lazy bootstrapping (see issue #1671)
         if (!PyType.hasBuilder(PyStringMap.class)) {
@@ -1282,7 +1384,7 @@
             if (moduleName == null) {
                 buf.append("<unknown>");
             } else {
-                String moduleStr = moduleName.toString();
+                String moduleStr = Py.fileSystemDecode(moduleName);
                 if (!moduleStr.equals("exceptions")) {
                     buf.append(moduleStr);
                     buf.append(".");
@@ -1294,7 +1396,7 @@
         }
         if (value != null && value != Py.None) {
             // only print colon if the str() of the object is not the empty string
-            PyObject s = useRepr ? value.__repr__() : value.__str__();
+            PyObject s = useRepr ? value.__repr__() : value;
             if (!(s instanceof PyString) || s.__len__() != 0) {
                 buf.append(": ");
             }
@@ -1565,6 +1667,16 @@
         }
     }
 
+    private static final String IMPORT_SITE_ERROR = ""
+            + "Cannot import site module and its dependencies: %s\n"
+            + "Determine if the following attributes are correct:\n" //
+            + "  * sys.path: %s\n"
+            + "    This attribute might be including the wrong directories, such as from CPython\n"
+            + "  * sys.prefix: %s\n"
+            + "    This attribute is set by the system property python.home, although it can\n"
+            + "    be often automatically determined by the location of the Jython jar file\n\n"
+            + "You can use the -S option or python.import.site=false to not import the site module";
+
     public static boolean importSiteIfSelected() {
         if (Options.importSite) {
             try {
@@ -1574,18 +1686,10 @@
             } catch (PyException pye) {
                 if (pye.match(Py.ImportError)) {
                     PySystemState sys = Py.getSystemState();
-                    throw Py.ImportError(String.format(""
-                                    + "Cannot import site module and its dependencies: %s\n"
-                                    + "Determine if the following attributes are correct:\n"
-                                    + "  * sys.path: %s\n"
-                                    + "    This attribute might be including the wrong directories, such as from CPython\n"
-                                    + "  * sys.prefix: %s\n"
-                                    + "    This attribute is set by the system property python.home, although it can\n"
-                                    + "    be often automatically determined by the location of the Jython jar file\n\n"
-                                    + "You can use the -S option or python.import.site=false to not import the site module",
-                            pye.value.__getattr__("args").__getitem__(0),
-                            sys.path,
-                            sys.prefix));
+                    String value = pye.value.__getattr__("args").__getitem__(0).toString();
+                    List<String> path = fileSystemDecode(sys.path);
+                    throw Py.ImportError(
+                            String.format(IMPORT_SITE_ERROR, value, path, PySystemState.prefix));
                 } else {
                     throw pye;
                 }
@@ -2266,7 +2370,7 @@
         }
         /* Here we would actually like to call cls.__findattr__("__metaclass__")
          * rather than cls.getType(). However there are circumstances where the
-         * metaclass doesn't show up as __metaclass__. On the other hand we need 
+         * metaclass doesn't show up as __metaclass__. On the other hand we need
          * to avoid that checker refers to builtin type___subclasscheck__ or
          * type___instancecheck__. Filtering out checker-instances of
          * PyBuiltinMethodNarrow does the trick. We also filter out PyMethodDescr
diff --git a/src/org/python/core/PyBytecode.java b/src/org/python/core/PyBytecode.java
--- a/src/org/python/core/PyBytecode.java
+++ b/src/org/python/core/PyBytecode.java
@@ -116,11 +116,13 @@
         throw Py.AttributeError(name);
     }
 
+    @Override
     public void __setattr__(String name, PyObject value) {
         // no writable attributes
         throwReadonly(name);
     }
 
+    @Override
     public void __delattr__(String name) {
         throwReadonly(name);
     }
@@ -137,6 +139,7 @@
         return new PyTuple(pystr);
     }
 
+    @Override
     public PyObject __findattr_ex__(String name) {
         // have to craft co_varnames specially
         if (name == "co_varnames") {
@@ -149,7 +152,7 @@
             return toPyStringTuple(co_freevars);
         }
         if (name == "co_filename") {
-            return new PyString(co_filename);
+            return Py.fileSystemEncode(co_filename); // bytes object expected by clients
         }
         if (name == "co_name") {
             return new PyString(co_name);
@@ -1156,7 +1159,7 @@
                         "zap" this information, to prevent END_FINALLY from
                         re-raising the exception.  (But non-local gotos
                         should still be resumed.)
-                     */    
+                     */
                         PyObject exit;
                         PyObject u = stack.pop(), v, w;
                         if (u == Py.None) {
@@ -1350,7 +1353,7 @@
             if (why != Why.RETURN) {
                 retval = Py.None;
             }
-        } else { 
+        } else {
             // store the stack in the frame for reentry from the yield;
             f.f_savedlocals = stack.popN(stack.size());
         }
diff --git a/src/org/python/core/PyFile.java b/src/org/python/core/PyFile.java
--- a/src/org/python/core/PyFile.java
+++ b/src/org/python/core/PyFile.java
@@ -168,10 +168,6 @@
         ArgParser ap = new ArgParser("file", args, kwds, new String[] {"name", "mode", "buffering"},
                                      1);
         PyObject name = ap.getPyObject(0);
-        if (!(name instanceof PyString)) {
-            throw Py.TypeError("coercing to Unicode: need string, '" + name.getType().fastGetName()
-                               + "' type found");
-        }
         String mode = ap.getString(1, "r");
         int bufsize = ap.getInt(2, -1);
         file___init__(new FileIO((PyString) name, parseMode(mode)), name, mode, bufsize);
diff --git a/src/org/python/core/PyNullImporter.java b/src/org/python/core/PyNullImporter.java
--- a/src/org/python/core/PyNullImporter.java
+++ b/src/org/python/core/PyNullImporter.java
@@ -20,7 +20,7 @@
 
     public PyNullImporter(PyObject pathObj) {
         super();
-        String pathStr = asPath(pathObj);
+        String pathStr = Py.fileSystemDecode(pathObj);
         if (pathStr.equals("")) {
             throw Py.ImportError("empty pathname");
         }
@@ -42,17 +42,6 @@
         return Py.None;
     }
 
-    // FIXME Refactoring move helper function to a central util library
-    // FIXME Also can take in account working in zip file systems
-
-    private static String asPath(PyObject pathObj) {
-        if (!(pathObj instanceof PyString)) {
-            throw Py.TypeError(String.format("coercing to Unicode: need string, %s type found",
-                    pathObj.getType().fastGetName()));
-        }
-        return pathObj.toString();
-    }
-
     private static boolean isDir(String pathStr) {
         if (pathStr.equals("")) {
             return false;
diff --git a/src/org/python/core/PySystemState.java b/src/org/python/core/PySystemState.java
--- a/src/org/python/core/PySystemState.java
+++ b/src/org/python/core/PySystemState.java
@@ -82,6 +82,9 @@
 
     public final static PyString float_repr_style = Py.newString("short");
 
+    /** Nominal Jython file system encoding (as <code>sys.getfilesystemencoding()</code>) */
+    static final PyString FILE_SYSTEM_ENCODING = Py.newString("utf-8");
+
     public static boolean py3kwarning = false;
 
     public final static Class flags = Options.class;
@@ -109,13 +112,13 @@
     public static PackageManager packageManager;
     private static File cachedir;
 
-    private static PyList defaultPath;
-    private static PyList defaultArgv;
-    private static PyObject defaultExecutable;
+    private static PyList defaultPath; // list of bytes or unicode
+    private static PyList defaultArgv; // list of bytes or unicode
+    private static PyObject defaultExecutable; // bytes or unicode or None
 
     public static Properties registry; // = init_registry();
-    public static PyObject prefix;
-    public static PyObject exec_prefix = Py.EmptyString;
+    public static PyObject prefix; // bytes or unicode
+    public static PyObject exec_prefix = Py.EmptyString; // bytes or unicode
 
     public static final PyString byteorder = new PyString("big");
     public static final int maxint = Integer.MAX_VALUE;
@@ -504,7 +507,7 @@
     }
 
     public PyObject getfilesystemencoding() {
-        return Py.None;
+        return FILE_SYSTEM_ENCODING;
     }
 
 
@@ -840,10 +843,10 @@
             }
         }
         if (prefix != null) {
-            PySystemState.prefix = Py.newString(prefix);
+            PySystemState.prefix = Py.newStringOrUnicode(prefix);
         }
         if (exec_prefix != null) {
-            PySystemState.exec_prefix = Py.newString(exec_prefix);
+            PySystemState.exec_prefix = Py.newStringOrUnicode(exec_prefix);
         }
         try {
             String jythonpath = System.getenv("JYTHONPATH");
@@ -1174,16 +1177,16 @@
         PyList argv = new PyList();
         if (args != null) {
             for (String arg : args) {
-                argv.append(Py.newStringOrUnicode(arg));
+                argv.append(Py.newStringOrUnicode(arg)); // XXX or always newUnicode?
             }
         }
         return argv;
     }
 
     /**
-     * Determine the default sys.executable value from the registry.
-     * If registry is not set (as in standalone jython jar), will use sys.prefix + /bin/jython(.exe) and the file may
-     * not exist. Users can create a wrapper in it's place to make it work in embedded environments.
+     * Determine the default sys.executable value from the registry. If registry is not set (as in
+     * standalone jython jar), we will use sys.prefix + /bin/jython(.exe) and the file may not
+     * exist. Users can create a wrapper in its place to make it work in embedded environments.
      * Only if sys.prefix is null, returns Py.None
      *
      * @param props a Properties registry
@@ -1191,26 +1194,26 @@
      */
     private static PyObject initExecutable(Properties props) {
         String executable = props.getProperty("python.executable");
-        if (executable == null) {
+        File executableFile;
+        if (executable != null) {
+            // The executable from the registry is a Unicode String path
+            executableFile = new File(executable);
+        } else {
             if (prefix == null) {
                 return Py.None;
             } else {
-                executable = prefix.asString() + File.separator + "bin" + File.separator;
-                if (Platform.IS_WINDOWS) {
-                    executable += "jython.exe";
-                } else {
-                    executable += "jython";
-                }
+                // The prefix is a unicode or encoded bytes object
+                executableFile = new File(Py.fileSystemDecode(prefix),
+                        Platform.IS_WINDOWS ? "bin\\jython.exe" : "bin/jython");
             }
         }
 
-        File executableFile = new File(executable);
         try {
             executableFile = executableFile.getCanonicalFile();
         } catch (IOException ioe) {
             executableFile = executableFile.getAbsoluteFile();
         }
-        return new PyString(executableFile.getPath());
+        return Py.newStringOrUnicode(executableFile.getPath()); // XXX always bytes in CPython
     }
 
     /**
@@ -1353,8 +1356,8 @@
         PyList path = new PyList();
         addPaths(path, props.getProperty("python.path", ""));
         if (prefix != null) {
-            String libpath = new File(prefix.toString(), "Lib").toString();
-            path.append(new PyString(libpath));
+            String libpath = new File(Py.fileSystemDecode(prefix), "Lib").toString();
+            path.append(Py.fileSystemEncode(libpath)); // XXX or newStringOrUnicode or newUnicode?
         }
         if (standalone) {
             // standalone jython: add the /Lib directory inside JYTHON_JAR to the path
@@ -1397,7 +1400,8 @@
     private static void addPaths(PyList path, String pypath) {
         StringTokenizer tok = new StringTokenizer(pypath, java.io.File.pathSeparator);
         while (tok.hasMoreTokens()) {
-            path.append(new PyString(tok.nextToken().trim()));
+            // Use unicode object if necessary to represent the element
+            path.append(Py.newStringOrUnicode(tok.nextToken().trim()));
         }
     }
 
@@ -1540,6 +1544,7 @@
         closer.cleanup();
     }
 
+    @Override
     public void close() { cleanup(); }
 
     public static class PySystemStateCloser {
diff --git a/src/org/python/core/PyTableCode.java b/src/org/python/core/PyTableCode.java
--- a/src/org/python/core/PyTableCode.java
+++ b/src/org/python/core/PyTableCode.java
@@ -66,6 +66,7 @@
         // co_lnotab, co_stacksize
     };
 
+    @Override
     public PyObject __dir__() {
         PyString members[] = new PyString[__members__.length];
         for (int i = 0; i < __members__.length; i++)
@@ -80,11 +81,13 @@
         throw Py.AttributeError(name);
     }
 
+    @Override
     public void __setattr__(String name, PyObject value) {
         // no writable attributes
         throwReadonly(name);
     }
 
+    @Override
     public void __delattr__(String name) {
         throwReadonly(name);
     }
@@ -99,6 +102,7 @@
         return new PyTuple(pystr);
     }
 
+    @Override
     public PyObject __findattr_ex__(String name) {
         // have to craft co_varnames specially
         if (name == "co_varnames") {
@@ -111,7 +115,7 @@
             return toPyStringTuple(co_freevars);
         }
         if (name == "co_filename") {
-            return new PyString(co_filename);
+            return Py.fileSystemEncode(co_filename); // bytes object expected by clients
         }
         if (name == "co_name") {
             return new PyString(co_name);
diff --git a/src/org/python/core/StdoutWrapper.java b/src/org/python/core/StdoutWrapper.java
--- a/src/org/python/core/StdoutWrapper.java
+++ b/src/org/python/core/StdoutWrapper.java
@@ -105,7 +105,8 @@
         String s;
         if (o instanceof PyUnicode) {
             // Use the encoding and policy defined for the stream. (Each may be null.)
-            s = ((PyUnicode)o).encode(file.encoding, file.errors);
+            s = ((PyUnicode)o).encode(file.encoding, "replace"); //FIXME: back to ...
+            // s = ((PyUnicode)o).encode(file.encoding, file.errors);
         } else {
             s = o.__str__().toString();
         }
diff --git a/src/org/python/core/imp.java b/src/org/python/core/imp.java
--- a/src/org/python/core/imp.java
+++ b/src/org/python/core/imp.java
@@ -418,7 +418,8 @@
         }
 
         if (moduleLocation != null) {
-            module.__setattr__("__file__", new PyString(moduleLocation));
+            // Standard library expects __file__ to be encoded bytes
+            module.__setattr__("__file__", Py.fileSystemEncode(moduleLocation));
         } else if (module.__findattr__("__file__") == null) {
             // Should probably never happen (but maybe with an odd custom builtins, or
             // Java Integration)
@@ -543,10 +544,8 @@
                     return loadFromLoader(loader, moduleName);
                 }
             }
-            if (!(p instanceof PyUnicode)) {
-                p = p.__str__();
-            }
-            ret = loadFromSource(sys, name, moduleName, p.toString());
+            // p could be unicode or bytes (in the file system encoding)
+            ret = loadFromSource(sys, name, moduleName, Py.fileSystemDecode(p));
             if (ret != null) {
                 return ret;
             }
@@ -606,7 +605,7 @@
         // display names are for identification purposes (e.g. __file__): when entry is
         // null it forces java.io.File to be a relative path (e.g. foo/bar.py instead of
         // /tmp/foo/bar.py)
-        String displayDirName = entry.equals("") ? null : entry.toString();
+        String displayDirName = entry.equals("") ? null : entry;
         String displaySourceName = new File(new File(displayDirName, name), sourceName).getPath();
         String displayCompiledName =
                 new File(new File(displayDirName, name), compiledName).getPath();
@@ -640,7 +639,7 @@
             compiledFile = new File(dirName, compiledName);
         } else {
             PyModule m = addModule(modName);
-            PyObject filename = new PyString(new File(displayDirName, name).getPath());
+            PyObject filename = Py.newStringOrUnicode(new File(displayDirName, name).getPath());  // XXX fileSystemEncode?
             m.__dict__.__setitem__("__path__", new PyList(new PyObject[] {filename}));
         }
 
diff --git a/src/org/python/core/io/FileIO.java b/src/org/python/core/io/FileIO.java
--- a/src/org/python/core/io/FileIO.java
+++ b/src/org/python/core/io/FileIO.java
@@ -64,10 +64,10 @@
     private boolean emulateAppend;
 
     /**
-     * @see #FileIO(PyString name, String mode)
+     * @see #FileIO(String name, String mode)
      */
-    public FileIO(String name, String mode) {
-        this(Py.newString(name), mode);
+    public FileIO(PyString name, String mode) {
+        this(Py.fileSystemDecode(name), mode);
     }
 
     /**
@@ -80,9 +80,9 @@
      * @param name the name of the file
      * @param mode a raw io file mode String
      */
-    public FileIO(PyString name, String mode) {
+    public FileIO(String name, String mode) {
         parseMode(mode);
-        File absPath = new RelativeFile(name.toString());
+        File absPath = new RelativeFile(name);
 
         try {
             if ((appending && !(reading || plus)) || (writing && !reading && !plus)) {
diff --git a/src/org/python/modules/_imp.java b/src/org/python/modules/_imp.java
--- a/src/org/python/modules/_imp.java
+++ b/src/org/python/modules/_imp.java
@@ -68,7 +68,7 @@
      * This needs to be consolidated with the code in (@see org.python.core.imp).
      *
      * @param name module name
-     * @param entry a path String
+     * @param entry a path String (Unicode file or directory name)
      * @param findingPackage if looking for a package only try to locate __init__
      * @return null if no module found otherwise module information
      */
@@ -190,8 +190,10 @@
 
     public static PyObject find_module(String name, PyObject path) {
         if (path == Py.None && PySystemState.getBuiltin(name) != null) {
-            return new PyTuple(Py.None, Py.newString(name),
-                               new PyTuple(Py.EmptyString, Py.EmptyString,
+            return new PyTuple(Py.None,
+                               Py.newString(name),
+                               new PyTuple(Py.EmptyString,
+                                           Py.EmptyString,
                                            Py.newInteger(C_BUILTIN)));
         }
 
@@ -199,14 +201,14 @@
             path = Py.getSystemState().path;
         }
         for (PyObject p : path.asIterable()) {
-            ModuleInfo mi = findFromSource(name, p.toString(), false, true);
+            ModuleInfo mi = findFromSource(name, Py.fileSystemDecode(p), false, true);
             if(mi == null) {
                 continue;
             }
             return new PyTuple(mi.file,
-                               new PyString(mi.filename),
-                               new PyTuple(new PyString(mi.suffix),
-                                           new PyString(mi.mode),
+                               Py.newStringOrUnicode(mi.filename),
+                               new PyTuple(Py.newString(mi.suffix),
+                                           Py.newString(mi.mode),
                                            Py.newInteger(mi.type)));
         }
         throw Py.ImportError("No module named " + name);
@@ -216,7 +218,8 @@
         PyObject mod = Py.None;
         PySystemState sys = Py.getSystemState();
         int type = data.__getitem__(2).asInt();
-        while(mod == Py.None) {
+        String filenameString = Py.fileSystemDecode(filename);
+        while (mod == Py.None) {
             String compiledName;
             switch (type) {
                 case PY_SOURCE:
@@ -226,7 +229,7 @@
                     }
 
                     // XXX: This should load the accompanying byte code file instead, if it exists
-                    String resolvedFilename = sys.getPath(filename.toString());
+                    String resolvedFilename = sys.getPath(filenameString);
                     compiledName = makeCompiledFilename(resolvedFilename);
                     if (name.endsWith(".__init__")) {
                         name = name.substring(0, name.length() - ".__init__".length());
@@ -241,19 +244,20 @@
                     }
 
                     mod = imp.createFromSource(name.intern(), (InputStream)o,
-                            filename.toString(), compiledName, mtime);
+                            filenameString, compiledName, mtime);
                     break;
                 case PY_COMPILED:
-                    mod = load_compiled(name, filename.toString(), file);
+                    mod = load_compiled(name, filenameString, file);
                     break;
                 case PKG_DIRECTORY:
                     PyModule m = imp.addModule(name);
                     m.__dict__.__setitem__("__path__", new PyList(new PyObject[] {filename}));
                     m.__dict__.__setitem__("__file__", filename);
-                    ModuleInfo mi = findFromSource(name, filename.toString(), true, true);
+                    ModuleInfo mi = findFromSource(name, filenameString, true, true);
                     type = mi.type;
                     file = mi.file;
-                    filename = new PyString(mi.filename);
+                    filenameString = mi.filename;
+                    filename = Py.newStringOrUnicode(filenameString);
                     break;
                 default:
                     throw Py.ImportError("No module named " + name);
diff --git a/src/org/python/modules/posix/PosixModule.java b/src/org/python/modules/posix/PosixModule.java
--- a/src/org/python/modules/posix/PosixModule.java
+++ b/src/org/python/modules/posix/PosixModule.java
@@ -486,7 +486,8 @@
         "getcwd() -> path\n\n" +
         "Return a string representing the current working directory.");
     public static PyObject getcwd() {
-        return Py.newStringOrUnicode(Py.getSystemState().getCurrentWorkingDir());
+        // The return value is bytes in the file system encoding
+        return Py.fileSystemEncode(Py.getSystemState().getCurrentWorkingDir());
     }
 
     public static PyString __doc__getcwdu = new PyString(
@@ -1343,25 +1344,24 @@
             return environ;
         }
         for (Map.Entry<String, String> entry : env.entrySet()) {
+            // The shell restricts names to a subset of ASCII and values are encoded byte strings.
             environ.__setitem__(
-                    Py.newStringOrUnicode(entry.getKey()),
-                    Py.newStringOrUnicode(entry.getValue()));
+                    Py.newString(entry.getKey()),
+                    Py.fileSystemEncode(entry.getValue()));
         }
         return environ;
     }
 
     /**
-     * Return a path as a String from a PyObject
+     * Return a path as a String from a PyObject, which must be <code>str</code> or
+     * <code>unicode</code>. If the path is a <code>str</code> (that is, <code>bytes</code>), it is
+     * interpreted into Unicode using the file system encoding.
      *
      * @param path a PyObject, raising a TypeError if an invalid path type
      * @return a String path
      */
     private static String asPath(PyObject path) {
-        if (path instanceof PyString) {
-            return path.toString();
-        }
-        throw Py.TypeError(String.format("coercing to Unicode: need string, %s type found",
-                                         path.getType().fastGetName()));
+        return Py.fileSystemDecode(path);
     }
 
     /**

-- 
Repository URL: https://hg.python.org/jython


More information about the Jython-checkins mailing list