[Jython-checkins] jython (merge default -> default): Merge work on non-ascii file/user names to trunk.

jeff.allen jython-checkins at python.org
Sun May 21 05:01:59 EDT 2017


https://hg.python.org/jython/rev/060e4e4a06d8
changeset:   8087:060e4e4a06d8
parent:      8075:0a00982f6ea5
parent:      8086:147fe05920a4
user:        Jeff Allen <ja.py at farowl.co.uk>
date:        Sun Apr 30 23:07:30 2017 +0100
summary:
  Merge work on non-ascii file/user names to trunk.

files:
  CPythonLib.includes                                      |    1 +
  Lib/javashell.py                                         |    2 +-
  Lib/ntpath.py                                            |  560 ----------
  Lib/subprocess.py                                        |   38 +-
  Lib/sysconfig.py                                         |    6 +
  Lib/test/test_exceptions.py                              |    3 -
  Lib/test/test_exceptions_jy.py                           |    5 +-
  Lib/test/test_httpservers.py                             |    3 +
  Lib/test/test_java_visibility.py                         |   11 +-
  Lib/test/test_jser.py                                    |    4 +-
  Lib/test/test_jython_launcher.py                         |    8 +-
  Lib/test/test_ssl.py                                     |    8 +-
  Lib/test/test_support.py                                 |    2 +-
  Lib/test/test_zipimport_jy.py                            |    6 +-
  build.xml                                                |    3 +
  src/org/python/core/Py.java                              |  297 ++++-
  src/org/python/core/PyBaseException.java                 |   17 +-
  src/org/python/core/PyBytecode.java                      |    9 +-
  src/org/python/core/PyException.java                     |   25 +-
  src/org/python/core/PyFile.java                          |    4 -
  src/org/python/core/PyNullImporter.java                  |   13 +-
  src/org/python/core/PyString.java                        |    6 +-
  src/org/python/core/PySystemState.java                   |   65 +-
  src/org/python/core/PyTableCode.java                     |    6 +-
  src/org/python/core/PyUnicode.java                       |    4 +-
  src/org/python/core/SyspathArchive.java                  |    2 +-
  src/org/python/core/SyspathJavaLoader.java               |   55 +-
  src/org/python/core/__builtin__.java                     |    8 +-
  src/org/python/core/imp.java                             |   26 +-
  src/org/python/core/io/FileIO.java                       |    4 +-
  src/org/python/core/packagecache/PathPackageManager.java |   14 +-
  src/org/python/modules/_imp.java                         |   81 +-
  src/org/python/modules/_py_compile.java                  |   36 +-
  src/org/python/modules/posix/PosixModule.java            |   18 +-
  src/org/python/modules/zipimport/zipimporter.java        |    8 +-
  src/org/python/util/jython.java                          |    4 +-
  src/shell/jython.exe                                     |  Bin 
  src/shell/jython.py                                      |  314 +++--
  38 files changed, 733 insertions(+), 943 deletions(-)


diff --git a/CPythonLib.includes b/CPythonLib.includes
--- a/CPythonLib.includes
+++ b/CPythonLib.includes
@@ -110,6 +110,7 @@
 netrc.py
 nntplib.py
 numbers.py
+ntpath.py
 nturl2path.py
 opcode.py
 optparse.py
diff --git a/Lib/javashell.py b/Lib/javashell.py
--- a/Lib/javashell.py
+++ b/Lib/javashell.py
@@ -55,7 +55,7 @@
 
         env = self._formatEnvironment( self.environment )
         try:
-            p = Runtime.getRuntime().exec( shellCmd, env, File(os.getcwd()) )
+            p = Runtime.getRuntime().exec( shellCmd, env, File(os.getcwdu()) )
             return p
         except IOException, ex:
             raise OSError(
diff --git a/Lib/ntpath.py b/Lib/ntpath.py
deleted file mode 100644
--- a/Lib/ntpath.py
+++ /dev/null
@@ -1,560 +0,0 @@
-# Module 'ntpath' -- common operations on WinNT/Win95 pathnames
-"""Common pathname manipulations, WindowsNT/95 version.
-
-Instead of importing this module directly, import os and refer to this
-module as os.path.
-"""
-
-import os
-import sys
-import stat
-import genericpath
-import warnings
-
-from genericpath import *
-
-__all__ = ["normcase","isabs","join","splitdrive","split","splitext",
-           "basename","dirname","commonprefix","getsize","getmtime",
-           "getatime","getctime", "islink","exists","lexists","isdir","isfile",
-           "ismount","walk","expanduser","expandvars","normpath","abspath",
-           "splitunc","curdir","pardir","sep","pathsep","defpath","altsep",
-           "extsep","devnull","realpath","supports_unicode_filenames","relpath"]
-
-# strings representing various path-related bits and pieces
-curdir = '.'
-pardir = '..'
-extsep = '.'
-sep = '\\'
-pathsep = ';'
-altsep = '/'
-defpath = '.;C:\\bin'
-if 'ce' in sys.builtin_module_names:
-    defpath = '\\Windows'
-elif 'os2' in sys.builtin_module_names:
-    # OS/2 w/ VACPP
-    altsep = '/'
-devnull = 'nul'
-
-# Normalize the case of a pathname and map slashes to backslashes.
-# Other normalizations (such as optimizing '../' away) are not done
-# (this is done by normpath).
-
-def normcase(s):
-    """Normalize case of pathname.
-
-    Makes all characters lowercase and all slashes into backslashes."""
-    return s.replace("/", "\\").lower()
-
-
-# Return whether a path is absolute.
-# Trivial in Posix, harder on the Mac or MS-DOS.
-# For DOS it is absolute if it starts with a slash or backslash (current
-# volume), or if a pathname after the volume letter and colon / UNC resource
-# starts with a slash or backslash.
-
-def isabs(s):
-    """Test whether a path is absolute"""
-    s = splitdrive(s)[1]
-    return s != '' and s[:1] in '/\\'
-
-
-# Join two (or more) paths.
-
-def join(a, *p):
-    """Join two or more pathname components, inserting "\\" as needed.
-    If any component is an absolute path, all previous path components
-    will be discarded."""
-    path = a
-    for b in p:
-        b_wins = 0  # set to 1 iff b makes path irrelevant
-        if path == "":
-            b_wins = 1
-
-        elif isabs(b):
-            # This probably wipes out path so far.  However, it's more
-            # complicated if path begins with a drive letter:
-            #     1. join('c:', '/a') == 'c:/a'
-            #     2. join('c:/', '/a') == 'c:/a'
-            # But
-            #     3. join('c:/a', '/b') == '/b'
-            #     4. join('c:', 'd:/') = 'd:/'
-            #     5. join('c:/', 'd:/') = 'd:/'
-            if path[1:2] != ":" or b[1:2] == ":":
-                # Path doesn't start with a drive letter, or cases 4 and 5.
-                b_wins = 1
-
-            # Else path has a drive letter, and b doesn't but is absolute.
-            elif len(path) > 3 or (len(path) == 3 and
-                                   path[-1] not in "/\\"):
-                # case 3
-                b_wins = 1
-
-        if b_wins:
-            path = b
-        else:
-            # Join, and ensure there's a separator.
-            assert len(path) > 0
-            if path[-1] in "/\\":
-                if b and b[0] in "/\\":
-                    path += b[1:]
-                else:
-                    path += b
-            elif path[-1] == ":":
-                path += b
-            elif b:
-                if b[0] in "/\\":
-                    path += b
-                else:
-                    path += "\\" + b
-            else:
-                # path is not empty and does not end with a backslash,
-                # but b is empty; since, e.g., split('a/') produces
-                # ('a', ''), it's best if join() adds a backslash in
-                # this case.
-                path += '\\'
-
-    return path
-
-
-# Split a path in a drive specification (a drive letter followed by a
-# colon) and the path specification.
-# It is always true that drivespec + pathspec == p
-def splitdrive(p):
-    """Split a pathname into drive and path specifiers. Returns a 2-tuple
-"(drive,path)";  either part may be empty"""
-    if p[1:2] == ':':
-        return p[0:2], p[2:]
-    return '', p
-
-
-# Parse UNC paths
-def splitunc(p):
-    """Split a pathname into UNC mount point and relative path specifiers.
-
-    Return a 2-tuple (unc, rest); either part may be empty.
-    If unc is not empty, it has the form '//host/mount' (or similar
-    using backslashes).  unc+rest is always the input path.
-    Paths containing drive letters never have an UNC part.
-    """
-    if p[1:2] == ':':
-        return '', p # Drive letter present
-    firstTwo = p[0:2]
-    if firstTwo == '//' or firstTwo == '\\\\':
-        # is a UNC path:
-        # vvvvvvvvvvvvvvvvvvvv equivalent to drive letter
-        # \\machine\mountpoint\directories...
-        #           directory ^^^^^^^^^^^^^^^
-        normp = normcase(p)
-        index = normp.find('\\', 2)
-        if index == -1:
-            ##raise RuntimeError, 'illegal UNC path: "' + p + '"'
-            return ("", p)
-        index = normp.find('\\', index + 1)
-        if index == -1:
-            index = len(p)
-        return p[:index], p[index:]
-    return '', p
-
-
-# Split a path in head (everything up to the last '/') and tail (the
-# rest).  After the trailing '/' is stripped, the invariant
-# join(head, tail) == p holds.
-# The resulting head won't end in '/' unless it is the root.
-
-def split(p):
-    """Split a pathname.
-
-    Return tuple (head, tail) where tail is everything after the final slash.
-    Either part may be empty."""
-
-    d, p = splitdrive(p)
-    # set i to index beyond p's last slash
-    i = len(p)
-    while i and p[i-1] not in '/\\':
-        i = i - 1
-    head, tail = p[:i], p[i:]  # now tail has no slashes
-    # remove trailing slashes from head, unless it's all slashes
-    head2 = head
-    while head2 and head2[-1] in '/\\':
-        head2 = head2[:-1]
-    head = head2 or head
-    return d + head, tail
-
-
-# Split a path in root and extension.
-# The extension is everything starting at the last dot in the last
-# pathname component; the root is everything before that.
-# It is always true that root + ext == p.
-
-def splitext(p):
-    return genericpath._splitext(p, sep, altsep, extsep)
-splitext.__doc__ = genericpath._splitext.__doc__
-
-
-# Return the tail (basename) part of a path.
-
-def basename(p):
-    """Returns the final component of a pathname"""
-    return split(p)[1]
-
-
-# Return the head (dirname) part of a path.
-
-def dirname(p):
-    """Returns the directory component of a pathname"""
-    return split(p)[0]
-
-# Is a path a symbolic link?
-# This will always return false on systems where posix.lstat doesn't exist.
-
-def islink(path):
-    """Test for symbolic link.
-    On WindowsNT/95 and OS/2 always returns false
-    """
-    return False
-
-# alias exists to lexists
-lexists = exists
-
-# Is a path a mount point?  Either a root (with or without drive letter)
-# or an UNC path with at most a / or \ after the mount point.
-
-def ismount(path):
-    """Test whether a path is a mount point (defined as root of drive)"""
-    unc, rest = splitunc(path)
-    if unc:
-        return rest in ("", "/", "\\")
-    p = splitdrive(path)[1]
-    return len(p) == 1 and p[0] in '/\\'
-
-
-# Directory tree walk.
-# For each directory under top (including top itself, but excluding
-# '.' and '..'), func(arg, dirname, filenames) is called, where
-# dirname is the name of the directory and filenames is the list
-# of files (and subdirectories etc.) in the directory.
-# The func may modify the filenames list, to implement a filter,
-# or to impose a different order of visiting.
-
-def walk(top, func, arg):
-    """Directory tree walk with callback function.
-
-    For each directory in the directory tree rooted at top (including top
-    itself, but excluding '.' and '..'), call func(arg, dirname, fnames).
-    dirname is the name of the directory, and fnames a list of the names of
-    the files and subdirectories in dirname (excluding '.' and '..').  func
-    may modify the fnames list in-place (e.g. via del or slice assignment),
-    and walk will only recurse into the subdirectories whose names remain in
-    fnames; this can be used to implement a filter, or to impose a specific
-    order of visiting.  No semantics are defined for, or required of, arg,
-    beyond that arg is always passed to func.  It can be used, e.g., to pass
-    a filename pattern, or a mutable object designed to accumulate
-    statistics.  Passing None for arg is common."""
-    warnings.warnpy3k("In 3.x, os.path.walk is removed in favor of os.walk.",
-                      stacklevel=2)
-    try:
-        names = os.listdir(top)
-    except os.error:
-        return
-    func(arg, top, names)
-    for name in names:
-        name = join(top, name)
-        if isdir(name):
-            walk(name, func, arg)
-
-
-# Expand paths beginning with '~' or '~user'.
-# '~' means $HOME; '~user' means that user's home directory.
-# If the path doesn't begin with '~', or if the user or $HOME is unknown,
-# the path is returned unchanged (leaving error reporting to whatever
-# function is called with the expanded path as argument).
-# See also module 'glob' for expansion of *, ? and [...] in pathnames.
-# (A function should also be defined to do full *sh-style environment
-# variable expansion.)
-
-def expanduser(path):
-    """Expand ~ and ~user constructs.
-
-    If user or $HOME is unknown, do nothing."""
-    if path[:1] != '~':
-        return path
-    i, n = 1, len(path)
-    while i < n and path[i] not in '/\\':
-        i = i + 1
-
-    if 'HOME' in os.environ:
-        userhome = os.environ['HOME']
-    elif 'USERPROFILE' in os.environ:
-        userhome = os.environ['USERPROFILE']
-    elif not 'HOMEPATH' in os.environ:
-        return path
-    else:
-        try:
-            drive = os.environ['HOMEDRIVE']
-        except KeyError:
-            drive = ''
-        userhome = join(drive, os.environ['HOMEPATH'])
-
-    if i != 1: #~user
-        userhome = join(dirname(userhome), path[1:i])
-
-    return userhome + path[i:]
-
-
-# Expand paths containing shell variable substitutions.
-# The following rules apply:
-#       - no expansion within single quotes
-#       - '$$' is translated into '$'
-#       - '%%' is translated into '%' if '%%' are not seen in %var1%%var2%
-#       - ${varname} is accepted.
-#       - $varname is accepted.
-#       - %varname% is accepted.
-#       - varnames can be made out of letters, digits and the characters '_-'
-#         (though is not verifed in the ${varname} and %varname% cases)
-# XXX With COMMAND.COM you can use any characters in a variable name,
-# XXX except '^|<>='.
-
-def expandvars(path):
-    """Expand shell variables of the forms $var, ${var} and %var%.
-
-    Unknown variables are left unchanged."""
-    if '$' not in path and '%' not in path:
-        return path
-    import string
-    varchars = string.ascii_letters + string.digits + '_-'
-    res = ''
-    index = 0
-    pathlen = len(path)
-    while index < pathlen:
-        c = path[index]
-        if c == '\'':   # no expansion within single quotes
-            path = path[index + 1:]
-            pathlen = len(path)
-            try:
-                index = path.index('\'')
-                res = res + '\'' + path[:index + 1]
-            except ValueError:
-                res = res + path
-                index = pathlen - 1
-        elif c == '%':  # variable or '%'
-            if path[index + 1:index + 2] == '%':
-                res = res + c
-                index = index + 1
-            else:
-                path = path[index+1:]
-                pathlen = len(path)
-                try:
-                    index = path.index('%')
-                except ValueError:
-                    res = res + '%' + path
-                    index = pathlen - 1
-                else:
-                    var = path[:index]
-                    if var in os.environ:
-                        res = res + os.environ[var]
-                    else:
-                        res = res + '%' + var + '%'
-        elif c == '$':  # variable or '$$'
-            if path[index + 1:index + 2] == '$':
-                res = res + c
-                index = index + 1
-            elif path[index + 1:index + 2] == '{':
-                path = path[index+2:]
-                pathlen = len(path)
-                try:
-                    index = path.index('}')
-                    var = path[:index]
-                    if var in os.environ:
-                        res = res + os.environ[var]
-                    else:
-                        res = res + '${' + var + '}'
-                except ValueError:
-                    res = res + '${' + path
-                    index = pathlen - 1
-            else:
-                var = ''
-                index = index + 1
-                c = path[index:index + 1]
-                while c != '' and c in varchars:
-                    var = var + c
-                    index = index + 1
-                    c = path[index:index + 1]
-                if var in os.environ:
-                    res = res + os.environ[var]
-                else:
-                    res = res + '$' + var
-                if c != '':
-                    index = index - 1
-        else:
-            res = res + c
-        index = index + 1
-    return res
-
-
-# Normalize a path, e.g. A//B, A/./B and A/foo/../B all become A\B.
-# Previously, this function also truncated pathnames to 8+3 format,
-# but as this module is called "ntpath", that's obviously wrong!
-
-def normpath(path):
-    """Normalize path, eliminating double slashes, etc."""
-    # Preserve unicode (if path is unicode)
-    backslash, dot = (u'\\', u'.') if isinstance(path, unicode) else ('\\', '.')
-    if path.startswith(('\\\\.\\', '\\\\?\\')):
-        # in the case of paths with these prefixes:
-        # \\.\ -> device names
-        # \\?\ -> literal paths
-        # do not do any normalization, but return the path unchanged
-        return path
-    path = path.replace("/", "\\")
-    prefix, path = splitdrive(path)
-    # We need to be careful here. If the prefix is empty, and the path starts
-    # with a backslash, it could either be an absolute path on the current
-    # drive (\dir1\dir2\file) or a UNC filename (\\server\mount\dir1\file). It
-    # is therefore imperative NOT to collapse multiple backslashes blindly in
-    # that case.
-    # The code below preserves multiple backslashes when there is no drive
-    # letter. This means that the invalid filename \\\a\b is preserved
-    # unchanged, where a\\\b is normalised to a\b. It's not clear that there
-    # is any better behaviour for such edge cases.
-    if prefix == '':
-        # No drive letter - preserve initial backslashes
-        while path[:1] == "\\":
-            prefix = prefix + backslash
-            path = path[1:]
-    else:
-        # We have a drive letter - collapse initial backslashes
-        if path.startswith("\\"):
-            prefix = prefix + backslash
-            path = path.lstrip("\\")
-    comps = path.split("\\")
-    i = 0
-    while i < len(comps):
-        if comps[i] in ('.', ''):
-            del comps[i]
-        elif comps[i] == '..':
-            if i > 0 and comps[i-1] != '..':
-                del comps[i-1:i+1]
-                i -= 1
-            elif i == 0 and prefix.endswith("\\"):
-                del comps[i]
-            else:
-                i += 1
-        else:
-            i += 1
-    # If the path is now empty, substitute '.'
-    if not prefix and not comps:
-        comps.append(dot)
-    return prefix + backslash.join(comps)
-
-
-# Return an absolute path.
-try:
-    from nt import _getfullpathname
-
-except ImportError: # no built-in nt module - maybe it's Jython ;)
-
-    if os._name == 'nt' :
-        # on Windows so Java version of sys deals in NT paths
-        def abspath(path):
-            """Return the absolute version of a path."""
-            try:
-                if isinstance(path, unicode):
-                    # Result must be unicode
-                    if path:
-                        path = sys.getPath(path)
-                    else:
-                        # Empty path must return current working directory
-                        path = os.getcwdu()
-                else:
-                    # Result must be bytes
-                    if path:
-                        path = sys.getPath(path).encode('latin-1')
-                    else:
-                        # Empty path must return current working directory
-                        path = os.getcwd()
-            except EnvironmentError:
-                 pass # Bad path - return unchanged.
-            return normpath(path)
-
-    else:
-        # not running on Windows - mock up something sensible
-        def abspath(path):
-            """Return the absolute version of a path."""
-            try:
-                if isinstance(path, unicode):
-                    # Result must be unicode
-                    if path:
-                        path = join(os.getcwdu(), path)
-                    else:
-                        # Empty path must return current working directory
-                        path = os.getcwdu()
-                else:
-                    # Result must be bytes
-                    if path:
-                        path = join(os.getcwd(), path)
-                    else:
-                        # Empty path must return current working directory
-                        path = os.getcwd()
-            except EnvironmentError:
-                 pass # Bad path - return unchanged.
-            return normpath(path)
-
-else:  # use native Windows method on Windows
-    def abspath(path):
-        """Return the absolute version of a path."""
-
-        if path: # Empty path must return current working directory.
-            try:
-                path = _getfullpathname(path)
-            except WindowsError:
-                pass # Bad path - return unchanged.
-        elif isinstance(path, unicode):
-            path = os.getcwdu()
-        else:
-            path = os.getcwd()
-        return normpath(path)
-
-# realpath is a no-op on systems without islink support
-realpath = abspath
-# Win9x family and earlier have no Unicode filename support.
-supports_unicode_filenames = (hasattr(sys, "getwindowsversion") and
-                              sys.getwindowsversion()[3] >= 2)
-
-def _abspath_split(path):
-    abs = abspath(normpath(path))
-    prefix, rest = splitunc(abs)
-    is_unc = bool(prefix)
-    if not is_unc:
-        prefix, rest = splitdrive(abs)
-    return is_unc, prefix, [x for x in rest.split(sep) if x]
-
-def relpath(path, start=curdir):
-    """Return a relative version of a path"""
-
-    if not path:
-        raise ValueError("no path specified")
-
-    start_is_unc, start_prefix, start_list = _abspath_split(start)
-    path_is_unc, path_prefix, path_list = _abspath_split(path)
-
-    if path_is_unc ^ start_is_unc:
-        raise ValueError("Cannot mix UNC and non-UNC paths (%s and %s)"
-                                                            % (path, start))
-    if path_prefix.lower() != start_prefix.lower():
-        if path_is_unc:
-            raise ValueError("path is on UNC root %s, start on UNC root %s"
-                                                % (path_prefix, start_prefix))
-        else:
-            raise ValueError("path is on drive %s, start on drive %s"
-                                                % (path_prefix, start_prefix))
-    # Work out how much of the filepath is shared by start and path.
-    i = 0
-    for e1, e2 in zip(start_list, path_list):
-        if e1.lower() != e2.lower():
-            break
-        i += 1
-
-    rel_list = [pardir] * (len(start_list)-i) + path_list[i:]
-    if not rel_list:
-        return curdir
-    return join(*rel_list)
diff --git a/Lib/subprocess.py b/Lib/subprocess.py
--- a/Lib/subprocess.py
+++ b/Lib/subprocess.py
@@ -438,6 +438,7 @@
     import java.nio.ByteBuffer
     import org.python.core.io.RawIOBase
     import org.python.core.io.StreamIO
+    from org.python.core.Py import fileSystemDecode
 else:
     import select
     _has_poll = hasattr(select, 'poll')
@@ -779,7 +780,7 @@
         maintain those byte values (which may be butchered as
         Strings) for the subprocess if they haven't been modified.
         """
-        # Determine what's safe to merge
+        # Determine what's necessary to merge (new or different)
         merge_env = dict((key, value) for key, value in env.iteritems()
                          if key not in builder_env or
                          builder_env.get(key) != value)
@@ -789,8 +790,10 @@
         for entry in entries:
             if entry.getKey() not in env:
                 entries.remove()
-
-        builder_env.putAll(merge_env)
+        # add anything new or different in env
+        for key, value in merge_env.iteritems():
+            # If the new value is bytes, assume it to be FS-encoded
+            builder_env.put(key, fileSystemDecode(value))
 
 
 class Popen(object):
@@ -1308,9 +1311,6 @@
                 args = _cmdline2listimpl(args)
             else:
                 args = list(args)
-                # NOTE: CPython posix (execv) will str() any unicode
-                # args first, maybe we should do the same on
-                # posix. Windows passes unicode through, however
                 if any(not isinstance(arg, (str, unicode)) for arg in args):
                     raise TypeError('args must contain only strings')
             args = _escape_args(args)
@@ -1321,6 +1321,11 @@
             if executable is not None:
                 args[0] = executable
 
+            # NOTE: CPython posix (execv) will FS-encode any unicode args, but
+            # pass on bytes unchanged, because that's what the system expects.
+            # Java expects unicode, so we do the converse: leave unicode
+            # unchanged but FS-decode any supplied as bytes.
+            args = [fileSystemDecode(arg) for arg in args]
             builder = java.lang.ProcessBuilder(args)
 
             if stdin is None:
@@ -1330,16 +1335,20 @@
             if stderr is None:
                 builder.redirectError(java.lang.ProcessBuilder.Redirect.INHERIT)
 
-            # os.environ may be inherited for compatibility with CPython
+            # os.environ may be inherited for compatibility with CPython.
+            # Elements taken from os.environ are FS-decoded to unicode.
             _setup_env(dict(os.environ if env is None else env),
                        builder.environment())
 
+            # The current working directory must also be unicode.
             if cwd is None:
-                cwd = os.getcwd()
-            elif not os.path.exists(cwd):
-                raise OSError(errno.ENOENT, os.strerror(errno.ENOENT), cwd)
-            elif not os.path.isdir(cwd):
-                raise OSError(errno.ENOTDIR, os.strerror(errno.ENOTDIR), cwd)
+                cwd = os.getcwdu()
+            else:
+                cwd = fileSystemDecode(cwd)
+                if not os.path.exists(cwd):
+                    raise OSError(errno.ENOENT, os.strerror(errno.ENOENT), cwd)
+                elif not os.path.isdir(cwd):
+                    raise OSError(errno.ENOTDIR, os.strerror(errno.ENOTDIR), cwd)
             builder.directory(java.io.File(cwd))
 
             # Let Java manage redirection of stderr to stdout (it's more
@@ -1890,9 +1899,10 @@
     args = _cmdline2listimpl(command)
     args = _escape_args(args)
     args = _shell_command + args
-    cwd = os.getcwd()
+    cwd = os.getcwdu()
 
-
+    # Python supplies FS-encoded arguments while Java expects String
+    args = [fileSystemDecode(arg) for arg in args]
 
     builder = java.lang.ProcessBuilder(args)
     builder.directory(java.io.File(cwd))
diff --git a/Lib/sysconfig.py b/Lib/sysconfig.py
--- a/Lib/sysconfig.py
+++ b/Lib/sysconfig.py
@@ -5,6 +5,11 @@
 import os
 from os.path import pardir, realpath
 
+def fileSystemEncode(path):
+    if isinstance(path, unicode):
+        return path.encode(sys.getfilesystemencoding())
+    return path
+
 _INSTALL_SCHEMES = {
     'posix_prefix': {
         'stdlib': '{base}/lib/python{py_version_short}',
@@ -116,6 +121,7 @@
 
 def _safe_realpath(path):
     try:
+        path = fileSystemEncode(path)
         return realpath(path)
     except OSError:
         return path
diff --git a/Lib/test/test_exceptions.py b/Lib/test/test_exceptions.py
--- a/Lib/test/test_exceptions.py
+++ b/Lib/test/test_exceptions.py
@@ -524,7 +524,6 @@
         self.check_same_msg(Exception(), '')
 
 
-    @unittest.skipIf(is_jython, "FIXME: not working in Jython")
     def test_0_args_with_overridden___str__(self):
         """Check same msg for exceptions with 0 args and overridden __str__"""
         # str() and unicode() on an exception with overridden __str__ that
@@ -550,7 +549,6 @@
         self.assertRaises(UnicodeEncodeError, str, e)
         self.assertEqual(unicode(e), u'f\xf6\xf6')
 
-    @unittest.skipIf(is_jython, "FIXME: not working in Jython")
     def test_1_arg_with_overridden___str__(self):
         """Check same msg for exceptions with overridden __str__ and 1 arg"""
         # when __str__ is overridden and __unicode__ is not implemented
@@ -575,7 +573,6 @@
         for args in argslist:
             self.check_same_msg(Exception(*args), repr(args))
 
-    @unittest.skipIf(is_jython, "FIXME: not working in Jython")
     def test_many_args_with_overridden___str__(self):
         """Check same msg for exceptions with overridden __str__ and many args"""
         # if __str__ returns an ascii string / ascii unicode string
diff --git a/Lib/test/test_exceptions_jy.py b/Lib/test/test_exceptions_jy.py
--- a/Lib/test/test_exceptions_jy.py
+++ b/Lib/test/test_exceptions_jy.py
@@ -70,11 +70,12 @@
         # But the exception hook, via Py#displayException, does not fail when attempting to __str__ the exception args
         with test_support.captured_stderr() as s:
             sys.excepthook(RuntimeError, u"Drink \u2615", None)
-        self.assertEqual(s.getvalue(), "RuntimeError\n")  
+        # At minimum, it tells us what kind of exception it was
+        self.assertEqual(s.getvalue()[:12], "RuntimeError")
         # It is fine with ascii values, of course
         with test_support.captured_stderr() as s:
             sys.excepthook(RuntimeError, u"Drink java", None)
-        self.assertEqual(s.getvalue(), "RuntimeError: Drink java\n")  
+        self.assertEqual(s.getvalue(), "RuntimeError: Drink java\n")
 
 
 def test_main():
diff --git a/Lib/test/test_httpservers.py b/Lib/test/test_httpservers.py
--- a/Lib/test/test_httpservers.py
+++ b/Lib/test/test_httpservers.py
@@ -378,6 +378,9 @@
 
 @unittest.skipIf(hasattr(os, 'geteuid') and os.geteuid() == 0,
         "This test can't be run reliably as root (issue #13308).")
+@unittest.skipIf((not hasattr(os, 'symlink')) and
+        sys.executable.encode('ascii', 'replace') != sys.executable,
+        "Executable path is not pure ASCII.") # these fail for CPython too
 class CGIHTTPServerTestCase(BaseTestCase):
     class request_handler(NoLogRequestHandler, CGIHTTPRequestHandler):
         pass
diff --git a/Lib/test/test_java_visibility.py b/Lib/test/test_java_visibility.py
--- a/Lib/test/test_java_visibility.py
+++ b/Lib/test/test_java_visibility.py
@@ -13,6 +13,7 @@
 from org.python.tests.multihidden import BaseConnection
 
 class VisibilityTest(unittest.TestCase):
+
     def test_invisible(self):
         for item in dir(Invisible):
             self.assert_(not item.startswith("package"))
@@ -178,6 +179,7 @@
 
 
 class JavaClassTest(unittest.TestCase):
+
     def test_class_methods_visible(self):
         self.assertFalse(HashMap.isInterface(),
                 'java.lang.Class methods should be visible on Class instances')
@@ -198,6 +200,7 @@
         self.assertEquals(3, s.b, "Defined fields should take precedence")
 
 class CoercionTest(unittest.TestCase):
+
     def test_int_coercion(self):
         c = Coercions()
         self.assertEquals("5", c.takeInt(5))
@@ -234,6 +237,7 @@
         self.assertEquals(c.tellClassNameObject(ht), "class java.util.Hashtable")
 
 class RespectJavaAccessibilityTest(unittest.TestCase):
+
     def run_accessibility_script(self, script, error=AttributeError):
         fn = test_support.findfile(script)
         self.assertRaises(error, execfile, fn)
@@ -254,6 +258,7 @@
         self.run_accessibility_script("call_overridden_method.py")
 
 class ClassloaderTest(unittest.TestCase):
+
     def test_loading_classes_without_import(self):
         cl = test_support.make_jar_classloader("../callbacker_test.jar")
         X = cl.loadClass("org.python.tests.Callbacker")
@@ -265,11 +270,13 @@
         self.assertEquals(None, called[0])
 
 def test_main():
-    test_support.run_unittest(VisibilityTest,
+    test_support.run_unittest(
+            VisibilityTest,
             JavaClassTest,
             CoercionTest,
             RespectJavaAccessibilityTest,
-            ClassloaderTest)
+            ClassloaderTest
+        )
 
 if __name__ == "__main__":
     test_main()
diff --git a/Lib/test/test_jser.py b/Lib/test/test_jser.py
--- a/Lib/test/test_jser.py
+++ b/Lib/test/test_jser.py
@@ -15,7 +15,9 @@
 class JavaSerializationTests(unittest.TestCase):
 
     def setUp(self):
-        self.sername = os.path.join(sys.prefix, "test.ser")
+        name = os.path.join(sys.prefix, "test.ser")
+        # As we are using java.io directly, ensure file name is a unicode
+        self.sername = name.decode(sys.getfilesystemencoding())
 
     def tearDown(self):
         os.remove(self.sername)
diff --git a/Lib/test/test_jython_launcher.py b/Lib/test/test_jython_launcher.py
--- a/Lib/test/test_jython_launcher.py
+++ b/Lib/test/test_jython_launcher.py
@@ -31,7 +31,6 @@
         # by the installer
         return executable
 
-
 def get_uname():
     _uname = None
     try:
@@ -49,9 +48,8 @@
 
 
 class TestLauncher(unittest.TestCase):
-    
+
     def get_cmdline(self, cmd, env):
-
         output = subprocess.check_output(cmd, env=env).rstrip()
         if is_windows:
             return subprocess._cmdline2list(output)
@@ -76,7 +74,7 @@
                 k, v = arg[2:].split("=")
                 props[k] = v
         return props
-            
+
     def test_classpath_env(self):
         env = self.get_newenv()
         env["CLASSPATH"] = some_jar
@@ -207,7 +205,7 @@
 
     def test_file(self):
         self.assertCommand(['test.py'])
-    
+
     def test_dash(self):
         self.assertCommand(['-i'])
 
diff --git a/Lib/test/test_ssl.py b/Lib/test/test_ssl.py
--- a/Lib/test/test_ssl.py
+++ b/Lib/test/test_ssl.py
@@ -27,7 +27,13 @@
 HOST = support.HOST
 
 def data_file(*name):
-    return os.path.join(os.path.dirname(__file__), *name)
+    file = os.path.join(os.path.dirname(__file__), *name)
+    # Ensure we return unicode path. This tweak is not a divergence:
+    # CPython 2.7.13 fails the same way for a non-ascii location.
+    if isinstance(file, unicode):
+        return file
+    else:
+        return file.decode(sys.getfilesystemencoding())
 
 # The custom key and certificate files used in test_ssl are generated
 # using Lib/test/make_ssl_certs.py.
diff --git a/Lib/test/test_support.py b/Lib/test/test_support.py
--- a/Lib/test/test_support.py
+++ b/Lib/test/test_support.py
@@ -509,7 +509,7 @@
 if is_jython:
     # Jython disallows @ in module names
     TESTFN = '$test'
-    TESTFN_UNICODE = "$test-\xe0\xf2"
+    TESTFN_UNICODE = u"$test-\u87d2\u86c7" # = test python (Chinese)
     TESTFN_ENCODING = sys.getfilesystemencoding()
 elif os.name == 'riscos':
     TESTFN = 'testfile'
diff --git a/Lib/test/test_zipimport_jy.py b/Lib/test/test_zipimport_jy.py
--- a/Lib/test/test_zipimport_jy.py
+++ b/Lib/test/test_zipimport_jy.py
@@ -51,8 +51,10 @@
         A(path).somevar = 1
 
 def test_main():
-    test_support.run_unittest(SyspathZipimportTest)
-    test_support.run_unittest(ZipImporterDictTest)
+    test_support.run_unittest(
+        SyspathZipimportTest,
+        ZipImporterDictTest
+    )
 
 if __name__ == "__main__":
     test_main()
diff --git a/build.xml b/build.xml
--- a/build.xml
+++ b/build.xml
@@ -236,6 +236,7 @@
         <echo>output.dir         = '${output.dir}'</echo>
         <echo>compile.dir        = '${compile.dir}'</echo>
         <echo>exposed.dir        = '${exposed.dir}'</echo>
+        <echo>gensrc.dir         = '${gensrc.dir}'</echo>
         <echo>dist.dir           = '${dist.dir}'</echo>
         <echo>apidoc.dir         = '${apidoc.dir}'</echo>
         <echo>templates.dir      = '${templates.dir}'</echo>
@@ -434,6 +435,7 @@
     <target name="antlr_gen" depends="prepare-output" unless="antlr.notneeded">
         <java classname="org.antlr.Tool" failonerror="false" fork="true" dir="${jython.base.dir}">
             <jvmarg value="-Xmx512m"/>
+            <jvmarg value="-Dfile.encoding=UTF-8"/>
             <arg value="-Xconversiontimeout"/>
             <arg value="2000"/>
             <arg value="-fo"/>
@@ -694,6 +696,7 @@
         <javadoc sourcepath="${source.dir}"
                  destdir="${apidoc.dir}"
                  source="${jdk.source.version}"
+                 encoding="UTF-8"
                  maxmemory="1024m"
                  public="true"
                  breakiterator="yes"
diff --git a/src/org/python/core/Py.java b/src/org/python/core/Py.java
--- a/src/org/python/core/Py.java
+++ b/src/org/python/core/Py.java
@@ -2,6 +2,7 @@
 package org.python.core;
 
 import java.io.ByteArrayOutputStream;
+import java.io.CharArrayWriter;
 import java.io.File;
 import java.io.FileDescriptor;
 import java.io.FileNotFoundException;
@@ -10,7 +11,7 @@
 import java.io.InputStream;
 import java.io.ObjectStreamException;
 import java.io.OutputStream;
-import java.io.PrintStream;
+import java.io.PrintWriter;
 import java.io.Serializable;
 import java.io.StreamCorruptedException;
 import java.lang.reflect.InvocationTargetException;
@@ -25,7 +26,14 @@
 import java.util.List;
 import java.util.Set;
 
+import org.python.antlr.base.mod;
+import org.python.core.adapter.ClassicPyObjectAdapter;
+import org.python.core.adapter.ExtensiblePyObjectAdapter;
+import org.python.modules.posix.PosixModule;
+import org.python.util.Generic;
+
 import com.google.common.base.CharMatcher;
+
 import jline.console.UserInterruptException;
 import jnr.constants.Constant;
 import jnr.constants.platform.Errno;
@@ -33,14 +41,6 @@
 import jnr.posix.POSIXFactory;
 import jnr.posix.util.Platform;
 
-import org.python.antlr.base.mod;
-import org.python.core.adapter.ClassicPyObjectAdapter;
-import org.python.core.adapter.ExtensiblePyObjectAdapter;
-import org.python.core.Traverseproc;
-import org.python.core.Visitproc;
-import org.python.modules.posix.PosixModule;
-import org.python.util.Generic;
-
 /** Builtin types that are used to setup PyObject.
  *
  * Resolve circular dependency with some laziness. */
@@ -84,6 +84,7 @@
             throw new StreamCorruptedException("unknown singleton: " + which);
         }
     }
+
     /* Holds the singleton None and Ellipsis objects */
     /** The singleton None Python object **/
     public final static PyObject None = new PyNone();
@@ -129,7 +130,6 @@
     public final static long TPFLAGS_IS_ABSTRACT = 1L << 20;
 
 
-
     /** A unique object to indicate no conversion is possible
     in __tojava__ methods **/
     public final static Object NoConversion = new PySingleton("Error");
@@ -222,6 +222,10 @@
         return new PyException(Py.IOError, args);
     }
 
+    public static PyException IOError(Constant errno, String filename) {
+        return IOError(errno, Py.fileSystemEncode(filename));
+    }
+
     public static PyException IOError(Constant errno, PyObject filename) {
         int value = errno.intValue();
         PyObject args = new PyTuple(Py.newInteger(value), PosixModule.strerror(value), filename);
@@ -683,6 +687,103 @@
         }
     }
 
+    /**
+     * Return a file name or path as Unicode (Java UTF-16 <code>String</code>), decoded if necessary
+     * from a Python <code>bytes</code> object, using the file system encoding. In Jython, this
+     * encoding is UTF-8, irrespective of the OS platform. This method is comparable with Python 3
+     * <code>os.fsdecode</code>, but for Java use, in places such as the <code>os</code> module. If
+     * the argument is not a <code>PyUnicode</code>, it will be decoded using the nominal Jython
+     * file system encoding. If the argument <i>is</i> a <code>PyUnicode</code>, its
+     * <code>String</code> is returned.
+     *
+     * @param filename as <code>bytes</code> to decode, or already as <code>unicode</code>
+     * @return unicode version of path
+     */
+    public static String fileSystemDecode(PyString filename) {
+        String s = filename.getString();
+        if (filename instanceof PyUnicode || CharMatcher.ascii().matchesAllOf(s)) {
+            // Already decoded (unicode) or usable as ASCII
+            return s;
+        } else {
+            // It's bytes, so must decode properly
+            assert "utf-8".equals(PySystemState.FILE_SYSTEM_ENCODING.toString());
+            return codecs.PyUnicode_DecodeUTF8(s, null);
+        }
+    }
+
+    /**
+     * As {@link #fileSystemDecode(PyString)} but raising <code>TypeError</code> if not a
+     * <code>str</code> or <code>unicode</code>.
+     *
+     * @param filename as <code>bytes</code> to decode, or already as <code>unicode</code>
+     * @return unicode version of the file name
+     */
+    public static String fileSystemDecode(PyObject filename) {
+        if (filename instanceof PyString) {
+            return fileSystemDecode((PyString)filename);
+        } else
+            throw Py.TypeError(String.format("coercing to Unicode: need string, %s type found",
+                    filename.getType().fastGetName()));
+    }
+
+    /**
+     * Return a PyString object we can use as a file name or file path in places where Python
+     * expects a <code>bytes</code> (that is a <code>str</code>) object in the file system encoding.
+     * In Jython, this encoding is UTF-8, irrespective of the OS platform.
+     * <p>
+     * This is subtly different from CPython's use of "file system encoding", which tracks the
+     * platform's choice so that OS services may be called that have a bytes interface. Jython's
+     * interaction with the OS occurs via Java using String arguments representing Unicode values,
+     * so we have no need to match the encoding actually chosen by the platform (e.g. 'mbcs' on
+     * Windows). Rather we need a nominal Jython file system encoding, for use where the standard
+     * library forces byte paths on us (in Python 2). There is no reason for this choice to vary
+     * with OS platform. Methods receiving paths as <code>bytes</code> will
+     * {@link #fileSystemDecode(PyString)} them again for Java.
+     *
+     * @param filename as <code>unicode</code> to encode, or already as <code>bytes</code>
+     * @return encoded bytes version of path
+     */
+    public static PyString fileSystemEncode(String filename) {
+        if (CharMatcher.ascii().matchesAllOf(filename)) {
+            // Just wrap it as US-ASCII is a subset of the file system encoding
+            return Py.newString(filename);
+        } else {
+            // It's not just US-ASCII, so must encode properly
+            assert "utf-8".equals(PySystemState.FILE_SYSTEM_ENCODING.toString());
+            return Py.newString(codecs.PyUnicode_EncodeUTF8(filename, null));
+        }
+    }
+
+    /**
+     * Return a PyString object we can use as a file name or file path in places where Python
+     * expects a <code>bytes</code> (that is, <code>str</code>) object in the file system encoding.
+     * In Jython, this encoding is UTF-8, irrespective of the OS platform. This method is comparable
+     * with Python 3 <code>os.fsencode</code>. If the argument is a PyString, it is returned
+     * unchanged. If the argument is a PyUnicode, it is converted to a <code>bytes</code> using the
+     * nominal Jython file system encoding.
+     *
+     * @param filename as <code>unicode</code> to encode, or already as <code>bytes</code>
+     * @return encoded bytes version of path
+     */
+    public static PyString fileSystemEncode(PyString filename) {
+        return (filename instanceof PyUnicode) ? fileSystemEncode(filename.getString()) : filename;
+    }
+
+    /**
+     * Convert a <code>PyList</code> path to a list of Java <code>String</code> objects decoded from
+     * the path elements to strings guaranteed usable in the Java API.
+     *
+     * @param path a Python search path
+     * @return equivalent Java list
+     */
+    private static List<String> fileSystemDecode(PyList path) {
+        List<String> list = new ArrayList<>(path.__len__());
+        for (PyObject filename : path.getList()) {
+            list.add(fileSystemDecode(filename));
+        }
+        return list;
+    }
+
     public static PyStringMap newStringMap() {
         // enable lazy bootstrapping (see issue #1671)
         if (!PyType.hasBuilder(PyStringMap.class)) {
@@ -1073,11 +1174,11 @@
         }
         Py.getSystemState().callExitFunc();
     }
-    //XXX: this needs review to make sure we are cutting out all of the Java
-    //     exceptions.
+
+    //XXX: this needs review to make sure we are cutting out all of the Java exceptions.
     private static String getStackTrace(Throwable javaError) {
-        ByteArrayOutputStream buf = new ByteArrayOutputStream();
-        javaError.printStackTrace(new PrintStream(buf));
+        CharArrayWriter buf = new CharArrayWriter();
+        javaError.printStackTrace(new PrintWriter(buf));
 
         String str = buf.toString();
         int index = -1;
@@ -1170,31 +1271,55 @@
         ts.exception = null;
     }
 
-    public static void displayException(PyObject type, PyObject value, PyObject tb,
-                                        PyObject file) {
+    /**
+     * Print the description of an exception on standard error or a given byte-oriented file. The
+     * arguments are closely equivalent to the tuple returned by Python <code>sys.exc_info</code>.
+     * Compare with Python <code>traceback.print_exception</code>.
+     *
+     * @param type of exception
+     * @param value the exception parameter (second argument to <code>raise</code>)
+     * @param tb traceback of the call stack where the exception originally occurred
+     * @param file to print encoded string to, or null meaning standard error
+     */
+    public static void displayException(PyObject type, PyObject value, PyObject tb, PyObject file) {
+
+        // Output is to standard error, unless a file object has been given.
         StdoutWrapper stderr = Py.stderr;
         if (file != null) {
             stderr = new FixedFileWrapper(file);
         }
         flushLine();
 
+        // The creation of the report operates entirely in Java String (to support Unicode).
+        String formattedException = exceptionToString(type, value, tb);
+        stderr.print(formattedException);
+    }
+
+    /**
+     * Format the description of an exception as a big string. The arguments are closely equivalent
+     * to the tuple returned by Python <code>sys.exc_info</code>. Compare with Python
+     * <code>traceback.format_exception</code>.
+     *
+     * @param type of exception
+     * @param value the exception parameter (second argument to <code>raise</code>)
+     * @param tb traceback of the call stack where the exception originally occurred
+     * @return string representation of the traceback and exception
+     */
+    static String exceptionToString(PyObject type, PyObject value, PyObject tb) {
+
+        // Compose the stack dump, syntax error, and actual exception in this buffer:
+        StringBuilder buf;
+
         if (tb instanceof PyTraceback) {
-            stderr.print(((PyTraceback) tb).dumpStack());
+            buf = new StringBuilder(((PyTraceback)tb).dumpStack());
+        } else {
+            buf = new StringBuilder();
         }
+
         if (__builtin__.isinstance(value, Py.SyntaxError)) {
-            PyObject filename = value.__findattr__("filename");
-            PyObject text = value.__findattr__("text");
-            PyObject lineno = value.__findattr__("lineno");
-            stderr.print("  File \"");
-            stderr.print(filename == Py.None || filename == null ?
-                         "<string>" : filename.toString());
-            stderr.print("\", line ");
-            stderr.print(lineno == null ? Py.newString("0") : lineno);
-            stderr.print("\n");
-            if (text != Py.None && text != null && text.__len__() != 0) {
-                printSyntaxErrorText(stderr, value.__findattr__("offset").asInt(),
-                                     text.toString());
-            }
+            // The value part of the exception is a syntax error: first emit that.
+            appendSyntaxError(buf, value);
+            // Now supersede it with just the syntax error message for the next phase.
             value = value.__findattr__("msg");
             if (value == null) {
                 value = Py.None;
@@ -1203,26 +1328,53 @@
 
         if (value.getJavaProxy() != null) {
             Object javaError = value.__tojava__(Throwable.class);
-
             if (javaError != null && javaError != Py.NoConversion) {
-                stderr.println(getStackTrace((Throwable) javaError));
+                // The value is some Java Throwable: append that too
+                buf.append(getStackTrace((Throwable)javaError));
             }
         }
+
+        // Be prepared for formatting the value part to fail (fall back to just the type)
         try {
-            stderr.println(formatException(type, value));
+            buf.append(formatException(type, value));
         } catch (Exception ex) {
-            stderr.println(formatException(type, Py.None));
+            buf.append(formatException(type, Py.None));
+        }
+        buf.append('\n');
+
+        return buf.toString();
+    }
+
+    /**
+     * Helper to {@link #exceptionToString(PyObject, PyObject, PyObject)} when the value in an
+     * exception turns out to be a syntax error.
+     */
+    private static void appendSyntaxError(StringBuilder buf, PyObject value) {
+
+        PyObject filename = value.__findattr__("filename");
+        PyObject text = value.__findattr__("text");
+        PyObject lineno = value.__findattr__("lineno");
+
+        buf.append("  File \"");
+        buf.append(filename == Py.None || filename == null ? "<string>" : filename.toString());
+        buf.append("\", line ");
+        buf.append(lineno == null ? Py.newString('0') : lineno);
+        buf.append('\n');
+
+        if (text != Py.None && text != null && text.__len__() != 0) {
+            appendSyntaxErrorText(buf, value.__findattr__("offset").asInt(), text.toString());
         }
     }
 
+
     /**
-     * Print the two lines showing where a SyntaxError was caused.
+     * Generate two lines showing where a SyntaxError was caused.
      *
-     * @param out StdoutWrapper to print to
+     * @param buf to append with generated message text
      * @param offset the offset into text
-     * @param text a source code String line
+     * @param text a source code line
      */
-    private static void printSyntaxErrorText(StdoutWrapper out, int offset, String text) {
+    private static void appendSyntaxErrorText(StringBuilder buf, int offset, String text) {
         if (offset >= 0) {
             if (offset > 0 && offset == text.length()) {
                 offset--;
@@ -1250,19 +1402,21 @@
             text = text.substring(i, text.length());
         }
 
-        out.print("    ");
-        out.print(text);
+        buf.append("    ");
+        buf.append(text);
         if (text.length() == 0 || !text.endsWith("\n")) {
-            out.print("\n");
+            buf.append('\n');
         }
         if (offset == -1) {
             return;
         }
-        out.print("    ");
+
+        // The indicator line "        ^"
+        buf.append("    ");
         for (offset--; offset > 0; offset--) {
-            out.print(" ");
+            buf.append(' ');
         }
-        out.print("^\n");
+        buf.append("^\n");
     }
 
     public static String formatException(PyObject type, PyObject value) {
@@ -1290,19 +1444,34 @@
             }
             buf.append(className);
         } else {
-            buf.append(useRepr ? type.__repr__() : type.__str__());
+            // Never happens since Python 2.7? Do something sensible anyway.
+            buf.append(asMessageString(type, useRepr));
         }
+
         if (value != null && value != Py.None) {
-            // only print colon if the str() of the object is not the empty string
-            PyObject s = useRepr ? value.__repr__() : value.__str__();
-            if (!(s instanceof PyString) || s.__len__() != 0) {
-                buf.append(": ");
+            String s = asMessageString(value, useRepr);
+            // Print colon and object (unless it renders as "")
+            if (s.length() > 0) {
+                buf.append(": ").append(s);
             }
-            buf.append(s);
         }
+
         return buf.toString();
     }
 
+    /** Defensive method to avoid exceptions from decoding (or import encodings) */
+    private static String asMessageString(PyObject value, boolean useRepr) {
+        if (useRepr)
+            value = value.__repr__();
+        if (value instanceof PyUnicode) {
+            return value.asString();
+        } else {
+            // Carefully avoid decoding errors that would swallow the intended message
+            String s = value.__str__().getString();
+            return PyString.encode_UnicodeEscape(s, false);
+        }
+    }
+
     public static void writeUnraisable(Throwable unraisable, PyObject obj) {
         PyException pye = JavaError(unraisable);
         stderr.println(String.format("Exception %s in %s ignored",
@@ -1565,6 +1734,16 @@
         }
     }
 
+    private static final String IMPORT_SITE_ERROR = ""
+            + "Cannot import site module and its dependencies: %s\n"
+            + "Determine if the following attributes are correct:\n" //
+            + "  * sys.path: %s\n"
+            + "    This attribute might be including the wrong directories, such as from CPython\n"
+            + "  * sys.prefix: %s\n"
+            + "    This attribute is set by the system property python.home, although it can\n"
+            + "    be often automatically determined by the location of the Jython jar file\n\n"
+            + "You can use the -S option or python.import.site=false to not import the site module";
+
     public static boolean importSiteIfSelected() {
         if (Options.importSite) {
             try {
@@ -1574,18 +1753,10 @@
             } catch (PyException pye) {
                 if (pye.match(Py.ImportError)) {
                     PySystemState sys = Py.getSystemState();
-                    throw Py.ImportError(String.format(""
-                                    + "Cannot import site module and its dependencies: %s\n"
-                                    + "Determine if the following attributes are correct:\n"
-                                    + "  * sys.path: %s\n"
-                                    + "    This attribute might be including the wrong directories, such as from CPython\n"
-                                    + "  * sys.prefix: %s\n"
-                                    + "    This attribute is set by the system property python.home, although it can\n"
-                                    + "    be often automatically determined by the location of the Jython jar file\n\n"
-                                    + "You can use the -S option or python.import.site=false to not import the site module",
-                            pye.value.__getattr__("args").__getitem__(0),
-                            sys.path,
-                            sys.prefix));
+                    String value = pye.value.__getattr__("args").__getitem__(0).toString();
+                    List<String> path = fileSystemDecode(sys.path);
+                    throw Py.ImportError(
+                            String.format(IMPORT_SITE_ERROR, value, path, PySystemState.prefix));
                 } else {
                     throw pye;
                 }
@@ -2266,7 +2437,7 @@
         }
         /* Here we would actually like to call cls.__findattr__("__metaclass__")
          * rather than cls.getType(). However there are circumstances where the
-         * metaclass doesn't show up as __metaclass__. On the other hand we need 
+         * metaclass doesn't show up as __metaclass__. On the other hand we need
          * to avoid that checker refers to builtin type___subclasscheck__ or
          * type___instancecheck__. Filtering out checker-instances of
          * PyBuiltinMethodNarrow does the trick. We also filter out PyMethodDescr
diff --git a/src/org/python/core/PyBaseException.java b/src/org/python/core/PyBaseException.java
--- a/src/org/python/core/PyBaseException.java
+++ b/src/org/python/core/PyBaseException.java
@@ -169,12 +169,17 @@
     @ExposedMethod(doc = BuiltinDocs.BaseException___str___doc)
     final PyString BaseException___str__() {
         switch (args.__len__()) {
-        case 0:
-            return Py.EmptyString;
-        case 1:
-            return args.__getitem__(0).__str__();
-        default:
-            return args.__str__();
+            case 0:
+                return Py.EmptyString;
+            case 1:
+                PyObject arg = args.__getitem__(0);
+                if (arg instanceof PyString) {
+                    return (PyString)arg;
+                } else {
+                    return arg.__str__();
+                }
+            default:
+                return args.__str__();
         }
     }
 
diff --git a/src/org/python/core/PyBytecode.java b/src/org/python/core/PyBytecode.java
--- a/src/org/python/core/PyBytecode.java
+++ b/src/org/python/core/PyBytecode.java
@@ -116,11 +116,13 @@
         throw Py.AttributeError(name);
     }
 
+    @Override
     public void __setattr__(String name, PyObject value) {
         // no writable attributes
         throwReadonly(name);
     }
 
+    @Override
     public void __delattr__(String name) {
         throwReadonly(name);
     }
@@ -137,6 +139,7 @@
         return new PyTuple(pystr);
     }
 
+    @Override
     public PyObject __findattr_ex__(String name) {
         // have to craft co_varnames specially
         if (name == "co_varnames") {
@@ -149,7 +152,7 @@
             return toPyStringTuple(co_freevars);
         }
         if (name == "co_filename") {
-            return new PyString(co_filename);
+            return Py.fileSystemEncode(co_filename); // bytes object expected by clients
         }
         if (name == "co_name") {
             return new PyString(co_name);
@@ -1156,7 +1159,7 @@
                         "zap" this information, to prevent END_FINALLY from
                         re-raising the exception.  (But non-local gotos
                         should still be resumed.)
-                     */    
+                     */
                         PyObject exit;
                         PyObject u = stack.pop(), v, w;
                         if (u == Py.None) {
@@ -1350,7 +1353,7 @@
             if (why != Why.RETURN) {
                 retval = Py.None;
             }
-        } else { 
+        } else {
             // store the stack in the frame for reentry from the yield;
             f.f_savedlocals = stack.popN(stack.size());
         }
diff --git a/src/org/python/core/PyException.java b/src/org/python/core/PyException.java
--- a/src/org/python/core/PyException.java
+++ b/src/org/python/core/PyException.java
@@ -62,21 +62,31 @@
     }
 
     private boolean printingStackTrace = false;
+    @Override
     public void printStackTrace() {
         Py.printException(this);
     }
 
+    @Override
     public Throwable fillInStackTrace() {
         return Options.includeJavaStackInExceptions ? super.fillInStackTrace() : this;
     }
 
+    @Override
     public synchronized void printStackTrace(PrintStream s) {
         if (printingStackTrace) {
             super.printStackTrace(s);
         } else {
             try {
+                /*
+                 * Ensure that non-ascii characters are made printable. One would prefer to emit
+                 * Unicode, but the output stream too often only accepts bytes. (s is not
+                 * necessarily a console, e.g. during a doctest.)
+                 */
+                PyFile err = new PyFile(s);
+                err.setEncoding("ascii", "backslashreplace");
                 printingStackTrace = true;
-                Py.displayException(type, value, traceback, new PyFile(s));
+                Py.displayException(type, value, traceback, err);
             } finally {
                 printingStackTrace = false;
             }
@@ -92,12 +102,9 @@
         }
     }
 
+    @Override
     public synchronized String toString() {
-        ByteArrayOutputStream buf = new ByteArrayOutputStream();
-        if (!printingStackTrace) {
-            printStackTrace(new PrintStream(buf));
-        }
-        return buf.toString();
+        return Py.exceptionToString(type, value, traceback);
     }
 
     /**
@@ -332,10 +339,11 @@
     public static String exceptionClassName(PyObject obj) {
         return obj instanceof PyClass ? ((PyClass)obj).__name__ : ((PyType)obj).fastGetName();
     }
-    
-    
+
+
     /* Traverseproc support */
 
+    @Override
     public int traverse(Visitproc visit, Object arg) {
         int retValue;
         if (type != null) {
@@ -357,6 +365,7 @@
         return 0;
     }
 
+    @Override
     public boolean refersDirectlyTo(PyObject ob) {
     	return ob != null && (type == ob || value == ob || traceback == ob);
     }
diff --git a/src/org/python/core/PyFile.java b/src/org/python/core/PyFile.java
--- a/src/org/python/core/PyFile.java
+++ b/src/org/python/core/PyFile.java
@@ -168,10 +168,6 @@
         ArgParser ap = new ArgParser("file", args, kwds, new String[] {"name", "mode", "buffering"},
                                      1);
         PyObject name = ap.getPyObject(0);
-        if (!(name instanceof PyString)) {
-            throw Py.TypeError("coercing to Unicode: need string, '" + name.getType().fastGetName()
-                               + "' type found");
-        }
         String mode = ap.getString(1, "r");
         int bufsize = ap.getInt(2, -1);
         file___init__(new FileIO((PyString) name, parseMode(mode)), name, mode, bufsize);
diff --git a/src/org/python/core/PyNullImporter.java b/src/org/python/core/PyNullImporter.java
--- a/src/org/python/core/PyNullImporter.java
+++ b/src/org/python/core/PyNullImporter.java
@@ -20,7 +20,7 @@
 
     public PyNullImporter(PyObject pathObj) {
         super();
-        String pathStr = asPath(pathObj);
+        String pathStr = Py.fileSystemDecode(pathObj);
         if (pathStr.equals("")) {
             throw Py.ImportError("empty pathname");
         }
@@ -42,17 +42,6 @@
         return Py.None;
     }
 
-    // FIXME Refactoring move helper function to a central util library
-    // FIXME Also can take in account working in zip file systems
-
-    private static String asPath(PyObject pathObj) {
-        if (!(pathObj instanceof PyString)) {
-            throw Py.TypeError(String.format("coercing to Unicode: need string, %s type found",
-                    pathObj.getType().fastGetName()));
-        }
-        return pathObj.toString();
-    }
-
     private static boolean isDir(String pathStr) {
         if (pathStr.equals("")) {
             return false;
diff --git a/src/org/python/core/PyString.java b/src/org/python/core/PyString.java
--- a/src/org/python/core/PyString.java
+++ b/src/org/python/core/PyString.java
@@ -79,7 +79,7 @@
     }
 
     PyString(StringBuilder buffer) {
-        this(TYPE, new String(buffer));
+        this(TYPE, buffer.toString());
     }
 
     /**
@@ -3998,9 +3998,9 @@
      * Implements PEP-3101 {}-formatting methods <code>str.format()</code> and
      * <code>unicode.format()</code>. When called with <code>enclosingIterator == null</code>, this
      * method takes this object as its formatting string. The method is also called (calls itself)
-     * to deal with nested formatting sepecifications. In that case, <code>enclosingIterator</code>
+     * to deal with nested formatting specifications. In that case, <code>enclosingIterator</code>
      * is a {@link MarkupIterator} on this object and <code>value</code> is a substring of this
-     * object needing recursive transaltion.
+     * object needing recursive translation.
      *
      * @param args to be interpolated into the string
      * @param keywords for the trailing args
diff --git a/src/org/python/core/PySystemState.java b/src/org/python/core/PySystemState.java
--- a/src/org/python/core/PySystemState.java
+++ b/src/org/python/core/PySystemState.java
@@ -82,6 +82,9 @@
 
     public final static PyString float_repr_style = Py.newString("short");
 
+    /** Nominal Jython file system encoding (as <code>sys.getfilesystemencoding()</code>) */
+    static final PyString FILE_SYSTEM_ENCODING = Py.newString("utf-8");
+
     public static boolean py3kwarning = false;
 
     public final static Class flags = Options.class;
@@ -109,12 +112,25 @@
     public static PackageManager packageManager;
     private static File cachedir;
 
-    private static PyList defaultPath;
-    private static PyList defaultArgv;
-    private static PyObject defaultExecutable;
+    private static PyList defaultPath; // list of bytes or unicode
+    private static PyList defaultArgv; // list of bytes or unicode
+    private static PyObject defaultExecutable; // bytes or unicode or None
 
     public static Properties registry; // = init_registry();
+    /**
+     * A string giving the site-specific directory prefix where the platform independent Python
+     * files are installed; by default, this is based on the property <code>python.home</code> or
+     * the location of the Jython JAR. The main collection of Python library modules is installed in
+     * the directory <code>prefix/Lib</code>. This object should contain bytes in the file system
+     * encoding for consistency with use in the standard library (see <code>sysconfig.py</code>).
+     */
     public static PyObject prefix;
+    /**
+     * A string giving the site-specific directory prefix where the platform-dependent Python files
+     * are installed; by default, this is the same as {@link #prefix}. This object should
+     * contain bytes in the file system encoding for consistency with use in the standard library
+     * (see <code>sysconfig.py</code>).
+     */
     public static PyObject exec_prefix = Py.EmptyString;
 
     public static final PyString byteorder = new PyString("big");
@@ -504,7 +520,7 @@
     }
 
     public PyObject getfilesystemencoding() {
-        return Py.None;
+        return FILE_SYSTEM_ENCODING;
     }
 
 
@@ -840,10 +856,10 @@
             }
         }
         if (prefix != null) {
-            PySystemState.prefix = Py.newString(prefix);
+            PySystemState.prefix = Py.fileSystemEncode(prefix);
         }
         if (exec_prefix != null) {
-            PySystemState.exec_prefix = Py.newString(exec_prefix);
+            PySystemState.exec_prefix = Py.fileSystemEncode(exec_prefix);
         }
         try {
             String jythonpath = System.getenv("JYTHONPATH");
@@ -1155,7 +1171,8 @@
         }
         cachedir = new File(props.getProperty(PYTHON_CACHEDIR, CACHEDIR_DEFAULT_NAME));
         if (!cachedir.isAbsolute()) {
-            cachedir = new File(prefix == null ? null : prefix.toString(), cachedir.getPath());
+            String prefixString = prefix == null ? null : Py.fileSystemDecode(prefix);
+            cachedir = new File(prefixString, cachedir.getPath());
         }
     }
 
@@ -1174,16 +1191,16 @@
         PyList argv = new PyList();
         if (args != null) {
             for (String arg : args) {
-                argv.append(Py.newStringOrUnicode(arg));
+                argv.append(Py.newStringOrUnicode(arg)); // XXX or always newUnicode?
             }
         }
         return argv;
     }
 
     /**
-     * Determine the default sys.executable value from the registry.
-     * If registry is not set (as in standalone jython jar), will use sys.prefix + /bin/jython(.exe) and the file may
-     * not exist. Users can create a wrapper in it's place to make it work in embedded environments.
+     * Determine the default sys.executable value from the registry. If registry is not set (as in
+     * standalone jython jar), we will use sys.prefix + /bin/jython(.exe) and the file may not
+     * exist. Users can create a wrapper in its place to make it work in embedded environments.
      * Only if sys.prefix is null, returns Py.None
      *
      * @param props a Properties registry
@@ -1191,26 +1208,26 @@
      */
     private static PyObject initExecutable(Properties props) {
         String executable = props.getProperty("python.executable");
-        if (executable == null) {
+        File executableFile;
+        if (executable != null) {
+            // The executable from the registry is a Unicode String path
+            executableFile = new File(executable);
+        } else {
             if (prefix == null) {
                 return Py.None;
             } else {
-                executable = prefix.asString() + File.separator + "bin" + File.separator;
-                if (Platform.IS_WINDOWS) {
-                    executable += "jython.exe";
-                } else {
-                    executable += "jython";
-                }
+                // The prefix is a unicode or encoded bytes object
+                executableFile = new File(Py.fileSystemDecode(prefix),
+                        Platform.IS_WINDOWS ? "bin\\jython.exe" : "bin/jython");
             }
         }
 
-        File executableFile = new File(executable);
         try {
             executableFile = executableFile.getCanonicalFile();
         } catch (IOException ioe) {
             executableFile = executableFile.getAbsoluteFile();
         }
-        return new PyString(executableFile.getPath());
+        return Py.newStringOrUnicode(executableFile.getPath()); // XXX always bytes in CPython
     }
 
     /**
@@ -1353,8 +1370,8 @@
         PyList path = new PyList();
         addPaths(path, props.getProperty("python.path", ""));
         if (prefix != null) {
-            String libpath = new File(prefix.toString(), "Lib").toString();
-            path.append(new PyString(libpath));
+            String libpath = new File(Py.fileSystemDecode(prefix), "Lib").toString();
+            path.append(Py.fileSystemEncode(libpath)); // XXX or newUnicode?
         }
         if (standalone) {
             // standalone jython: add the /Lib directory inside JYTHON_JAR to the path
@@ -1397,7 +1414,8 @@
     private static void addPaths(PyList path, String pypath) {
         StringTokenizer tok = new StringTokenizer(pypath, java.io.File.pathSeparator);
         while (tok.hasMoreTokens()) {
-            path.append(new PyString(tok.nextToken().trim()));
+            // Use unicode object if necessary to represent the element
+            path.append(Py.newStringOrUnicode(tok.nextToken().trim())); // XXX or newUnicode?
         }
     }
 
@@ -1540,6 +1558,7 @@
         closer.cleanup();
     }
 
+    @Override
     public void close() { cleanup(); }
 
     public static class PySystemStateCloser {
diff --git a/src/org/python/core/PyTableCode.java b/src/org/python/core/PyTableCode.java
--- a/src/org/python/core/PyTableCode.java
+++ b/src/org/python/core/PyTableCode.java
@@ -66,6 +66,7 @@
         // co_lnotab, co_stacksize
     };
 
+    @Override
     public PyObject __dir__() {
         PyString members[] = new PyString[__members__.length];
         for (int i = 0; i < __members__.length; i++)
@@ -80,11 +81,13 @@
         throw Py.AttributeError(name);
     }
 
+    @Override
     public void __setattr__(String name, PyObject value) {
         // no writable attributes
         throwReadonly(name);
     }
 
+    @Override
     public void __delattr__(String name) {
         throwReadonly(name);
     }
@@ -99,6 +102,7 @@
         return new PyTuple(pystr);
     }
 
+    @Override
     public PyObject __findattr_ex__(String name) {
         // have to craft co_varnames specially
         if (name == "co_varnames") {
@@ -111,7 +115,7 @@
             return toPyStringTuple(co_freevars);
         }
         if (name == "co_filename") {
-            return new PyString(co_filename);
+            return Py.fileSystemEncode(co_filename); // bytes object expected by clients
         }
         if (name == "co_name") {
             return new PyString(co_name);
diff --git a/src/org/python/core/PyUnicode.java b/src/org/python/core/PyUnicode.java
--- a/src/org/python/core/PyUnicode.java
+++ b/src/org/python/core/PyUnicode.java
@@ -89,7 +89,7 @@
     }
 
     PyUnicode(StringBuilder buffer) {
-        this(TYPE, new String(buffer));
+        this(TYPE, buffer.toString());
     }
 
     private static StringBuilder fromCodePoints(Iterator<Integer> iter) {
@@ -713,7 +713,7 @@
         for (Iterator<Integer> iter = newSubsequenceIterator(start, stop, step); iter.hasNext();) {
             buffer.appendCodePoint(iter.next());
         }
-        return createInstance(new String(buffer));
+        return createInstance(buffer.toString());
     }
 
     @ExposedMethod(type = MethodType.CMP, doc = BuiltinDocs.unicode___getslice___doc)
diff --git a/src/org/python/core/SyspathArchive.java b/src/org/python/core/SyspathArchive.java
--- a/src/org/python/core/SyspathArchive.java
+++ b/src/org/python/core/SyspathArchive.java
@@ -4,7 +4,7 @@
 import java.util.zip.*;
 
 @Untraversable
-public class SyspathArchive extends PyString {
+public class SyspathArchive extends PyUnicode {
     private ZipFile zipFile;
 
     public SyspathArchive(String archiveName) throws IOException {
diff --git a/src/org/python/core/SyspathJavaLoader.java b/src/org/python/core/SyspathJavaLoader.java
--- a/src/org/python/core/SyspathJavaLoader.java
+++ b/src/org/python/core/SyspathJavaLoader.java
@@ -26,20 +26,20 @@
     public SyspathJavaLoader(ClassLoader parent) {
     	super(parent);
     }
-    
 
-    /** 
+
+    /**
      * Returns a byte[] with the contents read from an InputStream.
-     * 
+     *
      * The stream is closed after reading the bytes.
-     *  
-     * @param input The input stream 
+     *
+     * @param input The input stream
      * @param size The number of bytes to read
-     *   
+     *
      * @return an array of byte[size] with the contents read
      * */
     private byte[] getBytesFromInputStream(InputStream input, int size) {
-    	try { 
+    	try {
 	    	byte[] buffer = new byte[size];
 	        int nread = 0;
 	        while(nread < size) {
@@ -56,9 +56,9 @@
             }
     	}
     }
-     
+
     private byte[] getBytesFromDir(String dir, String name) {
-    	try { 
+    	try {
     		File file = getFile(dir, name);
 	        if (file == null) {
 	            return null;
@@ -71,7 +71,7 @@
         }
 
     }
-    
+
     private byte[] getBytesFromArchive(SyspathArchive archive, String name) {
         String entryname = name.replace('.', SLASH_CHAR) + ".class";
         ZipEntry ze = archive.getEntry(entryname);
@@ -79,7 +79,7 @@
             return null;
         }
         try {
-			return getBytesFromInputStream(archive.getInputStream(ze), 
+			return getBytesFromInputStream(archive.getInputStream(ze),
 					                       (int)ze.getSize());
 		} catch (IOException e) {
 			return null;
@@ -98,11 +98,11 @@
         }
         return pkg;
     }
-    
+
     @Override
     protected Class<?> findClass(String name) throws ClassNotFoundException {
     	PySystemState sys = Py.getSystemState();
-    	ClassLoader sysClassLoader = sys.getClassLoader(); 
+    	ClassLoader sysClassLoader = sys.getClassLoader();
     	if (sysClassLoader != null) {
     		// sys.classLoader overrides this class loader!
     		return sysClassLoader.loadClass(name);
@@ -114,13 +114,10 @@
             PyObject entry = replacePathItem(sys, i, path);
             if (entry instanceof SyspathArchive) {
                 SyspathArchive archive = (SyspathArchive)entry;
-                buffer = getBytesFromArchive(archive, name);                
+                buffer = getBytesFromArchive(archive, name);
             } else {
-                if (!(entry instanceof PyUnicode)) {
-                    entry = entry.__str__();
-                }
-                String dir = entry.toString();
-            	buffer = getBytesFromDir(dir, name);
+                String dir = Py.fileSystemDecode(entry);
+                buffer = getBytesFromDir(dir, name);
             }
             if (buffer != null) {
             	definePackageForClass(name);
@@ -130,7 +127,7 @@
         // couldn't find the .class file on sys.path
         throw new ClassNotFoundException(name);
     }
-       
+
     @Override
     protected URL findResource(String res) {
     	PySystemState sys = Py.getSystemState();
@@ -157,10 +154,7 @@
                 }
                 continue;
             }
-            if (!(entry instanceof PyUnicode)) {
-                entry = entry.__str__();
-            }
-            String dir = sys.getPath(entry.toString());
+            String dir = sys.getPath(Py.fileSystemDecode(entry));
             try {
 				File resource = new File(dir, res);
 				if (!resource.exists()) {
@@ -179,7 +173,7 @@
         throws IOException
     {
         List<URL> resources = new ArrayList<URL>();
-        
+
         PySystemState sys = Py.getSystemState();
 
         res = deslashResource(res);
@@ -204,10 +198,7 @@
                 }
                 continue;
             }
-            if (!(entry instanceof PyUnicode)) {
-                entry = entry.__str__();
-            }
-            String dir = sys.getPath(entry.toString());
+            String dir = sys.getPath(Py.fileSystemDecode(entry));
             try {
                 File resource = new File(dir, res);
                 if (!resource.exists()) {
@@ -220,7 +211,7 @@
         }
         return Collections.enumeration(resources);
     }
-    
+
     static PyObject replacePathItem(PySystemState sys, int idx, PyList paths) {
         PyObject path = paths.__getitem__(idx);
         if (path instanceof SyspathArchive) {
@@ -229,9 +220,9 @@
         }
 
         try {
-            // this has the side affect of adding the jar to the PackageManager during the
+            // this has the side effect of adding the jar to the PackageManager during the
             // initialization of the SyspathArchive
-            path = new SyspathArchive(sys.getPath(path.toString()));
+            path = new SyspathArchive(sys.getPath(Py.fileSystemDecode(path)));
         } catch (Exception e) {
             return path;
         }
diff --git a/src/org/python/core/__builtin__.java b/src/org/python/core/__builtin__.java
--- a/src/org/python/core/__builtin__.java
+++ b/src/org/python/core/__builtin__.java
@@ -85,7 +85,7 @@
             case 18:
                 return __builtin__.eval(arg1);
             case 19:
-                __builtin__.execfile(arg1.asString());
+                __builtin__.execfile(Py.fileSystemDecode(arg1));
                 return Py.None;
             case 23:
                 return __builtin__.hex(arg1);
@@ -141,7 +141,7 @@
             case 18:
                 return __builtin__.eval(arg1, arg2);
             case 19:
-                __builtin__.execfile(arg1.asString(), arg2);
+                __builtin__.execfile(Py.fileSystemDecode(arg1), arg2);
                 return Py.None;
             case 20:
                 return __builtin__.filter(arg1, arg2);
@@ -191,7 +191,7 @@
             case 18:
                 return __builtin__.eval(arg1, arg2, arg3);
             case 19:
-                __builtin__.execfile(arg1.asString(), arg2, arg3);
+                __builtin__.execfile(Py.fileSystemDecode(arg1), arg2, arg3);
                 return Py.None;
             case 21:
                 return __builtin__.getattr(arg1, arg2, arg3);
@@ -1629,7 +1629,7 @@
                                                    "dont_inherit"},
                                      3);
         PyObject source = ap.getPyObject(0);
-        String filename = ap.getString(1);
+        String filename = Py.fileSystemDecode(ap.getPyObject(1));
         String mode = ap.getString(2);
         int flags = ap.getInt(3, 0);
         boolean dont_inherit = ap.getPyObject(4, Py.False).__nonzero__();
diff --git a/src/org/python/core/imp.java b/src/org/python/core/imp.java
--- a/src/org/python/core/imp.java
+++ b/src/org/python/core/imp.java
@@ -294,6 +294,7 @@
         return compileSource(name, makeStream(file), sourceFilename, mtime);
     }
 
+    /** Remove the last three characters of a file name and add the compiled suffix "$py.class". */
     public static String makeCompiledFilename(String filename) {
         return filename.substring(0, filename.length() - 3) + "$py.class";
     }
@@ -418,7 +419,8 @@
         }
 
         if (moduleLocation != null) {
-            module.__setattr__("__file__", new PyString(moduleLocation));
+            // Standard library expects __file__ to be encoded bytes
+            module.__setattr__("__file__", Py.fileSystemEncode(moduleLocation));
         } else if (module.__findattr__("__file__") == null) {
             // Should probably never happen (but maybe with an odd custom builtins, or
             // Java Integration)
@@ -543,10 +545,8 @@
                     return loadFromLoader(loader, moduleName);
                 }
             }
-            if (!(p instanceof PyUnicode)) {
-                p = p.__str__();
-            }
-            ret = loadFromSource(sys, name, moduleName, p.toString());
+            // p could be unicode or bytes (in the file system encoding)
+            ret = loadFromSource(sys, name, moduleName, Py.fileSystemDecode(p));
             if (ret != null) {
                 return ret;
             }
@@ -606,7 +606,7 @@
         // display names are for identification purposes (e.g. __file__): when entry is
         // null it forces java.io.File to be a relative path (e.g. foo/bar.py instead of
         // /tmp/foo/bar.py)
-        String displayDirName = entry.equals("") ? null : entry.toString();
+        String displayDirName = entry.equals("") ? null : entry;
         String displaySourceName = new File(new File(displayDirName, name), sourceName).getPath();
         String displayCompiledName =
                 new File(new File(displayDirName, name), compiledName).getPath();
@@ -640,7 +640,7 @@
             compiledFile = new File(dirName, compiledName);
         } else {
             PyModule m = addModule(modName);
-            PyObject filename = new PyString(new File(displayDirName, name).getPath());
+            PyObject filename = Py.newStringOrUnicode(new File(displayDirName, name).getPath());
             m.__dict__.__setitem__("__path__", new PyList(new PyObject[] {filename}));
         }
 
@@ -928,9 +928,6 @@
                 }
             }
         }
-        if (name.indexOf(File.separatorChar) != -1) {
-            throw Py.ImportError("Import by filename is not supported.");
-        }
         PyObject modules = Py.getSystemState().modules;
         PyObject pkgMod = null;
         String pkgName = null;
@@ -974,6 +971,13 @@
         return mod;
     }
 
+    /** Defend against attempt to import by filename (withdrawn feature). */
+    private static void checkNotFile(String name){
+        if (name.indexOf(File.separatorChar) != -1) {
+            throw Py.ImportError("Import by filename is not supported.");
+        }
+    }
+
     private static void ensureFromList(PyObject mod, PyObject fromlist, String name) {
         ensureFromList(mod, fromlist, name, false);
     }
@@ -1016,6 +1020,7 @@
      * @return an imported module (Java or Python)
      */
     public static PyObject importName(String name, boolean top) {
+        checkNotFile(name);
         PyUnicode.checkEncoding(name);
         ReentrantLock importLock = Py.getSystemState().getImportLock();
         importLock.lock();
@@ -1036,6 +1041,7 @@
      */
     public static PyObject importName(String name, boolean top, PyObject modDict,
             PyObject fromlist, int level) {
+        checkNotFile(name);
         PyUnicode.checkEncoding(name);
         ReentrantLock importLock = Py.getSystemState().getImportLock();
         importLock.lock();
diff --git a/src/org/python/core/io/FileIO.java b/src/org/python/core/io/FileIO.java
--- a/src/org/python/core/io/FileIO.java
+++ b/src/org/python/core/io/FileIO.java
@@ -67,7 +67,7 @@
      * @see #FileIO(PyString name, String mode)
      */
     public FileIO(String name, String mode) {
-        this(Py.newString(name), mode);
+        this(Py.newUnicode(name), mode);
     }
 
     /**
@@ -82,7 +82,7 @@
      */
     public FileIO(PyString name, String mode) {
         parseMode(mode);
-        File absPath = new RelativeFile(name.toString());
+        File absPath = new RelativeFile(Py.fileSystemDecode(name));
 
         try {
             if ((appending && !(reading || plus)) || (writing && !reading && !plus)) {
diff --git a/src/org/python/core/packagecache/PathPackageManager.java b/src/org/python/core/packagecache/PathPackageManager.java
--- a/src/org/python/core/packagecache/PathPackageManager.java
+++ b/src/org/python/core/packagecache/PathPackageManager.java
@@ -40,12 +40,9 @@
                 + name;
 
         for (int i = 0; i < path.__len__(); i++) {
+            // Each entry in the path may be byte-encoded or unicode
             PyObject entry = path.pyget(i);
-            if (!(entry instanceof PyUnicode)) {
-                entry = entry.__str__();
-            }
-            String dir = entry.toString();
-
+            String dir = Py.fileSystemDecode(entry);
             File f = new RelativeFile(dir, child);
             try {
                 if (f.isDirectory() && imp.caseok(f, name)) {
@@ -103,11 +100,8 @@
         String child = jpkg.__name__.replace('.', File.separatorChar);
 
         for (int i = 0; i < path.__len__(); i++) {
-            PyObject entry = path.pyget(i);
-            if (!(entry instanceof PyUnicode)) {
-                entry = entry.__str__();
-            }
-            String dir = entry.toString();
+            // Each entry in the path may be byte-encoded or unicode
+            String dir = Py.fileSystemDecode(path.pyget(i));
 
             if (dir.length() == 0) {
                 dir = null;
diff --git a/src/org/python/modules/_imp.java b/src/org/python/modules/_imp.java
--- a/src/org/python/modules/_imp.java
+++ b/src/org/python/modules/_imp.java
@@ -68,14 +68,14 @@
      * This needs to be consolidated with the code in (@see org.python.core.imp).
      *
      * @param name module name
-     * @param entry a path String
+     * @param entry a path String (Unicode file or directory name)
      * @param findingPackage if looking for a package only try to locate __init__
      * @return null if no module found otherwise module information
      */
     static ModuleInfo findFromSource(String name, String entry, boolean findingPackage,
                                      boolean preferSource) {
         String sourceName = "__init__.py";
-        String compiledName = makeCompiledFilename(sourceName);
+        String compiledName = imp.makeCompiledFilename(sourceName);
         String directoryName = PySystemState.getPathLazy(entry);
         // displayDirName is for identification purposes: when null it
         // forces java.io.File to be a relative path (e.g. foo/bar.py
@@ -97,7 +97,7 @@
             } else {
                 Py.writeDebug("import", "trying source " + dir.getPath());
                 sourceName = name + ".py";
-                compiledName = makeCompiledFilename(sourceName);
+                compiledName = imp.makeCompiledFilename(sourceName);
                 sourceFile = new File(directoryName, sourceName);
                 compiledFile = new File(directoryName, compiledName);
             }
@@ -152,8 +152,7 @@
             throw Py.TypeError("must be a file-like object");
         }
         PySystemState sys = Py.getSystemState();
-        String compiledFilename =
-                makeCompiledFilename(sys.getPath(filename));
+        String compiledFilename = imp.makeCompiledFilename(sys.getPath(filename));
         mod = imp.createFromSource(modname.intern(), (InputStream)o,
                                                    filename, compiledFilename);
         PyObject modules = sys.modules;
@@ -161,15 +160,38 @@
         return mod;
     }
 
-    public static PyObject load_compiled(String name, String pathname) {
-        return load_compiled(name, pathname, new PyFile(pathname, "rb", -1));
-    }
-
     public static PyObject reload(PyObject module) {
         return __builtin__.reload(module);
     }
 
-    public static PyObject load_compiled(String name, String pathname, PyObject file) {
+    /**
+     * Return a module with the given <code>name</code>, the result of executing the compiled code
+     * at the given <code>pathname</code>. If this path is a <code>PyUnicode</code>, it is used
+     * exactly; if it is a <code>PyString</code> it is taken to be file-system encoded.
+     *
+     * @param name the module name
+     * @param pathname to the compiled module (becomes <code>__file__</code>)
+     * @return the module called <code>name</code>
+     */
+    public static PyObject load_compiled(String name, PyString pathname) {
+        String _pathname = Py.fileSystemDecode(pathname);
+        return _load_compiled(name, _pathname, new PyFile(_pathname, "rb", -1));
+    }
+
+    /**
+     * Return a module with the given <code>name</code>, the result of executing the compiled code
+     * in the given <code>file</code> stream.
+     *
+     * @param name the module name
+     * @param pathname a file path that is not null (becomes <code>__file__</code>)
+     * @param file stream from which the compiled code is taken
+     * @return the module called <code>name</code>
+     */
+    public static PyObject load_compiled(String name, PyString pathname, PyObject file) {
+        return _load_compiled(name, Py.fileSystemDecode(pathname), file);
+    }
+
+    private static PyObject _load_compiled(String name, String pathname, PyObject file) {
         InputStream stream = (InputStream) file.__tojava__(InputStream.class);
         if (stream == Py.NoConversion) {
             throw Py.TypeError("must be a file-like object");
@@ -190,8 +212,10 @@
 
     public static PyObject find_module(String name, PyObject path) {
         if (path == Py.None && PySystemState.getBuiltin(name) != null) {
-            return new PyTuple(Py.None, Py.newString(name),
-                               new PyTuple(Py.EmptyString, Py.EmptyString,
+            return new PyTuple(Py.None,
+                               Py.newString(name),
+                               new PyTuple(Py.EmptyString,
+                                           Py.EmptyString,
                                            Py.newInteger(C_BUILTIN)));
         }
 
@@ -199,14 +223,14 @@
             path = Py.getSystemState().path;
         }
         for (PyObject p : path.asIterable()) {
-            ModuleInfo mi = findFromSource(name, p.toString(), false, true);
+            ModuleInfo mi = findFromSource(name, Py.fileSystemDecode(p), false, true);
             if(mi == null) {
                 continue;
             }
             return new PyTuple(mi.file,
-                               new PyString(mi.filename),
-                               new PyTuple(new PyString(mi.suffix),
-                                           new PyString(mi.mode),
+                               Py.newStringOrUnicode(mi.filename),
+                               new PyTuple(Py.newString(mi.suffix),
+                                           Py.newString(mi.mode),
                                            Py.newInteger(mi.type)));
         }
         throw Py.ImportError("No module named " + name);
@@ -216,7 +240,8 @@
         PyObject mod = Py.None;
         PySystemState sys = Py.getSystemState();
         int type = data.__getitem__(2).asInt();
-        while(mod == Py.None) {
+        String filenameString = Py.fileSystemDecode(filename);
+        while (mod == Py.None) {
             String compiledName;
             switch (type) {
                 case PY_SOURCE:
@@ -226,8 +251,8 @@
                     }
 
                     // XXX: This should load the accompanying byte code file instead, if it exists
-                    String resolvedFilename = sys.getPath(filename.toString());
-                    compiledName = makeCompiledFilename(resolvedFilename);
+                    String resolvedFilename = sys.getPath(filenameString);
+                    compiledName = imp.makeCompiledFilename(resolvedFilename);
                     if (name.endsWith(".__init__")) {
                         name = name.substring(0, name.length() - ".__init__".length());
                     } else if (name.equals("__init__")) {
@@ -241,19 +266,20 @@
                     }
 
                     mod = imp.createFromSource(name.intern(), (InputStream)o,
-                            filename.toString(), compiledName, mtime);
+                            filenameString, compiledName, mtime);
                     break;
                 case PY_COMPILED:
-                    mod = load_compiled(name, filename.toString(), file);
+                    mod = _load_compiled(name, filenameString, file);
                     break;
                 case PKG_DIRECTORY:
                     PyModule m = imp.addModule(name);
                     m.__dict__.__setitem__("__path__", new PyList(new PyObject[] {filename}));
                     m.__dict__.__setitem__("__file__", filename);
-                    ModuleInfo mi = findFromSource(name, filename.toString(), true, true);
+                    ModuleInfo mi = findFromSource(name, filenameString, true, true);
                     type = mi.type;
                     file = mi.file;
-                    filename = new PyString(mi.filename);
+                    filenameString = mi.filename;
+                    filename = Py.newStringOrUnicode(filenameString);
                     break;
                 default:
                     throw Py.ImportError("No module named " + name);
@@ -264,8 +290,13 @@
         return mod;
     }
 
-    public static String makeCompiledFilename(String filename) {
-        return imp.makeCompiledFilename(filename);
+    /**
+     * Variant of {@link imp#makeCompiledFilename(String)} dealing with encoded bytes. In the context
+     * where this is used from Python, a result in encoded bytes is preferable.
+     */
+    public static PyString makeCompiledFilename(PyString filename) {
+        filename = Py.fileSystemEncode(filename);
+        return Py.newString(imp.makeCompiledFilename(filename.getString()));
     }
 
     public static PyObject get_magic() {
diff --git a/src/org/python/modules/_py_compile.java b/src/org/python/modules/_py_compile.java
--- a/src/org/python/modules/_py_compile.java
+++ b/src/org/python/modules/_py_compile.java
@@ -12,22 +12,30 @@
 public class _py_compile {
     public static PyList __all__ = new PyList(new PyString[] { new PyString("compile") });
 
-    public static boolean compile(String filename, String cfile, String dfile) {
-        // Resolve relative path names. dfile is only used for error messages and should not be
-        // resolved
+    /**
+     * Java wrapper on the module compiler in support of py_compile.compile. Filenames here will
+     * be interpreted as Unicode if they are PyUnicode, and as byte-encoded names if they are only
+     * PyString.
+     *
+     * @param fileName actual source file name
+     * @param compiledName compiled filename
+     * @param displayName displayed source filename, only used for error messages (and not resolved)
+     * @return true if successful
+     */
+    public static boolean compile(PyString fileName, PyString compiledName, PyString displayName) {
+        // Resolve source path and check it exists
         PySystemState sys = Py.getSystemState();
-        filename = sys.getPath(filename);
-        cfile = sys.getPath(cfile);
+        String file = sys.getPath(Py.fileSystemDecode(fileName));
+        File f = new File(file);
+        if (!f.exists()) {
+            throw Py.IOError(Errno.ENOENT, file);
+        }
 
-        File file = new File(filename);
-        if (!file.exists()) {
-            throw Py.IOError(Errno.ENOENT, Py.newString(filename));
-        }
-        String name = getModuleName(file);
-
-        byte[] bytes = org.python.core.imp.compileSource(name, file, dfile, cfile);
-        org.python.core.imp.cacheCompiledSource(filename, cfile, bytes);
-
+        // Convert file in which to put the byte code and display name (each may be null)
+        String c = (compiledName == null) ? null : sys.getPath(Py.fileSystemDecode(compiledName));
+        String d = (displayName == null) ? null : Py.fileSystemDecode(displayName);
+        byte[] bytes = org.python.core.imp.compileSource(getModuleName(f), f, d, c);
+        org.python.core.imp.cacheCompiledSource(file, c, bytes);
         return bytes.length > 0;
     }
 
diff --git a/src/org/python/modules/posix/PosixModule.java b/src/org/python/modules/posix/PosixModule.java
--- a/src/org/python/modules/posix/PosixModule.java
+++ b/src/org/python/modules/posix/PosixModule.java
@@ -486,7 +486,8 @@
         "getcwd() -> path\n\n" +
         "Return a string representing the current working directory.");
     public static PyObject getcwd() {
-        return Py.newStringOrUnicode(Py.getSystemState().getCurrentWorkingDir());
+        // The return value is bytes in the file system encoding
+        return Py.fileSystemEncode(Py.getSystemState().getCurrentWorkingDir());
     }
 
     public static PyString __doc__getcwdu = new PyString(
@@ -1343,25 +1344,24 @@
             return environ;
         }
         for (Map.Entry<String, String> entry : env.entrySet()) {
+            // The shell restricts names to a subset of ASCII and values are encoded byte strings.
             environ.__setitem__(
-                    Py.newStringOrUnicode(entry.getKey()),
-                    Py.newStringOrUnicode(entry.getValue()));
+                    Py.newString(entry.getKey()),
+                    Py.fileSystemEncode(entry.getValue()));
         }
         return environ;
     }
 
     /**
-     * Return a path as a String from a PyObject
+     * Return a path as a String from a PyObject, which must be <code>str</code> or
+     * <code>unicode</code>. If the path is a <code>str</code> (that is, <code>bytes</code>), it is
+     * interpreted into Unicode using the file system encoding.
      *
      * @param path a PyObject, raising a TypeError if an invalid path type
      * @return a String path
      */
     private static String asPath(PyObject path) {
-        if (path instanceof PyString) {
-            return path.toString();
-        }
-        throw Py.TypeError(String.format("coercing to Unicode: need string, %s type found",
-                                         path.getType().fastGetName()));
+        return Py.fileSystemDecode(path);
     }
 
     /**
diff --git a/src/org/python/modules/zipimport/zipimporter.java b/src/org/python/modules/zipimport/zipimporter.java
--- a/src/org/python/modules/zipimport/zipimporter.java
+++ b/src/org/python/modules/zipimport/zipimporter.java
@@ -20,6 +20,7 @@
 import org.python.core.PySystemState;
 import org.python.core.PyTuple;
 import org.python.core.PyType;
+import org.python.core.PyUnicode;
 import org.python.core.Traverseproc;
 import org.python.core.Visitproc;
 import org.python.core.util.FileUtil;
@@ -80,7 +81,7 @@
     @ExposedMethod
     final void zipimporter___init__(PyObject[] args, String[] kwds) {
         ArgParser ap = new ArgParser("__init__", args, kwds, new String[] {"path"});
-        String path = ap.getString(0);
+        String path = Py.fileSystemDecode(ap.getPyObject(0));
         zipimporter___init__(path);
     }
 
@@ -113,10 +114,11 @@
             pathFile = parentFile;
         }
         if (archive != null) {
-            files = zipimport._zip_directory_cache.__finditem__(archive);
+            PyUnicode archivePath = Py.newUnicode(archive);
+            files = zipimport._zip_directory_cache.__finditem__(archivePath);
             if (files == null) {
                 files = readDirectory(archive);
-                zipimport._zip_directory_cache.__setitem__(archive, files);
+                zipimport._zip_directory_cache.__setitem__(archivePath, files);
             }
         } else {
             throw zipimport.ZipImportError("not a Zip file: " + path);
diff --git a/src/org/python/util/jython.java b/src/org/python/util/jython.java
--- a/src/org/python/util/jython.java
+++ b/src/org/python/util/jython.java
@@ -341,8 +341,8 @@
             } else {
                 try {
                     interp.globals.__setitem__(new PyString("__file__"),
-                            new PyString(opts.filename));
-
+                            // Note that __file__ is widely expected to be encoded bytes
+                            Py.fileSystemEncode(opts.filename));
                     FileInputStream file;
                     try {
                         file = new FileInputStream(new RelativeFile(opts.filename));
diff --git a/src/shell/jython.exe b/src/shell/jython.exe
index 7c9cbe9eec239c5768c17f873726220b09966341..b7500204c603274a6bdb9ec15064bd27f31c14ac
GIT binary patch
[stripped]
diff --git a/src/shell/jython.py b/src/shell/jython.py
--- a/src/shell/jython.py
+++ b/src/shell/jython.py
@@ -20,19 +20,68 @@
 
 is_windows = os.name == "nt" or (os.name == "java" and os._name == "nt")
 
+# A note about encoding:
+#
+# A major motivation for this program is to launch Jython on Windows, where
+# console and file encoding may be different. Command-line arguments and
+# environment variables are presented in Python 2.7 as byte-data, encoded
+# "somehow". It becomes important to know which decoding to use as soon as
+# paths may contain non-ascii characters. It is not the console encoding.
+# Experiment shows that sys.getfilesystemencoding() is generally applicable
+# to arguments, environment variables and spawning a subprocess.
+#
+# On a Windows 10 box, this comes up with pseudo-codec 'mbcs'. This supports
+# European accented characters pretty well.
+#
+# When localised to Chinese(simplified) the FS encoding mbcs includes many
+# more points than cp936 (the console encoding), although it still struggles
+# with European accented characters.
+
+ENCODING = sys.getfilesystemencoding() or "utf-8"
+
+
+def get_env(envvar, default=None):
+    """ Return the named environment variable, decoded to Unicode."""
+    v = os.environ.get(envvar, default)
+    # Tolerate default given as bytes, as we're bound to forget sometimes
+    if isinstance(v, bytes):
+        v = v.decode(ENCODING)
+    # Remove quotes sometimes necessary around the value
+    if v is not None and v.startswith('"') and v.endswith('"'):
+        v = v[1:-1]
+    return v
+
+def encode_list(args, encoding=ENCODING):
+    """ Convert list of Unicode strings to list of encoded byte strings."""
+    r = []
+    for a in args:
+        if not isinstance(a, bytes): a = a.encode(encoding)
+        r.append(a)
+    return r
+
+def decode_list(args, encoding=ENCODING):
+    """ Convert list of byte strings to list of Unicode strings."""
+    r = []
+    for a in args:
+        if not isinstance(a, unicode): a = a.decode(encoding)
+        r.append(a)
+    return r
 
 def parse_launcher_args(args):
+    """ Process the given argument list into two objects, the first part being
+        a namespace of checked arguments to the interpreter itself, and the rest
+        being the Python program it will run and its arguments.
+    """
     class Namespace(object):
         pass
     parsed = Namespace()
-    parsed.java = []
-    parsed.properties = OrderedDict()
-    parsed.boot = False
-    parsed.jdb = False
-    parsed.help = False
-    parsed.print_requested = False
-    parsed.profile = False
-    parsed.jdb = None
+    parsed.boot = False # --boot flag given
+    parsed.jdb = False # --jdb flag given
+    parsed.help = False # --help or -h flag given
+    parsed.print_requested = False # --print flag given
+    parsed.profile = False # --profile flag given
+    parsed.properties = OrderedDict() # properties to give the JVM
+    parsed.java = [] # any other arguments to give the JVM
 
     it = iter(args)
     next(it)  # ignore sys.argv[0]
@@ -42,11 +91,11 @@
             arg = next(it)
         except StopIteration:
             break
-        if arg.startswith("-D"):
-            k, v = arg[2:].split("=")
+        if arg.startswith(u"-D"):
+            k, v = arg[2:].split(u"=")
             parsed.properties[k] = v
             i += 1
-        elif arg in ("-J-classpath", "-J-cp"):
+        elif arg in (u"-J-classpath", u"-J-cp"):
             try:
                 next_arg = next(it)
             except StopIteration:
@@ -55,24 +104,24 @@
                 bad_option("Bad option for -J-classpath")
             parsed.classpath = next_arg
             i += 2
-        elif arg.startswith("-J-Xmx"):
+        elif arg.startswith(u"-J-Xmx"):
             parsed.mem = arg[2:]
             i += 1
-        elif arg.startswith("-J-Xss"):
+        elif arg.startswith(u"-J-Xss"):
             parsed.stack = arg[2:]
             i += 1
-        elif arg.startswith("-J"):
+        elif arg.startswith(u"-J"):
             parsed.java.append(arg[2:])
             i += 1
-        elif arg == "--print":
+        elif arg == u"--print":
             parsed.print_requested = True
             i += 1
-        elif arg in ("-h", "--help"):
+        elif arg in (u"-h", u"--help"):
             parsed.help = True
-        elif arg in ("--boot", "--jdb", "--profile"):
+        elif arg in (u"--boot", u"--jdb", u"--profile"):
             setattr(parsed, arg[2:], True)
             i += 1
-        elif arg == "--":
+        elif arg == u"--":
             i += 1
             break
         else:
@@ -92,13 +141,13 @@
         if hasattr(self, "_uname"):
             return self._uname
         if is_windows:
-            self._uname = "windows"
+            self._uname = u"windows"
         else:
             uname = subprocess.check_output(["uname"]).strip().lower()
             if uname.startswith("cygwin"):
-                self._uname = "cygwin"
+                self._uname = u"cygwin"
             else:
-                self._uname = uname
+                self._uname = uname.decode(ENCODING)
         return self._uname
 
     @property
@@ -114,22 +163,23 @@
         return self._java_command
 
     def setup_java_command(self):
+        """ Sets java_home and java_command according to environment and parsed
+            launcher arguments --jdb and --help.
+        """
         if self.args.help:
             self._java_home = None
-            self._java_command = "java"
+            self._java_command = u"java"
             return
-            
-        if "JAVA_HOME" not in os.environ:
-            self._java_home = None
-            self._java_command = "jdb" if self.args.jdb else "java"
+
+        command = u"jdb" if self.args.jdb else u"java"
+
+        self._java_home = get_env("JAVA_HOME")
+        if self._java_home is None or self.uname == u"cygwin":
+            # Assume java or jdb on the path
+            self._java_command = command
         else:
-            self._java_home = os.environ["JAVA_HOME"]
-            if self.uname == "cygwin":
-                self._java_command = "jdb" if self.args.jdb else "java"
-            else:
-                self._java_command = os.path.join(
-                    self.java_home, "bin",
-                    "jdb" if self.args.jdb else "java")
+            # Assume java or jdb in JAVA_HOME/bin
+            self._java_command = os.path.join(self._java_home, u"bin", command)
 
     @property
     def executable(self):
@@ -139,28 +189,37 @@
         # Modified from
         # http://stackoverflow.com/questions/3718657/how-to-properly-determine-current-script-directory-in-python/22881871#22881871
         if getattr(sys, "frozen", False): # py2exe, PyInstaller, cx_Freeze
-            path = os.path.abspath(sys.executable)
+            # Frozen. Let it go with the executable path.
+            bytes_path = sys.executable
         else:
-            def inspect_this(): pass
-            path = inspect.getabsfile(inspect_this)
-        self._executable = os.path.realpath(path)
+            # Not frozen. Any object defined in this file will do. 
+            bytes_path = inspect.getfile(JythonCommand)
+        # Python 2 thinks in bytes. Carefully normalise in Unicode.
+        path = os.path.realpath(bytes_path.decode(ENCODING))
+        try:
+            # If possible, make this relative to the CWD.
+            # This helps manage multi-byte names in installation location.
+            path = os.path.relpath(path, os.getcwdu())
+        except ValueError:
+            # Many reasons why this might be impossible: use an absolute path.
+            path = os.path.abspath(path)
+        self._executable = path
         return self._executable
 
     @property
     def jython_home(self):
         if hasattr(self, "_jython_home"):
             return self._jython_home
-        if "JYTHON_HOME" in os.environ:
-            self._jython_home = os.environ["JYTHON_HOME"]
-        else:
-            self._jython_home = os.path.dirname(os.path.dirname(self.executable))
-        if self.uname == "cygwin":
-            self._jython_home = subprocess.check_output(["cygpath", "--windows", self._jython_home]).strip()
+        self._jython_home = get_env("JYTHON_HOME") or os.path.dirname(
+                    os.path.dirname(self.executable))
+        if self.uname == u"cygwin":
+            # Even on Cygwin, we need a Windows-style path for this
+            home = unicode_subprocess(["cygpath", "--windows", home])
         return self._jython_home
 
     @property
     def jython_opts():
-        return os.environ.get("JYTHON_OPTS", "")
+        return get_env("JYTHON_OPTS", "")
 
     @property
     def classpath_delimiter(self):
@@ -179,11 +238,9 @@
             else:
                 jars.append(os.path.join(self.jython_home, "javalib", "*"))
         elif not os.path.exists(os.path.join(self.jython_home, "jython.jar")): 
-            bad_option("""{jython_home} contains neither jython-dev.jar nor jython.jar.
+            bad_option(u"""{} contains neither jython-dev.jar nor jython.jar.
 Try running this script from the 'bin' directory of an installed Jython or 
-setting {envvar_specifier}JYTHON_HOME.""".format(
-                    jython_home=self.jython_home,
-                    envvar_specifier="%" if self.uname == "windows" else "$"))
+setting JYTHON_HOME.""".format(self.jython_home))
         else:
             jars = [os.path.join(self.jython_home, "jython.jar")]
         self._jython_jars = jars
@@ -194,14 +251,14 @@
         if hasattr(self.args, "classpath"):
             return self.args.classpath
         else:
-            return os.environ.get("CLASSPATH", ".")
+            return get_env("CLASSPATH", ".")
 
     @property
     def java_mem(self):
         if hasattr(self.args, "mem"):
             return self.args.mem
         else:
-            return os.environ.get("JAVA_MEM", "-Xmx512m")
+            return get_env("JAVA_MEM", "-Xmx512m")
 
     @property
     def java_stack(self):
@@ -213,7 +270,7 @@
     @property
     def java_opts(self):
         return [self.java_mem, self.java_stack]
-        
+
     @property
     def java_profile_agent(self):
         return os.path.join(self.jython_home, "javalib", "profile.jar")
@@ -222,68 +279,84 @@
         if "JAVA_ENCODING" not in os.environ and self.uname == "darwin" and "file.encoding" not in self.args.properties:
             self.args.properties["file.encoding"] = "UTF-8"
 
-    def convert(self, arg):
-        if sys.stdout.encoding:
-            return arg.encode(sys.stdout.encoding)
-        else:
-            return arg
-
     def make_classpath(self, jars):
         return self.classpath_delimiter.join(jars)
 
     def convert_path(self, arg):
-        if self.uname == "cygwin":
-            if not arg.startswith("/cygdrive/"):
-                new_path = self.convert(arg).replace("/", "\\")
+        if self.uname == u"cygwin":
+            if not arg.startswith(u"/cygdrive/"):
+                return arg.replace(u"/", u"\\")
             else:
-                new_path = subprocess.check_output(["cygpath", "-pw", self.convert(arg)]).strip()
-            return new_path
+                arg = arg.replace('*', r'\*') # prevent globbing
+                return unicode_subprocess(["cygpath", "-pw", arg])
         else:
-            return self.convert(arg)
+            return arg
+
+    def unicode_subprocess(self, unicode_command):
+        """ Launch a command with subprocess.check_output() and read the
+            output, except everything is expected to be in Unicode.
+        """
+        cmd = []
+        for c in unicode_command:
+            if isinstance(c, bytes):
+                cmd.append(c)
+            else:
+                cmd.append(c.encode(ENCODING))
+        return subprocess.check_output(cmd).strip().decode(ENCODING)
 
     @property
     def command(self):
+        # Set default file encoding just for Darwin (?)
         self.set_encoding()
+
+        # Begin to build the Java part of the ultimate command
         args = [self.java_command]
         args.extend(self.java_opts)
         args.extend(self.args.java)
 
+        # Get the class path right (depends on --boot)
         classpath = self.java_classpath
         jython_jars = self.jython_jars
         if self.args.boot:
-            args.append("-Xbootclasspath/a:%s" % self.convert_path(self.make_classpath(jython_jars)))
+            args.append(u"-Xbootclasspath/a:%s" % self.convert_path(self.make_classpath(jython_jars)))
         else:
             classpath = self.make_classpath(jython_jars) + self.classpath_delimiter + classpath
-        args.extend(["-classpath", self.convert_path(classpath)])
+        args.extend([u"-classpath", self.convert_path(classpath)])
 
         if "python.home" not in self.args.properties:
-            args.append("-Dpython.home=%s" % self.convert_path(self.jython_home))
+            args.append(u"-Dpython.home=%s" % self.convert_path(self.jython_home))
         if "python.executable" not in self.args.properties:
-            args.append("-Dpython.executable=%s" % self.convert_path(self.executable))
+            args.append(u"-Dpython.executable=%s" % self.convert_path(self.executable))
         if "python.launcher.uname" not in self.args.properties:
-            args.append("-Dpython.launcher.uname=%s" % self.uname)
-        # Determines whether running on a tty for the benefit of
+            args.append(u"-Dpython.launcher.uname=%s" % self.uname)
+
+        # Determine whether running on a tty for the benefit of
         # running on Cygwin. This step is needed because the Mintty
         # terminal emulator doesn't behave like a standard Microsoft
         # Windows tty, and so JNR Posix doesn't detect it properly.
         if "python.launcher.tty" not in self.args.properties:
-            args.append("-Dpython.launcher.tty=%s" % str(os.isatty(sys.stdin.fileno())).lower())
-        if self.uname == "cygwin" and "python.console" not in self.args.properties:
-            args.append("-Dpython.console=org.python.core.PlainConsole")
+            args.append(u"-Dpython.launcher.tty=%s" % str(os.isatty(sys.stdin.fileno())).lower())
+        if self.uname == u"cygwin" and "python.console" not in self.args.properties:
+            args.append(u"-Dpython.console=org.python.core.PlainConsole")
+
         if self.args.profile:
-            args.append("-XX:-UseSplitVerifier")
-            args.append("-javaagent:%s" % self.convert_path(self.java_profile_agent))
+            args.append(u"-XX:-UseSplitVerifier")
+            args.append(u"-javaagent:%s" % self.convert_path(self.java_profile_agent))
+
         for k, v in self.args.properties.iteritems():
-            args.append("-D%s=%s" % (self.convert(k), self.convert(v)))
-        args.append("org.python.util.jython")
+            args.append(u"-D%s=%s" % (k, v))
+
+        args.append(u"org.python.util.jython")
+
         if self.args.help:
-            args.append("--help")
+            args.append(u"--help")
+
         args.extend(self.jython_args)
         return args
 
 
 def bad_option(msg):
-    print >> sys.stderr, """
+    print >> sys.stderr, u"""
 {msg}
 usage: jython [option] ... [-c cmd | -m mod | file | -] [arg] ...
 Try `jython -h' for more information.
@@ -312,19 +385,24 @@
 """
 
 def support_java_opts(args):
+    """ Generator from options intended for the JVM. Options beginning -D go
+        through unchanged, others are prefixed with -J.
+    """
+    # Input is expected to be Unicode, but just in case ...
+    if isinstance(args, bytes): args = args.decode(ENCODING)
     it = iter(args)
     while it:
         arg = next(it)
-        if arg.startswith("-D"):
+        if arg.startswith(u"-D"):
             yield arg
-        elif arg in ("-classpath", "-cp"):
-            yield "-J" + arg
+        elif arg in (u"-classpath", u"-cp"):
+            yield u"-J" + arg
             try:
                 yield next(it)
             except StopIteration:
                 bad_option("Argument expected for -classpath option in JAVA_OPTS")
         else:
-            yield "-J" + arg
+            yield u"-J" + arg
 
 
 # copied from subprocess module in Jython; see
@@ -378,37 +456,36 @@
 
     return argv
 
-
-def decode_args(sys_args):
-    args = [sys_args[0]]
-
-    def get_env_opts(envvar):
-        opts = os.environ.get(envvar, "")
-        if is_windows:
-            return cmdline2list(opts)
-        else:
-            return shlex.split(opts)
-
-    java_opts = get_env_opts("JAVA_OPTS")
-    jython_opts = get_env_opts("JYTHON_OPTS")
-
-    args.extend(support_java_opts(java_opts))
-    args.extend(sys_args[1:])
-
-    if sys.stdout.encoding:
-        if sys.stdout.encoding.lower() == "cp65001":
-            sys.exit("""Jython does not support code page 65001 (CP_UTF8).
-Please try another code page by setting it with the chcp command.""")
-        args = [arg.decode(sys.stdout.encoding) for arg in args]
-        jython_opts = [arg.decode(sys.stdout.encoding) for arg in jython_opts]
-
-    return args, jython_opts
-
+def get_env_opts(envvar):
+    """ Return a list of the values in the named environment variable,
+        split according to shell conventions, and decoded to Unicode.
+    """
+    opts = os.environ.get(envvar, "") # bytes at this point
+    if is_windows:
+        opts = cmdline2list(opts)
+    else:
+        opts = shlex.split(opts)
+    return decode_list(opts)
 
 def main(sys_args):
-    sys_args, jython_opts = decode_args(sys_args)
+    # The entire program must work in Unicode
+    sys_args = decode_list(sys_args)
+
+    # sys_args[0] is this script (which we'll replace with 'java' eventually).
+    # Insert options for the java command from the environment.
+    sys_args[1:1] = support_java_opts(get_env_opts("JAVA_OPTS"))
+
+    # Parse the composite arguments (yes, even the ones from JAVA_OPTS),
+    # and return the "unparsed" tail considered arguments for Jython itself.
     args, jython_args = parse_launcher_args(sys_args)
+
+    # Build the data from which we can generate the command ultimately.
+    # Jython options supplied from the environment stand in front of the
+    # unparsed tail from the command line. 
+    jython_opts = get_env_opts("JYTHON_OPTS")
     jython_command = JythonCommand(args, jython_opts + jython_args)
+
+    # This is the "fully adjusted" command to launch, but still as Unicode.
     command = jython_command.command
 
     if args.profile and not args.help:
@@ -416,23 +493,32 @@
             os.unlink("profile.txt")
         except OSError:
             pass
+
     if args.print_requested and not args.help:
-        if jython_command.uname == "windows":
-            print subprocess.list2cmdline(jython_command.command)
+        if jython_command.uname == u"windows":
+            # Add escapes and quotes necessary to Windows.
+            # Normally used for byte strings but Python is tolerant :)
+            command_line = subprocess.list2cmdline(command)
         else:
-            print " ".join(pipes.quote(arg) for arg in jython_command.command)
+            # Just concatenate with spaces
+            command_line = u" ".join(command)
+        # It is possible the Unicode cannot be encoded for the console
+        enc = sys.stdout.encoding or 'ascii'
+        sys.stdout.write(command_line.encode(enc, 'replace'))
     else:
-        if not (is_windows or not hasattr(os, "execvp") or args.help or jython_command.uname == "cygwin"):
+        if not (is_windows or not hasattr(os, "execvp") or args.help or 
+                jython_command.uname == u"cygwin"):
             # Replace this process with the java process.
             #
             # NB such replacements actually do not work under Windows,
             # but if tried, they also fail very badly by hanging.
             # So don't even try!
+            command = encode_list(command)
             os.execvp(command[0], command[1:])
         else:
             result = 1
             try:
-                result = subprocess.call(command)
+                result = subprocess.call(encode_list(command))
                 if args.help:
                     print_help()
             except KeyboardInterrupt:

-- 
Repository URL: https://hg.python.org/jython


More information about the Jython-checkins mailing list