[Jython-checkins] jython (merge default -> default): Merge work on non-ascii file/user names to trunk.
jeff.allen
jython-checkins at python.org
Sun May 21 05:01:59 EDT 2017
https://hg.python.org/jython/rev/060e4e4a06d8
changeset: 8087:060e4e4a06d8
parent: 8075:0a00982f6ea5
parent: 8086:147fe05920a4
user: Jeff Allen <ja.py at farowl.co.uk>
date: Sun Apr 30 23:07:30 2017 +0100
summary:
Merge work on non-ascii file/user names to trunk.
files:
CPythonLib.includes | 1 +
Lib/javashell.py | 2 +-
Lib/ntpath.py | 560 ----------
Lib/subprocess.py | 38 +-
Lib/sysconfig.py | 6 +
Lib/test/test_exceptions.py | 3 -
Lib/test/test_exceptions_jy.py | 5 +-
Lib/test/test_httpservers.py | 3 +
Lib/test/test_java_visibility.py | 11 +-
Lib/test/test_jser.py | 4 +-
Lib/test/test_jython_launcher.py | 8 +-
Lib/test/test_ssl.py | 8 +-
Lib/test/test_support.py | 2 +-
Lib/test/test_zipimport_jy.py | 6 +-
build.xml | 3 +
src/org/python/core/Py.java | 297 ++++-
src/org/python/core/PyBaseException.java | 17 +-
src/org/python/core/PyBytecode.java | 9 +-
src/org/python/core/PyException.java | 25 +-
src/org/python/core/PyFile.java | 4 -
src/org/python/core/PyNullImporter.java | 13 +-
src/org/python/core/PyString.java | 6 +-
src/org/python/core/PySystemState.java | 65 +-
src/org/python/core/PyTableCode.java | 6 +-
src/org/python/core/PyUnicode.java | 4 +-
src/org/python/core/SyspathArchive.java | 2 +-
src/org/python/core/SyspathJavaLoader.java | 55 +-
src/org/python/core/__builtin__.java | 8 +-
src/org/python/core/imp.java | 26 +-
src/org/python/core/io/FileIO.java | 4 +-
src/org/python/core/packagecache/PathPackageManager.java | 14 +-
src/org/python/modules/_imp.java | 81 +-
src/org/python/modules/_py_compile.java | 36 +-
src/org/python/modules/posix/PosixModule.java | 18 +-
src/org/python/modules/zipimport/zipimporter.java | 8 +-
src/org/python/util/jython.java | 4 +-
src/shell/jython.exe | Bin
src/shell/jython.py | 314 +++--
38 files changed, 733 insertions(+), 943 deletions(-)
diff --git a/CPythonLib.includes b/CPythonLib.includes
--- a/CPythonLib.includes
+++ b/CPythonLib.includes
@@ -110,6 +110,7 @@
netrc.py
nntplib.py
numbers.py
+ntpath.py
nturl2path.py
opcode.py
optparse.py
diff --git a/Lib/javashell.py b/Lib/javashell.py
--- a/Lib/javashell.py
+++ b/Lib/javashell.py
@@ -55,7 +55,7 @@
env = self._formatEnvironment( self.environment )
try:
- p = Runtime.getRuntime().exec( shellCmd, env, File(os.getcwd()) )
+ p = Runtime.getRuntime().exec( shellCmd, env, File(os.getcwdu()) )
return p
except IOException, ex:
raise OSError(
diff --git a/Lib/ntpath.py b/Lib/ntpath.py
deleted file mode 100644
--- a/Lib/ntpath.py
+++ /dev/null
@@ -1,560 +0,0 @@
-# Module 'ntpath' -- common operations on WinNT/Win95 pathnames
-"""Common pathname manipulations, WindowsNT/95 version.
-
-Instead of importing this module directly, import os and refer to this
-module as os.path.
-"""
-
-import os
-import sys
-import stat
-import genericpath
-import warnings
-
-from genericpath import *
-
-__all__ = ["normcase","isabs","join","splitdrive","split","splitext",
- "basename","dirname","commonprefix","getsize","getmtime",
- "getatime","getctime", "islink","exists","lexists","isdir","isfile",
- "ismount","walk","expanduser","expandvars","normpath","abspath",
- "splitunc","curdir","pardir","sep","pathsep","defpath","altsep",
- "extsep","devnull","realpath","supports_unicode_filenames","relpath"]
-
-# strings representing various path-related bits and pieces
-curdir = '.'
-pardir = '..'
-extsep = '.'
-sep = '\\'
-pathsep = ';'
-altsep = '/'
-defpath = '.;C:\\bin'
-if 'ce' in sys.builtin_module_names:
- defpath = '\\Windows'
-elif 'os2' in sys.builtin_module_names:
- # OS/2 w/ VACPP
- altsep = '/'
-devnull = 'nul'
-
-# Normalize the case of a pathname and map slashes to backslashes.
-# Other normalizations (such as optimizing '../' away) are not done
-# (this is done by normpath).
-
-def normcase(s):
- """Normalize case of pathname.
-
- Makes all characters lowercase and all slashes into backslashes."""
- return s.replace("/", "\\").lower()
-
-
-# Return whether a path is absolute.
-# Trivial in Posix, harder on the Mac or MS-DOS.
-# For DOS it is absolute if it starts with a slash or backslash (current
-# volume), or if a pathname after the volume letter and colon / UNC resource
-# starts with a slash or backslash.
-
-def isabs(s):
- """Test whether a path is absolute"""
- s = splitdrive(s)[1]
- return s != '' and s[:1] in '/\\'
-
-
-# Join two (or more) paths.
-
-def join(a, *p):
- """Join two or more pathname components, inserting "\\" as needed.
- If any component is an absolute path, all previous path components
- will be discarded."""
- path = a
- for b in p:
- b_wins = 0 # set to 1 iff b makes path irrelevant
- if path == "":
- b_wins = 1
-
- elif isabs(b):
- # This probably wipes out path so far. However, it's more
- # complicated if path begins with a drive letter:
- # 1. join('c:', '/a') == 'c:/a'
- # 2. join('c:/', '/a') == 'c:/a'
- # But
- # 3. join('c:/a', '/b') == '/b'
- # 4. join('c:', 'd:/') = 'd:/'
- # 5. join('c:/', 'd:/') = 'd:/'
- if path[1:2] != ":" or b[1:2] == ":":
- # Path doesn't start with a drive letter, or cases 4 and 5.
- b_wins = 1
-
- # Else path has a drive letter, and b doesn't but is absolute.
- elif len(path) > 3 or (len(path) == 3 and
- path[-1] not in "/\\"):
- # case 3
- b_wins = 1
-
- if b_wins:
- path = b
- else:
- # Join, and ensure there's a separator.
- assert len(path) > 0
- if path[-1] in "/\\":
- if b and b[0] in "/\\":
- path += b[1:]
- else:
- path += b
- elif path[-1] == ":":
- path += b
- elif b:
- if b[0] in "/\\":
- path += b
- else:
- path += "\\" + b
- else:
- # path is not empty and does not end with a backslash,
- # but b is empty; since, e.g., split('a/') produces
- # ('a', ''), it's best if join() adds a backslash in
- # this case.
- path += '\\'
-
- return path
-
-
-# Split a path in a drive specification (a drive letter followed by a
-# colon) and the path specification.
-# It is always true that drivespec + pathspec == p
-def splitdrive(p):
- """Split a pathname into drive and path specifiers. Returns a 2-tuple
-"(drive,path)"; either part may be empty"""
- if p[1:2] == ':':
- return p[0:2], p[2:]
- return '', p
-
-
-# Parse UNC paths
-def splitunc(p):
- """Split a pathname into UNC mount point and relative path specifiers.
-
- Return a 2-tuple (unc, rest); either part may be empty.
- If unc is not empty, it has the form '//host/mount' (or similar
- using backslashes). unc+rest is always the input path.
- Paths containing drive letters never have an UNC part.
- """
- if p[1:2] == ':':
- return '', p # Drive letter present
- firstTwo = p[0:2]
- if firstTwo == '//' or firstTwo == '\\\\':
- # is a UNC path:
- # vvvvvvvvvvvvvvvvvvvv equivalent to drive letter
- # \\machine\mountpoint\directories...
- # directory ^^^^^^^^^^^^^^^
- normp = normcase(p)
- index = normp.find('\\', 2)
- if index == -1:
- ##raise RuntimeError, 'illegal UNC path: "' + p + '"'
- return ("", p)
- index = normp.find('\\', index + 1)
- if index == -1:
- index = len(p)
- return p[:index], p[index:]
- return '', p
-
-
-# Split a path in head (everything up to the last '/') and tail (the
-# rest). After the trailing '/' is stripped, the invariant
-# join(head, tail) == p holds.
-# The resulting head won't end in '/' unless it is the root.
-
-def split(p):
- """Split a pathname.
-
- Return tuple (head, tail) where tail is everything after the final slash.
- Either part may be empty."""
-
- d, p = splitdrive(p)
- # set i to index beyond p's last slash
- i = len(p)
- while i and p[i-1] not in '/\\':
- i = i - 1
- head, tail = p[:i], p[i:] # now tail has no slashes
- # remove trailing slashes from head, unless it's all slashes
- head2 = head
- while head2 and head2[-1] in '/\\':
- head2 = head2[:-1]
- head = head2 or head
- return d + head, tail
-
-
-# Split a path in root and extension.
-# The extension is everything starting at the last dot in the last
-# pathname component; the root is everything before that.
-# It is always true that root + ext == p.
-
-def splitext(p):
- return genericpath._splitext(p, sep, altsep, extsep)
-splitext.__doc__ = genericpath._splitext.__doc__
-
-
-# Return the tail (basename) part of a path.
-
-def basename(p):
- """Returns the final component of a pathname"""
- return split(p)[1]
-
-
-# Return the head (dirname) part of a path.
-
-def dirname(p):
- """Returns the directory component of a pathname"""
- return split(p)[0]
-
-# Is a path a symbolic link?
-# This will always return false on systems where posix.lstat doesn't exist.
-
-def islink(path):
- """Test for symbolic link.
- On WindowsNT/95 and OS/2 always returns false
- """
- return False
-
-# alias exists to lexists
-lexists = exists
-
-# Is a path a mount point? Either a root (with or without drive letter)
-# or an UNC path with at most a / or \ after the mount point.
-
-def ismount(path):
- """Test whether a path is a mount point (defined as root of drive)"""
- unc, rest = splitunc(path)
- if unc:
- return rest in ("", "/", "\\")
- p = splitdrive(path)[1]
- return len(p) == 1 and p[0] in '/\\'
-
-
-# Directory tree walk.
-# For each directory under top (including top itself, but excluding
-# '.' and '..'), func(arg, dirname, filenames) is called, where
-# dirname is the name of the directory and filenames is the list
-# of files (and subdirectories etc.) in the directory.
-# The func may modify the filenames list, to implement a filter,
-# or to impose a different order of visiting.
-
-def walk(top, func, arg):
- """Directory tree walk with callback function.
-
- For each directory in the directory tree rooted at top (including top
- itself, but excluding '.' and '..'), call func(arg, dirname, fnames).
- dirname is the name of the directory, and fnames a list of the names of
- the files and subdirectories in dirname (excluding '.' and '..'). func
- may modify the fnames list in-place (e.g. via del or slice assignment),
- and walk will only recurse into the subdirectories whose names remain in
- fnames; this can be used to implement a filter, or to impose a specific
- order of visiting. No semantics are defined for, or required of, arg,
- beyond that arg is always passed to func. It can be used, e.g., to pass
- a filename pattern, or a mutable object designed to accumulate
- statistics. Passing None for arg is common."""
- warnings.warnpy3k("In 3.x, os.path.walk is removed in favor of os.walk.",
- stacklevel=2)
- try:
- names = os.listdir(top)
- except os.error:
- return
- func(arg, top, names)
- for name in names:
- name = join(top, name)
- if isdir(name):
- walk(name, func, arg)
-
-
-# Expand paths beginning with '~' or '~user'.
-# '~' means $HOME; '~user' means that user's home directory.
-# If the path doesn't begin with '~', or if the user or $HOME is unknown,
-# the path is returned unchanged (leaving error reporting to whatever
-# function is called with the expanded path as argument).
-# See also module 'glob' for expansion of *, ? and [...] in pathnames.
-# (A function should also be defined to do full *sh-style environment
-# variable expansion.)
-
-def expanduser(path):
- """Expand ~ and ~user constructs.
-
- If user or $HOME is unknown, do nothing."""
- if path[:1] != '~':
- return path
- i, n = 1, len(path)
- while i < n and path[i] not in '/\\':
- i = i + 1
-
- if 'HOME' in os.environ:
- userhome = os.environ['HOME']
- elif 'USERPROFILE' in os.environ:
- userhome = os.environ['USERPROFILE']
- elif not 'HOMEPATH' in os.environ:
- return path
- else:
- try:
- drive = os.environ['HOMEDRIVE']
- except KeyError:
- drive = ''
- userhome = join(drive, os.environ['HOMEPATH'])
-
- if i != 1: #~user
- userhome = join(dirname(userhome), path[1:i])
-
- return userhome + path[i:]
-
-
-# Expand paths containing shell variable substitutions.
-# The following rules apply:
-# - no expansion within single quotes
-# - '$$' is translated into '$'
-# - '%%' is translated into '%' if '%%' are not seen in %var1%%var2%
-# - ${varname} is accepted.
-# - $varname is accepted.
-# - %varname% is accepted.
-# - varnames can be made out of letters, digits and the characters '_-'
-# (though is not verifed in the ${varname} and %varname% cases)
-# XXX With COMMAND.COM you can use any characters in a variable name,
-# XXX except '^|<>='.
-
-def expandvars(path):
- """Expand shell variables of the forms $var, ${var} and %var%.
-
- Unknown variables are left unchanged."""
- if '$' not in path and '%' not in path:
- return path
- import string
- varchars = string.ascii_letters + string.digits + '_-'
- res = ''
- index = 0
- pathlen = len(path)
- while index < pathlen:
- c = path[index]
- if c == '\'': # no expansion within single quotes
- path = path[index + 1:]
- pathlen = len(path)
- try:
- index = path.index('\'')
- res = res + '\'' + path[:index + 1]
- except ValueError:
- res = res + path
- index = pathlen - 1
- elif c == '%': # variable or '%'
- if path[index + 1:index + 2] == '%':
- res = res + c
- index = index + 1
- else:
- path = path[index+1:]
- pathlen = len(path)
- try:
- index = path.index('%')
- except ValueError:
- res = res + '%' + path
- index = pathlen - 1
- else:
- var = path[:index]
- if var in os.environ:
- res = res + os.environ[var]
- else:
- res = res + '%' + var + '%'
- elif c == '$': # variable or '$$'
- if path[index + 1:index + 2] == '$':
- res = res + c
- index = index + 1
- elif path[index + 1:index + 2] == '{':
- path = path[index+2:]
- pathlen = len(path)
- try:
- index = path.index('}')
- var = path[:index]
- if var in os.environ:
- res = res + os.environ[var]
- else:
- res = res + '${' + var + '}'
- except ValueError:
- res = res + '${' + path
- index = pathlen - 1
- else:
- var = ''
- index = index + 1
- c = path[index:index + 1]
- while c != '' and c in varchars:
- var = var + c
- index = index + 1
- c = path[index:index + 1]
- if var in os.environ:
- res = res + os.environ[var]
- else:
- res = res + '$' + var
- if c != '':
- index = index - 1
- else:
- res = res + c
- index = index + 1
- return res
-
-
-# Normalize a path, e.g. A//B, A/./B and A/foo/../B all become A\B.
-# Previously, this function also truncated pathnames to 8+3 format,
-# but as this module is called "ntpath", that's obviously wrong!
-
-def normpath(path):
- """Normalize path, eliminating double slashes, etc."""
- # Preserve unicode (if path is unicode)
- backslash, dot = (u'\\', u'.') if isinstance(path, unicode) else ('\\', '.')
- if path.startswith(('\\\\.\\', '\\\\?\\')):
- # in the case of paths with these prefixes:
- # \\.\ -> device names
- # \\?\ -> literal paths
- # do not do any normalization, but return the path unchanged
- return path
- path = path.replace("/", "\\")
- prefix, path = splitdrive(path)
- # We need to be careful here. If the prefix is empty, and the path starts
- # with a backslash, it could either be an absolute path on the current
- # drive (\dir1\dir2\file) or a UNC filename (\\server\mount\dir1\file). It
- # is therefore imperative NOT to collapse multiple backslashes blindly in
- # that case.
- # The code below preserves multiple backslashes when there is no drive
- # letter. This means that the invalid filename \\\a\b is preserved
- # unchanged, where a\\\b is normalised to a\b. It's not clear that there
- # is any better behaviour for such edge cases.
- if prefix == '':
- # No drive letter - preserve initial backslashes
- while path[:1] == "\\":
- prefix = prefix + backslash
- path = path[1:]
- else:
- # We have a drive letter - collapse initial backslashes
- if path.startswith("\\"):
- prefix = prefix + backslash
- path = path.lstrip("\\")
- comps = path.split("\\")
- i = 0
- while i < len(comps):
- if comps[i] in ('.', ''):
- del comps[i]
- elif comps[i] == '..':
- if i > 0 and comps[i-1] != '..':
- del comps[i-1:i+1]
- i -= 1
- elif i == 0 and prefix.endswith("\\"):
- del comps[i]
- else:
- i += 1
- else:
- i += 1
- # If the path is now empty, substitute '.'
- if not prefix and not comps:
- comps.append(dot)
- return prefix + backslash.join(comps)
-
-
-# Return an absolute path.
-try:
- from nt import _getfullpathname
-
-except ImportError: # no built-in nt module - maybe it's Jython ;)
-
- if os._name == 'nt' :
- # on Windows so Java version of sys deals in NT paths
- def abspath(path):
- """Return the absolute version of a path."""
- try:
- if isinstance(path, unicode):
- # Result must be unicode
- if path:
- path = sys.getPath(path)
- else:
- # Empty path must return current working directory
- path = os.getcwdu()
- else:
- # Result must be bytes
- if path:
- path = sys.getPath(path).encode('latin-1')
- else:
- # Empty path must return current working directory
- path = os.getcwd()
- except EnvironmentError:
- pass # Bad path - return unchanged.
- return normpath(path)
-
- else:
- # not running on Windows - mock up something sensible
- def abspath(path):
- """Return the absolute version of a path."""
- try:
- if isinstance(path, unicode):
- # Result must be unicode
- if path:
- path = join(os.getcwdu(), path)
- else:
- # Empty path must return current working directory
- path = os.getcwdu()
- else:
- # Result must be bytes
- if path:
- path = join(os.getcwd(), path)
- else:
- # Empty path must return current working directory
- path = os.getcwd()
- except EnvironmentError:
- pass # Bad path - return unchanged.
- return normpath(path)
-
-else: # use native Windows method on Windows
- def abspath(path):
- """Return the absolute version of a path."""
-
- if path: # Empty path must return current working directory.
- try:
- path = _getfullpathname(path)
- except WindowsError:
- pass # Bad path - return unchanged.
- elif isinstance(path, unicode):
- path = os.getcwdu()
- else:
- path = os.getcwd()
- return normpath(path)
-
-# realpath is a no-op on systems without islink support
-realpath = abspath
-# Win9x family and earlier have no Unicode filename support.
-supports_unicode_filenames = (hasattr(sys, "getwindowsversion") and
- sys.getwindowsversion()[3] >= 2)
-
-def _abspath_split(path):
- abs = abspath(normpath(path))
- prefix, rest = splitunc(abs)
- is_unc = bool(prefix)
- if not is_unc:
- prefix, rest = splitdrive(abs)
- return is_unc, prefix, [x for x in rest.split(sep) if x]
-
-def relpath(path, start=curdir):
- """Return a relative version of a path"""
-
- if not path:
- raise ValueError("no path specified")
-
- start_is_unc, start_prefix, start_list = _abspath_split(start)
- path_is_unc, path_prefix, path_list = _abspath_split(path)
-
- if path_is_unc ^ start_is_unc:
- raise ValueError("Cannot mix UNC and non-UNC paths (%s and %s)"
- % (path, start))
- if path_prefix.lower() != start_prefix.lower():
- if path_is_unc:
- raise ValueError("path is on UNC root %s, start on UNC root %s"
- % (path_prefix, start_prefix))
- else:
- raise ValueError("path is on drive %s, start on drive %s"
- % (path_prefix, start_prefix))
- # Work out how much of the filepath is shared by start and path.
- i = 0
- for e1, e2 in zip(start_list, path_list):
- if e1.lower() != e2.lower():
- break
- i += 1
-
- rel_list = [pardir] * (len(start_list)-i) + path_list[i:]
- if not rel_list:
- return curdir
- return join(*rel_list)
diff --git a/Lib/subprocess.py b/Lib/subprocess.py
--- a/Lib/subprocess.py
+++ b/Lib/subprocess.py
@@ -438,6 +438,7 @@
import java.nio.ByteBuffer
import org.python.core.io.RawIOBase
import org.python.core.io.StreamIO
+ from org.python.core.Py import fileSystemDecode
else:
import select
_has_poll = hasattr(select, 'poll')
@@ -779,7 +780,7 @@
maintain those byte values (which may be butchered as
Strings) for the subprocess if they haven't been modified.
"""
- # Determine what's safe to merge
+ # Determine what's necessary to merge (new or different)
merge_env = dict((key, value) for key, value in env.iteritems()
if key not in builder_env or
builder_env.get(key) != value)
@@ -789,8 +790,10 @@
for entry in entries:
if entry.getKey() not in env:
entries.remove()
-
- builder_env.putAll(merge_env)
+ # add anything new or different in env
+ for key, value in merge_env.iteritems():
+ # If the new value is bytes, assume it to be FS-encoded
+ builder_env.put(key, fileSystemDecode(value))
class Popen(object):
@@ -1308,9 +1311,6 @@
args = _cmdline2listimpl(args)
else:
args = list(args)
- # NOTE: CPython posix (execv) will str() any unicode
- # args first, maybe we should do the same on
- # posix. Windows passes unicode through, however
if any(not isinstance(arg, (str, unicode)) for arg in args):
raise TypeError('args must contain only strings')
args = _escape_args(args)
@@ -1321,6 +1321,11 @@
if executable is not None:
args[0] = executable
+ # NOTE: CPython posix (execv) will FS-encode any unicode args, but
+ # pass on bytes unchanged, because that's what the system expects.
+ # Java expects unicode, so we do the converse: leave unicode
+ # unchanged but FS-decode any supplied as bytes.
+ args = [fileSystemDecode(arg) for arg in args]
builder = java.lang.ProcessBuilder(args)
if stdin is None:
@@ -1330,16 +1335,20 @@
if stderr is None:
builder.redirectError(java.lang.ProcessBuilder.Redirect.INHERIT)
- # os.environ may be inherited for compatibility with CPython
+ # os.environ may be inherited for compatibility with CPython.
+ # Elements taken from os.environ are FS-decoded to unicode.
_setup_env(dict(os.environ if env is None else env),
builder.environment())
+ # The current working directory must also be unicode.
if cwd is None:
- cwd = os.getcwd()
- elif not os.path.exists(cwd):
- raise OSError(errno.ENOENT, os.strerror(errno.ENOENT), cwd)
- elif not os.path.isdir(cwd):
- raise OSError(errno.ENOTDIR, os.strerror(errno.ENOTDIR), cwd)
+ cwd = os.getcwdu()
+ else:
+ cwd = fileSystemDecode(cwd)
+ if not os.path.exists(cwd):
+ raise OSError(errno.ENOENT, os.strerror(errno.ENOENT), cwd)
+ elif not os.path.isdir(cwd):
+ raise OSError(errno.ENOTDIR, os.strerror(errno.ENOTDIR), cwd)
builder.directory(java.io.File(cwd))
# Let Java manage redirection of stderr to stdout (it's more
@@ -1890,9 +1899,10 @@
args = _cmdline2listimpl(command)
args = _escape_args(args)
args = _shell_command + args
- cwd = os.getcwd()
+ cwd = os.getcwdu()
-
+ # Python supplies FS-encoded arguments while Java expects String
+ args = [fileSystemDecode(arg) for arg in args]
builder = java.lang.ProcessBuilder(args)
builder.directory(java.io.File(cwd))
diff --git a/Lib/sysconfig.py b/Lib/sysconfig.py
--- a/Lib/sysconfig.py
+++ b/Lib/sysconfig.py
@@ -5,6 +5,11 @@
import os
from os.path import pardir, realpath
+def fileSystemEncode(path):
+ if isinstance(path, unicode):
+ return path.encode(sys.getfilesystemencoding())
+ return path
+
_INSTALL_SCHEMES = {
'posix_prefix': {
'stdlib': '{base}/lib/python{py_version_short}',
@@ -116,6 +121,7 @@
def _safe_realpath(path):
try:
+ path = fileSystemEncode(path)
return realpath(path)
except OSError:
return path
diff --git a/Lib/test/test_exceptions.py b/Lib/test/test_exceptions.py
--- a/Lib/test/test_exceptions.py
+++ b/Lib/test/test_exceptions.py
@@ -524,7 +524,6 @@
self.check_same_msg(Exception(), '')
- @unittest.skipIf(is_jython, "FIXME: not working in Jython")
def test_0_args_with_overridden___str__(self):
"""Check same msg for exceptions with 0 args and overridden __str__"""
# str() and unicode() on an exception with overridden __str__ that
@@ -550,7 +549,6 @@
self.assertRaises(UnicodeEncodeError, str, e)
self.assertEqual(unicode(e), u'f\xf6\xf6')
- @unittest.skipIf(is_jython, "FIXME: not working in Jython")
def test_1_arg_with_overridden___str__(self):
"""Check same msg for exceptions with overridden __str__ and 1 arg"""
# when __str__ is overridden and __unicode__ is not implemented
@@ -575,7 +573,6 @@
for args in argslist:
self.check_same_msg(Exception(*args), repr(args))
- @unittest.skipIf(is_jython, "FIXME: not working in Jython")
def test_many_args_with_overridden___str__(self):
"""Check same msg for exceptions with overridden __str__ and many args"""
# if __str__ returns an ascii string / ascii unicode string
diff --git a/Lib/test/test_exceptions_jy.py b/Lib/test/test_exceptions_jy.py
--- a/Lib/test/test_exceptions_jy.py
+++ b/Lib/test/test_exceptions_jy.py
@@ -70,11 +70,12 @@
# But the exception hook, via Py#displayException, does not fail when attempting to __str__ the exception args
with test_support.captured_stderr() as s:
sys.excepthook(RuntimeError, u"Drink \u2615", None)
- self.assertEqual(s.getvalue(), "RuntimeError\n")
+ # At minimum, it tells us what kind of exception it was
+ self.assertEqual(s.getvalue()[:12], "RuntimeError")
# It is fine with ascii values, of course
with test_support.captured_stderr() as s:
sys.excepthook(RuntimeError, u"Drink java", None)
- self.assertEqual(s.getvalue(), "RuntimeError: Drink java\n")
+ self.assertEqual(s.getvalue(), "RuntimeError: Drink java\n")
def test_main():
diff --git a/Lib/test/test_httpservers.py b/Lib/test/test_httpservers.py
--- a/Lib/test/test_httpservers.py
+++ b/Lib/test/test_httpservers.py
@@ -378,6 +378,9 @@
@unittest.skipIf(hasattr(os, 'geteuid') and os.geteuid() == 0,
"This test can't be run reliably as root (issue #13308).")
+@unittest.skipIf((not hasattr(os, 'symlink')) and
+ sys.executable.encode('ascii', 'replace') != sys.executable,
+ "Executable path is not pure ASCII.") # these fail for CPython too
class CGIHTTPServerTestCase(BaseTestCase):
class request_handler(NoLogRequestHandler, CGIHTTPRequestHandler):
pass
diff --git a/Lib/test/test_java_visibility.py b/Lib/test/test_java_visibility.py
--- a/Lib/test/test_java_visibility.py
+++ b/Lib/test/test_java_visibility.py
@@ -13,6 +13,7 @@
from org.python.tests.multihidden import BaseConnection
class VisibilityTest(unittest.TestCase):
+
def test_invisible(self):
for item in dir(Invisible):
self.assert_(not item.startswith("package"))
@@ -178,6 +179,7 @@
class JavaClassTest(unittest.TestCase):
+
def test_class_methods_visible(self):
self.assertFalse(HashMap.isInterface(),
'java.lang.Class methods should be visible on Class instances')
@@ -198,6 +200,7 @@
self.assertEquals(3, s.b, "Defined fields should take precedence")
class CoercionTest(unittest.TestCase):
+
def test_int_coercion(self):
c = Coercions()
self.assertEquals("5", c.takeInt(5))
@@ -234,6 +237,7 @@
self.assertEquals(c.tellClassNameObject(ht), "class java.util.Hashtable")
class RespectJavaAccessibilityTest(unittest.TestCase):
+
def run_accessibility_script(self, script, error=AttributeError):
fn = test_support.findfile(script)
self.assertRaises(error, execfile, fn)
@@ -254,6 +258,7 @@
self.run_accessibility_script("call_overridden_method.py")
class ClassloaderTest(unittest.TestCase):
+
def test_loading_classes_without_import(self):
cl = test_support.make_jar_classloader("../callbacker_test.jar")
X = cl.loadClass("org.python.tests.Callbacker")
@@ -265,11 +270,13 @@
self.assertEquals(None, called[0])
def test_main():
- test_support.run_unittest(VisibilityTest,
+ test_support.run_unittest(
+ VisibilityTest,
JavaClassTest,
CoercionTest,
RespectJavaAccessibilityTest,
- ClassloaderTest)
+ ClassloaderTest
+ )
if __name__ == "__main__":
test_main()
diff --git a/Lib/test/test_jser.py b/Lib/test/test_jser.py
--- a/Lib/test/test_jser.py
+++ b/Lib/test/test_jser.py
@@ -15,7 +15,9 @@
class JavaSerializationTests(unittest.TestCase):
def setUp(self):
- self.sername = os.path.join(sys.prefix, "test.ser")
+ name = os.path.join(sys.prefix, "test.ser")
+ # As we are using java.io directly, ensure file name is a unicode
+ self.sername = name.decode(sys.getfilesystemencoding())
def tearDown(self):
os.remove(self.sername)
diff --git a/Lib/test/test_jython_launcher.py b/Lib/test/test_jython_launcher.py
--- a/Lib/test/test_jython_launcher.py
+++ b/Lib/test/test_jython_launcher.py
@@ -31,7 +31,6 @@
# by the installer
return executable
-
def get_uname():
_uname = None
try:
@@ -49,9 +48,8 @@
class TestLauncher(unittest.TestCase):
-
+
def get_cmdline(self, cmd, env):
-
output = subprocess.check_output(cmd, env=env).rstrip()
if is_windows:
return subprocess._cmdline2list(output)
@@ -76,7 +74,7 @@
k, v = arg[2:].split("=")
props[k] = v
return props
-
+
def test_classpath_env(self):
env = self.get_newenv()
env["CLASSPATH"] = some_jar
@@ -207,7 +205,7 @@
def test_file(self):
self.assertCommand(['test.py'])
-
+
def test_dash(self):
self.assertCommand(['-i'])
diff --git a/Lib/test/test_ssl.py b/Lib/test/test_ssl.py
--- a/Lib/test/test_ssl.py
+++ b/Lib/test/test_ssl.py
@@ -27,7 +27,13 @@
HOST = support.HOST
def data_file(*name):
- return os.path.join(os.path.dirname(__file__), *name)
+ file = os.path.join(os.path.dirname(__file__), *name)
+ # Ensure we return unicode path. This tweak is not a divergence:
+ # CPython 2.7.13 fails the same way for a non-ascii location.
+ if isinstance(file, unicode):
+ return file
+ else:
+ return file.decode(sys.getfilesystemencoding())
# The custom key and certificate files used in test_ssl are generated
# using Lib/test/make_ssl_certs.py.
diff --git a/Lib/test/test_support.py b/Lib/test/test_support.py
--- a/Lib/test/test_support.py
+++ b/Lib/test/test_support.py
@@ -509,7 +509,7 @@
if is_jython:
# Jython disallows @ in module names
TESTFN = '$test'
- TESTFN_UNICODE = "$test-\xe0\xf2"
+ TESTFN_UNICODE = u"$test-\u87d2\u86c7" # = test python (Chinese)
TESTFN_ENCODING = sys.getfilesystemencoding()
elif os.name == 'riscos':
TESTFN = 'testfile'
diff --git a/Lib/test/test_zipimport_jy.py b/Lib/test/test_zipimport_jy.py
--- a/Lib/test/test_zipimport_jy.py
+++ b/Lib/test/test_zipimport_jy.py
@@ -51,8 +51,10 @@
A(path).somevar = 1
def test_main():
- test_support.run_unittest(SyspathZipimportTest)
- test_support.run_unittest(ZipImporterDictTest)
+ test_support.run_unittest(
+ SyspathZipimportTest,
+ ZipImporterDictTest
+ )
if __name__ == "__main__":
test_main()
diff --git a/build.xml b/build.xml
--- a/build.xml
+++ b/build.xml
@@ -236,6 +236,7 @@
<echo>output.dir = '${output.dir}'</echo>
<echo>compile.dir = '${compile.dir}'</echo>
<echo>exposed.dir = '${exposed.dir}'</echo>
+ <echo>gensrc.dir = '${gensrc.dir}'</echo>
<echo>dist.dir = '${dist.dir}'</echo>
<echo>apidoc.dir = '${apidoc.dir}'</echo>
<echo>templates.dir = '${templates.dir}'</echo>
@@ -434,6 +435,7 @@
<target name="antlr_gen" depends="prepare-output" unless="antlr.notneeded">
<java classname="org.antlr.Tool" failonerror="false" fork="true" dir="${jython.base.dir}">
<jvmarg value="-Xmx512m"/>
+ <jvmarg value="-Dfile.encoding=UTF-8"/>
<arg value="-Xconversiontimeout"/>
<arg value="2000"/>
<arg value="-fo"/>
@@ -694,6 +696,7 @@
<javadoc sourcepath="${source.dir}"
destdir="${apidoc.dir}"
source="${jdk.source.version}"
+ encoding="UTF-8"
maxmemory="1024m"
public="true"
breakiterator="yes"
diff --git a/src/org/python/core/Py.java b/src/org/python/core/Py.java
--- a/src/org/python/core/Py.java
+++ b/src/org/python/core/Py.java
@@ -2,6 +2,7 @@
package org.python.core;
import java.io.ByteArrayOutputStream;
+import java.io.CharArrayWriter;
import java.io.File;
import java.io.FileDescriptor;
import java.io.FileNotFoundException;
@@ -10,7 +11,7 @@
import java.io.InputStream;
import java.io.ObjectStreamException;
import java.io.OutputStream;
-import java.io.PrintStream;
+import java.io.PrintWriter;
import java.io.Serializable;
import java.io.StreamCorruptedException;
import java.lang.reflect.InvocationTargetException;
@@ -25,7 +26,14 @@
import java.util.List;
import java.util.Set;
+import org.python.antlr.base.mod;
+import org.python.core.adapter.ClassicPyObjectAdapter;
+import org.python.core.adapter.ExtensiblePyObjectAdapter;
+import org.python.modules.posix.PosixModule;
+import org.python.util.Generic;
+
import com.google.common.base.CharMatcher;
+
import jline.console.UserInterruptException;
import jnr.constants.Constant;
import jnr.constants.platform.Errno;
@@ -33,14 +41,6 @@
import jnr.posix.POSIXFactory;
import jnr.posix.util.Platform;
-import org.python.antlr.base.mod;
-import org.python.core.adapter.ClassicPyObjectAdapter;
-import org.python.core.adapter.ExtensiblePyObjectAdapter;
-import org.python.core.Traverseproc;
-import org.python.core.Visitproc;
-import org.python.modules.posix.PosixModule;
-import org.python.util.Generic;
-
/** Builtin types that are used to setup PyObject.
*
* Resolve circular dependency with some laziness. */
@@ -84,6 +84,7 @@
throw new StreamCorruptedException("unknown singleton: " + which);
}
}
+
/* Holds the singleton None and Ellipsis objects */
/** The singleton None Python object **/
public final static PyObject None = new PyNone();
@@ -129,7 +130,6 @@
public final static long TPFLAGS_IS_ABSTRACT = 1L << 20;
-
/** A unique object to indicate no conversion is possible
in __tojava__ methods **/
public final static Object NoConversion = new PySingleton("Error");
@@ -222,6 +222,10 @@
return new PyException(Py.IOError, args);
}
+ public static PyException IOError(Constant errno, String filename) {
+ return IOError(errno, Py.fileSystemEncode(filename));
+ }
+
public static PyException IOError(Constant errno, PyObject filename) {
int value = errno.intValue();
PyObject args = new PyTuple(Py.newInteger(value), PosixModule.strerror(value), filename);
@@ -683,6 +687,103 @@
}
}
+ /**
+ * Return a file name or path as Unicode (Java UTF-16 <code>String</code>), decoded if necessary
+ * from a Python <code>bytes</code> object, using the file system encoding. In Jython, this
+ * encoding is UTF-8, irrespective of the OS platform. This method is comparable with Python 3
+ * <code>os.fsdecode</code>, but for Java use, in places such as the <code>os</code> module. If
+ * the argument is not a <code>PyUnicode</code>, it will be decoded using the nominal Jython
+ * file system encoding. If the argument <i>is</i> a <code>PyUnicode</code>, its
+ * <code>String</code> is returned.
+ *
+ * @param filename as <code>bytes</code> to decode, or already as <code>unicode</code>
+ * @return unicode version of path
+ */
+ public static String fileSystemDecode(PyString filename) {
+ String s = filename.getString();
+ if (filename instanceof PyUnicode || CharMatcher.ascii().matchesAllOf(s)) {
+ // Already decoded (unicode) or usable as ASCII
+ return s;
+ } else {
+ // It's bytes, so must decode properly
+ assert "utf-8".equals(PySystemState.FILE_SYSTEM_ENCODING.toString());
+ return codecs.PyUnicode_DecodeUTF8(s, null);
+ }
+ }
+
+ /**
+ * As {@link #fileSystemDecode(PyString)} but raising <code>TypeError</code> if not a
+ * <code>str</code> or <code>unicode</code>.
+ *
+ * @param filename as <code>bytes</code> to decode, or already as <code>unicode</code>
+ * @return unicode version of the file name
+ */
+ public static String fileSystemDecode(PyObject filename) {
+ if (filename instanceof PyString) {
+ return fileSystemDecode((PyString)filename);
+ } else
+ throw Py.TypeError(String.format("coercing to Unicode: need string, %s type found",
+ filename.getType().fastGetName()));
+ }
+
+ /**
+ * Return a PyString object we can use as a file name or file path in places where Python
+ * expects a <code>bytes</code> (that is a <code>str</code>) object in the file system encoding.
+ * In Jython, this encoding is UTF-8, irrespective of the OS platform.
+ * <p>
+ * This is subtly different from CPython's use of "file system encoding", which tracks the
+ * platform's choice so that OS services may be called that have a bytes interface. Jython's
+ * interaction with the OS occurs via Java using String arguments representing Unicode values,
+ * so we have no need to match the encoding actually chosen by the platform (e.g. 'mbcs' on
+ * Windows). Rather we need a nominal Jython file system encoding, for use where the standard
+ * library forces byte paths on us (in Python 2). There is no reason for this choice to vary
+ * with OS platform. Methods receiving paths as <code>bytes</code> will
+ * {@link #fileSystemDecode(PyString)} them again for Java.
+ *
+ * @param filename as <code>unicode</code> to encode, or already as <code>bytes</code>
+ * @return encoded bytes version of path
+ */
+ public static PyString fileSystemEncode(String filename) {
+ if (CharMatcher.ascii().matchesAllOf(filename)) {
+ // Just wrap it as US-ASCII is a subset of the file system encoding
+ return Py.newString(filename);
+ } else {
+ // It's not just US-ASCII, so must encode properly
+ assert "utf-8".equals(PySystemState.FILE_SYSTEM_ENCODING.toString());
+ return Py.newString(codecs.PyUnicode_EncodeUTF8(filename, null));
+ }
+ }
+
+ /**
+ * Return a PyString object we can use as a file name or file path in places where Python
+ * expects a <code>bytes</code> (that is, <code>str</code>) object in the file system encoding.
+ * In Jython, this encoding is UTF-8, irrespective of the OS platform. This method is comparable
+ * with Python 3 <code>os.fsencode</code>. If the argument is a PyString, it is returned
+ * unchanged. If the argument is a PyUnicode, it is converted to a <code>bytes</code> using the
+ * nominal Jython file system encoding.
+ *
+ * @param filename as <code>unicode</code> to encode, or already as <code>bytes</code>
+ * @return encoded bytes version of path
+ */
+ public static PyString fileSystemEncode(PyString filename) {
+ return (filename instanceof PyUnicode) ? fileSystemEncode(filename.getString()) : filename;
+ }
+
+ /**
+ * Convert a <code>PyList</code> path to a list of Java <code>String</code> objects decoded from
+ * the path elements to strings guaranteed usable in the Java API.
+ *
+ * @param path a Python search path
+ * @return equivalent Java list
+ */
+ private static List<String> fileSystemDecode(PyList path) {
+ List<String> list = new ArrayList<>(path.__len__());
+ for (PyObject filename : path.getList()) {
+ list.add(fileSystemDecode(filename));
+ }
+ return list;
+ }
+
public static PyStringMap newStringMap() {
// enable lazy bootstrapping (see issue #1671)
if (!PyType.hasBuilder(PyStringMap.class)) {
@@ -1073,11 +1174,11 @@
}
Py.getSystemState().callExitFunc();
}
- //XXX: this needs review to make sure we are cutting out all of the Java
- // exceptions.
+
+ //XXX: this needs review to make sure we are cutting out all of the Java exceptions.
private static String getStackTrace(Throwable javaError) {
- ByteArrayOutputStream buf = new ByteArrayOutputStream();
- javaError.printStackTrace(new PrintStream(buf));
+ CharArrayWriter buf = new CharArrayWriter();
+ javaError.printStackTrace(new PrintWriter(buf));
String str = buf.toString();
int index = -1;
@@ -1170,31 +1271,55 @@
ts.exception = null;
}
- public static void displayException(PyObject type, PyObject value, PyObject tb,
- PyObject file) {
+ /**
+ * Print the description of an exception as a big string, on standard error or a given
+ * byte-oriented file. The arguments are closely equivalent to the tuple returned by Python
+ * <code>sys.exc_info</code>. Compare with Python <code>traceback.print_exception</code>.
+ *
+ * @param type of exception
+ * @param value the exception parameter (second argument to <code>raise</code>)
+ * @param tb traceback of the call stack where the exception originally occurred
+ * @param file to print encoded string to, or null meaning standard error
+ */
+ public static void displayException(PyObject type, PyObject value, PyObject tb, PyObject file) {
+
+ // Output is to standard error, unless a file object has been given.
StdoutWrapper stderr = Py.stderr;
if (file != null) {
stderr = new FixedFileWrapper(file);
}
flushLine();
+ // The creation of the report operates entirely in Java String (to support Unicode).
+ String formattedException = exceptionToString(type, value, tb);
+ stderr.print(formattedException);
+ }
+
+ /**
+ * Format the description of an exception as a big string. The arguments are closely equivalent
+ * to the tuple returned by Python <code>sys.exc_info</code>. Compare with Python
+ * <code>traceback.format_exception</code>.
+ *
+ * @param type of exception
+ * @param value the exception parameter (second argument to <code>raise</code>)
+ * @param tb traceback of the call stack where the exception originally occurred
+ * @return string representation of the traceback and exception
+ */
+ static String exceptionToString(PyObject type, PyObject value, PyObject tb) {
+
+ // Compose the stack dump, syntax error, and actual exception in this buffer:
+ StringBuilder buf;
+
if (tb instanceof PyTraceback) {
- stderr.print(((PyTraceback) tb).dumpStack());
+ buf = new StringBuilder(((PyTraceback)tb).dumpStack());
+ } else {
+ buf = new StringBuilder();
}
+
if (__builtin__.isinstance(value, Py.SyntaxError)) {
- PyObject filename = value.__findattr__("filename");
- PyObject text = value.__findattr__("text");
- PyObject lineno = value.__findattr__("lineno");
- stderr.print(" File \"");
- stderr.print(filename == Py.None || filename == null ?
- "<string>" : filename.toString());
- stderr.print("\", line ");
- stderr.print(lineno == null ? Py.newString("0") : lineno);
- stderr.print("\n");
- if (text != Py.None && text != null && text.__len__() != 0) {
- printSyntaxErrorText(stderr, value.__findattr__("offset").asInt(),
- text.toString());
- }
+ // The value part of the exception is a syntax error: first emit that.
+ appendSyntaxError(buf, value);
+ // Now supersede it with just the syntax error message for the next phase.
value = value.__findattr__("msg");
if (value == null) {
value = Py.None;
@@ -1203,26 +1328,53 @@
if (value.getJavaProxy() != null) {
Object javaError = value.__tojava__(Throwable.class);
-
if (javaError != null && javaError != Py.NoConversion) {
- stderr.println(getStackTrace((Throwable) javaError));
+ // The value is some Java Throwable: append that too
+ buf.append(getStackTrace((Throwable)javaError));
}
}
+
+ // Be prepared for formatting the value part to fail (fall back to just the type)
try {
- stderr.println(formatException(type, value));
+ buf.append(formatException(type, value));
} catch (Exception ex) {
- stderr.println(formatException(type, Py.None));
+ buf.append(formatException(type, Py.None));
+ }
+ buf.append('\n');
+
+ return buf.toString();
+ }
+
+ /**
+ * Helper to {@link #exceptionToString(PyObject, PyObject, PyObject)} when the value in an
+ * exception turns out to be a syntax error.
+ */
+ private static void appendSyntaxError(StringBuilder buf, PyObject value) {
+
+ PyObject filename = value.__findattr__("filename");
+ PyObject text = value.__findattr__("text");
+ PyObject lineno = value.__findattr__("lineno");
+
+ buf.append(" File \"");
+ buf.append(filename == Py.None || filename == null ? "<string>" : filename.toString());
+ buf.append("\", line ");
+ buf.append(lineno == null ? Py.newString('0') : lineno);
+ buf.append('\n');
+
+ if (text != Py.None && text != null && text.__len__() != 0) {
+ appendSyntaxErrorText(buf, value.__findattr__("offset").asInt(), text.toString());
}
}
+
/**
- * Print the two lines showing where a SyntaxError was caused.
+ * Generate two lines showing where a SyntaxError was caused.
*
- * @param out StdoutWrapper to print to
+ * @param buf to append with generated message text
* @param offset the offset into text
- * @param text a source code String line
+ * @param text a source code line
*/
- private static void printSyntaxErrorText(StdoutWrapper out, int offset, String text) {
+ private static void appendSyntaxErrorText(StringBuilder buf, int offset, String text) {
if (offset >= 0) {
if (offset > 0 && offset == text.length()) {
offset--;
@@ -1250,19 +1402,21 @@
text = text.substring(i, text.length());
}
- out.print(" ");
- out.print(text);
+ buf.append(" ");
+ buf.append(text);
if (text.length() == 0 || !text.endsWith("\n")) {
- out.print("\n");
+ buf.append('\n');
}
if (offset == -1) {
return;
}
- out.print(" ");
+
+ // The indicator line " ^"
+ buf.append(" ");
for (offset--; offset > 0; offset--) {
- out.print(" ");
+ buf.append(' ');
}
- out.print("^\n");
+ buf.append("^\n");
}
public static String formatException(PyObject type, PyObject value) {
@@ -1290,19 +1444,34 @@
}
buf.append(className);
} else {
- buf.append(useRepr ? type.__repr__() : type.__str__());
+ // Never happens since Python 2.7? Do something sensible anyway.
+ buf.append(asMessageString(type, useRepr));
}
+
if (value != null && value != Py.None) {
- // only print colon if the str() of the object is not the empty string
- PyObject s = useRepr ? value.__repr__() : value.__str__();
- if (!(s instanceof PyString) || s.__len__() != 0) {
- buf.append(": ");
+ String s = asMessageString(value, useRepr);
+ // Print colon and object (unless it renders as "")
+ if (s.length() > 0) {
+ buf.append(": ").append(s);
}
- buf.append(s);
}
+
return buf.toString();
}
+ /** Defensive method to avoid exceptions from decoding (or import encodings) */
+ private static String asMessageString(PyObject value, boolean useRepr) {
+ if (useRepr)
+ value = value.__repr__();
+ if (value instanceof PyUnicode) {
+ return value.asString();
+ } else {
+ // Carefully avoid decoding errors that would swallow the intended message
+ String s = value.__str__().getString();
+ return PyString.encode_UnicodeEscape(s, false);
+ }
+ }
+
public static void writeUnraisable(Throwable unraisable, PyObject obj) {
PyException pye = JavaError(unraisable);
stderr.println(String.format("Exception %s in %s ignored",
@@ -1565,6 +1734,16 @@
}
}
+ private static final String IMPORT_SITE_ERROR = ""
+ + "Cannot import site module and its dependencies: %s\n"
+ + "Determine if the following attributes are correct:\n" //
+ + " * sys.path: %s\n"
+ + " This attribute might be including the wrong directories, such as from CPython\n"
+ + " * sys.prefix: %s\n"
+ + " This attribute is set by the system property python.home, although it can\n"
+ + " be often automatically determined by the location of the Jython jar file\n\n"
+ + "You can use the -S option or python.import.site=false to not import the site module";
+
public static boolean importSiteIfSelected() {
if (Options.importSite) {
try {
@@ -1574,18 +1753,10 @@
} catch (PyException pye) {
if (pye.match(Py.ImportError)) {
PySystemState sys = Py.getSystemState();
- throw Py.ImportError(String.format(""
- + "Cannot import site module and its dependencies: %s\n"
- + "Determine if the following attributes are correct:\n"
- + " * sys.path: %s\n"
- + " This attribute might be including the wrong directories, such as from CPython\n"
- + " * sys.prefix: %s\n"
- + " This attribute is set by the system property python.home, although it can\n"
- + " be often automatically determined by the location of the Jython jar file\n\n"
- + "You can use the -S option or python.import.site=false to not import the site module",
- pye.value.__getattr__("args").__getitem__(0),
- sys.path,
- sys.prefix));
+ String value = pye.value.__getattr__("args").__getitem__(0).toString();
+ List<String> path = fileSystemDecode(sys.path);
+ throw Py.ImportError(
+ String.format(IMPORT_SITE_ERROR, value, path, PySystemState.prefix));
} else {
throw pye;
}
@@ -2266,7 +2437,7 @@
}
/* Here we would actually like to call cls.__findattr__("__metaclass__")
* rather than cls.getType(). However there are circumstances where the
- * metaclass doesn't show up as __metaclass__. On the other hand we need
+ * metaclass doesn't show up as __metaclass__. On the other hand we need
* to avoid that checker refers to builtin type___subclasscheck__ or
* type___instancecheck__. Filtering out checker-instances of
* PyBuiltinMethodNarrow does the trick. We also filter out PyMethodDescr
diff --git a/src/org/python/core/PyBaseException.java b/src/org/python/core/PyBaseException.java
--- a/src/org/python/core/PyBaseException.java
+++ b/src/org/python/core/PyBaseException.java
@@ -169,12 +169,17 @@
@ExposedMethod(doc = BuiltinDocs.BaseException___str___doc)
final PyString BaseException___str__() {
switch (args.__len__()) {
- case 0:
- return Py.EmptyString;
- case 1:
- return args.__getitem__(0).__str__();
- default:
- return args.__str__();
+ case 0:
+ return Py.EmptyString;
+ case 1:
+ PyObject arg = args.__getitem__(0);
+ if (arg instanceof PyString) {
+ return (PyString)arg;
+ } else {
+ return arg.__str__();
+ }
+ default:
+ return args.__str__();
}
}
diff --git a/src/org/python/core/PyBytecode.java b/src/org/python/core/PyBytecode.java
--- a/src/org/python/core/PyBytecode.java
+++ b/src/org/python/core/PyBytecode.java
@@ -116,11 +116,13 @@
throw Py.AttributeError(name);
}
+ @Override
public void __setattr__(String name, PyObject value) {
// no writable attributes
throwReadonly(name);
}
+ @Override
public void __delattr__(String name) {
throwReadonly(name);
}
@@ -137,6 +139,7 @@
return new PyTuple(pystr);
}
+ @Override
public PyObject __findattr_ex__(String name) {
// have to craft co_varnames specially
if (name == "co_varnames") {
@@ -149,7 +152,7 @@
return toPyStringTuple(co_freevars);
}
if (name == "co_filename") {
- return new PyString(co_filename);
+ return Py.fileSystemEncode(co_filename); // bytes object expected by clients
}
if (name == "co_name") {
return new PyString(co_name);
@@ -1156,7 +1159,7 @@
"zap" this information, to prevent END_FINALLY from
re-raising the exception. (But non-local gotos
should still be resumed.)
- */
+ */
PyObject exit;
PyObject u = stack.pop(), v, w;
if (u == Py.None) {
@@ -1350,7 +1353,7 @@
if (why != Why.RETURN) {
retval = Py.None;
}
- } else {
+ } else {
// store the stack in the frame for reentry from the yield;
f.f_savedlocals = stack.popN(stack.size());
}
diff --git a/src/org/python/core/PyException.java b/src/org/python/core/PyException.java
--- a/src/org/python/core/PyException.java
+++ b/src/org/python/core/PyException.java
@@ -62,21 +62,31 @@
}
private boolean printingStackTrace = false;
+ @Override
public void printStackTrace() {
Py.printException(this);
}
+ @Override
public Throwable fillInStackTrace() {
return Options.includeJavaStackInExceptions ? super.fillInStackTrace() : this;
}
+ @Override
public synchronized void printStackTrace(PrintStream s) {
if (printingStackTrace) {
super.printStackTrace(s);
} else {
try {
+ /*
+ * Ensure that non-ascii characters are made printable. One would prefer to emit
+ * Unicode, but the output stream too often only accepts bytes. (s is not
+ * necessarily a console, e.g. during a doctest.)
+ */
+ PyFile err = new PyFile(s);
+ err.setEncoding("ascii", "backslashreplace");
printingStackTrace = true;
- Py.displayException(type, value, traceback, new PyFile(s));
+ Py.displayException(type, value, traceback, err);
} finally {
printingStackTrace = false;
}
@@ -92,12 +102,9 @@
}
}
+ @Override
public synchronized String toString() {
- ByteArrayOutputStream buf = new ByteArrayOutputStream();
- if (!printingStackTrace) {
- printStackTrace(new PrintStream(buf));
- }
- return buf.toString();
+ return Py.exceptionToString(type, value, traceback);
}
/**
@@ -332,10 +339,11 @@
public static String exceptionClassName(PyObject obj) {
return obj instanceof PyClass ? ((PyClass)obj).__name__ : ((PyType)obj).fastGetName();
}
-
-
+
+
/* Traverseproc support */
+ @Override
public int traverse(Visitproc visit, Object arg) {
int retValue;
if (type != null) {
@@ -357,6 +365,7 @@
return 0;
}
+ @Override
public boolean refersDirectlyTo(PyObject ob) {
return ob != null && (type == ob || value == ob || traceback == ob);
}
diff --git a/src/org/python/core/PyFile.java b/src/org/python/core/PyFile.java
--- a/src/org/python/core/PyFile.java
+++ b/src/org/python/core/PyFile.java
@@ -168,10 +168,6 @@
ArgParser ap = new ArgParser("file", args, kwds, new String[] {"name", "mode", "buffering"},
1);
PyObject name = ap.getPyObject(0);
- if (!(name instanceof PyString)) {
- throw Py.TypeError("coercing to Unicode: need string, '" + name.getType().fastGetName()
- + "' type found");
- }
String mode = ap.getString(1, "r");
int bufsize = ap.getInt(2, -1);
file___init__(new FileIO((PyString) name, parseMode(mode)), name, mode, bufsize);
diff --git a/src/org/python/core/PyNullImporter.java b/src/org/python/core/PyNullImporter.java
--- a/src/org/python/core/PyNullImporter.java
+++ b/src/org/python/core/PyNullImporter.java
@@ -20,7 +20,7 @@
public PyNullImporter(PyObject pathObj) {
super();
- String pathStr = asPath(pathObj);
+ String pathStr = Py.fileSystemDecode(pathObj);
if (pathStr.equals("")) {
throw Py.ImportError("empty pathname");
}
@@ -42,17 +42,6 @@
return Py.None;
}
- // FIXME Refactoring move helper function to a central util library
- // FIXME Also can take in account working in zip file systems
-
- private static String asPath(PyObject pathObj) {
- if (!(pathObj instanceof PyString)) {
- throw Py.TypeError(String.format("coercing to Unicode: need string, %s type found",
- pathObj.getType().fastGetName()));
- }
- return pathObj.toString();
- }
-
private static boolean isDir(String pathStr) {
if (pathStr.equals("")) {
return false;
diff --git a/src/org/python/core/PyString.java b/src/org/python/core/PyString.java
--- a/src/org/python/core/PyString.java
+++ b/src/org/python/core/PyString.java
@@ -79,7 +79,7 @@
}
PyString(StringBuilder buffer) {
- this(TYPE, new String(buffer));
+ this(TYPE, buffer.toString());
}
/**
@@ -3998,9 +3998,9 @@
* Implements PEP-3101 {}-formatting methods <code>str.format()</code> and
* <code>unicode.format()</code>. When called with <code>enclosingIterator == null</code>, this
* method takes this object as its formatting string. The method is also called (calls itself)
- * to deal with nested formatting sepecifications. In that case, <code>enclosingIterator</code>
+ * to deal with nested formatting specifications. In that case, <code>enclosingIterator</code>
* is a {@link MarkupIterator} on this object and <code>value</code> is a substring of this
- * object needing recursive transaltion.
+ * object needing recursive translation.
*
* @param args to be interpolated into the string
* @param keywords for the trailing args
diff --git a/src/org/python/core/PySystemState.java b/src/org/python/core/PySystemState.java
--- a/src/org/python/core/PySystemState.java
+++ b/src/org/python/core/PySystemState.java
@@ -82,6 +82,9 @@
public final static PyString float_repr_style = Py.newString("short");
+ /** Nominal Jython file system encoding (as <code>sys.getfilesystemencoding()</code>) */
+ static final PyString FILE_SYSTEM_ENCODING = Py.newString("utf-8");
+
public static boolean py3kwarning = false;
public final static Class flags = Options.class;
@@ -109,12 +112,25 @@
public static PackageManager packageManager;
private static File cachedir;
- private static PyList defaultPath;
- private static PyList defaultArgv;
- private static PyObject defaultExecutable;
+ private static PyList defaultPath; // list of bytes or unicode
+ private static PyList defaultArgv; // list of bytes or unicode
+ private static PyObject defaultExecutable; // bytes or unicode or None
public static Properties registry; // = init_registry();
+ /**
+ * A string giving the site-specific directory prefix where the platform independent Python
+ * files are installed; by default, this is based on the property <code>python.home</code> or
+ * the location of the Jython JAR. The main collection of Python library modules is installed in
+ * the directory <code>prefix/Lib</code>. This object should contain bytes in the file system
+ * encoding for consistency with use in the standard library (see <code>sysconfig.py</code>).
+ */
public static PyObject prefix;
+ /**
+ * A string giving the site-specific directory prefix where the platform-dependent Python files
+ * are installed; by default, this is the same as {@link #prefix}. This object should
+ * contain bytes in the file system encoding for consistency with use in the standard library
+ * (see <code>sysconfig.py</code>).
+ */
public static PyObject exec_prefix = Py.EmptyString;
public static final PyString byteorder = new PyString("big");
@@ -504,7 +520,7 @@
}
public PyObject getfilesystemencoding() {
- return Py.None;
+ return FILE_SYSTEM_ENCODING;
}
@@ -840,10 +856,10 @@
}
}
if (prefix != null) {
- PySystemState.prefix = Py.newString(prefix);
+ PySystemState.prefix = Py.fileSystemEncode(prefix);
}
if (exec_prefix != null) {
- PySystemState.exec_prefix = Py.newString(exec_prefix);
+ PySystemState.exec_prefix = Py.fileSystemEncode(exec_prefix);
}
try {
String jythonpath = System.getenv("JYTHONPATH");
@@ -1155,7 +1171,8 @@
}
cachedir = new File(props.getProperty(PYTHON_CACHEDIR, CACHEDIR_DEFAULT_NAME));
if (!cachedir.isAbsolute()) {
- cachedir = new File(prefix == null ? null : prefix.toString(), cachedir.getPath());
+ String prefixString = prefix == null ? null : Py.fileSystemDecode(prefix);
+ cachedir = new File(prefixString, cachedir.getPath());
}
}
@@ -1174,16 +1191,16 @@
PyList argv = new PyList();
if (args != null) {
for (String arg : args) {
- argv.append(Py.newStringOrUnicode(arg));
+ argv.append(Py.newStringOrUnicode(arg)); // XXX or always newUnicode?
}
}
return argv;
}
/**
- * Determine the default sys.executable value from the registry.
- * If registry is not set (as in standalone jython jar), will use sys.prefix + /bin/jython(.exe) and the file may
- * not exist. Users can create a wrapper in it's place to make it work in embedded environments.
+ * Determine the default sys.executable value from the registry. If registry is not set (as in
+ * standalone jython jar), we will use sys.prefix + /bin/jython(.exe) and the file may not
+ * exist. Users can create a wrapper in its place to make it work in embedded environments.
* Only if sys.prefix is null, returns Py.None
*
* @param props a Properties registry
@@ -1191,26 +1208,26 @@
*/
private static PyObject initExecutable(Properties props) {
String executable = props.getProperty("python.executable");
- if (executable == null) {
+ File executableFile;
+ if (executable != null) {
+ // The executable from the registry is a Unicode String path
+ executableFile = new File(executable);
+ } else {
if (prefix == null) {
return Py.None;
} else {
- executable = prefix.asString() + File.separator + "bin" + File.separator;
- if (Platform.IS_WINDOWS) {
- executable += "jython.exe";
- } else {
- executable += "jython";
- }
+ // The prefix is a unicode or encoded bytes object
+ executableFile = new File(Py.fileSystemDecode(prefix),
+ Platform.IS_WINDOWS ? "bin\\jython.exe" : "bin/jython");
}
}
- File executableFile = new File(executable);
try {
executableFile = executableFile.getCanonicalFile();
} catch (IOException ioe) {
executableFile = executableFile.getAbsoluteFile();
}
- return new PyString(executableFile.getPath());
+ return Py.newStringOrUnicode(executableFile.getPath()); // XXX always bytes in CPython
}
/**
@@ -1353,8 +1370,8 @@
PyList path = new PyList();
addPaths(path, props.getProperty("python.path", ""));
if (prefix != null) {
- String libpath = new File(prefix.toString(), "Lib").toString();
- path.append(new PyString(libpath));
+ String libpath = new File(Py.fileSystemDecode(prefix), "Lib").toString();
+ path.append(Py.fileSystemEncode(libpath)); // XXX or newUnicode?
}
if (standalone) {
// standalone jython: add the /Lib directory inside JYTHON_JAR to the path
@@ -1397,7 +1414,8 @@
private static void addPaths(PyList path, String pypath) {
StringTokenizer tok = new StringTokenizer(pypath, java.io.File.pathSeparator);
while (tok.hasMoreTokens()) {
- path.append(new PyString(tok.nextToken().trim()));
+ // Use unicode object if necessary to represent the element
+ path.append(Py.newStringOrUnicode(tok.nextToken().trim())); // XXX or newUnicode?
}
}
@@ -1540,6 +1558,7 @@
closer.cleanup();
}
+ @Override
public void close() { cleanup(); }
public static class PySystemStateCloser {
diff --git a/src/org/python/core/PyTableCode.java b/src/org/python/core/PyTableCode.java
--- a/src/org/python/core/PyTableCode.java
+++ b/src/org/python/core/PyTableCode.java
@@ -66,6 +66,7 @@
// co_lnotab, co_stacksize
};
+ @Override
public PyObject __dir__() {
PyString members[] = new PyString[__members__.length];
for (int i = 0; i < __members__.length; i++)
@@ -80,11 +81,13 @@
throw Py.AttributeError(name);
}
+ @Override
public void __setattr__(String name, PyObject value) {
// no writable attributes
throwReadonly(name);
}
+ @Override
public void __delattr__(String name) {
throwReadonly(name);
}
@@ -99,6 +102,7 @@
return new PyTuple(pystr);
}
+ @Override
public PyObject __findattr_ex__(String name) {
// have to craft co_varnames specially
if (name == "co_varnames") {
@@ -111,7 +115,7 @@
return toPyStringTuple(co_freevars);
}
if (name == "co_filename") {
- return new PyString(co_filename);
+ return Py.fileSystemEncode(co_filename); // bytes object expected by clients
}
if (name == "co_name") {
return new PyString(co_name);
diff --git a/src/org/python/core/PyUnicode.java b/src/org/python/core/PyUnicode.java
--- a/src/org/python/core/PyUnicode.java
+++ b/src/org/python/core/PyUnicode.java
@@ -89,7 +89,7 @@
}
PyUnicode(StringBuilder buffer) {
- this(TYPE, new String(buffer));
+ this(TYPE, buffer.toString());
}
private static StringBuilder fromCodePoints(Iterator<Integer> iter) {
@@ -713,7 +713,7 @@
for (Iterator<Integer> iter = newSubsequenceIterator(start, stop, step); iter.hasNext();) {
buffer.appendCodePoint(iter.next());
}
- return createInstance(new String(buffer));
+ return createInstance(buffer.toString());
}
@ExposedMethod(type = MethodType.CMP, doc = BuiltinDocs.unicode___getslice___doc)
diff --git a/src/org/python/core/SyspathArchive.java b/src/org/python/core/SyspathArchive.java
--- a/src/org/python/core/SyspathArchive.java
+++ b/src/org/python/core/SyspathArchive.java
@@ -4,7 +4,7 @@
import java.util.zip.*;
@Untraversable
-public class SyspathArchive extends PyString {
+public class SyspathArchive extends PyUnicode {
private ZipFile zipFile;
public SyspathArchive(String archiveName) throws IOException {
diff --git a/src/org/python/core/SyspathJavaLoader.java b/src/org/python/core/SyspathJavaLoader.java
--- a/src/org/python/core/SyspathJavaLoader.java
+++ b/src/org/python/core/SyspathJavaLoader.java
@@ -26,20 +26,20 @@
public SyspathJavaLoader(ClassLoader parent) {
super(parent);
}
-
- /**
+
+ /**
* Returns a byte[] with the contents read from an InputStream.
- *
+ *
* The stream is closed after reading the bytes.
- *
- * @param input The input stream
+ *
+ * @param input The input stream
* @param size The number of bytes to read
- *
+ *
* @return an array of byte[size] with the contents read
* */
private byte[] getBytesFromInputStream(InputStream input, int size) {
- try {
+ try {
byte[] buffer = new byte[size];
int nread = 0;
while(nread < size) {
@@ -56,9 +56,9 @@
}
}
}
-
+
private byte[] getBytesFromDir(String dir, String name) {
- try {
+ try {
File file = getFile(dir, name);
if (file == null) {
return null;
@@ -71,7 +71,7 @@
}
}
-
+
private byte[] getBytesFromArchive(SyspathArchive archive, String name) {
String entryname = name.replace('.', SLASH_CHAR) + ".class";
ZipEntry ze = archive.getEntry(entryname);
@@ -79,7 +79,7 @@
return null;
}
try {
- return getBytesFromInputStream(archive.getInputStream(ze),
+ return getBytesFromInputStream(archive.getInputStream(ze),
(int)ze.getSize());
} catch (IOException e) {
return null;
@@ -98,11 +98,11 @@
}
return pkg;
}
-
+
@Override
protected Class<?> findClass(String name) throws ClassNotFoundException {
PySystemState sys = Py.getSystemState();
- ClassLoader sysClassLoader = sys.getClassLoader();
+ ClassLoader sysClassLoader = sys.getClassLoader();
if (sysClassLoader != null) {
// sys.classLoader overrides this class loader!
return sysClassLoader.loadClass(name);
@@ -114,13 +114,10 @@
PyObject entry = replacePathItem(sys, i, path);
if (entry instanceof SyspathArchive) {
SyspathArchive archive = (SyspathArchive)entry;
- buffer = getBytesFromArchive(archive, name);
+ buffer = getBytesFromArchive(archive, name);
} else {
- if (!(entry instanceof PyUnicode)) {
- entry = entry.__str__();
- }
- String dir = entry.toString();
- buffer = getBytesFromDir(dir, name);
+ String dir = Py.fileSystemDecode(entry);
+ buffer = getBytesFromDir(dir, name);
}
if (buffer != null) {
definePackageForClass(name);
@@ -130,7 +127,7 @@
// couldn't find the .class file on sys.path
throw new ClassNotFoundException(name);
}
-
+
@Override
protected URL findResource(String res) {
PySystemState sys = Py.getSystemState();
@@ -157,10 +154,7 @@
}
continue;
}
- if (!(entry instanceof PyUnicode)) {
- entry = entry.__str__();
- }
- String dir = sys.getPath(entry.toString());
+ String dir = sys.getPath(Py.fileSystemDecode(entry));
try {
File resource = new File(dir, res);
if (!resource.exists()) {
@@ -179,7 +173,7 @@
throws IOException
{
List<URL> resources = new ArrayList<URL>();
-
+
PySystemState sys = Py.getSystemState();
res = deslashResource(res);
@@ -204,10 +198,7 @@
}
continue;
}
- if (!(entry instanceof PyUnicode)) {
- entry = entry.__str__();
- }
- String dir = sys.getPath(entry.toString());
+ String dir = sys.getPath(Py.fileSystemDecode(entry));
try {
File resource = new File(dir, res);
if (!resource.exists()) {
@@ -220,7 +211,7 @@
}
return Collections.enumeration(resources);
}
-
+
static PyObject replacePathItem(PySystemState sys, int idx, PyList paths) {
PyObject path = paths.__getitem__(idx);
if (path instanceof SyspathArchive) {
@@ -229,9 +220,9 @@
}
try {
- // this has the side affect of adding the jar to the PackageManager during the
+ // this has the side effect of adding the jar to the PackageManager during the
// initialization of the SyspathArchive
- path = new SyspathArchive(sys.getPath(path.toString()));
+ path = new SyspathArchive(sys.getPath(Py.fileSystemDecode(path)));
} catch (Exception e) {
return path;
}
diff --git a/src/org/python/core/__builtin__.java b/src/org/python/core/__builtin__.java
--- a/src/org/python/core/__builtin__.java
+++ b/src/org/python/core/__builtin__.java
@@ -85,7 +85,7 @@
case 18:
return __builtin__.eval(arg1);
case 19:
- __builtin__.execfile(arg1.asString());
+ __builtin__.execfile(Py.fileSystemDecode(arg1));
return Py.None;
case 23:
return __builtin__.hex(arg1);
@@ -141,7 +141,7 @@
case 18:
return __builtin__.eval(arg1, arg2);
case 19:
- __builtin__.execfile(arg1.asString(), arg2);
+ __builtin__.execfile(Py.fileSystemDecode(arg1), arg2);
return Py.None;
case 20:
return __builtin__.filter(arg1, arg2);
@@ -191,7 +191,7 @@
case 18:
return __builtin__.eval(arg1, arg2, arg3);
case 19:
- __builtin__.execfile(arg1.asString(), arg2, arg3);
+ __builtin__.execfile(Py.fileSystemDecode(arg1), arg2, arg3);
return Py.None;
case 21:
return __builtin__.getattr(arg1, arg2, arg3);
@@ -1629,7 +1629,7 @@
"dont_inherit"},
3);
PyObject source = ap.getPyObject(0);
- String filename = ap.getString(1);
+ String filename = Py.fileSystemDecode(ap.getPyObject(1));
String mode = ap.getString(2);
int flags = ap.getInt(3, 0);
boolean dont_inherit = ap.getPyObject(4, Py.False).__nonzero__();
diff --git a/src/org/python/core/imp.java b/src/org/python/core/imp.java
--- a/src/org/python/core/imp.java
+++ b/src/org/python/core/imp.java
@@ -294,6 +294,7 @@
return compileSource(name, makeStream(file), sourceFilename, mtime);
}
+ /** Remove the last three characters of a file name and add the compiled suffix "$py.class". */
public static String makeCompiledFilename(String filename) {
return filename.substring(0, filename.length() - 3) + "$py.class";
}
@@ -418,7 +419,8 @@
}
if (moduleLocation != null) {
- module.__setattr__("__file__", new PyString(moduleLocation));
+ // Standard library expects __file__ to be encoded bytes
+ module.__setattr__("__file__", Py.fileSystemEncode(moduleLocation));
} else if (module.__findattr__("__file__") == null) {
// Should probably never happen (but maybe with an odd custom builtins, or
// Java Integration)
@@ -543,10 +545,8 @@
return loadFromLoader(loader, moduleName);
}
}
- if (!(p instanceof PyUnicode)) {
- p = p.__str__();
- }
- ret = loadFromSource(sys, name, moduleName, p.toString());
+ // p could be unicode or bytes (in the file system encoding)
+ ret = loadFromSource(sys, name, moduleName, Py.fileSystemDecode(p));
if (ret != null) {
return ret;
}
@@ -606,7 +606,7 @@
// display names are for identification purposes (e.g. __file__): when entry is
// null it forces java.io.File to be a relative path (e.g. foo/bar.py instead of
// /tmp/foo/bar.py)
- String displayDirName = entry.equals("") ? null : entry.toString();
+ String displayDirName = entry.equals("") ? null : entry;
String displaySourceName = new File(new File(displayDirName, name), sourceName).getPath();
String displayCompiledName =
new File(new File(displayDirName, name), compiledName).getPath();
@@ -640,7 +640,7 @@
compiledFile = new File(dirName, compiledName);
} else {
PyModule m = addModule(modName);
- PyObject filename = new PyString(new File(displayDirName, name).getPath());
+ PyObject filename = Py.newStringOrUnicode(new File(displayDirName, name).getPath());
m.__dict__.__setitem__("__path__", new PyList(new PyObject[] {filename}));
}
@@ -928,9 +928,6 @@
}
}
}
- if (name.indexOf(File.separatorChar) != -1) {
- throw Py.ImportError("Import by filename is not supported.");
- }
PyObject modules = Py.getSystemState().modules;
PyObject pkgMod = null;
String pkgName = null;
@@ -974,6 +971,13 @@
return mod;
}
+ /** Defend against attempt to import by filename (withdrawn feature). */
+ private static void checkNotFile(String name){
+ if (name.indexOf(File.separatorChar) != -1) {
+ throw Py.ImportError("Import by filename is not supported.");
+ }
+ }
+
private static void ensureFromList(PyObject mod, PyObject fromlist, String name) {
ensureFromList(mod, fromlist, name, false);
}
@@ -1016,6 +1020,7 @@
* @return an imported module (Java or Python)
*/
public static PyObject importName(String name, boolean top) {
+ checkNotFile(name);
PyUnicode.checkEncoding(name);
ReentrantLock importLock = Py.getSystemState().getImportLock();
importLock.lock();
@@ -1036,6 +1041,7 @@
*/
public static PyObject importName(String name, boolean top, PyObject modDict,
PyObject fromlist, int level) {
+ checkNotFile(name);
PyUnicode.checkEncoding(name);
ReentrantLock importLock = Py.getSystemState().getImportLock();
importLock.lock();
diff --git a/src/org/python/core/io/FileIO.java b/src/org/python/core/io/FileIO.java
--- a/src/org/python/core/io/FileIO.java
+++ b/src/org/python/core/io/FileIO.java
@@ -67,7 +67,7 @@
* @see #FileIO(PyString name, String mode)
*/
public FileIO(String name, String mode) {
- this(Py.newString(name), mode);
+ this(Py.newUnicode(name), mode);
}
/**
@@ -82,7 +82,7 @@
*/
public FileIO(PyString name, String mode) {
parseMode(mode);
- File absPath = new RelativeFile(name.toString());
+ File absPath = new RelativeFile(Py.fileSystemDecode(name));
try {
if ((appending && !(reading || plus)) || (writing && !reading && !plus)) {
diff --git a/src/org/python/core/packagecache/PathPackageManager.java b/src/org/python/core/packagecache/PathPackageManager.java
--- a/src/org/python/core/packagecache/PathPackageManager.java
+++ b/src/org/python/core/packagecache/PathPackageManager.java
@@ -40,12 +40,9 @@
+ name;
for (int i = 0; i < path.__len__(); i++) {
+ // Each entry in the path may be byte-encoded or unicode
PyObject entry = path.pyget(i);
- if (!(entry instanceof PyUnicode)) {
- entry = entry.__str__();
- }
- String dir = entry.toString();
-
+ String dir = Py.fileSystemDecode(entry);
File f = new RelativeFile(dir, child);
try {
if (f.isDirectory() && imp.caseok(f, name)) {
@@ -103,11 +100,8 @@
String child = jpkg.__name__.replace('.', File.separatorChar);
for (int i = 0; i < path.__len__(); i++) {
- PyObject entry = path.pyget(i);
- if (!(entry instanceof PyUnicode)) {
- entry = entry.__str__();
- }
- String dir = entry.toString();
+ // Each entry in the path may be byte-encoded or unicode
+ String dir = Py.fileSystemDecode(path.pyget(i));
if (dir.length() == 0) {
dir = null;
diff --git a/src/org/python/modules/_imp.java b/src/org/python/modules/_imp.java
--- a/src/org/python/modules/_imp.java
+++ b/src/org/python/modules/_imp.java
@@ -68,14 +68,14 @@
* This needs to be consolidated with the code in (@see org.python.core.imp).
*
* @param name module name
- * @param entry a path String
+ * @param entry a path String (Unicode file or directory name)
* @param findingPackage if looking for a package only try to locate __init__
* @return null if no module found otherwise module information
*/
static ModuleInfo findFromSource(String name, String entry, boolean findingPackage,
boolean preferSource) {
String sourceName = "__init__.py";
- String compiledName = makeCompiledFilename(sourceName);
+ String compiledName = imp.makeCompiledFilename(sourceName);
String directoryName = PySystemState.getPathLazy(entry);
// displayDirName is for identification purposes: when null it
// forces java.io.File to be a relative path (e.g. foo/bar.py
@@ -97,7 +97,7 @@
} else {
Py.writeDebug("import", "trying source " + dir.getPath());
sourceName = name + ".py";
- compiledName = makeCompiledFilename(sourceName);
+ compiledName = imp.makeCompiledFilename(sourceName);
sourceFile = new File(directoryName, sourceName);
compiledFile = new File(directoryName, compiledName);
}
@@ -152,8 +152,7 @@
throw Py.TypeError("must be a file-like object");
}
PySystemState sys = Py.getSystemState();
- String compiledFilename =
- makeCompiledFilename(sys.getPath(filename));
+ String compiledFilename = imp.makeCompiledFilename(sys.getPath(filename));
mod = imp.createFromSource(modname.intern(), (InputStream)o,
filename, compiledFilename);
PyObject modules = sys.modules;
@@ -161,15 +160,38 @@
return mod;
}
- public static PyObject load_compiled(String name, String pathname) {
- return load_compiled(name, pathname, new PyFile(pathname, "rb", -1));
- }
-
public static PyObject reload(PyObject module) {
return __builtin__.reload(module);
}
- public static PyObject load_compiled(String name, String pathname, PyObject file) {
+ /**
+ * Return a module with the given <code>name</code>, the result of executing the compiled code
+ * at the given <code>pathname</code>. If this path is a <code>PyUnicode</code>, it is used
+ * exactly; if it is a <code>PyString</code> it is taken to be file-system encoded.
+ *
+ * @param name the module name
+ * @param pathname to the compiled module (becomes <code>__file__</code>)
+ * @return the module called <code>name</code>
+ */
+ public static PyObject load_compiled(String name, PyString pathname) {
+ String _pathname = Py.fileSystemDecode(pathname);
+ return _load_compiled(name, _pathname, new PyFile(_pathname, "rb", -1));
+ }
+
+ /**
+ * Return a module with the given <code>name</code>, the result of executing the compiled code
+ * in the given <code>file</code> stream.
+ *
+ * @param name the module name
+ * @param pathname a file path that is not null (becomes <code>__file__</code>)
+ * @param file stream from which the compiled code is taken
+ * @return the module called <code>name</code>
+ */
+ public static PyObject load_compiled(String name, PyString pathname, PyObject file) {
+ return _load_compiled(name, Py.fileSystemDecode(pathname), file);
+ }
+
+ private static PyObject _load_compiled(String name, String pathname, PyObject file) {
InputStream stream = (InputStream) file.__tojava__(InputStream.class);
if (stream == Py.NoConversion) {
throw Py.TypeError("must be a file-like object");
@@ -190,8 +212,10 @@
public static PyObject find_module(String name, PyObject path) {
if (path == Py.None && PySystemState.getBuiltin(name) != null) {
- return new PyTuple(Py.None, Py.newString(name),
- new PyTuple(Py.EmptyString, Py.EmptyString,
+ return new PyTuple(Py.None,
+ Py.newString(name),
+ new PyTuple(Py.EmptyString,
+ Py.EmptyString,
Py.newInteger(C_BUILTIN)));
}
@@ -199,14 +223,14 @@
path = Py.getSystemState().path;
}
for (PyObject p : path.asIterable()) {
- ModuleInfo mi = findFromSource(name, p.toString(), false, true);
+ ModuleInfo mi = findFromSource(name, Py.fileSystemDecode(p), false, true);
if(mi == null) {
continue;
}
return new PyTuple(mi.file,
- new PyString(mi.filename),
- new PyTuple(new PyString(mi.suffix),
- new PyString(mi.mode),
+ Py.newStringOrUnicode(mi.filename),
+ new PyTuple(Py.newString(mi.suffix),
+ Py.newString(mi.mode),
Py.newInteger(mi.type)));
}
throw Py.ImportError("No module named " + name);
@@ -216,7 +240,8 @@
PyObject mod = Py.None;
PySystemState sys = Py.getSystemState();
int type = data.__getitem__(2).asInt();
- while(mod == Py.None) {
+ String filenameString = Py.fileSystemDecode(filename);
+ while (mod == Py.None) {
String compiledName;
switch (type) {
case PY_SOURCE:
@@ -226,8 +251,8 @@
}
// XXX: This should load the accompanying byte code file instead, if it exists
- String resolvedFilename = sys.getPath(filename.toString());
- compiledName = makeCompiledFilename(resolvedFilename);
+ String resolvedFilename = sys.getPath(filenameString);
+ compiledName = imp.makeCompiledFilename(resolvedFilename);
if (name.endsWith(".__init__")) {
name = name.substring(0, name.length() - ".__init__".length());
} else if (name.equals("__init__")) {
@@ -241,19 +266,20 @@
}
mod = imp.createFromSource(name.intern(), (InputStream)o,
- filename.toString(), compiledName, mtime);
+ filenameString, compiledName, mtime);
break;
case PY_COMPILED:
- mod = load_compiled(name, filename.toString(), file);
+ mod = _load_compiled(name, filenameString, file);
break;
case PKG_DIRECTORY:
PyModule m = imp.addModule(name);
m.__dict__.__setitem__("__path__", new PyList(new PyObject[] {filename}));
m.__dict__.__setitem__("__file__", filename);
- ModuleInfo mi = findFromSource(name, filename.toString(), true, true);
+ ModuleInfo mi = findFromSource(name, filenameString, true, true);
type = mi.type;
file = mi.file;
- filename = new PyString(mi.filename);
+ filenameString = mi.filename;
+ filename = Py.newStringOrUnicode(filenameString);
break;
default:
throw Py.ImportError("No module named " + name);
@@ -264,8 +290,13 @@
return mod;
}
- public static String makeCompiledFilename(String filename) {
- return imp.makeCompiledFilename(filename);
+ /**
+ * Variant of {@link imp#makeCompiledFilename(String)} dealing with encoded bytes. In the context
+ * where this is used from Python, a result in encoded bytes is preferable.
+ */
+ public static PyString makeCompiledFilename(PyString filename) {
+ filename = Py.fileSystemEncode(filename);
+ return Py.newString(imp.makeCompiledFilename(filename.getString()));
}
public static PyObject get_magic() {
diff --git a/src/org/python/modules/_py_compile.java b/src/org/python/modules/_py_compile.java
--- a/src/org/python/modules/_py_compile.java
+++ b/src/org/python/modules/_py_compile.java
@@ -12,22 +12,30 @@
public class _py_compile {
public static PyList __all__ = new PyList(new PyString[] { new PyString("compile") });
- public static boolean compile(String filename, String cfile, String dfile) {
- // Resolve relative path names. dfile is only used for error messages and should not be
- // resolved
+ /**
+ * Java wrapper on the module compiler in support of py_compile.compile. Filenames here will
+ * be interpreted as Unicode if they are PyUnicode, and as byte-encoded names if they are only
+ * PyString.
+ *
+ * @param fileName actual source file name
+ * @param compiledName compiled filename
+ * @param displayName displayed source filename, only used for error messages (and not resolved)
+ * @return true if successful
+ */
+ public static boolean compile(PyString fileName, PyString compiledName, PyString displayName) {
+ // Resolve source path and check it exists
PySystemState sys = Py.getSystemState();
- filename = sys.getPath(filename);
- cfile = sys.getPath(cfile);
+ String file = sys.getPath(Py.fileSystemDecode(fileName));
+ File f = new File(file);
+ if (!f.exists()) {
+ throw Py.IOError(Errno.ENOENT, file);
+ }
- File file = new File(filename);
- if (!file.exists()) {
- throw Py.IOError(Errno.ENOENT, Py.newString(filename));
- }
- String name = getModuleName(file);
-
- byte[] bytes = org.python.core.imp.compileSource(name, file, dfile, cfile);
- org.python.core.imp.cacheCompiledSource(filename, cfile, bytes);
-
+ // Convert file in which to put the byte code and display name (each may be null)
+ String c = (compiledName == null) ? null : sys.getPath(Py.fileSystemDecode(compiledName));
+ String d = (displayName == null) ? null : Py.fileSystemDecode(displayName);
+ byte[] bytes = org.python.core.imp.compileSource(getModuleName(f), f, d, c);
+ org.python.core.imp.cacheCompiledSource(file, c, bytes);
return bytes.length > 0;
}
diff --git a/src/org/python/modules/posix/PosixModule.java b/src/org/python/modules/posix/PosixModule.java
--- a/src/org/python/modules/posix/PosixModule.java
+++ b/src/org/python/modules/posix/PosixModule.java
@@ -486,7 +486,8 @@
"getcwd() -> path\n\n" +
"Return a string representing the current working directory.");
public static PyObject getcwd() {
- return Py.newStringOrUnicode(Py.getSystemState().getCurrentWorkingDir());
+ // The return value is bytes in the file system encoding
+ return Py.fileSystemEncode(Py.getSystemState().getCurrentWorkingDir());
}
public static PyString __doc__getcwdu = new PyString(
@@ -1343,25 +1344,24 @@
return environ;
}
for (Map.Entry<String, String> entry : env.entrySet()) {
+ // The shell restricts names to a subset of ASCII and values are encoded byte strings.
environ.__setitem__(
- Py.newStringOrUnicode(entry.getKey()),
- Py.newStringOrUnicode(entry.getValue()));
+ Py.newString(entry.getKey()),
+ Py.fileSystemEncode(entry.getValue()));
}
return environ;
}
/**
- * Return a path as a String from a PyObject
+ * Return a path as a String from a PyObject, which must be <code>str</code> or
+ * <code>unicode</code>. If the path is a <code>str</code> (that is, <code>bytes</code>), it is
+ * interpreted into Unicode using the file system encoding.
*
* @param path a PyObject, raising a TypeError if an invalid path type
* @return a String path
*/
private static String asPath(PyObject path) {
- if (path instanceof PyString) {
- return path.toString();
- }
- throw Py.TypeError(String.format("coercing to Unicode: need string, %s type found",
- path.getType().fastGetName()));
+ return Py.fileSystemDecode(path);
}
/**
diff --git a/src/org/python/modules/zipimport/zipimporter.java b/src/org/python/modules/zipimport/zipimporter.java
--- a/src/org/python/modules/zipimport/zipimporter.java
+++ b/src/org/python/modules/zipimport/zipimporter.java
@@ -20,6 +20,7 @@
import org.python.core.PySystemState;
import org.python.core.PyTuple;
import org.python.core.PyType;
+import org.python.core.PyUnicode;
import org.python.core.Traverseproc;
import org.python.core.Visitproc;
import org.python.core.util.FileUtil;
@@ -80,7 +81,7 @@
@ExposedMethod
final void zipimporter___init__(PyObject[] args, String[] kwds) {
ArgParser ap = new ArgParser("__init__", args, kwds, new String[] {"path"});
- String path = ap.getString(0);
+ String path = Py.fileSystemDecode(ap.getPyObject(0));
zipimporter___init__(path);
}
@@ -113,10 +114,11 @@
pathFile = parentFile;
}
if (archive != null) {
- files = zipimport._zip_directory_cache.__finditem__(archive);
+ PyUnicode archivePath = Py.newUnicode(archive);
+ files = zipimport._zip_directory_cache.__finditem__(archivePath);
if (files == null) {
files = readDirectory(archive);
- zipimport._zip_directory_cache.__setitem__(archive, files);
+ zipimport._zip_directory_cache.__setitem__(archivePath, files);
}
} else {
throw zipimport.ZipImportError("not a Zip file: " + path);
diff --git a/src/org/python/util/jython.java b/src/org/python/util/jython.java
--- a/src/org/python/util/jython.java
+++ b/src/org/python/util/jython.java
@@ -341,8 +341,8 @@
} else {
try {
interp.globals.__setitem__(new PyString("__file__"),
- new PyString(opts.filename));
-
+ // Note that __file__ is widely expected to be encoded bytes
+ Py.fileSystemEncode(opts.filename));
FileInputStream file;
try {
file = new FileInputStream(new RelativeFile(opts.filename));
diff --git a/src/shell/jython.exe b/src/shell/jython.exe
index 7c9cbe9eec239c5768c17f873726220b09966341..b7500204c603274a6bdb9ec15064bd27f31c14ac
GIT binary patch
[stripped]
diff --git a/src/shell/jython.py b/src/shell/jython.py
--- a/src/shell/jython.py
+++ b/src/shell/jython.py
@@ -20,19 +20,68 @@
is_windows = os.name == "nt" or (os.name == "java" and os._name == "nt")
+# A note about encoding:
+#
+# A major motivation for this program is to launch Jython on Windows, where
+# console and file encoding may be different. Command-line arguments and
+# environment variables are presented in Python 2.7 as byte-data, encoded
+# "somehow". It becomes important to know which decoding to use as soon as
+# paths may contain non-ascii characters. It is not the console encoding.
+# Experiment shows that sys.getfilesystemencoding() is generally applicable
+# to arguments, environment variables and spawning a subprocess.
+#
+# On a Windows 10 box, this comes up with pseudo-codec 'mbcs'. This supports
+# European accented characters pretty well.
+#
+# When localised to Chinese(simplified) the FS encoding mbcs includes many
+# more points than cp936 (the console encoding), although it still struggles
+# with European accented characters.
+
+ENCODING = sys.getfilesystemencoding() or "utf-8"
+
+
+def get_env(envvar, default=None):
+ """ Return the named environment variable, decoded to Unicode."""
+ v = os.environ.get(envvar, default)
+ # Tolerate default given as bytes, as we're bound to forget sometimes
+ if isinstance(v, bytes):
+ v = v.decode(ENCODING)
+ # Remove quotes sometimes necessary around the value
+ if v is not None and v.startswith('"') and v.endswith('"'):
+ v = v[1:-1]
+ return v
+
+def encode_list(args, encoding=ENCODING):
+ """ Convert list of Unicode strings to list of encoded byte strings."""
+ r = []
+ for a in args:
+ if not isinstance(a, bytes): a = a.encode(encoding)
+ r.append(a)
+ return r
+
+def decode_list(args, encoding=ENCODING):
+ """ Convert list of byte strings to list of Unicode strings."""
+ r = []
+ for a in args:
+ if not isinstance(a, unicode): a = a.decode(encoding)
+ r.append(a)
+ return r
def parse_launcher_args(args):
+ """ Process the given argument list into two objects, the first part being
+ a namespace of checked arguments to the interpreter itself, and the rest
+ being the Python program it will run and its arguments.
+ """
class Namespace(object):
pass
parsed = Namespace()
- parsed.java = []
- parsed.properties = OrderedDict()
- parsed.boot = False
- parsed.jdb = False
- parsed.help = False
- parsed.print_requested = False
- parsed.profile = False
- parsed.jdb = None
+ parsed.boot = False # --boot flag given
+ parsed.jdb = False # --jdb flag given
+ parsed.help = False # --help or -h flag given
+ parsed.print_requested = False # --print flag given
+ parsed.profile = False # --profile flag given
+ parsed.properties = OrderedDict() # properties to give the JVM
+ parsed.java = [] # any other arguments to give the JVM
it = iter(args)
next(it) # ignore sys.argv[0]
@@ -42,11 +91,11 @@
arg = next(it)
except StopIteration:
break
- if arg.startswith("-D"):
- k, v = arg[2:].split("=")
+ if arg.startswith(u"-D"):
+ k, v = arg[2:].split(u"=")
parsed.properties[k] = v
i += 1
- elif arg in ("-J-classpath", "-J-cp"):
+ elif arg in (u"-J-classpath", u"-J-cp"):
try:
next_arg = next(it)
except StopIteration:
@@ -55,24 +104,24 @@
bad_option("Bad option for -J-classpath")
parsed.classpath = next_arg
i += 2
- elif arg.startswith("-J-Xmx"):
+ elif arg.startswith(u"-J-Xmx"):
parsed.mem = arg[2:]
i += 1
- elif arg.startswith("-J-Xss"):
+ elif arg.startswith(u"-J-Xss"):
parsed.stack = arg[2:]
i += 1
- elif arg.startswith("-J"):
+ elif arg.startswith(u"-J"):
parsed.java.append(arg[2:])
i += 1
- elif arg == "--print":
+ elif arg == u"--print":
parsed.print_requested = True
i += 1
- elif arg in ("-h", "--help"):
+ elif arg in (u"-h", u"--help"):
parsed.help = True
- elif arg in ("--boot", "--jdb", "--profile"):
+ elif arg in (u"--boot", u"--jdb", u"--profile"):
setattr(parsed, arg[2:], True)
i += 1
- elif arg == "--":
+ elif arg == u"--":
i += 1
break
else:
@@ -92,13 +141,13 @@
if hasattr(self, "_uname"):
return self._uname
if is_windows:
- self._uname = "windows"
+ self._uname = u"windows"
else:
uname = subprocess.check_output(["uname"]).strip().lower()
if uname.startswith("cygwin"):
- self._uname = "cygwin"
+ self._uname = u"cygwin"
else:
- self._uname = uname
+ self._uname = uname.decode(ENCODING)
return self._uname
@property
@@ -114,22 +163,23 @@
return self._java_command
def setup_java_command(self):
+ """ Sets java_home and java_command according to environment and parsed
+ launcher arguments --jdb and --help.
+ """
if self.args.help:
self._java_home = None
- self._java_command = "java"
+ self._java_command = u"java"
return
-
- if "JAVA_HOME" not in os.environ:
- self._java_home = None
- self._java_command = "jdb" if self.args.jdb else "java"
+
+ command = u"jdb" if self.args.jdb else u"java"
+
+ self._java_home = get_env("JAVA_HOME")
+ if self._java_home is None or self.uname == u"cygwin":
+ # Assume java or jdb on the path
+ self._java_command = command
else:
- self._java_home = os.environ["JAVA_HOME"]
- if self.uname == "cygwin":
- self._java_command = "jdb" if self.args.jdb else "java"
- else:
- self._java_command = os.path.join(
- self.java_home, "bin",
- "jdb" if self.args.jdb else "java")
+ # Assume java or jdb in JAVA_HOME/bin
+ self._java_command = os.path.join(self._java_home, u"bin", command)
@property
def executable(self):
@@ -139,28 +189,37 @@
# Modified from
# http://stackoverflow.com/questions/3718657/how-to-properly-determine-current-script-directory-in-python/22881871#22881871
if getattr(sys, "frozen", False): # py2exe, PyInstaller, cx_Freeze
- path = os.path.abspath(sys.executable)
+ # Frozen. Let it go with the executable path.
+ bytes_path = sys.executable
else:
- def inspect_this(): pass
- path = inspect.getabsfile(inspect_this)
- self._executable = os.path.realpath(path)
+ # Not frozen. Any object defined in this file will do.
+ bytes_path = inspect.getfile(JythonCommand)
+ # Python 2 thinks in bytes. Carefully normalise in Unicode.
+ path = os.path.realpath(bytes_path.decode(ENCODING))
+ try:
+ # If possible, make this relative to the CWD.
+ # This helps manage multi-byte names in installation location.
+ path = os.path.relpath(path, os.getcwdu())
+ except ValueError:
+ # Many reasons why this might be impossible: use an absolute path.
+ path = os.path.abspath(path)
+ self._executable = path
return self._executable
@property
def jython_home(self):
if hasattr(self, "_jython_home"):
return self._jython_home
- if "JYTHON_HOME" in os.environ:
- self._jython_home = os.environ["JYTHON_HOME"]
- else:
- self._jython_home = os.path.dirname(os.path.dirname(self.executable))
- if self.uname == "cygwin":
- self._jython_home = subprocess.check_output(["cygpath", "--windows", self._jython_home]).strip()
+ self._jython_home = get_env("JYTHON_HOME") or os.path.dirname(
+ os.path.dirname(self.executable))
+ if self.uname == u"cygwin":
+ # Even on Cygwin, we need a Windows-style path for this
+ home = unicode_subprocess(["cygpath", "--windows", home])
return self._jython_home
@property
def jython_opts():
- return os.environ.get("JYTHON_OPTS", "")
+ return get_env("JYTHON_OPTS", "")
@property
def classpath_delimiter(self):
@@ -179,11 +238,9 @@
else:
jars.append(os.path.join(self.jython_home, "javalib", "*"))
elif not os.path.exists(os.path.join(self.jython_home, "jython.jar")):
- bad_option("""{jython_home} contains neither jython-dev.jar nor jython.jar.
+ bad_option(u"""{} contains neither jython-dev.jar nor jython.jar.
Try running this script from the 'bin' directory of an installed Jython or
-setting {envvar_specifier}JYTHON_HOME.""".format(
- jython_home=self.jython_home,
- envvar_specifier="%" if self.uname == "windows" else "$"))
+setting JYTHON_HOME.""".format(self.jython_home))
else:
jars = [os.path.join(self.jython_home, "jython.jar")]
self._jython_jars = jars
@@ -194,14 +251,14 @@
if hasattr(self.args, "classpath"):
return self.args.classpath
else:
- return os.environ.get("CLASSPATH", ".")
+ return get_env("CLASSPATH", ".")
@property
def java_mem(self):
if hasattr(self.args, "mem"):
return self.args.mem
else:
- return os.environ.get("JAVA_MEM", "-Xmx512m")
+ return get_env("JAVA_MEM", "-Xmx512m")
@property
def java_stack(self):
@@ -213,7 +270,7 @@
@property
def java_opts(self):
return [self.java_mem, self.java_stack]
-
+
@property
def java_profile_agent(self):
return os.path.join(self.jython_home, "javalib", "profile.jar")
@@ -222,68 +279,84 @@
if "JAVA_ENCODING" not in os.environ and self.uname == "darwin" and "file.encoding" not in self.args.properties:
self.args.properties["file.encoding"] = "UTF-8"
- def convert(self, arg):
- if sys.stdout.encoding:
- return arg.encode(sys.stdout.encoding)
- else:
- return arg
-
def make_classpath(self, jars):
return self.classpath_delimiter.join(jars)
def convert_path(self, arg):
- if self.uname == "cygwin":
- if not arg.startswith("/cygdrive/"):
- new_path = self.convert(arg).replace("/", "\\")
+ if self.uname == u"cygwin":
+ if not arg.startswith(u"/cygdrive/"):
+ return arg.replace(u"/", u"\\")
else:
- new_path = subprocess.check_output(["cygpath", "-pw", self.convert(arg)]).strip()
- return new_path
+ arg = arg.replace('*', r'\*') # prevent globbing
+ return unicode_subprocess(["cygpath", "-pw", arg])
else:
- return self.convert(arg)
+ return arg
+
+ def unicode_subprocess(self, unicode_command):
+ """ Launch a command with subprocess.check_output() and read the
+ output, except everything is expected to be in Unicode.
+ """
+ cmd = []
+ for c in unicode_command:
+ if isinstance(c, bytes):
+ cmd.append(c)
+ else:
+ cmd.append(c.encode(ENCODING))
+ return subprocess.check_output(cmd).strip().decode(ENCODING)
@property
def command(self):
+ # Set default file encoding just for Darwin (?)
self.set_encoding()
+
+ # Begin to build the Java part of the ultimate command
args = [self.java_command]
args.extend(self.java_opts)
args.extend(self.args.java)
+ # Get the class path right (depends on --boot)
classpath = self.java_classpath
jython_jars = self.jython_jars
if self.args.boot:
- args.append("-Xbootclasspath/a:%s" % self.convert_path(self.make_classpath(jython_jars)))
+ args.append(u"-Xbootclasspath/a:%s" % self.convert_path(self.make_classpath(jython_jars)))
else:
classpath = self.make_classpath(jython_jars) + self.classpath_delimiter + classpath
- args.extend(["-classpath", self.convert_path(classpath)])
+ args.extend([u"-classpath", self.convert_path(classpath)])
if "python.home" not in self.args.properties:
- args.append("-Dpython.home=%s" % self.convert_path(self.jython_home))
+ args.append(u"-Dpython.home=%s" % self.convert_path(self.jython_home))
if "python.executable" not in self.args.properties:
- args.append("-Dpython.executable=%s" % self.convert_path(self.executable))
+ args.append(u"-Dpython.executable=%s" % self.convert_path(self.executable))
if "python.launcher.uname" not in self.args.properties:
- args.append("-Dpython.launcher.uname=%s" % self.uname)
- # Determines whether running on a tty for the benefit of
+ args.append(u"-Dpython.launcher.uname=%s" % self.uname)
+
+ # Determine whether running on a tty for the benefit of
# running on Cygwin. This step is needed because the Mintty
# terminal emulator doesn't behave like a standard Microsoft
# Windows tty, and so JNR Posix doesn't detect it properly.
if "python.launcher.tty" not in self.args.properties:
- args.append("-Dpython.launcher.tty=%s" % str(os.isatty(sys.stdin.fileno())).lower())
- if self.uname == "cygwin" and "python.console" not in self.args.properties:
- args.append("-Dpython.console=org.python.core.PlainConsole")
+ args.append(u"-Dpython.launcher.tty=%s" % str(os.isatty(sys.stdin.fileno())).lower())
+ if self.uname == u"cygwin" and "python.console" not in self.args.properties:
+ args.append(u"-Dpython.console=org.python.core.PlainConsole")
+
if self.args.profile:
- args.append("-XX:-UseSplitVerifier")
- args.append("-javaagent:%s" % self.convert_path(self.java_profile_agent))
+ args.append(u"-XX:-UseSplitVerifier")
+ args.append(u"-javaagent:%s" % self.convert_path(self.java_profile_agent))
+
for k, v in self.args.properties.iteritems():
- args.append("-D%s=%s" % (self.convert(k), self.convert(v)))
- args.append("org.python.util.jython")
+ args.append(u"-D%s=%s" % (k, v))
+
+ args.append(u"org.python.util.jython")
+
if self.args.help:
- args.append("--help")
+ args.append(u"--help")
+
args.extend(self.jython_args)
return args
def bad_option(msg):
- print >> sys.stderr, """
+ print >> sys.stderr, u"""
{msg}
usage: jython [option] ... [-c cmd | -m mod | file | -] [arg] ...
Try `jython -h' for more information.
@@ -312,19 +385,24 @@
"""
def support_java_opts(args):
+ """ Generator from options intended for the JVM. Options beginning -D go
+ through unchanged, others are prefixed with -J.
+ """
+ # Input is expected to be Unicode, but just in case ...
+ if isinstance(args, bytes): args = args.decode(ENCODING)
it = iter(args)
while it:
arg = next(it)
- if arg.startswith("-D"):
+ if arg.startswith(u"-D"):
yield arg
- elif arg in ("-classpath", "-cp"):
- yield "-J" + arg
+ elif arg in (u"-classpath", u"-cp"):
+ yield u"-J" + arg
try:
yield next(it)
except StopIteration:
bad_option("Argument expected for -classpath option in JAVA_OPTS")
else:
- yield "-J" + arg
+ yield u"-J" + arg
# copied from subprocess module in Jython; see
@@ -378,37 +456,36 @@
return argv
-
-def decode_args(sys_args):
- args = [sys_args[0]]
-
- def get_env_opts(envvar):
- opts = os.environ.get(envvar, "")
- if is_windows:
- return cmdline2list(opts)
- else:
- return shlex.split(opts)
-
- java_opts = get_env_opts("JAVA_OPTS")
- jython_opts = get_env_opts("JYTHON_OPTS")
-
- args.extend(support_java_opts(java_opts))
- args.extend(sys_args[1:])
-
- if sys.stdout.encoding:
- if sys.stdout.encoding.lower() == "cp65001":
- sys.exit("""Jython does not support code page 65001 (CP_UTF8).
-Please try another code page by setting it with the chcp command.""")
- args = [arg.decode(sys.stdout.encoding) for arg in args]
- jython_opts = [arg.decode(sys.stdout.encoding) for arg in jython_opts]
-
- return args, jython_opts
-
+def get_env_opts(envvar):
+ """ Return a list of the values in the named environment variable,
+ split according to shell conventions, and decoded to Unicode.
+ """
+ opts = os.environ.get(envvar, "") # bytes at this point
+ if is_windows:
+ opts = cmdline2list(opts)
+ else:
+ opts = shlex.split(opts)
+ return decode_list(opts)
def main(sys_args):
- sys_args, jython_opts = decode_args(sys_args)
+ # The entire program must work in Unicode
+ sys_args = decode_list(sys_args)
+
+ # sys_args[0] is this script (which we'll replace with 'java' eventually).
+ # Insert options for the java command from the environment.
+ sys_args[1:1] = support_java_opts(get_env_opts("JAVA_OPTS"))
+
+ # Parse the composite arguments (yes, even the ones from JAVA_OPTS),
+ # and return the "unparsed" tail considered arguments for Jython itself.
args, jython_args = parse_launcher_args(sys_args)
+
+ # Build the data from which we can generate the command ultimately.
+ # Jython options supplied from the environment stand in front of the
+ # unparsed tail from the command line.
+ jython_opts = get_env_opts("JYTHON_OPTS")
jython_command = JythonCommand(args, jython_opts + jython_args)
+
+ # This is the "fully adjusted" command to launch, but still as Unicode.
command = jython_command.command
if args.profile and not args.help:
@@ -416,23 +493,32 @@
os.unlink("profile.txt")
except OSError:
pass
+
if args.print_requested and not args.help:
- if jython_command.uname == "windows":
- print subprocess.list2cmdline(jython_command.command)
+ if jython_command.uname == u"windows":
+ # Add escapes and quotes necessary to Windows.
+ # Normally used for byte strings, but Python is tolerant :)
+ command_line = subprocess.list2cmdline(command)
else:
- print " ".join(pipes.quote(arg) for arg in jython_command.command)
+ # Just concatenate with spaces
+ command_line = u" ".join(command)
+ # It is possible the Unicode cannot be encoded for the console
+ enc = sys.stdout.encoding or 'ascii'
+ sys.stdout.write(command_line.encode(enc, 'replace'))
else:
- if not (is_windows or not hasattr(os, "execvp") or args.help or jython_command.uname == "cygwin"):
+ if not (is_windows or not hasattr(os, "execvp") or args.help or
+ jython_command.uname == u"cygwin"):
# Replace this process with the java process.
#
# NB such replacements actually do not work under Windows,
# but if tried, they also fail very badly by hanging.
# So don't even try!
+ command = encode_list(command)
os.execvp(command[0], command[1:])
else:
result = 1
try:
- result = subprocess.call(command)
+ result = subprocess.call(encode_list(command))
if args.help:
print_help()
except KeyboardInterrupt:
--
Repository URL: https://hg.python.org/jython
More information about the Jython-checkins mailing list