[Jython-checkins] jython: Rework launcher jython.py to allow for non-ascii paths on Windows.
jeff.allen
jython-checkins at python.org
Sun May 21 05:06:50 EDT 2017
https://hg.python.org/jython/rev/977e34a69fda
changeset: 8083:977e34a69fda
user: Jeff Allen <ja.py at farowl.co.uk>
date: Sun Apr 16 23:31:23 2017 +0100
summary:
Rework launcher jython.py to allow for non-ascii paths on Windows.
The launcher now works internally in Unicode. jython.exe has been regenerated
from it using PyInstaller 3.2.1 in a virtualenv under Python 2.7.13.
test_jython_launcher passes for a user "Épreuve" on Windows and Cygwin as long
as -S (don't import site) is given. Issue #2356 refers.
files:
Lib/test/test_jython_launcher.py | 8 +-
src/shell/jython.exe | Bin
src/shell/jython.py | 314 ++++++++++++------
3 files changed, 203 insertions(+), 119 deletions(-)
diff --git a/Lib/test/test_jython_launcher.py b/Lib/test/test_jython_launcher.py
--- a/Lib/test/test_jython_launcher.py
+++ b/Lib/test/test_jython_launcher.py
@@ -31,7 +31,6 @@
# by the installer
return executable
-
def get_uname():
_uname = None
try:
@@ -49,9 +48,8 @@
class TestLauncher(unittest.TestCase):
-
+
def get_cmdline(self, cmd, env):
-
output = subprocess.check_output(cmd, env=env).rstrip()
if is_windows:
return subprocess._cmdline2list(output)
@@ -76,7 +74,7 @@
k, v = arg[2:].split("=")
props[k] = v
return props
-
+
def test_classpath_env(self):
env = self.get_newenv()
env["CLASSPATH"] = some_jar
@@ -207,7 +205,7 @@
def test_file(self):
self.assertCommand(['test.py'])
-
+
def test_dash(self):
self.assertCommand(['-i'])
diff --git a/src/shell/jython.exe b/src/shell/jython.exe
index 7c9cbe9eec239c5768c17f873726220b09966341..b7500204c603274a6bdb9ec15064bd27f31c14ac
GIT binary patch
[stripped]
diff --git a/src/shell/jython.py b/src/shell/jython.py
--- a/src/shell/jython.py
+++ b/src/shell/jython.py
@@ -20,19 +20,68 @@
is_windows = os.name == "nt" or (os.name == "java" and os._name == "nt")
+# A note about encoding:
+#
+# A major motivation for this program is to launch Jython on Windows, where
+# console and file encoding may be different. Command-line arguments and
+# environment variables are presented in Python 2.7 as byte-data, encoded
+# "somehow". It becomes important to know which decoding to use as soon as
+# paths may contain non-ascii characters. It is not the console encoding.
+# Experiment shows that sys.getfilesystemencoding() is generally applicable
+# to arguments, environment variables and spawning a subprocess.
+#
+# On a Windows 10 box, this comes up with pseudo-codec 'mbcs'. This supports
+# European accented characters pretty well.
+#
+# When localised to Chinese (simplified), the FS encoding mbcs includes many
+# more points than cp936 (the console encoding), although it still struggles
+# with European accented characters.
+
+ENCODING = sys.getfilesystemencoding() or "utf-8"
+
+
+def get_env(envvar, default=None):
+ """ Return the named environment variable, decoded to Unicode."""
+ v = os.environ.get(envvar, default)
+ # Tolerate default given as bytes, as we're bound to forget sometimes
+ if isinstance(v, bytes):
+ v = v.decode(ENCODING)
+ # Remove quotes sometimes necessary around the value
+ if v is not None and v.startswith('"') and v.endswith('"'):
+ v = v[1:-1]
+ return v
+
+def encode_list(args, encoding=ENCODING):
+ """ Convert list of Unicode strings to list of encoded byte strings."""
+ r = []
+ for a in args:
+ if not isinstance(a, bytes): a = a.encode(encoding)
+ r.append(a)
+ return r
+
+def decode_list(args, encoding=ENCODING):
+ """ Convert list of byte strings to list of Unicode strings."""
+ r = []
+ for a in args:
+ if not isinstance(a, unicode): a = a.decode(encoding)
+ r.append(a)
+ return r
def parse_launcher_args(args):
+ """ Process the given argument list into two objects, the first part being
+ a namespace of checked arguments to the interpreter itself, and the rest
+ being the Python program it will run and its arguments.
+ """
class Namespace(object):
pass
parsed = Namespace()
- parsed.java = []
- parsed.properties = OrderedDict()
- parsed.boot = False
- parsed.jdb = False
- parsed.help = False
- parsed.print_requested = False
- parsed.profile = False
- parsed.jdb = None
+ parsed.boot = False # --boot flag given
+ parsed.jdb = False # --jdb flag given
+ parsed.help = False # --help or -h flag given
+ parsed.print_requested = False # --print flag given
+ parsed.profile = False # --profile flag given
+ parsed.properties = OrderedDict() # properties to give the JVM
+ parsed.java = [] # any other arguments to give the JVM
it = iter(args)
next(it) # ignore sys.argv[0]
@@ -42,11 +91,11 @@
arg = next(it)
except StopIteration:
break
- if arg.startswith("-D"):
- k, v = arg[2:].split("=")
+ if arg.startswith(u"-D"):
+ k, v = arg[2:].split(u"=")
parsed.properties[k] = v
i += 1
- elif arg in ("-J-classpath", "-J-cp"):
+ elif arg in (u"-J-classpath", u"-J-cp"):
try:
next_arg = next(it)
except StopIteration:
@@ -55,24 +104,24 @@
bad_option("Bad option for -J-classpath")
parsed.classpath = next_arg
i += 2
- elif arg.startswith("-J-Xmx"):
+ elif arg.startswith(u"-J-Xmx"):
parsed.mem = arg[2:]
i += 1
- elif arg.startswith("-J-Xss"):
+ elif arg.startswith(u"-J-Xss"):
parsed.stack = arg[2:]
i += 1
- elif arg.startswith("-J"):
+ elif arg.startswith(u"-J"):
parsed.java.append(arg[2:])
i += 1
- elif arg == "--print":
+ elif arg == u"--print":
parsed.print_requested = True
i += 1
- elif arg in ("-h", "--help"):
+ elif arg in (u"-h", u"--help"):
parsed.help = True
- elif arg in ("--boot", "--jdb", "--profile"):
+ elif arg in (u"--boot", u"--jdb", u"--profile"):
setattr(parsed, arg[2:], True)
i += 1
- elif arg == "--":
+ elif arg == u"--":
i += 1
break
else:
@@ -92,13 +141,13 @@
if hasattr(self, "_uname"):
return self._uname
if is_windows:
- self._uname = "windows"
+ self._uname = u"windows"
else:
uname = subprocess.check_output(["uname"]).strip().lower()
if uname.startswith("cygwin"):
- self._uname = "cygwin"
+ self._uname = u"cygwin"
else:
- self._uname = uname
+ self._uname = uname.decode(ENCODING)
return self._uname
@property
@@ -114,22 +163,23 @@
return self._java_command
def setup_java_command(self):
+ """ Sets java_home and java_command according to environment and parsed
+ launcher arguments --jdb and --help.
+ """
if self.args.help:
self._java_home = None
- self._java_command = "java"
+ self._java_command = u"java"
return
-
- if "JAVA_HOME" not in os.environ:
- self._java_home = None
- self._java_command = "jdb" if self.args.jdb else "java"
+
+ command = u"jdb" if self.args.jdb else u"java"
+
+ self._java_home = get_env("JAVA_HOME")
+ if self._java_home is None or self.uname == u"cygwin":
+ # Assume java or jdb on the path
+ self._java_command = command
else:
- self._java_home = os.environ["JAVA_HOME"]
- if self.uname == "cygwin":
- self._java_command = "jdb" if self.args.jdb else "java"
- else:
- self._java_command = os.path.join(
- self.java_home, "bin",
- "jdb" if self.args.jdb else "java")
+ # Assume java or jdb in JAVA_HOME/bin
+ self._java_command = os.path.join(self._java_home, u"bin", command)
@property
def executable(self):
@@ -139,28 +189,37 @@
# Modified from
# http://stackoverflow.com/questions/3718657/how-to-properly-determine-current-script-directory-in-python/22881871#22881871
if getattr(sys, "frozen", False): # py2exe, PyInstaller, cx_Freeze
- path = os.path.abspath(sys.executable)
+ # Frozen. Let it go with the executable path.
+ bytes_path = sys.executable
else:
- def inspect_this(): pass
- path = inspect.getabsfile(inspect_this)
- self._executable = os.path.realpath(path)
+ # Not frozen. Any object defined in this file will do.
+ bytes_path = inspect.getfile(JythonCommand)
+ # Python 2 thinks in bytes. Carefully normalise in Unicode.
+ path = os.path.realpath(bytes_path.decode(ENCODING))
+ try:
+ # If possible, make this relative to the CWD.
+ # This helps manage multi-byte names in installation location.
+ path = os.path.relpath(path, os.getcwdu())
+ except ValueError:
+ # Many reasons why this might be impossible: use an absolute path.
+ path = os.path.abspath(path)
+ self._executable = path
return self._executable
@property
def jython_home(self):
if hasattr(self, "_jython_home"):
return self._jython_home
- if "JYTHON_HOME" in os.environ:
- self._jython_home = os.environ["JYTHON_HOME"]
- else:
- self._jython_home = os.path.dirname(os.path.dirname(self.executable))
- if self.uname == "cygwin":
- self._jython_home = subprocess.check_output(["cygpath", "--windows", self._jython_home]).strip()
+ self._jython_home = get_env("JYTHON_HOME") or os.path.dirname(
+ os.path.dirname(self.executable))
+ if self.uname == u"cygwin":
+ # Even on Cygwin, we need a Windows-style path for this
+ home = unicode_subprocess(["cygpath", "--windows", home])
return self._jython_home
@property
def jython_opts():
- return os.environ.get("JYTHON_OPTS", "")
+ return get_env("JYTHON_OPTS", "")
@property
def classpath_delimiter(self):
@@ -179,11 +238,9 @@
else:
jars.append(os.path.join(self.jython_home, "javalib", "*"))
elif not os.path.exists(os.path.join(self.jython_home, "jython.jar")):
- bad_option("""{jython_home} contains neither jython-dev.jar nor jython.jar.
+ bad_option(u"""{} contains neither jython-dev.jar nor jython.jar.
Try running this script from the 'bin' directory of an installed Jython or
-setting {envvar_specifier}JYTHON_HOME.""".format(
- jython_home=self.jython_home,
- envvar_specifier="%" if self.uname == "windows" else "$"))
+setting JYTHON_HOME.""".format(self.jython_home))
else:
jars = [os.path.join(self.jython_home, "jython.jar")]
self._jython_jars = jars
@@ -194,14 +251,14 @@
if hasattr(self.args, "classpath"):
return self.args.classpath
else:
- return os.environ.get("CLASSPATH", ".")
+ return get_env("CLASSPATH", ".")
@property
def java_mem(self):
if hasattr(self.args, "mem"):
return self.args.mem
else:
- return os.environ.get("JAVA_MEM", "-Xmx512m")
+ return get_env("JAVA_MEM", "-Xmx512m")
@property
def java_stack(self):
@@ -213,7 +270,7 @@
@property
def java_opts(self):
return [self.java_mem, self.java_stack]
-
+
@property
def java_profile_agent(self):
return os.path.join(self.jython_home, "javalib", "profile.jar")
@@ -222,68 +279,84 @@
if "JAVA_ENCODING" not in os.environ and self.uname == "darwin" and "file.encoding" not in self.args.properties:
self.args.properties["file.encoding"] = "UTF-8"
- def convert(self, arg):
- if sys.stdout.encoding:
- return arg.encode(sys.stdout.encoding)
- else:
- return arg
-
def make_classpath(self, jars):
return self.classpath_delimiter.join(jars)
def convert_path(self, arg):
- if self.uname == "cygwin":
- if not arg.startswith("/cygdrive/"):
- new_path = self.convert(arg).replace("/", "\\")
+ if self.uname == u"cygwin":
+ if not arg.startswith(u"/cygdrive/"):
+ return arg.replace(u"/", u"\\")
else:
- new_path = subprocess.check_output(["cygpath", "-pw", self.convert(arg)]).strip()
- return new_path
+ arg = arg.replace('*', r'\*') # prevent globbing
+ return unicode_subprocess(["cygpath", "-pw", arg])
else:
- return self.convert(arg)
+ return arg
+
+ def unicode_subprocess(self, unicode_command):
+ """ Launch a command with subprocess.check_output() and read the
+ output, except everything is expected to be in Unicode.
+ """
+ cmd = []
+ for c in unicode_command:
+ if isinstance(c, bytes):
+ cmd.append(c)
+ else:
+ cmd.append(c.encode(ENCODING))
+ return subprocess.check_output(cmd).strip().decode(ENCODING)
@property
def command(self):
+ # Set default file encoding just for Darwin (?)
self.set_encoding()
+
+ # Begin to build the Java part of the ultimate command
args = [self.java_command]
args.extend(self.java_opts)
args.extend(self.args.java)
+ # Get the class path right (depends on --boot)
classpath = self.java_classpath
jython_jars = self.jython_jars
if self.args.boot:
- args.append("-Xbootclasspath/a:%s" % self.convert_path(self.make_classpath(jython_jars)))
+ args.append(u"-Xbootclasspath/a:%s" % self.convert_path(self.make_classpath(jython_jars)))
else:
classpath = self.make_classpath(jython_jars) + self.classpath_delimiter + classpath
- args.extend(["-classpath", self.convert_path(classpath)])
+ args.extend([u"-classpath", self.convert_path(classpath)])
if "python.home" not in self.args.properties:
- args.append("-Dpython.home=%s" % self.convert_path(self.jython_home))
+ args.append(u"-Dpython.home=%s" % self.convert_path(self.jython_home))
if "python.executable" not in self.args.properties:
- args.append("-Dpython.executable=%s" % self.convert_path(self.executable))
+ args.append(u"-Dpython.executable=%s" % self.convert_path(self.executable))
if "python.launcher.uname" not in self.args.properties:
- args.append("-Dpython.launcher.uname=%s" % self.uname)
- # Determines whether running on a tty for the benefit of
+ args.append(u"-Dpython.launcher.uname=%s" % self.uname)
+
+ # Determine whether running on a tty for the benefit of
# running on Cygwin. This step is needed because the Mintty
# terminal emulator doesn't behave like a standard Microsoft
# Windows tty, and so JNR Posix doesn't detect it properly.
if "python.launcher.tty" not in self.args.properties:
- args.append("-Dpython.launcher.tty=%s" % str(os.isatty(sys.stdin.fileno())).lower())
- if self.uname == "cygwin" and "python.console" not in self.args.properties:
- args.append("-Dpython.console=org.python.core.PlainConsole")
+ args.append(u"-Dpython.launcher.tty=%s" % str(os.isatty(sys.stdin.fileno())).lower())
+ if self.uname == u"cygwin" and "python.console" not in self.args.properties:
+ args.append(u"-Dpython.console=org.python.core.PlainConsole")
+
if self.args.profile:
- args.append("-XX:-UseSplitVerifier")
- args.append("-javaagent:%s" % self.convert_path(self.java_profile_agent))
+ args.append(u"-XX:-UseSplitVerifier")
+ args.append(u"-javaagent:%s" % self.convert_path(self.java_profile_agent))
+
for k, v in self.args.properties.iteritems():
- args.append("-D%s=%s" % (self.convert(k), self.convert(v)))
- args.append("org.python.util.jython")
+ args.append(u"-D%s=%s" % (k, v))
+
+ args.append(u"org.python.util.jython")
+
if self.args.help:
- args.append("--help")
+ args.append(u"--help")
+
args.extend(self.jython_args)
return args
def bad_option(msg):
- print >> sys.stderr, """
+ print >> sys.stderr, u"""
{msg}
usage: jython [option] ... [-c cmd | -m mod | file | -] [arg] ...
Try `jython -h' for more information.
@@ -312,19 +385,24 @@
"""
def support_java_opts(args):
+ """ Generator from options intended for the JVM. Options beginning -D go
+ through unchanged, others are prefixed with -J.
+ """
+ # Input is expected to be Unicode, but just in case ...
+ if isinstance(args, bytes): args = args.decode(ENCODING)
it = iter(args)
while it:
arg = next(it)
- if arg.startswith("-D"):
+ if arg.startswith(u"-D"):
yield arg
- elif arg in ("-classpath", "-cp"):
- yield "-J" + arg
+ elif arg in (u"-classpath", u"-cp"):
+ yield u"-J" + arg
try:
yield next(it)
except StopIteration:
bad_option("Argument expected for -classpath option in JAVA_OPTS")
else:
- yield "-J" + arg
+ yield u"-J" + arg
# copied from subprocess module in Jython; see
@@ -378,37 +456,36 @@
return argv
-
-def decode_args(sys_args):
- args = [sys_args[0]]
-
- def get_env_opts(envvar):
- opts = os.environ.get(envvar, "")
- if is_windows:
- return cmdline2list(opts)
- else:
- return shlex.split(opts)
-
- java_opts = get_env_opts("JAVA_OPTS")
- jython_opts = get_env_opts("JYTHON_OPTS")
-
- args.extend(support_java_opts(java_opts))
- args.extend(sys_args[1:])
-
- if sys.stdout.encoding:
- if sys.stdout.encoding.lower() == "cp65001":
- sys.exit("""Jython does not support code page 65001 (CP_UTF8).
-Please try another code page by setting it with the chcp command.""")
- args = [arg.decode(sys.stdout.encoding) for arg in args]
- jython_opts = [arg.decode(sys.stdout.encoding) for arg in jython_opts]
-
- return args, jython_opts
-
+def get_env_opts(envvar):
+ """ Return a list of the values in the named environment variable,
+ split according to shell conventions, and decoded to Unicode.
+ """
+ opts = os.environ.get(envvar, "") # bytes at this point
+ if is_windows:
+ opts = cmdline2list(opts)
+ else:
+ opts = shlex.split(opts)
+ return decode_list(opts)
def main(sys_args):
- sys_args, jython_opts = decode_args(sys_args)
+ # The entire program must work in Unicode
+ sys_args = decode_list(sys_args)
+
+ # sys_args[0] is this script (which we'll replace with 'java' eventually).
+ # Insert options for the java command from the environment.
+ sys_args[1:1] = support_java_opts(get_env_opts("JAVA_OPTS"))
+
+ # Parse the composite arguments (yes, even the ones from JAVA_OPTS),
+ # and return the "unparsed" tail considered arguments for Jython itself.
args, jython_args = parse_launcher_args(sys_args)
+
+ # Build the data from which we can generate the command ultimately.
+ # Jython options supplied from the environment stand in front of the
+ # unparsed tail from the command line.
+ jython_opts = get_env_opts("JYTHON_OPTS")
jython_command = JythonCommand(args, jython_opts + jython_args)
+
+ # This is the "fully adjusted" command to launch, but still as Unicode.
command = jython_command.command
if args.profile and not args.help:
@@ -416,23 +493,32 @@
os.unlink("profile.txt")
except OSError:
pass
+
if args.print_requested and not args.help:
- if jython_command.uname == "windows":
- print subprocess.list2cmdline(jython_command.command)
+ if jython_command.uname == u"windows":
+ # Add escapes and quotes necessary to Windows.
+ # Normally used for byte strings, but Python is tolerant :)
+ command_line = subprocess.list2cmdline(command)
else:
- print " ".join(pipes.quote(arg) for arg in jython_command.command)
+ # Just concatenate with spaces
+ command_line = u" ".join(command)
+ # It is possible the Unicode cannot be encoded for the console
+ enc = sys.stdout.encoding or 'ascii'
+ sys.stdout.write(command_line.encode(enc, 'replace'))
else:
- if not (is_windows or not hasattr(os, "execvp") or args.help or jython_command.uname == "cygwin"):
+ if not (is_windows or not hasattr(os, "execvp") or args.help or
+ jython_command.uname == u"cygwin"):
# Replace this process with the java process.
#
# NB such replacements actually do not work under Windows,
# but if tried, they also fail very badly by hanging.
# So don't even try!
+ command = encode_list(command)
os.execvp(command[0], command[1:])
else:
result = 1
try:
- result = subprocess.call(command)
+ result = subprocess.call(encode_list(command))
if args.help:
print_help()
except KeyboardInterrupt:
--
Repository URL: https://hg.python.org/jython
More information about the Jython-checkins
mailing list