[Jython-checkins] jython: Improved locale support, especially for Turkish

jim.baker jython-checkins at python.org
Fri Jan 23 05:31:16 CET 2015


https://hg.python.org/jython/rev/5c60689c2819
changeset:   7556:5c60689c2819
user:        Jim Baker <jim.baker at rackspace.com>
date:        Thu Jan 22 21:31:05 2015 -0700
summary:
  Improved locale support, especially for Turkish

Fixes http://bugs.jython.org/issue1874 and
http://bugs.jython.org/issue2261.

See also http://bugs.python.org/issue17252#msg182519 for CPython 3;
for unicode strings, Jython follows Java's correct support for Turkish
lowercasing and uppercasing of u'i'/u'I' in a Turkish locale (CPython
3 does not yet have locale-aware Unicode support).

files:
  Lib/test/test_os_jy.py                |  69 ++++++++++++++-
  src/org/python/core/PyString.java     |   5 +-
  src/org/python/core/PyUnicode.java    |   4 +-
  src/org/python/modules/posix/OS.java  |   3 +-
  src/org/python/modules/time/Time.java |   2 +-
  5 files changed, 75 insertions(+), 8 deletions(-)


diff --git a/Lib/test/test_os_jy.py b/Lib/test/test_os_jy.py
--- a/Lib/test/test_os_jy.py
+++ b/Lib/test/test_os_jy.py
@@ -128,7 +128,7 @@
         # lacks buffer api:
         self.assertRaises(TypeError, self.do_write, 1.5, 4)
 
-class OSUnicodeTestCase(unittest.TestCase):
+class UnicodeTestCase(unittest.TestCase):
 
     def test_env(self):
         with test_support.temp_cwd(name=u"tempcwd-中文"):
@@ -191,6 +191,70 @@
                 self.assertTrue(f.exists(), "File %r (%r) should be testable for existence" % (
                     f, entry_path))
 
+class LocaleTestCase(unittest.TestCase):
+
+    def get_installed_locales(self, codes, msg=None):
+        def normalize(code):
+            # OS X and Ubuntu (at the very least) differ slightly in locale code formatting
+            return code.strip().replace("-", "").lower()
+
+        try:
+            installed_codes = dict(((normalize(code), code) for 
+                                    code in subprocess.check_output(["locale", "-a"]).split()))
+        except subprocess.CalledProcessError:
+            unittest.skip("locale command not available, cannot test")
+
+        if msg is None:
+            msg = "One of %s tested locales is not installed" % (codes,)
+        available_codes = []
+        for code in codes:
+            if normalize(code) in installed_codes:
+                available_codes.append(installed_codes[normalize(code)])
+        unittest.skipUnless(available_codes, msg)
+        return available_codes
+
+    # must be on posix and turkish locale supported
+    def test_turkish_locale_posix_module(self):
+        # Verifies fix of http://bugs.jython.org/issue1874
+        self.get_installed_locales(["tr_TR.UTF-8"], "Turkish locale not installed, cannot test")
+        newenv = os.environ.copy()
+        newenv["LC_ALL"] = "tr_TR.UTF-8"  # set to Turkish locale
+        self.assertEqual(
+            subprocess.check_output(
+                [sys.executable, "-c",
+                 "import sys; assert 'posix' in sys.builtin_module_names"],
+                env=newenv),
+            "")
+
+    def test_turkish_locale_string_lower_upper(self):
+        # Verifies fix of http://bugs.jython.org/issue1874
+        self.get_installed_locales(["tr_TR.UTF-8"], "Turkish locale not installed, cannot test")
+        newenv = os.environ.copy()
+        newenv["LC_ALL"] = "tr_TR.UTF-8"  # set to Turkish locale
+        self.assertEqual(
+            subprocess.check_output(
+                [sys.executable, "-c",
+                 'print repr(["I".lower(), u"I".lower(), "i".upper(), u"i".upper()])'],
+                env=newenv),
+            # Should not convert str for 'i'/'I', but should convert
+            # unicode if in Turkish locale; this behavior intentionally is
+            # different than CPython; see also http://bugs.python.org/issue17252
+            "['i', u'\\u0131', 'I', u'\\u0130']\n")
+
+    def test_strptime_locale(self):
+        # Verifies fix of http://bugs.jython.org/issue2261
+        newenv = os.environ.copy()
+        codes = [
+            "cs_CZ.UTF-8", "pl_PL.UTF-8", "ru_RU.UTF-8",
+            "sk_SK.UTF-8", "uk_UA.UTF-8", "zh_CN.UTF-8"]
+        for code in self.get_installed_locales(codes):
+            newenv["LC_ALL"] = code
+            self.assertEqual(
+                subprocess.check_output(
+                    [sys.executable, "-c",
+                     'import datetime; print(datetime.datetime.strptime("2015-01-22", "%Y-%m-%d"))'],
+                    env=newenv),
+                "2015-01-22 00:00:00\n")
 
 
 def test_main():
@@ -199,7 +263,8 @@
         OSDirTestCase,
         OSStatTestCase,
         OSWriteTestCase,
-        OSUnicodeTestCase
+        UnicodeTestCase,
+        LocaleTestCase,
     )
 
 if __name__ == '__main__':
diff --git a/src/org/python/core/PyString.java b/src/org/python/core/PyString.java
--- a/src/org/python/core/PyString.java
+++ b/src/org/python/core/PyString.java
@@ -9,6 +9,7 @@
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 import java.util.List;
+import java.util.Locale;
 
 import org.python.core.buffer.BaseBuffer;
 import org.python.core.buffer.SimpleStringBuffer;
@@ -1048,7 +1049,7 @@
 
     @ExposedMethod(doc = BuiltinDocs.str_lower_doc)
     final String str_lower() {
-        return getString().toLowerCase();
+        return getString().toLowerCase(Locale.ENGLISH);
     }
 
     public String upper() {
@@ -1057,7 +1058,7 @@
 
     @ExposedMethod(doc = BuiltinDocs.str_upper_doc)
     final String str_upper() {
-        return getString().toUpperCase();
+        return getString().toUpperCase(Locale.ENGLISH);
     }
 
     public String title() {
diff --git a/src/org/python/core/PyUnicode.java b/src/org/python/core/PyUnicode.java
--- a/src/org/python/core/PyUnicode.java
+++ b/src/org/python/core/PyUnicode.java
@@ -936,12 +936,12 @@
 
     @ExposedMethod(doc = BuiltinDocs.unicode_lower_doc)
     final PyObject unicode_lower() {
-        return new PyUnicode(str_lower());
+        return new PyUnicode(getString().toLowerCase());
     }
 
     @ExposedMethod(doc = BuiltinDocs.unicode_upper_doc)
     final PyObject unicode_upper() {
-        return new PyUnicode(str_upper());
+        return new PyUnicode(getString().toUpperCase());
     }
 
     @ExposedMethod(doc = BuiltinDocs.unicode_title_doc)
diff --git a/src/org/python/modules/posix/OS.java b/src/org/python/modules/posix/OS.java
--- a/src/org/python/modules/posix/OS.java
+++ b/src/org/python/modules/posix/OS.java
@@ -1,6 +1,7 @@
 /* Copyright (c) Jython Developers */
 package org.python.modules.posix;
 
+import java.util.Locale;
 import org.python.core.PySystemState;
 
 /**
@@ -32,7 +33,7 @@
     }
 
     String getModuleName() {
-        return name().toLowerCase();
+        return name().toLowerCase(Locale.ENGLISH);
     }
 
     String[][] getShellCommands() {
diff --git a/src/org/python/modules/time/Time.java b/src/org/python/modules/time/Time.java
--- a/src/org/python/modules/time/Time.java
+++ b/src/org/python/modules/time/Time.java
@@ -678,7 +678,7 @@
         //        os.environ)
         //
         // TODO:  Check how CPython deals with this problem.
-        return new PyString(s);
+        return Py.newStringOrUnicode(s);
     }
 
 

-- 
Repository URL: https://hg.python.org/jython


More information about the Jython-checkins mailing list