[Jython-checkins] jython: str.lower, upper, capitalize, title, swapcase become ASCII.

jeff.allen jython-checkins at python.org
Sat Oct 10 09:58:10 CEST 2015


https://hg.python.org/jython/rev/81d319539b45
changeset:   7749:81d319539b45
user:        Jeff Allen <ja.py at farowl.co.uk>
date:        Wed Sep 30 08:34:24 2015 +0100
summary:
  str.lower, upper, capitalize, title, swapcase become ASCII.

This follows up the fix for #2364, to correct the Unicode-ness in these
other methods. Tests added for non-byte characters. Benchmarking of the
code in isolation shows it to be the same or faster. Other methods still
need the same treatment (strip, for example).

files:
  Lib/test/test_bytes_jy.py         |   43 +++++++-
  src/org/python/core/PyString.java |  100 +++++++++++++----
  2 files changed, 116 insertions(+), 27 deletions(-)


diff --git a/Lib/test/test_bytes_jy.py b/Lib/test/test_bytes_jy.py
--- a/Lib/test/test_bytes_jy.py
+++ b/Lib/test/test_bytes_jy.py
@@ -1,3 +1,5 @@
+# -*- coding: utf-8 -*-
+#
 # Tests against problems we have seen in Jython's implementation of
 # buffer, bytes, bytearray, and memoryview to prevent possible
 # regression as well as integration with Java.
@@ -58,11 +60,11 @@
 
     def checkequal(self, expected, obj, methodname, *args):
         "check that object.method() returns expected result"
-        for B in (bytearray,): # (bytes, bytearray):
+        for B in (bytes, bytearray):
             obj = B(obj)
             realresult = getattr(obj, methodname)()
-            grumble = "%r.%s() returned %s" % (obj, methodname, realresult)
-            self.assertIs(expected, realresult, grumble)
+            grumble = "%r.%s() returned %r" % (obj, methodname, realresult)
+            self.assertEqual(expected, realresult, grumble)
             # print grumble, 'x' if realresult != expected else '.'
 
     LOWER = b'\xe0\xe7\xe9\xff' # Uppercase in Latin-1 but not ascii
@@ -114,6 +116,41 @@
             self.checkequal(True, b'A' + c + b'Titlecased Line', 'istitle')
             self.checkequal(True, b'A ' + c + b' Titlecased Line', 'istitle')
 
+    # The following case-twiddling tests supplement string_tests for
+    # non-ascii examples, using characters that are upper/lower-case
+    # in latin-1 but uncased in ascii.
+
+    def test_upper(self):
+        self.checkequal(b"WAS LOWER:" + self.LOWER,
+                        b"was lower:" + self.LOWER, 'upper')
+
+    def test_lower(self):
+        self.checkequal(b"was upper:" + self.UPPER,
+                        b"WAS UPPER:" + self.UPPER, 'lower')
+
+    def test_capitalize(self):
+        for c in self.LOWER:
+            self.checkequal(c + b"abcde",
+                            c + b"AbCdE", 'capitalize')
+
+    def test_swapcase(self):
+        self.checkequal(b"WAS lower:" + self.LOWER,
+                        b"was LOWER:" + self.LOWER, 'swapcase')
+        self.checkequal(b"was UPPER:" + self.UPPER,
+                        b"WAS upper:" + self.UPPER, 'swapcase')
+
+    def test_title(self):
+        utitle = u"Le Dîner À Étretat"
+        title = utitle.encode('latin-1')
+        lower = utitle.lower().encode('latin-1')
+        upper = utitle.upper().encode('latin-1')
+        # Check we treat an accented character as un-cased (=space)
+        self.checkequal(u"Le DîNer à éTretat".encode('latin-1'),
+                        lower, 'title')
+        self.checkequal(u"Le DÎNer À ÉTretat".encode('latin-1'),
+                        upper, 'title')
+        self.checkequal(u"Le DîNer À ÉTretat".encode('latin-1'),
+                        title, 'title')
 
 def test_main():
     test.test_support.run_unittest(
diff --git a/src/org/python/core/PyString.java b/src/org/python/core/PyString.java
--- a/src/org/python/core/PyString.java
+++ b/src/org/python/core/PyString.java
@@ -1050,7 +1050,21 @@
 
     @ExposedMethod(doc = BuiltinDocs.str_lower_doc)
     final String str_lower() {
-        return getString().toLowerCase(Locale.ROOT);
+        String s = getString();
+        int n = s.length();
+        if (n == 1) {
+            // Special-case single byte string
+            char c = s.charAt(0);
+            return _isupper(c) ? String.valueOf((char)(c ^ SWAP_CASE)) : s;
+        } else {
+            // Copy chars to buffer, converting to lower-case.
+            char[] buf = new char[n];
+            for (int i = 0; i < n; i++) {
+                char c = s.charAt(i);
+                buf[i] = _isupper(c) ? (char)(c ^ SWAP_CASE) : c;
+            }
+            return new String(buf);
+        }
     }
 
     public String upper() {
@@ -1059,7 +1073,21 @@
 
     @ExposedMethod(doc = BuiltinDocs.str_upper_doc)
     final String str_upper() {
-        return getString().toUpperCase(Locale.ROOT);
+        String s = getString();
+        int n = s.length();
+        if (n == 1) {
+            // Special-case single byte string
+            char c = s.charAt(0);
+            return _islower(c) ? String.valueOf((char)(c ^ SWAP_CASE)) : s;
+        } else {
+            // Copy chars to buffer, converting to upper-case.
+            char[] buf = new char[n];
+            for (int i = 0; i < n; i++) {
+                char c = s.charAt(i);
+                buf[i] = _islower(c) ? (char)(c ^ SWAP_CASE) : c;
+            }
+            return new String(buf);
+        }
     }
 
     public String title() {
@@ -1070,19 +1098,25 @@
     final String str_title() {
         char[] chars = getString().toCharArray();
         int n = chars.length;
-
         boolean previous_is_cased = false;
         for (int i = 0; i < n; i++) {
             char ch = chars[i];
-            if (previous_is_cased) {
-                chars[i] = Character.toLowerCase(ch);
-            } else {
-                chars[i] = Character.toTitleCase(ch);
-            }
-
-            if (Character.isLowerCase(ch) || Character.isUpperCase(ch) || Character.isTitleCase(ch)) {
+            if (_isalpha(ch)) {
+                if (previous_is_cased) {
+                    // Should be lower case
+                    if (_isupper(ch)) {
+                        chars[i] = (char)(ch ^ SWAP_CASE);
+                    }
+                } else {
+                    // Should be upper case
+                    if (_islower(ch)) {
+                        chars[i] = (char)(ch ^ SWAP_CASE);
+                    }
+                }
+                // And this was a letter
                 previous_is_cased = true;
             } else {
+                // This was not a letter
                 previous_is_cased = false;
             }
         }
@@ -1095,18 +1129,25 @@
 
     @ExposedMethod(doc = BuiltinDocs.str_swapcase_doc)
     final String str_swapcase() {
-        char[] chars = getString().toCharArray();
-        int n = chars.length;
-        for (int i = 0; i < n; i++) {
-            char c = chars[i];
-            if (Character.isUpperCase(c)) {
-                chars[i] = Character.toLowerCase(c);
-            } else if (Character.isLowerCase(c)) {
-                chars[i] = Character.toUpperCase(c);
+        String s = getString();
+        int n = s.length();
+        if (n == 1) {
+            // Special-case single byte string
+            char c = s.charAt(0);
+            return _isalpha(c) ? String.valueOf((char)(c ^ SWAP_CASE)) : s;
+        } else {
+            // Copy chars to buffer, converting lower to upper case, upper to lower case.
+            char[] buf = new char[n];
+            for (int i = 0; i < n; i++) {
+                char c = s.charAt(i);
+                buf[i] = _isalpha(c) ? (char)(c ^ SWAP_CASE) : c;
             }
+            return new String(buf);
         }
-        return new String(chars);
-    }
+    }
+
+    // Bit to twiddle (XOR) for lowercase letter to uppercase and vice-versa.
+    private static final int SWAP_CASE = 0x20;
 
     /**
      * Equivalent of Python <code>str.strip()</code> with no argument, meaning strip whitespace. Any
@@ -3071,11 +3112,22 @@
 
     @ExposedMethod(doc = BuiltinDocs.str_capitalize_doc)
     final String str_capitalize() {
-        if (getString().length() == 0) {
-            return getString();
+        String s = getString();
+        int n = s.length();
+        if (n == 0) {
+            return s;
+        } else {
+            char[] buf = new char[n];
+            // At least one byte: if lower convert to upper case.
+            char c = s.charAt(0);
+            buf[0] = _islower(c) ? (char)(c ^ SWAP_CASE) : c;
+            // Copy the rest, converting to lower case.
+            for (int i = 1; i < n; i++) {
+                c = s.charAt(i);
+                buf[i] = _isupper(c) ? (char)(c ^ SWAP_CASE) : c;
+            }
+            return new String(buf);
         }
-        String first = getString().substring(0, 1).toUpperCase();
-        return first.concat(getString().substring(1).toLowerCase());
     }
 
     /**

-- 
Repository URL: https://hg.python.org/jython


More information about the Jython-checkins mailing list