[Jython-checkins] jython: str.lower, upper, capitalize, title, swapcase become ASCII.
jeff.allen
jython-checkins at python.org
Sat Oct 10 09:58:10 CEST 2015
https://hg.python.org/jython/rev/81d319539b45
changeset: 7749:81d319539b45
user: Jeff Allen <ja.py at farowl.co.uk>
date: Wed Sep 30 08:34:24 2015 +0100
summary:
str.lower, upper, capitalize, title, swapcase become ASCII.
This follows up the fix for #2364, to correct the Unicode-ness in these
other methods. Tests added for non-byte characters. Benchmarking of the
code in isolation shows it to be the same or faster. Other methods still
need the same treatment (strip, for example).
files:
Lib/test/test_bytes_jy.py | 43 +++++++-
src/org/python/core/PyString.java | 100 +++++++++++++----
2 files changed, 116 insertions(+), 27 deletions(-)
diff --git a/Lib/test/test_bytes_jy.py b/Lib/test/test_bytes_jy.py
--- a/Lib/test/test_bytes_jy.py
+++ b/Lib/test/test_bytes_jy.py
@@ -1,3 +1,5 @@
+# -*- coding: utf-8 -*-
+#
# Tests against problems we have seen in Jython's implementation of
# buffer, bytes, bytearray, and memoryview to prevent possible
# regression as well as integration with Java.
@@ -58,11 +60,11 @@
def checkequal(self, expected, obj, methodname, *args):
"check that object.method() returns expected result"
- for B in (bytearray,): # (bytes, bytearray):
+ for B in (bytes, bytearray):
obj = B(obj)
realresult = getattr(obj, methodname)()
- grumble = "%r.%s() returned %s" % (obj, methodname, realresult)
- self.assertIs(expected, realresult, grumble)
+ grumble = "%r.%s() returned %r" % (obj, methodname, realresult)
+ self.assertEqual(expected, realresult, grumble)
# print grumble, 'x' if realresult != expected else '.'
LOWER = b'\xe0\xe7\xe9\xff' # Uppercase in Latin-1 but not ascii
@@ -114,6 +116,41 @@
self.checkequal(True, b'A' + c + b'Titlecased Line', 'istitle')
self.checkequal(True, b'A ' + c + b' Titlecased Line', 'istitle')
+ # The following case-twiddling tests supplement string_tests for
+ # non-ascii examples, using characters that are upper/lower-case
+ # in latin-1 but uncased in ascii.
+
+ def test_upper(self):
+ self.checkequal(b"WAS LOWER:" + self.LOWER,
+ b"was lower:" + self.LOWER, 'upper')
+
+ def test_lower(self):
+ self.checkequal(b"was upper:" + self.UPPER,
+ b"WAS UPPER:" + self.UPPER, 'lower')
+
+ def test_capitalize(self):
+ for c in self.LOWER:
+ self.checkequal(c + b"abcde",
+ c + b"AbCdE", 'capitalize')
+
+ def test_swapcase(self):
+ self.checkequal(b"WAS lower:" + self.LOWER,
+ b"was LOWER:" + self.LOWER, 'swapcase')
+ self.checkequal(b"was UPPER:" + self.UPPER,
+ b"WAS upper:" + self.UPPER, 'swapcase')
+
+ def test_title(self):
+ utitle = u"Le Dîner À Étretat"
+ title = utitle.encode('latin-1')
+ lower = utitle.lower().encode('latin-1')
+ upper = utitle.upper().encode('latin-1')
+ # Check we treat an accented character as un-cased (=space)
+ self.checkequal(u"Le DîNer à éTretat".encode('latin-1'),
+ lower, 'title')
+ self.checkequal(u"Le DÎNer À ÉTretat".encode('latin-1'),
+ upper, 'title')
+ self.checkequal(u"Le DîNer À ÉTretat".encode('latin-1'),
+ title, 'title')
def test_main():
test.test_support.run_unittest(
diff --git a/src/org/python/core/PyString.java b/src/org/python/core/PyString.java
--- a/src/org/python/core/PyString.java
+++ b/src/org/python/core/PyString.java
@@ -1050,7 +1050,21 @@
@ExposedMethod(doc = BuiltinDocs.str_lower_doc)
final String str_lower() {
- return getString().toLowerCase(Locale.ROOT);
+ String s = getString();
+ int n = s.length();
+ if (n == 1) {
+ // Special-case single byte string
+ char c = s.charAt(0);
+ return _isupper(c) ? String.valueOf((char)(c ^ SWAP_CASE)) : s;
+ } else {
+ // Copy chars to buffer, converting to lower-case.
+ char[] buf = new char[n];
+ for (int i = 0; i < n; i++) {
+ char c = s.charAt(i);
+ buf[i] = _isupper(c) ? (char)(c ^ SWAP_CASE) : c;
+ }
+ return new String(buf);
+ }
}
public String upper() {
@@ -1059,7 +1073,21 @@
@ExposedMethod(doc = BuiltinDocs.str_upper_doc)
final String str_upper() {
- return getString().toUpperCase(Locale.ROOT);
+ String s = getString();
+ int n = s.length();
+ if (n == 1) {
+ // Special-case single byte string
+ char c = s.charAt(0);
+ return _islower(c) ? String.valueOf((char)(c ^ SWAP_CASE)) : s;
+ } else {
+ // Copy chars to buffer, converting to upper-case.
+ char[] buf = new char[n];
+ for (int i = 0; i < n; i++) {
+ char c = s.charAt(i);
+ buf[i] = _islower(c) ? (char)(c ^ SWAP_CASE) : c;
+ }
+ return new String(buf);
+ }
}
public String title() {
@@ -1070,19 +1098,25 @@
final String str_title() {
char[] chars = getString().toCharArray();
int n = chars.length;
-
boolean previous_is_cased = false;
for (int i = 0; i < n; i++) {
char ch = chars[i];
- if (previous_is_cased) {
- chars[i] = Character.toLowerCase(ch);
- } else {
- chars[i] = Character.toTitleCase(ch);
- }
-
- if (Character.isLowerCase(ch) || Character.isUpperCase(ch) || Character.isTitleCase(ch)) {
+ if (_isalpha(ch)) {
+ if (previous_is_cased) {
+ // Should be lower case
+ if (_isupper(ch)) {
+ chars[i] = (char)(ch ^ SWAP_CASE);
+ }
+ } else {
+ // Should be upper case
+ if (_islower(ch)) {
+ chars[i] = (char)(ch ^ SWAP_CASE);
+ }
+ }
+ // And this was a letter
previous_is_cased = true;
} else {
+ // This was not a letter
previous_is_cased = false;
}
}
@@ -1095,18 +1129,25 @@
@ExposedMethod(doc = BuiltinDocs.str_swapcase_doc)
final String str_swapcase() {
- char[] chars = getString().toCharArray();
- int n = chars.length;
- for (int i = 0; i < n; i++) {
- char c = chars[i];
- if (Character.isUpperCase(c)) {
- chars[i] = Character.toLowerCase(c);
- } else if (Character.isLowerCase(c)) {
- chars[i] = Character.toUpperCase(c);
+ String s = getString();
+ int n = s.length();
+ if (n == 1) {
+ // Special-case single byte string
+ char c = s.charAt(0);
+ return _isalpha(c) ? String.valueOf((char)(c ^ SWAP_CASE)) : s;
+ } else {
+ // Copy chars to buffer, converting lower to upper case, upper to lower case.
+ char[] buf = new char[n];
+ for (int i = 0; i < n; i++) {
+ char c = s.charAt(i);
+ buf[i] = _isalpha(c) ? (char)(c ^ SWAP_CASE) : c;
}
+ return new String(buf);
}
- return new String(chars);
- }
+ }
+
+ // Bit to twiddle (XOR) for lowercase letter to uppercase and vice-versa.
+ private static final int SWAP_CASE = 0x20;
/**
* Equivalent of Python <code>str.strip()</code> with no argument, meaning strip whitespace. Any
@@ -3071,11 +3112,22 @@
@ExposedMethod(doc = BuiltinDocs.str_capitalize_doc)
final String str_capitalize() {
- if (getString().length() == 0) {
- return getString();
+ String s = getString();
+ int n = s.length();
+ if (n == 0) {
+ return s;
+ } else {
+ char[] buf = new char[n];
+ // At least one byte: if lower convert to upper case.
+ char c = s.charAt(0);
+ buf[0] = _islower(c) ? (char)(c ^ SWAP_CASE) : c;
+ // Copy the rest, converting to lower case.
+ for (int i = 1; i < n; i++) {
+ c = s.charAt(i);
+ buf[i] = _isupper(c) ? (char)(c ^ SWAP_CASE) : c;
+ }
+ return new String(buf);
}
- String first = getString().substring(0, 1).toUpperCase();
- return first.concat(getString().substring(1).toLowerCase());
}
/**
--
Repository URL: https://hg.python.org/jython
More information about the Jython-checkins
mailing list