[Jython-checkins] jython: str character operations become ASCII. Completes fix for #2364.
jeff.allen
jython-checkins at python.org
Fri Sep 11 00:58:40 CEST 2015
https://hg.python.org/jython/rev/a77dad1d7050
changeset: 7729:a77dad1d7050
user: Jeff Allen <ja.py at farowl.co.uk>
date: Thu Sep 10 23:14:16 2015 +0100
summary:
str character operations become ASCII. Completes fix for #2364.
PyString isalpha, islower, isdigit and so on now use character
classification methods from BaseBytes, resulting in a pure ASCII
interpretation. (Possibly leaves some Unicode-ness in other methods.)
Run-time checks for non-byte characters (which throw IllegalArgumentException) are in place, just in case.
files:
NEWS | 1 +
src/org/python/core/PyString.java | 264 ++++++++---------
2 files changed, 125 insertions(+), 140 deletions(-)
diff --git a/NEWS b/NEWS
--- a/NEWS
+++ b/NEWS
@@ -10,6 +10,7 @@
- [ 2158, 2259 ] Fixed behaviour of relative from ... import *
- [ 1879 ] -m command now executes scripts from inside a jar file
- [ 2058 ] ClasspathPyImporter implements PEP 302 get_data (and others)
+ - [ 2364 ] bytearray and str: isalpha(), isupper() etc. now match Python 2
Jython 2.7
same as 2.7rc3
diff --git a/src/org/python/core/PyString.java b/src/org/python/core/PyString.java
--- a/src/org/python/core/PyString.java
+++ b/src/org/python/core/PyString.java
@@ -6,10 +6,10 @@
import java.math.BigInteger;
import java.util.ArrayList;
import java.util.Collection;
+import java.util.List;
+import java.util.Locale;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
-import java.util.List;
-import java.util.Locale;
import org.python.core.buffer.BaseBuffer;
import org.python.core.buffer.SimpleStringBuffer;
@@ -2662,8 +2662,8 @@
* Return the (lazily) compiled regular expression for a Python complex number. This is used
* within the regular expression patterns that define a priori acceptable strings in the complex
* constructors. The expression contributes five named capture groups a, b, x, y and j. x and y
- * are the two floats encountered, and if j is present, one of them is the imaginary part.
- * a and b are the optional parentheses. They must either both be present or both omitted.
+ * are the two floats encountered, and if j is present, one of them is the imaginary part. a and
+ * b are the optional parentheses. They must either both be present or both omitted.
*/
private static synchronized Pattern getComplexPattern() {
if (complexPattern == null) {
@@ -3602,77 +3602,99 @@
@ExposedMethod(doc = BuiltinDocs.str_islower_doc)
final boolean str_islower() {
- int n = getString().length();
-
- /* Shortcut for single character strings */
+ String s = getString();
+ int n = s.length();
+
if (n == 1) {
- return Character.isLowerCase(getString().charAt(0));
+ // Special case single character strings.
+ return _islower(s.charAt(0));
}
boolean cased = false;
for (int i = 0; i < n; i++) {
- char ch = getString().charAt(i);
-
- if (Character.isUpperCase(ch) || Character.isTitleCase(ch)) {
+ char ch = s.charAt(i);
+ if (_isupper(ch)) {
return false;
- } else if (!cased && Character.isLowerCase(ch)) {
+ } else if (!cased && _islower(ch)) {
cased = true;
}
}
return cased;
}
+ private boolean _islower(char ch) {
+ if (ch < 256) {
+ return BaseBytes.islower((byte)ch);
+ } else {
+ // This is an internal error. Really, the test should be unnecessary.
+ throw new java.lang.IllegalArgumentException("non-byte character in PyString");
+ }
+ }
+
public boolean isupper() {
return str_isupper();
}
@ExposedMethod(doc = BuiltinDocs.str_isupper_doc)
final boolean str_isupper() {
- int n = getString().length();
-
- /* Shortcut for single character strings */
+ String s = getString();
+ int n = s.length();
+
if (n == 1) {
- return Character.isUpperCase(getString().charAt(0));
+ // Special case single character strings.
+ return _isupper(s.charAt(0));
}
boolean cased = false;
for (int i = 0; i < n; i++) {
- char ch = getString().charAt(i);
-
- if (Character.isLowerCase(ch) || Character.isTitleCase(ch)) {
+ char ch = s.charAt(i);
+ if (_islower(ch)) {
return false;
- } else if (!cased && Character.isUpperCase(ch)) {
+ } else if (!cased && _isupper(ch)) {
cased = true;
}
}
return cased;
}
+ private boolean _isupper(char ch) {
+ if (ch < 256) {
+ return BaseBytes.isupper((byte)ch);
+ } else {
+ // This is an internal error. Really, the test should be unnecessary.
+ throw new java.lang.IllegalArgumentException("non-byte character in PyString");
+ }
+ }
+
public boolean isalpha() {
return str_isalpha();
}
@ExposedMethod(doc = BuiltinDocs.str_isalpha_doc)
final boolean str_isalpha() {
- int n = getString().length();
-
- /* Shortcut for single character strings */
+ String s = getString();
+ int n = s.length();
+
if (n == 1) {
- return Character.isLetter(getString().charAt(0));
+ // Special case single character strings.
+ return _isalpha(s.charAt(0));
}
- if (n == 0) {
- return false;
- }
-
for (int i = 0; i < n; i++) {
- char ch = getString().charAt(i);
-
- if (!Character.isLetter(ch)) {
+ if (!_isalpha(s.charAt(i))) {
return false;
}
}
- return true;
+ return n > 0;
+ }
+
+ private boolean _isalpha(char ch) {
+ if (ch < 256) {
+ return BaseBytes.isalpha((byte)ch);
+ } else {
+ // This is an internal error. Really, the test should be unnecessary.
+ throw new java.lang.IllegalArgumentException("non-byte character in PyString");
+ }
}
public boolean isalnum() {
@@ -3681,33 +3703,30 @@
@ExposedMethod(doc = BuiltinDocs.str_isalnum_doc)
final boolean str_isalnum() {
- int n = getString().length();
-
- /* Shortcut for single character strings */
+ String s = getString();
+ int n = s.length();
+
if (n == 1) {
- return _isalnum(getString().charAt(0));
+ // Special case single character strings.
+ return _isalnum(s.charAt(0));
}
- if (n == 0) {
- return false;
- }
-
for (int i = 0; i < n; i++) {
- char ch = getString().charAt(i);
-
- if (!_isalnum(ch)) {
+ if (!_isalnum(s.charAt(i))) {
return false;
}
}
- return true;
+ return n > 0;
}
private boolean _isalnum(char ch) {
- // This can ever be entirely compatible with CPython. In CPython
- // The type is not used, the numeric property is determined from
- // the presense of digit, decimal or numeric fields. These fields
- // are not available in exactly the same way in java.
- return Character.isLetterOrDigit(ch) || Character.getType(ch) == Character.LETTER_NUMBER;
+ // This is now entirely compatible with CPython, as long as only bytes are stored.
+ if (ch < 256) {
+ return BaseBytes.isalnum((byte)ch);
+ } else {
+ // This is an internal error. Really, the test should be unnecessary.
+ throw new java.lang.IllegalArgumentException("non-byte character in PyString");
+ }
}
public boolean isdecimal() {
@@ -3715,59 +3734,44 @@
}
@ExposedMethod(doc = BuiltinDocs.unicode_isdecimal_doc)
- final boolean str_isdecimal() {
- int n = getString().length();
-
- /* Shortcut for single character strings */
+ final boolean str_isdecimal() { // XXX this ought not to exist in str (in Python 2)
+ return str_isdigit();
+ }
+
+ private boolean _isdecimal(char ch) {
+ // See the comment in _isalnum. Here it is even worse.
+ return Character.getType(ch) == Character.DECIMAL_DIGIT_NUMBER;
+ }
+
+ public boolean isdigit() {
+ return str_isdigit();
+ }
+
+ @ExposedMethod(doc = BuiltinDocs.str_isdigit_doc)
+ final boolean str_isdigit() {
+ String s = getString();
+ int n = s.length();
+
if (n == 1) {
- char ch = getString().charAt(0);
- return _isdecimal(ch);
+ // Special case single character strings.
+ return _isdigit(s.charAt(0));
}
- if (n == 0) {
- return false;
- }
-
for (int i = 0; i < n; i++) {
- char ch = getString().charAt(i);
-
- if (!_isdecimal(ch)) {
+ if (!_isdigit(s.charAt(i))) {
return false;
}
}
- return true;
- }
-
- private boolean _isdecimal(char ch) {
- // See the comment in _isalnum. Here it is even worse.
- return Character.getType(ch) == Character.DECIMAL_DIGIT_NUMBER;
- }
-
- public boolean isdigit() {
- return str_isdigit();
- }
-
- @ExposedMethod(doc = BuiltinDocs.str_isdigit_doc)
- final boolean str_isdigit() {
- int n = getString().length();
-
- /* Shortcut for single character strings */
- if (n == 1) {
- return Character.isDigit(getString().charAt(0));
+ return n > 0;
+ }
+
+ private boolean _isdigit(char ch) {
+ if (ch < 256) {
+ return BaseBytes.isdigit((byte)ch);
+ } else {
+ // This is an internal error. Really, the test should be unnecessary.
+ throw new java.lang.IllegalArgumentException("non-byte character in PyString");
}
-
- if (n == 0) {
- return false;
- }
-
- for (int i = 0; i < n; i++) {
- char ch = getString().charAt(i);
-
- if (!Character.isDigit(ch)) {
- return false;
- }
- }
- return true;
}
public boolean isnumeric() {
@@ -3775,31 +3779,8 @@
}
@ExposedMethod(doc = BuiltinDocs.unicode_isnumeric_doc)
- final boolean str_isnumeric() {
- int n = getString().length();
-
- /* Shortcut for single character strings */
- if (n == 1) {
- return _isnumeric(getString().charAt(0));
- }
-
- if (n == 0) {
- return false;
- }
-
- for (int i = 0; i < n; i++) {
- char ch = getString().charAt(i);
- if (!_isnumeric(ch)) {
- return false;
- }
- }
- return true;
- }
-
- private boolean _isnumeric(char ch) {
- int type = Character.getType(ch);
- return type == Character.DECIMAL_DIGIT_NUMBER || type == Character.LETTER_NUMBER
- || type == Character.OTHER_NUMBER;
+ final boolean str_isnumeric() { // XXX this ought not to exist in str (in Python 2)
+ return str_isdigit();
}
public boolean istitle() {
@@ -3808,26 +3789,25 @@
@ExposedMethod(doc = BuiltinDocs.str_istitle_doc)
final boolean str_istitle() {
- int n = getString().length();
-
- /* Shortcut for single character strings */
+ String s = getString();
+ int n = s.length();
+
if (n == 1) {
- return Character.isTitleCase(getString().charAt(0))
- || Character.isUpperCase(getString().charAt(0));
+ // Special case single character strings.
+ return _isupper(s.charAt(0));
}
boolean cased = false;
boolean previous_is_cased = false;
for (int i = 0; i < n; i++) {
- char ch = getString().charAt(i);
-
- if (Character.isUpperCase(ch) || Character.isTitleCase(ch)) {
+ char ch = s.charAt(i);
+ if (_isupper(ch)) {
if (previous_is_cased) {
return false;
}
previous_is_cased = true;
cased = true;
- } else if (Character.isLowerCase(ch)) {
+ } else if (_islower(ch)) {
if (!previous_is_cased) {
return false;
}
@@ -3846,25 +3826,29 @@
@ExposedMethod(doc = BuiltinDocs.str_isspace_doc)
final boolean str_isspace() {
- int n = getString().length();
-
- /* Shortcut for single character strings */
+ String s = getString();
+ int n = s.length();
+
if (n == 1) {
- return Character.isWhitespace(getString().charAt(0));
+ // Special case single character strings.
+ return _isspace(s.charAt(0));
}
- if (n == 0) {
- return false;
- }
-
for (int i = 0; i < n; i++) {
- char ch = getString().charAt(i);
-
- if (!Character.isWhitespace(ch)) {
+ if (!_isspace(s.charAt(i))) {
return false;
}
}
- return true;
+ return n > 0;
+ }
+
+ private boolean _isspace(char ch) {
+ if (ch < 256) {
+ return BaseBytes.isspace((byte)ch);
+ } else {
+ // This is an internal error. Really, the test should be unnecessary.
+ throw new java.lang.IllegalArgumentException("non-byte character in PyString");
+ }
}
public boolean isunicode() {
--
Repository URL: https://hg.python.org/jython
More information about the Jython-checkins
mailing list