[Jython-checkins] jython: buffer() support in (str|unicode).startswith, .endswith
jeff.allen
jython-checkins at python.org
Sun Oct 27 23:49:39 CET 2013
http://hg.python.org/jython/rev/3d730e5f7b65
changeset: 7145:3d730e5f7b65
user: Jeff Allen <ja.py at farowl.co.uk>
date: Sun Oct 27 17:00:06 2013 +0000
summary:
buffer() support in (str|unicode).startswith, .endswith
Methods still only work for characters in the Basic Multilingual Plane (BMP).
Tests added to string_tests.py for buffer arguments.
files:
Lib/test/string_tests.py | 31 ++
src/org/python/core/PyString.java | 178 ++++++++++++----
src/org/python/core/PyUnicode.java | 6 +-
3 files changed, 161 insertions(+), 54 deletions(-)
diff --git a/Lib/test/string_tests.py b/Lib/test/string_tests.py
--- a/Lib/test/string_tests.py
+++ b/Lib/test/string_tests.py
@@ -1026,6 +1026,23 @@
self.checkraises(TypeError, 'hello', 'startswith', (42,))
+ # Repeat some tests including buffer API objects (Jython addition)
+ if test_support.is_jython:
+ for buftype in (buffer, memoryview, bytearray):
+ self.checkequal(True, 'hello', 'startswith', buftype('he'))
+ self.checkequal(True, 'hello', 'startswith', buftype(''))
+ self.checkequal(False, 'hello', 'startswith', buftype('ello'))
+ self.checkequal(True, 'hello', 'startswith', buftype('ello'), 1)
+ self.checkequal(True, 'helloworld', 'startswith', buftype('lowo'), 3, 7)
+ self.checkequal(True, 'hello', 'startswith', buftype('he'), 0, -1)
+ self.checkequal(True, 'hello', 'startswith', buftype('ello'), -4)
+ self.checkequal(True, 'hello', 'startswith', buftype('o'), -1)
+ self.checkequal(True, 'hello', 'startswith', (buftype('he'), 'ha'))
+ self.checkequal(True, 'helloworld', 'startswith', (buftype('hellowo'),
+ 'rld', buftype('lowo')), 3)
+ self.checkequal(True, 'hello', 'startswith', ('lo', buftype('he')), 0, -1)
+ self.checkequal(True, 'hello', 'startswith', (buftype('he'), 'hel'), 0, 2)
+
def test_endswith(self):
self.checkequal(True, 'hello', 'endswith', 'lo')
self.checkequal(False, 'hello', 'endswith', 'he')
@@ -1075,6 +1092,20 @@
self.checkraises(TypeError, 'hello', 'endswith', (42,))
+ # Repeat some tests including buffer API objects (Jython addition)
+ if test_support.is_jython:
+ for buftype in (buffer, memoryview, bytearray):
+ self.checkequal(True, 'hello', 'endswith', buftype('lo'))
+ self.checkequal(False, 'hello', 'endswith', buftype('he'))
+ self.checkequal(True, 'hello', 'endswith', buftype(''))
+ self.checkequal(True, 'helloworld', 'endswith', buftype('worl'), 3, 9)
+ self.checkequal(True, 'helloworld', 'endswith', buftype('worl'), -5, -1)
+ self.checkequal(True, 'hello', 'endswith', (buftype('lo'), buftype('llo')))
+ self.checkequal(True, 'helloworld', 'endswith', ('hellowo',
+ buftype('rld'), buftype('lowo')), 3)
+ self.checkequal(True, 'hello', 'endswith', ('hell', buftype('ell')), 0, -1)
+ self.checkequal(True, 'hello', 'endswith', ('he', buftype('hell')), 0, 4)
+
def test___contains__(self):
self.checkequal(True, '', '__contains__', '')
self.checkequal(True, 'abc', '__contains__', '')
diff --git a/src/org/python/core/PyString.java b/src/org/python/core/PyString.java
--- a/src/org/python/core/PyString.java
+++ b/src/org/python/core/PyString.java
@@ -719,7 +719,7 @@
* be omitted, showing up here as null) to indicate that the criterion is whitespace. They also
* accept a unicode argument, not dealt with here.
*
- * @param obj to coerce to a String or nullk
+ * @param obj to coerce to a String or null
* @param name of method
* @return coerced value or null
* @throws PyException if the coercion fails
@@ -739,6 +739,27 @@
}
}
+ /**
+ * Return a String equivalent to the argument according to the calling conventions of
+ * certain methods of <code>str</code>. Those methods accept anything bearing the buffer
+ * interface as a byte string, or accept a unicode argument for which they accept responsibility
+ * to interpret from its UTF-16 encoded form (the internal representation returned by
+ * {@link PyUnicode#getString()}).
+ *
+ * @param obj to coerce to a String
+ * @return coerced value
+ * @throws PyException if the coercion fails
+ */
+ private static String asBMPStringOrError(PyObject obj) {
+ // PyUnicode accepted here. Care required in the client if obj is not basic plane.
+ String ret = asStringOrNull(obj);
+ if (ret != null) {
+ return ret;
+ } else {
+ throw Py.TypeError("expected str, bytearray, unicode or buffer compatible object");
+ }
+ }
+
@Override
public boolean __contains__(PyObject o) {
return str___contains__(o);
@@ -2309,7 +2330,7 @@
// Promote the problem to a Unicode one
return ((PyUnicode)decode()).unicode_count(subObj, start, end);
} else {
- // It ought to be None, null, some kind of bytes with the buffer API.
+ // It ought to be some kind of bytes with the buffer API.
String sub = asStringOrError(subObj);
return _count(sub, start, end);
}
@@ -2455,7 +2476,7 @@
// Promote the problem to a Unicode one
return ((PyUnicode)decode()).unicode_find(subObj, start, end);
} else {
- // It ought to be None, null, some kind of bytes with the buffer API.
+ // It ought to be some kind of bytes with the buffer API.
String sub = asStringOrError(subObj);
return _find(sub, start, end);
}
@@ -2541,7 +2562,7 @@
// Promote the problem to a Unicode one
return ((PyUnicode)decode()).unicode_rfind(subObj, start, end);
} else {
- // It ought to be None, null, some kind of bytes with the buffer API.
+ // It ought to be some kind of bytes with the buffer API.
String sub = asStringOrError(subObj);
return _rfind(sub, start, end);
}
@@ -3075,92 +3096,145 @@
return new PyUnicode(buf.toString());
}
+ /**
+ * Equivalent to the Python <code>str.startswith</code> method testing whether a string starts
+ * with a specified prefix. <code>prefix</code> can also be a tuple of prefixes to look for.
+ *
+ * @param prefix string to check for (or a <code>PyTuple</code> of them).
+ * @return <code>true</code> if this string slice starts with a specified prefix, otherwise
+ * <code>false</code>.
+ */
public boolean startswith(PyObject prefix) {
return str_startswith(prefix, null, null);
}
+ /**
+ * Equivalent to the Python <code>str.startswith</code> method, testing whether a string starts
+ * with a specified prefix, where a sub-range is specified by <code>[offset:]</code>.
+ * <code>offset</code> is interpreted as in slice notation, with null or {@link Py#None}
+ * representing "missing". <code>prefix</code> can also be a tuple of prefixes to look for.
+ *
+ * @param prefix string to check for (or a <code>PyTuple</code> of them).
+ * @param offset start of slice.
+ * @return <code>true</code> if this string slice starts with a specified prefix, otherwise
+ * <code>false</code>.
+ */
public boolean startswith(PyObject prefix, PyObject offset) {
return str_startswith(prefix, offset, null);
}
+ /**
+ * Equivalent to the Python <code>str.startswith</code> method, testing whether a string starts
+ * with a specified prefix, where a sub-range is specified by <code>[start:end]</code>.
+ * Arguments <code>start</code> and <code>end</code> are interpreted as in slice notation, with
+ * null or {@link Py#None} representing "missing". <code>prefix</code> can also be a tuple of
+ * prefixes to look for.
+ *
+ * @param prefix string to check for (or a <code>PyTuple</code> of them).
+ * @param start start of slice.
+ * @param end end of slice.
+ * @return <code>true</code> if this string slice starts with a specified prefix, otherwise
+ * <code>false</code>.
+ */
public boolean startswith(PyObject prefix, PyObject start, PyObject end) {
return str_startswith(prefix, start, end);
}
@ExposedMethod(defaults = {"null", "null"}, doc = BuiltinDocs.str_startswith_doc)
- final boolean str_startswith(PyObject prefix, PyObject start, PyObject end) {
-
- // XXX Accept PyObject that may be BufferProtocol or PyUnicode
-
- int[] indices = translateIndices(start, end);
-
- if (prefix instanceof PyString) {
- String strPrefix = ((PyString)prefix).getString();
- if (indices[1] - indices[0] < strPrefix.length()) {
- return false;
- }
-
- return getString().startsWith(strPrefix, indices[0]);
- } else if (prefix instanceof PyTuple) {
- PyObject[] prefixes = ((PyTuple)prefix).getArray();
-
- for (int i = 0; i < prefixes.length; i++) {
- if (!(prefixes[i] instanceof PyString)) {
- throw Py.TypeError("expected a character buffer object");
- }
-
- String strPrefix = ((PyString)prefixes[i]).getString();
- if (indices[1] - indices[0] < strPrefix.length()) {
- continue;
- }
-
- if (getString().startsWith(strPrefix, indices[0])) {
+ final boolean str_startswith(PyObject prefix, PyObject startObj, PyObject endObj) {
+ int[] indices = translateIndices(startObj, endObj);
+ int start = indices[0];
+ int sliceLen = indices[1] - start;
+
+ if (!(prefix instanceof PyTuple)) {
+ // It ought to be PyUnicode or some kind of bytes with the buffer API.
+ String s = asBMPStringOrError(prefix);
+ // If s is non-BMP, and this is a PyString (bytes), result will correctly be false.
+ return sliceLen >= s.length() && getString().startsWith(s, start);
+
+ } else {
+ // Loop will return true if this slice starts with any prefix in the tuple
+ for (PyObject prefixObj : ((PyTuple)prefix).getArray()) {
+ // It ought to be PyUnicode or some kind of bytes with the buffer API.
+ String s = asBMPStringOrError(prefixObj);
+ // If s is non-BMP, and this is a PyString (bytes), result will correctly be false.
+ if (sliceLen >= s.length() && getString().startsWith(s, start)) {
return true;
}
}
+ // None matched
return false;
- } else {
- throw Py.TypeError("expected a character buffer object or tuple");
}
}
+ /**
+ * Equivalent to the Python <code>str.endswith</code> method, testing whether a string ends with
+ * a specified suffix. <code>suffix</code> can also be a tuple of suffixes to look for.
+ *
+ * @param suffix string to check for (or a <code>PyTuple</code> of them).
+ * @return <code>true</code> if this string slice ends with a specified suffix, otherwise
+ * <code>false</code>.
+ */
public boolean endswith(PyObject suffix) {
return str_endswith(suffix, null, null);
}
+ /**
+ * Equivalent to the Python <code>str.endswith</code> method, testing whether a string ends with
+ * a specified suffix, where a sub-range is specified by <code>[start:]</code>.
+ * <code>start</code> is interpreted as in slice notation, with null or {@link Py#None}
+ * representing "missing". <code>suffix</code> can also be a tuple of suffixes to look for.
+ *
+ * @param suffix string to check for (or a <code>PyTuple</code> of them).
+ * @param start start of slice.
+ * @return <code>true</code> if this string slice ends with a specified suffix, otherwise
+ * <code>false</code>.
+ */
public boolean endswith(PyObject suffix, PyObject start) {
return str_endswith(suffix, start, null);
}
+ /**
+ * Equivalent to the Python <code>str.endswith</code> method, testing whether a string ends with
+ * a specified suffix, where a sub-range is specified by <code>[start:end]</code>. Arguments
+ * <code>start</code> and <code>end</code> are interpreted as in slice notation, with null or
+ * {@link Py#None} representing "missing". <code>suffix</code> can also be a tuple of suffixes
+ * to look for.
+ *
+ * @param suffix string to check for (or a <code>PyTuple</code> of them).
+ * @param start start of slice.
+ * @param end end of slice.
+ * @return <code>true</code> if this string slice ends with a specified suffix, otherwise
+ * <code>false</code>.
+ */
public boolean endswith(PyObject suffix, PyObject start, PyObject end) {
return str_endswith(suffix, start, end);
}
@ExposedMethod(defaults = {"null", "null"}, doc = BuiltinDocs.str_endswith_doc)
- final boolean str_endswith(PyObject suffix, PyObject start, PyObject end) {
-
- // XXX Accept PyObject that may be BufferProtocol or PyUnicode
-
- int[] indices = translateIndices(start, end);
-
+ final boolean str_endswith(PyObject suffix, PyObject startObj, PyObject endObj) {
+
+ int[] indices = translateIndices(startObj, endObj);
String substr = getString().substring(indices[0], indices[1]);
- if (suffix instanceof PyString) {
- return substr.endsWith(((PyString)suffix).getString());
- } else if (suffix instanceof PyTuple) {
- PyObject[] suffixes = ((PyTuple)suffix).getArray();
-
- for (int i = 0; i < suffixes.length; i++) {
- if (!(suffixes[i] instanceof PyString)) {
- throw Py.TypeError("expected a character buffer object");
- }
-
- if (substr.endsWith(((PyString)suffixes[i]).getString())) {
+
+ if (!(suffix instanceof PyTuple)) {
+ // It ought to be PyUnicode or some kind of bytes with the buffer API.
+ String s = asBMPStringOrError(suffix);
+ // If s is non-BMP, and this is a PyString (bytes), result will correctly be false.
+ return substr.endsWith(s);
+
+ } else {
+ // Loop will return true if this slice ends with any suffix in the tuple
+ for (PyObject suffixObj : ((PyTuple)suffix).getArray()) {
+ // It ought to be PyUnicode or some kind of bytes with the buffer API.
+ String s = asBMPStringOrError(suffixObj);
+ // If s is non-BMP, and this is a PyString (bytes), result will correctly be false.
+ if (substr.endsWith(s)) {
return true;
}
}
+ // None matched
return false;
- } else {
- throw Py.TypeError("expected a character buffer object or tuple");
}
}
diff --git a/src/org/python/core/PyUnicode.java b/src/org/python/core/PyUnicode.java
--- a/src/org/python/core/PyUnicode.java
+++ b/src/org/python/core/PyUnicode.java
@@ -1220,13 +1220,15 @@
return unicodeJoin(seq);
}
- @ExposedMethod(defaults = {"null", "null"}, doc = BuiltinDocs.unicode___getslice___doc)
+ @ExposedMethod(defaults = {"null", "null"}, doc = BuiltinDocs.unicode_startswith_doc)
final boolean unicode_startswith(PyObject prefix, PyObject start, PyObject end) {
+ // FIXME: slice indexing logic incorrect when this is ASTRAL
return str_startswith(prefix, start, end);
}
- @ExposedMethod(defaults = {"null", "null"}, doc = BuiltinDocs.unicode___getslice___doc)
+ @ExposedMethod(defaults = {"null", "null"}, doc = BuiltinDocs.unicode_endswith_doc)
final boolean unicode_endswith(PyObject suffix, PyObject start, PyObject end) {
+ // FIXME: slice indexing logic incorrect when this is ASTRAL
return str_endswith(suffix, start, end);
}
--
Repository URL: http://hg.python.org/jython
More information about the Jython-checkins
mailing list