[Jython-checkins] jython: buffer() support in (str|unicode).startswith, .endswith

jeff.allen jython-checkins at python.org
Sun Oct 27 23:49:39 CET 2013


http://hg.python.org/jython/rev/3d730e5f7b65
changeset:   7145:3d730e5f7b65
user:        Jeff Allen <ja.py at farowl.co.uk>
date:        Sun Oct 27 17:00:06 2013 +0000
summary:
  buffer() support in (str|unicode).startswith, .endswith
The methods still only work correctly for strings in the basic plane (BMP).
Tests added to string_tests.py for buffer arguments.

files:
  Lib/test/string_tests.py           |   31 ++
  src/org/python/core/PyString.java  |  178 ++++++++++++----
  src/org/python/core/PyUnicode.java |    6 +-
  3 files changed, 161 insertions(+), 54 deletions(-)


diff --git a/Lib/test/string_tests.py b/Lib/test/string_tests.py
--- a/Lib/test/string_tests.py
+++ b/Lib/test/string_tests.py
@@ -1026,6 +1026,23 @@
 
         self.checkraises(TypeError, 'hello', 'startswith', (42,))
 
+        # Repeat some tests including buffer API objects (Jython addition)
+        if test_support.is_jython:
+            for buftype in (buffer, memoryview, bytearray):
+                self.checkequal(True, 'hello', 'startswith', buftype('he'))
+                self.checkequal(True, 'hello', 'startswith', buftype(''))
+                self.checkequal(False, 'hello', 'startswith', buftype('ello'))
+                self.checkequal(True, 'hello', 'startswith', buftype('ello'), 1)
+                self.checkequal(True, 'helloworld', 'startswith', buftype('lowo'), 3, 7)
+                self.checkequal(True, 'hello', 'startswith', buftype('he'), 0, -1)
+                self.checkequal(True, 'hello', 'startswith', buftype('ello'), -4)
+                self.checkequal(True, 'hello', 'startswith', buftype('o'), -1)
+                self.checkequal(True, 'hello', 'startswith', (buftype('he'), 'ha'))
+                self.checkequal(True, 'helloworld', 'startswith', (buftype('hellowo'),
+                                                            'rld', buftype('lowo')), 3)
+                self.checkequal(True, 'hello', 'startswith', ('lo', buftype('he')), 0, -1)
+                self.checkequal(True, 'hello', 'startswith', (buftype('he'), 'hel'), 0, 2)
+
     def test_endswith(self):
         self.checkequal(True, 'hello', 'endswith', 'lo')
         self.checkequal(False, 'hello', 'endswith', 'he')
@@ -1075,6 +1092,20 @@
 
         self.checkraises(TypeError, 'hello', 'endswith', (42,))
 
+        # Repeat some tests including buffer API objects (Jython addition)
+        if test_support.is_jython:
+            for buftype in (buffer, memoryview, bytearray):
+                self.checkequal(True, 'hello', 'endswith', buftype('lo'))
+                self.checkequal(False, 'hello', 'endswith', buftype('he'))
+                self.checkequal(True, 'hello', 'endswith', buftype(''))
+                self.checkequal(True, 'helloworld', 'endswith', buftype('worl'), 3, 9)
+                self.checkequal(True, 'helloworld', 'endswith', buftype('worl'), -5, -1)
+                self.checkequal(True, 'hello', 'endswith', (buftype('lo'), buftype('llo')))
+                self.checkequal(True, 'helloworld', 'endswith', ('hellowo',
+                                                            buftype('rld'), buftype('lowo')), 3)
+                self.checkequal(True, 'hello', 'endswith', ('hell', buftype('ell')), 0, -1)
+                self.checkequal(True, 'hello', 'endswith', ('he', buftype('hell')), 0, 4)
+
     def test___contains__(self):
         self.checkequal(True, '', '__contains__', '')
         self.checkequal(True, 'abc', '__contains__', '')
diff --git a/src/org/python/core/PyString.java b/src/org/python/core/PyString.java
--- a/src/org/python/core/PyString.java
+++ b/src/org/python/core/PyString.java
@@ -719,7 +719,7 @@
      * be omitted, showing up here as null) to indicate that the criterion is whitespace. They also
      * accept a unicode argument, not dealt with here.
      *
-     * @param obj to coerce to a String or nullk
+     * @param obj to coerce to a String or null
      * @param name of method
      * @return coerced value or null
      * @throws PyException if the coercion fails
@@ -739,6 +739,27 @@
         }
     }
 
+    /**
+     * Return a String equivalent to the argument according to the calling conventions of
+     * certain methods of <code>str</code>. Those methods accept anything bearing the buffer
+     * interface as a byte string, or accept a unicode argument, which they take responsibility
+     * for interpreting from its UTF-16 encoded form (the internal representation returned by
+     * {@link PyUnicode#getString()}).
+     *
+     * @param obj to coerce to a String
+     * @return coerced value
+     * @throws PyException if the coercion fails
+     */
+    private static String asBMPStringOrError(PyObject obj) {
+        // PyUnicode accepted here. Care required in the client if obj is not basic plane.
+        String ret = asStringOrNull(obj);
+        if (ret != null) {
+            return ret;
+        } else {
+            throw Py.TypeError("expected str, bytearray, unicode or buffer compatible object");
+        }
+    }
+
     @Override
     public boolean __contains__(PyObject o) {
         return str___contains__(o);
@@ -2309,7 +2330,7 @@
             // Promote the problem to a Unicode one
             return ((PyUnicode)decode()).unicode_count(subObj, start, end);
         } else {
-            // It ought to be None, null, some kind of bytes with the buffer API.
+            // It ought to be some kind of bytes with the buffer API.
             String sub = asStringOrError(subObj);
             return _count(sub, start, end);
         }
@@ -2455,7 +2476,7 @@
             // Promote the problem to a Unicode one
             return ((PyUnicode)decode()).unicode_find(subObj, start, end);
         } else {
-            // It ought to be None, null, some kind of bytes with the buffer API.
+            // It ought to be some kind of bytes with the buffer API.
             String sub = asStringOrError(subObj);
             return _find(sub, start, end);
         }
@@ -2541,7 +2562,7 @@
             // Promote the problem to a Unicode one
             return ((PyUnicode)decode()).unicode_rfind(subObj, start, end);
         } else {
-            // It ought to be None, null, some kind of bytes with the buffer API.
+            // It ought to be some kind of bytes with the buffer API.
             String sub = asStringOrError(subObj);
             return _rfind(sub, start, end);
         }
@@ -3075,92 +3096,145 @@
         return new PyUnicode(buf.toString());
     }
 
+    /**
+     * Equivalent to the Python <code>str.startswith</code> method testing whether a string starts
+     * with a specified prefix. <code>prefix</code> can also be a tuple of prefixes to look for.
+     *
+     * @param prefix string to check for (or a <code>PyTuple</code> of them).
+     * @return <code>true</code> if this string slice starts with a specified prefix, otherwise
+     *         <code>false</code>.
+     */
     public boolean startswith(PyObject prefix) {
         return str_startswith(prefix, null, null);
     }
 
+    /**
+     * Equivalent to the Python <code>str.startswith</code> method, testing whether a string starts
+     * with a specified prefix, where a sub-range is specified by <code>[offset:]</code>.
+     * <code>offset</code> is interpreted as in slice notation, with null or {@link Py#None}
+     * representing "missing". <code>prefix</code> can also be a tuple of prefixes to look for.
+     *
+     * @param prefix string to check for (or a <code>PyTuple</code> of them).
+     * @param offset start of slice.
+     * @return <code>true</code> if this string slice starts with a specified prefix, otherwise
+     *         <code>false</code>.
+     */
     public boolean startswith(PyObject prefix, PyObject offset) {
         return str_startswith(prefix, offset, null);
     }
 
+    /**
+     * Equivalent to the Python <code>str.startswith</code> method, testing whether a string starts
+     * with a specified prefix, where a sub-range is specified by <code>[start:end]</code>.
+     * Arguments <code>start</code> and <code>end</code> are interpreted as in slice notation, with
+     * null or {@link Py#None} representing "missing". <code>prefix</code> can also be a tuple of
+     * prefixes to look for.
+     *
+     * @param prefix string to check for (or a <code>PyTuple</code> of them).
+     * @param start start of slice.
+     * @param end end of slice.
+     * @return <code>true</code> if this string slice starts with a specified prefix, otherwise
+     *         <code>false</code>.
+     */
     public boolean startswith(PyObject prefix, PyObject start, PyObject end) {
         return str_startswith(prefix, start, end);
     }
 
     @ExposedMethod(defaults = {"null", "null"}, doc = BuiltinDocs.str_startswith_doc)
-    final boolean str_startswith(PyObject prefix, PyObject start, PyObject end) {
-
-        // XXX Accept PyObject that may be BufferProtocol or PyUnicode
-
-        int[] indices = translateIndices(start, end);
-
-        if (prefix instanceof PyString) {
-            String strPrefix = ((PyString)prefix).getString();
-            if (indices[1] - indices[0] < strPrefix.length()) {
-                return false;
-            }
-
-            return getString().startsWith(strPrefix, indices[0]);
-        } else if (prefix instanceof PyTuple) {
-            PyObject[] prefixes = ((PyTuple)prefix).getArray();
-
-            for (int i = 0; i < prefixes.length; i++) {
-                if (!(prefixes[i] instanceof PyString)) {
-                    throw Py.TypeError("expected a character buffer object");
-                }
-
-                String strPrefix = ((PyString)prefixes[i]).getString();
-                if (indices[1] - indices[0] < strPrefix.length()) {
-                    continue;
-                }
-
-                if (getString().startsWith(strPrefix, indices[0])) {
+    final boolean str_startswith(PyObject prefix, PyObject startObj, PyObject endObj) {
+        int[] indices = translateIndices(startObj, endObj);
+        int start = indices[0];
+        int sliceLen = indices[1] - start;
+
+        if (!(prefix instanceof PyTuple)) {
+            // It ought to be PyUnicode or some kind of bytes with the buffer API.
+            String s = asBMPStringOrError(prefix);
+            // If s is non-BMP, and this is a PyString (bytes), result will correctly be false.
+            return sliceLen >= s.length() && getString().startsWith(s, start);
+
+        } else {
+            // Loop will return true if this slice starts with any prefix in the tuple
+            for (PyObject prefixObj : ((PyTuple)prefix).getArray()) {
+                // It ought to be PyUnicode or some kind of bytes with the buffer API.
+                String s = asBMPStringOrError(prefixObj);
+                // If s is non-BMP, and this is a PyString (bytes), result will correctly be false.
+                if (sliceLen >= s.length() && getString().startsWith(s, start)) {
                     return true;
                 }
             }
+            // None matched
             return false;
-        } else {
-            throw Py.TypeError("expected a character buffer object or tuple");
         }
     }
 
+    /**
+     * Equivalent to the Python <code>str.endswith</code> method, testing whether a string ends with
+     * a specified suffix. <code>suffix</code> can also be a tuple of suffixes to look for.
+     *
+     * @param suffix string to check for (or a <code>PyTuple</code> of them).
+     * @return <code>true</code> if this string slice ends with a specified suffix, otherwise
+     *         <code>false</code>.
+     */
     public boolean endswith(PyObject suffix) {
         return str_endswith(suffix, null, null);
     }
 
+    /**
+     * Equivalent to the Python <code>str.endswith</code> method, testing whether a string ends with
+     * a specified suffix, where a sub-range is specified by <code>[start:]</code>.
+     * <code>start</code> is interpreted as in slice notation, with null or {@link Py#None}
+     * representing "missing". <code>suffix</code> can also be a tuple of suffixes to look for.
+     *
+     * @param suffix string to check for (or a <code>PyTuple</code> of them).
+     * @param start start of slice.
+     * @return <code>true</code> if this string slice ends with a specified suffix, otherwise
+     *         <code>false</code>.
+     */
     public boolean endswith(PyObject suffix, PyObject start) {
         return str_endswith(suffix, start, null);
     }
 
+    /**
+     * Equivalent to the Python <code>str.endswith</code> method, testing whether a string ends with
+     * a specified suffix, where a sub-range is specified by <code>[start:end]</code>. Arguments
+     * <code>start</code> and <code>end</code> are interpreted as in slice notation, with null or
+     * {@link Py#None} representing "missing". <code>suffix</code> can also be a tuple of suffixes
+     * to look for.
+     *
+     * @param suffix string to check for (or a <code>PyTuple</code> of them).
+     * @param start start of slice.
+     * @param end end of slice.
+     * @return <code>true</code> if this string slice ends with a specified suffix, otherwise
+     *         <code>false</code>.
+     */
     public boolean endswith(PyObject suffix, PyObject start, PyObject end) {
         return str_endswith(suffix, start, end);
     }
 
     @ExposedMethod(defaults = {"null", "null"}, doc = BuiltinDocs.str_endswith_doc)
-    final boolean str_endswith(PyObject suffix, PyObject start, PyObject end) {
-
-        // XXX Accept PyObject that may be BufferProtocol or PyUnicode
-
-        int[] indices = translateIndices(start, end);
-
+    final boolean str_endswith(PyObject suffix, PyObject startObj, PyObject endObj) {
+
+        int[] indices = translateIndices(startObj, endObj);
         String substr = getString().substring(indices[0], indices[1]);
-        if (suffix instanceof PyString) {
-            return substr.endsWith(((PyString)suffix).getString());
-        } else if (suffix instanceof PyTuple) {
-            PyObject[] suffixes = ((PyTuple)suffix).getArray();
-
-            for (int i = 0; i < suffixes.length; i++) {
-                if (!(suffixes[i] instanceof PyString)) {
-                    throw Py.TypeError("expected a character buffer object");
-                }
-
-                if (substr.endsWith(((PyString)suffixes[i]).getString())) {
+
+        if (!(suffix instanceof PyTuple)) {
+            // It ought to be PyUnicode or some kind of bytes with the buffer API.
+            String s = asBMPStringOrError(suffix);
+            // If s is non-BMP, and this is a PyString (bytes), result will correctly be false.
+            return substr.endsWith(s);
+
+        } else {
+            // Loop will return true if this slice ends with any suffix in the tuple
+            for (PyObject suffixObj : ((PyTuple)suffix).getArray()) {
+                // It ought to be PyUnicode or some kind of bytes with the buffer API.
+                String s = asBMPStringOrError(suffixObj);
+                // If s is non-BMP, and this is a PyString (bytes), result will correctly be false.
+                if (substr.endsWith(s)) {
                     return true;
                 }
             }
+            // None matched
             return false;
-        } else {
-            throw Py.TypeError("expected a character buffer object or tuple");
         }
     }
 
diff --git a/src/org/python/core/PyUnicode.java b/src/org/python/core/PyUnicode.java
--- a/src/org/python/core/PyUnicode.java
+++ b/src/org/python/core/PyUnicode.java
@@ -1220,13 +1220,15 @@
         return unicodeJoin(seq);
     }
 
-    @ExposedMethod(defaults = {"null", "null"}, doc = BuiltinDocs.unicode___getslice___doc)
+    @ExposedMethod(defaults = {"null", "null"}, doc = BuiltinDocs.unicode_startswith_doc)
     final boolean unicode_startswith(PyObject prefix, PyObject start, PyObject end) {
+        // FIXME: slice indexing logic incorrect when this is ASTRAL
         return str_startswith(prefix, start, end);
     }
 
-    @ExposedMethod(defaults = {"null", "null"}, doc = BuiltinDocs.unicode___getslice___doc)
+    @ExposedMethod(defaults = {"null", "null"}, doc = BuiltinDocs.unicode_endswith_doc)
     final boolean unicode_endswith(PyObject suffix, PyObject start, PyObject end) {
+        // FIXME: slice indexing logic incorrect when this is ASTRAL
         return str_endswith(suffix, start, end);
     }
 

-- 
Repository URL: http://hg.python.org/jython


More information about the Jython-checkins mailing list