[Jython-checkins] jython: Implemented bytearray.count, remove, index, rindex and contains ('in'

Wed May 30 05:17:20 CEST 2012

http://hg.python.org/jython/rev/826d8f4c8014
changeset:   6675:826d8f4c8014
user:        Jeff Allen <ja...py at farowl.co.uk>
date:        Sat May 26 23:44:47 2012 +0100
summary:
  Implemented bytearray.count, remove, index, rindex and __contains__ ('in' operator).
Relatively easy using same apparatus created for replace. Perhaps finally have __contains__ responding correctly to its several feasible argument types. Now scoring 2 failures and 65 errors on test_bytes.py

files:
  src/org/python/core/BaseBytes.java   |   66 +++-
  src/org/python/core/PyByteArray.java |  234 +++++++++++++-
  2 files changed, 264 insertions(+), 36 deletions(-)

diff --git a/src/org/python/core/BaseBytes.java b/src/org/python/core/BaseBytes.java
--- a/src/org/python/core/BaseBytes.java
+++ b/src/org/python/core/BaseBytes.java
@@ -1431,20 +1431,24 @@
 
     /**
      * Search for the target in this byte array, returning true if found and false if not. The
-     * target must be compatible with the Python byte range.
+     * target must either be convertible to an integer in the Python byte range, or be capable of
+     * being viewed as a byte array.
      *
      * @param target byte value to search for
      * @return true iff found
      */
     protected final synchronized boolean basebytes___contains__(PyObject target) {
-        byte t = byteCheck(target);
-        int jmax = offset + size;
-        for (int j = offset; j < jmax; j++) {
-            if (storage[j] == t) {
-                return true;
-            }
+        if (target.isIndex()) {
+            // Caller is treating this as an array of integers, so the value has to be in range.
+            byte b = byteCheck(target.asIndex());
+            return index(b) >= 0;
+        } else {
+            // Caller is treating this as a byte-string and looking for substring 'target'
+            View targetView = getViewOrError(target);
+            Finder finder = new Finder(targetView);
+            finder.setText(this);
+            return finder.nextIndex() >= 0;
         }
-        return false;
     }
 
     /**
@@ -1572,6 +1576,23 @@
      */
 
     /**
+     * The very simplest kind of find operation: return the index in the byte array of the first
+     * occurrence of the byte value
+     *
+     * @param b byte to search for
+     * @return index in the byte array (0..size-1) or -1 if not found
+     */
+    protected int index(byte b) {
+        int limit = offset + size;
+        for (int p = offset; p < limit; p++) {
+            if (storage[p] == b) {
+                return p - offset;
+            }
+        }
+        return -1;
+    }
+
+    /**
      * This class implements the Boyer-Moore-Horspool Algorithm for findind a pattern in text,
      * applied to byte arrays. The BMH algorithm uses a table of bad-character skips derived from
      * the pattern. The bad-character skips table tells us how far from the end of the pattern is a
@@ -1921,6 +1942,29 @@
     }
 
     /**
+     * Ready-to-expose implementation of Python <code>count( sub [, start [, end ]] )</code>.
+     *  Return
+     * the number of non-overlapping occurrences of <code>sub</code> in the slice [start:end].
+     * Optional arguments <code>start</code> and <code>end</code> (which may be <code>null</code> or
+     * <code>Py.None</code> ) are interpreted as in slice notation.
+     *
+     * @param sub bytes to find
+     * @param ostart of slice to search
+     * @param oend of slice to search
+     * @return count of occurrences of sub within this byte array
+     */
+    final int basebytes_count(PyObject sub, PyObject ostart, PyObject oend) {
+        Finder finder = new Finder(getViewOrError(sub));
+
+        // Convert [start:end] to integers
+        PySlice s = new PySlice(ostart, oend, null);
+        int[] index = s.indicesEx(size());  // [ start, end, 1, end-start ]
+
+        // Make this slice the thing we count within.
+        return finder.count(storage, offset + index[0], index[3]);
+    }
+
+    /**
      * Ready-to-expose implementation of Python <code>find( sub [, start [, end ]] )</code>. Return
      * the lowest index in the byte array where byte sequence <code>sub</code> is found, such that
      * <code>sub</code> is contained in the slice <code>[start:end]</code>. Arguments
@@ -1931,7 +1975,7 @@
      * @param sub bytes to find
      * @param ostart of slice to search
      * @param oend of slice to search
-     * @return index of start of ocurrence of sub within this byte array
+     * @return index of start of occurrence of sub within this byte array
      */
     final int basebytes_find(PyObject sub, PyObject ostart, PyObject oend) {
         Finder finder = new Finder(getViewOrError(sub));
@@ -1949,7 +1993,7 @@
      * @param sub bytes to find
      * @param ostart of slice to search
      * @param oend of slice to search
-     * @return index of start of ocurrence of sub within this byte array
+     * @return index of start of occurrence of sub within this byte array
      */
     final int basebytes_rfind(PyObject sub, PyObject ostart, PyObject oend) {
         Finder finder = new ReverseFinder(getViewOrError(sub));
@@ -1964,7 +2008,7 @@
      * @param finder for the bytes to find, sometime forwards, sometime backwards
      * @param ostart of slice to search
      * @param oend of slice to search
-     * @return index of start of ocurrence of sub within this byte array
+     * @return index of start of occurrence of sub within this byte array
      */
     private final int find(Finder finder, PyObject ostart, PyObject oend) {
 
diff --git a/src/org/python/core/PyByteArray.java b/src/org/python/core/PyByteArray.java
--- a/src/org/python/core/PyByteArray.java
+++ b/src/org/python/core/PyByteArray.java
@@ -813,6 +813,17 @@
         pyinsert(size, o);
     }
 
+    /**
+     * Implementation of the standard Python __contains__ method, which in turn implements the
+     * <code>in</code> operator.
+     *
+     * @param o the element to search for in this bytearray.
+     * @return the result of the search.
+     **/
+    public boolean __contains__(PyObject o) {
+        return basebytes___contains__(o);
+    }
+
     @ExposedMethod(doc = BuiltinDocs.bytearray___contains___doc)
     final boolean bytearray___contains__(PyObject o) {
         return basebytes___contains__(o);
@@ -823,51 +834,54 @@
         return basebytes_decode(args, keywords);
     }
 
+
     /**
-     * Implementation of Python <code>find(sub)</code>. Return the lowest index in the byte array
-     * where byte sequence <code>sub</code> is found. Return -1 if <code>sub</code> is not found.
+     * Implementation of Python <code>count(sub)</code>.
+     *  Return
+     * the number of non-overlapping occurrences of <code>sub</code> in this byte array.
      *
      * @param sub sequence to find (of a type viewable as a byte sequence)
-     * @return index of start of ocurrence of sub within this byte array
+     * @return count of occurrences of sub within this byte array
      */
-    public int find(PyObject sub) {
-        return basebytes_find(sub, null, null);
+    public int count(PyObject sub) {
+        return basebytes_count(sub, null, null);
     }
 
     /**
-     * Implementation of Python <code>find( sub [, start ] )</code>. Return the lowest index in the
-     * byte array where byte sequence <code>sub</code> is found, such that <code>sub</code> is
-     * contained in the slice <code>[start:]</code>. Return -1 if <code>sub</code> is not found.
+     * Implementation of Python <code>count( sub [, start ] )</code>.
+     *  Return
+     * the number of non-overlapping occurrences of <code>sub</code> in the range [start:].
      *
      * @param sub sequence to find (of a type viewable as a byte sequence)
      * @param start of slice to search
-     * @return index of start of ocurrence of sub within this byte array
+     * @return count of occurrences of sub within this byte array
      */
-    public int find(PyObject sub, PyObject start) {
-        return basebytes_find(sub, start, null);
+    public int count(PyObject sub, PyObject start) {
+        return basebytes_count(sub, start, null);
     }
 
     /**
-     * Implementation of Python <code>find( sub [, start [, end ]] )</code>. Return the lowest index
-     * in the byte array where byte sequence <code>sub</code> is found, such that <code>sub</code>
-     * is contained in the slice <code>[start:end]</code>. Arguments <code>start</code> and
-     * <code>end</code> (which may be <code>null</code> or <code>Py.None</code> ) are interpreted as
-     * in slice notation. Return -1 if <code>sub</code> is not found.
+     * Implementation of Python <code>count( sub [, start [, end ]] )</code>.
+     *  Return
+     * the number of non-overlapping occurrences of <code>sub</code> in the slice [start:end].
+     * Optional arguments <code>start</code> and <code>end</code> (which may be <code>null</code> or
+     * <code>Py.None</code> ) are interpreted as in slice notation.
      *
      * @param sub sequence to find (of a type viewable as a byte sequence)
      * @param start of slice to search
      * @param end of slice to search
-     * @return index of start of ocurrence of sub within this byte array
+     * @return count of occurrences of sub within this byte array
      */
-    public int find(PyObject sub, PyObject start, PyObject end) {
-        return basebytes_find(sub, start, end);
+    public int count(PyObject sub, PyObject start, PyObject end) {
+        return basebytes_count(sub, start, end);
     }
 
-    @ExposedMethod(defaults = {"null", "null"}, doc = BuiltinDocs.bytearray_find_doc)
-    final int bytearray_find(PyObject sub, PyObject start, PyObject end) {
-        return basebytes_find(sub, start, end);
+    @ExposedMethod(defaults = {"null", "null"}, doc = BuiltinDocs.bytearray_count_doc)
+    final int bytearray_count(PyObject sub, PyObject start, PyObject end) {
+        return basebytes_count(sub, start, end);
     }
 
+
     /**
      * Append the elements in the argument sequence to the end of the array, equivalent to:
      * <code>s[len(s):len(s)] = o</code>. The argument must be a subclass of BaseBytes or an
@@ -886,6 +900,51 @@
         setslice(size, size, 1, o);
     }
 
+    /**
+     * Implementation of Python <code>find(sub)</code>. Return the lowest index in the byte array
+     * where byte sequence <code>sub</code> is found. Return -1 if <code>sub</code> is not found.
+     *
+     * @param sub sequence to find (of a type viewable as a byte sequence)
+     * @return index of start of occurrence of sub within this byte array
+     */
+    public int find(PyObject sub) {
+        return basebytes_find(sub, null, null);
+    }
+
+    /**
+     * Implementation of Python <code>find( sub [, start ] )</code>. Return the lowest index in the
+     * byte array where byte sequence <code>sub</code> is found, such that <code>sub</code> is
+     * contained in the slice <code>[start:]</code>. Return -1 if <code>sub</code> is not found.
+     *
+     * @param sub sequence to find (of a type viewable as a byte sequence)
+     * @param start of slice to search
+     * @return index of start of occurrence of sub within this byte array
+     */
+    public int find(PyObject sub, PyObject start) {
+        return basebytes_find(sub, start, null);
+    }
+
+    /**
+     * Implementation of Python <code>find( sub [, start [, end ]] )</code>. Return the lowest index
+     * in the byte array where byte sequence <code>sub</code> is found, such that <code>sub</code>
+     * is contained in the slice <code>[start:end]</code>. Arguments <code>start</code> and
+     * <code>end</code> (which may be <code>null</code> or <code>Py.None</code> ) are interpreted as
+     * in slice notation. Return -1 if <code>sub</code> is not found.
+     *
+     * @param sub sequence to find (of a type viewable as a byte sequence)
+     * @param start of slice to search
+     * @param end of slice to search
+     * @return index of start of occurrence of sub within this byte array
+     */
+    public int find(PyObject sub, PyObject start, PyObject end) {
+        return basebytes_find(sub, start, end);
+    }
+
+    @ExposedMethod(defaults = {"null", "null"}, doc = BuiltinDocs.bytearray_find_doc)
+    final int bytearray_find(PyObject sub, PyObject start, PyObject end) {
+        return basebytes_find(sub, start, end);
+    }
+
     @Override
     public PyObject __iadd__(PyObject o) {
         return bytearray___iadd__(o);
@@ -909,6 +968,56 @@
     }
 
     /**
+     * Implementation of Python <code>index(sub)</code>.
+     * Like {@link #find(PyObject)}
+     * but raise {@link Py#ValueError} if <code>sub</code> is not found.
+     *
+     * @param sub sequence to find (of a type viewable as a byte sequence)
+     * @return index of start of occurrence of sub within this byte array
+     */
+    public int index(PyObject sub) {
+        return bytearray_index(sub, null, null);
+    }
+
+    /**
+     * Implementation of Python <code>index( sub [, start ] )</code>.
+     * Like {@link #find(PyObject,PyObject)}
+     * but raise {@link Py#ValueError} if <code>sub</code> is not found.
+     *
+     * @param sub sequence to find (of a type viewable as a byte sequence)
+     * @param start of slice to search
+     * @return index of start of occurrence of sub within this byte array
+     */
+    public int index(PyObject sub, PyObject start) {
+        return bytearray_index(sub, start, null);
+    }
+
+    /**
+     * Implementation of Python <code>index( sub [, start [, end ]] )</code>.
+     * Like {@link #find(PyObject,PyObject,PyObject)}
+     * but raise {@link Py#ValueError} if <code>sub</code> is not found.
+     *
+     * @param sub sequence to find (of a type viewable as a byte sequence)
+     * @param start of slice to search
+     * @param end of slice to search
+     * @return index of start of occurrence of sub within this byte array
+     * @throws PyException ValueError if sub not found in byte array
+     */
+    public int index(PyObject sub, PyObject start, PyObject end) throws PyException {
+        return bytearray_index(sub, start, end);
+    }
+
+    @ExposedMethod(defaults = {"null", "null"}, doc = BuiltinDocs.bytearray_index_doc)
+    final int bytearray_index(PyObject sub, PyObject start, PyObject end) {
+        // Like find but raise a ValueError if not found
+        int pos = basebytes_find(sub, start, end);
+        if (pos<0) {
+            throw Py.ValueError("subsection not found");
+        }
+        return pos;
+    }
+
+    /**
      * Insert the argument element into the byte array at the specified index.
      * Same as <code>s[index:index] = [o] if index &gt;= 0</code>.
      *
@@ -936,6 +1045,32 @@
         return basebytes___reduce__();
     }
 
+
+    /**
+     * Remove the first occurrence of an element from the array, equivalent to:
+     * <code>del s[s.index(x)]</code>, although x must be convertible to a single byte value. The
+     * argument must be a PyInteger, PyLong or string of length 1.
+     *
+     * @param o the value to remove from the bytearray.
+     * @throws PyException ValueError if o not found in bytearray
+     */
+    public void remove(PyObject o) throws PyException {
+        bytearray_remove(o);
+    }
+
+    @ExposedMethod(doc = BuiltinDocs.bytearray_remove_doc)
+    final synchronized void bytearray_remove(PyObject o) {
+        // Check and extract the value, and search for it.
+        byte b = byteCheck(o);
+        int pos = index(b);
+        // Not finding it is an error
+        if (pos < 0) {
+            throw Py.ValueError("value not found in bytearray");
+        } else {
+            storageDelete(pos, 1);
+        }
+    }
+
     /**
      * An implementation of Python <code>replace( old, new )</code>, returning a
      * <code>PyByteArray</code> with all occurrences of sequence <code>oldB</code> replaced by
@@ -975,7 +1110,7 @@
      * where byte sequence <code>sub</code> is found. Return -1 if <code>sub</code> is not found.
      *
      * @param sub sequence to find (of a type viewable as a byte sequence)
-     * @return index of start of rightmost ocurrence of sub within this byte array
+     * @return index of start of rightmost occurrence of sub within this byte array
      */
     public int rfind(PyObject sub) {
         return basebytes_rfind(sub, null, null);
@@ -988,7 +1123,7 @@
      *
      * @param sub sequence to find (of a type viewable as a byte sequence)
      * @param start of slice to search
-     * @return index of start of rightmost ocurrence of sub within this byte array
+     * @return index of start of rightmost occurrence of sub within this byte array
      */
     public int rfind(PyObject sub, PyObject start) {
         return basebytes_rfind(sub, start, null);
@@ -1005,7 +1140,7 @@
      * @param sub sequence to find (of a type viewable as a byte sequence)
      * @param start of slice to search
      * @param end of slice to search
-     * @return index of start of rightmost ocurrence of sub within this byte array
+     * @return index of start of rightmost occurrence of sub within this byte array
      */
     public int rfind(PyObject sub, PyObject start, PyObject end) {
         return basebytes_rfind(sub, start, end);
@@ -1016,6 +1151,55 @@
         return basebytes_rfind(sub, start, end);
     }
 
+    /**
+     * Implementation of Python <code>rindex(sub)</code>.
+     * Like {@link #rfind(PyObject)}
+     * but raise {@link Py#ValueError} if <code>sub</code> is not found.
+     *
+     * @param sub sequence to find (of a type viewable as a byte sequence)
+     * @return index of start of occurrence of sub within this byte array
+     */
+    public int rindex(PyObject sub) {
+        return bytearray_rindex(sub, null, null);
+    }
+
+    /**
+     * Implementation of Python <code>rindex( sub [, start ] )</code>.
+     * Like {@link #rfind(PyObject,PyObject)}
+     * but raise {@link Py#ValueError} if <code>sub</code> is not found.
+     *
+     * @param sub sequence to find (of a type viewable as a byte sequence)
+     * @param start of slice to search
+     * @return index of start of occurrence of sub within this byte array
+     */
+    public int rindex(PyObject sub, PyObject start) {
+        return bytearray_rindex(sub, start, null);
+    }
+
+    /**
+     * Implementation of Python <code>rindex( sub [, start [, end ]] )</code>.
+     * Like {@link #rfind(PyObject,PyObject,PyObject)}
+     * but raise {@link Py#ValueError} if <code>sub</code> is not found.
+     *
+     * @param sub sequence to find (of a type viewable as a byte sequence)
+     * @param start of slice to search
+     * @param end of slice to search
+     * @return index of start of occurrence of sub within this byte array
+     */
+    public int rindex(PyObject sub, PyObject start, PyObject end) {
+        return bytearray_rindex(sub, start, end);
+    }
+
+    @ExposedMethod(defaults = {"null", "null"}, doc = BuiltinDocs.bytearray_rindex_doc)
+    final int bytearray_rindex(PyObject sub, PyObject start, PyObject end) {
+        // Like rfind but raise a ValueError if not found
+        int pos = basebytes_rfind(sub, start, end);
+        if (pos<0) {
+            throw Py.ValueError("subsection not found");
+        }
+        return pos;
+    }
+
 // Based on PyList and not yet properly implemented.
 //
 //    @Override

-- 
Repository URL: http://hg.python.org/jython


[Jython-checkins] jython: Implemented bytearray.count, remove, index, rindex and __contains__ ('in'

[Jython-checkins] jython: Implemented bytearray.count, remove, index, rindex and contains ('in'