[Jython-checkins] jython (merge default -> default): Merge buffer work on PyArray

jeff.allen jython-checkins at python.org
Fri Nov 15 00:01:41 CET 2013


http://hg.python.org/jython/rev/5f306c9c85e3
changeset:   7152:5f306c9c85e3
parent:      7147:07ca5124f859
parent:      7151:0d5f6b1e0a04
user:        Jeff Allen <ja.py at farowl.co.uk>
date:        Thu Nov 14 21:38:49 2013 +0000
summary:
  Merge buffer work on PyArray

files:
  Lib/test/test_array.py                        |  101 ++-
  Lib/test/test_bytes.py                        |    5 +
  src/org/python/core/PyArray.java              |  466 ++++++++-
  src/org/python/core/PyFile.java               |   43 +-
  src/org/python/core/PyMemoryView.java         |   20 +-
  src/org/python/core/io/TextIOBase.java        |   26 +-
  src/org/python/modules/sre/PatternObject.java |   22 +-
  7 files changed, 545 insertions(+), 138 deletions(-)


diff --git a/Lib/test/test_array.py b/Lib/test/test_array.py
--- a/Lib/test/test_array.py
+++ b/Lib/test/test_array.py
@@ -246,6 +246,39 @@
         self.assertEqual(a, b)
         if a.itemsize>1 and self.typecode not in ('b', 'B'):
             self.assertRaises(ValueError, b.fromstring, "x")
+        # Test from byte string available via buffer API (Jython addition)
+        if test_support.is_jython:
+            for buftype in (buffer, memoryview, bytearray):
+                b = array.array(self.typecode)
+                b.fromstring(buftype(a.tostring()))
+                self.assertEqual(a, b)
+
+    @unittest.skipUnless(test_support.is_jython, "Jython supports memoryview slices")
+    def test_tofromstring_sliced(self):
+        a = array.array(self.typecode, self.example)
+        r = bytearray(a.tostring())
+        R = len(r)
+        D = 3*R
+
+        def checkSlice(x, y, z=None):
+            # Scatter the bytes of a.tostring() into d
+            d = bytearray(D)
+            d[x:y:z] = r
+            # Now gather through a memoryview slice
+            with memoryview(d) as m:
+                # Requires proper use of strides when z not None and not 1
+                b = array.array(self.typecode)
+                b.fromstring(m[x:y:z])
+                self.assertEqual(a, b)
+
+        # The slices all have R elements and the whole range D = 3*R
+        checkSlice(None, R)
+        checkSlice(2, 2+R)
+        checkSlice(D-R, None)
+        checkSlice(None, None, 3)
+        checkSlice(None, None, -3)
+        checkSlice(None, D-R-1, -1)
+        checkSlice(R-1, None, -1)
 
     def test_filewrite(self):
         a = array.array(self.typecode, 2*self.example)
@@ -827,6 +860,66 @@
         # SF bug #1486663 -- this used to erroneously raise a TypeError
         ArraySubclassWithKwargs('b', newarg=1)
 
+    @unittest.skipUnless(test_support.is_jython, "array supports buffer interface in Jython")
+    def test_resize_forbidden(self):
+        # Test that array resizing is forbidden with buffer exports (Jython addition).
+        # Test adapted from corresponding one in test_bytes.
+        # We can't resize an array when there are buffer exports, even
+        # if it wouldn't reallocate the underlying array.
+        # Furthermore, no destructive changes to the buffer may be applied
+        # before raising the error.
+        a = array.array(self.typecode, self.example)
+        def resize(n):
+            "n = -1 -> Smaller, 0 -> the same, or 1 -> larger."
+            a[1:-1] = array.array(self.typecode, self.example[1-n:-1])
+
+        v = memoryview(a)
+        orig = a[:]
+
+        self.assertRaises(BufferError, resize, -1)
+        self.assertEqual(a, orig)
+        #self.assertRaises(BufferError, resize, 0)
+        #self.assertEqual(a, orig)
+        self.assertRaises(BufferError, resize, 1)
+        self.assertEqual(a, orig)
+
+        # Other operations implying resize
+        self.assertRaises(BufferError, a.pop, 0)
+        self.assertEqual(a, orig)
+        self.assertRaises(BufferError, a.remove, a[1])
+        self.assertEqual(a, orig)
+        self.assertRaises(BufferError, a.append, self.outside)
+        self.assertEqual(a, orig)
+        self.assertRaises(BufferError, a.insert, 1, self.outside)
+        self.assertEqual(a, orig)
+        self.assertRaises(BufferError, a.extend, self.example)
+        self.assertEqual(a, orig)
+
+        def iadd(x):
+            x += array.array(self.typecode, self.biggerexample)
+        self.assertRaises(BufferError, iadd, a)
+        self.assertEqual(a, orig)
+
+        def imul(x):
+            x *= 3
+        self.assertRaises(BufferError, imul, a)
+        self.assertEqual(a, orig)
+
+        def delitem():
+            del a[1]
+        self.assertRaises(BufferError, delitem)
+        self.assertEqual(a, orig)
+
+        # deleting a non-contiguous slice
+        def delslice():
+            del a[1:-1:2]
+        self.assertRaises(BufferError, delslice)
+        self.assertEqual(a, orig)
+
+        # Show that releasing v releases the array for size change
+        v.release()
+        a.pop()
+
 
 class StringTest(BaseTest):
 
@@ -938,9 +1031,8 @@
         self.assertEqual(a, array.array(self.typecode, [1,2,3,4,5,6,7,8,9]))
         # test issue7788
         a = array.array(self.typecode, range(10))
-        # FIXME #1860: not working on Jython yet.
-        if not test_support.is_jython:
-            del a[9::1<<333]
+        del a[9::1<<333]
+        self.assertEqual(a, array.array(self.typecode, range(9)))
 
     def test_assignment(self):
         a = array.array(self.typecode, range(10))
@@ -1134,9 +1226,6 @@
         # CPython specific; returns a memory address
         del BaseTest.test_buffer_info
 
-        # No buffers in Jython
-        del BaseTest.test_buffer
-
     test_support.run_unittest(*tests)
 
     # verify reference counting
diff --git a/Lib/test/test_bytes.py b/Lib/test/test_bytes.py
--- a/Lib/test/test_bytes.py
+++ b/Lib/test/test_bytes.py
@@ -886,6 +886,11 @@
         self.assertRaises(BufferError, delslice)
         self.assertEqual(b, orig)
 
+        if test.test_support.is_jython:
+            # Show that releasing v releases the bytearray for size change
+            v.release()
+            b.pop()
+
     def test_empty_bytearray(self):
         # Issue #7561: operations on empty bytearrays could crash in many
         # situations, due to a fragile implementation of the
diff --git a/src/org/python/core/PyArray.java b/src/org/python/core/PyArray.java
--- a/src/org/python/core/PyArray.java
+++ b/src/org/python/core/PyArray.java
@@ -9,8 +9,12 @@
 import java.io.IOException;
 import java.io.InputStream;
 import java.io.OutputStream;
+import java.lang.ref.WeakReference;
 import java.lang.reflect.Array;
 
+import org.python.core.buffer.BaseBuffer;
+import org.python.core.buffer.SimpleStringBuffer;
+import org.python.core.buffer.SimpleWritableBuffer;
 import org.python.core.util.ByteSwapper;
 import org.python.core.util.StringUtil;
 import org.python.expose.ExposedGet;
@@ -27,7 +31,7 @@
  * See also the jarray module.
  */
 @ExposedType(name = "array.array", base = PyObject.class)
-public class PyArray extends PySequence implements Cloneable {
+public class PyArray extends PySequence implements Cloneable, BufferProtocol {
 
     public static final PyType TYPE = PyType.fromClass(PyArray.class);
 
@@ -74,19 +78,26 @@
     @ExposedNew
     static final PyObject array_new(PyNewWrapper new_, boolean init, PyType subtype,
             PyObject[] args, String[] keywords) {
+
         if (new_.for_type != subtype && keywords.length > 0) {
+            /*
+             * We're constructing as a base for a derived type (via PyDerived) and there are
+             * keywords. The effective args locally should not include the keywords.
+             */
             int argc = args.length - keywords.length;
             PyObject[] justArgs = new PyObject[argc];
             System.arraycopy(args, 0, justArgs, 0, argc);
             args = justArgs;
         }
+
+        // Build the argument parser for this call
         ArgParser ap =
                 new ArgParser("array", args, Py.NoKeywords,
                         new String[] {"typecode", "initializer"}, 1);
         ap.noKeywords();
+
+        // Retrieve the mandatory type code that determines the element type
         PyObject obj = ap.getPyObject(0);
-        PyObject initial = ap.getPyObject(1, null);
-
         Class<?> type;
         String typecode;
         if (obj instanceof PyString && !(obj instanceof PyUnicode)) {
@@ -103,32 +114,48 @@
                     + obj.getType().fastGetName());
         }
 
+        /*
+         * Create a 'blank canvas' of the appropriate concrete class.
+         */
         PyArray self;
         if (new_.for_type == subtype) {
             self = new PyArray(subtype);
         } else {
             self = new PyArrayDerived(subtype);
         }
+
         // Initialize the typecode (and validate type) before creating the backing Array
         class2char(type);
         self.setup(type, Array.newInstance(type, 0));
         self.typecode = typecode;
+
+        /*
+         * The initialiser may be omitted, or may validly be one of several types in the broad
+         * categories of a byte string (which is treated as a machine representation of the data) or
+         * an iterable yielding values assignable to the elements. There is special treatment for
+         * type 'u' Unicode.
+         */
+        PyObject initial = ap.getPyObject(1, null);
         if (initial == null) {
-            return self;
-        }
-        if (initial instanceof PyList) {
+            // Fall through
+
+        } else if (initial instanceof PyList) {
             self.fromlist(initial);
+
         } else if (initial instanceof PyString && !(initial instanceof PyUnicode)) {
             self.fromstring(initial.toString());
+
         } else if ("u".equals(typecode)) {
             if (initial instanceof PyUnicode) {
                 self.extendArray(((PyUnicode)initial).toCodePoints());
             } else {
                 self.extendUnicodeIter(initial);
             }
+
         } else {
             self.extendInternal(initial);
         }
+
         return self;
     }
 
@@ -257,9 +284,13 @@
 
     @ExposedMethod(type = MethodType.BINARY)
     final PyObject array___imul__(PyObject o) {
+
         if (!o.isIndex()) {
             return null;
         }
+
+        resizeCheck();  // Prohibited if exporting a buffer
+
         if (delegate.getSize() > 0) {
             int count = o.asIndex(Py.OverflowError);
             if (count <= 0) {
@@ -308,6 +339,7 @@
 
     @ExposedMethod(type = MethodType.BINARY)
     final PyObject array___iadd__(PyObject other) {
+
         if (!(other instanceof PyArray)) {
             return null;
         }
@@ -317,6 +349,9 @@
             throw Py.TypeError("can only append arrays of the same type, expected '" + this.type
                     + ", found " + otherArr.type);
         }
+
+        resizeCheck();  // Prohibited if exporting a buffer
+
         delegate.appendArray(otherArr.delegate.copyArray());
         return this;
     }
@@ -430,7 +465,8 @@
 
     @ExposedMethod
     public final void array_append(PyObject value) {
-        append(value);
+        resizeCheck();  // Prohibited if exporting a buffer
+        appendUnchecked(value);
     }
 
     private static int getCodePoint(PyObject obj) {
@@ -464,21 +500,31 @@
      *
      * @param value item to be appended to the array
      */
+    public void append(PyObject value) {
+        resizeCheck();  // Prohibited if exporting a buffer
+        appendUnchecked(value);
+    }
 
-    public void append(PyObject value) {
-        // Currently, this is asymmetric with extend, which
+    /**
+     * Common helper method used internally to append a new value x to the end of the array:
+     * {@link #resizeCheck()} is not called, so the client must do so in advance.
+     *
+     * @param value item to be appended to the array
+     */
+    private final void appendUnchecked(PyObject value) {
+        // Currently, append is asymmetric with extend, which
         // *will* do conversions like append(5.0) to an int array.
-        // Also, cpython 2.2 will do the append coersion. However,
-        // it is deprecated in cpython 2.3, so maybe we are just
+        // Also, CPython 2.2 will do the append coercion. However,
+        // it is deprecated in CPython 2.3, so maybe we are just
         // ahead of our time ;-)
 
         int afterLast = delegate.getSize();
+
         if ("u".equals(typecode)) {
             int codepoint = getCodePoint(value);
             delegate.makeInsertSpace(afterLast);
             Array.setInt(data, afterLast, codepoint);
         } else {
-
             delegate.makeInsertSpace(afterLast);
             try {
                 set(afterLast, value);
@@ -660,8 +706,7 @@
      */
     @Override
     protected void del(int i) {
-        // Now the AbstractArray can support this:
-        // throw Py.TypeError("can't remove from array");
+        resizeCheck();  // Prohibited if exporting a buffer
         delegate.remove(i);
     }
 
@@ -673,6 +718,7 @@
      */
     @Override
     protected void delRange(int start, int stop) {
+        resizeCheck();  // Prohibited if exporting a buffer
         delegate.remove(start, stop);
     }
 
@@ -685,7 +731,7 @@
      * Append items from <code>iterable</code> to the end of the array. If iterable is another
      * array, it must have exactly the same type code; if not, TypeError will be raised. If iterable
      * is not an array, it must be iterable and its elements must be the right type to be appended
-     * to the array. Changed in version 2.4: Formerly, the argument could only be another array.
+     * to the array.
      *
      * @param iterable iterable object used to extend the array
      */
@@ -700,8 +746,8 @@
      *
      * @param iterable object of type PyString, PyArray or any object that can be iterated over.
      */
+    private void extendInternal(PyObject iterable) {
 
-    private void extendInternal(PyObject iterable) {
         if (iterable instanceof PyUnicode) {
             if ("u".equals(typecode)) {
                 extendUnicodeIter(iterable);
@@ -710,14 +756,19 @@
             } else {
                 throw Py.TypeError("an integer is required");
             }
-        } else if (iterable instanceof PyString) {
-            fromstring(((PyString)iterable).toString());
+
+// } else if (iterable instanceof PyString) {
+// // XXX CPython treats a str/bytes as an iterable, not as previously here:
+// fromstring(((PyString)iterable).toString());
+
         } else if (iterable instanceof PyArray) {
             PyArray source = (PyArray)iterable;
             if (!source.typecode.equals(typecode)) {
                 throw Py.TypeError("can only extend with array of same kind");
             }
+            resizeCheck();  // Prohibited if exporting a buffer
             delegate.appendArray(source.delegate.copyArray());
+
         } else {
             extendInternalIter(iterable);
         }
@@ -729,40 +780,66 @@
      * @param iterable any object that can be iterated over.
      */
     private void extendInternalIter(PyObject iterable) {
-        // iterable object without a length property - cannot presize the
-        // array, so append each item
-        if (iterable.__findattr__("__len__") == null) {
-            for (PyObject item : iterable.asIterable()) {
-                append(item);
-            }
-        } else {
-            // create room
+
+        // Prohibited operation if exporting a buffer
+        resizeCheck();
+
+        if (iterable.__findattr__("__len__") != null) {
+            // Make room according to source length
             int last = delegate.getSize();
             delegate.ensureCapacity(last + iterable.__len__());
             for (PyObject item : iterable.asIterable()) {
                 set(last++, item);
                 delegate.size++;
             }
-        }
-    }
 
-    private void extendUnicodeIter(PyObject iterable) {
-        for (PyObject item : iterable.asIterable()) {
-            PyUnicode uitem;
-            try {
-                uitem = (PyUnicode)item;
-            } catch (ClassCastException e) {
-                throw Py.TypeError("Type not compatible with array type");
-            }
-            for (int codepoint : uitem.toCodePoints()) {
-                int afterLast = delegate.getSize();
-                delegate.makeInsertSpace(afterLast);
-                Array.setInt(data, afterLast, codepoint);
+        } else {
+            // iterable has no length property: cannot size the array so append each item.
+            for (PyObject item : iterable.asIterable()) {
+                appendUnchecked(item); // we already did a resizeCheck
             }
         }
     }
 
+    /**
+     * Helper used only when the array elements are Unicode characters (<code>typecode=='u'</code>).
+     * (Characters are stored as integer code points.) The parameter must be an iterable yielding
+     * <code>PyUnicode</code>s. Often this will be an instance of {@link PyUnicode}, which is an
+     * iterable yielding single-character <code>PyUnicode</code>s. But it is also acceptable to this
+     * method for the argument to yield arbitrary <code>PyUnicode</code>s, which will be
+     * concatenated in the array.
+     *
+     * @param iterable of <code>PyUnicode</code>s
+     */
+    private void extendUnicodeIter(PyObject iterable) {
+
+        // Prohibited operation if exporting a buffer
+        resizeCheck();
+
+        try {
+
+            // Append all the code points of all the strings in the iterable
+            for (PyObject item : iterable.asIterable()) {
+                PyUnicode uitem = (PyUnicode)item;
+                // Append all the code points of this item
+                for (int codepoint : uitem.toCodePoints()) {
+                    int afterLast = delegate.getSize();
+                    delegate.makeInsertSpace(afterLast);
+                    Array.setInt(data, afterLast, codepoint);
+                }
+            }
+
+        } catch (ClassCastException e) {
+            // One of the PyUnicodes wasn't
+            throw Py.TypeError("Type not compatible with array type");
+        }
+    }
+
     private void extendArray(int[] items) {
+
+        // Prohibited operation if exporting a buffer
+        resizeCheck();
+
         int last = delegate.getSize();
         delegate.ensureCapacity(last + items.length);
         for (int item : items) {
@@ -787,21 +864,32 @@
      * @param count number of array elements to read
      */
     public void fromfile(PyObject f, int count) {
-        // check for arg1 as file object
-        if (!(f instanceof PyFile)) {
-            throw Py.TypeError("arg1 must be open file");
+        /*
+         * Prohibit when exporting a buffer. Different from CPython, BufferError takes precedence in
+         * Jython over EOFError: if there's nowhere to write the data, we don't read it.
+         */
+        resizeCheck();
+
+        /*
+         * Now get the required number of bytes from the file. Guard against non-file or closed.
+         */
+        if (f instanceof PyFile) {
+            PyFile file = (PyFile)f;
+            if (!file.getClosed()) {
+                // Load required amount or whatever is available into a bytes object
+                int readbytes = count * getStorageSize();
+                String buffer = file.read(readbytes).toString();
+                fromstring(buffer);
+                // check for underflow
+                if (buffer.length() < readbytes) {
+                    int readcount = buffer.length() / getStorageSize();
+                    throw Py.EOFError("not enough items in file. " + Integer.toString(count)
+                            + " requested, " + Integer.toString(readcount) + " actually read");
+                }
+            }
+            return;
         }
-        PyFile file = (PyFile)f;
-        int readbytes = count * getStorageSize();
-        String buffer = file.read(readbytes).toString();
-        // load whatever was collected into the array
-        fromstring(buffer);
-        // check for underflow
-        if (buffer.length() < readbytes) {
-            int readcount = buffer.length() / getStorageSize();
-            throw Py.EOFError("not enough items in file. " + Integer.toString(count)
-                    + " requested, " + Integer.toString(readcount) + " actually read");
-        }
+        throw Py.TypeError("arg1 must be open file");
     }
 
     @ExposedMethod
@@ -810,7 +898,7 @@
     }
 
     /**
-     * Append items from the list. This is equivalent to "for x in list: a.append(x)"except that if
+     * Append items from the list. This is equivalent to "for x in list: a.append(x)" except that if
      * there is a type error, the array is unchanged.
      *
      * @param obj input list object that will be appended to the array
@@ -819,6 +907,10 @@
         if (!(obj instanceof PyList)) {
             throw Py.TypeError("arg must be list");
         }
+
+        // Prohibited operation if exporting a buffer
+        resizeCheck();
+
         // store the current size of the internal array
         int size = delegate.getSize();
         try {
@@ -862,11 +954,8 @@
         // Current number of items present
         int origsize = delegate.getSize();
 
-        // Reserve capacity for 'count' items
-        delegate.setSize(origsize + count);
-
         // Read into the array, after the current contents, up to new size (or EOF thrown)
-        int n = fromStream(is, origsize, delegate.getSize(), true);
+        int n = fromStream(is, origsize, origsize + count, true);
         return n - origsize;
     }
 
@@ -888,11 +977,14 @@
      * Helper for reading primitive values from a stream into a slice of the array. Data is read
      * until the array slice is filled or the stream runs out. The purpose of the method is to
      * concentrate in one place the manipulation of bytes into the several primitive element types
-     * on behalf of {@link #fillFromStream(InputStream)} etc.. Since different read methods respond
-     * differently to it, the caller must specify whether the exhaustion of the stream (EOF) should
-     * be treated as an error or not. If the stream does not contain a whole number of items
-     * (possible if the item size is not one byte), the behaviour in respect of the final partial
-     * item and stream position is not defined.
+     * on behalf of {@link #fillFromStream(InputStream)} etc. The storage is resized if the slice
+     * being written ends beyond the current end of the array, i.e. it is increased to the value of
+     * <code>limit</code>.
+     * <p>
+     * Since different read methods respond differently to it, the caller must specify whether the
+     * exhaustion of the stream (EOF) should be treated as an error or not. If the stream does not
+     * contain a whole number of items (possible if the item size is not one byte), the behaviour in
+     * respect of the final partial item and stream position is not defined.
      *
      * @param dis data stream source for the values
      * @param index first element index to read
@@ -905,7 +997,14 @@
     private int fromStream(InputStream is, int index, int limit, boolean eofIsError)
             throws IOException, EOFException {
 
-        // We need a wrapper capable of encoding the data
+        // Ensure the array is dimensioned to fit the data expected
+        if (limit > delegate.getSize()) {
+            // Prohibited operation if exporting a buffer
+            resizeCheck();
+            delegate.setSize(limit);
+        }
+
+        // We need a wrapper capable of decoding the data from the representation defined by Java.
         DataInputStream dis = new DataInputStream(is);
 
         try {
@@ -993,12 +1092,14 @@
     }
 
     /**
-     * Appends items from the string, interpreting the string as an array of machine values (as if
-     * it had been read from a file using the {@link #fromfile(PyObject, int) fromfile()} method).
+     * Appends items from the object, which is a byte string of some kind (PyString or object with
+     * the buffer interface providing bytes). The string of bytes is interpreted as an array of
+     * machine values (as if it had been read from a file using the {@link #fromfile(PyObject, int)
+     * fromfile()} method).
      *
      * @param input string of bytes containing array data
      */
-    public void fromstring(String input) {
+    public void fromstring(PyObject input) {
         array_fromstring(input);
     }
 
@@ -1008,20 +1109,108 @@
      *
      * @param input string of bytes containing array data
      */
+    public void fromstring(String input) {
+        frombytesInternal(StringUtil.toBytes(input));
+    }
+
+    /**
+     * Appends items from the string, interpreting the string as an array of machine values (as if
+     * it had been read from a file using the {@link #fromfile(PyObject, int) fromfile()} method).
+     *
+     * @param input string of bytes containing array data
+     */
     @ExposedMethod
-    final void array_fromstring(String input) {
+    final void array_fromstring(PyObject input) {
+
+        if (input instanceof BufferProtocol) {
+
+            if (input instanceof PyUnicode) {
+                // Unicode is treated as specifying a byte string via the default encoding.
+                String s = ((PyUnicode)input).encode();
+                frombytesInternal(StringUtil.toBytes(s));
+
+            } else {
+                // Access the bytes
+                PyBuffer pybuf = ((BufferProtocol)input).getBuffer(PyBUF.STRIDED_RO);
+                try {
+                    // Provide argument as an array of bytes for frombytesInternal
+                    if (pybuf.getNdim() == 1) {
+                        if (pybuf.getStrides()[0] == 1) {
+                            // Data are contiguous in a byte[]
+                            PyBuffer.Pointer b = pybuf.getBuf();
+                            frombytesInternal(b.storage, b.offset, pybuf.getLen());
+                        } else {
+                            // As frombytesInternal only knows contiguous bytes, make a copy.
+                            byte[] copy = new byte[pybuf.getLen()];
+                            pybuf.copyTo(copy, 0);
+                            frombytesInternal(copy);
+                        }
+                    } else {
+                        // Currently don't support n-dimensional sources
+                        throw Py.ValueError("multi-dimensional buffer not supported");
+                    }
+                } finally {
+                    pybuf.release();
+                }
+            }
+
+        } else {
+            String fmt = "must be string or read-only buffer, not %s";
+            throw Py.TypeError(String.format(fmt, input.getType().fastGetName()));
+        }
+    }
+
+    /**
+     * Common code supporting Java and Python versions of <code>.fromstring()</code>
+     *
+     * @param input string of bytes encoding the array data
+     */
+    private final void fromstringInternal(String input) {
+        frombytesInternal(StringUtil.toBytes(input));
+    }
+
+    /**
+     * Common code supporting Java and Python versions of <code>.fromstring()</code> or
+     * <code>.frombytes()</code> (Python 3.2+ name).
+     *
+     * @param bytes array containing the new array data in machine encoding
+     */
+    private final void frombytesInternal(byte[] bytes) {
+        frombytesInternal(bytes, 0, bytes.length);
+    }
+
+    /**
+     * Common code supporting Java and Python versions of <code>.fromstring()</code> or
+     * <code>.frombytes()</code> (Python 3.2+ name).
+     *
+     * @param bytes array containing the new array data in machine encoding
+     * @param offset of the first byte to read
+     * @param count of bytes to read
+     */
+    private final void frombytesInternal(byte[] bytes, int offset, int count) {
+
+        // Access the bytes
+        int origsize = delegate.getSize();
+
+        // Check validity wrt array itemsize
         int itemsize = getStorageSize();
-        int strlen = input.length();
-        if ((strlen % itemsize) != 0) {
+        if ((count % itemsize) != 0) {
             throw Py.ValueError("string length not a multiple of item size");
         }
-        ByteArrayInputStream bis = new ByteArrayInputStream(StringUtil.toBytes(input));
-        int origsize = delegate.getSize();
+
+        // Prohibited operation if we are exporting a buffer
+        resizeCheck();
+
         try {
+
+            // Provide argument as stream of bytes for fromstream method
+            ByteArrayInputStream bis = new ByteArrayInputStream(bytes, offset, count);
             fromStream(bis);
+
         } catch (EOFException e) {
             // stubbed catch for fromStream throws
             throw Py.EOFError("not enough items in string");
+
         } catch (IOException e) {
             // discard anything successfully loaded
             delegate.setSize(origsize);
@@ -1279,6 +1468,7 @@
      * @param value value to be inserted into array
      */
     public void insert(int index, PyObject value) {
+        resizeCheck();  // Prohibited operation if exporting a buffer
         index = boundToSequence(index);
         if ("u".equals(typecode)) {
             int codepoint = getCodePoint(value);
@@ -1325,6 +1515,10 @@
         if (index == -1) {
             throw Py.IndexError("pop index out of range");
         }
+
+        // Prohibited operation if exporting a buffer
+        resizeCheck();
+
         PyObject ret = Py.java2py(Array.get(data, index));
         delegate.remove(index);
         return ret;
@@ -1343,6 +1537,8 @@
     public void remove(PyObject value) {
         int index = indexInternal(value);
         if (index != -1) {
+            // Prohibited operation if exporting a buffer
+            resizeCheck();
             delegate.remove(index);
             return;
         }
@@ -1359,7 +1555,6 @@
     protected PyObject repeat(int count) {
         Object arraycopy = delegate.copyArray();
         PyArray ret = new PyArray(type, 0);
-        // XXX:
         ret.typecode = typecode;
         for (int i = 0; i < count; i++) {
             ret.delegate.appendArray(arraycopy);
@@ -1500,9 +1695,11 @@
      */
     @Override
     protected void setslice(int start, int stop, int step, PyObject value) {
+
         if (stop < start) {
             stop = start;
         }
+
         if (type == Character.TYPE && value instanceof PyString) {
             char[] chars = null;
             // if (value instanceof PyString) {
@@ -1510,8 +1707,14 @@
                 throw Py.ValueError("invalid bounds for setting from string");
             }
             chars = value.toString().toCharArray();
+            if (start + chars.length != stop) {
+                // This is a size-changing operation: check for buffer exports
+                resizeCheck();
+            }
             delegate.replaceSubArray(start, stop, chars, 0, chars.length);
+
         } else {
+
             if (value instanceof PyString && type == Byte.TYPE) {
                 byte[] chars = ((PyString)value).toBytes();
                 if (chars.length == stop - start && step == 1) {
@@ -1519,12 +1722,14 @@
                 } else {
                     throw Py.ValueError("invalid bounds for setting from string");
                 }
+
             } else if (value instanceof PyArray) {
                 PyArray array = (PyArray)value;
                 if (!array.typecode.equals(typecode)) {
                     throw Py.TypeError("bad argument type for built-in operation|" + array.typecode
                             + "|" + typecode);
                 }
+
                 if (step == 1) {
                     Object arrayDelegate;
                     if (array == this) {
@@ -1532,19 +1737,25 @@
                     } else {
                         arrayDelegate = array.delegate.getArray();
                     }
+                    int len = array.delegate.getSize();
+                    if (start + len != stop) {
+                        // This is a size-changing operation: check for buffer exports
+                        resizeCheck();
+                    }
                     try {
-                        delegate.replaceSubArray(start, stop, arrayDelegate, 0,
-                                array.delegate.getSize());
+                        delegate.replaceSubArray(start, stop, arrayDelegate, 0, len);
                     } catch (IllegalArgumentException e) {
                         throw Py.TypeError("Slice typecode '" + array.typecode
                                 + "' is not compatible with this array (typecode '" + this.typecode
                                 + "')");
                     }
+
                 } else if (step > 1) {
                     int len = array.__len__();
                     for (int i = 0, j = 0; i < len; i++, j += step) {
                         Array.set(data, j + start, Array.get(array.data, i));
                     }
+
                 } else if (step < 0) {
                     if (array == this) {
                         array = (PyArray)array.clone();
@@ -1554,6 +1765,7 @@
                         Array.set(data, j, Array.get(array.data, i));
                     }
                 }
+
             } else {
                 throw Py.TypeError(String.format("can only assign array (not \"%.200s\") to array "
                         + "slice", value.getType().fastGetName()));
@@ -1805,4 +2017,106 @@
             return Array.newInstance(baseType, size);
         }
     }
+
+    /*
+     * ============================================================================================
+     * Support for the Buffer API
+     * ============================================================================================
+     *
+     * The buffer API allows other classes to access the storage directly.
+     *
+     * This is a close duplicate of the same mechanism in PyByteArray. There is perhaps scope for a
+     * shared helper class to implement this logic. For type code 'b', the workings are almost
+     * identical. The fully-fledged buffer interface for PyArray is richer, more like the Python 3
+     * memoryview, as it must cope with items of size other than one byte. This goes beyond the
+     * capabilities of the Jython BufferProtocol at this stage of its development.
+     */
+
+    /**
+     * Hold weakly a reference to a PyBuffer export not yet released, used to prevent untimely
+     * resizing.
+     */
+    private WeakReference<BaseBuffer> export;
+
+    /**
+     * {@inheritDoc}
+     * <p>
+     * The {@link PyBuffer} returned from this method is a one-dimensional array of single byte
+     * items. For type code 'b' it wraps the array's own storage and is writable; for other types
+     * only a non-writable request is served, from the <code>tostring()</code> value. An unreleased
+     * export <b>prohibits resizing</b> the array by any operation (e.g. insertion or deletion).
+     */
+    @Override
+    public synchronized PyBuffer getBuffer(int flags) {
+
+        // If we have already exported a buffer it may still be available for re-use
+        BaseBuffer pybuf = getExistingBuffer(flags);
+
+        if (pybuf == null) {
+            // No existing export we can re-use: create a new one
+            if ("b".equals(typecode)) {
+                // This is byte data, so we are within the state of the art
+                byte[] storage = (byte[])data;
+                int size = delegate.getSize();
+                pybuf = new SimpleWritableBuffer(flags, storage, 0, size);
+            } else if ((flags & PyBUF.WRITABLE) == 0) {
+                // As the client only intends to read, fake the answer with a String
+                pybuf = new SimpleStringBuffer(flags, tostring());
+            } else {
+                // For the time being ...
+                throw Py.NotImplementedError("only array('b') can export a writable buffer");
+            }
+            // Hold a weak reference for possible re-use, and for resizeCheck() to consult
+            export = new WeakReference<BaseBuffer>(pybuf);
+        }
+
+        return pybuf;
+    }
+
+    /**
+     * Try to re-use an existing exported buffer, or return <code>null</code> if we can't.
+     *
+     * @throws PyException (BufferError) if the flags are incompatible with the buffer
+     */
+    private BaseBuffer getExistingBuffer(int flags) throws PyException {
+        BaseBuffer pybuf = null;
+        if (export != null) {
+            // A buffer was exported at some time.
+            pybuf = export.get();
+            if (pybuf != null) {
+                /*
+                 * We do not test for pybuf.isReleased() as, if any operation had taken place that
+                 * invalidated the buffer, resizeCheck() would have set export=null. The exported
+                 * buffer (navigation, buf member, etc.) remains valid through any operation that
+                 * does not need a resizeCheck.
+                 */
+                pybuf = pybuf.getBufferAgain(flags);
+            }
+        }
+        return pybuf;
+    }
+
+    /**
+     * Test to see if the array may be resized and raise a BufferError if not. This must be called
+     * by the implementation of any operation that changes the number of elements in the array.
+     *
+     * @throws PyException (BufferError) if there are buffer exports preventing a resize
+     */
+    private void resizeCheck() throws PyException {
+        if (export != null) {
+            // A buffer was exported at some time and we have not explicitly discarded it.
+            PyBuffer pybuf = export.get();
+            if (pybuf != null && !pybuf.isReleased()) {
+                // A consumer still holds the export, so the size of the array must not change
+                throw Py.BufferError("cannot resize an array that is exporting buffers");
+            } else {
+                /*
+                 * Either the reference has expired or all consumers have released it. Either way,
+                 * the weak reference is useless now.
+                 */
+                export = null;
+            }
+        }
+    }
+
 }
diff --git a/src/org/python/core/PyFile.java b/src/org/python/core/PyFile.java
--- a/src/org/python/core/PyFile.java
+++ b/src/org/python/core/PyFile.java
@@ -377,38 +377,42 @@
     }
 
     /**
-     * Return a String for writing to the underlying file from obj.
+     * Return a String for writing to the underlying file from obj. This is a helper for {@link #file_write}
+     * and {@link #file_writelines}.
+     *
+     * @param obj to write
+     * @param message for TypeError if raised (or null for default message)
+     * @return bytes representing the value (as a String in the Jython convention)
      */
     private String asWritable(PyObject obj, String message) {
 
         if (obj instanceof PyUnicode) {
+            // By convention, use platform default encoding to bytes
             return ((PyUnicode)obj).encode();
 
         } else if (obj instanceof PyString) {
-            return ((PyString) obj).getString();
+            // Take a short cut
+            return ((PyString)obj).getString();
+
+        } else if (obj instanceof PyArray && !binary) {
+            // Fall through to TypeError. (If binary, BufferProtocol takes care of PyArray.)
 
         } else if (obj instanceof BufferProtocol) {
-            // Try to get a simple byte-oriented buffer
-            PyBuffer buf = null;
+            // Try to get a byte-oriented buffer
+            PyBuffer buf = ((BufferProtocol)obj).getBuffer(PyBUF.FULL_RO);
             try {
-                buf = ((BufferProtocol)obj).getBuffer(PyBUF.SIMPLE);
-                return StringUtil.fromBytes(buf);
-            } catch (Exception e) {
-                // Wrong kind of buffer: generic error message will do
+                // ... and treat those bytes as a String
+                return buf.toString();
             } finally {
-                // If we got a buffer, we should release it
-                if (buf != null) {
-                    buf.release();
-                }
+                // We should release the buffer
+                buf.release();
             }
+        }
 
-        } else if (binary && obj instanceof PyArray) {
-            return ((PyArray)obj).tostring();
-        }
         if (message == null) {
-            message = String.format("argument 1 must be string or %sbuffer, not %.200s",
-                                    binary ? "" : "read-only character ",
-                                    obj.getType().fastGetName());
+            // No caller message: build the default (text/binary wording differs, as in CPython)
+            message = String.format("%s buffer, not %.200s", (binary ? "must be string or"
+                    : "expected a character"), obj.getType().fastGetName());
         }
         throw Py.TypeError(message);
     }
@@ -581,7 +585,7 @@
         }
     }
 
-  
+
     /**
      * XXX update docs - A mechanism to make sure PyFiles are closed on exit. On creation Closer adds itself
      * to a list of Closers that will be run by PyFileCloser on JVM shutdown. When a
@@ -615,6 +619,7 @@
         }
 
         /** For closing as part of a shutdown process */
+        @Override
         public Void call() {
             file.close();
             sys = null;
diff --git a/src/org/python/core/PyMemoryView.java b/src/org/python/core/PyMemoryView.java
--- a/src/org/python/core/PyMemoryView.java
+++ b/src/org/python/core/PyMemoryView.java
@@ -38,14 +38,19 @@
     private boolean hashCacheValid = false;
 
     /**
-     * Construct a PyMemoryView from a PyBuffer interface. The buffer so obtained will be writable
-     * if the underlying object permits it. The <code>memoryview</code> takes a new lease on the
-     * <code>PyBuffer</code>.
+     * Construct a <code>PyMemoryView</code> from an object bearing the {@link BufferProtocol}
+     * interface. If this object is already an exported buffer, the <code>memoryview</code> takes a
+     * new lease on it. The buffer so obtained will be writable if the underlying object permits it.
      *
      * @param pybuf buffer exported by some underlying object
      */
-    public PyMemoryView(PyBuffer pybuf) {
+    public PyMemoryView(BufferProtocol pybuf) {
         super(TYPE);
+        /*
+         * Ask for the full set of facilities (strides, indirect, etc.) from the object in case they
+         * are necessary for navigation, but only ask for read access. If the object is writable,
+         * the PyBuffer will be writable.
+         */
         backing = pybuf.getBuffer(PyBUF.FULL_RO);
     }
 
@@ -63,12 +68,7 @@
         PyObject obj = ap.getPyObject(0);
 
         if (obj instanceof BufferProtocol) {
-            /*
-             * Ask for the full set of facilities (strides, indirect, etc.) from the object in case
-             * they are necessary for navigation, but only ask for read access. If the object is
-             * writable, the PyBuffer will be writable.
-             */
-            return new PyMemoryView(((BufferProtocol)obj).getBuffer(PyBUF.FULL_RO));
+            return new PyMemoryView((BufferProtocol)obj);
         } else {
             throw Py.TypeError("cannot make memory view because object does not have "
                     + "the buffer interface");
diff --git a/src/org/python/core/io/TextIOBase.java b/src/org/python/core/io/TextIOBase.java
--- a/src/org/python/core/io/TextIOBase.java
+++ b/src/org/python/core/io/TextIOBase.java
@@ -101,14 +101,25 @@
      * @return the amount of data read as an int
      */
     public int readinto(PyObject buf) {
+
         // This is an inefficient version of readinto: but readinto is
         // not recommended for use in Python 2.x anyway
-        if (buf instanceof BufferProtocol) {
+
+        if (buf instanceof PyArray) {
+            // PyArray has the buffer interface but it only works for bytes at present
+            PyArray array = (PyArray)buf;
+            String read = read(array.__len__());
+            for (int i = 0; i < read.length(); i++) {
+                array.set(i, new PyString(read.charAt(i)));
+            }
+            return read.length();
+
+        } else if (buf instanceof BufferProtocol) {
             PyBuffer view = ((BufferProtocol)buf).getBuffer(PyBUF.SIMPLE);
             if (view.isReadonly()) {
                 // More helpful than falling through to CPython message
-                throw Py.TypeError("cannot read into read-only "
-                        + buf.getType().fastGetName());
+                throw Py.TypeError("cannot read into read-only " + buf.getType().fastGetName());
+
             } else {
                 try {
                     // Inefficiently, we have to go via a String
@@ -122,15 +133,8 @@
                     // We should release the buffer explicitly
                     view.release();
                 }
+
             }
-
-        } else if (buf instanceof PyArray) {
-            PyArray array = (PyArray)buf;
-            String read = read(array.__len__());
-            for (int i = 0; i < read.length(); i++) {
-                array.set(i, new PyString(read.charAt(i)));
-            }
-            return read.length();
         }
 
         // No valid alternative worked
diff --git a/src/org/python/modules/sre/PatternObject.java b/src/org/python/modules/sre/PatternObject.java
--- a/src/org/python/modules/sre/PatternObject.java
+++ b/src/org/python/modules/sre/PatternObject.java
@@ -372,28 +372,18 @@
             return (PyString)obj;
 
         } else if (obj instanceof BufferProtocol) {
-            // Try to get a simple byte-oriented buffer
-            PyBuffer buf = null;
+            // Try to get a byte-oriented buffer
+            PyBuffer buf = ((BufferProtocol)obj).getBuffer(PyBUF.FULL_RO);
             try {
-                buf = ((BufferProtocol)obj).getBuffer(PyBUF.SIMPLE);
                 // ... and treat those bytes as a PyString
-                String s = StringUtil.fromBytes(buf);
-                return new PyString(s);
-            } catch (Exception e) {
-                // Wrong kind of buffer: generic error message will do
+                return new PyString(buf.toString());
             } finally {
-                // If we got a buffer, we should release it
-                if (buf != null) {
-                    buf.release();
-                }
+                // We should release the buffer
+                buf.release();
             }
-
-        } else if (obj instanceof PyArray) {
-            // PyArray can do something similar
-            return new PyString(obj.toString());
         }
 
-        // None of those things worked
+        // Neither of those things worked
         throw Py.TypeError("expected string or buffer, but got " + obj.getType());
     }
 }

-- 
Repository URL: http://hg.python.org/jython


More information about the Jython-checkins mailing list