[Jython-checkins] jython: Add PyBuffer.getNIOByteBuffer to the buffer protocol.

jeff.allen jython-checkins at python.org
Wed Sep 17 00:55:24 CEST 2014


http://hg.python.org/jython/rev/330839dc597a
changeset:   7377:330839dc597a
user:        Jeff Allen <ja.py at farowl.co.uk>
date:        Tue Aug 05 23:13:02 2014 +0100
summary:
  Add PyBuffer.getNIOByteBuffer to the buffer protocol.
This is a first implementation of using ByteBuffer in place of byte[] access to
objects. Supporting classes now provide an implementation wrapping the array,
and where possible, code in the core uses that instead of PyBuffer.Pointer.

files:
  src/org/python/core/PyArray.java                   |  83 ++++++--
  src/org/python/core/PyBUF.java                     |  10 +
  src/org/python/core/PyBuffer.java                  |  53 +++++-
  src/org/python/core/buffer/BaseBuffer.java         |  86 +++++++++-
  src/org/python/core/buffer/SimpleBuffer.java       |   9 +
  src/org/python/core/buffer/SimpleStringBuffer.java |  11 +-
  src/org/python/modules/_io/PyFileIO.java           |   6 +-
  src/org/python/modules/posix/PosixModule.java      |   9 +-
  8 files changed, 220 insertions(+), 47 deletions(-)


diff --git a/src/org/python/core/PyArray.java b/src/org/python/core/PyArray.java
--- a/src/org/python/core/PyArray.java
+++ b/src/org/python/core/PyArray.java
@@ -1,7 +1,6 @@
 // Copyright (c) Corporation for National Research Initiatives
 package org.python.core;
 
-import java.io.ByteArrayInputStream;
 import java.io.ByteArrayOutputStream;
 import java.io.DataInputStream;
 import java.io.DataOutputStream;
@@ -11,6 +10,7 @@
 import java.io.OutputStream;
 import java.lang.ref.WeakReference;
 import java.lang.reflect.Array;
+import java.nio.ByteBuffer;
 
 import org.python.core.buffer.BaseBuffer;
 import org.python.core.buffer.SimpleStringBuffer;
@@ -1130,19 +1130,17 @@
                 frombytesInternal(StringUtil.toBytes(s));
 
             } else {
-                // Access the bytes
+                // Access the bytes through the abstract API of the BufferProtocol
                 try (PyBuffer pybuf = ((BufferProtocol)input).getBuffer(PyBUF.STRIDED_RO)) {
-                    // Provide argument as stream of bytes for fromstream method
                     if (pybuf.getNdim() == 1) {
                         if (pybuf.getStrides()[0] == 1) {
-                            // Data are contiguous in a byte[]
-                            PyBuffer.Pointer b = pybuf.getBuf();
-                            frombytesInternal(b.storage, b.offset, pybuf.getLen());
+                            // Data are contiguous in the buffer
+                            frombytesInternal(pybuf.getNIOByteBuffer());
                         } else {
                             // As frombytesInternal only knows contiguous bytes, make a copy.
                             byte[] copy = new byte[pybuf.getLen()];
                             pybuf.copyTo(copy, 0);
-                            frombytesInternal(copy);
+                            frombytesInternal(ByteBuffer.wrap(copy));
                         }
                     } else {
                         // Currently don't support n-dimensional sources
@@ -1158,39 +1156,30 @@
     }
 
     /**
-     * Common code supporting Java and Python versions of <code>.fromstring()</code>
-     *
-     * @param input string of bytes encoding the array data
-     */
-    private final void fromstringInternal(String input) {
-        frombytesInternal(StringUtil.toBytes(input));
-    }
-
-    /**
      * Common code supporting Java and Python versions of <code>.fromstring()</code> or
      * <code>.frombytes()</code> (Python 3.2+ name).
      *
      * @param bytes array containing the new array data in machine encoding
      */
     private final void frombytesInternal(byte[] bytes) {
-        frombytesInternal(bytes, 0, bytes.length);
+        frombytesInternal(ByteBuffer.wrap(bytes));
     }
 
     /**
-     * Common code supporting Java and Python versions of <code>.fromstring()</code> or
-     * <code>.frombytes()</code> (Python 3.2+ name).
+     * Copy into this array, the remaining bytes of a ByteBuffer (from the current position to the
+     * limit). This is common code supporting Java and Python versions of <code>.fromstring()</code>
+     * or <code>.frombytes()</code> (Python 3.2+ name).
      *
-     * @param bytes array containing the new array data in machine encoding
-     * @param offset of the first byte to read
-     * @param count of bytes to read
+     * @param bytes buffer containing the new array data in machine encoding
      */
-    private final void frombytesInternal(byte[] bytes, int offset, int count) {
+    private final void frombytesInternal(ByteBuffer bytes) {
 
         // Access the bytes
         int origsize = delegate.getSize();
 
         // Check validity wrt array itemsize
         int itemsize = getStorageSize();
+        int count = bytes.remaining();
         if ((count % itemsize) != 0) {
             throw Py.ValueError("string length not a multiple of item size");
         }
@@ -1201,8 +1190,8 @@
         try {
 
             // Provide argument as stream of bytes for fromstream method
-            ByteArrayInputStream bis = new ByteArrayInputStream(bytes, offset, count);
-            fromStream(bis);
+            InputStream is = new ByteBufferBackedInputStream(bytes);
+            fromStream(is);
 
         } catch (EOFException e) {
             // stubbed catch for fromStream throws
@@ -2117,4 +2106,48 @@
         }
     }
 
+    /**
+     * Wrap a <code>ByteBuffer</code> in an InputStream. Reference: <a
+     * href="http://stackoverflow.com/questions/4332264/wrapping-a-bytebuffer-with-an-inputstream">
+     * Stackoverflow question 4332264</a>.
+     */
+    private class ByteBufferBackedInputStream extends InputStream {
+
+        ByteBuffer buf;
+
+        public ByteBufferBackedInputStream(ByteBuffer buf) {
+            this.buf = buf;
+        }
+
+        /**
+         * Return the number of bytes remaining in the underlying buffer.
+         */
+        @Override
+        public int available() throws IOException {
+            return buf.remaining();
+        }
+
+
+        @Override
+        public int read() {
+            return buf.hasRemaining() ? buf.get() & 0xff : -1;
+        }
+
+        @Override
+        public int read(byte[] bytes, int off, int len) {
+            int n = buf.remaining();
+            if (n >= len) {
+                // There are enough bytes remaining to satisfy the request.
+                buf.get(bytes, off, len);
+                return len;
+            } else if (n > 0) {
+                // There are some bytes remaining: truncate request.
+                buf.get(bytes, off, n);
+                return n;
+            } else {
+                // Signal that there are no bytes left
+                return -1;
+            }
+        }
+    }
 }
diff --git a/src/org/python/core/PyBUF.java b/src/org/python/core/PyBUF.java
--- a/src/org/python/core/PyBUF.java
+++ b/src/org/python/core/PyBUF.java
@@ -213,6 +213,16 @@
      */
     static final int FULL_RO = INDIRECT | FORMAT;
 
+    /* Constants for additional feature(s), not standard for CPython */
+
+    /**
+     * A constant used by the consumer in its call to {@link BufferProtocol#getBuffer(int)} to
+     * specify that it expects to access the buffer contents directly as an array (rather than
+     * through the purely abstract part of the API). <code>getBuffer</code> will raise an
+     * exception if the exporter cannot expose its storage as a Java array.
+     */
+    static final int AS_ARRAY = 0x10000000;
+
     /* Constants for readability, not standard for CPython */
 
     /**
diff --git a/src/org/python/core/PyBuffer.java b/src/org/python/core/PyBuffer.java
--- a/src/org/python/core/PyBuffer.java
+++ b/src/org/python/core/PyBuffer.java
@@ -1,5 +1,7 @@
 package org.python.core;
 
+import java.nio.ByteBuffer;
+
 /**
  * The Jython buffer API for access to a byte array within an exporting object. This interface is
  * the counterpart of the CPython <code>Py_buffer</code> struct. Several concrete types implement
@@ -237,10 +239,49 @@
      */
     public PyBuffer getBufferSlice(int flags, int start, int length, int stride);
 
+    // java.nio access to actual storage
+    //
+
+    /**
+     * Obtain a {@link java.nio.ByteBuffer} giving access to the bytes that hold the data being
+     * exported to the consumer. For a one-dimensional contiguous buffer, assuming the following
+     * client code where <code>obj</code> has type <code>BufferProtocol</code>:
+     *
+     * <pre>
+     * PyBuffer a = obj.getBuffer(PyBUF.SIMPLE);
+     * int itemsize = a.getItemsize();
+     * ByteBuffer bb = a.getNIOByteBuffer();
+     * </pre>
+     *
+     * the item with index <code>k</code> is in the buffer <code>bb</code> at positions
+     * <code>bb.pos()+k*itemsize</code> to <code>bb.pos()+(k+1)*itemsize - 1</code> inclusive. And
+     * if <code>itemsize==1</code>, the item is simply the byte at position <code>bb.pos()+k</code>.
+     * The buffer limit is set to the first byte beyond the valid data. A block read or write will
+     * therefore access the contents sequentially.
+     * <p>
+     * If the buffer is multidimensional or non-contiguous (strided), the buffer position is still
+     * (the first byte of) the item at index <code>[0]</code> or <code>[0,...,0]</code>, and the
+     * limit is one item beyond the valid data. However, it is necessary to navigate <code>bb</code>
+     * using the <code>shape</code>, <code>strides</code> and maybe <code>suboffsets</code> provided
+     * by the API.
+     *
+     * @return a ByteBuffer equivalent to the exported data contents.
+     */
+    ByteBuffer getNIOByteBuffer();
+
     // Direct access to actual storage
     //
 
     /**
+     * Determine whether the exporter is able to offer direct access to the exported storage as a Java
+     * byte array (through the API that involves class {@link Pointer}), or only supports the
+     * abstract API. See also {@link PyBUF#AS_ARRAY}.
+     *
+     * @return true if array access is allowed, false if it is not.
+     */
+    boolean hasArray();
+
+    /**
      * A class that references a <code>byte[]</code> array and a particular offset within it, as the
      * return type for methods that give direct access to byte-oriented data exported by a Python
      * object. In some contexts the consumer will be entitled to make changes to the contents of
@@ -270,11 +311,13 @@
      * Return a structure describing the slice of a byte array that holds the data being exported to
      * the consumer. For a one-dimensional contiguous buffer, assuming the following client code
      * where <code>obj</code> has type <code>BufferProtocol</code>:
+     *
      * <pre>
-     * PyBuffer a = obj.getBuffer();
+     * PyBuffer a = obj.getBuffer(PyBUF.SIMPLE);
      * int itemsize = a.getItemsize();
      * PyBuffer.Pointer b = a.getBuf();
      * </pre>
+     *
      * the item with index <code>k</code> is in the array <code>b.storage</code> at index
      * <code>[b.offset + k*itemsize]</code> to <code>[b.offset + (k+1)*itemsize - 1]</code>
      * inclusive. And if <code>itemsize==1</code>, the item is simply the byte
@@ -293,12 +336,14 @@
      * Return a structure describing the position in a byte array of a single item from the data
      * being exported to the consumer. For a one-dimensional contiguous buffer, assuming the
      * following client code where <code>obj</code> has type <code>BufferProtocol</code>:
+     *
      * <pre>
      * int k = ... ;
-     * PyBuffer a = obj.getBuffer();
+     * PyBuffer a = obj.getBuffer(PyBUF.FULL);
      * int itemsize = a.getItemsize();
      * PyBuffer.Pointer b = a.getPointer(k);
      * </pre>
+     *
      * the item with index <code>k</code> is in the array <code>b.storage</code> at index
      * <code>[b.offset]</code> to <code>[b.offset + itemsize - 1]</code> inclusive. And if
      * <code>itemsize==1</code>, the item is simply the byte <code>b.storage[b.offset]</code>
@@ -317,13 +362,15 @@
      * being exported to the consumer, in the case that array may be multi-dimensional. For a
      * 3-dimensional contiguous buffer, assuming the following client code where <code>obj</code>
      * has type <code>BufferProtocol</code>:
+     *
      * <pre>
      * int i, j, k;
      * // ... calculation that assigns i, j, k
-     * PyBuffer a = obj.getBuffer();
+     * PyBuffer a = obj.getBuffer(PyBUF.FULL);
      * int itemsize = a.getItemsize();
      * PyBuffer.Pointer b = a.getPointer(i,j,k);
      * </pre>
+     *
      * the item with index <code>[i,j,k]</code> is in the array <code>b.storage</code> at index
      * <code>[b.offset]</code> to <code>[b.offset + itemsize - 1]</code> inclusive. And if
      * <code>itemsize==1</code>, the item is simply the byte <code>b.storage[b.offset]</code>
diff --git a/src/org/python/core/buffer/BaseBuffer.java b/src/org/python/core/buffer/BaseBuffer.java
--- a/src/org/python/core/buffer/BaseBuffer.java
+++ b/src/org/python/core/buffer/BaseBuffer.java
@@ -1,5 +1,7 @@
 package org.python.core.buffer;
 
+import java.nio.ByteBuffer;
+
 import org.python.core.BufferProtocol;
 import org.python.core.Py;
 import org.python.core.PyBUF;
@@ -126,17 +128,17 @@
      * Construct an instance of BaseBuffer in support of a sub-class, specifying the 'feature
      * flags', or at least a starting set to be adjusted later. These are the features of the buffer
      * exported, not the flags that form the consumer's request. The buffer will be read-only unless
-     * {@link PyBUF#WRITABLE} is set in the feature flags. {@link PyBUF#FORMAT} is implicitly added
-     * to the feature flags. The navigation arrays are all null, awaiting action by the sub-class
-     * constructor. To complete initialisation, the sub-class normally must assign: the buffer (
-     * {@link #storage}, {@link #index0}), and the navigation arrays ({@link #shape},
-     * {@link #strides}), and call {@link #checkRequestFlags(int)} passing the consumer's request
-     * flags.
+     * {@link PyBUF#WRITABLE} is set in the feature flags. {@link PyBUF#FORMAT} and
+     * {@link PyBUF#AS_ARRAY} are implicitly added to the feature flags. The navigation arrays are
+     * all null, awaiting action by the sub-class constructor. To complete initialisation, the
+     * sub-class normally must assign: the buffer ( {@link #storage}, {@link #index0}), and the
+     * navigation arrays ({@link #shape}, {@link #strides}), and call
+     * {@link #checkRequestFlags(int)} passing the consumer's request flags.
      *
      * @param featureFlags bit pattern that specifies the actual features allowed/required
      */
     protected BaseBuffer(int featureFlags) {
-        setFeatureFlags(featureFlags | FORMAT);
+        setFeatureFlags(featureFlags | FORMAT | AS_ARRAY);
     }
 
     /**
@@ -213,6 +215,12 @@
     }
 
     @Override
+    public boolean hasArray() {
+        // AS_ARRAY is a non-navigational flag, so is inverted in gFeatureFlags
+        return (gFeatureFlags & AS_ARRAY) != 0;
+    }
+
+    @Override
     public int getNdim() {
         return shape.length;
     }
@@ -297,7 +305,7 @@
      * accessors. The default implementation here is suited to N-dimensional arrays.
      *
      * @param indices of the item from the consumer
-     * @return index relative to item x[0,...,0] in actual storage
+     * @return corresponding absolute index in storage
      */
     protected int calcIndex(int... indices) throws IndexOutOfBoundsException {
         final int N = checkDimension(indices);
@@ -313,6 +321,57 @@
     }
 
     /**
+     * Calculate the absolute byte index in the storage array of the last item of the exported data
+     * (if we are not using indirection). This is the greatest value attained by
+     * {@link #calcIndex(int...)}. The first byte not used will be one <code>itemsize</code> more
+     * than the returned value.
+     *
+     * @return greatest absolute index in storage
+     */
+    protected int calcGreatestIndex() throws IndexOutOfBoundsException {
+        final int N = shape.length;
+        // If all the strides are positive, the maximal value is found from:
+        // index = index0 + sum(k=0,N-1) (shape[k]-1)*strides[k]
+        // but in general, for any k where strides[k]<=0, the term should be zero.
+        int index = index0;
+        if (N > 0) {
+            int[] strides = getStrides();
+            for (int k = 0; k < N; k++) {
+                int stride = strides[k];
+                if (stride > 0) {
+                    index += (shape[k] - 1) * stride;
+                }
+            }
+        }
+        return index;
+    }
+
+    /**
+     * Calculate the absolute byte index in the storage array of the first item of the exported data
+     * (if we are not using indirection). This is the least value attained by
+     * {@link #calcIndex(int...)}.
+     *
+     * @return least absolute index in storage
+     */
+    protected int calcLeastIndex() throws IndexOutOfBoundsException {
+        final int N = shape.length;
+        // If all the strides are positive, the minimal value is just index0,
+        // but in general, we must allow strides[k]<=0 for some k:
+        // index = index0 + sum(k=0,N-1) (strides[k]<0) ? (shape[k]-1)*strides[k] : 0
+        int index = index0;
+        if (N > 0) {
+            int[] strides = getStrides();
+            for (int k = 0; k < N; k++) {
+                int stride = strides[k];
+                if (stride < 0) {
+                    index += (shape[k] - 1) * stride;
+                }
+            }
+        }
+        return index;
+    }
+
+    /**
      * {@inheritDoc}
      * <p>
      * The default implementation in <code>BaseBuffer</code> deals with the general one-dimensional
@@ -540,6 +599,15 @@
     // @Override public PyBuffer getBufferSlice(int flags, int start, int length, int stride) {}
 
     @Override
+    public ByteBuffer getNIOByteBuffer() {
+        // Determine the limit of the buffer just beyond the last item.
+        int length = calcGreatestIndex() + getItemsize() - index0;
+        ByteBuffer b = ByteBuffer.wrap(storage, index0, length);
+        // Return as read-only if it is.
+        return isReadonly() ? b.asReadOnlyBuffer() : b;
+    }
+
+    @Override
     public Pointer getBuf() {
         return new Pointer(storage, index0);
     }
@@ -664,6 +732,8 @@
             return bufferRequires("shape array");
         } else if ((syndrome & WRITABLE) != 0) {
             return bufferIsNot("writable");
+        } else if ((syndrome & AS_ARRAY) != 0) {
+            return bufferIsNot("accessible as a Java array");
         } else if ((syndrome & C_CONTIGUOUS) != 0) {
             return bufferIsNot("C-contiguous");
         } else if ((syndrome & F_CONTIGUOUS) != 0) {
diff --git a/src/org/python/core/buffer/SimpleBuffer.java b/src/org/python/core/buffer/SimpleBuffer.java
--- a/src/org/python/core/buffer/SimpleBuffer.java
+++ b/src/org/python/core/buffer/SimpleBuffer.java
@@ -1,5 +1,7 @@
 package org.python.core.buffer;
 
+import java.nio.ByteBuffer;
+
 import org.python.core.PyBuffer;
 import org.python.core.PyException;
 import org.python.core.util.StringUtil;
@@ -229,6 +231,13 @@
     }
 
     @Override
+    public ByteBuffer getNIOByteBuffer() {
+        // Simplify for one-dimensional contiguous bytes
+        ByteBuffer b = ByteBuffer.wrap(storage, index0, shape[0]);
+        return isReadonly() ? b.asReadOnlyBuffer() : b;
+    }
+
+    @Override
     public Pointer getPointer(int index) throws IndexOutOfBoundsException {
         return new Pointer(storage, index0 + index);
     }
diff --git a/src/org/python/core/buffer/SimpleStringBuffer.java b/src/org/python/core/buffer/SimpleStringBuffer.java
--- a/src/org/python/core/buffer/SimpleStringBuffer.java
+++ b/src/org/python/core/buffer/SimpleStringBuffer.java
@@ -1,5 +1,7 @@
 package org.python.core.buffer;
 
+import java.nio.ByteBuffer;
+
 import org.python.core.PyBuffer;
 import org.python.core.util.StringUtil;
 
@@ -112,12 +114,19 @@
             return getBufferSlice(flags, start, length);
         } else {
             // Force creation of the actual byte array from the String.
-            getBuf();
+            ensureHaveBytes();
             // Now we are effectively a SimpleBuffer, return the strided view.
             return super.getBufferSlice(flags, start, length, stride);
         }
     }
 
+    @Override
+    public ByteBuffer getNIOByteBuffer() {
+        // Force creation of the actual byte array from the String.
+        ensureHaveBytes();
+        return super.getNIOByteBuffer().asReadOnlyBuffer();
+    }
+
     /**
      * This method creates an actual byte array from the underlying String if none yet exists.
      */
diff --git a/src/org/python/modules/_io/PyFileIO.java b/src/org/python/modules/_io/PyFileIO.java
--- a/src/org/python/modules/_io/PyFileIO.java
+++ b/src/org/python/modules/_io/PyFileIO.java
@@ -251,8 +251,7 @@
             PyBuffer pybuf = writablePyBuffer(buf);
 
             try {
-                PyBuffer.Pointer bp = pybuf.getBuf();
-                ByteBuffer byteBuffer = ByteBuffer.wrap(bp.storage, bp.offset, pybuf.getLen());
+                ByteBuffer byteBuffer = pybuf.getNIOByteBuffer();
                 synchronized (ioDelegate) {
                     count = ioDelegate.readinto(byteBuffer);
                 }
@@ -293,8 +292,7 @@
 
             try {
                 // Access the data as a java.nio.ByteBuffer [pos:limit] within possibly larger array
-                PyBuffer.Pointer bp = pybuf.getBuf();
-                ByteBuffer byteBuffer = ByteBuffer.wrap(bp.storage, bp.offset, pybuf.getLen());
+                ByteBuffer byteBuffer = pybuf.getNIOByteBuffer();
                 synchronized (ioDelegate) {
                     count = ioDelegate.write(byteBuffer);
                 }
diff --git a/src/org/python/modules/posix/PosixModule.java b/src/org/python/modules/posix/PosixModule.java
--- a/src/org/python/modules/posix/PosixModule.java
+++ b/src/org/python/modules/posix/PosixModule.java
@@ -29,7 +29,6 @@
 import org.python.core.Py;
 import org.python.core.PyBUF;
 import org.python.core.PyBuffer;
-import org.python.core.PyBuffer.Pointer;
 import org.python.core.PyBuiltinFunctionNarrow;
 import org.python.core.PyDictionary;
 import org.python.core.PyException;
@@ -826,10 +825,8 @@
     public static int write(PyObject fd, BufferProtocol bytes) {
         // Get a buffer view: we can cope with N-dimensional data, but not strided data.
         try (PyBuffer buf = bytes.getBuffer(PyBUF.ND)) {
-            // Get the array and offset of the first real byte.
-            Pointer p = buf.getBuf();
-            // Make a ByteBuffer of that array, setting the position and limit to the real data.
-            ByteBuffer bb = ByteBuffer.wrap(p.storage, p.offset, buf.getLen());
+            // Get a ByteBuffer of that data, setting the position and limit to the real data.
+            ByteBuffer bb =  buf.getNIOByteBuffer();
             try {
                 // Write the data (returning the count of bytes).
                 return FileDescriptors.get(fd).write(bb);
@@ -902,7 +899,7 @@
     }
 
     /**
-     * Return a path as a String from a PyObject 
+     * Return a path as a String from a PyObject
      *
      * @param path a PyObject, raising a TypeError if an invalid path type
      * @return a String path

-- 
Repository URL: http://hg.python.org/jython


More information about the Jython-checkins mailing list