[Jython-checkins] jython: Add PyBuffer.getNIOByteBuffer to the buffer protocol.
jeff.allen
jython-checkins at python.org
Wed Sep 17 00:55:24 CEST 2014
http://hg.python.org/jython/rev/330839dc597a
changeset: 7377:330839dc597a
user: Jeff Allen <ja.py at farowl.co.uk>
date: Tue Aug 05 23:13:02 2014 +0100
summary:
Add PyBuffer.getNIOByteBuffer to the buffer protocol.
This is a first implementation of using ByteBuffer in place of byte[] access to
objects. Supporting classes now provide an implementation wrapping the array,
and where possible, code in the core uses that instead of PyBuffer.Pointer.
files:
src/org/python/core/PyArray.java | 83 ++++++--
src/org/python/core/PyBUF.java | 10 +
src/org/python/core/PyBuffer.java | 53 +++++-
src/org/python/core/buffer/BaseBuffer.java | 86 +++++++++-
src/org/python/core/buffer/SimpleBuffer.java | 9 +
src/org/python/core/buffer/SimpleStringBuffer.java | 11 +-
src/org/python/modules/_io/PyFileIO.java | 6 +-
src/org/python/modules/posix/PosixModule.java | 9 +-
8 files changed, 220 insertions(+), 47 deletions(-)
diff --git a/src/org/python/core/PyArray.java b/src/org/python/core/PyArray.java
--- a/src/org/python/core/PyArray.java
+++ b/src/org/python/core/PyArray.java
@@ -1,7 +1,6 @@
// Copyright (c) Corporation for National Research Initiatives
package org.python.core;
-import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
@@ -11,6 +10,7 @@
import java.io.OutputStream;
import java.lang.ref.WeakReference;
import java.lang.reflect.Array;
+import java.nio.ByteBuffer;
import org.python.core.buffer.BaseBuffer;
import org.python.core.buffer.SimpleStringBuffer;
@@ -1130,19 +1130,17 @@
frombytesInternal(StringUtil.toBytes(s));
} else {
- // Access the bytes
+ // Access the bytes through the abstract API of the BufferProtocol
try (PyBuffer pybuf = ((BufferProtocol)input).getBuffer(PyBUF.STRIDED_RO)) {
- // Provide argument as stream of bytes for fromstream method
if (pybuf.getNdim() == 1) {
if (pybuf.getStrides()[0] == 1) {
- // Data are contiguous in a byte[]
- PyBuffer.Pointer b = pybuf.getBuf();
- frombytesInternal(b.storage, b.offset, pybuf.getLen());
+ // Data are contiguous in the buffer
+ frombytesInternal(pybuf.getNIOByteBuffer());
} else {
// As frombytesInternal only knows contiguous bytes, make a copy.
byte[] copy = new byte[pybuf.getLen()];
pybuf.copyTo(copy, 0);
- frombytesInternal(copy);
+ frombytesInternal(ByteBuffer.wrap(copy));
}
} else {
// Currently don't support n-dimensional sources
@@ -1158,39 +1156,30 @@
}
/**
- * Common code supporting Java and Python versions of <code>.fromstring()</code>
- *
- * @param input string of bytes encoding the array data
- */
- private final void fromstringInternal(String input) {
- frombytesInternal(StringUtil.toBytes(input));
- }
-
- /**
* Common code supporting Java and Python versions of <code>.fromstring()</code> or
* <code>.frombytes()</code> (Python 3.2+ name).
*
* @param bytes array containing the new array data in machine encoding
*/
private final void frombytesInternal(byte[] bytes) {
- frombytesInternal(bytes, 0, bytes.length);
+ frombytesInternal(ByteBuffer.wrap(bytes));
}
/**
- * Common code supporting Java and Python versions of <code>.fromstring()</code> or
- * <code>.frombytes()</code> (Python 3.2+ name).
+ * Copy into this array, the remaining bytes of a ByteBuffer (from the current position to the
+ * limit). This is common code supporting Java and Python versions of <code>.fromstring()</code>
+ * or <code>.frombytes()</code> (Python 3.2+ name).
*
- * @param bytes array containing the new array data in machine encoding
- * @param offset of the first byte to read
- * @param count of bytes to read
+ * @param bytes buffer containing the new array data in machine encoding
*/
- private final void frombytesInternal(byte[] bytes, int offset, int count) {
+ private final void frombytesInternal(ByteBuffer bytes) {
// Access the bytes
int origsize = delegate.getSize();
// Check validity wrt array itemsize
int itemsize = getStorageSize();
+ int count = bytes.remaining();
if ((count % itemsize) != 0) {
throw Py.ValueError("string length not a multiple of item size");
}
@@ -1201,8 +1190,8 @@
try {
// Provide argument as stream of bytes for fromstream method
- ByteArrayInputStream bis = new ByteArrayInputStream(bytes, offset, count);
- fromStream(bis);
+ InputStream is = new ByteBufferBackedInputStream(bytes);
+ fromStream(is);
} catch (EOFException e) {
// stubbed catch for fromStream throws
@@ -2117,4 +2106,48 @@
}
}
+ /**
+ * Wrap a <code>ByteBuffer</code> in an InputStream. Reference: <a
+ * href=http://stackoverflow.com/questions/4332264/wrapping-a-bytebuffer-with-an-inputstream>
+ * Stackoverflow question 4332264</a>.
+ */
+ private class ByteBufferBackedInputStream extends InputStream {
+
+ ByteBuffer buf;
+
+ public ByteBufferBackedInputStream(ByteBuffer buf) {
+ this.buf = buf;
+ }
+
+ /**
+ * Return the number of bytes remaining in the underlying buffer.
+ */
+ @Override
+ public int available() throws IOException {
+ return buf.remaining();
+ }
+
+
+ @Override
+ public int read() {
+ return buf.hasRemaining() ? buf.get() & 0xff : -1;
+ }
+
+ @Override
+ public int read(byte[] bytes, int off, int len) {
+ int n = buf.remaining();
+ if (n >= len) {
+ // There are enough bytes remaining to satisfy the request.
+ buf.get(bytes, off, len);
+ return len;
+ } else if (n > 0) {
+ // There are some bytes remaining: truncate request.
+ buf.get(bytes, off, n);
+ return n;
+ } else {
+ // Signal that there are no bytes left
+ return -1;
+ }
+ }
+ }
}
diff --git a/src/org/python/core/PyBUF.java b/src/org/python/core/PyBUF.java
--- a/src/org/python/core/PyBUF.java
+++ b/src/org/python/core/PyBUF.java
@@ -213,6 +213,16 @@
*/
static final int FULL_RO = INDIRECT | FORMAT;
+ /* Constants for additional feature(s), not standard for CPython */
+
+ /**
+ * A constant used by the consumer in its call to {@link BufferProtocol#getBuffer(int)} to
+ * specify that it expects to access the buffer contents directly as an array (rather than
+ * through the purely abstract part of the API). <code>getBuffer</code> will raise an
+ * exception if the exporter cannot expose its storage as a Java array.
+ */
+ static final int AS_ARRAY = 0x10000000;
+
/* Constants for readability, not standard for CPython */
/**
diff --git a/src/org/python/core/PyBuffer.java b/src/org/python/core/PyBuffer.java
--- a/src/org/python/core/PyBuffer.java
+++ b/src/org/python/core/PyBuffer.java
@@ -1,5 +1,7 @@
package org.python.core;
+import java.nio.ByteBuffer;
+
/**
* The Jython buffer API for access to a byte array within an exporting object. This interface is
* the counterpart of the CPython <code>Py_buffer</code> struct. Several concrete types implement
@@ -237,10 +239,49 @@
*/
public PyBuffer getBufferSlice(int flags, int start, int length, int stride);
+ // java.nio access to actual storage
+ //
+
+ /**
+ * Obtain a {@link java.nio.ByteBuffer} giving access to the bytes that hold the data being
+ * exported to the consumer. For a one-dimensional contiguous buffer, assuming the following
+ * client code where <code>obj</code> has type <code>BufferProtocol</code>:
+ *
+ * <pre>
+ * PyBuffer a = obj.getBuffer(PyBUF.SIMPLE);
+ * int itemsize = a.getItemsize();
+ * ByteBuffer bb = a.getNIOByteBuffer();
+ * </pre>
+ *
+ * the item with index <code>k</code> is in the buffer <code>bb</code> at positions
+ * <code>bb.position()+k*itemsize</code> to <code>bb.position()+(k+1)*itemsize - 1</code> inclusive.
+ * And if <code>itemsize==1</code>, the item is simply the byte at position <code>bb.position()+k</code>.
+ * The buffer limit is set to the first byte beyond the valid data. A block read or write will
+ * therefore access the contents sequentially.
+ * <p>
+ * If the buffer is multidimensional or non-contiguous (strided), the buffer position is still
+ * the (first byte of) the item at index <code>[0]</code> or <code>[0,...,0]</code>, and the
+ * limit is one item beyond the valid data. However, it is necessary to navigate <code>bb</code>
+ * using the <code>shape</code>, <code>strides</code> and maybe <code>suboffsets</code> provided
+ * by the API.
+ *
+ * @return a ByteBuffer equivalent to the exported data contents.
+ */
+ ByteBuffer getNIOByteBuffer();
+
// Direct access to actual storage
//
/**
+ * Determine whether the exporter is able to offer direct access to the exported storage as a Java
+ * byte array (through the API that involves class {@link Pointer}), or only supports the
+ * abstract API. See also {@link PyBUF#AS_ARRAY}.
+ *
+ * @return true if array access is supported, false if it is not.
+ */
+ boolean hasArray();
+
+ /**
* A class that references a <code>byte[]</code> array and a particular offset within it, as the
* return type for methods that give direct access to byte-oriented data exported by a Python
* object. In some contexts the consumer will be entitled to make changes to the contents of
@@ -270,11 +311,13 @@
* Return a structure describing the slice of a byte array that holds the data being exported to
* the consumer. For a one-dimensional contiguous buffer, assuming the following client code
* where <code>obj</code> has type <code>BufferProtocol</code>:
+ *
* <pre>
- * PyBuffer a = obj.getBuffer();
+ * PyBuffer a = obj.getBuffer(PyBUF.SIMPLE);
* int itemsize = a.getItemsize();
* PyBuffer.Pointer b = a.getBuf();
* </pre>
+ *
* the item with index <code>k</code> is in the array <code>b.storage</code> at index
* <code>[b.offset + k*itemsize]</code> to <code>[b.offset + (k+1)*itemsize - 1]</code>
* inclusive. And if <code>itemsize==1</code>, the item is simply the byte
@@ -293,12 +336,14 @@
* Return a structure describing the position in a byte array of a single item from the data
* being exported to the consumer. For a one-dimensional contiguous buffer, assuming the
* following client code where <code>obj</code> has type <code>BufferProtocol</code>:
+ *
* <pre>
* int k = ... ;
- * PyBuffer a = obj.getBuffer();
+ * PyBuffer a = obj.getBuffer(PyBUF.FULL);
* int itemsize = a.getItemsize();
* PyBuffer.Pointer b = a.getPointer(k);
* </pre>
+ *
* the item with index <code>k</code> is in the array <code>b.storage</code> at index
* <code>[b.offset]</code> to <code>[b.offset + itemsize - 1]</code> inclusive. And if
* <code>itemsize==1</code>, the item is simply the byte <code>b.storage[b.offset]</code>
@@ -317,13 +362,15 @@
* being exported to the consumer, in the case that array may be multi-dimensional. For a
* 3-dimensional contiguous buffer, assuming the following client code where <code>obj</code>
* has type <code>BufferProtocol</code>:
+ *
* <pre>
* int i, j, k;
* // ... calculation that assigns i, j, k
- * PyBuffer a = obj.getBuffer();
+ * PyBuffer a = obj.getBuffer(PyBUF.FULL);
* int itemsize = a.getItemsize();
* PyBuffer.Pointer b = a.getPointer(i,j,k);
* </pre>
+ *
* the item with index <code>[i,j,k]</code> is in the array <code>b.storage</code> at index
* <code>[b.offset]</code> to <code>[b.offset + itemsize - 1]</code> inclusive. And if
* <code>itemsize==1</code>, the item is simply the byte <code>b.storage[b.offset]</code>
diff --git a/src/org/python/core/buffer/BaseBuffer.java b/src/org/python/core/buffer/BaseBuffer.java
--- a/src/org/python/core/buffer/BaseBuffer.java
+++ b/src/org/python/core/buffer/BaseBuffer.java
@@ -1,5 +1,7 @@
package org.python.core.buffer;
+import java.nio.ByteBuffer;
+
import org.python.core.BufferProtocol;
import org.python.core.Py;
import org.python.core.PyBUF;
@@ -126,17 +128,17 @@
* Construct an instance of BaseBuffer in support of a sub-class, specifying the 'feature
* flags', or at least a starting set to be adjusted later. These are the features of the buffer
* exported, not the flags that form the consumer's request. The buffer will be read-only unless
- * {@link PyBUF#WRITABLE} is set in the feature flags. {@link PyBUF#FORMAT} is implicitly added
- * to the feature flags. The navigation arrays are all null, awaiting action by the sub-class
- * constructor. To complete initialisation, the sub-class normally must assign: the buffer (
- * {@link #storage}, {@link #index0}), and the navigation arrays ({@link #shape},
- * {@link #strides}), and call {@link #checkRequestFlags(int)} passing the consumer's request
- * flags.
+ * {@link PyBUF#WRITABLE} is set in the feature flags. {@link PyBUF#FORMAT} and
+ * {@link PyBUF#AS_ARRAY} are implicitly added to the feature flags. The navigation arrays are
+ * all null, awaiting action by the sub-class constructor. To complete initialisation, the
+ * sub-class normally must assign: the buffer ( {@link #storage}, {@link #index0}), and the
+ * navigation arrays ({@link #shape}, {@link #strides}), and call
+ * {@link #checkRequestFlags(int)} passing the consumer's request flags.
*
* @param featureFlags bit pattern that specifies the actual features allowed/required
*/
protected BaseBuffer(int featureFlags) {
- setFeatureFlags(featureFlags | FORMAT);
+ setFeatureFlags(featureFlags | FORMAT | AS_ARRAY);
}
/**
@@ -213,6 +215,12 @@
}
@Override
+ public boolean hasArray() {
+ // AS_ARRAY is a non-navigational flag, so is inverted in gFeatureFlags: a clear bit means "has array"
+ return (gFeatureFlags & AS_ARRAY) == 0;
+ }
+
+ @Override
public int getNdim() {
return shape.length;
}
@@ -297,7 +305,7 @@
* accessors. The default implementation here is suited to N-dimensional arrays.
*
* @param indices of the item from the consumer
- * @return index relative to item x[0,...,0] in actual storage
+ * @return corresponding absolute index in storage
*/
protected int calcIndex(int... indices) throws IndexOutOfBoundsException {
final int N = checkDimension(indices);
@@ -313,6 +321,57 @@
}
/**
+ * Calculate the absolute byte index in the storage array of the last item of the exported data
+ * (if we are not using indirection). This is the greatest value attained by
+ * {@link #calcIndex(int...)}. The first byte not used will be one <code>itemsize</code> more
+ * than the returned value.
+ *
+ * @return greatest absolute index in storage
+ */
+ protected int calcGreatestIndex() throws IndexOutOfBoundsException {
+ final int N = shape.length;
+ // If all the strides are positive, the maximal value is found from:
+ // index = index0 + sum(k=0,N-1) (shape[k]-1)*strides[k]
+ // but in general, for any k where strides[k]<=0, the term should be zero.
+ int index = index0;
+ if (N > 0) {
+ int[] strides = getStrides();
+ for (int k = 0; k < N; k++) {
+ int stride = strides[k];
+ if (stride > 0) {
+ index += (shape[k] - 1) * stride;
+ }
+ }
+ }
+ return index;
+ }
+
+ /**
+ * Calculate the absolute byte index in the storage array of the first item of the exported data
+ * (if we are not using indirection). This is the least value attained by
+ * {@link #calcIndex(int...)}.
+ *
+ * @return least absolute index in storage
+ */
+ protected int calcLeastIndex() throws IndexOutOfBoundsException {
+ final int N = shape.length;
+ // If all the strides are positive, the minimal value is just index0,
+ // but in general, we must allow strides[k]<=0 for some k:
+ // index = index0 + sum(k=0,N-1) (strides[k]<0) ? (shape[k]-1)*strides[k] : 0
+ int index = index0;
+ if (N > 0) {
+ int[] strides = getStrides();
+ for (int k = 0; k < N; k++) {
+ int stride = strides[k];
+ if (stride < 0) {
+ index += (shape[k] - 1) * stride;
+ }
+ }
+ }
+ return index;
+ }
+
+ /**
* {@inheritDoc}
* <p>
* The default implementation in <code>BaseBuffer</code> deals with the general one-dimensional
@@ -540,6 +599,15 @@
// @Override public PyBuffer getBufferSlice(int flags, int start, int length, int stride) {}
@Override
+ public ByteBuffer getNIOByteBuffer() {
+ // Determine the limit of the buffer just beyond the last item.
+ int length = calcGreatestIndex() + getItemsize() - index0;
+ ByteBuffer b = ByteBuffer.wrap(storage, index0, length);
+ // Return as read-only if it is.
+ return isReadonly() ? b.asReadOnlyBuffer() : b;
+ }
+
+ @Override
public Pointer getBuf() {
return new Pointer(storage, index0);
}
@@ -664,6 +732,8 @@
return bufferRequires("shape array");
} else if ((syndrome & WRITABLE) != 0) {
return bufferIsNot("writable");
+ } else if ((syndrome & AS_ARRAY) != 0) {
+ return bufferIsNot("accessible as a Java array");
} else if ((syndrome & C_CONTIGUOUS) != 0) {
return bufferIsNot("C-contiguous");
} else if ((syndrome & F_CONTIGUOUS) != 0) {
diff --git a/src/org/python/core/buffer/SimpleBuffer.java b/src/org/python/core/buffer/SimpleBuffer.java
--- a/src/org/python/core/buffer/SimpleBuffer.java
+++ b/src/org/python/core/buffer/SimpleBuffer.java
@@ -1,5 +1,7 @@
package org.python.core.buffer;
+import java.nio.ByteBuffer;
+
import org.python.core.PyBuffer;
import org.python.core.PyException;
import org.python.core.util.StringUtil;
@@ -229,6 +231,13 @@
}
@Override
+ public ByteBuffer getNIOByteBuffer() {
+ // Simplify for one-dimensional contiguous bytes
+ ByteBuffer b = ByteBuffer.wrap(storage, index0, shape[0]);
+ return isReadonly() ? b.asReadOnlyBuffer() : b;
+ }
+
+ @Override
public Pointer getPointer(int index) throws IndexOutOfBoundsException {
return new Pointer(storage, index0 + index);
}
diff --git a/src/org/python/core/buffer/SimpleStringBuffer.java b/src/org/python/core/buffer/SimpleStringBuffer.java
--- a/src/org/python/core/buffer/SimpleStringBuffer.java
+++ b/src/org/python/core/buffer/SimpleStringBuffer.java
@@ -1,5 +1,7 @@
package org.python.core.buffer;
+import java.nio.ByteBuffer;
+
import org.python.core.PyBuffer;
import org.python.core.util.StringUtil;
@@ -112,12 +114,19 @@
return getBufferSlice(flags, start, length);
} else {
// Force creation of the actual byte array from the String.
- getBuf();
+ ensureHaveBytes();
// Now we are effectively a SimpleBuffer, return the strided view.
return super.getBufferSlice(flags, start, length, stride);
}
}
+ @Override
+ public ByteBuffer getNIOByteBuffer() {
+ // Force creation of the actual byte array from the String.
+ ensureHaveBytes();
+ return super.getNIOByteBuffer().asReadOnlyBuffer();
+ }
+
/**
* This method creates an actual byte array from the underlying String if none yet exists.
*/
diff --git a/src/org/python/modules/_io/PyFileIO.java b/src/org/python/modules/_io/PyFileIO.java
--- a/src/org/python/modules/_io/PyFileIO.java
+++ b/src/org/python/modules/_io/PyFileIO.java
@@ -251,8 +251,7 @@
PyBuffer pybuf = writablePyBuffer(buf);
try {
- PyBuffer.Pointer bp = pybuf.getBuf();
- ByteBuffer byteBuffer = ByteBuffer.wrap(bp.storage, bp.offset, pybuf.getLen());
+ ByteBuffer byteBuffer = pybuf.getNIOByteBuffer();
synchronized (ioDelegate) {
count = ioDelegate.readinto(byteBuffer);
}
@@ -293,8 +292,7 @@
try {
// Access the data as a java.nio.ByteBuffer [pos:limit] within possibly larger array
- PyBuffer.Pointer bp = pybuf.getBuf();
- ByteBuffer byteBuffer = ByteBuffer.wrap(bp.storage, bp.offset, pybuf.getLen());
+ ByteBuffer byteBuffer = pybuf.getNIOByteBuffer();
synchronized (ioDelegate) {
count = ioDelegate.write(byteBuffer);
}
diff --git a/src/org/python/modules/posix/PosixModule.java b/src/org/python/modules/posix/PosixModule.java
--- a/src/org/python/modules/posix/PosixModule.java
+++ b/src/org/python/modules/posix/PosixModule.java
@@ -29,7 +29,6 @@
import org.python.core.Py;
import org.python.core.PyBUF;
import org.python.core.PyBuffer;
-import org.python.core.PyBuffer.Pointer;
import org.python.core.PyBuiltinFunctionNarrow;
import org.python.core.PyDictionary;
import org.python.core.PyException;
@@ -826,10 +825,8 @@
public static int write(PyObject fd, BufferProtocol bytes) {
// Get a buffer view: we can cope with N-dimensional data, but not strided data.
try (PyBuffer buf = bytes.getBuffer(PyBUF.ND)) {
- // Get the array and offset of the first real byte.
- Pointer p = buf.getBuf();
- // Make a ByteBuffer of that array, setting the position and limit to the real data.
- ByteBuffer bb = ByteBuffer.wrap(p.storage, p.offset, buf.getLen());
+ // Get a ByteBuffer of that data, setting the position and limit to the real data.
+ ByteBuffer bb = buf.getNIOByteBuffer();
try {
// Write the data (returning the count of bytes).
return FileDescriptors.get(fd).write(bb);
@@ -902,7 +899,7 @@
}
/**
- * Return a path as a String from a PyObject
+ * Return a path as a String from a PyObject
*
* @param path a PyObject, raising a TypeError if an invalid path type
* @return a String path
--
Repository URL: http://hg.python.org/jython
More information about the Jython-checkins
mailing list