[Jython-checkins] jython (merge default -> default): Merge recent fixes related to bytearray

jeff.allen jython-checkins at python.org
Sat Jul 21 00:35:14 CEST 2012


http://hg.python.org/jython/rev/546e63253a0e
changeset:   6809:546e63253a0e
parent:      6804:bef3eb26155e
parent:      6808:95bc782dc561
user:        Jeff Allen <ja...py at farowl.co.uk>
date:        Fri Jul 20 23:15:35 2012 +0100
summary:
  Merge recent fixes related to bytearray

files:
  src/org/python/core/BaseBytes.java   |  91 +++++++++------
  src/org/python/core/PyByteArray.java |   4 +-
  src/org/python/core/PyInteger.java   |  85 ++++++++++----
  src/org/python/core/PySequence.java  |  33 +++++-
  src/org/python/core/PyString.java    |  10 +-
  src/org/python/core/__builtin__.java |  52 ++++++--
  6 files changed, 190 insertions(+), 85 deletions(-)


diff --git a/src/org/python/core/BaseBytes.java b/src/org/python/core/BaseBytes.java
--- a/src/org/python/core/BaseBytes.java
+++ b/src/org/python/core/BaseBytes.java
@@ -41,7 +41,8 @@
      * @param type explicit Jython type
      */
     public BaseBytes(PyType type) {
-        super(type);            // implicit setStorage( emptyStorage );
+        super(type, null);
+        delegator = new IndexDelegate();
         setStorage(emptyStorage);
     }
 
@@ -52,7 +53,8 @@
      * @param type explicit Jython type
      */
     public BaseBytes(PyType type, int size) {
-        super(type);
+        super(type, null);
+        delegator = new IndexDelegate();
         newStorage(size);
     }
 
@@ -63,7 +65,8 @@
      * @param value source of values (and size)
      */
     public BaseBytes(PyType type, int[] value) {
-        super(type);
+        super(type, null);
+        delegator = new IndexDelegate();
         int n = value.length;
         newStorage(n);
         for (int i = offset, j = 0; j < n; i++, j++) {
@@ -80,7 +83,8 @@
      * @throws PyException if any value[i] > 255
      */
     protected BaseBytes(PyType type, String value) throws PyException {
-        super(type);
+        super(type, null);
+        delegator = new IndexDelegate();
         int n = value.length();
         newStorage(n);
         for (int i = offset, j = 0; j < n; j++) {
@@ -1127,6 +1131,38 @@
         return getslice(start, stop, 1);
     }
 
+    /**
+     * Class defining the behaviour of bytearray with respect to slice assignment, etc., which
+     * differs from the default (list) behaviour in small ways.
+     */
+    private class IndexDelegate extends PySequence.DefaultIndexDelegate {
+
+        /*
+         * bytearray treats assignment of a zero-length object to a slice as equivalent to deletion,
+         * unlike list, even for an extended slice.
+         */
+        // >>> a = range(10)
+        // >>> b = bytearray(a)
+        // >>> a[1:6:2] = []
+        // Traceback (most recent call last):
+        // File "<stdin>", line 1, in <module>
+        // ValueError: attempt to assign sequence of size 0 to extended slice of size 3
+        // >>> b[1:6:2] = []
+        // >>> b
+        // bytearray(b'\x00\x02\x04\x06\x07\x08\t')
+        //
+        @Override
+        public void checkIdxAndSetSlice(PySlice slice, PyObject value) {
+            if (value.__len__() != 0) {
+                // Proceed as default
+                super.checkIdxAndSetSlice(slice, value);
+            } else {
+                // Treat as deletion
+                super.checkIdxAndDelItem(slice);
+            }
+        }
+    };
+
     /*
      * ============================================================================================
      * Support for Python API common to mutable and immutable subclasses
@@ -1479,18 +1515,21 @@
     }
 
     /**
-     * Copy the bytes of a byte array to the characters of a String with no change in ordinal value.
-     * This could also be described as 'latin-1' decoding of the byte array to a String.
-     *
-     * @return the byte array as a String, still encoded
+     * Present the bytes of a byte array, with no decoding, as a Java String. The bytes are treated
+     * as unsigned character codes, and copied to the characters of a String with no change
+     * in ordinal value. This could also be described as 'latin-1' or 'ISO-8859-1' decoding of the
+     * byte array to a String, since each byte maps to the Unicode code point of equal value.
+     * 
+     * @return the byte array as a String
      */
-    private synchronized String asEncodedString() {
-        StringBuilder buf = new StringBuilder(size);
-        int jmax = offset + size;
-        for (int j = offset; j < jmax; j++) {
-            buf.append((char)(0xff & storage[j]));
+    @Override
+    public synchronized String asString() {
+        char[] buf = new char[size];
+        int j = offset + size;
+        for (int i = size; --i >= 0;) {
+            buf[i] = (char)(0xff & storage[--j]);
         }
-        return buf.toString();
+        return new String(buf);
     }
 
     /**
@@ -1530,7 +1569,7 @@
          * expects a PyString. (In Python 3k the codecs decode from the <code>bytes</code> type, so
          * we can pass this directly.)
          */
-        PyString this_ = new PyString(this.asEncodedString());
+        PyString this_ = new PyString(this.asString());
         return codecs.decode(this_, encoding, errors);
     }
 
@@ -1578,7 +1617,7 @@
      * @return required tuple of type, arguments needed by init, and any user-added attributes.
      */
     final PyTuple basebytes___reduce__() {
-        PyUnicode encoded = new PyUnicode(this.asEncodedString());
+        PyUnicode encoded = new PyUnicode(this.asString());
         PyObject args = new PyTuple(encoded, getPickleEncoding());
         PyObject dict = __findattr__("__dict__");
         return new PyTuple(getType(), args, (dict != null) ? dict : Py.None);
@@ -3964,26 +4003,6 @@
         return buf.toString();
     }
 
-    /**
-     * Ready-to-expose Python <code>__str__()</code>, returning a <code>PyString</code> by treating
-     * the bytes as point codes. The built-in function <code>str()</code> is expected to call this
-     * method.
-     */
-    final synchronized PyString basebytes_str() {
-        // Get hold of the decoder for ISO-8859-1, which is one-for-one with Unicode
-        if (defaultCharset == null) {
-            defaultCharset = Charset.forName("ISO-8859-1");
-        }
-        String s = new String(storage, offset, size, defaultCharset);
-        return new PyString(s);
-    }
-
-    /**
-     * Used in {@link #basebytes_str()}, and when not null, points to the identity Charset for
-     * decoding bytes to char.
-     */
-    private static Charset defaultCharset;
-
     /*
      * ============================================================================================
      * API for java.util.List<PyInteger>
diff --git a/src/org/python/core/PyByteArray.java b/src/org/python/core/PyByteArray.java
--- a/src/org/python/core/PyByteArray.java
+++ b/src/org/python/core/PyByteArray.java
@@ -1932,12 +1932,12 @@
      */
     @Override
     public PyString __str__() {
-        return basebytes_str();
+        return bytearray_str();
     }
 
     @ExposedMethod(names = {"__str__"}, doc = BuiltinDocs.bytearray___str___doc)
     final PyString bytearray_str() {
-        return basebytes_str();
+        return new PyString(this.asString());
     }
 
     /**
diff --git a/src/org/python/core/PyInteger.java b/src/org/python/core/PyInteger.java
--- a/src/org/python/core/PyInteger.java
+++ b/src/org/python/core/PyInteger.java
@@ -52,75 +52,104 @@
     @ExposedNew
     public static PyObject int_new(PyNewWrapper new_, boolean init, PyType subtype,
             PyObject[] args, String[] keywords) {
+
         ArgParser ap = new ArgParser("int", args, keywords, new String[] {"x", "base"}, 0);
         PyObject x = ap.getPyObject(0, null);
         int base = ap.getInt(1, -909);
-        if (new_.for_type == subtype) {
+
+        if (new_.for_type == subtype) { // A substantive PyInteger is required as the return value
+
             if (x == null) {
                 return Py.Zero;
-            }
-            if (base == -909) {
+
+            } else if (base == -909) {
                 if (x instanceof PyBoolean) {
                     return (coerce(x) == 0) ? Py.Zero : Py.One;
+                } else if (x instanceof PyByteArray) {
+                    // Make use of the string to int conversion in PyString
+                    PyString xs = new PyString(x.asString());
+                    return asPyInteger(xs);
+                } else {
+                    return asPyInteger(x);
                 }
-                return asPyInteger(x);
-            }
-            if (!(x instanceof PyString)) {
+            } else if (!(x instanceof PyString)) {
                 throw Py.TypeError("int: can't convert non-string with explicit base");
             }
+
             try {
-                return Py.newInteger(((PyString) x).atoi(base));
+                return Py.newInteger(((PyString)x).atoi(base));
             } catch (PyException pye) {
                 if (pye.match(Py.OverflowError)) {
-                    return ((PyString) x).atol(base);
+                    return ((PyString)x).atol(base);
                 }
                 throw pye;
             }
-        } else {
+
+        } else { // A PyIntegerDerived(subtype, ... ) is required as the return value
+
             if (x == null) {
                 return new PyIntegerDerived(subtype, 0);
-            }
-            if (base == -909) {
+            } else if (base == -909) {
                 PyObject intOrLong = asPyInteger(x);
+
                 if (intOrLong instanceof PyInteger) {
-                    return new PyIntegerDerived(subtype, ((PyInteger) intOrLong).getValue());
+                    return new PyIntegerDerived(subtype, ((PyInteger)intOrLong).getValue());
                 } else {
                     throw Py.OverflowError("long int too large to convert to int");
                 }
-            }
-            if (!(x instanceof PyString)) {
+
+            } else if (!(x instanceof PyString)) {
                 throw Py.TypeError("int: can't convert non-string with explicit base");
             }
-            return new PyIntegerDerived(subtype, ((PyString) x).atoi(base));
+
+            return new PyIntegerDerived(subtype, ((PyString)x).atoi(base));
         }
     } // xxx
 
     /**
-     * @return convert to an int.
-     * @throws TypeError and AttributeError.
+     * Convert all sorts of object types to either <code>PyInteger</code> or <code>PyLong</code>,
+     * using their {@link PyObject#__int__()} method, whether exposed or not, or if that raises an
+     * exception (as the base <code>PyObject</code> one does), using any <code>__trunc__()</code>
+     * the type may have exposed. If all this fails, this method raises an exception. Equivalent to CPython
+     * <code>PyNumber_Int()</code>.
+     * 
+     * @param x to convert to an int
+     * @return int or long result.
+     * @throws PyException (TypeError) if no method of conversion can be found
+     * @throws PyException (AttributeError) if neither __int__ nor __trunc__ found (?)
      */
-    private static PyObject asPyInteger(PyObject x) {
-        //XXX: Not sure that this perfectly matches CPython semantics.
+    private static PyObject asPyInteger(PyObject x) throws PyException {
+        // XXX: Not sure that this perfectly matches CPython semantics.
         try {
+            // Try the object itself (raises AttributeError if not overridden from PyObject)
             return x.__int__();
+
         } catch (PyException pye) {
             if (!pye.match(Py.AttributeError)) {
+                // x had an __int__ method, but something else went wrong: pass it on
                 throw pye;
-            }
-            try {
-                PyObject integral = x.invoke("__trunc__");
-                return convertIntegralToInt(integral);
-            } catch (PyException pye2) {
-                if (!pye2.match(Py.AttributeError)) {
-                    throw pye2;
+
+            } else {
+                // x did not have an __int__ method, but maybe __trunc__ will work
+                try {
+                    PyObject integral = x.invoke("__trunc__");
+                    return convertIntegralToInt(integral);
+
+                } catch (PyException pye2) {
+                    if (!pye2.match(Py.AttributeError)) {
+                        throw pye2;
+                    }
+                    String fmt = "int() argument must be a string or a number, not '%.200s'";
+                    throw Py.TypeError(String.format(fmt, x));
                 }
-                throw Py.TypeError(
-                    String.format("int() argument must be a string or a number, not '%.200s'", x));
             }
         }
     }
 
     /**
+     * Helper called on whatever exposed method <code>__trunc__</code> returned: it may be
+     * <code>int</code>, <code>long</code> or something with an exposed <code>__int__</code>.
+     * 
      * @return convert to an int.
      * @throws TypeError and AttributeError.
      */
diff --git a/src/org/python/core/PySequence.java b/src/org/python/core/PySequence.java
--- a/src/org/python/core/PySequence.java
+++ b/src/org/python/core/PySequence.java
@@ -14,8 +14,35 @@
  */
 public abstract class PySequence extends PyObject {
 
+    /**
+     * A delegate that handles index checking and manipulation for get, set and del operations on
+     * this sequence in the form of a "pluggable behaviour". Because different types of sequence
+     * exhibit subtly different behaviour, there is scope for subclasses to customise the behaviour
+     * with their own extension of <code>SequenceIndexDelegate</code>.
+     */
+    protected SequenceIndexDelegate delegator;
+
+    /**
+     * Construct a PySequence for the given sub-type with the default index behaviour.
+     * 
+     * @param type actual (Python) type of sub-class
+     */
     protected PySequence(PyType type) {
         super(type);
+        delegator = new DefaultIndexDelegate();
+    }
+
+    /**
+     * Construct a PySequence for the given sub-type with custom index behaviour. In practice,
+     * restrictions on the construction of inner classes will mean null has to be passed and the
+     * actual delegator assigned later.
+     * 
+     * @param type actual (Python) type of sub-class
+     * @param behaviour specific index behaviour (or null)
+     */
+    protected PySequence(PyType type, SequenceIndexDelegate behaviour) {
+        super(type);
+        delegator = behaviour;
     }
 
     // These methods must be defined for any sequence
@@ -451,7 +478,11 @@
         return true;
     }
 
-    protected final SequenceIndexDelegate delegator = new SequenceIndexDelegate() {
+    /**
+     * Class defining the default behaviour of sequences with respect to slice assignment, etc.,
+     * which is the one correct for <code>list</code>.
+     */
+    protected class DefaultIndexDelegate extends SequenceIndexDelegate {
 
         @Override
         public String getTypeName() {
diff --git a/src/org/python/core/PyString.java b/src/org/python/core/PyString.java
--- a/src/org/python/core/PyString.java
+++ b/src/org/python/core/PyString.java
@@ -740,15 +740,19 @@
     public PyObject __add__(PyObject other) {
         return str___add__(other);
     }
-    
+
     @ExposedMethod(type = MethodType.BINARY, doc = BuiltinDocs.str___add___doc)
     final PyObject str___add__(PyObject other) {
         if (other instanceof PyUnicode) {
             return decode().__add__(other);
-        }
-        if (other instanceof PyString) {
+
+        } else if (other instanceof PyString) {
             PyString otherStr = (PyString)other;
             return new PyString(getString().concat(otherStr.getString()));
+
+        } else if (other instanceof PyByteArray) {
+            return new PyString(getString().concat(other.asString()));
+
         }
         return null;
     }
diff --git a/src/org/python/core/__builtin__.java b/src/org/python/core/__builtin__.java
--- a/src/org/python/core/__builtin__.java
+++ b/src/org/python/core/__builtin__.java
@@ -64,10 +64,6 @@
             case 2:
                 return __builtin__.range(arg1);
             case 3:
-                if (!(arg1 instanceof PyString)) {
-                    throw Py.TypeError("ord() expected string of length 1, but " +
-                                       arg1.getType().fastGetName() + " found");
-                }
                 return Py.newInteger(__builtin__.ord(arg1));
             case 5:
                 return __builtin__.hash(arg1);
@@ -766,22 +762,48 @@
         return o.__oct__();
     }
 
-    public static final int ord(PyObject c) {
+    /**
+     * Built-in Python function ord() applicable to the string-like types <code>str</code>,
+     * <code>bytearray</code>, <code>unicode</code>.
+     * 
+     * @param c string-like object of length 1
+     * @return ordinal value of the single character or byte in <code>c</code>
+     * @throws PyException (TypeError) if not a string-like type, or if not of length 1
+     */
+    public static final int ord(PyObject c) throws PyException {
         final int length;
-        PyString x = (PyString) c;
-        if (x instanceof PyUnicode) {
-            length = x.getString().codePointCount(0, x.getString().length());
+
+        if (c instanceof PyUnicode) {
+            String cu = ((PyUnicode)c).getString();
+            length = cu.codePointCount(0, cu.length());
             if (length == 1) {
-                return x.getString().codePointAt(0);
+                return cu.codePointAt(0);
             }
+
+        } else if (c instanceof PyString) {
+            String cs = ((PyString)c).getString();
+            length = cs.length();
+            if (length == 1) {
+                return cs.charAt(0);
+            }
+
+        } else if (c instanceof BaseBytes) {
+            BaseBytes cb = (BaseBytes)c;
+            length = cb.__len__();
+            if (length == 1) {
+                return cb.intAt(0);
+            }
+
         } else {
-            length = x.getString().length();
-            if (length == 1) {
-                return x.getString().charAt(0);
-            }
+            // Not any of the acceptable types
+            throw Py.TypeError("ord() expected string of length 1, but "
+                    + c.getType().fastGetName() + " found");
         }
-        throw Py.TypeError("ord() expected a character, but string of length " +
-                           length + " found");
+        /*
+         * It was a qualifying string-like object, but if we didn't return or throw by now, the
+         * problem was the length.
+         */
+        throw Py.TypeError("ord() expected a character, but string of length " + length + " found");
     }
 
     public static PyObject pow(PyObject x, PyObject y) {

-- 
Repository URL: http://hg.python.org/jython


More information about the Jython-checkins mailing list