[Jython-checkins] jython: Implement bytearray.fromhex and strengthen the test.

Wed Jun 13 20:44:04 CEST 2012

http://hg.python.org/jython/rev/0ca0f51a32d6
changeset:   6702:0ca0f51a32d6
user:        Jeff Allen <ja...py at farowl.co.uk>
date:        Wed Jun 06 12:52:02 2012 +0100
summary:
  Implement bytearray.fromhex and strengthen the test.
I beefed-up test_fromhex() in test_bytes.py to detect potential signed-byte problems. Now scoring 2 failures and 33 errors.

files:
  Lib/test/test_bytes.py               |   6 +-
  src/org/python/core/BaseBytes.java   |  80 +++++++++++++++-
  src/org/python/core/PyByteArray.java |  49 +++++++++-
  3 files changed, 125 insertions(+), 10 deletions(-)

diff --git a/Lib/test/test_bytes.py b/Lib/test/test_bytes.py
--- a/Lib/test/test_bytes.py
+++ b/Lib/test/test_bytes.py
@@ -260,9 +260,9 @@
         self.assertRaises(TypeError, self.type2test.fromhex)
         self.assertRaises(TypeError, self.type2test.fromhex, 1)
         self.assertEqual(self.type2test.fromhex(u''), self.type2test())
-        b = bytearray([0x1a, 0x2b, 0x30])
-        self.assertEqual(self.type2test.fromhex(u'1a2B30'), b)
-        self.assertEqual(self.type2test.fromhex(u'  1A 2B  30   '), b)
+        b = bytearray([0x1a, 0x2b, 0x30, 0xca, 0xfe, 0xba, 0xbe]) # challenging signs
+        self.assertEqual(self.type2test.fromhex(u'1a2B30CafEBabe'), b)
+        self.assertEqual(self.type2test.fromhex(u'  1A 2B  30 CafeBabe   '), b)
         self.assertEqual(self.type2test.fromhex(u'0000'), b'\0\0')
         self.assertRaises(ValueError, self.type2test.fromhex, u'a')
         self.assertRaises(ValueError, self.type2test.fromhex, u'rt')
diff --git a/src/org/python/core/BaseBytes.java b/src/org/python/core/BaseBytes.java
--- a/src/org/python/core/BaseBytes.java
+++ b/src/org/python/core/BaseBytes.java
@@ -2303,6 +2303,80 @@
     }
 
     /**
+     * Almost ready-to-expose implementation of Python class method <code>fromhex(string)</code>.
+     * This assigns a value to the passed byte array object from a string of two-digit hexadecimal
+     * numbers. Spaces (but not whitespace in general) are acceptable around the numbers, not
+     * within. Non-hexadecimal characters or un-paired hex digits raise a <code>ValueError</code>.
+     * Example:
+     *
+     * <pre>
+     * bytearray.fromhex('B9 01EF') -> bytearray(b'\xb9\x01\xef')
+     * </pre>
+     *
+     * @param result to receive the decoded values (its storage is replaced and its size set)
+     * @param hex specification of the bytes
+     * @throws PyException(ValueError) if non-hex characters, or isolated ones, are encountered
+     */
+    static void basebytes_fromhex(BaseBytes result, String hex) throws PyException {
+
+        final int hexlen = hex.length();
+        result.newStorage(hexlen / 2); // Upper bound: over-provides storage if hex has spaces
+
+        // Message format for the ValueError we may raise; %d is the index of the bad character.
+        String fmt = "non-hexadecimal number found in fromhex() arg at position %d";
+
+        // Destination array and the output index into it, starting at the result's offset
+        byte[] r = result.storage;
+        int p = result.offset;
+
+        /*
+         * When charAt(i) is a hex digit, we will always access hex.charAt(i+1), and catch the
+         * exception if that is beyond the end of the string.
+         */
+        for (int i = 0; i < hexlen; /* i incremented in loop by 1 or 2 */) {
+            char c = hex.charAt(i++);
+            if (c != ' ') {
+                try {
+                    // hexDigit throws IllegalArgumentException if non-hexadecimal character found
+                    int value = hexDigit(c);
+                    c = hex.charAt(i++); // Throw IndexOutOfBoundsException if no second digit
+                    value = (value << 4) + hexDigit(c);
+                    r[p++] = (byte)value; // cast keeps the low 8 bits of the 0-255 value
+                } catch (IllegalArgumentException e) {
+                    throw Py.ValueError(String.format(fmt, i-1)); // i-1: character just read
+                } catch (IndexOutOfBoundsException e) {
+                    throw Py.ValueError(String.format(fmt, i-2)); // i-2: the unpaired digit
+                }
+            }
+        }
+        result.size = p - result.offset; // number of bytes actually decoded
+    }
+
+    /**
+     * Translate one character to its hexadecimal value.
+     *
+     * @param c to translate
+     * @return value 0-15
+     * @throws IllegalArgumentException if c is not '0'-'9', 'A'-'F' or 'a'-'f'.
+     */
+    private static int hexDigit(char c) throws IllegalArgumentException {
+        int result = c - '0';
+        if (result >= 0) {
+            if (result < 10) { // digit
+                return result;
+            } else {
+                // Clear only bit 5 (0x20): an ASCII letter becomes its uppercase, while the high
+                // byte is kept, so a non-ASCII char such as U+0141 cannot alias 'A'-'F'.
+                result = (c & ~0x20) - 'A';
+                if (result >= 0 && result < 6) { // A-F or a-f
+                    return result + 10;
+                }
+            }
+        }
+        throw new IllegalArgumentException();
+    }
+
+    /**
      * Almost ready-to-expose implementation of Python <code>join(iterable)</code>.
      *
      * @param iter iterable of objects capable of being regarded as byte arrays
@@ -3157,7 +3231,7 @@
     /**
      * Implementation of Python <code>splitlines()</code>, returning a list of the lines in the byte
      * array, breaking at line boundaries. Line breaks are not included in the resulting segments.
-     * 
+     *
      * @return List of segments
      */
     public PyList splitlines() {
@@ -3168,7 +3242,7 @@
      * Implementation of Python <code>splitlines(keepends)</code>, returning a list of the lines in
      * the string, breaking at line boundaries. Line breaks are not included in the resulting list
      * unless <code>keepends</code> is true.
-     * 
+     *
      * @param keepends if true, include the end of line bytes(s)
      * @return PyList of segments
      */
@@ -3180,7 +3254,7 @@
      * Ready-to-expose implementation of Python <code>splitlines(keepends)</code>, returning a list
      * of the lines in the string, breaking at line boundaries. Line breaks are not included in the
      * resulting list unless keepends is given and true.
-     * 
+     *
      * @param keepends if true, include the end of line bytes(s)
      * @return List of segments
      */
diff --git a/src/org/python/core/PyByteArray.java b/src/org/python/core/PyByteArray.java
--- a/src/org/python/core/PyByteArray.java
+++ b/src/org/python/core/PyByteArray.java
@@ -2,6 +2,7 @@
 
 import java.util.Arrays;
 
+import org.python.expose.ExposedClassMethod;
 import org.python.expose.ExposedMethod;
 import org.python.expose.ExposedNew;
 import org.python.expose.ExposedType;
@@ -161,11 +162,24 @@
     /**
      * Construct bytearray by re-using an array of byte as storage initialised by the client.
      *
-     * @param newStorage pre-initialised storage: the caller should not keep a reference
+     * @param storage pre-initialised with desired value: the caller should not keep a reference
      */
-    PyByteArray(byte[] newStorage) {
+    PyByteArray(byte[] storage) {
         super(TYPE);
-        setStorage(newStorage);
+        setStorage(storage);
+    }
+
+    /**
+     * Construct bytearray re-using a client-initialised array, with only part of it in use.
+     *
+     * @param storage pre-initialised with desired value: the caller should not keep a reference
+     * @param size number of bytes actually used
+     * @throws IllegalArgumentException if the range [0:size] is not within the array bounds of
+     *             the storage.
+     */
+    PyByteArray(byte[] storage, int size) {
+        super(TYPE);
+        setStorage(storage, size);
     }
 
     /**
@@ -1054,6 +1068,33 @@
         return basebytes_find(sub, start, end);
     }
 
+    /**
+     * Implementation of Python class method <code>bytearray.fromhex(string)</code>, that returns
+     * a new <code>PyByteArray</code> with a value taken from a string of two-digit hexadecimal
+     * numbers. Spaces (but not whitespace in general) are acceptable around the numbers, not
+     * within. Non-hexadecimal characters or un-paired hex digits raise a <code>ValueError</code>.
+     * Example:
+     * <pre>
+     * bytearray.fromhex('B9 01EF') -> bytearray(b'\xb9\x01\xef')
+     * </pre>
+     *
+     * @param hex specification of the bytes
+     * @return a new <code>PyByteArray</code> holding the decoded bytes
+     * @throws PyException(ValueError) if non-hex characters, or isolated ones, are encountered
+     */
+    static PyByteArray fromhex(String hex) throws PyException {
+        return bytearray_fromhex(TYPE, hex);
+    }
+
+    @ExposedClassMethod(doc = BuiltinDocs.bytearray_fromhex_doc)
+    static PyByteArray bytearray_fromhex(PyType type, String hex) {
+        // NOTE(review): type is presumably the class fromhex was called on (confirm); it is
+        // ignored, so the result is always exactly a bytearray. Untried: type.__call__().
+        PyByteArray result = new PyByteArray();
+        basebytes_fromhex(result, hex);
+        return result;
+    }
+
     @Override
     public PyObject __iadd__(PyObject o) {
         return bytearray___iadd__(o);
@@ -1455,7 +1496,7 @@
     final PyList bytearray_splitlines(boolean keepends) {
         return basebytes_splitlines(keepends);
     }
-    
+
     /**
      * Implementation of Python <code>startswith(prefix)</code>.
      *

-- 
Repository URL: http://hg.python.org/jython