[Jython-checkins] jython: Fixes hex formatting issues #2013 (slow) and #2075 (incorrect padding)

jeff.allen jython-checkins at python.org
Sun Dec 22 18:37:39 CET 2013


http://hg.python.org/jython/rev/3bfef9e72808
changeset:   7170:3bfef9e72808
user:        Santoso Wijaya <santoso.wijaya at gmail.com>
date:        Sun Dec 22 15:12:22 2013 +0000
summary:
  Fixes hex formatting issues #2013 (slow) and #2075 (incorrect padding)
Special-cases binary, octal and hex formatting for speed and corrects
alternate (#) formatting bug, allowing removal of several FIXMEs from
test_types.

files:
  Lib/test/test_str_jy.py            |    5 +
  Lib/test/test_types.py             |   48 ++--
  NEWS                               |   12 +-
  src/org/python/core/PyInteger.java |  165 +++++++++++++++-
  src/org/python/core/PyLong.java    |    4 +-
  5 files changed, 183 insertions(+), 51 deletions(-)


diff --git a/Lib/test/test_str_jy.py b/Lib/test/test_str_jy.py
--- a/Lib/test/test_str_jy.py
+++ b/Lib/test/test_str_jy.py
@@ -53,6 +53,11 @@
         self.assertEquals("%+f" % -5, "-5.000000")
         self.assertEquals("%+f" % 5, "+5.000000")
 
+    def test_format_issue2075(self):
+        self.assertEquals("%#018x" % 14, "0x000000000000000e")
+        self.assertEquals("{:#018x}".format(14), "0x000000000000000e")
+        self.assertEquals("{:+#018X}".format(14), "+0X00000000000000E")
+        self.assertEquals("{:#018X}".format(-14), "-0X00000000000000E")
 
     def test_argument_count_exception(self):
         "exception thrown when too many or too few arguments for format string"
diff --git a/Lib/test/test_types.py b/Lib/test/test_types.py
--- a/Lib/test/test_types.py
+++ b/Lib/test/test_types.py
@@ -365,54 +365,46 @@
         test(0, "#b", '0b0')
         test(0, "-#b", '0b0')
         test(1, "-#b", '0b1')
-        #FIXME: not working.
-        #test(-1, "-#b", '-0b1')
-        #test(-1, "-#5b", ' -0b1')
+        test(-1, "-#b", '-0b1')
+        test(-1, "-#5b", ' -0b1')
         test(1, "+#5b", ' +0b1')
         test(100, "+#b", '+0b1100100')
-        #FIXME: not working.
-        #test(100, "#012b", '0b0001100100')
-        #test(-100, "#012b", '-0b001100100')
+        test(100, "#012b", '0b0001100100')
+        test(-100, "#012b", '-0b001100100')
 
         test(0, "#o", '0o0')
         test(0, "-#o", '0o0')
         test(1, "-#o", '0o1')
-        #FIXME: not working.
-        #test(-1, "-#o", '-0o1')
-        #test(-1, "-#5o", ' -0o1')
+        test(-1, "-#o", '-0o1')
+        test(-1, "-#5o", ' -0o1')
         test(1, "+#5o", ' +0o1')
         test(100, "+#o", '+0o144')
-        #FIXME: not working.
-        #test(100, "#012o", '0o0000000144')
-        #test(-100, "#012o", '-0o000000144')
+        test(100, "#012o", '0o0000000144')
+        test(-100, "#012o", '-0o000000144')
 
         test(0, "#x", '0x0')
         test(0, "-#x", '0x0')
         test(1, "-#x", '0x1')
-        #FIXME: not working.
-        #test(-1, "-#x", '-0x1')
-        #test(-1, "-#5x", ' -0x1')
+        test(-1, "-#x", '-0x1')
+        test(-1, "-#5x", ' -0x1')
         test(1, "+#5x", ' +0x1')
         test(100, "+#x", '+0x64')
-        #FIXME: not working.
-        #test(100, "#012x", '0x0000000064')
-        #test(-100, "#012x", '-0x000000064')
-        #test(123456, "#012x", '0x000001e240')
-        #test(-123456, "#012x", '-0x00001e240')
+        test(100, "#012x", '0x0000000064')
+        test(-100, "#012x", '-0x000000064')
+        test(123456, "#012x", '0x000001e240')
+        test(-123456, "#012x", '-0x00001e240')
 
         test(0, "#X", '0X0')
         test(0, "-#X", '0X0')
         test(1, "-#X", '0X1')
-        #FIXME: not working.
-        #test(-1, "-#X", '-0X1')
-        #test(-1, "-#5X", ' -0X1')
+        test(-1, "-#X", '-0X1')
+        test(-1, "-#5X", ' -0X1')
         test(1, "+#5X", ' +0X1')
         test(100, "+#X", '+0X64')
-        #FIXME: not working.
-        #test(100, "#012X", '0X0000000064')
-        #test(-100, "#012X", '-0X000000064')
-        #test(123456, "#012X", '0X000001E240')
-        #test(-123456, "#012X", '-0X00001E240')
+        test(100, "#012X", '0X0000000064')
+        test(-100, "#012X", '-0X000000064')
+        test(123456, "#012X", '0X000001E240')
+        test(-123456, "#012X", '-0X00001E240')
 
         # issue 5782, commas with no specifier type
         #FIXME: not working.
diff --git a/NEWS b/NEWS
--- a/NEWS
+++ b/NEWS
@@ -2,18 +2,20 @@
 
 Jython 2.7b2
   Bugs Fixed
-    - [ 1862 ] cStringIO does not support arrays as arguments
-    - [ 2027 ] Discrepancy in bin(-num) output
-    - [ 2005 ] threading.Event object's wait([timeout]) function returns null instead of True/False.
-    - [ 1926 ] Adjust MutableSet.pop test so we do not need to skip it
-    - [ 2020 ] str.translate should delete characters in the second arg when table is None
     - [ 1753 ] zlib doesn't call end() on compress and decompress
     - [ 1860 ] test failures in test_array.py
+    - [ 1862 ] cStringIO does not support arrays as arguments
+    - [ 1926 ] Adjust MutableSet.pop test so we do not need to skip it
     - [ 1964 ] time.strptime() does not support %f in format
+    - [ 2005 ] threading.Event object's wait([timeout]) function returns null instead of True/False.
+    - [ 2013 ] %x hex formatting takes O(N^2) time.
+    - [ 2020 ] str.translate should delete characters in the second arg when table is None
+    - [ 2027 ] Discrepancy in bin(-num) output
     - [ 2033 ] test_strptime fails: test_mar1_comes_after_feb29_even_when_omitting_the_year
     - [ 2046 ] sys.stdin.readline() hangs when used interactively (JLine, Windows)
     - [ 2060 ] Thread ident missing
     - [ 2071 ] datetime strftime %f does not work
+    - [ 2075 ] Incorrect padding for hex format strings
     - [ 2082 ] Unexpected (Pdb) prompt during regression tests
     - [ 2083 ] os.unlink() can delete directories
 
diff --git a/src/org/python/core/PyInteger.java b/src/org/python/core/PyInteger.java
--- a/src/org/python/core/PyInteger.java
+++ b/src/org/python/core/PyInteger.java
@@ -38,6 +38,8 @@
     @Deprecated
     public static final BigInteger maxInt = MAX_INT;
 
+    private static final String LOOKUP = "0123456789abcdef";
+
     private final int value;
 
     public PyInteger(PyType subType, int v) {
@@ -1047,6 +1049,7 @@
         if (spec.precision != -1) {
             throw new IllegalArgumentException("Precision not allowed in integer format specifier");
         }
+
         int sign;
         if (value instanceof Integer) {
             int intValue = (Integer) value;
@@ -1054,7 +1057,11 @@
         } else {
             sign = ((BigInteger) value).signum();
         }
+
         String strValue;
+        String strPrefix = "";
+        String strSign = "";
+
         if (spec.type == 'c') {
             if (spec.sign != '\0') {
                 throw new IllegalArgumentException("Sign not allowed with integer format "
@@ -1090,13 +1097,26 @@
                 format.setGroupingUsed(true);
                 strValue = format.format(value);
             } else if (value instanceof BigInteger) {
-                strValue = ((BigInteger) value).toString(radix);
+                switch (radix) {
+                    case 2:
+                        strValue = toBinString((BigInteger) value);
+                        break;
+                    case 8:
+                        strValue = toOctString((BigInteger) value);
+                        break;
+                    case 16:
+                        strValue = toHexString((BigInteger) value);
+                        break;
+                    default:
+                        // General case (v.slow in known implementations up to Java 7).
+                        strValue = ((BigInteger) value).toString(radix);
+                        break;
+                }
             } else {
                 strValue = Integer.toString((Integer) value, radix);
             }
 
             if (spec.alternate) {
-                String strPrefix = "";
                 switch (radix) {
                     case 2:
                         strPrefix = "0b";
@@ -1109,32 +1129,145 @@
                         break;
                 }
 
-                if (strValue.startsWith("-")) {
-                    //assert (sign < 0);
-                    if (!strPrefix.equals(""))
-                        strValue = "-" + strPrefix + strValue.substring(1, strValue.length());
-                } else {
-                    strValue = strPrefix + strValue;
+                if (sign < 0) {
+                    assert (strValue.startsWith("-"));
+                    strSign = "-";
+                    strValue = strValue.substring(1);
                 }
             }
 
             if (spec.type == 'X') {
+                strPrefix = strPrefix.toUpperCase();
                 strValue = strValue.toUpperCase();
             }
 
             if (sign >= 0) {
-                if (spec.sign == '+') {
-                    strValue = "+" + strValue;
-                } else if (spec.sign == ' ') {
-                    strValue = " " + strValue;
+                switch (spec.sign) {
+                    case '+':
+                    case ' ':
+                        strSign = Character.toString(spec.sign);
+                        break;
                 }
             }
         }
-        if (spec.align == '=' && (sign < 0 || spec.sign == '+' || spec.sign == ' ')) {
-            char signChar = strValue.charAt(0);
-            return signChar + spec.pad(strValue.substring(1), '>', 1);
+
+        if (spec.align == '=' && (spec.sign == '-' || spec.sign == '+' || spec.sign == ' ')) {
+            assert (strSign.length() == 1);
+            return strSign + strPrefix + spec.pad(strValue, '>', 1 + strPrefix.length());
         }
-        return spec.pad(strValue, '>', 0);
+
+        if (spec.fill_char == 0) {
+            return spec.pad(strSign + strPrefix + strValue, '>', 0);
+        }
+
+        return strSign + strPrefix + spec.pad(strValue, '>', strSign.length() + strPrefix.length());
+    }
+
+    /**
+     * A more efficient algorithm for generating a hexadecimal representation of a byte array.
+     * {@link BigInteger#toString(int)} is too slow because it generalizes to any radix and,
+     * consequently, is implemented using expensive mathematical operations.
+     *
+     * @param value the value to generate a hexadecimal string from
+     * @return the hexadecimal representation of value, with "-" sign prepended if necessary
+     */
+    static final String toHexString(BigInteger value) {
+        int signum = value.signum();
+
+        // obvious shortcut
+        if (signum == 0) {
+            return "0";
+        }
+
+        // we want to work in absolute numeric value (negative sign is added afterward)
+        byte[] input = value.abs().toByteArray();
+        StringBuilder sb = new StringBuilder(input.length * 2);
+
+        int b;
+        for (int i = 0; i < input.length; i++) {
+            b = input[i] & 0xFF;
+            sb.append(LOOKUP.charAt(b >> 4));
+            sb.append(LOOKUP.charAt(b & 0x0F));
+        }
+
+        // before returning the char array as string, remove leading zeroes, but not the last one
+        String result = sb.toString().replaceFirst("^0+(?!$)", "");
+        return signum < 0 ? "-" + result : result;
+    }
+
+    /**
+     * A more efficient algorithm for generating an octal representation of a byte array.
+     * {@link BigInteger#toString(int)} is too slow because it generalizes to any radix and,
+     * consequently, is implemented using expensive mathematical operations.
+     *
+     * @param value the value to generate an octal string from
+     * @return the octal representation of value, with "-" sign prepended if necessary
+     */
+    static final String toOctString(BigInteger value) {
+        int signum = value.signum();
+
+        // obvious shortcut
+        if (signum == 0) {
+            return "0";
+        }
+
+        byte[] input = value.abs().toByteArray();
+        if (input.length < 3) {
+            return value.toString(8);
+        }
+
+        StringBuilder sb = new StringBuilder(input.length * 3);
+
+        // working backwards, three bytes at a time
+        int threebytes;
+        int trip1, trip2, trip3;    // most, middle, and least significant bytes in the triplet
+        for (int i = input.length - 1; i >= 0; i -= 3) {
+            trip3 = input[i] & 0xFF;
+            trip2 = ((i - 1) >= 0) ? (input[i - 1] & 0xFF) : 0x00;
+            trip1 = ((i - 2) >= 0) ? (input[i - 2] & 0xFF) : 0x00;
+            threebytes = trip3 | (trip2 << 8) | (trip1 << 16);
+
+            // convert the three-byte value into an eight-character octal string
+            for (int j = 0; j < 8; j++) {
+                sb.append(LOOKUP.charAt((threebytes >> (j * 3)) & 0x000007));
+            }
+        }
+
+        String result = sb.reverse().toString().replaceFirst("^0+(?!%)", "");
+        return signum < 0 ? "-" + result : result;
+    }
+
+    /**
+     * A more efficient algorithm for generating a binary representation of a byte array.
+     * {@link BigInteger#toString(int)} is too slow because it generalizes to any radix and,
+     * consequently, is implemented using expensive mathematical operations.
+     *
+     * @param value the value to generate a binary string from
+     * @return the binary representation of value, with "-" sign prepended if necessary
+     */
+    static final String toBinString(BigInteger value) {
+        int signum = value.signum();
+
+        // obvious shortcut
+        if (signum == 0) {
+            return "0";
+        }
+
+        // we want to work in absolute numeric value (negative sign is added afterward)
+        byte[] input = value.abs().toByteArray();
+        StringBuilder sb = new StringBuilder(value.bitCount());
+
+        int b;
+        for (int i = 0; i < input.length; i++) {
+            b = input[i] & 0xFF;
+            for (int bit = 7; bit >= 0; bit--) {
+                sb.append(((b >> bit) & 0x1) > 0 ? "1" : "0");
+            }
+        }
+
+        // before returning the char array as string, remove leading zeroes, but not the last one
+        String result = sb.toString().replaceFirst("^0+(?!$)", "");
+        return signum < 0 ? "-" + result : result;
     }
 
     @Override
diff --git a/src/org/python/core/PyLong.java b/src/org/python/core/PyLong.java
--- a/src/org/python/core/PyLong.java
+++ b/src/org/python/core/PyLong.java
@@ -977,7 +977,7 @@
 
     @ExposedMethod(doc = BuiltinDocs.long___oct___doc)
     final PyString long___oct__() {
-        String s = getValue().toString(8);
+        String s = PyInteger.toOctString(getValue());
         if (s.startsWith("-")) {
             return new PyString("-0" + s.substring(1, s.length()) + "L");
         } else if (s.startsWith("0")) {
@@ -994,7 +994,7 @@
 
     @ExposedMethod(doc = BuiltinDocs.long___hex___doc)
     final PyString long___hex__() {
-        String s = getValue().toString(16);
+        String s = PyInteger.toHexString(getValue());
         if (s.startsWith("-")) {
             return new PyString("-0x" + s.substring(1, s.length()) + "L");
         } else {

-- 
Repository URL: http://hg.python.org/jython


More information about the Jython-checkins mailing list