[Python-checkins] r54141 - sandbox/trunk/pep3101/test_simpleformat.py sandbox/trunk/pep3101/unicodeformat.c

Tue Mar 6 03:36:51 CET 2007

Author: eric.smith
Date: Tue Mar  6 03:36:49 2007
New Revision: 54141

Modified:
   sandbox/trunk/pep3101/test_simpleformat.py
   sandbox/trunk/pep3101/unicodeformat.c
Log:
Added binary formatting.  Factored out _calc_integer_widths() and _fill_integer() so they can be shared by format_integer() and format_binary().  Added test cases for binary.  This code accesses PyLongObject's ob_digit[] directly, which might not be the best way to go about binary formatting.

Modified: sandbox/trunk/pep3101/test_simpleformat.py
==============================================================================

--- sandbox/trunk/pep3101/test_simpleformat.py	(original)
+++ sandbox/trunk/pep3101/test_simpleformat.py	Tue Mar  6 03:36:49 2007
@@ -123,7 +123,7 @@
 
     def test_specifiers(self):
         self.formatEquals("a", "{0:c}", ord("a"))
-        self.formatEquals("8_08b", "{0:08b}", 8)
+        self.formatEquals("00001000", "{0:08b}", 8)
         self.formatEquals("  8", "{0: >3d}", 8)
         self.formatEquals("15%", "{0:.0%}", .1515)
 
@@ -152,6 +152,7 @@
         self.assertRaises(TypeError, "{0:d}", "non-number")
 
         self.formatEqualsWithUnicode("0", "{0:d}", 0)
+        self.formatEqualsWithUnicode("0", "{0:d}", long(0))
         self.formatEqualsWithUnicode("123", "{0:d}", 123)
         self.formatEqualsWithUnicode("-123", "{0:d}", -123)
         self.formatEqualsWithUnicode("+123", "{0:+d}", 123)
@@ -204,7 +205,7 @@
     def test_hex_specifiers(self):
         n = int("beef", 16)
 
-        self.assertRaises(TypeError, "{0:x", "non-number")
+        self.formatRaises(TypeError, "{0:x}", "non-number")
 
         self.formatEqualsWithUnicodeUC("0", "{0:x}", 0)
         self.formatEqualsWithUnicodeUC("beef", "{0:x}", n)
@@ -248,6 +249,31 @@
         self.formatEqualsWithUnicodeUC("3.1415e+200", "{0:g}", 3.1415e200)
         self.formatEqualsWithUnicodeUC("3.1415e+200", "{0:g}", 3.1415e200)
 
+    def test_percent_specifiers(self):
+        self.formatEqualsWithUnicode("314.15%", "{0:.2%}", 3.1415)
+        self.formatEqualsWithUnicode("3.14e+202%", "{0:.3%}", 3.1415e200)
+
+    def test_binary_specifiers(self):
+        self.formatRaises(TypeError, "{0:b}", "string")
+
+        self.formatEqualsWithUnicode("0", "{0:b}", 0)
+        self.formatEqualsWithUnicode("0", "{0:b}", long(0))
+        self.formatEqualsWithUnicode("1", "{0:b}", 1)
+        self.formatEqualsWithUnicode("1", "{0:b}", long(1))
+        self.formatEqualsWithUnicode("-1", "{0:b}", -1)
+        self.formatEqualsWithUnicode("-1", "{0:b}", long(-1))
+        self.formatEqualsWithUnicode("0         ", "{0:<10b}", 0)
+        self.formatEqualsWithUnicode("         0", "{0:>10b}", 0)
+        self.formatEqualsWithUnicode("1001      ", "{0:<10b}", 9)
+        self.formatEqualsWithUnicode("      1001", "{0:>10b}", 9)
+        self.formatEqualsWithUnicode("1" + "0" * 100, "{0:b}", 2**100)
+        self.formatEqualsWithUnicode("-1" + "0" * 100, "{0:b}", -2**100)
+        self.formatEqualsWithUnicode("1" + "0" * 98 + "11", "{0:b}", 2**100 + 3)
+        self.formatEqualsWithUnicode("1" * 100, "{0:b}", 2**100 - 1)
+        self.formatEqualsWithUnicode("-" + "1" * 100, "{0:b}", -(2**100 - 1))
+        self.formatEqualsWithUnicode("(" + "1" * 100 + ")", "{0:()b}", -(2**100 - 1))
+        self.formatEqualsWithUnicode("(" + " " * 98 + "1" * 100 + ")", "{0:=()200b}", -(2**100 - 1))
+
     def test_missing_type_specifier(self):
         # make sure floats use 'g', ints and longs 'd', and everything else 's'
         pass

Modified: sandbox/trunk/pep3101/unicodeformat.c
==============================================================================
--- sandbox/trunk/pep3101/unicodeformat.c	(original)
+++ sandbox/trunk/pep3101/unicodeformat.c	Tue Mar  6 03:36:49 2007
@@ -20,6 +20,9 @@
 #define C_UNICODE 1
 #endif
 
+/* we need access to a PyLongObject's internals */
+#include "longintrepr.h"
+
 #if C_UNICODE
 #define CH_TYPE                  Py_UNICODE
 #define CH_TYPE_ISDECIMAL        Py_UNICODE_ISDECIMAL
@@ -91,6 +94,7 @@
 */
 #define FORMATBUFLEN (size_t)120
 
+#define ABS(x) ((x) < 0 ? -(x) : (x))
 
 #ifdef __cplusplus
 extern "C" {
@@ -806,6 +810,20 @@
     CH_TYPE type;
 } InternalFormatSpec;
 
+/* describes the layout for an integer, see the comment in
+   _calc_integer_widths() for details */
+typedef struct {
+    Py_ssize_t n_lpadding;
+    Py_ssize_t n_spadding;
+    Py_ssize_t n_rpadding;
+    char lsign;
+    Py_ssize_t n_lsign;
+    char rsign;
+    Py_ssize_t n_rsign;
+    Py_ssize_t n_total; /* just a convenience, it's derivable from the
+                           other fields */
+} IntegerFieldWidths;
+
 /* returns true if this character is a specifier alignment token */
 Py_LOCAL_INLINE(int)
 alignment_token(CH_TYPE c)
@@ -1024,11 +1042,233 @@
 (*FormatFunction)(PyObject *fieldobj, FmtState *fs,
                   const InternalFormatSpec *format);
 
+static void
+_calc_integer_widths(IntegerFieldWidths *r, CH_TYPE sign, Py_ssize_t n_digits,
+                     const InternalFormatSpec *format)
+{
+    r->n_lpadding = 0;
+    r->n_spadding = 0;
+    r->n_rpadding = 0;
+    r->lsign = '\0';
+    r->n_lsign = 0;
+    r->rsign = '\0';
+    r->n_rsign = 0;
+
+    /* the output will look like:
+       |                                                           |
+       | <lpadding> <lsign> <spadding> <digits> <rsign> <rpadding> |
+       |                                                           |
+
+       lsign and rsign are computed from format->sign and the actual
+       sign of the number
+
+       digits is already known
+
+       the total width is either given, or computed from the
+       actual digits
+
+       only one of lpadding, spadding, and rpadding can be non-zero,
+       and it's calculated from the width and other fields
+    */
+
+    /* compute the various parts we're going to write */
+    if (format->sign == '+') {
+        /* always put a + or - */
+        r->n_lsign = 1;
+        r->lsign = (sign == '-' ? '-' : '+');
+    } else if (format->sign == '(') {
+        if (sign == '-') {
+            r->n_lsign = 1;
+            r->lsign = '(';
+            r->n_rsign = 1;
+            r->rsign = ')';
+        }
+    } else if (format->sign == ' ') {
+        r->n_lsign = 1;
+        r->lsign = (sign == '-' ? '-' : ' ');
+    } else {
+        /* non specified, or the default (-) */
+        if (sign == '-') {
+            r->n_lsign = 1;
+            r->lsign = '-';
+        }
+    }
+
+    /* now the number of padding characters */
+    if (format->width == -1) {
+        /* no padding at all, nothing to do */
+    } else {
+        /* see if any padding is needed */
+        if (r->n_lsign + n_digits + r->n_rsign >= format->width) {
+            /* no padding needed, we're already bigger than the
+               requested width */
+        } else {
+            /* determine which of left, space, or right padding is
+               needed */
+            Py_ssize_t padding = format->width - (r->n_lsign + n_digits + r->n_rsign);
+            if (format->align == '<')
+                r->n_rpadding = padding;
+            else if (format->align == '>')
+                r->n_lpadding = padding;
+            else
+                /* must be '=' */
+                r->n_spadding = padding;
+        }
+    }
+    r->n_total = r->n_lpadding + r->n_lsign + r->n_spadding +
+        n_digits + r->n_rsign + r->n_rpadding;
+}
+
+/* fill in the non-digit parts of an integer's string representation,
+   as determined in _calc_integer_widths().  returns the pointer to
+   where the digits go. */
+static CH_TYPE*
+_fill_integer(CH_TYPE *p_buf, const IntegerFieldWidths *spec,
+              Py_ssize_t n_digits, CH_TYPE fill_char)
+{
+    CH_TYPE* p_digits;
+
+    if (spec->n_lpadding) {
+        CH_TYPE_FILL(p_buf, fill_char, spec->n_lpadding);
+        p_buf += spec->n_lpadding;
+    }
+    if (spec->n_lsign == 1) {
+        *p_buf++ = spec->lsign;
+    }
+    if (spec->n_spadding) {
+        CH_TYPE_FILL(p_buf, fill_char, spec->n_spadding);
+        p_buf += spec->n_spadding;
+    }
+    p_digits = p_buf;
+    p_buf += n_digits;
+    if (spec->n_rsign == 1) {
+        *p_buf++ = spec->rsign;
+    }
+    if (spec->n_rpadding) {
+        CH_TYPE_FILL(p_buf, fill_char, spec->n_rpadding);
+        p_buf += spec->n_rpadding;
+    }
+    return p_digits;
+}
+
+static int
+_format_long_binary(PyObject *v, FmtState *fs, const InternalFormatSpec *format)
+{
+    /* we know that v is a PyLongObject */
+    PyLongObject* l = (PyLongObject*)v;
+
+    IntegerFieldWidths spec;
+    CH_TYPE *pbuf;
+    CH_TYPE *start;
+    char sign = _PyLong_Sign(v) >= 0 ? '\0' : '-';
+    Py_ssize_t n_digits = _PyLong_NumBits(v);
+    Py_ssize_t i;
+
+    /* special case for zero */
+    if (l->ob_size == 0)
+        n_digits = 1;
+
+    _calc_integer_widths(&spec, sign, n_digits, format);
+
+    /* allocate space */
+    if (output_allocate(fs, spec.n_total, &pbuf) == 0)
+        return 0;
+
+    /* fill in the non-digit parts, and return a pointer where the digits go */
+    start = _fill_integer(pbuf, &spec, n_digits,
+                          format->fill_char == '\0' ? ' ' : format->fill_char);
+
+    /* degenerate case for zero.  handle it and get out */
+    if (l->ob_size == 0) {
+        *pbuf = '0';
+        return 1;
+    }
+
+    /* finally, fill in the digits, starting at the right and working left */
+    pbuf = start + n_digits - 1;
+
+    for (i = 0; i < ABS(l->ob_size); i++) {
+        Py_ssize_t j;
+        digit d = l->ob_digit[i];
+        for (j = 0; j < SHIFT; j++, d >>= 1) {
+            if (d & 1)
+                *pbuf = '1';
+            else
+                *pbuf = '0';
+
+            /* see if we're done mid-digit */
+            pbuf--;
+            if (pbuf < start)
+                goto DONE;
+        }
+    }
+
+DONE:
+    return 1;
+}
+
+static int
+_format_int_binary(PyObject *v, FmtState *fs, const InternalFormatSpec *format)
+{
+    /* see http://graphics.stanford.edu/~seander/bithacks.html for
+       various bit related hacks used here */
+
+    long x;
+    char sign = '\0';
+    unsigned n_digits;
+    long tmp;
+    IntegerFieldWidths spec;
+    CH_TYPE *pbuf;
+
+    x = PyInt_AsLong(v);
+    if (x == -1 && PyErr_Occurred()) {
+        PyErr_Format(PyExc_TypeError, "int argument required, not %.200s",
+                     v->ob_type->tp_name);
+        return 0;
+    }
+
+    if (x < 0) {
+        sign = '-';
+        x *= -1;
+    }
+
+    /* number of binary digits is one more than lg(x).  this also works for 0 */
+    n_digits = 1;
+    tmp = x;
+    while (tmp >>= 1)
+        n_digits++;
+
+    _calc_integer_widths(&spec, sign, n_digits, format);
+
+    /* allocate space */
+    if (output_allocate(fs, spec.n_total, &pbuf) == 0)
+        return 0;
+
+    /* fill in the non-digit parts, and return a pointer where the digits go */
+    pbuf = _fill_integer(pbuf, &spec, n_digits,
+                         format->fill_char == '\0' ? ' ' : format->fill_char);
+
+    /* finally, fill in the digits, starting at the right and working left */
+    /* note that if x == 0, n_digits will be 1 and this loop will still work */
+    pbuf += n_digits-1;
+    for (; n_digits; pbuf--, n_digits--, x >>= 1) {
+        if (x & 1)
+            *pbuf = '1';
+        else
+            *pbuf = '0';
+    }
+
+    return 1;
+}
+
 static int
 format_binary(PyObject *fieldobj, FmtState *fs,
               const InternalFormatSpec *format)
 {
-    return format_DUMMY(fieldobj, fs);
+    if (PyLong_Check(fieldobj))
+        return _format_long_binary(fieldobj, fs, format);
+    else
+        return _format_int_binary(fieldobj, fs, format);
 }
 
 static int
@@ -1137,7 +1377,6 @@
 
     PyOS_snprintf(ptr, buflen, format, x);
 
-
     /* convert from chars to unicode, if needed */
 #if C_UNICODE
     len = strtounicode(*pbuf, start, -1);
@@ -1177,13 +1416,13 @@
 
     /* if we're hex or octal, check to see if 0 or 0x or 0X was at the
        front of the string.  if so, skip it. */
-    if (type == 'o' && n_allocated >= 1 && *pbuf[0] == '0') {
+    if (type == 'o' && n_allocated >= 1 && (*pbuf)[0] == '0') {
         p_charbuf++;
         n_allocated -= 1;
-    } else if (type == 'x' && n_allocated >= 2 && *pbuf[1] == 'x') {
+    } else if (type == 'x' && n_allocated >= 2 && (*pbuf)[1] == 'x') {
         p_charbuf += 2;
         n_allocated -= 1;
-    } else if (type == 'X' && n_allocated >= 2 && *pbuf[1] == 'X') {
+    } else if (type == 'X' && n_allocated >= 2 && (*pbuf)[1] == 'X') {
         p_charbuf += 2;
         n_allocated -= 1;
     }
@@ -1214,14 +1453,7 @@
     CH_TYPE *p_digits;  /* pointer to the digits we have */
     CH_TYPE n_digits;   /* count of digits we have */
     CH_TYPE sign;
-    Py_ssize_t n_lpadding;
-    Py_ssize_t n_spadding;
-    Py_ssize_t n_rpadding;
-    CH_TYPE lsign = 0;
-    Py_ssize_t n_lsign = 0;
-    CH_TYPE rsign = 0;
-    Py_ssize_t n_rsign = 0;
-    Py_ssize_t n_total; /* the total length we're going to write */
+    IntegerFieldWidths spec;
     Py_ssize_t n_allocated; /* how much space we actually allocated
                                when we wrote the digits into the
                                output */
@@ -1258,73 +1490,7 @@
     else
         sign = '\0';
 
-    /* the output will look like:
-       |                                                           |
-       | <lpadding> <lsign> <spadding> <digits> <rsign> <rpadding> |
-       |                                                           |
-
-       lsign and rsign are computed from format->sign and the actual
-       sign of the number
-
-       digits is already known
-
-       the total width is either given, or computed from the
-       actual digits
-
-       only one of lpadding, spadding, and rpadding can be non-zero,
-       and it's calculated from the width and other fields
-    */
-
-    /* compute the various parts we're going to write */
-    if (format->sign == '+') {
-        /* always put a + or - */
-        n_lsign = 1;
-        lsign = (sign == '-' ? '-' : '+');
-    } else if (format->sign == '(') {
-        if (sign == '-') {
-            n_lsign = 1;
-            lsign = '(';
-            n_rsign = 1;
-            rsign = ')';
-        }
-    } else if (format->sign == ' ') {
-        n_lsign = 1;
-        lsign = (sign == '-' ? '-' : ' ');
-    } else {
-        /* non specified, or the default (-) */
-        if (sign == '-') {
-            n_lsign = 1;
-            lsign = '-';
-        }
-    }
-
-    /* now the number of padding characters */
-    n_lpadding = n_spadding = n_rpadding = 0;
-    if (format->width == -1) {
-        /* no padding at all, nothing to do */
-    } else {
-        /* see if any padding is needed */
-        if (n_lsign + n_digits + n_rsign >= format->width) {
-            /* no padding needed, we're already bigger than the
-               requested width */
-        } else {
-            /* determine which of left, space, or right padding is
-               needed */
-            Py_ssize_t padding = format->width - (n_lsign + n_digits + n_rsign);
-            if (format->align == '<')
-                n_rpadding = padding;
-            else if (format->align == '>')
-                n_lpadding = padding;
-            else
-                /* must be '=' */
-                n_spadding = padding;
-        }
-    }
-
-    /* set the total length of the string */
-    n_total = n_lpadding + n_lsign + n_spadding + n_digits
-        + n_rsign + n_rpadding;
-    assert(n_total >= n_allocated);
+    _calc_integer_widths(&spec, sign, n_digits, format);
 
     /* because we're going to reallocate, our pointers might be
        invalidated.  remember the offsets, then re-create the pointers
@@ -1333,7 +1499,7 @@
     ofs_buf = p_buf - tmp;
     ofs_digits = p_digits - tmp;
 
-    output_allocate(fs, n_total - n_allocated, &tmp);
+    output_allocate(fs, spec.n_total - n_allocated, &tmp);
 
     tmp = STROBJ_AS_PTR(fs->outstr.obj);
     p_buf = tmp + ofs_buf;
@@ -1342,46 +1508,27 @@
 #if 0
     printf("p_buf       %p\n", p_buf);
     printf("p_digits    %p\n", p_digits);
+    printf("digits      '%.*s'\n", n_digits, p_digits);
     printf("n_digits:   %d\n", n_digits);
-    printf("n_lpadding: %d\n", n_lpadding);
-    printf("n_lsign:    %d\n", n_lsign);
-    printf("lsign:      %d(%c)\n", lsign, lsign);
-    printf("n_rsign:    %d\n", n_rsign);
-    printf("rsign:      %d(%c)\n", rsign, rsign);
-    printf("n_spadding: %d\n", n_spadding);
-    printf("n_rpadding: %d\n", n_rpadding);
+    printf("n_lpadding: %d\n", spec.n_lpadding);
+    printf("n_lsign:    %d\n", spec.n_lsign);
+    printf("lsign:      %d(%c)\n", spec.lsign, spec.lsign);
+    printf("n_rsign:    %d\n", spec.n_rsign);
+    printf("rsign:      %d(%c)\n", spec.rsign, spec.rsign);
+    printf("n_spadding: %d\n", spec.n_spadding);
+    printf("n_rpadding: %d\n", spec.n_rpadding);
 #endif
 
     /* copy the characters into position first, since we're going to
        overwrite some of that space */
     /* short circuit test, in case we don't have to move anything */
-    if (p_buf + (n_lpadding + n_lsign + n_spadding) != p_digits)
-        memmove(p_buf + (n_lpadding + n_lsign + n_spadding), p_digits,
-                n_digits * sizeof(CH_TYPE));
-
-    if (n_lpadding) {
-        CH_TYPE_FILL(p_buf, format->fill_char == '\0' ? ' ' : format->fill_char,
-                     n_lpadding);
-        p_buf += n_lpadding;
-    }
-    if (n_lsign == 1) {
-        *p_buf++ = lsign;
-    }
-    if (n_spadding) {
-        CH_TYPE_FILL(p_buf, format->fill_char == '\0' ? ' ' : format->fill_char,
-                     n_spadding);
-        p_buf += n_spadding;
-    }
-    p_buf += n_digits;
-    if (n_rsign == 1) {
-        *p_buf++ = rsign;
-    }
-    if (n_rpadding) {
-        CH_TYPE_FILL(p_buf, format->fill_char == '\0' ? ' ' : format->fill_char,
-                     n_rpadding);
-        p_buf += n_rpadding;
-    }
-
+    if (p_buf + (spec.n_lpadding + spec.n_lsign + spec.n_spadding) != p_digits)
+        memmove(p_buf + (spec.n_lpadding + spec.n_lsign + spec.n_spadding),
+                p_digits, n_digits * sizeof(CH_TYPE));
+
+    /* now fill in the non-digit parts */
+    _fill_integer(p_buf, &spec, n_digits,
+                  format->fill_char == '\0' ? ' ' : format->fill_char);
     return 1;
 }
 
@@ -1569,10 +1716,7 @@
         }
     }
 
-    /* XXX handle conversion functions that logically map to other
-       conversion functions? percent is the only one, and I'm not wild
-       about having percent at all*/
-
+    /* find the formatter function */
     formatter = format_function(format.type);
     if (formatter == NULL) {
         SetError(fs, "Invalid conversion character");