[Python-checkins] r54141 - sandbox/trunk/pep3101/test_simpleformat.py sandbox/trunk/pep3101/unicodeformat.c
eric.smith
python-checkins at python.org
Tue Mar 6 03:36:51 CET 2007
Author: eric.smith
Date: Tue Mar 6 03:36:49 2007
New Revision: 54141
Modified:
sandbox/trunk/pep3101/test_simpleformat.py
sandbox/trunk/pep3101/unicodeformat.c
Log:
Added binary formatting. Factored out _calc_integer_widths() and _fill_integer() so they can be shared by format_integer() and format_binary(). Added test cases for binary. This code accesses PyLongObject's ob_digit[] directly, which might not be the best way to go about binary formatting.
Modified: sandbox/trunk/pep3101/test_simpleformat.py
==============================================================================
--- sandbox/trunk/pep3101/test_simpleformat.py (original)
+++ sandbox/trunk/pep3101/test_simpleformat.py Tue Mar 6 03:36:49 2007
@@ -123,7 +123,7 @@
def test_specifiers(self):
self.formatEquals("a", "{0:c}", ord("a"))
- self.formatEquals("8_08b", "{0:08b}", 8)
+ self.formatEquals("00001000", "{0:08b}", 8)
self.formatEquals(" 8", "{0: >3d}", 8)
self.formatEquals("15%", "{0:.0%}", .1515)
@@ -152,6 +152,7 @@
self.assertRaises(TypeError, "{0:d}", "non-number")
self.formatEqualsWithUnicode("0", "{0:d}", 0)
+ self.formatEqualsWithUnicode("0", "{0:d}", long(0))
self.formatEqualsWithUnicode("123", "{0:d}", 123)
self.formatEqualsWithUnicode("-123", "{0:d}", -123)
self.formatEqualsWithUnicode("+123", "{0:+d}", 123)
@@ -204,7 +205,7 @@
def test_hex_specifiers(self):
n = int("beef", 16)
- self.assertRaises(TypeError, "{0:x", "non-number")
+ self.formatRaises(TypeError, "{0:x}", "non-number")
self.formatEqualsWithUnicodeUC("0", "{0:x}", 0)
self.formatEqualsWithUnicodeUC("beef", "{0:x}", n)
@@ -248,6 +249,31 @@
self.formatEqualsWithUnicodeUC("3.1415e+200", "{0:g}", 3.1415e200)
self.formatEqualsWithUnicodeUC("3.1415e+200", "{0:g}", 3.1415e200)
+ def test_percent_specifiers(self):
+ self.formatEqualsWithUnicode("314.15%", "{0:.2%}", 3.1415)
+ self.formatEqualsWithUnicode("3.14e+202%", "{0:.3%}", 3.1415e200)
+
+ def test_binary_specifiers(self):
+ self.formatRaises(TypeError, "{0:b}", "string")
+
+ self.formatEqualsWithUnicode("0", "{0:b}", 0)
+ self.formatEqualsWithUnicode("0", "{0:b}", long(0))
+ self.formatEqualsWithUnicode("1", "{0:b}", 1)
+ self.formatEqualsWithUnicode("1", "{0:b}", long(1))
+ self.formatEqualsWithUnicode("-1", "{0:b}", -1)
+ self.formatEqualsWithUnicode("-1", "{0:b}", long(-1))
+ self.formatEqualsWithUnicode("0 ", "{0:<10b}", 0)
+ self.formatEqualsWithUnicode(" 0", "{0:>10b}", 0)
+ self.formatEqualsWithUnicode("1001 ", "{0:<10b}", 9)
+ self.formatEqualsWithUnicode(" 1001", "{0:>10b}", 9)
+ self.formatEqualsWithUnicode("1" + "0" * 100, "{0:b}", 2**100)
+ self.formatEqualsWithUnicode("-1" + "0" * 100, "{0:b}", -2**100)
+ self.formatEqualsWithUnicode("1" + "0" * 98 + "11", "{0:b}", 2**100 + 3)
+ self.formatEqualsWithUnicode("1" * 100, "{0:b}", 2**100 - 1)
+ self.formatEqualsWithUnicode("-" + "1" * 100, "{0:b}", -(2**100 - 1))
+ self.formatEqualsWithUnicode("(" + "1" * 100 + ")", "{0:()b}", -(2**100 - 1))
+ self.formatEqualsWithUnicode("(" + " " * 98 + "1" * 100 + ")", "{0:=()200b}", -(2**100 - 1))
+
def test_missing_type_specifier(self):
# make sure floats use 'g', ints and longs 'd', and everything else 's'
pass
Modified: sandbox/trunk/pep3101/unicodeformat.c
==============================================================================
--- sandbox/trunk/pep3101/unicodeformat.c (original)
+++ sandbox/trunk/pep3101/unicodeformat.c Tue Mar 6 03:36:49 2007
@@ -20,6 +20,9 @@
#define C_UNICODE 1
#endif
+/* we need access to a PyLongObject's internals */
+#include "longintrepr.h"
+
#if C_UNICODE
#define CH_TYPE Py_UNICODE
#define CH_TYPE_ISDECIMAL Py_UNICODE_ISDECIMAL
@@ -91,6 +94,7 @@
*/
#define FORMATBUFLEN (size_t)120
+#define ABS(x) ((x) < 0 ? -(x) : (x))
#ifdef __cplusplus
extern "C" {
@@ -806,6 +810,20 @@
CH_TYPE type;
} InternalFormatSpec;
+/* describes the layout for an integer, see the comment in
+ _calc_integer_widths() for details */
+typedef struct {
+ Py_ssize_t n_lpadding;
+ Py_ssize_t n_spadding;
+ Py_ssize_t n_rpadding;
+ char lsign;
+ Py_ssize_t n_lsign;
+ char rsign;
+ Py_ssize_t n_rsign;
+ Py_ssize_t n_total; /* just a convenience, it's derivable from the
+ other fields */
+} IntegerFieldWidths;
+
/* returns true if this character is a specifier alignment token */
Py_LOCAL_INLINE(int)
alignment_token(CH_TYPE c)
@@ -1024,11 +1042,233 @@
(*FormatFunction)(PyObject *fieldobj, FmtState *fs,
const InternalFormatSpec *format);
+static void
+_calc_integer_widths(IntegerFieldWidths *r, CH_TYPE sign, Py_ssize_t n_digits,
+ const InternalFormatSpec *format)
+{
+ r->n_lpadding = 0;
+ r->n_spadding = 0;
+ r->n_rpadding = 0;
+ r->lsign = '\0';
+ r->n_lsign = 0;
+ r->rsign = '\0';
+ r->n_rsign = 0;
+
+ /* the output will look like:
+ | |
+ | <lpadding> <lsign> <spadding> <digits> <rsign> <rpadding> |
+ | |
+
+ lsign and rsign are computed from format->sign and the actual
+ sign of the number
+
+ digits is already known
+
+ the total width is either given, or computed from the
+ actual digits
+
+ only one of lpadding, spadding, and rpadding can be non-zero,
+ and it's calculated from the width and other fields
+ */
+
+ /* compute the various parts we're going to write */
+ if (format->sign == '+') {
+ /* always put a + or - */
+ r->n_lsign = 1;
+ r->lsign = (sign == '-' ? '-' : '+');
+ } else if (format->sign == '(') {
+ if (sign == '-') {
+ r->n_lsign = 1;
+ r->lsign = '(';
+ r->n_rsign = 1;
+ r->rsign = ')';
+ }
+ } else if (format->sign == ' ') {
+ r->n_lsign = 1;
+ r->lsign = (sign == '-' ? '-' : ' ');
+ } else {
+ /* non specified, or the default (-) */
+ if (sign == '-') {
+ r->n_lsign = 1;
+ r->lsign = '-';
+ }
+ }
+
+ /* now the number of padding characters */
+ if (format->width == -1) {
+ /* no padding at all, nothing to do */
+ } else {
+ /* see if any padding is needed */
+ if (r->n_lsign + n_digits + r->n_rsign >= format->width) {
+ /* no padding needed, we're already bigger than the
+ requested width */
+ } else {
+ /* determine which of left, space, or right padding is
+ needed */
+ Py_ssize_t padding = format->width - (r->n_lsign + n_digits + r->n_rsign);
+ if (format->align == '<')
+ r->n_rpadding = padding;
+ else if (format->align == '>')
+ r->n_lpadding = padding;
+ else
+ /* must be '=' */
+ r->n_spadding = padding;
+ }
+ }
+ r->n_total = r->n_lpadding + r->n_lsign + r->n_spadding +
+ n_digits + r->n_rsign + r->n_rpadding;
+}
+
+/* fill in the non-digit parts of an integer's string representation,
+ as determined in _calc_integer_widths(). returns the pointer to
+ where the digits go. */
+static CH_TYPE*
+_fill_integer(CH_TYPE *p_buf, const IntegerFieldWidths *spec,
+ Py_ssize_t n_digits, CH_TYPE fill_char)
+{
+ CH_TYPE* p_digits;
+
+ if (spec->n_lpadding) {
+ CH_TYPE_FILL(p_buf, fill_char, spec->n_lpadding);
+ p_buf += spec->n_lpadding;
+ }
+ if (spec->n_lsign == 1) {
+ *p_buf++ = spec->lsign;
+ }
+ if (spec->n_spadding) {
+ CH_TYPE_FILL(p_buf, fill_char, spec->n_spadding);
+ p_buf += spec->n_spadding;
+ }
+ p_digits = p_buf;
+ p_buf += n_digits;
+ if (spec->n_rsign == 1) {
+ *p_buf++ = spec->rsign;
+ }
+ if (spec->n_rpadding) {
+ CH_TYPE_FILL(p_buf, fill_char, spec->n_rpadding);
+ p_buf += spec->n_rpadding;
+ }
+ return p_digits;
+}
+
+static int
+_format_long_binary(PyObject *v, FmtState *fs, const InternalFormatSpec *format)
+{
+ /* we know that v is a PyLongObject */
+ PyLongObject* l = (PyLongObject*)v;
+
+ IntegerFieldWidths spec;
+ CH_TYPE *pbuf;
+ CH_TYPE *start;
+ char sign = _PyLong_Sign(v) >= 0 ? '\0' : '-';
+ Py_ssize_t n_digits = _PyLong_NumBits(v);
+ Py_ssize_t i;
+
+ /* special case for zero */
+ if (l->ob_size == 0)
+ n_digits = 1;
+
+ _calc_integer_widths(&spec, sign, n_digits, format);
+
+ /* allocate space */
+ if (output_allocate(fs, spec.n_total, &pbuf) == 0)
+ return 0;
+
+ /* fill in the non-digit parts, and return a pointer where the digits go */
+ start = _fill_integer(pbuf, &spec, n_digits,
+ format->fill_char == '\0' ? ' ' : format->fill_char);
+
+ /* degenerate case for zero. handle it and get out */
+ if (l->ob_size == 0) {
+ *pbuf = '0';
+ return 1;
+ }
+
+ /* finally, fill in the digits, starting at the right and working left */
+ pbuf = start + n_digits - 1;
+
+ for (i = 0; i < ABS(l->ob_size); i++) {
+ Py_ssize_t j;
+ digit d = l->ob_digit[i];
+ for (j = 0; j < SHIFT; j++, d >>= 1) {
+ if (d & 1)
+ *pbuf = '1';
+ else
+ *pbuf = '0';
+
+ /* see if we're done mid-digit */
+ pbuf--;
+ if (pbuf < start)
+ goto DONE;
+ }
+ }
+
+DONE:
+ return 1;
+}
+
+static int
+_format_int_binary(PyObject *v, FmtState *fs, const InternalFormatSpec *format)
+{
+ /* see http://graphics.stanford.edu/~seander/bithacks.html for
+ various bit related hacks used here */
+
+ long x;
+ char sign = '\0';
+ unsigned n_digits;
+ long tmp;
+ IntegerFieldWidths spec;
+ CH_TYPE *pbuf;
+
+ x = PyInt_AsLong(v);
+ if (x == -1 && PyErr_Occurred()) {
+ PyErr_Format(PyExc_TypeError, "int argument required, not %.200s",
+ v->ob_type->tp_name);
+ return 0;
+ }
+
+ if (x < 0) {
+ sign = '-';
+ x *= -1;
+ }
+
+ /* number of binary digits is one more than lg(x). this also works for 0 */
+ n_digits = 1;
+ tmp = x;
+ while (tmp >>= 1)
+ n_digits++;
+
+ _calc_integer_widths(&spec, sign, n_digits, format);
+
+ /* allocate space */
+ if (output_allocate(fs, spec.n_total, &pbuf) == 0)
+ return 0;
+
+ /* fill in the non-digit parts, and return a pointer where the digits go */
+ pbuf = _fill_integer(pbuf, &spec, n_digits,
+ format->fill_char == '\0' ? ' ' : format->fill_char);
+
+ /* finally, fill in the digits, starting at the right and working left */
+ /* note that if x == 0, n_digits will be 1 and this loop will still work */
+ pbuf += n_digits-1;
+ for (; n_digits; pbuf--, n_digits--, x >>= 1) {
+ if (x & 1)
+ *pbuf = '1';
+ else
+ *pbuf = '0';
+ }
+
+ return 1;
+}
+
static int
format_binary(PyObject *fieldobj, FmtState *fs,
const InternalFormatSpec *format)
{
- return format_DUMMY(fieldobj, fs);
+ if (PyLong_Check(fieldobj))
+ return _format_long_binary(fieldobj, fs, format);
+ else
+ return _format_int_binary(fieldobj, fs, format);
}
static int
@@ -1137,7 +1377,6 @@
PyOS_snprintf(ptr, buflen, format, x);
-
/* convert from chars to unicode, if needed */
#if C_UNICODE
len = strtounicode(*pbuf, start, -1);
@@ -1177,13 +1416,13 @@
/* if we're hex or octal, check to see if 0 or 0x or 0X was at the
front of the string. if so, skip it. */
- if (type == 'o' && n_allocated >= 1 && *pbuf[0] == '0') {
+ if (type == 'o' && n_allocated >= 1 && (*pbuf)[0] == '0') {
p_charbuf++;
n_allocated -= 1;
- } else if (type == 'x' && n_allocated >= 2 && *pbuf[1] == 'x') {
+ } else if (type == 'x' && n_allocated >= 2 && (*pbuf)[1] == 'x') {
p_charbuf += 2;
n_allocated -= 1;
- } else if (type == 'X' && n_allocated >= 2 && *pbuf[1] == 'X') {
+ } else if (type == 'X' && n_allocated >= 2 && (*pbuf)[1] == 'X') {
p_charbuf += 2;
n_allocated -= 1;
}
@@ -1214,14 +1453,7 @@
CH_TYPE *p_digits; /* pointer to the digits we have */
CH_TYPE n_digits; /* count of digits we have */
CH_TYPE sign;
- Py_ssize_t n_lpadding;
- Py_ssize_t n_spadding;
- Py_ssize_t n_rpadding;
- CH_TYPE lsign = 0;
- Py_ssize_t n_lsign = 0;
- CH_TYPE rsign = 0;
- Py_ssize_t n_rsign = 0;
- Py_ssize_t n_total; /* the total length we're going to write */
+ IntegerFieldWidths spec;
Py_ssize_t n_allocated; /* how much space we actually allocated
when we wrote the digits into the
output */
@@ -1258,73 +1490,7 @@
else
sign = '\0';
- /* the output will look like:
- | |
- | <lpadding> <lsign> <spadding> <digits> <rsign> <rpadding> |
- | |
-
- lsign and rsign are computed from format->sign and the actual
- sign of the number
-
- digits is already known
-
- the total width is either given, or computed from the
- actual digits
-
- only one of lpadding, spadding, and rpadding can be non-zero,
- and it's calculated from the width and other fields
- */
-
- /* compute the various parts we're going to write */
- if (format->sign == '+') {
- /* always put a + or - */
- n_lsign = 1;
- lsign = (sign == '-' ? '-' : '+');
- } else if (format->sign == '(') {
- if (sign == '-') {
- n_lsign = 1;
- lsign = '(';
- n_rsign = 1;
- rsign = ')';
- }
- } else if (format->sign == ' ') {
- n_lsign = 1;
- lsign = (sign == '-' ? '-' : ' ');
- } else {
- /* non specified, or the default (-) */
- if (sign == '-') {
- n_lsign = 1;
- lsign = '-';
- }
- }
-
- /* now the number of padding characters */
- n_lpadding = n_spadding = n_rpadding = 0;
- if (format->width == -1) {
- /* no padding at all, nothing to do */
- } else {
- /* see if any padding is needed */
- if (n_lsign + n_digits + n_rsign >= format->width) {
- /* no padding needed, we're already bigger than the
- requested width */
- } else {
- /* determine which of left, space, or right padding is
- needed */
- Py_ssize_t padding = format->width - (n_lsign + n_digits + n_rsign);
- if (format->align == '<')
- n_rpadding = padding;
- else if (format->align == '>')
- n_lpadding = padding;
- else
- /* must be '=' */
- n_spadding = padding;
- }
- }
-
- /* set the total length of the string */
- n_total = n_lpadding + n_lsign + n_spadding + n_digits
- + n_rsign + n_rpadding;
- assert(n_total >= n_allocated);
+ _calc_integer_widths(&spec, sign, n_digits, format);
/* because we're going to reallocate, our pointers might be
invalidated. remember the offsets, then re-create the pointers
@@ -1333,7 +1499,7 @@
ofs_buf = p_buf - tmp;
ofs_digits = p_digits - tmp;
- output_allocate(fs, n_total - n_allocated, &tmp);
+ output_allocate(fs, spec.n_total - n_allocated, &tmp);
tmp = STROBJ_AS_PTR(fs->outstr.obj);
p_buf = tmp + ofs_buf;
@@ -1342,46 +1508,27 @@
#if 0
printf("p_buf %p\n", p_buf);
printf("p_digits %p\n", p_digits);
+ printf("digits '%.*s'\n", n_digits, p_digits);
printf("n_digits: %d\n", n_digits);
- printf("n_lpadding: %d\n", n_lpadding);
- printf("n_lsign: %d\n", n_lsign);
- printf("lsign: %d(%c)\n", lsign, lsign);
- printf("n_rsign: %d\n", n_rsign);
- printf("rsign: %d(%c)\n", rsign, rsign);
- printf("n_spadding: %d\n", n_spadding);
- printf("n_rpadding: %d\n", n_rpadding);
+ printf("n_lpadding: %d\n", spec.n_lpadding);
+ printf("n_lsign: %d\n", spec.n_lsign);
+ printf("lsign: %d(%c)\n", spec.lsign, spec.lsign);
+ printf("n_rsign: %d\n", spec.n_rsign);
+ printf("rsign: %d(%c)\n", spec.rsign, spec.rsign);
+ printf("n_spadding: %d\n", spec.n_spadding);
+ printf("n_rpadding: %d\n", spec.n_rpadding);
#endif
/* copy the characters into position first, since we're going to
overwrite some of that space */
/* short circuit test, in case we don't have to move anything */
- if (p_buf + (n_lpadding + n_lsign + n_spadding) != p_digits)
- memmove(p_buf + (n_lpadding + n_lsign + n_spadding), p_digits,
- n_digits * sizeof(CH_TYPE));
-
- if (n_lpadding) {
- CH_TYPE_FILL(p_buf, format->fill_char == '\0' ? ' ' : format->fill_char,
- n_lpadding);
- p_buf += n_lpadding;
- }
- if (n_lsign == 1) {
- *p_buf++ = lsign;
- }
- if (n_spadding) {
- CH_TYPE_FILL(p_buf, format->fill_char == '\0' ? ' ' : format->fill_char,
- n_spadding);
- p_buf += n_spadding;
- }
- p_buf += n_digits;
- if (n_rsign == 1) {
- *p_buf++ = rsign;
- }
- if (n_rpadding) {
- CH_TYPE_FILL(p_buf, format->fill_char == '\0' ? ' ' : format->fill_char,
- n_rpadding);
- p_buf += n_rpadding;
- }
-
+ if (p_buf + (spec.n_lpadding + spec.n_lsign + spec.n_spadding) != p_digits)
+ memmove(p_buf + (spec.n_lpadding + spec.n_lsign + spec.n_spadding),
+ p_digits, n_digits * sizeof(CH_TYPE));
+
+ /* now fill in the non-digit parts */
+ _fill_integer(p_buf, &spec, n_digits,
+ format->fill_char == '\0' ? ' ' : format->fill_char);
return 1;
}
@@ -1569,10 +1716,7 @@
}
}
- /* XXX handle conversion functions that logically map to other
- conversion functions? percent is the only one, and I'm not wild
- about having percent at all*/
-
+ /* find the formatter function */
formatter = format_function(format.type);
if (formatter == NULL) {
SetError(fs, "Invalid conversion character");
More information about the Python-checkins
mailing list