[Python-checkins] r70841 - in python/branches/py3k-short-float-repr: Include/pystrtod.h Lib/test/test_format.py Lib/test/test_types.py Modules/_pickle.c Objects/complexobject.c Objects/floatobject.c Objects/unicodeobject.c Python/marshal.c Python/pystrtod.c
eric.smith
python-checkins at python.org
Tue Mar 31 19:06:58 CEST 2009
Author: eric.smith
Date: Tue Mar 31 19:06:57 2009
New Revision: 70841
Log:
Checkpoint so Mark and I can continue working.
Changes:
Hooked up unicodeobject.c so that %-formatting works.
Added trailing zeros.
Added a single 'flags' param to PyOS_double_to_string instead of individual flag parameters.
Added upper- and lower-case format codes (g/G, e/E, f/F). This also addresses issue 3382.
Many tests still fail, but I'll work on them next.
Modified:
python/branches/py3k-short-float-repr/Include/pystrtod.h
python/branches/py3k-short-float-repr/Lib/test/test_format.py
python/branches/py3k-short-float-repr/Lib/test/test_types.py
python/branches/py3k-short-float-repr/Modules/_pickle.c
python/branches/py3k-short-float-repr/Objects/complexobject.c
python/branches/py3k-short-float-repr/Objects/floatobject.c
python/branches/py3k-short-float-repr/Objects/unicodeobject.c
python/branches/py3k-short-float-repr/Python/marshal.c
python/branches/py3k-short-float-repr/Python/pystrtod.c
Modified: python/branches/py3k-short-float-repr/Include/pystrtod.h
==============================================================================
--- python/branches/py3k-short-float-repr/Include/pystrtod.h (original)
+++ python/branches/py3k-short-float-repr/Include/pystrtod.h Tue Mar 31 19:06:57 2009
@@ -13,10 +13,13 @@
int mode,
char format_code,
int precision,
- int sign,
- int add_dot_0_if_integer);
+ int flags);
+#define Py_DTSF_SIGN 0x01 /* always add the sign */
+#define Py_DTSF_ADD_DOT_0 0x02 /* if the result is an integer add ".0" */
+#define Py_DTSF_ALT 0x04 /* "alternate" formatting. it's format_code
+ specific */
#ifdef __cplusplus
}
Modified: python/branches/py3k-short-float-repr/Lib/test/test_format.py
==============================================================================
--- python/branches/py3k-short-float-repr/Lib/test/test_format.py (original)
+++ python/branches/py3k-short-float-repr/Lib/test/test_format.py Tue Mar 31 19:06:57 2009
@@ -220,6 +220,11 @@
testformat("%a", "\u0378", "'\\u0378'") # non printable
testformat("%r", "\u0374", "'\u0374'") # printable
testformat("%a", "\u0374", "'\\u0374'") # printable
+
+ # alternate float formatting
+ testformat('%g', 1.1, '1.1')
+ testformat('%#g', 1.1, '1.10000')
+
# Test exception for unknown format characters
if verbose:
print('Testing exceptions')
Modified: python/branches/py3k-short-float-repr/Lib/test/test_types.py
==============================================================================
--- python/branches/py3k-short-float-repr/Lib/test/test_types.py (original)
+++ python/branches/py3k-short-float-repr/Lib/test/test_types.py Tue Mar 31 19:06:57 2009
@@ -113,6 +113,9 @@
self.assertEqual(1.5e-101.__format__('e'), '1.500000e-101')
self.assertEqual('%e' % 1.5e-101, '1.500000e-101')
+ self.assertEqual('%g' % 1.0, '1')
+ self.assertEqual('%#g' % 1.0, '1.00000')
+
def test_normal_integers(self):
# Ensure the first 256 integers are shared
a = 256
Modified: python/branches/py3k-short-float-repr/Modules/_pickle.c
==============================================================================
--- python/branches/py3k-short-float-repr/Modules/_pickle.c (original)
+++ python/branches/py3k-short-float-repr/Modules/_pickle.c Tue Mar 31 19:06:57 2009
@@ -1016,7 +1016,7 @@
return -1;
if (pickler_write(self, pdata, 9) < 0)
return -1;
- }
+ }
else {
int result = -1;
char *buf = NULL;
@@ -1025,7 +1025,7 @@
if (pickler_write(self, &op, 1) < 0)
goto done;
- buf = PyOS_double_to_string(x, 2, 'g', 17, 0, 0);
+ buf = PyOS_double_to_string(x, 2, 'g', 17, 0);
if (!buf) {
PyErr_NoMemory();
goto done;
Modified: python/branches/py3k-short-float-repr/Objects/complexobject.c
==============================================================================
--- python/branches/py3k-short-float-repr/Objects/complexobject.c (original)
+++ python/branches/py3k-short-float-repr/Objects/complexobject.c Tue Mar 31 19:06:57 2009
@@ -368,8 +368,7 @@
im = "-inf*";
}
else {
- pim = PyOS_double_to_string(v->cval.imag, mode, 'g', precision,
- 0, 0);
+ pim = PyOS_double_to_string(v->cval.imag, mode, 'g', precision, 0);
if (!pim) {
PyErr_NoMemory();
goto done;
@@ -388,8 +387,7 @@
re = "-inf";
}
else {
- pre = PyOS_double_to_string(v->cval.real, mode, 'g', precision,
- 0, 0);
+ pre = PyOS_double_to_string(v->cval.real, mode, 'g', precision, 0);
if (!pre) {
PyErr_NoMemory();
goto done;
@@ -408,7 +406,7 @@
}
else {
pim = PyOS_double_to_string(v->cval.imag, mode, 'g', precision,
- 1, 0);
+ Py_DTSF_SIGN);
if (!pim) {
PyErr_NoMemory();
goto done;
Modified: python/branches/py3k-short-float-repr/Objects/floatobject.c
==============================================================================
--- python/branches/py3k-short-float-repr/Objects/floatobject.c (original)
+++ python/branches/py3k-short-float-repr/Objects/floatobject.c Tue Mar 31 19:06:57 2009
@@ -369,7 +369,7 @@
{
PyObject *result;
char *buf = PyOS_double_to_string(PyFloat_AS_DOUBLE(v),
- mode, 'g', precision, 0, 1);
+ mode, 'g', precision, Py_DTSF_ADD_DOT_0);
if (!buf)
return PyErr_NoMemory();
result = PyUnicode_FromString(buf);
@@ -1917,7 +1917,7 @@
Py_REFCNT(p) != 0) {
char *buf = PyOS_double_to_string(
PyFloat_AS_DOUBLE(p), 0, 'g',
- 0, 0, 1);
+ 0, Py_DTSF_ADD_DOT_0);
if (buf) {
/* XXX(twouters) cast
refcount to long
Modified: python/branches/py3k-short-float-repr/Objects/unicodeobject.c
==============================================================================
--- python/branches/py3k-short-float-repr/Objects/unicodeobject.c (original)
+++ python/branches/py3k-short-float-repr/Objects/unicodeobject.c Tue Mar 31 19:06:57 2009
@@ -8792,43 +8792,14 @@
return NULL;
}
-static Py_ssize_t
-strtounicode(Py_UNICODE *buffer, const char *charbuffer)
+static void
+strtounicode(Py_UNICODE *buffer, const char *charbuffer, Py_ssize_t len)
{
register Py_ssize_t i;
- Py_ssize_t len = strlen(charbuffer);
for (i = len - 1; i >= 0; i--)
buffer[i] = (Py_UNICODE) charbuffer[i];
-
- return len;
-}
-
-static int
-doubletounicode(Py_UNICODE *buffer, size_t len, const char *format, double x)
-{
- Py_ssize_t result;
-
- PyOS_ascii_formatd((char *)buffer, len, format, x);
- result = strtounicode(buffer, (char *)buffer);
- return Py_SAFE_DOWNCAST(result, Py_ssize_t, int);
}
-#if 0
-static int
-longtounicode(Py_UNICODE *buffer, size_t len, const char *format, long x)
-{
- Py_ssize_t result;
-
- PyOS_snprintf((char *)buffer, len, format, x);
- result = strtounicode(buffer, (char *)buffer);
- return Py_SAFE_DOWNCAST(result, Py_ssize_t, int);
-}
-#endif
-
-/* XXX To save some code duplication, formatfloat/long/int could have been
- shared with stringobject.c, converting from 8-bit to Unicode after the
- formatting is done. */
-
static int
formatfloat(Py_UNICODE *buf,
size_t buflen,
@@ -8837,54 +8808,56 @@
int type,
PyObject *v)
{
- /* fmt = '%#.' + `prec` + `type`
- worst case length = 3 + 10 (len of INT_MAX) + 1 = 14 (use 20)*/
- char fmt[20];
+ /* eric.smith: To minimize disturbances in PyUnicode_Format (the
+ only caller of this routine), I'm going to keep the existing
+ API to this function. That means that we'll allocate memory and
+ then copy back into the supplied buffer. But that's better than
+ all of the changes that would be required in PyUnicode_Format
+ because it does lots of memory management tricks. */
+
+ char* p = NULL;
+ int result = -1;
double x;
+ Py_ssize_t len;
x = PyFloat_AsDouble(v);
if (x == -1.0 && PyErr_Occurred())
- return -1;
+ goto done;
if (prec < 0)
prec = 6;
+
/* make sure that the decimal representation of precision really does
need at most 10 digits: platforms with sizeof(int) == 8 exist! */
if (prec > 0x7fffffffL) {
PyErr_SetString(PyExc_OverflowError,
"outrageously large precision "
"for formatted float");
- return -1;
+ goto done;
}
- if (type == 'f' && fabs(x) >= 1e50)
- type = 'g';
- /* Worst case length calc to ensure no buffer overrun:
-
- 'g' formats:
- fmt = %#.<prec>g
- buf = '-' + [0-9]*prec + '.' + 'e+' + (longest exp
- for any double rep.)
- len = 1 + prec + 1 + 2 + 5 = 9 + prec
-
- 'f' formats:
- buf = '-' + [0-9]*x + '.' + [0-9]*prec (with x < 50)
- len = 1 + 50 + 1 + prec = 52 + prec
-
- If prec=0 the effective precision is 1 (the leading digit is
- always given), therefore increase the length by one.
-
- */
if (((type == 'g' || type == 'G') &&
buflen <= (size_t)10 + (size_t)prec) ||
- (type == 'f' && buflen <= (size_t)53 + (size_t)prec)) {
+ ((type == 'f' || type == 'F') &&
+ buflen <= (size_t)53 + (size_t)prec)) {
PyErr_SetString(PyExc_OverflowError,
"formatted float is too long (precision too large?)");
- return -1;
+ goto done;
}
- PyOS_snprintf(fmt, sizeof(fmt), "%%%s.%d%c",
- (flags&F_ALT) ? "#" : "",
- prec, type);
- return doubletounicode(buf, buflen, fmt, x);
+
+ p = PyOS_double_to_string(x, 2, type, prec,
+ (flags & F_ALT) ? Py_DTSF_ALT : 0);
+ len = strlen(p);
+ if (len+1 >= buflen) {
+ /* Caller supplied buffer is not large enough. */
+ PyErr_NoMemory();
+ goto done;
+ }
+ strtounicode(buf, p, len);
+ result = Py_SAFE_DOWNCAST(len, Py_ssize_t, int);
+
+done:
+ PyMem_Free(p);
+ return result;
}
static PyObject*
@@ -8903,84 +8876,6 @@
return result;
}
-#if 0
-static int
-formatint(Py_UNICODE *buf,
- size_t buflen,
- int flags,
- int prec,
- int type,
- PyObject *v)
-{
- /* fmt = '%#.' + `prec` + 'l' + `type`
- * worst case length = 3 + 19 (worst len of INT_MAX on 64-bit machine)
- * + 1 + 1
- * = 24
- */
- char fmt[64]; /* plenty big enough! */
- char *sign;
- long x;
-
- x = PyLong_AsLong(v);
- if (x == -1 && PyErr_Occurred())
- return -1;
- if (x < 0 && type == 'u') {
- type = 'd';
- }
- if (x < 0 && (type == 'x' || type == 'X' || type == 'o'))
- sign = "-";
- else
- sign = "";
- if (prec < 0)
- prec = 1;
-
- /* buf = '+'/'-'/'' + '0'/'0x'/'' + '[0-9]'*max(prec, len(x in octal))
- * worst case buf = '-0x' + [0-9]*prec, where prec >= 11
- */
- if (buflen <= 14 || buflen <= (size_t)3 + (size_t)prec) {
- PyErr_SetString(PyExc_OverflowError,
- "formatted integer is too long (precision too large?)");
- return -1;
- }
-
- if ((flags & F_ALT) &&
- (type == 'x' || type == 'X' || type == 'o')) {
- /* When converting under %#o, %#x or %#X, there are a number
- * of issues that cause pain:
- * - for %#o, we want a different base marker than C
- * - when 0 is being converted, the C standard leaves off
- * the '0x' or '0X', which is inconsistent with other
- * %#x/%#X conversions and inconsistent with Python's
- * hex() function
- * - there are platforms that violate the standard and
- * convert 0 with the '0x' or '0X'
- * (Metrowerks, Compaq Tru64)
- * - there are platforms that give '0x' when converting
- * under %#X, but convert 0 in accordance with the
- * standard (OS/2 EMX)
- *
- * We can achieve the desired consistency by inserting our
- * own '0x' or '0X' prefix, and substituting %x/%X in place
- * of %#x/%#X.
- *
- * Note that this is the same approach as used in
- * formatint() in stringobject.c
- */
- PyOS_snprintf(fmt, sizeof(fmt), "%s0%c%%.%dl%c",
- sign, type, prec, type);
- }
- else {
- PyOS_snprintf(fmt, sizeof(fmt), "%s%%%s.%dl%c",
- sign, (flags&F_ALT) ? "#" : "",
- prec, type);
- }
- if (sign[0])
- return longtounicode(buf, buflen, fmt, -x);
- else
- return longtounicode(buf, buflen, fmt, x);
-}
-#endif
-
static int
formatchar(Py_UNICODE *buf,
size_t buflen,
@@ -9359,8 +9254,6 @@
case 'F':
case 'g':
case 'G':
- if (c == 'F')
- c = 'f';
pbuf = formatbuf;
len = formatfloat(pbuf, sizeof(formatbuf)/sizeof(Py_UNICODE),
flags, prec, c, v);
Modified: python/branches/py3k-short-float-repr/Python/marshal.c
==============================================================================
--- python/branches/py3k-short-float-repr/Python/marshal.c (original)
+++ python/branches/py3k-short-float-repr/Python/marshal.c Tue Mar 31 19:06:57 2009
@@ -237,7 +237,7 @@
}
else {
char *buf = PyOS_double_to_string(PyFloat_AS_DOUBLE(v),
- 0, 'g', 0, 0, 1);
+ 0, 'g', 0, Py_DTSF_ADD_DOT_0);
if (!buf)
return;
n = strlen(buf);
@@ -269,7 +269,7 @@
char *buf;
w_byte(TYPE_COMPLEX, p);
buf = PyOS_double_to_string(PyComplex_RealAsDouble(v),
- 0, 'g', 0, 0, 1);
+ 0, 'g', 0, Py_DTSF_ADD_DOT_0);
if (!buf)
return;
n = strlen(buf);
@@ -277,7 +277,7 @@
w_string(buf, (int)n, p);
PyMem_Free(buf);
buf = PyOS_double_to_string(PyComplex_ImagAsDouble(v),
- 0, 'g', 0, 0, 1);
+ 0, 'g', 0, Py_DTSF_ADD_DOT_0);
if (!buf)
return;
n = strlen(buf);
Modified: python/branches/py3k-short-float-repr/Python/pystrtod.c
==============================================================================
--- python/branches/py3k-short-float-repr/Python/pystrtod.c (original)
+++ python/branches/py3k-short-float-repr/Python/pystrtod.c Tue Mar 31 19:06:57 2009
@@ -500,16 +500,36 @@
}
+/* I'm using a lookup table here so that I don't have to invent a non-locale
+ specific way to convert to uppercase */
+#define OFS_INF 0
+#define OFS_NAN 1
+#define OFS_E 2
+
+/* The lengths of these are known to the code below, so don't change them */
+static char *lc_float_strings[] = {
+ "inf",
+ "nan",
+ "e",
+};
+static char *uc_float_strings[] = {
+ "INF",
+ "NAN",
+ "E",
+};
+
+
/* convert a Python float to a minimal string that evaluates back to that
float. The output is minimal in the sense of having the least possible
number of significant digits. */
static void
-format_float_short(char *buf, size_t buflen, double d, int mode, int precision, int always_add_sign, int add_dot_0_if_integer)
+format_float_short(char *buf, size_t buflen, double d, char format_code, int mode, int precision, int always_add_sign, int add_dot_0_if_integer, char **float_strings)
{
char *digits, *digits_end;
int decpt, sign, exp_len;
- size_t digits_len, i;
+ Py_ssize_t digits_len, i;
+ int use_exp;
/* _Py_dg_dtoa returns a digit string (no decimal point
or exponent) */
@@ -528,11 +548,11 @@
else if (always_add_sign) {
*buf++ = '+';
}
- strncpy(buf, "inf", 3);
+ strncpy(buf, float_strings[OFS_INF], 3);
buf += 3;
}
else if (digits[0] == 'n' || digits[0] == 'N') {
- strncpy(buf, "nan", 3);
+ strncpy(buf, float_strings[OFS_NAN], 3);
buf += 3;
}
else {
@@ -543,19 +563,55 @@
(int)digits_len, digits);
assert(0);
}
+ *buf = '\0';
+ return;
}
- else if (-4 < decpt && decpt <= 17) {
- if (sign == 1) {
- *buf++ = '-';
- } else if (always_add_sign) {
- *buf++ = '+';
- }
- /* use fixed-point notation if 1e-4 <= value < 1e17 */
+
+ /* this replaces the various tests in other places like:
+ if (type == 'f' && fabs(x) >= 1e50)
+ type = 'g';
+ over time, those tests should be deleted
+ */
+ if (decpt > 50 && format_code == 'f')
+ format_code = 'g';
+
+ /* detect if we're using exponents or not */
+ if (format_code == 'e')
+ use_exp = 1;
+ else {
+ int min_decpt = -4;
+ int max_decpt = 17;
+ if (format_code == 'g')
+ max_decpt = 6;
+ if (min_decpt < decpt && decpt <= max_decpt)
+ use_exp = 0;
+ else
+ use_exp = 1;
+ }
+
+ /* we got digits back, format them */
+
+ if (sign == 1) {
+ *buf++ = '-';
+ } else if (always_add_sign) {
+ *buf++ = '+';
+ }
+
+ if (use_exp) {
+ /* exponential notation: d[.dddd]e(+|-)ee;
+ at least 2 digits in exponent */
+ *buf++ = digits[0];
+ *buf++ = '.';
+ strncpy(buf, digits+1, digits_len-1);
+ buf += digits_len-1;
+
+ } else {
+ /* use fixed-point notation */
if (decpt <= 0) {
/* output: 0.00...00dd...dd */
*buf++ = '0';
*buf++ = '.';
- for (i=0; i < -decpt; i++)
+ for (i = 0; i < -decpt; i++)
*buf++ = '0';
strncpy(buf, digits, digits_len);
buf += digits_len;
@@ -572,52 +628,84 @@
/* decpt >= digits_len. output: dd...dd00...00.0 */
strncpy(buf, digits, digits_len);
buf += digits_len;
- for (i=0; i < decpt-digits_len; i++)
- *buf++ = '0';
- if (add_dot_0_if_integer) {
- *buf++ = '.';
+ for (i = 0; i < decpt-digits_len; i++)
*buf++ = '0';
- }
+ *buf++ = '.';
}
}
- else {
- /* exponential notation: d[.dddd]e(+|-)ee;
- at least 2 digits in exponent */
- if (sign == 1) {
- *buf++ = '-';
- } else if (always_add_sign) {
- *buf++ = '+';
- }
- *buf++ = digits[0];
- if (digits_len > 1) {
- *buf++ = '.';
- strncpy(buf, digits+1, digits_len-1);
- buf += digits_len-1;
+
+ /* Add trailing non-significant zeros for non-mode 0 and non-code g */
+ if (mode != 0 && format_code != 'g') {
+ Py_ssize_t nzeros = precision - digits_len;
+
+ /* It should never be the case that nzeros is negative, but
+ check anyway. And while we're at it, skip 0 zeros. */
+ if (nzeros > 0) {
+ for (i = 0; i < nzeros; i++)
+ *buf++ = '0';
}
- *buf++ = 'e';
+ }
+
+ /* See if we want to have the trailing decimal or not */
+ if (format_code == 'g' && buf[-1] == '.')
+ buf--;
+
+ /* Now that we've done zero padding, add an exponent if needed. */
+ if (use_exp) {
+ *buf++ = float_strings[OFS_E][0];
exp_len = sprintf(buf, "%+.02d", decpt-1);
buf += exp_len;
}
+
*buf++ = '\0';
}
+
PyAPI_FUNC(char *) PyOS_double_to_string(double val,
int mode,
char format_code,
int precision,
- int always_add_sign,
- int add_dot_0_if_integer)
+ int flags)
{
- char fmt[32];
char* buf = (char *)PyMem_Malloc(512);
+ char lc_format_code = format_code;
+ char** float_strings = lc_float_strings;
+
+ switch (format_code) {
+ case 'e':
+ case 'f':
+ case 'g':
+ break;
+ case 'E':
+ lc_format_code = 'e';
+ break;
+ case 'F':
+ lc_format_code = 'f';
+ break;
+ case 'G':
+ lc_format_code = 'g';
+ break;
+ default:
+ PyErr_BadInternalCall();
+ return NULL;
+ }
+
+ if (format_code != lc_format_code)
+ float_strings = uc_float_strings;
+
+ /* don't touch precision if we're in mode 0, it should stay 0. if
+ we're not using 'g', add one to the precision because we need to
+ include the digit before the decimal. */
+ if (mode != 0 && lc_format_code != 'g')
+ precision += 1;
-// printf("in PyOS_double_to_string\n");
+// printf("in PyOS_double_to_string %c %c\n", format_code, lc_format_code);
if (!buf)
return NULL;
/* XXX validate format_code */
- format_float_short(buf, 512, val, mode, precision, always_add_sign, add_dot_0_if_integer);
+ format_float_short(buf, 512, val, lc_format_code, mode, precision, flags & Py_DTSF_SIGN, flags & Py_DTSF_ADD_DOT_0, float_strings);
return buf;
}
More information about the Python-checkins
mailing list