[Python-checkins] r70841 - in python/branches/py3k-short-float-repr: Include/pystrtod.h Lib/test/test_format.py Lib/test/test_types.py Modules/_pickle.c Objects/complexobject.c Objects/floatobject.c Objects/unicodeobject.c Python/marshal.c Python/pystrtod.c

Tue Mar 31 19:06:58 CEST 2009

Author: eric.smith
Date: Tue Mar 31 19:06:57 2009
New Revision: 70841

Log:
Checkpoint so Mark and I can continue working.

Changes:
Hooked up unicodeobject.c so that %-formatting works.
Added trailing zeros.
Added a single 'flags' param to PyOS_double_to_string instead of individual flag parameters.
Added upper and lower case format codes g/G e/E f/F. This addresses issue 3382, also.

Many tests still fail, but I'll work on them next.

Modified:
   python/branches/py3k-short-float-repr/Include/pystrtod.h
   python/branches/py3k-short-float-repr/Lib/test/test_format.py
   python/branches/py3k-short-float-repr/Lib/test/test_types.py
   python/branches/py3k-short-float-repr/Modules/_pickle.c
   python/branches/py3k-short-float-repr/Objects/complexobject.c
   python/branches/py3k-short-float-repr/Objects/floatobject.c
   python/branches/py3k-short-float-repr/Objects/unicodeobject.c
   python/branches/py3k-short-float-repr/Python/marshal.c
   python/branches/py3k-short-float-repr/Python/pystrtod.c

Modified: python/branches/py3k-short-float-repr/Include/pystrtod.h
==============================================================================

--- python/branches/py3k-short-float-repr/Include/pystrtod.h	(original)
+++ python/branches/py3k-short-float-repr/Include/pystrtod.h	Tue Mar 31 19:06:57 2009
@@ -13,10 +13,13 @@
                                          int mode,
                                          char format_code,
                                          int precision,
-                                         int sign,
-                                         int add_dot_0_if_integer);
+                                         int flags);
 
 
+#define Py_DTSF_SIGN      0x01 /* always add the sign */
+#define Py_DTSF_ADD_DOT_0 0x02 /* if the result is an integer add ".0" */
+#define Py_DTSF_ALT       0x04 /* "alternate" formatting. it's format_code
+                                  specific */
 
 #ifdef __cplusplus
 }

Modified: python/branches/py3k-short-float-repr/Lib/test/test_format.py
==============================================================================
--- python/branches/py3k-short-float-repr/Lib/test/test_format.py	(original)
+++ python/branches/py3k-short-float-repr/Lib/test/test_format.py	Tue Mar 31 19:06:57 2009
@@ -220,6 +220,11 @@
         testformat("%a", "\u0378", "'\\u0378'")  # non printable
         testformat("%r", "\u0374", "'\u0374'")   # printable
         testformat("%a", "\u0374", "'\\u0374'")  # printable
+
+        # alternate float formatting
+        testformat('%g', 1.1, '1.1')
+        testformat('%#g', 1.1, '1.10000')
+
         # Test exception for unknown format characters
         if verbose:
             print('Testing exceptions')

Modified: python/branches/py3k-short-float-repr/Lib/test/test_types.py
==============================================================================
--- python/branches/py3k-short-float-repr/Lib/test/test_types.py	(original)
+++ python/branches/py3k-short-float-repr/Lib/test/test_types.py	Tue Mar 31 19:06:57 2009
@@ -113,6 +113,9 @@
         self.assertEqual(1.5e-101.__format__('e'), '1.500000e-101')
         self.assertEqual('%e' % 1.5e-101, '1.500000e-101')
 
+        self.assertEqual('%g' % 1.0, '1')
+        self.assertEqual('%#g' % 1.0, '1.00000')
+
     def test_normal_integers(self):
         # Ensure the first 256 integers are shared
         a = 256

Modified: python/branches/py3k-short-float-repr/Modules/_pickle.c
==============================================================================
--- python/branches/py3k-short-float-repr/Modules/_pickle.c	(original)
+++ python/branches/py3k-short-float-repr/Modules/_pickle.c	Tue Mar 31 19:06:57 2009
@@ -1016,7 +1016,7 @@
             return -1;
         if (pickler_write(self, pdata, 9) < 0)
             return -1;
-    }
+   } 
     else {
         int result = -1;
         char *buf = NULL;
@@ -1025,7 +1025,7 @@
         if (pickler_write(self, &op, 1) < 0)
             goto done;
 
-        buf = PyOS_double_to_string(x, 2, 'g', 17, 0, 0);
+        buf = PyOS_double_to_string(x, 2, 'g', 17, 0);
         if (!buf) {
             PyErr_NoMemory();
             goto done;

Modified: python/branches/py3k-short-float-repr/Objects/complexobject.c
==============================================================================
--- python/branches/py3k-short-float-repr/Objects/complexobject.c	(original)
+++ python/branches/py3k-short-float-repr/Objects/complexobject.c	Tue Mar 31 19:06:57 2009
@@ -368,8 +368,7 @@
                 im = "-inf*";
         }
         else {
-            pim = PyOS_double_to_string(v->cval.imag, mode, 'g', precision,
-                                        0, 0);
+            pim = PyOS_double_to_string(v->cval.imag, mode, 'g', precision, 0);
             if (!pim) {
                 PyErr_NoMemory();
                 goto done;
@@ -388,8 +387,7 @@
                 re = "-inf";
         }
         else {
-            pre = PyOS_double_to_string(v->cval.real, mode, 'g', precision,
-                                        0, 0);
+            pre = PyOS_double_to_string(v->cval.real, mode, 'g', precision, 0);
             if (!pre) {
                 PyErr_NoMemory();
                 goto done;
@@ -408,7 +406,7 @@
         }
         else {
             pim = PyOS_double_to_string(v->cval.imag, mode, 'g', precision,
-                                        1, 0);
+                                        Py_DTSF_SIGN);
             if (!pim) {
                 PyErr_NoMemory();
                 goto done;

Modified: python/branches/py3k-short-float-repr/Objects/floatobject.c
==============================================================================
--- python/branches/py3k-short-float-repr/Objects/floatobject.c	(original)
+++ python/branches/py3k-short-float-repr/Objects/floatobject.c	Tue Mar 31 19:06:57 2009
@@ -369,7 +369,7 @@
 {
     PyObject *result;
     char *buf = PyOS_double_to_string(PyFloat_AS_DOUBLE(v),
-                                      mode, 'g', precision, 0, 1);
+                                      mode, 'g', precision, Py_DTSF_ADD_DOT_0);
     if (!buf)
         return PyErr_NoMemory();
     result = PyUnicode_FromString(buf);
@@ -1917,7 +1917,7 @@
 				    Py_REFCNT(p) != 0) {
 					char *buf = PyOS_double_to_string(
 						PyFloat_AS_DOUBLE(p), 0, 'g',
-						0, 0, 1);
+						0, Py_DTSF_ADD_DOT_0);
 					if (buf) {
 						/* XXX(twouters) cast
 						   refcount to long

Modified: python/branches/py3k-short-float-repr/Objects/unicodeobject.c
==============================================================================
--- python/branches/py3k-short-float-repr/Objects/unicodeobject.c	(original)
+++ python/branches/py3k-short-float-repr/Objects/unicodeobject.c	Tue Mar 31 19:06:57 2009
@@ -8792,43 +8792,14 @@
     return NULL;
 }
 
-static Py_ssize_t
-strtounicode(Py_UNICODE *buffer, const char *charbuffer)
+static void
+strtounicode(Py_UNICODE *buffer, const char *charbuffer, Py_ssize_t len)
 {
     register Py_ssize_t i;
-    Py_ssize_t len = strlen(charbuffer);
     for (i = len - 1; i >= 0; i--)
         buffer[i] = (Py_UNICODE) charbuffer[i];
-
-    return len;
-}
-
-static int
-doubletounicode(Py_UNICODE *buffer, size_t len, const char *format, double x)
-{
-    Py_ssize_t result;
-
-    PyOS_ascii_formatd((char *)buffer, len, format, x);
-    result = strtounicode(buffer, (char *)buffer);
-    return Py_SAFE_DOWNCAST(result, Py_ssize_t, int);
 }
 
-#if 0
-static int
-longtounicode(Py_UNICODE *buffer, size_t len, const char *format, long x)
-{
-    Py_ssize_t result;
-
-    PyOS_snprintf((char *)buffer, len, format, x);
-    result = strtounicode(buffer, (char *)buffer);
-    return Py_SAFE_DOWNCAST(result, Py_ssize_t, int);
-}
-#endif
-
-/* XXX To save some code duplication, formatfloat/long/int could have been
-   shared with stringobject.c, converting from 8-bit to Unicode after the
-   formatting is done. */
-
 static int
 formatfloat(Py_UNICODE *buf,
             size_t buflen,
@@ -8837,54 +8808,56 @@
             int type,
             PyObject *v)
 {
-    /* fmt = '%#.' + `prec` + `type`
-       worst case length = 3 + 10 (len of INT_MAX) + 1 = 14 (use 20)*/
-    char fmt[20];
+    /* eric.smith: To minimize disturbances in PyUnicode_Format (the
+       only caller of this routine), I'm going to keep the existing
+       API to this function. That means that we'll allocate memory and
+       then copy back into the supplied buffer. But that's better than
+       all of the changes that would be required in PyUnicode_Format
+       because it does lots of memory management tricks. */
+
+    char* p = NULL;
+    int result = -1;
     double x;
+    Py_ssize_t len;
 
     x = PyFloat_AsDouble(v);
     if (x == -1.0 && PyErr_Occurred())
-        return -1;
+        goto done;
     if (prec < 0)
         prec = 6;
+
     /* make sure that the decimal representation of precision really does
        need at most 10 digits: platforms with sizeof(int) == 8 exist! */
     if (prec > 0x7fffffffL) {
         PyErr_SetString(PyExc_OverflowError,
                         "outrageously large precision "
                         "for formatted float");
-        return -1;
+        goto done;
     }
 
-    if (type == 'f' && fabs(x) >= 1e50)
-        type = 'g';
-    /* Worst case length calc to ensure no buffer overrun:
-
-       'g' formats:
-       fmt = %#.<prec>g
-       buf = '-' + [0-9]*prec + '.' + 'e+' + (longest exp
-       for any double rep.)
-       len = 1 + prec + 1 + 2 + 5 = 9 + prec
-
-       'f' formats:
-       buf = '-' + [0-9]*x + '.' + [0-9]*prec (with x < 50)
-       len = 1 + 50 + 1 + prec = 52 + prec
-
-       If prec=0 the effective precision is 1 (the leading digit is
-       always given), therefore increase the length by one.
-
-    */
     if (((type == 'g' || type == 'G') &&
          buflen <= (size_t)10 + (size_t)prec) ||
-        (type == 'f' && buflen <= (size_t)53 + (size_t)prec)) {
+        ((type == 'f' || type == 'F') &&
+         buflen <= (size_t)53 + (size_t)prec)) {
         PyErr_SetString(PyExc_OverflowError,
                         "formatted float is too long (precision too large?)");
-        return -1;
+        goto done;
     }
-    PyOS_snprintf(fmt, sizeof(fmt), "%%%s.%d%c",
-                  (flags&F_ALT) ? "#" : "",
-                  prec, type);
-    return doubletounicode(buf, buflen, fmt, x);
+
+    p = PyOS_double_to_string(x, 2, type, prec,
+                              (flags & F_ALT) ? Py_DTSF_ALT : 0);
+    len = strlen(p);
+    if (len+1 >= buflen) {
+        /* Caller supplied buffer is not large enough. */
+        PyErr_NoMemory();
+        goto done;
+    }
+    strtounicode(buf, p, len);
+    result = Py_SAFE_DOWNCAST(len, Py_ssize_t, int);
+
+done:
+    PyMem_Free(p);
+    return result;
 }
 
 static PyObject*
@@ -8903,84 +8876,6 @@
     return result;
 }
 
-#if 0
-static int
-formatint(Py_UNICODE *buf,
-          size_t buflen,
-          int flags,
-          int prec,
-          int type,
-          PyObject *v)
-{
-    /* fmt = '%#.' + `prec` + 'l' + `type`
-     * worst case length = 3 + 19 (worst len of INT_MAX on 64-bit machine)
-     *                     + 1 + 1
-     *                   = 24
-     */
-    char fmt[64]; /* plenty big enough! */
-    char *sign;
-    long x;
-
-    x = PyLong_AsLong(v);
-    if (x == -1 && PyErr_Occurred())
-        return -1;
-    if (x < 0 && type == 'u') {
-        type = 'd';
-    }
-    if (x < 0 && (type == 'x' || type == 'X' || type == 'o'))
-        sign = "-";
-    else
-        sign = "";
-    if (prec < 0)
-        prec = 1;
-
-    /* buf = '+'/'-'/'' + '0'/'0x'/'' + '[0-9]'*max(prec, len(x in octal))
-     * worst case buf = '-0x' + [0-9]*prec, where prec >= 11
-     */
-    if (buflen <= 14 || buflen <= (size_t)3 + (size_t)prec) {
-        PyErr_SetString(PyExc_OverflowError,
-                        "formatted integer is too long (precision too large?)");
-        return -1;
-    }
-
-    if ((flags & F_ALT) &&
-        (type == 'x' || type == 'X' || type == 'o')) {
-        /* When converting under %#o, %#x or %#X, there are a number
-         * of issues that cause pain:
-         * - for %#o, we want a different base marker than C
-         * - when 0 is being converted, the C standard leaves off
-         *   the '0x' or '0X', which is inconsistent with other
-         *   %#x/%#X conversions and inconsistent with Python's
-         *   hex() function
-         * - there are platforms that violate the standard and
-         *   convert 0 with the '0x' or '0X'
-         *   (Metrowerks, Compaq Tru64)
-         * - there are platforms that give '0x' when converting
-         *   under %#X, but convert 0 in accordance with the
-         *   standard (OS/2 EMX)
-         *
-         * We can achieve the desired consistency by inserting our
-         * own '0x' or '0X' prefix, and substituting %x/%X in place
-         * of %#x/%#X.
-         *
-         * Note that this is the same approach as used in
-         * formatint() in stringobject.c
-         */
-        PyOS_snprintf(fmt, sizeof(fmt), "%s0%c%%.%dl%c",
-                      sign, type, prec, type);
-    }
-    else {
-        PyOS_snprintf(fmt, sizeof(fmt), "%s%%%s.%dl%c",
-                      sign, (flags&F_ALT) ? "#" : "",
-                      prec, type);
-    }
-    if (sign[0])
-        return longtounicode(buf, buflen, fmt, -x);
-    else
-        return longtounicode(buf, buflen, fmt, x);
-}
-#endif
-
 static int
 formatchar(Py_UNICODE *buf,
            size_t buflen,
@@ -9359,8 +9254,6 @@
             case 'F':
             case 'g':
             case 'G':
-                if (c == 'F')
-                    c = 'f';
                 pbuf = formatbuf;
                 len = formatfloat(pbuf, sizeof(formatbuf)/sizeof(Py_UNICODE),
                                   flags, prec, c, v);

Modified: python/branches/py3k-short-float-repr/Python/marshal.c
==============================================================================
--- python/branches/py3k-short-float-repr/Python/marshal.c	(original)
+++ python/branches/py3k-short-float-repr/Python/marshal.c	Tue Mar 31 19:06:57 2009
@@ -237,7 +237,7 @@
 		}
 		else {
 			char *buf = PyOS_double_to_string(PyFloat_AS_DOUBLE(v),
-				0, 'g', 0, 0, 1);
+				0, 'g', 0, Py_DTSF_ADD_DOT_0);
 			if (!buf)
                             return;
 			n = strlen(buf);
@@ -269,7 +269,7 @@
 			char *buf;
 			w_byte(TYPE_COMPLEX, p);
 			buf = PyOS_double_to_string(PyComplex_RealAsDouble(v),
-				0, 'g', 0, 0, 1);
+				0, 'g', 0, Py_DTSF_ADD_DOT_0);
 			if (!buf)
                             return;
 			n = strlen(buf);
@@ -277,7 +277,7 @@
 			w_string(buf, (int)n, p);
 			PyMem_Free(buf);
 			buf = PyOS_double_to_string(PyComplex_ImagAsDouble(v),
-				0, 'g', 0, 0, 1);
+				0, 'g', 0, Py_DTSF_ADD_DOT_0);
 			if (!buf)
                             return;
 			n = strlen(buf);

Modified: python/branches/py3k-short-float-repr/Python/pystrtod.c
==============================================================================
--- python/branches/py3k-short-float-repr/Python/pystrtod.c	(original)
+++ python/branches/py3k-short-float-repr/Python/pystrtod.c	Tue Mar 31 19:06:57 2009
@@ -500,16 +500,36 @@
 }
 
 
+/* I'm using a lookup table here so that I don't have to invent a non-locale
+   specific way to convert to uppercase */
+#define OFS_INF 0
+#define OFS_NAN 1
+#define OFS_E 2
+
+/* The lengths of these are known to the code below, so don't change them */
+static char *lc_float_strings[] = {
+	"inf",
+	"nan",
+	"e",
+};
+static char *uc_float_strings[] = {
+	"INF",
+	"NAN",
+	"E",
+};
+
+
 /* convert a Python float to a minimal string that evaluates back to that
    float.  The output is minimal in the sense of having the least possible
    number of significant digits. */
 
 static void
-format_float_short(char *buf, size_t buflen, double d, int mode, int precision, int always_add_sign, int add_dot_0_if_integer)
+format_float_short(char *buf, size_t buflen, double d, char format_code, int mode, int precision, int always_add_sign, int add_dot_0_if_integer, char **float_strings)
 {
 	char *digits, *digits_end;
 	int decpt, sign, exp_len;
-	size_t digits_len, i;
+	Py_ssize_t digits_len, i;
+	int use_exp;
 
 	/* _Py_dg_dtoa returns a digit string (no decimal point
 	   or exponent) */
@@ -528,11 +548,11 @@
 			else if (always_add_sign) {
 				*buf++ = '+';
 			}
-			strncpy(buf, "inf", 3);
+			strncpy(buf, float_strings[OFS_INF], 3);
 			buf += 3;
 		}
 		else if (digits[0] == 'n' || digits[0] == 'N') {
-			strncpy(buf, "nan", 3);
+			strncpy(buf, float_strings[OFS_NAN], 3);
 			buf += 3;
 		}
 		else {
@@ -543,19 +563,55 @@
 			       (int)digits_len, digits);
 			assert(0);
 		}
+		*buf = '\0';
+		return;
 	}
-	else if (-4 < decpt && decpt <= 17) {
-		if (sign == 1) {
-			*buf++ = '-';
-		} else if (always_add_sign) {
-			*buf++ = '+';
-		}
-		/* use fixed-point notation if 1e-4 <= value < 1e17 */
+
+	/* this replaces the various tests in other places like:
+	    if (type == 'f' && fabs(x) >= 1e50)
+		type = 'g';
+	   over time, those tests should be deleted
+	*/
+	if (decpt > 50 && format_code == 'f')
+		format_code = 'g';
+
+	/* detect if we're using exponents or not */
+	if (format_code == 'e')
+		use_exp = 1;
+	else {
+		int min_decpt = -4;
+		int max_decpt = 17;
+		if (format_code == 'g')
+			max_decpt = 6;
+		if (min_decpt < decpt && decpt <= max_decpt)
+			use_exp = 0;
+		else
+			use_exp = 1;
+	}
+
+	/* we got digits back, format them */
+
+	if (sign == 1) {
+		*buf++ = '-';
+	} else if (always_add_sign) {
+		*buf++ = '+';
+	}
+
+	if (use_exp) {
+		/* exponential notation: d[.dddd]e(+|-)ee;
+		   at least 2 digits in exponent */
+		*buf++ = digits[0];
+		*buf++ = '.';
+		strncpy(buf, digits+1, digits_len-1);
+		buf += digits_len-1;
+
+	} else {
+		/* use fixed-point notation */
 		if (decpt <= 0) {
 			/* output: 0.00...00dd...dd */
 			*buf++ = '0';
 			*buf++ = '.';
-			for (i=0; i < -decpt; i++)
+			for (i = 0; i < -decpt; i++)
 				*buf++ = '0';
 			strncpy(buf, digits, digits_len);
 			buf += digits_len;
@@ -572,52 +628,84 @@
 			/* decpt >= digits_len.  output: dd...dd00...00.0 */
 			strncpy(buf, digits, digits_len);
 			buf += digits_len;
-			for (i=0; i < decpt-digits_len; i++)
-				*buf++ = '0';
-			if (add_dot_0_if_integer) {
-				*buf++ = '.';
+			for (i = 0; i < decpt-digits_len; i++)
 				*buf++ = '0';
-			}
+			*buf++ = '.';
 		}
 	}
-	else {
-		/* exponential notation: d[.dddd]e(+|-)ee;
-		   at least 2 digits in exponent */
-		if (sign == 1) {
-			*buf++ = '-';
-		} else if (always_add_sign) {
-			*buf++ = '+';
-		}
-		*buf++ = digits[0];
-		if (digits_len > 1) {
-			*buf++ = '.';
-			strncpy(buf, digits+1, digits_len-1);
-			buf += digits_len-1;
+
+	/* Add trailing non-significant zeros for non-mode 0 and non-code g */
+	if (mode != 0 && format_code != 'g') {
+		Py_ssize_t nzeros = precision - digits_len;
+
+		/* It should never be the case that nzeros is negative, but
+		   check anyway. And while we're at it, skip 0 zeros. */
+		if (nzeros > 0) {
+			for (i = 0; i < nzeros; i++)
+				*buf++ = '0';
 		}
-		*buf++ = 'e';
+	}
+
+	/* See if we want to have the trailing decimal or not */
+	if (format_code == 'g' && buf[-1] == '.')
+		buf--;
+
+	/* Now that we've done zero padding, add an exponent if needed. */
+	if (use_exp) {
+		*buf++ = float_strings[OFS_E][0];
 		exp_len = sprintf(buf, "%+.02d", decpt-1);
 		buf += exp_len;
 	}
+
 	*buf++ = '\0';
 }
 
+
 PyAPI_FUNC(char *) PyOS_double_to_string(double val,
                                          int mode,
                                          char format_code,
                                          int precision,
-                                         int always_add_sign,
-                                         int add_dot_0_if_integer)
+                                         int flags)
 {
-	char fmt[32];
 	char* buf = (char *)PyMem_Malloc(512);
+	char lc_format_code = format_code;
+	char** float_strings = lc_float_strings;
+
+	switch (format_code) {
+	case 'e':
+	case 'f':
+	case 'g':
+		break;
+	case 'E':
+		lc_format_code = 'e';
+		break;
+	case 'F':
+		lc_format_code = 'f';
+		break;
+	case 'G':
+		lc_format_code = 'g';
+		break;
+	default:
+		PyErr_BadInternalCall();
+		return NULL;
+	}
+
+	if (format_code != lc_format_code)
+		float_strings = uc_float_strings;
+
+	/* don't touch precision if we're in mode 0, it should stay 0. if
+	   we're not using 'g', add one to the precision because we need to
+	   include the digit before the decimal. */
+	if (mode != 0 && lc_format_code != 'g')
+		precision += 1;
 
-//	printf("in PyOS_double_to_string\n");
+//	printf("in PyOS_double_to_string %c %c\n", format_code, lc_format_code);
 	if (!buf)
 		return NULL;
 
 	/* XXX validate format_code */
 
-	format_float_short(buf, 512, val, mode, precision, always_add_sign, add_dot_0_if_integer);
+	format_float_short(buf, 512, val, lc_format_code, mode, precision, flags & Py_DTSF_SIGN, flags & Py_DTSF_ADD_DOT_0, float_strings);
 
 	return buf;
 }