[Python-checkins] r76465 - in python/trunk: Lib/test/string_tests.py Misc/NEWS Objects/stringobject.c Objects/unicodeobject.c

Mon Nov 23 19:46:41 CET 2009

Author: mark.dickinson
Date: Mon Nov 23 19:46:41 2009
New Revision: 76465

Log:
Remove the restriction on precision when formatting floats with the
'%' operator.  This is the first step towards removing the switch from
'%f' to '%g' formatting for large values (see issues 7117 and 5859).


Modified:
   python/trunk/Lib/test/string_tests.py
   python/trunk/Misc/NEWS
   python/trunk/Objects/stringobject.c
   python/trunk/Objects/unicodeobject.c

Modified: python/trunk/Lib/test/string_tests.py
==============================================================================

--- python/trunk/Lib/test/string_tests.py	(original)
+++ python/trunk/Lib/test/string_tests.py	Mon Nov 23 19:46:41 2009
@@ -1090,14 +1090,7 @@
             value = 0.01
             for x in xrange(60):
                 value = value * 3.141592655 / 3.0 * 10.0
-                # The formatfloat() code in stringobject.c and
-                # unicodeobject.c uses a 120 byte buffer and switches from
-                # 'f' formatting to 'g' at precision 50, so we expect
-                # OverflowErrors for the ranges x < 50 and prec >= 67.
-                if x < 50 and prec >= 67:
-                    self.checkraises(OverflowError, format, "__mod__", value)
-                else:
-                    self.checkcall(format, "__mod__", value)
+                self.checkcall(format, "__mod__", value)
 
     def test_inplace_rewrites(self):
         # Check that strings don't copy and modify cached single-character strings

Modified: python/trunk/Misc/NEWS
==============================================================================
--- python/trunk/Misc/NEWS	(original)
+++ python/trunk/Misc/NEWS	Mon Nov 23 19:46:41 2009
@@ -12,6 +12,10 @@
 Core and Builtins
 -----------------
 
+- Remove restrictions on precision when formatting floats.  E.g.,
+  "%.120g" % 1e-100 used to raise OverflowError, but now gives the
+  requested 120 significant digits instead.
+
 - Add Py3k warnings for parameter names in parenthesis.
 
 - Issue #7362: Give a proper error message for def f((x)=3): pass.

Modified: python/trunk/Objects/stringobject.c
==============================================================================
--- python/trunk/Objects/stringobject.c	(original)
+++ python/trunk/Objects/stringobject.c	Mon Nov 23 19:46:41 2009
@@ -4379,72 +4379,36 @@
 #define F_ALT	(1<<3)
 #define F_ZERO	(1<<4)
 
-Py_LOCAL_INLINE(int)
-formatfloat(char *buf, size_t buflen, int flags,
-            int prec, int type, PyObject *v)
+/* Returns a new reference to a PyString object, or NULL on failure. */
+
+static PyObject *
+formatfloat(PyObject *v, int flags, int prec, int type)
 {
-	char *tmp;
+	char *p;
+	PyObject *result;
 	double x;
-	Py_ssize_t len;
 
 	x = PyFloat_AsDouble(v);
 	if (x == -1.0 && PyErr_Occurred()) {
 		PyErr_Format(PyExc_TypeError, "float argument required, "
 			     "not %.200s", Py_TYPE(v)->tp_name);
-		return -1;
+		return NULL;
 	}
+
 	if (prec < 0)
 		prec = 6;
-#if SIZEOF_INT > 4
-	/* make sure that the decimal representation of precision really does
-	   need at most 10 digits: platforms with sizeof(int) == 8 exist! */
-	if (prec > 0x7fffffff) {
-		PyErr_SetString(PyExc_OverflowError,
-				"outrageously large precision "
-				"for formatted float");
-		return -1;
-	}
-#endif
 
 	if (type == 'f' && fabs(x) >= 1e50)
 		type = 'g';
-	/* Worst case length calc to ensure no buffer overrun:
 
-	   'g' formats:
-	     fmt = %#.<prec>g
-	     buf = '-' + [0-9]*prec + '.' + 'e+' + (longest exp
-	        for any double rep.)
-	     len = 1 + prec + 1 + 2 + 5 = 9 + prec
-
-	   'f' formats:
-	     buf = '-' + [0-9]*x + '.' + [0-9]*prec (with x < 50)
-	     len = 1 + 50 + 1 + prec = 52 + prec
-
-	   If prec=0 the effective precision is 1 (the leading digit is
-	   always given), therefore increase the length by one.
-
-	*/
-	if (((type == 'g' || type == 'G') &&
-              buflen <= (size_t)10 + (size_t)prec) ||
-	    (type == 'f' && buflen <= (size_t)53 + (size_t)prec)) {
-		PyErr_SetString(PyExc_OverflowError,
-			"formatted float is too long (precision too large?)");
-		return -1;
-	}
-	tmp = PyOS_double_to_string(x, type, prec,
-				    (flags&F_ALT)?Py_DTSF_ALT:0, NULL);
-	if (!tmp)
-		return -1;
-	len = strlen(tmp);
-	if (len >= buflen) {
-		PyErr_SetString(PyExc_OverflowError,
-			"formatted float is too long (precision too large?)");
-		PyMem_Free(tmp);
-		return -1;
-	}
-	strcpy(buf, tmp);
-	PyMem_Free(tmp);
-	return (int)len;
+	p = PyOS_double_to_string(x, type, prec,
+				  (flags & F_ALT) ? Py_DTSF_ALT : 0, NULL);
+
+	if (p == NULL)
+		return NULL;
+	result = PyString_FromStringAndSize(p, strlen(p));
+	PyMem_Free(p);
+	return result;
 }
 
 /* _PyString_FormatLong emulates the format codes d, u, o, x and X, and
@@ -4684,7 +4648,7 @@
 
 /* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)
 
-   FORMATBUFLEN is the length of the buffer in which the floats, ints, &
+   FORMATBUFLEN is the length of the buffer in which the ints &
    chars are formatted. XXX This is a magic number. Each formatting
    routine does bounds checking to ensure no overflow, but a better
    solution may be to malloc a buffer of appropriate size for each
@@ -4754,7 +4718,7 @@
 			int sign;
 			Py_ssize_t len;
 			char formatbuf[FORMATBUFLEN];
-			     /* For format{float,int,char}() */
+			     /* For format{int,char}() */
 #ifdef Py_USING_UNICODE
 			char *fmt_start = fmt;
 			Py_ssize_t argidx_start = argidx;
@@ -5007,11 +4971,11 @@
 			case 'G':
 				if (c == 'F')
 					c = 'f';
-				pbuf = formatbuf;
-				len = formatfloat(pbuf, sizeof(formatbuf),
-						  flags, prec, c, v);
-				if (len < 0)
+				temp = formatfloat(v, flags, prec, c);
+				if (temp == NULL)
 					goto error;
+				pbuf = PyString_AS_STRING(temp);
+				len = PyString_GET_SIZE(temp);
 				sign = 1;
 				if (flags & F_ZERO)
 					fill = '0';

Modified: python/trunk/Objects/unicodeobject.c
==============================================================================
--- python/trunk/Objects/unicodeobject.c	(original)
+++ python/trunk/Objects/unicodeobject.c	Mon Nov 23 19:46:41 2009
@@ -8302,68 +8302,32 @@
    shared with stringobject.c, converting from 8-bit to Unicode after the
    formatting is done. */
 
-static int
-formatfloat(Py_UNICODE *buf,
-            size_t buflen,
-            int flags,
-            int prec,
-            int type,
-            PyObject *v)
+/* Returns a new reference to a PyUnicode object, or NULL on failure. */
+
+static PyObject *
+formatfloat(PyObject *v, int flags, int prec, int type)
 {
+    char *p;
+    PyObject *result;
     double x;
-    Py_ssize_t result;
-    char *tmp;
 
     x = PyFloat_AsDouble(v);
     if (x == -1.0 && PyErr_Occurred())
-        return -1;
+        return NULL;
+
     if (prec < 0)
         prec = 6;
-#if SIZEOF_INT > 4
-    /* make sure that the decimal representation of precision really does
-       need at most 10 digits: platforms with sizeof(int) == 8 exist! */
-    if (prec > 0x7fffffff) {
-        PyErr_SetString(PyExc_OverflowError,
-                        "outrageously large precision "
-                        "for formatted float");
-        return -1;
-    }
-#endif
 
     if (type == 'f' && fabs(x) >= 1e50)
         type = 'g';
-    /* Worst case length calc to ensure no buffer overrun:
 
-       'g' formats:
-       fmt = %#.<prec>g
-       buf = '-' + [0-9]*prec + '.' + 'e+' + (longest exp
-       for any double rep.)
-       len = 1 + prec + 1 + 2 + 5 = 9 + prec
-
-       'f' formats:
-       buf = '-' + [0-9]*x + '.' + [0-9]*prec (with x < 50)
-       len = 1 + 50 + 1 + prec = 52 + prec
-
-       If prec=0 the effective precision is 1 (the leading digit is
-       always given), therefore increase the length by one.
-
-    */
-    if (((type == 'g' || type == 'G') &&
-         buflen <= (size_t)10 + (size_t)prec) ||
-        (type == 'f' && buflen <= (size_t)53 + (size_t)prec)) {
-        PyErr_SetString(PyExc_OverflowError,
-                        "formatted float is too long (precision too large?)");
-        return -1;
-    }
-
-    tmp = PyOS_double_to_string(x, type, prec,
-                                (flags&F_ALT)?Py_DTSF_ALT:0, NULL);
-    if (!tmp)
-        return -1;
-
-    result = strtounicode(buf, tmp);
-    PyMem_Free(tmp);
-    return Py_SAFE_DOWNCAST(result, Py_ssize_t, int);
+    p = PyOS_double_to_string(x, type, prec,
+                              (flags & F_ALT) ? Py_DTSF_ALT : 0, NULL);
+    if (p == NULL)
+        return NULL;
+    result = PyUnicode_FromStringAndSize(p, strlen(p));
+    PyMem_Free(p);
+    return result;
 }
 
 static PyObject*
@@ -8516,7 +8480,7 @@
 
 /* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)
 
-   FORMATBUFLEN is the length of the buffer in which the floats, ints, &
+   FORMATBUFLEN is the length of the buffer in which the ints &
    chars are formatted. XXX This is a magic number. Each formatting
    routine does bounds checking to ensure no overflow, but a better
    solution may be to malloc a buffer of appropriate size for each
@@ -8587,7 +8551,7 @@
             Py_UNICODE *pbuf;
             Py_UNICODE sign;
             Py_ssize_t len;
-            Py_UNICODE formatbuf[FORMATBUFLEN]; /* For format{float,int,char}() */
+            Py_UNICODE formatbuf[FORMATBUFLEN]; /* For format{int,char}() */
 
             fmt++;
             if (*fmt == '(') {
@@ -8850,11 +8814,11 @@
             case 'G':
                 if (c == 'F')
                     c = 'f';
-                pbuf = formatbuf;
-                len = formatfloat(pbuf, sizeof(formatbuf)/sizeof(Py_UNICODE),
-                                  flags, prec, c, v);
-                if (len < 0)
+                temp = formatfloat(v, flags, prec, c);
+                if (temp == NULL)
                     goto onError;
+                pbuf = PyUnicode_AS_UNICODE(temp);
+                len = PyUnicode_GET_SIZE(temp);
                 sign = 1;
                 if (flags & F_ZERO)
                     fill = '0';