[Python-3000-checkins] r63703 - in python/branches/py3k: Doc/library/collections.rst Doc/library/struct.rst Lib/test/test_ioctl.py Lib/test/test_math.py Lib/test/test_minidom.py Lib/xml/dom/minidom.py Modules/mathmodule.c

Mon May 26 19:36:48 CEST 2008

Author: benjamin.peterson
Date: Mon May 26 19:36:47 2008
New Revision: 63703

Log:
Merged revisions 63542-63544,63546,63553,63563-63564,63567,63569,63576 via svnmerge from 
svn+ssh://pythondev@svn.python.org/python/trunk

........
  r63542 | mark.dickinson | 2008-05-22 20:35:30 -0500 (Thu, 22 May 2008) | 5 lines
  
  Issue #2819: Add math.sum, a function that sums a sequence of floats 
  efficiently but with no intermediate loss of precision.  Based on 
  Raymond Hettinger's ASPN recipe.  Thanks Jean Brouwers for the patch.
........
  r63543 | mark.dickinson | 2008-05-22 21:36:48 -0500 (Thu, 22 May 2008) | 2 lines
  
  Add tests for math.sum (Issue #2819)
........
  r63544 | mark.dickinson | 2008-05-22 22:30:01 -0500 (Thu, 22 May 2008) | 2 lines
  
  Better error reporting in test_math.py
........
  r63546 | raymond.hettinger | 2008-05-22 23:32:43 -0500 (Thu, 22 May 2008) | 1 line
  
  Tweak the comments and formatting.
........
  r63553 | mark.dickinson | 2008-05-23 07:07:36 -0500 (Fri, 23 May 2008) | 3 lines
  
  Skip math.sum tests on non IEEE 754 platforms, and on IEEE 754 platforms 
  that exhibit the problem described in issue #2937. 
........
  r63563 | martin.v.loewis | 2008-05-23 10:18:28 -0500 (Fri, 23 May 2008) | 3 lines
  
  Issue #1390: Raise ValueError in toxml when an invalid comment would
  otherwise be produced.
........
  r63564 | raymond.hettinger | 2008-05-23 12:21:44 -0500 (Fri, 23 May 2008) | 1 line
  
  Issue 2909: show how to name unpacked fields.
........
  r63567 | raymond.hettinger | 2008-05-23 12:34:34 -0500 (Fri, 23 May 2008) | 1 line
  
  Fix typo
........
  r63569 | martin.v.loewis | 2008-05-23 14:33:13 -0500 (Fri, 23 May 2008) | 3 lines
  
  Mention that the leaking of variables from list comprehensions
  is fixed in 3.0.
........
  r63576 | martin.v.loewis | 2008-05-24 04:36:45 -0500 (Sat, 24 May 2008) | 3 lines
  
  Don't try to get the window size if it was never set before.
  Fixes the test failure on Solaris.
........


Modified:
   python/branches/py3k/   (props changed)
   python/branches/py3k/Doc/library/collections.rst
   python/branches/py3k/Doc/library/struct.rst
   python/branches/py3k/Lib/test/test_ioctl.py
   python/branches/py3k/Lib/test/test_math.py
   python/branches/py3k/Lib/test/test_minidom.py
   python/branches/py3k/Lib/xml/dom/minidom.py
   python/branches/py3k/Modules/mathmodule.c

Modified: python/branches/py3k/Doc/library/collections.rst
==============================================================================

--- python/branches/py3k/Doc/library/collections.rst	(original)
+++ python/branches/py3k/Doc/library/collections.rst	Mon May 26 19:36:47 2008
@@ -113,7 +113,7 @@
    Since some set operations create new sets, the default mixin methods need
    a way to create new instances from an iterable. The class constructor is
    assumed to have a signature in the form ``ClassName(iterable)``.
-   That assumption is factored-out to a single internal classmethod called
+   That assumption is factored-out to an internal classmethod called
    :meth:`_from_iterable` which calls ``cls(iterable)`` to produce a new set.
    If the :class:`Set` mixin is being used in a class with a different
    constructor signature, you will need to override :meth:`from_iterable`

Modified: python/branches/py3k/Doc/library/struct.rst
==============================================================================
--- python/branches/py3k/Doc/library/struct.rst	(original)
+++ python/branches/py3k/Doc/library/struct.rst	Mon May 26 19:36:47 2008
@@ -216,6 +216,16 @@
 native size and alignment are in effect; standard size and alignment does not
 enforce any alignment.
 
+Unpacked fields can be named by assigning them to variables or by wrapping
+the result in a named tuple::
+
+    >>> record = 'raymond   \x32\x12\x08\x01\x08'
+    >>> name, serialnum, school, gradelevel = unpack('<10sHHb', record)
+
+    >>> from collections import namedtuple
+    >>> Student = namedtuple('Student', 'name serialnum school gradelevel')
+    >>> Student._make(unpack('<10sHHb', s))
+    Student(name='raymond   ', serialnum=4658, school=264, gradelevel=8)
 
 .. seealso::
 

Modified: python/branches/py3k/Lib/test/test_ioctl.py
==============================================================================
--- python/branches/py3k/Lib/test/test_ioctl.py	(original)
+++ python/branches/py3k/Lib/test/test_ioctl.py	Mon May 26 19:36:47 2008
@@ -52,13 +52,10 @@
                 set_winsz_opcode_maybe_neg, = struct.unpack("i",
                         struct.pack("I", termios.TIOCSWINSZ))
 
-            # We're just testing that these calls do not raise exceptions.
-            saved_winsz = fcntl.ioctl(mfd, termios.TIOCGWINSZ, "\0"*8)
             our_winsz = struct.pack("HHHH",80,25,0,0)
             # test both with a positive and potentially negative ioctl code
             new_winsz = fcntl.ioctl(mfd, set_winsz_opcode_pos, our_winsz)
             new_winsz = fcntl.ioctl(mfd, set_winsz_opcode_maybe_neg, our_winsz)
-            fcntl.ioctl(mfd, set_winsz_opcode_maybe_neg, saved_winsz)
         finally:
             os.close(mfd)
             os.close(sfd)

Modified: python/branches/py3k/Lib/test/test_math.py
==============================================================================
--- python/branches/py3k/Lib/test/test_math.py	(original)
+++ python/branches/py3k/Lib/test/test_math.py	Mon May 26 19:36:47 2008
@@ -626,6 +626,158 @@
         self.assertRaises(ValueError, math.sqrt, NINF)
         self.assert_(math.isnan(math.sqrt(NAN)))
 
+    def testSum(self):
+        # math.sum relies on exact rounding for correct operation.
+        # There's a known problem with IA32 floating-point that causes
+        # inexact rounding in some situations, and will cause the
+        # math.sum tests below to fail; see issue #2937.  On non IEEE
+        # 754 platforms, and on IEEE 754 platforms that exhibit the
+        # problem described in issue #2937, we simply skip the whole
+        # test.
+
+        if not float.__getformat__("double").startswith("IEEE"):
+            return
+
+        # on IEEE 754 compliant machines, both of the expressions
+        # below should round to 10000000000000002.0.
+        if 1e16+2.999 != 1e16+2.9999:
+            return
+
+        # Python version of math.sum algorithm, for comparison
+        def msum(iterable):
+            """Full precision sum of values in iterable.  Returns the value of
+            the sum, rounded to the nearest representable floating-point number
+            using the round-half-to-even rule.
+
+            """
+            # Stage 1: accumulate partials
+            partials = []
+            for x in iterable:
+                i = 0
+                for y in partials:
+                    if abs(x) < abs(y):
+                        x, y = y, x
+                    hi = x + y
+                    lo = y - (hi - x)
+                    if lo:
+                        partials[i] = lo
+                        i += 1
+                    x = hi
+                partials[i:] = [x] if x else []
+
+            # Stage 2: sum partials
+            if not partials:
+                return 0.0
+
+            # sum from the top, stopping as soon as the sum is inexact.
+            total = partials.pop()
+            while partials:
+                x = partials.pop()
+                old_total, total = total, total + x
+                error = x - (total - old_total)
+                if error != 0.0:
+                    # adjust for correct rounding if necessary
+                    if partials and (partials[-1] > 0.0) == (error > 0.0) and \
+                            total + 2*error - total == 2*error:
+                        total += 2*error
+                    break
+            return total
+
+        from sys import float_info
+        maxfloat = float_info.max
+        twopow = 2.**(float_info.max_exp - 1)
+
+        test_values = [
+            ([], 0.0),
+            ([0.0], 0.0),
+            ([1e100, 1.0, -1e100, 1e-100, 1e50, -1.0, -1e50], 1e-100),
+            ([1e308, 1e308, -1e308], OverflowError),
+            ([-1e308, 1e308, 1e308], 1e308),
+            ([1e308, -1e308, 1e308], 1e308),
+            ([2.0**1023, 2.0**1023, -2.0**1000], OverflowError),
+            ([twopow, twopow, twopow, twopow, -twopow, -twopow, -twopow],
+             OverflowError),
+            ([2.0**53, -0.5, -2.0**-54], 2.0**53-1.0),
+            ([2.0**53, 1.0, 2.0**-100], 2.0**53+2.0),
+            ([2.0**53+10.0, 1.0, 2.0**-100], 2.0**53+12.0),
+
+            ([2.0**53-4.0, 0.5, 2.0**-54], 2.0**53-3.0),
+            ([2.0**1023-2.0**970, -1.0, 2.0**1023], OverflowError),
+            ([maxfloat, maxfloat*2.**-54], maxfloat),
+            ([maxfloat, maxfloat*2.**-53], OverflowError),
+            ([1./n for n in range(1, 1001)], 7.4854708605503451),
+            ([(-1.)**n/n for n in range(1, 1001)], -0.69264743055982025),
+            ([1.7**(i+1)-1.7**i for i in range(1000)] + [-1.7**1000], -1.0),
+            ([INF, -INF, NAN], ValueError),
+            ([NAN, INF, -INF], ValueError),
+            ([INF, NAN, INF], ValueError),
+
+            ([INF, INF], OverflowError),
+            ([INF, -INF], ValueError),
+            ([-INF, 1e308, 1e308, -INF], OverflowError),
+            ([2.0**1023-2.0**970, 0.0, 2.0**1023], OverflowError),
+            ([2.0**1023-2.0**970, 1.0, 2.0**1023], OverflowError),
+            ([2.0**1023, 2.0**1023], OverflowError),
+            ([2.0**1023, 2.0**1023, -1.0], OverflowError),
+            ([twopow, twopow, twopow, twopow, -twopow, -twopow],
+             OverflowError),
+            ([twopow, twopow, twopow, twopow, -twopow, twopow], OverflowError),
+            ([-twopow, -twopow, -twopow, -twopow], OverflowError),
+
+            ([2.**1023, 2.**1023, -2.**971], OverflowError),
+            ([2.**1023, 2.**1023, -2.**970], OverflowError),
+            ([-2.**970,  2.**1023,  2.**1023, -2.**-1074], OverflowError),
+            ([ 2.**1023, 2.**1023, -2.**970,   2.**-1074], OverflowError),
+            ([-2.**1023,  2.**971, -2.**1023], -maxfloat),
+            ([-2.**1023, -2.**1023, 2.**970],   OverflowError),
+            ([-2.**1023,  -2.**1023,  2.**970,  2.**-1074], OverflowError),
+            ([-2.**-1074, -2.**1023, -2.**1023, 2.**970], OverflowError),
+            ([2.**930, -2.**980, 2.**1023, 2.**1023, twopow, -twopow],
+             OverflowError),
+            ([2.**1023, 2.**1023, -1e307], OverflowError),
+            ([1e16, 1., 1e-16], 10000000000000002.0),
+            ([1e16-2., 1.-2.**53, -(1e16-2.), -(1.-2.**53)], 0.0),
+        ]
+
+        for i, (vals, s) in enumerate(test_values):
+            if isinstance(s, type) and issubclass(s, Exception):
+                try:
+                    m = math.sum(vals)
+                except s:
+                    pass
+                else:
+                    self.fail("test %d failed: got %r, expected %r "
+                              "for math.sum(%.100r)" %
+                              (i, m, s.__name__, vals))
+            else:
+                try:
+                    self.assertEqual(math.sum(vals), s)
+                except OverflowError:
+                    self.fail("test %d failed: got OverflowError, expected %r "
+                              "for math.sum(%.100r)" % (i, s, vals))
+                except ValueError:
+                    self.fail("test %d failed: got ValueError, expected %r "
+                              "for math.sum(%.100r)" % (i, s, vals))
+
+                # compare with output of msum above, but only when
+                # result isn't an IEEE special or an exception
+                if not math.isinf(s) and not math.isnan(s):
+                    self.assertEqual(msum(vals), s)
+
+        from random import random, gauss, shuffle
+        for j in range(1000):
+            vals = [7, 1e100, -7, -1e100, -9e-20, 8e-20] * 10
+            s = 0
+            for i in range(200):
+                v = gauss(0, random()) ** 7 - s
+                s += v
+                vals.append(v)
+            shuffle(vals)
+
+            s = msum(vals)
+            self.assertEqual(msum(vals), math.sum(vals))
+
+
     def testTan(self):
         self.assertRaises(TypeError, math.tan)
         self.ftest('tan(0)', math.tan(0), 0)
@@ -763,6 +915,10 @@
                 message = (("Unexpected ValueError: %s\n        " +
                            "in test %s:%s(%r)\n") % (exc.args[0], id, fn, ar))
                 self.fail(message)
+            except OverflowError:
+                message = ("Unexpected OverflowError in " +
+                           "test %s:%s(%r)\n" % (id, fn, ar))
+                self.fail(message)
             self.ftest("%s:%s(%r)" % (id, fn, ar), result, er)
 
 def test_main():

Modified: python/branches/py3k/Lib/test/test_minidom.py
==============================================================================
--- python/branches/py3k/Lib/test/test_minidom.py	(original)
+++ python/branches/py3k/Lib/test/test_minidom.py	Mon May 26 19:36:47 2008
@@ -1314,6 +1314,11 @@
             for i in range(len(n1.childNodes)):
                 stack.append((n1.childNodes[i], n2.childNodes[i]))
 
+    def testSerializeCommentNodeWithDoubleHyphen(self):
+        doc = create_doc_without_doctype()
+        doc.appendChild(doc.createComment("foo--bar"))
+        self.assertRaises(ValueError, doc.toxml)
+
 def test_main():
     run_unittest(MinidomTest)
 

Modified: python/branches/py3k/Lib/xml/dom/minidom.py
==============================================================================
--- python/branches/py3k/Lib/xml/dom/minidom.py	(original)
+++ python/branches/py3k/Lib/xml/dom/minidom.py	Mon May 26 19:36:47 2008
@@ -1132,6 +1132,8 @@
         self.data = self.nodeValue = data
 
     def writexml(self, writer, indent="", addindent="", newl=""):
+        if "--" in self.data:
+            raise ValueError("'--' is not allowed in a comment node")
         writer.write("%s<!--%s-->%s" % (indent, self.data, newl))
 
 

Modified: python/branches/py3k/Modules/mathmodule.c
==============================================================================
--- python/branches/py3k/Modules/mathmodule.c	(original)
+++ python/branches/py3k/Modules/mathmodule.c	Mon May 26 19:36:47 2008
@@ -362,6 +362,199 @@
 FUNC1(tanh, tanh, 0,
       "tanh(x)\n\nReturn the hyperbolic tangent of x.")
 
+/* Precision summation function as msum() by Raymond Hettinger in
+   <http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/393090>,
+   enhanced with the exact partials sum and roundoff from Mark
+   Dickinson's post at <http://bugs.python.org/file10357/msum4.py>.
+   See those links for more details, proofs and other references.
+
+   Note 1: IEEE 754R floating point semantics are assumed,
+   but the current implementation does not re-establish special
+   value semantics across iterations (i.e. handling -Inf + Inf).
+
+   Note 2:  No provision is made for intermediate overflow handling;
+   therefore, sum([1e+308, 1e-308, 1e+308]) returns result 1e+308 while
+   sum([1e+308, 1e+308, 1e-308]) raises an OverflowError due to the
+   overflow of the first partial sum.
+
+   Note 3: Aggressively optimizing compilers can potentially eliminate the
+   residual values needed for accurate summation. For instance, the statements
+   "hi = x + y; lo = y - (hi - x);" could be mis-transformed to
+   "hi = x + y; lo = 0.0;" which defeats the computation of residuals.
+
+   Note 4: A similar implementation is in Modules/cmathmodule.c.
+   Be sure to update both when making changes.
+
+   Note 5: The signature of math.sum() differs from __builtin__.sum()
+   because the start argument doesn't make sense in the context of
+   accurate summation.  Since the partials table is collapsed before
+   returning a result, sum(seq2, start=sum(seq1)) may not equal the
+   accurate result returned by sum(itertools.chain(seq1, seq2)).
+*/
+
+#define NUM_PARTIALS  32  /* initial partials array size, on stack */
+
+/* Extend the partials array p[] by doubling its size. */
+static int                          /* non-zero on error */
+_sum_realloc(double **p_ptr, Py_ssize_t  n,
+             double  *ps,    Py_ssize_t *m_ptr)
+{
+	void *v = NULL;
+	Py_ssize_t m = *m_ptr;
+
+	m += m;  /* double */
+	if (n < m && m < (PY_SSIZE_T_MAX / sizeof(double))) {
+		double *p = *p_ptr;
+		if (p == ps) {
+			v = PyMem_Malloc(sizeof(double) * m);
+			if (v != NULL)
+				memcpy(v, ps, sizeof(double) * n);
+		}
+		else
+			v = PyMem_Realloc(p, sizeof(double) * m);
+	}
+	if (v == NULL) {        /* size overflow or no memory */
+		PyErr_SetString(PyExc_MemoryError, "math sum partials");
+		return 1;
+	}
+	*p_ptr = (double*) v;
+	*m_ptr = m;
+	return 0;
+}
+
+/* Full precision summation of a sequence of floats.
+
+   def msum(iterable):
+       partials = []  # sorted, non-overlapping partial sums
+       for x in iterable:
+           i = 0
+           for y in partials:
+               if abs(x) < abs(y):
+                   x, y = y, x
+               hi = x + y
+               lo = y - (hi - x)
+               if lo:
+                   partials[i] = lo
+                   i += 1
+               x = hi
+           partials[i:] = [x]
+       return sum_exact(partials)
+
+   Rounded x+y stored in hi with the roundoff stored in lo.  Together hi+lo
+   are exactly equal to x+y.  The inner loop applies hi/lo summation to each
+   partial so that the list of partial sums remains exact.
+
+   Sum_exact() adds the partial sums exactly and correctly rounds the final
+   result (using the round-half-to-even rule).  The items in partials remain
+   non-zero, non-special, non-overlapping and strictly increasing in
+   magnitude, but possibly not all having the same sign.
+
+   Depends on IEEE 754 arithmetic guarantees and half-even rounding.
+*/
+
+static PyObject*
+math_sum(PyObject *self, PyObject *seq)
+{
+	PyObject *item, *iter, *sum = NULL;
+	Py_ssize_t i, j, n = 0, m = NUM_PARTIALS;
+	double x, y, hi, lo=0.0, ps[NUM_PARTIALS], *p = ps;
+
+	iter = PyObject_GetIter(seq);
+	if (iter == NULL)
+		return NULL;
+
+	PyFPE_START_PROTECT("sum", Py_DECREF(iter); return NULL)
+
+	for(;;) {           /* for x in iterable */
+		assert(0 <= n && n <= m);
+		assert((m == NUM_PARTIALS && p == ps) ||
+		       (m >  NUM_PARTIALS && p != NULL));
+
+		item = PyIter_Next(iter);
+		if (item == NULL) {
+			if (PyErr_Occurred())
+				goto _sum_error;
+			break;
+		}
+		x = PyFloat_AsDouble(item);
+		Py_DECREF(item);
+		if (PyErr_Occurred())
+			goto _sum_error;
+
+		for (i = j = 0; j < n; j++) {       /* for y in partials */
+			y = p[j];
+			hi = x + y;
+			lo = fabs(x) < fabs(y)
+			   ? x - (hi - y)
+			   : y - (hi - x);
+			if (lo != 0.0)
+				p[i++] = lo;
+			x = hi;
+		}
+		
+		n = i;                              /* ps[i:] = [x] */                   
+		if (x != 0.0) {
+			/* If non-finite, reset partials, effectively
+			   adding subsequent items without roundoff
+			   and yielding correct non-finite results,
+			   provided IEEE 754 rules are observed */
+			if (! Py_IS_FINITE(x))
+				n = 0;
+			else if (n >= m && _sum_realloc(&p, n, ps, &m))
+				goto _sum_error;
+			p[n++] = x;
+		}
+	}
+
+	if (n > 0) {
+		hi = p[--n];
+		if (Py_IS_FINITE(hi)) {
+			/* sum_exact(ps, hi) from the top, stop when the sum becomes inexact. */
+			while (n > 0) {
+				x = p[--n];
+				y = hi;
+				hi = x + y;
+				assert(fabs(x) < fabs(y));
+				lo = x - (hi - y);
+				if (lo != 0.0)
+					break;
+			}
+			/* Little dance to allow half-even rounding across multiple partials.
+                           Needed so that sum([1e-16, 1, 1e16]) will round-up to two instead
+                           of down to zero (the 1e16 makes the 1 slightly closer to two). */
+			if (n > 0 && ((lo < 0.0 && p[n-1] < 0.0) ||
+			              (lo > 0.0 && p[n-1] > 0.0))) {
+				y = lo * 2.0;
+				x = hi + y;
+				if (y == (x - hi))
+					hi = x;
+			}
+		}
+		else {  /* raise corresponding error */
+			errno = Py_IS_NAN(hi) ? EDOM : ERANGE;
+			if (is_error(hi))
+				goto _sum_error;
+		}
+	}
+	else  /* default */
+		hi = 0.0;
+	sum = PyFloat_FromDouble(hi);
+
+_sum_error:
+	PyFPE_END_PROTECT(hi)
+	Py_DECREF(iter);
+	if (p != ps)
+		PyMem_Free(p);
+	return sum;
+}
+
+#undef NUM_PARTIALS
+
+PyDoc_STRVAR(math_sum_doc,
+"sum(iterable)\n\n\
+Return an accurate floating point sum of values in the iterable.\n\
+Assumes IEEE-754 floating point arithmetic.");
+
 static PyObject *
 math_trunc(PyObject *self, PyObject *number)
 {
@@ -833,6 +1026,7 @@
 	{"sin",		math_sin,	METH_O,		math_sin_doc},
 	{"sinh",	math_sinh,	METH_O,		math_sinh_doc},
 	{"sqrt",	math_sqrt,	METH_O,		math_sqrt_doc},
+	{"sum",		math_sum,	METH_O,		math_sum_doc},
 	{"tan",		math_tan,	METH_O,		math_tan_doc},
 	{"tanh",	math_tanh,	METH_O,		math_tanh_doc},
  	{"trunc",	math_trunc,	METH_O,		math_trunc_doc},