[Python-checkins] r71414 - in python/trunk: Lib/test/test_minidom.py Lib/xml/dom/minidom.py Misc/NEWS

r.david.murray python-checkins at python.org
Thu Apr 9 23:54:50 CEST 2009


Author: r.david.murray
Date: Thu Apr  9 23:54:50 2009
New Revision: 71414

Log:
Issue #2170: refactored xml.dom.minidom.normalize, increasing both
its clarity and its speed.


Modified:
   python/trunk/Lib/test/test_minidom.py
   python/trunk/Lib/xml/dom/minidom.py
   python/trunk/Misc/NEWS

Modified: python/trunk/Lib/test/test_minidom.py
==============================================================================
--- python/trunk/Lib/test/test_minidom.py	(original)
+++ python/trunk/Lib/test/test_minidom.py	Thu Apr  9 23:54:50 2009
@@ -790,6 +790,167 @@
                 "testNormalize -- single empty node removed")
         doc.unlink()
 
+    def testNormalizeCombineAndNextSibling(self):
+        doc = parseString("<doc/>")
+        root = doc.documentElement
+        root.appendChild(doc.createTextNode("first"))
+        root.appendChild(doc.createTextNode("second"))
+        root.appendChild(doc.createElement("i"))
+        self.confirm(len(root.childNodes) == 3
+                and root.childNodes.length == 3,
+                "testNormalizeCombineAndNextSibling -- preparation")
+        doc.normalize()
+        self.confirm(len(root.childNodes) == 2
+                and root.childNodes.length == 2
+                and root.firstChild.data == "firstsecond"
+                and root.firstChild is not root.lastChild
+                and root.firstChild.nextSibling is root.lastChild
+                and root.firstChild.previousSibling is None
+                and root.lastChild.previousSibling is root.firstChild
+                and root.lastChild.nextSibling is None
+                , "testNormalizeCombinedAndNextSibling -- result")
+        doc.unlink()
+
+    def testNormalizeDeleteWithPrevSibling(self):
+        doc = parseString("<doc/>")
+        root = doc.documentElement
+        root.appendChild(doc.createTextNode("first"))
+        root.appendChild(doc.createTextNode(""))
+        self.confirm(len(root.childNodes) == 2
+                and root.childNodes.length == 2,
+                "testNormalizeDeleteWithPrevSibling -- preparation")
+        doc.normalize()
+        self.confirm(len(root.childNodes) == 1
+                and root.childNodes.length == 1
+                and root.firstChild.data == "first"
+                and root.firstChild is root.lastChild
+                and root.firstChild.nextSibling is None
+                and root.firstChild.previousSibling is None
+                , "testNormalizeDeleteWithPrevSibling -- result")
+        doc.unlink()
+
+    def testNormalizeDeleteWithNextSibling(self):
+        doc = parseString("<doc/>")
+        root = doc.documentElement
+        root.appendChild(doc.createTextNode(""))
+        root.appendChild(doc.createTextNode("second"))
+        self.confirm(len(root.childNodes) == 2
+                and root.childNodes.length == 2,
+                "testNormalizeDeleteWithNextSibling -- preparation")
+        doc.normalize()
+        self.confirm(len(root.childNodes) == 1
+                and root.childNodes.length == 1
+                and root.firstChild.data == "second"
+                and root.firstChild is root.lastChild
+                and root.firstChild.nextSibling is None
+                and root.firstChild.previousSibling is None
+                , "testNormalizeDeleteWithNextSibling -- result")
+        doc.unlink()
+
+    def testNormalizeDeleteWithTwoNonTextSiblings(self):
+        doc = parseString("<doc/>")
+        root = doc.documentElement
+        root.appendChild(doc.createElement("i"))
+        root.appendChild(doc.createTextNode(""))
+        root.appendChild(doc.createElement("i"))
+        self.confirm(len(root.childNodes) == 3
+                and root.childNodes.length == 3,
+                "testNormalizeDeleteWithTwoSiblings -- preparation")
+        doc.normalize()
+        self.confirm(len(root.childNodes) == 2
+                and root.childNodes.length == 2
+                and root.firstChild is not root.lastChild
+                and root.firstChild.nextSibling is root.lastChild
+                and root.firstChild.previousSibling is None
+                and root.lastChild.previousSibling is root.firstChild
+                and root.lastChild.nextSibling is None
+                , "testNormalizeDeleteWithTwoSiblings -- result")
+        doc.unlink()
+
+    def testNormalizeDeleteAndCombine(self):
+        doc = parseString("<doc/>")
+        root = doc.documentElement
+        root.appendChild(doc.createTextNode(""))
+        root.appendChild(doc.createTextNode("second"))
+        root.appendChild(doc.createTextNode(""))
+        root.appendChild(doc.createTextNode("fourth"))
+        root.appendChild(doc.createTextNode(""))
+        self.confirm(len(root.childNodes) == 5
+                and root.childNodes.length == 5,
+                "testNormalizeDeleteAndCombine -- preparation")
+        doc.normalize()
+        self.confirm(len(root.childNodes) == 1
+                and root.childNodes.length == 1
+                and root.firstChild is root.lastChild
+                and root.firstChild.data == "secondfourth"
+                and root.firstChild.previousSibling is None
+                and root.firstChild.nextSibling is None
+                , "testNormalizeDeleteAndCombine -- result")
+        doc.unlink()
+
+    def testNormalizeRecursion(self):
+        doc = parseString("<doc>"
+                            "<o>"
+                              "<i/>"
+                              "t"
+                              #
+                              #x
+                            "</o>"
+                            "<o>"
+                              "<o>"
+                                "t2"
+                                #x2
+                              "</o>"
+                              "t3"
+                              #x3
+                            "</o>"
+                            #
+                          "</doc>")
+        root = doc.documentElement
+        root.childNodes[0].appendChild(doc.createTextNode(""))
+        root.childNodes[0].appendChild(doc.createTextNode("x"))
+        root.childNodes[1].childNodes[0].appendChild(doc.createTextNode("x2"))
+        root.childNodes[1].appendChild(doc.createTextNode("x3"))
+        root.appendChild(doc.createTextNode(""))
+        self.confirm(len(root.childNodes) == 3
+                and root.childNodes.length == 3
+                and len(root.childNodes[0].childNodes) == 4
+                and root.childNodes[0].childNodes.length == 4
+                and len(root.childNodes[1].childNodes) == 3
+                and root.childNodes[1].childNodes.length == 3
+                and len(root.childNodes[1].childNodes[0].childNodes) == 2
+                and root.childNodes[1].childNodes[0].childNodes.length == 2
+                , "testNormalize2 -- preparation")
+        doc.normalize()
+        self.confirm(len(root.childNodes) == 2
+                and root.childNodes.length == 2
+                and len(root.childNodes[0].childNodes) == 2
+                and root.childNodes[0].childNodes.length == 2
+                and len(root.childNodes[1].childNodes) == 2
+                and root.childNodes[1].childNodes.length == 2
+                and len(root.childNodes[1].childNodes[0].childNodes) == 1
+                and root.childNodes[1].childNodes[0].childNodes.length == 1
+                , "testNormalize2 -- childNodes lengths")
+        self.confirm(root.childNodes[0].childNodes[1].data == "tx"
+                and root.childNodes[1].childNodes[0].childNodes[0].data == "t2x2"
+                and root.childNodes[1].childNodes[1].data == "t3x3"
+                , "testNormalize2 -- joined text fields")
+        self.confirm(root.childNodes[0].childNodes[1].nextSibling is None
+                and root.childNodes[0].childNodes[1].previousSibling
+                        is root.childNodes[0].childNodes[0]
+                and root.childNodes[0].childNodes[0].previousSibling is None
+                and root.childNodes[0].childNodes[0].nextSibling
+                        is root.childNodes[0].childNodes[1]
+                and root.childNodes[1].childNodes[1].nextSibling is None
+                and root.childNodes[1].childNodes[1].previousSibling
+                        is root.childNodes[1].childNodes[0]
+                and root.childNodes[1].childNodes[0].previousSibling is None
+                and root.childNodes[1].childNodes[0].nextSibling
+                        is root.childNodes[1].childNodes[1]
+                , "testNormalize2 -- sibling pointers")
+        doc.unlink()
+
+
     def testBug1433694(self):
         doc = parseString("<o><i/>t</o>")
         node = doc.documentElement

Modified: python/trunk/Lib/xml/dom/minidom.py
==============================================================================
--- python/trunk/Lib/xml/dom/minidom.py	(original)
+++ python/trunk/Lib/xml/dom/minidom.py	Thu Apr  9 23:54:50 2009
@@ -177,34 +177,27 @@
         L = []
         for child in self.childNodes:
             if child.nodeType == Node.TEXT_NODE:
-                data = child.data
-                if data and L and L[-1].nodeType == child.nodeType:
+                if not child.data:
+                    # empty text node; discard
+                    if L:
+                        L[-1].nextSibling = child.nextSibling
+                    if child.nextSibling:
+                        child.nextSibling.previousSibling = child.previousSibling
+                    child.unlink()
+                elif L and L[-1].nodeType == child.nodeType:
                     # collapse text node
                     node = L[-1]
                     node.data = node.data + child.data
                     node.nextSibling = child.nextSibling
+                    if child.nextSibling:
+                        child.nextSibling.previousSibling = node
                     child.unlink()
-                elif data:
-                    if L:
-                        L[-1].nextSibling = child
-                        child.previousSibling = L[-1]
-                    else:
-                        child.previousSibling = None
-                    L.append(child)
                 else:
-                    # empty text node; discard
-                    child.unlink()
+                    L.append(child)
             else:
-                if L:
-                    L[-1].nextSibling = child
-                    child.previousSibling = L[-1]
-                else:
-                    child.previousSibling = None
                 L.append(child)
                 if child.nodeType == Node.ELEMENT_NODE:
                     child.normalize()
-        if L:
-            L[-1].nextSibling = None
         self.childNodes[:] = L
 
     def cloneNode(self, deep):

Modified: python/trunk/Misc/NEWS
==============================================================================
--- python/trunk/Misc/NEWS	(original)
+++ python/trunk/Misc/NEWS	Thu Apr  9 23:54:50 2009
@@ -12,6 +12,9 @@
 Core and Builtins
 -----------------
 
+- Issue #2170: refactored xml.dom.minidom.normalize, increasing both
+  its clarity and its speed.
+
 - Issue #2396: the memoryview object was backported from Python 3.1.
 
 - Fix a problem in PyErr_NormalizeException that leads to "undetected errors"


More information about the Python-checkins mailing list