[Jython-checkins] jython: Updated XML stdlib support to latest from CPython 2.7. Fixes #2124

jim.baker jython-checkins at python.org
Wed Feb 3 14:58:57 EST 2016


https://hg.python.org/jython/rev/ec1b1e701776
changeset:   7893:ec1b1e701776
user:        Jim Baker <jim.baker at rackspace.com>
date:        Wed Feb 03 12:58:49 2016 -0700
summary:
  Updated XML stdlib support to latest from CPython 2.7. Fixes #2124

In addition, Jython does not use the Xerces API (xml-apis-2.11.0.jar)
in its build, so it has been removed. We will revisit shading with the change to
Gradle in 2.7.2 (#2182).

files:
  CPythonLib.includes                        |    1 +
  Lib/xml/dom/MessageSource.py               |   54 -
  Lib/xml/dom/NodeFilter.py                  |   27 -
  Lib/xml/dom/__init__.py                    |  232 -----
  Lib/xml/dom/domreg.py                      |   99 --
  Lib/xml/dom/minicompat.py                  |  184 ----
  Lib/xml/dom/minidom.py                     |  192 ++--
  Lib/xml/dom/xmlbuilder.py                  |  388 ----------
  Lib/xml/sax/drivers2/drv_javasax.py        |   20 +
  extlibs/xml-apis-2.11.0.jar                |  Bin 
  lib-python/2.7/test/test_minidom.py        |  131 +--
  lib-python/2.7/test/test_xml_etree.py      |   27 +-
  lib-python/2.7/test/test_xml_etree_c.py    |   32 +
  lib-python/2.7/test/test_xmlrpc.py         |  123 ++-
  lib-python/2.7/xml/dom/minicompat.py       |    6 +-
  lib-python/2.7/xml/dom/minidom.py          |    3 -
  lib-python/2.7/xml/etree/ElementInclude.py |   15 +-
  lib-python/2.7/xml/etree/ElementTree.py    |   89 +-
  lib-python/2.7/xml/sax/expatreader.py      |   25 +-
  lib-python/2.7/xml/sax/saxutils.py         |   21 +-
  20 files changed, 407 insertions(+), 1262 deletions(-)


diff --git a/CPythonLib.includes b/CPythonLib.includes
--- a/CPythonLib.includes
+++ b/CPythonLib.includes
@@ -14,6 +14,7 @@
 pydoc_data/**
 test/**
 unittest/**
+xml/dom/**
 xml/etree/**
 
 # Lib files, in alphabetical order:
diff --git a/Lib/xml/dom/MessageSource.py b/Lib/xml/dom/MessageSource.py
deleted file mode 100644
--- a/Lib/xml/dom/MessageSource.py
+++ /dev/null
@@ -1,54 +0,0 @@
-# DOMException
-from xml.dom import INDEX_SIZE_ERR, DOMSTRING_SIZE_ERR , HIERARCHY_REQUEST_ERR
-from xml.dom import WRONG_DOCUMENT_ERR, INVALID_CHARACTER_ERR, NO_DATA_ALLOWED_ERR
-from xml.dom import NO_MODIFICATION_ALLOWED_ERR, NOT_FOUND_ERR, NOT_SUPPORTED_ERR
-from xml.dom import INUSE_ATTRIBUTE_ERR, INVALID_STATE_ERR, SYNTAX_ERR
-from xml.dom import INVALID_MODIFICATION_ERR, NAMESPACE_ERR, INVALID_ACCESS_ERR
-from xml.dom import VALIDATION_ERR
-
-# EventException
-from xml.dom import UNSPECIFIED_EVENT_TYPE_ERR
-
-#Range Exceptions
-from xml.dom import BAD_BOUNDARYPOINTS_ERR
-from xml.dom import INVALID_NODE_TYPE_ERR
-
-# Fourthought Exceptions
-from xml.dom import XML_PARSE_ERR
-
-from xml.FtCore import get_translator
-
-_ = get_translator("dom")
-
-
-DOMExceptionStrings = {
-    INDEX_SIZE_ERR: _("Index error accessing NodeList or NamedNodeMap"),
-    DOMSTRING_SIZE_ERR: _("DOMString exceeds maximum size"),
-    HIERARCHY_REQUEST_ERR: _("Node manipulation results in invalid parent/child relationship."),
-    WRONG_DOCUMENT_ERR: _("Node is from a different document"),
-    INVALID_CHARACTER_ERR: _("Invalid or illegal character"),
-    NO_DATA_ALLOWED_ERR: _("Node does not support data"),
-    NO_MODIFICATION_ALLOWED_ERR: _("Attempt to modify a read-only object"),
-    NOT_FOUND_ERR: _("Node does not exist in this context"),
-    NOT_SUPPORTED_ERR: _("Object or operation not supported"),
-    INUSE_ATTRIBUTE_ERR: _("Attribute already in use by an element"),
-    INVALID_STATE_ERR: _("Object is not, or is no longer, usable"),
-    SYNTAX_ERR: _("Specified string is invalid or illegal"),
-    INVALID_MODIFICATION_ERR: _("Attempt to modify the type of a node"),
-    NAMESPACE_ERR: _("Invalid or illegal namespace operation"),
-    INVALID_ACCESS_ERR: _("Object does not support this operation or parameter"),
-    VALIDATION_ERR: _("Operation would invalidate partial validity constraint"),
-    }
-
-EventExceptionStrings = {
-    UNSPECIFIED_EVENT_TYPE_ERR : _("Uninitialized type in Event object"),
-    }
-
-FtExceptionStrings = {
-    XML_PARSE_ERR : _("XML parse error at line %d, column %d: %s"),
-    }
-
-RangeExceptionStrings = {
-    BAD_BOUNDARYPOINTS_ERR : _("Invalid Boundary Points specified for Range"),
-    INVALID_NODE_TYPE_ERR : _("Invalid Container Node")
-    }
diff --git a/Lib/xml/dom/NodeFilter.py b/Lib/xml/dom/NodeFilter.py
deleted file mode 100644
--- a/Lib/xml/dom/NodeFilter.py
+++ /dev/null
@@ -1,27 +0,0 @@
-# This is the Python mapping for interface NodeFilter from
-# DOM2-Traversal-Range. It contains only constants.
-
-class NodeFilter:
-    """
-    This is the DOM2 NodeFilter interface. It contains only constants.
-    """
-    FILTER_ACCEPT = 1
-    FILTER_REJECT = 2
-    FILTER_SKIP   = 3
-
-    SHOW_ALL                    = 0xFFFFFFFFL
-    SHOW_ELEMENT                = 0x00000001
-    SHOW_ATTRIBUTE              = 0x00000002
-    SHOW_TEXT                   = 0x00000004
-    SHOW_CDATA_SECTION          = 0x00000008
-    SHOW_ENTITY_REFERENCE       = 0x00000010
-    SHOW_ENTITY                 = 0x00000020
-    SHOW_PROCESSING_INSTRUCTION = 0x00000040
-    SHOW_COMMENT                = 0x00000080
-    SHOW_DOCUMENT               = 0x00000100
-    SHOW_DOCUMENT_TYPE          = 0x00000200
-    SHOW_DOCUMENT_FRAGMENT      = 0x00000400
-    SHOW_NOTATION               = 0x00000800
-
-    def acceptNode(self, node):
-        raise NotImplementedError
diff --git a/Lib/xml/dom/__init__.py b/Lib/xml/dom/__init__.py
deleted file mode 100644
--- a/Lib/xml/dom/__init__.py
+++ /dev/null
@@ -1,232 +0,0 @@
-########################################################################
-#
-# File Name:            __init__.py
-#
-#
-"""
-WWW: http://4suite.org/4DOM         e-mail: support at 4suite.org
-
-Copyright (c) 2000 Fourthought Inc, USA.   All Rights Reserved.
-See  http://4suite.org/COPYRIGHT  for license and copyright information
-"""
-
-
-class Node:
-    """Class giving the nodeType and tree-position constants."""
-
-    # DOM implementations may use this as a base class for their own
-    # Node implementations.  If they don't, the constants defined here
-    # should still be used as the canonical definitions as they match
-    # the values given in the W3C recommendation.  Client code can
-    # safely refer to these values in all tests of Node.nodeType
-    # values.
-
-    ELEMENT_NODE                = 1
-    ATTRIBUTE_NODE              = 2
-    TEXT_NODE                   = 3
-    CDATA_SECTION_NODE          = 4
-    ENTITY_REFERENCE_NODE       = 5
-    ENTITY_NODE                 = 6
-    PROCESSING_INSTRUCTION_NODE = 7
-    COMMENT_NODE                = 8
-    DOCUMENT_NODE               = 9
-    DOCUMENT_TYPE_NODE          = 10
-    DOCUMENT_FRAGMENT_NODE      = 11
-    NOTATION_NODE               = 12
-
-    # Based on DOM Level 3 (WD 9 April 2002)
-
-    TREE_POSITION_PRECEDING    = 0x01
-    TREE_POSITION_FOLLOWING    = 0x02
-    TREE_POSITION_ANCESTOR     = 0x04
-    TREE_POSITION_DESCENDENT   = 0x08
-    TREE_POSITION_EQUIVALENT   = 0x10
-    TREE_POSITION_SAME_NODE    = 0x20
-    TREE_POSITION_DISCONNECTED = 0x00
-
-class UserDataHandler:
-    """Class giving the operation constants for UserDataHandler.handle()."""
-
-    # Based on DOM Level 3 (WD 9 April 2002)
-
-    NODE_CLONED   = 1
-    NODE_IMPORTED = 2
-    NODE_DELETED  = 3
-    NODE_RENAMED  = 4
-
-class DOMError:
-    """Class giving constants for error severity."""
-
-    # Based on DOM Level 3 (WD 9 April 2002)
-
-    SEVERITY_WARNING     = 0
-    SEVERITY_ERROR       = 1
-    SEVERITY_FATAL_ERROR = 2
-
-
-# DOMException codes
-INDEX_SIZE_ERR                 = 1
-DOMSTRING_SIZE_ERR             = 2
-HIERARCHY_REQUEST_ERR          = 3
-WRONG_DOCUMENT_ERR             = 4
-INVALID_CHARACTER_ERR          = 5
-NO_DATA_ALLOWED_ERR            = 6
-NO_MODIFICATION_ALLOWED_ERR    = 7
-NOT_FOUND_ERR                  = 8
-NOT_SUPPORTED_ERR              = 9
-INUSE_ATTRIBUTE_ERR            = 10
-# DOM Level 2
-INVALID_STATE_ERR              = 11
-SYNTAX_ERR                     = 12
-INVALID_MODIFICATION_ERR       = 13
-NAMESPACE_ERR                  = 14
-INVALID_ACCESS_ERR             = 15
-# DOM Level 3
-VALIDATION_ERR                 = 16
-
-# EventException codes
-UNSPECIFIED_EVENT_TYPE_ERR     = 0
-
-# Fourthought specific codes
-FT_EXCEPTION_BASE = 1000
-XML_PARSE_ERR = FT_EXCEPTION_BASE + 1
-
-#RangeException codes
-BAD_BOUNDARYPOINTS_ERR = 1
-INVALID_NODE_TYPE_ERR = 2
-
-
-class DOMException(Exception):
-    def __init__(self, code, msg=''):
-        self.code = code
-        self.msg = msg or DOMExceptionStrings[code]
-
-    def __str__(self):
-        return self.msg
-
-class EventException(Exception):
-    def __init__(self, code, msg=''):
-        self.code = code
-        self.msg = msg or EventExceptionStrings[code]
-        return
-
-    def __str__(self):
-        return self.msg
-
-class RangeException(Exception):
-    def __init__(self, code, msg):
-        self.code = code
-        self.msg = msg or RangeExceptionStrings[code]
-        Exception.__init__(self, self.msg)
-
-class FtException(Exception):
-    def __init__(self, code, *args):
-        self.code = code
-        self.msg = FtExceptionStrings[code] % args
-        return
-
-    def __str__(self):
-        return self.msg
-
-class IndexSizeErr(DOMException):
-    def __init__(self, msg=''):
-        DOMException.__init__(self, INDEX_SIZE_ERR, msg)
-
-class DomstringSizeErr(DOMException):
-    def __init__(self, msg=''):
-        DOMException.__init__(self, DOMSTRING_SIZE_ERR, msg)
-
-# DOMStringSizeErr was accidentally introduced in rev 1.14 of this
-# file, and was released as part of PyXML 0.6.4, 0.6.5, 0.6.6, 0.7,
-# and 0.7.1.  It has never been part of the Python DOM API, although
-# it better matches the W3C recommendation.  It should remain for
-# compatibility, unfortunately.
-#
-DOMStringSizeErr = DomstringSizeErr
-
-class HierarchyRequestErr(DOMException):
-    def __init__(self, msg=''):
-        DOMException.__init__(self, HIERARCHY_REQUEST_ERR, msg)
-
-class WrongDocumentErr(DOMException):
-    def __init__(self, msg=''):
-        DOMException.__init__(self, WRONG_DOCUMENT_ERR, msg)
-
-class InvalidCharacterErr(DOMException):
-    def __init__(self, msg=''):
-        DOMException.__init__(self, INVALID_CHARACTER_ERR, msg)
-
-class NoDataAllowedErr(DOMException):
-    def __init__(self, msg=''):
-        DOMException.__init__(self, NO_DATA_ALLOWED_ERR, msg)
-
-class NoModificationAllowedErr(DOMException):
-    def __init__(self, msg=''):
-        DOMException.__init__(self, NO_MODIFICATION_ALLOWED_ERR, msg)
-
-class NotFoundErr(DOMException):
-    def __init__(self, msg=''):
-        DOMException.__init__(self, NOT_FOUND_ERR, msg)
-
-class NotSupportedErr(DOMException):
-    def __init__(self, msg=''):
-        DOMException.__init__(self, NOT_SUPPORTED_ERR, msg)
-
-class InuseAttributeErr(DOMException):
-    def __init__(self, msg=''):
-        DOMException.__init__(self, INUSE_ATTRIBUTE_ERR, msg)
-
-class InvalidStateErr(DOMException):
-    def __init__(self, msg=''):
-        DOMException.__init__(self, INVALID_STATE_ERR, msg)
-
-class SyntaxErr(DOMException):
-    def __init__(self, msg=''):
-        DOMException.__init__(self, SYNTAX_ERR, msg)
-
-class InvalidModificationErr(DOMException):
-    def __init__(self, msg=''):
-        DOMException.__init__(self, INVALID_MODIFICATION_ERR, msg)
-
-class NamespaceErr(DOMException):
-    def __init__(self, msg=''):
-        DOMException.__init__(self, NAMESPACE_ERR, msg)
-
-class InvalidAccessErr(DOMException):
-    def __init__(self, msg=''):
-        DOMException.__init__(self, INVALID_ACCESS_ERR, msg)
-
-class ValidationErr(DOMException):
-    def __init__(self, msg=''):
-        DOMException.__init__(self, VALIDATION_ERR, msg)
-
-class UnspecifiedEventTypeErr(EventException):
-    def __init__(self, msg=''):
-        EventException.__init__(self, UNSPECIFIED_EVENT_TYPE_ERR, msg)
-
-class XmlParseErr(FtException):
-    def __init__(self, msg=''):
-        FtException.__init__(self, XML_PARSE_ERR, msg)
-
-#Specific Range Exceptions
-class BadBoundaryPointsErr(RangeException):
-    def __init__(self, msg=''):
-        RangeException.__init__(self, BAD_BOUNDARYPOINTS_ERR, msg)
-
-class InvalidNodeTypeErr(RangeException):
-    def __init__(self, msg=''):
-        RangeException.__init__(self, INVALID_NODE_TYPE_ERR, msg)
-
-XML_NAMESPACE = "http://www.w3.org/XML/1998/namespace"
-XMLNS_NAMESPACE = "http://www.w3.org/2000/xmlns/"
-XHTML_NAMESPACE = "http://www.w3.org/1999/xhtml"
-EMPTY_NAMESPACE = None
-EMPTY_PREFIX = None
-
-import MessageSource
-DOMExceptionStrings = MessageSource.__dict__['DOMExceptionStrings']
-EventExceptionStrings = MessageSource.__dict__['EventExceptionStrings']
-FtExceptionStrings = MessageSource.__dict__['FtExceptionStrings']
-RangeExceptionStrings = MessageSource.__dict__['RangeExceptionStrings']
-
-from domreg import getDOMImplementation,registerDOMImplementation
diff --git a/Lib/xml/dom/domreg.py b/Lib/xml/dom/domreg.py
deleted file mode 100644
--- a/Lib/xml/dom/domreg.py
+++ /dev/null
@@ -1,99 +0,0 @@
-"""Registration facilities for DOM. This module should not be used
-directly. Instead, the functions getDOMImplementation and
-registerDOMImplementation should be imported from xml.dom."""
-
-from xml.dom.minicompat import *  # isinstance, StringTypes
-
-# This is a list of well-known implementations.  Well-known names
-# should be published by posting to xml-sig at python.org, and are
-# subsequently recorded in this file.
-
-well_known_implementations = {
-    'minidom':'xml.dom.minidom',
-    '4DOM': 'xml.dom.DOMImplementation',
-    }
-
-# DOM implementations not officially registered should register
-# themselves with their
-
-registered = {}
-
-def registerDOMImplementation(name, factory):
-    """registerDOMImplementation(name, factory)
-
-    Register the factory function with the name. The factory function
-    should return an object which implements the DOMImplementation
-    interface. The factory function can either return the same object,
-    or a new one (e.g. if that implementation supports some
-    customization)."""
-
-    registered[name] = factory
-
-def _good_enough(dom, features):
-    "_good_enough(dom, features) -> Return 1 if the dom offers the features"
-    for f,v in features:
-        if not dom.hasFeature(f,v):
-            return 0
-    return 1
-
-def getDOMImplementation(name = None, features = ()):
-    """getDOMImplementation(name = None, features = ()) -> DOM implementation.
-
-    Return a suitable DOM implementation. The name is either
-    well-known, the module name of a DOM implementation, or None. If
-    it is not None, imports the corresponding module and returns
-    DOMImplementation object if the import succeeds.
-
-    If name is not given, consider the available implementations to
-    find one with the required feature set. If no implementation can
-    be found, raise an ImportError. The features list must be a sequence
-    of (feature, version) pairs which are passed to hasFeature."""
-
-    import os
-    creator = None
-    mod = well_known_implementations.get(name)
-    if mod:
-        mod = __import__(mod, {}, {}, ['getDOMImplementation'])
-        return mod.getDOMImplementation()
-    elif name:
-        return registered[name]()
-    elif os.environ.has_key("PYTHON_DOM"):
-        return getDOMImplementation(name = os.environ["PYTHON_DOM"])
-
-    # User did not specify a name, try implementations in arbitrary
-    # order, returning the one that has the required features
-    if isinstance(features, StringTypes):
-        features = _parse_feature_string(features)
-    for creator in registered.values():
-        dom = creator()
-        if _good_enough(dom, features):
-            return dom
-
-    for creator in well_known_implementations.keys():
-        try:
-            dom = getDOMImplementation(name = creator)
-        except StandardError: # typically ImportError, or AttributeError
-            continue
-        if _good_enough(dom, features):
-            return dom
-
-    raise ImportError,"no suitable DOM implementation found"
-
-def _parse_feature_string(s):
-    features = []
-    parts = s.split()
-    i = 0
-    length = len(parts)
-    while i < length:
-        feature = parts[i]
-        if feature[0] in "0123456789":
-            raise ValueError, "bad feature name: " + `feature`
-        i = i + 1
-        version = None
-        if i < length:
-            v = parts[i]
-            if v[0] in "0123456789":
-                i = i + 1
-                version = v
-        features.append((feature, version))
-    return tuple(features)
diff --git a/Lib/xml/dom/minicompat.py b/Lib/xml/dom/minicompat.py
deleted file mode 100644
--- a/Lib/xml/dom/minicompat.py
+++ /dev/null
@@ -1,184 +0,0 @@
-"""Python version compatibility support for minidom."""
-
-# This module should only be imported using "import *".
-#
-# The following names are defined:
-#
-#   isinstance    -- version of the isinstance() function that accepts
-#                    tuples as the second parameter regardless of the
-#                    Python version
-#
-#   NodeList      -- lightest possible NodeList implementation
-#
-#   EmptyNodeList -- lightest possible NodeList that is guarateed to
-#                    remain empty (immutable)
-#
-#   StringTypes   -- tuple of defined string types
-#
-#   GetattrMagic  -- base class used to make _get_<attr> be magically
-#                    invoked when available
-#   defproperty   -- function used in conjunction with GetattrMagic;
-#                    using these together is needed to make them work
-#                    as efficiently as possible in both Python 2.2+
-#                    and older versions.  For example:
-#
-#                        class MyClass(GetattrMagic):
-#                            def _get_myattr(self):
-#                                return something
-#
-#                        defproperty(MyClass, "myattr",
-#                                    "return some value")
-#
-#                    For Python 2.2 and newer, this will construct a
-#                    property object on the class, which avoids
-#                    needing to override __getattr__().  It will only
-#                    work for read-only attributes.
-#
-#                    For older versions of Python, inheriting from
-#                    GetattrMagic will use the traditional
-#                    __getattr__() hackery to achieve the same effect,
-#                    but less efficiently.
-#
-#                    defproperty() should be used for each version of
-#                    the relevant _get_<property>() function.
-#
-#   NewStyle      -- base class to cause __slots__ to be honored in
-#                    the new world
-#
-#   True, False   -- only for Python 2.2 and earlier
-
-__all__ = ["NodeList", "EmptyNodeList", "NewStyle",
-           "StringTypes", "defproperty", "GetattrMagic"]
-
-import xml.dom
-
-try:
-    unicode
-except NameError:
-    StringTypes = type(''),
-else:
-    StringTypes = type(''), type(unicode(''))
-
-
-# define True and False only if not defined as built-ins
-try:
-    True
-except NameError:
-    True = 1
-    False = 0
-    __all__.extend(["True", "False"])
-
-
-try:
-    isinstance('', StringTypes)
-except TypeError:
-    #
-    # Wrap isinstance() to make it compatible with the version in
-    # Python 2.2 and newer.
-    #
-    _isinstance = isinstance
-    def isinstance(obj, type_or_seq):
-        try:
-            return _isinstance(obj, type_or_seq)
-        except TypeError:
-            for t in type_or_seq:
-                if _isinstance(obj, t):
-                    return 1
-            return 0
-    __all__.append("isinstance")
-
-
-if list is type([]):
-    class NodeList(list):
-        __slots__ = ()
-
-        def item(self, index):
-            if 0 <= index < len(self):
-                return self[index]
-
-        def _get_length(self):
-            return len(self)
-
-        def _set_length(self, value):
-            raise xml.dom.NoModificationAllowedErr(
-                "attempt to modify read-only attribute 'length'")
-
-        length = property(_get_length, _set_length,
-                          doc="The number of nodes in the NodeList.")
-
-        def __getstate__(self):
-            return list(self)
-
-        def __setstate__(self, state):
-            self[:] = state
-
-    class EmptyNodeList(tuple):
-        __slots__ = ()
-
-        def __add__(self, other):
-            NL = NodeList()
-            NL.extend(other)
-            return NL
-
-        def __radd__(self, other):
-            NL = NodeList()
-            NL.extend(other)
-            return NL
-
-        def item(self, index):
-            return None
-
-        def _get_length(self):
-            return 0
-
-        def _set_length(self, value):
-            raise xml.dom.NoModificationAllowedErr(
-                "attempt to modify read-only attribute 'length'")
-
-        length = property(_get_length, _set_length,
-                          doc="The number of nodes in the NodeList.")
-
-else:
-    def NodeList():
-        return []
-
-    def EmptyNodeList():
-        return []
-
-
-try:
-    property
-except NameError:
-    def defproperty(klass, name, doc):
-        # taken care of by the base __getattr__()
-        pass
-
-    class GetattrMagic:
-        def __getattr__(self, key):
-            if key.startswith("_"):
-                raise AttributeError, key
-
-            try:
-                get = getattr(self, "_get_" + key)
-            except AttributeError:
-                raise AttributeError, key
-            return get()
-
-    class NewStyle:
-        pass
-
-else:
-    def defproperty(klass, name, doc):
-        get = getattr(klass, ("_get_" + name)).im_func
-        def set(self, value, name=name):
-            raise xml.dom.NoModificationAllowedErr(
-                "attempt to modify read-only attribute " + repr(name))
-        assert not hasattr(klass, "_set_" + name), \
-               "expected not to find _set_" + name
-        prop = property(get, set, doc=doc)
-        setattr(klass, name, prop)
-
-    class GetattrMagic:
-        pass
-
-    NewStyle = object
diff --git a/Lib/xml/dom/minidom.py b/Lib/xml/dom/minidom.py
--- a/Lib/xml/dom/minidom.py
+++ b/Lib/xml/dom/minidom.py
@@ -1,5 +1,6 @@
-"""\
-minidom.py -- a lightweight DOM implementation.
+"""Simple implementation of the Level 1 DOM.
+
+Namespaces and other minor Level 2 features are also supported.
 
 parse("foo.xml")
 
@@ -14,14 +15,13 @@
  * SAX 2 namespaces
 """
 
+import sys
 import xml.dom
 
 from xml.dom import EMPTY_NAMESPACE, EMPTY_PREFIX, XMLNS_NAMESPACE, domreg
 from xml.dom.minicompat import *
 from xml.dom.xmlbuilder import DOMImplementationLS, DocumentLS
 
-_TupleType = type(())
-
 # This is used by the ID-cache invalidation checks; the list isn't
 # actually complete, since the nodes being checked will never be the
 # DOCUMENT_NODE or DOCUMENT_FRAGMENT_NODE.  (The node being checked is
@@ -31,7 +31,7 @@
                             xml.dom.Node.ENTITY_REFERENCE_NODE)
 
 
-class Node(xml.dom.Node, GetattrMagic):
+class Node(xml.dom.Node):
     namespaceURI = None # this is non-null only for elements and attributes
     parentNode = None
     ownerDocument = None
@@ -61,9 +61,6 @@
             self.writexml(writer, "", indent, newl)
         return writer.getvalue()
 
-    def hasAttributes(self):
-        return False
-
     def hasChildNodes(self):
         if self.childNodes:
             return True
@@ -138,20 +135,20 @@
         if newChild.nodeType not in self._child_node_types:
             raise xml.dom.HierarchyRequestErr(
                 "%s cannot be child of %s" % (repr(newChild), repr(self)))
+        if newChild is oldChild:
+            return
         if newChild.parentNode is not None:
             newChild.parentNode.removeChild(newChild)
-        if newChild is oldChild:
-            return
         try:
             index = self.childNodes.index(oldChild)
         except ValueError:
             raise xml.dom.NotFoundErr()
+        self.childNodes[index] = newChild
+        newChild.parentNode = self
+        oldChild.parentNode = None
         if (newChild.nodeType in _nodeTypes_with_children
             or oldChild.nodeType in _nodeTypes_with_children):
             _clear_id_cache(self)
-        self.childNodes[index] = newChild
-        newChild.parentNode = self
-        oldChild.parentNode = None
         newChild.nextSibling = oldChild.nextSibling
         newChild.previousSibling = oldChild.previousSibling
         oldChild.nextSibling = None
@@ -182,35 +179,28 @@
         L = []
         for child in self.childNodes:
             if child.nodeType == Node.TEXT_NODE:
-                data = child.data
-                if data and L and L[-1].nodeType == child.nodeType:
+                if not child.data:
+                    # empty text node; discard
+                    if L:
+                        L[-1].nextSibling = child.nextSibling
+                    if child.nextSibling:
+                        child.nextSibling.previousSibling = child.previousSibling
+                    child.unlink()
+                elif L and L[-1].nodeType == child.nodeType:
                     # collapse text node
                     node = L[-1]
                     node.data = node.data + child.data
                     node.nextSibling = child.nextSibling
+                    if child.nextSibling:
+                        child.nextSibling.previousSibling = node
                     child.unlink()
-                elif data:
-                    if L:
-                        L[-1].nextSibling = child
-                        child.previousSibling = L[-1]
-                    else:
-                        child.previousSibling = None
+                else:
                     L.append(child)
-                else:
-                    # empty text node; discard
-                    child.unlink()
             else:
-                if L:
-                    L[-1].nextSibling = child
-                    child.previousSibling = L[-1]
-                else:
-                    child.previousSibling = None
                 L.append(child)
                 if child.nodeType == Node.ELEMENT_NODE:
                     child.normalize()
-        if self.childNodes:
-            self.childNodes[:] = L
-        return
+        self.childNodes[:] = L
 
     def cloneNode(self, deep):
         return _clone_node(self, deep, self.ownerDocument or self)
@@ -250,7 +240,7 @@
         except AttributeError:
             d = {}
             self._user_data = d
-        if d.has_key(key):
+        if key in d:
             old = d[key][0]
         if data is None:
             # ignore handlers passed for None
@@ -303,9 +293,10 @@
 
 def _write_data(writer, data):
     "Writes datachars to writer."
-    data = data.replace("&", "&amp;").replace("<", "&lt;")
-    data = data.replace("\"", "&quot;").replace(">", "&gt;")
-    writer.write(data)
+    if data:
+        data = data.replace("&", "&amp;").replace("<", "&lt;"). \
+                    replace("\"", "&quot;").replace(">", "&gt;")
+        writer.write(data)
 
 def _get_elements_by_tagName_helper(parent, name, rc):
     for node in parent.childNodes:
@@ -368,9 +359,6 @@
     def _get_localName(self):
         return self.nodeName.split(":", 1)[-1]
 
-    def _get_name(self):
-        return self.name
-
     def _get_specified(self):
         return self.specified
 
@@ -464,7 +452,7 @@
 defproperty(Attr, "schemaType", doc="Schema type for this attribute.")
 
 
-class NamedNodeMap(NewStyle, GetattrMagic):
+class NamedNodeMap(object):
     """The attribute list is a transient interface to the underlying
     dictionaries.  Mutations here will change the underlying element's
     dictionary.
@@ -503,9 +491,9 @@
 
     def has_key(self, key):
         if isinstance(key, StringTypes):
-            return self._attrs.has_key(key)
+            return key in self._attrs
         else:
-            return self._attrsNS.has_key(key)
+            return key in self._attrsNS
 
     def keys(self):
         return self._attrs.keys()
@@ -521,6 +509,7 @@
 
     __len__ = _get_length
 
+    __hash__ = None # Mutable type can't be correctly hashed
     def __cmp__(self, other):
         if self._attrs is getattr(other, "_attrs", None):
             return 0
@@ -528,7 +517,7 @@
             return cmp(id(self), id(other))
 
     def __getitem__(self, attname_or_tuple):
-        if isinstance(attname_or_tuple, _TupleType):
+        if isinstance(attname_or_tuple, tuple):
             return self._attrsNS[attname_or_tuple]
         else:
             return self._attrs[attname_or_tuple]
@@ -567,7 +556,7 @@
             _clear_id_cache(self._ownerElement)
             del self._attrs[n.nodeName]
             del self._attrsNS[(n.namespaceURI, n.localName)]
-            if n.__dict__.has_key('ownerElement'):
+            if 'ownerElement' in n.__dict__:
                 n.__dict__['ownerElement'] = None
             return n
         else:
@@ -579,7 +568,7 @@
             _clear_id_cache(self._ownerElement)
             del self._attrsNS[(n.namespaceURI, n.localName)]
             del self._attrs[n.nodeName]
-            if n.__dict__.has_key('ownerElement'):
+            if 'ownerElement' in n.__dict__:
                 n.__dict__['ownerElement'] = None
             return n
         else:
@@ -592,7 +581,6 @@
         old = self._attrs.get(node.name)
         if old:
             old.unlink()
-            old.ownerDocument = self._ownerElement.ownerDocument
         self._attrs[node.name] = node
         self._attrsNS[(node.namespaceURI, node.localName)] = node
         node.ownerElement = self._ownerElement
@@ -619,7 +607,7 @@
 AttributeList = NamedNodeMap
 
 
-class TypeInfo(NewStyle):
+class TypeInfo(object):
     __slots__ = 'namespace', 'name'
 
     def __init__(self, namespace, name):
@@ -628,9 +616,9 @@
 
     def __repr__(self):
         if self.namespace:
-            return "<TypeInfo %s (from %s)>" % (`self.name`, `self.namespace`)
+            return "<TypeInfo %r (from %r)>" % (self.name, self.namespace)
         else:
-            return "<TypeInfo %s>" % `self.name`
+            return "<TypeInfo %r>" % self.name
 
     def _get_name(self):
         return self.name
@@ -783,15 +771,14 @@
         # Restore this since the node is still useful and otherwise
         # unlinked
         node.ownerDocument = self.ownerDocument
-        return node
 
     removeAttributeNodeNS = removeAttributeNode
 
     def hasAttribute(self, name):
-        return self._attrs.has_key(name)
+        return name in self._attrs
 
     def hasAttributeNS(self, namespaceURI, localName):
-        return self._attrsNS.has_key((namespaceURI, localName))
+        return (namespaceURI, localName) in self._attrsNS
 
     def getElementsByTagName(self, name):
         return _get_elements_by_tagName_helper(self, name, NodeList())
@@ -818,10 +805,16 @@
             _write_data(writer, attrs[a_name].value)
             writer.write("\"")
         if self.childNodes:
-            writer.write(">%s"%(newl))
-            for node in self.childNodes:
-                node.writexml(writer,indent+addindent,addindent,newl)
-            writer.write("%s</%s>%s" % (indent,self.tagName,newl))
+            writer.write(">")
+            if (len(self.childNodes) == 1 and
+                self.childNodes[0].nodeType == Node.TEXT_NODE):
+                self.childNodes[0].writexml(writer, '', '', '')
+            else:
+                writer.write(newl)
+                for node in self.childNodes:
+                    node.writexml(writer, indent+addindent, addindent, newl)
+                writer.write(indent)
+            writer.write("</%s>%s" % (self.tagName, newl))
         else:
             writer.write("/>%s"%(newl))
 
@@ -903,6 +896,10 @@
         raise xml.dom.NotFoundErr(
             self.nodeName + " nodes do not have children")
 
+    def normalize(self):
+        # For childless nodes, normalize() has nothing to do.
+        pass
+
     def replaceChild(self, newChild, oldChild):
         raise xml.dom.HierarchyRequestErr(
             self.nodeName + " nodes do not have children")
@@ -965,13 +962,13 @@
             dotdotdot = "..."
         else:
             dotdotdot = ""
-        return "<DOM %s node \"%s%s\">" % (
+        return '<DOM %s node "%r%s">' % (
             self.__class__.__name__, data[0:10], dotdotdot)
 
     def substringData(self, offset, count):
         if offset < 0:
             raise xml.dom.IndexSizeErr("offset cannot be negative")
-        if offset > len(self.data):
+        if offset >= len(self.data):
             raise xml.dom.IndexSizeErr("offset cannot be beyond end of data")
         if count < 0:
             raise xml.dom.IndexSizeErr("count cannot be negative")
@@ -983,7 +980,7 @@
     def insertData(self, offset, arg):
         if offset < 0:
             raise xml.dom.IndexSizeErr("offset cannot be negative")
-        if offset > len(self.data):
+        if offset >= len(self.data):
             raise xml.dom.IndexSizeErr("offset cannot be beyond end of data")
         if arg:
             self.data = "%s%s%s" % (
@@ -992,7 +989,7 @@
     def deleteData(self, offset, count):
         if offset < 0:
             raise xml.dom.IndexSizeErr("offset cannot be negative")
-        if offset > len(self.data):
+        if offset >= len(self.data):
             raise xml.dom.IndexSizeErr("offset cannot be beyond end of data")
         if count < 0:
             raise xml.dom.IndexSizeErr("count cannot be negative")
@@ -1002,7 +999,7 @@
     def replaceData(self, offset, count, arg):
         if offset < 0:
             raise xml.dom.IndexSizeErr("offset cannot be negative")
-        if offset > len(self.data):
+        if offset >= len(self.data):
             raise xml.dom.IndexSizeErr("offset cannot be beyond end of data")
         if count < 0:
             raise xml.dom.IndexSizeErr("count cannot be negative")
@@ -1014,12 +1011,10 @@
 
 
 class Text(CharacterData):
-
-    # This class doesn't have an __init__() by design; the intent is
-    # to speed construction of new instances.  Once an instance is
-    # created, the .data and .ownerDocument attributes will need to be
-    # initialized.  Subclasses may add an __init__() and initialize
-    # those members there or require them to be initialized later.
+    # Make sure we don't add an instance __dict__ if we don't already
+    # have one, at least when that's possible:
+    # XXX this does not work, CharacterData is an old-style class
+    # __slots__ = ()
 
     nodeType = Node.TEXT_NODE
     nodeName = "#text"
@@ -1041,7 +1036,7 @@
         return newText
 
     def writexml(self, writer, indent="", addindent="", newl=""):
-        _write_data(writer, "%s%s%s"%(indent, self.data, newl))
+        _write_data(writer, "%s%s%s" % (indent, self.data, newl))
 
     # DOM Level 3 (WD 9 April 2002)
 
@@ -1137,10 +1132,17 @@
         self.data = self.nodeValue = data
 
     def writexml(self, writer, indent="", addindent="", newl=""):
+        if "--" in self.data:
+            raise ValueError("'--' is not allowed in a comment node")
         writer.write("%s<!--%s-->%s" % (indent, self.data, newl))
 
 
 class CDATASection(Text):
+    # Make sure we don't add an instance __dict__ if we don't already
+    # have one, at least when that's possible:
+    # XXX this does not work, Text is an old-style class
+    # __slots__ = ()
+
     nodeType = Node.CDATA_SECTION_NODE
     nodeName = "#cdata-section"
 
@@ -1150,7 +1152,7 @@
         writer.write("<![CDATA[%s]]>" % self.data)
 
 
-class ReadOnlySequentialNamedNodeMap(NewStyle, GetattrMagic):
+class ReadOnlySequentialNamedNodeMap(object):
     __slots__ = '_seq',
 
     def __init__(self, seq=()):
@@ -1174,7 +1176,7 @@
                 return n
 
     def __getitem__(self, name_or_tuple):
-        if isinstance(name_or_tuple, _TupleType):
+        if isinstance(name_or_tuple, tuple):
             node = self.getNamedItemNS(*name_or_tuple)
         else:
             node = self.getNamedItem(name_or_tuple)
@@ -1210,7 +1212,6 @@
         return [self._seq]
 
     def __setstate__(self, state):
-        assert len(state) == 1
         self._seq = state[0]
 
 defproperty(ReadOnlySequentialNamedNodeMap, "length",
@@ -1220,6 +1221,9 @@
 class Identified:
     """Mix-in class that supports the publicId and systemId attributes."""
 
+    # XXX this does not work, this is an old-style class
+    # __slots__ = 'publicId', 'systemId'
+
     def _identified_mixin_init(self, publicId, systemId):
         self.publicId = publicId
         self.systemId = systemId
@@ -1280,15 +1284,15 @@
         writer.write("<!DOCTYPE ")
         writer.write(self.name)
         if self.publicId:
-            writer.write("\n  PUBLIC '%s'\n  '%s'"
-                         % (self.publicId, self.systemId))
+            writer.write("%s  PUBLIC '%s'%s  '%s'"
+                         % (newl, self.publicId, newl, self.systemId))
         elif self.systemId:
-            writer.write("\n  SYSTEM '%s'" % self.systemId)
+            writer.write("%s  SYSTEM '%s'" % (newl, self.systemId))
         if self.internalSubset is not None:
             writer.write(" [")
             writer.write(self.internalSubset)
             writer.write("]")
-        writer.write(">\n")
+        writer.write(">"+newl)
 
 class Entity(Identified, Node):
     attributes = None
@@ -1342,11 +1346,9 @@
 class DOMImplementation(DOMImplementationLS):
     _features = [("core", "1.0"),
                  ("core", "2.0"),
-                 ("core", "3.0"),
                  ("core", None),
                  ("xml", "1.0"),
                  ("xml", "2.0"),
-                 ("xml", "3.0"),
                  ("xml", None),
                  ("ls-load", "3.0"),
                  ("ls-load", None),
@@ -1420,7 +1422,7 @@
     def _create_document(self):
         return Document()
 
-class ElementInfo(NewStyle):
+class ElementInfo(object):
     """Object that represents content-model information for an element.
 
     This implementation is not expected to be used in practice; DOM
@@ -1449,7 +1451,7 @@
         return False
 
     def isId(self, aname):
-        """Returns true iff the named attribte is a DTD-style ID."""
+        """Returns true iff the named attribute is a DTD-style ID."""
         return False
 
     def isIdNS(self, namespaceURI, localName):
@@ -1664,7 +1666,7 @@
         return n
 
     def getElementById(self, id):
-        if self._id_cache.has_key(id):
+        if id in self._id_cache:
             return self._id_cache[id]
         if not (self._elem_info or self._magic_id_count):
             return None
@@ -1741,9 +1743,9 @@
     def writexml(self, writer, indent="", addindent="", newl="",
                  encoding = None):
         if encoding is None:
-            writer.write('<?xml version="1.0" ?>\n')
+            writer.write('<?xml version="1.0" ?>'+newl)
         else:
-            writer.write('<?xml version="1.0" encoding="%s"?>\n' % encoding)
+            writer.write('<?xml version="1.0" encoding="%s"?>%s' % (encoding, newl))
         for node in self.childNodes:
             node.writexml(writer, indent, addindent, newl)
 
@@ -1878,7 +1880,7 @@
                     e._call_user_data_handler(operation, n, entity)
     else:
         # Note the cloning of Document and DocumentType nodes is
-        # implemenetation specific.  minidom handles those cases
+        # implementation specific.  minidom handles those cases
         # directly in the cloneNode() methods.
         raise xml.dom.NotSupportedErr("Cannot clone node %s" % repr(node))
 
@@ -1904,34 +1906,30 @@
     return StringIO()
 
 def _do_pulldom_parse(func, args, kwargs):
-    events = apply(func, args, kwargs)
+    events = func(*args, **kwargs)
     toktype, rootNode = events.getEvent()
     events.expandNode(rootNode)
     events.clear()
-    rootNode.normalize()
     return rootNode
 
 def parse(file, parser=None, bufsize=None):
     """Parse a file into a DOM by filename or file object."""
-    import sys
-    if parser is None and bufsize is None and sys.platform[:4] != "java":
-        try:
-            from xml.dom import expatbuilder
-            return expatbuilder.parse(file)
-        except ImportError:
-            pass
-    from xml.dom import pulldom
-    return _do_pulldom_parse(pulldom.parse, (file,),
+    if parser is None and not bufsize and sys.platform[:4] != "java":
+        from xml.dom import expatbuilder
+        return expatbuilder.parse(file)
+    else:
+        from xml.dom import pulldom
+        return _do_pulldom_parse(pulldom.parse, (file,),
             {'parser': parser, 'bufsize': bufsize})
 
 def parseString(string, parser=None):
     """Parse a file into a DOM from a string."""
-    import sys
     if parser is None and sys.platform[:4] != "java":
         from xml.dom import expatbuilder
         return expatbuilder.parseString(string)
-    from xml.dom import pulldom
-    return _do_pulldom_parse(pulldom.parseString, (string,),
+    else:
+        from xml.dom import pulldom
+        return _do_pulldom_parse(pulldom.parseString, (string,),
                                  {'parser': parser})
 
 def getDOMImplementation(features=None):
diff --git a/Lib/xml/dom/xmlbuilder.py b/Lib/xml/dom/xmlbuilder.py
deleted file mode 100644
--- a/Lib/xml/dom/xmlbuilder.py
+++ /dev/null
@@ -1,388 +0,0 @@
-"""Implementation of the DOM Level 3 'LS-Load' feature."""
-
-import copy
-import xml.dom
-
-from xml.dom.minicompat import *
-
-from xml.dom.NodeFilter import NodeFilter
-
-
-__all__ = ["DOMBuilder", "DOMEntityResolver", "DOMInputSource"]
-
-
-class Options:
-    """Features object that has variables set for each DOMBuilder feature.
-
-    The DOMBuilder class uses an instance of this class to pass settings to
-    the ExpatBuilder class.
-    """
-
-    # Note that the DOMBuilder class in LoadSave constrains which of these
-    # values can be set using the DOM Level 3 LoadSave feature.
-
-    namespaces = 1
-    namespace_declarations = True
-    validation = False
-    external_parameter_entities = True
-    external_general_entities = True
-    external_dtd_subset = True
-    validate_if_schema = False
-    validate = False
-    datatype_normalization = False
-    create_entity_ref_nodes = True
-    entities = True
-    whitespace_in_element_content = True
-    cdata_sections = True
-    comments = True
-    charset_overrides_xml_encoding = True
-    infoset = False
-    supported_mediatypes_only = False
-
-    errorHandler = None
-    filter = None
-
-
-class DOMBuilder:
-    entityResolver = None
-    errorHandler = None
-    filter = None
-
-    ACTION_REPLACE = 1
-    ACTION_APPEND_AS_CHILDREN = 2
-    ACTION_INSERT_AFTER = 3
-    ACTION_INSERT_BEFORE = 4
-
-    _legal_actions = (ACTION_REPLACE, ACTION_APPEND_AS_CHILDREN,
-                      ACTION_INSERT_AFTER, ACTION_INSERT_BEFORE)
-
-    def __init__(self):
-        self._options = Options()
-
-    def _get_entityResolver(self):
-        return self.entityResolver
-    def _set_entityResolver(self, entityResolver):
-        self.entityResolver = entityResolver
-
-    def _get_errorHandler(self):
-        return self.errorHandler
-    def _set_errorHandler(self, errorHandler):
-        self.errorHandler = errorHandler
-
-    def _get_filter(self):
-        return self.filter
-    def _set_filter(self, filter):
-        self.filter = filter
-
-    def setFeature(self, name, state):
-        if self.supportsFeature(name):
-            state = state and 1 or 0
-            try:
-                settings = self._settings[(_name_xform(name), state)]
-            except KeyError:
-                raise xml.dom.NotSupportedErr(
-                    "unsupported feature: " + `name`)
-            else:
-                for name, value in settings:
-                    setattr(self._options, name, value)
-        else:
-            raise xml.dom.NotFoundErr("unknown feature: " + repr(name))
-
-    def supportsFeature(self, name):
-        return hasattr(self._options, _name_xform(name))
-
-    def canSetFeature(self, name, state):
-        key = (_name_xform(name), state and 1 or 0)
-        return self._settings.has_key(key)
-
-    # This dictionary maps from (feature,value) to a list of
-    # (option,value) pairs that should be set on the Options object.
-    # If a (feature,value) setting is not in this dictionary, it is
-    # not supported by the DOMBuilder.
-    #
-    _settings = {
-        ("namespace_declarations", 0): [
-            ("namespace_declarations", 0)],
-        ("namespace_declarations", 1): [
-            ("namespace_declarations", 1)],
-        ("validation", 0): [
-            ("validation", 0)],
-        ("external_general_entities", 0): [
-            ("external_general_entities", 0)],
-        ("external_general_entities", 1): [
-            ("external_general_entities", 1)],
-        ("external_parameter_entities", 0): [
-            ("external_parameter_entities", 0)],
-        ("external_parameter_entities", 1): [
-            ("external_parameter_entities", 1)],
-        ("validate_if_schema", 0): [
-            ("validate_if_schema", 0)],
-        ("create_entity_ref_nodes", 0): [
-            ("create_entity_ref_nodes", 0)],
-        ("create_entity_ref_nodes", 1): [
-            ("create_entity_ref_nodes", 1)],
-        ("entities", 0): [
-            ("create_entity_ref_nodes", 0),
-            ("entities", 0)],
-        ("entities", 1): [
-            ("entities", 1)],
-        ("whitespace_in_element_content", 0): [
-            ("whitespace_in_element_content", 0)],
-        ("whitespace_in_element_content", 1): [
-            ("whitespace_in_element_content", 1)],
-        ("cdata_sections", 0): [
-            ("cdata_sections", 0)],
-        ("cdata_sections", 1): [
-            ("cdata_sections", 1)],
-        ("comments", 0): [
-            ("comments", 0)],
-        ("comments", 1): [
-            ("comments", 1)],
-        ("charset_overrides_xml_encoding", 0): [
-            ("charset_overrides_xml_encoding", 0)],
-        ("charset_overrides_xml_encoding", 1): [
-            ("charset_overrides_xml_encoding", 1)],
-        ("infoset", 0): [],
-        ("infoset", 1): [
-            ("namespace_declarations", 0),
-            ("validate_if_schema", 0),
-            ("create_entity_ref_nodes", 0),
-            ("entities", 0),
-            ("cdata_sections", 0),
-            ("datatype_normalization", 1),
-            ("whitespace_in_element_content", 1),
-            ("comments", 1),
-            ("charset_overrides_xml_encoding", 1)],
-        ("supported_mediatypes_only", 0): [
-            ("supported_mediatypes_only", 0)],
-        ("namespaces", 0): [
-            ("namespaces", 0)],
-        ("namespaces", 1): [
-            ("namespaces", 1)],
-    }
-
-    def getFeature(self, name):
-        xname = _name_xform(name)
-        try:
-            return getattr(self._options, xname)
-        except AttributeError:
-            if name == "infoset":
-                options = self._options
-                return (options.datatype_normalization
-                        and options.whitespace_in_element_content
-                        and options.comments
-                        and options.charset_overrides_xml_encoding
-                        and not (options.namespace_declarations
-                                 or options.validate_if_schema
-                                 or options.create_entity_ref_nodes
-                                 or options.entities
-                                 or options.cdata_sections))
-            raise xml.dom.NotFoundErr("feature %s not known" % repr(name))
-
-    def parseURI(self, uri):
-        if self.entityResolver:
-            input = self.entityResolver.resolveEntity(None, uri)
-        else:
-            input = DOMEntityResolver().resolveEntity(None, uri)
-        return self.parse(input)
-
-    def parse(self, input):
-        options = copy.copy(self._options)
-        options.filter = self.filter
-        options.errorHandler = self.errorHandler
-        fp = input.byteStream
-        if fp is None and options.systemId:
-            import urllib2
-            fp = urllib2.urlopen(input.systemId)
-        return self._parse_bytestream(fp, options)
-
-    def parseWithContext(self, input, cnode, action):
-        if action not in self._legal_actions:
-            raise ValueError("not a legal action")
-        raise NotImplementedError("Haven't written this yet...")
-
-    def _parse_bytestream(self, stream, options):
-        import xml.dom.expatbuilder
-        builder = xml.dom.expatbuilder.makeBuilder(options)
-        return builder.parseFile(stream)
-
-
-def _name_xform(name):
-    return name.lower().replace('-', '_')
-
-
-class DOMEntityResolver(NewStyle):
-    __slots__ = '_opener',
-
-    def resolveEntity(self, publicId, systemId):
-        assert systemId is not None
-        source = DOMInputSource()
-        source.publicId = publicId
-        source.systemId = systemId
-        source.byteStream = self._get_opener().open(systemId)
-
-        # determine the encoding if the transport provided it
-        source.encoding = self._guess_media_encoding(source)
-
-        # determine the base URI is we can
-        import posixpath, urlparse
-        parts = urlparse.urlparse(systemId)
-        scheme, netloc, path, params, query, fragment = parts
-        # XXX should we check the scheme here as well?
-        if path and not path.endswith("/"):
-            path = posixpath.dirname(path) + "/"
-            parts = scheme, netloc, path, params, query, fragment
-            source.baseURI = urlparse.urlunparse(parts)
-
-        return source
-
-    def _get_opener(self):
-        try:
-            return self._opener
-        except AttributeError:
-            self._opener = self._create_opener()
-            return self._opener
-
-    def _create_opener(self):
-        import urllib2
-        return urllib2.build_opener()
-
-    def _guess_media_encoding(self, source):
-        info = source.byteStream.info()
-        if info.has_key("Content-Type"):
-            for param in info.getplist():
-                if param.startswith("charset="):
-                    return param.split("=", 1)[1].lower()
-
-
-class DOMInputSource(NewStyle):
-    __slots__ = ('byteStream', 'characterStream', 'stringData',
-                 'encoding', 'publicId', 'systemId', 'baseURI')
-
-    def __init__(self):
-        self.byteStream = None
-        self.characterStream = None
-        self.stringData = None
-        self.encoding = None
-        self.publicId = None
-        self.systemId = None
-        self.baseURI = None
-
-    def _get_byteStream(self):
-        return self.byteStream
-    def _set_byteStream(self, byteStream):
-        self.byteStream = byteStream
-
-    def _get_characterStream(self):
-        return self.characterStream
-    def _set_characterStream(self, characterStream):
-        self.characterStream = characterStream
-
-    def _get_stringData(self):
-        return self.stringData
-    def _set_stringData(self, data):
-        self.stringData = data
-
-    def _get_encoding(self):
-        return self.encoding
-    def _set_encoding(self, encoding):
-        self.encoding = encoding
-
-    def _get_publicId(self):
-        return self.publicId
-    def _set_publicId(self, publicId):
-        self.publicId = publicId
-
-    def _get_systemId(self):
-        return self.systemId
-    def _set_systemId(self, systemId):
-        self.systemId = systemId
-
-    def _get_baseURI(self):
-        return self.baseURI
-    def _set_baseURI(self, uri):
-        self.baseURI = uri
-
-
-class DOMBuilderFilter:
-    """Element filter which can be used to tailor construction of
-    a DOM instance.
-    """
-
-    # There's really no need for this class; concrete implementations
-    # should just implement the endElement() and startElement()
-    # methods as appropriate.  Using this makes it easy to only
-    # implement one of them.
-
-    FILTER_ACCEPT = 1
-    FILTER_REJECT = 2
-    FILTER_SKIP = 3
-    FILTER_INTERRUPT = 4
-
-    whatToShow = NodeFilter.SHOW_ALL
-
-    def _get_whatToShow(self):
-        return self.whatToShow
-
-    def acceptNode(self, element):
-        return self.FILTER_ACCEPT
-
-    def startContainer(self, element):
-        return self.FILTER_ACCEPT
-
-del NodeFilter
-
-
-class DocumentLS:
-    """Mixin to create documents that conform to the load/save spec."""
-
-    async = False
-
-    def _get_async(self):
-        return False
-    def _set_async(self, async):
-        if async:
-            raise xml.dom.NotSupportedErr(
-                "asynchronous document loading is not supported")
-
-    def abort(self):
-        # What does it mean to "clear" a document?  Does the
-        # documentElement disappear?
-        raise NotImplementedError(
-            "haven't figured out what this means yet")
-
-    def load(self, uri):
-        raise NotImplementedError("haven't written this yet")
-
-    def loadXML(self, source):
-        raise NotImplementedError("haven't written this yet")
-
-    def saveXML(self, snode):
-        if snode is None:
-            snode = self
-        elif snode.ownerDocument is not self:
-            raise xml.dom.WrongDocumentErr()
-        return snode.toxml()
-
-
-class DOMImplementationLS:
-    MODE_SYNCHRONOUS = 1
-    MODE_ASYNCHRONOUS = 2
-
-    def createDOMBuilder(self, mode, schemaType):
-        if schemaType is not None:
-            raise xml.dom.NotSupportedErr(
-                "schemaType not yet supported")
-        if mode == self.MODE_SYNCHRONOUS:
-            return DOMBuilder()
-        if mode == self.MODE_ASYNCHRONOUS:
-            raise xml.dom.NotSupportedErr(
-                "asynchronous builders are not supported")
-        raise ValueError("unknown value for mode")
-
-    def createDOMWriter(self):
-        raise NotImplementedError(
-            "the writer interface hasn't been written yet!")
-
-    def createDOMInputSource(self):
-        return DOMInputSource()
diff --git a/Lib/xml/sax/drivers2/drv_javasax.py b/Lib/xml/sax/drivers2/drv_javasax.py
--- a/Lib/xml/sax/drivers2/drv_javasax.py
+++ b/Lib/xml/sax/drivers2/drv_javasax.py
@@ -33,14 +33,34 @@
 try:
     from javax.xml.parsers import SAXParserFactory, ParserConfigurationException
     factory = SAXParserFactory.newInstance()
+    # Set this feature to false; otherwise the parser will attempt to load
+    # external DTDs such as DOCTYPE doc PUBLIC 'http://xml.python.org/public',
+    # which are purposefully very much nonexistent in tests such as
+    # test_minidom.
+    #
+    # NOTE that this factory is by default nonvalidating anyway, as
+    # needed for Python usage.
+    factory.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", False)
     jaxp = 1
 except ImportError:
     jaxp = 0
 
 from java.lang import String
 
+class SAXUnicodeDecodeError(UnicodeDecodeError):
+    def __init__(self, message):
+        self.message = message
+    def __repr__(self):
+        return "SAXUnicodeDecodeError: caused by %s" % (self.message,)
+    __str__ = __repr__
+
 
 def _wrap_sax_exception(e):
+    # work around issues in how we report exception - note this is an
+    # implementation detail, so it's not guaranteed to always report
+    # this exception. But in the end, it's from Xerces, so should be OK.
+    if "org.apache.xerces.impl.io.MalformedByteSequenceException" in str(e.getException().getClass()):
+        return SAXUnicodeDecodeError(str(e))
     return _exceptions.SAXParseException(e.message,
                                          e.exception,
                                          SimpleLocator(e.columnNumber,
diff --git a/extlibs/xml-apis-2.11.0.jar b/extlibs/xml-apis-2.11.0.jar
deleted file mode 100644
index 46733464fc746776c331ecc51061f3a05e662fd1..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
GIT binary patch
[stripped]
diff --git a/lib-python/2.7/test/test_minidom.py b/lib-python/2.7/test/test_minidom.py
--- a/lib-python/2.7/test/test_minidom.py
+++ b/lib-python/2.7/test/test_minidom.py
@@ -1,5 +1,6 @@
 # test for xml.dom.minidom
 
+import copy
 import pickle
 from StringIO import StringIO
 from test.test_support import verbose, run_unittest, findfile
@@ -14,7 +15,13 @@
 
 
 tstfile = findfile("test.xml", subdir="xmltestdata")
-
+sample = ("<?xml version='1.0' encoding='us-ascii'?>\n"
+          "<!DOCTYPE doc PUBLIC 'http://xml.python.org/public'"
+          " 'http://xml.python.org/system' [\n"
+          "  <!ELEMENT e EMPTY>\n"
+          "  <!ENTITY ent SYSTEM 'http://xml.python.org/entity'>\n"
+          "]><doc attr='value'> text\n"
+          "<?pi sample?> <!-- comment --> <e/> </doc>")
 
 # The tests of DocumentType importing use these helpers to construct
 # the documents to work with, since not all DOM builders actually
@@ -340,19 +347,6 @@
                 and el.getAttribute("spam2") == "bam2")
         dom.unlink()
 
-    def testGetAttrList(self):
-        pass
-
-    def testGetAttrValues(self): pass
-
-    def testGetAttrLength(self): pass
-
-    def testGetAttribute(self): pass
-
-    def testGetAttributeNS(self): pass
-
-    def testGetAttributeNode(self): pass
-
     def testGetElementsByTagNameNS(self):
         d="""<foo xmlns:minidom='http://pyxml.sf.net/minidom'>
         <minidom:myelem/>
@@ -423,8 +417,6 @@
         self.confirm(str(node) == repr(node))
         dom.unlink()
 
-    def testTextNodeRepr(self): pass
-
     def testWriteXML(self):
         str = '<?xml version="1.0" ?><a b="c"/>'
         dom = parseString(str)
@@ -488,14 +480,6 @@
                 and pi.localName is None
                 and pi.namespaceURI == xml.dom.EMPTY_NAMESPACE)
 
-    def testProcessingInstructionRepr(self): pass
-
-    def testTextRepr(self): pass
-
-    def testWriteText(self): pass
-
-    def testDocumentElement(self): pass
-
     def testTooManyDocumentElements(self):
         doc = parseString("<doc/>")
         elem = doc.createElement("extra")
@@ -504,26 +488,6 @@
         elem.unlink()
         doc.unlink()
 
-    def testCreateElementNS(self): pass
-
-    def testCreateAttributeNS(self): pass
-
-    def testParse(self): pass
-
-    def testParseString(self): pass
-
-    def testComment(self): pass
-
-    def testAttrListItem(self): pass
-
-    def testAttrListItems(self): pass
-
-    def testAttrListItemNS(self): pass
-
-    def testAttrListKeys(self): pass
-
-    def testAttrListKeysNS(self): pass
-
     def testRemoveNamedItem(self):
         doc = parseString("<doc a=''/>")
         e = doc.documentElement
@@ -543,32 +507,6 @@
         self.assertRaises(xml.dom.NotFoundErr, attrs.removeNamedItemNS,
                           "http://xml.python.org/", "b")
 
-    def testAttrListValues(self): pass
-
-    def testAttrListLength(self): pass
-
-    def testAttrList__getitem__(self): pass
-
-    def testAttrList__setitem__(self): pass
-
-    def testSetAttrValueandNodeValue(self): pass
-
-    def testParseElement(self): pass
-
-    def testParseAttributes(self): pass
-
-    def testParseElementNamespaces(self): pass
-
-    def testParseAttributeNamespaces(self): pass
-
-    def testParseProcessingInstructions(self): pass
-
-    def testChildNodes(self): pass
-
-    def testFirstChild(self): pass
-
-    def testHasChildNodes(self): pass
-
     def _testCloneElementCopiesAttributes(self, e1, e2, test):
         attrs1 = e1.attributes
         attrs2 = e2.attributes
@@ -1446,52 +1384,55 @@
         self.confirm(e.isSameNode(doc.getElementById("w"))
                 and a2.isId)
 
-    def testPickledDocument(self):
-        doc = parseString("<?xml version='1.0' encoding='us-ascii'?>\n"
-                    "<!DOCTYPE doc PUBLIC 'http://xml.python.org/public'"
-                    " 'http://xml.python.org/system' [\n"
-                    "  <!ELEMENT e EMPTY>\n"
-                    "  <!ENTITY ent SYSTEM 'http://xml.python.org/entity'>\n"
-                    "]><doc attr='value'> text\n"
-                    "<?pi sample?> <!-- comment --> <e/> </doc>")
-        s = pickle.dumps(doc)
-        doc2 = pickle.loads(s)
+    def assert_recursive_equal(self, doc, doc2):
         stack = [(doc, doc2)]
         while stack:
             n1, n2 = stack.pop()
-            self.confirm(n1.nodeType == n2.nodeType
-                    and len(n1.childNodes) == len(n2.childNodes)
-                    and n1.nodeName == n2.nodeName
-                    and not n1.isSameNode(n2)
-                    and not n2.isSameNode(n1))
+            self.assertEqual(n1.nodeType, n2.nodeType)
+            self.assertEqual(len(n1.childNodes), len(n2.childNodes))
+            self.assertEqual(n1.nodeName, n2.nodeName)
+            self.assertFalse(n1.isSameNode(n2))
+            self.assertFalse(n2.isSameNode(n1))
             if n1.nodeType == Node.DOCUMENT_TYPE_NODE:
                 len(n1.entities)
                 len(n2.entities)
                 len(n1.notations)
                 len(n2.notations)
-                self.confirm(len(n1.entities) == len(n2.entities)
-                        and len(n1.notations) == len(n2.notations))
+                self.assertEqual(len(n1.entities), len(n2.entities))
+                self.assertEqual(len(n1.notations), len(n2.notations))
                 for i in range(len(n1.notations)):
                     # XXX this loop body doesn't seem to be executed?
                     no1 = n1.notations.item(i)
                     no2 = n1.notations.item(i)
-                    self.confirm(no1.name == no2.name
-                            and no1.publicId == no2.publicId
-                            and no1.systemId == no2.systemId)
+                    self.assertEqual(no1.name, no2.name)
+                    self.assertEqual(no1.publicId, no2.publicId)
+                    self.assertEqual(no1.systemId, no2.systemId)
                     stack.append((no1, no2))
                 for i in range(len(n1.entities)):
                     e1 = n1.entities.item(i)
                     e2 = n2.entities.item(i)
-                    self.confirm(e1.notationName == e2.notationName
-                            and e1.publicId == e2.publicId
-                            and e1.systemId == e2.systemId)
+                    self.assertEqual(e1.notationName, e2.notationName)
+                    self.assertEqual(e1.publicId, e2.publicId)
+                    self.assertEqual(e1.systemId, e2.systemId)
                     stack.append((e1, e2))
             if n1.nodeType != Node.DOCUMENT_NODE:
-                self.confirm(n1.ownerDocument.isSameNode(doc)
-                        and n2.ownerDocument.isSameNode(doc2))
+                self.assertTrue(n1.ownerDocument.isSameNode(doc))
+                self.assertTrue(n2.ownerDocument.isSameNode(doc2))
             for i in range(len(n1.childNodes)):
                 stack.append((n1.childNodes[i], n2.childNodes[i]))
 
+    def testPickledDocument(self):
+        doc = parseString(sample)
+        for proto in range(pickle.HIGHEST_PROTOCOL + 1):
+            s = pickle.dumps(doc, proto)
+            doc2 = pickle.loads(s)
+            self.assert_recursive_equal(doc, doc2)
+
+    def testDeepcopiedDocument(self):
+        doc = parseString(sample)
+        doc2 = copy.deepcopy(doc)
+        self.assert_recursive_equal(doc, doc2)
+
     def testSerializeCommentNodeWithDoubleHyphen(self):
         doc = create_doc_without_doctype()
         doc.appendChild(doc.createComment("foo--bar"))
diff --git a/lib-python/2.7/test/test_xml_etree.py b/lib-python/2.7/test/test_xml_etree.py
--- a/lib-python/2.7/test/test_xml_etree.py
+++ b/lib-python/2.7/test/test_xml_etree.py
@@ -713,14 +713,21 @@
     end {namespace}root
     end-ns None
 
+    >>> import StringIO
+
+    >>> events = ('start-ns', 'end-ns')
+    >>> context = ET.iterparse(StringIO.StringIO(r"<root xmlns=''/>"), events)
+    >>> for action, elem in context:
+    ...   print action, elem
+    start-ns ('', '')
+    end-ns None
+
     >>> events = ("start", "end", "bogus")
     >>> with open(SIMPLE_XMLFILE, "rb") as f:
     ...     iterparse(f, events)
     Traceback (most recent call last):
     ValueError: unknown event 'bogus'
 
-    >>> import StringIO
-
     >>> source = StringIO.StringIO(
     ...     "<?xml version='1.0' encoding='iso-8859-1'?>\\n"
     ...     "<body xmlns='http://&#233;ffbot.org/ns'\\n"
@@ -883,6 +890,12 @@
     >>> check_encoding("iso-8859-15")
     >>> check_encoding("cp437")
     >>> check_encoding("mac-roman")
+    >>> check_encoding("gbk")
+    Traceback (most recent call last):
+    ValueError: multi-byte encodings are not supported
+    >>> check_encoding("cp037")
+    Traceback (most recent call last):
+    ParseError: unknown encoding: line 1, column 30
     """
     ET.XML("<?xml version='1.0' encoding='%s'?><xml />" % encoding)
 
@@ -1769,6 +1782,16 @@
 
     """
 
+def bug_18347():
+    """
+
+    >>> e = ET.XML('<html><CamelCase>text</CamelCase></html>')
+    >>> serialize(e)
+    '<html><CamelCase>text</CamelCase></html>'
+    >>> serialize(e, method="html")
+    '<html><CamelCase>text</CamelCase></html>'
+    """
+
 # --------------------------------------------------------------------
 # reported on bugs.python.org
 
diff --git a/lib-python/2.7/test/test_xml_etree_c.py b/lib-python/2.7/test/test_xml_etree_c.py
--- a/lib-python/2.7/test/test_xml_etree_c.py
+++ b/lib-python/2.7/test/test_xml_etree_c.py
@@ -30,6 +30,38 @@
         finally:
             data = None
 
+    def test_del_attribute(self):
+        element = cET.Element('tag')
+
+        element.tag = 'TAG'
+        with self.assertRaises(AttributeError):
+            del element.tag
+        self.assertEqual(element.tag, 'TAG')
+
+        with self.assertRaises(AttributeError):
+            del element.text
+        self.assertIsNone(element.text)
+        element.text = 'TEXT'
+        with self.assertRaises(AttributeError):
+            del element.text
+        self.assertEqual(element.text, 'TEXT')
+
+        with self.assertRaises(AttributeError):
+            del element.tail
+        self.assertIsNone(element.tail)
+        element.tail = 'TAIL'
+        with self.assertRaises(AttributeError):
+            del element.tail
+        self.assertEqual(element.tail, 'TAIL')
+
+        with self.assertRaises(AttributeError):
+            del element.attrib
+        self.assertEqual(element.attrib, {})
+        element.attrib = {'A': 'B', 'C': 'D'}
+        with self.assertRaises(AttributeError):
+            del element.attrib
+        self.assertEqual(element.attrib, {'A': 'B', 'C': 'D'})
+
 
 def test_main():
     from test import test_xml_etree, test_xml_etree_c
diff --git a/lib-python/2.7/test/test_xmlrpc.py b/lib-python/2.7/test/test_xmlrpc.py
--- a/lib-python/2.7/test/test_xmlrpc.py
+++ b/lib-python/2.7/test/test_xmlrpc.py
@@ -19,11 +19,9 @@
     threading = None
 
 try:
-    unicode
-except NameError:
-    have_unicode = False
-else:
-    have_unicode = True
+    import gzip
+except ImportError:
+    gzip = None
 
 alist = [{'astring': 'foo@bar.baz.spam',
           'afloat': 7283.43,
@@ -32,8 +30,6 @@
           'anotherlist': ['.zyx.41'],
           'abase64': xmlrpclib.Binary("my dog has fleas"),
           'boolean': xmlrpclib.False,
-          'unicode': u'\u4000\u6000\u8000',
-          u'ukey\u4000': 'regular value',
           'datetime1': xmlrpclib.DateTime('20050210T11:41:23'),
           'datetime2': xmlrpclib.DateTime(
                         (2005, 02, 10, 11, 41, 23, 0, 1, -1)),
@@ -41,6 +37,12 @@
                         datetime.datetime(2005, 02, 10, 11, 41, 23)),
           }]
 
+if test_support.have_unicode:
+    alist[0].update({
+          'unicode': test_support.u(r'\u4000\u6000\u8000'),
+          test_support.u(r'ukey\u4000'): 'regular value',
+    })
+
 class XMLRPCTestCase(unittest.TestCase):
 
     def test_dump_load(self):
@@ -145,6 +147,25 @@
                          xmlrpclib.loads(strg)[0][0])
         self.assertRaises(TypeError, xmlrpclib.dumps, (arg1,))
 
+    @test_support.requires_unicode
+    def test_dump_encoding(self):
+        value = {test_support.u(r'key\u20ac\xa4'):
+                 test_support.u(r'value\u20ac\xa4')}
+        strg = xmlrpclib.dumps((value,), encoding='iso-8859-15')
+        strg = "<?xml version='1.0' encoding='iso-8859-15'?>" + strg
+        self.assertEqual(xmlrpclib.loads(strg)[0][0], value)
+
+        strg = xmlrpclib.dumps((value,), encoding='iso-8859-15',
+                               methodresponse=True)
+        self.assertEqual(xmlrpclib.loads(strg)[0][0], value)
+
+        methodname = test_support.u(r'method\u20ac\xa4')
+        strg = xmlrpclib.dumps((value,), encoding='iso-8859-15',
+                               methodname=methodname)
+        self.assertEqual(xmlrpclib.loads(strg)[0][0], value)
+        self.assertEqual(xmlrpclib.loads(strg)[1], methodname)
+
+    @test_support.requires_unicode
     def test_default_encoding_issues(self):
         # SF bug #1115989: wrong decoding in '_stringify'
         utf8 = """<?xml version='1.0' encoding='iso-8859-1'?>
@@ -177,7 +198,7 @@
                 temp_sys.setdefaultencoding(old_encoding)
 
         items = d.items()
-        if have_unicode:
+        if test_support.have_unicode:
             self.assertEqual(s, u"abc \x95")
             self.assertIsInstance(s, unicode)
             self.assertEqual(items, [(u"def \x96", u"ghi \x97")])
@@ -277,7 +298,7 @@
 # The evt is set twice.  First when the server is ready to serve.
 # Second when the server has been shutdown.  The user must clear
 # the event after it has been set the first time to catch the second set.
-def http_server(evt, numrequests, requestHandler=None):
+def http_server(evt, numrequests, requestHandler=None, encoding=None):
     class TestInstanceClass:
         def div(self, x, y):
             return x // y
@@ -301,6 +322,7 @@
     if not requestHandler:
         requestHandler = SimpleXMLRPCServer.SimpleXMLRPCRequestHandler
     serv = MyXMLRPCServer(("localhost", 0), requestHandler,
+                          encoding=encoding,
                           logRequests=False, bind_and_activate=False)
     try:
         serv.socket.settimeout(3)
@@ -317,6 +339,7 @@
         serv.register_multicall_functions()
         serv.register_function(pow)
         serv.register_function(lambda x,y: x+y, 'add')
+        serv.register_function(lambda x: x, test_support.u(r't\xea\u0161t'))
         serv.register_function(my_function)
         serv.register_instance(TestInstanceClass())
         evt.set()
@@ -458,9 +481,10 @@
                 # protocol error; provide additional information in test output
                 self.fail("%s\n%s" % (e, getattr(e, "headers", "")))
 
+    @test_support.requires_unicode
     def test_nonascii(self):
-        start_string = 'P\N{LATIN SMALL LETTER Y WITH CIRCUMFLEX}t'
-        end_string = 'h\N{LATIN SMALL LETTER O WITH HORN}n'
+        start_string = test_support.u(r'P\N{LATIN SMALL LETTER Y WITH CIRCUMFLEX}t')
+        end_string = test_support.u(r'h\N{LATIN SMALL LETTER O WITH HORN}n')
 
         try:
             p = xmlrpclib.ServerProxy(URL)
@@ -472,10 +496,38 @@
                 # protocol error; provide additional information in test output
                 self.fail("%s\n%s" % (e, getattr(e, "headers", "")))
 
+    @test_support.requires_unicode
     def test_unicode_host(self):
         server = xmlrpclib.ServerProxy(u"http://%s:%d/RPC2"%(ADDR, PORT))
         self.assertEqual(server.add("a", u"\xe9"), u"a\xe9")
 
+    @test_support.requires_unicode
+    def test_client_encoding(self):
+        start_string = unichr(0x20ac)
+        end_string = unichr(0xa4)
+
+        try:
+            p = xmlrpclib.ServerProxy(URL, encoding='iso-8859-15')
+            self.assertEqual(p.add(start_string, end_string),
+                             start_string + end_string)
+        except (xmlrpclib.ProtocolError, socket.error) as e:
+            # ignore failures due to non-blocking socket unavailable errors.
+            if not is_unavailable_exception(e):
+                # protocol error; provide additional information in test output
+                self.fail("%s\n%s" % (e, getattr(e, "headers", "")))
+
+    @test_support.requires_unicode
+    def test_nonascii_methodname(self):
+        try:
+            p = xmlrpclib.ServerProxy(URL, encoding='iso-8859-15')
+            m = getattr(p, 't\xea\xa8t')
+            self.assertEqual(m(42), 42)
+        except (xmlrpclib.ProtocolError, socket.error) as e:
+            # ignore failures due to non-blocking socket unavailable errors.
+            if not is_unavailable_exception(e):
+                # protocol error; provide additional information in test output
+                self.fail("%s\n%s" % (e, getattr(e, "headers", "")))
+
     # [ch] The test 404 is causing lots of false alarms.
     def XXXtest_404(self):
         # send POST with httplib, it should return 404 header and
@@ -493,6 +545,7 @@
             p = xmlrpclib.ServerProxy(URL)
             meth = p.system.listMethods()
             expected_methods = set(['pow', 'div', 'my_function', 'add',
+                                    test_support.u(r't\xea\u0161t'),
                                     'system.listMethods', 'system.methodHelp',
                                     'system.methodSignature', 'system.multicall'])
             self.assertEqual(set(meth), expected_methods)
@@ -595,6 +648,27 @@
         conn.request('POST', '/RPC2 HTTP/1.0\r\nContent-Length: 100\r\n\r\nbye')
         conn.close()
 
+class SimpleServerEncodingTestCase(BaseServerTestCase):
+    @staticmethod
+    def threadFunc(evt, numrequests, requestHandler=None, encoding=None):
+        http_server(evt, numrequests, requestHandler, 'iso-8859-15')
+
+    @test_support.requires_unicode
+    def test_server_encoding(self):
+        start_string = unichr(0x20ac)
+        end_string = unichr(0xa4)
+
+        try:
+            p = xmlrpclib.ServerProxy(URL)
+            self.assertEqual(p.add(start_string, end_string),
+                             start_string + end_string)
+        except (xmlrpclib.ProtocolError, socket.error) as e:
+            # ignore failures due to non-blocking socket unavailable errors.
+            if not is_unavailable_exception(e):
+                # protocol error; provide additional information in test output
+                self.fail("%s\n%s" % (e, getattr(e, "headers", "")))
+
+
 class MultiPathServerTestCase(BaseServerTestCase):
     threadFunc = staticmethod(http_multi_server)
     request_count = 2
@@ -681,6 +755,7 @@
 
 #A test case that verifies that gzip encoding works in both directions
 #(for a request and the response)
+@unittest.skipUnless(gzip, 'gzip not available')
 class GzipServerTestCase(BaseServerTestCase):
     #a request handler that supports keep-alive and logs requests into a
     #class variable
@@ -731,7 +806,7 @@
         with cm:
             p.pow(6, 8)
 
-    def test_gsip_response(self):
+    def test_gzip_response(self):
         t = self.Transport()
         p = xmlrpclib.ServerProxy(URL, transport=t)
         old = self.requestHandler.encode_threshold
@@ -744,6 +819,23 @@
         self.requestHandler.encode_threshold = old
         self.assertTrue(a>b)
 
+    def test_gzip_decode_limit(self):
+        max_gzip_decode = 20 * 1024 * 1024
+        data = '\0' * max_gzip_decode
+        encoded = xmlrpclib.gzip_encode(data)
+        decoded = xmlrpclib.gzip_decode(encoded)
+        self.assertEqual(len(decoded), max_gzip_decode)
+
+        data = '\0' * (max_gzip_decode + 1)
+        encoded = xmlrpclib.gzip_encode(data)
+
+        with self.assertRaisesRegexp(ValueError,
+                                     "max gzipped payload length exceeded"):
+            xmlrpclib.gzip_decode(encoded)
+
+        xmlrpclib.gzip_decode(encoded, max_decode=-1)
+
+
 #Test special attributes of the ServerProxy object
 class ServerProxyTestCase(unittest.TestCase):
     def setUp(self):
@@ -1009,13 +1101,10 @@
     xmlrpc_tests = [XMLRPCTestCase, HelperTestCase, DateTimeTestCase,
          BinaryTestCase, FaultTestCase, TransportSubclassTestCase]
     xmlrpc_tests.append(SimpleServerTestCase)
+    xmlrpc_tests.append(SimpleServerEncodingTestCase)
     xmlrpc_tests.append(KeepaliveServerTestCase1)
     xmlrpc_tests.append(KeepaliveServerTestCase2)
-    try:
-        import gzip
-        xmlrpc_tests.append(GzipServerTestCase)
-    except ImportError:
-        pass #gzip not supported in this build
+    xmlrpc_tests.append(GzipServerTestCase)
     xmlrpc_tests.append(MultiPathServerTestCase)
     xmlrpc_tests.append(ServerProxyTestCase)
     xmlrpc_tests.append(FailingServerTestCase)
diff --git a/lib-python/2.7/xml/dom/minicompat.py b/lib-python/2.7/xml/dom/minicompat.py
--- a/lib-python/2.7/xml/dom/minicompat.py
+++ b/lib-python/2.7/xml/dom/minicompat.py
@@ -65,10 +65,10 @@
     length = property(_get_length, _set_length,
                       doc="The number of nodes in the NodeList.")
 
-    def __getstate__(self):
-        return list(self)
-
+    # For backward compatibility
     def __setstate__(self, state):
+        if state is None:
+            state = []
         self[:] = state
 
 
diff --git a/lib-python/2.7/xml/dom/minidom.py b/lib-python/2.7/xml/dom/minidom.py
--- a/lib-python/2.7/xml/dom/minidom.py
+++ b/lib-python/2.7/xml/dom/minidom.py
@@ -358,9 +358,6 @@
     def _get_localName(self):
         return self.nodeName.split(":", 1)[-1]
 
-    def _get_name(self):
-        return self.name
-
     def _get_specified(self):
         return self.specified
 
diff --git a/lib-python/2.7/xml/etree/ElementInclude.py b/lib-python/2.7/xml/etree/ElementInclude.py
--- a/lib-python/2.7/xml/etree/ElementInclude.py
+++ b/lib-python/2.7/xml/etree/ElementInclude.py
@@ -75,14 +75,13 @@
 # @throws IOError If the loader fails to load the resource.
 
 def default_loader(href, parse, encoding=None):
-    file = open(href)
-    if parse == "xml":
-        data = ElementTree.parse(file).getroot()
-    else:
-        data = file.read()
-        if encoding:
-            data = data.decode(encoding)
-    file.close()
+    with open(href) as file:
+        if parse == "xml":
+            data = ElementTree.parse(file).getroot()
+        else:
+            data = file.read()
+            if encoding:
+                data = data.decode(encoding)
     return data
 
 ##
diff --git a/lib-python/2.7/xml/etree/ElementTree.py b/lib-python/2.7/xml/etree/ElementTree.py
--- a/lib-python/2.7/xml/etree/ElementTree.py
+++ b/lib-python/2.7/xml/etree/ElementTree.py
@@ -683,8 +683,8 @@
         return list(self.iter(tag))
 
     ##
-    # Finds the first toplevel element with given tag.
-    # Same as getroot().find(path).
+    # Same as getroot().find(path), starting at the root of the
+    # tree.
     #
     # @param path What element to look for.
     # @keyparam namespaces Optional namespace prefix map.
@@ -704,10 +704,9 @@
         return self._root.find(path, namespaces)
 
     ##
-    # Finds the element text for the first toplevel element with given
-    # tag.  Same as getroot().findtext(path).
+    # Same as getroot().findtext(path), starting at the root of the tree.
     #
-    # @param path What toplevel element to look for.
+    # @param path What element to look for.
     # @param default What to return if the element was not found.
     # @keyparam namespaces Optional namespace prefix map.
     # @return The text content of the first matching element, or the
@@ -729,8 +728,7 @@
         return self._root.findtext(path, default, namespaces)
 
     ##
-    # Finds all toplevel elements with the given tag.
-    # Same as getroot().findall(path).
+    # Same as getroot().findall(path), starting at the root of the tree.
     #
     # @param path What element to look for.
     # @keyparam namespaces Optional namespace prefix map.
@@ -990,15 +988,15 @@
                     # FIXME: handle boolean attributes
                     write(" %s=\"%s\"" % (qnames[k], v))
             write(">")
-            tag = tag.lower()
+            ltag = tag.lower()
             if text:
-                if tag == "script" or tag == "style":
+                if ltag == "script" or ltag == "style":
                     write(_encode(text, encoding))
                 else:
                     write(_escape_cdata(text, encoding))
             for e in elem:
                 _serialize_html(write, e, encoding, qnames, None)
-            if tag not in HTML_EMPTY:
+            if ltag not in HTML_EMPTY:
                 write("</" + tag + ">")
     if elem.tail:
         write(_escape_cdata(elem.tail, encoding))
@@ -1200,9 +1198,14 @@
     if not hasattr(source, "read"):
         source = open(source, "rb")
         close_source = True
-    if not parser:
-        parser = XMLParser(target=TreeBuilder())
-    return _IterParseIterator(source, events, parser, close_source)
+    try:
+        if not parser:
+            parser = XMLParser(target=TreeBuilder())
+        return _IterParseIterator(source, events, parser, close_source)
+    except:
+        if close_source:
+            source.close()
+        raise
 
 class _IterParseIterator(object):
 
@@ -1254,34 +1257,40 @@
                 raise ValueError("unknown event %r" % event)
 
     def next(self):
-        while 1:
-            try:
-                item = self._events[self._index]
-                self._index += 1
-                return item
-            except IndexError:
-                pass
-            if self._error:
-                e = self._error
-                self._error = None
-                raise e
-            if self._parser is None:
-                self.root = self._root
-                if self._close_file:
-                    self._file.close()
-                raise StopIteration
-            # load event buffer
-            del self._events[:]
-            self._index = 0
-            data = self._file.read(16384)
-            if data:
+        try:
+            while 1:
                 try:
-                    self._parser.feed(data)
-                except SyntaxError as exc:
-                    self._error = exc
-            else:
-                self._root = self._parser.close()
-                self._parser = None
+                    item = self._events[self._index]
+                    self._index += 1
+                    return item
+                except IndexError:
+                    pass
+                if self._error:
+                    e = self._error
+                    self._error = None
+                    raise e
+                if self._parser is None:
+                    self.root = self._root
+                    break
+                # load event buffer
+                del self._events[:]
+                self._index = 0
+                data = self._file.read(16384)
+                if data:
+                    try:
+                        self._parser.feed(data)
+                    except SyntaxError as exc:
+                        self._error = exc
+                else:
+                    self._root = self._parser.close()
+                    self._parser = None
+        except:
+            if self._close_file:
+                self._file.close()
+            raise
+        if self._close_file:
+            self._file.close()
+        raise StopIteration
 
     def __iter__(self):
         return self
diff --git a/lib-python/2.7/xml/sax/expatreader.py b/lib-python/2.7/xml/sax/expatreader.py
--- a/lib-python/2.7/xml/sax/expatreader.py
+++ b/lib-python/2.7/xml/sax/expatreader.py
@@ -43,6 +43,9 @@
     _mkproxy = weakref.proxy
     del weakref, _weakref
 
+class _ClosedParser:
+    pass
+
 # --- ExpatLocator
 
 class ExpatLocator(xmlreader.Locator):
@@ -214,14 +217,24 @@
             self._err_handler.fatalError(exc)
 
     def close(self):
-        if self._entity_stack:
+        if (self._entity_stack or self._parser is None or
+            isinstance(self._parser, _ClosedParser)):
             # If we are completing an external entity, do nothing here
             return
-        self.feed("", isFinal = 1)
-        self._cont_handler.endDocument()
-        self._parsing = 0
-        # break cycle created by expat handlers pointing to our methods
-        self._parser = None
+        try:
+            self.feed("", isFinal = 1)
+            self._cont_handler.endDocument()
+            self._parsing = 0
+            # break cycle created by expat handlers pointing to our methods
+            self._parser = None
+        finally:
+            self._parsing = 0
+            if self._parser is not None:
+                # Keep ErrorColumnNumber and ErrorLineNumber after closing.
+                parser = _ClosedParser()
+                parser.ErrorColumnNumber = self._parser.ErrorColumnNumber
+                parser.ErrorLineNumber = self._parser.ErrorLineNumber
+                self._parser = parser
 
     def _reset_cont_handler(self):
         self._parser.ProcessingInstructionHandler = \
diff --git a/lib-python/2.7/xml/sax/saxutils.py b/lib-python/2.7/xml/sax/saxutils.py
--- a/lib-python/2.7/xml/sax/saxutils.py
+++ b/lib-python/2.7/xml/sax/saxutils.py
@@ -98,14 +98,17 @@
         except AttributeError:
             pass
     # wrap a binary writer with TextIOWrapper
-    class UnbufferedTextIOWrapper(io.TextIOWrapper):
-        def write(self, s):
-            super(UnbufferedTextIOWrapper, self).write(s)
-            self.flush()
-    return UnbufferedTextIOWrapper(buffer, encoding=encoding,
+    return _UnbufferedTextIOWrapper(buffer, encoding=encoding,
                                    errors='xmlcharrefreplace',
                                    newline='\n')
 
+
+class _UnbufferedTextIOWrapper(io.TextIOWrapper):
+    def write(self, s):
+        super(_UnbufferedTextIOWrapper, self).write(s)
+        self.flush()
+
+
 class XMLGenerator(handler.ContentHandler):
 
     def __init__(self, out=None, encoding="iso-8859-1"):
@@ -180,10 +183,14 @@
         self._write(u'</%s>' % self._qname(name))
 
     def characters(self, content):
-        self._write(escape(unicode(content)))
+        if not isinstance(content, unicode):
+            content = unicode(content, self._encoding)
+        self._write(escape(content))
 
     def ignorableWhitespace(self, content):
-        self._write(unicode(content))
+        if not isinstance(content, unicode):
+            content = unicode(content, self._encoding)
+        self._write(content)
 
     def processingInstruction(self, target, data):
         self._write(u'<?%s %s?>' % (target, data))

-- 
Repository URL: https://hg.python.org/jython


More information about the Jython-checkins mailing list