From jython-checkins at python.org Sun Sep 4 03:25:56 2016 From: jython-checkins at python.org (jeff.allen) Date: Sun, 04 Sep 2016 07:25:56 +0000 Subject: [Jython-checkins] =?utf-8?q?jython=3A_Formatting_only_=28crlf-=3E?= =?utf-8?q?lf=29_in_test=5Fsocketserver=5Fjython?= Message-ID: <20160904072546.9549.40827.15BF33C9@psf.io> https://hg.python.org/jython/rev/2be0f5da0aff changeset: 7957:2be0f5da0aff user: Jeff Allen date: Sat Sep 03 17:43:02 2016 +0100 summary: Formatting only (crlf->lf) in test_socketserver_jython files: Lib/test/test_socketserver_jython.py | 33 +++++++-------- 1 files changed, 16 insertions(+), 17 deletions(-) diff --git a/Lib/test/test_socketserver_jython.py b/Lib/test/test_socketserver_jython.py --- a/Lib/test/test_socketserver_jython.py +++ b/Lib/test/test_socketserver_jython.py @@ -1,17 +1,16 @@ -# -*- coding: windows-1252 -*- - -import unittest - -import SocketServer - -class TestSocketServer(unittest.TestCase): - - def testEphemeralPort(self): - """ Test that an ephemeral port is set correctly """ - host, port = "localhost", 0 # If we specify 0, system should pick an emphemeral port - server = SocketServer.TCPServer( (host, port), None) # Request handler never instantiated - server_host, server_port = server.server_address - self.failIfEqual(server_port, 0, "System assigned ephemeral port should not be zero") - -if __name__ == "__main__": - unittest.main() +import unittest +import SocketServer + +class TestSocketServer(unittest.TestCase): + + def testEphemeralPort(self): + """ Test that an ephemeral port is set correctly """ + # If we specify 0, system should pick an emphemeral port + host, port = "localhost", 0 + # Request handler never instantiated + server = SocketServer.TCPServer( (host, port), None) + server_host, server_port = server.server_address + self.failIfEqual(server_port, 0, "System assigned ephemeral port should not be zero") + +if __name__ == "__main__": + unittest.main() -- Repository URL: https://hg.python.org/jython From 
jython-checkins at python.org Sun Sep 4 03:25:56 2016 From: jython-checkins at python.org (jeff.allen) Date: Sun, 04 Sep 2016 07:25:56 +0000 Subject: [Jython-checkins] =?utf-8?q?jython=3A_Always_close_the_file_in_El?= =?utf-8?q?ementTree=2Ewrite=28=29=2E_Fixes_=232413=2E?= Message-ID: <20160904072546.9581.86129.A8F30239@psf.io> https://hg.python.org/jython/rev/c168765aae66 changeset: 7956:c168765aae66 user: Jeff Allen date: Sat Sep 03 13:56:53 2016 +0100 summary: Always close the file in ElementTree.write(). Fixes #2413. Also adds test. This makes it necessary for Jython to have its own ElementTree.py, as long as CPython stdlib does not explicitly close the file. files: Lib/test/test_xml_etree_jy.py | 16 ++++++++- Lib/xml/etree/ElementTree.py | 40 ++++++++++++---------- NEWS | 1 + 3 files changed, 37 insertions(+), 20 deletions(-) diff --git a/Lib/test/test_xml_etree_jy.py b/Lib/test/test_xml_etree_jy.py --- a/Lib/test/test_xml_etree_jy.py +++ b/Lib/test/test_xml_etree_jy.py @@ -750,7 +750,7 @@ Entity name: entity """ -def test_close_files(): +def test_close_file_iss1479(): # http://bugs.jython.org/issue1479 """ >>> import os @@ -767,5 +767,19 @@ >>> os.remove(test_support.TESTFN) """ +def test_close_file_iss2413(): + # http://bugs.jython.org/issue2413 + """ + >>> import os + >>> from test import test_support + >>> from xml.etree import ElementTree as ET + + >>> tree = ET.ElementTree(ET.XML('')) + >>> tree.write(test_support.TESTFN, encoding='an_unknown_encoding') + Traceback (most recent call last): + LookupError: unknown encoding 'an_unknown_encoding' + >>> os.remove(test_support.TESTFN) + """ + if __name__ == "__main__": doctest.testmod() diff --git a/Lib/xml/etree/ElementTree.py b/Lib/xml/etree/ElementTree.py --- a/Lib/xml/etree/ElementTree.py +++ b/Lib/xml/etree/ElementTree.py @@ -800,26 +800,28 @@ file = file_or_filename else: file = open(file_or_filename, "wb") - write = file.write - if not encoding: - if method == "c14n": - encoding = "utf-8" + try: + write 
= file.write + if not encoding: + if method == "c14n": + encoding = "utf-8" + else: + encoding = "us-ascii" + elif xml_declaration or (xml_declaration is None and + encoding not in ("utf-8", "us-ascii")): + if method == "xml": + write("<?xml version='1.0' encoding='%s'?>\n" % encoding) + if method == "text": + _serialize_text(write, self._root, encoding) else: - encoding = "us-ascii" - elif xml_declaration or (xml_declaration is None and - encoding not in ("utf-8", "us-ascii")): - if method == "xml": - write("<?xml version='1.0' encoding='%s'?>\n" % encoding) - if method == "text": - _serialize_text(write, self._root, encoding) - else: - qnames, namespaces = _namespaces( - self._root, encoding, default_namespace - ) - serialize = _serialize[method] - serialize(write, self._root, encoding, qnames, namespaces) - if file_or_filename is not file: - file.close() + qnames, namespaces = _namespaces( + self._root, encoding, default_namespace + ) + serialize = _serialize[method] + serialize(write, self._root, encoding, qnames, namespaces) + finally: + if file_or_filename is not file: + file.close() def write_c14n(self, file): # lxml.etree compatibility. 
use output method instead diff --git a/NEWS b/NEWS --- a/NEWS +++ b/NEWS @@ -4,6 +4,7 @@ Jython 2.7.1rc Bugs fixed + - [ 2413 ] ElementTree.write doesn't close files if used with invalid encoding - [ 2488 ] Always join on subprocess coupler threads - [ 2480 ] Repeating from import results in reload - [ 2472 ] Importing simplejson fails with: 'NoneType' object has no -- Repository URL: https://hg.python.org/jython From jython-checkins at python.org Sun Sep 4 03:25:56 2016 From: jython-checkins at python.org (jeff.allen) Date: Sun, 04 Sep 2016 07:25:56 +0000 Subject: [Jython-checkins] =?utf-8?q?jython=3A_Added_itervalues_and_iterke?= =?utf-8?q?ys_to_map_proxy=2E_Fixes_=232443?= Message-ID: <20160904072546.8656.40945.4A0E88BA@psf.io> https://hg.python.org/jython/rev/d65e8b762307 changeset: 7954:d65e8b762307 parent: 7951:a8fef10e48a5 user: Douglas Clayton date: Wed Aug 31 08:09:49 2016 +0100 summary: Added itervalues and iterkeys to map proxy. Fixes #2443 files: Lib/test/test_dict_jy.py | 2 + NEWS | 1 + src/org/python/core/JavaProxyMap.java | 36 +++++++++++++++ 3 files changed, 39 insertions(+), 0 deletions(-) diff --git a/Lib/test/test_dict_jy.py b/Lib/test/test_dict_jy.py --- a/Lib/test/test_dict_jy.py +++ b/Lib/test/test_dict_jy.py @@ -137,6 +137,8 @@ x['a'] = 1 x[(1, 2)] = 'xyz' self.assertEqual({tup for tup in x.iteritems()}, {('a', 1), ((1, 2), 'xyz')}) + self.assertEqual({tup for tup in x.itervalues()}, {1, 'xyz'}) + self.assertEqual({tup for tup in x.iterkeys()}, {'a', (1, 2)}) self.assertEqual(str(x), repr(x)) self.assertEqual(type(str(x)), type(repr(x))) diff --git a/NEWS b/NEWS --- a/NEWS +++ b/NEWS @@ -18,6 +18,7 @@ - [ 2112 ] time.strptime() has different default year in Jython and CPython - [ 1767 ] Rich comparisons - [ 2314 ] Failures in test_shutil on Windows + - [ 2443 ] java.util.Map derived classes lack iterkeys, itervalues methods New Features - Added uname function to posix module. 
The mostly Java-based implementation even diff --git a/src/org/python/core/JavaProxyMap.java b/src/org/python/core/JavaProxyMap.java --- a/src/org/python/core/JavaProxyMap.java +++ b/src/org/python/core/JavaProxyMap.java @@ -223,6 +223,40 @@ }; } }; + private static final PyBuiltinMethodNarrow mapIterKeysProxy = new MapMethod("iterkeys", 0) { + @Override + public PyObject __call__() { + final Iterator keyIterator = asMap().keySet().iterator(); + return new PyIterator() { + @Override + public PyObject __iternext__() { + if (keyIterator.hasNext()) { + Object nextKey = keyIterator.next(); + // yield a Python key + return Py.java2py(nextKey); + } + return null; + } + }; + } + }; + private static final PyBuiltinMethodNarrow mapIterValuesProxy = new MapMethod("itervalues", 0) { + @Override + public PyObject __call__() { + final Iterator valueIterator = asMap().values().iterator(); + return new PyIterator() { + @Override + public PyObject __iternext__() { + if (valueIterator.hasNext()) { + Object nextValue = valueIterator.next(); + // yield a Python value + return Py.java2py(nextValue); + } + return null; + } + }; + } + }; private static final PyBuiltinMethodNarrow mapHasKeyProxy = new MapMethod("has_key", 1) { @Override public PyObject __call__(PyObject key) { @@ -456,6 +490,8 @@ mapPutProxy, mapRemoveProxy, mapIterItemsProxy, + mapIterKeysProxy, + mapIterValuesProxy, mapHasKeyProxy, mapKeysProxy, mapSetDefaultProxy, -- Repository URL: https://hg.python.org/jython From jython-checkins at python.org Sun Sep 4 03:25:56 2016 From: jython-checkins at python.org (jeff.allen) Date: Sun, 04 Sep 2016 07:25:56 +0000 Subject: [Jython-checkins] =?utf-8?q?jython=3A_Fix_tests_that_previously_d?= =?utf-8?q?id_not_run_as_regression_tests=2E?= Message-ID: <20160904072546.22608.75874.5D5B28C3@psf.io> https://hg.python.org/jython/rev/3de87d0fa0d5 changeset: 7958:3de87d0fa0d5 user: Jeff Allen date: Sat Sep 03 23:04:29 2016 +0100 summary: Fix tests that previously did not run as regression 
tests. Several tests (test_SimpleXMLRPCServer, test_gc_jy, test_generators_jy, test_socketserver_jython, test_xml_etree_jy) would not run under test.regrtest because they had no test_main() and did not run when merely imported. In order to run cleanly, test_gc_jy needed 2 skips, and test_xml_etree_jy is made an "expected failure" for now as it has 8 failing doc tests. Also, the verbosity in test_xml_etree is now correctly related to the command-line choice. files: Lib/test/regrtest.py | 1 + Lib/test/test_SimpleXMLRPCServer.py | 9 +- Lib/test/test_gc_jy.py | 70 +++------------ Lib/test/test_generators_jy.py | 16 +++- Lib/test/test_socketserver_jython.py | 8 +- Lib/test/test_xml_etree.py | 4 +- Lib/test/test_xml_etree_jy.py | 5 + 7 files changed, 51 insertions(+), 62 deletions(-) diff --git a/Lib/test/regrtest.py b/Lib/test/regrtest.py --- a/Lib/test/regrtest.py +++ b/Lib/test/regrtest.py @@ -1309,6 +1309,7 @@ test_stringprep # UnicodeDecodeError test_threadsignals test_transformer + test_xml_etree_jy test_zipimport # fails on Windows standalone, probably shouldn't diff --git a/Lib/test/test_SimpleXMLRPCServer.py b/Lib/test/test_SimpleXMLRPCServer.py --- a/Lib/test/test_SimpleXMLRPCServer.py +++ b/Lib/test/test_SimpleXMLRPCServer.py @@ -4,6 +4,7 @@ from SimpleXMLRPCServer import SimpleXMLRPCServer import threading, xmlrpclib, unittest +from test import test_support HOST = "127.0.0.1" PORT = 7218 @@ -82,7 +83,9 @@ self.assertEqual(client.squared(10), 100) +def test_main(): + test_support.run_unittest(SimpleXMLRPCServerTestCase) + + if __name__ == "__main__": - unittest.main() - -# vim:et:ts=4:sw=4: + test_main() diff --git a/Lib/test/test_gc_jy.py b/Lib/test/test_gc_jy.py --- a/Lib/test/test_gc_jy.py +++ b/Lib/test/test_gc_jy.py @@ -782,6 +782,7 @@ gc.removeJythonGCFlags(gc.FORCE_DELAYED_WEAKREF_CALLBACKS) + at unittest.skipIf(__name__ != "__main__", 'Hangs under regrtest') class GCTests_Jy_Monitoring(unittest.TestCase): @classmethod @@ -1157,59 +1158,20 @@ 
self.assertTrue(ref == ref) +def test_main(): + tests = ( + GCTests_Jy_CyclicGarbage, + GCTests_Jy_preprocess_and_postprocess, + GCTests_Jy_Delayed_Finalization, + GCTests_Jy_Forced_Delayed_Finalization, + GCTests_Jy_Raw_Forced_Delayed_Finalization, + GCTests_Jy_Monitoring, + GCTests_Jy_Weakref, + GCTests_Jy_TraverseByReflection, + GCTests_Misc, + ) + test_support.run_unittest(*tests) + if __name__ == "__main__": unittest.main() -# comments = [] -# resurrected = [] -# -# class Test_JavaResurrectFinalizable(Object): -# def __init__(self, name, toResurrect): -# self.name = name -# self.toResurrect = toResurrect -# -# def __repr__(self): -# return "<"+self.name+">" -# -# #def __del__(self): -# def finalize(self): -# gc.notifyPreFinalization() -# comments.append("del "+self.name) -# #gc.abortDelayedFinalization(self.toAbort) -# resurrected.append(self.toResurrect) -# print "finalize "+self.name -# # We manually restore weak references: -# gc.restoreWeakReferences(self.toResurrect) -# gc.notifyPostFinalization() -# -# class Test_Finalizable(object): -# def __init__(self, name): -# self.name = name -# -# def __repr__(self): -# return "<"+self.name+">" -# -# def __del__(self): -# comments.append("del "+self.name) -# -# def callback(obj): -# comments.append("callback")#+str(obj)) -# print "callback: "+str(obj) -# -# a = Test_Finalizable("a") -# b = Test_JavaResurrectFinalizable("b", a) -# wa = weakref.ref(a, callback) -# print ("wref: ")+str(wa()) -# gc.addJythonGCFlags(gc.VERBOSE_DELAYED) -# #gc.addJythonGCFlags(gc.FORCE_DELAYED_FINALIZATION) -# #gc.addJythonGCFlags(gc.FORCE_DELAYED_WEAKREF_CALLBACKS) -# print "delayed finalization? "+str(gc.delayedFinalizationEnabled()) -# print "delayed callbacks? 
"+str(gc.delayedWeakrefCallbacksEnabled()) -# print comments -# del a -# del b -# System.gc() -# time.sleep(1) -# print comments -# print resurrected -# print ("wref: ")+str(wa()) - + diff --git a/Lib/test/test_generators_jy.py b/Lib/test/test_generators_jy.py --- a/Lib/test/test_generators_jy.py +++ b/Lib/test/test_generators_jy.py @@ -1,7 +1,8 @@ from __future__ import generators import unittest +from test import test_support -# tests for deeply nested try/except/finally's +# tests for deeply nested try/except/finally class FinallyTests(unittest.TestCase): def gen1(self): @@ -168,5 +169,16 @@ self.assertEqual(genexp.gi_frame, None) self.assertRaises(StopIteration, genexp.next) + +def test_main(): + tests = ( + FinallyTests, + TryExceptTests, + TestThrowTestCase, + ) + test_support.run_unittest(*tests) + + if __name__ == "__main__": - unittest.main() + test_main() + diff --git a/Lib/test/test_socketserver_jython.py b/Lib/test/test_socketserver_jython.py --- a/Lib/test/test_socketserver_jython.py +++ b/Lib/test/test_socketserver_jython.py @@ -1,4 +1,5 @@ import unittest +from test import test_support import SocketServer class TestSocketServer(unittest.TestCase): @@ -12,5 +13,10 @@ server_host, server_port = server.server_address self.failIfEqual(server_port, 0, "System assigned ephemeral port should not be zero") + +def test_main(): + test_support.run_unittest(TestSocketServer) + + if __name__ == "__main__": - unittest.main() + test_main() diff --git a/Lib/test/test_xml_etree.py b/Lib/test/test_xml_etree.py --- a/Lib/test/test_xml_etree.py +++ b/Lib/test/test_xml_etree.py @@ -349,7 +349,7 @@ def test_main(): from test import test_xml_etree - test_support.run_doctest(test_xml_etree, verbosity=True) + test_support.run_doctest(test_xml_etree) if __name__ == '__main__': - test_main() + doctest.testmod() diff --git a/Lib/test/test_xml_etree_jy.py b/Lib/test/test_xml_etree_jy.py --- a/Lib/test/test_xml_etree_jy.py +++ b/Lib/test/test_xml_etree_jy.py @@ -4,6 +4,7 @@ 
JYTHON = sys.platform.startswith("java") import doctest +from test import test_support import xml.parsers.expat as expat from xml.etree.ElementTree import * @@ -781,5 +782,9 @@ >>> os.remove(test_support.TESTFN) """ +def test_main(): + from test import test_xml_etree_jy + test_support.run_doctest(test_xml_etree_jy) + if __name__ == "__main__": doctest.testmod() -- Repository URL: https://hg.python.org/jython From jython-checkins at python.org Sun Sep 4 03:26:11 2016 From: jython-checkins at python.org (jeff.allen) Date: Sun, 04 Sep 2016 07:26:11 +0000 Subject: [Jython-checkins] =?utf-8?q?jython=3A_Add_xml=2Eetree=2EElementTr?= =?utf-8?q?ee_from_Python_stdlib_ahead_of_fix_for_=232413?= Message-ID: <20160904072546.47340.86227.E3AD3163@psf.io> https://hg.python.org/jython/rev/5d2bfe0ca5c3 changeset: 7955:5d2bfe0ca5c3 user: Jeff Allen date: Fri Sep 02 20:26:23 2016 +0100 summary: Add xml.etree.ElementTree from Python stdlib ahead of fix for #2413 This initial add unchanged is so that the subsequent change may be seen in isolation. files: Lib/xml/etree/ElementTree.py | 1678 ++++++++++++++++++++++ 1 files changed, 1678 insertions(+), 0 deletions(-) diff --git a/Lib/xml/etree/ElementTree.py b/Lib/xml/etree/ElementTree.py new file mode 100644 --- /dev/null +++ b/Lib/xml/etree/ElementTree.py @@ -0,0 +1,1678 @@ +# +# ElementTree +# $Id: ElementTree.py 3440 2008-07-18 14:45:01Z fredrik $ +# +# light-weight XML support for Python 2.3 and later. 
+# +# history (since 1.2.6): +# 2005-11-12 fl added tostringlist/fromstringlist helpers +# 2006-07-05 fl merged in selected changes from the 1.3 sandbox +# 2006-07-05 fl removed support for 2.1 and earlier +# 2007-06-21 fl added deprecation/future warnings +# 2007-08-25 fl added doctype hook, added parser version attribute etc +# 2007-08-26 fl added new serializer code (better namespace handling, etc) +# 2007-08-27 fl warn for broken /tag searches on tree level +# 2007-09-02 fl added html/text methods to serializer (experimental) +# 2007-09-05 fl added method argument to tostring/tostringlist +# 2007-09-06 fl improved error handling +# 2007-09-13 fl added itertext, iterfind; assorted cleanups +# 2007-12-15 fl added C14N hooks, copy method (experimental) +# +# Copyright (c) 1999-2008 by Fredrik Lundh. All rights reserved. +# +# fredrik at pythonware.com +# http://www.pythonware.com +# +# -------------------------------------------------------------------- +# The ElementTree toolkit is +# +# Copyright (c) 1999-2008 by Fredrik Lundh +# +# By obtaining, using, and/or copying this software and/or its +# associated documentation, you agree that you have read, understood, +# and will comply with the following terms and conditions: +# +# Permission to use, copy, modify, and distribute this software and +# its associated documentation for any purpose and without fee is +# hereby granted, provided that the above copyright notice appears in +# all copies, and that both that copyright notice and this permission +# notice appear in supporting documentation, and that the name of +# Secret Labs AB or the author not be used in advertising or publicity +# pertaining to distribution of the software without specific, written +# prior permission. +# +# SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD +# TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANT- +# ABILITY AND FITNESS. 
IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR +# BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY +# DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, +# WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS +# ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE +# OF THIS SOFTWARE. +# -------------------------------------------------------------------- + +# Licensed to PSF under a Contributor Agreement. +# See http://www.python.org/psf/license for licensing details. + +__all__ = [ + # public symbols + "Comment", + "dump", + "Element", "ElementTree", + "fromstring", "fromstringlist", + "iselement", "iterparse", + "parse", "ParseError", + "PI", "ProcessingInstruction", + "QName", + "SubElement", + "tostring", "tostringlist", + "TreeBuilder", + "VERSION", + "XML", + "XMLParser", "XMLTreeBuilder", + ] + +VERSION = "1.3.0" + +## +# The Element type is a flexible container object, designed to +# store hierarchical data structures in memory. The type can be +# described as a cross between a list and a dictionary. +#

<p> +# Each element has a number of properties associated with it: +# <ul>
+# <li>a tag. This is a string identifying what kind of data +# this element represents (the element type, in other words).</li>
+# <li>a number of attributes, stored in a Python dictionary.</li>
+# <li>a text string.</li>
+# <li>an optional tail string.</li>
+# <li>a number of child elements, stored in a Python sequence</li>
+# </ul>
+# +# To create an element instance, use the {@link #Element} constructor +# or the {@link #SubElement} factory function. +# <p>

+# The {@link #ElementTree} class can be used to wrap an element +# structure, and convert it from and to XML. +## + +import sys +import re +import warnings + + +class _SimpleElementPath(object): + # emulate pre-1.2 find/findtext/findall behaviour + def find(self, element, tag, namespaces=None): + for elem in element: + if elem.tag == tag: + return elem + return None + def findtext(self, element, tag, default=None, namespaces=None): + elem = self.find(element, tag) + if elem is None: + return default + return elem.text or "" + def iterfind(self, element, tag, namespaces=None): + if tag[:3] == ".//": + for elem in element.iter(tag[3:]): + yield elem + for elem in element: + if elem.tag == tag: + yield elem + def findall(self, element, tag, namespaces=None): + return list(self.iterfind(element, tag, namespaces)) + +try: + from . import ElementPath +except ImportError: + ElementPath = _SimpleElementPath() + +## +# Parser error. This is a subclass of SyntaxError. +#

+# In addition to the exception value, an exception instance contains a +# specific exception code in the code attribute, and the line and +# column of the error in the position attribute. + +class ParseError(SyntaxError): + pass + +# -------------------------------------------------------------------- + +## +# Checks if an object appears to be a valid element object. +# +# @param An element instance. +# @return A true value if this is an element object. +# @defreturn flag + +def iselement(element): + # FIXME: not sure about this; might be a better idea to look + # for tag/attrib/text attributes + return isinstance(element, Element) or hasattr(element, "tag") + +## +# Element class. This class defines the Element interface, and +# provides a reference implementation of this interface. +#

+# The element name, attribute names, and attribute values can be +# either ASCII strings (ordinary Python strings containing only 7-bit +# ASCII characters) or Unicode strings. +# +# @param tag The element name. +# @param attrib An optional dictionary, containing element attributes. +# @param **extra Additional attributes, given as keyword arguments. +# @see Element +# @see SubElement +# @see Comment +# @see ProcessingInstruction + +class Element(object): + # text...tail + + ## + # (Attribute) Element tag. + + tag = None + + ## + # (Attribute) Element attribute dictionary. Where possible, use + # {@link #Element.get}, + # {@link #Element.set}, + # {@link #Element.keys}, and + # {@link #Element.items} to access + # element attributes. + + attrib = None + + ## + # (Attribute) Text before first subelement. This is either a + # string or the value None. Note that if there was no text, this + # attribute may be either None or an empty string, depending on + # the parser. + + text = None + + ## + # (Attribute) Text after this element's end tag, but before the + # next sibling element's start tag. This is either a string or + # the value None. Note that if there was no text, this attribute + # may be either None or an empty string, depending on the parser. + + tail = None # text after end tag, if any + + # constructor + + def __init__(self, tag, attrib={}, **extra): + attrib = attrib.copy() + attrib.update(extra) + self.tag = tag + self.attrib = attrib + self._children = [] + + def __repr__(self): + return "" % (repr(self.tag), id(self)) + + ## + # Creates a new element object of the same type as this element. + # + # @param tag Element tag. + # @param attrib Element attributes, given as a dictionary. + # @return A new element instance. + + def makeelement(self, tag, attrib): + return self.__class__(tag, attrib) + + ## + # (Experimental) Copies the current element. This creates a + # shallow copy; subelements will be shared with the original tree. 
+ # + # @return A new element instance. + + def copy(self): + elem = self.makeelement(self.tag, self.attrib) + elem.text = self.text + elem.tail = self.tail + elem[:] = self + return elem + + ## + # Returns the number of subelements. Note that this only counts + # full elements; to check if there's any content in an element, you + # have to check both the length and the text attribute. + # + # @return The number of subelements. + + def __len__(self): + return len(self._children) + + def __nonzero__(self): + warnings.warn( + "The behavior of this method will change in future versions. " + "Use specific 'len(elem)' or 'elem is not None' test instead.", + FutureWarning, stacklevel=2 + ) + return len(self._children) != 0 # emulate old behaviour, for now + + ## + # Returns the given subelement, by index. + # + # @param index What subelement to return. + # @return The given subelement. + # @exception IndexError If the given element does not exist. + + def __getitem__(self, index): + return self._children[index] + + ## + # Replaces the given subelement, by index. + # + # @param index What subelement to replace. + # @param element The new element value. + # @exception IndexError If the given element does not exist. + + def __setitem__(self, index, element): + # if isinstance(index, slice): + # for elt in element: + # assert iselement(elt) + # else: + # assert iselement(element) + self._children[index] = element + + ## + # Deletes the given subelement, by index. + # + # @param index What subelement to delete. + # @exception IndexError If the given element does not exist. + + def __delitem__(self, index): + del self._children[index] + + ## + # Adds a subelement to the end of this element. In document order, + # the new element will appear after the last existing subelement (or + # directly after the text, if it's the first subelement), but before + # the end tag for this element. + # + # @param element The element to add. 
+ + def append(self, element): + # assert iselement(element) + self._children.append(element) + + ## + # Appends subelements from a sequence. + # + # @param elements A sequence object with zero or more elements. + # @since 1.3 + + def extend(self, elements): + # for element in elements: + # assert iselement(element) + self._children.extend(elements) + + ## + # Inserts a subelement at the given position in this element. + # + # @param index Where to insert the new subelement. + + def insert(self, index, element): + # assert iselement(element) + self._children.insert(index, element) + + ## + # Removes a matching subelement. Unlike the find methods, + # this method compares elements based on identity, not on tag + # value or contents. To remove subelements by other means, the + # easiest way is often to use a list comprehension to select what + # elements to keep, and use slice assignment to update the parent + # element. + # + # @param element What element to remove. + # @exception ValueError If a matching element could not be found. + + def remove(self, element): + # assert iselement(element) + self._children.remove(element) + + ## + # (Deprecated) Returns all subelements. The elements are returned + # in document order. + # + # @return A list of subelements. + # @defreturn list of Element instances + + def getchildren(self): + warnings.warn( + "This method will be removed in future versions. " + "Use 'list(elem)' or iteration over elem instead.", + DeprecationWarning, stacklevel=2 + ) + return self._children + + ## + # Finds the first matching subelement, by tag name or path. + # + # @param path What element to look for. + # @keyparam namespaces Optional namespace prefix map. + # @return The first matching element, or None if no element was found. + # @defreturn Element or None + + def find(self, path, namespaces=None): + return ElementPath.find(self, path, namespaces) + + ## + # Finds text for the first matching subelement, by tag name or path. 
+ # + # @param path What element to look for. + # @param default What to return if the element was not found. + # @keyparam namespaces Optional namespace prefix map. + # @return The text content of the first matching element, or the + # default value no element was found. Note that if the element + # is found, but has no text content, this method returns an + # empty string. + # @defreturn string + + def findtext(self, path, default=None, namespaces=None): + return ElementPath.findtext(self, path, default, namespaces) + + ## + # Finds all matching subelements, by tag name or path. + # + # @param path What element to look for. + # @keyparam namespaces Optional namespace prefix map. + # @return A list or other sequence containing all matching elements, + # in document order. + # @defreturn list of Element instances + + def findall(self, path, namespaces=None): + return ElementPath.findall(self, path, namespaces) + + ## + # Finds all matching subelements, by tag name or path. + # + # @param path What element to look for. + # @keyparam namespaces Optional namespace prefix map. + # @return An iterator or sequence containing all matching elements, + # in document order. + # @defreturn a generated sequence of Element instances + + def iterfind(self, path, namespaces=None): + return ElementPath.iterfind(self, path, namespaces) + + ## + # Resets an element. This function removes all subelements, clears + # all attributes, and sets the text and tail attributes + # to None. + + def clear(self): + self.attrib.clear() + self._children = [] + self.text = self.tail = None + + ## + # Gets an element attribute. Equivalent to attrib.get, but + # some implementations may handle this a bit more efficiently. + # + # @param key What attribute to look for. + # @param default What to return if the attribute was not found. + # @return The attribute value, or the default value, if the + # attribute was not found. 
+ # @defreturn string or None + + def get(self, key, default=None): + return self.attrib.get(key, default) + + ## + # Sets an element attribute. Equivalent to attrib[key] = value, + # but some implementations may handle this a bit more efficiently. + # + # @param key What attribute to set. + # @param value The attribute value. + + def set(self, key, value): + self.attrib[key] = value + + ## + # Gets a list of attribute names. The names are returned in an + # arbitrary order (just like for an ordinary Python dictionary). + # Equivalent to attrib.keys(). + # + # @return A list of element attribute names. + # @defreturn list of strings + + def keys(self): + return self.attrib.keys() + + ## + # Gets element attributes, as a sequence. The attributes are + # returned in an arbitrary order. Equivalent to attrib.items(). + # + # @return A list of (name, value) tuples for all attributes. + # @defreturn list of (string, string) tuples + + def items(self): + return self.attrib.items() + + ## + # Creates a tree iterator. The iterator loops over this element + # and all subelements, in document order, and returns all elements + # with a matching tag. + #

+ # If the tree structure is modified during iteration, new or removed + # elements may or may not be included. To get a stable set, use the + # list() function on the iterator, and loop over the resulting list. + # + # @param tag What tags to look for (default is to return all elements). + # @return An iterator containing all the matching elements. + # @defreturn iterator + + def iter(self, tag=None): + if tag == "*": + tag = None + if tag is None or self.tag == tag: + yield self + for e in self._children: + for e in e.iter(tag): + yield e + + # compatibility + def getiterator(self, tag=None): + # Change for a DeprecationWarning in 1.4 + warnings.warn( + "This method will be removed in future versions. " + "Use 'elem.iter()' or 'list(elem.iter())' instead.", + PendingDeprecationWarning, stacklevel=2 + ) + return list(self.iter(tag)) + + ## + # Creates a text iterator. The iterator loops over this element + # and all subelements, in document order, and returns all inner + # text. + # + # @return An iterator containing all inner text. + # @defreturn iterator + + def itertext(self): + tag = self.tag + if not isinstance(tag, basestring) and tag is not None: + return + if self.text: + yield self.text + for e in self: + for s in e.itertext(): + yield s + if e.tail: + yield e.tail + +# compatibility +_Element = _ElementInterface = Element + +## +# Subelement factory. This function creates an element instance, and +# appends it to an existing element. +#

+# The element name, attribute names, and attribute values can be +# either 8-bit ASCII strings or Unicode strings. +# +# @param parent The parent element. +# @param tag The subelement name. +# @param attrib An optional dictionary, containing element attributes. +# @param **extra Additional attributes, given as keyword arguments. +# @return An element instance. +# @defreturn Element + +def SubElement(parent, tag, attrib={}, **extra): + attrib = attrib.copy() + attrib.update(extra) + element = parent.makeelement(tag, attrib) + parent.append(element) + return element + +## +# Comment element factory. This factory function creates a special +# element that will be serialized as an XML comment by the standard +# serializer. +#

+# The comment string can be either an 8-bit ASCII string or a Unicode +# string. +# +# @param text A string containing the comment string. +# @return An element instance, representing a comment. +# @defreturn Element + +def Comment(text=None): + element = Element(Comment) + element.text = text + return element + +## +# PI element factory. This factory function creates a special element +# that will be serialized as an XML processing instruction by the standard +# serializer. +# +# @param target A string containing the PI target. +# @param text A string containing the PI contents, if any. +# @return An element instance, representing a PI. +# @defreturn Element + +def ProcessingInstruction(target, text=None): + element = Element(ProcessingInstruction) + element.text = target + if text: + element.text = element.text + " " + text + return element + +PI = ProcessingInstruction + +## +# QName wrapper. This can be used to wrap a QName attribute value, in +# order to get proper namespace handling on output. +# +# @param text A string containing the QName value, in the form {uri}local, +# or, if the tag argument is given, the URI part of a QName. +# @param tag Optional tag. If given, the first argument is interpreted as +# an URI, and this argument is interpreted as a local name. +# @return An opaque object, representing the QName. + +class QName(object): + def __init__(self, text_or_uri, tag=None): + if tag: + text_or_uri = "{%s}%s" % (text_or_uri, tag) + self.text = text_or_uri + def __str__(self): + return self.text + def __hash__(self): + return hash(self.text) + def __cmp__(self, other): + if isinstance(other, QName): + return cmp(self.text, other.text) + return cmp(self.text, other) + +# -------------------------------------------------------------------- + +## +# ElementTree wrapper class. This class represents an entire element +# hierarchy, and adds some extra support for serialization to and from +# standard XML. +# +# @param element Optional root element. 
+# @keyparam file Optional file handle or file name. If given, the +# tree is initialized with the contents of this XML file. + +class ElementTree(object): + + def __init__(self, element=None, file=None): + # assert element is None or iselement(element) + self._root = element # first node + if file: + self.parse(file) + + ## + # Gets the root element for this tree. + # + # @return An element instance. + # @defreturn Element + + def getroot(self): + return self._root + + ## + # Replaces the root element for this tree. This discards the + # current contents of the tree, and replaces it with the given + # element. Use with care. + # + # @param element An element instance. + + def _setroot(self, element): + # assert iselement(element) + self._root = element + + ## + # Loads an external XML document into this element tree. + # + # @param source A file name or file object. If a file object is + # given, it only has to implement a read(n) method. + # @keyparam parser An optional parser instance. If not given, the + # standard {@link XMLParser} parser is used. + # @return The document root element. + # @defreturn Element + # @exception ParseError If the parser fails to parse the document. + + def parse(self, source, parser=None): + close_source = False + if not hasattr(source, "read"): + source = open(source, "rb") + close_source = True + try: + if not parser: + parser = XMLParser(target=TreeBuilder()) + while 1: + data = source.read(65536) + if not data: + break + parser.feed(data) + self._root = parser.close() + return self._root + finally: + if close_source: + source.close() + + ## + # Creates a tree iterator for the root element. The iterator loops + # over all elements in this tree, in document order. + # + # @param tag What tags to look for (default is to return all elements) + # @return An iterator. 
+ # @defreturn iterator + + def iter(self, tag=None): + # assert self._root is not None + return self._root.iter(tag) + + # compatibility + def getiterator(self, tag=None): + # Change for a DeprecationWarning in 1.4 + warnings.warn( + "This method will be removed in future versions. " + "Use 'tree.iter()' or 'list(tree.iter())' instead.", + PendingDeprecationWarning, stacklevel=2 + ) + return list(self.iter(tag)) + + ## + # Same as getroot().find(path), starting at the root of the + # tree. + # + # @param path What element to look for. + # @keyparam namespaces Optional namespace prefix map. + # @return The first matching element, or None if no element was found. + # @defreturn Element or None + + def find(self, path, namespaces=None): + # assert self._root is not None + if path[:1] == "/": + path = "." + path + warnings.warn( + "This search is broken in 1.3 and earlier, and will be " + "fixed in a future version. If you rely on the current " + "behaviour, change it to %r" % path, + FutureWarning, stacklevel=2 + ) + return self._root.find(path, namespaces) + + ## + # Same as getroot().findtext(path), starting at the root of the tree. + # + # @param path What element to look for. + # @param default What to return if the element was not found. + # @keyparam namespaces Optional namespace prefix map. + # @return The text content of the first matching element, or the + # default value no element was found. Note that if the element + # is found, but has no text content, this method returns an + # empty string. + # @defreturn string + + def findtext(self, path, default=None, namespaces=None): + # assert self._root is not None + if path[:1] == "/": + path = "." + path + warnings.warn( + "This search is broken in 1.3 and earlier, and will be " + "fixed in a future version. 
If you rely on the current " + "behaviour, change it to %r" % path, + FutureWarning, stacklevel=2 + ) + return self._root.findtext(path, default, namespaces) + + ## + # Same as getroot().findall(path), starting at the root of the tree. + # + # @param path What element to look for. + # @keyparam namespaces Optional namespace prefix map. + # @return A list or iterator containing all matching elements, + # in document order. + # @defreturn list of Element instances + + def findall(self, path, namespaces=None): + # assert self._root is not None + if path[:1] == "/": + path = "." + path + warnings.warn( + "This search is broken in 1.3 and earlier, and will be " + "fixed in a future version. If you rely on the current " + "behaviour, change it to %r" % path, + FutureWarning, stacklevel=2 + ) + return self._root.findall(path, namespaces) + + ## + # Finds all matching subelements, by tag name or path. + # Same as getroot().iterfind(path). + # + # @param path What element to look for. + # @keyparam namespaces Optional namespace prefix map. + # @return An iterator or sequence containing all matching elements, + # in document order. + # @defreturn a generated sequence of Element instances + + def iterfind(self, path, namespaces=None): + # assert self._root is not None + if path[:1] == "/": + path = "." + path + warnings.warn( + "This search is broken in 1.3 and earlier, and will be " + "fixed in a future version. If you rely on the current " + "behaviour, change it to %r" % path, + FutureWarning, stacklevel=2 + ) + return self._root.iterfind(path, namespaces) + + ## + # Writes the element tree to a file, as XML. + # + # @def write(file, **options) + # @param file A file name, or a file object opened for writing. + # @param **options Options, given as keyword arguments. + # @keyparam encoding Optional output encoding (default is US-ASCII). + # @keyparam xml_declaration Controls if an XML declaration should + # be added to the file. 
Use False for never, True for always, + # None for only if not US-ASCII or UTF-8. None is default. + # @keyparam default_namespace Sets the default XML namespace (for "xmlns"). + # @keyparam method Optional output method ("xml", "html", "text" or + # "c14n"; default is "xml"). + + def write(self, file_or_filename, + # keyword arguments + encoding=None, + xml_declaration=None, + default_namespace=None, + method=None): + # assert self._root is not None + if not method: + method = "xml" + elif method not in _serialize: + # FIXME: raise an ImportError for c14n if ElementC14N is missing? + raise ValueError("unknown method %r" % method) + if hasattr(file_or_filename, "write"): + file = file_or_filename + else: + file = open(file_or_filename, "wb") + write = file.write + if not encoding: + if method == "c14n": + encoding = "utf-8" + else: + encoding = "us-ascii" + elif xml_declaration or (xml_declaration is None and + encoding not in ("utf-8", "us-ascii")): + if method == "xml": + write("\n" % encoding) + if method == "text": + _serialize_text(write, self._root, encoding) + else: + qnames, namespaces = _namespaces( + self._root, encoding, default_namespace + ) + serialize = _serialize[method] + serialize(write, self._root, encoding, qnames, namespaces) + if file_or_filename is not file: + file.close() + + def write_c14n(self, file): + # lxml.etree compatibility. 
use output method instead + return self.write(file, method="c14n") + +# -------------------------------------------------------------------- +# serialization support + +def _namespaces(elem, encoding, default_namespace=None): + # identify namespaces used in this tree + + # maps qnames to *encoded* prefix:local names + qnames = {None: None} + + # maps uri:s to prefixes + namespaces = {} + if default_namespace: + namespaces[default_namespace] = "" + + def encode(text): + return text.encode(encoding) + + def add_qname(qname): + # calculate serialized qname representation + try: + if qname[:1] == "{": + uri, tag = qname[1:].rsplit("}", 1) + prefix = namespaces.get(uri) + if prefix is None: + prefix = _namespace_map.get(uri) + if prefix is None: + prefix = "ns%d" % len(namespaces) + if prefix != "xml": + namespaces[uri] = prefix + if prefix: + qnames[qname] = encode("%s:%s" % (prefix, tag)) + else: + qnames[qname] = encode(tag) # default element + else: + if default_namespace: + # FIXME: can this be handled in XML 1.0? 
+ raise ValueError( + "cannot use non-qualified names with " + "default_namespace option" + ) + qnames[qname] = encode(qname) + except TypeError: + _raise_serialization_error(qname) + + # populate qname and namespaces table + try: + iterate = elem.iter + except AttributeError: + iterate = elem.getiterator # cET compatibility + for elem in iterate(): + tag = elem.tag + if isinstance(tag, QName): + if tag.text not in qnames: + add_qname(tag.text) + elif isinstance(tag, basestring): + if tag not in qnames: + add_qname(tag) + elif tag is not None and tag is not Comment and tag is not PI: + _raise_serialization_error(tag) + for key, value in elem.items(): + if isinstance(key, QName): + key = key.text + if key not in qnames: + add_qname(key) + if isinstance(value, QName) and value.text not in qnames: + add_qname(value.text) + text = elem.text + if isinstance(text, QName) and text.text not in qnames: + add_qname(text.text) + return qnames, namespaces + +def _serialize_xml(write, elem, encoding, qnames, namespaces): + tag = elem.tag + text = elem.text + if tag is Comment: + write("" % _encode(text, encoding)) + elif tag is ProcessingInstruction: + write("" % _encode(text, encoding)) + else: + tag = qnames[tag] + if tag is None: + if text: + write(_escape_cdata(text, encoding)) + for e in elem: + _serialize_xml(write, e, encoding, qnames, None) + else: + write("<" + tag) + items = elem.items() + if items or namespaces: + if namespaces: + for v, k in sorted(namespaces.items(), + key=lambda x: x[1]): # sort on prefix + if k: + k = ":" + k + write(" xmlns%s=\"%s\"" % ( + k.encode(encoding), + _escape_attrib(v, encoding) + )) + for k, v in sorted(items): # lexical order + if isinstance(k, QName): + k = k.text + if isinstance(v, QName): + v = qnames[v.text] + else: + v = _escape_attrib(v, encoding) + write(" %s=\"%s\"" % (qnames[k], v)) + if text or len(elem): + write(">") + if text: + write(_escape_cdata(text, encoding)) + for e in elem: + _serialize_xml(write, e, encoding, 
qnames, None) + write("") + else: + write(" />") + if elem.tail: + write(_escape_cdata(elem.tail, encoding)) + +HTML_EMPTY = ("area", "base", "basefont", "br", "col", "frame", "hr", + "img", "input", "isindex", "link", "meta", "param") + +try: + HTML_EMPTY = set(HTML_EMPTY) +except NameError: + pass + +def _serialize_html(write, elem, encoding, qnames, namespaces): + tag = elem.tag + text = elem.text + if tag is Comment: + write("" % _escape_cdata(text, encoding)) + elif tag is ProcessingInstruction: + write("" % _escape_cdata(text, encoding)) + else: + tag = qnames[tag] + if tag is None: + if text: + write(_escape_cdata(text, encoding)) + for e in elem: + _serialize_html(write, e, encoding, qnames, None) + else: + write("<" + tag) + items = elem.items() + if items or namespaces: + if namespaces: + for v, k in sorted(namespaces.items(), + key=lambda x: x[1]): # sort on prefix + if k: + k = ":" + k + write(" xmlns%s=\"%s\"" % ( + k.encode(encoding), + _escape_attrib(v, encoding) + )) + for k, v in sorted(items): # lexical order + if isinstance(k, QName): + k = k.text + if isinstance(v, QName): + v = qnames[v.text] + else: + v = _escape_attrib_html(v, encoding) + # FIXME: handle boolean attributes + write(" %s=\"%s\"" % (qnames[k], v)) + write(">") + ltag = tag.lower() + if text: + if ltag == "script" or ltag == "style": + write(_encode(text, encoding)) + else: + write(_escape_cdata(text, encoding)) + for e in elem: + _serialize_html(write, e, encoding, qnames, None) + if ltag not in HTML_EMPTY: + write("") + if elem.tail: + write(_escape_cdata(elem.tail, encoding)) + +def _serialize_text(write, elem, encoding): + for part in elem.itertext(): + write(part.encode(encoding)) + if elem.tail: + write(elem.tail.encode(encoding)) + +_serialize = { + "xml": _serialize_xml, + "html": _serialize_html, + "text": _serialize_text, +# this optional method is imported at the end of the module +# "c14n": _serialize_c14n, +} + +## +# Registers a namespace prefix. 
The registry is global, and any +# existing mapping for either the given prefix or the namespace URI +# will be removed. +# +# @param prefix Namespace prefix. +# @param uri Namespace uri. Tags and attributes in this namespace +# will be serialized with the given prefix, if at all possible. +# @exception ValueError If the prefix is reserved, or is otherwise +# invalid. + +def register_namespace(prefix, uri): + if re.match("ns\d+$", prefix): + raise ValueError("Prefix format reserved for internal use") + for k, v in _namespace_map.items(): + if k == uri or v == prefix: + del _namespace_map[k] + _namespace_map[uri] = prefix + +_namespace_map = { + # "well-known" namespace prefixes + "http://www.w3.org/XML/1998/namespace": "xml", + "http://www.w3.org/1999/xhtml": "html", + "http://www.w3.org/1999/02/22-rdf-syntax-ns#": "rdf", + "http://schemas.xmlsoap.org/wsdl/": "wsdl", + # xml schema + "http://www.w3.org/2001/XMLSchema": "xs", + "http://www.w3.org/2001/XMLSchema-instance": "xsi", + # dublin core + "http://purl.org/dc/elements/1.1/": "dc", +} + +def _raise_serialization_error(text): + raise TypeError( + "cannot serialize %r (type %s)" % (text, type(text).__name__) + ) + +def _encode(text, encoding): + try: + return text.encode(encoding, "xmlcharrefreplace") + except (TypeError, AttributeError): + _raise_serialization_error(text) + +def _escape_cdata(text, encoding): + # escape character data + try: + # it's worth avoiding do-nothing calls for strings that are + # shorter than 500 character, or so. assume that's, by far, + # the most common case in most applications. 
+ if "&" in text: + text = text.replace("&", "&") + if "<" in text: + text = text.replace("<", "<") + if ">" in text: + text = text.replace(">", ">") + return text.encode(encoding, "xmlcharrefreplace") + except (TypeError, AttributeError): + _raise_serialization_error(text) + +def _escape_attrib(text, encoding): + # escape attribute value + try: + if "&" in text: + text = text.replace("&", "&") + if "<" in text: + text = text.replace("<", "<") + if ">" in text: + text = text.replace(">", ">") + if "\"" in text: + text = text.replace("\"", """) + if "\n" in text: + text = text.replace("\n", " ") + return text.encode(encoding, "xmlcharrefreplace") + except (TypeError, AttributeError): + _raise_serialization_error(text) + +def _escape_attrib_html(text, encoding): + # escape attribute value + try: + if "&" in text: + text = text.replace("&", "&") + if ">" in text: + text = text.replace(">", ">") + if "\"" in text: + text = text.replace("\"", """) + return text.encode(encoding, "xmlcharrefreplace") + except (TypeError, AttributeError): + _raise_serialization_error(text) + +# -------------------------------------------------------------------- + +## +# Generates a string representation of an XML element, including all +# subelements. +# +# @param element An Element instance. +# @keyparam encoding Optional output encoding (default is US-ASCII). +# @keyparam method Optional output method ("xml", "html", "text" or +# "c14n"; default is "xml"). +# @return An encoded string containing the XML data. +# @defreturn string + +def tostring(element, encoding=None, method=None): + class dummy: + pass + data = [] + file = dummy() + file.write = data.append + ElementTree(element).write(file, encoding, method=method) + return "".join(data) + +## +# Generates a string representation of an XML element, including all +# subelements. The string is returned as a sequence of string fragments. +# +# @param element An Element instance. 
+# @keyparam encoding Optional output encoding (default is US-ASCII). +# @keyparam method Optional output method ("xml", "html", "text" or +# "c14n"; default is "xml"). +# @return A sequence object containing the XML data. +# @defreturn sequence +# @since 1.3 + +def tostringlist(element, encoding=None, method=None): + class dummy: + pass + data = [] + file = dummy() + file.write = data.append + ElementTree(element).write(file, encoding, method=method) + # FIXME: merge small fragments into larger parts + return data + +## +# Writes an element tree or element structure to sys.stdout. This +# function should be used for debugging only. +#

+# The exact output format is implementation dependent. In this +# version, it's written as an ordinary XML file. +# +# @param elem An element tree or an individual element. + +def dump(elem): + # debugging + if not isinstance(elem, ElementTree): + elem = ElementTree(elem) + elem.write(sys.stdout) + tail = elem.getroot().tail + if not tail or tail[-1] != "\n": + sys.stdout.write("\n") + +# -------------------------------------------------------------------- +# parsing + +## +# Parses an XML document into an element tree. +# +# @param source A filename or file object containing XML data. +# @param parser An optional parser instance. If not given, the +# standard {@link XMLParser} parser is used. +# @return An ElementTree instance + +def parse(source, parser=None): + tree = ElementTree() + tree.parse(source, parser) + return tree + +## +# Parses an XML document into an element tree incrementally, and reports +# what's going on to the user. +# +# @param source A filename or file object containing XML data. +# @param events A list of events to report back. If omitted, only "end" +# events are reported. +# @param parser An optional parser instance. If not given, the +# standard {@link XMLParser} parser is used. +# @return A (event, elem) iterator. 
+ +def iterparse(source, events=None, parser=None): + close_source = False + if not hasattr(source, "read"): + source = open(source, "rb") + close_source = True + try: + if not parser: + parser = XMLParser(target=TreeBuilder()) + return _IterParseIterator(source, events, parser, close_source) + except: + if close_source: + source.close() + raise + +class _IterParseIterator(object): + + def __init__(self, source, events, parser, close_source=False): + self._file = source + self._close_file = close_source + self._events = [] + self._index = 0 + self._error = None + self.root = self._root = None + self._parser = parser + # wire up the parser for event reporting + parser = self._parser._parser + append = self._events.append + if events is None: + events = ["end"] + for event in events: + if event == "start": + try: + parser.ordered_attributes = 1 + parser.specified_attributes = 1 + def handler(tag, attrib_in, event=event, append=append, + start=self._parser._start_list): + append((event, start(tag, attrib_in))) + parser.StartElementHandler = handler + except AttributeError: + def handler(tag, attrib_in, event=event, append=append, + start=self._parser._start): + append((event, start(tag, attrib_in))) + parser.StartElementHandler = handler + elif event == "end": + def handler(tag, event=event, append=append, + end=self._parser._end): + append((event, end(tag))) + parser.EndElementHandler = handler + elif event == "start-ns": + def handler(prefix, uri, event=event, append=append): + try: + uri = (uri or "").encode("ascii") + except UnicodeError: + pass + append((event, (prefix or "", uri or ""))) + parser.StartNamespaceDeclHandler = handler + elif event == "end-ns": + def handler(prefix, event=event, append=append): + append((event, None)) + parser.EndNamespaceDeclHandler = handler + else: + raise ValueError("unknown event %r" % event) + + def next(self): + try: + while 1: + try: + item = self._events[self._index] + self._index += 1 + return item + except IndexError: + 
pass + if self._error: + e = self._error + self._error = None + raise e + if self._parser is None: + self.root = self._root + break + # load event buffer + del self._events[:] + self._index = 0 + data = self._file.read(16384) + if data: + try: + self._parser.feed(data) + except SyntaxError as exc: + self._error = exc + else: + self._root = self._parser.close() + self._parser = None + except: + if self._close_file: + self._file.close() + raise + if self._close_file: + self._file.close() + raise StopIteration + + def __iter__(self): + return self + +## +# Parses an XML document from a string constant. This function can +# be used to embed "XML literals" in Python code. +# +# @param source A string containing XML data. +# @param parser An optional parser instance. If not given, the +# standard {@link XMLParser} parser is used. +# @return An Element instance. +# @defreturn Element + +def XML(text, parser=None): + if not parser: + parser = XMLParser(target=TreeBuilder()) + parser.feed(text) + return parser.close() + +## +# Parses an XML document from a string constant, and also returns +# a dictionary which maps from element id:s to elements. +# +# @param source A string containing XML data. +# @param parser An optional parser instance. If not given, the +# standard {@link XMLParser} parser is used. +# @return A tuple containing an Element instance and a dictionary. +# @defreturn (Element, dictionary) + +def XMLID(text, parser=None): + if not parser: + parser = XMLParser(target=TreeBuilder()) + parser.feed(text) + tree = parser.close() + ids = {} + for elem in tree.iter(): + id = elem.get("id") + if id: + ids[id] = elem + return tree, ids + +## +# Parses an XML document from a string constant. Same as {@link #XML}. +# +# @def fromstring(text) +# @param source A string containing XML data. +# @return An Element instance. +# @defreturn Element + +fromstring = XML + +## +# Parses an XML document from a sequence of string fragments. 
+# +# @param sequence A list or other sequence containing XML data fragments. +# @param parser An optional parser instance. If not given, the +# standard {@link XMLParser} parser is used. +# @return An Element instance. +# @defreturn Element +# @since 1.3 + +def fromstringlist(sequence, parser=None): + if not parser: + parser = XMLParser(target=TreeBuilder()) + for text in sequence: + parser.feed(text) + return parser.close() + +# -------------------------------------------------------------------- + +## +# Generic element structure builder. This builder converts a sequence +# of {@link #TreeBuilder.start}, {@link #TreeBuilder.data}, and {@link +# #TreeBuilder.end} method calls to a well-formed element structure. +#

+# You can use this class to build an element structure using a custom XML +# parser, or a parser for some other XML-like format. +# +# @param element_factory Optional element factory. This factory +# is called to create new Element instances, as necessary. + +class TreeBuilder(object): + + def __init__(self, element_factory=None): + self._data = [] # data collector + self._elem = [] # element stack + self._last = None # last element + self._tail = None # true if we're after an end tag + if element_factory is None: + element_factory = Element + self._factory = element_factory + + ## + # Flushes the builder buffers, and returns the toplevel document + # element. + # + # @return An Element instance. + # @defreturn Element + + def close(self): + assert len(self._elem) == 0, "missing end tags" + assert self._last is not None, "missing toplevel element" + return self._last + + def _flush(self): + if self._data: + if self._last is not None: + text = "".join(self._data) + if self._tail: + assert self._last.tail is None, "internal error (tail)" + self._last.tail = text + else: + assert self._last.text is None, "internal error (text)" + self._last.text = text + self._data = [] + + ## + # Adds text to the current element. + # + # @param data A string. This should be either an 8-bit string + # containing ASCII text, or a Unicode string. + + def data(self, data): + self._data.append(data) + + ## + # Opens a new element. + # + # @param tag The element name. + # @param attrib A dictionary containing element attributes. + # @return The opened element. + # @defreturn Element + + def start(self, tag, attrs): + self._flush() + self._last = elem = self._factory(tag, attrs) + if self._elem: + self._elem[-1].append(elem) + self._elem.append(elem) + self._tail = 0 + return elem + + ## + # Closes the current element. + # + # @param tag The element name. + # @return The closed element. 
+ # @defreturn Element + + def end(self, tag): + self._flush() + self._last = self._elem.pop() + assert self._last.tag == tag,\ + "end tag mismatch (expected %s, got %s)" % ( + self._last.tag, tag) + self._tail = 1 + return self._last + +## +# Element structure builder for XML source data, based on the +# expat parser. +# +# @keyparam target Target object. If omitted, the builder uses an +# instance of the standard {@link #TreeBuilder} class. +# @keyparam html Predefine HTML entities. This flag is not supported +# by the current implementation. +# @keyparam encoding Optional encoding. If given, the value overrides +# the encoding specified in the XML file. +# @see #ElementTree +# @see #TreeBuilder + +class XMLParser(object): + + def __init__(self, html=0, target=None, encoding=None): + try: + from xml.parsers import expat + except ImportError: + try: + import pyexpat as expat + except ImportError: + raise ImportError( + "No module named expat; use SimpleXMLTreeBuilder instead" + ) + parser = expat.ParserCreate(encoding, "}") + if target is None: + target = TreeBuilder() + # underscored names are provided for compatibility only + self.parser = self._parser = parser + self.target = self._target = target + self._error = expat.error + self._names = {} # name memo cache + # callbacks + parser.DefaultHandlerExpand = self._default + parser.StartElementHandler = self._start + parser.EndElementHandler = self._end + parser.CharacterDataHandler = self._data + # optional callbacks + parser.CommentHandler = self._comment + parser.ProcessingInstructionHandler = self._pi + # let expat do the buffering, if supported + try: + self._parser.buffer_text = 1 + except AttributeError: + pass + # use new-style attribute handling, if supported + try: + self._parser.ordered_attributes = 1 + self._parser.specified_attributes = 1 + parser.StartElementHandler = self._start_list + except AttributeError: + pass + self._doctype = None + self.entity = {} + try: + self.version = "Expat %d.%d.%d" % 
expat.version_info + except AttributeError: + pass # unknown + + def _raiseerror(self, value): + err = ParseError(value) + err.code = value.code + err.position = value.lineno, value.offset + raise err + + def _fixtext(self, text): + # convert text string to ascii, if possible + try: + return text.encode("ascii") + except UnicodeError: + return text + + def _fixname(self, key): + # expand qname, and convert name string to ascii, if possible + try: + name = self._names[key] + except KeyError: + name = key + if "}" in name: + name = "{" + name + self._names[key] = name = self._fixtext(name) + return name + + def _start(self, tag, attrib_in): + fixname = self._fixname + fixtext = self._fixtext + tag = fixname(tag) + attrib = {} + for key, value in attrib_in.items(): + attrib[fixname(key)] = fixtext(value) + return self.target.start(tag, attrib) + + def _start_list(self, tag, attrib_in): + fixname = self._fixname + fixtext = self._fixtext + tag = fixname(tag) + attrib = {} + if attrib_in: + for i in range(0, len(attrib_in), 2): + attrib[fixname(attrib_in[i])] = fixtext(attrib_in[i+1]) + return self.target.start(tag, attrib) + + def _data(self, text): + return self.target.data(self._fixtext(text)) + + def _end(self, tag): + return self.target.end(self._fixname(tag)) + + def _comment(self, data): + try: + comment = self.target.comment + except AttributeError: + pass + else: + return comment(self._fixtext(data)) + + def _pi(self, target, data): + try: + pi = self.target.pi + except AttributeError: + pass + else: + return pi(self._fixtext(target), self._fixtext(data)) + + def _default(self, text): + prefix = text[:1] + if prefix == "&": + # deal with undefined entities + try: + self.target.data(self.entity[text[1:-1]]) + except KeyError: + from xml.parsers import expat + err = expat.error( + "undefined entity %s: line %d, column %d" % + (text, self._parser.ErrorLineNumber, + self._parser.ErrorColumnNumber) + ) + err.code = 11 # XML_ERROR_UNDEFINED_ENTITY + err.lineno = 
self._parser.ErrorLineNumber + err.offset = self._parser.ErrorColumnNumber + raise err + elif prefix == "<" and text[:9] == "": + self._doctype = None + return + text = text.strip() + if not text: + return + self._doctype.append(text) + n = len(self._doctype) + if n > 2: + type = self._doctype[1] + if type == "PUBLIC" and n == 4: + name, type, pubid, system = self._doctype + elif type == "SYSTEM" and n == 3: + name, type, system = self._doctype + pubid = None + else: + return + if pubid: + pubid = pubid[1:-1] + if hasattr(self.target, "doctype"): + self.target.doctype(name, pubid, system[1:-1]) + elif self.doctype is not self._XMLParser__doctype: + # warn about deprecated call + self._XMLParser__doctype(name, pubid, system[1:-1]) + self.doctype(name, pubid, system[1:-1]) + self._doctype = None + + ## + # (Deprecated) Handles a doctype declaration. + # + # @param name Doctype name. + # @param pubid Public identifier. + # @param system System identifier. + + def doctype(self, name, pubid, system): + """This method of XMLParser is deprecated.""" + warnings.warn( + "This method of XMLParser is deprecated. Define doctype() " + "method on the TreeBuilder target.", + DeprecationWarning, + ) + + # sentinel, if doctype is redefined in a subclass + __doctype = doctype + + ## + # Feeds data to the parser. + # + # @param data Encoded data. + + def feed(self, data): + try: + self._parser.Parse(data, 0) + except self._error, v: + self._raiseerror(v) + + ## + # Finishes feeding data to the parser. + # + # @return An element structure. + # @defreturn Element + + def close(self): + try: + self._parser.Parse("", 1) # end of data + except self._error, v: + self._raiseerror(v) + tree = self.target.close() + del self.target, self._parser # get rid of circular references + return tree + +# compatibility +XMLTreeBuilder = XMLParser + +# workaround circular import. 
+try: + from ElementC14N import _serialize_c14n + _serialize["c14n"] = _serialize_c14n +except ImportError: + pass -- Repository URL: https://hg.python.org/jython From jython-checkins at python.org Sun Sep 4 03:26:11 2016 From: jython-checkins at python.org (jeff.allen) Date: Sun, 04 Sep 2016 07:26:11 +0000 Subject: [Jython-checkins] =?utf-8?q?jython_=28merge_default_-=3E_default?= =?utf-8?q?=29=3A_Merge_a_couple_of_fixes_to_trunk=2E_=28NEWS_merged_by_ha?= =?utf-8?q?nd=2E=29?= Message-ID: <20160904072547.68747.52300.3E1CD8D6@psf.io> https://hg.python.org/jython/rev/9429a467f6b3 changeset: 7959:9429a467f6b3 parent: 7953:63c3d1b61721 parent: 7958:3de87d0fa0d5 user: Jeff Allen date: Sun Sep 04 07:42:54 2016 +0100 summary: Merge a couple of fixes to trunk. (NEWS merged by hand.) files: Lib/test/regrtest.py | 1 + Lib/test/test_SimpleXMLRPCServer.py | 9 +- Lib/test/test_dict_jy.py | 2 + Lib/test/test_gc_jy.py | 70 +- Lib/test/test_generators_jy.py | 16 +- Lib/test/test_socketserver_jython.py | 39 +- Lib/test/test_xml_etree.py | 4 +- Lib/test/test_xml_etree_jy.py | 21 +- Lib/xml/etree/ElementTree.py | 1680 +++++++++++++ NEWS | 2 + src/org/python/core/JavaProxyMap.java | 36 + 11 files changed, 1801 insertions(+), 79 deletions(-) diff --git a/Lib/test/regrtest.py b/Lib/test/regrtest.py --- a/Lib/test/regrtest.py +++ b/Lib/test/regrtest.py @@ -1309,6 +1309,7 @@ test_stringprep # UnicodeDecodeError test_threadsignals test_transformer + test_xml_etree_jy test_zipimport # fails on Windows standalone, probably shouldn't diff --git a/Lib/test/test_SimpleXMLRPCServer.py b/Lib/test/test_SimpleXMLRPCServer.py --- a/Lib/test/test_SimpleXMLRPCServer.py +++ b/Lib/test/test_SimpleXMLRPCServer.py @@ -4,6 +4,7 @@ from SimpleXMLRPCServer import SimpleXMLRPCServer import threading, xmlrpclib, unittest +from test import test_support HOST = "127.0.0.1" PORT = 7218 @@ -82,7 +83,9 @@ self.assertEqual(client.squared(10), 100) +def test_main(): + 
test_support.run_unittest(SimpleXMLRPCServerTestCase) + + if __name__ == "__main__": - unittest.main() - -# vim:et:ts=4:sw=4: + test_main() diff --git a/Lib/test/test_dict_jy.py b/Lib/test/test_dict_jy.py --- a/Lib/test/test_dict_jy.py +++ b/Lib/test/test_dict_jy.py @@ -137,6 +137,8 @@ x['a'] = 1 x[(1, 2)] = 'xyz' self.assertEqual({tup for tup in x.iteritems()}, {('a', 1), ((1, 2), 'xyz')}) + self.assertEqual({tup for tup in x.itervalues()}, {1, 'xyz'}) + self.assertEqual({tup for tup in x.iterkeys()}, {'a', (1, 2)}) self.assertEqual(str(x), repr(x)) self.assertEqual(type(str(x)), type(repr(x))) diff --git a/Lib/test/test_gc_jy.py b/Lib/test/test_gc_jy.py --- a/Lib/test/test_gc_jy.py +++ b/Lib/test/test_gc_jy.py @@ -782,6 +782,7 @@ gc.removeJythonGCFlags(gc.FORCE_DELAYED_WEAKREF_CALLBACKS) + at unittest.skipIf(__name__ != "__main__", 'Hangs under regrtest') class GCTests_Jy_Monitoring(unittest.TestCase): @classmethod @@ -1157,59 +1158,20 @@ self.assertTrue(ref == ref) +def test_main(): + tests = ( + GCTests_Jy_CyclicGarbage, + GCTests_Jy_preprocess_and_postprocess, + GCTests_Jy_Delayed_Finalization, + GCTests_Jy_Forced_Delayed_Finalization, + GCTests_Jy_Raw_Forced_Delayed_Finalization, + GCTests_Jy_Monitoring, + GCTests_Jy_Weakref, + GCTests_Jy_TraverseByReflection, + GCTests_Misc, + ) + test_support.run_unittest(*tests) + if __name__ == "__main__": unittest.main() -# comments = [] -# resurrected = [] -# -# class Test_JavaResurrectFinalizable(Object): -# def __init__(self, name, toResurrect): -# self.name = name -# self.toResurrect = toResurrect -# -# def __repr__(self): -# return "<"+self.name+">" -# -# #def __del__(self): -# def finalize(self): -# gc.notifyPreFinalization() -# comments.append("del "+self.name) -# #gc.abortDelayedFinalization(self.toAbort) -# resurrected.append(self.toResurrect) -# print "finalize "+self.name -# # We manually restore weak references: -# gc.restoreWeakReferences(self.toResurrect) -# gc.notifyPostFinalization() -# -# class 
Test_Finalizable(object): -# def __init__(self, name): -# self.name = name -# -# def __repr__(self): -# return "<"+self.name+">" -# -# def __del__(self): -# comments.append("del "+self.name) -# -# def callback(obj): -# comments.append("callback")#+str(obj)) -# print "callback: "+str(obj) -# -# a = Test_Finalizable("a") -# b = Test_JavaResurrectFinalizable("b", a) -# wa = weakref.ref(a, callback) -# print ("wref: ")+str(wa()) -# gc.addJythonGCFlags(gc.VERBOSE_DELAYED) -# #gc.addJythonGCFlags(gc.FORCE_DELAYED_FINALIZATION) -# #gc.addJythonGCFlags(gc.FORCE_DELAYED_WEAKREF_CALLBACKS) -# print "delayed finalization? "+str(gc.delayedFinalizationEnabled()) -# print "delayed callbacks? "+str(gc.delayedWeakrefCallbacksEnabled()) -# print comments -# del a -# del b -# System.gc() -# time.sleep(1) -# print comments -# print resurrected -# print ("wref: ")+str(wa()) - + diff --git a/Lib/test/test_generators_jy.py b/Lib/test/test_generators_jy.py --- a/Lib/test/test_generators_jy.py +++ b/Lib/test/test_generators_jy.py @@ -1,7 +1,8 @@ from __future__ import generators import unittest +from test import test_support -# tests for deeply nested try/except/finally's +# tests for deeply nested try/except/finally class FinallyTests(unittest.TestCase): def gen1(self): @@ -168,5 +169,16 @@ self.assertEqual(genexp.gi_frame, None) self.assertRaises(StopIteration, genexp.next) + +def test_main(): + tests = ( + FinallyTests, + TryExceptTests, + TestThrowTestCase, + ) + test_support.run_unittest(*tests) + + if __name__ == "__main__": - unittest.main() + test_main() + diff --git a/Lib/test/test_socketserver_jython.py b/Lib/test/test_socketserver_jython.py --- a/Lib/test/test_socketserver_jython.py +++ b/Lib/test/test_socketserver_jython.py @@ -1,17 +1,22 @@ -# -*- coding: windows-1252 -*- - -import unittest - -import SocketServer - -class TestSocketServer(unittest.TestCase): - - def testEphemeralPort(self): - """ Test that an ephemeral port is set correctly """ - host, port = "localhost", 0 # 
If we specify 0, system should pick an emphemeral port - server = SocketServer.TCPServer( (host, port), None) # Request handler never instantiated - server_host, server_port = server.server_address - self.failIfEqual(server_port, 0, "System assigned ephemeral port should not be zero") - -if __name__ == "__main__": - unittest.main() +import unittest +from test import test_support +import SocketServer + +class TestSocketServer(unittest.TestCase): + + def testEphemeralPort(self): + """ Test that an ephemeral port is set correctly """ + # If we specify 0, system should pick an emphemeral port + host, port = "localhost", 0 + # Request handler never instantiated + server = SocketServer.TCPServer( (host, port), None) + server_host, server_port = server.server_address + self.failIfEqual(server_port, 0, "System assigned ephemeral port should not be zero") + + +def test_main(): + test_support.run_unittest(TestSocketServer) + + +if __name__ == "__main__": + test_main() diff --git a/Lib/test/test_xml_etree.py b/Lib/test/test_xml_etree.py --- a/Lib/test/test_xml_etree.py +++ b/Lib/test/test_xml_etree.py @@ -349,7 +349,7 @@ def test_main(): from test import test_xml_etree - test_support.run_doctest(test_xml_etree, verbosity=True) + test_support.run_doctest(test_xml_etree) if __name__ == '__main__': - test_main() + doctest.testmod() diff --git a/Lib/test/test_xml_etree_jy.py b/Lib/test/test_xml_etree_jy.py --- a/Lib/test/test_xml_etree_jy.py +++ b/Lib/test/test_xml_etree_jy.py @@ -4,6 +4,7 @@ JYTHON = sys.platform.startswith("java") import doctest +from test import test_support import xml.parsers.expat as expat from xml.etree.ElementTree import * @@ -750,7 +751,7 @@ Entity name: entity """ -def test_close_files(): +def test_close_file_iss1479(): # http://bugs.jython.org/issue1479 """ >>> import os @@ -767,5 +768,23 @@ >>> os.remove(test_support.TESTFN) """ +def test_close_file_iss2413(): + # http://bugs.jython.org/issue2413 + """ + >>> import os + >>> from test import 
test_support + >>> from xml.etree import ElementTree as ET + + >>> tree = ET.ElementTree(ET.XML('')) + >>> tree.write(test_support.TESTFN, encoding='an_unknown_encoding') + Traceback (most recent call last): + LookupError: unknown encoding 'an_unknown_encoding' + >>> os.remove(test_support.TESTFN) + """ + +def test_main(): + from test import test_xml_etree_jy + test_support.run_doctest(test_xml_etree_jy) + if __name__ == "__main__": doctest.testmod() diff --git a/Lib/xml/etree/ElementTree.py b/Lib/xml/etree/ElementTree.py new file mode 100644 --- /dev/null +++ b/Lib/xml/etree/ElementTree.py @@ -0,0 +1,1680 @@ +# +# ElementTree +# $Id: ElementTree.py 3440 2008-07-18 14:45:01Z fredrik $ +# +# light-weight XML support for Python 2.3 and later. +# +# history (since 1.2.6): +# 2005-11-12 fl added tostringlist/fromstringlist helpers +# 2006-07-05 fl merged in selected changes from the 1.3 sandbox +# 2006-07-05 fl removed support for 2.1 and earlier +# 2007-06-21 fl added deprecation/future warnings +# 2007-08-25 fl added doctype hook, added parser version attribute etc +# 2007-08-26 fl added new serializer code (better namespace handling, etc) +# 2007-08-27 fl warn for broken /tag searches on tree level +# 2007-09-02 fl added html/text methods to serializer (experimental) +# 2007-09-05 fl added method argument to tostring/tostringlist +# 2007-09-06 fl improved error handling +# 2007-09-13 fl added itertext, iterfind; assorted cleanups +# 2007-12-15 fl added C14N hooks, copy method (experimental) +# +# Copyright (c) 1999-2008 by Fredrik Lundh. All rights reserved. 
+# +# fredrik at pythonware.com +# http://www.pythonware.com +# +# -------------------------------------------------------------------- +# The ElementTree toolkit is +# +# Copyright (c) 1999-2008 by Fredrik Lundh +# +# By obtaining, using, and/or copying this software and/or its +# associated documentation, you agree that you have read, understood, +# and will comply with the following terms and conditions: +# +# Permission to use, copy, modify, and distribute this software and +# its associated documentation for any purpose and without fee is +# hereby granted, provided that the above copyright notice appears in +# all copies, and that both that copyright notice and this permission +# notice appear in supporting documentation, and that the name of +# Secret Labs AB or the author not be used in advertising or publicity +# pertaining to distribution of the software without specific, written +# prior permission. +# +# SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD +# TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANT- +# ABILITY AND FITNESS. IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR +# BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY +# DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, +# WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS +# ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE +# OF THIS SOFTWARE. +# -------------------------------------------------------------------- + +# Licensed to PSF under a Contributor Agreement. +# See http://www.python.org/psf/license for licensing details. 
+ +__all__ = [ + # public symbols + "Comment", + "dump", + "Element", "ElementTree", + "fromstring", "fromstringlist", + "iselement", "iterparse", + "parse", "ParseError", + "PI", "ProcessingInstruction", + "QName", + "SubElement", + "tostring", "tostringlist", + "TreeBuilder", + "VERSION", + "XML", + "XMLParser", "XMLTreeBuilder", + ] + +VERSION = "1.3.0" + +## +# The Element type is a flexible container object, designed to +# store hierarchical data structures in memory. The type can be +# described as a cross between a list and a dictionary. +#

+# Each element has a number of properties associated with it: +#

+# - a tag. This is a string identifying what kind of data
+#   this element represents (the element type, in other words).
+# - a number of attributes, stored in a Python dictionary.
+# - a text string.
+# - an optional tail string.
+# - a number of child elements, stored in a Python sequence
+#
+# +# To create an element instance, use the {@link #Element} constructor +# or the {@link #SubElement} factory function. +#

+# The {@link #ElementTree} class can be used to wrap an element +# structure, and convert it from and to XML. +## + +import sys +import re +import warnings + + +class _SimpleElementPath(object): + # emulate pre-1.2 find/findtext/findall behaviour + def find(self, element, tag, namespaces=None): + for elem in element: + if elem.tag == tag: + return elem + return None + def findtext(self, element, tag, default=None, namespaces=None): + elem = self.find(element, tag) + if elem is None: + return default + return elem.text or "" + def iterfind(self, element, tag, namespaces=None): + if tag[:3] == ".//": + for elem in element.iter(tag[3:]): + yield elem + for elem in element: + if elem.tag == tag: + yield elem + def findall(self, element, tag, namespaces=None): + return list(self.iterfind(element, tag, namespaces)) + +try: + from . import ElementPath +except ImportError: + ElementPath = _SimpleElementPath() + +## +# Parser error. This is a subclass of SyntaxError. +#

+# In addition to the exception value, an exception instance contains a +# specific exception code in the code attribute, and the line and +# column of the error in the position attribute. + +class ParseError(SyntaxError): + pass + +# -------------------------------------------------------------------- + +## +# Checks if an object appears to be a valid element object. +# +# @param An element instance. +# @return A true value if this is an element object. +# @defreturn flag + +def iselement(element): + # FIXME: not sure about this; might be a better idea to look + # for tag/attrib/text attributes + return isinstance(element, Element) or hasattr(element, "tag") + +## +# Element class. This class defines the Element interface, and +# provides a reference implementation of this interface. +#

+# The element name, attribute names, and attribute values can be +# either ASCII strings (ordinary Python strings containing only 7-bit +# ASCII characters) or Unicode strings. +# +# @param tag The element name. +# @param attrib An optional dictionary, containing element attributes. +# @param **extra Additional attributes, given as keyword arguments. +# @see Element +# @see SubElement +# @see Comment +# @see ProcessingInstruction + +class Element(object): + # text...tail + + ## + # (Attribute) Element tag. + + tag = None + + ## + # (Attribute) Element attribute dictionary. Where possible, use + # {@link #Element.get}, + # {@link #Element.set}, + # {@link #Element.keys}, and + # {@link #Element.items} to access + # element attributes. + + attrib = None + + ## + # (Attribute) Text before first subelement. This is either a + # string or the value None. Note that if there was no text, this + # attribute may be either None or an empty string, depending on + # the parser. + + text = None + + ## + # (Attribute) Text after this element's end tag, but before the + # next sibling element's start tag. This is either a string or + # the value None. Note that if there was no text, this attribute + # may be either None or an empty string, depending on the parser. + + tail = None # text after end tag, if any + + # constructor + + def __init__(self, tag, attrib={}, **extra): + attrib = attrib.copy() + attrib.update(extra) + self.tag = tag + self.attrib = attrib + self._children = [] + + def __repr__(self): + return "" % (repr(self.tag), id(self)) + + ## + # Creates a new element object of the same type as this element. + # + # @param tag Element tag. + # @param attrib Element attributes, given as a dictionary. + # @return A new element instance. + + def makeelement(self, tag, attrib): + return self.__class__(tag, attrib) + + ## + # (Experimental) Copies the current element. This creates a + # shallow copy; subelements will be shared with the original tree. 
+ # + # @return A new element instance. + + def copy(self): + elem = self.makeelement(self.tag, self.attrib) + elem.text = self.text + elem.tail = self.tail + elem[:] = self + return elem + + ## + # Returns the number of subelements. Note that this only counts + # full elements; to check if there's any content in an element, you + # have to check both the length and the text attribute. + # + # @return The number of subelements. + + def __len__(self): + return len(self._children) + + def __nonzero__(self): + warnings.warn( + "The behavior of this method will change in future versions. " + "Use specific 'len(elem)' or 'elem is not None' test instead.", + FutureWarning, stacklevel=2 + ) + return len(self._children) != 0 # emulate old behaviour, for now + + ## + # Returns the given subelement, by index. + # + # @param index What subelement to return. + # @return The given subelement. + # @exception IndexError If the given element does not exist. + + def __getitem__(self, index): + return self._children[index] + + ## + # Replaces the given subelement, by index. + # + # @param index What subelement to replace. + # @param element The new element value. + # @exception IndexError If the given element does not exist. + + def __setitem__(self, index, element): + # if isinstance(index, slice): + # for elt in element: + # assert iselement(elt) + # else: + # assert iselement(element) + self._children[index] = element + + ## + # Deletes the given subelement, by index. + # + # @param index What subelement to delete. + # @exception IndexError If the given element does not exist. + + def __delitem__(self, index): + del self._children[index] + + ## + # Adds a subelement to the end of this element. In document order, + # the new element will appear after the last existing subelement (or + # directly after the text, if it's the first subelement), but before + # the end tag for this element. + # + # @param element The element to add. 
+ + def append(self, element): + # assert iselement(element) + self._children.append(element) + + ## + # Appends subelements from a sequence. + # + # @param elements A sequence object with zero or more elements. + # @since 1.3 + + def extend(self, elements): + # for element in elements: + # assert iselement(element) + self._children.extend(elements) + + ## + # Inserts a subelement at the given position in this element. + # + # @param index Where to insert the new subelement. + + def insert(self, index, element): + # assert iselement(element) + self._children.insert(index, element) + + ## + # Removes a matching subelement. Unlike the find methods, + # this method compares elements based on identity, not on tag + # value or contents. To remove subelements by other means, the + # easiest way is often to use a list comprehension to select what + # elements to keep, and use slice assignment to update the parent + # element. + # + # @param element What element to remove. + # @exception ValueError If a matching element could not be found. + + def remove(self, element): + # assert iselement(element) + self._children.remove(element) + + ## + # (Deprecated) Returns all subelements. The elements are returned + # in document order. + # + # @return A list of subelements. + # @defreturn list of Element instances + + def getchildren(self): + warnings.warn( + "This method will be removed in future versions. " + "Use 'list(elem)' or iteration over elem instead.", + DeprecationWarning, stacklevel=2 + ) + return self._children + + ## + # Finds the first matching subelement, by tag name or path. + # + # @param path What element to look for. + # @keyparam namespaces Optional namespace prefix map. + # @return The first matching element, or None if no element was found. + # @defreturn Element or None + + def find(self, path, namespaces=None): + return ElementPath.find(self, path, namespaces) + + ## + # Finds text for the first matching subelement, by tag name or path. 
+ # + # @param path What element to look for. + # @param default What to return if the element was not found. + # @keyparam namespaces Optional namespace prefix map. + # @return The text content of the first matching element, or the + # default value no element was found. Note that if the element + # is found, but has no text content, this method returns an + # empty string. + # @defreturn string + + def findtext(self, path, default=None, namespaces=None): + return ElementPath.findtext(self, path, default, namespaces) + + ## + # Finds all matching subelements, by tag name or path. + # + # @param path What element to look for. + # @keyparam namespaces Optional namespace prefix map. + # @return A list or other sequence containing all matching elements, + # in document order. + # @defreturn list of Element instances + + def findall(self, path, namespaces=None): + return ElementPath.findall(self, path, namespaces) + + ## + # Finds all matching subelements, by tag name or path. + # + # @param path What element to look for. + # @keyparam namespaces Optional namespace prefix map. + # @return An iterator or sequence containing all matching elements, + # in document order. + # @defreturn a generated sequence of Element instances + + def iterfind(self, path, namespaces=None): + return ElementPath.iterfind(self, path, namespaces) + + ## + # Resets an element. This function removes all subelements, clears + # all attributes, and sets the text and tail attributes + # to None. + + def clear(self): + self.attrib.clear() + self._children = [] + self.text = self.tail = None + + ## + # Gets an element attribute. Equivalent to attrib.get, but + # some implementations may handle this a bit more efficiently. + # + # @param key What attribute to look for. + # @param default What to return if the attribute was not found. + # @return The attribute value, or the default value, if the + # attribute was not found. 
+ # @defreturn string or None + + def get(self, key, default=None): + return self.attrib.get(key, default) + + ## + # Sets an element attribute. Equivalent to attrib[key] = value, + # but some implementations may handle this a bit more efficiently. + # + # @param key What attribute to set. + # @param value The attribute value. + + def set(self, key, value): + self.attrib[key] = value + + ## + # Gets a list of attribute names. The names are returned in an + # arbitrary order (just like for an ordinary Python dictionary). + # Equivalent to attrib.keys(). + # + # @return A list of element attribute names. + # @defreturn list of strings + + def keys(self): + return self.attrib.keys() + + ## + # Gets element attributes, as a sequence. The attributes are + # returned in an arbitrary order. Equivalent to attrib.items(). + # + # @return A list of (name, value) tuples for all attributes. + # @defreturn list of (string, string) tuples + + def items(self): + return self.attrib.items() + + ## + # Creates a tree iterator. The iterator loops over this element + # and all subelements, in document order, and returns all elements + # with a matching tag. + #

+ # If the tree structure is modified during iteration, new or removed + # elements may or may not be included. To get a stable set, use the + # list() function on the iterator, and loop over the resulting list. + # + # @param tag What tags to look for (default is to return all elements). + # @return An iterator containing all the matching elements. + # @defreturn iterator + + def iter(self, tag=None): + if tag == "*": + tag = None + if tag is None or self.tag == tag: + yield self + for e in self._children: + for e in e.iter(tag): + yield e + + # compatibility + def getiterator(self, tag=None): + # Change for a DeprecationWarning in 1.4 + warnings.warn( + "This method will be removed in future versions. " + "Use 'elem.iter()' or 'list(elem.iter())' instead.", + PendingDeprecationWarning, stacklevel=2 + ) + return list(self.iter(tag)) + + ## + # Creates a text iterator. The iterator loops over this element + # and all subelements, in document order, and returns all inner + # text. + # + # @return An iterator containing all inner text. + # @defreturn iterator + + def itertext(self): + tag = self.tag + if not isinstance(tag, basestring) and tag is not None: + return + if self.text: + yield self.text + for e in self: + for s in e.itertext(): + yield s + if e.tail: + yield e.tail + +# compatibility +_Element = _ElementInterface = Element + +## +# Subelement factory. This function creates an element instance, and +# appends it to an existing element. +#

+# The element name, attribute names, and attribute values can be +# either 8-bit ASCII strings or Unicode strings. +# +# @param parent The parent element. +# @param tag The subelement name. +# @param attrib An optional dictionary, containing element attributes. +# @param **extra Additional attributes, given as keyword arguments. +# @return An element instance. +# @defreturn Element + +def SubElement(parent, tag, attrib={}, **extra): + attrib = attrib.copy() + attrib.update(extra) + element = parent.makeelement(tag, attrib) + parent.append(element) + return element + +## +# Comment element factory. This factory function creates a special +# element that will be serialized as an XML comment by the standard +# serializer. +#

+# The comment string can be either an 8-bit ASCII string or a Unicode +# string. +# +# @param text A string containing the comment string. +# @return An element instance, representing a comment. +# @defreturn Element + +def Comment(text=None): + element = Element(Comment) + element.text = text + return element + +## +# PI element factory. This factory function creates a special element +# that will be serialized as an XML processing instruction by the standard +# serializer. +# +# @param target A string containing the PI target. +# @param text A string containing the PI contents, if any. +# @return An element instance, representing a PI. +# @defreturn Element + +def ProcessingInstruction(target, text=None): + element = Element(ProcessingInstruction) + element.text = target + if text: + element.text = element.text + " " + text + return element + +PI = ProcessingInstruction + +## +# QName wrapper. This can be used to wrap a QName attribute value, in +# order to get proper namespace handling on output. +# +# @param text A string containing the QName value, in the form {uri}local, +# or, if the tag argument is given, the URI part of a QName. +# @param tag Optional tag. If given, the first argument is interpreted as +# an URI, and this argument is interpreted as a local name. +# @return An opaque object, representing the QName. + +class QName(object): + def __init__(self, text_or_uri, tag=None): + if tag: + text_or_uri = "{%s}%s" % (text_or_uri, tag) + self.text = text_or_uri + def __str__(self): + return self.text + def __hash__(self): + return hash(self.text) + def __cmp__(self, other): + if isinstance(other, QName): + return cmp(self.text, other.text) + return cmp(self.text, other) + +# -------------------------------------------------------------------- + +## +# ElementTree wrapper class. This class represents an entire element +# hierarchy, and adds some extra support for serialization to and from +# standard XML. +# +# @param element Optional root element. 
+# @keyparam file Optional file handle or file name. If given, the +# tree is initialized with the contents of this XML file. + +class ElementTree(object): + + def __init__(self, element=None, file=None): + # assert element is None or iselement(element) + self._root = element # first node + if file: + self.parse(file) + + ## + # Gets the root element for this tree. + # + # @return An element instance. + # @defreturn Element + + def getroot(self): + return self._root + + ## + # Replaces the root element for this tree. This discards the + # current contents of the tree, and replaces it with the given + # element. Use with care. + # + # @param element An element instance. + + def _setroot(self, element): + # assert iselement(element) + self._root = element + + ## + # Loads an external XML document into this element tree. + # + # @param source A file name or file object. If a file object is + # given, it only has to implement a read(n) method. + # @keyparam parser An optional parser instance. If not given, the + # standard {@link XMLParser} parser is used. + # @return The document root element. + # @defreturn Element + # @exception ParseError If the parser fails to parse the document. + + def parse(self, source, parser=None): + close_source = False + if not hasattr(source, "read"): + source = open(source, "rb") + close_source = True + try: + if not parser: + parser = XMLParser(target=TreeBuilder()) + while 1: + data = source.read(65536) + if not data: + break + parser.feed(data) + self._root = parser.close() + return self._root + finally: + if close_source: + source.close() + + ## + # Creates a tree iterator for the root element. The iterator loops + # over all elements in this tree, in document order. + # + # @param tag What tags to look for (default is to return all elements) + # @return An iterator. 
+ # @defreturn iterator + + def iter(self, tag=None): + # assert self._root is not None + return self._root.iter(tag) + + # compatibility + def getiterator(self, tag=None): + # Change for a DeprecationWarning in 1.4 + warnings.warn( + "This method will be removed in future versions. " + "Use 'tree.iter()' or 'list(tree.iter())' instead.", + PendingDeprecationWarning, stacklevel=2 + ) + return list(self.iter(tag)) + + ## + # Same as getroot().find(path), starting at the root of the + # tree. + # + # @param path What element to look for. + # @keyparam namespaces Optional namespace prefix map. + # @return The first matching element, or None if no element was found. + # @defreturn Element or None + + def find(self, path, namespaces=None): + # assert self._root is not None + if path[:1] == "/": + path = "." + path + warnings.warn( + "This search is broken in 1.3 and earlier, and will be " + "fixed in a future version. If you rely on the current " + "behaviour, change it to %r" % path, + FutureWarning, stacklevel=2 + ) + return self._root.find(path, namespaces) + + ## + # Same as getroot().findtext(path), starting at the root of the tree. + # + # @param path What element to look for. + # @param default What to return if the element was not found. + # @keyparam namespaces Optional namespace prefix map. + # @return The text content of the first matching element, or the + # default value no element was found. Note that if the element + # is found, but has no text content, this method returns an + # empty string. + # @defreturn string + + def findtext(self, path, default=None, namespaces=None): + # assert self._root is not None + if path[:1] == "/": + path = "." + path + warnings.warn( + "This search is broken in 1.3 and earlier, and will be " + "fixed in a future version. 
If you rely on the current " + "behaviour, change it to %r" % path, + FutureWarning, stacklevel=2 + ) + return self._root.findtext(path, default, namespaces) + + ## + # Same as getroot().findall(path), starting at the root of the tree. + # + # @param path What element to look for. + # @keyparam namespaces Optional namespace prefix map. + # @return A list or iterator containing all matching elements, + # in document order. + # @defreturn list of Element instances + + def findall(self, path, namespaces=None): + # assert self._root is not None + if path[:1] == "/": + path = "." + path + warnings.warn( + "This search is broken in 1.3 and earlier, and will be " + "fixed in a future version. If you rely on the current " + "behaviour, change it to %r" % path, + FutureWarning, stacklevel=2 + ) + return self._root.findall(path, namespaces) + + ## + # Finds all matching subelements, by tag name or path. + # Same as getroot().iterfind(path). + # + # @param path What element to look for. + # @keyparam namespaces Optional namespace prefix map. + # @return An iterator or sequence containing all matching elements, + # in document order. + # @defreturn a generated sequence of Element instances + + def iterfind(self, path, namespaces=None): + # assert self._root is not None + if path[:1] == "/": + path = "." + path + warnings.warn( + "This search is broken in 1.3 and earlier, and will be " + "fixed in a future version. If you rely on the current " + "behaviour, change it to %r" % path, + FutureWarning, stacklevel=2 + ) + return self._root.iterfind(path, namespaces) + + ## + # Writes the element tree to a file, as XML. + # + # @def write(file, **options) + # @param file A file name, or a file object opened for writing. + # @param **options Options, given as keyword arguments. + # @keyparam encoding Optional output encoding (default is US-ASCII). + # @keyparam xml_declaration Controls if an XML declaration should + # be added to the file. 
Use False for never, True for always, + # None for only if not US-ASCII or UTF-8. None is default. + # @keyparam default_namespace Sets the default XML namespace (for "xmlns"). + # @keyparam method Optional output method ("xml", "html", "text" or + # "c14n"; default is "xml"). + + def write(self, file_or_filename, + # keyword arguments + encoding=None, + xml_declaration=None, + default_namespace=None, + method=None): + # assert self._root is not None + if not method: + method = "xml" + elif method not in _serialize: + # FIXME: raise an ImportError for c14n if ElementC14N is missing? + raise ValueError("unknown method %r" % method) + if hasattr(file_or_filename, "write"): + file = file_or_filename + else: + file = open(file_or_filename, "wb") + try: + write = file.write + if not encoding: + if method == "c14n": + encoding = "utf-8" + else: + encoding = "us-ascii" + elif xml_declaration or (xml_declaration is None and + encoding not in ("utf-8", "us-ascii")): + if method == "xml": + write("\n" % encoding) + if method == "text": + _serialize_text(write, self._root, encoding) + else: + qnames, namespaces = _namespaces( + self._root, encoding, default_namespace + ) + serialize = _serialize[method] + serialize(write, self._root, encoding, qnames, namespaces) + finally: + if file_or_filename is not file: + file.close() + + def write_c14n(self, file): + # lxml.etree compatibility. 
use output method instead + return self.write(file, method="c14n") + +# -------------------------------------------------------------------- +# serialization support + +def _namespaces(elem, encoding, default_namespace=None): + # identify namespaces used in this tree + + # maps qnames to *encoded* prefix:local names + qnames = {None: None} + + # maps uri:s to prefixes + namespaces = {} + if default_namespace: + namespaces[default_namespace] = "" + + def encode(text): + return text.encode(encoding) + + def add_qname(qname): + # calculate serialized qname representation + try: + if qname[:1] == "{": + uri, tag = qname[1:].rsplit("}", 1) + prefix = namespaces.get(uri) + if prefix is None: + prefix = _namespace_map.get(uri) + if prefix is None: + prefix = "ns%d" % len(namespaces) + if prefix != "xml": + namespaces[uri] = prefix + if prefix: + qnames[qname] = encode("%s:%s" % (prefix, tag)) + else: + qnames[qname] = encode(tag) # default element + else: + if default_namespace: + # FIXME: can this be handled in XML 1.0? 
+ raise ValueError( + "cannot use non-qualified names with " + "default_namespace option" + ) + qnames[qname] = encode(qname) + except TypeError: + _raise_serialization_error(qname) + + # populate qname and namespaces table + try: + iterate = elem.iter + except AttributeError: + iterate = elem.getiterator # cET compatibility + for elem in iterate(): + tag = elem.tag + if isinstance(tag, QName): + if tag.text not in qnames: + add_qname(tag.text) + elif isinstance(tag, basestring): + if tag not in qnames: + add_qname(tag) + elif tag is not None and tag is not Comment and tag is not PI: + _raise_serialization_error(tag) + for key, value in elem.items(): + if isinstance(key, QName): + key = key.text + if key not in qnames: + add_qname(key) + if isinstance(value, QName) and value.text not in qnames: + add_qname(value.text) + text = elem.text + if isinstance(text, QName) and text.text not in qnames: + add_qname(text.text) + return qnames, namespaces + +def _serialize_xml(write, elem, encoding, qnames, namespaces): + tag = elem.tag + text = elem.text + if tag is Comment: + write("" % _encode(text, encoding)) + elif tag is ProcessingInstruction: + write("" % _encode(text, encoding)) + else: + tag = qnames[tag] + if tag is None: + if text: + write(_escape_cdata(text, encoding)) + for e in elem: + _serialize_xml(write, e, encoding, qnames, None) + else: + write("<" + tag) + items = elem.items() + if items or namespaces: + if namespaces: + for v, k in sorted(namespaces.items(), + key=lambda x: x[1]): # sort on prefix + if k: + k = ":" + k + write(" xmlns%s=\"%s\"" % ( + k.encode(encoding), + _escape_attrib(v, encoding) + )) + for k, v in sorted(items): # lexical order + if isinstance(k, QName): + k = k.text + if isinstance(v, QName): + v = qnames[v.text] + else: + v = _escape_attrib(v, encoding) + write(" %s=\"%s\"" % (qnames[k], v)) + if text or len(elem): + write(">") + if text: + write(_escape_cdata(text, encoding)) + for e in elem: + _serialize_xml(write, e, encoding, 
qnames, None) + write("") + else: + write(" />") + if elem.tail: + write(_escape_cdata(elem.tail, encoding)) + +HTML_EMPTY = ("area", "base", "basefont", "br", "col", "frame", "hr", + "img", "input", "isindex", "link", "meta", "param") + +try: + HTML_EMPTY = set(HTML_EMPTY) +except NameError: + pass + +def _serialize_html(write, elem, encoding, qnames, namespaces): + tag = elem.tag + text = elem.text + if tag is Comment: + write("" % _escape_cdata(text, encoding)) + elif tag is ProcessingInstruction: + write("" % _escape_cdata(text, encoding)) + else: + tag = qnames[tag] + if tag is None: + if text: + write(_escape_cdata(text, encoding)) + for e in elem: + _serialize_html(write, e, encoding, qnames, None) + else: + write("<" + tag) + items = elem.items() + if items or namespaces: + if namespaces: + for v, k in sorted(namespaces.items(), + key=lambda x: x[1]): # sort on prefix + if k: + k = ":" + k + write(" xmlns%s=\"%s\"" % ( + k.encode(encoding), + _escape_attrib(v, encoding) + )) + for k, v in sorted(items): # lexical order + if isinstance(k, QName): + k = k.text + if isinstance(v, QName): + v = qnames[v.text] + else: + v = _escape_attrib_html(v, encoding) + # FIXME: handle boolean attributes + write(" %s=\"%s\"" % (qnames[k], v)) + write(">") + ltag = tag.lower() + if text: + if ltag == "script" or ltag == "style": + write(_encode(text, encoding)) + else: + write(_escape_cdata(text, encoding)) + for e in elem: + _serialize_html(write, e, encoding, qnames, None) + if ltag not in HTML_EMPTY: + write("") + if elem.tail: + write(_escape_cdata(elem.tail, encoding)) + +def _serialize_text(write, elem, encoding): + for part in elem.itertext(): + write(part.encode(encoding)) + if elem.tail: + write(elem.tail.encode(encoding)) + +_serialize = { + "xml": _serialize_xml, + "html": _serialize_html, + "text": _serialize_text, +# this optional method is imported at the end of the module +# "c14n": _serialize_c14n, +} + +## +# Registers a namespace prefix. 
The registry is global, and any +# existing mapping for either the given prefix or the namespace URI +# will be removed. +# +# @param prefix Namespace prefix. +# @param uri Namespace uri. Tags and attributes in this namespace +# will be serialized with the given prefix, if at all possible. +# @exception ValueError If the prefix is reserved, or is otherwise +# invalid. + +def register_namespace(prefix, uri): + if re.match("ns\d+$", prefix): + raise ValueError("Prefix format reserved for internal use") + for k, v in _namespace_map.items(): + if k == uri or v == prefix: + del _namespace_map[k] + _namespace_map[uri] = prefix + +_namespace_map = { + # "well-known" namespace prefixes + "http://www.w3.org/XML/1998/namespace": "xml", + "http://www.w3.org/1999/xhtml": "html", + "http://www.w3.org/1999/02/22-rdf-syntax-ns#": "rdf", + "http://schemas.xmlsoap.org/wsdl/": "wsdl", + # xml schema + "http://www.w3.org/2001/XMLSchema": "xs", + "http://www.w3.org/2001/XMLSchema-instance": "xsi", + # dublin core + "http://purl.org/dc/elements/1.1/": "dc", +} + +def _raise_serialization_error(text): + raise TypeError( + "cannot serialize %r (type %s)" % (text, type(text).__name__) + ) + +def _encode(text, encoding): + try: + return text.encode(encoding, "xmlcharrefreplace") + except (TypeError, AttributeError): + _raise_serialization_error(text) + +def _escape_cdata(text, encoding): + # escape character data + try: + # it's worth avoiding do-nothing calls for strings that are + # shorter than 500 character, or so. assume that's, by far, + # the most common case in most applications. 
+ if "&" in text: + text = text.replace("&", "&") + if "<" in text: + text = text.replace("<", "<") + if ">" in text: + text = text.replace(">", ">") + return text.encode(encoding, "xmlcharrefreplace") + except (TypeError, AttributeError): + _raise_serialization_error(text) + +def _escape_attrib(text, encoding): + # escape attribute value + try: + if "&" in text: + text = text.replace("&", "&") + if "<" in text: + text = text.replace("<", "<") + if ">" in text: + text = text.replace(">", ">") + if "\"" in text: + text = text.replace("\"", """) + if "\n" in text: + text = text.replace("\n", " ") + return text.encode(encoding, "xmlcharrefreplace") + except (TypeError, AttributeError): + _raise_serialization_error(text) + +def _escape_attrib_html(text, encoding): + # escape attribute value + try: + if "&" in text: + text = text.replace("&", "&") + if ">" in text: + text = text.replace(">", ">") + if "\"" in text: + text = text.replace("\"", """) + return text.encode(encoding, "xmlcharrefreplace") + except (TypeError, AttributeError): + _raise_serialization_error(text) + +# -------------------------------------------------------------------- + +## +# Generates a string representation of an XML element, including all +# subelements. +# +# @param element An Element instance. +# @keyparam encoding Optional output encoding (default is US-ASCII). +# @keyparam method Optional output method ("xml", "html", "text" or +# "c14n"; default is "xml"). +# @return An encoded string containing the XML data. +# @defreturn string + +def tostring(element, encoding=None, method=None): + class dummy: + pass + data = [] + file = dummy() + file.write = data.append + ElementTree(element).write(file, encoding, method=method) + return "".join(data) + +## +# Generates a string representation of an XML element, including all +# subelements. The string is returned as a sequence of string fragments. +# +# @param element An Element instance. 
+# @keyparam encoding Optional output encoding (default is US-ASCII). +# @keyparam method Optional output method ("xml", "html", "text" or +# "c14n"; default is "xml"). +# @return A sequence object containing the XML data. +# @defreturn sequence +# @since 1.3 + +def tostringlist(element, encoding=None, method=None): + class dummy: + pass + data = [] + file = dummy() + file.write = data.append + ElementTree(element).write(file, encoding, method=method) + # FIXME: merge small fragments into larger parts + return data + +## +# Writes an element tree or element structure to sys.stdout. This +# function should be used for debugging only. +#

+# The exact output format is implementation dependent. In this +# version, it's written as an ordinary XML file. +# +# @param elem An element tree or an individual element. + +def dump(elem): + # debugging + if not isinstance(elem, ElementTree): + elem = ElementTree(elem) + elem.write(sys.stdout) + tail = elem.getroot().tail + if not tail or tail[-1] != "\n": + sys.stdout.write("\n") + +# -------------------------------------------------------------------- +# parsing + +## +# Parses an XML document into an element tree. +# +# @param source A filename or file object containing XML data. +# @param parser An optional parser instance. If not given, the +# standard {@link XMLParser} parser is used. +# @return An ElementTree instance + +def parse(source, parser=None): + tree = ElementTree() + tree.parse(source, parser) + return tree + +## +# Parses an XML document into an element tree incrementally, and reports +# what's going on to the user. +# +# @param source A filename or file object containing XML data. +# @param events A list of events to report back. If omitted, only "end" +# events are reported. +# @param parser An optional parser instance. If not given, the +# standard {@link XMLParser} parser is used. +# @return A (event, elem) iterator. 
+ +def iterparse(source, events=None, parser=None): + close_source = False + if not hasattr(source, "read"): + source = open(source, "rb") + close_source = True + try: + if not parser: + parser = XMLParser(target=TreeBuilder()) + return _IterParseIterator(source, events, parser, close_source) + except: + if close_source: + source.close() + raise + +class _IterParseIterator(object): + + def __init__(self, source, events, parser, close_source=False): + self._file = source + self._close_file = close_source + self._events = [] + self._index = 0 + self._error = None + self.root = self._root = None + self._parser = parser + # wire up the parser for event reporting + parser = self._parser._parser + append = self._events.append + if events is None: + events = ["end"] + for event in events: + if event == "start": + try: + parser.ordered_attributes = 1 + parser.specified_attributes = 1 + def handler(tag, attrib_in, event=event, append=append, + start=self._parser._start_list): + append((event, start(tag, attrib_in))) + parser.StartElementHandler = handler + except AttributeError: + def handler(tag, attrib_in, event=event, append=append, + start=self._parser._start): + append((event, start(tag, attrib_in))) + parser.StartElementHandler = handler + elif event == "end": + def handler(tag, event=event, append=append, + end=self._parser._end): + append((event, end(tag))) + parser.EndElementHandler = handler + elif event == "start-ns": + def handler(prefix, uri, event=event, append=append): + try: + uri = (uri or "").encode("ascii") + except UnicodeError: + pass + append((event, (prefix or "", uri or ""))) + parser.StartNamespaceDeclHandler = handler + elif event == "end-ns": + def handler(prefix, event=event, append=append): + append((event, None)) + parser.EndNamespaceDeclHandler = handler + else: + raise ValueError("unknown event %r" % event) + + def next(self): + try: + while 1: + try: + item = self._events[self._index] + self._index += 1 + return item + except IndexError: + 
pass + if self._error: + e = self._error + self._error = None + raise e + if self._parser is None: + self.root = self._root + break + # load event buffer + del self._events[:] + self._index = 0 + data = self._file.read(16384) + if data: + try: + self._parser.feed(data) + except SyntaxError as exc: + self._error = exc + else: + self._root = self._parser.close() + self._parser = None + except: + if self._close_file: + self._file.close() + raise + if self._close_file: + self._file.close() + raise StopIteration + + def __iter__(self): + return self + +## +# Parses an XML document from a string constant. This function can +# be used to embed "XML literals" in Python code. +# +# @param source A string containing XML data. +# @param parser An optional parser instance. If not given, the +# standard {@link XMLParser} parser is used. +# @return An Element instance. +# @defreturn Element + +def XML(text, parser=None): + if not parser: + parser = XMLParser(target=TreeBuilder()) + parser.feed(text) + return parser.close() + +## +# Parses an XML document from a string constant, and also returns +# a dictionary which maps from element id:s to elements. +# +# @param source A string containing XML data. +# @param parser An optional parser instance. If not given, the +# standard {@link XMLParser} parser is used. +# @return A tuple containing an Element instance and a dictionary. +# @defreturn (Element, dictionary) + +def XMLID(text, parser=None): + if not parser: + parser = XMLParser(target=TreeBuilder()) + parser.feed(text) + tree = parser.close() + ids = {} + for elem in tree.iter(): + id = elem.get("id") + if id: + ids[id] = elem + return tree, ids + +## +# Parses an XML document from a string constant. Same as {@link #XML}. +# +# @def fromstring(text) +# @param source A string containing XML data. +# @return An Element instance. +# @defreturn Element + +fromstring = XML + +## +# Parses an XML document from a sequence of string fragments. 
+# +# @param sequence A list or other sequence containing XML data fragments. +# @param parser An optional parser instance. If not given, the +# standard {@link XMLParser} parser is used. +# @return An Element instance. +# @defreturn Element +# @since 1.3 + +def fromstringlist(sequence, parser=None): + if not parser: + parser = XMLParser(target=TreeBuilder()) + for text in sequence: + parser.feed(text) + return parser.close() + +# -------------------------------------------------------------------- + +## +# Generic element structure builder. This builder converts a sequence +# of {@link #TreeBuilder.start}, {@link #TreeBuilder.data}, and {@link +# #TreeBuilder.end} method calls to a well-formed element structure. +#

+# You can use this class to build an element structure using a custom XML +# parser, or a parser for some other XML-like format. +# +# @param element_factory Optional element factory. This factory +# is called to create new Element instances, as necessary. + +class TreeBuilder(object): + + def __init__(self, element_factory=None): + self._data = [] # data collector + self._elem = [] # element stack + self._last = None # last element + self._tail = None # true if we're after an end tag + if element_factory is None: + element_factory = Element + self._factory = element_factory + + ## + # Flushes the builder buffers, and returns the toplevel document + # element. + # + # @return An Element instance. + # @defreturn Element + + def close(self): + assert len(self._elem) == 0, "missing end tags" + assert self._last is not None, "missing toplevel element" + return self._last + + def _flush(self): + if self._data: + if self._last is not None: + text = "".join(self._data) + if self._tail: + assert self._last.tail is None, "internal error (tail)" + self._last.tail = text + else: + assert self._last.text is None, "internal error (text)" + self._last.text = text + self._data = [] + + ## + # Adds text to the current element. + # + # @param data A string. This should be either an 8-bit string + # containing ASCII text, or a Unicode string. + + def data(self, data): + self._data.append(data) + + ## + # Opens a new element. + # + # @param tag The element name. + # @param attrib A dictionary containing element attributes. + # @return The opened element. + # @defreturn Element + + def start(self, tag, attrs): + self._flush() + self._last = elem = self._factory(tag, attrs) + if self._elem: + self._elem[-1].append(elem) + self._elem.append(elem) + self._tail = 0 + return elem + + ## + # Closes the current element. + # + # @param tag The element name. + # @return The closed element. 
+ # @defreturn Element + + def end(self, tag): + self._flush() + self._last = self._elem.pop() + assert self._last.tag == tag,\ + "end tag mismatch (expected %s, got %s)" % ( + self._last.tag, tag) + self._tail = 1 + return self._last + +## +# Element structure builder for XML source data, based on the +# expat parser. +# +# @keyparam target Target object. If omitted, the builder uses an +# instance of the standard {@link #TreeBuilder} class. +# @keyparam html Predefine HTML entities. This flag is not supported +# by the current implementation. +# @keyparam encoding Optional encoding. If given, the value overrides +# the encoding specified in the XML file. +# @see #ElementTree +# @see #TreeBuilder + +class XMLParser(object): + + def __init__(self, html=0, target=None, encoding=None): + try: + from xml.parsers import expat + except ImportError: + try: + import pyexpat as expat + except ImportError: + raise ImportError( + "No module named expat; use SimpleXMLTreeBuilder instead" + ) + parser = expat.ParserCreate(encoding, "}") + if target is None: + target = TreeBuilder() + # underscored names are provided for compatibility only + self.parser = self._parser = parser + self.target = self._target = target + self._error = expat.error + self._names = {} # name memo cache + # callbacks + parser.DefaultHandlerExpand = self._default + parser.StartElementHandler = self._start + parser.EndElementHandler = self._end + parser.CharacterDataHandler = self._data + # optional callbacks + parser.CommentHandler = self._comment + parser.ProcessingInstructionHandler = self._pi + # let expat do the buffering, if supported + try: + self._parser.buffer_text = 1 + except AttributeError: + pass + # use new-style attribute handling, if supported + try: + self._parser.ordered_attributes = 1 + self._parser.specified_attributes = 1 + parser.StartElementHandler = self._start_list + except AttributeError: + pass + self._doctype = None + self.entity = {} + try: + self.version = "Expat %d.%d.%d" % 
expat.version_info + except AttributeError: + pass # unknown + + def _raiseerror(self, value): + err = ParseError(value) + err.code = value.code + err.position = value.lineno, value.offset + raise err + + def _fixtext(self, text): + # convert text string to ascii, if possible + try: + return text.encode("ascii") + except UnicodeError: + return text + + def _fixname(self, key): + # expand qname, and convert name string to ascii, if possible + try: + name = self._names[key] + except KeyError: + name = key + if "}" in name: + name = "{" + name + self._names[key] = name = self._fixtext(name) + return name + + def _start(self, tag, attrib_in): + fixname = self._fixname + fixtext = self._fixtext + tag = fixname(tag) + attrib = {} + for key, value in attrib_in.items(): + attrib[fixname(key)] = fixtext(value) + return self.target.start(tag, attrib) + + def _start_list(self, tag, attrib_in): + fixname = self._fixname + fixtext = self._fixtext + tag = fixname(tag) + attrib = {} + if attrib_in: + for i in range(0, len(attrib_in), 2): + attrib[fixname(attrib_in[i])] = fixtext(attrib_in[i+1]) + return self.target.start(tag, attrib) + + def _data(self, text): + return self.target.data(self._fixtext(text)) + + def _end(self, tag): + return self.target.end(self._fixname(tag)) + + def _comment(self, data): + try: + comment = self.target.comment + except AttributeError: + pass + else: + return comment(self._fixtext(data)) + + def _pi(self, target, data): + try: + pi = self.target.pi + except AttributeError: + pass + else: + return pi(self._fixtext(target), self._fixtext(data)) + + def _default(self, text): + prefix = text[:1] + if prefix == "&": + # deal with undefined entities + try: + self.target.data(self.entity[text[1:-1]]) + except KeyError: + from xml.parsers import expat + err = expat.error( + "undefined entity %s: line %d, column %d" % + (text, self._parser.ErrorLineNumber, + self._parser.ErrorColumnNumber) + ) + err.code = 11 # XML_ERROR_UNDEFINED_ENTITY + err.lineno = 
self._parser.ErrorLineNumber + err.offset = self._parser.ErrorColumnNumber + raise err + elif prefix == "<" and text[:9] == "": + self._doctype = None + return + text = text.strip() + if not text: + return + self._doctype.append(text) + n = len(self._doctype) + if n > 2: + type = self._doctype[1] + if type == "PUBLIC" and n == 4: + name, type, pubid, system = self._doctype + elif type == "SYSTEM" and n == 3: + name, type, system = self._doctype + pubid = None + else: + return + if pubid: + pubid = pubid[1:-1] + if hasattr(self.target, "doctype"): + self.target.doctype(name, pubid, system[1:-1]) + elif self.doctype is not self._XMLParser__doctype: + # warn about deprecated call + self._XMLParser__doctype(name, pubid, system[1:-1]) + self.doctype(name, pubid, system[1:-1]) + self._doctype = None + + ## + # (Deprecated) Handles a doctype declaration. + # + # @param name Doctype name. + # @param pubid Public identifier. + # @param system System identifier. + + def doctype(self, name, pubid, system): + """This method of XMLParser is deprecated.""" + warnings.warn( + "This method of XMLParser is deprecated. Define doctype() " + "method on the TreeBuilder target.", + DeprecationWarning, + ) + + # sentinel, if doctype is redefined in a subclass + __doctype = doctype + + ## + # Feeds data to the parser. + # + # @param data Encoded data. + + def feed(self, data): + try: + self._parser.Parse(data, 0) + except self._error, v: + self._raiseerror(v) + + ## + # Finishes feeding data to the parser. + # + # @return An element structure. + # @defreturn Element + + def close(self): + try: + self._parser.Parse("", 1) # end of data + except self._error, v: + self._raiseerror(v) + tree = self.target.close() + del self.target, self._parser # get rid of circular references + return tree + +# compatibility +XMLTreeBuilder = XMLParser + +# workaround circular import. 
+try: + from ElementC14N import _serialize_c14n + _serialize["c14n"] = _serialize_c14n +except ImportError: + pass diff --git a/NEWS b/NEWS --- a/NEWS +++ b/NEWS @@ -4,6 +4,7 @@ Jython 2.7.1rc1 Bugs fixed + - [ 2413 ] ElementTree.write doesn't close files if used with invalid encoding - [ 2516 ] _get_open_ssl_key_manager tries to validate that the private and public keys match, and is throwing an SSLError: "key values mismatch" when provided with multiple certs (Root/CA/Cert) @@ -25,6 +26,7 @@ causes an infinite recursion - [ 2112 ] time.strptime() has different default year in Jython and CPython - [ 1767 ] Rich comparisons + - [ 2443 ] java.util.Map derived classes lack iterkeys, itervalues methods New Features - Buffer API changes allow java.nio.ByteBuffer to provide the storage when a PyBuffer diff --git a/src/org/python/core/JavaProxyMap.java b/src/org/python/core/JavaProxyMap.java --- a/src/org/python/core/JavaProxyMap.java +++ b/src/org/python/core/JavaProxyMap.java @@ -223,6 +223,40 @@ }; } }; + private static final PyBuiltinMethodNarrow mapIterKeysProxy = new MapMethod("iterkeys", 0) { + @Override + public PyObject __call__() { + final Iterator keyIterator = asMap().keySet().iterator(); + return new PyIterator() { + @Override + public PyObject __iternext__() { + if (keyIterator.hasNext()) { + Object nextKey = keyIterator.next(); + // yield a Python key + return Py.java2py(nextKey); + } + return null; + } + }; + } + }; + private static final PyBuiltinMethodNarrow mapIterValuesProxy = new MapMethod("itervalues", 0) { + @Override + public PyObject __call__() { + final Iterator valueIterator = asMap().values().iterator(); + return new PyIterator() { + @Override + public PyObject __iternext__() { + if (valueIterator.hasNext()) { + Object nextValue = valueIterator.next(); + // yield a Python value + return Py.java2py(nextValue); + } + return null; + } + }; + } + }; private static final PyBuiltinMethodNarrow mapHasKeyProxy = new MapMethod("has_key", 1) { @Override 
public PyObject __call__(PyObject key) { @@ -456,6 +490,8 @@ mapPutProxy, mapRemoveProxy, mapIterItemsProxy, + mapIterKeysProxy, + mapIterValuesProxy, mapHasKeyProxy, mapKeysProxy, mapSetDefaultProxy, -- Repository URL: https://hg.python.org/jython From jython-checkins at python.org Tue Sep 6 00:43:24 2016 From: jython-checkins at python.org (jim.baker) Date: Tue, 06 Sep 2016 04:43:24 +0000 Subject: [Jython-checkins] =?utf-8?q?jython=3A_Ensure_=5F=5Fsubclasses=5F?= =?utf-8?q?=5F=28=29_has_stable_ordering=2E_Fixes_=232514?= Message-ID: <20160906044324.22629.64936.665CF5F5@psf.io> https://hg.python.org/jython/rev/6f5754e55d75 changeset: 7960:6f5754e55d75 user: Jim Baker date: Mon Sep 05 22:43:01 2016 -0600 summary: Ensure __subclasses__() has stable ordering. Fixes #2514 files: Lib/test/test_subclasses_jy.py | 40 ++++++++++++++++++++- src/org/python/core/PyType.java | 2 +- 2 files changed, 40 insertions(+), 2 deletions(-) diff --git a/Lib/test/test_subclasses_jy.py b/Lib/test/test_subclasses_jy.py --- a/Lib/test/test_subclasses_jy.py +++ b/Lib/test/test_subclasses_jy.py @@ -11,8 +11,46 @@ else: self.fail("expected TypeError for subclassing an int instance") + +class TestStableSubclasses(unittest.TestCase): + + def test_subclasses_stable(self): + class C(object): + pass + + subclasses = [] + for i in range(1024): + name = 'S%s' % i + subclasses.append(type(name, (C,), {})) + self.assertEqual(subclasses, C.__subclasses__()) + + def test_subclasses_stable_with_gc(self): + class C(object): + pass + + subclasses = [] + for i in range(1024): + name = 'S%s' % i + subclasses.append(type(name, (C,), {})) + self.assertEqual(subclasses, C.__subclasses__()) + + # punch some holes in the previous subclasses, verify + # continued stability + for i in range(32): + del subclasses[i * 32] # depends on prev deletion of course... 
+ test.test_support.gc_collect() + self.assertEqual(subclasses, C.__subclasses__()) + + # add some more subclasses + for i in range(1024, 2048): + name = 'S%s' % i + subclasses.append(type(name, (C,), {})) + self.assertEqual(subclasses, C.__subclasses__()) + + def test_main(): - test.test_support.run_unittest(SubclassInstanceTest) + test.test_support.run_unittest( + SubclassInstanceTest, TestStableSubclasses) if __name__ == "__main__": test_main() diff --git a/src/org/python/core/PyType.java b/src/org/python/core/PyType.java --- a/src/org/python/core/PyType.java +++ b/src/org/python/core/PyType.java @@ -94,7 +94,7 @@ private int numSlots; private transient ReferenceQueue subclasses_refq = new ReferenceQueue(); - private Set> subclasses = Generic.set(); + private Set> subclasses = Generic.linkedHashSet(); /** Global mro cache. */ private static final MethodCache methodCache = new MethodCache(); -- Repository URL: https://hg.python.org/jython From jython-checkins at python.org Tue Sep 6 00:45:42 2016 From: jython-checkins at python.org (jim.baker) Date: Tue, 06 Sep 2016 04:45:42 +0000 Subject: [Jython-checkins] =?utf-8?q?jython=3A_Ensure_=5F=5Fsubclasses=5F?= =?utf-8?q?=5F=28=29_has_stable_ordering=2E_Fixes_=232514?= Message-ID: <20160906044542.38003.11571.56F4DE82@psf.io> https://hg.python.org/jython/rev/ed451b497499 changeset: 7961:ed451b497499 user: Jim Baker date: Mon Sep 05 22:45:39 2016 -0600 summary: Ensure __subclasses__() has stable ordering. Fixes #2514 Missing change from previous commit. 
files: src/org/python/util/Generic.java | 8 ++++++++ 1 files changed, 8 insertions(+), 0 deletions(-) diff --git a/src/org/python/util/Generic.java b/src/org/python/util/Generic.java --- a/src/org/python/util/Generic.java +++ b/src/org/python/util/Generic.java @@ -4,6 +4,7 @@ import java.util.Collections; import java.util.HashMap; import java.util.HashSet; +import java.util.LinkedHashSet; import java.util.List; import java.util.Map; import java.util.Set; @@ -70,6 +71,13 @@ } /** + * Makes a LinkedHashSet using the generic type inferred from whatever this is being assigned to. + */ + public static Set linkedHashSet() { + return new LinkedHashSet(); + } + + /** * Makes a Set using the generic type inferred from whatever this is being assigned to filled * with the items in contents. */ -- Repository URL: https://hg.python.org/jython From jython-checkins at python.org Tue Sep 6 01:14:01 2016 From: jython-checkins at python.org (jim.baker) Date: Tue, 06 Sep 2016 05:14:01 +0000 Subject: [Jython-checkins] =?utf-8?q?jython=3A_Updates_httplib=2C_urllib?= =?utf-8?q?=2C_urllib2_to_latest=2E_Fixes_=232481?= Message-ID: <20160906051401.8638.99852.8F37B29C@psf.io> https://hg.python.org/jython/rev/91083509a11c changeset: 7962:91083509a11c user: Jim Baker date: Mon Sep 05 23:13:56 2016 -0600 summary: Updates httplib, urllib, urllib2 to latest. Fixes #2481 In 2.7.1, Jython supports CPython 2.7's recent updates for always verifying SSL certs. However, httplib, urllib, urllib2 have support for selectively turning this off, so we needed to refresh this from CPython and repatch. As part of this update there's also a fix for the HTTPoxy vulnerability (https://bugs.python.org/issue27568), although this would only have been possible if Jython were used as a CGI script (very unlikely), instead of being used in a web container (ModJy, etc). 
files: CPythonLib.includes | 1 + Lib/test/test_httplib.py | 890 ++++++++ Lib/test/test_urllib.py | 301 ++- Lib/urllib.py | 1630 --------------- lib-python/2.7/rfc822.py | 9 +- lib-python/2.7/test/test_httplib.py | 381 +++- lib-python/2.7/test/test_rfc822.py | 6 + lib-python/2.7/test/test_urllib.py | 296 ++- lib-python/2.7/test/test_urllib2.py | 59 +- lib-python/2.7/urllib.py | 203 +- lib-python/2.7/urllib2.py | 79 +- 11 files changed, 1974 insertions(+), 1881 deletions(-) diff --git a/CPythonLib.includes b/CPythonLib.includes --- a/CPythonLib.includes +++ b/CPythonLib.includes @@ -167,6 +167,7 @@ traceback.py tty.py tzparse.py +urllib.py urllib2.py urlparse.py user.py diff --git a/Lib/test/test_httplib.py b/Lib/test/test_httplib.py new file mode 100644 --- /dev/null +++ b/Lib/test/test_httplib.py @@ -0,0 +1,890 @@ +import httplib +import itertools +import array +import StringIO +import socket +import errno +import os +import tempfile + +import unittest +TestCase = unittest.TestCase + +from test import test_support + +here = os.path.dirname(__file__) +# Self-signed cert file for 'localhost' +CERT_localhost = os.path.join(here, 'keycert.pem') +# Self-signed cert file for 'fakehostname' +CERT_fakehostname = os.path.join(here, 'keycert2.pem') +# Self-signed cert file for self-signed.pythontest.net +CERT_selfsigned_pythontestdotnet = os.path.join(here, 'selfsigned_pythontestdotnet.pem') + +HOST = test_support.HOST + +class FakeSocket: + def __init__(self, text, fileclass=StringIO.StringIO, host=None, port=None): + self.text = text + self.fileclass = fileclass + self.data = '' + self.file_closed = False + self.host = host + self.port = port + + def sendall(self, data): + self.data += ''.join(data) + + def makefile(self, mode, bufsize=None): + if mode != 'r' and mode != 'rb': + raise httplib.UnimplementedFileMode() + # keep the file around so we can check how much was read from it + self.file = self.fileclass(self.text) + self.file.close = self.file_close #nerf close () + 
return self.file + + def file_close(self): + self.file_closed = True + + def close(self): + pass + +class EPipeSocket(FakeSocket): + + def __init__(self, text, pipe_trigger): + # When sendall() is called with pipe_trigger, raise EPIPE. + FakeSocket.__init__(self, text) + self.pipe_trigger = pipe_trigger + + def sendall(self, data): + if self.pipe_trigger in data: + raise socket.error(errno.EPIPE, "gotcha") + self.data += data + + def close(self): + pass + +class NoEOFStringIO(StringIO.StringIO): + """Like StringIO, but raises AssertionError on EOF. + + This is used below to test that httplib doesn't try to read + more from the underlying file than it should. + """ + def read(self, n=-1): + data = StringIO.StringIO.read(self, n) + if data == '': + raise AssertionError('caller tried to read past EOF') + return data + + def readline(self, length=None): + data = StringIO.StringIO.readline(self, length) + if data == '': + raise AssertionError('caller tried to read past EOF') + return data + + +class HeaderTests(TestCase): + def test_auto_headers(self): + # Some headers are added automatically, but should not be added by + # .request() if they are explicitly set. 
+ + class HeaderCountingBuffer(list): + def __init__(self): + self.count = {} + def append(self, item): + kv = item.split(':') + if len(kv) > 1: + # item is a 'Key: Value' header string + lcKey = kv[0].lower() + self.count.setdefault(lcKey, 0) + self.count[lcKey] += 1 + list.append(self, item) + + for explicit_header in True, False: + for header in 'Content-length', 'Host', 'Accept-encoding': + conn = httplib.HTTPConnection('example.com') + conn.sock = FakeSocket('blahblahblah') + conn._buffer = HeaderCountingBuffer() + + body = 'spamspamspam' + headers = {} + if explicit_header: + headers[header] = str(len(body)) + conn.request('POST', '/', body, headers) + self.assertEqual(conn._buffer.count[header.lower()], 1) + + def test_content_length_0(self): + + class ContentLengthChecker(list): + def __init__(self): + list.__init__(self) + self.content_length = None + def append(self, item): + kv = item.split(':', 1) + if len(kv) > 1 and kv[0].lower() == 'content-length': + self.content_length = kv[1].strip() + list.append(self, item) + + # Here, we're testing that methods expecting a body get a + # content-length set to zero if the body is empty (either None or '') + bodies = (None, '') + methods_with_body = ('PUT', 'POST', 'PATCH') + for method, body in itertools.product(methods_with_body, bodies): + conn = httplib.HTTPConnection('example.com') + conn.sock = FakeSocket(None) + conn._buffer = ContentLengthChecker() + conn.request(method, '/', body) + self.assertEqual( + conn._buffer.content_length, '0', + 'Header Content-Length incorrect on {}'.format(method) + ) + + # For these methods, we make sure that content-length is not set when + # the body is None because it might cause unexpected behaviour on the + # server. 
+ methods_without_body = ( + 'GET', 'CONNECT', 'DELETE', 'HEAD', 'OPTIONS', 'TRACE', + ) + for method in methods_without_body: + conn = httplib.HTTPConnection('example.com') + conn.sock = FakeSocket(None) + conn._buffer = ContentLengthChecker() + conn.request(method, '/', None) + self.assertEqual( + conn._buffer.content_length, None, + 'Header Content-Length set for empty body on {}'.format(method) + ) + + # If the body is set to '', that's considered to be "present but + # empty" rather than "missing", so content length would be set, even + # for methods that don't expect a body. + for method in methods_without_body: + conn = httplib.HTTPConnection('example.com') + conn.sock = FakeSocket(None) + conn._buffer = ContentLengthChecker() + conn.request(method, '/', '') + self.assertEqual( + conn._buffer.content_length, '0', + 'Header Content-Length incorrect on {}'.format(method) + ) + + # If the body is set, make sure Content-Length is set. + for method in itertools.chain(methods_without_body, methods_with_body): + conn = httplib.HTTPConnection('example.com') + conn.sock = FakeSocket(None) + conn._buffer = ContentLengthChecker() + conn.request(method, '/', ' ') + self.assertEqual( + conn._buffer.content_length, '1', + 'Header Content-Length incorrect on {}'.format(method) + ) + + def test_putheader(self): + conn = httplib.HTTPConnection('example.com') + conn.sock = FakeSocket(None) + conn.putrequest('GET','/') + conn.putheader('Content-length',42) + self.assertIn('Content-length: 42', conn._buffer) + + conn.putheader('Foo', ' bar ') + self.assertIn(b'Foo: bar ', conn._buffer) + conn.putheader('Bar', '\tbaz\t') + self.assertIn(b'Bar: \tbaz\t', conn._buffer) + conn.putheader('Authorization', 'Bearer mytoken') + self.assertIn(b'Authorization: Bearer mytoken', conn._buffer) + conn.putheader('IterHeader', 'IterA', 'IterB') + self.assertIn(b'IterHeader: IterA\r\n\tIterB', conn._buffer) + conn.putheader('LatinHeader', b'\xFF') + self.assertIn(b'LatinHeader: \xFF', 
conn._buffer) + conn.putheader('Utf8Header', b'\xc3\x80') + self.assertIn(b'Utf8Header: \xc3\x80', conn._buffer) + conn.putheader('C1-Control', b'next\x85line') + self.assertIn(b'C1-Control: next\x85line', conn._buffer) + conn.putheader('Embedded-Fold-Space', 'is\r\n allowed') + self.assertIn(b'Embedded-Fold-Space: is\r\n allowed', conn._buffer) + conn.putheader('Embedded-Fold-Tab', 'is\r\n\tallowed') + self.assertIn(b'Embedded-Fold-Tab: is\r\n\tallowed', conn._buffer) + conn.putheader('Key Space', 'value') + self.assertIn(b'Key Space: value', conn._buffer) + conn.putheader('KeySpace ', 'value') + self.assertIn(b'KeySpace : value', conn._buffer) + conn.putheader(b'Nonbreak\xa0Space', 'value') + self.assertIn(b'Nonbreak\xa0Space: value', conn._buffer) + conn.putheader(b'\xa0NonbreakSpace', 'value') + self.assertIn(b'\xa0NonbreakSpace: value', conn._buffer) + + def test_ipv6host_header(self): + # Default host header on IPv6 transaction should be wrapped by [] if + # it is an IPv6 address + expected = 'GET /foo HTTP/1.1\r\nHost: [2001::]:81\r\n' \ + 'Accept-Encoding: identity\r\n\r\n' + conn = httplib.HTTPConnection('[2001::]:81') + sock = FakeSocket('') + conn.sock = sock + conn.request('GET', '/foo') + self.assertTrue(sock.data.startswith(expected)) + + expected = 'GET /foo HTTP/1.1\r\nHost: [2001:102A::]\r\n' \ + 'Accept-Encoding: identity\r\n\r\n' + conn = httplib.HTTPConnection('[2001:102A::]') + sock = FakeSocket('') + conn.sock = sock + conn.request('GET', '/foo') + self.assertTrue(sock.data.startswith(expected)) + + def test_malformed_headers_coped_with(self): + # Issue 19996 + body = "HTTP/1.1 200 OK\r\nFirst: val\r\n: nval\r\nSecond: val\r\n\r\n" + sock = FakeSocket(body) + resp = httplib.HTTPResponse(sock) + resp.begin() + + self.assertEqual(resp.getheader('First'), 'val') + self.assertEqual(resp.getheader('Second'), 'val') + + def test_invalid_headers(self): + conn = httplib.HTTPConnection('example.com') + conn.sock = FakeSocket('') + 
conn.putrequest('GET', '/') + + # http://tools.ietf.org/html/rfc7230#section-3.2.4, whitespace is no + # longer allowed in header names + cases = ( + (b'Invalid\r\nName', b'ValidValue'), + (b'Invalid\rName', b'ValidValue'), + (b'Invalid\nName', b'ValidValue'), + (b'\r\nInvalidName', b'ValidValue'), + (b'\rInvalidName', b'ValidValue'), + (b'\nInvalidName', b'ValidValue'), + (b' InvalidName', b'ValidValue'), + (b'\tInvalidName', b'ValidValue'), + (b'Invalid:Name', b'ValidValue'), + (b':InvalidName', b'ValidValue'), + (b'ValidName', b'Invalid\r\nValue'), + (b'ValidName', b'Invalid\rValue'), + (b'ValidName', b'Invalid\nValue'), + (b'ValidName', b'InvalidValue\r\n'), + (b'ValidName', b'InvalidValue\r'), + (b'ValidName', b'InvalidValue\n'), + ) + for name, value in cases: + with self.assertRaisesRegexp(ValueError, 'Invalid header'): + conn.putheader(name, value) + + +class BasicTest(TestCase): + def test_status_lines(self): + # Test HTTP status lines + + body = "HTTP/1.1 200 Ok\r\n\r\nText" + sock = FakeSocket(body) + resp = httplib.HTTPResponse(sock) + resp.begin() + self.assertEqual(resp.read(0), '') # Issue #20007 + self.assertFalse(resp.isclosed()) + self.assertEqual(resp.read(), 'Text') + self.assertTrue(resp.isclosed()) + + body = "HTTP/1.1 400.100 Not Ok\r\n\r\nText" + sock = FakeSocket(body) + resp = httplib.HTTPResponse(sock) + self.assertRaises(httplib.BadStatusLine, resp.begin) + + def test_bad_status_repr(self): + exc = httplib.BadStatusLine('') + self.assertEqual(repr(exc), '''BadStatusLine("\'\'",)''') + + def test_partial_reads(self): + # if we have a length, the system knows when to close itself + # same behaviour than when we read the whole thing with read() + body = "HTTP/1.1 200 Ok\r\nContent-Length: 4\r\n\r\nText" + sock = FakeSocket(body) + resp = httplib.HTTPResponse(sock) + resp.begin() + self.assertEqual(resp.read(2), 'Te') + self.assertFalse(resp.isclosed()) + self.assertEqual(resp.read(2), 'xt') + self.assertTrue(resp.isclosed()) + + def 
test_partial_reads_no_content_length(self): + # when no length is present, the socket should be gracefully closed when + # all data was read + body = "HTTP/1.1 200 Ok\r\n\r\nText" + sock = FakeSocket(body) + resp = httplib.HTTPResponse(sock) + resp.begin() + self.assertEqual(resp.read(2), 'Te') + self.assertFalse(resp.isclosed()) + self.assertEqual(resp.read(2), 'xt') + self.assertEqual(resp.read(1), '') + self.assertTrue(resp.isclosed()) + + def test_partial_reads_incomplete_body(self): + # if the server shuts down the connection before the whole + # content-length is delivered, the socket is gracefully closed + body = "HTTP/1.1 200 Ok\r\nContent-Length: 10\r\n\r\nText" + sock = FakeSocket(body) + resp = httplib.HTTPResponse(sock) + resp.begin() + self.assertEqual(resp.read(2), 'Te') + self.assertFalse(resp.isclosed()) + self.assertEqual(resp.read(2), 'xt') + self.assertEqual(resp.read(1), '') + self.assertTrue(resp.isclosed()) + + def test_host_port(self): + # Check invalid host_port + + # Note that httplib does not accept user:password@ in the host-port. + for hp in ("www.python.org:abc", "user:password at www.python.org"): + self.assertRaises(httplib.InvalidURL, httplib.HTTP, hp) + + for hp, h, p in (("[fe80::207:e9ff:fe9b]:8000", "fe80::207:e9ff:fe9b", + 8000), + ("www.python.org:80", "www.python.org", 80), + ("www.python.org", "www.python.org", 80), + ("www.python.org:", "www.python.org", 80), + ("[fe80::207:e9ff:fe9b]", "fe80::207:e9ff:fe9b", 80)): + http = httplib.HTTP(hp) + c = http._conn + if h != c.host: + self.fail("Host incorrectly parsed: %s != %s" % (h, c.host)) + if p != c.port: + self.fail("Port incorrectly parsed: %s != %s" % (p, c.host)) + + def test_response_headers(self): + # test response with multiple message headers with the same field name. 
+ text = ('HTTP/1.1 200 OK\r\n' + 'Set-Cookie: Customer="WILE_E_COYOTE";' + ' Version="1"; Path="/acme"\r\n' + 'Set-Cookie: Part_Number="Rocket_Launcher_0001"; Version="1";' + ' Path="/acme"\r\n' + '\r\n' + 'No body\r\n') + hdr = ('Customer="WILE_E_COYOTE"; Version="1"; Path="/acme"' + ', ' + 'Part_Number="Rocket_Launcher_0001"; Version="1"; Path="/acme"') + s = FakeSocket(text) + r = httplib.HTTPResponse(s) + r.begin() + cookies = r.getheader("Set-Cookie") + if cookies != hdr: + self.fail("multiple headers not combined properly") + + def test_read_head(self): + # Test that the library doesn't attempt to read any data + # from a HEAD request. (Tickles SF bug #622042.) + sock = FakeSocket( + 'HTTP/1.1 200 OK\r\n' + 'Content-Length: 14432\r\n' + '\r\n', + NoEOFStringIO) + resp = httplib.HTTPResponse(sock, method="HEAD") + resp.begin() + if resp.read() != "": + self.fail("Did not expect response from HEAD request") + + def test_too_many_headers(self): + headers = '\r\n'.join('Header%d: foo' % i for i in xrange(200)) + '\r\n' + text = ('HTTP/1.1 200 OK\r\n' + headers) + s = FakeSocket(text) + r = httplib.HTTPResponse(s) + self.assertRaises(httplib.HTTPException, r.begin) + + def test_send_file(self): + expected = 'GET /foo HTTP/1.1\r\nHost: example.com\r\n' \ + 'Accept-Encoding: identity\r\nContent-Length:' + + body = open(__file__, 'rb') + conn = httplib.HTTPConnection('example.com') + sock = FakeSocket(body) + conn.sock = sock + conn.request('GET', '/foo', body) + self.assertTrue(sock.data.startswith(expected)) + self.assertIn('def test_send_file', sock.data) + + def test_send_tempfile(self): + expected = ('GET /foo HTTP/1.1\r\nHost: example.com\r\n' + 'Accept-Encoding: identity\r\nContent-Length: 9\r\n\r\n' + 'fake\ndata') + + with tempfile.TemporaryFile() as body: + body.write('fake\ndata') + body.seek(0) + + conn = httplib.HTTPConnection('example.com') + sock = FakeSocket(body) + conn.sock = sock + conn.request('GET', '/foo', body) + self.assertEqual(sock.data, 
expected) + + def test_send(self): + expected = 'this is a test this is only a test' + conn = httplib.HTTPConnection('example.com') + sock = FakeSocket(None) + conn.sock = sock + conn.send(expected) + self.assertEqual(expected, sock.data) + sock.data = '' + conn.send(array.array('c', expected)) + self.assertEqual(expected, sock.data) + sock.data = '' + conn.send(StringIO.StringIO(expected)) + self.assertEqual(expected, sock.data) + + def test_chunked(self): + chunked_start = ( + 'HTTP/1.1 200 OK\r\n' + 'Transfer-Encoding: chunked\r\n\r\n' + 'a\r\n' + 'hello worl\r\n' + '1\r\n' + 'd\r\n' + ) + sock = FakeSocket(chunked_start + '0\r\n') + resp = httplib.HTTPResponse(sock, method="GET") + resp.begin() + self.assertEqual(resp.read(), 'hello world') + resp.close() + + for x in ('', 'foo\r\n'): + sock = FakeSocket(chunked_start + x) + resp = httplib.HTTPResponse(sock, method="GET") + resp.begin() + try: + resp.read() + except httplib.IncompleteRead, i: + self.assertEqual(i.partial, 'hello world') + self.assertEqual(repr(i),'IncompleteRead(11 bytes read)') + self.assertEqual(str(i),'IncompleteRead(11 bytes read)') + else: + self.fail('IncompleteRead expected') + finally: + resp.close() + + def test_chunked_head(self): + chunked_start = ( + 'HTTP/1.1 200 OK\r\n' + 'Transfer-Encoding: chunked\r\n\r\n' + 'a\r\n' + 'hello world\r\n' + '1\r\n' + 'd\r\n' + ) + sock = FakeSocket(chunked_start + '0\r\n') + resp = httplib.HTTPResponse(sock, method="HEAD") + resp.begin() + self.assertEqual(resp.read(), '') + self.assertEqual(resp.status, 200) + self.assertEqual(resp.reason, 'OK') + self.assertTrue(resp.isclosed()) + + def test_negative_content_length(self): + sock = FakeSocket('HTTP/1.1 200 OK\r\n' + 'Content-Length: -1\r\n\r\nHello\r\n') + resp = httplib.HTTPResponse(sock, method="GET") + resp.begin() + self.assertEqual(resp.read(), 'Hello\r\n') + self.assertTrue(resp.isclosed()) + + def test_incomplete_read(self): + sock = FakeSocket('HTTP/1.1 200 OK\r\nContent-Length: 
10\r\n\r\nHello\r\n') + resp = httplib.HTTPResponse(sock, method="GET") + resp.begin() + try: + resp.read() + except httplib.IncompleteRead as i: + self.assertEqual(i.partial, 'Hello\r\n') + self.assertEqual(repr(i), + "IncompleteRead(7 bytes read, 3 more expected)") + self.assertEqual(str(i), + "IncompleteRead(7 bytes read, 3 more expected)") + self.assertTrue(resp.isclosed()) + else: + self.fail('IncompleteRead expected') + + def test_epipe(self): + sock = EPipeSocket( + "HTTP/1.0 401 Authorization Required\r\n" + "Content-type: text/html\r\n" + "WWW-Authenticate: Basic realm=\"example\"\r\n", + b"Content-Length") + conn = httplib.HTTPConnection("example.com") + conn.sock = sock + self.assertRaises(socket.error, + lambda: conn.request("PUT", "/url", "body")) + resp = conn.getresponse() + self.assertEqual(401, resp.status) + self.assertEqual("Basic realm=\"example\"", + resp.getheader("www-authenticate")) + + def test_filenoattr(self): + # Just test the fileno attribute in the HTTPResponse Object. 
+ body = "HTTP/1.1 200 Ok\r\n\r\nText" + sock = FakeSocket(body) + resp = httplib.HTTPResponse(sock) + self.assertTrue(hasattr(resp,'fileno'), + 'HTTPResponse should expose a fileno attribute') + + # Test lines overflowing the max line size (_MAXLINE in http.client) + + def test_overflowing_status_line(self): + self.skipTest("disabled for HTTP 0.9 support") + body = "HTTP/1.1 200 Ok" + "k" * 65536 + "\r\n" + resp = httplib.HTTPResponse(FakeSocket(body)) + self.assertRaises((httplib.LineTooLong, httplib.BadStatusLine), resp.begin) + + def test_overflowing_header_line(self): + body = ( + 'HTTP/1.1 200 OK\r\n' + 'X-Foo: bar' + 'r' * 65536 + '\r\n\r\n' + ) + resp = httplib.HTTPResponse(FakeSocket(body)) + self.assertRaises(httplib.LineTooLong, resp.begin) + + def test_overflowing_chunked_line(self): + body = ( + 'HTTP/1.1 200 OK\r\n' + 'Transfer-Encoding: chunked\r\n\r\n' + + '0' * 65536 + 'a\r\n' + 'hello world\r\n' + '0\r\n' + ) + resp = httplib.HTTPResponse(FakeSocket(body)) + resp.begin() + self.assertRaises(httplib.LineTooLong, resp.read) + + def test_early_eof(self): + # Test httpresponse with no \r\n termination, + body = "HTTP/1.1 200 Ok" + sock = FakeSocket(body) + resp = httplib.HTTPResponse(sock) + resp.begin() + self.assertEqual(resp.read(), '') + self.assertTrue(resp.isclosed()) + + def test_error_leak(self): + # Test that the socket is not leaked if getresponse() fails + conn = httplib.HTTPConnection('example.com') + response = [] + class Response(httplib.HTTPResponse): + def __init__(self, *pos, **kw): + response.append(self) # Avoid garbage collector closing the socket + httplib.HTTPResponse.__init__(self, *pos, **kw) + conn.response_class = Response + conn.sock = FakeSocket('') # Emulate server dropping connection + conn.request('GET', '/') + self.assertRaises(httplib.BadStatusLine, conn.getresponse) + self.assertTrue(response) + #self.assertTrue(response[0].closed) + self.assertTrue(conn.sock.file_closed) + + def 
test_proxy_tunnel_without_status_line(self): + # Issue 17849: If a proxy tunnel is created that does not return + # a status code, fail. + body = 'hello world' + conn = httplib.HTTPConnection('example.com', strict=False) + conn.set_tunnel('foo') + conn.sock = FakeSocket(body) + with self.assertRaisesRegexp(socket.error, "Invalid response"): + conn._tunnel() + +class OfflineTest(TestCase): + def test_responses(self): + self.assertEqual(httplib.responses[httplib.NOT_FOUND], "Not Found") + + +class TestServerMixin: + """A limited socket server mixin. + + This is used by test cases for testing http connection end points. + """ + def setUp(self): + self.serv = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + self.port = test_support.bind_port(self.serv) + self.source_port = test_support.find_unused_port() + self.serv.listen(5) + self.conn = None + + def tearDown(self): + if self.conn: + self.conn.close() + self.conn = None + self.serv.close() + self.serv = None + +class SourceAddressTest(TestServerMixin, TestCase): + def testHTTPConnectionSourceAddress(self): + self.conn = httplib.HTTPConnection(HOST, self.port, + source_address=('', self.source_port)) + self.conn.connect() + self.assertEqual(self.conn.sock.getsockname()[1], self.source_port) + + @unittest.skipIf(not hasattr(httplib, 'HTTPSConnection'), + 'httplib.HTTPSConnection not defined') + def testHTTPSConnectionSourceAddress(self): + self.conn = httplib.HTTPSConnection(HOST, self.port, + source_address=('', self.source_port)) + # We don't test anything here other the constructor not barfing as + # this code doesn't deal with setting up an active running SSL server + # for an ssl_wrapped connect() to actually return from. 
+ + +class HTTPTest(TestServerMixin, TestCase): + def testHTTPConnection(self): + self.conn = httplib.HTTP(host=HOST, port=self.port, strict=None) + self.conn.connect() + self.assertEqual(self.conn._conn.host, HOST) + self.assertEqual(self.conn._conn.port, self.port) + + def testHTTPWithConnectHostPort(self): + testhost = 'unreachable.test.domain' + testport = '80' + self.conn = httplib.HTTP(host=testhost, port=testport) + self.conn.connect(host=HOST, port=self.port) + self.assertNotEqual(self.conn._conn.host, testhost) + self.assertNotEqual(self.conn._conn.port, testport) + self.assertEqual(self.conn._conn.host, HOST) + self.assertEqual(self.conn._conn.port, self.port) + + +class TimeoutTest(TestCase): + PORT = None + + def setUp(self): + self.serv = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + TimeoutTest.PORT = test_support.bind_port(self.serv) + self.serv.listen(5) + + def tearDown(self): + self.serv.close() + self.serv = None + + def testTimeoutAttribute(self): + '''This will prove that the timeout gets through + HTTPConnection and into the socket. 
+ ''' + # default -- use global socket timeout + self.assertIsNone(socket.getdefaulttimeout()) + socket.setdefaulttimeout(30) + try: + httpConn = httplib.HTTPConnection(HOST, TimeoutTest.PORT) + httpConn.connect() + finally: + socket.setdefaulttimeout(None) + self.assertEqual(httpConn.sock.gettimeout(), 30) + httpConn.close() + + # no timeout -- do not use global socket default + self.assertIsNone(socket.getdefaulttimeout()) + socket.setdefaulttimeout(30) + try: + httpConn = httplib.HTTPConnection(HOST, TimeoutTest.PORT, + timeout=None) + httpConn.connect() + finally: + socket.setdefaulttimeout(None) + self.assertEqual(httpConn.sock.gettimeout(), None) + httpConn.close() + + # a value + httpConn = httplib.HTTPConnection(HOST, TimeoutTest.PORT, timeout=30) + httpConn.connect() + self.assertEqual(httpConn.sock.gettimeout(), 30) + httpConn.close() + + +class HTTPSTest(TestCase): + + def setUp(self): + if not hasattr(httplib, 'HTTPSConnection'): + self.skipTest('ssl support required') + + def make_server(self, certfile): + from test.ssl_servers import make_https_server + return make_https_server(self, certfile=certfile) + + def test_attributes(self): + # simple test to check it's storing the timeout + h = httplib.HTTPSConnection(HOST, TimeoutTest.PORT, timeout=30) + self.assertEqual(h.timeout, 30) + + def test_networked(self): + # Default settings: requires a valid cert from a trusted CA + import ssl + test_support.requires('network') + with test_support.transient_internet('self-signed.pythontest.net'): + h = httplib.HTTPSConnection('self-signed.pythontest.net', 443) + with self.assertRaises(ssl.SSLError) as exc_info: + h.request('GET', '/') + if test_support.is_jython: + return # FIXME: SSLError.reason not yet available for Jython + self.assertEqual(exc_info.exception.reason, 'CERTIFICATE_VERIFY_FAILED') + + def test_networked_noverification(self): + # Switch off cert verification + import ssl + test_support.requires('network') + with 
test_support.transient_internet('self-signed.pythontest.net'): + context = ssl._create_stdlib_context() + h = httplib.HTTPSConnection('self-signed.pythontest.net', 443, + context=context) + h.request('GET', '/') + resp = h.getresponse() + self.assertIn('nginx', resp.getheader('server')) + + @test_support.system_must_validate_cert + def test_networked_trusted_by_default_cert(self): + # Default settings: requires a valid cert from a trusted CA + test_support.requires('network') + with test_support.transient_internet('www.python.org'): + h = httplib.HTTPSConnection('www.python.org', 443) + h.request('GET', '/') + resp = h.getresponse() + content_type = resp.getheader('content-type') + self.assertIn('text/html', content_type) + + def test_networked_good_cert(self): + # We feed the server's cert as a validating cert + import ssl + test_support.requires('network') + with test_support.transient_internet('self-signed.pythontest.net'): + context = ssl.SSLContext(ssl.PROTOCOL_TLSv1) + context.verify_mode = ssl.CERT_REQUIRED + context.load_verify_locations(CERT_selfsigned_pythontestdotnet) + h = httplib.HTTPSConnection('self-signed.pythontest.net', 443, context=context) + h.request('GET', '/') + resp = h.getresponse() + server_string = resp.getheader('server') + self.assertIn('nginx', server_string) + + def test_networked_bad_cert(self): + # We feed a "CA" cert that is unrelated to the server's cert + import ssl + test_support.requires('network') + with test_support.transient_internet('self-signed.pythontest.net'): + context = ssl.SSLContext(ssl.PROTOCOL_TLSv1) + context.verify_mode = ssl.CERT_REQUIRED + context.load_verify_locations(CERT_localhost) + h = httplib.HTTPSConnection('self-signed.pythontest.net', 443, context=context) + with self.assertRaises(ssl.SSLError) as exc_info: + h.request('GET', '/') + if test_support.is_jython: + return # FIXME: SSLError.reason not yet available for Jython + self.assertEqual(exc_info.exception.reason, 'CERTIFICATE_VERIFY_FAILED') + + def 
test_local_unknown_cert(self): + # The custom cert isn't known to the default trust bundle + import ssl + server = self.make_server(CERT_localhost) + h = httplib.HTTPSConnection('localhost', server.port) + with self.assertRaises(ssl.SSLError) as exc_info: + h.request('GET', '/') + if test_support.is_jython: + return # FIXME: SSLError.reason not yet available for Jython + self.assertEqual(exc_info.exception.reason, 'CERTIFICATE_VERIFY_FAILED') + + @unittest.skipIf(test_support.is_jython, + 'FIXME: Failing test on Jython (causes other exceptions if not skipped)') + def test_local_good_hostname(self): + # The (valid) cert validates the HTTP hostname + import ssl + server = self.make_server(CERT_localhost) + context = ssl.SSLContext(ssl.PROTOCOL_TLSv1) + context.verify_mode = ssl.CERT_REQUIRED + # FIXME this appears to be a problem in Jython certifying + # localhost certs + context.load_verify_locations(CERT_localhost) + h = httplib.HTTPSConnection('localhost', server.port, context=context) + h.request('GET', '/nonexistent') + resp = h.getresponse() + self.assertEqual(resp.status, 404) + + @unittest.skipIf(test_support.is_jython, + 'FIXME: Jython does not raise proper SSL error') + def test_local_bad_hostname(self): + # The (valid) cert doesn't validate the HTTP hostname + import ssl + server = self.make_server(CERT_fakehostname) + context = ssl.SSLContext(ssl.PROTOCOL_TLSv1) + context.verify_mode = ssl.CERT_REQUIRED + context.check_hostname = True + context.load_verify_locations(CERT_fakehostname) + h = httplib.HTTPSConnection('localhost', server.port, context=context) + with self.assertRaises(ssl.CertificateError): + h.request('GET', '/') + h.close() + # With context.check_hostname=False, the mismatching is ignored + context.check_hostname = False + h = httplib.HTTPSConnection('localhost', server.port, context=context) + h.request('GET', '/nonexistent') + resp = h.getresponse() + self.assertEqual(resp.status, 404) + + def test_host_port(self): + # Check invalid 
host_port + + for hp in ("www.python.org:abc", "user:password at www.python.org"): + self.assertRaises(httplib.InvalidURL, httplib.HTTPSConnection, hp) + + for hp, h, p in (("[fe80::207:e9ff:fe9b]:8000", + "fe80::207:e9ff:fe9b", 8000), + ("www.python.org:443", "www.python.org", 443), + ("www.python.org:", "www.python.org", 443), + ("www.python.org", "www.python.org", 443), + ("[fe80::207:e9ff:fe9b]", "fe80::207:e9ff:fe9b", 443), + ("[fe80::207:e9ff:fe9b]:", "fe80::207:e9ff:fe9b", + 443)): + c = httplib.HTTPSConnection(hp) + self.assertEqual(h, c.host) + self.assertEqual(p, c.port) + + +class TunnelTests(TestCase): + def test_connect(self): + response_text = ( + 'HTTP/1.0 200 OK\r\n\r\n' # Reply to CONNECT + 'HTTP/1.1 200 OK\r\n' # Reply to HEAD + 'Content-Length: 42\r\n\r\n' + ) + + def create_connection(address, timeout=None, source_address=None): + return FakeSocket(response_text, host=address[0], port=address[1]) + + conn = httplib.HTTPConnection('proxy.com') + conn._create_connection = create_connection + + # Once connected, we should not be able to tunnel anymore + conn.connect() + self.assertRaises(RuntimeError, conn.set_tunnel, 'destination.com') + + # But if close the connection, we are good. 
+ conn.close() + conn.set_tunnel('destination.com') + conn.request('HEAD', '/', '') + + self.assertEqual(conn.sock.host, 'proxy.com') + self.assertEqual(conn.sock.port, 80) + self.assertIn('CONNECT destination.com', conn.sock.data) + # issue22095 + self.assertNotIn('Host: destination.com:None', conn.sock.data) + self.assertIn('Host: destination.com', conn.sock.data) + + self.assertNotIn('Host: proxy.com', conn.sock.data) + + conn.close() + + conn.request('PUT', '/', '') + self.assertEqual(conn.sock.host, 'proxy.com') + self.assertEqual(conn.sock.port, 80) + self.assertTrue('CONNECT destination.com' in conn.sock.data) + self.assertTrue('Host: destination.com' in conn.sock.data) + + + at test_support.reap_threads +def test_main(verbose=None): + test_support.run_unittest(HeaderTests, OfflineTest, BasicTest, TimeoutTest, + HTTPTest, HTTPSTest, SourceAddressTest, + TunnelTests) + +if __name__ == '__main__': + test_main() diff --git a/Lib/test/test_urllib.py b/Lib/test/test_urllib.py --- a/Lib/test/test_urllib.py +++ b/Lib/test/test_urllib.py @@ -1,13 +1,14 @@ """Regresssion tests for urllib""" +import collections import urllib import httplib +import io import unittest import os import sys import mimetools import tempfile -import StringIO from test import test_support from base64 import b64encode @@ -21,37 +22,43 @@ return "%" + hex_repr +def fakehttp(fakedata): + class FakeSocket(io.BytesIO): + + def sendall(self, data): + FakeHTTPConnection.buf = data + + def makefile(self, *args, **kwds): + return self + + def read(self, amt=None): + if self.closed: + return b"" + return io.BytesIO.read(self, amt) + + def readline(self, length=None): + if self.closed: + return b"" + return io.BytesIO.readline(self, length) + + class FakeHTTPConnection(httplib.HTTPConnection): + + # buffer to store data for verification in urlopen tests. 
+ buf = "" + + def connect(self): + self.sock = FakeSocket(self.fakedata) + self.__class__.fakesock = self.sock + FakeHTTPConnection.fakedata = fakedata + + return FakeHTTPConnection + + class FakeHTTPMixin(object): def fakehttp(self, fakedata): - class FakeSocket(StringIO.StringIO): - - def sendall(self, data): - FakeHTTPConnection.buf = data - - def makefile(self, *args, **kwds): - return self - - def read(self, amt=None): - if self.closed: - return "" - return StringIO.StringIO.read(self, amt) - - def readline(self, length=None): - if self.closed: - return "" - return StringIO.StringIO.readline(self, length) - - class FakeHTTPConnection(httplib.HTTPConnection): - - # buffer to store data for verification in urlopen tests. - buf = "" - - def connect(self): - self.sock = FakeSocket(fakedata) - assert httplib.HTTP._connection_class == httplib.HTTPConnection - httplib.HTTP._connection_class = FakeHTTPConnection + httplib.HTTP._connection_class = fakehttp(fakedata) def unfakehttp(self): httplib.HTTP._connection_class = httplib.HTTPConnection @@ -107,9 +114,8 @@ def test_fileno(self): file_num = self.returned_obj.fileno() - if not test_support.is_jython: - self.assert_(isinstance(file_num, int), - "fileno() did not return an int") + if not test_support.is_jython: # does not apply to jython - fileno is an obj + self.assertIsInstance(file_num, int, "fileno() did not return an int") self.assertEqual(os.read(file_num, len(self.text)), self.text, "Reading on the file descriptor returned by fileno() " "did not return the expected text") @@ -160,8 +166,71 @@ # getproxies_environment use lowered case truncated (no '_proxy') keys self.assertEqual('localhost', proxies['no']) # List of no_proxies with space. 
- self.env.set('NO_PROXY', 'localhost, anotherdomain.com, newdomain.com') + self.env.set('NO_PROXY', 'localhost, anotherdomain.com, newdomain.com:1234') self.assertTrue(urllib.proxy_bypass_environment('anotherdomain.com')) + self.assertTrue(urllib.proxy_bypass_environment('anotherdomain.com:8888')) + self.assertTrue(urllib.proxy_bypass_environment('newdomain.com:1234')) + + def test_proxy_cgi_ignore(self): + try: + self.env.set('HTTP_PROXY', 'http://somewhere:3128') + proxies = urllib.getproxies_environment() + self.assertEqual('http://somewhere:3128', proxies['http']) + self.env.set('REQUEST_METHOD', 'GET') + proxies = urllib.getproxies_environment() + self.assertNotIn('http', proxies) + finally: + self.env.unset('REQUEST_METHOD') + self.env.unset('HTTP_PROXY') + + def test_proxy_bypass_environment_host_match(self): + bypass = urllib.proxy_bypass_environment + self.env.set('NO_PROXY', + 'localhost, anotherdomain.com, newdomain.com:1234') + self.assertTrue(bypass('localhost')) + self.assertTrue(bypass('LocalHost')) # MixedCase + self.assertTrue(bypass('LOCALHOST')) # UPPERCASE + self.assertTrue(bypass('newdomain.com:1234')) + self.assertTrue(bypass('anotherdomain.com:8888')) + self.assertTrue(bypass('www.newdomain.com:1234')) + self.assertFalse(bypass('prelocalhost')) + self.assertFalse(bypass('newdomain.com')) # no port + self.assertFalse(bypass('newdomain.com:1235')) # wrong port + +class ProxyTests_withOrderedEnv(unittest.TestCase): + + def setUp(self): + # We need to test conditions, where variable order _is_ significant + self._saved_env = os.environ + # Monkey patch os.environ, start with empty fake environment + os.environ = collections.OrderedDict() + + def tearDown(self): + os.environ = self._saved_env + + def test_getproxies_environment_prefer_lowercase(self): + # Test lowercase preference with removal + os.environ['no_proxy'] = '' + os.environ['No_Proxy'] = 'localhost' + self.assertFalse(urllib.proxy_bypass_environment('localhost')) + 
self.assertFalse(urllib.proxy_bypass_environment('arbitrary')) + os.environ['http_proxy'] = '' + os.environ['HTTP_PROXY'] = 'http://somewhere:3128' + proxies = urllib.getproxies_environment() + self.assertEqual({}, proxies) + # Test lowercase preference of proxy bypass and correct matching including ports + os.environ['no_proxy'] = 'localhost, noproxy.com, my.proxy:1234' + os.environ['No_Proxy'] = 'xyz.com' + self.assertTrue(urllib.proxy_bypass_environment('localhost')) + self.assertTrue(urllib.proxy_bypass_environment('noproxy.com:5678')) + self.assertTrue(urllib.proxy_bypass_environment('my.proxy:1234')) + self.assertFalse(urllib.proxy_bypass_environment('my.proxy')) + self.assertFalse(urllib.proxy_bypass_environment('arbitrary')) + # Test lowercase preference with replacement + os.environ['http_proxy'] = 'http://somewhere:3128' + os.environ['Http_Proxy'] = 'http://somewhereelse:3128' + proxies = urllib.getproxies_environment() + self.assertEqual('http://somewhere:3128', proxies['http']) class urlopen_HttpTests(unittest.TestCase, FakeHTTPMixin): @@ -211,10 +280,26 @@ Content-Type: text/html; charset=iso-8859-1 """) try: - self.assertRaises(IOError, urllib.urlopen, "http://python.org/") + msg = "Redirection to url 'file:" + with self.assertRaisesRegexp(IOError, msg): + urllib.urlopen("http://python.org/") finally: self.unfakehttp() + def test_redirect_limit_independent(self): + # Ticket #12923: make sure independent requests each use their + # own retry limit. + for i in range(urllib.FancyURLopener().maxtries): + self.fakehttp(b'''HTTP/1.1 302 Found +Location: file://guidocomputer.athome.com:/python/license +Connection: close +''') + try: + self.assertRaises(IOError, urllib.urlopen, + "http://something") + finally: + self.unfakehttp() + def test_empty_socket(self): # urlopen() raises IOError if the underlying socket does not send any # data. 
(#1680230) @@ -229,13 +314,13 @@ 'file://localhost/a/missing/file.py') fd, tmp_file = tempfile.mkstemp() tmp_fileurl = 'file://localhost/' + tmp_file.replace(os.path.sep, '/') + self.assertTrue(os.path.exists(tmp_file)) try: - self.assertTrue(os.path.exists(tmp_file)) fp = urllib.urlopen(tmp_fileurl) + fp.close() finally: os.close(fd) - fp.close() - os.unlink(tmp_file) + os.unlink(tmp_file) self.assertFalse(os.path.exists(tmp_file)) self.assertRaises(IOError, urllib.urlopen, tmp_fileurl) @@ -775,21 +860,131 @@ class Utility_Tests(unittest.TestCase): """Testcase to test the various utility functions in the urllib.""" + # In Python 3 this test class is moved to test_urlparse. + + def test_splittype(self): + splittype = urllib.splittype + self.assertEqual(splittype('type:opaquestring'), ('type', 'opaquestring')) + self.assertEqual(splittype('opaquestring'), (None, 'opaquestring')) + self.assertEqual(splittype(':opaquestring'), (None, ':opaquestring')) + self.assertEqual(splittype('type:'), ('type', '')) + self.assertEqual(splittype('type:opaque:string'), ('type', 'opaque:string')) + + def test_splithost(self): + splithost = urllib.splithost + self.assertEqual(splithost('//www.example.org:80/foo/bar/baz.html'), + ('www.example.org:80', '/foo/bar/baz.html')) + self.assertEqual(splithost('//www.example.org:80'), + ('www.example.org:80', '')) + self.assertEqual(splithost('/foo/bar/baz.html'), + (None, '/foo/bar/baz.html')) + + def test_splituser(self): + splituser = urllib.splituser + self.assertEqual(splituser('User:Pass at www.python.org:080'), + ('User:Pass', 'www.python.org:080')) + self.assertEqual(splituser('@www.python.org:080'), + ('', 'www.python.org:080')) + self.assertEqual(splituser('www.python.org:080'), + (None, 'www.python.org:080')) + self.assertEqual(splituser('User:Pass@'), + ('User:Pass', '')) + self.assertEqual(splituser('User at example.com:Pass at www.python.org:080'), + ('User at example.com:Pass', 'www.python.org:080')) def test_splitpasswd(self): 
- """Some of the password examples are not sensible, but it is added to - confirming to RFC2617 and addressing issue4675. - """ - self.assertEqual(('user', 'ab'),urllib.splitpasswd('user:ab')) - self.assertEqual(('user', 'a\nb'),urllib.splitpasswd('user:a\nb')) - self.assertEqual(('user', 'a\tb'),urllib.splitpasswd('user:a\tb')) - self.assertEqual(('user', 'a\rb'),urllib.splitpasswd('user:a\rb')) - self.assertEqual(('user', 'a\fb'),urllib.splitpasswd('user:a\fb')) - self.assertEqual(('user', 'a\vb'),urllib.splitpasswd('user:a\vb')) - self.assertEqual(('user', 'a:b'),urllib.splitpasswd('user:a:b')) - self.assertEqual(('user', 'a b'),urllib.splitpasswd('user:a b')) - self.assertEqual(('user 2', 'ab'),urllib.splitpasswd('user 2:ab')) - self.assertEqual(('user+1', 'a+b'),urllib.splitpasswd('user+1:a+b')) + # Some of the password examples are not sensible, but it is added to + # confirming to RFC2617 and addressing issue4675. + splitpasswd = urllib.splitpasswd + self.assertEqual(splitpasswd('user:ab'), ('user', 'ab')) + self.assertEqual(splitpasswd('user:a\nb'), ('user', 'a\nb')) + self.assertEqual(splitpasswd('user:a\tb'), ('user', 'a\tb')) + self.assertEqual(splitpasswd('user:a\rb'), ('user', 'a\rb')) + self.assertEqual(splitpasswd('user:a\fb'), ('user', 'a\fb')) + self.assertEqual(splitpasswd('user:a\vb'), ('user', 'a\vb')) + self.assertEqual(splitpasswd('user:a:b'), ('user', 'a:b')) + self.assertEqual(splitpasswd('user:a b'), ('user', 'a b')) + self.assertEqual(splitpasswd('user 2:ab'), ('user 2', 'ab')) + self.assertEqual(splitpasswd('user+1:a+b'), ('user+1', 'a+b')) + self.assertEqual(splitpasswd('user:'), ('user', '')) + self.assertEqual(splitpasswd('user'), ('user', None)) + self.assertEqual(splitpasswd(':ab'), ('', 'ab')) + + def test_splitport(self): + splitport = urllib.splitport + self.assertEqual(splitport('parrot:88'), ('parrot', '88')) + self.assertEqual(splitport('parrot'), ('parrot', None)) + self.assertEqual(splitport('parrot:'), ('parrot', None)) + 
self.assertEqual(splitport('127.0.0.1'), ('127.0.0.1', None)) + self.assertEqual(splitport('parrot:cheese'), ('parrot:cheese', None)) + self.assertEqual(splitport('[::1]:88'), ('[::1]', '88')) + self.assertEqual(splitport('[::1]'), ('[::1]', None)) + self.assertEqual(splitport(':88'), ('', '88')) + + def test_splitnport(self): + splitnport = urllib.splitnport + self.assertEqual(splitnport('parrot:88'), ('parrot', 88)) + self.assertEqual(splitnport('parrot'), ('parrot', -1)) + self.assertEqual(splitnport('parrot', 55), ('parrot', 55)) + self.assertEqual(splitnport('parrot:'), ('parrot', -1)) + self.assertEqual(splitnport('parrot:', 55), ('parrot', 55)) + self.assertEqual(splitnport('127.0.0.1'), ('127.0.0.1', -1)) + self.assertEqual(splitnport('127.0.0.1', 55), ('127.0.0.1', 55)) + self.assertEqual(splitnport('parrot:cheese'), ('parrot', None)) + self.assertEqual(splitnport('parrot:cheese', 55), ('parrot', None)) + + def test_splitquery(self): + # Normal cases are exercised by other tests; ensure that we also + # catch cases with no port specified (testcase ensuring coverage) + splitquery = urllib.splitquery + self.assertEqual(splitquery('http://python.org/fake?foo=bar'), + ('http://python.org/fake', 'foo=bar')) + self.assertEqual(splitquery('http://python.org/fake?foo=bar?'), + ('http://python.org/fake?foo=bar', '')) + self.assertEqual(splitquery('http://python.org/fake'), + ('http://python.org/fake', None)) + self.assertEqual(splitquery('?foo=bar'), ('', 'foo=bar')) + + def test_splittag(self): + splittag = urllib.splittag + self.assertEqual(splittag('http://example.com?foo=bar#baz'), + ('http://example.com?foo=bar', 'baz')) + self.assertEqual(splittag('http://example.com?foo=bar#'), + ('http://example.com?foo=bar', '')) + self.assertEqual(splittag('#baz'), ('', 'baz')) + self.assertEqual(splittag('http://example.com?foo=bar'), + ('http://example.com?foo=bar', None)) + self.assertEqual(splittag('http://example.com?foo=bar#baz#boo'), + 
('http://example.com?foo=bar#baz', 'boo')) + + def test_splitattr(self): + splitattr = urllib.splitattr + self.assertEqual(splitattr('/path;attr1=value1;attr2=value2'), + ('/path', ['attr1=value1', 'attr2=value2'])) + self.assertEqual(splitattr('/path;'), ('/path', [''])) + self.assertEqual(splitattr(';attr1=value1;attr2=value2'), + ('', ['attr1=value1', 'attr2=value2'])) + self.assertEqual(splitattr('/path'), ('/path', [])) + + def test_splitvalue(self): + # Normal cases are exercised by other tests; test pathological cases + # with no key/value pairs. (testcase ensuring coverage) + splitvalue = urllib.splitvalue + self.assertEqual(splitvalue('foo=bar'), ('foo', 'bar')) + self.assertEqual(splitvalue('foo='), ('foo', '')) + self.assertEqual(splitvalue('=bar'), ('', 'bar')) + self.assertEqual(splitvalue('foobar'), ('foobar', None)) + self.assertEqual(splitvalue('foo=bar=baz'), ('foo', 'bar=baz')) + + def test_toBytes(self): + result = urllib.toBytes(u'http://www.python.org') + self.assertEqual(result, 'http://www.python.org') + self.assertRaises(UnicodeError, urllib.toBytes, + test_support.u(r'http://www.python.org/medi\u00e6val')) + + def test_unwrap(self): + url = urllib.unwrap('') + self.assertEqual(url, 'type://host/path') class URLopener_Tests(unittest.TestCase): @@ -814,7 +1009,7 @@ # Everywhere else they work ok, but on those machines, sometimes # fail in one of the tests, sometimes in other. I have a linux, and # the tests go ok. -# If anybody has one of the problematic enviroments, please help! +# If anybody has one of the problematic environments, please help! # . 
Facundo # # def server(evt): @@ -860,7 +1055,7 @@ # def testTimeoutNone(self): # # global default timeout is ignored # import socket -# self.assertTrue(socket.getdefaulttimeout() is None) +# self.assertIsNone(socket.getdefaulttimeout()) # socket.setdefaulttimeout(30) # try: # ftp = urllib.ftpwrapper("myuser", "mypass", "localhost", 9093, []) @@ -872,7 +1067,7 @@ # def testTimeoutDefault(self): # # global default timeout is used # import socket -# self.assertTrue(socket.getdefaulttimeout() is None) +# self.assertIsNone(socket.getdefaulttimeout()) # socket.setdefaulttimeout(30) # try: # ftp = urllib.ftpwrapper("myuser", "mypass", "localhost", 9093, []) @@ -906,6 +1101,8 @@ Pathname_Tests, Utility_Tests, URLopener_Tests, + ProxyTests, + ProxyTests_withOrderedEnv, #FTPWrapperTests, ) diff --git a/Lib/urllib.py b/Lib/urllib.py deleted file mode 100644 --- a/Lib/urllib.py +++ /dev/null @@ -1,1630 +0,0 @@ -"""Open an arbitrary URL. - -See the following document for more info on URLs: -"Names and Addresses, URIs, URLs, URNs, URCs", at -http://www.w3.org/pub/WWW/Addressing/Overview.html - -See also the HTTP spec (from which the error codes are derived): -"HTTP - Hypertext Transfer Protocol", at -http://www.w3.org/pub/WWW/Protocols/ - -Related standards and specs: -- RFC1808: the "relative URL" spec. (authoritative status) -- RFC1738 - the "URL standard". (authoritative status) -- RFC1630 - the "URI spec". (informational status) - -The object returned by URLopener().open(file) will differ per -protocol. All you know is that is has methods read(), readline(), -readlines(), fileno(), close() and info(). The read*(), fileno() -and close() methods work like those of open files. -The info() method returns a mimetools.Message object which can be -used to query various info about the object, if available. -(mimetools.Message objects are queried with the getheader() method.) 
-""" - -import string -import socket -import os -import time -import sys -import base64 -import re - -from urlparse import urljoin as basejoin - -__all__ = ["urlopen", "URLopener", "FancyURLopener", "urlretrieve", - "urlcleanup", "quote", "quote_plus", "unquote", "unquote_plus", - "urlencode", "url2pathname", "pathname2url", "splittag", - "localhost", "thishost", "ftperrors", "basejoin", "unwrap", - "splittype", "splithost", "splituser", "splitpasswd", "splitport", - "splitnport", "splitquery", "splitattr", "splitvalue", - "getproxies"] - -__version__ = '1.17' # XXX This version is not always updated :-( - -MAXFTPCACHE = 10 # Trim the ftp cache beyond this size - -# Helper for non-unix systems -if (os._name if sys.platform.startswith('java') else os.name) == 'nt': - from nturl2path import url2pathname, pathname2url -elif os.name == 'riscos': - from rourl2path import url2pathname, pathname2url -else: - def url2pathname(pathname): - """OS-specific conversion from a relative URL of the 'file' scheme - to a file system path; not recommended for general use.""" - return unquote(pathname) - - def pathname2url(pathname): - """OS-specific conversion from a file system path to a relative URL - of the 'file' scheme; not recommended for general use.""" - return quote(pathname) - -# This really consists of two pieces: -# (1) a class which handles opening of all sorts of URLs -# (plus assorted utilities etc.) -# (2) a set of functions for parsing URLs -# XXX Should these be separated out into different modules? 
- - -# Shortcut for basic usage -_urlopener = None -def urlopen(url, data=None, proxies=None, context=None): - """Create a file-like object for the specified URL to read from.""" - from warnings import warnpy3k - warnpy3k("urllib.urlopen() has been removed in Python 3.0 in " - "favor of urllib2.urlopen()", stacklevel=2) - - global _urlopener - if proxies is not None or context is not None: - opener = FancyURLopener(proxies=proxies, context=context) - elif not _urlopener: - opener = FancyURLopener() - _urlopener = opener - else: - opener = _urlopener - if data is None: - return opener.open(url) - else: - return opener.open(url, data) -def urlretrieve(url, filename=None, reporthook=None, data=None, context=None): - global _urlopener - if context is not None: - opener = FancyURLopener(context=context) - elif not _urlopener: - _urlopener = opener = FancyURLopener() - else: - opener = _urlopener - return opener.retrieve(url, filename, reporthook, data) -def urlcleanup(): - if _urlopener: - _urlopener.cleanup() - _safe_quoters.clear() - ftpcache.clear() - -# check for SSL -try: - import ssl -except: - _have_ssl = False -else: - _have_ssl = True - -# exception raised when downloaded size does not match content-length -class ContentTooShortError(IOError): - def __init__(self, message, content): - IOError.__init__(self, message) - self.content = content - -ftpcache = {} -class URLopener: - """Class to open URLs. - This is a class rather than just a subroutine because we may need - more than one set of global protocol-specific options. 
- Note -- this is a base class for those who don't want the - automatic handling of errors type 302 (relocated) and 401 - (authorization needed).""" - - __tempfiles = None - - version = "Python-urllib/%s" % __version__ - - # Constructor - def __init__(self, proxies=None, context=None, **x509): - if proxies is None: - proxies = getproxies() - assert hasattr(proxies, 'has_key'), "proxies must be a mapping" - self.proxies = proxies - self.key_file = x509.get('key_file') - self.cert_file = x509.get('cert_file') - self.context = context - self.addheaders = [('User-Agent', self.version)] - self.__tempfiles = [] - self.__unlink = os.unlink # See cleanup() - self.tempcache = None - # Undocumented feature: if you assign {} to tempcache, - # it is used to cache files retrieved with - # self.retrieve(). This is not enabled by default - # since it does not work for changing documents (and I - # haven't got the logic to check expiration headers - # yet). - self.ftpcache = ftpcache - # Undocumented feature: you can use a different - # ftp cache by assigning to the .ftpcache member; - # in case you want logically independent URL openers - # XXX This is not threadsafe. Bah. - - def __del__(self): - self.close() - - def close(self): - self.cleanup() - - def cleanup(self): - # This code sometimes runs when the rest of this module - # has already been deleted, so it can't use any globals - # or import anything. - if self.__tempfiles: - for file in self.__tempfiles: - try: - self.__unlink(file) - except OSError: - pass - del self.__tempfiles[:] - if self.tempcache: - self.tempcache.clear() - - def addheader(self, *args): - """Add a header to be used by the HTTP interface only - e.g. 
u.addheader('Accept', 'sound/basic')""" - self.addheaders.append(args) - - # External interface - def open(self, fullurl, data=None): - """Use URLopener().open(file) instead of open(file, 'r').""" - fullurl = unwrap(toBytes(fullurl)) - # percent encode url, fixing lame server errors for e.g, like space - # within url paths. - fullurl = quote(fullurl, safe="%/:=&?~#+!$,;'@()*[]|") - if self.tempcache and fullurl in self.tempcache: - filename, headers = self.tempcache[fullurl] - fp = open(filename, 'rb') - return addinfourl(fp, headers, fullurl) - urltype, url = splittype(fullurl) - if not urltype: - urltype = 'file' - if urltype in self.proxies: - proxy = self.proxies[urltype] - urltype, proxyhost = splittype(proxy) - host, selector = splithost(proxyhost) - url = (host, fullurl) # Signal special case to open_*() - else: - proxy = None - name = 'open_' + urltype - self.type = urltype - name = name.replace('-', '_') - if not hasattr(self, name): - if proxy: - return self.open_unknown_proxy(proxy, fullurl, data) - else: - return self.open_unknown(fullurl, data) - try: - if data is None: - return getattr(self, name)(url) - else: - return getattr(self, name)(url, data) - except socket.error, msg: - raise IOError, ('socket error', msg), sys.exc_info()[2] - - def open_unknown(self, fullurl, data=None): - """Overridable interface to open unknown URL type.""" - type, url = splittype(fullurl) - raise IOError, ('url error', 'unknown url type', type) - - def open_unknown_proxy(self, proxy, fullurl, data=None): - """Overridable interface to open unknown URL type.""" - type, url = splittype(fullurl) - raise IOError, ('url error', 'invalid proxy for %s' % type, proxy) - - # External interface - def retrieve(self, url, filename=None, reporthook=None, data=None): - """retrieve(url) returns (filename, headers) for a local object - or (tempfilename, headers) for a remote object.""" - url = unwrap(toBytes(url)) - if self.tempcache and url in self.tempcache: - return self.tempcache[url] 
- type, url1 = splittype(url) - if filename is None and (not type or type == 'file'): - try: - fp = self.open_local_file(url1) - hdrs = fp.info() - fp.close() - return url2pathname(splithost(url1)[1]), hdrs - except IOError: - pass - fp = self.open(url, data) - try: - headers = fp.info() - if filename: - tfp = open(filename, 'wb') - else: - import tempfile - garbage, path = splittype(url) - garbage, path = splithost(path or "") - path, garbage = splitquery(path or "") - path, garbage = splitattr(path or "") - suffix = os.path.splitext(path)[1] - (fd, filename) = tempfile.mkstemp(suffix) - self.__tempfiles.append(filename) - tfp = os.fdopen(fd, 'wb') - try: - result = filename, headers - if self.tempcache is not None: - self.tempcache[url] = result - bs = 1024*8 - size = -1 - read = 0 - blocknum = 0 - if "content-length" in headers: - size = int(headers["Content-Length"]) - if reporthook: - reporthook(blocknum, bs, size) - while 1: - block = fp.read(bs) - if block == "": - break - read += len(block) - tfp.write(block) - blocknum += 1 - if reporthook: - reporthook(blocknum, bs, size) - finally: - tfp.close() - finally: - fp.close() - - # raise exception if actual size does not match content-length header - if size >= 0 and read < size: - raise ContentTooShortError("retrieval incomplete: got only %i out " - "of %i bytes" % (read, size), result) - - return result - - # Each method named open_ knows how to open that type of URL - - def open_http(self, url, data=None): - """Use HTTP protocol.""" - import httplib - user_passwd = None - proxy_passwd= None - if isinstance(url, str): - host, selector = splithost(url) - if host: - user_passwd, host = splituser(host) - host = unquote(host) - realhost = host - else: - host, selector = url - # check whether the proxy contains authorization information - proxy_passwd, host = splituser(host) - # now we proceed with the url we want to obtain - urltype, rest = splittype(selector) - url = rest - user_passwd = None - if 
urltype.lower() != 'http': - realhost = None - else: - realhost, rest = splithost(rest) - if realhost: - user_passwd, realhost = splituser(realhost) - if user_passwd: - selector = "%s://%s%s" % (urltype, realhost, rest) - if proxy_bypass(realhost): - host = realhost - - #print "proxy via http:", host, selector - if not host: raise IOError, ('http error', 'no host given') - - if proxy_passwd: - proxy_passwd = unquote(proxy_passwd) - proxy_auth = base64.b64encode(proxy_passwd).strip() - else: - proxy_auth = None - - if user_passwd: - user_passwd = unquote(user_passwd) - auth = base64.b64encode(user_passwd).strip() - else: - auth = None - h = httplib.HTTP(host) - if data is not None: - h.putrequest('POST', selector) - h.putheader('Content-Type', 'application/x-www-form-urlencoded') - h.putheader('Content-Length', '%d' % len(data)) - else: - h.putrequest('GET', selector) - if proxy_auth: h.putheader('Proxy-Authorization', 'Basic %s' % proxy_auth) - if auth: h.putheader('Authorization', 'Basic %s' % auth) - if realhost: h.putheader('Host', realhost) - for args in self.addheaders: h.putheader(*args) - h.endheaders(data) - errcode, errmsg, headers = h.getreply() - fp = h.getfile() - if errcode == -1: - if fp: fp.close() - # something went wrong with the HTTP status line - raise IOError, ('http protocol error', 0, - 'got a bad status line', None) - # According to RFC 2616, "2xx" code indicates that the client's - # request was successfully received, understood, and accepted. - if (200 <= errcode < 300): - return addinfourl(fp, headers, "http:" + url, errcode) - else: - if data is None: - return self.http_error(url, fp, errcode, errmsg, headers) - else: - return self.http_error(url, fp, errcode, errmsg, headers, data) - - def http_error(self, url, fp, errcode, errmsg, headers, data=None): - """Handle http errors. 
- Derived class can override this, or provide specific handlers - named http_error_DDD where DDD is the 3-digit error code.""" - # First check if there's a specific handler for this error - name = 'http_error_%d' % errcode - if hasattr(self, name): - method = getattr(self, name) - if data is None: - result = method(url, fp, errcode, errmsg, headers) - else: - result = method(url, fp, errcode, errmsg, headers, data) - if result: return result - return self.http_error_default(url, fp, errcode, errmsg, headers) - - def http_error_default(self, url, fp, errcode, errmsg, headers): - """Default error handler: close the connection and raise IOError.""" - fp.close() - raise IOError, ('http error', errcode, errmsg, headers) - - if _have_ssl: - def open_https(self, url, data=None): - """Use HTTPS protocol.""" - - import httplib - user_passwd = None - proxy_passwd = None - if isinstance(url, str): - host, selector = splithost(url) - if host: - user_passwd, host = splituser(host) - host = unquote(host) - realhost = host - else: - host, selector = url - # here, we determine, whether the proxy contains authorization information - proxy_passwd, host = splituser(host) - urltype, rest = splittype(selector) - url = rest - user_passwd = None - if urltype.lower() != 'https': - realhost = None - else: - realhost, rest = splithost(rest) - if realhost: - user_passwd, realhost = splituser(realhost) - if user_passwd: - selector = "%s://%s%s" % (urltype, realhost, rest) - #print "proxy via https:", host, selector - if not host: raise IOError, ('https error', 'no host given') - if proxy_passwd: - proxy_passwd = unquote(proxy_passwd) - proxy_auth = base64.b64encode(proxy_passwd).strip() - else: - proxy_auth = None - if user_passwd: - user_passwd = unquote(user_passwd) - auth = base64.b64encode(user_passwd).strip() - else: - auth = None - h = httplib.HTTPS(host, 0, - key_file=self.key_file, - cert_file=self.cert_file, - context=self.context) - if data is not None: - h.putrequest('POST', 
selector) - h.putheader('Content-Type', - 'application/x-www-form-urlencoded') - h.putheader('Content-Length', '%d' % len(data)) - else: - h.putrequest('GET', selector) - if proxy_auth: h.putheader('Proxy-Authorization', 'Basic %s' % proxy_auth) - if auth: h.putheader('Authorization', 'Basic %s' % auth) - if realhost: h.putheader('Host', realhost) - for args in self.addheaders: h.putheader(*args) - h.endheaders(data) - errcode, errmsg, headers = h.getreply() - fp = h.getfile() - if errcode == -1: - if fp: fp.close() - # something went wrong with the HTTP status line - raise IOError, ('http protocol error', 0, - 'got a bad status line', None) - # According to RFC 2616, "2xx" code indicates that the client's - # request was successfully received, understood, and accepted. - if (200 <= errcode < 300): - return addinfourl(fp, headers, "https:" + url, errcode) - else: - if data is None: - return self.http_error(url, fp, errcode, errmsg, headers) - else: - return self.http_error(url, fp, errcode, errmsg, headers, - data) - - def open_file(self, url): - """Use local file or FTP depending on form of URL.""" - if not isinstance(url, str): - raise IOError, ('file error', 'proxy support for file protocol currently not implemented') - if url[:2] == '//' and url[2:3] != '/' and url[2:12].lower() != 'localhost/': - return self.open_ftp(url) - else: - return self.open_local_file(url) - - def open_local_file(self, url): - """Use local file.""" - import mimetypes, mimetools, email.utils - try: - from cStringIO import StringIO - except ImportError: - from StringIO import StringIO - host, file = splithost(url) - localname = url2pathname(file) - try: - stats = os.stat(localname) - except OSError, e: - raise IOError(e.errno, e.strerror, e.filename) - size = stats.st_size - modified = email.utils.formatdate(stats.st_mtime, usegmt=True) - mtype = mimetypes.guess_type(url)[0] - headers = mimetools.Message(StringIO( - 'Content-Type: %s\nContent-Length: %d\nLast-modified: %s\n' % - (mtype 
or 'text/plain', size, modified))) - if not host: - urlfile = file - if file[:1] == '/': - urlfile = 'file://' + file - elif file[:2] == './': - raise ValueError("local file url may start with / or file:. Unknown url of type: %s" % url) - return addinfourl(open(localname, 'rb'), - headers, urlfile) - host, port = splitport(host) - if not port \ - and socket.gethostbyname(host) in (localhost(), thishost()): - urlfile = file - if file[:1] == '/': - urlfile = 'file://' + file - return addinfourl(open(localname, 'rb'), - headers, urlfile) - raise IOError, ('local file error', 'not on local host') - - def open_ftp(self, url): - """Use FTP protocol.""" - if not isinstance(url, str): - raise IOError, ('ftp error', 'proxy support for ftp protocol currently not implemented') - import mimetypes, mimetools - try: - from cStringIO import StringIO - except ImportError: - from StringIO import StringIO - host, path = splithost(url) - if not host: raise IOError, ('ftp error', 'no host given') - host, port = splitport(host) - user, host = splituser(host) - if user: user, passwd = splitpasswd(user) - else: passwd = None - host = unquote(host) - user = user or '' - passwd = passwd or '' - host = socket.gethostbyname(host) - if not port: - import ftplib - port = ftplib.FTP_PORT - else: - port = int(port) - path, attrs = splitattr(path) - path = unquote(path) - dirs = path.split('/') - dirs, file = dirs[:-1], dirs[-1] - if dirs and not dirs[0]: dirs = dirs[1:] - if dirs and not dirs[0]: dirs[0] = '/' - key = user, host, port, '/'.join(dirs) - # XXX thread unsafe! 
- if len(self.ftpcache) > MAXFTPCACHE: - # Prune the cache, rather arbitrarily - for k in self.ftpcache.keys(): - if k != key: - v = self.ftpcache[k] - del self.ftpcache[k] - v.close() - try: - if not key in self.ftpcache: - self.ftpcache[key] = \ - ftpwrapper(user, passwd, host, port, dirs) - if not file: type = 'D' - else: type = 'I' - for attr in attrs: - attr, value = splitvalue(attr) - if attr.lower() == 'type' and \ - value in ('a', 'A', 'i', 'I', 'd', 'D'): - type = value.upper() - (fp, retrlen) = self.ftpcache[key].retrfile(file, type) - mtype = mimetypes.guess_type("ftp:" + url)[0] - headers = "" - if mtype: - headers += "Content-Type: %s\n" % mtype - if retrlen is not None and retrlen >= 0: - headers += "Content-Length: %d\n" % retrlen - headers = mimetools.Message(StringIO(headers)) - return addinfourl(fp, headers, "ftp:" + url) - except ftperrors(), msg: - raise IOError, ('ftp error', msg), sys.exc_info()[2] - - def open_data(self, url, data=None): - """Use "data" URL.""" - if not isinstance(url, str): - raise IOError, ('data error', 'proxy support for data protocol currently not implemented') - # ignore POSTed data - # - # syntax of data URLs: - # dataurl := "data:" [ mediatype ] [ ";base64" ] "," data - # mediatype := [ type "/" subtype ] *( ";" parameter ) - # data := *urlchar - # parameter := attribute "=" value - import mimetools - try: - from cStringIO import StringIO - except ImportError: - from StringIO import StringIO - try: - [type, data] = url.split(',', 1) - except ValueError: - raise IOError, ('data error', 'bad data URL') - if not type: - type = 'text/plain;charset=US-ASCII' - semi = type.rfind(';') - if semi >= 0 and '=' not in type[semi:]: - encoding = type[semi+1:] - type = type[:semi] - else: - encoding = '' - msg = [] - msg.append('Date: %s'%time.strftime('%a, %d %b %Y %H:%M:%S GMT', - time.gmtime(time.time()))) - msg.append('Content-type: %s' % type) - if encoding == 'base64': - data = base64.decodestring(data) - else: - data = 
unquote(data) - msg.append('Content-Length: %d' % len(data)) - msg.append('') - msg.append(data) - msg = '\n'.join(msg) - f = StringIO(msg) - headers = mimetools.Message(f, 0) - #f.fileno = None # needed for addinfourl - return addinfourl(f, headers, url) - - -class FancyURLopener(URLopener): - """Derived class with handlers for errors we can handle (perhaps).""" - - def __init__(self, *args, **kwargs): - URLopener.__init__(self, *args, **kwargs) - self.auth_cache = {} - self.tries = 0 - self.maxtries = 10 - - def http_error_default(self, url, fp, errcode, errmsg, headers): - """Default error handling -- don't raise an exception.""" - return addinfourl(fp, headers, "http:" + url, errcode) - - def http_error_302(self, url, fp, errcode, errmsg, headers, data=None): - """Error 302 -- relocated (temporarily).""" - self.tries += 1 - if self.maxtries and self.tries >= self.maxtries: - if hasattr(self, "http_error_500"): - meth = self.http_error_500 - else: - meth = self.http_error_default - self.tries = 0 - return meth(url, fp, 500, - "Internal Server Error: Redirect Recursion", headers) - result = self.redirect_internal(url, fp, errcode, errmsg, headers, - data) - self.tries = 0 - return result - - def redirect_internal(self, url, fp, errcode, errmsg, headers, data): - if 'location' in headers: - newurl = headers['location'] - elif 'uri' in headers: - newurl = headers['uri'] - else: - return - fp.close() - # In case the server sent a relative URL, join with original: - newurl = basejoin(self.type + ":" + url, newurl) - - # For security reasons we do not allow redirects to protocols - # other than HTTP, HTTPS or FTP. 
- newurl_lower = newurl.lower() - if not (newurl_lower.startswith('http://') or - newurl_lower.startswith('https://') or - newurl_lower.startswith('ftp://')): - raise IOError('redirect error', errcode, - errmsg + " - Redirection to url '%s' is not allowed" % - newurl, - headers) - - return self.open(newurl) - - def http_error_301(self, url, fp, errcode, errmsg, headers, data=None): - """Error 301 -- also relocated (permanently).""" - return self.http_error_302(url, fp, errcode, errmsg, headers, data) - - def http_error_303(self, url, fp, errcode, errmsg, headers, data=None): - """Error 303 -- also relocated (essentially identical to 302).""" - return self.http_error_302(url, fp, errcode, errmsg, headers, data) - - def http_error_307(self, url, fp, errcode, errmsg, headers, data=None): - """Error 307 -- relocated, but turn POST into error.""" - if data is None: - return self.http_error_302(url, fp, errcode, errmsg, headers, data) - else: - return self.http_error_default(url, fp, errcode, errmsg, headers) - - def http_error_401(self, url, fp, errcode, errmsg, headers, data=None): - """Error 401 -- authentication required. - This function supports Basic authentication only.""" - if not 'www-authenticate' in headers: - URLopener.http_error_default(self, url, fp, - errcode, errmsg, headers) - stuff = headers['www-authenticate'] - import re - match = re.match('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', stuff) - if not match: - URLopener.http_error_default(self, url, fp, - errcode, errmsg, headers) - scheme, realm = match.groups() - if scheme.lower() != 'basic': - URLopener.http_error_default(self, url, fp, - errcode, errmsg, headers) - name = 'retry_' + self.type + '_basic_auth' - if data is None: - return getattr(self,name)(url, realm) - else: - return getattr(self,name)(url, realm, data) - - def http_error_407(self, url, fp, errcode, errmsg, headers, data=None): - """Error 407 -- proxy authentication required. 
- This function supports Basic authentication only.""" - if not 'proxy-authenticate' in headers: - URLopener.http_error_default(self, url, fp, - errcode, errmsg, headers) - stuff = headers['proxy-authenticate'] - import re - match = re.match('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', stuff) - if not match: - URLopener.http_error_default(self, url, fp, - errcode, errmsg, headers) - scheme, realm = match.groups() - if scheme.lower() != 'basic': - URLopener.http_error_default(self, url, fp, - errcode, errmsg, headers) - name = 'retry_proxy_' + self.type + '_basic_auth' - if data is None: - return getattr(self,name)(url, realm) - else: - return getattr(self,name)(url, realm, data) - - def retry_proxy_http_basic_auth(self, url, realm, data=None): - host, selector = splithost(url) - newurl = 'http://' + host + selector - proxy = self.proxies['http'] - urltype, proxyhost = splittype(proxy) - proxyhost, proxyselector = splithost(proxyhost) - i = proxyhost.find('@') + 1 - proxyhost = proxyhost[i:] - user, passwd = self.get_user_passwd(proxyhost, realm, i) - if not (user or passwd): return None - proxyhost = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + proxyhost - self.proxies['http'] = 'http://' + proxyhost + proxyselector - if data is None: - return self.open(newurl) - else: - return self.open(newurl, data) - - def retry_proxy_https_basic_auth(self, url, realm, data=None): - host, selector = splithost(url) - newurl = 'https://' + host + selector - proxy = self.proxies['https'] - urltype, proxyhost = splittype(proxy) - proxyhost, proxyselector = splithost(proxyhost) - i = proxyhost.find('@') + 1 - proxyhost = proxyhost[i:] - user, passwd = self.get_user_passwd(proxyhost, realm, i) - if not (user or passwd): return None - proxyhost = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + proxyhost - self.proxies['https'] = 'https://' + proxyhost + proxyselector - if data is None: - return self.open(newurl) - else: - return self.open(newurl, data) - - def 
retry_http_basic_auth(self, url, realm, data=None): - host, selector = splithost(url) - i = host.find('@') + 1 - host = host[i:] - user, passwd = self.get_user_passwd(host, realm, i) - if not (user or passwd): return None - host = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + host - newurl = 'http://' + host + selector - if data is None: - return self.open(newurl) - else: - return self.open(newurl, data) - - def retry_https_basic_auth(self, url, realm, data=None): - host, selector = splithost(url) - i = host.find('@') + 1 - host = host[i:] - user, passwd = self.get_user_passwd(host, realm, i) - if not (user or passwd): return None - host = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + host - newurl = 'https://' + host + selector - if data is None: - return self.open(newurl) - else: - return self.open(newurl, data) - - def get_user_passwd(self, host, realm, clear_cache=0): - key = realm + '@' + host.lower() - if key in self.auth_cache: - if clear_cache: - del self.auth_cache[key] - else: - return self.auth_cache[key] - user, passwd = self.prompt_user_passwd(host, realm) - if user or passwd: self.auth_cache[key] = (user, passwd) - return user, passwd - - def prompt_user_passwd(self, host, realm): - """Override this in a GUI environment!""" - import getpass - try: - user = raw_input("Enter username for %s at %s: " % (realm, - host)) - passwd = getpass.getpass("Enter password for %s in %s at %s: " % - (user, realm, host)) - return user, passwd - except KeyboardInterrupt: - print - return None, None - - -# Utility functions - -_localhost = None -def localhost(): - """Return the IP address of the magic hostname 'localhost'.""" - global _localhost - if _localhost is None: - _localhost = socket.gethostbyname('localhost') - return _localhost - -_thishost = None -def thishost(): - """Return the IP address of the current host.""" - global _thishost - if _thishost is None: - try: - _thishost = socket.gethostbyname(socket.gethostname()) - except 
socket.gaierror: - _thishost = socket.gethostbyname('localhost') - return _thishost - -_ftperrors = None -def ftperrors(): - """Return the set of errors raised by the FTP class.""" - global _ftperrors - if _ftperrors is None: - import ftplib - _ftperrors = ftplib.all_errors - return _ftperrors - -_noheaders = None -def noheaders(): - """Return an empty mimetools.Message object.""" - global _noheaders - if _noheaders is None: - import mimetools - try: - from cStringIO import StringIO - except ImportError: - from StringIO import StringIO - _noheaders = mimetools.Message(StringIO(), 0) - _noheaders.fp.close() # Recycle file descriptor - return _noheaders - - -# Utility classes - -class ftpwrapper: - """Class used by open_ftp() for cache of open FTP connections.""" - - def __init__(self, user, passwd, host, port, dirs, - timeout=socket._GLOBAL_DEFAULT_TIMEOUT, - persistent=True): - self.user = user - self.passwd = passwd - self.host = host - self.port = port - self.dirs = dirs - self.timeout = timeout - self.refcount = 0 - self.keepalive = persistent - try: - self.init() - except: - self.close() - raise - - def init(self): - import ftplib - self.busy = 0 - self.ftp = ftplib.FTP() - self.ftp.connect(self.host, self.port, self.timeout) - self.ftp.login(self.user, self.passwd) - _target = '/'.join(self.dirs) - self.ftp.cwd(_target) - - def retrfile(self, file, type): - import ftplib - self.endtransfer() - if type in ('d', 'D'): cmd = 'TYPE A'; isdir = 1 - else: cmd = 'TYPE ' + type; isdir = 0 - try: - self.ftp.voidcmd(cmd) - except ftplib.all_errors: - self.init() - self.ftp.voidcmd(cmd) - conn = None - if file and not isdir: - # Try to retrieve as a file - try: - cmd = 'RETR ' + file - conn, retrlen = self.ftp.ntransfercmd(cmd) - except ftplib.error_perm, reason: - if str(reason)[:3] != '550': - raise IOError, ('ftp error', reason), sys.exc_info()[2] - if not conn: - # Set transfer mode to ASCII! - self.ftp.voidcmd('TYPE A') - # Try a directory listing. 
Verify that directory exists. - if file: - pwd = self.ftp.pwd() - try: - try: - self.ftp.cwd(file) - except ftplib.error_perm, reason: - raise IOError, ('ftp error', reason), sys.exc_info()[2] - finally: - self.ftp.cwd(pwd) - cmd = 'LIST ' + file - else: - cmd = 'LIST' - conn, retrlen = self.ftp.ntransfercmd(cmd) - self.busy = 1 - ftpobj = addclosehook(conn.makefile('rb'), self.file_close) - self.refcount += 1 - conn.close() - # Pass back both a suitably decorated object and a retrieval length - return (ftpobj, retrlen) - - def endtransfer(self): - if not self.busy: - return - self.busy = 0 - try: - self.ftp.voidresp() - except ftperrors(): - pass - - def close(self): - self.keepalive = False - if self.refcount <= 0: - self.real_close() - - def file_close(self): - self.endtransfer() - self.refcount -= 1 - if self.refcount <= 0 and not self.keepalive: - self.real_close() - - def real_close(self): - self.endtransfer() - try: - self.ftp.close() - except ftperrors(): - pass - -class addbase: - """Base class for addinfo and addclosehook.""" - - def __init__(self, fp): - self.fp = fp - self.read = self.fp.read - self.readline = self.fp.readline - if hasattr(self.fp, "readlines"): self.readlines = self.fp.readlines - if hasattr(self.fp, "fileno"): - self.fileno = self.fp.fileno - else: - self.fileno = lambda: None - if hasattr(self.fp, "__iter__"): - self.__iter__ = self.fp.__iter__ - if hasattr(self.fp, "next"): - self.next = self.fp.next - - def __repr__(self): - return '<%s at %r whose fp = %r>' % (self.__class__.__name__, - id(self), self.fp) - - def close(self): - self.read = None - self.readline = None - self.readlines = None - self.fileno = None - if self.fp: self.fp.close() - self.fp = None - -class addclosehook(addbase): - """Class to add a close hook to an open file.""" - - def __init__(self, fp, closehook, *hookargs): - addbase.__init__(self, fp) - self.closehook = closehook - self.hookargs = hookargs - - def close(self): - try: - closehook = self.closehook - 
hookargs = self.hookargs - if closehook: - self.closehook = None - self.hookargs = None - closehook(*hookargs) - finally: - addbase.close(self) - - -class addinfo(addbase): - """class to add an info() method to an open file.""" - - def __init__(self, fp, headers): - addbase.__init__(self, fp) - self.headers = headers - - def info(self): - return self.headers - -class addinfourl(addbase): - """class to add info() and geturl() methods to an open file.""" - - def __init__(self, fp, headers, url, code=None): - addbase.__init__(self, fp) - self.headers = headers - self.url = url - self.code = code - - def info(self): - return self.headers - - def getcode(self): - return self.code - - def geturl(self): - return self.url - - -# Utilities to parse URLs (most of these return None for missing parts): -# unwrap('') --> 'type://host/path' -# splittype('type:opaquestring') --> 'type', 'opaquestring' -# splithost('//host[:port]/path') --> 'host[:port]', '/path' -# splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]' -# splitpasswd('user:passwd') -> 'user', 'passwd' -# splitport('host:port') --> 'host', 'port' -# splitquery('/path?query') --> '/path', 'query' -# splittag('/path#tag') --> '/path', 'tag' -# splitattr('/path;attr1=value1;attr2=value2;...') -> -# '/path', ['attr1=value1', 'attr2=value2', ...] -# splitvalue('attr=value') --> 'attr', 'value' -# unquote('abc%20def') -> 'abc def' -# quote('abc def') -> 'abc%20def') - -try: - unicode -except NameError: - def _is_unicode(x): - return 0 -else: - def _is_unicode(x): - return isinstance(x, unicode) - -def toBytes(url): - """toBytes(u"URL") --> 'URL'.""" - # Most URL schemes require ASCII. 
If that changes, the conversion - # can be relaxed - if _is_unicode(url): - try: - url = url.encode("ASCII") - except UnicodeError: - raise UnicodeError("URL " + repr(url) + - " contains non-ASCII characters") - return url - -def unwrap(url): - """unwrap('') --> 'type://host/path'.""" - url = url.strip() - if url[:1] == '<' and url[-1:] == '>': - url = url[1:-1].strip() - if url[:4] == 'URL:': url = url[4:].strip() - return url - -_typeprog = None -def splittype(url): - """splittype('type:opaquestring') --> 'type', 'opaquestring'.""" - global _typeprog - if _typeprog is None: - import re - _typeprog = re.compile('^([^/:]+):') - - match = _typeprog.match(url) - if match: - scheme = match.group(1) - return scheme.lower(), url[len(scheme) + 1:] - return None, url - -_hostprog = None -def splithost(url): - """splithost('//host[:port]/path') --> 'host[:port]', '/path'.""" - global _hostprog - if _hostprog is None: - import re - _hostprog = re.compile('^//([^/?]*)(.*)$') - - match = _hostprog.match(url) - if match: - host_port = match.group(1) - path = match.group(2) - if path and not path.startswith('/'): - path = '/' + path - return host_port, path - return None, url - -_userprog = None -def splituser(host): - """splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]'.""" - global _userprog - if _userprog is None: - import re - _userprog = re.compile('^(.*)@(.*)$') - - match = _userprog.match(host) - if match: return match.group(1, 2) - return None, host - -_passwdprog = None -def splitpasswd(user): - """splitpasswd('user:passwd') -> 'user', 'passwd'.""" - global _passwdprog - if _passwdprog is None: - import re - _passwdprog = re.compile('^([^:]*):(.*)$',re.S) - - match = _passwdprog.match(user) - if match: return match.group(1, 2) - return user, None - -# splittag('/path#tag') --> '/path', 'tag' -_portprog = None -def splitport(host): - """splitport('host:port') --> 'host', 'port'.""" - global _portprog - if _portprog is None: - import re - 
_portprog = re.compile('^(.*):([0-9]*)$') - - match = _portprog.match(host) - if match: - host, port = match.groups() - if port: - return host, port - return host, None - -_nportprog = None -def splitnport(host, defport=-1): - """Split host and port, returning numeric port. - Return given default port if no ':' found; defaults to -1. - Return numerical port if a valid number are found after ':'. - Return None if ':' but not a valid number.""" - global _nportprog - if _nportprog is None: - import re - _nportprog = re.compile('^(.*):(.*)$') - - match = _nportprog.match(host) - if match: - host, port = match.group(1, 2) - if port: - try: - nport = int(port) - except ValueError: - nport = None - return host, nport - return host, defport - -_queryprog = None -def splitquery(url): - """splitquery('/path?query') --> '/path', 'query'.""" - global _queryprog - if _queryprog is None: - import re - _queryprog = re.compile('^(.*)\?([^?]*)$') - - match = _queryprog.match(url) - if match: return match.group(1, 2) - return url, None - -_tagprog = None -def splittag(url): - """splittag('/path#tag') --> '/path', 'tag'.""" - global _tagprog - if _tagprog is None: - import re - _tagprog = re.compile('^(.*)#([^#]*)$') - - match = _tagprog.match(url) - if match: return match.group(1, 2) - return url, None - -def splitattr(url): - """splitattr('/path;attr1=value1;attr2=value2;...') -> - '/path', ['attr1=value1', 'attr2=value2', ...].""" - words = url.split(';') - return words[0], words[1:] - -_valueprog = None -def splitvalue(attr): - """splitvalue('attr=value') --> 'attr', 'value'.""" - global _valueprog - if _valueprog is None: - import re - _valueprog = re.compile('^([^=]*)=(.*)$') - - match = _valueprog.match(attr) - if match: return match.group(1, 2) - return attr, None - -# urlparse contains a duplicate of this method to avoid a circular import. If -# you update this method, also update the copy in urlparse. This code -# duplication does not exist in Python3. 
- -_hexdig = '0123456789ABCDEFabcdef' -_hextochr = dict((a + b, chr(int(a + b, 16))) - for a in _hexdig for b in _hexdig) -_asciire = re.compile('([\x00-\x7f]+)') - -def unquote(s): - """unquote('abc%20def') -> 'abc def'.""" - res = s.split('%') - # fastpath - if len(res) == 1: - return s - buf = [res[0]] - is_unicode = isinstance(s, unicode) - for item in res[1:]: - try: - if is_unicode: - buf.append(unichr(int(item[:2], 16))) - buf.append(item[2:]) - else: - buf.append(_hextochr[item[:2]]) - buf.append(item[2:]) - except KeyError: - buf.append('%') - buf.append(item) - return ''.join(buf) - -def unquote_plus(s): - """unquote('%7e/abc+def') -> '~/abc def'""" - s = s.replace('+', ' ') - return unquote(s) - -always_safe = ('ABCDEFGHIJKLMNOPQRSTUVWXYZ' - 'abcdefghijklmnopqrstuvwxyz' - '0123456789' '_.-') -_safe_map = {} -for i, c in zip(xrange(256), str(bytearray(xrange(256)))): - _safe_map[c] = c if (i < 128 and c in always_safe) else '%{:02X}'.format(i) -_safe_quoters = {} - -def quote(s, safe='/'): - """quote('abc def') -> 'abc%20def' - - Each part of a URL, e.g. the path info, the query, etc., has a - different set of reserved characters that must be quoted. - - RFC 2396 Uniform Resource Identifiers (URI): Generic Syntax lists - the following reserved characters. - - reserved = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" | - "$" | "," - - Each of these characters is reserved in some component of a URL, - but not necessarily in all of them. - - By default, the quote function is intended for quoting the path - section of a URL. Thus, it will not encode '/'. This character - is reserved, but in typical usage the quote function is being - called on a path where the existing slash characters are used as - reserved characters. 
- """ - # fastpath - if not s: - if s is None: - raise TypeError('None object cannot be quoted') - return s - cachekey = (safe, always_safe) - try: - (quoter, safe) = _safe_quoters[cachekey] - except KeyError: - safe_map = _safe_map.copy() - safe_map.update([(c, c) for c in safe]) - quoter = safe_map.__getitem__ - safe = always_safe + safe - _safe_quoters[cachekey] = (quoter, safe) - if not s.rstrip(safe): - return s - return ''.join(map(quoter, s)) - -def quote_plus(s, safe=''): - """Quote the query fragment of a URL; replacing ' ' with '+'""" - if ' ' in s: - s = quote(s, safe + ' ') - return s.replace(' ', '+') - return quote(s, safe) - -def urlencode(query, doseq=0): - """Encode a sequence of two-element tuples or dictionary into a URL query string. - - If any values in the query arg are sequences and doseq is true, each - sequence element is converted to a separate parameter. - - If the query arg is a sequence of two-element tuples, the order of the - parameters in the output will match the order of parameters in the - input. - """ - - if hasattr(query,"items"): - # mapping objects - query = query.items() - else: - # it's a bother at times that strings and string-like objects are - # sequences... 
- try: - # non-sequence items should not work with len() - # non-empty strings will fail this - if len(query) and not isinstance(query[0], tuple): - raise TypeError - # zero-length sequences of all types will get here and succeed, - # but that's a minor nit - since the original implementation - # allowed empty dicts that type of behavior probably should be - # preserved for consistency - except TypeError: - ty,va,tb = sys.exc_info() - raise TypeError, "not a valid non-string sequence or mapping object", tb - - l = [] - if not doseq: - # preserve old behavior - for k, v in query: - k = quote_plus(str(k)) - v = quote_plus(str(v)) - l.append(k + '=' + v) - else: - for k, v in query: - k = quote_plus(str(k)) - if isinstance(v, str): - v = quote_plus(v) - l.append(k + '=' + v) - elif _is_unicode(v): - # is there a reasonable way to convert to ASCII? - # encode generates a string, but "replace" or "ignore" - # lose information and "strict" can raise UnicodeError - v = quote_plus(v.encode("ASCII","replace")) - l.append(k + '=' + v) - else: - try: - # is this a sufficient test for sequence-ness? - len(v) - except TypeError: - # not a sequence - v = quote_plus(str(v)) - l.append(k + '=' + v) - else: - # loop over the sequence - for elt in v: - l.append(k + '=' + quote_plus(str(elt))) - return '&'.join(l) - -# Proxy handling -def getproxies_environment(): - """Return a dictionary of scheme -> proxy server URL mappings. - - Scan the environment for variables named _proxy; - this seems to be the standard convention. If you need a - different way, you can pass a proxies dictionary to the - [Fancy]URLopener constructor. - - """ - proxies = {} - for name, value in os.environ.items(): - name = name.lower() - if value and name[-6:] == '_proxy': - proxies[name[:-6]] = value - return proxies - -def proxy_bypass_environment(host): - """Test if proxies should not be used for a particular host. 
- - Checks the environment for a variable named no_proxy, which should - be a list of DNS suffixes separated by commas, or '*' for all hosts. - """ - no_proxy = os.environ.get('no_proxy', '') or os.environ.get('NO_PROXY', '') - # '*' is special case for always bypass - if no_proxy == '*': - return 1 - # strip port off host - hostonly, port = splitport(host) - # check if the host ends with any of the DNS suffixes - no_proxy_list = [proxy.strip() for proxy in no_proxy.split(',')] - for name in no_proxy_list: - if name and (hostonly.endswith(name) or host.endswith(name)): - return 1 - # otherwise, don't bypass - return 0 - - -if sys.platform == 'darwin': - from _scproxy import _get_proxy_settings, _get_proxies - - def proxy_bypass_macosx_sysconf(host): - """ - Return True iff this host shouldn't be accessed using a proxy - - This function uses the MacOSX framework SystemConfiguration - to fetch the proxy information. - """ - import re - import socket - from fnmatch import fnmatch - - hostonly, port = splitport(host) - - def ip2num(ipAddr): - parts = ipAddr.split('.') - parts = map(int, parts) - if len(parts) != 4: - parts = (parts + [0, 0, 0, 0])[:4] - return (parts[0] << 24) | (parts[1] << 16) | (parts[2] << 8) | parts[3] - - proxy_settings = _get_proxy_settings() - - # Check for simple host names: - if '.' 
not in host: - if proxy_settings['exclude_simple']: - return True - - hostIP = None - - for value in proxy_settings.get('exceptions', ()): - # Items in the list are strings like these: *.local, 169.254/16 - if not value: continue - - m = re.match(r"(\d+(?:\.\d+)*)(/\d+)?", value) - if m is not None: - if hostIP is None: - try: - hostIP = socket.gethostbyname(hostonly) - hostIP = ip2num(hostIP) - except socket.error: - continue - - base = ip2num(m.group(1)) - mask = m.group(2) - if mask is None: - mask = 8 * (m.group(1).count('.') + 1) - - else: - mask = int(mask[1:]) - mask = 32 - mask - - if (hostIP >> mask) == (base >> mask): - return True - - elif fnmatch(host, value): - return True - - return False - - def getproxies_macosx_sysconf(): - """Return a dictionary of scheme -> proxy server URL mappings. - - This function uses the MacOSX framework SystemConfiguration - to fetch the proxy information. - """ - return _get_proxies() - - def proxy_bypass(host): - if getproxies_environment(): - return proxy_bypass_environment(host) - else: - return proxy_bypass_macosx_sysconf(host) - - def getproxies(): - return getproxies_environment() or getproxies_macosx_sysconf() - -elif os.name == 'nt': - def getproxies_registry(): - """Return a dictionary of scheme -> proxy server URL mappings. - - Win32 uses the registry to store proxies. - - """ - proxies = {} - try: - import _winreg - except ImportError: - # Std module, so should be around - but you never know! 
- return proxies - try: - internetSettings = _winreg.OpenKey(_winreg.HKEY_CURRENT_USER, - r'Software\Microsoft\Windows\CurrentVersion\Internet Settings') - proxyEnable = _winreg.QueryValueEx(internetSettings, - 'ProxyEnable')[0] - if proxyEnable: - # Returned as Unicode but problems if not converted to ASCII - proxyServer = str(_winreg.QueryValueEx(internetSettings, - 'ProxyServer')[0]) - if '=' in proxyServer: - # Per-protocol settings - for p in proxyServer.split(';'): - protocol, address = p.split('=', 1) - # See if address has a type:// prefix - import re - if not re.match('^([^/:]+)://', address): - address = '%s://%s' % (protocol, address) - proxies[protocol] = address - else: - # Use one setting for all protocols - if proxyServer[:5] == 'http:': - proxies['http'] = proxyServer - else: - proxies['http'] = 'http://%s' % proxyServer - proxies['https'] = 'https://%s' % proxyServer - proxies['ftp'] = 'ftp://%s' % proxyServer - internetSettings.Close() - except (WindowsError, ValueError, TypeError): - # Either registry key not found etc, or the value in an - # unexpected format. - # proxies already set up to be empty so nothing to do - pass - return proxies - - def getproxies(): - """Return a dictionary of scheme -> proxy server URL mappings. - - Returns settings gathered from the environment, if specified, - or the registry. - - """ - return getproxies_environment() or getproxies_registry() - - def proxy_bypass_registry(host): - try: - import _winreg - import re - except ImportError: - # Std modules, so should be around - but you never know! 
- return 0 - try: - internetSettings = _winreg.OpenKey(_winreg.HKEY_CURRENT_USER, - r'Software\Microsoft\Windows\CurrentVersion\Internet Settings') - proxyEnable = _winreg.QueryValueEx(internetSettings, - 'ProxyEnable')[0] - proxyOverride = str(_winreg.QueryValueEx(internetSettings, - 'ProxyOverride')[0]) - # ^^^^ Returned as Unicode but problems if not converted to ASCII - except WindowsError: - return 0 - if not proxyEnable or not proxyOverride: - return 0 - # try to make a host list from name and IP address. - rawHost, port = splitport(host) - host = [rawHost] - try: - addr = socket.gethostbyname(rawHost) - if addr != rawHost: - host.append(addr) - except socket.error: - pass - try: - fqdn = socket.getfqdn(rawHost) - if fqdn != rawHost: - host.append(fqdn) - except socket.error: - pass - # make a check value list from the registry entry: replace the - # '' string by the localhost entry and the corresponding - # canonical entry. - proxyOverride = proxyOverride.split(';') - # now check if we match one of the registry values. - for test in proxyOverride: - if test == '': - if '.' not in rawHost: - return 1 - test = test.replace(".", r"\.") # mask dots - test = test.replace("*", r".*") # change glob sequence - test = test.replace("?", r".") # change glob char - for val in host: - # print "%s <--> %s" %( test, val ) - if re.match(test, val, re.I): - return 1 - return 0 - - def proxy_bypass(host): - """Return a dictionary of scheme -> proxy server URL mappings. - - Returns settings gathered from the environment, if specified, - or the registry. 
- - """ - if getproxies_environment(): - return proxy_bypass_environment(host) - else: - return proxy_bypass_registry(host) - -else: - # By default use environment variables - getproxies = getproxies_environment - proxy_bypass = proxy_bypass_environment - -# Test and time quote() and unquote() -def test1(): - s = '' - for i in range(256): s = s + chr(i) - s = s*4 - t0 = time.time() - qs = quote(s) - uqs = unquote(qs) - t1 = time.time() - if uqs != s: - print 'Wrong!' - print repr(s) - print repr(qs) - print repr(uqs) - print round(t1 - t0, 3), 'sec' - - -def reporthook(blocknum, blocksize, totalsize): - # Report during remote transfers - print "Block number: %d, Block size: %d, Total size: %d" % ( - blocknum, blocksize, totalsize) diff --git a/lib-python/2.7/rfc822.py b/lib-python/2.7/rfc822.py --- a/lib-python/2.7/rfc822.py +++ b/lib-python/2.7/rfc822.py @@ -179,6 +179,11 @@ lst.append(line) self.dict[headerseen] = line[len(headerseen)+1:].strip() continue + elif headerseen is not None: + # An empty header name. These aren't allowed in HTTP, but it's + # probably a benign mistake. Don't add the header, just keep + # going. + continue else: # It's not a header line; throw it back and stop here. if not self.dict: @@ -202,7 +207,7 @@ data in RFC 2822-like formats with special header formats. """ i = line.find(':') - if i > 0: + if i > -1: return line[:i].lower() return None @@ -956,7 +961,7 @@ According to RFC 1123, day and month names must always be in English. If not for that, this code could use strftime(). It - can't because strftime() honors the locale and could generated + can't because strftime() honors the locale and could generate non-English names. 
""" if timeval is None: diff --git a/lib-python/2.7/test/test_httplib.py b/lib-python/2.7/test/test_httplib.py --- a/lib-python/2.7/test/test_httplib.py +++ b/lib-python/2.7/test/test_httplib.py @@ -1,15 +1,25 @@ import httplib +import itertools import array -import httplib import StringIO import socket import errno +import os +import tempfile import unittest TestCase = unittest.TestCase from test import test_support +here = os.path.dirname(__file__) +# Self-signed cert file for 'localhost' +CERT_localhost = os.path.join(here, 'keycert.pem') +# Self-signed cert file for 'fakehostname' +CERT_fakehostname = os.path.join(here, 'keycert2.pem') +# Self-signed cert file for self-signed.pythontest.net +CERT_selfsigned_pythontestdotnet = os.path.join(here, 'selfsigned_pythontestdotnet.pem') + HOST = test_support.HOST class FakeSocket: @@ -17,6 +27,7 @@ self.text = text self.fileclass = fileclass self.data = '' + self.file_closed = False self.host = host self.port = port @@ -26,7 +37,13 @@ def makefile(self, mode, bufsize=None): if mode != 'r' and mode != 'rb': raise httplib.UnimplementedFileMode() - return self.fileclass(self.text) + # keep the file around so we can check how much was read from it + self.file = self.fileclass(self.text) + self.file.close = self.file_close #nerf close () + return self.file + + def file_close(self): + self.file_closed = True def close(self): pass @@ -107,21 +124,59 @@ self.content_length = kv[1].strip() list.append(self, item) - # POST with empty body - conn = httplib.HTTPConnection('example.com') - conn.sock = FakeSocket(None) - conn._buffer = ContentLengthChecker() - conn.request('POST', '/', '') - self.assertEqual(conn._buffer.content_length, '0', - 'Header Content-Length not set') + # Here, we're testing that methods expecting a body get a + # content-length set to zero if the body is empty (either None or '') + bodies = (None, '') + methods_with_body = ('PUT', 'POST', 'PATCH') + for method, body in itertools.product(methods_with_body, 
bodies): + conn = httplib.HTTPConnection('example.com') + conn.sock = FakeSocket(None) + conn._buffer = ContentLengthChecker() + conn.request(method, '/', body) + self.assertEqual( + conn._buffer.content_length, '0', + 'Header Content-Length incorrect on {}'.format(method) + ) - # PUT request with empty body - conn = httplib.HTTPConnection('example.com') - conn.sock = FakeSocket(None) - conn._buffer = ContentLengthChecker() - conn.request('PUT', '/', '') - self.assertEqual(conn._buffer.content_length, '0', - 'Header Content-Length not set') + # For these methods, we make sure that content-length is not set when + # the body is None because it might cause unexpected behaviour on the + # server. + methods_without_body = ( + 'GET', 'CONNECT', 'DELETE', 'HEAD', 'OPTIONS', 'TRACE', + ) + for method in methods_without_body: + conn = httplib.HTTPConnection('example.com') + conn.sock = FakeSocket(None) + conn._buffer = ContentLengthChecker() + conn.request(method, '/', None) + self.assertEqual( + conn._buffer.content_length, None, + 'Header Content-Length set for empty body on {}'.format(method) + ) + + # If the body is set to '', that's considered to be "present but + # empty" rather than "missing", so content length would be set, even + # for methods that don't expect a body. + for method in methods_without_body: + conn = httplib.HTTPConnection('example.com') + conn.sock = FakeSocket(None) + conn._buffer = ContentLengthChecker() + conn.request(method, '/', '') + self.assertEqual( + conn._buffer.content_length, '0', + 'Header Content-Length incorrect on {}'.format(method) + ) + + # If the body is set, make sure Content-Length is set. 
+ for method in itertools.chain(methods_without_body, methods_with_body): + conn = httplib.HTTPConnection('example.com') + conn.sock = FakeSocket(None) + conn._buffer = ContentLengthChecker() + conn.request(method, '/', ' ') + self.assertEqual( + conn._buffer.content_length, '1', + 'Header Content-Length incorrect on {}'.format(method) + ) def test_putheader(self): conn = httplib.HTTPConnection('example.com') @@ -130,9 +185,36 @@ conn.putheader('Content-length',42) self.assertIn('Content-length: 42', conn._buffer) + conn.putheader('Foo', ' bar ') + self.assertIn(b'Foo: bar ', conn._buffer) + conn.putheader('Bar', '\tbaz\t') + self.assertIn(b'Bar: \tbaz\t', conn._buffer) + conn.putheader('Authorization', 'Bearer mytoken') + self.assertIn(b'Authorization: Bearer mytoken', conn._buffer) + conn.putheader('IterHeader', 'IterA', 'IterB') + self.assertIn(b'IterHeader: IterA\r\n\tIterB', conn._buffer) + conn.putheader('LatinHeader', b'\xFF') + self.assertIn(b'LatinHeader: \xFF', conn._buffer) + conn.putheader('Utf8Header', b'\xc3\x80') + self.assertIn(b'Utf8Header: \xc3\x80', conn._buffer) + conn.putheader('C1-Control', b'next\x85line') + self.assertIn(b'C1-Control: next\x85line', conn._buffer) + conn.putheader('Embedded-Fold-Space', 'is\r\n allowed') + self.assertIn(b'Embedded-Fold-Space: is\r\n allowed', conn._buffer) + conn.putheader('Embedded-Fold-Tab', 'is\r\n\tallowed') + self.assertIn(b'Embedded-Fold-Tab: is\r\n\tallowed', conn._buffer) + conn.putheader('Key Space', 'value') + self.assertIn(b'Key Space: value', conn._buffer) + conn.putheader('KeySpace ', 'value') + self.assertIn(b'KeySpace : value', conn._buffer) + conn.putheader(b'Nonbreak\xa0Space', 'value') + self.assertIn(b'Nonbreak\xa0Space: value', conn._buffer) + conn.putheader(b'\xa0NonbreakSpace', 'value') + self.assertIn(b'\xa0NonbreakSpace: value', conn._buffer) + def test_ipv6host_header(self): - # Default host header on IPv6 transaction should wrapped by [] if - # its actual IPv6 address + # Default 
host header on IPv6 transaction should be wrapped by [] if + # it is an IPv6 address expected = 'GET /foo HTTP/1.1\r\nHost: [2001::]:81\r\n' \ 'Accept-Encoding: identity\r\n\r\n' conn = httplib.HTTPConnection('[2001::]:81') @@ -149,6 +231,45 @@ conn.request('GET', '/foo') self.assertTrue(sock.data.startswith(expected)) + def test_malformed_headers_coped_with(self): + # Issue 19996 + body = "HTTP/1.1 200 OK\r\nFirst: val\r\n: nval\r\nSecond: val\r\n\r\n" + sock = FakeSocket(body) + resp = httplib.HTTPResponse(sock) + resp.begin() + + self.assertEqual(resp.getheader('First'), 'val') + self.assertEqual(resp.getheader('Second'), 'val') + + def test_invalid_headers(self): + conn = httplib.HTTPConnection('example.com') + conn.sock = FakeSocket('') + conn.putrequest('GET', '/') + + # http://tools.ietf.org/html/rfc7230#section-3.2.4, whitespace is no + # longer allowed in header names + cases = ( + (b'Invalid\r\nName', b'ValidValue'), + (b'Invalid\rName', b'ValidValue'), + (b'Invalid\nName', b'ValidValue'), + (b'\r\nInvalidName', b'ValidValue'), + (b'\rInvalidName', b'ValidValue'), + (b'\nInvalidName', b'ValidValue'), + (b' InvalidName', b'ValidValue'), + (b'\tInvalidName', b'ValidValue'), + (b'Invalid:Name', b'ValidValue'), + (b':InvalidName', b'ValidValue'), + (b'ValidName', b'Invalid\r\nValue'), + (b'ValidName', b'Invalid\rValue'), + (b'ValidName', b'Invalid\nValue'), + (b'ValidName', b'InvalidValue\r\n'), + (b'ValidName', b'InvalidValue\r'), + (b'ValidName', b'InvalidValue\n'), + ) + for name, value in cases: + with self.assertRaisesRegexp(ValueError, 'Invalid header'): + conn.putheader(name, value) + class BasicTest(TestCase): def test_status_lines(self): @@ -279,6 +400,22 @@ conn.sock = sock conn.request('GET', '/foo', body) self.assertTrue(sock.data.startswith(expected)) + self.assertIn('def test_send_file', sock.data) + + def test_send_tempfile(self): + expected = ('GET /foo HTTP/1.1\r\nHost: example.com\r\n' + 'Accept-Encoding: identity\r\nContent-Length: 
9\r\n\r\n' + 'fake\ndata') + + with tempfile.TemporaryFile() as body: + body.write('fake\ndata') + body.seek(0) + + conn = httplib.HTTPConnection('example.com') + sock = FakeSocket(body) + conn.sock = sock + conn.request('GET', '/foo', body) + self.assertEqual(sock.data, expected) def test_send(self): expected = 'this is a test this is only a test' @@ -425,12 +562,42 @@ self.assertEqual(resp.read(), '') self.assertTrue(resp.isclosed()) + def test_error_leak(self): + # Test that the socket is not leaked if getresponse() fails + conn = httplib.HTTPConnection('example.com') + response = [] + class Response(httplib.HTTPResponse): + def __init__(self, *pos, **kw): + response.append(self) # Avoid garbage collector closing the socket + httplib.HTTPResponse.__init__(self, *pos, **kw) + conn.response_class = Response + conn.sock = FakeSocket('') # Emulate server dropping connection + conn.request('GET', '/') + self.assertRaises(httplib.BadStatusLine, conn.getresponse) + self.assertTrue(response) + #self.assertTrue(response[0].closed) + self.assertTrue(conn.sock.file_closed) + + def test_proxy_tunnel_without_status_line(self): + # Issue 17849: If a proxy tunnel is created that does not return + # a status code, fail. + body = 'hello world' + conn = httplib.HTTPConnection('example.com', strict=False) + conn.set_tunnel('foo') + conn.sock = FakeSocket(body) + with self.assertRaisesRegexp(socket.error, "Invalid response"): + conn._tunnel() + class OfflineTest(TestCase): def test_responses(self): self.assertEqual(httplib.responses[httplib.NOT_FOUND], "Not Found") -class SourceAddressTest(TestCase): +class TestServerMixin: + """A limited socket server mixin. + + This is used by test cases for testing http connection end points. 
+ """ def setUp(self): self.serv = socket.socket(socket.AF_INET, socket.SOCK_STREAM) self.port = test_support.bind_port(self.serv) @@ -445,6 +612,7 @@ self.serv.close() self.serv = None +class SourceAddressTest(TestServerMixin, TestCase): def testHTTPConnectionSourceAddress(self): self.conn = httplib.HTTPConnection(HOST, self.port, source_address=('', self.source_port)) @@ -461,6 +629,24 @@ # for an ssl_wrapped connect() to actually return from. +class HTTPTest(TestServerMixin, TestCase): + def testHTTPConnection(self): + self.conn = httplib.HTTP(host=HOST, port=self.port, strict=None) + self.conn.connect() + self.assertEqual(self.conn._conn.host, HOST) + self.assertEqual(self.conn._conn.port, self.port) + + def testHTTPWithConnectHostPort(self): + testhost = 'unreachable.test.domain' + testport = '80' + self.conn = httplib.HTTP(host=testhost, port=testport) + self.conn.connect(host=HOST, port=self.port) + self.assertNotEqual(self.conn._conn.host, testhost) + self.assertNotEqual(self.conn._conn.port, testport) + self.assertEqual(self.conn._conn.host, HOST) + self.assertEqual(self.conn._conn.port, self.port) + + class TimeoutTest(TestCase): PORT = None @@ -507,35 +693,138 @@ httpConn.close() -class HTTPSTimeoutTest(TestCase): -# XXX Here should be tests for HTTPS, there isn't any right now! 
+class HTTPSTest(TestCase): + + def setUp(self): + if not hasattr(httplib, 'HTTPSConnection'): + self.skipTest('ssl support required') + + def make_server(self, certfile): + from test.ssl_servers import make_https_server + return make_https_server(self, certfile=certfile) def test_attributes(self): - # simple test to check it's storing it - if hasattr(httplib, 'HTTPSConnection'): - h = httplib.HTTPSConnection(HOST, TimeoutTest.PORT, timeout=30) - self.assertEqual(h.timeout, 30) + # simple test to check it's storing the timeout + h = httplib.HTTPSConnection(HOST, TimeoutTest.PORT, timeout=30) + self.assertEqual(h.timeout, 30) - @unittest.skipIf(not hasattr(httplib, 'HTTPS'), 'httplib.HTTPS not available') + def test_networked(self): + # Default settings: requires a valid cert from a trusted CA + import ssl + test_support.requires('network') + with test_support.transient_internet('self-signed.pythontest.net'): + h = httplib.HTTPSConnection('self-signed.pythontest.net', 443) + with self.assertRaises(ssl.SSLError) as exc_info: + h.request('GET', '/') + self.assertEqual(exc_info.exception.reason, 'CERTIFICATE_VERIFY_FAILED') + + def test_networked_noverification(self): + # Switch off cert verification + import ssl + test_support.requires('network') + with test_support.transient_internet('self-signed.pythontest.net'): + context = ssl._create_stdlib_context() + h = httplib.HTTPSConnection('self-signed.pythontest.net', 443, + context=context) + h.request('GET', '/') + resp = h.getresponse() + self.assertIn('nginx', resp.getheader('server')) + + @test_support.system_must_validate_cert + def test_networked_trusted_by_default_cert(self): + # Default settings: requires a valid cert from a trusted CA + test_support.requires('network') + with test_support.transient_internet('www.python.org'): + h = httplib.HTTPSConnection('www.python.org', 443) + h.request('GET', '/') + resp = h.getresponse() + content_type = resp.getheader('content-type') + self.assertIn('text/html', 
content_type) + + def test_networked_good_cert(self): + # We feed the server's cert as a validating cert + import ssl + test_support.requires('network') + with test_support.transient_internet('self-signed.pythontest.net'): + context = ssl.SSLContext(ssl.PROTOCOL_TLSv1) + context.verify_mode = ssl.CERT_REQUIRED + context.load_verify_locations(CERT_selfsigned_pythontestdotnet) + h = httplib.HTTPSConnection('self-signed.pythontest.net', 443, context=context) + h.request('GET', '/') + resp = h.getresponse() + server_string = resp.getheader('server') + self.assertIn('nginx', server_string) + + def test_networked_bad_cert(self): + # We feed a "CA" cert that is unrelated to the server's cert + import ssl + test_support.requires('network') + with test_support.transient_internet('self-signed.pythontest.net'): + context = ssl.SSLContext(ssl.PROTOCOL_TLSv1) + context.verify_mode = ssl.CERT_REQUIRED + context.load_verify_locations(CERT_localhost) + h = httplib.HTTPSConnection('self-signed.pythontest.net', 443, context=context) + with self.assertRaises(ssl.SSLError) as exc_info: + h.request('GET', '/') + self.assertEqual(exc_info.exception.reason, 'CERTIFICATE_VERIFY_FAILED') + + def test_local_unknown_cert(self): + # The custom cert isn't known to the default trust bundle + import ssl + server = self.make_server(CERT_localhost) + h = httplib.HTTPSConnection('localhost', server.port) + with self.assertRaises(ssl.SSLError) as exc_info: + h.request('GET', '/') + self.assertEqual(exc_info.exception.reason, 'CERTIFICATE_VERIFY_FAILED') + + def test_local_good_hostname(self): + # The (valid) cert validates the HTTP hostname + import ssl + server = self.make_server(CERT_localhost) + context = ssl.SSLContext(ssl.PROTOCOL_TLSv1) + context.verify_mode = ssl.CERT_REQUIRED + context.load_verify_locations(CERT_localhost) + h = httplib.HTTPSConnection('localhost', server.port, context=context) + h.request('GET', '/nonexistent') + resp = h.getresponse() + self.assertEqual(resp.status, 404) + 
+ def test_local_bad_hostname(self): + # The (valid) cert doesn't validate the HTTP hostname + import ssl + server = self.make_server(CERT_fakehostname) + context = ssl.SSLContext(ssl.PROTOCOL_TLSv1) + context.verify_mode = ssl.CERT_REQUIRED + context.check_hostname = True + context.load_verify_locations(CERT_fakehostname) + h = httplib.HTTPSConnection('localhost', server.port, context=context) + with self.assertRaises(ssl.CertificateError): + h.request('GET', '/') + h.close() + # With context.check_hostname=False, the mismatching is ignored + context.check_hostname = False + h = httplib.HTTPSConnection('localhost', server.port, context=context) + h.request('GET', '/nonexistent') + resp = h.getresponse() + self.assertEqual(resp.status, 404) + def test_host_port(self): # Check invalid host_port - # Note that httplib does not accept user:password@ in the host-port. for hp in ("www.python.org:abc", "user:password at www.python.org"): - self.assertRaises(httplib.InvalidURL, httplib.HTTP, hp) + self.assertRaises(httplib.InvalidURL, httplib.HTTPSConnection, hp) - for hp, h, p in (("[fe80::207:e9ff:fe9b]:8000", "fe80::207:e9ff:fe9b", - 8000), - ("pypi.python.org:443", "pypi.python.org", 443), - ("pypi.python.org", "pypi.python.org", 443), - ("pypi.python.org:", "pypi.python.org", 443), - ("[fe80::207:e9ff:fe9b]", "fe80::207:e9ff:fe9b", 443)): - http = httplib.HTTPS(hp) - c = http._conn - if h != c.host: - self.fail("Host incorrectly parsed: %s != %s" % (h, c.host)) - if p != c.port: - self.fail("Port incorrectly parsed: %s != %s" % (p, c.host)) + for hp, h, p in (("[fe80::207:e9ff:fe9b]:8000", + "fe80::207:e9ff:fe9b", 8000), + ("www.python.org:443", "www.python.org", 443), + ("www.python.org:", "www.python.org", 443), + ("www.python.org", "www.python.org", 443), + ("[fe80::207:e9ff:fe9b]", "fe80::207:e9ff:fe9b", 443), + ("[fe80::207:e9ff:fe9b]:", "fe80::207:e9ff:fe9b", + 443)): + c = httplib.HTTPSConnection(hp) + self.assertEqual(h, c.host) + self.assertEqual(p, c.port) 
class TunnelTests(TestCase): @@ -563,10 +852,12 @@ self.assertEqual(conn.sock.host, 'proxy.com') self.assertEqual(conn.sock.port, 80) - self.assertTrue('CONNECT destination.com' in conn.sock.data) - self.assertTrue('Host: destination.com' in conn.sock.data) + self.assertIn('CONNECT destination.com', conn.sock.data) + # issue22095 + self.assertNotIn('Host: destination.com:None', conn.sock.data) + self.assertIn('Host: destination.com', conn.sock.data) - self.assertTrue('Host: proxy.com' not in conn.sock.data) + self.assertNotIn('Host: proxy.com', conn.sock.data) conn.close() @@ -577,9 +868,11 @@ self.assertTrue('Host: destination.com' in conn.sock.data) + at test_support.reap_threads def test_main(verbose=None): test_support.run_unittest(HeaderTests, OfflineTest, BasicTest, TimeoutTest, - HTTPSTimeoutTest, SourceAddressTest, TunnelTests) + HTTPTest, HTTPSTest, SourceAddressTest, + TunnelTests) if __name__ == '__main__': test_main() diff --git a/lib-python/2.7/test/test_rfc822.py b/lib-python/2.7/test/test_rfc822.py --- a/lib-python/2.7/test/test_rfc822.py +++ b/lib-python/2.7/test/test_rfc822.py @@ -248,6 +248,12 @@ eq(rfc822.quote('foo\\wacky"name'), 'foo\\\\wacky\\"name') eq(rfc822.unquote('"foo\\\\wacky\\"name"'), 'foo\\wacky"name') + def test_invalid_headers(self): + eq = self.assertEqual + msg = self.create_message("First: val\n: otherval\nSecond: val2\n") + eq(msg.getheader('First'), 'val') + eq(msg.getheader('Second'), 'val2') + def test_main(): test_support.run_unittest(MessageTestCase) diff --git a/lib-python/2.7/test/test_urllib.py b/lib-python/2.7/test/test_urllib.py --- a/lib-python/2.7/test/test_urllib.py +++ b/lib-python/2.7/test/test_urllib.py @@ -1,13 +1,14 @@ """Regresssion tests for urllib""" +import collections import urllib import httplib +import io import unittest import os import sys import mimetools import tempfile -import StringIO from test import test_support from base64 import b64encode @@ -21,37 +22,43 @@ return "%" + hex_repr +def 
fakehttp(fakedata): + class FakeSocket(io.BytesIO): + + def sendall(self, data): + FakeHTTPConnection.buf = data + + def makefile(self, *args, **kwds): + return self + + def read(self, amt=None): + if self.closed: + return b"" + return io.BytesIO.read(self, amt) + + def readline(self, length=None): + if self.closed: + return b"" + return io.BytesIO.readline(self, length) + + class FakeHTTPConnection(httplib.HTTPConnection): + + # buffer to store data for verification in urlopen tests. + buf = "" + + def connect(self): + self.sock = FakeSocket(self.fakedata) + self.__class__.fakesock = self.sock + FakeHTTPConnection.fakedata = fakedata + + return FakeHTTPConnection + + class FakeHTTPMixin(object): def fakehttp(self, fakedata): - class FakeSocket(StringIO.StringIO): - - def sendall(self, data): - FakeHTTPConnection.buf = data - - def makefile(self, *args, **kwds): - return self - - def read(self, amt=None): - if self.closed: - return "" - return StringIO.StringIO.read(self, amt) - - def readline(self, length=None): - if self.closed: - return "" - return StringIO.StringIO.readline(self, length) - - class FakeHTTPConnection(httplib.HTTPConnection): - - # buffer to store data for verification in urlopen tests. - buf = "" - - def connect(self): - self.sock = FakeSocket(fakedata) - assert httplib.HTTP._connection_class == httplib.HTTPConnection - httplib.HTTP._connection_class = FakeHTTPConnection + httplib.HTTP._connection_class = fakehttp(fakedata) def unfakehttp(self): httplib.HTTP._connection_class = httplib.HTTPConnection @@ -158,8 +165,71 @@ # getproxies_environment use lowered case truncated (no '_proxy') keys self.assertEqual('localhost', proxies['no']) # List of no_proxies with space. 
- self.env.set('NO_PROXY', 'localhost, anotherdomain.com, newdomain.com') + self.env.set('NO_PROXY', 'localhost, anotherdomain.com, newdomain.com:1234') self.assertTrue(urllib.proxy_bypass_environment('anotherdomain.com')) + self.assertTrue(urllib.proxy_bypass_environment('anotherdomain.com:8888')) + self.assertTrue(urllib.proxy_bypass_environment('newdomain.com:1234')) + + def test_proxy_cgi_ignore(self): + try: + self.env.set('HTTP_PROXY', 'http://somewhere:3128') + proxies = urllib.getproxies_environment() + self.assertEqual('http://somewhere:3128', proxies['http']) + self.env.set('REQUEST_METHOD', 'GET') + proxies = urllib.getproxies_environment() + self.assertNotIn('http', proxies) + finally: + self.env.unset('REQUEST_METHOD') + self.env.unset('HTTP_PROXY') + + def test_proxy_bypass_environment_host_match(self): + bypass = urllib.proxy_bypass_environment + self.env.set('NO_PROXY', + 'localhost, anotherdomain.com, newdomain.com:1234') + self.assertTrue(bypass('localhost')) + self.assertTrue(bypass('LocalHost')) # MixedCase + self.assertTrue(bypass('LOCALHOST')) # UPPERCASE + self.assertTrue(bypass('newdomain.com:1234')) + self.assertTrue(bypass('anotherdomain.com:8888')) + self.assertTrue(bypass('www.newdomain.com:1234')) + self.assertFalse(bypass('prelocalhost')) + self.assertFalse(bypass('newdomain.com')) # no port + self.assertFalse(bypass('newdomain.com:1235')) # wrong port + +class ProxyTests_withOrderedEnv(unittest.TestCase): + + def setUp(self): + # We need to test conditions, where variable order _is_ significant + self._saved_env = os.environ + # Monkey patch os.environ, start with empty fake environment + os.environ = collections.OrderedDict() + + def tearDown(self): + os.environ = self._saved_env + + def test_getproxies_environment_prefer_lowercase(self): + # Test lowercase preference with removal + os.environ['no_proxy'] = '' + os.environ['No_Proxy'] = 'localhost' + self.assertFalse(urllib.proxy_bypass_environment('localhost')) + 
self.assertFalse(urllib.proxy_bypass_environment('arbitrary')) + os.environ['http_proxy'] = '' + os.environ['HTTP_PROXY'] = 'http://somewhere:3128' + proxies = urllib.getproxies_environment() + self.assertEqual({}, proxies) + # Test lowercase preference of proxy bypass and correct matching including ports + os.environ['no_proxy'] = 'localhost, noproxy.com, my.proxy:1234' + os.environ['No_Proxy'] = 'xyz.com' + self.assertTrue(urllib.proxy_bypass_environment('localhost')) + self.assertTrue(urllib.proxy_bypass_environment('noproxy.com:5678')) + self.assertTrue(urllib.proxy_bypass_environment('my.proxy:1234')) + self.assertFalse(urllib.proxy_bypass_environment('my.proxy')) + self.assertFalse(urllib.proxy_bypass_environment('arbitrary')) + # Test lowercase preference with replacement + os.environ['http_proxy'] = 'http://somewhere:3128' + os.environ['Http_Proxy'] = 'http://somewhereelse:3128' + proxies = urllib.getproxies_environment() + self.assertEqual('http://somewhere:3128', proxies['http']) class urlopen_HttpTests(unittest.TestCase, FakeHTTPMixin): @@ -209,10 +279,26 @@ Content-Type: text/html; charset=iso-8859-1 """) try: - self.assertRaises(IOError, urllib.urlopen, "http://python.org/") + msg = "Redirection to url 'file:" + with self.assertRaisesRegexp(IOError, msg): + urllib.urlopen("http://python.org/") finally: self.unfakehttp() + def test_redirect_limit_independent(self): + # Ticket #12923: make sure independent requests each use their + # own retry limit. + for i in range(urllib.FancyURLopener().maxtries): + self.fakehttp(b'''HTTP/1.1 302 Found +Location: file://guidocomputer.athome.com:/python/license +Connection: close +''') + try: + self.assertRaises(IOError, urllib.urlopen, + "http://something") + finally: + self.unfakehttp() + def test_empty_socket(self): # urlopen() raises IOError if the underlying socket does not send any # data. 
(#1680230) @@ -227,13 +313,13 @@ 'file://localhost/a/missing/file.py') fd, tmp_file = tempfile.mkstemp() tmp_fileurl = 'file://localhost/' + tmp_file.replace(os.path.sep, '/') + self.assertTrue(os.path.exists(tmp_file)) try: - self.assertTrue(os.path.exists(tmp_file)) fp = urllib.urlopen(tmp_fileurl) + fp.close() finally: os.close(fd) - fp.close() - os.unlink(tmp_file) + os.unlink(tmp_file) self.assertFalse(os.path.exists(tmp_file)) self.assertRaises(IOError, urllib.urlopen, tmp_fileurl) @@ -773,21 +859,131 @@ class Utility_Tests(unittest.TestCase): """Testcase to test the various utility functions in the urllib.""" + # In Python 3 this test class is moved to test_urlparse. + + def test_splittype(self): + splittype = urllib.splittype + self.assertEqual(splittype('type:opaquestring'), ('type', 'opaquestring')) + self.assertEqual(splittype('opaquestring'), (None, 'opaquestring')) + self.assertEqual(splittype(':opaquestring'), (None, ':opaquestring')) + self.assertEqual(splittype('type:'), ('type', '')) + self.assertEqual(splittype('type:opaque:string'), ('type', 'opaque:string')) + + def test_splithost(self): + splithost = urllib.splithost + self.assertEqual(splithost('//www.example.org:80/foo/bar/baz.html'), + ('www.example.org:80', '/foo/bar/baz.html')) + self.assertEqual(splithost('//www.example.org:80'), + ('www.example.org:80', '')) + self.assertEqual(splithost('/foo/bar/baz.html'), + (None, '/foo/bar/baz.html')) + + def test_splituser(self): + splituser = urllib.splituser + self.assertEqual(splituser('User:Pass at www.python.org:080'), + ('User:Pass', 'www.python.org:080')) + self.assertEqual(splituser('@www.python.org:080'), + ('', 'www.python.org:080')) + self.assertEqual(splituser('www.python.org:080'), + (None, 'www.python.org:080')) + self.assertEqual(splituser('User:Pass@'), + ('User:Pass', '')) + self.assertEqual(splituser('User at example.com:Pass at www.python.org:080'), + ('User at example.com:Pass', 'www.python.org:080')) def test_splitpasswd(self): 
- """Some of the password examples are not sensible, but it is added to - confirming to RFC2617 and addressing issue4675. - """ - self.assertEqual(('user', 'ab'),urllib.splitpasswd('user:ab')) - self.assertEqual(('user', 'a\nb'),urllib.splitpasswd('user:a\nb')) - self.assertEqual(('user', 'a\tb'),urllib.splitpasswd('user:a\tb')) - self.assertEqual(('user', 'a\rb'),urllib.splitpasswd('user:a\rb')) - self.assertEqual(('user', 'a\fb'),urllib.splitpasswd('user:a\fb')) - self.assertEqual(('user', 'a\vb'),urllib.splitpasswd('user:a\vb')) - self.assertEqual(('user', 'a:b'),urllib.splitpasswd('user:a:b')) - self.assertEqual(('user', 'a b'),urllib.splitpasswd('user:a b')) - self.assertEqual(('user 2', 'ab'),urllib.splitpasswd('user 2:ab')) - self.assertEqual(('user+1', 'a+b'),urllib.splitpasswd('user+1:a+b')) + # Some of the password examples are not sensible, but it is added to + # confirming to RFC2617 and addressing issue4675. + splitpasswd = urllib.splitpasswd + self.assertEqual(splitpasswd('user:ab'), ('user', 'ab')) + self.assertEqual(splitpasswd('user:a\nb'), ('user', 'a\nb')) + self.assertEqual(splitpasswd('user:a\tb'), ('user', 'a\tb')) + self.assertEqual(splitpasswd('user:a\rb'), ('user', 'a\rb')) + self.assertEqual(splitpasswd('user:a\fb'), ('user', 'a\fb')) + self.assertEqual(splitpasswd('user:a\vb'), ('user', 'a\vb')) + self.assertEqual(splitpasswd('user:a:b'), ('user', 'a:b')) + self.assertEqual(splitpasswd('user:a b'), ('user', 'a b')) + self.assertEqual(splitpasswd('user 2:ab'), ('user 2', 'ab')) + self.assertEqual(splitpasswd('user+1:a+b'), ('user+1', 'a+b')) + self.assertEqual(splitpasswd('user:'), ('user', '')) + self.assertEqual(splitpasswd('user'), ('user', None)) + self.assertEqual(splitpasswd(':ab'), ('', 'ab')) + + def test_splitport(self): + splitport = urllib.splitport + self.assertEqual(splitport('parrot:88'), ('parrot', '88')) + self.assertEqual(splitport('parrot'), ('parrot', None)) + self.assertEqual(splitport('parrot:'), ('parrot', None)) + 
self.assertEqual(splitport('127.0.0.1'), ('127.0.0.1', None)) + self.assertEqual(splitport('parrot:cheese'), ('parrot:cheese', None)) + self.assertEqual(splitport('[::1]:88'), ('[::1]', '88')) + self.assertEqual(splitport('[::1]'), ('[::1]', None)) + self.assertEqual(splitport(':88'), ('', '88')) + + def test_splitnport(self): + splitnport = urllib.splitnport + self.assertEqual(splitnport('parrot:88'), ('parrot', 88)) + self.assertEqual(splitnport('parrot'), ('parrot', -1)) + self.assertEqual(splitnport('parrot', 55), ('parrot', 55)) + self.assertEqual(splitnport('parrot:'), ('parrot', -1)) + self.assertEqual(splitnport('parrot:', 55), ('parrot', 55)) + self.assertEqual(splitnport('127.0.0.1'), ('127.0.0.1', -1)) + self.assertEqual(splitnport('127.0.0.1', 55), ('127.0.0.1', 55)) + self.assertEqual(splitnport('parrot:cheese'), ('parrot', None)) + self.assertEqual(splitnport('parrot:cheese', 55), ('parrot', None)) + + def test_splitquery(self): + # Normal cases are exercised by other tests; ensure that we also + # catch cases with no port specified (testcase ensuring coverage) + splitquery = urllib.splitquery + self.assertEqual(splitquery('http://python.org/fake?foo=bar'), + ('http://python.org/fake', 'foo=bar')) + self.assertEqual(splitquery('http://python.org/fake?foo=bar?'), + ('http://python.org/fake?foo=bar', '')) + self.assertEqual(splitquery('http://python.org/fake'), + ('http://python.org/fake', None)) + self.assertEqual(splitquery('?foo=bar'), ('', 'foo=bar')) + + def test_splittag(self): + splittag = urllib.splittag + self.assertEqual(splittag('http://example.com?foo=bar#baz'), + ('http://example.com?foo=bar', 'baz')) + self.assertEqual(splittag('http://example.com?foo=bar#'), + ('http://example.com?foo=bar', '')) + self.assertEqual(splittag('#baz'), ('', 'baz')) + self.assertEqual(splittag('http://example.com?foo=bar'), + ('http://example.com?foo=bar', None)) + self.assertEqual(splittag('http://example.com?foo=bar#baz#boo'), + 
('http://example.com?foo=bar#baz', 'boo')) + + def test_splitattr(self): + splitattr = urllib.splitattr + self.assertEqual(splitattr('/path;attr1=value1;attr2=value2'), + ('/path', ['attr1=value1', 'attr2=value2'])) + self.assertEqual(splitattr('/path;'), ('/path', [''])) + self.assertEqual(splitattr(';attr1=value1;attr2=value2'), + ('', ['attr1=value1', 'attr2=value2'])) + self.assertEqual(splitattr('/path'), ('/path', [])) + + def test_splitvalue(self): + # Normal cases are exercised by other tests; test pathological cases + # with no key/value pairs. (testcase ensuring coverage) + splitvalue = urllib.splitvalue + self.assertEqual(splitvalue('foo=bar'), ('foo', 'bar')) + self.assertEqual(splitvalue('foo='), ('foo', '')) + self.assertEqual(splitvalue('=bar'), ('', 'bar')) + self.assertEqual(splitvalue('foobar'), ('foobar', None)) + self.assertEqual(splitvalue('foo=bar=baz'), ('foo', 'bar=baz')) + + def test_toBytes(self): + result = urllib.toBytes(u'http://www.python.org') + self.assertEqual(result, 'http://www.python.org') + self.assertRaises(UnicodeError, urllib.toBytes, + test_support.u(r'http://www.python.org/medi\u00e6val')) + + def test_unwrap(self): + url = urllib.unwrap('') + self.assertEqual(url, 'type://host/path') class URLopener_Tests(unittest.TestCase): @@ -812,7 +1008,7 @@ # Everywhere else they work ok, but on those machines, sometimes # fail in one of the tests, sometimes in other. I have a linux, and # the tests go ok. -# If anybody has one of the problematic enviroments, please help! +# If anybody has one of the problematic environments, please help! # . 
Facundo # # def server(evt): @@ -858,7 +1054,7 @@ # def testTimeoutNone(self): # # global default timeout is ignored # import socket -# self.assertTrue(socket.getdefaulttimeout() is None) +# self.assertIsNone(socket.getdefaulttimeout()) # socket.setdefaulttimeout(30) # try: # ftp = urllib.ftpwrapper("myuser", "mypass", "localhost", 9093, []) @@ -870,7 +1066,7 @@ # def testTimeoutDefault(self): # # global default timeout is used # import socket -# self.assertTrue(socket.getdefaulttimeout() is None) +# self.assertIsNone(socket.getdefaulttimeout()) # socket.setdefaulttimeout(30) # try: # ftp = urllib.ftpwrapper("myuser", "mypass", "localhost", 9093, []) @@ -904,6 +1100,8 @@ Pathname_Tests, Utility_Tests, URLopener_Tests, + ProxyTests, + ProxyTests_withOrderedEnv, #FTPWrapperTests, ) diff --git a/lib-python/2.7/test/test_urllib2.py b/lib-python/2.7/test/test_urllib2.py --- a/lib-python/2.7/test/test_urllib2.py +++ b/lib-python/2.7/test/test_urllib2.py @@ -1,12 +1,19 @@ import unittest from test import test_support +from test import test_urllib import os import socket import StringIO import urllib2 -from urllib2 import Request, OpenerDirector +from urllib2 import Request, OpenerDirector, AbstractDigestAuthHandler +import httplib + +try: + import ssl +except ImportError: + ssl = None # XXX # Request @@ -20,7 +27,7 @@ self.assertRaises(ValueError, urllib2.urlopen, 'bogus url') # XXX Name hacking to get this to work on Windows. - fname = os.path.abspath(urllib2.__file__).replace('\\', '/') + fname = os.path.abspath(urllib2.__file__).replace(os.sep, '/') # And more hacking to get it to work on MacOS. This assumes # urllib.pathname2url works, unfortunately... 
@@ -47,6 +54,14 @@ for string, list in tests: self.assertEqual(urllib2.parse_http_list(string), list) + @unittest.skipUnless(ssl, "ssl module required") + def test_cafile_and_context(self): + context = ssl.create_default_context() + with self.assertRaises(ValueError): + urllib2.urlopen( + "https://localhost", cafile="/nonexistent/path", context=context + ) + def test_request_headers_dict(): """ @@ -405,7 +420,7 @@ self._count = 0 self.requests = [] def http_open(self, req): - import mimetools, httplib, copy + import mimetools, copy from StringIO import StringIO self.requests.append(copy.deepcopy(req)) if self._count == 0: @@ -591,8 +606,8 @@ self.assertIsInstance(args[0], Request) # response from opener.open is None, because there's no # handler that defines http_open to handle it - self.assertTrue(args[1] is None or - isinstance(args[1], MockResponse)) + if args[1] is not None: + self.assertIsInstance(args[1], MockResponse) def sanepathname2url(path): @@ -924,7 +939,8 @@ MockHeaders({"location": to_url})) except urllib2.HTTPError: # 307 in response to POST requires user OK - self.assertTrue(code == 307 and data is not None) + self.assertEqual(code, 307) + self.assertIsNotNone(data) self.assertEqual(o.req.get_full_url(), to_url) try: self.assertEqual(o.req.get_method(), "GET") @@ -1022,6 +1038,22 @@ fp = o.open('http://www.example.com') self.assertEqual(fp.geturl(), redirected_url.strip()) + def test_redirect_no_path(self): + # Issue 14132: Relative redirect strips original path + real_class = httplib.HTTPConnection + response1 = b"HTTP/1.1 302 Found\r\nLocation: ?query\r\n\r\n" + httplib.HTTPConnection = test_urllib.fakehttp(response1) + self.addCleanup(setattr, httplib, "HTTPConnection", real_class) + urls = iter(("/path", "/path?query")) + def request(conn, method, url, *pos, **kw): + self.assertEqual(url, next(urls)) + real_class.request(conn, method, url, *pos, **kw) + # Change response for subsequent connection + conn.__class__.fakedata = b"HTTP/1.1 200 
OK\r\n\r\nHello!" + httplib.HTTPConnection.request = request + fp = urllib2.urlopen("http://python.org/path") + self.assertEqual(fp.geturl(), "http://python.org/path?query") + def test_proxy(self): o = OpenerDirector() ph = urllib2.ProxyHandler(dict(http="proxy.example.com:3128")) @@ -1276,6 +1308,16 @@ else: self.assertTrue(False) + def test_unsupported_algorithm(self): + handler = AbstractDigestAuthHandler() + with self.assertRaises(ValueError) as exc: + handler.get_algorithm_impls('invalid') + self.assertEqual( + str(exc.exception), + "Unsupported digest authentication algorithm 'invalid'" + ) + + class RequestTests(unittest.TestCase): def setUp(self): @@ -1336,6 +1378,11 @@ req = Request(url) self.assertEqual(req.get_full_url(), url) + def test_private_attributes(self): + self.assertFalse(hasattr(self.get, '_Request__r_xxx')) + # Issue #6500: infinite recursion + self.assertFalse(hasattr(self.get, '_Request__r_method')) + def test_HTTPError_interface(self): """ Issue 13211 reveals that HTTPError didn't implement the URLError diff --git a/lib-python/2.7/urllib.py b/lib-python/2.7/urllib.py --- a/lib-python/2.7/urllib.py +++ b/lib-python/2.7/urllib.py @@ -28,6 +28,7 @@ import time import sys import base64 +import re from urlparse import urljoin as basejoin @@ -68,15 +69,15 @@ # Shortcut for basic usage _urlopener = None -def urlopen(url, data=None, proxies=None): +def urlopen(url, data=None, proxies=None, context=None): """Create a file-like object for the specified URL to read from.""" from warnings import warnpy3k warnpy3k("urllib.urlopen() has been removed in Python 3.0 in " "favor of urllib2.urlopen()", stacklevel=2) global _urlopener - if proxies is not None: - opener = FancyURLopener(proxies=proxies) + if proxies is not None or context is not None: + opener = FancyURLopener(proxies=proxies, context=context) elif not _urlopener: opener = FancyURLopener() _urlopener = opener @@ -86,11 +87,15 @@ return opener.open(url) else: return opener.open(url, data) -def 
urlretrieve(url, filename=None, reporthook=None, data=None): +def urlretrieve(url, filename=None, reporthook=None, data=None, context=None): global _urlopener - if not _urlopener: - _urlopener = FancyURLopener() - return _urlopener.retrieve(url, filename, reporthook, data) + if context is not None: + opener = FancyURLopener(context=context) + elif not _urlopener: + _urlopener = opener = FancyURLopener() + else: + opener = _urlopener + return opener.retrieve(url, filename, reporthook, data) def urlcleanup(): if _urlopener: _urlopener.cleanup() @@ -125,13 +130,14 @@ version = "Python-urllib/%s" % __version__ # Constructor - def __init__(self, proxies=None, **x509): + def __init__(self, proxies=None, context=None, **x509): if proxies is None: proxies = getproxies() assert hasattr(proxies, 'has_key'), "proxies must be a mapping" self.proxies = proxies self.key_file = x509.get('key_file') self.cert_file = x509.get('cert_file') + self.context = context self.addheaders = [('User-Agent', self.version)] self.__tempfiles = [] self.__unlink = os.unlink # See cleanup() @@ -421,7 +427,8 @@ auth = None h = httplib.HTTPS(host, 0, key_file=self.key_file, - cert_file=self.cert_file) + cert_file=self.cert_file, + context=self.context) if data is not None: h.putrequest('POST', selector) h.putheader('Content-Type', @@ -622,18 +629,20 @@ def http_error_302(self, url, fp, errcode, errmsg, headers, data=None): """Error 302 -- relocated (temporarily).""" self.tries += 1 - if self.maxtries and self.tries >= self.maxtries: - if hasattr(self, "http_error_500"): - meth = self.http_error_500 - else: - meth = self.http_error_default + try: + if self.maxtries and self.tries >= self.maxtries: + if hasattr(self, "http_error_500"): + meth = self.http_error_500 + else: + meth = self.http_error_default + return meth(url, fp, 500, + "Internal Server Error: Redirect Recursion", + headers) + result = self.redirect_internal(url, fp, errcode, errmsg, + headers, data) + return result + finally: self.tries 
= 0 - return meth(url, fp, 500, - "Internal Server Error: Redirect Recursion", headers) - result = self.redirect_internal(url, fp, errcode, errmsg, headers, - data) - self.tries = 0 - return result def redirect_internal(self, url, fp, errcode, errmsg, headers, data): if 'location' in headers: @@ -818,7 +827,10 @@ """Return the IP address of the current host.""" global _thishost if _thishost is None: - _thishost = socket.gethostbyname(socket.gethostname()) + try: + _thishost = socket.gethostbyname(socket.gethostname()) + except socket.gaierror: + _thishost = socket.gethostbyname('localhost') return _thishost _ftperrors = None @@ -861,7 +873,11 @@ self.timeout = timeout self.refcount = 0 self.keepalive = persistent - self.init() + try: + self.init() + except: + self.close() + raise def init(self): import ftplib @@ -869,8 +885,8 @@ self.ftp = ftplib.FTP() self.ftp.connect(self.host, self.port, self.timeout) self.ftp.login(self.user, self.passwd) - for dir in self.dirs: - self.ftp.cwd(dir) + _target = '/'.join(self.dirs) + self.ftp.cwd(_target) def retrfile(self, file, type): import ftplib @@ -916,13 +932,7 @@ return (ftpobj, retrlen) def endtransfer(self): - if not self.busy: - return self.busy = 0 - try: - self.ftp.voidresp() - except ftperrors(): - pass def close(self): self.keepalive = False @@ -980,11 +990,16 @@ self.hookargs = hookargs def close(self): - if self.closehook: - self.closehook(*self.hookargs) - self.closehook = None - self.hookargs = None - addbase.close(self) + try: + closehook = self.closehook + hookargs = self.hookargs + if closehook: + self.closehook = None + self.hookargs = None + closehook(*hookargs) + finally: + addbase.close(self) + class addinfo(addbase): """class to add an info() method to an open file.""" @@ -1121,10 +1136,13 @@ global _portprog if _portprog is None: import re - _portprog = re.compile('^(.*):([0-9]+)$') + _portprog = re.compile('^(.*):([0-9]*)$') match = _portprog.match(host) - if match: return match.group(1, 2) + if 
match: + host, port = match.groups() + if port: + return host, port return host, None _nportprog = None @@ -1141,12 +1159,12 @@ match = _nportprog.match(host) if match: host, port = match.group(1, 2) - try: - if not port: raise ValueError, "no digits" - nport = int(port) - except ValueError: - nport = None - return host, nport + if port: + try: + nport = int(port) + except ValueError: + nport = None + return host, nport return host, defport _queryprog = None @@ -1198,22 +1216,35 @@ _hexdig = '0123456789ABCDEFabcdef' _hextochr = dict((a + b, chr(int(a + b, 16))) for a in _hexdig for b in _hexdig) +_asciire = re.compile('([\x00-\x7f]+)') def unquote(s): """unquote('abc%20def') -> 'abc def'.""" - res = s.split('%') + if _is_unicode(s): + if '%' not in s: + return s + bits = _asciire.split(s) + res = [bits[0]] + append = res.append + for i in range(1, len(bits), 2): + append(unquote(str(bits[i])).decode('latin1')) + append(bits[i + 1]) + return ''.join(res) + + bits = s.split('%') # fastpath - if len(res) == 1: + if len(bits) == 1: return s - s = res[0] - for item in res[1:]: + res = [bits[0]] + append = res.append + for item in bits[1:]: try: - s += _hextochr[item[:2]] + item[2:] + append(_hextochr[item[:2]]) + append(item[2:]) except KeyError: - s += '%' + item - except UnicodeDecodeError: - s += unichr(int(item[:2], 16)) + item[2:] - return s + append('%') + append(item) + return ''.join(res) def unquote_plus(s): """unquote('%7e/abc+def') -> '~/abc def'""" @@ -1342,25 +1373,51 @@ """Return a dictionary of scheme -> proxy server URL mappings. Scan the environment for variables named <scheme>_proxy; - this seems to be the standard convention. If you need a - different way, you can pass a proxies dictionary to the + this seems to be the standard convention. In order to prefer lowercase + variables, we process the environment in two passes, first matches any + and second matches only lower case proxies. 
+ + If you need a different way, you can pass a proxies dictionary to the [Fancy]URLopener constructor. - """ + # Get all variables proxies = {} for name, value in os.environ.items(): name = name.lower() if value and name[-6:] == '_proxy': proxies[name[:-6]] = value + + # CVE-2016-1000110 - If we are running as CGI script, forget HTTP_PROXY + # (non-all-lowercase) as it may be set from the web server by a "Proxy:" + # header from the client + # If "proxy" is lowercase, it will still be used thanks to the next block + if 'REQUEST_METHOD' in os.environ: + proxies.pop('http', None) + + # Get lowercase variables + for name, value in os.environ.items(): + if name[-6:] == '_proxy': + name = name.lower() + if value: + proxies[name[:-6]] = value + else: + proxies.pop(name[:-6], None) + return proxies -def proxy_bypass_environment(host): +def proxy_bypass_environment(host, proxies=None): """Test if proxies should not be used for a particular host. - Checks the environment for a variable named no_proxy, which should - be a list of DNS suffixes separated by commas, or '*' for all hosts. + Checks the proxies dict for the value of no_proxy, which should be a + list of comma separated DNS suffixes, or '*' for all hosts. 
""" - no_proxy = os.environ.get('no_proxy', '') or os.environ.get('NO_PROXY', '') + if proxies is None: + proxies = getproxies_environment() + # don't bypass, if no_proxy isn't specified + try: + no_proxy = proxies['no'] + except KeyError: + return 0 # '*' is special case for always bypass if no_proxy == '*': return 1 @@ -1369,8 +1426,12 @@ # check if the host ends with any of the DNS suffixes no_proxy_list = [proxy.strip() for proxy in no_proxy.split(',')] for name in no_proxy_list: - if name and (hostonly.endswith(name) or host.endswith(name)): - return 1 + if name: + name = re.escape(name) + pattern = r'(.+\.)?%s$' % name + if (re.match(pattern, hostonly, re.I) + or re.match(pattern, host, re.I)): + return 1 # otherwise, don't bypass return 0 @@ -1446,8 +1507,14 @@ return _get_proxies() def proxy_bypass(host): - if getproxies_environment(): - return proxy_bypass_environment(host) + """Return True, if a host should be bypassed. + + Checks proxy settings gathered from the environment, if specified, or + from the MacOSX framework SystemConfiguration. + """ + proxies = getproxies_environment() + if proxies: + return proxy_bypass_environment(host, proxies) else: return proxy_bypass_macosx_sysconf(host) @@ -1563,14 +1630,14 @@ return 0 def proxy_bypass(host): - """Return a dictionary of scheme -> proxy server URL mappings. + """Return True, if the host should be bypassed. - Returns settings gathered from the environment, if specified, + Checks proxy settings gathered from the environment, if specified, or the registry. 
- """ - if getproxies_environment(): - return proxy_bypass_environment(host) + proxies = getproxies_environment() + if proxies: + return proxy_bypass_environment(host, proxies) else: return proxy_bypass_registry(host) diff --git a/lib-python/2.7/urllib2.py b/lib-python/2.7/urllib2.py --- a/lib-python/2.7/urllib2.py +++ b/lib-python/2.7/urllib2.py @@ -109,6 +109,14 @@ except ImportError: from StringIO import StringIO +# check for SSL +try: + import ssl +except ImportError: + _have_ssl = False +else: + _have_ssl = True + from urllib import (unwrap, unquote, splittype, splithost, quote, addinfourl, splitport, splittag, toBytes, splitattr, ftpwrapper, splituser, splitpasswd, splitvalue) @@ -120,11 +128,30 @@ __version__ = sys.version[:3] _opener = None -def urlopen(url, data=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT): +def urlopen(url, data=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT, + cafile=None, capath=None, cadefault=False, context=None): global _opener - if _opener is None: - _opener = build_opener() - return _opener.open(url, data, timeout) + if cafile or capath or cadefault: + if context is not None: + raise ValueError( + "You can't pass both context and any of cafile, capath, and " + "cadefault" + ) + if not _have_ssl: + raise ValueError('SSL support not available') + context = ssl.create_default_context(purpose=ssl.Purpose.SERVER_AUTH, + cafile=cafile, + capath=capath) + https_handler = HTTPSHandler(context=context) + opener = build_opener(https_handler) + elif context: + https_handler = HTTPSHandler(context=context) + opener = build_opener(https_handler) + elif _opener is None: + _opener = opener = build_opener() + else: + opener = _opener + return opener.open(url, data, timeout) def install_opener(opener): global _opener @@ -221,11 +248,9 @@ # methods getting called in a non-standard order. this may be # too complicated and/or unnecessary. # XXX should the __r_XXX attributes be public? 
- if attr[:12] == '_Request__r_': - name = attr[12:] - if hasattr(Request, 'get_' + name): - getattr(self, 'get_' + name)() - return getattr(self, attr) + if attr in ('_Request__r_type', '_Request__r_host'): + getattr(self, 'get_' + attr[12:])() + return self.__dict__[attr] raise AttributeError, attr def get_method(self): @@ -584,7 +609,7 @@ # fix a possible malformed URL urlparts = urlparse.urlparse(newurl) - if not urlparts.path: + if not urlparts.path and urlparts.netloc: urlparts = list(urlparts) urlparts[2] = "/" newurl = urlparse.urlunparse(urlparts) @@ -843,10 +868,7 @@ password_mgr = HTTPPasswordMgr() self.passwd = password_mgr self.add_password = self.passwd.add_password - self.retried = 0 - def reset_retry_count(self): - self.retried = 0 def http_error_auth_reqed(self, authreq, host, req, headers): # host may be an authority (without userinfo) or a URL with an @@ -854,13 +876,6 @@ # XXX could be multiple headers authreq = headers.get(authreq, None) - if self.retried > 5: - # retry sending the username:password 5 times before failing. 
- raise HTTPError(req.get_full_url(), 401, "basic auth failed", - headers, None) - else: - self.retried += 1 - if authreq: mo = AbstractBasicAuthHandler.rx.search(authreq) if mo: @@ -869,17 +884,14 @@ warnings.warn("Basic Auth Realm was unquoted", UserWarning, 2) if scheme.lower() == 'basic': - response = self.retry_http_basic_auth(host, req, realm) - if response and response.code != 401: - self.retried = 0 - return response + return self.retry_http_basic_auth(host, req, realm) def retry_http_basic_auth(self, host, req, realm): user, pw = self.passwd.find_user_password(realm, host) if pw is not None: raw = "%s:%s" % (user, pw) auth = 'Basic %s' % base64.b64encode(raw).strip() - if req.headers.get(self.auth_header, None) == auth: + if req.get_header(self.auth_header, None) == auth: return None req.add_unredirected_header(self.auth_header, auth) return self.parent.open(req, timeout=req.timeout) @@ -895,7 +907,6 @@ url = req.get_full_url() response = self.http_error_auth_reqed('www-authenticate', url, req, headers) - self.reset_retry_count() return response @@ -911,7 +922,6 @@ authority = req.get_host() response = self.http_error_auth_reqed('proxy-authenticate', authority, req, headers) - self.reset_retry_count() return response @@ -1061,6 +1071,9 @@ elif algorithm == 'SHA': H = lambda x: hashlib.sha1(x).hexdigest() # XXX MD5-sess + else: + raise ValueError("Unsupported digest authentication " + "algorithm %r" % algorithm.lower()) KD = lambda s, d: H("%s:%s" % (s, d)) return H, KD @@ -1136,7 +1149,7 @@ return request - def do_open(self, http_class, req): + def do_open(self, http_class, req, **http_conn_args): """Return an addinfourl object for the request, using http_class. http_class must implement the HTTPConnection API from httplib. 
@@ -1150,7 +1163,8 @@ if not host: raise URLError('no host given') - h = http_class(host, timeout=req.timeout) # will parse host:port + # will parse host:port + h = http_class(host, timeout=req.timeout, **http_conn_args) h.set_debuglevel(self._debuglevel) headers = dict(req.unredirected_hdrs) @@ -1218,8 +1232,13 @@ if hasattr(httplib, 'HTTPS'): class HTTPSHandler(AbstractHTTPHandler): + def __init__(self, debuglevel=0, context=None): + AbstractHTTPHandler.__init__(self, debuglevel) + self._context = context + def https_open(self, req): - return self.do_open(httplib.HTTPSConnection, req) + return self.do_open(httplib.HTTPSConnection, req, + context=self._context) https_request = AbstractHTTPHandler.do_request_ -- Repository URL: https://hg.python.org/jython From jython-checkins at python.org Sun Sep 18 20:39:42 2016 From: jython-checkins at python.org (darjus.loktevic) Date: Mon, 19 Sep 2016 00:39:42 +0000 Subject: [Jython-checkins] =?utf-8?q?jython=3A_Fix_=232455=2C_regression_i?= =?utf-8?q?n_import_loading_between_Java_and_Python_files_when?= Message-ID: <20160919003942.81575.73536.E1D841D1@psf.io> https://hg.python.org/jython/rev/ac01360b4252 changeset: 7963:ac01360b4252 user: Darjus Loktevic date: Mon Sep 19 10:39:42 2016 +1000 summary: Fix #2455, regression in import loading between Java and Python files when __init__.py is present. 
files: build.xml | 1 - src/org/python/core/PyModule.java | 9 + tests/java/javatests/Issue2455Test.java | 94 +++++++++++++ 3 files changed, 103 insertions(+), 1 deletions(-) diff --git a/build.xml b/build.xml --- a/build.xml +++ b/build.xml @@ -952,7 +952,6 @@ - diff --git a/src/org/python/core/PyModule.java b/src/org/python/core/PyModule.java --- a/src/org/python/core/PyModule.java +++ b/src/org/python/core/PyModule.java @@ -132,6 +132,15 @@ } @Override + public PyObject __findattr_ex__(String name) { + PyObject attr = super.__findattr_ex__(name); + if (attr != null) { + return attr; + } + return impAttr(name); + } + + @Override public void __setattr__(String name, PyObject value) { module___setattr__(name, value); } diff --git a/tests/java/javatests/Issue2455Test.java b/tests/java/javatests/Issue2455Test.java new file mode 100644 --- /dev/null +++ b/tests/java/javatests/Issue2455Test.java @@ -0,0 +1,94 @@ +package javatests; + +import java.io.File; +import java.io.FileWriter; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; + +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.TemporaryFolder; +import org.python.core.PyJavaPackage; +import org.python.core.PyModule; +import org.python.util.PythonInterpreter; + +import javax.tools.JavaCompiler; +import javax.tools.JavaFileObject; +import javax.tools.StandardJavaFileManager; +import javax.tools.ToolProvider; + +import static org.junit.Assert.*; + +/** + * Test for the Jython bug 2455. 
+ * @author jsaiz + */ +public class Issue2455Test { + + private static final String NEW_LINE = System.getProperty("line.separator"); + + private final PythonInterpreter interpreter = new PythonInterpreter(); + + @Rule + public TemporaryFolder temporaryFolder = new TemporaryFolder(); + + @Test(timeout = 60000) + public void testJavaModule() throws IOException, InterruptedException { + File example1 = temporaryFolder.newFolder("example1"); + File example2 = temporaryFolder.newFolder("example2"); + + // Create Java class in example1 and __init__.py in example2 + createJavaFile(example1, "SomeClass"); + createInitFile(example2); + + // Create an interpreter and import the example packages + interpreter.exec("import sys"); + interpreter.exec("sys.path.append('" + temporaryFolder.getRoot().toString() + "')"); + interpreter.exec("import " + example1.getName()); + interpreter.exec("import " + example2.getName()); + assertTrue(interpreter.eval(example1.getName()) instanceof PyJavaPackage); + assertTrue(interpreter.eval(example2.getName()) instanceof PyModule); + + // Now add a Java class to example2 (after importing; otherwise example2 might be loaded as a PyJavaPackage) + createJavaFile(example2, "OtherClass"); + + // Both classes should be found + evaluate(example1.getName() + ".SomeClass"); + evaluate(example2.getName() + ".OtherClass"); // works with 2.5.2 and the patch for 2.7.1, fails with 2.7.0 + } + + private void createJavaFile(File packageFolder, String className) throws IOException, InterruptedException { + String javaCode = "package " + packageFolder.getName() + ";" + NEW_LINE + "public class " + className + " {}" + NEW_LINE; + File javaFile = new File(packageFolder, className + ".java"); + createFile(javaFile, javaCode); + + compileJavaFile(javaFile); + } + + private void compileJavaFile(File javaFile) { + JavaCompiler compiler = ToolProvider.getSystemJavaCompiler(); + StandardJavaFileManager fileManager = compiler.getStandardFileManager(null, null, null); + + 
Iterable<? extends JavaFileObject> compilationUnits = + fileManager.getJavaFileObjectsFromFiles(new ArrayList<>(Arrays.asList(javaFile))); + compiler.getTask(null, fileManager, null, null, null, compilationUnits).call(); + } + + private void createInitFile(File directory) throws IOException { + File jythonFile = new File(directory, "__init__.py"); + createFile(jythonFile, "print 'within __init__.py'"); + } + + private void createFile(File file, String text) throws IOException { + file.getParentFile().mkdirs(); + try (FileWriter writer = new FileWriter(file)) { + writer.append(text); + writer.flush(); + } + } + + private void evaluate(String className) { + assertEquals("<type '" + className + "'>", interpreter.eval(className).toString()); + } +} -- Repository URL: https://hg.python.org/jython From jython-checkins at python.org Mon Sep 19 09:10:43 2016 From: jython-checkins at python.org (darjus.loktevic) Date: Mon, 19 Sep 2016 13:10:43 +0000 Subject: [Jython-checkins] =?utf-8?q?jython=3A_Change_getaddrinfo_exceptio?= =?utf-8?q?n_on_host_not_found_to_match_the_python_output?= Message-ID: <20160919131043.100635.28447.528732BC@psf.io> https://hg.python.org/jython/rev/fb71c3a0c760 changeset: 7964:fb71c3a0c760 user: Darjus Loktevic date: Mon Sep 19 23:00:03 2016 +1000 summary: Change getaddrinfo exception on host not found to match the python output In [19]: socket.getaddrinfo('exist.not.rrrrrrrrr', 443) --------------------------------------------------------------------------- gaierror Traceback (most recent call last) in () ----> 1 socket.getaddrinfo('exist.not.rrrrrrrrr', 443) gaierror: [Errno 8] nodename nor servname provided, or not known files: Lib/_socket.py | 7 ++++++- Lib/test/test_socket.py | 2 +- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/Lib/_socket.py b/Lib/_socket.py --- a/Lib/_socket.py +++ b/Lib/_socket.py @@ -1754,7 +1754,12 @@ hosts = [host] results = [] for h in hosts: - for a in java.net.InetAddress.getAllByName(h): + try: + all_by_name = java.net.InetAddress.getAllByName(h) + 
except java.net.UnknownHostException: + raise gaierror(errno.ENOEXEC, 'nodename nor servname provided, or not known') + + for a in all_by_name: if len([f for f in filter_fns if f(a)]): family = {java.net.Inet4Address: AF_INET, java.net.Inet6Address: AF_INET6}[a.getClass()] if flags & AI_CANONNAME: diff --git a/Lib/test/test_socket.py b/Lib/test/test_socket.py --- a/Lib/test/test_socket.py +++ b/Lib/test/test_socket.py @@ -2242,7 +2242,7 @@ try: self.s.connect( ('non.existent.server', PORT) ) except socket.gaierror, gaix: - self.failUnlessEqual(gaix[0], errno.EGETADDRINFOFAILED) + self.failUnlessEqual(gaix[0], errno.ENOEXEC) except Exception, x: self.fail("Get host name for non-existent host raised wrong exception: %s" % x) else: -- Repository URL: https://hg.python.org/jython From jython-checkins at python.org Mon Sep 19 09:10:47 2016 From: jython-checkins at python.org (darjus.loktevic) Date: Mon, 19 Sep 2016 13:10:47 +0000 Subject: [Jython-checkins] =?utf-8?q?jython=3A_Add_client-side_SNI_support?= =?utf-8?q?=2E_Partially_fixes_=232446?= Message-ID: <20160919131043.4593.94403.5314D1EF@psf.io> https://hg.python.org/jython/rev/a07c595b410f changeset: 7965:a07c595b410f user: Darjus Loktevic date: Mon Sep 19 23:10:33 2016 +1000 summary: Add client-side SNI support. 
Partially fixes #2446 files: Lib/_sslcerts.py | 30 +++++--- Lib/ssl.py | 97 ++++++++++++++++++++++--------- Lib/test/test_ssl.py | 2 +- build.xml | 2 +- 4 files changed, 87 insertions(+), 44 deletions(-) diff --git a/Lib/_sslcerts.py b/Lib/_sslcerts.py --- a/Lib/_sslcerts.py +++ b/Lib/_sslcerts.py @@ -10,8 +10,15 @@ from java.security.cert import CertificateException, CertificateFactory from java.security.interfaces import RSAPrivateCrtKey from java.security.interfaces import RSAPublicKey -from javax.net.ssl import ( - X509KeyManager, X509TrustManager, KeyManagerFactory, SSLContext, TrustManager, TrustManagerFactory) +from javax.net.ssl import X509KeyManager, X509TrustManager, KeyManagerFactory, SSLContext + +try: + # jarjar-ed version + from org.python.netty.handler.ssl.util import SimpleTrustManagerFactory + +except ImportError: + # dev version from extlibs + from io.netty.handler.ssl.util import SimpleTrustManagerFactory try: # dev version from extlibs OR if in classpath. @@ -64,7 +71,7 @@ for cert in cf.generateCertificates(BufferedInputStream(f)): trust_store.setCertificateEntry(str(uuid.uuid4()), cert) num_certs_installed += 1 - tmf = TrustManagerFactory.getInstance(TrustManagerFactory.getDefaultAlgorithm()) + tmf = SimpleTrustManagerFactory.getInstance(SimpleTrustManagerFactory.getDefaultAlgorithm()) tmf.init(trust_store) log.debug("Installed %s certificates", num_certs_installed, extra={"sock": "*"}) return tmf @@ -329,7 +336,7 @@ def getPrivateKey(self, alias): for key_manager in self.key_managers: - private_key = keyManager.getPrivateKey(alias) + private_key = key_manager.getPrivateKey(alias) if private_key: return private_key return None @@ -404,14 +411,13 @@ return certs -# To use with CERT_NONE -class NoVerifyX509TrustManager(X509TrustManager): +class CompositeX509TrustManagerFactory(SimpleTrustManagerFactory): - def checkClientTrusted(self, chain, auth_type): + def __init__(self, trust_managers): + self._trust_manager = 
CompositeX509TrustManager(trust_managers) + + def engineInit(self, arg): pass - def checkServerTrusted(self, chain, auth_type): - pass - - def getAcceptedIssuers(self): - return None + def engineGetTrustManagers(self): + return [self._trust_manager] diff --git a/Lib/ssl.py b/Lib/ssl.py --- a/Lib/ssl.py +++ b/Lib/ssl.py @@ -6,7 +6,6 @@ from java.io import BufferedInputStream from java.security import KeyStore, KeyStoreException from java.security.cert import CertificateParsingException -from javax.net.ssl import TrustManagerFactory from javax.naming.ldap import LdapName from java.lang import IllegalArgumentException, System import logging @@ -19,11 +18,16 @@ try: # jarjar-ed version from org.python.netty.channel import ChannelInitializer - from org.python.netty.handler.ssl import SslHandler + from org.python.netty.handler.ssl import SslHandler, SslProvider, SslContextBuilder, ClientAuth + from org.python.netty.handler.ssl.util import SimpleTrustManagerFactory, InsecureTrustManagerFactory + from org.python.netty.buffer import ByteBufAllocator + except ImportError: # dev version from extlibs from io.netty.channel import ChannelInitializer - from io.netty.handler.ssl import SslHandler + from io.netty.handler.ssl import SslHandler, SslProvider, SslContextBuilder, ClientAuth + from io.netty.handler.ssl.util import SimpleTrustManagerFactory, InsecureTrustManagerFactory + from io.netty.buffer import ByteBufAllocator from _socket import ( SSLError, raises_java_exception, @@ -45,7 +49,7 @@ error as socket_error) from _sslcerts import _get_openssl_key_manager, _extract_cert_from_data, _extract_certs_for_paths, \ - NoVerifyX509TrustManager, _str_hash_key_entry, _get_ecdh_parameter_spec, CompositeX509TrustManager + _str_hash_key_entry, _get_ecdh_parameter_spec, CompositeX509TrustManagerFactory from _sslcerts import SSLContext as _JavaSSLContext from java.text import SimpleDateFormat @@ -56,6 +60,13 @@ from javax.security.auth.x500 import X500Principal from org.ietf.jgss import 
Oid +try: + # requires Java 8 or higher for this support + from javax.net.ssl import SNIHostName, SNIMatcher + HAS_SNI = True +except ImportError: + HAS_SNI = False + log = logging.getLogger("_socket") @@ -67,6 +78,10 @@ CERT_NONE, CERT_OPTIONAL, CERT_REQUIRED = range(3) +_CERT_TO_CLIENT_AUTH = {CERT_NONE: ClientAuth.NONE, + CERT_OPTIONAL: ClientAuth.OPTIONAL, + CERT_REQUIRED: ClientAuth.REQUIRE} + # Do not support PROTOCOL_SSLv2, it is highly insecure and it is optional _, PROTOCOL_SSLv3, PROTOCOL_SSLv23, PROTOCOL_TLSv1, PROTOCOL_TLSv1_1, PROTOCOL_TLSv1_2 = range(6) _PROTOCOL_NAMES = { @@ -85,7 +100,8 @@ CHANNEL_BINDING_TYPES = [] # https://docs.python.org/2/library/ssl.html#ssl.HAS_ALPN etc... -HAS_ALPN, HAS_NPN, HAS_ECDH, HAS_SNI = False, False, True, False +HAS_ALPN, HAS_NPN, HAS_ECDH = False, False, True + # TODO not supported on jython yet # Disable weak or insecure ciphers by default @@ -578,12 +594,17 @@ def context(self): return self._context + @context.setter + def context(self, context): + self._context = context + def setup_engine(self, addr): if self.engine is None: # http://stackoverflow.com/questions/13390964/java-ssl-fatal-error-80-unwrapping-net-record-after-adding-the-https-en self.engine = self._context._createSSLEngine( addr, self.server_hostname, - cert_file=getattr(self, "certfile", None), key_file=getattr(self, "keyfile", None)) + cert_file=getattr(self, "certfile", None), key_file=getattr(self, "keyfile", None), + server_side=self.server_side) self.engine.setUseClientMode(not self.server_side) def connect(self, addr): @@ -1044,6 +1065,8 @@ self._key_managers = None + self._server_name_callback = None + def wrap_socket(self, sock, server_side=False, do_handshake_on_connect=True, suppress_ragged_eofs=True, @@ -1054,36 +1077,47 @@ server_hostname=server_hostname, _context=self) - def _createSSLEngine(self, addr, hostname=None, cert_file=None, key_file=None): - trust_managers = [NoVerifyX509TrustManager()] - if self.verify_mode == CERT_REQUIRED: 
- tmf = TrustManagerFactory.getInstance(TrustManagerFactory.getDefaultAlgorithm()) + def _createSSLEngine(self, addr, hostname=None, cert_file=None, key_file=None, server_side=False): + tmf = InsecureTrustManagerFactory.INSTANCE + if self.verify_mode != CERT_NONE: + # XXX need to refactor so we don't have to get trust managers twice + stmf = SimpleTrustManagerFactory.getInstance(SimpleTrustManagerFactory.getDefaultAlgorithm()) + stmf.init(self._trust_store) + + tmf = CompositeX509TrustManagerFactory(stmf.getTrustManagers()) tmf.init(self._trust_store) - trust_managers = [CompositeX509TrustManager(tmf.getTrustManagers())] - context = _JavaSSLContext.getInstance(self._protocol_name) + kmf = self._key_managers + if self._key_managers is None: + kmf = _get_openssl_key_manager(cert_file=cert_file, key_file=key_file) - if self._key_managers is None: - context.init( - _get_openssl_key_manager( - cert_file=cert_file, key_file=key_file).getKeyManagers(), - trust_managers, None) - else: - context.init( - self._key_managers.getKeyManagers(), - trust_managers, None) + context_builder = None - # addr could be ipv6, only extract relevant parts - engine = context.createSSLEngine((hostname or addr[0]), addr[1]) + if not server_side: + context_builder = SslContextBuilder.forClient() - # apparently this can be used to enforce hostname verification - if hostname is not None and self._check_hostname: - params = engine.getSSLParameters() - params.setEndpointIdentificationAlgorithm('HTTPS') - engine.setSSLParameters(params) + if kmf: + if server_side: + context_builder = SslContextBuilder.forServer(kmf) + else: + context_builder = context_builder.keyManager(kmf) + + context_builder = context_builder.trustManager(tmf) + context_builder = context_builder.sslProvider(SslProvider.JDK) + context_builder = context_builder.clientAuth(_CERT_TO_CLIENT_AUTH[self.verify_mode]) if self._ciphers is not None: - engine.setEnabledCipherSuites(self._ciphers) + context_builder = 
context_builder.ciphers(self._ciphers) + + if self._check_hostname: + engine = context_builder.build().newEngine(ByteBufAllocator.DEFAULT, hostname, addr[1]) + if HAS_SNI: + params = engine.getSSLParameters() + params.setEndpointIdentificationAlgorithm('HTTPS') + params.setServerNames([SNIHostName(hostname)]) + engine.setSSLParameters(params) + else: + engine = context_builder.build().newEngine(ByteBufAllocator.DEFAULT, addr[0], addr[1]) return engine @@ -1163,7 +1197,10 @@ raise NotImplementedError() def set_servername_callback(self, server_name_callback): - raise NotImplementedError() + if not callable(server_name_callback) and server_name_callback is not None: + raise TypeError("{!r} is not callable".format(server_name_callback)) + self._server_name_callback = server_name_callback + def load_dh_params(self, dhfile): # TODO? diff --git a/Lib/test/test_ssl.py b/Lib/test/test_ssl.py --- a/Lib/test/test_ssl.py +++ b/Lib/test/test_ssl.py @@ -1521,7 +1521,7 @@ if ssl.OPENSSL_VERSION_INFO < (0, 9, 8, 0, 15): self.skipTest("SHA256 not available on %r" % ssl.OPENSSL_VERSION) # sha256.tbs-internet.com needs SNI to use the correct certificate - if not ssl.HAS_SNI: + if not ssl.HAS_SNI or support.is_jython: # sha256.tbs-internet.com is no longer alive self.skipTest("SNI needed for this test") # https://sha2.hboeck.de/ was used until 2011-01-08 (no route to host) remote = ("sha256.tbs-internet.com", 443) diff --git a/build.xml b/build.xml --- a/build.xml +++ b/build.xml @@ -164,7 +164,7 @@ - + -- Repository URL: https://hg.python.org/jython