[Python-checkins] distutils2: Updated mirror support for indexes.
tarek.ziade
python-checkins at python.org
Sun Aug 8 11:50:46 CEST 2010
tarek.ziade pushed 35ac4bcb4b04 to distutils2:
http://hg.python.org/distutils2/rev/35ac4bcb4b04
changeset: 449:35ac4bcb4b04
user: Alexis Metaireau <ametaireau at gmail.com>
date: Tue Jul 20 18:13:05 2010 +0200
summary: Updated mirror support for indexes.
files: src/distutils2/index/mirrors.py, src/distutils2/index/simple.py, src/distutils2/tests/test_index_simple.py
diff --git a/src/distutils2/index/mirrors.py b/src/distutils2/index/mirrors.py
new file mode 100644
--- /dev/null
+++ b/src/distutils2/index/mirrors.py
@@ -0,0 +1,55 @@
+"""Utilities related to the mirror infrastructure defined in PEP 381.
+See http://www.python.org/dev/peps/pep-0381/
+"""
+
+from string import ascii_lowercase
+import socket
+
+DEFAULT_MIRROR_URL = "last.pypi.python.org"
+
+def get_mirrors(hostname=None):
+ """Return the list of mirrors from the last record found on the DNS
+ entry::
+
+ >>> from distutils2.index.mirrors import get_mirrors
+ >>> get_mirrors()
+ ['a.pypi.python.org', 'b.pypi.python.org', 'c.pypi.python.org',
+ 'd.pypi.python.org']
+
+ """
+ if hostname is None:
+ hostname = DEFAULT_MIRROR_URL
+
+ # return the last mirror registered on PyPI.
+ try:
+ unused, aliaslist, ipaddr = socket.gethostbyname_ex(hostname)
+ except socket.gaierror:
+ return []
+ if len(aliaslist) < 2:
+ return []
+ index_adress = aliaslist[-1]
+ end_letter = index_adress.split(".", 1)
+
+ # determine the list from the last one.
+ return ["%s.%s" % (s, end_letter[1]) for s in string_range(end_letter[0])]
+
+def string_range(last):
+ """Compute the range of string between "a" and last.
+
+ This works for simple "a to z" lists, but also for "a to zz" lists.
+ """
+ for k in range(len(last)):
+ for x in product(ascii_lowercase, repeat=k+1):
+ result = ''.join(x)
+ yield result
+ if result == last:
+ return
+
+def product(*args, **kwds):
+ pools = map(tuple, args) * kwds.get('repeat', 1)
+ result = [[]]
+ for pool in pools:
+ result = [x+[y] for x in result for y in pool]
+ for prod in result:
+ yield tuple(prod)
+
diff --git a/src/distutils2/index/simple.py b/src/distutils2/index/simple.py
--- a/src/distutils2/index/simple.py
+++ b/src/distutils2/index/simple.py
@@ -17,11 +17,11 @@
get_infos_from_url)
from distutils2.index.errors import (IndexError, DownloadError,
UnableToDownload)
+from distutils2.index.mirrors import get_mirrors
from distutils2 import __version__ as __distutils2_version__
# -- Constants -----------------------------------------------
-DEFAULT_INDEX_URL = "http://pypi.python.org/simple/"
-DEFAULT_MIRROR_URL = "mirrors.pypi.python.org"
+DEFAULT_INDEX_URL = "http://a.pypi.python.org/simple/"
DEFAULT_HOSTS = ("*",)
SOCKET_TIMEOUT = 15
USER_AGENT = "Python-urllib/%s distutils2/%s" % (
@@ -61,6 +61,9 @@
class Crawler(IndexClient):
"""Provides useful tools to request the Python Package Index simple API.
+ You can specify both mirrors and mirrors_url, but mirrors_url will only be
+ used if mirrors is set to None.
+
:param index_url: the url of the simple index to search on.
:param follow_externals: tell if following external links is needed or
not. Default is False.
@@ -74,28 +77,30 @@
pick up the last final version.
:param mirrors_url: the url to look on for DNS records giving mirror
adresses.
- :param mirrors: a list of mirrors to check out if problems
- occurs while working with the one given in "url"
+ :param mirrors: a list of mirrors (see PEP 381).
:param timeout: time in seconds to consider a url has timeouted.
+ :param mirrors_max_tries": number of times to try requesting informations
+ on mirrors before switching.
"""
def __init__(self, index_url=DEFAULT_INDEX_URL, hosts=DEFAULT_HOSTS,
follow_externals=False, prefer_final=False,
- mirrors_url=DEFAULT_MIRROR_URL, mirrors=None,
- timeout=SOCKET_TIMEOUT):
+ mirrors_url=None, mirrors=None,
+ timeout=SOCKET_TIMEOUT, mirrors_max_tries=0):
self.follow_externals = follow_externals
-
+
+ # mirroring attributes.
if not index_url.endswith("/"):
index_url += "/"
- self._index_urls = [index_url]
# if no mirrors are defined, use the method described in PEP 381.
if mirrors is None:
- try:
- mirrors = socket.gethostbyname_ex(mirrors_url)[-1]
- except socket.gaierror:
- mirrors = []
- self._index_urls.extend(mirrors)
- self._current_index_url = 0
+ mirrors = get_mirrors(mirrors_url)
+ self._mirrors = set(mirrors)
+ self._mirrors_used = set()
+ self.index_url = index_url
+ self._mirrors_max_tries = mirrors_max_tries
+ self._mirrors_tries = 0
+
self._timeout = timeout
self._prefer_final = prefer_final
@@ -108,10 +113,6 @@
self._processed_urls = []
self._releases = {}
- @property
- def index_url(self):
- return self._index_urls[self._current_index_url]
-
def _search_for_releases(self, requirements):
"""Search for distributions and return a ReleaseList object containing
the results
@@ -128,14 +129,19 @@
def _switch_to_next_mirror(self):
"""Switch to the next mirror (eg. point self.index_url to the next
- url.
+ mirror url.
+
+ Raise a KeyError if all mirrors have been tried.
"""
- # Internally, iter over the _index_url iterable, if we have read all
- # of the available indexes, raise an exception.
- if self._current_index_url < len(self._index_urls):
- self._current_index_url = self._current_index_url + 1
- else:
- raise UnableToDownload("All mirrors fails")
+ self._mirrors_used.add(self.index_url)
+ index_url = self._mirrors.pop()
+ if not ("http://" or "https://" or "file://") in index_url:
+ index_url = "http://%s" % index_url
+
+ if not index_url.endswith("/simple"):
+ index_url = "%s/simple/" % index_url
+
+ self.index_url = index_url
def _is_browsable(self, url):
"""Tell if the given URL can be browsed or not.
@@ -270,8 +276,13 @@
self._process_url(url, name)
except DownloadError:
# if an error occurs, try with the next index_url
- # (provided by the mirrors)
- self._switch_to_next_mirror()
+ if self._mirrors_tries >= self._mirrors_max_tries:
+ try:
+ self._switch_to_next_mirror()
+ except KeyError:
+ raise UnableToDownload("Tried all mirrors")
+ else:
+ self._mirrors_tries += 1
self._releases.clear()
self._process_index_page(name)
diff --git a/src/distutils2/tests/test_index_simple.py b/src/distutils2/tests/test_index_simple.py
--- a/src/distutils2/tests/test_index_simple.py
+++ b/src/distutils2/tests/test_index_simple.py
@@ -222,7 +222,7 @@
# create the index using both servers
crawler = Crawler(server.full_address + "/simple/",
hosts=('*',), timeout=1, # set the timeout to 1s for the tests
- mirrors=[mirror.full_address + "/simple/", ])
+ mirrors=[mirror.full_address, ])
# this should not raise a timeout
self.assertEqual(4, len(crawler.find("foo")))
--
Repository URL: http://hg.python.org/distutils2
More information about the Python-checkins mailing list