[Python-checkins] distutils2: Updated mirror support for indexes.

tarek.ziade python-checkins at python.org
Sun Aug 8 11:50:46 CEST 2010


tarek.ziade pushed 35ac4bcb4b04 to distutils2:

http://hg.python.org/distutils2/rev/35ac4bcb4b04
changeset:   449:35ac4bcb4b04
user:        Alexis Metaireau <ametaireau at gmail.com>
date:        Tue Jul 20 18:13:05 2010 +0200
summary:     Updated mirror support for indexes.
files:       src/distutils2/index/mirrors.py, src/distutils2/index/simple.py, src/distutils2/tests/test_index_simple.py

diff --git a/src/distutils2/index/mirrors.py b/src/distutils2/index/mirrors.py
new file mode 100644
--- /dev/null
+++ b/src/distutils2/index/mirrors.py
@@ -0,0 +1,55 @@
+"""Utilities related to the mirror infrastructure defined in PEP 381. 
+See http://www.python.org/dev/peps/pep-0381/
+"""
+
+from string import ascii_lowercase
+import socket
+
+DEFAULT_MIRROR_URL = "last.pypi.python.org"
+
+def get_mirrors(hostname=None):
+    """Return the list of mirrors from the last record found on the DNS
+    entry::
+
+    >>> from distutils2.index.mirrors import get_mirrors
+    >>> get_mirrors()
+    ['a.pypi.python.org', 'b.pypi.python.org', 'c.pypi.python.org',
+    'd.pypi.python.org']
+
+    """
+    if hostname is None:
+        hostname = DEFAULT_MIRROR_URL
+    
+    # return the last mirror registered on PyPI.
+    try:
+        unused, aliaslist, ipaddr = socket.gethostbyname_ex(hostname)
+    except socket.gaierror:
+        return []
+    if len(aliaslist) < 2:
+        return []
+    index_adress = aliaslist[-1]
+    end_letter = index_adress.split(".", 1)
+
+    # determine the list from the last one.
+    return ["%s.%s" % (s, end_letter[1]) for s in string_range(end_letter[0])]
+
+def string_range(last):
+    """Compute the range of strings between "a" and last.
+    
+    This works for simple "a to z" lists, but also for "a to zz" lists.
+    """
+    for k in range(len(last)):
+        for x in product(ascii_lowercase, repeat=k+1):
+            result = ''.join(x)
+            yield result
+            if result == last:
+                return
+
+def product(*args, **kwds):
+    pools = map(tuple, args) * kwds.get('repeat', 1)
+    result = [[]]
+    for pool in pools:
+        result = [x+[y] for x in result for y in pool]
+    for prod in result:
+        yield tuple(prod)
+
diff --git a/src/distutils2/index/simple.py b/src/distutils2/index/simple.py
--- a/src/distutils2/index/simple.py
+++ b/src/distutils2/index/simple.py
@@ -17,11 +17,11 @@
                                    get_infos_from_url)
 from distutils2.index.errors import (IndexError, DownloadError,
                                      UnableToDownload)
+from distutils2.index.mirrors import get_mirrors
 from distutils2 import __version__ as __distutils2_version__
 
 # -- Constants -----------------------------------------------
-DEFAULT_INDEX_URL = "http://pypi.python.org/simple/"
-DEFAULT_MIRROR_URL = "mirrors.pypi.python.org"
+DEFAULT_INDEX_URL = "http://a.pypi.python.org/simple/"
 DEFAULT_HOSTS = ("*",)
 SOCKET_TIMEOUT = 15
 USER_AGENT = "Python-urllib/%s distutils2/%s" % (
@@ -61,6 +61,9 @@
 class Crawler(IndexClient):
     """Provides useful tools to request the Python Package Index simple API.
 
+    You can specify both mirrors and mirrors_url, but mirrors_url will only be
+    used if mirrors is set to None.
+
     :param index_url: the url of the simple index to search on.
     :param follow_externals: tell if following external links is needed or
                              not. Default is False.
@@ -74,28 +77,30 @@
                          pick up the last final version.
     :param mirrors_url: the url to look on for DNS records giving mirror
                         adresses.
-    :param mirrors: a list of mirrors to check out if problems
-                         occurs while working with the one given in "url"
+    :param mirrors: a list of mirrors (see PEP 381).
     :param timeout: time in seconds to consider a url has timeouted.
+    :param mirrors_max_tries: number of times to try requesting information
+                              on mirrors before switching.
     """
 
     def __init__(self, index_url=DEFAULT_INDEX_URL, hosts=DEFAULT_HOSTS,
                  follow_externals=False, prefer_final=False,
-                 mirrors_url=DEFAULT_MIRROR_URL, mirrors=None,
-                 timeout=SOCKET_TIMEOUT):
+                 mirrors_url=None, mirrors=None,
+                 timeout=SOCKET_TIMEOUT, mirrors_max_tries=0):
         self.follow_externals = follow_externals
-
+        
+        # mirroring attributes.
         if not index_url.endswith("/"):
             index_url += "/"
-        self._index_urls = [index_url]
         # if no mirrors are defined, use the method described in PEP 381.
         if mirrors is None:
-            try:
-                mirrors = socket.gethostbyname_ex(mirrors_url)[-1]
-            except socket.gaierror:
-                mirrors = []
-        self._index_urls.extend(mirrors)
-        self._current_index_url = 0
+            mirrors = get_mirrors(mirrors_url)
+        self._mirrors = set(mirrors)
+        self._mirrors_used = set()
+        self.index_url = index_url 
+        self._mirrors_max_tries = mirrors_max_tries
+        self._mirrors_tries = 0
+
         self._timeout = timeout
         self._prefer_final = prefer_final
 
@@ -108,10 +113,6 @@
         self._processed_urls = []
         self._releases = {}
   
-    @property
-    def index_url(self):
-        return self._index_urls[self._current_index_url]
-
     def _search_for_releases(self, requirements):
         """Search for distributions and return a ReleaseList object containing
         the results
@@ -128,14 +129,19 @@
 
     def _switch_to_next_mirror(self):
         """Switch to the next mirror (eg. point self.index_url to the next
-        url.
+        mirror url).
+
+        Raise a KeyError if all mirrors have been tried.
         """
-        # Internally, iter over the _index_url iterable, if we have read all
-        # of the available indexes, raise an exception.
-        if self._current_index_url < len(self._index_urls):
-            self._current_index_url = self._current_index_url + 1
-        else:
-            raise UnableToDownload("All mirrors fails")
+        self._mirrors_used.add(self.index_url)
+        index_url = self._mirrors.pop()
+        if not ("http://" or "https://" or "file://") in index_url:
+            index_url = "http://%s" % index_url
+
+        if not index_url.endswith("/simple"):
+            index_url = "%s/simple/" % index_url
+
+        self.index_url = index_url
 
     def _is_browsable(self, url):
         """Tell if the given URL can be browsed or not.
@@ -270,8 +276,13 @@
             self._process_url(url, name)
         except DownloadError:
             # if an error occurs, try with the next index_url
-            # (provided by the mirrors)
-            self._switch_to_next_mirror()
+            if self._mirrors_tries >= self._mirrors_max_tries:
+                try:
+                    self._switch_to_next_mirror()
+                except KeyError:
+                   raise UnableToDownload("Tried all mirrors") 
+            else:
+                self._mirrors_tries += 1
             self._releases.clear()
             self._process_index_page(name)
 
diff --git a/src/distutils2/tests/test_index_simple.py b/src/distutils2/tests/test_index_simple.py
--- a/src/distutils2/tests/test_index_simple.py
+++ b/src/distutils2/tests/test_index_simple.py
@@ -222,7 +222,7 @@
             # create the index using both servers
             crawler = Crawler(server.full_address + "/simple/",
                 hosts=('*',), timeout=1,  # set the timeout to 1s for the tests
-                mirrors=[mirror.full_address + "/simple/", ])
+                mirrors=[mirror.full_address, ])
 
             # this should not raise a timeout
             self.assertEqual(4, len(crawler.find("foo")))

--
Repository URL: http://hg.python.org/distutils2


More information about the Python-checkins mailing list