[Python-checkins] python/nondist/sandbox/setuptools/setuptools package_index.py, 1.1, 1.2
pje@users.sourceforge.net
pje at users.sourceforge.net
Tue Jun 14 03:26:29 CEST 2005
Update of /cvsroot/python/python/nondist/sandbox/setuptools/setuptools
In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv4594/setuptools
Modified Files:
package_index.py
Log Message:
Add lots of progress messages, so people know what the package search is
doing.
Index: package_index.py
===================================================================
RCS file: /cvsroot/python/python/nondist/sandbox/setuptools/setuptools/package_index.py,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -d -r1.1 -r1.2
--- package_index.py 12 Jun 2005 03:44:07 -0000 1.1
+++ package_index.py 14 Jun 2005 01:26:26 -0000 1.2
@@ -90,21 +90,21 @@
self.fetched_urls = {}
self.package_pages = {}
- def scan_url(self, url):
- self.process_url(url, True)
-
def process_url(self, url, retrieve=False):
if url in self.scanned_urls and not retrieve:
return
self.scanned_urls[url] = True
dists = list(distros_for_url(url))
- map(self.add, dists)
+ if dists: self.debug("Found link: %s", url)
- if dists or not retrieve or url in self.fetched_urls:
+ if dists or not retrieve or url in self.fetched_urls:
+ for dist in dists:
+ self.add(dist)
# don't need the actual page
return
+ self.info("Reading %s", url)
f = self.open_url(url)
self.fetched_urls[url] = self.fetched_urls[f.url] = True
if 'html' not in f.headers['content-type'].lower():
@@ -121,17 +121,11 @@
link = urlparse.urljoin(base, match.group(1))
self.process_url(link)
- def find_packages(self,requirement):
- self.scan_url(self.index_url + requirement.distname)
- if not self.package_pages.get(requirement.key):
- # We couldn't find the target package, so search the index page too
- self.scan_url(self.index_url)
- for url in self.package_pages.get(requirement.key,()):
- # scan each page that might be related to the desired package
- self.scan_url(url)
-
def process_index(self,url,page):
+ """Process the contents of a PyPI page"""
+
def scan(link):
+ # Process a URL to see if it's for a package page
if link.startswith(self.index_url):
parts = map(
urllib2.unquote, link[len(self.index_url):].split('/')
@@ -141,10 +135,12 @@
pkg = safe_name(parts[0])
ver = safe_version(parts[1])
self.package_pages.setdefault(pkg.lower(),{})[link] = True
+
if url==self.index_url or 'Index of Packages</title>' in page:
# process an index page into the package-page index
for match in HREF.finditer(page):
scan( urlparse.urljoin(url, match.group(1)) )
+
else:
scan(url) # ensure this page is in the page index
# process individual package page
@@ -156,11 +152,56 @@
# Process the found URL
self.scan_url(urlparse.urljoin(url, match.group(1)))
+
+
+
+
+
+
+
+
+
+
+ def find_packages(self,requirement):
+ self.scan_url(self.index_url + requirement.distname+'/')
+ if not self.package_pages.get(requirement.key):
+ # We couldn't find the target package, so search the index page too
+ self.warn(
+ "Couldn't find index page for %r (maybe misspelled?)",
+ requirement.distname
+ )
+ if self.index_url not in self.fetched_urls:
+ self.warn(
+ "Scanning index of all packages (this may take a while)"
+ )
+ self.scan_url(self.index_url)
+
+ for url in self.package_pages.get(requirement.key,()):
+ # scan each page that might be related to the desired package
+ self.scan_url(url)
+
def obtain(self,requirement):
self.find_packages(requirement)
for dist in self.get(requirement.key, ()):
if dist in requirement:
return dist
+ self.debug("%s does not match %s", requirement, dist)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
def download(self, spec, tmpdir):
"""Locate and/or download `spec`, returning a local filename
@@ -193,19 +234,21 @@
"Not a URL, existing file, or requirement spec: %r" %
(spec,)
)
-
# process a Requirement
+ self.info("Searching for %s", spec)
dist = self.best_match(spec,[])
if dist is not None:
+ self.info("Best match: %s", dist)
return self.download(dist.path, tmpdir)
+ self.warn("No local packages or download links found for %s", spec)
return None
-
-
dl_blocksize = 8192
def _download_to(self, url, filename):
+ self.info("Downloading %s", url)
+
# Download the file
fp, tfp = None, None
try:
@@ -242,8 +285,6 @@
def reporthook(self, url, filename, blocknum, blksize, size):
pass # no-op
-
-
def open_url(self, url):
try:
return urllib2.urlopen(url)
@@ -278,8 +319,8 @@
else:
return filename
-
-
+ def scan_url(self, url):
+ self.process_url(url, True)
@@ -313,22 +354,24 @@
def _download_svn(self, url, filename):
+ self.info("Doing subversion checkout from %s to %s", url, filename)
os.system("svn checkout -q %s %s" % (url, filename))
return filename
+ def debug(self, msg, *args):
+ pass #print msg % args # XXX
-
-
-
-
-
-
-
-
+ def info(self, msg, *args):
+ print msg % args # XXX
+
+ def warn(self, msg, *args):
+ print msg % args # XXX
def _download_sourceforge(self, source_url, sf_page, tmpdir):
"""Download package from randomly-selected SourceForge mirror"""
+ self.debug("Processing SourceForge mirror page")
+
mirror_regex = re.compile(r'HREF=(/.*?\?use_mirror=[^>]*)')
urls = [m.group(1) for m in mirror_regex.finditer(sf_page)]
if not urls:
@@ -338,6 +381,12 @@
import random
url = urlparse.urljoin(source_url, random.choice(urls))
+
+ self.info(
+ "Requesting redirect to (randomly selected) %r mirror",
+ url.split('=',1)[-1]
+ )
+
f = self.open_url(url)
match = re.search(
r'<META HTTP-EQUIV="refresh" content=".*?URL=(.*?)"',
@@ -359,11 +408,3 @@
-
-
-
-
-
-
-
-
More information about the Python-checkins
mailing list