From python-checkins at python.org Sun Aug 1 19:55:14 2010 From: python-checkins at python.org (martin.von.loewis) Date: Sun, 1 Aug 2010 19:55:14 +0200 (CEST) Subject: [Pypi-checkins] r837 - trunk/pypi/templates Message-ID: <20100801175514.910B2EEBA8@mail.python.org> Author: martin.von.loewis Date: Sun Aug 1 19:55:14 2010 New Revision: 837 Modified: trunk/pypi/templates/files.pt Log: Drop old Python versions. Modified: trunk/pypi/templates/files.pt ============================================================================== --- trunk/pypi/templates/files.pt (original) +++ trunk/pypi/templates/files.pt Sun Aug 1 19:55:14 2010 @@ -40,8 +40,6 @@ + + + +''' % (url, m.upload_key, f.path)) class File(blobstore_handlers.BlobstoreDownloadHandler): def get(self, path): @@ -44,21 +89,20 @@ f = None if f: if f[0].contents: - self.send_blob(f.contents) + self.send_blob(f[0].contents) else: self.response.headers['content-type'] = 'text/plain' - self.response.error(404) + self.response.set_status(404) self.response.out.write('Not yet copied') return if path: - d = model.Directory.all().filter("path = ", path) + d = model.Directory.all().filter("path = ", path).fetch(1) if not d: - self.response.error(404) + self.response.set_status(404) return - else: - d = None - dirs = model.Directory.all().filter('parent = ', d).fetch(100) - files = model.File.all().filter('parent = ', d).fetch(1000) + dirs = model.Directory.all().filter('dotdot = ', path).fetch(1000) + files = model.File.all().filter('dotdot = ', path).fetch(1000) + #files = model.File.all().fetch(1000) self.response.out.write(template.render(tpl_path('dir.html'), { 'path':path, 'dirs':dirs, 'files':files })) @@ -74,15 +118,20 @@ self.response.headers['content-type'] = 'application/octet-stream' self.response.out.write(p.sig) else: - self.response.error(404) + self.response.set_status(404) class Stats(webapp.RequestHandler): def get(self, path): self.response.headers['content-type'] = 'text/plain' - 
self.response.write('not implemented yet') + self.response.out.write('not implemented yet') class Step(webapp.RequestHandler): def get(self): self.response.headers['content-type'] = 'text/plain' self.response.out.write(fetch.step()) post = get + +class Cron(webapp.RequestHandler): + def get(self): + self.response.headers['content-type'] = 'text/plain' + self.response.out.write(fetch.cron()) Modified: trunk/appengine/mirror.py ============================================================================== --- trunk/appengine/mirror.py (original) +++ trunk/appengine/mirror.py Wed Aug 4 01:25:32 2010 @@ -9,7 +9,12 @@ ('/packages/(.*)', File), ('/serversig/(.*)', Serversig), ('/local-stats/days/(.*)', Stats), - ('/step', Step)], + ('/cron', Cron), + ('/step', Step), + ('/mkupload/(.*)', MkUpload), + ('/upload', Upload), + ('/uploaded/(.*)', Upload), + ], debug=True) def main(): Modified: trunk/appengine/model.py ============================================================================== --- trunk/appengine/model.py (original) +++ trunk/appengine/model.py Wed Aug 4 01:25:32 2010 @@ -4,41 +4,55 @@ # singleton object todo = db.BlobProperty() simple = db.BlobProperty() # simple root - last_modified = db.StringProperty() + last_modified = db.IntegerProperty() # seconds since the epoch + upload_key = db.StringProperty() + current_upload = db.StringProperty() + current_upload_start = db.IntegerProperty() + last_step = db.IntegerProperty() + @classmethod + def instance(klass): + res = MirrorState().all().fetch(1) + if res: + return res[0] + return None class Project(db.Model): simple = db.BlobProperty() sig = db.BlobProperty() +# All path names are relative to the /packages/ URL + class Directory(db.Model): path = db.StringProperty() + # parent was already reserved as a property name + dotdot = db.StringProperty() @staticmethod def mkdir(path): - if '/' not in path: - res = Directory.get_by_key_name(path, parent=None) - if not res: - res = Directory(parent=None, 
key_name=path, path=path) - res.put() - return res - parent, name = path.rsplit('/', 1) - parent = Directory.mkdir(parent) - res = Directory.get_by_key_name(name, parent=parent) - if not res: - res = Directory(parent=parent, key_name=name, path=path) + res = Directory.all().filter("path = ", path).fetch(1) + if res: + return res[0] + components = path.rsplit('/', 1) + if len(components) == 1: + res = Directory(dotdot="", path=path) res.put() + return res + parent, name = components + Directory.mkdir(parent) + res = Directory(dotdot=parent, path=path) + res.put() return res @property def name(self): - return self.key().name() - + return self.path.rsplit("/")[-1] class File(db.Model): # also using parent and key contents = blobstore.BlobReferenceProperty() etag = db.StringProperty() project = db.ReferenceProperty(Project) + dotdot = db.StringProperty() path = db.StringProperty() @property def name(self): - return self.key().name() + return self.path.rsplit("/")[-1] Modified: trunk/appengine/templates/dir.html ============================================================================== --- trunk/appengine/templates/dir.html (original) +++ trunk/appengine/templates/dir.html Wed Aug 4 01:25:32 2010 @@ -4,10 +4,10 @@ {% for dir in dirs %} -{{dir}}
+{{dir.name}}
{% endfor %} {% for file in files %} -{{file}}
+{{file.name}}
{% endfor %} \ No newline at end of file From python-checkins at python.org Fri Aug 6 16:44:13 2010 From: python-checkins at python.org (martin.von.loewis) Date: Fri, 6 Aug 2010 16:44:13 +0200 (CEST) Subject: [Pypi-checkins] r842 - trunk/appengine Message-ID: <20100806144413.01573EE9F7@mail.python.org> Author: martin.von.loewis Date: Fri Aug 6 16:44:12 2010 New Revision: 842 Added: trunk/appengine/stats.py Modified: trunk/appengine/handlers.py trunk/appengine/mirror.py trunk/appengine/model.py Log: Collect download stats. Modified: trunk/appengine/handlers.py ============================================================================== --- trunk/appengine/handlers.py (original) +++ trunk/appengine/handlers.py Fri Aug 6 16:44:12 2010 @@ -1,7 +1,7 @@ import os, logging from google.appengine.ext import webapp, blobstore from google.appengine.ext.webapp import template, blobstore_handlers -import model, fetch +import model, fetch, stats def tpl_path(template_file_name): return os.path.join(os.path.dirname(__file__), 'templates', template_file_name) @@ -89,6 +89,11 @@ f = None if f: if f[0].contents: + d = model.Download(day=stats.today(), + name=f[0].name, + project=f[0].project.key().name(), + agent=self.request.headers['user-agent']) + d.put() self.send_blob(f[0].contents) else: self.response.headers['content-type'] = 'text/plain' Modified: trunk/appengine/mirror.py ============================================================================== --- trunk/appengine/mirror.py (original) +++ trunk/appengine/mirror.py Fri Aug 6 16:44:12 2010 @@ -12,6 +12,7 @@ ('/cron', Cron), ('/step', Step), ('/mkupload/(.*)', MkUpload), + #('/mkupload2', MkUpload2), ('/upload', Upload), ('/uploaded/(.*)', Upload), ], Modified: trunk/appengine/model.py ============================================================================== --- trunk/appengine/model.py (original) +++ trunk/appengine/model.py Fri Aug 6 16:44:12 2010 @@ -56,3 +56,9 @@ @property def name(self): return 
self.path.rsplit("/")[-1] + +class Download(db.Model): + day = db.StringProperty() + name = db.StringProperty() + project = db.StringProperty() + agent = db.StringProperty() Added: trunk/appengine/stats.py ============================================================================== --- (empty file) +++ trunk/appengine/stats.py Fri Aug 6 16:44:12 2010 @@ -0,0 +1,5 @@ +import datetime + +def today(): + now = datetime.datetime.utcnow() + return "%s-%.2s-%.2s" % (now.year, now.month, now.day) From python-checkins at python.org Fri Aug 6 16:47:32 2010 From: python-checkins at python.org (martin.von.loewis) Date: Fri, 6 Aug 2010 16:47:32 +0200 (CEST) Subject: [Pypi-checkins] r843 - trunk/appengine Message-ID: <20100806144732.B0688EEA02@mail.python.org> Author: martin.von.loewis Date: Fri Aug 6 16:47:32 2010 New Revision: 843 Modified: trunk/appengine/stats.py Log: Fix date formatting. Modified: trunk/appengine/stats.py ============================================================================== --- trunk/appengine/stats.py (original) +++ trunk/appengine/stats.py Fri Aug 6 16:47:32 2010 @@ -2,4 +2,4 @@ def today(): now = datetime.datetime.utcnow() - return "%s-%.2s-%.2s" % (now.year, now.month, now.day) + return "%s-%.2d-%.2d" % (now.year, now.month, now.day) From python-checkins at python.org Sat Aug 7 12:22:07 2010 From: python-checkins at python.org (martin.von.loewis) Date: Sat, 7 Aug 2010 12:22:07 +0200 (CEST) Subject: [Pypi-checkins] r844 - trunk/pypi Message-ID: <20100807102207.7D04FEEA5E@mail.python.org> Author: martin.von.loewis Date: Sat Aug 7 12:22:07 2010 New Revision: 844 Modified: trunk/pypi/rpc.py Log: If the requested release does not exist, return an empty dictionary. 
Modified: trunk/pypi/rpc.py ============================================================================== --- trunk/pypi/rpc.py (original) +++ trunk/pypi/rpc.py Sat Aug 7 12:22:07 2010 @@ -76,6 +76,8 @@ def release_data(store, package_name, version): info = store.get_package(package_name, version).as_dict() + if not info: + return {} del info['description_html'] for col in ('requires', 'provides', 'obsoletes', 'requires_dist', 'obsoletes_dist', 'project_url', 'provides_dist', From python-checkins at python.org Sat Aug 7 12:24:31 2010 From: python-checkins at python.org (martin.von.loewis) Date: Sat, 7 Aug 2010 12:24:31 +0200 (CEST) Subject: [Pypi-checkins] r845 - trunk/pypi Message-ID: <20100807102431.A318ED8A3@mail.python.org> Author: martin.von.loewis Date: Sat Aug 7 12:24:31 2010 New Revision: 845 Modified: trunk/pypi/rpc.py Log: If the requested release does not exist, return an empty dictionary. (2nd try) Modified: trunk/pypi/rpc.py ============================================================================== --- trunk/pypi/rpc.py (original) +++ trunk/pypi/rpc.py Sat Aug 7 12:24:31 2010 @@ -75,9 +75,10 @@ def release_data(store, package_name, version): - info = store.get_package(package_name, version).as_dict() + info = store.get_package(package_name, version) if not info: return {} + info = info.as_dict() del info['description_html'] for col in ('requires', 'provides', 'obsoletes', 'requires_dist', 'obsoletes_dist', 'project_url', 'provides_dist', From python-checkins at python.org Sat Aug 7 21:50:00 2010 From: python-checkins at python.org (martin.von.loewis) Date: Sat, 7 Aug 2010 21:50:00 +0200 (CEST) Subject: [Pypi-checkins] r846 - trunk/appengine Message-ID: <20100807195000.873D4EE984@mail.python.org> Author: martin.von.loewis Date: Sat Aug 7 21:49:56 2010 New Revision: 846 Modified: trunk/appengine/fetch.py trunk/appengine/handlers.py trunk/appengine/model.py Log: Add incremental updates. 
Modified: trunk/appengine/fetch.py ============================================================================== --- trunk/appengine/fetch.py (original) +++ trunk/appengine/fetch.py Sat Aug 7 21:49:56 2010 @@ -18,6 +18,8 @@ def rpc(): return xmlrpclib.ServerProxy('http://pypi.python.org/pypi') +################### Transfer ###################################### + def simple_page(m, uproject): project = uproject.encode('utf-8') h = httplib.HTTPConnection('pypi.python.org') @@ -32,10 +34,14 @@ r = h.getresponse() html = r.read() if r.status == 404: + if project: + delete_package(obj) return None if r.status == 301: # package not existant anymore, however, similarly-spelled # package exists + if project: + delete_package(obj) return None if r.status != 200: raise ValueError, "Status %d on %s" % (r.status, project) @@ -54,17 +60,18 @@ return html def get_state(): - t = time.time() + t = int(time.time()) m = model.MirrorState.all().fetch(1) if m: return m[0] secret = binascii.b2a_hex(os.urandom(10)) logging.info("Secret key "+secret) - m = model.MirrorState(upload_key=secret) + m = model.MirrorState(upload_key=secret, + epoch=0) todo = [('package', '')] for p in rpc().list_packages(): todo.append(('package', p)) - todo.append(('packages_listed', t)) + todo.append(('last_modified', t)) m.todo = pickle.dumps(todo) m.put() return m @@ -74,6 +81,7 @@ if not data: return x = fromstring(data) + files = set() for a in x.findall(".//a"): url = a.attrib['href'] if not url.startswith('../../packages/'): @@ -82,6 +90,9 @@ url = url[len('../../packages/'):] # insert after current task todo.insert(1,('file', (name, url))) + files.add(url) + if name: + delete_extra_files(name, files) def copy_file(m, todo, (package, path)): project = model.Project.get_by_key_name(package) @@ -136,12 +147,9 @@ f.contents.delete() f.delete() -def packages_listed(m, todo, t): - # reappend after all files - todo.append(('last_modified', t)) - def last_modified(m, todo, t): - m.last_modified = 
time.strftime("%Y%m%dT%H:%M:%S", time.gmtime(t)) + m.last_modified = int(t) + m.epoch = m.epoch+1 m.put() def transfer_file(m): @@ -168,9 +176,41 @@ return True return True +############## Incremental Updates ################# + +def delete_file(f): + if f.contents: + f.contents.delete() + f.delete() + +def delete_package(obj): + if not obj: + return + for f in model.File.all().filter("project = ", obj): + delete_file(f) + obj.delete() + +def delete_extra_files(name, files): + p = model.Project.get_by_key_name(name) + for f in model.File.all().filter("project = ", p): + if f.path not in files: + delete_file(f) + +def check_modifications(m, todo): + now = int(time.time()) + modified = rpc().changelog(m.last_modified-1) + for name, version, date, action in modified: + if ('package', name) in todo: + continue + todo.append(('package', name)) + if modified: + todo.append(('package', '')) + todo.append(('last_modified', now)) + +############## Queuing ############################# + actions = {'package':package, 'file':copy_file, - 'packages_listed': packages_listed, 'last_modified': last_modified, } @@ -185,8 +225,10 @@ if transfer_file(m): # a new file transfer was initiated, set timer return "file transfer started" - if not m.todo: - return "" + if not todo: + check_modifications(m, todo) + if not todo: + return "" action, param = todo[0] try: actions[action](m, todo, param) @@ -199,13 +241,13 @@ if todo: # name the task, so that no two of them will be added try: - # Allow re-running a task once a day + # Allow re-running a task once a day, per epoch day = int(time.time())//3600/24-14820 n, p = todo[0] if n == 'file': p = p[1] - name = '%s-%s' % (n, p) - name = re.sub('[^a-zA-Z0-9-]', '-', name)+'-'+str(day) + name = '%s-%s-%d-%d-%s' % (n, p, hash(p) & 0xFFFF, day, m.epoch) + name = re.sub('[^a-zA-Z0-9-]', '-', name) taskqueue.add(name=name, url='/step') except taskqueue.InvalidTaskError, e: # likely, task already existed, or was tombstoned Modified: 
trunk/appengine/handlers.py ============================================================================== --- trunk/appengine/handlers.py (original) +++ trunk/appengine/handlers.py Sat Aug 7 21:49:56 2010 @@ -1,4 +1,4 @@ -import os, logging +import os, logging, time from google.appengine.ext import webapp, blobstore from google.appengine.ext.webapp import template, blobstore_handlers import model, fetch, stats @@ -23,7 +23,7 @@ def get(self): self.response.headers['content-type'] = 'text/plain' p = model.MirrorState.all().fetch(1)[0] - self.response.out.write(p.last_modified) + self.response.out.write(time.strftime("%Y%m%dT%H:%M:%S\n", time.gmtime(p.last_modified))) class Upload(blobstore_handlers.BlobstoreUploadHandler): def post(self): @@ -105,8 +105,10 @@ if not d: self.response.set_status(404) return - dirs = model.Directory.all().filter('dotdot = ', path).fetch(1000) - files = model.File.all().filter('dotdot = ', path).fetch(1000) + dirs = sorted(model.Directory.all().filter('dotdot = ', path).fetch(1000), + key=lambda d:d.name) + files = sorted(model.File.all().filter('dotdot = ', path).fetch(1000), + key=lambda f:f.name) #files = model.File.all().fetch(1000) self.response.out.write(template.render(tpl_path('dir.html'), { 'path':path, 'dirs':dirs, 'files':files })) Modified: trunk/appengine/model.py ============================================================================== --- trunk/appengine/model.py (original) +++ trunk/appengine/model.py Sat Aug 7 21:49:56 2010 @@ -9,6 +9,8 @@ current_upload = db.StringProperty() current_upload_start = db.IntegerProperty() last_step = db.IntegerProperty() + # incremented every time a synchronization completes + epoch = db.IntegerProperty() @classmethod def instance(klass): res = MirrorState().all().fetch(1) From python-checkins at python.org Sat Aug 7 23:11:55 2010 From: python-checkins at python.org (martin.von.loewis) Date: Sat, 7 Aug 2010 23:11:55 +0200 (CEST) Subject: [Pypi-checkins] r847 - trunk/appengine 
Message-ID: <20100807211155.B6907EE99F@mail.python.org> Author: martin.von.loewis Date: Sat Aug 7 23:11:55 2010 New Revision: 847 Modified: trunk/appengine/handlers.py Log: Give 404 on invalid package names. Modified: trunk/appengine/handlers.py ============================================================================== --- trunk/appengine/handlers.py (original) +++ trunk/appengine/handlers.py Sat Aug 7 23:11:55 2010 @@ -17,6 +17,10 @@ p = model.MirrorState.all().fetch(1)[0] else: p = model.Project.get_by_key_name(path) + if p is None: + self.response.set_status(404) + self.response.out.write("404 Not FoundPackage %s does not exist" % path) + return self.response.out.write(p.simple) class LastModified(webapp.RequestHandler): From python-checkins at python.org Tue Aug 10 18:33:26 2010 From: python-checkins at python.org (martin.von.loewis) Date: Tue, 10 Aug 2010 18:33:26 +0200 (CEST) Subject: [Pypi-checkins] r848 - trunk/appengine Message-ID: <20100810163326.687DEEE9E8@mail.python.org> Author: martin.von.loewis Date: Tue Aug 10 18:33:26 2010 New Revision: 848 Modified: trunk/appengine/handlers.py Log: Reject package names with slashes in them. 
Modified: trunk/appengine/handlers.py ============================================================================== --- trunk/appengine/handlers.py (original) +++ trunk/appengine/handlers.py Tue Aug 10 18:33:26 2010 @@ -15,6 +15,8 @@ path = path.rstrip('/') if not path: p = model.MirrorState.all().fetch(1)[0] + elif '/' in path: + return self.error(404) else: p = model.Project.get_by_key_name(path) if p is None: From python-checkins at python.org Wed Aug 11 03:00:48 2010 From: python-checkins at python.org (richard) Date: Wed, 11 Aug 2010 03:00:48 +0200 (CEST) Subject: [Pypi-checkins] r849 - trunk/pypi Message-ID: <20100811010048.F3129EE996@mail.python.org> Author: richard Date: Wed Aug 11 03:00:48 2010 New Revision: 849 Modified: trunk/pypi/webui.py Log: revert the unwanted multiple-version behaviour in json & doap export Modified: trunk/pypi/webui.py ============================================================================== --- trunk/pypi/webui.py (original) +++ trunk/pypi/webui.py Wed Aug 11 03:00:48 2010 @@ -977,15 +977,28 @@ self.role_form() + def _get_latest_pkg_info(self, name, version): + # get the appropriate package info from the database + if name is None: + try: + name = self.form['name'] + except KeyError: + raise NotFound, 'no package name supplied' + if version is None: + if self.form.has_key('version'): + version = self.form['version'] + else: + l = self.store.get_latest_release(name, hidden=False) + try: + version = l[-1][1] + except IndexError: + raise NotFound, 'no releases' + return self.store.get_package(name, version), name, version + def doap(self, name=None, version=None): '''Return DOAP rendering of a package. 
''' - try: - info, latest_version = self._load_release_info(name, version) - except MultipleReleases, e: - return self.index(releases=e.releases) - - name = info['name'] + info, name, version = self._get_latest_pkg_info(name, version) root = cElementTree.Element('rdf:RDF', { 'xmlns:rdf': "http://www.w3.org/1999/02/22-rdf-syntax-ns#", @@ -1057,11 +1070,7 @@ def json(self, name=None, version=None): '''Return JSON rendering of a package. ''' - try: - info, latest_version = self._load_release_info(name, version) - except MultipleReleases, e: - return self.index(releases=e.releases) - name, version = info['name'], info['version'] + info, name, version = self._get_latest_pkg_info(name, version) d = { 'info': rpc.release_data(self.store, name, version), 'urls': rpc.release_urls(self.store, name, version), @@ -1082,30 +1091,12 @@ s = '%s(%s)' % (callback, s) self.wfile.write(s) - def _get_pkg_info(self, name, version): - # get the appropriate package info from the database - if name is None: - try: - name = self.form['name'] - except KeyError: - raise NotFound, 'no package name supplied' - if version is None: - if self.form.has_key('version'): - version = self.form['version'] - else: - l = self.store.get_latest_release(name, hidden=False) - try: - version = l[-1][1] - except IndexError: - raise NotFound, 'no releases' - return self.store.get_package(name, version), name, version - def display_pkginfo(self, name=None, version=None): '''Reconstruct and send a PKG-INFO metadata file. 
''' # XXX tarek need to add 1.2 support here # - info, name, version = self._get_pkg_info(name, version) + info, name, version = self._get_latest_pkg_info(name, version) if not info: return self.fail('No such package / version', heading='%s %s'%(name, version), From python-checkins at python.org Wed Aug 11 03:08:33 2010 From: python-checkins at python.org (richard) Date: Wed, 11 Aug 2010 03:08:33 +0200 (CEST) Subject: [Pypi-checkins] r850 - trunk/pypi Message-ID: <20100811010833.EA468EE996@mail.python.org> Author: richard Date: Wed Aug 11 03:08:33 2010 New Revision: 850 Modified: trunk/pypi/webui.py Log: version may be None, ugh Modified: trunk/pypi/webui.py ============================================================================== --- trunk/pypi/webui.py (original) +++ trunk/pypi/webui.py Wed Aug 11 03:08:33 2010 @@ -985,7 +985,7 @@ except KeyError: raise NotFound, 'no package name supplied' if version is None: - if self.form.has_key('version'): + if self.form.get('version'): version = self.form['version'] else: l = self.store.get_latest_release(name, hidden=False) From python-checkins at python.org Wed Aug 11 03:25:38 2010 From: python-checkins at python.org (richard) Date: Wed, 11 Aug 2010 03:25:38 +0200 (CEST) Subject: [Pypi-checkins] r851 - trunk/pypi Message-ID: <20100811012538.E9604EE9A5@mail.python.org> Author: richard Date: Wed Aug 11 03:25:38 2010 New Revision: 851 Modified: trunk/pypi/webui.py Log: handle bogus crap passed by users Modified: trunk/pypi/webui.py ============================================================================== --- trunk/pypi/webui.py (original) +++ trunk/pypi/webui.py Wed Aug 11 03:25:38 2010 @@ -993,7 +993,10 @@ version = l[-1][1] except IndexError: raise NotFound, 'no releases' - return self.store.get_package(name, version), name, version + info = self.store.get_package(name, version) + if not info: + raise NotFound + return info, name, version def doap(self, name=None, version=None): '''Return DOAP rendering of a 
package. From python-checkins at python.org Wed Aug 11 03:27:31 2010 From: python-checkins at python.org (richard) Date: Wed, 11 Aug 2010 03:27:31 +0200 (CEST) Subject: [Pypi-checkins] r852 - trunk/pypi Message-ID: <20100811012731.EF15CEE9A5@mail.python.org> Author: richard Date: Wed Aug 11 03:27:31 2010 New Revision: 852 Modified: trunk/pypi/webui.py Log: moar information Modified: trunk/pypi/webui.py ============================================================================== --- trunk/pypi/webui.py (original) +++ trunk/pypi/webui.py Wed Aug 11 03:27:31 2010 @@ -995,7 +995,7 @@ raise NotFound, 'no releases' info = self.store.get_package(name, version) if not info: - raise NotFound + raise NotFound, 'invalid name/version' return info, name, version def doap(self, name=None, version=None): From python-checkins at python.org Sat Aug 14 18:02:34 2010 From: python-checkins at python.org (martin.von.loewis) Date: Sat, 14 Aug 2010 18:02:34 +0200 (CEST) Subject: [Pypi-checkins] r853 - in trunk/pypi: . tools Message-ID: <20100814160234.9174FF78B@mail.python.org> Author: martin.von.loewis Date: Sat Aug 14 18:02:34 2010 New Revision: 853 Added: trunk/pypi/tools/sql-migrate-20100814.sql (contents, props changed) Modified: trunk/pypi/pkgbase_schema.sql trunk/pypi/rpc.py trunk/pypi/store.py trunk/pypi/tools/demodata.py Log: Integrate dependency tables into a single one. 
Modified: trunk/pypi/pkgbase_schema.sql ============================================================================== --- trunk/pypi/pkgbase_schema.sql (original) +++ trunk/pypi/pkgbase_schema.sql Sat Aug 14 18:02:34 2010 @@ -157,94 +157,18 @@ CREATE INDEX rel_class_trove_id_idx ON release_classifiers(trove_id); CREATE INDEX rel_class_name_version_idx ON release_classifiers(name, version); --- Table structure for table: release_provides -CREATE TABLE release_provides ( +-- Release dependencies +-- See store.py for the valid kind values +CREATE TABLE release_dependencies ( name TEXT, version TEXT, + kind INTEGER, specifier TEXT, FOREIGN KEY (name, version) REFERENCES releases (name, version) ON UPDATE CASCADE ); -CREATE INDEX rel_prov_name_idx ON release_provides(name); -CREATE INDEX rel_prov_version_id_idx ON release_provides(version); -CREATE INDEX rel_prov_name_version_idx ON release_provides (name,version); - - --- Table structure for table: release_requires -CREATE TABLE release_requires ( - name TEXT, - version TEXT, - specifier TEXT, - FOREIGN KEY (name, version) REFERENCES releases (name, version) ON UPDATE CASCADE -); -CREATE INDEX rel_req_name_idx ON release_requires(name); -CREATE INDEX rel_req_version_id_idx ON release_requires(version); -CREATE INDEX rel_req_name_version_idx ON release_requires(name,version); - --- Table structure for table: release_obsoletes -CREATE TABLE release_obsoletes ( - name TEXT, - version TEXT, - specifier TEXT, - FOREIGN KEY (name, version) REFERENCES releases (name, version) ON UPDATE CASCADE -); -CREATE INDEX rel_obs_name_idx ON release_obsoletes(name); -CREATE INDEX rel_obs_version_id_idx ON release_obsoletes(version); -CREATE INDEX rel_obs_name_version_idx ON release_obsoletes (name,version); - --- Table structure for table: release_requires_external -CREATE TABLE release_requires_external ( - name TEXT, - version TEXT, - specifier TEXT, - FOREIGN KEY (name, version) REFERENCES releases (name, version) ON UPDATE CASCADE 
-); -CREATE INDEX rel_req_ext_name_idx ON release_requires_external(name); -CREATE INDEX rel_req_ext_version_id_idx ON release_requires_external(version); -CREATE INDEX rel_req_ext_name_version_idx ON release_requires_external(name,version); - --- Table structure for table: release_requires_dist -CREATE TABLE release_requires_dist ( - name TEXT, - version TEXT, - specifier TEXT, - FOREIGN KEY (name, version) REFERENCES releases (name, version) ON UPDATE CASCADE -); -CREATE INDEX rel_req_dist_name_idx ON release_requires_dist(name); -CREATE INDEX rel_req_dist_version_id_idx ON release_requires_dist(version); -CREATE INDEX rel_req_dist_name_version_idx ON release_requires_dist(name,version); - --- Table structure for table: release_provides_dist -CREATE TABLE release_provides_dist ( - name TEXT, - version TEXT, - specifier TEXT, - FOREIGN KEY (name, version) REFERENCES releases (name, version) ON UPDATE CASCADE -); -CREATE INDEX rel_prov_dist_name_idx ON release_provides_dist(name); -CREATE INDEX rel_prov_dist_version_id_idx ON release_provides_dist(version); -CREATE INDEX rel_prov_dist_name_version_idx ON release_provides_dist(name,version); - --- Table structure for table: release_obsoletes_dist -CREATE TABLE release_obsoletes_dist ( - name TEXT, - version TEXT, - specifier TEXT, - FOREIGN KEY (name, version) REFERENCES releases (name, version) ON UPDATE CASCADE -); -CREATE INDEX rel_obs_dist_name_idx ON release_obsoletes_dist(name); -CREATE INDEX rel_obs_dist_version_id_idx ON release_obsoletes_dist(version); -CREATE INDEX rel_obs_dist_name_version_idx ON release_obsoletes_dist(name,version); - --- Table structure for table: release_project_url -CREATE TABLE release_project_url ( - name TEXT, - version TEXT, - specifier TEXT, - FOREIGN KEY (name, version) REFERENCES releases (name, version) ON UPDATE CASCADE -); -CREATE INDEX rel_proj_url_name_idx ON release_project_url(name); -CREATE INDEX rel_proj_url_version_id_idx ON release_project_url(version); -CREATE INDEX 
rel_proj_url_name_version_idx ON release_project_url(name,version); +CREATE INDEX rel_dep_name_idx ON release_dependencies(name); +CREATE INDEX rel_dep_name_version_idx ON release_dependencies(name, version); +CREATE INDEX rel_dep_name_version_kind_idx ON release_dependencies(name, version, kind); -- Table structure for table: package_files -- python version is only first two digits Modified: trunk/pypi/rpc.py ============================================================================== --- trunk/pypi/rpc.py (original) +++ trunk/pypi/rpc.py Sat Aug 14 18:02:34 2010 @@ -4,6 +4,10 @@ import time from cStringIO import StringIO from SimpleXMLRPCServer import SimpleXMLRPCDispatcher +from collections import defaultdict + +# local imports +from store import dependency class RequestHandler(SimpleXMLRPCDispatcher): """A request dispatcher for the PyPI XML-RPC API.""" @@ -29,7 +33,8 @@ webui_obj.handler.send_header('Content-type', 'text/xml') webui_obj.handler.send_header('charset', 'UTF-8' ); webui_obj.handler.end_headers() - data = webui_obj.handler.rfile.read() + length = int(webui_obj.env['CONTENT_LENGTH']) + data = webui_obj.handler.rfile.read(length) # This should be thread-safe, as the store is really a singleton self.store = webui_obj.store response = self._marshaled_dispatch(data) @@ -80,11 +85,10 @@ return {} info = info.as_dict() del info['description_html'] - for col in ('requires', 'provides', 'obsoletes', 'requires_dist', - 'obsoletes_dist', 'project_url', 'provides_dist', - 'requires_external'): - rows = store.get_release_relationships(package_name, version, col) - info[col] = [row['specifier'] for row in rows] + dependencies = defaultdict(list) + for kind, specifier in store.get_release_dependencies(package_name, version): + dependencies[dependency.by_val[kind]].append(specifier) + info.update(dependencies) classifiers = [r[0] for r in store.get_release_classifiers(package_name, version)] info['classifiers' ] = classifiers Modified: trunk/pypi/store.py 
============================================================================== --- trunk/pypi/store.py (original) +++ trunk/pypi/store.py Sat Aug 14 18:02:34 2010 @@ -2,6 +2,7 @@ ''' import sys, os, re, time, hashlib, random, types, math, stat, errno import logging, cStringIO, string, datetime, calendar, binascii, urllib2, cgi +from collections import defaultdict try: import psycopg2 except ImportError: @@ -33,6 +34,24 @@ ] dist_file_types_d = dict(dist_file_types) +# This could have been done with Postgres ENUMs, however +# a) they are not extensible, and +# b) they are not supported in other databases +class dependency: + requires = 1 + provides = 2 + obsoletes = 3 + requires_dist = 4 + provides_dist = 5 + obsoletes_dist = 6 + requires_external = 7 + project_url = 8 + by_val = {} +for k,v in dependency.__dict__.items(): + if not isinstance(v, int): + continue + dependency.by_val[v] = k + keep_conn = False connection = None keep_trove = True @@ -257,7 +276,7 @@ # now see if we're inserting or updating a release message = None - relationships = {} + relationships = defaultdict(set) old_cifiers = [] html = None if self.has_release(name, version): @@ -295,16 +314,17 @@ old.append('classifiers') # get old classifiers list - for col in ('requires', 'provides', 'obsoletes', 'requires_dist', - 'provides_dist', 'obsoletes_dist', - 'requires_external', 'project_url'): - relationships[col] = self.get_release_relationships(name, - version, col) - relationships[col].sort() - new_val = info.get(col, []) - new_val.sort() - if info.has_key(col) and relationships[col] != new_val: - old.append(col) + for kind, specifier in self.get_release_dependencies(name, version): + relationships[kind].add(specifier) + for nkind, skind in dependency.by_val.items(): + # numerical kinds in relationships; string kinds in info + try: + new_val = set(info[skind]) + except KeyError: + # value not provided + continue + if relationships[skind] != new_val: + old.append(skind) # no update when nothing 
changes if not old: @@ -385,17 +405,15 @@ (name, version, trove_id)) # handle relationship specifiers - for col in ('requires', 'provides', 'obsoletes', 'requires_dist', - 'provides_dist', 'obsoletes_dist', - 'requires_external', 'project_url'): - if not info.has_key(col) or relationships.get(col, []) == info[col]: + for nkind, skind in dependency.by_val.items(): + if not info.has_key(skind) or relationships[nkind] == set(info[skind]): continue - safe_execute(cursor, '''delete from release_%s where name=%%s - and version=%%s'''%col, (name, version)) - for specifier in info[col]: - safe_execute(cursor, '''insert into release_%s (name, version, - specifier) values (%%s, %%s, %%s)'''%col, (name, - version, specifier)) + safe_execute(cursor, '''delete from release_dependencies where name=%s + and version=%s and kind=%s''', (name, version, nkind)) + for specifier in info[skind]: + safe_execute(cursor, '''insert into release_dependencies (name, version, + kind, specifier) values (%s, %s, %s, %s)''', (name, + version, nkind, specifier)) return message @@ -667,10 +685,19 @@ "requires", "provides" or "obsoletes". ''' cursor = self.get_cursor() - safe_execute(cursor, '''select specifier from release_%s where - name=%%s and version=%%s'''%relationship, (name, version)) + safe_execute(cursor, '''select specifier from release_dependencies where + name=%s and version=%s and kind=%s''', (name, version, + getattr(dependency, relationship))) return Result(None, cursor.fetchall(), self._Release_Relationships) + _Release_Dependencies = FastResultRow('kind! 
specifier') + def get_release_dependencies(self, name, version): + '''Fetch all release dependencies of a release.''' + cursor = self.get_cursor() + safe_execute(cursor, '''select kind, specifier from release_dependencies + where name=%s and version=%s''', (name, version)) + return Result(None, cursor.fetchall(), self._Release_Dependencies) + _Package_Roles = FastResultRow('role_name user_name') def get_package_roles(self, name): ''' Fetch the list of Roles for the package. @@ -703,8 +730,9 @@ def get_package_requires_dist(self, name, version): cursor = self.get_cursor() - safe_execute(cursor, '''select specifier from release_requires_dist - where name=%s and version=%s ''', (name, version)) + safe_execute(cursor, '''select specifier from release_dependencies + where name=%s and version=%s and kind=%s''', (name, version, + dependency.requires_dist)) packages = [] for package in cursor.fetchall(): pack = {'name': package[0], @@ -714,8 +742,9 @@ def get_package_provides_dist(self, name, version): cursor = self.get_cursor() - safe_execute(cursor, '''select specifier from release_provides_dist - where name=%s and version=%s ''', (name, version)) + safe_execute(cursor, '''select specifier from release_dependencies + where name=%s and version=%s and kind=%s''', (name, version, + dependency.provides_dist)) packages = [] for package in cursor.fetchall(): pack = {'name': package[0], @@ -725,8 +754,9 @@ def get_package_obsoletes_dist(self, name, version): cursor = self.get_cursor() - safe_execute(cursor, '''select specifier from release_obsoletes_dist - where name=%s and version=%s ''', (name, version)) + safe_execute(cursor, '''select specifier from release_dependencies + where name=%s and version=%s and kind=%s''', (name, version, + dependency.obsoletes_dist)) packages = [] for package in cursor.fetchall(): pack = {'name': package[0], @@ -736,14 +766,16 @@ def get_package_requires_external(self, name, version): cursor = self.get_cursor() - safe_execute(cursor, '''select 
specifier from release_requires_external - where name=%s and version=%s ''', (name, version)) + safe_execute(cursor, '''select specifier from release_dependencies + where name=%s and version=%s and kind=%s''', (name, version, + dependency.requires_external)) return [package[0] for package in cursor.fetchall()] def get_package_project_url(self, name, version): cursor = self.get_cursor() - safe_execute(cursor, '''select specifier from release_project_url - where name=%s and version=%s ''', (name, version)) + safe_execute(cursor, '''select specifier from release_dependencies + where name=%s and version=%s and kind=%s''', (name, version, + dependency.project_url)) project_urls = [] for project in cursor.fetchall(): project_urls.append(project[0].split(',')) @@ -969,10 +1001,7 @@ file['filename'])) # delete ancillary table entries - for tab in ('files', 'provides', 'requires', 'obsoletes', - 'classifiers', 'requires_dist', 'provides_dist', - 'obsoletes_dist', 'requires_external', - 'project_url'): + for tab in ('files', 'dependencies', 'classifiers'): safe_execute(cursor, '''delete from release_%s where name=%%s and version=%%s'''%tab, (name, version)) safe_execute(cursor, 'delete from description_urls where name=%s and version=%s', @@ -998,10 +1027,7 @@ file['filename'])) # delete ancillary table entries - for tab in ('files', 'provides', 'requires', 'obsoletes', - 'classifiers', 'requires_dist', 'provides_dist', - 'obsoletes_dist', 'requires_external', - 'project_url'): + for tab in ('files', 'dependencies', 'classifiers'): safe_execute(cursor, 'delete from release_%s where name=%%s'%tab, (name, )) Modified: trunk/pypi/tools/demodata.py ============================================================================== --- trunk/pypi/tools/demodata.py (original) +++ trunk/pypi/tools/demodata.py Sat Aug 14 18:02:34 2010 @@ -48,6 +48,7 @@ 'description':'Does anybody want to provide real data here?', 'classifiers':["Development Status :: 3 - Alpha", "Programming Language :: 
Python :: 3"], + 'requires_dist':['spam'], '_pypi_hidden':version!='0.4' }) Added: trunk/pypi/tools/sql-migrate-20100814.sql ============================================================================== --- (empty file) +++ trunk/pypi/tools/sql-migrate-20100814.sql Sat Aug 14 18:02:34 2010 @@ -0,0 +1,41 @@ +BEGIN; +CREATE TABLE release_dependencies ( + name TEXT, + version TEXT, + kind INTEGER, + specifier TEXT, + FOREIGN KEY (name, version) REFERENCES releases (name, version) ON UPDATE CASCADE +); +grant all on release_dependencies to pypi; + +insert into release_dependencies(name, version, kind, specifier) + select name, version, 1, specifier from release_requires; +insert into release_dependencies(name, version, kind, specifier) + select name, version, 2, specifier from release_provides; +insert into release_dependencies(name, version, kind, specifier) + select name, version, 3, specifier from release_obsoletes; +insert into release_dependencies(name, version, kind, specifier) + select name, version, 4, specifier from release_requires_dist; +insert into release_dependencies(name, version, kind, specifier) + select name, version, 5, specifier from release_provides_dist; +insert into release_dependencies(name, version, kind, specifier) + select name, version, 6, specifier from release_obsoletes_dist; +insert into release_dependencies(name, version, kind, specifier) + select name, version, 7, specifier from release_requires_external; +insert into release_dependencies(name, version, kind, specifier) + select name, version, 8, specifier from release_project_url; + +CREATE INDEX rel_dep_name_idx ON release_dependencies(name); +CREATE INDEX rel_dep_name_version_idx ON release_dependencies(name, version); +CREATE INDEX rel_dep_name_version_kind_idx ON release_dependencies(name, version, kind); + +drop table release_requires; +drop table release_provides; +drop table release_obsoletes; +drop table release_requires_dist; +drop table release_provides_dist; +drop table 
release_obsoletes_dist; +drop table release_requires_external; +drop table release_project_url; + +COMMIT; From python-checkins at python.org Sun Aug 15 20:55:51 2010 From: python-checkins at python.org (martin.von.loewis) Date: Sun, 15 Aug 2010 20:55:51 +0200 (CEST) Subject: [Pypi-checkins] r854 - trunk/appengine Message-ID: <20100815185551.8E040D7D7@mail.python.org> Author: martin.von.loewis Date: Sun Aug 15 20:55:51 2010 New Revision: 854 Modified: trunk/appengine/fetch.py Log: Catch urlfetch errors. Modified: trunk/appengine/fetch.py ============================================================================== --- trunk/appengine/fetch.py (original) +++ trunk/appengine/fetch.py Sun Aug 15 20:55:51 2010 @@ -18,6 +18,9 @@ def rpc(): return xmlrpclib.ServerProxy('http://pypi.python.org/pypi') +class StepFailed(Exception): + pass + ################### Transfer ###################################### def simple_page(m, uproject): @@ -31,7 +34,10 @@ obj = m h.putheader('User-Agent', UA) h.endheaders() - r = h.getresponse() + try: + r = h.getresponse() + except DownloadError, e: + raise StepFailed(e) html = r.read() if r.status == 404: if project: @@ -198,7 +204,11 @@ def check_modifications(m, todo): now = int(time.time()) - modified = rpc().changelog(m.last_modified-1) + try: + modified = rpc().changelog(m.last_modified-1) + except DownloadError, e: + logging.warning('changelog call failed: '+str(e)) + return for name, version, date, action in modified: if ('package', name) in todo: continue @@ -232,9 +242,9 @@ action, param = todo[0] try: actions[action](m, todo, param) - except Exception, e: - raise - return str(e) + except StepFailed, e: + logging.warning("Step %s/%s failed: %s" % (action, param, e)) + return "failed" del todo[0] m.todo = pickle.dumps(todo) m.put() From python-checkins at python.org Mon Aug 16 10:25:00 2010 From: python-checkins at python.org (martin.von.loewis) Date: Mon, 16 Aug 2010 10:25:00 +0200 (CEST) Subject: [Pypi-checkins] r855 - in 
trunk/appengine: . templates Message-ID: <20100816082500.5185DEEA24@mail.python.org> Author: martin.von.loewis Date: Mon Aug 16 10:25:00 2010 New Revision: 855 Added: trunk/appengine/templates/stats.html (contents, props changed) Modified: trunk/appengine/app.yaml trunk/appengine/cron.yaml trunk/appengine/fetch.py trunk/appengine/handlers.py trunk/appengine/mirror.py trunk/appengine/model.py trunk/appengine/stats.py Log: Integrate download stats. Modified: trunk/appengine/app.yaml ============================================================================== --- trunk/appengine/app.yaml (original) +++ trunk/appengine/app.yaml Mon Aug 16 10:25:00 2010 @@ -12,6 +12,10 @@ script: mirror.py login: admin +- url: /daily + script: mirror.py + login: admin + - url: .* script: mirror.py Modified: trunk/appengine/cron.yaml ============================================================================== --- trunk/appengine/cron.yaml (original) +++ trunk/appengine/cron.yaml Mon Aug 16 10:25:00 2010 @@ -1,4 +1,7 @@ cron: -- description: daily summary job +- description: check for updates url: /cron schedule: every 5 minutes +- description: daily summary job + url: /daily + schedule: every 24 hours Modified: trunk/appengine/fetch.py ============================================================================== --- trunk/appengine/fetch.py (original) +++ trunk/appengine/fetch.py Mon Aug 16 10:25:00 2010 @@ -217,11 +217,18 @@ todo.append(('package', '')) todo.append(('last_modified', now)) +############## Statistics ########################## + +def integrate_stats(m): + import stats + stats.integrate() + ############## Queuing ############################# actions = {'package':package, 'file':copy_file, 'last_modified': last_modified, + 'integrate_stats': integrate_stats, } def queue_step(): Modified: trunk/appengine/handlers.py ============================================================================== --- trunk/appengine/handlers.py (original) +++ trunk/appengine/handlers.py 
Mon Aug 16 10:25:00 2010 @@ -135,12 +135,26 @@ class Stats(webapp.RequestHandler): def get(self, path): - self.response.headers['content-type'] = 'text/plain' - self.response.out.write('not implemented yet') + path = path.rstrip('/') + if not path: + days = [d.day for d in model.Stats.all().fetch(1000) if d.data] + days.sort() + self.response.out.write(template.render(tpl_path('stats.html'), {'days':days})) + return + if not path.endswith('.bz2'): + return self.error(404) + path = path[:-4] + s = model.Stats.all().filter("day = ", path).fetch(1) + if not s or not s[0].data: + return self.error(404) + self.response.headers['content-type'] = 'application/octet-stream' + self.response.out.write(s[0].data) class Step(webapp.RequestHandler): - def get(self): + def get(self, path): self.response.headers['content-type'] = 'text/plain' + if path == '/integrate': + return self.response.out.write(stats.integrate()) self.response.out.write(fetch.step()) post = get @@ -148,3 +162,8 @@ def get(self): self.response.headers['content-type'] = 'text/plain' self.response.out.write(fetch.cron()) + +class Daily(webapp.RequestHandler): + def get(self): + self.response.headers['content-type'] = 'text/plain' + self.response.out.write(stats.integrate()) Modified: trunk/appengine/mirror.py ============================================================================== --- trunk/appengine/mirror.py (original) +++ trunk/appengine/mirror.py Mon Aug 16 10:25:00 2010 @@ -10,7 +10,8 @@ ('/serversig/(.*)', Serversig), ('/local-stats/days/(.*)', Stats), ('/cron', Cron), - ('/step', Step), + ('/daily', Daily), + ('/step(/.*)?', Step), ('/mkupload/(.*)', MkUpload), #('/mkupload2', MkUpload2), ('/upload', Upload), Modified: trunk/appengine/model.py ============================================================================== --- trunk/appengine/model.py (original) +++ trunk/appengine/model.py Mon Aug 16 10:25:00 2010 @@ -64,3 +64,8 @@ name = db.StringProperty() project = db.StringProperty() agent = 
db.StringProperty() + +class Stats(db.Model): + day = db.StringProperty() + data = db.BlobProperty() + partial = db.BlobProperty() Modified: trunk/appengine/stats.py ============================================================================== --- trunk/appengine/stats.py (original) +++ trunk/appengine/stats.py Mon Aug 16 10:25:00 2010 @@ -1,5 +1,104 @@ -import datetime +# stdlib +import datetime, bz2, csv, re, cStringIO, cPickle +from collections import defaultdict +# GAE +from google.appengine.api.labs import taskqueue +# PyPI +import model + +# list of recognized user agents +SETUPTOOLS_UA = (re.compile((r'^.* setuptools/(?P<version>[0-9]\..*)$')), 'setuptools/%s') +URLLIB_UA = (re.compile(r'^Python-urllib/(?P<version>[23]\.[0-9])$'), 'Python-urllib/%s') +SAFARI_UA = (re.compile(r'^Mozilla.* .* Version/(?P<version>.*) Safari/.*$'), 'Safari/%s') +GOOGLEBOT = (re.compile(r'Googlebot-Mobile/(?P<version>.*);'), 'Googlebot-Mobile/%s') +MSNBOT = (re.compile(r'^msnbot/(?P<version>.*) '), 'msnbot/%s') +FIREFOX_UA = (re.compile(r'^Mozilla.*? Firefox/(?P<version>[23])\..*$'), 'Firefox/%s') +PLAIN_MOZILLA = (re.compile(r'^Mozilla/(?P<version>.*?) 
'), 'Mozilla/%s') + +def get_simplified_ua(user_agent): + """returns a simplified version of the user agent""" + while user_agent.endswith(',gzip(gfe)'): + user_agent = user_agent[:-len(',gzip(gfe)')] + for expr, repl in (URLLIB_UA, SETUPTOOLS_UA, SAFARI_UA, GOOGLEBOT, + MSNBOT, FIREFOX_UA, PLAIN_MOZILLA): + res = expr.search(user_agent) + if res is not None: + return repl % res.group('version') + return user_agent def today(): now = datetime.datetime.utcnow() return "%s-%.2d-%.2d" % (now.year, now.month, now.day) + +def mkbz2(entries): + downloads = entries.items() + downloads.sort() + output = cStringIO.StringIO() + writer = csv.writer(output) + for (p,n,a),c in downloads: + writer.writerow((p,n,a,c)) + data = bz2.compress(output.getvalue()) + return data + +def unpack(data): + res = {} + reader = csv.reader(cStringIO.StringIO(bz2.decompress(data))) + for project, agent, name, count in reader: + res[project, agent, name] = int(count) + return res + +def integrate_one_day(day): + entries = defaultdict(lambda:0) + old = model.Stats.all().filter('day = ', day).fetch(1) + if old: + old = old[0] + if len(old.partial) > 500000: + # argh. need to make multiple files to fit into Google blob limits + partno = 1 + while model.Stats.all().filter('day = ', '%s.part%d' % (day, partno)).fetch(1): + partno += 1 + old.day = '%s.part%d' % (day, partno) + old.data = old.partial + old.partial = None + old.put() + # enough for now + return + entries.update(unpack(old.partial)) + deletable = [] + todo = model.Download.all().filter('day = ', day).fetch(100) + for download in todo: + agent = get_simplified_ua(download.agent) + key = download.project,download.name,agent + entries[key] += 1 + deletable.append(download) + if len(todo) == 100: + # Partial results. 
Save them + data = mkbz2(entries) + if old: + old.partial = data + else: + old = model.Stats(day=day, partial=data) + old.put() + for d in deletable: + d.delete() + return + # complete data + data = mkbz2(entries) + if old: + old.data = data + old.partial = None + else: + old = model.Stats(day=day, data=data) + old.put() + for d in deletable: + d.delete() + +def integrate(): + 'Integrate all downloads except for those from today' + # find a day that isn't integrated yet + d = model.Download.all().filter('day != ', today()).fetch(1) + if not d: + return "Done" + integrate_one_day(d[0].day) + taskqueue.add(url='/step/integrate') + return "queued next integration" Added: trunk/appengine/templates/stats.html ============================================================================== --- (empty file) +++ trunk/appengine/templates/stats.html Mon Aug 16 10:25:00 2010 @@ -0,0 +1,10 @@ + + +Index of /local-stats/days + + +{% for day in days %} +{{day}}.bz2
+{% endfor %} + + \ No newline at end of file From python-checkins at python.org Fri Aug 20 16:51:37 2010 From: python-checkins at python.org (martin.von.loewis) Date: Fri, 20 Aug 2010 16:51:37 +0200 (CEST) Subject: [Pypi-checkins] r856 - trunk/pypi/tools Message-ID: <20100820145137.583F2EE9EE@mail.python.org> Author: martin.von.loewis Date: Fri Aug 20 16:51:37 2010 New Revision: 856 Modified: trunk/pypi/tools/apache_stats.py Log: Fix field order in keys. Modified: trunk/pypi/tools/apache_stats.py ============================================================================== --- trunk/pypi/tools/apache_stats.py (original) +++ trunk/pypi/tools/apache_stats.py Fri Aug 20 16:51:37 2010 @@ -8,6 +8,8 @@ from apache_reader import ApacheLogReader +# dictionary key structure: filename, user_agent, package_name + class LocalStats(object): """Base class that writes the log file """ @@ -116,7 +118,7 @@ def read_stats_dict(self, stats_file): res = {} for r in self.read_stats(stats_file): - key = (r['packagename'], r['filename'], r['useragent']) + key = (r['filename'], r['useragent'], r['packagename']) value = r['count'] res[key] = value return res From python-checkins at python.org Fri Aug 20 17:04:49 2010 From: python-checkins at python.org (martin.von.loewis) Date: Fri, 20 Aug 2010 17:04:49 +0200 (CEST) Subject: [Pypi-checkins] r857 - trunk/pypi/tools Message-ID: <20100820150449.BAE76F863@mail.python.org> Author: martin.von.loewis Date: Fri Aug 20 17:04:49 2010 New Revision: 857 Modified: trunk/pypi/tools/integratestats Log: Don't skip last host. 
Modified: trunk/pypi/tools/integratestats ============================================================================== --- trunk/pypi/tools/integratestats (original) +++ trunk/pypi/tools/integratestats Fri Aug 20 17:04:49 2010 @@ -57,9 +57,9 @@ host = 'b' while True: integrate_remote(sys.argv[1], host) - host = chr(ord(host)+1) if host == last: break + host = chr(ord(host)+1) main() From python-checkins at python.org Sat Aug 21 16:35:45 2010 From: python-checkins at python.org (martin.von.loewis) Date: Sat, 21 Aug 2010 16:35:45 +0200 (CEST) Subject: [Pypi-checkins] r858 - trunk/pypi/tools Message-ID: <20100821143545.0D7D6EE9EB@mail.python.org> Author: martin.von.loewis Date: Sat Aug 21 16:35:44 2010 New Revision: 858 Modified: trunk/pypi/tools/integratestats Log: Fix key order. Modified: trunk/pypi/tools/integratestats ============================================================================== --- trunk/pypi/tools/integratestats (original) +++ trunk/pypi/tools/integratestats Sat Aug 21 16:35:44 2010 @@ -14,7 +14,7 @@ dbpass = c.get('database', 'password') dbconn = psycopg2.connect(database=dbname, user=dbuser, password=dbpass) cursor = dbconn.cursor() - for (package, filename, browser), count in data.items(): + for (filename, browser, package), count in data.items(): cursor.execute('update release_files set downloads=downloads+%s where filename=%s', (count, filename)) dbconn.commit() From python-checkins at python.org Sat Aug 21 17:28:41 2010 From: python-checkins at python.org (martin.von.loewis) Date: Sat, 21 Aug 2010 17:28:41 +0200 (CEST) Subject: [Pypi-checkins] r859 - trunk/appengine/templates Message-ID: <20100821152841.B7187EE98B@mail.python.org> Author: martin.von.loewis Date: Sat Aug 21 17:28:41 2010 New Revision: 859 Modified: trunk/appengine/templates/stats.html Log: Generate relative URLs for stats, as this is what integratestats expects. 
Modified: trunk/appengine/templates/stats.html ============================================================================== --- trunk/appengine/templates/stats.html (original) +++ trunk/appengine/templates/stats.html Sat Aug 21 17:28:41 2010 @@ -4,7 +4,7 @@ {% for day in days %} -{{day}}.bz2
+{{day}}.bz2
{% endfor %} - \ No newline at end of file + From python-checkins at python.org Sat Aug 21 17:31:42 2010 From: python-checkins at python.org (martin.von.loewis) Date: Sat, 21 Aug 2010 17:31:42 +0200 (CEST) Subject: [Pypi-checkins] r860 - trunk/pypi/tools Message-ID: <20100821153142.DDE63EE98B@mail.python.org> Author: martin.von.loewis Date: Sat Aug 21 17:31:42 2010 New Revision: 860 Modified: trunk/pypi/tools/integratestats Log: Add trailing slash to local-stats/days/ Modified: trunk/pypi/tools/integratestats ============================================================================== --- trunk/pypi/tools/integratestats (original) +++ trunk/pypi/tools/integratestats Sat Aug 21 17:31:42 2010 @@ -21,7 +21,7 @@ dbconn.close() def integrate_remote(config, host, dbupdate=True): - index = urllib.urlopen('http://%s.pypi.python.org/local-stats/days' % host).read() + index = urllib.urlopen('http://%s.pypi.python.org/local-stats/days/' % host).read() files = set(re.findall('href=.(20..-..-..).bz2', index)) try: integrated = open(statsdir+'/integrated/'+host).readlines() From python-checkins at python.org Sat Aug 21 20:21:50 2010 From: python-checkins at python.org (martin.von.loewis) Date: Sat, 21 Aug 2010 20:21:50 +0200 (CEST) Subject: [Pypi-checkins] r861 - trunk/pypi Message-ID: <20100821182150.EFACEEE986@mail.python.org> Author: martin.von.loewis Date: Sat Aug 21 20:21:50 2010 New Revision: 861 Modified: trunk/pypi/gae.py Log: Put quotes around filename. Add boundary after last field. 
Modified: trunk/pypi/gae.py ============================================================================== --- trunk/pypi/gae.py (original) +++ trunk/pypi/gae.py Sat Aug 21 20:21:50 2010 @@ -6,32 +6,31 @@ POST="""\ --%(boundary)s -Content-Disposition: form-data; name=secret +Content-Disposition: form-data; name="secret" %(secret)s --%(boundary)s -Content-Disposition: form-data; name=path +Content-Disposition: form-data; name="path" %(path)s --%(boundary)s -Content-Disposition: form-data; name=file; filename=data.bin +Content-Disposition: form-data; name="file"; filename="%(path)s" +Content-Type: application/octet-stream %(data)s +--%(boundary)s """ -POST = "\r\n".join(POST.splitlines()) +POST = "\r\n".join(POST.splitlines())+"\r\n" -# XXX Not sure how to report errors def doit(host, secret, srcdir): x = urllib2.urlopen('http://%s/mkupload/%s' % (host, secret)) if x.code != 200: - #print "mkupload failed (%s)" % r.code return path,url = x.read().splitlines() host, session = urlparse.urlsplit(url)[1:3] try: data = open(srcdir+"/"+path).read() except IOError, e: - #print "Read of %s failed:%s"%(path,str(e)) return boundary = "" while boundary in data: From python-checkins at python.org Sat Aug 21 20:22:42 2010 From: python-checkins at python.org (martin.von.loewis) Date: Sat, 21 Aug 2010 20:22:42 +0200 (CEST) Subject: [Pypi-checkins] r862 - trunk/pypi/tools Message-ID: <20100821182242.C12A3EE986@mail.python.org> Author: martin.von.loewis Date: Sat Aug 21 20:22:42 2010 New Revision: 862 Modified: trunk/pypi/tools/downloadstats Log: Fix path. 
Modified: trunk/pypi/tools/downloadstats ============================================================================== --- trunk/pypi/tools/downloadstats (original) +++ trunk/pypi/tools/downloadstats Sat Aug 21 20:22:42 2010 @@ -4,7 +4,7 @@ import sys, os, csv import apache_reader, apache_stats -statsdir = '/data/pypi/local-stats/' +statsdir = '/data/www/pypi/local-stats/' days = set() records = [] From python-checkins at python.org Sun Aug 22 10:36:26 2010 From: python-checkins at python.org (martin.von.loewis) Date: Sun, 22 Aug 2010 10:36:26 +0200 (CEST) Subject: [Pypi-checkins] r863 - trunk/pypi/tools Message-ID: <20100822083626.553C6EEB9A@mail.python.org> Author: martin.von.loewis Date: Sun Aug 22 10:36:26 2010 New Revision: 863 Modified: trunk/pypi/tools/integratestats Log: Also update database for a. Modified: trunk/pypi/tools/integratestats ============================================================================== --- trunk/pypi/tools/integratestats (original) +++ trunk/pypi/tools/integratestats Sun Aug 22 10:36:26 2010 @@ -53,8 +53,7 @@ else: raise ValueError, "Could not properly resolve last mirror name" last = last.split('.')[0] - integrate_remote(None, 'a', False) - host = 'b' + host = 'a' while True: integrate_remote(sys.argv[1], host) if host == last: