From python-checkins at python.org Sat Oct 9 15:50:12 2010 From: python-checkins at python.org (martin.von.loewis) Date: Sat, 9 Oct 2010 15:50:12 +0200 (CEST) Subject: [Pypi-checkins] r875 - branches/egginfo Message-ID: <20101009135012.29BA7EE98D@mail.python.org> Author: martin.von.loewis Date: Sat Oct 9 15:50:12 2010 New Revision: 875 Added: branches/egginfo/ - copied from r874, trunk/pypi/ Log: Record rejected egginfo feature From python-checkins at python.org Sat Oct 9 15:51:36 2010 From: python-checkins at python.org (martin.von.loewis) Date: Sat, 9 Oct 2010 15:51:36 +0200 (CEST) Subject: [Pypi-checkins] r876 - branches/egginfo Message-ID: <20101009135136.31285EE98D@mail.python.org> Author: martin.von.loewis Date: Sat Oct 9 15:51:36 2010 New Revision: 876 Added: branches/egginfo/egg_info.py Modified: branches/egginfo/config.py branches/egginfo/store.py branches/egginfo/webui.py Log: Provide egg_info as extracted files, per release. Modified: branches/egginfo/config.py ============================================================================== --- branches/egginfo/config.py (original) +++ branches/egginfo/config.py Sat Oct 9 15:51:36 2010 @@ -18,6 +18,7 @@ self.database_pw = None self.database_files_dir = c.get('database', 'files_dir') self.database_docs_dir = c.get('database', 'docs_dir') + self.database_egg_info_dir = c.get('database', 'egg_info_dir') if c.has_option('database', 'pubsubhubbub'): self.pubsubhubbub = c.get('database', 'pubsubhubbub') else: Added: branches/egginfo/egg_info.py ============================================================================== --- (empty file) +++ branches/egginfo/egg_info.py Sat Oct 9 15:51:36 2010 @@ -0,0 +1,85 @@ +from __future__ import with_statement +import zipfile, os, subprocess + +def magic(content): + 'Run file on some data.' 
+ fn = os.tmpnam() + with open(fn, 'w') as f: + f.write(content) + p = subprocess.Popen(['/usr/bin/file', '-b', '--mime-type', fn], stdout=subprocess.PIPE) + res = p.stdout.read().strip() + p.wait() + os.unlink(fn) + return res + +good_magic = set(('text/plain', 'application/octet-stream')) +def check_egg_info(egg_info): + 'Validate that all filenames and contents are good; raise ValueError if not.' + for name, data in egg_info: + if not re.match('^[a-zA-Z0-9. _-]+$', name): + raise ValueError, "invalid filename "+repr(name) + if magic(data) not in good_magic: + raise ValueError, "%s has bad mime type (%s)" % (name, data) + + +def egg_info_zip(contents): + contents = zipfile.ZipFile(contents) + data = [] + for name in contents.namelist(): + if name.startswith('EGG-INFO/'): + if name.endswith('/'): + # skip directories + continue + basename = name[len('EGG-INFO/'):] + if '/' in basename: + # skip files in subdirectories + continue + data.append((basename, contents.read(name))) + return data + +def egg_info_tar(contents): + data = [] + for member in contents: + pos = member.name.find('.egg-info/') + if pos == -1: + continue + basename = member.name[pos+len('.egg-info/'):] + data.append((basename, contents.extractfile(member).read())) + return data + + +def get_egg_info(filename, contents): + # Extract information of a single egg into the file system + if filename.endswith('.egg') or filename.endswith('.zip'): + return egg_info_zip(contents) + if filename.endswith('.tar.gz') or filename.endswith('.tgz'): + return egg_info_tar(tarfile.TarFile.gzopen(contents)) + if filename.endswith('.tar.bz2'): + return egg_info_tar(tarfile.TarFile.bz2open(contents)) + return None + +def extract_eggs(store): + # Insert information from oldest egg for each release having one. 
+ c = store.get_cursor() + c.execute("select name, version from release_files where packagetype='bdist_egg' " + "group by name,version order by name, version") + for name, version in c.fetchall(): + if os.path.exists(store.egg_info_dir(name, version)): + continue + c.execute("select filename, python_version from release_files " + "where name=%s and version=%s and packagetype='bdist_egg' " + "order by upload_time limit 1", (name, version)) + f = c.fetchone() + c.abort() # don't keep records locked + if not f: + continue + filename, pyversion = f + data = get_egg_info(filename, open(store.gen_file_path(pyversion, name, filename))) + store.check_egg_info(data) + store.store_egg_info(package, version, data, do_journal=False) + +if __name__=='__main__': + import sys, store, config + c = config.Config('config.ini') + s = store.Store(c) + extract_eggs(s) Modified: branches/egginfo/store.py ============================================================================== --- branches/egginfo/store.py (original) +++ branches/egginfo/store.py Sat Oct 9 15:51:36 2010 @@ -417,6 +417,34 @@ return message + def egg_info_dir(self, name, version): + return os.path.join(self.config.database_egg_info_dir, name[0], name, version) + + def store_egg_info(self, name, version, egg_info, do_journal=True): + '''Store the egg_info files on disk.''' + cursor = self.get_cursor() + base = self.config.database_egg_info_dir + for p in ('', name[0], name, version): + base = os.path.join(base, p) + if not os.path.exists(base): + os.mkdir(base, 0755) + # remove existing files + for p in os.listdir(base): + os.unlink(os.path.join(base, p)) + # create new files + for p, data in egg_info: + with open(os.path.join(base, p), 'w') as f: + f.write(data) + + if not do_journal: + return + + date = time.strftime('%Y-%m-%d %H:%M:%S', time.gmtime()) + safe_execute(cursor, '''insert into journals (name, version, action, + submitted_date, submitted_by, submitted_from) values + (%s, %s, %s, %s, %s, %s)''', (name, 
version, 'egg_info', date, + self.username, self.userip)) + def fix_ordering(self, name, new_version=None): ''' Fix the _pypi_ordering column for a package's releases. @@ -1007,6 +1035,13 @@ os.remove(self.gen_file_path(file['python_version'], name, file['filename'])) + # also delete egg_info + base = os.path.join(self.config.database_egg_info_dir, name[0], name, version) + if os.path.exists(base): + for file in os.listdir(base): + os.unlink(os.path.join(base, file)) + os.rmdir(base) + # delete ancillary table entries for tab in ('files', 'dependencies', 'classifiers'): safe_execute(cursor, '''delete from release_%s where @@ -1033,6 +1068,11 @@ os.remove(self.gen_file_path(file['python_version'], name, file['filename'])) + # delete egg_info + base = os.path.join(self.config.database_egg_info_dir, name[0], name) + if os.path.exists(base): + shutil.rmtree(base) + # delete ancillary table entries for tab in ('files', 'dependencies', 'classifiers'): safe_execute(cursor, 'delete from release_%s where name=%%s'%tab, Modified: branches/egginfo/webui.py ============================================================================== --- branches/egginfo/webui.py (original) +++ branches/egginfo/webui.py Sat Oct 9 15:51:36 2010 @@ -28,7 +28,7 @@ # local imports import store, config, versionpredicate, verify_filetype, rpc -import MailingLogger, openid2rp, gae +import MailingLogger, openid2rp, gae, egg_info from mini_pkg_resources import safe_name esc = cgi.escape @@ -563,7 +563,7 @@ password_reset role role_form list_classifiers login logout files file_upload show_md5 doc_upload claim openid openid_return dropid rate comment addcomment delcomment clear_auth addkey delkey lasthour - json gae_file'''.split(): + json gae_file egg_info_upload'''.split(): getattr(self, action)() else: #raise NotFound, 'Unknown action %s' % action @@ -2208,23 +2208,14 @@ self.handler.end_headers() self.wfile.write(digest) - CURRENT_UPLOAD_PROTOCOL = "1" - def file_upload(self, response=True): + def 
is_upload_ok(self): + '''Verify that the user is authenticated and authorized to post to + a specific release. Return name, version if successful; raise an + exception if it is not.''' # make sure the user is identified if not self.authenticated: raise Unauthorised, \ - "You must be identified to edit package information" - - # Verify protocol version - if self.form.has_key('protocol_version'): - protocol_version = self.form['protocol_version'] - else: - protocol_version = self.CURRENT_UPLOAD_PROTOCOL - - if protocol_version!=self.CURRENT_UPLOAD_PROTOCOL: - # If a new protocol version is added, backward compatibility - # with old distutils upload commands needs to be preserved - raise NotImplementedError, "Unsupported file upload protocol" + "You must be identified to edit package information" # figure the package name and version name = version = None @@ -2242,6 +2233,23 @@ raise Forbidden, \ "You are not allowed to edit '%s' package information"%name + return name, version + + CURRENT_UPLOAD_PROTOCOL = "1" + def file_upload(self, response=True): + name, version = self.is_upload_ok() + + # Verify protocol version + if self.form.has_key('protocol_version'): + protocol_version = self.form['protocol_version'] + else: + protocol_version = self.CURRENT_UPLOAD_PROTOCOL + + if protocol_version!=self.CURRENT_UPLOAD_PROTOCOL: + # If a new protocol version is added, backward compatibility + # with old distutils upload commands needs to be preserved + raise NotImplementedError, "Unsupported file upload protocol" + # verify the release exists if not self.store.has_release(name, version): # auto-register the release... @@ -2348,8 +2356,21 @@ m.hexdigest())''') return + e_i = None + if filetype == 'bdist_egg' and \ + not os.path.exists(self.store.egg_info_dir(name, version)): + try: + e_i = egg_info.get_egg_info(filename, cStringIO.StringIO(content)) + egg_info.check_egg_info(e_i) + except Exception: + # there is something wrong with the egg. 
+ # just don't extract the info from it + e_i = None + self.store.add_file(name, version, content, md5_digest, filetype, pyversion, comment, filename, signature) + if e_i: + self.store.store_egg_info(name, version, e_i) self.store.changed() if response: @@ -2358,28 +2379,30 @@ self.handler.end_headers() self.wfile.write('OK\n') + def egg_info_upload(self): + name, version = self.is_upload_ok() + + if not self.store.has_release(name, version): + raise FormError, "Release does not exist" + + egg_info = [] + for v in self.form['egg_info']: + egg_info.append((v.filename, v.value)) + + egg_info.check_egg_info(egg_info) # raises exception on failure + self.store.store_egg_info(name, version, egg_info) + self.store.changed() + + self.handler.send_response(200, 'OK') + self.handler.set_content_type('text/plain') + self.handler.end_headers() + self.wfile.write('OK\n') + # # Documentation Upload # def doc_upload(self): - # make sure the user is identified - if not self.authenticated: - raise Unauthorised, \ - "You must be identified to edit package information" - - # figure the package name and version - name = version = None - if self.form.has_key('name'): - name = self.form['name'] - if not name: - raise FormError, 'No package name given' - - # make sure the user has permission to do stuff - if not (self.store.has_role('Owner', name) or - self.store.has_role('Admin', name) or - self.store.has_role('Maintainer', name)): - raise Forbidden, \ - "You are not allowed to edit '%s' package information"%name + name, version = self.is_upload_ok() if not self.form.has_key('content'): raise FormError, "No file uploaded"