[Python-checkins] distutils2: Better bytes/characters separation for d2.pypi.simple

eric.araujo python-checkins at python.org
Mon Sep 19 15:12:39 CEST 2011


http://hg.python.org/distutils2/rev/3c0df18caa89
changeset:   1177:3c0df18caa89
user:        Éric Araujo <merwok at netwok.org>
date:        Mon Sep 19 04:57:47 2011 +0200
summary:
  Better bytes/characters separation for d2.pypi.simple

files:
  distutils2/pypi/simple.py |  22 ++++++++++------------
  1 files changed, 10 insertions(+), 12 deletions(-)


diff --git a/distutils2/pypi/simple.py b/distutils2/pypi/simple.py
--- a/distutils2/pypi/simple.py
+++ b/distutils2/pypi/simple.py
@@ -157,25 +157,23 @@
 
         Return a list of names.
         """
+        if u'*' in name:
+            name.replace(u'*', u'.*')
+        else:
+            name = u"%s%s%s" % (u'*.?', name, u'*.?')
+        name = name.replace(u'*', u'[^<]*')  # avoid matching end tag
+        pattern = (u'<a[^>]*>(%s)</a>' % name).encode('utf-8')
+        projectname = re.compile(pattern, re.I)
+        matching_projects = []
+
         index = self._open_url(self.index_url)
         try:
-            if '*' in name:
-                name.replace('*', '.*')
-            else:
-                name = "%s%s%s" % ('*.?', name, '*.?')
-            name = name.replace('*', '[^<]*')  # avoid matching end tag
-            projectname = re.compile('<a[^>]*>(%s)</a>' % name, re.I)
-            matching_projects = []
-
             index_content = index.read()
         finally:
             index.close()
 
-        # FIXME should use bytes I/O and regexes instead of decoding
-        index_content = index_content.decode()
-
         for match in projectname.finditer(index_content):
-            project_name = match.group(1)
+            project_name = match.group(1).decode('utf-8')
             matching_projects.append(self._get_project(project_name))
         return matching_projects
 

-- 
Repository URL: http://hg.python.org/distutils2


More information about the Python-checkins mailing list