[Python-checkins] cpython: Use bytes regex instead of decoding whole pages

Mon Sep 12 17:42:20 CEST 2011

http://hg.python.org/cpython/rev/77df8ab7914b
changeset: 72352:77df8ab7914b
user: Éric Araujo <merwok at netwok.org>
date: Sat Sep 10 18:10:58 2011 +0200
summary:
 Use bytes regex instead of decoding whole pages
files:
 Lib/packaging/pypi/simple.py | 22 ++++++++++------------
 1 files changed, 10 insertions(+), 12 deletions(-)

diff --git a/Lib/packaging/pypi/simple.py b/Lib/packaging/pypi/simple.py
--- a/Lib/packaging/pypi/simple.py
+++ b/Lib/packaging/pypi/simple.py
@@ -159,22 +159,20 @@
 
 Return a list of names.
 """
+ if '*' in name:
+ name.replace('*', '.*')
+ else:
+ name = "%s%s%s" % ('*.?', name, '*.?')
+ name = name.replace('*', '[^<]*') # avoid matching end tag
+ pattern = ('<a[^>]*>(%s)</a>' % name).encode('utf-8')
+ projectname = re.compile(pattern, re.I)
+ matching_projects = []
+
 with self._open_url(self.index_url) as index:
- if '*' in name:
- name.replace('*', '.*')
- else:
- name = "%s%s%s" % ('*.?', name, '*.?')
- name = name.replace('*', '[^<]*') # avoid matching end tag
- projectname = re.compile('<a[^>]*>(%s)</a>' % name, re.I)
- matching_projects = []
-
 index_content = index.read()
 
- # FIXME should use bytes I/O and regexes instead of decoding
- index_content = index_content.decode()
-
 for match in projectname.finditer(index_content):
- project_name = match.group(1)
+ project_name = match.group(1).decode('utf-8')
 matching_projects.append(self._get_project(project_name))
 return matching_projects
 
-- 
Repository URL: http://hg.python.org/cpython