[Python-checkins] python/dist/src/Lib robotparser.py,1.10,1.10.16.1
loewis@users.sourceforge.net
loewis@users.sourceforge.net
Mon, 19 May 2003 23:19:38 -0700
Update of /cvsroot/python/python/dist/src/Lib
In directory sc8-pr-cvs1:/tmp/cvs-serv4496
Modified Files:
Tag: release22-maint
robotparser.py
Log Message:
Patch #499513: Use readline() instead of readlines(). Remove the
redundant redirection-limit code, which FancyURLopener already implements.
Backport of revision 1.12.
Index: robotparser.py
===================================================================
RCS file: /cvsroot/python/python/dist/src/Lib/robotparser.py,v
retrieving revision 1.10
retrieving revision 1.10.16.1
diff -C2 -d -r1.10 -r1.10.16.1
*** robotparser.py 13 Aug 2001 14:43:43 -0000 1.10
--- robotparser.py 20 May 2003 06:19:36 -0000 1.10.16.1
***************
*** 5,9 ****
You can choose between two licenses when using this package:
1) GNU GPLv2
! 2) PYTHON 2.0 OPEN SOURCE LICENSE
The robots.txt Exclusion Protocol is implemented as specified in
--- 5,9 ----
You can choose between two licenses when using this package:
1) GNU GPLv2
! 2) PSF license for Python 2.2
The robots.txt Exclusion Protocol is implemented as specified in
***************
*** 42,46 ****
opener = URLopener()
f = opener.open(self.url)
! lines = f.readlines()
self.errcode = opener.errcode
if self.errcode == 401 or self.errcode == 403:
--- 42,50 ----
opener = URLopener()
f = opener.open(self.url)
! lines = []
! line = f.readline()
! while line:
! lines.append(line.strip())
! line = f.readline()
self.errcode = opener.errcode
if self.errcode == 401 or self.errcode == 403:
***************
*** 63,67 ****
for line in lines:
- line = line.strip()
linenumber = linenumber + 1
if not line:
--- 67,70 ----
***************
*** 200,205 ****
apply(urllib.FancyURLopener.__init__, (self,) + args)
self.errcode = 200
- self.tries = 0
- self.maxtries = 10
def http_error_default(self, url, fp, errcode, errmsg, headers):
--- 203,206 ----
***************
*** 207,221 ****
return urllib.FancyURLopener.http_error_default(self, url, fp, errcode,
errmsg, headers)
-
- def http_error_302(self, url, fp, errcode, errmsg, headers, data=None):
- self.tries += 1
- if self.tries >= self.maxtries:
- return self.http_error_default(url, fp, 500,
- "Internal Server Error: Redirect Recursion",
- headers)
- result = urllib.FancyURLopener.http_error_302(self, url, fp, errcode,
- errmsg, headers, data)
- self.tries = 0
- return result
def _check(a,b):
--- 208,211 ----