[Python-checkins] cpython (3.6): Issue #28151: Use pythontest.net in test_robotparser

berker.peksag python-checkins at python.org
Sun Sep 18 04:21:22 EDT 2016


https://hg.python.org/cpython/rev/83bca958adc9
changeset:   103913:83bca958adc9
branch:      3.6
parent:      103911:d4c7e217e7a8
user:        Berker Peksag <berker.peksag at gmail.com>
date:        Sun Sep 18 11:21:57 2016 +0300
summary:
  Issue #28151: Use pythontest.net in test_robotparser

files:
  Lib/test/test_robotparser.py |  43 ++++++++++++++++++++---
  1 files changed, 36 insertions(+), 7 deletions(-)


diff --git a/Lib/test/test_robotparser.py b/Lib/test/test_robotparser.py
--- a/Lib/test/test_robotparser.py
+++ b/Lib/test/test_robotparser.py
@@ -1,4 +1,5 @@
 import io
+import os
 import unittest
 import urllib.robotparser
 from collections import namedtuple
@@ -272,14 +273,42 @@
 
 class NetworkTestCase(unittest.TestCase):
 
-    def testPythonOrg(self):
+    base_url = 'http://www.pythontest.net/'
+    robots_txt = '{}elsewhere/robots.txt'.format(base_url)
+
+    @classmethod
+    def setUpClass(cls):
         support.requires('network')
-        with support.transient_internet('www.python.org'):
-            parser = urllib.robotparser.RobotFileParser(
-                "http://www.python.org/robots.txt")
-            parser.read()
-            self.assertTrue(
-                parser.can_fetch("*", "http://www.python.org/robots.txt"))
+        with support.transient_internet(cls.base_url):
+            cls.parser = urllib.robotparser.RobotFileParser(cls.robots_txt)
+            cls.parser.read()
+
+    def url(self, path):
+        return '{}{}{}'.format(
+            self.base_url, path, '/' if not os.path.splitext(path)[1] else ''
+        )
+
+    def test_basic(self):
+        self.assertFalse(self.parser.disallow_all)
+        self.assertFalse(self.parser.allow_all)
+        self.assertGreater(self.parser.mtime(), 0)
+        self.assertFalse(self.parser.crawl_delay('*'))
+        self.assertFalse(self.parser.request_rate('*'))
+
+    def test_can_fetch(self):
+        self.assertTrue(self.parser.can_fetch('*', self.url('elsewhere')))
+        self.assertFalse(self.parser.can_fetch('Nutch', self.base_url))
+        self.assertFalse(self.parser.can_fetch('Nutch', self.url('brian')))
+        self.assertFalse(self.parser.can_fetch('Nutch', self.url('webstats')))
+        self.assertFalse(self.parser.can_fetch('*', self.url('webstats')))
+        self.assertTrue(self.parser.can_fetch('*', self.base_url))
+
+    def test_read_404(self):
+        parser = urllib.robotparser.RobotFileParser(self.url('i-robot.txt'))
+        parser.read()
+        self.assertTrue(parser.allow_all)
+        self.assertFalse(parser.disallow_all)
+        self.assertEqual(parser.mtime(), 0)
 
 if __name__=='__main__':
     unittest.main()

-- 
Repository URL: https://hg.python.org/cpython


More information about the Python-checkins mailing list