[Python-checkins] r42212 - in python/trunk: Lib/difflib.py Lib/test/test_difflib.py Misc/NEWS

gustavo.niemeyer python-checkins at python.org
Tue Jan 31 19:34:18 CET 2006


Author: gustavo.niemeyer
Date: Tue Jan 31 19:34:13 2006
New Revision: 42212

Modified:
   python/trunk/Lib/difflib.py
   python/trunk/Lib/test/test_difflib.py
   python/trunk/Misc/NEWS
Log:
Patch #1413711: Certain patterns of differences were making difflib
touch the recursion limit. The applied patch inlines the recursive
__helper method in a non-recursive way.


Modified: python/trunk/Lib/difflib.py
==============================================================================
--- python/trunk/Lib/difflib.py	(original)
+++ python/trunk/Lib/difflib.py	Tue Jan 31 19:34:13 2006
@@ -473,26 +473,31 @@
 
         if self.matching_blocks is not None:
             return self.matching_blocks
-        self.matching_blocks = []
         la, lb = len(self.a), len(self.b)
-        self.__helper(0, la, 0, lb, self.matching_blocks)
-        self.matching_blocks.append( (la, lb, 0) )
-        return self.matching_blocks
 
-    # builds list of matching blocks covering a[alo:ahi] and
-    # b[blo:bhi], appending them in increasing order to answer
+        indexed_blocks = []
+        queue = [(0, la, 0, lb)]
+        while queue:
+            # builds list of matching blocks covering a[alo:ahi] and
+            # b[blo:bhi], appending them in increasing order to answer
+            alo, ahi, blo, bhi = queue.pop()
+
+            # a[alo:i] vs b[blo:j] unknown
+            # a[i:i+k] same as b[j:j+k]
+            # a[i+k:ahi] vs b[j+k:bhi] unknown
+            i, j, k = x = self.find_longest_match(alo, ahi, blo, bhi)
+
+            if k:
+                if alo < i and blo < j:
+                    queue.append((alo, i, blo, j))
+                indexed_blocks.append((i, x))
+                if i+k < ahi and j+k < bhi:
+                    queue.append((i+k, ahi, j+k, bhi))
+        indexed_blocks.sort()
 
-    def __helper(self, alo, ahi, blo, bhi, answer):
-        i, j, k = x = self.find_longest_match(alo, ahi, blo, bhi)
-        # a[alo:i] vs b[blo:j] unknown
-        # a[i:i+k] same as b[j:j+k]
-        # a[i+k:ahi] vs b[j+k:bhi] unknown
-        if k:
-            if alo < i and blo < j:
-                self.__helper(alo, i, blo, j, answer)
-            answer.append(x)
-            if i+k < ahi and j+k < bhi:
-                self.__helper(i+k, ahi, j+k, bhi, answer)
+        self.matching_blocks = [elem[1] for elem in indexed_blocks]
+        self.matching_blocks.append( (la, lb, 0) )
+        return self.matching_blocks
 
     def get_opcodes(self):
         """Return list of 5-tuples describing how to turn a into b.

Modified: python/trunk/Lib/test/test_difflib.py
==============================================================================
--- python/trunk/Lib/test/test_difflib.py	(original)
+++ python/trunk/Lib/test/test_difflib.py	Tue Jan 31 19:34:13 2006
@@ -2,6 +2,7 @@
 from test.test_support import run_unittest, findfile
 import unittest
 import doctest
+import sys
 
 class TestSFbugs(unittest.TestCase):
 
@@ -143,6 +144,14 @@
 
         self.assertEqual(actual,expect)
 
+    def test_recursion_limit(self):
+        # Check if the problem described in patch #1413711 exists.
+        limit = sys.getrecursionlimit()
+        old = [(i%2 and "K:%d" or "V:A:%d") % i for i in range(limit*2)]
+        new = [(i%2 and "K:%d" or "V:B:%d") % i for i in range(limit*2)]
+        difflib.SequenceMatcher(None, old, new).get_opcodes()
+
+
 Doctests = doctest.DocTestSuite(difflib)
 
 run_unittest(TestSFpatches, TestSFbugs, Doctests)

Modified: python/trunk/Misc/NEWS
==============================================================================
--- python/trunk/Misc/NEWS	(original)
+++ python/trunk/Misc/NEWS	Tue Jan 31 19:34:13 2006
@@ -676,6 +676,9 @@
 
 - ``uu.encode()`` and ``uu.decode()`` now support unicode filenames.
 
+- Patch #1413711: Certain patterns of differences were making difflib
+  touch the recursion limit.
+
 Build
 -----
 


More information about the Python-checkins mailing list