[Python-checkins] bpo-40394 - difflib.SequenceMatcher.find_longest_match default args (GH-19742)

lrjball webhook-mailer at python.org
Wed Apr 29 23:42:52 EDT 2020


https://github.com/python/cpython/commit/3209cbd99b6d65aa18b3beb124fac9c792b8993d
commit: 3209cbd99b6d65aa18b3beb124fac9c792b8993d
branch: master
author: lrjball <50599110+lrjball at users.noreply.github.com>
committer: GitHub <noreply at github.com>
date: 2020-04-29T22:42:45-05:00
summary:

bpo-40394 - difflib.SequenceMatcher.find_longest_match default args (GH-19742)

* bpo-40394 - difflib.SequenceMatcher.find_longest_match default args

Added default args to find_longest_match, as well as related tests.

files:
A Misc/NEWS.d/next/Library/2020-04-28-18-59-48.bpo-40394.Yi5uuM.rst
M Doc/library/difflib.rst
M Lib/difflib.py
M Lib/test/test_difflib.py
M Misc/ACKS

diff --git a/Doc/library/difflib.rst b/Doc/library/difflib.rst
index ada311bc3a205..7a898c21b52e0 100644
--- a/Doc/library/difflib.rst
+++ b/Doc/library/difflib.rst
@@ -421,7 +421,7 @@ The :class:`SequenceMatcher` class has this constructor:
       is not changed.
 
 
-   .. method:: find_longest_match(alo, ahi, blo, bhi)
+   .. method:: find_longest_match(alo=0, ahi=None, blo=0, bhi=None)
 
       Find longest matching block in ``a[alo:ahi]`` and ``b[blo:bhi]``.
 
@@ -458,6 +458,9 @@ The :class:`SequenceMatcher` class has this constructor:
 
       This method returns a :term:`named tuple` ``Match(a, b, size)``.
 
+      .. versionchanged:: 3.9
+         Added default arguments.
+
 
    .. method:: get_matching_blocks()
 
diff --git a/Lib/difflib.py b/Lib/difflib.py
index f2215d8d4561c..0dda80d387573 100644
--- a/Lib/difflib.py
+++ b/Lib/difflib.py
@@ -130,7 +130,7 @@ class SequenceMatcher:
     set_seq2(b)
         Set the second sequence to be compared.
 
-    find_longest_match(alo, ahi, blo, bhi)
+    find_longest_match(alo=0, ahi=None, blo=0, bhi=None)
         Find longest matching block in a[alo:ahi] and b[blo:bhi].
 
     get_matching_blocks()
@@ -334,9 +334,11 @@ def __chain_b(self):
             for elt in popular: # ditto; as fast for 1% deletion
                 del b2j[elt]
 
-    def find_longest_match(self, alo, ahi, blo, bhi):
+    def find_longest_match(self, alo=0, ahi=None, blo=0, bhi=None):
         """Find longest matching block in a[alo:ahi] and b[blo:bhi].
 
+        By default it will find the longest match in the entirety of a and b.
+
         If isjunk is not defined:
 
         Return (i,j,k) such that a[i:i+k] is equal to b[j:j+k], where
@@ -391,6 +393,10 @@ def find_longest_match(self, alo, ahi, blo, bhi):
         # the unique 'b's and then matching the first two 'a's.
 
         a, b, b2j, isbjunk = self.a, self.b, self.b2j, self.bjunk.__contains__
+        if ahi is None:
+            ahi = len(a)
+        if bhi is None:
+            bhi = len(b)
         besti, bestj, bestsize = alo, blo, 0
         # find longest junk-free match
         # during an iteration of the loop, j2len[j] = length of longest
diff --git a/Lib/test/test_difflib.py b/Lib/test/test_difflib.py
index 5e2ca1a23b928..42ac1fdcd81cd 100644
--- a/Lib/test/test_difflib.py
+++ b/Lib/test/test_difflib.py
@@ -501,12 +501,58 @@ def test_is_character_junk_false(self):
         for char in ['a', '#', '\n', '\f', '\r', '\v']:
             self.assertFalse(difflib.IS_CHARACTER_JUNK(char), repr(char))
 
+class TestFindLongest(unittest.TestCase):
+    def longer_match_exists(self, a, b, n):
+        return any(b_part in a for b_part in
+                   [b[i:i + n + 1] for i in range(0, len(b) - n - 1)])
+
+    def test_default_args(self):
+        a = 'foo bar'
+        b = 'foo baz bar'
+        sm = difflib.SequenceMatcher(a=a, b=b)
+        match = sm.find_longest_match()
+        self.assertEqual(match.a, 0)
+        self.assertEqual(match.b, 0)
+        self.assertEqual(match.size, 6)
+        self.assertEqual(a[match.a: match.a + match.size],
+                         b[match.b: match.b + match.size])
+        self.assertFalse(self.longer_match_exists(a, b, match.size))
+
+        match = sm.find_longest_match(alo=2, blo=4)
+        self.assertEqual(match.a, 3)
+        self.assertEqual(match.b, 7)
+        self.assertEqual(match.size, 4)
+        self.assertEqual(a[match.a: match.a + match.size],
+                         b[match.b: match.b + match.size])
+        self.assertFalse(self.longer_match_exists(a[2:], b[4:], match.size))
+
+        match = sm.find_longest_match(bhi=5, blo=1)
+        self.assertEqual(match.a, 1)
+        self.assertEqual(match.b, 1)
+        self.assertEqual(match.size, 4)
+        self.assertEqual(a[match.a: match.a + match.size],
+                         b[match.b: match.b + match.size])
+        self.assertFalse(self.longer_match_exists(a, b[1:5], match.size))
+
+    def test_longest_match_with_popular_chars(self):
+        a = 'dabcd'
+        b = 'd'*100 + 'abc' + 'd'*100  # length over 200 so popular used
+        sm = difflib.SequenceMatcher(a=a, b=b)
+        match = sm.find_longest_match(0, len(a), 0, len(b))
+        self.assertEqual(match.a, 0)
+        self.assertEqual(match.b, 99)
+        self.assertEqual(match.size, 5)
+        self.assertEqual(a[match.a: match.a + match.size],
+                         b[match.b: match.b + match.size])
+        self.assertFalse(self.longer_match_exists(a, b, match.size))
+
+
 def test_main():
     difflib.HtmlDiff._default_prefix = 0
     Doctests = doctest.DocTestSuite(difflib)
     run_unittest(
         TestWithAscii, TestAutojunk, TestSFpatches, TestSFbugs,
-        TestOutputFormat, TestBytes, TestJunkAPIs, Doctests)
+        TestOutputFormat, TestBytes, TestJunkAPIs, TestFindLongest, Doctests)
 
 if __name__ == '__main__':
     test_main()
diff --git a/Misc/ACKS b/Misc/ACKS
index 89f37e584ef8b..21822dd7524cf 100644
--- a/Misc/ACKS
+++ b/Misc/ACKS
@@ -88,6 +88,7 @@ Dwayne Bailey
 Stig Bakken
 Aleksandr Balezin
 Greg Ball
+Lewis Ball
 Luigi Ballabio
 Thomas Ballinger
 Jeff Balogh
diff --git a/Misc/NEWS.d/next/Library/2020-04-28-18-59-48.bpo-40394.Yi5uuM.rst b/Misc/NEWS.d/next/Library/2020-04-28-18-59-48.bpo-40394.Yi5uuM.rst
new file mode 100644
index 0000000000000..ef2e239b1e678
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2020-04-28-18-59-48.bpo-40394.Yi5uuM.rst
@@ -0,0 +1 @@
+Added default arguments to :meth:`difflib.SequenceMatcher.find_longest_match()`.
\ No newline at end of file



More information about the Python-checkins mailing list