[Python-checkins] python/dist/src/Lib difflib.py,1.11,1.12

Sun, 08 Jun 2003 04:07:10 -0700

Update of /cvsroot/python/python/dist/src/Lib
In directory sc8-pr-cvs1:/tmp/cvs-serv23394

Modified Files:
	difflib.py 
Log Message:
Added functions for creating context diffs and unified diffs.

Documentation update and NEWS item are forthcoming.

Index: difflib.py
===================================================================
RCS file: /cvsroot/python/python/dist/src/Lib/difflib.py,v
retrieving revision 1.11
retrieving revision 1.12
diff -C2 -d -r1.11 -r1.12
*** difflib.py	30 Oct 2002 06:09:58 -0000	1.11
--- difflib.py	8 Jun 2003 11:07:08 -0000	1.12
***************
*** 7,10 ****
--- 7,13 ----
      Use SequenceMatcher to return list of the best "good enough" matches.

+ Function context_diff(a, b):
+     For two lists of strings, return a delta in context diff format.
+ 
  Function ndiff(a, b):
      Return a delta: the difference between `a` and `b` (lists of strings).
***************
*** 13,16 ****
--- 16,22 ----
      Return one of the two sequences that generated an ndiff delta.

+ Function unified_diff(a, b):
+     For two lists of strings, return a delta in unified diff format.
+ 
  Class SequenceMatcher:
      A flexible class for comparing pairs of sequences of any type.
***************
*** 21,25 ****

  __all__ = ['get_close_matches', 'ndiff', 'restore', 'SequenceMatcher',
!            'Differ','IS_CHARACTER_JUNK', 'IS_LINE_JUNK']

  class SequenceMatcher:
--- 27,32 ----

  __all__ = ['get_close_matches', 'ndiff', 'restore', 'SequenceMatcher',
!            'Differ','IS_CHARACTER_JUNK', 'IS_LINE_JUNK', 'context_diff',
!            'unified_diff']

  class SequenceMatcher:
***************
*** 533,536 ****
--- 540,591 ----
          return answer

+     def get_grouped_opcodes(self, n=3):
+         """ Isolate change clusters by eliminating ranges with no changes.
+ 
+         Return a generator of groups with up to n lines of context.
+         Each group is in the same format as returned by get_opcodes().
+ 
+         >>> from pprint import pprint
+         >>> a = map(str, range(1,40))
+         >>> b = a[:]
+         >>> b[8:8] = ['i']     # Make an insertion
+         >>> b[20] += 'x'       # Make a replacement
+         >>> b[23:28] = []      # Make a deletion
+         >>> b[30] += 'y'       # Make another replacement
+         >>> pprint(list(SequenceMatcher(None,a,b).get_grouped_opcodes()))
+         [[('equal', 5, 8, 5, 8), ('insert', 8, 8, 8, 9), ('equal', 8, 11, 9, 12)],
+          [('equal', 16, 19, 17, 20),
+           ('replace', 19, 20, 20, 21),
+           ('equal', 20, 22, 21, 23),
+           ('delete', 22, 27, 23, 23),
+           ('equal', 27, 30, 23, 26)],
+          [('equal', 31, 34, 27, 30),
+           ('replace', 34, 35, 30, 31),
+           ('equal', 35, 38, 31, 34)]]
+         """
+ 
+         codes = self.get_opcodes()
+         # Fixup leading and trailing groups if they show no changes.
+         if codes[0][0] == 'equal':
+             tag, i1, i2, j1, j2 = codes[0]
+             codes[0] = tag, max(i1, i2-n), i2, max(j1, j2-n), j2
+         if codes[-1][0] == 'equal':
+             tag, i1, i2, j1, j2 = codes[-1]
+             codes[-1] = tag, i1, min(i2, i1+n), j1, min(j2, j1+n)
+ 
+         nn = n + n
+         group = []
+         for tag, i1, i2, j1, j2 in codes:
+             # End the current group and start a new one whenever
+             # there is a large range with no changes.
+             if tag == 'equal' and i2-i1 > nn:
+                 group.append((tag, i1, min(i2, i1+n), j1, min(j2, j1+n)))
+                 yield group
+                 group = []
+                 i1, j1 = max(i1, i2-n), max(j1, j2-n)
+             group.append((tag, i1, i2, j1 ,j2))
+         if group and not (len(group)==1 and group[0][0] == 'equal'):
+             yield group
+ 
      def ratio(self):
          """Return a measure of the sequences' similarity (float in [0,1]).
***************
*** 1042,1045 ****
--- 1097,1244 ----

  del re
+ 
+ 
+ def unified_diff(a, b, fromfile='', tofile='', fromfiledate='',
+                  tofiledate='', n=3, lineterm='\n'):
+     r"""
+     Compare two sequences of lines; generate the delta as a unified diff.
+ 
+     Unified diffs are a compact way of showing line changes and a few
+     lines of context.  The number of context lines is set by 'n' which
+     defaults to three.
+ 
+     By default, the diff control lines (those with ---, +++, or @@) are
+     created with a trailing newline.  This is helpful so that inputs
+     created from file.readlines() result in diffs that are suitable for
+     file.writelines() since both the inputs and outputs have trailing
+     newlines.
+ 
+     For inputs that do not have trailing newlines, set the lineterm
+     argument to "" so that the output will be uniformly newline free.
+ 
+     The unidiff format normally has a header for filenames and modification
+     times.  Any or all of these may be specified using strings for
+     'fromfile', 'tofile', 'fromfiledate', and 'tofiledate'.  The modification
+     times are normally expressed in the format returned by time.ctime().
+ 
+     Example:
+ 
+     >>> for line in unified_diff('one two three four'.split(),
+     ...             'zero one tree four'.split(), 'Original', 'Current',
+     ...             'Sat Jan 26 23:30:50 1991', 'Fri Jun 06 10:20:52 2003',
+     ...             lineterm=''):
+     ...     print line
+     --- Original Sat Jan 26 23:30:50 1991
+     +++ Current Fri Jun 06 10:20:52 2003
+     @@ -1,4 +1,4 @@
+     +zero
+      one
+     -two
+     -three
+     +tree
+      four
+     """
+ 
+     started = False
+     for group in SequenceMatcher(None,a,b).get_grouped_opcodes(n):
+         if not started:
+             yield '--- %s %s%s' % (fromfile, fromfiledate, lineterm)
+             yield '+++ %s %s%s' % (tofile, tofiledate, lineterm)
+             started = True
+         i1, i2, j1, j2 = group[0][1], group[-1][2], group[0][3], group[-1][4]
+         yield "@@ -%d,%d +%d,%d @@%s" % (i1+1, i2-i1, j1+1, j2-j1, lineterm)
+         for tag, i1, i2, j1, j2 in group:
+             if tag == 'equal':
+                 for line in a[i1:i2]:
+                     yield ' ' + line
+                 continue
+             if tag == 'replace' or tag == 'delete':
+                 for line in a[i1:i2]:
+                     yield '-' + line
+             if tag == 'replace' or tag == 'insert':
+                 for line in b[j1:j2]:
+                     yield '+' + line
+ 
+ # See http://www.unix.org/single_unix_specification/
+ def context_diff(a, b, fromfile='', tofile='',
+                  fromfiledate='', tofiledate='', n=3, lineterm='\n'):
+     r"""
+     Compare two sequences of lines; generate the delta as a context diff.
+ 
+     Context diffs are a compact way of showing line changes and a few
+     lines of context.  The number of context lines is set by 'n' which
+     defaults to three.
+ 
+     By default, the diff control lines (those with *** or ---) are
+     created with a trailing newline.  This is helpful so that inputs
+     created from file.readlines() result in diffs that are suitable for
+     file.writelines() since both the inputs and outputs have trailing
+     newlines.
+ 
+     For inputs that do not have trailing newlines, set the lineterm
+     argument to "" so that the output will be uniformly newline free.
+ 
+     The context diff format normally has a header for filenames and
+     modification times.  Any or all of these may be specified using
+     strings for 'fromfile', 'tofile', 'fromfiledate', and 'tofiledate'.
+     The modification times are normally expressed in the format returned
+     by time.ctime().  If not specified, the strings default to blanks.
+ 
+     Example:
+ 
+     >>> print ''.join(context_diff('one\ntwo\nthree\nfour\n'.splitlines(1),
+     ...       'zero\none\ntree\nfour\n'.splitlines(1), 'Original', 'Current',
+     ...       'Sat Jan 26 23:30:50 1991', 'Fri Jun 06 10:22:46 2003')),
+     *** Original Sat Jan 26 23:30:50 1991
+     --- Current Fri Jun 06 10:22:46 2003
+     ***************
+     *** 1,4 ****
+       one
+     ! two
+     ! three
+       four
+     --- 1,4 ----
+     + zero
+       one
+     ! tree
+       four
+     """
+ 
+     started = False
+     prefixmap = dict(insert='+ ', delete='- ', replace='! ', equal='  ')
+     for group in SequenceMatcher(None,a,b).get_grouped_opcodes(n):
+         if not started:
+             yield '*** %s %s%s' % (fromfile, fromfiledate, lineterm)
+             yield '--- %s %s%s' % (tofile, tofiledate, lineterm)
+             started = True
+         yield '***************%s' % (lineterm,)
+         if group[-1][2] - group[0][1] >= 2:
+             yield '*** %d,%d ****%s' % (group[0][1]+1, group[-1][2], lineterm)
+         else:
+             yield '*** %d ****%s' % (group[-1][2], lineterm)
+         empty = True
+         for tag, i1, i2, j1, j2 in group:
+             if tag == 'replace' or tag == 'delete':
+                 empty = False
+                 break
+         if not empty:
+             for tag, i1, i2, j1, j2 in group:
+                 if tag != 'insert':
+                     for line in a[i1:i2]:
+                         yield prefixmap[tag] + line
+         if group[-1][4] - group[0][3] >= 2:
+             yield '--- %d,%d ----%s' % (group[0][3]+1, group[-1][4], lineterm)
+         else:
+             yield '--- %d ----%s' % (group[-1][4], lineterm)
+         empty = True
+         for tag, i1, i2, j1, j2 in group:
+             if tag == 'replace' or tag == 'insert':
+                 empty = False
+                 break
+         if not empty:
+             for tag, i1, i2, j1, j2 in group:
+                 if tag != 'delete':
+                     for line in b[j1:j2]:
+                         yield prefixmap[tag] + line

  def ndiff(a, b, linejunk=None, charjunk=IS_CHARACTER_JUNK):