[Patches] Unicode Patch Set 2000-04-11

M.-A. Lemburg mal@lemburg.com
Tue, 11 Apr 2000 12:03:09 +0200


This is a multi-part message in MIME format.
--------------16779387E85086A13F2E7D09
Content-Type: text/plain; charset=us-ascii
Content-Transfer-Encoding: 7bit

This patch changes the semantics of the .splitlines() argument.
This method previously had an argument maxsplit which worked
just like the .split() method's argument of the same name.
For line splitting, maxsplit doesn't make much sense, so instead,
the argument is now called "keepends" and defaults to 0. When 
given and true, line end markers are included in the list entries.

I hope it's not too late for this change: .splitlines() isn't
documented anywhere and probably hasn't had too much exposure 
yet -- besides, this is alpha software ;-)

BTW, the change was needed in order to make file .readline() et al.
methods work with arbitrary string-like objects.


Patch Set Contents:
-------------------

Include/unicodeobject.h:

Changed PyUnicode_Splitlines() maxsplit argument to keepends.
The maxsplit functionality was replaced by the keepends
functionality which allows keeping the line end markers together
with the string.

Objects/stringobject.c:

The maxsplit functionality in .splitlines() was replaced by the keepends
functionality which allows keeping the line end markers together
with the string.

Added support for '%r' % obj: this inserts repr(obj) rather
than str(obj).

Objects/unicodectype.c:

Added a few missing whitespace Unicode char mappings.
Thanks to Brian Hooper.

Objects/unicodeobject.c:

The maxsplit functionality in .splitlines() was replaced by the keepends
functionality which allows keeping the line end markers together
with the string.

Python/bltinmodule.c:

Added a special case to unicode(): when passed a Unicode object
as first argument, the object is returned as-is. A TypeError is
raised when given a Unicode object *and* an encoding name.

Lib/UserString.py:

The maxsplit functionality in .splitlines() was replaced by the keepends
functionality which allows keeping the line end markers together
with the string.

Lib/codecs.py:

Added .writelines(), .readlines() and .readline() to all
codec classes.

Added encoding name attributes to wrapper classes which
allow applications to check the used encoding names.

Lib/test/test_string.py:

Modified .splitlines() tests according to the changes
in stringobject.c.

Lib/test/test_unicode.py:

Modified .splitlines() tests according to the changes
in unicodeobject.c.

-- 
Marc-Andre Lemburg
______________________________________________________________________
Business:                                      http://www.lemburg.com/
Python Pages:                           http://www.lemburg.com/python/
--------------16779387E85086A13F2E7D09
Content-Type: text/plain; charset=us-ascii;
 name="Unicode-Implementation-2000-04-11.patch"
Content-Transfer-Encoding: 7bit
Content-Disposition: inline;
 filename="Unicode-Implementation-2000-04-11.patch"

diff -u -rbP -x *.o -x *.pyc -x Makefile -x *~ -x *.so -x add2lib -x pgen -x buildno -x config.* -x libpython* -x python -x Setup -x Setup.local -x Setup.thread -x hassignal -x Makefile.pre -x *.bak -x *.s -x DEADJOE -x *.rej -x *.orig -x Demo -x CVS -x Doc -x *.orig -x .#* -x *.txt -x distutils -x PC -x PCbuild -x *.py CVS-Python/Include/unicodeobject.h Python+Unicode/Include/unicodeobject.h
--- CVS-Python/Include/unicodeobject.h	Mon Apr 10 15:41:41 2000
+++ Python+Unicode/Include/unicodeobject.h	Mon Apr 10 21:23:21 2000
@@ -674,7 +674,7 @@
     
 extern DL_IMPORT(PyObject*) PyUnicode_Splitlines(
     PyObject *s,		/* String to split */
-    int maxsplit		/* Maxsplit count */
+    int keepends		/* If true, line end markers are included */
     );		
 
 /* Translate a string by applying a character mapping table to it and
Only in CVS-Python/Lib/test/output: test_zipfile
diff -u -rbP -x *.o -x *.pyc -x Makefile -x *~ -x *.so -x add2lib -x pgen -x buildno -x config.* -x libpython* -x python -x Setup -x Setup.local -x Setup.thread -x hassignal -x Makefile.pre -x *.bak -x *.s -x DEADJOE -x *.rej -x *.orig -x Demo -x CVS -x Doc -x *.orig -x .#* -x *.txt -x distutils -x PC -x PCbuild -x *.py CVS-Python/Objects/stringobject.c Python+Unicode/Objects/stringobject.c
--- CVS-Python/Objects/stringobject.c	Mon Apr 10 15:47:21 2000
+++ Python+Unicode/Objects/stringobject.c	Tue Apr 11 11:20:24 2000
@@ -2072,11 +2072,11 @@
 
 
 static char splitlines__doc__[] =
-"S.splitlines([maxsplit]]) -> list of strings\n\
+"S.splitlines([keepends]]) -> list of strings\n\
 \n\
 Return a list of the lines in S, breaking at line boundaries.\n\
-If maxsplit is given, at most maxsplit are done. Line breaks are not\n\
-included in the resulting list.";
+Line breaks are not included in the resulting list unless keepends\n\
+is given and true.";
 
 #define SPLIT_APPEND(data, left, right)					\
 	str = PyString_FromStringAndSize(data + left, right - left);	\
@@ -2092,43 +2092,43 @@
 static PyObject*
 string_splitlines(PyStringObject *self, PyObject *args)
 {
-    int maxcount = -1;
     register int i;
     register int j;
     int len;
+    int keepends = 0;
     PyObject *list;
     PyObject *str;
     char *data;
 
-    if (!PyArg_ParseTuple(args, "|i:splitlines", &maxcount))
+    if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))
         return NULL;
 
     data = PyString_AS_STRING(self);
     len = PyString_GET_SIZE(self);
 
-    if (maxcount < 0)
-        maxcount = INT_MAX;
-
     list = PyList_New(0);
     if (!list)
         goto onError;
 
     for (i = j = 0; i < len; ) {
+	int eol;
+
 	/* Find a line and append it */
 	while (i < len && data[i] != '\n' && data[i] != '\r')
 	    i++;
-	if (maxcount-- <= 0)
-	    break;
-	SPLIT_APPEND(data, j, i);
 
 	/* Skip the line break reading CRLF as one line break */
+	eol = i;
 	if (i < len) {
 	    if (data[i] == '\r' && i + 1 < len &&
 		data[i+1] == '\n')
 		i += 2;
 	    else
 		i++;
+	    if (keepends)
+		eol = i;
 	}
+	SPLIT_APPEND(data, j, eol);
 	j = i;
     }
     if (j < len) {
@@ -2591,7 +2591,10 @@
 					fmt = fmt_start;
 					goto unicode;
 				}
+				if (c == 's')
 				temp = PyObject_Str(v);
+				else
+					temp = PyObject_Repr(v);
 				if (temp == NULL)
 					goto error;
 				if (!PyString_Check(temp)) {
diff -u -rbP -x *.o -x *.pyc -x Makefile -x *~ -x *.so -x add2lib -x pgen -x buildno -x config.* -x libpython* -x python -x Setup -x Setup.local -x Setup.thread -x hassignal -x Makefile.pre -x *.bak -x *.s -x DEADJOE -x *.rej -x *.orig -x Demo -x CVS -x Doc -x *.orig -x .#* -x *.txt -x distutils -x PC -x PCbuild -x *.py CVS-Python/Objects/unicodectype.c Python+Unicode/Objects/unicodectype.c
--- CVS-Python/Objects/unicodectype.c	Fri Mar 10 23:52:46 2000
+++ Python+Unicode/Objects/unicodectype.c	Tue Apr 11 10:40:08 2000
@@ -633,8 +633,8 @@
 
 #ifndef WANT_WCTYPE_FUNCTIONS
 
-/* Returns 1 for Unicode characters having the type 'WS', 'B' or 'S',
-   0 otherwise. */
+/* Returns 1 for Unicode characters having the bidirectional type
+   'WS', 'B' or 'S' or the category 'Zs', 0 otherwise. */
 
 int _PyUnicode_IsWhitespace(register const Py_UNICODE ch)
 {
@@ -649,6 +649,8 @@
     case 0x001E: /* RECORD SEPARATOR */
     case 0x001F: /* UNIT SEPARATOR */
     case 0x0020: /* SPACE */
+    case 0x0085: /* NEXT LINE */
+    case 0x00A0: /* NO-BREAK SPACE */
     case 0x1680: /* OGHAM SPACE MARK */
     case 0x2000: /* EN QUAD */
     case 0x2001: /* EM QUAD */
@@ -661,7 +663,9 @@
     case 0x2008: /* PUNCTUATION SPACE */
     case 0x2009: /* THIN SPACE */
     case 0x200A: /* HAIR SPACE */
+    case 0x200B: /* ZERO WIDTH SPACE */
     case 0x2028: /* LINE SEPARATOR */
+    case 0x2029: /* PARAGRAPH SEPARATOR */
     case 0x202F: /* NARROW NO-BREAK SPACE */
     case 0x3000: /* IDEOGRAPHIC SPACE */
 	return 1;
diff -u -rbP -x *.o -x *.pyc -x Makefile -x *~ -x *.so -x add2lib -x pgen -x buildno -x config.* -x libpython* -x python -x Setup -x Setup.local -x Setup.thread -x hassignal -x Makefile.pre -x *.bak -x *.s -x DEADJOE -x *.rej -x *.orig -x Demo -x CVS -x Doc -x *.orig -x .#* -x *.txt -x distutils -x PC -x PCbuild -x *.py CVS-Python/Objects/unicodeobject.c Python+Unicode/Objects/unicodeobject.c
--- CVS-Python/Objects/unicodeobject.c	Mon Apr 10 15:51:10 2000
+++ Python+Unicode/Objects/unicodeobject.c	Tue Apr 11 11:47:03 2000
@@ -2516,7 +2516,7 @@
 }
 
 PyObject *PyUnicode_Splitlines(PyObject *string,
-			       int maxcount)
+			       int keepends)
 {
     register int i;
     register int j;
@@ -2531,29 +2531,29 @@
     data = PyUnicode_AS_UNICODE(string);
     len = PyUnicode_GET_SIZE(string);
 
-    if (maxcount < 0)
-        maxcount = INT_MAX;
-
     list = PyList_New(0);
     if (!list)
         goto onError;
 
     for (i = j = 0; i < len; ) {
+	int eol;
+	
 	/* Find a line and append it */
 	while (i < len && !Py_UNICODE_ISLINEBREAK(data[i]))
 	    i++;
-	if (maxcount-- <= 0)
-	    break;
-	SPLIT_APPEND(data, j, i);
 
 	/* Skip the line break reading CRLF as one line break */
+	eol = i;
 	if (i < len) {
 	    if (data[i] == '\r' && i + 1 < len &&
 		data[i+1] == '\n')
 		i += 2;
 	    else
 		i++;
+	    if (keepends)
+		eol = i;
 	}
+	SPLIT_APPEND(data, j, eol);
 	j = i;
     }
     if (j < len) {
@@ -3785,21 +3785,21 @@
 }
 
 static char splitlines__doc__[] =
-"S.splitlines([maxsplit]]) -> list of strings\n\
+"S.splitlines([keepends]]) -> list of strings\n\
 \n\
 Return a list of the lines in S, breaking at line boundaries.\n\
-If maxsplit is given, at most maxsplit are done. Line breaks are not\n\
-included in the resulting list.";
+Line breaks are not included in the resulting list unless keepends\n\
+is given and true.";
 
 static PyObject*
 unicode_splitlines(PyUnicodeObject *self, PyObject *args)
 {
-    int maxcount = -1;
+    int keepends = 0;
 
-    if (!PyArg_ParseTuple(args, "|i:splitlines", &maxcount))
+    if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))
         return NULL;
 
-    return PyUnicode_Splitlines((PyObject *)self, maxcount);
+    return PyUnicode_Splitlines((PyObject *)self, keepends);
 }
 
 static
diff -u -rbP -x *.o -x *.pyc -x Makefile -x *~ -x *.so -x add2lib -x pgen -x buildno -x config.* -x libpython* -x python -x Setup -x Setup.local -x Setup.thread -x hassignal -x Makefile.pre -x *.bak -x *.s -x DEADJOE -x *.rej -x *.orig -x Demo -x CVS -x Doc -x *.orig -x .#* -x *.txt -x distutils -x PC -x PCbuild -x *.py CVS-Python/Python/bltinmodule.c Python+Unicode/Python/bltinmodule.c
--- CVS-Python/Python/bltinmodule.c	Wed Apr  5 22:11:21 2000
+++ Python+Unicode/Python/bltinmodule.c	Tue Apr 11 11:12:50 2000
@@ -165,15 +165,28 @@
 	PyObject *self;
 	PyObject *args;
 {
-	char *s;
+        PyObject *v;
+	const void *buffer;
 	int len;
 	char *encoding = NULL;
 	char *errors = NULL;
 
-	if ( !PyArg_ParseTuple(args, "s#|ss:unicode", &s, &len, 
-			       &encoding, &errors) )
+	if ( !PyArg_ParseTuple(args, "O|ss:unicode", &v, &encoding, &errors) )
 	    return NULL;
-	return PyUnicode_Decode(s, len, encoding, errors);
+	/* Special case: Unicode will stay Unicode */
+	if (PyUnicode_Check(v)) {
+	    if (encoding) {
+		PyErr_SetString(PyExc_TypeError,
+		  "unicode() does not support decoding of Unicode objects");
+		return NULL;
+	    }
+	    Py_INCREF(v);
+	    return v;
+	}
+	/* Read raw data and decode it */
+	if (PyObject_AsReadBuffer(v, &buffer, &len))
+	    return NULL;
+	return PyUnicode_Decode((const char *)buffer, len, encoding, errors);
 }
 
 static char unicode_doc[] =
diff -u -rP -x *.o -x *.pyc -x Makefile -x *~ -x *.so -x add2lib -x pgen -x buildno -x config.* -x libpython* -x python -x Setup -x Setup.local -x Setup.thread -x hassignal -x Makefile.pre -x *.bak -x *.s -x DEADJOE -x *.rej -x *.orig -x Demo -x CVS -x Doc -x *.orig -x .#* -x *.txt -x distutils -x PC -x PCbuild -x *.c -x *.h -x *.in -x output CVS-Python/Lib/UserString.py Python+Unicode/Lib/UserString.py
--- CVS-Python/Lib/UserString.py	Mon Apr  3 05:51:49 2000
+++ Python+Unicode/Lib/UserString.py	Tue Apr 11 11:36:56 2000
@@ -96,7 +96,7 @@
     def rstrip(self): return self.__class__(self.data.rstrip())
     def split(self, sep=None, maxsplit=-1): 
         return self.data.split(sep, maxsplit)
-    def splitlines(self, maxsplit=-1): return self.data.splitlines(maxsplit)
+    def splitlines(self, keepends=0): return self.data.splitlines(keepends)
     def startswith(self, prefix, start=0, end=sys.maxint): 
         return self.data.startswith(prefix, start, end)
     def strip(self): return self.__class__(self.data.strip())
diff -u -rP -x *.o -x *.pyc -x Makefile -x *~ -x *.so -x add2lib -x pgen -x buildno -x config.* -x libpython* -x python -x Setup -x Setup.local -x Setup.thread -x hassignal -x Makefile.pre -x *.bak -x *.s -x DEADJOE -x *.rej -x *.orig -x Demo -x CVS -x Doc -x *.orig -x .#* -x *.txt -x distutils -x PC -x PCbuild -x *.c -x *.h -x *.in -x output CVS-Python/Lib/codecs.py Python+Unicode/Lib/codecs.py
--- CVS-Python/Lib/codecs.py	Fri Mar 31 19:22:29 2000
+++ Python+Unicode/Lib/codecs.py	Mon Apr 10 22:29:00 2000
@@ -127,14 +127,19 @@
         self.stream = stream
         self.errors = errors
 
-    def write(self,object):
+    def write(self, object):
 
         """ Writes the object's contents encoded to self.stream.
         """
         data, consumed = self.encode(object,self.errors)
         self.stream.write(data)
 
-    # XXX .writelines() ?
+    def writelines(self, list):
+
+        """ Writes the concatenated list of strings to the stream
+            using .write().
+        """
+        self.write(''.join(list))
         
     def reset(self):
 
@@ -179,7 +184,7 @@
         self.stream = stream
         self.errors = errors
 
-    def read(self,size=-1):
+    def read(self, size=-1):
 
         """ Decodes data from the stream self.stream and returns the
             resulting object.
@@ -221,8 +226,44 @@
             else:
                 return object
 
-    # XXX .readline() and .readlines() (these are hard to implement
-    #     without using buffers for keeping read-ahead data)
+    def readline(self, size=None):
+
+        """ Read one line from the input stream and return the
+            decoded data.
+
+            Note: Unlike the .readlines() method, line breaking must
+            be implemented by the underlying stream's .readline()
+            method -- there is currently no support for line breaking
+            using the codec decoder due to lack of line buffering.
+
+            size, if given, is passed as size argument to the stream's
+            .readline() method.
+            
+        """
+        if size is None:
+            line = self.stream.readline()
+        else:
+            line = self.stream.readline(size)
+        return self.decode(line)[0]
+            
+
+    def readlines(self, sizehint=0):
+
+        """ Read all lines available on the input stream
+            and return them as list of lines.
+
+            Line breaks are implemented using the codec's decoder
+            method and are included in the list entries.
+            
+            sizehint, if given, is passed as size argument to the
+            stream's .read() method.
+
+        """
+        if sizehint is None:
+            data = self.stream.read()
+        else:
+            data = self.stream.read(sizehint)
+        return self.decode(data)[0].splitlines(1)
 
     def reset(self):
 
@@ -247,6 +288,9 @@
 
 class StreamReaderWriter:
 
+    # Optional attributes set by the file wrappers below
+    encoding = 'unknown'
+
     def __init__(self,stream,Reader,Writer,errors='strict'):
 
         """ Creates a StreamReaderWriter instance.
@@ -269,10 +313,22 @@
 
         return self.reader.read(size)
 
+    def readline(size=None):
+
+        return self.reader.readline(size)
+
+    def readlines(sizehint=None):
+
+        return self.reader.readlines(sizehint)
+
     def write(self,data):
 
         return self.writer.write(data)
 
+    def writelines(self,list):
+
+        return self.writer.writelines(list)
+
     def reset(self):
 
         self.reader.reset()
@@ -290,6 +346,10 @@
 
 class StreamRecoder:
 
+    # Optional attributes set by the file wrappers below
+    data_encoding = 'unknown'
+    file_encoding = 'unknown'
+
     def __init__(self,stream,encode,decode,Reader,Writer,errors='strict'):
 
         """ Creates a StreamRecoder instance which implements a two-way
@@ -328,13 +388,34 @@
         data, bytesencoded = self.encode(data, self.errors)
         return data
 
+    def readline(self,size=None):
+
+        if size is None:
+            data = self.reader.readline()
+        else:
+            data = self.reader.readline(size)
+        data, bytesencoded = self.encode(data, self.errors)
+        return data
+
+    def readlines(self,sizehint=None):
+
+        if sizehint is None:
+            data = self.reader.read()
+        else:
+            data = self.reader.read(sizehint)
+        data, bytesencoded = self.encode(data, self.errors)
+        return data.splitlines(1)
+
     def write(self,data):
 
         data, bytesdecoded = self.decode(data, self.errors)
         return self.writer.write(data)
 
-    # .writelines(), .readline() and .readlines() ... see notes
-    # above.
+    def writelines(self,list):
+
+        data = ''.join(list)
+        data, bytesdecoded = self.decode(data, self.errors)
+        return self.writer.write(data)
 
     def reset(self):
 
@@ -380,33 +461,45 @@
     if encoding is None:
         return file
     (e,d,sr,sw) = lookup(encoding)
-    return StreamReaderWriter(file, sr, sw, errors)
+    srw = StreamReaderWriter(file, sr, sw, errors)
+    # Add attributes to simplify introspection
+    srw.encoding = encoding
+    return srw
 
-def EncodedFile(file, input, output=None, errors='strict'):
+def EncodedFile(file, data_encoding, file_encoding=None, errors='strict'):
 
     """ Return a wrapped version of file which provides transparent
         encoding translation.
 
         Strings written to the wrapped file are interpreted according
-        to the given input encoding and then written to the original
-        file as string using the output encoding. The intermediate
-        encoding will usually be Unicode but depends on the specified
-        codecs.
+        to the given data_encoding and then written to the original
+        file as string using file_encoding. The intermediate encoding
+        will usually be Unicode but depends on the specified codecs.
 
-        If output is not given, it defaults to input.
+        Strings are read from the file using file_encoding and then
+        passed back to the caller as string using data_encoding.
+
+        If file_encoding is not given, it defaults to data_encoding.
 
         errors may be given to define the error handling. It defaults
         to 'strict' which causes ValueErrors to be raised in case an
         encoding error occurs.
 
+        data_encoding and file_encoding are added to the wrapped file
+        object as attributes .data_encoding and .file_encoding resp.
+
     """
-    if output is None:
-        output = input
-    encode, decode = lookup(input)[:2]
-    Reader, Writer = lookup(output)[2:]
-    return StreamRecoder(file,
-                         encode,decode,Reader,Writer,
-                         errors)
+    if file_encoding is None:
+        file_encoding = data_encoding
+    encode, decode = lookup(data_encoding)[:2]
+    Reader, Writer = lookup(file_encoding)[2:]
+    sr = StreamRecoder(file,
+                       encode,decode,Reader,Writer,
+                       errors)
+    # Add attributes to simplify introspection
+    sr.data_encoding = data_encoding
+    sr.file_encoding = file_encoding
+    return sr
 
 ### Tests
     
@@ -414,5 +507,8 @@
 
     import sys
     
-    # Make stdout translate Latin-1 into Unicode-Escape
-    sys.stdout = EncodedFile(sys.stdout, 'latin-1', 'unicode-escape')
+    # Make stdout translate Latin-1 output into UTF-8 output
+    sys.stdout = EncodedFile(sys.stdout, 'latin-1', 'utf-8')
+    
+    # Have stdin translate Latin-1 input into UTF-8 input
+    sys.stdin = EncodedFile(sys.stdin, 'utf-8', 'latin-1')
diff -u -rP -x *.o -x *.pyc -x Makefile -x *~ -x *.so -x add2lib -x pgen -x buildno -x config.* -x libpython* -x python -x Setup -x Setup.local -x Setup.thread -x hassignal -x Makefile.pre -x *.bak -x *.s -x DEADJOE -x *.rej -x *.orig -x Demo -x CVS -x Doc -x *.orig -x .#* -x *.txt -x distutils -x PC -x PCbuild -x *.c -x *.h -x *.in -x output CVS-Python/Lib/test/test_string.py Python+Unicode/Lib/test/test_string.py
--- CVS-Python/Lib/test/test_string.py	Mon Mar 20 17:36:30 2000
+++ Python+Unicode/Lib/test/test_string.py	Tue Apr 11 11:20:50 2000
@@ -90,8 +90,7 @@
 test('splitlines', "abc\ndef\r\nghi\n", ['abc', 'def', 'ghi'])
 test('splitlines', "abc\ndef\r\nghi\n\r", ['abc', 'def', 'ghi', ''])
 test('splitlines', "\nabc\ndef\r\nghi\n\r", ['', 'abc', 'def', 'ghi', ''])
-test('splitlines', "\nabc\ndef\r\nghi\n\r", ['', 'abc\012def\015\012ghi\012\015'], 1)
-test('splitlines', "\nabc\ndef\r\nghi\n\r", ['', 'abc', 'def\015\012ghi\012\015'], 2)
+test('splitlines', "\nabc\ndef\r\nghi\n\r", ['\n', 'abc\n', 'def\r\n', 'ghi\n', '\r'], 1)
 
 transtable = '\000\001\002\003\004\005\006\007\010\011\012\013\014\015\016\017\020\021\022\023\024\025\026\027\030\031\032\033\034\035\036\037 !"#$%&\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`xyzdefghijklmnopqrstuvwxyz{|}~\177\200\201\202\203\204\205\206\207\210\211\212\213\214\215\216\217\220\221\222\223\224\225\226\227\230\231\232\233\234\235\236\237\240\241\242\243\244\245\246\247\250\251\252\253\254\255\256\257\260\261\262\263\264\265\266\267\270\271\272\273\274\275\276\277\300\301\302\303\304\305\306\307\310\311\312\313\314\315\316\317\320\321\322\323\324\325\326\327\330\331\332\333\334\335\336\337\340\341\342\343\344\345\346\347\350\351\352\353\354\355\356\357\360\361\362\363\364\365\366\367\370\371\372\373\374\375\376\377'
 
diff -u -rP -x *.o -x *.pyc -x Makefile -x *~ -x *.so -x add2lib -x pgen -x buildno -x config.* -x libpython* -x python -x Setup -x Setup.local -x Setup.thread -x hassignal -x Makefile.pre -x *.bak -x *.s -x DEADJOE -x *.rej -x *.orig -x Demo -x CVS -x Doc -x *.orig -x .#* -x *.txt -x distutils -x PC -x PCbuild -x *.c -x *.h -x *.in -x output CVS-Python/Lib/test/test_unicode.py Python+Unicode/Lib/test/test_unicode.py
--- CVS-Python/Lib/test/test_unicode.py	Mon Apr 10 15:52:48 2000
+++ Python+Unicode/Lib/test/test_unicode.py	Tue Apr 11 11:23:02 2000
@@ -212,8 +212,7 @@
 test('splitlines', u"abc\ndef\r\nghi\n", [u'abc', u'def', u'ghi'])
 test('splitlines', u"abc\ndef\r\nghi\n\r", [u'abc', u'def', u'ghi', u''])
 test('splitlines', u"\nabc\ndef\r\nghi\n\r", [u'', u'abc', u'def', u'ghi', u''])
-test('splitlines', u"\nabc\ndef\r\nghi\n\r", [u'', u'abc\012def\015\012ghi\012\015'], 1)
-test('splitlines', u"\nabc\ndef\r\nghi\n\r", [u'', u'abc', u'def\015\012ghi\012\015'], 2)
+test('splitlines', u"\nabc\ndef\r\nghi\n\r", [u'\n', u'abc\n', u'def\r\n', u'ghi\n', u'\r'], 1)
 
 test('translate', u"abababc", u'bbbc', {ord('a'):None})
 test('translate', u"abababc", u'iiic', {ord('a'):None, ord('b'):ord('i')})
Only in CVS-Python/Lib/test: test_zipfile.py

--------------16779387E85086A13F2E7D09--