[Python-checkins] r69817 - in python/branches/io-c: Lib/test/test_io.py Modules/_textio.c

antoine.pitrou python-checkins at python.org
Fri Feb 20 21:45:50 CET 2009


Author: antoine.pitrou
Date: Fri Feb 20 21:45:50 2009
New Revision: 69817

Log:
Allow IncrementalNewlineDecoder to take unicode objects as decoding input if the decoder parameter is None.
This will help with rewriting StringIO in C.



Modified:
   python/branches/io-c/Lib/test/test_io.py
   python/branches/io-c/Modules/_textio.c

Modified: python/branches/io-c/Lib/test/test_io.py
==============================================================================
--- python/branches/io-c/Lib/test/test_io.py	(original)
+++ python/branches/io-c/Lib/test/test_io.py	Fri Feb 20 21:45:50 2009
@@ -1745,7 +1745,10 @@
 
         self.assertEqual(buffer.seekable(), txt.seekable())
 
-    def check_newline_decoder_utf8(self, decoder):
+
+class IncrementalNewlineDecoderTest(unittest.TestCase):
+
+    def check_newline_decoding_utf8(self, decoder):
         # UTF-8 specific tests for a newline decoder
         def _check_decode(b, s, **kwargs):
             # We exercise getstate() / setstate() as well as decode()
@@ -1787,12 +1790,20 @@
         _check_decode(b'\xe8\xa2\x88\r', "\u8888")
         _check_decode(b'\n', "\n")
 
-    def check_newline_decoder(self, decoder, encoding):
+    def check_newline_decoding(self, decoder, encoding):
         result = []
-        encoder = codecs.getincrementalencoder(encoding)()
-        def _decode_bytewise(s):
-            for b in encoder.encode(s):
-                result.append(decoder.decode(bytes([b])))
+        if encoding is not None:
+            encoder = codecs.getincrementalencoder(encoding)()
+            def _decode_bytewise(s):
+                # Decode one byte at a time
+                for b in encoder.encode(s):
+                    result.append(decoder.decode(bytes([b])))
+        else:
+            encoder = None
+            def _decode_bytewise(s):
+                # Decode one char at a time
+                for c in s:
+                    result.append(decoder.decode(c))
         self.assertEquals(decoder.newlines, None)
         _decode_bytewise("abc\n\r")
         self.assertEquals(decoder.newlines, '\n')
@@ -1805,22 +1816,28 @@
         _decode_bytewise("abc\r")
         self.assertEquals("".join(result), "abc\n\nabcabc\nabcabc")
         decoder.reset()
-        self.assertEquals(decoder.decode("abc".encode(encoding)), "abc")
+        input = "abc"
+        if encoder is not None:
+            encoder.reset()
+            input = encoder.encode(input)
+        self.assertEquals(decoder.decode(input), "abc")
         self.assertEquals(decoder.newlines, None)
 
     def test_newline_decoder(self):
         encodings = (
-            'utf-8', 'latin-1',
+            # None meaning the IncrementalNewlineDecoder takes unicode input
+            # rather than bytes input
+            None, 'utf-8', 'latin-1',
             'utf-16', 'utf-16-le', 'utf-16-be',
             'utf-32', 'utf-32-le', 'utf-32-be',
         )
         for enc in encodings:
-            decoder = codecs.getincrementaldecoder(enc)()
+            decoder = enc and codecs.getincrementaldecoder(enc)()
             decoder = io.IncrementalNewlineDecoder(decoder, translate=True)
-            self.check_newline_decoder(decoder, enc)
+            self.check_newline_decoding(decoder, enc)
         decoder = codecs.getincrementaldecoder("utf-8")()
         decoder = io.IncrementalNewlineDecoder(decoder, translate=True)
-        self.check_newline_decoder_utf8(decoder)
+        self.check_newline_decoding_utf8(decoder)
 
 
 # XXX Tests for open()
@@ -1933,7 +1950,8 @@
                          BufferedReaderTest, BufferedWriterTest,
                          BufferedRWPairTest, BufferedRandomTest,
                          StatefulIncrementalDecoderTest,
-                         TextIOWrapperTest, MiscIOTest
+                         IncrementalNewlineDecoderTest,
+                         TextIOWrapperTest, MiscIOTest,
                          )
 
 if __name__ == "__main__":

Modified: python/branches/io-c/Modules/_textio.c
==============================================================================
--- python/branches/io-c/Modules/_textio.c	(original)
+++ python/branches/io-c/Modules/_textio.c	Fri Feb 20 21:45:50 2009
@@ -154,7 +154,9 @@
     "another incremental decoder, translating \\r\\n and \\r into \\n.  It also\n"
     "records the types of newlines encountered.  When used with\n"
     "translate=False, it ensures that the newline sequence is returned in\n"
-    "one piece.\n"
+    "one piece. When used with decoder=None, it expects unicode strings as\n"
+    "decode input and translates newlines without first invoking an external\n"
+    "decoder.\n"
     );
 
 typedef struct {
@@ -226,8 +228,15 @@
     }
 
     /* decode input (with the eventual \r from a previous pass) */
-    output = PyObject_CallMethodObjArgs(self->decoder, _PyIO_str_decode,
-                                        input, final ? Py_True : Py_False, NULL);
+    if (self->decoder != Py_None) {
+        output = PyObject_CallMethodObjArgs(self->decoder,
+            _PyIO_str_decode, input, final ? Py_True : Py_False, NULL);
+    }
+    else {
+        output = input;
+        Py_INCREF(output);
+    }
+
     if (output == NULL)
         return NULL;
 
@@ -421,20 +430,25 @@
 static PyObject *
 IncrementalNewlineDecoder_getstate(PyNewLineDecoderObject *self, PyObject *args)
 {
-    PyObject *state = PyObject_CallMethodObjArgs(self->decoder,
-                                                 _PyIO_str_getstate, NULL);
     PyObject *buffer;
     unsigned PY_LONG_LONG flag;
 
-    if (state == NULL)
-        return NULL;
-
-    if (!PyArg_Parse(state, "(OK)", &buffer, &flag)) {
+    if (self->decoder != Py_None) {
+        PyObject *state = PyObject_CallMethodObjArgs(self->decoder,
+           _PyIO_str_getstate, NULL);
+        if (state == NULL)
+            return NULL;
+        if (!PyArg_Parse(state, "(OK)", &buffer, &flag)) {
+            Py_DECREF(state);
+            return NULL;
+        }
+        Py_INCREF(buffer);
         Py_DECREF(state);
-        return NULL;
     }
-    Py_INCREF(buffer);
-    Py_DECREF(state);
+    else {
+        buffer = PyBytes_FromString("");
+        flag = 0;
+    }
     flag <<= 1;
     if (self->pendingcr)
         flag |= 1;
@@ -453,7 +467,11 @@
     self->pendingcr = (int) flag & 1;
     flag >>= 1;
 
-    return PyObject_CallMethod(self->decoder, "setstate", "((OK))", buffer, flag);
+    if (self->decoder != Py_None)
+        return PyObject_CallMethod(self->decoder,
+                                   "setstate", "((OK))", buffer, flag);
+    else
+        Py_RETURN_NONE;
 }
 
 static PyObject *
@@ -461,7 +479,10 @@
 {
     self->seennl = 0;
     self->pendingcr = 0;
-    return PyObject_CallMethodObjArgs(self->decoder, _PyIO_str_reset, NULL);
+    if (self->decoder != Py_None)
+        return PyObject_CallMethodObjArgs(self->decoder, _PyIO_str_reset, NULL);
+    else
+        Py_RETURN_NONE;
 }
 
 static PyObject *


More information about the Python-checkins mailing list