[Python-checkins] r69817 - in python/branches/io-c: Lib/test/test_io.py Modules/_textio.c
antoine.pitrou
python-checkins at python.org
Fri Feb 20 21:45:50 CET 2009
Author: antoine.pitrou
Date: Fri Feb 20 21:45:50 2009
New Revision: 69817
Log:
Allow IncrementalNewlineDecoder to take unicode objects as decoding input if the decoder parameter is None.
This will help with rewriting StringIO in C.
Modified:
python/branches/io-c/Lib/test/test_io.py
python/branches/io-c/Modules/_textio.c
Modified: python/branches/io-c/Lib/test/test_io.py
==============================================================================
--- python/branches/io-c/Lib/test/test_io.py (original)
+++ python/branches/io-c/Lib/test/test_io.py Fri Feb 20 21:45:50 2009
@@ -1745,7 +1745,10 @@
self.assertEqual(buffer.seekable(), txt.seekable())
- def check_newline_decoder_utf8(self, decoder):
+
+class IncrementalNewlineDecoderTest(unittest.TestCase):
+
+ def check_newline_decoding_utf8(self, decoder):
# UTF-8 specific tests for a newline decoder
def _check_decode(b, s, **kwargs):
# We exercise getstate() / setstate() as well as decode()
@@ -1787,12 +1790,20 @@
_check_decode(b'\xe8\xa2\x88\r', "\u8888")
_check_decode(b'\n', "\n")
- def check_newline_decoder(self, decoder, encoding):
+ def check_newline_decoding(self, decoder, encoding):
result = []
- encoder = codecs.getincrementalencoder(encoding)()
- def _decode_bytewise(s):
- for b in encoder.encode(s):
- result.append(decoder.decode(bytes([b])))
+ if encoding is not None:
+ encoder = codecs.getincrementalencoder(encoding)()
+ def _decode_bytewise(s):
+ # Decode one byte at a time
+ for b in encoder.encode(s):
+ result.append(decoder.decode(bytes([b])))
+ else:
+ encoder = None
+ def _decode_bytewise(s):
+ # Decode one char at a time
+ for c in s:
+ result.append(decoder.decode(c))
self.assertEquals(decoder.newlines, None)
_decode_bytewise("abc\n\r")
self.assertEquals(decoder.newlines, '\n')
@@ -1805,22 +1816,28 @@
_decode_bytewise("abc\r")
self.assertEquals("".join(result), "abc\n\nabcabc\nabcabc")
decoder.reset()
- self.assertEquals(decoder.decode("abc".encode(encoding)), "abc")
+ input = "abc"
+ if encoder is not None:
+ encoder.reset()
+ input = encoder.encode(input)
+ self.assertEquals(decoder.decode(input), "abc")
self.assertEquals(decoder.newlines, None)
def test_newline_decoder(self):
encodings = (
- 'utf-8', 'latin-1',
+ # None meaning the IncrementalNewlineDecoder takes unicode input
+ # rather than bytes input
+ None, 'utf-8', 'latin-1',
'utf-16', 'utf-16-le', 'utf-16-be',
'utf-32', 'utf-32-le', 'utf-32-be',
)
for enc in encodings:
- decoder = codecs.getincrementaldecoder(enc)()
+ decoder = enc and codecs.getincrementaldecoder(enc)()
decoder = io.IncrementalNewlineDecoder(decoder, translate=True)
- self.check_newline_decoder(decoder, enc)
+ self.check_newline_decoding(decoder, enc)
decoder = codecs.getincrementaldecoder("utf-8")()
decoder = io.IncrementalNewlineDecoder(decoder, translate=True)
- self.check_newline_decoder_utf8(decoder)
+ self.check_newline_decoding_utf8(decoder)
# XXX Tests for open()
@@ -1933,7 +1950,8 @@
BufferedReaderTest, BufferedWriterTest,
BufferedRWPairTest, BufferedRandomTest,
StatefulIncrementalDecoderTest,
- TextIOWrapperTest, MiscIOTest
+ IncrementalNewlineDecoderTest,
+ TextIOWrapperTest, MiscIOTest,
)
if __name__ == "__main__":
Modified: python/branches/io-c/Modules/_textio.c
==============================================================================
--- python/branches/io-c/Modules/_textio.c (original)
+++ python/branches/io-c/Modules/_textio.c Fri Feb 20 21:45:50 2009
@@ -154,7 +154,9 @@
"another incremental decoder, translating \\r\\n and \\r into \\n. It also\n"
"records the types of newlines encountered. When used with\n"
"translate=False, it ensures that the newline sequence is returned in\n"
- "one piece.\n"
+ "one piece. When used with decoder=None, it expects unicode strings as\n"
+ "decode input and translates newlines without first invoking an external\n"
+ "decoder.\n"
);
typedef struct {
@@ -226,8 +228,15 @@
}
/* decode input (with the eventual \r from a previous pass) */
- output = PyObject_CallMethodObjArgs(self->decoder, _PyIO_str_decode,
- input, final ? Py_True : Py_False, NULL);
+ if (self->decoder != Py_None) {
+ output = PyObject_CallMethodObjArgs(self->decoder,
+ _PyIO_str_decode, input, final ? Py_True : Py_False, NULL);
+ }
+ else {
+ output = input;
+ Py_INCREF(output);
+ }
+
if (output == NULL)
return NULL;
@@ -421,20 +430,25 @@
static PyObject *
IncrementalNewlineDecoder_getstate(PyNewLineDecoderObject *self, PyObject *args)
{
- PyObject *state = PyObject_CallMethodObjArgs(self->decoder,
- _PyIO_str_getstate, NULL);
PyObject *buffer;
unsigned PY_LONG_LONG flag;
- if (state == NULL)
- return NULL;
-
- if (!PyArg_Parse(state, "(OK)", &buffer, &flag)) {
+ if (self->decoder != Py_None) {
+ PyObject *state = PyObject_CallMethodObjArgs(self->decoder,
+ _PyIO_str_getstate, NULL);
+ if (state == NULL)
+ return NULL;
+ if (!PyArg_Parse(state, "(OK)", &buffer, &flag)) {
+ Py_DECREF(state);
+ return NULL;
+ }
+ Py_INCREF(buffer);
Py_DECREF(state);
- return NULL;
}
- Py_INCREF(buffer);
- Py_DECREF(state);
+ else {
+ buffer = PyBytes_FromString("");
+ flag = 0;
+ }
flag <<= 1;
if (self->pendingcr)
flag |= 1;
@@ -453,7 +467,11 @@
self->pendingcr = (int) flag & 1;
flag >>= 1;
- return PyObject_CallMethod(self->decoder, "setstate", "((OK))", buffer, flag);
+ if (self->decoder != Py_None)
+ return PyObject_CallMethod(self->decoder,
+ "setstate", "((OK))", buffer, flag);
+ else
+ Py_RETURN_NONE;
}
static PyObject *
@@ -461,7 +479,10 @@
{
self->seennl = 0;
self->pendingcr = 0;
- return PyObject_CallMethodObjArgs(self->decoder, _PyIO_str_reset, NULL);
+ if (self->decoder != Py_None)
+ return PyObject_CallMethodObjArgs(self->decoder, _PyIO_str_reset, NULL);
+ else
+ Py_RETURN_NONE;
}
static PyObject *
More information about the Python-checkins
mailing list