[Python-checkins] python/dist/src/Lib codecs.py, 1.35.2.10, 1.35.2.11
loewis@users.sourceforge.net
loewis at users.sourceforge.net
Sun Sep 18 10:45:38 CEST 2005
Update of /cvsroot/python/python/dist/src/Lib
In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv18448/Lib
Modified Files:
Tag: release24-maint
codecs.py
Log Message:
Patch #1268314: Cache lines in StreamReader.readlines for performance.
Index: codecs.py
===================================================================
RCS file: /cvsroot/python/python/dist/src/Lib/codecs.py,v
retrieving revision 1.35.2.10
retrieving revision 1.35.2.11
diff -u -d -r1.35.2.10 -r1.35.2.11
--- codecs.py 1 Sep 2005 12:03:14 -0000 1.35.2.10
+++ codecs.py 18 Sep 2005 08:45:35 -0000 1.35.2.11
@@ -232,6 +232,7 @@
# For str->str decoding this will stay a str
# For str->unicode decoding the first read will promote it to unicode
self.charbuffer = ""
+ self.linebuffer = None
def decode(self, input, errors='strict'):
raise NotImplementedError
@@ -264,6 +265,11 @@
optional encoding endings or state markers are available
on the stream, these should be read too.
"""
+ # If we have lines cached, first merge them back into characters
+ if self.linebuffer:
+ self.charbuffer = "".join(self.linebuffer)
+ self.linebuffer = None
+
# read until we get the required number of characters (if available)
while True:
# can the request be satisfied from the character buffer?
@@ -316,6 +322,20 @@
read() method.
"""
+ # If we have lines cached from an earlier read, return
+ # them unconditionally
+ if self.linebuffer:
+ line = self.linebuffer[0]
+ del self.linebuffer[0]
+ if len(self.linebuffer) == 1:
+ # revert to charbuffer mode; we might need more data
+ # next time
+ self.charbuffer = self.linebuffer[0]
+ self.linebuffer = None
+ if not keepends:
+ line = line.splitlines(False)[0]
+ return line
+
readsize = size or 72
line = ""
# If size is given, we call read() only once
@@ -331,6 +351,22 @@
line += data
lines = line.splitlines(True)
if lines:
+ if len(lines) > 1:
+ # More than one line result; the first line is a full line
+ # to return
+ line = lines[0]
+ del lines[0]
+ if len(lines) > 1:
+ # cache the remaining lines
+ lines[-1] += self.charbuffer
+ self.linebuffer = lines
+ self.charbuffer = None
+ else:
+ # only one remaining line, put it back into charbuffer
+ self.charbuffer = lines[0] + self.charbuffer
+ if not keepends:
+ line = line.splitlines(False)[0]
+ break
line0withend = lines[0]
line0withoutend = lines[0].splitlines(False)[0]
if line0withend != line0withoutend: # We really have a line end
@@ -376,6 +412,7 @@
"""
self.bytebuffer = ""
self.charbuffer = u""
+ self.linebuffer = None
def seek(self, offset, whence=0):
""" Set the input stream's current position.
More information about the Python-checkins mailing list