[Python-checkins] python/dist/src/Lib codecs.py, 1.35.2.10, 1.35.2.11

loewis at users.sourceforge.net
Sun Sep 18 10:45:38 CEST 2005


Update of /cvsroot/python/python/dist/src/Lib
In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv18448/Lib

Modified Files:
      Tag: release24-maint
	codecs.py 
Log Message:
Patch #1268314: Cache lines in StreamReader.readline for performance.


Index: codecs.py
===================================================================
RCS file: /cvsroot/python/python/dist/src/Lib/codecs.py,v
retrieving revision 1.35.2.10
retrieving revision 1.35.2.11
diff -u -d -r1.35.2.10 -r1.35.2.11
--- codecs.py	1 Sep 2005 12:03:14 -0000	1.35.2.10
+++ codecs.py	18 Sep 2005 08:45:35 -0000	1.35.2.11
@@ -232,6 +232,7 @@
         # For str->str decoding this will stay a str
         # For str->unicode decoding the first read will promote it to unicode
         self.charbuffer = ""
+        self.linebuffer = None
 
     def decode(self, input, errors='strict'):
         raise NotImplementedError
@@ -264,6 +265,11 @@
             optional encoding endings or state markers are available
             on the stream, these should be read too.
         """
+        # If we have lines cached, first merge them back into characters
+        if self.linebuffer:
+            self.charbuffer = "".join(self.linebuffer)
+            self.linebuffer = None
+            
         # read until we get the required number of characters (if available)
         while True:
             # can the request can be satisfied from the character buffer?
@@ -316,6 +322,20 @@
             read() method.
 
         """
+        # If we have lines cached from an earlier read, return
+        # them unconditionally
+        if self.linebuffer:
+            line = self.linebuffer[0]
+            del self.linebuffer[0]
+            if len(self.linebuffer) == 1:
+                # revert to charbuffer mode; we might need more data
+                # next time
+                self.charbuffer = self.linebuffer[0]
+                self.linebuffer = None
+            if not keepends:
+                line = line.splitlines(False)[0]
+            return line
+            
         readsize = size or 72
         line = ""
         # If size is given, we call read() only once
@@ -331,6 +351,22 @@
             line += data
             lines = line.splitlines(True)
             if lines:
+                if len(lines) > 1:
+                    # More than one line result; the first line is a full line
+                    # to return
+                    line = lines[0]
+                    del lines[0]
+                    if len(lines) > 1:
+                        # cache the remaining lines
+                        lines[-1] += self.charbuffer
+                        self.linebuffer = lines
+                        self.charbuffer = None
+                    else:
+                        # only one remaining line, put it back into charbuffer
+                        self.charbuffer = lines[0] + self.charbuffer
+                    if not keepends:
+                        line = line.splitlines(False)[0]
+                    break
                 line0withend = lines[0]
                 line0withoutend = lines[0].splitlines(False)[0]
                 if line0withend != line0withoutend: # We really have a line end
@@ -376,6 +412,7 @@
         """
         self.bytebuffer = ""
         self.charbuffer = u""
+        self.linebuffer = None
 
     def seek(self, offset, whence=0):
         """ Set the input stream's current position.



More information about the Python-checkins mailing list