[pypy-commit] pypy default: Rewrite the algorithm of readlines() based on the hypothesis that

Fri Nov 4 10:36:10 CET 2011

Author: Armin Rigo <arigo at tunes.org>
Branch: 
Changeset: r48726:c98931f5191b
Date: 2011-11-04 10:35 +0100
http://bitbucket.org/pypy/pypy/changeset/c98931f5191b/

Log:	Rewrite the algorithm of readlines() based on the hypothesis that it
	is equivalent to read() followed by splitting after each '\n'. I
	*think* this is true, because read() should itself perform the
	conversion from '\r' or '\r\n' to '\n' when the file is in text or
	universal-newline mode.

diff --git a/pypy/module/_file/interp_file.py b/pypy/module/_file/interp_file.py
--- a/pypy/module/_file/interp_file.py
+++ b/pypy/module/_file/interp_file.py
@@ -206,24 +206,28 @@
     @unwrap_spec(size=int)
     def direct_readlines(self, size=0):
         stream = self.getstream()
-        # NB. this implementation is very inefficient for unbuffered
-        # streams, but ok if stream.readline() is efficient.
+        # this is implemented as: .read().split('\n')
+        # except that it keeps the \n in the resulting strings
         if size <= 0:
-            result = []
-            while True:
-                line = stream.readline()
-                if not line:
-                    break
-                result.append(line)
-                size -= len(line)
+            data = stream.readall()
         else:
-            result = []
-            while size > 0:
-                line = stream.readline()
-                if not line:
-                    break
-                result.append(line)
-                size -= len(line)
+            data = stream.read(size)
+        result = []
+        splitfrom = 0
+        for i in range(len(data)):
+            if data[i] == '\n':
+                result.append(data[splitfrom : i + 1])
+                splitfrom = i + 1
+        #
+        if splitfrom < len(data):
+            # there is a partial line at the end.  If size > 0, it is likely
+            # to be because the 'read(size)' returned data up to the middle
+            # of a line.  In that case, use 'readline()' to read until the
+            # end of the current line.
+            data = data[splitfrom:]
+            if size > 0:
+                data += stream.readline()
+            result.append(data)
         return result
 
     @unwrap_spec(offset=r_longlong, whence=int)