[pypy-commit] pypy default: Rewrite the algorithm of readlines() based on the hypothesis that
arigo
noreply at buildbot.pypy.org
Fri Nov 4 10:36:10 CET 2011
Author: Armin Rigo <arigo at tunes.org>
Branch:
Changeset: r48726:c98931f5191b
Date: 2011-11-04 10:35 +0100
http://bitbucket.org/pypy/pypy/changeset/c98931f5191b/
Log: Rewrite the algorithm of readlines() based on the hypothesis that it
is equivalent to read() followed by splitting after each '\n'. I
*think* this holds, because read() should itself perform the conversion
from '\r' or '\r\n' to '\n' when the file is in text or universal mode.
diff --git a/pypy/module/_file/interp_file.py b/pypy/module/_file/interp_file.py
--- a/pypy/module/_file/interp_file.py
+++ b/pypy/module/_file/interp_file.py
@@ -206,24 +206,28 @@
@unwrap_spec(size=int)
def direct_readlines(self, size=0):
stream = self.getstream()
- # NB. this implementation is very inefficient for unbuffered
- # streams, but ok if stream.readline() is efficient.
+ # this is implemented as: .read().split('\n')
+ # except that it keeps the \n in the resulting strings
if size <= 0:
- result = []
- while True:
- line = stream.readline()
- if not line:
- break
- result.append(line)
- size -= len(line)
+ data = stream.readall()
else:
- result = []
- while size > 0:
- line = stream.readline()
- if not line:
- break
- result.append(line)
- size -= len(line)
+ data = stream.read(size)
+ result = []
+ splitfrom = 0
+ for i in range(len(data)):
+ if data[i] == '\n':
+ result.append(data[splitfrom : i + 1])
+ splitfrom = i + 1
+ #
+ if splitfrom < len(data):
+ # there is a partial line at the end. If size > 0, it is likely
+ # to be because the 'read(size)' returned data up to the middle
+ # of a line. In that case, use 'readline()' to read until the
+ # end of the current line.
+ data = data[splitfrom:]
+ if size > 0:
+ data += stream.readline()
+ result.append(data)
return result
@unwrap_spec(offset=r_longlong, whence=int)
More information about the pypy-commit
mailing list