[pypy-commit] pypy default: issue #2557: file.read(1) could return 2 bytes on Windows

arigo pypy.commits at gmail.com
Sun May 14 12:04:25 EDT 2017


Author: Armin Rigo <arigo at tunes.org>
Branch: 
Changeset: r91279:643daedf4ed9
Date: 2017-05-14 18:03 +0200
http://bitbucket.org/pypy/pypy/changeset/643daedf4ed9/

Log:	issue #2557: file.read(1) could return 2 bytes on Windows

diff --git a/rpython/rlib/streamio.py b/rpython/rlib/streamio.py
--- a/rpython/rlib/streamio.py
+++ b/rpython/rlib/streamio.py
@@ -902,18 +902,30 @@
         self.do_read = base.read
         self.do_write = base.write
         self.do_flush = base.flush_buffers
-        self.lfbuffer = ""
+        self.readahead_count = 0   # either 0 or 1
 
     def read(self, n=-1):
-        data = self.lfbuffer + self.do_read(n)
-        self.lfbuffer = ""
+        """If n >= 1, this should read between 1 and n bytes."""
+        if n <= 0:
+            if n < 0:
+                return self.readall()
+            else:
+                return ""
+
+        data = self.do_read(n - self.readahead_count)
+        if self.readahead_count > 0:
+            data = self.readahead_char + data
+            self.readahead_count = 0
+
         if data.endswith("\r"):
             c = self.do_read(1)
-            if c and c[0] == '\n':
-                data = data + '\n'
-                self.lfbuffer = c[1:]
-            else:
-                self.lfbuffer = c
+            if len(c) >= 1:
+                assert len(c) == 1
+                if c[0] == '\n':
+                    data = data + '\n'
+                else:
+                    self.readahead_char = c[0]
+                    self.readahead_count = 1
 
         result = []
         offset = 0
@@ -936,21 +948,21 @@
 
     def tell(self):
         pos = self.base.tell()
-        return pos - len(self.lfbuffer)
+        return pos - self.readahead_count
 
     def seek(self, offset, whence):
         if whence == 1:
-            offset -= len(self.lfbuffer)   # correct for already-read-ahead character
+            offset -= self.readahead_count   # correct for already-read-ahead character
         self.base.seek(offset, whence)
-        self.lfbuffer = ""
+        self.readahead_count = 0
 
     def flush_buffers(self):
-        if self.lfbuffer:
+        if self.readahead_count > 0:
             try:
-                self.base.seek(-len(self.lfbuffer), 1)
+                self.base.seek(-self.readahead_count, 1)
             except (MyNotImplementedError, OSError):
                 return
-            self.lfbuffer = ""
+            self.readahead_count = 0
         self.do_flush()
 
     def write(self, data):
diff --git a/rpython/rlib/test/test_streamio.py b/rpython/rlib/test/test_streamio.py
--- a/rpython/rlib/test/test_streamio.py
+++ b/rpython/rlib/test/test_streamio.py
@@ -657,6 +657,23 @@
             assert line == ''
         self.interpret(f, [])
 
+    def test_read1(self):
+        s_input = "abc\r\nabc\nd\r\nef\r\ngha\rbc\rdef\n\r\n\r"
+        s_output = "abc\nabc\nd\nef\ngha\rbc\rdef\n\n\r"
+        assert s_output == s_input.replace('\r\n', '\n')
+        packets = list(s_input)
+        expected = list(s_output)
+        crlf = streamio.TextCRLFFilter(TSource(packets))
+        def f():
+            blocks = []
+            while True:
+                block = crlf.read(1)
+                if not block:
+                    break
+                blocks.append(block)
+            assert blocks == expected
+        self.interpret(f, [])
+
 class TestTextCRLFFilterLLInterp(BaseTestTextCRLFFilter):
     pass
 


More information about the pypy-commit mailing list