[Python-checkins] cpython (2.7): use Py_ssize_t for file offset and length computations in iteration (closes #22526)

benjamin.peterson python-checkins at python.org
Wed Oct 1 03:17:27 CEST 2014


https://hg.python.org/cpython/rev/beadb3e1dc81
changeset:   92702:beadb3e1dc81
branch:      2.7
user:        Benjamin Peterson <benjamin at python.org>
date:        Tue Sep 30 21:17:15 2014 -0400
summary:
  use Py_ssize_t for file offset and length computations in iteration (closes #22526)

files:
  Lib/test/test_file2k.py |  12 ++++++++++++
  Misc/NEWS               |   2 ++
  Objects/fileobject.c    |  15 +++++++--------
  3 files changed, 21 insertions(+), 8 deletions(-)


diff --git a/Lib/test/test_file2k.py b/Lib/test/test_file2k.py
--- a/Lib/test/test_file2k.py
+++ b/Lib/test/test_file2k.py
@@ -436,6 +436,18 @@
         finally:
             f.close()
 
+    @test_support.precisionbigmemtest(2**31, 1)
+    def test_very_long_line(self, maxsize):
+        # Issue #22526
+        with open(TESTFN, "wb") as fp:
+            fp.write("\0"*2**31)
+        with open(TESTFN, "rb") as fp:
+            for l in fp:
+                pass
+        self.assertEqual(len(l), 2**31)
+        self.assertEqual(l.count("\0"), 2**31)
+        l = None
+
 class FileSubclassTests(unittest.TestCase):
 
     def testExit(self):
diff --git a/Misc/NEWS b/Misc/NEWS
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -10,6 +10,8 @@
 Core and Builtins
 -----------------
 
+- Issue #22526: Fix iterating through files with lines longer than 2^31 bytes.
+
 - Issue #22519: Fix overflow checking in PyString_Repr.
 
 - Issue #22518: Fix integer overflow issues in latin-1 encoding.
diff --git a/Objects/fileobject.c b/Objects/fileobject.c
--- a/Objects/fileobject.c
+++ b/Objects/fileobject.c
@@ -2236,7 +2236,7 @@
    (unless at EOF) and no more than bufsize.  Returns negative value on
    error, will set MemoryError if bufsize bytes cannot be allocated. */
 static int
-readahead(PyFileObject *f, int bufsize)
+readahead(PyFileObject *f, Py_ssize_t bufsize)
 {
     Py_ssize_t chunksize;
 
@@ -2274,7 +2274,7 @@
    logarithmic buffer growth to about 50 even when reading a 1gb line. */
 
 static PyStringObject *
-readahead_get_line_skip(PyFileObject *f, int skip, int bufsize)
+readahead_get_line_skip(PyFileObject *f, Py_ssize_t skip, Py_ssize_t bufsize)
 {
     PyStringObject* s;
     char *bufptr;
@@ -2294,10 +2294,10 @@
         bufptr++;                               /* Count the '\n' */
         len = bufptr - f->f_bufptr;
         s = (PyStringObject *)
-            PyString_FromStringAndSize(NULL, skip+len);
+            PyString_FromStringAndSize(NULL, skip + len);
         if (s == NULL)
             return NULL;
-        memcpy(PyString_AS_STRING(s)+skip, f->f_bufptr, len);
+        memcpy(PyString_AS_STRING(s) + skip, f->f_bufptr, len);
         f->f_bufptr = bufptr;
         if (bufptr == f->f_bufend)
             drop_readahead(f);
@@ -2305,14 +2305,13 @@
         bufptr = f->f_bufptr;
         buf = f->f_buf;
         f->f_buf = NULL;                /* Force new readahead buffer */
-        assert(skip+len < INT_MAX);
-        s = readahead_get_line_skip(
-            f, (int)(skip+len), bufsize + (bufsize>>2) );
+        assert(len <= PY_SSIZE_T_MAX - skip);
+        s = readahead_get_line_skip(f, skip + len, bufsize + (bufsize>>2));
         if (s == NULL) {
             PyMem_Free(buf);
             return NULL;
         }
-        memcpy(PyString_AS_STRING(s)+skip, bufptr, len);
+        memcpy(PyString_AS_STRING(s) + skip, bufptr, len);
         PyMem_Free(buf);
     }
     return s;

-- 
Repository URL: https://hg.python.org/cpython


More information about the Python-checkins mailing list