[Python-checkins] r88689 - in python/branches/release31-maint: Lib/test/support.py Lib/test/test_mmap.py Lib/test/test_zlib.py Misc/NEWS Modules/mmapmodule.c Modules/zlibmodule.c

antoine.pitrou python-checkins at python.org
Tue Mar 1 00:48:17 CET 2011


Author: antoine.pitrou
Date: Tue Mar  1 00:48:16 2011
New Revision: 88689

Log:
Merged revisions 88460,88464,88466,88486,88511,88652 via svnmerge from 
svn+ssh://pythondev@svn.python.org/python/branches/py3k

........
  r88460 | antoine.pitrou | 2011-02-21 19:03:13 +0100 (lun., 21 févr. 2011) | 4 lines
  
  Issue #10276: Fix the results of zlib.crc32() and zlib.adler32() on buffers
  larger than 4GB.  Patch by Nadeem Vawda.
........
  r88464 | antoine.pitrou | 2011-02-21 20:05:08 +0100 (lun., 21 févr. 2011) | 3 lines
  
  Fix issues on 32-bit systems introduced by r88460
........
  r88466 | antoine.pitrou | 2011-02-21 20:28:40 +0100 (lun., 21 févr. 2011) | 3 lines
  
  Fix compile error under MSVC introduced by r88460.
........
  r88486 | antoine.pitrou | 2011-02-22 00:41:12 +0100 (mar., 22 févr. 2011) | 5 lines
  
  Issue #4681: Allow mmap() to work on file sizes and offsets larger than
  4GB, even on 32-bit builds.  Initial patch by Ross Lagerwall, adapted for
  32-bit Windows.
........
  r88511 | antoine.pitrou | 2011-02-22 22:42:56 +0100 (mar., 22 févr. 2011) | 4 lines
  
  Issue #11277: finally fix Snow Leopard crash following r88460.
  (probably an OS-related issue with mmap)
........
  r88652 | antoine.pitrou | 2011-02-26 16:58:05 +0100 (sam., 26 févr. 2011) | 4 lines
  
  Issue #9931: Fix hangs in GUI tests under Windows in certain conditions.
  Patch by Hirokazu Yamamoto.
........


Modified:
   python/branches/release31-maint/   (props changed)
   python/branches/release31-maint/Lib/test/support.py
   python/branches/release31-maint/Lib/test/test_mmap.py
   python/branches/release31-maint/Lib/test/test_zlib.py
   python/branches/release31-maint/Misc/NEWS
   python/branches/release31-maint/Modules/mmapmodule.c
   python/branches/release31-maint/Modules/zlibmodule.c

Modified: python/branches/release31-maint/Lib/test/support.py
==============================================================================
--- python/branches/release31-maint/Lib/test/support.py	(original)
+++ python/branches/release31-maint/Lib/test/support.py	Tue Mar  1 00:48:16 2011
@@ -198,6 +198,36 @@
         # is exited) but there is a .pyo file.
         unlink(os.path.join(dirname, modname + '.pyo'))
 
+# On some platforms, should not run gui test even if it is allowed
+# in `use_resources'.
+if sys.platform.startswith('win'):
+    import ctypes
+    import ctypes.wintypes
+    def _is_gui_available():
+        UOI_FLAGS = 1
+        WSF_VISIBLE = 0x0001
+        class USEROBJECTFLAGS(ctypes.Structure):
+            _fields_ = [("fInherit", ctypes.wintypes.BOOL),
+                        ("fReserved", ctypes.wintypes.BOOL),
+                        ("dwFlags", ctypes.wintypes.DWORD)]
+        dll = ctypes.windll.user32
+        h = dll.GetProcessWindowStation()
+        if not h:
+            raise ctypes.WinError()
+        uof = USEROBJECTFLAGS()
+        needed = ctypes.wintypes.DWORD()
+        res = dll.GetUserObjectInformationW(h,
+            UOI_FLAGS,
+            ctypes.byref(uof),
+            ctypes.sizeof(uof),
+            ctypes.byref(needed))
+        if not res:
+            raise ctypes.WinError()
+        return bool(uof.dwFlags & WSF_VISIBLE)
+else:
+    def _is_gui_available():
+        return True
+
 def is_resource_enabled(resource):
     """Test whether a resource is enabled.  Known resources are set by
     regrtest.py."""
@@ -208,6 +238,8 @@
 
     If the caller's module is __main__ then automatically return True.  The
     possibility of False being returned occurs when regrtest.py is executing."""
+    if resource == 'gui' and not _is_gui_available():
+        raise unittest.SkipTest("Cannot use the 'gui' resource")
     # see if the caller's module is __main__ - if so, treat as if
     # the resource was set
     if sys._getframe(1).f_globals.get("__name__") == "__main__":
@@ -869,6 +901,8 @@
     return obj
 
 def requires_resource(resource):
+    if resource == 'gui' and not _is_gui_available():
+        return unittest.skip("resource 'gui' is not available")
     if is_resource_enabled(resource):
         return _id
     else:

Modified: python/branches/release31-maint/Lib/test/test_mmap.py
==============================================================================
--- python/branches/release31-maint/Lib/test/test_mmap.py	(original)
+++ python/branches/release31-maint/Lib/test/test_mmap.py	Tue Mar  1 00:48:16 2011
@@ -1,6 +1,6 @@
-from test.support import TESTFN, run_unittest, import_module
+from test.support import TESTFN, run_unittest, import_module, unlink, requires
 import unittest
-import os, re, itertools, socket
+import os, re, itertools, socket, sys
 
 # Skip test if we can't import mmap.
 mmap = import_module('mmap')
@@ -636,8 +636,63 @@
             finally:
                 s.close()
 
+
+class LargeMmapTests(unittest.TestCase):
+
+    def setUp(self):
+        unlink(TESTFN)
+
+    def tearDown(self):
+        unlink(TESTFN)
+
+    def _working_largefile(self):
+        # Only run if the current filesystem supports large files.
+        f = open(TESTFN, 'wb', buffering=0)
+        try:
+            f.seek(0x80000001)
+            f.write(b'x')
+            f.flush()
+        except (IOError, OverflowError):
+            raise unittest.SkipTest("filesystem does not have largefile support")
+        finally:
+            f.close()
+            unlink(TESTFN)
+
+    def test_large_offset(self):
+        if sys.platform[:3] == 'win' or sys.platform == 'darwin':
+            requires('largefile',
+                'test requires %s bytes and a long time to run' % str(0x180000000))
+        self._working_largefile()
+        with open(TESTFN, 'wb') as f:
+            f.seek(0x14FFFFFFF)
+            f.write(b" ")
+
+        with open(TESTFN, 'rb') as f:
+            m = mmap.mmap(f.fileno(), 0, offset=0x140000000, access=mmap.ACCESS_READ)
+            try:
+                self.assertEqual(m[0xFFFFFFF], 32)
+            finally:
+                m.close()
+
+    def test_large_filesize(self):
+        if sys.platform[:3] == 'win' or sys.platform == 'darwin':
+            requires('largefile',
+                'test requires %s bytes and a long time to run' % str(0x180000000))
+        self._working_largefile()
+        with open(TESTFN, 'wb') as f:
+            f.seek(0x17FFFFFFF)
+            f.write(b" ")
+
+        with open(TESTFN, 'rb') as f:
+            m = mmap.mmap(f.fileno(), 0x10000, access=mmap.ACCESS_READ)
+            try:
+                self.assertEqual(m.size(), 0x180000000)
+            finally:
+                m.close()
+
+
 def test_main():
-    run_unittest(MmapTests)
+    run_unittest(MmapTests, LargeMmapTests)
 
 if __name__ == '__main__':
     test_main()

Modified: python/branches/release31-maint/Lib/test/test_zlib.py
==============================================================================
--- python/branches/release31-maint/Lib/test/test_zlib.py	(original)
+++ python/branches/release31-maint/Lib/test/test_zlib.py	Tue Mar  1 00:48:16 2011
@@ -2,10 +2,16 @@
 from test import support
 import binascii
 import random
-from test.support import precisionbigmemtest, _1G
+import sys
+from test.support import precisionbigmemtest, _1G, _4G
 
 zlib = support.import_module('zlib')
 
+try:
+    import mmap
+except ImportError:
+    mmap = None
+
 
 class ChecksumTestCase(unittest.TestCase):
     # checksum test cases
@@ -57,6 +63,28 @@
         self.assertEqual(binascii.crc32(b'spam'), zlib.crc32(b'spam'))
 
 
+# Issue #10276 - check that inputs >=4GB are handled correctly.
+class ChecksumBigBufferTestCase(unittest.TestCase):
+
+    def setUp(self):
+        with open(support.TESTFN, "wb+") as f:
+            f.seek(_4G)
+            f.write(b"asdf")
+        with open(support.TESTFN, "rb") as f:
+            self.mapping = mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_READ)
+
+    def tearDown(self):
+        self.mapping.close()
+        support.unlink(support.TESTFN)
+
+    @unittest.skipUnless(mmap, "mmap() is not available.")
+    @unittest.skipUnless(sys.maxsize > _4G, "Can't run on a 32-bit system.")
+    @unittest.skipUnless(support.is_resource_enabled("largefile"),
+                         "May use lots of disk space.")
+    def test_big_buffer(self):
+        self.assertEqual(zlib.crc32(self.mapping), 3058686908)
+        self.assertEqual(zlib.adler32(self.mapping), 82837919)
+
 
 class ExceptionTestCase(unittest.TestCase):
     # make sure we generate some expected errors
@@ -567,6 +595,7 @@
 def test_main():
     support.run_unittest(
         ChecksumTestCase,
+        ChecksumBigBufferTestCase,
         ExceptionTestCase,
         CompressTestCase,
         CompressObjectTestCase

Modified: python/branches/release31-maint/Misc/NEWS
==============================================================================
--- python/branches/release31-maint/Misc/NEWS	(original)
+++ python/branches/release31-maint/Misc/NEWS	Tue Mar  1 00:48:16 2011
@@ -37,6 +37,13 @@
 Library
 -------
 
+- Issue #10276: Fix the results of zlib.crc32() and zlib.adler32() on buffers
+  larger than 4GB.  Patch by Nadeem Vawda.
+
+- Issue #4681: Allow mmap() to work on file sizes and offsets larger than
+  4GB, even on 32-bit builds.  Initial patch by Ross Lagerwall, adapted for
+  32-bit Windows.
+
 - email.header.Header was incorrectly encoding folding white space when
   rfc2047-encoding header values with embedded newlines, leaving them
   without folding whitespace.  It now uses the continuation_ws, as it

Modified: python/branches/release31-maint/Modules/mmapmodule.c
==============================================================================
--- python/branches/release31-maint/Modules/mmapmodule.c	(original)
+++ python/branches/release31-maint/Modules/mmapmodule.c	Tue Mar  1 00:48:16 2011
@@ -90,7 +90,11 @@
     char *      data;
     size_t      size;
     size_t      pos;    /* relative to offset */
-    size_t      offset;
+#ifdef MS_WINDOWS
+    PY_LONG_LONG offset;
+#else
+    off_t       offset;
+#endif
     int     exports;
 
 #ifdef MS_WINDOWS
@@ -435,7 +439,11 @@
             PyErr_SetFromErrno(mmap_module_error);
             return NULL;
         }
-        return PyLong_FromSsize_t(buf.st_size);
+#ifdef HAVE_LARGEFILE_SUPPORT
+        return PyLong_FromLongLong(buf.st_size);
+#else
+        return PyLong_FromLong(buf.st_size);
+#endif
     }
 #endif /* UNIX */
 }
@@ -469,17 +477,10 @@
         CloseHandle(self->map_handle);
         self->map_handle = NULL;
         /* Move to the desired EOF position */
-#if SIZEOF_SIZE_T > 4
         newSizeHigh = (DWORD)((self->offset + new_size) >> 32);
         newSizeLow = (DWORD)((self->offset + new_size) & 0xFFFFFFFF);
         off_hi = (DWORD)(self->offset >> 32);
         off_lo = (DWORD)(self->offset & 0xFFFFFFFF);
-#else
-        newSizeHigh = 0;
-        newSizeLow = (DWORD)(self->offset + new_size);
-        off_hi = 0;
-        off_lo = (DWORD)self->offset;
-#endif
         SetFilePointer(self->file_handle,
                        newSizeLow, &newSizeHigh, FILE_BEGIN);
         /* Change the size of the file */
@@ -1020,6 +1021,12 @@
 }
 
 #ifdef UNIX
+#ifdef HAVE_LARGEFILE_SUPPORT
+#define _Py_PARSE_OFF_T "L"
+#else
+#define _Py_PARSE_OFF_T "l"
+#endif
+
 static PyObject *
 new_mmap_object(PyTypeObject *type, PyObject *args, PyObject *kwdict)
 {
@@ -1027,8 +1034,9 @@
     struct stat st;
 #endif
     mmap_object *m_obj;
-    PyObject *map_size_obj = NULL, *offset_obj = NULL;
-    Py_ssize_t map_size, offset;
+    PyObject *map_size_obj = NULL;
+    Py_ssize_t map_size;
+    off_t offset = 0;
     int fd, flags = MAP_SHARED, prot = PROT_WRITE | PROT_READ;
     int devzero = -1;
     int access = (int)ACCESS_DEFAULT;
@@ -1036,16 +1044,18 @@
                                      "flags", "prot",
                                      "access", "offset", NULL};
 
-    if (!PyArg_ParseTupleAndKeywords(args, kwdict, "iO|iiiO", keywords,
+    if (!PyArg_ParseTupleAndKeywords(args, kwdict, "iO|iii" _Py_PARSE_OFF_T, keywords,
                                      &fd, &map_size_obj, &flags, &prot,
-                                     &access, &offset_obj))
+                                     &access, &offset))
         return NULL;
     map_size = _GetMapSize(map_size_obj, "size");
     if (map_size < 0)
         return NULL;
-    offset = _GetMapSize(offset_obj, "offset");
-    if (offset < 0)
+    if (offset < 0) {
+        PyErr_SetString(PyExc_OverflowError,
+            "memory mapped offset must be positive");
         return NULL;
+    }
 
     if ((access != (int)ACCESS_DEFAULT) &&
         ((flags != MAP_SHARED) || (prot != (PROT_WRITE | PROT_READ))))
@@ -1090,8 +1100,14 @@
                                 "mmap offset is greater than file size");
                 return NULL;
             }
-            map_size = st.st_size - offset;
-        } else if ((size_t)offset + (size_t)map_size > st.st_size) {
+            off_t calc_size = st.st_size - offset;
+            map_size = calc_size;
+            if (map_size != calc_size) {
+                PyErr_SetString(PyExc_ValueError,
+                                 "mmap length is too large");
+                 return NULL;
+             }
+        } else if (offset + (size_t)map_size > st.st_size) {
             PyErr_SetString(PyExc_ValueError,
                             "mmap length is greater than file size");
             return NULL;
@@ -1152,12 +1168,19 @@
 #endif /* UNIX */
 
 #ifdef MS_WINDOWS
+
+/* A note on sizes and offsets: while the actual map size must hold in a
+   Py_ssize_t, both the total file size and the start offset can be longer
+   than a Py_ssize_t, so we use PY_LONG_LONG which is always 64-bit.
+*/
+
 static PyObject *
 new_mmap_object(PyTypeObject *type, PyObject *args, PyObject *kwdict)
 {
     mmap_object *m_obj;
-    PyObject *map_size_obj = NULL, *offset_obj = NULL;
-    Py_ssize_t map_size, offset;
+    PyObject *map_size_obj = NULL;
+    Py_ssize_t map_size;
+    PY_LONG_LONG offset = 0, size;
     DWORD off_hi;       /* upper 32 bits of offset */
     DWORD off_lo;       /* lower 32 bits of offset */
     DWORD size_hi;      /* upper 32 bits of size */
@@ -1172,9 +1195,9 @@
                                       "tagname",
                                       "access", "offset", NULL };
 
-    if (!PyArg_ParseTupleAndKeywords(args, kwdict, "iO|ziO", keywords,
+    if (!PyArg_ParseTupleAndKeywords(args, kwdict, "iO|ziL", keywords,
                                      &fileno, &map_size_obj,
-                                     &tagname, &access, &offset_obj)) {
+                                     &tagname, &access, &offset)) {
         return NULL;
     }
 
@@ -1199,9 +1222,11 @@
     map_size = _GetMapSize(map_size_obj, "size");
     if (map_size < 0)
         return NULL;
-    offset = _GetMapSize(offset_obj, "offset");
-    if (offset < 0)
+    if (offset < 0) {
+        PyErr_SetString(PyExc_OverflowError,
+            "memory mapped offset must be positive");
         return NULL;
+    }
 
     /* assume -1 and 0 both mean invalid filedescriptor
        to 'anonymously' map memory.
@@ -1265,28 +1290,26 @@
                 return PyErr_SetFromWindowsErr(dwErr);
             }
 
-#if SIZEOF_SIZE_T > 4
-            m_obj->size = (((size_t)high)<<32) + low;
-#else
-            if (high)
-                /* File is too large to map completely */
-                m_obj->size = (size_t)-1;
-            else
-                m_obj->size = low;
-#endif
-            if (offset >= m_obj->size) {
+            size = (((PY_LONG_LONG) high) << 32) + low;
+            if (offset >= size) {
                 PyErr_SetString(PyExc_ValueError,
                                 "mmap offset is greater than file size");
                 Py_DECREF(m_obj);
                 return NULL;
             }
-            m_obj->size -= offset;
+            if (offset - size > PY_SSIZE_T_MAX)
+                /* Map area too large to fit in memory */
+                m_obj->size = (Py_ssize_t) -1;
+            else
+                m_obj->size = (Py_ssize_t) (size - offset);
         } else {
             m_obj->size = map_size;
+            size = offset + map_size;
         }
     }
     else {
         m_obj->size = map_size;
+        size = offset + map_size;
     }
 
     /* set the initial position */
@@ -1307,22 +1330,10 @@
         m_obj->tagname = NULL;
 
     m_obj->access = (access_mode)access;
-    /* DWORD is a 4-byte int.  If we're on a box where size_t consumes
-     * more than 4 bytes, we need to break it apart.  Else (size_t
-     * consumes 4 bytes), C doesn't define what happens if we shift
-     * right by 32, so we need different code.
-     */
-#if SIZEOF_SIZE_T > 4
-    size_hi = (DWORD)((offset + m_obj->size) >> 32);
-    size_lo = (DWORD)((offset + m_obj->size) & 0xFFFFFFFF);
+    size_hi = (DWORD)(size >> 32);
+    size_lo = (DWORD)(size & 0xFFFFFFFF);
     off_hi = (DWORD)(offset >> 32);
     off_lo = (DWORD)(offset & 0xFFFFFFFF);
-#else
-    size_hi = 0;
-    size_lo = (DWORD)(offset + m_obj->size);
-    off_hi = 0;
-    off_lo = (DWORD)offset;
-#endif
     /* For files, it would be sufficient to pass 0 as size.
        For anonymous maps, we have to pass the size explicitly. */
     m_obj->map_handle = CreateFileMapping(m_obj->file_handle,

Modified: python/branches/release31-maint/Modules/zlibmodule.c
==============================================================================
--- python/branches/release31-maint/Modules/zlibmodule.c	(original)
+++ python/branches/release31-maint/Modules/zlibmodule.c	Tue Mar  1 00:48:16 2011
@@ -931,8 +931,18 @@
     /* Releasing the GIL for very small buffers is inefficient
        and may lower performance */
     if (pbuf.len > 1024*5) {
+        unsigned char *buf = pbuf.buf;
+        Py_ssize_t len = pbuf.len;
+
         Py_BEGIN_ALLOW_THREADS
-        adler32val = adler32(adler32val, pbuf.buf, pbuf.len);
+        /* Avoid truncation of length for very large buffers. adler32() takes
+           length as an unsigned int, which may be narrower than Py_ssize_t. */
+        while (len > (size_t) UINT_MAX) {
+            adler32val = adler32(adler32val, buf, UINT_MAX);
+            buf += (size_t) UINT_MAX;
+            len -= (size_t) UINT_MAX;
+        }
+        adler32val = adler32(adler32val, buf, len);
         Py_END_ALLOW_THREADS
     } else {
         adler32val = adler32(adler32val, pbuf.buf, pbuf.len);
@@ -959,8 +969,18 @@
     /* Releasing the GIL for very small buffers is inefficient
        and may lower performance */
     if (pbuf.len > 1024*5) {
+        unsigned char *buf = pbuf.buf;
+        Py_ssize_t len = pbuf.len;
+
         Py_BEGIN_ALLOW_THREADS
-        signed_val = crc32(crc32val, pbuf.buf, pbuf.len);
+        /* Avoid truncation of length for very large buffers. crc32() takes
+           length as an unsigned int, which may be narrower than Py_ssize_t. */
+        while (len > (size_t) UINT_MAX) {
+            crc32val = crc32(crc32val, buf, UINT_MAX);
+            buf += (size_t) UINT_MAX;
+            len -= (size_t) UINT_MAX;
+        }
+        signed_val = crc32(crc32val, buf, len);
         Py_END_ALLOW_THREADS
     } else {
         signed_val = crc32(crc32val, pbuf.buf, pbuf.len);


More information about the Python-checkins mailing list