[Patches] fix mmap module (borken seek, 64-bit stuff, overflows)

Trent Mick trentm@activestate.com
Thu, 1 Jun 2000 20:02:08 -0700


Discussion:

This patch fixes some issues in the mmap module. The changes are:
- The use of HFILE for Win32 is no longer appropriate for Win32/64,
  all of the associated system calls use a pointer length integer, hence HFILE
  is replaced with INT_PTR (a Windows world typedef for this)
  
- Do proper bounds checking limiting the length of a mmap'd file to [0,
  INT_MAX] (see _GetMapSize()). INT_MAX is chosen because this is the current
  restriction on the length of Python objects and a mmap'd file larger than
  this cannot be used effectively anyway. For instance a mmap'd file longer
  than INT_MAX cannot be sliced beyond INT_MAX, or indexed, etc. Until the
  limit on the size of Python objects increases then there is little use in
  having larger mmap'd files.
  
- The mmap_seek is fairly significantly broken (seeking from the current
  position or from the end are both borken, IIRC). This patch fixes that,
  checks for possible out of range seek values, and extends test_mmap.py to
  test this stuff.
  
- Use a C int for variables associated with the length field of Python
  objects, this is what it is and the distinction matters on Linux64, for
  example.
  
- Use safe number literals. (some unsigned)0xFFFFFFFF -> (some unsigned)-1
  The former does not give the intended result iff 'some unsigned' is >
  32-bits.
 

Legal:

I confirm that, to the best of my knowledge and belief, this
contribution is free of any claims of third parties under
copyright, patent or other rights or interests ("claims").  To
the extent that I have any such claims, I hereby grant to CNRI a
nonexclusive, irrevocable, royalty-free, worldwide license to
reproduce, distribute, perform and/or display publicly, prepare
derivative versions, and otherwise use this contribution as part
of the Python software and its related documentation, or any
derivative versions thereof, at no cost to CNRI or its licensed
users, and to authorize others to do so.

I acknowledge that CNRI may, at its sole discretion, decide
whether or not to incorporate this contribution in the Python
software and its related documentation.  I further grant CNRI
permission to use my name and other identifying information
provided to CNRI by me for use in connection with the Python
software and its related documentation.


Patch (use 'patch -p8'):

*** /home/trentm/main/contrib/python/dist/src/Modules/mmapmodule.c	Thu Jun  1 00:13:39 2000
--- /home/trentm/main/Apps/Perlium/Python/dist/src/Modules/mmapmodule.c	Wed May 31 23:54:18 2000
***************
*** 44,50 ****
  
  #ifdef MS_WIN32
  	HANDLE	map_handle;
! 	HFILE	file_handle;
  	char *	tagname;
  #endif
  
--- 44,50 ----
  
  #ifdef MS_WIN32
  	HANDLE	map_handle;
! 	INT_PTR	file_handle;
  	char *	tagname;
  #endif
  
***************
*** 118,124 ****
  	char value;
  	char * where;
  	CHECK_VALID(NULL);
! 	if (self->pos >= 0 && self->pos < self->size) {
  	        where = self->data + self->pos;
  		value = (char) *(where);
  		self->pos += 1;
--- 118,124 ----
  	char value;
  	char * where;
  	CHECK_VALID(NULL);
! 	if (self->pos < self->size) {
  	        where = self->data + self->pos;
  		value = (char) *(where);
  		self->pos += 1;
***************
*** 146,152 ****
  	else
  		++eol;		/* we're interested in the position after the
  				   newline. */
! 	result = PyString_FromStringAndSize(start, (long) (eol - start));
  	self->pos += (eol - start);
  	return (result);
  }
--- 146,152 ----
  	else
  		++eol;		/* we're interested in the position after the
  				   newline. */
! 	result = PyString_FromStringAndSize(start, (int) (eol - start));
  	self->pos += (eol - start);
  	return (result);
  }
***************
*** 175,186 ****
  mmap_find_method (mmap_object *self,
  		      PyObject *args)
  {
! 	long start = self->pos;
  	char * needle;
  	int len;
  
  	CHECK_VALID(NULL);
! 	if (!PyArg_ParseTuple (args, "s#|l", &needle, &len, &start)) {
  		return NULL;
  	} else {
  		char * p = self->data+self->pos;
--- 175,186 ----
  mmap_find_method (mmap_object *self,
  		      PyObject *args)
  {
! 	int start = self->pos;
  	char * needle;
  	int len;
  
  	CHECK_VALID(NULL);
! 	if (!PyArg_ParseTuple (args, "s#|i", &needle, &len, &start)) {
  		return NULL;
  	} else {
  		char * p = self->data+self->pos;
***************
*** 193,200 ****
  			}
  			if (!*n) {
  				return Py_BuildValue (
! 					"l",
! 					(long) (p - (self->data + start)));
  			}
  			p++;
  		}
--- 193,200 ----
  			}
  			if (!*n) {
  				return Py_BuildValue (
! 					"i",
! 					(int) (p - (self->data + start)));
  			}
  			p++;
  		}
***************
*** 246,252 ****
  	CHECK_VALID(NULL);
  
  #ifdef MS_WIN32
! 	if (self->file_handle != (HFILE) 0xFFFFFFFF) {
  		return (Py_BuildValue (
  			"l",
  			GetFileSize ((HANDLE)self->file_handle, NULL)));
--- 246,252 ----
  	CHECK_VALID(NULL);
  
  #ifdef MS_WIN32
! 	if (self->file_handle != (INT_PTR) -1) {
  		return (Py_BuildValue (
  			"l",
  			GetFileSize ((HANDLE)self->file_handle, NULL)));
***************
*** 383,421 ****
  static PyObject *
  mmap_seek_method (mmap_object * self, PyObject * args)
  {
! 	/* ptrdiff_t dist; */
! 	long dist;
  	int how=0;
  	CHECK_VALID(NULL);
! 	if (!PyArg_ParseTuple (args, "l|i", &dist, &how)) {
  		return(NULL);
  	} else {
! 		unsigned long where;
  		switch (how) {
! 		case 0:
  			where = dist;
  			break;
! 		case 1:
  			where = self->pos + dist;
  			break;
! 		case 2:
! 			where = self->size - dist;
  			break;
  		default:
  			PyErr_SetString (PyExc_ValueError,
  					 "unknown seek type");
  			return NULL;
  		}
! 		if ((where >= 0) && (where < (self->size))) {
! 			self->pos = where;
! 			Py_INCREF (Py_None);
! 			return (Py_None);
! 		} else {
! 			PyErr_SetString (PyExc_ValueError,
! 					 "seek out of range");
! 			return NULL;
! 		}
  	}
  }
  
  static PyObject *
--- 383,426 ----
  static PyObject *
  mmap_seek_method (mmap_object * self, PyObject * args)
  {
! 	int dist;
  	int how=0;
  	CHECK_VALID(NULL);
! 	if (!PyArg_ParseTuple (args, "i|i", &dist, &how)) {
  		return(NULL);
  	} else {
! 		size_t where;
  		switch (how) {
! 		case 0: /* relative to start */
! 			if (dist < 0)
! 				goto onoutofrange;
  			where = dist;
  			break;
! 		case 1: /* relative to current position */
! 			if ((int)self->pos + dist < 0)
! 				goto onoutofrange;
  			where = self->pos + dist;
  			break;
! 		case 2: /* relative to end */
! 			if ((int)self->size + dist < 0)
! 				goto onoutofrange;
! 			where = self->size + dist;
  			break;
  		default:
  			PyErr_SetString (PyExc_ValueError,
  					 "unknown seek type");
  			return NULL;
  		}
! 		if (where > self->size)
! 			goto onoutofrange;
! 		self->pos = where;
! 		Py_INCREF (Py_None);
! 		return (Py_None);
  	}
+ 
+ onoutofrange:
+ 	PyErr_SetString (PyExc_ValueError, "seek out of range");
+ 	return NULL;
  }
  
  static PyObject *
***************
*** 689,711 ****
  	0,					/*tp_doc*/
  };
  
  #ifdef UNIX 
  static PyObject *
  new_mmap_object (PyObject * self, PyObject * args, PyObject *kwdict)
  {
  	mmap_object * m_obj;
! 	unsigned long map_size;
  	int fd, flags = MAP_SHARED, prot = PROT_WRITE | PROT_READ;
  	char * filename;
  	int namelen;
  	char *keywords[] = {"file", "size", "flags", "prot", NULL};
  
  	if (!PyArg_ParseTupleAndKeywords(args, kwdict, 
! 					 "il|ii", keywords, 
! 					 &fd, &map_size, &flags, &prot)
  		)
  		return NULL;
!   
  	m_obj = PyObject_New (mmap_object, &mmap_object_type);
  	if (m_obj == NULL) {return NULL;}
  	m_obj->size = (size_t) map_size;
--- 694,777 ----
  	0,					/*tp_doc*/
  };
  
+ 
+ /* extract the map size from the given PyObject
+ 
+    The map size is restricted to [0, INT_MAX] because this is the current
+    Python limitation on object sizes. Although the mmap object *could* handle
+    a larger map size, there is no point because all the useful operations
+    (len(), slicing(), sequence indexing) are limited by a C int.
+ 
+    Returns -1 on error, with an apprpriate Python exception raised. On
+    success, the map size is returned. */
+ static int
+ _GetMapSize(o)
+ 	PyObject *o;
+ {
+ 	if (PyInt_Check(o)) {
+ 		long i = PyInt_AsLong(o);
+ 		if (PyErr_Occurred())
+ 			return -1;
+ 		if (i < 0)
+ 			goto onnegoverflow;
+ 		if (i > INT_MAX)
+ 			goto onposoverflow;
+ 		return (int)i;
+ 	}
+ 	else if (PyLong_Check(o)) {
+ 		long i = PyLong_AsLong(o);
+ 		if (PyErr_Occurred()) {
+ 			/* yes negative overflow is mistaken for positive overflow
+ 			   but not worth the trouble to check sign of 'i' */
+ 			if (PyErr_ExceptionMatches(PyExc_OverflowError))
+ 				goto onposoverflow;
+ 			else
+ 				return -1;
+ 		}
+ 		if (i < 0)
+ 			goto onnegoverflow;
+ 		if (i > INT_MAX)
+ 			goto onposoverflow;
+ 		return (int)i;
+ 	}
+ 	else {
+ 		PyErr_SetString(PyExc_TypeError,
+ 			"map size must be an integral value");
+ 		return -1;
+ 	}
+ 
+ onnegoverflow:
+ 	PyErr_SetString(PyExc_OverflowError,
+ 		"memory mapped size must be positive");
+ 	return -1;
+ 
+ onposoverflow:
+ 	PyErr_SetString(PyExc_OverflowError,
+ 		"memory mapped size is too large (limited by C int)");
+ 	return -1;
+ }
+ 
  #ifdef UNIX 
  static PyObject *
  new_mmap_object (PyObject * self, PyObject * args, PyObject *kwdict)
  {
  	mmap_object * m_obj;
! 	PyObject *map_size_obj = NULL;
! 	int map_size;
  	int fd, flags = MAP_SHARED, prot = PROT_WRITE | PROT_READ;
  	char * filename;
  	int namelen;
  	char *keywords[] = {"file", "size", "flags", "prot", NULL};
  
  	if (!PyArg_ParseTupleAndKeywords(args, kwdict, 
! 					 "iO|ii", keywords, 
! 					 &fd, &map_size_obj, &flags, &prot)
  		)
  		return NULL;
! 	map_size = _GetMapSize(map_size_obj);
! 	if (map_size < 0)
! 		return NULL;
! 	
  	m_obj = PyObject_New (mmap_object, &mmap_object_type);
  	if (m_obj == NULL) {return NULL;}
  	m_obj->size = (size_t) map_size;
***************
*** 728,751 ****
  new_mmap_object (PyObject * self, PyObject * args)
  {
  	mmap_object * m_obj;
! 	unsigned long map_size;
  	char * tagname = "";
  
  	DWORD dwErr = 0;
  	int fileno;
! 	HFILE fh = 0;
  
  	/* Patch the object type */
  	mmap_object_type.ob_type = &PyType_Type;
  
  	if (!PyArg_ParseTuple(args,
! 			  "il|z",
  			  &fileno,
! 			  &map_size,
  			  &tagname)
  		)
  		return NULL;
    
  	/* if an actual filename has been specified */
  	if (fileno != 0) {
  		fh = _get_osfhandle(fileno);
--- 794,822 ----
  new_mmap_object (PyObject * self, PyObject * args)
  {
  	mmap_object * m_obj;
! 	PyObject *map_size_obj = NULL;
! 	int map_size;
  	char * tagname = "";
  
  	DWORD dwErr = 0;
  	int fileno;
! 	INT_PTR fh = 0;
  
  	/* Patch the object type */
  	mmap_object_type.ob_type = &PyType_Type;
  
  	if (!PyArg_ParseTuple(args,
! 			  "iO|z",
  			  &fileno,
! 			  &map_size_obj,
  			  &tagname)
  		)
  		return NULL;
    
+ 	map_size = _GetMapSize(map_size_obj);
+ 	if (map_size < 0)
+ 		return NULL;
+ 	
  	/* if an actual filename has been specified */
  	if (fileno != 0) {
  		fh = _get_osfhandle(fileno);
***************
*** 768,774 ****
  		}
  	}
  	else {
! 		m_obj->file_handle = (HFILE) 0xFFFFFFFF;
  		m_obj->size = map_size;
  	}
  
--- 839,845 ----
  		}
  	}
  	else {
! 		m_obj->file_handle = (INT_PTR) -1;
  		m_obj->size = map_size;
  	}
  
*** /home/trentm/main/contrib/python/dist/src/Lib/test/test_mmap.py	Thu Jun  1 00:13:38 2000
--- /home/trentm/main/Apps/Perlium/Python/dist/src/Lib/test/test_mmap.py	Wed May 31 23:54:16 2000
***************
*** 58,64 ****
          
          assert start == PAGESIZE
          assert end == PAGESIZE + 6
!         
      m.close()
      os.unlink("foo")
      print ' Test passed'
--- 58,99 ----
          
          assert start == PAGESIZE
          assert end == PAGESIZE + 6
! 
!     # test seeking around (try to overflow the seek implementation)
!     m.seek(0,0)
!     print '  Seek to zeroth byte'
!     assert m.tell() == 0
!     m.seek(42,1)
!     print '  Seek to 42nd byte'
!     assert m.tell() == 42
!     m.seek(0,2)
!     print '  Seek to last byte'
!     assert m.tell() == len(m)
! 	
!     print '  Try to seek to negative position...'
!     try:
!         m.seek(-1)
!     except ValueError:
!         pass
!     else:
!         assert 0, 'expected a ValueError but did not get it'
! 
!     print '  Try to seek beyond end of mmap...'
!     try:
!         m.seek(1,2)
!     except ValueError:
!         pass
!     else:
!         assert 0, 'expected a ValueError but did not get it'
! 
!     print '  Try to seek to negative position...'
!     try:
!         m.seek(-len(m)-1,2)
!     except ValueError:
!         pass
!     else:
!         assert 0, 'expected a ValueError but did not get it'
! 
      m.close()
      os.unlink("foo")
      print ' Test passed'


-- 
Trent Mick
trentm@activestate.com