[Python-checkins] r46037 - in python/trunk: Doc/lib/libfuncs.tex Lib/test/test_file.py Misc/NEWS Objects/fileobject.c

Thu May 18 09:01:31 CEST 2006

Author: georg.brandl
Date: Thu May 18 09:01:27 2006
New Revision: 46037

Modified:
   python/trunk/Doc/lib/libfuncs.tex
   python/trunk/Lib/test/test_file.py
   python/trunk/Misc/NEWS
   python/trunk/Objects/fileobject.c
Log:
Bug #1462152: file() now checks more thoroughly for invalid mode
strings and removes a possible "U" before passing the mode to the
C library function.


Modified: python/trunk/Doc/lib/libfuncs.tex
==============================================================================

--- python/trunk/Doc/lib/libfuncs.tex	(original)
+++ python/trunk/Doc/lib/libfuncs.tex	Thu May 18 09:01:27 2006
@@ -418,7 +418,7 @@
   that differentiate between binary and text files (else it is
   ignored).  If the file cannot be opened, \exception{IOError} is
   raised.
-
+  
   In addition to the standard \cfunction{fopen()} values \var{mode}
   may be \code{'U'} or \code{'rU'}. If Python is built with universal
   newline support (the default) the file is opened as a text file, but
@@ -434,6 +434,9 @@
   have yet been seen), \code{'\e n'}, \code{'\e r'}, \code{'\e r\e n'},
   or a tuple containing all the newline types seen.
 
+  Python enforces that the mode, after stripping \code{'U'}, begins with
+  \code{'r'}, \code{'w'} or \code{'a'}.
+
   If \var{mode} is omitted, it defaults to \code{'r'}.  When opening a
   binary file, you should append \code{'b'} to the \var{mode} value
   for improved portability.  (It's useful even on systems which don't
@@ -456,6 +459,9 @@
     determine whether this is the case.}
 
   \versionadded{2.2}
+
+  \versionchanged[Restriction on first letter of mode string
+                  introduced]{2.5}
 \end{funcdesc}
 
 \begin{funcdesc}{filter}{function, list}

Modified: python/trunk/Lib/test/test_file.py
==============================================================================
--- python/trunk/Lib/test/test_file.py	(original)
+++ python/trunk/Lib/test/test_file.py	Thu May 18 09:01:27 2006
@@ -136,7 +136,7 @@
 bad_mode = "qwerty"
 try:
     open(TESTFN, bad_mode)
-except IOError, msg:
+except ValueError, msg:
     if msg[0] != 0:
         s = str(msg)
         if s.find(TESTFN) != -1 or s.find(bad_mode) == -1:

Modified: python/trunk/Misc/NEWS
==============================================================================
--- python/trunk/Misc/NEWS	(original)
+++ python/trunk/Misc/NEWS	Thu May 18 09:01:27 2006
@@ -12,6 +12,10 @@
 Core and builtins
 -----------------
 
+- Bug #1462152: file() now checks more thoroughly for invalid mode
+  strings and removes a possible "U" before passing the mode to the
+  C library function.
+
 - Patch #1488312, Fix memory alignment problem on SPARC in unicode
 
 - Bug #1487966: Fix SystemError with conditional expression in assignment

Modified: python/trunk/Objects/fileobject.c
==============================================================================
--- python/trunk/Objects/fileobject.c	(original)
+++ python/trunk/Objects/fileobject.c	Thu May 18 09:01:27 2006
@@ -136,46 +136,45 @@
 /* check for known incorrect mode strings - problem is, platforms are
    free to accept any mode characters they like and are supposed to
    ignore stuff they don't understand... write or append mode with
-   universal newline support is expressly forbidden by PEP 278. */
+   universal newline support is expressly forbidden by PEP 278.
+   Additionally, remove the 'U' from the mode string as platforms
+   won't know what it is. */
 /* zero return is kewl - one is un-kewl */
 static int
-check_the_mode(char *mode)
+sanitize_the_mode(char *mode)
 {
+	char *upos;
 	size_t len = strlen(mode);
 
-	switch (len) {
-	case 0:
+	if (!len) {
 		PyErr_SetString(PyExc_ValueError, "empty mode string");
 		return 1;
+	}
 
-	/* reject wU, aU */
-	case 2:
-		switch (mode[0]) {
-		case 'w':
-		case 'a':
-			if (mode[1] == 'U') {
-				PyErr_SetString(PyExc_ValueError,
-						"invalid mode string");
-				return 1;
-			}
-			break;
+	upos = strchr(mode, 'U');
+	if (upos) {
+		memmove(upos, upos+1, len-(upos-mode)); /* incl null char */
+
+		if (mode[0] == 'w' || mode[0] == 'a') {
+			PyErr_Format(PyExc_ValueError, "universal newline "
+			             "mode can only be used with modes "
+				     "starting with 'r'");
+			return 1;
 		}
-		break;
 
-	/* reject w+U, a+U, wU+, aU+ */
-	case 3:
-		switch (mode[0]) {
-		case 'w':
-		case 'a':
-			if ((mode[1] == '+' && mode[2] == 'U') ||
-			    (mode[1] == 'U' && mode[2] == '+')) {
-				PyErr_SetString(PyExc_ValueError,
-						"invalid mode string");
-				return 1;
-			}
-			break;
+		if (mode[0] != 'r') {
+			memmove(mode+1, mode, strlen(mode)+1);
+			mode[0] = 'r';
 		}
-		break;
+
+		if (!strchr(mode, 'b')) {
+			memmove(mode+2, mode+1, strlen(mode));
+			mode[1] = 'b';
+		}
+	} else if (mode[0] != 'r' && mode[0] != 'w' && mode[0] != 'a') {
+		PyErr_Format(PyExc_ValueError, "mode string must begin with "
+	        	    "one of 'r', 'w', 'a' or 'U', not '%.200s'", mode);
+		return 1;
 	}
 
 	return 0;
@@ -184,6 +183,7 @@
 static PyObject *
 open_the_file(PyFileObject *f, char *name, char *mode)
 {
+	char *newmode;
 	assert(f != NULL);
 	assert(PyFile_Check(f));
 #ifdef MS_WINDOWS
@@ -195,8 +195,18 @@
 	assert(mode != NULL);
 	assert(f->f_fp == NULL);
 
-	if (check_the_mode(mode))
+	/* probably need to replace 'U' by 'rb' */
+	newmode = PyMem_MALLOC(strlen(mode) + 3);
+	if (!newmode) {
+		PyErr_NoMemory();
 		return NULL;
+	}
+	strcpy(newmode, mode);
+
+	if (sanitize_the_mode(newmode)) {
+		f = NULL;
+		goto cleanup;
+	}
 
 	/* rexec.py can't stop a user from getting the file() constructor --
 	   all they have to do is get *any* file object f, and then do
@@ -204,16 +214,15 @@
 	if (PyEval_GetRestricted()) {
 		PyErr_SetString(PyExc_IOError,
 		"file() constructor not accessible in restricted mode");
-		return NULL;
+		f = NULL;
+		goto cleanup;
 	}
 	errno = 0;
 
-	if (strcmp(mode, "U") == 0 || strcmp(mode, "rU") == 0)
-		mode = "rb";
 #ifdef MS_WINDOWS
 	if (PyUnicode_Check(f->f_name)) {
 		PyObject *wmode;
-		wmode = PyUnicode_DecodeASCII(mode, strlen(mode), NULL);
+		wmode = PyUnicode_DecodeASCII(newmode, strlen(newmode), NULL);
 		if (f->f_name && wmode) {
 			Py_BEGIN_ALLOW_THREADS
 			/* PyUnicode_AS_UNICODE OK without thread
@@ -227,7 +236,7 @@
 #endif
 	if (NULL == f->f_fp && NULL != name) {
 		Py_BEGIN_ALLOW_THREADS
-		f->f_fp = fopen(name, mode);
+		f->f_fp = fopen(name, newmode);
 		Py_END_ALLOW_THREADS
 	}
 
@@ -254,6 +263,10 @@
 	}
 	if (f != NULL)
 		f = dircheck(f);
+
+cleanup:
+	PyMem_FREE(newmode);
+
 	return (PyObject *)f;
 }