[Python-checkins] r63846 - in python/trunk: Doc/c-api/file.rst Doc/library/stdtypes.rst Doc/using/cmdline.rst Include/fileobject.h Lib/test/test_sys.py Misc/NEWS Modules/main.c Objects/fileobject.c Python/pythonrun.c Python/sysmodule.c

martin.v.loewis python-checkins at python.org
Sun Jun 1 09:20:47 CEST 2008


Author: martin.v.loewis
Date: Sun Jun  1 09:20:46 2008
New Revision: 63846

Log:
New environment variable PYTHONIOENCODING.


Modified:
   python/trunk/Doc/c-api/file.rst
   python/trunk/Doc/library/stdtypes.rst
   python/trunk/Doc/using/cmdline.rst
   python/trunk/Include/fileobject.h
   python/trunk/Lib/test/test_sys.py
   python/trunk/Misc/NEWS
   python/trunk/Modules/main.c
   python/trunk/Objects/fileobject.c
   python/trunk/Python/pythonrun.c
   python/trunk/Python/sysmodule.c

Modified: python/trunk/Doc/c-api/file.rst
==============================================================================
--- python/trunk/Doc/c-api/file.rst	(original)
+++ python/trunk/Doc/c-api/file.rst	Sun Jun  1 09:20:46 2008
@@ -130,6 +130,14 @@
    .. versionadded:: 2.3
 
 
+.. cfunction:: int PyFile_SetEncodingAndErrors(PyFileObject *p, const char *enc, *errors)
+
+   Set the file's encoding for Unicode output to *enc*, and its error
+   mode to *err*. Return 1 on success and 0 on failure.
+
+   .. versionadded:: 2.6
+
+
 .. cfunction:: int PyFile_SoftSpace(PyObject *p, int newflag)
 
    .. index:: single: softspace (file attribute)

Modified: python/trunk/Doc/library/stdtypes.rst
==============================================================================
--- python/trunk/Doc/library/stdtypes.rst	(original)
+++ python/trunk/Doc/library/stdtypes.rst	Sun Jun  1 09:20:46 2008
@@ -2165,6 +2165,13 @@
    .. versionadded:: 2.3
 
 
+.. attribute:: file.errors
+
+   The Unicode error handler used to along with the encoding.
+
+   .. versionadded:: 2.6
+
+
 .. attribute:: file.mode
 
    The I/O mode for the file.  If the file was created using the :func:`open`

Modified: python/trunk/Doc/using/cmdline.rst
==============================================================================
--- python/trunk/Doc/using/cmdline.rst	(original)
+++ python/trunk/Doc/using/cmdline.rst	Sun Jun  1 09:20:46 2008
@@ -481,6 +481,13 @@
 
    .. versionadded:: 2.6
 
+.. envvar:: PYTHONIOENCODING
+
+   Overrides the encoding used for stdin/stdout/stderr, in the syntax
+   encodingname:errorhandler, with the :errors part being optional.
+
+   .. versionadded:: 2.6
+
 
 .. envvar:: PYTHONNOUSERSITE
 

Modified: python/trunk/Include/fileobject.h
==============================================================================
--- python/trunk/Include/fileobject.h	(original)
+++ python/trunk/Include/fileobject.h	Sun Jun  1 09:20:46 2008
@@ -24,6 +24,7 @@
 	int f_newlinetypes;	/* Types of newlines seen */
 	int f_skipnextlf;	/* Skip next \n */
 	PyObject *f_encoding;
+	PyObject *f_errors;
 	PyObject *weakreflist; /* List of weak references */
 	int unlocked_count;	/* Num. currently running sections of code
 				   using f_fp with the GIL released. */
@@ -37,6 +38,7 @@
 PyAPI_FUNC(PyObject *) PyFile_FromString(char *, char *);
 PyAPI_FUNC(void) PyFile_SetBufSize(PyObject *, int);
 PyAPI_FUNC(int) PyFile_SetEncoding(PyObject *, const char *);
+PyAPI_FUNC(int) PyFile_SetEncodingAndErrors(PyObject *, const char *, char *errors);
 PyAPI_FUNC(PyObject *) PyFile_FromFile(FILE *, char *, char *,
                                              int (*)(FILE *));
 PyAPI_FUNC(FILE *) PyFile_AsFile(PyObject *);

Modified: python/trunk/Lib/test/test_sys.py
==============================================================================
--- python/trunk/Lib/test/test_sys.py	(original)
+++ python/trunk/Lib/test/test_sys.py	Sun Jun  1 09:20:46 2008
@@ -385,6 +385,26 @@
 ##        self.assert_(r[0][2] > 100, r[0][2])
 ##        self.assert_(r[1][2] > 100, r[1][2])
 
+    def test_ioencoding(self):
+        import subprocess,os
+        env = dict(os.environ)
+
+        # Test character: cent sign, encoded as 0x4A (ASCII J) in CP424,
+        # not representable in ASCII.
+
+        env["PYTHONIOENCODING"] = "cp424"
+        p = subprocess.Popen([sys.executable, "-c", 'print unichr(0xa2)'],
+                             stdout = subprocess.PIPE, env=env)
+        out = p.stdout.read().strip()
+        self.assertEqual(out, unichr(0xa2).encode("cp424"))
+
+        env["PYTHONIOENCODING"] = "ascii:replace"
+        p = subprocess.Popen([sys.executable, "-c", 'print unichr(0xa2)'],
+                             stdout = subprocess.PIPE, env=env)
+        out = p.stdout.read().strip()
+        self.assertEqual(out, '?')
+
+
 def test_main():
     test.test_support.run_unittest(SysModuleTest)
 

Modified: python/trunk/Misc/NEWS
==============================================================================
--- python/trunk/Misc/NEWS	(original)
+++ python/trunk/Misc/NEWS	Sun Jun  1 09:20:46 2008
@@ -12,6 +12,8 @@
 Core and Builtins
 -----------------
 
+- New environment variable PYTHONIOENCODING.
+
 - Patch #2488: Add sys.maxsize.
 
 - Issue #2353: file.xreadlines() now emits a Py3k warning.

Modified: python/trunk/Modules/main.c
==============================================================================
--- python/trunk/Modules/main.c	(original)
+++ python/trunk/Modules/main.c	Sun Jun  1 09:20:46 2008
@@ -99,6 +99,7 @@
 PYTHONHOME   : alternate <prefix> directory (or <prefix>%c<exec_prefix>).\n\
                The default module search path uses %s.\n\
 PYTHONCASEOK : ignore case in 'import' statements (Windows).\n\
+PYTHONIOENCODING: Encoding[:errors] used for stdin/stdout/stderr.\n\
 ";
 
 

Modified: python/trunk/Objects/fileobject.c
==============================================================================
--- python/trunk/Objects/fileobject.c	(original)
+++ python/trunk/Objects/fileobject.c	Sun Jun  1 09:20:46 2008
@@ -155,6 +155,7 @@
 	Py_DECREF(f->f_name);
 	Py_DECREF(f->f_mode);
 	Py_DECREF(f->f_encoding);
+	Py_DECREF(f->f_errors);
 
         Py_INCREF(name);
         f->f_name = name;
@@ -170,6 +171,8 @@
 	f->f_skipnextlf = 0;
 	Py_INCREF(Py_None);
 	f->f_encoding = Py_None;
+	Py_INCREF(Py_None);
+	f->f_errors = Py_None;
 
 	if (f->f_mode == NULL)
 		return NULL;
@@ -435,19 +438,38 @@
 }
 
 /* Set the encoding used to output Unicode strings.
-   Returh 1 on success, 0 on failure. */
+   Return 1 on success, 0 on failure. */
 
 int
 PyFile_SetEncoding(PyObject *f, const char *enc)
 {
+	return PyFile_SetEncodingAndErrors(f, enc, NULL);
+}
+
+int
+PyFile_SetEncodingAndErrors(PyObject *f, const char *enc, char* errors)
+{
 	PyFileObject *file = (PyFileObject*)f;
-	PyObject *str = PyBytes_FromString(enc);
+	PyObject *str, *oerrors;
 
 	assert(PyFile_Check(f));
+	str = PyBytes_FromString(enc);
 	if (!str)
 		return 0;
+	if (errors) {
+		oerrors = PyString_FromString(errors);
+		if (!oerrors) {
+			Py_DECREF(str);
+			return 0;
+		}
+	} else {
+		oerrors = Py_None;
+		Py_INCREF(Py_None);
+	}
 	Py_DECREF(file->f_encoding);
 	file->f_encoding = str;
+	Py_DECREF(file->f_errors);
+	file->f_errors = oerrors;
 	return 1;
 }
 
@@ -491,6 +513,7 @@
 	Py_XDECREF(f->f_name);
 	Py_XDECREF(f->f_mode);
 	Py_XDECREF(f->f_encoding);
+	Py_XDECREF(f->f_errors);
 	drop_readahead(f);
 	Py_TYPE(f)->tp_free((PyObject *)f);
 }
@@ -1879,6 +1902,8 @@
 	 "file name"},
 	{"encoding",	T_OBJECT,	OFF(f_encoding),	RO,
 	 "file encoding"},
+	{"errors",	T_OBJECT,	OFF(f_errors),	RO,
+	 "Unicode error handler"},
 	/* getattr(f, "closed") is implemented without this table */
 	{NULL}	/* Sentinel */
 };
@@ -2093,6 +2118,8 @@
 		((PyFileObject *)self)->f_mode = not_yet_string;
 		Py_INCREF(Py_None);
 		((PyFileObject *)self)->f_encoding = Py_None;
+		Py_INCREF(Py_None);
+		((PyFileObject *)self)->f_errors = Py_None;
 		((PyFileObject *)self)->weakreflist = NULL;
 		((PyFileObject *)self)->unlocked_count = 0;
 	}
@@ -2295,7 +2322,9 @@
                 if ((flags & Py_PRINT_RAW) &&
 		    PyUnicode_Check(v) && enc != Py_None) {
 			char *cenc = PyBytes_AS_STRING(enc);
-			value = PyUnicode_AsEncodedString(v, cenc, "strict");
+			char *errors = fobj->f_errors == Py_None ? 
+			  "strict" : PyBytes_AS_STRING(fobj->f_errors);
+			value = PyUnicode_AsEncodedString(v, cenc, errors);
 			if (value == NULL)
 				return -1;
 		} else {

Modified: python/trunk/Python/pythonrun.c
==============================================================================
--- python/trunk/Python/pythonrun.c	(original)
+++ python/trunk/Python/pythonrun.c	Sun Jun  1 09:20:46 2008
@@ -132,11 +132,20 @@
 	PyThreadState *tstate;
 	PyObject *bimod, *sysmod;
 	char *p;
+	char *icodeset; /* On Windows, input codeset may theoretically 
+			   differ from output codeset. */
+	char *codeset = NULL;
+	char *errors = NULL;
+	int free_codeset = 0;
+	int overridden = 0;
 #if defined(Py_USING_UNICODE) && defined(HAVE_LANGINFO_H) && defined(CODESET)
-	char *codeset;
-	char *saved_locale;
+	char *saved_locale, *loc_codeset;
 	PyObject *sys_stream, *sys_isatty;
 #endif
+#ifdef MS_WINDOWS
+	char ibuf[128];
+	char buf[128];
+#endif
 	extern void _Py_ReadyTypes(void);
 
 	if (initialized)
@@ -238,38 +247,75 @@
 	_PyGILState_Init(interp, tstate);
 #endif /* WITH_THREAD */
 
+	if ((p = Py_GETENV("PYTHONIOENCODING")) && *p != '\0') {
+		p = icodeset = codeset = strdup(p);
+		free_codeset = 1;
+		errors = strchr(p, ':');
+		if (errors) {
+			*errors = '\0';
+			errors++;
+		}
+		overridden = 1;
+	}
+
 #if defined(Py_USING_UNICODE) && defined(HAVE_LANGINFO_H) && defined(CODESET)
 	/* On Unix, set the file system encoding according to the
 	   user's preference, if the CODESET names a well-known
 	   Python codec, and Py_FileSystemDefaultEncoding isn't
 	   initialized by other means. Also set the encoding of
-	   stdin and stdout if these are terminals.  */
+	   stdin and stdout if these are terminals, unless overridden.  */
 
-	saved_locale = strdup(setlocale(LC_CTYPE, NULL));
-	setlocale(LC_CTYPE, "");
-	codeset = nl_langinfo(CODESET);
-	if (codeset && *codeset) {
-		PyObject *enc = PyCodec_Encoder(codeset);
-		if (enc) {
-			codeset = strdup(codeset);
-			Py_DECREF(enc);
-		} else {
-			codeset = NULL;
-			PyErr_Clear();
+	if (!overridden || !Py_FileSystemDefaultEncoding) {
+		saved_locale = strdup(setlocale(LC_CTYPE, NULL));
+		setlocale(LC_CTYPE, "");
+		loc_codeset = nl_langinfo(CODESET);
+		if (loc_codeset && *loc_codeset) {
+			PyObject *enc = PyCodec_Encoder(loc_codeset);
+			if (enc) {
+				loc_codeset = strdup(loc_codeset);
+				Py_DECREF(enc);
+			} else {
+				loc_codeset = NULL;
+				PyErr_Clear();
+			}
+		} else
+			loc_codeset = NULL;
+		setlocale(LC_CTYPE, saved_locale);
+		free(saved_locale);
+
+		if (!overridden) {
+			codeset = icodeset = loc_codeset;
+			free_codeset = 1;
+		}
+
+		/* Initialize Py_FileSystemDefaultEncoding from
+		   locale even if PYTHONIOENCODING is set. */
+		if (!Py_FileSystemDefaultEncoding) {
+			Py_FileSystemDefaultEncoding = loc_codeset;
+			if (!overridden)
+				free_codeset = 0;
 		}
-	} else
-		codeset = NULL;
-	setlocale(LC_CTYPE, saved_locale);
-	free(saved_locale);
+	}
+#endif
+
+#ifdef MS_WINDOWS
+	if (!overridden) {
+		icodeset = ibuf;
+		encoding = buf;
+		sprintf(ibuf, "cp%d", GetConsoleCP());
+		sprintf(buf, "cp%d", GetConsoleOutputCP());
+	}
+#endif
 
 	if (codeset) {
 		sys_stream = PySys_GetObject("stdin");
 		sys_isatty = PyObject_CallMethod(sys_stream, "isatty", "");
 		if (!sys_isatty)
 			PyErr_Clear();
-		if(sys_isatty && PyObject_IsTrue(sys_isatty) &&
+		if ((overridden ||
+		     (sys_isatty && PyObject_IsTrue(sys_isatty))) &&
 		   PyFile_Check(sys_stream)) {
-			if (!PyFile_SetEncoding(sys_stream, codeset))
+			if (!PyFile_SetEncodingAndErrors(sys_stream, icodeset, errors))
 				Py_FatalError("Cannot set codeset of stdin");
 		}
 		Py_XDECREF(sys_isatty);
@@ -278,9 +324,10 @@
 		sys_isatty = PyObject_CallMethod(sys_stream, "isatty", "");
 		if (!sys_isatty)
 			PyErr_Clear();
-		if(sys_isatty && PyObject_IsTrue(sys_isatty) &&
+		if ((overridden || 
+		     (sys_isatty && PyObject_IsTrue(sys_isatty))) &&
 		   PyFile_Check(sys_stream)) {
-			if (!PyFile_SetEncoding(sys_stream, codeset))
+			if (!PyFile_SetEncodingAndErrors(sys_stream, codeset, errors))
 				Py_FatalError("Cannot set codeset of stdout");
 		}
 		Py_XDECREF(sys_isatty);
@@ -289,19 +336,17 @@
 		sys_isatty = PyObject_CallMethod(sys_stream, "isatty", "");
 		if (!sys_isatty)
 			PyErr_Clear();
-		if(sys_isatty && PyObject_IsTrue(sys_isatty) &&
+		if((overridden || 
+		    (sys_isatty && PyObject_IsTrue(sys_isatty))) &&
 		   PyFile_Check(sys_stream)) {
-			if (!PyFile_SetEncoding(sys_stream, codeset))
+			if (!PyFile_SetEncodingAndErrors(sys_stream, codeset, errors))
 				Py_FatalError("Cannot set codeset of stderr");
 		}
 		Py_XDECREF(sys_isatty);
 
-		if (!Py_FileSystemDefaultEncoding)
-			Py_FileSystemDefaultEncoding = codeset;
-		else
+		if (free_codeset)
 			free(codeset);
 	}
-#endif
 }
 
 void

Modified: python/trunk/Python/sysmodule.c
==============================================================================
--- python/trunk/Python/sysmodule.c	(original)
+++ python/trunk/Python/sysmodule.c	Sun Jun  1 09:20:46 2008
@@ -1232,9 +1232,6 @@
 	PyObject *m, *v, *sysdict;
 	PyObject *sysin, *sysout, *syserr;
 	char *s;
-#ifdef MS_WINDOWS
-	char buf[128];
-#endif
 
 	m = Py_InitModule3("sys", sys_methods, sys_doc);
 	if (m == NULL)
@@ -1272,23 +1269,6 @@
 	syserr = PyFile_FromFile(stderr, "<stderr>", "w", _check_and_flush);
 	if (PyErr_Occurred())
 		return NULL;
-#ifdef MS_WINDOWS
-	if(isatty(_fileno(stdin)) && PyFile_Check(sysin)) {
-		sprintf(buf, "cp%d", GetConsoleCP());
-		if (!PyFile_SetEncoding(sysin, buf))
-			return NULL;
-	}
-	if(isatty(_fileno(stdout)) && PyFile_Check(sysout)) {
-		sprintf(buf, "cp%d", GetConsoleOutputCP());
-		if (!PyFile_SetEncoding(sysout, buf))
-			return NULL;
-	}
-	if(isatty(_fileno(stderr)) && PyFile_Check(syserr)) {
-		sprintf(buf, "cp%d", GetConsoleOutputCP());
-		if (!PyFile_SetEncoding(syserr, buf))
-			return NULL;
-	}
-#endif
 
 	PyDict_SetItemString(sysdict, "stdin", sysin);
 	PyDict_SetItemString(sysdict, "stdout", sysout);


More information about the Python-checkins mailing list