[Python-checkins] r73020 - in python/branches/py3k: Misc/NEWS Modules/python.c

martin.v.loewis python-checkins at python.org
Fri May 29 18:22:26 CEST 2009


Author: martin.v.loewis
Date: Fri May 29 18:22:26 2009
New Revision: 73020

Log:
Issue #6097: Escape UTF-8 surrogates resulting from mbstowcs conversion
of the command line.


Modified:
   python/branches/py3k/Misc/NEWS
   python/branches/py3k/Modules/python.c

Modified: python/branches/py3k/Misc/NEWS
==============================================================================
--- python/branches/py3k/Misc/NEWS	(original)
+++ python/branches/py3k/Misc/NEWS	Fri May 29 18:22:26 2009
@@ -12,6 +12,9 @@
 Core and Builtins
 -----------------
 
+- Issue #6097: Escape UTF-8 surrogates resulting from mbstowcs conversion
+  of the command line.
+
 - Issue #6012: Add cleanup support to O& argument parsing.
 
 - Issue #6089: Fixed str.format with certain invalid field specifiers

Modified: python/branches/py3k/Modules/python.c
==============================================================================
--- python/branches/py3k/Modules/python.c	(original)
+++ python/branches/py3k/Modules/python.c	Fri May 29 18:22:26 2009
@@ -38,8 +38,16 @@
 		if (!res)
 			goto oom;
 		count = mbstowcs(res, arg, argsize+1);
-		if (count != (size_t)-1)
-			return res;
+		if (count != (size_t)-1) {
+			wchar_t *tmp;
+			/* Only use the result if it contains no
+			   surrogate characters. */
+			for (tmp = res; *tmp != 0 &&
+				     (*tmp < 0xd800 || *tmp > 0xdfff); tmp++)
+				;
+			if (*tmp == 0)
+				return res;
+		}
 		PyMem_Free(res);
 	}
 	/* Conversion failed. Fall back to escaping with surrogateescape. */
@@ -75,6 +83,14 @@
 			memset(&mbs, 0, sizeof mbs);
 			continue;
 		}
+		if (*out >= 0xd800 && *out <= 0xdfff) {
+			/* Surrogate character.  Escape the original
+			   byte sequence with surrogateescape. */
+			argsize -= converted;
+			while (converted--)
+				*out++ = 0xdc00 + *in++;
+			continue;
+		}
 		/* successfully converted some bytes */
 		in += converted;
 		argsize -= converted;


More information about the Python-checkins mailing list