[Python-checkins] cpython: Issue #25318: Avoid sprintf() in backslashreplace()
victor.stinner
python-checkins at python.org
Thu Oct 8 21:38:50 EDT 2015
https://hg.python.org/cpython/rev/9cf89366bbcb
changeset: 98609:9cf89366bbcb
parent: 98607:e9c1404d6bd9
user: Victor Stinner <victor.stinner at gmail.com>
date: Fri Oct 09 03:17:30 2015 +0200
summary:
Issue #25318: Avoid sprintf() in backslashreplace()
Rewrite backslashreplace() to be closer to PyCodec_BackslashReplaceErrors().
Also add unit tests for non-BMP characters.
files:
Lib/test/test_codecs.py | 6 ++++--
Objects/unicodeobject.c | 27 +++++++++++++++++++--------
2 files changed, 23 insertions(+), 10 deletions(-)
diff --git a/Lib/test/test_codecs.py b/Lib/test/test_codecs.py
--- a/Lib/test/test_codecs.py
+++ b/Lib/test/test_codecs.py
@@ -3155,7 +3155,8 @@
('[\x80\xff\u20ac]', 'ignore', b'[]'),
('[\x80\xff\u20ac]', 'replace', b'[???]'),
('[\x80\xff\u20ac]', 'xmlcharrefreplace', b'[&#128;&#255;&#8364;]'),
- ('[\x80\xff\u20ac]', 'backslashreplace', b'[\\x80\\xff\\u20ac]'),
+ ('[\x80\xff\u20ac\U000abcde]', 'backslashreplace',
+ b'[\\x80\\xff\\u20ac\\U000abcde]'),
('[\udc80\udcff]', 'surrogateescape', b'[\x80\xff]'),
):
with self.subTest(data=data, error_handler=error_handler,
@@ -3197,7 +3198,8 @@
for data, error_handler, expected in (
('[\u20ac\udc80]', 'ignore', b'[]'),
('[\u20ac\udc80]', 'replace', b'[??]'),
- ('[\u20ac\udc80]', 'backslashreplace', b'[\\u20ac\\udc80]'),
+ ('[\u20ac\U000abcde]', 'backslashreplace',
+ b'[\\u20ac\\U000abcde]'),
('[\u20ac\udc80]', 'xmlcharrefreplace', b'[&#8364;&#56448;]'),
('[\udc80\udcff]', 'surrogateescape', b'[\x80\xff]'),
):
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -610,14 +610,25 @@
/* generate replacement */
for (i = collstart; i < collend; ++i) {
ch = PyUnicode_READ(kind, data, i);
- if (ch < 0x100)
- str += sprintf(str, "\\x%02x", ch);
- else if (ch < 0x10000)
- str += sprintf(str, "\\u%04x", ch);
- else {
- assert(ch <= MAX_UNICODE);
- str += sprintf(str, "\\U%08x", ch);
- }
+ *str++ = '\\';
+ if (ch >= 0x00010000) {
+ *str++ = 'U';
+ *str++ = Py_hexdigits[(ch>>28)&0xf];
+ *str++ = Py_hexdigits[(ch>>24)&0xf];
+ *str++ = Py_hexdigits[(ch>>20)&0xf];
+ *str++ = Py_hexdigits[(ch>>16)&0xf];
+ *str++ = Py_hexdigits[(ch>>12)&0xf];
+ *str++ = Py_hexdigits[(ch>>8)&0xf];
+ }
+ else if (ch >= 0x100) {
+ *str++ = 'u';
+ *str++ = Py_hexdigits[(ch>>12)&0xf];
+ *str++ = Py_hexdigits[(ch>>8)&0xf];
+ }
+ else
+ *str++ = 'x';
+ *str++ = Py_hexdigits[(ch>>4)&0xf];
+ *str++ = Py_hexdigits[ch&0xf];
}
return str;
}
--
Repository URL: https://hg.python.org/cpython
More information about the Python-checkins
mailing list