[Python-checkins] cpython (merge 3.2 -> default): merge 3.2 (#12732)

benjamin.peterson python-checkins at python.org
Sat Aug 13 05:18:31 CEST 2011


http://hg.python.org/cpython/rev/5af15f018e20
changeset:   71849:5af15f018e20
parent:      71847:0937a0986b66
parent:      71848:787ed1a7aba8
user:        Benjamin Peterson <benjamin at python.org>
date:        Fri Aug 12 22:18:19 2011 -0500
summary:
  merge 3.2 (#12732)

files:
  Lib/test/test_pep3131.py |   3 ++
  Lib/test/test_unicode.py |   1 +
  Misc/NEWS                |   3 ++
  Objects/unicodeobject.c  |  31 ++++++++++++++++++++-------
  4 files changed, 30 insertions(+), 8 deletions(-)


diff --git a/Lib/test/test_pep3131.py b/Lib/test/test_pep3131.py
--- a/Lib/test/test_pep3131.py
+++ b/Lib/test/test_pep3131.py
@@ -8,9 +8,12 @@
             ä = 1
             µ = 2 # this is a compatibility character
             蟒 = 3
+            𝔘𝔫𝔦𝔠𝔬𝔡𝔢  = 4
         self.assertEqual(getattr(T, "\xe4"), 1)
         self.assertEqual(getattr(T, "\u03bc"), 2)
         self.assertEqual(getattr(T, '\u87d2'), 3)
+        v = getattr(T, "\U0001d518\U0001d52b\U0001d526\U0001d520\U0001d52c\U0001d521\U0001d522")
+        self.assertEqual(v, 4)
 
     def test_invalid(self):
         try:
diff --git a/Lib/test/test_unicode.py b/Lib/test/test_unicode.py
--- a/Lib/test/test_unicode.py
+++ b/Lib/test/test_unicode.py
@@ -404,6 +404,7 @@
         self.assertTrue("bc".isidentifier())
         self.assertTrue("b_".isidentifier())
         self.assertTrue("µ".isidentifier())
+        self.assertTrue("𝔘𝔫𝔦𝔠𝔬𝔡𝔢".isidentifier())
 
         self.assertFalse(" ".isidentifier())
         self.assertFalse("[".isidentifier())
diff --git a/Misc/NEWS b/Misc/NEWS
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -10,6 +10,9 @@
 Core and Builtins
 -----------------
 
+- Issue #12732: In narrow unicode builds, allow Unicode identifiers which fall
+  outside the BMP.
+
 - Issue #12575: Validate user-generated AST before it is compiled.
 
 - Make type(None), type(Ellipsis), and type(NotImplemented) callable. They
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -8044,14 +8044,30 @@
     return PyBool_FromLong(1);
 }
 
+static Py_UCS4
+decode_ucs4(const Py_UNICODE *s, Py_ssize_t *i, Py_ssize_t size)
+{
+    Py_UCS4 ch;
+    assert(*i < size);
+    ch = s[(*i)++];
+#ifndef Py_UNICODE_WIDE
+    if ((ch & 0xfffffc00) == 0xd800 &&
+        *i < size
+        && (s[*i] & 0xFFFFFC00) == 0xDC00)
+        ch = ((Py_UCS4)ch << 10UL) + (Py_UCS4)(s[(*i)++]) - 0x35fdc00;
+#endif
+    return ch;
+}
+
 int
 PyUnicode_IsIdentifier(PyObject *self)
 {
-    register const Py_UNICODE *p = PyUnicode_AS_UNICODE((PyUnicodeObject*)self);
-    register const Py_UNICODE *e;
+    Py_ssize_t i = 0, size = PyUnicode_GET_SIZE(self);
+    Py_UCS4 first;
+    const Py_UNICODE *p = PyUnicode_AS_UNICODE((PyUnicodeObject*)self);
 
     /* Special case for empty strings */
-    if (PyUnicode_GET_SIZE(self) == 0)
+    if (!size)
         return 0;
 
     /* PEP 3131 says that the first character must be in
@@ -8062,14 +8078,13 @@
        definition of XID_Start and XID_Continue, it is sufficient
        to check just for these, except that _ must be allowed
        as starting an identifier.  */
-    if (!_PyUnicode_IsXidStart(*p) && *p != 0x5F /* LOW LINE */)
+    first = decode_ucs4(p, &i, size);
+    if (!_PyUnicode_IsXidStart(first) && first != 0x5F /* LOW LINE */)
         return 0;
 
-    e = p + PyUnicode_GET_SIZE(self);
-    for (p++; p < e; p++) {
-        if (!_PyUnicode_IsXidContinue(*p))
+    while (i < size)
+        if (!_PyUnicode_IsXidContinue(decode_ucs4(p, &i, size)))
             return 0;
-    }
     return 1;
 }
 

-- 
Repository URL: http://hg.python.org/cpython


More information about the Python-checkins mailing list