[Python-checkins] cpython: add unicode_char() in unicodeobject.c to factorize code

victor.stinner python-checkins at python.org
Fri Jan 3 13:01:01 CET 2014


http://hg.python.org/cpython/rev/d453c95def31
changeset:   88271:d453c95def31
user:        Victor Stinner <victor.stinner at gmail.com>
date:        Fri Jan 03 12:53:47 2014 +0100
summary:
  add unicode_char() in unicodeobject.c to factorize code

files:
  Objects/unicodeobject.c |  86 ++++++++++------------------
  1 files changed, 31 insertions(+), 55 deletions(-)


diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -1749,7 +1749,6 @@
     }
 }
 
-
 static PyObject*
 get_latin1_char(unsigned char ch)
 {
@@ -1766,6 +1765,45 @@
     return unicode;
 }
 
+/* Return a one-character string object for the code point ch, or NULL if
+   PyUnicode_New() fails.
+
+   Code points below 256 go through get_latin1_char(), matching the inlined
+   code this helper replaces at its call sites; any other code point is
+   stored in a string freshly created by PyUnicode_New(1, ch).
+
+   ch must not exceed MAX_UNICODE; this is only checked by assert(). */
+static PyObject*
+unicode_char(Py_UCS4 ch)
+{
+    PyObject *unicode;
+
+    assert(ch <= MAX_UNICODE);
+
+    /* The factored-out call sites all took this path for ch < 256; without
+       it every Latin-1 character would bypass get_latin1_char(). */
+    if (ch < 256)
+        return get_latin1_char((unsigned char)ch);
+
+    unicode = PyUnicode_New(1, ch);
+    if (unicode == NULL)
+        return NULL;
+    /* Write ch with the element width that matches the string's kind. */
+    switch (PyUnicode_KIND(unicode)) {
+    case PyUnicode_1BYTE_KIND:
+        PyUnicode_1BYTE_DATA(unicode)[0] = (Py_UCS1)ch;
+        break;
+    case PyUnicode_2BYTE_KIND:
+        PyUnicode_2BYTE_DATA(unicode)[0] = (Py_UCS2)ch;
+        break;
+    default:
+        assert(PyUnicode_KIND(unicode) == PyUnicode_4BYTE_KIND);
+        PyUnicode_4BYTE_DATA(unicode)[0] = ch;
+    }
+    assert(_PyUnicode_CheckConsistency(unicode, 1));
+    return unicode;
+}
+
 PyObject *
 PyUnicode_FromUnicode(const Py_UNICODE *u, Py_ssize_t size)
 {
@@ -1964,22 +1988,8 @@
     if (size == 0)
         _Py_RETURN_UNICODE_EMPTY();
     assert(size > 0);
-    if (size == 1) {
-        Py_UCS4 ch = u[0];
-        int kind;
-        void *data;
-        if (ch < 256)
-            return get_latin1_char((unsigned char)ch);
-
-        res = PyUnicode_New(1, ch);
-        if (res == NULL)
-            return NULL;
-        kind = PyUnicode_KIND(res);
-        data = PyUnicode_DATA(res);
-        PyUnicode_WRITE(kind, data, 0, ch);
-        assert(_PyUnicode_CheckConsistency(res, 1));
-        return res;
-    }
+    if (size == 1)
+        return unicode_char(u[0]);
 
     max_char = ucs2lib_find_max_char(u, u + size);
     res = PyUnicode_New(size, max_char);
@@ -2004,22 +2014,8 @@
     if (size == 0)
         _Py_RETURN_UNICODE_EMPTY();
     assert(size > 0);
-    if (size == 1) {
-        Py_UCS4 ch = u[0];
-        int kind;
-        void *data;
-        if (ch < 256)
-            return get_latin1_char((unsigned char)ch);
-
-        res = PyUnicode_New(1, ch);
-        if (res == NULL)
-            return NULL;
-        kind = PyUnicode_KIND(res);
-        data = PyUnicode_DATA(res);
-        PyUnicode_WRITE(kind, data, 0, ch);
-        assert(_PyUnicode_CheckConsistency(res, 1));
-        return res;
-    }
+    if (size == 1)
+        return unicode_char(u[0]);
 
     max_char = ucs4lib_find_max_char(u, u + size);
     res = PyUnicode_New(size, max_char);
@@ -2887,17 +2883,7 @@
         return NULL;
     }
 
-    if ((Py_UCS4)ordinal < 256)
-        return get_latin1_char((unsigned char)ordinal);
-
-    v = PyUnicode_New(1, ordinal);
-    if (v == NULL)
-        return NULL;
-    kind = PyUnicode_KIND(v);
-    data = PyUnicode_DATA(v);
-    PyUnicode_WRITE(kind, data, 0, ordinal);
-    assert(_PyUnicode_CheckConsistency(v, 1));
-    return v;
+    return unicode_char((Py_UCS4)ordinal);
 }
 
 PyObject *
@@ -11354,17 +11340,7 @@
     kind = PyUnicode_KIND(self);
     data = PyUnicode_DATA(self);
     ch = PyUnicode_READ(kind, data, index);
-    if (ch < 256)
-        return get_latin1_char(ch);
-
-    res = PyUnicode_New(1, ch);
-    if (res == NULL)
-        return NULL;
-    kind = PyUnicode_KIND(res);
-    data = PyUnicode_DATA(res);
-    PyUnicode_WRITE(kind, data, 0, ch);
-    assert(_PyUnicode_CheckConsistency(res, 1));
-    return res;
+    return unicode_char(ch);
 }
 
 /* Believe it or not, this produces the same value for ASCII strings

-- 
Repository URL: http://hg.python.org/cpython


More information about the Python-checkins mailing list