[Python-checkins] cpython: Unicode: optimize creating of 1-character strings
victor.stinner
python-checkins at python.org
Thu May 3 02:33:56 CEST 2012
http://hg.python.org/cpython/rev/3d83e2297166
changeset: 76722:3d83e2297166
user: Victor Stinner <victor.stinner at gmail.com>
date: Thu May 03 02:17:04 2012 +0200
summary:
Unicode: optimize creating of 1-character strings
files:
Objects/unicodeobject.c | 60 ++++++++++++++++++++++++----
1 file changed, 51 insertions(+), 9 deletions(-)
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -1919,8 +1919,18 @@
return unicode_empty;
}
assert(size > 0);
- if (size == 1 && u[0] < 256)
- return get_latin1_char((unsigned char)u[0]);
+ if (size == 1) {
+ Py_UCS4 ch = u[0];
+ if (ch < 256)
+ return get_latin1_char((unsigned char)ch);
+
+ res = PyUnicode_New(1, ch);
+ if (res == NULL)
+ return NULL;
+ PyUnicode_WRITE(PyUnicode_KIND(res), PyUnicode_DATA(res), 0, ch);
+ assert(_PyUnicode_CheckConsistency(res, 1));
+ return res;
+ }
max_char = ucs2lib_find_max_char(u, u + size);
res = PyUnicode_New(size, max_char);
@@ -1947,8 +1957,18 @@
return unicode_empty;
}
assert(size > 0);
- if (size == 1 && u[0] < 256)
- return get_latin1_char((unsigned char)u[0]);
+ if (size == 1) {
+ Py_UCS4 ch = u[0];
+ if (ch < 256)
+ return get_latin1_char((unsigned char)ch);
+
+ res = PyUnicode_New(1, ch);
+ if (res == NULL)
+ return NULL;
+ PyUnicode_WRITE(PyUnicode_KIND(res), PyUnicode_DATA(res), 0, ch);
+ assert(_PyUnicode_CheckConsistency(res, 1));
+ return res;
+ }
max_char = ucs4lib_find_max_char(u, u + size);
res = PyUnicode_New(size, max_char);
@@ -11368,10 +11388,33 @@
static PyObject *
unicode_getitem(PyObject *self, Py_ssize_t index)
{
- Py_UCS4 ch = PyUnicode_ReadChar(self, index);
- if (ch == (Py_UCS4)-1)
- return NULL;
- return PyUnicode_FromOrdinal(ch);
+ void *data;
+ enum PyUnicode_Kind kind;
+ Py_UCS4 ch;
+ PyObject *res;
+
+ if (!PyUnicode_Check(self) || PyUnicode_READY(self) == -1) {
+ PyErr_BadArgument();
+ return NULL;
+ }
+ if (index < 0 || index >= PyUnicode_GET_LENGTH(self)) {
+ PyErr_SetString(PyExc_IndexError, "string index out of range");
+ return NULL;
+ }
+ kind = PyUnicode_KIND(self);
+ data = PyUnicode_DATA(self);
+ ch = PyUnicode_READ(kind, data, index);
+ if (ch < 256)
+ return get_latin1_char(ch);
+
+ res = PyUnicode_New(1, ch);
+ if (res == NULL)
+ return NULL;
+ kind = PyUnicode_KIND(res);
+ data = PyUnicode_DATA(res);
+ PyUnicode_WRITE(kind, data, 0, ch);
+ assert(_PyUnicode_CheckConsistency(res, 1));
+ return res;
}
/* Believe it or not, this produces the same value for ASCII strings
@@ -12039,7 +12082,6 @@
}
if (PyUnicode_IS_ASCII(self)) {
- kind = PyUnicode_KIND(self);
data = PyUnicode_1BYTE_DATA(self);
return unicode_fromascii(data + start, length);
}
--
Repository URL: http://hg.python.org/cpython
More information about the Python-checkins mailing list