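[Python-checkins] cpython: Issue #23573: Increased performance of string search operations (str.find, str.index, str.count, the in operator, str.split, str.partition) with arguments of different kinds (UCS1, UCS2, UCS4).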

serhiy.storchaka python-checkins at python.org
Tue Mar 24 20:58:14 CET 2015


https://hg.python.org/cpython/rev/6db9d7c1be29
changeset:   95176:6db9d7c1be29
user:        Serhiy Storchaka <storchaka at gmail.com>
date:        Tue Mar 24 21:55:47 2015 +0200
summary:
  Issue #23573: Increased performance of string search operations (str.find,
str.index, str.count, the in operator, str.split, str.partition) with
arguments of different kinds (UCS1, UCS2, UCS4).

files:
  Misc/NEWS                      |    4 +
  Objects/bytearrayobject.c      |   35 +-
  Objects/bytesobject.c          |   35 +-
  Objects/stringlib/fastsearch.h |    4 +-
  Objects/stringlib/find.h       |   23 +-
  Objects/unicodeobject.c        |  368 +++++++++++---------
  6 files changed, 262 insertions(+), 207 deletions(-)


diff --git a/Misc/NEWS b/Misc/NEWS
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -10,6 +10,10 @@
 Core and Builtins
 -----------------
 
+- Issue #23573: Increased performance of string search operations (str.find,
+  str.index, str.count, the in operator, str.split, str.partition) with
+  arguments of different kinds (UCS1, UCS2, UCS4).
+
 - Issue #23753: Python doesn't support anymore platforms without stat() or
   fstat(), these functions are always required.
 
diff --git a/Objects/bytearrayobject.c b/Objects/bytearrayobject.c
--- a/Objects/bytearrayobject.c
+++ b/Objects/bytearrayobject.c
@@ -1142,7 +1142,7 @@
     char byte;
     Py_buffer subbuf;
     const char *sub;
-    Py_ssize_t sub_len;
+    Py_ssize_t len, sub_len;
     Py_ssize_t start=0, end=PY_SSIZE_T_MAX;
     Py_ssize_t res;
 
@@ -1161,15 +1161,30 @@
         sub = &byte;
         sub_len = 1;
     }
-
-    if (dir > 0)
-        res = stringlib_find_slice(
-            PyByteArray_AS_STRING(self), PyByteArray_GET_SIZE(self),
-            sub, sub_len, start, end);
-    else
-        res = stringlib_rfind_slice(
-            PyByteArray_AS_STRING(self), PyByteArray_GET_SIZE(self),
-            sub, sub_len, start, end);
+    len = PyByteArray_GET_SIZE(self);
+
+    ADJUST_INDICES(start, end, len);
+    if (end - start < sub_len)
+        res = -1;
+    else if (sub_len == 1) {
+        unsigned char needle = *sub;
+        int mode = (dir > 0) ? FAST_SEARCH : FAST_RSEARCH;
+        res = stringlib_fastsearch_memchr_1char(
+            PyByteArray_AS_STRING(self) + start, end - start,
+            needle, needle, mode);
+        if (res >= 0)
+            res += start;
+    }
+    else {
+        if (dir > 0)
+            res = stringlib_find_slice(
+                PyByteArray_AS_STRING(self), len,
+                sub, sub_len, start, end);
+        else
+            res = stringlib_rfind_slice(
+                PyByteArray_AS_STRING(self), len,
+                sub, sub_len, start, end);
+    }
 
     if (subobj)
         PyBuffer_Release(&subbuf);
diff --git a/Objects/bytesobject.c b/Objects/bytesobject.c
--- a/Objects/bytesobject.c
+++ b/Objects/bytesobject.c
@@ -1914,7 +1914,7 @@
     char byte;
     Py_buffer subbuf;
     const char *sub;
-    Py_ssize_t sub_len;
+    Py_ssize_t len, sub_len;
     Py_ssize_t start=0, end=PY_SSIZE_T_MAX;
     Py_ssize_t res;
 
@@ -1933,15 +1933,30 @@
         sub = &byte;
         sub_len = 1;
     }
-
-    if (dir > 0)
-        res = stringlib_find_slice(
-            PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
-            sub, sub_len, start, end);
-    else
-        res = stringlib_rfind_slice(
-            PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
-            sub, sub_len, start, end);
+    len = PyBytes_GET_SIZE(self);
+
+    ADJUST_INDICES(start, end, len);
+    if (end - start < sub_len)
+        res = -1;
+    else if (sub_len == 1) {
+        unsigned char needle = *sub;
+        int mode = (dir > 0) ? FAST_SEARCH : FAST_RSEARCH;
+        res = stringlib_fastsearch_memchr_1char(
+            PyBytes_AS_STRING(self) + start, end - start,
+            needle, needle, mode);
+        if (res >= 0)
+            res += start;
+    }
+    else {
+        if (dir > 0)
+            res = stringlib_find_slice(
+                PyBytes_AS_STRING(self), len,
+                sub, sub_len, start, end);
+        else
+            res = stringlib_rfind_slice(
+                PyBytes_AS_STRING(self), len,
+                sub, sub_len, start, end);
+    }
 
     if (subobj)
         PyBuffer_Release(&subbuf);
diff --git a/Objects/stringlib/fastsearch.h b/Objects/stringlib/fastsearch.h
--- a/Objects/stringlib/fastsearch.h
+++ b/Objects/stringlib/fastsearch.h
@@ -36,7 +36,7 @@
 Py_LOCAL_INLINE(Py_ssize_t)
 STRINGLIB(fastsearch_memchr_1char)(const STRINGLIB_CHAR* s, Py_ssize_t n,
                                    STRINGLIB_CHAR ch, unsigned char needle,
-                                   Py_ssize_t maxcount, int mode)
+                                   int mode)
 {
     if (mode == FAST_SEARCH) {
         const STRINGLIB_CHAR *ptr = s;
@@ -115,7 +115,7 @@
             if (needle != 0)
 #endif
                 return STRINGLIB(fastsearch_memchr_1char)
-                       (s, n, p[0], needle, maxcount, mode);
+                       (s, n, p[0], needle, mode);
         }
         if (mode == FAST_COUNT) {
             for (i = 0; i < n; i++)
diff --git a/Objects/stringlib/find.h b/Objects/stringlib/find.h
--- a/Objects/stringlib/find.h
+++ b/Objects/stringlib/find.h
@@ -11,8 +11,7 @@
 {
     Py_ssize_t pos;
 
-    if (str_len < 0)
-        return -1;
+    assert(str_len >= 0);
     if (sub_len == 0)
         return offset;
 
@@ -31,8 +30,7 @@
 {
     Py_ssize_t pos;
 
-    if (str_len < 0)
-        return -1;
+    assert(str_len >= 0);
     if (sub_len == 0)
         return str_len + offset;
 
@@ -44,27 +42,11 @@
     return pos;
 }
 
-/* helper macro to fixup start/end slice values */
-#define ADJUST_INDICES(start, end, len)         \
-    if (end > len)                              \
-        end = len;                              \
-    else if (end < 0) {                         \
-        end += len;                             \
-        if (end < 0)                            \
-            end = 0;                            \
-    }                                           \
-    if (start < 0) {                            \
-        start += len;                           \
-        if (start < 0)                          \
-            start = 0;                          \
-    }
-
 Py_LOCAL_INLINE(Py_ssize_t)
 STRINGLIB(find_slice)(const STRINGLIB_CHAR* str, Py_ssize_t str_len,
                      const STRINGLIB_CHAR* sub, Py_ssize_t sub_len,
                      Py_ssize_t start, Py_ssize_t end)
 {
-    ADJUST_INDICES(start, end, str_len);
     return STRINGLIB(find)(str + start, end - start, sub, sub_len, start);
 }
 
@@ -73,7 +55,6 @@
                       const STRINGLIB_CHAR* sub, Py_ssize_t sub_len,
                       Py_ssize_t start, Py_ssize_t end)
 {
-    ADJUST_INDICES(start, end, str_len);
     return STRINGLIB(rfind)(str + start, end - start, sub, sub_len, start);
 }
 
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -641,7 +641,7 @@
 static PyObject *
 fixup(PyObject *self, Py_UCS4 (*fixfct)(PyObject *s));
 
-Py_LOCAL_INLINE(Py_ssize_t) findchar(void *s, int kind,
+Py_LOCAL_INLINE(Py_ssize_t) findchar(const void *s, int kind,
                                      Py_ssize_t size, Py_UCS4 ch,
                                      int direction)
 {
@@ -8959,35 +8959,61 @@
 
 /* --- Helpers ------------------------------------------------------------ */
 
+/* helper macro to fixup start/end slice values */
+#define ADJUST_INDICES(start, end, len)         \
+    if (end > len)                              \
+        end = len;                              \
+    else if (end < 0) {                         \
+        end += len;                             \
+        if (end < 0)                            \
+            end = 0;                            \
+    }                                           \
+    if (start < 0) {                            \
+        start += len;                           \
+        if (start < 0)                          \
+            start = 0;                          \
+    }
+
 static Py_ssize_t
 any_find_slice(int direction, PyObject* s1, PyObject* s2,
                Py_ssize_t start,
                Py_ssize_t end)
 {
-    int kind1, kind2, kind;
+    int kind1, kind2;
     void *buf1, *buf2;
     Py_ssize_t len1, len2, result;
 
     kind1 = PyUnicode_KIND(s1);
     kind2 = PyUnicode_KIND(s2);
-    kind = kind1 > kind2 ? kind1 : kind2;
+    if (kind1 < kind2)
+        return -1;
+
+    len1 = PyUnicode_GET_LENGTH(s1);
+    len2 = PyUnicode_GET_LENGTH(s2);
+    ADJUST_INDICES(start, end, len1);
+    if (end - start < len2)
+        return -1;
+
     buf1 = PyUnicode_DATA(s1);
     buf2 = PyUnicode_DATA(s2);
-    if (kind1 != kind)
-        buf1 = _PyUnicode_AsKind(s1, kind);
-    if (!buf1)
-        return -2;
-    if (kind2 != kind)
-        buf2 = _PyUnicode_AsKind(s2, kind);
-    if (!buf2) {
-        if (kind1 != kind) PyMem_Free(buf1);
-        return -2;
-    }
-    len1 = PyUnicode_GET_LENGTH(s1);
-    len2 = PyUnicode_GET_LENGTH(s2);
+    if (len2 == 1) {
+        Py_UCS4 ch = PyUnicode_READ(kind2, buf2, 0);
+        result = findchar((const char *)buf1 + kind1*start,
+                          kind1, end - start, ch, direction);
+        if (result == -1)
+            return -1;
+        else
+            return start + result;
+    }
+
+    if (kind2 != kind1) {
+        buf2 = _PyUnicode_AsKind(s2, kind1);
+        if (!buf2)
+            return -2;
+    }
 
     if (direction > 0) {
-        switch (kind) {
+        switch (kind1) {
         case PyUnicode_1BYTE_KIND:
             if (PyUnicode_IS_ASCII(s1) && PyUnicode_IS_ASCII(s2))
                 result = asciilib_find_slice(buf1, len1, buf2, len2, start, end);
@@ -9005,7 +9031,7 @@
         }
     }
     else {
-        switch (kind) {
+        switch (kind1) {
         case PyUnicode_1BYTE_KIND:
             if (PyUnicode_IS_ASCII(s1) && PyUnicode_IS_ASCII(s2))
                 result = asciilib_rfind_slice(buf1, len1, buf2, len2, start, end);
@@ -9023,9 +9049,7 @@
         }
     }
 
-    if (kind1 != kind)
-        PyMem_Free(buf1);
-    if (kind2 != kind)
+    if (kind2 != kind1)
         PyMem_Free(buf2);
 
     return result;
@@ -9115,21 +9139,6 @@
 }
 
 
-/* helper macro to fixup start/end slice values */
-#define ADJUST_INDICES(start, end, len)         \
-    if (end > len)                              \
-        end = len;                              \
-    else if (end < 0) {                         \
-        end += len;                             \
-        if (end < 0)                            \
-            end = 0;                            \
-    }                                           \
-    if (start < 0) {                            \
-        start += len;                           \
-        if (start < 0)                          \
-            start = 0;                          \
-    }
-
 Py_ssize_t
 PyUnicode_Count(PyObject *str,
                 PyObject *substr,
@@ -9139,7 +9148,7 @@
     Py_ssize_t result;
     PyObject* str_obj;
     PyObject* sub_obj;
-    int kind1, kind2, kind;
+    int kind1, kind2;
     void *buf1 = NULL, *buf2 = NULL;
     Py_ssize_t len1, len2;
 
@@ -9159,24 +9168,30 @@
 
     kind1 = PyUnicode_KIND(str_obj);
     kind2 = PyUnicode_KIND(sub_obj);
-    kind = kind1;
+    if (kind1 < kind2) {
+        Py_DECREF(sub_obj);
+        Py_DECREF(str_obj);
+        return 0;
+    }
+
+    len1 = PyUnicode_GET_LENGTH(str_obj);
+    len2 = PyUnicode_GET_LENGTH(sub_obj);
+    ADJUST_INDICES(start, end, len1);
+    if (end - start < len2) {
+        Py_DECREF(sub_obj);
+        Py_DECREF(str_obj);
+        return 0;
+    }
+
     buf1 = PyUnicode_DATA(str_obj);
     buf2 = PyUnicode_DATA(sub_obj);
-    if (kind2 != kind) {
-        if (kind2 > kind) {
-            Py_DECREF(sub_obj);
-            Py_DECREF(str_obj);
-            return 0;
-        }
-        buf2 = _PyUnicode_AsKind(sub_obj, kind);
-    }
-    if (!buf2)
-        goto onError;
-    len1 = PyUnicode_GET_LENGTH(str_obj);
-    len2 = PyUnicode_GET_LENGTH(sub_obj);
-
-    ADJUST_INDICES(start, end, len1);
-    switch (kind) {
+    if (kind2 != kind1) {
+        buf2 = _PyUnicode_AsKind(sub_obj, kind1);
+        if (!buf2)
+            goto onError;
+    }
+
+    switch (kind1) {
     case PyUnicode_1BYTE_KIND:
         if (PyUnicode_IS_ASCII(str_obj) && PyUnicode_IS_ASCII(sub_obj))
             result = asciilib_count(
@@ -9208,14 +9223,14 @@
     Py_DECREF(sub_obj);
     Py_DECREF(str_obj);
 
-    if (kind2 != kind)
+    if (kind2 != kind1)
         PyMem_Free(buf2);
 
     return result;
   onError:
     Py_DECREF(sub_obj);
     Py_DECREF(str_obj);
-    if (kind2 != kind && buf2)
+    if (kind2 != kind1 && buf2)
         PyMem_Free(buf2);
     return -1;
 }
@@ -9268,6 +9283,8 @@
     }
     if (end > PyUnicode_GET_LENGTH(str))
         end = PyUnicode_GET_LENGTH(str);
+    if (start >= end)
+        return -1;
     kind = PyUnicode_KIND(str);
     result = findchar(PyUnicode_1BYTE_DATA(str) + kind*start,
                       kind, end-start, ch, direction);
@@ -10014,7 +10031,7 @@
       PyObject *substring,
       Py_ssize_t maxcount)
 {
-    int kind1, kind2, kind;
+    int kind1, kind2;
     void *buf1, *buf2;
     Py_ssize_t len1, len2;
     PyObject* out;
@@ -10058,23 +10075,25 @@
 
     kind1 = PyUnicode_KIND(self);
     kind2 = PyUnicode_KIND(substring);
-    kind = kind1 > kind2 ? kind1 : kind2;
+    len1 = PyUnicode_GET_LENGTH(self);
+    len2 = PyUnicode_GET_LENGTH(substring);
+    if (kind1 < kind2 || len1 < len2) {
+        out = PyList_New(1);
+        if (out == NULL)
+            return NULL;
+        Py_INCREF(self);
+        PyList_SET_ITEM(out, 0, self);
+        return out;
+    }
     buf1 = PyUnicode_DATA(self);
     buf2 = PyUnicode_DATA(substring);
-    if (kind1 != kind)
-        buf1 = _PyUnicode_AsKind(self, kind);
-    if (!buf1)
-        return NULL;
-    if (kind2 != kind)
-        buf2 = _PyUnicode_AsKind(substring, kind);
-    if (!buf2) {
-        if (kind1 != kind) PyMem_Free(buf1);
-        return NULL;
-    }
-    len1 = PyUnicode_GET_LENGTH(self);
-    len2 = PyUnicode_GET_LENGTH(substring);
-
-    switch (kind) {
+    if (kind2 != kind1) {
+        buf2 = _PyUnicode_AsKind(substring, kind1);
+        if (!buf2)
+            return NULL;
+    }
+
+    switch (kind1) {
     case PyUnicode_1BYTE_KIND:
         if (PyUnicode_IS_ASCII(self) && PyUnicode_IS_ASCII(substring))
             out = asciilib_split(
@@ -10094,9 +10113,7 @@
     default:
         out = NULL;
     }
-    if (kind1 != kind)
-        PyMem_Free(buf1);
-    if (kind2 != kind)
+    if (kind2 != kind1)
         PyMem_Free(buf2);
     return out;
 }
@@ -10106,7 +10123,7 @@
        PyObject *substring,
        Py_ssize_t maxcount)
 {
-    int kind1, kind2, kind;
+    int kind1, kind2;
     void *buf1, *buf2;
     Py_ssize_t len1, len2;
     PyObject* out;
@@ -10150,23 +10167,25 @@
 
     kind1 = PyUnicode_KIND(self);
     kind2 = PyUnicode_KIND(substring);
-    kind = kind1 > kind2 ? kind1 : kind2;
+    len1 = PyUnicode_GET_LENGTH(self);
+    len2 = PyUnicode_GET_LENGTH(substring);
+    if (kind1 < kind2 || len1 < len2) {
+        out = PyList_New(1);
+        if (out == NULL)
+            return NULL;
+        Py_INCREF(self);
+        PyList_SET_ITEM(out, 0, self);
+        return out;
+    }
     buf1 = PyUnicode_DATA(self);
     buf2 = PyUnicode_DATA(substring);
-    if (kind1 != kind)
-        buf1 = _PyUnicode_AsKind(self, kind);
-    if (!buf1)
-        return NULL;
-    if (kind2 != kind)
-        buf2 = _PyUnicode_AsKind(substring, kind);
-    if (!buf2) {
-        if (kind1 != kind) PyMem_Free(buf1);
-        return NULL;
-    }
-    len1 = PyUnicode_GET_LENGTH(self);
-    len2 = PyUnicode_GET_LENGTH(substring);
-
-    switch (kind) {
+    if (kind2 != kind1) {
+        buf2 = _PyUnicode_AsKind(substring, kind1);
+        if (!buf2)
+            return NULL;
+    }
+
+    switch (kind1) {
     case PyUnicode_1BYTE_KIND:
         if (PyUnicode_IS_ASCII(self) && PyUnicode_IS_ASCII(substring))
             out = asciilib_rsplit(
@@ -10186,9 +10205,7 @@
     default:
         out = NULL;
     }
-    if (kind1 != kind)
-        PyMem_Free(buf1);
-    if (kind2 != kind)
+    if (kind2 != kind1)
         PyMem_Free(buf2);
     return out;
 }
@@ -10407,7 +10424,7 @@
         }
         /* new_size = PyUnicode_GET_LENGTH(self) + n * (PyUnicode_GET_LENGTH(str2) -
            PyUnicode_GET_LENGTH(str1))); */
-        if (len2 > len1 && len2 - len1 > (PY_SSIZE_T_MAX - slen) / n) {
+        if (len1 < len2 && len2 - len1 > (PY_SSIZE_T_MAX - slen) / n) {
                 PyErr_SetString(PyExc_OverflowError,
                                 "replace string is too long");
                 goto error;
@@ -10816,7 +10833,7 @@
         }
         if (len1 > len2)
             return 1; /* uni is longer */
-        if (len2 > len1)
+        if (len1 < len2)
             return -1; /* str is longer */
         return 0;
     }
@@ -10928,23 +10945,35 @@
 
     kind1 = PyUnicode_KIND(str);
     kind2 = PyUnicode_KIND(sub);
+    if (kind1 < kind2) {
+        Py_DECREF(sub);
+        Py_DECREF(str);
+        return 0;
+    }
+    len1 = PyUnicode_GET_LENGTH(str);
+    len2 = PyUnicode_GET_LENGTH(sub);
+    if (len1 < len2) {
+        Py_DECREF(sub);
+        Py_DECREF(str);
+        return 0;
+    }
     buf1 = PyUnicode_DATA(str);
     buf2 = PyUnicode_DATA(sub);
+    if (len2 == 1) {
+        Py_UCS4 ch = PyUnicode_READ(kind2, buf2, 0);
+        result = findchar((const char *)buf1, kind1, len1, ch, 1) != -1;
+        Py_DECREF(sub);
+        Py_DECREF(str);
+        return result;
+    }
     if (kind2 != kind1) {
-        if (kind2 > kind1) {
+        buf2 = _PyUnicode_AsKind(sub, kind1);
+        if (!buf2) {
             Py_DECREF(sub);
             Py_DECREF(str);
-            return 0;
-        }
-        buf2 = _PyUnicode_AsKind(sub, kind1);
-    }
-    if (!buf2) {
-        Py_DECREF(sub);
-        Py_DECREF(str);
-        return -1;
-    }
-    len1 = PyUnicode_GET_LENGTH(str);
-    len2 = PyUnicode_GET_LENGTH(sub);
+            return -1;
+        }
+    }
 
     switch (kind1) {
     case PyUnicode_1BYTE_KIND:
@@ -11129,7 +11158,7 @@
     Py_ssize_t start = 0;
     Py_ssize_t end = PY_SSIZE_T_MAX;
     PyObject *result;
-    int kind1, kind2, kind;
+    int kind1, kind2;
     void *buf1, *buf2;
     Py_ssize_t len1, len2, iresult;
 
@@ -11139,24 +11168,27 @@
 
     kind1 = PyUnicode_KIND(self);
     kind2 = PyUnicode_KIND(substring);
-    if (kind2 > kind1) {
+    if (kind1 < kind2) {
         Py_DECREF(substring);
         return PyLong_FromLong(0);
     }
-    kind = kind1;
+    len1 = PyUnicode_GET_LENGTH(self);
+    len2 = PyUnicode_GET_LENGTH(substring);
+    ADJUST_INDICES(start, end, len1);
+    if (end - start < len2) {
+        Py_DECREF(substring);
+        return PyLong_FromLong(0);
+    }
     buf1 = PyUnicode_DATA(self);
     buf2 = PyUnicode_DATA(substring);
-    if (kind2 != kind)
-        buf2 = _PyUnicode_AsKind(substring, kind);
-    if (!buf2) {
-        Py_DECREF(substring);
-        return NULL;
-    }
-    len1 = PyUnicode_GET_LENGTH(self);
-    len2 = PyUnicode_GET_LENGTH(substring);
-
-    ADJUST_INDICES(start, end, len1);
-    switch (kind) {
+    if (kind2 != kind1) {
+        buf2 = _PyUnicode_AsKind(substring, kind1);
+        if (!buf2) {
+            Py_DECREF(substring);
+            return NULL;
+        }
+    }
+    switch (kind1) {
     case PyUnicode_1BYTE_KIND:
         iresult = ucs1lib_count(
             ((Py_UCS1*)buf1) + start, end - start,
@@ -11181,7 +11213,7 @@
 
     result = PyLong_FromSsize_t(iresult);
 
-    if (kind2 != kind)
+    if (kind2 != kind1)
         PyMem_Free(buf2);
 
     Py_DECREF(substring);
@@ -12632,8 +12664,8 @@
     PyObject* str_obj;
     PyObject* sep_obj;
     PyObject* out;
-    int kind1, kind2, kind;
-    void *buf1 = NULL, *buf2 = NULL;
+    int kind1, kind2;
+    void *buf1, *buf2;
     Py_ssize_t len1, len2;
 
     str_obj = PyUnicode_FromObject(str_in);
@@ -12652,21 +12684,29 @@
 
     kind1 = PyUnicode_KIND(str_obj);
     kind2 = PyUnicode_KIND(sep_obj);
-    kind = Py_MAX(kind1, kind2);
-    buf1 = PyUnicode_DATA(str_obj);
-    if (kind1 != kind)
-        buf1 = _PyUnicode_AsKind(str_obj, kind);
-    if (!buf1)
-        goto onError;
-    buf2 = PyUnicode_DATA(sep_obj);
-    if (kind2 != kind)
-        buf2 = _PyUnicode_AsKind(sep_obj, kind);
-    if (!buf2)
-        goto onError;
     len1 = PyUnicode_GET_LENGTH(str_obj);
     len2 = PyUnicode_GET_LENGTH(sep_obj);
-
-    switch (PyUnicode_KIND(str_obj)) {
+    if (kind1 < kind2 || len1 < len2) {
+        _Py_INCREF_UNICODE_EMPTY();
+        if (!unicode_empty)
+            out = NULL;
+        else {
+            out = PyTuple_Pack(3, str_obj, unicode_empty, unicode_empty);
+            Py_DECREF(unicode_empty);
+        }
+        Py_DECREF(sep_obj);
+        Py_DECREF(str_obj);
+        return out;
+    }
+    buf1 = PyUnicode_DATA(str_obj);
+    buf2 = PyUnicode_DATA(sep_obj);
+    if (kind2 != kind1) {
+        buf2 = _PyUnicode_AsKind(sep_obj, kind1);
+        if (!buf2)
+            goto onError;
+    }
+
+    switch (kind1) {
     case PyUnicode_1BYTE_KIND:
         if (PyUnicode_IS_ASCII(str_obj) && PyUnicode_IS_ASCII(sep_obj))
             out = asciilib_partition(str_obj, buf1, len1, sep_obj, buf2, len2);
@@ -12686,18 +12726,14 @@
 
     Py_DECREF(sep_obj);
     Py_DECREF(str_obj);
-    if (kind1 != kind)
-        PyMem_Free(buf1);
-    if (kind2 != kind)
+    if (kind2 != kind1)
         PyMem_Free(buf2);
 
     return out;
   onError:
     Py_DECREF(sep_obj);
     Py_DECREF(str_obj);
-    if (kind1 != kind && buf1)
-        PyMem_Free(buf1);
-    if (kind2 != kind && buf2)
+    if (kind2 != kind1 && buf2)
         PyMem_Free(buf2);
     return NULL;
 }
@@ -12709,8 +12745,8 @@
     PyObject* str_obj;
     PyObject* sep_obj;
     PyObject* out;
-    int kind1, kind2, kind;
-    void *buf1 = NULL, *buf2 = NULL;
+    int kind1, kind2;
+    void *buf1, *buf2;
     Py_ssize_t len1, len2;
 
     str_obj = PyUnicode_FromObject(str_in);
@@ -12722,23 +12758,31 @@
         return NULL;
     }
 
-    kind1 = PyUnicode_KIND(str_in);
+    kind1 = PyUnicode_KIND(str_obj);
     kind2 = PyUnicode_KIND(sep_obj);
-    kind = Py_MAX(kind1, kind2);
-    buf1 = PyUnicode_DATA(str_in);
-    if (kind1 != kind)
-        buf1 = _PyUnicode_AsKind(str_in, kind);
-    if (!buf1)
-        goto onError;
-    buf2 = PyUnicode_DATA(sep_obj);
-    if (kind2 != kind)
-        buf2 = _PyUnicode_AsKind(sep_obj, kind);
-    if (!buf2)
-        goto onError;
     len1 = PyUnicode_GET_LENGTH(str_obj);
     len2 = PyUnicode_GET_LENGTH(sep_obj);
-
-    switch (PyUnicode_KIND(str_in)) {
+    if (kind1 < kind2 || len1 < len2) {
+        _Py_INCREF_UNICODE_EMPTY();
+        if (!unicode_empty)
+            out = NULL;
+        else {
+            out = PyTuple_Pack(3, unicode_empty, unicode_empty, str_obj);
+            Py_DECREF(unicode_empty);
+        }
+        Py_DECREF(sep_obj);
+        Py_DECREF(str_obj);
+        return out;
+    }
+    buf1 = PyUnicode_DATA(str_obj);
+    buf2 = PyUnicode_DATA(sep_obj);
+    if (kind2 != kind1) {
+        buf2 = _PyUnicode_AsKind(sep_obj, kind1);
+        if (!buf2)
+            goto onError;
+    }
+
+    switch (kind1) {
     case PyUnicode_1BYTE_KIND:
         if (PyUnicode_IS_ASCII(str_obj) && PyUnicode_IS_ASCII(sep_obj))
             out = asciilib_rpartition(str_obj, buf1, len1, sep_obj, buf2, len2);
@@ -12758,18 +12802,14 @@
 
     Py_DECREF(sep_obj);
     Py_DECREF(str_obj);
-    if (kind1 != kind)
-        PyMem_Free(buf1);
-    if (kind2 != kind)
+    if (kind2 != kind1)
         PyMem_Free(buf2);
 
     return out;
   onError:
     Py_DECREF(sep_obj);
     Py_DECREF(str_obj);
-    if (kind1 != kind && buf1)
-        PyMem_Free(buf1);
-    if (kind2 != kind && buf2)
+    if (kind2 != kind1 && buf2)
         PyMem_Free(buf2);
     return NULL;
 }

-- 
Repository URL: https://hg.python.org/cpython


More information about the Python-checkins mailing list