[Python-checkins] cpython: Reuse the stringlib in findchar(), and make its signature more convenient

antoine.pitrou python-checkins at python.org
Thu Oct 13 18:59:21 CEST 2011


http://hg.python.org/cpython/rev/62f2ed19807b
changeset:   72912:62f2ed19807b
user:        Antoine Pitrou <solipsis at pitrou.net>
date:        Thu Oct 13 18:55:09 2011 +0200
summary:
  Reuse the stringlib in findchar(), and make its signature more convenient

files:
  Objects/unicodeobject.c |  84 ++++++++++++++--------------
  1 files changed, 43 insertions(+), 41 deletions(-)


diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -519,36 +519,45 @@
 #include "stringlib/localeutil.h"
 #include "stringlib/undef.h"
 
+#include "stringlib/unicodedefs.h"
+#include "stringlib/fastsearch.h"
+#include "stringlib/count.h"
+#include "stringlib/find.h"
+
 /* --- Unicode Object ----------------------------------------------------- */
 
 static PyObject *
 fixup(PyObject *self, Py_UCS4 (*fixfct)(PyObject *s));
 
-Py_LOCAL_INLINE(char *) findchar(void *s, int kind,
-                                 Py_ssize_t size, Py_UCS4 ch,
-                                 int direction)
-{
-    /* like wcschr, but doesn't stop at NULL characters */
-    Py_ssize_t i;
-    if (kind == 1) {
-        if (direction == 1)
-            return memchr(s, ch, size);
-#ifdef HAVE_MEMRCHR
-        else
-            return memrchr(s, ch, size);
-#endif
-    }
-    if (direction == 1) {
-        for(i = 0; i < size; i++)
-            if (PyUnicode_READ(kind, s, i) == ch)
-                return (char*)s + kind * i;
-    }
-    else {
-        for(i = size-1; i >= 0; i--)
-            if (PyUnicode_READ(kind, s, i) == ch)
-                return (char*)s + kind * i;
-    }
-    return NULL;
+Py_LOCAL_INLINE(Py_ssize_t) findchar(void *s, int kind,
+                                     Py_ssize_t size, Py_UCS4 ch,
+                                     int direction)
+{
+    int mode = (direction == 1) ? FAST_SEARCH : FAST_RSEARCH;
+
+    switch (kind) {
+    case PyUnicode_1BYTE_KIND:
+        {
+            Py_UCS1 ch1 = (Py_UCS1) ch;
+            if (ch1 == ch)
+                return ucs1lib_fastsearch((Py_UCS1 *) s, size, &ch1, 1, 0, mode);
+            else
+                return -1;
+        }
+    case PyUnicode_2BYTE_KIND:
+        {
+            Py_UCS2 ch2 = (Py_UCS2) ch;
+            if (ch2 == ch)
+                return ucs2lib_fastsearch((Py_UCS2 *) s, size, &ch2, 1, 0, mode);
+            else
+                return -1;
+        }
+    case PyUnicode_4BYTE_KIND:
+        return ucs4lib_fastsearch((Py_UCS4 *) s, size, &ch, 1, 0, mode);
+    default:
+        assert(0);
+        return -1;
+    }
 }
 
 static PyObject*
@@ -3311,7 +3320,7 @@
         }
     }
     if (findchar(PyUnicode_DATA(output), PyUnicode_KIND(output),
-                 PyUnicode_GET_LENGTH(output), 0, 1)) {
+                 PyUnicode_GET_LENGTH(output), 0, 1) >= 0) {
         PyErr_SetString(PyExc_TypeError, "embedded NUL character");
         Py_DECREF(output);
         return 0;
@@ -8638,12 +8647,6 @@
 }
 
 
-#include "stringlib/unicodedefs.h"
-#include "stringlib/fastsearch.h"
-
-#include "stringlib/count.h"
-#include "stringlib/find.h"
-
 /* helper macro to fixup start/end slice values */
 #define ADJUST_INDICES(start, end, len)         \
     if (end > len)                              \
@@ -8779,8 +8782,8 @@
                    Py_ssize_t start, Py_ssize_t end,
                    int direction)
 {
-    char *result;
     int kind;
+    Py_ssize_t result;
     if (PyUnicode_READY(str) == -1)
         return -2;
     if (start < 0 || end < 0) {
@@ -8790,13 +8793,12 @@
     if (end > PyUnicode_GET_LENGTH(str))
         end = PyUnicode_GET_LENGTH(str);
     kind = PyUnicode_KIND(str);
-    result = findchar(PyUnicode_1BYTE_DATA(str)
-                      + kind*start,
-                      kind,
-                      end-start, ch, direction);
-    if (!result)
+    result = findchar(PyUnicode_1BYTE_DATA(str) + kind*start,
+                      kind, end-start, ch, direction);
+    if (result == -1)
         return -1;
-    return (result-(char*)PyUnicode_DATA(str)) >> (kind-1);
+    else
+        return start + result;
 }
 
 static int
@@ -9707,8 +9709,8 @@
             Py_UCS4 u1, u2;
             int rkind;
             u1 = PyUnicode_READ_CHAR(str1, 0);
-            if (!findchar(sbuf, PyUnicode_KIND(self),
-                          slen, u1, 1))
+            if (findchar(sbuf, PyUnicode_KIND(self),
+                         slen, u1, 1) < 0)
                 goto nothing;
             u2 = PyUnicode_READ_CHAR(str2, 0);
             u = PyUnicode_New(slen, maxchar);

-- 
Repository URL: http://hg.python.org/cpython


More information about the Python-checkins mailing list