Match beginning of two strings

Richie Hindle richie at entrian.com
Mon Aug 4 11:38:28 EDT 2003


[Alex]
> I'm not sure where I went wrong in
> the Pyrex coding (it doesn't seem to be performing anywhere
> as well as I thought it might) and I'll be happy for real
> Pyrex expert to show me the way.

I'm no an expert, but I can see a few easily-fixed problems.  The line:

        if a[i] != b[i]:

is working with Python strings when it could be working with C
strings.  Here's the original code and its output on my machine:

def exa(a, b):
    cdef int la
    cdef int lb
    la = len(a)
    lb = len(b)
    cdef int lmin
    lmin = min(la, lb)
    cdef int i
    i = 0
    while i < lmin:
        if a[i] != b[i]:
            return a[:i]
        i = i + 1
    if lmin == la:
        return a
    else:
        return b

100000 loops, best of 3: 9.11 usec per loop

Here's a modified version of the code comparing C strings:

def exa(a, b):
    cdef char* c_a   # `a` as a C string
    cdef char* c_b   # `b` as a C string
    cdef int la
    cdef int lb
    
    c_a = a
    c_b = b
    la = len(a)
    lb = len(b)
    cdef int lmin
    lmin = min(la, lb)
    cdef int i
    i = 0
    while i < lmin:
        if c_a[i] != c_b[i]:
            return a[:i]
        i = i + 1
    if lmin == la:
        return a
    else:
        return b

100000 loops, best of 3: 5.79 usec per loop

Almost twice as fast.  Looking at the generated C is always
worthwhile when optimising Pyrex code - here's the code that does
the comparison against Python strings:

    /* "D:\src\tests\pyrex\exa.pyx":11 */
    __pyx_3 = PyInt_FromLong(__pyx_v_i); if (!__pyx_3) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 11; goto __pyx_L1;}
    __pyx_1 = PyObject_GetItem(__pyx_v_a, __pyx_3); if (!__pyx_1) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 11; goto __pyx_L1;}
    Py_DECREF(__pyx_3); __pyx_3 = 0;
    __pyx_5 = PyInt_FromLong(__pyx_v_i); if (!__pyx_5) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 11; goto __pyx_L1;}
    __pyx_2 = PyObject_GetItem(__pyx_v_b, __pyx_5); if (!__pyx_2) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 11; goto __pyx_L1;}
    Py_DECREF(__pyx_5); __pyx_5 = 0;
    if (PyObject_Cmp(__pyx_1, __pyx_2, &__pyx_4) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 11; goto __pyx_L1;}
    __pyx_4 = __pyx_4 != 0;
    Py_DECREF(__pyx_1); __pyx_1 = 0;
    Py_DECREF(__pyx_2); __pyx_2 = 0;
    if (__pyx_4) {

vs.

    /* "D:\src\tests\pyrex\exa.pyx":16 */
    __pyx_5 = ((__pyx_v_c_a[__pyx_v_i]) != (__pyx_v_c_b[__pyx_v_i]));
    if (__pyx_5) {

for C strings.  There's another similar optimisation that the C
output leads you to: you can use strlen rather than Python's len:

cdef extern from "string.h":
    int strlen(char*)

def exa(a, b):
    cdef char* c_a   # `a` as a C string
    cdef char* c_b   # `b` as a C string
    cdef int la
    cdef int lb
    
    c_a = a
    c_b = b
    la = strlen(c_a)
    lb = strlen(c_b)
    cdef int lmin
    lmin = min(la, lb)
    cdef int i
    i = 0
    while i < lmin:
        if c_a[i] != c_b[i]:
            return a[:i]
        i = i + 1
    if lmin == la:
        return a
    else:
        return b

100000 loops, best of 3: 3.58 usec per loop

That replaces:

  /* "D:\src\tests\pyrex\exa.pyx":4 */
  __pyx_1 = __Pyx_GetName(__pyx_b, "len"); if (!__pyx_1) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 4; goto __pyx_L1;}
  __pyx_2 = PyTuple_New(1); if (!__pyx_2) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 4; goto __pyx_L1;}
  Py_INCREF(__pyx_v_a);
  PyTuple_SET_ITEM(__pyx_2, 0, __pyx_v_a);
  __pyx_3 = PyObject_CallObject(__pyx_1, __pyx_2); if (!__pyx_3) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 4; goto __pyx_L1;}
  Py_DECREF(__pyx_1); __pyx_1 = 0;
  Py_DECREF(__pyx_2); __pyx_2 = 0;
  __pyx_4 = PyInt_AsLong(__pyx_3); if (PyErr_Occurred()) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 4; goto __pyx_L1;}
  Py_DECREF(__pyx_3); __pyx_3 = 0;
  __pyx_v_la = __pyx_4;

  /* "D:\src\tests\pyrex\exa.pyx":5 */
  __pyx_1 = __Pyx_GetName(__pyx_b, "len"); if (!__pyx_1) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 5; goto __pyx_L1;}
  __pyx_2 = PyTuple_New(1); if (!__pyx_2) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 5; goto __pyx_L1;}
  Py_INCREF(__pyx_v_b);
  PyTuple_SET_ITEM(__pyx_2, 0, __pyx_v_b);
  __pyx_3 = PyObject_CallObject(__pyx_1, __pyx_2); if (!__pyx_3) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 5; goto __pyx_L1;}
  Py_DECREF(__pyx_1); __pyx_1 = 0;
  Py_DECREF(__pyx_2); __pyx_2 = 0;
  __pyx_4 = PyInt_AsLong(__pyx_3); if (PyErr_Occurred()) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 5; goto __pyx_L1;}
  Py_DECREF(__pyx_3); __pyx_3 = 0;
  __pyx_v_lb = __pyx_4;

with :

  /* "D:\src\tests\pyrex\exa.pyx":12 */
  __pyx_v_la = strlen(__pyx_v_c_a);

  /* "D:\src\tests\pyrex\exa.pyx":13 */
  __pyx_v_lb = strlen(__pyx_v_c_b);

and leaves the call to 'min' as the only remaining huge block of C.
The final version looks like this, eliminating 'min' (Greg, can we have
the terary operator in Pyrex please? <good_mood ? wink : frown>)

cdef extern from "string.h":
    int strlen(char*)

def exa(a, b):
    cdef char* c_a   # `a` as a C string
    cdef char* c_b   # `b` as a C string
    cdef int la
    cdef int lb
    
    c_a = a
    c_b = b
    la = strlen(c_a)
    lb = strlen(c_b)
    cdef int lmin
    if la < lb:
        lmin = la
    else:
        lmin = lb
    cdef int i
    i = 0
    while i < lmin:
        if c_a[i] != c_b[i]:
            return a[:i]
        i = i + 1
    if lmin == la:
        return a
    else:
        return b


1000000 loops, best of 3: 0.803 usec per loop

Over ten times quicker than the original, for the sake of a couple of
small tweaks driven by looking at the C output.  Although the C still
looks very verbose at first glance, it's now substantially the same as
Alex's cexa.c.

Hope that helps,

-- 
Richie Hindle
richie at entrian.com






More information about the Python-list mailing list