[Python-checkins] r46394 - in sandbox/trunk/hotbuffer: Modules/_hotbuf.c Modules/fastsearch.h test_hotbuf.py

bob.ippolito python-checkins at python.org
Fri May 26 21:13:41 CEST 2006


Author: bob.ippolito
Date: Fri May 26 21:13:40 2006
New Revision: 46394

Added:
   sandbox/trunk/hotbuffer/Modules/fastsearch.h   (contents, props changed)
Modified:
   sandbox/trunk/hotbuffer/Modules/_hotbuf.c
   sandbox/trunk/hotbuffer/test_hotbuf.py
Log:
add fast find and count functions

Modified: sandbox/trunk/hotbuffer/Modules/_hotbuf.c
==============================================================================
--- sandbox/trunk/hotbuffer/Modules/_hotbuf.c	(original)
+++ sandbox/trunk/hotbuffer/Modules/_hotbuf.c	Fri May 26 21:13:40 2006
@@ -20,7 +20,13 @@
 #define writebufferproc getwritebufferproc
 #define charbufferproc getcharbufferproc
 #define segcountproc getsegcountproc
+#ifndef Py_LOCAL(rval)
+#define Py_LOCAL static rval
 #endif
+#endif
+
+#define STRINGLIB_CHAR char
+#include "fastsearch.h"
 
 static PyTypeObject PyHotbuf_Type;
 static PyObject *HotbufOverflow = NULL;
@@ -685,7 +691,7 @@
                         "incorrect input type, require string");
         return NULL;
     }
-    instring = PyString_AsString(arg);
+    instring = PyString_AS_STRING(arg);
     len = PyString_GET_SIZE(arg);
 
     CHECK_LIMIT_ERROR(len);
@@ -759,6 +765,65 @@
     return result;
 }
 
+PyDoc_STRVAR(find__doc__,
+"B.find(sub) -> int\n\
+\n\
+Return the lowest index in S where substring sub is found,\n\
+such that sub is contained within the current window.\n\
+\n\
+Return -1 on failure.");
+
+static PyObject*
+hotbuf_find(PyHotbufObject *self, PyObject* arg)
+{
+    Py_ssize_t result;
+
+    /* Check and extract input string */
+    if ( arg == NULL || !PyString_Check(arg) ) {
+        PyErr_SetString(PyExc_TypeError,
+                        "incorrect input type, require string");
+        return NULL;
+    }
+
+    result = fastsearch(self->b_ptr + self->b_position,
+                        self->b_limit - self->b_position,
+                        PyString_AS_STRING(arg),
+                        PyString_GET_SIZE(arg),
+                        FAST_SEARCH);
+
+    return PyInt_FromSsize_t(result);
+}
+
+PyDoc_STRVAR(count__doc__,
+"B.count(sub) -> int\n\
+\n\
+Return the number of occurrences of substring sub in\n\
+the current window.");
+
+static PyObject*
+hotbuf_count(PyHotbufObject *self, PyObject* arg)
+{
+    Py_ssize_t result;
+
+    /* Check and extract input string */
+    if ( arg == NULL || !PyString_Check(arg) ) {
+        PyErr_SetString(PyExc_TypeError,
+                        "incorrect input type, require string");
+        return NULL;
+    }
+
+    result = fastsearch(self->b_ptr + self->b_position,
+                        self->b_limit - self->b_position,
+                        PyString_AS_STRING(arg),
+                        PyString_GET_SIZE(arg),
+                        FAST_COUNT);
+
+    if (result == -1)
+        result = 0;
+
+    return PyInt_FromSsize_t(result);
+}
+
 
 /* ===========================================================================
  * Buffer protocol methods
@@ -941,6 +1006,8 @@
     {"getstr", (PyCFunction)hotbuf_getstr, METH_VARARGS, getstr__doc__},
     {"putstr", (PyCFunction)hotbuf_putstr, METH_O, putstr__doc__},
     {"unpack", (PyCFunction)hotbuf_unpack, METH_O, unpack__doc__},
+    {"find", (PyCFunction)hotbuf_find, METH_O, find__doc__},
+    {"count", (PyCFunction)hotbuf_count, METH_O, count__doc__},
     {NULL, NULL} /* sentinel */
 };
 

Added: sandbox/trunk/hotbuffer/Modules/fastsearch.h
==============================================================================
--- (empty file)
+++ sandbox/trunk/hotbuffer/Modules/fastsearch.h	Fri May 26 21:13:40 2006
@@ -0,0 +1,104 @@
+/* stringlib: fastsearch implementation */
+
+#ifndef STRINGLIB_FASTSEARCH_H
+#define STRINGLIB_FASTSEARCH_H
+
+/* fast search/count implementation, based on a mix between boyer-
+   moore and horspool, with a few more bells and whistles on the top.
+   for some more background, see: http://effbot.org/stringlib */
+
+/* note: fastsearch may access s[n], which isn't a problem when using
+   Python's ordinary string types, but may cause problems if you're
+   using this code in other contexts.  also, the count mode returns -1
+   if there cannot possible be a match in the target string, and 0 if
+   it has actually checked for matches, but didn't find any.  callers
+   beware! */
+
+#define FAST_COUNT 0
+#define FAST_SEARCH 1
+
+Py_LOCAL(Py_ssize_t)
+fastsearch(const STRINGLIB_CHAR* s, Py_ssize_t n,
+           const STRINGLIB_CHAR* p, Py_ssize_t m,
+           int mode)
+{
+    long mask;
+    Py_ssize_t skip, count = 0;
+    Py_ssize_t i, j, mlast, w;
+
+    w = n - m;
+
+    if (w < 0)
+        return -1;
+
+    /* look for special cases */
+    if (m <= 1) {
+        if (m <= 0)
+            return -1;
+        /* use special case for 1-character strings */
+        if (mode == FAST_COUNT) {
+            for (i = 0; i < n; i++)
+                if (s[i] == p[0])
+                    count++;
+            return count;
+        } else {
+            for (i = 0; i < n; i++)
+                if (s[i] == p[0])
+                    return i;
+        }
+        return -1;
+    }
+
+    mlast = m - 1;
+
+    /* create compressed boyer-moore delta 1 table */
+    skip = mlast - 1;
+    /* process pattern[:-1] */
+    for (mask = i = 0; i < mlast; i++) {
+        mask |= (1 << (p[i] & 0x1F));
+        if (p[i] == p[mlast])
+            skip = mlast - i - 1;
+    }
+    /* process pattern[-1] outside the loop */
+    mask |= (1 << (p[mlast] & 0x1F));
+
+    for (i = 0; i <= w; i++) {
+        /* note: using mlast in the skip path slows things down on x86 */
+        if (s[i+m-1] == p[m-1]) {
+            /* candidate match */
+            for (j = 0; j < mlast; j++)
+                if (s[i+j] != p[j])
+                    break;
+            if (j == mlast) {
+                /* got a match! */
+                if (mode != FAST_COUNT)
+                    return i;
+                count++;
+                i = i + mlast;
+                continue;
+            }
+            /* miss: check if next character is part of pattern */
+            if (!(mask & (1 << (s[i+m] & 0x1F))))
+                i = i + m;
+            else
+                i = i + skip;
+        } else {
+            /* skip: check if next character is part of pattern */
+            if (!(mask & (1 << (s[i+m] & 0x1F))))
+                i = i + m;
+        }
+    }
+
+    if (mode != FAST_COUNT)
+        return -1;
+    return count;
+}
+
+#endif
+
+/*
+Local variables:
+c-basic-offset: 4
+indent-tabs-mode: nil
+End:
+*/

Modified: sandbox/trunk/hotbuffer/test_hotbuf.py
==============================================================================
--- sandbox/trunk/hotbuffer/test_hotbuf.py	(original)
+++ sandbox/trunk/hotbuffer/test_hotbuf.py	Fri May 26 21:13:40 2006
@@ -227,6 +227,55 @@
         self.assertEquals(str(b), '')
         self.assertEquals(b.getstr(), '')
 
+    def test_count(self):
+        b = hotbuf(CAPACITY)
+        b.putstr('abcddddd')
+        b.flip()
+        b.limit = 3
+        self.assertEquals(b.count('ab'), 1)
+        self.assertEquals(b.count('d'), 0)
+        self.assertEquals(b.count('1' * 100), 0)
+        b.limit = 4
+        self.assertEquals(b.count('ab'), 1)
+        self.assertEquals(b.count('d'), 1)
+        self.assertEquals(b.count('1' * 100), 0)
+        b.limit = 5
+        self.assertEquals(b.count('ab'), 1)
+        self.assertEquals(b.count('d'), 2)
+        self.assertEquals(b.count('1' * 100), 0)
+        b.advance(1)
+        self.assertEquals(b.count('ab'), 0)
+        self.assertEquals(b.count('d'), 2)
+        self.assertEquals(b.count('1' * 100), 0)
+        b.limit += 1
+        self.assertEquals(b.count('ab'), 0)
+        self.assertEquals(b.count('d'), 3)
+        self.assertEquals(b.count('1' * 100), 0)
+    
+    def test_find(self):
+        b = hotbuf(CAPACITY)
+        b.putstr('abcddddd')
+        b.flip()
+        b.limit = 3
+        self.assertEquals(b.find('ab'), 0)
+        self.assertEquals(b.find('d'), -1)
+        self.assertEquals(b.find('1' * 100), -1)
+        b.limit = 4
+        self.assertEquals(b.find('ab'), 0)
+        self.assertEquals(b.find('d'), 3)
+        self.assertEquals(b.find('1' * 100), -1)
+        b.limit = 5
+        self.assertEquals(b.find('ab'), 0)
+        self.assertEquals(b.find('d'), 3)
+        self.assertEquals(b.find('1' * 100), -1)
+        b.advance(1)
+        self.assertEquals(b.find('ab'), -1)
+        self.assertEquals(b.find('d'), 2)
+        self.assertEquals(b.find('1' * 100), -1)
+        b.limit += 1
+        self.assertEquals(b.find('ab'), -1)
+        self.assertEquals(b.find('d'), 2)
+        self.assertEquals(b.find('1' * 100), -1)
 
 def test_main():
     test_support.run_unittest(HotbufTestCase)


More information about the Python-checkins mailing list