[Python-checkins] CVS: python/dist/src/Modules _sre.c,2.72,2.73
Fredrik Lundh
effbot@users.sourceforge.net
Mon, 22 Oct 2001 14:18:10 -0700
Update of /cvsroot/python/python/dist/src/Modules
In directory usw-pr-cvs1:/tmp/cvs-serv11996/Modules
Modified Files:
_sre.c
Log Message:
another major speedup: let sre.sub/subn check for escapes in the
template string, and don't call the template compiler if we can
avoid it.
Index: _sre.c
===================================================================
RCS file: /cvsroot/python/python/dist/src/Modules/_sre.c,v
retrieving revision 2.72
retrieving revision 2.73
diff -C2 -d -r2.72 -r2.73
*** _sre.c 2001/10/22 06:01:56 2.72
--- _sre.c 2001/10/22 21:18:08 2.73
***************
*** 35,38 ****
--- 35,39 ----
* 2001-10-20 fl added split primitive; reenable unicode for 1.6/2.0/2.1
* 2001-10-21 fl added sub/subn primitive
+ * 2001-10-22 fl check for literal sub/subn templates
*
* Copyright (c) 1997-2001 by Secret Labs AB. All rights reserved.
***************
*** 360,363 ****
--- 361,365 ----
#define SRE_MATCH sre_match
#define SRE_SEARCH sre_search
+ #define SRE_LITERAL_TEMPLATE sre_literal_template
#if defined(HAVE_UNICODE)
***************
*** 367,370 ****
--- 369,373 ----
#undef SRE_RECURSIVE
+ #undef SRE_LITERAL_TEMPLATE
#undef SRE_SEARCH
#undef SRE_MATCH
***************
*** 384,387 ****
--- 387,391 ----
#define SRE_MATCH sre_umatch
#define SRE_SEARCH sre_usearch
+ #define SRE_LITERAL_TEMPLATE sre_uliteral_template
#endif
***************
*** 1283,1286 ****
--- 1287,1299 ----
}
+ LOCAL(int)
+ SRE_LITERAL_TEMPLATE(SRE_CHAR* ptr, int len)
+ {
+ /* check if given string is a literal template (i.e. no escapes).
+    scans the first len characters starting at ptr; returns 0 as soon
+    as a backslash is seen, and 1 if none of them is a backslash (an
+    empty string, len <= 0, therefore counts as literal) */
+ while (len-- > 0)
+ if (*ptr++ == '\\')
+ return 0;
+ return 1;
+ }
#if !defined(SRE_RECURSIVE)
***************
*** 1389,1406 ****
}
! LOCAL(PyObject*)
! state_init(SRE_STATE* state, PatternObject* pattern, PyObject* string,
! int start, int end)
{
! /* prepare state object */
!
PyBufferProcs *buffer;
! int size, bytes;
void* ptr;
- memset(state, 0, sizeof(SRE_STATE));
-
- state->lastindex = -1;
-
#if defined(HAVE_UNICODE)
if (PyUnicode_Check(string)) {
--- 1402,1416 ----
}
! static void*
! getstring(PyObject* string, int* p_length, int* p_charsize)
{
! /* given a python object, return a data pointer, a length (in
! characters), and a character size. return NULL if the object
! is not a string (or not compatible) */
!
PyBufferProcs *buffer;
! int size, bytes, charsize;
void* ptr;
#if defined(HAVE_UNICODE)
if (PyUnicode_Check(string)) {
***************
*** 1409,1413 ****
bytes = PyUnicode_GET_DATA_SIZE(string);
size = PyUnicode_GET_SIZE(string);
! state->charsize = sizeof(Py_UNICODE);
} else {
--- 1419,1423 ----
bytes = PyUnicode_GET_DATA_SIZE(string);
size = PyUnicode_GET_SIZE(string);
! charsize = sizeof(Py_UNICODE);
} else {
***************
*** 1437,1444 ****
if (PyString_Check(string) || bytes == size)
! state->charsize = 1;
#if defined(HAVE_UNICODE)
else if (bytes == (int) (size * sizeof(Py_UNICODE)))
! state->charsize = sizeof(Py_UNICODE);
#endif
else {
--- 1447,1454 ----
if (PyString_Check(string) || bytes == size)
! charsize = 1;
#if defined(HAVE_UNICODE)
else if (bytes == (int) (size * sizeof(Py_UNICODE)))
! charsize = sizeof(Py_UNICODE);
#endif
else {
***************
*** 1451,1465 ****
#endif
/* adjust boundaries */
if (start < 0)
start = 0;
! else if (start > size)
! start = size;
if (end < 0)
end = 0;
! else if (end > size)
! end = size;
state->beginning = ptr;
--- 1461,1501 ----
#endif
+ *p_length = size;
+ *p_charsize = charsize;
+
+ return ptr;
+ }
+
+ LOCAL(PyObject*)
+ state_init(SRE_STATE* state, PatternObject* pattern, PyObject* string,
+ int start, int end)
+ {
+ /* prepare state object */
+
+ int length;
+ int charsize;
+ void* ptr;
+
+ memset(state, 0, sizeof(SRE_STATE));
+
+ state->lastindex = -1;
+
+ ptr = getstring(string, &length, &charsize);
+ if (!ptr)
+ return NULL;
+
/* adjust boundaries */
if (start < 0)
start = 0;
! else if (start > length)
! start = length;
if (end < 0)
end = 0;
! else if (end > length)
! end = length;
+ state->charsize = charsize;
+
state->beginning = ptr;
***************
*** 2039,2042 ****
--- 2075,2079 ----
PyObject* args;
PyObject* match;
+ void* ptr;
int status;
int n;
***************
*** 2050,2062 ****
filter_is_callable = 1;
} else {
! /* if not callable, call the template compiler. it may return
! either a filter function or a literal string */
! filter = call(
! SRE_MODULE, "_subx",
! Py_BuildValue("OO", self, template)
! );
! if (!filter)
! return NULL;
! filter_is_callable = PyCallable_Check(filter);
}
--- 2087,2119 ----
filter_is_callable = 1;
} else {
! /* if not callable, check if it's a literal string */
! int literal;
! ptr = getstring(template, &n, &b);
! if (ptr) {
! if (b == 1) {
! literal = sre_literal_template(ptr, n);
! } else {
! #if defined(HAVE_UNICODE)
! literal = sre_uliteral_template(ptr, n);
! #endif
! }
! } else {
! PyErr_Clear();
! literal = 0;
! }
! if (literal) {
! filter = template;
! Py_INCREF(filter);
! filter_is_callable = 0;
! } else {
! /* not a literal; hand it over to the template compiler */
! filter = call(
! SRE_MODULE, "_subx",
! Py_BuildValue("OO", self, template)
! );
! if (!filter)
! return NULL;
! filter_is_callable = PyCallable_Check(filter);
! }
}
***************
*** 2133,2140 ****
/* add to list */
! status = PyList_Append(list, item);
! Py_DECREF(item);
! if (status < 0)
! goto error;
i = e;
--- 2190,2199 ----
/* add to list */
! if (item != Py_None) {
! status = PyList_Append(list, item);
! Py_DECREF(item);
! if (status < 0)
! goto error;
! }
i = e;