[Python-checkins] CVS: python/dist/src/Modules _sre.c,2.11,2.12
Fredrik Lundh
python-dev@python.org
Thu, 29 Jun 2000 16:33:14 -0700
Update of /cvsroot/python/python/dist/src/Modules
In directory slayer.i.sourceforge.net:/tmp/cvs-serv12640/Modules
Modified Files:
_sre.c
Log Message:
Still trying to figure out how to fix the remaining
group reset problem. In the meantime, I added some
optimizations:
- added "inline" directive to LOCAL
(this assumes that AC_C_INLINE does what it's
supposed to do). To compile SRE on a non-Unix
platform whose compiler doesn't support inline, you
have to add a "#define inline" somewhere...
- added code to generate an SRE_OP_INFO primitive
- added code to do fast prefix search
(enabled by the USE_FAST_SEARCH define; default
is on, in this release)
Index: _sre.c
===================================================================
RCS file: /cvsroot/python/python/dist/src/Modules/_sre.c,v
retrieving revision 2.11
retrieving revision 2.12
diff -C2 -r2.11 -r2.12
*** _sre.c 2000/06/29 18:03:25 2.11
--- _sre.c 2000/06/29 23:33:12 2.12
***************
*** 20,23 ****
--- 20,24 ----
* 00-06-28 fl fixed findall (0.9.1)
* 00-06-29 fl fixed split, added more scanner features (0.9.2)
+ * 00-06-30 fl tuning, fast search (0.9.3)
*
* Copyright (c) 1997-2000 by Secret Labs AB. All rights reserved.
***************
*** 30,35 ****
#ifndef SRE_RECURSIVE
! static char
! copyright[] = " SRE 0.9.2 Copyright (c) 1997-2000 by Secret Labs AB ";
#include "Python.h"
--- 31,35 ----
#ifndef SRE_RECURSIVE
! char copyright[] = " SRE 0.9.3 Copyright (c) 1997-2000 by Secret Labs AB ";
#include "Python.h"
***************
*** 56,59 ****
--- 56,62 ----
#endif
+ /* optional features */
+ #define USE_FAST_SEARCH
+
#if defined(_MSC_VER)
#pragma optimize("agtw", on) /* doesn't seem to make much difference... */
***************
*** 61,65 ****
#define LOCAL(type) static __inline type __fastcall
#else
! #define LOCAL(type) static type
#endif
--- 64,68 ----
#define LOCAL(type) static __inline type __fastcall
#else
! #define LOCAL(type) static inline type
#endif
***************
*** 397,400 ****
--- 400,414 ----
TRACE(("%8d: enter\n", PTR(ptr)));
+ if (pattern[0] == SRE_OP_INFO) {
+ /* optimization info block */
+ /* args: <1=skip> <2=flags> <3=min> ... */
+ if (pattern[3] && (end - ptr) < pattern[3]) {
+ TRACE(("reject (got %d chars, need %d)\n",
+ (end - ptr), pattern[3]));
+ return 0;
+ }
+ pattern += pattern[1] + 1;
+ }
+
stackbase = stack = state->stackbase;
lastmark = state->lastmark;
***************
*** 918,935 ****
int status = 0;
int prefix_len = 0;
! SRE_CODE* prefix = NULL;
if (pattern[0] == SRE_OP_INFO) {
! /* args: <skip> <min> <max> <prefix> <prefix data...> */
! end -= pattern[2];
! prefix_len = pattern[4];
! prefix = pattern + 5;
! pattern += pattern[1];
}
! /* if (prefix_len > 0) ... */
if (pattern[0] == SRE_OP_LITERAL) {
! /* pattern starts with a literal */
SRE_CHAR chr = (SRE_CHAR) pattern[1];
for (;;) {
--- 932,1001 ----
int status = 0;
int prefix_len = 0;
! SRE_CODE* prefix;
! SRE_CODE* overlap;
! int literal = 0;
if (pattern[0] == SRE_OP_INFO) {
! /* optimization info block */
! /* args: <1=skip> <2=flags> <3=min> <4=max> <5=prefix> <6=data...> */
!
! if (pattern[3] > 0) {
! /* adjust end point (but make sure we leave at least one
! character in there) */
! end -= pattern[3]-1;
! if (end <= ptr)
! end = ptr+1;
! }
!
! literal = pattern[2];
!
! prefix = pattern + 6;
! prefix_len = pattern[5];
!
! overlap = prefix + prefix_len - 1;
!
! pattern += 1 + pattern[1];
}
! #if defined(USE_FAST_SEARCH)
! if (prefix_len > 1) {
! /* pattern starts with a known prefix. use the overlap
! table to skip forward as fast as we possibly can */
! int i = 0;
! end = state->end;
! while (ptr < end) {
! for (;;) {
! if (*ptr != (SRE_CHAR) prefix[i]) {
! if (!i)
! break;
! else
! i = overlap[i];
! } else {
! if (++i == prefix_len) {
! /* found a potential match */
! TRACE(("%8d: === SEARCH === hit\n", PTR(ptr)));
! state->start = ptr - prefix_len + 1;
! state->ptr = ptr + 1;
! if (literal)
! return 1; /* all of it */
! status = SRE_MATCH(state, pattern + 2*prefix_len);
! if (status != 0)
! return status;
! /* close but no cigar -- try again */
! i = overlap[i];
! }
! break;
! }
!
! }
! ptr++;
! }
! return 0;
! }
! #endif
if (pattern[0] == SRE_OP_LITERAL) {
! /* pattern starts with a literal character. this is used for
! short prefixes, and if fast search is disabled*/
SRE_CHAR chr = (SRE_CHAR) pattern[1];
for (;;) {
***************
*** 945,950 ****
break;
}
-
} else
while (ptr <= end) {
TRACE(("%8d: === SEARCH ===\n", PTR(ptr)));
--- 1011,1016 ----
break;
}
} else
+ /* general case */
while (ptr <= end) {
TRACE(("%8d: === SEARCH ===\n", PTR(ptr)));