[Python-checkins] CVS: python/dist/src/Modules _sre.c,2.11,2.12

Fredrik Lundh python-dev@python.org
Thu, 29 Jun 2000 16:33:14 -0700


Update of /cvsroot/python/python/dist/src/Modules
In directory slayer.i.sourceforge.net:/tmp/cvs-serv12640/Modules

Modified Files:
	_sre.c 
Log Message:

Still trying to figure out how to fix the remaining
group reset problem.  In the meantime, I added some
optimizations:

- added "inline" directive to LOCAL

  (this assumes that AC_C_INLINE does what it's
  supposed to do).  To compile SRE on a non-Unix
  platform that doesn't support inline, you have
  to add a "#define inline" somewhere...

- added code to generate a SRE_OP_INFO primitive
 
- added code to do fast prefix search

  (enabled by the USE_FAST_SEARCH define, which is
  on by default in this release)

Index: _sre.c
===================================================================
RCS file: /cvsroot/python/python/dist/src/Modules/_sre.c,v
retrieving revision 2.11
retrieving revision 2.12
diff -C2 -r2.11 -r2.12
*** _sre.c	2000/06/29 18:03:25	2.11
--- _sre.c	2000/06/29 23:33:12	2.12
***************
*** 20,23 ****
--- 20,24 ----
   * 00-06-28 fl	fixed findall (0.9.1)
   * 00-06-29 fl	fixed split, added more scanner features (0.9.2)
+  * 00-06-30 fl	tuning, fast search (0.9.3)
   *
   * Copyright (c) 1997-2000 by Secret Labs AB.  All rights reserved.
***************
*** 30,35 ****
  #ifndef SRE_RECURSIVE
  
! static char
! copyright[] = " SRE 0.9.2 Copyright (c) 1997-2000 by Secret Labs AB ";
  
  #include "Python.h"
--- 31,35 ----
  #ifndef SRE_RECURSIVE
  
! char copyright[] = " SRE 0.9.3 Copyright (c) 1997-2000 by Secret Labs AB ";
  
  #include "Python.h"
***************
*** 56,59 ****
--- 56,62 ----
  #endif
  
+ /* optional features */
+ #define USE_FAST_SEARCH
+ 
  #if defined(_MSC_VER)
  #pragma optimize("agtw", on) /* doesn't seem to make much difference... */
***************
*** 61,65 ****
  #define LOCAL(type) static __inline type __fastcall
  #else
! #define LOCAL(type) static type
  #endif
  
--- 64,68 ----
  #define LOCAL(type) static __inline type __fastcall
  #else
! #define LOCAL(type) static inline type
  #endif
  
***************
*** 397,400 ****
--- 400,414 ----
      TRACE(("%8d: enter\n", PTR(ptr)));
  
+     if (pattern[0] == SRE_OP_INFO) {
+         /* optimization info block */
+         /* args: <1=skip> <2=flags> <3=min> ... */
+         if (pattern[3] && (end - ptr) < pattern[3]) {
+             TRACE(("reject (got %d chars, need %d)\n",
+                    (end - ptr), pattern[3]));
+             return 0;
+         }
+         pattern += pattern[1] + 1;
+     }
+ 
      stackbase = stack = state->stackbase;
      lastmark = state->lastmark;
***************
*** 918,935 ****
  	int status = 0;
      int prefix_len = 0;
!     SRE_CODE* prefix = NULL;
  
      if (pattern[0] == SRE_OP_INFO) {
!         /* args: <skip> <min> <max> <prefix> <prefix data...> */
!         end -= pattern[2];
!         prefix_len = pattern[4];
!         prefix = pattern + 5;
!         pattern += pattern[1];
      }
  
!     /* if (prefix_len > 0) ... */
  
  	if (pattern[0] == SRE_OP_LITERAL) {
! 		/* pattern starts with a literal */
  		SRE_CHAR chr = (SRE_CHAR) pattern[1];
  		for (;;) {
--- 932,1001 ----
  	int status = 0;
      int prefix_len = 0;
!     SRE_CODE* prefix;
!     SRE_CODE* overlap;
!     int literal = 0;
  
      if (pattern[0] == SRE_OP_INFO) {
!         /* optimization info block */
!         /* args: <1=skip> <2=flags> <3=min> <4=max> <5=prefix> <6=data...> */
! 
!         if (pattern[3] > 0) {
!             /* adjust end point (but make sure we leave at least one
!                character in there) */
!             end -= pattern[3]-1;
!             if (end <= ptr)
!                 end = ptr+1;
!         }
! 
!         literal = pattern[2];
! 
!         prefix = pattern + 6;
!         prefix_len = pattern[5];
! 
!         overlap = prefix + prefix_len - 1;
! 
!         pattern += 1 + pattern[1];
      }
  
! #if defined(USE_FAST_SEARCH)
!     if (prefix_len > 1) {
!         /* pattern starts with a known prefix.  use the overlap
!            table to skip forward as fast as we possibly can */
!         int i = 0;
!         end = state->end;
!         while (ptr < end) {
!             for (;;) {
!                 if (*ptr != (SRE_CHAR) prefix[i]) {
!                     if (!i)
!                         break;
!                     else
!                         i = overlap[i];
!                 } else {
!                     if (++i == prefix_len) {
!                         /* found a potential match */
!                         TRACE(("%8d: === SEARCH === hit\n", PTR(ptr)));
!                         state->start = ptr - prefix_len + 1;
!                         state->ptr = ptr + 1;
!                         if (literal)
!                             return 1; /* all of it */
!                         status = SRE_MATCH(state, pattern + 2*prefix_len);
!                         if (status != 0)
!                             return status;
!                         /* close but no cigar -- try again */
!                         i = overlap[i];
!                     }
!                     break;
!                 }
!                 
!             }
!             ptr++;
!         }
!         return 0;
!     }
! #endif
  
  	if (pattern[0] == SRE_OP_LITERAL) {
! 		/* pattern starts with a literal character.  this is used for
!            short prefixes, and if fast search is disabled*/
  		SRE_CHAR chr = (SRE_CHAR) pattern[1];
  		for (;;) {
***************
*** 945,950 ****
  				break;
  		}
- 
  	} else
  		while (ptr <= end) {
  			TRACE(("%8d: === SEARCH ===\n", PTR(ptr))); 
--- 1011,1016 ----
  				break;
  		}
  	} else
+ 		/* general case */
  		while (ptr <= end) {
  			TRACE(("%8d: === SEARCH ===\n", PTR(ptr)));