[Python-checkins] CVS: python/dist/src/Modules _sre.c,2.72,2.73

Fredrik Lundh effbot@users.sourceforge.net
Mon, 22 Oct 2001 14:18:10 -0700


Update of /cvsroot/python/python/dist/src/Modules
In directory usw-pr-cvs1:/tmp/cvs-serv11996/Modules

Modified Files:
	_sre.c 
Log Message:


Another major speedup: sre.sub/subn now check the template string for
backslash escapes themselves, and skip the call to the template
compiler when the template is a plain literal (contains no escapes).


Index: _sre.c
===================================================================
RCS file: /cvsroot/python/python/dist/src/Modules/_sre.c,v
retrieving revision 2.72
retrieving revision 2.73
diff -C2 -d -r2.72 -r2.73
*** _sre.c	2001/10/22 06:01:56	2.72
--- _sre.c	2001/10/22 21:18:08	2.73
***************
*** 35,38 ****
--- 35,39 ----
   * 2001-10-20 fl  added split primitive; reenable unicode for 1.6/2.0/2.1
   * 2001-10-21 fl  added sub/subn primitive
+  * 2001-10-22 fl  check for literal sub/subn templates
   *
   * Copyright (c) 1997-2001 by Secret Labs AB.  All rights reserved.
***************
*** 360,363 ****
--- 361,365 ----
  #define SRE_MATCH sre_match
  #define SRE_SEARCH sre_search
+ #define SRE_LITERAL_TEMPLATE sre_literal_template
  
  #if defined(HAVE_UNICODE)
***************
*** 367,370 ****
--- 369,373 ----
  #undef SRE_RECURSIVE
  
+ #undef SRE_LITERAL_TEMPLATE
  #undef SRE_SEARCH
  #undef SRE_MATCH
***************
*** 384,387 ****
--- 387,391 ----
  #define SRE_MATCH sre_umatch
  #define SRE_SEARCH sre_usearch
+ #define SRE_LITERAL_TEMPLATE sre_uliteral_template
  #endif
  
***************
*** 1283,1286 ****
--- 1287,1299 ----
  }
      
+ LOCAL(int)
+ SRE_LITERAL_TEMPLATE(SRE_CHAR* ptr, int len)
+ {
+     /* check if given string is a literal template (i.e. no escapes) */
+     while (len-- > 0)
+         if (*ptr++ == '\\')
+             return 0;
+     return 1;
+ }
  
  #if !defined(SRE_RECURSIVE)
***************
*** 1389,1406 ****
  }
  
! LOCAL(PyObject*)
! state_init(SRE_STATE* state, PatternObject* pattern, PyObject* string,
!            int start, int end)
  {
!     /* prepare state object */
! 
      PyBufferProcs *buffer;
!     int size, bytes;
      void* ptr;
  
-     memset(state, 0, sizeof(SRE_STATE));
- 
-     state->lastindex = -1;
- 
  #if defined(HAVE_UNICODE)
      if (PyUnicode_Check(string)) {
--- 1402,1416 ----
  }
  
! static void*
! getstring(PyObject* string, int* p_length, int* p_charsize)
  {
!     /* given a python object, return a data pointer, a length (in
!        characters), and a character size.  return NULL if the object
!        is not a string (or not compatible) */
!     
      PyBufferProcs *buffer;
!     int size, bytes, charsize;
      void* ptr;
  
  #if defined(HAVE_UNICODE)
      if (PyUnicode_Check(string)) {
***************
*** 1409,1413 ****
          bytes = PyUnicode_GET_DATA_SIZE(string);
          size = PyUnicode_GET_SIZE(string);
!         state->charsize = sizeof(Py_UNICODE);
  
      } else {
--- 1419,1423 ----
          bytes = PyUnicode_GET_DATA_SIZE(string);
          size = PyUnicode_GET_SIZE(string);
!         charsize = sizeof(Py_UNICODE);
  
      } else {
***************
*** 1437,1444 ****
  
      if (PyString_Check(string) || bytes == size)
!         state->charsize = 1;
  #if defined(HAVE_UNICODE)
      else if (bytes == (int) (size * sizeof(Py_UNICODE)))
!         state->charsize = sizeof(Py_UNICODE);
  #endif
      else {
--- 1447,1454 ----
  
      if (PyString_Check(string) || bytes == size)
!         charsize = 1;
  #if defined(HAVE_UNICODE)
      else if (bytes == (int) (size * sizeof(Py_UNICODE)))
!         charsize = sizeof(Py_UNICODE);
  #endif
      else {
***************
*** 1451,1465 ****
  #endif
  
      /* adjust boundaries */
      if (start < 0)
          start = 0;
!     else if (start > size)
!         start = size;
  
      if (end < 0)
          end = 0;
!     else if (end > size)
!         end = size;
  
      state->beginning = ptr;
  
--- 1461,1501 ----
  #endif
  
+     *p_length = size;
+     *p_charsize = charsize;
+ 
+     return ptr;
+ }
+ 
+ LOCAL(PyObject*)
+ state_init(SRE_STATE* state, PatternObject* pattern, PyObject* string,
+            int start, int end)
+ {
+     /* prepare state object */
+ 
+     int length;
+     int charsize;
+     void* ptr;
+ 
+     memset(state, 0, sizeof(SRE_STATE));
+ 
+     state->lastindex = -1;
+ 
+     ptr = getstring(string, &length, &charsize);
+     if (!ptr)
+         return NULL;
+ 
      /* adjust boundaries */
      if (start < 0)
          start = 0;
!     else if (start > length)
!         start = length;
  
      if (end < 0)
          end = 0;
!     else if (end > length)
!         end = length;
  
+     state->charsize = charsize;
+ 
      state->beginning = ptr;
  
***************
*** 2039,2042 ****
--- 2075,2079 ----
      PyObject* args;
      PyObject* match;
+     void* ptr;
      int status;
      int n;
***************
*** 2050,2062 ****
          filter_is_callable = 1;
      } else {
!         /* if not callable, call the template compiler.  it may return
!            either a filter function or a literal string */
!         filter = call(
!             SRE_MODULE, "_subx",
!             Py_BuildValue("OO", self, template)
!             );
!         if (!filter)
!             return NULL;
!         filter_is_callable = PyCallable_Check(filter);
      }
  
--- 2087,2119 ----
          filter_is_callable = 1;
      } else {
!         /* if not callable, check if it's a literal string */
!         int literal;
!         ptr = getstring(template, &n, &b);
!         if (ptr) {
!             if (b == 1) {
!                 literal = sre_literal_template(ptr, n);
!             } else {
! #if defined(HAVE_UNICODE)
!                 literal = sre_uliteral_template(ptr, n);
! #endif
!             }
!         } else {
!             PyErr_Clear();
!             literal = 0;
!         }
!         if (literal) {
!             filter = template;
!             Py_INCREF(filter);
!             filter_is_callable = 0;
!         } else {
!             /* not a literal; hand it over to the template compiler */
!             filter = call(
!                 SRE_MODULE, "_subx",
!                 Py_BuildValue("OO", self, template)
!                 );
!             if (!filter)
!                 return NULL;
!             filter_is_callable = PyCallable_Check(filter);
!         }
      }
  
***************
*** 2133,2140 ****
  
          /* add to list */
!         status = PyList_Append(list, item);
!         Py_DECREF(item);
!         if (status < 0)
!             goto error;
          
          i = e;
--- 2190,2199 ----
  
          /* add to list */
!         if (item != Py_None) {
!             status = PyList_Append(list, item);
!             Py_DECREF(item);
!             if (status < 0)
!                 goto error;
!         }
          
          i = e;