[Python-checkins] CVS: python/dist/src/Modules _sre.c,2.31,2.32 sre.h,2.17,2.18

Fredrik Lundh python-dev@python.org
Thu, 3 Aug 2000 09:29:53 -0700


Update of /cvsroot/python/python/dist/src/Modules
In directory slayer.i.sourceforge.net:/tmp/cvs-serv12492/Modules

Modified Files:
	_sre.c sre.h 
Log Message:


-- added recursion limit (currently ~10,000 levels)
-- improved error messages
-- factored out SRE_COUNT; the same code is used by
   SRE_OP_REPEAT_ONE_TEMPLATE
-- minor cleanups


Index: _sre.c
===================================================================
RCS file: /cvsroot/python/python/dist/src/Modules/_sre.c,v
retrieving revision 2.31
retrieving revision 2.32
diff -C2 -r2.31 -r2.32
*** _sre.c	2000/08/01 22:47:49	2.31
--- _sre.c	2000/08/03 16:29:50	2.32
***************
*** 14,17 ****
--- 14,19 ----
   * 00-07-21 fl  reset lastindex in scanner methods (0.9.6)
   * 00-08-01 fl  fixes for 1.6b1 (0.9.8)
+  * 00-08-02 fl  moved SRE_COUNT out of the match method
+  * 00-08-03 fl  added recursion limit
   *
   * Copyright (c) 1997-2000 by Secret Labs AB.  All rights reserved.
***************
*** 56,59 ****
--- 58,64 ----
  /* optional features */
  
+ /* prevent run-away recursion (bad patterns on long strings) */
+ #define USE_RECURSION_LIMIT 10000
+ 
  /* enables fast searching */
  #define USE_FAST_SEARCH
***************
*** 78,81 ****
--- 83,87 ----
  #define SRE_ERROR_ILLEGAL -1 /* illegal opcode */
  #define SRE_ERROR_STATE -2 /* illegal state */
+ #define SRE_ERROR_RECURSION_LIMIT -3 /* runaway recursion */
  #define SRE_ERROR_MEMORY -9 /* out of memory */
  
***************
*** 211,234 ****
  
  static void
- mark_init(SRE_STATE* state)
- {
-     state->mark_stack = NULL;
-     state->mark_stack_size = state->mark_stack_base = 0;
- }
- 
- static void
  mark_fini(SRE_STATE* state)
  {
! #if 0
!     /* FIXME: debugging */
!     if (state->maxlevel > 0)
!         printf("max %d\n", state->maxlevel);
!     if (state->mark_stack_base > 0)
!         printf("mark stack %d\n", state->mark_stack_base);
! #endif
! 
!     if (state->mark_stack)
          free(state->mark_stack);
!     mark_init(state);
  }
  
--- 217,227 ----
  
  static void
  mark_fini(SRE_STATE* state)
  {
!     if (state->mark_stack) {
          free(state->mark_stack);
!         state->mark_stack = NULL;
!     }
!     state->mark_stack_size = state->mark_stack_base = 0;
  }
  
***************
*** 305,308 ****
--- 298,302 ----
  #define SRE_CHAR unsigned char
  #define SRE_AT sre_at
+ #define SRE_COUNT sre_count
  #define SRE_MEMBER sre_member
  #define SRE_MATCH sre_match
***************
*** 318,321 ****
--- 312,316 ----
  #undef SRE_MATCH
  #undef SRE_MEMBER
+ #undef SRE_COUNT
  #undef SRE_AT
  #undef SRE_CHAR
***************
*** 325,328 ****
--- 320,324 ----
  #define SRE_CHAR Py_UNICODE
  #define SRE_AT sre_uat
+ #define SRE_COUNT sre_ucount
  #define SRE_MEMBER sre_umember
  #define SRE_MATCH sre_umatch
***************
*** 395,405 ****
          switch (*set++) {
  
-         case SRE_OP_NEGATE:
-             ok = !ok;
-             break;
- 
-         case SRE_OP_FAILURE:
-             return !ok;
- 
          case SRE_OP_LITERAL:
              /* <LITERAL> <code> */
--- 391,394 ----
***************
*** 430,433 ****
--- 419,429 ----
              break;
  
+         case SRE_OP_NEGATE:
+             ok = !ok;
+             break;
+ 
+         case SRE_OP_FAILURE:
+             return !ok;
+ 
          default:
              /* internal error -- there's not much we can do about it
***************
*** 438,445 ****
  }
  
  LOCAL(int)
  SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern, int level)
  {
!     /* check if string matches the given pattern.  returns -1 for
         error, 0 for failure, and 1 for success */
  
--- 434,518 ----
  }
  
+ LOCAL(int) SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern, int level);
+ 
  LOCAL(int)
+ SRE_COUNT(SRE_STATE* state, SRE_CODE* pattern, int maxcount, int level)
+ {
+     SRE_CODE chr;
+     SRE_CHAR* ptr = state->ptr;
+     SRE_CHAR* end = state->end;
+     int i;
+ 
+     /* adjust end */
+     if (maxcount < end - ptr && maxcount != 65535)
+         end = ptr + maxcount;
+ 
+     switch (pattern[0]) {
+ 
+     case SRE_OP_ANY:
+         /* repeated dot wildcard. */
+         while (ptr < end && !SRE_IS_LINEBREAK(*ptr))
+             ptr++;
+         break;
+ 
+     case SRE_OP_ANY_ALL:
+         /* repeated dot wildcare.  skip to the end of the target
+            string, and backtrack from there */
+         ptr = end;
+         break;
+ 
+     case SRE_OP_LITERAL:
+         /* repeated literal */
+         chr = pattern[1];
+         while (ptr < end && (SRE_CODE) *ptr == chr)
+             ptr++;
+         break;
+ 
+     case SRE_OP_LITERAL_IGNORE:
+         /* repeated literal */
+         chr = pattern[1];
+         while (ptr < end && (SRE_CODE) state->lower(*ptr) == chr)
+             ptr++;
+         break;
+ 
+     case SRE_OP_NOT_LITERAL:
+         /* repeated non-literal */
+         chr = pattern[1];
+         while (ptr < end && (SRE_CODE) *ptr != chr)
+             ptr++;
+         break;
+                 
+     case SRE_OP_NOT_LITERAL_IGNORE:
+         /* repeated non-literal */
+         chr = pattern[1];
+         while (ptr < end && (SRE_CODE) state->lower(*ptr) != chr)
+             ptr++;
+         break;
+ 
+     case SRE_OP_IN:
+         /* repeated set */
+         while (ptr < end && SRE_MEMBER(pattern + 2, *ptr))
+             ptr++;
+         break;
+ 
+     default:
+         /* repeated single character pattern */
+         while ((SRE_CHAR*) state->ptr < end) {
+             i = SRE_MATCH(state, pattern, level);
+             if (i < 0)
+                 return i;
+             if (!i)
+                 break;
+         }
+         return (SRE_CHAR*) state->ptr - ptr;
+     }
+ 
+     return ptr - (SRE_CHAR*) state->ptr;
+ }
+ 
+ LOCAL(int)
  SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern, int level)
  {
!     /* check if string matches the given pattern.  returns <0 for
         error, 0 for failure, and 1 for success */
  
***************
*** 455,458 ****
--- 528,536 ----
      TRACE(("%8d: enter %d\n", PTR(ptr), level));
  
+ #if defined(USE_RECURSION_LIMIT)
+     if (level > USE_RECURSION_LIMIT)
+         return SRE_ERROR_RECURSION_LIMIT;
+ #endif
+ 
      if (pattern[0] == SRE_OP_INFO) {
          /* optimization info block */
***************
*** 466,473 ****
      }
  
-     /* FIXME: debugging */
-     if (level > state->maxlevel)
-         state->maxlevel = level;
- 
      for (;;) {
  
--- 544,547 ----
***************
*** 612,616 ****
              TRACE(("%8d: set lower(%c)\n", PTR(ptr), *ptr));
              if (ptr >= end
!                 || !SRE_MEMBER(pattern+1, (SRE_CODE) state->lower(*ptr)))
                  return 0;
              pattern += pattern[0];
--- 686,690 ----
              TRACE(("%8d: set lower(%c)\n", PTR(ptr), *ptr));
              if (ptr >= end
!                 || !SRE_MEMBER(pattern + 1, (SRE_CODE) state->lower(*ptr)))
                  return 0;
              pattern += pattern[0];
***************
*** 675,693 ****
              /* <BRANCH> <0=skip> code <JUMP> ... <NULL> */
              TRACE(("%8d: branch\n", PTR(ptr)));
!             {
!                 lastmark = state->lastmark;
!                 while (pattern[0]) {
!                     TRACE(("%8d: try branch\n", PTR(ptr)));
!                     if (pattern[1] != SRE_OP_LITERAL ||
!                         (ptr < end && (SRE_CODE) ptr[0] == pattern[2])) {
!                         state->ptr = ptr;
!                         i = SRE_MATCH(state, pattern + 1, level + 1);
!                         if (i)
!                             return i;
!                     }
                      while (state->lastmark > lastmark)
                          state->mark[state->lastmark--] = NULL;
-                     pattern += pattern[0];
                  }
              }
              return 0;
--- 749,779 ----
              /* <BRANCH> <0=skip> code <JUMP> ... <NULL> */
              TRACE(("%8d: branch\n", PTR(ptr)));
!             lastmark = state->lastmark;
!             while (pattern[0]) {
!                 SRE_CODE* code = pattern+1;
!                 TRACE(("%8d: try branch\n", PTR(ptr)));
!                 switch (code[0]) {
!                 case SRE_OP_IN:
!                     if (ptr >= end || !SRE_MEMBER(code + 2, ptr[0]))
!                         break;
!                     code += code[1] + 1;
!                     state->ptr = ptr + 1;
!                     goto branch;
!                 case SRE_OP_LITERAL:
!                     if (ptr >= end || (SRE_CODE) ptr[0] != code[1])
!                         break;
!                     code += 2;
!                     state->ptr = ptr + 1;
!                     goto branch;
!                 default:
!                     state->ptr = ptr;
!                 branch:
!                     i = SRE_MATCH(state, code, level + 1);
!                     if (i)
!                         return i;
                      while (state->lastmark > lastmark)
                          state->mark[state->lastmark--] = NULL;
                  }
+                 pattern += pattern[0];
              }
              return 0;
***************
*** 708,806 ****
              if (ptr + pattern[1] > end)
                  return 0; /* cannot match */
- 
-             count = 0;
- 
-             switch (pattern[3]) {
- 
-             case SRE_OP_ANY:
-                 /* repeated wildcard. */
-                 while (count < (int) pattern[2]) {
-                     if (ptr >= end || SRE_IS_LINEBREAK(ptr[0]))
-                         break;
-                     ptr++;
-                     count++;
-                 }
-                 break;
- 
-             case SRE_OP_ANY_ALL:
-                 /* repeated wildcard.  skip to the end of the target
-                    string, and backtrack from there */
-                 if (ptr + pattern[1] > end)
-                     return 0; /* cannot match */
-                 count = pattern[2];
-                 if (count > end - ptr)
-                     count = end - ptr;
-                 ptr += count;
-                 break;
- 
-             case SRE_OP_LITERAL:
-                 /* repeated literal */
-                 chr = pattern[4];
-                 while (count < (int) pattern[2]) {
-                     if (ptr >= end || (SRE_CODE) ptr[0] != chr)
-                         break;
-                     ptr++;
-                     count++;
-                 }
-                 break;
- 
-             case SRE_OP_LITERAL_IGNORE:
-                 /* repeated literal */
-                 chr = pattern[4];
-                 while (count < (int) pattern[2]) {
-                     if (ptr >= end || (SRE_CODE) state->lower(*ptr) != chr)
-                         break;
-                     ptr++;
-                     count++;
-                 }
-                 break;
  
!             case SRE_OP_NOT_LITERAL:
!                 /* repeated non-literal */
!                 chr = pattern[4];
!                 while (count < (int) pattern[2]) {
!                     if (ptr >= end || (SRE_CODE) ptr[0] == chr)
!                         break;
!                     ptr++;
!                     count++;
!                 }
!                 break;
!                 
!             case SRE_OP_NOT_LITERAL_IGNORE:
!                 /* repeated non-literal */
!                 chr = pattern[4];
!                 while (count < (int) pattern[2]) {
!                     if (ptr >= end || (SRE_CODE) state->lower(ptr[0]) == chr)
!                         break;
!                     ptr++;
!                     count++;
!                 }
!                 break;
  
!             case SRE_OP_IN:
!                 /* repeated set */
!                 while (count < (int) pattern[2]) {
!                     if (ptr >= end || !SRE_MEMBER(pattern + 5, *ptr))
!                         break;
!                     ptr++;
!                     count++;
!                 }
!                 break;
  
!             default:
!                 /* repeated single character pattern */
!                 state->ptr = ptr;
!                 while (count < (int) pattern[2]) {
!                     i = SRE_MATCH(state, pattern + 3, level + 1);
!                     if (i < 0)
!                         return i;
!                     if (!i)
!                         break;
!                     count++;
!                 }
!                 state->ptr = ptr;
!                 ptr += count;
!                 break;
!             }
  
              /* when we arrive here, count contains the number of
--- 794,805 ----
              if (ptr + pattern[1] > end)
                  return 0; /* cannot match */
  
!             state->ptr = ptr;
  
!             count = SRE_COUNT(state, pattern + 3, pattern[2], level + 1);
!             if (count < 0)
!                 return count;
  
!             ptr += count;
  
              /* when we arrive here, count contains the number of
***************
*** 905,908 ****
--- 904,908 ----
                  TRACE(("%8d: match item (required)\n", PTR(ptr)));
                  rp->count = count;
+                 /* RECURSIVE */
                  i = SRE_MATCH(state, rp->pattern + 3, level + 1);
                  if (i)
***************
*** 920,923 ****
--- 920,924 ----
                  lastmark = state->lastmark;
                  mark_save(state, 0, lastmark);
+                 /* RECURSIVE */
                  i = SRE_MATCH(state, rp->pattern + 3, level + 1);
                  if (i)
***************
*** 956,959 ****
--- 957,961 ----
                  TRACE(("%8d: match item (required)\n", PTR(ptr)));
                  rp->count = count;
+                 /* RECURSIVE */
                  i = SRE_MATCH(state, rp->pattern + 3, level + 1);
                  if (i)
***************
*** 979,982 ****
--- 981,985 ----
              TRACE(("%8d: match item (optional)\n", PTR(ptr)));
              rp->count = count;
+             /* RECURSIVE */
              i = SRE_MATCH(state, rp->pattern + 3, level + 1);
              if (i)
***************
*** 995,999 ****
  }
  
! static int
  SRE_SEARCH(SRE_STATE* state, SRE_CODE* pattern)
  {
--- 998,1002 ----
  }
  
! LOCAL(int)
  SRE_SEARCH(SRE_STATE* state, SRE_CODE* pattern)
  {
***************
*** 1221,1227 ****
      state->repeat = NULL;
  
-     /* FIXME: debugging */
-     state->maxlevel = 0;
- 
      mark_fini(state);
  }
--- 1224,1227 ----
***************
*** 1237,1240 ****
--- 1237,1244 ----
      void* ptr;
  
+     memset(state, 0, sizeof(SRE_STATE));
+ 
+     state->lastindex = -1;
+ 
      /* get pointer to string buffer */
      buffer = string->ob_type->tp_as_buffer;
***************
*** 1301,1309 ****
          state->lower = sre_lower;
  
-     state->mark_stack = NULL;
-     state->mark_stack_base = 0;
- 
-     state_reset(state);
- 
      return string;
  }
--- 1305,1308 ----
***************
*** 1335,1338 ****
--- 1334,1359 ----
  }
  
+ static void
+ pattern_error(int status)
+ {
+     switch (status) {
+     case SRE_ERROR_RECURSION_LIMIT:
+         PyErr_SetString(
+             PyExc_RuntimeError,
+             "maximum recursion limit exceeded"
+             );
+         break;
+     case SRE_ERROR_MEMORY:
+         PyErr_NoMemory();
+         break;
+     default:
+         /* other error codes indicate compiler/engine bugs */
+         PyErr_SetString(
+             PyExc_RuntimeError,
+             "internal error in regular expression engine"
+             );
+     }
+ }
+ 
  static PyObject*
  pattern_new_match(PatternObject* pattern, SRE_STATE* state, int status)
***************
*** 1384,1399 ****
          return (PyObject*) match;
  
!     } else if (status < 0) {
  
!         /* internal error */
!         PyErr_SetString(
!             PyExc_RuntimeError, "internal error in regular expression engine"
!             );
!         return NULL;
  
      }
  
!     Py_INCREF(Py_None);
!     return Py_None;
  }
  
--- 1405,1419 ----
          return (PyObject*) match;
  
!     } else if (status == 0) {
  
!         /* no match */
!         Py_INCREF(Py_None);
!         return Py_None;
  
      }
  
!     /* internal error */
!     pattern_error(status);
!     return NULL;
  }
  
***************
*** 1642,1650 ****
                  break;
  
!             /* internal error */
!             PyErr_SetString(
!                 PyExc_RuntimeError,
!                 "internal error in regular expression engine"
!                 );
              goto error;
  
--- 1662,1666 ----
                  break;
  
!             pattern_error(status);
              goto error;
  

Index: sre.h
===================================================================
RCS file: /cvsroot/python/python/dist/src/Modules/sre.h,v
retrieving revision 2.17
retrieving revision 2.18
diff -C2 -r2.17 -r2.18
*** sre.h	2000/08/01 21:05:41	2.17
--- sre.h	2000/08/03 16:29:50	2.18
***************
*** 75,80 ****
      /* hooks */
      SRE_TOLOWER_HOOK lower;
-     /* debugging */
-     int maxlevel;
  } SRE_STATE;
  
--- 75,78 ----