[Python-checkins] CVS: python/dist/src/Modules _sre.c,2.31,2.32 sre.h,2.17,2.18
Fredrik Lundh
python-dev@python.org
Thu, 3 Aug 2000 09:29:53 -0700
Update of /cvsroot/python/python/dist/src/Modules
In directory slayer.i.sourceforge.net:/tmp/cvs-serv12492/Modules
Modified Files:
_sre.c sre.h
Log Message:
-- added recursion limit (currently ~10,000 levels)
-- improved error messages
-- factored out SRE_COUNT; the same code is used by
SRE_OP_REPEAT_ONE_TEMPLATE
-- minor cleanups
Index: _sre.c
===================================================================
RCS file: /cvsroot/python/python/dist/src/Modules/_sre.c,v
retrieving revision 2.31
retrieving revision 2.32
diff -C2 -r2.31 -r2.32
*** _sre.c 2000/08/01 22:47:49 2.31
--- _sre.c 2000/08/03 16:29:50 2.32
***************
*** 14,17 ****
--- 14,19 ----
* 00-07-21 fl reset lastindex in scanner methods (0.9.6)
* 00-08-01 fl fixes for 1.6b1 (0.9.8)
+ * 00-08-02 fl moved SRE_COUNT out of the match method
+ * 00-08-03 fl added recursion limit
*
* Copyright (c) 1997-2000 by Secret Labs AB. All rights reserved.
***************
*** 56,59 ****
--- 58,64 ----
/* optional features */
+ /* prevent run-away recursion (bad patterns on long strings) */
+ #define USE_RECURSION_LIMIT 10000
+
/* enables fast searching */
#define USE_FAST_SEARCH
***************
*** 78,81 ****
--- 83,87 ----
#define SRE_ERROR_ILLEGAL -1 /* illegal opcode */
#define SRE_ERROR_STATE -2 /* illegal state */
+ #define SRE_ERROR_RECURSION_LIMIT -3 /* runaway recursion */
#define SRE_ERROR_MEMORY -9 /* out of memory */
***************
*** 211,234 ****
static void
- mark_init(SRE_STATE* state)
- {
- state->mark_stack = NULL;
- state->mark_stack_size = state->mark_stack_base = 0;
- }
-
- static void
mark_fini(SRE_STATE* state)
{
! #if 0
! /* FIXME: debugging */
! if (state->maxlevel > 0)
! printf("max %d\n", state->maxlevel);
! if (state->mark_stack_base > 0)
! printf("mark stack %d\n", state->mark_stack_base);
! #endif
!
! if (state->mark_stack)
free(state->mark_stack);
! mark_init(state);
}
--- 217,227 ----
static void
mark_fini(SRE_STATE* state)
{
! if (state->mark_stack) {
free(state->mark_stack);
! state->mark_stack = NULL;
! }
! state->mark_stack_size = state->mark_stack_base = 0;
}
***************
*** 305,308 ****
--- 298,302 ----
#define SRE_CHAR unsigned char
#define SRE_AT sre_at
+ #define SRE_COUNT sre_count
#define SRE_MEMBER sre_member
#define SRE_MATCH sre_match
***************
*** 318,321 ****
--- 312,316 ----
#undef SRE_MATCH
#undef SRE_MEMBER
+ #undef SRE_COUNT
#undef SRE_AT
#undef SRE_CHAR
***************
*** 325,328 ****
--- 320,324 ----
#define SRE_CHAR Py_UNICODE
#define SRE_AT sre_uat
+ #define SRE_COUNT sre_ucount
#define SRE_MEMBER sre_umember
#define SRE_MATCH sre_umatch
***************
*** 395,405 ****
switch (*set++) {
- case SRE_OP_NEGATE:
- ok = !ok;
- break;
-
- case SRE_OP_FAILURE:
- return !ok;
-
case SRE_OP_LITERAL:
/* <LITERAL> <code> */
--- 391,394 ----
***************
*** 430,433 ****
--- 419,429 ----
break;
+ case SRE_OP_NEGATE:
+ ok = !ok;
+ break;
+
+ case SRE_OP_FAILURE:
+ return !ok;
+
default:
/* internal error -- there's not much we can do about it
***************
*** 438,445 ****
}
LOCAL(int)
SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern, int level)
{
! /* check if string matches the given pattern. returns -1 for
error, 0 for failure, and 1 for success */
--- 434,518 ----
}
+ LOCAL(int) SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern, int level);
+
LOCAL(int)
+ SRE_COUNT(SRE_STATE* state, SRE_CODE* pattern, int maxcount, int level)
+ {
+ SRE_CODE chr;
+ SRE_CHAR* ptr = state->ptr;
+ SRE_CHAR* end = state->end;
+ int i;
+
+ /* adjust end */
+ if (maxcount < end - ptr && maxcount != 65535)
+ end = ptr + maxcount;
+
+ switch (pattern[0]) {
+
+ case SRE_OP_ANY:
+ /* repeated dot wildcard. */
+ while (ptr < end && !SRE_IS_LINEBREAK(*ptr))
+ ptr++;
+ break;
+
+ case SRE_OP_ANY_ALL:
+ /* repeated dot wildcare. skip to the end of the target
+ string, and backtrack from there */
+ ptr = end;
+ break;
+
+ case SRE_OP_LITERAL:
+ /* repeated literal */
+ chr = pattern[1];
+ while (ptr < end && (SRE_CODE) *ptr == chr)
+ ptr++;
+ break;
+
+ case SRE_OP_LITERAL_IGNORE:
+ /* repeated literal */
+ chr = pattern[1];
+ while (ptr < end && (SRE_CODE) state->lower(*ptr) == chr)
+ ptr++;
+ break;
+
+ case SRE_OP_NOT_LITERAL:
+ /* repeated non-literal */
+ chr = pattern[1];
+ while (ptr < end && (SRE_CODE) *ptr != chr)
+ ptr++;
+ break;
+
+ case SRE_OP_NOT_LITERAL_IGNORE:
+ /* repeated non-literal */
+ chr = pattern[1];
+ while (ptr < end && (SRE_CODE) state->lower(*ptr) != chr)
+ ptr++;
+ break;
+
+ case SRE_OP_IN:
+ /* repeated set */
+ while (ptr < end && SRE_MEMBER(pattern + 2, *ptr))
+ ptr++;
+ break;
+
+ default:
+ /* repeated single character pattern */
+ while ((SRE_CHAR*) state->ptr < end) {
+ i = SRE_MATCH(state, pattern, level);
+ if (i < 0)
+ return i;
+ if (!i)
+ break;
+ }
+ return (SRE_CHAR*) state->ptr - ptr;
+ }
+
+ return ptr - (SRE_CHAR*) state->ptr;
+ }
+
+ LOCAL(int)
SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern, int level)
{
! /* check if string matches the given pattern. returns <0 for
error, 0 for failure, and 1 for success */
***************
*** 455,458 ****
--- 528,536 ----
TRACE(("%8d: enter %d\n", PTR(ptr), level));
+ #if defined(USE_RECURSION_LIMIT)
+ if (level > USE_RECURSION_LIMIT)
+ return SRE_ERROR_RECURSION_LIMIT;
+ #endif
+
if (pattern[0] == SRE_OP_INFO) {
/* optimization info block */
***************
*** 466,473 ****
}
- /* FIXME: debugging */
- if (level > state->maxlevel)
- state->maxlevel = level;
-
for (;;) {
--- 544,547 ----
***************
*** 612,616 ****
TRACE(("%8d: set lower(%c)\n", PTR(ptr), *ptr));
if (ptr >= end
! || !SRE_MEMBER(pattern+1, (SRE_CODE) state->lower(*ptr)))
return 0;
pattern += pattern[0];
--- 686,690 ----
TRACE(("%8d: set lower(%c)\n", PTR(ptr), *ptr));
if (ptr >= end
! || !SRE_MEMBER(pattern + 1, (SRE_CODE) state->lower(*ptr)))
return 0;
pattern += pattern[0];
***************
*** 675,693 ****
/* <BRANCH> <0=skip> code <JUMP> ... <NULL> */
TRACE(("%8d: branch\n", PTR(ptr)));
! {
! lastmark = state->lastmark;
! while (pattern[0]) {
! TRACE(("%8d: try branch\n", PTR(ptr)));
! if (pattern[1] != SRE_OP_LITERAL ||
! (ptr < end && (SRE_CODE) ptr[0] == pattern[2])) {
! state->ptr = ptr;
! i = SRE_MATCH(state, pattern + 1, level + 1);
! if (i)
! return i;
! }
while (state->lastmark > lastmark)
state->mark[state->lastmark--] = NULL;
- pattern += pattern[0];
}
}
return 0;
--- 749,779 ----
/* <BRANCH> <0=skip> code <JUMP> ... <NULL> */
TRACE(("%8d: branch\n", PTR(ptr)));
! lastmark = state->lastmark;
! while (pattern[0]) {
! SRE_CODE* code = pattern+1;
! TRACE(("%8d: try branch\n", PTR(ptr)));
! switch (code[0]) {
! case SRE_OP_IN:
! if (ptr >= end || !SRE_MEMBER(code + 2, ptr[0]))
! break;
! code += code[1] + 1;
! state->ptr = ptr + 1;
! goto branch;
! case SRE_OP_LITERAL:
! if (ptr >= end || (SRE_CODE) ptr[0] != code[1])
! break;
! code += 2;
! state->ptr = ptr + 1;
! goto branch;
! default:
! state->ptr = ptr;
! branch:
! i = SRE_MATCH(state, code, level + 1);
! if (i)
! return i;
while (state->lastmark > lastmark)
state->mark[state->lastmark--] = NULL;
}
+ pattern += pattern[0];
}
return 0;
***************
*** 708,806 ****
if (ptr + pattern[1] > end)
return 0; /* cannot match */
-
- count = 0;
-
- switch (pattern[3]) {
-
- case SRE_OP_ANY:
- /* repeated wildcard. */
- while (count < (int) pattern[2]) {
- if (ptr >= end || SRE_IS_LINEBREAK(ptr[0]))
- break;
- ptr++;
- count++;
- }
- break;
-
- case SRE_OP_ANY_ALL:
- /* repeated wildcard. skip to the end of the target
- string, and backtrack from there */
- if (ptr + pattern[1] > end)
- return 0; /* cannot match */
- count = pattern[2];
- if (count > end - ptr)
- count = end - ptr;
- ptr += count;
- break;
-
- case SRE_OP_LITERAL:
- /* repeated literal */
- chr = pattern[4];
- while (count < (int) pattern[2]) {
- if (ptr >= end || (SRE_CODE) ptr[0] != chr)
- break;
- ptr++;
- count++;
- }
- break;
-
- case SRE_OP_LITERAL_IGNORE:
- /* repeated literal */
- chr = pattern[4];
- while (count < (int) pattern[2]) {
- if (ptr >= end || (SRE_CODE) state->lower(*ptr) != chr)
- break;
- ptr++;
- count++;
- }
- break;
! case SRE_OP_NOT_LITERAL:
! /* repeated non-literal */
! chr = pattern[4];
! while (count < (int) pattern[2]) {
! if (ptr >= end || (SRE_CODE) ptr[0] == chr)
! break;
! ptr++;
! count++;
! }
! break;
!
! case SRE_OP_NOT_LITERAL_IGNORE:
! /* repeated non-literal */
! chr = pattern[4];
! while (count < (int) pattern[2]) {
! if (ptr >= end || (SRE_CODE) state->lower(ptr[0]) == chr)
! break;
! ptr++;
! count++;
! }
! break;
! case SRE_OP_IN:
! /* repeated set */
! while (count < (int) pattern[2]) {
! if (ptr >= end || !SRE_MEMBER(pattern + 5, *ptr))
! break;
! ptr++;
! count++;
! }
! break;
! default:
! /* repeated single character pattern */
! state->ptr = ptr;
! while (count < (int) pattern[2]) {
! i = SRE_MATCH(state, pattern + 3, level + 1);
! if (i < 0)
! return i;
! if (!i)
! break;
! count++;
! }
! state->ptr = ptr;
! ptr += count;
! break;
! }
/* when we arrive here, count contains the number of
--- 794,805 ----
if (ptr + pattern[1] > end)
return 0; /* cannot match */
! state->ptr = ptr;
! count = SRE_COUNT(state, pattern + 3, pattern[2], level + 1);
! if (count < 0)
! return count;
! ptr += count;
/* when we arrive here, count contains the number of
***************
*** 905,908 ****
--- 904,908 ----
TRACE(("%8d: match item (required)\n", PTR(ptr)));
rp->count = count;
+ /* RECURSIVE */
i = SRE_MATCH(state, rp->pattern + 3, level + 1);
if (i)
***************
*** 920,923 ****
--- 920,924 ----
lastmark = state->lastmark;
mark_save(state, 0, lastmark);
+ /* RECURSIVE */
i = SRE_MATCH(state, rp->pattern + 3, level + 1);
if (i)
***************
*** 956,959 ****
--- 957,961 ----
TRACE(("%8d: match item (required)\n", PTR(ptr)));
rp->count = count;
+ /* RECURSIVE */
i = SRE_MATCH(state, rp->pattern + 3, level + 1);
if (i)
***************
*** 979,982 ****
--- 981,985 ----
TRACE(("%8d: match item (optional)\n", PTR(ptr)));
rp->count = count;
+ /* RECURSIVE */
i = SRE_MATCH(state, rp->pattern + 3, level + 1);
if (i)
***************
*** 995,999 ****
}
! static int
SRE_SEARCH(SRE_STATE* state, SRE_CODE* pattern)
{
--- 998,1002 ----
}
! LOCAL(int)
SRE_SEARCH(SRE_STATE* state, SRE_CODE* pattern)
{
***************
*** 1221,1227 ****
state->repeat = NULL;
- /* FIXME: debugging */
- state->maxlevel = 0;
-
mark_fini(state);
}
--- 1224,1227 ----
***************
*** 1237,1240 ****
--- 1237,1244 ----
void* ptr;
+ memset(state, 0, sizeof(SRE_STATE));
+
+ state->lastindex = -1;
+
/* get pointer to string buffer */
buffer = string->ob_type->tp_as_buffer;
***************
*** 1301,1309 ****
state->lower = sre_lower;
- state->mark_stack = NULL;
- state->mark_stack_base = 0;
-
- state_reset(state);
-
return string;
}
--- 1305,1308 ----
***************
*** 1335,1338 ****
--- 1334,1359 ----
}
+ static void
+ pattern_error(int status)
+ {
+ switch (status) {
+ case SRE_ERROR_RECURSION_LIMIT:
+ PyErr_SetString(
+ PyExc_RuntimeError,
+ "maximum recursion limit exceeded"
+ );
+ break;
+ case SRE_ERROR_MEMORY:
+ PyErr_NoMemory();
+ break;
+ default:
+ /* other error codes indicate compiler/engine bugs */
+ PyErr_SetString(
+ PyExc_RuntimeError,
+ "internal error in regular expression engine"
+ );
+ }
+ }
+
static PyObject*
pattern_new_match(PatternObject* pattern, SRE_STATE* state, int status)
***************
*** 1384,1399 ****
return (PyObject*) match;
! } else if (status < 0) {
! /* internal error */
! PyErr_SetString(
! PyExc_RuntimeError, "internal error in regular expression engine"
! );
! return NULL;
}
! Py_INCREF(Py_None);
! return Py_None;
}
--- 1405,1419 ----
return (PyObject*) match;
! } else if (status == 0) {
! /* no match */
! Py_INCREF(Py_None);
! return Py_None;
}
! /* internal error */
! pattern_error(status);
! return NULL;
}
***************
*** 1642,1650 ****
break;
! /* internal error */
! PyErr_SetString(
! PyExc_RuntimeError,
! "internal error in regular expression engine"
! );
goto error;
--- 1662,1666 ----
break;
! pattern_error(status);
goto error;
Index: sre.h
===================================================================
RCS file: /cvsroot/python/python/dist/src/Modules/sre.h,v
retrieving revision 2.17
retrieving revision 2.18
diff -C2 -r2.17 -r2.18
*** sre.h 2000/08/01 21:05:41 2.17
--- sre.h 2000/08/03 16:29:50 2.18
***************
*** 75,80 ****
/* hooks */
SRE_TOLOWER_HOOK lower;
- /* debugging */
- int maxlevel;
} SRE_STATE;
--- 75,78 ----