[Python-checkins] CVS: python/dist/src/Modules _sre.c,2.29,2.30 sre.h,2.16,2.17
Fredrik Lundh
python-dev@python.org
Tue, 1 Aug 2000 14:05:44 -0700
Update of /cvsroot/python/python/dist/src/Modules
In directory slayer.i.sourceforge.net:/tmp/cvs-serv12762/Modules
Modified Files:
_sre.c sre.h
Log Message:
-- fixed width calculations for alternations
-- fixed literal check in branch operator
(this broke test_tokenize, as reported by Mark Favas)
-- added REPEAT_ONE operator (still not enabled, though)
-- added some debugging stuff (maxlevel)
Index: _sre.c
===================================================================
RCS file: /cvsroot/python/python/dist/src/Modules/_sre.c,v
retrieving revision 2.29
retrieving revision 2.30
diff -C2 -r2.29 -r2.30
*** _sre.c 2000/08/01 18:20:07 2.29
--- _sre.c 2000/08/01 21:05:41 2.30
***************
*** 220,223 ****
--- 220,231 ----
mark_fini(SRE_STATE* state)
{
+ #if 0
+ /* FIXME: debugging */
+ if (state->maxlevel > 0)
+ printf("max %d\n", state->maxlevel);
+ if (state->mark_stack_base > 0)
+ printf("mark stack %d\n", state->mark_stack_base);
+ #endif
+
if (state->mark_stack)
free(state->mark_stack);
***************
*** 431,435 ****
LOCAL(int)
! SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern)
{
/* check if string matches the given pattern. returns -1 for
--- 439,443 ----
LOCAL(int)
! SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern, int level)
{
/* check if string matches the given pattern. returns -1 for
***************
*** 444,448 ****
SRE_REPEAT rep; /* FIXME: <fl> allocate in STATE instead */
! TRACE(("%8d: enter\n", PTR(ptr)));
if (pattern[0] == SRE_OP_INFO) {
--- 452,456 ----
SRE_REPEAT rep; /* FIXME: <fl> allocate in STATE instead */
! TRACE(("%8d: enter %d\n", PTR(ptr), level));
if (pattern[0] == SRE_OP_INFO) {
***************
*** 457,460 ****
--- 465,472 ----
}
+ /* FIXME: debugging */
+ if (level > state->maxlevel)
+ state->maxlevel = level;
+
for (;;) {
***************
*** 624,628 ****
if (state->ptr < state->beginning)
return 0;
! i = SRE_MATCH(state, pattern + 2);
if (i <= 0)
return i;
--- 636,640 ----
if (state->ptr < state->beginning)
return 0;
! i = SRE_MATCH(state, pattern + 2, level + 1);
if (i <= 0)
return i;
***************
*** 639,643 ****
if (state->ptr < state->beginning)
return 0;
! i = SRE_MATCH(state, pattern + 2);
if (i < 0)
return i;
--- 651,655 ----
if (state->ptr < state->beginning)
return 0;
! i = SRE_MATCH(state, pattern + 2, level + 1);
if (i < 0)
return i;
***************
*** 657,664 ****
while (pattern[0]) {
TRACE(("%8d: try branch\n", PTR(ptr)));
! if (pattern[2] != SRE_OP_LITERAL ||
! (ptr < end && (SRE_CODE) ptr[0] == pattern[3])) {
state->ptr = ptr;
! i = SRE_MATCH(state, pattern + 1);
if (i)
return i;
--- 669,676 ----
while (pattern[0]) {
TRACE(("%8d: try branch\n", PTR(ptr)));
! if (pattern[1] != SRE_OP_LITERAL ||
! (ptr < end && (SRE_CODE) ptr[0] == pattern[2])) {
state->ptr = ptr;
! i = SRE_MATCH(state, pattern + 1, level + 1);
if (i)
return i;
***************
*** 671,674 ****
--- 683,835 ----
return 0;
+ case SRE_OP_REPEAT_ONE:
+ /* match repeated sequence (maximizing regexp) */
+
+ /* this operator only works if the repeated item is
+ exactly one character wide, and we're not already
+ collecting backtracking points. for other cases,
+ use the MAX_REPEAT operator instead */
+
+ /* <REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
+
+ TRACE(("%8d: max repeat one {%d,%d}\n", PTR(ptr),
+ pattern[1], pattern[2]));
+
+ count = 0;
+
+ if (pattern[3] == SRE_OP_ANY) {
+ /* repeated wildcard. skip to the end of the target
+ string, and backtrack from there */
+ /* FIXME: must look for line endings */
+ if (ptr + pattern[1] > end)
+ return 0; /* cannot match */
+ count = pattern[2];
+ if (count > end - ptr)
+ count = end - ptr;
+ ptr += count;
+
+ } else if (pattern[3] == SRE_OP_LITERAL) {
+ /* repeated literal */
+ SRE_CODE chr = pattern[4];
+ while (count < (int) pattern[2]) {
+ if (ptr >= end || (SRE_CODE) ptr[0] != chr)
+ break;
+ ptr++;
+ count++;
+ }
+
+ } else if (pattern[3] == SRE_OP_LITERAL_IGNORE) {
+ /* repeated literal */
+ SRE_CODE chr = pattern[4];
+ while (count < (int) pattern[2]) {
+ if (ptr >= end || (SRE_CODE) state->lower(*ptr) != chr)
+ break;
+ ptr++;
+ count++;
+ }
+
+ } else if (pattern[3] == SRE_OP_NOT_LITERAL) {
+ /* repeated non-literal */
+ SRE_CODE chr = pattern[4];
+ while (count < (int) pattern[2]) {
+ if (ptr >= end || (SRE_CODE) ptr[0] == chr)
+ break;
+ ptr++;
+ count++;
+ }
+
+ } else if (pattern[3] == SRE_OP_NOT_LITERAL_IGNORE) {
+ /* repeated non-literal */
+ SRE_CODE chr = pattern[4];
+ while (count < (int) pattern[2]) {
+ if (ptr >= end || (SRE_CODE) state->lower(ptr[0]) == chr)
+ break;
+ ptr++;
+ count++;
+ }
+
+ } else if (pattern[3] == SRE_OP_IN) {
+ /* repeated set */
+ while (count < (int) pattern[2]) {
+ if (ptr >= end || !SRE_MEMBER(pattern + 5, *ptr))
+ break;
+ ptr++;
+ count++;
+ }
+
+ } else {
+ /* repeated single character pattern */
+ state->ptr = ptr;
+ while (count < (int) pattern[2]) {
+ i = SRE_MATCH(state, pattern + 3, level + 1);
+ if (i < 0)
+ return i;
+ if (!i)
+ break;
+ count++;
+ }
+ state->ptr = ptr;
+ ptr += count;
+ }
+
+ /* when we arrive here, count contains the number of
+ matches, and ptr points to the tail of the target
+ string. check if the rest of the pattern matches,
+ and backtrack if not. */
+
+ TRACE(("%8d: repeat %d found\n", PTR(ptr), count));
+
+ if (count < (int) pattern[1])
+ return 0;
+
+ if (pattern[pattern[0]] == SRE_OP_SUCCESS) {
+ /* tail is empty. we're finished */
+ TRACE(("%8d: tail is empty\n", PTR(ptr)));
+ state->ptr = ptr;
+ return 1;
+
+ } else if (pattern[pattern[0]] == SRE_OP_LITERAL) {
+ /* tail starts with a literal. skip positions where
+ the rest of the pattern cannot possibly match */
+ SRE_CODE chr = pattern[pattern[0]+1];
+ TRACE(("%8d: tail is literal %d\n", PTR(ptr), chr));
+ for (;;) {
+ TRACE(("%8d: scan for tail match\n", PTR(ptr)));
+ while (count >= (int) pattern[1] &&
+ (ptr >= end || *ptr != chr)) {
+ ptr--;
+ count--;
+ }
+ TRACE(("%8d: check tail\n", PTR(ptr)));
+ if (count < (int) pattern[1])
+ break;
+ state->ptr = ptr;
+ i = SRE_MATCH(state, pattern + pattern[0], level + 1);
+ if (i > 0) {
+ TRACE(("%8d: repeat %d picked\n", PTR(ptr), count));
+ return 1;
+ }
+ ptr--;
+ count--;
+ }
+
+ } else {
+ /* general case */
+ TRACE(("%8d: tail is pattern\n", PTR(ptr)));
+ while (count >= (int) pattern[1]) {
+ state->ptr = ptr;
+ i = SRE_MATCH(state, pattern + pattern[0], level + 1);
+ if (i < 0)
+ return i;
+ if (i) {
+ TRACE(("%8d: repeat %d picked\n", PTR(ptr), count));
+ return 1;
+ }
+ ptr--;
+ count--;
+ }
+ }
+ return 0;
+
case SRE_OP_REPEAT:
/* create repeat context. all the hard work is done
***************
*** 678,683 ****
pattern[1], pattern[2]));
- state->ptr = ptr;
-
rep.count = -1;
rep.pattern = pattern;
--- 839,842 ----
***************
*** 687,694 ****
state->repeat = &rep;
! i = SRE_MATCH(state, pattern + pattern[0]);
state->repeat = rep.prev;
- /* free(rp); */
return i;
--- 846,853 ----
state->repeat = &rep;
! state->ptr = ptr;
! i = SRE_MATCH(state, pattern + pattern[0], level + 1);
state->repeat = rep.prev;
return i;
***************
*** 715,719 ****
TRACE(("%8d: match item (required)\n", PTR(ptr)));
rp->count = count;
! i = SRE_MATCH(state, rp->pattern + 3);
if (i)
return i;
--- 874,878 ----
TRACE(("%8d: match item (required)\n", PTR(ptr)));
rp->count = count;
! i = SRE_MATCH(state, rp->pattern + 3, level + 1);
if (i)
return i;
***************
*** 730,734 ****
lastmark = state->lastmark;
mark_save(state, 0, lastmark);
! i = SRE_MATCH(state, rp->pattern + 3);
if (i)
return i;
--- 889,893 ----
lastmark = state->lastmark;
mark_save(state, 0, lastmark);
! i = SRE_MATCH(state, rp->pattern + 3, level + 1);
if (i)
return i;
***************
*** 742,750 ****
TRACE(("%8d: match tail\n", PTR(ptr)));
state->repeat = rp->prev;
! i = SRE_MATCH(state, pattern);
! if (i) {
! /* free(rp); */
return i;
- }
state->repeat = rp;
return 0;
--- 901,907 ----
TRACE(("%8d: match tail\n", PTR(ptr)));
state->repeat = rp->prev;
! i = SRE_MATCH(state, pattern, level + 1);
! if (i)
return i;
state->repeat = rp;
return 0;
***************
*** 768,772 ****
TRACE(("%8d: match item (required)\n", PTR(ptr)));
rp->count = count;
! i = SRE_MATCH(state, rp->pattern + 3);
if (i)
return i;
--- 925,929 ----
TRACE(("%8d: match item (required)\n", PTR(ptr)));
rp->count = count;
! i = SRE_MATCH(state, rp->pattern + 3, level + 1);
if (i)
return i;
***************
*** 779,783 ****
TRACE(("%8d: match tail\n", PTR(ptr)));
state->repeat = rp->prev;
! i = SRE_MATCH(state, pattern);
if (i) {
/* free(rp); */
--- 936,940 ----
TRACE(("%8d: match tail\n", PTR(ptr)));
state->repeat = rp->prev;
! i = SRE_MATCH(state, pattern, level + 1);
if (i) {
/* free(rp); */
***************
*** 791,795 ****
TRACE(("%8d: match item (optional)\n", PTR(ptr)));
rp->count = count;
! i = SRE_MATCH(state, rp->pattern + 3);
if (i)
return i;
--- 948,952 ----
TRACE(("%8d: match item (optional)\n", PTR(ptr)));
rp->count = count;
! i = SRE_MATCH(state, rp->pattern + 3, level + 1);
if (i)
return i;
***************
*** 866,870 ****
if (flags & SRE_INFO_LITERAL)
return 1; /* we got all of it */
! status = SRE_MATCH(state, pattern + 2*prefix_len);
if (status != 0)
return status;
--- 1023,1027 ----
if (flags & SRE_INFO_LITERAL)
return 1; /* we got all of it */
! status = SRE_MATCH(state, pattern + 2*prefix_len, 1);
if (status != 0)
return status;
***************
*** 894,898 ****
state->start = ptr;
state->ptr = ++ptr;
! status = SRE_MATCH(state, pattern + 2);
if (status != 0)
break;
--- 1051,1055 ----
state->start = ptr;
state->ptr = ++ptr;
! status = SRE_MATCH(state, pattern + 2, 1);
if (status != 0)
break;
***************
*** 908,912 ****
state->start = ptr;
state->ptr = ptr;
! status = SRE_MATCH(state, pattern);
if (status != 0)
break;
--- 1065,1069 ----
state->start = ptr;
state->ptr = ptr;
! status = SRE_MATCH(state, pattern, 1);
if (status != 0)
break;
***************
*** 917,921 ****
TRACE(("%8d: === SEARCH ===\n", PTR(ptr)));
state->start = state->ptr = ptr++;
! status = SRE_MATCH(state, pattern);
if (status != 0)
break;
--- 1074,1078 ----
TRACE(("%8d: === SEARCH ===\n", PTR(ptr)));
state->start = state->ptr = ptr++;
! status = SRE_MATCH(state, pattern, 1);
if (status != 0)
break;
***************
*** 1033,1036 ****
--- 1190,1196 ----
state->repeat = NULL;
+ /* FIXME: debugging */
+ state->maxlevel = 0;
+
mark_fini(state);
}
***************
*** 1111,1114 ****
--- 1271,1275 ----
state->mark_stack = NULL;
+ state->mark_stack_base = 0;
state_reset(state);
***************
*** 1263,1270 ****
if (state.charsize == 1) {
! status = sre_match(&state, PatternObject_GetCode(self));
} else {
#if defined(HAVE_UNICODE)
! status = sre_umatch(&state, PatternObject_GetCode(self));
#endif
}
--- 1424,1431 ----
if (state.charsize == 1) {
! status = sre_match(&state, PatternObject_GetCode(self), 1);
} else {
#if defined(HAVE_UNICODE)
! status = sre_umatch(&state, PatternObject_GetCode(self), 1);
#endif
}
***************
*** 1942,1949 ****
if (state->charsize == 1) {
! status = sre_match(state, PatternObject_GetCode(self->pattern));
} else {
#if defined(HAVE_UNICODE)
! status = sre_umatch(state, PatternObject_GetCode(self->pattern));
#endif
}
--- 2103,2110 ----
if (state->charsize == 1) {
! status = sre_match(state, PatternObject_GetCode(self->pattern), 1);
} else {
#if defined(HAVE_UNICODE)
! status = sre_umatch(state, PatternObject_GetCode(self->pattern), 1);
#endif
}
Index: sre.h
===================================================================
RCS file: /cvsroot/python/python/dist/src/Modules/sre.h,v
retrieving revision 2.16
retrieving revision 2.17
diff -C2 -r2.16 -r2.17
*** sre.h 2000/08/01 18:20:07 2.16
--- sre.h 2000/08/01 21:05:41 2.17
***************
*** 75,78 ****
--- 75,80 ----
/* hooks */
SRE_TOLOWER_HOOK lower;
+ /* debugging */
+ int maxlevel;
} SRE_STATE;