[Python-checkins] CVS: python/dist/src/Modules _sre.c,2.15,2.16

Fredrik Lundh python-dev@python.org
Fri, 30 Jun 2000 06:55:17 -0700


Update of /cvsroot/python/python/dist/src/Modules
In directory slayer.i.sourceforge.net:/tmp/cvs-serv350/Modules

Modified Files:
	_sre.c 
Log Message:

the mad patcher strikes again:

-- added pickling support (only works if sre is imported)

-- fixed wordsize problems in engine
   (instead of casting literals down to the character size,
   cast characters up to the literal size (same as the code
   word size).  this prevents false hits when you're matching
   a unicode pattern against an 8-bit string.  unfortunately,
   this broke another test, but I think the test should be
   changed in this case; more on that on python-dev)

-- added sre.purge function
   (unofficial, clears the cache)

Index: _sre.c
===================================================================
RCS file: /cvsroot/python/python/dist/src/Modules/_sre.c,v
retrieving revision 2.15
retrieving revision 2.16
diff -C2 -r2.15 -r2.16
*** _sre.c	2000/06/30 10:41:31	2.15
--- _sre.c	2000/06/30 13:55:15	2.16
***************
*** 21,25 ****
   * 00-06-29 fl	fixed split, added more scanner features (0.9.2)
   * 00-06-30 fl	tuning, fast search (0.9.3)
!  * 00-06-30 fl	added assert (lookahead) primitives (0.9.4)
   *
   * Copyright (c) 1997-2000 by Secret Labs AB.  All rights reserved.
--- 21,25 ----
   * 00-06-29 fl	fixed split, added more scanner features (0.9.2)
   * 00-06-30 fl	tuning, fast search (0.9.3)
!  * 00-06-30 fl	added assert (lookahead) primitives, etc (0.9.4)
   *
   * Copyright (c) 1997-2000 by Secret Labs AB.  All rights reserved.
***************
*** 340,344 ****
  
  LOCAL(int)
! SRE_MEMBER(SRE_CODE* set, SRE_CHAR ch)
  {
  	/* check if character is a member of the given set */
--- 340,344 ----
  
  LOCAL(int)
! SRE_MEMBER(SRE_CODE* set, SRE_CODE ch)
  {
  	/* check if character is a member of the given set */
***************
*** 357,361 ****
  
  		case SRE_OP_LITERAL:
! 			if (ch == (SRE_CHAR) set[0])
  				return ok;
  			set++;
--- 357,361 ----
  
  		case SRE_OP_LITERAL:
! 			if (ch == set[0])
  				return ok;
  			set++;
***************
*** 363,367 ****
  
  		case SRE_OP_RANGE:
! 			if ((SRE_CHAR) set[0] <= ch && ch <= (SRE_CHAR) set[1])
  				return ok;
  			set += 2;
--- 363,367 ----
  
  		case SRE_OP_RANGE:
! 			if (set[0] <= ch && ch <= set[1])
  				return ok;
  			set += 2;
***************
*** 456,461 ****
  			/* match literal string */
  			/* args: <code> */
! 			TRACE(("%8d: literal %c\n", PTR(ptr), (SRE_CHAR) pattern[0]));
! 			if (ptr >= end || *ptr != (SRE_CHAR) pattern[0])
  				goto failure;
  			pattern++;
--- 456,461 ----
  			/* match literal string */
  			/* args: <code> */
! 			TRACE(("%8d: literal %c\n", PTR(ptr), pattern[0]));
! 			if (ptr >= end || (SRE_CODE) ptr[0] != pattern[0])
  				goto failure;
  			pattern++;
***************
*** 466,471 ****
  			/* match anything that is not literal character */
  			/* args: <code> */
! 			TRACE(("%8d: literal not %c\n", PTR(ptr), (SRE_CHAR) pattern[0]));
! 			if (ptr >= end || *ptr == (SRE_CHAR) pattern[0])
  				goto failure;
  			pattern++;
--- 466,471 ----
  			/* match anything that is not literal character */
  			/* args: <code> */
! 			TRACE(("%8d: literal not %c\n", PTR(ptr), pattern[0]));
! 			if (ptr >= end || (SRE_CODE) ptr[0] == pattern[0])
  				goto failure;
  			pattern++;
***************
*** 529,533 ****
  
  		case SRE_OP_LITERAL_IGNORE:
! 			TRACE(("%8d: literal lower(%c)\n", PTR(ptr), (SRE_CHAR) *pattern));
  			if (ptr >= end ||
                  state->lower(*ptr) != state->lower(*pattern))
--- 529,533 ----
  
  		case SRE_OP_LITERAL_IGNORE:
! 			TRACE(("%8d: literal lower(%c)\n", PTR(ptr), pattern[0]));
  			if (ptr >= end ||
                  state->lower(*ptr) != state->lower(*pattern))
***************
*** 538,543 ****
  
  		case SRE_OP_NOT_LITERAL_IGNORE:
! 			TRACE(("%8d: literal not lower(%c)\n", PTR(ptr),
!                    (SRE_CHAR) *pattern));
  			if (ptr >= end ||
                  state->lower(*ptr) == state->lower(*pattern))
--- 538,542 ----
  
  		case SRE_OP_NOT_LITERAL_IGNORE:
! 			TRACE(("%8d: literal not lower(%c)\n", PTR(ptr), pattern[0]));
  			if (ptr >= end ||
                  state->lower(*ptr) == state->lower(*pattern))
***************
*** 550,554 ****
  			TRACE(("%8d: set lower(%c)\n", PTR(ptr), *ptr));
  			if (ptr >= end
! 				|| !SRE_MEMBER(pattern+1, (SRE_CHAR) state->lower(*ptr)))
  				goto failure;
  			pattern += pattern[0];
--- 549,553 ----
  			TRACE(("%8d: set lower(%c)\n", PTR(ptr), *ptr));
  			if (ptr >= end
! 				|| !SRE_MEMBER(pattern+1, (SRE_CODE) state->lower(*ptr)))
  				goto failure;
  			pattern += pattern[0];
***************
*** 632,638 ****
  			} else if (pattern[3] == SRE_OP_LITERAL) {
  				/* repeated literal */
! 				SRE_CHAR chr = (SRE_CHAR) pattern[4];
  				while (count < (int) pattern[2]) {
! 					if (ptr >= end || *ptr != chr)
  						break;
  					ptr++;
--- 631,637 ----
  			} else if (pattern[3] == SRE_OP_LITERAL) {
  				/* repeated literal */
! 				SRE_CODE chr = pattern[4];
  				while (count < (int) pattern[2]) {
! 					if (ptr >= end || (SRE_CODE) ptr[0] != chr)
  						break;
  					ptr++;
***************
*** 642,648 ****
  			} else if (pattern[3] == SRE_OP_LITERAL_IGNORE) {
  				/* repeated literal */
! 				SRE_CHAR chr = (SRE_CHAR) pattern[4];
  				while (count < (int) pattern[2]) {
! 					if (ptr >= end || (SRE_CHAR) state->lower(*ptr) != chr)
  						break;
  					ptr++;
--- 641,647 ----
  			} else if (pattern[3] == SRE_OP_LITERAL_IGNORE) {
  				/* repeated literal */
! 				SRE_CODE chr = pattern[4];
  				while (count < (int) pattern[2]) {
! 					if (ptr >= end || (SRE_CODE) state->lower(*ptr) != chr)
  						break;
  					ptr++;
***************
*** 652,658 ****
  			} else if (pattern[3] == SRE_OP_NOT_LITERAL) {
  				/* repeated non-literal */
! 				SRE_CHAR chr = (SRE_CHAR) pattern[4];
  				while (count < (int) pattern[2]) {
! 					if (ptr >= end || *ptr == chr)
  						break;
  					ptr++;
--- 651,657 ----
  			} else if (pattern[3] == SRE_OP_NOT_LITERAL) {
  				/* repeated non-literal */
! 				SRE_CODE chr = pattern[4];
  				while (count < (int) pattern[2]) {
! 					if (ptr >= end || (SRE_CODE) ptr[0] == chr)
  						break;
  					ptr++;
***************
*** 662,668 ****
  			} else if (pattern[3] == SRE_OP_NOT_LITERAL_IGNORE) {
  				/* repeated non-literal */
! 				SRE_CHAR chr = (SRE_CHAR) pattern[4];
  				while (count < (int) pattern[2]) {
! 					if (ptr >= end || (SRE_CHAR) state->lower(*ptr) == chr)
  						break;
  					ptr++;
--- 661,667 ----
  			} else if (pattern[3] == SRE_OP_NOT_LITERAL_IGNORE) {
  				/* repeated non-literal */
! 				SRE_CODE chr = pattern[4];
  				while (count < (int) pattern[2]) {
! 					if (ptr >= end || (SRE_CODE) state->lower(ptr[0]) == chr)
  						break;
  					ptr++;
***************
*** 713,717 ****
  				/* tail starts with a literal. skip positions where
  				   the rest of the pattern cannot possibly match */
! 				SRE_CHAR chr = (SRE_CHAR) pattern[pattern[0]+1];
  				TRACE(("%8d: tail is literal %d\n", PTR(ptr), chr));
  				for (;;) {
--- 712,716 ----
  				/* tail starts with a literal. skip positions where
  				   the rest of the pattern cannot possibly match */
! 				SRE_CODE chr = pattern[pattern[0]+1];
  				TRACE(("%8d: tail is literal %d\n", PTR(ptr), chr));
  				for (;;) {
***************
*** 869,873 ****
  			while (*pattern) {
  				if (pattern[1] != SRE_OP_LITERAL ||
! 					(ptr < end && *ptr == (SRE_CHAR) pattern[2])) {
  					TRACE(("%8d: branch check\n", PTR(ptr)));
  					state->ptr = ptr;
--- 868,872 ----
  			while (*pattern) {
  				if (pattern[1] != SRE_OP_LITERAL ||
! 					(ptr < end && (SRE_CODE) ptr[0] == pattern[2])) {
  					TRACE(("%8d: branch check\n", PTR(ptr)));
  					state->ptr = ptr;
***************
*** 977,981 ****
          while (ptr < end) {
              for (;;) {
!                 if (*ptr != (SRE_CHAR) prefix[i]) {
                      if (!i)
                          break;
--- 976,980 ----
          while (ptr < end) {
              for (;;) {
!                 if ((SRE_CODE) ptr[0] != prefix[i]) {
                      if (!i)
                          break;
***************
*** 1009,1015 ****
  		/* pattern starts with a literal character.  this is used for
             short prefixes, and if fast search is disabled*/
! 		SRE_CHAR chr = (SRE_CHAR) pattern[1];
  		for (;;) {
! 			while (ptr < end && *ptr != chr)
  				ptr++;
  			if (ptr == end)
--- 1008,1014 ----
  		/* pattern starts with a literal character.  this is used for
             short prefixes, and if fast search is disabled*/
! 		SRE_CODE chr = pattern[1];
  		for (;;) {
! 			while (ptr < end && (SRE_CODE) ptr[0] != chr)
  				ptr++;
  			if (ptr == end)