[Python-checkins] CVS: python/dist/src/Modules _sre.c,2.20,2.21 sre.h,2.10,2.11 sre_constants.h,2.6,2.7

Fredrik Lundh python-dev@python.org
Sun, 2 Jul 2000 10:33:30 -0700


Update of /cvsroot/python/python/dist/src/Modules
In directory slayer.i.sourceforge.net:/tmp/cvs-serv31347/Modules

Modified Files:
	_sre.c sre.h sre_constants.h 
Log Message:


- actually enabled charset anchors in the engine (still not
  used by the code generator)

- changed max repeat value in engine from 32767 to 65535 (to match
  earlier array fix)

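- added experimental "which part matched?" mechanism to sre (a new
  INDEX opcode, exposed as the match object's "index" attribute, which
  is None when no index marker was seen); see
  http://hem.passagen.se/eff/2000_07_01_bot-archive.htm#416954
  or python-dev for details.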


Index: _sre.c
===================================================================
RCS file: /cvsroot/python/python/dist/src/Modules/_sre.c,v
retrieving revision 2.20
retrieving revision 2.21
diff -C2 -r2.20 -r2.21
*** _sre.c	2000/07/02 12:00:07	2.20
--- _sre.c	2000/07/02 17:33:27	2.21
***************
*** 22,25 ****
--- 22,26 ----
   * 00-06-30 fl	added fast search optimization (0.9.3)
   * 00-06-30 fl	added assert (lookahead) primitives, etc (0.9.4)
+  * 00-07-02 fl	added charset optimizations, etc (0.9.5)
   *
   * Copyright (c) 1997-2000 by Secret Labs AB.  All rights reserved.
***************
*** 32,36 ****
  #ifndef SRE_RECURSIVE
  
! char copyright[] = " SRE 0.9.4 Copyright (c) 1997-2000 by Secret Labs AB ";
  
  #include "Python.h"
--- 33,37 ----
  #ifndef SRE_RECURSIVE
  
! char copyright[] = " SRE 0.9.5 Copyright (c) 1997-2000 by Secret Labs AB ";
  
  #include "Python.h"
***************
*** 588,591 ****
--- 589,600 ----
  			break;
  
+ 		case SRE_OP_INDEX:
+ 			/* set index */
+ 			/* args: <index> */
+ 			TRACE(("%8d: set index %d\n", PTR(ptr), pattern[0]));
+             state->index = pattern[0];
+ 			pattern++;
+ 			break;
+ 
  		case SRE_OP_JUMP:
  		case SRE_OP_INFO:
***************
*** 811,815 ****
                 points to the stack */
  
!             while (pattern[2] == 32767 || count < (int) pattern[2]) {
  				state->stackbase = stack;
  				i = SRE_MATCH(state, pattern + 3);
--- 820,824 ----
                 points to the stack */
  
!             while (pattern[2] == 65535 || count < (int) pattern[2]) {
  				state->stackbase = stack;
  				i = SRE_MATCH(state, pattern + 3);
***************
*** 981,988 ****
--- 990,999 ----
  
          if (flags & SRE_INFO_PREFIX) {
+             /* pattern starts with a known prefix */
              prefix_len = pattern[5];
              prefix = pattern + 6;
              overlap = prefix + prefix_len - 1;
          } else if (flags & SRE_INFO_CHARSET)
+             /* pattern starts with a character from a known set */
              charset = pattern + 5;
  
***************
*** 1043,1047 ****
  				break;
  		}
- #if 0
      } else if (charset) {
  		/* pattern starts with a character from a known set */
--- 1054,1057 ----
***************
*** 1058,1062 ****
  				break;
          }
- #endif
  	} else
  		/* general case */
--- 1068,1071 ----
***************
*** 1205,1208 ****
--- 1214,1219 ----
  		state->mark[i] = NULL;
  
+     state->index = -1;
+ 
  	state->stack = NULL;
  	state->stackbase = 0;
***************
*** 1287,1290 ****
--- 1298,1303 ----
  				match->mark[j+2] = match->mark[j+3] = -1; /* undefined */
  
+         match->index = state->index;
+ 
  		return (PyObject*) match;
  
***************
*** 1887,1890 ****
--- 1900,1912 ----
  	if (!strcmp(name, "endpos"))
  		return Py_BuildValue("i", 0); /* FIXME */
+ 
+ 	if (!strcmp(name, "index")) {
+         /* experimental */
+         if (self->index < 0) {
+             Py_INCREF(Py_None);
+             return Py_None;
+         } else
+             return Py_BuildValue("i", self->index);
+     }
  
  	PyErr_SetString(PyExc_AttributeError, name);

Index: sre.h
===================================================================
RCS file: /cvsroot/python/python/dist/src/Modules/sre.h,v
retrieving revision 2.10
retrieving revision 2.11
diff -C2 -r2.10 -r2.11
*** sre.h	2000/06/29 18:03:25	2.10
--- sre.h	2000/07/02 17:33:27	2.11
***************
*** 34,37 ****
--- 34,38 ----
      PyObject* string; /* link to the target string */
      PatternObject* pattern; /* link to the regex (pattern) object */
+     int index; /* last index marker seen by the engine (-1 if none) */
      int groups; /* number of groups (start/end marks) */
      int mark[2];
***************
*** 58,61 ****
--- 59,63 ----
      int charsize;
      /* registers */
+     int index;
      int lastmark;
      void* mark[SRE_MARK_SIZE];

Index: sre_constants.h
===================================================================
RCS file: /cvsroot/python/python/dist/src/Modules/sre_constants.h,v
retrieving revision 2.6
retrieving revision 2.7
diff -C2 -r2.6 -r2.7
*** sre_constants.h	2000/07/02 12:00:07	2.6
--- sre_constants.h	2000/07/02 17:33:27	2.7
***************
*** 24,42 ****
  #define SRE_OP_GROUP 10
  #define SRE_OP_GROUP_IGNORE 11
! #define SRE_OP_IN 12
! #define SRE_OP_IN_IGNORE 13
! #define SRE_OP_INFO 14
! #define SRE_OP_JUMP 15
! #define SRE_OP_LITERAL 16
! #define SRE_OP_LITERAL_IGNORE 17
! #define SRE_OP_MARK 18
! #define SRE_OP_MAX_REPEAT 19
! #define SRE_OP_MAX_REPEAT_ONE 20
! #define SRE_OP_MIN_REPEAT 21
! #define SRE_OP_NOT_LITERAL 22
! #define SRE_OP_NOT_LITERAL_IGNORE 23
! #define SRE_OP_NEGATE 24
! #define SRE_OP_RANGE 25
! #define SRE_OP_REPEAT 26
  #define SRE_AT_BEGINNING 0
  #define SRE_AT_BEGINNING_LINE 1
--- 24,43 ----
  #define SRE_OP_GROUP 10
  #define SRE_OP_GROUP_IGNORE 11
! #define SRE_OP_INDEX 12
! #define SRE_OP_IN 13
! #define SRE_OP_IN_IGNORE 14
! #define SRE_OP_INFO 15
! #define SRE_OP_JUMP 16
! #define SRE_OP_LITERAL 17
! #define SRE_OP_LITERAL_IGNORE 18
! #define SRE_OP_MARK 19
! #define SRE_OP_MAX_REPEAT 20
! #define SRE_OP_MAX_REPEAT_ONE 21
! #define SRE_OP_MIN_REPEAT 22
! #define SRE_OP_NOT_LITERAL 23
! #define SRE_OP_NOT_LITERAL_IGNORE 24
! #define SRE_OP_NEGATE 25
! #define SRE_OP_RANGE 26
! #define SRE_OP_REPEAT 27
  #define SRE_AT_BEGINNING 0
  #define SRE_AT_BEGINNING_LINE 1