[Python-checkins] CVS: python/dist/src/Modules _sre.c,2.45,2.45.2.1 sre_constants.h,2.9,2.9.4.1
Guido van Rossum
gvanrossum@users.sourceforge.net
Wed, 13 Jun 2001 08:15:05 -0700
Update of /cvsroot/python/python/dist/src/Modules
In directory usw-pr-cvs1:/tmp/cvs-serv31452/Modules
Modified Files:
Tag: release20-maint
_sre.c sre_constants.h
Log Message:
Bring SRE up to date with Python 2.1
Index: _sre.c
===================================================================
RCS file: /cvsroot/python/python/dist/src/Modules/_sre.c,v
retrieving revision 2.45
retrieving revision 2.45.2.1
diff -C2 -r2.45 -r2.45.2.1
*** _sre.c 2000/10/03 20:43:34 2.45
--- _sre.c 2001/06/13 15:15:02 2.45.2.1
***************
*** 6,17 ****
* partial history:
* 1999-10-24 fl created (based on existing template matcher code)
! * 2000-03-06 fl first alpha, sort of (0.5)
! * 2000-06-30 fl added fast search optimization (0.9.3)
! * 2000-06-30 fl added assert (lookahead) primitives, etc (0.9.4)
! * 2000-07-02 fl added charset optimizations, etc (0.9.5)
* 2000-07-03 fl store code in pattern object, lookbehind, etc
* 2000-07-08 fl added regs attribute
! * 2000-07-21 fl reset lastindex in scanner methods (0.9.6)
! * 2000-08-01 fl fixes for 1.6b1 (0.9.8)
* 2000-08-03 fl added recursion limit
* 2000-08-07 fl use PyOS_CheckStack() if available
--- 6,17 ----
* partial history:
* 1999-10-24 fl created (based on existing template matcher code)
! * 2000-03-06 fl first alpha, sort of
! * 2000-06-30 fl added fast search optimization
! * 2000-06-30 fl added assert (lookahead) primitives, etc
! * 2000-07-02 fl added charset optimizations, etc
* 2000-07-03 fl store code in pattern object, lookbehind, etc
* 2000-07-08 fl added regs attribute
! * 2000-07-21 fl reset lastindex in scanner methods
! * 2000-08-01 fl fixes for 1.6b1
* 2000-08-03 fl added recursion limit
* 2000-08-07 fl use PyOS_CheckStack() if available
***************
*** 22,27 ****
* 2000-09-21 fl don't use the buffer interface for unicode strings
* 2000-10-03 fl fixed assert_not primitive; support keyword arguments
*
! * Copyright (c) 1997-2000 by Secret Labs AB. All rights reserved.
*
* This version of the SRE library can be redistributed under CNRI's
--- 22,34 ----
* 2000-09-21 fl don't use the buffer interface for unicode strings
* 2000-10-03 fl fixed assert_not primitive; support keyword arguments
+ * 2000-10-24 fl really fixed assert_not; reset groups in findall
+ * 2000-12-21 fl fixed memory leak in groupdict
+ * 2001-01-02 fl properly reset pointer after failed assertion in MIN_UNTIL
+ * 2001-01-15 fl avoid recursion for MIN_UNTIL; fixed uppercase literal bug
+ * 2001-01-16 fl fixed memory leak in pattern destructor
+ * 2001-03-20 fl lots of fixes for 2.1b2
+ * 2001-04-15 fl export copyright as Python attribute, not global
*
! * Copyright (c) 1997-2001 by Secret Labs AB. All rights reserved.
*
* This version of the SRE library can be redistributed under CNRI's
***************
*** 36,40 ****
#ifndef SRE_RECURSIVE
! char copyright[] = " SRE 0.9.8 Copyright (c) 1997-2000 by Secret Labs AB ";
#include "Python.h"
--- 43,48 ----
#ifndef SRE_RECURSIVE
! static char copyright[] =
! " SRE 2.1b2 Copyright (c) 1997-2001 by Secret Labs AB ";
#include "Python.h"
***************
*** 45,49 ****
/* name of this module, minus the leading underscore */
! #define MODULE "sre"
/* defining this one enables tracing */
--- 53,59 ----
/* name of this module, minus the leading underscore */
! #if !defined(SRE_MODULE)
! #define SRE_MODULE "sre"
! #endif
/* defining this one enables tracing */
***************
*** 77,80 ****
--- 87,94 ----
#undef USE_INLINE
+ #if PY_VERSION_HEX < 0x01060000
+ #define PyObject_DEL(op) PyMem_DEL((op))
+ #endif
+
/* -------------------------------------------------------------------- */
***************
*** 131,139 ****
120, 121, 122, 123, 124, 125, 126, 127 };
- static unsigned int sre_lower(unsigned int ch)
- {
- return ((ch) < 128 ? sre_char_lower[ch] : ch);
- }
-
#define SRE_IS_DIGIT(ch)\
((ch) < 128 ? (sre_char_info[(ch)] & SRE_DIGIT_MASK) : 0)
--- 145,148 ----
***************
*** 147,156 ****
((ch) < 128 ? (sre_char_info[(ch)] & SRE_WORD_MASK) : 0)
! /* locale-specific character predicates */
!
! static unsigned int sre_lower_locale(unsigned int ch)
{
! return ((ch) < 256 ? tolower((ch)) : ch);
}
#define SRE_LOC_IS_DIGIT(ch) ((ch) < 256 ? isdigit((ch)) : 0)
#define SRE_LOC_IS_SPACE(ch) ((ch) < 256 ? isspace((ch)) : 0)
--- 156,166 ----
((ch) < 128 ? (sre_char_info[(ch)] & SRE_WORD_MASK) : 0)
! static unsigned int sre_lower(unsigned int ch)
{
! return ((ch) < 128 ? sre_char_lower[ch] : ch);
}
+
+ /* locale-specific character predicates */
+
#define SRE_LOC_IS_DIGIT(ch) ((ch) < 256 ? isdigit((ch)) : 0)
#define SRE_LOC_IS_SPACE(ch) ((ch) < 256 ? isspace((ch)) : 0)
***************
*** 159,169 ****
#define SRE_LOC_IS_WORD(ch) (SRE_LOC_IS_ALNUM((ch)) || (ch) == '_')
/* unicode-specific character predicates */
#if defined(HAVE_UNICODE)
! static unsigned int sre_lower_unicode(unsigned int ch)
! {
! return (unsigned int) Py_UNICODE_TOLOWER((Py_UNICODE)(ch));
! }
#define SRE_UNI_IS_DIGIT(ch) Py_UNICODE_ISDIGIT((Py_UNICODE)(ch))
#define SRE_UNI_IS_SPACE(ch) Py_UNICODE_ISSPACE((Py_UNICODE)(ch))
--- 169,181 ----
#define SRE_LOC_IS_WORD(ch) (SRE_LOC_IS_ALNUM((ch)) || (ch) == '_')
+ static unsigned int sre_lower_locale(unsigned int ch)
+ {
+ return ((ch) < 256 ? tolower((ch)) : ch);
+ }
+
/* unicode-specific character predicates */
#if defined(HAVE_UNICODE)
!
#define SRE_UNI_IS_DIGIT(ch) Py_UNICODE_ISDIGIT((Py_UNICODE)(ch))
#define SRE_UNI_IS_SPACE(ch) Py_UNICODE_ISSPACE((Py_UNICODE)(ch))
***************
*** 171,174 ****
--- 183,192 ----
#define SRE_UNI_IS_ALNUM(ch) Py_UNICODE_ISALNUM((Py_UNICODE)(ch))
#define SRE_UNI_IS_WORD(ch) (SRE_UNI_IS_ALNUM((ch)) || (ch) == '_')
+
+ static unsigned int sre_lower_unicode(unsigned int ch)
+ {
+ return (unsigned int) Py_UNICODE_TOLOWER((Py_UNICODE)(ch));
+ }
+
#endif
***************
*** 217,220 ****
--- 235,255 ----
case SRE_CATEGORY_UNI_NOT_LINEBREAK:
return !SRE_UNI_IS_LINEBREAK(ch);
+ #else
+ case SRE_CATEGORY_UNI_DIGIT:
+ return SRE_IS_DIGIT(ch);
+ case SRE_CATEGORY_UNI_NOT_DIGIT:
+ return !SRE_IS_DIGIT(ch);
+ case SRE_CATEGORY_UNI_SPACE:
+ return SRE_IS_SPACE(ch);
+ case SRE_CATEGORY_UNI_NOT_SPACE:
+ return !SRE_IS_SPACE(ch);
+ case SRE_CATEGORY_UNI_WORD:
+ return SRE_LOC_IS_WORD(ch);
+ case SRE_CATEGORY_UNI_NOT_WORD:
+ return !SRE_LOC_IS_WORD(ch);
+ case SRE_CATEGORY_UNI_LINEBREAK:
+ return SRE_IS_LINEBREAK(ch);
+ case SRE_CATEGORY_UNI_NOT_LINEBREAK:
+ return !SRE_IS_LINEBREAK(ch);
#endif
}
***************
*** 355,358 ****
--- 390,394 ----
case SRE_AT_BEGINNING:
+ case SRE_AT_BEGINNING_STRING:
return ((void*) ptr == state->beginning);
***************
*** 370,373 ****
--- 406,412 ----
SRE_IS_LINEBREAK((int) ptr[0]));
+ case SRE_AT_END_STRING:
+ return ((void*) ptr == state->end);
+
case SRE_AT_BOUNDARY:
if (state->beginning == state->end)
***************
*** 387,390 ****
--- 426,465 ----
SRE_IS_WORD((int) ptr[0]) : 0;
return this == that;
+
+ case SRE_AT_LOC_BOUNDARY:
+ if (state->beginning == state->end)
+ return 0;
+ that = ((void*) ptr > state->beginning) ?
+ SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
+ this = ((void*) ptr < state->end) ?
+ SRE_LOC_IS_WORD((int) ptr[0]) : 0;
+ return this != that;
+
+ case SRE_AT_LOC_NON_BOUNDARY:
+ if (state->beginning == state->end)
+ return 0;
+ that = ((void*) ptr > state->beginning) ?
+ SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
+ this = ((void*) ptr < state->end) ?
+ SRE_LOC_IS_WORD((int) ptr[0]) : 0;
+ return this == that;
+
+ case SRE_AT_UNI_BOUNDARY:
+ if (state->beginning == state->end)
+ return 0;
+ that = ((void*) ptr > state->beginning) ?
+ SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
+ this = ((void*) ptr < state->end) ?
+ SRE_UNI_IS_WORD((int) ptr[0]) : 0;
+ return this != that;
+
+ case SRE_AT_UNI_NON_BOUNDARY:
+ if (state->beginning == state->end)
+ return 0;
+ that = ((void*) ptr > state->beginning) ?
+ SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
+ this = ((void*) ptr < state->end) ?
+ SRE_UNI_IS_WORD((int) ptr[0]) : 0;
+ return this == that;
}
***************
*** 784,794 ****
TRACE(("|%p|%p|ASSERT_NOT %d\n", pattern, ptr, pattern[1]));
state->ptr = ptr - pattern[1];
! if (state->ptr < state->beginning)
! return 0;
! i = SRE_MATCH(state, pattern + 2, level + 1);
! if (i < 0)
! return i;
! if (i)
! return 0;
pattern += pattern[0];
break;
--- 859,869 ----
TRACE(("|%p|%p|ASSERT_NOT %d\n", pattern, ptr, pattern[1]));
state->ptr = ptr - pattern[1];
! if (state->ptr >= state->beginning) {
! i = SRE_MATCH(state, pattern + 2, level + 1);
! if (i < 0)
! return i;
! if (i)
! return 0;
! }
pattern += pattern[0];
break;
***************
*** 826,830 ****
exactly one character wide, and we're not already
collecting backtracking points. for other cases,
! use the MAX_REPEAT operator instead */
/* <REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
--- 901,905 ----
exactly one character wide, and we're not already
collecting backtracking points. for other cases,
! use the MAX_REPEAT operator */
/* <REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
***************
*** 900,904 ****
case SRE_OP_REPEAT:
/* create repeat context. all the hard work is done
! by the UNTIL operator */
/* <REPEAT> <skip> <1=min> <2=max> item <UNTIL> tail */
TRACE(("|%p|%p|REPEAT %d %d\n", pattern, ptr,
--- 975,979 ----
case SRE_OP_REPEAT:
/* create repeat context. all the hard work is done
! by the UNTIL operator (MAX_UNTIL, MIN_UNTIL) */
/* <REPEAT> <skip> <1=min> <2=max> item <UNTIL> tail */
TRACE(("|%p|%p|REPEAT %d %d\n", pattern, ptr,
***************
*** 974,977 ****
--- 1049,1053 ----
return i;
state->repeat = rp;
+ state->ptr = ptr;
return 0;
***************
*** 986,990 ****
count = rp->count + 1;
! TRACE(("|%p|%p|MIN_UNTIL %d\n", pattern, ptr, count));
state->ptr = ptr;
--- 1062,1067 ----
count = rp->count + 1;
! TRACE(("|%p|%p|MIN_UNTIL %d %p\n", pattern, ptr, count,
! rp->pattern));
state->ptr = ptr;
***************
*** 1004,1012 ****
/* see if the tail matches */
state->repeat = rp->prev;
! i = SRE_MATCH(state, pattern, level + 1);
if (i) {
/* free(rp); */
return i;
}
state->repeat = rp;
--- 1081,1101 ----
/* see if the tail matches */
state->repeat = rp->prev;
! /* FIXME: the following fix doesn't always work (#133283) */
! if (0 && rp->pattern[2] == 65535) {
! /* unbounded repeat */
! for (;;) {
! i = SRE_MATCH(state, pattern, level + 1);
! if (i || ptr >= end)
! break;
! state->ptr = ++ptr;
! }
! } else
! i = SRE_MATCH(state, pattern, level + 1);
if (i) {
/* free(rp); */
return i;
}
+
+ state->ptr = ptr;
state->repeat = rp;
***************
*** 1020,1023 ****
--- 1109,1113 ----
return i;
rp->count = count - 1;
+ state->ptr = ptr;
return 0;
***************
*** 1186,1218 ****
PyObject* groupindex = NULL;
PyObject* indexgroup = NULL;
! if (!PyArg_ParseTuple(args, "OiO|iOO", &pattern, &flags, &code,
! &groups, &groupindex, &indexgroup))
! return NULL;
!
! code = PySequence_Fast(code, "code argument must be a sequence");
! if (!code)
return NULL;
! #if PY_VERSION_HEX >= 0x01060000
! n = PySequence_Size(code);
! #else
! n = PySequence_Length(code);
! #endif
! self = PyObject_NEW_VAR(PatternObject, &Pattern_Type, 100*n);
! if (!self) {
! Py_DECREF(code);
return NULL;
- }
for (i = 0; i < n; i++) {
! PyObject *o = PySequence_Fast_GET_ITEM(code, i);
self->code[i] = (SRE_CODE) PyInt_AsLong(o);
}
-
- Py_DECREF(code);
! if (PyErr_Occurred())
return NULL;
Py_INCREF(pattern);
--- 1276,1299 ----
PyObject* groupindex = NULL;
PyObject* indexgroup = NULL;
! if (!PyArg_ParseTuple(args, "OiO!|iOO", &pattern, &flags,
! &PyList_Type, &code, &groups,
! &groupindex, &indexgroup))
return NULL;
! n = PyList_GET_SIZE(code);
! self = PyObject_NEW_VAR(PatternObject, &Pattern_Type, n);
! if (!self)
return NULL;
for (i = 0; i < n; i++) {
! PyObject *o = PyList_GET_ITEM(code, i);
self->code[i] = (SRE_CODE) PyInt_AsLong(o);
}
! if (PyErr_Occurred()) {
! PyObject_DEL(self);
return NULL;
+ }
Py_INCREF(pattern);
***************
*** 1246,1252 ****
if (flags & SRE_FLAG_LOCALE)
return Py_BuildValue("i", sre_lower_locale(character));
- #if defined(HAVE_UNICODE)
if (flags & SRE_FLAG_UNICODE)
return Py_BuildValue("i", sre_lower_unicode(character));
#endif
return Py_BuildValue("i", sre_lower(character));
--- 1327,1335 ----
if (flags & SRE_FLAG_LOCALE)
return Py_BuildValue("i", sre_lower_locale(character));
if (flags & SRE_FLAG_UNICODE)
+ #if defined(HAVE_UNICODE)
return Py_BuildValue("i", sre_lower_unicode(character));
+ #else
+ return Py_BuildValue("i", sre_lower_locale(character));
#endif
return Py_BuildValue("i", sre_lower(character));
***************
*** 1356,1362 ****
if (pattern->flags & SRE_FLAG_LOCALE)
state->lower = sre_lower_locale;
- #if defined(HAVE_UNICODE)
else if (pattern->flags & SRE_FLAG_UNICODE)
state->lower = sre_lower_unicode;
#endif
else
--- 1439,1447 ----
if (pattern->flags & SRE_FLAG_LOCALE)
state->lower = sre_lower_locale;
else if (pattern->flags & SRE_FLAG_UNICODE)
+ #if defined(HAVE_UNICODE)
state->lower = sre_lower_unicode;
+ #else
+ state->lower = sre_lower_locale;
#endif
else
***************
*** 1496,1500 ****
string = state_init(&self->state, pattern, string, start, end);
if (!string) {
! PyObject_Del(self);
return NULL;
}
--- 1581,1585 ----
string = state_init(&self->state, pattern, string, start, end);
if (!string) {
! PyObject_DEL(self);
return NULL;
}
***************
*** 1511,1514 ****
--- 1596,1600 ----
Py_XDECREF(self->pattern);
Py_XDECREF(self->groupindex);
+ Py_XDECREF(self->indexgroup);
PyObject_DEL(self);
}
***************
*** 1594,1598 ****
PyObject* result;
! name = PyString_FromString(MODULE);
if (!name)
return NULL;
--- 1680,1684 ----
PyObject* result;
! name = PyString_FromString(SRE_MODULE);
if (!name)
return NULL;
***************
*** 1681,1684 ****
--- 1767,1772 ----
PyObject* item;
+ state_reset(&state);
+
state.ptr = state.start;
***************
*** 1963,1967 ****
PyObject* def = Py_None;
static char* kwlist[] = { "default", NULL };
! if (!PyArg_ParseTupleAndKeywords(args, kw, "|O:groups", kwlist, &def))
return NULL;
--- 2051,2055 ----
PyObject* def = Py_None;
static char* kwlist[] = { "default", NULL };
! if (!PyArg_ParseTupleAndKeywords(args, kw, "|O:groupdict", kwlist, &def))
return NULL;
***************
*** 1971,1997 ****
keys = PyMapping_Keys(self->pattern->groupindex);
! if (!keys) {
! Py_DECREF(result);
! return NULL;
! }
for (index = 0; index < PyList_GET_SIZE(keys); index++) {
PyObject* key;
! PyObject* item;
key = PyList_GET_ITEM(keys, index);
! if (!key) {
! Py_DECREF(keys);
! Py_DECREF(result);
! return NULL;
! }
! item = match_getslice(self, key, def);
! if (!item) {
Py_DECREF(key);
! Py_DECREF(keys);
! Py_DECREF(result);
! return NULL;
}
! /* FIXME: <fl> this can fail, right? */
! PyDict_SetItem(result, key, item);
}
--- 2059,2081 ----
keys = PyMapping_Keys(self->pattern->groupindex);
! if (!keys)
! goto failed;
for (index = 0; index < PyList_GET_SIZE(keys); index++) {
+ int status;
PyObject* key;
! PyObject* value;
key = PyList_GET_ITEM(keys, index);
! if (!key)
! goto failed;
! value = match_getslice(self, key, def);
! if (!value) {
Py_DECREF(key);
! goto failed;
}
! status = PyDict_SetItem(result, key, value);
! Py_DECREF(value);
! if (status < 0)
! goto failed;
}
***************
*** 1999,2002 ****
--- 2083,2091 ----
return result;
+
+ failed:
+ Py_DECREF(keys);
+ Py_DECREF(result);
+ return NULL;
}
***************
*** 2325,2339 ****
};
! void
! #if defined(WIN32)
! __declspec(dllexport)
! #endif
init_sre(void)
{
/* Patch object types */
Pattern_Type.ob_type = Match_Type.ob_type =
Scanner_Type.ob_type = &PyType_Type;
- Py_InitModule("_" MODULE, _functions);
}
--- 2414,2438 ----
};
! DL_EXPORT(void)
init_sre(void)
{
+ PyObject* m;
+ PyObject* d;
+
/* Patch object types */
Pattern_Type.ob_type = Match_Type.ob_type =
Scanner_Type.ob_type = &PyType_Type;
+
+ m = Py_InitModule("_" SRE_MODULE, _functions);
+ d = PyModule_GetDict(m);
+
+ PyDict_SetItemString(
+ d, "MAGIC", (PyObject*) PyInt_FromLong(SRE_MAGIC)
+ );
+
+ PyDict_SetItemString(
+ d, "copyright", (PyObject*) PyString_FromString(copyright)
+ );
}
Index: sre_constants.h
===================================================================
RCS file: /cvsroot/python/python/dist/src/Modules/sre_constants.h,v
retrieving revision 2.9
retrieving revision 2.9.4.1
diff -C2 -r2.9 -r2.9.4.1
*** sre_constants.h 2000/08/01 22:47:49 2.9
--- sre_constants.h 2001/06/13 15:15:02 2.9.4.1
***************
*** 7,15 ****
* to change anything in here, edit sre_constants.py and run it.
*
! * Copyright (c) 1997-2000 by Secret Labs AB. All rights reserved.
*
* See the _sre.c file for information on usage and redistribution.
*/
#define SRE_OP_FAILURE 0
#define SRE_OP_SUCCESS 1
--- 7,16 ----
* to change anything in here, edit sre_constants.py and run it.
*
! * Copyright (c) 1997-2001 by Secret Labs AB. All rights reserved.
*
* See the _sre.c file for information on usage and redistribution.
*/
+ #define SRE_MAGIC 20010320
#define SRE_OP_FAILURE 0
#define SRE_OP_SUCCESS 1
***************
*** 43,50 ****
#define SRE_AT_BEGINNING 0
#define SRE_AT_BEGINNING_LINE 1
! #define SRE_AT_BOUNDARY 2
! #define SRE_AT_NON_BOUNDARY 3
! #define SRE_AT_END 4
! #define SRE_AT_END_LINE 5
#define SRE_CATEGORY_DIGIT 0
#define SRE_CATEGORY_NOT_DIGIT 1
--- 44,57 ----
#define SRE_AT_BEGINNING 0
#define SRE_AT_BEGINNING_LINE 1
! #define SRE_AT_BEGINNING_STRING 2
! #define SRE_AT_BOUNDARY 3
! #define SRE_AT_NON_BOUNDARY 4
! #define SRE_AT_END 5
! #define SRE_AT_END_LINE 6
! #define SRE_AT_END_STRING 7
! #define SRE_AT_LOC_BOUNDARY 8
! #define SRE_AT_LOC_NON_BOUNDARY 9
! #define SRE_AT_UNI_BOUNDARY 10
! #define SRE_AT_UNI_NON_BOUNDARY 11
#define SRE_CATEGORY_DIGIT 0
#define SRE_CATEGORY_NOT_DIGIT 1