[Python-checkins] python/dist/src/Modules pyexpat.c,2.60,2.61

fdrake@users.sourceforge.net fdrake@users.sourceforge.net
Thu, 27 Jun 2002 12:40:51 -0700


Update of /cvsroot/python/python/dist/src/Modules
In directory usw-pr-cvs1:/tmp/cvs-serv20596/Modules

Modified Files:
	pyexpat.c 
Log Message:
Integrate the changes from PyXML's version of pyexpat.c revisions
1.47, 1.48, 1.49 (name interning support).


Index: pyexpat.c
===================================================================
RCS file: /cvsroot/python/python/dist/src/Modules/pyexpat.c,v
retrieving revision 2.60
retrieving revision 2.61
diff -C2 -d -r2.60 -r2.61
*** pyexpat.c	13 Jun 2002 20:32:52 -0000	2.60
--- pyexpat.c	27 Jun 2002 19:40:48 -0000	2.61
***************
*** 61,64 ****
--- 61,65 ----
      int specified_attributes;   /* Report only specified attributes. */
      int in_callback;            /* Is a callback active? */
+     PyObject *intern;           /* Dictionary to intern strings */
      PyObject **handlers;
  } xmlparseobject;
***************
*** 124,128 ****
  
  static PyObject *
! conv_string_to_unicode(XML_Char *str)
  {
      /* XXX currently this code assumes that XML_Char is 8-bit, 
--- 125,129 ----
  
  static PyObject *
! conv_string_to_unicode(const XML_Char *str)
  {
      /* XXX currently this code assumes that XML_Char is 8-bit, 
***************
*** 133,138 ****
          return Py_None;
      }
!     return PyUnicode_DecodeUTF8((const char *)str, 
!                                 strlen((const char *)str), 
                                  "strict");
  }
--- 134,138 ----
          return Py_None;
      }
!     return PyUnicode_DecodeUTF8(str, strlen(str), 
                                  "strict");
  }
***************
*** 156,160 ****
  
  static PyObject *
! conv_string_to_utf8(XML_Char *str)
  {
      /* XXX currently this code assumes that XML_Char is 8-bit, 
--- 156,160 ----
  
  static PyObject *
! conv_string_to_utf8(const XML_Char *str)
  {
      /* XXX currently this code assumes that XML_Char is 8-bit, 
***************
*** 165,169 ****
          return Py_None;
      }
!     return PyString_FromString((const char *)str);
  }
  
--- 165,169 ----
          return Py_None;
      }
!     return PyString_FromString(str);
  }
  
***************
*** 276,279 ****
--- 276,298 ----
  #endif
  
+ static PyObject*      /* returns a new reference, or NULL on failure */
+ string_intern(xmlparseobject *self, const char* str)
+ {   /* Convert str, sharing one object per distinct value via self->intern. */
+     PyObject *result = STRING_CONV_FUNC(str);
+     PyObject *value;
+     if (!result || !self->intern)   /* conversion failed, or interning off */
+ 	return result;
+     value = PyDict_GetItem(self->intern, result);   /* borrowed reference */
+     if (!value) {
+ 	if (PyDict_SetItem(self->intern, result, result) == 0)
+             return result;          /* first occurrence: now in the dict */
+         Py_DECREF(result);          /* insertion failed: do not leak result */
+         return NULL;
+     }
+     Py_INCREF(value);               /* hand back the already-interned copy */
+     Py_DECREF(result);
+     return value;
+ }
+ 
  static void
  my_StartElementHandler(void *userData,
***************
*** 308,312 ****
          }
          for (i = 0; i < max; i += 2) {
!             PyObject *n = STRING_CONV_FUNC((XML_Char *) atts[i]);
              PyObject *v;
              if (n == NULL) {
--- 327,331 ----
          }
          for (i = 0; i < max; i += 2) {
!             PyObject *n = string_intern(self, (XML_Char *) atts[i]);
              PyObject *v;
              if (n == NULL) {
***************
*** 337,341 ****
              }
          }
!         args = Py_BuildValue("(O&N)", STRING_CONV_FUNC,name, container);
          if (args == NULL) {
              Py_DECREF(container);
--- 356,360 ----
              }
          }
! 	args = Py_BuildValue("(NN)", string_intern(self, name), container);
          if (args == NULL) {
              Py_DECREF(container);
***************
*** 395,399 ****
  VOID_HANDLER(EndElement, 
               (void *userData, const XML_Char *name), 
!              ("(O&)", STRING_CONV_FUNC, name))
  
  VOID_HANDLER(ProcessingInstruction,
--- 414,418 ----
  VOID_HANDLER(EndElement, 
               (void *userData, const XML_Char *name), 
!              ("(N)", string_intern(self, name)))
  
  VOID_HANDLER(ProcessingInstruction,
***************
*** 401,405 ****
                const XML_Char *target, 
                const XML_Char *data),
!              ("(O&O&)",STRING_CONV_FUNC,target, STRING_CONV_FUNC,data))
  
  #ifndef Py_USING_UNICODE
--- 420,424 ----
                const XML_Char *target, 
                const XML_Char *data),
!              ("(NO&)", string_intern(self, target), STRING_CONV_FUNC,data))
  
  #ifndef Py_USING_UNICODE
***************
*** 422,429 ****
                const XML_Char *publicId,
                const XML_Char *notationName),
!              ("(O&O&O&O&O&)", 
!               STRING_CONV_FUNC,entityName, STRING_CONV_FUNC,base, 
!               STRING_CONV_FUNC,systemId, STRING_CONV_FUNC,publicId, 
!               STRING_CONV_FUNC,notationName))
  
  #ifndef Py_USING_UNICODE
--- 441,448 ----
                const XML_Char *publicId,
                const XML_Char *notationName),
!              ("(NNNNN)",
!               string_intern(self, entityName), string_intern(self, base), 
!               string_intern(self, systemId), string_intern(self, publicId), 
!               string_intern(self, notationName)))
  
  #ifndef Py_USING_UNICODE
***************
*** 438,446 ****
                const XML_Char *publicId,
                const XML_Char *notationName),
!              ("O&iNO&O&O&O&",
!               STRING_CONV_FUNC,entityName, is_parameter_entity,
                conv_string_len_to_utf8(value, value_length),
!               STRING_CONV_FUNC,base, STRING_CONV_FUNC,systemId,
!               STRING_CONV_FUNC,publicId, STRING_CONV_FUNC,notationName))
  #else
  VOID_HANDLER(EntityDecl,
--- 457,466 ----
                const XML_Char *publicId,
                const XML_Char *notationName),
!              ("NiNNNNN",
!               string_intern(self, entityName), is_parameter_entity,
                conv_string_len_to_utf8(value, value_length),
!               string_intern(self, base), string_intern(self, systemId),
!               string_intern(self, publicId),
!               string_intern(self, notationName)))
  #else
  VOID_HANDLER(EntityDecl,
***************
*** 454,464 ****
                const XML_Char *publicId,
                const XML_Char *notationName),
!              ("O&iNO&O&O&O&",
!               STRING_CONV_FUNC,entityName, is_parameter_entity,
                (self->returns_unicode 
                 ? conv_string_len_to_unicode(value, value_length) 
                 : conv_string_len_to_utf8(value, value_length)),
!               STRING_CONV_FUNC,base, STRING_CONV_FUNC,systemId,
!               STRING_CONV_FUNC,publicId, STRING_CONV_FUNC,notationName))
  #endif
  
--- 474,485 ----
                const XML_Char *publicId,
                const XML_Char *notationName),
!              ("NiNNNNN",
!               string_intern(self, entityName), is_parameter_entity,
                (self->returns_unicode 
                 ? conv_string_len_to_unicode(value, value_length) 
                 : conv_string_len_to_utf8(value, value_length)),
!               string_intern(self, base), string_intern(self, systemId),
!               string_intern(self, publicId),
!               string_intern(self, notationName)))
  #endif
  
***************
*** 474,478 ****
  static PyObject *
  conv_content_model(XML_Content * const model,
!                    PyObject *(*conv_string)(XML_Char *))
  {
      PyObject *result = NULL;
--- 495,499 ----
  static PyObject *
  conv_content_model(XML_Content * const model,
!                    PyObject *(*conv_string)(const XML_Char *))
  {
      PyObject *result = NULL;
***************
*** 515,520 ****
                const XML_Char *name,
                XML_Content *model),
!              ("O&O&",
!               STRING_CONV_FUNC,name,
                (self->returns_unicode ? conv_content_model_unicode
                                       : conv_content_model_utf8),model))
--- 536,541 ----
                const XML_Char *name,
                XML_Content *model),
!              ("NO&",
!               string_intern(self, name),
                (self->returns_unicode ? conv_content_model_unicode
                                       : conv_content_model_utf8),model))
***************
*** 524,529 ****
                const XML_Char *name,
                XML_Content *model),
!              ("O&O&",
!               STRING_CONV_FUNC,name, conv_content_model_utf8,model))
  #endif
  
--- 545,550 ----
                const XML_Char *name,
                XML_Content *model),
!              ("NO&",
!               string_intern(self, name), conv_content_model_utf8,model))
  #endif
  
***************
*** 535,540 ****
                const XML_Char *dflt,
                int isrequired),
!              ("(O&O&O&O&i)",
!               STRING_CONV_FUNC,elname, STRING_CONV_FUNC,attname,
                STRING_CONV_FUNC,att_type, STRING_CONV_FUNC,dflt,
                isrequired))
--- 556,561 ----
                const XML_Char *dflt,
                int isrequired),
!              ("(NNO&O&i)",
!               string_intern(self, elname), string_intern(self, attname),
                STRING_CONV_FUNC,att_type, STRING_CONV_FUNC,dflt,
                isrequired))
***************
*** 546,552 ****
  			const XML_Char *systemId,
  			const XML_Char *publicId),
!                 ("(O&O&O&O&)", 
! 		 STRING_CONV_FUNC,notationName, STRING_CONV_FUNC,base, 
! 		 STRING_CONV_FUNC,systemId, STRING_CONV_FUNC,publicId))
  
  VOID_HANDLER(StartNamespaceDecl,
--- 567,573 ----
  			const XML_Char *systemId,
  			const XML_Char *publicId),
!                 ("(NNNN)",
! 		 string_intern(self, notationName), string_intern(self, base), 
! 		 string_intern(self, systemId), string_intern(self, publicId)))
  
  VOID_HANDLER(StartNamespaceDecl,
***************
*** 554,567 ****
  		      const XML_Char *prefix,
  		      const XML_Char *uri),
!                 ("(O&O&)", STRING_CONV_FUNC,prefix, STRING_CONV_FUNC,uri))
  
  VOID_HANDLER(EndNamespaceDecl,
  		(void *userData,
  		    const XML_Char *prefix),
!                 ("(O&)", STRING_CONV_FUNC,prefix))
  
  VOID_HANDLER(Comment,
!                (void *userData, const XML_Char *prefix),
!                 ("(O&)", STRING_CONV_FUNC,prefix))
  
  VOID_HANDLER(StartCdataSection,
--- 575,589 ----
  		      const XML_Char *prefix,
  		      const XML_Char *uri),
!                 ("(NN)",
!                  string_intern(self, prefix), string_intern(self, uri)))
  
  VOID_HANDLER(EndNamespaceDecl,
  		(void *userData,
  		    const XML_Char *prefix),
!                 ("(N)", string_intern(self, prefix)))
  
  VOID_HANDLER(Comment,
!                (void *userData, const XML_Char *data),
!                 ("(O&)", STRING_CONV_FUNC,data))
  
  VOID_HANDLER(StartCdataSection,
***************
*** 606,612 ****
  		    const XML_Char *publicId),
  		int rc=0;,
!                 ("(O&O&O&O&)", 
! 		 STRING_CONV_FUNC,context, STRING_CONV_FUNC,base, 
! 		 STRING_CONV_FUNC,systemId, STRING_CONV_FUNC,publicId),
  		rc = PyInt_AsLong(rv);, rc,
  		XML_GetUserData(parser))
--- 628,634 ----
  		    const XML_Char *publicId),
  		int rc=0;,
!                 ("(O&NNN)",
! 		 STRING_CONV_FUNC,context, string_intern(self, base), 
! 		 string_intern(self, systemId), string_intern(self, publicId)),
  		rc = PyInt_AsLong(rv);, rc,
  		XML_GetUserData(parser))
***************
*** 618,623 ****
                const XML_Char *sysid, const XML_Char *pubid,
                int has_internal_subset),
!              ("(O&O&O&i)", STRING_CONV_FUNC,doctypeName,
!               STRING_CONV_FUNC,sysid, STRING_CONV_FUNC,pubid,
                has_internal_subset))
  
--- 640,645 ----
                const XML_Char *sysid, const XML_Char *pubid,
                int has_internal_subset),
!              ("(NNNi)", string_intern(self, doctypeName),
!               string_intern(self, sysid), string_intern(self, pubid),
                has_internal_subset))
  
***************
*** 857,860 ****
--- 879,884 ----
  							encoding);
      new_parser->handlers = 0;
+     new_parser->intern = self->intern;
+     Py_XINCREF(new_parser->intern);
  #ifdef Py_TPFLAGS_HAVE_GC
      PyObject_GC_Track(new_parser);
***************
*** 989,993 ****
  
  static PyObject *
! newxmlparseobject(char *encoding, char *namespace_separator)
  {
      int i;
--- 1013,1017 ----
  
  static PyObject *
! newxmlparseobject(char *encoding, char *namespace_separator, PyObject *intern)
  {
      int i;
***************
*** 1023,1026 ****
--- 1047,1052 ----
          self->itself = XML_ParserCreate(encoding);
      }
+     self->intern = intern;
+     Py_XINCREF(self->intern);
  #ifdef Py_TPFLAGS_HAVE_GC
      PyObject_GC_Track(self);
***************
*** 1075,1078 ****
--- 1101,1105 ----
          free(self->handlers);
      }
+     Py_XDECREF(self->intern);
  #if PY_MAJOR_VERSION == 1 && PY_MINOR_VERSION < 6
      /* Code for versions before 1.6 */
***************
*** 1119,1122 ****
--- 1146,1159 ----
      if (strcmp(name, "specified_attributes") == 0)
          return PyInt_FromLong((long) self->specified_attributes);
+     if (strcmp(name, "intern") == 0) {
+         if (self->intern == NULL) {
+             Py_INCREF(Py_None);
+             return Py_None;
+         }
+         else {
+             Py_INCREF(self->intern);
+             return self->intern;
+         }
+     }
  
      handlernum = handlername2int(name);
***************
*** 1139,1142 ****
--- 1176,1180 ----
          PyList_Append(rc, PyString_FromString("returns_unicode"));
          PyList_Append(rc, PyString_FromString("specified_attributes"));
+         PyList_Append(rc, PyString_FromString("intern"));
  
          return rc;
***************
*** 1222,1225 ****
--- 1260,1265 ----
  {
      clear_handlers(op, 0);
+     Py_XDECREF(op->intern);
+     op->intern = 0;
      return 0;
  }
***************
*** 1276,1283 ****
      char *encoding = NULL;
      char *namespace_separator = NULL;
!     static char *kwlist[] = {"encoding", "namespace_separator", NULL};
  
!     if (!PyArg_ParseTupleAndKeywords(args, kw, "|zz:ParserCreate", kwlist,
!                                      &encoding, &namespace_separator))
          return NULL;
      if (namespace_separator != NULL
--- 1316,1327 ----
      char *encoding = NULL;
      char *namespace_separator = NULL;
!     PyObject *intern = NULL;
!     PyObject *result;
!     int intern_decref = 0;
!     static char *kwlist[] = {"encoding", "namespace_separator", 
! 			     "intern", NULL};
  
!     if (!PyArg_ParseTupleAndKeywords(args, kw, "|zzO:ParserCreate", kwlist,
!                                      &encoding, &namespace_separator, &intern))
          return NULL;
      if (namespace_separator != NULL
***************
*** 1288,1292 ****
          return NULL;
      }
!     return newxmlparseobject(encoding, namespace_separator);
  }
  
--- 1332,1355 ----
          return NULL;
      }
!     /* Explicitly passing None means no interning is desired.
!        Not passing anything means that a new dictionary is used. */
!     if (intern == Py_None)
! 	intern = NULL;
!     else if (intern == NULL) {
! 	intern = PyDict_New();
! 	if (!intern)
! 	    return NULL;
! 	intern_decref = 1;
!     } 
!     else if (!PyDict_Check(intern)) {
! 	PyErr_SetString(PyExc_TypeError, "intern must be a dictionary");
! 	return NULL;
!     }
! 
!     result = newxmlparseobject(encoding, namespace_separator, intern);
!     if (intern_decref) {
! 	Py_DECREF(intern);
!     }
!     return result;
  }