[Expat-CVS] expat/lib expat.h,1.33,1.34 xmlparse.c,1.69,1.70

Karl Waclawek kwaclaw@users.sourceforge.net
Mon Aug 26 08:05:04 2002


Update of /cvsroot/expat/expat/lib
In directory usw-pr-cvs1:/tmp/cvs-serv17626

Modified Files:
	expat.h xmlparse.c 
Log Message:
Applied patch #599715: Enable undeclared DTD

Index: expat.h
===================================================================
RCS file: /cvsroot/expat/expat/lib/expat.h,v
retrieving revision 1.33
retrieving revision 1.34
diff -u -d -r1.33 -r1.34
--- expat.h	23 Aug 2002 15:04:51 -0000	1.33
+++ expat.h	26 Aug 2002 15:04:19 -0000	1.34
@@ -195,7 +195,8 @@
    valuable when memory allocation overhead is disproportionatly high,
    such as when a large number of small documnents need to be parsed.
    All handlers are cleared from the parser, except for the 
-   unknownEncodingHandler.
+   unknownEncodingHandler. The parser's external state is re-initialized
+   except for the values of ns and ns_triplets.
 
    Added in Expat 1.95.3.
 */
@@ -332,11 +333,14 @@
 typedef void (*XML_EndNamespaceDeclHandler)(void *userData,
                                             const XML_Char *prefix);
 
-/* This is called if the document is not standalone (it has an
+/* This is called if the document is not standalone, that is, it has an
    external subset or a reference to a parameter entity, but does not
-   have standalone="yes"). If this handler returns 0, then processing
+   have standalone="yes". If this handler returns 0, then processing
    will not continue, and the parser will return a
    XML_ERROR_NOT_STANDALONE error.
+   If parameter entity parsing is enabled, then in addition to the
+   conditions above this handler will only be called if the referenced
+   entity was actually read.
 */
 typedef int (*XML_NotStandaloneHandler)(void *userData);
 
@@ -594,8 +598,11 @@
    + sep + local_name + sep + prefix.
 
    If do_nst is zero, then namespace information is returned in the
-   default manner (URI + sep + local_name) whether or not the names
+   default manner (URI + sep + local_name) whether or not the name
    has a prefix.
+
+   Note: Calling XML_SetReturnNSTriplet after XML_Parse or
+     XML_ParseBuffer has no effect.
 */
 
 XMLPARSEAPI(void)
@@ -609,8 +616,10 @@
 #define XML_GetUserData(parser) (*(void **)(parser))
 
 /* This is equivalent to supplying an encoding argument to
-   XML_ParserCreate. It must not be called after XML_Parse or
-   XML_ParseBuffer.
+   XML_ParserCreate. On success XML_SetEncoding returns non-zero,
+   zero otherwise.
+   Note: Calling XML_SetEncoding after XML_Parse or XML_ParseBuffer
+     has no effect and returns zero.
 */
 XMLPARSEAPI(int)
 XML_SetEncoding(XML_Parser parser, const XML_Char *encoding);
@@ -622,6 +631,22 @@
 XMLPARSEAPI(void)
 XML_UseParserAsHandlerArg(XML_Parser parser);
 
+/* If useDTD == XML_TRUE is passed to this function, then the parser
+   will assume that there is an external subset, even if none is
+   specified in the document. In such a case the parser will call the
+   externalEntityRefHandler with a value of NULL for the systemId
+   argument (the publicId and context arguments will be NULL as well).
+   Note: This function must be called before the first call to
+     XML_Parse or XML_ParseBuffer; calling it after that has no
+     effect.
+   Note: If the document does not have a DOCTYPE declaration at all,
+     then startDoctypeDeclHandler and endDoctypeDeclHandler will not
+     be called, despite an external subset being parsed.
+*/
+XMLPARSEAPI(void)
+XML_UseForeignDTD(XML_Parser parser, XML_Bool useDTD);
+
+
 /* Sets the base to be used for resolving relative URIs in system
    identifiers in declarations.  Resolving relative identifiers is
    left to the application: this value will be passed through as the
@@ -730,11 +755,13 @@
    XML_ExternalEntityParserCreate has been called to create the parser
    for the external parameter entity (context must be 0 for this
    call), it is illegal to make any calls on the old parser until
-   XML_ParserFree has been called on the newly created parser.  If the
-   library has been compiled without support for parameter entity
-   parsing (ie without XML_DTD being defined), then
+   XML_ParserFree has been called on the newly created parser.
+   If the library has been compiled without support for parameter
+   entity parsing (ie without XML_DTD being defined), then
    XML_SetParamEntityParsing will return 0 if parsing of parameter
    entities is requested; otherwise it will return non-zero.
+   Note: If XML_SetParamEntityParsing is called after XML_Parse or
+      XML_ParseBuffer, then it has no effect and will always return 0.
 */
 XMLPARSEAPI(int)
 XML_SetParamEntityParsing(XML_Parser parser,

Index: xmlparse.c
===================================================================
RCS file: /cvsroot/expat/expat/lib/xmlparse.c,v
retrieving revision 1.69
retrieving revision 1.70
diff -u -d -r1.69 -r1.70
--- xmlparse.c	26 Aug 2002 01:33:56 -0000	1.69
+++ xmlparse.c	26 Aug 2002 15:04:19 -0000	1.70
@@ -230,13 +230,13 @@
   STRING_POOL entityValuePool;
   /* false once a parameter entity reference has been skipped */
   XML_Bool keepProcessing;
-  /* indicates if external PE has been read */
-  XML_Bool paramEntityRead;
   /* true once an internal or external PE reference has been encountered;
      any external subset is considered an external PE reference */
   XML_Bool hasParamEntityRefs;
   XML_Bool standalone;
 #ifdef XML_DTD
+  /* indicates if external PE has been read */
+  XML_Bool paramEntityRead;
   HASH_TABLE paramEntities;
 #endif /* XML_DTD */
   PREFIX defaultPrefix;
@@ -490,6 +490,7 @@
   XML_Parser m_parentParser;
 #ifdef XML_DTD
   XML_Bool m_isParamEntity;
+  XML_Bool m_useForeignDTD;
   enum XML_ParamEntityParsing m_paramEntityParsing;
 #endif
 } Parser;
@@ -590,9 +591,12 @@
 #define parentParser (((Parser *)parser)->m_parentParser)
 #ifdef XML_DTD
 #define isParamEntity (((Parser *)parser)->m_isParamEntity)
+#define useForeignDTD (((Parser *)parser)->m_useForeignDTD)
 #define paramEntityParsing (((Parser *)parser)->m_paramEntityParsing)
 #endif /* XML_DTD */
 
+#define parsing (processor != prologInitProcessor)
+
 #ifdef _MSC_VER
 #ifdef _DEBUG
 Parser *
@@ -777,6 +781,7 @@
   parentParser = NULL;
 #ifdef XML_DTD
   isParamEntity = XML_FALSE;
+  useForeignDTD = XML_FALSE;
   paramEntityParsing = XML_PARAM_ENTITY_PARSING_NEVER;
 #endif
 }
@@ -824,6 +829,9 @@
 int
 XML_SetEncoding(XML_Parser parser, const XML_Char *encodingName)
 {
+  /* block after XML_Parse()/XML_ParseBuffer() has been called */
+  if (parsing)
+    return 0;
   if (encodingName == NULL)
     protocolEncodingName = NULL;
   else {
@@ -1013,7 +1021,22 @@
 }
 
 void
-XML_SetReturnNSTriplet(XML_Parser parser, int do_nst) {
+XML_UseForeignDTD(XML_Parser parser, XML_Bool useDTD)
+{
+#ifdef XML_DTD
+  /* block after XML_Parse()/XML_ParseBuffer() has been called */
+  if (parsing)
+    return;
+  useForeignDTD = useDTD;
+#endif
+}
+
+void
+XML_SetReturnNSTriplet(XML_Parser parser, int do_nst)
+{
+  /* block after XML_Parse()/XML_ParseBuffer() has been called */
+  if (parsing)
+    return;
   ns_triplets = do_nst ? XML_TRUE : XML_FALSE;
 }
 
@@ -1261,13 +1284,16 @@
 
 int
 XML_SetParamEntityParsing(XML_Parser parser,
-                          enum XML_ParamEntityParsing parsing)
+                          enum XML_ParamEntityParsing peParsing)
 {
+  /* block after XML_Parse()/XML_ParseBuffer() has been called */
+  if (parsing) 
+    return 0;
 #ifdef XML_DTD
-  paramEntityParsing = parsing;
+  paramEntityParsing = peParsing;
   return 1;
 #else
-  return parsing == XML_PARAM_ENTITY_PARSING_NEVER;
+  return peParsing == XML_PARAM_ENTITY_PARSING_NEVER;
 #endif
 }
 
@@ -3107,6 +3133,9 @@
       break;
 #endif /* XML_DTD */
     case XML_ROLE_DOCTYPE_PUBLIC_ID:
+#ifdef XML_DTD
+      useForeignDTD = XML_FALSE;
+#endif /* XML_DTD */
       dtd.hasParamEntityRefs = XML_TRUE;
       if (startDoctypeDeclHandler) {
         doctypePubid = poolStoreString(&tempPool, enc,
@@ -3149,41 +3178,73 @@
         poolClear(&tempPool);
         handleDefault = XML_FALSE;
       }
-      /* doctypeSysid will be non-NULL in the case of
+      /* doctypeSysid will be non-NULL in the case of a previous
          XML_ROLE_DOCTYPE_SYSTEM_ID, even if startDoctypeDeclHandler
          was not set, indicating an external subset
       */
-      if (doctypeSysid) {
-        dtd.paramEntityRead = XML_FALSE;
-#ifdef XML_DTD
+#ifdef XML_DTD 
+      if (doctypeSysid || useForeignDTD) {
+        dtd.hasParamEntityRefs = XML_TRUE; /* when doctypeSysid == NULL */
         if (paramEntityParsing && externalEntityRefHandler) {
           ENTITY *entity = (ENTITY *)lookup(&dtd.paramEntities,
                                             externalSubsetName,
-                                            0);
+                                            sizeof(ENTITY));
+          if (!entity)
+            return XML_ERROR_NO_MEMORY;
+          if (useForeignDTD) 
+            entity->base = curBase;
+          dtd.paramEntityRead = XML_FALSE;
           if (!externalEntityRefHandler(externalEntityRefHandlerArg,
                                         0,
                                         entity->base,
                                         entity->systemId,
                                         entity->publicId))
             return XML_ERROR_EXTERNAL_ENTITY_HANDLING;
-          if (!dtd.paramEntityRead)
-            dtd.keepProcessing = dtd.standalone;
+          if (dtd.paramEntityRead &&
+              !dtd.standalone &&
+              notStandaloneHandler &&
+              !notStandaloneHandler(handlerArg))
+            return XML_ERROR_NOT_STANDALONE;
+          /* end of DTD - no need to update dtd.keepProcessing */
         }
-        else
-          dtd.keepProcessing = dtd.standalone;
-#endif /* XML_DTD */
-        if (dtd.paramEntityRead
-            && !dtd.standalone
-            && notStandaloneHandler
-            && !notStandaloneHandler(handlerArg))
-          return XML_ERROR_NOT_STANDALONE;
+        useForeignDTD = XML_FALSE;
       }
+#endif /* XML_DTD */
       if (endDoctypeDeclHandler) {
         endDoctypeDeclHandler(handlerArg);
         handleDefault = XML_FALSE;
       }
       break;
     case XML_ROLE_INSTANCE_START:
+#ifdef XML_DTD
+      /* if there is no DOCTYPE declaration then now is the 
+         last chance to read the foreign DTD
+      */
+      if (useForeignDTD) { 
+        dtd.hasParamEntityRefs = XML_TRUE;
+        if (paramEntityParsing && externalEntityRefHandler) {
+          ENTITY *entity = (ENTITY *)lookup(&dtd.paramEntities,
+                                            externalSubsetName,
+                                            sizeof(ENTITY));
+          if (!entity)
+            return XML_ERROR_NO_MEMORY;
+          entity->base = curBase;
+          dtd.paramEntityRead = XML_FALSE;
+          if (!externalEntityRefHandler(externalEntityRefHandlerArg,
+                                        0,
+                                        entity->base,
+                                        entity->systemId,
+                                        entity->publicId))
+            return XML_ERROR_EXTERNAL_ENTITY_HANDLING;
+          if (dtd.paramEntityRead &&
+              !dtd.standalone &&
+              notStandaloneHandler &&
+              !notStandaloneHandler(handlerArg))
+            return XML_ERROR_NOT_STANDALONE;
+          /* end of DTD - no need to update dtd.keepProcessing */
+        }
+      }  
+#endif /* XML_DTD */
       processor = contentProcessor;
       return contentProcessor(parser, s, end, nextPtr);
     case XML_ROLE_ATTLIST_ELEMENT_NAME:
@@ -3340,6 +3401,9 @@
       }
       break;
     case XML_ROLE_DOCTYPE_SYSTEM_ID:
+#ifdef XML_DTD
+      useForeignDTD = XML_FALSE;
+#endif /* XML_DTD */
       dtd.hasParamEntityRefs = XML_TRUE;
       if (startDoctypeDeclHandler) {
         doctypeSysid = poolStoreString(&tempPool, enc,
@@ -3350,8 +3414,8 @@
         poolFinish(&tempPool);
         handleDefault = XML_FALSE;
       }
-      else
 #ifdef XML_DTD
+      else
         /* use externalSubsetName to make doctypeSysid non-NULL
            for the case where no startDoctypeDeclHandler is set */
         doctypeSysid = externalSubsetName;
@@ -3647,7 +3711,6 @@
           role == XML_ROLE_INNER_PARAM_ENTITY_REF)
         return XML_ERROR_PARAM_ENTITY_REF;
       dtd.hasParamEntityRefs = XML_TRUE;
-      dtd.paramEntityRead = XML_FALSE;
       if (!paramEntityParsing)
         dtd.keepProcessing = dtd.standalone;
       else {
@@ -3693,6 +3756,7 @@
           break;
         }
         if (externalEntityRefHandler) {
+          dtd.paramEntityRead = XML_FALSE;
           entity->open = XML_TRUE;
           if (!externalEntityRefHandler(externalEntityRefHandlerArg,
                                         0,
@@ -3704,17 +3768,20 @@
           }
           entity->open = XML_FALSE;
           handleDefault = XML_FALSE;
-          if (!dtd.paramEntityRead)
+          if (!dtd.paramEntityRead) {
             dtd.keepProcessing = dtd.standalone;
+            break;
+          }
         }
-        else
+        else {
           dtd.keepProcessing = dtd.standalone;
+          break;
+        }
       }
 #endif /* XML_DTD */
-      if (dtd.paramEntityRead
-          && !dtd.standalone
-          && notStandaloneHandler
-          && !notStandaloneHandler(handlerArg))
+      if (!dtd.standalone &&
+          notStandaloneHandler &&
+          !notStandaloneHandler(handlerArg))
         return XML_ERROR_NOT_STANDALONE;
       break;
 
@@ -4696,23 +4763,24 @@
   hashTableInit(&(p->elementTypes), ms);
   hashTableInit(&(p->attributeIds), ms);
   hashTableInit(&(p->prefixes), ms);
-  p->keepProcessing = XML_TRUE;
-  p->paramEntityRead = XML_FALSE;
-  p->hasParamEntityRefs = XML_FALSE;
-  p->standalone = XML_FALSE;
 #ifdef XML_DTD
+  p->paramEntityRead = XML_FALSE;
   hashTableInit(&(p->paramEntities), ms);
 #endif /* XML_DTD */
   p->defaultPrefix.name = NULL;
   p->defaultPrefix.binding = NULL;
 
   p->in_eldecl = XML_FALSE;
-  p->scaffIndex = 0;
-  p->scaffLevel = 0;
+  p->scaffIndex = NULL;
   p->scaffold = NULL;
-  p->contentStringLen = 0;
+  p->scaffLevel = 0;
   p->scaffSize = 0;
   p->scaffCount = 0;
+  p->contentStringLen = 0;
+
+  p->keepProcessing = XML_TRUE;
+  p->hasParamEntityRefs = XML_FALSE;
+  p->standalone = XML_FALSE;
 }
 
 #ifdef XML_DTD
@@ -4742,6 +4810,7 @@
   }
   hashTableClear(&(p->generalEntities));
 #ifdef XML_DTD
+  p->paramEntityRead = XML_FALSE;
   hashTableClear(&(p->paramEntities));
 #endif /* XML_DTD */
   hashTableClear(&(p->elementTypes));
@@ -4751,6 +4820,10 @@
 #ifdef XML_DTD
   poolClear(&(p->entityValuePool));
 #endif /* XML_DTD */
+  p->defaultPrefix.name = NULL;
+  p->defaultPrefix.binding = NULL;
+
+  p->in_eldecl = XML_FALSE;
   if (p->scaffIndex) {
     FREE(p->scaffIndex);
     p->scaffIndex = NULL;
@@ -4759,6 +4832,14 @@
     FREE(p->scaffold);
     p->scaffold = NULL;
   }
+  p->scaffLevel = 0;
+  p->scaffSize = 0;
+  p->scaffCount = 0;
+  p->contentStringLen = 0;
+
+  p->keepProcessing = XML_TRUE;
+  p->hasParamEntityRefs = XML_FALSE;
+  p->standalone = XML_FALSE;
 }
 
 static void
@@ -4907,10 +4988,10 @@
                        &(newDtd->pool),
                        &(oldDtd->paramEntities), parser))
       return 0;
+  newDtd->paramEntityRead = oldDtd->paramEntityRead;
 #endif /* XML_DTD */
 
   newDtd->keepProcessing = oldDtd->keepProcessing;
-  newDtd->paramEntityRead = oldDtd->paramEntityRead;
   newDtd->hasParamEntityRefs = oldDtd->hasParamEntityRefs;
   newDtd->standalone = oldDtd->standalone;