[Python-checkins] gh-98739: Update libexpat from 2.4.9 to 2.5.0 (#98742)

gpshead webhook-mailer at python.org
Thu Oct 27 16:45:41 EDT 2022


https://github.com/python/cpython/commit/3e07f827b359617664ad0880f218f17ae4483299
commit: 3e07f827b359617664ad0880f218f17ae4483299
branch: main
author: Shaun Walbridge <46331011+scdub at users.noreply.github.com>
committer: gpshead <greg at krypto.org>
date: 2022-10-27T13:45:12-07:00
summary:

gh-98739: Update libexpat from 2.4.9 to 2.5.0 (#98742)

* Update libexpat from 2.4.9 to 2.5.0 to address CVE-2022-43680.

Co-authored-by: Shaun Walbridge <shaun.walbridge at gmail.com>

files:
A Misc/NEWS.d/next/Security/2022-10-26-21-04-23.gh-issue-98739.keBWcY.rst
M Modules/expat/expat.h
M Modules/expat/xmlparse.c
M Modules/expat/xmltok_impl.h

diff --git a/Misc/NEWS.d/next/Security/2022-10-26-21-04-23.gh-issue-98739.keBWcY.rst b/Misc/NEWS.d/next/Security/2022-10-26-21-04-23.gh-issue-98739.keBWcY.rst
new file mode 100644
index 000000000000..b63a54b3676c
--- /dev/null
+++ b/Misc/NEWS.d/next/Security/2022-10-26-21-04-23.gh-issue-98739.keBWcY.rst
@@ -0,0 +1 @@
+Update bundled libexpat to 2.5.0
diff --git a/Modules/expat/expat.h b/Modules/expat/expat.h
index 2b47ce2a8d3a..1c83563cbf68 100644
--- a/Modules/expat/expat.h
+++ b/Modules/expat/expat.h
@@ -1054,8 +1054,8 @@ XML_SetBillionLaughsAttackProtectionActivationThreshold(
    See http://semver.org.
 */
 #define XML_MAJOR_VERSION 2
-#define XML_MINOR_VERSION 4
-#define XML_MICRO_VERSION 9
+#define XML_MINOR_VERSION 5
+#define XML_MICRO_VERSION 0
 
 #ifdef __cplusplus
 }
diff --git a/Modules/expat/xmlparse.c b/Modules/expat/xmlparse.c
index c0bece51d700..b6c2eca97567 100644
--- a/Modules/expat/xmlparse.c
+++ b/Modules/expat/xmlparse.c
@@ -1,4 +1,4 @@
-/* 90815a2b2c80c03b2b889fe1d427bb2b9e3282aa065e42784e001db4f23de324 (2.4.9+)
+/* 5ab094ffadd6edfc94c3eee53af44a86951f9f1f0933ada3114bbce2bfb02c99 (2.5.0+)
                             __  __            _
                          ___\ \/ /_ __   __ _| |_
                         / _ \\  /| '_ \ / _` | __|
@@ -35,6 +35,7 @@
    Copyright (c) 2021      Dong-hee Na <donghee.na at python.org>
    Copyright (c) 2022      Samanta Navarro <ferivoz at riseup.net>
    Copyright (c) 2022      Jeffrey Walton <noloader at gmail.com>
+   Copyright (c) 2022      Jann Horn <jannh at google.com>
    Licensed under the MIT license:
 
    Permission is  hereby granted,  free of charge,  to any  person obtaining
@@ -1068,6 +1069,14 @@ parserCreate(const XML_Char *encodingName,
   parserInit(parser, encodingName);
 
   if (encodingName && ! parser->m_protocolEncodingName) {
+    if (dtd) {
+      // We need to stop the upcoming call to XML_ParserFree from happily
+      // destroying parser->m_dtd because the DTD is shared with the parent
+      // parser and the only guard that keeps XML_ParserFree from destroying
+      // parser->m_dtd is parser->m_isParamEntity but it will be set to
+      // XML_TRUE only later in XML_ExternalEntityParserCreate (or not at all).
+      parser->m_dtd = NULL;
+    }
     XML_ParserFree(parser);
     return NULL;
   }
@@ -3011,9 +3020,6 @@ doContent(XML_Parser parser, int startTagLevel, const ENCODING *enc,
         int len;
         const char *rawName;
         TAG *tag = parser->m_tagStack;
-        parser->m_tagStack = tag->parent;
-        tag->parent = parser->m_freeTagList;
-        parser->m_freeTagList = tag;
         rawName = s + enc->minBytesPerChar * 2;
         len = XmlNameLength(enc, rawName);
         if (len != tag->rawNameLength
@@ -3021,6 +3027,9 @@ doContent(XML_Parser parser, int startTagLevel, const ENCODING *enc,
           *eventPP = rawName;
           return XML_ERROR_TAG_MISMATCH;
         }
+        parser->m_tagStack = tag->parent;
+        tag->parent = parser->m_freeTagList;
+        parser->m_freeTagList = tag;
         --parser->m_tagLevel;
         if (parser->m_endElementHandler) {
           const XML_Char *localPart;
@@ -4975,10 +4984,10 @@ doProlog(XML_Parser parser, const ENCODING *enc, const char *s, const char *end,
               parser->m_handlerArg, parser->m_declElementType->name,
               parser->m_declAttributeId->name, parser->m_declAttributeType, 0,
               role == XML_ROLE_REQUIRED_ATTRIBUTE_VALUE);
-          poolClear(&parser->m_tempPool);
           handleDefault = XML_FALSE;
         }
       }
+      poolClear(&parser->m_tempPool);
       break;
     case XML_ROLE_DEFAULT_ATTRIBUTE_VALUE:
     case XML_ROLE_FIXED_ATTRIBUTE_VALUE:
@@ -5386,7 +5395,7 @@ doProlog(XML_Parser parser, const ENCODING *enc, const char *s, const char *end,
              *
              * If 'standalone' is false, the DTD must have no
              * parameter entities or we wouldn't have passed the outer
-             * 'if' statement.  That measn the only entity in the hash
+             * 'if' statement.  That means the only entity in the hash
              * table is the external subset name "#" which cannot be
              * given as a parameter entity name in XML syntax, so the
              * lookup must have returned NULL and we don't even reach
@@ -5798,19 +5807,27 @@ internalEntityProcessor(XML_Parser parser, const char *s, const char *end,
 
   if (result != XML_ERROR_NONE)
     return result;
-  else if (textEnd != next
-           && parser->m_parsingStatus.parsing == XML_SUSPENDED) {
+
+  if (textEnd != next && parser->m_parsingStatus.parsing == XML_SUSPENDED) {
     entity->processed = (int)(next - (const char *)entity->textPtr);
     return result;
-  } else {
+  }
+
 #ifdef XML_DTD
-    entityTrackingOnClose(parser, entity, __LINE__);
+  entityTrackingOnClose(parser, entity, __LINE__);
 #endif
-    entity->open = XML_FALSE;
-    parser->m_openInternalEntities = openEntity->next;
-    /* put openEntity back in list of free instances */
-    openEntity->next = parser->m_freeInternalEntities;
-    parser->m_freeInternalEntities = openEntity;
+  entity->open = XML_FALSE;
+  parser->m_openInternalEntities = openEntity->next;
+  /* put openEntity back in list of free instances */
+  openEntity->next = parser->m_freeInternalEntities;
+  parser->m_freeInternalEntities = openEntity;
+
+  // If there are more open entities we want to stop right here and have the
+  // upcoming call to XML_ResumeParser continue with entity content, or it would
+  // be ignored altogether.
+  if (parser->m_openInternalEntities != NULL
+      && parser->m_parsingStatus.parsing == XML_SUSPENDED) {
+    return XML_ERROR_NONE;
   }
 
 #ifdef XML_DTD
diff --git a/Modules/expat/xmltok_impl.h b/Modules/expat/xmltok_impl.h
index c518aada013d..3469c4ae138c 100644
--- a/Modules/expat/xmltok_impl.h
+++ b/Modules/expat/xmltok_impl.h
@@ -45,7 +45,7 @@ enum {
   BT_LF,       /* line feed = "\n" */
   BT_GT,       /* greater than = ">" */
   BT_QUOT,     /* quotation character = "\"" */
-  BT_APOS,     /* aposthrophe = "'" */
+  BT_APOS,     /* apostrophe = "'" */
   BT_EQUALS,   /* equal sign = "=" */
   BT_QUEST,    /* question mark = "?" */
   BT_EXCL,     /* exclamation mark = "!" */



More information about the Python-checkins mailing list