[Python-checkins] r51850 - in python/branches/release25-maint: Lib/sgmllib.py Lib/test/sgml_input.html Lib/test/test_sgmllib.py Misc/NEWS

neal.norwitz python-checkins at python.org
Mon Sep 11 06:05:18 CEST 2006


Author: neal.norwitz
Date: Mon Sep 11 06:05:18 2006
New Revision: 51850

Added:
   python/branches/release25-maint/Lib/test/sgml_input.html   (contents, props changed)
Modified:
   python/branches/release25-maint/Lib/sgmllib.py
   python/branches/release25-maint/Lib/test/test_sgmllib.py
   python/branches/release25-maint/Misc/NEWS
Log:
As mentioned on python-dev, this reverts patch #1504333 (committed in
rev 47154) because it introduced an infinite loop.

This patch also adds a test to prevent the regression.

Will backport to 2.4 and head later.


Modified: python/branches/release25-maint/Lib/sgmllib.py
==============================================================================
--- python/branches/release25-maint/Lib/sgmllib.py	(original)
+++ python/branches/release25-maint/Lib/sgmllib.py	Mon Sep 11 06:05:18 2006
@@ -29,12 +29,7 @@
 shorttagopen = re.compile('<[a-zA-Z][-.a-zA-Z0-9]*/')
 shorttag = re.compile('<([a-zA-Z][-.a-zA-Z0-9]*)/([^/]*)/')
 piclose = re.compile('>')
-starttag = re.compile(r'<[a-zA-Z][-_.:a-zA-Z0-9]*\s*('
-        r'\s*([a-zA-Z_][-:.a-zA-Z_0-9]*)(\s*=\s*'
-        r'(\'[^\']*\'|"[^"]*"|[-a-zA-Z0-9./,:;+*%?!&$\(\)_#=~@]'
-        r'[][\-a-zA-Z0-9./,:;+*%?!&$\(\)_#=~\'"@]*(?=[\s>/<])))?'
-    r')*\s*/?\s*(?=[<>])')
-endtag = re.compile(r'</?[a-zA-Z][-_.:a-zA-Z0-9]*\s*/?\s*(?=[<>])')
+endbracket = re.compile('[<>]')
 tagfind = re.compile('[a-zA-Z][-_.a-zA-Z0-9]*')
 attrfind = re.compile(
     r'\s*([a-zA-Z_][-:.a-zA-Z_0-9]*)(\s*=\s*'
@@ -254,10 +249,14 @@
             self.finish_shorttag(tag, data)
             self.__starttag_text = rawdata[start_pos:match.end(1) + 1]
             return k
-        match = starttag.match(rawdata, i)
+        # XXX The following should skip matching quotes (' or ")
+        # As a shortcut way to exit, this isn't so bad, but shouldn't
+        # be used to locate the actual end of the start tag since the
+        # < or > characters may be embedded in an attribute value.
+        match = endbracket.search(rawdata, i+1)
         if not match:
             return -1
-        j = match.end(0)
+        j = match.start(0)
         # Now parse the data between i+1 and j into a tag and attrs
         attrs = []
         if rawdata[i:i+2] == '<>':
@@ -306,10 +305,10 @@
     # Internal -- parse endtag
     def parse_endtag(self, i):
         rawdata = self.rawdata
-        match = endtag.match(rawdata, i)
+        match = endbracket.search(rawdata, i+1)
         if not match:
             return -1
-        j = match.end(0)
+        j = match.start(0)
         tag = rawdata[i+2:j].strip().lower()
         if rawdata[j] == '>':
             j = j+1

Added: python/branches/release25-maint/Lib/test/sgml_input.html
==============================================================================
--- (empty file)
+++ python/branches/release25-maint/Lib/test/sgml_input.html	Mon Sep 11 06:05:18 2006
@@ -0,0 +1,212 @@
+<html>
+ <head>
+  <meta http-equiv="content-type" content="text/html; charset=ISO-8859-1">
+  <link rel="stylesheet" type="text/css" href="http://ogame182.de/epicblue/formate.css">
+  <script language="JavaScript" src="js/flotten.js"></script>
+ </head>
+ <body>
+    <script language=JavaScript> if (parent.frames.length == 0) { top.location.href = "http://es.ogame.org/"; } </script> <script language="JavaScript">
+function haha(z1) {
+  eval("location='"+z1.options[z1.selectedIndex].value+"'");
+}
+</script>
+<center>
+<table>
+ <tr>
+  <td></td>
+  <td>
+   <center>
+   <table>
+    <tr>
+     <td><img src="http://ogame182.de/epicblue/planeten/small/s_dschjungelplanet04.jpg" width="50" height="50"></td>
+     <td>
+      <table border="1">
+       <select size="1" onchange="haha(this)">
+                                   <option value="/game/flotten1.php?session=8912ae912fec&cp=33875341&mode=Flotte&gid=&messageziel=&re=0" selected>Alien sex friend    [2:250:6]</option> 
+                                   <option value="/game/flotten1.php?session=8912ae912fec&cp=33905100&mode=Flotte&gid=&messageziel=&re=0" >1989    [2:248:14]</option> 
+                                   <option value="/game/flotten1.php?session=8912ae912fec&cp=34570808&mode=Flotte&gid=&messageziel=&re=0" >1990    [2:248:6]</option> 
+                                   <option value="/game/flotten1.php?session=8912ae912fec&cp=34570858&mode=Flotte&gid=&messageziel=&re=0" >1991    [2:254:6]</option> 
+                                   <option value="/game/flotten1.php?session=8912ae912fec&cp=34572929&mode=Flotte&gid=&messageziel=&re=0" >Colonia    [2:253:12]</option> 
+               </select>
+      </table>
+     </td>
+    </tr>
+  </table>
+  </center>
+  </td>
+  <td>
+   <table border="0" width="100%" cellspacing="0" cellpadding="0">
+    <tr>
+     <td align="center"></td>
+     <td align="center" width="85">
+      <img border="0" src="http://ogame182.de/epicblue/images/metall.gif" width="42" height="22">
+     </td>
+     <td align="center" width="85">
+      <img border="0" src="http://ogame182.de/epicblue/images/kristall.gif" width="42" height="22">
+     </td>
+     <td align="center" width="85">
+      <img border="0" src="http://ogame182.de/epicblue/images/deuterium.gif" width="42" height="22">
+     </td>
+     <td align="center" width="85">
+      <img border="0" src="http://ogame182.de/epicblue/images/energie.gif" width="42" height="22">
+     </td>
+     <td align="center"></td>
+    </tr>
+    <tr>
+     <td align="center"><i><b>&nbsp;&nbsp;</b></i></td>
+     <td align="center" width="85"><i><b><font color="#ffffff">Metal</font></b></i></td>
+     <td align="center" width="85"><i><b><font color="#ffffff">Cristal</font></b></i></td>
+     <td align="center" width="85"><i><b><font color="#ffffff">Deuterio</font></b></i></td>
+     <td align="center" width="85"><i><b><font color="#ffffff">Energía</font></b></i></td>
+     <td align="center"><i><b>&nbsp;&nbsp;</b></i></td>
+    </tr>
+    <tr>
+     <td align="center"></td>
+     <td align="center" width="85">160.636</td>
+     <td align="center" width="85">3.406</td>
+     <td align="center" width="85">39.230</td>
+     <td align="center" width="85"><font color=#ff0000>-80</font>/3.965</td>
+     <td align="center"></td>
+    </tr>
+   </table>
+  </tr>
+ </table>
+  </center>
+<br />
+  <script language="JavaScript">
+  <!--
+     function link_to_gamepay() {
+    self.location = "https://www.gamepay.de/?lang=es&serverID=8&userID=129360&gameID=ogame&gui=v2&chksum=a9751afa9e37e6b1b826356bcca45675";
+  }
+//-->
+  </script>
+<center>
+  <table width="519" border="0" cellpadding="0" cellspacing="1">
+   <tr height="20">
+  <td colspan="8" class="c">Flotas (max. 9)</td>
+   </tr>
+     <tr height="20">
+    <th>Num.</th>
+    <th>Misión</th>
+    <th>Cantidad</th>
+    <th>Comienzo</th>
+    <th>Salida</th>
+    <th>Objetivo</th>
+    <th>Llegada</th>
+    <th>Orden</th>   
+   </tr>
+     <tr height="20">
+    <th>1</th>
+    <th>  
+      <a title="">Espionaje</a>
+      <a title="Flota en el planeta">(F)</a>
+    </th>
+    <th> <a title="Sonda de espionaje: 3 
+">3</a></th>
+    <th>[2:250:6]</th>
+    <th>Wed Aug 9 18:00:02</th>
+    <th>[2:242:5]</th>
+    <th>Wed Aug 9 18:01:02</th>
+    <th>
+         <form action="flotten1.php?session=8912ae912fec" method="POST">
+	<input type="hidden" name="order_return" value="25054490" />
+        <input type="submit" value="Enviar de regreso" />
+     </form>
+            </th>
+   </tr>
+   <tr height="20">
+    <th>2</th>
+    <th>  
+      <a title="">Espionaje</a>
+      <a title="Volver al planeta">(V)</a>
+    </th>
+    <th> <a title="Sonda de espionaje: 3 
+">3</a></th>
+    <th>[2:250:6]</th>
+    <th>Wed Aug 9 17:59:55</th>
+    <th>[2:242:1]</th>
+    <th>Wed Aug 9 18:01:55</th>
+    <th>
+            </th>
+   </tr>
+  </table>
+
+
+  
+<form action="flotten2.php?session=8912ae912fec" method="POST">
+  <table width="519" border="0" cellpadding="0" cellspacing="1">
+       <tr height="20">
+  <td colspan="4" class="c">Nueva misión: elegir naves</td>
+   </tr>
+   <tr height="20">
+  <th>Naves</th>
+  <th>Disponibles</th>
+<!--    <th>Gesch.</th> -->
+    <th>-</th>
+    <th>-</th>
+   </tr>
+   <tr height="20">
+    <th><a title="Velocidad: 8500">Nave pequeña de carga</a></th> 
+    <th>10<input type="hidden" name="maxship202" value="10"/></th>
+<!--    <th>8500 -->
+     <input type="hidden" name="consumption202" value="10"/>
+     <input type="hidden" name="speed202" value="8500" /></th>
+     <input type="hidden" name="capacity202" value="5000" /></th>
+     <th><a href="javascript:maxShip('ship202');" >máx</a> </th>
+     <th><input name="ship202" size="10" value="0" alt="Nave pequeña de carga 10"/></th>
+   </tr>
+   <tr height="20">
+    <th><a title="Velocidad: 12750">Nave grande de carga</a></th> 
+    <th>19<input type="hidden" name="maxship203" value="19"/></th>
+<!--    <th>12750 -->
+     <input type="hidden" name="consumption203" value="50"/>
+     <input type="hidden" name="speed203" value="12750" /></th>
+     <input type="hidden" name="capacity203" value="25000" /></th>
+     <th><a href="javascript:maxShip('ship203');" >máx</a> </th>
+     <th><input name="ship203" size="10" value="0" alt="Nave grande de carga 19"/></th>
+   </tr>
+   <tr height="20">
+    <th><a title="Velocidad: 27000">Crucero</a></th> 
+    <th>6<input type="hidden" name="maxship206" value="6"/></th>
+<!--    <th>27000 -->
+     <input type="hidden" name="consumption206" value="300"/>
+     <input type="hidden" name="speed206" value="27000" /></th>
+     <input type="hidden" name="capacity206" value="800" /></th>
+     <th><a href="javascript:maxShip('ship206');" >máx</a> </th>
+     <th><input name="ship206" size="10" value="0" alt="Crucero 6"/></th>
+   </tr>
+   <tr height="20">
+    <th><a title="Velocidad: 3400">Reciclador</a></th> 
+    <th>1<input type="hidden" name="maxship209" value="1"/></th>
+<!--    <th>3400 -->
+     <input type="hidden" name="consumption209" value="300"/>
+     <input type="hidden" name="speed209" value="3400" /></th>
+     <input type="hidden" name="capacity209" value="20000" /></th>
+     <th><a href="javascript:maxShip('ship209');" >máx</a> </th>
+     <th><input name="ship209" size="10" value="0" alt="Reciclador 1"/></th>
+   </tr>
+   <tr height="20">
+    <th><a title="Velocidad: 170000000">Sonda de espionaje</a></th> 
+    <th>139<input type="hidden" name="maxship210" value="139"/></th>
+<!--    <th>170000000 -->
+     <input type="hidden" name="consumption210" value="1"/>
+     <input type="hidden" name="speed210" value="170000000" /></th>
+     <input type="hidden" name="capacity210" value="5" /></th>
+     <th><a href="javascript:maxShip('ship210');" >máx</a> </th>
+     <th><input name="ship210" size="10" value="0" alt="Sonda de espionaje 139"/></th>
+   </tr>
+   <tr height="20">
+  <th colspan="2"><a href="javascript:noShips();" >Ninguna nave</a></th>
+  <th colspan="2"><a href="javascript:maxShips();" >Todas las naves</a></th>
+   </tr>
+    <tr height="20">
+    <th colspan="4"><input type="submit" value="Continuar" /></th>
+   </tr>
+<tr><th colspan=4>
+<iframe id='a44fb522' name='a44fb522' src='http://ads.gameforgeads.de/adframe.php?n=a44fb522&amp;what=zone:578' framespacing='0' frameborder='no' scrolling='no' width='468' height='60'></iframe>
+<br><center></center></br>
+</th></tr>
+</form>
+</table>
+ </body>
+</html>

Modified: python/branches/release25-maint/Lib/test/test_sgmllib.py
==============================================================================
--- python/branches/release25-maint/Lib/test/test_sgmllib.py	(original)
+++ python/branches/release25-maint/Lib/test/test_sgmllib.py	Mon Sep 11 06:05:18 2006
@@ -286,21 +286,6 @@
             ('codepoint', 'convert', 42),
             ])
 
-    def test_attr_values_quoted_markup(self):
-        """Multi-line and markup in attribute values"""
-        self.check_events("""<a title='foo\n<br>bar'>text</a>""",
-            [("starttag", "a", [("title", "foo\n<br>bar")]),
-             ("data", "text"),
-             ("endtag", "a")])
-        self.check_events("""<a title='less < than'>text</a>""",
-            [("starttag", "a", [("title", "less < than")]),
-             ("data", "text"),
-             ("endtag", "a")])
-        self.check_events("""<a title='greater > than'>text</a>""",
-            [("starttag", "a", [("title", "greater > than")]),
-             ("data", "text"),
-             ("endtag", "a")])
-
     def test_attr_funky_names(self):
         self.check_events("""<a a.b='v' c:d=v e-f=v>""", [
             ("starttag", "a", [("a.b", "v"), ("c:d", "v"), ("e-f", "v")]),
@@ -376,6 +361,19 @@
             ('decl', 'DOCTYPE doc [<!ATTLIST doc attr (a | b) >]'),
             ])
 
+    def test_read_chunks(self):
+        # SF bug #1541697, this caused sgml parser to hang
+        # Just verify this code doesn't cause a hang.
+        CHUNK = 1024  # increasing this to 8212 makes the problem go away
+
+        f = open(test_support.findfile('sgml_input.html'))
+        fp = sgmllib.SGMLParser()
+        while 1:
+            data = f.read(CHUNK)
+            fp.feed(data)
+            if len(data) != CHUNK:
+                break
+
     # XXX These tests have been disabled by prefixing their names with
     # an underscore.  The first two exercise outstanding bugs in the
     # sgmllib module, and the third exhibits questionable behavior

Modified: python/branches/release25-maint/Misc/NEWS
==============================================================================
--- python/branches/release25-maint/Misc/NEWS	(original)
+++ python/branches/release25-maint/Misc/NEWS	Mon Sep 11 06:05:18 2006
@@ -49,6 +49,8 @@
 Library
 -------
 
+- Reverted patch #1504333 because it introduced an infinite loop.
+
 - Patch #1553314: Fix the inspect.py slowdown that was hurting IPython & SAGE
   by adding smarter caching in inspect.getmodule().
 


More information about the Python-checkins mailing list