[Python-checkins] cpython (merge default -> default): Merge to tip.

brian.quinlan python-checkins at python.org
Fri Apr 8 00:30:59 CEST 2011


http://hg.python.org/cpython/rev/9ddba521c3aa
changeset:   69193:9ddba521c3aa
parent:      69192:126353bc7e94
parent:      69191:567cbddf8678
user:        Brian Quinlan <brian at sweetapp.com>
date:        Fri Apr 08 08:30:41 2011 +1000
summary:
  Merge to tip.

files:
  Lib/html/parser.py            |   2 +-
  Lib/test/regrtest.py          |   2 +-
  Lib/test/test_faulthandler.py |   7 +++
  Lib/test/test_htmlparser.py   |  17 +++++++
  Misc/NEWS                     |   2 +
  Modules/faulthandler.c        |  50 ++++++++++++++--------
  setup.py                      |   2 +
  7 files changed, 61 insertions(+), 21 deletions(-)


diff --git a/Lib/html/parser.py b/Lib/html/parser.py
--- a/Lib/html/parser.py
+++ b/Lib/html/parser.py
@@ -28,7 +28,7 @@
 # make it correctly strict without breaking backward compatibility.
 attrfind = re.compile(
     r'\s*([a-zA-Z_][-.:a-zA-Z_0-9]*)(\s*=\s*'
-    r'(\'[^\']*\'|"[^"]*"|[-a-zA-Z0-9./,:;+*%?!&$\(\)_#=~@]*))?')
+    r'(\'[^\']*\'|"[^"]*"|[^\s"\'=<>`]*))?')
 attrfind_tolerant = re.compile(
     r'\s*([a-zA-Z_][-.:a-zA-Z_0-9]*)(\s*=\s*'
     r'(\'[^\']*\'|"[^"]*"|[^>\s]*))?')
diff --git a/Lib/test/regrtest.py b/Lib/test/regrtest.py
--- a/Lib/test/regrtest.py
+++ b/Lib/test/regrtest.py
@@ -240,7 +240,7 @@
          findleaks=False, use_resources=None, trace=False, coverdir='coverage',
          runleaks=False, huntrleaks=False, verbose2=False, print_slow=False,
          random_seed=None, use_mp=None, verbose3=False, forever=False,
-         header=False, timeout=30*60):
+         header=False, timeout=60*60):
     """Execute a test suite.
 
     This also parses command-line options and modifies its behavior
diff --git a/Lib/test/test_faulthandler.py b/Lib/test/test_faulthandler.py
--- a/Lib/test/test_faulthandler.py
+++ b/Lib/test/test_faulthandler.py
@@ -8,6 +8,12 @@
 import tempfile
 import unittest
 
+try:
+    import threading
+    HAVE_THREADS = True
+except ImportError:
+    HAVE_THREADS = False
+
 TIMEOUT = 0.5
 
 try:
@@ -279,6 +285,7 @@
         with temporary_filename() as filename:
             self.check_dump_traceback(filename)
 
+    @unittest.skipIf(not HAVE_THREADS, 'need threads')
     def check_dump_traceback_threads(self, filename):
         """
         Call explicitly dump_traceback(all_threads=True) and check the output.
diff --git a/Lib/test/test_htmlparser.py b/Lib/test/test_htmlparser.py
--- a/Lib/test/test_htmlparser.py
+++ b/Lib/test/test_htmlparser.py
@@ -217,6 +217,23 @@
             ("starttag", "a", [("href", "mailto:xyz@example.com")]),
             ])
 
+    def test_attr_nonascii(self):
+        # see issue 7311
+        self._run_check("<img src=/foo/bar.png alt=\u4e2d\u6587>", [
+            ("starttag", "img", [("src", "/foo/bar.png"),
+                                 ("alt", "\u4e2d\u6587")]),
+            ])
+        self._run_check("<a title='\u30c6\u30b9\u30c8' "
+                        "href='\u30c6\u30b9\u30c8.html'>", [
+            ("starttag", "a", [("title", "\u30c6\u30b9\u30c8"),
+                               ("href", "\u30c6\u30b9\u30c8.html")]),
+            ])
+        self._run_check('<a title="\u30c6\u30b9\u30c8" '
+                        'href="\u30c6\u30b9\u30c8.html">', [
+            ("starttag", "a", [("title", "\u30c6\u30b9\u30c8"),
+                               ("href", "\u30c6\u30b9\u30c8.html")]),
+            ])
+
     def test_attr_entity_replacement(self):
         self._run_check("""<a b='&amp;&gt;&lt;&quot;&apos;'>""", [
             ("starttag", "a", [("b", "&><\"'")]),
diff --git a/Misc/NEWS b/Misc/NEWS
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -94,6 +94,8 @@
 Library
 -------
 
+- Issue #7311: fix html.parser to accept non-ASCII attribute values.
+
 - Issue #11605: email.parser.BytesFeedParser was incorrectly converting multipart
   subpararts with an 8bit CTE into unicode instead of preserving the bytes.
 
diff --git a/Modules/faulthandler.c b/Modules/faulthandler.c
--- a/Modules/faulthandler.c
+++ b/Modules/faulthandler.c
@@ -5,6 +5,9 @@
 #include <frameobject.h>
 #include <signal.h>
 
+/* Allocate at maximum 100 MB of the stack to raise the stack overflow */
+#define STACK_OVERFLOW_MAX_SIZE (100*1024*1024)
+
 #ifdef WITH_THREAD
 #  define FAULTHANDLER_LATER
 #endif
@@ -16,9 +19,6 @@
 #  define FAULTHANDLER_USER
 #endif
 
-/* Allocate at maximum 100 MB of the stack to raise the stack overflow */
-#define STACK_OVERFLOW_MAX_SIZE (100*1024*1024)
-
 #define PUTS(fd, str) write(fd, str, strlen(str))
 
 #ifdef HAVE_SIGACTION
@@ -218,12 +218,7 @@
    This function is signal safe and should only call signal safe functions. */
 
 static void
-faulthandler_fatal_error(
-    int signum
-#ifdef HAVE_SIGACTION
-    , siginfo_t *siginfo, void *ucontext
-#endif
-)
+faulthandler_fatal_error(int signum)
 {
     const int fd = fatal_error.fd;
     unsigned int i;
@@ -255,6 +250,7 @@
     PUTS(fd, handler->name);
     PUTS(fd, "\n\n");
 
+#ifdef WITH_THREAD
     /* SIGSEGV, SIGFPE, SIGABRT, SIGBUS and SIGILL are synchronous signals and
        so are delivered to the thread that caused the fault. Get the Python
        thread state of the current thread.
@@ -264,6 +260,9 @@
        used. Read the thread local storage (TLS) instead: call
        PyGILState_GetThisThreadState(). */
     tstate = PyGILState_GetThisThreadState();
+#else
+    tstate = PyThreadState_Get();
+#endif
     if (tstate == NULL)
         return;
 
@@ -320,7 +319,7 @@
         for (i=0; i < faulthandler_nsignals; i++) {
             handler = &faulthandler_handlers[i];
 #ifdef HAVE_SIGACTION
-            action.sa_sigaction = faulthandler_fatal_error;
+            action.sa_handler = faulthandler_fatal_error;
             sigemptyset(&action.sa_mask);
             /* Do not prevent the signal from being received from within
                its own signal handler */
@@ -451,8 +450,8 @@
 }
 
 static PyObject*
-faulthandler_dump_traceback_later(PyObject *self,
-                                  PyObject *args, PyObject *kwargs)
+faulthandler_dump_tracebacks_later(PyObject *self,
+                                   PyObject *args, PyObject *kwargs)
 {
     static char *kwlist[] = {"timeout", "repeat", "file", "exit", NULL};
     double timeout;
@@ -461,6 +460,7 @@
     PyObject *file = NULL;
     int fd;
     int exit = 0;
+    PyThreadState *tstate;
 
     if (!PyArg_ParseTupleAndKeywords(args, kwargs,
         "d|iOi:dump_tracebacks_later", kwlist,
@@ -477,6 +477,13 @@
         return NULL;
     }
 
+    tstate = PyThreadState_Get();
+    if (tstate == NULL) {
+        PyErr_SetString(PyExc_RuntimeError,
+                        "unable to get the current thread state");
+        return NULL;
+    }
+
     file = faulthandler_get_fileno(file, &fd);
     if (file == NULL)
         return NULL;
@@ -490,7 +497,7 @@
     thread.fd = fd;
     thread.timeout_ms = timeout_ms;
     thread.repeat = repeat;
-    thread.interp = PyThreadState_Get()->interp;
+    thread.interp = tstate->interp;
     thread.exit = exit;
 
     /* Arm these locks to serve as events when released */
@@ -537,10 +544,14 @@
     if (!user->enabled)
         return;
 
+#ifdef WITH_THREAD
     /* PyThreadState_Get() doesn't give the state of the current thread if
        the thread doesn't hold the GIL. Read the thread local storage (TLS)
        instead: call PyGILState_GetThisThreadState(). */
     tstate = PyGILState_GetThisThreadState();
+#else
+    tstate = PyThreadState_Get();
+#endif
 
     if (user->all_threads)
         _Py_DumpTracebackThreads(user->fd, user->interp, tstate);
@@ -826,7 +837,7 @@
 faulthandler_traverse(PyObject *module, visitproc visit, void *arg)
 {
 #ifdef FAULTHANDLER_USER
-    unsigned int index;
+    unsigned int signum;
 #endif
 
 #ifdef FAULTHANDLER_LATER
@@ -834,8 +845,8 @@
 #endif
 #ifdef FAULTHANDLER_USER
     if (user_signals != NULL) {
-        for (index=0; index < NSIG; index++)
-            Py_VISIT(user_signals[index].file);
+        for (signum=0; signum < NSIG; signum++)
+            Py_VISIT(user_signals[signum].file);
     }
 #endif
     Py_VISIT(fatal_error.file);
@@ -861,10 +872,11 @@
                "if all_threads is True, into file")},
 #ifdef FAULTHANDLER_LATER
     {"dump_tracebacks_later",
-     (PyCFunction)faulthandler_dump_traceback_later, METH_VARARGS|METH_KEYWORDS,
-     PyDoc_STR("dump_tracebacks_later(timeout, repeat=False, file=sys.stderr):\n"
+     (PyCFunction)faulthandler_dump_tracebacks_later, METH_VARARGS|METH_KEYWORDS,
+     PyDoc_STR("dump_tracebacks_later(timeout, repeat=False, file=sys.stderrn, exit=False):\n"
                "dump the traceback of all threads in timeout seconds,\n"
-               "or each timeout seconds if repeat is True.")},
+               "or each timeout seconds if repeat is True. If exit is True, "
+               "call _exit(1) which is not safe.")},
     {"cancel_dump_tracebacks_later",
      (PyCFunction)faulthandler_cancel_dump_tracebacks_later_py, METH_NOARGS,
      PyDoc_STR("cancel_dump_tracebacks_later():\ncancel the previous call "
diff --git a/setup.py b/setup.py
--- a/setup.py
+++ b/setup.py
@@ -373,6 +373,8 @@
     def add_multiarch_paths(self):
         # Debian/Ubuntu multiarch support.
         # https://wiki.ubuntu.com/MultiarchSpec
+        if not find_executable('dpkg-architecture'):
+            return
         tmpfile = os.path.join(self.build_temp, 'multiarch')
         if not os.path.exists(self.build_temp):
             os.makedirs(self.build_temp)

-- 
Repository URL: http://hg.python.org/cpython


More information about the Python-checkins mailing list