[Python-checkins] r84637 - in python/branches/release27-maint: Misc/NEWS Tools/gdb/libpython.py

antoine.pitrou python-checkins at python.org
Wed Sep 8 23:12:36 CEST 2010


Author: antoine.pitrou
Date: Wed Sep  8 23:12:36 2010
New Revision: 84637

Log:
Merged revisions 84635-84636 via svnmerge from 
svn+ssh://pythondev@svn.python.org/python/branches/py3k

........
  r84635 | antoine.pitrou | 2010-09-08 22:57:48 +0200 (mer., 08 sept. 2010) | 5 lines
  
  Issue #9188: The gdb extension now correctly handles narrow (UCS2) as well
  as wide (UCS4) unicode builds for both the host interpreter (embedded
  inside gdb) and the interpreter under test.
........
  r84636 | antoine.pitrou | 2010-09-08 23:07:40 +0200 (mer., 08 sept. 2010) | 4 lines
  
  Add a safety limit to the number of unicode characters we fetch
  (followup to r84635, suggested by Dave Malcolm).
........


Modified:
   python/branches/release27-maint/   (props changed)
   python/branches/release27-maint/Misc/NEWS
   python/branches/release27-maint/Tools/gdb/libpython.py

Modified: python/branches/release27-maint/Misc/NEWS
==============================================================================
--- python/branches/release27-maint/Misc/NEWS	(original)
+++ python/branches/release27-maint/Misc/NEWS	Wed Sep  8 23:12:36 2010
@@ -288,6 +288,13 @@
 
 - Issue #7567: Don't call `setupterm' twice.
 
+Tools/Demos
+-----------
+
+- Issue #9188: The gdb extension now handles correctly narrow (UCS2) as well
+  as wide (UCS4) unicode builds for both the host interpreter (embedded
+  inside gdb) and the interpreter under test.
+
 Build
 -----
 

Modified: python/branches/release27-maint/Tools/gdb/libpython.py
==============================================================================
--- python/branches/release27-maint/Tools/gdb/libpython.py	(original)
+++ python/branches/release27-maint/Tools/gdb/libpython.py	Wed Sep  8 23:12:36 2010
@@ -1011,6 +1011,18 @@
     _typename = 'PyTypeObject'
 
 
+if sys.maxunicode >= 0x10000:
+    _unichr = unichr
+else:
+    # Needed for proper surrogate support if sizeof(Py_UNICODE) is 2 in gdb
+    def _unichr(x):
+        if x < 0x10000:
+            return unichr(x)
+        x -= 0x10000
+        ch1 = 0xD800 | (x >> 10)
+        ch2 = 0xDC00 | (x & 0x3FF)
+        return unichr(ch1) + unichr(ch2)
+
 class PyUnicodeObjectPtr(PyObjectPtr):
     _typename = 'PyUnicodeObject'
 
@@ -1027,37 +1039,36 @@
 
         # Gather a list of ints from the Py_UNICODE array; these are either
         # UCS-2 or UCS-4 code points:
-        Py_UNICODEs = [int(field_str[i]) for i in safe_range(field_length)]
+        if self.char_width() > 2:
+            Py_UNICODEs = [int(field_str[i]) for i in safe_range(field_length)]
+        else:
+            # A more elaborate routine if sizeof(Py_UNICODE) is 2 in the
+            # inferior process: we must join surrogate pairs.
+            Py_UNICODEs = []
+            i = 0
+            limit = safety_limit(field_length)
+            while i < limit:
+                ucs = int(field_str[i])
+                i += 1
+                if ucs < 0xD800 or ucs >= 0xDC00 or i == field_length:
+                    Py_UNICODEs.append(ucs)
+                    continue
+                # This could be a surrogate pair.
+                ucs2 = int(field_str[i])
+                if ucs2 < 0xDC00 or ucs2 > 0xDFFF:
+                    continue
+                code = (ucs & 0x03FF) << 10
+                code |= ucs2 & 0x03FF
+                code += 0x00010000
+                Py_UNICODEs.append(code)
+                i += 1
 
         # Convert the int code points to unicode characters, and generate a
-        # local unicode instance:
-        result = u''.join([unichr(ucs) for ucs in Py_UNICODEs])
+        # local unicode instance.
+        # This splits surrogate pairs if sizeof(Py_UNICODE) is 2 here (in gdb).
+        result = u''.join([_unichr(ucs) for ucs in Py_UNICODEs])
         return result
 
-    def write_repr(self, out, visited):
-        proxy = self.proxyval(visited)
-        if self.char_width() == 2:
-            # sizeof(Py_UNICODE)==2: join surrogates
-            proxy2 = []
-            i = 0
-            while i < len(proxy):
-                ch = proxy[i]
-                i += 1
-                if (i < len(proxy)
-                and 0xD800 <= ord(ch) < 0xDC00 \
-                and 0xDC00 <= ord(proxy[i]) <= 0xDFFF):
-                    # Get code point from surrogate pair
-                    ch2 = proxy[i]
-                    code = (ord(ch) & 0x03FF) << 10
-                    code |= ord(ch2) & 0x03FF
-                    code += 0x00010000
-                    i += 1
-                    proxy2.append(unichr(code))
-                else:
-                    proxy2.append(ch)
-            proxy = u''.join(proxy2)
-        out.write(repr(proxy))
-
 
 def int_from_int(gdbval):
     return int(str(gdbval))


More information about the Python-checkins mailing list