[pypy-svn] r79080 - in pypy/branch/fast-forward/pypy/module/pyexpat: . test

afa at codespeak.net afa at codespeak.net
Sun Nov 14 23:49:41 CET 2010


Author: afa
Date: Sun Nov 14 23:49:39 2010
New Revision: 79080

Modified:
   pypy/branch/fast-forward/pypy/module/pyexpat/interp_pyexpat.py
   pypy/branch/fast-forward/pypy/module/pyexpat/test/test_parser.py
Log:
Add the default UnknownEncodingHandler to pyexpat


Modified: pypy/branch/fast-forward/pypy/module/pyexpat/interp_pyexpat.py
==============================================================================
--- pypy/branch/fast-forward/pypy/module/pyexpat/interp_pyexpat.py	(original)
+++ pypy/branch/fast-forward/pypy/module/pyexpat/interp_pyexpat.py	Sun Nov 14 23:49:39 2010
@@ -44,6 +44,12 @@
         ('type', rffi.INT),
         ('quant', rffi.INT),
     ])
+    XML_Encoding = rffi_platform.Struct('XML_Encoding', [
+        ('map', rffi.CFixedArray(rffi.INT, 1)),
+        ('data', rffi.VOIDP),
+        ('convert', rffi.VOIDP),
+        ('release', rffi.VOIDP),
+    ])
     for name in ['XML_PARAM_ENTITY_PARSING_NEVER',
                  'XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE',
                  'XML_PARAM_ENTITY_PARSING_ALWAYS']:
@@ -56,6 +62,7 @@
     globals()[k] = v
 
 XML_Content_Ptr.TO.become(rffi.CArray(XML_Content))
+XML_Encoding_Ptr = lltype.Ptr(XML_Encoding)
 
 
 def expat_external(*a, **kw):
@@ -171,8 +178,7 @@
     args = ', '.join(arg_names)
     wargs = ', '.join(warg_names)
 
-    if name in ['UnknownEncodingHandler',
-                'ExternalEntityRefHandler',
+    if name in ['ExternalEntityRefHandler',
                 'NotStandaloneHandler']:
         result_type = rffi.INT
         result_converter = "space.int_w(w_result)"
@@ -225,6 +231,30 @@
                           [XML_Parser, callback_type], lltype.Void)
     SETTERS[name] = (index, func, callback)
 
+# special case for UnknownEncodingHandlerData:
+# XML_SetUnknownEncodingHandler() needs an additional argument,
+# and it's not modifiable via user code anyway
+def UnknownEncodingHandlerData_callback(ll_userdata, name, info):
+    id = rffi.cast(lltype.Signed, ll_userdata)
+    userdata = global_storage.get_object(id)
+    space = userdata.space
+    parser = userdata.parser
+
+    name = rffi.charp2str(name)
+
+    try:
+        parser.UnknownEncodingHandler(space, name, info)
+    except OperationError, e:
+        parser._exc_info = e
+        XML_StopParser(parser.itself, XML_FALSE)
+        return 0
+    return 1
+callback_type = lltype.Ptr(lltype.FuncType(
+    [rffi.VOIDP, rffi.CCHARP, XML_Encoding_Ptr], rffi.INT))
+XML_SetUnknownEncodingHandler = expat_external(
+    'XML_SetUnknownEncodingHandler',
+    [XML_Parser, callback_type, rffi.VOIDP], lltype.Void)
+
 ENUMERATE_SETTERS = unrolling_iterable(SETTERS.items())
 
 # Declarations of external functions
@@ -295,10 +325,9 @@
         self._exc_info = None
 
         # Set user data for callback function
-        global_storage.get_nonmoving_id(
-            CallbackData(space, self),
-            id=rffi.cast(lltype.Signed, self.itself))
-        XML_SetUserData(self.itself, rffi.cast(rffi.VOIDP, self.itself))
+        self.id = global_storage.get_nonmoving_id(
+            CallbackData(space, self))
+        XML_SetUserData(self.itself, rffi.cast(rffi.VOIDP, self.id))
 
     def __del__(self):
         if XML_ParserFree: # careful with CPython interpreter shutdown
@@ -429,6 +458,27 @@
 
     sethandler._annspecialcase_ = 'specialize:arg(2)'
 
+    all_chars = ''.join(chr(i) for i in range(256))
+
+    def UnknownEncodingHandler(self, space, name, info):
+        # Yes, supports only 8bit encodings
+        translationmap = space.unicode_w(
+            space.call_method(
+                space.wrap(self.all_chars), "decode",
+                space.wrap(name), space.wrap("replace")))
+
+        for i in range(256):
+            c = translationmap[i]
+            if c == u'\ufffd':
+                info.c_map[i] = rffi.cast(rffi.INT, -1)
+            else:
+                info.c_map[i] = rffi.cast(rffi.INT, c)
+        info.c_data = lltype.nullptr(rffi.VOIDP.TO)
+        info.c_convert = lltype.nullptr(rffi.VOIDP.TO)
+        info.c_release = lltype.nullptr(rffi.VOIDP.TO)
+        return True
+
+
     def setattr(self, space, name, w_value):
         if name == "namespace_prefixes":
             XML_SetReturnNSTriplet(self.itself, space.int_w(w_value))
@@ -665,6 +715,9 @@
                              space.wrap('XML_ParserCreate failed'))
 
     parser = W_XMLParserType(space, xmlparser, w_intern)
+    XML_SetUnknownEncodingHandler(
+        parser.itself, UnknownEncodingHandlerData_callback,
+        rffi.cast(rffi.VOIDP, parser.id))
     return space.wrap(parser)
 ParserCreate.unwrap_spec = [ObjSpace, W_Root, W_Root, W_Root]
 

Modified: pypy/branch/fast-forward/pypy/module/pyexpat/test/test_parser.py
==============================================================================
--- pypy/branch/fast-forward/pypy/module/pyexpat/test/test_parser.py	(original)
+++ pypy/branch/fast-forward/pypy/module/pyexpat/test/test_parser.py	Sun Nov 14 23:49:39 2010
@@ -50,6 +50,14 @@
         def gotText(text):
             assert text == u"caf\xe9"
         p.CharacterDataHandler = gotText
-        assert p.returns_unicode
         p.Parse(xml)
 
+    def test_python_encoding(self):
+        # This name is not known by expat
+        xml = "<?xml version='1.0' encoding='latin1'?><s>caf\xe9</s>"
+        import pyexpat
+        p = pyexpat.ParserCreate()
+        def gotText(text):
+            assert text == u"caf\xe9"
+        p.CharacterDataHandler = gotText
+        p.Parse(xml)



More information about the Pypy-commit mailing list