[pypy-svn] r63011 - pypy/trunk/pypy/module/pyexpat

afa at codespeak.net afa at codespeak.net
Tue Mar 17 19:04:02 CET 2009


Author: afa
Date: Tue Mar 17 19:04:02 2009
New Revision: 63011

Modified:
   pypy/trunk/pypy/module/pyexpat/interp_pyexpat.py
Log:
Fix translation of the pyexpat module:

- use XML_SetUserData to store and retrieve the parser object:
  don't try to use nested scopes; they don't exist in translated code!
  The "global_storage" dict is a hack IMO (but maybe a good one).

- the handler functions are generated from a source template, and sethandler() is specialized on the handler name

- add XML_UseForeignDTD to line up with the version in pypy/lib/pyexpat.py

/me starts a full translation to see if this really works.


Modified: pypy/trunk/pypy/module/pyexpat/interp_pyexpat.py
==============================================================================
--- pypy/trunk/pypy/module/pyexpat/interp_pyexpat.py	(original)
+++ pypy/trunk/pypy/module/pyexpat/interp_pyexpat.py	Tue Mar 17 19:04:02 2009
@@ -5,41 +5,54 @@
 from pypy.interpreter.error import OperationError
 from pypy.objspace.descroperation import object_setattr
 from pypy.rpython.lltypesystem import rffi, lltype
+from pypy.rlib.unroll import unrolling_iterable
+
 from pypy.rpython.tool import rffi_platform
 from pypy.translator.tool.cbuild import ExternalCompilationInfo
 
 import sys
+import py
 
 if sys.platform == "win32":
     libname = 'libexpat'
 else:
     libname = 'expat'
 eci = ExternalCompilationInfo(
-        libraries=[libname],
-        includes=['expat.h']
+    libraries=[libname],
+    includes=['expat.h'],
+    pre_include_bits=[
+    '#define XML_COMBINED_VERSION' +
+    ' (10000*XML_MAJOR_VERSION+100*XML_MINOR_VERSION+XML_MICRO_VERSION)',
+    ],
     )
+
 eci = rffi_platform.configure_external_library(
     libname, eci,
     [dict(prefix='expat-',
           include_dir='lib', library_dir='win32/bin/release'),
      ])
 
+XML_Content_Ptr = lltype.Ptr(lltype.ForwardReference())
+XML_Parser = rffi.VOIDP # an opaque pointer
+
 class CConfigure:
     _compilation_info_ = eci
-    XML_Content = rffi_platform.Struct('XML_Content',[
-        ('numchildren', rffi.INT),
-        ('children', rffi.VOIDP),
+    XML_Content = rffi_platform.Struct('XML_Content', [
+        ('numchildren', rffi.UINT),
+        ('children', XML_Content_Ptr),
         ('name', rffi.CCHARP),
         ('type', rffi.INT),
         ('quant', rffi.INT),
     ])
+    XML_COMBINED_VERSION = rffi_platform.ConstantInteger('XML_COMBINED_VERSION')
     XML_FALSE = rffi_platform.ConstantInteger('XML_FALSE')
     XML_TRUE = rffi_platform.ConstantInteger('XML_TRUE')
-XML_Parser = rffi.VOIDP # an opaque pointer
 
 for k, v in rffi_platform.configure(CConfigure).items():
     globals()[k] = v
 
+XML_Content_Ptr.TO.become(rffi.CArray(XML_Content))
+
 
 def expat_external(*a, **kw):
     kw['compilation_info'] = eci
@@ -69,28 +82,132 @@
     ElementDeclHandler = [rffi.CCHARP, lltype.Ptr(XML_Content)],
     AttlistDeclHandler = [rffi.CCHARP] * 4 + [rffi.INT],
     )
-if True: #XML_COMBINED_VERSION >= 19504:
+if XML_COMBINED_VERSION >= 19504:
     HANDLERS['SkippedEntityHandler'] = [rffi.CCHARP, rffi.INT]
+NB_HANDLERS = len(HANDLERS)
+
+class Storage:
+    "Store objects under a non moving ID"
+    def __init__(self):
+        self.next_id = 0
+        self.storage = {}
+
+    @staticmethod
+    def get_nonmoving_id(obj, id=-1):
+        if id < 0:
+            id = global_storage.next_id
+            global_storage.next_id += 1
+        global_storage.storage[id] = obj
+        return id
+
+    @staticmethod
+    def get_object(id):
+        return global_storage.storage[id]
+
+    @staticmethod
+    def free_nonmoving_id(id):
+        del global_storage.storage[id]
+
+global_storage = Storage()
+
+class CallbackData(Wrappable):
+    def __init__(self, space, parser):
+        self.space = space
+        self.parser = parser
 
 SETTERS = {}
-for name, params in HANDLERS.items():
-    c_name = 'XML_Set' + name
+for index, (name, params) in enumerate(HANDLERS.items()):
+    arg_names = ['arg%d' % (i,) for i in range(len(params))]
+    warg_names = ['w_arg%d' % (i,) for i in range(len(params))]
+
+    converters = []
+    for i, ARG in enumerate(params):
+        if name == "StartElementHandler" and i == 1:
+            converters.append(
+                'w_arg%d = parser.w_convert_attributes(space, arg%d)' % (i, i))
+        elif name in ["CharacterDataHandler", "DefaultHandlerExpand", "DefaultHandler"] and i == 0:
+            converters.append(
+                'w_arg%d = parser.w_convert_charp_n(space, arg%d, arg%d)' % (i, i, i+1))
+            del warg_names[i+1]
+        elif name in ["EntityDeclHandler"] and i == 2:
+            converters.append(
+                'w_arg%d = parser.w_convert_charp_n(space, arg%d, arg%d)' % (i, i, i+1))
+            del warg_names[i+1]
+        elif ARG == rffi.CCHARP:
+            converters.append(
+                'w_arg%d = parser.w_convert_charp(space, arg%d)' % (i, i))
+        elif ARG == lltype.Ptr(XML_Content):
+            converters.append(
+                'w_arg%d = parser.w_convert_model(space, arg%d)' % (i, i))
+            converters.append(
+                'XML_FreeContentModel(parser.itself, arg%d)' % (i,))
+        else:
+            converters.append(
+                'w_arg%d = space.wrap(arg%d)' % (i, i))
+    converters = '; '.join(converters)
+
+    args = ', '.join(arg_names)
+    wargs = ', '.join(warg_names)
+
     if name in ['UnknownEncodingHandler',
                 'ExternalEntityRefHandler',
                 'NotStandaloneHandler']:
-        RESULT_TYPE = rffi.INT
+        result_type = rffi.INT
+        result_converter = "space.int_w(w_result)"
+        result_error = "0"
+    else:
+        result_type = lltype.Void
+        result_converter = "None"
+        result_error = "None"
+
+    if name == 'CharacterDataHandler':
+        pre_code = 'if parser.buffer_string(space, w_arg0, arg1): return'
+    else:
+        pre_code = 'parser.flush_character_buffer(space)'
+
+    if name == 'ExternalEntityRefHandler':
+        post_code = 'if space.is_w(w_result, space.w_None): return 0'
     else:
-        RESULT_TYPE = lltype.Void
-    CALLBACK = lltype.Ptr(lltype.FuncType(
-        [rffi.VOIDP] + params, RESULT_TYPE))
+        post_code = ''
+
+    src = py.code.Source("""
+    def handler(ll_userdata, %(args)s):
+        id = rffi.cast(lltype.Signed, ll_userdata)
+        userdata = global_storage.get_object(id)
+        space = userdata.space
+        parser = userdata.parser
+
+        %(converters)s
+        %(pre_code)s
+        try:
+            w_result = space.call_function(parser.handlers[%(index)s], %(wargs)s)
+        except OperationError, e:
+            parser._exc_info = e
+            XML_StopParser(parser.itself, XML_FALSE)
+            return %(result_error)s
+        %(post_code)s
+        return %(result_converter)s
+    """ % locals())
+
+    exec str(src)
+
+    c_name = 'XML_Set' + name
+    callback_type = lltype.Ptr(lltype.FuncType(
+        [rffi.VOIDP] + params, result_type))
     func = expat_external(c_name,
-                          [XML_Parser, CALLBACK], rffi.INT)
-    SETTERS[name] = func
+                          [XML_Parser, callback_type], rffi.INT)
+    SETTERS[name] = (index, func, handler)
+
+ENUMERATE_SETTERS = unrolling_iterable(SETTERS.items())
 
 XML_ParserCreate = expat_external(
     'XML_ParserCreate', [rffi.CCHARP], XML_Parser)
 XML_ParserCreateNS = expat_external(
     'XML_ParserCreateNS', [rffi.CCHARP, rffi.CHAR], XML_Parser)
+XML_ParserFree = expat_external(
+    'XML_ParserFree', [XML_Parser], lltype.Void)
+XML_SetUserData = expat_external(
+    'XML_SetUserData', [XML_Parser, rffi.VOIDP], lltype.Void)
 XML_Parse = expat_external(
     'XML_Parse', [XML_Parser, rffi.CCHARP, rffi.INT, rffi.INT], rffi.INT)
 XML_StopParser = expat_external(
@@ -100,6 +217,9 @@
     'XML_SetReturnNSTriplet', [XML_Parser, rffi.INT], lltype.Void)
 XML_GetSpecifiedAttributeCount = expat_external(
     'XML_GetSpecifiedAttributeCount', [XML_Parser], rffi.INT)
+if XML_COMBINED_VERSION >= 19505:
+    XML_UseForeignDTD = expat_external(
+        'XML_UseForeignDTD', [XML_Parser, rffi.INT], lltype.Void)
 
 XML_GetErrorCode = expat_external(
     'XML_GetErrorCode', [XML_Parser], rffi.INT)
@@ -118,7 +238,6 @@
 XML_FreeContentModel = expat_external(
     'XML_FreeContentModel', [XML_Parser, lltype.Ptr(XML_Content)], lltype.Void)
 
-
 class W_XMLParserType(Wrappable):
 
     def __init__(self, encoding, namespace_separator, w_intern):
@@ -139,12 +258,27 @@
         else:
             self.itself = XML_ParserCreate(self.encoding)
 
+        self.handlers = [None] * NB_HANDLERS
+
         self.buffer_w = None
         self.buffer_size = 8192
+        self.buffer_used = 0
         self.w_character_data_handler = None
 
         self._exc_info = None
 
+    def __del__(self):
+        if XML_ParserFree: # careful with CPython interpreter shutdown
+            XML_ParserFree(self.itself)
+        if global_storage:
+            global_storage.free_nonmoving_id(
+                rffi.cast(lltype.Signed, self.itself))
+
+    def UseForeignDTD(self, space, w_flag=True):
+        flag = space.is_true(w_flag)
+        XML_UseForeignDTD(self.itself, flag)
+    UseForeignDTD.unwrap_spec = ['self', ObjSpace, W_Root]
+
     # Handlers management
 
     def w_convert(self, space, s):
@@ -168,8 +302,30 @@
         else:
             return space.w_None
 
+    def w_convert_attributes(self, space, attrs):
+        if self.specified_attributes:
+            maxindex = XML_GetSpecifiedAttributeCount(self.itself)
+        else:
+            maxindex = 0
+        while attrs[maxindex]:
+            maxindex += 2 # copied
+
+        if self.ordered_attributes:
+            w_attrs = space.newlist([
+                self.w_convert_charp(space, attrs[i])
+                for i in range(maxindex)])
+        else:
+            w_attrs = space.newdict()
+            for i in range(0, maxindex, 2):
+                space.setitem(
+                    w_attrs,
+                    self.w_convert_charp(space, attrs[i]),
+                    self.w_convert_charp(space, attrs[i + 1]))
+
+        return w_attrs
+
     def w_convert_model(self, space, model):
-        children = [self._conv_content_model(model.children[i])
+        children = [self.w_convert_model(space, model.c_children[i])
                     for i in range(model.c_numchildren)]
         return space.newtuple([
             space.wrap(model.c_type),
@@ -177,124 +333,46 @@
             self.w_convert_charp(space, model.c_name),
             space.newtuple(children)])
 
-    def sethandler(self, space, name, w_handler):
-        if name == 'StartElementHandler':
-            def callback(unused, name, attrs):
+    def buffer_string(self, space, w_string, length):
+        if self.buffer_w is not None:
+            if self.buffer_used + length > self.buffer_size:
                 self.flush_character_buffer(space)
-                w_name = self.w_convert_charp(space, name)
+                # handler might have changed; drop the rest on the floor
+                # if there isn't a handler anymore
+                if self.w_character_data_handler is None:
+                    return True
+            if length <= self.buffer_size:
+                self.buffer_w.append(w_string)
+                self.buffer_used += length
+                return True
+            else:
+                self.buffer_w = []
+                self.buffer_used = 0
+        return False
+
+    def sethandler(self, space, name, w_handler, index, setter, handler):
 
-                if self.specified_attributes:
-                    maxindex = XML_GetSpecifiedAttributeCount(self.itself)
-                else:
-                    maxindex = 0
-                while attrs[maxindex]:
-                    maxindex += 2 # copied
-
-                if self.ordered_attributes:
-                    w_attrs = space.newlist([
-                        self.w_convert_charp(space, attrs[i])
-                        for i in range(maxindex)])
-                else:
-                    w_attrs = space.newdict()
-                    for i in range(0, maxindex, 2):
-                        space.setitem(
-                            w_attrs,
-                            self.w_convert_charp(space, attrs[i]),
-                            self.w_convert_charp(space, attrs[i + 1]))
-                space.call_function(w_handler, w_name, w_attrs)
-
-        elif name == 'CharacterDataHandler':
-            def callback(unused, data, length):
-                w_string = self.w_convert_charp_n(space, data, length)
-
-                if self.buffer_w is None:
-                    space.call_function(w_handler, w_string)
-                else:
-                    if len(self.buffer_w) + length > self.buffer_size: # XXX sum(len(buffer))
-                        self.flush_character_buffer(space)
-                        if self.w_character_data_handler is None:
-                            return
-                    if length >= self.buffer_size:
-                        space.call_function(w_handler, w_string)
-                        self.buffer_w = []
-                    else:
-                        self.buffer_w.append(w_string)
+        if name == 'CharacterDataHandler':
             self.flush_character_buffer(space)
             if space.is_w(w_handler, space.w_None):
                 self.w_character_data_handler = None
             else:
                 self.w_character_data_handler = w_handler
 
-        elif name in ['DefaultHandlerExpand', 'DefaultHandler']:
-            def callback(unused, data, length):
-                w_string = self.w_convert_charp_n(space, data, length)
-                space.call_function(w_handler, w_string)
+        self.handlers[index] = w_handler
+        setter(self.itself, handler)
 
-        elif name == 'ElementDeclHandler':
-            def callback(unused, name, model):
-                self.flush_character_buffer(space)
-                w_model = self.w_convert_model(space, model)
-                XML_FreeContentModel(self.itself, model)
-                space.call_function(w_handler,
-                                    self.w_convert_charp(space, name),
-                                    w_model)
-
-        elif name == 'EntityDeclHandler':
-            def callback(unused, ename, is_param, value, value_len,
-                         base, system_id, pub_id, not_name):
-                self.flush_character_buffer(space)
-
-                space.call_function(
-                    w_handler,
-                    self.w_convert_charp(space, ename),
-                    space.wrap(is_param),
-                    self.w_convert_charp_n(space, value, value_len),
-                    self.w_convert_charp(space, base),
-                    self.w_convert_charp(space, system_id),
-                    self.w_convert_charp(space, pub_id),
-                    self.w_convert_charp(space, not_name))
-
-        elif name == 'ExternalEntityRefHandler':
-            def callback(unused, context, base, system_id, pub_id):
-                w_res = space.call_function(
-                    w_handler,
-                    self.w_convert_charp(space, context),
-                    self.w_convert_charp(space, base),
-                    self.w_convert_charp(space, system_id),
-                    self.w_convert_charp(space, pub_id))
-                if space.is_w(w_res, space.w_None):
-                    return 0
-                return space.int_w(w_res)
-
-        else:
-            ARGTYPES = HANDLERS[name]
-            def callback(unused, *args):
-                self.flush_character_buffer(space)
-                args_w = []
-                for i, arg in enumerate(args):
-                    if ARGTYPES[i] is rffi.CCHARP:
-                        w_arg = self.w_convert_charp(space, arg)
-                    else:
-                        w_arg = space.wrap(arg)
-                    args_w.append(w_arg)
-                space.call_function(w_handler, *args_w)
-
-        def callback_wrapper(*args):
-            # Catch errors and record them
-            try:
-                return callback(*args)
-            except OperationError, e:
-                self._exc_info = e
-                XML_StopParser(self.itself, XML_FALSE)
-        callback_wrapper.func_name = name + '_callback'
-        SETTERS[name](self.itself, callback_wrapper)
+    sethandler._annspecialcase_ = 'specialize:arg(2)'
 
     def setattr(self, space, name, w_value):
         if name == "namespace_prefixes":
             XML_SetReturnNSTriplet(self.itself, space.int_w(w_value))
             return
-        elif name in SETTERS:
-            return self.sethandler(space, name, w_value)
+
+        for handler_name, (index, setter, handler) in ENUMERATE_SETTERS:
+            if name == handler_name:
+                return self.sethandler(space, handler_name, w_value,
+                                       index, setter, handler)
 
         # fallback to object.__setattr__()
         return space.call_function(
@@ -305,7 +383,7 @@
     # Parse methods
 
     def Parse(self, space, data, isfinal=True):
-        res = XML_Parse(self.itself, data, len(data), isfinal)
+        res = XML_Parse(self.itself, data, len(data), bool(isfinal))
         if self._exc_info:
             e = self._exc_info
             self._exc_info = None
@@ -314,8 +392,8 @@
             exc = self.set_error(space, XML_GetErrorCode(self.itself))
             raise exc
         self.flush_character_buffer(space)
-        return res
-    Parse.unwrap_spec = ['self', ObjSpace, str, bool]
+        return space.wrap(res)
+    Parse.unwrap_spec = ['self', ObjSpace, str, int]
 
     def ParseFile(self, space, w_file):
         return
@@ -328,6 +406,7 @@
             space.getattr(space.wrap(''), space.wrap('join')),
             space.newlist(self.buffer_w))
         self.buffer_w = []
+        self.buffer_used = 0
 
         if self.w_character_data_handler:
             space.call_function(self.w_character_data_handler, w_data)
@@ -365,6 +444,7 @@
     def set_buffer_text(space, self, w_value):
         if space.is_true(w_value):
             self.buffer_w = []
+            self.buffer_used = 0
         else:
             self.flush_character_buffer(space)
             self.buffer_w = None
@@ -380,6 +460,10 @@
         setattr(obj, name, space.bool_w(value))
     return GetSetProperty(fget, fset, cls=cls, doc=doc)
 
+XMLParser_methods = ['Parse', 'ParseFile']
+if XML_COMBINED_VERSION >= 19505:
+    XMLParser_methods.append('UseForeignDTD')
+
 W_XMLParserType.typedef = TypeDef(
     "pyexpat.XMLParserType",
     __doc__ = "XML parser",
@@ -402,7 +486,7 @@
     **dict((name, interp2app(getattr(W_XMLParserType, name),
                              unwrap_spec=getattr(W_XMLParserType,
                                                  name).unwrap_spec))
-           for name in "Parse ParseFile".split())
+           for name in XMLParser_methods)
     )
 
 def ParserCreate(space, w_encoding=None, w_namespace_separator=None,
@@ -429,6 +513,11 @@
         w_intern = space.newdict()
 
     parser = W_XMLParserType(encoding, namespace_separator, w_intern)
+    global_storage.get_nonmoving_id(
+        CallbackData(space, parser),
+        id=rffi.cast(lltype.Signed, parser.itself))
+    XML_SetUserData(parser.itself, parser.itself)
+
     return space.wrap(parser)
 ParserCreate.unwrap_spec = [ObjSpace, W_Root, W_Root, W_Root]
 



More information about the Pypy-commit mailing list