[pypy-svn] r55286 - pypy/dist/pypy/lib

fijal at codespeak.net fijal at codespeak.net
Tue May 27 03:46:22 CEST 2008


Author: fijal
Date: Tue May 27 03:46:18 2008
New Revision: 55286

Added:
   pypy/dist/pypy/lib/pyexpat.py   (contents, props changed)
Log:
Working version of pyexpat. Passes all but two tests, but we cannot yet
run it on top of pypy, due to some problems with exceptions.


Added: pypy/dist/pypy/lib/pyexpat.py
==============================================================================
--- (empty file)
+++ pypy/dist/pypy/lib/pyexpat.py	Tue May 27 03:46:18 2008
@@ -0,0 +1,274 @@
+
+import ctypes
+import ctypes.util
+from ctypes_configure import configure
+from ctypes import c_char_p, c_int, c_void_p, POINTER, c_char
+
+lib = ctypes.CDLL(ctypes.util.find_library('expat'))
+
+class CConfigure:
+    """Probe description handed to ``configure.configure()``.
+
+    The resulting mapping is read back under the keys 'XML_Char' and
+    'XML_COMBINED_VERSION', i.e. the names of the attributes declared here.
+    """
+    # Build settings for the probe: include expat.h, link against expat and
+    # define XML_COMBINED_VERSION, a single integer built from expat's
+    # major/minor/micro version macros (1.95.4 becomes 19504).
+    _compilation_info_ = configure.ExternalCompilationInfo(
+        includes = ['expat.h'],
+        libraries = ['expat'],
+        pre_include_lines = [
+        '#define XML_COMBINED_VERSION (10000*XML_MAJOR_VERSION+100*XML_MINOR_VERSION+XML_MICRO_VERSION)'],
+        )
+
+    # The C type XML_Char; presumably ctypes.c_char is the expected/hinted
+    # mapping -- confirm against ctypes_configure's SimpleType.
+    XML_Char = configure.SimpleType('XML_Char', ctypes.c_char)
+    # Integer value of the XML_COMBINED_VERSION macro defined above.
+    XML_COMBINED_VERSION = configure.ConstantInteger('XML_COMBINED_VERSION')
+
+info = configure.configure(CConfigure)
+XML_Char = info['XML_Char']
+XML_COMBINED_VERSION = info['XML_COMBINED_VERSION']
+XML_Parser = ctypes.c_void_p # an opaque pointer
+assert XML_Char is ctypes.c_char # this assumption is everywhere in
+# cpython's expat, let's explode
+XML_ParserCreate = lib.XML_ParserCreate
+XML_ParserCreate.args = [ctypes.c_char_p]
+XML_ParserCreate.result = XML_Parser
+XML_ParserCreateNS = lib.XML_ParserCreateNS
+XML_ParserCreateNS.args = [c_char_p, c_char]
+XML_ParserCreateNS.result = XML_Parser
+XML_Parse = lib.XML_Parse
+XML_Parse.args = [XML_Parser, ctypes.c_char_p, ctypes.c_int, ctypes.c_int]
+XML_Parse.result = ctypes.c_int
+
+handler_names = [
+    'StartElement',
+    'EndElement',
+    'ProcessingInstruction',
+    'CharacterData',
+    'UnparsedEntityDecl',
+    'NotationDecl',
+    'StartNamespaceDecl',
+    'EndNamespaceDecl',
+    'Comment',
+    'StartCdataSection',
+    'EndCdataSection',
+    'Default',
+    'DefaultHandlerExpand',
+    'NotStandalone',
+    'ExternalEntityRef',
+    'StartDoctypeDecl',
+    'EndDoctypeDecl',
+    'EntityDecl',
+    'XmlDecl',
+    'ElementDecl',
+    'AttlistDecl',
+    ]
+if XML_COMBINED_VERSION >= 19504:
+    handler_names.append('SkippedEntity')
+setters = {}
+
+for name in handler_names:
+    if name == 'DefaultHandlerExpand':
+        newname = 'XML_SetDefaultHandlerExpand'
+    else:
+        name += 'Handler'
+        newname = 'XML_Set' + name
+    cfunc = getattr(lib, newname)
+    cfunc.args = [XML_Parser, ctypes.c_void_p]
+    cfunc.result = ctypes.c_int
+    setters[name] = cfunc
+
+class ExpatError(Exception):
+    pass
+
+error = ExpatError
+
+class XMLParserType(object):
+    specified_attributes = 0
+    ordered_attributes = 0
+    returns_unicode = 1
+    encoding = 'utf-8'
+    def __init__(self, encoding, namespace_separator):
+        self.returns_unicode = 1
+        if encoding:
+            self.encoding = encoding
+        if namespace_separator is None:
+            self.itself = XML_ParserCreate(encoding)
+        else:
+            self.itself = XML_ParserCreateNS(encoding, ord(namespace_separator))
+        if not self.itself:
+            raise RuntimeError("Creating parser failed")
+        self.storage = {}
+        self.buffer = None
+        self.buffer_size = 8192
+        self.character_data_handler = None
+
+    def _flush_character_buffer(self):
+        if not self.buffer:
+            return
+        res = self._call_character_handler(''.join(self.buffer))
+        self.buffer = []
+        return res
+
+    def _call_character_handler(self, buf):
+        if self.character_data_handler:
+            self.character_data_handler(buf)
+
+    def Parse(self, data, is_final):
+        res = XML_Parse(self.itself, data, len(data), is_final)
+        if res == 0:
+            xxx
+        self._flush_character_buffer()
+        return res
+
+    def _sethandler(self, name, real_cb):
+        setter = setters[name]
+        try:
+            cb = self.storage[(name, real_cb)]
+        except KeyError:
+            cb = getattr(self, 'get_cb_for_%s' % name)(real_cb)
+            self.storage[(name, real_cb)] = cb
+        setter(self.itself, cb)
+
+    def get_cb_for_StartElementHandler(self, real_cb):
+        def StartElement(unused, name, attrs):
+            # unpack name and attrs
+            conv = self.conv
+            self._flush_character_buffer()
+            if self.specified_attributes:
+                import pdb
+                pdb.set_trace()
+            max = 0
+            while attrs[max]:
+                max += 2 # copied
+            if self.ordered_attributes:
+                res = [attrs[i] for i in range(max)]
+            else:
+                res = {}
+                for i in range(0, max, 2):
+                    res[conv(attrs[i])] = conv(attrs[i + 1])
+            real_cb(conv(name), res)
+        CB = ctypes.CFUNCTYPE(None, c_void_p, c_char_p, POINTER(c_char_p))
+        return CB(StartElement)
+
+    def get_cb_for_ExternalEntityRefHandler(self, real_cb):
+        def ExternalEntity(unused, context, base, sysId, pubId):
+            self._flush_character_buffer()
+            conv = self.conv
+            return real_cb(conv(context), conv(base), conv(sysId),
+                           conv(pubId))
+        CB = ctypes.CFUNCTYPE(c_int, c_void_p, *([c_char_p] * 4))
+        return CB(ExternalEntity)
+
+    def get_cb_for_CharacterDataHandler(self, real_cb):
+        def CharacterData(unused, s, lgt):
+            if self.buffer is None:
+                self._call_character_handler(self.conv(s[:lgt]))
+            else:
+                if len(self.buffer) + lgt > self.buffer_size:
+                    self._flush_character_buffer()
+                    if self.character_data_handler is None:
+                        return
+                if lgt > self.buffer_size:
+                    self._call_character_handler(s[:lgt])
+                    self.buffer = []
+                else:
+                    self.buffer.append(s[:lgt])
+        CB = ctypes.CFUNCTYPE(None, c_void_p, POINTER(c_char), c_int)
+        return CB(CharacterData)
+
+    def _new_callback_for_string_len(name, sign):
+        def get_callback_for_(self, real_cb):
+            def func(unused, s, len):
+                self._flush_character_buffer()
+                arg = self.conv(s[:len])
+                real_cb(arg)
+            func.func_name = name
+            CB = ctypes.CFUNCTYPE(*sign)
+            return CB(func)
+        get_callback_for_.func_name = 'get_cb_for_' + name
+        return get_callback_for_
+    
+    for name in ['DefaultHandlerExpand',
+                 'DefaultHandler']:
+        sign = [None, c_void_p, POINTER(c_char), c_int]
+        name = 'get_cb_for_' + name
+        locals()[name] = _new_callback_for_string_len(name, sign)
+
+    def _new_callback_for_starargs(name, sign):
+        def get_callback_for_(self, real_cb):
+            def func(unused, *args):
+                self._flush_character_buffer()
+                args = [self.conv(arg) for arg in args]
+                real_cb(*args)
+            func.func_name = name
+            CB = ctypes.CFUNCTYPE(*sign)
+            return CB(func)
+        get_callback_for_.func_name = 'get_cb_for_' + name
+        return get_callback_for_
+    
+    for name, num in [
+        ('EndElementHandler', 1),
+        ('ProcessingInstructionHandler', 2),
+        ('UnparsedEntityDeclHandler', 5),
+        ('NotationDeclHandler', 4),
+        ('StartNamespaceDeclHandler', 2),
+        ('EndNamespaceDeclHandler', 1),
+        ('CommentHandler', 1),
+        ('StartCdataSectionHandler', 0),
+        ('EndCdataSectionHandler', 0)]:
+        sign = [None, c_void_p] + [c_char_p] * num
+        name = 'get_cb_for_' + name
+        locals()[name] = _new_callback_for_starargs(name, sign)
+
+    def conv_unicode(self, s):
+        if s is None:
+            return s
+        return s.decode(self.encoding)
+
+    def __setattr__(self, name, value):
+        # forest of ifs...
+        if name in ['ordered_attributes',
+                    'returns_unicode', 'specified_attributes']:
+            if value:
+                if name == 'returns_unicode':
+                    self.conv = self.conv_unicode
+                self.__dict__[name] = 1
+            else:
+                if name == 'returns_unicode':
+                    self.conv = lambda s: s
+                self.__dict__[name] = 0
+        elif name == 'buffer_text':
+            if value:
+                self.buffer = []
+            else:
+                self._flush_character_buffer()
+                self.buffer = None
+        elif name == 'namespace_prefixes':
+            xxx
+        elif name in setters:
+            if name == 'CharacterDataHandler':
+                # XXX we need to flush buffer here
+                self._flush_character_buffer()
+                self.character_data_handler = value
+            #print name
+            #print value
+            #print
+            self._sethandler(name, value)
+        else:
+            self.__dict__[name] = value
+
+    def __getattr__(self, name):
+        if name == 'buffer_text':
+            return self.buffer is not None
+        return self.__dict__[name]
+
+    def ParseFile(self, file):
+        return self.Parse(file.read(), False)
+
+def ErrorString(errno):
+    xxx
+
+def ParserCreate(encoding=None, namespace_separator=None):
+    if (not isinstance(namespace_separator, str) and
+        not namespace_separator is None):
+        raise TypeError("ParserCreate() argument 2 must be string or None, not %s" % namespace_separator.__class__.__name__)
+    if namespace_separator is not None:
+        if len(namespace_separator) > 1:
+            raise ValueError('namespace_separator must be at most one character, omitted, or None')
+        if len(namespace_separator) == 0:
+            namespace_separator = None
+    return XMLParserType(encoding, namespace_separator)



More information about the Pypy-commit mailing list