[pypy-svn] r78952 - in pypy/branch/fast-forward/pypy/module/pyexpat: . test

afa at codespeak.net afa at codespeak.net
Wed Nov 10 08:26:32 CET 2010


Author: afa
Date: Wed Nov 10 08:26:29 2010
New Revision: 78952

Modified:
   pypy/branch/fast-forward/pypy/module/pyexpat/interp_pyexpat.py
   pypy/branch/fast-forward/pypy/module/pyexpat/test/test_parser.py
Log:
Really implement interning of pyexpat strings


Modified: pypy/branch/fast-forward/pypy/module/pyexpat/interp_pyexpat.py
==============================================================================
--- pypy/branch/fast-forward/pypy/module/pyexpat/interp_pyexpat.py	(original)
+++ pypy/branch/fast-forward/pypy/module/pyexpat/interp_pyexpat.py	Wed Nov 10 08:26:29 2010
@@ -62,32 +62,37 @@
     kw['compilation_info'] = eci
     return rffi.llexternal(*a, **kw)
 
+INTERNED_CCHARP = "INTERNED"
+
 HANDLERS = dict(
-    StartElementHandler = [rffi.CCHARP, rffi.CCHARPP],
-    EndElementHandler = [rffi.CCHARP],
-    ProcessingInstructionHandler = [rffi.CCHARP, rffi.CCHARP],
+    StartElementHandler = [INTERNED_CCHARP, rffi.CCHARPP],
+    EndElementHandler = [INTERNED_CCHARP],
+    ProcessingInstructionHandler = [INTERNED_CCHARP, INTERNED_CCHARP],
     CharacterDataHandler = [rffi.CCHARP, rffi.INT],
-    UnparsedEntityDeclHandler = [rffi.CCHARP] * 5,
-    NotationDeclHandler = [rffi.CCHARP] * 4,
-    StartNamespaceDeclHandler = [rffi.CCHARP, rffi.CCHARP],
-    EndNamespaceDeclHandler = [rffi.CCHARP],
+    UnparsedEntityDeclHandler = [INTERNED_CCHARP] * 5,
+    NotationDeclHandler = [INTERNED_CCHARP] * 4,
+    StartNamespaceDeclHandler = [INTERNED_CCHARP, INTERNED_CCHARP],
+    EndNamespaceDeclHandler = [INTERNED_CCHARP],
     CommentHandler = [rffi.CCHARP],
     StartCdataSectionHandler = [],
     EndCdataSectionHandler = [],
     DefaultHandler = [rffi.CCHARP, rffi.INT],
     DefaultHandlerExpand = [rffi.CCHARP, rffi.INT],
     NotStandaloneHandler = [],
-    ExternalEntityRefHandler = [rffi.CCHARP] * 4,
-    StartDoctypeDeclHandler = [rffi.CCHARP, rffi.CCHARP, rffi.CCHARP, rffi.INT],
+    ExternalEntityRefHandler = [rffi.CCHARP] + [INTERNED_CCHARP] * 3,
+    StartDoctypeDeclHandler = [INTERNED_CCHARP, INTERNED_CCHARP,
+                               INTERNED_CCHARP, rffi.INT],
     EndDoctypeDeclHandler = [],
-    EntityDeclHandler = [rffi.CCHARP, rffi.INT, rffi.CCHARP, rffi.INT,
-                         rffi.CCHARP, rffi.CCHARP, rffi.CCHARP, rffi.CCHARP],
+    EntityDeclHandler = [INTERNED_CCHARP, rffi.INT, rffi.CCHARP, rffi.INT,
+                         INTERNED_CCHARP, INTERNED_CCHARP, INTERNED_CCHARP,
+                         INTERNED_CCHARP],
     XmlDeclHandler = [rffi.CCHARP, rffi.CCHARP, rffi.INT],
-    ElementDeclHandler = [rffi.CCHARP, lltype.Ptr(XML_Content)],
-    AttlistDeclHandler = [rffi.CCHARP] * 4 + [rffi.INT],
+    ElementDeclHandler = [INTERNED_CCHARP, lltype.Ptr(XML_Content)],
+    AttlistDeclHandler = [INTERNED_CCHARP, INTERNED_CCHARP,
+                          rffi.CCHARP, rffi.CCHARP, rffi.INT],
     )
 if XML_COMBINED_VERSION >= 19504:
-    HANDLERS['SkippedEntityHandler'] = [rffi.CCHARP, rffi.INT]
+    HANDLERS['SkippedEntityHandler'] = [INTERNED_CCHARP, rffi.INT]
 NB_HANDLERS = len(HANDLERS)
 
 class Storage:
@@ -128,6 +133,8 @@
     warg_names = ['w_arg%d' % (i,) for i in range(len(params))]
 
     converters = []
+    real_params = []
+
     for i, ARG in enumerate(params):
         # Some custom argument conversions
         if name == "StartElementHandler" and i == 1:
@@ -146,6 +153,10 @@
         elif ARG == rffi.CCHARP:
             converters.append(
                 'w_arg%d = parser.w_convert_charp(space, arg%d)' % (i, i))
+        elif ARG == INTERNED_CCHARP:
+            converters.append(
+                'w_arg%d = parser.w_convert_interned(space, arg%d)' % (i, i))
+            ARG = rffi.CCHARP
         elif ARG == lltype.Ptr(XML_Content):
             converters.append(
                 'w_arg%d = parser.w_convert_model(space, arg%d)' % (i, i))
@@ -154,6 +165,7 @@
         else:
             converters.append(
                 'w_arg%d = space.wrap(arg%d)' % (i, i))
+        real_params.append(ARG)
     converters = '; '.join(converters)
 
     args = ', '.join(arg_names)
@@ -208,7 +220,7 @@
 
     c_name = 'XML_Set' + name
     callback_type = lltype.Ptr(lltype.FuncType(
-        [rffi.VOIDP] + params, result_type))
+        [rffi.VOIDP] + real_params, result_type))
     func = expat_external(c_name,
                           [XML_Parser, callback_type], lltype.Void)
     SETTERS[name] = (index, func, callback)
@@ -340,6 +352,21 @@
         else:
             return space.w_None
 
+    def w_convert_interned(self, space, data):
+        if not data:
+            return space.w_None
+        w_data = self.w_convert_charp(space, data)
+        if not self.w_intern:
+            return w_data
+
+        try:
+            return space.getitem(self.w_intern, w_data)
+        except OperationError, e:
+            if not e.match(space, space.w_KeyError):
+                raise
+        space.setitem(self.w_intern, w_data, w_data)
+        return w_data
+
     def w_convert_charp_n(self, space, data, length):
         ll_length = rffi.cast(lltype.Signed, length)
         if data:
@@ -471,7 +498,7 @@
         else:
             encoding = space.str_w(w_encoding)
 
-        parser = W_XMLParserType(encoding, 0, space.newdict(),
+        parser = W_XMLParserType(encoding, 0, self.w_intern,
                                  _from_external_entity=True)
         parser.itself = XML_ExternalEntityParserCreate(self.itself,
                                                        context, encoding)
@@ -548,7 +575,10 @@
             self.buffer_w = None
 
     def get_intern(space, self):
-        return self.w_intern
+        if self.w_intern:
+            return self.w_intern
+        else:
+            return space.w_None
 
 
 def bool_property(name, cls, doc=None):
@@ -626,8 +656,12 @@
             space.wrap('ParserCreate() argument 2 must be string or None,'
                        ' not %s' % (type_name,)))
 
+    # Explicitly passing None means no interning is desired.
+    # Not passing anything means that a new dictionary is used.
     if w_intern is None:
         w_intern = space.newdict()
+    elif space.is_w(w_intern, space.w_None):
+        w_intern = None
 
     parser = W_XMLParserType(encoding, namespace_separator, w_intern)
     if not parser.itself:

Modified: pypy/branch/fast-forward/pypy/module/pyexpat/test/test_parser.py
==============================================================================
--- pypy/branch/fast-forward/pypy/module/pyexpat/test/test_parser.py	(original)
+++ pypy/branch/fast-forward/pypy/module/pyexpat/test/test_parser.py	Wed Nov 10 08:26:29 2010
@@ -16,6 +16,15 @@
 
         raises(pyexpat.ExpatError, p.Parse, "3")
 
+    def test_intern(self):
+        import pyexpat
+        p = pyexpat.ParserCreate()
+        def f(*args): pass
+        p.StartElementHandler = f
+        p.EndElementHandler = f
+        p.Parse("<xml></xml>")
+        assert len(p.intern) == 1
+
     def test_set_buffersize(self):
         import pyexpat, sys
         p = pyexpat.ParserCreate()



More information about the Pypy-commit mailing list