[pypy-svn] r13085 - pypy/dist/pypy/translator/pickle

tismer at codespeak.net tismer at codespeak.net
Sun Jun 5 23:24:38 CEST 2005


Author: tismer
Date: Sun Jun  5 23:24:36 2005
New Revision: 13085

Modified:
   pypy/dist/pypy/translator/pickle/genpickle.py
Log:
lots, lots, lots of changes.

added domains list to distinguish between application domain
and globally known objects.

inlined most constants, to avoid a huge number of
needless global names.

The generated code is still too huge for the

Modified: pypy/dist/pypy/translator/pickle/genpickle.py
==============================================================================
--- pypy/dist/pypy/translator/pickle/genpickle.py	(original)
+++ pypy/dist/pypy/translator/pickle/genpickle.py	Sun Jun  5 23:24:36 2005
@@ -4,16 +4,22 @@
 to restart code generation after flowing and maybe
 annotation.
 """
+from __future__ import generators, division, nested_scopes
+import __future__
+all_feature_names = __future__.all_feature_names
 import os, sys, new, __builtin__
 
-from pypy.translator.gensupp import uniquemodulename, NameManager, UniqueList
+from pypy.translator.gensupp import uniquemodulename, NameManager
 from pypy.translator.gensupp import builtin_base
 from pypy.rpython.rarithmetic import r_int, r_uint
 from pypy.objspace.flow.model import Variable, Constant, SpaceOperation
 from pypy.objspace.flow.model import FunctionGraph, Block, Link
 from pypy.interpreter.baseobjspace import ObjSpace
+from pypy.translator.pickle import slotted
 
 from pickle import whichmodule, PicklingError
+from copy_reg import _reduce_ex, _reconstructor
+
 import pickle
 
 from types import *
@@ -24,22 +30,42 @@
 
 class GenPickle:
 
-    def __init__(self, translator):
+    def __init__(self, translator, outfile = None):
         self.translator = translator
-        self.initcode = UniqueList([
+        self.initcode = [
+            'from __future__ import %s\n' % ', '.join(all_feature_names) +
             'import new, types, sys',
-            ])
+            ]
 
         self.latercode = []    # list of generators generating extra lines
         self.debugstack = ()   # linked list of nested nameof()
-        self.initcode = UniqueList(self.initcode)
 
         self.picklenames = {Constant(None):  'None',
                             Constant(False): 'False',
                             Constant(True):  'True',
+                            # hack: overflowed float
+                            Constant(float("1e10000000000000000000000000000000")):
+                                'float("1e10000000000000000000000000000000")',
                             }
+        for name in all_feature_names + "new types sys".split():
+            self.picklenames[Constant(globals()[name])] = name
         self.namespace = NameManager()
         self.namespace.make_reserved_names('None False True')
+        self.namespace.make_reserved_names('new types sys')
+        self.namespace.make_reserved_names(' '.join(all_feature_names))
+        self.inline_consts = True # save lots of space
+        self._nesting = 0 # for formatting nested tuples etc.
+        # we distinguish between the "user program" and other stuff.
+        # "user program" will never use save_global.
+        self.domains = (
+            'pypy.objspace.std.',
+            'pypy._cache.',
+            'pypy.interpreter.',
+            'pypy.module.',
+            '__main__',
+            )
+        self.outfile = outfile
+        self._partition = 1234
 
     def nameof(self, obj, debug=None, namehint=None):
         key = Constant(obj)
@@ -58,7 +84,9 @@
             else:
                 for cls in type(obj).__mro__:
                     meth = getattr(self,
-                                   'nameof_' + cls.__name__.replace(' ', ''),
+                                   'nameof_' + ''.join( [
+                                       c for c in cls.__name__
+                                       if c.isalpha() or c == '_'] ),
                                    None)
                     if meth:
                         break
@@ -72,12 +100,27 @@
                     name = meth(obj)
             self.debugstack, x = self.debugstack
             assert x is stackentry
-            self.picklenames[key] = name
+            if name[0].isalpha():
+                # avoid to store things which are used just once
+                self.picklenames[key] = name
             return name
 
     def nameofargs(self, tup):
         """ a string with the nameofs, concatenated """
-        return ', '.join([self.nameof(arg) for arg in tup])
+        if len(tup) < 5:
+            # see if there is nesting to be expected
+            for each in tup:
+                if type(each) is tuple:
+                    break
+            else:
+                return ', '.join([self.nameof(arg) for arg in tup])
+        # we always wrap into multi-lines, this is simple and readable
+        self._nesting += 1
+        space = '  ' * self._nesting
+        ret = '\n' + space + (',\n' + space).join(
+            [self.nameof(arg) for arg in tup]) + ',\n' + space
+        self._nesting -= 1
+        return ret
 
     def uniquename(self, basename):
         return self.namespace.uniquename(basename)
@@ -96,8 +139,22 @@
 
     def nameof_module(self, value):
         # all allowed here, we reproduce ourselves
-        name = self.uniquename('mod%s'%value.__name__)
-        self.initcode_python(name, "__import__(%r)" % (value.__name__,))
+        if self.is_app_domain(value.__name__):
+            name = self.uniquename('gmod_%s' % value.__name__)
+            self.initcode.append('%s = new.module(%r)\n'
+                                 'sys.modules[%r] = %s'% (
+                name, value.__name__, value.__name__, name) )
+            def initmodule():
+                for k, v in value.__dict__.items():
+                    try:
+                        nv = self.nameof(v)
+                        yield '%s.%s = %s' % (name, k, nv)
+                    except PicklingError:
+                        pass
+            self.later(initmodule())
+        else:
+            name = self.uniquename(value.__name__)
+            self.initcode_python(name, "__import__(%r)" % (value.__name__,))
         return name
 
     def nameof_int(self, value):
@@ -109,11 +166,15 @@
     nameof_long = nameof_float = nameof_bool = nameof_NoneType = nameof_int
 
     def nameof_str(self, value):
+        if self.inline_consts:
+            return repr(value)
         name = self.uniquename('gstr_' + value[:32])
         self.initcode_python(name, repr(value))
         return name
 
     def nameof_unicode(self, value):
+        if self.inline_consts:
+            return repr(value)
         name = self.uniquename('guni_' + str(value[:32]))
         self.initcode_python(name, repr(value))
         return name
@@ -121,18 +182,9 @@
     def skipped_function(self, func):
         # debugging only!  Generates a placeholder for missing functions
         # that raises an exception when called.
-        if self.translator.frozen:
-            warning = 'NOT GENERATING'
-        else:
-            warning = 'skipped'
-        printable_name = '(%s:%d) %s' % (
-            func.func_globals.get('__name__', '?'),
-            func.func_code.co_firstlineno,
-            func.__name__)
-        print warning, printable_name
         name = self.uniquename('gskippedfunc_' + func.__name__)
-        self.initcode.append('def %s(*a,**k):' % name)
-        self.initcode.append('  raise NotImplementedError')
+        self.initcode.append('def %s(*a,**k):\n' 
+                             '  raise NotImplementedError' % name)
         return name
 
     def nameof_staticmethod(self, sm):
@@ -156,20 +208,6 @@
                 func, ob, typ))
             return name
 
-    # old version:
-    def should_translate_attr(self, pbc, attr):
-        ann = self.translator.annotator
-        if ann is None or isinstance(pbc, ObjSpace):
-            ignore = getattr(pbc.__class__, 'NOT_RPYTHON_ATTRIBUTES', [])
-            if attr in ignore:
-                return False
-            else:
-                return "probably"   # True
-        classdef = ann.getuserclasses().get(pbc.__class__)
-        if classdef and classdef.about_attribute(attr) is not None:
-            return True
-        return False
-
     # new version: save if we don't know
     def should_translate_attr(self, pbc, attr):
         ann = self.translator.annotator
@@ -210,7 +248,7 @@
 
     def nameof_classobj(self, cls):
         if cls.__doc__ and cls.__doc__.lstrip().startswith('NOT_RPYTHON'):
-            raise Exception, "%r should never be reached" % (cls,)
+            raise PicklingError, "%r should never be reached" % (cls,)
 
         try:
             return self.save_global(cls)
@@ -240,34 +278,45 @@
             content = cls.__dict__.items()
             content.sort()
             ignore = getattr(cls, 'NOT_RPYTHON_ATTRIBUTES', [])
+            isapp = self.is_app_domain(cls.__module__)
             for key, value in content:
                 if key.startswith('__'):
-                    if key in ['__module__', '__doc__', '__dict__',
+                    if key in ['__module__', '__doc__', '__dict__', '__slots__',
                                '__weakref__', '__repr__', '__metaclass__']:
                         continue
                     # XXX some __NAMES__ are important... nicer solution sought
                     #raise Exception, "unexpected name %r in class %s"%(key, cls)
-                if isinstance(value, staticmethod) and value.__get__(1) not in self.translator.flowgraphs and self.translator.frozen:
-                    print value
-                    continue
-                if isinstance(value, classmethod):
-                    doc = value.__get__(cls).__doc__
-                    if doc and doc.lstrip().startswith("NOT_RPYTHON"):
+                if isapp:
+                    if (isinstance(value, staticmethod) and value.__get__(1) not in
+                        self.translator.flowgraphs and self.translator.frozen):
+                        print value
+                        continue
+                    if isinstance(value, classmethod):
+                        doc = value.__get__(cls).__doc__
+                        if doc and doc.lstrip().startswith("NOT_RPYTHON"):
+                            continue
+                    if (isinstance(value, FunctionType) and value not in
+                        self.translator.flowgraphs and self.translator.frozen):
+                        print value
                         continue
-                if isinstance(value, FunctionType) and value not in self.translator.flowgraphs and self.translator.frozen:
-                    print value
-                    continue
                 if key in ignore:
                     continue
-                    
+                if type(value) in self.descriptor_filter:
+                    continue # this gets computed
+
                 yield '%s.%s = %s' % (name, key, self.nameof(value))
 
         baseargs = ", ".join(basenames)
         if baseargs:
             baseargs = '(%s)' % baseargs
-        self.initcode.append('class %s%s:' % (name, baseargs))
-        self.initcode.append('  __metaclass__ = %s' % metaclass)
-        self.later(initclassobj())
+        ini = 'class %s%s:\n  __metaclass__ = %s' % (name, baseargs, metaclass)
+        if '__slots__' in cls.__dict__:
+            ini += '\n  __slots__ = %r' % cls.__slots__
+        self.initcode.append(ini)
+        # squeeze it out, now# self.later(initclassobj())
+        self.picklenames[Constant(cls)] = name
+        for line in initclassobj():
+            self.initcode.append(line)
         return name
 
     nameof_class = nameof_classobj   # for Python 2.2
@@ -285,7 +334,7 @@
         # type 'builtin_function_or_method':
         type(len): 'type(len)',
         # type 'method_descriptor':
-        type(list.append): 'type(list.append)',
+        type(type.__reduce__): 'type(type.__reduce__)',
         # type 'wrapper_descriptor':
         type(type(None).__repr__): 'type(type(None).__repr__)',
         # type 'getset_descriptor':
@@ -295,7 +344,12 @@
         # type 'instancemethod':
         type(Exception().__init__): 'type(Exception().__init__)',
         }
-
+    descriptor_filter = {}
+    for _key in typename_mapping.keys():
+        if _key.__name__.endswith('descriptor'):
+            descriptor_filter[_key] = True
+    del _key
+    
     def nameof_type(self, cls):
         if cls.__module__ != '__builtin__':
             return self.nameof_classobj(cls)   # user-defined type
@@ -315,6 +369,14 @@
         return name
 
     def nameof_tuple(self, tup):
+        # instead of defining myriads of tuples, it seems to
+        # be cheaper to create them inline, although they don't
+        # get constant folded like strings and numbers.
+        if self.inline_consts:
+            argstr = self.nameofargs(tup)
+            if len(tup) == 1 and not argstr.rstrip().endswith(','):
+                argstr += ','
+            return '(%s)' % argstr
         name = self.uniquename('g%dtuple' % len(tup))
         args = [self.nameof(x) for x in tup]
         args = ', '.join(args)
@@ -326,15 +388,21 @@
     def nameof_list(self, lis):
         name = self.uniquename('g%dlist' % len(lis))
         def initlist():
-            extname = self.nameof(extendlist)
-            for i in range(0, len(lis), 5):
-                items = lis[i:i+5]
+            chunk = 20
+            for i in range(0, len(lis), chunk):
+                items = lis[i:i+chunk]
                 itemstr = self.nameofargs(items)
-                yield '%s(%s, %s)' % (extname, name, itemstr)
+                yield '%s.extend([%s])' % (name, itemstr)
         self.initcode_python(name, '[]')
         self.later(initlist())
         return name
 
+    def is_app_domain(self, modname):
+        for domain in self.domains:
+            if modname.startswith(domain):
+                return True
+        return False
+
     def nameof_dict(self, dic):
         if '__name__' in dic:
             module = dic['__name__']
@@ -344,7 +412,7 @@
             except (ImportError, KeyError, TypeError):
                 pass
             else:
-                if dic is mod.__dict__:
+                if dic is mod.__dict__ and not self.is_app_domain(module):
                     dictname = module.split('.')[-1] + '__dict__'
                     dictname = self.uniquename(dictname)
                     self.initcode.append('from %s import __dict__ as %s' % (
@@ -354,11 +422,14 @@
         name = self.uniquename('g%ddict' % len(dic))
         def initdict():
             for k in dic:
-                if type(k) is str:
-                    yield '%s[%r] = %s' % (name, k, self.nameof(dic[k]))
-                else:
-                    yield '%s[%s] = %s' % (name, self.nameof(k),
-                                           self.nameof(dic[k]))
+                try:
+                    if type(k) is str:
+                        yield '%s[%r] = %s' % (name, k, self.nameof(dic[k]))
+                    else:
+                        yield '%s[%s] = %s' % (name, self.nameof(k),
+                                               self.nameof(dic[k]))
+                except PicklingError:
+                    pass
         self.initcode_python(name, '{}')
         self.later(initdict())
         return name
@@ -388,14 +459,11 @@
         def initinstance():
             if hasattr(instance, '__setstate__'):
                 # the instance knows what to do
-                if type(restorestate) is tuple:
-                    # be a little shorter
-                    setstatename = self.nameof(setstate)
-                    argstr = self.nameofargs(restorestate)
-                    yield '%s(%s, %s)' % (setstatename, name, argstr)
-                else:
-                    args = self.nameof(restorestate)
-                    yield '%s.__setstate__(%s)' % (name, args)
+                args = self.nameof(restorestate)
+                yield '%s.__setstate__(%s)' % (name, args)
+                return
+            elif type(restorestate) is tuple:
+                slotted.__setstate__(instance, restorestate)
                 return
             assert type(restorestate) is dict, (
                 "%s has no dict and no __setstate__" % name)
@@ -403,23 +471,44 @@
             content.sort()
             for key, value in content:
                 if self.should_translate_attr(instance, key):
+                    if hasattr(value, '__doc__'):
+                        doc = value.__doc__
+                        if type(doc) is str and doc.lstrip().startswith('NOT_RPYTHON'):
+                            continue
                     line = '%s.%s = %s' % (name, key, self.nameof(value))
                     yield line
         if hasattr(instance, '__reduce_ex__'):
-            reduced = instance.__reduce_ex__()
-            restorer = reduced[0]
-            restorename = self.nameof(restorer)
-            restoreargs = reduced[1]
-            if len(reduced) > 2:
-                restorestate = reduced[2]
+            try:
+                reduced = instance.__reduce_ex__()
+            except TypeError:
+                # oops! slots and no __getstate__?
+                if not (hasattr(instance, '__slots__')
+                        and not hasattr(instance, '__getstate__') ):
+                    print "PROBLEM:", instance
+                    raise
+                assert not hasattr(instance, '__dict__'), ('wrong assumptions'
+                    ' about __slots__ in %s instance without __setstate__,'
+                    ' please update %s' % (cls.__name__, __name__) )
+                restorestate = slotted.__getstate__(instance)
+                restorer = _reconstructor
+                restoreargs = klass, object, None
             else:
-                restorestate = None
+                restorer = reduced[0]
+                restoreargs = reduced[1]
+                if len(reduced) > 2:
+                    restorestate = reduced[2]
+                else:
+                    restorestate = None
+            restorename = self.nameof(restorer)
             # ignore possible dict, handled later by initinstance filtering
             # in other cases, we expect that the class knows what to pickle.
         else:
             restoreargs = (base, cls)
             restorename = '%s.__new__' % base
-            restorestate = instance.__dict__
+            if hasattr(instance, '__getstate__'):
+                restorestate = instance.__getstate__()
+            else:
+                restorestate = instance.__dict__
         restoreargstr = self.nameofargs(restoreargs)
         if isinstance(klass, type):
             self.initcode.append('%s = %s(%s)' % (name, restorename,
@@ -438,7 +527,10 @@
             module = getattr(obj, "__module__", None)
             if module is None:
                 module = whichmodule(obj, name)
-
+            if self.is_app_domain(module):
+                # not allowed to import this
+                raise PicklingError('%s belongs to the user program' %
+                                    name)
             try:
                 __import__(module)
                 mod = sys.modules[module]
@@ -467,7 +559,10 @@
         # look for skipped functions
         if self.translator.frozen:
             if func not in self.translator.flowgraphs:
-                return self.skipped_function(func)
+                # see if this is in translator's domain
+                module = whichmodule(func, func.__name__)
+                if self.is_app_domain(module):
+                    return self.skipped_function(func)
         else:
             if (func.func_doc and
                 func.func_doc.lstrip().startswith('NOT_RPYTHON')):
@@ -479,11 +574,17 @@
         args = (func.func_code, func.func_globals, func.func_name,
                 func.func_defaults, func.func_closure)
         pyfuncobj = self.uniquename('gfunc_' + func.__name__)
+        # touch code,to avoid extra indentation
+        self.nameof(func.func_code)
         self.initcode.append('%s = new.function(%s)' % (pyfuncobj,
                              self.nameofargs(args)) )
         if func.__dict__:
-            self.initcode.append('%s.__dict__.update(%s)' % (
-                pyfuncobj, self.nameof(func.__dict__)) )
+            for k, v in func.__dict__.items():
+                try:
+                    self.initcode.append('%s.%s = %s' % (
+                        pyfuncobj, k, self.nameof(v)) )
+                except PicklingError:
+                    pass
         return pyfuncobj
 
     def nameof_cell(self, cel):
@@ -493,25 +594,32 @@
                                               self.nameof(obj)) )
         return pycell
 
+    def nameof_property(self, prop):
+        pyprop = self.uniquename('gprop_')
+        self.initcode.append('%s = property(%s)' % (pyprop, self.nameofargs(
+            (prop.fget, prop.fset, prop.fdel, prop.__doc__))) )
+        return pyprop
+
     def nameof_code(self, code):
         args = (code.co_argcount, code.co_nlocals, code.co_stacksize,
                 code.co_flags, code.co_code, code.co_consts, code.co_names,
                 code.co_varnames, code.co_filename, code.co_name,
                 code.co_firstlineno, code.co_lnotab, code.co_freevars,
                 code.co_cellvars)
-        # make the code, filename and lnotab strings nicer
-        codestr = code.co_code
-        codestrname = self.uniquename('gcodestr_' + code.co_name)
-        self.picklenames[Constant(codestr)] = codestrname
-        self.initcode.append('%s = %r' % (codestrname, codestr))
-        fnstr = code.co_filename
-        fnstrname = self.uniquename('gfname_' + code.co_name)
-        self.picklenames[Constant(fnstr)] = fnstrname
-        self.initcode.append('%s = %r' % (fnstrname, fnstr))
-        lnostr = code.co_lnotab
-        lnostrname = self.uniquename('glnotab_' + code.co_name)
-        self.picklenames[Constant(lnostr)] = lnostrname
-        self.initcode.append('%s = %r' % (lnostrname, lnostr))
+        if not self.inline_consts:
+            # make the code, filename and lnotab strings nicer
+            codestr = code.co_code
+            codestrname = self.uniquename('gcodestr_' + code.co_name)
+            self.picklenames[Constant(codestr)] = codestrname
+            self.initcode.append('%s = %r' % (codestrname, codestr))
+            fnstr = code.co_filename
+            fnstrname = self.uniquename('gfname_' + code.co_name)
+            self.picklenames[Constant(fnstr)] = fnstrname
+            self.initcode.append('%s = %r' % (fnstrname, fnstr))
+            lnostr = code.co_lnotab
+            lnostrname = self.uniquename('glnotab_' + code.co_name)
+            self.picklenames[Constant(lnostr)] = lnostrname
+            self.initcode.append('%s = %r' % (lnostrname, lnostr))
         argstr = self.nameofargs(args)
         codeobj = self.uniquename('gcode_' + code.co_name)
         self.initcode.append('%s = new.code(%s)' % (codeobj, argstr))
@@ -523,9 +631,36 @@
         if fil is sys.stderr: return "sys.stderr"
         raise Exception, 'Cannot translate an already-open file: %r' % (fil,)
 
+    def nameof_methodwrapper(self, wp):
+        # this object should be enhanced in CPython!
+        reprwp = repr(wp)
+        name = wp.__name__
+        def dummy_methodwrapper():
+            return reprwp + (': method %s of unknown object '
+                'cannot be reconstructed, sorry!' % name )
+        return self.nameof(dummy_methodwrapper)
+
     def later(self, gen):
         self.latercode.append((gen, self.debugstack))
 
+    def spill_source(self, final):
+        def write_block(lines):
+            if not lines:
+                return
+            txt = '\n'.join(lines)
+            print >> self.outfile, txt
+            print >> self.outfile, '## SECTION ##'
+
+        if not self.outfile:
+            return
+        chunk = self._partition
+        while len(self.initcode) >= chunk:
+            write_block(self.initcode[:chunk])
+            del self.initcode[:chunk]
+        if final and self.initcode:
+            write_block(self.initcode)
+            del self.initcode[:]
+
     def collect_initcode(self):
         while self.latercode:
             gen, self.debugstack = self.latercode.pop()
@@ -533,6 +668,9 @@
             for line in gen:
                 self.initcode.append(line)
             self.debugstack = ()
+            if len(self.initcode) >= self._partition:
+                self.spill_source(False)
+        self.spill_source(True)
 
     def getfrozenbytecode(self):
         self.initcode.append('')
@@ -563,11 +701,3 @@
             func.func_defaults, (cel,))
     func = new.function(*args)
     return func()
-
-# save creation of many tuples
-
-def setstate(obj, *args):
-    obj.__setstate__(args)
-
-def extendlist(obj, *args):
-    obj.extend(args)



More information about the Pypy-commit mailing list