[pypy-commit] pypy py3.6-sandbox-2: hg merge sandbox-2

arigo pypy.commits at gmail.com
Sun Aug 11 14:56:04 EDT 2019


Author: Armin Rigo <arigo at tunes.org>
Branch: py3.6-sandbox-2
Changeset: r97149:67130f4a3a0c
Date: 2019-08-11 20:54 +0200
http://bitbucket.org/pypy/pypy/changeset/67130f4a3a0c/

Log:	hg merge sandbox-2

diff too long, truncating to 2000 out of 2641 lines

diff --git a/pypy/config/pypyoption.py b/pypy/config/pypyoption.py
--- a/pypy/config/pypyoption.py
+++ b/pypy/config/pypyoption.py
@@ -44,6 +44,12 @@
     #" _ssl", "_hashlib", "crypt"
 ])
 
+# --sandbox
+sandbox_modules = default_modules.copy()
+sandbox_modules.update([
+    "array", "binascii",
+])
+
 import rpython.rlib.rvmprof.cintf
 if rpython.rlib.rvmprof.cintf.IS_SUPPORTED:
     working_modules.add('_vmprof')
@@ -271,7 +277,7 @@
 def enable_allworkingmodules(config):
     modules = working_modules.copy()
     if config.translation.sandbox:
-        modules = default_modules
+        modules = sandbox_modules.copy()
     if config.translation.reverse_debugger:
         for mod in reverse_debugger_disable_modules:
             setattr(config.objspace.usemodules, mod, False)
diff --git a/pypy/goal/targetpypystandalone.py b/pypy/goal/targetpypystandalone.py
--- a/pypy/goal/targetpypystandalone.py
+++ b/pypy/goal/targetpypystandalone.py
@@ -326,9 +326,7 @@
             config.translation.jit = True
 
         if config.translation.sandbox:
-            assert 0, ("--sandbox is not tested nor maintained.  If you "
-                       "really want to try it anyway, remove this line in "
-                       "pypy/goal/targetpypystandalone.py.")
+            config.objspace.lonepycfiles = False
 
         if config.objspace.usemodules.cpyext:
             if config.translation.gc not in ('incminimark', 'boehm'):
@@ -387,6 +385,8 @@
         from pypy.module.gc.hook import LowLevelGcHooks
         if self.space is None:
             raise Exception("get_gchooks must be called after get_entry_point")
+        if self.space.config.translation.sandbox:
+            return None
         return self.space.fromcache(LowLevelGcHooks)
 
     def get_entry_point(self, config):
diff --git a/pypy/module/gc/moduledef.py b/pypy/module/gc/moduledef.py
--- a/pypy/module/gc/moduledef.py
+++ b/pypy/module/gc/moduledef.py
@@ -16,7 +16,11 @@
 
     def __init__(self, space, w_name):
         if (not space.config.translating or
-                space.config.translation.gctransformer == "framework"):
+                (space.config.translation.gctransformer == "framework"
+                 and not space.config.translation.sandbox)):
+            # some of these functions allow app-level code to do invalid
+            # things by trying hard enough.  For safety, in sandbox mode
+            # we don't provide any of them.
             self.appleveldefs.update({
                 'dump_rpy_heap': 'app_referents.dump_rpy_heap',
                 'get_stats': 'app_referents.get_stats',
diff --git a/pypy/module/sys/vm.py b/pypy/module/sys/vm.py
--- a/pypy/module/sys/vm.py
+++ b/pypy/module/sys/vm.py
@@ -66,6 +66,13 @@
     from rpython.rlib.rgc import increase_root_stack_depth
     if new_limit <= 0:
         raise oefmt(space.w_ValueError, "recursion limit must be positive")
+    #
+    if space.config.translation.sandbox:
+        if new_limit > space.sys.recursionlimit:
+            msg = "sandbox: cannot increase the recursion limit" 
+            space.warn(space.newtext(msg), space.w_RuntimeWarning)
+        return
+    #
     try:
         _stack_set_length_fraction(new_limit * 0.001)
         _stack_check_noinline()
diff --git a/pypy/module/time/interp_time.py b/pypy/module/time/interp_time.py
--- a/pypy/module/time/interp_time.py
+++ b/pypy/module/time/interp_time.py
@@ -316,15 +316,15 @@
 TM_P = lltype.Ptr(tm)
 c_time = external('time', [rffi.TIME_TP], rffi.TIME_T)
 c_gmtime = external('gmtime', [rffi.TIME_TP], TM_P,
-                    save_err=rffi.RFFI_SAVE_ERRNO)
-c_mktime = external('mktime', [TM_P], rffi.TIME_T)
+                    save_err=rffi.RFFI_SAVE_ERRNO, sandboxsafe=True)
+c_mktime = external('mktime', [TM_P], rffi.TIME_T, sandboxsafe=True)
 c_localtime = external('localtime', [rffi.TIME_TP], TM_P,
-                       save_err=rffi.RFFI_SAVE_ERRNO)
+                       save_err=rffi.RFFI_SAVE_ERRNO, sandboxsafe=True)
 if HAS_CLOCK_GETTIME:
     from rpython.rlib.rtime import TIMESPEC, c_clock_gettime
     from rpython.rlib.rtime import c_clock_settime, c_clock_getres
 if _POSIX:
-    c_tzset = external('tzset', [], lltype.Void)
+    c_tzset = external('tzset', [], lltype.Void, sandboxsafe=True)
 if _WIN:
     win_eci = ExternalCompilationInfo(
         includes = ["time.h"],
@@ -363,7 +363,7 @@
                             rffi.INT, win_eci, calling_conv='c')
 
 c_strftime = external('strftime', [rffi.CCHARP, rffi.SIZE_T, rffi.CCHARP, TM_P],
-                      rffi.SIZE_T)
+                      rffi.SIZE_T, sandboxsafe=True)
 
 def _init_timezone(space):
     timezone = daylight = altzone = 0
@@ -853,7 +853,7 @@
     rffi.setintfield(buf_value, "c_tm_year",
                      rffi.getintfield(buf_value, "c_tm_year") - 1900)
 
-    if _WIN:
+    if _WIN or space.config.translation.sandbox:
         # check that the format string contains only valid directives
         length = len(format)
         i = 0
diff --git a/rpython/annotator/policy.py b/rpython/annotator/policy.py
--- a/rpython/annotator/policy.py
+++ b/rpython/annotator/policy.py
@@ -72,29 +72,3 @@
         for callback in bk.pending_specializations:
             callback()
         del bk.pending_specializations[:]
-        if annotator.added_blocks is not None:
-            all_blocks = annotator.added_blocks
-        else:
-            all_blocks = annotator.annotated
-        for block in list(all_blocks):
-            for i, instr in enumerate(block.operations):
-                if not isinstance(instr, (op.simple_call, op.call_args)):
-                    continue
-                v_func = instr.args[0]
-                s_func = annotator.annotation(v_func)
-                if not hasattr(s_func, 'needs_sandboxing'):
-                    continue
-                key = ('sandboxing', s_func.const)
-                if key not in bk.emulated_pbc_calls:
-                    params_s = s_func.args_s
-                    s_result = s_func.s_result
-                    from rpython.translator.sandbox.rsandbox import make_sandbox_trampoline
-                    sandbox_trampoline = make_sandbox_trampoline(
-                        s_func.name, params_s, s_result)
-                    sandbox_trampoline._signature_ = [SomeTuple(items=params_s)], s_result
-                    bk.emulate_pbc_call(key, bk.immutablevalue(sandbox_trampoline), params_s)
-                else:
-                    s_trampoline = bk.emulated_pbc_calls[key][0]
-                    sandbox_trampoline = s_trampoline.const
-                new = instr.replace({instr.args[0]: Constant(sandbox_trampoline)})
-                block.operations[i] = new
diff --git a/rpython/config/translationoption.py b/rpython/config/translationoption.py
--- a/rpython/config/translationoption.py
+++ b/rpython/config/translationoption.py
@@ -115,8 +115,7 @@
     BoolOption("sandbox", "Produce a fully-sandboxed executable",
                default=False, cmdline="--sandbox",
                requires=[("translation.thread", False)],
-               suggests=[("translation.gc", "generation"),
-                         ("translation.gcrootfinder", "shadowstack")]),
+               suggests=[]),
     BoolOption("rweakref", "The backend supports RPython-level weakrefs",
                default=True),
 
diff --git a/rpython/memory/gc/env.py b/rpython/memory/gc/env.py
--- a/rpython/memory/gc/env.py
+++ b/rpython/memory/gc/env.py
@@ -132,7 +132,10 @@
 # ---------- Linux2 ----------
 
 def get_L2cache_linux2():
-    arch = os.uname()[4]  # machine
+    try:
+        arch = os.uname()[4]  # machine
+    except OSError:   # we may simulate a failure from sandboxing, for example
+        return -1
     if arch.endswith('86') or arch == 'x86_64':
         return get_L2cache_linux2_cpuinfo()
     if arch in ('alpha', 'ppc'):
diff --git a/rpython/memory/gc/incminimark.py b/rpython/memory/gc/incminimark.py
--- a/rpython/memory/gc/incminimark.py
+++ b/rpython/memory/gc/incminimark.py
@@ -519,10 +519,21 @@
             bigobj = self.nonlarge_max + 1
             self.max_number_of_pinned_objects = self.nursery_size / (bigobj * 2)
 
+    def safer_variant(self):
+        # When running in sandbox mode, turn off two features: incrementality
+        # and object pinning.  This should be done in a way that cannot *add*
+        # any security bug, but it could in theory avoid bugs in this complex
+        # logic.
+        return self.config.sandbox
+
     def enable(self):
         self.enabled = True
 
     def disable(self):
+        if self.safer_variant():
+            # gc.disable() is ignored in this mode.  It should not be
+            # allowed to disable major collections.
+            return
         self.enabled = False
 
     def isenabled(self):
@@ -763,6 +774,16 @@
     def collect(self, gen=2):
         """Do a minor (gen=0), start a major (gen=1), or do a full
         major (gen>=2) collection."""
+        self.check_safe_gc_state()
+        if self.safer_variant():
+            # gen < 0 is dangerous, and gen == 1 leaves the GC in the
+            # middle of a major collection.  We disable these two modes
+            # in the safer variant.
+            if gen <= 0:
+                gen = 0
+            else:
+                gen = 2
+        #
         if gen < 0:
             # Dangerous! this makes no progress on the major GC cycle.
             # If called too often, the memory usage will keep increasing,
@@ -786,6 +807,7 @@
             # This does a complete minor and major collection.
             self.minor_and_major_collection()
         self.rrc_invoke_callback()
+        self.check_safe_gc_state()
 
     def collect_step(self):
         """
@@ -795,12 +817,26 @@
         This is meant to be used together with gc.disable(), to have a
         fine-grained control on when the GC runs.
         """
+        # This function should never be called in safer_variant() mode,
+        # because it leaves the GC in the middle of an incremental step.
+        # In PyPy the function gc.collect_step() is removed from --sandbox.
+        if self.safer_variant():
+            out_of_memory("sandbox: collect_step() has been disabled")
+            return False
+        #
         old_state = self.gc_state
         self._minor_collection()
         self.major_collection_step()
         self.rrc_invoke_callback()
         return rgc._encode_states(old_state, self.gc_state)
 
+    def check_safe_gc_state(self):
+        if self.safer_variant():
+            # in this variant, gc_state should always be SCANNING when the
+            # mutator runs
+            if self.gc_state != STATE_SCANNING:
+                out_of_memory("sandbox: unexpected internal GC state")
+
     def minor_collection_with_major_progress(self, extrasize=0,
                                              force_enabled=False):
         """Do a minor collection.  Then, if the GC is enabled and there
@@ -808,6 +844,7 @@
         step.  If there is no major GC but the threshold is reached, start a
         major GC.
         """
+        self.check_safe_gc_state()
         self._minor_collection()
         if not self.enabled and not force_enabled:
             return
@@ -826,6 +863,10 @@
         if self.gc_state != STATE_SCANNING or self.threshold_reached(extrasize):
             self.major_collection_step(extrasize)
 
+            if self.safer_variant():
+                # finish the just-started major collection immediately
+                self.gc_step_until(STATE_SCANNING)
+
             # See documentation in major_collection_step() for target invariants
             while self.gc_state != STATE_SCANNING:    # target (A1)
                 threshold = self.threshold_objects_made_old
@@ -840,6 +881,7 @@
                 self.major_collection_step(extrasize)
 
         self.rrc_invoke_callback()
+        self.check_safe_gc_state()
 
 
     def collect_and_reserve(self, totalsize):
@@ -1098,6 +1140,8 @@
         return self.is_in_nursery(obj)
 
     def pin(self, obj):
+        if self.safer_variant():    # no pinning in the safer variant
+            return False
         if self.pinned_objects_in_nursery >= self.max_number_of_pinned_objects:
             return False
         if not self.is_in_nursery(obj):
@@ -3074,6 +3118,11 @@
 
     def rawrefcount_init(self, dealloc_trigger_callback):
         # see pypy/doc/discussion/rawrefcount.rst
+        if self.safer_variant():
+            # note that the rawrefcount_xxx() functions should not be callable
+            # if sandbox is enabled; see gctransform/framework.py
+            out_of_memory("sandbox: rawrefcount_init() not supported")
+            return
         if not self.rrc_enabled:
             self.rrc_p_list_young = self.AddressStack()
             self.rrc_p_list_old   = self.AddressStack()
diff --git a/rpython/memory/gctransform/framework.py b/rpython/memory/gctransform/framework.py
--- a/rpython/memory/gctransform/framework.py
+++ b/rpython/memory/gctransform/framework.py
@@ -479,7 +479,8 @@
                                             annmodel.SomeInteger(nonneg=True)],
                                            annmodel.s_None)
 
-        if hasattr(GCClass, 'rawrefcount_init'):
+        if (hasattr(GCClass, 'rawrefcount_init')
+                and not self.translator.config.translation.sandbox):
             self.rawrefcount_init_ptr = getfn(
                 GCClass.rawrefcount_init,
                 [s_gc, SomePtr(GCClass.RAWREFCOUNT_DEALLOC_TRIGGER)],
diff --git a/rpython/rlib/debug.py b/rpython/rlib/debug.py
--- a/rpython/rlib/debug.py
+++ b/rpython/rlib/debug.py
@@ -6,7 +6,6 @@
 from rpython.rtyper.extregistry import ExtRegistryEntry
 from rpython.rlib.objectmodel import we_are_translated, always_inline
 from rpython.rlib.rarithmetic import is_valid_int, r_longlong
-from rpython.rtyper.extfunc import register_external
 from rpython.rtyper.lltypesystem import lltype
 from rpython.rtyper.lltypesystem import rffi
 from rpython.translator.tool.cbuild import ExternalCompilationInfo
@@ -460,7 +459,10 @@
 
 
 def attach_gdb():
-    import pdb; pdb.set_trace()
+    if not we_are_translated():
+        import pdb; pdb.set_trace()
+    else:
+        impl_attach_gdb()
 
 if not sys.platform.startswith('win'):
     if sys.platform.startswith('linux'):
@@ -586,11 +588,8 @@
         d['separate_module_files'] = [cppfile]
         return ExternalCompilationInfo(**d)
 
-    ll_attach = rffi.llexternal("AttachToVS", [], lltype.Void,
-                                compilation_info=make_vs_attach_eci())
+    #ll_attach = rffi.llexternal("AttachToVS", [], lltype.Void,
+    #                            compilation_info=make_vs_attach_eci())
     def impl_attach_gdb():
         #ll_attach()
         print "AttachToVS is disabled at the moment (compilation failure)"
-
-register_external(attach_gdb, [], result=None,
-                  export_name="impl_attach_gdb", llimpl=impl_attach_gdb)
diff --git a/rpython/rlib/entrypoint.py b/rpython/rlib/entrypoint.py
--- a/rpython/rlib/entrypoint.py
+++ b/rpython/rlib/entrypoint.py
@@ -41,7 +41,9 @@
     return deco
 
 
-pypy_debug_catch_fatal_exception = rffi.llexternal('pypy_debug_catch_fatal_exception', [], lltype.Void)
+pypy_debug_catch_fatal_exception = rffi.llexternal(
+    'pypy_debug_catch_fatal_exception', [], lltype.Void,
+    sandboxsafe=True)
 
 def entrypoint_highlevel(key, argtypes, c_name=None):
     """
diff --git a/rpython/rlib/objectmodel.py b/rpython/rlib/objectmodel.py
--- a/rpython/rlib/objectmodel.py
+++ b/rpython/rlib/objectmodel.py
@@ -311,19 +311,12 @@
 def sc_we_are_translated(ctx):
     return Constant(True)
 
-def register_replacement_for(replaced_function, sandboxed_name=None):
+def register_replacement_for(replaced_function):
     def wrap(func):
         from rpython.rtyper.extregistry import ExtRegistryEntry
-        # to support calling func directly
-        func._sandbox_external_name = sandboxed_name
         class ExtRegistry(ExtRegistryEntry):
             _about_ = replaced_function
             def compute_annotation(self):
-                if sandboxed_name:
-                    config = self.bookkeeper.annotator.translator.config
-                    if config.translation.sandbox:
-                        func._sandbox_external_name = sandboxed_name
-                        func._dont_inline_ = True
                 return self.bookkeeper.immutablevalue(func)
         return func
     return wrap
diff --git a/rpython/rlib/rfloat.py b/rpython/rlib/rfloat.py
--- a/rpython/rlib/rfloat.py
+++ b/rpython/rlib/rfloat.py
@@ -5,7 +5,6 @@
 
 from rpython.annotator.model import SomeString, SomeChar
 from rpython.rlib import objectmodel, unroll
-from rpython.rtyper.extfunc import register_external
 from rpython.rtyper.tool import rffi_platform
 from rpython.translator.tool.cbuild import ExternalCompilationInfo
 from rpython.rlib.objectmodel import not_rpython
diff --git a/rpython/rlib/rposix.py b/rpython/rlib/rposix.py
--- a/rpython/rlib/rposix.py
+++ b/rpython/rlib/rposix.py
@@ -461,9 +461,7 @@
     func = getattr(os, name, None)
     if func is None:
         return lambda f: f
-    return register_replacement_for(
-        func,
-        sandboxed_name='ll_os.ll_os_%s' % name)
+    return register_replacement_for(func)
 
 @specialize.arg(0)
 def handle_posix_error(name, result):
@@ -1081,7 +1079,7 @@
     # for more details. If this get's fixed we can use lltype.Signed
     # again.  (The exact same issue occurs on ppc64 big-endian.)
     c_func = external(name, [rffi.INT], lltype.Signed,
-                      macro=_MACRO_ON_POSIX)
+                      macro=_MACRO_ON_POSIX, sandboxsafe=True)
     returning_int = name in ('WEXITSTATUS', 'WSTOPSIG', 'WTERMSIG')
 
     @replace_os_function(name)
@@ -1992,9 +1990,12 @@
 
 if sys.platform != 'win32':
     # These are actually macros on some/most systems
-    c_makedev = external('makedev', [rffi.INT, rffi.INT], rffi.INT, macro=True)
-    c_major = external('major', [rffi.INT], rffi.INT, macro=True)
-    c_minor = external('minor', [rffi.INT], rffi.INT, macro=True)
+    c_makedev = external('makedev', [rffi.INT, rffi.INT], rffi.INT, macro=True,
+                         sandboxsafe=True)
+    c_major = external('major', [rffi.INT], rffi.INT, macro=True,
+                       sandboxsafe=True)
+    c_minor = external('minor', [rffi.INT], rffi.INT, macro=True,
+                       sandboxsafe=True)
 
     @replace_os_function('makedev')
     def makedev(maj, min):
diff --git a/rpython/rlib/rposix_environ.py b/rpython/rlib/rposix_environ.py
--- a/rpython/rlib/rposix_environ.py
+++ b/rpython/rlib/rposix_environ.py
@@ -5,7 +5,6 @@
 from rpython.rlib.objectmodel import enforceargs
 # importing rposix here creates a cycle on Windows
 from rpython.rtyper.controllerentry import Controller
-from rpython.rtyper.extfunc import register_external
 from rpython.rtyper.lltypesystem import rffi, lltype
 from rpython.translator.tool.cbuild import ExternalCompilationInfo
 
@@ -97,9 +96,6 @@
 # Lower-level interface: dummy placeholders and external registations
 
 def r_envkeys():
-    just_a_placeholder
-
-def envkeys_llimpl():
     environ = os_get_environ()
     result = []
     i = 0
@@ -111,10 +107,6 @@
         i += 1
     return result
 
-register_external(r_envkeys, [], [str0],   # returns a list of strings
-                  export_name='ll_os.ll_os_envkeys',
-                  llimpl=envkeys_llimpl)
-
 # ____________________________________________________________
 
 def r_envitems():
@@ -190,18 +182,7 @@
 
     return envitems_llimpl, getenv_llimpl, putenv_llimpl
 
-envitems_llimpl, getenv_llimpl, putenv_llimpl = make_env_impls()
-
-register_external(r_envitems, [], [(str0, str0)],
-                  export_name='ll_os.ll_os_envitems',
-                  llimpl=envitems_llimpl)
-register_external(r_getenv, [str0],
-                  annmodel.SomeString(can_be_None=True, no_nul=True),
-                  export_name='ll_os.ll_os_getenv',
-                  llimpl=getenv_llimpl)
-register_external(r_putenv, [str0, str0], annmodel.s_None,
-                  export_name='ll_os.ll_os_putenv',
-                  llimpl=putenv_llimpl)
+r_envitems, r_getenv, r_putenv = make_env_impls()
 
 # ____________________________________________________________
 
@@ -215,7 +196,7 @@
     os_unsetenv = llexternal('unsetenv', [rffi.CCHARP], rffi.INT,
                                   save_err=rffi.RFFI_SAVE_ERRNO)
 
-    def unsetenv_llimpl(name):
+    def r_unsetenv(name):
         with rffi.scoped_str2charp(name) as l_name:
             error = rffi.cast(lltype.Signed, os_unsetenv(l_name))
         if error:
@@ -229,7 +210,4 @@
             del envkeepalive.byname[name]
             rffi.free_charp(l_oldstring)
 
-    register_external(r_unsetenv, [str0], annmodel.s_None,
-                      export_name='ll_os.ll_os_unsetenv',
-                      llimpl=unsetenv_llimpl)
     REAL_UNSETENV = True
diff --git a/rpython/rlib/rtime.py b/rpython/rlib/rtime.py
--- a/rpython/rlib/rtime.py
+++ b/rpython/rlib/rtime.py
@@ -103,9 +103,7 @@
     func = getattr(pytime, name, None)
     if func is None:
         return lambda f: f
-    return register_replacement_for(
-        func,
-        sandboxed_name='ll_time.ll_time_%s' % name)
+    return register_replacement_for(func)
 
 config = rffi_platform.configure(CConfig)
 globals().update(config)
diff --git a/rpython/rtyper/extfunc.py b/rpython/rtyper/extfunc.py
--- a/rpython/rtyper/extfunc.py
+++ b/rpython/rtyper/extfunc.py
@@ -95,9 +95,7 @@
     def compute_annotation(self):
         s_result = SomeExternalFunction(
             self.name, self.signature_args, self.signature_result)
-        if (self.bookkeeper.annotator.translator.config.translation.sandbox
-                and not self.safe_not_sandboxed):
-            s_result.needs_sandboxing = True
+        assert self.safe_not_sandboxed
         return s_result
 
 
@@ -113,6 +111,12 @@
     sandboxsafe: use True if the function performs no I/O (safe for --sandbox)
     """
 
+    if not sandboxsafe:
+        raise Exception("Don't use the outdated register_external() protocol "
+                        "to invoke external function; use instead "
+                        "rffi.llexternal().  The old register_external() is "
+                        "now only supported with safeboxsafe=True.")
+
     if export_name is None:
         export_name = function.__name__
     params_s = [annotation(arg) for arg in args]
diff --git a/rpython/rtyper/lltypesystem/rffi.py b/rpython/rtyper/lltypesystem/rffi.py
--- a/rpython/rtyper/lltypesystem/rffi.py
+++ b/rpython/rtyper/lltypesystem/rffi.py
@@ -1405,12 +1405,14 @@
             lltype.Void,
             releasegil=False,
             calling_conv='c',
+            sandboxsafe=True,
         )
 c_memset = llexternal("memset",
             [VOIDP, lltype.Signed, SIZE_T],
             lltype.Void,
             releasegil=False,
             calling_conv='c',
+            sandboxsafe=True,
         )
 
 
diff --git a/rpython/rtyper/rtyper.py b/rpython/rtyper/rtyper.py
--- a/rpython/rtyper/rtyper.py
+++ b/rpython/rtyper/rtyper.py
@@ -29,7 +29,6 @@
 from rpython.rtyper.rclass import RootClassRepr
 from rpython.tool.pairtype import pair
 from rpython.translator.unsimplify import insert_empty_block
-from rpython.translator.sandbox.rsandbox import make_sandbox_trampoline
 
 
 class RTyperBackend(object):
@@ -569,17 +568,6 @@
     def getcallable(self, graph):
         def getconcretetype(v):
             return self.bindingrepr(v).lowleveltype
-        if self.annotator.translator.config.translation.sandbox:
-            try:
-                name = graph.func._sandbox_external_name
-            except AttributeError:
-                pass
-            else:
-                args_s = [v.annotation for v in graph.getargs()]
-                s_result = graph.getreturnvar().annotation
-                sandboxed = make_sandbox_trampoline(name, args_s, s_result)
-                return self.getannmixlevel().delayedfunction(
-                        sandboxed, args_s, s_result)
 
         return getfunctionptr(graph, getconcretetype)
 
diff --git a/rpython/rtyper/test/test_extfunc.py b/rpython/rtyper/test/test_extfunc.py
--- a/rpython/rtyper/test/test_extfunc.py
+++ b/rpython/rtyper/test/test_extfunc.py
@@ -18,7 +18,7 @@
             "NOT_RPYTHON"
             return eval("x+40")
 
-        register_external(b, [int], result=int)
+        register_external(b, [int], result=int, sandboxsafe=True)
 
         def f():
             return b(2)
@@ -42,7 +42,7 @@
             return y + x
 
         register_external(c, [int, int], result=int, llimpl=llimpl,
-                          export_name='ccc')
+                          export_name='ccc', sandboxsafe=True)
 
         def f():
             return c(3, 4)
@@ -62,7 +62,8 @@
             tuple as an argument so that register_external's behavior for
             tuple-taking functions can be verified.
             """
-        register_external(function_with_tuple_arg, [(int,)], int)
+        register_external(function_with_tuple_arg, [(int,)], int,
+                          sandboxsafe=True)
 
         def f():
             return function_with_tuple_arg((1,))
@@ -82,11 +83,11 @@
         """
         def function_with_list():
             pass
-        register_external(function_with_list, [[int]], int)
+        register_external(function_with_list, [[int]], int, sandboxsafe=True)
 
         def function_returning_list():
             pass
-        register_external(function_returning_list, [], [int])
+        register_external(function_returning_list, [], [int], sandboxsafe=True)
 
         def f():
             return function_with_list(function_returning_list())
@@ -100,7 +101,7 @@
         str0 = SomeString(no_nul=True)
         def os_open(s):
             pass
-        register_external(os_open, [str0], None)
+        register_external(os_open, [str0], None, sandboxsafe=True)
         def f(s):
             return os_open(s)
         policy = AnnotatorPolicy()
@@ -121,7 +122,7 @@
         def os_execve(l):
             pass
 
-        register_external(os_execve, [[str0]], None)
+        register_external(os_execve, [[str0]], None, sandboxsafe=True)
 
         def f(l):
             return os_execve(l)
@@ -149,7 +150,7 @@
         def a_llfakeimpl(i):
             return i * 3
         register_external(a, [int], int, llimpl=a_llimpl,
-                          llfakeimpl=a_llfakeimpl)
+                          llfakeimpl=a_llfakeimpl, sandboxsafe=True)
         def f(i):
             return a(i)
 
diff --git a/rpython/rtyper/test/test_llinterp.py b/rpython/rtyper/test/test_llinterp.py
--- a/rpython/rtyper/test/test_llinterp.py
+++ b/rpython/rtyper/test/test_llinterp.py
@@ -584,7 +584,8 @@
     def raising():
         raise OSError(15, "abcd")
 
-    ext = register_external(external, [], llimpl=raising, llfakeimpl=raising)
+    ext = register_external(external, [], llimpl=raising, llfakeimpl=raising,
+                            sandboxsafe=True)
 
     def f():
         # this is a useful llfakeimpl that raises an exception
diff --git a/rpython/translator/c/genc.py b/rpython/translator/c/genc.py
--- a/rpython/translator/c/genc.py
+++ b/rpython/translator/c/genc.py
@@ -928,6 +928,10 @@
     fi = incfilename.open('w')
     fi.write('#ifndef _PY_COMMON_HEADER_H\n#define _PY_COMMON_HEADER_H\n')
 
+    if database.sandbox:
+        from rpython.translator.sandbox import rsandbox
+        eci = eci.merge(rsandbox.extra_eci(database.translator.rtyper))
+
     #
     # Header
     #
diff --git a/rpython/translator/c/node.py b/rpython/translator/c/node.py
--- a/rpython/translator/c/node.py
+++ b/rpython/translator/c/node.py
@@ -885,11 +885,18 @@
     if db.sandbox:
         if (getattr(obj, 'external', None) is not None and
                 not obj._safe_not_sandboxed):
-            from rpython.translator.sandbox import rsandbox
-            obj.__dict__['graph'] = rsandbox.get_sandbox_stub(
-                obj, db.translator.rtyper)
-            obj.__dict__.pop('_safe_not_sandboxed', None)
-            obj.__dict__.pop('external', None)
+            try:
+                sandbox_mapping = db.sandbox_mapping
+            except AttributeError:
+                sandbox_mapping = db.sandbox_mapping = {}
+            try:
+                obj = sandbox_mapping[obj]
+            except KeyError:
+                from rpython.translator.sandbox import rsandbox
+                llfunc = rsandbox.get_sandbox_stub(
+                    obj, db.translator.rtyper)
+                sandbox_mapping[obj] = llfunc._obj
+                obj = llfunc._obj
     if forcename:
         name = forcename
     else:
diff --git a/rpython/translator/sandbox/_marshal.py b/rpython/translator/sandbox/_marshal.py
deleted file mode 100644
--- a/rpython/translator/sandbox/_marshal.py
+++ /dev/null
@@ -1,695 +0,0 @@
-# Copy of lib_pypy/_marshal.py needed by sandlib
-"""Internal Python object serialization
-
-This module contains functions that can read and write Python values in a binary format. The format is specific to Python, but independent of machine architecture issues (e.g., you can write a Python value to a file on a PC, transport the file to a Sun, and read it back there). Details of the format may change between Python versions.
-"""
-
-# NOTE: This module is used in the Python3 interpreter, but also by
-# the "sandboxed" process.  It must work for Python2 as well.
-
-import types
-from _codecs import utf_8_decode, utf_8_encode
-
-try:
-    intern
-except NameError:
-    from sys import intern
-
-try: from __pypy__ import builtinify
-except ImportError: builtinify = lambda f: f
-
-
-TYPE_NULL     = '0'
-TYPE_NONE     = 'N'
-TYPE_FALSE    = 'F'
-TYPE_TRUE     = 'T'
-TYPE_STOPITER = 'S'
-TYPE_ELLIPSIS = '.'
-TYPE_INT      = 'i'
-TYPE_INT64    = 'I'
-TYPE_FLOAT    = 'f'
-TYPE_COMPLEX  = 'x'
-TYPE_LONG     = 'l'
-TYPE_STRING   = 's'
-TYPE_INTERNED = 't'
-TYPE_STRINGREF= 'R'
-TYPE_TUPLE    = '('
-TYPE_LIST     = '['
-TYPE_DICT     = '{'
-TYPE_CODE     = 'c'
-TYPE_UNICODE  = 'u'
-TYPE_UNKNOWN  = '?'
-TYPE_SET      = '<'
-TYPE_FROZENSET= '>'
-
-class _Marshaller:
-
-    dispatch = {}
-
-    def __init__(self, writefunc):
-        self._write = writefunc
-
-    def dump(self, x):
-        try:
-            self.dispatch[type(x)](self, x)
-        except KeyError:
-            for tp in type(x).mro():
-                func = self.dispatch.get(tp)
-                if func:
-                    break
-            else:
-                raise ValueError("unmarshallable object")
-            func(self, x)
-
-    def w_long64(self, x):
-        self.w_long(x)
-        self.w_long(x>>32)
-
-    def w_long(self, x):
-        a = chr(x & 0xff)
-        x >>= 8
-        b = chr(x & 0xff)
-        x >>= 8
-        c = chr(x & 0xff)
-        x >>= 8
-        d = chr(x & 0xff)
-        self._write(a + b + c + d)
-
-    def w_short(self, x):
-        self._write(chr((x)     & 0xff))
-        self._write(chr((x>> 8) & 0xff))
-
-    def dump_none(self, x):
-        self._write(TYPE_NONE)
-    dispatch[type(None)] = dump_none
-
-    def dump_bool(self, x):
-        if x:
-            self._write(TYPE_TRUE)
-        else:
-            self._write(TYPE_FALSE)
-    dispatch[bool] = dump_bool
-
-    def dump_stopiter(self, x):
-        if x is not StopIteration:
-            raise ValueError("unmarshallable object")
-        self._write(TYPE_STOPITER)
-    dispatch[type(StopIteration)] = dump_stopiter
-
-    def dump_ellipsis(self, x):
-        self._write(TYPE_ELLIPSIS)
-    
-    try:
-        dispatch[type(Ellipsis)] = dump_ellipsis
-    except NameError:
-        pass
-
-    # In Python3, this function is not used; see dump_long() below.
-    def dump_int(self, x):
-        y = x>>31
-        if y and y != -1:
-            self._write(TYPE_INT64)
-            self.w_long64(x)
-        else:
-            self._write(TYPE_INT)
-            self.w_long(x)
-    dispatch[int] = dump_int
-
-    def dump_long(self, x):
-        self._write(TYPE_LONG)
-        sign = 1
-        if x < 0:
-            sign = -1
-            x = -x
-        digits = []
-        while x:
-            digits.append(x & 0x7FFF)
-            x = x>>15
-        self.w_long(len(digits) * sign)
-        for d in digits:
-            self.w_short(d)
-    try:
-        long
-    except NameError:
-        dispatch[int] = dump_long
-    else:
-        dispatch[long] = dump_long
-
-    def dump_float(self, x):
-        write = self._write
-        write(TYPE_FLOAT)
-        s = repr(x)
-        write(chr(len(s)))
-        write(s)
-    dispatch[float] = dump_float
-
-    def dump_complex(self, x):
-        write = self._write
-        write(TYPE_COMPLEX)
-        s = repr(x.real)
-        write(chr(len(s)))
-        write(s)
-        s = repr(x.imag)
-        write(chr(len(s)))
-        write(s)
-    try:
-        dispatch[complex] = dump_complex
-    except NameError:
-        pass
-
-    def dump_string(self, x):
-        # XXX we can't check for interned strings, yet,
-        # so we (for now) never create TYPE_INTERNED or TYPE_STRINGREF
-        self._write(TYPE_STRING)
-        self.w_long(len(x))
-        self._write(x)
-    dispatch[bytes] = dump_string
-
-    def dump_unicode(self, x):
-        self._write(TYPE_UNICODE)
-        #s = x.encode('utf8')
-        s, len_s = utf_8_encode(x)
-        self.w_long(len_s)
-        self._write(s)
-    try:
-        unicode
-    except NameError:
-        dispatch[str] = dump_unicode
-    else:
-        dispatch[unicode] = dump_unicode
-
-    def dump_tuple(self, x):
-        self._write(TYPE_TUPLE)
-        self.w_long(len(x))
-        for item in x:
-            self.dump(item)
-    dispatch[tuple] = dump_tuple
-
-    def dump_list(self, x):
-        self._write(TYPE_LIST)
-        self.w_long(len(x))
-        for item in x:
-            self.dump(item)
-    dispatch[list] = dump_list
-
-    def dump_dict(self, x):
-        self._write(TYPE_DICT)
-        for key, value in x.items():
-            self.dump(key)
-            self.dump(value)
-        self._write(TYPE_NULL)
-    dispatch[dict] = dump_dict
-
-    def dump_code(self, x):
-        self._write(TYPE_CODE)
-        self.w_long(x.co_argcount)
-        self.w_long(x.co_nlocals)
-        self.w_long(x.co_stacksize)
-        self.w_long(x.co_flags)
-        self.dump(x.co_code)
-        self.dump(x.co_consts)
-        self.dump(x.co_names)
-        self.dump(x.co_varnames)
-        self.dump(x.co_freevars)
-        self.dump(x.co_cellvars)
-        self.dump(x.co_filename)
-        self.dump(x.co_name)
-        self.w_long(x.co_firstlineno)
-        self.dump(x.co_lnotab)
-    try:
-        dispatch[types.CodeType] = dump_code
-    except NameError:
-        pass
-
-    def dump_set(self, x):
-        self._write(TYPE_SET)
-        self.w_long(len(x))
-        for each in x:
-            self.dump(each)
-    try:
-        dispatch[set] = dump_set
-    except NameError:
-        pass
-
-    def dump_frozenset(self, x):
-        self._write(TYPE_FROZENSET)
-        self.w_long(len(x))
-        for each in x:
-            self.dump(each)
-    try:
-        dispatch[frozenset] = dump_frozenset
-    except NameError:
-        pass
-
-class _NULL:
-    pass
-
-class _StringBuffer:
-    def __init__(self, value):
-        self.bufstr = value
-        self.bufpos = 0
-
-    def read(self, n):
-        pos = self.bufpos
-        newpos = pos + n
-        ret = self.bufstr[pos : newpos]
-        self.bufpos = newpos
-        return ret
-
-
-class _Unmarshaller:
-
-    dispatch = {}
-
-    def __init__(self, readfunc):
-        self._read = readfunc
-        self._stringtable = []
-
-    def load(self):
-        c = self._read(1)
-        if not c:
-            raise EOFError
-        try:
-            return self.dispatch[c](self)
-        except KeyError:
-            raise ValueError("bad marshal code: %c (%d)" % (c, ord(c)))
-
-    def r_short(self):
-        lo = ord(self._read(1))
-        hi = ord(self._read(1))
-        x = lo | (hi<<8)
-        if x & 0x8000:
-            x = x - 0x10000
-        return x
-
-    def r_long(self):
-        s = self._read(4)
-        a = ord(s[0])
-        b = ord(s[1])
-        c = ord(s[2])
-        d = ord(s[3])
-        x = a | (b<<8) | (c<<16) | (d<<24)
-        if d & 0x80 and x > 0:
-            x = -((1<<32) - x)
-            return int(x)
-        else:
-            return x
-
-    def r_long64(self):
-        a = ord(self._read(1))
-        b = ord(self._read(1))
-        c = ord(self._read(1))
-        d = ord(self._read(1))
-        e = ord(self._read(1))
-        f = ord(self._read(1))
-        g = ord(self._read(1))
-        h = ord(self._read(1))
-        x = a | (b<<8) | (c<<16) | (d<<24)
-        x = x | (e<<32) | (f<<40) | (g<<48) | (h<<56)
-        if h & 0x80 and x > 0:
-            x = -((1<<64) - x)
-        return x
-
-    def load_null(self):
-        return _NULL
-    dispatch[TYPE_NULL] = load_null
-
-    def load_none(self):
-        return None
-    dispatch[TYPE_NONE] = load_none
-
-    def load_true(self):
-        return True
-    dispatch[TYPE_TRUE] = load_true
-
-    def load_false(self):
-        return False
-    dispatch[TYPE_FALSE] = load_false
-
-    def load_stopiter(self):
-        return StopIteration
-    dispatch[TYPE_STOPITER] = load_stopiter
-
-    def load_ellipsis(self):
-        return Ellipsis
-    dispatch[TYPE_ELLIPSIS] = load_ellipsis
-
-    dispatch[TYPE_INT] = r_long
-
-    dispatch[TYPE_INT64] = r_long64
-
-    def load_long(self):
-        size = self.r_long()
-        sign = 1
-        if size < 0:
-            sign = -1
-            size = -size
-        x = 0
-        for i in range(size):
-            d = self.r_short()
-            x = x | (d<<(i*15))
-        return x * sign
-    dispatch[TYPE_LONG] = load_long
-
-    def load_float(self):
-        n = ord(self._read(1))
-        s = self._read(n)
-        return float(s)
-    dispatch[TYPE_FLOAT] = load_float
-
-    def load_complex(self):
-        n = ord(self._read(1))
-        s = self._read(n)
-        real = float(s)
-        n = ord(self._read(1))
-        s = self._read(n)
-        imag = float(s)
-        return complex(real, imag)
-    dispatch[TYPE_COMPLEX] = load_complex
-
-    def load_string(self):
-        n = self.r_long()
-        return self._read(n)
-    dispatch[TYPE_STRING] = load_string
-
-    def load_interned(self):
-        n = self.r_long()
-        ret = intern(self._read(n))
-        self._stringtable.append(ret)
-        return ret
-    dispatch[TYPE_INTERNED] = load_interned
-
-    def load_stringref(self):
-        n = self.r_long()
-        return self._stringtable[n]
-    dispatch[TYPE_STRINGREF] = load_stringref
-
-    def load_unicode(self):
-        n = self.r_long()
-        s = self._read(n)
-        #ret = s.decode('utf8')
-        ret, len_ret = utf_8_decode(s)
-        return ret
-    dispatch[TYPE_UNICODE] = load_unicode
-
-    def load_tuple(self):
-        return tuple(self.load_list())
-    dispatch[TYPE_TUPLE] = load_tuple
-
-    def load_list(self):
-        n = self.r_long()
-        list = [self.load() for i in range(n)]
-        return list
-    dispatch[TYPE_LIST] = load_list
-
-    def load_dict(self):
-        d = {}
-        while 1:
-            key = self.load()
-            if key is _NULL:
-                break
-            value = self.load()
-            d[key] = value
-        return d
-    dispatch[TYPE_DICT] = load_dict
-
-    def load_code(self):
-        argcount = self.r_long()
-        nlocals = self.r_long()
-        stacksize = self.r_long()
-        flags = self.r_long()
-        code = self.load()
-        consts = self.load()
-        names = self.load()
-        varnames = self.load()
-        freevars = self.load()
-        cellvars = self.load()
-        filename = self.load()
-        name = self.load()
-        firstlineno = self.r_long()
-        lnotab = self.load()
-        return types.CodeType(argcount, nlocals, stacksize, flags, code, consts,
-                              names, varnames, filename, name, firstlineno,
-                              lnotab, freevars, cellvars)
-    dispatch[TYPE_CODE] = load_code
-
-    def load_set(self):
-        n = self.r_long()
-        args = [self.load() for i in range(n)]
-        return set(args)
-    dispatch[TYPE_SET] = load_set
-
-    def load_frozenset(self):
-        n = self.r_long()
-        args = [self.load() for i in range(n)]
-        return frozenset(args)
-    dispatch[TYPE_FROZENSET] = load_frozenset
-
-# ________________________________________________________________
-
-def _read(self, n):
-    pos = self.bufpos
-    newpos = pos + n
-    if newpos > len(self.bufstr): raise EOFError
-    ret = self.bufstr[pos : newpos]
-    self.bufpos = newpos
-    return ret
-
-def _read1(self):
-    ret = self.bufstr[self.bufpos]
-    self.bufpos += 1
-    return ret
-
-def _r_short(self):
-    lo = ord(_read1(self))
-    hi = ord(_read1(self))
-    x = lo | (hi<<8)
-    if x & 0x8000:
-        x = x - 0x10000
-    return x
-
-def _r_long(self):
-    # inlined this most common case
-    p = self.bufpos
-    s = self.bufstr
-    a = ord(s[p])
-    b = ord(s[p+1])
-    c = ord(s[p+2])
-    d = ord(s[p+3])
-    self.bufpos += 4
-    x = a | (b<<8) | (c<<16) | (d<<24)
-    if d & 0x80 and x > 0:
-        x = -((1<<32) - x)
-        return int(x)
-    else:
-        return x
-
-def _r_long64(self):
-    a = ord(_read1(self))
-    b = ord(_read1(self))
-    c = ord(_read1(self))
-    d = ord(_read1(self))
-    e = ord(_read1(self))
-    f = ord(_read1(self))
-    g = ord(_read1(self))
-    h = ord(_read1(self))
-    x = a | (b<<8) | (c<<16) | (d<<24)
-    x = x | (e<<32) | (f<<40) | (g<<48) | (h<<56)
-    if h & 0x80 and x > 0:
-        x = -((1<<64) - x)
-    return x
-
-_load_dispatch = {}
-
-class _FastUnmarshaller:
-
-    dispatch = {}
-
-    def __init__(self, buffer):
-        self.bufstr = buffer
-        self.bufpos = 0
-        self._stringtable = []
-
-    def load(self):
-        # make flow space happy
-        c = '?'
-        try:
-            c = self.bufstr[self.bufpos]
-            self.bufpos += 1
-            return _load_dispatch[c](self)
-        except KeyError:
-            raise ValueError("bad marshal code: %c (%d)" % (c, ord(c)))
-        except IndexError:
-            raise EOFError
-
-    def load_null(self):
-        return _NULL
-    dispatch[TYPE_NULL] = load_null
-
-    def load_none(self):
-        return None
-    dispatch[TYPE_NONE] = load_none
-
-    def load_true(self):
-        return True
-    dispatch[TYPE_TRUE] = load_true
-
-    def load_false(self):
-        return False
-    dispatch[TYPE_FALSE] = load_false
-
-    def load_stopiter(self):
-        return StopIteration
-    dispatch[TYPE_STOPITER] = load_stopiter
-
-    def load_ellipsis(self):
-        return Ellipsis
-    dispatch[TYPE_ELLIPSIS] = load_ellipsis
-
-    def load_int(self):
-        return _r_long(self)
-    dispatch[TYPE_INT] = load_int
-
-    def load_int64(self):
-        return _r_long64(self)
-    dispatch[TYPE_INT64] = load_int64
-
-    def load_long(self):
-        size = _r_long(self)
-        sign = 1
-        if size < 0:
-            sign = -1
-            size = -size
-        x = 0
-        for i in range(size):
-            d = _r_short(self)
-            x = x | (d<<(i*15))
-        return x * sign
-    dispatch[TYPE_LONG] = load_long
-
-    def load_float(self):
-        n = ord(_read1(self))
-        s = _read(self, n)
-        return float(s)
-    dispatch[TYPE_FLOAT] = load_float
-
-    def load_complex(self):
-        n = ord(_read1(self))
-        s = _read(self, n)
-        real = float(s)
-        n = ord(_read1(self))
-        s = _read(self, n)
-        imag = float(s)
-        return complex(real, imag)
-    dispatch[TYPE_COMPLEX] = load_complex
-
-    def load_string(self):
-        n = _r_long(self)
-        return _read(self, n)
-    dispatch[TYPE_STRING] = load_string
-
-    def load_interned(self):
-        n = _r_long(self)
-        ret = intern(_read(self, n))
-        self._stringtable.append(ret)
-        return ret
-    dispatch[TYPE_INTERNED] = load_interned
-
-    def load_stringref(self):
-        n = _r_long(self)
-        return self._stringtable[n]
-    dispatch[TYPE_STRINGREF] = load_stringref
-
-    def load_unicode(self):
-        n = _r_long(self)
-        s = _read(self, n)
-        ret = s.decode('utf8')
-        return ret
-    dispatch[TYPE_UNICODE] = load_unicode
-
-    def load_tuple(self):
-        return tuple(self.load_list())
-    dispatch[TYPE_TUPLE] = load_tuple
-
-    def load_list(self):
-        n = _r_long(self)
-        list = []
-        for i in range(n):
-            list.append(self.load())
-        return list
-    dispatch[TYPE_LIST] = load_list
-
-    def load_dict(self):
-        d = {}
-        while 1:
-            key = self.load()
-            if key is _NULL:
-                break
-            value = self.load()
-            d[key] = value
-        return d
-    dispatch[TYPE_DICT] = load_dict
-
-    def load_code(self):
-        argcount = _r_long(self)
-        nlocals = _r_long(self)
-        stacksize = _r_long(self)
-        flags = _r_long(self)
-        code = self.load()
-        consts = self.load()
-        names = self.load()
-        varnames = self.load()
-        freevars = self.load()
-        cellvars = self.load()
-        filename = self.load()
-        name = self.load()
-        firstlineno = _r_long(self)
-        lnotab = self.load()
-        return types.CodeType(argcount, nlocals, stacksize, flags, code, consts,
-                              names, varnames, filename, name, firstlineno,
-                              lnotab, freevars, cellvars)
-    dispatch[TYPE_CODE] = load_code
-
-    def load_set(self):
-        n = _r_long(self)
-        args = [self.load() for i in range(n)]
-        return set(args)
-    dispatch[TYPE_SET] = load_set
-
-    def load_frozenset(self):
-        n = _r_long(self)
-        args = [self.load() for i in range(n)]
-        return frozenset(args)
-    dispatch[TYPE_FROZENSET] = load_frozenset
-
-_load_dispatch = _FastUnmarshaller.dispatch
-
-# _________________________________________________________________
-#
-# user interface
-
-version = 1
-
-@builtinify
-def dump(x, f, version=version):
-    # XXX 'version' is ignored, we always dump in a version-0-compatible format
-    m = _Marshaller(f.write)
-    m.dump(x)
-
-@builtinify
-def load(f):
-    um = _Unmarshaller(f.read)
-    return um.load()
-
-@builtinify
-def dumps(x, version=version):
-    # XXX 'version' is ignored, we always dump in a version-0-compatible format
-    buffer = []
-    m = _Marshaller(buffer.append)
-    m.dump(x)
-    return ''.join(buffer)
-
-@builtinify
-def loads(s):
-    um = _FastUnmarshaller(s)
-    return um.load()
diff --git a/rpython/translator/sandbox/rsandbox.py b/rpython/translator/sandbox/rsandbox.py
--- a/rpython/translator/sandbox/rsandbox.py
+++ b/rpython/translator/sandbox/rsandbox.py
@@ -4,17 +4,19 @@
 and wait for an answer on STDIN.  Enable with 'translate.py --sandbox'.
 """
 import py
+import sys
 
-from rpython.rlib import rmarshal, types
+from rpython.rlib import types
+from rpython.rlib.objectmodel import specialize
 from rpython.rlib.signature import signature
+from rpython.rlib.unroll import unrolling_iterable
 
 # ____________________________________________________________
 #
 # Sandboxing code generator for external functions
 #
 
-from rpython.rlib import rposix
-from rpython.rtyper.lltypesystem import lltype, rffi
+from rpython.rtyper.lltypesystem import lltype, llmemory, rffi
 from rpython.rtyper.llannotation import lltype_to_annotation
 from rpython.rtyper.annlowlevel import MixLevelHelperAnnotator
 from rpython.tool.ansi_print import AnsiLogger
@@ -22,107 +24,63 @@
 log = AnsiLogger("sandbox")
 
 
-# a version of os.read() and os.write() that are not mangled
-# by the sandboxing mechanism
-ll_read_not_sandboxed = rposix.external('read',
-                                        [rffi.INT, rffi.CCHARP, rffi.SIZE_T],
-                                        rffi.SIZE_T,
-                                        sandboxsafe=True,
-                                        _nowrapper=True)
+def getkind(TYPE, parent_function):
+    if TYPE is lltype.Void:
+        return 'v'
+    elif isinstance(TYPE, lltype.Primitive):
+        if TYPE is lltype.Float or TYPE is lltype.SingleFloat:
+            return 'f'
+        if TYPE is lltype.LongFloat:
+            log.WARNING("%r uses a 'long double' argument or return value; "
+                        "sandboxing will export it only as 'double'" %
+                        (parent_function,))
+            return 'f'
+        if TYPE == llmemory.Address:
+            return 'p'
+        return 'i'
+    elif isinstance(TYPE, lltype.Ptr):
+        return 'p'
+    else:
+        log.WARNING("%r: sandboxing does not support argument "
+                    "or return type %r" % (parent_function, TYPE))
+        return 'v'
 
-ll_write_not_sandboxed = rposix.external('write',
-                                         [rffi.INT, rffi.CCHARP, rffi.SIZE_T],
-                                         rffi.SIZE_T,
-                                         sandboxsafe=True,
-                                         _nowrapper=True)
 
+def extra_eci(rtyper):
+    from rpython.translator.c.support import c_string_constant
 
-@signature(types.int(), types.ptr(rffi.CCHARP.TO), types.int(),
-    returns=types.none())
-def writeall_not_sandboxed(fd, buf, length):
-    fd = rffi.cast(rffi.INT, fd)
-    while length > 0:
-        size = rffi.cast(rffi.SIZE_T, length)
-        count = rffi.cast(lltype.Signed, ll_write_not_sandboxed(fd, buf, size))
-        if count <= 0:
-            raise IOError
-        length -= count
-        buf = lltype.direct_ptradd(lltype.direct_arrayitems(buf), count)
-        buf = rffi.cast(rffi.CCHARP, buf)
+    sandboxed_functions = getattr(rtyper, '_sandboxed_functions', [])
+    dump = (
+        "Version: 20001\n" +
+        "Platform: %s\n" % sys.platform +
+        "Funcs: %s" % ' '.join(sorted(sandboxed_functions))
+    )
+    dump = c_string_constant(dump).replace('\n', '\\\n')
 
+    return rffi.ExternalCompilationInfo(separate_module_sources=[
+            '#define RPY_SANDBOX_DUMP %s\n' % (dump,) +
+            py.path.local(__file__).join('..', 'src', 'rsandbox.c').read(),
+        ],
+        post_include_bits=[
+            py.path.local(__file__).join('..', 'src', 'rsandbox.h').read(),
+        ])
 
-class FdLoader(rmarshal.Loader):
-    def __init__(self, fd):
-        rmarshal.Loader.__init__(self, "")
-        self.fd = fd
-        self.buflen = 4096
+def external(funcname, ARGS, RESULT):
+    return rffi.llexternal(funcname, ARGS, RESULT,
+                           sandboxsafe=True, _nowrapper=True)
 
-    def need_more_data(self):
-        buflen = self.buflen
-        with lltype.scoped_alloc(rffi.CCHARP.TO, buflen) as buf:
-            buflen = rffi.cast(rffi.SIZE_T, buflen)
-            fd = rffi.cast(rffi.INT, self.fd)
-            count = ll_read_not_sandboxed(fd, buf, buflen)
-            count = rffi.cast(lltype.Signed, count)
-            if count <= 0:
-                raise IOError
-            self.buf += ''.join([buf[i] for i in range(count)])
-            self.buflen *= 2
+rpy_sandbox_arg = {
+    'i': external('rpy_sandbox_arg_i', [lltype.UnsignedLongLong], lltype.Void),
+    'f': external('rpy_sandbox_arg_f', [lltype.Float],            lltype.Void),
+    'p': external('rpy_sandbox_arg_p', [llmemory.Address],        lltype.Void),
+}
+rpy_sandbox_res = {
+    'v': external('rpy_sandbox_res_v', [rffi.CCHARP], lltype.Void),
+    'i': external('rpy_sandbox_res_i', [rffi.CCHARP], lltype.UnsignedLongLong),
+    'f': external('rpy_sandbox_res_f', [rffi.CCHARP], lltype.Float),
+    'p': external('rpy_sandbox_res_p', [rffi.CCHARP], llmemory.Address),
+}
 
-def sandboxed_io(buf):
-    STDIN = 0
-    STDOUT = 1
-    # send the buffer with the marshalled fnname and input arguments to STDOUT
-    with lltype.scoped_alloc(rffi.CCHARP.TO, len(buf)) as p:
-        for i in range(len(buf)):
-            p[i] = buf[i]
-        writeall_not_sandboxed(STDOUT, p, len(buf))
-    # build a Loader that will get the answer from STDIN
-    loader = FdLoader(STDIN)
-    # check for errors
-    error = load_int(loader)
-    if error != 0:
-        reraise_error(error, loader)
-    else:
-        # no exception; the caller will decode the actual result
-        return loader
-
-def reraise_error(error, loader):
-    if error == 1:
-        raise OSError(load_int(loader), "external error")
-    elif error == 2:
-        raise IOError
-    elif error == 3:
-        raise OverflowError
-    elif error == 4:
-        raise ValueError
-    elif error == 5:
-        raise ZeroDivisionError
-    elif error == 6:
-        raise MemoryError
-    elif error == 7:
-        raise KeyError
-    elif error == 8:
-        raise IndexError
-    else:
-        raise RuntimeError
-
-
-@signature(types.str(), returns=types.impossible())
-def not_implemented_stub(msg):
-    STDERR = 2
-    with rffi.scoped_str2charp(msg + '\n') as buf:
-        writeall_not_sandboxed(STDERR, buf, len(msg) + 1)
-    raise RuntimeError(msg)  # XXX in RPython, the msg is ignored
-
-def make_stub(fnname, msg):
-    """Build always-raising stub function to replace unsupported external."""
-    log.WARNING(msg)
-
-    def execute(*args):
-        not_implemented_stub(msg)
-    execute.__name__ = 'sandboxed_%s' % (fnname,)
-    return execute
 
 def sig_ll(fnobj):
     FUNCTYPE = lltype.typeOf(fnobj)
@@ -130,47 +88,53 @@
     s_result = lltype_to_annotation(FUNCTYPE.RESULT)
     return args_s, s_result
 
-dump_string = rmarshal.get_marshaller(str)
-load_int = rmarshal.get_loader(int)
-
 def get_sandbox_stub(fnobj, rtyper):
     fnname = fnobj._name
+    FUNCTYPE = lltype.typeOf(fnobj)
+    arg_kinds = [getkind(ARG, fnname) for ARG in FUNCTYPE.ARGS]
+    result_kind = getkind(FUNCTYPE.RESULT, fnname)
+
+    unroll_args = unrolling_iterable([
+        (arg_kind, rpy_sandbox_arg[arg_kind],
+         lltype.typeOf(rpy_sandbox_arg[arg_kind]).TO.ARGS[0])
+        for arg_kind in arg_kinds])
+
+    result_func = rpy_sandbox_res[result_kind]
+    RESTYPE = FUNCTYPE.RESULT
+
+    try:
+        lst = rtyper._sandboxed_functions
+    except AttributeError:
+        lst = rtyper._sandboxed_functions = []
+    name_and_sig = '%s(%s)%s' % (fnname, ''.join(arg_kinds), result_kind)
+    lst.append(name_and_sig)
+    log(name_and_sig)
+    name_and_sig = rffi.str2charp(name_and_sig, track_allocation=False)
+
+    def execute(*args):
+        #
+        # serialize the arguments
+        i = 0
+        for arg_kind, func, ARGTYPE in unroll_args:
+            if arg_kind == 'v':
+                continue
+            func(rffi.cast(ARGTYPE, args[i]))
+            i = i + 1
+        #
+        # send the function name and the arguments and wait for an answer
+        result = result_func(name_and_sig)
+        #
+        # return the answer, if any
+        if RESTYPE is not lltype.Void:
+            return rffi.cast(RESTYPE, result)
+    execute.__name__ = 'sandboxed_%s' % (fnname,)
+    #
     args_s, s_result = sig_ll(fnobj)
-    msg = "Not implemented: sandboxing for external function '%s'" % (fnname,)
-    execute = make_stub(fnname, msg)
     return _annotate(rtyper, execute, args_s, s_result)
 
-def make_sandbox_trampoline(fnname, args_s, s_result):
-    """Create a trampoline function with the specified signature.
-
-    The trampoline is meant to be used in place of real calls to the external
-    function named 'fnname'.  It marshals its input arguments, dumps them to
-    STDOUT, and waits for an answer on STDIN.
-    """
-    try:
-        dump_arguments = rmarshal.get_marshaller(tuple(args_s))
-        load_result = rmarshal.get_loader(s_result)
-    except (rmarshal.CannotMarshal, rmarshal.CannotUnmarshall) as e:
-        msg = "Cannot sandbox function '%s': %s" % (fnname, e)
-        execute = make_stub(fnname, msg)
-    else:
-        def execute(*args):
-            # marshal the function name and input arguments
-            buf = []
-            dump_string(buf, fnname)
-            dump_arguments(buf, args)
-            # send the buffer and wait for the answer
-            loader = sandboxed_io(buf)
-            # decode the answer
-            result = load_result(loader)
-            loader.check_finished()
-            return result
-        execute.__name__ = 'sandboxed_%s' % (fnname,)
-    return execute
-
-
 def _annotate(rtyper, f, args_s, s_result):
     ann = MixLevelHelperAnnotator(rtyper)
-    graph = ann.getgraph(f, args_s, s_result)
+    llfunc = ann.delayedfunction(f, args_s, s_result, needtype=True)
     ann.finish()
-    return graph
+    ann.backend_optimize()
+    return llfunc
diff --git a/rpython/translator/sandbox/sandboxio.py b/rpython/translator/sandbox/sandboxio.py
new file mode 100644
--- /dev/null
+++ b/rpython/translator/sandbox/sandboxio.py
@@ -0,0 +1,150 @@
+import struct
+
+
+class SandboxError(Exception):
+    """The sandboxed process misbehaved"""
+
+
+class Ptr(object):
+    def __init__(self, addr):
+        self.addr = addr
+
+    def __repr__(self):
+        return 'Ptr(%s)' % (hex(self.addr),)
+
+
+_ptr_size = struct.calcsize("P")
+_ptr_code = 'q' if _ptr_size == 8 else 'i'
+_pack_one_ptr = struct.Struct("=" + _ptr_code).pack
+_pack_one_longlong = struct.Struct("=q").pack
+_pack_one_double = struct.Struct("=d").pack
+_pack_one_int = struct.Struct("=i").pack
+_pack_two_ptrs = struct.Struct("=" + _ptr_code + _ptr_code).pack
+_unpack_one_ptr = struct.Struct("=" + _ptr_code).unpack
+
+
+class SandboxedIO(object):
+    _message_decoders = {}
+
+
+    def __init__(self, popen):
+        self.popen = popen
+        self.child_stdin = popen.stdin
+        self.child_stdout = popen.stdout
+
+    def close(self):
+        """Kill the subprocess and close the file descriptors to the pipe.
+        """
+        if self.popen.returncode is None:
+            self.popen.terminate()
+        self.child_stdin.close()
+        self.child_stdout.close()
+        self.popen.stderr.close()
+
+    def _read(self, count):
+        result = self.child_stdout.read(count)
+        if len(result) != count:
+            raise SandboxError(
+                "connection interrupted with the sandboxed process")
+        return result
+
+    @staticmethod
+    def _make_message_decoder(data):
+        i1 = data.find('(')
+        i2 = data.find(')')
+        if not (i1 > 0 and i1 < i2 and i2 == len(data) - 2):
+            raise SandboxError(
+                "badly formatted data received from the sandboxed process")
+        pack_args = ['=']
+        for c in data[i1+1:i2]:
+            if c == 'p':
+                pack_args.append(_ptr_code)
+            elif c == 'i':
+                pack_args.append('q')
+            elif c == 'f':
+                pack_args.append('d')
+            elif c == 'v':
+                pass
+            else:
+                raise SandboxError(
+                    "unsupported format string in parentheses: %r" % (data,))
+        unpacker = struct.Struct(''.join(pack_args))
+        decoder = unpacker, data[i1+1:i2]
+
+        SandboxedIO._message_decoders[data] = decoder
+        return decoder
+
+    def read_message(self):
+        """Wait for the next message and return it.  Raises EOFError if the
+        subprocess finished.  Raises SandboxError if there is another kind
+        of detected misbehaviour.
+        """
+        ch = self.child_stdout.read(1)
+        if len(ch) == 0:
+            raise EOFError
+        n = ord(ch)
+        msg = self._read(n)
+        decoder = self._message_decoders.get(msg)
+        if decoder is None:
+            decoder = self._make_message_decoder(msg)
+
+        unpacker, codes = decoder
+        raw_args = iter(unpacker.unpack(self._read(unpacker.size)))
+        args = []
+        for c in codes:
+            if c == 'p':
+                args.append(Ptr(next(raw_args)))
+            elif c == 'v':
+                args.append(None)
+            else:
+                args.append(next(raw_args))
+        return msg, args
+
+    def read_buffer(self, ptr, length):
+        g = self.child_stdin
+        g.write("R" + _pack_two_ptrs(ptr.addr, length))
+        g.flush()
+        return self._read(length)
+
+    def read_charp(self, ptr, maxlen=-1):
+        g = self.child_stdin
+        g.write("Z" + _pack_two_ptrs(ptr.addr, maxlen))
+        g.flush()
+        length = _unpack_one_ptr(self._read(_ptr_size))[0]
+        return self._read(length)
+
+    def write_buffer(self, ptr, bytes_data):
+        g = self.child_stdin
+        g.write("W" + _pack_two_ptrs(ptr.addr, len(bytes_data)))
+        g.write(bytes_data)
+        # g.flush() not necessary here
+
+    def write_result(self, result):
+        g = self.child_stdin
+        if result is None:
+            g.write('v')
+        elif isinstance(result, Ptr):
+            g.write('p' + _pack_one_ptr(result.addr))
+        elif isinstance(result, float):
+            g.write('f' + _pack_one_double(result))
+        else:
+            g.write('i' + _pack_one_longlong(result))
+        g.flush()
+
+    def set_errno(self, err):
+        g = self.child_stdin
+        g.write("E" + _pack_one_int(err))
+        # g.flush() not necessary here
+
+    def malloc(self, bytes_data):
+        g = self.child_stdin
+        g.write("M" + _pack_one_ptr(len(bytes_data)))
+        g.write(bytes_data)
+        g.flush()
+        addr = _unpack_one_ptr(self._read(_ptr_size))[0]
+        return Ptr(addr)
+
+    def free(self, ptr):
+        g = self.child_stdin
+        g.write("F" + _pack_one_ptr(ptr.addr))
+        # g.flush() not necessary here
diff --git a/rpython/translator/sandbox/sandlib.py b/rpython/translator/sandbox/sandlib.py
--- a/rpython/translator/sandbox/sandlib.py
+++ b/rpython/translator/sandbox/sandlib.py
@@ -18,65 +18,6 @@
     from rpython.tool.ansi_print import AnsiLogger
     return AnsiLogger("sandlib")
 
-# Note: we use lib_pypy/marshal.py instead of the built-in marshal
-# for two reasons.  The built-in module could be made to segfault
-# or be attackable in other ways by sending malicious input to
-# load().  Also, marshal.load(f) blocks with the GIL held when
-# f is a pipe with no data immediately avaialble, preventing the
-# _waiting_thread to run.
-from rpython.translator.sandbox import _marshal as marshal
-
-# Non-marshal result types
-RESULTTYPE_STATRESULT = object()
-RESULTTYPE_LONGLONG = object()
-
-def read_message(f):
-    return marshal.load(f)
-
-def write_message(g, msg, resulttype=None):
-    if resulttype is None:
-        if sys.version_info < (2, 4):
-            marshal.dump(msg, g)
-        else:
-            marshal.dump(msg, g, 0)
-    elif resulttype is RESULTTYPE_STATRESULT:
-        # Hand-coded marshal for stat results that mimics what rmarshal expects.
-        # marshal.dump(tuple(msg)) would have been too easy. rmarshal insists
-        # on 64-bit ints at places, even when the value fits in 32 bits.
-        import struct
-        st = tuple(msg)
-        fmt = "iIIiiiIfff"
-        buf = []
-        buf.append(struct.pack("<ci", '(', len(st)))
-        for c, v in zip(fmt, st):
-            if c == 'i':
-                buf.append(struct.pack("<ci", c, v))
-            elif c == 'I':
-                buf.append(struct.pack("<cq", c, v))
-            elif c == 'f':
-                fstr = "%g" % v
-                buf.append(struct.pack("<cB", c, len(fstr)))
-                buf.append(fstr)
-        g.write(''.join(buf))
-    elif resulttype is RESULTTYPE_LONGLONG:
-        import struct
-        g.write(struct.pack("<cq", 'I', msg))
-    else:
-        raise Exception("Can't marshal: %r (%r)" % (msg, resulttype))
-
-# keep the table in sync with rsandbox.reraise_error()
-EXCEPTION_TABLE = [
-    (1, OSError),
-    (2, IOError),
-    (3, OverflowError),
-    (4, ValueError),
-    (5, ZeroDivisionError),
-    (6, MemoryError),
-    (7, KeyError),
-    (8, IndexError),
-    (9, RuntimeError),
-    ]
-
 def write_exception(g, exception, tb=None):
     for i, excclass in EXCEPTION_TABLE:
         if isinstance(exception, excclass):
diff --git a/rpython/translator/sandbox/src/rsandbox.c b/rpython/translator/sandbox/src/rsandbox.c
new file mode 100644
--- /dev/null
+++ b/rpython/translator/sandbox/src/rsandbox.c
@@ -0,0 +1,239 @@
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <errno.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+
+/* Size in bytes of the static buffer 'sand_argbuf' below. */
+#define RPY_SANDBOX_ARGBUF    512
+/* Initial value of 'sand_nextarg' below.
+   NOTE(review): presumably the first RPY_SANDBOX_NAMEMAX bytes of
+   'sand_argbuf' are reserved for a name and the arguments start after
+   them -- confirm against the code that fills the buffer. */
+#define RPY_SANDBOX_NAMEMAX   256
+
+/* File descriptors used by sand_readall() and sand_writeall(). */
+#define RPY_FD_STDIN          0
+#define RPY_FD_STDOUT         1
+
+/* Module-level state; all three are private to this file. */
+static char sand_argbuf[RPY_SANDBOX_ARGBUF];
+static size_t sand_nextarg = RPY_SANDBOX_NAMEMAX;
+static int sand_dump_checked = 0;
+
+
+/* Write exactly 'count' bytes from 'buf' to RPY_FD_STDOUT, looping
+   over short writes.  A write() that fails with EINTR (interrupted by
+   a signal before any data was transferred) is simply retried.  Any
+   other error, a result of 0, or a result larger than what was asked
+   for prints a message on stderr and calls abort(); the function
+   never returns with a partial write. */
+static void sand_writeall(const char *buf, size_t count)
+{
+    while (count > 0) {
+        ssize_t result = write(RPY_FD_STDOUT, buf, count);
+        if (result < 0 && errno == EINTR) {
+            /* nothing was written; not a real failure, try again */
+            continue;
+        }
+        if (result <= 0) {
+            if (result == 0) {
+                fprintf(stderr, "sandbox: write(stdout) gives the result 0, "
+                                "which is not expected\n");
+            }
+            else {
+                perror("sandbox: write(stdout)");
+            }
+            abort();
+        }
+        /* result > 0 here, so the cast to size_t is safe */
+        if ((size_t)result > count) {
+            fprintf(stderr, "sandbox: write(stdout) wrote more data than "
+                            "request, which is not expected\n");
+            abort();
+        }
+        buf += result;
+        count -= result;
+    }
+}
+
+/* Read exactly 'count' bytes from RPY_FD_STDIN into 'buf', looping
+   over short reads.  A read() that fails with EINTR (interrupted by a
+   signal before any data was transferred) is simply retried.  End of
+   file (result 0), any other error, or a result larger than what was
+   asked for prints a message on stderr and calls abort(); the
+   function never returns with a partially filled buffer. */
+static void sand_readall(char *buf, size_t count)
+{
+    while (count > 0) {
+        ssize_t result = read(RPY_FD_STDIN, buf, count);
+        if (result < 0 && errno == EINTR) {
+            /* nothing was read; not a real failure, try again */
+            continue;
+        }
+        if (result <= 0) {
+            if (result == 0) {
+                fprintf(stderr, "sandbox: stdin is closed, subprocess "
+                                "interrupted\n");
+            }
+            else {
+                perror("sandbox: read(stdin)");
+            }
+            abort();
+        }
+        /* result > 0 here, so the cast to size_t is safe */
+        if ((size_t)result > count) {
+            fprintf(stderr, "sandbox: read(stdin) returned more data than "
+                            "expected\n");
+            abort();
+        }
+        buf += result;
+        count -= result;
+    }
+}
+
+


More information about the pypy-commit mailing list