[pypy-commit] pypy sandbox-2: in-progress: implementing another approach for sandboxing

arigo pypy.commits at gmail.com
Tue Aug 6 12:09:14 EDT 2019


Author: Armin Rigo <arigo at tunes.org>
Branch: sandbox-2
Changeset: r97077:f2a2ec0e2a42
Date: 2019-08-06 18:08 +0200
http://bitbucket.org/pypy/pypy/changeset/f2a2ec0e2a42/

Log:	in-progress: implementing another approach for sandboxing

diff --git a/pypy/goal/targetpypystandalone.py b/pypy/goal/targetpypystandalone.py
--- a/pypy/goal/targetpypystandalone.py
+++ b/pypy/goal/targetpypystandalone.py
@@ -311,9 +311,6 @@
             config.translation.jit = True
 
         if config.translation.sandbox:
-            assert 0, ("--sandbox is not tested nor maintained.  If you "
-                       "really want to try it anyway, remove this line in "
-                       "pypy/goal/targetpypystandalone.py.")
             config.objspace.lonepycfiles = False
 
         if config.objspace.usemodules.cpyext:
diff --git a/rpython/config/translationoption.py b/rpython/config/translationoption.py
--- a/rpython/config/translationoption.py
+++ b/rpython/config/translationoption.py
@@ -115,8 +115,7 @@
     BoolOption("sandbox", "Produce a fully-sandboxed executable",
                default=False, cmdline="--sandbox",
                requires=[("translation.thread", False)],
-               suggests=[("translation.gc", "generation"),
-                         ("translation.gcrootfinder", "shadowstack")]),
+               suggests=[]),
     BoolOption("rweakref", "The backend supports RPython-level weakrefs",
                default=True),
 
diff --git a/rpython/rtyper/rtyper.py b/rpython/rtyper/rtyper.py
--- a/rpython/rtyper/rtyper.py
+++ b/rpython/rtyper/rtyper.py
@@ -29,7 +29,6 @@
 from rpython.rtyper.rclass import RootClassRepr
 from rpython.tool.pairtype import pair
 from rpython.translator.unsimplify import insert_empty_block
-from rpython.translator.sandbox.rsandbox import make_sandbox_trampoline
 
 
 class RTyperBackend(object):
diff --git a/rpython/translator/sandbox/_marshal.py b/rpython/translator/sandbox/_marshal.py
deleted file mode 100644
--- a/rpython/translator/sandbox/_marshal.py
+++ /dev/null
@@ -1,695 +0,0 @@
-# Copy of lib_pypy/_marshal.py needed by sandlib
-"""Internal Python object serialization
-
-This module contains functions that can read and write Python values in a binary format. The format is specific to Python, but independent of machine architecture issues (e.g., you can write a Python value to a file on a PC, transport the file to a Sun, and read it back there). Details of the format may change between Python versions.
-"""
-
-# NOTE: This module is used in the Python3 interpreter, but also by
-# the "sandboxed" process.  It must work for Python2 as well.
-
-import types
-from _codecs import utf_8_decode, utf_8_encode
-
-try:
-    intern
-except NameError:
-    from sys import intern
-
-try: from __pypy__ import builtinify
-except ImportError: builtinify = lambda f: f
-
-
-TYPE_NULL     = '0'
-TYPE_NONE     = 'N'
-TYPE_FALSE    = 'F'
-TYPE_TRUE     = 'T'
-TYPE_STOPITER = 'S'
-TYPE_ELLIPSIS = '.'
-TYPE_INT      = 'i'
-TYPE_INT64    = 'I'
-TYPE_FLOAT    = 'f'
-TYPE_COMPLEX  = 'x'
-TYPE_LONG     = 'l'
-TYPE_STRING   = 's'
-TYPE_INTERNED = 't'
-TYPE_STRINGREF= 'R'
-TYPE_TUPLE    = '('
-TYPE_LIST     = '['
-TYPE_DICT     = '{'
-TYPE_CODE     = 'c'
-TYPE_UNICODE  = 'u'
-TYPE_UNKNOWN  = '?'
-TYPE_SET      = '<'
-TYPE_FROZENSET= '>'
-
-class _Marshaller:
-
-    dispatch = {}
-
-    def __init__(self, writefunc):
-        self._write = writefunc
-
-    def dump(self, x):
-        try:
-            self.dispatch[type(x)](self, x)
-        except KeyError:
-            for tp in type(x).mro():
-                func = self.dispatch.get(tp)
-                if func:
-                    break
-            else:
-                raise ValueError("unmarshallable object")
-            func(self, x)
-
-    def w_long64(self, x):
-        self.w_long(x)
-        self.w_long(x>>32)
-
-    def w_long(self, x):
-        a = chr(x & 0xff)
-        x >>= 8
-        b = chr(x & 0xff)
-        x >>= 8
-        c = chr(x & 0xff)
-        x >>= 8
-        d = chr(x & 0xff)
-        self._write(a + b + c + d)
-
-    def w_short(self, x):
-        self._write(chr((x)     & 0xff))
-        self._write(chr((x>> 8) & 0xff))
-
-    def dump_none(self, x):
-        self._write(TYPE_NONE)
-    dispatch[type(None)] = dump_none
-
-    def dump_bool(self, x):
-        if x:
-            self._write(TYPE_TRUE)
-        else:
-            self._write(TYPE_FALSE)
-    dispatch[bool] = dump_bool
-
-    def dump_stopiter(self, x):
-        if x is not StopIteration:
-            raise ValueError("unmarshallable object")
-        self._write(TYPE_STOPITER)
-    dispatch[type(StopIteration)] = dump_stopiter
-
-    def dump_ellipsis(self, x):
-        self._write(TYPE_ELLIPSIS)
-    
-    try:
-        dispatch[type(Ellipsis)] = dump_ellipsis
-    except NameError:
-        pass
-
-    # In Python3, this function is not used; see dump_long() below.
-    def dump_int(self, x):
-        y = x>>31
-        if y and y != -1:
-            self._write(TYPE_INT64)
-            self.w_long64(x)
-        else:
-            self._write(TYPE_INT)
-            self.w_long(x)
-    dispatch[int] = dump_int
-
-    def dump_long(self, x):
-        self._write(TYPE_LONG)
-        sign = 1
-        if x < 0:
-            sign = -1
-            x = -x
-        digits = []
-        while x:
-            digits.append(x & 0x7FFF)
-            x = x>>15
-        self.w_long(len(digits) * sign)
-        for d in digits:
-            self.w_short(d)
-    try:
-        long
-    except NameError:
-        dispatch[int] = dump_long
-    else:
-        dispatch[long] = dump_long
-
-    def dump_float(self, x):
-        write = self._write
-        write(TYPE_FLOAT)
-        s = repr(x)
-        write(chr(len(s)))
-        write(s)
-    dispatch[float] = dump_float
-
-    def dump_complex(self, x):
-        write = self._write
-        write(TYPE_COMPLEX)
-        s = repr(x.real)
-        write(chr(len(s)))
-        write(s)
-        s = repr(x.imag)
-        write(chr(len(s)))
-        write(s)
-    try:
-        dispatch[complex] = dump_complex
-    except NameError:
-        pass
-
-    def dump_string(self, x):
-        # XXX we can't check for interned strings, yet,
-        # so we (for now) never create TYPE_INTERNED or TYPE_STRINGREF
-        self._write(TYPE_STRING)
-        self.w_long(len(x))
-        self._write(x)
-    dispatch[bytes] = dump_string
-
-    def dump_unicode(self, x):
-        self._write(TYPE_UNICODE)
-        #s = x.encode('utf8')
-        s, len_s = utf_8_encode(x)
-        self.w_long(len_s)
-        self._write(s)
-    try:
-        unicode
-    except NameError:
-        dispatch[str] = dump_unicode
-    else:
-        dispatch[unicode] = dump_unicode
-
-    def dump_tuple(self, x):
-        self._write(TYPE_TUPLE)
-        self.w_long(len(x))
-        for item in x:
-            self.dump(item)
-    dispatch[tuple] = dump_tuple
-
-    def dump_list(self, x):
-        self._write(TYPE_LIST)
-        self.w_long(len(x))
-        for item in x:
-            self.dump(item)
-    dispatch[list] = dump_list
-
-    def dump_dict(self, x):
-        self._write(TYPE_DICT)
-        for key, value in x.items():
-            self.dump(key)
-            self.dump(value)
-        self._write(TYPE_NULL)
-    dispatch[dict] = dump_dict
-
-    def dump_code(self, x):
-        self._write(TYPE_CODE)
-        self.w_long(x.co_argcount)
-        self.w_long(x.co_nlocals)
-        self.w_long(x.co_stacksize)
-        self.w_long(x.co_flags)
-        self.dump(x.co_code)
-        self.dump(x.co_consts)
-        self.dump(x.co_names)
-        self.dump(x.co_varnames)
-        self.dump(x.co_freevars)
-        self.dump(x.co_cellvars)
-        self.dump(x.co_filename)
-        self.dump(x.co_name)
-        self.w_long(x.co_firstlineno)
-        self.dump(x.co_lnotab)
-    try:
-        dispatch[types.CodeType] = dump_code
-    except NameError:
-        pass
-
-    def dump_set(self, x):
-        self._write(TYPE_SET)
-        self.w_long(len(x))
-        for each in x:
-            self.dump(each)
-    try:
-        dispatch[set] = dump_set
-    except NameError:
-        pass
-
-    def dump_frozenset(self, x):
-        self._write(TYPE_FROZENSET)
-        self.w_long(len(x))
-        for each in x:
-            self.dump(each)
-    try:
-        dispatch[frozenset] = dump_frozenset
-    except NameError:
-        pass
-
-class _NULL:
-    pass
-
-class _StringBuffer:
-    def __init__(self, value):
-        self.bufstr = value
-        self.bufpos = 0
-
-    def read(self, n):
-        pos = self.bufpos
-        newpos = pos + n
-        ret = self.bufstr[pos : newpos]
-        self.bufpos = newpos
-        return ret
-
-
-class _Unmarshaller:
-
-    dispatch = {}
-
-    def __init__(self, readfunc):
-        self._read = readfunc
-        self._stringtable = []
-
-    def load(self):
-        c = self._read(1)
-        if not c:
-            raise EOFError
-        try:
-            return self.dispatch[c](self)
-        except KeyError:
-            raise ValueError("bad marshal code: %c (%d)" % (c, ord(c)))
-
-    def r_short(self):
-        lo = ord(self._read(1))
-        hi = ord(self._read(1))
-        x = lo | (hi<<8)
-        if x & 0x8000:
-            x = x - 0x10000
-        return x
-
-    def r_long(self):
-        s = self._read(4)
-        a = ord(s[0])
-        b = ord(s[1])
-        c = ord(s[2])
-        d = ord(s[3])
-        x = a | (b<<8) | (c<<16) | (d<<24)
-        if d & 0x80 and x > 0:
-            x = -((1<<32) - x)
-            return int(x)
-        else:
-            return x
-
-    def r_long64(self):
-        a = ord(self._read(1))
-        b = ord(self._read(1))
-        c = ord(self._read(1))
-        d = ord(self._read(1))
-        e = ord(self._read(1))
-        f = ord(self._read(1))
-        g = ord(self._read(1))
-        h = ord(self._read(1))
-        x = a | (b<<8) | (c<<16) | (d<<24)
-        x = x | (e<<32) | (f<<40) | (g<<48) | (h<<56)
-        if h & 0x80 and x > 0:
-            x = -((1<<64) - x)
-        return x
-
-    def load_null(self):
-        return _NULL
-    dispatch[TYPE_NULL] = load_null
-
-    def load_none(self):
-        return None
-    dispatch[TYPE_NONE] = load_none
-
-    def load_true(self):
-        return True
-    dispatch[TYPE_TRUE] = load_true
-
-    def load_false(self):
-        return False
-    dispatch[TYPE_FALSE] = load_false
-
-    def load_stopiter(self):
-        return StopIteration
-    dispatch[TYPE_STOPITER] = load_stopiter
-
-    def load_ellipsis(self):
-        return Ellipsis
-    dispatch[TYPE_ELLIPSIS] = load_ellipsis
-
-    dispatch[TYPE_INT] = r_long
-
-    dispatch[TYPE_INT64] = r_long64
-
-    def load_long(self):
-        size = self.r_long()
-        sign = 1
-        if size < 0:
-            sign = -1
-            size = -size
-        x = 0
-        for i in range(size):
-            d = self.r_short()
-            x = x | (d<<(i*15))
-        return x * sign
-    dispatch[TYPE_LONG] = load_long
-
-    def load_float(self):
-        n = ord(self._read(1))
-        s = self._read(n)
-        return float(s)
-    dispatch[TYPE_FLOAT] = load_float
-
-    def load_complex(self):
-        n = ord(self._read(1))
-        s = self._read(n)
-        real = float(s)
-        n = ord(self._read(1))
-        s = self._read(n)
-        imag = float(s)
-        return complex(real, imag)
-    dispatch[TYPE_COMPLEX] = load_complex
-
-    def load_string(self):
-        n = self.r_long()
-        return self._read(n)
-    dispatch[TYPE_STRING] = load_string
-
-    def load_interned(self):
-        n = self.r_long()
-        ret = intern(self._read(n))
-        self._stringtable.append(ret)
-        return ret
-    dispatch[TYPE_INTERNED] = load_interned
-
-    def load_stringref(self):
-        n = self.r_long()
-        return self._stringtable[n]
-    dispatch[TYPE_STRINGREF] = load_stringref
-
-    def load_unicode(self):
-        n = self.r_long()
-        s = self._read(n)
-        #ret = s.decode('utf8')
-        ret, len_ret = utf_8_decode(s)
-        return ret
-    dispatch[TYPE_UNICODE] = load_unicode
-
-    def load_tuple(self):
-        return tuple(self.load_list())
-    dispatch[TYPE_TUPLE] = load_tuple
-
-    def load_list(self):
-        n = self.r_long()
-        list = [self.load() for i in range(n)]
-        return list
-    dispatch[TYPE_LIST] = load_list
-
-    def load_dict(self):
-        d = {}
-        while 1:
-            key = self.load()
-            if key is _NULL:
-                break
-            value = self.load()
-            d[key] = value
-        return d
-    dispatch[TYPE_DICT] = load_dict
-
-    def load_code(self):
-        argcount = self.r_long()
-        nlocals = self.r_long()
-        stacksize = self.r_long()
-        flags = self.r_long()
-        code = self.load()
-        consts = self.load()
-        names = self.load()
-        varnames = self.load()
-        freevars = self.load()
-        cellvars = self.load()
-        filename = self.load()
-        name = self.load()
-        firstlineno = self.r_long()
-        lnotab = self.load()
-        return types.CodeType(argcount, nlocals, stacksize, flags, code, consts,
-                              names, varnames, filename, name, firstlineno,
-                              lnotab, freevars, cellvars)
-    dispatch[TYPE_CODE] = load_code
-
-    def load_set(self):
-        n = self.r_long()
-        args = [self.load() for i in range(n)]
-        return set(args)
-    dispatch[TYPE_SET] = load_set
-
-    def load_frozenset(self):
-        n = self.r_long()
-        args = [self.load() for i in range(n)]
-        return frozenset(args)
-    dispatch[TYPE_FROZENSET] = load_frozenset
-
-# ________________________________________________________________
-
-def _read(self, n):
-    pos = self.bufpos
-    newpos = pos + n
-    if newpos > len(self.bufstr): raise EOFError
-    ret = self.bufstr[pos : newpos]
-    self.bufpos = newpos
-    return ret
-
-def _read1(self):
-    ret = self.bufstr[self.bufpos]
-    self.bufpos += 1
-    return ret
-
-def _r_short(self):
-    lo = ord(_read1(self))
-    hi = ord(_read1(self))
-    x = lo | (hi<<8)
-    if x & 0x8000:
-        x = x - 0x10000
-    return x
-
-def _r_long(self):
-    # inlined this most common case
-    p = self.bufpos
-    s = self.bufstr
-    a = ord(s[p])
-    b = ord(s[p+1])
-    c = ord(s[p+2])
-    d = ord(s[p+3])
-    self.bufpos += 4
-    x = a | (b<<8) | (c<<16) | (d<<24)
-    if d & 0x80 and x > 0:
-        x = -((1<<32) - x)
-        return int(x)
-    else:
-        return x
-
-def _r_long64(self):
-    a = ord(_read1(self))
-    b = ord(_read1(self))
-    c = ord(_read1(self))
-    d = ord(_read1(self))
-    e = ord(_read1(self))
-    f = ord(_read1(self))
-    g = ord(_read1(self))
-    h = ord(_read1(self))
-    x = a | (b<<8) | (c<<16) | (d<<24)
-    x = x | (e<<32) | (f<<40) | (g<<48) | (h<<56)
-    if h & 0x80 and x > 0:
-        x = -((1<<64) - x)
-    return x
-
-_load_dispatch = {}
-
-class _FastUnmarshaller:
-
-    dispatch = {}
-
-    def __init__(self, buffer):
-        self.bufstr = buffer
-        self.bufpos = 0
-        self._stringtable = []
-
-    def load(self):
-        # make flow space happy
-        c = '?'
-        try:
-            c = self.bufstr[self.bufpos]
-            self.bufpos += 1
-            return _load_dispatch[c](self)
-        except KeyError:
-            raise ValueError("bad marshal code: %c (%d)" % (c, ord(c)))
-        except IndexError:
-            raise EOFError
-
-    def load_null(self):
-        return _NULL
-    dispatch[TYPE_NULL] = load_null
-
-    def load_none(self):
-        return None
-    dispatch[TYPE_NONE] = load_none
-
-    def load_true(self):
-        return True
-    dispatch[TYPE_TRUE] = load_true
-
-    def load_false(self):
-        return False
-    dispatch[TYPE_FALSE] = load_false
-
-    def load_stopiter(self):
-        return StopIteration
-    dispatch[TYPE_STOPITER] = load_stopiter
-
-    def load_ellipsis(self):
-        return Ellipsis
-    dispatch[TYPE_ELLIPSIS] = load_ellipsis
-
-    def load_int(self):
-        return _r_long(self)
-    dispatch[TYPE_INT] = load_int
-
-    def load_int64(self):
-        return _r_long64(self)
-    dispatch[TYPE_INT64] = load_int64
-
-    def load_long(self):
-        size = _r_long(self)
-        sign = 1
-        if size < 0:
-            sign = -1
-            size = -size
-        x = 0
-        for i in range(size):
-            d = _r_short(self)
-            x = x | (d<<(i*15))
-        return x * sign
-    dispatch[TYPE_LONG] = load_long
-
-    def load_float(self):
-        n = ord(_read1(self))
-        s = _read(self, n)
-        return float(s)
-    dispatch[TYPE_FLOAT] = load_float
-
-    def load_complex(self):
-        n = ord(_read1(self))
-        s = _read(self, n)
-        real = float(s)
-        n = ord(_read1(self))
-        s = _read(self, n)
-        imag = float(s)
-        return complex(real, imag)
-    dispatch[TYPE_COMPLEX] = load_complex
-
-    def load_string(self):
-        n = _r_long(self)
-        return _read(self, n)
-    dispatch[TYPE_STRING] = load_string
-
-    def load_interned(self):
-        n = _r_long(self)
-        ret = intern(_read(self, n))
-        self._stringtable.append(ret)
-        return ret
-    dispatch[TYPE_INTERNED] = load_interned
-
-    def load_stringref(self):
-        n = _r_long(self)
-        return self._stringtable[n]
-    dispatch[TYPE_STRINGREF] = load_stringref
-
-    def load_unicode(self):
-        n = _r_long(self)
-        s = _read(self, n)
-        ret = s.decode('utf8')
-        return ret
-    dispatch[TYPE_UNICODE] = load_unicode
-
-    def load_tuple(self):
-        return tuple(self.load_list())
-    dispatch[TYPE_TUPLE] = load_tuple
-
-    def load_list(self):
-        n = _r_long(self)
-        list = []
-        for i in range(n):
-            list.append(self.load())
-        return list
-    dispatch[TYPE_LIST] = load_list
-
-    def load_dict(self):
-        d = {}
-        while 1:
-            key = self.load()
-            if key is _NULL:
-                break
-            value = self.load()
-            d[key] = value
-        return d
-    dispatch[TYPE_DICT] = load_dict
-
-    def load_code(self):
-        argcount = _r_long(self)
-        nlocals = _r_long(self)
-        stacksize = _r_long(self)
-        flags = _r_long(self)
-        code = self.load()
-        consts = self.load()
-        names = self.load()
-        varnames = self.load()
-        freevars = self.load()
-        cellvars = self.load()
-        filename = self.load()
-        name = self.load()
-        firstlineno = _r_long(self)
-        lnotab = self.load()
-        return types.CodeType(argcount, nlocals, stacksize, flags, code, consts,
-                              names, varnames, filename, name, firstlineno,
-                              lnotab, freevars, cellvars)
-    dispatch[TYPE_CODE] = load_code
-
-    def load_set(self):
-        n = _r_long(self)
-        args = [self.load() for i in range(n)]
-        return set(args)
-    dispatch[TYPE_SET] = load_set
-
-    def load_frozenset(self):
-        n = _r_long(self)
-        args = [self.load() for i in range(n)]
-        return frozenset(args)
-    dispatch[TYPE_FROZENSET] = load_frozenset
-
-_load_dispatch = _FastUnmarshaller.dispatch
-
-# _________________________________________________________________
-#
-# user interface
-
-version = 1
-
-@builtinify
-def dump(x, f, version=version):
-    # XXX 'version' is ignored, we always dump in a version-0-compatible format
-    m = _Marshaller(f.write)
-    m.dump(x)
-
-@builtinify
-def load(f):
-    um = _Unmarshaller(f.read)
-    return um.load()
-
-@builtinify
-def dumps(x, version=version):
-    # XXX 'version' is ignored, we always dump in a version-0-compatible format
-    buffer = []
-    m = _Marshaller(buffer.append)
-    m.dump(x)
-    return ''.join(buffer)
-
-@builtinify
-def loads(s):
-    um = _FastUnmarshaller(s)
-    return um.load()
diff --git a/rpython/translator/sandbox/rsandbox.py b/rpython/translator/sandbox/rsandbox.py
--- a/rpython/translator/sandbox/rsandbox.py
+++ b/rpython/translator/sandbox/rsandbox.py
@@ -5,16 +5,17 @@
 """
 import py
 
-from rpython.rlib import rmarshal, types
+from rpython.rlib import types
+from rpython.rlib.objectmodel import specialize
 from rpython.rlib.signature import signature
+from rpython.rlib.unroll import unrolling_iterable
 
 # ____________________________________________________________
 #
 # Sandboxing code generator for external functions
 #
 
-from rpython.rlib import rposix
-from rpython.rtyper.lltypesystem import lltype, rffi
+from rpython.rtyper.lltypesystem import lltype, llmemory, rffi
 from rpython.rtyper.llannotation import lltype_to_annotation
 from rpython.rtyper.annlowlevel import MixLevelHelperAnnotator
 from rpython.tool.ansi_print import AnsiLogger
@@ -22,71 +23,6 @@
 log = AnsiLogger("sandbox")
 
 
-# a version of os.read() and os.write() that are not mangled
-# by the sandboxing mechanism
-ll_read_not_sandboxed = rposix.external('read',
-                                        [rffi.INT, rffi.CCHARP, rffi.SIZE_T],
-                                        rffi.SIZE_T,
-                                        sandboxsafe=True,
-                                        _nowrapper=True)
-
-ll_write_not_sandboxed = rposix.external('write',
-                                         [rffi.INT, rffi.CCHARP, rffi.SIZE_T],
-                                         rffi.SIZE_T,
-                                         sandboxsafe=True,
-                                         _nowrapper=True)
-
-
-@signature(types.int(), types.ptr(rffi.CCHARP.TO), types.int(),
-    returns=types.none())
-def writeall_not_sandboxed(fd, buf, length):
-    fd = rffi.cast(rffi.INT, fd)
-    while length > 0:
-        size = rffi.cast(rffi.SIZE_T, length)
-        count = rffi.cast(lltype.Signed, ll_write_not_sandboxed(fd, buf, size))
-        if count <= 0:
-            raise IOError
-        length -= count
-        buf = lltype.direct_ptradd(lltype.direct_arrayitems(buf), count)
-        buf = rffi.cast(rffi.CCHARP, buf)
-
-
-class FdLoader(rmarshal.Loader):
-    def __init__(self, fd):
-        rmarshal.Loader.__init__(self, "")
-        self.fd = fd
-        self.buflen = 4096
-
-    def need_more_data(self):
-        buflen = self.buflen
-        with lltype.scoped_alloc(rffi.CCHARP.TO, buflen) as buf:
-            buflen = rffi.cast(rffi.SIZE_T, buflen)
-            fd = rffi.cast(rffi.INT, self.fd)
-            count = ll_read_not_sandboxed(fd, buf, buflen)
-            count = rffi.cast(lltype.Signed, count)
-            if count <= 0:
-                raise IOError
-            self.buf += ''.join([buf[i] for i in range(count)])
-            self.buflen *= 2
-
-def sandboxed_io(buf):
-    STDIN = 0
-    STDOUT = 1
-    # send the buffer with the marshalled fnname and input arguments to STDOUT
-    with lltype.scoped_alloc(rffi.CCHARP.TO, len(buf)) as p:
-        for i in range(len(buf)):
-            p[i] = buf[i]
-        writeall_not_sandboxed(STDOUT, p, len(buf))
-    # build a Loader that will get the answer from STDIN
-    loader = FdLoader(STDIN)
-    # check for errors
-    error = load_int(loader)
-    if error != 0:
-        reraise_error(error, loader)
-    else:
-        # no exception; the caller will decode the actual result
-        return loader
-
 def reraise_error(error, loader):
     if error == 1:
         raise OSError(load_int(loader), "external error")
@@ -108,21 +44,51 @@
         raise RuntimeError
 
 
-@signature(types.str(), returns=types.impossible())
-def not_implemented_stub(msg):
-    STDERR = 2
-    with rffi.scoped_str2charp(msg + '\n') as buf:
-        writeall_not_sandboxed(STDERR, buf, len(msg) + 1)
-    raise RuntimeError(msg)  # XXX in RPython, the msg is ignored
+def getkind(TYPE, parent_function):
+    if TYPE is lltype.Void:
+        return 'v'
+    elif isinstance(TYPE, lltype.Primitive):
+        if TYPE is lltype.Float or TYPE is lltype.SingleFloat:
+            return 'f'
+        if TYPE is lltype.LongFloat:
+            log.WARNING("%r uses a 'long double' argument or return value; "
+                        "sandboxing will export it only as 'double'" %
+                        (parent_function,))
+            return 'f'
+        if TYPE == llmemory.Address:
+            return 'p'
+        return 'i'
+    elif isinstance(TYPE, lltype.Ptr):
+        return 'p'
+    else:
+        log.WARNING("%r: sandboxing does not support argument "
+                    "or return type %r" % (parent_function, TYPE))
+        return 'v'
 
-def make_stub(fnname, msg):
-    """Build always-raising stub function to replace unsupported external."""
-    log.WARNING(msg)
 
-    def execute(*args):
-        not_implemented_stub(msg)
-    execute.__name__ = 'sandboxed_%s' % (fnname,)
-    return execute
+eci = rffi.ExternalCompilationInfo(separate_module_sources=[
+            py.path.local(__file__).join('..', 'src', 'rsandbox.c').read(),
+        ],
+        post_include_bits=[
+            py.path.local(__file__).join('..', 'src', 'rsandbox.h').read(),
+        ])
+def external(funcname, ARGS, RESULT):
+    return rffi.llexternal(funcname, ARGS, RESULT,
+                           compilation_info=eci, sandboxsafe=True,
+                           _nowrapper=True)
+
+rpy_sandbox_arg = {
+    'i': external('rpy_sandbox_arg_i', [lltype.UnsignedLongLong], lltype.Void),
+    'f': external('rpy_sandbox_arg_f', [lltype.Float],            lltype.Void),
+    'p': external('rpy_sandbox_arg_p', [llmemory.Address],        lltype.Void),
+}
+rpy_sandbox_res = {
+    'v': external('rpy_sandbox_res_v', [rffi.CCHARP], lltype.Void),
+    'i': external('rpy_sandbox_res_i', [rffi.CCHARP], lltype.UnsignedLongLong),
+    'f': external('rpy_sandbox_res_f', [rffi.CCHARP], lltype.Float),
+    'p': external('rpy_sandbox_res_p', [rffi.CCHARP], llmemory.Address),
+}
+
 
 def sig_ll(fnobj):
     FUNCTYPE = lltype.typeOf(fnobj)
@@ -130,47 +96,48 @@
     s_result = lltype_to_annotation(FUNCTYPE.RESULT)
     return args_s, s_result
 
-dump_string = rmarshal.get_marshaller(str)
-load_int = rmarshal.get_loader(int)
-
 def get_sandbox_stub(fnobj, rtyper):
     fnname = fnobj._name
+    FUNCTYPE = lltype.typeOf(fnobj)
+    arg_kinds = [getkind(ARG, fnname) for ARG in FUNCTYPE.ARGS]
+    result_kind = getkind(FUNCTYPE.RESULT, fnname)
+
+    unroll_args = unrolling_iterable([
+        (arg_kind, rpy_sandbox_arg[arg_kind],
+         lltype.typeOf(rpy_sandbox_arg[arg_kind]).TO.ARGS[0])
+        for arg_kind in arg_kinds])
+
+    result_func = rpy_sandbox_res[result_kind]
+    RESTYPE = FUNCTYPE.RESULT
+
+    name_and_sig = '%s(%s)%s' % (fnname, ''.join(arg_kinds), result_kind)
+    log(name_and_sig)
+    name_and_sig = rffi.str2charp(name_and_sig, track_allocation=False)
+
+    def execute(*args):
+        #
+        # serialize the arguments
+        i = 0
+        for arg_kind, func, ARGTYPE in unroll_args:
+            if arg_kind == 'v':
+                continue
+            func(rffi.cast(ARGTYPE, args[i]))
+            i = i + 1
+        #
+        # send the function name and the arguments and wait for an answer
+        result = result_func(name_and_sig)
+        #
+        # return the answer, if any
+        if RESTYPE is not lltype.Void:
+            return rffi.cast(RESTYPE, result)
+    execute.__name__ = 'sandboxed_%s' % (fnname,)
+    #
     args_s, s_result = sig_ll(fnobj)
-    msg = "Not implemented: sandboxing for external function '%s'" % (fnname,)
-    execute = make_stub(fnname, msg)
     return _annotate(rtyper, execute, args_s, s_result)
 
-def make_sandbox_trampoline(fnname, args_s, s_result):
-    """Create a trampoline function with the specified signature.
-
-    The trampoline is meant to be used in place of real calls to the external
-    function named 'fnname'.  It marshals its input arguments, dumps them to
-    STDOUT, and waits for an answer on STDIN.
-    """
-    try:
-        dump_arguments = rmarshal.get_marshaller(tuple(args_s))
-        load_result = rmarshal.get_loader(s_result)
-    except (rmarshal.CannotMarshal, rmarshal.CannotUnmarshall) as e:
-        msg = "Cannot sandbox function '%s': %s" % (fnname, e)
-        execute = make_stub(fnname, msg)
-    else:
-        def execute(*args):
-            # marshal the function name and input arguments
-            buf = []
-            dump_string(buf, fnname)
-            dump_arguments(buf, args)
-            # send the buffer and wait for the answer
-            loader = sandboxed_io(buf)
-            # decode the answer
-            result = load_result(loader)
-            loader.check_finished()
-            return result
-        execute.__name__ = 'sandboxed_%s' % (fnname,)
-    return execute
-
-
 def _annotate(rtyper, f, args_s, s_result):
     ann = MixLevelHelperAnnotator(rtyper)
     graph = ann.getgraph(f, args_s, s_result)
     ann.finish()
+    ann.backend_optimize()
     return graph
diff --git a/rpython/translator/sandbox/src/rsandbox.c b/rpython/translator/sandbox/src/rsandbox.c
new file mode 100644
--- /dev/null
+++ b/rpython/translator/sandbox/src/rsandbox.c
@@ -0,0 +1,176 @@
+#include <stdlib.h>
+#include <string.h>
+
+
+#define RPY_SANDBOX_ARGBUF    512
+#define RPY_SANDBOX_NAMEMAX   256
+
+#define RPY_FD_STDIN          0
+#define RPY_FD_STDOUT         1
+
+static char sand_argbuf[RPY_SANDBOX_ARGBUF];
+static size_t sand_nextarg = RPY_SANDBOX_NAMEMAX;
+
+
+static void sand_writeall(const char *buf, size_t count)
+{
+    while (count > 0) {
+        ssize_t result = write(RPY_FD_STDOUT, buf, count);
+        if (result <= 0) {
+            if (result == 0) {
+                fprintf(stderr, "sandbox: write(stdout) gives the result 0, "
+                                "which is not expected\n");
+            }
+            else {
+                perror("sandbox: write(stdout)");
+            }
+            abort();
+        }
+        if (result > count) {
+            fprintf(stderr, "sandbox: write(stdout) wrote more data than "
+                            "request, which is not expected\n");
+            abort();
+        }
+        buf += result;
+        count -= result;
+    }
+}
+
+static void sand_readall(char *buf, size_t count)
+{
+    while (count > 0) {
+        ssize_t result = read(RPY_FD_STDIN, buf, count);
+        if (result <= 0) {
+            if (result == 0) {
+                fprintf(stderr, "sandbox: stdin was closed\n");
+            }
+            else {
+                perror("sandbox: read(stdin)");
+            }
+            abort();
+        }
+        if (result > count) {
+            fprintf(stderr, "sandbox: read(stdin) returned more data than "
+                            "expected\n");
+            abort();
+        }
+        buf += result;
+        count -= result;
+    }
+}
+
+
+static char *sand_arg_output(size_t size)
+{
+    char *p = sand_argbuf + sand_nextarg;
+    sand_nextarg += size;
+    if (sand_nextarg > RPY_SANDBOX_ARGBUF) {
+        fprintf(stderr,
+                "sandbox: argument buffer overflow (RPY_SANDBOX_ARGBUF)\n");
+        abort();
+    }
+    return p;
+}
+
+void rpy_sandbox_arg_i(unsigned long long i)
+{
+    *(unsigned long long *)sand_arg_output(sizeof(unsigned long long)) = i;
+}
+
+void rpy_sandbox_arg_f(double f)
+{
+    *(double *)sand_arg_output(sizeof(double)) = f;
+}
+
+void rpy_sandbox_arg_p(void *p)
+{
+    *(void **)sand_arg_output(sizeof(void *)) = p;
+}
+
+struct sand_data_s {
+    void *data;
+    size_t size;
+};
+
+static void sand_interact(const char *name_and_sig, char expected_result,
+                          void *result, size_t result_size)
+{
+    size_t name_len = strlen(name_and_sig);
+    assert(name_len > 0);
+    if (name_len > RPY_SANDBOX_NAMEMAX - 1) {
+        fprintf(stderr,
+             "sandbox: function name buffer overflow (RPY_SANDBOX_NAMEMAX)\n");
+        abort();
+    }
+    char *p = sand_argbuf + RPY_SANDBOX_NAMEMAX - name_len - 1;
+    *p = name_len;
+    memcpy(p + 1, name_and_sig, name_len);
+
+    assert(sand_nextarg >= RPY_SANDBOX_NAMEMAX);
+    assert(sand_nextarg <= RPY_SANDBOX_ARGBUF);
+
+    sand_writeall(p, sand_nextarg - (p - sand_argbuf));
+    sand_nextarg = RPY_SANDBOX_NAMEMAX;
+
+    while (1) {
+        struct sand_data_s data_hdr;
+        char command = 0;
+        sand_readall(&command, 1);
+        switch (command) {
+
+            case 'v':
+            case 'i':
+            case 'f':
+            case 'p':
+                if (expected_result != command) {
+                    fprintf(stderr, "sandbox: %s: waiting for a result of type "
+                                    "%c but got %c instead\n", name_and_sig,
+                                    expected_result, command);
+                    abort();
+                }
+                sand_readall((char *)result, result_size);
+                return;
+
+            case 'R':
+                sand_readall((char *)&data_hdr, sizeof(data_hdr));
+                sand_writeall(data_hdr.data, data_hdr.size);
+                break;
+
+            case 'W':
+                sand_readall((char *)&data_hdr, sizeof(data_hdr));
+                sand_readall(data_hdr.data, data_hdr.size);
+                break;
+
+            default:
+                fprintf(stderr, "sandbox: protocol error: unexpected byte %d\n",
+                        (int)command);
+                abort();
+        }
+    }
+}
+
+void rpy_sandbox_res_v(const char *name_and_sig)
+{
+    sand_interact(name_and_sig, 'v', NULL, 0);
+}
+
+unsigned long long rpy_sandbox_res_i(const char *name_and_sig)
+{
+    unsigned long long result;
+    sand_interact(name_and_sig, 'i', &result, sizeof(result));
+    return result;
+}
+
+double rpy_sandbox_res_f(const char *name_and_sig)
+{
+    double result;
+    sand_interact(name_and_sig, 'f', &result, sizeof(result));
+    return result;
+}
+
+void *rpy_sandbox_res_p(const char *name_and_sig)
+{
+    void *result;
+    sand_interact(name_and_sig, 'p', &result, sizeof(result));
+    return result;
+}
diff --git a/rpython/translator/sandbox/src/rsandbox.h b/rpython/translator/sandbox/src/rsandbox.h
new file mode 100644
--- /dev/null
+++ b/rpython/translator/sandbox/src/rsandbox.h
@@ -0,0 +1,9 @@
+
+void rpy_sandbox_arg_i(unsigned long long i);
+void rpy_sandbox_arg_f(double f);
+void rpy_sandbox_arg_p(void *p);
+
+void rpy_sandbox_res_v(const char *name_and_sig);
+unsigned long long rpy_sandbox_res_i(const char *name_and_sig);
+double rpy_sandbox_res_f(const char *name_and_sig);
+void *rpy_sandbox_res_p(const char *name_and_sig);


More information about the pypy-commit mailing list