[pypy-svn] r45202 - in pypy/dist/pypy: annotation rpython/module translator/c translator/c/test

Thu Jul 19 13:56:44 CEST 2007

Author: arigo
Date: Thu Jul 19 13:56:43 2007
New Revision: 45202

Modified:
   pypy/dist/pypy/annotation/builtin.py
   pypy/dist/pypy/rpython/module/ll_os.py
   pypy/dist/pypy/translator/c/sandbox.py
   pypy/dist/pypy/translator/c/sandboxmsg.py
   pypy/dist/pypy/translator/c/test/test_sandbox.py
Log:
Add hooks for ll external functions to specify how to marshal/unmarshal
their arguments and return value for the purpose of sandboxing.

Fight annotation and rtyping ordering issues by throwing piles of
hacks at them :-(


Modified: pypy/dist/pypy/annotation/builtin.py
==============================================================================

--- pypy/dist/pypy/annotation/builtin.py	(original)
+++ pypy/dist/pypy/annotation/builtin.py	Thu Jul 19 13:56:43 2007
@@ -65,8 +65,11 @@
         if step == 0:
             raise Exception, "range() with step zero"
         if s_start.is_constant() and s_stop.is_constant():
-            if len(xrange(s_start.const, s_stop.const, step)) == 0:
-                empty = True
+            try:
+                if len(xrange(s_start.const, s_stop.const, step)) == 0:
+                    empty = True
+            except TypeError:   # if one of the .const is a Symbolic
+                pass
     if empty:
         s_item = s_ImpossibleValue
     else:

Modified: pypy/dist/pypy/rpython/module/ll_os.py
==============================================================================
--- pypy/dist/pypy/rpython/module/ll_os.py	(original)
+++ pypy/dist/pypy/rpython/module/ll_os.py	Thu Jul 19 13:56:43 2007
@@ -167,6 +167,20 @@
 register_external(os.read, [int, int], str, "ll_os.ll_os_read",
                   llimpl=os_read_lltypeimpl, oofakeimpl=os_read_oofakeimpl)
 
+# '--sandbox' support
+def os_read_marshal_input(msg, fd, buf, size):
+    msg.packnum(rffi.cast(lltype.Signed, fd))
+    msg.packsize_t(size)
+def os_read_unmarshal_output(msg, fd, buf, size):
+    data = msg.nextstring()
+    if len(data) > rffi.cast(lltype.Signed, size):
+        raise OverflowError
+    for i in range(len(data)):
+        buf[i] = data[i]
+    return rffi.cast(rffi.SIZE_T, len(data))
+os_read._obj._marshal_input = os_read_marshal_input
+os_read._obj._unmarshal_output = os_read_unmarshal_output
+
 # ------------------------------- os.write ------------------------------
 
 os_write = rffi.llexternal('write', [rffi.INT, rffi.VOIDP, rffi.SIZE_T],
@@ -193,6 +207,12 @@
 register_external(os.write, [int, str], int, "ll_os.ll_os_write",
                   llimpl=os_write_lltypeimpl, oofakeimpl=os_write_oofakeimpl)
 
+# '--sandbox' support
+def os_write_marshal_input(msg, fd, buf, size):
+    msg.packnum(rffi.cast(lltype.Signed, fd))
+    msg.packbuf(buf, 0, rffi.cast(lltype.Signed, size))
+os_write._obj._marshal_input = os_write_marshal_input
+
 # ------------------------------- os.close ------------------------------
 
 os_close = rffi.llexternal('close', [rffi.INT], rffi.INT)

Modified: pypy/dist/pypy/translator/c/sandbox.py
==============================================================================
--- pypy/dist/pypy/translator/c/sandbox.py	(original)
+++ pypy/dist/pypy/translator/c/sandbox.py	Thu Jul 19 13:56:43 2007
@@ -13,6 +13,7 @@
 from pypy.rpython.lltypesystem import lltype, rffi
 from pypy.annotation import model as annmodel
 from pypy.rlib.unroll import unrolling_iterable
+from pypy.rlib.objectmodel import CDefinedIntSymbolic
 from pypy.translator.c import funcgen
 from pypy.tool.sourcetools import func_with_new_name
 from pypy.rpython.annlowlevel import MixLevelHelperAnnotator
@@ -70,68 +71,113 @@
         c0 -= 0x100
     return (c0 << 24) | (c1 << 16) | (c2 << 8) | c3
 
-
-def get_external_function_sandbox_graph(fnobj, db):
-    """Build the graph of a helper trampoline function to be used
-    in place of real calls to the external function 'fnobj'.  The
-    trampoline marshals its input arguments, dumps them to STDOUT,
-    and waits for an answer on STDIN.
-    """
-    # XXX for now, only supports function with int and string arguments
-    # and returning an int.
-    FUNCTYPE = lltype.typeOf(fnobj)
+def build_default_marshal_input(FUNCTYPE, namehint, cache={}):
+    # return a default 'marshal_input' function
+    try:
+        return cache[FUNCTYPE]
+    except KeyError:
+        pass
     unroll_args = []
     for i, ARG in enumerate(FUNCTYPE.ARGS):
         if ARG == rffi.INT:       # 'int' argument
             methodname = "packnum"
+        elif ARG == rffi.SIZE_T:  # 'size_t' argument
+            methodname = "packsize_t"
         elif ARG == rffi.CCHARP:  # 'char*' argument, assumed zero-terminated
             methodname = "packccharp"
         else:
             raise NotImplementedError("external function %r argument type %s" %
-                                      (fnobj, ARG))
+                                      (namehint, ARG))
         unroll_args.append((i, methodname))
-    if FUNCTYPE.RESULT != rffi.INT:
-        raise NotImplementedError("exernal function %r return type %s" % (
-            fnobj, FUNCTYPE.RESULT))
     unroll_args = unrolling_iterable(unroll_args)
-    fnname = fnobj._name
 
-    def execute(*args):
-        STDIN = 0
-        STDOUT = 1
+    def marshal_input(msg, *args):
         assert len(args) == len(FUNCTYPE.ARGS)
-        # marshal the input arguments
-        msg = MessageBuilder()
-        msg.packstring(fnname)
         for index, methodname in unroll_args:
             getattr(msg, methodname)(args[index])
-        buf = msg.as_rffi_buf()
-        try:
-            writeall_not_sandboxed(STDOUT, buf, msg.getlength())
-        finally:
-            lltype.free(buf, flavor='raw')
 
-        # wait for the answer
-        buf = readall_not_sandboxed(STDIN, 4)
-        try:
-            length = buf2num(buf)
-        finally:
-            lltype.free(buf, flavor='raw')
+    cache[FUNCTYPE] = marshal_input
+    return marshal_input
 
-        length -= 4     # the original length includes the header
-        if length < 0:
-            raise IOError
-        buf = readall_not_sandboxed(STDIN, length)
+def unmarshal_int(msg, *args):    return msg.nextnum()
+def unmarshal_size_t(msg, *args): return msg.nextsize_t()
+def unmarshal_void(msg, *args):   pass
+
+def build_default_unmarshal_output(FUNCTYPE, namehint,
+                                   cache={rffi.INT   : unmarshal_int,
+                                          rffi.SIZE_T: unmarshal_size_t,
+                                          lltype.Void: unmarshal_void}):
+    try:
+        return cache[FUNCTYPE.RESULT]
+    except KeyError:
+        raise NotImplementedError("exernal function %r return type %s" % (
+            namehint, FUNCTYPE.RESULT))
+
+CFalse = CDefinedIntSymbolic('0')    # hack hack
+
+def sandboxed_io(msg):
+    STDIN = 0
+    STDOUT = 1
+    buf = msg.as_rffi_buf()
+    if CFalse:  # hack hack to force a method to be properly annotated/rtyped
+        msg.packstring(chr(CFalse) + chr(CFalse))
+        msg.packsize_t(rffi.cast(rffi.SIZE_T, CFalse))
+        msg.packbuf(buf, CFalse * 5, CFalse * 6)
+    try:
+        writeall_not_sandboxed(STDOUT, buf, msg.getlength())
+    finally:
+        lltype.free(buf, flavor='raw')
+    # wait for the answer
+    buf = readall_not_sandboxed(STDIN, 4)
+    try:
+        length = buf2num(buf)
+    finally:
+        lltype.free(buf, flavor='raw')
+    length -= 4     # the original length includes the header
+    if length < 0:
+        raise IOError
+    buf = readall_not_sandboxed(STDIN, length)
+    msg = LLMessage(buf, 0, length)
+    if CFalse:  # hack hack to force a method to be properly annotated/rtyped
+        msg.nextstring()
+        msg.nextsize_t()
+    return msg
+
+def get_external_function_sandbox_graph(fnobj, db):
+    """Build the graph of a helper trampoline function to be used
+    in place of real calls to the external function 'fnobj'.  The
+    trampoline marshals its input arguments, dumps them to STDOUT,
+    and waits for an answer on STDIN.
+    """
+    # XXX for now, only supports function with int and string arguments
+    # and returning an int or void.  Other cases need a custom
+    # _marshal_input and/or _unmarshal_output function on fnobj.
+    FUNCTYPE = lltype.typeOf(fnobj)
+    fnname = fnobj._name
+    if hasattr(fnobj, '_marshal_input'):
+        marshal_input = fnobj._marshal_input
+    else:
+        marshal_input = build_default_marshal_input(FUNCTYPE, fnname)
+    if hasattr(fnobj, '_unmarshal_output'):
+        unmarshal_output = fnobj._unmarshal_output
+    else:
+        unmarshal_output = build_default_unmarshal_output(FUNCTYPE, fnname)
+
+    def execute(*args):
+        # marshal the input arguments
+        msg = MessageBuilder()
+        msg.packstring(fnname)
+        marshal_input(msg, *args)
+        # send the buffer and wait for the answer
+        msg = sandboxed_io(msg)
         try:
             # decode the answer
-            msg = LLMessage(buf, 0, length)
             errcode = msg.nextnum()
             if errcode != 0:
                 raise IOError
-            result = msg.nextnum()
+            result = unmarshal_output(msg, *args)
         finally:
-            lltype.free(buf, flavor='raw')
-
+            lltype.free(msg.value, flavor='raw')
         return result
     execute = func_with_new_name(execute, 'sandboxed_' + fnname)
 

Modified: pypy/dist/pypy/translator/c/sandboxmsg.py
==============================================================================
--- pypy/dist/pypy/translator/c/sandboxmsg.py	(original)
+++ pypy/dist/pypy/translator/c/sandboxmsg.py	Thu Jul 19 13:56:43 2007
@@ -2,7 +2,7 @@
 import struct
 import select
 
-from pypy.annotation import policy, model as annmodel
+from pypy.rpython.lltypesystem import rffi, lltype
 
 # ____________________________________________________________
 #
@@ -14,28 +14,43 @@
         self.value = ['\xFF', '\xFF', '\xFF', '\xFF']
 
     def packstring(self, s):
-        self.packnum(len(s), "s")
+        self.value.append("s")
+        self._pack4(len(s))
         self.value += s
         return self
-    packstring._annenforceargs_ = policy.Sig(None, str)
 
     def packccharp(self, p):
         length = 0
         while p[length] != '\x00':
             length += 1
-        self.packnum(length, "s")
+        self.value.append("s")
+        self._pack4(length)
         for i in range(length):
             self.value.append(p[i])
         return self
 
-    def packnum(self, n, prefix="i"):
-        self.value.append(prefix)
+    def packbuf(self, buf, start, stop):
+        self.value.append("s")
+        self._pack4(stop - start)
+        for i in range(start, stop):
+            self.value.append(buf[i])
+        return self
+
+    def _pack4(self, n):
         self.value.append(chr((n >> 24) & 0xFF))
         self.value.append(chr((n >> 16) & 0xFF))
         self.value.append(chr((n >>  8) & 0xFF))
         self.value.append(chr((n      ) & 0xFF))
+
+    def packnum(self, n):
+        self.value.append("i")
+        self._pack4(n)
+        return self
+
+    def packsize_t(self, n):
+        self.value.append("I")
+        self._pack4(rffi.cast(lltype.Signed, n))
         return self
-    packnum._annenforceargs_ = policy.Sig(None, int, annmodel.SomeChar())
 
     def _fixlength(self):
         n = len(self.value)
@@ -65,6 +80,7 @@
 class LLMessage(object):
     def __init__(self, value, start, stop):
         self.value = value
+        assert 0 <= start <= stop
         self.pos = start
         self.stop = stop
 
@@ -76,7 +92,12 @@
         return self.value[i]
 
     def nextstring(self):
-        length = self.nextnum("s")
+        t = self._char()
+        if t != "s":
+            raise ValueError
+        length = self._next4()
+        if length < 0:
+            raise ValueError
         i = self.pos
         self.pos = i + length
         if self.pos > self.stop:
@@ -85,10 +106,19 @@
         # not sliceable.  See also the Message subclass.
         return ''.join([self.value[index] for index in range(i, self.pos)])
 
-    def nextnum(self, prefix="i"):
+    def nextnum(self):
+        t = self._char()
+        if t != "i":
+            raise ValueError
+        return self._next4()
+
+    def nextsize_t(self):
         t = self._char()
-        if t != prefix:
+        if t != "I":
             raise ValueError
+        return rffi.cast(rffi.SIZE_T, self._next4_unsigned())
+
+    def _next4(self):
         c0 = ord(self._char())
         c1 = ord(self._char())
         c2 = ord(self._char())
@@ -97,6 +127,13 @@
             c0 -= 0x100
         return (c0 << 24) | (c1 << 16) | (c2 << 8) | c3
 
+    def _next4_unsigned(self):
+        c0 = ord(self._char())
+        c1 = ord(self._char())
+        c2 = ord(self._char())
+        c3 = ord(self._char())
+        return (c0 << 24) | (c1 << 16) | (c2 << 8) | c3
+
     def end(self):
         return self.pos >= self.stop
 
@@ -109,7 +146,10 @@
         LLMessage.__init__(self, buf, start=0, stop=len(buf))
 
     def nextstring(self):
-        length = self.nextnum("s")
+        t = self._char()
+        if t != "s":
+            raise ValueError
+        length = self._next4()
         i = self.pos
         self.pos = i + length
         if self.pos > self.stop:

Modified: pypy/dist/pypy/translator/c/test/test_sandbox.py
==============================================================================
--- pypy/dist/pypy/translator/c/test/test_sandbox.py	(original)
+++ pypy/dist/pypy/translator/c/test/test_sandbox.py	Thu Jul 19 13:56:43 2007
@@ -65,3 +65,67 @@
     tail = f.read()
     f.close()
     assert tail == ""
+
+def test_sandbox_2():
+    def entry_point(argv):
+        fd = os.open("/tmp/foobar", os.O_RDONLY, 0777)
+        assert fd == 77
+        res = os.read(fd, 123)
+        assert res == "he\x00llo"
+        count = os.write(fd, "world\x00!\x00")
+        assert count == 42
+        os.close(fd)
+        return 0
+
+    t = Translation(entry_point, backend='c', standalone=True, sandbox=True)
+    exe = t.compile()
+    g, f = os.popen2(exe, "t", 0)
+
+    msg = read_message(f, timeout=10.0)
+    m1 = msg.nextstring()
+    assert m1 == "open"
+    m2 = msg.nextstring()
+    assert m2 == "/tmp/foobar"
+    m3 = msg.nextnum()
+    assert m3 == os.O_RDONLY
+    m4 = msg.nextnum()
+    assert m4 == 0777
+    assert msg.end()
+
+    g.write(MessageBuilder().packnum(0).packnum(77).getvalue())
+
+    msg = read_message(f, timeout=10.0)
+    m1 = msg.nextstring()
+    assert m1 == "read"
+    m2 = msg.nextnum()
+    assert m2 == 77
+    m3 = msg.nextsize_t()
+    assert m3 == 123
+    assert msg.end()
+
+    g.write(MessageBuilder().packnum(0).packstring("he\x00llo").getvalue())
+
+    msg = read_message(f, timeout=10.0)
+    m1 = msg.nextstring()
+    assert m1 == "write"
+    m2 = msg.nextnum()
+    assert m2 == 77
+    m3 = msg.nextstring()
+    assert m3 == "world\x00!\x00"
+    assert msg.end()
+
+    g.write(MessageBuilder().packnum(0).packsize_t(42).getvalue())
+
+    msg = read_message(f, timeout=10.0)
+    m1 = msg.nextstring()
+    assert m1 == "close"
+    m2 = msg.nextnum()
+    assert m2 == 77
+    assert msg.end()
+
+    g.write(MessageBuilder().packnum(0).packnum(0).getvalue())
+
+    g.close()
+    tail = f.read()
+    f.close()
+    assert tail == ""