[pypy-commit] pypy reverse-debugger: Don't implement what I wrote (it's a mess). Instead rely on us getting

arigo pypy.commits at gmail.com
Mon Jul 4 03:58:43 EDT 2016


Author: Armin Rigo <arigo at tunes.org>
Branch: reverse-debugger
Changeset: r85528:a142591d6ca2
Date: 2016-07-04 10:00 +0200
http://bitbucket.org/pypy/pypy/changeset/a142591d6ca2/

Log:	Don't implement what I wrote (it's a mess). Instead rely on us
	getting the same addresses as before. Check that by recording in the
	log file the addresses of a random function and a random struct.

diff --git a/rpython/translator/c/node.py b/rpython/translator/c/node.py
--- a/rpython/translator/c/node.py
+++ b/rpython/translator/c/node.py
@@ -459,23 +459,24 @@
         self.implementationtypename = db.gettype(
             T, varlength=self.getvarlength())
         parent, parentindex = parentlink(obj)
-        mangled = False
+        ## mangled = False
         if obj in exports.EXPORTS_obj2name:
             self.name = exports.EXPORTS_obj2name[obj]
             self.globalcontainer = 2    # meh
         elif parent is None:
             self.name = db.namespace.uniquename('g_' + self.basename())
             self.globalcontainer = True
-            if db.reverse_debugger and T._gckind != 'gc':
-                from rpython.translator.revdb import gencsupp
-                mangled = gencsupp.mangle_name_prebuilt_raw(db, self, T)
+            ## if db.reverse_debugger and T._gckind != 'gc':
+            ##     from rpython.translator.revdb import gencsupp
+            ##     mangled = gencsupp.mangle_name_prebuilt_raw(db, self, T)
         else:
             self.globalcontainer = False
             parentnode = db.getcontainernode(parent)
             defnode = db.gettypedefnode(parentnode.getTYPE())
             self.name = defnode.access_expr(parentnode.name, parentindex)
         if self.typename != self.implementationtypename:
-            if db.gettypedefnode(T).extra_union_for_varlength and not mangled:
+            if db.gettypedefnode(T).extra_union_for_varlength:
+                 ## and not mangled:
                 self.name += '.b'
         self._funccodegen_owner = None
 
@@ -497,8 +498,8 @@
 
     def get_declaration(self):
         name = self.name
-        if name.startswith('RPY_RDB_A(') and name.endswith(')'):
-            name = name[len('RPY_RDB_A('):-1]
+        ## if name.startswith('RPY_RDB_A(') and name.endswith(')'):
+        ##     name = name[len('RPY_RDB_A('):-1]
         if name[-2:] == '.b':
             # xxx fish fish
             assert self.implementationtypename.startswith('struct ')
@@ -802,11 +803,14 @@
             expr = db.get(value)
             if typeOf(value) is Void:
                 comma = ''
-            elif expr.startswith('(&RPY_RDB_A('):
-                # can't use this in static initialization code
-                assert db.reverse_debugger
-                db.late_initializations.append(('%s' % access_expr, expr))
-                expr = 'NULL /* patched later with %s */' % (expr,)
+            ## elif expr.startswith('(&RPY_RDB_A('):
+            ##     # can't use this in static initialization code if we
+            ##     # are inside a GC struct or a static_immutable struct.
+            ##     # (It is not needed inside other raw structs, but we
+            ##     # don't try to optimize that here.)
+            ##     assert db.reverse_debugger
+            ##     db.late_initializations.append(('%s' % access_expr, expr))
+            ##     expr = 'NULL /* patched later with %s */' % (expr,)
         expr += comma
         i = expr.find('\n')
         if i < 0:
diff --git a/rpython/translator/revdb/gencsupp.py b/rpython/translator/revdb/gencsupp.py
--- a/rpython/translator/revdb/gencsupp.py
+++ b/rpython/translator/revdb/gencsupp.py
@@ -9,37 +9,27 @@
 # have the "same" content as they had during recording.  More precisely,
 # we divide all variables according to their type into two categories:
 #
-#  * "moving things", whose value during recording is bitwise different
-#    from their value during replaying;
+#  * non-GC pointers, whose values are bitwise identical.
 #
-#  * "fixed things", whose values are bitwise identical.
+#  * GC pointers: these things are "moving", in the sense that the
+#    bitwise value of a GC pointer during recording is different from
+#    its bitwise value during replaying.  The object at the new
+#    address contains the "same" content as the original one, using
+#    this definition of "same".
 #
-# Moving things are:
+# Note that most non-GC pointers are not followed during replaying: we
+# never call external libraries nor call malloc to get pieces of raw
+# memory.  Writes to raw memory are ignored, and reads return a value
+# recorded in the log file.  However, the following are still needed:
 #
-#  * GC pointers.  During replaying they point to locally-allocated
-#    memory that is an object with the "same" content as during
-#    recording;
+#  * function pointers pointing to RPython functions;
 #
-#  * pointers to RPython functions;
+#  * "static-immutable" structs like vtables.
 #
-#  * pointers to structures with the "static_immutable" hint, like
-#    vtables.
-#
-# Fixed things are the rest:
-#
-#  * integers, floats;
-#
-#  * most raw pointers, which during replaying will thus point to
-#    nonsense.  (This pointer is not used during replaying to
-#    read/write memory: any write is ignored, and any read has its
-#    result recorded in the log.)
-#
-# Note an issue with prebuilt raw pointers to fixed things (i.e. all
-# constants in the C sources that appear either inside the code or
-# inside "static_immutable" or prebuilt GC structures).  During
-# replaying, they must correspond to bitwise the same value as during
-# recording, and not to the local-process address of the raw
-# structure, which is typically different (and should never be used).
+# For now, we must make sure that these are at the same address as
+# they were.  This is very roughly checked.  On Linux, it means you
+# must run with Address Space Layout Randomization disabled.  This
+# might be fixed in the future.
 #
 
 
@@ -49,16 +39,16 @@
         srcdir / 'revdb.c',
     ]
 
-def mangle_name_prebuilt_raw(database, node, S):
-    if (S._gckind != 'gc' and not S._hints.get('is_excdata')
-                          and not S._hints.get('static_immutable')
-                          and not S._hints.get('ignore_revdb')
-                          and not S._hints.get('gcheader')):
-        database.all_raw_structures.append(node)
-        node.name = 'RPY_RDB_A(%s)' % (node.name,)
-        return True
-    else:
-        return False
+## def mangle_name_prebuilt_raw(database, node, S):
+##     if (S._gckind != 'gc' and not S._hints.get('is_excdata')
+##                           and not S._hints.get('static_immutable')
+##                           and not S._hints.get('ignore_revdb')
+##                           and not S._hints.get('gcheader')):
+##         database.all_raw_structures.append(node)
+##         node.name = 'RPY_RDB_A(%s)' % (node.name,)
+##         return True
+##     else:
+##         return False
 
 def prepare_function(funcgen):
     stack_bottom = False
@@ -151,13 +141,13 @@
     db.get(s)
 
     db.stack_bottom_funcnames = []
-    db.all_raw_structures = []
+    ## db.all_raw_structures = []
 
 def write_revdb_def_file(db, target_path):
     f = target_path.open('w')
     funcnames = sorted(db.stack_bottom_funcnames)
-    print >> f, "#define RDB_VERSION  0x%x" % random.randrange(0, sys.maxint)
-    print >> f
+    ## print >> f, "#define RDB_VERSION  0x%x" % random.randrange(0, sys.maxint)
+    ## print >> f
     for i, fn in enumerate(funcnames):
         print >> f, '#define RPY_CALLBACKLOC_%s %d' % (fn, i)
     print >> f
@@ -169,36 +159,36 @@
         else:
             tail = ', \\'
         print >> f, '\t(void *)%s%s' % (fn, tail)
-    print >> f
+    ## print >> f
 
-    def _base(name):
-        assert name.startswith('RPY_RDB_A(')
-        if name.endswith('.b'):
-            name = name[:-2]
-        name = name[len('RPY_RDB_A('):-1]
-        return name
+    ## def _base(name):
+    ##     assert name.startswith('RPY_RDB_A(')
+    ##     if name.endswith('.b'):
+    ##         name = name[:-2]
+    ##     name = name[len('RPY_RDB_A('):-1]
+    ##     return name
 
-    rawstructs = sorted(db.all_raw_structures, key=lambda node: node.name)
-    print >> f, '#define RPY_RDB_A(name)  (*rpy_rdb_struct.name)'
-    print >> f, 'struct rpy_rdb_a_s {'
-    for i, node in enumerate(rawstructs):
-        print >> f, '\t%s;' % (cdecl(node.typename, '*'+_base(node.name)),)
-    if not rawstructs:
-        print >> f, '\tchar dummy;'
-    print >> f, '};'
-    print >> f, 'RPY_EXTERN struct rpy_rdb_a_s rpy_rdb_struct;'
-    print >> f
-    print >> f, '#define RPY_RDB_STRUCT_CONTENT \\'
-    if not rawstructs:
-        print >> f, '\t0'
-    else:
-        for i, node in enumerate(rawstructs):
-            if i == len(rawstructs) - 1:
-                tail = ''
-            else:
-                tail = ', \\'
-            name = '&' + _base(node.name)
-            if node.typename != node.implementationtypename:
-                name = '(%s)%s' % (cdecl(node.typename, '*'), name)
-            print >> f, '\t%s%s' % (name, tail)
+    ## rawstructs = sorted(db.all_raw_structures, key=lambda node: node.name)
+    ## print >> f, '#define RPY_RDB_A(name)  (*rpy_rdb_struct.name)'
+    ## print >> f, 'struct rpy_rdb_a_s {'
+    ## for i, node in enumerate(rawstructs):
+    ##     print >> f, '\t%s;' % (cdecl(node.typename, '*'+_base(node.name)),)
+    ## if not rawstructs:
+    ##     print >> f, '\tchar dummy;'
+    ## print >> f, '};'
+    ## print >> f, 'RPY_EXTERN struct rpy_rdb_a_s rpy_rdb_struct;'
+    ## print >> f
+    ## print >> f, '#define RPY_RDB_STRUCT_CONTENT \\'
+    ## if not rawstructs:
+    ##     print >> f, '\t0'
+    ## else:
+    ##     for i, node in enumerate(rawstructs):
+    ##         if i == len(rawstructs) - 1:
+    ##             tail = ''
+    ##         else:
+    ##             tail = ', \\'
+    ##         name = '&' + _base(node.name)
+    ##         if node.typename != node.implementationtypename:
+    ##             name = '(%s)%s' % (cdecl(node.typename, '*'), name)
+    ##         print >> f, '\t%s%s' % (name, tail)
     f.close()
diff --git a/rpython/translator/revdb/src-revdb/revdb.c b/rpython/translator/revdb/src-revdb/revdb.c
--- a/rpython/translator/revdb/src-revdb/revdb.c
+++ b/rpython/translator/revdb/src-revdb/revdb.c
@@ -22,6 +22,7 @@
 #include "src-revdb/revdb_include.h"
 
 #define RDB_SIGNATURE   "RevDB:"
+#define RDB_VERSION     0x00FF0003
 
 #define WEAKREF_AFTERWARDS_DEAD    ((char)0xf2)
 #define WEAKREF_AFTERWARDS_ALIVE   ((char)0xeb)
@@ -36,7 +37,8 @@
 typedef struct {
     Signed version;
     uint64_t reserved1, reserved2;
-    unsigned int size_rdb_struct;
+    void *ptr1, *ptr2;
+    int reversed3;
     int argc;
     char **argv;
 } rdb_header_t;
@@ -164,13 +166,14 @@
 
         memset(&h, 0, sizeof(h));
         h.version = RDB_VERSION;
-        h.size_rdb_struct = sizeof(rpy_rdb_struct);
+        h.ptr1 = &rpy_reverse_db_stop_point;
+        h.ptr2 = &rpy_revdb;
         h.argc = argc;
         h.argv = argv;
         write_all((const char *)&h, sizeof(h));
 
         /* write the whole content of rpy_rdb_struct */
-        write_all((const char *)&rpy_rdb_struct, sizeof(rpy_rdb_struct));
+        /*write_all((const char *)&rpy_rdb_struct, sizeof(rpy_rdb_struct));*/
 
         fprintf(stderr, "PID %d: recording revdb log to '%s'\n",
                         (int)getpid(), filename);
@@ -665,8 +668,17 @@
                 (long)h.version, (long)RDB_VERSION);
         exit(1);
     }
-    if (h.size_rdb_struct != sizeof(rpy_rdb_struct)) {
-        fprintf(stderr, "bad size_rdb_struct\n");
+    if (h.ptr1 != &rpy_reverse_db_stop_point ||
+        h.ptr2 != &rpy_revdb) {
+        fprintf(stderr,
+                "\n"
+                "In the replaying process, the addresses are different than\n"
+                "in the recording process.  We don't support this case for\n"
+                "now, sorry.  On Linux, check if Address Space Layout\n"
+                "Randomization (ASLR) is enabled, and disable it with:\n"
+                "\n"
+                "    echo 0 | sudo tee /proc/sys/kernel/randomize_va_space\n"
+                "\n");
         exit(1);
     }
     *argc_p = h.argc;
@@ -683,7 +695,7 @@
     }
 
     /* read the whole content of rpy_rdb_struct */
-    read_all((char *)&rpy_rdb_struct, sizeof(rpy_rdb_struct));
+    /*read_all((char *)&rpy_rdb_struct, sizeof(rpy_rdb_struct));*/
 
     rpy_revdb.buf_p = rpy_rev_buffer;
     rpy_revdb.buf_limit = rpy_rev_buffer;
@@ -1344,10 +1356,6 @@
     fq_trigger();
 }
 
-struct rpy_rdb_a_s rpy_rdb_struct = {
-    RPY_RDB_STRUCT_CONTENT   /* macro from revdb_def.h */
-};
-
 static void *callbacklocs[] = {
     RPY_CALLBACKLOCS     /* macro from revdb_def.h */
 };
diff --git a/rpython/translator/revdb/src-revdb/revdb_include.h b/rpython/translator/revdb/src-revdb/revdb_include.h
--- a/rpython/translator/revdb/src-revdb/revdb_include.h
+++ b/rpython/translator/revdb/src-revdb/revdb_include.h
@@ -29,7 +29,7 @@
 RPY_EXTERN void rpy_reverse_db_setup(int *argc_p, char **argv_p[]);
 RPY_EXTERN void rpy_reverse_db_teardown(void);
 
-#if 1    /* enable to print locations to stderr of all the EMITs */
+#if 0    /* enable to print locations to stderr of all the EMITs */
 #  define _RPY_REVDB_PRINT(mode, _e)                                    \
     fprintf(stderr,                                                     \
             "%s:%d: %0*llx\n",                                          \
@@ -37,7 +37,7 @@
             ((unsigned long long)_e) & ((2ULL << (8*sizeof(_e)-1)) - 1))
 #endif
 
-#if 1    /* enable to print all mallocs to stderr */
+#if 0    /* enable to print all mallocs to stderr */
 RPY_EXTERN void seeing_uid(uint64_t uid);
 #  define _RPY_REVDB_PRUID()                                    \
     seeing_uid(uid);                                            \
diff --git a/rpython/translator/revdb/test/test_basic.py b/rpython/translator/revdb/test/test_basic.py
--- a/rpython/translator/revdb/test/test_basic.py
+++ b/rpython/translator/revdb/test/test_basic.py
@@ -16,20 +16,19 @@
 class RDB(object):
     def __init__(self, filename, expected_argv):
         with open(filename, 'rb') as f:
-            header = f.readline()
             self.buffer = f.read()
-        assert header == 'RevDB:\t' + '\t'.join(expected_argv) + '\n'
+        self.cur = self.buffer.index('\x00') + 1
+        header = self.buffer[:self.cur]
+        assert header == 'RevDB:\t' + '\t'.join(expected_argv) + '\n\x00'
         #
-        self.cur = 0
-        x = self.read1('c'); assert x == '\x00'
-        x = self.read1('P'); #assert x == ...random version number...
+        x = self.read1('P'); assert x == 0x00FF0003
         x = self.read1('P'); assert x == 0
         x = self.read1('P'); assert x == 0
-        size_rdb_struct = self.read1('I')
+        x = self.read1('P'); #assert x == &rpy_reverse_db_stop_point
+        x = self.read1('P'); #assert x == &rpy_revdb
+        x = self.read1('i'); assert x == 0
         self.argc = self.read1('i')
         self.argv = self.read1('P')
-        self.rdb_struct = self.buffer[self.cur : self.cur + size_rdb_struct]
-        self.cur += size_rdb_struct
         self.current_packet_end = self.cur
         self.read_check_argv(expected_argv)
 
diff --git a/rpython/translator/revdb/test/test_raw.py b/rpython/translator/revdb/test/test_raw.py
--- a/rpython/translator/revdb/test/test_raw.py
+++ b/rpython/translator/revdb/test/test_raw.py
@@ -43,6 +43,13 @@
         bari = lltype.malloc(BARI, flavor='raw', immortal=True)
         bari.b = ibar
 
+        class X:
+            pass
+        x = X()
+        x.foo = foo
+        x.ibar = ibar
+        x.bari = bari
+
         def main(argv):
             assert bar.p == foo
             assert baz.p == foo
@@ -52,22 +59,29 @@
             assert recbar.super.p == foo
             assert ibar.p == foo
             assert bari.b == ibar
+            assert x.foo == foo
+            assert x.ibar == ibar
+            assert x.bari == bari
             revdb.stop_point()
             return 9
 
         compile(cls, main, backendopt=False)
         run(cls, '')
         rdb = fetch_rdb(cls, [cls.exename])
-        assert len(rdb.rdb_struct) >= 4
+        #assert len(rdb.rdb_struct) >= 4
 
     def test_replaying_raw(self):
         # This tiny test seems to always have foo at the same address
         # in multiple runs.  Here we recompile with different options
         # just to change that address.
-        subprocess.check_call(["make", "clean"],
-                              cwd=os.path.dirname(str(self.exename)))
-        subprocess.check_call(["make", "lldebug"],
-                              cwd=os.path.dirname(str(self.exename)))
+        #
+        # NOTE: not supported right now!  The executable must be
+        # exactly the same one with the same raw addresses.  This
+        # might be fixed in the future.
+        #subprocess.check_call(["make", "clean"],
+        #                      cwd=os.path.dirname(str(self.exename)))
+        #subprocess.check_call(["make", "lldebug"],
+        #                      cwd=os.path.dirname(str(self.exename)))
         #
         child = self.replay()
         child.send(Message(CMD_FORWARD, 2))


More information about the pypy-commit mailing list