[Python-checkins] Issue #11244: The peephole optimizer is now able to constant-fold

antoine.pitrou python-checkins at python.org
Fri Mar 11 17:27:57 CET 2011


http://hg.python.org/cpython/rev/14205d0fee45
changeset:   68375:14205d0fee45
user:        Antoine Pitrou <solipsis at pitrou.net>
date:        Fri Mar 11 17:27:02 2011 +0100
summary:
  Issue #11244: The peephole optimizer is now able to constant-fold
arbitrarily complex expressions.  This also fixes a 3.2 regression where
operations involving negative numbers were not constant-folded.
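
A quick way to observe the effect from an interactive session (an illustrative
sketch only, not part of the commit; it assumes a CPython build that includes
this change, and exact constant indices and offsets may differ):

    import dis

    # Before this change the leading unary minus blocked folding; with the
    # patch the whole expression collapses into a single LOAD_CONST.
    dis.dis(compile("-3 * 5", "<demo>", "eval"))
    # With the patch this prints one LOAD_CONST of -15 followed by
    # RETURN_VALUE; without it, a UNARY_NEGATIVE and BINARY_MULTIPLY remain.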

files:
  Lib/test/test_peepholer.py
  Misc/NEWS
  Python/peephole.c

diff --git a/Lib/test/test_peepholer.py b/Lib/test/test_peepholer.py
--- a/Lib/test/test_peepholer.py
+++ b/Lib/test/test_peepholer.py
@@ -8,8 +8,10 @@
     f = StringIO()
     tmp = sys.stdout
     sys.stdout = f
-    dis.dis(func)
-    sys.stdout = tmp
+    try:
+        dis.dis(func)
+    finally:
+        sys.stdout = tmp
     result = f.getvalue()
     f.close()
     return result
@@ -99,6 +101,12 @@
             self.assertIn(elem, asm)
             self.assertNotIn('BUILD_TUPLE', asm)
 
+        # Long tuples should be folded too.
+        asm = dis_single(repr(tuple(range(10000))))
+        # One LOAD_CONST for the tuple, one for the None return value
+        self.assertEqual(asm.count('LOAD_CONST'), 2)
+        self.assertNotIn('BUILD_TUPLE', asm)
+
         # Bug 1053819:  Tuple of constants misidentified when presented with:
         # . . . opcode_with_arg 100   unary_opcode   BUILD_TUPLE 1  . . .
         # The following would segfault upon compilation
@@ -267,6 +275,25 @@
         asm = disassemble(f)
         self.assertNotIn('BINARY_ADD', asm)
 
+    def test_constant_folding(self):
+        # Issue #11244: aggressive constant folding.
+        exprs = [
+            "3 * -5",
+            "-3 * 5",
+            "2 * (3 * 4)",
+            "(2 * 3) * 4",
+            "(-1, 2, 3)",
+            "(1, -2, 3)",
+            "(1, 2, -3)",
+            "(1, 2, -3) * 6",
+            "lambda x: x in {(3 * -5) + (-1 - 6), (1, -2, 3) * 2, None}",
+        ]
+        for e in exprs:
+            asm = dis_single(e)
+            self.assertNotIn('UNARY_', asm, e)
+            self.assertNotIn('BINARY_', asm, e)
+            self.assertNotIn('BUILD_', asm, e)
+
 
 def test_main(verbose=None):
     import sys
diff --git a/Misc/NEWS b/Misc/NEWS
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -10,6 +10,10 @@
 Core and Builtins
 -----------------
 
+- Issue #11244: The peephole optimizer is now able to constant-fold
+  arbitrarily complex expressions.  This also fixes a 3.2 regression where
+  operations involving negative numbers were not constant-folded.
+
 - Issue #11450: Don't truncate hg version info in Py_GetBuildInfo() when
   there are many tags (e.g. when using mq).  Patch by Nadeem Vawda.
 
diff --git a/Python/peephole.c b/Python/peephole.c
--- a/Python/peephole.c
+++ b/Python/peephole.c
@@ -23,6 +23,64 @@
 #define ISBASICBLOCK(blocks, start, bytes) \
     (blocks[start]==blocks[start+bytes-1])
 
+
+#define CONST_STACK_CREATE() { \
+    const_stack_size = 256; \
+    const_stack = PyMem_New(PyObject *, const_stack_size); \
+    load_const_stack = PyMem_New(Py_ssize_t, const_stack_size); \
+    if (!const_stack || !load_const_stack) { \
+        PyErr_NoMemory(); \
+        goto exitError; \
+    } \
+    }
+
+#define CONST_STACK_DELETE() do { \
+    if (const_stack) \
+        PyMem_Free(const_stack); \
+    if (load_const_stack) \
+        PyMem_Free(load_const_stack); \
+    } while(0)
+
+#define CONST_STACK_LEN() (const_stack_top + 1)
+
+#define CONST_STACK_PUSH_OP(i) do { \
+    PyObject *_x; \
+    assert(codestr[i] == LOAD_CONST); \
+    assert(PyList_GET_SIZE(consts) > GETARG(codestr, i)); \
+    _x = PyList_GET_ITEM(consts, GETARG(codestr, i)); \
+    if (++const_stack_top >= const_stack_size) { \
+        const_stack_size *= 2; \
+        PyMem_Resize(const_stack, PyObject *, const_stack_size); \
+        PyMem_Resize(load_const_stack, Py_ssize_t, const_stack_size); \
+        if (!const_stack || !load_const_stack) { \
+            PyErr_NoMemory(); \
+            goto exitError; \
+        } \
+    } \
+    load_const_stack[const_stack_top] = i; \
+    const_stack[const_stack_top] = _x; \
+    in_consts = 1; \
+    } while(0)
+
+#define CONST_STACK_RESET() do { \
+    const_stack_top = -1; \
+    } while(0)
+
+#define CONST_STACK_TOP(x) \
+    const_stack[const_stack_top]
+
+#define CONST_STACK_LASTN(i) \
+    &const_stack[const_stack_top - i + 1]
+
+#define CONST_STACK_POP(i) do { \
+    assert(const_stack_top + 1 >= i); \
+    const_stack_top -= i; \
+    } while(0)
+
+#define CONST_STACK_OP_LASTN(i) \
+    ((const_stack_top >= i - 1) ? load_const_stack[const_stack_top - i + 1] : -1)
+
+
 /* Replace LOAD_CONST c1. LOAD_CONST c2 ... LOAD_CONST cn BUILD_TUPLE n
    with    LOAD_CONST (c1, c2, ... cn).
    The consts table must still be in list form so that the
@@ -33,17 +91,14 @@
    test; for BUILD_SET it assembles a frozenset rather than a tuple.
 */
 static int
-tuple_of_constants(unsigned char *codestr, Py_ssize_t n, PyObject *consts)
+tuple_of_constants(unsigned char *codestr, Py_ssize_t n,
+                   PyObject *consts, PyObject **objs)
 {
     PyObject *newconst, *constant;
-    Py_ssize_t i, arg, len_consts;
+    Py_ssize_t i, len_consts;
 
     /* Pre-conditions */
     assert(PyList_CheckExact(consts));
-    assert(codestr[n*3] == BUILD_TUPLE || codestr[n*3] == BUILD_LIST || codestr[n*3] == BUILD_SET);
-    assert(GETARG(codestr, (n*3)) == n);
-    for (i=0 ; i<n ; i++)
-        assert(codestr[i*3] == LOAD_CONST);
 
     /* Buildup new tuple of constants */
     newconst = PyTuple_New(n);
@@ -51,16 +106,14 @@
         return 0;
     len_consts = PyList_GET_SIZE(consts);
     for (i=0 ; i<n ; i++) {
-        arg = GETARG(codestr, (i*3));
-        assert(arg < len_consts);
-        constant = PyList_GET_ITEM(consts, arg);
+        constant = objs[i];
         Py_INCREF(constant);
         PyTuple_SET_ITEM(newconst, i, constant);
     }
 
     /* If it's a BUILD_SET, use the PyTuple we just built to create a
       PyFrozenSet, and use that as the constant instead: */
-    if (codestr[n*3] == BUILD_SET) {
+    if (codestr[0] == BUILD_SET) {
         PyObject *tuple = newconst;
         newconst = PyFrozenSet_New(tuple);
         Py_DECREF(tuple);
@@ -77,9 +130,8 @@
 
     /* Write NOPs over old LOAD_CONSTS and
        add a new LOAD_CONST newconst on top of the BUILD_TUPLE n */
-    memset(codestr, NOP, n*3);
-    codestr[n*3] = LOAD_CONST;
-    SETARG(codestr, (n*3), len_consts);
+    codestr[0] = LOAD_CONST;
+    SETARG(codestr, 0, len_consts);
     return 1;
 }
 
@@ -87,14 +139,14 @@
    with    LOAD_CONST binop(c1,c2)
    The consts table must still be in list form so that the
    new constant can be appended.
-   Called with codestr pointing to the first LOAD_CONST.
+   Called with codestr pointing to the BINOP.
    Abandons the transformation if the folding fails (i.e.  1+'a').
    If the new constant is a sequence, only folds when the size
    is below a threshold value.  That keeps pyc files from
    becoming large in the presence of code like:  (None,)*1000.
 */
 static int
-fold_binops_on_constants(unsigned char *codestr, PyObject *consts)
+fold_binops_on_constants(unsigned char *codestr, PyObject *consts, PyObject **objs)
 {
     PyObject *newconst, *v, *w;
     Py_ssize_t len_consts, size;
@@ -102,13 +154,11 @@
 
     /* Pre-conditions */
     assert(PyList_CheckExact(consts));
-    assert(codestr[0] == LOAD_CONST);
-    assert(codestr[3] == LOAD_CONST);
 
     /* Create new constant */
-    v = PyList_GET_ITEM(consts, GETARG(codestr, 0));
-    w = PyList_GET_ITEM(consts, GETARG(codestr, 3));
-    opcode = codestr[6];
+    v = objs[0];
+    w = objs[1];
+    opcode = codestr[0];
     switch (opcode) {
         case BINARY_POWER:
             newconst = PyNumber_Power(v, w, Py_None);
@@ -180,16 +230,15 @@
     Py_DECREF(newconst);
 
     /* Write NOP NOP NOP NOP LOAD_CONST newconst */
-    memset(codestr, NOP, 4);
-    codestr[4] = LOAD_CONST;
-    SETARG(codestr, 4, len_consts);
+    codestr[-2] = LOAD_CONST;
+    SETARG(codestr, -2, len_consts);
     return 1;
 }
 
 static int
-fold_unaryops_on_constants(unsigned char *codestr, PyObject *consts)
+fold_unaryops_on_constants(unsigned char *codestr, PyObject *consts, PyObject *v)
 {
-    PyObject *newconst=NULL, *v;
+    PyObject *newconst=NULL/*, *v*/;
     Py_ssize_t len_consts;
     int opcode;
 
@@ -198,7 +247,6 @@
     assert(codestr[0] == LOAD_CONST);
 
     /* Create new constant */
-    v = PyList_GET_ITEM(consts, GETARG(codestr, 0));
     opcode = codestr[3];
     switch (opcode) {
         case UNARY_NEGATIVE:
@@ -340,7 +388,11 @@
     unsigned char *lineno;
     int *addrmap = NULL;
     int new_line, cum_orig_line, last_line, tabsiz;
-    int cumlc=0, lastlc=0;      /* Count runs of consecutive LOAD_CONSTs */
+    PyObject **const_stack = NULL;
+    Py_ssize_t *load_const_stack = NULL;
+    Py_ssize_t const_stack_top = -1;
+    Py_ssize_t const_stack_size = 0;
+    int in_consts = 0;  /* whether we are in a LOAD_CONST sequence */
     unsigned int *blocks = NULL;
     char *name;
 
@@ -386,12 +438,16 @@
         goto exitError;
     assert(PyList_Check(consts));
 
+    CONST_STACK_CREATE();
+
     for (i=0 ; i<codelen ; i += CODESIZE(codestr[i])) {
       reoptimize_current:
         opcode = codestr[i];
 
-        lastlc = cumlc;
-        cumlc = 0;
+        if (!in_consts) {
+            CONST_STACK_RESET();
+        }
+        in_consts = 0;
 
         switch (opcode) {
             /* Replace UNARY_NOT POP_JUMP_IF_FALSE
@@ -432,21 +488,21 @@
                     goto exitError;
                 else if (h == 0)
                     continue;
-                cumlc = lastlc + 1;
+                CONST_STACK_PUSH_OP(i);
                 break;
 
                 /* Skip over LOAD_CONST trueconst
                    POP_JUMP_IF_FALSE xx. This improves
                    "while 1" performance. */
             case LOAD_CONST:
-                cumlc = lastlc + 1;
+                CONST_STACK_PUSH_OP(i);
                 j = GETARG(codestr, i);
                 if (codestr[i+3] != POP_JUMP_IF_FALSE  ||
                     !ISBASICBLOCK(blocks,i,6)  ||
                     !PyObject_IsTrue(PyList_GET_ITEM(consts, j)))
                     continue;
                 memset(codestr+i, NOP, 6);
-                cumlc = 0;
+                CONST_STACK_RESET();
                 break;
 
                 /* Try to fold tuples of constants (includes a case for lists and sets
@@ -458,19 +514,23 @@
             case BUILD_LIST:
             case BUILD_SET:
                 j = GETARG(codestr, i);
-                h = i - 3 * j;
-                if (h >= 0  &&
-                    j <= lastlc                  &&
+                if (j == 0)
+                    break;
+                h = CONST_STACK_OP_LASTN(j);
+                assert((h >= 0 || CONST_STACK_LEN() < j));
+                if (h >= 0 && j > 0 && j <= CONST_STACK_LEN() &&
                     ((opcode == BUILD_TUPLE &&
-                      ISBASICBLOCK(blocks, h, 3*(j+1))) ||
+                      ISBASICBLOCK(blocks, h, i-h+3)) ||
                      ((opcode == BUILD_LIST || opcode == BUILD_SET) &&
                       codestr[i+3]==COMPARE_OP &&
-                      ISBASICBLOCK(blocks, h, 3*(j+2)) &&
+                      ISBASICBLOCK(blocks, h, i-h+6) &&
                       (GETARG(codestr,i+3)==6 ||
                        GETARG(codestr,i+3)==7))) &&
-                    tuple_of_constants(&codestr[h], j, consts)) {
+                    tuple_of_constants(&codestr[i], j, consts, CONST_STACK_LASTN(j))) {
                     assert(codestr[i] == LOAD_CONST);
-                    cumlc = 1;
+                    memset(&codestr[h], NOP, i - h);
+                    CONST_STACK_POP(j);
+                    CONST_STACK_PUSH_OP(i);
                     break;
                 }
                 if (codestr[i+3] != UNPACK_SEQUENCE  ||
@@ -482,10 +542,12 @@
                 } else if (j == 2) {
                     codestr[i] = ROT_TWO;
                     memset(codestr+i+1, NOP, 5);
+                    CONST_STACK_RESET();
                 } else if (j == 3) {
                     codestr[i] = ROT_THREE;
                     codestr[i+1] = ROT_TWO;
                     memset(codestr+i+2, NOP, 4);
+                    CONST_STACK_RESET();
                 }
                 break;
 
@@ -504,12 +566,18 @@
             case BINARY_AND:
             case BINARY_XOR:
             case BINARY_OR:
-                if (lastlc >= 2                  &&
-                    ISBASICBLOCK(blocks, i-6, 7)  &&
-                    fold_binops_on_constants(&codestr[i-6], consts)) {
+                /* NOTE: LOAD_CONST is saved at `i-2` since it has an arg
+                   while BINOP hasn't */
+                h = CONST_STACK_OP_LASTN(2);
+                assert((h >= 0 || CONST_STACK_LEN() < 2));
+                if (h >= 0 &&
+                    ISBASICBLOCK(blocks, h, i-h+1)  &&
+                    fold_binops_on_constants(&codestr[i], consts, CONST_STACK_LASTN(2))) {
                     i -= 2;
+                    memset(&codestr[h], NOP, i - h);
                     assert(codestr[i] == LOAD_CONST);
-                    cumlc = 1;
+                    CONST_STACK_POP(2);
+                    CONST_STACK_PUSH_OP(i);
                 }
                 break;
 
@@ -518,12 +586,15 @@
             case UNARY_NEGATIVE:
             case UNARY_INVERT:
             case UNARY_POSITIVE:
-                if (lastlc >= 1                  &&
-                    ISBASICBLOCK(blocks, i-3, 4)  &&
-                    fold_unaryops_on_constants(&codestr[i-3], consts))                  {
+                h = CONST_STACK_OP_LASTN(1);
+                assert((h >= 0 || CONST_STACK_LEN() < 1));
+                if (h >= 0 &&
+                    ISBASICBLOCK(blocks, h, i-h+1)  &&
+                    fold_unaryops_on_constants(&codestr[i-3], consts, CONST_STACK_TOP())) {
                     i -= 2;
                     assert(codestr[i] == LOAD_CONST);
-                    cumlc = 1;
+                    CONST_STACK_POP(1);
+                    CONST_STACK_PUSH_OP(i);
                 }
                 break;
 
@@ -680,6 +751,7 @@
     assert(h + nops == codelen);
 
     code = PyBytes_FromStringAndSize((char *)codestr, h);
+    CONST_STACK_DELETE();
     PyMem_Free(addrmap);
     PyMem_Free(codestr);
     PyMem_Free(blocks);
@@ -689,6 +761,7 @@
     code = NULL;
 
  exitUnchanged:
+    CONST_STACK_DELETE();
     if (blocks != NULL)
         PyMem_Free(blocks);
     if (addrmap != NULL)

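For readers skimming the C, here is a rough Python model (illustrative only,
not the committed code; names and structure are simplified from the
CONST_STACK_* macros) of what the new bookkeeping buys over the old
cumlc/lastlc counters: each fold pushes its result back onto a stack of known
constants, so a later operator can fold again on top of it, which is what lets
arbitrarily nested expressions collapse.

    import operator

    BINOPS = {"BINARY_MULTIPLY": operator.mul, "BINARY_ADD": operator.add}

    def fold(instrs):
        """instrs: list of (opname, arg) pairs; a loose stand-in for codestr."""
        out, const_stack = [], []   # ~ const_stack[] / load_const_stack in the C
        for op, arg in instrs:
            if op == "LOAD_CONST":
                const_stack.append((len(out), arg))   # remember position and value
                out.append((op, arg))
            elif op in BINOPS and len(const_stack) >= 2:
                (i, v), (j, w) = const_stack[-2], const_stack[-1]
                out[i] = out[j] = ("NOP", None)       # blank out the operand loads
                result = BINOPS[op](v, w)
                const_stack[-2:] = [(len(out), result)]
                out.append(("LOAD_CONST", result))    # the fold result is itself a constant
            else:
                const_stack.clear()                   # anything else resets the stack
                out.append((op, arg))
        return out

    # 2 * (3 * 4) folds all the way down to a single LOAD_CONST 24:
    prog = [("LOAD_CONST", 2), ("LOAD_CONST", 3), ("LOAD_CONST", 4),
            ("BINARY_MULTIPLY", None), ("BINARY_MULTIPLY", None)]
    print(fold(prog))
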
-- 
Repository URL: http://hg.python.org/cpython

