[Python-checkins] bpo-46407: Optimizing some modulo operations (GH-30653)

Thu Jan 27 19:46:49 EST 2022

https://github.com/python/cpython/commit/f10dafc430279b4e6cf5b981ae3d1d76e8f431ad
commit: f10dafc430279b4e6cf5b981ae3d1d76e8f431ad
branch: main
author: Crowthebird <78076854+thatbirdguythatuknownot at users.noreply.github.com>
committer: tim-one <tim.peters at gmail.com>
date: 2022-01-27T18:46:45-06:00
summary:

bpo-46407: Optimizing some modulo operations (GH-30653)

Added new internal functions to compute mod without also computing the quotient.

The loops can be leaner then, which leads to modestly but reliably faster execution in contexts that know they don't need the quotient.

Code by Jeremiah Vivian (Pascual).

files:
A Misc/NEWS.d/next/Core and Builtins/2022-01-17-23-12-01.bpo-46407.2_5a7R.rst
M Misc/ACKS
M Objects/longobject.c

diff --git a/Misc/ACKS b/Misc/ACKS
index cf023c9af927a..6df339c3e145b 100644
--- a/Misc/ACKS
+++ b/Misc/ACKS
@@ -1860,6 +1860,7 @@ Kurt Vile
 Norman Vine
 Pauli Virtanen
 Frank Visser
+Jeremiah Vivian (Pascual)
 Johannes Vogel
 Michael Vogt
 Radu Voicilas
diff --git a/Misc/NEWS.d/next/Core and Builtins/2022-01-17-23-12-01.bpo-46407.2_5a7R.rst b/Misc/NEWS.d/next/Core and Builtins/2022-01-17-23-12-01.bpo-46407.2_5a7R.rst
new file mode 100644
index 0000000000000..e7f555f1ffc2f
--- /dev/null
+++ b/Misc/NEWS.d/next/Core and Builtins/2022-01-17-23-12-01.bpo-46407.2_5a7R.rst	
@@ -0,0 +1 @@
+Optimize some modulo operations in ``Objects/longobject.c``. Patch by Jeremiah Vivian.
diff --git a/Objects/longobject.c b/Objects/longobject.c
index 5f0cc579c2cca..cc4ceec21aaca 100644
--- a/Objects/longobject.c
+++ b/Objects/longobject.c
@@ -1670,6 +1670,35 @@ divrem1(PyLongObject *a, digit n, digit *prem)
     return long_normalize(z);
 }
 
+/* Remainder of long pin, w/ size digits, by non-zero digit n,
+   returning the remainder. pin points at the LSD. */
+
+static digit
+inplace_rem1(digit *pin, Py_ssize_t size, digit n)
+{
+    twodigits rem = 0;
+
+    assert(n > 0 && n <= PyLong_MASK);
+    while (--size >= 0)
+        rem = ((rem << PyLong_SHIFT) | pin[size]) % n;
+    return (digit)rem;
+}
+
+/* Get the remainder of an integer divided by a digit, returning
+   the remainder as the result of the function. The sign of a is
+   ignored; n should not be zero. */
+
+static PyLongObject *
+rem1(PyLongObject *a, digit n)
+{
+    const Py_ssize_t size = Py_ABS(Py_SIZE(a));
+
+    assert(n > 0 && n <= PyLong_MASK);
+    return (PyLongObject *)PyLong_FromLong(
+        (long)inplace_rem1(a->ob_digit, size, n)
+    );
+}
+
 /* Convert an integer to a base 10 string.  Returns a new non-shared
    string.  (Return value is non-shared so that callers can modify the
    returned value if necessary.) */
@@ -2689,6 +2718,47 @@ long_divrem(PyLongObject *a, PyLongObject *b,
     return 0;
 }
 
+/* Int remainder, top-level routine */
+
+static int
+long_rem(PyLongObject *a, PyLongObject *b, PyLongObject **prem)
+{
+    Py_ssize_t size_a = Py_ABS(Py_SIZE(a)), size_b = Py_ABS(Py_SIZE(b));
+
+    if (size_b == 0) {
+        PyErr_SetString(PyExc_ZeroDivisionError,
+                        "integer modulo by zero");
+        return -1;
+    }
+    if (size_a < size_b ||
+        (size_a == size_b &&
+         a->ob_digit[size_a-1] < b->ob_digit[size_b-1])) {
+        /* |a| < |b|. */
+        *prem = (PyLongObject *)long_long((PyObject *)a);
+        return -(*prem == NULL);
+    }
+    if (size_b == 1) {
+        *prem = rem1(a, b->ob_digit[0]);
+        if (*prem == NULL)
+            return -1;
+    }
+    else {
+        /* Slow path using divrem. */
+        x_divrem(a, b, prem);
+        if (*prem == NULL)
+            return -1;
+    }
+    /* Set the sign. */
+    if (Py_SIZE(a) < 0 && Py_SIZE(*prem) != 0) {
+        _PyLong_Negate(prem);
+        if (*prem == NULL) {
+            Py_CLEAR(*prem);
+            return -1;
+        }
+    }
+    return 0;
+}
+
 /* Unsigned int division with remainder -- the algorithm.  The arguments v1
    and w1 should satisfy 2 <= Py_ABS(Py_SIZE(w1)) <= Py_ABS(Py_SIZE(v1)). */
 
@@ -3814,6 +3884,37 @@ l_divmod(PyLongObject *v, PyLongObject *w,
     return 0;
 }
 
+/* Compute
+ *     *pmod = v % w
+ * pmod cannot be NULL. The caller owns a reference to pmod.
+ */
+static int
+l_mod(PyLongObject *v, PyLongObject *w, PyLongObject **pmod)
+{
+    PyLongObject *mod;
+
+    assert(pmod);
+    if (Py_ABS(Py_SIZE(v)) == 1 && Py_ABS(Py_SIZE(w)) == 1) {
+        /* Fast path for single-digit longs */
+        *pmod = (PyLongObject *)fast_mod(v, w);
+        return -(*pmod == NULL);
+    }
+    if (long_rem(v, w, &mod) < 0)
+        return -1;
+    if ((Py_SIZE(mod) < 0 && Py_SIZE(w) > 0) ||
+        (Py_SIZE(mod) > 0 && Py_SIZE(w) < 0)) {
+        PyLongObject *temp;
+        temp = (PyLongObject *) long_add(mod, w);
+        Py_DECREF(mod);
+        mod = temp;
+        if (mod == NULL)
+            return -1;
+    }
+    *pmod = mod;
+
+    return 0;
+}
+
 static PyObject *
 long_div(PyObject *a, PyObject *b)
 {
@@ -4100,11 +4201,7 @@ long_mod(PyObject *a, PyObject *b)
 
     CHECK_BINOP(a, b);
 
-    if (Py_ABS(Py_SIZE(a)) == 1 && Py_ABS(Py_SIZE(b)) == 1) {
-        return fast_mod((PyLongObject*)a, (PyLongObject*)b);
-    }
-
-    if (l_divmod((PyLongObject*)a, (PyLongObject*)b, NULL, &mod) < 0)
+    if (l_mod((PyLongObject*)a, (PyLongObject*)b, &mod) < 0)
         mod = NULL;
     return (PyObject *)mod;
 }
@@ -4333,10 +4430,10 @@ long_pow(PyObject *v, PyObject *w, PyObject *x)
               while the "large exponent" case multiplies directly by base 31
               times.  It can be unboundedly faster to multiply by
               base % modulus instead.
-           We could _always_ do this reduction, but l_divmod() isn't cheap,
+           We could _always_ do this reduction, but l_mod() isn't cheap,
            so we only do it when it buys something. */
         if (Py_SIZE(a) < 0 || Py_SIZE(a) > Py_SIZE(c)) {
-            if (l_divmod(a, c, NULL, &temp) < 0)
+            if (l_mod(a, c, &temp) < 0)
                 goto Error;
             Py_DECREF(a);
             a = temp;
@@ -4357,7 +4454,7 @@ long_pow(PyObject *v, PyObject *w, PyObject *x)
 #define REDUCE(X)                                       \
     do {                                                \
         if (c != NULL) {                                \
-            if (l_divmod(X, c, NULL, &temp) < 0)        \
+            if (l_mod(X, c, &temp) < 0)                 \
                 goto Error;                             \
             Py_XDECREF(X);                              \
             X = temp;                                   \
@@ -5022,7 +5119,7 @@ _PyLong_GCD(PyObject *aarg, PyObject *barg)
 
         if (k == 0) {
             /* no progress; do a Euclidean step */
-            if (l_divmod(a, b, NULL, &r) < 0)
+            if (l_mod(a, b, &r) < 0)
                 goto error;
             Py_DECREF(a);
             a = b;