[SciPy-Dev] scipy.sparse versus pysparse

Thu Jul 24 22:47:35 EDT 2014

On Thu, Jul 24, 2014 at 2:49 PM, nicky van foreest <vanforeest at gmail.com> wrote:
> That would be great. I'll check the mailing list to see when it comes along :-)

After profiling the code I see that much of the time is spent doing
numerical things with the DOK data, like negating or scaling each
value.  This is slow for DOK but faster for other formats like COO.
Although you can't set COO matrix entries after having built the
matrix, implementing setdiag seemed enough to work for your case,
although possibly losing the parts of the API that you prefer over
that of pysparse.

I took the liberty to rewrite some of the code exploring some other
scipy.sparse.linalg algorithms; I too run across problems like this
where I need the equilibrium distribution for a sparse instantaneous
transition rate matrix with a combinatorially large state space.  For
your particular example the shift-invert arpack function that uses
superlu for sparse decomposition seems unreasonably effective.

Cheers,
Alex

---

from __future__ import print_function
from numpy import ones, zeros, array
import scipy.sparse as sp
from scipy.sparse.linalg import eigs
from pylab import matshow, savefig
from scipy.linalg import norm
import time

labda, mu1, mu2 = 1., 1.1, 1.01
N1, N2 = 400, 400
size = N1*N2
eps = 1e-3
maxIterations = int(1e5)
guess = array([1] + [0] * (size-1))

def state(i,j):
    return j*N1 + i

def gen_rate_triples():
    for i in range(0,N1-1):
        for j in range(0,N2):
            yield state(i, j), state(i+1, j), labda
    for i in range(0,N1):
        for j in range(1,N2):
            yield state(i, j), state(i, j-1), mu2
    for i in range(1,N1):
        for j in range(0,N2-1):
            yield state(i, j), state(i-1, j+1), mu1

def get_rate_info(triples):
    rows, cols, rates = zip(*triples)
    pre_Q = sp.coo_matrix((rates, (rows, cols)), shape=(size, size))
    exit_rates = pre_Q.dot(ones(size))
    return pre_Q, exit_rates

def get_PT(pre_Q, exit_rates):
    urate = exit_rates.max() * 1.001
    P = pre_Q / urate
    P.setdiag(1 - exit_rates / urate)
    return P.T.tocsr()

def get_QT(pre_Q, exit_rates):
    Q = pre_Q.copy()
    Q.setdiag(-exit_rates)
    return Q.T.tocsr()

def QTpi(QT, guess, tol):
    w, v = eigs(QT, k=1, v0=guess, sigma=1e-6, which='LM',
            tol=tol, maxiter=maxIterations)
    pi = v[:, 0].real
    return pi / pi.sum()

def PTpi(PT, guess, tol):
    w, v = eigs(PT, k=1, v0=guess, tol=eps, maxiter=maxIterations)
    pi = v[:, 0].real
    return pi / pi.sum()

def power_method(PT, guess, abstol):
    p_prev = zeros(size)
    p = guess.copy()
    for i in range(maxIterations):
        if norm(p - p_prev, ord=1) < abstol:
            break
        p_prev = PT.dot(p)
        p = PT.dot(p_prev)
    return p

print('settings:')
print('labda:', labda)
print('mu1:', mu1)
print('mu2:', mu2)
print('N1:', N1)
print('N2:', N2)
print()

print('precalculation times:')
tm = time.time()
pre_Q, exit_rates = get_rate_info(gen_rate_triples())
print(time.time() - tm, 'for some rate info')
tm = time.time()
QT = get_QT(pre_Q, exit_rates)
print(time.time() - tm, 'to make the transition rate matrix')
tm = time.time()
PT = get_PT(pre_Q, exit_rates)
print(time.time() - tm, 'to make the uniformized trans prob matrix')
print()

# use an iterative method
tm = time.time()
pi_P = PTpi(PT, guess, eps)
tm_P = time.time() - tm

# use superlu and arpack
tm = time.time()
pi_Q = QTpi(QT, guess, eps)
tm_Q = time.time() - tm

# use a power method
tm = time.time()
pi_R = power_method(PT, guess, eps)
tm_R = time.time() - tm

# make pngs
matshow(pi_P.reshape(N2,N1)); savefig("pi_P.png")
matshow(pi_Q.reshape(N2,N1)); savefig("pi_Q.png")
matshow(pi_R.reshape(N2,N1)); savefig("pi_R.png")

print('distribution estimates:')
print('P:', pi_P)
print('Q:', pi_Q)
print('R:', pi_R)
print()
print('computation times for the iterations:')
print('P:', tm_P)
print('Q:', tm_Q)
print('R:', tm_R)
print()
print('violation of the invariant pi * P = pi:')
print('P:', norm(PT*pi_P - pi_P, ord=1))
print('Q:', norm(PT*pi_Q - pi_Q, ord=1))
print('R:', norm(PT*pi_R - pi_R, ord=1))
print()
print('violation of the invariant pi * Q = 0:')
print('P:', norm(QT*pi_P, ord=1))
print('Q:', norm(QT*pi_Q, ord=1))
print('R:', norm(QT*pi_R, ord=1))
print()
print('pngs:')
print('P: pi_P.png')
print('Q: pi_Q.png')
print('R: pi_R.png')
print()

---

settings:
labda: 1.0
mu1: 1.1
mu2: 1.01
N1: 400
N2: 400

precalculation times:
0.753726005554 for some rate info
0.045077085495 to make the transition rate matrix
0.0467808246613 to make the uniformized trans prob matrix

distribution estimates:
P: [ 0.00501086  0.00455523  0.00414086 ..., -0.         -0.
-0.        ]
Q: [  9.00503398e-04   8.18606355e-04   7.44190344e-04 ...,   3.62901458e-07
   3.59308342e-07   3.55750817e-07]
R: [ 0.0053192   0.00483551  0.00439555 ...,  0.          0.
0.        ]

computation times for the iterations:
P: 2.88090801239
Q: 4.26303100586
R: 0.834770202637

violation of the invariant pi * P = pi:
P: 0.0016872009599
Q: 2.74708912485e-08
R: 0.000994144099758

violation of the invariant pi * Q = 0:
P: 0.00525244218029
Q: 8.55199061005e-08
R: 0.0030948799384

pngs:
P: pi_P.png
Q: pi_Q.png
R: pi_R.png