[SciPy-User] Dense * sparse slower than expected

Grant Roch grant.roch at gmail.com
Thu May 7 10:12:10 EDT 2015


I'm multiplying a dense matrix and a sparse matrix and was surprised at how
long it was taking given the number of operations that should be taking
place.  I was able to multiply them faster using straight python/numpy than
the scipy routines.  I suspect that I'm not using the appropriate sparse
matrix type, but I tried various other combinations with similar timings.
Any thoughts would be appreciated.

The results are (in seconds):

Scipy: 0.206862998009
Manually: 0.0189853906631

Here is the code to reproduce:

from scipy.sparse import csr_matrix, csc_matrix
import numpy as np
import timeit

def manualMultiply(x, y):
    """Multiply a dense 2-D array by a sparse matrix, one column at a time.

    Each output column j is computed as the dot product of the columns of
    ``x`` selected by the row indices stored in column j of ``y`` with the
    corresponding stored values, so only the stored entries of ``y`` are
    touched.

    Parameters
    ----------
    x : numpy.ndarray
        Dense left operand, indexed as ``x[:, rows]``.
    y : scipy.sparse matrix
        Sparse right operand; converted to CSC format internally.

    Returns
    -------
    numpy.ndarray
        Dense array of shape ``(x.shape[0], y.shape[1])`` holding ``x * y``
        (matrix product).
    """
    y = y.tocsc()
    indptr = y.indptr
    indices = y.indices
    data = y.data

    # Allocate with the promoted dtype so that, e.g., an integer ``x`` times
    # a float ``y`` is not silently truncated on assignment into ``r``.
    r = np.zeros((x.shape[0], y.shape[1]),
                 dtype=np.result_type(x.dtype, y.dtype))

    # ``range`` works on both Python 2 and 3 (``xrange`` is Python 2 only).
    for j in range(y.shape[1]):
        start, stop = indptr[j], indptr[j + 1]
        rows = indices[start:stop]
        # An empty column yields a (m, 0) . (0,) product, i.e. all zeros.
        r[:, j] = np.dot(x[:, rows], data[start:stop])

    return r

if __name__=="__main__":

    # Number of timed repetitions; the printed figures are per-call averages.
    number = 10

    # CSC components of the sparse operand: 15 columns with two stored
    # entries each (see ``indptr``), 30 stored values in total.
    indices = [
        0, 2, 1, 3, 5, 6, 53, 54, 54, 55, 59, 60, 59,
        61, 59, 62, 59, 63, 101, 102, 102, 103, 111, 114, 112, 113,
        193, 196, 194, 195,
    ]

    indptr = [0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30]

    data = [
        -29.90430622, 29.90430622, -29.90430622, 29.90430622,
        30.16591252, -30.16591252, -31.25, 31.25,
        31.25, -31.25, -17.98561151, 17.98561151,
        -17.98561151, 17.98561151, -17.98561151, 17.98561151,
        -17.98561151, 17.98561151, -40.81632653, 40.81632653,
        30.24803388, -30.24803388, 30.48780488, -30.48780488,
        -30.48780488, 30.48780488, 31.84713376, -31.84713376,
        31.84713376, -31.84713376,
    ]

    # Fixed seed so the dense operand is reproducible between runs.
    np.random.seed(1)
    x = np.random.randn(8500, 4581)
    y = csc_matrix((data, indices, indptr), shape=(4581, 15),
                   dtype=np.float64)

    # Sanity check: both approaches must agree before they are timed.
    a = x * y
    b = manualMultiply(x, y)
    print(np.allclose(a, b))

    # The setup strings must each stay on one line: a single-quoted literal
    # cannot span a line break (the archived mail wrapping had split one).
    t = timeit.timeit('x * y', 'from __main__ import x, y', number=number)
    print('Scipy: %s' % (t / number))
    t = timeit.timeit('manualMultiply(x, y)',
                      'from __main__ import x, y, manualMultiply',
                      number=number)
    print('Manually: %s' % (t / number))

Thanks,

Grant
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.scipy.org/pipermail/scipy-user/attachments/20150507/860568a3/attachment.html>


More information about the SciPy-User mailing list