[SciPy-user] fastest way to populate sparse matrix?

Peter Skomoroch peter.skomoroch at gmail.com
Wed Dec 10 12:54:29 EST 2008


What is the fastest way to replace non-zero elements of a sparse
matrix with corresponding elements from a product of dense matrices,
without the memory overhead of computing the entire dense matrix
product?

The code below demonstrates the way I am doing it now: looping through
the nonzero elements in the sparse matrix, and forming the
corresponding row - column product from the dense matrices.  It uses
the sparse module from the latest scipy trunk.

-Pete

#!/usr/bin/env python
# encoding: utf-8
"""
sparse_fill.py

uses latest sparse package from scipy svn
"""

import sys
import os
import time
from numpy import *
from numpy.random import random
import scipy.sparse as sparse
from scipy import io
import urllib
import tarfile

def main():
    # number of rows    1,916
    # number of columns 1,916
    # nonzeros  195,985
    print "loading matrix..."
    f = urllib.urlopen("http://www.cise.ufl.edu/research/sparse/MM/JGD_CAG/CAG_mat1916.tar.gz")
    tar_download = open('CAG_mat1916.tar.gz','w')
    print >> tar_download, f.read()
    tar_download.close()

    tar = tarfile.open("CAG_mat1916.tar.gz", "r:gz")
    tar.extractall()

    # V is a sparse matrix, with around 5 % of entries populated
    V = io.mmread("CAG_mat1916/CAG_mat1916.mtx").tocsr()
    n,m = V.shape
    r = 200
    eps=1e-9
    nonzeros = [tuple(ij) for ij in transpose(V.nonzero())]
    W = (random([n,r]) ).astype(float32)
    H = random([r,m]).astype(float32)

    #################################################
    # This block needs to be executed many times...
    # Fill non-zero elements of a sparse matrix with
    # corresponding elements from dense matrix product.
    # We don't want to compute the full matrix product so
    # we can save memory
    print "filling..."
    t0=time.time()
    V_approx = sparse.lil_matrix((n,m), dtype=float32)

    for i,j in nonzeros:
        V_approx[i,j] = dot(W[i,:],H[:,j])
    print "time to fill:", time.time() -t0

    W_factor =  (V*H.T + eps ) / ( V_approx*H.T + eps)
    W = W_factor*W
    ####################################################

if __name__ == '__main__':
	main()



--
Peter N. Skomoroch
peter.skomoroch at gmail.com
http://www.datawrangling.com
http://del.icio.us/pskomoroch



More information about the SciPy-User mailing list