[Scipy-svn] r3780 - in trunk/scipy/sparse: . sparsetools tests

Fri Jan 4 04:53:33 EST 2008

Author: wnbell
Date: 2008-01-04 03:53:29 -0600 (Fri, 04 Jan 2008)
New Revision: 3780

Modified:
   trunk/scipy/sparse/bsr.py
   trunk/scipy/sparse/sparsetools/fixed_size.h
   trunk/scipy/sparse/sparsetools/sparsetools.h
   trunk/scipy/sparse/tests/test_sparse.py
Log:
use recursive template for BSR matrix vector product


Modified: trunk/scipy/sparse/bsr.py
===================================================================

--- trunk/scipy/sparse/bsr.py	2008-01-04 07:00:52 UTC (rev 3779)
+++ trunk/scipy/sparse/bsr.py	2008-01-04 09:53:29 UTC (rev 3780)
@@ -11,7 +11,7 @@
 from sparsetools import bsr_matvec, csr_matmat_pass1, bsr_matmat_pass2
 from data import _data_matrix
 from compressed import _cs_matrix
-from base import isspmatrix
+from base import isspmatrix, _formats
 from sputils import isshape, getdtype, to_native, isscalarlike, isdense, \
         upcast
 
@@ -53,8 +53,8 @@
         - The Block Compressed Row (BSR) format is very similar to the
           Compressed Sparse Row (CSR) format.  BSR is appropriate for
           sparse matrices with dense sub matrices like the last example
-          below.  Such matrices often arise, for instance, in finite
-          element discretizations.
+          below.  Such matrices often arise, for instance, in vector-valued
+          finite element discretizations.
 
 
     Examples
@@ -113,11 +113,11 @@
                 self.data   = zeros( (0,) + blocksize, getdtype(dtype, default=float) )
                 self.indices = zeros( 0, dtype=intc )
                 
-                X,Y = blocksize
-                if (M % X) != 0 or (N % Y) != 0:
+                R,C = blocksize
+                if (M % R) != 0 or (N % C) != 0:
                     raise ValueError, 'shape must be multiple of blocksize'
 
-                self.indptr  = zeros(M/X + 1, dtype=intc )
+                self.indptr  = zeros(M/R + 1, dtype=intc )
             
             elif len(arg1) == 2:
                 # (data,(row,col)) format
@@ -279,7 +279,7 @@
         """
         if isdense(other):
             M,N = self.shape
-            X,Y = self.blocksize
+            R,C = self.blocksize
 
             if other.shape != (N,) and other.shape != (N,1):
                 raise ValueError, "dimension mismatch"
@@ -299,7 +299,7 @@
                 y = output
             
             
-            bsr_matvec(M/X, N/Y, X, Y, \
+            bsr_matvec(M/R, N/C, R, C, \
                 self.indptr, self.indices, ravel(self.data), ravel(other), y)
 
             if isinstance(other, matrix):
@@ -389,15 +389,15 @@
         """
         
         M,N = self.shape
-        X,Y = self.blocksize
+        R,C = self.blocksize
 
-        row  = (X * arange(M/X)).repeat(diff(self.indptr))
-        row  = row.repeat(X*Y).reshape(-1,X,Y)
-        row += tile( arange(X).reshape(-1,1), (1,Y) )
+        row  = (R * arange(M/R)).repeat(diff(self.indptr))
+        row  = row.repeat(R*C).reshape(-1,R,C)
+        row += tile( arange(R).reshape(-1,1), (1,C) )
         row  = row.reshape(-1) 
 
-        col  = (Y * self.indices).repeat(X*Y).reshape(-1,X,Y)
-        col += tile( arange(Y), (X,1) )
+        col  = (C * self.indices).repeat(R*C).reshape(-1,R,C)
+        col += tile( arange(C), (R,1) )
         col  = col.reshape(-1)
 
         data = self.data.reshape(-1)
@@ -411,16 +411,16 @@
 
     def transpose(self):
         
-        X,Y = self.blocksize
+        R,C = self.blocksize
         M,N = self.shape
         
         if self.nnz == 0:
-            return bsr_matrix((N,M),blocksize=(Y,X))
+            return bsr_matrix((N,M),blocksize=(C,R))
 
         #use CSR.T to determine a permutation for BSR.T
         from csr import csr_matrix
         data = arange(len(self.indices), dtype=self.indices.dtype)
-        proxy = csr_matrix((data,self.indices,self.indptr),shape=(M/X,N/Y))
+        proxy = csr_matrix((data,self.indices,self.indptr),shape=(M/R,N/C))
         proxy = proxy.tocsc()
 
         data    = self.data.swapaxes(1,2)[proxy.data] #permute data

Modified: trunk/scipy/sparse/sparsetools/fixed_size.h
===================================================================
--- trunk/scipy/sparse/sparsetools/fixed_size.h	2008-01-04 07:00:52 UTC (rev 3779)
+++ trunk/scipy/sparse/sparsetools/fixed_size.h	2008-01-04 09:53:29 UTC (rev 3780)
@@ -19,7 +19,7 @@
         inline T operator()(const T * V1, const T * V2)
         {
             _dot<N-1,T> d;
-            return (*V1) * (*V2) + d(++V1, ++V2);
+            return (*V1) * (*V2) + d(V1 + 1, V2 + 1);
         }
 };
 template<class T>
@@ -41,8 +41,40 @@
 
 
 
+/*
+ *  Matrix Vector Product
+ * 
+ */
+template<int M, int N, class T>
+class _matvec
+{
+    public:
+        inline void operator()(const T * A, const T * X, T * Y)
+        {
+            *Y += dot<N,T>(A,X);
+            _matvec<M-1,N,T> d;
+            d(A + N, X, Y + 1);
+        }
+};
+template<int N, class T>
+class _matvec<1,N,T>
+{
+    public:
+        inline void operator()(const T * A, const T * X, T * Y)
+        {
+            *Y += dot<N,T>(A,X);
+        }
+};
 
+template<int M, int N, class T>
+inline void matvec(const T * A, const T * X, T * Y)
+{
+    _matvec<M,N,T> d;
+    d(A,X,Y);
+}
 
+
+
 template<int N, class T, class bin_op>
 class _vec_binop_vec
 {

Modified: trunk/scipy/sparse/sparsetools/sparsetools.h
===================================================================
--- trunk/scipy/sparse/sparsetools/sparsetools.h	2008-01-04 07:00:52 UTC (rev 3779)
+++ trunk/scipy/sparse/sparsetools/sparsetools.h	2008-01-04 09:53:29 UTC (rev 3780)
@@ -330,7 +330,7 @@
                       const I Bj[],
                             I Cp[])
 {
-//    // method that uses O(n) temp storage
+//    // method that uses O(1) temp storage
 //    const I hash_size = 1 << 5;
 //    I vals[hash_size];
 //    I mask[hash_size];
@@ -477,7 +477,6 @@
     for(I i = 0; i < SIZE; i++){
         Cx[i] = 0;
     }
-    //std::cout << "n_brow " << n_brow << " Cp[-1] " << Cp[n_brow] << " R " << R << " C " << C << " N " << N << std::endl;
  
     std::vector<I>  next(n_bcol,-1);
     std::vector<T*> mats(n_bcol);
@@ -567,18 +566,9 @@
                          I Cp[],         I Cj[],          T Cx[],
                    const bin_op& op)
 {
-   //Method that works for unsorted indices
-   // assert( csr_has_sorted_indices(n_brow,Ap,Aj) );
-   // assert( csr_has_sorted_indices(n_brow,Bp,Bj) );
-
-    //if (R == 3 && C == 3){
-    //    bsr_binop_bsr_fixed<I,T,3,3>(n_brow,n_bcol,Ap,Aj,Ax,Bp,Bj,Bx,Cp,Cj,Cx,op);
-    //    return;
-    //}
-
     const I RC = R*C;
-    T result[8*8];
-    //T zeros[8*8];
+    T * result = Cx;
+
     Cp[0] = 0;
     I nnz = 0;
 
@@ -600,9 +590,7 @@
 
                 if( is_nonzero_block(result,RC) ){
                     Cj[nnz] = A_j;
-                    for(I n = 0; n < RC; n++){
-                        Cx[RC*nnz + n] = result[n];
-                    }
+                    result += RC;
                     nnz++;
                 }
 
@@ -616,9 +604,7 @@
 
                 if(is_nonzero_block(result,RC)){
                     Cj[nnz] = A_j;
-                    for(I n = 0; n < RC; n++){
-                        Cx[RC*nnz + n] = result[n];
-                    }
+                    result += RC;
                     nnz++;
                 }
 
@@ -631,9 +617,7 @@
                 }
                 if(is_nonzero_block(result,RC)){
                     Cj[nnz] = B_j;
-                    for(I n = 0; n < RC; n++){
-                        Cx[RC*nnz + n] = result[n];
-                    }
+                    result += RC;
                     nnz++;
                 }
 
@@ -644,15 +628,14 @@
 
         //tail
         while(A_pos < A_end){
+
             for(I n = 0; n < RC; n++){
                 result[n] = op(Ax[RC*A_pos + n],0);
             }
 
             if(is_nonzero_block(result,RC)){
                 Cj[nnz] = A_j;
-                for(I n = 0; n < RC; n++){
-                    Cx[RC*nnz + n] = result[n];
-                }
+                result += RC;
                 nnz++;
             }
 
@@ -663,11 +646,10 @@
             for(I n = 0; n < RC; n++){
                 result[n] = op(0,Bx[RC*B_pos + n]);
             }
+
             if(is_nonzero_block(result,RC)){
                 Cj[nnz] = B_j;
-                for(I n = 0; n < RC; n++){
-                    Cx[RC*nnz + n] = result[n];
-                }
+                result += RC;
                 nnz++;
             }
 
@@ -1159,38 +1141,15 @@
 	                  const T Xx[],
 	                        T Yx[])
 {
-    //TODO make a matvec template
-    for(I i = 0; i < n_brow; i++) {
-        T r0 = 0;
-        T r1 = 0;
-        T r2 = 0;
-        T r3 = 0;
-        T r4 = 0;
-        T r5 = 0;
-        T r6 = 0;
-        T r7 = 0;
+    for(I i = 0; i < R*n_brow; i++){
+        Yx[i] = 0;
+    }
 
+    for(I i = 0; i < n_brow; i++) {
         for(I jj = Ap[i]; jj < Ap[i+1]; jj++) {
             I j = Aj[jj];
-            const T * base = Ax + jj*(R*C);
-            if (R > 0) r0 += dot<C>(base + 0*C, Xx + j*C);
-            if (R > 1) r1 += dot<C>(base + 1*C, Xx + j*C);
-            if (R > 2) r2 += dot<C>(base + 2*C, Xx + j*C);
-            if (R > 3) r3 += dot<C>(base + 3*C, Xx + j*C);
-            if (R > 4) r4 += dot<C>(base + 4*C, Xx + j*C);
-            if (R > 5) r5 += dot<C>(base + 5*C, Xx + j*C);
-            if (R > 6) r6 += dot<C>(base + 6*C, Xx + j*C);
-            if (R > 7) r7 += dot<C>(base + 7*C, Xx + j*C);
+            matvec<R,C>(Ax + jj*R*C, Xx + j*C, Yx + i*R);
         }
-
-        if (R > 0) Yx[R*i+0] = r0; 
-        if (R > 1) Yx[R*i+1] = r1;
-        if (R > 2) Yx[R*i+2] = r2;
-        if (R > 3) Yx[R*i+3] = r3;
-        if (R > 4) Yx[R*i+4] = r4;
-        if (R > 5) Yx[R*i+5] = r5;
-        if (R > 6) Yx[R*i+6] = r6;
-        if (R > 7) Yx[R*i+7] = r7;
     }
 }
 
@@ -1241,7 +1200,7 @@
 
 
     //otherwise use general method
-    for(I i = 0; i < n_brow; i++){
+    for(I i = 0; i < R*n_brow; i++){
         Yx[i] = 0;
     }
 

Modified: trunk/scipy/sparse/tests/test_sparse.py
===================================================================
--- trunk/scipy/sparse/tests/test_sparse.py	2008-01-04 07:00:52 UTC (rev 3779)
+++ trunk/scipy/sparse/tests/test_sparse.py	2008-01-04 09:53:29 UTC (rev 3780)
@@ -46,7 +46,7 @@
 class TestSparseTools(NumpyTestCase):
     """Simple benchmarks for sparse matrix module"""
 
-    def test_arithmetic(self,level=4):
+    def test_arithmetic(self,level=5):
         matrices = []
         #matrices.append( ('A','Identity', spidentity(500**2,format='csr')) )
         matrices.append( ('A','Poisson5pt', poisson2d(500,format='csr'))  )
@@ -128,7 +128,7 @@
             print fmt % (A.format,name,shape,A.nnz,1e3*(end-start)/float(iter) )
 
 
-    def bench_matvec(self,level=5):
+    def bench_matvec(self,level=4):
         matrices = []
         matrices.append(('Identity',   spidentity(10**4,format='dia')))
         matrices.append(('Identity',   spidentity(10**4,format='csr')))