[Scipy-svn] r3681 - in trunk/scipy/sparse: . tests

scipy-svn at scipy.org scipy-svn at scipy.org
Mon Dec 17 23:06:25 EST 2007


Author: wnbell
Date: 2007-12-17 22:06:01 -0600 (Mon, 17 Dec 2007)
New Revision: 3681

Added:
   trunk/scipy/sparse/data.py
Modified:
   trunk/scipy/sparse/compressed.py
   trunk/scipy/sparse/construct.py
   trunk/scipy/sparse/coo.py
   trunk/scipy/sparse/dia.py
   trunk/scipy/sparse/tests/test_base.py
Log:
added _data_matrix base class
reorganized todia() and dia_matrix
added todia() test in test_base


Modified: trunk/scipy/sparse/compressed.py
===================================================================
--- trunk/scipy/sparse/compressed.py	2007-12-18 00:47:46 UTC (rev 3680)
+++ trunk/scipy/sparse/compressed.py	2007-12-18 04:06:01 UTC (rev 3681)
@@ -9,7 +9,8 @@
 from numpy import array, matrix, asarray, asmatrix, zeros, rank, intc, \
         empty, hstack, isscalar, ndarray, shape, searchsorted
 
-from base import spmatrix,isspmatrix
+from base import spmatrix, isspmatrix
+from data import _data_matrix
 import sparsetools
 from sputils import upcast, to_native, isdense, isshape, getdtype, \
         isscalarlike
@@ -23,11 +24,11 @@
     return new
 
 
-class _cs_matrix(spmatrix):
+class _cs_matrix(_data_matrix):
     """base matrix class for compressed row and column oriented matrices"""
     
     def __init__(self, arg1, shape=None, dtype=None, copy=False, dims=None):
-        spmatrix.__init__(self)
+        _data_matrix.__init__(self)
 
         if dims is not None:
             warn("dims is deprecated, use shape instead", DeprecationWarning)
@@ -100,11 +101,6 @@
         return self.indptr[-1]
     nnz = property(fget=getnnz)
     
-    def _get_dtype(self):
-        return self.data.dtype
-    def _set_dtype(self,newtype):
-        self.data.dtype = newtype
-    dtype = property(fget=_get_dtype,fset=_set_dtype)
     
     def _set_self(self, other, copy=False):
         """take the member variables of other and assign them to self"""
@@ -181,20 +177,15 @@
                     raise ValueError,'index pointer values must form a " \
                                         "non-decreasing sequence'
 
-    def astype(self, t):
-        return self._with_data(self.data.astype(t))
 
+    
+    def __imul__(self, other): #self *= other
+        if isscalarlike(other):
+            self.data *= other
+            return self
+        else:
+            raise NotImplementedError
 
-    def __abs__(self):
-        return self._with_data(abs(self.data))
-
-    def _real(self):
-        return self._with_data(numpy.real(self.data))
-
-    def _imag(self):
-        return self._with_data(numpy.imag(self.data))
-
-
     def __add__(self,other):
         # First check if argument is a scalar
         if isscalarlike(other):
@@ -265,16 +256,7 @@
                 tr = asarray(other).transpose()
             return self.transpose().dot(tr).transpose()
 
-    def __imul__(self, other): #self *= other
-        if isscalarlike(other):
-            self.data *= other
-            return self
-        else:
-            raise NotImplementedError
 
-    def __neg__(self):
-        return self._with_data(-self.data)
-
     def __truediv__(self,other):
         if isscalarlike(other):
             return self * (1./other)
@@ -402,8 +384,6 @@
             return spmatrix.sum(self,axis)
             raise ValueError, "axis out of bounds"
 
-    def copy(self):
-        return self._with_data(self.data.copy(),copy=True)
 
     def _get_slice(self, i, start, stop, stride, shape):
         """Returns a view of the elements [i, myslice.start:myslice.stop].
@@ -430,6 +410,12 @@
         M, N = self.shape
         return cls((self.data,self.indices,self.indptr),(N,M),copy=copy)
     
+    def todia(self):
+        return self.tocoo(copy=False).todia()
+    
+    def todok(self):
+        return self.tocoo(copy=False).todok()
+
     def tocoo(self,copy=True):
         """Return a COOrdinate representation of this matrix
 
@@ -453,8 +439,6 @@
         from coo import coo_matrix
         return coo_matrix((data,(row,col)), self.shape)
 
-    def conj(self, copy=False):
-        return self._with_data(self.data.conj(),copy=copy)
 
     def sorted_indices(self):
         """Return a copy of this matrix with sorted indices
@@ -546,8 +530,7 @@
         data, indices, indptr = aux[2], aux[1], aux[0]
         return data, indices, indptr, i1 - i0, j1 - j0
 
-
-    # utility functions 
+    # needed by _data_matrix
     def _with_data(self,data,copy=True):
         """Returns a matrix with the same sparsity structure as self,
         but with different data.  By default the structure arrays
@@ -560,6 +543,7 @@
             return self.__class__((data,self.indices,self.indptr), \
                                    shape=self.shape,dtype=data.dtype)
 
+    # utility functions
     def _binopt(self, other, op, in_shape=None, out_shape=None):
         """apply the binary operation fn to two sparse matrices"""
         other = self._tothis(other)

Modified: trunk/scipy/sparse/construct.py
===================================================================
--- trunk/scipy/sparse/construct.py	2007-12-18 00:47:46 UTC (rev 3680)
+++ trunk/scipy/sparse/construct.py	2007-12-18 04:06:01 UTC (rev 3681)
@@ -19,35 +19,34 @@
 from base import isspmatrix
 
 
-def spdiags(diags, offsets, m, n, format=None):
+def spdiags(data, diags, m, n, format=None):
     """Return a sparse matrix given its diagonals.
 
     B = spdiags(diags, offsets, m, n)
 
     *Parameters*:
-        diags   : matrix whose rows contain the diagonal values
-        offsets : diagonals to set 
+        data   : matrix whose rows contain the diagonal values
+        diags  : diagonals to set 
                     k = 0 - the main diagonal
                     k > 0 - the k-th upper diagonal
                     k < 0 - the k-th lower diagonal
-        m, n    : dimensions of the result
-        format  : format of the result (e.g. "csr")
+        m, n   : dimensions of the result
+        format : format of the result (e.g. "csr")
                     By default (format=None) an appropriate sparse matrix 
                     format is returned.  This choice is subject to change.
 
     *Example*
     -------
-    >>> diags = array([[1,2,3,4]]).repeat(3,axis=0)
-    >>> offsets = array([0,-1,2])
-    >>> spdiags(diags,offsets,4,4).todense()
+    >>> data = array([[1,2,3,4]]).repeat(3,axis=0)
+    >>> diags = array([0,-1,2])
+    >>> spdiags(data,diags,4,4).todense()
     matrix([[1, 0, 3, 0],
             [1, 2, 0, 4],
             [0, 2, 3, 0],
             [0, 0, 3, 4]])
 
     """
-    #TODO update this example
-    return dia_matrix((diags, offsets), shape=(m,n)).asformat(format)
+    return dia_matrix((data, diags), shape=(m,n)).asformat(format)
 
 def spidentity(n, dtype='d', format=None):
     """spidentity(n) returns an (n x n) identity matrix"""

Modified: trunk/scipy/sparse/coo.py
===================================================================
--- trunk/scipy/sparse/coo.py	2007-12-18 00:47:46 UTC (rev 3680)
+++ trunk/scipy/sparse/coo.py	2007-12-18 04:06:01 UTC (rev 3681)
@@ -5,7 +5,8 @@
 from itertools import izip
 from warnings import warn 
 
-from numpy import array, asarray, empty, intc, zeros, bincount
+from numpy import array, asarray, empty, intc, zeros, bincount, \
+        unique, searchsorted
 
 from sparsetools import coo_tocsr, coo_tocsc
 from base import spmatrix, isspmatrix
@@ -248,6 +249,21 @@
         else:
             return self
 
+    def todia(self):
+        from dia import dia_matrix
+
+        ks = self.col - self.row  #the diagonal for each nonzero          
+        diags = unique(ks)
+
+        if len(diags) > 100:
+            pass #do something?
+
+        #initialize and fill in data array
+        data = zeros( (len(diags), self.col.max()+1), dtype=self.dtype)
+        data[ searchsorted(diags,ks), self.col ] = self.data
+
+        return dia_matrix((data,diags),shape=self.shape)
+
     def todok(self):
         from dok import dok_matrix
 

Added: trunk/scipy/sparse/data.py
===================================================================
--- trunk/scipy/sparse/data.py	2007-12-18 00:47:46 UTC (rev 3680)
+++ trunk/scipy/sparse/data.py	2007-12-18 04:06:01 UTC (rev 3681)
@@ -0,0 +1,43 @@
+"""Base class for sparse matrice with a .data attribute
+    
+    subclasses must provide a _with_data() method that
+    creates a new matrix with the same sparsity pattern
+    as self but with a different data array
+
+"""
+
+__all__ = []
+
+from base import spmatrix
+
+class _data_matrix(spmatrix):
+    def __init__(self):
+        spmatrix.__init__(self)
+
+    def _get_dtype(self):
+        return self.data.dtype
+    def _set_dtype(self,newtype):
+        self.data.dtype = newtype
+    dtype = property(fget=_get_dtype,fset=_set_dtype)
+
+    def __abs__(self):
+        return self._with_data(abs(self.data))
+
+    def _real(self):
+        return self._with_data(numpy.real(self.data),copy=False)
+
+    def _imag(self):
+        return self._with_data(numpy.imag(self.data),copy=False)
+    
+    def __neg__(self):
+        return self._with_data(-self.data)
+
+    def astype(self, t):
+        return self._with_data(self.data.astype(t))
+
+    def conj(self, copy=False):
+        return self._with_data(self.data.conj(),copy=copy)
+    
+    def copy(self):
+        return self._with_data(self.data.copy(),copy=True)
+

Modified: trunk/scipy/sparse/dia.py
===================================================================
--- trunk/scipy/sparse/dia.py	2007-12-18 00:47:46 UTC (rev 3680)
+++ trunk/scipy/sparse/dia.py	2007-12-18 04:06:01 UTC (rev 3681)
@@ -2,49 +2,79 @@
 
 __all__ = ['dia_matrix','isspmatrix_dia']
 
-from numpy import asarray, asmatrix, matrix, zeros, arange, unique, \
-        searchsorted, intc, atleast_1d, atleast_2d
+from numpy import asarray, asmatrix, matrix, zeros, arange, array, \
+        empty_like, intc, atleast_1d, atleast_2d, add, multiply, \
+        unique
 
-from base import spmatrix, isspmatrix, isdense
-from sputils import isscalarlike, isshape, upcast, getdtype
+from base import isspmatrix
+from data import _data_matrix
+from sputils import isscalarlike, isshape, upcast, getdtype, isdense
 
-from sets import Set
+class dia_matrix(_data_matrix):
+    """Sparse matrix with DIAgonal storage
 
-class dia_matrix(spmatrix):
-    #TODO add description/motivation
+    This can be instantiated in several ways:
+      - dia_matrix(D)
+        with a dense matrix or rank-2 ndarray D
+
+      - dia_matrix(S)
+        with another sparse matrix S (equivalent to S.todia())
+
+      - dia_matrix((M, N), [dtype])
+        to construct an empty matrix with shape (M, N)
+        dtype is optional, defaulting to dtype='d'.
+
+      - dia_matrix((data, diags), shape=(M, N))
+        where the data[k,:] stores the diagonal entries for
+        diagonal diag[k] (See example below)
+
+
+    *Examples*
+    ----------
+
+    >>> from scipy.sparse import *
+    >>> from scipy import *
+    >>> dia_matrix( (3,4), dtype='i').todense()
+    matrix([[0, 0, 0, 0],
+            [0, 0, 0, 0],
+            [0, 0, 0, 0]])
+    
+    >>> data = array([[1,2,3,4]]).repeat(3,axis=0)
+    >>> diags = array([0,-1,2])
+    >>> dia_matrix( (data,diags), shape=(4,4)).todense()
+    matrix([[1, 0, 3, 0],
+            [1, 2, 0, 4],
+            [0, 2, 3, 0],
+            [0, 0, 3, 4]])
+
+    """
+
     def __init__(self, arg1, shape=None, dtype=None, copy=False):
-        spmatrix.__init__(self)
+        _data_matrix.__init__(self)
 
-        if isdense(arg1) or isspmatrix(arg1):
-            #convert to COO first, then to DIA
-            from coo import coo_matrix
+        if isspmatrix_dia(arg1):
+            if copy:
+                arg1 = arg1.copy()
+            self.data  = arg1.data
+            self.diags = arg1.diags
+            self.shape = arg1.shape
+        elif isdense(arg1) or isspmatrix(arg1):
             if isdense(arg1):
-                A = coo_matrix(arg1)
-            elif arg1.format in ['csr','csc','coo']:
-                A = arg1.tocoo(copy=False)
+                #convert to COO first, then to DIA
+                from coo import coo_matrix
+                A = coo_matrix(arg1).todia()
             else:
-                A = arg1.tocoo()
-            
-            ks = A.col - A.row  #the offset for each nonzero          
-            diags = unique(ks)
-
-            if len(diags) > 10:
-                pass #do something
-
-            #initialize and fill in data array
-            data_shape = (len(diags), A.col.max()+1)
-            data_dtype = getdtype(dtype, default=A.data.dtype)
-            data = zeros(data_shape, dtype=data_dtype)
-            data[ searchsorted(diags,ks), A.col ] = A.data
+                A = arg1.todia()
   
-            self.data,self.diags = data,diags
+            self.data  = A.data
+            self.diags = A.diags
             self.shape = A.shape
         elif isinstance(arg1, tuple):
             if isshape(arg1):
                 # It's a tuple of matrix dimensions (M, N)
                 # create empty matrix
                 self.shape = arg1   #spmatrix checks for errors here
-                self.data   = zeros( (0,0), getdtype(dtype, default=float))
+                self.data  = zeros( (0,0), getdtype(dtype, default=float))
                 self.diags = zeros( (0), dtype=intc)
             else:
                 try:
@@ -55,9 +85,9 @@
                 else:
                     if shape is None:
                         raise ValueError,'expected a shape argument'
-                    self.data   = atleast_2d(asarray(arg1[0],dtype=dtype))
-                    self.diags = atleast_1d(asarray(arg1[1],dtype='i'))
-                    self.shape   = shape
+                    self.data  = atleast_2d(array(arg1[0],dtype=dtype,copy=copy))
+                    self.diags = atleast_1d(array(arg1[1],dtype='i',copy=copy))
+                    self.shape = shape
 
         #check format
         if self.diags.ndim != 1:
@@ -71,19 +101,10 @@
                     'does not match the number of diags (%d)' \
                     % (self.data.shape[0], len(self.diags))
         
-        if len(Set(self.diags)) != len(self.diags):
+        if len(unique(self.diags)) != len(self.diags):
             raise ValueError,'offset array contains duplicate values'
 
 
-
-    def __getdtype(self):
-        return self.data.dtype
-
-    def __setdtype(self,newtype):
-        self.dtype = newtype
-
-    dtype = property(fget=__getdtype,fset=__setdtype)
-
     def getnnz(self):
         """number of nonzero values
 
@@ -101,7 +122,6 @@
     nnz = property(fget=getnnz)
 
 
-
     def __mul__(self, other): # self * other
         """ Scalar, vector, or matrix multiplication
         """
@@ -110,7 +130,6 @@
         else:
             return self.dot(other)
 
-
     def matmat(self, other):
         if isspmatrix(other):
             M, K1 = self.shape
@@ -126,8 +145,7 @@
 
 
     def matvec(self,other):
-        #TODO why is this so slow? it should run at BLAS-speed
-
+        # can we do the multiply add faster?
         x = asarray(other)
 
         if x.ndim == 1:
@@ -136,6 +154,7 @@
             raise ValueError, "dimension mismatch"
 
         y = zeros((self.shape[0],x.shape[1]), dtype=upcast(self.dtype,x.dtype))
+        temp = empty_like( y )
 
         L = self.data.shape[1]
         M,N = self.shape
@@ -147,7 +166,12 @@
             i_start = max(0,-k)
             i_end   = i_start + (j_end - j_start)
 
-            y[i_start:i_end] += diag[j_start:j_end].reshape(-1,1) * x[j_start:j_end]
+            #we'd prefer a fused multiply add here
+            multiply(diag[j_start:j_end].reshape(-1,1), x[j_start:j_end], temp[i_start:i_end])
+            add(y[i_start:i_end],temp[i_start:i_end],y[i_start:i_end])
+           
+            #slower version of two steps above
+            #y[i_start:i_end] += diag[j_start:j_end].reshape(-1,1) * x[j_start:j_end]
 
         
         if isinstance(other, matrix):
@@ -190,6 +214,16 @@
         from coo import coo_matrix
         return coo_matrix((data,(row,col)),shape=self.shape)
 
+    # needed by _data_matrix
+    def _with_data(self,data,copy=True):
+        """Returns a matrix with the same sparsity structure as self,
+        but with different data.  By default the structure arrays
+        (i.e. .indptr and .indices) are copied.
+        """
+        if copy:
+            return dia_matrix( (data,self.diags.copy()), shape=self.shape)
+        else:
+            return dia_matrix( (data,self.diags), shape=self.shape)
 
 
 from sputils import _isinstance

Modified: trunk/scipy/sparse/tests/test_base.py
===================================================================
--- trunk/scipy/sparse/tests/test_base.py	2007-12-18 00:47:46 UTC (rev 3680)
+++ trunk/scipy/sparse/tests/test_base.py	2007-12-18 04:06:01 UTC (rev 3681)
@@ -277,6 +277,10 @@
             assert_equal( result.shape, (4,2) )
             assert_equal( result, dot(a,b) )
 
+    def check_todia(self):
+        a = self.datsp.todia()
+        assert_array_almost_equal(a.todense(), self.dat)
+
     def check_tocoo(self):
         a = self.datsp.tocoo()
         assert_array_almost_equal(a.todense(), self.dat)




More information about the Scipy-svn mailing list