[SciPy-dev] scipy.sparse.dok_matrix changes

James Philbin philbinj at gmail.com
Sat Oct 18 11:56:52 EDT 2008


Hi,

I've recently been using dok_matrix a fair bit and have made a few
improvements, mainly:
i. Improved the docstring.
ii. Fixed the constructor (this fixes #755).
iii. Reduced the codepath for the common case of setting an individual
element (~2x speedup).

There's more work to do on dok_matrix, but this patch might be useful
for some people:
---
Index: scipy/sparse/tests/test_base.py
===================================================================
--- scipy/sparse/tests/test_base.py	(revision 4801)
+++ scipy/sparse/tests/test_base.py	(working copy)
@@ -1162,8 +1162,27 @@
         except:
             caught += 1
         assert_equal(caught,5)
+
+    def test_ctor(self):
+        caught = 0
+        # Empty ctor
+        try:
+            A = dok_matrix()
+        except TypeError, e:
+            caught+=1
+        assert_equal(caught, 1)
+
+        # Dense ctor
+        b = matrix([[1,0,0,0],[0,0,1,0],[0,2,0,3]],'d')
+        A = dok_matrix(b)
+        assert_equal(A.todense(), b)

+        # Sparse ctor
+        c = csr_matrix(b)
+        assert_equal(A.todense(), c.todense())

+
+
 class TestLIL( _TestCommon, _TestHorizSlicing, _TestVertSlicing,
         _TestBothSlicing, _TestGetSet, _TestSolve,
         _TestArithmetic, _TestInplaceArithmetic,
Index: scipy/sparse/dok.py
===================================================================
--- scipy/sparse/dok.py	(revision 4801)
+++ scipy/sparse/dok.py	(working copy)
@@ -14,45 +14,65 @@

 class dok_matrix(spmatrix, dict):
     """Dictionary Of Keys based matrix.  This is an efficient
-    structure for constructing sparse matrices
+    structure for constructing sparse matrices incrementally.
+
+    This can be instatiated in several ways:
+        dok_matrix(D)
+            with a dense matrix, D
+
+        dok_matrix(S)
+            with a sparse matrix, S
+
+        dok_matrix((M,N), [dtype])
+            create the matrix with initial shape (M,N)
+            dtype is optional, defaulting to dtype='d'
+
+    Notes
+    -----
+    Allows for efficient O(1) access of individual elements.
+    Duplicates are not allowed.
+    Can be efficiently converted to a coo_matrix once constructed.
+
+    Examples
+    --------
+    >>> from scipy.sparse import *
+    >>> from scipy import *
+    >>> S = dok_matrix((5,5), dtype=float32)
+    >>> for i in range(5):
+    >>>     for j in range(5):
+    >>>         S[i,j] = i+j # Update element
+
     """
-    def __init__(self, A=None, shape=None, dtype=None, copy=False):
-        """ Create a new dictionary-of-keys sparse matrix.  An optional
-        argument A is accepted, which initializes the dok_matrix with it.
-        This can be a tuple of dimensions (M, N) or a (dense) array
-        to copy.
-        """
-        #TODO deprecate argument A in favor of arg1 style
-
+
+    def __init__(self, arg1, shape=None, dtype=None, copy=False):
         dict.__init__(self)
         spmatrix.__init__(self)
-        self.dtype = getdtype(dtype, A, default=float)
-        if A is not None:
-            if isinstance(A, tuple):
-                # Interpret as dimensions
-                if not isshape(A):
-                    raise TypeError, "dimensions must be a 2-tuple of
positive"\
-                            " integers"
-                self.shape = A
-            elif isspmatrix(A):
-                if isspmatrix_dok(A) and copy:
-                    A = A.copy()
-                else:
-                    A = A.todok()
-                self.update( A )
-                self.shape = A.shape
-                self.dtype = A.dtype
+
+        self.dtype = getdtype(dtype, default=float)
+        if isinstance(arg1, tuple) and isshape(arg1): # (M,N)
+            M, N = arg1
+            self.shape = (M, N)
+        elif isspmatrix(arg1): # Sparse ctor
+            if isspmatrix_dok(arg1) and copy:
+                arg1 = arg1.copy()
             else:
-                #must be dense, convert to COO first, then to DOK
-                try:
-                    A = asarray(A)
-                except:
-                    raise ValueError, "unrecognized form for" \
-                            " %s_matrix constructor" % self.format
-                from coo import coo_matrix
-                self.update( coo_matrix(A).todok() )
-                self.shape = A.shape
-                self.dtype = A.dtype
+                arg1 = arg1.todok()
+            self.update(arg1)
+            self.shape = arg1.shape
+            self.dtype = arg1.dtype
+        else: # Dense ctor
+            try:
+                arg1 = asarray(arg1)
+            except:
+                raise TypeError('invalid input format')
+
+            if len(arg1.shape)!=2:
+                raise TypeError('expected rank <=2 dense array or matrix')
+
+            from coo import coo_matrix
+            self.update( coo_matrix(arg1).todok() )
+            self.shape = arg1.shape
+            self.dtype = arg1.dtype

     def getnnz(self):
         return dict.__len__(self)
@@ -90,7 +110,7 @@

         # Bounds checking
         if isintlike(i):
-            if i < 0:
+            if i < 0:
                 i += self.shape[0]
             if i < 0 or i >= self.shape[0]:
                 raise IndexError, "index out of bounds"
@@ -177,13 +197,10 @@

     def __setitem__(self, key, value):
         try:
-            assert len(key) == 2
-        except (AssertionError, TypeError):
-            raise TypeError, "index must be a pair of integers, slices, or" \
-                    " sequences"
-        i, j = key
+            i, j = key
+        except (ValueError, TypeError):
+            raise TypeError, "index must be a pair of integers or slices"

-
         # First deal with the case where both i and j are integers
         if isintlike(i) and isintlike(j):
             if i < 0:
@@ -193,20 +210,14 @@

             if i < 0 or i >= self.shape[0] or j < 0 or j >= self.shape[1]:
                 raise IndexError, "index out of bounds"
-            if isintlike(value) and value == 0:
-                if key in self.keys():  # get rid of it something already there
-                    del self[key]
+
+            if isscalar(value):
+                if value==0:
+                    del self[(i,j)]
+                else:
+                    dict.__setitem__(self, (i,j), self.dtype.type(value))
             else:
-                # Ensure value is a single element, not a sequence
-                if isinstance(value, float) or isintlike(value) or \
-                        isinstance(value, complex):
-                    dict.__setitem__(self, (i,j), self.dtype.type(value))
-                    newrows = max(self.shape[0], int(key[0])+1)
-                    newcols = max(self.shape[1], int(key[1])+1)
-                    self.shape = (newrows, newcols)
-                else:
-                    raise TypeError, "cannot set matrix element to non-scalar"
-            return                 # done
+                raise TypeError, "cannot set matrix element to a non-scalar"
         else:
             # Either i or j is a slice, sequence, or invalid.  If i is a slice
             # or sequence, unfold it first and call __setitem__ recursively.
---

Thanks,
James



More information about the SciPy-Dev mailing list