[SciPy-dev] Possible fix for scipy.sparse.lil_matrix column-slicing problem

Wed Nov 25 16:29:21 EST 2009

On Tue, Nov 24, 2009 at 3:14 AM, Tim Victor <timvictor at gmail.com> wrote:
> Hi folks. My first attempt at contributing code to a Python project...
>
> Report of the problem prior to my finding it:
> http://mail.scipy.org/pipermail/scipy-user/2009-April/020688.html
>
> Ticket for the problem:
> http://projects.scipy.org/scipy/ticket/917
>
> Related code change between SciPy 0.7.0 and 0.7.1:
> http://projects.scipy.org/scipy/changeset/5630
>
> Diff of the changes is at the end of this message.
>
> I'm running:
> Ubuntu 9.04, Athlon64 X2
> Python version 2.6.2
> NumPy version 1.2.1
> SciPy version 0.7.0
>
> # under SciPy 0.7.0, I got:
>>>> m = sp.sparse.lil_matrix((20,20))
>>>> m[:,0] = sp.ones((20,1),'d')
>>>> m[0,0]
> <1x1 sparse matrix of type '<type 'numpy.float64'>'
>        with 1 stored elements in LInked List format>
>>>> m[0,0].todense()
> matrix([[ 1.]])
>>>> m[0,0][0,0]
> 1.0
>
> # using scipy.sparse.lil.py from SciPy 0.7.1, I get:
>>>> m = sp.sparse.lil_matrix((20,20))
>>>> m[:,0] = sp.ones((20,1),'d')
> Traceback (most recent call last):
>  File "<stdin>", line 1, in <module>
>  File "/usr/lib/python2.6/dist-packages/scipy/sparse/lil.py", line
> 329, in __setitem__
>    self._insertat3(row, data, j, xx)
>  File "/usr/lib/python2.6/dist-packages/scipy/sparse/lil.py", line
> 285, in _insertat3
>    self._insertat2(row, data, j, x)
>  File "/usr/lib/python2.6/dist-packages/scipy/sparse/lil.py", line
> 246, in _insertat2
>    raise ValueError('setting an array element with a sequence')
> ValueError: setting an array element with a sequence
>
> # My version does this:
>>>> m = sp.sparse.lil_matrix((20,20))
>>>> m[:,0] = sp.ones((20,1),'d')
>>>> m[0,0]
> 1.0
>>>> type(m[0,0])
> <type 'numpy.float64'>
>
> # This matches the semantics for NumPy dense matrices:
>>>> m = sp.matrix(sp.zeros((20,20), 'd'))
>>>> m[:,0] = sp.ones((20,1),'d')
>>>> m[0,0]
> 1.0
>>>> type(m[0,0])
> <type 'numpy.float64'>
>
> Comments:
> * The modified lil.py file is about the same size, actually four lines
> longer I think.
>
> * I've timed a selection of operations and all were as fast or faster
> using my modified lil.py. Some are significantly faster now, such as
> when assigning to a tall, narrow range of cells.
>
> * It passes all unit tests in scipy.sparse.test(), but a special case
> in the code is needed to pass the "test_lil_sequence_assignement" test
> in scipy/sparse/tests/test_base.py. What I did might be excessively
> permissive. I couldn't figure out what the semantics should be to
> allow the assignment of a column vector to a row vector.
>
> # Trying the same "test_lil_sequence_assignement" operations on a dense matrix:
>>>> AA = sp.matrix(sp.zeros((4,3)))
>>>> BB = sp.matrix(sp.eye(3,4))
>>>> i0 = [0,1,2]
>>>> i1 = (0,1,2)
>>>> i2 = sp.array( i0 )
>>>> AA[0,i0] = BB[i0,0]
> Traceback (most recent call last):
>  File "<stdin>", line 1, in <module>
> ValueError: array is not broadcastable to correct shape
>>>> AA[1,i1] = BB[i1,1]
> Traceback (most recent call last):
>  File "<stdin>", line 1, in <module>
> ValueError: array is not broadcastable to correct shape
>>>> AA[2,i2] = BB[i2,2]
> Traceback (most recent call last):
>  File "<stdin>", line 1, in <module>
> ValueError: array is not broadcastable to correct shape
>
> Best regards,
>
> Tim Victor
>
> -------------------------------
>
> # apply with "patch lil.py <lil.py.diff"
> 226,233d225
> <
> <     def _insertat(self, i, j, x):
> <         """ helper for __setitem__: insert a value at (i,j) where i, j and x
> <         are all scalars """
> <         row = self.rows[i]
> <         data = self.data[i]
> <         self._insertat2(row, data, j, x)
> <
> 244d235
> <
> 268,270c259
> <
> <     def _insertat3(self, row, data, j, x):
> <         """ helper for __setitem__ """
> ---
>>     def _setitem_setrow(self, row, data, j, xrow, xdata, xcols):
> 274,277c263,274
> <             if isinstance(x, spmatrix):
> <                 x = x.todense()
> <             x = np.asarray(x).squeeze()
> <             if np.isscalar(x) or x.size == 1:
> ---
>>             if xcols == len(j):
>>                 for jj, xi in zip(j, xrange(xcols)):
>>                    pos = bisect_left(xrow, xi)
>>                    if pos != len(xdata) and xrow[pos] == xi:
>>                        self._insertat2(row, data, jj, xdata[pos])
>>                    else:
>>                        self._insertat2(row, data, jj, 0)
>>             elif xcols == 1:           # OK, broadcast across row
>>                 if len(xdata) > 0 and xrow[0] == 0:
>>                     val = xdata[0]
>>                 else:
>>                     val = 0
> 279c276
> <                     self._insertat2(row, data, jj, x)
> ---
>>                     self._insertat2(row, data, jj,val)
> 281,283c278
> <                 # x must be one D. maybe check these things out
> <                 for jj, xx in zip(j, x):
> <                     self._insertat2(row, data, jj, xx)
> ---
>>                 raise IndexError('invalid index')
> 285c280,285
> <             self._insertat2(row, data, j, x)
> ---
>>             if not xcols == 1:
>>                 raise ValueError('array dimensions are not compatible for copy')
>>             if len(xdata) > 0 and xrow[0] == 0:
>>                 self._insertat2(row, data, j, xdata[0])
>>             else:
>>                 self._insertat2(row, data, j, 0)
> 289d288
> <
> 291,295d289
> <         if np.isscalar(x):
> <             x = self.dtype.type(x)
> <         elif not isinstance(x, spmatrix):
> <             x = lil_matrix(x)
> <
> 300a295,300
>>         # shortcut for common case of single entry assign:
>>         if np.isscalar(x) and np.isscalar(i) and np.isscalar(j):
>>             self._insertat2(self.rows[i], self.data[i], j, x)
>>             return
>>
>>         # shortcut for common case of full matrix assign:
> 302,308c302,310
> <             if (isinstance(i, slice) and (i == slice(None))) and \
> <                (isinstance(j, slice) and (j == slice(None))):
> <                 # self[:,:] = other_sparse
> <                 x = lil_matrix(x)
> <                 self.rows = x.rows
> <                 self.data = x.data
> <                 return
> ---
>>           if isinstance(i, slice) and i == slice(None) and \
>>              isinstance(j, slice) and j == slice(None):
>>                x = lil_matrix(x)
>>                self.rows = x.rows
>>                self.data = x.data
>>                return
>>
>>         if isinstance(i, tuple):       # can't index lists with tuple
>>             i = list(i)
> 311,321c313,315
> <             row = self.rows[i]
> <             data = self.data[i]
> <             self._insertat3(row, data, j, x)
> <         elif issequence(i) and issequence(j):
> <             if np.isscalar(x):
> <                 for ii, jj in zip(i, j):
> <                     self._insertat(ii, jj, x)
> <             else:
> <                 for ii, jj, xx in zip(i, j, x):
> <                     self._insertat(ii, jj, xx)
> <         elif isinstance(i, slice) or issequence(i):
> ---
>>             rows = [self.rows[i]]
>>             datas = [self.data[i]]
>>         else:
> 324,329c318,333
> <             if np.isscalar(x):
> <                 for row, data in zip(rows, datas):
> <                     self._insertat3(row, data, j, x)
> <             else:
> <                 for row, data, xx in zip(rows, datas, x):
> <                     self._insertat3(row, data, j, xx)
> ---
>>
>>         x = lil_matrix(x, copy=False)
>>         xrows, xcols = x.shape
>>         if xrows == len(rows):    # normal rectangular copy
>>             for row, data, xrow, xdata in zip(rows, datas, x.rows, x.data):
>>                 self._setitem_setrow(row, data, j, xrow, xdata, xcols)
>>         elif xrows == 1:          # OK, broadcast down column
>>             for row, data in zip(rows, datas):
>>                 self._setitem_setrow(row, data, j, x.rows[0], x.data[0], xcols)
>>
>>         # needed to pass 'test_lil_sequence_assignement' unit test:
>>         # -- set row from column of entries --
>>         elif xcols == len(rows):
>>             x = x.T
>>             for row, data, xrow, xdata in zip(rows, datas, x.rows, x.data):
>>                 self._setitem_setrow(row, data, j, xrow, xdata, xrows)
> 331c335
> <             raise ValueError('invalid index value: %s' % str((i, j)))
> ---
>>             raise IndexError('invalid index')
> _______________________________________________
> SciPy-Dev mailing list
> SciPy-Dev at scipy.org
> http://mail.scipy.org/mailman/listinfo/scipy-dev
>

Could you add this to the trac ticket, so that it doesn't get lost
until someone who knows this code has time to look at it?
maybe with a link to the mailing list.

Thanks,

Josef