[SciPy-dev] Possible fix for scipy.sparse.lil_matrix column-slicing problem

Tim Victor timvictor at gmail.com
Tue Nov 24 03:14:10 EST 2009


Hi folks. My first attempt at contributing code to a Python project...

Report of the problem prior to my finding it:
http://mail.scipy.org/pipermail/scipy-user/2009-April/020688.html

Ticket for the problem:
http://projects.scipy.org/scipy/ticket/917

Related code change between SciPy 0.7.0 and 0.7.1:
http://projects.scipy.org/scipy/changeset/5630

Diff of the changes is at the end of this message.

I'm running:
Ubuntu 9.04, Athlon64 X2
Python version 2.6.2
NumPy version 1.2.1
SciPy version 0.7.0

# under SciPy 0.7.0, I got:
>>> m = sp.sparse.lil_matrix((20,20))
>>> m[:,0] = sp.ones((20,1),'d')
>>> m[0,0]
<1x1 sparse matrix of type '<type 'numpy.float64'>'
	with 1 stored elements in LInked List format>
>>> m[0,0].todense()
matrix([[ 1.]])
>>> m[0,0][0,0]
1.0

# using scipy.sparse.lil.py from SciPy 0.7.1, I get:
>>> m = sp.sparse.lil_matrix((20,20))
>>> m[:,0] = sp.ones((20,1),'d')
Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
  File "/usr/lib/python2.6/dist-packages/scipy/sparse/lil.py", line
329, in __setitem__
    self._insertat3(row, data, j, xx)
  File "/usr/lib/python2.6/dist-packages/scipy/sparse/lil.py", line
285, in _insertat3
    self._insertat2(row, data, j, x)
  File "/usr/lib/python2.6/dist-packages/scipy/sparse/lil.py", line
246, in _insertat2
    raise ValueError('setting an array element with a sequence')
ValueError: setting an array element with a sequence

# My version does this:
>>> m = sp.sparse.lil_matrix((20,20))
>>> m[:,0] = sp.ones((20,1),'d')
>>> m[0,0]
1.0
>>> type(m[0,0])
<type 'numpy.float64'>

# This matches the semantics for NumPy dense matrices:
>>> m = sp.matrix(sp.zeros((20,20), 'd'))
>>> m[:,0] = sp.ones((20,1),'d')
>>> m[0,0]
1.0
>>> type(m[0,0])
<type 'numpy.float64'>

Comments:
* The modified lil.py file is about the same size, actually four lines
longer I think.

* I've timed a selection of operations and all were as fast or faster
using my modified lil.py. Some are significantly faster now, such as
when assigning to a tall, narrow range of cells.

* It passes all unit tests in scipy.sparse.test(), but a special case
in the code is needed to pass the "test_lil_sequence_assignement" test
in scipy/sparse/tests/test_base.py. What I did might be excessively
permissive. I couldn't figure out what the semantics should be to
allow the assignment of a column vector to a row vector.

# Trying the same "test_lil_sequence_assignement" operations on a dense matrix:
>>> AA = sp.matrix(sp.zeros((4,3)))
>>> BB = sp.matrix(sp.eye(3,4))
>>> i0 = [0,1,2]
>>> i1 = (0,1,2)
>>> i2 = sp.array( i0 )
>>> AA[0,i0] = BB[i0,0]
Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
ValueError: array is not broadcastable to correct shape
>>> AA[1,i1] = BB[i1,1]
Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
ValueError: array is not broadcastable to correct shape
>>> AA[2,i2] = BB[i2,2]
Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
ValueError: array is not broadcastable to correct shape

Best regards,

Tim Victor

-------------------------------

# apply with "patch lil.py <lil.py.diff"
226,233d225
<
<     def _insertat(self, i, j, x):
<         """ helper for __setitem__: insert a value at (i,j) where i, j and x
<         are all scalars """
<         row = self.rows[i]
<         data = self.data[i]
<         self._insertat2(row, data, j, x)
<
244d235
<
268,270c259
<
<     def _insertat3(self, row, data, j, x):
<         """ helper for __setitem__ """
---
>     def _setitem_setrow(self, row, data, j, xrow, xdata, xcols):
274,277c263,274
<             if isinstance(x, spmatrix):
<                 x = x.todense()
<             x = np.asarray(x).squeeze()
<             if np.isscalar(x) or x.size == 1:
---
>             if xcols == len(j):
>                 for jj, xi in zip(j, xrange(xcols)):
>                    pos = bisect_left(xrow, xi)
>                    if pos != len(xdata) and xrow[pos] == xi:
>                        self._insertat2(row, data, jj, xdata[pos])
>                    else:
>                        self._insertat2(row, data, jj, 0)
>             elif xcols == 1:           # OK, broadcast across row
>                 if len(xdata) > 0 and xrow[0] == 0:
>                     val = xdata[0]
>                 else:
>                     val = 0
279c276
<                     self._insertat2(row, data, jj, x)
---
>                     self._insertat2(row, data, jj,val)
281,283c278
<                 # x must be one D. maybe check these things out
<                 for jj, xx in zip(j, x):
<                     self._insertat2(row, data, jj, xx)
---
>                 raise IndexError('invalid index')
285c280,285
<             self._insertat2(row, data, j, x)
---
>             if not xcols == 1:
>                 raise ValueError('array dimensions are not compatible for copy')
>             if len(xdata) > 0 and xrow[0] == 0:
>                 self._insertat2(row, data, j, xdata[0])
>             else:
>                 self._insertat2(row, data, j, 0)
289d288
<
291,295d289
<         if np.isscalar(x):
<             x = self.dtype.type(x)
<         elif not isinstance(x, spmatrix):
<             x = lil_matrix(x)
<
300a295,300
>         # shortcut for common case of single entry assign:
>         if np.isscalar(x) and np.isscalar(i) and np.isscalar(j):
>             self._insertat2(self.rows[i], self.data[i], j, x)
>             return
>
>         # shortcut for common case of full matrix assign:
302,308c302,310
<             if (isinstance(i, slice) and (i == slice(None))) and \
<                (isinstance(j, slice) and (j == slice(None))):
<                 # self[:,:] = other_sparse
<                 x = lil_matrix(x)
<                 self.rows = x.rows
<                 self.data = x.data
<                 return
---
>           if isinstance(i, slice) and i == slice(None) and \
>              isinstance(j, slice) and j == slice(None):
>                x = lil_matrix(x)
>                self.rows = x.rows
>                self.data = x.data
>                return
>
>         if isinstance(i, tuple):       # can't index lists with tuple
>             i = list(i)
311,321c313,315
<             row = self.rows[i]
<             data = self.data[i]
<             self._insertat3(row, data, j, x)
<         elif issequence(i) and issequence(j):
<             if np.isscalar(x):
<                 for ii, jj in zip(i, j):
<                     self._insertat(ii, jj, x)
<             else:
<                 for ii, jj, xx in zip(i, j, x):
<                     self._insertat(ii, jj, xx)
<         elif isinstance(i, slice) or issequence(i):
---
>             rows = [self.rows[i]]
>             datas = [self.data[i]]
>         else:
324,329c318,333
<             if np.isscalar(x):
<                 for row, data in zip(rows, datas):
<                     self._insertat3(row, data, j, x)
<             else:
<                 for row, data, xx in zip(rows, datas, x):
<                     self._insertat3(row, data, j, xx)
---
>
>         x = lil_matrix(x, copy=False)
>         xrows, xcols = x.shape
>         if xrows == len(rows):    # normal rectangular copy
>             for row, data, xrow, xdata in zip(rows, datas, x.rows, x.data):
>                 self._setitem_setrow(row, data, j, xrow, xdata, xcols)
>         elif xrows == 1:          # OK, broadcast down column
>             for row, data in zip(rows, datas):
>                 self._setitem_setrow(row, data, j, x.rows[0], x.data[0], xcols)
>
>         # needed to pass 'test_lil_sequence_assignement' unit test:
>         # -- set row from column of entries --
>         elif xcols == len(rows):
>             x = x.T
>             for row, data, xrow, xdata in zip(rows, datas, x.rows, x.data):
>                 self._setitem_setrow(row, data, j, xrow, xdata, xrows)
331c335
<             raise ValueError('invalid index value: %s' % str((i, j)))
---
>             raise IndexError('invalid index')



More information about the SciPy-Dev mailing list