[SciPy-dev] Possible fix for scipy.sparse.lil_matrix column-slicing problem
Tim Victor
timvictor at gmail.com
Tue Nov 24 03:14:10 EST 2009
Hi folks. My first attempt at contributing code to a Python project...
Report of the problem prior to my finding it:
http://mail.scipy.org/pipermail/scipy-user/2009-April/020688.html
Ticket for the problem:
http://projects.scipy.org/scipy/ticket/917
Related code change between SciPy 0.7.0 and 0.7.1:
http://projects.scipy.org/scipy/changeset/5630
Diff of the changes is at the end of this message.
I'm running:
Ubuntu 9.04, Athlon64 X2
Python version 2.6.2
NumPy version 1.2.1
SciPy version 0.7.0
# under SciPy 0.7.0, I got:
>>> m = sp.sparse.lil_matrix((20,20))
>>> m[:,0] = sp.ones((20,1),'d')
>>> m[0,0]
<1x1 sparse matrix of type '<type 'numpy.float64'>'
with 1 stored elements in LInked List format>
>>> m[0,0].todense()
matrix([[ 1.]])
>>> m[0,0][0,0]
1.0
# using scipy.sparse.lil.py from SciPy 0.7.1, I get:
>>> m = sp.sparse.lil_matrix((20,20))
>>> m[:,0] = sp.ones((20,1),'d')
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "/usr/lib/python2.6/dist-packages/scipy/sparse/lil.py", line
329, in __setitem__
self._insertat3(row, data, j, xx)
File "/usr/lib/python2.6/dist-packages/scipy/sparse/lil.py", line
285, in _insertat3
self._insertat2(row, data, j, x)
File "/usr/lib/python2.6/dist-packages/scipy/sparse/lil.py", line
246, in _insertat2
raise ValueError('setting an array element with a sequence')
ValueError: setting an array element with a sequence
# My version does this:
>>> m = sp.sparse.lil_matrix((20,20))
>>> m[:,0] = sp.ones((20,1),'d')
>>> m[0,0]
1.0
>>> type(m[0,0])
<type 'numpy.float64'>
# This matches the semantics for NumPy dense matrices:
>>> m = sp.matrix(sp.zeros((20,20), 'd'))
>>> m[:,0] = sp.ones((20,1),'d')
>>> m[0,0]
1.0
>>> type(m[0,0])
<type 'numpy.float64'>
Comments:
* The modified lil.py file is about the same size, actually four lines
longer I think.
* I've timed a selection of operations and all were as fast or faster
using my modified lil.py. Some are significantly faster now, such as
when assigning to a tall, narrow range of cells.
* It passes all unit tests in scipy.sparse.test(), but a special case
in the code is needed to pass the "test_lil_sequence_assignement" test
in scipy/sparse/tests/test_base.py. What I did might be excessively
permissive. I couldn't figure out what the semantics should be to
allow the assignment of a column vector to a row vector.
# Trying the same "test_lil_sequence_assignement" operations on a dense matrix:
>>> AA = sp.matrix(sp.zeros((4,3)))
>>> BB = sp.matrix(sp.eye(3,4))
>>> i0 = [0,1,2]
>>> i1 = (0,1,2)
>>> i2 = sp.array( i0 )
>>> AA[0,i0] = BB[i0,0]
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
ValueError: array is not broadcastable to correct shape
>>> AA[1,i1] = BB[i1,1]
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
ValueError: array is not broadcastable to correct shape
>>> AA[2,i2] = BB[i2,2]
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
ValueError: array is not broadcastable to correct shape
Best regards,
Tim Victor
-------------------------------
# apply with "patch lil.py <lil.py.diff"
226,233d225
<
< def _insertat(self, i, j, x):
< """ helper for __setitem__: insert a value at (i,j) where i, j and x
< are all scalars """
< row = self.rows[i]
< data = self.data[i]
< self._insertat2(row, data, j, x)
<
244d235
<
268,270c259
<
< def _insertat3(self, row, data, j, x):
< """ helper for __setitem__ """
---
> def _setitem_setrow(self, row, data, j, xrow, xdata, xcols):
274,277c263,274
< if isinstance(x, spmatrix):
< x = x.todense()
< x = np.asarray(x).squeeze()
< if np.isscalar(x) or x.size == 1:
---
> if xcols == len(j):
> for jj, xi in zip(j, xrange(xcols)):
> pos = bisect_left(xrow, xi)
> if pos != len(xdata) and xrow[pos] == xi:
> self._insertat2(row, data, jj, xdata[pos])
> else:
> self._insertat2(row, data, jj, 0)
> elif xcols == 1: # OK, broadcast across row
> if len(xdata) > 0 and xrow[0] == 0:
> val = xdata[0]
> else:
> val = 0
279c276
< self._insertat2(row, data, jj, x)
---
> self._insertat2(row, data, jj,val)
281,283c278
< # x must be one D. maybe check these things out
< for jj, xx in zip(j, x):
< self._insertat2(row, data, jj, xx)
---
> raise IndexError('invalid index')
285c280,285
< self._insertat2(row, data, j, x)
---
> if not xcols == 1:
> raise ValueError('array dimensions are not compatible for copy')
> if len(xdata) > 0 and xrow[0] == 0:
> self._insertat2(row, data, j, xdata[0])
> else:
> self._insertat2(row, data, j, 0)
289d288
<
291,295d289
< if np.isscalar(x):
< x = self.dtype.type(x)
< elif not isinstance(x, spmatrix):
< x = lil_matrix(x)
<
300a295,300
> # shortcut for common case of single entry assign:
> if np.isscalar(x) and np.isscalar(i) and np.isscalar(j):
> self._insertat2(self.rows[i], self.data[i], j, x)
> return
>
> # shortcut for common case of full matrix assign:
302,308c302,310
< if (isinstance(i, slice) and (i == slice(None))) and \
< (isinstance(j, slice) and (j == slice(None))):
< # self[:,:] = other_sparse
< x = lil_matrix(x)
< self.rows = x.rows
< self.data = x.data
< return
---
> if isinstance(i, slice) and i == slice(None) and \
> isinstance(j, slice) and j == slice(None):
> x = lil_matrix(x)
> self.rows = x.rows
> self.data = x.data
> return
>
> if isinstance(i, tuple): # can't index lists with tuple
> i = list(i)
311,321c313,315
< row = self.rows[i]
< data = self.data[i]
< self._insertat3(row, data, j, x)
< elif issequence(i) and issequence(j):
< if np.isscalar(x):
< for ii, jj in zip(i, j):
< self._insertat(ii, jj, x)
< else:
< for ii, jj, xx in zip(i, j, x):
< self._insertat(ii, jj, xx)
< elif isinstance(i, slice) or issequence(i):
---
> rows = [self.rows[i]]
> datas = [self.data[i]]
> else:
324,329c318,333
< if np.isscalar(x):
< for row, data in zip(rows, datas):
< self._insertat3(row, data, j, x)
< else:
< for row, data, xx in zip(rows, datas, x):
< self._insertat3(row, data, j, xx)
---
>
> x = lil_matrix(x, copy=False)
> xrows, xcols = x.shape
> if xrows == len(rows): # normal rectangular copy
> for row, data, xrow, xdata in zip(rows, datas, x.rows, x.data):
> self._setitem_setrow(row, data, j, xrow, xdata, xcols)
> elif xrows == 1: # OK, broadcast down column
> for row, data in zip(rows, datas):
> self._setitem_setrow(row, data, j, x.rows[0], x.data[0], xcols)
>
> # needed to pass 'test_lil_sequence_assignement' unit test:
> # -- set row from column of entries --
> elif xcols == len(rows):
> x = x.T
> for row, data, xrow, xdata in zip(rows, datas, x.rows, x.data):
> self._setitem_setrow(row, data, j, xrow, xdata, xrows)
331c335
< raise ValueError('invalid index value: %s' % str((i, j)))
---
> raise IndexError('invalid index')
More information about the SciPy-Dev
mailing list