[SciPy-dev] Possible fix for scipy.sparse.lil_matrix column-slicing problem
josef.pktd at gmail.com
josef.pktd at gmail.com
Wed Nov 25 16:29:21 EST 2009
On Tue, Nov 24, 2009 at 3:14 AM, Tim Victor <timvictor at gmail.com> wrote:
> Hi folks. My first attempt at contributing code to a Python project...
>
> Report of the problem prior to my finding it:
> http://mail.scipy.org/pipermail/scipy-user/2009-April/020688.html
>
> Ticket for the problem:
> http://projects.scipy.org/scipy/ticket/917
>
> Related code change between SciPy 0.7.0 and 0.7.1:
> http://projects.scipy.org/scipy/changeset/5630
>
> Diff of the changes is at the end of this message.
>
> I'm running:
> Ubuntu 9.04, Athlon64 X2
> Python version 2.6.2
> NumPy version 1.2.1
> SciPy version 0.7.0
>
> # under SciPy 0.7.0, I got:
>>>> m = sp.sparse.lil_matrix((20,20))
>>>> m[:,0] = sp.ones((20,1),'d')
>>>> m[0,0]
> <1x1 sparse matrix of type '<type 'numpy.float64'>'
> with 1 stored elements in LInked List format>
>>>> m[0,0].todense()
> matrix([[ 1.]])
>>>> m[0,0][0,0]
> 1.0
>
> # using scipy.sparse.lil.py from SciPy 0.7.1, I get:
>>>> m = sp.sparse.lil_matrix((20,20))
>>>> m[:,0] = sp.ones((20,1),'d')
> Traceback (most recent call last):
> File "<stdin>", line 1, in <module>
> File "/usr/lib/python2.6/dist-packages/scipy/sparse/lil.py", line
> 329, in __setitem__
> self._insertat3(row, data, j, xx)
> File "/usr/lib/python2.6/dist-packages/scipy/sparse/lil.py", line
> 285, in _insertat3
> self._insertat2(row, data, j, x)
> File "/usr/lib/python2.6/dist-packages/scipy/sparse/lil.py", line
> 246, in _insertat2
> raise ValueError('setting an array element with a sequence')
> ValueError: setting an array element with a sequence
>
> # My version does this:
>>>> m = sp.sparse.lil_matrix((20,20))
>>>> m[:,0] = sp.ones((20,1),'d')
>>>> m[0,0]
> 1.0
>>>> type(m[0,0])
> <type 'numpy.float64'>
>
> # This matches the semantics for NumPy dense matrices:
>>>> m = sp.matrix(sp.zeros((20,20), 'd'))
>>>> m[:,0] = sp.ones((20,1),'d')
>>>> m[0,0]
> 1.0
>>>> type(m[0,0])
> <type 'numpy.float64'>
>
> Comments:
> * The modified lil.py file is about the same size, actually four lines
> longer I think.
>
> * I've timed a selection of operations and all were as fast or faster
> using my modified lil.py. Some are significantly faster now, such as
> when assigning to a tall, narrow range of cells.
>
> * It passes all unit tests in scipy.sparse.test(), but a special case
> in the code is needed to pass the "test_lil_sequence_assignement" test
> in scipy/sparse/tests/test_base.py. What I did might be excessively
> permissive. I couldn't figure out what the semantics should be to
> allow the assignment of a column vector to a row vector.
>
> # Trying the same "test_lil_sequence_assignement" operations on a dense matrix:
>>>> AA = sp.matrix(sp.zeros((4,3)))
>>>> BB = sp.matrix(sp.eye(3,4))
>>>> i0 = [0,1,2]
>>>> i1 = (0,1,2)
>>>> i2 = sp.array( i0 )
>>>> AA[0,i0] = BB[i0,0]
> Traceback (most recent call last):
> File "<stdin>", line 1, in <module>
> ValueError: array is not broadcastable to correct shape
>>>> AA[1,i1] = BB[i1,1]
> Traceback (most recent call last):
> File "<stdin>", line 1, in <module>
> ValueError: array is not broadcastable to correct shape
>>>> AA[2,i2] = BB[i2,2]
> Traceback (most recent call last):
> File "<stdin>", line 1, in <module>
> ValueError: array is not broadcastable to correct shape
>
> Best regards,
>
> Tim Victor
>
> -------------------------------
>
> # apply with "patch lil.py <lil.py.diff"
> 226,233d225
> <
> < def _insertat(self, i, j, x):
> < """ helper for __setitem__: insert a value at (i,j) where i, j and x
> < are all scalars """
> < row = self.rows[i]
> < data = self.data[i]
> < self._insertat2(row, data, j, x)
> <
> 244d235
> <
> 268,270c259
> <
> < def _insertat3(self, row, data, j, x):
> < """ helper for __setitem__ """
> ---
>> def _setitem_setrow(self, row, data, j, xrow, xdata, xcols):
> 274,277c263,274
> < if isinstance(x, spmatrix):
> < x = x.todense()
> < x = np.asarray(x).squeeze()
> < if np.isscalar(x) or x.size == 1:
> ---
>> if xcols == len(j):
>> for jj, xi in zip(j, xrange(xcols)):
>> pos = bisect_left(xrow, xi)
>> if pos != len(xdata) and xrow[pos] == xi:
>> self._insertat2(row, data, jj, xdata[pos])
>> else:
>> self._insertat2(row, data, jj, 0)
>> elif xcols == 1: # OK, broadcast across row
>> if len(xdata) > 0 and xrow[0] == 0:
>> val = xdata[0]
>> else:
>> val = 0
> 279c276
> < self._insertat2(row, data, jj, x)
> ---
>> self._insertat2(row, data, jj,val)
> 281,283c278
> < # x must be one D. maybe check these things out
> < for jj, xx in zip(j, x):
> < self._insertat2(row, data, jj, xx)
> ---
>> raise IndexError('invalid index')
> 285c280,285
> < self._insertat2(row, data, j, x)
> ---
>> if not xcols == 1:
>> raise ValueError('array dimensions are not compatible for copy')
>> if len(xdata) > 0 and xrow[0] == 0:
>> self._insertat2(row, data, j, xdata[0])
>> else:
>> self._insertat2(row, data, j, 0)
> 289d288
> <
> 291,295d289
> < if np.isscalar(x):
> < x = self.dtype.type(x)
> < elif not isinstance(x, spmatrix):
> < x = lil_matrix(x)
> <
> 300a295,300
>> # shortcut for common case of single entry assign:
>> if np.isscalar(x) and np.isscalar(i) and np.isscalar(j):
>> self._insertat2(self.rows[i], self.data[i], j, x)
>> return
>>
>> # shortcut for common case of full matrix assign:
> 302,308c302,310
> < if (isinstance(i, slice) and (i == slice(None))) and \
> < (isinstance(j, slice) and (j == slice(None))):
> < # self[:,:] = other_sparse
> < x = lil_matrix(x)
> < self.rows = x.rows
> < self.data = x.data
> < return
> ---
>> if isinstance(i, slice) and i == slice(None) and \
>> isinstance(j, slice) and j == slice(None):
>> x = lil_matrix(x)
>> self.rows = x.rows
>> self.data = x.data
>> return
>>
>> if isinstance(i, tuple): # can't index lists with tuple
>> i = list(i)
> 311,321c313,315
> < row = self.rows[i]
> < data = self.data[i]
> < self._insertat3(row, data, j, x)
> < elif issequence(i) and issequence(j):
> < if np.isscalar(x):
> < for ii, jj in zip(i, j):
> < self._insertat(ii, jj, x)
> < else:
> < for ii, jj, xx in zip(i, j, x):
> < self._insertat(ii, jj, xx)
> < elif isinstance(i, slice) or issequence(i):
> ---
>> rows = [self.rows[i]]
>> datas = [self.data[i]]
>> else:
> 324,329c318,333
> < if np.isscalar(x):
> < for row, data in zip(rows, datas):
> < self._insertat3(row, data, j, x)
> < else:
> < for row, data, xx in zip(rows, datas, x):
> < self._insertat3(row, data, j, xx)
> ---
>>
>> x = lil_matrix(x, copy=False)
>> xrows, xcols = x.shape
>> if xrows == len(rows): # normal rectangular copy
>> for row, data, xrow, xdata in zip(rows, datas, x.rows, x.data):
>> self._setitem_setrow(row, data, j, xrow, xdata, xcols)
>> elif xrows == 1: # OK, broadcast down column
>> for row, data in zip(rows, datas):
>> self._setitem_setrow(row, data, j, x.rows[0], x.data[0], xcols)
>>
>> # needed to pass 'test_lil_sequence_assignement' unit test:
>> # -- set row from column of entries --
>> elif xcols == len(rows):
>> x = x.T
>> for row, data, xrow, xdata in zip(rows, datas, x.rows, x.data):
>> self._setitem_setrow(row, data, j, xrow, xdata, xrows)
> 331c335
> < raise ValueError('invalid index value: %s' % str((i, j)))
> ---
>> raise IndexError('invalid index')
> _______________________________________________
> SciPy-Dev mailing list
> SciPy-Dev at scipy.org
> http://mail.scipy.org/mailman/listinfo/scipy-dev
>
Could you add this to the trac ticket, so that it doesn't get lost
until someone who knows this code has time to look at it?
maybe with a link to the mailing list.
Thanks,
Josef
More information about the SciPy-Dev
mailing list