[Scipy-svn] r6860 - trunk/scipy/io/matlab
scipy-svn at scipy.org
scipy-svn at scipy.org
Thu Nov 11 19:55:57 EST 2010
Author: matthew.brett at gmail.com
Date: 2010-11-11 18:55:56 -0600 (Thu, 11 Nov 2010)
New Revision: 6860
Modified:
trunk/scipy/io/matlab/mio5_utils.pyx
Log:
ENH: allow non-conforming strings with zero bytes and non-zero length
Modified: trunk/scipy/io/matlab/mio5_utils.pyx
===================================================================
--- trunk/scipy/io/matlab/mio5_utils.pyx 2010-11-02 15:19:28 UTC (rev 6859)
+++ trunk/scipy/io/matlab/mio5_utils.pyx 2010-11-12 00:55:56 UTC (rev 6860)
@@ -705,7 +705,7 @@
return scipy.sparse.csc_matrix(
(data,rowind,indptr),
shape=(M,N))
-
+
cpdef cnp.ndarray read_char(self, VarHeader5 header):
''' Read char matrices from stream as arrays
@@ -713,7 +713,7 @@
string by later processing in ``array_from_header``
'''
'''Notes to friendly fellow-optimizer
-
+
This routine is not much optimized. If I was going to do it,
I'd store the codecs as an object pointer array, as for the
.dtypes, I might use python_string.PyBytes_Decode for decoding,
@@ -724,7 +724,7 @@
deals with unicode strings passed as memory,
My own unicode introduction here:
- https://cirl.berkeley.edu/mb312/pydagogue/python_unicode.html
+ http://matthew-brett.github.com/pydagogue/python_unicode.html
'''
cdef:
cnp.uint32_t mdtype, byte_count
@@ -732,14 +732,23 @@
size_t el_count
object data, res, codec
cnp.ndarray arr
+ cnp.dtype dt
cdef size_t length = self.size_from_header(header)
data = self.read_element(
&mdtype, &byte_count, <void **>&data_ptr, True)
+ # There are mat files in the wild that have 0 byte count strings, but
+ # maybe with non-zero length.
+ if byte_count == 0:
+ arr = np.array(' ' * length, dtype='U')
+ return np.ndarray(shape=header.dims,
+ dtype=self.U1_dtype,
+ buffer=arr,
+ order='F')
# Character data can be of apparently numerical types,
# specifically np.uint8, np.int8, np.uint16. np.uint16 can have
# a length 1 type encoding, like ascii, or length 2 type
# encoding
- cdef cnp.dtype dt = <cnp.dtype>self.dtypes[mdtype]
+ dt = <cnp.dtype>self.dtypes[mdtype]
if mdtype == miUINT16:
codec = self.uint16_codec
if self.codecs['uint16_len'] == 1: # need LSBs only
@@ -759,14 +768,13 @@
uc_str = data.decode(codec)
# cast to array to deal with 2, 4 byte width characters
arr = np.array(uc_str, dtype='U')
- dt = self.U1_dtype
# could take this to numpy C-API level, but probably not worth
# it
return np.ndarray(shape=header.dims,
- dtype=dt,
+ dtype=self.U1_dtype,
buffer=arr,
order='F')
-
+
cpdef cnp.ndarray read_cells(self, VarHeader5 header):
''' Read cell array from stream '''
cdef:
More information about the Scipy-svn mailing list