[Scipy-svn] r5582 - in trunk/scipy/io/matlab: . tests

scipy-svn at scipy.org scipy-svn at scipy.org
Sat Feb 21 05:12:09 EST 2009


Author: matthew.brett at gmail.com
Date: 2009-02-21 04:12:05 -0600 (Sat, 21 Feb 2009)
New Revision: 5582

Modified:
   trunk/scipy/io/matlab/mio5.py
   trunk/scipy/io/matlab/miobase.py
   trunk/scipy/io/matlab/tests/test_mio.py
   trunk/scipy/io/matlab/zlibstreams.py
Log:
Avoid reading matlab functions (return error string); remove unpopular and slow zlib stream reader from use in reader

Modified: trunk/scipy/io/matlab/mio5.py
===================================================================
--- trunk/scipy/io/matlab/mio5.py	2009-02-20 23:40:19 UTC (rev 5581)
+++ trunk/scipy/io/matlab/mio5.py	2009-02-21 10:12:05 UTC (rev 5582)
@@ -24,7 +24,8 @@
 import scipy.sparse
 
 from miobase import MatFileReader, MatArrayReader, MatMatrixGetter, \
-     MatFileWriter, MatStreamWriter, docfiller, matdims
+     MatFileWriter, MatStreamWriter, docfiller, matdims, \
+     MatReadError
 
 miINT8 = 1
 miUINT8 = 2
@@ -65,9 +66,31 @@
 mxUINT64_CLASS = 15
 mxFUNCTION_CLASS = 16
 # Not doing anything with these at the moment.
-mxOPAQUE_CLASS = 17
+mxOPAQUE_CLASS = 17 # This appears to be a function workspace
+# https://www-old.cae.wisc.edu/pipermail/octave-maintainers/2007-May/002824.html
 mxOBJECT_CLASS_FROM_MATRIX_H = 18
 
+mxmap = { # Sometimes good for debug prints
+    mxCELL_CLASS: 'mxCELL_CLASS',
+    mxSTRUCT_CLASS: 'mxSTRUCT_CLASS',
+    mxOBJECT_CLASS: 'mxOBJECT_CLASS',
+    mxCHAR_CLASS: 'mxCHAR_CLASS',
+    mxSPARSE_CLASS: 'mxSPARSE_CLASS',
+    mxDOUBLE_CLASS: 'mxDOUBLE_CLASS',
+    mxSINGLE_CLASS: 'mxSINGLE_CLASS',
+    mxINT8_CLASS: 'mxINT8_CLASS',
+    mxUINT8_CLASS: 'mxUINT8_CLASS',
+    mxINT16_CLASS: 'mxINT16_CLASS',
+    mxUINT16_CLASS: 'mxUINT16_CLASS',
+    mxINT32_CLASS: 'mxINT32_CLASS',
+    mxUINT32_CLASS: 'mxUINT32_CLASS',
+    mxINT64_CLASS: 'mxINT64_CLASS',
+    mxUINT64_CLASS: 'mxUINT64_CLASS',
+    mxFUNCTION_CLASS: 'mxFUNCTION_CLASS',
+    mxOPAQUE_CLASS: 'mxOPAQUE_CLASS',
+    mxOBJECT_CLASS_FROM_MATRIX_H: 'mxOBJECT_CLASS_FROM_MATRIX_H',
+}
+
 mdtypes_template = {
     miINT8: 'i1',
     miUINT8: 'u1',
@@ -319,7 +342,7 @@
         if mc == mxOBJECT_CLASS:
             return Mat5ObjectMatrixGetter(self, header)
         if mc == mxFUNCTION_CLASS:
-            return Mat5FunctionMatrixGetter(self, header)
+            return Mat5FunctionGetter(self, header)
         raise TypeError, 'No reader for class code %s' % mc
 
 
@@ -329,25 +352,11 @@
     Sets up reader for gzipped stream on init, providing wrapper
     for this new sub-stream.
 
-    Note that we use a zlib stream reader to return the data from the
-    zlib compressed stream.
-
-    In our case, we want this reader (under the hood) to do one small
-    read of the stream to get enough data to check the variable name,
-    because we may want to skip this variable - for which we need to
-    check the name.  If we need to read the rest of the data (not
-    skipping), then (under the hood) the steam reader decompresses the
-    whole of the rest of the stream ready for returning here to
-    construct the array.  This avoids the overhead of reading the
-    stream in small chunks - the default behavior of our zlib stream
-    reader.
-
-    This is why we use TwoShotZlibInputStream below.
     '''
     def __init__(self, array_reader, byte_count):
+        instr = array_reader.mat_stream.read(byte_count)
         super(Mat5ZArrayReader, self).__init__(
-            TwoShotZlibInputStream(array_reader.mat_stream,
-                                   byte_count),
+            StringIO(zlib.decompress(instr)),
             array_reader.dtypes,
             array_reader.processor_func,
             array_reader.codecs,
@@ -516,10 +525,9 @@
         return MatlabObject(result, classname)
 
 
-class Mat5FunctionMatrixGetter(Mat5CellMatrixGetter):
+class Mat5FunctionGetter(Mat5ObjectMatrixGetter):
     def get_raw_array(self):
-        result = super(Mat5FunctionMatrixGetter, self).get_raw_array()
-        return MatlabFunction(result)
+        raise MatReadError('Cannot read matlab functions')
 
 
 class MatFile5Reader(MatFileReader):

Modified: trunk/scipy/io/matlab/miobase.py
===================================================================
--- trunk/scipy/io/matlab/miobase.py	2009-02-20 23:40:19 UTC (rev 5581)
+++ trunk/scipy/io/matlab/miobase.py	2009-02-21 10:12:05 UTC (rev 5582)
@@ -10,6 +10,7 @@
 
 import byteordercodes as boc
 
+class MatReadError(Exception): pass
 
 doc_dict = \
     {'file_arg':
@@ -136,6 +137,7 @@
     else:
         raise ValueError('Unknown mat file type, version %s' % ret)
 
+class MatReadError(Exception): pass
 
 def matdims(arr, oned_as='column'):
     ''' Determine equivalent matlab dimensions for given array 
@@ -412,7 +414,7 @@
                 arr = np.squeeze(arr)
                 if not arr.size:
                     arr = np.array([])
-                elif not arr.shape: # 0d coverted to scalar
+                elif not arr.shape and arr.dtype.isbuiltin: # 0d converted to scalar
                     arr = arr.item()
             return arr
         return func
@@ -442,7 +444,11 @@
             if variable_names and name not in variable_names:
                 getter.to_next()
                 continue
-            res = getter.get_array()
+            try:
+                res = getter.get_array()
+            except MatReadError, err:
+                res = "Read error: %s" % err
+                getter.to_next()
             mdict[name] = res
             if getter.is_global:
                 mdict['__globals__'].append(name)

Modified: trunk/scipy/io/matlab/tests/test_mio.py
===================================================================
--- trunk/scipy/io/matlab/tests/test_mio.py	2009-02-20 23:40:19 UTC (rev 5581)
+++ trunk/scipy/io/matlab/tests/test_mio.py	2009-02-21 10:12:05 UTC (rev 5582)
@@ -188,16 +188,14 @@
     {'name': 'sparsecomplex',
      'expected': {'testsparsecomplex': SP.coo_matrix(B)},
      })
-'''
+# We cannot read matlab functions for the moment
 case_table5.append(
     {'name': 'func',
-     'expected': {'testfunc': 0},
+     'expected': {'testfunc': 'Read error: Cannot read matlab functions'},
      })
-'''
 
-# These should also have matlab load equivalents,
-case_table5_rt = case_table5[:]
-# Inline functions can't be concatenated in matlab
+case_table5_rt = case_table5[:-1] # round trip (RT) cases: all but the function read case
+# Inline functions can't be concatenated in matlab, so round trip (RT) only
 case_table5_rt.append(
     {'name': 'objectarray',
      'expected': {'testobjectarray': np.repeat(MO, 2).reshape(1,2)}})

Modified: trunk/scipy/io/matlab/zlibstreams.py
===================================================================
--- trunk/scipy/io/matlab/zlibstreams.py	2009-02-20 23:40:19 UTC (rev 5581)
+++ trunk/scipy/io/matlab/zlibstreams.py	2009-02-21 10:12:05 UTC (rev 5582)
@@ -122,13 +122,14 @@
             return
         # read until we have enough bytes in the buffer
         read_to_end = bytes == -1
-
+        
+        bytes_to_fill = bytes - len(self.data)
+        if not (bytes_to_fill or read_to_end):
+            return
         # store data chunks in a list until the end so that we avoid the
         # quadratic behavior of continuously extending a string
         data_chunks = [self.data]
-        bytes_to_fill = bytes - len(self.data)
-
-        while read_to_end or bytes_to_fill > 0:
+        while bytes_to_fill > 0 or read_to_end:
             z_n_to_fetch = self._blocksize_iterator.next()
             if z_n_to_fetch == 0:
                 self.exhausted = True
@@ -198,11 +199,13 @@
             string containing read data
 
         '''
-        self.__fill(bytes)
         if bytes == -1:
+            self.__fill(bytes)
             data = self.data
             self.data = ""
         else:
+            if len(self.data) < bytes:
+                self.__fill(bytes)
             data = self.data[:bytes]
             self.data = self.data[bytes:]
         self.unzipped_pos += len(data)




More information about the Scipy-svn mailing list