[Scipy-svn] r6861 - trunk/scipy/io/matlab

scipy-svn at scipy.org scipy-svn at scipy.org
Thu Nov 11 19:56:02 EST 2010


Author: matthew.brett at gmail.com
Date: 2010-11-11 18:56:02 -0600 (Thu, 11 Nov 2010)
New Revision: 6861

Modified:
   trunk/scipy/io/matlab/mio5.py
Log:
ENH: add utility function to pull variables out of mat file as individual mat files

Modified: trunk/scipy/io/matlab/mio5.py
===================================================================
--- trunk/scipy/io/matlab/mio5.py	2010-11-12 00:55:56 UTC (rev 6860)
+++ trunk/scipy/io/matlab/mio5.py	2010-11-12 00:56:02 UTC (rev 6861)
@@ -436,7 +436,76 @@
                     break
         return mdict
 
-    
+
+def varmats_from_mat(file_obj):
+    """ Pull variables out of mat 5 file as a sequence of mat file objects
+
+    This can be useful with a difficult mat file, containing unreadable
+    variables.  This routine pulls the variables out in raw form and puts them,
+    unread, back into a file stream for saving or reading.  Another use is the
+    pathological case where there is more than one variable of the same name in
+    the file; this routine returns the duplicates, whereas the standard reader
+    will overwrite duplicates in the returned dictionary.
+
+    The position of the file pointer in `file_obj` is undefined on return.
+    File pointers for the returned file-like objects are set at 0.
+
+    Parameters
+    ----------
+    file_obj : file-like
+        file object containing mat file
+
+    Returns
+    -------
+    named_mats : list
+        list contains tuples of (name, BytesIO) where BytesIO is a file-like
+        object containing mat file contents as for a single variable.  The
+        BytesIO contains the raw bytes of the original file header followed by
+        a single var.  If ``var_file_obj`` is an individual BytesIO instance,
+        then save as a mat file with something like ``open('test.mat',
+        'wb').write(var_file_obj.read())``
+
+    Examples
+    --------
+    BytesIO is from the ``io`` module in python 3, and is ``cStringIO`` for
+    python < 3.
+
+    >>> import numpy as np
+    >>> import scipy.io
+    >>> mat_fileobj = BytesIO()
+    >>> scipy.io.savemat(mat_fileobj, {'b': np.arange(10), 'a': 'a string'})
+    >>> varmats = varmats_from_mat(mat_fileobj)
+    >>> sorted([name for name, str_obj in varmats])
+    ['a', 'b']
+    """
+    rdr = MatFile5Reader(file_obj)
+    file_obj.seek(0)
+    # Raw read of top-level file header, copied verbatim into each output
+    hdr_len = np.dtype(mdtypes_template['file_header']).itemsize
+    raw_hdr = file_obj.read(hdr_len)
+    # Initialize variable reading
+    file_obj.seek(0)
+    rdr.initialize_read()
+    rdr.read_file_header()  # result unused; tell() below marks first variable
+    next_position = file_obj.tell()
+    named_mats = []
+    while not rdr.end_of_stream():
+        start_position = next_position
+        hdr, next_position = rdr.read_var_header()
+        name = asstr(hdr.name)
+        # Read raw variable bytes spanning this variable's start to the next
+        file_obj.seek(start_position)
+        byte_count = next_position - start_position
+        var_str = file_obj.read(byte_count)
+        # Write file header plus raw variable to a fresh BytesIO object
+        out_obj = BytesIO()
+        out_obj.write(raw_hdr)
+        out_obj.write(var_str)
+        out_obj.seek(0)  # rewind so the caller can read from the start
+        named_mats.append((name, out_obj))
+    return named_mats
+
+
 def to_writeable(source):
     ''' Convert input object ``source`` to something we can write
 




More information about the Scipy-svn mailing list