[Scipy-svn] r6861 - trunk/scipy/io/matlab
scipy-svn at scipy.org
scipy-svn at scipy.org
Thu Nov 11 19:56:02 EST 2010
Author: matthew.brett at gmail.com
Date: 2010-11-11 18:56:02 -0600 (Thu, 11 Nov 2010)
New Revision: 6861
Modified:
trunk/scipy/io/matlab/mio5.py
Log:
ENH: add utility function to pull variables out of mat file as individual mat files
Modified: trunk/scipy/io/matlab/mio5.py
===================================================================
--- trunk/scipy/io/matlab/mio5.py 2010-11-12 00:55:56 UTC (rev 6860)
+++ trunk/scipy/io/matlab/mio5.py 2010-11-12 00:56:02 UTC (rev 6861)
@@ -436,7 +436,76 @@
break
return mdict
-
+
+def varmats_from_mat(file_obj):
+    """ Pull variables out of mat 5 file as a sequence of mat file objects
+
+    This can be useful with a difficult mat file, containing unreadable
+    variables.  This routine pulls the variables out in raw form and puts
+    them, unread, back into a file stream for saving or reading.  Another use
+    is the pathological case where there is more than one variable of the
+    same name in the file; this routine returns the duplicates, whereas the
+    standard reader will overwrite duplicates in the returned dictionary.
+
+    The file pointer in `file_obj` will be undefined.  File pointers for the
+    returned file-like objects are set at 0.
+
+    Parameters
+    ----------
+    file_obj : file-like
+        file object containing mat file; it is used through its ``seek``,
+        ``tell`` and ``read`` methods
+
+    Returns
+    -------
+    named_mats : list
+        list contains tuples of (name, BytesIO), one per variable header
+        read from `file_obj`, in the order read.  Each BytesIO is a
+        file-like object containing mat file contents as for a single
+        variable: the bytes of the original file header followed by the raw
+        bytes of that one variable.  If ``var_file_obj`` is an individual
+        BytesIO instance, then save as a mat file with something like
+        ``open('test.mat', 'wb').write(var_file_obj.read())``
+
+    Examples
+    --------
+    BytesIO is from the ``io`` module in python 3, and is ``cStringIO`` for
+    python < 3.
+
+    >>> import numpy as np
+    >>> import scipy.io
+    >>> from io import BytesIO
+    >>> mat_fileobj = BytesIO()
+    >>> scipy.io.savemat(mat_fileobj, {'b': np.arange(10), 'a': 'a string'})
+    >>> varmats = varmats_from_mat(mat_fileobj)
+    >>> sorted([name for name, str_obj in varmats])
+    ['a', 'b']
+    """
+    rdr = MatFile5Reader(file_obj)
+    file_obj.seek(0)
+    # Raw read of top-level file header; these bytes are copied unchanged to
+    # the front of every output stream
+    hdr_len = np.dtype(mdtypes_template['file_header']).itemsize
+    raw_hdr = file_obj.read(hdr_len)
+    # Initialize variable reading
+    file_obj.seek(0)
+    rdr.initialize_read()
+    # Return value is not needed here; the file position after this call is
+    # taken as the start of the first variable
+    rdr.read_file_header()
+    next_position = file_obj.tell()
+    named_mats = []
+    while not rdr.end_of_stream():
+        start_position = next_position
+        hdr, next_position = rdr.read_var_header()
+        name = asstr(hdr.name)
+        # Go back and read the raw bytes of the whole variable (from the
+        # start of its header up to the start of the next variable)
+        file_obj.seek(start_position)
+        byte_count = next_position - start_position
+        var_str = file_obj.read(byte_count)
+        # Write file header plus this single variable to a new BytesIO
+        out_obj = BytesIO()
+        out_obj.write(raw_hdr)
+        out_obj.write(var_str)
+        # Rewind so callers can read the result from the beginning
+        out_obj.seek(0)
+        named_mats.append((name, out_obj))
+    return named_mats
+
+
def to_writeable(source):
''' Convert input object ``source`` to something we can write
More information about the Scipy-svn
mailing list