[Scipy-svn] r2236 - trunk/Lib/io

scipy-svn at scipy.org scipy-svn at scipy.org
Fri Sep 29 07:36:40 EDT 2006


Author: matthew.brett at gmail.com
Date: 2006-09-29 06:36:34 -0500 (Fri, 29 Sep 2006)
New Revision: 2236

Removed:
   trunk/Lib/io/bytestream.py
Modified:
   trunk/Lib/io/mio4.py
   trunk/Lib/io/mio5.py
   trunk/Lib/io/miobase.py
Log:
Cleanups to processor function, drafting mat5 writing

Deleted: trunk/Lib/io/bytestream.py
===================================================================
--- trunk/Lib/io/bytestream.py	2006-09-26 16:59:28 UTC (rev 2235)
+++ trunk/Lib/io/bytestream.py	2006-09-29 11:36:34 UTC (rev 2236)
@@ -1,69 +0,0 @@
-# Author: Matthew Brett
-
-''' File-like interfact for memmapped array '''
-
-from numpy import *
-
-class ByteStream(object):
-    ''' Overlays file-like interface on memmapped array
-
-    This may speed up array reading from files
-    
-    @byte_array         - uint array or string containing bytes
-    '''
-
-    def __init__(self, byte_array):
-        if isinstance(byte_array, ndarray):
-            if not byte_array.dtype == uint8:
-                raise ValueError, 'Need uint8 byte array as array input'
-            self.bytes = byte_array
-        elif isinstance(byte_array, basestring):
-            self.bytes = ndarray(
-                shape=(len(byte_array)),
-                dtype=uint8,
-                buffer=byte_array)
-        else:
-            raise ValueError, "Need string or byte array as input"
-        self.array_len = len(byte_array)
-        self.seek(0)
-
-    # current file position
-    def get_pos(self):
-        return self._pos
-    def set_pos(self, offset):
-        if offset < 0:
-            raise IOError, 'Invalid argument'
-        self._pos = offset
-    pos = property(get_pos,
-                   set_pos,
-                   None,
-                   'get/set current position')
-    
-    def seek(self, offset, whence=0):
-        """ Method emulates seek method of file objects """
-        if whence == 0:
-            self.pos = offset
-        elif whence == 1: # seek relative to the current position
-            self.pos += offset
-        elif whence == 2: # relative to end
-            self.pos = self.array_len + offset
-        else:
-            raise ValueError, 'Invalid value %d for whence parameter' % whence
-
-    def tell(self):
-        return self.pos
-    
-    def read(self, num_bytes=-1):
-        if num_bytes < 0:
-            num_bytes = self.array_len
-        if self.pos >= self.array_len:
-            return array([], dtype=uint8)
-        next_pos = min(self.pos + num_bytes, self.array_len)
-        res = self.bytes[self.pos:next_pos]
-        self.pos = next_pos
-        return res
-
-    def write(self, data):
-        assert False, 'Not implemented'
-        
-

Modified: trunk/Lib/io/mio4.py
===================================================================
--- trunk/Lib/io/mio4.py	2006-09-26 16:59:28 UTC (rev 2235)
+++ trunk/Lib/io/mio4.py	2006-09-29 11:36:34 UTC (rev 2236)
@@ -323,9 +323,6 @@
     arr = array(arr)
     if arr.dtype.hasobject:
         raise TypeError, 'Cannot save object arrays in Mat4'
-    if have_sparse:
-        if scipy.sparse.issparse(arr):
-            return Mat4SparseWriter(stream, arr, name)
     if arr.dtype.kind in ('U', 'S'):
         return Mat4CharWriter(stream, arr, name)
     else:

Modified: trunk/Lib/io/mio5.py
===================================================================
--- trunk/Lib/io/mio5.py	2006-09-26 16:59:28 UTC (rev 2235)
+++ trunk/Lib/io/mio5.py	2006-09-29 11:36:34 UTC (rev 2236)
@@ -517,5 +517,193 @@
         return 0 not in mopt_bytes
 
 
-class Mat5Writer(MatFileWriter):
-    pass
+class Mat5MatrixWriter(MatStreamWriter):
+
+    def write_header(self, mclass,
+                     is_global,
+                     is_complex=False,
+                     is_logical=False,
+                     nzmax=0):
+        ''' Write header for given data options
+        @mclass      - mat5 matrix class
+        @is_global   - True if matrix is global
+        @is_complex  - True if matrix is complex
+        @is_logical  - True if matrix is logical
+        '''
+        dims = self.arr.shape
+        header = empty((), mdtypes_template['header'])
+        M = not ByteOrder.little_endian
+        O = 0
+        header['mopt'] = (M * 1000 +
+                          O * 100 + 
+                          P * 10 +
+                          T)
+        header['mrows'] = dims[0]
+        header['ncols'] = dims[1]
+        header['imagf'] = imagf
+        header['namlen'] = len(self.name) + 1
+        self.write_bytes(header)
+        self.write_string(self.name + '\0')
+        
+    def write(self):
+        assert False, 'Not implemented'
+
+
+class Mat5NumericWriter(Mat5MatrixWriter):
+
+    def write(self):
+        # identify matlab type for array
+        # make at least 2d
+        # write miMATRIX tag
+        # write array flags (complex, global, logical, class, nzmax)
+        # dimensions
+        # array name
+        # maybe downcast array to smaller matlab type
+        # write real
+        # write imaginary
+        # put padded length in miMATRIX tag
+        pass
+    
+
+class Mat5CharWriter(Mat5MatrixWriter):
+
+    def write(self):
+        self.arr_to_chars()
+        self.arr_to_2d()
+        dims = self.arr.shape
+        self.write_header(P=miUINT8,
+                          T=mxCHAR_CLASS)
+        if self.arr.dtype.kind == 'U':
+            # Recode unicode to ascii
+            n_chars = product(dims)
+            st_arr = ndarray(shape=(),
+                             dtype=self.arr_dtype_number(n_chars),
+                             buffer=self.arr)
+            st = st_arr.item().encode('ascii')
+            self.arr = ndarray(shape=dims, dtype='S1', buffer=st)
+        self.write_bytes(self.arr)
+
+
+class Mat5SparseWriter(Mat5MatrixWriter):
+
+    def write(self):
+        ''' Sparse matrices are 2D
+        See docstring for Mat5SparseGetter
+        '''
+        imagf = self.arr.dtype.kind == 'c'
+        N = self.arr.nnz
+        ijd = zeros((N+1, 3+imagf), dtype='f8')
+        for i in range(N):
+            ijd[i,0], ijd[i,1] = self.arr.rowcol(i)
+        ijd[:-1,0:2] += 1 # 1 based indexing
+        if imagf:
+            ijd[:-1,2] = self.arr.data.real
+            ijd[:-1,3] = self.arr.data.imag
+        else:
+            ijd[:-1,2] = self.arr.data
+        ijd[-1,0:2] = self.arr.shape
+        self.write_header(P=miDOUBLE,
+                          T=mxSPARSE_CLASS,
+                          dims=ijd.shape)
+        self.write_bytes(ijd)
+        
+    
+def matrix_writer_factory(stream, arr, name, unicode_strings=False, is_global=False):
+    ''' Factory function to return matrix writer given variable to write
+    @stream      - file or file-like stream to write to
+    @arr         - array to write
+    @name        - name in matlab (TM) workspace
+    '''
+    if have_sparse:
+        if scipy.sparse.issparse(arr):
+            return Mat5SparseWriter(stream, arr, name, is_global)
+    arr = array(arr)
+    if arr.dtype.hasobject:
+        types, arr_type = classify_mobjects(arr)
+        if arr_type == 'c':
+            return Mat5CellWriter(stream, arr, name, is_global, types)
+        elif arr_type == 's':
+            return Mat5StructWriter(stream, arr, name, is_global)
+        elif arr_type == 'o':
+            return Mat5ObjectWriter(stream, arr, name, is_global)
+    if arr.dtype.kind in ('U', 'S'):
+        if unicode_strings:
+            return Mat5UniCharWriter(stream, arr, name, is_global)
+        else:
+            return Mat5IntCharWriter(stream, arr, name, is_global)            
+    else:
+        return Mat5NumericWriter(stream, arr, name, is_global)
+                    
+def classify_mobjects(objarr):
+    ''' Function to classify objects passed for writing
+    returns
+    types         - S1 array of same shape as objarr with codes for each object
+                    i  - invalid object
+                    a  - ndarray
+                    s  - matlab struct
+                    o  - matlab object
+    arr_type       - one of
+                    c  - cell array
+                    s  - struct array
+                    o  - object array
+    '''
+    N = objarr.size
+    types = empty((N,), dtype='S1')
+    types[:] = 'i'
+    type_set = set()
+    flato = objarr.flat
+    for i in range(N):
+        obj = flato[i]
+        if isinstance(obj, ndarray):
+            types[i] = 'a'
+            continue
+        try:
+            fns = tuple(obj._fieldnames)
+        except AttributeError:
+            continue
+        try:
+            cn = obj._classname
+        except AttributeError:
+            types[i] = 's'
+            type_set.add(fns)
+            continue
+        types[i] = 'o'
+        type_set.add((cn, fns))
+    arr_type = 'c'
+    if len(set(types))==1 and len(type_set) == 1:
+        arr_type = types[0]
+    return types.reshape(objarr.shape), arr_type
+           
+        
+class MatFile5Writer(MatFileWriter):
+    ''' Class for writing mat5 files '''
+    def __init__(self, file_stream,
+                 do_compression=False,
+                 unicode_strings=False,
+                 global_vars=None):
+        super(MatFile5Writer, self).__init__(file_stream)
+        self.do_compression = do_compression
+        self.unicode_strings = unicode_strings
+        if global_vars:
+            self.global_vars = global_vars
+        else:
+            self.global_vars = []
+        
+    def put_variables(self, mdict):
+        for name, var in mdict.items():
+            is_global = name in self.global_vars
+            stream = StringIO()
+            matrix_writer_factory(stream,
+                                  var,
+                                  name,
+                                  is_global,
+                                  self.unicode_strings,
+                                  ).write()
+            if self.do_compression:
+                str = zlib.compress(stream.getvalue())
+                tag = empty((), mdtypes_template['tag_full'])
+                tag['mdtype'] = miCOMPRESSED
+                tag['byte_count'] = len(str)
+                self.file_stream.write(tag.tostring() + str)
+            else:
+                self.file_stream.write(stream.getvalue())

Modified: trunk/Lib/io/miobase.py
===================================================================
--- trunk/Lib/io/miobase.py	2006-09-26 16:59:28 UTC (rev 2235)
+++ trunk/Lib/io/miobase.py	2006-09-29 11:36:34 UTC (rev 2236)
@@ -88,8 +88,9 @@
                           in ('native', '=')
                           or in ('little', '<')
                           or in ('BIG', '>')
-    @base_name          - base name for unnamed variables
-    @matlab_compatible  - return arrays as matlab (TM) saved them
+    @base_name          - base name for unnamed variables (unused in code)
+    @mat_dtype          - return arrays in same dtype as loaded into matlab
+                          (instead of the dtype with which they are saved)
     @squeeze_me         - whether to squeeze unit dimensions or not
     @chars_as_strings   - whether to convert char arrays to string arrays
 
@@ -106,7 +107,7 @@
     def __init__(self, mat_stream,
                  byte_order=None,
                  base_name='raw',
-                 matlab_compatible=False,
+                 mat_dtype=False,
                  squeeze_me=True,
                  chars_as_strings=True,
                  ):
@@ -119,22 +120,26 @@
         self.base_name = base_name
         self._squeeze_me = squeeze_me
         self._chars_as_strings = chars_as_strings
-        self.matlab_compatible = matlab_compatible
+        self._mat_dtype = mat_dtype
+        self.processor_func = self.get_processor_func()
         
-    # matlab_compatible property sets squeeze_me and chars_as_strings
-    def get_matlab_compatible(self):
-        return self._matlab_compatible
-    def set_matlab_compatible(self, m_l_c):
-        self._matlab_compatible = m_l_c
-        if m_l_c:
-            self._squeeze_me = False
-            self._chars_as_strings = False
+    def set_matlab_compatible(self):
+        ''' Sets options to return arrays as matlab (tm) loads them '''
+        self._mat_dtype = True
+        self._squeeze_me = False
+        self._chars_as_strings = False
         self.processor_func = self.get_processor_func()
-    matlab_compatible = property(get_matlab_compatible,
-                                 set_matlab_compatible,
-                                 None,
-                                 'get/set matlab_compatible property')
 
+    def get_mat_dtype(self):
+        return self._mat_dtype
+    def set_mat_dtype(self, mat_dtype):
+        self._mat_dtype = mat_dtype
+        self.processor_func = self.get_processor_func()
+    mat_dtype = property(get_mat_dtype,
+                         set_mat_dtype,
+                         None,
+                         'get/set mat_dtype property')
+
     def get_squeeze_me(self):
         return self._squeeze_me
     def set_squeeze_me(self, squeeze_me):
@@ -151,9 +156,9 @@
         self._chars_as_strings = chars_as_strings
         self.processor_func = self.get_processor_func()
     chars_as_strings = property(get_chars_as_strings,
-                          set_chars_as_strings,
-                          None,
-                          'get/set squeeze me property')
+                                set_chars_as_strings,
+                                None,
+                                'get/set squeeze me property')
     
     def get_order_code(self):
         return self._order_code
@@ -200,7 +205,7 @@
 
         The read array is the first argument.
         The getter, passed as second argument to the function, must
-        define properties, iff matlab_compatible option is True:
+        define properties, iff mat_dtype option is True:
         
         mat_dtype    - data type when loaded into matlab (tm)
                        (None for no conversion)
@@ -223,7 +228,7 @@
                         arr[...,i] = self.chars_to_str(str_arr[i])
                 else: # return string
                     arr = self.chars_to_str(arr)
-            if self.matlab_compatible:
+            if self.mat_dtype:
                 # Apply options to replicate matlab's (TM)
                 # load into workspace
                 if getter.mat_dtype is not None:




More information about the Scipy-svn mailing list