[Scipy-svn] r2339 - trunk/Lib/io
scipy-svn at scipy.org
scipy-svn at scipy.org
Wed Nov 29 13:40:29 EST 2006
Author: matthew.brett at gmail.com
Date: 2006-11-29 12:40:23 -0600 (Wed, 29 Nov 2006)
New Revision: 2339
Added:
trunk/Lib/io/recaster.py
Modified:
trunk/Lib/io/miobase.py
Log:
More work on recasting, moved recasting into own module
Modified: trunk/Lib/io/miobase.py
===================================================================
--- trunk/Lib/io/miobase.py 2006-11-29 13:13:49 UTC (rev 2338)
+++ trunk/Lib/io/miobase.py 2006-11-29 18:40:23 UTC (rev 2339)
@@ -378,268 +378,3 @@
def __init__(self, file_stream):
self.file_stream = file_stream
-
-class DownCaster(object):
- ''' Downcast arrays to acceptable datatypes
-
- Initialization specifies acceptable datatypes (ADs)
-
- Implements downcast method - returns array that may be of
- different storage type to the input array, where the new type is
- one of the ADs.
-
- Also implements downcast_and_nearest method - returns downcast
- array, and datatype within the ADs that is nearest to the input
- datatype.
-
- The algorithm for the "nearest" is: return input datatype if in
- ADs; else return a higher precision datatype in ADs of same type
- if available; else return the next lower precision datatype in
- ADs of same type; else raise an error.
-
- At its simplest, the downcast method can reject arrays that
- are not in the list of ADs.
-
- '''
-
- _sctype_trans = {'complex': 'c', 'c': 'c',
- 'float': 'f', 'f': 'f',
- 'int': 'i', 'i': 'i',
- 'uint': 'u', 'u': 'u'}
-
- def __init__(self, sctype_list=None, sctype_tols=None):
- ''' Set types for which we are attempting to downcast
-
- Input
- sctype_list - list of acceptable scalar types
- If None defaults to all system types
- sctype_tols - dictionary key datatype, values rtol, tol
- to specify tolerances for checking near equality in downcasting
-
- '''
- sys_dict = self.system_sctype_dict()
- if sctype_list is None:
- self.sctype_dict = sys_dict.copy()
- sctype_list = self.sctype_dict.keys()
- else:
- D = {}
- for k, v in sys_dict.items():
- if k in sctype_list:
- D[k] = v
- self.sctype_dict = D
- self.sctype_list = sctype_list
- self.sctype_tols = self.default_sctype_tols()
- if sctype_tols is not None:
- self.sctype_tols.merge(sctype_tols)
- self.sized_sctypes = {}
- for k in ('c', 'f', 'i', 'u'):
- self.sized_sctypes[k] = self.sctypes_by_size(k)
- self.int_sctypes = [T for T in self.sctype_list if dtype(T).kind in ('i', 'u')]
- N = {}
- for k in sys_dict:
- N[k] = self._nearest_dtype(k)
- self.nearest_dtypes = N
-
- def system_sctype_dict(self):
- d_dict = {}
- for sc_type in ('complex','float'):
- t_list = sctypes[sc_type]
- for T in t_list:
- dt = dtype(T)
- d_dict[T] = {
- 'kind': dt.kind,
- 'size': dt.itemsize}
- for T in sctypes['int']:
- dt = dtype(T)
- sz = dt.itemsize
- bits = sz*8-1
- end = 2**bits
- d_dict[T] = {
- 'kind': dt.kind,
- 'size': sz,
- 'min': -end,
- 'max': end-1
- }
- for T in sctypes['uint']:
- dt = dtype(T)
- sz = dt.itemsize
- bits = sz*8
- end = 2**bits
- d_dict[T] = {
- 'kind': dt.kind,
- 'size': sz,
- 'min': 0,
- 'max': end
- }
- return d_dict
-
- def default_sctype_tols(self):
- ''' Default allclose tolerance values for dtypes '''
- t_dict = {}
- for sc_type in ('complex','float'):
- t_list = sctypes[sc_type]
- for T in t_list:
- dt = dtype(T)
- F = finfo(dt)
- t_dict[T] = {
- 'rtol': F.eps,
- 'atol': F.tiny}
- tiny = finfo(float64).tiny
- for sc_type in ('int', 'uint'):
- t_list = sctypes[sc_type]
- for T in t_list:
- dt = dtype(T)
- t_dict[T] = {
- 'rtol': 0,
- 'atol': tiny}
- return t_dict
-
- def tols_from_sctype(self, sctype):
- ''' Return rtol and atol for sctype '''
- tols = self.sctype_tols[sctype]
- return tols['rtol'], tols['atol']
-
- def sctypes_by_size(self, sctype):
- ''' Returns storage size ordered list of entries of scalar type sctype
-
- Input
- sctype - one of "complex" or "c", "float" or "f" ,
- "int" or "i", "uint" or "u"
- '''
- try:
- sctype = self._sctype_trans[sctype]
- except KeyError:
- raise TypeError, 'Did not recognize sctype %s' % sctype
- D = []
- for t in self.sctype_list:
- dt = dtype(t)
- if dt.kind == sctype:
- D.append([t, dt.itemsize])
- D.sort(lambda x, y: cmp(y[1], x[1]))
- return D
-
- def _nearest_dtype(self, dt):
- ''' Return dtype closest in size to that of dt
-
- Input
- dt - dtype
-
- ID = input dtype. VD = valid dtype. Return ID if ID is
- in VDs. If ID is smaller / larger than all VDs, return
- smallest / largest VD. Otherwise return nearest VD larger than
- ID.
- '''
- dt = dtype(dt)
- if dt in self.sctype_list:
- return dt
- sctypes = self.sized_sctypes[dt.kind]
- if not sctypes:
- return None
- dti = dt.itemsize
- for i, t in enumerate(sctypes):
- if t[1] < dti:
- break
- else:
- return t[0]
- if i:
- i-=1
- return sctypes[i][0]
-
- def smaller_same_kind(self, arr):
- ''' Return arr maybe downcast to same kind, smaller storage
-
- If arr cannot be downcast within given tolerances, then return
- arr if arr is in list of acceptable types, otherwise return
- None
- '''
- dt = arr.dtype
- dti = dt.itemsize
- sctypes = self.sized_sctypes[dt.kind]
- scts = [t[0] for i, t in enumerate(sctypes) if t[1] < dti]
- rtol, atol = self.tols_from_sctype(dt.type)
- ret_arr = arr
- for T in scts:
- test_arr = arr.astype(T)
- if allclose(test_arr, arr, rtol, atol):
- ret_arr = test_arr
- else:
- break
- else: # No downcasting withing tolerance
- if dt not in self.sctype_list:
- return None
- return ret_arr
-
- def smallest_int_dtype(self, mx, mn):
- ''' Return integer type with smallest storage containing mx and mn
-
- Inputs
- mx - maximum value
- mn - minumum value
-
- Returns None if no integer can contain this range
- '''
- dt = None
- for T in self.int_sctypes:
- t_dict = self.sctype_dict[T]
- if t_dict['max'] >= mx and t_dict['min'] <= mn:
- c_sz = t_dict['size']
- if dt is None or c_sz < sz:
- dt = T
- sz = c_sz
- return dt
-
- def recast(self, arr):
- arr = self.downcast(arr)
- if arr is not None:
- return arr
- # Could not downcast, arr dtype not in known list
- dt = self.capable_dtype[arr.dtype.type]
- if dt is not None:
- return arr.astype(dt)
- raise ValueError, 'Could not recast array within precision'
-
- def downcast(self, arr):
- dtk = arr.dtype.kind
- if dtk == 'c':
- return self.downcast_complex(arr)
- elif dtk == 'f':
- return self.downcast_float(arr)
- elif dtk in ('u', 'i'):
- return self.downcast_integer(arr)
- else:
- raise TypeError, 'Do not recognize array kind %s' % dtk
-
- def downcast_complex(self, arr):
- # can we downcast to float?
- dt = arr.dtype
- dti = ceil(dt.itemsize / 2)
- sctypes = self.sized_sctypes['f']
- flts = [t[0] for i, t in enumerate(sctypes) if t[1] <= dti]
- test_arr = arr.astype(flts[0])
- rtol, atol = self.tols_from_sctype(dt.type)
- if allclose(arr, test_arr, rtol, atol):
- return self.downcast_float(test_arr)
- # try downcasting to another complex type
- return self.smaller_same_kind(arr)
-
- def downcast_float(self, arr):
- # Try integer
- test_arr = self.downcast_integer(arr)
- rtol, atol = self.tols_from_sctype(arr.dtype.type)
- if allclose(arr, test_arr, rtol, atol):
- return test_arr
- # Otherwise descend the float types
- return self.smaller_same_kind(arr)
-
- def downcast_integer(self, arr):
- ''' Downcasts arr to integer
-
- Returns None if range of arr cannot be contained in acceptable
- integer types
- '''
- mx = amax(arr)
- mn = amin(arr)
- idt = self.smallest_int_dtype(mx, mn)
- if idt:
- return arr.astype(idt)
- return None
Added: trunk/Lib/io/recaster.py
===================================================================
--- trunk/Lib/io/recaster.py 2006-11-29 13:13:49 UTC (rev 2338)
+++ trunk/Lib/io/recaster.py 2006-11-29 18:40:23 UTC (rev 2339)
@@ -0,0 +1,342 @@
+# Author: Matthew Brett
+
+"""
+Recaster class for recasting numeric arrays
+"""
+
+from numpy import *
+
+
+class Recaster(object):
+ ''' Class to recast arrays to one of acceptable scalar types
+
+ Initialization specifies acceptable types (ATs)
+
+ Implements downcast and recast method - returns array that may be
+ of different storage type to the input array, where the new type
+ is one of the ATs. Downcast forces return array to be same size or
+ smaller than the input. recast method will return a larger type
+ if no smaller type will contain the data without loss of
+ precision.
+
+ At its simplest, the downcast method can reject arrays that
+ are not in the list of ATs.
+ '''
+
+ _sctype_trans = {'complex': 'c', 'c': 'c',
+ 'float': 'f', 'f': 'f',
+ 'int': 'i', 'i': 'i',
+ 'uint': 'u', 'u': 'u'}
+
+ def __init__(self, sctype_list=None, sctype_tols=None):
+ ''' Set types for which we are attempting to downcast
+
+ Input
+ sctype_list - list of acceptable scalar types
+ If None defaults to all system types
+ sctype_tols - dictionary key datatype, values rtol, tol
+ to specify tolerances for checking near equality in downcasting
+ '''
+ sys_dict = self.system_sctype_dict()
+ if sctype_list is None:
+ self.sctype_dict = sys_dict.copy()
+ sctype_list = self.sctype_dict.keys()
+ else:
+ D = {}
+ for k, v in sys_dict.items():
+ if k in sctype_list:
+ D[k] = v
+ self.sctype_dict = D
+ self.sctype_list = sctype_list
+ self.sctype_tols = self.default_sctype_tols()
+ if sctype_tols is not None:
+ self.sctype_tols.merge(sctype_tols)
+ # Cache sctype sizes,
+ self.sized_sctypes = {}
+ for k in ('c', 'f', 'i', 'u'):
+ self.sized_sctypes[k] = self.sctypes_by_size(k)
+ self.int_sctypes = [T for T in self.sctype_list if dtype(T).kind in ('i', 'u')]
+ self.all_int_sized_sctypes = []
+ for k, v in self.sized_sctypes.items():
+ if k in ('u', 'i'):
+ self.all_int_sized_sctypes.append(v)
+ self.nearest_dtypes = {}
+ for k in sys_dict:
+ self.nearest_dtypes[k] = self._nearest_dtype(k)
+ self.capable_dtypes = {}
+ for k in sys_dict:
+ self.capable_dtypes[k] = self._capable_dtype(k)
+
+ def system_sctype_dict(self):
+ d_dict = {}
+ for sc_type in ('complex','float'):
+ t_list = sctypes[sc_type]
+ for T in t_list:
+ dt = dtype(T)
+ d_dict[T] = {
+ 'kind': dt.kind,
+ 'size': dt.itemsize}
+ for T in sctypes['int']:
+ dt = dtype(T)
+ sz = dt.itemsize
+ bits = sz*8-1
+ end = 2**bits
+ d_dict[T] = {
+ 'kind': dt.kind,
+ 'size': sz,
+ 'min': -end,
+ 'max': end-1
+ }
+ for T in sctypes['uint']:
+ dt = dtype(T)
+ sz = dt.itemsize
+ bits = sz*8
+ end = 2**bits
+ d_dict[T] = {
+ 'kind': dt.kind,
+ 'size': sz,
+ 'min': 0,
+ 'max': end
+ }
+ return d_dict
+
+ def default_sctype_tols(self):
+ ''' Default allclose tolerance values for all dtypes '''
+ t_dict = {}
+ for sc_type in ('complex','float'):
+ t_list = sctypes[sc_type]
+ for T in t_list:
+ dt = dtype(T)
+ F = finfo(dt)
+ t_dict[T] = {
+ 'rtol': F.eps,
+ 'atol': F.tiny}
+ F = finfo(float64)
+ for sc_type in ('int', 'uint'):
+ t_list = sctypes[sc_type]
+ for T in t_list:
+ dt = dtype(T)
+ t_dict[T] = {
+ 'rtol': F.eps,
+ 'atol': F.tiny}
+ return t_dict
+
+ def tols_from_sctype(self, sctype):
+ ''' Return rtol and atol for sctype '''
+ tols = self.sctype_tols[sctype]
+ return tols['rtol'], tols['atol']
+
+ def sctypes_by_size(self, sctype):
+ ''' Returns storage size ordered list of entries of scalar type sctype
+
+ Input
+ sctype - one of "complex" or "c", "float" or "f" ,
+ "int" or "i", "uint" or "u"
+ '''
+ try:
+ sctype = self._sctype_trans[sctype]
+ except KeyError:
+ raise TypeError, 'Did not recognize sctype %s' % sctype
+ D = []
+ for t in self.sctype_list:
+ dt = dtype(t)
+ if dt.kind == sctype:
+ D.append([t, dt.itemsize])
+ D.sort(lambda x, y: cmp(y[1], x[1]))
+ return D
+
+ def _nearest_sctype(self, sct):
+ ''' Return scalar type closest in size to that of sct
+
+ Input
+ sct - sctype
+
+ ID = input sctype. AT = acceptable sctype. Return ID if ID is
+ in ATs. If ID is smaller / larger than all ATs, return
+ smallest / largest AT. Otherwise return nearest AT larger than
+ ID.
+ '''
+ dt = dtype(sct)
+ if sct in self.sctype_list:
+ return sct
+ sctypes = self.sized_sctypes[dt.kind]
+ if not sctypes:
+ return None
+ dti = sct.itemsize
+ for i, t in enumerate(sctypes):
+ if t[1] < dti:
+ break
+ else:
+ return t[0]
+ if i:
+ i-=1
+ return sctypes[i][0]
+
+ def _capable_sctype(self, sct):
+ ''' Return smallest scalar type containing sct type without precision loss
+
+ Input
+ sct - scalar type
+
+ ID = input type. AT = acceptable type. Return ID if ID is
+ in ATs. Otherwise return smallest AT that is larger than or
+ same size as ID.
+
+ If the desired sctype is an integer, returns the smallest
+ integer (int or uint) that can contain the range of the input
+ integer type
+
+ If there is no type that can contain sct without loss of
+ precision, return None
+ '''
+ if sct in self.sctype_list:
+ return sct
+ out_t = None
+ # Unsigned and signed integers
+ # Precision loss defined by max min outside datatype range
+ dt = dtype(sct)
+ if dt.kind in ('u', 'i'):
+ sctypes = self.all_int_sized_sctypes
+ if not sctypes:
+ return None
+ D = self.sctype_dict
+ mx = D[sct]['max']
+ mn = D[sct]['min']
+ for i, t in emumerate(sctypes):
+ this_sct = t[0]
+ this_d = D[dt]
+ if this_d['max'] >= mx and this_d['min'] <= mn:
+ out_t = this_sct
+ else:
+ # Complex and float types
+ # Precision loss defined by data size < sct
+ sctypes = self.sized_sctypes[sct]
+ if not sctypes:
+ return None
+ dti = dtype(sct).itemsize
+ out_t = None
+ for i, t in enumerate(sctypes):
+ if t[1] >= dti:
+ out_t = t[0]
+ return out_t
+
+ def smaller_same_kind(self, arr):
+ ''' Return arr maybe downcast to same kind, smaller storage
+
+ If arr cannot be downcast within given tolerances, then:
+ return arr if arr is in list of acceptable types, otherwise
+ return None
+ '''
+ dtp = arr.dtype
+ dti = dt.itemsize
+ sctypes = self.sized_sctypes[dtp.kind]
+ sctypes = [t[0] for i, t in enumerate(sctypes) if t[1] < dti]
+ return self.smaller_from_sctypes(arr, sctypes)
+
+ def smallest_from_sctypes(self, arr, sctypes):
+ ''' Returns array recast to smallest possible type from list
+
+ Inputs
+ arr - array to recast
+ sctypes - list of scalar types to try
+
+ Returns None if no recast is within tolerance
+ '''
+ dt = arr.dtype.type
+ rtol, atol = self.tols_from_sctype(dt)
+ ret_arr = arr
+ for T in sctypes:
+ test_arr = arr.astype(T)
+ if allclose(test_arr, arr, rtol, atol):
+ ret_arr = test_arr
+ else:
+ break
+ else: # No downcasting withing tolerance
+ if dt not in self.sctype_list:
+ return None
+ return ret_arr
+
+ def smallest_int_dtype(self, mx, mn):
+ ''' Return integer type with smallest storage containing mx and mn
+
+ Inputs
+ mx - maximum value
+ mn - minumum value
+
+ Returns None if no integer can contain this range
+ '''
+ dt = None
+ for T in self.int_sctypes:
+ t_dict = self.sctype_dict[T]
+ if t_dict['max'] >= mx and t_dict['min'] <= mn:
+ c_sz = t_dict['size']
+ if dt is None or c_sz < sz:
+ dt = T
+ sz = c_sz
+ return dt
+
+ def recast(self, arr):
+ ''' Try arr downcast, upcast if necesary to get compatible type '''
+ dt = arr.dtype.type
+ ret_arr = self.downcast(arr)
+ if ret_arr is not None:
+ return ret_arr
+ # Could not downcast, arr dtype not in known list
+ # Try upcast to larger dtype of same kind
+ udt = self.capable_dtype[dt]
+ if udt is not None:
+ return arr.astype(udt)
+ # We are stuck for floats and complex now
+ # Can try casting integers to floats
+ if arr.dt.kind in ('i', 'u'):
+ sctypes = self.sized_sctypes['f']
+ arr = self.smallest_from_sctypes(arr, sctypes)
+ if arr is not None:
+ return arr
+ raise ValueError, 'Could not recast array within precision'
+
+ def downcast(self, arr):
+ dtk = arr.dtype.kind
+ if dtk == 'c':
+ return self.downcast_complex(arr)
+ elif dtk == 'f':
+ return self.downcast_float(arr)
+ elif dtk in ('u', 'i'):
+ return self.downcast_integer(arr)
+ else:
+ raise TypeError, 'Do not recognize array kind %s' % dtk
+
+ def downcast_complex(self, arr):
+ # can we downcast to float?
+ dt = arr.dtype
+ dti = ceil(dt.itemsize / 2)
+ sctypes = self.sized_sctypes['f']
+ flts = [t[0] for i, t in enumerate(sctypes) if t[1] <= dti]
+ test_arr = arr.astype(flts[0])
+ rtol, atol = self.tols_from_sctype(dt.type)
+ if allclose(arr, test_arr, rtol, atol):
+ return self.downcast_float(test_arr)
+ # try downcasting to another complex type
+ return self.smaller_same_kind(arr)
+
+ def downcast_float(self, arr):
+ # Try integer
+ test_arr = self.downcast_integer(arr)
+ rtol, atol = self.tols_from_sctype(arr.dtype.type)
+ if allclose(arr, test_arr, rtol, atol):
+ return test_arr
+ # Otherwise descend the float types
+ return self.smaller_same_kind(arr)
+
+ def downcast_integer(self, arr):
+ ''' Downcasts arr to integer
+
+ Returns None if range of arr cannot be contained in acceptable
+ integer types
+ '''
+ mx = amax(arr)
+ mn = amin(arr)
+ idt = self.smallest_int_dtype(mx, mn)
+ if idt:
+ return arr.astype(idt)
+ return None
More information about the Scipy-svn
mailing list