[Scipy-svn] r4561 - branches/Interpolate1D

Thu Jul 24 17:13:18 EDT 2008

Author: fcady
Date: 2008-07-24 16:13:18 -0500 (Thu, 24 Jul 2008)
New Revision: 4561

Modified:
   branches/Interpolate1D/Interpolate1D.py
   branches/Interpolate1D/TODO.txt
   branches/Interpolate1D/interpolate1d.py
Log:
test included for proper removal of bad data points

Modified: branches/Interpolate1D/Interpolate1D.py
===================================================================

--- branches/Interpolate1D/Interpolate1D.py	2008-07-24 19:43:52 UTC (rev 4560)
+++ branches/Interpolate1D/Interpolate1D.py	2008-07-24 21:13:18 UTC (rev 4561)
@@ -45,7 +45,7 @@
     
 def interp1d(x, y, new_x, kind='linear', low=np.NaN, high=np.NaN, \
                     kindkw={}, lowkw={}, highkw={}, \
-                    remove_bad_data = False, bad_data=[]):
+                    remove_bad_data = False, bad_data=[], interp_axis = 0):
     """ A function for interpolation of 1D data.
         
         Parameters
@@ -196,51 +196,47 @@
     def __init__(self, x, y, kind='linear', low=np.NaN, high=np.NaN, \
                         kindkw={}, lowkw={}, highkw={}, \
                         remove_bad_data = False, bad_data=[]):
+        # FIXME: don't allow copying multiple times.
+        # FIXME : allow no copying, in case user has huge dataset
         
-        # store properly-formatted versions of x and y
-        self._format_array(x, y, remove_bad_data = remove_bad_data, bad_data = bad_data)
-
-        # store interpolation functions for each range
-        self.kind = self._init_interp_method(self._x, self._y, kind, kindkw)
-        self.low = self._init_interp_method(self._x, self._y, low, lowkw)
-        self.high = self._init_interp_method(self._x, self._y, high, highkw)
-
-    def _format_array(self, x, y, remove_bad_data = False, bad_data = []):
-        """
-            Assigns properly formatted versions of x and y to self._x and self._y.
-            Also records data types.
-            
-            Formatting includes removal of all points whose x or y coordinate
-            is in missing_data.  This is the primary difference from
-            make_array_safe.
-        
-        """
-        # FIXME: don't allow copying multiple times.
-         
-        # check acceptable lengths for x and y
+        # check acceptable size and dimensions
+        x = np.array(x)
+        y = np.array(y)
         assert len(x) > 0 and len(y) > 0 , "Interpolate1D does not support\
                                         arrays of length 0"
+        assert x.ndim == 1 , "x must be one-dimensional"
+        assert y.ndim == 1 , "y must be one-dimensional" 
         assert len(x) == len(y) , "x and y must be of the same length"
         
-        # remove bad data
-        x = np.array(x)
-        y = np.array(y)
+        # remove bad data, if there is any
         if remove_bad_data:
-            mask = np.array([  (xi not in bad_data) and (not np.isnan(xi)) and \
-                                        (y[i] not in bad_data) and (not np.isnan(y[i])) \
-                                    for i, xi in enumerate(x) ])
-            x = x[mask]
-            y = y[mask]
-            
+            x, y = self._remove_bad_data(x, y, bad_data)
+        
         # select proper dataypes and make arrays
         self._xdtype = {np.float32 : np.float32}.setdefault(type(x[0]), np.float64) # unless data is float32,  cast to float64
         self._ydtype = {np.float32 : np.float32}.setdefault(type(y[0]), np.float64)
         self._x = make_array_safe(x, self._xdtype).copy()
         self._y = make_array_safe(y, self._ydtype).copy()
-            
-        # check dimensionality
-        assert self._x.ndim == 1 , "x must be one-dimensional"
-        assert self._y.ndim == 1 , "y must be one-dimensional"    
+
+        # store interpolation functions for each range
+        self.kind = self._init_interp_method(self._x, self._y, kind, kindkw)
+        self.low = self._init_interp_method(self._x, self._y, low, lowkw)
+        self.high = self._init_interp_method(self._x, self._y, high, highkw)
+
+    def _remove_bad_data(self, x, y, bad_data = [None, np.NaN]):
+        """ removes data points whose x or y coordinate is
+            either in bad_data or is a NaN.
+        """
+        # FIXME : In the future, it may be good to just replace the bad points with good guesses.
+        #       Especially when generalizing to higher dimensions
+        # FIXME : This step is very inefficient because it iterates over the array
+        mask = np.array([  (xi not in bad_data) and (not np.isnan(xi)) and \
+                                    (y[i] not in bad_data) and (not np.isnan(y[i])) \
+                                for i, xi in enumerate(x) ])
+        x = x[mask]
+        y = y[mask]
+        return x, y
+  
     
     def _init_interp_method(self, x, y, interp_arg, kw):
         """
@@ -429,5 +425,16 @@
         new_y = interp1d(x, y, new_x, kind='linear', low='linear', high='linear')        
         self.assertAllclose(new_x, new_y)
         
+    def test_removeBad(self):
+        """make sure : interp1d works with bad data
+        """
+        N = 7.0 # must be >=5
+        x = arange(N); x[2] = np.NaN
+        y = arange(N); y[4] = None; y[0]=np.NaN
+        new_x = arange(N+1)-0.5
+        new_y = interp1d(x, y, new_x, kind='linear', low='linear', high='linear', \
+                                    remove_bad_data = True, bad_data = [None])
+        self.assertAllclose(new_x, new_y)
+        
 if __name__ == '__main__':
     unittest.main()                 
\ No newline at end of file

Modified: branches/Interpolate1D/TODO.txt
===================================================================
--- branches/Interpolate1D/TODO.txt	2008-07-24 19:43:52 UTC (rev 4560)
+++ branches/Interpolate1D/TODO.txt	2008-07-24 21:13:18 UTC (rev 4561)
@@ -94,6 +94,13 @@
     This will probably take the form of two additional
     classes both based on interpolate1d.  Thus it probably
     shouldn't be done until interpolate1d is more settled.
+    
+    There is an interesting problem here.  Most of the extensions
+    I have assume a regular grid.  First off, this is often unrealistic.
+    Secondly, if I DO use a rectangular grid, how do I deal with bad
+    data?  The best way is probably a pre-processing step where you
+    interpolate values for the bad points (linear would be a nice simple
+    way to do it at first, just to get it working).
 
 
 **more convenient way to enter kw arguments
@@ -123,3 +130,11 @@
     arrays.
 
     Whatever is done must be very well commented.
+    
+    Also, there's a problem with removing bad data and still
+    retaining a rectangular array.  I would have to store a list
+    of (x,y) pairs, which suggests I should do this with a wrapper
+    function anyway.  Not memory efficient, but oh well.  In the
+    future, however, when x is an ND array, I should work around this;
+    I should have it be Rn -> Rm and fill in missing values to the best
+    of my ability.

Modified: branches/Interpolate1D/interpolate1d.py
===================================================================
--- branches/Interpolate1D/interpolate1d.py	2008-07-24 19:43:52 UTC (rev 4560)
+++ branches/Interpolate1D/interpolate1d.py	2008-07-24 21:13:18 UTC (rev 4561)
@@ -45,7 +45,7 @@
     
 def interp1d(x, y, new_x, kind='linear', low=np.NaN, high=np.NaN, \
                     kindkw={}, lowkw={}, highkw={}, \
-                    remove_bad_data = False, bad_data=[]):
+                    remove_bad_data = False, bad_data=[], interp_axis = 0):
     """ A function for interpolation of 1D data.
         
         Parameters
@@ -196,51 +196,47 @@
     def __init__(self, x, y, kind='linear', low=np.NaN, high=np.NaN, \
                         kindkw={}, lowkw={}, highkw={}, \
                         remove_bad_data = False, bad_data=[]):
+        # FIXME: don't allow copying multiple times.
+        # FIXME : allow no copying, in case user has huge dataset
         
-        # store properly-formatted versions of x and y
-        self._format_array(x, y, remove_bad_data = remove_bad_data, bad_data = bad_data)
-
-        # store interpolation functions for each range
-        self.kind = self._init_interp_method(self._x, self._y, kind, kindkw)
-        self.low = self._init_interp_method(self._x, self._y, low, lowkw)
-        self.high = self._init_interp_method(self._x, self._y, high, highkw)
-
-    def _format_array(self, x, y, remove_bad_data = False, bad_data = []):
-        """
-            Assigns properly formatted versions of x and y to self._x and self._y.
-            Also records data types.
-            
-            Formatting includes removal of all points whose x or y coordinate
-            is in missing_data.  This is the primary difference from
-            make_array_safe.
-        
-        """
-        # FIXME: don't allow copying multiple times.
-         
-        # check acceptable lengths for x and y
+        # check acceptable size and dimensions
+        x = np.array(x)
+        y = np.array(y)
         assert len(x) > 0 and len(y) > 0 , "Interpolate1D does not support\
                                         arrays of length 0"
+        assert x.ndim == 1 , "x must be one-dimensional"
+        assert y.ndim == 1 , "y must be one-dimensional" 
         assert len(x) == len(y) , "x and y must be of the same length"
         
-        # remove bad data
-        x = np.array(x)
-        y = np.array(y)
+        # remove bad data, if there is any
         if remove_bad_data:
-            mask = np.array([  (xi not in bad_data) and (not np.isnan(xi)) and \
-                                        (y[i] not in bad_data) and (not np.isnan(y[i])) \
-                                    for i, xi in enumerate(x) ])
-            x = x[mask]
-            y = y[mask]
-            
+            x, y = self._remove_bad_data(x, y, bad_data)
+        
         # select proper dataypes and make arrays
         self._xdtype = {np.float32 : np.float32}.setdefault(type(x[0]), np.float64) # unless data is float32,  cast to float64
         self._ydtype = {np.float32 : np.float32}.setdefault(type(y[0]), np.float64)
         self._x = make_array_safe(x, self._xdtype).copy()
         self._y = make_array_safe(y, self._ydtype).copy()
-            
-        # check dimensionality
-        assert self._x.ndim == 1 , "x must be one-dimensional"
-        assert self._y.ndim == 1 , "y must be one-dimensional"    
+
+        # store interpolation functions for each range
+        self.kind = self._init_interp_method(self._x, self._y, kind, kindkw)
+        self.low = self._init_interp_method(self._x, self._y, low, lowkw)
+        self.high = self._init_interp_method(self._x, self._y, high, highkw)
+
+    def _remove_bad_data(self, x, y, bad_data = [None, np.NaN]):
+        """ removes data points whose x or y coordinate is
+            either in bad_data or is a NaN.
+        """
+        # FIXME : In the future, it may be good to just replace the bad points with good guesses.
+        #       Especially when generalizing to higher dimensions
+        # FIXME : This step is very inefficient because it iterates over the array
+        mask = np.array([  (xi not in bad_data) and (not np.isnan(xi)) and \
+                                    (y[i] not in bad_data) and (not np.isnan(y[i])) \
+                                for i, xi in enumerate(x) ])
+        x = x[mask]
+        y = y[mask]
+        return x, y
+  
     
     def _init_interp_method(self, x, y, interp_arg, kw):
         """
@@ -429,5 +425,16 @@
         new_y = interp1d(x, y, new_x, kind='linear', low='linear', high='linear')        
         self.assertAllclose(new_x, new_y)
         
+    def test_removeBad(self):
+        """make sure : interp1d works with bad data
+        """
+        N = 7.0 # must be >=5
+        x = arange(N); x[2] = np.NaN
+        y = arange(N); y[4] = None; y[0]=np.NaN
+        new_x = arange(N+1)-0.5
+        new_y = interp1d(x, y, new_x, kind='linear', low='linear', high='linear', \
+                                    remove_bad_data = True, bad_data = [None])
+        self.assertAllclose(new_x, new_y)
+        
 if __name__ == '__main__':
     unittest.main()                 
\ No newline at end of file