[Scipy-svn] r2987 - in trunk/Lib/sandbox/timeseries: . tests

Fri May 11 22:05:24 EDT 2007

Author: pierregm
Date: 2007-05-11 21:04:59 -0500 (Fri, 11 May 2007)
New Revision: 2987

Modified:
   trunk/Lib/sandbox/timeseries/tdates.py
   trunk/Lib/sandbox/timeseries/tests/test_dates.py
   trunk/Lib/sandbox/timeseries/tests/test_multitimeseries.py
   trunk/Lib/sandbox/timeseries/tests/test_timeseries.py
   trunk/Lib/sandbox/timeseries/tmulti.py
   trunk/Lib/sandbox/timeseries/tseries.py
Log:
MaskedArray
core      : fixed a bug w/ subok=False that prevented the mask from being inherited
testutils : fixed assert_array_compare to force the compared elements to pure ndarray

TimeSeries
tseries   : fixed the 'filled' method
          : allows 2D arrays to be passed to convert (thx to David Huard for the inspiration)
          : added David Huard's function/method split
tdates    : guess_freq : returns FR_UND when there is only one element

Modified: trunk/Lib/sandbox/timeseries/tdates.py
===================================================================

--- trunk/Lib/sandbox/timeseries/tdates.py	2007-05-12 02:04:48 UTC (rev 2986)
+++ trunk/Lib/sandbox/timeseries/tdates.py	2007-05-12 02:04:59 UTC (rev 2987)
@@ -458,7 +458,9 @@
     Returns a frequency code (alpha character)."""
     ddif = numeric.asarray(numpy.diff(dates))
     ddif.sort()
-    if ddif[0] == ddif[-1] == 1.:
+    if ddif.size == 0:
+        fcode = _c.FR_UND
+    elif ddif[0] == ddif[-1] == 1.:
         fcode = _c.FR_DAY
     elif (ddif[0] == 1.) and (ddif[-1] == 3.):
         fcode = _c.FR_BUS
@@ -699,4 +701,4 @@
     if 1:
         "Tests the automatic sorting of dates."
         D = date_array_fromlist(dlist=['2006-01','2005-01','2004-01'],freq='M')
-        assert_equal(D.view(ndarray), [24037, 24049, 24061])
\ No newline at end of file
+        assert_equal(D.view(ndarray), [24037, 24049, 24061])

Modified: trunk/Lib/sandbox/timeseries/tests/test_dates.py
===================================================================
--- trunk/Lib/sandbox/timeseries/tests/test_dates.py	2007-05-12 02:04:48 UTC (rev 2986)
+++ trunk/Lib/sandbox/timeseries/tests/test_dates.py	2007-05-12 02:04:59 UTC (rev 2987)
@@ -109,6 +109,10 @@
         dobj = [DateFromString(d) for d in dlist]
         odates = date_array_fromlist(dobj)
         assert_equal(dates,odates)
+        #
+        D = date_array_fromlist(dlist=['2006-01'])
+        assert_equal(D.tovalue(), [732312, ])
+        assert_equal(D.freq, C.FR_UND)
         print "finished test_fromsobjects"
 
     def test_consistent_value(self):

Modified: trunk/Lib/sandbox/timeseries/tests/test_multitimeseries.py
===================================================================
--- trunk/Lib/sandbox/timeseries/tests/test_multitimeseries.py	2007-05-12 02:04:48 UTC (rev 2986)
+++ trunk/Lib/sandbox/timeseries/tests/test_multitimeseries.py	2007-05-12 02:04:59 UTC (rev 2987)
@@ -72,6 +72,7 @@
         assert(mts['2007-01']._data == mrec[0])
         assert_equal(mts['2007-01']._dates, dates[0])       
         #
+        assert(isinstance(mts.f0, TimeSeries))
         assert_equal(mts.f0, time_series(d, dates=dates, mask=m))
         assert_equal(mts.f1, time_series(d[::-1], dates=dates, mask=m[::-1]))
         assert((mts._fieldmask == N.core.records.fromarrays([m, m[::-1]])).all())

Modified: trunk/Lib/sandbox/timeseries/tests/test_timeseries.py
===================================================================
--- trunk/Lib/sandbox/timeseries/tests/test_timeseries.py	2007-05-12 02:04:48 UTC (rev 2986)
+++ trunk/Lib/sandbox/timeseries/tests/test_timeseries.py	2007-05-12 02:04:59 UTC (rev 2987)
@@ -28,7 +28,8 @@
 from timeseries import tseries
 from timeseries import Date, date_array_fromlist, date_array, thisday
 from timeseries import time_series, TimeSeries, adjust_endpoints, \
-    mask_period, align_series, fill_missing_dates, tsmasked, concatenate_series
+    mask_period, align_series, fill_missing_dates, tsmasked, concatenate_series,\
+    stack, split
 
 class test_creation(NumpyTestCase):
     "Base test class for MaskedArrays."
@@ -76,15 +77,14 @@
         "Tests the creation of a series from a datearray"
         _, dates, _ = self.d
         data = dates
-
+        #
         series = time_series(data, dates)
         assert(isinstance(series, TimeSeries))
         assert_equal(series._dates, dates)
         assert_equal(series._data, data)
         assert_equal(series.freqstr, 'D')
-
+        #
         series[5] = MA.masked
-
         # ensure that series can be represented by a string after masking a value
         # (there was a bug before that prevented this from working when using a
         # DateArray for the data)
@@ -99,7 +99,7 @@
         assert_equal(series._data.size, 15)
         
     def test_unsorted(self):
-        "Tests that the data are porperly sorted along the dates."
+        "Tests that the data are properly sorted along the dates."
         dlist = ['2007-01-%02i' % i for i in (3,2,1)]
         data = [10,20,30]
         series = time_series(data,dlist)
@@ -368,6 +368,19 @@
         assert_array_equal(shift_negative, shift_negative_result)
         assert_array_equal(shift_positive, shift_positive_result)
     #
+    def test_split(self):
+        """Test the split function."""
+        ms = time_series(N.arange(62).reshape(31,2),
+                         start_date=Date(freq='d', year=2005, month=7, day=1))
+        d1,d2 = split(ms)
+        assert_array_equal(d1.data, ms.data[:,0])
+        assert_array_equal(d1.dates, ms.dates)
+        assert_array_equal(d2.data, ms.data[:,1])
+
+        series = self.d[0]
+        ss = split(series)[0]
+        assert_array_equal(series, ss)
+    #
     def test_convert(self):
         """Test convert function
 
@@ -379,6 +392,8 @@
                                     start_date=Date(freq='m', year=2005, month=6))
         highFreqSeries = time_series(N.arange(100),
                                     start_date=Date(freq='b', year=2005, month=6, day=1))
+        ndseries = time_series(N.arange(124).reshape(62,2), 
+                             start_date=Date(freq='d', year=2005, month=7, day=1))
 
         lowToHigh_start = lowFreqSeries.convert('B', position='START')
 
@@ -411,6 +426,8 @@
                      (Date(freq='b', year=2005, month=6, day=1) + 99).asfreq('M'))
 
         assert_array_equal(lowFreqSeries, lowFreqSeries.convert("M"))
+                
+        assert_equal(ndseries.convert('M',sum), [[930,961],[2852,2883]])
     #
     def test_fill_missing_dates(self):
         """Test fill_missing_dates function"""
@@ -451,8 +468,6 @@
                            inplace=False)
         result = N.array([0,0,0,0,0,1,1,1,1,1,1,1,0,0,0])
         assert_equal(mask._mask, result.repeat(2).reshape(-1,2))
-
-
     #
     def test_pickling(self):
         "Tests pickling/unpickling"
@@ -578,7 +593,6 @@
 
 
 
-
 ###############################################################################
 #------------------------------------------------------------------------------
 if __name__ == "__main__":

Modified: trunk/Lib/sandbox/timeseries/tmulti.py
===================================================================
--- trunk/Lib/sandbox/timeseries/tmulti.py	2007-05-12 02:04:48 UTC (rev 2986)
+++ trunk/Lib/sandbox/timeseries/tmulti.py	2007-05-12 02:04:59 UTC (rev 2987)
@@ -122,7 +122,8 @@
         if isinstance(obj, (MaskedRecords)):
             self.__dict__.update(_fieldmask=obj._fieldmask,
                                  _hardmask=obj._hardmask,
-                                 _fill_value=obj._fill_value,                                 
+                                 _fill_value=obj._fill_value,    
+                                 _names = obj.dtype.names                             
                                  )
             if isinstance(obj, MultiTimeSeries):
                 self.__dict__.update(observed=obj.observed,
@@ -135,7 +136,8 @@
                                  observed=None,
                                  _fieldmask = nomask,
                                  _hardmask = False,
-                                 fill_value = None
+                                 fill_value = None,
+                                 _names = self.dtype.names
                                 )
         return
     
@@ -152,27 +154,14 @@
     
     #......................................................
     def __getattribute__(self, attr):
-        return MaskedRecords.__getattribute__(self,attr)
-#        try:
-#            # Returns a generic attribute
-#            return object.__getattribute__(self,attr)
-#        except AttributeError: 
-#            # OK, so attr must be a field name
-#            pass
-#        # Get the list of fields ......
-#        _names = self.dtype.names
-#        _local = self.__dict__
-#        _mask = _local['_fieldmask']
-#        if attr in _names:
-#            _data = self._data
-#            obj = numeric.asarray(_data.__getattribute__(attr)).view(MaskedArray)
-#            obj._mask = make_mask(_mask.__getattribute__(attr))
-#            return obj
-#        elif attr == '_mask':
-#            if self.size > 1:
-#                return _mask.view((bool_, len(self.dtype))).all(1)
-#            return _mask.view((bool_, len(self.dtype)))
-#        raise AttributeError,"No attribute '%s' !" % attr
+        getattribute = MaskedRecords.__getattribute__
+        _dict = getattribute(self,'__dict__')
+        if attr in _dict.get('_names',[]):
+            obj = getattribute(self,attr).view(TimeSeries)
+            obj._dates = _dict['_dates']
+            return obj
+        return getattribute(self,attr)
+
             
     def __setattr__(self, attr, val):
         newattr = attr not in self.__dict__
@@ -526,7 +515,9 @@
         mts = MultiTimeSeries(mrec,dates)
         self_data = [d, m, mrec, dlist, dates, ts, mts]
         
-    if 1:        
+        assert(isinstance(mts.f0, TimeSeries))
+        
+    if 0:        
         mts[:2] = 5
         assert_equal(mts.f0._data, [5,5,2,3,4])
         assert_equal(mts.f1._data, [5,5,2,1,0])

Modified: trunk/Lib/sandbox/timeseries/tseries.py
===================================================================
--- trunk/Lib/sandbox/timeseries/tseries.py	2007-05-12 02:04:48 UTC (rev 2986)
+++ trunk/Lib/sandbox/timeseries/tseries.py	2007-05-12 02:04:59 UTC (rev 2987)
@@ -28,7 +28,7 @@
 
 import maskedarray as MA
 from maskedarray import MaskedArray, MAError, masked, nomask, \
-    filled, getmask, getmaskarray, make_mask_none, mask_or, make_mask, \
+    filled, getmask, getmaskarray, hsplit, make_mask_none, mask_or, make_mask, \
     masked_array
 
 import tcore as corelib
@@ -49,7 +49,7 @@
 'time_series', 'tsmasked',
 'mask_period','mask_inside_period','mask_outside_period','compressed',
 'adjust_endpoints','align_series','aligned','convert','group_byperiod',
-'pct','tshift','fill_missing_dates', 'stack', 'concatenate_series',
+'pct','tshift','fill_missing_dates', 'split', 'stack', 'concatenate_series',
 'empty_like',
 'day_of_week','day_of_year','day','month','quarter','year',
 'hour','minute','second',
@@ -505,14 +505,14 @@
            it is enabled. Otherwise fill with fill value.
         """
         desc = """\
-timeseries(data  =
+timeseries(
  %(data)s,
            dates =
  %(time)s,
            freq  = %(freq)s)
 """
         desc_short = """\
-timeseries(data  = %(data)s,
+timeseries(%(data)s,
            dates = %(time)s,
            freq  = %(freq)s)
 """
@@ -717,6 +717,30 @@
                 result = super(TimeSeries, self).transpose(*axes)
                 result._dates = self._dates
         return result
+    
+    def split(self):
+        """Split a multiple series into individual columns."""
+        if self.ndim == 1:
+            return [self]
+        else:
+            n = self.shape[1]
+            arr = hsplit(self, n)[0]
+            return [self.__class__(numpy.squeeze(a), 
+                                   self._dates, 
+                                   **_attrib_dict(self)) for a in arr]        
+    
+    def filled(self, fill_value=None):
+        """Returns an array of the same class as `_data`,
+ with masked values filled with `fill_value`.
+Subclassing is preserved.
+
+If `fill_value` is None, uses self.fill_value.
+        """
+        result = self._series.filled(fill_value=fill_value).view(type(self))
+        result._dates = self._dates
+        result.copy_attributes(self)
+        return result
+    
     #......................................................
     def copy_attributes(self, oldseries, exclude=[]):
         "Copies the attributes from oldseries if they are not in the exclude list."
@@ -850,6 +874,9 @@
 hour = _frommethod('hour')
 minute = _frommethod('minute')
 second = _frommethod('second')
+
+split = _frommethod('split')
+
 #
 ##### ---------------------------------------------------------------------------
 #---- ... Additional methods ...
@@ -1172,8 +1199,8 @@
     return [adjust_endpoints(x, start_date, end_date) for x in series]
 aligned = align_series
 #....................................................................
-def convert(series, freq, func='auto', position='END'):
-    """Converts a series to a frequency.
+def _convert1d(series, freq, func='auto', position='END'):
+    """Converts a series to a frequency. Private function called by convert
 
     When converting to a lower frequency, func is a function that acts
     on a 1-d array and returns a scalar or 1-d array. func should handle
@@ -1207,7 +1234,8 @@
             "Cannot adjust a series with missing or duplicated dates."
 
     if position.upper() not in ('END','START'):
-        raise ValueError("invalid value for position argument: (%s)",str(position))
+        raise ValueError("Invalid value for position argument: (%s). "\
+                         "Should be in ['END','START']," % str(position))
 
     start_date = series._dates[0]
 
@@ -1241,6 +1269,37 @@
     newseries.copy_attributes(series)
     return newseries
 
+def convert(series, freq, func='auto', position='END'):
+    """Converts a series to a frequency. Private function called by convert
+
+    When converting to a lower frequency, func is a function that acts
+    on a 1-d array and returns a scalar or 1-d array. func should handle
+    masked values appropriately. If func is "auto", then an
+    appropriate function is determined based on the observed attribute
+    of the series. If func is None, then a 2D array is returned, where each
+    column represents the values appropriately grouped into the new frequency.
+    interp and position will be ignored in this case.
+
+    When converting to a higher frequency, position is 'START' or 'END'
+    and determines where the data point is in each period (eg. if going
+    from monthly to daily, and position is 'END', then each data point is
+    placed at the end of the month).
+    """
+    if series.ndim == 1:
+        obj = _convert1d(series, freq, func, position)
+    elif series.ndim == 2:
+        base = _convert1d(series[:,0], freq, func, position)
+        obj = MA.column_stack([_convert1d(m,freq,func,position)._series 
+                               for m in series.split()]).view(type(series))
+        obj._dates = base._dates                        
+        if func is None or (func,series.observed) == ('auto','UNDEFINED'):         
+            shp = obj.shape
+            ncols = base.shape[-1]
+            obj.shape = (shp[0], shp[-1]//ncols, ncols)
+            obj = numpy.swapaxes(obj,1,2)
+    return obj
+        
+
 def group_byperiod(series, freq, position='END'):
     """Converts a series to a frequency, without any processing. If the series
     has missing data, it is first filled with masked data. Duplicate values in the
@@ -1422,7 +1481,7 @@
 #    return time_series(newdata.reshape(nshp), newdates)
 #...............................................................................
 def stack(*series):
-    """performs a column_stack on the data from each series, and the
+    """Performs a column_stack on the data from each series, and the
 resulting series has the same dates as each individual series. All series
 must be date compatible.
 
@@ -1513,4 +1572,24 @@
         # ensure that series can be represented by a string after masking a value
         # (there was a bug before that prevented this from working when using a
         # DateArray for the data)
-        strrep = str(series)
\ No newline at end of file
+        strrep = str(series)
+    
+    if 0:
+        series = time_series(numpy.arange(1,501),
+                             start_date=Date('D', string='2007-01-01'))
+        mseries = convert(series, 'M')
+        aseries = convert(mseries, 'A')
+        (freq, func, position) = ('A', None, 'END')
+        
+        tmp = mseries[:,0].convert('A')
+        aseries = MA.concatenate([_convert1d(m,'A')._series for m in mseries.split()],
+                                 axis=-1).view(type(series))
+        aseries._dates = tmp._dates                                 
+        shp = aseries.shape
+        aseries.shape = (shp[0], shp[-1]//tmp.shape[-1], tmp.shape[-1])
+        numpy.swapaxes(aseries,1,2)
+    
+    if 1:
+        series = time_series(N.arange(124).reshape(62,2), 
+                             start_date=Date(freq='d', year=2005, month=7, day=1))
+        assert_equal(series.convert('M',sum), [[930,961],[2852,2883]])
\ No newline at end of file