[Scipy-svn] r3206 - in trunk/Lib/sandbox/timeseries: . tests

scipy-svn at scipy.org scipy-svn at scipy.org
Sat Jul 28 20:54:49 EDT 2007


Author: pierregm
Date: 2007-07-28 19:54:10 -0500 (Sat, 28 Jul 2007)
New Revision: 3206

Modified:
   trunk/Lib/sandbox/timeseries/tests/test_timeseries.py
   trunk/Lib/sandbox/timeseries/tseries.py
Log:
tseries: fixed fill_missing_dates for 2D series (the filled result now keeps the trailing dimensions of the input data)

Modified: trunk/Lib/sandbox/timeseries/tests/test_timeseries.py
===================================================================
--- trunk/Lib/sandbox/timeseries/tests/test_timeseries.py	2007-07-27 22:47:56 UTC (rev 3205)
+++ trunk/Lib/sandbox/timeseries/tests/test_timeseries.py	2007-07-29 00:54:10 UTC (rev 3206)
@@ -26,7 +26,7 @@
 from maskedarray.testutils import assert_equal, assert_array_equal
 
 from timeseries import tseries
-from timeseries import Date, date_array_fromlist, date_array, thisday
+from timeseries import Date, date_array_fromlist, date_array_fromrange, date_array, thisday
 from timeseries import time_series, TimeSeries, adjust_endpoints, \
     mask_period, align_series, align_with, fill_missing_dates, tsmasked, \
     concatenate_series, stack, split
@@ -445,16 +445,35 @@
         """Test fill_missing_dates function"""
         _start = Date(freq='m', year=2005, month=1)
         _end = Date(freq='m', year=2005, month=4)
-
+        #
         dates = date_array([_start, _end], freq='M')
         series = time_series([1, 2], dates)
         filled_ser = fill_missing_dates(series)
-
+        #
         assert_equal(filled_ser.start_date, _start)
         assert_equal(filled_ser.end_date, _end)
         assert(filled_ser.isfull())
         assert(not filled_ser.has_duplicated_dates())
         assert_equal(filled_ser.size, _end - _start + 1)
+        #
+        data = N.arange(5*24).reshape(5,24)
+        datelist = ['2007-07-01','2007-07-02','2007-07-03','2007-07-05','2007-07-06']
+        dates = date_array_fromlist(datelist, 'D')
+        dseries = time_series(data, dates)
+        ndates = date_array_fromrange(start_date=dates[0],end_date=dates[-2])
+        #
+        fseries = fill_missing_dates(dseries)
+        assert_equal(fseries.shape, (6,24))
+        assert_equal(fseries._mask[:,0], [0,0,0,1,0,0])
+        #
+        fseries = fill_missing_dates(dseries[:,0])
+        assert_equal(fseries.shape, (6,))
+        assert_equal(fseries._mask, [0,0,0,1,0,0])
+        #
+        series = time_series(data.ravel()[:4].reshape(2,2),dates=dates[:-1])
+        fseries = fill_missing_dates(series)
+        assert_equal(fseries.shape, (5,))
+        assert_equal(fseries._mask, [0,0,0,1,0,])
     #
     def test_maskperiod(self):
         "Test mask_period"

Modified: trunk/Lib/sandbox/timeseries/tseries.py
===================================================================
--- trunk/Lib/sandbox/timeseries/tseries.py	2007-07-27 22:47:56 UTC (rev 3205)
+++ trunk/Lib/sandbox/timeseries/tseries.py	2007-07-29 00:54:10 UTC (rev 3206)
@@ -1440,44 +1440,52 @@
     `fill_value` : float *[None]*
         Default value for missing data. If None, the data are just masked.
     """
-
+    # Check the frequency ........
     orig_freq = freq
     freq = check_freq(freq)
-
     if orig_freq is not None and freq == _c.FR_UND:
         freqstr = check_freq_str(freq)
         raise ValueError,\
               "Unable to define a proper date resolution (found %s)." % freqstr
+    # Check the dates .............
     if dates is None:
         if not isTimeSeries(data):
             raise InsufficientDateError
         dates = data._dates
-        freq = dates.freq
-        datad = data._series._data
-        datam = data._series._mask
-#        if fill_value is None:
-#            fill_value = data._fill_value
-    elif not isinstance(dates, DateArray):
-        dates = DateArray(dates, freq)
-        if isinstance(data, MaskedArray):
-            datad = data._data
-            datam = data._mask
-        else:
-            datad = data
-            datam = nomask
+    else:
+        if not isinstance(dates, DateArray):
+            dates = DateArray(dates, freq)
     dflat = dates.asfreq(freq).ravel()
-    n = len(dflat)
     if not dflat.has_missing_dates():
-        return time_series(data, dflat)
+        if isinstance(data, TimeSeries):
+            return data
+        data = data.view(TimeSeries)
+        data._dates = dflat
+        return data
+    # Check the data ..............
+    if isinstance(data, MaskedArray):
+        datad = data._data
+        datam = data._mask
+        if isinstance(data, TimeSeries):
+            datat = type(data)
+        else:
+            datat = TimeSeries
+    else:
+        datad = numpy.asarray(data)
+        datam = nomask
+        datat = TimeSeries
+    # Check whether we need to flatten the data
+    if dates.ndim > 1 and dates.ndim == datad.ndim:
+        datad.shape = -1
     # ...and now, fill it ! ......
     (tstart, tend) = dflat[[0,-1]]
     newdates = date_array(start_date=tstart, end_date=tend)
-    nsize = newdates.size
+    (osize, nsize) = (dflat.size, newdates.size)
     #.............................
     # Get the steps between consecutive data.
     delta = dflat.get_steps()-1
     gap = delta.nonzero()
-    slcid = numpy.r_[[0,], numpy.arange(1,n)[gap], [n,]]
+    slcid = numpy.r_[[0,], numpy.arange(1,osize)[gap], [osize,]]
     oldslc = numpy.array([slice(i,e)
                           for (i,e) in numpy.broadcast(slcid[:-1],slcid[1:])])
     addidx = delta[gap].astype(int_).cumsum()
@@ -1493,9 +1501,10 @@
         assert numpy.equal(vdflat[osl],vnewdates[nsl]).all(),\
             "Slicing mishap ! Please check %s (old) and %s (new)" % (osl,nsl)
     #.............................
-    data = MA.asarray(data)
-    newdatad = numeric.empty(nsize, data.dtype)
-    newdatam = numeric.ones(nsize, bool_)
+    newshape = list(datad.shape)
+    newshape[0] = nsize
+    newdatad = numeric.empty(newshape, data.dtype)
+    newdatam = numeric.ones(newshape, bool_)
     #....
     if datam is nomask:
         for (new,old) in zip(newslc,oldslc):
@@ -1506,12 +1515,13 @@
             newdatad[new] = datad[old]
             newdatam[new] = datam[old]
     newdata = MA.masked_array(newdatad, mask=newdatam, fill_value=fill_value)
-    # Get new shape ..............
-    if data.ndim == 1:
-        nshp = (newdates.size,)
-    else:
-        nshp = tuple([-1,] + list(data.shape[1:]))
-    _data = newdata.reshape(nshp).view(type(data))
+#    # Get new shape ..............
+#    if data.ndim == 1:
+#        nshp = (newdates.size,)
+#    else:
+#        nshp = tuple([-1,] + list(data.shape[1:]))
+#    _data = newdata.reshape(nshp).view(type(data))
+    _data = newdata.view(datat)
     _data._dates = newdates
     return _data
 #    return time_series(newdata.reshape(nshp), newdates)
@@ -1589,4 +1599,26 @@
         assert_equal(b._dates, series._dates)
         assert_equal(a[-5:], series[:5])
         assert_equal(b[:5], series[-5:])
-
+    #
+    if 1:
+        data = numpy.arange(5*24).reshape(5,24)
+        datelist = ['2007-07-01','2007-07-02','2007-07-03','2007-07-05','2007-07-06']
+        dates = date_array_fromlist(datelist, 'D')
+        dseries = time_series(data, dates)
+        ndates = date_array_fromrange(start_date=dates[0],end_date=dates[-2])
+        #
+        fseries = fill_missing_dates(dseries)
+        assert_equal(fseries.shape, (6,24))
+        assert_equal(fseries._mask[:,0], [0,0,0,1,0,0])
+        #
+        fseries = fill_missing_dates(dseries[:,0])
+        assert_equal(fseries.shape, (6,))
+        assert_equal(fseries._mask, [0,0,0,1,0,0])
+        #
+        series = time_series(data.ravel()[:4].reshape(2,2),dates=dates[:-1])
+        fseries = fill_missing_dates(series)
+        assert_equal(fseries.shape, (5,))
+        assert_equal(fseries._mask, [0,0,0,1,0,])
+        #
+        fseries = fill_missing_dates(data, date_array_fromlist(datelist,'D'))
+        




More information about the Scipy-svn mailing list