[Scipy-svn] r3305 - in trunk/scipy/sandbox/timeseries: . tests

scipy-svn at scipy.org scipy-svn at scipy.org
Wed Sep 5 21:13:35 EDT 2007


Author: pierregm
Date: 2007-09-05 20:13:29 -0500 (Wed, 05 Sep 2007)
New Revision: 3305

Added:
   trunk/scipy/sandbox/timeseries/tests/test_extras.py
   trunk/scipy/sandbox/timeseries/textras.py
Modified:
   trunk/scipy/sandbox/timeseries/tdates.py
   trunk/scipy/sandbox/timeseries/tests/test_dates.py
   trunk/scipy/sandbox/timeseries/tests/test_multitimeseries.py
   trunk/scipy/sandbox/timeseries/tests/test_timeseries.py
   trunk/scipy/sandbox/timeseries/tmulti.py
   trunk/scipy/sandbox/timeseries/tseries.py
Log:
timeseries:
tseries : * simplified TimeSeries.__new__ to only accept DateArrays as dates. To create a new TimeSeries object, use time_series
extras  : introducing isleapyear, count_missing, accept_atmost_missing

Modified: trunk/scipy/sandbox/timeseries/tdates.py
===================================================================
--- trunk/scipy/sandbox/timeseries/tdates.py	2007-09-06 00:46:24 UTC (rev 3304)
+++ trunk/scipy/sandbox/timeseries/tdates.py	2007-09-06 01:13:29 UTC (rev 3305)
@@ -214,6 +214,7 @@
 
     def __getitem__(self, indx):
         reset_full = True
+        # Determine what kind of index is used
         if isinstance(indx, Date):
             indx = self.find_dates(indx)
             reset_full = False
@@ -222,7 +223,12 @@
                 indx = self.find_dates(indx)
             except AttributeError:
                 pass
+        # Select the data
         r = ndarray.__getitem__(self, indx)
+        # Select the corresponding unsorted indices (if needed)
+        if self._unsorted is not None:
+            unsorted = self._unsorted[indx]
+        # Case 1. A simple integer
         if isinstance(r, (generic, int)):
             return Date(self.freq, value=r)
         elif hasattr(r, 'size') and r.size == 1:
@@ -679,41 +685,42 @@
 if __name__ == '__main__':
     import maskedarray.testutils
     from maskedarray.testutils import assert_equal
-    if 0:
-        dlist = ['2007-%02i' % i for i in range(1,5)+range(7,13)]
-        mdates = date_array_fromlist(dlist, 'M')
-        # Using an integer
-        assert_equal(mdates[0].value, 24073)
-        assert_equal(mdates[-1].value, 24084)
-        # Using a date
-        lag = mdates.find_dates(mdates[0])
-        print mdates[lag]
-        assert_equal(mdates[lag], mdates[0])
-    if 0:
-        hodie = today('D')
-        D = DateArray(today('D'))
-        assert_equal(D.freq, 6000)
-    if 0:
-        freqs = [x[0] for x in corelib.freq_dict.values() if x[0] != 'U']
-        print freqs
-        for f in freqs:
-            print f
-            today = thisday(f)
-            assert(Date(freq=f, value=today.value) == today)
-    if 0:
-        D = date_array(freq='U', start_date=Date('U',1), length=10)
-    if 0:
-        dlist = ['2007-01-%02i' % i for i in (1,2,4,5,7,8,10,11,13)]
-        ords = numpy.fromiter((DateTimeFromString(s).toordinal() for s in dlist),
-                               float_)
-    if 0:
-        "Tests the automatic sorting of dates."
-        D = date_array_fromlist(dlist=['2006-01','2005-01','2004-01'],freq='M')
-        assert_equal(D.view(ndarray), [24037, 24049, 24061])
+#    if 0:
+#        dlist = ['2007-%02i' % i for i in range(1,5)+range(7,13)]
+#        mdates = date_array_fromlist(dlist, 'M')
+#        # Using an integer
+#        assert_equal(mdates[0].value, 24073)
+#        assert_equal(mdates[-1].value, 24084)
+#        # Using a date
+#        lag = mdates.find_dates(mdates[0])
+#        print mdates[lag]
+#        assert_equal(mdates[lag], mdates[0])
+#    if 0:
+#        hodie = today('D')
+#        D = DateArray(today('D'))
+#        assert_equal(D.freq, 6000)
+#    if 0:
+#        freqs = [x[0] for x in corelib.freq_dict.values() if x[0] != 'U']
+#        print freqs
+#        for f in freqs:
+#            print f
+#            today = thisday(f)
+#            assert(Date(freq=f, value=today.value) == today)
+#    if 0:
+#        D = date_array(freq='U', start_date=Date('U',1), length=10)
+#    if 0:
+#        dlist = ['2007-01-%02i' % i for i in (1,2,4,5,7,8,10,11,13)]
+#        ords = numpy.fromiter((DateTimeFromString(s).toordinal() for s in dlist),
+#                               float_)
+#    if 0:
+#        "Tests the automatic sorting of dates."
+#        D = date_array_fromlist(dlist=['2006-01','2005-01','2004-01'],freq='M')
+#        assert_equal(D.view(ndarray), [24037, 24049, 24061])
 
     if 1:
         dlist = ['2007-%02i' % i for i in range(1,5)+range(7,13)]
         mdates = date_array_fromlist(dlist, 'M')
         
-        print mdates.tostr()
-        
\ No newline at end of file
+    if 2:
+        dlist = ['2007-01','2007-03','2007-04','2007-02']
+        mdates = date_array_fromlist(dlist, 'M')

Modified: trunk/scipy/sandbox/timeseries/tests/test_dates.py
===================================================================
--- trunk/scipy/sandbox/timeseries/tests/test_dates.py	2007-09-06 00:46:24 UTC (rev 3304)
+++ trunk/scipy/sandbox/timeseries/tests/test_dates.py	2007-09-06 01:13:29 UTC (rev 3305)
@@ -28,7 +28,8 @@
 import timeseries as ts
 from timeseries import const as C
 from timeseries.parser import DateFromString, DateTimeFromString
-from timeseries import *
+from timeseries import Date, DateArray,\
+    thisday, today, date_array, date_array_fromlist
 from timeseries.cseries import freq_dict
 
 

Added: trunk/scipy/sandbox/timeseries/tests/test_extras.py
===================================================================
--- trunk/scipy/sandbox/timeseries/tests/test_extras.py	2007-09-06 00:46:24 UTC (rev 3304)
+++ trunk/scipy/sandbox/timeseries/tests/test_extras.py	2007-09-06 01:13:29 UTC (rev 3305)
@@ -0,0 +1,84 @@
+# pylint: disable-msg=W0611, W0612, W0511,R0201
+"""Tests suite for MaskedArray.
+Adapted from the original test_ma by Pierre Gerard-Marchant
+
+:author: Pierre Gerard-Marchant & Matt Knox
+:contact: pierregm_at_uga_dot_edu & mattknox_ca_at_hotmail_dot_com
+:version: $Id$
+"""
+__author__ = "Pierre GF Gerard-Marchant & Matt Knox ($Author$)"
+__version__ = '1.0'
+__revision__ = "$Revision$"
+__date__     = '$Date$'
+
+
+import numpy
+from numpy.testing import NumpyTest, NumpyTestCase
+import maskedarray
+from maskedarray import masked
+from maskedarray.testutils import assert_equal, assert_almost_equal
+
+from timeseries import time_series, Date
+from timeseries import extras
+from timeseries.extras import *
+
+#..............................................................................
+class test_misc(NumpyTestCase):
+    "Base test class for MaskedArrays."
+    def __init__(self, *args, **kwds):
+        NumpyTestCase.__init__(self, *args, **kwds)
+    #
+    def test_leapyear(self):
+        leap = isleapyear([1900,1901,1902,1903,1904,2000,2001,2002,2003,2004])
+        assert_equal(leap, [0,0,0,0,1,1,0,0,0,1]) 
+        
+#..............................................................................
+class test_countmissing(NumpyTestCase):
+    #
+    def __init__(self, *args, **kwds):    
+        NumpyTestCase.__init__(self, *args, **kwds)
+        data = time_series(numpy.arange(731), 
+                           start_date=Date(string='2003-01-01', freq='D'),
+                           freq='D')
+        self.data = data
+        
+    def test_count_missing(self):
+        data = self.data
+        assert_equal(count_missing(data), 0)
+        assert_equal(count_missing(data.convert('A')), (0,0))
+        assert_equal(count_missing(data.convert('M')), [0]*24)
+        #
+        series = data.copy()
+        series[numpy.logical_not(data.day % 10)] = masked
+        assert_equal(count_missing(series), 70)
+        assert_equal(count_missing(series.convert('A')), (35,35))
+        assert_equal(count_missing(series.convert('M')), 
+                     [3,2,3,3,3,3,3,3,3,3,3,3]*2)
+        #
+        series[series.day == 31] = masked
+        assert_equal(count_missing(series), 84)
+        assert_equal(count_missing(series.convert('A')), (42,42))
+        assert_equal(count_missing(series.convert('M')), 
+                     [4,2,4,3,4,3,4,4,3,4,3,4]*2)
+    #
+    def test_accept_atmost_missing(self):
+        series = self.data.copy()
+        series[numpy.logical_not(self.data.day % 10)] = masked    
+        result = accept_atmost_missing(series.convert('M'),3,True)
+        assert_equal(result._mask.all(-1), [0]*24)    
+        result = accept_atmost_missing(series.convert('M'),3,False)
+        assert_equal(result._mask.all(-1), [1,0,1,1,1,1,1,1,1,1,1,1]*2)    
+        result = accept_atmost_missing(series.convert('M'),0.1,True)
+        assert_equal(result._mask.all(-1), [0]*24)    
+        result = accept_atmost_missing(series.convert('A'),35,True)
+        assert_equal(result._mask.all(-1), [0,0])    
+        result = accept_atmost_missing(series.convert('A'),35,False)
+        assert_equal(result._mask.all(-1), [1,1])    
+        result = accept_atmost_missing(series.convert('A'),0.05,True)
+        assert_equal(result._mask.all(-1), [1,1])    
+        
+
+###############################################################################
+#------------------------------------------------------------------------------
+if __name__ == "__main__":
+    NumpyTest().run()        
\ No newline at end of file


Property changes on: trunk/scipy/sandbox/timeseries/tests/test_extras.py
___________________________________________________________________
Name: svn:keywords
   + Date 
Author 
Revision
Id

Modified: trunk/scipy/sandbox/timeseries/tests/test_multitimeseries.py
===================================================================
--- trunk/scipy/sandbox/timeseries/tests/test_multitimeseries.py	2007-09-06 00:46:24 UTC (rev 3304)
+++ trunk/scipy/sandbox/timeseries/tests/test_multitimeseries.py	2007-09-06 01:13:29 UTC (rev 3305)
@@ -12,7 +12,7 @@
 
 import types
 
-import numpy as N
+import numpy
 import numpy.core.fromnumeric  as fromnumeric
 from numpy.testing import NumpyTest, NumpyTestCase
 from numpy.testing.utils import build_err_msg
@@ -41,10 +41,10 @@
         
     def setup(self):       
         "Generic setup" 
-        d = N.arange(5)
+        d = numpy.arange(5)
         m = MA.make_mask([1,0,0,1,1])
-        base_d = N.r_[d,d[::-1]].reshape(2,-1).T
-        base_m = N.r_[[m, m[::-1]]].T
+        base_d = numpy.r_[d,d[::-1]].reshape(2,-1).T
+        base_m = numpy.r_[[m, m[::-1]]].T
         base = MA.array(base_d, mask=base_m)    
         mrec = MR.fromarrays(base.T,)
         dlist = ['2007-%02i' % (i+1) for i in d]
@@ -62,9 +62,10 @@
         assert_equal(mts['f0']._mask, m)
         #
         assert(isinstance(mts[0], MultiTimeSeries))
-        assert_equal(mts._data[0], mrec[0])
+        assert_equal(mts._data[0], mrec._data[0])
         # We can't use assert_equal here, as it tries to convert the tuple into a singleton
-        assert(mts[0]._data.view(N.ndarray) == mrec[0])
+#        assert(mts[0]._data.view(numpy.ndarray) == mrec[0])
+        assert_equal(numpy.asarray(mts._data[0]), mrec[0])
         assert_equal(mts._dates[0], dates[0])  
         assert_equal(mts[0]._dates, dates[0])
         #
@@ -75,8 +76,8 @@
         assert(isinstance(mts.f0, TimeSeries))
         assert_equal(mts.f0, time_series(d, dates=dates, mask=m))
         assert_equal(mts.f1, time_series(d[::-1], dates=dates, mask=m[::-1]))
-        assert((mts._fieldmask == N.core.records.fromarrays([m, m[::-1]])).all())
-        assert_equal(mts._mask, N.r_[[m,m[::-1]]].all(0))
+        assert((mts._fieldmask == numpy.core.records.fromarrays([m, m[::-1]])).all())
+        assert_equal(mts._mask, numpy.r_[[m,m[::-1]]].all(0))
         assert_equal(mts.f0[1], mts[1].f0)
         #
         assert(isinstance(mts[:2], MultiTimeSeries))
@@ -124,7 +125,7 @@
         mts.harden_mask()
         assert(mts._hardmask)
         mts._mask = nomask
-        assert_equal(mts._mask, N.r_[[m,m[::-1]]].all(0))
+        assert_equal(mts._mask, numpy.r_[[m,m[::-1]]].all(0))
         mts.soften_mask()
         assert(not mts._hardmask)
         mts._mask = nomask
@@ -141,7 +142,7 @@
     def test_fromrecords(self):
         "Test from recarray."
         [d, m, mrec, dlist, dates, ts, mts] = self.data
-        nrec = N.core.records.fromarrays(N.r_[[d,d[::-1]]])
+        nrec = numpy.core.records.fromarrays(numpy.r_[[d,d[::-1]]])
         mrecfr = fromrecords(nrec.tolist(), dates=dates)
         assert_equal(mrecfr.f0, mrec.f0)
         assert_equal(mrecfr.dtype, mrec.dtype)

Modified: trunk/scipy/sandbox/timeseries/tests/test_timeseries.py
===================================================================
--- trunk/scipy/sandbox/timeseries/tests/test_timeseries.py	2007-09-06 00:46:24 UTC (rev 3304)
+++ trunk/scipy/sandbox/timeseries/tests/test_timeseries.py	2007-09-06 01:13:29 UTC (rev 3305)
@@ -11,7 +11,7 @@
 __revision__ = "$Revision$"
 __date__     = '$Date$'
 
-import numpy as N
+import numpy
 from numpy import bool_, complex_, float_, int_, object_
 import numpy.core.fromnumeric  as fromnumeric
 import numpy.core.numeric as numeric
@@ -105,14 +105,15 @@
         series = time_series(data,dlist)
         assert_equal(series._data,[30,20,10])
         #
-        series = TimeSeries(data, dlist)
+        dates = date_array_fromlist(dlist, freq='D')
+        series = TimeSeries(data, dates)
         assert_equal(series._data,[30,20,10])
         #
-        series = TimeSeries(data, dlist, mask=[1,0,0])
+        series = time_series(data, dlist, mask=[1,0,0])
         assert_equal(series._mask,[0,0,1])
         #
         data = masked_array([10,20,30],mask=[1,0,0])
-        series = TimeSeries(data, dlist)
+        series = time_series(data, dlist)
         assert_equal(series._mask,[0,0,1])
 #...............................................................................
 
@@ -265,7 +266,7 @@
         # With set
         series[:5] = 0
         assert_equal(series[:5]._series, [0,0,0,0,0])
-        dseries = N.log(series)
+        dseries = numpy.log(series)
         series[-5:] = dseries[-5:]
         assert_equal(series[-5:], dseries[-5:])
         # Now, using dates !
@@ -275,7 +276,7 @@
     def test_on2d(self):
         "Tests getitem on a 2D series"
         (a,b,d) = ([1,2,3],[3,2,1], date_array(thisday('M'),length=3))
-        ser_x = time_series(N.column_stack((a,b)), dates=d)
+        ser_x = time_series(numpy.column_stack((a,b)), dates=d)
         assert_equal(ser_x[0,0], time_series(a[0],d[0]))
         assert_equal(ser_x[0,:], time_series([(a[0],b[0])], d[0]))
         assert_equal(ser_x[:,0], time_series(a, d))
@@ -285,20 +286,20 @@
         "Tests getitem on a nD series"
         hodie = thisday('D')
         # Case 1D
-        series = time_series(N.arange(5), mask=[1,0,0,0,0], start_date=hodie)
+        series = time_series(numpy.arange(5), mask=[1,0,0,0,0], start_date=hodie)
         assert_equal(series[0], 0)
         # Case 1D + mask
-        series = time_series(N.arange(5), mask=[1,0,0,0,0], start_date=hodie)
+        series = time_series(numpy.arange(5), mask=[1,0,0,0,0], start_date=hodie)
         assert series[0] is tsmasked
         # Case 2D
-        series = time_series(N.arange(10).reshape(5,2), start_date=hodie)
+        series = time_series(numpy.arange(10).reshape(5,2), start_date=hodie)
         assert_equal(len(series), 5)
         assert_equal(series[0], [[0,1]])
         assert_equal(series[0]._dates[0], (hodie))
         assert_equal(series[:,0], [0,2,4,6,8])
         assert_equal(series[:,0]._dates, series._dates)
         # Case 2D + mask
-        series = time_series(N.arange(10).reshape(5,2), start_date=hodie,
+        series = time_series(numpy.arange(10).reshape(5,2), start_date=hodie,
                              mask=[[1,1],[0,0],[0,0],[0,0],[0,0]])
         assert_equal(len(series), 5)
         assert_equal(series[0], [[0,1]])
@@ -308,7 +309,7 @@
         assert_equal(series[:,0]._mask, [1,0,0,0,0])
         assert_equal(series[:,0]._dates, series._dates)
         # Case 3D
-        series = time_series(N.arange(30).reshape(5,3,2), start_date=hodie)
+        series = time_series(numpy.arange(30).reshape(5,3,2), start_date=hodie)
         x = series[0]
         assert_equal(len(series), 5)
         assert_equal(series[0], [[[0,1],[2,3],[4,5]]])
@@ -337,7 +338,7 @@
         assert_equal(dseries, series[3:-2])
         dseries = adjust_endpoints(series, end_date=Date('D', string='2007-01-31'))
         assert_equal(dseries.size, 31)
-        assert_equal(dseries._mask, N.r_[series._mask, [1]*16])
+        assert_equal(dseries._mask, numpy.r_[series._mask, [1]*16])
         dseries = adjust_endpoints(series, end_date=Date('D', string='2007-01-06'))
         assert_equal(dseries.size, 6)
         assert_equal(dseries, series[:6])
@@ -345,7 +346,7 @@
                                    start_date=Date('D', string='2007-01-06'),
                                    end_date=Date('D', string='2007-01-31'))
         assert_equal(dseries.size, 26)
-        assert_equal(dseries._mask, N.r_[series._mask[5:], [1]*16])
+        assert_equal(dseries._mask, numpy.r_[series._mask[5:], [1]*16])
     #
     def test_alignseries(self):
         "Tests align_series & align_with"
@@ -382,7 +383,7 @@
     #
     def test_split(self):
         """Test the split function."""
-        ms = time_series(N.arange(62).reshape(31,2),
+        ms = time_series(numpy.arange(62).reshape(31,2),
                          start_date=Date(freq='d', year=2005, month=7, day=1))
         d1,d2 = split(ms)
         assert_array_equal(d1.data, ms.data[:,0])
@@ -400,11 +401,11 @@
 date conversion algorithms already tested by asfreq in the
 test_dates test suite.
         """
-        lowFreqSeries = time_series(N.arange(10),
+        lowFreqSeries = time_series(numpy.arange(10),
                                     start_date=Date(freq='m', year=2005, month=6))
-        highFreqSeries = time_series(N.arange(100),
+        highFreqSeries = time_series(numpy.arange(100),
                                     start_date=Date(freq='b', year=2005, month=6, day=1))
-        ndseries = time_series(N.arange(124).reshape(62,2), 
+        ndseries = time_series(numpy.arange(124).reshape(62,2), 
                              start_date=Date(freq='d', year=2005, month=7, day=1))
 
         lowToHigh_start = lowFreqSeries.convert('B', position='START')
@@ -456,7 +457,7 @@
         assert(not filled_ser.has_duplicated_dates())
         assert_equal(filled_ser.size, _end - _start + 1)
         #
-        data = N.arange(5*24).reshape(5,24)
+        data = numpy.arange(5*24).reshape(5,24)
         datelist = ['2007-07-01','2007-07-02','2007-07-03','2007-07-05','2007-07-06']
         dates = date_array_fromlist(datelist, 'D')
         dseries = time_series(data, dates)
@@ -482,7 +483,7 @@
         (start, end) = ('2007-01-06', '2007-01-12')
         mask = mask_period(series, start, end, inside=True, include_edges=True,
                            inplace=False)
-        assert_equal(mask._mask, N.array([0,0,0,0,0,1,1,1,1,1,1,1,0,0,0]))
+        assert_equal(mask._mask, numpy.array([0,0,0,0,0,1,1,1,1,1,1,1,0,0,0]))
         mask = mask_period(series, start, end, inside=True, include_edges=False,
                            inplace=False)
         assert_equal(mask._mask, [0,0,0,0,0,0,1,1,1,1,1,0,0,0,0])
@@ -497,7 +498,7 @@
         series = time_series(data, dates=dates)
         mask = mask_period(series, start, end, inside=True, include_edges=True,
                            inplace=False)
-        result = N.array([0,0,0,0,0,1,1,1,1,1,1,1,0,0,0])
+        result = numpy.array([0,0,0,0,0,1,1,1,1,1,1,1,0,0,0])
         assert_equal(mask._mask, result.repeat(2).reshape(-1,2))
     #
     def test_pickling(self):
@@ -509,14 +510,14 @@
         assert_equal(series_pickled._data, series._data)
         assert_equal(series_pickled._mask, series._mask)
         #
-        data = masked_array(N.matrix(range(10)).T, mask=[1,0,0,0,0]*2)
+        data = masked_array(numpy.matrix(range(10)).T, mask=[1,0,0,0,0]*2)
         dates = date_array(start_date=thisday('D'), length=10)
         series = time_series(data,dates=dates)
         series_pickled = cPickle.loads(series.dumps())
         assert_equal(series_pickled._dates, series._dates)
         assert_equal(series_pickled._data, series._data)
         assert_equal(series_pickled._mask, series._mask)
-        assert(isinstance(series_pickled._data, N.matrix))
+        assert(isinstance(series_pickled._data, numpy.matrix))
 
 
     def test_empty_timeseries(self):
@@ -529,25 +530,25 @@
 
     def test__timeseriescompat_multiple(self):
         "Tests the compatibility of multiple time series."
-        seriesM_10 = time_series(N.arange(10),
+        seriesM_10 = time_series(numpy.arange(10),
                                     date_array(
                                       start_date=Date(freq='m', year=2005, month=1),
                                       length=10)
                                 )
 
-        seriesD_10 = time_series(N.arange(10),
+        seriesD_10 = time_series(numpy.arange(10),
                                     date_array(
                                       start_date=Date(freq='d', year=2005, month=1, day=1),
                                       length=10)
                                 )
 
-        seriesD_5 = time_series(N.arange(5),
+        seriesD_5 = time_series(numpy.arange(5),
                                     date_array(
                                       start_date=Date(freq='d', year=2005, month=1, day=1),
                                       length=5)
                                 )
 
-        seriesD_5_apr = time_series(N.arange(5),
+        seriesD_5_apr = time_series(numpy.arange(5),
                                     date_array(
                                       start_date=Date(freq='d', year=2005, month=4, day=1),
                                       length=5)
@@ -583,7 +584,7 @@
         data = masked_array(numeric.arange(15), mask=[1,0,0,0,0]*3, dtype=float_)
         series = time_series(data, dlist)
         #
-        keeper = N.array([0,1,1,1,1]*3, dtype=bool_)
+        keeper = numpy.array([0,1,1,1,1]*3, dtype=bool_)
         c_series = series.compressed()
         assert_equal(c_series._data, [1,2,3,4,6,7,8,9,11,12,13,14])
         assert_equal(c_series._mask, nomask)
@@ -593,7 +594,7 @@
                                 dates=dates)
         c_series = series_st.compressed()
         d = [1,2,3,6,7,8,11,12,13]
-        assert_equal(c_series._data, N.c_[(d,list(reversed(d)))])
+        assert_equal(c_series._data, numpy.c_[(d,list(reversed(d)))])
         assert_equal(c_series._mask, nomask)
         assert_equal(c_series._dates, dates[d])
 

Added: trunk/scipy/sandbox/timeseries/textras.py
===================================================================
--- trunk/scipy/sandbox/timeseries/textras.py	2007-09-06 00:46:24 UTC (rev 3304)
+++ trunk/scipy/sandbox/timeseries/textras.py	2007-09-06 01:13:29 UTC (rev 3305)
@@ -0,0 +1,106 @@
+"""
+Extras functions for time series.
+
+:author: Pierre GF Gerard-Marchant & Matt Knox
+:contact: pierregm_at_uga_dot_edu - mattknox_ca_at_hotmail_dot_com
+:version: $Id$
+"""
+__author__ = "Pierre GF Gerard-Marchant & Matt Knox ($Author$)"
+__version__ = '1.0'
+__revision__ = "$Revision$"
+__date__     = '$Date$'
+
+
+import numpy
+import maskedarray
+from maskedarray import masked
+
+import const as _c
+from tseries import TimeSeries
+
+
+
+__all__ = ['isleapyear', 'count_missing', 'accept_atmost_missing']
+
+#..............................................................................
+def isleapyear(year):
+    """Returns true if year is a leap year.
+    
+:Input:
+    year : integer / sequence
+        A given (list of) year(s).
+    """
+    year = numpy.asarray(year)
+    return numpy.logical_or(year % 400 == 0,
+                            numpy.logical_and(year % 4 == 0, year % 100 > 0))
+
+#..............................................................................    
+def count_missing(series):
+    """Returns the number of missing data per period.
+    
+    
+Notes
+----- 
+This function is designed to return the actual number of missing values when
+a series has been converted from a higher frequency to a lower one.
+    
+For example, converting a 12-month-long daily series to months will yield 
+a (12x31) array, with missing values in February, April, June... 
+count_missing will discard these extra missing values.
+    """
+    if not isinstance(series, TimeSeries):
+        raise TypeError, "The input data should be a valid TimeSeries object! "\
+                         "(got %s instead)" % type(series)
+    if series.ndim == 1:
+        return len(series) - series.count()
+    elif series.ndim != 2:
+        raise NotImplementedError
+    #
+    missing =  series.shape[-1] - series.count(axis=-1)
+    period = series.shape[-1]
+    freq = series.freq
+    if (period == 366) and (freq//_c.FR_ANN == 1):
+        # row: years, cols: days
+        missing -= ~isleapyear(series.year)
+    elif period == 31 and (freq//_c.FR_MTH == 1):
+        months = series.months
+        # row: months, cols: days
+        missing[numpy.array([m in [4,6,9,11] for m in months])] -= 1
+        isfeb = (months == 2)
+        missing[isfeb] -= 2
+        missing[isfeb & ~isleapyear(series.year)] -= 1
+    elif period not in (12,7):
+        raise NotImplementedError, "Not yet implemented for that frequency..."
+    return missing
+    
+#.............................................................................
+def accept_atmost_missing(series, max_missing, strict=False):
+    """Masks the rows of the series that contains more than max_missing missing data.
+    Returns a new masked series.
+    
+:Inputs:
+    series : TimeSeries
+        Input time series.
+    max_missing : float
+        Maximum acceptable number of missing values per row (if 1 or larger),
+        or maximum acceptable fraction of missing values per row (if lower than 1).
+    strict : boolean *[False]*
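+        If True, mask only rows with more than max_missing missing values; if False, also mask rows with exactly max_missing.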
+    """
+    series = numpy.array(series, copy=True, subok=True)
+    if not isinstance(series, TimeSeries):
+        raise TypeError, "The input data should be a valid TimeSeries object! "\
+                         "(got %s instead)" % type(series)
+    # Find the number of missing values ....
+    missing = count_missing(series)
+    # Transform an acceptable percentage in a number
+    if max_missing < 1:
+        max_missing = numpy.round(max_missing * series.shape[-1],0)
+    #
+    series.unshare_mask()
+    if strict:
+        series[missing > max_missing] = masked
+    else:
+        series[missing >= max_missing] = masked
+    return series
+    
\ No newline at end of file


Property changes on: trunk/scipy/sandbox/timeseries/textras.py
___________________________________________________________________
Name: svn:keywords
   + Date 
Author 
Revision
Id

Modified: trunk/scipy/sandbox/timeseries/tmulti.py
===================================================================
--- trunk/scipy/sandbox/timeseries/tmulti.py	2007-09-06 00:46:24 UTC (rev 3304)
+++ trunk/scipy/sandbox/timeseries/tmulti.py	2007-09-06 01:13:29 UTC (rev 3305)
@@ -516,16 +516,9 @@
         self_data = [d, m, mrec, dlist, dates, ts, mts]
         
         assert(isinstance(mts.f0, TimeSeries))
-        
-    if 0:        
-        mts[:2] = 5
-        assert_equal(mts.f0._data, [5,5,2,3,4])
-        assert_equal(mts.f1._data, [5,5,2,1,0])
-        assert_equal(mts.f0._mask, [0,0,0,1,1])
-        assert_equal(mts.f1._mask, [0,0,0,0,1])
-        mts.harden_mask()
-        mts[-2:] = 5
-        assert_equal(mts.f0._data, [5,5,2,3,4])
-        assert_equal(mts.f1._data, [5,5,2,5,0])
-        assert_equal(mts.f0._mask, [0,0,0,1,1])
-        assert_equal(mts.f1._mask, [0,0,0,0,1]) 
\ No newline at end of file
+    #
+    if 1:
+        recfirst = mts._data[0]
+        print recfirst, type(recfirst)
+        print mrec[0], type(mrec[0])
+    

Modified: trunk/scipy/sandbox/timeseries/tseries.py
===================================================================
--- trunk/scipy/sandbox/timeseries/tseries.py	2007-09-06 00:46:24 UTC (rev 3304)
+++ trunk/scipy/sandbox/timeseries/tseries.py	2007-09-06 01:13:29 UTC (rev 3305)
@@ -18,7 +18,7 @@
 
 import numpy
 from numpy import ndarray
-from numpy.core import bool_, complex_, float_, int_, object_
+from numpy import bool_, complex_, float_, int_, object_
 from numpy.core.multiarray import dtype
 import numpy.core.fromnumeric as fromnumeric
 import numpy.core.numeric as numeric
@@ -222,7 +222,7 @@
         common_freq = unique_freqs.item()
     except ValueError:
         raise TimeSeriesError, \
-            "All series must have same frequency!"
+            "All series must have same frequency! (got %s instead)" % unique_freqs
     return common_freq
 
 ##### --------------------------------------------------------------------------
@@ -337,49 +337,63 @@
     options = None
     _defaultobserved = None
     _genattributes = ['fill_value', 'observed']
-    def __new__(cls, data, dates=None, mask=nomask,
-                freq=None, observed=None, start_date=None, length=None,
+    def __new__(cls, data, dates, mask=nomask,
+#                freq=None, 
+                observed=None, #start_date=None, length=None,
                 dtype=None, copy=False, fill_value=None, subok=True,
                 keep_mask=True, small_mask=True, hard_mask=False, **options):
         maparms = dict(copy=copy, dtype=dtype, fill_value=fill_value,subok=subok,
                        keep_mask=keep_mask, small_mask=small_mask,
                        hard_mask=hard_mask,)
         _data = MaskedArray(data, mask=mask, **maparms)
-        # Get the frequency ..........................
-        freq = check_freq(freq)
+#        # Get the frequency ..........................
+#        freq = check_freq(freq)
         # Get the dates ..............................
-        if dates is None:
-            newdates = getattr(data, '_dates', None)
-        else:
-            newdates = dates
-        if newdates is not None:
-            if not hasattr(newdates, 'freq'):
-                newdates = date_array(dlist=dates, freq=freq)
-            if freq != _c.FR_UND and newdates.freq != freq:
-                newdates = newdates.asfreq(freq)
-        else:
-            dshape = _data.shape
-            if len(dshape) > 0:
-                if length is None:
-                    length = dshape[0]
-                newdates = date_array(start_date=start_date, length=length,
-                                      freq=freq)
-            else:
-                newdates = date_array([], freq=freq)
+        if not isinstance(dates, (Date, DateArray)):
+            raise TypeError("The input dates should be a valid Date or DateArray object! "\
+                            "(got %s instead)" % type(dates))
+#            newdates = date_array(dates)
+#        elif isinstance(dates, (tuple, list, ndarray)):
+#            newdates = date_array(dlist=dates, freq=freq)
+#        if newdates is not None:
+#            if freq != _c.FR_UND and newdates.freq != freq:
+#                newdates = newdates.asfreq(freq)
+#        else:
+#            dshape = _data.shape
+#            if len(dshape) > 0:
+#                if length is None:
+#                    length = dshape[0]
+#                newdates = date_array(start_date=start_date, length=length,
+#                                      freq=freq)
+#            else:
+#                newdates = date_array([], freq=freq)
         # Get observed ...............................
         observed = getattr(data, 'observed', fmtObserv(observed))
         # Get the data ...............................
-        if newdates._unsorted is not None:
-            _data = _data[newdates._unsorted]
         if not subok or not isinstance(_data,TimeSeries):
             _data = _data.view(cls)
         if _data is masked:
             assert(numeric.size(newdates)==1)
             return _data.view(cls)
-        assert(_datadatescompat(_data,newdates))
-        _data._dates = newdates
-        if _data._dates.size == _data.size and _data.ndim > 1:
-            _data._dates.shape = _data.shape
+        assert(_datadatescompat(_data,dates))
+#        assert(_datadatescompat(_data,newdates))
+        #
+#        _data._dates = newdates
+        _data._dates = dates
+        if _data._dates.size == _data.size: 
+            if _data.ndim > 1:
+                current_shape = data.shape
+#                if newdates._unsorted is not None:
+                if dates._unsorted is not None:
+                    _data.shape = (-1,)
+#                    _data = _data[newdates._unsorted]
+                    _data = _data[dates._unsorted]
+                    _data.shape = current_shape
+                _data._dates.shape = current_shape
+            elif dates._unsorted is not None:
+                _data = _data[dates._unsorted]
+#            elif newdates._unsorted is not None:
+#                _data = _data[newdates._unsorted]
         _data.observed = observed
         return _data
     #............................................
@@ -919,17 +933,6 @@
 TimeSeries.tofile = tofile
 
 #............................................
-def tolist(self, fill_value=None):
-    """Copies the date and data portion of the time series to a hierarchical
-python list and returns that list. Data items are converted to the nearest
-compatible Python type. Dates are converted to standard Python datetime
-objects. Masked values are filled with `fill_value`"""
-    return [(d.datetime, v) for (d,v) in \
-                                zip(self.dates, self._series.tolist())]
-TimeSeries.tolist = tolist
-
-#............................................
-
 def asrecords(series):
     """Returns the masked time series as a recarray.
 Fields are `_dates`, `_data` and _`mask`.
@@ -990,27 +993,45 @@
     `data` :
         Array of data.
     """
-    data = numeric.array(data, copy=False, subok=True)
+    maparms = dict(copy=copy, dtype=dtype, fill_value=fill_value, subok=True,
+                   keep_mask=keep_mask, small_mask=small_mask,
+                   hard_mask=hard_mask,)
+    data = masked_array(data, mask=mask, **maparms)
+    #   data = data.view(MaskedArray)
+    freq = check_freq(freq)
+    #
     if dates is None:
+        _dates = getattr(data, '_dates', None)        
+    elif isinstance(dates, (Date, DateArray)):
+        _dates = date_array(dates)
+    elif isinstance(dates, (tuple, list, ndarray)):
+        _dates = date_array(dlist=dates, freq=freq)
+    else:
+        _dates = date_array([], freq=freq)
+    #
+    if _dates is not None:
+        # Make sure _dates has the proper frequency
+        if (freq != _c.FR_UND) and (_dates.freq != freq):
+            _dates = _dates.asfreq(freq)
+    else:
         dshape = data.shape
         if len(dshape) > 0:
             if length is None:
                 length = dshape[0]
         if len(dshape) > 0:
-            dates = date_array(start_date=start_date, end_date=end_date,
+            _dates = date_array(start_date=start_date, end_date=end_date,
                                length=length, freq=freq)
         else:
-            dates = date_array([], freq=freq)
-    elif not isinstance(dates, DateArray):
-        dates = date_array(dlist=dates, freq=freq)
-    if dates._unsorted is not None:
-        idx = dates._unsorted
+            _dates = date_array([], freq=freq)
+    #
+    if _dates._unsorted is not None:
+        idx = _dates._unsorted
         data = data[idx]
-        if mask is not nomask:
-            mask = mask[idx]
-        dates._unsorted = None
-    return TimeSeries(data=data, dates=dates, mask=mask, 
-                      observed=observed, copy=copy, dtype=dtype, 
+        _dates._unsorted = None
+    return TimeSeries(data=data, dates=_dates, mask=data._mask,
+#                      freq=freq, 
+                      observed=observed,
+                      copy=copy, dtype=dtype, 
                       fill_value=fill_value, keep_mask=keep_mask, 
                       small_mask=small_mask, hard_mask=hard_mask,)
 
@@ -1597,7 +1618,7 @@
 ################################################################################
 if __name__ == '__main__':
     from maskedarray.testutils import assert_equal, assert_array_equal
-    if 1:
+    if 0:
         dlist = ['2007-01-%02i' % i for i in range(1,16)]
         dates = date_array_fromlist(dlist)
         data = masked_array(numeric.arange(15), mask=[1,0,0,0,0]*3)
@@ -1611,7 +1632,7 @@
         assert_equal(a[-5:], series[:5])
         assert_equal(b[:5], series[-5:])
     #
-    if 1:
+    if 0:
         data = numpy.arange(5*24).reshape(5,24)
         datelist = ['2007-07-01','2007-07-02','2007-07-03','2007-07-05','2007-07-06']
         dates = date_array_fromlist(datelist, 'D')
@@ -1632,4 +1653,49 @@
         assert_equal(fseries._mask, [0,0,0,1,0,])
         #
         fseries = fill_missing_dates(data, date_array_fromlist(datelist,'D'))
+    #
+    if 0:
+        "Make sure we're not losing the fill_value"
+        dlist = ['2007-01-%02i' % i for i in range(1,16)]
+        dates = date_array_fromlist(dlist)
+        series = time_series(MA.zeros(dates.shape), dates=dates, fill_value=-9999)
+        assert_equal(series.fill_value, -9999)
+    if 0:
+        "Check time_series w/ an existing time series"
+        dlist = ['2007-01-%02i' % i for i in range(1,16)]
+        dates = date_array_fromlist(dlist)
+        series = time_series(MA.zeros(dates.shape), dates=dates, fill_value=-9999)
+        newseries = time_series(series, fill_value=+9999)
+        assert_equal(newseries._data, series._data)
+        assert_equal(newseries._mask, series._mask)
+        assert_equal(newseries.fill_value, +9999)
         
+    if 0:
+        data = numpy.arange(5*24).reshape(5,24)
+        datelist = ['2007-07-01','2007-07-02','2007-07-03','2007-07-05','2007-07-06']
+        dates = date_array_fromlist(datelist, 'D')
+#        dseries = time_series(data, dates)
+        ndates = date_array_fromrange(start_date=dates[0],end_date=dates[-2])
+        #
+        (A,B) = (data.ravel()[:4].reshape(2,2), dates[:-1])
+        series = time_series(A,B)
+        fseries = fill_missing_dates(series)
+        assert_equal(fseries.shape, (5,))
+        assert_equal(fseries._mask, [0,0,0,1,0,])
+    #
+    if 1:        
+        dlist = ['2007-01-%02i' % i for i in (3,2,1)]
+        data = [10,20,30]
+#        series = time_series(data, dlist, mask=[1,0,0])
+#        data = masked_array([10,20,30],mask=[1,0,0])
+#        series = time_series(data, dlist)
+        series = time_series(data, dlist, mask=[1,0,0])
+        assert_equal(series._mask,[0,0,1])
+    if 1:
+        dlist = ['2007-01-%02i' % i for i in range(1,16)]
+        dates = date_array_fromlist(dlist)
+        data = masked_array(numeric.arange(15), mask=[1,0,0,0,0]*3)
+        series = time_series(data, dlist)
+        
+        empty_series = time_series([], freq='d')
+        a, b = align_series(series, empty_series)




More information about the Scipy-svn mailing list