[Scipy-svn] r3305 - in trunk/scipy/sandbox/timeseries: . tests
scipy-svn at scipy.org
scipy-svn at scipy.org
Wed Sep 5 21:13:35 EDT 2007
Author: pierregm
Date: 2007-09-05 20:13:29 -0500 (Wed, 05 Sep 2007)
New Revision: 3305
Added:
trunk/scipy/sandbox/timeseries/tests/test_extras.py
trunk/scipy/sandbox/timeseries/textras.py
Modified:
trunk/scipy/sandbox/timeseries/tdates.py
trunk/scipy/sandbox/timeseries/tests/test_dates.py
trunk/scipy/sandbox/timeseries/tests/test_multitimeseries.py
trunk/scipy/sandbox/timeseries/tests/test_timeseries.py
trunk/scipy/sandbox/timeseries/tmulti.py
trunk/scipy/sandbox/timeseries/tseries.py
Log:
timeseries:
tseries : * simplified TimeSeries.__new__ to only accept DateArrays as dates. To create a new TimeSeries object, use time_series
extras : introducing isleapyear, count_missing, accept_atmost_missing
Modified: trunk/scipy/sandbox/timeseries/tdates.py
===================================================================
--- trunk/scipy/sandbox/timeseries/tdates.py 2007-09-06 00:46:24 UTC (rev 3304)
+++ trunk/scipy/sandbox/timeseries/tdates.py 2007-09-06 01:13:29 UTC (rev 3305)
@@ -214,6 +214,7 @@
def __getitem__(self, indx):
reset_full = True
+ # Determine what kind of index is used
if isinstance(indx, Date):
indx = self.find_dates(indx)
reset_full = False
@@ -222,7 +223,12 @@
indx = self.find_dates(indx)
except AttributeError:
pass
+ # Select the data
r = ndarray.__getitem__(self, indx)
+ # Select the corresponding unsorted indices (if needed)
+ if self._unsorted is not None:
+ unsorted = self._unsorted[indx]
+ # Case 1. A simple integer
if isinstance(r, (generic, int)):
return Date(self.freq, value=r)
elif hasattr(r, 'size') and r.size == 1:
@@ -679,41 +685,42 @@
if __name__ == '__main__':
import maskedarray.testutils
from maskedarray.testutils import assert_equal
- if 0:
- dlist = ['2007-%02i' % i for i in range(1,5)+range(7,13)]
- mdates = date_array_fromlist(dlist, 'M')
- # Using an integer
- assert_equal(mdates[0].value, 24073)
- assert_equal(mdates[-1].value, 24084)
- # Using a date
- lag = mdates.find_dates(mdates[0])
- print mdates[lag]
- assert_equal(mdates[lag], mdates[0])
- if 0:
- hodie = today('D')
- D = DateArray(today('D'))
- assert_equal(D.freq, 6000)
- if 0:
- freqs = [x[0] for x in corelib.freq_dict.values() if x[0] != 'U']
- print freqs
- for f in freqs:
- print f
- today = thisday(f)
- assert(Date(freq=f, value=today.value) == today)
- if 0:
- D = date_array(freq='U', start_date=Date('U',1), length=10)
- if 0:
- dlist = ['2007-01-%02i' % i for i in (1,2,4,5,7,8,10,11,13)]
- ords = numpy.fromiter((DateTimeFromString(s).toordinal() for s in dlist),
- float_)
- if 0:
- "Tests the automatic sorting of dates."
- D = date_array_fromlist(dlist=['2006-01','2005-01','2004-01'],freq='M')
- assert_equal(D.view(ndarray), [24037, 24049, 24061])
+# if 0:
+# dlist = ['2007-%02i' % i for i in range(1,5)+range(7,13)]
+# mdates = date_array_fromlist(dlist, 'M')
+# # Using an integer
+# assert_equal(mdates[0].value, 24073)
+# assert_equal(mdates[-1].value, 24084)
+# # Using a date
+# lag = mdates.find_dates(mdates[0])
+# print mdates[lag]
+# assert_equal(mdates[lag], mdates[0])
+# if 0:
+# hodie = today('D')
+# D = DateArray(today('D'))
+# assert_equal(D.freq, 6000)
+# if 0:
+# freqs = [x[0] for x in corelib.freq_dict.values() if x[0] != 'U']
+# print freqs
+# for f in freqs:
+# print f
+# today = thisday(f)
+# assert(Date(freq=f, value=today.value) == today)
+# if 0:
+# D = date_array(freq='U', start_date=Date('U',1), length=10)
+# if 0:
+# dlist = ['2007-01-%02i' % i for i in (1,2,4,5,7,8,10,11,13)]
+# ords = numpy.fromiter((DateTimeFromString(s).toordinal() for s in dlist),
+# float_)
+# if 0:
+# "Tests the automatic sorting of dates."
+# D = date_array_fromlist(dlist=['2006-01','2005-01','2004-01'],freq='M')
+# assert_equal(D.view(ndarray), [24037, 24049, 24061])
if 1:
dlist = ['2007-%02i' % i for i in range(1,5)+range(7,13)]
mdates = date_array_fromlist(dlist, 'M')
- print mdates.tostr()
-
\ No newline at end of file
+ if 2:
+ dlist = ['2007-01','2007-03','2007-04','2007-02']
+ mdates = date_array_fromlist(dlist, 'M')
Modified: trunk/scipy/sandbox/timeseries/tests/test_dates.py
===================================================================
--- trunk/scipy/sandbox/timeseries/tests/test_dates.py 2007-09-06 00:46:24 UTC (rev 3304)
+++ trunk/scipy/sandbox/timeseries/tests/test_dates.py 2007-09-06 01:13:29 UTC (rev 3305)
@@ -28,7 +28,8 @@
import timeseries as ts
from timeseries import const as C
from timeseries.parser import DateFromString, DateTimeFromString
-from timeseries import *
+from timeseries import Date, DateArray,\
+ thisday, today, date_array, date_array_fromlist
from timeseries.cseries import freq_dict
Added: trunk/scipy/sandbox/timeseries/tests/test_extras.py
===================================================================
--- trunk/scipy/sandbox/timeseries/tests/test_extras.py 2007-09-06 00:46:24 UTC (rev 3304)
+++ trunk/scipy/sandbox/timeseries/tests/test_extras.py 2007-09-06 01:13:29 UTC (rev 3305)
@@ -0,0 +1,84 @@
+# pylint: disable-msg=W0611, W0612, W0511,R0201
+"""Tests suite for MaskedArray.
+Adapted from the original test_ma by Pierre Gerard-Marchant
+
+:author: Pierre Gerard-Marchant & Matt Knox
+:contact: pierregm_at_uga_dot_edu & mattknox_ca_at_hotmail_dot_com
+:version: $Id$
+"""
+__author__ = "Pierre GF Gerard-Marchant & Matt Knox ($Author$)"
+__version__ = '1.0'
+__revision__ = "$Revision$"
+__date__ = '$Date$'
+
+
+import numpy
+from numpy.testing import NumpyTest, NumpyTestCase
+import maskedarray
+from maskedarray import masked
+from maskedarray.testutils import assert_equal, assert_almost_equal
+
+from timeseries import time_series, Date
+from timeseries import extras
+from timeseries.extras import *
+
+#..............................................................................
+class test_misc(NumpyTestCase):
+ "Base test class for MaskedArrays."
+ def __init__(self, *args, **kwds):
+ NumpyTestCase.__init__(self, *args, **kwds)
+ #
+ def test_leapyear(self):
+ leap = isleapyear([1900,1901,1902,1903,1904,2000,2001,2002,2003,2004])
+ assert_equal(leap, [0,0,0,0,1,1,0,0,0,1])
+
+#..............................................................................
+class test_countmissing(NumpyTestCase):
+ #
+ def __init__(self, *args, **kwds):
+ NumpyTestCase.__init__(self, *args, **kwds)
+ data = time_series(numpy.arange(731),
+ start_date=Date(string='2003-01-01', freq='D'),
+ freq='D')
+ self.data = data
+
+ def test_count_missing(self):
+ data = self.data
+ assert_equal(count_missing(data), 0)
+ assert_equal(count_missing(data.convert('A')), (0,0))
+ assert_equal(count_missing(data.convert('M')), [0]*24)
+ #
+ series = data.copy()
+ series[numpy.logical_not(data.day % 10)] = masked
+ assert_equal(count_missing(series), 70)
+ assert_equal(count_missing(series.convert('A')), (35,35))
+ assert_equal(count_missing(series.convert('M')),
+ [3,2,3,3,3,3,3,3,3,3,3,3]*2)
+ #
+ series[series.day == 31] = masked
+ assert_equal(count_missing(series), 84)
+ assert_equal(count_missing(series.convert('A')), (42,42))
+ assert_equal(count_missing(series.convert('M')),
+ [4,2,4,3,4,3,4,4,3,4,3,4]*2)
+ #
+ def test_accept_atmost_missing(self):
+ series = self.data.copy()
+ series[numpy.logical_not(self.data.day % 10)] = masked
+ result = accept_atmost_missing(series.convert('M'),3,True)
+ assert_equal(result._mask.all(-1), [0]*24)
+ result = accept_atmost_missing(series.convert('M'),3,False)
+ assert_equal(result._mask.all(-1), [1,0,1,1,1,1,1,1,1,1,1,1]*2)
+ result = accept_atmost_missing(series.convert('M'),0.1,True)
+ assert_equal(result._mask.all(-1), [0]*24)
+ result = accept_atmost_missing(series.convert('A'),35,True)
+ assert_equal(result._mask.all(-1), [0,0])
+ result = accept_atmost_missing(series.convert('A'),35,False)
+ assert_equal(result._mask.all(-1), [1,1])
+ result = accept_atmost_missing(series.convert('A'),0.05,True)
+ assert_equal(result._mask.all(-1), [1,1])
+
+
+###############################################################################
+#------------------------------------------------------------------------------
+if __name__ == "__main__":
+ NumpyTest().run()
\ No newline at end of file
Property changes on: trunk/scipy/sandbox/timeseries/tests/test_extras.py
___________________________________________________________________
Name: svn:keywords
+ Date
Author
Revision
Id
Modified: trunk/scipy/sandbox/timeseries/tests/test_multitimeseries.py
===================================================================
--- trunk/scipy/sandbox/timeseries/tests/test_multitimeseries.py 2007-09-06 00:46:24 UTC (rev 3304)
+++ trunk/scipy/sandbox/timeseries/tests/test_multitimeseries.py 2007-09-06 01:13:29 UTC (rev 3305)
@@ -12,7 +12,7 @@
import types
-import numpy as N
+import numpy
import numpy.core.fromnumeric as fromnumeric
from numpy.testing import NumpyTest, NumpyTestCase
from numpy.testing.utils import build_err_msg
@@ -41,10 +41,10 @@
def setup(self):
"Generic setup"
- d = N.arange(5)
+ d = numpy.arange(5)
m = MA.make_mask([1,0,0,1,1])
- base_d = N.r_[d,d[::-1]].reshape(2,-1).T
- base_m = N.r_[[m, m[::-1]]].T
+ base_d = numpy.r_[d,d[::-1]].reshape(2,-1).T
+ base_m = numpy.r_[[m, m[::-1]]].T
base = MA.array(base_d, mask=base_m)
mrec = MR.fromarrays(base.T,)
dlist = ['2007-%02i' % (i+1) for i in d]
@@ -62,9 +62,10 @@
assert_equal(mts['f0']._mask, m)
#
assert(isinstance(mts[0], MultiTimeSeries))
- assert_equal(mts._data[0], mrec[0])
+ assert_equal(mts._data[0], mrec._data[0])
# We can't use assert_equal here, as it tries to convert the tuple into a singleton
- assert(mts[0]._data.view(N.ndarray) == mrec[0])
+# assert(mts[0]._data.view(numpy.ndarray) == mrec[0])
+ assert_equal(numpy.asarray(mts._data[0]), mrec[0])
assert_equal(mts._dates[0], dates[0])
assert_equal(mts[0]._dates, dates[0])
#
@@ -75,8 +76,8 @@
assert(isinstance(mts.f0, TimeSeries))
assert_equal(mts.f0, time_series(d, dates=dates, mask=m))
assert_equal(mts.f1, time_series(d[::-1], dates=dates, mask=m[::-1]))
- assert((mts._fieldmask == N.core.records.fromarrays([m, m[::-1]])).all())
- assert_equal(mts._mask, N.r_[[m,m[::-1]]].all(0))
+ assert((mts._fieldmask == numpy.core.records.fromarrays([m, m[::-1]])).all())
+ assert_equal(mts._mask, numpy.r_[[m,m[::-1]]].all(0))
assert_equal(mts.f0[1], mts[1].f0)
#
assert(isinstance(mts[:2], MultiTimeSeries))
@@ -124,7 +125,7 @@
mts.harden_mask()
assert(mts._hardmask)
mts._mask = nomask
- assert_equal(mts._mask, N.r_[[m,m[::-1]]].all(0))
+ assert_equal(mts._mask, numpy.r_[[m,m[::-1]]].all(0))
mts.soften_mask()
assert(not mts._hardmask)
mts._mask = nomask
@@ -141,7 +142,7 @@
def test_fromrecords(self):
"Test from recarray."
[d, m, mrec, dlist, dates, ts, mts] = self.data
- nrec = N.core.records.fromarrays(N.r_[[d,d[::-1]]])
+ nrec = numpy.core.records.fromarrays(numpy.r_[[d,d[::-1]]])
mrecfr = fromrecords(nrec.tolist(), dates=dates)
assert_equal(mrecfr.f0, mrec.f0)
assert_equal(mrecfr.dtype, mrec.dtype)
Modified: trunk/scipy/sandbox/timeseries/tests/test_timeseries.py
===================================================================
--- trunk/scipy/sandbox/timeseries/tests/test_timeseries.py 2007-09-06 00:46:24 UTC (rev 3304)
+++ trunk/scipy/sandbox/timeseries/tests/test_timeseries.py 2007-09-06 01:13:29 UTC (rev 3305)
@@ -11,7 +11,7 @@
__revision__ = "$Revision$"
__date__ = '$Date$'
-import numpy as N
+import numpy
from numpy import bool_, complex_, float_, int_, object_
import numpy.core.fromnumeric as fromnumeric
import numpy.core.numeric as numeric
@@ -105,14 +105,15 @@
series = time_series(data,dlist)
assert_equal(series._data,[30,20,10])
#
- series = TimeSeries(data, dlist)
+ dates = date_array_fromlist(dlist, freq='D')
+ series = TimeSeries(data, dates)
assert_equal(series._data,[30,20,10])
#
- series = TimeSeries(data, dlist, mask=[1,0,0])
+ series = time_series(data, dlist, mask=[1,0,0])
assert_equal(series._mask,[0,0,1])
#
data = masked_array([10,20,30],mask=[1,0,0])
- series = TimeSeries(data, dlist)
+ series = time_series(data, dlist)
assert_equal(series._mask,[0,0,1])
#...............................................................................
@@ -265,7 +266,7 @@
# With set
series[:5] = 0
assert_equal(series[:5]._series, [0,0,0,0,0])
- dseries = N.log(series)
+ dseries = numpy.log(series)
series[-5:] = dseries[-5:]
assert_equal(series[-5:], dseries[-5:])
# Now, using dates !
@@ -275,7 +276,7 @@
def test_on2d(self):
"Tests getitem on a 2D series"
(a,b,d) = ([1,2,3],[3,2,1], date_array(thisday('M'),length=3))
- ser_x = time_series(N.column_stack((a,b)), dates=d)
+ ser_x = time_series(numpy.column_stack((a,b)), dates=d)
assert_equal(ser_x[0,0], time_series(a[0],d[0]))
assert_equal(ser_x[0,:], time_series([(a[0],b[0])], d[0]))
assert_equal(ser_x[:,0], time_series(a, d))
@@ -285,20 +286,20 @@
"Tests getitem on a nD series"
hodie = thisday('D')
# Case 1D
- series = time_series(N.arange(5), mask=[1,0,0,0,0], start_date=hodie)
+ series = time_series(numpy.arange(5), mask=[1,0,0,0,0], start_date=hodie)
assert_equal(series[0], 0)
# Case 1D + mask
- series = time_series(N.arange(5), mask=[1,0,0,0,0], start_date=hodie)
+ series = time_series(numpy.arange(5), mask=[1,0,0,0,0], start_date=hodie)
assert series[0] is tsmasked
# Case 2D
- series = time_series(N.arange(10).reshape(5,2), start_date=hodie)
+ series = time_series(numpy.arange(10).reshape(5,2), start_date=hodie)
assert_equal(len(series), 5)
assert_equal(series[0], [[0,1]])
assert_equal(series[0]._dates[0], (hodie))
assert_equal(series[:,0], [0,2,4,6,8])
assert_equal(series[:,0]._dates, series._dates)
# Case 2D + mask
- series = time_series(N.arange(10).reshape(5,2), start_date=hodie,
+ series = time_series(numpy.arange(10).reshape(5,2), start_date=hodie,
mask=[[1,1],[0,0],[0,0],[0,0],[0,0]])
assert_equal(len(series), 5)
assert_equal(series[0], [[0,1]])
@@ -308,7 +309,7 @@
assert_equal(series[:,0]._mask, [1,0,0,0,0])
assert_equal(series[:,0]._dates, series._dates)
# Case 3D
- series = time_series(N.arange(30).reshape(5,3,2), start_date=hodie)
+ series = time_series(numpy.arange(30).reshape(5,3,2), start_date=hodie)
x = series[0]
assert_equal(len(series), 5)
assert_equal(series[0], [[[0,1],[2,3],[4,5]]])
@@ -337,7 +338,7 @@
assert_equal(dseries, series[3:-2])
dseries = adjust_endpoints(series, end_date=Date('D', string='2007-01-31'))
assert_equal(dseries.size, 31)
- assert_equal(dseries._mask, N.r_[series._mask, [1]*16])
+ assert_equal(dseries._mask, numpy.r_[series._mask, [1]*16])
dseries = adjust_endpoints(series, end_date=Date('D', string='2007-01-06'))
assert_equal(dseries.size, 6)
assert_equal(dseries, series[:6])
@@ -345,7 +346,7 @@
start_date=Date('D', string='2007-01-06'),
end_date=Date('D', string='2007-01-31'))
assert_equal(dseries.size, 26)
- assert_equal(dseries._mask, N.r_[series._mask[5:], [1]*16])
+ assert_equal(dseries._mask, numpy.r_[series._mask[5:], [1]*16])
#
def test_alignseries(self):
"Tests align_series & align_with"
@@ -382,7 +383,7 @@
#
def test_split(self):
"""Test the split function."""
- ms = time_series(N.arange(62).reshape(31,2),
+ ms = time_series(numpy.arange(62).reshape(31,2),
start_date=Date(freq='d', year=2005, month=7, day=1))
d1,d2 = split(ms)
assert_array_equal(d1.data, ms.data[:,0])
@@ -400,11 +401,11 @@
date conversion algorithms already tested by asfreq in the
test_dates test suite.
"""
- lowFreqSeries = time_series(N.arange(10),
+ lowFreqSeries = time_series(numpy.arange(10),
start_date=Date(freq='m', year=2005, month=6))
- highFreqSeries = time_series(N.arange(100),
+ highFreqSeries = time_series(numpy.arange(100),
start_date=Date(freq='b', year=2005, month=6, day=1))
- ndseries = time_series(N.arange(124).reshape(62,2),
+ ndseries = time_series(numpy.arange(124).reshape(62,2),
start_date=Date(freq='d', year=2005, month=7, day=1))
lowToHigh_start = lowFreqSeries.convert('B', position='START')
@@ -456,7 +457,7 @@
assert(not filled_ser.has_duplicated_dates())
assert_equal(filled_ser.size, _end - _start + 1)
#
- data = N.arange(5*24).reshape(5,24)
+ data = numpy.arange(5*24).reshape(5,24)
datelist = ['2007-07-01','2007-07-02','2007-07-03','2007-07-05','2007-07-06']
dates = date_array_fromlist(datelist, 'D')
dseries = time_series(data, dates)
@@ -482,7 +483,7 @@
(start, end) = ('2007-01-06', '2007-01-12')
mask = mask_period(series, start, end, inside=True, include_edges=True,
inplace=False)
- assert_equal(mask._mask, N.array([0,0,0,0,0,1,1,1,1,1,1,1,0,0,0]))
+ assert_equal(mask._mask, numpy.array([0,0,0,0,0,1,1,1,1,1,1,1,0,0,0]))
mask = mask_period(series, start, end, inside=True, include_edges=False,
inplace=False)
assert_equal(mask._mask, [0,0,0,0,0,0,1,1,1,1,1,0,0,0,0])
@@ -497,7 +498,7 @@
series = time_series(data, dates=dates)
mask = mask_period(series, start, end, inside=True, include_edges=True,
inplace=False)
- result = N.array([0,0,0,0,0,1,1,1,1,1,1,1,0,0,0])
+ result = numpy.array([0,0,0,0,0,1,1,1,1,1,1,1,0,0,0])
assert_equal(mask._mask, result.repeat(2).reshape(-1,2))
#
def test_pickling(self):
@@ -509,14 +510,14 @@
assert_equal(series_pickled._data, series._data)
assert_equal(series_pickled._mask, series._mask)
#
- data = masked_array(N.matrix(range(10)).T, mask=[1,0,0,0,0]*2)
+ data = masked_array(numpy.matrix(range(10)).T, mask=[1,0,0,0,0]*2)
dates = date_array(start_date=thisday('D'), length=10)
series = time_series(data,dates=dates)
series_pickled = cPickle.loads(series.dumps())
assert_equal(series_pickled._dates, series._dates)
assert_equal(series_pickled._data, series._data)
assert_equal(series_pickled._mask, series._mask)
- assert(isinstance(series_pickled._data, N.matrix))
+ assert(isinstance(series_pickled._data, numpy.matrix))
def test_empty_timeseries(self):
@@ -529,25 +530,25 @@
def test__timeseriescompat_multiple(self):
"Tests the compatibility of multiple time series."
- seriesM_10 = time_series(N.arange(10),
+ seriesM_10 = time_series(numpy.arange(10),
date_array(
start_date=Date(freq='m', year=2005, month=1),
length=10)
)
- seriesD_10 = time_series(N.arange(10),
+ seriesD_10 = time_series(numpy.arange(10),
date_array(
start_date=Date(freq='d', year=2005, month=1, day=1),
length=10)
)
- seriesD_5 = time_series(N.arange(5),
+ seriesD_5 = time_series(numpy.arange(5),
date_array(
start_date=Date(freq='d', year=2005, month=1, day=1),
length=5)
)
- seriesD_5_apr = time_series(N.arange(5),
+ seriesD_5_apr = time_series(numpy.arange(5),
date_array(
start_date=Date(freq='d', year=2005, month=4, day=1),
length=5)
@@ -583,7 +584,7 @@
data = masked_array(numeric.arange(15), mask=[1,0,0,0,0]*3, dtype=float_)
series = time_series(data, dlist)
#
- keeper = N.array([0,1,1,1,1]*3, dtype=bool_)
+ keeper = numpy.array([0,1,1,1,1]*3, dtype=bool_)
c_series = series.compressed()
assert_equal(c_series._data, [1,2,3,4,6,7,8,9,11,12,13,14])
assert_equal(c_series._mask, nomask)
@@ -593,7 +594,7 @@
dates=dates)
c_series = series_st.compressed()
d = [1,2,3,6,7,8,11,12,13]
- assert_equal(c_series._data, N.c_[(d,list(reversed(d)))])
+ assert_equal(c_series._data, numpy.c_[(d,list(reversed(d)))])
assert_equal(c_series._mask, nomask)
assert_equal(c_series._dates, dates[d])
Added: trunk/scipy/sandbox/timeseries/textras.py
===================================================================
--- trunk/scipy/sandbox/timeseries/textras.py 2007-09-06 00:46:24 UTC (rev 3304)
+++ trunk/scipy/sandbox/timeseries/textras.py 2007-09-06 01:13:29 UTC (rev 3305)
@@ -0,0 +1,106 @@
+"""
+Extra functions for time series.
+
+:author: Pierre GF Gerard-Marchant & Matt Knox
+:contact: pierregm_at_uga_dot_edu - mattknox_ca_at_hotmail_dot_com
+:version: $Id$
+"""
+__author__ = "Pierre GF Gerard-Marchant & Matt Knox ($Author$)"
+__version__ = '1.0'
+__revision__ = "$Revision$"
+__date__ = '$Date$'
+
+
+import numpy
+import maskedarray
+from maskedarray import masked
+
+import const as _c
+from tseries import TimeSeries
+
+
+
+__all__ = ['isleapyear', 'count_missing', 'accept_atmost_missing']
+
+#..............................................................................
+def isleapyear(year):
+ """Returns true if year is a leap year.
+
+:Input:
+ year : integer / sequence
+ A given (list of) year(s).
+ """
+ year = numpy.asarray(year)
+ return numpy.logical_or(year % 400 == 0,
+ numpy.logical_and(year % 4 == 0, year % 100 > 0))
+
+#..............................................................................
+def count_missing(series):
+ """Returns the number of missing data per period.
+
+
+Notes
+-----
+This function is designed to return the actual number of missing values when
+a series has been converted from one frequency to a lower frequency.
+
+For example, converting a 12-month-long daily series to months will yield
+a (12x31) array, with missing values in February, April, June...
+count_missing will discard these extra missing values.
+ """
+ if not isinstance(series, TimeSeries):
+ raise TypeError, "The input data should be a valid TimeSeries object! "\
+ "(got %s instead)" % type(series)
+ if series.ndim == 1:
+ return len(series) - series.count()
+ elif series.ndim != 2:
+ raise NotImplementedError
+ #
+ missing = series.shape[-1] - series.count(axis=-1)
+ period = series.shape[-1]
+ freq = series.freq
+ if (period == 366) and (freq//_c.FR_ANN == 1):
+ # row: years, cols: days
+ missing -= ~isleapyear(series.year)
+ elif period == 31 and (freq//_c.FR_MTH == 1):
+ months = series.months
+ # row: months, cols: days
+ missing[numpy.array([m in [4,6,9,11] for m in months])] -= 1
+ isfeb = (months == 2)
+ missing[isfeb] -= 2
+ missing[isfeb & ~isleapyear(series.year)] -= 1
+ elif period not in (12,7):
+ raise NotImplementedError, "Not yet implemented for that frequency..."
+ return missing
+
+#.............................................................................
+def accept_atmost_missing(series, max_missing, strict=False):
+ """Masks the rows of the series that contains more than max_missing missing data.
+ Returns a new masked series.
+
+:Inputs:
+ series : TimeSeries
+ Input time series.
+ max_missing : float
+ Maximum acceptable number of missing values per row (if 1 or larger),
+ or maximum acceptable fraction of missing values per row (if lower than 1).
+ strict : boolean *[False]*
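+ If True, only rows with strictly more than max_missing missing values are masked; if False, rows with exactly max_missing missing values are masked as well.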
+ """
+ series = numpy.array(series, copy=True, subok=True)
+ if not isinstance(series, TimeSeries):
+ raise TypeError, "The input data should be a valid TimeSeries object! "\
+ "(got %s instead)" % type(series)
+ # Find the number of missing values ....
+ missing = count_missing(series)
+ # Transform an acceptable percentage in a number
+ if max_missing < 1:
+ max_missing = numpy.round(max_missing * series.shape[-1],0)
+ #
+ series.unshare_mask()
+ if strict:
+ series[missing > max_missing] = masked
+ else:
+ series[missing >= max_missing] = masked
+ return series
+
\ No newline at end of file
Property changes on: trunk/scipy/sandbox/timeseries/textras.py
___________________________________________________________________
Name: svn:keywords
+ Date
Author
Revision
Id
Modified: trunk/scipy/sandbox/timeseries/tmulti.py
===================================================================
--- trunk/scipy/sandbox/timeseries/tmulti.py 2007-09-06 00:46:24 UTC (rev 3304)
+++ trunk/scipy/sandbox/timeseries/tmulti.py 2007-09-06 01:13:29 UTC (rev 3305)
@@ -516,16 +516,9 @@
self_data = [d, m, mrec, dlist, dates, ts, mts]
assert(isinstance(mts.f0, TimeSeries))
-
- if 0:
- mts[:2] = 5
- assert_equal(mts.f0._data, [5,5,2,3,4])
- assert_equal(mts.f1._data, [5,5,2,1,0])
- assert_equal(mts.f0._mask, [0,0,0,1,1])
- assert_equal(mts.f1._mask, [0,0,0,0,1])
- mts.harden_mask()
- mts[-2:] = 5
- assert_equal(mts.f0._data, [5,5,2,3,4])
- assert_equal(mts.f1._data, [5,5,2,5,0])
- assert_equal(mts.f0._mask, [0,0,0,1,1])
- assert_equal(mts.f1._mask, [0,0,0,0,1])
\ No newline at end of file
+ #
+ if 1:
+ recfirst = mts._data[0]
+ print recfirst, type(recfirst)
+ print mrec[0], type(mrec[0])
+
Modified: trunk/scipy/sandbox/timeseries/tseries.py
===================================================================
--- trunk/scipy/sandbox/timeseries/tseries.py 2007-09-06 00:46:24 UTC (rev 3304)
+++ trunk/scipy/sandbox/timeseries/tseries.py 2007-09-06 01:13:29 UTC (rev 3305)
@@ -18,7 +18,7 @@
import numpy
from numpy import ndarray
-from numpy.core import bool_, complex_, float_, int_, object_
+from numpy import bool_, complex_, float_, int_, object_
from numpy.core.multiarray import dtype
import numpy.core.fromnumeric as fromnumeric
import numpy.core.numeric as numeric
@@ -222,7 +222,7 @@
common_freq = unique_freqs.item()
except ValueError:
raise TimeSeriesError, \
- "All series must have same frequency!"
+ "All series must have same frequency! (got %s instead)" % unique_freqs
return common_freq
##### --------------------------------------------------------------------------
@@ -337,49 +337,63 @@
options = None
_defaultobserved = None
_genattributes = ['fill_value', 'observed']
- def __new__(cls, data, dates=None, mask=nomask,
- freq=None, observed=None, start_date=None, length=None,
+ def __new__(cls, data, dates, mask=nomask,
+# freq=None,
+ observed=None, #start_date=None, length=None,
dtype=None, copy=False, fill_value=None, subok=True,
keep_mask=True, small_mask=True, hard_mask=False, **options):
maparms = dict(copy=copy, dtype=dtype, fill_value=fill_value,subok=subok,
keep_mask=keep_mask, small_mask=small_mask,
hard_mask=hard_mask,)
_data = MaskedArray(data, mask=mask, **maparms)
- # Get the frequency ..........................
- freq = check_freq(freq)
+# # Get the frequency ..........................
+# freq = check_freq(freq)
# Get the dates ..............................
- if dates is None:
- newdates = getattr(data, '_dates', None)
- else:
- newdates = dates
- if newdates is not None:
- if not hasattr(newdates, 'freq'):
- newdates = date_array(dlist=dates, freq=freq)
- if freq != _c.FR_UND and newdates.freq != freq:
- newdates = newdates.asfreq(freq)
- else:
- dshape = _data.shape
- if len(dshape) > 0:
- if length is None:
- length = dshape[0]
- newdates = date_array(start_date=start_date, length=length,
- freq=freq)
- else:
- newdates = date_array([], freq=freq)
+ if not isinstance(dates, (Date, DateArray)):
+ raise TypeError("The input dates should be a valid Date or DateArray object! "\
+ "(got %s instead)" % type(dates))
+# newdates = date_array(dates)
+# elif isinstance(dates, (tuple, list, ndarray)):
+# newdates = date_array(dlist=dates, freq=freq)
+# if newdates is not None:
+# if freq != _c.FR_UND and newdates.freq != freq:
+# newdates = newdates.asfreq(freq)
+# else:
+# dshape = _data.shape
+# if len(dshape) > 0:
+# if length is None:
+# length = dshape[0]
+# newdates = date_array(start_date=start_date, length=length,
+# freq=freq)
+# else:
+# newdates = date_array([], freq=freq)
# Get observed ...............................
observed = getattr(data, 'observed', fmtObserv(observed))
# Get the data ...............................
- if newdates._unsorted is not None:
- _data = _data[newdates._unsorted]
if not subok or not isinstance(_data,TimeSeries):
_data = _data.view(cls)
if _data is masked:
assert(numeric.size(newdates)==1)
return _data.view(cls)
- assert(_datadatescompat(_data,newdates))
- _data._dates = newdates
- if _data._dates.size == _data.size and _data.ndim > 1:
- _data._dates.shape = _data.shape
+ assert(_datadatescompat(_data,dates))
+# assert(_datadatescompat(_data,newdates))
+ #
+# _data._dates = newdates
+ _data._dates = dates
+ if _data._dates.size == _data.size:
+ if _data.ndim > 1:
+ current_shape = data.shape
+# if newdates._unsorted is not None:
+ if dates._unsorted is not None:
+ _data.shape = (-1,)
+# _data = _data[newdates._unsorted]
+ _data = _data[dates._unsorted]
+ _data.shape = current_shape
+ _data._dates.shape = current_shape
+ elif dates._unsorted is not None:
+ _data = _data[dates._unsorted]
+# elif newdates._unsorted is not None:
+# _data = _data[newdates._unsorted]
_data.observed = observed
return _data
#............................................
@@ -919,17 +933,6 @@
TimeSeries.tofile = tofile
#............................................
-def tolist(self, fill_value=None):
- """Copies the date and data portion of the time series to a hierarchical
-python list and returns that list. Data items are converted to the nearest
-compatible Python type. Dates are converted to standard Python datetime
-objects. Masked values are filled with `fill_value`"""
- return [(d.datetime, v) for (d,v) in \
- zip(self.dates, self._series.tolist())]
-TimeSeries.tolist = tolist
-
-#............................................
-
def asrecords(series):
"""Returns the masked time series as a recarray.
Fields are `_dates`, `_data` and _`mask`.
@@ -990,27 +993,45 @@
`data` :
Array of data.
"""
- data = numeric.array(data, copy=False, subok=True)
+ maparms = dict(copy=copy, dtype=dtype, fill_value=fill_value, subok=True,
+ keep_mask=keep_mask, small_mask=small_mask,
+ hard_mask=hard_mask,)
+ data = masked_array(data, mask=mask, **maparms)
+ # data = data.view(MaskedArray)
+ freq = check_freq(freq)
+ #
if dates is None:
+ _dates = getattr(data, '_dates', None)
+ elif isinstance(dates, (Date, DateArray)):
+ _dates = date_array(dates)
+ elif isinstance(dates, (tuple, list, ndarray)):
+ _dates = date_array(dlist=dates, freq=freq)
+ else:
+ _dates = date_array([], freq=freq)
+ #
+ if _dates is not None:
+ # Make sure _dates has the proper frequency
+ if (freq != _c.FR_UND) and (_dates.freq != freq):
+ _dates = _dates.asfreq(freq)
+ else:
dshape = data.shape
if len(dshape) > 0:
if length is None:
length = dshape[0]
if len(dshape) > 0:
- dates = date_array(start_date=start_date, end_date=end_date,
+ _dates = date_array(start_date=start_date, end_date=end_date,
length=length, freq=freq)
else:
- dates = date_array([], freq=freq)
- elif not isinstance(dates, DateArray):
- dates = date_array(dlist=dates, freq=freq)
- if dates._unsorted is not None:
- idx = dates._unsorted
+ _dates = date_array([], freq=freq)
+ #
+ if _dates._unsorted is not None:
+ idx = _dates._unsorted
data = data[idx]
- if mask is not nomask:
- mask = mask[idx]
- dates._unsorted = None
- return TimeSeries(data=data, dates=dates, mask=mask,
- observed=observed, copy=copy, dtype=dtype,
+ _dates._unsorted = None
+ return TimeSeries(data=data, dates=_dates, mask=data._mask,
+# freq=freq,
+ observed=observed,
+ copy=copy, dtype=dtype,
fill_value=fill_value, keep_mask=keep_mask,
small_mask=small_mask, hard_mask=hard_mask,)
@@ -1597,7 +1618,7 @@
################################################################################
if __name__ == '__main__':
from maskedarray.testutils import assert_equal, assert_array_equal
- if 1:
+ if 0:
dlist = ['2007-01-%02i' % i for i in range(1,16)]
dates = date_array_fromlist(dlist)
data = masked_array(numeric.arange(15), mask=[1,0,0,0,0]*3)
@@ -1611,7 +1632,7 @@
assert_equal(a[-5:], series[:5])
assert_equal(b[:5], series[-5:])
#
- if 1:
+ if 0:
data = numpy.arange(5*24).reshape(5,24)
datelist = ['2007-07-01','2007-07-02','2007-07-03','2007-07-05','2007-07-06']
dates = date_array_fromlist(datelist, 'D')
@@ -1632,4 +1653,49 @@
assert_equal(fseries._mask, [0,0,0,1,0,])
#
fseries = fill_missing_dates(data, date_array_fromlist(datelist,'D'))
+ #
+ if 0:
+ "Make sure we're not losing the fill_value"
+ dlist = ['2007-01-%02i' % i for i in range(1,16)]
+ dates = date_array_fromlist(dlist)
+ series = time_series(MA.zeros(dates.shape), dates=dates, fill_value=-9999)
+ assert_equal(series.fill_value, -9999)
+ if 0:
+ "Check time_series w/ an existing time series"
+ dlist = ['2007-01-%02i' % i for i in range(1,16)]
+ dates = date_array_fromlist(dlist)
+ series = time_series(MA.zeros(dates.shape), dates=dates, fill_value=-9999)
+ newseries = time_series(series, fill_value=+9999)
+ assert_equal(newseries._data, series._data)
+ assert_equal(newseries._mask, series._mask)
+ assert_equal(newseries.fill_value, +9999)
+ if 0:
+ data = numpy.arange(5*24).reshape(5,24)
+ datelist = ['2007-07-01','2007-07-02','2007-07-03','2007-07-05','2007-07-06']
+ dates = date_array_fromlist(datelist, 'D')
+# dseries = time_series(data, dates)
+ ndates = date_array_fromrange(start_date=dates[0],end_date=dates[-2])
+ #
+ (A,B) = (data.ravel()[:4].reshape(2,2), dates[:-1])
+ series = time_series(A,B)
+ fseries = fill_missing_dates(series)
+ assert_equal(fseries.shape, (5,))
+ assert_equal(fseries._mask, [0,0,0,1,0,])
+ #
+ if 1:
+ dlist = ['2007-01-%02i' % i for i in (3,2,1)]
+ data = [10,20,30]
+# series = time_series(data, dlist, mask=[1,0,0])
+# data = masked_array([10,20,30],mask=[1,0,0])
+# series = time_series(data, dlist)
+ series = time_series(data, dlist, mask=[1,0,0])
+ assert_equal(series._mask,[0,0,1])
+ if 1:
+ dlist = ['2007-01-%02i' % i for i in range(1,16)]
+ dates = date_array_fromlist(dlist)
+ data = masked_array(numeric.arange(15), mask=[1,0,0,0,0]*3)
+ series = time_series(data, dlist)
+
+ empty_series = time_series([], freq='d')
+ a, b = align_series(series, empty_series)
More information about the Scipy-svn
mailing list