[Scipy-svn] r3469 - in trunk/scipy/sandbox/timeseries: . lib/tests tests
scipy-svn at scipy.org
scipy-svn at scipy.org
Sat Oct 27 16:44:06 EDT 2007
Author: pierregm
Date: 2007-10-27 15:44:00 -0500 (Sat, 27 Oct 2007)
New Revision: 3469
Modified:
trunk/scipy/sandbox/timeseries/extras.py
trunk/scipy/sandbox/timeseries/lib/tests/test_moving_funcs.py
trunk/scipy/sandbox/timeseries/tests/test_timeseries.py
trunk/scipy/sandbox/timeseries/tests/test_trecords.py
trunk/scipy/sandbox/timeseries/trecords.py
trunk/scipy/sandbox/timeseries/tseries.py
Log:
tseries : introduce concatenate and deprecate concatenate_series
: fill_missing_dates : fixed fill_value inheritance
trecords : fixed printing of record
extras : count_missing : added support for quarterly periods.
Modified: trunk/scipy/sandbox/timeseries/extras.py
===================================================================
--- trunk/scipy/sandbox/timeseries/extras.py 2007-10-27 18:25:05 UTC (rev 3468)
+++ trunk/scipy/sandbox/timeseries/extras.py 2007-10-27 20:44:00 UTC (rev 3469)
@@ -69,6 +69,22 @@
isfeb = (months == 2)
missing[isfeb] -= 2
missing[isfeb & ~isleapyear(series.year)] -= 1
+ elif period == 92 and (freq//_c.FR_QTR == 1):
+ # rows: quarters, cols: days
+ months = series.months
+ if freq in (_c.FR_QTREJAN, _c.FR_QTRSJAN, _c.FR_QTREAPR, _c.FR_QTRSAPR,
+ _c.FR_QTREOCT, _c.FR_QTRSOCT, _c.FR_QTREOCT, _c.FR_QTRSOCT):
+ isfeb = (months == 4)
+ missing[isfeb] -= 2
+ elif freq in (_c.FR_QTREFEB, _c.FR_QTRSFEB, _c.FR_QTREMAY, _c.FR_QTRSMAY,
+ _c.FR_QTREAUG, _c.FR_QTRSAUG, _c.FR_QTRENOV, _c.FR_QTRSNOV):
+ missing[numpy.array([m in [2,11] for m in months])] -= 1
+ isfeb = (months == 2)
+ elif freq in (_c.FR_QTREMAR, _c.FR_QTRSMAR, _c.FR_QTREJUN, _c.FR_QTRSJUN,
+ _c.FR_QTRESEP, _c.FR_QTRSSEP, _c.FR_QTREDEC, _c.FR_QTRSDEC):
+ missing[numpy.array([m in [3,6] for m in months])] -= 1
+ isfeb = (months == 3)
+ missing[isfeb & ~isleapyear(series.year)] -= 1
elif period not in (12,7):
raise NotImplementedError, "Not yet implemented for that frequency..."
return missing
Modified: trunk/scipy/sandbox/timeseries/lib/tests/test_moving_funcs.py
===================================================================
--- trunk/scipy/sandbox/timeseries/lib/tests/test_moving_funcs.py 2007-10-27 18:25:05 UTC (rev 3468)
+++ trunk/scipy/sandbox/timeseries/lib/tests/test_moving_funcs.py 2007-10-27 20:44:00 UTC (rev 3469)
@@ -143,4 +143,4 @@
#------------------------------------------------------------------------------
if __name__ == "__main__":
- NumpyTest().run()
+ NumpyTest().run()
\ No newline at end of file
Modified: trunk/scipy/sandbox/timeseries/tests/test_timeseries.py
===================================================================
--- trunk/scipy/sandbox/timeseries/tests/test_timeseries.py 2007-10-27 18:25:05 UTC (rev 3468)
+++ trunk/scipy/sandbox/timeseries/tests/test_timeseries.py 2007-10-27 20:44:00 UTC (rev 3469)
@@ -29,7 +29,7 @@
from timeseries import Date, date_array_fromlist, date_array_fromrange, date_array, thisday
from timeseries import time_series, TimeSeries, adjust_endpoints, \
mask_period, align_series, align_with, fill_missing_dates, tsmasked, \
- concatenate_series, stack, split
+ concatenate, stack, split
class TestCreation(NumpyTestCase):
"Base test class for MaskedArrays."
@@ -601,27 +601,30 @@
def test_concatenate(self):
"Tests concatenate"
dlist = ['2007-%02i' % i for i in range(1,6)]
- dates = date_array_fromlist(dlist)
- data = masked_array(numeric.arange(5), mask=[1,0,0,0,0], dtype=float_)
+ _dates = date_array_fromlist(dlist)
+ data = masked_array(numpy.arange(5), mask=[1,0,0,0,0], dtype=float_)
#
- ser_1 = time_series(data, dates)
- ser_2 = time_series(data, dates=dates+10)
- newseries = concatenate_series(ser_1, ser_2)
- assert_equal(newseries._data,[0,1,2,3,4,0,0,0,0,0,0,1,2,3,4])
+ ser_1 = time_series(data, _dates)
+ ser_2 = time_series(data, dates=_dates+10)
+ newseries = concatenate((ser_1, ser_2), fill_missing=True)
+ assert_equal(newseries._series,[0,1,2,3,4,0,0,0,0,0,0,1,2,3,4])
assert_equal(newseries._mask,[1,0,0,0,0]+[1]*5+[1,0,0,0,0])
+ assert ~(newseries.has_missing_dates())
#
- ser_1 = time_series(data, dates)
- ser_2 = time_series(data, dates=dates+10)
- newseries = concatenate_series(ser_1, ser_2, keep_gap=False)
+ ser_1 = time_series(data, _dates)
+ ser_2 = time_series(data, dates=_dates+10)
+ newseries = concatenate((ser_1, ser_2), keep_gap=False)
assert_equal(newseries._data,[0,1,2,3,4,0,1,2,3,4])
assert_equal(newseries._mask,[1,0,0,0,0]+[1,0,0,0,0])
assert newseries.has_missing_dates()
#
- ser_2 = time_series(data, dates=dates+3)
- newseries = concatenate_series(ser_1, ser_2)
- assert_equal(newseries._data,[0,1,2,0,1,2,3,4])
- assert_equal(newseries._mask,[1,0,0,1,0,0,0,0])
+ ser_2 = time_series(data, dates=_dates+3)
+ newseries = concatenate((ser_1, ser_2))
+ assert_equal(newseries._data,[0,1,2,3,4,2,3,4])
+ assert_equal(newseries._mask,[1,0,0,0,0,0,0,0])
#
+ newseries = concatenate((ser_1, ser_1[::-1]))
+ assert_equal(newseries, ser_1)
Modified: trunk/scipy/sandbox/timeseries/tests/test_trecords.py
===================================================================
--- trunk/scipy/sandbox/timeseries/tests/test_trecords.py 2007-10-27 18:25:05 UTC (rev 3468)
+++ trunk/scipy/sandbox/timeseries/tests/test_trecords.py 2007-10-27 20:44:00 UTC (rev 3469)
@@ -186,4 +186,4 @@
###############################################################################
#------------------------------------------------------------------------------
if __name__ == "__main__":
- NumpyTest().run()
+ NumpyTest().run()
\ No newline at end of file
Modified: trunk/scipy/sandbox/timeseries/trecords.py
===================================================================
--- trunk/scipy/sandbox/timeseries/trecords.py 2007-10-27 18:25:05 UTC (rev 3468)
+++ trunk/scipy/sandbox/timeseries/trecords.py 2007-10-27 20:44:00 UTC (rev 3469)
@@ -255,9 +255,9 @@
for s in zip(*[getattr(self,f) for f in self.dtype.names])]
return "[%s]" % ", ".join(mstr)
else:
- mstr = numeric.asarray(self._data.item(), dtype=object_)
- mstr[list(self._fieldmask)] = masked_print_option
- return str(mstr)
+ mstr = ["%s" % ",".join([str(i) for i in s])
+ for s in zip([getattr(self,f) for f in self.dtype.names])]
+ return "(%s)" % ", ".join(mstr)
def __repr__(self):
"""x.__repr__() <==> repr(x)
@@ -522,3 +522,4 @@
print recfirst, type(recfirst)
print mrec[0], type(mrec[0])
+
Modified: trunk/scipy/sandbox/timeseries/tseries.py
===================================================================
--- trunk/scipy/sandbox/timeseries/tseries.py 2007-10-27 18:25:05 UTC (rev 3468)
+++ trunk/scipy/sandbox/timeseries/tseries.py 2007-10-27 20:44:00 UTC (rev 3469)
@@ -25,7 +25,7 @@
from numpy.core.records import recarray
from numpy.core.records import fromarrays as recfromarrays
-import maskedarray as MA
+import maskedarray
from maskedarray import MaskedArray, MAError, masked, nomask, \
filled, getmask, getmaskarray, hsplit, make_mask_none, mask_or, make_mask, \
masked_array
@@ -43,15 +43,21 @@
__all__ = [
'TimeSeriesError','TimeSeriesCompatibilityError','TimeSeries','isTimeSeries',
'time_series', 'tsmasked',
-'mask_period','mask_inside_period','mask_outside_period','compressed',
-'adjust_endpoints','align_series','align_with','aligned','convert','group_byperiod',
-'pct','tshift','fill_missing_dates', 'split', 'stack', 'concatenate_series',
+'adjust_endpoints','align_series','align_with','aligned','asrecords',
+'compressed','concatenate', 'concatenate_series','convert',
+'day_of_week','day_of_year','day',
'empty_like',
-'day_of_week','day_of_year','day','month','quarter','year',
-'hour','minute','second',
-'tofile','asrecords','flatten',
-'first_unmasked_val', 'last_unmasked_val'
- ]
+'fill_missing_dates','first_unmasked_val','flatten',
+'group_byperiod',
+'hour',
+'last_unmasked_val',
+'mask_period','mask_inside_period','mask_outside_period','minute','month',
+'pct',
+'quarter',
+'second','split', 'stack',
+'tofile','tshift',
+'year',
+]
def _unmasked_val(marray, x):
"helper function for first_unmasked_val and last_unmasked_val"
@@ -60,9 +66,9 @@
except AssertionError:
raise ValueError("array must have ndim == 1")
- idx = MA.extras.flatnotmasked_edges(marray)
+ idx = maskedarray.extras.flatnotmasked_edges(marray)
if idx is None:
- return MA.masked
+ return masked
return marray[idx[x]]
def first_unmasked_val(marray):
@@ -1268,7 +1274,7 @@
tempData = masked_array(_values, mask=_mask)
if tempData.ndim == 2 and func is not None:
- tempData = MA.apply_along_axis(func, -1, tempData, *args, **kwargs)
+ tempData = maskedarray.apply_along_axis(func, -1, tempData, *args, **kwargs)
newseries = tempData.view(type(series))
newseries._dates = date_array(start_date=start_date, length=len(newseries),
@@ -1294,7 +1300,7 @@
obj = _convert1d(series, freq, func, position, *args, **kwargs)
elif series.ndim == 2:
base = _convert1d(series[:,0], freq, func, position, *args, **kwargs)
- obj = MA.column_stack([_convert1d(m,freq,func,position,
+ obj = maskedarray.column_stack([_convert1d(m,freq,func,position,
*args, **kwargs)._series
for m in series.split()]).view(type(series))
obj._dates = base._dates
@@ -1488,7 +1494,9 @@
for (new,old) in zip(newslc,oldslc):
newdatad[new] = datad[old]
newdatam[new] = datam[old]
- newdata = MA.masked_array(newdatad, mask=newdatam, fill_value=fill_value)
+ if fill_value is None:
+ fill_value = getattr(data, 'fill_value', None)
+ newdata = maskedarray.masked_array(newdatad, mask=newdatam, fill_value=fill_value)
_data = newdata.view(datat)
_data._dates = newdates
return _data
@@ -1498,50 +1506,74 @@
resulting series has the same dates as each individual series. All series
must be date compatible.
-:Parameters:
- `*series` : the series to be stacked
+*Parameters*:
+ series : the series to be stacked
"""
_timeseriescompat_multiple(*series)
- return time_series(MA.column_stack(series), series[0]._dates,
+ return time_series(maskedarray.column_stack(series), series[0]._dates,
**_attrib_dict(series[0]))
#...............................................................................
def concatenate_series(*series, **kwargs):
- """Concatenates a sequence of series, by chronological order.
- Overlapping data are processed in a FIFO basis: the data from the first series
- of the sequence will be overwritten by the data of the second series, and so forth.
- If keep_gap is true, any gap between consecutive, non overlapping series are
- kept: the corresponding data are masked.
- """
+ msg = """The use of this function is deprecated.
+Please use concatenate instead.
+Note: Please pay attention to the order of the series!"""
+ raise NameError(msg)
+
+
+def concatenate(series, axis=0, remove_duplicates=True, fill_missing=False):
+ """Joins series together.
- keep_gap = kwargs.pop('keep_gap', True)
- if len(kwargs) > 0:
- raise KeyError("unrecognized keyword: %s" % list(kwargs)[0])
+The series are joined in chronological order. Duplicated dates are handled with
+the `remove_duplicates` parameter. If remove_duplicates=False, duplicated dates are
+saved. Otherwise, only the first occurrence of the date is kept.
+
+Example
+>>> a = time_series([1,2,3], start_date=today('D'))
+>>> b = time_series([10,20,30], start_date=today('D')+1)
+>>> c = concatenate((a,b))
+>>> c._series
+masked_array(data = [ 1 2 3 30],
+ mask = False,
+ fill_value=999999)
+
- common_f = _compare_frequencies(*series)
- start_date = min([s.start_date for s in series if s.start_date is not None])
- end_date = max([s.end_date for s in series if s.end_date is not None])
- newdtype = max([s.dtype for s in series])
- whichone = numeric.zeros((end_date-start_date+1), dtype=int_)
- newseries = time_series(numeric.empty((end_date-start_date+1), dtype=newdtype),
- dates=date_array(start_date, end_date, freq=common_f),
- mask=True)
- newdata = newseries._data
- newmask = newseries._mask
- for (k,s) in enumerate(series):
- start = s.start_date - start_date
- end = start + len(s)
- whichone[start:end] = k+1
- newdata[start:end] = s._data
- if s._mask is nomask:
- newmask[start:end] = False
- else:
- newmask[start:end] = s._mask
- keeper = whichone.astype(bool_)
- if not keep_gap:
- newseries = newseries[keeper]
+*Parameters*:
+ series : {sequence}
+ Sequence of time series to join
+ axis : {integer}
+ Axis along which to join
+ remove_duplicates : boolean
+ Whether to remove duplicated dates.
+ fill_missing : {boolean}
+ Whether to fill the missing dates with missing values.
+ """
+ # Get the common frequency, raise an error if incompatibility
+ common_f = _compare_frequencies(*series)
+ # Build an index recording which input series each element came from
+ sidx = numpy.concatenate([numpy.repeat(i,len(s))
+ for (i,s) in enumerate(series)], axis=axis)
+ # Concatenate the dates and data
+ ndates = numpy.concatenate([s._dates for s in series], axis=axis)
+ ndata = maskedarray.concatenate([s._series for s in series], axis=axis)
+ # Resort the data chronologically
+ norder = ndates.argsort(kind='mergesort')
+ ndates = ndates[norder]
+ ndata = ndata[norder]
+ sidx = sidx[norder]
+ #
+ if not remove_duplicates:
+ ndates = date_array_fromlist(ndates, freq=common_f)
+ result = time_series(ndata, dates=ndates)
else:
- newdata[~keeper] = 0
- return newseries
+ # Find the original dates
+ orig = numpy.concatenate([[True],(numpy.diff(ndates) != 0)])
+ result = time_series(ndata.compress(orig),
+ dates=ndates.compress(orig),freq=common_f)
+ if fill_missing:
+ result = fill_missing_dates(result)
+ return result
+
+
#...............................................................................
def empty_like(series):
"""Returns an empty series with the same dtype, mask and dates as series."""
@@ -1593,32 +1625,29 @@
"Make sure we're not losing the fill_value"
dlist = ['2007-01-%02i' % i for i in range(1,16)]
dates = date_array_fromlist(dlist)
- series = time_series(MA.zeros(dates.shape), dates=dates, fill_value=-9999)
+ series = time_series(maskedarray.zeros(dates.shape), dates=dates, fill_value=-9999)
assert_equal(series.fill_value, -9999)
if 0:
"Check time_series w/ an existing time series"
dlist = ['2007-01-%02i' % i for i in range(1,16)]
dates = date_array_fromlist(dlist)
- series = time_series(MA.zeros(dates.shape), dates=dates, fill_value=-9999)
+ series = time_series(maskedarray.zeros(dates.shape), dates=dates, fill_value=-9999)
newseries = time_series(series, fill_value=+9999)
assert_equal(newseries._data, series._data)
assert_equal(newseries._mask, series._mask)
assert_equal(newseries.fill_value, +9999)
-
- if 0:
- data = numpy.arange(5*24).reshape(5,24)
+ if 1:
+ "Check that the fill_value is kept"
+ data = [0,1,2,3,4,]
datelist = ['2007-07-01','2007-07-02','2007-07-03','2007-07-05','2007-07-06']
dates = date_array_fromlist(datelist, 'D')
-# dseries = time_series(data, dates)
+ dseries = time_series(data, dates, fill_value=-999)
ndates = date_array_fromrange(start_date=dates[0],end_date=dates[-2])
- #
- (A,B) = (data.ravel()[:4].reshape(2,2), dates[:-1])
- series = time_series(A,B)
- fseries = fill_missing_dates(series)
- assert_equal(fseries.shape, (5,))
- assert_equal(fseries._mask, [0,0,0,1,0,])
+ fseries = fill_missing_dates(dseries)
+ assert_equal(dseries.fill_value, fseries.fill_value)
+
#
- if 1:
+ if 0:
dlist = ['2007-01-%02i' % i for i in (3,2,1)]
data = [10,20,30]
# series = time_series(data, dlist, mask=[1,0,0])
@@ -1626,7 +1655,7 @@
# series = time_series(data, dlist)
series = time_series(data, dlist, mask=[1,0,0])
assert_equal(series._mask,[0,0,1])
- if 1:
+ if 0:
dlist = ['2007-01-%02i' % i for i in range(1,16)]
dates = date_array_fromlist(dlist)
data = masked_array(numeric.arange(15), mask=[1,0,0,0,0]*3)
@@ -1634,3 +1663,10 @@
empty_series = time_series([], freq='d')
a, b = align_series(series, empty_series)
+
+ if 1:
+ "Check concatenate..."
+ import dates
+ tt = time_series([.2,.2,.3],start_date=dates.Date('T',string='2007-10-10 01:10'))
+ tt._dates += [0, 9, 18]
+
More information about the Scipy-svn
mailing list