[Scipy-svn] r2987 - in trunk/Lib/sandbox/timeseries: . tests
scipy-svn at scipy.org
scipy-svn at scipy.org
Fri May 11 22:05:24 EDT 2007
Author: pierregm
Date: 2007-05-11 21:04:59 -0500 (Fri, 11 May 2007)
New Revision: 2987
Modified:
trunk/Lib/sandbox/timeseries/tdates.py
trunk/Lib/sandbox/timeseries/tests/test_dates.py
trunk/Lib/sandbox/timeseries/tests/test_multitimeseries.py
trunk/Lib/sandbox/timeseries/tests/test_timeseries.py
trunk/Lib/sandbox/timeseries/tmulti.py
trunk/Lib/sandbox/timeseries/tseries.py
Log:
MaskedArray
core : fixed a bug w/ subok=False that prevented the mask from being inherited
testutils : fixed assert_array_compare to force the compared elements to be pure ndarrays
TimeSeries
tseries : fixed the 'filled' method
: allows 2D arrays to be passed to convert (thx to David Huard for the inspiration)
: added David Huard's function/method split
tdates : guess_freq : returns FR_UND when there is only one element
Modified: trunk/Lib/sandbox/timeseries/tdates.py
===================================================================
--- trunk/Lib/sandbox/timeseries/tdates.py 2007-05-12 02:04:48 UTC (rev 2986)
+++ trunk/Lib/sandbox/timeseries/tdates.py 2007-05-12 02:04:59 UTC (rev 2987)
@@ -458,7 +458,9 @@
Returns a frequency code (alpha character)."""
ddif = numeric.asarray(numpy.diff(dates))
ddif.sort()
- if ddif[0] == ddif[-1] == 1.:
+ if ddif.size == 0:
+ fcode = _c.FR_UND
+ elif ddif[0] == ddif[-1] == 1.:
fcode = _c.FR_DAY
elif (ddif[0] == 1.) and (ddif[-1] == 3.):
fcode = _c.FR_BUS
@@ -699,4 +701,4 @@
if 1:
"Tests the automatic sorting of dates."
D = date_array_fromlist(dlist=['2006-01','2005-01','2004-01'],freq='M')
- assert_equal(D.view(ndarray), [24037, 24049, 24061])
\ No newline at end of file
+ assert_equal(D.view(ndarray), [24037, 24049, 24061])
Modified: trunk/Lib/sandbox/timeseries/tests/test_dates.py
===================================================================
--- trunk/Lib/sandbox/timeseries/tests/test_dates.py 2007-05-12 02:04:48 UTC (rev 2986)
+++ trunk/Lib/sandbox/timeseries/tests/test_dates.py 2007-05-12 02:04:59 UTC (rev 2987)
@@ -109,6 +109,10 @@
dobj = [DateFromString(d) for d in dlist]
odates = date_array_fromlist(dobj)
assert_equal(dates,odates)
+ #
+ D = date_array_fromlist(dlist=['2006-01'])
+ assert_equal(D.tovalue(), [732312, ])
+ assert_equal(D.freq, C.FR_UND)
print "finished test_fromsobjects"
def test_consistent_value(self):
Modified: trunk/Lib/sandbox/timeseries/tests/test_multitimeseries.py
===================================================================
--- trunk/Lib/sandbox/timeseries/tests/test_multitimeseries.py 2007-05-12 02:04:48 UTC (rev 2986)
+++ trunk/Lib/sandbox/timeseries/tests/test_multitimeseries.py 2007-05-12 02:04:59 UTC (rev 2987)
@@ -72,6 +72,7 @@
assert(mts['2007-01']._data == mrec[0])
assert_equal(mts['2007-01']._dates, dates[0])
#
+ assert(isinstance(mts.f0, TimeSeries))
assert_equal(mts.f0, time_series(d, dates=dates, mask=m))
assert_equal(mts.f1, time_series(d[::-1], dates=dates, mask=m[::-1]))
assert((mts._fieldmask == N.core.records.fromarrays([m, m[::-1]])).all())
Modified: trunk/Lib/sandbox/timeseries/tests/test_timeseries.py
===================================================================
--- trunk/Lib/sandbox/timeseries/tests/test_timeseries.py 2007-05-12 02:04:48 UTC (rev 2986)
+++ trunk/Lib/sandbox/timeseries/tests/test_timeseries.py 2007-05-12 02:04:59 UTC (rev 2987)
@@ -28,7 +28,8 @@
from timeseries import tseries
from timeseries import Date, date_array_fromlist, date_array, thisday
from timeseries import time_series, TimeSeries, adjust_endpoints, \
- mask_period, align_series, fill_missing_dates, tsmasked, concatenate_series
+ mask_period, align_series, fill_missing_dates, tsmasked, concatenate_series,\
+ stack, split
class test_creation(NumpyTestCase):
"Base test class for MaskedArrays."
@@ -76,15 +77,14 @@
"Tests the creation of a series from a datearray"
_, dates, _ = self.d
data = dates
-
+ #
series = time_series(data, dates)
assert(isinstance(series, TimeSeries))
assert_equal(series._dates, dates)
assert_equal(series._data, data)
assert_equal(series.freqstr, 'D')
-
+ #
series[5] = MA.masked
-
# ensure that series can be represented by a string after masking a value
# (there was a bug before that prevented this from working when using a
# DateArray for the data)
@@ -99,7 +99,7 @@
assert_equal(series._data.size, 15)
def test_unsorted(self):
- "Tests that the data are porperly sorted along the dates."
+ "Tests that the data are properly sorted along the dates."
dlist = ['2007-01-%02i' % i for i in (3,2,1)]
data = [10,20,30]
series = time_series(data,dlist)
@@ -368,6 +368,19 @@
assert_array_equal(shift_negative, shift_negative_result)
assert_array_equal(shift_positive, shift_positive_result)
#
+ def test_split(self):
+ """Test the split function."""
+ ms = time_series(N.arange(62).reshape(31,2),
+ start_date=Date(freq='d', year=2005, month=7, day=1))
+ d1,d2 = split(ms)
+ assert_array_equal(d1.data, ms.data[:,0])
+ assert_array_equal(d1.dates, ms.dates)
+ assert_array_equal(d2.data, ms.data[:,1])
+
+ series = self.d[0]
+ ss = split(series)[0]
+ assert_array_equal(series, ss)
+ #
def test_convert(self):
"""Test convert function
@@ -379,6 +392,8 @@
start_date=Date(freq='m', year=2005, month=6))
highFreqSeries = time_series(N.arange(100),
start_date=Date(freq='b', year=2005, month=6, day=1))
+ ndseries = time_series(N.arange(124).reshape(62,2),
+ start_date=Date(freq='d', year=2005, month=7, day=1))
lowToHigh_start = lowFreqSeries.convert('B', position='START')
@@ -411,6 +426,8 @@
(Date(freq='b', year=2005, month=6, day=1) + 99).asfreq('M'))
assert_array_equal(lowFreqSeries, lowFreqSeries.convert("M"))
+
+ assert_equal(ndseries.convert('M',sum), [[930,961],[2852,2883]])
#
def test_fill_missing_dates(self):
"""Test fill_missing_dates function"""
@@ -451,8 +468,6 @@
inplace=False)
result = N.array([0,0,0,0,0,1,1,1,1,1,1,1,0,0,0])
assert_equal(mask._mask, result.repeat(2).reshape(-1,2))
-
-
#
def test_pickling(self):
"Tests pickling/unpickling"
@@ -578,7 +593,6 @@
-
###############################################################################
#------------------------------------------------------------------------------
if __name__ == "__main__":
Modified: trunk/Lib/sandbox/timeseries/tmulti.py
===================================================================
--- trunk/Lib/sandbox/timeseries/tmulti.py 2007-05-12 02:04:48 UTC (rev 2986)
+++ trunk/Lib/sandbox/timeseries/tmulti.py 2007-05-12 02:04:59 UTC (rev 2987)
@@ -122,7 +122,8 @@
if isinstance(obj, (MaskedRecords)):
self.__dict__.update(_fieldmask=obj._fieldmask,
_hardmask=obj._hardmask,
- _fill_value=obj._fill_value,
+ _fill_value=obj._fill_value,
+ _names = obj.dtype.names
)
if isinstance(obj, MultiTimeSeries):
self.__dict__.update(observed=obj.observed,
@@ -135,7 +136,8 @@
observed=None,
_fieldmask = nomask,
_hardmask = False,
- fill_value = None
+ fill_value = None,
+ _names = self.dtype.names
)
return
@@ -152,27 +154,14 @@
#......................................................
def __getattribute__(self, attr):
- return MaskedRecords.__getattribute__(self,attr)
-# try:
-# # Returns a generic attribute
-# return object.__getattribute__(self,attr)
-# except AttributeError:
-# # OK, so attr must be a field name
-# pass
-# # Get the list of fields ......
-# _names = self.dtype.names
-# _local = self.__dict__
-# _mask = _local['_fieldmask']
-# if attr in _names:
-# _data = self._data
-# obj = numeric.asarray(_data.__getattribute__(attr)).view(MaskedArray)
-# obj._mask = make_mask(_mask.__getattribute__(attr))
-# return obj
-# elif attr == '_mask':
-# if self.size > 1:
-# return _mask.view((bool_, len(self.dtype))).all(1)
-# return _mask.view((bool_, len(self.dtype)))
-# raise AttributeError,"No attribute '%s' !" % attr
+ getattribute = MaskedRecords.__getattribute__
+ _dict = getattribute(self,'__dict__')
+ if attr in _dict.get('_names',[]):
+ obj = getattribute(self,attr).view(TimeSeries)
+ obj._dates = _dict['_dates']
+ return obj
+ return getattribute(self,attr)
+
def __setattr__(self, attr, val):
newattr = attr not in self.__dict__
@@ -526,7 +515,9 @@
mts = MultiTimeSeries(mrec,dates)
self_data = [d, m, mrec, dlist, dates, ts, mts]
- if 1:
+ assert(isinstance(mts.f0, TimeSeries))
+
+ if 0:
mts[:2] = 5
assert_equal(mts.f0._data, [5,5,2,3,4])
assert_equal(mts.f1._data, [5,5,2,1,0])
Modified: trunk/Lib/sandbox/timeseries/tseries.py
===================================================================
--- trunk/Lib/sandbox/timeseries/tseries.py 2007-05-12 02:04:48 UTC (rev 2986)
+++ trunk/Lib/sandbox/timeseries/tseries.py 2007-05-12 02:04:59 UTC (rev 2987)
@@ -28,7 +28,7 @@
import maskedarray as MA
from maskedarray import MaskedArray, MAError, masked, nomask, \
- filled, getmask, getmaskarray, make_mask_none, mask_or, make_mask, \
+ filled, getmask, getmaskarray, hsplit, make_mask_none, mask_or, make_mask, \
masked_array
import tcore as corelib
@@ -49,7 +49,7 @@
'time_series', 'tsmasked',
'mask_period','mask_inside_period','mask_outside_period','compressed',
'adjust_endpoints','align_series','aligned','convert','group_byperiod',
-'pct','tshift','fill_missing_dates', 'stack', 'concatenate_series',
+'pct','tshift','fill_missing_dates', 'split', 'stack', 'concatenate_series',
'empty_like',
'day_of_week','day_of_year','day','month','quarter','year',
'hour','minute','second',
@@ -505,14 +505,14 @@
it is enabled. Otherwise fill with fill value.
"""
desc = """\
-timeseries(data =
+timeseries(
%(data)s,
dates =
%(time)s,
freq = %(freq)s)
"""
desc_short = """\
-timeseries(data = %(data)s,
+timeseries(%(data)s,
dates = %(time)s,
freq = %(freq)s)
"""
@@ -717,6 +717,30 @@
result = super(TimeSeries, self).transpose(*axes)
result._dates = self._dates
return result
+
+ def split(self):
+ """Split a multiple series into individual columns."""
+ if self.ndim == 1:
+ return [self]
+ else:
+ n = self.shape[1]
+ arr = hsplit(self, n)[0]
+ return [self.__class__(numpy.squeeze(a),
+ self._dates,
+ **_attrib_dict(self)) for a in arr]
+
+ def filled(self, fill_value=None):
+ """Returns an array of the same class as `_data`,
+ with masked values filled with `fill_value`.
+Subclassing is preserved.
+
+If `fill_value` is None, uses self.fill_value.
+ """
+ result = self._series.filled(fill_value=fill_value).view(type(self))
+ result._dates = self._dates
+ result.copy_attributes(self)
+ return result
+
#......................................................
def copy_attributes(self, oldseries, exclude=[]):
"Copies the attributes from oldseries if they are not in the exclude list."
@@ -850,6 +874,9 @@
hour = _frommethod('hour')
minute = _frommethod('minute')
second = _frommethod('second')
+
+split = _frommethod('split')
+
#
##### ---------------------------------------------------------------------------
#---- ... Additional methods ...
@@ -1172,8 +1199,8 @@
return [adjust_endpoints(x, start_date, end_date) for x in series]
aligned = align_series
#....................................................................
-def convert(series, freq, func='auto', position='END'):
- """Converts a series to a frequency.
+def _convert1d(series, freq, func='auto', position='END'):
+ """Converts a series to a frequency. Private function called by convert
When converting to a lower frequency, func is a function that acts
on a 1-d array and returns a scalar or 1-d array. func should handle
@@ -1207,7 +1234,8 @@
"Cannot adjust a series with missing or duplicated dates."
if position.upper() not in ('END','START'):
- raise ValueError("invalid value for position argument: (%s)",str(position))
+ raise ValueError("Invalid value for position argument: (%s). "\
+ "Should be in ['END','START']," % str(position))
start_date = series._dates[0]
@@ -1241,6 +1269,37 @@
newseries.copy_attributes(series)
return newseries
+def convert(series, freq, func='auto', position='END'):
+ """Converts a 1D or 2D series to a frequency (2D series are converted column by column through _convert1d).
+
+ When converting to a lower frequency, func is a function that acts
+ on a 1-d array and returns a scalar or 1-d array. func should handle
+ masked values appropriately. If func is "auto", then an
+ appropriate function is determined based on the observed attribute
+ of the series. If func is None, then a 2D array is returned, where each
+ column represents the values appropriately grouped into the new frequency.
+ interp and position will be ignored in this case.
+
+ When converting to a higher frequency, position is 'START' or 'END'
+ and determines where the data point is in each period (eg. if going
+ from monthly to daily, and position is 'END', then each data point is
+ placed at the end of the month).
+ """
+ if series.ndim == 1:
+ obj = _convert1d(series, freq, func, position)
+ elif series.ndim == 2:
+ base = _convert1d(series[:,0], freq, func, position)
+ obj = MA.column_stack([_convert1d(m,freq,func,position)._series
+ for m in series.split()]).view(type(series))
+ obj._dates = base._dates
+ if func is None or (func,series.observed) == ('auto','UNDEFINED'):
+ shp = obj.shape
+ ncols = base.shape[-1]
+ obj.shape = (shp[0], shp[-1]//ncols, ncols)
+ obj = numpy.swapaxes(obj,1,2)
+ return obj
+
+
def group_byperiod(series, freq, position='END'):
"""Converts a series to a frequency, without any processing. If the series
has missing data, it is first filled with masked data. Duplicate values in the
@@ -1422,7 +1481,7 @@
# return time_series(newdata.reshape(nshp), newdates)
#...............................................................................
def stack(*series):
- """performs a column_stack on the data from each series, and the
+ """Performs a column_stack on the data from each series, and the
resulting series has the same dates as each individual series. All series
must be date compatible.
@@ -1513,4 +1572,24 @@
# ensure that series can be represented by a string after masking a value
# (there was a bug before that prevented this from working when using a
# DateArray for the data)
- strrep = str(series)
\ No newline at end of file
+ strrep = str(series)
+
+ if 0:
+ series = time_series(numpy.arange(1,501),
+ start_date=Date('D', string='2007-01-01'))
+ mseries = convert(series, 'M')
+ aseries = convert(mseries, 'A')
+ (freq, func, position) = ('A', None, 'END')
+
+ tmp = mseries[:,0].convert('A')
+ aseries = MA.concatenate([_convert1d(m,'A')._series for m in mseries.split()],
+ axis=-1).view(type(series))
+ aseries._dates = tmp._dates
+ shp = aseries.shape
+ aseries.shape = (shp[0], shp[-1]//tmp.shape[-1], tmp.shape[-1])
+ numpy.swapaxes(aseries,1,2)
+
+ if 1:
+ series = time_series(N.arange(124).reshape(62,2),
+ start_date=Date(freq='d', year=2005, month=7, day=1))
+ assert_equal(series.convert('M',sum), [[930,961],[2852,2883]])
\ No newline at end of file
More information about the Scipy-svn
mailing list