From scipy-svn at scipy.org Fri Dec 1 10:32:56 2006 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Fri, 1 Dec 2006 09:32:56 -0600 (CST) Subject: [Scipy-svn] r2344 - in trunk/Lib/io: . tests Message-ID: <20061201153256.4586D39C1D8@new.scipy.org> Author: matthew.brett at gmail.com Date: 2006-12-01 09:32:51 -0600 (Fri, 01 Dec 2006) New Revision: 2344 Modified: trunk/Lib/io/recaster.py trunk/Lib/io/tests/test_recaster.py Log: Added options to recaster, minor refactoring Modified: trunk/Lib/io/recaster.py =================================================================== --- trunk/Lib/io/recaster.py 2006-11-30 19:14:12 UTC (rev 2343) +++ trunk/Lib/io/recaster.py 2006-12-01 15:32:51 UTC (rev 2344) @@ -66,26 +66,49 @@ _sctype_attributes = sctype_attributes() - def __init__(self, sctype_list=None, sctype_tols=None): + def __init__(self, sctype_list=None, + downcast_fp_to_fp = True, + downcast_fp_to_int = True, + downcast_int_to_int = True, + upcast_int_to_fp = True, + sctype_tols=None): ''' Set types for which we are attempting to downcast Input sctype_list - list of acceptable scalar types If None defaults to all system types + downcast_fp_to_fp - if True, tries to downcast floats and complex + to smaller size of same type + downcast_fp_to_int - if True, tries to downcast floats and complex + to integers + downcast_int_to_int - if True, tries to downcast integers to + smaller of same type + upcast_int_to_fp - if True, tries to upcast integers that could not + be downcast to floating point type sctype_tols - dictionary key datatype, values rtol, tol - to specify tolerances for checking near equality in downcasting + to specify tolerances for checking near equality in + downcasting + + Note that tolerance values for integers are used for upcasting + integers to floats ''' if sctype_list is None: sctype_list = self._sctype_attributes.keys() self.sctype_list = sctype_list + # Casting options self.sctype_tols = self.default_sctype_tols() + self.downcast_fp_to_fp 
= downcast_fp_to_fp + self.downcast_fp_to_int = downcast_fp_to_int + self.downcast_int_to_int = downcast_int_to_int + self.upcast_int_to_fp = upcast_int_to_fp + # Tolerances if sctype_tols is not None: self.sctype_tols.update(sctype_tols) # Cache sctype sizes, self.sized_sctypes = {} for k in ('c', 'f', 'i', 'u'): self.sized_sctypes[k] = self.sctypes_by_size(k) - # All integer sizes + # Cache all integer sizes self.ints_sized_sctypes = [] for k, v in self.sized_sctypes.items(): if k in ('u', 'i'): @@ -93,7 +116,7 @@ self.ints_sized_sctypes.append(e) if self.ints_sized_sctypes: self.ints_sized_sctypes.sort(lambda x, y: cmp(y[1], x[1])) - # Capable types list + # Cache capable types list self._capable_sctypes = {} for k in self._sctype_attributes: self._capable_sctypes[k] = self.get_capable_sctype(k) @@ -193,7 +216,13 @@ ''' Return rtol and atol for sctype ''' tols = self.sctype_tols[sctype] return tols['rtol'], tols['atol'] - + + def arr_if_valid(self, arr): + ''' Returns array if of valid sctype, None otherwise ''' + if arr.dtype.type not in self.sctype_list: + return None + return arr + def smallest_same_kind(self, arr): ''' Return arr maybe downcast to same kind, smaller storage @@ -221,19 +250,16 @@ Returns None if no recast is within tolerance ''' - dt = arr.dtype.type - rtol, atol = self.tols_from_sctype(dt) + sct = arr.dtype.type + rtol, atol = self.tols_from_sctype(sct) ret_arr = arr for T in sctypes: test_arr = arr.astype(T) if allclose(test_arr, arr, rtol, atol): ret_arr = test_arr - can_downcast = True else: break - if ret_arr.dtype.type not in self.sctype_list: - return None - return ret_arr + return self.arr_if_valid(ret_arr) def smallest_int_sctype(self, mx, mn): ''' Return integer type with smallest storage containing mx and mn @@ -256,39 +282,46 @@ def downcast(self, arr): dtk = arr.dtype.kind if dtk == 'c': - return self.downcast_complex(arr) + ret = self.downcast_complex(arr) elif dtk == 'f': - return self.downcast_float(arr) + ret = 
self.downcast_float(arr) elif dtk in ('u', 'i'): - return self.downcast_integer(arr) + ret = self.downcast_integer(arr) else: raise TypeError, 'Do not recognize array kind %s' % dtk - + if ret is None: + raise ValueError, 'Could not downcast array within precision' + return ret + def downcast_complex(self, arr): ''' Downcasts complex array to smaller type if possible ''' # can we downcast to float? - dt = arr.dtype - dti = ceil(dt.itemsize / 2) - sctypes = self.sized_sctypes['f'] - flts = [t[0] for i, t in enumerate(sctypes) if t[1] <= dti] - if flts: # There are smaller floats to try - test_arr = arr.astype(flts[0]) - rtol, atol = self.tols_from_sctype(dt.type) - if allclose(arr, test_arr, rtol, atol): - arr = test_arr + if self.downcast_fp_to_fp: + dt = arr.dtype + dti = ceil(dt.itemsize / 2) + sctypes = self.sized_sctypes['f'] + flts = [t[0] for i, t in enumerate(sctypes) if t[1] <= dti] + if flts: # There are smaller floats to try + test_arr = arr.astype(flts[0]) + rtol, atol = self.tols_from_sctype(dt.type) + if allclose(arr, test_arr, rtol, atol): + arr = test_arr # try downcasting to int or another complex type return self.downcast_to_int_or_same(arr) def downcast_to_int_or_same(self, arr): ''' Downcast to integer or smaller of same kind ''' # Try integer - test_arr = self.downcast_integer(arr) - rtol, atol = self.tols_from_sctype(arr.dtype.type) - if allclose(arr, test_arr, rtol, atol): - return test_arr + if self.downcast_fp_to_int: + test_arr = self.downcast_integer(arr) + rtol, atol = self.tols_from_sctype(arr.dtype.type) + if allclose(arr, test_arr, rtol, atol): + return test_arr # Otherwise descend the types of same kind - return self.smallest_same_kind(arr) - + if self.downcast_fp_to_fp: + return self.smallest_same_kind(arr) + return self.arr_if_valid(arr) + downcast_float = downcast_to_int_or_same def downcast_integer(self, arr): @@ -297,6 +330,8 @@ Returns None if range of arr cannot be contained in acceptable integer types ''' + if not 
self.downcast_int_to_int: + return arr_if_valid(arr) mx = amax(arr) mn = amin(arr) idt = self.smallest_int_sctype(mx, mn) @@ -306,21 +341,42 @@ def recast(self, arr): ''' Try arr downcast, upcast if necesary to get compatible type ''' - dt = arr.dtype.type - ret_arr = self.downcast(arr) - if ret_arr is not None: - return ret_arr + try: + return self.downcast(arr) + except ValueError: + pass + dt = arr.dtype # Could not downcast, arr dtype not in known list # Try upcast to larger dtype of same kind - udt = self.capable_dtype[dt] + sct = dt.type + udt = self.capable_sctype[sct] if udt is not None: return arr.astype(udt) + # Could be an integer type that we have not tried + # to downcast + if not self.downcast_int_to_int and dt.kind in ('u', 'i'): + arr = self.downcast_integer(arr) + if arr is not None: + return arr # We are stuck for floats and complex now # Can try casting integers to floats - if arr.dt.kind in ('i', 'u'): + if self.upcast_int_to_fp and dt.kind in ('i', 'u'): sctypes = self.sized_sctypes['f'] arr = self._smallest_from_sctypes(arr, sctypes) if arr is not None: return arr raise ValueError, 'Could not recast array within precision' + def recast_best_sctype(self, arr): + ''' Recast array, return closest sctype to original + + Returns tuple of recast array and best sctype to contain + original data before recasting + ''' + sct = arr.dtype.type + arr = self.recast(arr) + if sct not in self.sctype_list: + sct = self.capable_sctype[sct] + if sct is None: + sct = arr.dtype.type + return arr, sct Modified: trunk/Lib/io/tests/test_recaster.py =================================================================== --- trunk/Lib/io/tests/test_recaster.py 2006-11-30 19:14:12 UTC (rev 2343) +++ trunk/Lib/io/tests/test_recaster.py 2006-12-01 15:32:51 UTC (rev 2344) @@ -58,6 +58,7 @@ if expect_none: assert C is None, 'Expecting None for %s' % T else: + assert C is not None, 'Got unexpected None from %s' % T assert C.dtype.type == req_type, \ 'Expected %s type, got %s 
type' % \ (C.dtype.type, req_type) From scipy-svn at scipy.org Sat Dec 2 02:49:36 2006 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Sat, 2 Dec 2006 01:49:36 -0600 (CST) Subject: [Scipy-svn] r2345 - trunk/Lib/interpolate/fitpack Message-ID: <20061202074936.34FED39C00F@new.scipy.org> Author: oliphant Date: 2006-12-02 01:49:29 -0600 (Sat, 02 Dec 2006) New Revision: 2345 Modified: trunk/Lib/interpolate/fitpack/fppogr.f Log: Add back the initialization of vb. It is unclear why it was removed originally in r79 Modified: trunk/Lib/interpolate/fitpack/fppogr.f =================================================================== --- trunk/Lib/interpolate/fitpack/fppogr.f 2006-12-01 15:32:51 UTC (rev 2344) +++ trunk/Lib/interpolate/fitpack/fppogr.f 2006-12-02 07:49:29 UTC (rev 2345) @@ -45,7 +45,7 @@ pi = datan2(0d0,-one) per = pi+pi vb = v(1) -C ve = vb+per + ve = vb+per cccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc c part 1: determination of the number of knots and their position. 
c c **************************************************************** c From scipy-svn at scipy.org Sun Dec 3 23:37:52 2006 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Sun, 3 Dec 2006 22:37:52 -0600 (CST) Subject: [Scipy-svn] r2346 - trunk/Lib/sandbox/models Message-ID: <20061204043752.BC71239C0C5@new.scipy.org> Author: jonathan.taylor Date: 2006-12-03 22:37:49 -0600 (Sun, 03 Dec 2006) New Revision: 2346 Added: trunk/Lib/sandbox/models/gam.py Log: added rough sketch at generalized additive models Added: trunk/Lib/sandbox/models/gam.py =================================================================== --- trunk/Lib/sandbox/models/gam.py 2006-12-02 07:49:29 UTC (rev 2345) +++ trunk/Lib/sandbox/models/gam.py 2006-12-04 04:37:49 UTC (rev 2346) @@ -0,0 +1,240 @@ +import numpy as N +from scipy.sandbox.models import family + +from glm import model as glm +from bspline import SmoothingSpline + +def default_smoother(x): + _x = x.copy() + _x.sort() + n = x.shape[0] + # taken form smooth.spline in R + print "herenow" + if n < 50: + nknots = n + else: + a1 = N.log(50) / N.log(2) + a2 = N.log(100) / N.log(2) + a3 = N.log(140) / N.log(2) + a4 = N.log(200) / N.log(2) + if n < 200: + nknots = 2**(a1 + (a2 - a1) * (n - 50)/150.) + elif n < 800: + nknots = 2**(a2 + (a3 - a2) * (n - 200)/600.) + elif n < 3200: + nknots = 2**(a3 + (a4 - a3) * (n - 800)/2400.) 
+ else: + nknots = 200 + (n - 3200.)**0.2 + knots = _x[N.linspace(0, n-1, nknots).astype(N.int32)] + s = SmoothingSpline(knots) + s.gram(d=2) + s.target_df = 5 + return s + +class offset: + + def __init__(self, fn, offset): + self.fn = fn + self.offset = offset + + def __call__(self, *args, **kw): + return self.fn(*args, **kw) + offset + +class results: + + def __init__(self, Y, alpha, design, smoothers, family, offset): + self.Y = Y + self.alpha = alpha + self.smoothers = smoothers + self.offset = offset + self.family = family + self.design = design + self.offset = offset + self.mu = self(design) + + def __call__(self, design): + return self.family.link.inverse(self.predict(design)) + + def predict(self, design): + return N.sum(self.smoothed(design), axis=0) + self.alpha + + def smoothed(self, design): + return N.array([self.smoothers[i](design[:,i]) + self.offset[i] for i in range(design.shape[1])]) + +class additive_model: + + def __init__(self, design, smoothers=None, weights=None): + self.design = design + if weights is not None: + self.weights = weights + else: + self.weights = N.ones(self.design.shape[0]) + + self.smoothers = smoothers or [default_smoother(design[:,i]) for i in range(design.shape[1])] + for i in range(design.shape[1]): + self.smoothers[i].df = 10 + self.family = family.Gaussian() + + def __iter__(self): + self.iter = 0 + self.dev = N.inf + return self + + def next(self): + _results = self.results; Y = self.results.Y + mu = _results.predict(self.design) + offset = N.zeros(self.design.shape[1], N.float64) + alpha = (Y * self.weights).sum() / self.weights.sum() + for i in range(self.design.shape[1]): + tmp = self.smoothers[i](self.design[:,i]) + self.smoothers[i].smooth(Y - alpha - mu + tmp, x=self.design[:,i], + weights=self.weights) + tmp2 = self.smoothers[i](self.design[:,i]) + offset[i] = -(tmp2*self.weights).sum() / self.weights.sum() + mu += tmp2 - tmp + + return results(Y, alpha, self.design, self.smoothers, self.family, offset) + + def 
cont(self, tol=1.0e-02): + + curdev = (((self.results.Y - self.results.predict(self.design))**2) * self.weights).sum() + + if N.fabs((self.dev - curdev) / curdev) < tol: + self.dev = curdev + return False + + self.iter += 1 + self.dev = curdev + return True + + def df_resid(self): + return self.results.Y.shape[0] - N.array([self.smoothers[i].df_fit() for i in range(self.design.shape[1])]).sum() + + def estimate_scale(self): + return ((self.results.Y - self.results(self.design))**2).sum() / self.df_resid() + + def fit(self, Y): + iter(self) + mu = 0 + alpha = (Y * self.weights).sum() / self.weights.sum() + + offset = N.zeros(self.design.shape[1], N.float64) + + for i in range(self.design.shape[1]): + self.smoothers[i].smooth(Y - alpha - mu, x=self.design[:,i], + weights=self.weights) + tmp = self.smoothers[i](self.design[:,i]) + offset[i] = (tmp * self.weights).sum() / self.weights.sum() + tmp -= tmp.sum() + mu += tmp + + self.results = results(Y, alpha, self.design, self.smoothers, self.family, offset) + + while self.cont(): + self.results = self.next() + + return self.results + +class model(glm, additive_model): + + niter = 10 + + def __init__(self, design, smoothers=None, family=family.Gaussian()): + glm.__init__(self, design, family=family) + additive_model.__init__(self, design, smoothers=smoothers) + self.family = family + + def next(self): + _results = self.results; Y = _results.Y + _results.mu = self.family.link.inverse(_results.predict(self.design)) + self.weights = self.family.weights(_results.mu) + Z = _results.predict(self.design) + self.family.link.deriv(_results.mu) * (Y - _results.mu) + m = additive_model(self.design, smoothers=self.smoothers, weights=self.weights) + _results = m.fit(Z) + _results.Y = Y + _results.mu = self.family.link.inverse(_results.predict(self.design)) + self.iter += 1 + self.results = _results + + return _results + + def estimate_scale(self, Y=None): + """ + Return Pearson\'s X^2 estimate of scale. 
+ """ + + if Y is None: + Y = self.Y + resid = Y - self.results.mu + return (N.power(resid, 2) / self.family.variance(self.results.mu)).sum() / additive_model.df_resid(self) + + def fit(self, Y): + self.Y = N.asarray(Y, N.float64) + + iter(self) + alpha = self.Y.mean() + Z = self.family.link(alpha) + self.family.link.deriv(alpha) * (Y - alpha) + m = additive_model(self.design, smoothers=self.smoothers) + self.results = m.fit(Z) + self.results.mu = self.family.link.inverse(self.results.predict(self.design)) + self.results.Y = Y + + while self.cont(): + self.results = self.next() + self.scale = self.results.scale = self.estimate_scale() + + + return self.results + + +if __name__ == "__main__": + + import numpy.random as R + n = lambda x: (x - x.mean()) / x.std() + n_ = lambda x: (x - x.mean()) + x1 = R.standard_normal(500) + x1.sort() + x2 = R.standard_normal(500) + x2.sort() + y = R.standard_normal((500,)) + f1 = lambda x1: (x1 + x1**2 - 3 - 1.5 * x1**3 + N.exp(-x1)) + f2 = lambda x2: (x2 + x2**2 - N.exp(x2)) + z = n(f1(x1)) + n(f2(x2)) + z = n(z) * 0.1 + + y += z + d = N.array([x1,x2]).T + m = additive_model(d) + m.fit(y) + x = N.linspace(-2,2,50) + + import scipy.stats, time + + f = family.Binomial() + b = N.asarray([scipy.stats.bernoulli.rvs(p) for p in f.link.inverse(y)]) + b.shape = y.shape + m = model(d, family=f) + toc = time.time() + m.fit(b) + tic = time.time() + import pylab + pylab.figure(num=1) + pylab.plot(x1, n(m.smoothers[0](x1))); pylab.plot(x1, n(f1(x1)), linewidth=2) + pylab.figure(num=2) + pylab.plot(x2, n(m.smoothers[1](x2))); pylab.plot(x2, n(f2(x2)), linewidth=2); + print tic-toc + + f = family.Poisson() + p = N.asarray([scipy.stats.poisson.rvs(p) for p in f.link.inverse(y)]) + p.shape = y.shape + m = model(d, family=f) + toc = time.time() + m.fit(p) + tic = time.time() + print tic-toc + pylab.figure(num=1) + pylab.plot(x1, n(m.smoothers[0](x1))); pylab.plot(x1, n(f1(x1)), linewidth=2) + pylab.figure(num=2) + pylab.plot(x2, 
n(m.smoothers[1](x2))); pylab.plot(x2, n(f2(x2)), linewidth=2) + pylab.show() + From scipy-svn at scipy.org Sun Dec 3 23:40:36 2006 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Sun, 3 Dec 2006 22:40:36 -0600 (CST) Subject: [Scipy-svn] r2347 - trunk/Lib/sandbox/models Message-ID: <20061204044036.DCE9B39C0C5@new.scipy.org> Author: jonathan.taylor Date: 2006-12-03 22:40:31 -0600 (Sun, 03 Dec 2006) New Revision: 2347 Added: trunk/Lib/sandbox/models/bspline.py Removed: trunk/Lib/sandbox/models/bsplines.py Log: renmaed bsplines.py -> bspline.py Copied: trunk/Lib/sandbox/models/bspline.py (from rev 2310, trunk/Lib/sandbox/models/bsplines.py) =================================================================== --- trunk/Lib/sandbox/models/bsplines.py 2006-11-08 00:20:45 UTC (rev 2310) +++ trunk/Lib/sandbox/models/bspline.py 2006-12-04 04:40:31 UTC (rev 2347) @@ -0,0 +1,371 @@ + +import numpy as N +import numpy.linalg as L + +from scipy.optimize import golden +from scipy.sandbox.models import _bspline +from scipy.linalg import solveh_banded + +def _upper2lower(ub): + """ + Convert upper triangular banded matrix to lower banded form. + """ + + lb = N.zeros(ub.shape, ub.dtype) + nrow, ncol = ub.shape + for i in range(ub.shape[0]): + lb[i,0:(ncol-i)] = ub[nrow-1-i,i:ncol] + lb[i,(ncol-i):] = ub[nrow-1-i,0:i] + return lb + +def _lower2upper(lb): + """ + Convert upper triangular banded matrix to lower banded form. + """ + + ub = N.zeros(lb.shape, lb.dtype) + nrow, ncol = lb.shape + for i in range(lb.shape[0]): + ub[nrow-1-i,i:ncol] = lb[i,0:(ncol-i)] + ub[nrow-1-i,0:i] = lb[i,(ncol-i):] + return ub + +def _triangle2unit(tb, lower=0): + """ + Take a banded triangular matrix and return its diagonal and the unit matrix: + the banded triangular matrix with 1's on the diagonal. 
+ """ + + if lower: d = tb[0].copy() + else: d = tb[-1].copy() + + if lower: return d, (tb / d) + else: + l = _upper2lower(tb) + return d, _lower2upper(l / d) + +def _trace_symbanded(a,b, lower=0): + """ + Compute the trace(a*b) for two upper or lower banded real symmetric matrices. + """ + + if lower: + t = _zero_triband(a * b, lower=1) + return t[0].sum() + 2 * t[1:].sum() + else: + t = _zero_triband(a * b, lower=0) + return t[-1].sum() + 2 * t[:-1].sum() + + +def _zero_triband(a, lower=0): + """ + Zero out unnecessary elements of a real symmetric banded matrix. + """ + + nrow, ncol = a.shape + if lower: + for i in range(nrow): a[i,(ncol-i):] = 0. + else: + for i in range(nrow): a[i,0:i] = 0. + return a + +def _zerofunc(x): + return N.zeros(x.shape, N.float) + + +class BSpline: + + """ + knots should be sorted, knots[0] is lower boundary, knots[1] is upper boundary + knots[1:-1] are internal knots + """ + + def __init__(self, knots, order=4, coef=None, M=None, eps=0.0): + knots = N.squeeze(N.unique(N.asarray(knots))) + + if knots.ndim != 1: + raise ValueError, 'expecting 1d array for knots' + + self.m = order + if M is None: + M = self.m + self.M = M +# if self.M < self.m: +# raise 'multiplicity of knots, M, must be at least equal to order, m' + + self.tau = N.hstack([[knots[0]-eps]*(self.M-1), knots, [knots[-1]+eps]*(self.M-1)]) + self.K = knots.shape[0] - 2 + if coef is None: + self.coef = N.zeros((self.K + 2 * self.M - self.m), N.float64) + else: + self.coef = N.squeeze(coef) + if self.coef.shape != (self.K + 2 * self.M - self.m): + raise ValueError, 'coefficients of Bspline have incorrect shape' + + def __call__(self, x): + b = N.asarray(self.basis(x)).T + return N.squeeze(N.dot(b, self.coef)) + + def basis_element(self, x, i, d=0): + x = N.asarray(x, N.float64) + _shape = x.shape + if _shape == (): + x.shape = (1,) + x.shape = (N.product(_shape,axis=0),) + if i < self.tau.shape[0] - 1: + ## TODO: OWNDATA flags... 
+ v = _bspline.evaluate(x, self.tau, self.m, d, i, i+1) + else: + return N.zeros(x.shape, N.float64) + + if (i == self.tau.shape[0] - self.m): + v = N.where(N.equal(x, self.tau[-1]), 1, v) + v.shape = _shape + return v + + def basis(self, x, d=0, upper=None, lower=None): + x = N.asarray(x) + _shape = x.shape + if _shape == (): + x.shape = (1,) + x.shape = (N.product(_shape,axis=0),) + + if upper is None: + upper = self.tau.shape[0] - self.m + if lower is None: + lower = 0 + upper = min(upper, self.tau.shape[0] - self.m) + lower = max(0, lower) + + d = N.asarray(d) + if d.shape == (): + v = _bspline.evaluate(x, self.tau, self.m, int(d), lower, upper) + else: + if d.shape[0] != 2: + raise ValueError, "if d is not an integer, expecting a jx2 array with first row indicating order \ + of derivative, second row coefficient in front." + + v = 0 + for i in range(d.shape[1]): + v += d[1,i] * _bspline.evaluate(x, self.tau, self.m, d[0,i], lower, upper) + + v.shape = (upper-lower,) + _shape + if upper == self.tau.shape[0] - self.m: + v[-1] = N.where(N.equal(x, self.tau[-1]), 1, v[-1]) + return v + + def gram(self, d=0, full=False): + """ + Compute Gram inner product matrix. + """ + + d = N.squeeze(d) + if N.asarray(d).shape == (): + self.g = _bspline.gram(self.tau, self.m, int(d), int(d)) + else: + d = N.asarray(d) + if d.shape[0] != 2: + raise ValueError, "if d is not an integer, expecting a jx2 array with first row indicating order \ + of derivative, second row coefficient in front." + if d.shape == (2,): + d.shape = (2,1) + self.g = 0 + for i in range(d.shape[1]): + for j in range(d.shape[1]): + self.g += d[1,i]* d[1,j] * _bspline.gram(self.tau, self.m, int(d[0,i]), int(d[0,j])) + self.g = self.g.T + self.d = d + return N.nan_to_num(self.g) + +class SmoothingSpline(BSpline): + + penmax = 30. 
+ method = "target_df" + target_df = 5 + default_pen = 1.0e-03 + + def smooth(self, y, x=None, weights=None): + if self.method == "target_df": + self.fit_target_df(y, x=x, weights=weights, df=self.target_df) + elif self.method == "optimize_gcv": + self.fit_optimize_gcv(y, x=x, weights=weights) + + def fit(self, y, x=None, weights=None, pen=0.): + banded = True + + if x is None: + x = self.tau[(self.M-1):-(self.M-1)] # internal knots + + if pen == 0.: # can't use cholesky for singular matrices + banded = False + + if x.shape != y.shape: + raise ValueError, 'x and y shape do not agree, by default x are the Bspline\'s internal knots' + + bt = self.basis(x) + if pen >= self.penmax: + pen = self.penmax + + + if weights is not None: + self.weights = weights + else: + self.weights = 1. + + _w = N.sqrt(self.weights) + bt *= _w + + # throw out rows with zeros (this happens at boundary points!) + + mask = N.flatnonzero(1 - N.alltrue(N.equal(bt, 0), axis=0)) + + bt = bt[:,mask] + y = y[mask] + + self.df_total = y.shape[0] + bty = N.dot(bt, _w * y) + self.N = y.shape[0] + if not banded: + self.btb = N.dot(bt, bt.T) + _g = band2array(self.g, lower=1, symmetric=True) + self.coef, _, self.rank = L.lstsq(self.btb + pen*_g, bty)[0:3] + self.rank = min(self.rank, self.btb.shape[0]) + else: + self.btb = N.zeros(self.g.shape, N.float64) + nband, nbasis = self.g.shape + for i in range(nbasis): + for k in range(min(nband, nbasis-i)): + self.btb[k,i] = (bt[i] * bt[i+k]).sum() + + bty.shape = (1,bty.shape[0]) + self.chol, self.coef = solveh_banded(self.btb + + pen*self.g, + bty, lower=1) + + self.coef = N.squeeze(self.coef) + self.resid = y * self.weights - N.dot(self.coef, bt) + self.pen = pen + + def gcv(self): + """ + Generalized cross-validation score of current fit. + """ + + norm_resid = (self.resid**2).sum() + return norm_resid / (self.df_total - self.trace()) + + def df_resid(self): + """ + self.N - self.trace() + + where self.N is the number of observations of last fit. 
+ """ + + return self.N - self.trace() + + def df_fit(self): + """ + = self.trace() + + How many degrees of freedom used in the fit? + """ + return self.trace() + + def trace(self): + """ + Trace of the smoothing matrix S(pen) + """ + + if self.pen > 0: + _invband = _bspline.invband(self.chol.copy()) + tr = _trace_symbanded(_invband, self.btb, lower=1) + return tr + else: + return self.rank + + def fit_target_df(self, y, x=None, df=None, weights=None, tol=1.0e-03): + """ + Fit smoothing spline with approximately df degrees of freedom + used in the fit, i.e. so that self.trace() is approximately df. + + In general, df must be greater than the dimension of the null space + of the Gram inner product. For cubic smoothing splines, this means + that df > 2. + + """ + + df = df or self.target_df + + apen, bpen = 0, 1.0e-03 + olddf = y.shape[0] - self.m + + if hasattr(self, "pen"): + self.fit(y, x=x, weights=weights, pen=self.pen) + curdf = self.trace() + if N.fabs(curdf - df) / df < tol: + return + if curdf > df: + apen, bpen = self.pen, 2 * self.pen + else: + apen, bpen = 0., self.pen + + while True: + curpen = 0.5 * (apen + bpen) + self.fit(y, x=x, weights=weights, pen=curpen) + curdf = self.trace() + if curdf > df: + apen, bpen = curpen, 2 * curpen + else: + apen, bpen = apen, curpen + if apen >= self.penmax: + raise ValueError, "penalty too large, try setting penmax higher or decreasing df" + if N.fabs(curdf - df) / df < tol: + break + + def fit_optimize_gcv(self, y, x=None, weights=None, tol=1.0e-03, + bracket=(0,1.0e-03)): + """ + Fit smoothing spline trying to optimize GCV. + + Try to find a bracketing interval for scipy.optimize.golden + based on bracket. + + It is probably best to use target_df instead, as it is + sometimes difficult to find a bracketing interval. 
+ + """ + + def _gcv(pen, y, x): + self.fit(y, x=x, pen=N.exp(pen)) + a = self.gcv() + return a + + a = golden(_gcv, args=(y,x), brack=(-100,20), tol=tol) + + +def band2array(a, lower=0, symmetric=False, hermitian=False): + """ + Take an upper or lower triangular banded matrix and return a matrix using + LAPACK storage convention. For testing banded Cholesky decomposition, etc. + """ + + n = a.shape[1] + r = a.shape[0] + _a = 0 + + if not lower: + for j in range(r): + _b = N.diag(a[r-1-j],k=j)[j:(n+j),j:(n+j)] + _a += _b + if symmetric and j > 0: _a += _b.T + elif hermitian and j > 0: _a += _b.conjugate().T + else: + for j in range(r): + _b = N.diag(a[j],k=j)[0:n,0:n] + _a += _b + if symmetric and j > 0: _a += _b.T + elif hermitian and j > 0: _a += _b.conjugate().T + _a = _a.T + + return _a + Deleted: trunk/Lib/sandbox/models/bsplines.py =================================================================== --- trunk/Lib/sandbox/models/bsplines.py 2006-12-04 04:37:49 UTC (rev 2346) +++ trunk/Lib/sandbox/models/bsplines.py 2006-12-04 04:40:31 UTC (rev 2347) @@ -1,281 +0,0 @@ -import numpy as N -import numpy.linalg as L -import scipy.integrate -from scipy.sandbox.models import _bspline - -# note to self: check out eig_banded! in linalg.decomp? 
- -def _zerofunc(x): - return N.zeros(x.shape, N.float) - -class BSpline: - - """ - knots should be sorted, knots[0] is lower boundary, knots[1] is upper boundary - knots[1:-1] are internal knots - """ - - def __init__(self, knots, order=4, coef=None, M=None, eps=0.0): - self.knots = N.squeeze(N.asarray(knots)) - - if self.knots.ndim != 1: - raise ValueError, 'expecting 1d array for knots' - - self.m = order - if M is None: - M = self.m - self.M = M - if self.M < self.m: - raise 'multiplicity of knots, M, must be at least equal to order, m' - - self.tau = N.hstack([[knots[0]-eps]*(self.M-1), knots, [knots[-1]+eps]*(self.M-1)]) - self.K = self.knots.shape[0] - 2 - if coef is None: - coef = N.zeros((self.K + 2 * self.M - self.m), N.float64) - else: - self.coef = N.squeeze(coef) - if self.coef.shape != (self.K + 2 * self.M - self.m): - raise ValueError, 'coefficients of Bspline have incorrect shape' - def __call__(self, x): - v = 0 - b = self.basis(x) - for i in range(self.coef.shape[0]): - v += b[i] * self.coef[i] - return v - - def basis_element(self, x, i, d=0): - x = N.asarray(x, N.float64) - _shape = x.shape - if _shape == (): - x.shape = (1,) - x.shape = (N.product(_shape,axis=0),) - if i < self.tau.shape[0] - 1: - ## TODO: OWNDATA flags... 
- v = _bspline.evaluate(x, self.tau, self.m, d, i, i+1) - else: - return N.zeros(x.shape, N.float64) - - if (i == self.tau.shape[0] - self.m): - v = N.where(N.equal(x, self.tau[-1]), 1, v) - v.shape = _shape - return v - - def basis(self, x, d=0, upper=None, lower=None): - x = N.asarray(x) - _shape = x.shape - if _shape == (): - x.shape = (1,) - x.shape = (N.product(_shape,axis=0),) - - if upper is None: - upper = self.tau.shape[0] - self.m - if lower is None: - lower = 0 - upper = min(upper, self.tau.shape[0] - self.m) - lower = max(0, lower) - - v = _bspline.evaluate(x, self.tau, self.m, d, lower, upper) - v.shape = (upper-lower,) + _shape - if upper == self.tau.shape[0] - self.m: - v[-1] = N.where(N.equal(x, self.tau[-1]), 1, v[-1]) - return v - -## x = N.asarray(x) -## if upper == None: -## upper = self.tau.shape[0]-1 -## if lower == None: -## lower = 0 -## lower = max(0, lower); upper = min(self.tau.shape[0]-1, upper) -## which = [lower, upper] -## which.sort() -## lower, upper = which - -## if m is None: -## m = self.m - -## if m == 1: -## nbasis = upper - lower -## v = N.zeros((nbasis,) + x.shape, N.float64) -## for i in range(nbasis): -## if self.tau[i+lower] == self.tau[i+lower+1]: -## v[i] = N.zeros(x.shape, N.float64) -## else: -## if d <= 0: -## v[i] = (N.greater_equal(x, self.tau[i+lower]) * -## N.less(x, self.tau[i+lower+1])) -## return v -## else: -## b = self.basis(x, d=d-1, m=m-1, lower=lower, -## upper=upper+1) -## nbasis = b.shape[0] - 1 - -## v = N.zeros((nbasis,) + x.shape, N.float64) - -## for i in range(nbasis): - -## if self.tau[i+lower+m-1] != self.tau[i+lower]: -## if d <= 0: -## f1 = (x - self.tau[i+lower]) / (self.tau[i+lower+m-1] - self.tau[i+lower]) -## else: -## f1 = (m-1) / (self.tau[i+lower+m-1] - self.tau[i+lower]) -## else: -## f1 = 0 - -## if self.tau[i+lower+m] != self.tau[i+lower+1]: -## if d <= 0: -## f2 = (self.tau[i+lower+m] - x) / (self.tau[i+lower+m] - self.tau[i+lower+1]) -## else: -## f2 = -(m-1) / (self.tau[i+lower+m] - 
self.tau[i+lower+1]) -## else: -## f2 = 0 - -## v[i] = f1*b[i] + f2*b[i+1] - - def gram(self, dl=0, dr=0, full=False): - """ - Approximate Gram inner product matrix using n - equally spaced sample points. - - """ - - self.g = N.nan_to_num(N.transpose(_bspline.gram(self.tau, self.m, dl, dr))) - return self.g - -class SmoothingSpline(BSpline): - - def fit(self, y, x=None, weights=None, pen=0., compute_gram=True): - banded = True - if x is None: - x = self.knots # internal knots - - if pen == 0.: # can't do pinv for singular matrices - banded = False - - if x.shape != y.shape: - raise ValueError, 'x and y shape do not agree, by default x are the Bspline\'s internal knots' - - bt = self.basis(x) - - if weights is not None: - bt *= N.sqrt(weights) - - # throw out rows with zeros (this happens at boundary points!) - - mask = N.flatnonzero(1 - N.alltrue(N.equal(bt, 0), axis=0)) - - bt = bt[:,mask] - y = y[mask] - - if compute_gram: - self.g = self.gram(dr=2, dl=2) - - if not banded: - btb = N.dot(bt, N.transpose(bt)) - else: - btb = N.zeros(self.g.shape, N.float64) - nband, nbasis = self.g.shape - for i in range(nbasis): - for k in range(nband): - j = i + self.m - 1 - k - if j >= 0 and j < nbasis: - btb[k,i] = (bt[i] * bt[j]).sum() - btb[nband-1-k,i] = btb[k,i] - - bty = N.dot(bt, y) - if not banded: - self.coef, r, self.rank = L.lstsq(btb + pen*self.g, bty)[0:3] - else: - self.coef = scipy.linalg.solve_banded((self.m-1,)*2, - btb + pen*self.g, - bty) - - -## s = BSpline(N.linspace(0,1,11), order=4) -## x = N.linspace(0,1,5001) -## y = s.basis(x, d=2) -## import pylab -## print s.tau -## def f(x): -## return s.basis_element(x,6) * s.basis_element(x, 8) -## print scipy.integrate.romberg(f, 0, 1), 'integral' - -## pylab.plot(x, y[6]*y[8]) -## pylab.show() - - -import pylab -import time, gc -toc = time.time() -for i in range(1000): - s = SmoothingSpline(N.linspace(0,1,51), order=4, M=4) - f = s.gram(dr=2, dl=2) - -gc.collect() -tic = time.time() -print (tic-toc) / 1000 - -## 
reader = csv.reader(file('/home/jtaylo/Desktop/bspline.csv')) -## v = [] -## for row in reader: -## v.append([float(x) for x in row]) -## v = N.array(v) - -import numpy.random as R -## import pylab - -y = N.arange(51) + 10 * R.standard_normal((51,)) -x = N.linspace(0,1,51) #s.knots[1:-1] -toc = time.time() -#s.fit(y, x=x, pen=1000.0, compute_gram=True) - -y[-1] = 150. -for i in range(10): - s.fit(y, x=x, pen=1.0e-20, compute_gram=False) -tic = time.time() -print (tic-toc) / 10 - -pylab.plot(x, y, 'bo') -x = N.linspace(-0.1,1.1,501) -pylab.plot(x, s(x), 'r') -pylab.show() - -## print N.allclose(y, s(x)), N.add.reduce((y-s(x))**2) -## m = 0 -## x = N.linspace(0,1,101) -## b = s.basis(x) -## X = []; Y=[] - -## def itergrad(x, delta=1, n=1): -## if n > 1: -## z = N.gradient(x, delta) -## return itergrad(z, delta=delta,n=n-1) -## else: -## return N.gradient(x, delta) - -## d = 3 -## for i in [0]: -## #x = N.linspace(s.tau[i] + 0.01, s.tau[i+3] - 0.01, 1253) -## x = N.linspace(0, 1, 1001) -## db = s.basis(x,d=d) -## b = s.basis(x) -## z = itergrad(b[i], n=d, delta=x[1]-x[0]) -## X += [db[i]] -## Y += [z] -## pylab.plot(x, db[i]) -## #pylab.plot(x, z, 'ro') -## # pylab.show() - -## X = N.hstack(X); Y=N.hstack(Y) - -## g = s.gram(dleft=2,dright=2) -## ## x = N.linspace(0,1,1000) -## ## ss = s.basis(x) -## ## G = N.zeros((g.shape[0],)*2, N.float64) -## ## for i in range(g.shape[0]): -## ## print G.shape -## ## for j in range(g.shape[0]): -## ## G[i,j] = scipy.trapz(ss[i]*ss[j],x=x) - -## ## print g.shape -## ## print N.corrcoef(X,Y) - From scipy-svn at scipy.org Sun Dec 3 23:41:52 2006 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Sun, 3 Dec 2006 22:41:52 -0600 (CST) Subject: [Scipy-svn] r2348 - trunk/Lib/sandbox/models Message-ID: <20061204044152.E914239C0C5@new.scipy.org> Author: jonathan.taylor Date: 2006-12-03 22:41:48 -0600 (Sun, 03 Dec 2006) New Revision: 2348 Modified: trunk/Lib/sandbox/models/bspline_module.py Log: added function for trace of inverse 
banded * banded matrix -- used in calculating GCV for SmoothingSpline Modified: trunk/Lib/sandbox/models/bspline_module.py =================================================================== --- trunk/Lib/sandbox/models/bspline_module.py 2006-12-04 04:40:31 UTC (rev 2347) +++ trunk/Lib/sandbox/models/bspline_module.py 2006-12-04 04:41:48 UTC (rev 2348) @@ -2,7 +2,7 @@ from scipy.weave import ext_tools import scipy.special.orthogonal -def build_bspline_module(): +def setup_bspline_module(): """ Builds an extension module with Bspline basis calculators using weave. @@ -147,7 +147,7 @@ mod.add_function(bspline_eval) bspline_eval.customize.add_support_code(eval_code) - nq = 5 + nq = 18 qx, qw = scipy.special.orthogonal.p_roots(nq) dl = dr = 2 @@ -184,6 +184,7 @@ double bspline_quad(double *knots, int nknots, int m, int l, int r, int dl, int dr) + /* This is based on scipy.integrate.fixed_quad */ { double *y; double qx[%(nq)d]={%(qx)s}; @@ -196,23 +197,14 @@ result = 0; - if (l <= r) { - lower = l - m / 2 - 1; - upper = r + m / 2 + 1; - } - else { - lower = r - m / 2 - 1; - upper = l + m / 2 + 1; - } + /* TO DO: figure out knot span more efficiently */ - if (lower < m) { - lower = m - 1; - } - if (upper > nknots-m-1) { - upper = nknots-m; - } - - for (k=lower; k<=upper; k++) { + lower = l - m - 1; + if (lower < 0) { lower = 0;} + upper = lower + 2 * m + 4; + if (upper > nknots - 1) {upper = nknots-1;} +/* upper = nknots - m; */ + for (k=lower; k 0) { data[j*n+i] = 0;} + } + } + + for (i=n-1; i>=0; i--) { + for (j=1; j <= (mdata; + invband_compute(&data, L, NL[1], NL[0]-1); + + return_val = (PyObject *) invband; + + ''' + + invband = ext_tools.ext_function('invband', + invband_ext_code, + ['L']) + invband.customize.add_support_code(invband_support_code) + mod.add_function(invband) + + return mod + +mod = setup_bspline_module() + +def build_bspline_module(): mod.compile() -try: - import _bspline -except ImportError: - build_bspline_module() - import _bspline +# try: +# 
import _bspline +# except ImportError: +# build_bspline_module() +# import _bspline ## if __name__ == '__main__': ## knots = N.hstack([[0]*3, N.linspace(0,1,11).astype(N.float64), [1]*3]) From scipy-svn at scipy.org Sun Dec 3 23:42:56 2006 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Sun, 3 Dec 2006 22:42:56 -0600 (CST) Subject: [Scipy-svn] r2349 - trunk/Lib/sandbox/models Message-ID: <20061204044256.E75E739C0C5@new.scipy.org> Author: jonathan.taylor Date: 2006-12-03 22:42:51 -0600 (Sun, 03 Dec 2006) New Revision: 2349 Modified: trunk/Lib/sandbox/models/setup.py Log: setup changed to install _bspline in the relevant site_packages tree Modified: trunk/Lib/sandbox/models/setup.py =================================================================== --- trunk/Lib/sandbox/models/setup.py 2006-12-04 04:41:48 UTC (rev 2348) +++ trunk/Lib/sandbox/models/setup.py 2006-12-04 04:42:51 UTC (rev 2349) @@ -1,3 +1,4 @@ + def configuration(parent_package='',top_path=None, package_name='models'): from numpy.distutils.misc_util import Configuration config = Configuration(package_name,parent_package,top_path) @@ -6,9 +7,20 @@ config.add_data_dir('tests') - + try: + from bspline_module import mod + n, s, d = weave_ext(mod) + config.add_extension(n, s, **d) + except ImportError: pass + return config +def weave_ext(mod): + d = mod.setup_extension().__dict__ + n = d['name']; del(d['name']) + s = d['sources']; del(d['sources']) + return n, s, d + if __name__ == '__main__': from numpy.distutils.core import setup From scipy-svn at scipy.org Sun Dec 3 23:44:00 2006 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Sun, 3 Dec 2006 22:44:00 -0600 (CST) Subject: [Scipy-svn] r2350 - trunk/Lib/sandbox/models/family Message-ID: <20061204044400.196E939C031@new.scipy.org> Author: jonathan.taylor Date: 2006-12-03 22:43:50 -0600 (Sun, 03 Dec 2006) New Revision: 2350 Modified: trunk/Lib/sandbox/models/family/family.py Log: minor changes to family.py Modified: 
trunk/Lib/sandbox/models/family/family.py =================================================================== --- trunk/Lib/sandbox/models/family/family.py 2006-12-04 04:42:51 UTC (rev 2349) +++ trunk/Lib/sandbox/models/family/family.py 2006-12-04 04:43:50 UTC (rev 2350) @@ -6,12 +6,15 @@ valid = [-N.inf, N.inf] + tol = 1.0e-05 + def __init__(self, link, variance): self.link = link self.variance = variance def weights(self, mu): - """ + + """ Weights for IRLS step. """ From scipy-svn at scipy.org Sun Dec 3 23:46:46 2006 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Sun, 3 Dec 2006 22:46:46 -0600 (CST) Subject: [Scipy-svn] r2351 - in trunk/Lib/sandbox/models: . tests Message-ID: <20061204044646.2F71539C02D@new.scipy.org> Author: jonathan.taylor Date: 2006-12-03 22:46:41 -0600 (Sun, 03 Dec 2006) New Revision: 2351 Modified: trunk/Lib/sandbox/models/__init__.py trunk/Lib/sandbox/models/cox.py trunk/Lib/sandbox/models/formula.py trunk/Lib/sandbox/models/glm.py trunk/Lib/sandbox/models/model.py trunk/Lib/sandbox/models/tests/test_formula.py trunk/Lib/sandbox/models/tests/test_glm.py Log: other changes to scipy.sandbox.models -- still in progress! 
Modified: trunk/Lib/sandbox/models/__init__.py =================================================================== --- trunk/Lib/sandbox/models/__init__.py 2006-12-04 04:43:50 UTC (rev 2350) +++ trunk/Lib/sandbox/models/__init__.py 2006-12-04 04:46:41 UTC (rev 2351) @@ -6,11 +6,5 @@ from glm import model as glm from rlm import model as rlm - -import unittest -def suite(): - return unittest.TestSuite([tests.suite()]) - - -from numpy.testing import ScipyTest -test = ScipyTest().test +from numpy.testing import NumpyTest +test = NumpyTest().test Modified: trunk/Lib/sandbox/models/cox.py =================================================================== --- trunk/Lib/sandbox/models/cox.py 2006-12-04 04:43:50 UTC (rev 2350) +++ trunk/Lib/sandbox/models/cox.py 2006-12-04 04:46:41 UTC (rev 2351) @@ -209,4 +209,4 @@ c = coxph(subjects, f) c.cache() - c.newton([0.4]) +# c.newton([0.4]) Modified: trunk/Lib/sandbox/models/formula.py =================================================================== --- trunk/Lib/sandbox/models/formula.py 2006-12-04 04:43:50 UTC (rev 2350) +++ trunk/Lib/sandbox/models/formula.py 2006-12-04 04:46:41 UTC (rev 2351) @@ -266,7 +266,7 @@ def __call__(self, *args, **kw): """ A quantitative is just like term, except there is an additional - transformation: self.transfrom. + transformation: self.transform. """ return self.transform(term.__call__(self, *args, **kw)) @@ -374,16 +374,20 @@ allvals.shape = (1,) + allvals.shape return allvals - def hasterm(self, term): + def hasterm(self, query_term): """ Determine whether a given term is in a formula. 
""" - if not isinstance(term, formula): - return term.termname in self.termnames() - elif len(term.terms) == 1: - term = term.terms[0] - return term.termname in self.termnames() + if not isinstance(query_term, formula): + if type(query_term) == type("name"): + try: query = self[query_term] + except: return False + elif isinstance(query_term, term): + return query_term.termname in self.termnames() + elif len(query_term.terms) == 1: + query_term = query_term.terms[0] + return query_term.termname in self.termnames() else: raise ValueError, 'more than one term passed to hasterm' @@ -394,14 +398,14 @@ else: raise KeyError, 'formula has no such term: %s' % repr(name) - def termcolumns(self, term, dict=False): + def termcolumns(self, query_term, dict=False): """ Return a list of the indices of all columns associated to a given term. """ - if self.hasterm(term): - names = term.names() + if self.hasterm(query_term): + names = query_term.names() value = {} for name in names: value[name] = self._names.index(name) Modified: trunk/Lib/sandbox/models/glm.py =================================================================== --- trunk/Lib/sandbox/models/glm.py 2006-12-04 04:43:50 UTC (rev 2350) +++ trunk/Lib/sandbox/models/glm.py 2006-12-04 04:46:41 UTC (rev 2351) @@ -28,23 +28,25 @@ if Y is None: Y = self.Y return self.family.deviance(Y, results.mu) / scale - def next(self, results, Y): + def next(self): + results = self.results; Y = self.Y self.weights = self.family.weights(results.mu) self.initialize(self.design) Z = results.predict + self.family.link.deriv(results.mu) * (Y - results.mu) newresults = wls_model.fit(self, Z) + newresults.Y = Y newresults.mu = self.family.link.inverse(newresults.predict) self.iter += 1 return newresults - def cont(self, results, tol=1.0e-05): + def cont(self, tol=1.0e-05): """ Continue iterating, or has convergence been obtained? 
""" if self.iter >= model.niter: return False - curdev = self.deviance(results=results) + curdev = self.deviance(results=self.results) if N.fabs((self.dev - curdev) / curdev) < tol: return False @@ -72,7 +74,7 @@ self.scale = self.results.scale = self.estimate_scale() while self.cont(self.results): - self.results = self.next(self.results, Y) + self.results = self.next() self.scale = self.results.scale = self.estimate_scale() return self.results Modified: trunk/Lib/sandbox/models/model.py =================================================================== --- trunk/Lib/sandbox/models/model.py 2006-12-04 04:43:50 UTC (rev 2350) +++ trunk/Lib/sandbox/models/model.py 2006-12-04 04:46:41 UTC (rev 2351) @@ -64,9 +64,10 @@ raise NotImplementedError def newton(self, theta): - def f(theta): - return -self.logL(theta) - self.results = scipy.optimize.fmin(f, theta) + raise NotImplementedError +# def f(theta): +# return -self.logL(theta) +# self.results = scipy.optimize.fmin(f, theta) class LikelihoodModelResults: Modified: trunk/Lib/sandbox/models/tests/test_formula.py =================================================================== --- trunk/Lib/sandbox/models/tests/test_formula.py 2006-12-04 04:43:50 UTC (rev 2350) +++ trunk/Lib/sandbox/models/tests/test_formula.py 2006-12-04 04:46:41 UTC (rev 2351) @@ -17,6 +17,7 @@ self.assertRaises(ValueError, formula.term, "name", termname=0) + def test_str(self): t = formula.term("name") s = str(t) @@ -58,6 +59,43 @@ self.formula += self.terms[i] self.formula.namespace = self.namespace + def test_namespace(self): + space1 = {'X':N.arange(50), 'Y':N.arange(50)*2} + space2 = {'X':N.arange(20), 'Y':N.arange(20)*2} + X = formula.term('X') + Y = formula.term('Y') + + X.namespace = space1 + assert_almost_equal(X(), N.arange(50)) + + Y.namespace = space2 + assert_almost_equal(Y(), N.arange(20)*2) + + f = X + Y + + f.namespace = space1 + self.assertEqual(f().shape, (2,50)) + assert_almost_equal(Y(), N.arange(50)*2) + 
assert_almost_equal(X(), N.arange(50)) + + f.namespace = space2 + self.assertEqual(f().shape, (2,20)) + assert_almost_equal(Y(), N.arange(20)*2) + assert_almost_equal(X(), N.arange(20)) + + + def test_termcolumns(self): + t1 = formula.term("A") + t2 = formula.term("B") + f = t1 + t2 + t1 * t2 + def other(val): + return N.array([3.2*val,4.342*val**2, 5.234*val**3]) + q = formula.quantitative(['other%d' % i for i in range(1,4)], termname='other', func=t1, transform=other) + f += q + q.namespace = f.namespace = self.formula.namespace + assert_almost_equal(q(), f()[f.termcolumns(q)]) + + def test_str(self): s = str(self.formula) @@ -133,7 +171,7 @@ def test_power(self): t = self.terms[2] - t2 = formula.quantitative('t', func=t)**2 + t2 = t**2 t.namespace = t2.namespace = self.formula.namespace assert_almost_equal(t()**2, t2()) Modified: trunk/Lib/sandbox/models/tests/test_glm.py =================================================================== --- trunk/Lib/sandbox/models/tests/test_glm.py 2006-12-04 04:43:50 UTC (rev 2350) +++ trunk/Lib/sandbox/models/tests/test_glm.py 2006-12-04 04:46:41 UTC (rev 2351) @@ -7,9 +7,8 @@ W = R.standard_normal +class test_Regression(NumpyTestCase): -class test_Regression(ScipyTestCase): - def check_Logistic(self): X = W((40,10)) Y = N.greater(W((40,)), 0) @@ -27,10 +26,6 @@ results = cmodel.fit(Y) self.assertEquals(results.df_resid, 31) - -def suite(): - suite = unittest.makeSuite(RegressionTest) - return suite if __name__ == "__main__": - ScipyTest().run() + NumpyTest().run() From scipy-svn at scipy.org Tue Dec 5 14:32:08 2006 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Tue, 5 Dec 2006 13:32:08 -0600 (CST) Subject: [Scipy-svn] r2352 - trunk/Lib/sparse Message-ID: <20061205193208.A8BAC39C22D@new.scipy.org> Author: edschofield Date: 2006-12-05 13:32:01 -0600 (Tue, 05 Dec 2006) New Revision: 2352 Modified: trunk/Lib/sparse/sparse.py Log: Applied Neilen Marais's patch for non-machine-size int indices in sparse.lil_matrix 
(ticket #307) Modified: trunk/Lib/sparse/sparse.py =================================================================== --- trunk/Lib/sparse/sparse.py 2006-12-04 04:46:41 UTC (rev 2351) +++ trunk/Lib/sparse/sparse.py 2006-12-05 19:32:01 UTC (rev 2352) @@ -2445,6 +2445,7 @@ raise IndexError, "lil_matrix supports slices only of a single row" # TODO: add support for this, like in __setitem__ elif isintlike(i): + i = int(i) # Python list indices must be machine-sized ints if not (i>=0 and i=0 and j=0 and i=0 and j Author: oliphant Date: 2006-12-05 13:50:48 -0600 (Tue, 05 Dec 2006) New Revision: 2353 Modified: trunk/Lib/stats/continuous.lyx trunk/Lib/stats/morestats.py Log: Add a link to a paper which explains the bayes_mvs code. Modified: trunk/Lib/stats/continuous.lyx =================================================================== --- trunk/Lib/stats/continuous.lyx 2006-12-05 19:32:01 UTC (rev 2352) +++ trunk/Lib/stats/continuous.lyx 2006-12-05 19:50:48 UTC (rev 2353) @@ -693,7 +693,7 @@ Median and mode \layout Standard -The mean, +The median, \begin_inset Formula $m_{n}$ \end_inset Modified: trunk/Lib/stats/morestats.py =================================================================== --- trunk/Lib/stats/morestats.py 2006-12-05 19:32:01 UTC (rev 2352) +++ trunk/Lib/stats/morestats.py 2006-12-05 19:50:48 UTC (rev 2353) @@ -40,6 +40,11 @@ ### Bayesian confidence intervals for mean, variance, std ########################################################## +## See the paper "A Bayesian perspective on estimating +## mean, variance, and standard-deviation from data +## at http://dspace.byu.edu/bitstream/1877/438/1/bayes_mvs.pdf +## (Permanent link at http://hdl.handle.net/1877/438 ) + # assume distributions are gaussian with given means and variances. 
def _gauss_mvs(x, n, alpha): xbar = x.mean() From scipy-svn at scipy.org Tue Dec 5 14:51:23 2006 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Tue, 5 Dec 2006 13:51:23 -0600 (CST) Subject: [Scipy-svn] r2354 - trunk/Lib/stats Message-ID: <20061205195123.2EA5C39C00C@new.scipy.org> Author: oliphant Date: 2006-12-05 13:51:20 -0600 (Tue, 05 Dec 2006) New Revision: 2354 Modified: trunk/Lib/stats/morestats.py Log: Add a link to a paper which explains the bayes_mvs code. Modified: trunk/Lib/stats/morestats.py =================================================================== --- trunk/Lib/stats/morestats.py 2006-12-05 19:50:48 UTC (rev 2353) +++ trunk/Lib/stats/morestats.py 2006-12-05 19:51:20 UTC (rev 2354) @@ -41,7 +41,8 @@ ########################################################## ## See the paper "A Bayesian perspective on estimating -## mean, variance, and standard-deviation from data +## mean, variance, and standard-deviation from data" by +## Travis E. Oliphant ## at http://dspace.byu.edu/bitstream/1877/438/1/bayes_mvs.pdf ## (Permanent link at http://hdl.handle.net/1877/438 ) From scipy-svn at scipy.org Tue Dec 5 20:59:33 2006 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Tue, 5 Dec 2006 19:59:33 -0600 (CST) Subject: [Scipy-svn] r2355 - branches Message-ID: <20061206015933.344F339C2CE@new.scipy.org> Author: cdavid Date: 2006-12-05 19:59:24 -0600 (Tue, 05 Dec 2006) New Revision: 2355 Added: branches/cdavid_pyem_axis_support/ Log: * branch scipy to add axis like support to pyem and cluster packages Copied: branches/cdavid_pyem_axis_support (from rev 2354, trunk) From scipy-svn at scipy.org Wed Dec 6 04:22:40 2006 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Wed, 6 Dec 2006 03:22:40 -0600 (CST) Subject: [Scipy-svn] r2356 - trunk/Lib/io/tests Message-ID: <20061206092240.D836039C1E5@new.scipy.org> Author: matthew.brett at gmail.com Date: 2006-12-06 03:22:34 -0600 (Wed, 06 Dec 2006) New Revision: 2356 Modified: 
trunk/Lib/io/tests/test_recaster.py Log: Fixed unit test that fails on 32bit Modified: trunk/Lib/io/tests/test_recaster.py =================================================================== --- trunk/Lib/io/tests/test_recaster.py 2006-12-06 01:59:24 UTC (rev 2355) +++ trunk/Lib/io/tests/test_recaster.py 2006-12-06 09:22:34 UTC (rev 2356) @@ -52,7 +52,7 @@ tdtsz = N.dtype(T).itemsize ok_T = T in R.sctype_list expect_none = ((req_type is None) or - ((tdtsz < rdtsz) and not ok_T)) + ((tdtsz <= rdtsz) and not ok_T)) A = N.array(value, T) C = R.smallest_same_kind(A) if expect_none: From scipy-svn at scipy.org Wed Dec 6 07:34:37 2006 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Wed, 6 Dec 2006 06:34:37 -0600 (CST) Subject: [Scipy-svn] r2357 - in trunk/Lib/sandbox/pyem: . src tests Message-ID: <20061206123437.157D239C0A9@new.scipy.org> Author: cdavid Date: 2006-12-06 06:27:51 -0600 (Wed, 06 Dec 2006) New Revision: 2357 Added: trunk/Lib/sandbox/pyem/densities2.py trunk/Lib/sandbox/pyem/src/pure_den.c Modified: trunk/Lib/sandbox/pyem/ trunk/Lib/sandbox/pyem/online_em.py trunk/Lib/sandbox/pyem/setup.py trunk/Lib/sandbox/pyem/src/Makefile trunk/Lib/sandbox/pyem/tests/test_online_em.py Log: Add densities2.py for preliminary axis support and add specialized class for online EM in 1d for a 10-100x speed incread Property changes on: trunk/Lib/sandbox/pyem ___________________________________________________________________ Name: svn:ignore - *.pyc *.swp *.pyd *.so + *.pyc *.swp *.pyd *.so *.prof Added: trunk/Lib/sandbox/pyem/densities2.py =================================================================== --- trunk/Lib/sandbox/pyem/densities2.py 2006-12-06 09:22:34 UTC (rev 2356) +++ trunk/Lib/sandbox/pyem/densities2.py 2006-12-06 12:27:51 UTC (rev 2357) @@ -0,0 +1,267 @@ +#! /usr/bin/python +# +# Copyrighted David Cournapeau +# Last Change: Wed Dec 06 09:00 PM 2006 J + +# New version, with default numpy ordering. 
+ +import numpy as N +import numpy.linalg as lin +from numpy.random import randn +from scipy.stats import chi2 + +# Error classes +class DenError(Exception): + """Base class for exceptions in this module. + + Attributes: + expression -- input expression in which the error occurred + message -- explanation of the error""" + def __init__(self, message): + self.message = message + + def __str__(self): + return self.message + +#============ +# Public API +#============ +# The following function do all the fancy stuff to check that parameters +# are Ok, and call the right implementation if args are OK. +def gauss_den(x, mu, va, log = False, axis = -1): + """ Compute multivariate Gaussian density at points x for + mean mu and variance va along specified axis: + + requirements: + * mean must be rank 0 (1d) or rank 1 (multi variate gaussian) + * va must be rank 0 (1d), rank 1(multi variate, diag covariance) or rank 2 + (multivariate, full covariance). + * in 1 dimension case, any rank for mean and va is ok, as long as their size + is 1 (eg they contain only 1 element) + + Caution: if x is rank 1, it is assumed you have a 1d problem. You cannot compute + the gaussian densities of only one sample of dimension d; for this, you have + to use a rank 2 ! + + If log is True, than the log density is returned + (useful for underflow ?)""" + + # If data is rank 1, then we have 1 dimension problem. 
+ if x.ndim == 1: + d = 1 + n = x.size + if not N.size(mu) == 1: + raise DenError("for 1 dimension problem, mean must have only one element") + + if not N.size(va) == 1: + raise DenError("for 1 dimension problem, mean must have only one element") + + return _scalar_gauss_den(x, mu, va, log) + # If data is rank 2, then we may have 1 dimension or multi-variate problem + elif x.ndim == 2: + oaxis = (axis + 1) % 2 + n = x.shape[axis] + d = x.shape[oaxis] + + # Get away with 1d case now + if d == 1: + return _scalar_gauss_den(x, mu, va, log) + + # Now, d > 1 (numpy attributes should be valid on mean and va now) + if not N.size(mu) == d or not mu.ndim == 1: + raise DenError("data is %d dimension, but mean's shape is %s" \ + % (d, N.shape(mu)) + " (should be (%d,))" % d) + + isfull = (va.ndim == 2) + if not (N.size(va) == d or (isfull and va.shape[0] == va.shape[1] == d)): + raise DenError("va has an invalid shape or number of elements") + + if isfull: + # Compute along rows + if oaxis == 0: + return _full_gauss_den(x, mu[:, N.newaxis], va, log, axis) + else: + return _full_gauss_den(x, mu, va, log, axis) + else: + return _diag_gauss_den(x, mu, va, log, axis) + else: + raise RuntimeError("Sorry, only rank up to 2 supported") + +# To plot a confidence ellipse from multi-variate gaussian pdf +def gauss_ell(mu, va, dim = [0, 1], npoints = 100, level = 0.39): + """ Given a mean and covariance for multi-variate + gaussian, returns npoints points for the ellipse + of confidence given by level (all points will be inside + the ellipsoides with a probability equal to level) + + Returns the coordinate x and y of the ellipse""" + + c = N.array(dim) + + if mu.size < 2: + raise RuntimeError("this function only make sense for dimension 2 and more") + + if mu.size == va.size: + mode = 'diag' + else: + if va.ndim == 2: + if va.shape[0] == va.shape[1]: + mode = 'full' + else: + raise DenError("variance not square") + else: + raise DenError("mean and variance are not dim conformant") + + # 
If X ~ N(mu, va), then [X` * va^(-1/2) * X] ~ Chi2 + chi22d = chi2(2) + mahal = N.sqrt(chi22d.ppf(level)) + + # Generates a circle of npoints + theta = N.linspace(0, 2 * N.pi, npoints) + circle = mahal * N.array([N.cos(theta), N.sin(theta)]) + + # Get the dimension which we are interested in: + mu = mu[dim] + if mode == 'diag': + va = va[dim] + elps = N.outer(mu, N.ones(npoints)) + elps += N.dot(N.diag(N.sqrt(va)), circle) + elif mode == 'full': + va = va[c,:][:,c] + # Method: compute the cholesky decomp of each cov matrix, that is + # compute cova such as va = cova * cova' + # WARN: scipy is different than matlab here, as scipy computes a lower + # triangular cholesky decomp: + # - va = cova * cova' (scipy) + # - va = cova' * cova (matlab) + # So take care when comparing results with matlab ! + cova = lin.cholesky(va) + elps = N.outer(mu, N.ones(npoints)) + elps += N.dot(cova, circle) + else: + raise DenParam("var mode not recognized") + + return elps[0, :], elps[1, :] + +#============= +# Private Api +#============= +# Those 3 functions do almost all the actual computation +def _scalar_gauss_den(x, mu, va, log): + """ This function is the actual implementation + of gaussian pdf in scalar case. It assumes all args + are conformant, so it should not be used directly + + Call gauss_den instead""" + inva = 1/va + fac = (2*N.pi) ** (-1/2.0) * N.sqrt(inva) + y = ((x-mu) ** 2) * -0.5 * inva + if not log: + y = fac * N.exp(y.ravel()) + else: + y = y + log(fac) + + return y + +def _diag_gauss_den(x, mu, va, log, axis): + """ This function is the actual implementation + of gaussian pdf in scalar case. 
It assumes all args + are conformant, so it should not be used directly + + Call gauss_den instead""" + # Diagonal matrix case + d = mu.size + if axis % 2 == 0: + x = N.swapaxes(x, 0, 1) + + if not log: + inva = 1/va[0] + fac = (2*N.pi) ** (-d/2.0) * N.sqrt(inva) + y = (x[0] - mu[0]) ** 2 * inva * -0.5 + for i in range(1, d): + inva = 1/va[i] + fac *= N.sqrt(inva) + y += (x[i] - mu[i]) ** 2 * inva * -0.5 + y = fac * N.exp(y) + else: + y = _scalar_gauss_den(x[0], mu[0], va[0], log) + for i in range(1, d): + y += _scalar_gauss_den(x[i], mu[i], va[i], log) + + return y + +def _full_gauss_den(x, mu, va, log, axis): + """ This function is the actual implementation + of gaussian pdf in full matrix case. + + It assumes all args are conformant, so it should + not be used directly Call gauss_den instead + + Does not check if va is definite positive (on inversible + for that matter), so the inverse computation and/or determinant + would throw an exception.""" + d = mu.size + inva = lin.inv(va) + fac = 1 / N.sqrt( (2*N.pi) ** d * N.fabs(lin.det(va))) + + # # Slow version (does not work since version 0.6) + # n = N.size(x, 0) + # y = N.zeros(n) + # for i in range(n): + # y[i] = N.dot(x[i,:], + # N.dot(inva, N.transpose(x[i,:]))) + # y *= -0.5 + + # we are using a trick with sum to "emulate" + # the matrix multiplication inva * x without any explicit loop + if axis % 2 == 1: + y = N.dot(inva, (x-mu)) + y = -0.5 * N.sum(y * (x-mu), 0) + else: + y = N.dot((x-mu), inva) + y = -0.5 * N.sum(y * (x-mu), 1) + + if not log: + y = fac * N.exp(y) + else: + y = y + N.log(fac) + + return y + +if __name__ == "__main__": + import pylab + + #========================================= + # Test plotting a simple diag 2d variance: + #========================================= + va = N.array([5, 3]) + mu = N.array([2, 3]) + + # Generate a multivariate gaussian of mean mu and covariance va + X = randn(2, 1e3) + Yc = N.dot(N.diag(N.sqrt(va)), X) + Yc = Yc.transpose() + mu + + # Plotting + Xe, Ye = 
gauss_ell(mu, va, npoints = 100) + pylab.figure() + pylab.plot(Yc[:, 0], Yc[:, 1], '.') + pylab.plot(Xe, Ye, 'r') + + #========================================= + # Test plotting a simple full 2d variance: + #========================================= + va = N.array([[0.2, 0.1],[0.1, 0.5]]) + mu = N.array([0, 3]) + + # Generate a multivariate gaussian of mean mu and covariance va + X = randn(1e3, 2) + Yc = N.dot(lin.cholesky(va), X.transpose()) + Yc = Yc.transpose() + mu + + # Plotting + Xe, Ye = gauss_ell(mu, va, npoints = 100, level=0.95) + pylab.figure() + pylab.plot(Yc[:, 0], Yc[:, 1], '.') + pylab.plot(Xe, Ye, 'r') + pylab.show() Modified: trunk/Lib/sandbox/pyem/online_em.py =================================================================== --- trunk/Lib/sandbox/pyem/online_em.py 2006-12-06 09:22:34 UTC (rev 2356) +++ trunk/Lib/sandbox/pyem/online_em.py 2006-12-06 12:27:51 UTC (rev 2357) @@ -1,5 +1,5 @@ # /usr/bin/python -# Last Change: Mon Oct 23 07:00 PM 2006 J +# Last Change: Wed Dec 06 09:00 PM 2006 J #--------------------------------------------- # This is not meant to be used yet !!!! I am @@ -21,9 +21,10 @@ from numpy import mean from numpy.testing import assert_array_almost_equal, assert_array_equal -from gmm_em import ExpMixtureModel, GMM, EM, multiple_gauss_den +from gmm_em import ExpMixtureModel, GMM, EM from gauss_mix import GM from kmean import kmean +import densities2 as D import copy from numpy.random import seed @@ -50,7 +51,7 @@ return self.message class OnGMM(ExpMixtureModel): - """A Class for 'online' (ie recursive) EM. Insteand + """A Class for 'online' (ie recursive) EM. 
Instead of running the E step on the whole data, the sufficient statistics are updated for each new frame of data, and used in the (unchanged) M step""" @@ -135,14 +136,14 @@ self.init = init_methods[init] - def compute_sufficient_statistics(self, frame, nu): - """ sufficient_statistics(frame, nu) + def compute_sufficient_statistics_frame(self, frame, nu): + """ sufficient_statistics(frame, nu) for one frame of data - frame has to be rank 2 !""" - gamma = multiple_gauss_den(frame, self.pmu, self.pva)[0] + frame has to be rank 1 !""" + gamma = multiple_gauss_den_frame(frame, self.pmu, self.pva) gamma *= self.pw gamma /= N.sum(gamma) - # <1>(t) = cw(t), each column is one component cw = (cw1, ..., cwK); + # <1>(t) = cw(t), self.cw = cw(t), each element is one component running weight #self.cw = (1 - nu) * self.cw + nu * gamma self.cw *= (1 - nu) self.cw += nu * gamma @@ -151,16 +152,136 @@ self.cx[k] = (1 - nu) * self.cx[k] + nu * frame * gamma[k] self.cxx[k] = (1 - nu) * self.cxx[k] + nu * frame ** 2 * gamma[k] - def update_em(self): + def update_em_frame(self): for k in range(self.gm.k): self.cmu[k] = self.cx[k] / self.cw[k] self.cva[k] = self.cxx[k] / self.cw[k] - self.cmu[k] ** 2 +import _rawden + +class OnGMM1d(ExpMixtureModel): + """Special purpose case optimized for 1d dimensional cases. + + Require each frame to be a float, which means the API is a bit + different than OnGMM. 
You are trading elegance for speed here !""" + def init_kmean(self, init_data, niter = 5): + """ Init the model using kmean.""" + assert init_data.ndim == 1 + k = self.gm.k + w = N.ones(k) / k + + # Init the internal state of EM + self.cx = w * mean(init_data) + self.cxx = w * mean(init_data ** 2) + + # w, mu and va init is the same that in the standard case + (code, label) = kmean(init_data[:, N.newaxis], \ + init_data[0:k, N.newaxis], niter) + mu = code.copy() + va = N.zeros((k, 1)) + for i in range(k): + va[i] = N.cov(init_data[N.where(label==i)], rowvar = 0) + + self.gm.set_param(w, mu, va) + # c* are the parameters which are computed at every step (ie + # when a new frame is taken into account + self.cw = self.gm.w + self.cmu = self.gm.mu[:, 0] + self.cva = self.gm.va[:, 0] + + # p* are the parameters used when computing gaussian densities + # they are the same than c* in the online case + # self.pw = self.cw.copy() + # self.pmu = self.cmu.copy() + # self.pva = self.cva.copy() + self.pw = self.cw + self.pmu = self.cmu + self.pva = self.cva + + def __init__(self, gm, init_data, init = 'kmean'): + self.gm = gm + if self.gm.d is not 1: + raise RuntimeError("expects 1d gm only !") + + # Possible init methods + init_methods = {'kmean' : self.init_kmean} + self.init = init_methods[init] + + def compute_sufficient_statistics_frame(self, frame, nu): + """expects frame and nu to be float. Returns + cw, cxx and cxx, eg the sufficient statistics.""" + _rawden.compute_ss_frame_1d(frame, self.cw, self.cmu, self.cva, + self.cx, self.cxx, nu) + return self.cw, self.cx, self.cxx + + def update_em_frame(self, cw, cx, cxx): + """Update EM state using SS as returned by + compute_sufficient_statistics_frame. """ + self.cmu = cx / cw + self.cva = cxx / cw - self.cmu ** 2 + + def compute_em_frame(self, frame, nu): + """Run a whole em step for one frame. 
frame and nu should be float; + if you don't need to split E and M steps, this is faster than calling + compute_sufficient_statistics_frame and update_em_frame""" + _rawden.compute_em_frame_1d(frame, self.cw, self.cmu, self.cva, \ + self.cx, self.cxx, nu) +#class OnlineEM: +# def __init__(self, ogm): +# """Init Online Em algorithm with ogm, an instance of OnGMM.""" +# if not isinstance(ogm, OnGMM): +# raise TypeError("expect a OnGMM instance for the model") +# +# def init_em(self): +# pass +# +# def train(self, data, nu): +# pass +# +# def train_frame(self, frame, nu): +# pass + +def multiple_gauss_den_frame(data, mu, va): + """Helper function to generate several Gaussian + pdf (different parameters) from one frame of data. + + Semantics depending on data's rank + - rank 0: mu and va expected to have rank 0 or 1 + - rank 1: mu and va expected to have rank 2.""" + if N.ndim(data) == 0: + # scalar case + k = mu.size + inva = 1/va + fac = (2*N.pi) ** (-1/2.0) * N.sqrt(inva) + y = ((data-mu) ** 2) * -0.5 * inva + return fac * N.exp(y.ravel()) + elif N.ndim(data) == 1: + # multi variate case (general case) + k = mu.shape[0] + y = N.zeros(k, data.dtype) + if mu.size == va.size: + # diag case + for i in range(k): + #y[i] = D.gauss_den(data, mu[i], va[i]) + # This is a bit hackish: _diag_gauss_den implementation's + # changes can break this, but I don't see how to easily fix this + y[i] = D._diag_gauss_den(data, mu[i], va[i], False, -1) + return y + else: + raise RuntimeError("full not implemented yet") + #for i in range(K): + # y[i] = D.gauss_den(data, mu[i, :], + # va[d*i:d*i+d, :]) + #return y.T + else: + raise RuntimeError("frame should be rank 0 or 1 only") + + if __name__ == '__main__': d = 1 k = 2 mode = 'diag' - nframes = int(1e3) + nframes = int(5e3) emiter = 4 seed(5) @@ -206,13 +327,29 @@ for i in range(1, len(lamb)): nu[i] = 1./(1 + lamb[i] / nu[i-1]) + print "meth1" # object version of online EM for t in range(nframes): - 
ogmm.compute_sufficient_statistics(data[t:t+1, :], nu[t]) - ogmm.update_em() + ogmm.compute_sufficient_statistics_frame(data[t], nu[t]) + ogmm.update_em_frame() ogmm.gm.set_param(ogmm.cw, ogmm.cmu, ogmm.cva) + # 1d optimized version + ogm2 = GM(d, k, mode) + ogmm2 = OnGMM1d(ogm2, 'kmean') + ogmm2.init(init_data[:, 0]) + + print "meth2" + # object version of online EM + for t in range(nframes): + ogmm2.compute_sufficient_statistics_frame(data[t, 0], nu[t]) + ogmm2.update_em_frame() + + #ogmm2.gm.set_param(ogmm2.cw, ogmm2.cmu, ogmm2.cva) + + print ogmm.cw + print ogmm2.cw #+++++++++++++++ # Draw the model #+++++++++++++++ Modified: trunk/Lib/sandbox/pyem/setup.py =================================================================== --- trunk/Lib/sandbox/pyem/setup.py 2006-12-06 09:22:34 UTC (rev 2356) +++ trunk/Lib/sandbox/pyem/setup.py 2006-12-06 12:27:51 UTC (rev 2357) @@ -1,12 +1,19 @@ #! /usr/bin/env python -# Last Change: Thu Nov 09 06:00 PM 2006 J +# Last Change: Wed Dec 06 08:00 PM 2006 J # TODO: # - check how to handle cmd line build options with distutils and use # it in the building process """ pyem is a small python package to estimate Gaussian Mixtures Models -from data, using Expectation Maximization""" +from data, using Expectation Maximization. +Maximum likelihood EM for mixture of Gaussian is implemented, with BIC computation +for number of cluster assessment. + +There is also an experimental online EM version (the EM is updated for each new +sample), and I plan to add Variational Bayes and/or MCMC support for Bayesian approach +for estimating meta parameters of mixtures. 
""" + from os.path import join # This import from __init__ looks strange, should check whether there is no other way from info import version as pyem_version @@ -28,6 +35,10 @@ #define_macros=[('LIBSVM_EXPORTS', None), # ('LIBSVM_DLL', None)], sources=[join('src', 'c_gden.c')]) + config.add_extension('_rawden', + #define_macros=[('LIBSVM_EXPORTS', None), + # ('LIBSVM_DLL', None)], + sources=[join('src', 'pure_den.c')]) return config Modified: trunk/Lib/sandbox/pyem/src/Makefile =================================================================== --- trunk/Lib/sandbox/pyem/src/Makefile 2006-12-06 09:22:34 UTC (rev 2356) +++ trunk/Lib/sandbox/pyem/src/Makefile 2006-12-06 12:27:51 UTC (rev 2357) @@ -4,26 +4,36 @@ PYREX = python2.4-pyrexc PYTHONINC = -I/usr/include/python2.4 -NUMPYINC = -I/usr/lib/python2.4/site-packages/numpy/core/include +NUMPYINC = -I/home/david/local/lib/python2.4/site-packages/numpy/core/include OPTIMS = -O3 -funroll-all-loops -march=pentium4 -msse2 -WARN = -W -Wall +WARN = -Wall -W -Winline -Wstrict-prototypes -Wmissing-prototypes \ + -Waggregate-return -Wcast-align -Wcast-qual -Wnested-externs \ + -Wshadow -Wbad-function-cast -Wwrite-strings CFLAGS = $(PYTHONINC) $(NUMPYINC) $(OPTIMS) $(WARN) +targets: c_gden.so _rawden.so + c_gden.so: c_gden.o $(LD) -shared -o $@ $< -Wl,-soname,$@ +_rawden.so: pure_den.o + $(LD) -shared -o $@ $< -Wl,-soname,$@ + c_gden.o: c_gden.c $(CC) -c $(CFLAGS) -fPIC $< -c_gmm.so: c_gmm.o - $(LD) -shared -o $@ $< -Wl,-soname,$@ - -c_gmm.o: c_gmm.c +pure_den.o: pure_den.c $(CC) -c $(CFLAGS) -fPIC $< -c_gmm.c: c_gmm.pyx c_numpy.pxd c_python.pxd - $(PYREX) $< +#c_gmm.so: c_gmm.o +# $(LD) -shared -o $@ $< -Wl,-soname,$@ +# +#c_gmm.o: c_gmm.c +# $(CC) -c $(CFLAGS) -fPIC $< +# +#c_gmm.c: c_gmm.pyx c_numpy.pxd c_python.pxd +# $(PYREX) $< clean: rm -f c_gmm.c Added: trunk/Lib/sandbox/pyem/src/pure_den.c =================================================================== --- trunk/Lib/sandbox/pyem/src/pure_den.c 2006-12-06 09:22:34 
UTC (rev 2356) +++ trunk/Lib/sandbox/pyem/src/pure_den.c 2006-12-06 12:27:51 UTC (rev 2357) @@ -0,0 +1,458 @@ +/* + * Last Change: Wed Dec 06 08:00 PM 2006 J + */ +#include +#include + +#include + +PyObject* compute_ss_frame_1d_py(PyObject* dum, PyObject *arg); +PyObject* compute_em_frame_1d_py(PyObject* dum, PyObject *arg); + +/* + * Pure C methods + */ +static int compute_ss_frame_1d(double sample, int nc, double* w, double* mu, double *va, + double *cx, double *cxx, double nu); +static int update_em_frame_1d(const double* w, const double* cx, const double *cxx, + int nc, double *cmu, double *cva); + +static PyMethodDef mymethods[] = { + {"compute_ss_frame_1d", compute_ss_frame_1d_py, METH_VARARGS, ""}, + {"compute_em_frame_1d", compute_em_frame_1d_py, METH_VARARGS, ""}, + {NULL, NULL, 0, NULL} /* Sentinel */ +}; + +/* + * function table + */ +PyMODINIT_FUNC init_rawden(void) +{ + (void)Py_InitModule("_rawden", mymethods); + import_array(); +} + +PyObject* compute_ss_frame_1d_py(PyObject* self, PyObject *args) +{ + PyObject *w, *mu, *va, *cx, *cxx; + PyObject *w_a, *mu_a, *va_a, *cx_a, *cxx_a; + double nu, sample; + npy_intp ndim, *dims, len; + double *mu_ca, *va_ca, *w_ca, *cx_ca, *cxx_ca; + + const npy_intp mrank = 1; + + int st; + + /* + * Init python object holder to NULL so that we can use Py_XDECREF on all + * the objets when something is woring + */ + w = NULL; + mu = NULL; + va = NULL; + cx = NULL; + cxx = NULL; + + w_a = NULL; + mu_a = NULL; + va_a = NULL; + cx_a = NULL; + cxx_a = NULL; + /* + * Parsing of args: w, cx and cxx are inout + */ + if (!PyArg_ParseTuple(args, "dOOOOOd", &sample, &w, &mu, &va, &cx, &cxx, &nu)){ + return NULL; + } + if ( (nu > 1) | (nu <= 0) ) { + PyErr_SetString(PyExc_TypeError, "nu should be between 0 and 1"); + return NULL; + } + + /* inout entries */ + w_a = PyArray_FROM_OTF(w, NPY_DOUBLE, NPY_INOUT_ARRAY); + if (w_a == NULL) { + PyErr_SetString(PyExc_TypeError, "w array not convertible"); + return NULL; + } + + cx_a = 
PyArray_FROM_OTF(cx, NPY_DOUBLE, NPY_INOUT_ARRAY); + if (cx_a == NULL) { + PyErr_SetString(PyExc_TypeError, "cx array not convertible"); + goto fail; + } + + cxx_a = PyArray_FROM_OTF(cxx, NPY_DOUBLE, NPY_INOUT_ARRAY); + if (cxx_a == NULL) { + PyErr_SetString(PyExc_TypeError, "cxx array not convertible"); + goto fail; + } + + /* in only entries */ + mu_a = PyArray_FROM_OTF(mu, NPY_DOUBLE, NPY_IN_ARRAY); + if (mu_a == NULL) { + PyErr_SetString(PyExc_TypeError, "mu array not convertible"); + goto fail; + } + + va_a = PyArray_FROM_OTF(va, NPY_DOUBLE, NPY_IN_ARRAY); + if (va_a == NULL) { + PyErr_SetString(PyExc_TypeError, "va array not convertible"); + goto fail; + } + + /* + * Check that in and out have same size and same rank + */ + ndim = PyArray_NDIM(w_a); + if(ndim != mrank) { + PyErr_SetString(PyExc_TypeError, "w rank should be 1"); + goto fail; + } + ndim = PyArray_NDIM(cx_a); + if(ndim != mrank) { + PyErr_SetString(PyExc_TypeError, "cx rank should be 1"); + goto fail; + } + ndim = PyArray_NDIM(cxx_a); + if(ndim != mrank) { + PyErr_SetString(PyExc_TypeError, "cxx rank should be 1"); + goto fail; + } + ndim = PyArray_NDIM(mu_a); + if(ndim != mrank) { + PyErr_SetString(PyExc_TypeError, "mu rank should be 1"); + goto fail; + } + ndim = PyArray_NDIM(va_a); + if(ndim != mrank) { + PyErr_SetString(PyExc_TypeError, "va rank should be 1"); + goto fail; + } + + dims = PyArray_DIMS(w_a); + len = dims[0]; + //fprintf(stderr, "%s:%s, len is %d\n", __FILE__, __func__, len); + dims = PyArray_DIMS(cx_a); + if(dims[0] != len) { + PyErr_SetString(PyExc_TypeError, "cx shape should match !"); + goto fail; + } + dims = PyArray_DIMS(cxx_a); + if(dims[0] != len) { + PyErr_SetString(PyExc_TypeError, "cxx shape should match !"); + goto fail; + } + dims = PyArray_DIMS(mu_a); + if(dims[0] != len) { + PyErr_SetString(PyExc_TypeError, "mu_a shape should match !"); + goto fail; + } + dims = PyArray_DIMS(va_a); + if(dims[0] != len) { + PyErr_SetString(PyExc_TypeError, "va_a shape should match 
!"); + goto fail; + } + + /* + * Get pointer to the data + */ + w_ca = PyArray_DATA(w_a); + if (w_ca == NULL) { + PyErr_SetString(PyExc_TypeError, "Unknown Error for w_ca"); + goto fail; + } + cx_ca = PyArray_DATA(cx_a); + if (cx_ca == NULL) { + PyErr_SetString(PyExc_TypeError, "Unknown Error for cx_ca"); + goto fail; + } + cxx_ca = PyArray_DATA(cxx_a); + if (w_ca == NULL) { + PyErr_SetString(PyExc_TypeError, "Unknown Error for cxx_ca"); + goto fail; + } + mu_ca = PyArray_DATA(mu_a); + if (mu_ca == NULL) { + PyErr_SetString(PyExc_TypeError, "Unknown Error for mu_ca"); + goto fail; + } + va_ca = PyArray_DATA(va_a); + if (va_ca == NULL) { + PyErr_SetString(PyExc_TypeError, "Unknown Error for va_ca"); + goto fail; + } + /* + * Call actual implementation + */ + st = compute_ss_frame_1d(sample, len, w_ca, mu_ca, va_ca, cx_ca, cxx_ca, nu); + if (st) { + PyErr_SetString(PyExc_TypeError, "Error while calling multi_gauss...."); + goto fail; + } + + Py_DECREF(w_a); + Py_DECREF(cx_a); + Py_DECREF(cxx_a); + Py_DECREF(mu_a); + Py_DECREF(va_a); + + Py_INCREF(Py_None); + return Py_None; + +fail: + Py_XDECREF(w_a); + Py_XDECREF(cx_a); + Py_XDECREF(cxx_a); + Py_XDECREF(mu_a); + Py_XDECREF(va_a); + return NULL; +} + +PyObject* compute_em_frame_1d_py(PyObject* self, PyObject *args) +{ + PyObject *w, *mu, *va, *cx, *cxx; + PyObject *w_a, *mu_a, *va_a, *cx_a, *cxx_a; + double nu, sample; + npy_intp ndim, *dims, len; + double *mu_ca, *va_ca, *w_ca, *cx_ca, *cxx_ca; + + const npy_intp mrank = 1; + + int st; + + /* + * Init python object holder to NULL so that we can use Py_XDECREF on all + * the objets when something is woring + */ + w = NULL; + mu = NULL; + va = NULL; + cx = NULL; + cxx = NULL; + + w_a = NULL; + mu_a = NULL; + va_a = NULL; + cx_a = NULL; + cxx_a = NULL; + /* + * Parsing of args: w, cx and cxx are inout + */ + if (!PyArg_ParseTuple(args, "dOOOOOd", &sample, &w, &mu, &va, &cx, &cxx, &nu)){ + return NULL; + } + if ( (nu > 1) | (nu <= 0) ) { + 
PyErr_SetString(PyExc_TypeError, "nu should be between 0 and 1"); + return NULL; + } + + /* inout entries */ + w_a = PyArray_FROM_OTF(w, NPY_DOUBLE, NPY_INOUT_ARRAY); + if (w_a == NULL) { + PyErr_SetString(PyExc_TypeError, "w array not convertible"); + return NULL; + } + + cx_a = PyArray_FROM_OTF(cx, NPY_DOUBLE, NPY_INOUT_ARRAY); + if (cx_a == NULL) { + PyErr_SetString(PyExc_TypeError, "cx array not convertible"); + goto fail; + } + + cxx_a = PyArray_FROM_OTF(cxx, NPY_DOUBLE, NPY_INOUT_ARRAY); + if (cxx_a == NULL) { + PyErr_SetString(PyExc_TypeError, "cxx array not convertible"); + goto fail; + } + + /* in only entries */ + mu_a = PyArray_FROM_OTF(mu, NPY_DOUBLE, NPY_IN_ARRAY); + if (mu_a == NULL) { + PyErr_SetString(PyExc_TypeError, "mu array not convertible"); + goto fail; + } + + va_a = PyArray_FROM_OTF(va, NPY_DOUBLE, NPY_IN_ARRAY); + if (va_a == NULL) { + PyErr_SetString(PyExc_TypeError, "va array not convertible"); + goto fail; + } + + /* + * Check that in and out have same size and same rank + */ + ndim = PyArray_NDIM(w_a); + if(ndim != mrank) { + PyErr_SetString(PyExc_TypeError, "w rank should be 1"); + goto fail; + } + ndim = PyArray_NDIM(cx_a); + if(ndim != mrank) { + PyErr_SetString(PyExc_TypeError, "cx rank should be 1"); + goto fail; + } + ndim = PyArray_NDIM(cxx_a); + if(ndim != mrank) { + PyErr_SetString(PyExc_TypeError, "cxx rank should be 1"); + goto fail; + } + ndim = PyArray_NDIM(mu_a); + if(ndim != mrank) { + PyErr_SetString(PyExc_TypeError, "mu rank should be 1"); + goto fail; + } + ndim = PyArray_NDIM(va_a); + if(ndim != mrank) { + PyErr_SetString(PyExc_TypeError, "va rank should be 1"); + goto fail; + } + + dims = PyArray_DIMS(w_a); + len = dims[0]; + //fprintf(stderr, "%s:%s, len is %d\n", __FILE__, __func__, len); + dims = PyArray_DIMS(cx_a); + if(dims[0] != len) { + PyErr_SetString(PyExc_TypeError, "cx shape should match !"); + goto fail; + } + dims = PyArray_DIMS(cxx_a); + if(dims[0] != len) { + PyErr_SetString(PyExc_TypeError, "cxx shape 
should match !"); + goto fail; + } + dims = PyArray_DIMS(mu_a); + if(dims[0] != len) { + PyErr_SetString(PyExc_TypeError, "mu_a shape should match !"); + goto fail; + } + dims = PyArray_DIMS(va_a); + if(dims[0] != len) { + PyErr_SetString(PyExc_TypeError, "va_a shape should match !"); + goto fail; + } + + /* + * Get pointer to the data + */ + w_ca = PyArray_DATA(w_a); + if (w_ca == NULL) { + PyErr_SetString(PyExc_TypeError, "Unknown Error for w_ca"); + goto fail; + } + cx_ca = PyArray_DATA(cx_a); + if (cx_ca == NULL) { + PyErr_SetString(PyExc_TypeError, "Unknown Error for cx_ca"); + goto fail; + } + cxx_ca = PyArray_DATA(cxx_a); + if (w_ca == NULL) { + PyErr_SetString(PyExc_TypeError, "Unknown Error for cxx_ca"); + goto fail; + } + mu_ca = PyArray_DATA(mu_a); + if (mu_ca == NULL) { + PyErr_SetString(PyExc_TypeError, "Unknown Error for mu_ca"); + goto fail; + } + va_ca = PyArray_DATA(va_a); + if (va_ca == NULL) { + PyErr_SetString(PyExc_TypeError, "Unknown Error for va_ca"); + goto fail; + } + /* + * Call actual implementation + */ + st = compute_ss_frame_1d(sample, len, w_ca, mu_ca, va_ca, cx_ca, cxx_ca, nu); + if (st) { + PyErr_SetString(PyExc_TypeError, "Error while calling multi_gauss...."); + goto fail; + } + st = update_em_frame_1d(w_ca, cx_ca, cxx_ca, len, mu_ca, va_ca); + if (st) { + PyErr_SetString(PyExc_TypeError, "Error while calling update_em_frame_1d...."); + goto fail; + } + + Py_DECREF(w_a); + Py_DECREF(cx_a); + Py_DECREF(cxx_a); + Py_DECREF(mu_a); + Py_DECREF(va_a); + + Py_INCREF(Py_None); + return Py_None; + +fail: + Py_XDECREF(w_a); + Py_XDECREF(cx_a); + Py_XDECREF(cxx_a); + Py_XDECREF(mu_a); + Py_XDECREF(va_a); + return NULL; +} + +int compute_ss_frame_1d(double sample, int nc, double* w, double* mu, double *va, + double *cx, double *cxx, double nu) +{ + /* + * TODO: check va division + */ + int i; + double inva, fac, *gam, acc; + + gam = malloc(sizeof(*gam) * nc); + if (gam == NULL) { + return -1; + } + + /* Compute gamma */ + acc = 0; + for (i = 
0; i < nc; ++i) { + inva = 1/va[i]; + fac = 1 / sqrt(2 * M_PI * va[i]); + gam[i] = fac * exp( -0.5 * inva * (sample - mu[i]) * (sample - mu[i])); + gam[i] *= w[i]; + acc += gam[i]; + } + /* Normalize gamma */ + for (i = 0; i < nc; ++i) { + gam[i] /= acc; + } + + /* Compute new SS from EM (cx and cxx) */ + for (i = 0; i < nc; ++i) { + w[i] *= (1 - nu); + w[i] += nu * gam[i]; + cx[i] = (1 - nu) * cx[i] + nu * sample * gam[i]; + cxx[i] = (1 - nu) * cxx[i] + nu * sample * sample * gam[i]; + } + + free(gam); + + return 0; +} + +/* + * update mu and va from SS w, cx and cxx. Only mu and va are modified + * all arrays have same length nc + */ +int update_em_frame_1d(const double* cw, const double* cx, const double *cxx, + int nc, double *cmu, double *cva) +{ + /* + * TODO: check va division + */ + int i; + double invw; + + /* Compute new SS from EM (cx and cxx) */ + for (i = 0; i < nc; ++i) { + invw = 1/cw[i]; + cmu[i] = cx[i] * invw; + cva[i] = cxx[i] * invw - cmu[i] * cmu[i]; + } + + return 0; +} Modified: trunk/Lib/sandbox/pyem/tests/test_online_em.py =================================================================== --- trunk/Lib/sandbox/pyem/tests/test_online_em.py 2006-12-06 09:22:34 UTC (rev 2356) +++ trunk/Lib/sandbox/pyem/tests/test_online_em.py 2006-12-06 12:27:51 UTC (rev 2357) @@ -1,5 +1,5 @@ #! 
/usr/bin/env python -# Last Change: Thu Nov 16 09:00 PM 2006 J +# Last Change: Wed Dec 06 09:00 PM 2006 J import copy @@ -11,7 +11,7 @@ set_package_path() from pyem import GM, GMM -from pyem.online_em import OnGMM +from pyem.online_em import OnGMM, OnGMM1d restore_path() # #Optional: @@ -21,6 +21,7 @@ # Error precision allowed (nb of decimals) AR_AS_PREC = 12 +KM_ITER = 5 class OnlineEmTest(NumpyTestCase): def _create_model(self, d, k, mode, nframes, emiter): @@ -38,7 +39,7 @@ # Init the model lgm = GM(d, k, mode) gmm = GMM(lgm, 'kmean') - gmm.init(data) + gmm.init(data, niter = KM_ITER) self.gm0 = copy.copy(gmm.gm) # The actual EM, with likelihood computation @@ -91,7 +92,7 @@ ogm = GM(d, k, mode) ogmm = OnGMM(ogm, 'kmean') init_data = self.data - ogmm.init(init_data) + ogmm.init(init_data, niter = KM_ITER) # Check that online kmean init is the same than kmean offline init ogm0 = copy.copy(ogm) @@ -116,8 +117,8 @@ ogmm.pva = ogmm.cva.copy() for e in range(emiter): for t in range(nframes): - ogmm.compute_sufficient_statistics(self.data[t:t+1, :], nu[t]) - ogmm.update_em() + ogmm.compute_sufficient_statistics_frame(self.data[t], nu[t]) + ogmm.update_em_frame() # Change pw args only a each epoch ogmm.pw = ogmm.cw.copy() @@ -147,12 +148,56 @@ d = 1 k = 2 mode = 'diag' - nframes = int(1e3) + nframes = int(5e2) emiter = 4 self._create_model(d, k, mode, nframes, emiter) self._run_pure_online(d, k, mode, nframes) + def check_1d_imp(self): + d = 1 + k = 2 + mode = 'diag' + nframes = int(5e2) + emiter = 4 + + self._create_model(d, k, mode, nframes, emiter) + gmref = self._run_pure_online(d, k, mode, nframes) + gmtest = self._run_pure_online_1d(d, k, mode, nframes) + + assert_array_almost_equal(gmref.w, gmtest.w, AR_AS_PREC) + assert_array_almost_equal(gmref.mu, gmtest.mu, AR_AS_PREC) + assert_array_almost_equal(gmref.va, gmtest.va, AR_AS_PREC) + + def _run_pure_online_1d(self, d, k, mode, nframes): + #++++++++++++++++++++++++++++++++++++++++ + # Approximate the models with 
online EM + #++++++++++++++++++++++++++++++++++++++++ + ogm = GM(d, k, mode) + ogmm = OnGMM1d(ogm, 'kmean') + init_data = self.data[0:nframes / 20, :] + ogmm.init(init_data[:, 0]) + + # Forgetting param + ku = 0.005 + t0 = 200 + lamb = 1 - 1/(N.arange(-1, nframes-1) * ku + t0) + nu0 = 0.2 + nu = N.zeros((len(lamb), 1)) + nu[0] = nu0 + for i in range(1, len(lamb)): + nu[i] = 1./(1 + lamb[i] / nu[i-1]) + + # object version of online EM + for t in range(nframes): + # the assert are here to check we do not create copies + # unvoluntary for parameters + a, b, c = ogmm.compute_sufficient_statistics_frame(self.data[t, 0], nu[t]) + ogmm.update_em_frame(a, b, c) + + ogmm.gm.set_param(ogmm.cw, ogmm.cmu[:, N.newaxis], ogmm.cva[:, N.newaxis]) + + return ogmm.gm def _run_pure_online(self, d, k, mode, nframes): #++++++++++++++++++++++++++++++++++++++++ # Approximate the models with online EM @@ -179,10 +224,11 @@ assert ogmm.pw is ogmm.cw assert ogmm.pmu is ogmm.cmu assert ogmm.pva is ogmm.cva - ogmm.compute_sufficient_statistics(self.data[t:t+1, :], nu[t]) - ogmm.update_em() + ogmm.compute_sufficient_statistics_frame(self.data[t], nu[t]) + ogmm.update_em_frame() ogmm.gm.set_param(ogmm.cw, ogmm.cmu, ogmm.cva) + return ogmm.gm if __name__ == "__main__": NumpyTest().run() From scipy-svn at scipy.org Wed Dec 6 07:48:15 2006 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Wed, 6 Dec 2006 06:48:15 -0600 (CST) Subject: [Scipy-svn] r2358 - trunk/Lib/sandbox/pyem/profile_data Message-ID: <20061206124815.03E0939C00E@new.scipy.org> Author: cdavid Date: 2006-12-06 06:46:23 -0600 (Wed, 06 Dec 2006) New Revision: 2358 Added: trunk/Lib/sandbox/pyem/profile_data/profile_online_em.py Log: Add a script to profile online em Added: trunk/Lib/sandbox/pyem/profile_data/profile_online_em.py =================================================================== --- trunk/Lib/sandbox/pyem/profile_data/profile_online_em.py 2006-12-06 12:27:51 UTC (rev 2357) +++ 
trunk/Lib/sandbox/pyem/profile_data/profile_online_em.py 2006-12-06 12:46:23 UTC (rev 2358) @@ -0,0 +1,241 @@ +# /usr/bin/python +# Last Change: Wed Dec 06 08:00 PM 2006 J +import copy + +import numpy as N + +from gauss_mix import GM +from gmm_em import GMM + +def _generate_data(nframes, d, k, mode = 'diag'): + N.random.seed(0) + w, mu, va = GM.gen_param(d, k, mode, spread = 1.5) + gm = GM.fromvalues(w, mu, va) + # Sample nframes frames from the model + data = gm.sample(nframes) + + #++++++++++++++++++++++++++++++++++++++++++ + # Approximate the models with classical EM + #++++++++++++++++++++++++++++++++++++++++++ + emiter = 5 + # Init the model + lgm = GM(d, k, mode) + gmm = GMM(lgm, 'kmean') + gmm.init(data) + + gm0 = copy.copy(gmm.gm) + # The actual EM, with likelihood computation + like = N.zeros(emiter) + for i in range(emiter): + g, tgd = gmm.sufficient_statistics(data) + like[i] = N.sum(N.log(N.sum(tgd, 1)), axis = 0) + gmm.update_em(data, g) + + return data, gm + +nframes = int(5e3) +d = 1 +k = 2 +niter = 1 + +def test_v1(): + # Generate test data + data, gm = _generate_data(nframes, d, k) + for i in range(niter): + iter_1(data, gm) + +def test_v2(): + # Generate test data + data, gm = _generate_data(nframes, d, k) + for i in range(niter): + iter_2(data, gm) + +def test_v3(): + # Generate test data + data, gm = _generate_data(nframes, d, k) + for i in range(niter): + iter_3(data, gm) + +def test_v4(): + # Generate test data + data, gm = _generate_data(nframes, d, k) + for i in range(niter): + iter_4(data, gm) + +def iter_1(data, gm): + """Online EM with original densities + original API""" + from online_em import OnGMM + + nframes = data.shape[0] + ogm = copy.copy(gm) + ogmm = OnGMM(ogm, 'kmean') + init_data = data[0:nframes / 20, :] + ogmm.init(init_data) + + # Forgetting param + ku = 0.005 + t0 = 200 + lamb = 1 - 1/(N.arange(-1, nframes-1) * ku + t0) + nu0 = 0.2 + nu = N.zeros((len(lamb), 1)) + nu[0] = nu0 + for i in range(1, len(lamb)): + nu[i] = 1./(1 
+ lamb[i] / nu[i-1]) + + # object version of online EM + for t in range(nframes): + ogmm.compute_sufficient_statistics_frame(data[t], nu[t]) + ogmm.update_em_frame() + + ogmm.gm.set_param(ogmm.cw, ogmm.cmu, ogmm.cva) + print ogmm.cw + print ogmm.cmu + print ogmm.cva + +def iter_2(data, gm): + """Online EM with densities2 + original API""" + from online_em2 import OnGMM + + nframes = data.shape[0] + ogm = copy.copy(gm) + ogmm = OnGMM(ogm, 'kmean') + init_data = data[0:nframes / 20, :] + ogmm.init(init_data) + + # Forgetting param + ku = 0.005 + t0 = 200 + lamb = 1 - 1/(N.arange(-1, nframes-1) * ku + t0) + nu0 = 0.2 + nu = N.zeros((len(lamb), 1)) + nu[0] = nu0 + for i in range(1, len(lamb)): + nu[i] = 1./(1 + lamb[i] / nu[i-1]) + + # object version of online EM + for t in range(nframes): + ogmm.compute_sufficient_statistics_frame(data[t], nu[t]) + ogmm.update_em_frame() + + ogmm.gm.set_param(ogmm.cw, ogmm.cmu, ogmm.cva) + print ogmm.cw + print ogmm.cmu + print ogmm.cva + +def iter_3(data, gm): + """Online EM with densities + 1d API""" + from online_em import OnGMM1d + + #def blop(self, frame, nu): + # self.compute_sufficient_statistics_frame(frame, nu) + #OnGMM.blop = blop + + nframes = data.shape[0] + ogm = copy.copy(gm) + ogmm = OnGMM1d(ogm, 'kmean') + init_data = data[0:nframes / 20, :] + ogmm.init(init_data[:, 0]) + + # Forgetting param + ku = 0.005 + t0 = 200 + lamb = 1 - 1/(N.arange(-1, nframes-1) * ku + t0) + nu0 = 0.2 + nu = N.zeros((len(lamb), 1)) + nu[0] = nu0 + for i in range(1, len(lamb)): + nu[i] = 1./(1 + lamb[i] / nu[i-1]) + + # object version of online EM + for t in range(nframes): + #assert ogmm.cw is ogmm.pw + #assert ogmm.cva is ogmm.pva + #assert ogmm.cmu is ogmm.pmu + a, b, c = ogmm.compute_sufficient_statistics_frame(data[t, 0], nu[t]) + ##ogmm.blop(data[t,0], nu[t]) + ogmm.update_em_frame(a, b, c) + + #ogmm.gm.set_param(ogmm.cw, ogmm.cmu, ogmm.cva) + print ogmm.cw + print ogmm.cmu + print ogmm.cva + +def iter_4(data, gm): + """Online EM with 
densities2 + 1d API""" + from online_em2 import OnGMM1d + + #def blop(self, frame, nu): + # self.compute_sufficient_statistics_frame(frame, nu) + #OnGMM.blop = blop + + nframes = data.shape[0] + ogm = copy.copy(gm) + ogmm = OnGMM1d(ogm, 'kmean') + init_data = data[0:nframes / 20, :] + ogmm.init(init_data[:, 0]) + + # Forgetting param + ku = 0.005 + t0 = 200 + lamb = 1 - 1/(N.arange(-1, nframes-1) * ku + t0) + nu0 = 0.2 + nu = N.zeros((len(lamb), 1)) + nu[0] = nu0 + for i in range(1, len(lamb)): + nu[i] = 1./(1 + lamb[i] / nu[i-1]) + + # object version of online EM + def blop(): + #for t in range(nframes): + # #assert ogmm.cw is ogmm.pw + # #assert ogmm.cva is ogmm.pva + # #assert ogmm.cmu is ogmm.pmu + # #a, b, c = ogmm.compute_sufficient_statistics_frame(data[t, 0], nu[t]) + # ###ogmm.blop(data[t,0], nu[t]) + # #ogmm.update_em_frame(a, b, c) + # ogmm.compute_em_frame(data[t, 0], nu[t]) + [ogmm.compute_em_frame(data[t, 0], nu[t]) for t in range(nframes)] + blop() + + #ogmm.gm.set_param(ogmm.cw, ogmm.cmu, ogmm.cva) + print ogmm.cw + print ogmm.cmu + print ogmm.cva + + + +if __name__ == '__main__': + #import hotshot, hotshot.stats + #profile_file = 'onem1.prof' + #prof = hotshot.Profile(profile_file, lineevents=1) + #prof.runcall(test_v1) + #p = hotshot.stats.load(profile_file) + #print p.sort_stats('cumulative').print_stats(20) + #prof.close() + + #import hotshot, hotshot.stats + #profile_file = 'onem2.prof' + #prof = hotshot.Profile(profile_file, lineevents=1) + #prof.runcall(test_v2) + #p = hotshot.stats.load(profile_file) + #print p.sort_stats('cumulative').print_stats(20) + #prof.close() + + import hotshot, hotshot.stats + profile_file = 'onem3.prof' + prof = hotshot.Profile(profile_file, lineevents=1) + prof.runcall(test_v3) + p = hotshot.stats.load(profile_file) + print p.sort_stats('cumulative').print_stats(20) + prof.close() + + import hotshot, hotshot.stats + profile_file = 'onem4.prof' + prof = hotshot.Profile(profile_file, lineevents=1) + 
prof.runcall(test_v4) + p = hotshot.stats.load(profile_file) + print p.sort_stats('cumulative').print_stats(20) + prof.close() + #test_v1() + #test_v2() + #test_v3() From scipy-svn at scipy.org Wed Dec 6 20:11:36 2006 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Wed, 6 Dec 2006 19:11:36 -0600 (CST) Subject: [Scipy-svn] r2359 - in trunk/Lib: linalg sandbox sandbox/xplt signal stats Message-ID: <20061207011136.48CC039C127@new.scipy.org> Author: oliphant Date: 2006-12-06 19:11:18 -0600 (Wed, 06 Dec 2006) New Revision: 2359 Added: trunk/Lib/sandbox/wavelet/ Modified: trunk/Lib/linalg/__init__.py trunk/Lib/linalg/basic.py trunk/Lib/sandbox/xplt/Mplot.py trunk/Lib/signal/signaltools.py trunk/Lib/signal/sigtoolsmodule.c trunk/Lib/stats/continuous.lyx Log: Fix up so that scipy.linalg.pinv2 is selected for numpy.dual (it uses SVD like numpy.dual does). Modified: trunk/Lib/linalg/__init__.py =================================================================== --- trunk/Lib/linalg/__init__.py 2006-12-06 12:46:23 UTC (rev 2358) +++ trunk/Lib/linalg/__init__.py 2006-12-07 01:11:18 UTC (rev 2359) @@ -15,11 +15,17 @@ from numpy.dual import register_func for k in ['norm', 'inv', 'svd', 'solve', 'det', 'eig', 'eigh', 'eigvals', - 'eigvalsh', 'lstsq', 'pinv', 'cholesky']: + 'eigvalsh', 'lstsq', 'cholesky']: try: register_func(k, eval(k)) except ValueError: pass + +try: + register_func('pinv', pinv2) +except ValueError: + pass + del k, register_func from numpy.testing import ScipyTest Modified: trunk/Lib/linalg/basic.py =================================================================== --- trunk/Lib/linalg/basic.py 2006-12-06 12:46:23 UTC (rev 2358) +++ trunk/Lib/linalg/basic.py 2006-12-07 01:11:18 UTC (rev 2359) @@ -370,13 +370,15 @@ return x,resids,rank,s -def pinv(a, cond=None): - """ pinv(a, cond=None) -> a_pinv +def pinv(a, cond=None, rcond=None): + """ pinv(a, rcond=None) -> a_pinv Compute generalized inverse of A using least-squares solver. 
""" a = asarray_chkfinite(a) b = numpy.identity(a.shape[0], dtype=a.dtype) + if rcond is not None: + cond = rcond return lstsq(a, b, cond=cond)[0] @@ -384,14 +386,16 @@ feps = numpy.finfo(single).eps _array_precision = {'f': 0, 'd': 1, 'F': 0, 'D': 1} -def pinv2(a, cond=None): - """ pinv2(a, cond=None) -> a_pinv +def pinv2(a, cond=None, rcond=None): + """ pinv2(a, rcond=None) -> a_pinv Compute the generalized inverse of A using svd. """ a = asarray_chkfinite(a) u, s, vh = decomp.svd(a) t = u.dtype.char + if rcond is not None: + cond = rcond if cond in [None,-1]: cond = {0: feps*1e3, 1: eps*1e6}[_array_precision[t]] m,n = a.shape Modified: trunk/Lib/sandbox/xplt/Mplot.py =================================================================== --- trunk/Lib/sandbox/xplt/Mplot.py 2006-12-06 12:46:23 UTC (rev 2358) +++ trunk/Lib/sandbox/xplt/Mplot.py 2006-12-07 01:11:18 UTC (rev 2359) @@ -10,8 +10,8 @@ numpy = Numeric from numpy import ravel, reshape, repeat, arange, transpose, compress, \ where, ones, newaxis, asarray -import numpy.lib.mlab as MLab -from numpy.lib.mlab import pi, cos, sin, arctan2, array, angle +import numpy.oldnumeric.mlab as MLab +from numpy.oldnumeric.mlab import pi, cos, sin, arctan2, array, angle import types import write_style points = 0.0013000 Modified: trunk/Lib/signal/signaltools.py =================================================================== --- trunk/Lib/signal/signaltools.py 2006-12-06 12:46:23 UTC (rev 2358) +++ trunk/Lib/signal/signaltools.py 2006-12-07 01:11:18 UTC (rev 2359) @@ -277,7 +277,7 @@ def convolve2d(in1, in2, mode='full', boundary='fill', fillvalue=0): - """Conolve two 2-dimensional arrays. + """Convolve two 2-dimensional arrays. 
Description: Modified: trunk/Lib/signal/sigtoolsmodule.c =================================================================== --- trunk/Lib/signal/sigtoolsmodule.c 2006-12-06 12:46:23 UTC (rev 2358) +++ trunk/Lib/signal/sigtoolsmodule.c 2006-12-07 01:11:18 UTC (rev 2359) @@ -1740,7 +1740,7 @@ PyObject *in1=NULL, *in2=NULL, *fill_value=NULL; int mode=2, boundary=0, typenum, flag, flip=1, ret; - intp *aout_dimens, *dims=NULL; + intp *aout_dimens=NULL, *dims=NULL; char zeros[32]; /* Zeros */ int n1, n2, i; PyArrayObject *ain1=NULL, *ain2=NULL, *aout=NULL; @@ -1804,7 +1804,6 @@ } aout = (PyArrayObject *)PyArray_SimpleNew(ain1->nd, aout_dimens, typenum); - free(aout_dimens); if (aout == NULL) goto fail; flag = mode + boundary + (typenum << TYPE_SHIFT) + \ @@ -1849,6 +1848,7 @@ } fail: + free(aout_dimens); Py_XDECREF(ain1); Py_XDECREF(ain2); Py_XDECREF(aout); Modified: trunk/Lib/stats/continuous.lyx =================================================================== --- trunk/Lib/stats/continuous.lyx 2006-12-06 12:46:23 UTC (rev 2358) +++ trunk/Lib/stats/continuous.lyx 2006-12-07 01:11:18 UTC (rev 2359) @@ -2931,8 +2931,8 @@ \begin_inset Formula \begin{eqnarray*} -\mu & = & \frac{1}{a-1}\\ -\mu_{2} & = & \frac{1}{\left(a-2\right)\left(a-1\right)}-\mu^{2}\\ +\mu & = & \frac{1}{a-1}\quad a>1\\ +\mu_{2} & = & \frac{1}{\left(a-2\right)\left(a-1\right)}-\mu^{2}\quad a>2\\ \gamma_{1} & = & \frac{\frac{1}{\left(a-3\right)\left(a-2\right)\left(a-1\right)}-3\mu\mu_{2}-\mu^{3}}{\mu_{2}^{3/2}}\\ \gamma_{2} & = & \frac{\frac{1}{\left(a-4\right)\left(a-3\right)\left(a-2\right)\left(a-1\right)}-4\mu\mu_{3}-6\mu^{2}\mu_{2}-\mu^{4}}{\mu_{2}^{2}}-3\end{eqnarray*} From scipy-svn at scipy.org Wed Dec 6 20:35:09 2006 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Wed, 6 Dec 2006 19:35:09 -0600 (CST) Subject: [Scipy-svn] r2360 - trunk/Lib/sandbox Message-ID: <20061207013509.29F8A39C11B@new.scipy.org> Author: mattknox_ca Date: 2006-12-06 19:34:33 -0600 (Wed, 06 Dec 2006) New 
Revision: 2360 Added: trunk/Lib/sandbox/test/ Log: testing svn. Will delete this if I get it working From scipy-svn at scipy.org Wed Dec 6 20:35:52 2006 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Wed, 6 Dec 2006 19:35:52 -0600 (CST) Subject: [Scipy-svn] r2361 - trunk/Lib/sandbox Message-ID: <20061207013552.43FA439C11B@new.scipy.org> Author: mattknox_ca Date: 2006-12-06 19:35:50 -0600 (Wed, 06 Dec 2006) New Revision: 2361 Removed: trunk/Lib/sandbox/test/ Log: was just testing svn. deleting my test folder From scipy-svn at scipy.org Thu Dec 7 07:39:06 2006 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Thu, 7 Dec 2006 06:39:06 -0600 (CST) Subject: [Scipy-svn] r2362 - in trunk/Lib/sparse: . tests Message-ID: <20061207123906.69FBD39C08E@new.scipy.org> Author: rc Date: 2006-12-07 06:39:01 -0600 (Thu, 07 Dec 2006) New Revision: 2362 Modified: trunk/Lib/sparse/sparse.py trunk/Lib/sparse/tests/test_sparse.py Log: coo._normalize() update by Nathan Bell (ticket 326) Modified: trunk/Lib/sparse/sparse.py =================================================================== --- trunk/Lib/sparse/sparse.py 2006-12-07 01:35:50 UTC (rev 2361) +++ trunk/Lib/sparse/sparse.py 2006-12-07 12:39:01 UTC (rev 2362) @@ -2276,19 +2276,20 @@ def _normalize(self, rowfirst=False): if rowfirst: - l = zip(self.row, self.col, self.data) - l.sort() - row, col, data = list(itertools.izip(*l)) - return data, row, col + #sort by increasing rows first, columns second + if getattr(self, '_is_normalized', None): + #columns already sorted, use stable sort for rows + P = numpy.argsort(self.row,kind='mergesort') + return self.data[P], self.row[P], self.col[P] + else: + #nothing already sorted + P = numpy.lexsort(keys=(self.col,self.row)) + return self.data[P], self.row[P], self.col[P] if getattr(self, '_is_normalized', None): return self.data, self.row, self.col - l = zip(self.col, self.row, self.data) - l.sort() - # This breaks when len(self.data) etc == 0. Does this matter? 
- col, row, data = list(itertools.izip(*l)) - self.col = asarray(col, intc) - self.row = asarray(row, intc) - self.data = array(data, self.dtype) + #sort by increasing rows first, columns second + P = numpy.lexsort(keys=(self.row,self.col)) + self.data,self.row,self.col = self.data[P], self.row[P], self.col[P] setattr(self, '_is_normalized', 1) return self.data, self.row, self.col Modified: trunk/Lib/sparse/tests/test_sparse.py =================================================================== --- trunk/Lib/sparse/tests/test_sparse.py 2006-12-07 01:35:50 UTC (rev 2361) +++ trunk/Lib/sparse/tests/test_sparse.py 2006-12-07 12:39:01 UTC (rev 2362) @@ -21,8 +21,8 @@ import random from numpy.testing import * set_package_path() -from scipy.sparse import csc_matrix, csr_matrix, dok_matrix, spidentity, \ - speye, lil_matrix +from scipy.sparse import csc_matrix, csr_matrix, dok_matrix, coo_matrix, \ + spidentity, speye, lil_matrix from scipy.linsolve import splu restore_path() @@ -638,6 +638,38 @@ b = array([[1, 0, 0], [0, 1, 0], [0, 0, 1]], dtype='d') assert_array_equal(a.toarray(), b) +class test_coo(ScipyTestCase): + def check_normalize( self ): + + row = numpy.array([2, 3, 1, 3, 0, 1, 3, 0, 2, 1, 2]) + col = numpy.array([0, 1, 0, 0, 1, 1, 2, 2, 2, 2, 1]) + data = numpy.array([ 6., 10., 3., 9., 1., 4., + 11., 2., 8., 5., 7.]) + + # coo.todense() + # matrix([[ 0., 1., 2.], + # [ 3., 4., 5.], + # [ 6., 7., 8.], + # [ 9., 10., 11.]]) + coo = coo_matrix((data,(row,col)),(4,3)) + + ndata,nrow,ncol = coo._normalize(rowfirst=True) + assert(zip(nrow,ncol,ndata) == sorted(zip(row,col,data))) #should sort by rows, then cols + assert_array_equal(coo.data, data) #coo.data has not changed + assert_array_equal(coo.row, row) #coo.row has not changed + assert_array_equal(coo.col, col) #coo.col has not changed + + + ndata,nrow,ncol = coo._normalize(rowfirst=False) + assert(zip(ncol,nrow,ndata) == sorted(zip(col,row,data))) #should sort by cols, then rows + assert_array_equal(coo.data, 
ndata) #coo.data has changed + assert_array_equal(coo.row, nrow) #coo.row has changed + assert_array_equal(coo.col, ncol) #coo.col has changed + + assert_array_equal(coo.tocsr().todense(), coo.todense()) + assert_array_equal(coo.tocsc().todense(), coo.todense()) + + if __name__ == "__main__": ScipyTest().run() From scipy-svn at scipy.org Thu Dec 7 22:58:41 2006 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Thu, 7 Dec 2006 21:58:41 -0600 (CST) Subject: [Scipy-svn] r2363 - trunk/Lib/io/tests Message-ID: <20061208035841.8AE2B39C00A@new.scipy.org> Author: oliphant Date: 2006-12-07 21:58:39 -0600 (Thu, 07 Dec 2006) New Revision: 2363 Modified: trunk/Lib/io/tests/test_recaster.py Log: Fix test. Modified: trunk/Lib/io/tests/test_recaster.py =================================================================== --- trunk/Lib/io/tests/test_recaster.py 2006-12-07 12:39:01 UTC (rev 2362) +++ trunk/Lib/io/tests/test_recaster.py 2006-12-08 03:58:39 UTC (rev 2363) @@ -90,7 +90,8 @@ (N.int64, params[N.int64]['min'], 0)] for T, mn, mx in test_triples: rt = RF.smallest_int_sctype(mx, mn) - assert rt == T, 'Expected %s, got %s type' % (T, rt) + assert N.dtype(rt) == N.dtype(T), \ + 'Expected %s, got %s type' % (T, rt) def test_downcasts(self): value = 100 From scipy-svn at scipy.org Thu Dec 7 22:59:33 2006 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Thu, 7 Dec 2006 21:59:33 -0600 (CST) Subject: [Scipy-svn] r2364 - tags Message-ID: <20061208035933.57AB239C00A@new.scipy.org> Author: oliphant Date: 2006-12-07 21:59:29 -0600 (Thu, 07 Dec 2006) New Revision: 2364 Added: tags/0.5.2/ Log: Tag for 0.5.2 release Copied: tags/0.5.2 (from rev 2363, trunk) From scipy-svn at scipy.org Thu Dec 7 23:00:13 2006 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Thu, 7 Dec 2006 22:00:13 -0600 (CST) Subject: [Scipy-svn] r2365 - trunk/Lib Message-ID: <20061208040013.D9C1739C00A@new.scipy.org> Author: oliphant Date: 2006-12-07 22:00:07 -0600 (Thu, 07 Dec 2006) New 
Revision: 2365 Modified: trunk/Lib/version.py Log: New version. Modified: trunk/Lib/version.py =================================================================== --- trunk/Lib/version.py 2006-12-08 03:59:29 UTC (rev 2364) +++ trunk/Lib/version.py 2006-12-08 04:00:07 UTC (rev 2365) @@ -1,4 +1,4 @@ -version = '0.5.2' +version = '0.5.3' release=False if not release: From scipy-svn at scipy.org Thu Dec 7 23:02:04 2006 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Thu, 7 Dec 2006 22:02:04 -0600 (CST) Subject: [Scipy-svn] r2366 - tags/0.5.2/Lib Message-ID: <20061208040204.4973639C00A@new.scipy.org> Author: oliphant Date: 2006-12-07 22:02:02 -0600 (Thu, 07 Dec 2006) New Revision: 2366 Modified: tags/0.5.2/Lib/version.py Log: Make tag a release. Modified: tags/0.5.2/Lib/version.py =================================================================== --- tags/0.5.2/Lib/version.py 2006-12-08 04:00:07 UTC (rev 2365) +++ tags/0.5.2/Lib/version.py 2006-12-08 04:02:02 UTC (rev 2366) @@ -1,5 +1,5 @@ version = '0.5.2' -release=False +release=True if not release: import os From scipy-svn at scipy.org Thu Dec 7 23:21:58 2006 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Thu, 7 Dec 2006 22:21:58 -0600 (CST) Subject: [Scipy-svn] r2367 - tags/0.5.2/Lib/stsci/convolve Message-ID: <20061208042158.2676839C04E@new.scipy.org> Author: oliphant Date: 2006-12-07 22:21:55 -0600 (Thu, 07 Dec 2006) New Revision: 2367 Modified: tags/0.5.2/Lib/stsci/convolve/setup.py Log: Fix author Modified: tags/0.5.2/Lib/stsci/convolve/setup.py =================================================================== --- tags/0.5.2/Lib/stsci/convolve/setup.py 2006-12-08 04:02:02 UTC (rev 2366) +++ tags/0.5.2/Lib/stsci/convolve/setup.py 2006-12-08 04:21:55 UTC (rev 2367) @@ -5,10 +5,6 @@ from numpy.distutils.misc_util import Configuration config = Configuration('convolve',parent_package,top_path, package_path='lib', - author='Todd Miller', - author_email = 'help at stsci.edu', - description = 
'image array convolution functions', - version = '0.1' ) config.add_extension('_correlate', @@ -20,5 +16,9 @@ if __name__ == "__main__": from numpy.distutils.core import setup config = configuration(top_path='').todict() - setup(**config) + setup(version = '0.1', + author='Todd Miller', + author_email = 'help at stsci.edu', + description = 'image array convolution functions', + **config) From scipy-svn at scipy.org Thu Dec 7 23:24:22 2006 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Thu, 7 Dec 2006 22:24:22 -0600 (CST) Subject: [Scipy-svn] r2368 - tags/0.5.2/Lib/stsci/image Message-ID: <20061208042422.3394D39C04E@new.scipy.org> Author: oliphant Date: 2006-12-07 22:24:19 -0600 (Thu, 07 Dec 2006) New Revision: 2368 Modified: tags/0.5.2/Lib/stsci/image/setup.py Log: Fix author Modified: tags/0.5.2/Lib/stsci/image/setup.py =================================================================== --- tags/0.5.2/Lib/stsci/image/setup.py 2006-12-08 04:21:55 UTC (rev 2367) +++ tags/0.5.2/Lib/stsci/image/setup.py 2006-12-08 04:24:19 UTC (rev 2368) @@ -4,12 +4,7 @@ def configuration(parent_package='',top_path=None): from numpy.distutils.misc_util import Configuration config = Configuration('image',parent_package,top_path, - package_path='lib', - author='Todd Miller', - author_email = 'help at stsci.edu', - description = 'image array manipulation functions', - version = '0.1' - ) + package_path='lib') config.add_extension('_combine', sources=["src/_combinemodule.c"], define_macros = [('NUMPY', '1')], @@ -19,5 +14,9 @@ if __name__ == "__main__": from numpy.distutils.core import setup config = configuration(top_path='').todict() - setup(**config) + setup(version = '0.1', + author='Todd Miller', + author_email = 'help at stsci.edu', + description = 'image array manipulation functions', + **config) From scipy-svn at scipy.org Fri Dec 8 00:13:39 2006 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Thu, 7 Dec 2006 23:13:39 -0600 (CST) Subject: [Scipy-svn] r2369 
- in trunk/Lib/stsci: convolve image Message-ID: <20061208051339.A458C39C00A@new.scipy.org> Author: oliphant Date: 2006-12-07 23:13:33 -0600 (Thu, 07 Dec 2006) New Revision: 2369 Modified: trunk/Lib/stsci/convolve/setup.py trunk/Lib/stsci/image/setup.py Log: Fix author info in trunk. Modified: trunk/Lib/stsci/convolve/setup.py =================================================================== --- trunk/Lib/stsci/convolve/setup.py 2006-12-08 04:24:19 UTC (rev 2368) +++ trunk/Lib/stsci/convolve/setup.py 2006-12-08 05:13:33 UTC (rev 2369) @@ -4,12 +4,7 @@ def configuration(parent_package='',top_path=None): from numpy.distutils.misc_util import Configuration config = Configuration('convolve',parent_package,top_path, - package_path='lib', - author='Todd Miller', - author_email = 'help at stsci.edu', - description = 'image array convolution functions', - version = '0.1' - ) + package_path='lib') config.add_extension('_correlate', sources=["src/_correlatemodule.c"], @@ -20,5 +15,9 @@ if __name__ == "__main__": from numpy.distutils.core import setup config = configuration(top_path='').todict() - setup(**config) + setup(author='Todd Miller', + author_email = 'help at stsci.edu', + description = 'image array convolution functions', + version = '0.1', + **config) Modified: trunk/Lib/stsci/image/setup.py =================================================================== --- trunk/Lib/stsci/image/setup.py 2006-12-08 04:24:19 UTC (rev 2368) +++ trunk/Lib/stsci/image/setup.py 2006-12-08 05:13:33 UTC (rev 2369) @@ -4,12 +4,7 @@ def configuration(parent_package='',top_path=None): from numpy.distutils.misc_util import Configuration config = Configuration('image',parent_package,top_path, - package_path='lib', - author='Todd Miller', - author_email = 'help at stsci.edu', - description = 'image array manipulation functions', - version = '0.1' - ) + package_path='lib') config.add_extension('_combine', sources=["src/_combinemodule.c"], define_macros = [('NUMPY', '1')], @@ -19,5 +14,9 
@@ if __name__ == "__main__": from numpy.distutils.core import setup config = configuration(top_path='').todict() - setup(**config) + setup(author='Todd Miller', + author_email = 'help at stsci.edu', + description = 'image array manipulation functions', + version = '0.1', + **config) From scipy-svn at scipy.org Fri Dec 8 11:06:10 2006 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Fri, 8 Dec 2006 10:06:10 -0600 (CST) Subject: [Scipy-svn] r2370 - in trunk/Lib/sandbox: . timeseries Message-ID: <20061208160610.9732539C0A1@new.scipy.org> Author: mattknox_ca Date: 2006-12-08 10:05:35 -0600 (Fri, 08 Dec 2006) New Revision: 2370 Added: trunk/Lib/sandbox/timeseries/ trunk/Lib/sandbox/timeseries/README trunk/Lib/sandbox/timeseries/__init__.py trunk/Lib/sandbox/timeseries/__init__.pyc trunk/Lib/sandbox/timeseries/corelib.py trunk/Lib/sandbox/timeseries/corelib.pyc trunk/Lib/sandbox/timeseries/cseries.pyd trunk/Lib/sandbox/timeseries/cseriesmodule.c trunk/Lib/sandbox/timeseries/shiftingarray.py trunk/Lib/sandbox/timeseries/shiftingarray.pyc trunk/Lib/sandbox/timeseries/timeseries.py trunk/Lib/sandbox/timeseries/timeseries.pyc trunk/Lib/sandbox/timeseries/tsdate.py trunk/Lib/sandbox/timeseries/tsdate.pyc Log: uploaded timeseries module to sandbox Added: trunk/Lib/sandbox/timeseries/README =================================================================== --- trunk/Lib/sandbox/timeseries/README 2006-12-08 05:13:33 UTC (rev 2369) +++ trunk/Lib/sandbox/timeseries/README 2006-12-08 16:05:35 UTC (rev 2370) @@ -0,0 +1,22 @@ +Requirements and warnings: + +1. version 2.0.x of the mx DateTime module MUST be installed. Only "tested" with 2.0.3 +2. Only tested with numpy 1.0.1 +3. Only tested with Python 2.4.x +4. Only tested on Windows Platform +5. the included cseries.pyd file was compiled for 32-bit windows, so if you are trying + this on another platform, the first thing you need to do is recompile it + + +Instructions: + +1. 
read through the included example.py script in the examples subfolder. This illustrates + the basic functionality of the module. I recommend placing print statements after each + variable assignment, one at a time, to see the result of each item in the examples. + + Documentation is very limited, so the examples really are the best starting point. + +2. Before you get too crazy and start modifying the examples and writing your own scripts, + please read todo.txt in the doc subdirectory for an outline of limitations in the current + module. + Added: trunk/Lib/sandbox/timeseries/__init__.py =================================================================== --- trunk/Lib/sandbox/timeseries/__init__.py 2006-12-08 05:13:33 UTC (rev 2369) +++ trunk/Lib/sandbox/timeseries/__init__.py 2006-12-08 16:05:35 UTC (rev 2370) @@ -0,0 +1,5 @@ +from timeseries import * +from tsdate import * +from corelib import * +from numpy import ma +masked = ma.masked \ No newline at end of file Added: trunk/Lib/sandbox/timeseries/__init__.pyc =================================================================== (Binary files differ) Property changes on: trunk/Lib/sandbox/timeseries/__init__.pyc ___________________________________________________________________ Name: svn:mime-type + application/octet-stream Added: trunk/Lib/sandbox/timeseries/corelib.py =================================================================== --- trunk/Lib/sandbox/timeseries/corelib.py 2006-12-08 05:13:33 UTC (rev 2369) +++ trunk/Lib/sandbox/timeseries/corelib.py 2006-12-08 16:05:35 UTC (rev 2370) @@ -0,0 +1,79 @@ +import numpy + +#converts possible strings for frequency into acceptable values +def fmtFreq (freqStr): + if freqStr is None: + return None + elif freqStr.upper() in ("A","ANNUAL","B","BUSINESS","D","DAILY","M","MONTHLY","Q","QUARTERLY","S","SECONDLY"): + return freqStr[0].upper() + else: + raise ValueError("Invalid frequency: "+str(freqStr)) + + +#converts possible strings for observed into acceptable 
values +def fmtObserv(obStr): + + obsVals = ( "UNDEFINED", + "BEGINNING", + "END", + "AVERAGED", + "SUMMED", + "ANNUALIZED", + "FORMULA", + "HIGH", + "LOW") + + if obStr is None: + return None + elif obStr.upper() in obsVals: + return obStr.upper() + elif obStr.upper() in ("UNDEFINED", "BEGIN", "END", "AVERAGE", "SUM", "ANNUAL" , "FORMULA", "HIGH", "LOW"): + obStr = obStr.upper() + for x in obsVals: + if obStr[:2] == x[:2]: + return x + else: + raise ValueError("Invalid value for observed attribute: "+str(obStr)) + +def freqToType(freq): + return freqTypeMapping[fmtFreq(freq)] + + +# fake data type for date variables +class DateSpec: + def __init__(self, freq): + self.freq = fmtFreq(freq) + + def __hash__(self): return hash(self.freq) + + def __eq__(self, other): + if hasattr(other, "freq"): return self.freq == other.freq + else: return False + + def __str__(self): return "Date(" + str(self.freq) + ")" + + + +# define custom numpy types. +# Note: A more robust approach would register these as actual valid numpy types +# this is just a hack for now +numpy.dateS = DateSpec("Secondly") +numpy.dateD = DateSpec("Daily") +numpy.dateB = DateSpec("Business") +numpy.dateM = DateSpec("Monthly") +numpy.dateQ = DateSpec("Quarterly") +numpy.dateA = DateSpec("Annual") + +freqTypeMapping = { + 'S':numpy.dateS, + 'D':numpy.dateD, + 'B':numpy.dateB, + 'M':numpy.dateM, + 'Q':numpy.dateQ, + 'A':numpy.dateA +} + +def isDateType(dtype): + if len([x for x in (numpy.dateS,numpy.dateD,numpy.dateB,numpy.dateM,numpy.dateQ,numpy.dateA) if x == dtype]) > 0: return True + else: return False + Added: trunk/Lib/sandbox/timeseries/corelib.pyc =================================================================== (Binary files differ) Property changes on: trunk/Lib/sandbox/timeseries/corelib.pyc ___________________________________________________________________ Name: svn:mime-type + application/octet-stream Added: trunk/Lib/sandbox/timeseries/cseries.pyd 
=================================================================== (Binary files differ) Property changes on: trunk/Lib/sandbox/timeseries/cseries.pyd ___________________________________________________________________ Name: svn:mime-type + application/octet-stream Added: trunk/Lib/sandbox/timeseries/cseriesmodule.c =================================================================== --- trunk/Lib/sandbox/timeseries/cseriesmodule.c 2006-12-08 05:13:33 UTC (rev 2369) +++ trunk/Lib/sandbox/timeseries/cseriesmodule.c 2006-12-08 16:05:35 UTC (rev 2370) @@ -0,0 +1,538 @@ +#include +//#include +#include +#include +#include +#include "mxDateTime.h" +#include "arrayobject.h" + +static char cseries_doc[] = "Speed sensitive time series operations"; + +/////////////////////////////////////////////////////////////////////// + + +static int +freqVal(char freq) +{ + switch(freq) + { + case 'A': + //annual + return 1; + case 'Q': + //quarterly + return 2; + case 'M': + //monthly + return 3; + case 'B': + //business + return 4; + case 'D': + //daily + return 5; + default: + return 0; + } +} + + +//fromDate is periods since Dec 31, 1849 +static long +convert(long fromDate, char fromFreq, char toFreq, int notStartInd, int atEnd) +{ + long absdate, origin, secondorigin, secsInDay; + long converted; + int rem; + int y,m,d,s; + + mxDateTimeObject *theDate; + mxDateTimeObject *convDate; + + origin = 675333; + secondorigin = 722814; + secsInDay = 86400; + + //convert fromDate to days since Dec 31, 1849 (Jan 1, 1850 would have absdate of 1) + switch(fromFreq) + { + case 'D': + absdate = fromDate; + break; + case 'B': + absdate = (fromDate/5)*7 + fromDate%5; + break; + case 'M': + y = fromDate/12 + 1; + m = fromDate%12; + if (atEnd) m++; + if (m == 0) + { + m = 12; + y--; + } + d=1; + break; + case 'Q': + y = fromDate/4 + 1; + m = (fromDate%4) * 3; + if (!atEnd) m -= 2; //change to first month of quarter + else m += 1; + if (m < 1) + { + m += 12; + y--; + } + else if (m == 12) + { + m = 1; 
+ y++; + } + d=1; + break; + case 'A': + y = fromDate-1; + if (atEnd == 1) y++; + m = 1; + d = 1; + break; + default: + return -1; + } + + if (freqVal(fromFreq) < 4) + { + //switch to years from 0 for mxDateTime + y+= 1849; + + theDate = (mxDateTimeObject *)mxDateTime.DateTime_FromDateAndTime(y,m,d,0,0,0); + absdate = (long)(theDate->absdate); + if (atEnd == 1) absdate--; + } + else + { + //days from 0 for mxDateTime + absdate += origin; + } + + if (atEnd) s = secsInDay-1; + else s = 0; + + convDate = (mxDateTimeObject *)mxDateTime.DateTime_FromAbsDateAndTime(absdate,s); + + //switch back to days and years since 1849 for pyTSA Date + absdate -= origin; + y = convDate->year - 1849; + m = convDate->month; + + //convert convDate to appropriate # of periods according to toFreq + switch(toFreq) + { + case 'D': + converted = absdate; + break; + case 'B': + rem = absdate%7; + if (rem > 4) //is weekend day + { + if (notStartInd == 1 && freqVal(fromFreq) > 4) + { + return -1; + } + else + { + d = convDate->day; + d -= rem - 4; //change to friday before weekend + if (d < 1) d += 3; //if friday was prev. 
month, change to monday instead + absdate = absdate - convDate->day + d; + converted = (long)((absdate / 7 * 5.0) + absdate%7); + } + } + else + { + converted = (long)((absdate / 7 * 5.0) + rem); + } + break; + case 'M': + converted = (long)((y-1)*12 + m); + break; + case 'Q': + converted = (long)((y-1)*4 + ((m-1)/3) + 1); + break; + case 'A': + converted = (long)(y+1); + break; + default: + return -1; + } + + return converted; +} + + +static long +expand(long oldSize, char fromFr, char toFr) +{ + long newSize; + int fromFreq, toFreq; + + if (fromFr == toFr) return oldSize; + + fromFreq = freqVal(fromFr); + toFreq = freqVal(toFr); + if (fromFreq*toFreq == 0) return oldSize; //invalid frequency + + newSize = oldSize; + + while (toFreq > fromFreq) + { + if (fromFreq == 1) //Annual + { + newSize *= 4; //quarters in year + fromFreq++; + } + else if (fromFreq == 2) //Quarterly + { + newSize *= 3; //months in quarter + fromFreq++; + } + else if (fromFreq == 3) //Monthly + { + newSize *= 31; //max days in month + fromFreq++; + } + else if (fromFreq == 4) //Business + { + newSize *= 2; //max d days for each b days + fromFreq++; + } + } + + + return newSize; +} + + +/////////////////////////////////////////////////////////////////////// +/* +OBSERVED + +from lower freq to higher freq +---------------------- + +summed -- all values in period set as lower freq's value / # of values + +rest -- all values in period set as lower freq's value + +from higher freq to lower freq +---------------------- +begin - lower freq's value set as first value in period +end - lower freq's value set as end value in period +summed - lower freq's value set as sum of all values in period +averaged - lower freq's value set as average of all values in period +high - lower freq's value set as largest value in period +low - lower freq's value set as smallest value in period + +*/ +/////////////////////////////////////////////////////////////////////// + +static void +adjValForObsSet(PyArrayObject 
*theArray, char obs, PyObject **newVal, PyObject **newValMask, PyObject *val, PyObject *valMask, long curPerLen) +{ + double dblVal; + long lngValMask, lngAllMasked; + + lngValMask = PyInt_AsLong(valMask); + lngAllMasked = PyInt_AsLong(*newValMask); + + if (!lngValMask) { + + // if any value is not masked, then we shall not mask the aggregated result + *newValMask = valMask; + + if (obs == 'B') + { + if (lngAllMasked) { + *newVal = val; + } + } + else if ( PyArray_ISFLOAT(theArray) && (obs=='S' || obs=='A') ) + { + + if (obs == 'S') + { + //observed is summed + + dblVal = PyFloat_AsDouble(*newVal); + dblVal += PyFloat_AsDouble(val); + *newVal = PyFloat_FromDouble(dblVal); + } + else + { + //observed is averaged + + dblVal = PyFloat_AsDouble(*newVal); + dblVal *= (curPerLen-1); + dblVal += PyFloat_AsDouble(val); + dblVal /= curPerLen; + *newVal = PyFloat_FromDouble(dblVal); + } + + } + else if ( PyArray_ISNUMBER(theArray) && (obs=='H' || obs=='L') ) + { + + if (obs == 'H') + { + //observed is high + + if (PyFloat_AsDouble(val) > PyFloat_AsDouble(*newVal)) *newVal = val; + } + else if (obs == 'L') + { + //observed is low + + if (PyFloat_AsDouble(val) < PyFloat_AsDouble(*newVal)) *newVal = val; + } + + } + else + { + //observed is not beginning and + //val is string or (val is date and observed is summed/averaged) + //or observed is end or not supported + + *newVal = val; + } + } + +} + + +static //PyArrayObject * +setArrayItem(PyArrayObject **theArray, long index, PyObject *newVal) +{ + char *setptr; + + if (index >= 0) + { + //set value in array + setptr = (*theArray)->data + (index) * (*theArray)->strides[0]; + PyArray_SETITEM(*theArray,setptr,newVal); + } + + //return theArray; +} + + +static char cseries_reindex_doc[] = ""; +static PyObject * +cseries_reindex(PyObject *self, PyObject *args) +{ + PyArrayObject *array; + PyArrayObject *tempArray; + PyArrayObject *newArray; + + PyArrayObject *mask; + PyArrayObject *tempMask; + PyArrayObject *newMask; + + PyObject 
*returnVal = NULL; + + int notStartInd, atEnd; + long startIndex, newStart; + long i, curPerInd, nextPerInd, prevIndex, curIndex; + long dim; + long curPerLen; + long lngValMask; + char *fromFreq, *toFreq, *observed; + + char *getptr; + PyObject *val, *newVal; + + char *getptrMask; + PyObject *valMask, *newValMask; + + int toFrVal, fromFrVal; + + returnVal = PyDict_New(); + + if (!PyArg_ParseTuple(args, "OssslO:reindex(array, fromfreq, tofreq, observed, startIndex,mask)", &tempArray, &fromFreq, &toFreq, &observed, &startIndex, &tempMask)) return NULL; + + if (toFreq[0] == fromFreq[0]) + { + + PyDict_SetItemString(returnVal, "values", (PyObject*)tempArray); + PyDict_SetItemString(returnVal, "mask", (PyObject*)tempMask); + + return returnVal; + } + + array = PyArray_GETCONTIGUOUS(tempArray); + mask = PyArray_GETCONTIGUOUS(tempMask); + + //expand size to fit new values if needed + dim = expand(array->dimensions[0], fromFreq[0], toFreq[0]); + + //initialize new array + newArray = (PyArrayObject*)PyArray_SimpleNew(array->nd, &dim, array->descr->type_num); + newMask = (PyArrayObject*)PyArray_SimpleNew(mask->nd, &dim, mask->descr->type_num); + + for (i = 0; i < dim; i++) + { + setArrayItem(&newArray, i, PyInt_FromLong(1)); + setArrayItem(&newMask, i, PyInt_FromLong(1)); + } + + //convert start index to new frequency + notStartInd = 0; + atEnd = 0; + newStart = convert(startIndex, fromFreq[0], toFreq[0], notStartInd, atEnd); + + //initialize prevIndex + prevIndex = newStart - 1; + + notStartInd = 1; + atEnd = 0; + + //set values in the new array + for (i = 0; i < array->dimensions[0]; i++) + { + //find index for start of current period in new frequency + curPerInd = convert(startIndex + i, fromFreq[0], toFreq[0], notStartInd, atEnd); + + //get frequency numeric mapping + fromFrVal = freqVal(fromFreq[0]); + toFrVal = freqVal(toFreq[0]); + + //get value from old array + getptr = array->data + i*array->strides[0]; + val = PyArray_GETITEM(array,getptr); + + //get the mask 
corresponding to the old value + getptrMask = mask->data + i*mask->strides[0]; + valMask = PyArray_GETITEM(mask,getptrMask); + + if (fromFrVal < toFrVal) + { + //from lower freq to higher freq + + newVal = val; + newValMask = valMask; + + //find index for start of next period in new frequency + nextPerInd = convert(startIndex + i + 1, fromFreq[0], toFreq[0], notStartInd, atEnd); + + //adjust for observed setting + if (observed[0] == 'S' && PyArray_ISFLOAT(array) && !( (fromFrVal == 4 && toFrVal == 5) || (fromFrVal == 5 && toFrVal == 4) ) ) + { + //summed + + //all values in period set as old array's value / # of values + newVal = PyFloat_FromDouble( PyFloat_AsDouble(val) / (nextPerInd - curPerInd) ); + } + + //set each value in period + for (curIndex = curPerInd; curIndex < nextPerInd; curIndex++) + { + setArrayItem(&newArray, curIndex-newStart, newVal); + setArrayItem(&newMask, curIndex-newStart, newValMask); + } + } + else + { + + lngValMask = PyInt_AsLong(valMask); + + //from higher freq to lower freq + + if (curPerInd != prevIndex) + { + //starting new period in old array + + + //set value in the new array + setArrayItem(&newArray, prevIndex-newStart, newVal); + setArrayItem(&newMask, prevIndex-newStart, newValMask); + + //reset period length + curPerLen = 0; + + + + if (!lngValMask) { + curPerLen++; + } + + + + //store current index and value + prevIndex = curPerInd; + newVal = val; + newValMask = valMask; + + } + else + { + //still in same period + + + + if (!lngValMask) { + curPerLen++; + } + + //adjust new value according to observed setting + adjValForObsSet(array, observed[0], &newVal, &newValMask, val, valMask, curPerLen); + } + + } + + } + + //set value of last item in the new array + setArrayItem(&newArray, curPerInd-newStart, newVal); + setArrayItem(&newMask, curPerInd-newStart, newValMask); + + PyDict_SetItemString(returnVal, "values", (PyObject*)newArray); + PyDict_SetItemString(returnVal, "mask", (PyObject*)newMask); + + return returnVal; + +} + + 
+static char cseries_convert_doc[] = ""; +static PyObject * +cseries_convert(PyObject *self, PyObject *args) +{ + long fromDate; + char* fromFreq; + char* toFreq; + int notStartInd, atEnd; + + if (!PyArg_ParseTuple(args, "lss:convert(fromDate, fromfreq, tofreq)", &fromDate, &fromFreq, &toFreq)) return NULL; + + //always want start of period (only matters when converting from lower freq to higher freq ie. m -> d) + atEnd = 0; + notStartInd = 0; + + return PyInt_FromLong(convert(fromDate, fromFreq[0], toFreq[0], notStartInd, atEnd)); +} + + +/////////////////////////////////////////////////////////////////////// + +static PyMethodDef cseries_methods[] = { + {"reindex", cseries_reindex, METH_VARARGS, cseries_reindex_doc}, + {"convert", cseries_convert, METH_VARARGS, cseries_convert_doc}, + {NULL, NULL} +}; + +PyMODINIT_FUNC +initcseries(void) +{ + Py_InitModule3("cseries", cseries_methods, cseries_doc); + mxDateTime_ImportModuleAndAPI(); + import_array(); +} \ No newline at end of file Added: trunk/Lib/sandbox/timeseries/shiftingarray.py =================================================================== --- trunk/Lib/sandbox/timeseries/shiftingarray.py 2006-12-08 05:13:33 UTC (rev 2369) +++ trunk/Lib/sandbox/timeseries/shiftingarray.py 2006-12-08 16:05:35 UTC (rev 2370) @@ -0,0 +1,302 @@ +import numpy, types , corelib +import copy +from numpy import ma + +class ShiftingArray(object): + def __init__(self, values, dtype=None, startIndex=None, mask=ma.nomask): + + # hack to convert our fake date data types to real data types + if corelib.isDateType(dtype): + self.dtype = numpy.int_ + else: + self.dtype = dtype + + if self.dtype is None: + self.dtype = values.dtype + + # need to use the empty function instead of passing an empty list + # because that won't work when type=numpy.object_ + if len(values) == 0 and dtype is numpy.object_: + tempData = ma.array(numpy.empty((0,), self.dtype)) + else: + tempData = ma.array(values, self.dtype) + + newSize = tempData.size*2 + + 
firstIndex = newSize//4 + lastIndex = firstIndex + tempData.size - 1 + if startIndex is None: + self.indexZeroRepresents = None + else: + self.indexZeroRepresents = int(startIndex)-firstIndex + + if mask is not ma.nomask: + tempMask = ma.make_mask(mask) + tempData[tempMask] = ma.masked + + self.data = ma.array(numpy.empty(newSize,self.dtype)) + + if firstIndex > 0: + self.data[0:firstIndex] = ma.masked + if self.data.size > lastIndex+1: self.data[lastIndex+1:self.data.size] = ma.masked + + self.data[firstIndex:lastIndex+1] = tempData[:] + + + def shift(self, n): + self.indexZeroRepresents += n + + + #DATA ACCESS + + def __setitem__(self, index, value): + self.__expandToFit(self.__minIndex(index),self.__maxIndex(index)) + convIndex = self.__convIndex(index) + self.data[convIndex] = value + + + def __getitem__(self, index): + self.__expandToFit(self.__minIndex(index),self.__maxIndex(index)) + convIndex = self.__convIndex(index) + return self.data[convIndex] + + def _shift(self, startIndex, endIndex): + self.__expandToFit(startIndex, endIndex) + return self.data[startIndex-self.indexZeroRepresents:endIndex-self.indexZeroRepresents+1] + + + #PRIVATE FUNCTIONS + + def __convIndex(self,index): + + if self.indexZeroRepresents is not None: + if isinstance(index,ShiftingArray): + + if index.indexZeroRepresents > self.indexZeroRepresents: + #expand index to the left + originalSize = index.data.size + shiftAmt = index.indexZeroRepresents - self.indexZeroRepresents + newSize = originalSize + shiftAmt + temp = ma.array(numpy.empty(newSize, index.data.dtype)) + temp[newSize-originalSize:] = index.data + temp[0:shiftAmt] = False + temp = temp.filled(False) + else: + #chop off first portion of data + temp = index.data[self.indexZeroRepresents - index.indexZeroRepresents:].filled(False) + + # chop off extra values on right hand side + if temp.size > self.data.size: return temp[:self.data.size] + else: return temp + + elif type(index) == types.SliceType: + if index.start is None: 
tempStart = None + else: tempStart = index.start - self.indexZeroRepresents + if index.stop is None: tempStop = None + else: tempStop = index.stop - self.indexZeroRepresents + tempStep = index.step + + return slice(tempStart,tempStop,tempStep) + else: + return index - self.indexZeroRepresents + + else: + return index + + def __maxIndex(self,index): + if type(index) == types.IntType: return index + if type(index) == types.SliceType: return index.stop + elif isinstance(index,ShiftingArray): return index.lastValue() + elif hasattr(index,'__len__'): return max(index) + else: return int(index) + + def __minIndex(self,index): + if type(index) == types.IntType: return index + if type(index) == types.SliceType: return index.start + elif isinstance(index,ShiftingArray): return index.firstValue() + elif hasattr(index,'__len__'): return min(index) + else: return int(index) + + def __expandToFit(self, minRange, maxRange=None): + + if self.indexZeroRepresents is None: + self.indexZeroRepresents = minRange + + if maxRange is None: + maxRange = minRange + if maxRange < minRange: + raise ValueError("invalid range: " + str(minRange) + " to " + str(maxRange)) + + minRange -= self.indexZeroRepresents + maxRange -= self.indexZeroRepresents + + if maxRange > self.data.size-1: #expand to the right + originalSize = self.data.size + newSize = originalSize + while maxRange > newSize-1: + newSize = expandAmt(newSize) + + self.data = self.data.resize(numpy.shape(numpy.empty(newSize))) + self.data[originalSize:] = ma.masked + + + if minRange < 0: #expand to the left + originalSize = self.data.size + newSize = originalSize + shiftAmt = int(0) + while minRange + shiftAmt < 0: + newSize = expandAmt(newSize) + shiftAmt = int(newSize - originalSize) + + temp = ma.array(numpy.empty(newSize, self.data.dtype)) + temp[newSize-originalSize:] = self.data + self.data = temp + self.data[0:shiftAmt] = ma.masked + + self.indexZeroRepresents -= shiftAmt + + + + #MATH FUNCTIONS + + def __add__(self, 
other,fill_value=ma.masked): return doFunc(self,other, ma.add,fill_value=fill_value) + def __radd__(self, other): return self+other + def __sub__(self, other,fill_value=ma.masked): return doFunc(self,other, ma.subtract,fill_value=fill_value) + def __rsub__(self, other): return doFunc((self*-1),other, ma.add) + def __mul__(self, other,fill_value=ma.masked): return doFunc(self,other, ma.multiply,fill_value=fill_value) + def __rmul__(self, other): return self*other + def __div__(self, other,fill_value=ma.masked): return doFunc(self,other, ma.divide,fill_value=fill_value) + def __rdiv__(self, other): return doFunc(pow(self,-1),other, ma.multiply) + def __pow__(self, other,fill_value=ma.masked): return doFunc(self,other, ma.power,fill_value=fill_value) + + def __eq__(self, other): return doFunc(self,other, ma.equal) + def __le__(self, other): return doFunc(self,other, ma.less_equal) + def __lt__(self, other): return doFunc(self,other, ma.less) + def __ge__(self, other): return doFunc(self,other, ma.greater_equal) + def __gt__(self, other): return doFunc(self,other, ma.greater) + + def max(self,other): return doFunc(self,other, ma.maximum) + def min(self,other): return doFunc(self,other, ma.minimum) + + #INFO FUNCTIONS + + def __len__(self): + fv = self.firstValue() + if fv is not None: + return self.lastValue()-fv+1 + else: + return 0 + + def firstValue(self): + firstIndex = first_unmasked(self.data) + if self.indexZeroRepresents is None or firstIndex is None: + return None + else: + return firstIndex+self.indexZeroRepresents + + + def lastValue(self): + lastIndex = last_unmasked(self.data) + if self.indexZeroRepresents is None or lastIndex is None: + return None + else: + return lastIndex+self.indexZeroRepresents + + + #DISPLAY FUNCTIONS + + def __str__(self): + retVal = "" + if self.firstValue() is not None: + for i in range(first_unmasked(self.data), last_unmasked(self.data)+1): + index = str(i+self.indexZeroRepresents) + index = index + (" " * (6-len(index))) + 
retVal += index + "---> " + str(self.data[i]) + "\n" + return retVal + else: + return "" + + def show(self, showLists=True): + print "indexZeroRepresents = ", self.indexZeroRepresents + print self.data + + + +#apply func to ser1 and ser2, replacing masked values with fill_value +def doFunc(ser1, ser2, func,fill_value=ma.masked): + if not isinstance(ser2, ShiftingArray): + if ser1.indexZeroRepresents is None: + return ShiftingArray([],ser1.data.dtype) + else: + ser2 = ShiftingArray([ser2]*len(ser1),ser1.data.dtype, ser1.firstValue()) + + sFV, sLV = ser1.firstValue(), ser1.lastValue() + oFV, oLV = ser2.firstValue(), ser2.lastValue() + + if ser1.indexZeroRepresents is not None and ser2.indexZeroRepresents is not None: + if fill_value is not ma.masked: + minVal = min(sFV, oFV) + maxVal = max(sLV, oLV) + else: + minVal = max(sFV, oFV) + maxVal = min(sLV, oLV) + elif ser1.indexZeroRepresents is None and ser2.indexZeroRepresents is None: + return ShiftingArray([],ser1.data.dtype) + elif ser1.indexZeroRepresents is None: + minVal = oFV + maxVal = oLV + else: #ser2.indexZeroRepresents is None: + minVal = sFV + maxVal = sLV + + if maxVal < minVal: + return ShiftingArray([],ser1.data.dtype, startIndex=minVal) + + data1 = ser1._shift(minVal, maxVal) + data2 = ser2._shift(minVal, maxVal) + + if fill_value is ma.masked: + mask = data1.mask | data2.mask + else: + mask = data1.mask & data2.mask + + data1 = data1.filled(fill_value) + data2 = data2.filled(fill_value) + + data = func(data1,data2) + + return ShiftingArray(data,data.dtype,minVal, mask) + + +def doFunc_oneseries(ser,func): + + sFV = ser.firstValue() + + if sFV is None: + return ser + else: + result = func(ser.data) + return ShiftingArray(result,result.dtype,sFV,result.mask) + + +#MISC GLOBAL FUNCTIONS + +def expandAmt(size): + EXPAND_MULT = 1.2 + EXPAND_ADD = 5 + return round(size*EXPAND_MULT) + EXPAND_ADD + + +def first_unmasked(m): + idx = numpy.where(m.mask == False) + if len(idx) != 0 and len(idx[0]) != 0: + return 
idx[0][0] + else: + return None + +def last_unmasked(m): + idx = numpy.where(m.mask == False) + if len(idx) != 0 and len(idx[0]) != 0: + return idx[0][-1] + else: + return None \ No newline at end of file Added: trunk/Lib/sandbox/timeseries/shiftingarray.pyc =================================================================== (Binary files differ) Property changes on: trunk/Lib/sandbox/timeseries/shiftingarray.pyc ___________________________________________________________________ Name: svn:mime-type + application/octet-stream Added: trunk/Lib/sandbox/timeseries/timeseries.py =================================================================== --- trunk/Lib/sandbox/timeseries/timeseries.py 2006-12-08 05:13:33 UTC (rev 2369) +++ trunk/Lib/sandbox/timeseries/timeseries.py 2006-12-08 16:05:35 UTC (rev 2370) @@ -0,0 +1,232 @@ +import numpy +from numpy import ma +import types + +import corelib +import shiftingarray as sa +from shiftingarray import doFunc, doFunc_oneseries +import cseries +import tsdate +import copy + +class TimeSeries(sa.ShiftingArray): + def __init__(self,values=[], dtype=numpy.float64, freq=None, observed='END', startIndex=None,mask=ma.nomask): + + if freq is None: raise ValueError("freq not specified") + + super(TimeSeries, self).__init__(values, dtype, startIndex,mask) + self.freq = corelib.fmtFreq(freq) + self.observed = corelib.fmtObserv(observed) + self.dtype = dtype + + def __getitem__(self, key): + if isinstance(key,tsdate.Date): + if self.freq != key.freq: + raise "series of frequency "+str(self.freq)+" given date expression of type "+str(key.freq) + else: + key = int(key) + return super(TimeSeries, self).__getitem__(key) + + def __setitem__(self, key, value): + if isinstance(key, tsdate.Date): + key = int(key) + super(TimeSeries, self).__setitem__(key,value) + + + def convert(self, freq, observed=None): + # return self converted to freq, method according to self.observed + toFreq = corelib.fmtFreq(freq) + fromFreq = self.freq + + if fromFreq != 
toFreq: + if observed is None: observed=self.observed + else: observed = corelib.fmtObserv(observed) + + firstIndex = sa.first_unmasked(self.data) + if firstIndex is None: + return TimeSeries([],dtype=self.dtype,freq=toFreq,observed=observed) + + startIndexAdj = self.firstValue() + + lastIndex = sa.last_unmasked(self.data) + + tempData = copy.deepcopy(self.data[firstIndex:lastIndex+1]) + tempMask = tempData.mask + tempData = tempData.filled() + + cRetVal = cseries.reindex(tempData, fromFreq, toFreq, observed, startIndexAdj,tempMask) + _values = cRetVal['values'] + _mask = cRetVal['mask'] + + startIndex = cseries.convert(startIndexAdj, fromFreq, toFreq) + + return TimeSeries(_values,dtype=self.data.dtype,freq=toFreq,observed=observed,startIndex=startIndex, mask=_mask) + + else: + return copy.deepcopy(self) + + + def __str__(self): + retVal = "" + if self.firstValue() is not None: + for i in range(self.firstValue(),self.lastValue()+1): + index = str(tsdate.Date(freq=self.freq,val=i)) + index = index + (" " * (6-len(index))) + retVal += index + "---> " + str(super(TimeSeries, self).__getitem__(i)) + "\n" + return retVal + else: + return "" + + + ### DATA + + def __add__(self, other): + validOpInputs(self,other) + return SAtoTS(super(TimeSeries, self).__add__(other),self.freq,self.observed) + + def __radd__(self, other): + validOpInputs(self,other) + return SAtoTS(super(TimeSeries, self).__add__(other),self.freq,self.observed) + + def __sub__(self, other): + validOpInputs(self,other) + return SAtoTS(super(TimeSeries, self).__sub__(other),self.freq,self.observed) + + def __rsub__(self, other): + validOpInputs(self,other) + return SAtoTS(super(TimeSeries, self).__rsub__(other),self.freq,self.observed) + + def __mul__(self, other): + validOpInputs(self,other) + return SAtoTS(super(TimeSeries, self).__mul__(other),self.freq,self.observed) + + def __rmul__(self, other): + validOpInputs(self,other) + return SAtoTS(super(TimeSeries, 
self).__rmul__(other),self.freq,self.observed) + + def __div__(self, other): + validOpInputs(self,other) + return SAtoTS(super(TimeSeries, self).__div__(other),self.freq,self.observed) + + def __rdiv__(self, other): + validOpInputs(self,other) + return SAtoTS(super(TimeSeries, self).__rdiv__(other),self.freq,self.observed) + + def __pow__(self, other): + validOpInputs(self,other) + return SAtoTS(super(TimeSeries, self).__pow__(other),self.freq,self.observed) + + ### IN PLACE + + def __iadd__(self, other): + validOpInputs(self,other) + self = SAtoTS(super(TimeSeries, self).__add__(other),self.freq,self.observed) + return self + + def __isub__(self, other): + validOpInputs(self,other) + self = SAtoTS(super(TimeSeries, self).__sub__(other),self.freq,self.observed) + return self + + def __imul__(self, other): + validOpInputs(self,other) + self = SAtoTS(super(TimeSeries, self).__mul__(other),self.freq,self.observed) + return self + + def __idiv__(self, other): + validOpInputs(self,other) + self = SAtoTS(super(TimeSeries, self).__div__(other),self.freq,self.observed) + return self + + # this overrides & and should only be used by boolean series + def __and__(self, other): + validOpInputs(self,other) + return self * other + + # this overrides | and should only be used by boolean series + def __or__(self, other): + validOpInputs(self,other) + return ~(~self & ~other) + + # this overrides ~ and should only be used by boolean series + # it is our "not" operator + def __invert__(self): + return self == False + + ### COMPARISON + + def __eq__(self, other): + validOpInputs(self,other) + return SAtoTS(super(TimeSeries, self).__eq__(other),self.freq,self.observed) + + def __le__(self, other): + validOpInputs(self,other) + return SAtoTS(super(TimeSeries, self).__le__(other),self.freq,self.observed) + + def __lt__(self, other): + validOpInputs(self,other) + return SAtoTS(super(TimeSeries, self).__lt__(other),self.freq,self.observed) + + def __ge__(self, other): + 
validOpInputs(self,other) + return SAtoTS(super(TimeSeries, self).__ge__(other),self.freq,self.observed) + + def __gt__(self, other): + validOpInputs(self,other) + return SAtoTS(super(TimeSeries, self).__gt__(other),self.freq,self.observed) + +def tser(start,end): + if start.freq != end.freq: + raise ValueError("start and end dates must have same frequency!") + return TimeSeries(numpy.arange(int(start),int(end)+1),dtype=corelib.freqTypeMapping[start.freq],freq=start.freq,observed='END',startIndex=int(start)) + +def validOpInputs(ser1,ser2): + if isinstance(ser1,TimeSeries) and isinstance(ser2,TimeSeries) and ser1.freq != ser2.freq: + raise "operation cannot be performed on series with different frequencies ("+str(ser1.freq) + " and " + str(ser2.freq)+")" + + +def SAtoTS(values,freq,observed,dtype=None): + if dtype is None: _dtype = values.dtype + else: _dtype = dtype + return TimeSeries(values.data,dtype=_dtype,freq=freq,observed=observed,startIndex=values.indexZeroRepresents) + + +# math functions (two series) +def add(ser1,ser2,fill_value=ma.masked): + return apply_func_twoseries(ma.add,ser1,ser2,fill_value) + +def multiply(ser1,ser2,fill_value=ma.masked): + return apply_func_twoseries(ma.multiply,ser1,ser2,fill_value) + +def divide(ser1,ser2,fill_value=ma.masked): + return apply_func_twoseries(ma.divide,ser1,ser2,fill_value) + +def subtract(ser1,ser2,fill_value=ma.masked): + return apply_func_twoseries(ma.subtract,ser1,ser2,fill_value) + +# math functions (one series, return series) +def sqrt(ser): + return apply_func_oneseries(ma.sqrt,ser) + +# math functions (one series, return scalar) +def sum(ser): + return ma.sum(ser.data) + +def product(ser): + return ma.product(ser.data) + +def average(ser): + return ma.average(ser.data) + +def where(condition,x,y): + tempResult = ma.where(condition.data,x,y) + return TimeSeries(tempResult,dtype=numpy.bool_,freq=condition.freq,observed=condition.observed,startIndex=condition.indexZeroRepresents) + +# generic functions 
+def apply_func_twoseries(func,ser1,ser2,fill_value=ma.masked): + validOpInputs(ser1,ser2) + return SAtoTS(doFunc(ser1,ser2,func,fill_value=fill_value),ser1.freq,ser1.observed) + +def apply_func_oneseries(func,ser): + return SAtoTS(doFunc_oneseries(ser,func),ser.freq,ser.observed) + Added: trunk/Lib/sandbox/timeseries/timeseries.pyc =================================================================== (Binary files differ) Property changes on: trunk/Lib/sandbox/timeseries/timeseries.pyc ___________________________________________________________________ Name: svn:mime-type + application/octet-stream Added: trunk/Lib/sandbox/timeseries/tsdate.py =================================================================== --- trunk/Lib/sandbox/timeseries/tsdate.py 2006-12-08 05:13:33 UTC (rev 2369) +++ trunk/Lib/sandbox/timeseries/tsdate.py 2006-12-08 16:05:35 UTC (rev 2370) @@ -0,0 +1,290 @@ +import corelib +import mx.DateTime +import numpy + +class Date: + def __init__(self,freq,year=None, month=None, day=None, seconds=None,quarter=None, date=None, val=None): + + if hasattr(freq,'freq'): + self.freq = corelib.fmtFreq(freq.freq) + else: + self.freq = corelib.fmtFreq(freq) + self.type = corelib.freqToType(self.freq) + + if val is not None: + if self.freq == 'D': + self.__date = val+originDate + elif self.freq == 'B': + self.__date = originDate + val + (val//5)*7 - (val//5)*5 + elif self.freq == 'S': + self.__date = secondlyOriginDate + mx.DateTime.DateTimeDeltaFromSeconds(val) + elif self.freq == 'M': + self.__date = originDate + mx.DateTime.RelativeDateTime(months=val, day=-1) + elif self.freq == 'A': + self.__date = originDate + mx.DateTime.RelativeDateTime(years=val, month=-1, day=-1) + elif self.freq == 'Q': + self.__date = originDate + 1 + mx.DateTime.RelativeDateTime(years=int(val/4), month=int(12 * (float(val)/4 - val/4)), day=-1) + elif date is not None: + self.__date = date + else: + error = ValueError("Insufficient parameters given to create a date at the given 
frequency") + + if year is None: + raise error + + if self.freq in ('B','D'): + if month is None or day is None: raise error + elif self.freq == 'M': + if month is None: raise error + day = -1 + elif self.freq == 'Q': + if quarter is None: raise error + month = quarter * 3 + day = -1 + elif self.freq == 'A': + month = -1 + day = -1 + elif self.freq == 'S': + if month is None or day is None or seconds is None: raise error + + if self.freq != 'S': + self.__date = mx.DateTime.Date(year, month, day) + if self.freq == 'B': + if self.__date.day_of_week == 5 or self.__date.day_of_week == 6: + raise ValueError("Weekend passed as business day") + else: + _hours = int(seconds/3600) + _minutes = int((seconds - _hours*3600)/60) + _seconds = seconds % 60 + + self.__date = mx.DateTime.Date(year, month, day, _hours, _minutes, _seconds) + + + def day(self): return self.getDate().day + def day_of_week(self): return self.getDate().day_of_week + def month(self): return self.getDate().month + def quarter(self): return monthToQuarter(self.getDate().month) + def year(self): return self.getDate().year + def seconds(self): return int(self.getDate().second) + def minute(self): return int(self.getDate().minute) + def hour(self): return int(self.getDate().hour) + + def strfmt(self,fmt): + qFmt = fmt.replace("%q","XXXX") + tmpStr = self.__date.strftime(qFmt) + return tmpStr.replace("XXXX",str(self.quarter())) + + def __str__(self): + if self.freq in ("B","D"): + return self.__date.strftime("%d-%b-%y") + elif self.freq == "S": + return self.__date.strftime("%d-%b-%Y %H:%M:%S") + elif self.freq == "M": + return self.__date.strftime("%b-%Y") + elif self.freq == "Q": + return str(self.year())+"q"+str(self.quarter()) + elif self.freq == "A": + return str(self.year()) + else: + return self.__date.strftime("%d-%b-%y") + + + def __add__(self, other): + if isinstance(other, Date): + raise TypeError("Cannot add dates") + return Date(freq=self.freq, val=int(self) + other) + + def __radd__(self, other): 
return self+other + + def __sub__(self, other): + try: return self + (-1) * other + except: pass + try: + if self.freq <> other.freq: + raise ValueError("Cannont subtract dates of different frequency (" + str(self.freq) + " <> " + str(other.freq) + ")") + return int(self) - int(other) + except TypeError: + raise TypeError("Could not subtract types " + str(type(self)) + " and " + str(type(other))) + + def __repr__(self): return "<" + str(self.freq) + ":" + str(self) + ">" + + def __eq__(self, other): + if self.freq <> other.freq: + raise TypeError("frequencies are not equal!") + return int(self) == int(other) + + def __cmp__(self, other): + if self.freq <> other.freq: + raise TypeError("frequencies are not equal!") + return int(self)-int(other) + + def __hash__(self): return hash(int(self)) ^ hash(self.freq) + + def __int__(self): + return self.value() + + def value(self): + if self.freq == 'D': + return int((self.__date-originDate).days) + elif self.freq == 'B': + days = (self.__date-originDate).days + weeks = days // 7 + return int((weeks*5) + (days - weeks*7)) + elif self.freq == 'M': + return (self.__date.year - originDate.year)*12 + (self.__date.month - originDate.month) + elif self.freq == 'S': + return int((self.__date - secondlyOriginDate).seconds) + elif self.freq == 'A': + return int(self.__date.year - originDate.year + 1) + elif self.freq == 'Q': + return int ((self.__date.year - originDate.year)*4 + (self.__date.month - originDate.month)/3) + + def mxDate(self): + return self.__date + +originDate = mx.DateTime.Date(1850)-1 +secondlyOriginDate = mx.DateTime.Date(1980) - mx.DateTime.DateTimeDeltaFromSeconds(1) + + +####################### +# FUNCTIONS +####################### +def monthToQuarter(monthNum): + return int((monthNum-1)/3)+1 + +def thisday(freq): + + freq = corelib.fmtFreq(freq) + + tempDate = mx.DateTime.now() + + # if it is Saturday or Sunday currently, freq==B, then we want to use Friday + if freq == 'B' and tempDate.day_of_week >= 5: + 
tempDate -= (tempDate.day_of_week - 4) + if freq == 'B' or freq == 'D' or freq == 'S': + return Date(freq, date=tempDate) + elif freq == 'M': + return Date(freq,tempDate.year,tempDate.month) + elif freq == 'Q': + return Date(freq,tempDate.year,quarter=monthToQuarter(tempDate.month)) + elif freq == 'A': + return Date(freq,tempDate.year) + +def prevbusday(day_end_hour=18,day_end_min=0): + tempDate = mx.DateTime.localtime() + + dateNum = tempDate.hour + float(tempDate.minute)/60 + checkNum = day_end_hour + float(day_end_min)/60 + + if dateNum < checkNum: return thisday('B') - 1 + else: return thisday('B') + + +# returns _date converted to a date of _destFreq according to _relation +# _relation = "BEFORE" or "AFTER" (not case sensitive) +def dateOf(_date,_destFreq,_relation="BEFORE"): + + _destFreq = corelib.fmtFreq(_destFreq) + _rel = _relation.upper()[0] + + if _date.freq == _destFreq: + return _date + elif _date.freq == 'D': + + if _destFreq == 'B': + # BEFORE result: preceeding Friday if _date is a weekend, same day otherwise + # AFTER result: following Monday if _date is a weekend, same day otherwise + tempDate = _date.mxDate() + if _rel == "B": + if tempDate.day_of_week >= 5: tempDate -= (tempDate.day_of_week - 4) + elif _rel == "A": + if tempDate.day_of_week >= 5: tempDate += 7 - tempDate.day_of_week + return Date(freq='B',date=tempDate) + + elif _destFreq == 'M': return Date('M',_date.mxDate().year,_date.mxDate().month) + + elif _destFreq == 'S': + if _rel == "B": return Date('S',_date.mxDate().year,_date.mxDate().month,_date.mxDate().day,0) + elif _rel == "A": return Date('S',_date.mxDate().year,_date.mxDate().month,_date.mxDate().day,24*60*60-1) + + elif _destFreq == 'Q': return Date('Q',_date.mxDate().year,quarter=monthToQuarter(_date.mxDate().month)) + + elif _destFreq == 'A': return Date('A',_date.mxDate().year) + + elif _date.freq == 'B': + + if _destFreq == 'D': return Date('D',_date.mxDate().year,_date.mxDate().month,_date.mxDate().day) + + elif 
_destFreq == 'M': return Date('M',_date.mxDate().year,_date.mxDate().month) + + elif _destFreq == 'S': + if _rel == "B": return Date('S',_date.mxDate().year,_date.mxDate().month,_date.mxDate().day,0) + elif _rel == "A": return Date('S',_date.mxDate().year,_date.mxDate().month,_date.mxDate().day,24*60*60-1) + + elif _destFreq == 'Q': return Date('Q',_date.mxDate().year,quarter=monthToQuarter(_date.mxDate().month)) + + elif _destFreq == 'A': return Date('A',_date.mxDate().year) + + elif _date.freq == 'M': + + if _destFreq == 'D': + tempDate = _date.mxDate() + if _rel == "B": + return Date('D',_date.mxDate().year,_date.mxDate().month,1) + elif _rel == "A": + if _date.mxDate().month == 12: + tempMonth = 1 + tempYear = _date.mxDate().year + 1 + else: + tempMonth = _date.mxDate().month + 1 + tempYear = _date.mxDate().year + return Date('D',tempYear,tempMonth,1)-1 + + elif _destFreq == 'B': + if _rel == "B": return dateOf(dateOf(_date,'D',"BEFORE"),'B',"AFTER") + elif _rel == "A": return dateOf(dateOf(_date,'D',"AFTER"),'B',"BEFORE") + + elif _destFreq == 'S': + if _rel == "B": return dateOf(dateOf(_date,'D',"BEFORE"),'S',"BEFORE") + elif _rel == "A": return dateOf(dateOf(_date,'D',"AFTER"),'S',"AFTER") + + elif _destFreq == 'Q': return Date('Q',_date.mxDate().year,quarter=monthToQuarter(_date.mxDate().month)) + + elif _destFreq == 'A': return Date('A',_date.mxDate().year) + + elif _date.freq == 'S': + + if _destFreq == 'D': + return Date('D',_date.mxDate().year,_date.mxDate().month,_date.mxDate().day) + elif _destFreq == 'B': + if _rel == "B": return dateOf(dateOf(_date,'D'),'B',"BEFORE") + elif _rel == "A": return dateOf(dateOf(_date,'D'),'B',"AFTER") + elif _destFreq == 'M': + return Date('M',_date.mxDate().year,_date.mxDate().month) + + elif _date.freq == 'Q': + + if _destFreq == 'D': + if _rel == "B": return dateOf(_date-1,'D',"AFTER")+1 + elif _rel == "A": return Date('D',_date.mxDate().year,_date.mxDate().month,_date.mxDate().day) + elif _destFreq == 'B': + if _rel 
== "B": return dateOf(dateOf(_date,'D'),'B',"AFTER") + if _rel == "A": return dateOf(dateOf(_date,'D',"AFTER"),'B',"BEFORE") + elif _destFreq == 'M': + if _rel == "B": return dateOf(_date-1,'M',"AFTER")+1 + elif _rel == "A": return Date('M',_date.mxDate().year,_date.mxDate().month) + elif _destFreq == 'A': return Date('A',_date.mxDate().year) + elif _date.freq == 'A': + + if _destFreq == 'D': + if _rel == "B": return Date('D',_date.mxDate().year, 1, 1) + elif _rel == "A": return Date('D',_date.mxDate().year,12,31) + elif _destFreq == 'B': + if _rel == "B": return dateOf(dateOf(_date,'D'),'B',"AFTER") + if _rel == "A": return dateOf(dateOf(_date,'D',"AFTER"),'B',"BEFORE") + elif _destFreq == 'M': + if _rel == "B": return Date('M',_date.mxDate().year,1) + elif _rel == "A": return Date('M',_date.mxDate().year,12) + elif _destFreq == 'Q': + if _rel == "B": return Date('Q',_date.mxDate().year,quarter=1) + elif _rel == "A": return Date('Q',_date.mxDate().year,quarter=4) \ No newline at end of file Added: trunk/Lib/sandbox/timeseries/tsdate.pyc =================================================================== (Binary files differ) Property changes on: trunk/Lib/sandbox/timeseries/tsdate.pyc ___________________________________________________________________ Name: svn:mime-type + application/octet-stream From scipy-svn at scipy.org Fri Dec 8 11:06:27 2006 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Fri, 8 Dec 2006 10:06:27 -0600 (CST) Subject: [Scipy-svn] r2371 - trunk/Lib/sandbox/timeseries Message-ID: <20061208160627.7CFAF39C0A1@new.scipy.org> Author: mattknox_ca Date: 2006-12-08 10:06:25 -0600 (Fri, 08 Dec 2006) New Revision: 2371 Removed: trunk/Lib/sandbox/timeseries/__init__.pyc Log: Removed file/folder Deleted: trunk/Lib/sandbox/timeseries/__init__.pyc =================================================================== (Binary files differ) From scipy-svn at scipy.org Fri Dec 8 11:06:35 2006 From: scipy-svn at scipy.org (scipy-svn at scipy.org) 
Date: Fri, 8 Dec 2006 10:06:35 -0600 (CST) Subject: [Scipy-svn] r2372 - trunk/Lib/sandbox/timeseries Message-ID: <20061208160635.D0C3C39C0A3@new.scipy.org> Author: mattknox_ca Date: 2006-12-08 10:06:34 -0600 (Fri, 08 Dec 2006) New Revision: 2372 Removed: trunk/Lib/sandbox/timeseries/corelib.pyc Log: Removed file/folder Deleted: trunk/Lib/sandbox/timeseries/corelib.pyc =================================================================== (Binary files differ) From scipy-svn at scipy.org Fri Dec 8 11:06:47 2006 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Fri, 8 Dec 2006 10:06:47 -0600 (CST) Subject: [Scipy-svn] r2373 - trunk/Lib/sandbox/timeseries Message-ID: <20061208160647.5259639C0A1@new.scipy.org> Author: mattknox_ca Date: 2006-12-08 10:06:45 -0600 (Fri, 08 Dec 2006) New Revision: 2373 Removed: trunk/Lib/sandbox/timeseries/shiftingarray.pyc Log: Removed file/folder Deleted: trunk/Lib/sandbox/timeseries/shiftingarray.pyc =================================================================== (Binary files differ) From scipy-svn at scipy.org Fri Dec 8 11:06:54 2006 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Fri, 8 Dec 2006 10:06:54 -0600 (CST) Subject: [Scipy-svn] r2374 - trunk/Lib/sandbox/timeseries Message-ID: <20061208160654.7D13A39C0A1@new.scipy.org> Author: mattknox_ca Date: 2006-12-08 10:06:53 -0600 (Fri, 08 Dec 2006) New Revision: 2374 Removed: trunk/Lib/sandbox/timeseries/timeseries.pyc Log: Removed file/folder Deleted: trunk/Lib/sandbox/timeseries/timeseries.pyc =================================================================== (Binary files differ) From scipy-svn at scipy.org Fri Dec 8 11:07:03 2006 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Fri, 8 Dec 2006 10:07:03 -0600 (CST) Subject: [Scipy-svn] r2375 - trunk/Lib/sandbox/timeseries Message-ID: <20061208160703.3FEF539C0A1@new.scipy.org> Author: mattknox_ca Date: 2006-12-08 10:07:01 -0600 (Fri, 08 Dec 2006) New Revision: 2375 Removed: 
trunk/Lib/sandbox/timeseries/tsdate.pyc Log: Removed file/folder Deleted: trunk/Lib/sandbox/timeseries/tsdate.pyc =================================================================== (Binary files differ) From scipy-svn at scipy.org Fri Dec 8 11:26:40 2006 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Fri, 8 Dec 2006 10:26:40 -0600 (CST) Subject: [Scipy-svn] r2376 - trunk/Lib/sandbox/timeseries Message-ID: <20061208162640.44CE939C00E@new.scipy.org> Author: mattknox_ca Date: 2006-12-08 10:26:36 -0600 (Fri, 08 Dec 2006) New Revision: 2376 Added: trunk/Lib/sandbox/timeseries/cseries.c Removed: trunk/Lib/sandbox/timeseries/cseriesmodule.c Log: Renamed remotely Copied: trunk/Lib/sandbox/timeseries/cseries.c (from rev 2375, trunk/Lib/sandbox/timeseries/cseriesmodule.c) Deleted: trunk/Lib/sandbox/timeseries/cseriesmodule.c =================================================================== --- trunk/Lib/sandbox/timeseries/cseriesmodule.c 2006-12-08 16:07:01 UTC (rev 2375) +++ trunk/Lib/sandbox/timeseries/cseriesmodule.c 2006-12-08 16:26:36 UTC (rev 2376) @@ -1,538 +0,0 @@ -#include -//#include -#include -#include -#include -#include "mxDateTime.h" -#include "arrayobject.h" - -static char cseries_doc[] = "Speed sensitive time series operations"; - -/////////////////////////////////////////////////////////////////////// - - -static int -freqVal(char freq) -{ - switch(freq) - { - case 'A': - //annual - return 1; - case 'Q': - //quarterly - return 2; - case 'M': - //monthly - return 3; - case 'B': - //business - return 4; - case 'D': - //daily - return 5; - default: - return 0; - } -} - - -//fromDate is periods since Dec 31, 1849 -static long -convert(long fromDate, char fromFreq, char toFreq, int notStartInd, int atEnd) -{ - long absdate, origin, secondorigin, secsInDay; - long converted; - int rem; - int y,m,d,s; - - mxDateTimeObject *theDate; - mxDateTimeObject *convDate; - - origin = 675333; - secondorigin = 722814; - secsInDay = 86400; - - //convert fromDate 
to days since Dec 31, 1849 (Jan 1, 1850 would have absdate of 1) - switch(fromFreq) - { - case 'D': - absdate = fromDate; - break; - case 'B': - absdate = (fromDate/5)*7 + fromDate%5; - break; - case 'M': - y = fromDate/12 + 1; - m = fromDate%12; - if (atEnd) m++; - if (m == 0) - { - m = 12; - y--; - } - d=1; - break; - case 'Q': - y = fromDate/4 + 1; - m = (fromDate%4) * 3; - if (!atEnd) m -= 2; //change to first month of quarter - else m += 1; - if (m < 1) - { - m += 12; - y--; - } - else if (m == 12) - { - m = 1; - y++; - } - d=1; - break; - case 'A': - y = fromDate-1; - if (atEnd == 1) y++; - m = 1; - d = 1; - break; - default: - return -1; - } - - if (freqVal(fromFreq) < 4) - { - //switch to years from 0 for mxDateTime - y+= 1849; - - theDate = (mxDateTimeObject *)mxDateTime.DateTime_FromDateAndTime(y,m,d,0,0,0); - absdate = (long)(theDate->absdate); - if (atEnd == 1) absdate--; - } - else - { - //days from 0 for mxDateTime - absdate += origin; - } - - if (atEnd) s = secsInDay-1; - else s = 0; - - convDate = (mxDateTimeObject *)mxDateTime.DateTime_FromAbsDateAndTime(absdate,s); - - //switch back to days and years since 1849 for pyTSA Date - absdate -= origin; - y = convDate->year - 1849; - m = convDate->month; - - //convert convDate to appropriate # of periods according to toFreq - switch(toFreq) - { - case 'D': - converted = absdate; - break; - case 'B': - rem = absdate%7; - if (rem > 4) //is weekend day - { - if (notStartInd == 1 && freqVal(fromFreq) > 4) - { - return -1; - } - else - { - d = convDate->day; - d -= rem - 4; //change to friday before weekend - if (d < 1) d += 3; //if friday was prev. 
month, change to monday instead - absdate = absdate - convDate->day + d; - converted = (long)((absdate / 7 * 5.0) + absdate%7); - } - } - else - { - converted = (long)((absdate / 7 * 5.0) + rem); - } - break; - case 'M': - converted = (long)((y-1)*12 + m); - break; - case 'Q': - converted = (long)((y-1)*4 + ((m-1)/3) + 1); - break; - case 'A': - converted = (long)(y+1); - break; - default: - return -1; - } - - return converted; -} - - -static long -expand(long oldSize, char fromFr, char toFr) -{ - long newSize; - int fromFreq, toFreq; - - if (fromFr == toFr) return oldSize; - - fromFreq = freqVal(fromFr); - toFreq = freqVal(toFr); - if (fromFreq*toFreq == 0) return oldSize; //invalid frequency - - newSize = oldSize; - - while (toFreq > fromFreq) - { - if (fromFreq == 1) //Annual - { - newSize *= 4; //quarters in year - fromFreq++; - } - else if (fromFreq == 2) //Quarterly - { - newSize *= 3; //months in quarter - fromFreq++; - } - else if (fromFreq == 3) //Monthly - { - newSize *= 31; //max days in month - fromFreq++; - } - else if (fromFreq == 4) //Business - { - newSize *= 2; //max d days for each b days - fromFreq++; - } - } - - - return newSize; -} - - -/////////////////////////////////////////////////////////////////////// -/* -OBSERVED - -from lower freq to higher freq ----------------------- - -summed -- all values in period set as lower freq's value / # of values - -rest -- all values in period set as lower freq's value - -from higher freq to lower freq ----------------------- -begin - lower freq's value set as first value in period -end - lower freq's value set as end value in period -summed - lower freq's value set as sum of all values in period -averaged - lower freq's value set as average of all values in period -high - lower freq's value set as largest value in period -low - lower freq's value set as smallest value in period - -*/ -/////////////////////////////////////////////////////////////////////// - -static void -adjValForObsSet(PyArrayObject 
*theArray, char obs, PyObject **newVal, PyObject **newValMask, PyObject *val, PyObject *valMask, long curPerLen) -{ - double dblVal; - long lngValMask, lngAllMasked; - - lngValMask = PyInt_AsLong(valMask); - lngAllMasked = PyInt_AsLong(*newValMask); - - if (!lngValMask) { - - // if any value is not masked, then we shall not mask the aggregated result - *newValMask = valMask; - - if (obs == 'B') - { - if (lngAllMasked) { - *newVal = val; - } - } - else if ( PyArray_ISFLOAT(theArray) && (obs=='S' || obs=='A') ) - { - - if (obs == 'S') - { - //observed is summed - - dblVal = PyFloat_AsDouble(*newVal); - dblVal += PyFloat_AsDouble(val); - *newVal = PyFloat_FromDouble(dblVal); - } - else - { - //observed is averaged - - dblVal = PyFloat_AsDouble(*newVal); - dblVal *= (curPerLen-1); - dblVal += PyFloat_AsDouble(val); - dblVal /= curPerLen; - *newVal = PyFloat_FromDouble(dblVal); - } - - } - else if ( PyArray_ISNUMBER(theArray) && (obs=='H' || obs=='L') ) - { - - if (obs == 'H') - { - //observed is high - - if (PyFloat_AsDouble(val) > PyFloat_AsDouble(*newVal)) *newVal = val; - } - else if (obs == 'L') - { - //observed is low - - if (PyFloat_AsDouble(val) < PyFloat_AsDouble(*newVal)) *newVal = val; - } - - } - else - { - //observed is not beginning and - //val is string or (val is date and observed is summed/averaged) - //or observed is end or not supported - - *newVal = val; - } - } - -} - - -static //PyArrayObject * -setArrayItem(PyArrayObject **theArray, long index, PyObject *newVal) -{ - char *setptr; - - if (index >= 0) - { - //set value in array - setptr = (*theArray)->data + (index) * (*theArray)->strides[0]; - PyArray_SETITEM(*theArray,setptr,newVal); - } - - //return theArray; -} - - -static char cseries_reindex_doc[] = ""; -static PyObject * -cseries_reindex(PyObject *self, PyObject *args) -{ - PyArrayObject *array; - PyArrayObject *tempArray; - PyArrayObject *newArray; - - PyArrayObject *mask; - PyArrayObject *tempMask; - PyArrayObject *newMask; - - PyObject 
*returnVal = NULL; - - int notStartInd, atEnd; - long startIndex, newStart; - long i, curPerInd, nextPerInd, prevIndex, curIndex; - long dim; - long curPerLen; - long lngValMask; - char *fromFreq, *toFreq, *observed; - - char *getptr; - PyObject *val, *newVal; - - char *getptrMask; - PyObject *valMask, *newValMask; - - int toFrVal, fromFrVal; - - returnVal = PyDict_New(); - - if (!PyArg_ParseTuple(args, "OssslO:reindex(array, fromfreq, tofreq, observed, startIndex,mask)", &tempArray, &fromFreq, &toFreq, &observed, &startIndex, &tempMask)) return NULL; - - if (toFreq[0] == fromFreq[0]) - { - - PyDict_SetItemString(returnVal, "values", (PyObject*)tempArray); - PyDict_SetItemString(returnVal, "mask", (PyObject*)tempMask); - - return returnVal; - } - - array = PyArray_GETCONTIGUOUS(tempArray); - mask = PyArray_GETCONTIGUOUS(tempMask); - - //expand size to fit new values if needed - dim = expand(array->dimensions[0], fromFreq[0], toFreq[0]); - - //initialize new array - newArray = (PyArrayObject*)PyArray_SimpleNew(array->nd, &dim, array->descr->type_num); - newMask = (PyArrayObject*)PyArray_SimpleNew(mask->nd, &dim, mask->descr->type_num); - - for (i = 0; i < dim; i++) - { - setArrayItem(&newArray, i, PyInt_FromLong(1)); - setArrayItem(&newMask, i, PyInt_FromLong(1)); - } - - //convert start index to new frequency - notStartInd = 0; - atEnd = 0; - newStart = convert(startIndex, fromFreq[0], toFreq[0], notStartInd, atEnd); - - //initialize prevIndex - prevIndex = newStart - 1; - - notStartInd = 1; - atEnd = 0; - - //set values in the new array - for (i = 0; i < array->dimensions[0]; i++) - { - //find index for start of current period in new frequency - curPerInd = convert(startIndex + i, fromFreq[0], toFreq[0], notStartInd, atEnd); - - //get frequency numeric mapping - fromFrVal = freqVal(fromFreq[0]); - toFrVal = freqVal(toFreq[0]); - - //get value from old array - getptr = array->data + i*array->strides[0]; - val = PyArray_GETITEM(array,getptr); - - //get the mask 
corresponding to the old value - getptrMask = mask->data + i*mask->strides[0]; - valMask = PyArray_GETITEM(mask,getptrMask); - - if (fromFrVal < toFrVal) - { - //from lower freq to higher freq - - newVal = val; - newValMask = valMask; - - //find index for start of next period in new frequency - nextPerInd = convert(startIndex + i + 1, fromFreq[0], toFreq[0], notStartInd, atEnd); - - //adjust for observed setting - if (observed[0] == 'S' && PyArray_ISFLOAT(array) && !( (fromFrVal == 4 && toFrVal == 5) || (fromFrVal == 5 && toFrVal == 4) ) ) - { - //summed - - //all values in period set as old array's value / # of values - newVal = PyFloat_FromDouble( PyFloat_AsDouble(val) / (nextPerInd - curPerInd) ); - } - - //set each value in period - for (curIndex = curPerInd; curIndex < nextPerInd; curIndex++) - { - setArrayItem(&newArray, curIndex-newStart, newVal); - setArrayItem(&newMask, curIndex-newStart, newValMask); - } - } - else - { - - lngValMask = PyInt_AsLong(valMask); - - //from higher freq to lower freq - - if (curPerInd != prevIndex) - { - //starting new period in old array - - - //set value in the new array - setArrayItem(&newArray, prevIndex-newStart, newVal); - setArrayItem(&newMask, prevIndex-newStart, newValMask); - - //reset period length - curPerLen = 0; - - - - if (!lngValMask) { - curPerLen++; - } - - - - //store current index and value - prevIndex = curPerInd; - newVal = val; - newValMask = valMask; - - } - else - { - //still in same period - - - - if (!lngValMask) { - curPerLen++; - } - - //adjust new value according to observed setting - adjValForObsSet(array, observed[0], &newVal, &newValMask, val, valMask, curPerLen); - } - - } - - } - - //set value of last item in the new array - setArrayItem(&newArray, curPerInd-newStart, newVal); - setArrayItem(&newMask, curPerInd-newStart, newValMask); - - PyDict_SetItemString(returnVal, "values", (PyObject*)newArray); - PyDict_SetItemString(returnVal, "mask", (PyObject*)newMask); - - return returnVal; - -} - - 
-static char cseries_convert_doc[] = ""; -static PyObject * -cseries_convert(PyObject *self, PyObject *args) -{ - long fromDate; - char* fromFreq; - char* toFreq; - int notStartInd, atEnd; - - if (!PyArg_ParseTuple(args, "lss:convert(fromDate, fromfreq, tofreq)", &fromDate, &fromFreq, &toFreq)) return NULL; - - //always want start of period (only matters when converting from lower freq to higher freq ie. m -> d) - atEnd = 0; - notStartInd = 0; - - return PyInt_FromLong(convert(fromDate, fromFreq[0], toFreq[0], notStartInd, atEnd)); -} - - -/////////////////////////////////////////////////////////////////////// - -static PyMethodDef cseries_methods[] = { - {"reindex", cseries_reindex, METH_VARARGS, cseries_reindex_doc}, - {"convert", cseries_convert, METH_VARARGS, cseries_convert_doc}, - {NULL, NULL} -}; - -PyMODINIT_FUNC -initcseries(void) -{ - Py_InitModule3("cseries", cseries_methods, cseries_doc); - mxDateTime_ImportModuleAndAPI(); - import_array(); -} \ No newline at end of file From scipy-svn at scipy.org Fri Dec 8 13:45:59 2006 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Fri, 8 Dec 2006 12:45:59 -0600 (CST) Subject: [Scipy-svn] r2377 - in trunk/Lib/sandbox/timeseries: . 
examples Message-ID: <20061208184559.E744E39C018@new.scipy.org> Author: mattknox_ca Date: 2006-12-08 12:45:57 -0600 (Fri, 08 Dec 2006) New Revision: 2377 Added: trunk/Lib/sandbox/timeseries/examples/ trunk/Lib/sandbox/timeseries/examples/example.py Log: Added a folder remotely Added: trunk/Lib/sandbox/timeseries/examples/example.py =================================================================== --- trunk/Lib/sandbox/timeseries/examples/example.py 2006-12-08 16:26:36 UTC (rev 2376) +++ trunk/Lib/sandbox/timeseries/examples/example.py 2006-12-08 18:45:57 UTC (rev 2377) @@ -0,0 +1,104 @@ +import numpy as np +import timeseries as ts + + +# create a time series at business frequency and fill it with random data +bSer = ts.TimeSeries(np.random.uniform(-100,100,600),dtype=np.float64,freq='B',observed='SUMMED',startIndex=ts.thisday('B')-600) + + +""" +Set negative values to zero. Note that ts.where returns a TimeSeries object. +Indexing by booleans can only be done with TimeSeries objects of the same +frequency. +""" +bSer[ts.where(bSer < 0,True,False)] = 0 + + +""" +Convert bSer to a monthly frequency series. + +The optional observed argument to the convert method specifies what +method will be used to perform the frequency conversion. If it is +not specified, the observed attribute of the series will be used to +determine the method. +""" +mSer1 = bSer.convert('M',observed='AVERAGED') + + +# create another monthly frequency series +mSer2 = ts.TimeSeries(np.random.uniform(-100,100,100),dtype=np.float64,freq='m',observed='END',startIndex=ts.thisday('M')-110) + + +""" +Slicing also supported. The intention is to have indexing behave +largely in the same manner as regular numpy arrays. It sure would be +nice if we could slice with the dates directly, but as it stands we +shall have to cast the dates to integers +""" +mSer2[int(ts.thisday('m')-60):int(ts.thisday('m')-45)] = 12 + + +# Mask a value. 
series.lastValue() returns the index of the last +# unmasked value in the series (as an integer, not a Date object) +mSer2[mSer2.lastValue()-40] = ts.masked #ts.masked is the same thing as numpy.ma.masked + + +""" +Only series of the same frequency can be used in the basic operations. +The results are the same as you would expect for masked arrays with the +basic operations. + +Notice that the start and end indices of mSer1 and mSer2 do not need to +line up. This conversion is done implicitly. +""" +mAdd1 = mSer1 + mSer2 + + +""" +if you want more control over behaviour of masked values, use ts.add +(or multiply, etc) instead. + +if a fill_value is specified, both TimeSeries objects are filled from +min(mSer1.firstValue(),mSer2.firstValue()) to max(mSer1.lastValue(),mSer2.lastValue()) +wherever the series are masked before performing the operation +""" +mAdd2 = ts.add(mSer1,mSer2,fill_value=0) + + +# calculate the average value in the series. Behaves the same as in ma +bAverage = ts.average(bSer) + + +# Take the sqrt root of each element in the series (returns a TimeSeries object). +# Not all functions from ma supported yet, but they are easy to implement +# for the most part. +bSqrt = ts.sqrt(bSer) + + +# get the last day of this year, at daily frequency +dLastDayOfYear = ts.dateOf(ts.thisday('A'),'D','AFTER') + +# get the first day of this year, at business frequency +bFirstDayOfYear = ts.dateOf(ts.thisday('A'),'B','BEFORE') + +# get the last day of the previous quarter, business frequency +bFirstDayOfLastQuarter = ts.dateOf(ts.thisday('Q')-1,'B','AFTER') + +# dateOf can also go from high frequency to low frequency. 
In this case, the third parameter has no impact +aTrueValue = (ts.thisday('Q') == ts.dateOf(ts.thisday('b'),'Q')) + +# dates of the same frequency can be subtracted (but not added obviously) +numberOfBusinessDaysPassedThisYear = ts.thisday('b') - bFirstDayOfYear + +# integers can be added/subtracted to/from dates +fiveDaysFromNow = ts.thisday('d') + 5 + + +# get the previous business day, where business day is considered to +# end at day_end_hour and day_end_min +pbd = ts.prevbusday(day_end_hour=18,day_end_min=0) + + +# construct a date object explicitly +myDateQ = ts.Date(freq='Q',year=2004,quarter=3) +myDateD = ts.Date(freq='D',year=1985,month=10,day=4) \ No newline at end of file From scipy-svn at scipy.org Fri Dec 8 13:46:35 2006 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Fri, 8 Dec 2006 12:46:35 -0600 (CST) Subject: [Scipy-svn] r2378 - in trunk/Lib/sandbox/timeseries: . doc Message-ID: <20061208184635.AA30D39C018@new.scipy.org> Author: mattknox_ca Date: 2006-12-08 12:46:34 -0600 (Fri, 08 Dec 2006) New Revision: 2378 Added: trunk/Lib/sandbox/timeseries/doc/ trunk/Lib/sandbox/timeseries/doc/todo.txt Log: Added a folder remotely Added: trunk/Lib/sandbox/timeseries/doc/todo.txt =================================================================== --- trunk/Lib/sandbox/timeseries/doc/todo.txt 2006-12-08 18:45:57 UTC (rev 2377) +++ trunk/Lib/sandbox/timeseries/doc/todo.txt 2006-12-08 18:46:34 UTC (rev 2378) @@ -0,0 +1,71 @@ +To-do: + =================================================================== + These are not necessarily things that I am personally going to do, + but would be nice to have none-the-less :) + =================================================================== + + - make the various Date data types actual numpy data types instead of + an ugly hack (this is probably beyond my ability) + + - add month(), year(), day_of_week(), etc... 
functions that take a + TimeSeries object with dtype as one of the Date types and return + a TimeSeries object of integer type with the relevant info. + This would make it easy to do things like retrieving all the values + that occur on a Friday for a particular series, for example. + + I could write a brute force method for this right now fairly easily, + but it would be very slow. + + - support "secondly" frequency data in a more robust way. Frequency + conversion methods are not supported for this currently. + + - add a "basis" option to the convert method. Possible values are + 'business' or 'daily'. This would be used in determining the weights + of the months, quarters, etc when converting things with + observed='AVERAGED' (right now, it always assumes equal weighting) + + This should also be an attribute of each TimeSeries object that would + provide the default behaviour if the basis option was not specified. + + - add an "ignore" option to the convert method to specify whether to + ignore masked values in the calculation or not (current behaviour is + to ignore masked values). + + - support for wider variety of standard numpy/ma functions + + - add time series specific concepts/functions like moving averages, MACD, moving + standard deviation, etc... + + - add support for more frequencies: semi-annual, hourly, minutely, + decade, weekly, and maybe even sub-frequencies like quarterly with + quarters ending on months other than Mar/Jun/Sep/Dec, weekly ending + on different days of the week, etc... + + - Decide how best to handle frequency conversion between frequencies that + don't have a nice relationship where the lower frequency strictly contains + whole periods of the higher frequency. For example, if converting weekly to + monthly, some weeks overlap more than one month. + + - Perhaps change frequency conversion code to simply group data in the C code, + (eg. 
if going from daily to monthly, create a 2 dimensional array + where each column represents one month's worth of data), and then + perform mathematical operation on the python side (eg. sum/average/etc + the columns to reduce back to a 1-d array). This would not work well + for the scenario described in the previous bullet, but would work nicely + for a lot of scenarios. + + +Wishlist: + + - currently, the code relies on the mx.DateTime module (both the python, + and c extension code). For obvious reasons, it would be nice if + external dependencies could be eliminated as much as possible. Not sure + if the built in datetime module is up to the task though, particularly + where the c-api is concerned. + + - integration with pytables perhaps? Definitely a longer term thing, but eventually who knows? (I currently have my + own module for writing this stuff to FAME databases, but pytables would be cooler) + + - integration with matplotlib? + + - report generation module? From scipy-svn at scipy.org Sat Dec 9 12:09:26 2006 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Sat, 9 Dec 2006 11:09:26 -0600 (CST) Subject: [Scipy-svn] r2379 - trunk/Lib/sandbox/timeseries Message-ID: <20061209170926.22B9E39C0F7@new.scipy.org> Author: mattknox_ca Date: 2006-12-09 11:09:21 -0600 (Sat, 09 Dec 2006) New Revision: 2379 Modified: trunk/Lib/sandbox/timeseries/tsdate.py Log: fixed problem with still using calling old method name in many places Modified: trunk/Lib/sandbox/timeseries/tsdate.py =================================================================== --- trunk/Lib/sandbox/timeseries/tsdate.py 2006-12-08 18:46:34 UTC (rev 2378) +++ trunk/Lib/sandbox/timeseries/tsdate.py 2006-12-09 17:09:21 UTC (rev 2379) @@ -60,14 +60,14 @@ self.__date = mx.DateTime.Date(year, month, day, _hours, _minutes, _seconds) - def day(self): return self.getDate().day - def day_of_week(self): return self.getDate().day_of_week - def month(self): return self.getDate().month - def quarter(self): 
return monthToQuarter(self.getDate().month) - def year(self): return self.getDate().year - def seconds(self): return int(self.getDate().second) - def minute(self): return int(self.getDate().minute) - def hour(self): return int(self.getDate().hour) + def day(self): return self.mxDate().day + def day_of_week(self): return self.mxDate().day_of_week + def month(self): return self.mxDate().month + def quarter(self): return monthToQuarter(self.mxDate().month) + def year(self): return self.mxDate().year + def seconds(self): return int(self.mxDate().second) + def minute(self): return int(self.mxDate().minute) + def hour(self): return int(self.mxDate().hour) def strfmt(self,fmt): qFmt = fmt.replace("%q","XXXX") From scipy-svn at scipy.org Sat Dec 9 12:15:09 2006 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Sat, 9 Dec 2006 11:15:09 -0600 (CST) Subject: [Scipy-svn] r2380 - trunk/Lib/sandbox/timeseries Message-ID: <20061209171509.C301C39C0F7@new.scipy.org> Author: mattknox_ca Date: 2006-12-09 11:15:02 -0600 (Sat, 09 Dec 2006) New Revision: 2380 Modified: trunk/Lib/sandbox/timeseries/shiftingarray.py Log: general cleanup Modified: trunk/Lib/sandbox/timeseries/shiftingarray.py =================================================================== --- trunk/Lib/sandbox/timeseries/shiftingarray.py 2006-12-09 17:09:21 UTC (rev 2379) +++ trunk/Lib/sandbox/timeseries/shiftingarray.py 2006-12-09 17:15:02 UTC (rev 2380) @@ -1,5 +1,4 @@ import numpy, types , corelib -import copy from numpy import ma class ShiftingArray(object): @@ -216,12 +215,8 @@ else: return "" - def show(self, showLists=True): - print "indexZeroRepresents = ", self.indexZeroRepresents - print self.data - #apply func to ser1 and ser2, replacing masked values with fill_value def doFunc(ser1, ser2, func,fill_value=ma.masked): if not isinstance(ser2, ShiftingArray): @@ -241,7 +236,7 @@ minVal = max(sFV, oFV) maxVal = min(sLV, oLV) elif ser1.indexZeroRepresents is None and ser2.indexZeroRepresents is None: - 
return ShiftingArray([],ser1.data.dtype) + return ShiftingArray([],ser1.data.dtype) elif ser1.indexZeroRepresents is None: minVal = oFV maxVal = oLV From scipy-svn at scipy.org Sat Dec 9 12:24:56 2006 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Sat, 9 Dec 2006 11:24:56 -0600 (CST) Subject: [Scipy-svn] r2381 - trunk/Lib/sandbox/timeseries Message-ID: <20061209172456.C5DEC39C0F7@new.scipy.org> Author: mattknox_ca Date: 2006-12-09 11:24:53 -0600 (Sat, 09 Dec 2006) New Revision: 2381 Modified: trunk/Lib/sandbox/timeseries/timeseries.py Log: general code cleanup Modified: trunk/Lib/sandbox/timeseries/timeseries.py =================================================================== --- trunk/Lib/sandbox/timeseries/timeseries.py 2006-12-09 17:15:02 UTC (rev 2380) +++ trunk/Lib/sandbox/timeseries/timeseries.py 2006-12-09 17:24:53 UTC (rev 2381) @@ -1,6 +1,5 @@ import numpy from numpy import ma -import types import corelib import shiftingarray as sa @@ -10,7 +9,7 @@ import copy class TimeSeries(sa.ShiftingArray): - def __init__(self,values=[], dtype=numpy.float64, freq=None, observed='END', startIndex=None,mask=ma.nomask): + def __init__(self, values=[], dtype=numpy.float64, freq=None, observed='END', startIndex=None, mask=ma.nomask): if freq is None: raise ValueError("freq not specified") @@ -30,7 +29,7 @@ def __setitem__(self, key, value): if isinstance(key, tsdate.Date): key = int(key) - super(TimeSeries, self).__setitem__(key,value) + super(TimeSeries, self).__setitem__(key, value) def convert(self, freq, observed=None): @@ -44,7 +43,7 @@ firstIndex = sa.first_unmasked(self.data) if firstIndex is None: - return TimeSeries([],dtype=self.dtype,freq=toFreq,observed=observed) + return TimeSeries([], dtype=self.dtype, freq=toFreq, observed=observed) startIndexAdj = self.firstValue() @@ -54,13 +53,13 @@ tempMask = tempData.mask tempData = tempData.filled() - cRetVal = cseries.reindex(tempData, fromFreq, toFreq, observed, startIndexAdj,tempMask) + cRetVal = 
cseries.reindex(tempData, fromFreq, toFreq, observed, startIndexAdj, tempMask) _values = cRetVal['values'] _mask = cRetVal['mask'] startIndex = cseries.convert(startIndexAdj, fromFreq, toFreq) - return TimeSeries(_values,dtype=self.data.dtype,freq=toFreq,observed=observed,startIndex=startIndex, mask=_mask) + return TimeSeries(_values, dtype=self.data.dtype, freq=toFreq, observed=observed, startIndex=startIndex, mask=_mask) else: return copy.deepcopy(self) @@ -81,71 +80,71 @@ ### DATA def __add__(self, other): - validOpInputs(self,other) - return SAtoTS(super(TimeSeries, self).__add__(other),self.freq,self.observed) + validOpInputs(self, other) + return SAtoTS(super(TimeSeries, self).__add__(other), self.freq, self.observed) def __radd__(self, other): - validOpInputs(self,other) - return SAtoTS(super(TimeSeries, self).__add__(other),self.freq,self.observed) + validOpInputs(self, other) + return SAtoTS(super(TimeSeries, self).__add__(other), self.freq, self.observed) def __sub__(self, other): - validOpInputs(self,other) - return SAtoTS(super(TimeSeries, self).__sub__(other),self.freq,self.observed) + validOpInputs(self, other) + return SAtoTS(super(TimeSeries, self).__sub__(other), self.freq, self.observed) def __rsub__(self, other): - validOpInputs(self,other) - return SAtoTS(super(TimeSeries, self).__rsub__(other),self.freq,self.observed) + validOpInputs(self, other) + return SAtoTS(super(TimeSeries, self).__rsub__(other), self.freq, self.observed) def __mul__(self, other): - validOpInputs(self,other) - return SAtoTS(super(TimeSeries, self).__mul__(other),self.freq,self.observed) + validOpInputs(self, other) + return SAtoTS(super(TimeSeries, self).__mul__(other), self.freq, self.observed) def __rmul__(self, other): - validOpInputs(self,other) - return SAtoTS(super(TimeSeries, self).__rmul__(other),self.freq,self.observed) + validOpInputs(self, other) + return SAtoTS(super(TimeSeries, self).__rmul__(other), self.freq, self.observed) def __div__(self, other): - 
validOpInputs(self,other) - return SAtoTS(super(TimeSeries, self).__div__(other),self.freq,self.observed) + validOpInputs(self, other) + return SAtoTS(super(TimeSeries, self).__div__(other), self.freq, self.observed) def __rdiv__(self, other): - validOpInputs(self,other) - return SAtoTS(super(TimeSeries, self).__rdiv__(other),self.freq,self.observed) + validOpInputs(self, other) + return SAtoTS(super(TimeSeries, self).__rdiv__(other), self.freq, self.observed) def __pow__(self, other): - validOpInputs(self,other) - return SAtoTS(super(TimeSeries, self).__pow__(other),self.freq,self.observed) + validOpInputs(self, other) + return SAtoTS(super(TimeSeries, self).__pow__(other), self.freq, self.observed) ### IN PLACE def __iadd__(self, other): - validOpInputs(self,other) - self = SAtoTS(super(TimeSeries, self).__add__(other),self.freq,self.observed) + validOpInputs(self, other) + self = SAtoTS(super(TimeSeries, self).__add__(other), self.freq, self.observed) return self def __isub__(self, other): - validOpInputs(self,other) - self = SAtoTS(super(TimeSeries, self).__sub__(other),self.freq,self.observed) + validOpInputs(self, other) + self = SAtoTS(super(TimeSeries, self).__sub__(other), self.freq, self.observed) return self def __imul__(self, other): - validOpInputs(self,other) - self = SAtoTS(super(TimeSeries, self).__mul__(other),self.freq,self.observed) + validOpInputs(self, other) + self = SAtoTS(super(TimeSeries, self).__mul__(other), self.freq, self.observed) return self def __idiv__(self, other): - validOpInputs(self,other) - self = SAtoTS(super(TimeSeries, self).__div__(other),self.freq,self.observed) + validOpInputs(self, other) + self = SAtoTS(super(TimeSeries, self).__div__(other), self.freq, self.observed) return self # this overrides & and should only be used by boolean series def __and__(self, other): - validOpInputs(self,other) + validOpInputs(self, other) return self * other # this overrides | and should only be used by boolean series def __or__(self, 
other): - validOpInputs(self,other) + validOpInputs(self, other) return ~(~self & ~other) # this overrides ~ and should only be used by boolean series @@ -156,57 +155,57 @@ ### COMPARISON def __eq__(self, other): - validOpInputs(self,other) - return SAtoTS(super(TimeSeries, self).__eq__(other),self.freq,self.observed) + validOpInputs(self, other) + return SAtoTS(super(TimeSeries, self).__eq__(other), self.freq, self.observed) def __le__(self, other): - validOpInputs(self,other) - return SAtoTS(super(TimeSeries, self).__le__(other),self.freq,self.observed) + validOpInputs(self, other) + return SAtoTS(super(TimeSeries, self).__le__(other), self.freq, self.observed) def __lt__(self, other): - validOpInputs(self,other) - return SAtoTS(super(TimeSeries, self).__lt__(other),self.freq,self.observed) + validOpInputs(self, other) + return SAtoTS(super(TimeSeries, self).__lt__(other), self.freq, self.observed) def __ge__(self, other): - validOpInputs(self,other) - return SAtoTS(super(TimeSeries, self).__ge__(other),self.freq,self.observed) + validOpInputs(self, other) + return SAtoTS(super(TimeSeries, self).__ge__(other), self.freq, self.observed) def __gt__(self, other): - validOpInputs(self,other) - return SAtoTS(super(TimeSeries, self).__gt__(other),self.freq,self.observed) + validOpInputs(self, other) + return SAtoTS(super(TimeSeries, self).__gt__(other), self.freq, self.observed) -def tser(start,end): +def tser(start, end): if start.freq != end.freq: raise ValueError("start and end dates must have same frequency!") - return TimeSeries(numpy.arange(int(start),int(end)+1),dtype=corelib.freqTypeMapping[start.freq],freq=start.freq,observed='END',startIndex=int(start)) + return TimeSeries(numpy.arange(int(start), int(end)+1), dtype=corelib.freqTypeMapping[start.freq], freq=start.freq, observed='END', startIndex=int(start)) -def validOpInputs(ser1,ser2): - if isinstance(ser1,TimeSeries) and isinstance(ser2,TimeSeries) and ser1.freq != ser2.freq: +def validOpInputs(ser1, 
ser2): + if isinstance(ser1, TimeSeries) and isinstance(ser2, TimeSeries) and ser1.freq != ser2.freq: raise "operation cannot be performed on series with different frequencies ("+str(ser1.freq) + " and " + str(ser2.freq)+")" -def SAtoTS(values,freq,observed,dtype=None): +def SAtoTS(values, freq, observed, dtype=None): if dtype is None: _dtype = values.dtype else: _dtype = dtype - return TimeSeries(values.data,dtype=_dtype,freq=freq,observed=observed,startIndex=values.indexZeroRepresents) + return TimeSeries(values.data, dtype=_dtype, freq=freq, observed=observed, startIndex=values.indexZeroRepresents) # math functions (two series) -def add(ser1,ser2,fill_value=ma.masked): - return apply_func_twoseries(ma.add,ser1,ser2,fill_value) +def add(ser1, ser2, fill_value=ma.masked): + return apply_func_twoseries(ma.add, ser1, ser2, fill_value) -def multiply(ser1,ser2,fill_value=ma.masked): - return apply_func_twoseries(ma.multiply,ser1,ser2,fill_value) +def multiply(ser1, ser2, fill_value=ma.masked): + return apply_func_twoseries(ma.multiply, ser1, ser2, fill_value) -def divide(ser1,ser2,fill_value=ma.masked): - return apply_func_twoseries(ma.divide,ser1,ser2,fill_value) +def divide(ser1, ser2, fill_value=ma.masked): + return apply_func_twoseries(ma.divide, ser1, ser2, fill_value) -def subtract(ser1,ser2,fill_value=ma.masked): - return apply_func_twoseries(ma.subtract,ser1,ser2,fill_value) +def subtract(ser1, ser2, fill_value=ma.masked): + return apply_func_twoseries(ma.subtract, ser1, ser2, fill_value) # math functions (one series, return series) def sqrt(ser): - return apply_func_oneseries(ma.sqrt,ser) + return apply_func_oneseries(ma.sqrt, ser) # math functions (one series, return scalar) def sum(ser): @@ -218,15 +217,15 @@ def average(ser): return ma.average(ser.data) -def where(condition,x,y): - tempResult = ma.where(condition.data,x,y) - return 
TimeSeries(tempResult,dtype=numpy.bool_,freq=condition.freq,observed=condition.observed,startIndex=condition.indexZeroRepresents) +def where(condition, x, y): + tempResult = ma.where(condition.data, x, y) + return TimeSeries(tempResult, dtype=numpy.bool_, freq=condition.freq, observed=condition.observed, startIndex=condition.indexZeroRepresents) # generic functions -def apply_func_twoseries(func,ser1,ser2,fill_value=ma.masked): - validOpInputs(ser1,ser2) - return SAtoTS(doFunc(ser1,ser2,func,fill_value=fill_value),ser1.freq,ser1.observed) +def apply_func_twoseries(func, ser1, ser2, fill_value=ma.masked): + validOpInputs(ser1, ser2) + return SAtoTS(doFunc(ser1, ser2, func, fill_value=fill_value), ser1.freq, ser1.observed) -def apply_func_oneseries(func,ser): - return SAtoTS(doFunc_oneseries(ser,func),ser.freq,ser.observed) +def apply_func_oneseries(func, ser): + return SAtoTS(doFunc_oneseries(ser, func),ser.freq, ser.observed) From scipy-svn at scipy.org Sat Dec 9 13:19:16 2006 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Sat, 9 Dec 2006 12:19:16 -0600 (CST) Subject: [Scipy-svn] r2382 - trunk/Lib/sandbox/timeseries Message-ID: <20061209181916.51ACC39C074@new.scipy.org> Author: mattknox_ca Date: 2006-12-09 12:19:12 -0600 (Sat, 09 Dec 2006) New Revision: 2382 Modified: trunk/Lib/sandbox/timeseries/tsdate.py Log: code cleanup and bug fixes Modified: trunk/Lib/sandbox/timeseries/tsdate.py =================================================================== --- trunk/Lib/sandbox/timeseries/tsdate.py 2006-12-09 17:24:53 UTC (rev 2381) +++ trunk/Lib/sandbox/timeseries/tsdate.py 2006-12-09 18:19:12 UTC (rev 2382) @@ -1,11 +1,10 @@ import corelib import mx.DateTime -import numpy class Date: - def __init__(self,freq,year=None, month=None, day=None, seconds=None,quarter=None, date=None, val=None): + def __init__(self, freq, year=None, month=None, day=None, seconds=None,quarter=None, mxDate=None, val=None): - if hasattr(freq,'freq'): + if hasattr(freq, 'freq'): 
self.freq = corelib.fmtFreq(freq.freq) else: self.freq = corelib.fmtFreq(freq) @@ -24,15 +23,15 @@ self.__date = originDate + mx.DateTime.RelativeDateTime(years=val, month=-1, day=-1) elif self.freq == 'Q': self.__date = originDate + 1 + mx.DateTime.RelativeDateTime(years=int(val/4), month=int(12 * (float(val)/4 - val/4)), day=-1) - elif date is not None: - self.__date = date + elif mxDate is not None: + self.__date = mxDate else: error = ValueError("Insufficient parameters given to create a date at the given frequency") if year is None: raise error - if self.freq in ('B','D'): + if self.freq in ('B', 'D'): if month is None or day is None: raise error elif self.freq == 'M': if month is None: raise error @@ -69,24 +68,24 @@ def minute(self): return int(self.mxDate().minute) def hour(self): return int(self.mxDate().hour) - def strfmt(self,fmt): - qFmt = fmt.replace("%q","XXXX") + def strfmt(self, fmt): + qFmt = fmt.replace("%q", "XXXX") tmpStr = self.__date.strftime(qFmt) - return tmpStr.replace("XXXX",str(self.quarter())) + return tmpStr.replace("XXXX", str(self.quarter())) def __str__(self): - if self.freq in ("B","D"): - return self.__date.strftime("%d-%b-%y") + if self.freq in ("B", "D"): + return self.strfmt("%d-%b-%y") elif self.freq == "S": - return self.__date.strftime("%d-%b-%Y %H:%M:%S") + return self.strfmt("%d-%b-%Y %H:%M:%S") elif self.freq == "M": - return self.__date.strftime("%b-%Y") + return self.strfmt("%b-%Y") elif self.freq == "Q": - return str(self.year())+"q"+str(self.quarter()) + return self.strfmt("%Yq%q") elif self.freq == "A": - return str(self.year()) + return self.strfmt("%Y") else: - return self.__date.strftime("%d-%b-%y") + return self.strfmt("%d-%b-%y") def __add__(self, other): @@ -100,8 +99,8 @@ try: return self + (-1) * other except: pass try: - if self.freq <> other.freq: - raise ValueError("Cannont subtract dates of different frequency (" + str(self.freq) + " <> " + str(other.freq) + ")") + if self.freq != other.freq: + raise 
ValueError("Cannont subtract dates of different frequency (" + str(self.freq) + " != " + str(other.freq) + ")") return int(self) - int(other) except TypeError: raise TypeError("Could not subtract types " + str(type(self)) + " and " + str(type(other))) @@ -109,12 +108,12 @@ def __repr__(self): return "<" + str(self.freq) + ":" + str(self) + ">" def __eq__(self, other): - if self.freq <> other.freq: + if self.freq != other.freq: raise TypeError("frequencies are not equal!") return int(self) == int(other) def __cmp__(self, other): - if self.freq <> other.freq: + if self.freq != other.freq: raise TypeError("frequencies are not equal!") return int(self)-int(other) @@ -162,15 +161,16 @@ if freq == 'B' and tempDate.day_of_week >= 5: tempDate -= (tempDate.day_of_week - 4) if freq == 'B' or freq == 'D' or freq == 'S': - return Date(freq, date=tempDate) + return Date(freq, mxDate=tempDate) elif freq == 'M': - return Date(freq,tempDate.year,tempDate.month) + return Date(freq, year=tempDate.year, month=tempDate.month) elif freq == 'Q': - return Date(freq,tempDate.year,quarter=monthToQuarter(tempDate.month)) + return Date(freq, yaer=tempDate.year, quarter=monthToQuarter(tempDate.month)) elif freq == 'A': - return Date(freq,tempDate.year) + return Date(freq, year=tempDate.year) -def prevbusday(day_end_hour=18,day_end_min=0): + +def prevbusday(day_end_hour=18, day_end_min=0): tempDate = mx.DateTime.localtime() dateNum = tempDate.hour + float(tempDate.minute)/60 @@ -180,111 +180,111 @@ else: return thisday('B') -# returns _date converted to a date of _destFreq according to _relation -# _relation = "BEFORE" or "AFTER" (not case sensitive) -def dateOf(_date,_destFreq,_relation="BEFORE"): +# returns date converted to a date of toFreq according to relation +# relation = "BEFORE" or "AFTER" (not case sensitive) +def dateOf(date, toFreq, relation="BEFORE"): - _destFreq = corelib.fmtFreq(_destFreq) - _rel = _relation.upper()[0] + toFreq = corelib.fmtFreq(toFreq) + _rel = 
relation.upper()[0] - if _date.freq == _destFreq: - return _date - elif _date.freq == 'D': + if date.freq == toFreq: + return date + elif date.freq == 'D': - if _destFreq == 'B': - # BEFORE result: preceeding Friday if _date is a weekend, same day otherwise - # AFTER result: following Monday if _date is a weekend, same day otherwise - tempDate = _date.mxDate() - if _rel == "B": + if toFreq == 'B': + # BEFORE result: preceeding Friday if date is a weekend, same day otherwise + # AFTER result: following Monday if date is a weekend, same day otherwise + tempDate = date.mxDate() + if _rel == 'B': if tempDate.day_of_week >= 5: tempDate -= (tempDate.day_of_week - 4) - elif _rel == "A": + elif _rel == 'A': if tempDate.day_of_week >= 5: tempDate += 7 - tempDate.day_of_week - return Date(freq='B',date=tempDate) + return Date(freq='B', mxDate=tempDate) - elif _destFreq == 'M': return Date('M',_date.mxDate().year,_date.mxDate().month) + elif toFreq == 'M': return Date(freq='M', year=date.year(), month=date.month()) - elif _destFreq == 'S': - if _rel == "B": return Date('S',_date.mxDate().year,_date.mxDate().month,_date.mxDate().day,0) - elif _rel == "A": return Date('S',_date.mxDate().year,_date.mxDate().month,_date.mxDate().day,24*60*60-1) + elif toFreq == 'S': + if _rel == 'B': return Date(freq='S', year=date.year(), month=date.month(), day=date.day(), seconds=0) + elif _rel == "A": return Date(freq='S', year=date.year(), month=date.month(), day=date.day(), seconds=24*60*60-1) - elif _destFreq == 'Q': return Date('Q',_date.mxDate().year,quarter=monthToQuarter(_date.mxDate().month)) + elif toFreq == 'Q': return Date(freq='Q', year=date.year(), quarter=date.quarter()) - elif _destFreq == 'A': return Date('A',_date.mxDate().year) + elif toFreq == 'A': return Date(freq='A', year=date.year()) - elif _date.freq == 'B': + elif date.freq == 'B': - if _destFreq == 'D': return Date('D',_date.mxDate().year,_date.mxDate().month,_date.mxDate().day) + if toFreq == 'D': return 
Date(freq='D', year=date.year(), month=date.month(), day=date.day()) - elif _destFreq == 'M': return Date('M',_date.mxDate().year,_date.mxDate().month) + elif toFreq == 'M': return Date(freq='M', year=date.year(), month=date.month()) - elif _destFreq == 'S': - if _rel == "B": return Date('S',_date.mxDate().year,_date.mxDate().month,_date.mxDate().day,0) - elif _rel == "A": return Date('S',_date.mxDate().year,_date.mxDate().month,_date.mxDate().day,24*60*60-1) + elif toFreq == 'S': + if _rel == 'B': return Date(freq='S', year=date.year(), month=date.month(), day=date.day(), seconds=0) + elif _rel == 'A': return Date(freq='S', year=date.year(), month=date.month(), dday=ate.day(), seconds=24*60*60-1) - elif _destFreq == 'Q': return Date('Q',_date.mxDate().year,quarter=monthToQuarter(_date.mxDate().month)) + elif toFreq == 'Q': return Date(freq='Q', year=date.year(), quarter=date.quarter()) - elif _destFreq == 'A': return Date('A',_date.mxDate().year) + elif toFreq == 'A': return Date(freq='A', year=date.year()) - elif _date.freq == 'M': + elif date.freq == 'M': - if _destFreq == 'D': - tempDate = _date.mxDate() - if _rel == "B": - return Date('D',_date.mxDate().year,_date.mxDate().month,1) - elif _rel == "A": - if _date.mxDate().month == 12: + if toFreq == 'D': + tempDate = date.mxDate() + if _rel == 'B': + return Date(freq='D', year=date.year(), month=date.month(), day=1) + elif _rel == 'A': + if date.month() == 12: tempMonth = 1 - tempYear = _date.mxDate().year + 1 + tempYear = date.year() + 1 else: - tempMonth = _date.mxDate().month + 1 - tempYear = _date.mxDate().year - return Date('D',tempYear,tempMonth,1)-1 + tempMonth = date.month() + 1 + tempYear = date.year() + return Date('D', year=tempYear, month=tempMonth, day=1)-1 - elif _destFreq == 'B': - if _rel == "B": return dateOf(dateOf(_date,'D',"BEFORE"),'B',"AFTER") - elif _rel == "A": return dateOf(dateOf(_date,'D',"AFTER"),'B',"BEFORE") + elif toFreq == 'B': + if _rel == 'B': return dateOf(dateOf(date, 'D', 
"BEFORE"), 'B', "AFTER") + elif _rel == 'A': return dateOf(dateOf(date, 'D', "AFTER"), 'B', "BEFORE") - elif _destFreq == 'S': - if _rel == "B": return dateOf(dateOf(_date,'D',"BEFORE"),'S',"BEFORE") - elif _rel == "A": return dateOf(dateOf(_date,'D',"AFTER"),'S',"AFTER") + elif toFreq == 'S': + if _rel == 'B': return dateOf(dateOf(date, 'D', "BEFORE"), 'S', "BEFORE") + elif _rel == 'A': return dateOf(dateOf(date, 'D', "AFTER"), 'S', "AFTER") - elif _destFreq == 'Q': return Date('Q',_date.mxDate().year,quarter=monthToQuarter(_date.mxDate().month)) + elif toFreq == 'Q': return Date(freq='Q', year=date.year(), quarter=date.quarter()) - elif _destFreq == 'A': return Date('A',_date.mxDate().year) + elif toFreq == 'A': return Date(freq='A', year=date.year()) - elif _date.freq == 'S': + elif date.freq == 'S': - if _destFreq == 'D': - return Date('D',_date.mxDate().year,_date.mxDate().month,_date.mxDate().day) - elif _destFreq == 'B': - if _rel == "B": return dateOf(dateOf(_date,'D'),'B',"BEFORE") - elif _rel == "A": return dateOf(dateOf(_date,'D'),'B',"AFTER") - elif _destFreq == 'M': - return Date('M',_date.mxDate().year,_date.mxDate().month) + if toFreq == 'D': + return Date('D', year=date.year(), month=date.month(), day=date.day()) + elif toFreq == 'B': + if _rel == 'B': return dateOf(dateOf(date, 'D'), 'B', "BEFORE") + elif _rel == 'A': return dateOf(dateOf(date, 'D'), 'B', "AFTER") + elif toFreq == 'M': + return Date(freq='M', year=date.year(), month=date.month()) - elif _date.freq == 'Q': + elif date.freq == 'Q': - if _destFreq == 'D': - if _rel == "B": return dateOf(_date-1,'D',"AFTER")+1 - elif _rel == "A": return Date('D',_date.mxDate().year,_date.mxDate().month,_date.mxDate().day) - elif _destFreq == 'B': - if _rel == "B": return dateOf(dateOf(_date,'D'),'B',"AFTER") - if _rel == "A": return dateOf(dateOf(_date,'D',"AFTER"),'B',"BEFORE") - elif _destFreq == 'M': - if _rel == "B": return dateOf(_date-1,'M',"AFTER")+1 - elif _rel == "A": return 
Date('M',_date.mxDate().year,_date.mxDate().month) - elif _destFreq == 'A': return Date('A',_date.mxDate().year) - elif _date.freq == 'A': + if toFreq == 'D': + if _rel == 'B': return dateOf(date-1, 'D', "AFTER")+1 + elif _rel == 'A': return Date(freq='D', year=date.year(), month=date.month(), day=date.day()) + elif toFreq == 'B': + if _rel == 'B': return dateOf(dateOf(date, 'D'), 'B', "AFTER") + if _rel == 'A': return dateOf(dateOf(date, 'D', "AFTER"), 'B', "BEFORE") + elif toFreq == 'M': + if _rel == 'B': return dateOf(date-1, 'M', "AFTER")+1 + elif _rel == 'A': return Date(freq='M', year=date.year(), month=date.month()) + elif toFreq == 'A': return Date(freq='A', year=date.year()) + elif date.freq == 'A': - if _destFreq == 'D': - if _rel == "B": return Date('D',_date.mxDate().year, 1, 1) - elif _rel == "A": return Date('D',_date.mxDate().year,12,31) - elif _destFreq == 'B': - if _rel == "B": return dateOf(dateOf(_date,'D'),'B',"AFTER") - if _rel == "A": return dateOf(dateOf(_date,'D',"AFTER"),'B',"BEFORE") - elif _destFreq == 'M': - if _rel == "B": return Date('M',_date.mxDate().year,1) - elif _rel == "A": return Date('M',_date.mxDate().year,12) - elif _destFreq == 'Q': - if _rel == "B": return Date('Q',_date.mxDate().year,quarter=1) - elif _rel == "A": return Date('Q',_date.mxDate().year,quarter=4) \ No newline at end of file + if toFreq == 'D': + if _rel == 'B': return Date(freq='D', year=date.year(), month=1, day=1) + elif _rel == 'A': return Date(freq='D', year=date.year(), month=12, day=31) + elif toFreq == 'B': + if _rel == 'B': return dateOf(dateOf(date, 'D'), 'B', "AFTER") + if _rel == 'A': return dateOf(dateOf(date, 'D', "AFTER"), 'B', "BEFORE") + elif toFreq == 'M': + if _rel == 'B': return Date(freq='M', year=date.year(), month=1) + elif _rel == 'A': return Date(freq='M', year=date.year(), month=12) + elif toFreq == 'Q': + if _rel == 'B': return Date(freq='Q', year=date.year(), quarter=1) + elif _rel == 'A': return Date(freq='Q', year=date.year(), 
quarter=4) \ No newline at end of file From scipy-svn at scipy.org Sat Dec 9 13:21:29 2006 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Sat, 9 Dec 2006 12:21:29 -0600 (CST) Subject: [Scipy-svn] r2383 - trunk/Lib/sandbox/timeseries/doc Message-ID: <20061209182129.5DCAD39C074@new.scipy.org> Author: mattknox_ca Date: 2006-12-09 12:21:25 -0600 (Sat, 09 Dec 2006) New Revision: 2383 Modified: trunk/Lib/sandbox/timeseries/doc/todo.txt Log: added item about dateOf function Modified: trunk/Lib/sandbox/timeseries/doc/todo.txt =================================================================== --- trunk/Lib/sandbox/timeseries/doc/todo.txt 2006-12-09 18:19:12 UTC (rev 2382) +++ trunk/Lib/sandbox/timeseries/doc/todo.txt 2006-12-09 18:21:25 UTC (rev 2383) @@ -16,6 +16,9 @@ I could write a brute force method for this right now fairly easily, but it would be very slow. + - add support for a few more frequency conversions that are missing + in the dateOf function in tsdate.py + - support "secondly" frequency data in a more robust way. Frequency conversion methods are not supported for this currently. From scipy-svn at scipy.org Sat Dec 9 18:09:33 2006 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Sat, 9 Dec 2006 17:09:33 -0600 (CST) Subject: [Scipy-svn] r2384 - trunk/Lib/sandbox/timeseries Message-ID: <20061209230933.896F139C118@new.scipy.org> Author: mattknox_ca Date: 2006-12-09 17:09:29 -0600 (Sat, 09 Dec 2006) New Revision: 2384 Added: trunk/Lib/sandbox/timeseries/readme.txt Removed: trunk/Lib/sandbox/timeseries/README Log: Renamed remotely Deleted: trunk/Lib/sandbox/timeseries/README =================================================================== --- trunk/Lib/sandbox/timeseries/README 2006-12-09 18:21:25 UTC (rev 2383) +++ trunk/Lib/sandbox/timeseries/README 2006-12-09 23:09:29 UTC (rev 2384) @@ -1,22 +0,0 @@ -Requirements and warnings: - -1. version 2.0.x of the mx DateTime module MUST be installed. Only "tested" with 2.0.3 -2. 
Only tested with numpy 1.0.1 -3. Only tested with Python 2.4.x -4. Only tested on Windows Platform -5. the included cseries.pyd file was compiled for 32-bit windows, so if you are trying - this on another platform, the first thing you need to do is recompile it - - -Instructions: - -1. read through the included example.py script in the examples subfolder. This illustrates - the basic functionality of the module. I recommend placing print statements after each - variable assignment, one at a time, to see the result of each item in the examples. - - Documentation is very limited, so the examples really are the best starting point. - -2. Before you get too crazy and start modifying the examples and writing your own scripts, - please read todo.txt in the doc subdirectory for an outline of limitations in the current - module. - Copied: trunk/Lib/sandbox/timeseries/readme.txt (from rev 2383, trunk/Lib/sandbox/timeseries/README) From scipy-svn at scipy.org Sun Dec 10 18:55:19 2006 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Sun, 10 Dec 2006 17:55:19 -0600 (CST) Subject: [Scipy-svn] r2385 - trunk/Lib/linalg Message-ID: <20061210235519.B4CEE39C193@new.scipy.org> Author: timl Date: 2006-12-10 17:55:11 -0600 (Sun, 10 Dec 2006) New Revision: 2385 Modified: trunk/Lib/linalg/basic.py trunk/Lib/linalg/generic_flapack.pyf trunk/Lib/linalg/info.py Log: add support for cholesky decomposition and solution of banded linear systems with Hermitian or symmetric matrices in scipy.linalg Modified: trunk/Lib/linalg/basic.py =================================================================== --- trunk/Lib/linalg/basic.py 2006-12-09 23:09:29 UTC (rev 2384) +++ trunk/Lib/linalg/basic.py 2006-12-10 23:55:11 UTC (rev 2385) @@ -10,7 +10,7 @@ __all__ = ['solve','inv','det','lstsq','norm','pinv','pinv2', 'tri','tril','triu','toeplitz','hankel','lu_solve', 'cho_solve','solve_banded','LinAlgError','kron', - 'all_mat'] + 'all_mat', 'cholesky_banded', 'solveh_banded'] #from blas import 
get_blas_funcs from flinalg import get_flinalg_funcs @@ -170,7 +170,94 @@ raise ValueError,\ 'illegal value in %-th argument of internal gbsv'%(-info) +def solveh_banded(ab, b, overwrite_ab=0, overwrite_b=0, + lower=0): + """ solveh_banded(ab, b, overwrite_ab=0, overwrite_b=0) -> c, x + Solve a linear system of equations a * x = b for x where + a is a banded symmetric or Hermitian positive definite + matrix stored in lower diagonal ordered form (lower=1) + + a11 a22 a33 a44 a55 a66 + a21 a32 a43 a54 a65 * + a31 a42 a53 a64 * * + + or upper diagonal ordered form + + * * a31 a42 a53 a64 + * a21 a32 a43 a54 a65 + a11 a22 a33 a44 a55 a66 + + Inputs: + + ab -- An N x l + b -- An N x nrhs matrix or N vector. + overwrite_y - Discard data in y, where y is ab or b. + lower - is ab in lower or upper form? + + Outputs: + + c: the Cholesky factorization of ab + x: the solution to ab * x = b + + """ + ab, b = map(asarray_chkfinite,(ab,b)) + + pbsv, = get_lapack_funcs(('pbsv',),(ab,b)) + c,x,info = pbsv(ab,b, + lower=lower, + overwrite_ab=overwrite_ab, + overwrite_b=overwrite_b) + if info==0: + return c, x + if info>0: + raise LinAlgError, "%d-th leading minor not positive definite" % info + raise ValueError,\ + 'illegal value in %d-th argument of internal pbsv'%(-info) + +def cholesky_banded(ab, overwrite_ab=0, lower=0): + """ cholesky_banded(ab, overwrite_ab=0, lower=0) -> c + + Compute the Cholesky decomposition of a + banded symmetric or Hermitian positive definite + matrix stored in lower diagonal ordered form (lower=1) + + a11 a22 a33 a44 a55 a66 + a21 a32 a43 a54 a65 * + a31 a42 a53 a64 * * + + or upper diagonal ordered form + + * * a31 a42 a53 a64 + * a21 a32 a43 a54 a65 + a11 a22 a33 a44 a55 a66 + + Inputs: + + ab -- An N x l + overwrite_ab - Discard data in ab + lower - is ab in lower or upper form? 
+ + Outputs: + + c: the Cholesky factorization of ab + + """ + ab = asarray_chkfinite(ab) + + pbtrf, = get_lapack_funcs(('pbtrf',),(ab,)) + c,info = pbtrf(ab, + lower=lower, + overwrite_ab=overwrite_ab) + + if info==0: + return c + if info>0: + raise LinAlgError, "%d-th leading minor not positive definite" % info + raise ValueError,\ + 'illegal value in %d-th argument of internal pbtrf'%(-info) + + # matrix inversion def inv(a, overwrite_a=0): """ inv(a, overwrite_a=0) -> a_inv Modified: trunk/Lib/linalg/generic_flapack.pyf =================================================================== --- trunk/Lib/linalg/generic_flapack.pyf 2006-12-09 23:09:29 UTC (rev 2384) +++ trunk/Lib/linalg/generic_flapack.pyf 2006-12-10 23:55:11 UTC (rev 2385) @@ -13,6 +13,113 @@ python module generic_flapack interface + subroutine pbtrf(lower,n,kd,ab,ldab,info) + + ! Compute Cholesky decomposition of banded symmetric positive definite + ! matrix: + ! A = U^T * U, C = U if lower = 0 + ! A = L * L^T, C = L if lower = 1 + ! C is triangular matrix of the corresponding Cholesky decomposition. + + callstatement (*f2py_func)((lower?"L":"U"),&n,&kd,ab,&ldab,&info); + callprotoargument char*,int*,int*,*,int*,int* + + integer optional,check(shape(ab,0)==ldab),depend(ab) :: ldab=shape(ab,0) + integer intent(hide),depend(ab) :: n=shape(ab,1) + integer intent(hide),depend(ab) :: kd=shape(ab,0)-1 + integer optional,intent(in),check(lower==0||lower==1) :: lower = 0 + + dimension(ldab,n),intent(in,out,copy,out=c) :: ab + integer intent(out) :: info + + end subroutine pbtrf + + subroutine pbtrf(lower,n,kd,ab,ldab,info) + + + ! Compute Cholesky decomposition of banded symmetric positive definite + ! matrix: + ! A = U^H * U, C = U if lower = 0 + ! A = L * L^H, C = L if lower = 1 + ! C is triangular matrix of the corresponding Cholesky decomposition. 
+ + callstatement (*f2py_func)((lower?"L":"U"),&n,&kd,ab,&ldab,&info); + callprotoargument char*,int*,int*,*,int*,int* + + integer optional,check(shape(ab,0)==ldab),depend(ab) :: ldab=shape(ab,0) + integer intent(hide),depend(ab) :: n=shape(ab,1) + integer intent(hide),depend(ab) :: kd=shape(ab,0)-1 + integer optional,intent(in),check(lower==0||lower==1) :: lower = 0 + + dimension(ldab,n),intent(in,out,copy,out=c) :: ab + integer intent(out) :: info + + end subroutine pbtrf + + subroutine pbsv(lower,n,kd,nrhs,ab,ldab,b,ldb,info) + + ! + ! Computes the solution to a real system of linear equations + ! A * X = B, + ! where A is an N-by-N symmetric positive definite band matrix and X + ! and B are N-by-NRHS matrices. + ! + ! The Cholesky decomposition is used to factor A as + ! A = U**T * U, if lower=1, or + ! A = L * L**T, if lower=0 + ! where U is an upper triangular band matrix, and L is a lower + ! triangular band matrix, with the same number of superdiagonals or + ! subdiagonals as A. The factored form of A is then used to solve the + ! system of equations A * X = B. + + callstatement (*f2py_func)((lower?"L":"U"),&n,&kd,&nrhs,ab,&ldab,b,&ldb,&info); + callprotoargument char*,int*,int*,int*,*,int*,*,int*,int* + + integer optional,check(shape(ab,0)==ldab),depend(ab) :: ldab=shape(ab,0) + integer intent(hide),depend(ab) :: n=shape(ab,1) + integer intent(hide),depend(ab) :: kd=shape(ab,0)-1 + integer intent(hide),depend(b) :: ldb=shape(b,1) + integer intent(hide),depend(b) :: nrhs=shape(b,0) + integer optional,intent(in),check(lower==0||lower==1) :: lower = 0 + + dimension(nrhs,ldb),intent(in,out,copy,out=x) :: b + dimension(ldab,n),intent(in,out,copy,out=c) :: ab + integer intent(out) :: info + + end subroutine pbsv + + subroutine pbsv(lower,n,kd,nrhs,ab,ldab,b,ldb,info) + + ! + ! Computes the solution to a real system of linear equations + ! A * X = B, + ! where A is an N-by-N Hermitian positive definite band matrix and X + ! and B are N-by-NRHS matrices. + ! + ! 
The Cholesky decomposition is used to factor A as + ! A = U**H * U, if lower=1, or + ! A = L * L**H, if lower=0 + ! where U is an upper triangular band matrix, and L is a lower + ! triangular band matrix, with the same number of superdiagonals or + ! subdiagonals as A. The factored form of A is then used to solve the + ! system of equations A * X = B. + + callstatement (*f2py_func)((lower?"L":"U"),&n,&kd,&nrhs,ab,&ldab,b,&ldb,&info); + callprotoargument char*,int*,int*,int*,*,int*,*,int*,int* + + integer optional,check(shape(ab,0)==ldab),depend(ab) :: ldab=shape(ab,0) + integer intent(hide),depend(ab) :: n=shape(ab,1) + integer intent(hide),depend(ab) :: kd=shape(ab,0)-1 + integer intent(hide),depend(b) :: ldb=shape(b,1) + integer intent(hide),depend(b) :: nrhs=shape(b,0) + integer optional,intent(in),check(lower==0||lower==1) :: lower = 0 + + dimension(nrhs,ldb),intent(in,out,copy,out=x) :: b + dimension(ldab,n),intent(in,out,copy,out=c) :: ab + integer intent(out) :: info + + end subroutine pbsv + subroutine gebal(scale,permute,n,a,m,lo,hi,pivscale,info) ! ! 
ba,lo,hi,pivscale,info = gebal(a,scale=0,permute=0,overwrite_a=0) Modified: trunk/Lib/linalg/info.py =================================================================== --- trunk/Lib/linalg/info.py 2006-12-09 23:09:29 UTC (rev 2384) +++ trunk/Lib/linalg/info.py 2006-12-10 23:55:11 UTC (rev 2385) @@ -7,6 +7,7 @@ inv --- Find the inverse of a square matrix solve --- Solve a linear system of equations solve_banded --- Solve a linear system of equations with a banded matrix + solveh_banded --- Solve a linear system of equations with a Hermitian or symmetric banded matrix, returning the Cholesky decomposition as well det --- Find the determinant of a square matrix norm --- matrix and vector norm lstsq --- Solve linear least-squares problem @@ -27,6 +28,7 @@ diagsvd --- construct matrix of singular values from output of svd orth --- construct orthonormal basis for range of A using svd cholesky --- Cholesky decomposition of a matrix + cholesky_banded --- Cholesky decomposition of a banded symmetric or Hermitian matrix cho_factor --- Cholesky decomposition for use in solving linear system cho_solve --- Solve previously factored linear system qr --- QR decomposition of a matrix From scipy-svn at scipy.org Mon Dec 11 10:03:38 2006 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Mon, 11 Dec 2006 09:03:38 -0600 (CST) Subject: [Scipy-svn] r2386 - trunk/Lib/sandbox/timeseries Message-ID: <20061211150338.73CCC39C18B@new.scipy.org> Author: mattknox_ca Date: 2006-12-11 09:03:34 -0600 (Mon, 11 Dec 2006) New Revision: 2386 Modified: trunk/Lib/sandbox/timeseries/tsdate.py Log: fixed typo Modified: trunk/Lib/sandbox/timeseries/tsdate.py =================================================================== --- trunk/Lib/sandbox/timeseries/tsdate.py 2006-12-10 23:55:11 UTC (rev 2385) +++ trunk/Lib/sandbox/timeseries/tsdate.py 2006-12-11 15:03:34 UTC (rev 2386) @@ -165,7 +165,7 @@ elif freq == 'M': return Date(freq, year=tempDate.year, month=tempDate.month) elif freq == 'Q': - 
return Date(freq, yaer=tempDate.year, quarter=monthToQuarter(tempDate.month)) + return Date(freq, year=tempDate.year, quarter=monthToQuarter(tempDate.month)) elif freq == 'A': return Date(freq, year=tempDate.year) From scipy-svn at scipy.org Mon Dec 11 10:13:48 2006 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Mon, 11 Dec 2006 09:13:48 -0600 (CST) Subject: [Scipy-svn] r2387 - trunk/Lib/sandbox/timeseries Message-ID: <20061211151348.8E98D39C18B@new.scipy.org> Author: mattknox_ca Date: 2006-12-11 09:13:45 -0600 (Mon, 11 Dec 2006) New Revision: 2387 Modified: trunk/Lib/sandbox/timeseries/cseries.c Log: removed useless parameter in function, split apart convert function into 2 separate functions Modified: trunk/Lib/sandbox/timeseries/cseries.c =================================================================== --- trunk/Lib/sandbox/timeseries/cseries.c 2006-12-11 15:03:34 UTC (rev 2386) +++ trunk/Lib/sandbox/timeseries/cseries.c 2006-12-11 15:13:45 UTC (rev 2387) @@ -36,24 +36,18 @@ } } - -//fromDate is periods since Dec 31, 1849 static long -convert(long fromDate, char fromFreq, char toFreq, int notStartInd, int atEnd) +toDaily(long fromDate, char fromFreq) { - long absdate, origin, secondorigin, secsInDay; - long converted; - int rem; - int y,m,d,s; + long absdate, origin, secondorigin; + int y,m,d; mxDateTimeObject *theDate; - mxDateTimeObject *convDate; origin = 675333; secondorigin = 722814; - secsInDay = 86400; - //convert fromDate to days since Dec 31, 1849 (Jan 1, 1850 would have absdate of 1) + //convert fromDate to days since (0 AD - 1 day) switch(fromFreq) { case 'D': @@ -65,7 +59,7 @@ case 'M': y = fromDate/12 + 1; m = fromDate%12; - if (atEnd) m++; + if (m == 0) { m = 12; @@ -75,9 +69,8 @@ break; case 'Q': y = fromDate/4 + 1; - m = (fromDate%4) * 3; - if (!atEnd) m -= 2; //change to first month of quarter - else m += 1; + m = (fromDate%4) * 3 - 2; + if (m < 1) { m += 12; @@ -92,7 +85,6 @@ break; case 'A': y = fromDate-1; - if (atEnd == 1) 
y++; m = 1; d = 1; break; @@ -107,7 +99,6 @@ theDate = (mxDateTimeObject *)mxDateTime.DateTime_FromDateAndTime(y,m,d,0,0,0); absdate = (long)(theDate->absdate); - if (atEnd == 1) absdate--; } else { @@ -115,11 +106,30 @@ absdate += origin; } - if (atEnd) s = secsInDay-1; - else s = 0; + return absdate; - convDate = (mxDateTimeObject *)mxDateTime.DateTime_FromAbsDateAndTime(absdate,s); +} + +//fromDate is periods since Dec 31, 1849 +static long +convert(long fromDate, char fromFreq, char toFreq, int notStartInd) +{ + long absdate, origin, secondorigin, secsInDay; + long converted; + int rem; + int y,m,d; + + mxDateTimeObject *convDate; + + origin = 675333; + secondorigin = 722814; + secsInDay = 86400; + + absdate = toDaily(fromDate, fromFreq); + + convDate = (mxDateTimeObject *)mxDateTime.DateTime_FromAbsDateAndTime(absdate,0); + //switch back to days and years since 1849 for pyTSA Date absdate -= origin; y = convDate->year - 1849; @@ -339,7 +349,7 @@ PyObject *returnVal = NULL; - int notStartInd, atEnd; + int notStartInd; long startIndex, newStart; long i, curPerInd, nextPerInd, prevIndex, curIndex; long dim; @@ -386,20 +396,18 @@ //convert start index to new frequency notStartInd = 0; - atEnd = 0; - newStart = convert(startIndex, fromFreq[0], toFreq[0], notStartInd, atEnd); + newStart = convert(startIndex, fromFreq[0], toFreq[0], notStartInd); //initialize prevIndex prevIndex = newStart - 1; notStartInd = 1; - atEnd = 0; //set values in the new array for (i = 0; i < array->dimensions[0]; i++) { //find index for start of current period in new frequency - curPerInd = convert(startIndex + i, fromFreq[0], toFreq[0], notStartInd, atEnd); + curPerInd = convert(startIndex + i, fromFreq[0], toFreq[0], notStartInd); //get frequency numeric mapping fromFrVal = freqVal(fromFreq[0]); @@ -421,7 +429,7 @@ newValMask = valMask; //find index for start of next period in new frequency - nextPerInd = convert(startIndex + i + 1, fromFreq[0], toFreq[0], notStartInd, atEnd); + 
nextPerInd = convert(startIndex + i + 1, fromFreq[0], toFreq[0], notStartInd); //adjust for observed setting if (observed[0] == 'S' && PyArray_ISFLOAT(array) && !( (fromFrVal == 4 && toFrVal == 5) || (fromFrVal == 5 && toFrVal == 4) ) ) @@ -509,15 +517,14 @@ long fromDate; char* fromFreq; char* toFreq; - int notStartInd, atEnd; + int notStartInd; if (!PyArg_ParseTuple(args, "lss:convert(fromDate, fromfreq, tofreq)", &fromDate, &fromFreq, &toFreq)) return NULL; //always want start of period (only matters when converting from lower freq to higher freq ie. m -> d) - atEnd = 0; notStartInd = 0; - return PyInt_FromLong(convert(fromDate, fromFreq[0], toFreq[0], notStartInd, atEnd)); + return PyInt_FromLong(convert(fromDate, fromFreq[0], toFreq[0], notStartInd)); } From scipy-svn at scipy.org Mon Dec 11 10:14:34 2006 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Mon, 11 Dec 2006 09:14:34 -0600 (CST) Subject: [Scipy-svn] r2388 - trunk/Lib/sandbox/timeseries Message-ID: <20061211151434.B0EEC39C18B@new.scipy.org> Author: mattknox_ca Date: 2006-12-11 09:14:31 -0600 (Mon, 11 Dec 2006) New Revision: 2388 Modified: trunk/Lib/sandbox/timeseries/cseries.pyd Log: recompiled for newest code Modified: trunk/Lib/sandbox/timeseries/cseries.pyd =================================================================== (Binary files differ) From scipy-svn at scipy.org Mon Dec 11 13:00:22 2006 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Mon, 11 Dec 2006 12:00:22 -0600 (CST) Subject: [Scipy-svn] r2389 - in trunk/Lib/sandbox: . 
maskedarray Message-ID: <20061211180022.F034439C209@new.scipy.org> Author: pierregm Date: 2006-12-11 12:00:04 -0600 (Mon, 11 Dec 2006) New Revision: 2389 Added: trunk/Lib/sandbox/maskedarray/ trunk/Lib/sandbox/maskedarray/CHANGELOG trunk/Lib/sandbox/maskedarray/LICENSE trunk/Lib/sandbox/maskedarray/README trunk/Lib/sandbox/maskedarray/__init__.py trunk/Lib/sandbox/maskedarray/core.py trunk/Lib/sandbox/maskedarray/extras.py trunk/Lib/sandbox/maskedarray/mpl_maskedarray.patch trunk/Lib/sandbox/maskedarray/setup.py trunk/Lib/sandbox/maskedarray/tests/ trunk/Lib/sandbox/maskedarray/testutils.py trunk/Lib/sandbox/maskedarray/version.py Log: First log Added: trunk/Lib/sandbox/maskedarray/CHANGELOG =================================================================== --- trunk/Lib/sandbox/maskedarray/CHANGELOG 2006-12-11 15:14:31 UTC (rev 2388) +++ trunk/Lib/sandbox/maskedarray/CHANGELOG 2006-12-11 18:00:04 UTC (rev 2389) @@ -0,0 +1,20 @@ +#2006-12-09: - Code reorganization: define 2 modules, core and extras +#2006-11-25: - Disable copy by default +# - Added keep_mask flag (to save mask when creating a ma from a ma) +# - Fixed functions: empty_like +# - Fixed methods: .any and .all +# - New functions: masked_all, masked_all_like +# - New methods: .squeeze +#2006-11-20: - fixed make_mask +# - fixed nonzero method +#2006-11-16: - fixed .T +#2006-11-12: - add max, min as function (not only method...) +# - repr returns a name like masked_xxx, where xxx is the subclass +#2006-10-31: - make sure that make_mask returns a pure ndarray. +#2006-10-30: - When converted to a float, a masked singleton is transformed to nan +# instead of raising an exception. +#21: Use __get__ method in _arraymethods, _arithmethods, _compamethods +#18: Updated put to match the definition of numpy 1.0, deleted putmask, changed resize +#2: prevent an extra kword being sent to make_mask_none + +#............................................................ 
\ No newline at end of file Added: trunk/Lib/sandbox/maskedarray/LICENSE =================================================================== --- trunk/Lib/sandbox/maskedarray/LICENSE 2006-12-11 15:14:31 UTC (rev 2388) +++ trunk/Lib/sandbox/maskedarray/LICENSE 2006-12-11 18:00:04 UTC (rev 2389) @@ -0,0 +1,24 @@ +* Copyright (c) 2006, University of Georgia and Pierre G.F. Gerard-Marchant +* All rights reserved. +* Redistribution and use in source and binary forms, with or without +* modification, are permitted provided that the following conditions are met: +* +* * Redistributions of source code must retain the above copyright +* notice, this list of conditions and the following disclaimer. +* * Redistributions in binary form must reproduce the above copyright +* notice, this list of conditions and the following disclaimer in the +* documentation and/or other materials provided with the distribution. +* * Neither the name of the Univeristy of Georgia nor the +* names of its contributors may be used to endorse or promote products +* derived from this software without specific prior written permission. +* +* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ANY +* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +* DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY +* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
\ No newline at end of file Added: trunk/Lib/sandbox/maskedarray/README =================================================================== --- trunk/Lib/sandbox/maskedarray/README 2006-12-11 15:14:31 UTC (rev 2388) +++ trunk/Lib/sandbox/maskedarray/README 2006-12-11 18:00:04 UTC (rev 2389) @@ -0,0 +1 @@ +# An alternative implementation of masked arrays in numpy \ No newline at end of file Added: trunk/Lib/sandbox/maskedarray/__init__.py =================================================================== --- trunk/Lib/sandbox/maskedarray/__init__.py 2006-12-11 15:14:31 UTC (rev 2388) +++ trunk/Lib/sandbox/maskedarray/__init__.py 2006-12-11 18:00:04 UTC (rev 2389) @@ -0,0 +1,25 @@ +"""Masked arrays add-ons. + +A collection of utilities for maskedarray + +:author: Pierre GF Gerard-Marchant +:contact: pierregm_at_uga_dot_edu +:version: $Id: __init__.py 38 2006-12-09 23:01:14Z backtopop $ +""" +__author__ = "Pierre GF Gerard-Marchant ($Author: backtopop $)" +__version__ = '1.0' +__revision__ = "$Revision: 38 $" +__date__ = '$Date: 2006-12-09 18:01:14 -0500 (Sat, 09 Dec 2006) $' + +import core +reload(core) +from core import * + +import extras +reload(extras) +from extras import * + + +__all__ = ['core', 'extras'] +__all__ += core.__all__ +__all__ += extras.__all__ \ No newline at end of file Added: trunk/Lib/sandbox/maskedarray/core.py =================================================================== --- trunk/Lib/sandbox/maskedarray/core.py 2006-12-11 15:14:31 UTC (rev 2388) +++ trunk/Lib/sandbox/maskedarray/core.py 2006-12-11 18:00:04 UTC (rev 2389) @@ -0,0 +1,2907 @@ +"""MA: a facility for dealing with missing observations +MA is generally used as a numpy.array look-alike. +by Paul F. Dubois. + +Copyright 1999, 2000, 2001 Regents of the University of California. +Released for unlimited redistribution. +Adapted for numpy_core 2005 by Travis Oliphant and +(mainly) Paul Dubois. + +Subclassing of the base ndarray 2006 by Pierre Gerard-Marchant. 
+pgmdevlist_at_gmail_dot_com + +:author: Pierre Gerard-Marchant +:contact: pierregm_at_uga_dot_edu +:version: $Id: core.py 40 2006-12-10 19:50:35Z backtopop $ +""" +__author__ = "Pierre GF Gerard-Marchant ($Author: backtopop $)" +__version__ = '1.0' +__revision__ = "$Revision: 40 $" +__date__ = '$Date: 2006-12-10 14:50:35 -0500 (Sun, 10 Dec 2006) $' + +__all__ = ['MAError', 'MaskType', 'MaskedArray', + 'bool_', 'complex_', 'float_', 'int_', 'object_', + 'abs', 'absolute', 'add', 'all', 'allclose', 'allequal', 'alltrue', + 'amax', 'amin', 'anom', 'anomalies', 'any', 'arange', + 'arccos', 'arccosh', 'arcsin', 'arcsinh', 'arctan', 'arctan2', + 'arctanh', 'argmax', 'argmin', 'argsort', 'around', + 'array', 'asarray', 'average', + 'bitwise_and', 'bitwise_or', 'bitwise_xor', + 'ceil', 'choose', 'compressed', 'concatenate', 'conjugate', + 'cos', 'cosh', 'count', + 'diagonal', 'divide', 'dump', 'dumps', + 'empty', 'empty_like', 'equal', 'exp', + 'fabs', 'fmod', 'filled', 'floor', 'floor_divide', + 'getmask', 'getmaskarray', 'greater', 'greater_equal', 'hypot', + 'ids', 'inner', 'innerproduct', + 'isMA', 'isMaskedArray', 'is_mask', 'is_masked', 'isarray', + 'left_shift', 'less', 'less_equal', 'load', 'loads', 'log', 'log10', + 'logical_and', 'logical_not', 'logical_or', 'logical_xor', + 'make_mask', 'make_mask_none', 'mask_or', 'masked', + 'masked_array', 'masked_equal', 'masked_greater', + 'masked_greater_equal', 'masked_inside', 'masked_less', + 'masked_less_equal', 'masked_not_equal', 'masked_object', + 'masked_outside', 'masked_print_option', 'masked_singleton', + 'masked_values', 'masked_where', 'max', 'maximum', 'mean', 'min', + 'minimum', 'multiply', + 'negative', 'nomask', 'nonzero', 'not_equal', + 'ones', 'outer', 'outerproduct', + 'product', 'ptp', 'put', 'putmask', + 'rank', 'ravel', 'remainder', 'repeat', 'reshape', 'resize', + 'right_shift', 'round_', + 'shape', 'sin', 'sinh', 'size', 'sometrue', 'sort', 'sqrt', 'std', + 'subtract', 'sum', 'swapaxes', + 'take', 
'tan', 'tanh', 'transpose', 'true_divide', + 'var', 'where', + 'zeros'] + + +import sys +import types +import cPickle +# +import numpy +from numpy import bool_, complex_, float_, int_, object_ + +import numpy.core.umath as umath +import numpy.core.fromnumeric as fromnumeric +from numpy.core.numeric import ndarray +from numpy.core.fromnumeric import amax, amin +from numpy.core.numerictypes import bool_, typecodes +from numpy.core.multiarray import dtype +import numpy.core.numeric as numeric +from numpy.lib.shape_base import expand_dims as n_expand_dims +import warnings + + +MaskType = bool_ +nomask = MaskType(0) + +divide_tolerance = 1.e-35 + +#####-------------------------------------------------------------------------- +#---- --- Helper functions --- +#####-------------------------------------------------------------------------- +def convert_typecode(f,dtchar): + """Converts the type of `f` to a type compatible with `dtchar`, for inline operations.""" + ftype = f.dtype.char + if dtchar == ftype: + return f + elif dtchar in typecodes['Integer']: + if ftype in typecodes['Integer']: + f = f.astype(dtchar) + else: + raise TypeError, 'Incorrect type for in-place operation.' + elif dtchar in typecodes['Float']: + if ftype in typecodes['Integer']: + f = f.astype(dtchar) + elif ftype in typecodes['Float']: + f = f.astype(dtchar) + else: + raise TypeError, 'Incorrect type for in-place operation.' + elif dtchar in typecodes['Complex']: + if ftype in typecodes['Integer']: + f = f.astype(dtchar) + elif ftype in typecodes['Float']: + f = f.astype(dtchar) + elif ftype in typecodes['Complex']: + f = f.astype(dtchar) + else: + raise TypeError, 'Incorrect type for in-place operation.' + else: + raise TypeError, 'Incorrect type for in-place operation.' 
+ return f + +#####-------------------------------------------------------------------------- +#---- --- Exceptions --- +#####-------------------------------------------------------------------------- +class MAError(Exception): + "Class for MA related errors." + def __init__ (self, args=None): + "Creates an exception." + self.args = args + def __str__(self): + "Calculates the string representation." + return str(self.args) + __repr__ = __str__ + +#####-------------------------------------------------------------------------- +#---- --- Filling options --- +#####-------------------------------------------------------------------------- +# Use single element arrays or scalars. +default_real_fill_value = 1.e20 +default_complex_fill_value = 1.e20 + 0.0j +default_character_fill_value = '-' +default_integer_fill_value = 999999 +default_object_fill_value = '?' + +def default_fill_value (obj): + "Calculates the default fill value for an object `obj`." + if isinstance(obj, types.FloatType): + return default_real_fill_value + elif isinstance(obj, types.IntType) or isinstance(obj, types.LongType): + return default_integer_fill_value + elif isinstance(obj, types.StringType): + return default_character_fill_value + elif isinstance(obj, types.ComplexType): + return default_complex_fill_value + elif isinstance(obj, MaskedArray) or isinstance(obj, ndarray): + x = obj.dtype.char + if x in typecodes['Float']: + return default_real_fill_value + if x in typecodes['Integer']: + return default_integer_fill_value + if x in typecodes['Complex']: + return default_complex_fill_value + if x in typecodes['Character']: + return default_character_fill_value + if x in typecodes['UnsignedInteger']: + return umath.absolute(default_integer_fill_value) + return default_object_fill_value + else: + return default_object_fill_value + +def minimum_fill_value (obj): + "Calculates the default fill value suitable for taking the minimum of `obj`." 
+ if isinstance(obj, types.FloatType): + return numeric.inf + elif isinstance(obj, types.IntType) or isinstance(obj, types.LongType): + return sys.maxint + elif isinstance(obj, MaskedArray) or isinstance(obj, ndarray): + x = obj.dtype.char + if x in typecodes['Float']: + return numeric.inf + if x in typecodes['Integer']: + return sys.maxint + if x in typecodes['UnsignedInteger']: + return sys.maxint + else: + raise TypeError, 'Unsuitable type for calculating minimum.' + +def maximum_fill_value (obj): + "Calculates the default fill value suitable for taking the maximum of `obj`." + if isinstance(obj, types.FloatType): + return -numeric.inf + elif isinstance(obj, types.IntType) or isinstance(obj, types.LongType): + return -sys.maxint + elif isinstance(obj, MaskedArray) or isinstance(obj, ndarray): + x = obj.dtype.char + if x in typecodes['Float']: + return -numeric.inf + if x in typecodes['Integer']: + return -sys.maxint + if x in typecodes['UnsignedInteger']: + return 0 + else: + raise TypeError, 'Unsuitable type for calculating maximum.' + +def set_fill_value (a, fill_value): + "Sets the fill value of `a` if it is a masked array." + if isinstance(a, MaskedArray): + a.set_fill_value(fill_value) + +def get_fill_value (a): + """Returns the fill value of `a`, if any. + Otherwise, returns the default fill value for that type. + """ + if isinstance(a, MaskedArray): + result = a.fill_value + else: + result = default_fill_value(a) + return result + +def common_fill_value (a, b): + "Returns the common fill_value of `a` and `b`, if any, or `None`." + t1 = get_fill_value(a) + t2 = get_fill_value(b) + if t1 == t2: + return t1 + return None + +#................................................ +def filled(a, value = None): + """Returns `a` as an array with masked data replaced by `value`. +If `value` is `None` or the special element `masked`, `get_fill_value(a)` +is used instead. + +If `a` is already a contiguous numeric array, `a` itself is returned. 
+ +`filled(a)` can be used to be sure that the result is numeric when passing +an object a to other software ignorant of MA, in particular to numpy itself. + """ + if hasattr(a, 'filled'): + return a.filled(value) + elif isinstance(a, ndarray): # and a.flags['CONTIGUOUS']: + return a + elif isinstance(a, types.DictType): + return numeric.array(a, 'O') + else: + return numeric.array(a) + +#####-------------------------------------------------------------------------- +#---- --- Ufuncs --- +#####-------------------------------------------------------------------------- +ufunc_domain = {} +ufunc_fills = {} + +class domain_check_interval: + """Defines a valid interval, +so that `domain_check_interval(a,b)(x) = true` where `x < a` or `x > b`.""" + def __init__(self, a, b): + "domain_check_interval(a,b)(x) = true where x < a or y > b" + if (a > b): + (a, b) = (b, a) + self.a = a + self.b = b + + def __call__ (self, x): + "Execute the call behavior." + return umath.logical_or(umath.greater (x, self.b), + umath.less(x, self.a)) +#............................ +class domain_tan: + """Defines a valid interval for the `tan` function, +so that `domain_tan(eps) = True where `abs(cos(x)) < eps`""" + def __init__(self, eps): + "domain_tan(eps) = true where abs(cos(x)) < eps)" + self.eps = eps + def __call__ (self, x): + "Execute the call behavior." + return umath.less(umath.absolute(umath.cos(x)), self.eps) +#............................ +class domain_safe_divide: + """defines a domain for safe division.""" + def __init__ (self, tolerance=divide_tolerance): + self.tolerance = tolerance + def __call__ (self, a, b): + return umath.absolute(a) * self.tolerance >= umath.absolute(b) +#............................ +class domain_greater: + "domain_greater(v)(x) = true where x <= v" + def __init__(self, critical_value): + "domain_greater(v)(x) = true where x <= v" + self.critical_value = critical_value + + def __call__ (self, x): + "Execute the call behavior." 
+ return umath.less_equal(x, self.critical_value) +#............................ +class domain_greater_equal: + "domain_greater_equal(v)(x) = true where x < v" + def __init__(self, critical_value): + "domain_greater_equal(v)(x) = true where x < v" + self.critical_value = critical_value + + def __call__ (self, x): + "Execute the call behavior." + return umath.less(x, self.critical_value) +#.............................................................................. +class masked_unary_operation: + """Defines masked version of unary operations, +where invalid values are pre-masked. + +:IVariables: + - `f` : function. + - `fill` : Default filling value *[0]*. + - `domain` : Default domain *[None]*. + """ + def __init__ (self, mufunc, fill=0, domain=None): + """ masked_unary_operation(aufunc, fill=0, domain=None) + aufunc(fill) must be defined + self(x) returns aufunc(x) + with masked values where domain(x) is true or getmask(x) is true. + """ + self.f = mufunc + self.fill = fill + self.domain = domain + self.__doc__ = getattr(mufunc, "__doc__", str(mufunc)) + self.__name__ = getattr(mufunc, "__name__", str(mufunc)) + ufunc_domain[mufunc] = domain + ufunc_fills[mufunc] = fill + # + def __call__ (self, a, *args, **kwargs): + "Execute the call behavior." +# numeric tries to return scalars rather than arrays when given scalars. + m = getmask(a) + d1 = filled(a, self.fill) + if self.domain is not None: + m = mask_or(m, self.domain(d1)) + result = self.f(d1, *args, **kwargs) + if isinstance(a, MaskedArray): + return a.__class__(result, mask=m) + return masked_array(result, mask=m) + # + def __str__ (self): + return "Masked version of %s. [Invalid values are masked]" % str(self.f) +#.............................................................................. +class masked_binary_operation: + """Defines masked version of binary operations, +where invalid values are pre-masked. + +:IVariables: + - `f` : function. + - `fillx` : Default filling value for first array*[0]*. 
+ - `filly` : Default filling value for second array*[0]*. + - `domain` : Default domain *[None]*. + """ + def __init__ (self, mbfunc, fillx=0, filly=0): + """abfunc(fillx, filly) must be defined. + abfunc(x, filly) = x for all x to enable reduce. + """ + self.f = mbfunc + self.fillx = fillx + self.filly = filly + self.__doc__ = getattr(mbfunc, "__doc__", str(mbfunc)) + self.__name__ = getattr(mbfunc, "__name__", str(mbfunc)) + ufunc_domain[mbfunc] = None + ufunc_fills[mbfunc] = (fillx, filly) + # + def __call__ (self, a, b, *args, **kwargs): + "Execute the call behavior." + m = mask_or(getmask(a), getmask(b)) + d1 = filled(a, self.fillx) + d2 = filled(b, self.filly) + result = self.f(d1, d2, *args, **kwargs) +# if isinstance(result, ndarray) \ +# and m.ndim != 0 \ +# and m.shape != result.shape: +# m = mask_or(getmaskarray(a), getmaskarray(b)) + if isinstance(result, MaskedArray): + return result.__class__(result, mask=m) + return masked_array(result, mask=m) + # + def reduce (self, target, axis=0, dtype=None): + """Reduces `target` along the given `axis`.""" + if isinstance(target, MaskedArray): + tclass = target.__class__ + else: + tclass = MaskedArray + m = getmask(target) + t = filled(target, self.filly) + if t.shape == (): + t = t.reshape(1) + if m is not nomask: + m = make_mask(m, copy=1) + m.shape = (1,) + if m is nomask: + return tclass(self.f.reduce (t, axis)) + else: + t = tclass(t, mask=m) + # XXX: "or t.dtype" below is a workaround for what appears + # XXX: to be a bug in reduce. + t = self.f.reduce(filled(t, self.filly), axis, dtype=dtype or t.dtype) + m = umath.logical_and.reduce(m, axis) + if isinstance(t, ndarray): + return tclass(t, mask=m, fill_value=get_fill_value(target)) + elif m: + return masked + else: + return t + + def outer (self, a, b): + "Returns the function applied to the outer product of a and b." 
+ ma = getmask(a) + mb = getmask(b) + if ma is nomask and mb is nomask: + m = nomask + else: + ma = getmaskarray(a) + mb = getmaskarray(b) + m = umath.logical_or.outer(ma, mb) + d = self.f.outer(filled(a, self.fillx), filled(b, self.filly)) + if isinstance(d, MaskedArray): + return d.__class__(d, mask=m) + return masked_array(d, mask=m) + + def accumulate (self, target, axis=0): + """Accumulates `target` along `axis` after filling with y fill value.""" + if isinstance(target, MaskedArray): + tclass = target.__class__ + else: + tclass = masked_array + t = filled(target, self.filly) + return tclass(self.f.accumulate(t, axis)) + + def __str__ (self): + return "Masked version of " + str(self.f) +#.............................................................................. +class domained_binary_operation: + """Defines binary operations that have a domain, like divide. + +These are complicated so they are a separate class. +They have no reduce, outer or accumulate. + +:IVariables: + - `f` : function. + - `fillx` : Default filling value for first array*[0]*. + - `filly` : Default filling value for second array*[0]*. + - `domain` : Default domain *[None]*. + """ + def __init__ (self, dbfunc, domain, fillx=0, filly=0): + """abfunc(fillx, filly) must be defined. + abfunc(x, filly) = x for all x to enable reduce. + """ + self.f = dbfunc + self.domain = domain + self.fillx = fillx + self.filly = filly + self.__doc__ = getattr(dbfunc, "__doc__", str(dbfunc)) + self.__name__ = getattr(dbfunc, "__name__", str(dbfunc)) + ufunc_domain[dbfunc] = domain + ufunc_fills[dbfunc] = (fillx, filly) + + def __call__(self, a, b): + "Execute the call behavior." 
+ ma = getmask(a) + mb = getmask(b) + d1 = filled(a, self.fillx) + d2 = filled(b, self.filly) + t = self.domain(d1, d2) + + if fromnumeric.sometrue(t, None): + d2 = numeric.where(t, self.filly, d2) + mb = mask_or(mb, t) + m = mask_or(ma, mb) + result = self.f(d1, d2) + return masked_array(result, mask=m) + + def __str__ (self): + return "Masked version of " + str(self.f) + +#.............................................................................. +# Unary ufuncs +exp = masked_unary_operation(umath.exp) +conjugate = masked_unary_operation(umath.conjugate) +sin = masked_unary_operation(umath.sin) +cos = masked_unary_operation(umath.cos) +tan = masked_unary_operation(umath.tan) +arctan = masked_unary_operation(umath.arctan) +arcsinh = masked_unary_operation(umath.arcsinh) +sinh = masked_unary_operation(umath.sinh) +cosh = masked_unary_operation(umath.cosh) +tanh = masked_unary_operation(umath.tanh) +abs = absolute = masked_unary_operation(umath.absolute) +fabs = masked_unary_operation(umath.fabs) +negative = masked_unary_operation(umath.negative) +floor = masked_unary_operation(umath.floor) +ceil = masked_unary_operation(umath.ceil) +around = masked_unary_operation(fromnumeric.round_) +logical_not = masked_unary_operation(umath.logical_not) +# Domained unary ufuncs +sqrt = masked_unary_operation(umath.sqrt, 0.0, domain_greater_equal(0.0)) +log = masked_unary_operation(umath.log, 1.0, domain_greater(0.0)) +log10 = masked_unary_operation(umath.log10, 1.0, domain_greater(0.0)) +tan = masked_unary_operation(umath.tan, 0.0, domain_tan(1.e-35)) +arcsin = masked_unary_operation(umath.arcsin, 0.0, + domain_check_interval(-1.0, 1.0)) +arccos = masked_unary_operation(umath.arccos, 0.0, + domain_check_interval(-1.0, 1.0)) +arccosh = masked_unary_operation(umath.arccosh, 1.0, domain_greater_equal(1.0)) +arctanh = masked_unary_operation(umath.arctanh, 0.0, + domain_check_interval(-1.0+1e-15, 1.0-1e-15)) +# Binary ufuncs +add = masked_binary_operation(umath.add) +subtract = 
masked_binary_operation(umath.subtract) +multiply = masked_binary_operation(umath.multiply, 1, 1) +arctan2 = masked_binary_operation(umath.arctan2, 0.0, 1.0) +equal = masked_binary_operation(umath.equal) +equal.reduce = None +not_equal = masked_binary_operation(umath.not_equal) +not_equal.reduce = None +less_equal = masked_binary_operation(umath.less_equal) +less_equal.reduce = None +greater_equal = masked_binary_operation(umath.greater_equal) +greater_equal.reduce = None +less = masked_binary_operation(umath.less) +less.reduce = None +greater = masked_binary_operation(umath.greater) +greater.reduce = None +logical_and = masked_binary_operation(umath.logical_and) +alltrue = masked_binary_operation(umath.logical_and, 1, 1).reduce +logical_or = masked_binary_operation(umath.logical_or) +sometrue = logical_or.reduce +logical_xor = masked_binary_operation(umath.logical_xor) +bitwise_and = masked_binary_operation(umath.bitwise_and) +bitwise_or = masked_binary_operation(umath.bitwise_or) +bitwise_xor = masked_binary_operation(umath.bitwise_xor) +hypot = masked_binary_operation(umath.hypot) +# Domained binary ufuncs +divide = domained_binary_operation(umath.divide, domain_safe_divide(), 0, 1) +true_divide = domained_binary_operation(umath.true_divide, + domain_safe_divide(), 0, 1) +floor_divide = domained_binary_operation(umath.floor_divide, + domain_safe_divide(), 0, 1) +remainder = domained_binary_operation(umath.remainder, + domain_safe_divide(), 0, 1) +fmod = domained_binary_operation(umath.fmod, domain_safe_divide(), 0, 1) + + +#####-------------------------------------------------------------------------- +#---- --- Mask creation functions --- +#####-------------------------------------------------------------------------- +def getmask(a): + """Returns the mask of `a`, if any, or `nomask`. +Returns `nomask` if `a` is not a masked array. 
+To get an array for sure use getmaskarray.""" + if hasattr(a, "_mask"): + return a._mask + else: + return nomask + +def getmaskarray(a): + """Returns the mask of `a`, if any. +Otherwise, returns an array of `False`, with the same shape as `a`. + """ + m = getmask(a) + if m is nomask: + return make_mask_none(fromnumeric.shape(a)) + else: + return m + +def is_mask(m): + """Returns `True` if `m` is a legal mask. +Does not check contents, only type. + """ + try: + return m.dtype.type is MaskType + except AttributeError: + return False +# +def make_mask(m, copy=False, flag=False): + """make_mask(m, copy=0, flag=0) +Returns `m` as a mask, creating a copy if necessary or requested. +The function can accept any sequence of integers or `nomask`. +Does not check that contents must be 0s and 1s. +If `flag=True`, returns `nomask` if `m` contains no true elements. + +:Parameters: + - `m` (ndarray) : Mask. + - `copy` (boolean, *[False]*) : Returns a copy of `m` if true. + - `flag` (boolean, *[False]*): Flattens mask to `nomask` if `m` is all false. + """ + if m is nomask: + return nomask + elif isinstance(m, ndarray): + if m.dtype.type is MaskType: + if copy: + result = numeric.array(m, dtype=MaskType, copy=copy) + else: + result = m + else: + result = numeric.array(m, dtype=MaskType) + else: + result = numeric.array(filled(m, True), dtype=MaskType) + + if flag and not result.any(): + return nomask + else: + return result + +def make_mask_none(s): + "Returns a mask of shape `s`, filled with `False`." + result = numeric.zeros(s, dtype=MaskType) + return result + +def mask_or (m1, m2, copy=False, flag=True): + """Returns the combination of two masks `m1` and `m2`. +The masks are combined with the `logical_or` operator, treating `nomask` as false. +The result may equal m1 or m2 if the other is nomask. + +:Parameters: + - `m` (ndarray) : Mask. + - `copy` (boolean, *[False]*) : Returns a copy of `m` if true. 
+ - `flag` (boolean, *[False]*): Flattens mask to `nomask` if `m` is all false. + """ + if m1 is nomask: + return make_mask(m2, copy=copy, flag=flag) + if m2 is nomask: + return make_mask(m1, copy=copy, flag=flag) + if m1 is m2 and is_mask(m1): + return m1 + return make_mask(umath.logical_or(m1, m2), copy=copy, flag=flag) + +#####-------------------------------------------------------------------------- +#--- --- Masking functions --- +#####-------------------------------------------------------------------------- +def masked_where(condition, x, copy=True): + """Returns `x` as an array masked where `condition` is true. +Masked values of `x` or `condition` are kept. + +:Parameters: + - `condition` (ndarray) : Masking condition. + - `x` (ndarray) : Array to mask. + - `copy` (boolean, *[False]*) : Returns a copy of `m` if true. + """ + cm = filled(condition,1) + if isinstance(x,MaskedArray): + m = mask_or(x._mask, cm) + return x.__class__(x._data, mask=m, copy=copy) + else: + return MaskedArray(fromnumeric.asarray(x), copy=copy, mask=cm) + +def masked_greater(x, value, copy=1): + "Shortcut to `masked_where`, with ``condition = (x > value)``." + return masked_where(greater(x, value), x, copy=copy) + +def masked_greater_equal(x, value, copy=1): + "Shortcut to `masked_where`, with ``condition = (x >= value)``." + return masked_where(greater_equal(x, value), x, copy=copy) + +def masked_less(x, value, copy=True): + "Shortcut to `masked_where`, with ``condition = (x < value)``." + return masked_where(less(x, value), x, copy=copy) + +def masked_less_equal(x, value, copy=True): + "Shortcut to `masked_where`, with ``condition = (x <= value)``." + return masked_where(less_equal(x, value), x, copy=copy) + +def masked_not_equal(x, value, copy=True): + "Shortcut to `masked_where`, with ``condition = (x != value)``." + return masked_where((x != value), x, copy=copy) + +# +def masked_equal(x, value, copy=True): + """Shortcut to `masked_where`, with ``condition = (x == value)``. 
+For floating point, consider `masked_values(x, value)` instead. + """ + return masked_where((x == value), x, copy=copy) +# d = filled(x, 0) +# c = umath.equal(d, value) +# m = mask_or(c, getmask(x)) +# return array(d, mask=m, copy=copy) + +def masked_inside(x, v1, v2, copy=True): + """Shortcut to `masked_where`, where `condition` is True for x inside +the interval `[v1,v2]` ``(v1 <= x <= v2)``. +The boundaries `v1` and `v2` can be given in either order. + """ + if v2 < v1: + (v1, v2) = (v2, v1) + xf = filled(x) + condition = (xf >= v1) & (xf <= v2) + return masked_where(condition, x, copy=copy) + +def masked_outside(x, v1, v2, copy=True): + """Shortcut to `masked_where`, where `condition` is True for x outside +the interval `[v1,v2]` ``(x < v1)|(x > v2)``. +The boundaries `v1` and `v2` can be given in either order. + """ + if v2 < v1: + (v1, v2) = (v2, v1) + xf = filled(x) + condition = (xf < v1) | (xf > v2) + return masked_where(condition, x, copy=copy) + +# +def masked_object(x, value, copy=True): + """Masks the array `x` where the data are exactly equal to `value`. +This function is suitable only for `object` arrays: for floating point, +please use `masked_values` instead. +The mask is set to `nomask` if posible. + +:parameter copy (Boolean, *[True]*): Returns a copy of `x` if true. """ + if isMaskedArray(x): + condition = umath.equal(x._data, value) + mask = x._mask + else: + condition = umath.equal(fromnumeric.asarray(x), value) + mask = nomask + mask = mask_or(mask, make_mask(condition, flag=True)) + return masked_array(x, mask=mask, copy=copy, fill_value=value) + +def masked_values(x, value, rtol=1.e-5, atol=1.e-8, copy=True): + """Masks the array `x` where the data are approximately equal to `value` +(that is, ``abs(x - value) <= atol+rtol*abs(value)``). +Suitable only for floating points. For integers, please use `masked_equal`. +The mask is set to `nomask` if posible. + +:Parameters: + - `rtol` (Float, *[1e-5]*): Tolerance parameter. 
+ - `atol` (Float, *[1e-8]*): Tolerance parameter. + - `copy` (boolean, *[False]*) : Returns a copy of `x` if True. + """ + abs = umath.absolute + xnew = filled(x, value) + if issubclass(xnew.dtype.type, numeric.floating): + condition = umath.less_equal(abs(xnew-value), atol+rtol*abs(value)) + try: + mask = x._mask + except AttributeError: + mask = nomask + else: + condition = umath.equal(xnew, value) + mask = nomask + mask = mask_or(mask, make_mask(condition, flag=True)) + return masked_array(xnew, mask=mask, copy=copy, fill_value=value) + +#####-------------------------------------------------------------------------- +#---- --- Printing options --- +#####-------------------------------------------------------------------------- +class _MaskedPrintOption: + """Handles the string used to represent missing data in a masked array.""" + def __init__ (self, display): + "Creates the masked_print_option object." + self._display = display + self._enabled = True + + def display(self): + "Displays the string to print for masked values." + return self._display + + def set_display (self, s): + "Sets the string to print for masked values." + self._display = s + + def enabled(self): + "Is the use of the display value enabled?" + return self._enabled + + def enable(self, flag=1): + "Set the enabling flag to `flag`." + self._enabled = flag + + def __str__ (self): + return str(self._display) + + __repr__ = __str__ + +#if you single index into a masked location you get this object. +masked_print_option = _MaskedPrintOption('--') + +#####-------------------------------------------------------------------------- +#---- --- MaskedArray class --- +#####-------------------------------------------------------------------------- +class MaskedArray(numeric.ndarray, object): + """Arrays with possibly masked values. +Masked values of True exclude the corresponding element from any computation. 
+ +Construction: + x = array(data, dtype=None, copy=True, order=False, + mask = nomask, fill_value=None, flag=True) + +If copy=False, every effort is made not to copy the data: +If `data` is a MaskedArray, and argument mask=nomask, then the candidate data +is `data._data` and the mask used is `data._mask`. +If `data` is a numeric array, it is used as the candidate raw data. +If `dtype` is not None and is different from data.dtype.char then a data copy is required. +Otherwise, the candidate is used. + +If a data copy is required, the raw (unmasked) data stored is the result of: +numeric.array(data, dtype=dtype.char, copy=copy) + +If `mask` is `nomask` there are no masked values. +Otherwise mask must be convertible to an array of booleans with the same shape as x. +If `flag` is True, a mask consisting of zeros (False) only is compressed to `nomask`. +Otherwise, the mask is not compressed. + +fill_value is used to fill in masked values when necessary, such as when +printing and in method/function filled(). +The fill_value is not used for computation within this module. + """ + __array_priority__ = 10.1 + + def __new__(cls, data, mask=nomask, dtype=None, copy=False, fill_value=None, + flag=True, keep_mask=True): + """array(data, dtype=None, copy=True, mask=nomask, fill_value=None) + +If `data` is already a ndarray, its dtype becomes the default value of dtype. + """ + if dtype is not None: + dtype = numeric.dtype(dtype) + # 1. Argument is MA ........... + if isinstance(data, MaskedArray) or\ + (hasattr(data,"_mask") and hasattr(data,"_data")) : + if keep_mask: + if mask is nomask: + cls._basemask = data._mask + else: + cls._basemask = mask_or(data._mask, mask) + else: + # Force copy of mask if it changes + cls._basemask = make_mask(mask, copy=copy, flag=flag) + # Update fille_value + if fill_value is None: + cls._fill_value = data._fill_value + else: + cls._fill_value = fill_value + return numeric.array(data._data, dtype=dtype, copy=copy).view(cls) + # 2. 
Argument is not MA ....... + if isinstance(data, ndarray): + if dtype is not None and data.dtype != dtype: + _data = data.astype(dtype) + elif copy: + _data = data.copy() + else: + _data = data + else: + try: + _data = numeric.array(data, dtype=dtype, copy=copy) + except TypeError: + _data = empty(len(data), dtype=dtype) + for (k,v) in enumerate(data): + _data[k] = v + if mask is nomask: + cls._basemask = getmask(_data) + return _data.view(cls) + # Define mask ................. + _mask = make_mask(mask, copy=False, flag=flag) + #....Check shapes compatibility + if _mask is not nomask: + (nd, nm) = (_data.size, _mask.size) + if (nm != nd): + if nm == 1: + _mask = fromnumeric.resize(_mask, _data.shape) + elif nd == 1: + _data = fromnumeric.resize(_data, _mask.shape) + else: + msg = "Mask and data not compatible: data size is %i, "+\ + "mask size is %i." + raise MAError, msg % (nm, nd) + elif (_mask.shape != _data.shape): + _mask = _mask.reshape(_data.shape).copy() + #.... + cls._fill_value = fill_value + cls._basemask = _mask + return numeric.asanyarray(_data).view(cls) + #.................................. + def __array__ (self, t=None, context=None): + "Special hook for numeric. Converts to numeric if possible." + # Er... Do we really need __array__ ? + if self._mask is not nomask: + if fromnumeric.ravel(self._mask).any(): + if context is None: + # Hardliner stand: raise an exception + # We may wanna use warnings.warn instead + raise MAError,\ + "Cannot automatically convert masked array to "\ + "numeric because data\n is masked in one or "\ + "more locations." + #return self._data + else: + func, args, i = context + fills = ufunc_fills.get(func) + if fills is None: + raise MAError, "%s not known to ma" % func + return self.filled(fills[i]) + else: # Mask is all false + # Optimize to avoid future invocations of this section. 
+ self._mask = nomask + self._shared_mask = 0 + if t: + return self._data.astype(t) + else: + return self._data + #.................................. + def __array_wrap__(self, obj, context=None): + """Special hook for ufuncs. +Wraps the numpy array and sets the mask according to context. + """ + mclass = self.__class__ + #.......... + if context is None: + print "DEBUG _wrap_: no context" + return mclass(obj, mask=self._mask, copy=False) + #.......... + (func, args) = context[:2] + m = reduce(mask_or, [getmask(arg) for arg in args]) + # Get domain mask + domain = ufunc_domain.get(func, None) + if domain is not None: + m = mask_or(m, domain(*[getattr(arg, '_data', arg) for arg in args])) + # Update mask + if m is not nomask: + try: + dshape = obj.shape + except AttributeError: + pass + else: + if m.shape != dshape: + m = reduce(mask_or, [getmaskarray(arg) for arg in args]) + return mclass(obj, copy=False, mask=m) + #........................ + def __array_finalize__(self,obj): + """Finalizes the masked array. + """ + # + if not hasattr(self, "_data"): + try: + self._data = obj._data + except AttributeError: + self._data = obj + # + self.fill_value = self._fill_value + # + if not hasattr(self, '_mask'): + self._mask = self._basemask + # + return + #............................................ + def __getitem__(self, i): + """x.__getitem__(y) <==> x[y] +Returns the item described by i. Not a copy as in previous versions. + """ + dout = self._data[i] + if self._mask is nomask: + if numeric.size(dout)==1: + return dout + else: + return self.__class__(dout, mask=nomask, + fill_value=self._fill_value) + #.... +# m = self._mask.copy() + m = self._mask + mi = m[i] + if mi.size == 1: + if mi: + return masked + else: + return dout + else: + return self.__class__(dout, mask=mi, fill_value=self._fill_value) + #........................ + def __setitem__(self, index, value): + """x.__setitem__(i, y) <==> x[i]=y +Sets item described by index. 
If value is masked, masks those locations. + """ + d = self._data + if self is masked: + raise MAError, 'Cannot alter the masked element.' + #.... + if value is masked: + if self._mask is nomask: + _mask = make_mask_none(d.shape) + else: + _mask = self._mask.copy() + _mask[index] = True + self._mask = _mask + return + #.... + m = getmask(value) + value = filled(value).astype(d.dtype) + d[index] = value + if m is nomask: + if self._mask is not nomask: + _mask = self._mask.copy() + _mask[index] = False + else: + _mask = nomask + else: + if self._mask is nomask: + _mask = make_mask_none(d.shape) + else: + _mask = self._mask.copy() + _mask[index] = m + self._mask = _mask + #............................................ + def __getslice__(self, i, j): + """x.__getslice__(i, j) <==> x[i:j] +Returns the slice described by i, j. +The use of negative indices is not supported.""" + m = self._mask + dout = self._data[i:j] + if m is nomask: + return self.__class__(dout, fill_value=self._fill_value) + else: + return self.__class__(dout, mask=m[i:j], fill_value=self._fill_value) + #........................ + def __setslice__(self, i, j, value): + """x.__setslice__(i, j, value) <==> x[i:j]=value +Sets a slice i:j to `value`. +If `value` is masked, masks those locations.""" + d = self._data + if self is masked: + #TODO: Well, maybe we could/should + raise MAError, "Cannot alter the 'masked' object." + #.... + if value is masked: + if self._mask is nomask: + _mask = make_mask_none(d.shape) + else: + _mask = self._mask.copy() + _mask[i:j] = True + self._mask = _mask + return + #.... 
+ m = getmask(value) + value = filled(value).astype(d.dtype) + d[i:j] = value + if m is nomask: + if self._mask is not nomask: + _mask = self._mask.copy() + _mask[i:j] = False + else: + _mask = nomask + else: + if self._mask is nomask: + _mask = make_mask_none(d.shape) + else: + _mask = self._mask.copy() + _mask[i:j] = m + self._mask = make_mask(_mask, flag=True) + #............................................ + # If we don't want to crash the performance, we better leave __getattribute__ alone... +# def __getattribute__(self, name): +# """x.__getattribute__('name') = x.name +#Returns the chosen attribute. +#If the attribute cannot be directly accessed, checks the _data section. +# """ +# try: +# return ndarray.__getattribute__(self, name) +# except AttributeError: +# pass +# try: +# return self._data.__getattribute__(name) +# except AttributeError: +# raise AttributeError + #............................................ + def __str__(self): + """x.__str__() <==> str(x) +Calculates the string representation, using masked for fill if it is enabled. +Otherwise, fills with fill value. + """ + if masked_print_option.enabled(): + f = masked_print_option + # XXX: Without the following special case masked + # XXX: would print as "[--]", not "--". Can we avoid + # XXX: checks for masked by choosing a different value + # XXX: for the masked singleton? 2005-01-05 -- sasha + if self is masked: + return str(f) + m = self._mask + if m is nomask: + res = self._data + else: + if m.shape == () and m: + return str(f) + # convert to object array to make filled work + res = self._data.astype("|O8") + res[self._mask] = f + else: + res = self.filled(self.fill_value) + return str(res) + + def __repr__(self): + """x.__repr__() <==> repr(x) +Calculates the repr representation, using masked for fill if it is enabled. +Otherwise fill with fill value. 
+ """ + with_mask = """\ +masked_%(name)s(data = + %(data)s, + mask = + %(mask)s, + fill_value=%(fill)s) +""" + with_mask1 = """\ +masked_%(name)s(data = %(data)s, + mask = %(mask)s, + fill_value=%(fill)s) +""" + n = len(self.shape) + name = repr(self._data).split('(')[0] + if n <= 1: + return with_mask1 % { + 'name': name, + 'data': str(self), + 'mask': str(self.mask), + 'fill': str(self.fill_value), + } + return with_mask % { + 'name': name, + 'data': str(self), + 'mask': str(self.mask), + 'fill': str(self.fill_value), + } + #............................................ + def __abs__(self): + """x.__abs__() <==> abs(x) +Returns a masked array of the current subclass, with the new `_data` +the absolute of the inital `_data`. + """ + return self.__class__(self._data.__abs__(), mask=self._mask) + # + def __neg__(self): + """x.__abs__() <==> neg(x) +Returns a masked array of the current subclass, with the new `_data` +the negative of the inital `_data`.""" + try: + return self.__class__(self._data.__neg__(), mask=self._mask) + except MAError: + return negative(self) + # + def __iadd__(self, other): + "Adds other to self in place." + f = convert_typecode(filled(other, 0), self._data.dtype.char) + if self._mask is nomask: + self._data += f + m = getmask(other) + self._mask = m + ###self._shared_mask = m is not nomask + else: + tmp = masked_array(f, mask=getmask(other)) + self._data += tmp._data + self._mask = mask_or(self._mask, tmp._mask) + ###self._shared_mask = 1 + return self + # + def __isub__(self, other): + "Subtracts other from self in place." + f = convert_typecode(filled(other, 0), self._data.dtype.char) + if self._mask is nomask: + self._data -= f + m = getmask(other) + self._mask = m + ###self._shared_mask = m is not nomask + else: + tmp = masked_array(f, mask=getmask(other)) + self._data -= tmp._data + self._mask = mask_or(self._mask, tmp._mask) + ###self._shared_mask = 1 + return self + # + def __imul__(self, other): + "Multiplies self by other in place." 
+ f = convert_typecode(filled(other, 0), self._data.dtype.char) + if self._mask is nomask: + self._data *= f + m = getmask(other) + self._mask = m + ####self._shared_mask = m is not nomask + else: + tmp = masked_array(f, mask=getmask(other)) + self._data *= tmp._data + self._mask = mask_or(self._mask, tmp._mask) + ###self._shared_mask = 1 + return self + # + def __idiv__(self, other): + "Divides self by other in place." + f = convert_typecode(filled(other, 0), self._data.dtype.char) + mo = getmask(other) + result = divide(self, masked_array(f, mask=mo)) + self._data = result._data + dm = result._mask + if dm is not self._mask: + self._mask = dm + return self + +# # +# def __eq__(self, other): +# return equal(self,other) +# +# def __ne__(self, other): +# return not_equal(self,other) +# +# def __lt__(self, other): +# return less(self,other) +# +# def __le__(self, other): +# return less_equal(self,other) +# +# def __gt__(self, other): +# return greater(self,other) +# +# def __ge__(self, other): +# return greater_equal(self,other) + + #............................................ + def __float__(self): + "Converts self to float." + if self._mask is not nomask: + print "Warning: converting a masked element to nan." + return numpy.nan + #raise MAError, 'Cannot convert masked element to a Python float.' + return float(self._data.item()) + + def __int__(self): + "Converts self to int." + if self._mask is not nomask: + raise MAError, 'Cannot convert masked element to a Python int.' + return int(self._data.item()) + + @property + def dtype(self): + """returns the data type of `_data`.""" + return self._data.dtype + + def astype (self, tc): + """Returns self as an array of given type. +Subclassing is preserved.""" + if tc == self._data.dtype: + return self + d = self._data.astype(tc) +# print "DEBUG: _astype: d", d +# print "DEBUG: _astype: m", self._mask + return self.__class__(d, mask=self._mask) + #............................................ 
+ def _get_flat(self): + """Calculates the flat value. + """ + if self._mask is nomask: + return masked_array(self._data.ravel(), mask=nomask, copy=False, + fill_value = self.fill_value) + else: + return masked_array(self._data.ravel(), mask=self._mask.ravel(), + copy=False, fill_value = self.fill_value) + # + def _set_flat (self, value): + "x.flat = value" + y = self.ravel() + y[:] = value + # + flat = property(fget=_get_flat, fset=_set_flat, doc="Flat version") + # + #............................................ + def _get_real(self): + "Returns the real part of a complex array." + return masked_array(self._data.real, mask=self.mask, + fill_value = self.fill_value) +# if self.mask is nomask: +# return masked_array(self._data.real, mask=nomask, +# fill_value = self.fill_value) +# else: +# return masked_array(self._data.real, mask=self.mask, +# fill_value = self.fill_value) + + def _set_real (self, value): + "Sets the real part of a complex array to `value`." + y = self.real + y[...] = value + + real = property(fget=_get_real, fset=_set_real, doc="Get it real!") + + def _get_imaginary(self): + "Returns the imaginary part of a complex array." + return masked_array(self._data.imag, mask=nomask, + fill_value = self.fill_value) + + def _set_imaginary (self, value): + "Sets the imaginary part of a complex array to `value`." + y = self.imaginary + y[...] = value + + imag = property(fget=_get_imaginary, fset=_set_imaginary, + doc="Imaginary part.") + imaginary = imag + #............................................ + def _get_mask(self): + """Returns the current mask.""" + return self._mask + + def _set_mask(self, mask): + """Sets the mask to `mask`.""" + mask = make_mask(mask, copy=False, flag=True) + if mask is not nomask: + if mask.size != self._data.size: + raise ValueError, "Inconsistent shape between data and mask!" 
+ if mask.shape != self._data.shape: + mask.shape = self._data.shape + self._mask = mask + + mask = property(fget=_get_mask, fset=_set_mask, doc="Mask") + #............................................ + def get_fill_value(self): + "Returns the filling value." + return self._fill_value + + def set_fill_value(self, value=None): + """Sets the filling value to `value`. +If None, uses the default, based on the data type.""" + if value is None: + value = default_fill_value(self._data) + self._fill_value = value + + fill_value = property(fget=get_fill_value, fset=set_fill_value, + doc="Filling value") + + def filled(self, fill_value=None): + """Returns an array of the same class as `_data`, + with masked values filled with `fill_value`. +Subclassing is preserved. + +If `fill_value` is None, uses self.fill_value. + """ + d = self._data + m = self._mask + if m is nomask: +# return fromnumeric.asarray(d) + return d + # + if fill_value is None: + value = self._fill_value + else: + value = fill_value + # + if self is masked_singleton: + result = numeric.array(value) + else: + try: +# result = numeric.array(d, dtype=d.dtype, copy=True) + result = d.copy() + result[m] = value + except (TypeError, AttributeError): + #ok, can't put that value in here + value = numeric.array(value, dtype=object) + d = d.astype(object) + result = fromnumeric.choose(m, (d, value)) + except IndexError: + #ok, if scalar + if d.shape: + raise + elif m: + result = numeric.array(value, dtype=d.dtype) + else: + result = d + return result + + def compressed(self): + "A 1-D array of all the non-masked data." + d = self._data.ravel() + if self._mask is nomask: + return d +# return numeric.asarray(d) + else: +# m = 1 - self._mask.ravel() +# return numeric.asarray(d.compress(m)) + return d.compress(-self._mask.ravel()) + #............................................ 
+ def count(self, axis=None): + """Counts the non-masked elements of the array along a given axis, +and returns a masked array where the mask is True where all data are masked. +If `axis` is None, counts all the non-masked elements, and returns either a +scalar or the masked singleton.""" + m = self._mask + s = self._data.shape + ls = len(s) + if m is nomask: + if ls == 0: + return 1 + if ls == 1: + return s[0] + if axis is None: + return self._data.size + else: + n = s[axis] + t = list(s) + del t[axis] + return numeric.ones(t) * n + n1 = fromnumeric.size(m, axis) + n2 = m.astype(int_).sum(axis) + if axis is None: + return (n1-n2) + else: + return masked_array(n1 - n2) + #............................................ + def _get_shape(self): + "Returns the current shape." + return self._data.shape + # + def _set_shape (self, newshape): + "Sets the array's shape." + self._data.shape = newshape + if self._mask is not nomask: + #self._mask = self._mask.copy() + self._mask.shape = newshape + # + shape = property(fget=_get_shape, fset=_set_shape, + doc="Shape of the array, as a tuple.") + # + def _get_size(self): + "Returns the current size." + return self._data.size + size = property(fget=_get_size, + doc="Size (number of elements) of the array.") + # + def reshape (self, *s): + """Reshapes the array to shape s. +Returns a new masked array. +If you want to modify the shape in place, please use `a.shape = s`""" + if self._mask is not nomask: + return self.__class__(self._data.reshape(*s), + mask=self._mask.reshape(*s)) + else: + return self.__class__(self._data.reshape(*s)) + # + def repeat(self, repeats, axis=None): + """Repeat elements of `a` `repeats` times along `axis`. +`repeats` is a sequence of length `a.shape[axis]` telling how many times +each element should be repeated. +The mask is repeated accordingly. 
+ """ + f = self.filled() + if isinstance(repeats, types.IntType): + if axis is None: + num = f.size + else: + num = f.shape[axis] + repeats = tuple([repeats]*num) + + m = self._mask + if m is not nomask: + m = fromnumeric.repeat(m, repeats, axis) + d = fromnumeric.repeat(f, repeats, axis) + return self.__class__(d, mask=m, fill_value=self.fill_value) + # + def resize(self, newshape, refcheck=True, order=False): + """Attempts to modify size and shape of self inplace. + The array must own its own memory and not be referenced by other arrays. + Returns None. + """ + raiseit = False + try: + self._data.resize(newshape,) + except ValueError: + raiseit = True + if self.mask is not nomask: + try: + self._mask.resize(newshape,) + except ValueError: + raiseit = True + if raiseit: + msg = "Cannot resize an array that has been referenced or "+\ + "is referencing another array in this way.\n"+\ + "Use the resize function." + raise ValueError, msg + return None + + +# # +# def transpose(self,axes=None): +# """Returns a view of 'a' with axes transposed.""" +# (d,m) = (self._data, self._mask) +# if m is nomask: +# return self.__class__(d.transpose(axes), copy=False) +# else: +# return self.__class__(d.transpose(axes), +# mask=m.transpose(axes), copy=False) +# # +# def swapaxes(self, axis1, axis2): +# (d,m) = (self._data, self._mask) +# if m is nomask: +# return self.__class__(d.swapaxes(axis1, axis2), +# copy=False) +# else: +# return self.__class__(data=d.swapaxes(axis1, axis2), +# mask=m.swapaxes(axis1, axis2), +# copy=False) + # +# def take(self, indices, axis=None, out=None, mode='raise'): +# "returns selection of items from a." 
+# (d,m) = (self._data, self._mask) +# if m is nomask: +# return self.__class__(d.take(indices, axis=axis, out=out, mode=mode)) +# else: +# return self.__class__(d.take(indices, axis=axis, out=out, mode=mode), +# mask=m.take(indices, axis=axis, out=out, mode=mode), +# copy=False,) + # + def put(self, indices, values, mode='raise'): + """Sets storage-indexed locations to corresponding values. +a.put(values, indices, mode) sets a.flat[n] = values[n] for each n in indices. +`values` can be scalar or an array shorter than indices, and it will be repeat, +if necessary. +If `values` has some masked values, the initial mask is updated in consequence, +else the corresponding values are unmasked. + """ + #TODO: Check that + (d, m) = (self._data, self._mask) + ind = filled(indices) + v = filled(values) + d.put(ind, v, mode=mode) + if m is not nomask: + if getmask(values) is not nomask: + m.put(ind, values._mask, mode=mode) + else: + m.put(ind, False, mode=mode) + self._mask = make_mask(m, copy=False, flag=True) + #............................................ + def ids (self): + """Return the ids of the data and mask areas.""" + return (id(self._data), id(self._mask)) + #............................................ + def all(self, axis=None): + """a.all(axis) returns True if all entries along the axis are True. + Returns False otherwise. If axis is None, uses the flatten array. + Masked data are considered as True during computation. + Outputs a masked array, where the mask is True if all data are masked along the axis. + """ + d = filled(self, True).all(axis) + m = self._mask.all(axis) + return self.__class__(d, mask=m, fill_value=self._fill_value) + def any(self, axis=None): + """a.any(axis) returns True if some or all entries along the axis are True. + Returns False otherwise. If axis is None, uses the flatten array. + Masked data are considered as False during computation. + Outputs a masked array, where the mask is True if all data are masked along the axis. 
+ """ + d = filled(self, False).any(axis) + m = self._mask.all(axis) + return self.__class__(d, mask=m, fill_value=self._fill_value) + def nonzero(self): + """a.nonzero() returns a tuple of arrays + + Returns a tuple of arrays, one for each dimension of a, + containing the indices of the non-zero elements in that + dimension. The corresponding non-zero values can be obtained + with + a[a.nonzero()]. + + To group the indices by element, rather than dimension, use + transpose(a.nonzero()) + instead. The result of this is always a 2d array, with a row for + each non-zero element.""" + return self.filled(0).nonzero() + #............................................ + def trace(self, offset=0, axis1=0, axis2=1, dtype=None, out=None): + """a.trace(offset=0, axis1=0, axis2=1, dtype=None, out=None) +Returns the sum along the offset diagonal of the array's indicated `axis1` and `axis2`. + """ + #TODO: What are we doing with `out`? + (d,m) = (self._data, self._mask) + if m is nomask: + return d.trace(offset=offset, axis1=axis1, axis2=axis2, + out=out).astype(dtype) + else: + D = self.diagonal(offset=offset, axis1=axis1, axis2=axis2, + ).astype(dtype) + return D.sum(axis=None) + #............................................ + def sum(self, axis=None, dtype=None): + """a.sum(axis=None, dtype=None) +Sums the array `a` over the given axis `axis`. +Masked values are set to 0. +If `axis` is None, applies to a flattened version of the array. 
+ """ + if self._mask is nomask: +# if axis is None: +# return self._data.sum(None, dtype=dtype) + return self.__class__(self._data.sum(axis, dtype=dtype), + mask=nomask, fill_value=self._fill_value) + else: +# if axis is None: +# return self.filled(0).sum(None, dtype=dtype) + return self.__class__(self.filled(0).sum(axis, dtype=dtype), + mask=self._mask.all(axis), + fill_value=self._fill_value) + + def cumsum(self, axis=None, dtype=None): + """a.cumprod(axis=None, dtype=None) +Returns the cumulative sum of the elements of array `a` along the given axis `axis`. +Masked values are set to 0. +If `axis` is None, applies to a flattened version of the array. + """ + if self._mask is nomask: +# if axis is None: +# return self._data.cumsum(None, dtype=dtype) + return self.__class__(self._data.cumsum(axis=axis, dtype=dtype)) + else: +# if axis is None: +# return self.filled(0).cumsum(None, dtype=dtype) + return self.__class__(self.filled(0).cumsum(axis=axis, dtype=dtype), + mask=self._mask, fill_value=self._fill_value) + + def prod(self, axis=None, dtype=None): + """a.prod(axis=None, dtype=None) +Returns the product of the elements of array `a` along the given axis `axis`. +Masked elements are set to 1. +If `axis` is None, applies to a flattened version of the array. + """ + if self._mask is nomask: +# if axis is None: +# return self._data.prod(None, dtype=dtype) + return self.__class__(self._data.prod(axis, dtype=dtype), + mask=nomask, fill_value=self._fill_value) +# return self.__class__(self._data.prod(axis=axis, dtype=dtype)) + else: +# if axis is None: +# return self.filled(1).prod(None, dtype=dtype) + return self.__class__(self.filled(1).prod(axis=axis, dtype=dtype), + mask=self._mask.all(axis), + fill_value=self._fill_value) + product = prod + + def cumprod(self, axis=None, dtype=None): + """a.cumprod(axis=None, dtype=None) +Returns the cumulative product of ethe lements of array `a` along the given axis `axis`. +Masked values are set to 1. 
+If `axis` is None, applies to a flattened version of the array. + """ + if self._mask is nomask: +# if axis is None: +# return self._data.cumprod(None, dtype=dtype) + return self.__class__(self._data.cumprod(axis=axis, dtype=dtype), + mask=nomask, fill_value=self._fill_value) + else: +# if axis is None: +# return self.filled(1).cumprod(None, dtype=dtype) + return self.__class__(self.filled(1).cumprod(axis=axis, dtype=dtype), + mask=self._mask, fill_value=self._fill_value) + + def mean(self, axis=None, dtype=None): + """a.mean(axis=None, dtype=None) + + Averages the array over the given axis. If the axis is None, + averages over all dimensions of the array. Equivalent to + + a.sum(axis, dtype) / size(a, axis). + + The optional dtype argument is the data type for intermediate + calculations in the sum. + + Returns a masked array, of the same class as a. + """ + if self._mask is nomask: +# if axis is None: +# return self._data.mean(axis=None, dtype=dtype) + return self.__class__(self._data.mean(axis=axis, dtype=dtype), + mask=nomask, fill_value=self._fill_value) + else: + dsum = fromnumeric.sum(self.filled(0), axis=axis, dtype=dtype) + cnt = self.count(axis=axis) + mask = self._mask.all(axis) + if axis is None and mask: + return masked + return self.__class__(dsum*1./cnt, mask=mask, + fill_value=self._fill_value) + + def anom(self, axis=None, dtype=None): + """a.anom(axis=None, dtype=None) + Returns the anomalies, or deviation from the average. + """ + m = self.mean(axis, dtype) + if not axis: + return (self - m) + else: + return (self - expand_dims(m,axis)) + + def var(self, axis=None, dtype=None): + """a.var(axis=None, dtype=None) +Returns the variance, a measure of the spread of a distribution. + +The variance is the average of the squared deviations from the mean, +i.e. var = mean((x - x.mean())**2). 
+ """ + if self._mask is nomask: +# if axis is None: +# return self._data.var(axis=None, dtype=dtype) + return self.__class__(self._data.var(axis=axis, dtype=dtype), + mask=nomask, fill_value=self._fill_value) + else: + cnt = self.count(axis=axis) + danom = self.anom(axis=axis, dtype=dtype) + danom *= danom + dvar = danom.sum(axis) / cnt +# dvar /= cnt + if axis is None: + return dvar + return self.__class__(dvar, + mask=mask_or(self._mask.all(axis), (cnt==1)), + fill_value=self._fill_value) + + def std(self, axis=None, dtype=None): + """a.std(axis=None, dtype=None) +Returns the standard deviation, a measure of the spread of a distribution. + +The standard deviation is the square root of the average of the squared +deviations from the mean, i.e. std = sqrt(mean((x - x.mean())**2)). + """ + dvar = self.var(axis,dtype) + if axis is None: + if dvar is masked: + return masked + else: + # Should we use umath.sqrt instead ? + return sqrt(dvar) + return self.__class__(sqrt(dvar._data), mask=dvar._mask, + fill_value=self._fill_value) + #............................................ + def argsort(self, axis=None, fill_value=None, kind='quicksort'): + """Returns an array of indices that sort 'a' along the specified axis. + Masked values are filled beforehand to `fill_value`. + If `fill_value` is None, uses the default for the data type. + Returns a numpy array. + +:Keywords: + `axis` : Integer *[None]* + Axis to be indirectly sorted (default -1) + `kind` : String *['quicksort']* + Sorting algorithm (default 'quicksort') + Possible values: 'quicksort', 'mergesort', or 'heapsort' + + Returns: array of indices that sort 'a' along the specified axis. + + This method executes an indirect sort along the given axis using the + algorithm specified by the kind keyword. It returns an array of indices of + the same shape as 'a' that index data along the given axis in sorted order. 
+ + The various sorts are characterized by average speed, worst case + performance, need for work space, and whether they are stable. A stable + sort keeps items with the same key in the same relative order. The three + available algorithms have the following properties: + + |------------------------------------------------------| + | kind | speed | worst case | work space | stable| + |------------------------------------------------------| + |'quicksort'| 1 | O(n^2) | 0 | no | + |'mergesort'| 2 | O(n*log(n)) | ~n/2 | yes | + |'heapsort' | 3 | O(n*log(n)) | 0 | no | + |------------------------------------------------------| + + All the sort algorithms make temporary copies of the data when the sort is not + along the last axis. Consequently, sorts along the last axis are faster and use + less space than sorts along other axis. + """ + if fill_value is None: + fill_value = default_fill_value(self._data) + d = self.filled(fill_value) + if axis is None: + return d.argsort(kind=kind) + return d.argsort(axis, kind) + + def argmin(self, axis=None, fill_value=None): + """Returns the array of indices for the minimum values of `a` along the + specified axis. + Masked values are treated as if they had the value `fill_value`. + If `fill_value` is None, the default for the data type is used. + Returns a numpy array. + +:Keywords: + `axis` : Integer *[None]* + Axis to be indirectly sorted (default -1) + `fill_value` : var *[None]* + Default filling value. If None, uses the data type default. + """ + if fill_value is None: + fill_value = default_fill_value(self._data) + d = self.filled(fill_value) + if axis is None: + return d.argmin() + return d.argmin(axis) + + def argmax(self, axis=None, fill_value=None): + """Returns the array of indices for the maximum values of `a` along the + specified axis. + Masked values are treated as if they had the value `fill_value`. + If `fill_value` is None, the default for the data type is used. + Returns a numpy array. 
+ +:Keywords: + `axis` : Integer *[None]* + Axis to be indirectly sorted (default -1) + `fill_value` : var *[None]* + Default filling value. If None, uses the data type default. + """ + if fill_value is None: + fill_value = default_fill_value(self._data) + try: + fill_value = - fill_value + except: + pass + d = self.filled(fill_value) + if axis is None: + return d.argmax() + return d.argmax(axis) + #............................................ + # Backwards Compatibility. Heck... + @property + def data(self): + """Returns the `_data` part of the MaskedArray. +You should really use `_data` instead...""" + return self._data + def raw_data(self): + """Returns the `_data` part of the MaskedArray. +You should really use `_data` instead...""" + return self._data + +##.............................................................................. + + + +#class _arithmethods: +# """Defines a wrapper for arithmetic methods. +#Instead of directly calling a ufunc, the corresponding method of the `array._data` +#object is called instead. +# """ +# def __init__ (self, methodname, fill_self=0, fill_other=0, domain=None): +# """ +#:Parameters: +# - `methodname` (String) : Method name. +# - `fill_self` (Float *[0]*) : Fill value for the instance. +# - `fill_other` (Float *[0]*) : Fill value for the target. +# - `domain` (Domain object *[None]*) : Domain of non-validity. +# """ +# self.methodname = methodname +# self.fill_self = fill_self +# self.fill_other = fill_other +# self.domain = domain +# # +# def __call__ (self, instance, other, *args): +# "Execute the call behavior." +# m_self = instance._mask +# m_other = getmask(other) +# base = filled(instance,self.fill_self) +# target = filled(other, self.fill_other) +# if self.domain is not None: +# # We need to force the domain to a ndarray only. 
#            if self.fill_other > self.fill_self:
#                domain = self.domain(base, target)
#            else:
#                domain = self.domain(target, base)
#            if domain.any():
#                #If `other` is a subclass of ndarray, `filled` must have the
#                # same subclass, else we'll lose some info.
#                #The easiest then is to fill `target` instead of creating
#                # a pure ndarray.
#                #Oh, and we better make a copy!
#                if isinstance(other, ndarray):
#                    if target is other:
#                        # We don't want to modify other: let's copy target, then
#                        target = target.copy()
#                    target[:] = numeric.where(fromnumeric.asarray(domain),
#                                              self.fill_other, target)
#                else:
#                    target = numeric.where(fromnumeric.asarray(domain),
#                                           self.fill_other, target)
#                m_other = mask_or(m_other, domain)
#        m = mask_or(m_self, m_other)
#        method = getattr(base, self.methodname)
#        return instance.__class__(method(target, *args), mask=m)
#    #
#    def patch(self):
#        """Applies the method `func` from class `method` to MaskedArray"""
#        return types.MethodType(self,None,MaskedArray)
#..............................................................................
class _arithmethods(object):
    """Defines a wrapper for arithmetic methods.

    Instead of directly calling a ufunc, the corresponding method of the
    `array._data` object is called instead.

    Instances are descriptors: accessed through a masked array they return a
    callable bound to that array.
    """
    def __init__ (self, methodname, fill_self=0, fill_other=0, domain=None):
        """
        :Parameters:
            - `methodname` (String) : Method name.
            - `fill_self` (Float *[0]*) : Fill value for the instance.
            - `fill_other` (Float *[0]*) : Fill value for the target.
            - `domain` (Domain object *[None]*) : Domain of non-validity.
        """
        self.methodname = methodname
        self.fill_self = fill_self
        self.fill_other = fill_other
        self.domain = domain
        self.obj = None
        # Document the wrapper with the docstring of the wrapped ndarray
        # method; `methodname` is a string, so its own `__doc__` would be
        # the docstring of `str`.
        self.__doc__ = getattr(getattr(ndarray, methodname, None),
                               '__doc__', None)
    #
    def __get__(self, obj, objtype=None):
        "Binds the wrapper to `obj`."
        # `self.obj` is still recorded for code calling the descriptor
        # object directly, but the returned callable captures `obj` itself:
        # the descriptor is shared by every array, so relying on `self.obj`
        # alone makes a saved `a.__add__` act on whichever array was
        # accessed last.
        self.obj = obj
        if obj is None:
            return self
        def bound(other, *args):
            return self._apply(obj, other, *args)
        bound.__doc__ = self.__doc__
        return bound
    #
    def __call__ (self, other, *args):
        "Execute the call behavior on the last instance accessed."
        return self._apply(self.obj, other, *args)
    #
    def _apply(self, instance, other, *args):
        "Applies the wrapped method to the filled `instance` and `other`."
        m_self = instance._mask
        m_other = getmask(other)
        base = filled(instance, self.fill_self)
        target = filled(other, self.fill_other)
        if self.domain is not None:
            # We need to force the domain to a ndarray only.
            if self.fill_other > self.fill_self:
                domain = self.domain(base, target)
            else:
                domain = self.domain(target, base)
            if domain.any():
                # If `other` is a subclass of ndarray, `filled` must have
                # the same subclass, else we'll lose some info.  The easiest
                # then is to fill `target` instead of creating a pure
                # ndarray.
                if isinstance(other, ndarray):
                    if target is other:
                        # We don't want to modify other: copy target first.
                        target = target.copy()
                    target[:] = numeric.where(fromnumeric.asarray(domain),
                                              self.fill_other, target)
                else:
                    target = numeric.where(fromnumeric.asarray(domain),
                                           self.fill_other, target)
                m_other = mask_or(m_other, domain)
        m = mask_or(m_self, m_other)
        method = getattr(base, self.methodname)
        return instance.__class__(method(target, *args), mask=m)
#......................................
class _compamethods(object):
    """Defines comparison methods (eq, ge, gt...).

    Instead of calling a ufunc, the method of the filled data is called.
    Instances are descriptors, bound like `_arithmethods`.
    """
    def __init__ (self, methodname, fill_self=0, fill_other=0):
        """
        :Parameters:
            - `methodname` (String) : Method name.
            - `fill_self` (Float *[0]*) : Fill value for the instance.
            - `fill_other` (Float *[0]*) : Fill value for the target.
        """
        self.methodname = methodname
        self.fill_self = fill_self
        self.fill_other = fill_other
        self.obj = None
    #
    def __get__(self, obj, objtype=None):
        "Binds the wrapper to `obj`."
        # See `_arithmethods.__get__`: the callable captures `obj` instead
        # of depending on state stored on the shared descriptor.
        self.obj = obj
        if obj is None:
            return self
        def bound(other, *args):
            return self._apply(obj, other, *args)
        return bound
    #
    def __call__ (self, other, *args):
        "Execute the call behavior on the last instance accessed."
        return self._apply(self.obj, other, *args)
    #
    def _apply(self, instance, other, *args):
        "Compares the filled `instance` to the filled `other`."
        m = mask_or(instance._mask, getmask(other), flag=False)
        base = instance.filled(self.fill_self)
        target = filled(other, self.fill_other)
        method = getattr(base, self.methodname)
        return instance.__class__(method(target, *args), mask=m)
#..........................................................
MaskedArray.__add__ = _arithmethods('__add__')
MaskedArray.__radd__ = _arithmethods('__add__')
MaskedArray.__sub__ = _arithmethods('__sub__')
MaskedArray.__rsub__ = _arithmethods('__rsub__')
MaskedArray.__pow__ = _arithmethods('__pow__')
MaskedArray.__mul__ = _arithmethods('__mul__', 1, 1)
MaskedArray.__rmul__ = _arithmethods('__mul__', 1, 1)
MaskedArray.__div__ = _arithmethods('__div__', 0, 1,
                                    domain_safe_divide())
MaskedArray.__rdiv__ = _arithmethods('__rdiv__', 1, 0,
                                     domain_safe_divide())
MaskedArray.__truediv__ = _arithmethods('__truediv__', 0, 1,
                                        domain_safe_divide())
MaskedArray.__rtruediv__ = _arithmethods('__rtruediv__', 1, 0,
                                         domain_safe_divide())
MaskedArray.__floordiv__ = _arithmethods('__floordiv__', 0, 1,
                                         domain_safe_divide())
MaskedArray.__rfloordiv__ = _arithmethods('__rfloordiv__', 1, 0,
                                          domain_safe_divide())
MaskedArray.__eq__ = _compamethods('__eq__')
MaskedArray.__ne__ = _compamethods('__ne__')
MaskedArray.__le__ = _compamethods('__le__')
MaskedArray.__lt__ = _compamethods('__lt__')
MaskedArray.__ge__ = _compamethods('__ge__')
MaskedArray.__gt__ = _compamethods('__gt__')
#####--------------------------------------------------------------------------
#---- --- Shortcuts ---
#####---------------------------------------------------------------------------
def isMaskedArray (x):
    "Is x a masked array, that is, an instance of MaskedArray?"
    return isinstance(x, MaskedArray)
isarray = isMaskedArray
isMA = isMaskedArray  #backward compatibility
#masked = MaskedArray(0, int, mask=1)
masked_singleton = MaskedArray(0, dtype=int_, mask=True)
masked = masked_singleton

masked_array = MaskedArray
def array(data, dtype=None, copy=False, order=False,
          mask=nomask, keep_mask=True, flag=True, fill_value=None):
    """array(data, dtype=None, copy=False, order=False, mask=nomask,
             keep_mask=True, flag=True, fill_value=None)

    Acts as shortcut to MaskedArray, with options in a different order for
    convenience.  And backwards compatibility...
    `order` is accepted but not forwarded to MaskedArray.
    """
    return MaskedArray(data, mask=mask, dtype=dtype, copy=copy,
                       keep_mask=keep_mask, flag=flag, fill_value=fill_value)

def is_masked(x):
    """Returns whether x has some masked values."""
    m = getmask(x)
    if m is nomask:
        return False
    return bool(m.any())


#####--------------------------------------------------------------------------
#---- --- Patch methods ---
#####--------------------------------------------------------------------------
#class _arraymethod:
#    """Defines a wrapper for basic array methods.
#Upon call, returns a masked array, where the new `_data` array is the output
#of the corresponding method called on the original `_data`.
#
#If `onmask` is True, the new mask is the output of the method calld on the initial mask.
#If `onmask` is False, the new mask is just a reference to the initial mask.
#
#:Parameters:
#    `funcname` : String
#        Name of the function to apply on data.
#    `onmask` : Boolean *[True]*
#        Whether the mask must be processed also (True) or left alone (False).
+# """ +# def __init__(self, funcname, onmask=True): +# self._name = funcname +# self._onmask = onmask +# self.__doc__ = getattr(ndarray, self._name).__doc__ +# def __call__(self, instance, *args, **params): +# methodname = self._name +# (d,m) = (instance._data, instance._mask) +# C = instance.__class__ +# if m is nomask: +# return C(getattr(d,methodname).__call__(*args, **params)) +# elif self._onmask: +# return C(getattr(d,methodname).__call__(*args, **params), +# mask=getattr(m,methodname)(*args, **params) ) +# else: +# return C(getattr(d,methodname).__call__(*args, **params), mask=m) +# +# def patch(self): +# "Adds the new method to MaskedArray." +# return types.MethodType(self, None, MaskedArray) +##...................................... +#MaskedArray.conj = MaskedArray.conjugate = _arraymethod('conjugate').patch() +#MaskedArray.diagonal = _arraymethod('diagonal').patch() +#MaskedArray.take = _arraymethod('take').patch() +#MaskedArray.ravel = _arraymethod('ravel').patch() +#MaskedArray.transpose = _arraymethod('transpose').patch() +#MaskedArray.T = _arraymethod('transpose').patch() +#MaskedArray.swapaxes = _arraymethod('swapaxes').patch() +#MaskedArray.clip = _arraymethod('clip', onmask=False).patch() +#MaskedArray.compress = _arraymethod('compress').patch() +#MaskedArray.resize = _arraymethod('resize').patch() +#MaskedArray.copy = _arraymethod('copy').patch() + +class _arraymethod(object): + """Defines a wrapper for basic array methods. +Upon call, returns a masked array, where the new `_data` array is the output +of the corresponding method called on the original `_data`. + +If `onmask` is True, the new mask is the output of the method calld on the initial mask. +If `onmask` is False, the new mask is just a reference to the initial mask. + +:Parameters: + `funcname` : String + Name of the function to apply on data. + `onmask` : Boolean *[True]* + Whether the mask must be processed also (True) or left alone (False). 
+ """ + def __init__(self, funcname, onmask=True): + self._name = funcname + self._onmask = onmask + self.__doc__ = self.getdoc() + def getdoc(self): + "Returns the doc of the function (from the doc of the method)." + try: + return getattr(MaskedArray, self._name).__doc__ + except: + return getattr(numpy, self._name).__doc__ + def __get__(self, obj, objtype=None): + self.obj = obj + return self + def __call__(self, *args, **params): + methodname = self._name + (d,m, f) = (self.obj._data, self.obj._mask, self.obj._fill_value) + C = self.obj.__class__ + if m is nomask: + return C(getattr(d,methodname).__call__(*args, **params), + fill_value=f) + elif self._onmask: + return C(getattr(d,methodname).__call__(*args, **params), + mask=getattr(m,methodname)(*args, **params), + fill_value=f) + else: + return C(getattr(d,methodname).__call__(*args, **params), mask=m, + fill_value=f) +#...................................... +MaskedArray.conj = MaskedArray.conjugate = _arraymethod('conjugate') +MaskedArray.diagonal = _arraymethod('diagonal') +MaskedArray.take = _arraymethod('take') +MaskedArray.ravel = _arraymethod('ravel') +MaskedArray.transpose = _arraymethod('transpose') +MaskedArray.T = property(fget=lambda self:self.transpose()) +MaskedArray.swapaxes = _arraymethod('swapaxes') +MaskedArray.clip = _arraymethod('clip', onmask=False) +MaskedArray.compress = _arraymethod('compress') +MaskedArray.copy = _arraymethod('copy') +MaskedArray.squeeze = _arraymethod('squeeze') + +#####-------------------------------------------------------------------------- +#---- --- Extrema functions --- +#####-------------------------------------------------------------------------- +class _minimum_operation: + "Object to calculate minima" + def __init__ (self): + """minimum(a, b) or minimum(a) +In one argument case, returns the scalar minimum. + """ + pass + #......... + def __call__ (self, a, b=None): + "Execute the call behavior." 
+ if b is None: + m = getmask(a) + if m is nomask: + d = amin(filled(a).ravel()) + return d + ac = a.compressed() + if len(ac) == 0: + return masked + else: + return amin(ac) + else: + return where(less(a, b), a, b) + #......... + def reduce(self, target, axis=0): + """Reduces `target` along the given `axis`.""" + m = getmask(target) + if m is nomask: + t = filled(target) + return masked_array (umath.minimum.reduce (t, axis)) + else: + t = umath.minimum.reduce(filled(target, minimum_fill_value(target)), + axis) + m = umath.logical_and.reduce(m, axis) +# return masked_array(t, mask=m, fill_value=get_fill_value(target)) + try: + return target.__class__(t, mask=m, + fill_value=get_fill_value(target)) + except AttributeError: + return masked_array(t, mask=m, + fill_value=get_fill_value(target)) + #......... + def outer(self, a, b): + "Returns the function applied to the outer product of a and b." + ma = getmask(a) + mb = getmask(b) + if ma is nomask and mb is nomask: + m = nomask + else: + ma = getmaskarray(a) + mb = getmaskarray(b) + m = logical_or.outer(ma, mb) + d = umath.minimum.outer(filled(a), filled(b)) + return masked_array(d, mask=m) + +def min(array, axis=None, out=None): + """Returns the minima along the given axis. +If `axis` is None, applies to the flattened array.""" + if out is not None: + raise TypeError("Output arrays Unsupported for masked arrays") + if axis is None: + return minimum(array) + else: + return minimum.reduce(array, axis) +#................................................ +class _maximum_operation: + "Object to calculate maxima" + def __init__ (self): + """maximum(a, b) or maximum(a) + In one argument case returns the scalar maximum. + """ + pass + #......... + def __call__ (self, a, b=None): + "Executes the call behavior." 
+ if b is None: + m = getmask(a) + if m is nomask: + d = amax(filled(a).ravel()) + return d + ac = a.compressed() + if len(ac) == 0: + return masked + else: + return amax(ac) + else: + return where(greater(a, b), a, b) + #......... + def reduce (self, target, axis=0): + """Reduces target along the given axis.""" + m = getmask(target) + if m is nomask: + t = filled(target) + return masked_array(umath.maximum.reduce (t, axis)) + else: + t = umath.maximum.reduce(filled(target, maximum_fill_value(target)), + axis) + m = umath.logical_and.reduce(m, axis) + try: + return target.__class__(t, mask=m, + fill_value=get_fill_value(target)) + except AttributeError: + return masked_array(t, mask=m, + fill_value=get_fill_value(target)) + #......... + def outer (self, a, b): + "Returns the function applied to the outer product of a and b." + ma = getmask(a) + mb = getmask(b) + if ma is nomask and mb is nomask: + m = nomask + else: + ma = getmaskarray(a) + mb = getmaskarray(b) + m = logical_or.outer(ma, mb) + d = umath.maximum.outer(filled(a), filled(b)) + return masked_array(d, mask=m) + +def max(obj, axis=None, out=None): + """Returns the maxima along the given axis. +If `axis` is None, applies to the flattened array.""" + if out is not None: + raise TypeError("Output arrays Unsupported for masked arrays") + if axis is None: + return maximum(obj) + else: + return maximum.reduce(obj, axis) +#................................................ +def ptp(obj, axis=None): + """a.ptp(axis=None) = a.max(axis)-a.min(axis)""" + try: + return obj.max(axis)-obj.min(axis) + except AttributeError: + return max(obj, axis=axis) - min(obj, axis=axis) +#................................................ 
MaskedArray.min = min
MaskedArray.max = max
MaskedArray.ptp = ptp

#####---------------------------------------------------------------------------
#---- --- Definition of functions from the corresponding methods ---
#####---------------------------------------------------------------------------
class _frommethod:
    """Defines functions from existing MaskedArray methods.
:ivar _methodname (String): Name of the method to transform.
    """
    def __init__(self, methodname):
        self._methodname = methodname
        self.__doc__ = self.getdoc()
    def getdoc(self):
        "Returns the doc of the function (from the doc of the method)."
        try:
            return getattr(MaskedArray, self._methodname).__doc__
        except AttributeError:
            # Not a MaskedArray attribute: use the numpy function, if any.
            # A missing docstring is not worth failing for.
            function = getattr(numpy, self._methodname, None)
            if function is None:
                return None
            return function.__doc__
    def __call__(self, x, *args, **params):
        "Calls the method on `x`, converted to a ndarray if it is not masked."
        if isinstance(x, MaskedArray):
            return getattr(x, self._methodname)(*args, **params)
        #FIXME: As x is not a MaskedArray, we transform it to a ndarray with asarray
        #FIXME: ... and call the corresponding method.
        #FIXME: Except that sometimes it doesn't work (try reshape([1,2,3,4],(2,2)))
        #FIXME: we end up with a "SystemError: NULL result without error in PyObject_Call"
        #FIXME: A dirty trick is then to call the initial numpy function...
        method = getattr(fromnumeric.asarray(x), self._methodname)
        try:
            return method(*args, **params)
        except SystemError:
            return getattr(numpy, self._methodname)(x, *args, **params)

all = _frommethod('all')
anomalies = anom = _frommethod('anom')
any = _frommethod('any')
conjugate = _frommethod('conjugate')
ids = _frommethod('ids')
nonzero = _frommethod('nonzero')
diagonal = _frommethod('diagonal')
maximum = _maximum_operation()
mean = _frommethod('mean')
minimum = _minimum_operation ()
product = _frommethod('prod')
ptp = _frommethod('ptp')
ravel = _frommethod('ravel')
repeat = _frommethod('repeat')
reshape = _frommethod('reshape')
std = _frommethod('std')
sum = _frommethod('sum')
swapaxes = _frommethod('swapaxes')
take = _frommethod('take')
var = _frommethod('var')

#..............................................................................
def argsort(a, axis=None, kind='quicksort', fill_value=None):
    """Returns an array of indices that sort 'a' along the specified axis.
    Masked values are filled beforehand to `fill_value`.
    If `fill_value` is None, uses the default for the data type.
    Returns a numpy array.

:Keywords:
    `axis` : Integer *[None]*
        Axis to be indirectly sorted. If None, the `axis` argument is not
        passed on and the default axis of the underlying `argsort` is used.
    `kind` : String *['quicksort']*
        Sorting algorithm (default 'quicksort')
        Possible values: 'quicksort', 'mergesort', or 'heapsort'
    `fill_value` : var *[None]*
        Value used to fill the masked entries before sorting.

    Returns: array of indices that sort 'a' along the specified axis.

    This method executes an indirect sort along the given axis using the
    algorithm specified by the kind keyword. It returns an array of indices of
    the same shape as 'a' that index data along the given axis in sorted order.

    The various sorts are characterized by average speed, worst case
    performance, need for work space, and whether they are stable. A stable
    sort keeps items with the same key in the same relative order. The three
    available algorithms have the following properties:

    |------------------------------------------------------|
    |    kind   | speed |  worst case | work space | stable|
    |------------------------------------------------------|
    |'quicksort'|   1   | O(n^2)      |     0      |   no  |
    |'mergesort'|   2   | O(n*log(n)) |    ~n/2    |   yes |
    |'heapsort' |   3   | O(n*log(n)) |     0      |   no  |
    |------------------------------------------------------|

    All the sort algorithms make temporary copies of the data when the sort is not
    along the last axis. Consequently, sorts along the last axis are faster and use
    less space than sorts along other axis.
    """
    if fill_value is None:
        fill_value = default_fill_value(a)
    d = filled(a, fill_value)
    if axis is None:
        # Deliberately do not forward `axis=None`.
        return d.argsort(kind=kind)
    return d.argsort(axis, kind)

def argmin(a, axis=None, fill_value=None):
    """Returns the array of indices for the minimum values of `a` along the
    specified axis.
    Masked values are treated as if they had the value `fill_value`.
    If `fill_value` is None, the default for the data type is used.
    Returns a numpy array.

:Keywords:
    `axis` : Integer *[None]*
        Axis along which the minima are searched, passed as is to the
        `argmin` method of the filled data.
    `fill_value` : var *[None]*
        Default filling value. If None, uses the data type default.
    """
    if fill_value is None:
        fill_value = default_fill_value(a)
    return filled(a, fill_value).argmin(axis=axis)

def argmax(a, axis=None, fill_value=None):
    """Returns the array of indices for the maximum values of `a` along the
    specified axis.
    Masked values are treated as if they had the value `fill_value`.
    If `fill_value` is None, the opposite of the default for the data type
    is used (when it can be negated).
    Returns a numpy array.

:Keywords:
    `axis` : Integer *[None]*
        Axis along which the maxima are searched, passed as is to the
        `argmax` method of the filled data.
    `fill_value` : var *[None]*
        Default filling value. If None, uses the data type default.
    """
    if fill_value is None:
        fill_value = default_fill_value(a)
        try:
            fill_value = - fill_value
        except (TypeError, ValueError):
            # The default cannot be negated (e.g. a string): keep it as is.
            pass
    return filled(a, fill_value).argmax(axis=axis)

def compressed(x):
    """Returns a compressed version of a masked array (or just the array if it
    wasn't masked first)."""
    # `getmask` reports the absence of a mask with `nomask`, never with None.
    # Masked arrays are always compressed, as before; other unmasked inputs
    # are returned unchanged instead of failing on a missing method.
    if getmask(x) is nomask and not isinstance(x, MaskedArray):
        return x
    return x.compressed()

def count(a, axis = None):
    "Count of the non-masked elements in a, or along a certain axis."
    return masked_array(a).count(axis)

def concatenate(arrays, axis=0):
    "Concatenates the arrays along the given axis"
    #TODO: We lose the subclass, here! We should keep track of the classes...
    #TODO: ...and find the max ? the lowest according to MRO?
    d = numeric.concatenate([filled(x) for x in arrays], axis)
    # NB: the builtin `any` is rebound in this module, hence the explicit loop.
    for x in arrays:
        if getmask(x) is not nomask:
            break
    else:
        # No input carries a mask: no need to build one.
        return masked_array(d)
    dm = numeric.concatenate([getmaskarray(x) for x in arrays], axis)
    dm = make_mask(dm, copy=False, flag=True)
    return masked_array(d, mask=dm)

def expand_dims(x,axis):
    """Expand the shape of x by including newaxis before given axis."""
    if not isinstance(x, MaskedArray):
        return n_expand_dims(x, axis)
    (d, m) = (x._data, x._mask)
    if m is nomask:
        return masked_array(n_expand_dims(d, axis))
    return masked_array(n_expand_dims(d, axis),
                        mask=n_expand_dims(m, axis))

#......................................
+def left_shift (a, n): + "Left shift n bits" + m = getmask(a) + if m is nomask: + d = umath.left_shift(filled(a), n) + return masked_array(d) + else: + d = umath.left_shift(filled(a, 0), n) + return masked_array(d, mask=m) + +def right_shift (a, n): + "Right shift n bits" + m = getmask(a) + if m is nomask: + d = umath.right_shift(filled(a), n) + return masked_array(d) + else: + d = umath.right_shift(filled(a, 0), n) + return masked_array(d, mask=m) +#...................................... +def put(x, indices, values, mode='raise'): + """sets storage-indexed locations to corresponding values. + Values and indices are filled if necessary.""" + # We can't use 'frommethod', the order of arguments is different + try: + return x.put(indices, values, mode=mode) + except AttributeError: + return fromnumeric.asarray(x).put(indices, values, mode=mode) + +def putmask(x, mask, values): #, mode='raise'): + """`putmask(x, mask, v)` results in `x = v` for all places where `mask` is true. +If `v` is shorter than `mask`, it will be repeated as necessary. +In particular `v` can be a scalar or length 1 array.""" + # We can't use 'frommethod', the order of arguments is different + try: + return x.putmask(values, mask) + except AttributeError: + return fromnumeric.asarray(x).putmask(values, mask) + +def transpose(x,axes=None): + """Returns a view of the array with dimensions permuted according to axes. +If `axes` is None (default), returns array with dimensions reversed. + """ + #We can't use 'frommethod', as 'transpose' doesn't take keywords + try: + return x.transpose(axes) + except AttributeError: + return fromnumeric.asarray(x).transpose(axes) + +def resize(x, new_shape): + """resize(a,new_shape) returns a new array with the specified shape. + The total size of the original array can be any size. + The new array is filled with repeated copies of a. If a was masked, the new + array will be masked, and the new mask will be a repetition of the old one. 
+ """ + # We can't use _frommethods here, as N.resize is notoriously whiny. + m = getmask(x) + if m is not nomask: + m = fromnumeric.resize(m, new_shape) + if isinstance(x, MaskedArray): + result = x.__class__(fromnumeric.resize(filled(x), new_shape), mask=m) + else: + result = masked_array(fromnumeric.resize(filled(x), new_shape), mask=m) + result.set_fill_value(get_fill_value(x)) + return result + + +#................................................ +def rank(obj): + """Gets the rank of sequence a (the number of dimensions, not a matrix rank) +The rank of a scalar is zero.""" + return fromnumeric.rank(filled(obj)) +# +def shape(obj): + """Returns the shape of `a` (as a function call which also works on nested sequences). + """ + return fromnumeric.shape(filled(obj)) +# +def size(obj, axis=None): + """Returns the number of elements in the array along the given axis, +or in the sequence if `axis` is None. + """ + return fromnumeric.size(filled(obj), axis) +#................................................ + +#####-------------------------------------------------------------------------- +#---- --- Extra functions --- +#####-------------------------------------------------------------------------- +def where (condition, x, y): + """where(condition, x, y) is x where condition is nonzero, y otherwise. + condition must be convertible to an integer array. + Answer is always the shape of condition. + The type depends on x and y. It is integer if both x and y are + the value masked. + """ + fc = filled(not_equal(condition, 0), 0) + xv = filled(x) + xm = getmask(x) + yv = filled(y) + ym = getmask(y) + d = numeric.choose(fc, (yv, xv)) + md = numeric.choose(fc, (ym, xm)) + m = getmask(condition) + m = make_mask(mask_or(m, md), copy=False, flag=True) + return masked_array(d, mask=m) + +def choose (indices, t, out=None, mode='raise'): + "Returns array shaped like indices with elements chosen from t" + #TODO: implement options `out` and `mode`, if possible. 
+ def fmask (x): + "Returns the filled array, or True if ``masked``." + if x is masked: + return 1 + return filled(x) + def nmask (x): + "Returns the mask, True if ``masked``, False if ``nomask``." + if x is masked: + return 1 + m = getmask(x) + if m is nomask: + return 0 + return m + c = filled(indices, 0) + masks = [nmask(x) for x in t] + a = [fmask(x) for x in t] + d = numeric.choose(c, a) + m = numeric.choose(c, masks) + m = make_mask(mask_or(m, getmask(indices)), copy=0, flag=1) + return masked_array(d, mask=m) + +def sort (x, axis=-1, fill_value=None, kind='quicksort'): + """If x does not have a mask, returns a masked array formed from the + result of numeric.sort(x, axis). + Otherwise, fills x with fill_value. Sort it. Sets a mask where the result + is equal to fill_value. Note that this may have unintended consequences + if the data contains the fill value at a non-masked site. + If fill_value is not given the default fill value for x's type will be + used. + """ + if fill_value is None: + fill_value = default_fill_value (x) + d = filled(x, fill_value) + s = fromnumeric.sort(d, axis=axis, kind=kind) + if getmask(x) is nomask: + return masked_array(s) + return masked_values(s, fill_value, copy=0) + +def round_(a, decimals=0, out=None): + """Returns reference to result. Copies a and rounds to 'decimals' places. + + Keyword arguments: + decimals -- number of decimals to round to (default 0). May be negative. + out -- existing array to use for output (default copy of a). + + Return: + Reference to out, where None specifies a copy of the original array a. + + Round to the specified number of decimals. When 'decimals' is negative it + specifies the number of positions to the left of the decimal point. The + real and imaginary parts of complex numbers are rounded separately. 
+ Nothing is done if the array is not of float type and 'decimals' is greater + than or equal to 0.""" + if not hasattr(a, "_mask"): + mask = nomask + else: + mask = a._mask + if out is None: + return a.__class__(fromnumeric.round_(a, decimals, None), mask=mask) + else: + out = a.__class__(fromnumeric.round_(a, decimals, out), mask=mask) + return out + +def arange(start, stop=None, step=1, dtype=None): + """Just like range() except it returns a array whose type can be specified + by the keyword argument dtype. + """ + return array(numeric.arange(start, stop, step, dtype), mask=nomask) + +def inner(a, b): + """inner(a,b) returns the dot product of two arrays, which has + shape a.shape[:-1] + b.shape[:-1] with elements computed by summing the + product of the elements from the last dimensions of a and b. + Masked elements are replace by zeros. + """ + fa = filled(a, 0) + fb = filled(b, 0) + if len(fa.shape) == 0: + fa.shape = (1,) + if len(fb.shape) == 0: + fb.shape = (1,) + return masked_array(numeric.inner(fa, fb)) +innerproduct = inner + +def outer(a, b): + """outer(a,b) = {a[i]*b[j]}, has shape (len(a),len(b))""" + fa = filled(a, 0).ravel() + fb = filled(b, 0).ravel() + d = numeric.outer(fa, fb) + ma = getmask(a) + mb = getmask(b) + if ma is nomask and mb is nomask: + return masked_array(d) + ma = getmaskarray(a) + mb = getmaskarray(b) + m = make_mask(1-numeric.outer(1-ma, 1-mb), copy=0) + return masked_array(d, mask=m) +outerproduct = outer + +def allequal (a, b, fill_value=True): + """ +Returns `True` if all entries of a and b are equal, using +fill_value as a truth value where either or both are masked. 
+ """ + m = mask_or(getmask(a), getmask(b)) + if m is nomask: + x = filled(a) + y = filled(b) + d = umath.equal(x, y) + return d.all() + elif fill_value: + x = filled(a) + y = filled(b) + d = umath.equal(x, y) + dm = array(d, mask=m, copy=False) + return dm.filled(True).all(None) + else: + return False + +def allclose (a, b, fill_value=True, rtol=1.e-5, atol=1.e-8): + """ Returns `True` if all elements of `a` and `b` are equal subject to given tolerances. +If `fill_value` is True, masked values are considered equal. +If `fill_value` is False, masked values considered unequal. +The relative error rtol should be positive and << 1.0 +The absolute error `atol` comes into play for those elements of `b` + that are very small or zero; it says how small `a` must be also. + """ + m = mask_or(getmask(a), getmask(b)) + d1 = filled(a) + d2 = filled(b) + x = filled(array(d1, copy=0, mask=m), fill_value).astype(float) + y = filled(array(d2, copy=0, mask=m), 1).astype(float) + d = umath.less_equal(umath.absolute(x-y), atol + rtol * umath.absolute(y)) + return fromnumeric.alltrue(fromnumeric.ravel(d)) + +def average (a, axis=None, weights=None, returned = 0): + """average(a, axis=None weights=None, returned=False) + + Averages the array over the given axis. If the axis is None, averages + over all dimensions of the array. Equivalent to a.mean(axis) + + If an integer axis is given, this equals: + a.sum(axis) * 1.0 / size(a, axis) + + If axis is None, this equals: + a.sum(axis) * 1.0 / a.size + + If weights are given, result is: + sum(a * weights,axis) / sum(weights,axis), + where the weights must have a's shape or be 1D with length the + size of a in the given axis. Integer weights are converted to + Float. Not specifying weights is equivalent to specifying + weights that are all 1. + + If 'returned' is True, return a tuple: the result and the sum of + the weights or count of values. The shape of these two results + will be the same. 
+ + Returns masked values instead of ZeroDivisionError if appropriate. + + """ + a = asarray(a) + mask = a.mask + ash = a.shape + if ash == (): + ash = (1,) + if axis is None: + if mask is nomask: + if weights is None: + n = a.sum(axis=None) + d = float(a.size) + else: + w = filled(weights, 0.0).ravel() + n = umath.add.reduce(a._data.ravel() * w) + d = umath.add.reduce(w) + del w + else: + if weights is None: + n = a.filled(0).sum(axis=None) + d = umath.add.reduce((-mask).ravel().astype(int_)) + else: + w = array(filled(weights, 0.0), float, mask=mask).ravel() + n = add.reduce(a.ravel() * w) + d = add.reduce(w) + del w + else: + if mask is nomask: + if weights is None: + d = ash[axis] * 1.0 + n = add.reduce(a._data, axis) + else: + w = filled(weights, 0.0) + wsh = w.shape + if wsh == (): + wsh = (1,) + if wsh == ash: + w = numeric.array(w, float_, copy=0) + n = add.reduce(a*w, axis) + d = add.reduce(w, axis) + del w + elif wsh == (ash[axis],): + ni = ash[axis] + r = [None]*len(ash) + r[axis] = slice(None, None, 1) + w = eval ("w["+ repr(tuple(r)) + "] * ones(ash, float)") + n = add.reduce(a*w, axis) + d = add.reduce(w, axis) + del w, r + else: + raise ValueError, 'average: weights wrong shape.' + else: + if weights is None: + n = add.reduce(a, axis) + d = umath.add.reduce((-mask), axis=axis, dtype=float_) + else: + w = filled(weights, 0.0) + wsh = w.shape + if wsh == (): + wsh = (1,) + if wsh == ash: + w = array(w, float, mask=mask, copy=0) + n = add.reduce(a*w, axis) + d = add.reduce(w, axis) + elif wsh == (ash[axis],): + ni = ash[axis] + r = [None]*len(ash) + r[axis] = slice(None, None, 1) + w = eval ("w["+ repr(tuple(r)) + "] * masked_array(ones(ash, float), mask)") + n = add.reduce(a*w, axis) + d = add.reduce(w, axis) + else: + raise ValueError, 'average: weights wrong shape.' 
+ del w + if n is masked or d is masked: + return masked + result = n/d + del n + + if isinstance(result, MaskedArray): + if ((axis is None) or (axis==0 and a.ndim == 1)) and \ + (result._mask is nomask): + result = result._data + if returned: + if not isinstance(d, MaskedArray): + d = masked_array(d) + if isinstance(d, ndarray) and (not d.shape == result.shape): + d = ones(result.shape, float) * d + if returned: + return result, d + else: + return result + +#.............................................................................. +def asarray(a, dtype=None): + """asarray(data, dtype) = array(data, dtype, copy=0) +Returns `a` as an masked array. +No copy is performed if `a` is already an array. +Subclasses are converted to base class MaskedArray. + """ + return masked_array(a, dtype=dtype, copy=False, keep_mask=True) + +def empty(new_shape, dtype=float): + """empty((d1,...,dn),dtype=float,order='C') +Returns a new array of shape (d1,...,dn) and given type with all its +entries uninitialized. This can be faster than zeros.""" + return masked_array(numeric.empty(new_shape, dtype), mask=nomask) + +def empty_like(a): + """empty_like(a) +Returns an empty (uninitialized) array of the shape and typecode of a. +Note that this does NOT initialize the returned array. 
+If you require your array to be initialized, you should use zeros_like().""" + return masked_array(numeric.empty_like(a), mask=nomask) + +def ones(new_shape, dtype=float): + """ones(shape, dtype=None) +Returns an array of the given dimensions, initialized to all ones.""" + return masked_array(numeric.ones(new_shape, dtype), mask=nomask) + +def zeros(new_shape, dtype=float): + """zeros(new_shape, dtype=None) +Returns an array of the given dimensions, initialized to all zeros.""" + return masked_array(numeric.zeros(new_shape, dtype), mask=nomask) + +#####-------------------------------------------------------------------------- +#---- --- Pickling --- +#####-------------------------------------------------------------------------- +#FIXME: We're kinda stuck with forcing the mask to have the same shape as the data +def _mareconstruct(subtype, baseshape, basetype,): + """Internal function that builds a new MaskedArray from the information stored +in a pickle.""" + _data = ndarray.__new__(ndarray, baseshape, basetype) + _mask = ndarray.__new__(ndarray, baseshape, basetype) + return MaskedArray.__new__(subtype, _data, mask=_mask, dtype=basetype, flag=False) + +def _getstate(a): + "Returns the internal state of the masked array, for pickling purposes." + state = (1, + a.shape, + a.dtype, + a.flags.fnc, + (a._data).__reduce__()[-1][-1], + getmaskarray(a).__reduce__()[-1][-1]) + return state + +def _setstate(a, state): + """Restores the internal state of the masked array, for pickling purposes. +`state` is typically the output of the ``__getstate__`` output, and is a 5-tuple: + + - class name + - a tuple giving the shape of the data + - a typecode for the data + - a binary string for the data + - a binary string for the mask. 
+ """ + (ver, shp, typ, isf, raw, msk) = state + (a._data).__setstate__((shp, typ, isf, raw)) + (a._mask).__setstate__((shp, dtype('|b1'), isf, msk)) + +def _reduce(a): + """Returns a 3-tuple for pickling a MaskedArray.""" + return (_mareconstruct, + (a.__class__, (0,), 'b', ), + a.__getstate__()) + +def dump(a,F): + """Pickles the MaskedArray `a` to the file `F`. +`F` can either be the handle of an exiting file, or a string representing a file name. + """ + if not hasattr(F,'readline'): + F = open(F,'w') + return cPickle.dump(a,F) + +def dumps(a): + """Returns a string corresponding to the pickling of the MaskedArray.""" + return cPickle.dumps(a) + +def load(F): + """Wrapper around ``cPickle.load`` which accepts either a file-like object or + a filename.""" + if not hasattr(F, 'readline'): + F = open(F,'r') + return cPickle.load(F) + +def loads(strg): + "Loads a pickle from the current string.""" + return cPickle.loads(strg) + +MaskedArray.__getstate__ = _getstate +MaskedArray.__setstate__ = _setstate +MaskedArray.__reduce__ = _reduce +MaskedArray.__dump__ = dump +MaskedArray.__dumps__ = dumps + Added: trunk/Lib/sandbox/maskedarray/extras.py =================================================================== --- trunk/Lib/sandbox/maskedarray/extras.py 2006-12-11 15:14:31 UTC (rev 2388) +++ trunk/Lib/sandbox/maskedarray/extras.py 2006-12-11 18:00:04 UTC (rev 2389) @@ -0,0 +1,303 @@ +"""Masked arrays add-ons. 
+ +A collection of utilities for maskedarray + +:author: Pierre Gerard-Marchant +:contact: pierregm_at_uga_dot_edu +:version: $Id: extras.py 38 2006-12-09 23:01:14Z backtopop $ +""" +__author__ = "Pierre GF Gerard-Marchant ($Author: backtopop $)" +__version__ = '1.0' +__revision__ = "$Revision: 38 $" +__date__ = '$Date: 2006-12-09 18:01:14 -0500 (Sat, 09 Dec 2006) $' + +__all__ = ['apply_along_axis', 'atleast_1d', 'atleast_2d', 'atleast_3d', + 'vstack', 'hstack', 'dstack', 'row_stack', 'column_stack', + 'count_masked', + 'masked_all', 'masked_all_like', 'mr_', + 'stdu', 'varu', + ] + +import core +reload(core) +from core import * +from core import _arraymethod + +import numpy +import numpy.core.numeric as numeric +from numpy.core.numeric import ndarray +from numpy.core.numeric import array as nxarray +from numpy.core.fromnumeric import asarray as nxasarray + +from numpy.lib.index_tricks import concatenator +import numpy.lib.function_base as function_base + +def issequence(seq): + """Returns True if the argument is a sequence (ndarray, list or tuple).""" + if isinstance(seq, ndarray): + return True + elif isinstance(seq, tuple): + return True + elif isinstance(seq, list): + return True + return False + +def count_masked(arr, axis=None): + """Counts the number of masked elements along the given axis.""" + m = getmaskarray(arr) + return m.sum(axis) + +def masked_all(shape, dtype): + """Returns an empty masked array of the given shape and dtype, + where all the data are masked.""" + a = empty(shape, dtype) + a[:] = masked + return a + +def masked_all_like(arr): + """Returns an empty masked array of the same shape and dtype as the array `arr`, + where all the data are masked.""" + a = empty_like(arr) + a[:] = masked + return a + +#####-------------------------------------------------------------------------- +#---- --- New methods --- +#####-------------------------------------------------------------------------- +def varu(a, axis=None, dtype=None): + """a.var(axis=None, 
dtype=None) + Returns an unbiased estimate of the variance. + + Instead of dividing the sum of squared anomalies (SSA) by n, the number of + elements, the SSA is divided by n-1. + """ + a = asarray(a) + cnt = a.count(axis=axis) + anom = a.anom(axis=axis, dtype=dtype) + anom *= anom + dvar = anom.sum(axis) / (cnt-1) + if axis is None: + return dvar + return a.__class__(dvar, + mask=mask_or(a._mask.all(axis), (cnt==1)), + fill_value=a._fill_value) + +def stdu(a, axis=None, dtype=None): + """a.var(axis=None, dtype=None) + Returns an unbiased estimate of the standard deviation. + + Instead of dividing the sum of squared anomalies (SSA) by n, the number of + elements, the SSA is divided by n-1. + """ + a = asarray(a) + dvar = a.varu(axis,dtype) + if axis is None: + if dvar is masked: + return masked + else: + # Should we use umath.sqrt instead ? + return sqrt(dvar) + return a.__class__(sqrt(dvar._data), mask=dvar._mask, + fill_value=a._fill_value) + +MaskedArray.stdu = stdu +MaskedArray.varu = varu + +#####-------------------------------------------------------------------------- +#---- --- Standard functions --- +#####-------------------------------------------------------------------------- +class _fromnxfunction: + """Defines a wrapper to adapt numpy functions to masked arrays.""" + def __init__(self, funcname): + self._function = funcname + self.__doc__ = self.getdoc() + def getdoc(self): + "Retrieves the __doc__ string from the function." 
+ return getattr(numpy, self._function).__doc__ +\ + "(The function is applied to both the _data and the mask, if any.)" + def __call__(self, *args, **params): + func = getattr(numpy, self._function) + if len(args)==1: + x = args[0] + if isinstance(x,ndarray): + _d = func.__call__(nxasarray(x), **params) + _m = func.__call__(getmaskarray(x), **params) + return masked_array(_d, mask=_m) + elif isinstance(x, tuple): + _d = func.__call__(tuple([nxasarray(a) for a in x]), **params) + _m = func.__call__(tuple([getmaskarray(a) for a in x]), **params) + return masked_array(_d, mask=_m) + else: + arrays = [] + args = list(args) + while len(args)>0 and issequence(args[0]): + arrays.append(args.pop(0)) + res = [] + for x in arrays: + _d = func.__call__(nxasarray(x), *args, **params) + _m = func.__call__(getmaskarray(x), *args, **params) + res.append(masked_array(_d, mask=_m)) + return res + +atleast_1d = _fromnxfunction('atleast_1d') +atleast_2d = _fromnxfunction('atleast_2d') +atleast_3d = _fromnxfunction('atleast_3d') + +vstack = row_stack = _fromnxfunction('vstack') +hstack = _fromnxfunction('hstack') +column_stack = _fromnxfunction('column_stack') +dstack = _fromnxfunction('dstack') + +#####-------------------------------------------------------------------------- +#---- +#####-------------------------------------------------------------------------- +def apply_along_axis(func1d,axis,arr,*args): + """ Execute func1d(arr[i],*args) where func1d takes 1-D arrays + and arr is an N-d array. i varies so as to apply the function + along the given axis for each 1-d subarray in arr. + """ + arr = numeric.asanyarray(arr) + nd = arr.ndim + if axis < 0: + axis += nd + if (axis >= nd): + raise ValueError("axis must be less than arr.ndim; axis=%d, rank=%d." 
+ % (axis,nd)) + ind = [0]*(nd-1) + i = numeric.zeros(nd,'O') + indlist = range(nd) + indlist.remove(axis) + i[axis] = slice(None,None) + outshape = numeric.asarray(arr.shape).take(indlist) + i.put(indlist, ind) + res = func1d(arr[tuple(i.tolist())],*args) + # if res is a number, then we have a smaller output array + asscalar = numeric.isscalar(res) + if not asscalar: + try: + len(res) + except TypeError: + asscalar = True + # Note: we shouldn't set the dtype of the output from the first result... + #...so we force the type to object, and build a list of dtypes + #...we'll just take the largest, to avoid some downcasting + dtypes = [] + if asscalar: + dtypes.append(numeric.asarray(res).dtype) + outarr = zeros(outshape, object_) + outarr[ind] = res + Ntot = numeric.product(outshape) + k = 1 + while k < Ntot: + # increment the index + ind[-1] += 1 + n = -1 + while (ind[n] >= outshape[n]) and (n > (1-nd)): + ind[n-1] += 1 + ind[n] = 0 + n -= 1 + i.put(indlist,ind) + res = func1d(arr[tuple(i.tolist())],*args) + outarr[ind] = res + dtypes.append(asarray(res).dtype) + k += 1 + else: + Ntot = numeric.product(outshape) + holdshape = outshape + outshape = list(arr.shape) + outshape[axis] = len(res) + dtypes.append(asarray(res).dtype) + outarr = zeros(outshape, object_) + outarr[tuple(i.tolist())] = res + k = 1 + while k < Ntot: + # increment the index + ind[-1] += 1 + n = -1 + while (ind[n] >= holdshape[n]) and (n > (1-nd)): + ind[n-1] += 1 + ind[n] = 0 + n -= 1 + i.put(indlist, ind) + res = func1d(arr[tuple(i.tolist())],*args) + outarr[tuple(i.tolist())] = res + dtypes.append(asarray(res).dtype) + k += 1 + print dtypes + if not hasattr(arr, '_mask'): + return numeric.asarray(outarr, dtype=max(dtypes)) + else: + return outarr.astype(max(dtypes)) + +#####-------------------------------------------------------------------------- +#---- --- Concatenation helpers --- +#####-------------------------------------------------------------------------- + +class 
mconcatenator(concatenator): + """Translates slice objects to concatenation along an axis.""" + + def __init__(self, axis=0): + concatenator.__init__(self, axis, matrix=False) + + def __getitem__(self,key): + if isinstance(key, str): + raise MAError, "Unavailable for masked array." + if type(key) is not tuple: + key = (key,) + objs = [] + scalars = [] + final_dtypedescr = None + for k in range(len(key)): + scalar = False + if type(key[k]) is slice: + step = key[k].step + start = key[k].start + stop = key[k].stop + if start is None: + start = 0 + if step is None: + step = 1 + if type(step) is type(1j): + size = int(abs(step)) + newobj = function_base.linspace(start, stop, num=size) + else: + newobj = numeric.arange(start, stop, step) + elif type(key[k]) is str: + if (key[k] in 'rc'): + self.matrix = True + self.col = (key[k] == 'c') + continue + try: + self.axis = int(key[k]) + continue + except (ValueError, TypeError): + raise ValueError, "Unknown special directive" + elif type(key[k]) in numeric.ScalarType: + newobj = asarray([key[k]]) + scalars.append(k) + scalar = True + else: + newobj = key[k] + objs.append(newobj) + if isinstance(newobj, numeric.ndarray) and not scalar: + if final_dtypedescr is None: + final_dtypedescr = newobj.dtype + elif newobj.dtype > final_dtypedescr: + final_dtypedescr = newobj.dtype + if final_dtypedescr is not None: + for k in scalars: + objs[k] = objs[k].astype(final_dtypedescr) + res = concatenate(tuple(objs),axis=self.axis) + return self._retval(res) + +class mr_class(mconcatenator): + """Translates slice objects to concatenation along the first axis. 
+ + For example: + >>> r_[array([1,2,3]), 0, 0, array([4,5,6])] + array([1, 2, 3, 0, 0, 4, 5, 6]) + """ + def __init__(self): + mconcatenator.__init__(self, 0) + +mr_ = mr_class() Added: trunk/Lib/sandbox/maskedarray/mpl_maskedarray.patch =================================================================== --- trunk/Lib/sandbox/maskedarray/mpl_maskedarray.patch 2006-12-11 15:14:31 UTC (rev 2388) +++ trunk/Lib/sandbox/maskedarray/mpl_maskedarray.patch 2006-12-11 18:00:04 UTC (rev 2389) @@ -0,0 +1,12 @@ +diff -urNp matplotlib/numerix/ma.init/__init__.py matplotlib/numerix/ma/__init__.py +--- matplotlib/numerix/ma.init/__init__.py 2006-08-19 16:21:56.000000000 -0400 ++++ matplotlib/numerix/ma/__init__.py 2006-11-29 12:48:14.000000000 -0500 +@@ -9,7 +9,9 @@ elif which[0] == "numeric": + nomask = None + getmaskorNone = getmask + elif which[0] == "numpy": +- from numpy.core.ma import * ++ from maskedarray import * + def getmaskorNone(obj): + _msk = getmask(obj) + if _msk is nomask: Added: trunk/Lib/sandbox/maskedarray/setup.py =================================================================== --- trunk/Lib/sandbox/maskedarray/setup.py 2006-12-11 15:14:31 UTC (rev 2388) +++ trunk/Lib/sandbox/maskedarray/setup.py 2006-12-11 18:00:04 UTC (rev 2389) @@ -0,0 +1,19 @@ +#!/usr/bin/env python +__author__ = "Pierre GF Gerard-Marchant ($Author: backtopop $)" +__version__ = '1.0' +__revision__ = "$Revision: 37 $" +__date__ = '$Date: 2006-12-08 14:30:29 -0500 (Fri, 08 Dec 2006) $' + +import os + +def configuration(parent_package='',top_path=None): + from numpy.distutils.misc_util import Configuration + config = Configuration('maskedarray',parent_package,top_path) + config.add_data_dir('tests') + return config + +if __name__ == "__main__": + from numpy.distutils.core import setup + #setup.update(nmasetup) + config = configuration(top_path='').todict() + setup(**config) Added: trunk/Lib/sandbox/maskedarray/testutils.py 
=================================================================== --- trunk/Lib/sandbox/maskedarray/testutils.py 2006-12-11 15:14:31 UTC (rev 2388) +++ trunk/Lib/sandbox/maskedarray/testutils.py 2006-12-11 18:00:04 UTC (rev 2389) @@ -0,0 +1,182 @@ +"""Miscellaneous functions for testing masked arrays and subclasses + +:author: Pierre Gerard-Marchant +:contact: pierregm_at_uga_dot_edu +:version: $Id: testutils.py 14 2006-12-04 19:31:13Z pierregm $ +""" +__author__ = "Pierre GF Gerard-Marchant ($Author: pierregm $)" +__version__ = "1.0" +__revision__ = "$Revision: 14 $" +__date__ = "$Date: 2006-12-04 14:31:13 -0500 (Mon, 04 Dec 2006) $" + + +import numpy as N +from numpy.core.numerictypes import float_ +import numpy.core.umath as umath +from numpy.testing import NumpyTest, NumpyTestCase +from numpy.testing.utils import build_err_msg, rand + +import core +reload(core) +from core import mask_or, getmask, getmaskarray, masked_array, nomask +from core import filled, equal, less + +#------------------------------------------------------------------------------ +def approx (a, b, fill_value=1, rtol=1.e-5, atol=1.e-8): + """Returns true if all components of a and b are equal subject to given tolerances. + If fill_value is 1, masked values considered equal. + If fill_value is 0, masked values considered unequal. + The relative error rtol should be positive and << 1.0 + The absolute error atol comes into play for those elements of b that are + very small or zero; it says how small a must be also. + """ + m = mask_or(getmask(a), getmask(b)) + d1 = filled(a) + d2 = filled(b) + if d1.dtype.char == "O" or d2.dtype.char == "O": + return N.equal(d1,d2).ravel() + x = filled(masked_array(d1, copy=False, mask=m), fill_value).astype(float_) + y = filled(masked_array(d2, copy=False, mask=m), 1).astype(float_) + d = N.less_equal(umath.absolute(x-y), atol + rtol * umath.absolute(y)) + return d.ravel() +#............................ 
+def assert_equal(actual,desired,err_msg=''): + """Asserts that two items are equal. + """ + if isinstance(desired, dict): + assert isinstance(actual, dict), repr(type(actual)) + assert_equal(len(actual),len(desired),err_msg) + for k,i in desired.items(): + assert actual.has_key(k), repr(k) + assert_equal(actual[k], desired[k], 'key=%r\n%s' % (k,err_msg)) + return + if isinstance(desired, (list,tuple)) and isinstance(actual, (list,tuple)): + assert_equal(len(actual),len(desired),err_msg) + for k in range(len(desired)): + assert_equal(actual[k], desired[k], 'item=%r\n%s' % (k,err_msg)) + return + from numpy.core import ndarray + if isinstance(actual, ndarray) or isinstance(desired, ndarray): + return assert_array_equal(actual, desired, err_msg) + msg = build_err_msg([actual, desired], err_msg,) + assert desired == actual, msg +#............................. +def fail_if_equal(actual,desired,err_msg='',): + """Raises an assertion error if two items are equal. + """ + if isinstance(desired, dict): + assert isinstance(actual, dict), repr(type(actual)) + fail_if_equal(len(actual),len(desired),err_msg) + for k,i in desired.items(): + assert actual.has_key(k), repr(k) + fail_if_equal(actual[k], desired[k], 'key=%r\n%s' % (k,err_msg)) + return + if isinstance(desired, (list,tuple)) and isinstance(actual, (list,tuple)): + fail_if_equal(len(actual),len(desired),err_msg) + for k in range(len(desired)): + fail_if_equal(actual[k], desired[k], 'item=%r\n%s' % (k,err_msg)) + return + if isinstance(actual, N.ndarray) or isinstance(desired, N.ndarray): + return fail_if_array_equal(actual, desired, err_msg) + msg = build_err_msg([actual, desired], err_msg) + assert desired != actual, msg +assert_not_equal = fail_if_equal +#............................ +def assert_almost_equal(actual,desired,decimal=7,err_msg=''): + """Asserts that two items are almost equal. 
+ The test is equivalent to abs(desired-actual) < 0.5 * 10**(-decimal) + """ + if isinstance(actual, N.ndarray) or isinstance(desired, N.ndarray): + return assert_array_almost_equal(actual, desired, decimal, err_msg) + msg = build_err_msg([actual, desired], err_msg) + assert round(abs(desired - actual),decimal) == 0, msg +#............................ +def assert_array_compare(comparison, x, y, err_msg='', header='', + fill_value=True): + """Asserts that a comparison relation between two masked arrays is satisfied + elementwise.""" + xf = filled(x) + yf = filled(y) + m = mask_or(getmask(x), getmask(y)) + + x = filled(masked_array(xf, copy=False, mask=m), fill_value) + y = filled(masked_array(yf, copy=False, mask=m), fill_value) + if (x.dtype.char != "O"): + x = x.astype(float_) + if isinstance(x, N.ndarray) and x.size > 1: + x[N.isnan(x)] = 0 + elif N.isnan(x): + x = 0 + if (y.dtype.char != "O"): + y = y.astype(float_) + if isinstance(y, N.ndarray) and y.size > 1: + y[N.isnan(y)] = 0 + elif N.isnan(y): + y = 0 + try: + cond = (x.shape==() or y.shape==()) or x.shape == y.shape + if not cond: + msg = build_err_msg([x, y], + err_msg + + '\n(shapes %s, %s mismatch)' % (x.shape, + y.shape), + header=header, + names=('x', 'y')) + assert cond, msg + val = comparison(x,y) + if m is not nomask and fill_value: + val = masked_array(val, mask=m, copy=False) + if isinstance(val, bool): + cond = val + reduced = [0] + else: + reduced = val.ravel() + cond = reduced.all() + reduced = reduced.tolist() + if not cond: + match = 100-100.0*reduced.count(1)/len(reduced) + msg = build_err_msg([x, y], + err_msg + + '\n(mismatch %s%%)' % (match,), + header=header, + names=('x', 'y')) + assert cond, msg + except ValueError: + msg = build_err_msg([x, y], err_msg, header=header, names=('x', 'y')) + raise ValueError(msg) +#............................ 
+def assert_array_equal(x, y, err_msg=''): + """Checks the elementwise equality of two masked arrays.""" + assert_array_compare(equal, x, y, err_msg=err_msg, + header='Arrays are not equal') +##............................ +def fail_if_array_equal(x, y, err_msg=''): + """Raises an assertion error if two masked arrays are equal + (elem by elem.)""" + def compare(x,y): + + return (not N.alltrue(approx(x, y))) + assert_array_compare(compare, x, y, err_msg=err_msg, + header='Arrays are not equal') +#............................ +def assert_array_almost_equal(x, y, decimal=6, err_msg=''): + """Checks the elementwise equality of two masked arrays, up to a given + number of decimals.""" + def compare(x, y): + return approx(x,y) + assert_array_compare(compare, x, y, err_msg=err_msg, + header='Arrays are not almost equal') +#............................ +def assert_array_less(x, y, err_msg=''): + assert_array_compare(less, x, y, err_msg=err_msg, + header='Arrays are not less-ordered') +#............................ +assert_close = assert_almost_equal +#............................ 
+def assert_mask_equal(m1, m2): + """Asserts the equality of two masks.""" + if m1 is nomask: + assert(m2 is nomask) + if m2 is nomask: + assert(m1 is nomask) + assert_array_equal(m1, m2) \ No newline at end of file Added: trunk/Lib/sandbox/maskedarray/version.py =================================================================== --- trunk/Lib/sandbox/maskedarray/version.py 2006-12-11 15:14:31 UTC (rev 2388) +++ trunk/Lib/sandbox/maskedarray/version.py 2006-12-11 18:00:04 UTC (rev 2389) @@ -0,0 +1,11 @@ +"""Version number""" + +version = '1.00' +release = False + +if not release: + import core + import extras + revision = [core.__revision__.split(':')[-1][:-1].strip(), + extras.__revision__.split(':')[-1][:-1].strip(),] + version += '.dev%04i' % max([int(rev) for rev in revision]) \ No newline at end of file From scipy-svn at scipy.org Mon Dec 11 13:29:11 2006 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Mon, 11 Dec 2006 12:29:11 -0600 (CST) Subject: [Scipy-svn] r2390 - trunk/Lib/sandbox/timeseries Message-ID: <20061211182911.584A239C1FE@new.scipy.org> Author: mattknox_ca Date: 2006-12-11 12:29:08 -0600 (Mon, 11 Dec 2006) New Revision: 2390 Removed: trunk/Lib/sandbox/timeseries/cseries.pyd Log: Removed file/folder Deleted: trunk/Lib/sandbox/timeseries/cseries.pyd =================================================================== (Binary files differ) From scipy-svn at scipy.org Mon Dec 11 13:31:21 2006 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Mon, 11 Dec 2006 12:31:21 -0600 (CST) Subject: [Scipy-svn] r2391 - trunk/Lib/sandbox/timeseries Message-ID: <20061211183121.BBF1D39C1FE@new.scipy.org> Author: mattknox_ca Date: 2006-12-11 12:31:14 -0600 (Mon, 11 Dec 2006) New Revision: 2391 Modified: trunk/Lib/sandbox/timeseries/readme.txt Log: Modified: trunk/Lib/sandbox/timeseries/readme.txt =================================================================== --- trunk/Lib/sandbox/timeseries/readme.txt 2006-12-11 18:29:08 UTC (rev 2390) +++ 
trunk/Lib/sandbox/timeseries/readme.txt 2006-12-11 18:31:14 UTC (rev 2391) @@ -1,11 +1,10 @@ Requirements and warnings: -1. version 2.0.x of the mx DateTime module MUST be installed. Only "tested" with 2.0.3 +1. version 2.0.x of the mx DateTime module MUST be installed. Only "tested" with 2.0.3. The cseries + code requires access to a couple of the header files included with this module when compiling 2. Only tested with numpy 1.0.1 3. Only tested with Python 2.4.x 4. Only tested on Windows Platform -5. the included cseries.pyd file was compiled for 32-bit windows, so if you are trying - this on another platform, the first thing you need to do is recompile it Instructions: From scipy-svn at scipy.org Mon Dec 11 13:54:32 2006 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Mon, 11 Dec 2006 12:54:32 -0600 (CST) Subject: [Scipy-svn] r2392 - trunk/Lib/sandbox/timeseries Message-ID: <20061211185432.D788839C1C0@new.scipy.org> Author: mattknox_ca Date: 2006-12-11 12:54:29 -0600 (Mon, 11 Dec 2006) New Revision: 2392 Modified: trunk/Lib/sandbox/timeseries/cseries.c Log: Modified: trunk/Lib/sandbox/timeseries/cseries.c =================================================================== --- trunk/Lib/sandbox/timeseries/cseries.c 2006-12-11 18:31:14 UTC (rev 2391) +++ trunk/Lib/sandbox/timeseries/cseries.c 2006-12-11 18:54:29 UTC (rev 2392) @@ -1,5 +1,4 @@ #include -//#include #include #include #include @@ -11,6 +10,21 @@ /////////////////////////////////////////////////////////////////////// +static //PyArrayObject * +setArrayItem(PyArrayObject **theArray, long index, PyObject *newVal) +{ + char *setptr; + + if (index >= 0) + { + //set value in array + setptr = (*theArray)->data + (index) * (*theArray)->strides[0]; + PyArray_SETITEM(*theArray,setptr,newVal); + } + + //return theArray; +} + static int freqVal(char freq) { @@ -111,6 +125,76 @@ } +static long +getDateInfo_sub(long dateNum, char freq, char info) { + + long monthNum; + mxDateTimeObject *convDate; + 
convDate = (mxDateTimeObject *)mxDateTime.DateTime_FromAbsDateAndTime(toDaily(dateNum,freq),0); + + switch(info) + { + case 'Y': //year + + return (long)(convDate->year); + + case 'Q': //quarter + monthNum = (long)(convDate->month); + return ((monthNum-1)/3)+1; + + case 'M': //month + return (long)(convDate->month); + + case 'D': //day + return (long)(convDate->day); + + case 'W': //day of week + return (long)(convDate->day_of_week); + default: + return -1; + } +} + + +static char cseries_getDateInfo_doc[] = ""; +static PyObject * +cseries_getDateInfo(PyObject *self, PyObject *args) +{ + char *freq; + char *info; + + PyArrayObject *array; + PyArrayObject *tempArray; + PyArrayObject *newArray; + + char *getptr; + PyObject *val; + long i, lngVal, dInfo, dim; + + if (!PyArg_ParseTuple(args, "Oss:getDateInfo(array, freq, info)", &tempArray, &freq, &info)) return NULL; + + array = PyArray_GETCONTIGUOUS(tempArray); + + dim = array->dimensions[0]; + + //initialize new array + newArray = (PyArrayObject*)PyArray_SimpleNew(array->nd, &dim, array->descr->type_num); + + for (i = 0; i < array->dimensions[0]; i++) + { + getptr = array->data + i*array->strides[0]; + val = PyArray_GETITEM(array, getptr); + lngVal = PyInt_AsLong(val); + dInfo = getDateInfo_sub(lngVal, *freq, *info); + + setArrayItem(&newArray, i, PyInt_FromLong(dInfo)); + } + + return (PyObject *) newArray; + +} + + //fromDate is periods since Dec 31, 1849 static long convert(long fromDate, char fromFreq, char toFreq, int notStartInd) @@ -319,22 +403,6 @@ } -static //PyArrayObject * -setArrayItem(PyArrayObject **theArray, long index, PyObject *newVal) -{ - char *setptr; - - if (index >= 0) - { - //set value in array - setptr = (*theArray)->data + (index) * (*theArray)->strides[0]; - PyArray_SETITEM(*theArray,setptr,newVal); - } - - //return theArray; -} - - static char cseries_reindex_doc[] = ""; static PyObject * cseries_reindex(PyObject *self, PyObject *args) @@ -533,6 +601,7 @@ static PyMethodDef 
cseries_methods[] = { {"reindex", cseries_reindex, METH_VARARGS, cseries_reindex_doc}, {"convert", cseries_convert, METH_VARARGS, cseries_convert_doc}, + {"getDateInfo", cseries_getDateInfo, METH_VARARGS, cseries_getDateInfo_doc}, {NULL, NULL} }; From scipy-svn at scipy.org Mon Dec 11 13:55:51 2006 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Mon, 11 Dec 2006 12:55:51 -0600 (CST) Subject: [Scipy-svn] r2393 - trunk/Lib/sandbox/timeseries Message-ID: <20061211185551.D684E39C1C0@new.scipy.org> Author: mattknox_ca Date: 2006-12-11 12:55:48 -0600 (Mon, 11 Dec 2006) New Revision: 2393 Modified: trunk/Lib/sandbox/timeseries/timeseries.py Log: Modified: trunk/Lib/sandbox/timeseries/timeseries.py =================================================================== --- trunk/Lib/sandbox/timeseries/timeseries.py 2006-12-11 18:54:29 UTC (rev 2392) +++ trunk/Lib/sandbox/timeseries/timeseries.py 2006-12-11 18:55:48 UTC (rev 2393) @@ -77,6 +77,20 @@ return "" + def firstValue(self, asDate=False): + val = super(TimeSeries, self).firstValue() + if asDate: + return tsdate.Date(freq=self.freq, val=val) + else: + return val + + def lastValue(self, asDate=False): + val = super(TimeSeries, self).lastValue() + if asDate: + return tsdate.Date(freq=self.freq, val=val) + else: + return val + ### DATA def __add__(self, other): @@ -178,6 +192,30 @@ if start.freq != end.freq: raise ValueError("start and end dates must have same frequency!") return TimeSeries(numpy.arange(int(start), int(end)+1), dtype=corelib.freqTypeMapping[start.freq], freq=start.freq, observed='END', startIndex=int(start)) + +def year(dateSer): + return __getDateInfo(dateSer,'Y') + +def quarter(dateSer): + return __getDateInfo(dateSer,'Q') + +def month(dateSer): + return __getDateInfo(dateSer,'M') + +def day(dateSer): + return __getDateInfo(dateSer,'D') + +def day_of_week(dateSer): + return __getDateInfo(dateSer,'W') + +def __getDateInfo(dateSer,infoCode): + newData = 
ma.array(cseries.getDateInfo(dateSer.data.filled(), dateSer.dtype.freq, infoCode)) + newData[dateSer.data.mask] = ma.masked + newSer = copy.deepcopy(dateSer) + newSer.data = newData + newSer.dtype = numpy.int_ + return newSer + def validOpInputs(ser1, ser2): if isinstance(ser1, TimeSeries) and isinstance(ser2, TimeSeries) and ser1.freq != ser2.freq: From scipy-svn at scipy.org Mon Dec 11 14:00:06 2006 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Mon, 11 Dec 2006 13:00:06 -0600 (CST) Subject: [Scipy-svn] r2394 - trunk/Lib/sandbox/timeseries/examples Message-ID: <20061211190006.56F6F39C211@new.scipy.org> Author: mattknox_ca Date: 2006-12-11 13:00:02 -0600 (Mon, 11 Dec 2006) New Revision: 2394 Modified: trunk/Lib/sandbox/timeseries/examples/example.py Log: Modified: trunk/Lib/sandbox/timeseries/examples/example.py =================================================================== --- trunk/Lib/sandbox/timeseries/examples/example.py 2006-12-11 18:55:48 UTC (rev 2393) +++ trunk/Lib/sandbox/timeseries/examples/example.py 2006-12-11 19:00:02 UTC (rev 2394) @@ -15,6 +15,13 @@ """ +Set values occurring on Fridays to 100. +""" +weekdays = ts.day_of_week(ts.tser(bSer.firstValue(asDate=True),bSer.lastValue(asDate=True))) +bSer[ts.where(weekdays == 4,True,False)] = 100 + + +""" Convert bSer to a monthly frequency series. 
The optional observed argument to the convert method specifies what From scipy-svn at scipy.org Mon Dec 11 14:03:14 2006 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Mon, 11 Dec 2006 13:03:14 -0600 (CST) Subject: [Scipy-svn] r2395 - trunk/Lib/sandbox/timeseries/doc Message-ID: <20061211190314.7A90039C1C0@new.scipy.org> Author: mattknox_ca Date: 2006-12-11 13:03:11 -0600 (Mon, 11 Dec 2006) New Revision: 2395 Modified: trunk/Lib/sandbox/timeseries/doc/todo.txt Log: Modified: trunk/Lib/sandbox/timeseries/doc/todo.txt =================================================================== --- trunk/Lib/sandbox/timeseries/doc/todo.txt 2006-12-11 19:00:02 UTC (rev 2394) +++ trunk/Lib/sandbox/timeseries/doc/todo.txt 2006-12-11 19:03:11 UTC (rev 2395) @@ -7,15 +7,6 @@ - make the various Date data types actual numpy data types instead of an ugly hack (this is probably beyond my ability) - - add month(), year(), day_of_week(), etc... functions that take a - TimeSeries object with dtype as one of the Date types and returns - a TimeSeries object of integer type with the relevant info. - This would make it easy to do things like retrieving all the values - that occur on a Friday for a particular series, for example. - - I could write a brute force method for this right now fairly easily, - but it would be very slow. 
- - add support for a few more frequency conversions that are missing in the dateOf function in tsdate.py From scipy-svn at scipy.org Tue Dec 12 02:24:11 2006 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Tue, 12 Dec 2006 01:24:11 -0600 (CST) Subject: [Scipy-svn] r2396 - trunk/Lib/maxentropy Message-ID: <20061212072411.C0E5A39C259@new.scipy.org> Author: edschofield Date: 2006-12-12 01:24:01 -0600 (Tue, 12 Dec 2006) New Revision: 2396 Modified: trunk/Lib/maxentropy/maxentropy.py Log: Small documentation improvements for maxentropy module Modified: trunk/Lib/maxentropy/maxentropy.py =================================================================== --- trunk/Lib/maxentropy/maxentropy.py 2006-12-11 19:03:11 UTC (rev 2395) +++ trunk/Lib/maxentropy/maxentropy.py 2006-12-12 07:24:01 UTC (rev 2396) @@ -293,13 +293,15 @@ actually computing p_theta. This is important if the sample space is continuous or innumerable in practice. We approximate the norm constant Z using importance sampling as in - [Rosenfeld02whole]. Note that the gradient of this estimator is - equal to the importance sampling *ratio estimator* of the - gradient of the entropy dual [see my paper, ICSLP 2004], so using - this estimator in conjunction with grad() in gradient-based - optimization methods should be stable. Also note that this - estimator is deterministic for any given sample. - + [Rosenfeld01whole]. This estimator is deterministic for any + given sample. Note that the gradient of this estimator is equal + to the importance sampling *ratio estimator* of the gradient of + the entropy dual [see my thesis], justifying the use of this + estimator in conjunction with grad() in optimization methods that + use both the function and gradient. Note, however, that + convergence guarantees break down for most optimization + algorithms in the presence of stochastic error. + Note that, for 'bigmodel' objects, the dual estimate is deterministic for any given sample. 
It is given as: @@ -387,8 +389,7 @@ # Do we perform a test on external sample(s) every iteration? # Only relevant to bigmodel objects if hasattr(self, 'testevery') and self.testevery > 0: - M = self.testevery - if (self.iters + 1) % M != 0: + if (self.iters + 1) % self.testevery != 0: if self.verbose: print "Skipping test on external sample(s) ..." else: @@ -645,10 +646,10 @@ def setfeaturesandsamplespace(self, f, samplespace): - """Creates a new exponential model, where f is a list of feature - functions f_i mapping the sample space to real values. The - parameter vector params is initialized to the zero vector, of the - same length as the list of feature functions f_i. + """Creates a new matrix self.F of features f of all points in the + sample space. f is a list of feature functions f_i mapping the + sample space to real values. The parameter vector self.params is + initialized to zero. We also compute f(x) for each x in the sample space and store them as self.F. This uses lots of memory but is much faster. @@ -691,7 +692,7 @@ """ # For discrete models, use the representation E_p[f(X)] = p . F if not hasattr(self, 'F'): - raise AttributeError, "first create a feature matrix F" + raise AttributeError, "first set the feature matrix F" # A pre-computed matrix of features exists p = self.pmf() @@ -851,11 +852,11 @@ # N[c, x] += 1 # This would be a nicer input format, but computations are more - # efficient internally with one long row vector. What we really need - # is for sparse # matrices to offer a .reshape method so this - # conversion could be done internally and transparently. Then the - # numcontexts argument to the conditionalmodel constructor could also - # be inferred from the matrix dimensions. + # efficient internally with one long row vector. What we really need is + # for sparse matrices to offer a .reshape method so this conversion + # could be done internally and transparently. 
Then the numcontexts + # argument to the conditionalmodel constructor could also be inferred + # from the matrix dimensions. super(conditionalmodel, self).__init__() self.F = F @@ -1325,7 +1326,7 @@ {sum_j p_dot(s_j)/aux_dist(s_j) f_i(s_j) } / {sum_j p_dot(s_j) / aux_dist(s_j)} - Compute the estimator E_p f_i(X) using logs as: + Compute the estimator E_p f_i(X) in log space as: num_i / denom, where num_i = exp(logsumexp(theta.f(s_j) - log aux_dist(s_j) @@ -1373,9 +1374,8 @@ # We don't need to handle negative values separately, # because we don't need to take the log of the feature - # matrix sampleF. + # matrix sampleF. See my thesis, Section 4.4 - #logwminuslogZ = self._logw() - logZs[-1] logwminuslogZ = logw - logZ if self.external is None: averages = innerprod(self.sampleF, arrayexp(logwminuslogZ)) @@ -1385,8 +1385,8 @@ averages /= n mus.append(averages) - # Now we have T=trials vectors of the sample means. - # If trials > 1, estimate st dev of means and confidence intervals + # Now we have T=trials vectors of the sample means. If trials > 1, + # estimate st dev of means and confidence intervals ttrials = len(mus) # total number of trials performed if ttrials == 1: self.mu = mus[0] From scipy-svn at scipy.org Tue Dec 12 03:22:03 2006 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Tue, 12 Dec 2006 02:22:03 -0600 (CST) Subject: [Scipy-svn] r2397 - in trunk/Lib/sandbox/cdavid: . 
tests Message-ID: <20061212082203.A038A39C22C@new.scipy.org> Author: cdavid Date: 2006-12-12 02:21:54 -0600 (Tue, 12 Dec 2006) New Revision: 2397 Added: trunk/Lib/sandbox/cdavid/segmentaxis.py trunk/Lib/sandbox/cdavid/tests/test_segmentaxis.py Modified: trunk/Lib/sandbox/cdavid/Changelog trunk/Lib/sandbox/cdavid/TODO trunk/Lib/sandbox/cdavid/__init__.py trunk/Lib/sandbox/cdavid/autocorr.py trunk/Lib/sandbox/cdavid/tests/test_autocorr.py Log: Add fft autocorr + code from A.M Archibald for equivalent to buffer Modified: trunk/Lib/sandbox/cdavid/Changelog =================================================================== --- trunk/Lib/sandbox/cdavid/Changelog 2006-12-12 07:24:01 UTC (rev 2396) +++ trunk/Lib/sandbox/cdavid/Changelog 2006-12-12 08:21:54 UTC (rev 2397) @@ -1,5 +1,14 @@ -pyem (0.1) Tue, 28 Nov 2006 16:56:35 +0900 +cdavid (0.2) Tue, 12 Dec 2006 17:14:18 +0900 + * second release + * add autocorrelation based on fft (should check def for + complex arrays, eg use conjugate or not ?) + * add code segment_axis from A.M Archibald + +-- David Cournapeau + +cdavid (0.1) Tue, 28 Nov 2006 16:56:35 +0900 + * first release -- David Cournapeau Modified: trunk/Lib/sandbox/cdavid/TODO =================================================================== --- trunk/Lib/sandbox/cdavid/TODO 2006-12-12 07:24:01 UTC (rev 2396) +++ trunk/Lib/sandbox/cdavid/TODO 2006-12-12 08:21:54 UTC (rev 2397) @@ -1,6 +1,9 @@ -# Last Change: Tue Nov 28 05:00 PM 2006 J - +# Last Change: Tue Dec 12 05:00 PM 2006 J +Various things to do before submitting outside sandbox - there is no doc. - the handling of non contiguous arrays is not really elegant, and the code is difficult to maintain - rank > 2: must code in C ? (yuk) + - for correlation: no reason to support only autocorrelation. Also, it is +stupid to offer difference function for different implementation: should be an +argument (fft vs no fft). Basically, the current API is not good. 
Modified: trunk/Lib/sandbox/cdavid/__init__.py =================================================================== --- trunk/Lib/sandbox/cdavid/__init__.py 2006-12-12 07:24:01 UTC (rev 2396) +++ trunk/Lib/sandbox/cdavid/__init__.py 2006-12-12 08:21:54 UTC (rev 2397) @@ -1,8 +1,9 @@ -# Last Change: Tue Nov 28 04:00 PM 2006 J +# Last Change: Tue Dec 12 05:00 PM 2006 J from info import __doc__ from lpc import lpc2 as lpc -from autocorr import autocorr_oneside_nofft as autocorr +from autocorr import autocorr_oneside_nofft, autocorr_fft +from segmentaxis import segment_axis from numpy.testing import NumpyTest test = NumpyTest().test Modified: trunk/Lib/sandbox/cdavid/autocorr.py =================================================================== --- trunk/Lib/sandbox/cdavid/autocorr.py 2006-12-12 07:24:01 UTC (rev 2396) +++ trunk/Lib/sandbox/cdavid/autocorr.py 2006-12-12 08:21:54 UTC (rev 2397) @@ -1,5 +1,5 @@ #! /usr/bin/env python -# Last Change: Tue Nov 28 03:00 PM 2006 J +# Last Change: Tue Dec 12 05:00 PM 2006 J # TODO: - proper test # TODO: - proper profiling @@ -279,6 +279,40 @@ return res +def nextpow2(n): + """Returns p such as 2 ** p >= n """ + if 2 ** N.log2(n) == n: + return int(N.log2(n)) + else: + return int(N.log2(n) + 1) + +def autocorr_fft(signal, axis = -1): + """Return full autocorrelation along specified axis. 
Use fft + for computation.""" + if N.ndim(signal) == 0: + return signal + elif signal.ndim == 1: + n = signal.shape[0] + nfft = int(2 ** nextpow2(2 * n - 1)) + lag = n - 1 + a = fft(signal, n = nfft, axis = -1) + au = ifft(a * N.conj(a), n = nfft, axis = -1) + return N.require(N.concatenate((au[-lag:], au[:lag+1])), dtype = signal.dtype) + elif signal.ndim == 2: + n = signal.shape[axis] + lag = n - 1 + nfft = int(2 ** nextpow2(2 * n - 1)) + a = fft(signal, n = nfft, axis = axis) + au = ifft(a * N.conj(a), n = nfft, axis = axis) + if axis == 0: + return N.require(N.concatenate( (au[-lag:], au[:lag+1]), axis = axis), \ + dtype = signal.dtype) + else: + return N.require(N.concatenate( (au[:, -lag:], au[:, :lag+1]), + axis = axis), dtype = signal.dtype) + else: + raise RuntimeError("rank >2 not supported yet") + def bench(): size = 256 nframes = 4000 Added: trunk/Lib/sandbox/cdavid/segmentaxis.py =================================================================== --- trunk/Lib/sandbox/cdavid/segmentaxis.py 2006-12-12 07:24:01 UTC (rev 2396) +++ trunk/Lib/sandbox/cdavid/segmentaxis.py 2006-12-12 08:21:54 UTC (rev 2397) @@ -0,0 +1,93 @@ +import numpy as N +import unittest +from numpy.testing import NumpyTestCase, assert_array_almost_equal, assert_almost_equal, assert_equal +import warnings + +def segment_axis(a, length, overlap=0, axis=None, end='cut', endvalue=0): + """Generate a new array that chops the given array along the given axis into overlapping frames. + + example: + >>> segment_axis(arange(10), 4, 2) + array([[0, 1, 2, 3], + [2, 3, 4, 5], + [4, 5, 6, 7], + [6, 7, 8, 9]]) + + arguments: + a The array to segment + length The length of each frame + overlap The number of array elements by which the frames should overlap + axis The axis to operate on; if None, act on the flattened array + end What to do with the last frame, if the array is not evenly + divisible into pieces. 
Options are: + + 'cut' Simply discard the extra values + 'wrap' Copy values from the beginning of the array + 'pad' Pad with a constant value + + endvalue The value to use for end='pad' + + The array is not copied unless necessary (either because it is + unevenly strided and being flattened or because end is set to + 'pad' or 'wrap'). + """ + + if axis is None: + a = N.ravel(a) # may copy + axis = 0 + + l = a.shape[axis] + + if overlap>=length: + raise ValueError, "frames cannot overlap by more than 100%" + if overlap<0 or length<=0: + raise ValueError, "overlap must be nonnegative and length must be positive" + + if l<length or (l-length)%(length-overlap): + if l>length: + roundup = length + (1+(l-length)//(length-overlap))*(length-overlap) + rounddown = length + ((l-length)//(length-overlap))*(length-overlap) + else: + roundup = length + rounddown = 0 + assert rounddown<l<roundup + assert roundup==rounddown+(length-overlap) or (roundup==length and rounddown==0) + a = a.swapaxes(-1,axis) + + if end=='cut': + a = a[...,:rounddown] + elif end in ['pad','wrap']: # copying will be necessary + s = list(a.shape) + s[-1]=roundup + b = N.empty(s,dtype=a.dtype) + b[...,:l] = a + if end=='pad': + b[...,l:] = endvalue + elif end=='wrap': + b[...,l:] = a[...,:roundup-l] + a = b + + a = a.swapaxes(-1,axis) + + + l = a.shape[axis] + if l==0: + raise ValueError, "Not enough data points to segment array in 'cut' mode; try 'pad' or 'wrap'" + assert l>=length + assert (l-length)%(length-overlap) == 0 + n = 1+(l-length)//(length-overlap) + s = a.strides[axis] + newshape = a.shape[:axis]+(n,length)+a.shape[axis+1:] + newstrides = a.strides[:axis]+((length-overlap)*s,s) + a.strides[axis+1:] + + try: + return N.ndarray.__new__(N.ndarray,strides=newstrides,shape=newshape,buffer=a,dtype=a.dtype) + except TypeError: + warnings.warn("Problem with ndarray creation forces copy.") + a = a.copy() + # Shape doesn't change but strides does + newstrides = a.strides[:axis]+((length-overlap)*s,s) + a.strides[axis+1:] + return N.ndarray.__new__(N.ndarray,strides=newstrides,shape=newshape,buffer=a,dtype=a.dtype) + + + Modified: trunk/Lib/sandbox/cdavid/tests/test_autocorr.py =================================================================== --- trunk/Lib/sandbox/cdavid/tests/test_autocorr.py 2006-12-12 07:24:01 UTC (rev 2396) +++ trunk/Lib/sandbox/cdavid/tests/test_autocorr.py 2006-12-12 08:21:54 UTC (rev 2397) @@ -1,9 +1,10 @@ #! 
/usr/bin/env python -# Last Change: Tue Nov 28 05:00 PM 2006 J +# Last Change: Tue Dec 12 05:00 PM 2006 J from numpy.testing import * from numpy.random import randn, seed -from numpy import correlate, array, concatenate, require +from numpy import correlate, array, concatenate, require, corrcoef +from numpy.fft import fft, ifft from numpy.ctypeslib import ndpointer, load_library from ctypes import c_uint @@ -11,6 +12,7 @@ set_package_path() from cdavid.autocorr import _raw_autocorr_1d, _raw_autocorr_1d_noncontiguous from cdavid.autocorr import autocorr_oneside_nofft as autocorr +from cdavid.autocorr import autocorr_fft from cdavid.autocorr import _autocorr_oneside_nofft_py as autocorr_py restore_path() @@ -289,6 +291,54 @@ yr = autocorr_py(xt, lag, axis = axis) assert_array_equal(yt, yr) +class test_autocorr_fft(NumpyTestCase): + n = 5 + d = 3 + def check_r1r(self): + """real case, rank 1""" + a = randn(self.n) + + aref = correlate(a, a, mode = 'full') + atest = autocorr_fft(a) + assert_array_almost_equal(atest, aref, decimal = md) + assert atest.dtype == a.dtype + + def check_r1c(self): + """complex case, rank 1""" + a = randn(self.n) + 1.0j * randn(self.n) + + atest = autocorr_fft(a) + aref = numpy.sum(a * numpy.conj(a)) + assert_array_almost_equal(atest[self.n - 1], aref, decimal = md) + assert atest.dtype == a.dtype + + def check_r2c(self): + """complex case, rank 2""" + pass + + def check_r2r(self): + """real case, rank 2""" + + # axis 0 + a = randn(self.n, self.d) + axis = 0 + + c = [correlate(a[:, i], a[:, i], mode = 'full') for i in range(self.d)] + aref = array(c).T + + atest = autocorr_fft(a, axis = axis) + assert_array_almost_equal(atest, aref, decimal = md) + + # axis 1 + a = randn(self.n, self.d) + axis = 1 + + c = [correlate(a[i], a[i], mode = 'full') for i in range(self.n)] + aref = array(c) + + atest = autocorr_fft(a, axis = axis) + assert_array_almost_equal(atest, aref, decimal = md) + if __name__ == "__main__": ScipyTest().run() Added: 
trunk/Lib/sandbox/cdavid/tests/test_segmentaxis.py =================================================================== --- trunk/Lib/sandbox/cdavid/tests/test_segmentaxis.py 2006-12-12 07:24:01 UTC (rev 2396) +++ trunk/Lib/sandbox/cdavid/tests/test_segmentaxis.py 2006-12-12 08:21:54 UTC (rev 2397) @@ -0,0 +1,64 @@ +#! /usr/bin/env python +# Last Change: Fri Nov 24 04:00 PM 2006 J + +from numpy.testing import * + +import numpy as N + +set_package_path() +from segmentaxis import segment_axis +restore_path() + +# #Optional: +# set_local_path() +# # import modules that are located in the same directory as this file. +# restore_path() + +class test_segment(NumpyTestCase): + def check_simple(self): + assert_equal(segment_axis(N.arange(6),length=3,overlap=0), + N.array([[0,1,2],[3,4,5]])) + + assert_equal(segment_axis(N.arange(7),length=3,overlap=1), + N.array([[0,1,2],[2,3,4],[4,5,6]])) + + assert_equal(segment_axis(N.arange(7),length=3,overlap=2), + N.array([[0,1,2],[1,2,3],[2,3,4],[3,4,5],[4,5,6]])) + + def check_error_checking(self): + self.assertRaises(ValueError, + lambda: segment_axis(N.arange(7),length=3,overlap=-1)) + self.assertRaises(ValueError, + lambda: segment_axis(N.arange(7),length=0,overlap=0)) + self.assertRaises(ValueError, + lambda: segment_axis(N.arange(7),length=3,overlap=3)) + self.assertRaises(ValueError, + lambda: segment_axis(N.arange(7),length=8,overlap=3)) + + def check_ending(self): + assert_equal(segment_axis(N.arange(6),length=3,overlap=1,end='cut'), + N.array([[0,1,2],[2,3,4]])) + assert_equal(segment_axis(N.arange(6),length=3,overlap=1,end='wrap'), + N.array([[0,1,2],[2,3,4],[4,5,0]])) + assert_equal(segment_axis(N.arange(6),length=3,overlap=1,end='pad',endvalue=-17), + N.array([[0,1,2],[2,3,4],[4,5,-17]])) + + def check_multidimensional(self): + + assert_equal(segment_axis(N.ones((2,3,4,5,6)),axis=3,length=3,overlap=1).shape, + (2,3,4,2,3,6)) + + assert_equal(segment_axis(N.ones((2,5,4,3,6)).swapaxes(1,3),axis=3,length=3,overlap=1).shape, 
+ (2,3,4,2,3,6)) + + assert_equal(segment_axis(N.ones((2,3,4,5,6)),axis=2,length=3,overlap=1,end='cut').shape, + (2,3,1,3,5,6)) + + assert_equal(segment_axis(N.ones((2,3,4,5,6)),axis=2,length=3,overlap=1,end='wrap').shape, + (2,3,2,3,5,6)) + + assert_equal(segment_axis(N.ones((2,3,4,5,6)),axis=2,length=3,overlap=1,end='pad').shape, + (2,3,2,3,5,6)) + +if __name__=='__main__': + NumpyTest().run() From scipy-svn at scipy.org Tue Dec 12 07:31:49 2006 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Tue, 12 Dec 2006 06:31:49 -0600 (CST) Subject: [Scipy-svn] r2398 - trunk/Lib/maxentropy Message-ID: <20061212123149.DFA4A39C26F@new.scipy.org> Author: edschofield Date: 2006-12-12 06:31:43 -0600 (Tue, 12 Dec 2006) New Revision: 2398 Modified: trunk/Lib/maxentropy/maxentropy.py Log: Small cleanups to maxentropy.py Modified: trunk/Lib/maxentropy/maxentropy.py =================================================================== --- trunk/Lib/maxentropy/maxentropy.py 2006-12-12 08:21:54 UTC (rev 2397) +++ trunk/Lib/maxentropy/maxentropy.py 2006-12-12 12:31:43 UTC (rev 2398) @@ -554,6 +554,7 @@ self.fnevals = 0 self.gradevals = 0 + self.iters = 0 self.callingback = False # Clear the stored duals and gradient norms @@ -1401,12 +1402,10 @@ # -log(n-1) + logsumexp(2*log|Z_k - meanZ|) self.logZapprox = logsumexp(logZs) - math.log(ttrials) - self.logZsapprox = logZs - #logstdevZ = 0.5*(-math.log(n-1) + logsumexp([2.*logdiffexp(logZ_k, self.logZapprox) for logZ_k in logZs])) stdevlogZ = numpy.array(logZs).std() - Etemp = numpy.array(mus) - self.varE = columnvariances(Etemp) - self.mu = columnmeans(Etemp) + mus = numpy.array(mus) + self.varE = columnvariances(mus) + self.mu = columnmeans(mus) return From scipy-svn at scipy.org Tue Dec 12 08:13:59 2006 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Tue, 12 Dec 2006 07:13:59 -0600 (CST) Subject: [Scipy-svn] r2399 - in trunk/Lib/sandbox/cdavid: . 
tests Message-ID: <20061212131359.6187D39C27F@new.scipy.org> Author: cdavid Date: 2006-12-12 07:13:53 -0600 (Tue, 12 Dec 2006) New Revision: 2399 Modified: trunk/Lib/sandbox/cdavid/Changelog trunk/Lib/sandbox/cdavid/autocorr.py trunk/Lib/sandbox/cdavid/info.py trunk/Lib/sandbox/cdavid/tests/test_autocorr.py Log: Correct bug in nextpow2 Modified: trunk/Lib/sandbox/cdavid/Changelog =================================================================== --- trunk/Lib/sandbox/cdavid/Changelog 2006-12-12 12:31:43 UTC (rev 2398) +++ trunk/Lib/sandbox/cdavid/Changelog 2006-12-12 13:13:53 UTC (rev 2399) @@ -1,3 +1,10 @@ +cdavid (0.2.1) Tue, 12 Dec 2006 17:14:18 +0900 + + * Correct bug in nextpow2 which causes circular + autocorrelation problem + +-- David Cournapeau + cdavid (0.2) Tue, 12 Dec 2006 17:14:18 +0900 * second release Modified: trunk/Lib/sandbox/cdavid/autocorr.py =================================================================== --- trunk/Lib/sandbox/cdavid/autocorr.py 2006-12-12 12:31:43 UTC (rev 2398) +++ trunk/Lib/sandbox/cdavid/autocorr.py 2006-12-12 13:13:53 UTC (rev 2399) @@ -1,5 +1,5 @@ #! /usr/bin/env python -# Last Change: Tue Dec 12 05:00 PM 2006 J +# Last Change: Tue Dec 12 07:00 PM 2006 J # TODO: - proper test # TODO: - proper profiling @@ -282,9 +282,9 @@ def nextpow2(n): """Returns p such as 2 ** p >= n """ if 2 ** N.log2(n) == n: - return int(N.log2(n)) + return N.floor(N.log2(n)) + 1 else: - return int(N.log2(n) + 1) + return N.floor(N.log2(n)) + 2 def autocorr_fft(signal, axis = -1): """Return full autocorrelation along specified axis. 
Use fft Modified: trunk/Lib/sandbox/cdavid/info.py =================================================================== --- trunk/Lib/sandbox/cdavid/info.py 2006-12-12 12:31:43 UTC (rev 2398) +++ trunk/Lib/sandbox/cdavid/info.py 2006-12-12 13:13:53 UTC (rev 2399) @@ -4,7 +4,7 @@ Copyright: David Cournapeau 2006 License: BSD-style (see LICENSE.txt in main source directory) """ -version = '0.1' +version = '0.2.1' depends = ['linalg'] ignore = False Modified: trunk/Lib/sandbox/cdavid/tests/test_autocorr.py =================================================================== --- trunk/Lib/sandbox/cdavid/tests/test_autocorr.py 2006-12-12 12:31:43 UTC (rev 2398) +++ trunk/Lib/sandbox/cdavid/tests/test_autocorr.py 2006-12-12 13:13:53 UTC (rev 2399) @@ -1,5 +1,5 @@ #! /usr/bin/env python -# Last Change: Tue Dec 12 05:00 PM 2006 J +# Last Change: Tue Dec 12 07:00 PM 2006 J from numpy.testing import * from numpy.random import randn, seed @@ -294,6 +294,11 @@ class test_autocorr_fft(NumpyTestCase): n = 5 d = 3 + def check_nextpow2(self): + assert(nextpow2(255) == 8) + assert(nextpow2(256) == 8) + assert(nextpow2(257) == 9) + def check_r1r(self): """real case, rank 1""" a = randn(self.n) From scipy-svn at scipy.org Tue Dec 12 14:10:21 2006 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Tue, 12 Dec 2006 13:10:21 -0600 (CST) Subject: [Scipy-svn] r2400 - trunk/Lib/sandbox/timeseries/examples Message-ID: <20061212191021.D6E3639C2B9@new.scipy.org> Author: mattknox_ca Date: 2006-12-12 13:10:16 -0600 (Tue, 12 Dec 2006) New Revision: 2400 Modified: trunk/Lib/sandbox/timeseries/examples/example.py Log: ts.where usage was redundant in the examples Modified: trunk/Lib/sandbox/timeseries/examples/example.py =================================================================== --- trunk/Lib/sandbox/timeseries/examples/example.py 2006-12-12 13:13:53 UTC (rev 2399) +++ trunk/Lib/sandbox/timeseries/examples/example.py 2006-12-12 19:10:16 UTC (rev 2400) @@ -6,19 +6,13 @@ bSer = 
ts.TimeSeries(np.random.uniform(-100,100,600),dtype=np.float64,freq='B',observed='SUMMED',startIndex=ts.thisday('B')-600) -""" -Set negative values to zero. Note that ts.where returns a TimeSeries object. -Indexing by booleans can only be done with TimeSeries objects of the same -frequency. -""" -bSer[ts.where(bSer < 0,True,False)] = 0 +# Set negative values to zero. +bSer[bSer < 0] = 0 -""" -Set values occurring on Fridays to 100. -""" +# Set values occurring on Fridays to 100. weekdays = ts.day_of_week(ts.tser(bSer.firstValue(asDate=True),bSer.lastValue(asDate=True))) -bSer[ts.where(weekdays == 4,True,False)] = 100 +bSer[weekdays == 4] = 100 """ From scipy-svn at scipy.org Tue Dec 12 20:30:34 2006 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Tue, 12 Dec 2006 19:30:34 -0600 (CST) Subject: [Scipy-svn] r2401 - in trunk/Lib/io: . tests Message-ID: <20061213013034.64C3C39C017@new.scipy.org> Author: matthew.brett at gmail.com Date: 2006-12-12 19:30:27 -0600 (Tue, 12 Dec 2006) New Revision: 2401 Modified: trunk/Lib/io/recaster.py trunk/Lib/io/tests/test_recaster.py Log: Cleaning up, bugfixes to recaster class Modified: trunk/Lib/io/recaster.py =================================================================== --- trunk/Lib/io/recaster.py 2006-12-12 19:10:16 UTC (rev 2400) +++ trunk/Lib/io/recaster.py 2006-12-13 01:30:27 UTC (rev 2401) @@ -59,11 +59,6 @@ are not in the list of ATs. 
''' - _sctype_trans = {'complex': 'c', 'c': 'c', - 'float': 'f', 'f': 'f', - 'int': 'i', 'i': 'i', - 'uint': 'u', 'u': 'u'} - _sctype_attributes = sctype_attributes() def __init__(self, sctype_list=None, @@ -71,6 +66,7 @@ downcast_fp_to_int = True, downcast_int_to_int = True, upcast_int_to_fp = True, + upcast_fp_to_int = True, sctype_tols=None): ''' Set types for which we are attempting to downcast @@ -85,6 +81,8 @@ smaller of same type upcast_int_to_fp - if True, tries to upcast integers that could not be downcast to floating point type + upcast_fp_to_int - if True, tries to upcast floating point arrays + that cannot be downcast, to integers sctype_tols - dictionary key datatype, values rtol, tol to specify tolerances for checking near equality in downcasting @@ -101,6 +99,7 @@ self.downcast_fp_to_int = downcast_fp_to_int self.downcast_int_to_int = downcast_int_to_int self.upcast_int_to_fp = upcast_int_to_fp + self.upcast_fp_to_int = upcast_fp_to_int # Tolerances if sctype_tols is not None: self.sctype_tols.update(sctype_tols) @@ -142,21 +141,17 @@ 'atol': F.tiny} return t_dict - def sctypes_by_size(self, sctype): + def sctypes_by_size(self, kind): ''' Returns storage size ordered list of entries of scalar type sctype Input - sctype - one of "complex" or "c", "float" or "f" , - "int" or "i", "uint" or "u" + kind - one of "c", "f", "i" or "u" + (for complex, float, integer, unsigned integer) ''' - try: - sctype = self._sctype_trans[sctype] - except KeyError: - raise TypeError, 'Did not recognize sctype %s' % sctype D = [] for t in self.sctype_list: dt = dtype(t) - if dt.kind == sctype: + if dt.kind == kind: D.append([t, dt.itemsize]) D.sort(lambda x, y: cmp(y[1], x[1])) return D @@ -212,10 +207,12 @@ break return out_t - def tols_from_sctype(self, sctype): - ''' Return rtol and atol for sctype ''' - tols = self.sctype_tols[sctype] - return tols['rtol'], tols['atol'] + def all_close(self, arr1, arr2): + ''' True if arr1 arr2 close with tols for arr1 ''' + tols = 
self.sctype_tols[arr1.dtype.type] + return allclose(arr1, arr2, + rtol=tols['rtol'], + atol=tols['atol']) def arr_if_valid(self, arr): ''' Returns array if of valid sctype, None otherwise ''' @@ -223,35 +220,28 @@ return None return arr - def smallest_same_kind(self, arr): + def smallest_of_kind(self, arr, kind=None, max_size=None): ''' Return arr maybe downcast to same kind, smaller storage + Inputs + arr - array to possibly downcast + kind - kind of array to downcast within + (if None (default) use arr.dtype.kind) + max_size - maximum size of sctype to return (in bytes) + (if None, set to arr.dtype.itemsize-1) If arr cannot be downcast within given tolerances, then: return arr if arr is in list of acceptable types, otherwise return None ''' dtp = arr.dtype - dti = dtp.itemsize - sctypes = self.sized_sctypes[dtp.kind] - sctypes = [t[0] for i, t in enumerate(sctypes) if t[1] < dti] - return self._smallest_from_sctypes(arr, sctypes) - - def _smallest_from_sctypes(self, arr, sctypes): - ''' Returns array recast to smallest possible type from list - - Inputs - arr - array to recast - sctypes - list of scalar types to try - - sctypes is expected to be ordered by size with largest first, - and to all be of the same type. 
It would not usually be - sensible to use this routine for integers (see - smallest_int_sctype method) - - Returns None if no recast is within tolerance - ''' - sct = arr.dtype.type - rtol, atol = self.tols_from_sctype(sct) + if kind is None: + kind = dtp.kind + if max_size is None: + max_size = dtp.itemsize-1 + sctypes = self.sized_sctypes[kind] + sctypes = [t[0] for i, t in enumerate(sctypes) if t[1] <= max_size] + tols = self.sctype_tols[dtp.type] + rtol, atol = tols['rtol'], tols['atol'] ret_arr = arr for T in sctypes: test_arr = arr.astype(T) @@ -279,94 +269,112 @@ sz = tsz return sct - def downcast(self, arr): - dtk = arr.dtype.kind - if dtk == 'c': - ret = self.downcast_complex(arr) - elif dtk == 'f': - ret = self.downcast_float(arr) - elif dtk in ('u', 'i'): - ret = self.downcast_integer(arr) - else: - raise TypeError, 'Do not recognize array kind %s' % dtk - if ret is None: - raise ValueError, 'Could not downcast array within precision' - return ret - - def downcast_complex(self, arr): - ''' Downcasts complex array to smaller type if possible ''' - # can we downcast to float? 
- if self.downcast_fp_to_fp: - dt = arr.dtype - dti = ceil(dt.itemsize / 2) - sctypes = self.sized_sctypes['f'] - flts = [t[0] for i, t in enumerate(sctypes) if t[1] <= dti] - if flts: # There are smaller floats to try - test_arr = arr.astype(flts[0]) - rtol, atol = self.tols_from_sctype(dt.type) - if allclose(arr, test_arr, rtol, atol): - arr = test_arr - # try downcasting to int or another complex type - return self.downcast_to_int_or_same(arr) - - def downcast_to_int_or_same(self, arr): - ''' Downcast to integer or smaller of same kind ''' - # Try integer - if self.downcast_fp_to_int: - test_arr = self.downcast_integer(arr) - rtol, atol = self.tols_from_sctype(arr.dtype.type) - if allclose(arr, test_arr, rtol, atol): - return test_arr - # Otherwise descend the types of same kind - if self.downcast_fp_to_fp: - return self.smallest_same_kind(arr) - return self.arr_if_valid(arr) - - downcast_float = downcast_to_int_or_same + def cast_to_integer(self, arr): + ''' Casts arr to smallest integer containing range - def downcast_integer(self, arr): - ''' Downcasts arr to integer - Returns None if range of arr cannot be contained in acceptable integer types ''' - if not self.downcast_int_to_int: - return arr_if_valid(arr) mx = amax(arr) mn = amin(arr) idt = self.smallest_int_sctype(mx, mn) - if idt: + if idt is not None: return arr.astype(idt) return None - def recast(self, arr): - ''' Try arr downcast, upcast if necesary to get compatible type ''' - try: - return self.downcast(arr) - except ValueError: - pass - dt = arr.dtype + def downcast_or_none(self, arr): + ''' Downcast array to smaller or same type + + If cannot find smaller type within tolerance, + return array if is already valid type, otherwise None + ''' + dtp = arr.dtype + dtk = dtp.kind + dti = dtp.itemsize + if dtk in ('c', 'f'): + if self.downcast_fp_to_int: + test_arr = self.cast_to_integer(arr) + if test_arr is not None: + if self.all_close(arr, test_arr): + return test_arr + if self.downcast_fp_to_fp: + 
if dtk == 'c': + # Try downcasting to float + max_size = ceil(dti / 2.0) + test_arr = self.smallest_of_kind(arr, 'f', max_size) + if test_arr is not None: + return test_arr + test_arr = self.smallest_of_kind(arr) + if test_arr is not None: + return test_arr + elif dtk in ('u', 'i'): + if self.downcast_int_to_int: + test_arr = self.cast_to_integer(arr) + if test_arr is not None: + if test_arr.dtype.itemsize <= dti: + return test_arr + else: + raise TypeError, 'Do not recognize array kind %s' % dtk + return self.arr_if_valid(arr) + + + def recast_or_none(self, arr): + ''' Recast array to type in type list + + If cannot find smaller type within tolerance, by downcasting, + and array not of valid type already, then try larger + types. If none of these return an array within tolerance, + return None + ''' + test_arr = self.downcast_or_none(arr) + if test_arr is not None: + return test_arr # Could not downcast, arr dtype not in known list - # Try upcast to larger dtype of same kind - sct = dt.type - udt = self.capable_sctype[sct] - if udt is not None: - return arr.astype(udt) - # Could be an integer type that we have not tried - # to downcast - if not self.downcast_int_to_int and dt.kind in ('u', 'i'): - arr = self.downcast_integer(arr) - if arr is not None: - return arr - # We are stuck for floats and complex now - # Can try casting integers to floats - if self.upcast_int_to_fp and dt.kind in ('i', 'u'): - sctypes = self.sized_sctypes['f'] - arr = self._smallest_from_sctypes(arr, sctypes) - if arr is not None: - return arr - raise ValueError, 'Could not recast array within precision' + dtp = arr.dtype + dtk = dtp.kind + sct = dtp.type + if dtk in ('c', 'f'): + # Try upcast to larger dtype of same kind + udt = self.capable_sctype[sct] + if udt is not None: + return arr.astype(udt) + # Try casting to an integer + if self.upcast_fp_to_int: + test_arr = self.cast_to_integer(arr) + if test_arr is not None: + if self.all_close(arr, test_arr): + return test_arr + else: # 
integer types + # try casting to any possible integer type + test_arr = self.cast_to_integer(arr) + if test_arr is not None: + return test_arr + # Can try casting integers to floats + if self.upcast_int_to_fp: + flts = self._sized_sctypes['f'] + if flts: + flt_arr = arr.astype(flts[0]) + if self.all_close(arr, flt_arr): + if self.downcast_fp_to_fp: + max_size = flt_arr.dtype.itemsize - 1 + test_arr = self.smallest_of_kind(arr, 'f', max_size) + if test_arr is not None: + return test_arr + return flt_arr + return None + + def downcast(self, arr): + ret = self.downcast_or_none(arr) + if ret is None: + raise TypeError, 'Cannot downcast array within tolerance' + return ret + def recast(self, arr): + ret = self.recast_or_none(arr) + if ret is None: + raise TypeError, 'Cannot recast array within tolerance' + return ret + def recast_best_sctype(self, arr): ''' Recast array, return closest sctype to original Modified: trunk/Lib/io/tests/test_recaster.py =================================================================== --- trunk/Lib/io/tests/test_recaster.py 2006-12-12 19:10:16 UTC (rev 2400) +++ trunk/Lib/io/tests/test_recaster.py 2006-12-13 01:30:27 UTC (rev 2401) @@ -29,14 +29,13 @@ F = N.finfo(T) R = Recaster(sctype_tols={T: {'rtol': F.eps*2, 'atol': F.tiny*2, 'silly': 'silly text'}}) assert tols != R.sctype_tols, 'Tols dictionary not set correctly' - r, a = R.tols_from_sctype(T) - assert r == F.eps*2, 'Rtol not correctly set' - assert a == F.tiny*2, 'Atol not correctly set' + assert R.sctype_tols[T]['rtol'] == F.eps*2, 'Rtol not correctly set' + assert R.sctype_tols[T]['atol'] == F.tiny*2, 'Atol not correctly set' # Sctype size lists # Integer sizes # Cabable types - def test_smallest_same_kind(self): + def test_smallest_of_kind(self): R = self.recaster value = 1 # smallest same kind @@ -54,7 +53,7 @@ expect_none = ((req_type is None) or ((tdtsz <= rdtsz) and not ok_T)) A = N.array(value, T) - C = R.smallest_same_kind(A) + C = R.smallest_of_kind(A) if expect_none: 
assert C is None, 'Expecting None for %s' % T else: @@ -98,7 +97,6 @@ R = self.recaster for T in (N.complex128, N.complex64, N.float64, N.uint64): - B = R.downcast(N.array(value, T)) + B = R.downcast_or_none(N.array(value, T)) assert B is not None, 'Got None for %s' % T assert B.dtype.type == N.int32 - From scipy-svn at scipy.org Wed Dec 13 00:46:54 2006 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Tue, 12 Dec 2006 23:46:54 -0600 (CST) Subject: [Scipy-svn] r2402 - trunk/Lib/maxentropy Message-ID: <20061213054654.7824739C02F@new.scipy.org> Author: edschofield Date: 2006-12-12 23:46:49 -0600 (Tue, 12 Dec 2006) New Revision: 2402 Modified: trunk/Lib/maxentropy/maxentropy.py Log: Refactored internal variable names in maxentropy.py, so my thesis can serve as documentation Modified: trunk/Lib/maxentropy/maxentropy.py =================================================================== --- trunk/Lib/maxentropy/maxentropy.py 2006-12-13 01:30:27 UTC (rev 2401) +++ trunk/Lib/maxentropy/maxentropy.py 2006-12-13 05:46:49 UTC (rev 2402) @@ -520,7 +520,7 @@ """Clears the interim results of computations depending on the parameters and the sample. """ - for var in ['mu', 'logZ', 'logZapprox', 'logw']: + for var in ['mu', 'logZ', 'logZapprox', 'logv']: if hasattr(self, var): exec('del self.' + var) @@ -1228,14 +1228,14 @@ if hasattr(self, 'logZapprox'): return self.logZapprox - # Compute log w = log [p_dot(s_j)/aux_dist(s_j)] for + # Compute log v = log [p_dot(s_j)/aux_dist(s_j)] for # j=1,...,n=|sample| using a precomputed matrix of sample # features. - logw = self._logw() + logv = self._logv() - # Good, we have our logw. Now: - n = len(logw) - self.logZapprox = logsumexp(logw) - math.log(n) + # Good, we have our logv. 
Now: + n = len(logv) + self.logZapprox = logsumexp(logv) - math.log(n) return self.logZapprox @@ -1253,43 +1253,43 @@ self.estimate() return self.mu - def _logw(self): + def _logv(self): """This function helps with caching of interim computational results. It is designed to be called internally, not by a user. This is defined as the array of unnormalized importance sampling weights corresponding to the sample x_j whose features are represented as the columns of self.sampleF. - logw_j = p_dot(x_j) / q(x_j), + logv_j = p_dot(x_j) / q(x_j), where p_dot(x_j) = p_0(x_j) exp(theta . f(x_j)) is the unnormalized pdf value of the point x_j under the current model. """ - # First see whether logw has been precomputed - if hasattr(self, 'logw'): - return self.logw + # First see whether logv has been precomputed + if hasattr(self, 'logv'): + return self.logv - # Compute log w = log [p_dot(s_j)/aux_dist(s_j)] for + # Compute log v = log [p_dot(s_j)/aux_dist(s_j)] for # j=1,...,n=|sample| using a precomputed matrix of sample # features. if self.external is None: paramsdotF = innerprodtranspose(self.sampleF, self.params) - logw = paramsdotF - self.samplelogprobs + logv = paramsdotF - self.samplelogprobs # Are we minimizing KL divergence between the model and a prior # density p_0? if self.priorlogprobs is not None: - logw += self.priorlogprobs + logv += self.priorlogprobs else: e = self.external paramsdotF = innerprodtranspose(self.externalFs[e], self.params) - logw = paramsdotF - self.externallogprobs[e] + logv = paramsdotF - self.externallogprobs[e] # Are we minimizing KL divergence between the model and a prior # density p_0? if self.externalpriorlogprobs is not None: - logw += self.externalpriorlogprobs[e] + logv += self.externalpriorlogprobs[e] - # Good, we have our logw. Now: - self.logw = logw - return logw + # Good, we have our logv. 
Now: + self.logv = logv + return logv def estimate(self): @@ -1358,31 +1358,21 @@ if (not self.staticsample) or self.matrixtrials > 1: self.resample() - logw = self._logw() - n = len(logw) + logv = self._logv() + n = len(logv) logZ = self.lognormconst() logZs.append(logZ) - # # 1. Compute log w = log [p_dot(s_j)/aux_dist(s_j)] for - # # j=1,...,n=|sample| using a precomputed matrix of sample - # # features. - # logw = self._logw() - # - # # 2. Good, we have our logw. Now: - # n = len(logw) - # lse = logsumexp(logw) - # logZs.append(lse - math.log(n)) - # We don't need to handle negative values separately, # because we don't need to take the log of the feature # matrix sampleF. See my thesis, Section 4.4 - logwminuslogZ = logw - logZ + logu = logv - logZ if self.external is None: - averages = innerprod(self.sampleF, arrayexp(logwminuslogZ)) + averages = innerprod(self.sampleF, arrayexp(logu)) else: averages = innerprod(self.externalFs[self.external], \ - arrayexp(logwminuslogZ)) + arrayexp(logu)) averages /= n mus.append(averages) From scipy-svn at scipy.org Wed Dec 13 06:03:45 2006 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Wed, 13 Dec 2006 05:03:45 -0600 (CST) Subject: [Scipy-svn] r2403 - in trunk/Lib/io: . 
tests Message-ID: <20061213110345.2574939C076@new.scipy.org> Author: matthew.brett at gmail.com Date: 2006-12-13 05:03:40 -0600 (Wed, 13 Dec 2006) New Revision: 2403 Modified: trunk/Lib/io/recaster.py trunk/Lib/io/tests/test_recaster.py Log: More efficiency in downcast, recast Modified: trunk/Lib/io/recaster.py =================================================================== --- trunk/Lib/io/recaster.py 2006-12-13 05:46:49 UTC (rev 2402) +++ trunk/Lib/io/recaster.py 2006-12-13 11:03:40 UTC (rev 2403) @@ -214,12 +214,6 @@ rtol=tols['rtol'], atol=tols['atol']) - def arr_if_valid(self, arr): - ''' Returns array if of valid sctype, None otherwise ''' - if arr.dtype.type not in self.sctype_list: - return None - return arr - def smallest_of_kind(self, arr, kind=None, max_size=None): ''' Return arr maybe downcast to same kind, smaller storage @@ -249,7 +243,9 @@ ret_arr = test_arr else: break - return self.arr_if_valid(ret_arr) + if ret_arr.dtype.type not in self.sctype_list: + return None + return ret_arr def smallest_int_sctype(self, mx, mn): ''' Return integer type with smallest storage containing mx and mn @@ -282,7 +278,7 @@ return arr.astype(idt) return None - def downcast_or_none(self, arr): + def downcast(self, arr, allow_larger_integer=False): ''' Downcast array to smaller or same type If cannot find smaller type within tolerance, @@ -291,12 +287,16 @@ dtp = arr.dtype dtk = dtp.kind dti = dtp.itemsize + int_arr = None if dtk in ('c', 'f'): if self.downcast_fp_to_int: test_arr = self.cast_to_integer(arr) if test_arr is not None: if self.all_close(arr, test_arr): - return test_arr + if test_arr.dtype.itemsize < dti: + return test_arr + else: + int_arr = test_arr if self.downcast_fp_to_fp: if dtk == 'c': # Try downcasting to float @@ -311,14 +311,19 @@ if self.downcast_int_to_int: test_arr = self.cast_to_integer(arr) if test_arr is not None: - if test_arr.dtype.itemsize <= dti: + if test_arr.dtype.itemsize < dti: return test_arr + else: + int_arr = test_arr else: 
raise TypeError, 'Do not recognize array kind %s' % dtk - return self.arr_if_valid(arr) - + if arr.dtype.type in self.sctype_list: + return arr + if allow_larger_integer and int_arr is not None: + return int_arr + raise TypeError, 'Cannot downcast array within tolerance' - def recast_or_none(self, arr): + def recast(self, arr): ''' Recast array to type in type list If cannot find smaller type within tolerance, by downcasting, @@ -326,9 +331,10 @@ types. If none of these return an array within tolerance, return None ''' - test_arr = self.downcast_or_none(arr) - if test_arr is not None: - return test_arr + try: + return self.downcast(arr, allow_larger_integer=True) + except ValueError: + pass # Could not downcast, arr dtype not in known list dtp = arr.dtype dtk = dtp.kind @@ -361,20 +367,8 @@ if test_arr is not None: return test_arr return flt_arr - return None + raise TypeError, 'Cannot recast array within tolerance' - def downcast(self, arr): - ret = self.downcast_or_none(arr) - if ret is None: - raise TypeError, 'Cannot downcast array within tolerance' - return ret - - def recast(self, arr): - ret = self.recast_or_none(arr) - if ret is None: - raise TypeError, 'Cannot recast array within tolerance' - return ret - def recast_best_sctype(self, arr): ''' Recast array, return closest sctype to original Modified: trunk/Lib/io/tests/test_recaster.py =================================================================== --- trunk/Lib/io/tests/test_recaster.py 2006-12-13 05:46:49 UTC (rev 2402) +++ trunk/Lib/io/tests/test_recaster.py 2006-12-13 11:03:40 UTC (rev 2403) @@ -97,6 +97,6 @@ R = self.recaster for T in (N.complex128, N.complex64, N.float64, N.uint64): - B = R.downcast_or_none(N.array(value, T)) + B = R.downcast(N.array(value, T)) assert B is not None, 'Got None for %s' % T assert B.dtype.type == N.int32 From scipy-svn at scipy.org Wed Dec 13 09:28:03 2006 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Wed, 13 Dec 2006 08:28:03 -0600 (CST) Subject: 
[Scipy-svn] r2404 - trunk/Lib/sandbox/timeseries Message-ID: <20061213142803.1146639C29B@new.scipy.org> Author: mattknox_ca Date: 2006-12-13 08:27:59 -0600 (Wed, 13 Dec 2006) New Revision: 2404 Modified: trunk/Lib/sandbox/timeseries/tsdate.py Log: Modified: trunk/Lib/sandbox/timeseries/tsdate.py =================================================================== --- trunk/Lib/sandbox/timeseries/tsdate.py 2006-12-13 11:03:40 UTC (rev 2403) +++ trunk/Lib/sandbox/timeseries/tsdate.py 2006-12-13 14:27:59 UTC (rev 2404) @@ -12,19 +12,19 @@ if val is not None: if self.freq == 'D': - self.__date = val+originDate + self.mxDate = val+originDate elif self.freq == 'B': - self.__date = originDate + val + (val//5)*7 - (val//5)*5 + self.mxDate = originDate + val + (val//5)*7 - (val//5)*5 elif self.freq == 'S': - self.__date = secondlyOriginDate + mx.DateTime.DateTimeDeltaFromSeconds(val) + self.mxDate = secondlyOriginDate + mx.DateTime.DateTimeDeltaFromSeconds(val) elif self.freq == 'M': - self.__date = originDate + mx.DateTime.RelativeDateTime(months=val, day=-1) + self.mxDate = originDate + mx.DateTime.RelativeDateTime(months=val, day=-1) elif self.freq == 'A': - self.__date = originDate + mx.DateTime.RelativeDateTime(years=val, month=-1, day=-1) + self.mxDate = originDate + mx.DateTime.RelativeDateTime(years=val, month=-1, day=-1) elif self.freq == 'Q': - self.__date = originDate + 1 + mx.DateTime.RelativeDateTime(years=int(val/4), month=int(12 * (float(val)/4 - val/4)), day=-1) + self.mxDate = originDate + 1 + mx.DateTime.RelativeDateTime(years=int(val/4), month=int(12 * (float(val)/4 - val/4)), day=-1) elif mxDate is not None: - self.__date = mxDate + self.mxDate = mxDate else: error = ValueError("Insufficient parameters given to create a date at the given frequency") @@ -47,30 +47,32 @@ if month is None or day is None or seconds is None: raise error if self.freq != 'S': - self.__date = mx.DateTime.Date(year, month, day) + self.mxDate = mx.DateTime.Date(year, month, day) if 
self.freq == 'B': - if self.__date.day_of_week == 5 or self.__date.day_of_week == 6: + if self.mxDate.day_of_week == 5 or self.mxDate.day_of_week == 6: raise ValueError("Weekend passed as business day") else: _hours = int(seconds/3600) _minutes = int((seconds - _hours*3600)/60) _seconds = seconds % 60 - self.__date = mx.DateTime.Date(year, month, day, _hours, _minutes, _seconds) + self.mxDate = mx.DateTime.Date(year, month, day, _hours, _minutes, _seconds) + + self.value = self.__value() - def day(self): return self.mxDate().day - def day_of_week(self): return self.mxDate().day_of_week - def month(self): return self.mxDate().month - def quarter(self): return monthToQuarter(self.mxDate().month) - def year(self): return self.mxDate().year - def seconds(self): return int(self.mxDate().second) - def minute(self): return int(self.mxDate().minute) - def hour(self): return int(self.mxDate().hour) + def day(self): return self.mxDate.day + def day_of_week(self): return self.mxDate.day_of_week + def month(self): return self.mxDate.month + def quarter(self): return monthToQuarter(self.mxDate.month) + def year(self): return self.mxDate.year + def seconds(self): return int(self.mxDate.second) + def minute(self): return int(self.mxDate.minute) + def hour(self): return int(self.mxDate.hour) def strfmt(self, fmt): qFmt = fmt.replace("%q", "XXXX") - tmpStr = self.__date.strftime(qFmt) + tmpStr = self.mxDate.strftime(qFmt) return tmpStr.replace("XXXX", str(self.quarter())) def __str__(self): @@ -120,26 +122,24 @@ def __hash__(self): return hash(int(self)) ^ hash(self.freq) def __int__(self): - return self.value() + return self.value - def value(self): + def __value(self): if self.freq == 'D': - return int((self.__date-originDate).days) + return int((self.mxDate-originDate).days) elif self.freq == 'B': - days = (self.__date-originDate).days + days = (self.mxDate-originDate).days weeks = days // 7 return int((weeks*5) + (days - weeks*7)) elif self.freq == 'M': - return 
(self.__date.year - originDate.year)*12 + (self.__date.month - originDate.month) + return (self.mxDate.year - originDate.year)*12 + (self.mxDate.month - originDate.month) elif self.freq == 'S': - return int((self.__date - secondlyOriginDate).seconds) + return int((self.mxDate - secondlyOriginDate).seconds) elif self.freq == 'A': - return int(self.__date.year - originDate.year + 1) + return int(self.mxDate.year - originDate.year + 1) elif self.freq == 'Q': - return int ((self.__date.year - originDate.year)*4 + (self.__date.month - originDate.month)/3) + return int ((self.mxDate.year - originDate.year)*4 + (self.mxDate.month - originDate.month)/3) - def mxDate(self): - return self.__date originDate = mx.DateTime.Date(1850)-1 secondlyOriginDate = mx.DateTime.Date(1980) - mx.DateTime.DateTimeDeltaFromSeconds(1) @@ -194,7 +194,7 @@ if toFreq == 'B': # BEFORE result: preceeding Friday if date is a weekend, same day otherwise # AFTER result: following Monday if date is a weekend, same day otherwise - tempDate = date.mxDate() + tempDate = date.mxDate if _rel == 'B': if tempDate.day_of_week >= 5: tempDate -= (tempDate.day_of_week - 4) elif _rel == 'A': @@ -228,7 +228,7 @@ elif date.freq == 'M': if toFreq == 'D': - tempDate = date.mxDate() + tempDate = date.mxDate if _rel == 'B': return Date(freq='D', year=date.year(), month=date.month(), day=1) elif _rel == 'A': From scipy-svn at scipy.org Wed Dec 13 09:34:28 2006 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Wed, 13 Dec 2006 08:34:28 -0600 (CST) Subject: [Scipy-svn] r2405 - trunk/Lib/sandbox/timeseries Message-ID: <20061213143428.0101139C299@new.scipy.org> Author: mattknox_ca Date: 2006-12-13 08:34:26 -0600 (Wed, 13 Dec 2006) New Revision: 2405 Modified: trunk/Lib/sandbox/timeseries/tsdate.py Log: Modified: trunk/Lib/sandbox/timeseries/tsdate.py =================================================================== --- trunk/Lib/sandbox/timeseries/tsdate.py 2006-12-13 14:27:59 UTC (rev 2404) +++ 
trunk/Lib/sandbox/timeseries/tsdate.py 2006-12-13 14:34:26 UTC (rev 2405) @@ -2,7 +2,7 @@ import mx.DateTime class Date: - def __init__(self, freq, year=None, month=None, day=None, seconds=None,quarter=None, mxDate=None, val=None): + def __init__(self, freq, year=None, month=None, day=None, seconds=None,quarter=None, mxDate=None, value=None): if hasattr(freq, 'freq'): self.freq = corelib.fmtFreq(freq.freq) @@ -10,19 +10,19 @@ self.freq = corelib.fmtFreq(freq) self.type = corelib.freqToType(self.freq) - if val is not None: + if value is not None: if self.freq == 'D': - self.mxDate = val+originDate + self.mxDate = value+originDate elif self.freq == 'B': - self.mxDate = originDate + val + (val//5)*7 - (val//5)*5 + self.mxDate = originDate + value + (value//5)*7 - (value//5)*5 elif self.freq == 'S': - self.mxDate = secondlyOriginDate + mx.DateTime.DateTimeDeltaFromSeconds(val) + self.mxDate = secondlyOriginDate + mx.DateTime.DateTimeDeltaFromSeconds(value) elif self.freq == 'M': - self.mxDate = originDate + mx.DateTime.RelativeDateTime(months=val, day=-1) + self.mxDate = originDate + mx.DateTime.RelativeDateTime(months=value, day=-1) elif self.freq == 'A': - self.mxDate = originDate + mx.DateTime.RelativeDateTime(years=val, month=-1, day=-1) + self.mxDate = originDate + mx.DateTime.RelativeDateTime(years=value, month=-1, day=-1) elif self.freq == 'Q': - self.mxDate = originDate + 1 + mx.DateTime.RelativeDateTime(years=int(val/4), month=int(12 * (float(val)/4 - val/4)), day=-1) + self.mxDate = originDate + 1 + mx.DateTime.RelativeDateTime(years=int(value/4), month=int(12 * (float(value)/4 - value/4)), day=-1) elif mxDate is not None: self.mxDate = mxDate else: @@ -93,7 +93,7 @@ def __add__(self, other): if isinstance(other, Date): raise TypeError("Cannot add dates") - return Date(freq=self.freq, val=int(self) + other) + return Date(freq=self.freq, value=int(self) + other) def __radd__(self, other): return self+other From scipy-svn at scipy.org Wed Dec 13 09:34:40 2006 
From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Wed, 13 Dec 2006 08:34:40 -0600 (CST) Subject: [Scipy-svn] r2406 - trunk/Lib/sandbox/timeseries Message-ID: <20061213143440.98A7639C299@new.scipy.org> Author: mattknox_ca Date: 2006-12-13 08:34:37 -0600 (Wed, 13 Dec 2006) New Revision: 2406 Modified: trunk/Lib/sandbox/timeseries/timeseries.py Log: Modified: trunk/Lib/sandbox/timeseries/timeseries.py =================================================================== --- trunk/Lib/sandbox/timeseries/timeseries.py 2006-12-13 14:34:26 UTC (rev 2405) +++ trunk/Lib/sandbox/timeseries/timeseries.py 2006-12-13 14:34:37 UTC (rev 2406) @@ -69,7 +69,7 @@ retVal = "" if self.firstValue() is not None: for i in range(self.firstValue(),self.lastValue()+1): - index = str(tsdate.Date(freq=self.freq,val=i)) + index = str(tsdate.Date(freq=self.freq,value=i)) index = index + (" " * (6-len(index))) retVal += index + "---> " + str(super(TimeSeries, self).__getitem__(i)) + "\n" return retVal @@ -78,18 +78,18 @@ def firstValue(self, asDate=False): - val = super(TimeSeries, self).firstValue() + value = super(TimeSeries, self).firstValue() if asDate: - return tsdate.Date(freq=self.freq, val=val) + return tsdate.Date(freq=self.freq, value=value) else: - return val + return value def lastValue(self, asDate=False): - val = super(TimeSeries, self).lastValue() + value = super(TimeSeries, self).lastValue() if asDate: - return tsdate.Date(freq=self.freq, val=val) + return tsdate.Date(freq=self.freq, value=value) else: - return val + return value ### DATA From scipy-svn at scipy.org Wed Dec 13 10:00:04 2006 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Wed, 13 Dec 2006 09:00:04 -0600 (CST) Subject: [Scipy-svn] r2407 - trunk/Lib/sandbox/timeseries Message-ID: <20061213150004.E42FD39C063@new.scipy.org> Author: mattknox_ca Date: 2006-12-13 08:59:59 -0600 (Wed, 13 Dec 2006) New Revision: 2407 Modified: trunk/Lib/sandbox/timeseries/timeseries.py Log: Modified: 
trunk/Lib/sandbox/timeseries/timeseries.py =================================================================== --- trunk/Lib/sandbox/timeseries/timeseries.py 2006-12-13 14:34:37 UTC (rev 2406) +++ trunk/Lib/sandbox/timeseries/timeseries.py 2006-12-13 14:59:59 UTC (rev 2407) @@ -9,11 +9,13 @@ import copy class TimeSeries(sa.ShiftingArray): - def __init__(self, values=[], dtype=numpy.float64, freq=None, observed='END', startIndex=None, mask=ma.nomask): + def __init__(self, values=[], dtype=None, freq=None, observed='END', startIndex=None, mask=ma.nomask): if freq is None: raise ValueError("freq not specified") + + if dtype is None: dtype = values.dtype - super(TimeSeries, self).__init__(values, dtype, startIndex,mask) + super(TimeSeries, self).__init__(values, dtype, startIndex, mask) self.freq = corelib.fmtFreq(freq) self.observed = corelib.fmtObserv(observed) self.dtype = dtype From scipy-svn at scipy.org Wed Dec 13 16:18:20 2006 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Wed, 13 Dec 2006 15:18:20 -0600 (CST) Subject: [Scipy-svn] r2408 - trunk/Lib/stats Message-ID: <20061213211820.078AA39C073@new.scipy.org> Author: rkern Date: 2006-12-13 15:18:18 -0600 (Wed, 13 Dec 2006) New Revision: 2408 Modified: trunk/Lib/stats/distributions.py Log: Fix parentheses in Johnson SB distribution. 
Modified: trunk/Lib/stats/distributions.py =================================================================== --- trunk/Lib/stats/distributions.py 2006-12-13 14:59:59 UTC (rev 2407) +++ trunk/Lib/stats/distributions.py 2006-12-13 21:18:18 UTC (rev 2408) @@ -2055,7 +2055,7 @@ def _cdf(self, x, a, b): return norm.cdf(a+b*log(x/(1.0-x))) def _ppf(self, q, a, b): - return 1.0/(1+exp(-1.0/b*norm.ppf(q)-a)) + return 1.0/(1+exp(-1.0/b*(norm.ppf(q)-a))) johnsonsb = johnsonsb_gen(a=0.0,b=1.0,name='johnsonb', longname="A Johnson SB", shapes="a,b",extradoc=""" From scipy-svn at scipy.org Wed Dec 13 17:04:54 2006 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Wed, 13 Dec 2006 16:04:54 -0600 (CST) Subject: [Scipy-svn] r2409 - trunk/Lib/integrate Message-ID: <20061213220454.4B40B39C0A6@new.scipy.org> Author: oliphant Date: 2006-12-13 16:04:51 -0600 (Wed, 13 Dec 2006) New Revision: 2409 Modified: trunk/Lib/integrate/quadrature.py Log: Update docstring for romb Modified: trunk/Lib/integrate/quadrature.py =================================================================== --- trunk/Lib/integrate/quadrature.py 2006-12-13 21:18:18 UTC (rev 2408) +++ trunk/Lib/integrate/quadrature.py 2006-12-13 22:04:51 UTC (rev 2409) @@ -268,10 +268,21 @@ return result def romb(y, dx=1.0, axis=-1, show=False): - """Uses Romberg integration to integrate y(x) using N samples - along the given axis which are assumed equally spaced with distance dx. - The number of samples must be 1 + a non-negative power of two: N=2**k + 1 + """Romberg integration using samples of a function + Inputs: + + y - a vector of 2**k + 1 equally-spaced samples of a fucntion + dx - the sample spacing. + axis - the axis along which to integrate + show - When y is a single 1-d array, then if this argument is True + print the table showing Richardson extrapolation from the + samples. + + Output: ret + + ret - The integrated result for each axis. 
+ See also: quad - adaptive quadrature using QUADPACK From scipy-svn at scipy.org Thu Dec 14 11:29:37 2006 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Thu, 14 Dec 2006 10:29:37 -0600 (CST) Subject: [Scipy-svn] r2410 - trunk/Lib/sandbox/models Message-ID: <20061214162937.2E3DF39C0B3@new.scipy.org> Author: jonathan.taylor Date: 2006-12-14 10:29:35 -0600 (Thu, 14 Dec 2006) New Revision: 2410 Modified: trunk/Lib/sandbox/models/formula.py Log: fixed _get_namespace to allow recarrays to be used Modified: trunk/Lib/sandbox/models/formula.py =================================================================== --- trunk/Lib/sandbox/models/formula.py 2006-12-13 22:04:51 UTC (rev 2409) +++ trunk/Lib/sandbox/models/formula.py 2006-12-14 16:29:35 UTC (rev 2410) @@ -53,7 +53,11 @@ # Namespace in which self.name will be looked up in, if needed - def _get_namespace(self): return self.__namespace or default_namespace + def _get_namespace(self): + if isinstance(self.__namespace, N.ndarray): + return self.__namespace + else: return self.__namespace or default_namespace + def _set_namespace(self, value): self.__namespace = value def _del_namespace(self): del self.__namespace namespace = property(_get_namespace, _set_namespace, _del_namespace) From scipy-svn at scipy.org Thu Dec 14 11:31:22 2006 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Thu, 14 Dec 2006 10:31:22 -0600 (CST) Subject: [Scipy-svn] r2411 - trunk/Lib/sandbox/models Message-ID: <20061214163122.35B9C39C0B3@new.scipy.org> Author: jonathan.taylor Date: 2006-12-14 10:31:20 -0600 (Thu, 14 Dec 2006) New Revision: 2411 Modified: trunk/Lib/sandbox/models/formula.py Log: another namespace fix Modified: trunk/Lib/sandbox/models/formula.py =================================================================== --- trunk/Lib/sandbox/models/formula.py 2006-12-14 16:29:35 UTC (rev 2410) +++ trunk/Lib/sandbox/models/formula.py 2006-12-14 16:31:20 UTC (rev 2411) @@ -286,7 +286,11 @@ of the columns of the two 
formulas. """ - def _get_namespace(self): return self.__namespace or default_namespace + def _get_namespace(self): + if isinstance(self.__namespace, N.ndarray): + return self.__namespace + else: return self.__namespace or default_namespace + def _set_namespace(self, value): self.__namespace = value def _del_namespace(self): del self.__namespace namespace = property(_get_namespace, _set_namespace, _del_namespace) From scipy-svn at scipy.org Thu Dec 14 20:00:00 2006 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Thu, 14 Dec 2006 19:00:00 -0600 (CST) Subject: [Scipy-svn] r2412 - in trunk/Lib/sandbox/maskedarray: . tests Message-ID: <20061215010000.038A139C104@new.scipy.org> Author: pierregm Date: 2006-12-14 18:59:51 -0600 (Thu, 14 Dec 2006) New Revision: 2412 Added: trunk/Lib/sandbox/maskedarray/.project trunk/Lib/sandbox/maskedarray/src/ trunk/Lib/sandbox/maskedarray/tests/test_core.py trunk/Lib/sandbox/maskedarray/tests/test_extras.py Modified: trunk/Lib/sandbox/maskedarray/CHANGELOG trunk/Lib/sandbox/maskedarray/__init__.py trunk/Lib/sandbox/maskedarray/core.py trunk/Lib/sandbox/maskedarray/extras.py trunk/Lib/sandbox/maskedarray/setup.py trunk/Lib/sandbox/maskedarray/testutils.py Log: see changelog Added: trunk/Lib/sandbox/maskedarray/.project =================================================================== --- trunk/Lib/sandbox/maskedarray/.project 2006-12-14 16:31:20 UTC (rev 2411) +++ trunk/Lib/sandbox/maskedarray/.project 2006-12-15 00:59:51 UTC (rev 2412) @@ -0,0 +1,17 @@ + + + scipy_svn_maskedarray + + + + + + org.python.pydev.PyDevBuilder + + + + + + org.python.pydev.pythonNature + + Modified: trunk/Lib/sandbox/maskedarray/CHANGELOG =================================================================== --- trunk/Lib/sandbox/maskedarray/CHANGELOG 2006-12-14 16:31:20 UTC (rev 2411) +++ trunk/Lib/sandbox/maskedarray/CHANGELOG 2006-12-15 00:59:51 UTC (rev 2412) @@ -1,18 +1,28 @@ -#2006-12-09: - Code reorganization: define 2 modules, core and extras 
-#2006-11-25: - Disable copy by default -# - Added keep_mask flag (to save mask when creating a ma from a ma) -# - Fixed functions: empty_like -# - Fixed methods: .any and .all -# - New functions: masked_all, masked_all_like -# - New methods: .squeeze -#2006-11-20: - fixed make_mask -# - fixed nonzero method -#2006-11-16: - fixed .T -#2006-11-12: - add max, min as function (not only method...) -# - repr returns a name like masked_xxx, where xxx is the subclass -#2006-10-31: - make sure that make_mask returns a pure ndarray. -#2006-10-30: - When converted to a float, a masked singleton is transformed to nan -# instead of raising an exception. +#2006-12-13 : - moved 'average' to 'extras' +# : Core +# : - Fixed make_mask (forced filling to True) +# : - Fixed ndim +# : - Fixed error message in __new__ when wrong sizes +# : - Fixed the reshape function. +# : Extras +# : - masked_all: set default dtype to float_ +# : - _fromnxfunctions: make sure that list are recognized +# : - added notmasked_edges, notmasked_contiguous +#2006-12-09 : - Code reorganization: define 2 modules, core and extras +#2006-11-25 : - Disable copy by default +# - Added keep_mask flag (to save mask when creating a ma from a ma) +# - Fixed functions: empty_like +# - Fixed methods: .any and .all +# - New functions: masked_all, masked_all_like +# - New methods: .squeeze +#2006-11-20 : - fixed make_mask +# - fixed nonzero method +#2006-11-16 : - fixed .T +#2006-11-12 : - add max, min as function (not only method...) +# - repr returns a name like masked_xxx, where xxx is the subclass +#2006-10-31 : - make sure that make_mask returns a pure ndarray. +#2006-10-30 : - When converted to a float, a masked singleton is transformed to nan +# instead of raising an exception. 
#21: Use __get__ method in _arraymethods, _arithmethods, _compamethods #18: Updated put to match the definition of numpy 1.0, deleted putmask, changed resize #2: prevent an extra kword being sent to make_mask_none Modified: trunk/Lib/sandbox/maskedarray/__init__.py =================================================================== --- trunk/Lib/sandbox/maskedarray/__init__.py 2006-12-14 16:31:20 UTC (rev 2411) +++ trunk/Lib/sandbox/maskedarray/__init__.py 2006-12-15 00:59:51 UTC (rev 2412) @@ -4,12 +4,12 @@ :author: Pierre GF Gerard-Marchant :contact: pierregm_at_uga_dot_edu -:version: $Id: __init__.py 38 2006-12-09 23:01:14Z backtopop $ +:version: $Id$ """ -__author__ = "Pierre GF Gerard-Marchant ($Author: backtopop $)" +__author__ = "Pierre GF Gerard-Marchant ($Author$)" __version__ = '1.0' -__revision__ = "$Revision: 38 $" -__date__ = '$Date: 2006-12-09 18:01:14 -0500 (Sat, 09 Dec 2006) $' +__revision__ = "$Revision$" +__date__ = '$Date$' import core reload(core) Property changes on: trunk/Lib/sandbox/maskedarray/__init__.py ___________________________________________________________________ Name: svn:keywords + Date Author Revision Id Modified: trunk/Lib/sandbox/maskedarray/core.py =================================================================== --- trunk/Lib/sandbox/maskedarray/core.py 2006-12-14 16:31:20 UTC (rev 2411) +++ trunk/Lib/sandbox/maskedarray/core.py 2006-12-15 00:59:51 UTC (rev 2412) @@ -12,12 +12,12 @@ :author: Pierre Gerard-Marchant :contact: pierregm_at_uga_dot_edu -:version: $Id: core.py 40 2006-12-10 19:50:35Z backtopop $ +:version: $Id$ """ -__author__ = "Pierre GF Gerard-Marchant ($Author: backtopop $)" +__author__ = "Pierre GF Gerard-Marchant ($Author$)" __version__ = '1.0' -__revision__ = "$Revision: 40 $" -__date__ = '$Date: 2006-12-10 14:50:35 -0500 (Sun, 10 Dec 2006) $' +__revision__ = "$Revision$" +__date__ = '$Date$' __all__ = ['MAError', 'MaskType', 'MaskedArray', 'bool_', 'complex_', 'float_', 'int_', 'object_', @@ -25,7 +25,7 
@@ 'amax', 'amin', 'anom', 'anomalies', 'any', 'arange', 'arccos', 'arccosh', 'arcsin', 'arcsinh', 'arctan', 'arctan2', 'arctanh', 'argmax', 'argmin', 'argsort', 'around', - 'array', 'asarray', 'average', + 'array', 'asarray', 'bitwise_and', 'bitwise_or', 'bitwise_xor', 'ceil', 'choose', 'compressed', 'concatenate', 'conjugate', 'cos', 'cosh', 'count', @@ -582,6 +582,7 @@ if m is nomask: return nomask elif isinstance(m, ndarray): + m = filled(m, True) if m.dtype.type is MaskType: if copy: result = numeric.array(m, dtype=MaskType, copy=copy) @@ -591,7 +592,7 @@ result = numeric.array(m, dtype=MaskType) else: result = numeric.array(filled(m, True), dtype=MaskType) - + # Bas les masques ! if flag and not result.any(): return nomask else: @@ -856,7 +857,7 @@ else: msg = "Mask and data not compatible: data size is %i, "+\ "mask size is %i." - raise MAError, msg % (nm, nd) + raise MAError, msg % (nd, nm) elif (_mask.shape != _data.shape): _mask = _mask.reshape(_data.shape).copy() #.... @@ -1412,6 +1413,12 @@ size = property(fget=_get_size, doc="Size (number of elements) of the array.") # + def _get_ndim(self): + "Returns the number of dimensions." + return self._data.ndim + ndim = property(fget=_get_ndim, + doc="Number of dimensions of the array.") + # def reshape (self, *s): """Reshapes the array to shape s. Returns a new masked array. @@ -2302,7 +2309,6 @@ ptp = _frommethod('ptp') ravel = _frommethod('ravel') repeat = _frommethod('repeat') -reshape = _frommethod('reshape') std = _frommethod('std') sum = _frommethod('sum') swapaxes = _frommethod('swapaxes') @@ -2310,6 +2316,27 @@ var = _frommethod('var') #.............................................................................. +def power(a, b, third=None): + """Computes a**b elementwise. + Masked values are set to 1.""" + if third is not None: + raise MAError, "3-argument power not supported." 
+ ma = getmask(a) + mb = getmask(b) + m = mask_or(ma, mb) + fa = filled(a, 1) + fb = filled(b, 1) + if fb.dtype.char in typecodes["Integer"]: + return masked_array(umath.power(fa, fb), m) + md = make_mask((fa < 0), flag=1) + m = mask_or(m, md) + if m is nomask: + return masked_array(umath.power(fa, fb)) + else: + fa[m] = 1 + return masked_array(umath.power(fa, fb), m) + +#.............................................................................. def argsort(a, axis=None, kind='quicksort', fill_value=None): """Returns an array of indices that sort 'a' along the specified axis. Masked values are filled beforehand to `fill_value`. @@ -2462,34 +2489,42 @@ d = umath.right_shift(filled(a, 0), n) return masked_array(d, mask=m) #...................................... -def put(x, indices, values, mode='raise'): - """sets storage-indexed locations to corresponding values. +def put(a, indices, values, mode='raise'): + """Sets storage-indexed locations to corresponding values. Values and indices are filled if necessary.""" # We can't use 'frommethod', the order of arguments is different try: - return x.put(indices, values, mode=mode) + return a.put(indices, values, mode=mode) except AttributeError: - return fromnumeric.asarray(x).put(indices, values, mode=mode) + return fromnumeric.asarray(a).put(indices, values, mode=mode) -def putmask(x, mask, values): #, mode='raise'): - """`putmask(x, mask, v)` results in `x = v` for all places where `mask` is true. +def putmask(a, mask, values): #, mode='raise'): + """`putmask(a, mask, v)` results in `a = v` for all places where `mask` is true. If `v` is shorter than `mask`, it will be repeated as necessary. 
In particular `v` can be a scalar or length 1 array.""" # We can't use 'frommethod', the order of arguments is different try: - return x.putmask(values, mask) + return a.putmask(values, mask) except AttributeError: - return fromnumeric.asarray(x).putmask(values, mask) + return fromnumeric.asarray(a).putmask(values, mask) -def transpose(x,axes=None): +def transpose(a,axes=None): """Returns a view of the array with dimensions permuted according to axes. If `axes` is None (default), returns array with dimensions reversed. """ #We can't use 'frommethod', as 'transpose' doesn't take keywords try: - return x.transpose(axes) + return a.transpose(axes) except AttributeError: - return fromnumeric.asarray(x).transpose(axes) + return fromnumeric.asarray(a).transpose(axes) + +def reshape(a, new_shape): + """Changes the shape of the array `a` to `new_shape`.""" + #We can't use 'frommethod', it whine about some parameters. Dmmit. + try: + return a.reshape(new_shape) + except AttributeError: + return fromnumeric.asarray(a).reshape(new_shape) def resize(x, new_shape): """resize(a,new_shape) returns a new array with the specified shape. @@ -2686,123 +2721,6 @@ d = umath.less_equal(umath.absolute(x-y), atol + rtol * umath.absolute(y)) return fromnumeric.alltrue(fromnumeric.ravel(d)) -def average (a, axis=None, weights=None, returned = 0): - """average(a, axis=None weights=None, returned=False) - - Averages the array over the given axis. If the axis is None, averages - over all dimensions of the array. Equivalent to a.mean(axis) - - If an integer axis is given, this equals: - a.sum(axis) * 1.0 / size(a, axis) - - If axis is None, this equals: - a.sum(axis) * 1.0 / a.size - - If weights are given, result is: - sum(a * weights,axis) / sum(weights,axis), - where the weights must have a's shape or be 1D with length the - size of a in the given axis. Integer weights are converted to - Float. Not specifying weights is equivalent to specifying - weights that are all 1. 
- - If 'returned' is True, return a tuple: the result and the sum of - the weights or count of values. The shape of these two results - will be the same. - - Returns masked values instead of ZeroDivisionError if appropriate. - - """ - a = asarray(a) - mask = a.mask - ash = a.shape - if ash == (): - ash = (1,) - if axis is None: - if mask is nomask: - if weights is None: - n = a.sum(axis=None) - d = float(a.size) - else: - w = filled(weights, 0.0).ravel() - n = umath.add.reduce(a._data.ravel() * w) - d = umath.add.reduce(w) - del w - else: - if weights is None: - n = a.filled(0).sum(axis=None) - d = umath.add.reduce((-mask).ravel().astype(int_)) - else: - w = array(filled(weights, 0.0), float, mask=mask).ravel() - n = add.reduce(a.ravel() * w) - d = add.reduce(w) - del w - else: - if mask is nomask: - if weights is None: - d = ash[axis] * 1.0 - n = add.reduce(a._data, axis) - else: - w = filled(weights, 0.0) - wsh = w.shape - if wsh == (): - wsh = (1,) - if wsh == ash: - w = numeric.array(w, float_, copy=0) - n = add.reduce(a*w, axis) - d = add.reduce(w, axis) - del w - elif wsh == (ash[axis],): - ni = ash[axis] - r = [None]*len(ash) - r[axis] = slice(None, None, 1) - w = eval ("w["+ repr(tuple(r)) + "] * ones(ash, float)") - n = add.reduce(a*w, axis) - d = add.reduce(w, axis) - del w, r - else: - raise ValueError, 'average: weights wrong shape.' - else: - if weights is None: - n = add.reduce(a, axis) - d = umath.add.reduce((-mask), axis=axis, dtype=float_) - else: - w = filled(weights, 0.0) - wsh = w.shape - if wsh == (): - wsh = (1,) - if wsh == ash: - w = array(w, float, mask=mask, copy=0) - n = add.reduce(a*w, axis) - d = add.reduce(w, axis) - elif wsh == (ash[axis],): - ni = ash[axis] - r = [None]*len(ash) - r[axis] = slice(None, None, 1) - w = eval ("w["+ repr(tuple(r)) + "] * masked_array(ones(ash, float), mask)") - n = add.reduce(a*w, axis) - d = add.reduce(w, axis) - else: - raise ValueError, 'average: weights wrong shape.' 
- del w - if n is masked or d is masked: - return masked - result = n/d - del n - - if isinstance(result, MaskedArray): - if ((axis is None) or (axis==0 and a.ndim == 1)) and \ - (result._mask is nomask): - result = result._data - if returned: - if not isinstance(d, MaskedArray): - d = masked_array(d) - if isinstance(d, ndarray) and (not d.shape == result.shape): - d = ones(result.shape, float) * d - if returned: - return result, d - else: - return result - #.............................................................................. def asarray(a, dtype=None): """asarray(data, dtype) = array(data, dtype, copy=0) Property changes on: trunk/Lib/sandbox/maskedarray/core.py ___________________________________________________________________ Name: svn:keywords + Date Author Revision Id Modified: trunk/Lib/sandbox/maskedarray/extras.py =================================================================== --- trunk/Lib/sandbox/maskedarray/extras.py 2006-12-14 16:31:20 UTC (rev 2411) +++ trunk/Lib/sandbox/maskedarray/extras.py 2006-12-15 00:59:51 UTC (rev 2412) @@ -4,26 +4,32 @@ :author: Pierre Gerard-Marchant :contact: pierregm_at_uga_dot_edu -:version: $Id: extras.py 38 2006-12-09 23:01:14Z backtopop $ +:version: $Id$ """ -__author__ = "Pierre GF Gerard-Marchant ($Author: backtopop $)" +__author__ = "Pierre GF Gerard-Marchant ($Author$)" __version__ = '1.0' -__revision__ = "$Revision: 38 $" -__date__ = '$Date: 2006-12-09 18:01:14 -0500 (Sat, 09 Dec 2006) $' +__revision__ = "$Revision$" +__date__ = '$Date$' __all__ = ['apply_along_axis', 'atleast_1d', 'atleast_2d', 'atleast_3d', + 'average', 'vstack', 'hstack', 'dstack', 'row_stack', 'column_stack', 'count_masked', 'masked_all', 'masked_all_like', 'mr_', + 'notmasked_edges', 'notmasked_contiguous', 'stdu', 'varu', ] +from itertools import groupby + import core reload(core) from core import * from core import _arraymethod import numpy +from numpy import float_ +import numpy.core.umath as umath import numpy.core.numeric as 
numeric from numpy.core.numeric import ndarray from numpy.core.numeric import array as nxarray @@ -47,7 +53,7 @@ m = getmaskarray(arr) return m.sum(axis) -def masked_all(shape, dtype): +def masked_all(shape, dtype=float_): """Returns an empty masked array of the given shape and dtype, where all the data are masked.""" a = empty(shape, dtype) @@ -120,12 +126,12 @@ if len(args)==1: x = args[0] if isinstance(x,ndarray): - _d = func.__call__(nxasarray(x), **params) - _m = func.__call__(getmaskarray(x), **params) + _d = func(nxasarray(x), **params) + _m = func(getmaskarray(x), **params) return masked_array(_d, mask=_m) - elif isinstance(x, tuple): - _d = func.__call__(tuple([nxasarray(a) for a in x]), **params) - _m = func.__call__(tuple([getmaskarray(a) for a in x]), **params) + elif isinstance(x, tuple) or isinstance(x, list): + _d = func(tuple([nxasarray(a) for a in x]), **params) + _m = func(tuple([getmaskarray(a) for a in x]), **params) return masked_array(_d, mask=_m) else: arrays = [] @@ -134,8 +140,8 @@ arrays.append(args.pop(0)) res = [] for x in arrays: - _d = func.__call__(nxasarray(x), *args, **params) - _m = func.__call__(getmaskarray(x), *args, **params) + _d = func(nxasarray(x), *args, **params) + _m = func(getmaskarray(x), *args, **params) res.append(masked_array(_d, mask=_m)) return res @@ -229,6 +235,125 @@ else: return outarr.astype(max(dtypes)) + +def average (a, axis=None, weights=None, returned = 0): + """average(a, axis=None weights=None, returned=False) + + Averages the array over the given axis. If the axis is None, averages + over all dimensions of the array. Equivalent to a.mean(axis) + + If an integer axis is given, this equals: + a.sum(axis) * 1.0 / size(a, axis) + + If axis is None, this equals: + a.sum(axis) * 1.0 / a.size + + If weights are given, result is: + sum(a * weights,axis) / sum(weights,axis), + where the weights must have a's shape or be 1D with length the + size of a in the given axis. Integer weights are converted to + Float. 
Not specifying weights is equivalent to specifying + weights that are all 1. + + If 'returned' is True, return a tuple: the result and the sum of + the weights or count of values. The shape of these two results + will be the same. + + Returns masked values instead of ZeroDivisionError if appropriate. + + """ + a = asarray(a) + mask = a.mask + ash = a.shape + if ash == (): + ash = (1,) + if axis is None: + if mask is nomask: + if weights is None: + n = a.sum(axis=None) + d = float(a.size) + else: + w = filled(weights, 0.0).ravel() + n = umath.add.reduce(a._data.ravel() * w) + d = umath.add.reduce(w) + del w + else: + if weights is None: + n = a.filled(0).sum(axis=None) + d = umath.add.reduce((-mask).ravel().astype(int_)) + else: + w = array(filled(weights, 0.0), float, mask=mask).ravel() + n = add.reduce(a.ravel() * w) + d = add.reduce(w) + del w + else: + if mask is nomask: + if weights is None: + d = ash[axis] * 1.0 + n = add.reduce(a._data, axis) + else: + w = filled(weights, 0.0) + wsh = w.shape + if wsh == (): + wsh = (1,) + if wsh == ash: + w = numeric.array(w, float_, copy=0) + n = add.reduce(a*w, axis) + d = add.reduce(w, axis) + del w + elif wsh == (ash[axis],): + ni = ash[axis] + r = [None]*len(ash) + r[axis] = slice(None, None, 1) + w = eval ("w["+ repr(tuple(r)) + "] * ones(ash, float)") + n = add.reduce(a*w, axis) + d = add.reduce(w, axis) + del w, r + else: + raise ValueError, 'average: weights wrong shape.' 
+ else: + if weights is None: + n = add.reduce(a, axis) + d = umath.add.reduce((-mask), axis=axis, dtype=float_) + else: + w = filled(weights, 0.0) + wsh = w.shape + if wsh == (): + wsh = (1,) + if wsh == ash: + w = array(w, float, mask=mask, copy=0) + n = add.reduce(a*w, axis) + d = add.reduce(w, axis) + elif wsh == (ash[axis],): + ni = ash[axis] + r = [None]*len(ash) + r[axis] = slice(None, None, 1) + w = eval ("w["+ repr(tuple(r)) + "] * masked_array(ones(ash, float), mask)") + n = add.reduce(a*w, axis) + d = add.reduce(w, axis) + else: + raise ValueError, 'average: weights wrong shape.' + del w + if n is masked or d is masked: + return masked + result = n/d + del n + + if isinstance(result, MaskedArray): + if ((axis is None) or (axis==0 and a.ndim == 1)) and \ + (result._mask is nomask): + result = result._data + if returned: + if not isinstance(d, MaskedArray): + d = masked_array(d) + if isinstance(d, ndarray) and (not d.shape == result.shape): + d = ones(result.shape, float) * d + if returned: + return result, d + else: + return result + + #####-------------------------------------------------------------------------- #---- --- Concatenation helpers --- #####-------------------------------------------------------------------------- @@ -301,3 +426,76 @@ mconcatenator.__init__(self, 0) mr_ = mr_class() + +#####-------------------------------------------------------------------------- +#---- --- +#####-------------------------------------------------------------------------- + +def flatnotmasked_edges(a): + """Finds the indices of the first and last not masked values in a 1D masked array. + If all values are masked, returns None. + """ + m = getmask(a) + if m is nomask or not numpy.any(m): + return [0,-1] + unmasked = numeric.flatnonzero(~m) + if len(unmasked) > 0: + return unmasked[[0,-1]] + else: + return None + +def notmasked_edges(a, axis=None): + """Finds the indices of the first and last not masked values along the given + axis in a masked array. 
+ If all values are masked, returns None. + Otherwise, returns a list of 2 tuples, corresponding to the indices of the + first and last unmasked values respectively. + """ + a = asarray(a) + if axis is None or a.ndim == 1: + return flatnotmasked_edges(a) + m = getmask(a) + idx = array(numpy.indices(a.shape), mask=nxasarray([m]*a.ndim)) + return [tuple([idx[i].min(axis).compressed() for i in range(a.ndim)]), + tuple([idx[i].max(axis).compressed() for i in range(a.ndim)]),] + +def flatnotmasked_contiguous(a): + """Finds contiguous unmasked data in a flattened masked array. + Returns a sorted sequence of tuples (size,(start index, end index)). + """ + m = getmask(a) + if m is nomask: + return (a.size, [0,-1]) + unmasked = numeric.flatnonzero(~m) + if len(unmasked) == 0: + return None + result = [] + for k, group in groupby(enumerate(unmasked), lambda (i,x):i-x): + tmp = numpy.fromiter((g[1] for g in group), int_) + result.append((tmp.size, tuple(tmp[[0,-1]]))) + result.sort() + return result + +def notmasked_contiguous(a, axis=None): + """Finds contiguous unmasked data in a masked array along the given axis. + Returns a sorted sequence of tuples (size,(start index, end index)). + Note: Only accepts 2D arrays at most. + """ + a = asarray(a) + nd = a.ndim + if nd > 2: + raise NotImplementedError,"Currently limited to atmost 2D array." 
+ if axis is None or nd == 1: + return flatnotmasked_contiguous(a) + # + result = [] + # + other = (axis+1)%2 + idx = [0,0] + idx[axis] = slice(None,None) + # + for i in range(a.shape[other]): + idx[other] = i + result.append( flatnotmasked_contiguous(a[idx]) ) + return result + Property changes on: trunk/Lib/sandbox/maskedarray/extras.py ___________________________________________________________________ Name: svn:keywords + Date Author Revision Id Modified: trunk/Lib/sandbox/maskedarray/setup.py =================================================================== --- trunk/Lib/sandbox/maskedarray/setup.py 2006-12-14 16:31:20 UTC (rev 2411) +++ trunk/Lib/sandbox/maskedarray/setup.py 2006-12-15 00:59:51 UTC (rev 2412) @@ -1,8 +1,8 @@ #!/usr/bin/env python -__author__ = "Pierre GF Gerard-Marchant ($Author: backtopop $)" +__author__ = "Pierre GF Gerard-Marchant ($Author$)" __version__ = '1.0' -__revision__ = "$Revision: 37 $" -__date__ = '$Date: 2006-12-08 14:30:29 -0500 (Fri, 08 Dec 2006) $' +__revision__ = "$Revision$" +__date__ = '$Date$' import os Property changes on: trunk/Lib/sandbox/maskedarray/setup.py ___________________________________________________________________ Name: svn:keywords + Date Author Revision Id Added: trunk/Lib/sandbox/maskedarray/tests/test_core.py =================================================================== --- trunk/Lib/sandbox/maskedarray/tests/test_core.py 2006-12-14 16:31:20 UTC (rev 2411) +++ trunk/Lib/sandbox/maskedarray/tests/test_core.py 2006-12-15 00:59:51 UTC (rev 2412) @@ -0,0 +1,1058 @@ +# pylint: disable-msg=W0611, W0612, W0511,R0201 +"""Tests suite for MaskedArray. 
+Adapted from the original test_ma by Pierre Gerard-Marchant + +:author: Pierre Gerard-Marchant +:contact: pierregm_at_uga_dot_edu +:version: $Id$ +""" +__author__ = "Pierre GF Gerard-Marchant ($Author$)" +__version__ = '1.0' +__revision__ = "$Revision$" +__date__ = '$Date$' + +import types + +import numpy as N +import numpy.core.fromnumeric as fromnumeric +from numpy.testing import NumpyTest, NumpyTestCase +from numpy.testing.utils import build_err_msg + +import maskedarray.testutils +reload(maskedarray.testutils) +from maskedarray.testutils import * + +import maskedarray.core +reload(maskedarray.core) +from maskedarray.core import * + +pi = N.pi + +#.............................................................................. +class test_ma(NumpyTestCase): + "Base test class for MaskedArrays." + def __init__(self, *args, **kwds): + NumpyTestCase.__init__(self, *args, **kwds) + self.setUp() + + def setUp (self): + "Base data definition." + x = N.array([1.,1.,1.,-2., pi/2.0, 4., 5., -10., 10., 1., 2., 3.]) + y = N.array([5.,0.,3., 2., -1., -4., 0., -10., 10., 1., 0., 3.]) + a10 = 10. + m1 = [1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0] + m2 = [0, 0, 1, 0, 0, 1, 1, 0, 0, 0 ,0, 1] + xm = masked_array(x, mask=m1) + ym = masked_array(y, mask=m2) + z = N.array([-.5, 0., .5, .8]) + zm = masked_array(z, mask=[0,1,0,0]) + xf = N.where(m1, 1.e+20, x) + xm.set_fill_value(1.e+20) + self.d = (x, y, a10, m1, m2, xm, ym, z, zm, xf) + #........................ + def check_testBasic1d(self): + "Test of basic array creation and properties in 1 dimension." 
+ (x, y, a10, m1, m2, xm, ym, z, zm, xf) = self.d + assert(not isMaskedArray(x)) + assert(isMaskedArray(xm)) + assert((xm-ym).filled(0).any()) + fail_if_equal(xm.mask.astype(int_), ym.mask.astype(int_)) + s = x.shape + assert_equal(N.shape(xm), s) + assert_equal(xm.shape, s) + assert_equal(xm.dtype, x.dtype) + assert_equal(zm.dtype, z.dtype) + assert_equal(xm.size , reduce(lambda x,y:x*y, s)) + assert_equal(count(xm) , len(m1) - reduce(lambda x,y:x+y, m1)) + assert_array_equal(xm, xf) + assert_array_equal(filled(xm, 1.e20), xf) + assert_array_equal(x, xm) + #........................ + def check_testBasic2d(self): + "Test of basic array creation and properties in 2 dimensions." + (x, y, a10, m1, m2, xm, ym, z, zm, xf) = self.d + for s in [(4,3), (6,2)]: + x.shape = s + y.shape = s + xm.shape = s + ym.shape = s + xf.shape = s + + assert(not isMaskedArray(x)) + assert(isMaskedArray(xm)) + assert_equal(shape(xm), s) + assert_equal(xm.shape, s) + assert_equal( xm.size , reduce(lambda x,y:x*y, s)) + assert_equal( count(xm) , len(m1) - reduce(lambda x,y:x+y, m1)) + assert_equal(xm, xf) + assert_equal(filled(xm, 1.e20), xf) + assert_equal(x, xm) + #........................ + def check_testArithmetic (self): + "Test of basic arithmetic." 
+ (x, y, a10, m1, m2, xm, ym, z, zm, xf) = self.d + a2d = array([[1,2],[0,4]]) + a2dm = masked_array(a2d, [[0,0],[1,0]]) + assert_equal(a2d * a2d, a2d * a2dm) + assert_equal(a2d + a2d, a2d + a2dm) + assert_equal(a2d - a2d, a2d - a2dm) + for s in [(12,), (4,3), (2,6)]: + x = x.reshape(s) + y = y.reshape(s) + xm = xm.reshape(s) + ym = ym.reshape(s) + xf = xf.reshape(s) + assert_equal(-x, -xm) + assert_equal(x + y, xm + ym) + assert_equal(x - y, xm - ym) + assert_equal(x * y, xm * ym) + assert_equal(x / y, xm / ym) + assert_equal(a10 + y, a10 + ym) + assert_equal(a10 - y, a10 - ym) + assert_equal(a10 * y, a10 * ym) + assert_equal(a10 / y, a10 / ym) + assert_equal(x + a10, xm + a10) + assert_equal(x - a10, xm - a10) + assert_equal(x * a10, xm * a10) + assert_equal(x / a10, xm / a10) + assert_equal(x**2, xm**2) + assert_equal(abs(x)**2.5, abs(xm) **2.5) + assert_equal(x**y, xm**ym) + assert_equal(N.add(x,y), add(xm, ym)) + assert_equal(N.subtract(x,y), subtract(xm, ym)) + assert_equal(N.multiply(x,y), multiply(xm, ym)) + assert_equal(N.divide(x,y), divide(xm, ym)) + #........................ + def check_testMixedArithmetic(self): + "Tests mixed arithmetics." + na = N.array([1]) + ma = array([1]) + self.failUnless(isinstance(na + ma, MaskedArray)) + self.failUnless(isinstance(ma + na, MaskedArray)) + #......................... + def check_testUfuncs1 (self): + "Test various functions such as sin, cos." 
+ (x, y, a10, m1, m2, xm, ym, z, zm, xf) = self.d + assert_equal(N.cos(x), cos(xm)) + assert_equal(N.cosh(x), cosh(xm)) + assert_equal(N.sin(x), sin(xm)) + assert_equal(N.sinh(x), sinh(xm)) + assert_equal(N.tan(x), tan(xm)) + assert_equal(N.tanh(x), tanh(xm)) + assert_equal(N.sqrt(abs(x)), sqrt(xm)) + assert_equal(N.log(abs(x)), log(xm)) + assert_equal(N.log10(abs(x)), log10(xm)) + assert_equal(N.exp(x), exp(xm)) + assert_equal(N.arcsin(z), arcsin(zm)) + assert_equal(N.arccos(z), arccos(zm)) + assert_equal(N.arctan(z), arctan(zm)) + assert_equal(N.arctan2(x, y), arctan2(xm, ym)) + assert_equal(N.absolute(x), absolute(xm)) + assert_equal(N.equal(x,y), equal(xm, ym)) + assert_equal(N.not_equal(x,y), not_equal(xm, ym)) + assert_equal(N.less(x,y), less(xm, ym)) + assert_equal(N.greater(x,y), greater(xm, ym)) + assert_equal(N.less_equal(x,y), less_equal(xm, ym)) + assert_equal(N.greater_equal(x,y), greater_equal(xm, ym)) + assert_equal(N.conjugate(x), conjugate(xm)) + assert_equal(N.concatenate((x,y)), concatenate((xm,ym))) + assert_equal(N.concatenate((x,y)), concatenate((x,y))) + assert_equal(N.concatenate((x,y)), concatenate((xm,y))) + assert_equal(N.concatenate((x,y,x)), concatenate((x,ym,x))) + #........................ + def check_xtestCount (self): + "Tests count" + ott = array([0.,1.,2.,3.], mask=[1,0,0,0]) + assert( isinstance(count(ott), types.IntType)) + assert_equal(3, count(ott)) + assert_equal(1, count(1)) + assert_equal(0, array(1,mask=[1])) + ott = ott.reshape((2,2)) + assert isMaskedArray(count(ott,0)) + assert isinstance(count(ott), types.IntType) + assert_equal(3, count(ott)) + assert getmask(count(ott,0)) is nomask + assert_equal([1,2],count(ott,0)) + #........................ + def check_testMinMax (self): + "Tests minimum and maximum." 
+ (x, y, a10, m1, m2, xm, ym, z, zm, xf) = self.d + xr = N.ravel(x) #max doesn't work if shaped + xmr = ravel(xm) + assert_equal(max(xr), maximum(xmr)) #true because of careful selection of data + assert_equal(min(xr), minimum(xmr)) #true because of careful selection of data + # + assert_equal(minimum([1,2,3],[4,0,9]), [1,0,3]) + assert_equal(maximum([1,2,3],[4,0,9]), [4,2,9]) + x = arange(5) + y = arange(5) - 2 + x[3] = masked + y[0] = masked + assert_equal(minimum(x,y), where(less(x,y), x, y)) + assert_equal(maximum(x,y), where(greater(x,y), x, y)) + assert minimum(x) == 0 + assert maximum(x) == 4 + #........................ + def check_testAddSumProd (self): + "Tests add, sum, product." + (x, y, a10, m1, m2, xm, ym, z, zm, xf) = self.d + assert_equal(N.add.reduce(x), add.reduce(x)) + assert_equal(N.add.accumulate(x), add.accumulate(x)) + assert_equal(4, sum(array(4),axis=0)) + assert_equal(4, sum(array(4), axis=0)) + assert_equal(N.sum(x,axis=0), sum(x,axis=0)) + assert_equal(N.sum(filled(xm,0),axis=0), sum(xm,axis=0)) + assert_equal(N.sum(x,0), sum(x,0)) + assert_equal(N.product(x,axis=0), product(x,axis=0)) + assert_equal(N.product(x,0), product(x,0)) + assert_equal(N.product(filled(xm,1),axis=0), product(xm,axis=0)) + s = (3,4) + x.shape = y.shape = xm.shape = ym.shape = s + if len(s) > 1: + assert_equal(N.concatenate((x,y),1), concatenate((xm,ym),1)) + assert_equal(N.add.reduce(x,1), add.reduce(x,1)) + assert_equal(N.sum(x,1), sum(x,1)) + assert_equal(N.product(x,1), product(x,1)) + #......................... + def check_concat(self): + "Tests concatenations." + (x, y, a10, m1, m2, xm, ym, z, zm, xf) = self.d + s = (3,4) + x.shape = y.shape = xm.shape = ym.shape = s + assert_equal(xm.mask, N.reshape(m1, s)) + assert_equal(ym.mask, N.reshape(m2, s)) + xmym = concatenate((xm,ym),1) + assert_equal(N.concatenate((x,y),1), xmym) + assert_equal(N.concatenate((xm.mask,ym.mask),1), xmym._mask) + #........................ 
+ def check_testCI(self): + "Tests conversions and indexing" + x1 = N.array([1,2,4,3]) + x2 = array(x1, mask=[1,0,0,0]) + x3 = array(x1, mask=[0,1,0,1]) + x4 = array(x1) + # test conversion to strings + junk, garbage = str(x2), repr(x2) + assert_equal(N.sort(x1),sort(x2, fill_value=0)) + # tests of indexing + assert type(x2[1]) is type(x1[1]) + assert x1[1] == x2[1] + assert x2[0] is masked + assert_equal(x1[2],x2[2]) + assert_equal(x1[2:5],x2[2:5]) + assert_equal(x1[:],x2[:]) + assert_equal(x1[1:], x3[1:]) + x1[2] = 9 + x2[2] = 9 + assert_equal(x1,x2) + x1[1:3] = 99 + x2[1:3] = 99 + assert_equal(x1,x2) + x2[1] = masked + assert_equal(x1,x2) + x2[1:3] = masked + assert_equal(x1,x2) + x2[:] = x1 + x2[1] = masked + assert allequal(getmask(x2),array([0,1,0,0])) + x3[:] = masked_array([1,2,3,4],[0,1,1,0]) + assert allequal(getmask(x3), array([0,1,1,0])) + x4[:] = masked_array([1,2,3,4],[0,1,1,0]) + assert allequal(getmask(x4), array([0,1,1,0])) + assert allequal(x4, array([1,2,3,4])) + x1 = N.arange(5)*1.0 + x2 = masked_values(x1, 3.0) + assert_equal(x1,x2) + assert allequal(array([0,0,0,1,0],MaskType), x2.mask) +#FIXME: Well, eh, fill_value is now a property assert_equal(3.0, x2.fill_value()) + assert_equal(3.0, x2.fill_value) + x1 = array([1,'hello',2,3],object) + x2 = N.array([1,'hello',2,3],object) + s1 = x1[1] + s2 = x2[1] + assert_equal(type(s2), str) + assert_equal(type(s1), str) + assert_equal(s1, s2) + assert x1[1:1].shape == (0,) + #........................ + def check_testCopySize(self): + "Tests of some subtle points of copying and sizing." 
+ n = [0,0,1,0,0] + m = make_mask(n) + m2 = make_mask(m) + assert(m is m2) + m3 = make_mask(m, copy=1) + assert(m is not m3) + + x1 = N.arange(5) + y1 = array(x1, mask=m) + assert( y1._data is x1) + assert( allequal(x1,y1.raw_data())) + assert( y1.mask is m) + + y1a = array(y1) + assert( y1a.raw_data() is y1.raw_data()) + assert( y1a.mask is y1.mask) + + y2 = array(x1, mask=m) + assert( y2.raw_data() is x1) + assert( y2.mask is m) + assert( y2[2] is masked) + y2[2] = 9 + assert( y2[2] is not masked) + assert( y2.mask is not m) + assert( allequal(y2.mask, 0)) + + y3 = array(x1*1.0, mask=m) + assert(filled(y3).dtype is (x1*1.0).dtype) + + x4 = arange(4) + x4[2] = masked + y4 = resize(x4, (8,)) + assert_equal(concatenate([x4,x4]), y4) + assert_equal(getmask(y4),[0,0,1,0,0,0,1,0]) + y5 = repeat(x4, (2,2,2,2), axis=0) + assert_equal(y5, [0,0,1,1,2,2,3,3]) + y6 = repeat(x4, 2, axis=0) + assert_equal(y5, y6) + y7 = x4.repeat((2,2,2,2), axis=0) + assert_equal(y5,y7) + y8 = x4.repeat(2,0) + assert_equal(y5,y8) + + y9 = x4.copy() + assert_equal(y9._data, x4._data) + assert_equal(y9._mask, x4._mask) + + #........................ + def check_testOddFeatures_1(self): + "Test of other odd features" + x = arange(20) + x = x.reshape(4,5) + x.flat[5] = 12 + assert x[1,0] == 12 + z = x + 10j * x + assert_equal(z.real, x) + assert_equal(z.imag, 10*x) + assert_equal((z*conjugate(z)).real, 101*x*x) + z.imag[...] 
= 0.0 + + x = arange(10) + x[3] = masked + assert str(x[3]) == str(masked) + c = x >= 8 + assert count(where(c,masked,masked)) == 0 + assert shape(where(c,masked,masked)) == c.shape + # + z = where(c , x, masked) + assert z.dtype is x.dtype + assert z[3] is masked + assert z[4] is masked + assert z[7] is masked + assert z[8] is not masked + assert z[9] is not masked + assert_equal(x,z) + # + z = where(c , masked, x) + assert z.dtype is x.dtype + assert z[3] is masked + assert z[4] is not masked + assert z[7] is not masked + assert z[8] is masked + assert z[9] is masked + # + z = masked_where(c, x) + assert z.dtype is x.dtype + assert z[3] is masked + assert z[4] is not masked + assert z[7] is not masked + assert z[8] is masked + assert z[9] is masked + assert_equal(x,z) + # + #........................ + def check_testOddFeatures_2(self): + "Tests some more features." + x = array([1.,2.,3.,4.,5.]) + c = array([1,1,1,0,0]) + x[2] = masked + z = where(c, x, -x) + assert_equal(z, [1.,2.,0., -4., -5]) + c[0] = masked + z = where(c, x, -x) + assert_equal(z, [1.,2.,0., -4., -5]) + assert z[0] is masked + assert z[1] is not masked + assert z[2] is masked + # + x = arange(6) + x[5] = masked + y = arange(6)*10 + y[2] = masked + c = array([1,1,1,0,0,0], mask=[1,0,0,0,0,0]) + cm = c.filled(1) + z = where(c,x,y) + zm = where(cm,x,y) + assert_equal(z, zm) + assert getmask(zm) is nomask + assert_equal(zm, [0,1,2,30,40,50]) + z = where(c, masked, 1) + assert_equal(z, [99,99,99,1,1,1]) + z = where(c, 1, masked) + assert_equal(z, [99, 1, 1, 99, 99, 99]) + #........................ + def check_testOddFeatures_3(self): + """Tests some generic features.""" + atest = ones((10,10,10), dtype=float_) + btest = zeros(atest.shape, MaskType) + ctest = masked_where(btest,atest) + assert_equal(atest,ctest) + #........................ + def check_maskingfunctions(self): + "Tests masking functions." 
+ x = array([1.,2.,3.,4.,5.]) + x[2] = masked + assert_equal(masked_where(greater(x, 2), x), masked_greater(x,2)) + assert_equal(masked_where(greater_equal(x, 2), x), masked_greater_equal(x,2)) + assert_equal(masked_where(less(x, 2), x), masked_less(x,2)) + assert_equal(masked_where(less_equal(x, 2), x), masked_less_equal(x,2)) + assert_equal(masked_where(not_equal(x, 2), x), masked_not_equal(x,2)) + assert_equal(masked_where(equal(x, 2), x), masked_equal(x,2)) + assert_equal(masked_where(not_equal(x,2), x), masked_not_equal(x,2)) + assert_equal(masked_inside(range(5), 1, 3), [0, 199, 199, 199, 4]) + assert_equal(masked_outside(range(5), 1, 3),[199,1,2,3,199]) + assert_equal(masked_inside(array(range(5), mask=[1,0,0,0,0]), 1, 3).mask, [1,1,1,1,0]) + assert_equal(masked_outside(array(range(5), mask=[0,1,0,0,0]), 1, 3).mask, [1,1,0,0,1]) + assert_equal(masked_equal(array(range(5), mask=[1,0,0,0,0]), 2).mask, [1,0,1,0,0]) + assert_equal(masked_not_equal(array([2,2,1,2,1], mask=[1,0,0,0,0]), 2).mask, [1,0,1,0,1]) + assert_equal(masked_where([1,1,0,0,0], [1,2,3,4,5]), [99,99,3,4,5]) + #........................ + def check_testTakeTransposeInnerOuter(self): + "Test of take, transpose, inner, outer products" + x = arange(24) + y = N.arange(24) + x[5:6] = masked + x = x.reshape(2,3,4) + y = y.reshape(2,3,4) + assert_equal(N.transpose(y,(2,0,1)), transpose(x,(2,0,1))) + assert_equal(N.take(y, (2,0,1), 1), take(x, (2,0,1), 1)) + assert_equal(N.inner(filled(x,0),filled(y,0)), + inner(x, y)) + assert_equal(N.outer(filled(x,0),filled(y,0)), + outer(x, y)) + y = array(['abc', 1, 'def', 2, 3], object) + y[2] = masked + t = take(y,[0,3,4]) + assert t[0] == 'abc' + assert t[1] == 2 + assert t[2] == 3 + #........................ 
+ def check_testInplace(self): + """Test of inplace operations and rich comparisons""" + y = arange(10) + + x = arange(10) + xm = arange(10) + xm[2] = masked + x += 1 + assert_equal(x, y+1) + xm += 1 + assert_equal(xm, y+1) + + x = arange(10) + xm = arange(10) + xm[2] = masked + x -= 1 + assert_equal(x, y-1) + xm -= 1 + assert_equal(xm, y-1) + + x = arange(10)*1.0 + xm = arange(10)*1.0 + xm[2] = masked + x *= 2.0 + assert_equal(x, y*2) + xm *= 2.0 + assert_equal(xm, y*2) + + x = arange(10)*2 + xm = arange(10)*2 + xm[2] = masked + x /= 2 + assert_equal(x, y) + xm /= 2 + assert_equal(xm, y) + + x = arange(10)*1.0 + xm = arange(10)*1.0 + xm[2] = masked + x /= 2.0 + assert_equal(x, y/2.0) + xm /= arange(10) + assert_equal(xm, ones((10,))) + + x = arange(10).astype(float_) + xm = arange(10) + xm[2] = masked + id1 = id(x.raw_data()) + x += 1. + assert id1 == id(x.raw_data()) + assert_equal(x, y+1.) + + x = arange(10, dtype=float_) + xm = arange(10, dtype=float_) + xm[2] = masked + m = xm.mask + a = arange(10, dtype=float_) + a[-1] = masked + x += a + xm += a + assert_equal(x,y+a) + assert_equal(xm,y+a) + assert_equal(xm.mask, mask_or(m,a.mask)) + + x = arange(10, dtype=float_) + xm = arange(10, dtype=float_) + xm[2] = masked + m = xm.mask + a = arange(10, dtype=float_) + a[-1] = masked + x -= a + xm -= a + assert_equal(x,y-a) + assert_equal(xm,y-a) + assert_equal(xm.mask, mask_or(m,a.mask)) + + x = arange(10, dtype=float_) + xm = arange(10, dtype=float_) + xm[2] = masked + m = xm.mask + a = arange(10, dtype=float_) + a[-1] = masked + x *= a + xm *= a + assert_equal(x,y*a) + assert_equal(xm,y*a) + assert_equal(xm.mask, mask_or(m,a.mask)) + + x = arange(10, dtype=float_) + xm = arange(10, dtype=float_) + xm[2] = masked + m = xm.mask + a = arange(10, dtype=float_) + a[-1] = masked + x /= a + xm /= a + assert_equal(x,y/a) + assert_equal(xm,y/a) + assert_equal(xm.mask, mask_or(mask_or(m,a.mask), (a==0))) + #........................ 
+ def check_testPickle(self): + "Test of pickling" + import pickle + x = arange(12) + x[4:10:2] = masked + x = x.reshape(4,3) + s = pickle.dumps(x) + y = pickle.loads(s) + assert_equal(x,y) + #....................... + def check_testMasked(self): + "Test of masked element" + x = arange(6) + x[1] = masked + assert(str(masked) == '--') + assert(x[1] is masked) + assert_equal(filled(x[1], 0), 0) + # don't know why these should raise an exception... + #self.failUnlessRaises(Exception, lambda x,y: x+y, masked, masked) + #self.failUnlessRaises(Exception, lambda x,y: x+y, masked, 2) + #self.failUnlessRaises(Exception, lambda x,y: x+y, masked, xx) + #self.failUnlessRaises(Exception, lambda x,y: x+y, xx, masked) + #........................ + def check_testToPython(self): + "Tests some communication issues with Python." + assert_equal(1, int(array(1))) + assert_equal(1.0, float(array(1))) + assert_equal(1, int(array([[[1]]]))) + assert_equal(1.0, float(array([[1]]))) + self.failUnlessRaises(ValueError, float, array([1,1])) + assert N.isnan(float(array([1],mask=[1]))) +#TODO: Check how bool works... +#TODO: self.failUnless(bool(array([0,1]))) +#TODO: self.failUnless(bool(array([0,0],mask=[0,1]))) +#TODO: self.failIf(bool(array([0,0]))) +#TODO: self.failIf(bool(array([0,0],mask=[0,0]))) + #.......................... + def check_testScalarArithmetic(self): + "Tests some scalar arithmetics on MaskedArrays." + xm = array(0, mask=1) + assert((1/array(0)).mask) + assert((1 + xm).mask) + assert((-xm).mask) + assert((-xm).mask) + assert(maximum(xm, xm).mask) + assert(minimum(xm, xm).mask) + assert(xm.filled().dtype is xm.data.dtype) + x = array(0, mask=0) + assert(x.filled() is x.data) + assert_equal(str(xm), str(masked_print_option)) + #........................ + def check_testArrayMethods(self): + "Tests some MaskedArray methods." 
+ a = array([1,3,2]) + b = array([1,3,2], mask=[1,0,1]) + assert_equal(a.any(), a.data.any()) + assert_equal(a.all(), a.data.all()) + assert_equal(a.argmax(), a.data.argmax()) + assert_equal(a.argmin(), a.data.argmin()) + assert_equal(a.choose(0,1,2,3,4), a.data.choose(0,1,2,3,4)) + assert_equal(a.compress([1,0,1]), a.data.compress([1,0,1])) + assert_equal(a.conj(), a.data.conj()) + assert_equal(a.conjugate(), a.data.conjugate()) + # + m = array([[1,2],[3,4]]) + assert_equal(m.diagonal(), m.data.diagonal()) + assert_equal(a.sum(), a.data.sum()) + assert_equal(a.take([1,2]), a.data.take([1,2])) + assert_equal(m.transpose(), m.data.transpose()) + #........................ + def check_testArrayAttributes(self): + "Tests some basic array attributes." + a = array([1,3,2]) + b = array([1,3,2], mask=[1,0,1]) + assert_equal(a.ndim, 1) + assert_equal(b.ndim, 1) + assert_equal(a.size, 3) + assert_equal(b.size, 3) + assert_equal(a.shape, (3,)) + assert_equal(b.shape, (3,)) + #........................ + def check_testSingleElementSubscript(self): + "Tests single element subscripts of Maskedarrays." + a = array([1,3,2]) + b = array([1,3,2], mask=[1,0,1]) + assert_equal(a[0].shape, ()) + assert_equal(b[0].shape, ()) + assert_equal(b[1].shape, ()) + #........................ + def check_maskcreation(self): + "Tests how masks are initialized at the creation of Maskedarrays." + data = arange(24, dtype=float_) + data[[3,6,15]] = masked + dma_1 = MaskedArray(data) + assert_equal(dma_1.mask, data.mask) + dma_2 = MaskedArray(dma_1) + assert_equal(dma_2.mask, dma_1.mask) + dma_3 = MaskedArray(dma_1, mask=[1,0,0,0]*6) + fail_if_equal(dma_3.mask, dma_1.mask) + + def check_testAverage1(self): + "Test of average." 
+ ott = array([0.,1.,2.,3.], mask=[1,0,0,0]) + assert_equal(2.0, average(ott,axis=0)) + assert_equal(2.0, average(ott, weights=[1., 1., 2., 1.])) + result, wts = average(ott, weights=[1.,1.,2.,1.], returned=1) + assert_equal(2.0, result) + assert(wts == 4.0) + ott[:] = masked + assert(average(ott,axis=0) is masked) + ott = array([0.,1.,2.,3.], mask=[1,0,0,0]) + ott = ott.reshape(2,2) + ott[:,1] = masked + assert_equal(average(ott,axis=0), [2.0, 0.0]) + assert(average(ott,axis=1)[0] is masked) + assert_equal([2.,0.], average(ott, axis=0)) + result, wts = average(ott, axis=0, returned=1) + assert_equal(wts, [1., 0.]) + + def check_testAverage2(self): + "More tests of average." + w1 = [0,1,1,1,1,0] + w2 = [[0,1,1,1,1,0],[1,0,0,0,0,1]] + x = arange(6) + assert_equal(average(x, axis=0), 2.5) + assert_equal(average(x, axis=0, weights=w1), 2.5) + y = array([arange(6), 2.0*arange(6)]) + assert_equal(average(y, None), N.add.reduce(N.arange(6))*3./12.) + assert_equal(average(y, axis=0), N.arange(6) * 3./2.) + assert_equal(average(y, axis=1), [average(x,axis=0), average(x,axis=0) * 2.0]) + assert_equal(average(y, None, weights=w2), 20./6.) + assert_equal(average(y, axis=0, weights=w2), [0.,1.,2.,3.,4.,10.]) + assert_equal(average(y, axis=1), [average(x,axis=0), average(x,axis=0) * 2.0]) + m1 = zeros(6) + m2 = [0,0,1,1,0,0] + m3 = [[0,0,1,1,0,0],[0,1,1,1,1,0]] + m4 = ones(6) + m5 = [0, 1, 1, 1, 1, 1] + assert_equal(average(masked_array(x, m1),axis=0), 2.5) + assert_equal(average(masked_array(x, m2),axis=0), 2.5) + assert(average(masked_array(x, m4),axis=0) is masked) + assert_equal(average(masked_array(x, m5),axis=0), 0.0) + assert_equal(count(average(masked_array(x, m4),axis=0)), 0) + z = masked_array(y, m3) + assert_equal(average(z, None), 20./6.) 
+ assert_equal(average(z, axis=0), [0.,1.,99.,99.,4.0, 7.5]) + assert_equal(average(z, axis=1), [2.5, 5.0]) + assert_equal(average(z,axis=0, weights=w2), [0.,1., 99., 99., 4.0, 10.0]) + + def check_testAverage3(self): + "Yet more tests of average!" + a = arange(6) + b = arange(6) * 3 + r1, w1 = average([[a,b],[b,a]], axis=1, returned=1) + assert_equal(shape(r1) , shape(w1)) + assert_equal(r1.shape , w1.shape) + r2, w2 = average(ones((2,2,3)), axis=0, weights=[3,1], returned=1) + assert_equal(shape(w2) , shape(r2)) + r2, w2 = average(ones((2,2,3)), returned=1) + assert_equal(shape(w2) , shape(r2)) + r2, w2 = average(ones((2,2,3)), weights=ones((2,2,3)), returned=1) + assert_equal(shape(w2), shape(r2)) + a2d = array([[1,2],[0,4]], float) + a2dm = masked_array(a2d, [[0,0],[1,0]]) + a2da = average(a2d, axis=0) + assert_equal(a2da, [0.5, 3.0]) + a2dma = average(a2dm, axis=0) + assert_equal(a2dma, [1.0, 3.0]) + a2dma = average(a2dm, axis=None) + assert_equal(a2dma, 7./3.) + a2dma = average(a2dm, axis=1) + assert_equal(a2dma, [1.5, 4.0]) + + def check_backwards(self): + "Tests backward compatibility with numpy.core.ma" + import numpy.core.ma as nma + x = nma.arange(5) + x[2] = nma.masked + X = masked_array(x, mask=x._mask) + assert_equal(X._mask, x.mask) + assert_equal(X._data, x._data) + X = masked_array(x) + assert_equal(X._data, x._data) + assert_equal(X._mask, x.mask) + assert_equal(getmask(x), [0,0,1,0,0]) + +#.............................................................................. + +class test_ufuncs(NumpyTestCase): + "Test class for the application of ufuncs on MaskedArrays." + def setUp(self): + "Base data definition." + self.d = (array([1.0, 0, -1, pi/2]*2, mask=[0,1]+[0]*6), + array([1.0, 0, -1, pi/2]*2, mask=[1,0]+[0]*6),) + + def check_testUfuncRegression(self): + "Tests new ufuncs on MaskedArrays." 
+ for f in ['sqrt', 'log', 'log10', 'exp', 'conjugate', + 'sin', 'cos', 'tan', + 'arcsin', 'arccos', 'arctan', + 'sinh', 'cosh', 'tanh', + 'arcsinh', + 'arccosh', + 'arctanh', + 'absolute', 'fabs', 'negative', + # 'nonzero', 'around', + 'floor', 'ceil', + # 'sometrue', 'alltrue', + 'logical_not', + 'add', 'subtract', 'multiply', + 'divide', 'true_divide', 'floor_divide', + 'remainder', 'fmod', 'hypot', 'arctan2', + 'equal', 'not_equal', 'less_equal', 'greater_equal', + 'less', 'greater', + 'logical_and', 'logical_or', 'logical_xor', + ]: + #print f + try: + uf = getattr(umath, f) + except AttributeError: + uf = getattr(fromnumeric, f) + mf = getattr(maskedarray, f) + args = self.d[:uf.nin] + ur = uf(*args) + mr = mf(*args) + assert_equal(ur.filled(0), mr.filled(0), f) + assert_mask_equal(ur.mask, mr.mask) + #........................ + def test_reduce(self): + "Tests reduce on MaskedArrays." + a = self.d[0] + assert(not alltrue(a,axis=0)) + assert(sometrue(a,axis=0)) + assert_equal(sum(a[:3],axis=0), 0) + assert_equal(product(a,axis=0), 0) + #........................ + def test_minmax(self): + "Tests extrema on MaskedArrays." + a = arange(1,13).reshape(3,4) + amask = masked_where(a < 5,a) + assert_equal(amask.max(), a.max()) + assert_equal(amask.min(), 5) + assert_equal(amask.max(0), a.max(0)) + assert_equal(amask.min(0), [5,6,7,8]) + assert(amask.max(1)[0].mask) + assert(amask.min(1)[0].mask) + +#.............................................................................. + +class test_array_methods(NumpyTestCase): + "Test class for miscellaneous MaskedArrays methods." + def setUp(self): + "Base data definition." 
+ x = N.array([ 8.375, 7.545, 8.828, 8.5 , 1.757, 5.928, + 8.43 , 7.78 , 9.865, 5.878, 8.979, 4.732, + 3.012, 6.022, 5.095, 3.116, 5.238, 3.957, + 6.04 , 9.63 , 7.712, 3.382, 4.489, 6.479, + 7.189, 9.645, 5.395, 4.961, 9.894, 2.893, + 7.357, 9.828, 6.272, 3.758, 6.693, 0.993]) + X = x.reshape(6,6) + XX = x.reshape(3,2,2,3) + + m = N.array([0, 1, 0, 1, 0, 0, + 1, 0, 1, 1, 0, 1, + 0, 0, 0, 1, 0, 1, + 0, 0, 0, 1, 1, 1, + 1, 0, 0, 1, 0, 0, + 0, 0, 1, 0, 1, 0]) + mx = array(data=x,mask=m) + mX = array(data=X,mask=m.reshape(X.shape)) + mXX = array(data=XX,mask=m.reshape(XX.shape)) + + m2 = N.array([1, 1, 0, 1, 0, 0, + 1, 1, 1, 1, 0, 1, + 0, 0, 1, 1, 0, 1, + 0, 0, 0, 1, 1, 1, + 1, 0, 0, 1, 1, 0, + 0, 0, 1, 0, 1, 1]) + m2x = array(data=x,mask=m2) + m2X = array(data=X,mask=m2.reshape(X.shape)) + m2XX = array(data=XX,mask=m2.reshape(XX.shape)) + self.d = (x,X,XX,m,mx,mX,mXX,m2x,m2X,m2XX) + + #------------------------------------------------------ + def test_trace(self): + "Tests trace on MaskedArrays." + (x,X,XX,m,mx,mX,mXX,m2x,m2X,m2XX) = self.d + mXdiag = mX.diagonal() + assert_equal(mX.trace(), mX.diagonal().compressed().sum()) + assert_almost_equal(mX.trace(), + X.trace() - sum(mXdiag.mask*X.diagonal(),axis=0)) + + def test_clip(self): + "Tests clip on MaskedArrays." + (x,X,XX,m,mx,mX,mXX,m2x,m2X,m2XX) = self.d + clipped = mx.clip(2,8) + assert_equal(clipped.mask,mx.mask) + assert_equal(clipped.data,x.clip(2,8)) + assert_equal(clipped.data,mx.data.clip(2,8)) + + def test_ptp(self): + "Tests ptp on MaskedArrays." + (x,X,XX,m,mx,mX,mXX,m2x,m2X,m2XX) = self.d + (n,m) = X.shape + assert_equal(mx.ptp(),mx.compressed().ptp()) + rows = N.zeros(n,N.float_) + cols = N.zeros(m,N.float_) + for k in range(m): + cols[k] = mX[:,k].compressed().ptp() + for k in range(n): + rows[k] = mX[k].compressed().ptp() + assert_equal(mX.ptp(0),cols) + assert_equal(mX.ptp(1),rows) + + def test_swapaxes(self): + "Tests swapaxes on MaskedArrays." 
+ (x,X,XX,m,mx,mX,mXX,m2x,m2X,m2XX) = self.d + mXswapped = mX.swapaxes(0,1) + assert_equal(mXswapped[-1],mX[:,-1]) + mXXswapped = mXX.swapaxes(0,2) + assert_equal(mXXswapped.shape,(2,2,3,3)) + + def test_cumsumprod(self): + "Tests cumsum & cumprod on MaskedArrays." + (x,X,XX,m,mx,mX,mXX,m2x,m2X,m2XX) = self.d + mXcp = mX.cumsum(0) + assert_equal(mXcp.data,mX.filled(0).cumsum(0)) + mXcp = mX.cumsum(1) + assert_equal(mXcp.data,mX.filled(0).cumsum(1)) + # + mXcp = mX.cumprod(0) + assert_equal(mXcp.data,mX.filled(1).cumprod(0)) + mXcp = mX.cumprod(1) + assert_equal(mXcp.data,mX.filled(1).cumprod(1)) + + def test_varstd(self): + "Tests var & std on MaskedArrays." + (x,X,XX,m,mx,mX,mXX,m2x,m2X,m2XX) = self.d + assert_almost_equal(mX.var(axis=None),mX.compressed().var()) + assert_almost_equal(mX.std(axis=None),mX.compressed().std()) + assert_equal(mXX.var(axis=3).shape,XX.var(axis=3).shape) + assert_equal(mX.var().shape,X.var().shape) + (mXvar0,mXvar1) = (mX.var(axis=0), mX.var(axis=1)) + for k in range(6): + assert_almost_equal(mXvar1[k],mX[k].compressed().var()) + assert_almost_equal(mXvar0[k],mX[:,k].compressed().var()) + assert_almost_equal(N.sqrt(mXvar0[k]), mX[:,k].compressed().std()) + + def test_argmin(self): + "Tests argmin & argmax on MaskedArrays." + (x,X,XX,m,mx,mX,mXX,m2x,m2X,m2XX) = self.d + # + assert_equal(mx.argmin(),35) + assert_equal(mX.argmin(),35) + assert_equal(m2x.argmin(),4) + assert_equal(m2X.argmin(),4) + assert_equal(mx.argmax(),28) + assert_equal(mX.argmax(),28) + assert_equal(m2x.argmax(),31) + assert_equal(m2X.argmax(),31) + # + assert_equal(mX.argmin(0), [2,2,2,5,0,5]) + assert_equal(m2X.argmin(0), [2,2,4,5,0,4]) + assert_equal(mX.argmax(0), [0,5,0,5,4,0]) + assert_equal(m2X.argmax(0), [5,5,0,5,1,0]) + # + assert_equal(mX.argmin(1), [4,1,0,0,5,5,]) + assert_equal(m2X.argmin(1), [4,4,0,0,5,3]) + assert_equal(mX.argmax(1), [2,4,1,1,4,1]) + assert_equal(m2X.argmax(1), [2,4,1,1,1,1]) + + def check_put(self): + "Tests put." 
+ d = arange(5) + n = [0,0,0,1,1] + m = make_mask(n) + x = array(d, mask = m) + assert( x[3] is masked) + assert( x[4] is masked) + x[[1,4]] = [10,40] + assert( x.mask is not m) + assert( x[3] is masked) + assert( x[4] is not masked) + assert_equal(x, [0,10,2,-1,40]) + # + x = masked_array(arange(10), mask=[1,0,0,0,0]*2) + i = [0,2,4,6] + x.put(i, [6,4,2,0]) + assert_equal(x, asarray([6,1,4,3,2,5,0,7,8,9,])) + assert_equal(x.mask, [0,0,0,0,0,1,0,0,0,0]) + x.put(i, masked_array([0,2,4,6],[1,0,1,0])) + assert_array_equal(x, [0,1,2,3,4,5,6,7,8,9,]) + assert_equal(x.mask, [1,0,0,0,1,1,0,0,0,0]) + # + x = masked_array(arange(10), mask=[1,0,0,0,0]*2) + put(x, i, [6,4,2,0]) + assert_equal(x, asarray([6,1,4,3,2,5,0,7,8,9,])) + assert_equal(x.mask, [0,0,0,0,0,1,0,0,0,0]) + put(x, i, masked_array([0,2,4,6],[1,0,1,0])) + assert_array_equal(x, [0,1,2,3,4,5,6,7,8,9,]) + assert_equal(x.mask, [1,0,0,0,1,1,0,0,0,0]) + + def check_take(self): + "Tests take" + x = masked_array([10,20,30,40],[0,1,0,1]) + assert_equal(x.take([0,0,3]), masked_array([10, 10, 40], [0,0,1]) ) + assert_equal(x.take([0,0,3]), x[[0,0,3]]) + assert_equal(x.take([[0,1],[0,1]]), + masked_array([[10,20],[10,20]], [[0,1],[0,1]]) ) + # + x = array([[10,20,30],[40,50,60]], mask=[[0,0,1],[1,0,0,]]) + assert_equal(x.take([0,2], axis=1), + array([[10,30],[40,60]], mask=[[0,1],[1,0]])) + assert_equal(take(x, [0,2], axis=1), + array([[10,30],[40,60]], mask=[[0,1],[1,0]])) + #........................ 
+ def check_anyall(self): + """Checks the any/all methods/functions.""" + x = N.array([[ 0.13, 0.26, 0.90], + [ 0.28, 0.33, 0.63], + [ 0.31, 0.87, 0.70]]) + m = N.array([[ True, False, False], + [False, False, False], + [True, True, False]], dtype=N.bool_) + mx = masked_array(x, mask=m) + xbig = N.array([[False, False, True], + [False, False, True], + [False, True, True]], dtype=N.bool_) + mxbig = (mx > 0.5) + mxsmall = (mx < 0.5) + # + assert (mxbig.all()==False) + assert (mxbig.any()==True) + assert_equal(mxbig.all(0),[False, False, True]) + assert_equal(mxbig.all(1), [False, False, True]) + assert_equal(mxbig.any(0),[False, False, True]) + assert_equal(mxbig.any(1), [True, True, True]) + # + assert (mxsmall.all()==False) + assert (mxsmall.any()==True) + assert_equal(mxsmall.all(0), [True, True, False]) + assert_equal(mxsmall.all(1), [False, False, False]) + assert_equal(mxsmall.any(0), [True, True, False]) + assert_equal(mxsmall.any(1), [True, True, False]) + # + X = N.matrix(x) + mX = masked_array(X, mask=m) + mXbig = (mX > 0.5) + mXsmall = (mX < 0.5) + # + assert (mXbig.all()==False) + assert (mXbig.any()==True) + assert_equal(mXbig.all(0), N.matrix([False, False, True])) + assert_equal(mXbig.all(1), N.matrix([False, False, True]).T) + assert_equal(mXbig.any(0), N.matrix([False, False, True])) + assert_equal(mXbig.any(1), N.matrix([ True, True, True]).T) + # + assert (mXsmall.all()==False) + assert (mXsmall.any()==True) + assert_equal(mXsmall.all(0), N.matrix([True, True, False])) + assert_equal(mXsmall.all(1), N.matrix([False, False, False]).T) + assert_equal(mXsmall.any(0), N.matrix([True, True, False])) + assert_equal(mXsmall.any(1), N.matrix([True, True, False]).T) + + def check_keepmask(self): + "Tests the keep mask flag" + x = masked_array([1,2,3], mask=[1,0,0]) + mx = masked_array(x) + assert_equal(mx.mask, x.mask) + mx = masked_array(x, mask=[0,1,0], keep_mask=False) + assert_equal(mx.mask, [0,1,0]) + mx = masked_array(x, mask=[0,1,0], keep_mask=True) 
+ assert_equal(mx.mask, [1,1,0]) + #We default to true + mx = masked_array(x, mask=[0,1,0]) + assert_equal(mx.mask, [1,1,0]) +#.............................................................................. + +#.............................................................................. + +class test_subclassing(NumpyTestCase): + """Test suite for masked subclasses of ndarray.""" + + class SubArray(N.ndarray): + """Defines a generic N.ndarray subclass, that stores some metadata + in the dictionary `info`.""" + def __new__(cls,arr,info={}): + x = N.array(arr).view(cls) + x.info = info + return x + def __array_finalize__(self, obj): + if hasattr(obj,'info'): + self.info = obj.info + return + + def check_subclassing(self): + "Tests whether the subclass is kept." + x = N.arange(5) + m = [0,0,1,0,0] + xsub = test_subclassing.SubArray(x) + xmsub = masked_array(xsub, mask=m) + assert isinstance(xmsub, MaskedArray) + assert_equal(xmsub._data, xsub) + assert isinstance(xmsub._data, test_subclassing.SubArray) + +############################################################################### +#------------------------------------------------------------------------------ +if __name__ == "__main__": + NumpyTest().run() \ No newline at end of file Property changes on: trunk/Lib/sandbox/maskedarray/tests/test_core.py ___________________________________________________________________ Name: svn:keywords + Date Author Revision Id Added: trunk/Lib/sandbox/maskedarray/tests/test_extras.py =================================================================== --- trunk/Lib/sandbox/maskedarray/tests/test_extras.py 2006-12-14 16:31:20 UTC (rev 2411) +++ trunk/Lib/sandbox/maskedarray/tests/test_extras.py 2006-12-15 00:59:51 UTC (rev 2412) @@ -0,0 +1,105 @@ +# pylint: disable-msg=W0611, W0612, W0511 +"""Tests suite for MaskedArray. 
+Adapted from the original test_ma by Pierre Gerard-Marchant + +:author: Pierre Gerard-Marchant +:contact: pierregm_at_uga_dot_edu +:version: $Id$ +""" +__author__ = "Pierre GF Gerard-Marchant ($Author$)" +__version__ = '1.0' +__revision__ = "$Revision$" +__date__ = '$Date$' + +import numpy as N +from numpy.testing import NumpyTest, NumpyTestCase +from numpy.testing.utils import build_err_msg + +import maskedarray.testutils +reload(maskedarray.testutils) +from maskedarray.testutils import * + +import maskedarray.core +reload(maskedarray.core) +from maskedarray.core import * +import maskedarray.extras +reload(maskedarray.extras) +from maskedarray.extras import * + + +class test_concatenator(NumpyTestCase): + "Tests for mr_, the equivalent of r_ for masked arrays." + def check_1d(self): + "Tests mr_ on 1D arrays." + assert_array_equal(mr_[1,2,3,4,5,6],array([1,2,3,4,5,6])) + b = ones(5) + m = [1,0,0,0,0] + d = masked_array(b,mask=m) + c = mr_[d,0,0,d] + assert(isinstance(c,MaskedArray) or isinstance(c,core.MaskedArray)) + assert_array_equal(c,[1,1,1,1,1,0,0,1,1,1,1,1]) + assert_array_equal(c.mask, mr_[m,0,0,m]) + + def check_2d(self): + "Tests mr_ on 2D arrays." + a_1 = rand(5,5) + a_2 = rand(5,5) + m_1 = N.round_(rand(5,5),0) + m_2 = N.round_(rand(5,5),0) + b_1 = masked_array(a_1,mask=m_1) + b_2 = masked_array(a_2,mask=m_2) + d = mr_['1',b_1,b_2] # append columns + assert(d.shape == (5,10)) + assert_array_equal(d[:,:5],b_1) + assert_array_equal(d[:,5:],b_2) + assert_array_equal(d.mask, N.r_['1',m_1,m_2]) + d = mr_[b_1,b_2] + assert(d.shape == (10,5)) + assert_array_equal(d[:5,:],b_1) + assert_array_equal(d[5:,:],b_2) + assert_array_equal(d.mask, N.r_[m_1,m_2]) + +class test_notmasked(NumpyTestCase): + "Tests notmasked_edges and notmasked_contiguous." 
+ def check_edges(self): + "Tests unmasked_edges" + a = masked_array(N.arange(24).reshape(3,8), + mask=[[0,0,0,0,1,1,1,0], + [1,1,1,1,1,1,1,1], + [0,0,0,0,0,0,1,0],]) + # + assert_equal(notmasked_edges(a, None), [0,23]) + # + tmp = notmasked_edges(a, 0) + assert_equal(tmp[0], (array([0,0,0,0,2,2,0]), array([0,1,2,3,4,5,7]))) + assert_equal(tmp[1], (array([2,2,2,2,2,2,2]), array([0,1,2,3,4,5,7]))) + # + tmp = notmasked_edges(a, 1) + assert_equal(tmp[0], (array([0,2,]), array([0,0]))) + assert_equal(tmp[1], (array([0,2,]), array([7,7]))) + + def check_contiguous(self): + "Tests notmasked_contiguous" + a = masked_array(N.arange(24).reshape(3,8), + mask=[[0,0,0,0,1,1,1,1], + [1,1,1,1,1,1,1,1], + [0,0,0,0,0,0,1,0],]) + tmp = notmasked_contiguous(a, None) + assert_equal(tmp[-1], (6, (16,21))) + assert_equal(tmp[-2], (4, (0,3))) + # + tmp = notmasked_contiguous(a, 0) + assert(len(tmp[-1]) == 1) + assert(tmp[-2] is None) + assert_equal(tmp[-3],tmp[-1]) + assert(len(tmp[0]) == 2) + # + tmp = notmasked_contiguous(a, 1) + assert_equal(tmp[0][-1], (4, (0,3))) + assert(tmp[1] is None) + assert_equal(tmp[2][-1], (6, (0,5))) + +############################################################################### +#------------------------------------------------------------------------------ +if __name__ == "__main__": + NumpyTest().run() \ No newline at end of file Property changes on: trunk/Lib/sandbox/maskedarray/tests/test_extras.py ___________________________________________________________________ Name: svn:keywords + Date Author Revision Id Modified: trunk/Lib/sandbox/maskedarray/testutils.py =================================================================== --- trunk/Lib/sandbox/maskedarray/testutils.py 2006-12-14 16:31:20 UTC (rev 2411) +++ trunk/Lib/sandbox/maskedarray/testutils.py 2006-12-15 00:59:51 UTC (rev 2412) @@ -2,12 +2,12 @@ :author: Pierre Gerard-Marchant :contact: pierregm_at_uga_dot_edu -:version: $Id: testutils.py 14 2006-12-04 19:31:13Z pierregm $ +:version: 
$Id$ """ -__author__ = "Pierre GF Gerard-Marchant ($Author: pierregm $)" +__author__ = "Pierre GF Gerard-Marchant ($Author$)" __version__ = "1.0" -__revision__ = "$Revision: 14 $" -__date__ = "$Date: 2006-12-04 14:31:13 -0500 (Mon, 04 Dec 2006) $" +__revision__ = "$Revision$" +__date__ = "$Date$" import numpy as N Property changes on: trunk/Lib/sandbox/maskedarray/testutils.py ___________________________________________________________________ Name: svn:keywords + Date Author Revision Id From scipy-svn at scipy.org Thu Dec 14 23:42:27 2006 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Thu, 14 Dec 2006 22:42:27 -0600 (CST) Subject: [Scipy-svn] r2413 - trunk/Lib/sandbox/maskedarray Message-ID: <20061215044227.52E1D39C00F@new.scipy.org> Author: pierregm Date: 2006-12-14 22:42:25 -0600 (Thu, 14 Dec 2006) New Revision: 2413 Removed: trunk/Lib/sandbox/maskedarray/src/ Modified: trunk/Lib/sandbox/maskedarray/core.py Log: Modified: trunk/Lib/sandbox/maskedarray/core.py =================================================================== --- trunk/Lib/sandbox/maskedarray/core.py 2006-12-15 00:59:51 UTC (rev 2412) +++ trunk/Lib/sandbox/maskedarray/core.py 2006-12-15 04:42:25 UTC (rev 2413) @@ -46,7 +46,7 @@ 'minimum', 'multiply', 'negative', 'nomask', 'nonzero', 'not_equal', 'ones', 'outer', 'outerproduct', - 'product', 'ptp', 'put', 'putmask', + 'power', 'product', 'ptp', 'put', 'putmask', 'rank', 'ravel', 'remainder', 'repeat', 'reshape', 'resize', 'right_shift', 'round_', 'shape', 'sin', 'sinh', 'size', 'sometrue', 'sort', 'sqrt', 'std', From scipy-svn at scipy.org Fri Dec 15 01:39:40 2006 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Fri, 15 Dec 2006 00:39:40 -0600 (CST) Subject: [Scipy-svn] r2414 - trunk/Lib/sandbox/models Message-ID: <20061215063940.E425439C03F@new.scipy.org> Author: timl Date: 2006-12-15 00:39:30 -0600 (Fri, 15 Dec 2006) New Revision: 2414 Modified: trunk/Lib/sandbox/models/bspline_module.py Log: fix weave build error in 
models Modified: trunk/Lib/sandbox/models/bspline_module.py =================================================================== --- trunk/Lib/sandbox/models/bspline_module.py 2006-12-15 04:42:25 UTC (rev 2413) +++ trunk/Lib/sandbox/models/bspline_module.py 2006-12-15 06:39:30 UTC (rev 2414) @@ -128,7 +128,7 @@ eval_ext_code = ''' - int dim[2] = {upper-lower, Nx[0]}; + npy_intp dim[2] = {upper-lower, Nx[0]}; PyArrayObject *basis; double *data; @@ -252,7 +252,7 @@ gram_ext_code = ''' - int dim[2] = {Nknots[0]-m, m}; + npy_intp dim[2] = {Nknots[0]-m, m}; double *data; PyArrayObject *gram; @@ -315,7 +315,7 @@ invband_ext_code = ''' - int dim[2] = {NL[0], NL[1]}; + npy_intp dim[2] = {NL[0], NL[1]}; int i, j; double *data; PyArrayObject *invband; From scipy-svn at scipy.org Fri Dec 15 01:54:33 2006 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Fri, 15 Dec 2006 00:54:33 -0600 (CST) Subject: [Scipy-svn] r2415 - trunk/Lib/sandbox/models Message-ID: <20061215065433.D48DC39C0A1@new.scipy.org> Author: timl Date: 2006-12-15 00:54:23 -0600 (Fri, 15 Dec 2006) New Revision: 2415 Modified: trunk/Lib/sandbox/models/gam.py Log: change tabs to spaces Modified: trunk/Lib/sandbox/models/gam.py =================================================================== --- trunk/Lib/sandbox/models/gam.py 2006-12-15 06:39:30 UTC (rev 2414) +++ trunk/Lib/sandbox/models/gam.py 2006-12-15 06:54:23 UTC (rev 2415) @@ -11,21 +11,21 @@ # taken form smooth.spline in R print "herenow" if n < 50: - nknots = n + nknots = n else: - a1 = N.log(50) / N.log(2) - a2 = N.log(100) / N.log(2) - a3 = N.log(140) / N.log(2) - a4 = N.log(200) / N.log(2) - if n < 200: - nknots = 2**(a1 + (a2 - a1) * (n - 50)/150.) - elif n < 800: - nknots = 2**(a2 + (a3 - a2) * (n - 200)/600.) - elif n < 3200: - nknots = 2**(a3 + (a4 - a3) * (n - 800)/2400.) 
- else: - nknots = 200 + (n - 3200.)**0.2 - knots = _x[N.linspace(0, n-1, nknots).astype(N.int32)] + a1 = N.log(50) / N.log(2) + a2 = N.log(100) / N.log(2) + a3 = N.log(140) / N.log(2) + a4 = N.log(200) / N.log(2) + if n < 200: + nknots = 2**(a1 + (a2 - a1) * (n - 50)/150.) + elif n < 800: + nknots = 2**(a2 + (a3 - a2) * (n - 200)/600.) + elif n < 3200: + nknots = 2**(a3 + (a4 - a3) * (n - 800)/2400.) + else: + nknots = 200 + (n - 3200.)**0.2 + knots = _x[N.linspace(0, n-1, nknots).astype(N.int32)] s = SmoothingSpline(knots) s.gram(d=2) s.target_df = 5 @@ -34,46 +34,46 @@ class offset: def __init__(self, fn, offset): - self.fn = fn - self.offset = offset - + self.fn = fn + self.offset = offset + def __call__(self, *args, **kw): - return self.fn(*args, **kw) + offset + return self.fn(*args, **kw) + offset class results: def __init__(self, Y, alpha, design, smoothers, family, offset): - self.Y = Y - self.alpha = alpha - self.smoothers = smoothers - self.offset = offset - self.family = family - self.design = design - self.offset = offset - self.mu = self(design) + self.Y = Y + self.alpha = alpha + self.smoothers = smoothers + self.offset = offset + self.family = family + self.design = design + self.offset = offset + self.mu = self(design) def __call__(self, design): - return self.family.link.inverse(self.predict(design)) + return self.family.link.inverse(self.predict(design)) def predict(self, design): - return N.sum(self.smoothed(design), axis=0) + self.alpha + return N.sum(self.smoothed(design), axis=0) + self.alpha def smoothed(self, design): - return N.array([self.smoothers[i](design[:,i]) + self.offset[i] for i in range(design.shape[1])]) + return N.array([self.smoothers[i](design[:,i]) + self.offset[i] for i in range(design.shape[1])]) class additive_model: def __init__(self, design, smoothers=None, weights=None): - self.design = design - if weights is not None: - self.weights = weights - else: - self.weights = N.ones(self.design.shape[0]) + self.design = design 
+ if weights is not None: + self.weights = weights + else: + self.weights = N.ones(self.design.shape[0]) - self.smoothers = smoothers or [default_smoother(design[:,i]) for i in range(design.shape[1])] - for i in range(design.shape[1]): - self.smoothers[i].df = 10 - self.family = family.Gaussian() + self.smoothers = smoothers or [default_smoother(design[:,i]) for i in range(design.shape[1])] + for i in range(design.shape[1]): + self.smoothers[i].df = 10 + self.family = family.Gaussian() def __iter__(self): self.iter = 0 @@ -81,82 +81,82 @@ return self def next(self): - _results = self.results; Y = self.results.Y - mu = _results.predict(self.design) - offset = N.zeros(self.design.shape[1], N.float64) - alpha = (Y * self.weights).sum() / self.weights.sum() - for i in range(self.design.shape[1]): - tmp = self.smoothers[i](self.design[:,i]) - self.smoothers[i].smooth(Y - alpha - mu + tmp, x=self.design[:,i], + _results = self.results; Y = self.results.Y + mu = _results.predict(self.design) + offset = N.zeros(self.design.shape[1], N.float64) + alpha = (Y * self.weights).sum() / self.weights.sum() + for i in range(self.design.shape[1]): + tmp = self.smoothers[i](self.design[:,i]) + self.smoothers[i].smooth(Y - alpha - mu + tmp, x=self.design[:,i], weights=self.weights) - tmp2 = self.smoothers[i](self.design[:,i]) - offset[i] = -(tmp2*self.weights).sum() / self.weights.sum() - mu += tmp2 - tmp + tmp2 = self.smoothers[i](self.design[:,i]) + offset[i] = -(tmp2*self.weights).sum() / self.weights.sum() + mu += tmp2 - tmp - return results(Y, alpha, self.design, self.smoothers, self.family, offset) + return results(Y, alpha, self.design, self.smoothers, self.family, offset) def cont(self, tol=1.0e-02): - curdev = (((self.results.Y - self.results.predict(self.design))**2) * self.weights).sum() + curdev = (((self.results.Y - self.results.predict(self.design))**2) * self.weights).sum() if N.fabs((self.dev - curdev) / curdev) < tol: - self.dev = curdev - return False + self.dev = 
curdev + return False - self.iter += 1 + self.iter += 1 self.dev = curdev return True def df_resid(self): - return self.results.Y.shape[0] - N.array([self.smoothers[i].df_fit() for i in range(self.design.shape[1])]).sum() + return self.results.Y.shape[0] - N.array([self.smoothers[i].df_fit() for i in range(self.design.shape[1])]).sum() def estimate_scale(self): - return ((self.results.Y - self.results(self.design))**2).sum() / self.df_resid() + return ((self.results.Y - self.results(self.design))**2).sum() / self.df_resid() def fit(self, Y): - iter(self) - mu = 0 - alpha = (Y * self.weights).sum() / self.weights.sum() + iter(self) + mu = 0 + alpha = (Y * self.weights).sum() / self.weights.sum() - offset = N.zeros(self.design.shape[1], N.float64) + offset = N.zeros(self.design.shape[1], N.float64) - for i in range(self.design.shape[1]): - self.smoothers[i].smooth(Y - alpha - mu, x=self.design[:,i], + for i in range(self.design.shape[1]): + self.smoothers[i].smooth(Y - alpha - mu, x=self.design[:,i], weights=self.weights) - tmp = self.smoothers[i](self.design[:,i]) - offset[i] = (tmp * self.weights).sum() / self.weights.sum() - tmp -= tmp.sum() - mu += tmp + tmp = self.smoothers[i](self.design[:,i]) + offset[i] = (tmp * self.weights).sum() / self.weights.sum() + tmp -= tmp.sum() + mu += tmp - self.results = results(Y, alpha, self.design, self.smoothers, self.family, offset) + self.results = results(Y, alpha, self.design, self.smoothers, self.family, offset) - while self.cont(): - self.results = self.next() + while self.cont(): + self.results = self.next() - return self.results + return self.results class model(glm, additive_model): niter = 10 def __init__(self, design, smoothers=None, family=family.Gaussian()): - glm.__init__(self, design, family=family) - additive_model.__init__(self, design, smoothers=smoothers) - self.family = family + glm.__init__(self, design, family=family) + additive_model.__init__(self, design, smoothers=smoothers) + self.family = family def 
next(self): - _results = self.results; Y = _results.Y - _results.mu = self.family.link.inverse(_results.predict(self.design)) + _results = self.results; Y = _results.Y + _results.mu = self.family.link.inverse(_results.predict(self.design)) self.weights = self.family.weights(_results.mu) Z = _results.predict(self.design) + self.family.link.deriv(_results.mu) * (Y - _results.mu) - m = additive_model(self.design, smoothers=self.smoothers, weights=self.weights) - _results = m.fit(Z) - _results.Y = Y - _results.mu = self.family.link.inverse(_results.predict(self.design)) + m = additive_model(self.design, smoothers=self.smoothers, weights=self.weights) + _results = m.fit(Z) + _results.Y = Y + _results.mu = self.family.link.inverse(_results.predict(self.design)) self.iter += 1 - self.results = _results + self.results = _results - return _results + return _results def estimate_scale(self, Y=None): """ @@ -172,10 +172,10 @@ self.Y = N.asarray(Y, N.float64) iter(self) - alpha = self.Y.mean() - Z = self.family.link(alpha) + self.family.link.deriv(alpha) * (Y - alpha) - m = additive_model(self.design, smoothers=self.smoothers) - self.results = m.fit(Z) + alpha = self.Y.mean() + Z = self.family.link(alpha) + self.family.link.deriv(alpha) * (Y - alpha) + m = additive_model(self.design, smoothers=self.smoothers) + self.results = m.fit(Z) self.results.mu = self.family.link.inverse(self.results.predict(self.design)) self.results.Y = Y From scipy-svn at scipy.org Fri Dec 15 05:52:33 2006 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Fri, 15 Dec 2006 04:52:33 -0600 (CST) Subject: [Scipy-svn] r2416 - trunk/Lib/sandbox/models Message-ID: <20061215105233.5C68939C06B@new.scipy.org> Author: timl Date: 2006-12-15 04:52:28 -0600 (Fri, 15 Dec 2006) New Revision: 2416 Modified: trunk/Lib/sandbox/models/formula.py Log: support passing a namespace argument to formulas in models Modified: trunk/Lib/sandbox/models/formula.py 
=================================================================== --- trunk/Lib/sandbox/models/formula.py 2006-12-15 06:54:23 UTC (rev 2415) +++ trunk/Lib/sandbox/models/formula.py 2006-12-15 10:52:28 UTC (rev 2416) @@ -309,6 +309,7 @@ """ + self.__namespace = namespace if isinstance(termlist, formula): self.terms = copy.copy(list(termlist.terms)) @@ -339,12 +340,18 @@ argument 'n' indicates the number of rows (observations). """ + if 'namespace' in kw: + namespace = kw['namespace'] + else: + namespace = self.namespace + + allvals = [] intercept = False iindex = 0 for t in self.terms: - t.namespace = self.namespace + t.namespace = namespace val = t(*args, **kw) isintercept = False From scipy-svn at scipy.org Fri Dec 15 05:55:16 2006 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Fri, 15 Dec 2006 04:55:16 -0600 (CST) Subject: [Scipy-svn] r2417 - trunk/Lib/sandbox/models Message-ID: <20061215105516.AF4FE39C111@new.scipy.org> Author: timl Date: 2006-12-15 04:55:10 -0600 (Fri, 15 Dec 2006) New Revision: 2417 Modified: trunk/Lib/sandbox/models/model.py trunk/Lib/sandbox/models/regression.py trunk/Lib/sandbox/models/survival.py Log: change some class names from CamelCase to under_score Modified: trunk/Lib/sandbox/models/model.py =================================================================== --- trunk/Lib/sandbox/models/model.py 2006-12-15 10:52:28 UTC (rev 2416) +++ trunk/Lib/sandbox/models/model.py 2006-12-15 10:55:10 UTC (rev 2417) @@ -41,7 +41,7 @@ """ raise NotImplementedError -class LikelihoodModel(Model): +class likelihood_model(Model): def logL(self, theta): """ Modified: trunk/Lib/sandbox/models/regression.py =================================================================== --- trunk/Lib/sandbox/models/regression.py 2006-12-15 10:52:28 UTC (rev 2416) +++ trunk/Lib/sandbox/models/regression.py 2006-12-15 10:55:10 UTC (rev 2417) @@ -1,10 +1,10 @@ import numpy as N import numpy.linalg as L import scipy.linalg -from scipy.sandbox.models.model 
import LikelihoodModel, LikelihoodModelResults +from scipy.sandbox.models.model import likelihood_model, LikelihoodModelResults from scipy.sandbox.models import utils -class ols_model(LikelihoodModel): +class ols_model(likelihood_model): """ A simple ordinary least squares model. @@ -14,7 +14,7 @@ return -scipy.linalg.norm(self.whiten(Y) - N.dot(self.wdesign, b))**2 / 2. def __init__(self, design): - LikelihoodModel.__init__(self) + likelihood_model.__init__(self) self.initialize(design) def initialize(self, design): Modified: trunk/Lib/sandbox/models/survival.py =================================================================== --- trunk/Lib/sandbox/models/survival.py 2006-12-15 10:52:28 UTC (rev 2416) +++ trunk/Lib/sandbox/models/survival.py 2006-12-15 10:55:10 UTC (rev 2417) @@ -1,18 +1,18 @@ import numpy as N -class SurvivalTime: +class survival_time: def __init__(self, time, delta): self.time, self.delta = time, delta def atrisk(self, time): raise NotImplementedError -class RightCensored(SurvivalTime): +class right_censored(survival_time): def atrisk(self, time): return N.less_equal.outer(time, self.time) -class LeftCensored(SurvivalTime): +class left_censored(survival_time): def atrisk(self, time): return N.greater_equal.outer(time, self.time) From scipy-svn at scipy.org Fri Dec 15 05:59:13 2006 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Fri, 15 Dec 2006 04:59:13 -0600 (CST) Subject: [Scipy-svn] r2418 - trunk/Lib/sandbox/models Message-ID: <20061215105913.7652139C069@new.scipy.org> Author: timl Date: 2006-12-15 04:59:09 -0600 (Fri, 15 Dec 2006) New Revision: 2418 Modified: trunk/Lib/sandbox/models/mixed.py Log: code cleanups to mixed models Modified: trunk/Lib/sandbox/models/mixed.py =================================================================== --- trunk/Lib/sandbox/models/mixed.py 2006-12-15 10:55:10 UTC (rev 2417) +++ trunk/Lib/sandbox/models/mixed.py 2006-12-15 10:59:09 UTC (rev 2418) @@ -42,15 +42,14 @@ self.n = v.shape[0] return v 
- def compute_S(self, D, sigma): + def _compute_S(self, D, sigma): """ Display (3.3) from Laird, Lange, Stram (see help(Unit)) """ - self.S = (N.identity(self.n) * sigma**2 + - N.dot(self.Z, N.dot(D, N.transpose(self.Z)))) + N.dot(self.Z, N.dot(D, self.Z.T))) - def compute_W(self): + def _compute_W(self): """ Display (3.2) from Laird, Lange, Stram (see help(Unit)) """ @@ -61,19 +60,19 @@ Display (3.10) from Laird, Lange, Stram (see help(Unit)) """ t = N.dot(self.W, self.X) - self.P = self.W - N.dot(N.dot(t, Sinv), N.transpose(t)) + self.P = self.W - N.dot(N.dot(t, Sinv), t.T) - def compute_r(self, alpha): + def _compute_r(self, alpha): """ Display (3.5) from Laird, Lange, Stram (see help(Unit)) """ self.r = self.Y - N.dot(self.X, alpha) - def compute_b(self, D): + def _compute_b(self, D): """ Display (3.4) from Laird, Lange, Stram (see help(Unit)) """ - self.b = N.dot(D, N.dot(N.dot(N.transpose(self.Z), self.W), self.r)) + self.b = N.dot(D, N.dot(N.dot(self.Z.T, self.W), self.r)) def fit(self, a, D, sigma): """ @@ -83,10 +82,10 @@ Displays (3.2)-(3.5). """ - self.compute_S(D, sigma) - self.compute_W() - self.compute_r(a) - self.compute_b(D) + self._compute_S(D, sigma) + self._compute_W() + self._compute_r(a) + self._compute_b(D) def compute_xtwy(self): """ @@ -98,7 +97,7 @@ """ Utility function to compute X^tWX for Unit instance. 
""" - return N.dot(N.dot(N.transpose(self.X), self.W), self.X) + return N.dot(N.dot(self.X.T, self.W), self.X) def cov_random(self, D, Sinv=None): """ @@ -108,7 +107,7 @@ if Sinv is not None: self.compute_P(Sinv) t = N.dot(self.Z, D) - return D - N.dot(N.dot(N.transpose(t), self.P), t) + return D - N.dot(N.dot(t.T, self.P), t) def logL(self, a, ML=False): """ @@ -166,8 +165,7 @@ self.a = N.zeros(self.p, N.float64) # Determine size of D, and sensible initial estimates - # of sigma and D - + # of sigma and D d = self.units[0].design(self.random) self.q = d.shape[1] # d.shape = q @@ -176,7 +174,7 @@ self.dev = N.inf - def compute_a(self): + def _compute_a(self): """ Display (3.1) of Laird, Lange, Stram (see help(Mixed)). @@ -192,7 +190,7 @@ self.Sinv = L.pinv(S) self.a = N.dot(self.Sinv, Y) - def compute_sigma(self, ML=False): + def _compute_sigma(self, ML=False): """ Estimate sigma. If ML is True, return the ML estimate of sigma, else return the REML estimate. @@ -214,7 +212,7 @@ self.sigma**2 * W) self.sigma = N.sqrt(sigmasq / self.N) - def compute_D(self, ML=False): + def _compute_D(self, ML=False): """ Estimate random effects covariance D. 
If ML is True, return the ML estimate of sigma, @@ -233,7 +231,7 @@ W = unit.P D += N.multiply.outer(unit.b, unit.b) t = N.dot(unit.Z, self.D) - D += self.D - N.dot(N.dot(N.transpose(t), W), t) + D += self.D - N.dot(N.dot(t.T, W), t) self.D = D / self.m @@ -245,7 +243,7 @@ return self.Sinv def deviance(self, ML=False): - return - 2 * self.logL(ML=ML) + return -2 * self.logL(ML=ML) def logL(self, ML=False): """ @@ -263,8 +261,8 @@ S = 0 Y = 0 for unit in self.units: - S += N.dot(N.transpose(unit.X), unit.X) - Y += N.dot(N.transpose(unit.X), unit.Y) + S += N.dot(unit.X.T, unit.X) + Y += N.dot(unit.X.T, unit.Y) self.a = L.lstsq(S, Y)[0] @@ -280,10 +278,10 @@ unit.b = L.lstsq(Z, unit.r)[0] sigmasq += (N.power(unit.Y, 2).sum() - - (self.a * N.dot(N.transpose(unit.X), unit.Y)).sum() - - (unit.b * N.dot(N.transpose(unit.Z), unit.r)).sum()) + (self.a * N.dot(unit.X.T, unit.Y)).sum() - + (unit.b * N.dot(unit.Z.T, unit.r)).sum()) D += N.multiply.outer(unit.b, unit.b) - t += L.pinv(N.dot(N.transpose(unit.Z), unit.Z)) + t += L.pinv(N.dot(unit.Z.T, unit.Z)) sigmasq /= (self.N - (self.m - 1) * self.q - self.p) self.sigma = N.sqrt(sigmasq) @@ -299,9 +297,9 @@ def fit(self, niter=100, ML=False): for i in range(niter): - self.compute_a() - self.compute_sigma(ML=ML) - self.compute_D(ML=ML) + self._compute_a() + self._compute_sigma(ML=ML) + self._compute_D(ML=ML) if not self.cont(ML=ML): break @@ -314,10 +312,10 @@ n = 3 - import formula - fixed = formula.term('f') - random = formula.term('r') - response = formula.term('y') + from scipy.sandbox.models.formula import term + fixed = term('f') + random = term('r') + response = term('y') for i in range(nsubj): d = R.standard_normal() @@ -326,9 +324,11 @@ Y = R.standard_normal((n,)) + d * 4 units.append(Unit({'f':X, 'r':Z, 'y':Y})) - m = Mixed(units, response)#, fixed, random) + #m = Mixed(units, response)#, fixed, random) + m = Mixed(units, response, fixed, random) m.initialize() m.fit() + print m.a ## a = Unit() ## a['x'] = 
N.array([2,3]) From scipy-svn at scipy.org Fri Dec 15 08:36:50 2006 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Fri, 15 Dec 2006 07:36:50 -0600 (CST) Subject: [Scipy-svn] r2419 - in trunk/Lib/sandbox/cdavid: . tests Message-ID: <20061215133650.5D3F239C12E@new.scipy.org> Author: cdavid Date: 2006-12-15 07:36:44 -0600 (Fri, 15 Dec 2006) New Revision: 2419 Modified: trunk/Lib/sandbox/cdavid/autocorr.py trunk/Lib/sandbox/cdavid/tests/test_autocorr.py trunk/Lib/sandbox/cdavid/tests/test_segmentaxis.py Log: One day, maybe, I will be able to get nextpow2 right.... Modified: trunk/Lib/sandbox/cdavid/autocorr.py =================================================================== --- trunk/Lib/sandbox/cdavid/autocorr.py 2006-12-15 10:59:09 UTC (rev 2418) +++ trunk/Lib/sandbox/cdavid/autocorr.py 2006-12-15 13:36:44 UTC (rev 2419) @@ -1,5 +1,5 @@ #! /usr/bin/env python -# Last Change: Tue Dec 12 07:00 PM 2006 J +# Last Change: Fri Dec 15 10:00 PM 2006 J # TODO: - proper test # TODO: - proper profiling @@ -281,10 +281,11 @@ def nextpow2(n): """Returns p such as 2 ** p >= n """ - if 2 ** N.log2(n) == n: - return N.floor(N.log2(n)) + 1 + p = N.floor(N.log2(n)) + if 2 ** p == n: + return p else: - return N.floor(N.log2(n)) + 2 + return p + 1 def autocorr_fft(signal, axis = -1): """Return full autocorrelation along specified axis. Use fft Modified: trunk/Lib/sandbox/cdavid/tests/test_autocorr.py =================================================================== --- trunk/Lib/sandbox/cdavid/tests/test_autocorr.py 2006-12-15 10:59:09 UTC (rev 2418) +++ trunk/Lib/sandbox/cdavid/tests/test_autocorr.py 2006-12-15 13:36:44 UTC (rev 2419) @@ -1,5 +1,5 @@ #! 
/usr/bin/env python -# Last Change: Tue Dec 12 07:00 PM 2006 J +# Last Change: Fri Dec 15 10:00 PM 2006 J from numpy.testing import * from numpy.random import randn, seed @@ -12,7 +12,7 @@ set_package_path() from cdavid.autocorr import _raw_autocorr_1d, _raw_autocorr_1d_noncontiguous from cdavid.autocorr import autocorr_oneside_nofft as autocorr -from cdavid.autocorr import autocorr_fft +from cdavid.autocorr import autocorr_fft , nextpow2 from cdavid.autocorr import _autocorr_oneside_nofft_py as autocorr_py restore_path() Modified: trunk/Lib/sandbox/cdavid/tests/test_segmentaxis.py =================================================================== --- trunk/Lib/sandbox/cdavid/tests/test_segmentaxis.py 2006-12-15 10:59:09 UTC (rev 2418) +++ trunk/Lib/sandbox/cdavid/tests/test_segmentaxis.py 2006-12-15 13:36:44 UTC (rev 2419) @@ -1,12 +1,12 @@ #! /usr/bin/env python -# Last Change: Fri Nov 24 04:00 PM 2006 J +# Last Change: Fri Dec 15 10:00 PM 2006 J from numpy.testing import * import numpy as N set_package_path() -from segmentaxis import segment_axis +from cdavid.segmentaxis import segment_axis restore_path() # #Optional: From scipy-svn at scipy.org Fri Dec 15 11:03:08 2006 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Fri, 15 Dec 2006 10:03:08 -0600 (CST) Subject: [Scipy-svn] r2420 - in trunk/Lib/io: . 
tests Message-ID: <20061215160308.D7B5E39C03E@new.scipy.org> Author: matthew.brett at gmail.com Date: 2006-12-15 10:03:04 -0600 (Fri, 15 Dec 2006) New Revision: 2420 Modified: trunk/Lib/io/recaster.py trunk/Lib/io/tests/test_recaster.py Log: More complete reworking of recaster with more options, tests adapted Modified: trunk/Lib/io/recaster.py =================================================================== --- trunk/Lib/io/recaster.py 2006-12-15 13:36:44 UTC (rev 2419) +++ trunk/Lib/io/recaster.py 2006-12-15 16:03:04 UTC (rev 2420) @@ -48,61 +48,121 @@ Initialization specifies acceptable types (ATs) - Implements downcast and recast method - returns array that may be - of different storage type to the input array, where the new type - is one of the ATs. Downcast forces return array to be same size or - smaller than the input. recast method will return a larger type - if no smaller type will contain the data without loss of - precision. - - At its simplest, the downcast method can reject arrays that - are not in the list of ATs. + Implements recast method - returns array that may be of different + storage type to the input array, where the new type is one of the + ATs. Recast method will return a larger type if no smaller type + will contain the data without loss of precision greater than + specified in options at object creation. 
''' _sctype_attributes = sctype_attributes() - + _k = 2**10 + _option_defaults = { + 'only_if_none': { + 'fp_to_int': 'if_none', + 'fp_to_fp': 'if_none', + 'int_to_int': 'if_none', + 'int_to_fp': 'if_none', + 'downcast_only': False, + 'downcast_within_fp': False, + 'guarantee_fp_to_fp_precision': False, + 'prefer_input_at_threshold': 0, + }, + 'smallest': { + 'fp_to_int': 'always', + 'fp_to_fp': 'always', + 'int_to_int': 'always', + 'int_to_fp': 'always', + 'downcast_only': False, + 'downcast_within_fp': True, + 'guarantee_fp_to_fp_precision': False, + 'prefer_input_at_threshold': 0 + }, + 'fairly_small': { + 'fp_to_int': 'always', + 'fp_to_fp': 'if_none', + 'int_to_int': 'always', + 'int_to_fp': 'if_none', + 'downcast_only': False, + 'downcast_within_fp': False, + 'guarantee_fp_to_fp_precision': False, + 'prefer_input_at_threshold': 2 * _k, + }, + 'preserve_precision': { + 'fp_to_int': 'never', + 'fp_to_fp': 'if_none', + 'int_to_int': 'if_none', + 'int_to_fp': 'never', + 'downcast_only': False, + 'downcast_within_fp': False, + 'guarantee_fp_to_fp_precision': True, + 'prefer_input_at_threshold': 0 + } + } + def __init__(self, sctype_list=None, - downcast_fp_to_fp = True, - downcast_fp_to_int = True, - downcast_int_to_int = True, - upcast_int_to_fp = True, - upcast_fp_to_int = True, - sctype_tols=None): + sctype_tols=None, + recast_options='only_if_none'): ''' Set types for which we are attempting to downcast Input sctype_list - list of acceptable scalar types If None defaults to all system types - downcast_fp_to_fp - if True, tries to downcast floats and complex - to smaller size of same type - downcast_fp_to_int - if True, tries to downcast floats and complex - to integers - downcast_int_to_int - if True, tries to downcast integers to - smaller of same type - upcast_int_to_fp - if True, tries to upcast integers that could not - be downcast to floating point type - upcast_fp_to_int - if True, tries to upcast floating point arrays - that cannot be downcast, to 
integers sctype_tols - dictionary key datatype, values rtol, tol to specify tolerances for checking near equality in - downcasting + downcasting. Note that tolerance values for integers + are used for upcasting integers to floats + recast_options - dictionary of options for recasting or string + specifying one of default options dictionaries. - Note that tolerance values for integers are used for upcasting - integers to floats + recast_option strings can be: + only_if_none - only attempts recast if the type is not in + acceptable types + smallest - return array of smallest possible type within tolerance + fairly_small - compromise set of options between speed of downcast and + size of output + preserve_precision - recasts arrays only to types that preserve precision + + Elements in recast_options dictionary: + fp_to_int - "always" or "if_none" or "never" + When to attempt cast of floating point to int + fp_to_fp - "always" or "if_none" or "never" + When to attempt cast of floating point to floating point + int_to_int - "always" or "if_none" or "never" + When to attempt cast of int to int + int_to_fp - "always" or "if_none" or "never" + When to attempt cast of int to floating point + downcast_only - if True, only return datatype of same size or less + downcast_within_fp - if True, tries downcasting within fp types, even + if there is an fp type that already matches + guarantee_fp_to_fp_precision - if True, will only do fp to fp array + casting to type of same or higher precision. Note that + if fp_to_int recasting is allowed this will allow + precision loss of fp values + prefer_input_at_threshold - number of bytes. 
If input array size + is less than or equal to this number, and in valid + types list, return the array without attempting + recasting ''' if sctype_list is None: sctype_list = self._sctype_attributes.keys() self.sctype_list = sctype_list - # Casting options + # Tolerances self.sctype_tols = self.default_sctype_tols() - self.downcast_fp_to_fp = downcast_fp_to_fp - self.downcast_fp_to_int = downcast_fp_to_int - self.downcast_int_to_int = downcast_int_to_int - self.upcast_int_to_fp = upcast_int_to_fp - self.upcast_fp_to_int = upcast_fp_to_int - # Tolerances if sctype_tols is not None: self.sctype_tols.update(sctype_tols) + # Casting options + if recast_options is None: + recast_options = 'only_if_none' + if isinstance(recast_options, basestring): + try: + self.recast_options = self._option_defaults[recast_options] + except KeyError: + raise ValueError, \ + 'Did not recognize option string %s' % recast_options + else: + self.recast_options = self._option_defaults['only_if_none'] + self.recast_options.update(recast_options) # Cache sctype sizes, self.sized_sctypes = {} for k in ('c', 'f', 'i', 'u'): @@ -115,11 +175,30 @@ self.ints_sized_sctypes.append(e) if self.ints_sized_sctypes: self.ints_sized_sctypes.sort(lambda x, y: cmp(y[1], x[1])) - # Cache capable types list + # Cache capable types list and sizes self._capable_sctypes = {} + self._capable_sctype_sizes = {} + self._c2f_capable_sctype_sizes = {} + flts = self.sized_sctypes['f'] for k in self._sctype_attributes: - self._capable_sctypes[k] = self.get_capable_sctype(k) - + sct = self.get_capable_sctype(k) + self._capable_sctypes[k] = sct + if sct is None: + self._capable_sctype_sizes[k] = inf + if dtype(k).type == 'c': + self._c2f_capable_sctype_sizes[k] = inf + continue + dtp = dtype(sct) + self._capable_sctype_sizes[k] = dtp.itemsize + fsz = inf + min_sz = ceil(dtp.itemsize / 2.0) + if dtp.kind == 'c': + for T, sz in flts: + if sz < min_sz: + break + fsz = sz + self._c2f_capable_sctype_sizes[k] = fsz + def 
default_sctype_tols(self): ''' Default allclose tolerance values for all dtypes ''' t_dict = {} @@ -156,17 +235,6 @@ D.sort(lambda x, y: cmp(y[1], x[1])) return D - def capable_sctype(self, sct): - ''' Return smallest type containing sct type without precision loss - - Value pulled fron dictionary cached from init - see - get_capable_sctype method for algorithm - ''' - try: - return self._capable_sctypes[sct] - except KeyError: - return None - def get_capable_sctype(self, sct): ''' Return smallest scalar type containing sct type without precision loss @@ -207,44 +275,37 @@ break return out_t - def all_close(self, arr1, arr2): - ''' True if arr1 arr2 close with tols for arr1 ''' - tols = self.sctype_tols[arr1.dtype.type] - return allclose(arr1, arr2, - rtol=tols['rtol'], - atol=tols['atol']) + def cast_to_fp(self, arr, rtol, atol, kind, + max_size=inf, + continue_down=False): + ''' Return fp arr maybe recast to specified kind, different sctype - def smallest_of_kind(self, arr, kind=None, max_size=None): - ''' Return arr maybe downcast to same kind, smaller storage - Inputs - arr - array to possibly downcast - kind - kind of array to downcast within - (if None (default) use arr.dtype.kind) + arr - array to possibly recast + rtol - relative tolerace for allclose + atol - absolute tolerance for allclose + kind - kind of array to recast within + (one of "c", "f", "u", "i") max_size - maximum size of sctype to return (in bytes) - (if None, set to arr.dtype.itemsize-1) - If arr cannot be downcast within given tolerances, then: - return arr if arr is in list of acceptable types, otherwise + continue_down - if False, return array of largest sctype + within tolerance and >= max_size + if True, continue downcasting within kind + to find smallest possible within tolerance + + If arr cannot be recast within given tolerances, and size, return None ''' - dtp = arr.dtype - if kind is None: - kind = dtp.kind - if max_size is None: - max_size = dtp.itemsize-1 - sctypes = 
self.sized_sctypes[kind] - sctypes = [t[0] for i, t in enumerate(sctypes) if t[1] <= max_size] - tols = self.sctype_tols[dtp.type] - rtol, atol = tols['rtol'], tols['atol'] - ret_arr = arr - for T in sctypes: + ret_arr = None + for T, sz in self.sized_sctypes[kind]: + if sz > max_size: + continue test_arr = arr.astype(T) if allclose(test_arr, arr, rtol, atol): ret_arr = test_arr + if not continue_down: + break else: break - if ret_arr.dtype.type not in self.sctype_list: - return None return ret_arr def smallest_int_sctype(self, mx, mn): @@ -278,95 +339,101 @@ return arr.astype(idt) return None - def downcast(self, arr, allow_larger_integer=False): - ''' Downcast array to smaller or same type + def recast(self, arr): + ''' Recast array to type in type list - If cannot find smaller type within tolerance, - return array if is already valid type, otherwise None + If cannot recast to an array within tolerance, + raise error ''' dtp = arr.dtype dtk = dtp.kind dti = dtp.itemsize - int_arr = None + dtt = dtp.type + opts = self.recast_options + curr_size = inf + ret_arr = None + valid_input_arr = dtt in self.sctype_list + if valid_input_arr: + if opts['prefer_input_at_threshold'] > arr.nbytes: + return arr + ret_arr = arr + if opts['downcast_only'] or valid_input_arr: + curr_size = dti + tols = self.sctype_tols[dtt] + rtol, atol = tols['rtol'], tols['atol'] if dtk in ('c', 'f'): - if self.downcast_fp_to_int: + if opts['fp_to_int'] == 'always' or \ + (opts['fp_to_int'] == 'if_none' and + ret_arr is None): test_arr = self.cast_to_integer(arr) - if test_arr is not None: - if self.all_close(arr, test_arr): - if test_arr.dtype.itemsize < dti: - return test_arr - else: - int_arr = test_arr - if self.downcast_fp_to_fp: - if dtk == 'c': - # Try downcasting to float - max_size = ceil(dti / 2.0) - test_arr = self.smallest_of_kind(arr, 'f', max_size) + if test_arr is not None and \ + test_arr.dtype.itemsize < curr_size: + if allclose(arr, test_arr, rtol, atol): + ret_arr = test_arr + 
curr_size = ret_arr.dtype.itemsize + if opts['fp_to_fp'] == 'always' or \ + (opts['fp_to_fp'] == 'if_none' and + ret_arr is None): + if dtk == 'c' and not opts['guarantee_fp_to_fp_precision']: + # Try casting to float + max_size = min([self._c2f_capable_sctype_sizes[dtt], + curr_size - 1]) + test_arr = self.cast_to_fp(arr, + rtol, + atol, + 'f', + max_size, + opts['downcast_within_fp']) if test_arr is not None: - return test_arr - test_arr = self.smallest_of_kind(arr) - if test_arr is not None: - return test_arr + ret_arr = test_arr + curr_size = ret_arr.dtype.itemsize + if opts['fp_to_fp'] == 'always' or \ + (opts['fp_to_fp'] == 'if_none' and + ret_arr is None): + # Cast float or complex to another of same type + if opts['guarantee_fp_to_fp_precision']: + sct = self._capable_sctypes[dtt] + sz = self._capable_sctype_sizes[dtt] + if sz < curr_size and sct is not None: + ret_arr = arr.astype(sct) + curr_size = sz + else: + max_size = min([self._capable_sctype_sizes[dtt], + curr_size - 1]) + test_arr = self.cast_to_fp(arr, + rtol, + atol, + dtk, + max_size, + opts['downcast_within_fp']) + if test_arr is not None: + ret_arr = test_arr + curr_size = ret_arr.dtype.itemsize elif dtk in ('u', 'i'): - if self.downcast_int_to_int: + if opts['int_to_int'] == 'always' or \ + (opts['int_to_int'] == 'if_none' and + ret_arr is None): test_arr = self.cast_to_integer(arr) + if test_arr is not None and \ + test_arr.dtype.itemsize < curr_size: + ret_arr = test_arr + curr_size = ret_arr.dtype.itemsize + if opts['int_to_fp'] == 'always' or \ + (opts['int_to_fp'] == 'if_none' and + ret_arr is None): + test_arr = self.cast_to_fp(arr, + rtol, + atol, + 'f', + curr_size-1, + opts['downcast_within_fp']) if test_arr is not None: - if test_arr.dtype.itemsize < dti: - return test_arr - else: - int_arr = test_arr + ret_arr = test_arr else: raise TypeError, 'Do not recognize array kind %s' % dtk - if arr.dtype.type in self.sctype_list: - return arr - if allow_larger_integer and int_arr is not 
None: - return int_arr - raise TypeError, 'Cannot downcast array within tolerance' - def recast(self, arr): - ''' Recast array to type in type list - - If cannot find smaller type within tolerance, by downcasting, - and array not of valid type already, then try larger - types. If none of these return an array within tolerance, - return None - ''' - try: - return self.downcast(arr, allow_larger_integer=True) - except ValueError: - pass - # Could not downcast, arr dtype not in known list - dtp = arr.dtype - dtk = dtp.kind - sct = dtp.type - if dtk in ('c', 'f'): - # Try upcast to larger dtype of same kind - udt = self.capable_sctype[sct] - if udt is not None: - return arr.astype(udt) - # Try casting to an integer - if self.upcast_fp_to_int: - test_arr = self.cast_to_integer(arr) - if test_arr is not None: - if self.all_close(arr, test_arr): - return test_arr - else: # integer types - # try casting to any possible integer type - test_arr = self.cast_to_integer(arr) - if test_arr is not None: - return test_arr - # Can try casting integers to floats - if self.upcast_int_to_fp: - flts = self._sized_sctypes['f'] - if flts: - flt_arr = arr.astype(flts[0]) - if self.all_close(arr, flt_arr): - if self.downcast_fp_to_fp: - max_size = flt_arr.dtype.itemsize - 1 - test_arr = self.smallest_of_kind(arr, 'f', max_size) - if test_arr is not None: - return test_arr - return flt_arr + if ret_arr is not None: + return ret_arr raise TypeError, 'Cannot recast array within tolerance' def recast_best_sctype(self, arr): @@ -378,7 +445,7 @@ sct = arr.dtype.type arr = self.recast(arr) if sct not in self.sctype_list: - sct = self.capable_sctype[sct] + sct = self._capable_sctypes[sct] if sct is None: sct = arr.dtype.type return arr, sct Modified: trunk/Lib/io/tests/test_recaster.py =================================================================== --- trunk/Lib/io/tests/test_recaster.py 2006-12-15 13:36:44 UTC (rev 2419) +++ trunk/Lib/io/tests/test_recaster.py 2006-12-15 16:03:04 UTC (rev 
2420) @@ -13,7 +13,8 @@ class test_recaster(ScipyTestCase): def setUp(self): self.valid_types = [N.int32, N.complex128, N.float64] - self.recaster = Recaster(self.valid_types) + self.recaster = Recaster(self.valid_types, + recast_options='smallest') def test_init(self): # Setting sctype_list @@ -31,37 +32,29 @@ assert tols != R.sctype_tols, 'Tols dictionary not set correctly' assert R.sctype_tols[T]['rtol'] == F.eps*2, 'Rtol not correctly set' assert R.sctype_tols[T]['atol'] == F.tiny*2, 'Atol not correctly set' + # Options # Sctype size lists # Integer sizes # Cabable types - def test_smallest_of_kind(self): + def test_cast_to_fp(self): R = self.recaster value = 1 - # smallest same kind - # Define expected type output from same kind downcast of value - required_types = {'complex': N.complex128, - 'float': N.float64, - 'int': N.int32, - 'uint': None} - for kind, req_type in required_types.items(): - if req_type is not None: - rdtsz = N.dtype(req_type).itemsize - for T in N.sctypes[kind]: - tdtsz = N.dtype(T).itemsize - ok_T = T in R.sctype_list - expect_none = ((req_type is None) or - ((tdtsz <= rdtsz) and not ok_T)) - A = N.array(value, T) - C = R.smallest_of_kind(A) - if expect_none: - assert C is None, 'Expecting None for %s' % T - else: - assert C is not None, 'Got unexpected None from %s' % T - assert C.dtype.type == req_type, \ - 'Expected %s type, got %s type' % \ - (C.dtype.type, req_type) - + # Define expected type output from fp recast of value + inp_outp = ( + (N.complex128, N.complex128), + (N.complex64, N.complex128), + ) + for inp, outp in inp_outp: + arr = N.array(value, dtype=inp) + rtol = R.sctype_tols[inp]['rtol'] + atol = R.sctype_tols[inp]['atol'] + kind = N.dtype(inp).kind + arr = R.cast_to_fp(arr, rtol, atol, kind) + if outp is None: + assert arr is None, 'Expected None from type %s' % inp + assert arr.dtype.type is outp, 'Expected output type %s from input %s' % (inp, outp) + def test_smallest_int_sctype(self): # Smallest int sctype with 
testing recaster params = sctype_attributes() @@ -92,11 +85,13 @@ assert N.dtype(rt) == N.dtype(T), \ 'Expected %s, got %s type' % (T, rt) - def test_downcasts(self): + def test_recasts(self): value = 100 R = self.recaster for T in (N.complex128, N.complex64, N.float64, N.uint64): - B = R.downcast(N.array(value, T)) + B = R.recast(N.array(value, T)) assert B is not None, 'Got None for %s' % T - assert B.dtype.type == N.int32 + Bt = B.dtype.type + assert Bt == N.int32, 'Input %s, output %s' % (T, Bt) + From scipy-svn at scipy.org Fri Dec 15 13:00:43 2006 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Fri, 15 Dec 2006 12:00:43 -0600 (CST) Subject: [Scipy-svn] r2421 - trunk/Lib/sandbox/models Message-ID: <20061215180043.1FEA539C0D5@new.scipy.org> Author: jonathan.taylor Date: 2006-12-15 12:00:40 -0600 (Fri, 15 Dec 2006) New Revision: 2421 Modified: trunk/Lib/sandbox/models/regression.py Log: added AR(p) regression, Yule-Walker estimation of covariance + more docstrings Modified: trunk/Lib/sandbox/models/regression.py =================================================================== --- trunk/Lib/sandbox/models/regression.py 2006-12-15 16:03:04 UTC (rev 2420) +++ trunk/Lib/sandbox/models/regression.py 2006-12-15 18:00:40 UTC (rev 2421) @@ -1,23 +1,70 @@ +""" +This module implements some standard regression models: OLS and WLS +models, as well as an AR(p) regression model. + +Models are specified with a design matrix and are fit using their +'fit' method. + +Subclasses that have more complicated covariance matrices +should write over the 'whiten' method as the fit method +prewhitens the response by calling 'whiten'. + +General reference for regression models: + +'Introduction to Linear Regression Analysis', Douglas C. Montgomery, + Elizabeth A. Peck, G. Geoffrey Vining. Wiley, 2006. 
+ +""" + import numpy as N import numpy.linalg as L -import scipy.linalg -from scipy.sandbox.models.model import likelihood_model, LikelihoodModelResults +from scipy.linalg import norm, toeplitz + +from scipy.sandbox.models.model import likelihood_model, likelihood_model_results from scipy.sandbox.models import utils class ols_model(likelihood_model): """ A simple ordinary least squares model. + + >>> import numpy as N + >>> + >>> from scipy.sandbox.models.formula import term, I + >>> from scipy.sandbox.models.regression import ols_model + >>> + >>> data={'Y':[1,3,4,5,2,3,4], + ... 'X':range(1,8)} + >>> f = term("X") + I + >>> f.namespace = data + >>> + >>> model = ols_model(f.design()) + >>> results = model.fit(data['Y']) + >>> + >>> results.beta + array([ 0.25 , 2.14285714]) + >>> results.t() + array([ 0.98019606, 1.87867287]) + >>> print results.Tcontrast([0,1]) + + >>> print results.Fcontrast(N.identity(2)) + +>>> """ def logL(self, b, Y): - return -scipy.linalg.norm(self.whiten(Y) - N.dot(self.wdesign, b))**2 / 2. + return -norm(self.whiten(Y) - N.dot(self.wdesign, b))**2 / 2. def __init__(self, design): likelihood_model.__init__(self) self.initialize(design) def initialize(self, design): + """ + Set design for model, prewhitening design matrix and precomputing + covariance of coefficients (up to scale factor in front). + """ + self.design = design self.wdesign = self.whiten(design) self.calc_beta = L.pinv(self.wdesign) @@ -26,6 +73,10 @@ self.df_resid = self.wdesign.shape[0] - utils.rank(self.design) def whiten(self, Y): + """ + OLS model whitener does nothing: returns Y. + """ + return Y def est_coef(self, Y): @@ -37,20 +88,20 @@ Z = self.whiten(Y) - lfit = Results(L.lstsq(self.wdesign, Z)[0], Y) + lfit = regression_results(L.lstsq(self.wdesign, Z)[0], Y) lfit.predict = N.dot(self.design, lfit.beta) def fit(self, Y): """ - Full \'fit\' of the model including estimate of covariance matrix, (whitened) - residuals and scale. 
+ Full fit of the model including estimate of covariance matrix, + (whitened) residuals and scale. """ Z = self.whiten(Y) - lfit = Results(N.dot(self.calc_beta, Z), Y, + lfit = regression_results(N.dot(self.calc_beta, Z), Y, normalized_cov_beta=self.normalized_cov_beta) lfit.df_resid = self.df_resid @@ -58,42 +109,175 @@ lfit.resid = Z - N.dot(self.wdesign, lfit.beta) lfit.scale = N.add.reduce(lfit.resid**2) / lfit.df_resid - lfit.Z = Z # just in case + lfit.Z = Z return lfit class ar_model(ols_model): """ - A regression model with an AR(1) covariance structure. + A regression model with an AR(p) covariance structure. - Eventually, this will be AR(p) -- all that is needed is to - determine the self.whiten method from AR(p) parameters. + >>> import numpy as N + >>> import numpy.random as R + >>> + >>> from scipy.sandbox.models.formula import term, I + >>> from scipy.sandbox.models.regression import ar_model + >>> + >>> data={'Y':[1,3,4,5,8,10,9], + ... 'X':range(1,8)} + >>> f = term("X") + I + >>> f.namespace = data + >>> + >>> model = ar_model(f.design(), 2) + >>> for i in range(6): + ... results = model.fit(data['Y']) + ... print "AR coefficients:", model.rho + ... rho, sigma = model.yule_walker(data["Y"] - results.predict) + ... model = ar_model(model.design, rho) + ... + AR coefficients: [ 0. 0.] 
+ AR coefficients: [-0.52571491 -0.84496178] + AR coefficients: [-0.620642 -0.88654567] + AR coefficients: [-0.61887622 -0.88137957] + AR coefficients: [-0.61894058 -0.88152761] + AR coefficients: [-0.61893842 -0.88152263] + >>> results.beta + array([ 1.58747943, -0.56145497]) + >>> results.t() + array([ 30.796394 , -2.66543144]) + >>> print results.Tcontrast([0,1]) + + >>> print results.Fcontrast(N.identity(2)) + + >>> + + >>> model.rho = N.array([0,0]) + >>> model.iterative_fit(data['Y'], niter=3) + >>> print model.rho + [-0.61887622 -0.88137957] + >>> + """ - def __init__(self, design, rho=0): - self.rho = rho + def __init__(self, design, rho): + if type(rho) is type(1): + self.order = rho + self.rho = N.zeros(self.order, N.float64) + else: + self.rho = N.squeeze(N.asarray(rho)) + if len(self.rho.shape) not in [0,1]: + raise ValueError, "AR parameters must be a scalar or a vector" + if self.rho.shape == (): + self.rho.shape = (1,) + self.order = self.rho.shape[0] ols_model.__init__(self, design) + def iterative_fit(self, Y, niter=3): + """ + Perform an iterative two-stage procedure to estimate AR(p) + paramters and regression coefficients simultaneously. + """ + for i in range(niter): + self.initialize(self.design) + results = self.fit(Y) + self.rho, _ = self.yule_walker(Y - results.predict) + def whiten(self, X): - factor = 1. / N.sqrt(1 - self.rho**2) - return N.concatenate([[X[0]], (X[1:] - self.rho * X[0:-1]) * factor]) + """ + Whiten a series of columns according to an AR(p) + covariance structure. + """ + X = N.asarray(X, N.float64) + _X = X.copy() + for i in range(self.order): + _X[(i+1):] = _X[(i+1):] - self.rho[i] * X[0:-(i+1)] + return _X + + def yule_walker(self, X, method="unbiased", df=None): + """ + Estimate AR(p) parameters from a sequence X using Yule-Walker equation. + Method can be "unbiased" or "MLE" and this determines + denominator in estimate of ACF at lag k. If "MLE", the denominator is + n=r.shape[0], if "unbiased" the denominator is n-k. 
+ + If df is supplied, then it is assumed the X has df degrees of + freedom rather than n. + + See, for example: + + http://en.wikipedia.org/wiki/Autoregressive_moving_average_model + """ + + method = str(method).lower() + if method not in ["unbiased", "mle"]: + raise ValueError, "ACF estimation method must be 'unbiased' \ + or 'MLE'" + X = N.asarray(X, N.float64) + X -= X.mean() + n = df or X.shape[0] + + if method == "unbiased": + denom = lambda k: n - k + else: + denom = lambda k: n + + if len(X.shape) != 1: + raise ValueError, "expecting a vector to estimate AR parameters" + r = N.zeros(self.order+1, N.float64) + r[0] = (X**2).sum() / denom(0) + for k in range(1,self.order+1): + r[k] = (X[0:-k]*X[k:]).sum() / denom(k) + R = toeplitz(r[:-1]) + + rho = L.solve(R, r[1:]) + sigmasq = r[0] - (r[1:]*rho).sum() + return rho, N.sqrt(sigmasq) + class wls_model(ols_model): """ A regression model with diagonal but non-identity covariance - structure. The weights are proportional to the inverse of the + structure. The weights are presumed to be + (proportional to the) inverse of the variance of the observations. + >>> import numpy as N + >>> + >>> from scipy.sandbox.models.formula import term, I + >>> from scipy.sandbox.models.regression import wls_model + >>> + >>> data={'Y':[1,3,4,5,2,3,4], + ... 
'X':range(1,8)} + >>> f = term("X") + I + >>> f.namespace = data + >>> + >>> model = wls_model(f.design(), weights=range(1,8)) + >>> results = model.fit(data['Y']) + >>> + >>> results.beta + array([ 0.0952381 , 2.91666667]) + >>> results.t() + array([ 0.35684428, 2.0652652 ]) + >>> print results.Tcontrast([0,1]) + + >>> print results.Fcontrast(N.identity(2)) + + """ def __init__(self, design, weights=1): self.weights = weights ols_model.__init__(self, design) + def whiten(self, X): + """ + Whitener for WLS model, multiplies by sqrt(self.weights) + """ - def whiten(self, X): + X = N.asarray(X, N.float64) + if X.ndim == 1: return X * N.sqrt(self.weights) elif X.ndim == 2: @@ -103,15 +287,15 @@ v[:,i] = X[:,i] * c return v - -class Results(LikelihoodModelResults): +class regression_results(likelihood_model_results): """ This class summarizes the fit of a linear regression model. + It handles the output of contrasts, estimates of covariance, etc. """ def __init__(self, beta, Y, normalized_cov_beta=None, scale=1.): - LikelihoodModelResults.__init__(self, beta, normalized_cov_beta, scale) + likelihood_model_results.__init__(self, beta, normalized_cov_beta, scale) self.Y = Y def norm_resid(self): @@ -128,9 +312,9 @@ return self.resid * N.multiply.outer(N.ones(self.Y.shape[0]), sdd) - def predict(self, design): + def predictors(self, design): """ - Return fitted values from a design matrix. + Return linear predictor values from a design matrix. 
""" return N.dot(design, self.beta) @@ -143,7 +327,6 @@ if not adjusted: ratio *= ((self.Y.shape[0] - 1) / self.df_resid) return 1 - ratio - def isestimable(C, D): """ From an q x p contrast matrix C and an n x p design matrix D, checks From scipy-svn at scipy.org Fri Dec 15 13:02:08 2006 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Fri, 15 Dec 2006 12:02:08 -0600 (CST) Subject: [Scipy-svn] r2422 - trunk/Lib/sandbox/models Message-ID: <20061215180208.547DF39C073@new.scipy.org> Author: jonathan.taylor Date: 2006-12-15 12:02:06 -0600 (Fri, 15 Dec 2006) New Revision: 2422 Added: trunk/Lib/sandbox/models/smoothers.py Log: scatterplot smoothers Added: trunk/Lib/sandbox/models/smoothers.py =================================================================== --- trunk/Lib/sandbox/models/smoothers.py 2006-12-15 18:00:40 UTC (rev 2421) +++ trunk/Lib/sandbox/models/smoothers.py 2006-12-15 18:02:06 UTC (rev 2422) @@ -0,0 +1,257 @@ +""" +This module contains scatterplot smoothers, that is classes +who generate a smooth fit of a set of (x,y) pairs. + +""" + +import numpy as N +import numpy.linalg as L + +from scipy.optimize import golden +from scipy.linalg import solveh_banded + +from bspline import bspline +from utils import band2array + +from scipy.sandbox.models import _bspline + + +class poly_smoother: + + """ + Polynomial smoother up to a given order. + Fit based on weighted least squares. + + The x values can be specified at instantiation or when called. + """ + + def df_fit(self): + """ + Degrees of freedom used in the fit. + """ + return self.order + 1 + + def df_resid(self): + """ + Residual degrees of freedom from last fit. 
+ """ + return self.N - self.order - 1 + + def __init__(self, order, x=None): + self.order = order + self.coef = N.zeros((order+1,), N.float64) + if x is not None: + self.X = N.array([x**i for i in range(order+1)]).T + + def __call__(self, x=None): + if x is not None: + X = N.array([(x**i) for i in range(self.order+1)]) + else: X = self.X + return N.squeeze(N.dot(X.T, self.coef)) + + def fit(self, y, x=None, weights=None): + self.N = y.shape[0] + if weights is None: + weights = 1 + _w = N.sqrt(weights) + if x is None: + if not hasattr(self, "X"): + raise ValueError, "x needed to fit poly_smoother" + else: + self.X = N.array([(x**i) for i in range(self.order+1)]) + + X = self.X * _w + + _y = y * _w + self.coef = N.dot(L.pinv(X).T, _y) + +class smoothing_spline(bspline): + + penmax = 30. + + def fit(self, y, x=None, weights=None, pen=0.): + banded = True + + if x is None: + x = self.tau[(self.M-1):-(self.M-1)] # internal knots + + if pen == 0.: # can't use cholesky for singular matrices + banded = False + + if x.shape != y.shape: + raise ValueError, 'x and y shape do not agree, by default x are the Bspline\'s internal knots' + + bt = self.basis(x) + if pen >= self.penmax: + pen = self.penmax + + if weights is None: + weights = N.array(1.) + + wmean = weights.mean() + _w = N.sqrt(weights / wmean) + bt *= _w + + # throw out rows with zeros (this happens at boundary points!) 
+ + mask = N.flatnonzero(1 - N.alltrue(N.equal(bt, 0), axis=0)) + + bt = bt[:,mask] + y = y[mask] + + self.df_total = y.shape[0] + + if bt.shape[1] != y.shape[0]: + raise ValueError, "some x values are outside range of B-spline knots" + bty = N.dot(bt, _w * y) + self.N = y.shape[0] + if not banded: + self.btb = N.dot(bt, bt.T) + _g = band2array(self.g, lower=1, symmetric=True) + self.coef, _, self.rank = L.lstsq(self.btb + pen*_g, bty)[0:3] + self.rank = min(self.rank, self.btb.shape[0]) + else: + self.btb = N.zeros(self.g.shape, N.float64) + nband, nbasis = self.g.shape + for i in range(nbasis): + for k in range(min(nband, nbasis-i)): + self.btb[k,i] = (bt[i] * bt[i+k]).sum() + + bty.shape = (1,bty.shape[0]) + self.chol, self.coef = solveh_banded(self.btb + + pen*self.g, + bty, lower=1) + + self.coef = N.squeeze(self.coef) + self.resid = N.sqrt(wmean) * (y * _w - N.dot(self.coef, bt)) + self.pen = pen + + def gcv(self): + """ + Generalized cross-validation score of current fit. + """ + + norm_resid = (self.resid**2).sum() + return norm_resid / (self.df_total - self.trace()) + + def df_resid(self): + """ + self.N - self.trace() + + where self.N is the number of observations of last fit. + """ + + return self.N - self.trace() + + def df_fit(self): + """ + = self.trace() + + How many degrees of freedom used in the fit? + """ + return self.trace() + + def trace(self): + """ + Trace of the smoothing matrix S(pen) + """ + + if self.pen > 0: + _invband = _bspline.invband(self.chol.copy()) + tr = _trace_symbanded(_invband, self.btb, lower=1) + return tr + else: + return self.rank + +class smoothing_spline_fixeddf(smoothing_spline): + + """ + Fit smoothing spline with approximately df degrees of freedom + used in the fit, i.e. so that self.trace() is approximately df. + + In general, df must be greater than the dimension of the null space + of the Gram inner product. For cubic smoothing splines, this means + that df > 2. 
+ + """ + + target_df = 5 + + def __init__(self, knots, order=4, coef=None, M=None, target_df=None): + if target_df is not None: + self.target_df = target_df + bspline.__init__(self, knots, order=order, coef=coef, M=M) + self.target_reached = False + + def fit(self, y, x=None, df=None, weights=None, tol=1.0e-03): + + df = df or self.target_df + + apen, bpen = 0, 1.0e-03 + olddf = y.shape[0] - self.m + + if not self.target_reached: + while True: + curpen = 0.5 * (apen + bpen) + smoothing_spline.fit(self, y, x=x, weights=weights, pen=curpen) + curdf = self.trace() + if curdf > df: + apen, bpen = curpen, 2 * curpen + else: + apen, bpen = apen, curpen + if apen >= self.penmax: + raise ValueError, "penalty too large, try setting penmax higher or decreasing df" + if N.fabs(curdf - df) / df < tol: + self.target_reached = True + break + else: + smoothing_spline.fit(self, y, x=x, weights=weights, pen=self.pen) + +class smoothing_spline_gcv(smoothing_spline): + + """ + Fit smoothing spline trying to optimize GCV. + + Try to find a bracketing interval for scipy.optimize.golden + based on bracket. + + It is probably best to use target_df instead, as it is + sometimes difficult to find a bracketing interval. + + """ + + def fit(self, y, x=None, weights=None, tol=1.0e-03, + bracket=(0,1.0e-03)): + + def _gcv(pen, y, x): + smoothing_spline.fit(y, x=x, pen=N.exp(pen), weights=weights) + a = self.gcv() + return a + + a = golden(_gcv, args=(y,x), brack=(-100,20), tol=tol) + +def _trace_symbanded(a,b, lower=0): + """ + Compute the trace(a*b) for two upper or lower banded real symmetric matrices. + """ + + if lower: + t = _zero_triband(a * b, lower=1) + return t[0].sum() + 2 * t[1:].sum() + else: + t = _zero_triband(a * b, lower=0) + return t[-1].sum() + 2 * t[:-1].sum() + + + +def _zero_triband(a, lower=0): + """ + Zero out unnecessary elements of a real symmetric banded matrix. + """ + + nrow, ncol = a.shape + if lower: + for i in range(nrow): a[i,(ncol-i):] = 0. 
+ else: + for i in range(nrow): a[i,0:i] = 0. + return a From scipy-svn at scipy.org Fri Dec 15 16:28:49 2006 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Fri, 15 Dec 2006 15:28:49 -0600 (CST) Subject: [Scipy-svn] r2423 - trunk/Lib/sandbox/timeseries Message-ID: <20061215212849.7240639C0FC@new.scipy.org> Author: mattknox_ca Date: 2006-12-15 15:28:45 -0600 (Fri, 15 Dec 2006) New Revision: 2423 Modified: trunk/Lib/sandbox/timeseries/cseries.c Log: major revamp Modified: trunk/Lib/sandbox/timeseries/cseries.c =================================================================== --- trunk/Lib/sandbox/timeseries/cseries.c 2006-12-15 18:02:06 UTC (rev 2422) +++ trunk/Lib/sandbox/timeseries/cseries.c 2006-12-15 21:28:45 UTC (rev 2423) @@ -11,145 +11,143 @@ static //PyArrayObject * -setArrayItem(PyArrayObject **theArray, long index, PyObject *newVal) +setArrayItem_1D(PyArrayObject **theArray, long index, PyObject *newVal) { - char *setptr; + if (index >= 0) + { + //set value in array + PyArray_SETITEM(*theArray, PyArray_GetPtr(*theArray, &index), newVal); + } - if (index >= 0) - { - //set value in array - setptr = (*theArray)->data + (index) * (*theArray)->strides[0]; - PyArray_SETITEM(*theArray,setptr,newVal); - } +} - //return theArray; +static //PyArrayObject * +setArrayItem_2D(PyArrayObject **theArray, long index_x, long index_y, PyObject *newVal) +{ + long idx[] = {index_x, index_y}; + + if (index_x >= 0 && index_y >= 0) { + //set value in array + PyArray_SETITEM(*theArray, PyArray_GetPtr(*theArray, idx), newVal); + } + } + static int freqVal(char freq) { - switch(freq) - { - case 'A': - //annual - return 1; - case 'Q': - //quarterly - return 2; - case 'M': - //monthly - return 3; - case 'B': - //business - return 4; - case 'D': - //daily - return 5; - default: - return 0; - } + switch(freq) + { + case 'A': + //annual + return 1; + case 'Q': + //quarterly + return 2; + case 'M': + //monthly + return 3; + case 'B': + //business + return 4; + case 'D': + 
//daily + return 5; + default: + return 0; + } } static long toDaily(long fromDate, char fromFreq) { - long absdate, origin, secondorigin; + long absdate; int y,m,d; - mxDateTimeObject *theDate; + mxDateTimeObject *theDate; - origin = 675333; - secondorigin = 722814; - - //convert fromDate to days since (0 AD - 1 day) + //convert fromDate to days since (0 AD - 1 day) switch(fromFreq) { case 'D': absdate = fromDate; break; case 'B': - absdate = (fromDate/5)*7 + fromDate%5; + absdate = ((fromDate-1)/5)*7 + (fromDate-1)%5 + 1; break; case 'M': - y = fromDate/12 + 1; - m = fromDate%12; + y = fromDate/12; + m = fromDate%12; - if (m == 0) - { - m = 12; - y--; - } - d=1; - break; + if (m == 0) + { + m = 12; + y--; + } + d=1; + break; case 'Q': - y = fromDate/4 + 1; - m = (fromDate%4) * 3 - 2; + y = fromDate/4; + m = (fromDate%4) * 3 - 2; - if (m < 1) - { - m += 12; - y--; - } - else if (m == 12) - { - m = 1; - y++; - } - d=1; - break; + if (m < 1) + { + m += 12; + y--; + } + else if (m == 12) + { + m = 1; + y++; + } + d=1; + break; case 'A': - y = fromDate-1; - m = 1; - d = 1; - break; + y = fromDate; + m = 1; + d = 1; + break; default: return -1; } - if (freqVal(fromFreq) < 4) - { - //switch to years from 0 for mxDateTime - y+= 1849; + if (freqVal(fromFreq) < 4) + { + theDate = (mxDateTimeObject *)mxDateTime.DateTime_FromDateAndTime(y,m,d,0,0,0); + absdate = (long)(theDate->absdate); + } - theDate = (mxDateTimeObject *)mxDateTime.DateTime_FromDateAndTime(y,m,d,0,0,0); - absdate = (long)(theDate->absdate); - } - else - { - //days from 0 for mxDateTime - absdate += origin; - } + return absdate; - return absdate; - } static long getDateInfo_sub(long dateNum, char freq, char info) { - long monthNum; - mxDateTimeObject *convDate; - convDate = (mxDateTimeObject *)mxDateTime.DateTime_FromAbsDateAndTime(toDaily(dateNum,freq),0); + long monthNum; + mxDateTimeObject *convDate; + convDate = (mxDateTimeObject *)mxDateTime.DateTime_FromAbsDateAndTime(toDaily(dateNum,freq),0); 
switch(info) { case 'Y': //year - return (long)(convDate->year); + return (long)(convDate->year); case 'Q': //quarter - monthNum = (long)(convDate->month); - return ((monthNum-1)/3)+1; + monthNum = (long)(convDate->month); + return ((monthNum-1)/3)+1; case 'M': //month - return (long)(convDate->month); + return (long)(convDate->month); case 'D': //day - return (long)(convDate->day); + return (long)(convDate->day); case 'W': //day of week - return (long)(convDate->day_of_week); + return (long)(convDate->day_of_week); default: return -1; } @@ -160,8 +158,8 @@ static PyObject * cseries_getDateInfo(PyObject *self, PyObject *args) { - char *freq; - char *info; + char *freq; + char *info; PyArrayObject *array; PyArrayObject *tempArray; @@ -171,91 +169,88 @@ PyObject *val; long i, lngVal, dInfo, dim; - if (!PyArg_ParseTuple(args, "Oss:getDateInfo(array, freq, info)", &tempArray, &freq, &info)) return NULL; + if (!PyArg_ParseTuple(args, "Oss:getDateInfo(array, freq, info)", &tempArray, &freq, &info)) return NULL; array = PyArray_GETCONTIGUOUS(tempArray); - dim = array->dimensions[0]; + dim = array->dimensions[0]; - //initialize new array + //initialize new array newArray = (PyArrayObject*)PyArray_SimpleNew(array->nd, &dim, array->descr->type_num); for (i = 0; i < array->dimensions[0]; i++) { - getptr = array->data + i*array->strides[0]; - val = PyArray_GETITEM(array, getptr); - lngVal = PyInt_AsLong(val); - dInfo = getDateInfo_sub(lngVal, *freq, *info); + getptr = array->data + i*array->strides[0]; + val = PyArray_GETITEM(array, getptr); + lngVal = PyInt_AsLong(val); + dInfo = getDateInfo_sub(lngVal, *freq, *info); - setArrayItem(&newArray, i, PyInt_FromLong(dInfo)); - } + setArrayItem_1D(&newArray, i, PyInt_FromLong(dInfo)); + } - return (PyObject *) newArray; + return (PyObject *) newArray; } -//fromDate is periods since Dec 31, 1849 static long convert(long fromDate, char fromFreq, char toFreq, int notStartInd) { - long absdate, origin, secondorigin, secsInDay; + long 
absdate, secsInDay; long converted; - int rem; - int y,m,d; + int y,m; - mxDateTimeObject *convDate; + mxDateTimeObject *convDate; - origin = 675333; - secondorigin = 722814; secsInDay = 86400; - absdate = toDaily(fromDate, fromFreq); + absdate = toDaily(fromDate, fromFreq); - convDate = (mxDateTimeObject *)mxDateTime.DateTime_FromAbsDateAndTime(absdate,0); + convDate = (mxDateTimeObject *)mxDateTime.DateTime_FromAbsDateAndTime(absdate,0); - //switch back to days and years since 1849 for pyTSA Date - absdate -= origin; - y = convDate->year - 1849; - m = convDate->month; + y = convDate->year; + m = convDate->month; - //convert convDate to appropriate # of periods according to toFreq + //convert convDate to appropriate # of periods according to toFreq switch(toFreq) { case 'D': converted = absdate; break; case 'B': - rem = absdate%7; - if (rem > 4) //is weekend day + if (convDate->day_of_week > 4) //is weekend day { - if (notStartInd == 1 && freqVal(fromFreq) > 4) - { - return -1; - } - else - { - d = convDate->day; - d -= rem - 4; //change to friday before weekend - if (d < 1) d += 3; //if friday was prev. 
month, change to monday instead - absdate = absdate - convDate->day + d; - converted = (long)((absdate / 7 * 5.0) + absdate%7); - } - } - else - { - converted = (long)((absdate / 7 * 5.0) + rem); - } + if (notStartInd == 1 && freqVal(fromFreq) > 4) + { + return -1; + } + else + { + if (convDate->day - (convDate->day_of_week - 4) < 1) { + //change to Monday after weekend + absdate += (7 - convDate->day_of_week); + } else { + //change to friday before weekend + absdate -= (convDate->day_of_week - 4); + } + + converted = (long)((absdate / 7 * 5.0) + absdate%7); + } + } + else + { + converted = (long)((absdate / 7 * 5.0) + absdate%7); + } break; case 'M': - converted = (long)((y-1)*12 + m); - break; - case 'Q': - converted = (long)((y-1)*4 + ((m-1)/3) + 1); - break; - case 'A': - converted = (long)(y+1); - break; + converted = (long)(y*12 + m); + break; + case 'Q': + converted = (long)(y*4 + ((m-1)/3) + 1); + break; + case 'A': + converted = (long)(y); + break; default: return -1; } @@ -263,143 +258,171 @@ return converted; } - -static long -expand(long oldSize, char fromFr, char toFr) -{ - long newSize; - int fromFreq, toFreq; - - if (fromFr == toFr) return oldSize; - - fromFreq = freqVal(fromFr); - toFreq = freqVal(toFr); - if (fromFreq*toFreq == 0) return oldSize; //invalid frequency - - newSize = oldSize; - - while (toFreq > fromFreq) - { - if (fromFreq == 1) //Annual - { - newSize *= 4; //quarters in year - fromFreq++; - } - else if (fromFreq == 2) //Quarterly - { - newSize *= 3; //months in quarter - fromFreq++; - } - else if (fromFreq == 3) //Monthly - { - newSize *= 31; //max days in month - fromFreq++; - } - else if (fromFreq == 4) //Business - { - newSize *= 2; //max d days for each b days - fromFreq++; - } - } - - - return newSize; +static int validFreq(char freq) { + switch(freq) + { + case 'A': + return 1; + case 'Q': + return 1; + case 'M': + return 1; + case 'B': + return 1; + case 'D': + return 1; + default: + return 0; + } } 
-/////////////////////////////////////////////////////////////////////// -/* -OBSERVED +static int +expand(long oldSize, char fromFreq, char toFreq, long *newLen, long *newHeight) +{ -from lower freq to higher freq ----------------------- + int maxBusDaysPerYear, maxBusDaysPerQuarter, maxBusDaysPerMonth; + int minBusDaysPerYear, minBusDaysPerQuarter, minBusDaysPerMonth; -summed -- all values in period set as lower freq's value / # of values + int maxDaysPerYear, maxDaysPerQuarter, maxDaysPerMonth; + int minDaysPerYear, minDaysPerQuarter, minDaysPerMonth; -rest -- all values in period set as lower freq's value + minBusDaysPerYear = 260; maxBusDaysPerYear = 262; + minBusDaysPerQuarter = 64; maxBusDaysPerQuarter = 66; + minBusDaysPerMonth = 20; maxBusDaysPerMonth = 23; -from higher freq to lower freq ----------------------- -begin - lower freq's value set as first value in period -end - lower freq's value set as end value in period -summed - lower freq's value set as sum of all values in period -averaged - lower freq's value set as average of all values in period -high - lower freq's value set as largest value in period -low - lower freq's value set as smallest value in period + minDaysPerYear = 365; maxDaysPerYear = 366; + minDaysPerQuarter = 90; maxDaysPerQuarter = 92; + minDaysPerMonth = 28; maxDaysPerMonth = 31; -*/ -/////////////////////////////////////////////////////////////////////// + if (!validFreq(fromFreq)) return 0; + if (!validFreq(toFreq)) return 0; -static void -adjValForObsSet(PyArrayObject *theArray, char obs, PyObject **newVal, PyObject **newValMask, PyObject *val, PyObject *valMask, long curPerLen) -{ - double dblVal; - long lngValMask, lngAllMasked; + if (fromFreq == toFreq) { + *newLen = oldSize; + *newHeight = 1; + } else { - lngValMask = PyInt_AsLong(valMask); - lngAllMasked = PyInt_AsLong(*newValMask); + switch(fromFreq) + { + case 'A': //annual - if (!lngValMask) { + switch(toFreq) + { + case 'Q': + *newLen = oldSize * 4; + *newHeight = 1; + 
break; + case 'M': + *newLen = oldSize * 12; + *newHeight = 1; + break; + case 'B': + *newLen = oldSize * maxBusDaysPerYear; + *newHeight = 1; + break; + case 'D': + *newLen = oldSize * maxDaysPerYear; + *newHeight = 1; + break; + } + break; - // if any value is not masked, then we shall not mask the aggregated result - *newValMask = valMask; + case 'Q': //quarterly - if (obs == 'B') - { - if (lngAllMasked) { - *newVal = val; - } - } - else if ( PyArray_ISFLOAT(theArray) && (obs=='S' || obs=='A') ) - { + switch(toFreq) + { + case 'A': + *newLen = (oldSize / 4) + 2; + *newHeight = 4; + break; + case 'M': + *newLen = oldSize * 3; + *newHeight = 1; + break; + case 'B': + *newLen = oldSize * maxBusDaysPerQuarter; + *newHeight = 1; + break; + case 'D': + *newLen = oldSize * maxDaysPerQuarter; + *newHeight = 1; + break; + } + break; - if (obs == 'S') - { - //observed is summed + case 'M': //monthly - dblVal = PyFloat_AsDouble(*newVal); - dblVal += PyFloat_AsDouble(val); - *newVal = PyFloat_FromDouble(dblVal); - } - else - { - //observed is averaged + switch(toFreq) + { + case 'A': + *newLen = (oldSize / 12) + 2; + *newHeight = 12; + break; + case 'Q': + *newLen = (oldSize / 3) + 2; + *newHeight = 3; + break; + case 'B': + *newLen = oldSize * maxBusDaysPerMonth; + *newHeight = 1; + break; + case 'D': + *newLen = oldSize * maxDaysPerMonth; + *newHeight = 1; + break; + } + break; - dblVal = PyFloat_AsDouble(*newVal); - dblVal *= (curPerLen-1); - dblVal += PyFloat_AsDouble(val); - dblVal /= curPerLen; - *newVal = PyFloat_FromDouble(dblVal); - } + case 'B': //business - } - else if ( PyArray_ISNUMBER(theArray) && (obs=='H' || obs=='L') ) - { + switch(toFreq) + { + case 'A': + *newLen = (oldSize / minBusDaysPerYear) + 2; + *newHeight = maxBusDaysPerYear; + break; + case 'Q': + *newLen = (oldSize / minBusDaysPerQuarter) + 2; + *newHeight = maxBusDaysPerQuarter; + break; + case 'M': + *newLen = (oldSize / minBusDaysPerMonth) + 2; + *newHeight = maxBusDaysPerMonth; + break; + 
case 'D': + *newLen = ((7 * oldSize)/5) + 2; + *newHeight = 1; + break; + } + break; - if (obs == 'H') - { - //observed is high + case 'D': //daily - if (PyFloat_AsDouble(val) > PyFloat_AsDouble(*newVal)) *newVal = val; - } - else if (obs == 'L') - { - //observed is low + switch(toFreq) + { + case 'A': + *newLen = (oldSize / minDaysPerYear) + 2; + *newHeight = maxDaysPerYear; + break; + case 'Q': + *newLen = (oldSize / minDaysPerQuarter) + 2; + *newHeight = maxDaysPerQuarter; + break; + case 'M': + *newLen = (oldSize / minDaysPerMonth) + 2; + *newHeight = maxDaysPerMonth; + break; + case 'B': + *newLen = ((5 * oldSize)/7) + 2; + *newHeight = 1; + break; } + break; + } + } - if (PyFloat_AsDouble(val) < PyFloat_AsDouble(*newVal)) *newVal = val; - } + return 1; - } - else - { - //observed is not beginning and - //val is string or (val is date and observed is summed/averaged) - //or observed is end or not supported - - *newVal = val; - } - } - } @@ -418,24 +441,21 @@ PyObject *returnVal = NULL; int notStartInd; - long startIndex, newStart; - long i, curPerInd, nextPerInd, prevIndex, curIndex; - long dim; - long curPerLen; - long lngValMask; - char *fromFreq, *toFreq, *observed; + long startIndex, newStart, newStartYaxis; + long newLen, newHeight; + long i, currIndex, prevIndex; + long nd; + long *dim; + long currPerLen; + char *fromFreq, *toFreq, *position; - char *getptr; - PyObject *val, *newVal; + PyObject *val, *valMask; - char *getptrMask; - PyObject *valMask, *newValMask; - int toFrVal, fromFrVal; - returnVal = PyDict_New(); + returnVal = PyDict_New(); - if (!PyArg_ParseTuple(args, "OssslO:reindex(array, fromfreq, tofreq, observed, startIndex,mask)", &tempArray, &fromFreq, &toFreq, &observed, &startIndex, &tempMask)) return NULL; + if (!PyArg_ParseTuple(args, "OssslO:reindex(array, fromfreq, tofreq, position, startIndex, mask)", &tempArray, &fromFreq, &toFreq, &position, &startIndex, &tempMask)) return NULL; if (toFreq[0] == fromFreq[0]) { @@ -446,135 +466,95 @@ 
return returnVal; } + //get frequency numeric mapping + fromFrVal = freqVal(fromFreq[0]); + toFrVal = freqVal(toFreq[0]); + array = PyArray_GETCONTIGUOUS(tempArray); mask = PyArray_GETCONTIGUOUS(tempMask); - //expand size to fit new values if needed - dim = expand(array->dimensions[0], fromFreq[0], toFreq[0]); + //expand size to fit new values if needed + if (!expand(array->dimensions[0], fromFreq[0], toFreq[0], &newLen, &newHeight)) return NULL; - //initialize new array - newArray = (PyArrayObject*)PyArray_SimpleNew(array->nd, &dim, array->descr->type_num); - newMask = (PyArrayObject*)PyArray_SimpleNew(mask->nd, &dim, mask->descr->type_num); - - for (i = 0; i < dim; i++) - { - setArrayItem(&newArray, i, PyInt_FromLong(1)); - setArrayItem(&newMask, i, PyInt_FromLong(1)); - } - - //convert start index to new frequency - notStartInd = 0; + //convert start index to new frequency + notStartInd = 0; newStart = convert(startIndex, fromFreq[0], toFreq[0], notStartInd); - //initialize prevIndex - prevIndex = newStart - 1; + if (newHeight > 1) { - notStartInd = 1; + newStartYaxis = startIndex - convert(newStart, toFreq[0], fromFreq[0], notStartInd); + currPerLen = newStartYaxis; - //set values in the new array - for (i = 0; i < array->dimensions[0]; i++) - { - //find index for start of current period in new frequency - curPerInd = convert(startIndex + i, fromFreq[0], toFreq[0], notStartInd); + nd = 2; + dim = malloc(nd * sizeof(int)); + dim[0] = newLen; + dim[1] = newHeight; + } else { + currPerLen = 0; + nd = 1; + dim = malloc(nd * sizeof(int)); + dim[0] = newLen; + } - //get frequency numeric mapping - fromFrVal = freqVal(fromFreq[0]); - toFrVal = freqVal(toFreq[0]); + newArray = (PyArrayObject*)PyArray_SimpleNew(nd, dim, array->descr->type_num); + newMask = (PyArrayObject*)PyArray_SimpleNew(nd, dim, mask->descr->type_num); - //get value from old array - getptr = array->data + i*array->strides[0]; - val = PyArray_GETITEM(array,getptr); + free(dim); - //get the mask 
corresponding to the old value - getptrMask = mask->data + i*mask->strides[0]; - valMask = PyArray_GETITEM(mask,getptrMask); + PyArray_FILLWBYTE(newArray,0); + PyArray_FILLWBYTE(newMask,1); - if (fromFrVal < toFrVal) - { - //from lower freq to higher freq + //initialize prevIndex + prevIndex = newStart; - newVal = val; - newValMask = valMask; + notStartInd = 1; - //find index for start of next period in new frequency - nextPerInd = convert(startIndex + i + 1, fromFreq[0], toFreq[0], notStartInd); + //set values in the new array + for (i = 0; i < array->dimensions[0]; i++) + { - //adjust for observed setting - if (observed[0] == 'S' && PyArray_ISFLOAT(array) && !( (fromFrVal == 4 && toFrVal == 5) || (fromFrVal == 5 && toFrVal == 4) ) ) - { - //summed + //get value from old array + val = PyArray_GETITEM(array, PyArray_GetPtr(array, &i)); - //all values in period set as old array's value / # of values - newVal = PyFloat_FromDouble( PyFloat_AsDouble(val) / (nextPerInd - curPerInd) ); - } + //get the mask corresponding to the old value + valMask = PyArray_GETITEM(mask, PyArray_GetPtr(mask, &i)); - //set each value in period - for (curIndex = curPerInd; curIndex < nextPerInd; curIndex++) - { - setArrayItem(&newArray, curIndex-newStart, newVal); - setArrayItem(&newMask, curIndex-newStart, newValMask); - } - } - else - { + //find index for start of current period in new frequency + if (newHeight == 1 && (position[0] == 'E' && !((fromFrVal == 4 && toFrVal == 5) || (fromFrVal == 5 && toFrVal == 4))) ) { + currIndex = convert(startIndex + i + 1, fromFreq[0], toFreq[0], notStartInd)-1; + } else { + currIndex = convert(startIndex + i, fromFreq[0], toFreq[0], notStartInd); + } - lngValMask = PyInt_AsLong(valMask); + if (newHeight > 1) { - //from higher freq to lower freq + if (currIndex != prevIndex) + { + //reset period length + currPerLen = 0; + prevIndex = currIndex; + } - if (curPerInd != prevIndex) - { - //starting new period in old array + //set value in the new array + 
setArrayItem_2D(&newArray, currIndex-newStart, currPerLen, val); + setArrayItem_2D(&newMask, currIndex-newStart, currPerLen, valMask); + currPerLen++; - //set value in the new array - setArrayItem(&newArray, prevIndex-newStart, newVal); - setArrayItem(&newMask, prevIndex-newStart, newValMask); + } else { - //reset period length - curPerLen = 0; + setArrayItem_1D(&newArray, currIndex-newStart, val); + setArrayItem_1D(&newMask, currIndex-newStart, valMask); + } - - if (!lngValMask) { - curPerLen++; - } - - - - //store current index and value - prevIndex = curPerInd; - newVal = val; - newValMask = valMask; - - } - else - { - //still in same period - - - - if (!lngValMask) { - curPerLen++; - } - - //adjust new value according to observed setting - adjValForObsSet(array, observed[0], &newVal, &newValMask, val, valMask, curPerLen); - } - - } - } - //set value of last item in the new array - setArrayItem(&newArray, curPerInd-newStart, newVal); - setArrayItem(&newMask, curPerInd-newStart, newValMask); + PyDict_SetItemString(returnVal, "values", (PyObject*)newArray); + PyDict_SetItemString(returnVal, "mask", (PyObject*)newMask); - PyDict_SetItemString(returnVal, "values", (PyObject*)newArray); - PyDict_SetItemString(returnVal, "mask", (PyObject*)newMask); + return returnVal; - return returnVal; - } @@ -589,8 +569,8 @@ if (!PyArg_ParseTuple(args, "lss:convert(fromDate, fromfreq, tofreq)", &fromDate, &fromFreq, &toFreq)) return NULL; - //always want start of period (only matters when converting from lower freq to higher freq ie. m -> d) - notStartInd = 0; + //always want start of period (only matters when converting from lower freq to higher freq ie. 
m -> d) + notStartInd = 0; return PyInt_FromLong(convert(fromDate, fromFreq[0], toFreq[0], notStartInd)); } @@ -601,7 +581,7 @@ static PyMethodDef cseries_methods[] = { {"reindex", cseries_reindex, METH_VARARGS, cseries_reindex_doc}, {"convert", cseries_convert, METH_VARARGS, cseries_convert_doc}, - {"getDateInfo", cseries_getDateInfo, METH_VARARGS, cseries_getDateInfo_doc}, + {"getDateInfo", cseries_getDateInfo, METH_VARARGS, cseries_getDateInfo_doc}, {NULL, NULL} }; From scipy-svn at scipy.org Fri Dec 15 16:29:08 2006 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Fri, 15 Dec 2006 15:29:08 -0600 (CST) Subject: [Scipy-svn] r2424 - trunk/Lib/sandbox/timeseries Message-ID: <20061215212908.1CC8439C0FC@new.scipy.org> Author: mattknox_ca Date: 2006-12-15 15:29:06 -0600 (Fri, 15 Dec 2006) New Revision: 2424 Modified: trunk/Lib/sandbox/timeseries/corelib.py Log: Modified: trunk/Lib/sandbox/timeseries/corelib.py =================================================================== --- trunk/Lib/sandbox/timeseries/corelib.py 2006-12-15 21:28:45 UTC (rev 2423) +++ trunk/Lib/sandbox/timeseries/corelib.py 2006-12-15 21:29:06 UTC (rev 2424) @@ -1,5 +1,101 @@ import numpy +from numpy import ma + +############################################################# +############## generally applicable functions ############### +############################################################# +def apply_along_axis(func1d, axis, arr, *args): + """ Execute func1d(arr[i],*args) where func1d takes 1-D arrays + and arr is an N-d array. i varies so as to apply the function + along the given axis for each 1-d subarray in arr. + + Slightly modified version of the standard numpy version to work with masked arrays. + """ + + nd = arr.ndim + if axis < 0: + axis += nd + if (axis >= nd): + raise ValueError("axis must be less than arr.ndim; axis=%d, rank=%d." 
+ % (axis,nd)) + ind = [0]*(nd-1) + i = numpy.zeros(nd,'O') + indlist = range(nd) + indlist.remove(axis) + i[axis] = slice(None,None) + outshape = numpy.asarray(arr.shape).take(indlist) + i.put(indlist, ind) + res = func1d(arr[tuple(i.tolist())],*args) + # if res is a number, then we have a smaller output array + if not hasattr(res,'shape') or len(res.shape) == 0: + outarr = ma.zeros(outshape,ma.asarray(res).dtype) + outarr[ind] = res + Ntot = numpy.product(outshape) + k = 1 + while k < Ntot: + # increment the index + ind[-1] += 1 + n = -1 + while (ind[n] >= outshape[n]) and (n > (1-nd)): + ind[n-1] += 1 + ind[n] = 0 + n -= 1 + i.put(indlist,ind) + res = func1d(arr[tuple(i.tolist())],*args) + outarr[ind] = res + k += 1 + return outarr + else: + Ntot = numpy.product(outshape) + holdshape = outshape + outshape = list(arr.shape) + outshape[axis] = len(res) + outarr = ma.zeros(outshape,ma.asarray(res).dtype) + outarr[tuple(i.tolist())] = res + k = 1 + while k < Ntot: + # increment the index + ind[-1] += 1 + n = -1 + while (ind[n] >= holdshape[n]) and (n > (1-nd)): + ind[n-1] += 1 + ind[n] = 0 + n -= 1 + i.put(indlist, ind) + res = func1d(arr[tuple(i.tolist())],*args) + outarr[tuple(i.tolist())] = res + k += 1 + return outarr + + +def first_unmasked(m): + return __unmasked(m, False, 0) + +def last_unmasked(m): + return __unmasked(m, False, -1) + +def first_unmasked_val(m): + return __unmasked(m, True, 0) + +def last_unmasked_val(m): + return __unmasked(m, True, -1) + + +def __unmasked(m, get_val, relpos): + idx = numpy.where(m.mask == False) + if len(idx) != 0 and len(idx[0]) != 0: + idx = idx[0][relpos] + else: + idx = None + + if get_val: + if idx is None: return ma.masked + else: return m[idx] + else: + return idx +############################################################# + #converts possible strings for frequency into acceptable values def fmtFreq (freqStr): if freqStr is None: @@ -10,24 +106,26 @@ raise ValueError("Invalid frequency: "+str(freqStr)) +obsDict = 
{ + "UNDEFINED":None, + "BEGINNING":first_unmasked_val, + "END":last_unmasked_val, + "AVERAGED":ma.average, + "SUMMED":ma.sum, + "MAXIMUM":ma.maximum, + "MINIMUM":ma.minimum + } + #converts possible strings for observed into acceptable values def fmtObserv(obStr): - obsVals = ( "UNDEFINED", - "BEGINNING", - "END", - "AVERAGED", - "SUMMED", - "ANNUALIZED", - "FORMULA", - "HIGH", - "LOW") + obsVals = list(obsDict) if obStr is None: return None elif obStr.upper() in obsVals: return obStr.upper() - elif obStr.upper() in ("UNDEFINED", "BEGIN", "END", "AVERAGE", "SUM", "ANNUAL" , "FORMULA", "HIGH", "LOW"): + elif obStr.upper() in ("UNDEFINED", "BEGIN", "END", "AVERAGE", "SUM", "MAX", "MIN"): obStr = obStr.upper() for x in obsVals: if obStr[:2] == x[:2]: From scipy-svn at scipy.org Fri Dec 15 16:29:19 2006 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Fri, 15 Dec 2006 15:29:19 -0600 (CST) Subject: [Scipy-svn] r2425 - trunk/Lib/sandbox/timeseries Message-ID: <20061215212919.EDED339C0FC@new.scipy.org> Author: mattknox_ca Date: 2006-12-15 15:29:16 -0600 (Fri, 15 Dec 2006) New Revision: 2425 Modified: trunk/Lib/sandbox/timeseries/shiftingarray.py Log: Modified: trunk/Lib/sandbox/timeseries/shiftingarray.py =================================================================== --- trunk/Lib/sandbox/timeseries/shiftingarray.py 2006-12-15 21:29:06 UTC (rev 2424) +++ trunk/Lib/sandbox/timeseries/shiftingarray.py 2006-12-15 21:29:16 UTC (rev 2425) @@ -20,10 +20,14 @@ else: tempData = ma.array(values, self.dtype) - newSize = tempData.size*2 + #newSize = tempData.size*2 + newShape = list(tempData.shape) + newShape[0] *= 2 + newShape = tuple(newShape) + - firstIndex = newSize//4 - lastIndex = firstIndex + tempData.size - 1 + firstIndex = newShape[0]//4 + lastIndex = firstIndex + tempData.shape[0] - 1 if startIndex is None: self.indexZeroRepresents = None else: @@ -33,7 +37,7 @@ tempMask = ma.make_mask(mask) tempData[tempMask] = ma.masked - self.data = 
ma.array(numpy.empty(newSize,self.dtype)) + self.data = ma.array(numpy.empty(newShape,self.dtype)) if firstIndex > 0: self.data[0:firstIndex] = ma.masked @@ -187,7 +191,7 @@ return 0 def firstValue(self): - firstIndex = first_unmasked(self.data) + firstIndex = corelib.first_unmasked(self.data) if self.indexZeroRepresents is None or firstIndex is None: return None else: @@ -195,7 +199,7 @@ def lastValue(self): - lastIndex = last_unmasked(self.data) + lastIndex = corelib.last_unmasked(self.data) if self.indexZeroRepresents is None or lastIndex is None: return None else: @@ -207,7 +211,7 @@ def __str__(self): retVal = "" if self.firstValue() is not None: - for i in range(first_unmasked(self.data), last_unmasked(self.data)+1): + for i in range(corelib.first_unmasked(self.data), corelib.last_unmasked(self.data)+1): index = str(i+self.indexZeroRepresents) index = index + (" " * (6-len(index))) retVal += index + "---> " + str(self.data[i]) + "\n" @@ -223,7 +227,9 @@ if ser1.indexZeroRepresents is None: return ShiftingArray([],ser1.data.dtype) else: - ser2 = ShiftingArray([ser2]*len(ser1),ser1.data.dtype, ser1.firstValue()) + tempSer = numpy.empty(ser1.data.shape,dtype=ser1.data.dtype) + tempSer.fill(ser2) + ser2 = ShiftingArray(tempSer, startIndex=ser1.firstValue()) sFV, sLV = ser1.firstValue(), ser1.lastValue() oFV, oLV = ser2.firstValue(), ser2.lastValue() @@ -282,16 +288,3 @@ return round(size*EXPAND_MULT) + EXPAND_ADD -def first_unmasked(m): - idx = numpy.where(m.mask == False) - if len(idx) != 0 and len(idx[0]) != 0: - return idx[0][0] - else: - return None - -def last_unmasked(m): - idx = numpy.where(m.mask == False) - if len(idx) != 0 and len(idx[0]) != 0: - return idx[0][-1] - else: - return None \ No newline at end of file From scipy-svn at scipy.org Fri Dec 15 16:29:30 2006 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Fri, 15 Dec 2006 15:29:30 -0600 (CST) Subject: [Scipy-svn] r2426 - trunk/Lib/sandbox/timeseries Message-ID: 
<20061215212930.A5C9339C0FC@new.scipy.org> Author: mattknox_ca Date: 2006-12-15 15:29:26 -0600 (Fri, 15 Dec 2006) New Revision: 2426 Modified: trunk/Lib/sandbox/timeseries/timeseries.py Log: Modified: trunk/Lib/sandbox/timeseries/timeseries.py =================================================================== --- trunk/Lib/sandbox/timeseries/timeseries.py 2006-12-15 21:29:16 UTC (rev 2425) +++ trunk/Lib/sandbox/timeseries/timeseries.py 2006-12-15 21:29:26 UTC (rev 2426) @@ -34,38 +34,69 @@ super(TimeSeries, self).__setitem__(key, value) - def convert(self, freq, observed=None): - # return self converted to freq, method according to self.observed + def convert(self, freq, func='auto', position='END', interp=None): + """ + return self converted to freq. + + When converting to a lower frequency, func is a function that acts + on a 1-d array and returns a scalar or 1-d array. func should handle + masked values appropriately. If func is "auto", then an + appropriate function is determined based on the observed attribute + of the series. If func is None, then a 2D array is returned, where each + column represents the values appropriately grouped into the new frequency. + interp and position will be ignored in this case. + + When converting to a higher frequency, position is 'START' or 'END' + and determines where the data point is in each period (eg. if going + from monthly to daily, and position is 'END', then each data point is + placed at the end of the month). Interp is the method that will be used + to fill in the gaps. Valid values are "CUBIC", "LINEAR", "CONSTANT", "DIVIDED", + and None. 
+ + """ + + if position.upper() not in ('END','START'): raise ValueError("invalid value for position argument: (%s)",str(position)) + toFreq = corelib.fmtFreq(freq) fromFreq = self.freq if fromFreq != toFreq: - if observed is None: observed=self.observed - else: observed = corelib.fmtObserv(observed) + + if func == 'auto': + func = corelib.obsDict[self.observed] - firstIndex = sa.first_unmasked(self.data) + firstIndex = corelib.first_unmasked(self.data) if firstIndex is None: - return TimeSeries([], dtype=self.dtype, freq=toFreq, observed=observed) + return TimeSeries([], dtype=self.dtype, freq=toFreq, observed=self.observed) startIndexAdj = self.firstValue() - lastIndex = sa.last_unmasked(self.data) + lastIndex = corelib.last_unmasked(self.data) tempData = copy.deepcopy(self.data[firstIndex:lastIndex+1]) tempMask = tempData.mask tempData = tempData.filled() - cRetVal = cseries.reindex(tempData, fromFreq, toFreq, observed, startIndexAdj, tempMask) + cRetVal = cseries.reindex(tempData, fromFreq, toFreq, position, startIndexAdj, tempMask) + _values = cRetVal['values'] _mask = cRetVal['mask'] + + tempData = ma.array(_values) + tempMask = ma.make_mask(_mask) + tempData[tempMask] = ma.masked + if func is not None and tempData.ndim == 2: + tempData = corelib.apply_along_axis(func, 1, tempData) + startIndex = cseries.convert(startIndexAdj, fromFreq, toFreq) - return TimeSeries(_values, dtype=self.data.dtype, freq=toFreq, observed=observed, startIndex=startIndex, mask=_mask) + return TimeSeries(tempData, dtype=self.data.dtype, freq=toFreq, observed=self.observed, startIndex=startIndex) else: return copy.deepcopy(self) + def __str__(self): retVal = "" From scipy-svn at scipy.org Fri Dec 15 16:29:40 2006 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Fri, 15 Dec 2006 15:29:40 -0600 (CST) Subject: [Scipy-svn] r2427 - trunk/Lib/sandbox/timeseries Message-ID: <20061215212940.17FB039C0FC@new.scipy.org> Author: mattknox_ca Date: 2006-12-15 15:29:36 -0600 (Fri, 15 Dec 
2006) New Revision: 2427 Modified: trunk/Lib/sandbox/timeseries/tsdate.py Log: Modified: trunk/Lib/sandbox/timeseries/tsdate.py =================================================================== --- trunk/Lib/sandbox/timeseries/tsdate.py 2006-12-15 21:29:26 UTC (rev 2426) +++ trunk/Lib/sandbox/timeseries/tsdate.py 2006-12-15 21:29:36 UTC (rev 2427) @@ -2,7 +2,7 @@ import mx.DateTime class Date: - def __init__(self, freq, year=None, month=None, day=None, seconds=None,quarter=None, mxDate=None, value=None): + def __init__(self, freq, year=None, month=None, day=None, seconds=None, quarter=None, mxDate=None, value=None): if hasattr(freq, 'freq'): self.freq = corelib.fmtFreq(freq.freq) @@ -12,17 +12,19 @@ if value is not None: if self.freq == 'D': - self.mxDate = value+originDate + self.mxDate = mx.DateTime.DateTimeFromAbsDays(value-1) elif self.freq == 'B': - self.mxDate = originDate + value + (value//5)*7 - (value//5)*5 + #originDate + val + (val//5)*7 - (val//5)*5 + value -= 1 + self.mxDate = mx.DateTime.DateTimeFromAbsDays(value + (value//5)*7 - (value//5)*5) elif self.freq == 'S': self.mxDate = secondlyOriginDate + mx.DateTime.DateTimeDeltaFromSeconds(value) elif self.freq == 'M': - self.mxDate = originDate + mx.DateTime.RelativeDateTime(months=value, day=-1) + self.mxDate = (mx.DateTime.Date(0)) + mx.DateTime.RelativeDateTime(months=value-1, day=-1) elif self.freq == 'A': - self.mxDate = originDate + mx.DateTime.RelativeDateTime(years=value, month=-1, day=-1) + self.mxDate = mx.DateTime.Date(value, -1, -1) elif self.freq == 'Q': - self.mxDate = originDate + 1 + mx.DateTime.RelativeDateTime(years=int(value/4), month=int(12 * (float(value)/4 - value/4)), day=-1) + self.mxDate = (mx.DateTime.Date(0)) + mx.DateTime.RelativeDateTime(years=(value // 4), month=((value * 3) % 12), day=-1) elif mxDate is not None: self.mxDate = mxDate else: @@ -125,23 +127,23 @@ return self.value def __value(self): + if self.freq == 'D': - return int((self.mxDate-originDate).days) + 
return self.mxDate.absdate elif self.freq == 'B': - days = (self.mxDate-originDate).days + days = self.mxDate.absdate weeks = days // 7 return int((weeks*5) + (days - weeks*7)) elif self.freq == 'M': - return (self.mxDate.year - originDate.year)*12 + (self.mxDate.month - originDate.month) + return self.mxDate.year*12 + self.mxDate.month elif self.freq == 'S': return int((self.mxDate - secondlyOriginDate).seconds) elif self.freq == 'A': - return int(self.mxDate.year - originDate.year + 1) + return int(self.mxDate.year) elif self.freq == 'Q': - return int ((self.mxDate.year - originDate.year)*4 + (self.mxDate.month - originDate.month)/3) - + return int(self.mxDate.year*4 + self.mxDate.month/3) + -originDate = mx.DateTime.Date(1850)-1 secondlyOriginDate = mx.DateTime.Date(1980) - mx.DateTime.DateTimeDeltaFromSeconds(1) From scipy-svn at scipy.org Fri Dec 15 16:30:07 2006 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Fri, 15 Dec 2006 15:30:07 -0600 (CST) Subject: [Scipy-svn] r2428 - trunk/Lib/sandbox/timeseries/examples Message-ID: <20061215213007.A396439C0FC@new.scipy.org> Author: mattknox_ca Date: 2006-12-15 15:30:04 -0600 (Fri, 15 Dec 2006) New Revision: 2428 Modified: trunk/Lib/sandbox/timeseries/examples/example.py Log: Modified: trunk/Lib/sandbox/timeseries/examples/example.py =================================================================== --- trunk/Lib/sandbox/timeseries/examples/example.py 2006-12-15 21:29:36 UTC (rev 2427) +++ trunk/Lib/sandbox/timeseries/examples/example.py 2006-12-15 21:30:04 UTC (rev 2428) @@ -1,5 +1,6 @@ import numpy as np import timeseries as ts +from numpy import ma # create a time series at business frequency and fill it with random data @@ -18,14 +19,39 @@ """ Convert bSer to a monthly frequency series. -The optional observed argument to the convert method specifies what -method will be used to perform the frequency conversion. 
If it is -not specified, the observed attribute of the series will be used to -determine the method. +The optional func argument to the convert method specifies is a +function that acts on a 1-dimension masked array and returns a single +value. """ -mSer1 = bSer.convert('M',observed='AVERAGED') +mSer1 = bSer.convert('M',func=ma.average) +""" +If func is None, a "2 dimensional" time series will be returned. In this +example, the value for each month will be a length 23 masked array (23 +being the max number of business days in a month) +""" +mSer1_2d = bSer.convert('M',func=None) + + +""" +If func is not specified, the observed attribute of the series +will be used to determine the method. (SUMMED for this example) +""" +mSer1_default = bSer.convert('M') + + +""" +Convert mSer to a business frequency series. + +when converting from a lower frequency to a higher frequency, position is one +of 'START' or 'END', and determines where the data point will be placed in the +period. In the future, interpolation methods will be supported to fill in the +resulting masked values. +""" +mToB = bSer.convert('M',position='START') + + # create another monthly frequency series mSer2 = ts.TimeSeries(np.random.uniform(-100,100,100),dtype=np.float64,freq='m',observed='END',startIndex=ts.thisday('M')-110) @@ -63,7 +89,7 @@ min(mSer1.firstValue(),mSer2.firstValue()) to max(mSer1.lastValue(),mSer2.lastValue()) wherever the series are masked before performing the operation """ -mAdd2 = ts.add(mSer1,mSer2,fill_value=0) +mAdd2 = ts.add(mSer1, mSer2, fill_value=0) # calculate the average value in the series. 
Behaves the same as in ma @@ -79,18 +105,23 @@ # get the last day of this year, at daily frequency dLastDayOfYear = ts.dateOf(ts.thisday('A'),'D','AFTER') + # get the first day of this year, at business frequency bFirstDayOfYear = ts.dateOf(ts.thisday('A'),'B','BEFORE') + # get the last day of the previous quarter, business frequency -bFirstDayOfLastQuarter = ts.dateOf(ts.thisday('Q')-1,'B','AFTER') +bLastDayOfLastQuarter = ts.dateOf(ts.thisday('Q')-1,'B','AFTER') + # dateOf can also go from high frequency to low frequency. In this case, the third parameter has no impact aTrueValue = (ts.thisday('Q') == ts.dateOf(ts.thisday('b'),'Q')) + # dates of the same frequency can be subtracted (but not added obviously) numberOfBusinessDaysPassedThisYear = ts.thisday('b') - bFirstDayOfYear + # integers can be added/substracted to/from dates fiveDaysFromNow = ts.thisday('d') + 5 @@ -102,4 +133,4 @@ # construct a date object explicitly myDateQ = ts.Date(freq='Q',year=2004,quarter=3) -myDateD = ts.Date(freq='D',year=1985,month=10,day=4) \ No newline at end of file +myDateD = ts.Date(freq='D',year=1985,month=10,day=4) From scipy-svn at scipy.org Fri Dec 15 16:32:24 2006 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Fri, 15 Dec 2006 15:32:24 -0600 (CST) Subject: [Scipy-svn] r2429 - trunk/Lib/sandbox/timeseries/doc Message-ID: <20061215213224.317B639C0FC@new.scipy.org> Author: mattknox_ca Date: 2006-12-15 15:32:21 -0600 (Fri, 15 Dec 2006) New Revision: 2429 Modified: trunk/Lib/sandbox/timeseries/doc/todo.txt Log: Modified: trunk/Lib/sandbox/timeseries/doc/todo.txt =================================================================== --- trunk/Lib/sandbox/timeseries/doc/todo.txt 2006-12-15 21:30:04 UTC (rev 2428) +++ trunk/Lib/sandbox/timeseries/doc/todo.txt 2006-12-15 21:32:21 UTC (rev 2429) @@ -20,10 +20,6 @@ This should also be an attribute of each TimeSeries object that would provide the default behaviour if the basis option was not specified. 
- - - add an "ignore" option to the convert method to specify whether to - ignore masked values in the calculation or not (current behaviour is - to ignore masked values). - support for wider variety of standard numpy/ma functions @@ -40,15 +36,7 @@ whole periods of the higher frequency. For example, if converting weekly to monthly, some weeks overlap more than one month. - - Perhaps change frequency conversion code to simply group data in the C code, - (eg. if going from daily to monthly, create a 2 dimensional array - where each column represents one month's worth of data), and then - perform mathematical operation on the python side (eg. sum/average/etc - the columns to reduce back to a 1-d array). This would not work well - for the scenario described in the previous bullet, but would work nicely - for a lot of scenarios. - Wishlist: - currently, the code relies on the mx.DateTime module (both the python, From scipy-svn at scipy.org Fri Dec 15 16:33:38 2006 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Fri, 15 Dec 2006 15:33:38 -0600 (CST) Subject: [Scipy-svn] r2430 - trunk/Lib/sandbox/timeseries Message-ID: <20061215213338.D7C7C39C078@new.scipy.org> Author: mattknox_ca Date: 2006-12-15 15:33:36 -0600 (Fri, 15 Dec 2006) New Revision: 2430 Modified: trunk/Lib/sandbox/timeseries/timeseries.py Log: Modified: trunk/Lib/sandbox/timeseries/timeseries.py =================================================================== --- trunk/Lib/sandbox/timeseries/timeseries.py 2006-12-15 21:32:21 UTC (rev 2429) +++ trunk/Lib/sandbox/timeseries/timeseries.py 2006-12-15 21:33:36 UTC (rev 2430) @@ -53,6 +53,8 @@ to fill in the gaps. Valid values are "CUBIC", "LINEAR", "CONSTANT", "DIVIDED", and None. 
+ Note: interp currently not implemented + """ if position.upper() not in ('END','START'): raise ValueError("invalid value for position argument: (%s)",str(position)) From scipy-svn at scipy.org Sun Dec 17 19:25:25 2006 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Sun, 17 Dec 2006 18:25:25 -0600 (CST) Subject: [Scipy-svn] r2431 - trunk/Lib/sandbox/models Message-ID: <20061218002525.5F6C139C035@new.scipy.org> Author: timl Date: 2006-12-17 18:25:20 -0600 (Sun, 17 Dec 2006) New Revision: 2431 Modified: trunk/Lib/sandbox/models/formula.py Log: fix undefined variable bug Modified: trunk/Lib/sandbox/models/formula.py =================================================================== --- trunk/Lib/sandbox/models/formula.py 2006-12-15 21:33:36 UTC (rev 2430) +++ trunk/Lib/sandbox/models/formula.py 2006-12-18 00:25:20 UTC (rev 2431) @@ -337,7 +337,7 @@ Create (transpose) of the design matrix of the formula within namespace. Extra arguments are passed to each term instance. If the formula just contains an intercept, then the keyword - argument 'n' indicates the number of rows (observations). + argument 'nrow' indicates the number of rows (observations). """ if 'namespace' in kw: @@ -375,6 +375,7 @@ except: pass else: + nrow = kw.get('nrow', -1) if allvals != []: if interceptindex > 0: n = allvals[0].shape[1] @@ -382,10 +383,10 @@ n = allvals[1].shape[1] allvals[interceptindex] = N.ones((1,n), N.float64) allvals = N.concatenate(allvals) - elif nrow <= 1: # FIXME: nrow is undefined here + elif nrow <= 1: raise ValueError, 'with only intercept in formula, keyword \'nrow\' argument needed' else: - allvals = I(nrow=nrow) # ... 
and here + allvals = I(nrow=nrow) allvals.shape = (1,) + allvals.shape return allvals From scipy-svn at scipy.org Sun Dec 17 19:26:11 2006 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Sun, 17 Dec 2006 18:26:11 -0600 (CST) Subject: [Scipy-svn] r2432 - trunk/Lib/sandbox/models Message-ID: <20061218002611.31EB039C035@new.scipy.org> Author: timl Date: 2006-12-17 18:26:07 -0600 (Sun, 17 Dec 2006) New Revision: 2432 Modified: trunk/Lib/sandbox/models/mixed.py Log: simplify some expressions Modified: trunk/Lib/sandbox/models/mixed.py =================================================================== --- trunk/Lib/sandbox/models/mixed.py 2006-12-18 00:25:20 UTC (rev 2431) +++ trunk/Lib/sandbox/models/mixed.py 2006-12-18 00:26:07 UTC (rev 2432) @@ -181,12 +181,12 @@ """ - S = 0 - Y = 0 for unit in self.units: unit.fit(self.a, self.D, self.sigma) - S += unit.compute_xtwx() - Y += unit.compute_xtwy() + + S = sum([unit.compute_xtwx() for unit in self.units]) + Y = sum([unit.compute_xtwy() for unit in self.units]) + self.Sinv = L.pinv(S) self.a = N.dot(self.Sinv, Y) @@ -258,12 +258,8 @@ return logL def initialize(self): - S = 0 - Y = 0 - for unit in self.units: - S += N.dot(unit.X.T, unit.X) - Y += N.dot(unit.X.T, unit.Y) - + S = sum([N.dot(unit.X.T, unit.X) for unit in self.units]) + Y = sum([N.dot(unit.X.T, unit.Y) for unit in self.units]) self.a = L.lstsq(S, Y)[0] D = 0 @@ -328,8 +324,9 @@ m = Mixed(units, response, fixed, random) m.initialize() m.fit() - print m.a + + ## a = Unit() ## a['x'] = N.array([2,3]) ## a['y'] = N.array([3,4]) From scipy-svn at scipy.org Sun Dec 17 19:29:12 2006 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Sun, 17 Dec 2006 18:29:12 -0600 (CST) Subject: [Scipy-svn] r2433 - trunk/Lib/sandbox/models Message-ID: <20061218002912.DC35039C109@new.scipy.org> Author: timl Date: 2006-12-17 18:29:08 -0600 (Sun, 17 Dec 2006) New Revision: 2433 Modified: trunk/Lib/sandbox/models/cox.py Log: clean up FIXME Modified: 
trunk/Lib/sandbox/models/cox.py =================================================================== --- trunk/Lib/sandbox/models/cox.py 2006-12-18 00:26:07 UTC (rev 2432) +++ trunk/Lib/sandbox/models/cox.py 2006-12-18 00:29:08 UTC (rev 2433) @@ -154,12 +154,7 @@ raise NotImplementedError, 'Cox tie breaking method not implemented' else: raise NotImplementedError, 'tie breaking method not recognized' - # FIXME: score is an int. it has no shape - # is it that we shouldn't be using an int above - # or that we shouldn't be looking at shape here - if score.shape == (): - score = N.array([score]) - return score + return = N.array([score]) def information(self, b, ties='breslow'): From scipy-svn at scipy.org Sun Dec 17 21:47:50 2006 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Sun, 17 Dec 2006 20:47:50 -0600 (CST) Subject: [Scipy-svn] r2434 - trunk/Lib/maxentropy Message-ID: <20061218024750.0C4D539C1DC@new.scipy.org> Author: edschofield Date: 2006-12-17 20:47:44 -0600 (Sun, 17 Dec 2006) New Revision: 2434 Modified: trunk/Lib/maxentropy/maxentropy.py Log: Removed old unused attributes from maxentropy.py Modified: trunk/Lib/maxentropy/maxentropy.py =================================================================== --- trunk/Lib/maxentropy/maxentropy.py 2006-12-18 00:29:08 UTC (rev 2433) +++ trunk/Lib/maxentropy/maxentropy.py 2006-12-18 02:47:44 UTC (rev 2434) @@ -1139,32 +1139,6 @@ # in stochastic approx self.testconvergematrices = 10 - # For comparing sampling methods and opt algorithms -- specifies that - # we can compute the exact expectations at any iteration with - # self.expectations() as a convergence criterion - self.testconvergecheat = False - - # Number of stdevs either side of the mean for Z and E confidence - # intervals - self.z = 3.0 - - # Desired precision with expectation estimates - self.Etol = 5e-5 - - # Desired precision with logZ estimates - self.Ztol = 5e-5 - - # Using relative precision for the sampling stopping criterion is - # disabled 
by default: - self.Ertol = 0.0 - - # Number of samples to compute before tracking the variance - self.init_samples = 10000 - - # Min number of samples to compute the variance of after starting - # tracking - self.min_samples = 10000 - # Test for convergence every 'testevery' iterations, using one or # more external samples. If None, don't test. self.testevery = None From scipy-svn at scipy.org Mon Dec 18 23:14:30 2006 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Mon, 18 Dec 2006 22:14:30 -0600 (CST) Subject: [Scipy-svn] r2435 - in trunk/Lib/sandbox/maskedarray: . tests Message-ID: <20061219041430.1972739C09F@new.scipy.org> Author: pierregm Date: 2006-12-18 22:14:26 -0600 (Mon, 18 Dec 2006) New Revision: 2435 Modified: trunk/Lib/sandbox/maskedarray/CHANGELOG trunk/Lib/sandbox/maskedarray/core.py trunk/Lib/sandbox/maskedarray/extras.py trunk/Lib/sandbox/maskedarray/tests/test_core.py trunk/Lib/sandbox/maskedarray/tests/test_extras.py Log: see changelog Modified: trunk/Lib/sandbox/maskedarray/CHANGELOG =================================================================== --- trunk/Lib/sandbox/maskedarray/CHANGELOG 2006-12-18 02:47:44 UTC (rev 2434) +++ trunk/Lib/sandbox/maskedarray/CHANGELOG 2006-12-19 04:14:26 UTC (rev 2435) @@ -1,3 +1,5 @@ +#2006-12-18 : Extras +# : - Added compress2d and mask_rowcols #2006-12-13 : - moved 'average' to 'extras' # : Core # : - Fixed make_mask (forced filling to True) Modified: trunk/Lib/sandbox/maskedarray/core.py =================================================================== --- trunk/Lib/sandbox/maskedarray/core.py 2006-12-18 02:47:44 UTC (rev 2434) +++ trunk/Lib/sandbox/maskedarray/core.py 2006-12-19 04:14:26 UTC (rev 2435) @@ -947,7 +947,8 @@ return dout else: return self.__class__(dout, mask=nomask, - fill_value=self._fill_value) + fill_value=self._fill_value, + dtype = self.dtype,) #.... 
# m = self._mask.copy() m = self._mask @@ -958,7 +959,8 @@ else: return dout else: - return self.__class__(dout, mask=mi, fill_value=self._fill_value) + return self.__class__(dout, mask=mi, dtype = self.dtype, + fill_value=self._fill_value) #........................ def __setitem__(self, index, value): """x.__setitem__(i, y) <==> x[i]=y @@ -1001,9 +1003,11 @@ m = self._mask dout = self._data[i:j] if m is nomask: - return self.__class__(dout, fill_value=self._fill_value) + return self.__class__(dout, dtype = self.dtype, + fill_value=self._fill_value) else: - return self.__class__(dout, mask=m[i:j], fill_value=self._fill_value) + return self.__class__(dout, mask=m[i:j], dtype = self.dtype, + fill_value=self._fill_value) #........................ def __setslice__(self, i, j, value): """x.__setslice__(i, j, value) <==> x[i:j]=value @@ -1119,14 +1123,16 @@ Returns a masked array of the current subclass, with the new `_data` the absolute of the inital `_data`. """ - return self.__class__(self._data.__abs__(), mask=self._mask) + return self.__class__(self._data.__abs__(), mask=self._mask, + dtype = self.dtype,) # def __neg__(self): """x.__abs__() <==> neg(x) Returns a masked array of the current subclass, with the new `_data` the negative of the inital `_data`.""" try: - return self.__class__(self._data.__neg__(), mask=self._mask) + return self.__class__(self._data.__neg__(), mask=self._mask, + dtype = self.dtype,) except MAError: return negative(self) # @@ -1233,8 +1239,9 @@ d = self._data.astype(tc) # print "DEBUG: _astype: d", d # print "DEBUG: _astype: m", self._mask - return self.__class__(d, mask=self._mask) + return self.__class__(d, mask=self._mask, dtype=tc) #............................................ + #TODO: FIX THAT: THAT"S NOT A REAL FLATITER def _get_flat(self): """Calculates the flat value. 
""" @@ -1425,9 +1432,13 @@ If you want to modify the shape in place, please use `a.shape = s`""" if self._mask is not nomask: return self.__class__(self._data.reshape(*s), - mask=self._mask.reshape(*s)) + mask=self._mask.reshape(*s), + dtype = self.dtype, + fill_value=self.fill_value) else: - return self.__class__(self._data.reshape(*s)) + return self.__class__(self._data.reshape(*s), + dtype = self.dtype, + fill_value=self.fill_value) # def repeat(self, repeats, axis=None): """Repeat elements of `a` `repeats` times along `axis`. @@ -1447,7 +1458,8 @@ if m is not nomask: m = fromnumeric.repeat(m, repeats, axis) d = fromnumeric.repeat(f, repeats, axis) - return self.__class__(d, mask=m, fill_value=self.fill_value) + return self.__class__(d, mask=m, dtype = self.dtype, + fill_value=self.fill_value) # def resize(self, newshape, refcheck=True, order=False): """Attempts to modify size and shape of self inplace. @@ -1470,38 +1482,17 @@ "Use the resize function." raise ValueError, msg return None + # + def flatten(self): + """Flattens the array in place. + """ + flatsize = self.size + self._data.resize((flatsize,)) + if self.mask is not nomask: + self._mask.resize((flatsize,)) + return self - -# # -# def transpose(self,axes=None): -# """Returns a view of 'a' with axes transposed.""" -# (d,m) = (self._data, self._mask) -# if m is nomask: -# return self.__class__(d.transpose(axes), copy=False) -# else: -# return self.__class__(d.transpose(axes), -# mask=m.transpose(axes), copy=False) -# # -# def swapaxes(self, axis1, axis2): -# (d,m) = (self._data, self._mask) -# if m is nomask: -# return self.__class__(d.swapaxes(axis1, axis2), -# copy=False) -# else: -# return self.__class__(data=d.swapaxes(axis1, axis2), -# mask=m.swapaxes(axis1, axis2), -# copy=False) # -# def take(self, indices, axis=None, out=None, mode='raise'): -# "returns selection of items from a." 
-# (d,m) = (self._data, self._mask) -# if m is nomask: -# return self.__class__(d.take(indices, axis=axis, out=out, mode=mode)) -# else: -# return self.__class__(d.take(indices, axis=axis, out=out, mode=mode), -# mask=m.take(indices, axis=axis, out=out, mode=mode), -# copy=False,) - # def put(self, indices, values, mode='raise'): """Sets storage-indexed locations to corresponding values. a.put(values, indices, mode) sets a.flat[n] = values[n] for each n in indices. @@ -1534,7 +1525,8 @@ """ d = filled(self, True).all(axis) m = self._mask.all(axis) - return self.__class__(d, mask=m, fill_value=self._fill_value) + return self.__class__(d, mask=m, dtype=bool_, + fill_value=self._fill_value,) def any(self, axis=None): """a.any(axis) returns True if some or all entries along the axis are True. Returns False otherwise. If axis is None, uses the flatten array. @@ -1543,7 +1535,8 @@ """ d = filled(self, False).any(axis) m = self._mask.all(axis) - return self.__class__(d, mask=m, fill_value=self._fill_value) + return self.__class__(d, mask=m, dtype=bool_, + fill_value=self._fill_value) def nonzero(self): """a.nonzero() returns a tuple of arrays @@ -1583,13 +1576,15 @@ # if axis is None: # return self._data.sum(None, dtype=dtype) return self.__class__(self._data.sum(axis, dtype=dtype), - mask=nomask, fill_value=self._fill_value) + mask=nomask, dtype = self.dtype, + fill_value=self.fill_value) else: # if axis is None: # return self.filled(0).sum(None, dtype=dtype) return self.__class__(self.filled(0).sum(axis, dtype=dtype), - mask=self._mask.all(axis), - fill_value=self._fill_value) + mask=self._mask.all(axis), + dtype = self.dtype, + fill_value=self.fill_value) def cumsum(self, axis=None, dtype=None): """a.cumprod(axis=None, dtype=None) @@ -1600,12 +1595,16 @@ if self._mask is nomask: # if axis is None: # return self._data.cumsum(None, dtype=dtype) - return self.__class__(self._data.cumsum(axis=axis, dtype=dtype)) + return self.__class__(self._data.cumsum(axis=axis, 
dtype=dtype), + dtype = self.dtype, + fill_value=self.fill_value) else: # if axis is None: # return self.filled(0).cumsum(None, dtype=dtype) return self.__class__(self.filled(0).cumsum(axis=axis, dtype=dtype), - mask=self._mask, fill_value=self._fill_value) + mask=self._mask, + dtype = self.dtype, + fill_value=self.fill_value) def prod(self, axis=None, dtype=None): """a.prod(axis=None, dtype=None) @@ -1617,14 +1616,17 @@ # if axis is None: # return self._data.prod(None, dtype=dtype) return self.__class__(self._data.prod(axis, dtype=dtype), - mask=nomask, fill_value=self._fill_value) + mask=nomask, + dtype = self.dtype, + fill_value=self.fill_value) # return self.__class__(self._data.prod(axis=axis, dtype=dtype)) else: # if axis is None: # return self.filled(1).prod(None, dtype=dtype) return self.__class__(self.filled(1).prod(axis=axis, dtype=dtype), - mask=self._mask.all(axis), - fill_value=self._fill_value) + mask=self._mask.all(axis), + dtype = self.dtype, + fill_value=self.fill_value) product = prod def cumprod(self, axis=None, dtype=None): @@ -1637,12 +1639,16 @@ # if axis is None: # return self._data.cumprod(None, dtype=dtype) return self.__class__(self._data.cumprod(axis=axis, dtype=dtype), - mask=nomask, fill_value=self._fill_value) + mask=nomask, + dtype = self.dtype, + fill_value=self.fill_value,) else: # if axis is None: # return self.filled(1).cumprod(None, dtype=dtype) return self.__class__(self.filled(1).cumprod(axis=axis, dtype=dtype), - mask=self._mask, fill_value=self._fill_value) + mask=self._mask, + dtype = self.dtype, + fill_value=self.fill_value) def mean(self, axis=None, dtype=None): """a.mean(axis=None, dtype=None) @@ -1661,15 +1667,17 @@ # if axis is None: # return self._data.mean(axis=None, dtype=dtype) return self.__class__(self._data.mean(axis=axis, dtype=dtype), - mask=nomask, fill_value=self._fill_value) + mask=nomask, dtype = self.dtype, + fill_value=self.fill_value) else: dsum = fromnumeric.sum(self.filled(0), axis=axis, dtype=dtype) 
cnt = self.count(axis=axis) mask = self._mask.all(axis) if axis is None and mask: return masked - return self.__class__(dsum*1./cnt, mask=mask, - fill_value=self._fill_value) + return self.__class__(dsum*1./cnt, mask=mask, + dtype = self.dtype, + fill_value=self.fill_value,) def anom(self, axis=None, dtype=None): """a.anom(axis=None, dtype=None) @@ -1692,7 +1700,9 @@ # if axis is None: # return self._data.var(axis=None, dtype=dtype) return self.__class__(self._data.var(axis=axis, dtype=dtype), - mask=nomask, fill_value=self._fill_value) + mask=nomask, + dtype = self.dtype, + fill_value=self.fill_value) else: cnt = self.count(axis=axis) danom = self.anom(axis=axis, dtype=dtype) @@ -1703,7 +1713,8 @@ return dvar return self.__class__(dvar, mask=mask_or(self._mask.all(axis), (cnt==1)), - fill_value=self._fill_value) + dtype = self.dtype, + fill_value=self.fill_value) def std(self, axis=None, dtype=None): """a.std(axis=None, dtype=None) @@ -1719,8 +1730,9 @@ else: # Should we use umath.sqrt instead ? return sqrt(dvar) - return self.__class__(sqrt(dvar._data), mask=dvar._mask, - fill_value=self._fill_value) + return self.__class__(sqrt(dvar._data), mask=dvar._mask, + dtype = self.dtype, + fill_value=self.fill_value,) #............................................ def argsort(self, axis=None, fill_value=None, kind='quicksort'): """Returns an array of indices that sort 'a' along the specified axis. @@ -1932,7 +1944,8 @@ m_other = mask_or(m_other, domain) m = mask_or(m_self, m_other) method = getattr(base, self.methodname) - return instance.__class__(method(target, *args), mask=m) + return instance.__class__(method(target, *args), mask=m, + fill_value=instance.fill_value) #...................................... class _compamethods(object): """Defines comparison methods (eq, ge, gt...). 
@@ -1961,7 +1974,9 @@ base = instance.filled(self.fill_self) target = filled(other, self.fill_other) method = getattr(base, self.methodname) - return instance.__class__(method(target, *args), mask=m) + return instance.__class__(method(target, *args), mask=m, + dtype = instance.dtype, + fill_value=instance.fill_value) #.......................................................... MaskedArray.__add__ = _arithmethods('__add__') MaskedArray.__radd__ = _arithmethods('__add__') @@ -2099,18 +2114,19 @@ return self def __call__(self, *args, **params): methodname = self._name - (d,m, f) = (self.obj._data, self.obj._mask, self.obj._fill_value) + (d, m) = (self.obj._data, self.obj._mask) + (t, f) = (self.obj.dtype, self.obj._fill_value) C = self.obj.__class__ if m is nomask: return C(getattr(d,methodname).__call__(*args, **params), - fill_value=f) + dtype=t, fill_value=f) elif self._onmask: return C(getattr(d,methodname).__call__(*args, **params), mask=getattr(m,methodname)(*args, **params), - fill_value=f) + dtype=t, fill_value=f) else: return C(getattr(d,methodname).__call__(*args, **params), mask=m, - fill_value=f) + dtype=t, fill_value=f) #...................................... MaskedArray.conj = MaskedArray.conjugate = _arraymethod('conjugate') MaskedArray.diagonal = _arraymethod('diagonal') @@ -2162,10 +2178,10 @@ m = umath.logical_and.reduce(m, axis) # return masked_array(t, mask=m, fill_value=get_fill_value(target)) try: - return target.__class__(t, mask=m, + return target.__class__(t, mask=m, dtype=t.dtype, fill_value=get_fill_value(target)) except AttributeError: - return masked_array(t, mask=m, + return masked_array(t, mask=m, dtype=t.dtype, fill_value=get_fill_value(target)) #......... 
def outer(self, a, b): @@ -2225,10 +2241,10 @@ axis) m = umath.logical_and.reduce(m, axis) try: - return target.__class__(t, mask=m, + return target.__class__(t, mask=m, dtype=t.dtype, fill_value=get_fill_value(target)) except AttributeError: - return masked_array(t, mask=m, + return masked_array(t, mask=m, dtype=t.dtype, fill_value=get_fill_value(target)) #......... def outer (self, a, b): @@ -2461,10 +2477,12 @@ if isinstance(x, MaskedArray): (d,m) = (x._data, x._mask) if m is nomask: - return masked_array(n_expand_dims(d,axis)) + return masked_array(n_expand_dims(d,axis), + dtype=d.dtype, fill_value=x._fill_value) else: return masked_array(n_expand_dims(d,axis), - mask=n_expand_dims(m,axis)) + mask=n_expand_dims(m,axis), + dtype=d.dtype, fill_value=x._fill_value) else: return n_expand_dims(x,axis) Modified: trunk/Lib/sandbox/maskedarray/extras.py =================================================================== --- trunk/Lib/sandbox/maskedarray/extras.py 2006-12-18 02:47:44 UTC (rev 2434) +++ trunk/Lib/sandbox/maskedarray/extras.py 2006-12-19 04:14:26 UTC (rev 2435) @@ -14,8 +14,8 @@ __all__ = ['apply_along_axis', 'atleast_1d', 'atleast_2d', 'atleast_3d', 'average', 'vstack', 'hstack', 'dstack', 'row_stack', 'column_stack', - 'count_masked', - 'masked_all', 'masked_all_like', 'mr_', + 'compress2d', 'count_masked', + 'mask_rowcols','masked_all', 'masked_all_like', 'mr_', 'notmasked_edges', 'notmasked_contiguous', 'stdu', 'varu', ] @@ -23,9 +23,8 @@ from itertools import groupby import core -reload(core) +#reload(core) from core import * -from core import _arraymethod import numpy from numpy import float_ @@ -38,8 +37,9 @@ from numpy.lib.index_tricks import concatenator import numpy.lib.function_base as function_base +#............................................................................... 
def issequence(seq): - """Returns True if the argumnet is a sequence (ndarray, list or tuple.""" + """Returns True if the argumnet is a sequence (ndarray, list or tuple).""" if isinstance(seq, ndarray): return True elif isinstance(seq, tuple): @@ -290,7 +290,7 @@ if mask is nomask: if weights is None: d = ash[axis] * 1.0 - n = add.reduce(a._data, axis) + n = add.reduce(a._data, axis, dtype=float_) else: w = filled(weights, 0.0) wsh = w.shape @@ -306,14 +306,14 @@ r = [None]*len(ash) r[axis] = slice(None, None, 1) w = eval ("w["+ repr(tuple(r)) + "] * ones(ash, float)") - n = add.reduce(a*w, axis) - d = add.reduce(w, axis) + n = add.reduce(a*w, axis, dtype=float_) + d = add.reduce(w, axis, dtype=float_) del w, r else: raise ValueError, 'average: weights wrong shape.' else: if weights is None: - n = add.reduce(a, axis) + n = add.reduce(a, axis, dtype=float_) d = umath.add.reduce((-mask), axis=axis, dtype=float_) else: w = filled(weights, 0.0) @@ -321,16 +321,16 @@ if wsh == (): wsh = (1,) if wsh == ash: - w = array(w, float, mask=mask, copy=0) - n = add.reduce(a*w, axis) - d = add.reduce(w, axis) + w = array(w, dtype=float_, mask=mask, copy=0) + n = add.reduce(a*w, axis, dtype=float_) + d = add.reduce(w, axis, dtype=float_) elif wsh == (ash[axis],): ni = ash[axis] r = [None]*len(ash) r[axis] = slice(None, None, 1) w = eval ("w["+ repr(tuple(r)) + "] * masked_array(ones(ash, float), mask)") - n = add.reduce(a*w, axis) - d = add.reduce(w, axis) + n = add.reduce(a*w, axis, dtype=float_) + d = add.reduce(w, axis, dtype=float_) else: raise ValueError, 'average: weights wrong shape.' 
del w @@ -339,21 +339,74 @@ result = n/d del n - if isinstance(result, MaskedArray): + if isMaskedArray(result): if ((axis is None) or (axis==0 and a.ndim == 1)) and \ - (result._mask is nomask): + (result.mask is nomask): result = result._data if returned: - if not isinstance(d, MaskedArray): + if not isMaskedArray(d): d = masked_array(d) if isinstance(d, ndarray) and (not d.shape == result.shape): - d = ones(result.shape, float) * d + d = ones(result.shape, dtype=float_) * d if returned: + print type(result) return result, d else: return result + +#.............................................................................. +def compress2d(x, axis=None): + """Suppresses the rows and/or columns of a 2D array that contains masked values. + The suppression behavior is selected with the `axis`parameter. + - If axis is None, rows and columns are suppressed. + - If axis is 0, only rows are suppressed. + - If axis is 1 or -1, only columns are suppressed. + Returns a *pure* ndarray. + """ + x = asarray(x) + if x.ndim <> 2: + raise NotImplementedError, "compress2d works for 2D arrays only." + m = getmask(x) + # Nothing is masked: return x + if m is nomask or not m.any(): + return nxasarray(x) + # All is masked: return empty + if m.all(): + return nxarray([]) + # Builds a list of rows/columns indices + (idxr, idxc) = (range(len(x)), range(x.shape[1])) + masked = m.nonzero() + if not axis: + for i in function_base.unique(masked[0]): + idxr.remove(i) + if axis in [None, 1, -1]: + for j in function_base.unique(masked[1]): + idxc.remove(j) + return nxasarray(x[idxr][:,idxc]) +def mask_rowcols(a, axis=None): + """Suppresses the rows and/or columns of a 2D array that contains masked values. + The suppression behavior is selected with the `axis`parameter. + - If axis is None, rows and columns are suppressed. + - If axis is 0, only rows are suppressed. + - If axis is 1 or -1, only columns are suppressed. + Returns a *pure* ndarray. 
+ """ + a = asarray(a) + if a.ndim != 2: + raise NotImplementedError, "compress2d works for 2D arrays only." + m = getmask(a) + # Nothing is masked: return a + if m is nomask or not m.any(): + return a + maskedval = m.nonzero() + if not axis: + a[function_base.unique(maskedval[0])] = masked + if axis in [None, 1, -1]: + a[:,function_base.unique(maskedval[1])] = masked + return a + #####-------------------------------------------------------------------------- #---- --- Concatenation helpers --- #####-------------------------------------------------------------------------- Modified: trunk/Lib/sandbox/maskedarray/tests/test_core.py =================================================================== --- trunk/Lib/sandbox/maskedarray/tests/test_core.py 2006-12-18 02:47:44 UTC (rev 2434) +++ trunk/Lib/sandbox/maskedarray/tests/test_core.py 2006-12-19 04:14:26 UTC (rev 2435) @@ -647,79 +647,6 @@ dma_3 = MaskedArray(dma_1, mask=[1,0,0,0]*6) fail_if_equal(dma_3.mask, dma_1.mask) - def check_testAverage1(self): - "Test of average." - ott = array([0.,1.,2.,3.], mask=[1,0,0,0]) - assert_equal(2.0, average(ott,axis=0)) - assert_equal(2.0, average(ott, weights=[1., 1., 2., 1.])) - result, wts = average(ott, weights=[1.,1.,2.,1.], returned=1) - assert_equal(2.0, result) - assert(wts == 4.0) - ott[:] = masked - assert(average(ott,axis=0) is masked) - ott = array([0.,1.,2.,3.], mask=[1,0,0,0]) - ott = ott.reshape(2,2) - ott[:,1] = masked - assert_equal(average(ott,axis=0), [2.0, 0.0]) - assert(average(ott,axis=1)[0] is masked) - assert_equal([2.,0.], average(ott, axis=0)) - result, wts = average(ott, axis=0, returned=1) - assert_equal(wts, [1., 0.]) - - def check_testAverage2(self): - "More tests of average." - w1 = [0,1,1,1,1,0] - w2 = [[0,1,1,1,1,0],[1,0,0,0,0,1]] - x = arange(6) - assert_equal(average(x, axis=0), 2.5) - assert_equal(average(x, axis=0, weights=w1), 2.5) - y = array([arange(6), 2.0*arange(6)]) - assert_equal(average(y, None), N.add.reduce(N.arange(6))*3./12.) 
- assert_equal(average(y, axis=0), N.arange(6) * 3./2.) - assert_equal(average(y, axis=1), [average(x,axis=0), average(x,axis=0) * 2.0]) - assert_equal(average(y, None, weights=w2), 20./6.) - assert_equal(average(y, axis=0, weights=w2), [0.,1.,2.,3.,4.,10.]) - assert_equal(average(y, axis=1), [average(x,axis=0), average(x,axis=0) * 2.0]) - m1 = zeros(6) - m2 = [0,0,1,1,0,0] - m3 = [[0,0,1,1,0,0],[0,1,1,1,1,0]] - m4 = ones(6) - m5 = [0, 1, 1, 1, 1, 1] - assert_equal(average(masked_array(x, m1),axis=0), 2.5) - assert_equal(average(masked_array(x, m2),axis=0), 2.5) - assert(average(masked_array(x, m4),axis=0) is masked) - assert_equal(average(masked_array(x, m5),axis=0), 0.0) - assert_equal(count(average(masked_array(x, m4),axis=0)), 0) - z = masked_array(y, m3) - assert_equal(average(z, None), 20./6.) - assert_equal(average(z, axis=0), [0.,1.,99.,99.,4.0, 7.5]) - assert_equal(average(z, axis=1), [2.5, 5.0]) - assert_equal(average(z,axis=0, weights=w2), [0.,1., 99., 99., 4.0, 10.0]) - - def check_testAverage3(self): - "Yet more tests of average!" - a = arange(6) - b = arange(6) * 3 - r1, w1 = average([[a,b],[b,a]], axis=1, returned=1) - assert_equal(shape(r1) , shape(w1)) - assert_equal(r1.shape , w1.shape) - r2, w2 = average(ones((2,2,3)), axis=0, weights=[3,1], returned=1) - assert_equal(shape(w2) , shape(r2)) - r2, w2 = average(ones((2,2,3)), returned=1) - assert_equal(shape(w2) , shape(r2)) - r2, w2 = average(ones((2,2,3)), weights=ones((2,2,3)), returned=1) - assert_equal(shape(w2), shape(r2)) - a2d = array([[1,2],[0,4]], float) - a2dm = masked_array(a2d, [[0,0],[1,0]]) - a2da = average(a2d, axis=0) - assert_equal(a2da, [0.5, 3.0]) - a2dma = average(a2dm, axis=0) - assert_equal(a2dma, [1.0, 3.0]) - a2dma = average(a2dm, axis=None) - assert_equal(a2dma, 7./3.) 
- a2dma = average(a2dm, axis=1) - assert_equal(a2dma, [1.5, 4.0]) - def check_backwards(self): "Tests backward compatibility with numpy.core.ma" import numpy.core.ma as nma Modified: trunk/Lib/sandbox/maskedarray/tests/test_extras.py =================================================================== --- trunk/Lib/sandbox/maskedarray/tests/test_extras.py 2006-12-18 02:47:44 UTC (rev 2434) +++ trunk/Lib/sandbox/maskedarray/tests/test_extras.py 2006-12-19 04:14:26 UTC (rev 2435) @@ -26,6 +26,80 @@ reload(maskedarray.extras) from maskedarray.extras import * +class test_average(NumpyTestCase): + "Several tests of average. Why so many ? Good point..." + def check_testAverage1(self): + "Test of average." + ott = array([0.,1.,2.,3.], mask=[1,0,0,0]) + assert_equal(2.0, average(ott,axis=0)) + assert_equal(2.0, average(ott, weights=[1., 1., 2., 1.])) + result, wts = average(ott, weights=[1.,1.,2.,1.], returned=1) + assert_equal(2.0, result) + assert(wts == 4.0) + ott[:] = masked + assert_equal(average(ott,axis=0).mask, [True]) + ott = array([0.,1.,2.,3.], mask=[1,0,0,0]) + ott = ott.reshape(2,2) + ott[:,1] = masked + assert_equal(average(ott,axis=0), [2.0, 0.0]) + assert_equal(average(ott,axis=1).mask[0], [True]) + assert_equal([2.,0.], average(ott, axis=0)) + result, wts = average(ott, axis=0, returned=1) + assert_equal(wts, [1., 0.]) + + def check_testAverage2(self): + "More tests of average." + w1 = [0,1,1,1,1,0] + w2 = [[0,1,1,1,1,0],[1,0,0,0,0,1]] + x = arange(6, dtype=float_) + assert_equal(average(x, axis=0), 2.5) + assert_equal(average(x, axis=0, weights=w1), 2.5) + y = array([arange(6, dtype=float_), 2.0*arange(6)]) + assert_equal(average(y, None), N.add.reduce(N.arange(6))*3./12.) + assert_equal(average(y, axis=0), N.arange(6) * 3./2.) + assert_equal(average(y, axis=1), [average(x,axis=0), average(x,axis=0) * 2.0]) + assert_equal(average(y, None, weights=w2), 20./6.) 
+ assert_equal(average(y, axis=0, weights=w2), [0.,1.,2.,3.,4.,10.]) + assert_equal(average(y, axis=1), [average(x,axis=0), average(x,axis=0) * 2.0]) + m1 = zeros(6) + m2 = [0,0,1,1,0,0] + m3 = [[0,0,1,1,0,0],[0,1,1,1,1,0]] + m4 = ones(6) + m5 = [0, 1, 1, 1, 1, 1] + assert_equal(average(masked_array(x, m1),axis=0), 2.5) + assert_equal(average(masked_array(x, m2),axis=0), 2.5) + assert_equal(average(masked_array(x, m4),axis=0).mask, [True]) + assert_equal(average(masked_array(x, m5),axis=0), 0.0) + assert_equal(count(average(masked_array(x, m4),axis=0)), 0) + z = masked_array(y, m3) + assert_equal(average(z, None), 20./6.) + assert_equal(average(z, axis=0), [0.,1.,99.,99.,4.0, 7.5]) + assert_equal(average(z, axis=1), [2.5, 5.0]) + assert_equal(average(z,axis=0, weights=w2), [0.,1., 99., 99., 4.0, 10.0]) + + def check_testAverage3(self): + "Yet more tests of average!" + a = arange(6) + b = arange(6) * 3 + r1, w1 = average([[a,b],[b,a]], axis=1, returned=1) + assert_equal(shape(r1) , shape(w1)) + assert_equal(r1.shape , w1.shape) + r2, w2 = average(ones((2,2,3)), axis=0, weights=[3,1], returned=1) + assert_equal(shape(w2) , shape(r2)) + r2, w2 = average(ones((2,2,3)), returned=1) + assert_equal(shape(w2) , shape(r2)) + r2, w2 = average(ones((2,2,3)), weights=ones((2,2,3)), returned=1) + assert_equal(shape(w2), shape(r2)) + a2d = array([[1,2],[0,4]], float) + a2dm = masked_array(a2d, [[0,0],[1,0]]) + a2da = average(a2d, axis=0) + assert_equal(a2da, [0.5, 3.0]) + a2dma = average(a2dm, axis=0) + assert_equal(a2dma, [1.0, 3.0]) + a2dma = average(a2dm, axis=None) + assert_equal(a2dma, 7./3.) + a2dma = average(a2dm, axis=1) + assert_equal(a2dma, [1.5, 4.0]) class test_concatenator(NumpyTestCase): "Tests for mr_, the equivalent of r_ for masked arrays." @@ -98,8 +172,50 @@ assert_equal(tmp[0][-1], (4, (0,3))) assert(tmp[1] is None) assert_equal(tmp[2][-1], (6, (0,5))) + +class test_compress2d(NumpyTestCase): + "Tests compress2d and mask_row_columns." 
+ def check_compress2d(self): + "Tests compress2d" + x = array(N.arange(9).reshape(3,3), mask=[[1,0,0],[0,0,0],[0,0,0]]) + assert_equal(compress2d(x), [[4,5],[7,8]] ) + assert_equal(compress2d(x,0), [[3,4,5],[6,7,8]] ) + assert_equal(compress2d(x,1), [[1,2],[4,5],[7,8]] ) + x = array(x._data, mask=[[0,0,0],[0,1,0],[0,0,0]]) + assert_equal(compress2d(x), [[0,2],[6,8]] ) + assert_equal(compress2d(x,0), [[0,1,2],[6,7,8]] ) + assert_equal(compress2d(x,1), [[0,2],[3,5],[6,8]] ) + x = array(x._data, mask=[[1,0,0],[0,1,0],[0,0,0]]) + assert_equal(compress2d(x), [[8]] ) + assert_equal(compress2d(x,0), [[6,7,8]] ) + assert_equal(compress2d(x,1,), [[2],[5],[8]] ) + x = array(x._data, mask=[[1,0,0],[0,1,0],[0,0,1]]) + assert_equal(compress2d(x).size, 0 ) + assert_equal(compress2d(x,0).size, 0 ) + assert_equal(compress2d(x,1).size, 0 ) + # + def check_mask_rowcols(self): + "Tests mask_rowcols." + x = array(N.arange(9).reshape(3,3), mask=[[1,0,0],[0,0,0],[0,0,0]]) + assert_equal(mask_rowcols(x).mask, [[1,1,1],[1,0,0],[1,0,0]] ) + assert_equal(mask_rowcols(x,0).mask, [[1,1,1],[0,0,0],[0,0,0]] ) + assert_equal(mask_rowcols(x,1).mask, [[1,0,0],[1,0,0],[1,0,0]] ) + x = array(x._data, mask=[[0,0,0],[0,1,0],[0,0,0]]) + assert_equal(mask_rowcols(x).mask, [[0,1,0],[1,1,1],[0,1,0]] ) + assert_equal(mask_rowcols(x,0).mask, [[0,0,0],[1,1,1],[0,0,0]] ) + assert_equal(mask_rowcols(x,1).mask, [[0,1,0],[0,1,0],[0,1,0]] ) + x = array(x._data, mask=[[1,0,0],[0,1,0],[0,0,0]]) + assert_equal(mask_rowcols(x).mask, [[1,1,1],[1,1,1],[1,1,0]] ) + assert_equal(mask_rowcols(x,0).mask, [[1,1,1],[1,1,1],[0,0,0]] ) + assert_equal(mask_rowcols(x,1,).mask, [[1,1,0],[1,1,0],[1,1,0]] ) + x = array(x._data, mask=[[1,0,0],[0,1,0],[0,0,1]]) + assert(mask_rowcols(x).all()) + assert(mask_rowcols(x,0).all()) + assert(mask_rowcols(x,1).all()) ############################################################################### #------------------------------------------------------------------------------ if __name__ == 
"__main__": - NumpyTest().run() \ No newline at end of file + NumpyTest().run() + + \ No newline at end of file From scipy-svn at scipy.org Tue Dec 19 19:17:09 2006 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Tue, 19 Dec 2006 18:17:09 -0600 (CST) Subject: [Scipy-svn] r2436 - trunk/Lib/sandbox/models Message-ID: <20061220001709.97B4139C035@new.scipy.org> Author: jarrod.millman Date: 2006-12-19 18:16:24 -0600 (Tue, 19 Dec 2006) New Revision: 2436 Modified: trunk/Lib/sandbox/models/info.py Log: test Modified: trunk/Lib/sandbox/models/info.py =================================================================== --- trunk/Lib/sandbox/models/info.py 2006-12-19 04:14:26 UTC (rev 2435) +++ trunk/Lib/sandbox/models/info.py 2006-12-20 00:16:24 UTC (rev 2436) @@ -2,7 +2,7 @@ Statistical models ================== - +testing """ depends = ['weave', From scipy-svn at scipy.org Tue Dec 19 19:17:42 2006 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Tue, 19 Dec 2006 18:17:42 -0600 (CST) Subject: [Scipy-svn] r2437 - trunk/Lib/sandbox/models Message-ID: <20061220001742.9135F39C035@new.scipy.org> Author: jarrod.millman Date: 2006-12-19 18:17:41 -0600 (Tue, 19 Dec 2006) New Revision: 2437 Modified: trunk/Lib/sandbox/models/info.py Log: cleaning up Modified: trunk/Lib/sandbox/models/info.py =================================================================== --- trunk/Lib/sandbox/models/info.py 2006-12-20 00:16:24 UTC (rev 2436) +++ trunk/Lib/sandbox/models/info.py 2006-12-20 00:17:41 UTC (rev 2437) @@ -2,7 +2,6 @@ Statistical models ================== -testing """ depends = ['weave', From scipy-svn at scipy.org Tue Dec 19 19:29:09 2006 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Tue, 19 Dec 2006 18:29:09 -0600 (CST) Subject: [Scipy-svn] r2438 - in trunk/Lib/io: . 
tests Message-ID: <20061220002909.1C69B39C035@new.scipy.org> Author: matthew.brett at gmail.com Date: 2006-12-19 18:28:55 -0600 (Tue, 19 Dec 2006) New Revision: 2438 Modified: trunk/Lib/io/recaster.py trunk/Lib/io/tests/test_recaster.py Log: More refactoring of recaster, more complete unit tests Modified: trunk/Lib/io/recaster.py =================================================================== --- trunk/Lib/io/recaster.py 2006-12-20 00:17:41 UTC (rev 2437) +++ trunk/Lib/io/recaster.py 2006-12-20 00:28:55 UTC (rev 2438) @@ -43,6 +43,9 @@ } return d_dict +class RecastError(ValueError): + pass + class Recaster(object): ''' Class to recast arrays to one of acceptable scalar types @@ -67,6 +70,7 @@ 'downcast_within_fp': False, 'guarantee_fp_to_fp_precision': False, 'prefer_input_at_threshold': 0, + 'prefer_int_type': 'i', }, 'smallest': { 'fp_to_int': 'always', @@ -76,7 +80,8 @@ 'downcast_only': False, 'downcast_within_fp': True, 'guarantee_fp_to_fp_precision': False, - 'prefer_input_at_threshold': 0 + 'prefer_input_at_threshold': 0, + 'prefer_int_type': 'i', }, 'fairly_small': { 'fp_to_int': 'always', @@ -87,6 +92,7 @@ 'downcast_within_fp': False, 'guarantee_fp_to_fp_precision': False, 'prefer_input_at_threshold': 2 * _k, + 'prefer_int_type': 'i', }, 'preserve_precision': { 'fp_to_int': 'never', @@ -96,7 +102,8 @@ 'downcast_only': False, 'downcast_within_fp': False, 'guarantee_fp_to_fp_precision': True, - 'prefer_input_at_threshold': 0 + 'prefer_input_at_threshold': 0, + 'prefer_int_type': 'i', } } @@ -143,6 +150,9 @@ is less than or equal to this number, and in valid types list, return the array without attempting recasting + prefer_int_type - if 'i', when recasting to integer type, prefer int + when equal sized uint is also available. Prefer + uint otherwise. 
''' if sctype_list is None: sctype_list = self._sctype_attributes.keys() @@ -275,15 +285,13 @@ break return out_t - def cast_to_fp(self, arr, rtol, atol, kind, + def cast_to_fp(self, arr, kind, max_size=inf, continue_down=False): ''' Return fp arr maybe recast to specified kind, different sctype Inputs arr - array to possibly recast - rtol - relative tolerace for allclose - atol - absolute tolerance for allclose kind - kind of array to recast within (one of "c", "f", "u", "i") max_size - maximum size of sctype to return (in bytes) @@ -295,6 +303,8 @@ If arr cannot be recast within given tolerances, and size, return None ''' + tols = self.sctype_tols[arr.dtype.type] + rtol, atol = tols['rtol'], tols['atol'] ret_arr = None for T, sz in self.sized_sctypes[kind]: if sz > max_size: @@ -308,33 +318,43 @@ break return ret_arr - def smallest_int_sctype(self, mx, mn): + def smallest_int_sctype(self, mx, mn, prefer='i'): ''' Return integer type with smallest storage containing mx and mn Inputs mx - maximum value mn - minumum value - + prefer - if == 'i' prefer int for range also compatible + uint, else prefer uint in same situation + Returns None if no integer can contain this range ''' sct = None + sz = inf for T, tsz in self.ints_sized_sctypes: t_dict = self._sctype_attributes[T] if t_dict['max'] >= mx and t_dict['min'] <= mn: - if sct is None or tsz < sz: + if tsz < sz: sct = T sz = tsz + elif tsz == sz: + if t_dict['kind'] == prefer: + sct = T return sct - def cast_to_integer(self, arr): + def cast_to_integer(self, arr, prefer='i'): ''' Casts arr to smallest integer containing range Returns None if range of arr cannot be contained in acceptable integer types + + prefer - if == 'i' prefer int for range also compatible + uint, else prefer uint in same situation + ''' mx = amax(arr) mn = amin(arr) - idt = self.smallest_int_sctype(mx, mn) + idt = self.smallest_int_sctype(mx, mn, prefer) if idt is not None: return arr.astype(idt) return None @@ -365,7 +385,8 @@ if 
opts['fp_to_int'] == 'always' or \ (opts['fp_to_int'] == 'if_none' and ret_arr is None): - test_arr = self.cast_to_integer(arr) + test_arr = self.cast_to_integer(arr, + opts['prefer_int_type']) if test_arr is not None and \ test_arr.dtype.itemsize < curr_size: if allclose(arr, test_arr, rtol, atol): @@ -379,8 +400,6 @@ max_size = min([self._c2f_capable_sctype_sizes[dtt], curr_size - 1]) test_arr = self.cast_to_fp(arr, - rtol, - atol, 'f', max_size, opts['downcast_within_fp']) @@ -401,8 +420,6 @@ max_size = min([self._capable_sctype_sizes[dtt], curr_size - 1]) test_arr = self.cast_to_fp(arr, - rtol, - atol, dtk, max_size, opts['downcast_within_fp']) @@ -413,7 +430,8 @@ if opts['int_to_int'] == 'always' or \ (opts['int_to_int'] == 'if_none' and ret_arr is None): - test_arr = self.cast_to_integer(arr) + test_arr = self.cast_to_integer(arr, + opts['prefer_int_type']) if test_arr is not None and \ test_arr.dtype.itemsize < curr_size: ret_arr = test_arr @@ -422,8 +440,6 @@ (opts['int_to_fp'] == 'if_none' and ret_arr is None): test_arr = self.cast_to_fp(arr, - rtol, - atol, 'f', curr_size-1, opts['downcast_within_fp']) @@ -434,7 +450,7 @@ if ret_arr is not None: return ret_arr - raise TypeError, 'Cannot recast array within tolerance' + raise RecastError, 'Cannot recast array within tolerance' def recast_best_sctype(self, arr): ''' Recast array, return closest sctype to original Modified: trunk/Lib/io/tests/test_recaster.py =================================================================== --- trunk/Lib/io/tests/test_recaster.py 2006-12-20 00:17:41 UTC (rev 2437) +++ trunk/Lib/io/tests/test_recaster.py 2006-12-20 00:28:55 UTC (rev 2438) @@ -2,7 +2,7 @@ import numpy as N set_package_path() -from io.recaster import sctype_attributes, Recaster +from io.recaster import sctype_attributes, Recaster, RecastError restore_path() try: # Python 2.3 support @@ -11,10 +11,6 @@ pass class test_recaster(ScipyTestCase): - def setUp(self): - self.valid_types = [N.int32, N.complex128, 
N.float64] - self.recaster = Recaster(self.valid_types, - recast_options='smallest') def test_init(self): # Setting sctype_list @@ -25,56 +21,69 @@ R = Recaster([T]) assert R.sctype_list == [T], 'Scalar type list not correctly set' # Setting tolerances - tols = self.recaster.default_sctype_tols() - assert tols == self.recaster.sctype_tols, 'Unexpected tols dictionary' + R = Recaster() + tols = R.default_sctype_tols() + assert tols == R.sctype_tols, 'Unexpected tols dictionary' F = N.finfo(T) - R = Recaster(sctype_tols={T: {'rtol': F.eps*2, 'atol': F.tiny*2, 'silly': 'silly text'}}) - assert tols != R.sctype_tols, 'Tols dictionary not set correctly' - assert R.sctype_tols[T]['rtol'] == F.eps*2, 'Rtol not correctly set' - assert R.sctype_tols[T]['atol'] == F.tiny*2, 'Atol not correctly set' + R = Recaster(sctype_tols={T: { + 'rtol': F.eps*2, + 'atol': F.tiny*2, + 'silly': 'silly text'}}) + assert R.sctype_tols[T]['rtol'] == F.eps*2, \ + 'Rtol not correctly set' + assert R.sctype_tols[T]['atol'] == F.tiny*2, \ + 'Atol not correctly set' + T = N.complex128 + F = N.finfo(T) + assert R.sctype_tols[T]['rtol'] == F.eps, \ + 'Rtol defaults not correctly set' + assert R.sctype_tols[T]['atol'] == F.tiny, \ + 'Atol defaults not correctly set' # Options # Sctype size lists # Integer sizes # Cabable types def test_cast_to_fp(self): - R = self.recaster - value = 1 + R = Recaster() # Define expected type output from fp recast of value + sta = sctype_attributes() inp_outp = ( - (N.complex128, N.complex128), - (N.complex64, N.complex128), + (1, N.complex128, 'c', sta[N.complex128]['size'], 0, N.complex128), + (1, N.complex128, 'c', sta[N.complex128]['size'], 1, N.complex64), + (1, N.complex128, 'c', sta[N.complex64]['size'], 0, N.complex64), + (1, N.complex128, 'f', sta[N.float64]['size'], 0, N.float64), + (1.0+1j, N.complex128, 'f', sta[N.complex128]['size'], 0, None), + (1, N.float64, 'f', sta[N.float64]['size'], 0, N.float64), + (1, N.float64, 'f', sta[N.float64]['size'], 1, 
N.float32), + (1, N.float64, 'f', sta[N.float32]['size'], 0, N.float32), + (1, N.float64, 'c', sta[N.complex128]['size'], 0, N.complex128), + (1, N.float64, 'c', sta[N.complex128]['size'], 1, N.complex64), + (1, N.int32, 'f', sta[N.float64]['size'], 0, N.float64), + (1, N.int32, 'f', sta[N.float64]['size'], 1, N.float32), + (1, N.float64, 'f', 0, 0, None), ) - for inp, outp in inp_outp: + for value, inp, kind, max_size, continue_down, outp in inp_outp: arr = N.array(value, dtype=inp) - rtol = R.sctype_tols[inp]['rtol'] - atol = R.sctype_tols[inp]['atol'] - kind = N.dtype(inp).kind - arr = R.cast_to_fp(arr, rtol, atol, kind) + arr = R.cast_to_fp(arr, kind, max_size, continue_down) if outp is None: - assert arr is None, 'Expected None from type %s' % inp - assert arr.dtype.type is outp, 'Expected output type %s from input %s' % (inp, outp) + assert arr is None, \ + 'Expected None from type %s, got %s' \ + % (inp, arr.dtype.type) + continue + assert arr is not None, \ + 'Expected %s from %s, got None' % (outp, inp) + dtt = arr.dtype.type + assert dtt is outp, \ + 'Expected %s from %s, got %s' % (outp, inp, dtt) def test_smallest_int_sctype(self): - # Smallest int sctype with testing recaster + # Smallest int sctype with full recaster params = sctype_attributes() - mmax = params[N.int32]['max'] - mmin = params[N.int32]['min'] - for kind in ('int', 'uint'): - for T in N.sctypes[kind]: - mx = params[T]['max'] - mn = params[T]['min'] - rt = self.recaster.smallest_int_sctype(mx, mn) - if mx <= mmax and mn >= mmin: - assert rt == N.int32, 'Expected int32 type' - else: - assert rt is None, 'Expected None, got %s for %s' % (T, rt) - - # Smallest int sctype with full recaster RF = Recaster() test_triples = [(N.uint8, 0, 255), (N.int8, -128, 0), - (N.uint16, 0, params[N.uint16]['max']), + (N.uint16, 0, params[N.uint16]['max']), (N.int16, params[N.int16]['min'], 0), (N.uint32, 0, params[N.uint32]['max']), (N.int32, params[N.int32]['min'], 0), @@ -84,14 +93,84 @@ rt = 
RF.smallest_int_sctype(mx, mn) assert N.dtype(rt) == N.dtype(T), \ 'Expected %s, got %s type' % (T, rt) + # Smallest int sctype with restricted recaster + mmax = params[N.int32]['max'] + mmin = params[N.int32]['min'] + RR = Recaster([N.int32]) + for kind in ('int', 'uint'): + for T in N.sctypes[kind]: + mx = params[T]['max'] + mn = params[T]['min'] + rt = RR.smallest_int_sctype(mx, mn) + if mx <= mmax and mn >= mmin: + assert rt == N.int32, \ + 'Expected int32 type, got %s' % rt + else: + assert rt is None, \ + 'Expected None, got %s for %s' % (T, rt) + # Test preferred int flag + mx = 1000 + mn = 0 + rt = RF.smallest_int_sctype(mx, mn) + assert rt == N.int16, 'Expected int16, got %s' % rt + rt = RF.smallest_int_sctype(mx, mn, 'i') + assert rt == N.int16, 'Expected int16, got %s' % rt + rt = RF.smallest_int_sctype(mx, mn, prefer='u') + assert rt == N.uint16, 'Expected uint16, got %s' % rt def test_recasts(self): - value = 100 - R = self.recaster - for T in (N.complex128, N.complex64, - N.float64, N.uint64): - B = R.recast(N.array(value, T)) - assert B is not None, 'Got None for %s' % T - Bt = B.dtype.type - assert Bt == N.int32, 'Input %s, output %s' % (T, Bt) - + valid_types = [N.int32, N.complex128, N.float64] + # Test smallest + R = Recaster(valid_types, recast_options='smallest') + inp_outp = ( + (1, N.complex128, N.int32), + (1, N.complex64, N.int32), + (1.0+1j, N.complex128, N.complex128), + (1.0+1j, N.complex64, N.complex128), + (1, N.float64, N.int32), + (1, N.float32, N.int32), + (1.1, N.float64, N.float64), + (-1e12, N.int64, N.float64), + ) + self.run_io_recasts(R, inp_outp) + # Test only_if_none + R = Recaster(valid_types, recast_options='only_if_none') + inp_outp = ( + (1, N.complex128, N.complex128), + (1, N.complex64, N.int32), + (1.0+1j, N.complex128, N.complex128), + (1.0+1j, N.complex64, N.complex128), + (1, N.float64, N.float64), + (1, N.float32, N.int32), + (1.1, N.float64, N.float64), + (-1e12, N.int64, N.float64), + ) + self.run_io_recasts(R, 
inp_outp) + # Test preserve_precision + R = Recaster(valid_types, recast_options='preserve_precision') + inp_outp = ( + (1, N.complex128, N.complex128), + (1, N.complex64, N.complex128), + (1.0+1j, N.complex128, N.complex128), + (1.0+1j, N.complex64, N.complex128), + (1, N.float64, N.float64), + (1, N.float32, N.float64), + (1.1, N.float64, N.float64), + (-1e12, N.int64, None), + ) + self.run_io_recasts(R, inp_outp) + + def run_io_recasts(self, R, inp_outp): + ''' Runs sets of value, input, output tests ''' + for value, inp, outp in inp_outp: + arr = N.array(value, inp) + if outp is None: + self.assertRaises(RecastError, R.recast, arr) + continue + arr = R.recast(N.array(value, inp)) + assert arr is not None, \ + 'Expected %s from %s, got None' % (outp, inp) + dtt = arr.dtype.type + assert dtt is outp, \ + 'Expected %s from %s, got %s' % (outp, inp, dtt) + From scipy-svn at scipy.org Tue Dec 19 19:32:20 2006 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Tue, 19 Dec 2006 18:32:20 -0600 (CST) Subject: [Scipy-svn] r2439 - trunk/Lib/sandbox/models Message-ID: <20061220003220.2985C39C035@new.scipy.org> Author: jarrod.millman Date: 2006-12-19 18:32:18 -0600 (Tue, 19 Dec 2006) New Revision: 2439 Modified: trunk/Lib/sandbox/models/__init__.py Log: import documentation string from info.py per scipy requirements Modified: trunk/Lib/sandbox/models/__init__.py =================================================================== --- trunk/Lib/sandbox/models/__init__.py 2006-12-20 00:28:55 UTC (rev 2438) +++ trunk/Lib/sandbox/models/__init__.py 2006-12-20 00:32:18 UTC (rev 2439) @@ -1,3 +1,9 @@ +# +# models - Statistical Models +# + +from info import __doc__ + import model import formula import regression From scipy-svn at scipy.org Wed Dec 20 12:09:20 2006 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Wed, 20 Dec 2006 11:09:20 -0600 (CST) Subject: [Scipy-svn] r2440 - trunk/Lib/sandbox/timeseries Message-ID: <20061220170920.8356339C011@new.scipy.org> 
Author: mattknox_ca Date: 2006-12-20 11:09:08 -0600 (Wed, 20 Dec 2006) New Revision: 2440 Modified: trunk/Lib/sandbox/timeseries/corelib.py Log: bug fixes and code cleanup Modified: trunk/Lib/sandbox/timeseries/corelib.py =================================================================== --- trunk/Lib/sandbox/timeseries/corelib.py 2006-12-20 00:32:18 UTC (rev 2439) +++ trunk/Lib/sandbox/timeseries/corelib.py 2006-12-20 17:09:08 UTC (rev 2440) @@ -83,17 +83,21 @@ def __unmasked(m, get_val, relpos): - idx = numpy.where(m.mask == False) - if len(idx) != 0 and len(idx[0]) != 0: - idx = idx[0][relpos] + + if m.mask is ma.nomask: + return 0 else: - idx = None - - if get_val: - if idx is None: return ma.masked - else: return m[idx] - else: - return idx + idx = numpy.where(m.mask == False) + if len(idx) != 0 and len(idx[0]) != 0: + idx = idx[0][relpos] + else: + idx = None + + if get_val: + if idx is None: return ma.masked + else: return m[idx] + else: + return idx ############################################################# #converts possible strings for frequency into acceptable values From scipy-svn at scipy.org Wed Dec 20 12:10:22 2006 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Wed, 20 Dec 2006 11:10:22 -0600 (CST) Subject: [Scipy-svn] r2441 - trunk/Lib/sandbox/timeseries Message-ID: <20061220171022.E648D39C011@new.scipy.org> Author: mattknox_ca Date: 2006-12-20 11:10:19 -0600 (Wed, 20 Dec 2006) New Revision: 2441 Modified: trunk/Lib/sandbox/timeseries/tsdate.py Log: code cleanup Modified: trunk/Lib/sandbox/timeseries/tsdate.py =================================================================== --- trunk/Lib/sandbox/timeseries/tsdate.py 2006-12-20 17:09:08 UTC (rev 2440) +++ trunk/Lib/sandbox/timeseries/tsdate.py 2006-12-20 17:10:19 UTC (rev 2441) @@ -14,7 +14,6 @@ if self.freq == 'D': self.mxDate = mx.DateTime.DateTimeFromAbsDays(value-1) elif self.freq == 'B': - #originDate + val + (val//5)*7 - (val//5)*5 value -= 1 self.mxDate = 
mx.DateTime.DateTimeFromAbsDays(value + (value//5)*7 - (value//5)*5) elif self.freq == 'S': @@ -100,15 +99,15 @@ def __radd__(self, other): return self+other def __sub__(self, other): - try: return self + (-1) * other - except: pass - try: + if isinstance(other, Date): if self.freq != other.freq: raise ValueError("Cannont subtract dates of different frequency (" + str(self.freq) + " != " + str(other.freq) + ")") - return int(self) - int(other) - except TypeError: - raise TypeError("Could not subtract types " + str(type(self)) + " and " + str(type(other))) + else: + return int(self) - int(other) + else: + return self + (-1) * int(other) + def __repr__(self): return "<" + str(self.freq) + ":" + str(self) + ">" def __eq__(self, other): From scipy-svn at scipy.org Wed Dec 20 12:11:29 2006 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Wed, 20 Dec 2006 11:11:29 -0600 (CST) Subject: [Scipy-svn] r2442 - trunk/Lib/sandbox/timeseries Message-ID: <20061220171129.5A0D739C011@new.scipy.org> Author: mattknox_ca Date: 2006-12-20 11:11:26 -0600 (Wed, 20 Dec 2006) New Revision: 2442 Modified: trunk/Lib/sandbox/timeseries/timeseries.py Log: re-write of timeseries class to be a subclass of Masked Array instead of shifting array Modified: trunk/Lib/sandbox/timeseries/timeseries.py =================================================================== --- trunk/Lib/sandbox/timeseries/timeseries.py 2006-12-20 17:10:19 UTC (rev 2441) +++ trunk/Lib/sandbox/timeseries/timeseries.py 2006-12-20 17:11:26 UTC (rev 2442) @@ -2,37 +2,136 @@ from numpy import ma import corelib -import shiftingarray as sa -from shiftingarray import doFunc, doFunc_oneseries import cseries import tsdate -import copy +import copy as copytools -class TimeSeries(sa.ShiftingArray): - def __init__(self, values=[], dtype=None, freq=None, observed='END', startIndex=None, mask=ma.nomask): + +def ts_compatible(a, b): + if a.freq != b.freq: + raise ValueError("Both TimeSeries must have same freq!") + elif 
a.start_date() != b.start_date(): + raise ValueError("Both TimeSeries must have same start_date!") + elif a.shape != b.shape: + raise ValueError("Both TimeSeries must be of the same size!") + + +class ts_unary_operation: + def __init__ (self, abfunc): + self.f = abfunc + self.__doc__ = getattr(abfunc, "__doc__", str(abfunc)) + + def __call__ (self, a, *args, **kwargs): + "Execute the call behavior." + if isinstance(a, TimeSeries): + return TimeSeries(self.f(a, *args, **kwargs), freq=a.freq, observed=a.observed, start_date=a.start_date()) + else: + return self.f(a, *args, **kwargs) + + +class ts_binary_operation: + def __init__ (self, abfunc): + self.f = abfunc + self.__doc__ = getattr(abfunc, "__doc__", str(abfunc)) + + def __call__ (self, a, b, *args, **kwargs): + "Execute the call behavior." + + if isinstance(a, TimeSeries) and isinstance(b, TimeSeries): + ts_compatible(a, b) + return TimeSeries(self.f(a, b, *args, **kwargs), freq=a.freq, observed=a.observed, start_date=a.start_date()) + elif isinstance(a, TimeSeries): + return TimeSeries(self.f(a, b, *args, **kwargs), freq=a.freq, observed=a.observed, start_date=a.start_date()) + elif isinstance(b, TimeSeries): + return TimeSeries(self.f(a, b, *args, **kwargs), freq=b.freq, observed=b.observed, start_date=b.start_date()) + else: + return self.f(a, b, *args, **kwargs) + + + +class TimeSeries(ma.MaskedArray): + + __array_priority__ = 10.2 + + def __init__(self, data, dtype=None, freq=None, start_date=None, observed=None, copy=True, order=False, mask=ma.nomask, fill_value=None): - if freq is None: raise ValueError("freq not specified") + if isinstance(data, TimeSeries): + if freq is None: freq = data.freq + if start_date is None: start_date = data.start_date() + if observed is None: observed = data.observed + else: + if observed is None: observed = 'END' - if dtype is None: dtype = values.dtype + self.freq = corelib.fmtFreq(freq) - super(TimeSeries, self).__init__(values, dtype, startIndex, mask) - self.freq = 
corelib.fmtFreq(freq) + if isinstance(start_date, tsdate.Date): + if start_date.freq != self.freq: raise ValueError("frequency of start_date must match frequency of series") + else: self.__start_date = start_date + else: + self.__start_date = tsdate.Date(freq=self.freq, value=start_date) + self.observed = corelib.fmtObserv(observed) - self.dtype = dtype + + self.tstype = None + + if corelib.isDateType(dtype) or (isinstance(data, TimeSeries) and corelib.isDateType(data.tstype)): + self.tstype = dtype + dtype = numpy.int_ + + super(TimeSeries, self).__init__(data=data, dtype=dtype, copy=copy, order=order, mask=mask, fill_value=fill_value) + if self.tstype is None: self.tstype = self.dtype + + def __getitem__(self, key): - if isinstance(key,tsdate.Date): - if self.freq != key.freq: - raise "series of frequency "+str(self.freq)+" given date expression of type "+str(key.freq) - else: - key = int(key) - return super(TimeSeries, self).__getitem__(key) + return super(TimeSeries, self).__getitem__(self.__prepKey(key)) def __setitem__(self, key, value): + super(TimeSeries, self).__setitem__(self.__prepKey(key), value) + + def __prepKey(self, key): + if isinstance(key, tsdate.Date): - key = int(key) - super(TimeSeries, self).__setitem__(key, value) + key = int(key - self.start_date()) + if key < 0: raise ValueError("Date out of bounds") + else: return key + elif isinstance(key, TimeSeries): + if corelib.isDateType(key.tstype): + if key.tstype.freq != self.freq: + raise ValueError("series of frequency "+str(self.freq)+" given date expression of type "+str(key.tstype.freq)) + + if key.mask is ma.nomask: key = numpy.asarray(key) - int(self.start_date()) + else: key = numpy.asarray(key[key.mask == False]) - int(self.start_date()) + + if len(numpy.where(key < 0)[0]) > 0: raise ValueError("Indices out of bounds") + + return key + + else: + + # frequency, size, and start_date of key must all match self + # when the data type is note a date + ts_compatible(key, self) + + if 
key.tstype is numpy.bool_: + key = key.filled(False) + elif numpy.ravel(key.mask).any(): + raise ValueError("masked values cannot be used as indices!") + + return numpy.asarray(key) + + elif isinstance(key, ma.MaskedArray): + + if key.dtype is numpy.bool_: + key = key.filled(False) + elif numpy.ravel(key.mask).any(): + raise ValueError("masked values cannot be used as indices!") + + return numpy.asarray(key) + + else: return key + def convert(self, freq, func='auto', position='END', interp=None): """ @@ -67,19 +166,18 @@ if func == 'auto': func = corelib.obsDict[self.observed] - firstIndex = corelib.first_unmasked(self.data) - if firstIndex is None: - return TimeSeries([], dtype=self.dtype, freq=toFreq, observed=self.observed) + if self.size == 0: + return TimeSeries(self, freq=toFreq, start_date=tsdate.dateOf(self.start_date(), toFreq)) - startIndexAdj = self.firstValue() - lastIndex = corelib.last_unmasked(self.data) + tempData = self.filled() - tempData = copy.deepcopy(self.data[firstIndex:lastIndex+1]) - tempMask = tempData.mask - tempData = tempData.filled() + if self.mask is ma.nomask: + tempMask = numpy.empty(tempData.shape, dtype=numpy.bool_) + tempMask[:] = False + else: tempMask = self.mask - cRetVal = cseries.reindex(tempData, fromFreq, toFreq, position, startIndexAdj, tempMask) + cRetVal = cseries.reindex(tempData, fromFreq, toFreq, position, int(self.start_date()), tempMask) _values = cRetVal['values'] _mask = cRetVal['mask'] @@ -91,142 +189,193 @@ if func is not None and tempData.ndim == 2: tempData = corelib.apply_along_axis(func, 1, tempData) - startIndex = cseries.convert(startIndexAdj, fromFreq, toFreq) + startIndex = cseries.convert(int(self.start_date()), fromFreq, toFreq) + + newStart = tsdate.dateOf(self.start_date(),toFreq, "BEFORE") + newEnd = tsdate.dateOf(self.end_date(),toFreq, "AFTER") - return TimeSeries(tempData, dtype=self.data.dtype, freq=toFreq, observed=self.observed, startIndex=startIndex) + return 
adjust_endpoints(TimeSeries(tempData, freq=toFreq, observed=self.observed, start_date=startIndex), start_date=newStart, end_date=newEnd) else: - return copy.deepcopy(self) + return copytools.deepcopy(self) + def adjust_endpoints(self, start_date=None, end_date=None): + self.__init__(adjust_endpoints(self, start_date=start_date, end_date=end_date)) + def __str__(self): retVal = "" - if self.firstValue() is not None: - for i in range(self.firstValue(),self.lastValue()+1): - index = str(tsdate.Date(freq=self.freq,value=i)) + + if self.shape[0] > 0: + for i in range(self.shape[0]): + index = str(self.start_date() + i) index = index + (" " * (6-len(index))) - retVal += index + "---> " + str(super(TimeSeries, self).__getitem__(i)) + "\n" + retVal += index + " --> " + str(self[i])+"\n" return retVal else: return "" - def firstValue(self, asDate=False): - value = super(TimeSeries, self).firstValue() + def first_value(self, asDate=False): + firstIndex = corelib.first_unmasked(self) if asDate: - return tsdate.Date(freq=self.freq, value=value) + return self.start_date() + firstIndex else: - return value + return firstIndex - def lastValue(self, asDate=False): - value = super(TimeSeries, self).lastValue() + def last_value(self, asDate=False): + lastIndex = corelib.last_unmasked(self) if asDate: - return tsdate.Date(freq=self.freq, value=value) + return self.start_date() + lastIndex else: - return value + return lastIndex + + def start_date(self): return self.__start_date + def end_date(self): return self.__start_date + (self.shape[0] - 1) + + def date_to_index(self, date): + if date.freq != self.freq: raise ValueError("date.freq != self.freq") + return date - self.start_date() + + + # built-in methods - ### DATA - - def __add__(self, other): - validOpInputs(self, other) - return SAtoTS(super(TimeSeries, self).__add__(other), self.freq, self.observed) - - def __radd__(self, other): - validOpInputs(self, other) - return SAtoTS(super(TimeSeries, self).__add__(other), self.freq, 
self.observed) - - def __sub__(self, other): - validOpInputs(self, other) - return SAtoTS(super(TimeSeries, self).__sub__(other), self.freq, self.observed) - - def __rsub__(self, other): - validOpInputs(self, other) - return SAtoTS(super(TimeSeries, self).__rsub__(other), self.freq, self.observed) - - def __mul__(self, other): - validOpInputs(self, other) - return SAtoTS(super(TimeSeries, self).__mul__(other), self.freq, self.observed) - - def __rmul__(self, other): - validOpInputs(self, other) - return SAtoTS(super(TimeSeries, self).__rmul__(other), self.freq, self.observed) - - def __div__(self, other): - validOpInputs(self, other) - return SAtoTS(super(TimeSeries, self).__div__(other), self.freq, self.observed) - - def __rdiv__(self, other): - validOpInputs(self, other) - return SAtoTS(super(TimeSeries, self).__rdiv__(other), self.freq, self.observed) - - def __pow__(self, other): - validOpInputs(self, other) - return SAtoTS(super(TimeSeries, self).__pow__(other), self.freq, self.observed) - - ### IN PLACE - + def __and__(self, other): return bitwise_and(self, other) + def __or__(self, other): return bitwise_or(self, other) + def __xor__(self, other): return bitwise_xor(self, other) + __rand__ = __and__ + __ror__ = __or__ + __rxor__ = __xor__ + def __abs__(self): return absolute(self) + def __neg__(self): return negative(self) + def __pos__(self): return TimeSeries(self) + def __add__(self, other): return add(self, other) + __radd__ = __add__ + def __mod__ (self, other): return remainder(self, other) + def __rmod__ (self, other): return remainder(other, self) + def __lshift__ (self, n): return left_shift(self, n) + def __rshift__ (self, n): return right_shift(self, n) + def __sub__(self, other): return subtract(self, other) + def __rsub__(self, other): return subtract(other, self) + def __mul__(self, other): return multiply(self, other) + __rmul__ = __mul__ + def __div__(self, other): return divide(self, other) + def __rdiv__(self, other): return divide(other, 
self) + def __truediv__(self, other): return true_divide(self, other) + def __rtruediv__(self, other): return true_divide(other, self) + def __floordiv__(self, other): return floor_divide(self, other) + def __rfloordiv__(self, other): return floor_divide(other, self) + def __pow__(self, other, third=None): return power(self, other, third) + def __sqrt__(self): return sqrt(self) + def __iadd__(self, other): - validOpInputs(self, other) - self = SAtoTS(super(TimeSeries, self).__add__(other), self.freq, self.observed) - return self - - def __isub__(self, other): - validOpInputs(self, other) - self = SAtoTS(super(TimeSeries, self).__sub__(other), self.freq, self.observed) - return self - + return self + other + def __imul__(self, other): - validOpInputs(self, other) - self = SAtoTS(super(TimeSeries, self).__mul__(other), self.freq, self.observed) - return self - - def __idiv__(self, other): - validOpInputs(self, other) - self = SAtoTS(super(TimeSeries, self).__div__(other), self.freq, self.observed) - return self - - # this overrides & and should only be used by boolean series - def __and__(self, other): - validOpInputs(self, other) return self * other - # this overrides | and should only be used by boolean series - def __or__(self, other): - validOpInputs(self, other) - return ~(~self & ~other) - - # this overrides ~ and should only be used by boolean series - # it is our "not" operator - def __invert__(self): - return self == False - - ### COMPARISON - - def __eq__(self, other): - validOpInputs(self, other) - return SAtoTS(super(TimeSeries, self).__eq__(other), self.freq, self.observed) - - def __le__(self, other): - validOpInputs(self, other) - return SAtoTS(super(TimeSeries, self).__le__(other), self.freq, self.observed) - - def __lt__(self, other): - validOpInputs(self, other) - return SAtoTS(super(TimeSeries, self).__lt__(other), self.freq, self.observed) - - def __ge__(self, other): - validOpInputs(self, other) - return SAtoTS(super(TimeSeries, 
self).__ge__(other), self.freq, self.observed) - - def __gt__(self, other): - validOpInputs(self, other) - return SAtoTS(super(TimeSeries, self).__gt__(other), self.freq, self.observed) + def __isub__(self, other): + return self - other + def __idiv__(self, other): + return self / other + + def __eq__(self, other): return equal(self,other) + def __ne__(self, other): return not_equal(self,other) + def __lt__(self, other): return less(self,other) + def __le__(self, other): return less_equal(self,other) + def __gt__(self, other): return greater(self,other) + def __ge__(self, other): return greater_equal(self,other) + + def astype (self, tc): + "return self as array of given type." + d = self._data.astype(tc) + return datawrap(ma.array(d, mask=self._mask), self) + + def filled (self, fill_value=None, ts=False): + d = super(TimeSeries, self).filled(fill_value) + if ts: return datawrap(d, self) + else: return d + + +def datawrap(data, ts): return TimeSeries(data, freq=ts.freq, observed=ts.observed, start_date=ts.start_date()) + +## wrappers for numpy.ma funcs + +sqrt = ts_unary_operation(ma.sqrt) +log = ts_unary_operation(ma.log) +log10 = ts_unary_operation(ma.log10) +exp = ts_unary_operation(ma.exp) +sin = ts_unary_operation(ma.sin) +cos = ts_unary_operation(ma.cos) +tan = ts_unary_operation(ma.tan) +arcsin = ts_unary_operation(ma.arcsin) +arccos = ts_unary_operation(ma.arccos) +arctan = ts_unary_operation(ma.arctan) +power = ts_binary_operation(ma.power) + +arcsinh = ts_unary_operation(ma.arcsinh) +arccosh = ts_unary_operation(ma.arccosh) +arctanh = ts_unary_operation(ma.arctanh) +sinh = ts_unary_operation(ma.sinh) +cosh = ts_unary_operation(ma.cosh) +tanh = ts_unary_operation(ma.tanh) +absolute = ts_unary_operation(ma.absolute) +fabs = ts_unary_operation(ma.fabs) +negative = ts_unary_operation(ma.negative) + +def nonzero(a): return datawrap(ma.nonzero(a), a) +def zeros(shape, dtype=float, freq=None, start_date=None, observed=None): + return TimeSeries(ma.zeros(shape, 
dtype), freq=freq, start_date=start_date, observed=observed) +def ones(shape, dtype=float, freq=None, start_date=None, observed=None): + return TimeSeries(ma.ones(shape, dtype), freq=freq, start_date=start_date, observed=observed) + +count = ma.count +sum = ma.sum +product = ma.product +average = ma.average + + + +around = ts_unary_operation(ma.around) +floor = ts_unary_operation(ma.floor) +ceil = ts_unary_operation(ma.ceil) +logical_not = ts_unary_operation(ma.logical_not) + +add = ts_binary_operation(ma.add) +subtract = ts_binary_operation(ma.subtract) + +multiply = ts_binary_operation(ma.multiply) +divide = ts_binary_operation(ma.divide) +true_divide = ts_binary_operation(ma.true_divide) +floor_divide = ts_binary_operation(ma.floor_divide) +remainder = ts_binary_operation(ma.remainder) +fmod = ts_binary_operation(ma.fmod) +hypot = ts_binary_operation(ma.hypot) +arctan2 = ts_binary_operation(ma.arctan2) +equal = ts_binary_operation(ma.equal) +not_equal = ts_binary_operation(ma.not_equal) +less_equal = ts_binary_operation(ma.less_equal) +greater_equal = ts_binary_operation(ma.greater_equal) +less = ts_binary_operation(ma.less) +greater = ts_binary_operation(ma.greater) +logical_and = ts_binary_operation(ma.logical_and) +logical_or = ts_binary_operation(ma.logical_or) +logical_xor = ts_binary_operation(ma.logical_xor) +bitwise_and = ts_binary_operation(ma.bitwise_and) +bitwise_or = ts_binary_operation(ma.bitwise_or) +bitwise_xor = ts_binary_operation(ma.bitwise_xor) + +def left_shift (a, n): return datawrap(ma.left_shift(a, n), a) +def right_shift (a, n): return datawrap(ma.right_shift(a, n), a) + +# time series specific functions + def tser(start, end): if start.freq != end.freq: raise ValueError("start and end dates must have same frequency!") - return TimeSeries(numpy.arange(int(start), int(end)+1), dtype=corelib.freqTypeMapping[start.freq], freq=start.freq, observed='END', startIndex=int(start)) + return TimeSeries(numpy.arange(int(start), int(end)+1), 
dtype=corelib.freqTypeMapping[start.freq], freq=start.freq, start_date=start) def year(dateSer): return __getDateInfo(dateSer,'Y') @@ -244,61 +393,50 @@ return __getDateInfo(dateSer,'W') def __getDateInfo(dateSer,infoCode): - newData = ma.array(cseries.getDateInfo(dateSer.data.filled(), dateSer.dtype.freq, infoCode)) - newData[dateSer.data.mask] = ma.masked - newSer = copy.deepcopy(dateSer) - newSer.data = newData - newSer.dtype = numpy.int_ - return newSer + newData = ma.array(cseries.getDateInfo(dateSer.filled(), dateSer.tstype.freq, infoCode)) + if dateSer.mask is not ma.nomask: + newData[dateSer.mask] = ma.masked + return datawrap(newData, dateSer) - -def validOpInputs(ser1, ser2): - if isinstance(ser1, TimeSeries) and isinstance(ser2, TimeSeries) and ser1.freq != ser2.freq: - raise "operation cannot be performed on series with different frequencies ("+str(ser1.freq) + " and " + str(ser2.freq)+")" +def adjust_endpoints(a, start_date=None, end_date=None): + """adjust_endpoints(a, start_date=None, end_date=None) returns a new + TimeSeries going from start_date to end_date""" -def SAtoTS(values, freq, observed, dtype=None): - if dtype is None: _dtype = values.dtype - else: _dtype = dtype - return TimeSeries(values.data, dtype=_dtype, freq=freq, observed=observed, startIndex=values.indexZeroRepresents) + if start_date is None: start_date = a.start_date() + if end_date is None: end_date = a.end_date() - -# math functions (two series) -def add(ser1, ser2, fill_value=ma.masked): - return apply_func_twoseries(ma.add, ser1, ser2, fill_value) - -def multiply(ser1, ser2, fill_value=ma.masked): - return apply_func_twoseries(ma.multiply, ser1, ser2, fill_value) - -def divide(ser1, ser2, fill_value=ma.masked): - return apply_func_twoseries(ma.divide, ser1, ser2, fill_value) + tmpShape = list(a.shape) + tmpShape[0] = max(end_date - start_date + 1, 0) + tmpShape = tuple(tmpShape) -def subtract(ser1, ser2, fill_value=ma.masked): - return apply_func_twoseries(ma.subtract, ser1, 
ser2, fill_value) + tmpSer = TimeSeries(ma.resize(a, tmpShape), freq=a.freq, observed=a.observed, start_date=start_date) -# math functions (one series, return series) -def sqrt(ser): - return apply_func_oneseries(ma.sqrt, ser) + setStart, setEnd = max(start_date, a.start_date()), min(end_date, a.end_date()) + setLen = setEnd - setStart -# math functions (one series, return scalar) -def sum(ser): - return ma.sum(ser.data) + tmpSer[:] = ma.masked + + if setLen >= 0: + tmpSer[tmpSer.date_to_index(setStart):tmpSer.date_to_index(setEnd)+1] = a[a.date_to_index(setStart):a.date_to_index(setEnd)+1] + + return tmpSer -def product(ser): - return ma.product(ser.data) + +def aligned(*series, **kwargs): -def average(ser): - return ma.average(ser.data) + if len(series) < 2: + return series + + freq = series[0].freq -def where(condition, x, y): - tempResult = ma.where(condition.data, x, y) - return TimeSeries(tempResult, dtype=numpy.bool_, freq=condition.freq, observed=condition.observed, startIndex=condition.indexZeroRepresents) - -# generic functions -def apply_func_twoseries(func, ser1, ser2, fill_value=ma.masked): - validOpInputs(ser1, ser2) - return SAtoTS(doFunc(ser1, ser2, func, fill_value=fill_value), ser1.freq, ser1.observed) + if len(set([x.freq for x in series])) > 1: raise ValueError("All series must have same frequency!") -def apply_func_oneseries(func, ser): - return SAtoTS(doFunc_oneseries(ser, func),ser.freq, ser.observed) + if 'start_date' in kwargs: start_date = kwargs['start_date'] + else: start_date = min([x.start_date() for x in series]) + if 'end_date' in kwargs: end_date = kwargs['end_date'] + else: end_date = max([x.end_date() for x in series]) + + return [adjust_endpoints(x, start_date=start_date, end_date=end_date) for x in series] + \ No newline at end of file From scipy-svn at scipy.org Wed Dec 20 12:11:57 2006 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Wed, 20 Dec 2006 11:11:57 -0600 (CST) Subject: [Scipy-svn] r2443 - 
trunk/Lib/sandbox/timeseries/examples Message-ID: <20061220171157.7391A39C011@new.scipy.org> Author: mattknox_ca Date: 2006-12-20 11:11:54 -0600 (Wed, 20 Dec 2006) New Revision: 2443 Modified: trunk/Lib/sandbox/timeseries/examples/example.py Log: updated examples to work with latest version of code Modified: trunk/Lib/sandbox/timeseries/examples/example.py =================================================================== --- trunk/Lib/sandbox/timeseries/examples/example.py 2006-12-20 17:11:26 UTC (rev 2442) +++ trunk/Lib/sandbox/timeseries/examples/example.py 2006-12-20 17:11:54 UTC (rev 2443) @@ -4,7 +4,7 @@ # create a time series at business frequency and fill it with random data -bSer = ts.TimeSeries(np.random.uniform(-100,100,600),dtype=np.float64,freq='B',observed='SUMMED',startIndex=ts.thisday('B')-600) +bSer = ts.TimeSeries(np.random.uniform(-100,100,600),dtype=np.float64,freq='B',observed='SUMMED',start_date=ts.thisday('B')-600) # Set negative values to zero. @@ -12,7 +12,7 @@ # Set values occurring on Fridays to 100. -weekdays = ts.day_of_week(ts.tser(bSer.firstValue(asDate=True),bSer.lastValue(asDate=True))) +weekdays = ts.day_of_week(ts.tser(bSer.start_date(),bSer.end_date())) bSer[weekdays == 4] = 100 @@ -23,7 +23,7 @@ function that acts on a 1-dimension masked array and returns a single value. """ -mSer1 = bSer.convert('M',func=ma.average) +mSer1 = bSer.convert('M',func=ts.average) """ @@ -53,45 +53,45 @@ # create another monthly frequency series -mSer2 = ts.TimeSeries(np.random.uniform(-100,100,100),dtype=np.float64,freq='m',observed='END',startIndex=ts.thisday('M')-110) +mSer2 = ts.TimeSeries(np.random.uniform(-100,100,100),dtype=np.float64,freq='m',observed='END',start_date=ts.thisday('M')-110) """ Slicing also supported. The intention is to have indexing behave -largely in the same manner as regular numpy arrays. 
It sure would be -nice if we could slice with the dates directly, but as it stands we -shall have to cast the dates to integers +largely in the same manner as regular numpy arrays. + +series.adjust_date convert a date object into the corresponding +integer for indexing the series """ -mSer2[int(ts.thisday('m')-60):int(ts.thisday('m')-45)] = 12 +sixtyMonthsAgoIdx = mSer2.date_to_index(ts.thisday('m')-60) +mSer2[sixtyMonthsAgoIdx:sixtyMonthsAgoIdx+10] = 12 -# Mask a value. series.lastValue() returns the index of the last -# unmasked value in the series (as an integer, not a Date object) -mSer2[mSer2.lastValue()-40] = ts.masked #ts.masked is the same thing as numpy.ma.masked +# Mask the last value in the series +mSer2[-1] = ts.masked #ts.masked is the same thing as numpy.ma.masked """ -Only series of the same frequency can be used in the basic operations. +Only series of the same frequency and size and same start date +can be used in the basic operations. + The results are the same as you would expect for masked arrays with the basic operations. -Notice that the start and end indices of mSer1 and mSer2 do not need to -line up. This conversion is done implicitly. +start_date and end_date are optional parameters to the aligned function. +If omitted, the min start_date() and end_date() of all series is used as +the new boundaries for each series. """ +mSer1, mSer2 = ts.aligned(mSer1, mSer2, start_date=ts.thisday('m')-100, end_date=ts.thisday('m')) mAdd1 = mSer1 + mSer2 -""" -if you want more control over behaviour of masked values, use ts.add -(or multiply, etc) instead. 
+# add the two series together, first filling in masked values with zeros +mAdd1_filled = mSer1.filled(fill_value=0, ts=True) + mSer2.filled(fill_value=0, ts=True) -if a fill_value is specified, both TimeSeries objects are filled from -min(mSer1.firstValue(),mSer2.firstValue()) to max(mSer1.lastValue(),mSer2.lastValue()) -wherever the series are masked before performing the operation -""" -mAdd2 = ts.add(mSer1, mSer2, fill_value=0) +# adjust the start and end dates of a series +newSer = ts.adjust_endpoints(mSer1, start_date=ts.Date(freq='M', year=1954, month=5), end_date=ts.Date(freq='M', year=2000, month=6)) - # calculate the average value in the series. Behaves the same as in ma bAverage = ts.average(bSer) From scipy-svn at scipy.org Wed Dec 20 12:17:52 2006 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Wed, 20 Dec 2006 11:17:52 -0600 (CST) Subject: [Scipy-svn] r2444 - trunk/Lib/sandbox/timeseries Message-ID: <20061220171752.94D2239C011@new.scipy.org> Author: mattknox_ca Date: 2006-12-20 11:17:48 -0600 (Wed, 20 Dec 2006) New Revision: 2444 Modified: trunk/Lib/sandbox/timeseries/timeseries.py Log: added two constants from numpy.core.ma to local namespace Modified: trunk/Lib/sandbox/timeseries/timeseries.py =================================================================== --- trunk/Lib/sandbox/timeseries/timeseries.py 2006-12-20 17:11:54 UTC (rev 2443) +++ trunk/Lib/sandbox/timeseries/timeseries.py 2006-12-20 17:17:48 UTC (rev 2444) @@ -6,6 +6,8 @@ import tsdate import copy as copytools +masked = ma.masked +nomask = ma.nomask def ts_compatible(a, b): if a.freq != b.freq: From scipy-svn at scipy.org Wed Dec 20 12:18:24 2006 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Wed, 20 Dec 2006 11:18:24 -0600 (CST) Subject: [Scipy-svn] r2445 - trunk/Lib/sandbox/timeseries Message-ID: <20061220171824.A0C5539C011@new.scipy.org> Author: mattknox_ca Date: 2006-12-20 11:18:21 -0600 (Wed, 20 Dec 2006) New Revision: 2445 Modified: 
trunk/Lib/sandbox/timeseries/__init__.py Log: Modified: trunk/Lib/sandbox/timeseries/__init__.py =================================================================== --- trunk/Lib/sandbox/timeseries/__init__.py 2006-12-20 17:17:48 UTC (rev 2444) +++ trunk/Lib/sandbox/timeseries/__init__.py 2006-12-20 17:18:21 UTC (rev 2445) @@ -1,5 +1,3 @@ from timeseries import * from tsdate import * from corelib import * -from numpy import ma -masked = ma.masked \ No newline at end of file From scipy-svn at scipy.org Wed Dec 20 16:53:52 2006 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Wed, 20 Dec 2006 15:53:52 -0600 (CST) Subject: [Scipy-svn] r2446 - trunk/Lib/sandbox/timeseries Message-ID: <20061220215352.549AC39C0D7@new.scipy.org> Author: mattknox_ca Date: 2006-12-20 15:53:48 -0600 (Wed, 20 Dec 2006) New Revision: 2446 Modified: trunk/Lib/sandbox/timeseries/timeseries.py Log: added reduce, outer, and accumulate to the binary operations Modified: trunk/Lib/sandbox/timeseries/timeseries.py =================================================================== --- trunk/Lib/sandbox/timeseries/timeseries.py 2006-12-20 17:18:21 UTC (rev 2445) +++ trunk/Lib/sandbox/timeseries/timeseries.py 2006-12-20 21:53:48 UTC (rev 2446) @@ -49,8 +49,21 @@ else: return self.f(a, b, *args, **kwargs) - + def reduce (self, target, axis=0, dtype=None): + """Reduce target along the given axis with this function.""" + + return self.f.reduce(target, axis, dtype) + def outer (self, a, b): + return self.f.outer(a, b) + + def accumulate (self, target, axis=0): + return datawrap(self.f.accumulate(target, axis), target) + + def __str__ (self): + return "Masked version of " + str(self.f) + + class TimeSeries(ma.MaskedArray): __array_priority__ = 10.2 From scipy-svn at scipy.org Wed Dec 20 18:11:44 2006 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Wed, 20 Dec 2006 17:11:44 -0600 (CST) Subject: [Scipy-svn] r2447 - in trunk/Lib/sandbox/timeseries: . 
src Message-ID: <20061220231144.1E71339C116@new.scipy.org> Author: pierregm Date: 2006-12-20 17:11:41 -0600 (Wed, 20 Dec 2006) New Revision: 2447 Added: trunk/Lib/sandbox/timeseries/setup.py trunk/Lib/sandbox/timeseries/src/ trunk/Lib/sandbox/timeseries/src/cseries.c Log: Added a prototype of setup.py (and put the *.c code in its own folder) Added: trunk/Lib/sandbox/timeseries/setup.py =================================================================== --- trunk/Lib/sandbox/timeseries/setup.py 2006-12-20 21:53:48 UTC (rev 2446) +++ trunk/Lib/sandbox/timeseries/setup.py 2006-12-20 23:11:41 UTC (rev 2447) @@ -0,0 +1,35 @@ +#!/usr/bin/env python +__version__ = '1.0' +__revision__ = "$Revision: 37 $" +__date__ = '$Date: 2006-12-08 14:30:29 -0500 (Fri, 08 Dec 2006) $' + +import os +from os.path import join + +def check_mxDateTime(): + try: + import mx.DateTime + except ImportError: + raise ImportError,"mx.DateTime should already be installed !" + else: + return os.path.dirname(mx.DateTime.mxDateTime.__file__) + +def configuration(parent_package='',top_path=None): + from numpy.distutils.misc_util import Configuration, get_numpy_include_dirs + nxheader = join(get_numpy_include_dirs()[0],'numpy',) + mxlib = check_mxDateTime() + confgr = Configuration('timeseries',parent_package,top_path) + sources = join('src', 'cseries.c') + confgr.add_extension('cseries', + sources=[sources,], + include_dirs=[mxlib, nxheader], + ) + confgr.add_data_dir('doc') + confgr.add_data_dir('examples') + return confgr + +if __name__ == "__main__": + from numpy.distutils.core import setup + #setup.update(nmasetup) + config = configuration(top_path='').todict() + setup(**config) \ No newline at end of file Copied: trunk/Lib/sandbox/timeseries/src/cseries.c (from rev 2430, trunk/Lib/sandbox/timeseries/cseries.c) From scipy-svn at scipy.org Thu Dec 21 14:54:10 2006 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Thu, 21 Dec 2006 13:54:10 -0600 (CST) Subject: [Scipy-svn] r2448 - 
trunk/Lib/signal Message-ID: <20061221195410.5CEE239C13C@new.scipy.org> Author: rkern Date: 2006-12-21 13:54:04 -0600 (Thu, 21 Dec 2006) New Revision: 2448 Modified: trunk/Lib/signal/filter_design.py Log: Import sum from numpy. Modified: trunk/Lib/signal/filter_design.py =================================================================== --- trunk/Lib/signal/filter_design.py 2006-12-20 23:11:41 UTC (rev 2447) +++ trunk/Lib/signal/filter_design.py 2006-12-21 19:54:04 UTC (rev 2448) @@ -7,7 +7,7 @@ from numpy.core.umath import * from numpy import atleast_1d, poly, polyval, roots, imag, real, asarray,\ allclose, resize, pi, concatenate, absolute, logspace -from numpy import mintypecode, select +from numpy import mintypecode, select, sum from scipy import special, optimize, linalg from scipy.misc import comb import string, types From scipy-svn at scipy.org Thu Dec 21 15:58:44 2006 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Thu, 21 Dec 2006 14:58:44 -0600 (CST) Subject: [Scipy-svn] r2449 - trunk/Lib/sandbox/timeseries/src Message-ID: <20061221205844.AB20A39C13F@new.scipy.org> Author: mattknox_ca Date: 2006-12-21 14:58:41 -0600 (Thu, 21 Dec 2006) New Revision: 2449 Removed: trunk/Lib/sandbox/timeseries/src/cseries.c Log: Removed file/folder Deleted: trunk/Lib/sandbox/timeseries/src/cseries.c =================================================================== --- trunk/Lib/sandbox/timeseries/src/cseries.c 2006-12-21 19:54:04 UTC (rev 2448) +++ trunk/Lib/sandbox/timeseries/src/cseries.c 2006-12-21 20:58:41 UTC (rev 2449) @@ -1,594 +0,0 @@ -#include -#include -#include -#include -#include "mxDateTime.h" -#include "arrayobject.h" - -static char cseries_doc[] = "Speed sensitive time series operations"; - -/////////////////////////////////////////////////////////////////////// - - -static //PyArrayObject * -setArrayItem_1D(PyArrayObject **theArray, long index, PyObject *newVal) -{ - if (index >= 0) - { - //set value in array - PyArray_SETITEM(*theArray, 
PyArray_GetPtr(*theArray, &index), newVal); - } - -} - -static //PyArrayObject * -setArrayItem_2D(PyArrayObject **theArray, long index_x, long index_y, PyObject *newVal) -{ - long idx[] = {index_x, index_y}; - - if (index_x >= 0 && index_y >= 0) { - //set value in array - PyArray_SETITEM(*theArray, PyArray_GetPtr(*theArray, idx), newVal); - } - -} - - -static int -freqVal(char freq) -{ - switch(freq) - { - case 'A': - //annual - return 1; - case 'Q': - //quarterly - return 2; - case 'M': - //monthly - return 3; - case 'B': - //business - return 4; - case 'D': - //daily - return 5; - default: - return 0; - } -} - -static long -toDaily(long fromDate, char fromFreq) -{ - long absdate; - int y,m,d; - - mxDateTimeObject *theDate; - - //convert fromDate to days since (0 AD - 1 day) - switch(fromFreq) - { - case 'D': - absdate = fromDate; - break; - case 'B': - absdate = ((fromDate-1)/5)*7 + (fromDate-1)%5 + 1; - break; - case 'M': - y = fromDate/12; - m = fromDate%12; - - if (m == 0) - { - m = 12; - y--; - } - d=1; - break; - case 'Q': - y = fromDate/4; - m = (fromDate%4) * 3 - 2; - - if (m < 1) - { - m += 12; - y--; - } - else if (m == 12) - { - m = 1; - y++; - } - d=1; - break; - case 'A': - y = fromDate; - m = 1; - d = 1; - break; - default: - return -1; - } - - if (freqVal(fromFreq) < 4) - { - theDate = (mxDateTimeObject *)mxDateTime.DateTime_FromDateAndTime(y,m,d,0,0,0); - absdate = (long)(theDate->absdate); - } - - return absdate; - -} - - -static long -getDateInfo_sub(long dateNum, char freq, char info) { - - long monthNum; - mxDateTimeObject *convDate; - convDate = (mxDateTimeObject *)mxDateTime.DateTime_FromAbsDateAndTime(toDaily(dateNum,freq),0); - - switch(info) - { - case 'Y': //year - - return (long)(convDate->year); - - case 'Q': //quarter - monthNum = (long)(convDate->month); - return ((monthNum-1)/3)+1; - - case 'M': //month - return (long)(convDate->month); - - case 'D': //day - return (long)(convDate->day); - - case 'W': //day of week - return 
(long)(convDate->day_of_week); - default: - return -1; - } -} - - -static char cseries_getDateInfo_doc[] = ""; -static PyObject * -cseries_getDateInfo(PyObject *self, PyObject *args) -{ - char *freq; - char *info; - - PyArrayObject *array; - PyArrayObject *tempArray; - PyArrayObject *newArray; - - char *getptr; - PyObject *val; - long i, lngVal, dInfo, dim; - - if (!PyArg_ParseTuple(args, "Oss:getDateInfo(array, freq, info)", &tempArray, &freq, &info)) return NULL; - - array = PyArray_GETCONTIGUOUS(tempArray); - - dim = array->dimensions[0]; - - //initialize new array - newArray = (PyArrayObject*)PyArray_SimpleNew(array->nd, &dim, array->descr->type_num); - - for (i = 0; i < array->dimensions[0]; i++) - { - getptr = array->data + i*array->strides[0]; - val = PyArray_GETITEM(array, getptr); - lngVal = PyInt_AsLong(val); - dInfo = getDateInfo_sub(lngVal, *freq, *info); - - setArrayItem_1D(&newArray, i, PyInt_FromLong(dInfo)); - } - - return (PyObject *) newArray; - -} - - -static long -convert(long fromDate, char fromFreq, char toFreq, int notStartInd) -{ - long absdate, secsInDay; - long converted; - int y,m; - - mxDateTimeObject *convDate; - - secsInDay = 86400; - - absdate = toDaily(fromDate, fromFreq); - - convDate = (mxDateTimeObject *)mxDateTime.DateTime_FromAbsDateAndTime(absdate,0); - - y = convDate->year; - m = convDate->month; - - //convert convDate to appropriate # of periods according to toFreq - switch(toFreq) - { - case 'D': - converted = absdate; - break; - case 'B': - if (convDate->day_of_week > 4) //is weekend day - { - if (notStartInd == 1 && freqVal(fromFreq) > 4) - { - return -1; - } - else - { - if (convDate->day - (convDate->day_of_week - 4) < 1) { - //change to Monday after weekend - absdate += (7 - convDate->day_of_week); - } else { - //change to friday before weekend - absdate -= (convDate->day_of_week - 4); - } - - converted = (long)((absdate / 7 * 5.0) + absdate%7); - } - } - else - { - converted = (long)((absdate / 7 * 5.0) + absdate%7); - 
} - break; - case 'M': - converted = (long)(y*12 + m); - break; - case 'Q': - converted = (long)(y*4 + ((m-1)/3) + 1); - break; - case 'A': - converted = (long)(y); - break; - default: - return -1; - } - - return converted; -} - -static int validFreq(char freq) { - switch(freq) - { - case 'A': - return 1; - case 'Q': - return 1; - case 'M': - return 1; - case 'B': - return 1; - case 'D': - return 1; - default: - return 0; - } -} - - -static int -expand(long oldSize, char fromFreq, char toFreq, long *newLen, long *newHeight) -{ - - int maxBusDaysPerYear, maxBusDaysPerQuarter, maxBusDaysPerMonth; - int minBusDaysPerYear, minBusDaysPerQuarter, minBusDaysPerMonth; - - int maxDaysPerYear, maxDaysPerQuarter, maxDaysPerMonth; - int minDaysPerYear, minDaysPerQuarter, minDaysPerMonth; - - minBusDaysPerYear = 260; maxBusDaysPerYear = 262; - minBusDaysPerQuarter = 64; maxBusDaysPerQuarter = 66; - minBusDaysPerMonth = 20; maxBusDaysPerMonth = 23; - - minDaysPerYear = 365; maxDaysPerYear = 366; - minDaysPerQuarter = 90; maxDaysPerQuarter = 92; - minDaysPerMonth = 28; maxDaysPerMonth = 31; - - if (!validFreq(fromFreq)) return 0; - if (!validFreq(toFreq)) return 0; - - if (fromFreq == toFreq) { - *newLen = oldSize; - *newHeight = 1; - } else { - - switch(fromFreq) - { - case 'A': //annual - - switch(toFreq) - { - case 'Q': - *newLen = oldSize * 4; - *newHeight = 1; - break; - case 'M': - *newLen = oldSize * 12; - *newHeight = 1; - break; - case 'B': - *newLen = oldSize * maxBusDaysPerYear; - *newHeight = 1; - break; - case 'D': - *newLen = oldSize * maxDaysPerYear; - *newHeight = 1; - break; - } - break; - - case 'Q': //quarterly - - switch(toFreq) - { - case 'A': - *newLen = (oldSize / 4) + 2; - *newHeight = 4; - break; - case 'M': - *newLen = oldSize * 3; - *newHeight = 1; - break; - case 'B': - *newLen = oldSize * maxBusDaysPerQuarter; - *newHeight = 1; - break; - case 'D': - *newLen = oldSize * maxDaysPerQuarter; - *newHeight = 1; - break; - } - break; - - case 'M': //monthly 
- - switch(toFreq) - { - case 'A': - *newLen = (oldSize / 12) + 2; - *newHeight = 12; - break; - case 'Q': - *newLen = (oldSize / 3) + 2; - *newHeight = 3; - break; - case 'B': - *newLen = oldSize * maxBusDaysPerMonth; - *newHeight = 1; - break; - case 'D': - *newLen = oldSize * maxDaysPerMonth; - *newHeight = 1; - break; - } - break; - - case 'B': //business - - switch(toFreq) - { - case 'A': - *newLen = (oldSize / minBusDaysPerYear) + 2; - *newHeight = maxBusDaysPerYear; - break; - case 'Q': - *newLen = (oldSize / minBusDaysPerQuarter) + 2; - *newHeight = maxBusDaysPerQuarter; - break; - case 'M': - *newLen = (oldSize / minBusDaysPerMonth) + 2; - *newHeight = maxBusDaysPerMonth; - break; - case 'D': - *newLen = ((7 * oldSize)/5) + 2; - *newHeight = 1; - break; - } - break; - - case 'D': //daily - - switch(toFreq) - { - case 'A': - *newLen = (oldSize / minDaysPerYear) + 2; - *newHeight = maxDaysPerYear; - break; - case 'Q': - *newLen = (oldSize / minDaysPerQuarter) + 2; - *newHeight = maxDaysPerQuarter; - break; - case 'M': - *newLen = (oldSize / minDaysPerMonth) + 2; - *newHeight = maxDaysPerMonth; - break; - case 'B': - *newLen = ((5 * oldSize)/7) + 2; - *newHeight = 1; - break; } - break; - } - } - - return 1; - -} - - -static char cseries_reindex_doc[] = ""; -static PyObject * -cseries_reindex(PyObject *self, PyObject *args) -{ - PyArrayObject *array; - PyArrayObject *tempArray; - PyArrayObject *newArray; - - PyArrayObject *mask; - PyArrayObject *tempMask; - PyArrayObject *newMask; - - PyObject *returnVal = NULL; - - int notStartInd; - long startIndex, newStart, newStartYaxis; - long newLen, newHeight; - long i, currIndex, prevIndex; - long nd; - long *dim; - long currPerLen; - char *fromFreq, *toFreq, *position; - - PyObject *val, *valMask; - - int toFrVal, fromFrVal; - - returnVal = PyDict_New(); - - if (!PyArg_ParseTuple(args, "OssslO:reindex(array, fromfreq, tofreq, position, startIndex, mask)", &tempArray, &fromFreq, &toFreq, &position, &startIndex, 
&tempMask)) return NULL; - - if (toFreq[0] == fromFreq[0]) - { - - PyDict_SetItemString(returnVal, "values", (PyObject*)tempArray); - PyDict_SetItemString(returnVal, "mask", (PyObject*)tempMask); - - return returnVal; - } - - //get frequency numeric mapping - fromFrVal = freqVal(fromFreq[0]); - toFrVal = freqVal(toFreq[0]); - - array = PyArray_GETCONTIGUOUS(tempArray); - mask = PyArray_GETCONTIGUOUS(tempMask); - - //expand size to fit new values if needed - if (!expand(array->dimensions[0], fromFreq[0], toFreq[0], &newLen, &newHeight)) return NULL; - - //convert start index to new frequency - notStartInd = 0; - newStart = convert(startIndex, fromFreq[0], toFreq[0], notStartInd); - - if (newHeight > 1) { - - newStartYaxis = startIndex - convert(newStart, toFreq[0], fromFreq[0], notStartInd); - currPerLen = newStartYaxis; - - nd = 2; - dim = malloc(nd * sizeof(int)); - dim[0] = newLen; - dim[1] = newHeight; - } else { - currPerLen = 0; - nd = 1; - dim = malloc(nd * sizeof(int)); - dim[0] = newLen; - } - - newArray = (PyArrayObject*)PyArray_SimpleNew(nd, dim, array->descr->type_num); - newMask = (PyArrayObject*)PyArray_SimpleNew(nd, dim, mask->descr->type_num); - - free(dim); - - PyArray_FILLWBYTE(newArray,0); - PyArray_FILLWBYTE(newMask,1); - - //initialize prevIndex - prevIndex = newStart; - - notStartInd = 1; - - //set values in the new array - for (i = 0; i < array->dimensions[0]; i++) - { - - //get value from old array - val = PyArray_GETITEM(array, PyArray_GetPtr(array, &i)); - - //get the mask corresponding to the old value - valMask = PyArray_GETITEM(mask, PyArray_GetPtr(mask, &i)); - - //find index for start of current period in new frequency - if (newHeight == 1 && (position[0] == 'E' && !((fromFrVal == 4 && toFrVal == 5) || (fromFrVal == 5 && toFrVal == 4))) ) { - currIndex = convert(startIndex + i + 1, fromFreq[0], toFreq[0], notStartInd)-1; - } else { - currIndex = convert(startIndex + i, fromFreq[0], toFreq[0], notStartInd); - } - - if (newHeight > 1) { 
- - if (currIndex != prevIndex) - { - //reset period length - currPerLen = 0; - prevIndex = currIndex; - } - - //set value in the new array - setArrayItem_2D(&newArray, currIndex-newStart, currPerLen, val); - setArrayItem_2D(&newMask, currIndex-newStart, currPerLen, valMask); - - currPerLen++; - - } else { - - setArrayItem_1D(&newArray, currIndex-newStart, val); - setArrayItem_1D(&newMask, currIndex-newStart, valMask); - - } - - } - - PyDict_SetItemString(returnVal, "values", (PyObject*)newArray); - PyDict_SetItemString(returnVal, "mask", (PyObject*)newMask); - - return returnVal; - -} - - -static char cseries_convert_doc[] = ""; -static PyObject * -cseries_convert(PyObject *self, PyObject *args) -{ - long fromDate; - char* fromFreq; - char* toFreq; - int notStartInd; - - if (!PyArg_ParseTuple(args, "lss:convert(fromDate, fromfreq, tofreq)", &fromDate, &fromFreq, &toFreq)) return NULL; - - //always want start of period (only matters when converting from lower freq to higher freq ie. m -> d) - notStartInd = 0; - - return PyInt_FromLong(convert(fromDate, fromFreq[0], toFreq[0], notStartInd)); -} - - -/////////////////////////////////////////////////////////////////////// - -static PyMethodDef cseries_methods[] = { - {"reindex", cseries_reindex, METH_VARARGS, cseries_reindex_doc}, - {"convert", cseries_convert, METH_VARARGS, cseries_convert_doc}, - {"getDateInfo", cseries_getDateInfo, METH_VARARGS, cseries_getDateInfo_doc}, - {NULL, NULL} -}; - -PyMODINIT_FUNC -initcseries(void) -{ - Py_InitModule3("cseries", cseries_methods, cseries_doc); - mxDateTime_ImportModuleAndAPI(); - import_array(); -} \ No newline at end of file From scipy-svn at scipy.org Thu Dec 21 15:58:53 2006 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Thu, 21 Dec 2006 14:58:53 -0600 (CST) Subject: [Scipy-svn] r2450 - in trunk/Lib/sandbox/timeseries: . 
src Message-ID: <20061221205853.B51D039C13F@new.scipy.org> Author: mattknox_ca Date: 2006-12-21 14:58:51 -0600 (Thu, 21 Dec 2006) New Revision: 2450 Added: trunk/Lib/sandbox/timeseries/src/cseries.c Removed: trunk/Lib/sandbox/timeseries/cseries.c Log: Moved remotely Deleted: trunk/Lib/sandbox/timeseries/cseries.c =================================================================== --- trunk/Lib/sandbox/timeseries/cseries.c 2006-12-21 20:58:41 UTC (rev 2449) +++ trunk/Lib/sandbox/timeseries/cseries.c 2006-12-21 20:58:51 UTC (rev 2450) @@ -1,594 +0,0 @@ -#include -#include -#include -#include -#include "mxDateTime.h" -#include "arrayobject.h" - -static char cseries_doc[] = "Speed sensitive time series operations"; - -/////////////////////////////////////////////////////////////////////// - - -static //PyArrayObject * -setArrayItem_1D(PyArrayObject **theArray, long index, PyObject *newVal) -{ - if (index >= 0) - { - //set value in array - PyArray_SETITEM(*theArray, PyArray_GetPtr(*theArray, &index), newVal); - } - -} - -static //PyArrayObject * -setArrayItem_2D(PyArrayObject **theArray, long index_x, long index_y, PyObject *newVal) -{ - long idx[] = {index_x, index_y}; - - if (index_x >= 0 && index_y >= 0) { - //set value in array - PyArray_SETITEM(*theArray, PyArray_GetPtr(*theArray, idx), newVal); - } - -} - - -static int -freqVal(char freq) -{ - switch(freq) - { - case 'A': - //annual - return 1; - case 'Q': - //quarterly - return 2; - case 'M': - //monthly - return 3; - case 'B': - //business - return 4; - case 'D': - //daily - return 5; - default: - return 0; - } -} - -static long -toDaily(long fromDate, char fromFreq) -{ - long absdate; - int y,m,d; - - mxDateTimeObject *theDate; - - //convert fromDate to days since (0 AD - 1 day) - switch(fromFreq) - { - case 'D': - absdate = fromDate; - break; - case 'B': - absdate = ((fromDate-1)/5)*7 + (fromDate-1)%5 + 1; - break; - case 'M': - y = fromDate/12; - m = fromDate%12; - - if (m == 0) - { - m = 12; - y--; - } - 
d=1; - break; - case 'Q': - y = fromDate/4; - m = (fromDate%4) * 3 - 2; - - if (m < 1) - { - m += 12; - y--; - } - else if (m == 12) - { - m = 1; - y++; - } - d=1; - break; - case 'A': - y = fromDate; - m = 1; - d = 1; - break; - default: - return -1; - } - - if (freqVal(fromFreq) < 4) - { - theDate = (mxDateTimeObject *)mxDateTime.DateTime_FromDateAndTime(y,m,d,0,0,0); - absdate = (long)(theDate->absdate); - } - - return absdate; - -} - - -static long -getDateInfo_sub(long dateNum, char freq, char info) { - - long monthNum; - mxDateTimeObject *convDate; - convDate = (mxDateTimeObject *)mxDateTime.DateTime_FromAbsDateAndTime(toDaily(dateNum,freq),0); - - switch(info) - { - case 'Y': //year - - return (long)(convDate->year); - - case 'Q': //quarter - monthNum = (long)(convDate->month); - return ((monthNum-1)/3)+1; - - case 'M': //month - return (long)(convDate->month); - - case 'D': //day - return (long)(convDate->day); - - case 'W': //day of week - return (long)(convDate->day_of_week); - default: - return -1; - } -} - - -static char cseries_getDateInfo_doc[] = ""; -static PyObject * -cseries_getDateInfo(PyObject *self, PyObject *args) -{ - char *freq; - char *info; - - PyArrayObject *array; - PyArrayObject *tempArray; - PyArrayObject *newArray; - - char *getptr; - PyObject *val; - long i, lngVal, dInfo, dim; - - if (!PyArg_ParseTuple(args, "Oss:getDateInfo(array, freq, info)", &tempArray, &freq, &info)) return NULL; - - array = PyArray_GETCONTIGUOUS(tempArray); - - dim = array->dimensions[0]; - - //initialize new array - newArray = (PyArrayObject*)PyArray_SimpleNew(array->nd, &dim, array->descr->type_num); - - for (i = 0; i < array->dimensions[0]; i++) - { - getptr = array->data + i*array->strides[0]; - val = PyArray_GETITEM(array, getptr); - lngVal = PyInt_AsLong(val); - dInfo = getDateInfo_sub(lngVal, *freq, *info); - - setArrayItem_1D(&newArray, i, PyInt_FromLong(dInfo)); - } - - return (PyObject *) newArray; - -} - - -static long -convert(long fromDate, char 
fromFreq, char toFreq, int notStartInd) -{ - long absdate, secsInDay; - long converted; - int y,m; - - mxDateTimeObject *convDate; - - secsInDay = 86400; - - absdate = toDaily(fromDate, fromFreq); - - convDate = (mxDateTimeObject *)mxDateTime.DateTime_FromAbsDateAndTime(absdate,0); - - y = convDate->year; - m = convDate->month; - - //convert convDate to appropriate # of periods according to toFreq - switch(toFreq) - { - case 'D': - converted = absdate; - break; - case 'B': - if (convDate->day_of_week > 4) //is weekend day - { - if (notStartInd == 1 && freqVal(fromFreq) > 4) - { - return -1; - } - else - { - if (convDate->day - (convDate->day_of_week - 4) < 1) { - //change to Monday after weekend - absdate += (7 - convDate->day_of_week); - } else { - //change to friday before weekend - absdate -= (convDate->day_of_week - 4); - } - - converted = (long)((absdate / 7 * 5.0) + absdate%7); - } - } - else - { - converted = (long)((absdate / 7 * 5.0) + absdate%7); - } - break; - case 'M': - converted = (long)(y*12 + m); - break; - case 'Q': - converted = (long)(y*4 + ((m-1)/3) + 1); - break; - case 'A': - converted = (long)(y); - break; - default: - return -1; - } - - return converted; -} - -static int validFreq(char freq) { - switch(freq) - { - case 'A': - return 1; - case 'Q': - return 1; - case 'M': - return 1; - case 'B': - return 1; - case 'D': - return 1; - default: - return 0; - } -} - - -static int -expand(long oldSize, char fromFreq, char toFreq, long *newLen, long *newHeight) -{ - - int maxBusDaysPerYear, maxBusDaysPerQuarter, maxBusDaysPerMonth; - int minBusDaysPerYear, minBusDaysPerQuarter, minBusDaysPerMonth; - - int maxDaysPerYear, maxDaysPerQuarter, maxDaysPerMonth; - int minDaysPerYear, minDaysPerQuarter, minDaysPerMonth; - - minBusDaysPerYear = 260; maxBusDaysPerYear = 262; - minBusDaysPerQuarter = 64; maxBusDaysPerQuarter = 66; - minBusDaysPerMonth = 20; maxBusDaysPerMonth = 23; - - minDaysPerYear = 365; maxDaysPerYear = 366; - minDaysPerQuarter = 90; 
maxDaysPerQuarter = 92; - minDaysPerMonth = 28; maxDaysPerMonth = 31; - - if (!validFreq(fromFreq)) return 0; - if (!validFreq(toFreq)) return 0; - - if (fromFreq == toFreq) { - *newLen = oldSize; - *newHeight = 1; - } else { - - switch(fromFreq) - { - case 'A': //annual - - switch(toFreq) - { - case 'Q': - *newLen = oldSize * 4; - *newHeight = 1; - break; - case 'M': - *newLen = oldSize * 12; - *newHeight = 1; - break; - case 'B': - *newLen = oldSize * maxBusDaysPerYear; - *newHeight = 1; - break; - case 'D': - *newLen = oldSize * maxDaysPerYear; - *newHeight = 1; - break; - } - break; - - case 'Q': //quarterly - - switch(toFreq) - { - case 'A': - *newLen = (oldSize / 4) + 2; - *newHeight = 4; - break; - case 'M': - *newLen = oldSize * 3; - *newHeight = 1; - break; - case 'B': - *newLen = oldSize * maxBusDaysPerQuarter; - *newHeight = 1; - break; - case 'D': - *newLen = oldSize * maxDaysPerQuarter; - *newHeight = 1; - break; - } - break; - - case 'M': //monthly - - switch(toFreq) - { - case 'A': - *newLen = (oldSize / 12) + 2; - *newHeight = 12; - break; - case 'Q': - *newLen = (oldSize / 3) + 2; - *newHeight = 3; - break; - case 'B': - *newLen = oldSize * maxBusDaysPerMonth; - *newHeight = 1; - break; - case 'D': - *newLen = oldSize * maxDaysPerMonth; - *newHeight = 1; - break; - } - break; - - case 'B': //business - - switch(toFreq) - { - case 'A': - *newLen = (oldSize / minBusDaysPerYear) + 2; - *newHeight = maxBusDaysPerYear; - break; - case 'Q': - *newLen = (oldSize / minBusDaysPerQuarter) + 2; - *newHeight = maxBusDaysPerQuarter; - break; - case 'M': - *newLen = (oldSize / minBusDaysPerMonth) + 2; - *newHeight = maxBusDaysPerMonth; - break; - case 'D': - *newLen = ((7 * oldSize)/5) + 2; - *newHeight = 1; - break; - } - break; - - case 'D': //daily - - switch(toFreq) - { - case 'A': - *newLen = (oldSize / minDaysPerYear) + 2; - *newHeight = maxDaysPerYear; - break; - case 'Q': - *newLen = (oldSize / minDaysPerQuarter) + 2; - *newHeight = maxDaysPerQuarter; - 
break; - case 'M': - *newLen = (oldSize / minDaysPerMonth) + 2; - *newHeight = maxDaysPerMonth; - break; - case 'B': - *newLen = ((5 * oldSize)/7) + 2; - *newHeight = 1; - break; } - break; - } - } - - return 1; - -} - - -static char cseries_reindex_doc[] = ""; -static PyObject * -cseries_reindex(PyObject *self, PyObject *args) -{ - PyArrayObject *array; - PyArrayObject *tempArray; - PyArrayObject *newArray; - - PyArrayObject *mask; - PyArrayObject *tempMask; - PyArrayObject *newMask; - - PyObject *returnVal = NULL; - - int notStartInd; - long startIndex, newStart, newStartYaxis; - long newLen, newHeight; - long i, currIndex, prevIndex; - long nd; - long *dim; - long currPerLen; - char *fromFreq, *toFreq, *position; - - PyObject *val, *valMask; - - int toFrVal, fromFrVal; - - returnVal = PyDict_New(); - - if (!PyArg_ParseTuple(args, "OssslO:reindex(array, fromfreq, tofreq, position, startIndex, mask)", &tempArray, &fromFreq, &toFreq, &position, &startIndex, &tempMask)) return NULL; - - if (toFreq[0] == fromFreq[0]) - { - - PyDict_SetItemString(returnVal, "values", (PyObject*)tempArray); - PyDict_SetItemString(returnVal, "mask", (PyObject*)tempMask); - - return returnVal; - } - - //get frequency numeric mapping - fromFrVal = freqVal(fromFreq[0]); - toFrVal = freqVal(toFreq[0]); - - array = PyArray_GETCONTIGUOUS(tempArray); - mask = PyArray_GETCONTIGUOUS(tempMask); - - //expand size to fit new values if needed - if (!expand(array->dimensions[0], fromFreq[0], toFreq[0], &newLen, &newHeight)) return NULL; - - //convert start index to new frequency - notStartInd = 0; - newStart = convert(startIndex, fromFreq[0], toFreq[0], notStartInd); - - if (newHeight > 1) { - - newStartYaxis = startIndex - convert(newStart, toFreq[0], fromFreq[0], notStartInd); - currPerLen = newStartYaxis; - - nd = 2; - dim = malloc(nd * sizeof(int)); - dim[0] = newLen; - dim[1] = newHeight; - } else { - currPerLen = 0; - nd = 1; - dim = malloc(nd * sizeof(int)); - dim[0] = newLen; - } - - newArray 
= (PyArrayObject*)PyArray_SimpleNew(nd, dim, array->descr->type_num); - newMask = (PyArrayObject*)PyArray_SimpleNew(nd, dim, mask->descr->type_num); - - free(dim); - - PyArray_FILLWBYTE(newArray,0); - PyArray_FILLWBYTE(newMask,1); - - //initialize prevIndex - prevIndex = newStart; - - notStartInd = 1; - - //set values in the new array - for (i = 0; i < array->dimensions[0]; i++) - { - - //get value from old array - val = PyArray_GETITEM(array, PyArray_GetPtr(array, &i)); - - //get the mask corresponding to the old value - valMask = PyArray_GETITEM(mask, PyArray_GetPtr(mask, &i)); - - //find index for start of current period in new frequency - if (newHeight == 1 && (position[0] == 'E' && !((fromFrVal == 4 && toFrVal == 5) || (fromFrVal == 5 && toFrVal == 4))) ) { - currIndex = convert(startIndex + i + 1, fromFreq[0], toFreq[0], notStartInd)-1; - } else { - currIndex = convert(startIndex + i, fromFreq[0], toFreq[0], notStartInd); - } - - if (newHeight > 1) { - - if (currIndex != prevIndex) - { - //reset period length - currPerLen = 0; - prevIndex = currIndex; - } - - //set value in the new array - setArrayItem_2D(&newArray, currIndex-newStart, currPerLen, val); - setArrayItem_2D(&newMask, currIndex-newStart, currPerLen, valMask); - - currPerLen++; - - } else { - - setArrayItem_1D(&newArray, currIndex-newStart, val); - setArrayItem_1D(&newMask, currIndex-newStart, valMask); - - } - - } - - PyDict_SetItemString(returnVal, "values", (PyObject*)newArray); - PyDict_SetItemString(returnVal, "mask", (PyObject*)newMask); - - return returnVal; - -} - - -static char cseries_convert_doc[] = ""; -static PyObject * -cseries_convert(PyObject *self, PyObject *args) -{ - long fromDate; - char* fromFreq; - char* toFreq; - int notStartInd; - - if (!PyArg_ParseTuple(args, "lss:convert(fromDate, fromfreq, tofreq)", &fromDate, &fromFreq, &toFreq)) return NULL; - - //always want start of period (only matters when converting from lower freq to higher freq ie. 
m -> d) - notStartInd = 0; - - return PyInt_FromLong(convert(fromDate, fromFreq[0], toFreq[0], notStartInd)); -} - - -/////////////////////////////////////////////////////////////////////// - -static PyMethodDef cseries_methods[] = { - {"reindex", cseries_reindex, METH_VARARGS, cseries_reindex_doc}, - {"convert", cseries_convert, METH_VARARGS, cseries_convert_doc}, - {"getDateInfo", cseries_getDateInfo, METH_VARARGS, cseries_getDateInfo_doc}, - {NULL, NULL} -}; - -PyMODINIT_FUNC -initcseries(void) -{ - Py_InitModule3("cseries", cseries_methods, cseries_doc); - mxDateTime_ImportModuleAndAPI(); - import_array(); -} \ No newline at end of file Copied: trunk/Lib/sandbox/timeseries/src/cseries.c (from rev 2449, trunk/Lib/sandbox/timeseries/cseries.c) From scipy-svn at scipy.org Thu Dec 21 16:08:10 2006 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Thu, 21 Dec 2006 15:08:10 -0600 (CST) Subject: [Scipy-svn] r2451 - trunk/Lib/sandbox/timeseries Message-ID: <20061221210810.6B32039C13F@new.scipy.org> Author: mattknox_ca Date: 2006-12-21 15:08:06 -0600 (Thu, 21 Dec 2006) New Revision: 2451 Modified: trunk/Lib/sandbox/timeseries/timeseries.py Log: added more methods/functions from numpy.core.ma Modified: trunk/Lib/sandbox/timeseries/timeseries.py =================================================================== --- trunk/Lib/sandbox/timeseries/timeseries.py 2006-12-21 20:58:51 UTC (rev 2450) +++ trunk/Lib/sandbox/timeseries/timeseries.py 2006-12-21 21:08:06 UTC (rev 2451) @@ -6,6 +6,8 @@ import tsdate import copy as copytools +from types import MethodType + masked = ma.masked nomask = ma.nomask @@ -50,8 +52,6 @@ return self.f(a, b, *args, **kwargs) def reduce (self, target, axis=0, dtype=None): - """Reduce target along the given axis with this function.""" - return self.f.reduce(target, axis, dtype) def outer (self, a, b): @@ -328,8 +328,10 @@ arcsin = ts_unary_operation(ma.arcsin) arccos = ts_unary_operation(ma.arccos) arctan = ts_unary_operation(ma.arctan) 
-power = ts_binary_operation(ma.power) +def cumprod(self, axis=0, dtype=None, out=None): return datawrap(ma._cumprod(self, axis, dtype, out), self) +def cumsum(self, axis=0, dtype=None, out=None): return datawrap(ma._cumsum(self, axis, dtype, out), self) + arcsinh = ts_unary_operation(ma.arcsinh) arccosh = ts_unary_operation(ma.arccosh) arctanh = ts_unary_operation(ma.arctanh) @@ -339,30 +341,49 @@ absolute = ts_unary_operation(ma.absolute) fabs = ts_unary_operation(ma.fabs) negative = ts_unary_operation(ma.negative) +nonzero = ts_unary_operation(ma.nonzero) -def nonzero(a): return datawrap(ma.nonzero(a), a) +around = ts_unary_operation(ma.around) +floor = ts_unary_operation(ma.floor) +ceil = ts_unary_operation(ma.ceil) +logical_not = ts_unary_operation(ma.logical_not) + def zeros(shape, dtype=float, freq=None, start_date=None, observed=None): return TimeSeries(ma.zeros(shape, dtype), freq=freq, start_date=start_date, observed=observed) def ones(shape, dtype=float, freq=None, start_date=None, observed=None): return TimeSeries(ma.ones(shape, dtype), freq=freq, start_date=start_date, observed=observed) + +# functions from ma that we want to return scalars or masked arrays count = ma.count sum = ma.sum product = ma.product average = ma.average +compress = ma.compress +minimum = ma.minimum +maximum = ma.maximum +alltrue = ma.alltrue +allclose = ma.allclose +allequal = ma.allequal +sometrue = ma.sometrue +std = ma._std +var = ma._var +def argmin (x, axis = -1, out=None, fill_value=None): + # same as argmin for ma, but returns a date instead of integer + return x.start_date() + ma.argmin(x, axis, out, fill_value) +def argmax (x, axis = -1, out=None, fill_value=None): + # same as argmax for ma, but returns a date instead of integer + return x.start_date() + ma.argmax(x, axis, out, fill_value) + -around = ts_unary_operation(ma.around) -floor = ts_unary_operation(ma.floor) -ceil = ts_unary_operation(ma.ceil) -logical_not = ts_unary_operation(ma.logical_not) - +# binary 
operators add = ts_binary_operation(ma.add) subtract = ts_binary_operation(ma.subtract) - multiply = ts_binary_operation(ma.multiply) divide = ts_binary_operation(ma.divide) +power = ts_binary_operation(ma.power) true_divide = ts_binary_operation(ma.true_divide) floor_divide = ts_binary_operation(ma.floor_divide) remainder = ts_binary_operation(ma.remainder) @@ -385,6 +406,31 @@ def left_shift (a, n): return datawrap(ma.left_shift(a, n), a) def right_shift (a, n): return datawrap(ma.right_shift(a, n), a) +def masked_where(condition, x, copy=1): return datawrap(ma.masked_where(condition, x, copy), x) +def masked_greater(x, value, copy=1): return datawrap(ma.masked_greater(x, value, copy), x) +def masked_greater_equal(x, value, copy=1): return datawrap(ma.masked_greater_equal(x, value, copy), x) +def masked_less(x, value, copy=1): return datawrap(ma.masked_less(x, value, copy), x) +def masked_less_equal(x, value, copy=1): return datawrap(ma.masked_less_equal(x, value, copy), x) +def masked_not_equal(x, value, copy=1): return datawrap(ma.masked_not_equal(x, value, copy), x) +def masked_equal(x, value, copy=1): return datawrap(ma.masked_equal(x, value, copy), x) +def masked_inside(x, v1, v2, copy=1): return datawrap(ma.masked_inside(x, v1, v2, copy), x) +def masked_outside(x, v1, v2, copy=1): return datawrap(ma.masked_outside(x, v1, v2, copy), x) + +def clip(self,a_min,a_max,out=None): return datawrap(ma._clip(self, a_min, a_max, out=None), self) + + +array = TimeSeries + +def _m(f): + return MethodType(f, None, array) + +array.clip = _m(clip) +array.argmax = _m(argmax) +array.argmin = _m(argmin) +array.cumprod = _m(cumprod) +array.cumsum = _m(cumsum) + + # time series specific functions def tser(start, end): From scipy-svn at scipy.org Thu Dec 21 16:42:52 2006 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Thu, 21 Dec 2006 15:42:52 -0600 (CST) Subject: [Scipy-svn] r2452 - trunk/Lib/sandbox/timeseries/examples Message-ID: 
<20061221214252.0C4C939C13F@new.scipy.org> Author: mattknox_ca Date: 2006-12-21 15:42:48 -0600 (Thu, 21 Dec 2006) New Revision: 2452 Modified: trunk/Lib/sandbox/timeseries/examples/example.py Log: added some more examples Modified: trunk/Lib/sandbox/timeseries/examples/example.py =================================================================== --- trunk/Lib/sandbox/timeseries/examples/example.py 2006-12-21 21:08:06 UTC (rev 2451) +++ trunk/Lib/sandbox/timeseries/examples/example.py 2006-12-21 21:42:48 UTC (rev 2452) @@ -71,7 +71,18 @@ mSer2[-1] = ts.masked #ts.masked is the same thing as numpy.ma.masked +# dates can be used as indices as well +mSer2[ts.thisday('m')-55] = 400 + + """ +the tser function makes it easy to index a series over a range of dates +without worrying about converting the dates to appropriate integers first +""" +mSer2[ts.tser(ts.thisday('m')-59, ts.thisday('m')-45)] = 25 + + +""" Only series of the same frequency and size and same start date can be used in the basic operations. From scipy-svn at scipy.org Thu Dec 21 19:02:54 2006 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Thu, 21 Dec 2006 18:02:54 -0600 (CST) Subject: [Scipy-svn] r2453 - in trunk/Lib/sandbox/models: . 
family tests Message-ID: <20061222000254.42EB339C13F@new.scipy.org> Author: jarrod.millman Date: 2006-12-21 18:02:48 -0600 (Thu, 21 Dec 2006) New Revision: 2453 Modified: trunk/Lib/sandbox/models/bspline.py trunk/Lib/sandbox/models/family/family.py trunk/Lib/sandbox/models/formula.py trunk/Lib/sandbox/models/glm.py trunk/Lib/sandbox/models/info.py trunk/Lib/sandbox/models/model.py trunk/Lib/sandbox/models/regression.py trunk/Lib/sandbox/models/smoothers.py trunk/Lib/sandbox/models/tests/test_formula.py Log: expanded tabs to spaces Modified: trunk/Lib/sandbox/models/bspline.py =================================================================== --- trunk/Lib/sandbox/models/bspline.py 2006-12-21 21:42:48 UTC (rev 2452) +++ trunk/Lib/sandbox/models/bspline.py 2006-12-22 00:02:48 UTC (rev 2453) @@ -50,11 +50,11 @@ """ if lower: - t = _zero_triband(a * b, lower=1) - return t[0].sum() + 2 * t[1:].sum() + t = _zero_triband(a * b, lower=1) + return t[0].sum() + 2 * t[1:].sum() else: - t = _zero_triband(a * b, lower=0) - return t[-1].sum() + 2 * t[:-1].sum() + t = _zero_triband(a * b, lower=0) + return t[-1].sum() + 2 * t[:-1].sum() def _zero_triband(a, lower=0): @@ -64,9 +64,9 @@ nrow, ncol = a.shape if lower: - for i in range(nrow): a[i,(ncol-i):] = 0. + for i in range(nrow): a[i,(ncol-i):] = 0. else: - for i in range(nrow): a[i,0:i] = 0. + for i in range(nrow): a[i,0:i] = 0. return a def _zerofunc(x): @@ -137,17 +137,18 @@ upper = min(upper, self.tau.shape[0] - self.m) lower = max(0, lower) - d = N.asarray(d) - if d.shape == (): - v = _bspline.evaluate(x, self.tau, self.m, int(d), lower, upper) - else: - if d.shape[0] != 2: - raise ValueError, "if d is not an integer, expecting a jx2 array with first row indicating order \ - of derivative, second row coefficient in front." 
+ d = N.asarray(d) + if d.shape == (): + v = _bspline.evaluate(x, self.tau, self.m, int(d), lower, upper) + else: + if d.shape[0] != 2: + raise ValueError, "if d is not an integer, expecting a jx2 \ + array with first row indicating order \ + of derivative, second row coefficient in front." - v = 0 - for i in range(d.shape[1]): - v += d[1,i] * _bspline.evaluate(x, self.tau, self.m, d[0,i], lower, upper) + v = 0 + for i in range(d.shape[1]): + v += d[1,i] * _bspline.evaluate(x, self.tau, self.m, d[0,i], lower, upper) v.shape = (upper-lower,) + _shape if upper == self.tau.shape[0] - self.m: @@ -159,23 +160,24 @@ Compute Gram inner product matrix. """ - d = N.squeeze(d) - if N.asarray(d).shape == (): - self.g = _bspline.gram(self.tau, self.m, int(d), int(d)) - else: - d = N.asarray(d) - if d.shape[0] != 2: - raise ValueError, "if d is not an integer, expecting a jx2 array with first row indicating order \ - of derivative, second row coefficient in front." - if d.shape == (2,): - d.shape = (2,1) - self.g = 0 - for i in range(d.shape[1]): + d = N.squeeze(d) + if N.asarray(d).shape == (): + self.g = _bspline.gram(self.tau, self.m, int(d), int(d)) + else: + d = N.asarray(d) + if d.shape[0] != 2: + raise ValueError, "if d is not an integer, expecting a jx2 \ + array with first row indicating order \ + of derivative, second row coefficient in front." 
+ if d.shape == (2,): + d.shape = (2,1) + self.g = 0 + for i in range(d.shape[1]): for j in range(d.shape[1]): - self.g += d[1,i]* d[1,j] * _bspline.gram(self.tau, self.m, int(d[0,i]), int(d[0,j])) - self.g = self.g.T - self.d = d - return N.nan_to_num(self.g) + self.g += d[1,i]* d[1,j] * _bspline.gram(self.tau, self.m, int(d[0,i]), int(d[0,j])) + self.g = self.g.T + self.d = d + return N.nan_to_num(self.g) class SmoothingSpline(BSpline): @@ -185,10 +187,10 @@ default_pen = 1.0e-03 def smooth(self, y, x=None, weights=None): - if self.method == "target_df": - self.fit_target_df(y, x=x, weights=weights, df=self.target_df) - elif self.method == "optimize_gcv": - self.fit_optimize_gcv(y, x=x, weights=weights) + if self.method == "target_df": + self.fit_target_df(y, x=x, weights=weights, df=self.target_df) + elif self.method == "optimize_gcv": + self.fit_optimize_gcv(y, x=x, weights=weights) def fit(self, y, x=None, weights=None, pen=0.): banded = True @@ -200,20 +202,21 @@ banded = False if x.shape != y.shape: - raise ValueError, 'x and y shape do not agree, by default x are the Bspline\'s internal knots' + raise ValueError, 'x and y shape do not agree, by default x are \ + the Bspline\'s internal knots' - bt = self.basis(x) - if pen >= self.penmax: - pen = self.penmax + bt = self.basis(x) + if pen >= self.penmax: + pen = self.penmax if weights is not None: - self.weights = weights - else: - self.weights = 1. + self.weights = weights + else: + self.weights = 1. - _w = N.sqrt(self.weights) - bt *= _w + _w = N.sqrt(self.weights) + bt *= _w # throw out rows with zeros (this happens at boundary points!) 
@@ -222,109 +225,110 @@ bt = bt[:,mask] y = y[mask] - self.df_total = y.shape[0] + self.df_total = y.shape[0] bty = N.dot(bt, _w * y) - self.N = y.shape[0] + self.N = y.shape[0] if not banded: self.btb = N.dot(bt, bt.T) - _g = band2array(self.g, lower=1, symmetric=True) + _g = band2array(self.g, lower=1, symmetric=True) self.coef, _, self.rank = L.lstsq(self.btb + pen*_g, bty)[0:3] - self.rank = min(self.rank, self.btb.shape[0]) + self.rank = min(self.rank, self.btb.shape[0]) else: self.btb = N.zeros(self.g.shape, N.float64) nband, nbasis = self.g.shape for i in range(nbasis): for k in range(min(nband, nbasis-i)): - self.btb[k,i] = (bt[i] * bt[i+k]).sum() + self.btb[k,i] = (bt[i] * bt[i+k]).sum() - bty.shape = (1,bty.shape[0]) + bty.shape = (1,bty.shape[0]) self.chol, self.coef = solveh_banded(self.btb + pen*self.g, bty, lower=1) - self.coef = N.squeeze(self.coef) - self.resid = y * self.weights - N.dot(self.coef, bt) - self.pen = pen + self.coef = N.squeeze(self.coef) + self.resid = y * self.weights - N.dot(self.coef, bt) + self.pen = pen def gcv(self): - """ - Generalized cross-validation score of current fit. - """ + """ + Generalized cross-validation score of current fit. + """ - norm_resid = (self.resid**2).sum() - return norm_resid / (self.df_total - self.trace()) + norm_resid = (self.resid**2).sum() + return norm_resid / (self.df_total - self.trace()) def df_resid(self): - """ - self.N - self.trace() + """ + self.N - self.trace() - where self.N is the number of observations of last fit. - """ + where self.N is the number of observations of last fit. + """ - return self.N - self.trace() + return self.N - self.trace() def df_fit(self): - """ - = self.trace() + """ + = self.trace() - How many degrees of freedom used in the fit? - """ - return self.trace() + How many degrees of freedom used in the fit? 
+ """ + return self.trace() def trace(self): - """ - Trace of the smoothing matrix S(pen) - """ + """ + Trace of the smoothing matrix S(pen) + """ - if self.pen > 0: - _invband = _bspline.invband(self.chol.copy()) - tr = _trace_symbanded(_invband, self.btb, lower=1) - return tr - else: - return self.rank + if self.pen > 0: + _invband = _bspline.invband(self.chol.copy()) + tr = _trace_symbanded(_invband, self.btb, lower=1) + return tr + else: + return self.rank def fit_target_df(self, y, x=None, df=None, weights=None, tol=1.0e-03): - """ - Fit smoothing spline with approximately df degrees of freedom + """ + Fit smoothing spline with approximately df degrees of freedom used in the fit, i.e. so that self.trace() is approximately df. - In general, df must be greater than the dimension of the null space - of the Gram inner product. For cubic smoothing splines, this means - that df > 2. + In general, df must be greater than the dimension of the null space + of the Gram inner product. For cubic smoothing splines, this means + that df > 2. 
""" - df = df or self.target_df + df = df or self.target_df - apen, bpen = 0, 1.0e-03 - olddf = y.shape[0] - self.m + apen, bpen = 0, 1.0e-03 + olddf = y.shape[0] - self.m - if hasattr(self, "pen"): - self.fit(y, x=x, weights=weights, pen=self.pen) - curdf = self.trace() - if N.fabs(curdf - df) / df < tol: - return - if curdf > df: - apen, bpen = self.pen, 2 * self.pen - else: - apen, bpen = 0., self.pen + if hasattr(self, "pen"): + self.fit(y, x=x, weights=weights, pen=self.pen) + curdf = self.trace() + if N.fabs(curdf - df) / df < tol: + return + if curdf > df: + apen, bpen = self.pen, 2 * self.pen + else: + apen, bpen = 0., self.pen - while True: - curpen = 0.5 * (apen + bpen) - self.fit(y, x=x, weights=weights, pen=curpen) - curdf = self.trace() - if curdf > df: - apen, bpen = curpen, 2 * curpen - else: - apen, bpen = apen, curpen - if apen >= self.penmax: - raise ValueError, "penalty too large, try setting penmax higher or decreasing df" - if N.fabs(curdf - df) / df < tol: - break + while True: + curpen = 0.5 * (apen + bpen) + self.fit(y, x=x, weights=weights, pen=curpen) + curdf = self.trace() + if curdf > df: + apen, bpen = curpen, 2 * curpen + else: + apen, bpen = apen, curpen + if apen >= self.penmax: + raise ValueError, "penalty too large, try setting penmax \ + higher or decreasing df" + if N.fabs(curdf - df) / df < tol: + break def fit_optimize_gcv(self, y, x=None, weights=None, tol=1.0e-03, bracket=(0,1.0e-03)): - """ + """ Fit smoothing spline trying to optimize GCV. Try to find a bracketing interval for scipy.optimize.golden @@ -334,15 +338,15 @@ sometimes difficult to find a bracketing interval. 
""" - - def _gcv(pen, y, x): - self.fit(y, x=x, pen=N.exp(pen)) - a = self.gcv() - return a - a = golden(_gcv, args=(y,x), brack=(-100,20), tol=tol) + def _gcv(pen, y, x): + self.fit(y, x=x, pen=N.exp(pen)) + a = self.gcv() + return a + a = golden(_gcv, args=(y,x), brack=(-100,20), tol=tol) + def band2array(a, lower=0, symmetric=False, hermitian=False): """ Take an upper or lower triangular banded matrix and return a matrix using @@ -365,7 +369,7 @@ _a += _b if symmetric and j > 0: _a += _b.T elif hermitian and j > 0: _a += _b.conjugate().T - _a = _a.T + _a = _a.T return _a Modified: trunk/Lib/sandbox/models/family/family.py =================================================================== --- trunk/Lib/sandbox/models/family/family.py 2006-12-21 21:42:48 UTC (rev 2452) +++ trunk/Lib/sandbox/models/family/family.py 2006-12-22 00:02:48 UTC (rev 2453) @@ -14,7 +14,7 @@ def weights(self, mu): - """ + """ Weights for IRLS step. """ Modified: trunk/Lib/sandbox/models/formula.py =================================================================== --- trunk/Lib/sandbox/models/formula.py 2006-12-21 21:42:48 UTC (rev 2452) +++ trunk/Lib/sandbox/models/formula.py 2006-12-22 00:02:48 UTC (rev 2453) @@ -54,9 +54,9 @@ # Namespace in which self.name will be looked up in, if needed def _get_namespace(self): - if isinstance(self.__namespace, N.ndarray): - return self.__namespace - else: return self.__namespace or default_namespace + if isinstance(self.__namespace, N.ndarray): + return self.__namespace + else: return self.__namespace or default_namespace def _set_namespace(self, value): self.__namespace = value def _del_namespace(self): del self.__namespace @@ -105,13 +105,13 @@ def __call__(self, *args, **kw): """ Return the columns associated to self in a design matrix. 
- If the term has no 'func' attribute, it returns + If the term has no 'func' attribute, it returns - self.namespace[self.termname] + self.namespace[self.termname] - else, it returns - - self.func(*args, **kw) + else, it returns + + self.func(*args, **kw) """ @@ -156,8 +156,8 @@ def get_columns(self, *args, **kw): """ - Calling function for factor instance. - """ + Calling function for factor instance. + """ v = self.namespace[self._name] while True: @@ -181,9 +181,9 @@ def values(self, *args, **kw): """ - Return the keys of the factor, rather than the columns of the design - matrix. - """ + Return the keys of the factor, rather than the columns of the design + matrix. + """ del(self.func) val = self(*args, **kw) @@ -204,7 +204,7 @@ When adding \'intercept\' to a factor, this just returns - formula(self, namespace=self.namespace) + formula(self, namespace=self.namespace) """ @@ -237,8 +237,8 @@ __names = self.names() _names = ['%s-%s' % (__names[keep[i]], __names[reference]) for i in range(len(keep))] value = quantitative(_names, func=self, - termname='%s:maineffect' % self.termname, - transform=maineffect_func) + termname='%s:maineffect' % self.termname, + transform=maineffect_func) value.namespace = self.namespace return value @@ -269,9 +269,9 @@ def __call__(self, *args, **kw): """ - A quantitative is just like term, except there is an additional - transformation: self.transform. - """ + A quantitative is just like term, except there is an additional + transformation: self.transform. 
+ """ return self.transform(term.__call__(self, *args, **kw)) class formula(object): @@ -287,9 +287,9 @@ """ def _get_namespace(self): - if isinstance(self.__namespace, N.ndarray): - return self.__namespace - else: return self.__namespace or default_namespace + if isinstance(self.__namespace, N.ndarray): + return self.__namespace + else: return self.__namespace or default_namespace def _set_namespace(self, value): self.__namespace = value def _del_namespace(self): del self.__namespace @@ -396,11 +396,11 @@ """ if not isinstance(query_term, formula): - if type(query_term) == type("name"): - try: query = self[query_term] - except: return False - elif isinstance(query_term, term): - return query_term.termname in self.termnames() + if type(query_term) == type("name"): + try: query = self[query_term] + except: return False + elif isinstance(query_term, term): + return query_term.termname in self.termnames() elif len(query_term.terms) == 1: query_term = query_term.terms[0] return query_term.termname in self.termnames() @@ -525,7 +525,7 @@ sumterms.namespace = self.namespace _term = quantitative(names, func=sumterms, termname=termname, - transform=product_func) + transform=product_func) _term.namespace = self.namespace Modified: trunk/Lib/sandbox/models/glm.py =================================================================== --- trunk/Lib/sandbox/models/glm.py 2006-12-21 21:42:48 UTC (rev 2452) +++ trunk/Lib/sandbox/models/glm.py 2006-12-22 00:02:48 UTC (rev 2453) @@ -29,12 +29,12 @@ return self.family.deviance(Y, results.mu) / scale def next(self): - results = self.results; Y = self.Y + results = self.results; Y = self.Y self.weights = self.family.weights(results.mu) self.initialize(self.design) Z = results.predict + self.family.link.deriv(results.mu) * (Y - results.mu) newresults = wls_model.fit(self, Z) - newresults.Y = Y + newresults.Y = Y newresults.mu = self.family.link.inverse(newresults.predict) self.iter += 1 return newresults Modified: 
trunk/Lib/sandbox/models/info.py =================================================================== --- trunk/Lib/sandbox/models/info.py 2006-12-21 21:42:48 UTC (rev 2452) +++ trunk/Lib/sandbox/models/info.py 2006-12-22 00:02:48 UTC (rev 2453) @@ -2,6 +2,16 @@ Statistical models ================== +This module contains a several linear statistical models +- model formulae as in R (to some extent) +- OLS (ordinary least square regression) +- WLS (weighted least square regression) +- ARp regression +- GLMS (generalized linear models) +- robust linear models using M estimators (with a number of standard default robust norms as in R's rlm) +- robust scale estimates (MAD, Huber's proposal 2). +- mixed effects models +- generalized additive models (gam) """ depends = ['weave', Modified: trunk/Lib/sandbox/models/model.py =================================================================== --- trunk/Lib/sandbox/models/model.py 2006-12-21 21:42:48 UTC (rev 2452) +++ trunk/Lib/sandbox/models/model.py 2006-12-22 00:02:48 UTC (rev 2453) @@ -1,6 +1,6 @@ import numpy as N from numpy.linalg import inv -import scipy.optimize +#from scipy import optimize from scipy.sandbox.models.contrast import ContrastResults from scipy.sandbox.models.utils import recipr @@ -64,10 +64,10 @@ raise NotImplementedError def newton(self, theta): - raise NotImplementedError + raise NotImplementedError # def f(theta): # return -self.logL(theta) -# self.results = scipy.optimize.fmin(f, theta) +# self.results = optimize.fmin(f, theta) class LikelihoodModelResults: Modified: trunk/Lib/sandbox/models/regression.py =================================================================== --- trunk/Lib/sandbox/models/regression.py 2006-12-21 21:42:48 UTC (rev 2452) +++ trunk/Lib/sandbox/models/regression.py 2006-12-22 00:02:48 UTC (rev 2453) @@ -60,10 +60,10 @@ self.initialize(design) def initialize(self, design): - """ - Set design for model, prewhitening design matrix and precomputing - covariance of coefficients 
(up to scale factor in front). - """ + """ + Set design for model, prewhitening design matrix and precomputing + covariance of coefficients (up to scale factor in front). + """ self.design = design self.wdesign = self.whiten(design) @@ -73,9 +73,9 @@ self.df_resid = self.wdesign.shape[0] - utils.rank(self.design) def whiten(self, Y): - """ - OLS model whitener does nothing: returns Y. - """ + """ + OLS model whitener does nothing: returns Y. + """ return Y @@ -95,7 +95,7 @@ def fit(self, Y): """ Full fit of the model including estimate of covariance matrix, - (whitened) residuals and scale. + (whitened) residuals and scale. """ @@ -160,80 +160,80 @@ """ def __init__(self, design, rho): - if type(rho) is type(1): - self.order = rho - self.rho = N.zeros(self.order, N.float64) - else: - self.rho = N.squeeze(N.asarray(rho)) - if len(self.rho.shape) not in [0,1]: - raise ValueError, "AR parameters must be a scalar or a vector" - if self.rho.shape == (): - self.rho.shape = (1,) - self.order = self.rho.shape[0] + if type(rho) is type(1): + self.order = rho + self.rho = N.zeros(self.order, N.float64) + else: + self.rho = N.squeeze(N.asarray(rho)) + if len(self.rho.shape) not in [0,1]: + raise ValueError, "AR parameters must be a scalar or a vector" + if self.rho.shape == (): + self.rho.shape = (1,) + self.order = self.rho.shape[0] ols_model.__init__(self, design) def iterative_fit(self, Y, niter=3): - """ - Perform an iterative two-stage procedure to estimate AR(p) - paramters and regression coefficients simultaneously. - """ - for i in range(niter): - self.initialize(self.design) - results = self.fit(Y) - self.rho, _ = self.yule_walker(Y - results.predict) + """ + Perform an iterative two-stage procedure to estimate AR(p) + paramters and regression coefficients simultaneously. 
+ """ + for i in range(niter): + self.initialize(self.design) + results = self.fit(Y) + self.rho, _ = self.yule_walker(Y - results.predict) def whiten(self, X): - """ - Whiten a series of columns according to an AR(p) - covariance structure. + """ + Whiten a series of columns according to an AR(p) + covariance structure. - """ - X = N.asarray(X, N.float64) - _X = X.copy() - for i in range(self.order): - _X[(i+1):] = _X[(i+1):] - self.rho[i] * X[0:-(i+1)] - return _X + """ + X = N.asarray(X, N.float64) + _X = X.copy() + for i in range(self.order): + _X[(i+1):] = _X[(i+1):] - self.rho[i] * X[0:-(i+1)] + return _X def yule_walker(self, X, method="unbiased", df=None): - """ - Estimate AR(p) parameters from a sequence X using Yule-Walker equation. - Method can be "unbiased" or "MLE" and this determines - denominator in estimate of ACF at lag k. If "MLE", the denominator is - n=r.shape[0], if "unbiased" the denominator is n-k. + """ + Estimate AR(p) parameters from a sequence X using Yule-Walker equation. + Method can be "unbiased" or "MLE" and this determines + denominator in estimate of ACF at lag k. If "MLE", the denominator is + n=r.shape[0], if "unbiased" the denominator is n-k. - If df is supplied, then it is assumed the X has df degrees of - freedom rather than n. + If df is supplied, then it is assumed the X has df degrees of + freedom rather than n. 
- See, for example: + See, for example: http://en.wikipedia.org/wiki/Autoregressive_moving_average_model - """ - - method = str(method).lower() - if method not in ["unbiased", "mle"]: - raise ValueError, "ACF estimation method must be 'unbiased' \ - or 'MLE'" - X = N.asarray(X, N.float64) - X -= X.mean() - n = df or X.shape[0] + """ + + method = str(method).lower() + if method not in ["unbiased", "mle"]: + raise ValueError, "ACF estimation method must be 'unbiased' \ + or 'MLE'" + X = N.asarray(X, N.float64) + X -= X.mean() + n = df or X.shape[0] - if method == "unbiased": - denom = lambda k: n - k - else: - denom = lambda k: n + if method == "unbiased": + denom = lambda k: n - k + else: + denom = lambda k: n - if len(X.shape) != 1: - raise ValueError, "expecting a vector to estimate AR parameters" - r = N.zeros(self.order+1, N.float64) - r[0] = (X**2).sum() / denom(0) - for k in range(1,self.order+1): - r[k] = (X[0:-k]*X[k:]).sum() / denom(k) - R = toeplitz(r[:-1]) + if len(X.shape) != 1: + raise ValueError, "expecting a vector to estimate AR parameters" + r = N.zeros(self.order+1, N.float64) + r[0] = (X**2).sum() / denom(0) + for k in range(1,self.order+1): + r[k] = (X[0:-k]*X[k:]).sum() / denom(k) + R = toeplitz(r[:-1]) - rho = L.solve(R, r[1:]) - sigmasq = r[0] - (r[1:]*rho).sum() - return rho, N.sqrt(sigmasq) + rho = L.solve(R, r[1:]) + sigmasq = r[0] - (r[1:]*rho).sum() + return rho, N.sqrt(sigmasq) class wls_model(ols_model): """ @@ -272,11 +272,11 @@ ols_model.__init__(self, design) def whiten(self, X): - """ - Whitener for WLS model, multiplies by sqrt(self.weights) - """ + """ + Whitener for WLS model, multiplies by sqrt(self.weights) + """ - X = N.asarray(X, N.float64) + X = N.asarray(X, N.float64) if X.ndim == 1: return X * N.sqrt(self.weights) Modified: trunk/Lib/sandbox/models/smoothers.py =================================================================== --- trunk/Lib/sandbox/models/smoothers.py 2006-12-21 21:42:48 UTC (rev 2452) +++ 
trunk/Lib/sandbox/models/smoothers.py 2006-12-22 00:02:48 UTC (rev 2453) @@ -26,44 +26,44 @@ """ def df_fit(self): - """ - Degrees of freedom used in the fit. - """ - return self.order + 1 + """ + Degrees of freedom used in the fit. + """ + return self.order + 1 def df_resid(self): - """ - Residual degrees of freedom from last fit. - """ - return self.N - self.order - 1 + """ + Residual degrees of freedom from last fit. + """ + return self.N - self.order - 1 def __init__(self, order, x=None): - self.order = order - self.coef = N.zeros((order+1,), N.float64) - if x is not None: - self.X = N.array([x**i for i in range(order+1)]).T + self.order = order + self.coef = N.zeros((order+1,), N.float64) + if x is not None: + self.X = N.array([x**i for i in range(order+1)]).T def __call__(self, x=None): - if x is not None: - X = N.array([(x**i) for i in range(self.order+1)]) - else: X = self.X + if x is not None: + X = N.array([(x**i) for i in range(self.order+1)]) + else: X = self.X return N.squeeze(N.dot(X.T, self.coef)) def fit(self, y, x=None, weights=None): - self.N = y.shape[0] - if weights is None: - weights = 1 - _w = N.sqrt(weights) - if x is None: - if not hasattr(self, "X"): - raise ValueError, "x needed to fit poly_smoother" - else: - self.X = N.array([(x**i) for i in range(self.order+1)]) + self.N = y.shape[0] + if weights is None: + weights = 1 + _w = N.sqrt(weights) + if x is None: + if not hasattr(self, "X"): + raise ValueError, "x needed to fit poly_smoother" + else: + self.X = N.array([(x**i) for i in range(self.order+1)]) - X = self.X * _w + X = self.X * _w - _y = y * _w - self.coef = N.dot(L.pinv(X).T, _y) + _y = y * _w + self.coef = N.dot(L.pinv(X).T, _y) class smoothing_spline(bspline): @@ -81,16 +81,16 @@ if x.shape != y.shape: raise ValueError, 'x and y shape do not agree, by default x are the Bspline\'s internal knots' - bt = self.basis(x) - if pen >= self.penmax: - pen = self.penmax + bt = self.basis(x) + if pen >= self.penmax: + pen = self.penmax if 
weights is None: - weights = N.array(1.) + weights = N.array(1.) - wmean = weights.mean() - _w = N.sqrt(weights / wmean) - bt *= _w + wmean = weights.mean() + _w = N.sqrt(weights / wmean) + bt *= _w # throw out rows with zeros (this happens at boundary points!) @@ -99,69 +99,69 @@ bt = bt[:,mask] y = y[mask] - self.df_total = y.shape[0] + self.df_total = y.shape[0] - if bt.shape[1] != y.shape[0]: - raise ValueError, "some x values are outside range of B-spline knots" + if bt.shape[1] != y.shape[0]: + raise ValueError, "some x values are outside range of B-spline knots" bty = N.dot(bt, _w * y) - self.N = y.shape[0] + self.N = y.shape[0] if not banded: self.btb = N.dot(bt, bt.T) - _g = band2array(self.g, lower=1, symmetric=True) + _g = band2array(self.g, lower=1, symmetric=True) self.coef, _, self.rank = L.lstsq(self.btb + pen*_g, bty)[0:3] - self.rank = min(self.rank, self.btb.shape[0]) + self.rank = min(self.rank, self.btb.shape[0]) else: self.btb = N.zeros(self.g.shape, N.float64) nband, nbasis = self.g.shape for i in range(nbasis): for k in range(min(nband, nbasis-i)): - self.btb[k,i] = (bt[i] * bt[i+k]).sum() + self.btb[k,i] = (bt[i] * bt[i+k]).sum() - bty.shape = (1,bty.shape[0]) + bty.shape = (1,bty.shape[0]) self.chol, self.coef = solveh_banded(self.btb + - pen*self.g, - bty, lower=1) + pen*self.g, + bty, lower=1) - self.coef = N.squeeze(self.coef) - self.resid = N.sqrt(wmean) * (y * _w - N.dot(self.coef, bt)) - self.pen = pen + self.coef = N.squeeze(self.coef) + self.resid = N.sqrt(wmean) * (y * _w - N.dot(self.coef, bt)) + self.pen = pen def gcv(self): - """ - Generalized cross-validation score of current fit. - """ + """ + Generalized cross-validation score of current fit. 
+ """ - norm_resid = (self.resid**2).sum() - return norm_resid / (self.df_total - self.trace()) + norm_resid = (self.resid**2).sum() + return norm_resid / (self.df_total - self.trace()) def df_resid(self): - """ - self.N - self.trace() + """ + self.N - self.trace() - where self.N is the number of observations of last fit. - """ - - return self.N - self.trace() + where self.N is the number of observations of last fit. + """ + + return self.N - self.trace() def df_fit(self): - """ - = self.trace() + """ + = self.trace() - How many degrees of freedom used in the fit? - """ - return self.trace() + How many degrees of freedom used in the fit? + """ + return self.trace() def trace(self): - """ - Trace of the smoothing matrix S(pen) - """ + """ + Trace of the smoothing matrix S(pen) + """ - if self.pen > 0: - _invband = _bspline.invband(self.chol.copy()) - tr = _trace_symbanded(_invband, self.btb, lower=1) - return tr - else: - return self.rank + if self.pen > 0: + _invband = _bspline.invband(self.chol.copy()) + tr = _trace_symbanded(_invband, self.btb, lower=1) + return tr + else: + return self.rank class smoothing_spline_fixeddf(smoothing_spline): @@ -178,34 +178,34 @@ target_df = 5 def __init__(self, knots, order=4, coef=None, M=None, target_df=None): - if target_df is not None: - self.target_df = target_df - bspline.__init__(self, knots, order=order, coef=coef, M=M) - self.target_reached = False + if target_df is not None: + self.target_df = target_df + bspline.__init__(self, knots, order=order, coef=coef, M=M) + self.target_reached = False def fit(self, y, x=None, df=None, weights=None, tol=1.0e-03): - df = df or self.target_df + df = df or self.target_df - apen, bpen = 0, 1.0e-03 - olddf = y.shape[0] - self.m + apen, bpen = 0, 1.0e-03 + olddf = y.shape[0] - self.m - if not self.target_reached: - while True: - curpen = 0.5 * (apen + bpen) - smoothing_spline.fit(self, y, x=x, weights=weights, pen=curpen) - curdf = self.trace() - if curdf > df: - apen, bpen = curpen, 2 
* curpen - else: - apen, bpen = apen, curpen - if apen >= self.penmax: - raise ValueError, "penalty too large, try setting penmax higher or decreasing df" - if N.fabs(curdf - df) / df < tol: - self.target_reached = True - break - else: - smoothing_spline.fit(self, y, x=x, weights=weights, pen=self.pen) + if not self.target_reached: + while True: + curpen = 0.5 * (apen + bpen) + smoothing_spline.fit(self, y, x=x, weights=weights, pen=curpen) + curdf = self.trace() + if curdf > df: + apen, bpen = curpen, 2 * curpen + else: + apen, bpen = apen, curpen + if apen >= self.penmax: + raise ValueError, "penalty too large, try setting penmax higher or decreasing df" + if N.fabs(curdf - df) / df < tol: + self.target_reached = True + break + else: + smoothing_spline.fit(self, y, x=x, weights=weights, pen=self.pen) class smoothing_spline_gcv(smoothing_spline): @@ -221,14 +221,14 @@ """ def fit(self, y, x=None, weights=None, tol=1.0e-03, - bracket=(0,1.0e-03)): + bracket=(0,1.0e-03)): - def _gcv(pen, y, x): - smoothing_spline.fit(y, x=x, pen=N.exp(pen), weights=weights) - a = self.gcv() - return a + def _gcv(pen, y, x): + smoothing_spline.fit(y, x=x, pen=N.exp(pen), weights=weights) + a = self.gcv() + return a - a = golden(_gcv, args=(y,x), brack=(-100,20), tol=tol) + a = golden(_gcv, args=(y,x), brack=(-100,20), tol=tol) def _trace_symbanded(a,b, lower=0): """ @@ -236,11 +236,11 @@ """ if lower: - t = _zero_triband(a * b, lower=1) - return t[0].sum() + 2 * t[1:].sum() + t = _zero_triband(a * b, lower=1) + return t[0].sum() + 2 * t[1:].sum() else: - t = _zero_triband(a * b, lower=0) - return t[-1].sum() + 2 * t[:-1].sum() + t = _zero_triband(a * b, lower=0) + return t[-1].sum() + 2 * t[:-1].sum() @@ -251,7 +251,7 @@ nrow, ncol = a.shape if lower: - for i in range(nrow): a[i,(ncol-i):] = 0. + for i in range(nrow): a[i,(ncol-i):] = 0. else: - for i in range(nrow): a[i,0:i] = 0. + for i in range(nrow): a[i,0:i] = 0. 
return a Modified: trunk/Lib/sandbox/models/tests/test_formula.py =================================================================== --- trunk/Lib/sandbox/models/tests/test_formula.py 2006-12-21 21:42:48 UTC (rev 2452) +++ trunk/Lib/sandbox/models/tests/test_formula.py 2006-12-22 00:02:48 UTC (rev 2453) @@ -57,43 +57,43 @@ self.formula = self.terms[0] for i in range(1, 10): self.formula += self.terms[i] - self.formula.namespace = self.namespace + self.formula.namespace = self.namespace def test_namespace(self): - space1 = {'X':N.arange(50), 'Y':N.arange(50)*2} - space2 = {'X':N.arange(20), 'Y':N.arange(20)*2} - X = formula.term('X') - Y = formula.term('Y') + space1 = {'X':N.arange(50), 'Y':N.arange(50)*2} + space2 = {'X':N.arange(20), 'Y':N.arange(20)*2} + X = formula.term('X') + Y = formula.term('Y') - X.namespace = space1 - assert_almost_equal(X(), N.arange(50)) + X.namespace = space1 + assert_almost_equal(X(), N.arange(50)) - Y.namespace = space2 - assert_almost_equal(Y(), N.arange(20)*2) + Y.namespace = space2 + assert_almost_equal(Y(), N.arange(20)*2) - f = X + Y + f = X + Y - f.namespace = space1 - self.assertEqual(f().shape, (2,50)) - assert_almost_equal(Y(), N.arange(50)*2) - assert_almost_equal(X(), N.arange(50)) + f.namespace = space1 + self.assertEqual(f().shape, (2,50)) + assert_almost_equal(Y(), N.arange(50)*2) + assert_almost_equal(X(), N.arange(50)) - f.namespace = space2 - self.assertEqual(f().shape, (2,20)) - assert_almost_equal(Y(), N.arange(20)*2) - assert_almost_equal(X(), N.arange(20)) + f.namespace = space2 + self.assertEqual(f().shape, (2,20)) + assert_almost_equal(Y(), N.arange(20)*2) + assert_almost_equal(X(), N.arange(20)) def test_termcolumns(self): t1 = formula.term("A") t2 = formula.term("B") f = t1 + t2 + t1 * t2 - def other(val): - return N.array([3.2*val,4.342*val**2, 5.234*val**3]) - q = formula.quantitative(['other%d' % i for i in range(1,4)], termname='other', func=t1, transform=other) - f += q - q.namespace = f.namespace = 
self.formula.namespace - assert_almost_equal(q(), f()[f.termcolumns(q)]) + def other(val): + return N.array([3.2*val,4.342*val**2, 5.234*val**3]) + q = formula.quantitative(['other%d' % i for i in range(1,4)], termname='other', func=t1, transform=other) + f += q + q.namespace = f.namespace = self.formula.namespace + assert_almost_equal(q(), f()[f.termcolumns(q)]) def test_str(self): @@ -111,27 +111,27 @@ prod = self.terms[0] * self.terms[2] self.formula += prod x = self.formula.design() - p = self.formula['A*C'] + p = self.formula['A*C'] col = self.formula.termcolumns(prod, dict=False) assert_almost_equal(N.squeeze(x[:,col]), self.X[:,0] * self.X[:,2]) assert_almost_equal(N.squeeze(p()), self.X[:,0] * self.X[:,2]) - + def test_intercept1(self): prod = self.terms[0] * self.terms[2] self.formula += formula.I - icol = self.formula.names().index('intercept') - assert_almost_equal(self.formula()[icol], N.ones((40,))) + icol = self.formula.names().index('intercept') + assert_almost_equal(self.formula()[icol], N.ones((40,))) def test_intercept2(self): prod = self.terms[0] * self.terms[2] self.formula += formula.I - icol = self.formula.names().index('intercept') - assert_almost_equal(self.formula()[icol], N.ones((40,))) + icol = self.formula.names().index('intercept') + assert_almost_equal(self.formula()[icol], N.ones((40,))) def test_intercept3(self): prod = self.terms[0] * formula.I - prod.namespace = self.formula.namespace - assert_almost_equal(N.squeeze(prod()), self.terms[0]()) + prod.namespace = self.formula.namespace + assert_almost_equal(N.squeeze(prod()), self.terms[0]()) @@ -163,53 +163,53 @@ resid = N.identity(40) - P self.namespace['noise'] = N.transpose(N.dot(resid, R.standard_normal((40,5)))) terms = dummy + self.terms[2] - terms.namespace = self.formula.namespace + terms.namespace = self.formula.namespace c = contrast.Contrast(terms, self.formula) c.getmatrix() self.assertEquals(c.matrix.shape, (10,)) def test_power(self): - t = self.terms[2] - t2 = t**2 - 
t.namespace = t2.namespace = self.formula.namespace - assert_almost_equal(t()**2, t2()) + t = self.terms[2] + t2 = t**2 + t.namespace = t2.namespace = self.formula.namespace + assert_almost_equal(t()**2, t2()) def test_quantitative(self): - t = self.terms[2] - sint = formula.quantitative('t', func=t, transform=N.sin) - t.namespace = sint.namespace = self.formula.namespace - assert_almost_equal(N.sin(t()), sint()) + t = self.terms[2] + sint = formula.quantitative('t', func=t, transform=N.sin) + t.namespace = sint.namespace = self.formula.namespace + assert_almost_equal(N.sin(t()), sint()) def test_factor1(self): - f = ['a','b','c']*10 - fac = formula.factor('ff', set(f)) - fac.namespace = {'ff':f} - self.assertEquals(list(fac.values()), f) + f = ['a','b','c']*10 + fac = formula.factor('ff', set(f)) + fac.namespace = {'ff':f} + self.assertEquals(list(fac.values()), f) def test_factor2(self): - f = ['a','b','c']*10 - fac = formula.factor('ff', set(f)) - fac.namespace = {'ff':f} - self.assertEquals(fac().shape, (3,30)) + f = ['a','b','c']*10 + fac = formula.factor('ff', set(f)) + fac.namespace = {'ff':f} + self.assertEquals(fac().shape, (3,30)) def test_factor3(self): - f = ['a','b','c']*10 - fac = formula.factor('ff', set(f)) - fac.namespace = {'ff':f} - m = fac.main_effect(reference=1) - self.assertEquals(m().shape, (2,30)) + f = ['a','b','c']*10 + fac = formula.factor('ff', set(f)) + fac.namespace = {'ff':f} + m = fac.main_effect(reference=1) + self.assertEquals(m().shape, (2,30)) def test_factor4(self): - f = ['a','b','c']*10 - fac = formula.factor('ff', set(f)) - fac.namespace = {'ff':f} - m = fac.main_effect(reference=2) - r = N.array([N.identity(3)]*10) - r.shape = (30,3) - r = r.T - _m = N.array([r[0]-r[2],r[1]-r[2]]) - assert_almost_equal(_m, m()) + f = ['a','b','c']*10 + fac = formula.factor('ff', set(f)) + fac.namespace = {'ff':f} + m = fac.main_effect(reference=2) + r = N.array([N.identity(3)]*10) + r.shape = (30,3) + r = r.T + _m = 
N.array([r[0]-r[2],r[1]-r[2]]) + assert_almost_equal(_m, m()) def test_contrast4(self): From scipy-svn at scipy.org Thu Dec 21 19:11:08 2006 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Thu, 21 Dec 2006 18:11:08 -0600 (CST) Subject: [Scipy-svn] r2454 - trunk/Lib/sandbox/models/tests Message-ID: <20061222001108.0EFAB39C13F@new.scipy.org> Author: jarrod.millman Date: 2006-12-21 18:11:06 -0600 (Thu, 21 Dec 2006) New Revision: 2454 Modified: trunk/Lib/sandbox/models/tests/test_formula.py Log: cleaning up imports and removed use of deprecated string module Modified: trunk/Lib/sandbox/models/tests/test_formula.py =================================================================== --- trunk/Lib/sandbox/models/tests/test_formula.py 2006-12-22 00:02:48 UTC (rev 2453) +++ trunk/Lib/sandbox/models/tests/test_formula.py 2006-12-22 00:11:06 UTC (rev 2454) @@ -1,9 +1,12 @@ -import unittest, csv, os +import csv +import os +import unittest + import numpy as N import numpy.random as R import numpy.linalg as L -import scipy, string from numpy.testing import * +import scipy from scipy.sandbox.models import utils, formula, contrast @@ -50,7 +53,7 @@ self.namespace = {} self.terms = [] for i in range(10): - name = '%s' % string.uppercase[i] + name = '%s' % i.upper() self.namespace[name] = self.X[:,i] self.terms.append(formula.term(name)) From scipy-svn at scipy.org Thu Dec 21 19:11:41 2006 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Thu, 21 Dec 2006 18:11:41 -0600 (CST) Subject: [Scipy-svn] r2455 - trunk/Lib/sandbox/models/tests Message-ID: <20061222001141.1CA5839C13F@new.scipy.org> Author: jarrod.millman Date: 2006-12-21 18:11:39 -0600 (Thu, 21 Dec 2006) New Revision: 2455 Modified: trunk/Lib/sandbox/models/tests/__init__.py Log: absolute imports Modified: trunk/Lib/sandbox/models/tests/__init__.py =================================================================== --- trunk/Lib/sandbox/models/tests/__init__.py 2006-12-22 00:11:06 UTC (rev 2454) +++ 
trunk/Lib/sandbox/models/tests/__init__.py 2006-12-22 00:11:39 UTC (rev 2455) @@ -1,8 +1,9 @@ -import test_formula -import test_regression -import test_utils import unittest +from scipy.sandbox.models.tests import test_formula +from scipy.sandbox.models.tests import test_regression +from scipy.sandbox.models.tests import test_utils + def suite(): return unittest.TestSuite([test_formula.suite(), test_regression.suite(), From scipy-svn at scipy.org Thu Dec 21 19:12:07 2006 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Thu, 21 Dec 2006 18:12:07 -0600 (CST) Subject: [Scipy-svn] r2456 - trunk/Lib/sandbox/models/robust Message-ID: <20061222001207.5944139C181@new.scipy.org> Author: jarrod.millman Date: 2006-12-21 18:12:05 -0600 (Thu, 21 Dec 2006) New Revision: 2456 Modified: trunk/Lib/sandbox/models/robust/norms.py Log: removed unused import Modified: trunk/Lib/sandbox/models/robust/norms.py =================================================================== --- trunk/Lib/sandbox/models/robust/norms.py 2006-12-22 00:11:39 UTC (rev 2455) +++ trunk/Lib/sandbox/models/robust/norms.py 2006-12-22 00:12:05 UTC (rev 2456) @@ -1,5 +1,4 @@ import numpy as N -import scipy.interpolate class RobustNorm: def __call__(self, z): From scipy-svn at scipy.org Thu Dec 21 19:29:01 2006 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Thu, 21 Dec 2006 18:29:01 -0600 (CST) Subject: [Scipy-svn] r2457 - trunk/Lib/sandbox/models/tests Message-ID: <20061222002901.B9E5539C16D@new.scipy.org> Author: jarrod.millman Date: 2006-12-21 18:29:00 -0600 (Thu, 21 Dec 2006) New Revision: 2457 Modified: trunk/Lib/sandbox/models/tests/test_formula.py Log: revert back to using string module Modified: trunk/Lib/sandbox/models/tests/test_formula.py =================================================================== --- trunk/Lib/sandbox/models/tests/test_formula.py 2006-12-22 00:12:05 UTC (rev 2456) +++ trunk/Lib/sandbox/models/tests/test_formula.py 2006-12-22 00:29:00 UTC (rev 2457) @@ 
-1,5 +1,6 @@ import csv import os +import string import unittest import numpy as N @@ -53,7 +54,7 @@ self.namespace = {} self.terms = [] for i in range(10): - name = '%s' % i.upper() + name = '%s' % string.uppercase[i] self.namespace[name] = self.X[:,i] self.terms.append(formula.term(name)) From scipy-svn at scipy.org Thu Dec 21 19:38:16 2006 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Thu, 21 Dec 2006 18:38:16 -0600 (CST) Subject: [Scipy-svn] r2458 - trunk/Lib/sandbox/models/tests Message-ID: <20061222003816.4F9C439C16D@new.scipy.org> Author: jarrod.millman Date: 2006-12-21 18:38:13 -0600 (Thu, 21 Dec 2006) New Revision: 2458 Modified: trunk/Lib/sandbox/models/tests/test_formula.py Log: removed wildcard import Modified: trunk/Lib/sandbox/models/tests/test_formula.py =================================================================== --- trunk/Lib/sandbox/models/tests/test_formula.py 2006-12-22 00:29:00 UTC (rev 2457) +++ trunk/Lib/sandbox/models/tests/test_formula.py 2006-12-22 00:38:13 UTC (rev 2458) @@ -6,7 +6,7 @@ import numpy as N import numpy.random as R import numpy.linalg as L -from numpy.testing import * +from numpy.testing import assert_almost_equal, ScipyTestCase import scipy from scipy.sandbox.models import utils, formula, contrast From scipy-svn at scipy.org Thu Dec 21 19:44:07 2006 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Thu, 21 Dec 2006 18:44:07 -0600 (CST) Subject: [Scipy-svn] r2459 - trunk/Lib/sandbox/models/tests Message-ID: <20061222004407.2468B39C16D@new.scipy.org> Author: jarrod.millman Date: 2006-12-21 18:44:04 -0600 (Thu, 21 Dec 2006) New Revision: 2459 Modified: trunk/Lib/sandbox/models/tests/test_glm.py trunk/Lib/sandbox/models/tests/test_regression.py trunk/Lib/sandbox/models/tests/test_utils.py Log: removed more wildcard imports Modified: trunk/Lib/sandbox/models/tests/test_glm.py =================================================================== --- trunk/Lib/sandbox/models/tests/test_glm.py 
2006-12-22 00:38:13 UTC (rev 2458) +++ trunk/Lib/sandbox/models/tests/test_glm.py 2006-12-22 00:44:04 UTC (rev 2459) @@ -1,8 +1,10 @@ -import scipy.sandbox.models as S import unittest + +import numpy as N import numpy.random as R -import numpy as N -from numpy.testing import * +from numpy.testing import NumpyTest, NumpyTestCase + +import scipy.sandbox.models as S from scipy.sandbox.models.glm import model W = R.standard_normal Modified: trunk/Lib/sandbox/models/tests/test_regression.py =================================================================== --- trunk/Lib/sandbox/models/tests/test_regression.py 2006-12-22 00:38:13 UTC (rev 2458) +++ trunk/Lib/sandbox/models/tests/test_regression.py 2006-12-22 00:44:04 UTC (rev 2459) @@ -1,7 +1,8 @@ import unittest + from numpy.random import standard_normal + from scipy.sandbox.models.regression import ols_model, ar_model -from numpy.testing import * W = standard_normal Modified: trunk/Lib/sandbox/models/tests/test_utils.py =================================================================== --- trunk/Lib/sandbox/models/tests/test_utils.py 2006-12-22 00:38:13 UTC (rev 2458) +++ trunk/Lib/sandbox/models/tests/test_utils.py 2006-12-22 00:44:04 UTC (rev 2459) @@ -1,8 +1,10 @@ import unittest + import numpy as N import numpy.random as R +from numpy.testing import assert_almost_equal import scipy -from numpy.testing import * + from scipy.sandbox.models import utils class test_Utils(unittest.TestCase): From scipy-svn at scipy.org Thu Dec 21 22:45:46 2006 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Thu, 21 Dec 2006 21:45:46 -0600 (CST) Subject: [Scipy-svn] r2460 - trunk/Lib/sandbox/models Message-ID: <20061222034546.EEF3F39C18C@new.scipy.org> Author: timl Date: 2006-12-21 21:45:42 -0600 (Thu, 21 Dec 2006) New Revision: 2460 Modified: trunk/Lib/sandbox/models/rlm.py Log: small refactoring of rlm.next() Modified: trunk/Lib/sandbox/models/rlm.py =================================================================== --- 
trunk/Lib/sandbox/models/rlm.py 2006-12-22 00:44:04 UTC (rev 2459) +++ trunk/Lib/sandbox/models/rlm.py 2006-12-22 03:45:42 UTC (rev 2460) @@ -29,7 +29,8 @@ results = self.results return self.M((results.Y - results.predict) / N.sqrt(results.scale)).sum() - def next(self, results): + def next(self): + results = self.results self.weights = self.M.weights((results.Y - results.predict) / N.sqrt(results.scale)) self.initialize(self.design) results = wls_model.fit(self, results.Y) @@ -71,6 +72,6 @@ self.scale = self.results.scale = self.estimate_scale(self.results) while self.cont(self.results): - self.results = self.next(self.results) + self.results = self.next() return self.results From scipy-svn at scipy.org Fri Dec 22 04:40:22 2006 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Fri, 22 Dec 2006 03:40:22 -0600 (CST) Subject: [Scipy-svn] r2461 - trunk/Lib/sandbox/models Message-ID: <20061222094022.5E53639C131@new.scipy.org> Author: jarrod.millman Date: 2006-12-22 03:40:20 -0600 (Fri, 22 Dec 2006) New Revision: 2461 Modified: trunk/Lib/sandbox/models/cox.py Log: fix buglet Modified: trunk/Lib/sandbox/models/cox.py =================================================================== --- trunk/Lib/sandbox/models/cox.py 2006-12-22 03:45:42 UTC (rev 2460) +++ trunk/Lib/sandbox/models/cox.py 2006-12-22 09:40:20 UTC (rev 2461) @@ -154,7 +154,7 @@ raise NotImplementedError, 'Cox tie breaking method not implemented' else: raise NotImplementedError, 'tie breaking method not recognized' - return = N.array([score]) + return N.array([score]) def information(self, b, ties='breslow'): From scipy-svn at scipy.org Fri Dec 22 05:04:54 2006 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Fri, 22 Dec 2006 04:04:54 -0600 (CST) Subject: [Scipy-svn] r2462 - trunk/Lib/sandbox/models Message-ID: <20061222100454.E555839C160@new.scipy.org> Author: jarrod.millman Date: 2006-12-22 04:04:50 -0600 (Fri, 22 Dec 2006) New Revision: 2462 Modified: 
trunk/Lib/sandbox/models/model.py Log: switch from camelcase to all_lower Modified: trunk/Lib/sandbox/models/model.py =================================================================== --- trunk/Lib/sandbox/models/model.py 2006-12-22 09:40:20 UTC (rev 2461) +++ trunk/Lib/sandbox/models/model.py 2006-12-22 10:04:50 UTC (rev 2462) @@ -69,7 +69,7 @@ # return -self.logL(theta) # self.results = optimize.fmin(f, theta) -class LikelihoodModelResults: +class likelihood_model_results: def __init__(self, beta, normalized_cov_beta=None, scale=1.): self.beta = beta From scipy-svn at scipy.org Fri Dec 22 09:39:36 2006 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Fri, 22 Dec 2006 08:39:36 -0600 (CST) Subject: [Scipy-svn] r2463 - trunk/Lib/sandbox/timeseries Message-ID: <20061222143936.92C3739C19A@new.scipy.org> Author: mattknox_ca Date: 2006-12-22 08:39:32 -0600 (Fri, 22 Dec 2006) New Revision: 2463 Modified: trunk/Lib/sandbox/timeseries/tsdate.py Log: Modified: trunk/Lib/sandbox/timeseries/tsdate.py =================================================================== --- trunk/Lib/sandbox/timeseries/tsdate.py 2006-12-22 10:04:50 UTC (rev 2462) +++ trunk/Lib/sandbox/timeseries/tsdate.py 2006-12-22 14:39:32 UTC (rev 2463) @@ -77,19 +77,21 @@ return tmpStr.replace("XXXX", str(self.quarter())) def __str__(self): + return self.strfmt(self.default_fmtstr()) + + def default_fmtstr(self): if self.freq in ("B", "D"): - return self.strfmt("%d-%b-%y") + return "%d-%b-%y" elif self.freq == "S": - return self.strfmt("%d-%b-%Y %H:%M:%S") + return "%d-%b-%Y %H:%M:%S" elif self.freq == "M": - return self.strfmt("%b-%Y") + return "%b-%Y" elif self.freq == "Q": - return self.strfmt("%Yq%q") + return "%Yq%q" elif self.freq == "A": - return self.strfmt("%Y") + return "%Y" else: - return self.strfmt("%d-%b-%y") - + return "%d-%b-%y" def __add__(self, other): if isinstance(other, Date): From scipy-svn at scipy.org Fri Dec 22 19:00:49 2006 From: scipy-svn at scipy.org (scipy-svn at 
scipy.org) Date: Fri, 22 Dec 2006 18:00:49 -0600 (CST) Subject: [Scipy-svn] r2464 - in trunk/Lib/sandbox/maskedarray: . tests Message-ID: <20061223000049.17EEB39C190@new.scipy.org> Author: pierregm Date: 2006-12-22 18:00:43 -0600 (Fri, 22 Dec 2006) New Revision: 2464 Added: trunk/Lib/sandbox/maskedarray/timer_comparison.py Modified: trunk/Lib/sandbox/maskedarray/CHANGELOG trunk/Lib/sandbox/maskedarray/core.py trunk/Lib/sandbox/maskedarray/extras.py trunk/Lib/sandbox/maskedarray/tests/test_core.py trunk/Lib/sandbox/maskedarray/tests/test_extras.py trunk/Lib/sandbox/maskedarray/testutils.py Log: cf changelog Modified: trunk/Lib/sandbox/maskedarray/CHANGELOG =================================================================== --- trunk/Lib/sandbox/maskedarray/CHANGELOG 2006-12-22 14:39:32 UTC (rev 2463) +++ trunk/Lib/sandbox/maskedarray/CHANGELOG 2006-12-23 00:00:43 UTC (rev 2464) @@ -1,3 +1,13 @@ +#2006-12-22 : Core +# : - Optimized(?) default_/maximum_/minimum_fill_value +# : - Force __new__ to not return a MaskedArray, in order to ... +# : ... 
optimize __array_finalize__ +# : - Add the hard_mask flag to __new__ (*[False]*) +#2006-12-19 : Core +# : - Fixed a problem on _set_mask which prevented to set a mask to nomask +# : Extras +# : - Renamed compress2d to compress_rowcols +# : - Added dot #2006-12-18 : Extras # : - Added compress2d and mask_rowcols #2006-12-13 : - moved 'average' to 'extras' Modified: trunk/Lib/sandbox/maskedarray/core.py =================================================================== --- trunk/Lib/sandbox/maskedarray/core.py 2006-12-22 14:39:32 UTC (rev 2463) +++ trunk/Lib/sandbox/maskedarray/core.py 2006-12-23 00:00:43 UTC (rev 2464) @@ -55,13 +55,12 @@ 'var', 'where', 'zeros'] - import sys import types import cPickle # import numpy -from numpy import bool_, complex_, float_, int_, object_ +from numpy import bool_, complex_, float_, int_, object_, str_ import numpy.core.umath as umath import numpy.core.fromnumeric as fromnumeric @@ -73,11 +72,16 @@ from numpy.lib.shape_base import expand_dims as n_expand_dims import warnings +import logging +logging.basicConfig(level=logging.WARNING, + format='%(levelname)s %(message)s',) + MaskType = bool_ nomask = MaskType(0) divide_tolerance = 1.e-35 +numpy.seterr(all='ignore') #####-------------------------------------------------------------------------- #---- --- Helper functions --- @@ -128,70 +132,67 @@ #####-------------------------------------------------------------------------- #---- --- Filling options --- #####-------------------------------------------------------------------------- -# Use single element arrays or scalars. -default_real_fill_value = 1.e20 -default_complex_fill_value = 1.e20 + 0.0j -default_character_fill_value = '-' -default_integer_fill_value = 999999 -default_object_fill_value = '?' 
+# b: boolean - c: complex - f: floats - i: integer - O: object - S: string +default_filler = {'b': True, + 'c' : 1.e20 + 0.0j, + 'f' : 1.e20, + 'i' : 999999, + 'O' : '?', + 'S' : 'N/A', + } +max_filler = {'b': False, + 'f' : -numeric.inf, + 'i' : -sys.maxint, + } +min_filler = {'b' : True, + 'f' : numeric.inf, + 'i' : sys.maxint, + } + def default_fill_value (obj): "Calculates the default fill value for an object `obj`." - if isinstance(obj, types.FloatType): - return default_real_fill_value - elif isinstance(obj, types.IntType) or isinstance(obj, types.LongType): - return default_integer_fill_value - elif isinstance(obj, types.StringType): - return default_character_fill_value - elif isinstance(obj, types.ComplexType): - return default_complex_fill_value - elif isinstance(obj, MaskedArray) or isinstance(obj, ndarray): - x = obj.dtype.char - if x in typecodes['Float']: - return default_real_fill_value - if x in typecodes['Integer']: - return default_integer_fill_value - if x in typecodes['Complex']: - return default_complex_fill_value - if x in typecodes['Character']: - return default_character_fill_value - if x in typecodes['UnsignedInteger']: - return umath.absolute(default_integer_fill_value) - return default_object_fill_value + if hasattr(obj,'dtype'): + return default_filler[obj.dtype.kind] + elif isinstance(obj, float): + return default_filler['f'] + elif isinstance(obj, int) or isinstance(obj, long): + return default_filler['i'] + elif isinstance(obj, str): + return default_filler['S'] + elif isinstance(obj, complex): + return default_filler['c'] else: - return default_object_fill_value + return default_filler['O'] def minimum_fill_value (obj): "Calculates the default fill value suitable for taking the minimum of `obj`." 
- if isinstance(obj, types.FloatType): - return numeric.inf - elif isinstance(obj, types.IntType) or isinstance(obj, types.LongType): - return sys.maxint - elif isinstance(obj, MaskedArray) or isinstance(obj, ndarray): - x = obj.dtype.char - if x in typecodes['Float']: - return numeric.inf - if x in typecodes['Integer']: - return sys.maxint - if x in typecodes['UnsignedInteger']: - return sys.maxint + if hasattr(obj, 'dtype'): + objtype = obj.dtype.kind + try: + return min_filler[objtype] + except KeyError: + raise TypeError, 'Unsuitable type for calculating minimum.' + elif isinstance(obj, float): + return min_filler['f'] + elif isinstance(obj, int) or isinstance(obj, long): + return min_filler['i'] else: raise TypeError, 'Unsuitable type for calculating minimum.' def maximum_fill_value (obj): "Calculates the default fill value suitable for taking the maximum of `obj`." - if isinstance(obj, types.FloatType): - return -numeric.inf - elif isinstance(obj, types.IntType) or isinstance(obj, types.LongType): - return -sys.maxint - elif isinstance(obj, MaskedArray) or isinstance(obj, ndarray): - x = obj.dtype.char - if x in typecodes['Float']: - return -numeric.inf - if x in typecodes['Integer']: - return -sys.maxint - if x in typecodes['UnsignedInteger']: - return 0 + if hasattr(obj, 'dtype'): + objtype = obj.dtype.kind + try: + return max_filler[objtype] + except KeyError: + raise TypeError, 'Unsuitable type for calculating maximum.' + elif isinstance(obj, float): + return max_filler['f'] + elif isinstance(obj, int) or isinstance(obj, long): + #TODO: Check what happens to 'UnisgnedInteger'! + return max_filler['i'] else: raise TypeError, 'Unsuitable type for calculating maximum.' @@ -567,17 +568,17 @@ except AttributeError: return False # -def make_mask(m, copy=False, flag=False): - """make_mask(m, copy=0, flag=0) +def make_mask(m, copy=False, small_mask=False): + """make_mask(m, copy=0, small_mask=0) Returns `m` as a mask, creating a copy if necessary or requested. 
The function can accept any sequence of integers or `nomask`. Does not check that contents must be 0s and 1s. -If `flag=True`, returns `nomask` if `m` contains no true elements. +If `small_mask=True`, returns `nomask` if `m` contains no true elements. :Parameters: - `m` (ndarray) : Mask. - `copy` (boolean, *[False]*) : Returns a copy of `m` if true. - - `flag` (boolean, *[False]*): Flattens mask to `nomask` if `m` is all false. + - `small_mask` (boolean, *[False]*): Flattens mask to `nomask` if `m` is all false. """ if m is nomask: return nomask @@ -593,7 +594,7 @@ else: result = numeric.array(filled(m, True), dtype=MaskType) # Bas les masques ! - if flag and not result.any(): + if small_mask and not result.any(): return nomask else: return result @@ -603,7 +604,7 @@ result = numeric.zeros(s, dtype=MaskType) return result -def mask_or (m1, m2, copy=False, flag=True): +def mask_or (m1, m2, copy=False, small_mask=True): """Returns the combination of two masks `m1` and `m2`. The masks are combined with the `logical_or` operator, treating `nomask` as false. The result may equal m1 or m2 if the other is nomask. @@ -611,15 +612,15 @@ :Parameters: - `m` (ndarray) : Mask. - `copy` (boolean, *[False]*) : Returns a copy of `m` if true. - - `flag` (boolean, *[False]*): Flattens mask to `nomask` if `m` is all false. + - `small_mask` (boolean, *[False]*): Flattens mask to `nomask` if `m` is all false. 
""" if m1 is nomask: - return make_mask(m2, copy=copy, flag=flag) + return make_mask(m2, copy=copy, small_mask=small_mask) if m2 is nomask: - return make_mask(m1, copy=copy, flag=flag) + return make_mask(m1, copy=copy, small_mask=small_mask) if m1 is m2 and is_mask(m1): return m1 - return make_mask(umath.logical_or(m1, m2), copy=copy, flag=flag) + return make_mask(umath.logical_or(m1, m2), copy=copy, small_mask=small_mask) #####-------------------------------------------------------------------------- #--- --- Masking functions --- @@ -707,7 +708,7 @@ else: condition = umath.equal(fromnumeric.asarray(x), value) mask = nomask - mask = mask_or(mask, make_mask(condition, flag=True)) + mask = mask_or(mask, make_mask(condition, small_mask=True)) return masked_array(x, mask=mask, copy=copy, fill_value=value) def masked_values(x, value, rtol=1.e-5, atol=1.e-8, copy=True): @@ -732,7 +733,7 @@ else: condition = umath.equal(xnew, value) mask = nomask - mask = mask_or(mask, make_mask(condition, flag=True)) + mask = mask_or(mask, make_mask(condition, small_mask=True)) return masked_array(xnew, mask=mask, copy=copy, fill_value=value) #####-------------------------------------------------------------------------- @@ -757,9 +758,9 @@ "Is the use of the display value enabled?" return self._enabled - def enable(self, flag=1): - "Set the enabling flag to `flag`." - self._enabled = flag + def enable(self, small_mask=1): + "Set the enabling small_mask to `small_mask`." + self._enabled = small_mask def __str__ (self): return str(self._display) @@ -778,7 +779,7 @@ Construction: x = array(data, dtype=None, copy=True, order=False, - mask = nomask, fill_value=None, flag=True) + mask = nomask, fill_value=None, small_mask=True) If copy=False, every effort is made not to copy the data: If `data` is a MaskedArray, and argument mask=nomask, then the candidate data @@ -792,7 +793,7 @@ If `mask` is `nomask` there are no masked values. 
Otherwise mask must be convertible to an array of booleans with the same shape as x. -If `flag` is True, a mask consisting of zeros (False) only is compressed to `nomask`. +If `small_mask` is True, a mask consisting of zeros (False) only is compressed to `nomask`. Otherwise, the mask is not compressed. fill_value is used to fill in masked values when necessary, such as when @@ -800,108 +801,95 @@ The fill_value is not used for computation within this module. """ __array_priority__ = 10.1 - + #TODO: There some reorganization to do round here def __new__(cls, data, mask=nomask, dtype=None, copy=False, fill_value=None, - flag=True, keep_mask=True): + keep_mask=True, small_mask=True, hard_mask=False): """array(data, dtype=None, copy=True, mask=nomask, fill_value=None) If `data` is already a ndarray, its dtype becomes the default value of dtype. """ - if dtype is not None: - dtype = numeric.dtype(dtype) +# logging.debug("__new__ received %s" % type(data)) # 1. Argument is MA ........... if isinstance(data, MaskedArray) or\ (hasattr(data,"_mask") and hasattr(data,"_data")) : if keep_mask: if mask is nomask: - cls._basemask = data._mask + cls.__defaultmask = data._mask else: - cls._basemask = mask_or(data._mask, mask) + cls.__defaultmask = mask_or(data._mask, mask, + copy=copy, small_mask=small_mask) else: - # Force copy of mask if it changes - cls._basemask = make_mask(mask, copy=copy, flag=flag) + cls.__defaultmask = make_mask(mask, copy=copy, small_mask=small_mask) # Update fille_value if fill_value is None: cls._fill_value = data._fill_value else: cls._fill_value = fill_value - return numeric.array(data._data, dtype=dtype, copy=copy).view(cls) + cls.__defaulthardmask = hard_mask + _data = data._data + if dtype is not None and _data.dtype != numeric.dtype(dtype): + return _data.astype(dtype).view(cls) + elif copy: + return _data.copy().view(cls) + else: + return _data.view(cls) # 2. Argument is not MA ....... 
if isinstance(data, ndarray): - if dtype is not None and data.dtype != dtype: + if dtype is not None and data.dtype != numeric.dtype(dtype): _data = data.astype(dtype) elif copy: _data = data.copy() else: _data = data else: - try: - _data = numeric.array(data, dtype=dtype, copy=copy) - except TypeError: - _data = empty(len(data), dtype=dtype) - for (k,v) in enumerate(data): - _data[k] = v - if mask is nomask: - cls._basemask = getmask(_data) - return _data.view(cls) + _data = numeric.array(data, dtype=dtype, copy=copy) +# try: +# _data = numeric.array(data, dtype=dtype, copy=copy) +# except TypeError: +# _data = empty(len(data), dtype=dtype) +# for (k,v) in enumerate(data): +# _data[k] = v +# if mask is nomask: +# cls.__defaultmask = getmask(_data) +# return _data.view(cls) # Define mask ................. - _mask = make_mask(mask, copy=False, flag=flag) + if not is_mask(mask): + mask = make_mask(mask, small_mask=small_mask) #....Check shapes compatibility - if _mask is not nomask: - (nd, nm) = (_data.size, _mask.size) + if mask is not nomask: + (nd, nm) = (_data.size, mask.size) if (nm != nd): + # We need to resize w/ a function, in case _data is only a reference if nm == 1: - _mask = fromnumeric.resize(_mask, _data.shape) + mask = fromnumeric.resize(mask, _data.shape) elif nd == 1: - _data = fromnumeric.resize(_data, _mask.shape) + _data = fromnumeric.resize(_data, mask.shape) else: msg = "Mask and data not compatible: data size is %i, "+\ "mask size is %i." raise MAError, msg % (nd, nm) - elif (_mask.shape != _data.shape): - _mask = _mask.reshape(_data.shape).copy() + elif (mask.shape != _data.shape): + mask = mask.reshape(_data.shape) +# mask = _data.shape #.... cls._fill_value = fill_value - cls._basemask = _mask + cls.__defaulthardmask = hard_mask + cls.__defaultmask = mask +# logging.debug("__new__ returned %s as %s" % (type(_data), cls)) return numeric.asanyarray(_data).view(cls) #.................................. 
- def __array__ (self, t=None, context=None): - "Special hook for numeric. Converts to numeric if possible." - # Er... Do we really need __array__ ? - if self._mask is not nomask: - if fromnumeric.ravel(self._mask).any(): - if context is None: - # Hardliner stand: raise an exception - # We may wanna use warnings.warn instead - raise MAError,\ - "Cannot automatically convert masked array to "\ - "numeric because data\n is masked in one or "\ - "more locations." - #return self._data - else: - func, args, i = context - fills = ufunc_fills.get(func) - if fills is None: - raise MAError, "%s not known to ma" % func - return self.filled(fills[i]) - else: # Mask is all false - # Optimize to avoid future invocations of this section. - self._mask = nomask - self._shared_mask = 0 - if t: - return self._data.astype(t) - else: - return self._data - #.................................. def __array_wrap__(self, obj, context=None): """Special hook for ufuncs. Wraps the numpy array and sets the mask according to context. """ - mclass = self.__class__ +# mclass = self.__class__ #.......... +# logging.debug("__wrap__ received %s" % type(obj)) if context is None: - print "DEBUG _wrap_: no context" - return mclass(obj, mask=self._mask, copy=False) +# return mclass(obj, mask=self._mask, copy=False) + return MaskedArray(obj, mask=self._mask, copy=False, + dtype=obj.dtype, + fill_value=self.fill_value, ) #.......... (func, args) = context[:2] m = reduce(mask_or, [getmask(arg) for arg in args]) @@ -918,83 +906,122 @@ else: if m.shape != dshape: m = reduce(mask_or, [getmaskarray(arg) for arg in args]) - return mclass(obj, copy=False, mask=m) +# return mclass(obj, copy=False, mask=m) + return MaskedArray(obj, copy=False, mask=m,) +# dtype=obj.dtype, fill_value=self._fill_value) #........................ + #TODO: there should be some reorganization to do round here. def __array_finalize__(self,obj): """Finalizes the masked array. 
""" # - if not hasattr(self, "_data"): - try: - self._data = obj._data - except AttributeError: - self._data = obj - # - self.fill_value = self._fill_value - # - if not hasattr(self, '_mask'): - self._mask = self._basemask - # +# logging.debug("__finalize__ received %s" % type(obj)) + if isMaskedArray(obj): + self._data = obj._data + self._mask = obj._mask + self._hardmask = obj._hardmask + self._fill_value = obj._fill_value + else: + self._data = obj + self._mask = self.__defaultmask + self._hardmask = self.__defaulthardmask + self.fill_value = self._fill_value +# # +# logging.debug("__finalize__ returned %s" % type(self)) return #............................................ - def __getitem__(self, i): + def __getitem__(self, index): """x.__getitem__(y) <==> x[y] Returns the item described by i. Not a copy as in previous versions. """ - dout = self._data[i] - if self._mask is nomask: - if numeric.size(dout)==1: + if getmask(index) is not nomask: + msg = "Masked arrays must be filled before they can be used as indices!" + raise IndexError, msg + dout = self._data[index] + m = self._mask + scalardout = (len(numeric.shape(dout))==0) + # + if m is nomask: + if scalardout: return dout else: - return self.__class__(dout, mask=nomask, - fill_value=self._fill_value, - dtype = self.dtype,) + return self.__class__(dout, mask=nomask, keep_mask=True, + fill_value=self._fill_value) #.... -# m = self._mask.copy() - m = self._mask - mi = m[i] + mi = m[index] if mi.size == 1: if mi: return masked - else: - return dout + return dout else: - return self.__class__(dout, mask=mi, dtype = self.dtype, - fill_value=self._fill_value) + return self.__class__(dout, mask=mi, fill_value=self._fill_value) #........................ def __setitem__(self, index, value): """x.__setitem__(i, y) <==> x[i]=y Sets item described by index. If value is masked, masks those locations. """ - d = self._data if self is masked: raise MAError, 'Cannot alter the masked element.' 
+ if getmask(index) is not nomask: + msg = "Masked arrays must be filled before they can be used as indices!" + raise IndexError, msg #.... + (d, m) = (self._data, self._mask) + #.... if value is masked: - if self._mask is nomask: - _mask = make_mask_none(d.shape) + if m is nomask: + m = make_mask_none(d.shape) else: - _mask = self._mask.copy() - _mask[index] = True - self._mask = _mask + m = m.copy() + m[index] = True + self._mask = m return - #.... - m = getmask(value) - value = filled(value).astype(d.dtype) - d[index] = value + #.... if m is nomask: - if self._mask is not nomask: - _mask = self._mask.copy() - _mask[index] = False + d[index] = filled(value) + valmask = getmask(value) + if valmask is not nomask: + m = make_mask_none(d.shape) + m[index] = valmask + elif not self._hardmask: + d[index] = filled(value) + valmask = getmask(value) + m = m.copy() + if valmask is nomask: + m[index] = False else: - _mask = nomask + m[index] = valmask + elif hasattr(index, 'dtype') and (index.dtype==bool_): + index *= ~m + d[index] = filled(value) +# elif isinstance(index, int): + else: + mindx = m[index] + value = masked_array(value, mask=mindx, keep_mask=True) + valdata = filled(value) + valmask = getmask(value) + if valmask is nomask: + d[index] = valdata + else: + dindx = d[index] + numeric.putmask(dindx, ~valmask, valdata) + d[index] = dindx + numeric.putmask(mindx, valmask, True) + m[index] = mindx +# else: +# mindx = m[index] +# value = masked_array(value, mask=mindx, keep_mask=True) +# valmask = getmask(value) +# if valmask is nomask: +# d[vindx] = filled(value) +# else: +# d[vindx[~valmask]] = filled(value[~valmask]) +# m[vindx[valmask]] = True + #..... + if not m.any(): + self._mask = nomask else: - if self._mask is nomask: - _mask = make_mask_none(d.shape) - else: - _mask = self._mask.copy() - _mask[index] = m - self._mask = _mask + self._mask = m #............................................ 
def __getslice__(self, i, j): """x.__getslice__(i, j) <==> x[i:j] @@ -1003,46 +1030,81 @@ m = self._mask dout = self._data[i:j] if m is nomask: - return self.__class__(dout, dtype = self.dtype, - fill_value=self._fill_value) + return self.__class__(dout, fill_value=self._fill_value) else: - return self.__class__(dout, mask=m[i:j], dtype = self.dtype, - fill_value=self._fill_value) + return self.__class__(dout, mask=m[i:j], fill_value=self._fill_value) #........................ def __setslice__(self, i, j, value): """x.__setslice__(i, j, value) <==> x[i:j]=value Sets a slice i:j to `value`. If `value` is masked, masks those locations.""" - d = self._data if self is masked: #TODO: Well, maybe we could/should raise MAError, "Cannot alter the 'masked' object." #.... + (d, m) = (self._data, self._mask) + #.... if value is masked: - if self._mask is nomask: - _mask = make_mask_none(d.shape) - else: - _mask = self._mask.copy() - _mask[i:j] = True - self._mask = _mask + if m is nomask: + m = make_mask_none(d.shape) + m[i:j] = True return #.... - m = getmask(value) - value = filled(value).astype(d.dtype) - d[i:j] = value +# valmask = getmask(value) +# valdata = filled(value).astype(d.dtype) +# if valmask is nomask: +# if m is nomask: +# d[i:j] = valdata +# elif self._hardmask: +# d[(~m)[i:j]] = valdata +# elif not self._hardmask: +# d[i:j] = valdata +# m[i:j] = False +# if not m.any(): +# self._mask = nomask +# return +# else: +# if m is nomask: +# m = make_mask_none(d.shape) +# m[i:j] = valmask +# elif self._hardmask: +# m[i:j] = mask_or(m[i:j], valmask) +# else: +# m[i:j] = valmask +# d[(~m)[i:j]] = valdata +# if not m.any(): +# self._mask = nomask +# return + #.... 
if m is nomask: - if self._mask is not nomask: - _mask = self._mask.copy() - _mask[i:j] = False + valmask = getmask(value) + valdata = filled(value) + d[i:j] = valdata + if valmask is not nomask: + m = make_mask_none(d.shape) + m[i:j] = valmask + elif not self._hardmask: + valmask = getmask(value) + valdata = filled(value) + d[i:j] = valdata + if valmask is nomask: + m[i:j] = False else: - _mask = nomask + m[i:j] = valmask else: - if self._mask is nomask: - _mask = make_mask_none(d.shape) + mindx = m[i:j] + value = masked_array(value, mask=mindx, keep_mask=True) + valmask = getmask(value) + if valmask is None: + d[i:j][~mindx] = filled(value) else: - _mask = self._mask.copy() - _mask[i:j] = m - self._mask = make_mask(_mask, flag=True) + d[i:j][~mindx] = value[~valmask] + m[i:j][valmask] = True + #..... + if not m.any(): + self._mask = nomask + else: + self._mask = m #............................................ # If we don't want to crash the performance, we better leave __getattribute__ alone... # def __getattribute__(self, name): @@ -1124,7 +1186,7 @@ the absolute of the inital `_data`. """ return self.__class__(self._data.__abs__(), mask=self._mask, - dtype = self.dtype,) + fill_value = self._fill_value,) # def __neg__(self): """x.__abs__() <==> neg(x) @@ -1132,53 +1194,41 @@ the negative of the inital `_data`.""" try: return self.__class__(self._data.__neg__(), mask=self._mask, - dtype = self.dtype,) + fill_value = self._fill_value,) except MAError: return negative(self) # def __iadd__(self, other): "Adds other to self in place." 
f = convert_typecode(filled(other, 0), self._data.dtype.char) + m = getmask(other) + self._data += f if self._mask is nomask: - self._data += f - m = getmask(other) self._mask = m - ###self._shared_mask = m is not nomask - else: - tmp = masked_array(f, mask=getmask(other)) - self._data += tmp._data - self._mask = mask_or(self._mask, tmp._mask) - ###self._shared_mask = 1 + elif m is not nomask: + self._mask += m return self # def __isub__(self, other): "Subtracts other from self in place." f = convert_typecode(filled(other, 0), self._data.dtype.char) + m = getmask(other) + self._data -= f if self._mask is nomask: - self._data -= f - m = getmask(other) self._mask = m - ###self._shared_mask = m is not nomask - else: - tmp = masked_array(f, mask=getmask(other)) - self._data -= tmp._data - self._mask = mask_or(self._mask, tmp._mask) - ###self._shared_mask = 1 + elif m is not nomask: + self._mask += m return self # def __imul__(self, other): "Multiplies self by other in place." - f = convert_typecode(filled(other, 0), self._data.dtype.char) + f = convert_typecode(filled(other, 1), self._data.dtype.char) + m = getmask(other) + self._data *= f if self._mask is nomask: - self._data *= f - m = getmask(other) self._mask = m - ####self._shared_mask = m is not nomask - else: - tmp = masked_array(f, mask=getmask(other)) - self._data *= tmp._data - self._mask = mask_or(self._mask, tmp._mask) - ###self._shared_mask = 1 + elif m is not nomask: + self._mask += m return self # def __idiv__(self, other): @@ -1215,7 +1265,7 @@ def __float__(self): "Converts self to float." if self._mask is not nomask: - print "Warning: converting a masked element to nan." + warnings.warn("Warning: converting a masked element to nan.") return numpy.nan #raise MAError, 'Cannot convert masked element to a Python float.' 
return float(self._data.item()) @@ -1236,10 +1286,15 @@ Subclassing is preserved.""" if tc == self._data.dtype: return self - d = self._data.astype(tc) -# print "DEBUG: _astype: d", d -# print "DEBUG: _astype: m", self._mask + d = self._data.astype(tc) return self.__class__(d, mask=self._mask, dtype=tc) + #............................................ + def harden_mask(self): + "Forces the mask to hard" + self._hardmask = True + def soften_mask(self): + "Forces the mask to soft" + self._hardmask = False #............................................ #TODO: FIX THAT: THAT"S NOT A REAL FLATITER def _get_flat(self): @@ -1247,10 +1302,10 @@ """ if self._mask is nomask: return masked_array(self._data.ravel(), mask=nomask, copy=False, - fill_value = self.fill_value) + fill_value = self._fill_value) else: return masked_array(self._data.ravel(), mask=self._mask.ravel(), - copy=False, fill_value = self.fill_value) + copy=False, fill_value = self._fill_value) # def _set_flat (self, value): "x.flat = value" @@ -1263,7 +1318,7 @@ def _get_real(self): "Returns the real part of a complex array." return masked_array(self._data.real, mask=self.mask, - fill_value = self.fill_value) + fill_value = self._fill_value) # if self.mask is nomask: # return masked_array(self._data.real, mask=nomask, # fill_value = self.fill_value) @@ -1281,7 +1336,7 @@ def _get_imaginary(self): "Returns the imaginary part of a complex array." return masked_array(self._data.imag, mask=nomask, - fill_value = self.fill_value) + fill_value = self._fill_value) def _set_imaginary (self, value): "Sets the imaginary part of a complex array to `value`." 
@@ -1295,17 +1350,17 @@ def _get_mask(self): """Returns the current mask.""" return self._mask - def _set_mask(self, mask): """Sets the mask to `mask`.""" - mask = make_mask(mask, copy=False, flag=True) + mask = make_mask(mask, copy=False, small_mask=True) if mask is not nomask: if mask.size != self._data.size: raise ValueError, "Inconsistent shape between data and mask!" if mask.shape != self._data.shape: mask.shape = self._data.shape - self._mask = mask - + self._mask = mask + else: + self._mask = nomask mask = property(fget=_get_mask, fset=_set_mask, doc="Mask") #............................................ def get_fill_value(self): @@ -1332,11 +1387,10 @@ d = self._data m = self._mask if m is nomask: -# return fromnumeric.asarray(d) return d # if fill_value is None: - value = self._fill_value + value = self.fill_value else: value = fill_value # @@ -1367,11 +1421,8 @@ d = self._data.ravel() if self._mask is nomask: return d -# return numeric.asarray(d) else: -# m = 1 - self._mask.ravel() -# return numeric.asarray(d.compress(m)) - return d.compress(-self._mask.ravel()) + return d[~self._mask.ravel()] #............................................ def count(self, axis=None): """Counts the non-masked elements of the array along a given axis, @@ -1433,11 +1484,9 @@ if self._mask is not nomask: return self.__class__(self._data.reshape(*s), mask=self._mask.reshape(*s), - dtype = self.dtype, fill_value=self.fill_value) else: return self.__class__(self._data.reshape(*s), - dtype = self.dtype, fill_value=self.fill_value) # def repeat(self, repeats, axis=None): @@ -1458,25 +1507,18 @@ if m is not nomask: m = fromnumeric.repeat(m, repeats, axis) d = fromnumeric.repeat(f, repeats, axis) - return self.__class__(d, mask=m, dtype = self.dtype, - fill_value=self.fill_value) + return self.__class__(d, mask=m, fill_value=self.fill_value) # def resize(self, newshape, refcheck=True, order=False): """Attempts to modify size and shape of self inplace. 
The array must own its own memory and not be referenced by other arrays. Returns None. """ - raiseit = False try: self._data.resize(newshape,) + if self.mask is not nomask: + self._mask.resize(newshape,) except ValueError: - raiseit = True - if self.mask is not nomask: - try: - self._mask.resize(newshape,) - except ValueError: - raiseit = True - if raiseit: msg = "Cannot resize an array that has been referenced or "+\ "is referencing another array in this way.\n"+\ "Use the resize function." @@ -1511,7 +1553,7 @@ m.put(ind, values._mask, mode=mode) else: m.put(ind, False, mode=mode) - self._mask = make_mask(m, copy=False, flag=True) + self._mask = make_mask(m, copy=False, small_mask=True) #............................................ def ids (self): """Return the ids of the data and mask areas.""" @@ -1576,14 +1618,12 @@ # if axis is None: # return self._data.sum(None, dtype=dtype) return self.__class__(self._data.sum(axis, dtype=dtype), - mask=nomask, dtype = self.dtype, - fill_value=self.fill_value) + mask=nomask, fill_value=self.fill_value) else: # if axis is None: # return self.filled(0).sum(None, dtype=dtype) return self.__class__(self.filled(0).sum(axis, dtype=dtype), mask=self._mask.all(axis), - dtype = self.dtype, fill_value=self.fill_value) def cumsum(self, axis=None, dtype=None): @@ -1596,15 +1636,12 @@ # if axis is None: # return self._data.cumsum(None, dtype=dtype) return self.__class__(self._data.cumsum(axis=axis, dtype=dtype), - dtype = self.dtype, fill_value=self.fill_value) else: # if axis is None: # return self.filled(0).cumsum(None, dtype=dtype) return self.__class__(self.filled(0).cumsum(axis=axis, dtype=dtype), - mask=self._mask, - dtype = self.dtype, - fill_value=self.fill_value) + mask=self._mask, fill_value=self.fill_value) def prod(self, axis=None, dtype=None): """a.prod(axis=None, dtype=None) @@ -1616,16 +1653,13 @@ # if axis is None: # return self._data.prod(None, dtype=dtype) return self.__class__(self._data.prod(axis, dtype=dtype), - 
mask=nomask, - dtype = self.dtype, - fill_value=self.fill_value) + mask=nomask, fill_value=self.fill_value) # return self.__class__(self._data.prod(axis=axis, dtype=dtype)) else: # if axis is None: # return self.filled(1).prod(None, dtype=dtype) return self.__class__(self.filled(1).prod(axis=axis, dtype=dtype), mask=self._mask.all(axis), - dtype = self.dtype, fill_value=self.fill_value) product = prod @@ -1639,15 +1673,12 @@ # if axis is None: # return self._data.cumprod(None, dtype=dtype) return self.__class__(self._data.cumprod(axis=axis, dtype=dtype), - mask=nomask, - dtype = self.dtype, - fill_value=self.fill_value,) + mask=nomask, fill_value=self.fill_value,) else: # if axis is None: # return self.filled(1).cumprod(None, dtype=dtype) return self.__class__(self.filled(1).cumprod(axis=axis, dtype=dtype), mask=self._mask, - dtype = self.dtype, fill_value=self.fill_value) def mean(self, axis=None, dtype=None): @@ -1667,8 +1698,7 @@ # if axis is None: # return self._data.mean(axis=None, dtype=dtype) return self.__class__(self._data.mean(axis=axis, dtype=dtype), - mask=nomask, dtype = self.dtype, - fill_value=self.fill_value) + mask=nomask, fill_value=self.fill_value) else: dsum = fromnumeric.sum(self.filled(0), axis=axis, dtype=dtype) cnt = self.count(axis=axis) @@ -1676,7 +1706,6 @@ if axis is None and mask: return masked return self.__class__(dsum*1./cnt, mask=mask, - dtype = self.dtype, fill_value=self.fill_value,) def anom(self, axis=None, dtype=None): @@ -1701,7 +1730,6 @@ # return self._data.var(axis=None, dtype=dtype) return self.__class__(self._data.var(axis=axis, dtype=dtype), mask=nomask, - dtype = self.dtype, fill_value=self.fill_value) else: cnt = self.count(axis=axis) @@ -1713,7 +1741,6 @@ return dvar return self.__class__(dvar, mask=mask_or(self._mask.all(axis), (cnt==1)), - dtype = self.dtype, fill_value=self.fill_value) def std(self, axis=None, dtype=None): @@ -1970,13 +1997,13 @@ def __call__ (self, other, *args): "Execute the call behavior." 
instance = self.obj - m = mask_or(instance._mask, getmask(other), flag=False) + m = mask_or(instance._mask, getmask(other), small_mask=False) base = instance.filled(self.fill_self) target = filled(other, self.fill_other) method = getattr(base, self.methodname) return instance.__class__(method(target, *args), mask=m, - dtype = instance.dtype, - fill_value=instance.fill_value) +# fill_value=instance.fill_value + ) #.......................................................... MaskedArray.__add__ = _arithmethods('__add__') MaskedArray.__radd__ = _arithmethods('__add__') @@ -2016,15 +2043,16 @@ masked = masked_singleton masked_array = MaskedArray -def array(data, dtype=None, copy=False, order=False, - mask=nomask, keep_mask=True, flag=True, fill_value=None): +def array(data, dtype=None, copy=False, order=False, mask=nomask, + keep_mask=True, small_mask=True, hard_mask=None, fill_value=None): """array(data, dtype=None, copy=True, order=False, mask=nomask, - keep_mask=True, flag=True, fill_value=None) + keep_mask=True, small_mask=True, fill_value=None) Acts as shortcut to MaskedArray, with options in a different order for convenience. And backwards compatibility... 
""" return MaskedArray(data, mask=mask, dtype=dtype, copy=copy, - keep_mask = keep_mask, flag=flag, fill_value=fill_value) + keep_mask=keep_mask, small_mask=small_mask, + hard_mask=hard_mask, fill_value=fill_value) def is_masked(x): """Returns whether x has some masked values.""" @@ -2344,7 +2372,7 @@ fb = filled(b, 1) if fb.dtype.char in typecodes["Integer"]: return masked_array(umath.power(fa, fb), m) - md = make_mask((fa < 0), flag=1) + md = make_mask((fa < 0), small_mask=1) m = mask_or(m, md) if m is nomask: return masked_array(umath.power(fa, fb)) @@ -2469,7 +2497,7 @@ dm = [] for x in arrays: dm.append(getmaskarray(x)) - dm = make_mask(numeric.concatenate(dm, axis), copy=False, flag=True) + dm = make_mask(numeric.concatenate(dm, axis), copy=False, small_mask=True) return masked_array(d, mask=dm) def expand_dims(x,axis): @@ -2598,7 +2626,7 @@ d = numeric.choose(fc, (yv, xv)) md = numeric.choose(fc, (ym, xm)) m = getmask(condition) - m = make_mask(mask_or(m, md), copy=False, flag=True) + m = make_mask(mask_or(m, md), copy=False, small_mask=True) return masked_array(d, mask=m) def choose (indices, t, out=None, mode='raise'): @@ -2622,7 +2650,7 @@ a = [fmask(x) for x in t] d = numeric.choose(c, a) m = numeric.choose(c, masks) - m = make_mask(mask_or(m, getmask(indices)), copy=0, flag=1) + m = make_mask(mask_or(m, getmask(indices)), copy=0, small_mask=1) return masked_array(d, mask=m) def sort (x, axis=-1, fill_value=None, kind='quicksort'): @@ -2780,7 +2808,7 @@ in a pickle.""" _data = ndarray.__new__(ndarray, baseshape, basetype) _mask = ndarray.__new__(ndarray, baseshape, basetype) - return MaskedArray.__new__(subtype, _data, mask=_mask, dtype=basetype, flag=False) + return MaskedArray.__new__(subtype, _data, mask=_mask, dtype=basetype, small_mask=False) def _getstate(a): "Returns the internal state of the masked array, for pickling purposes." 
Modified: trunk/Lib/sandbox/maskedarray/extras.py =================================================================== --- trunk/Lib/sandbox/maskedarray/extras.py 2006-12-22 14:39:32 UTC (rev 2463) +++ trunk/Lib/sandbox/maskedarray/extras.py 2006-12-23 00:00:43 UTC (rev 2464) @@ -14,8 +14,10 @@ __all__ = ['apply_along_axis', 'atleast_1d', 'atleast_2d', 'atleast_3d', 'average', 'vstack', 'hstack', 'dstack', 'row_stack', 'column_stack', - 'compress2d', 'count_masked', - 'mask_rowcols','masked_all', 'masked_all_like', 'mr_', + 'compress_rowcols', 'compress_rows', 'compress_cols', 'count_masked', + 'dot', + 'mask_rowcols', 'mask_rows', 'mask_cols', 'masked_all', + 'masked_all_like', 'mr_', 'notmasked_edges', 'notmasked_contiguous', 'stdu', 'varu', ] @@ -229,7 +231,6 @@ outarr[tuple(i.tolist())] = res dtypes.append(asarray(res).dtype) k += 1 - print dtypes if not hasattr(arr, '_mask'): return numeric.asarray(outarr, dtype=max(dtypes)) else: @@ -349,13 +350,12 @@ if isinstance(d, ndarray) and (not d.shape == result.shape): d = ones(result.shape, dtype=float_) * d if returned: - print type(result) return result, d else: return result #.............................................................................. -def compress2d(x, axis=None): +def compress_rowcols(x, axis=None): """Suppresses the rows and/or columns of a 2D array that contains masked values. The suppression behavior is selected with the `axis`parameter. - If axis is None, rows and columns are suppressed. 
@@ -369,7 +369,7 @@ m = getmask(x) # Nothing is masked: return x if m is nomask or not m.any(): - return nxasarray(x) + return x._data # All is masked: return empty if m.all(): return nxarray([]) @@ -382,11 +382,19 @@ if axis in [None, 1, -1]: for j in function_base.unique(masked[1]): idxc.remove(j) - return nxasarray(x[idxr][:,idxc]) + return x._data[idxr][:,idxc] +def compress_rows(a): + """Suppresses whole rows of a 2D array that contain masked values.""" + return compress_rowcols(a,0) + +def compress_cols(a): + """Suppresses whole columnss of a 2D array that contain masked values.""" + return compress_rowcols(a,1) + def mask_rowcols(a, axis=None): - """Suppresses the rows and/or columns of a 2D array that contains masked values. - The suppression behavior is selected with the `axis`parameter. + """Masks whole rows and/or columns of a 2D array that contain masked values. + The masking behavior is selected with the `axis`parameter. - If axis is None, rows and columns are suppressed. - If axis is 0, only rows are suppressed. - If axis is 1 or -1, only columns are suppressed. @@ -406,7 +414,37 @@ a[:,function_base.unique(maskedval[1])] = masked return a +def mask_rows(a, axis=None): + """Masks whole rows of a 2D array that contain masked values.""" + return mask_rowcols(a, 0) +def mask_cols(a, axis=None): + """Masks whole columns of a 2D array that contain masked values.""" + return mask_rowcols(a, 1) + + +def dot(a,b): + """Returns the dot product of two 2D masked arrays a and b. + Like the generic numpy equivalent the product sum is over + the last dimension of a and the second-to-last dimension of b. + + Masked values are propagated: if a masked value appears in a row or column, + the whole row or column is considered masked. + + NB: The first argument is not conjugated. + """ + #TODO: Works only with 2D arrays. 
There should be a way to get it to run with higher dimension + a = mask_rows(a) + b = mask_cols(b) + # + d = numpy.dot(a.filled(0), b.filled(0)) + # + am = (~getmaskarray(a)) + bm = (~getmaskarray(b)) + m = ~numpy.dot(am,bm) + return masked_array(d, mask=m) + + #####-------------------------------------------------------------------------- #---- --- Concatenation helpers --- #####-------------------------------------------------------------------------- Modified: trunk/Lib/sandbox/maskedarray/tests/test_core.py =================================================================== --- trunk/Lib/sandbox/maskedarray/tests/test_core.py 2006-12-22 14:39:32 UTC (rev 2463) +++ trunk/Lib/sandbox/maskedarray/tests/test_core.py 2006-12-23 00:00:43 UTC (rev 2464) @@ -950,6 +950,63 @@ #We default to true mx = masked_array(x, mask=[0,1,0]) assert_equal(mx.mask, [1,1,0]) + + def check_hardmask(self): + "Test hard_mask" + d = arange(5) + n = [0,0,0,1,1] + m = make_mask(n) + xh = array(d, mask = m, hard_mask=True) + # We need to copy, to avoid updating d in xh! 
+ xs = array(d, mask = m, hard_mask=False, copy=True) + xh[[1,4]] = [10,40] + xs[[1,4]] = [10,40] + assert_equal(xh._data, [0,10,2,3,4]) + assert_equal(xs._data, [0,10,2,3,40]) + assert(xh.mask is m) + assert_equal(xs.mask, [0,0,0,1,0]) + assert(xh._hardmask) + assert(not xs._hardmask) + xh[1:4] = [10,20,30] + xs[1:4] = [10,20,30] + assert_equal(xh._data, [0,10,20,3,4]) + assert_equal(xs._data, [0,10,20,30,40]) + assert(xh.mask is m) + assert_equal(xs.mask, nomask) + xh[0] = maskedarray.core.masked + xs[0] = maskedarray.core.masked + assert_equal(xh.mask, [1,0,0,1,1]) + assert_equal(xs.mask, [1,0,0,0,0]) + xh[:] = 1 + xs[:] = 1 + assert_equal(xh._data, [0,1,1,3,4]) + assert_equal(xs._data, [1,1,1,1,1]) + assert_equal(xh.mask, [1,0,0,1,1]) + assert_equal(xs.mask, nomask) + # Switch to soft mask + xh.soften_mask() + xh[:] = arange(5) + assert_equal(xh._data, [0,1,2,3,4]) + assert_equal(xh.mask, nomask) + # Switch back to hard mask + xh.harden_mask() + xh[xh<3] = maskedarray.core.masked + assert_equal(xh._data, [0,1,2,3,4]) + assert_equal(xh._mask, [1,1,1,0,0]) + xh[filled(xh>1,False)] = 5 + assert_equal(xh._data, [0,1,2,5,5]) + assert_equal(xh._mask, [1,1,1,0,0]) + # + xh = array([[1,2],[3,4]], mask = [[1,0],[0,0]], hard_mask=True) + xh[0] = 0 + assert_equal(xh._data, [[1,0],[3,4]]) + assert_equal(xh._mask, [[1,0],[0,0]]) + xh[-1,-1] = 5 + assert_equal(xh._data, [[1,0],[3,5]]) + assert_equal(xh._mask, [[1,0],[0,0]]) + xh[filled(xh<5,False)] = 2 + assert_equal(xh._data, [[1,2],[2,5]]) + assert_equal(xh._mask, [[1,0],[0,0]]) #.............................................................................. #.............................................................................. 
Modified: trunk/Lib/sandbox/maskedarray/tests/test_extras.py =================================================================== --- trunk/Lib/sandbox/maskedarray/tests/test_extras.py 2006-12-22 14:39:32 UTC (rev 2463) +++ trunk/Lib/sandbox/maskedarray/tests/test_extras.py 2006-12-23 00:00:43 UTC (rev 2464) @@ -173,26 +173,26 @@ assert(tmp[1] is None) assert_equal(tmp[2][-1], (6, (0,5))) -class test_compress2d(NumpyTestCase): - "Tests compress2d and mask_row_columns." +class test_2dfunctions(NumpyTestCase): + "Tests 2D functions" def check_compress2d(self): "Tests compress2d" x = array(N.arange(9).reshape(3,3), mask=[[1,0,0],[0,0,0],[0,0,0]]) - assert_equal(compress2d(x), [[4,5],[7,8]] ) - assert_equal(compress2d(x,0), [[3,4,5],[6,7,8]] ) - assert_equal(compress2d(x,1), [[1,2],[4,5],[7,8]] ) + assert_equal(compress_rowcols(x), [[4,5],[7,8]] ) + assert_equal(compress_rowcols(x,0), [[3,4,5],[6,7,8]] ) + assert_equal(compress_rowcols(x,1), [[1,2],[4,5],[7,8]] ) x = array(x._data, mask=[[0,0,0],[0,1,0],[0,0,0]]) - assert_equal(compress2d(x), [[0,2],[6,8]] ) - assert_equal(compress2d(x,0), [[0,1,2],[6,7,8]] ) - assert_equal(compress2d(x,1), [[0,2],[3,5],[6,8]] ) + assert_equal(compress_rowcols(x), [[0,2],[6,8]] ) + assert_equal(compress_rowcols(x,0), [[0,1,2],[6,7,8]] ) + assert_equal(compress_rowcols(x,1), [[0,2],[3,5],[6,8]] ) x = array(x._data, mask=[[1,0,0],[0,1,0],[0,0,0]]) - assert_equal(compress2d(x), [[8]] ) - assert_equal(compress2d(x,0), [[6,7,8]] ) - assert_equal(compress2d(x,1,), [[2],[5],[8]] ) + assert_equal(compress_rowcols(x), [[8]] ) + assert_equal(compress_rowcols(x,0), [[6,7,8]] ) + assert_equal(compress_rowcols(x,1,), [[2],[5],[8]] ) x = array(x._data, mask=[[1,0,0],[0,1,0],[0,0,1]]) - assert_equal(compress2d(x).size, 0 ) - assert_equal(compress2d(x,0).size, 0 ) - assert_equal(compress2d(x,1).size, 0 ) + assert_equal(compress_rowcols(x).size, 0 ) + assert_equal(compress_rowcols(x,0).size, 0 ) + assert_equal(compress_rowcols(x,1).size, 0 ) # def 
check_mask_rowcols(self): "Tests mask_rowcols." @@ -212,6 +212,55 @@ assert(mask_rowcols(x).all()) assert(mask_rowcols(x,0).all()) assert(mask_rowcols(x,1).all()) + # + def test_dot(self): + "Tests dot product" + n = N.arange(1,7) + # + m = [1,0,0,0,0,0] + a = masked_array(n, mask=m).reshape(2,3) + b = masked_array(n, mask=m).reshape(3,2) + c = dot(a,b) + assert_equal(c.mask, [[1,1],[1,0]]) + c = dot(b,a) + assert_equal(c.mask, [[1,1,1],[1,0,0],[1,0,0]]) + # + m = [0,0,0,0,0,1] + a = masked_array(n, mask=m).reshape(2,3) + b = masked_array(n, mask=m).reshape(3,2) + c = dot(a,b) + assert_equal(c.mask,[[0,1],[1,1]]) + c = dot(b,a) + assert_equal(c.mask, [[0,0,1],[0,0,1],[1,1,1]]) + # + m = [0,0,0,0,0,0] + a = masked_array(n, mask=m).reshape(2,3) + b = masked_array(n, mask=m).reshape(3,2) + c = dot(a,b) + assert_equal(c.mask,nomask) + c = dot(b,a) + assert_equal(c.mask,nomask) + # + a = masked_array(n, mask=[1,0,0,0,0,0]).reshape(2,3) + b = masked_array(n, mask=[0,0,0,0,0,0]).reshape(3,2) + c = dot(a,b) + assert_equal(c.mask,[[1,1],[0,0]]) + c = dot(b,a) + assert_equal(c.mask,[[1,0,0],[1,0,0],[1,0,0]]) + # + a = masked_array(n, mask=[0,0,0,0,0,1]).reshape(2,3) + b = masked_array(n, mask=[0,0,0,0,0,0]).reshape(3,2) + c = dot(a,b) + assert_equal(c.mask,[[0,0],[1,1]]) + c = dot(b,a) + assert_equal(c.mask,[[0,0,1],[0,0,1],[0,0,1]]) + # + a = masked_array(n, mask=[0,0,0,0,0,1]).reshape(2,3) + b = masked_array(n, mask=[0,0,1,0,0,0]).reshape(3,2) + c = dot(a,b) + assert_equal(c.mask,[[1,0],[1,1]]) + c = dot(b,a) + assert_equal(c.mask,[[0,0,1],[1,1,1],[0,0,1]]) ############################################################################### #------------------------------------------------------------------------------ Modified: trunk/Lib/sandbox/maskedarray/testutils.py =================================================================== --- trunk/Lib/sandbox/maskedarray/testutils.py 2006-12-22 14:39:32 UTC (rev 2463) +++ trunk/Lib/sandbox/maskedarray/testutils.py 2006-12-23 
00:00:43 UTC (rev 2464) @@ -17,7 +17,7 @@ from numpy.testing.utils import build_err_msg, rand import core -reload(core) +#reload(core) from core import mask_or, getmask, getmaskarray, masked_array, nomask from core import filled, equal, less Added: trunk/Lib/sandbox/maskedarray/timer_comparison.py =================================================================== --- trunk/Lib/sandbox/maskedarray/timer_comparison.py 2006-12-22 14:39:32 UTC (rev 2463) +++ trunk/Lib/sandbox/maskedarray/timer_comparison.py 2006-12-23 00:00:43 UTC (rev 2464) @@ -0,0 +1,364 @@ + +import timeit + +import numpy +from numpy import int_, float_, bool_ +import numpy.core.fromnumeric as fromnumeric + +from maskedarray.testutils import assert_equal,assert_array_equal, \ + fail_if_equal, assert_mask_equal + + +numpy.seterr(all='ignore') + +pi = numpy.pi + +class moduletester: + #----------------------------------- + def __init__(self, module): + self.module = module + self.allequal = module.allequal + self.arange = module.arange + self.array = module.array + self.average = module.average + self.concatenate = module.concatenate + self.count = module.count + self.filled = module.filled + self.getmask = module.getmask + self.id = id + self.inner = module.inner + self.make_mask = module.make_mask + self.masked = module.masked + self.masked_array = module.masked_array + self.masked_values = module.masked_values + self.mask_or = module.mask_or + self.ones = module.ones + self.outer = module.outer + self.repeat = module.repeat + self.resize = module.resize + self.sort = module.sort + self.take = module.take + self.transpose = module.transpose + self.zeros = module.zeros + self.MaskType = module.MaskType + try: + self.umath = module.umath + except AttributeError: + self.umath = module.core.umath + #---------------------------------- + def test_1(self): + x = numpy.array([1.,1.,1.,-2., pi/2.0, 4., 5., -10., 10., 1., 2., 3.]) + y = numpy.array([5.,0.,3., 2., -1., -4., 0., -10., 10., 1., 0., 3.]) + a10 = 
10. + m1 = [1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0] + m2 = [0, 0, 1, 0, 0, 1, 1, 0, 0, 0 ,0, 1] + xm = self.masked_array(x, mask=m1) + ym = self.masked_array(y, mask=m2) + z = numpy.array([-.5, 0., .5, .8]) + zm = self.masked_array(z, mask=[0,1,0,0]) + xf = numpy.where(m1, 1.e+20, x) + xm.set_fill_value(1.e+20) + #..... + assert((xm-ym).filled(0).any()) + fail_if_equal(xm.mask.astype(int_), ym.mask.astype(int_)) + s = x.shape + assert_equal(xm.size , reduce(lambda x,y:x*y, s)) + assert_equal(self.count(xm) , len(m1) - reduce(lambda x,y:x+y, m1)) + #..... + for s in [(4,3), (6,2)]: + x.shape = s + y.shape = s + xm.shape = s + ym.shape = s + xf.shape = s + + assert_equal(self.count(xm) , len(m1) - reduce(lambda x,y:x+y, m1)) + #---------------------------------- + def test_2(self): + "Tests conversions and indexing" + x1 = numpy.array([1,2,4,3]) + x2 = self.array(x1, mask=[1,0,0,0]) + x3 = self.array(x1, mask=[0,1,0,1]) + x4 = self.array(x1) + # test conversion to strings + junk, garbage = str(x2), repr(x2) + assert_equal(numpy.sort(x1), self.sort(x2, fill_value=0)) + # tests of indexing + assert type(x2[1]) is type(x1[1]) + assert x1[1] == x2[1] + assert_equal(x1[2],x2[2]) + assert_equal(x1[2:5],x2[2:5]) + assert_equal(x1[:],x2[:]) + assert_equal(x1[1:], x3[1:]) + x1[2] = 9 + x2[2] = 9 + assert_equal(x1,x2) + x1[1:3] = 99 + x2[1:3] = 99 + assert_equal(x1,x2) + x2[1] = self.masked + assert_equal(x1,x2) + x2[1:3] = self.masked + assert_equal(x1,x2) + x2[:] = x1 + x2[1] = self.masked + assert self.allequal(self.getmask(x2),self.array([0,1,0,0])) + x3[:] = self.masked_array([1,2,3,4],[0,1,1,0]) + assert self.allequal(self.getmask(x3), self.array([0,1,1,0])) + x4[:] = self.masked_array([1,2,3,4],[0,1,1,0]) + assert self.allequal(self.getmask(x4), self.array([0,1,1,0])) + assert self.allequal(x4, self.array([1,2,3,4])) + x1 = numpy.arange(5)*1.0 + x2 = self.masked_values(x1, 3.0) + assert_equal(x1,x2) + assert self.allequal(self.array([0,0,0,1,0], self.MaskType), x2.mask) + 
x1 = self.array([1,'hello',2,3],object) + x2 = numpy.array([1,'hello',2,3],object) + s1 = x1[1] + s2 = x2[1] + assert x1[1:1].shape == (0,) + # Tests copy-size + n = [0,0,1,0,0] + m = self.make_mask(n) + m2 = self.make_mask(m) + assert(m is m2) + m3 = self.make_mask(m, copy=1) + assert(m is not m3) + + x4 = self.arange(4) + x4[2] = self.masked + y4 = self.resize(x4, (8,)) + assert_equal(self.concatenate([x4,x4]), y4) + assert_equal(self.getmask(y4),[0,0,1,0,0,0,1,0]) + y5 = self.repeat(x4, (2,2,2,2), axis=0) + assert_equal(y5, [0,0,1,1,2,2,3,3]) + y6 = self.repeat(x4, 2, axis=0) + assert_equal(y5, y6) + y7 = x4.repeat((2,2,2,2), axis=0) + assert_equal(y5,y7) + y8 = x4.repeat(2,0) + assert_equal(y5,y8) + + #"Test of take, transpose, inner, outer products" + x = self.arange(24) + y = numpy.arange(24) + x[5:6] = self.masked + x = x.reshape(2,3,4) + y = y.reshape(2,3,4) + assert_equal(numpy.transpose(y,(2,0,1)), self.transpose(x,(2,0,1))) + assert_equal(numpy.take(y, (2,0,1), 1), self.take(x, (2,0,1), 1)) + assert_equal(numpy.inner(self.filled(x,0), self.filled(y,0)), + self.inner(x, y)) + assert_equal(numpy.outer(self.filled(x,0), self.filled(y,0)), + self.outer(x, y)) + y = self.array(['abc', 1, 'def', 2, 3], object) + y[2] = self.masked + t = self.take(y,[0,3,4]) + assert t[0] == 'abc' + assert t[1] == 2 + assert t[2] == 3 + # Tests in place + y = self.arange(10) + + x = self.arange(10) + xm = self.arange(10) + xm[2] = self.masked + x += 1 + assert_equal(x, y+1) + xm += 1 + assert_equal(xm, y+1) + + x = self.arange(10) + xm = self.arange(10) + xm[2] = self.masked + x -= 1 + assert_equal(x, y-1) + xm -= 1 + assert_equal(xm, y-1) + + x = self.arange(10)*1.0 + xm = self.arange(10)*1.0 + xm[2] = self.masked + x *= 2.0 + assert_equal(x, y*2) + xm *= 2.0 + assert_equal(xm, y*2) + + x = self.arange(10)*2 + xm = self.arange(10)*2 + xm[2] = self.masked + x /= 2 + assert_equal(x, y) + xm /= 2 + assert_equal(xm, y) + + x = self.arange(10)*1.0 + xm = self.arange(10)*1.0 + xm[2] 
= self.masked + x /= 2.0 + assert_equal(x, y/2.0) + xm /= self.arange(10) + assert_equal(xm, self.ones((10,))) + + x = self.arange(10).astype(float_) + xm = self.arange(10) + xm[2] = self.masked + id1 = self.id(x.raw_data()) + x += 1. + assert id1 == self.id(x.raw_data()) + assert_equal(x, y+1.) + + x = self.arange(10, dtype=float_) + xm = self.arange(10, dtype=float_) + xm[2] = self.masked + m = xm.mask + a = self.arange(10, dtype=float_) + a[-1] = self.masked + x += a + xm += a + assert_equal(x,y+a) + assert_equal(xm,y+a) + assert_equal(xm.mask, self.mask_or(m,a.mask)) + + x = self.arange(10, dtype=float_) + xm = self.arange(10, dtype=float_) + xm[2] = self.masked + m = xm.mask + a = self.arange(10, dtype=float_) + a[-1] = self.masked + x -= a + xm -= a + assert_equal(x,y-a) + assert_equal(xm,y-a) + assert_equal(xm.mask, self.mask_or(m,a.mask)) + + x = self.arange(10, dtype=float_) + xm = self.arange(10, dtype=float_) + xm[2] = self.masked + m = xm.mask + a = self.arange(10, dtype=float_) + a[-1] = self.masked + x *= a + xm *= a + assert_equal(x,y*a) + assert_equal(xm,y*a) + assert_equal(xm.mask, self.mask_or(m,a.mask)) + + x = self.arange(10, dtype=float_) + xm = self.arange(10, dtype=float_) + xm[2] = self.masked + m = xm.mask + a = self.arange(10, dtype=float_) + a[-1] = self.masked + x /= a + xm /= a + #---------------------------------- + def test_3(self): + d = (self.array([1.0, 0, -1, pi/2]*2, mask=[0,1]+[0]*6), + self.array([1.0, 0, -1, pi/2]*2, mask=[1,0]+[0]*6),) + for f in ['sqrt', 'log', 'log10', 'exp', 'conjugate', + 'sin', 'cos', 'tan', + 'arcsin', 'arccos', 'arctan', + 'sinh', 'cosh', 'tanh', + 'arcsinh', + 'arccosh', + 'arctanh', + 'absolute', 'fabs', 'negative', + # 'nonzero', 'around', + 'floor', 'ceil', + # 'sometrue', 'alltrue', + 'logical_not', + 'add', 'subtract', 'multiply', + 'divide', 'true_divide', 'floor_divide', + 'remainder', 'fmod', 'hypot', 'arctan2', + 'equal', 'not_equal', 'less_equal', 'greater_equal', + 'less', 'greater', + 
'logical_and', 'logical_or', 'logical_xor', + ]: + #print f + try: + uf = getattr(self.umath, f) + except AttributeError: + uf = getattr(fromnumeric, f) + mf = getattr(self.module, f) + args = d[:uf.nin] + ur = uf(*args) + mr = mf(*args) + assert_equal(ur.filled(0), mr.filled(0), f) + assert_mask_equal(ur.mask, mr.mask) + # test average + ott = self.array([0.,1.,2.,3.], mask=[1,0,0,0]) + assert_equal(2.0, self.average(ott,axis=0)) + assert_equal(2.0, self.average(ott, weights=[1., 1., 2., 1.])) + result, wts = self.average(ott, weights=[1.,1.,2.,1.], returned=1) + assert_equal(2.0, result) + assert(wts == 4.0) + ott[:] = self.masked + # assert(average(ott,axis=0) is masked) + ott = self.array([0.,1.,2.,3.], mask=[1,0,0,0]) + ott = ott.reshape(2,2) + ott[:,1] = self.masked + assert_equal(self.average(ott,axis=0), [2.0, 0.0]) + # assert(average(ott,axis=1)[0] is masked) + assert_equal([2.,0.], self.average(ott, axis=0)) + result, wts = self.average(ott, axis=0, returned=1) + assert_equal(wts, [1., 0.]) + w1 = [0,1,1,1,1,0] + w2 = [[0,1,1,1,1,0],[1,0,0,0,0,1]] + x = self.arange(6) + assert_equal(self.average(x, axis=0), 2.5) + assert_equal(self.average(x, axis=0, weights=w1), 2.5) + y = self.array([self.arange(6), 2.0*self.arange(6)]) + assert_equal(self.average(y, None), numpy.add.reduce(numpy.arange(6))*3./12.) + assert_equal(self.average(y, axis=0), numpy.arange(6) * 3./2.) + assert_equal(self.average(y, axis=1), [self.average(x,axis=0), self.average(x,axis=0) * 2.0]) + assert_equal(self.average(y, None, weights=w2), 20./6.) 
+ assert_equal(self.average(y, axis=0, weights=w2), [0.,1.,2.,3.,4.,10.]) + assert_equal(self.average(y, axis=1), [self.average(x,axis=0), self.average(x,axis=0) * 2.0]) + m1 = self.zeros(6) + m2 = [0,0,1,1,0,0] + m3 = [[0,0,1,1,0,0],[0,1,1,1,1,0]] + m4 = self.ones(6) + m5 = [0, 1, 1, 1, 1, 1] + assert_equal(self.average(self.masked_array(x, m1),axis=0), 2.5) + assert_equal(self.average(self.masked_array(x, m2),axis=0), 2.5) + # assert(self.average(masked_array(x, m4),axis=0) is masked) + assert_equal(self.average(self.masked_array(x, m5),axis=0), 0.0) + assert_equal(self.count(self.average(self.masked_array(x, m4),axis=0)), 0) + z = self.masked_array(y, m3) + assert_equal(self.average(z, None), 20./6.) + assert_equal(self.average(z, axis=0), [0.,1.,99.,99.,4.0, 7.5]) + assert_equal(self.average(z, axis=1), [2.5, 5.0]) + assert_equal(self.average(z,axis=0, weights=w2), [0.,1., 99., 99., 4.0, 10.0]) + + +################################################################################ +if __name__ == '__main__': + setup_base = "from __main__ import moduletester\ntester = moduletester(module)" + setup_old = "import numpy.core.ma as module\n"+setup_base + setup_new = "import maskedarray as module\n"+setup_base + + nrepeat = 10 + nloop = 50 + print "#1"+50*'.' + old = timeit.Timer('tester.test_1()', setup_old).repeat(nrepeat, nloop*10) + new = timeit.Timer('tester.test_1()', setup_new).repeat(nrepeat, nloop*10) + old = numpy.sort(old) + new = numpy.sort(new) + print "numpy.core.ma: %.3f - %.3f" % (old[0], old[1]) + print "maskedarray : %.3f - %.3f" % (new[0], new[1]) + print "#2"+50*'.' + old = timeit.Timer('tester.test_2()', setup_old).repeat(nrepeat, nloop) + new = timeit.Timer('tester.test_2()', setup_new).repeat(nrepeat, nloop) + old = numpy.sort(old) + new = numpy.sort(new) + print "numpy.core.ma: %.3f - %.3f" % (old[0], old[1]) + print "maskedarray : %.3f - %.3f" % (new[0], new[1]) + print "#3"+50*'.' 
+ old = timeit.Timer('tester.test_3()', setup_old).repeat(nrepeat, nloop) + new = timeit.Timer('tester.test_3()', setup_new).repeat(nrepeat, nloop) + old = numpy.sort(old) + new = numpy.sort(new) + print "numpy.core.ma: %.3f - %.3f" % (old[0], old[1]) + print "maskedarray : %.3f - %.3f" % (new[0], new[1]) + \ No newline at end of file From scipy-svn at scipy.org Sat Dec 23 10:09:29 2006 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Sat, 23 Dec 2006 09:09:29 -0600 (CST) Subject: [Scipy-svn] r2465 - trunk/Lib/linsolve Message-ID: <20061223150929.99B9039C057@new.scipy.org> Author: jarrod.millman Date: 2006-12-23 09:09:27 -0600 (Sat, 23 Dec 2006) New Revision: 2465 Modified: trunk/Lib/linsolve/linsolve.py Log: added a docstring for linsolve.splu (see ticket #312) Modified: trunk/Lib/linsolve/linsolve.py =================================================================== --- trunk/Lib/linsolve/linsolve.py 2006-12-23 00:00:43 UTC (rev 2464) +++ trunk/Lib/linsolve/linsolve.py 2006-12-23 15:09:27 UTC (rev 2465) @@ -86,6 +86,10 @@ def splu(A, permc_spec=2, diag_pivot_thresh=1.0, drop_tol=0.0, relax=1, panel_size=10): + """ + A linear solver, for a square matrix A, using LU decomposition where + L is a lower triangular matrix and U is an upper triangular matrix. + """ M, N = A.shape if (M != N): raise ValueError, "can only factor square matrices" From scipy-svn at scipy.org Tue Dec 26 20:01:58 2006 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Tue, 26 Dec 2006 19:01:58 -0600 (CST) Subject: [Scipy-svn] r2466 - trunk/Lib/optimize Message-ID: <20061227010158.52B4739C15E@new.scipy.org> Author: timl Date: 2006-12-26 19:01:53 -0600 (Tue, 26 Dec 2006) New Revision: 2466 Modified: trunk/Lib/optimize/optimize.py Log: increment iteration counter. Fixes ticket #283.
Modified: trunk/Lib/optimize/optimize.py =================================================================== --- trunk/Lib/optimize/optimize.py 2006-12-23 15:09:27 UTC (rev 2465) +++ trunk/Lib/optimize/optimize.py 2006-12-27 01:01:53 UTC (rev 2466) @@ -1371,6 +1371,8 @@ v=w; w=x; x=u fv=fw; fw=fx; fx=fu + iter += 1 + xmin = x fval = fx if full_output: From scipy-svn at scipy.org Tue Dec 26 20:16:07 2006 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Tue, 26 Dec 2006 19:16:07 -0600 (CST) Subject: [Scipy-svn] r2467 - trunk/Lib/optimize Message-ID: <20061227011607.4A59E39C172@new.scipy.org> Author: timl Date: 2006-12-26 19:16:03 -0600 (Tue, 26 Dec 2006) New Revision: 2467 Modified: trunk/Lib/optimize/optimize.py Log: add an option to specify the initial direction set used in fmin_powell. fixes ticket #284 Modified: trunk/Lib/optimize/optimize.py =================================================================== --- trunk/Lib/optimize/optimize.py 2006-12-27 01:01:53 UTC (rev 2466) +++ trunk/Lib/optimize/optimize.py 2006-12-27 01:16:03 UTC (rev 2467) @@ -1535,7 +1535,8 @@ def fmin_powell(func, x0, args=(), xtol=1e-4, ftol=1e-4, maxiter=None, - maxfun=None, full_output=0, disp=1, retall=0, callback=None): + maxfun=None, full_output=0, disp=1, retall=0, callback=None, + direc=None): """Minimize a function using modified Powell's method. Description: @@ -1551,6 +1552,7 @@ callback -- an optional user-supplied function to call after each iteration. It is called as callback(xk), where xk is the current parameter vector. 
+ direc -- initial direction set Outputs: (xopt, {fopt, xi, direc, iter, funcalls, warnflag}, {allvecs}) @@ -1610,7 +1612,12 @@ if maxfun is None: maxfun = N * 1000 - direc = eye(N,dtype=float) + + if direc is None: + direc = eye(N, dtype=float) + else: + direc = asarray(direc, dtype=float) + fval = squeeze(func(x)) x1 = x.copy() iter = 0; From scipy-svn at scipy.org Sat Dec 30 15:59:42 2006 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Sat, 30 Dec 2006 14:59:42 -0600 (CST) Subject: [Scipy-svn] r2468 - trunk/Lib/sandbox/maskedarray Message-ID: <20061230205942.DF97339C0D4@new.scipy.org> Author: pierregm Date: 2006-12-30 14:59:39 -0600 (Sat, 30 Dec 2006) New Revision: 2468 Modified: trunk/Lib/sandbox/maskedarray/CHANGELOG trunk/Lib/sandbox/maskedarray/core.py trunk/Lib/sandbox/maskedarray/extras.py Log: Modified: trunk/Lib/sandbox/maskedarray/CHANGELOG =================================================================== --- trunk/Lib/sandbox/maskedarray/CHANGELOG 2006-12-27 01:16:03 UTC (rev 2467) +++ trunk/Lib/sandbox/maskedarray/CHANGELOG 2006-12-30 20:59:39 UTC (rev 2468) @@ -1,3 +1,5 @@ +#2006-12-30 : Core +# : - Cleaned up setitem/setslice w/ hard_mask=True #2006-12-22 : Core # : - Optimized(?) default_/maximum_/minimum_fill_value # : - Force __new__ to not return a MaskedArray, in order to ... 
Modified: trunk/Lib/sandbox/maskedarray/core.py =================================================================== --- trunk/Lib/sandbox/maskedarray/core.py 2006-12-27 01:16:03 UTC (rev 2467) +++ trunk/Lib/sandbox/maskedarray/core.py 2006-12-30 20:59:39 UTC (rev 2468) @@ -73,8 +73,8 @@ import warnings import logging -logging.basicConfig(level=logging.WARNING, - format='%(levelname)s %(message)s',) +logging.basicConfig(level=logging.CRITICAL, + format='%(name)-15s %(levelname)s %(message)s',) MaskType = bool_ @@ -814,18 +814,18 @@ (hasattr(data,"_mask") and hasattr(data,"_data")) : if keep_mask: if mask is nomask: - cls.__defaultmask = data._mask + cls._defaultmask = data._mask else: - cls.__defaultmask = mask_or(data._mask, mask, + cls._defaultmask = mask_or(data._mask, mask, copy=copy, small_mask=small_mask) else: - cls.__defaultmask = make_mask(mask, copy=copy, small_mask=small_mask) + cls._defaultmask = make_mask(mask, copy=copy, small_mask=small_mask) # Update fille_value if fill_value is None: cls._fill_value = data._fill_value else: cls._fill_value = fill_value - cls.__defaulthardmask = hard_mask + cls._defaulthardmask = hard_mask _data = data._data if dtype is not None and _data.dtype != numeric.dtype(dtype): return _data.astype(dtype).view(cls) @@ -853,8 +853,7 @@ # cls.__defaultmask = getmask(_data) # return _data.view(cls) # Define mask ................. - if not is_mask(mask): - mask = make_mask(mask, small_mask=small_mask) + mask = make_mask(mask, small_mask=small_mask) #....Check shapes compatibility if mask is not nomask: (nd, nm) = (_data.size, mask.size) @@ -873,8 +872,8 @@ # mask = _data.shape #.... cls._fill_value = fill_value - cls.__defaulthardmask = hard_mask - cls.__defaultmask = mask + cls._defaulthardmask = hard_mask + cls._defaultmask = mask # logging.debug("__new__ returned %s as %s" % (type(_data), cls)) return numeric.asanyarray(_data).view(cls) #.................................. 
@@ -923,8 +922,8 @@ self._fill_value = obj._fill_value else: self._data = obj - self._mask = self.__defaultmask - self._hardmask = self.__defaulthardmask + self._mask = self._defaultmask + self._hardmask = self._defaulthardmask self.fill_value = self._fill_value # # # logging.debug("__finalize__ returned %s" % type(self)) @@ -1002,21 +1001,12 @@ valmask = getmask(value) if valmask is nomask: d[index] = valdata - else: + elif valmask.size > 1: dindx = d[index] numeric.putmask(dindx, ~valmask, valdata) d[index] = dindx numeric.putmask(mindx, valmask, True) m[index] = mindx -# else: -# mindx = m[index] -# value = masked_array(value, mask=mindx, keep_mask=True) -# valmask = getmask(value) -# if valmask is nomask: -# d[vindx] = filled(value) -# else: -# d[vindx[~valmask]] = filled(value[~valmask]) -# m[vindx[valmask]] = True #..... if not m.any(): self._mask = nomask @@ -1048,33 +1038,8 @@ if m is nomask: m = make_mask_none(d.shape) m[i:j] = True + self._mask = m return - #.... -# valmask = getmask(value) -# valdata = filled(value).astype(d.dtype) -# if valmask is nomask: -# if m is nomask: -# d[i:j] = valdata -# elif self._hardmask: -# d[(~m)[i:j]] = valdata -# elif not self._hardmask: -# d[i:j] = valdata -# m[i:j] = False -# if not m.any(): -# self._mask = nomask -# return -# else: -# if m is nomask: -# m = make_mask_none(d.shape) -# m[i:j] = valmask -# elif self._hardmask: -# m[i:j] = mask_or(m[i:j], valmask) -# else: -# m[i:j] = valmask -# d[(~m)[i:j]] = valdata -# if not m.any(): -# self._mask = nomask -# return #.... if m is nomask: valmask = getmask(value) @@ -1094,10 +1059,10 @@ else: mindx = m[i:j] value = masked_array(value, mask=mindx, keep_mask=True) - valmask = getmask(value) - if valmask is None: + valmask = value._mask + if valmask is nomask: d[i:j][~mindx] = filled(value) - else: + elif valmask.size > 1: d[i:j][~mindx] = value[~valmask] m[i:j][valmask] = True #..... 
@@ -1286,8 +1251,13 @@ Subclassing is preserved.""" if tc == self._data.dtype: return self - d = self._data.astype(tc) - return self.__class__(d, mask=self._mask, dtype=tc) + try: + return self.__class__(self, mask=self._mask, dtype=tc, copy=True) + except: + d = self._data.astype(tc) + return self.__class__(d, mask=self._mask, dtype=tc) +# +# #............................................ def harden_mask(self): "Forces the mask to hard" @@ -1395,14 +1365,12 @@ value = fill_value # if self is masked_singleton: - result = numeric.array(value) + result = numeric.asanyarray(value) else: + result = d.copy() try: -# result = numeric.array(d, dtype=d.dtype, copy=True) - result = d.copy() result[m] = value except (TypeError, AttributeError): - #ok, can't put that value in here value = numeric.array(value, dtype=object) d = d.astype(object) result = fromnumeric.choose(m, (d, value)) @@ -2157,6 +2125,7 @@ dtype=t, fill_value=f) #...................................... MaskedArray.conj = MaskedArray.conjugate = _arraymethod('conjugate') +MaskedArray.copy = MaskedArray.conjugate = _arraymethod('copy') MaskedArray.diagonal = _arraymethod('diagonal') MaskedArray.take = _arraymethod('take') MaskedArray.ravel = _arraymethod('ravel') Modified: trunk/Lib/sandbox/maskedarray/extras.py =================================================================== --- trunk/Lib/sandbox/maskedarray/extras.py 2006-12-27 01:16:03 UTC (rev 2467) +++ trunk/Lib/sandbox/maskedarray/extras.py 2006-12-30 20:59:39 UTC (rev 2468) @@ -58,15 +58,15 @@ def masked_all(shape, dtype=float_): """Returns an empty masked array of the given shape and dtype, where all the data are masked.""" - a = empty(shape, dtype) - a[:] = masked + a = masked_array(numeric.empty(shape, dtype), + mask=numeric.ones(shape, bool_)) return a def masked_all_like(arr): """Returns an empty masked array of the same shape and dtype as the array `a`, where all the data are masked.""" - a = empty_like(arr) - a[:] = masked + a = 
masked_array(numeric.empty_like(arr), + mask=numeric.ones(shape, bool_)) return a #####--------------------------------------------------------------------------