[Scipy-svn] r2931 - in trunk/Lib/sandbox/timeseries: lib src
scipy-svn at scipy.org
scipy-svn at scipy.org
Tue Apr 17 12:09:55 EDT 2007
Author: mattknox_ca
Date: 2007-04-17 11:09:47 -0500 (Tue, 17 Apr 2007)
New Revision: 2931
Modified:
trunk/Lib/sandbox/timeseries/lib/moving_funcs.py
trunk/Lib/sandbox/timeseries/src/cseries.c
Log:
added mov_median function
Modified: trunk/Lib/sandbox/timeseries/lib/moving_funcs.py
===================================================================
--- trunk/Lib/sandbox/timeseries/lib/moving_funcs.py 2007-04-17 13:24:22 UTC (rev 2930)
+++ trunk/Lib/sandbox/timeseries/lib/moving_funcs.py 2007-04-17 16:09:47 UTC (rev 2931)
@@ -21,9 +21,10 @@
from maskedarray import MaskedArray, nomask, getmask, getmaskarray, masked
marray = MA.array
-from timeseries.cseries import MA_mov_stddev, MA_mov_sum, MA_mov_average
+from timeseries.cseries import MA_mov_stddev, MA_mov_sum, MA_mov_average, \
+ MA_mov_median
-__all__ = ['mov_sum',
+__all__ = ['mov_sum', 'mov_median',
'mov_average', 'mov_mean', 'mov_average_expw',
'mov_stddev', 'mov_var', 'mov_sample_stddev', 'mov_sample_var',
'cmov_average', 'cmov_mean', 'cmov_window'
@@ -52,6 +53,16 @@
result_dict = MA_mov_sum(**kwargs)
return _process_result_dict(data, result_dict)
+def mov_median(data, window_size, dtype=None):
+ kwargs = {'array':data,
+ 'window_size':window_size}
+
+ if dtype is not None:
+ kwargs['dtype'] = dtype
+
+ result_dict = MA_mov_median(**kwargs)
+ return _process_result_dict(data, result_dict)
+
def mov_average(data, window_size, dtype=None):
kwargs = {'array':data,
'window_size':window_size}
Modified: trunk/Lib/sandbox/timeseries/src/cseries.c
===================================================================
--- trunk/Lib/sandbox/timeseries/src/cseries.c 2007-04-17 13:24:22 UTC (rev 2930)
+++ trunk/Lib/sandbox/timeseries/src/cseries.c 2007-04-17 16:09:47 UTC (rev 2931)
@@ -2989,15 +2989,16 @@
}
-static PyObject *NP_ADD, *NP_MULTIPLY, *NP_SUBTRACT, *NP_SQRT;
+static PyObject *NP_ADD, *NP_MULTIPLY, *NP_SUBTRACT, *NP_SQRT,
+ *NP_GREATER, *NP_GREATER_EQUAL;
/* computation portion of moving sum. Appropriate mask is overlayed on top
afterwards */
-static PyArrayObject*
+static PyObject*
calc_mov_sum(PyArrayObject *orig_ndarray, int window_size, int rtype)
{
PyArrayObject *result_ndarray=NULL;
- int i, valid_points=0;
+ int i;
result_ndarray = (PyArrayObject*)PyArray_ZEROS(
orig_ndarray->nd,
@@ -3011,9 +3012,8 @@
val = PyArray_GETITEM(orig_ndarray, PyArray_GetPtr(orig_ndarray, &i));
- if (valid_points == 0) {
+ if (i == 0) {
mov_sum_val = val;
- valid_points += 1;
} else {
int prev_idx = i-1;
PyObject *mov_sum_prevval;
@@ -3024,7 +3024,7 @@
Py_DECREF(mov_sum_prevval);
ERR_CHECK(mov_sum_val)
- if (valid_points == window_size) {
+ if (i >= (window_size - 1)) {
PyObject *temp_val, *rem_val;
int rem_idx = i-window_size;
temp_val = mov_sum_val;
@@ -3039,9 +3039,6 @@
Py_DECREF(temp_val);
Py_DECREF(rem_val);
-
- } else {
- valid_points += 1;
}
}
@@ -3052,7 +3049,7 @@
Py_DECREF(mov_sum_val);
}
- return result_ndarray;
+ return (PyObject*)result_ndarray;
}
@@ -3078,7 +3075,7 @@
rtype = _CHKTYPENUM(dtype);
- result_ndarray = calc_mov_sum(orig_ndarray, window_size, rtype);
+ result_ndarray = (PyArrayObject*)calc_mov_sum(orig_ndarray, window_size, rtype);
ERR_CHECK(result_ndarray)
result_dict = PyDict_New();
@@ -3102,8 +3099,6 @@
PyArray_Descr *dtype=NULL;
- int *raw_result_mask;
-
int rtype, window_size;
static char *kwlist[] = {"array", "window_size", "dtype", NULL};
@@ -3119,7 +3114,7 @@
rtype = _get_type_num_double(orig_ndarray->descr, dtype);
- mov_sum = calc_mov_sum(orig_ndarray, window_size, rtype);
+ mov_sum = (PyArrayObject*)calc_mov_sum(orig_ndarray, window_size, rtype);
ERR_CHECK(mov_sum)
denom = PyFloat_FromDouble(1.0/(double)(window_size));
@@ -3142,6 +3137,281 @@
return result_dict;
}
+static PyObject*
+np_add(PyObject *left_val, PyObject *right_val) {
+
+ PyObject *result;
+
+ result = PyObject_CallFunction(
+ NP_ADD, "OO",
+ (PyArrayObject*)left_val,
+ right_val);
+ return result;
+}
+
+static PyObject*
+np_subtract(PyObject *left_val, PyObject *right_val) {
+
+ PyObject *result;
+
+ result = PyObject_CallFunction(
+ NP_SUBTRACT, "OO",
+ (PyArrayObject*)left_val,
+ right_val);
+ return result;
+}
+
+static PyObject*
+np_multiply(PyObject *left_val, PyObject *right_val) {
+
+ PyObject *result;
+
+ result = PyObject_CallFunction(
+ NP_MULTIPLY, "OO",
+ (PyArrayObject*)left_val,
+ right_val);
+ return result;
+}
+
+static int np_greater(PyObject *left_val, PyObject *right_val) {
+
+ PyObject *temp;
+ int result;
+
+ temp = PyObject_CallFunction(
+ NP_GREATER, "OO",
+ (PyArrayObject*)left_val,
+ right_val);
+
+ result = (int)PyInt_AsLong(temp);
+ Py_DECREF(temp);
+ return result;
+}
+
+static int np_greater_equal(PyObject *left_val, PyObject *right_val) {
+
+ PyObject *temp;
+ int result;
+
+ temp = PyObject_CallFunction(
+ NP_GREATER_EQUAL, "OO",
+ (PyArrayObject*)left_val,
+ right_val);
+
+ result = (int)PyInt_AsLong(temp);
+ Py_DECREF(temp);
+ return result;
+}
+
+/* computation portion of moving median. Appropriate mask is overlayed on top
+ afterwards.
+
+ The algorithm used here is based on the code found at:
+ http://cran.r-project.org/src/contrib/Devel/runStat_1.1.tar.gz
+
+ This code was originally released under the GPL, but the author
+ (David Brahm) has granted me (and scipy) permission to use it under the BSD
+ license. */
+static PyObject*
+calc_mov_median(PyArrayObject *orig_ndarray, int window_size, int rtype)
+{
+ PyArrayObject *result_ndarray=NULL;
+ PyObject **result_array, **ref_array, **even_array=NULL;
+ PyObject *new_val, *old_val, *temp;
+ PyObject *temp_add, *one_half;
+ int a, i, k, R, arr_size, z, is_odd;
+ int *r;
+
+ arr_size = orig_ndarray->dimensions[0];
+
+ result_ndarray = (PyArrayObject*)PyArray_ZEROS(
+ orig_ndarray->nd,
+ orig_ndarray->dimensions,
+ rtype, 0);
+ ERR_CHECK(result_ndarray)
+
+ if (arr_size >= window_size) {
+ result_array = calloc(arr_size, sizeof(PyObject*));
+ MEM_CHECK(result_array)
+
+ /* this array will be used for quick access to the data in the original
+ array (so PyArray_GETITEM doesn't have to be used over and over in the
+ main loop) */
+ ref_array = malloc(arr_size * sizeof(PyObject*));
+ MEM_CHECK(ref_array)
+
+ for (i=0; i<arr_size; i++) {
+ ref_array[i] = PyArray_GETITEM(orig_ndarray, PyArray_GetPtr(orig_ndarray, &i));
+ }
+
+ /* this array wll be used for keeping track of the "ranks" of the values
+ in the current window */
+ r = malloc(window_size * sizeof(int));
+ MEM_CHECK(r)
+
+ for (i=0; i < window_size; i++) {
+ r[i] = 1;
+ }
+
+ if ((window_size % 2) == 0) {
+ // array to store two median values when window_size is an even #
+ even_array = calloc(2, sizeof(PyObject*));
+ MEM_CHECK(even_array)
+ }
+
+ R = (window_size + 1)/2;
+ one_half = PyFloat_FromDouble(0.5);
+
+ z = arr_size - window_size;
+
+ //printf("yep 1: %f, %f\n", PyFloat_AsDouble(data_array[z+i]), PyFloat_AsDouble(data_array[z+k]));
+
+ /* Calculate initial ranks "r" */
+ for (i=0; i < window_size; i++) {
+
+ for (k=0; k < i; k++) {
+ if (np_greater_equal(ref_array[z+i], ref_array[z+k])) {
+ r[i]++;
+ }
+ }
+ for (k=i+1; k < window_size; k++) {
+ if (np_greater(ref_array[z+i], ref_array[z+k])) {
+ r[i]++;
+ }
+ }
+
+ /* If rank=R, this is the median */
+ if (even_array != NULL) {
+ if (r[i]==R) {
+ even_array[0] = ref_array[z+i];
+ } else if (r[i] == (R+1)) {
+ even_array[1] = ref_array[z+i];
+ }
+ } else {
+ if (r[i]==R) {
+ result_array[arr_size-1] = ref_array[z+i];
+ }
+ }
+ }
+
+ if (even_array != NULL) {
+ temp_add = np_add(even_array[0], even_array[1]);
+ result_array[arr_size-1] = np_multiply(temp_add, one_half);
+ Py_DECREF(temp_add);
+ }
+
+ for (i=arr_size-2; i >= window_size-1; i--) {
+ a = window_size;
+ z = i - window_size + 1;
+ old_val = ref_array[i+1];
+ new_val = ref_array[i-window_size+1];
+
+ for (k=window_size-1; k > 0; k--) {
+ r[k] = r[k-1]; /* Shift previous iteration's ranks */
+ if (np_greater_equal(ref_array[z+k], new_val)) {r[k]++; a--;}
+ if (np_greater(ref_array[z+k], old_val)) {r[k]--;}
+
+ if (r[k]==R) {
+ result_array[i] = ref_array[z+k];
+ }
+
+ if (even_array != NULL) {
+ if (r[k]==R) {
+ even_array[0] = ref_array[z+k];
+ } else if (r[k] == (R+1)) {
+ even_array[1] = ref_array[z+k];
+ }
+ } else {
+ if (r[k]==R) {
+ result_array[i] = ref_array[z+k];
+ }
+ }
+
+ }
+
+ r[0] = a;
+
+ if (even_array != NULL) {
+ if (a==R) {
+ even_array[0] = new_val;
+ } else if (a == (R+1)) {
+ even_array[1] = new_val;
+ }
+
+ temp_add = np_add(even_array[0], even_array[1]);
+ result_array[i] = np_multiply(temp_add, one_half);;
+ Py_DECREF(temp_add);
+
+ } else {
+ if (a==R) {
+ result_array[i] = new_val;
+ }
+ }
+
+ }
+
+ Py_DECREF(one_half);
+
+ for (i=window_size-1; i<arr_size; i++) {
+ PyArray_SETITEM(result_ndarray,
+ PyArray_GetPtr(result_ndarray, &i),
+ result_array[i]);
+ }
+
+ for (i=0; i<arr_size; i++) {
+ Py_DECREF(ref_array[i]);
+ }
+
+ if (even_array != NULL) {
+ for (i=window_size-1; i<arr_size; i++) {
+ Py_DECREF(result_array[i]);
+ }
+ }
+
+ free(ref_array);
+ free(result_array);
+ }
+
+ return (PyObject*)result_ndarray;
+
+}
+
+static char MaskedArray_mov_median_doc[] = "";
+static PyObject *
+MaskedArray_mov_median(PyObject *self, PyObject *args, PyObject *kwds)
+{
+ PyObject *orig_arrayobj=NULL, *result_dict=NULL;
+ PyArrayObject *orig_ndarray=NULL, *result_ndarray=NULL, *result_mask=NULL;
+ PyArray_Descr *dtype=NULL;
+
+ int rtype, window_size;
+
+ static char *kwlist[] = {"array", "window_size", "dtype", NULL};
+
+ if (!PyArg_ParseTupleAndKeywords(args, kwds,
+ "Oi|O&:mov_median(array, window_size, dtype)", kwlist,
+ &orig_arrayobj, &window_size,
+ PyArray_DescrConverter2, &dtype)) return NULL;
+
+ check_mov_args(orig_arrayobj, window_size, 1,
+ &orig_ndarray, &result_mask);
+
+ rtype = _CHKTYPENUM(dtype);
+
+ result_ndarray = (PyArrayObject*)calc_mov_median(orig_ndarray, window_size, rtype);
+ ERR_CHECK(result_ndarray)
+
+ result_dict = PyDict_New();
+ MEM_CHECK(result_dict)
+ PyDict_SetItemString(result_dict, "array", (PyObject*)result_ndarray);
+ PyDict_SetItemString(result_dict, "mask", (PyObject*)result_mask);
+
+ Py_DECREF(result_ndarray);
+ Py_DECREF(result_mask);
+ return result_dict;
+}
+
+
static char MaskedArray_mov_stddev_doc[] = "";
static PyObject *
MaskedArray_mov_stddev(PyObject *self, PyObject *args, PyObject *kwds)
@@ -3154,8 +3424,6 @@
PyArray_Descr *dtype=NULL;
- int *raw_result_mask;
-
int rtype, window_size, is_variance, is_sample;
static char *kwlist[] = {"array", "window_size", "is_variance", "is_sample", "dtype", NULL};
@@ -3171,7 +3439,7 @@
rtype = _get_type_num_double(orig_ndarray->descr, dtype);
- mov_sum = calc_mov_sum(orig_ndarray, window_size, rtype);
+ mov_sum = (PyArrayObject*)calc_mov_sum(orig_ndarray, window_size, rtype);
ERR_CHECK(mov_sum)
result_temp1 = (PyArrayObject*)PyObject_CallFunction(
@@ -3179,7 +3447,7 @@
orig_ndarray, (PyObject*)orig_ndarray);
ERR_CHECK(result_temp1)
- mov_sum_sq = calc_mov_sum(result_temp1, window_size, rtype);
+ mov_sum_sq = (PyArrayObject*)calc_mov_sum(result_temp1, window_size, rtype);
Py_DECREF(result_temp1);
ERR_CHECK(mov_sum_sq)
@@ -3386,6 +3654,8 @@
{"MA_mov_sum", (PyCFunction)MaskedArray_mov_sum,
METH_VARARGS | METH_KEYWORDS, MaskedArray_mov_sum_doc},
+ {"MA_mov_median", (PyCFunction)MaskedArray_mov_median,
+ METH_VARARGS | METH_KEYWORDS, MaskedArray_mov_median_doc},
{"MA_mov_average", (PyCFunction)MaskedArray_mov_average,
METH_VARARGS | METH_KEYWORDS, MaskedArray_mov_average_doc},
{"MA_mov_stddev", (PyCFunction)MaskedArray_mov_stddev,
@@ -3439,11 +3709,15 @@
NP_MULTIPLY = PyDict_GetItemString(ops_dict, "multiply");
NP_SUBTRACT = PyDict_GetItemString(ops_dict, "subtract");
NP_SQRT = PyDict_GetItemString(ops_dict, "sqrt");
+ NP_GREATER = PyDict_GetItemString(ops_dict, "greater");
+ NP_GREATER_EQUAL = PyDict_GetItemString(ops_dict, "greater_equal");
Py_INCREF(NP_ADD);
Py_INCREF(NP_MULTIPLY);
Py_INCREF(NP_SUBTRACT);
Py_INCREF(NP_SQRT);
+ Py_INCREF(NP_GREATER);
+ Py_INCREF(NP_GREATER_EQUAL);
Py_DECREF(ops_dict);
Py_INCREF(&DateType);
More information about the Scipy-svn
mailing list