[Numpy-discussion] NumPy re-factoring project

Thu Jun 10 21:02:20 EDT 2010

On Thu, Jun 10, 2010 at 6:27 PM, Sturla Molden <sturla at molden.no> wrote:

>  Den 11.06.2010 00:57, skrev David Cournapeau:
>
>  Do you have the code for this ? That's something I wanted to do, but
> never took the time to do. Faster generic iterator would be nice, but
> very hard to do in general.
>
>
>
>
>
> /* this computes the start adress for every vector along a dimension
> (axis) of an ndarray */
>
> typedef PyArrayObject ndarray;
>
> inline char *datapointer(ndarray *a, npy_intp *indices)
> {
>     char *data = a->data;
>     int i;
>     npy_intp j;
>     for (i=0; i < a->nd; i++) {
>         j = indices[i];
>         data += j * a->strides[i];
>     }
>     return data;
> }
>
> static double ** _get_axis_pointer(npy_intp *indices, int axis,
>                            ndarray *a, double **axis_ptr_array, int dim)
> {
>     /* recursive axis pointer search for 4 dimensions or more */
>     npy_intp i;
>     double *axis_ptr;
>     if (dim == a->nd) {
>         /* done recursing dimensions,
>             compute address of this axis */
>         axis_ptr = (double *) datapointer(a, indices);
>         *axis_ptr_array = axis_ptr;
>         return (axis_ptr_array + 1);
>     } else if (dim == axis) {
>         /* use first element only */
>         indices[dim] = 0;
>         axis_ptr_array = _get_axis_pointer(indices, axis, a,
> axis_ptr_array, dim+1);
>         return axis_ptr_array;
>     } else {
>         /* iterate range of indices */
>         npy_intp length = a->dimensions[dim];
>         for (i=0; i < length; i++) {
>             indices[dim] = i;
>             axis_ptr_array = _get_axis_pointer(indices, axis, a,
> axis_ptr_array, dim+1);
>         }
>         return axis_ptr_array;
>     }
> }
>
>
> static double **get_axis_pointers(ndarray *a, int axis, npy_intp *count)
> {
>     npy_intp indices[NPY_MAXDIMS];
>     double **out, **tmp;
>     npy_intp i, j;
>     j = 1;
>     for (i=0; i < a->nd; i++) {
>         if (i != axis)
>             j *= a->dimensions[i];
>     }
>     *count = j;
>
>     out = (double **) malloc(*count * sizeof(double*));
>     if (out == NULL) {
>         *count = 0;
>         return NULL;
>     }
>     tmp = out;
>     switch (a->nd) {
>         /* for one dimension we just return the data pointer */
>         case 1:
>             *tmp = (double *)a->data;
>             break;
>         /* specialized for two dimensions */
>         case 2:
>             switch (axis) {
>                 case 0:
>                     for (i=0; i<a->dimensions[1]; i++)
>                         *tmp++ = (double *)(a->data + i*a->strides[1]);
>                     break;
>
>                 case 1:
>                     for (i=0; i<a->dimensions[0]; i++)
>                         *tmp++ = (double *)(a->data + i*a->strides[0]);
>                     break;
>             }
>             break;
>         /* specialized for three dimensions */
>         case 3:
>             switch (axis) {
>                 case 0:
>                     for (i=0; i<a->dimensions[1]; i++)
>                         for (j=0; j<a->dimensions[2]; j++)
>                             *tmp++ = (double *)(a->data + i*a->strides[1] +
> j*a->strides[2]);
>                     break;
>                 case 1:
>                     for (i=0; i<a->dimensions[0]; i++)
>                         for (j=0; j<a->dimensions[2]; j++)
>                             *tmp++ = (double *)(a->data + i*a->strides[0] +
> j*a->strides[2]);
>                     break;
>
>                 case 2:
>                     for (i=0; i<a->dimensions[0]; i++)
>                         for (j=0; j<a->dimensions[1]; j++)
>                             *tmp++ = (double *)(a->data + i*a->strides[0] +
> j*a->strides[1]);
>
>             }
>             break;
>         /* four or more dimensions: use recursion */
>         default:
>             for (i=0; i<a->nd; i++) indices[i] = 0;
>             _get_axis_pointer(indices, axis, a, out, 0);
>
>     }
> done:
>     return out;
>
> }
>
>
>   Another thing I did when reimplementing lfilter was "copy-in copy-out"
> for strided arrays.
>
>
>  What is copy-in copy out ? I am not familiar with this term ?
>
>
>
>
> Strided memory access is slow. So it often helps to make a temporary copy
> that are contiguous.
>
>
But for an initial refactoring it probably falls in the category of
premature optimization. Another thing to avoid on the first go around is
micro-optimization, as it tends to complicate the code and often doesn't do
much for performance.

<snip>

Chuck
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.python.org/pipermail/numpy-discussion/attachments/20100610/39a10e21/attachment.html>