[Numpy-svn] r6089 - in trunk/numpy/core: . code_generators src

numpy-svn at scipy.org numpy-svn at scipy.org
Fri Nov 21 20:29:04 EST 2008


Author: charris
Date: 2008-11-21 19:28:52 -0600 (Fri, 21 Nov 2008)
New Revision: 6089

Added:
   trunk/numpy/core/src/umath_funcs_c99.inc.src
   trunk/numpy/core/src/umath_ufunc_object.inc
Removed:
   trunk/numpy/core/src/math_c99.inc.src
   trunk/numpy/core/src/ufuncobject.c
Modified:
   trunk/numpy/core/SConscript
   trunk/numpy/core/code_generators/genapi.py
   trunk/numpy/core/setup.py
   trunk/numpy/core/src/umathmodule.c.src
Log:
Merge branch 'ufunc'

Conflicts:

	numpy/core/code_generators/genapi.py

Modified: trunk/numpy/core/SConscript
===================================================================
--- trunk/numpy/core/SConscript	2008-11-21 21:50:17 UTC (rev 6088)
+++ trunk/numpy/core/SConscript	2008-11-22 01:28:52 UTC (rev 6089)
@@ -251,7 +251,7 @@
 # Generate generated code
 #------------------------
 scalartypes_src = env.GenerateFromTemplate(pjoin('src', 'scalartypes.inc.src'))
-math_c99_src = env.GenerateFromTemplate(pjoin('src', 'math_c99.inc.src'))
+math_c99_src = env.GenerateFromTemplate(pjoin('src', 'umath_funcs_c99.inc.src'))
 arraytypes_src = env.GenerateFromTemplate(pjoin('src', 'arraytypes.inc.src'))
 sortmodule_src = env.GenerateFromTemplate(pjoin('src', '_sortmodule.c.src'))
 umathmodule_src = env.GenerateFromTemplate(pjoin('src', 'umathmodule.c.src'))

Modified: trunk/numpy/core/code_generators/genapi.py
===================================================================
--- trunk/numpy/core/code_generators/genapi.py	2008-11-21 21:50:17 UTC (rev 6088)
+++ trunk/numpy/core/code_generators/genapi.py	2008-11-22 01:28:52 UTC (rev 6089)
@@ -17,7 +17,7 @@
              'arraytypes.inc.src',
              'multiarraymodule.c',
              'scalartypes.inc.src',
-             'ufuncobject.c',
+             'umath_ufunc_object.inc',
              'umathmodule.c.src'
             ]
 THIS_DIR = os.path.dirname(__file__)

Modified: trunk/numpy/core/setup.py
===================================================================
--- trunk/numpy/core/setup.py	2008-11-21 21:50:17 UTC (rev 6088)
+++ trunk/numpy/core/setup.py	2008-11-22 01:28:52 UTC (rev 6089)
@@ -357,9 +357,9 @@
                                     generate_ufunc_api,
                                     join('src','scalartypes.inc.src'),
                                     join('src','arraytypes.inc.src'),
-                                    join('src','math_c99.inc.src'),
+                                    join('src','umath_funcs_c99.inc.src'),
                                     ],
-                         depends = [join('src','ufuncobject.c'),
+                         depends = [join('src','umath_ufunc_object.inc'),
                                     generate_umath_py,
                                     join(codegen_dir,'generate_ufunc_api.py'),
                                     ]+deps,

Deleted: trunk/numpy/core/src/math_c99.inc.src
===================================================================
--- trunk/numpy/core/src/math_c99.inc.src	2008-11-21 21:50:17 UTC (rev 6088)
+++ trunk/numpy/core/src/math_c99.inc.src	2008-11-22 01:28:52 UTC (rev 6089)
@@ -1,377 +0,0 @@
-/*
- * vim:syntax=c
- * A small module to implement missing C99 math capabilities required by numpy
- *
- * Please keep this independant of python !
- *
- * How to add a function to this section
- * -------------------------------------
- *
- * Say you want to add `foo`, these are the steps and the reasons for them.
- *
- * 1) Add foo to the appropriate list in the configuration system. The
- *    lists can be found in numpy/core/setup.py lines 63-105. Read the
- *    comments that come with them, they are very helpful.
- *
- * 2) The configuration system will define a macro HAVE_FOO if your function
- *    can be linked from the math library. The result can depend on the
- *    optimization flags as well as the compiler, so can't be known ahead of
- *    time. If the function can't be linked, then either it is absent, defined
- *    as a macro, or is an intrinsic (hardware) function. If it is linkable it
- *    may still be the case that no prototype is available. So to cover all the
- *    cases requires the following construction.
- *
- *    i) Undefine any possible macros:
- *
- *    #ifdef foo
- *    #undef foo
- *    #endif
- *
- *    ii) Check if the function was in the library, If not, define the
- *    function with npy_ prepended to its name to avoid conflict with any
- *    intrinsic versions, then use a define so that the preprocessor will
- *    replace foo with npy_foo before the compilation pass. Make the
- *    function static to avoid poluting the module library.
- *
- *    #ifdef foo
- *    #undef foo
- *    #endif
- *    #ifndef HAVE_FOO
- *    static double
- *    npy_foo(double x)
- *    {
- *        return x;
- *    }
- *    #define foo npy_foo
- *
- *    iii) Finally, even if foo is in the library, add a prototype. Just being
- *    in the library doesn't guarantee a prototype in math.h, and in any case
- *    you want to make sure the prototype is what you think it is. Count on it,
- *    whatever can go wrong will go wrong. Think defensively! The result:
- *
- *    #ifdef foo
- *    #undef foo
- *    #endif
- *    #ifndef HAVE_FOO
- *    static double
- *    npy_foo(double x)
- *    {
- *        return x;
- *    }
- *    #define foo npy_foo
- *    #else
- *    double foo(double x);
- *    #end
- *
- *    And there you have it.
- *
- */
-
-/*
- *****************************************************************************
- **                     DISTRO VOODOO                                       **
- *****************************************************************************
- */
-
-
-/*
- *****************************************************************************
- **                     BASIC MATH FUNCTIONS                                **
- *****************************************************************************
- */
-
-/* Original code by Konrad Hinsen.  */
-#ifndef HAVE_EXPM1
-static double
-npy_expm1(double x)
-{
-    double u = exp(x);
-    if (u == 1.0) {
-        return x;
-    } else if (u-1.0 == -1.0) {
-        return -1;
-    } else {
-        return (u-1.0) * x/log(u);
-    }
-}
-#define expm1 npy_expm1
-#else
-double expm1(double x);
-#endif
-
-#ifndef HAVE_LOG1P
-static double
-npy_log1p(double x)
-{
-    double u = 1. + x;
-    if (u == 1.0) {
-        return x;
-    } else {
-        return log(u) * x / (u - 1);
-    }
-}
-#define log1p npy_log1p
-#else
-double log1p(double x);
-#endif
-
-#ifndef HAVE_HYPOT
-static double
-npy_hypot(double x, double y)
-{
-    double yx;
-
-    x = fabs(x);
-    y = fabs(y);
-    if (x < y) {
-        double temp = x;
-        x = y;
-        y = temp;
-    }
-    if (x == 0.)
-        return 0.;
-    else {
-        yx = y/x;
-        return x*sqrt(1.+yx*yx);
-    }
-}
-#define hypot npy_hypot
-#else
-double hypot(double x, double y);
-#endif
-
-#ifndef HAVE_ACOSH
-static double
-npy_acosh(double x)
-{
-    return 2*log(sqrt((x+1.0)/2)+sqrt((x-1.0)/2));
-}
-#define acosh npy_acosh
-#else
-double acosh(double x);
-#endif
-
-#ifndef HAVE_ASINH
-static double
-npy_asinh(double xx)
-{
-    double x, d;
-    int sign;
-    if (xx < 0.0) {
-        sign = -1;
-        x = -xx;
-    }
-    else {
-        sign = 1;
-        x = xx;
-    }
-    if (x > 1e8) {
-        d = x;
-    } else {
-        d = sqrt(x*x + 1);
-    }
-    return sign*log1p(x*(1.0 + x/(d+1)));
-}
-#define asinh npy_asinh
-#else
-double asinh(double xx);
-#endif
-
-#ifndef HAVE_ATANH
-static double
-npy_atanh(double x)
-{
-    return 0.5*log1p(2.0*x/(1.0-x));
-}
-#define atanh npy_atanh
-#else
-double atanh(double x);
-#endif
-
-#ifndef HAVE_RINT
-static double
-npy_rint(double x)
-{
-    double y, r;
-
-    y = floor(x);
-    r = x - y;
-
-    if (r > 0.5) goto rndup;
-
-    /* Round to nearest even */
-    if (r==0.5) {
-        r = y - 2.0*floor(0.5*y);
-        if (r==1.0) {
-        rndup:
-            y+=1.0;
-        }
-    }
-    return y;
-}
-#define rint npy_rint
-#else
-double rint(double x);
-#endif
-
-#ifndef HAVE_TRUNC
-static double
-npy_trunc(double x)
-{
-    return x < 0 ? ceil(x) : floor(x);
-}
-#define trunc npy_trunc
-#else
-double trunc(double x);
-#endif
-
-#ifndef HAVE_EXP2
-#define LOG2 0.69314718055994530943
-static double
-npy_exp2(double x)
-{
-    return exp(LOG2*x);
-}
-#define exp2 npy_exp2
-#undef LOG2
-#else
-double exp2(double x);
-#endif
-
-#ifndef HAVE_LOG2
-#define INVLOG2 1.4426950408889634074
-static double
-npy_log2(double x)
-{
-    return INVLOG2*log(x);
-}
-#define log2 npy_log2
-#undef INVLOG2
-#else
-double log2(double x);
-#endif
-
-/*
- *****************************************************************************
- **                     IEEE 754 FPU HANDLING                               **
- *****************************************************************************
- */
-#if !defined(HAVE_DECL_ISNAN)
-    # define isnan(x) ((x) != (x))
-#endif
-
-/* VS 2003 with /Ox optimizes (x)-(x) to 0, which is not IEEE compliant. So we
- * force (x) + (-x), which seems to work. */
-#if !defined(HAVE_DECL_ISFINITE)
-    # define isfinite(x) !isnan((x) + (-x))
-#endif
-
-#if !defined(HAVE_DECL_ISINF)
-#define isinf(x) (!isfinite(x) && !isnan(x))
-#endif
-
-#if !defined(HAVE_DECL_SIGNBIT)
-    #include "_signbit.c"
-    # define signbit(x) \
-              (sizeof (x) == sizeof (long double) ? signbit_ld (x) \
-               : sizeof (x) == sizeof (double) ? signbit_d (x) \
-               : signbit_f (x))
-
-static int signbit_f (float x)
-{
-    return signbit_d((double)x);
-}
-
-static int signbit_ld (long double x)
-{
-    return signbit_d((double)x);
-}
-#endif
-
-/*
- * if C99 extensions not available then define dummy functions that use the
- * double versions for
- *
- * sin, cos, tan
- * sinh, cosh, tanh,
- * fabs, floor, ceil, rint, trunc
- * sqrt, log10, log, exp, expm1
- * asin, acos, atan,
- * asinh, acosh, atanh
- *
- * hypot, atan2, pow, fmod, modf
- *
- * We assume the above are always available in their double versions.
- *
- * NOTE: some facilities may be available as macro only  instead of functions.
- * For simplicity, we define our own functions and undef the macros. We could
- * instead test for the macro, but I am lazy to do that for now.
- */
-
-/**begin repeat
- * #type = longdouble, float#
- * #TYPE = LONGDOUBLE, FLOAT#
- * #c = l,f#
- * #C = L,F#
- */
-
-/**begin repeat1
- * #kind = sin,cos,tan,sinh,cosh,tanh,fabs,floor,ceil,rint,trunc,sqrt,log10,
- *         log,exp,expm1,asin,acos,atan,asinh,acosh,atanh,log1p,exp2,log2#
- * #KIND = SIN,COS,TAN,SINH,COSH,TANH,FABS,FLOOR,CEIL,RINT,TRUNC,SQRT,LOG10,
- *         LOG,EXP,EXPM1,ASIN,ACOS,ATAN,ASINH,ACOSH,ATANH,LOG1P,EXP2,LOG2#
- */
-
-#ifdef @kind@@c@
-#undef @kind@@c@
-#endif
-#ifndef HAVE_ at KIND@@C@
-static @type@
-npy_ at kind@@c@(@type@ x)
-{
-    return (@type@) @kind@((double)x);
-}
-#define @kind@@c@  npy_ at kind@@c@
-#else
- at type@ @kind@@c@(@type@ x);
-#endif
-
-/**end repeat1**/
-
-/**begin repeat1
- * #kind = atan2,hypot,pow,fmod#
- * #KIND = ATAN2,HYPOT,POW,FMOD#
- */
-#ifdef @kind@@c@
-#undef @kind@@c@
-#endif
-#ifndef HAVE_ at KIND@@C@
-static @type@
-npy_ at kind@@c@(@type@ x, @type@ y)
-{
-    return (@type@) @kind@((double)x, (double) y);
-}
-#define @kind@@c@  npy_ at kind@@c@
-#else
- at type@ @kind@@c@(@type@ x, @type@ y);
-#endif
-/**end repeat1**/
-
-#ifdef modf at c@
-#undef modf at c@
-#endif
-#ifndef HAVE_MODF at C@
-static @type@
-npy_modf at c@(@type@ x, @type@ *iptr)
-{
-    double niptr;
-    double y = modf((double)x, &niptr);
-    *iptr = (@type@) niptr;
-    return (@type@) y;
-}
-#define modf at c@ npy_modf at c@
-#else
- at type@ modf at c@(@type@ x, @type@ *iptr);
-#endif
-
-/**end repeat**/

Deleted: trunk/numpy/core/src/ufuncobject.c
===================================================================
--- trunk/numpy/core/src/ufuncobject.c	2008-11-21 21:50:17 UTC (rev 6088)
+++ trunk/numpy/core/src/ufuncobject.c	2008-11-22 01:28:52 UTC (rev 6089)
@@ -1,4134 +0,0 @@
-/*
- * Python Universal Functions Object -- Math for all types, plus fast
- * arrays math
- *
- * Full description
- *
- * This supports mathematical (and Boolean) functions on arrays and other python
- * objects.  Math on large arrays of basic C types is rather efficient.
- *
- * Travis E. Oliphant  2005, 2006 oliphant at ee.byu.edu (oliphant.travis at ieee.org)
- * Brigham Young University
- *
- * based on the
- *
- * Original Implementation:
- * Copyright (c) 1995, 1996, 1997 Jim Hugunin, hugunin at mit.edu
- *
- * with inspiration and code from
- * Numarray
- * Space Science Telescope Institute
- * J. Todd Miller
- * Perry Greenfield
- * Rick White
- *
- */
-
-
-#define USE_USE_DEFAULTS 1
-
-
-
-
-/* ---------------------------------------------------------------- */
-
-
-/* fpstatus is the ufunc_formatted hardware status
-   errmask is the handling mask specified by the user.
-   errobj is a Python object with (string, callable object or None)
-   or NULL
-*/
-
-/*
-  2. for each of the flags
-  determine whether to ignore, warn, raise error, or call Python function.
-  If ignore, do nothing
-  If warn, print a warning and continue
-  If raise return an error
-  If call, call a user-defined function with string
-*/
-
-static int
-_error_handler(int method, PyObject *errobj, char *errtype, int retstatus, int *first)
-{
-    PyObject *pyfunc, *ret, *args;
-    char *name=PyString_AS_STRING(PyTuple_GET_ITEM(errobj,0));
-    char msg[100];
-
-    ALLOW_C_API_DEF
-
-        ALLOW_C_API
-
-        switch(method) {
-        case UFUNC_ERR_WARN:
-            PyOS_snprintf(msg, sizeof(msg),
-                    "%s encountered in %s", errtype, name);
-            if (PyErr_Warn(PyExc_RuntimeWarning, msg) < 0) goto fail;
-            break;
-        case UFUNC_ERR_RAISE:
-            PyErr_Format(PyExc_FloatingPointError,
-                         "%s encountered in %s",
-                         errtype, name);
-            goto fail;
-        case UFUNC_ERR_CALL:
-            pyfunc = PyTuple_GET_ITEM(errobj, 1);
-
-            if (pyfunc == Py_None) {
-                PyErr_Format(PyExc_NameError,
-                             "python callback specified for %s (in " \
-                             " %s) but no function found.",
-                             errtype, name);
-                goto fail;
-            }
-            args = Py_BuildValue("NN", PyString_FromString(errtype),
-                                 PyInt_FromLong((long) retstatus));
-            if (args == NULL) goto fail;
-            ret = PyObject_CallObject(pyfunc, args);
-            Py_DECREF(args);
-            if (ret == NULL) goto fail;
-            Py_DECREF(ret);
-
-            break;
-        case UFUNC_ERR_PRINT:
-            if (*first) {
-                fprintf(stderr, "Warning: %s encountered in %s\n", errtype, name);
-                *first = 0;
-            }
-            break;
-        case UFUNC_ERR_LOG:
-            if (first) {
-                *first = 0;
-                pyfunc = PyTuple_GET_ITEM(errobj, 1);
-                if (pyfunc == Py_None) {
-                    PyErr_Format(PyExc_NameError,
-                                 "log specified for %s (in %s) but no " \
-                                 "object with write method found.",
-                                 errtype, name);
-                    goto fail;
-                }
-                PyOS_snprintf(msg, sizeof(msg),
-                        "Warning: %s encountered in %s\n", errtype, name);
-                ret = PyObject_CallMethod(pyfunc, "write", "s", msg);
-                if (ret == NULL) goto fail;
-                Py_DECREF(ret);
-            }
-            break;
-        }
-    DISABLE_C_API
-        return 0;
-
- fail:
-    DISABLE_C_API
-        return -1;
-}
-
-
-/*UFUNC_API*/
-static int
-PyUFunc_getfperr(void)
-{
-    int retstatus;
-    UFUNC_CHECK_STATUS(retstatus);
-    return retstatus;
-}
-
-#define HANDLEIT(NAME, str) {if (retstatus & UFUNC_FPE_##NAME) {        \
-            handle = errmask & UFUNC_MASK_##NAME;                       \
-            if (handle &&                                               \
-                _error_handler(handle >> UFUNC_SHIFT_##NAME,            \
-                               errobj, str, retstatus, first) < 0)      \
-                return -1;                                              \
-        }}
-
-/*UFUNC_API*/
-static int
-PyUFunc_handlefperr(int errmask, PyObject *errobj, int retstatus, int *first)
-{
-    int handle;
-    if (errmask && retstatus) {
-        HANDLEIT(DIVIDEBYZERO, "divide by zero");
-        HANDLEIT(OVERFLOW, "overflow");
-        HANDLEIT(UNDERFLOW, "underflow");
-        HANDLEIT(INVALID, "invalid value");
-    }
-    return 0;
-}
-
-#undef HANDLEIT
-
-
-/*UFUNC_API*/
-static int
-PyUFunc_checkfperr(int errmask, PyObject *errobj, int *first)
-{
-    int retstatus;
-
-    /* 1. check hardware flag --- this is platform dependent code */
-    retstatus = PyUFunc_getfperr();
-    return PyUFunc_handlefperr(errmask, errobj, retstatus, first);
-}
-
-
-/* Checking the status flag clears it */
-/*UFUNC_API*/
-static void
-PyUFunc_clearfperr()
-{
-    PyUFunc_getfperr();
-}
-
-
-#define NO_UFUNCLOOP        0
-#define ZERO_EL_REDUCELOOP  0
-#define ONE_UFUNCLOOP       1
-#define ONE_EL_REDUCELOOP   1
-#define NOBUFFER_UFUNCLOOP  2
-#define NOBUFFER_REDUCELOOP 2
-#define BUFFER_UFUNCLOOP  3
-#define BUFFER_REDUCELOOP 3
-#define SIGNATURE_NOBUFFER_UFUNCLOOP 4
-
-
-static char
-_lowest_type(char intype)
-{
-    switch(intype) {
-    /* case PyArray_BYTE */
-    case PyArray_SHORT:
-    case PyArray_INT:
-    case PyArray_LONG:
-    case PyArray_LONGLONG:
-        return PyArray_BYTE;
-    /* case PyArray_UBYTE */
-    case PyArray_USHORT:
-    case PyArray_UINT:
-    case PyArray_ULONG:
-    case PyArray_ULONGLONG:
-        return PyArray_UBYTE;
-    /* case PyArray_FLOAT:*/
-    case PyArray_DOUBLE:
-    case PyArray_LONGDOUBLE:
-        return PyArray_FLOAT;
-    /* case PyArray_CFLOAT:*/
-    case PyArray_CDOUBLE:
-    case PyArray_CLONGDOUBLE:
-        return PyArray_CFLOAT;
-    default:
-        return intype;
-    }
-}
-
-static char *_types_msg =  "function not supported for these types, "   \
-    "and can't coerce safely to supported types";
-
-/* Called for non-NULL user-defined functions.
-   The object should be a CObject pointing to a linked-list of functions
-   storing the function, data, and signature of all user-defined functions.
-   There must be a match with the input argument types or an error
-   will occur.
-*/
-static int
-_find_matching_userloop(PyObject *obj, int *arg_types,
-                        PyArray_SCALARKIND *scalars,
-                        PyUFuncGenericFunction *function, void **data,
-                        int nargs, int nin)
-{
-    PyUFunc_Loop1d *funcdata;
-    int i;
-    funcdata = (PyUFunc_Loop1d *)PyCObject_AsVoidPtr(obj);
-    while (funcdata != NULL) {
-        for(i=0; i<nin; i++) {
-            if (!PyArray_CanCoerceScalar(arg_types[i],
-                                         funcdata->arg_types[i],
-                                         scalars[i]))
-                break;
-        }
-        if (i==nin) { /* match found */
-            *function = funcdata->func;
-            *data = funcdata->data;
-            /* Make sure actual arg_types supported
-               by the loop are used */
-            for(i=0; i<nargs; i++) {
-                arg_types[i] = funcdata->arg_types[i];
-            }
-            return 0;
-        }
-        funcdata = funcdata->next;
-    }
-    return -1;
-}
-
-/* if only one type is specified then it is the "first" output data-type
-   and the first signature matching this output data-type is returned.
-
-   if a tuple of types is specified then an exact match to the signature
-   is searched and it much match exactly or an error occurs
-*/
-static int
-extract_specified_loop(PyUFuncObject *self, int *arg_types,
-                       PyUFuncGenericFunction *function, void **data,
-                       PyObject *type_tup, int userdef)
-{
-    Py_ssize_t n=1;
-    int *rtypenums;
-    static char msg[] = "loop written to specified type(s) not found";
-    PyArray_Descr *dtype;
-    int nargs;
-    int i, j;
-    int strtype=0;
-
-    nargs = self->nargs;
-
-    if (PyTuple_Check(type_tup)) {
-        n = PyTuple_GET_SIZE(type_tup);
-        if (n != 1 && n != nargs) {
-            PyErr_Format(PyExc_ValueError,
-                         "a type-tuple must be specified " \
-                         "of length 1 or %d for %s", nargs,
-                         self->name ? self->name : "(unknown)");
-            return -1;
-        }
-    }
-    else if PyString_Check(type_tup) {
-            Py_ssize_t slen;
-            char *thestr;
-            slen = PyString_GET_SIZE(type_tup);
-            thestr = PyString_AS_STRING(type_tup);
-            for(i=0; i < slen-2; i++) {
-                if (thestr[i] == '-' && thestr[i+1] == '>')
-                    break;
-            }
-            if (i < slen-2) {
-                strtype = 1;
-                n = slen-2;
-                if (i != self->nin ||
-                    slen-2-i != self->nout) {
-                    PyErr_Format(PyExc_ValueError,
-                                 "a type-string for %s, "   \
-                                 "requires %d typecode(s) before " \
-                                 "and %d after the -> sign",
-                                 self->name ? self->name : "(unknown)",
-                                 self->nin, self->nout);
-                    return -1;
-                }
-            }
-        }
-    rtypenums = (int *)_pya_malloc(n*sizeof(int));
-    if (rtypenums==NULL) {
-        PyErr_NoMemory();
-        return -1;
-    }
-
-    if (strtype) {
-        char *ptr;
-        ptr = PyString_AS_STRING(type_tup);
-        i = 0;
-        while (i < n) {
-            if (*ptr == '-' || *ptr == '>') {
-                ptr++;
-                continue;
-            }
-            dtype = PyArray_DescrFromType((int) *ptr);
-            if (dtype == NULL) goto fail;
-            rtypenums[i] = dtype->type_num;
-            Py_DECREF(dtype);
-            ptr++; i++;
-        }
-    }
-    else if (PyTuple_Check(type_tup)) {
-        for(i=0; i<n; i++) {
-            if (PyArray_DescrConverter(PyTuple_GET_ITEM     \
-                                       (type_tup, i),
-                                       &dtype) == NPY_FAIL)
-                goto fail;
-            rtypenums[i] = dtype->type_num;
-            Py_DECREF(dtype);
-        }
-    }
-    else {
-        if (PyArray_DescrConverter(type_tup, &dtype) == NPY_FAIL) {
-            goto fail;
-        }
-        rtypenums[0] = dtype->type_num;
-        Py_DECREF(dtype);
-    }
-
-    if (userdef > 0) { /* search in the user-defined functions */
-        PyObject *key, *obj;
-        PyUFunc_Loop1d *funcdata;
-        obj = NULL;
-        key = PyInt_FromLong((long) userdef);
-        if (key == NULL) goto fail;
-        obj = PyDict_GetItem(self->userloops, key);
-        Py_DECREF(key);
-        if (obj == NULL) {
-            PyErr_SetString(PyExc_TypeError,
-                            "user-defined type used in ufunc" \
-                            " with no registered loops");
-            goto fail;
-        }
-        /* extract the correct function
-           data and argtypes
-        */
-        funcdata = (PyUFunc_Loop1d *)PyCObject_AsVoidPtr(obj);
-        while (funcdata != NULL) {
-            if (n != 1) {
-                for(i=0; i<nargs; i++) {
-                    if (rtypenums[i] != funcdata->arg_types[i])
-                        break;
-                }
-            }
-            else if (rtypenums[0] == funcdata->arg_types[self->nin]) {
-                i = nargs;
-            }
-            else i = -1;
-            if (i == nargs) {
-                *function = funcdata->func;
-                *data = funcdata->data;
-                for(i=0; i<nargs; i++) {
-                    arg_types[i] = funcdata->arg_types[i];
-                }
-                Py_DECREF(obj);
-                goto finish;
-            }
-            funcdata = funcdata->next;
-        }
-        PyErr_SetString(PyExc_TypeError, msg);
-        goto fail;
-    }
-
-    /* look for match in self->functions */
-
-    for(j=0; j<self->ntypes; j++) {
-        if (n != 1) {
-            for(i=0; i<nargs; i++) {
-                if (rtypenums[i] != self->types[j*nargs + i])
-                    break;
-            }
-        }
-        else if (rtypenums[0] == self->types[j*nargs+self->nin]) {
-            i = nargs;
-        }
-        else i = -1;
-        if (i == nargs) {
-            *function = self->functions[j];
-            *data = self->data[j];
-            for(i=0; i<nargs; i++) {
-                arg_types[i] = self->types[j*nargs+i];
-            }
-            goto finish;
-        }
-    }
-    PyErr_SetString(PyExc_TypeError, msg);
-
-
- fail:
-    _pya_free(rtypenums);
-    return -1;
-
- finish:
-    _pya_free(rtypenums);
-    return 0;
-
-}
-
-
-/*
- * Called to determine coercion
- * Can change arg_types.
- */
-
-static int
-select_types(PyUFuncObject *self, int *arg_types,
-             PyUFuncGenericFunction *function, void **data,
-             PyArray_SCALARKIND *scalars,
-             PyObject *typetup)
-{
-    int i, j;
-    char start_type;
-    int userdef = -1;
-    int userdef_ind = -1;
-
-    if (self->userloops) {
-        for(i = 0; i < self->nin; i++) {
-            if (PyTypeNum_ISUSERDEF(arg_types[i])) {
-                userdef = arg_types[i];
-		userdef_ind = i;
-                break;
-            }
-        }
-    }
-
-    if (typetup != NULL)
-        return extract_specified_loop(self, arg_types, function, data,
-                                      typetup, userdef);
-
-    if (userdef > 0) {
-        PyObject *key, *obj;
-        int ret = -1;
-        obj = NULL;
-
-	/*
-         * Look through all the registered loops for all the user-defined
-	 * types to find a match.
-	 */
-	while (ret == -1) {
-	    if (userdef_ind >= self->nin) {
-                break;
-            }
-	    userdef = arg_types[userdef_ind++];
-	    if (!(PyTypeNum_ISUSERDEF(userdef))) {
-                continue;
-            }
-	    key = PyInt_FromLong((long) userdef);
-	    if (key == NULL) {
-                return -1;
-            }
-	    obj = PyDict_GetItem(self->userloops, key);
-	    Py_DECREF(key);
-	    if (obj == NULL) {
-                continue;
-            }
-	    /*
-             * extract the correct function
-	     * data and argtypes for this user-defined type.
-	     */
-	    ret = _find_matching_userloop(obj, arg_types, scalars,
-					  function, data, self->nargs,
-					  self->nin);
-	}
-	if (ret == 0) {
-            return ret;
-        }
-	PyErr_SetString(PyExc_TypeError, _types_msg);
-	return ret;
-    }
-
-    start_type = arg_types[0];
-    /*
-     * If the first argument is a scalar we need to place
-     * the start type as the lowest type in the class
-     */
-    if (scalars[0] != PyArray_NOSCALAR) {
-        start_type = _lowest_type(start_type);
-    }
-
-    i = 0;
-    while (i < self->ntypes && start_type > self->types[i*self->nargs]) {
-        i++;
-    }
-    for (; i < self->ntypes; i++) {
-        for (j = 0; j < self->nin; j++) {
-            if (!PyArray_CanCoerceScalar(arg_types[j],
-                                         self->types[i*self->nargs + j],
-                                         scalars[j]))
-                break;
-        }
-        if (j == self->nin) {
-            break;
-        }
-    }
-    if (i >= self->ntypes) {
-        PyErr_SetString(PyExc_TypeError, _types_msg);
-        return -1;
-    }
-    for (j = 0; j < self->nargs; j++) {
-        arg_types[j] = self->types[i*self->nargs+j];
-    }
-    if (self->data) {
-        *data = self->data[i];
-    }
-    else {
-        *data = NULL;
-    }
-    *function = self->functions[i];
-
-    return 0;
-}
-
-#if USE_USE_DEFAULTS==1
-static int PyUFunc_NUM_NODEFAULTS=0;
-#endif
-static PyObject *PyUFunc_PYVALS_NAME=NULL;
-
-
-static int
-_extract_pyvals(PyObject *ref, char *name, int *bufsize,
-                int *errmask, PyObject **errobj)
-{
-    PyObject *retval;
-
-    *errobj = NULL;
-    if (!PyList_Check(ref) || (PyList_GET_SIZE(ref)!=3)) {
-        PyErr_Format(PyExc_TypeError, "%s must be a length 3 list.",
-                     UFUNC_PYVALS_NAME);
-        return -1;
-    }
-
-    *bufsize = PyInt_AsLong(PyList_GET_ITEM(ref, 0));
-    if ((*bufsize == -1) && PyErr_Occurred()) {
-        return -1;
-    }
-    if ((*bufsize < PyArray_MIN_BUFSIZE) ||
-        (*bufsize > PyArray_MAX_BUFSIZE) ||
-        (*bufsize % 16 != 0)) {
-        PyErr_Format(PyExc_ValueError,
-                     "buffer size (%d) is not in range "
-                     "(%"INTP_FMT" - %"INTP_FMT") or not a multiple of 16",
-                     *bufsize, (intp) PyArray_MIN_BUFSIZE,
-                     (intp) PyArray_MAX_BUFSIZE);
-        return -1;
-    }
-
-    *errmask = PyInt_AsLong(PyList_GET_ITEM(ref, 1));
-    if (*errmask < 0) {
-        if (PyErr_Occurred()) {
-            return -1;
-        }
-        PyErr_Format(PyExc_ValueError,
-                     "invalid error mask (%d)",
-                     *errmask);
-        return -1;
-    }
-
-    retval = PyList_GET_ITEM(ref, 2);
-    if (retval != Py_None && !PyCallable_Check(retval)) {
-        PyObject *temp;
-        temp = PyObject_GetAttrString(retval, "write");
-        if (temp == NULL || !PyCallable_Check(temp)) {
-            PyErr_SetString(PyExc_TypeError,
-                            "python object must be callable or have " \
-                            "a callable write method");
-            Py_XDECREF(temp);
-            return -1;
-        }
-        Py_DECREF(temp);
-    }
-
-    *errobj = Py_BuildValue("NO",
-                            PyString_FromString(name),
-                            retval);
-    if (*errobj == NULL) {
-        return -1;
-    }
-
-    return 0;
-}
-
-
-
-/*UFUNC_API*/
-static int
-PyUFunc_GetPyValues(char *name, int *bufsize, int *errmask, PyObject **errobj)
-{
-    PyObject *thedict;
-    PyObject *ref = NULL;
-
-#if USE_USE_DEFAULTS==1
-    if (PyUFunc_NUM_NODEFAULTS != 0) {
-#endif
-        if (PyUFunc_PYVALS_NAME == NULL) {
-            PyUFunc_PYVALS_NAME = \
-                PyString_InternFromString(UFUNC_PYVALS_NAME);
-        }
-        thedict = PyThreadState_GetDict();
-        if (thedict == NULL) {
-            thedict = PyEval_GetBuiltins();
-        }
-        ref = PyDict_GetItem(thedict, PyUFunc_PYVALS_NAME);
-#if USE_USE_DEFAULTS==1
-    }
-#endif
-    if (ref == NULL) {
-        *errmask = UFUNC_ERR_DEFAULT;
-        *errobj = Py_BuildValue("NO",
-                                PyString_FromString(name),
-                                Py_None);
-        *bufsize = PyArray_BUFSIZE;
-        return 0;
-    }
-    return _extract_pyvals(ref, name, bufsize, errmask, errobj);
-}
-
-/* Create copies for any arrays that are less than loop->bufsize
-   in total size (or core_enabled) and are mis-behaved or in need
-   of casting.
-*/
-
-static int
-_create_copies(PyUFuncLoopObject *loop, int *arg_types, PyArrayObject **mps)
-{
-    int nin = loop->ufunc->nin;
-    int i;
-    intp size;
-    PyObject *new;
-    PyArray_Descr *ntype;
-    PyArray_Descr *atype;
-
-    for(i=0; i<nin; i++) {
-        size = PyArray_SIZE(mps[i]);
-        /* if the type of mps[i] is equivalent to arg_types[i] */
-        /* then set arg_types[i] equal to type of
-           mps[i] for later checking....
-        */
-        if (PyArray_TYPE(mps[i]) != arg_types[i]) {
-            ntype = mps[i]->descr;
-            atype = PyArray_DescrFromType(arg_types[i]);
-            if (PyArray_EquivTypes(atype, ntype)) {
-                arg_types[i] = ntype->type_num;
-            }
-            Py_DECREF(atype);
-        }
-        if (size < loop->bufsize || loop->ufunc->core_enabled) {
-            if (!(PyArray_ISBEHAVED_RO(mps[i])) ||          \
-                PyArray_TYPE(mps[i]) != arg_types[i]) {
-                ntype = PyArray_DescrFromType(arg_types[i]);
-                new = PyArray_FromAny((PyObject *)mps[i],
-                                      ntype, 0, 0,
-                                      FORCECAST | ALIGNED, NULL);
-                if (new == NULL) return -1;
-                Py_DECREF(mps[i]);
-                mps[i] = (PyArrayObject *)new;
-            }
-        }
-    }
-
-    return 0;
-}
-
-#define _GETATTR_(str, rstr) do {if (strcmp(name, #str) == 0)     \
-        return PyObject_HasAttrString(op, "__" #rstr "__");} while (0);
-
-static int
-_has_reflected_op(PyObject *op, char *name)
-{
-    _GETATTR_(add, radd);
-    _GETATTR_(subtract, rsub);
-    _GETATTR_(multiply, rmul);
-    _GETATTR_(divide, rdiv);
-    _GETATTR_(true_divide, rtruediv);
-    _GETATTR_(floor_divide, rfloordiv);
-    _GETATTR_(remainder, rmod);
-    _GETATTR_(power, rpow);
-    _GETATTR_(left_shift, rlshift);
-    _GETATTR_(right_shift, rrshift);
-    _GETATTR_(bitwise_and, rand);
-    _GETATTR_(bitwise_xor, rxor);
-    _GETATTR_(bitwise_or, ror);
-    return 0;
-}
-
-#undef _GETATTR_
-
-
-/* Return the position of next non-white-space char in the string
-*/
-static int
-_next_non_white_space(const char* str, int offset)
-{
-    int ret = offset;
-    while (str[ret] == ' ' || str[ret] == '\t') ret++;
-    return ret;
-}
-
-static int
-_is_alpha_underscore(char ch)
-{
-    return (ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z') || ch == '_';
-}
-
-static int
-_is_alnum_underscore(char ch)
-{
-    return _is_alpha_underscore(ch) || (ch >= '0' && ch <= '9');
-}
-
-/* Return the ending position of a variable name
-*/
-static int
-_get_end_of_name(const char* str, int offset)
-{
-    int ret = offset;
-    while (_is_alnum_underscore(str[ret])) ret++;
-    return ret;
-}
-
-/* Returns 1 if the dimension names pointed by s1 and s2 are the same,
-   otherwise returns 0.
-*/
-static int
-_is_same_name(const char* s1, const char* s2)
-{
-    while (_is_alnum_underscore(*s1) && _is_alnum_underscore(*s2)) {
-        if (*s1 != *s2) return 0;
-        s1++;
-        s2++;
-    }
-    return !_is_alnum_underscore(*s1) && !_is_alnum_underscore(*s2);
-}
-
-/* Sets core_num_dim_ix, core_num_dims, core_dim_ixs, core_offsets,
-   and core_signature in PyUFuncObject "self".  Returns 0 unless an
-   error occured.
-*/
-static int
-_parse_signature(PyUFuncObject *self, const char *signature)
-{
-    size_t len;
-    char const **var_names;
-    int nd = 0;             /* number of dimension of the current argument */
-    int cur_arg = 0;        /* index into core_num_dims&core_offsets */
-    int cur_core_dim = 0;   /* index into core_dim_ixs */
-    int i = 0;
-    char *parse_error = NULL;
-
-    if (signature == NULL) {
-	PyErr_SetString(PyExc_RuntimeError,
-			"_parse_signature with NULL signature");
-        return -1;
-    }
-
-    len = strlen(signature);
-    self->core_signature = _pya_malloc(sizeof(char) * (len+1));
-    if (self->core_signature)
-        strcpy(self->core_signature, signature);
-
-    /* Allocate sufficient memory to store pointers to all dimension names */
-    var_names = _pya_malloc(sizeof(char const*) * len);
-    if (var_names == NULL) {
-        PyErr_NoMemory();
-        return -1;
-    }
-    
-    self->core_enabled = 1;
-    self->core_num_dim_ix = 0;
-    self->core_num_dims = _pya_malloc(sizeof(int) * self->nargs);
-    self->core_dim_ixs = _pya_malloc(sizeof(int) * len); /* shrink this later */
-    self->core_offsets = _pya_malloc(sizeof(int) * self->nargs);
-    if (self->core_num_dims == NULL || self->core_dim_ixs == NULL ||
-        self->core_offsets == NULL) {
-        PyErr_NoMemory();
-        goto fail;
-    }
-
-    i = _next_non_white_space(signature, 0);
-
-    while (signature[i] != '\0') { /* loop over input/output arguments */
-        if (cur_arg == self->nin) {
-            /* expect "->" */
-            if (signature[i] != '-' || signature[i+1] != '>') {
-                parse_error = "expect '->'";
-                goto fail;
-            }
-            i = _next_non_white_space(signature, i+2);
-        }
-
-        /* parse core dimensions of one argument, e.g. "()", "(i)", or
-	   "(i,j)" */
-        if (signature[i] != '(') {
-            parse_error = "expect '('";
-            goto fail;
-        }
-        i = _next_non_white_space(signature, i+1);
-        while (signature[i] != ')') { /* loop over core dimensions */
-	    int j = 0;
-            if (!_is_alpha_underscore(signature[i])) {
-		parse_error = "expect dimension name";
-		goto fail;
-            }
-	    while (j < self->core_num_dim_ix) {
-		if (_is_same_name(signature+i, var_names[j])) break;
-		j++;
-	    }
-	    if (j >= self->core_num_dim_ix) {
-		var_names[j] = signature+i;
-		self->core_num_dim_ix++;
-	    }
-	    self->core_dim_ixs[cur_core_dim] = j;
-	    cur_core_dim++;
-	    nd++;
-	    i = _get_end_of_name(signature, i);
-	    i = _next_non_white_space(signature, i);
-	    if (signature[i] != ',' && signature[i] != ')') {
-		parse_error = "expect ',' or ')'";
-		goto fail;
-	    }
-	    if (signature[i] == ',')
-	    {
-		i = _next_non_white_space(signature, i+1);
-		if (signature[i] == ')') {
-		    parse_error = "',' must not be followed by ')'";
-		    goto fail;
-		}
-	    }
-        }
-	self->core_num_dims[cur_arg] = nd;
-	self->core_offsets[cur_arg] = cur_core_dim-nd;
-	cur_arg++;
-	nd = 0;
-	i = _next_non_white_space(signature, i+1);
-
-        if (cur_arg != self->nin && cur_arg != self->nargs) {
-	    /* The list of input arguments (or output arguments) was
-	       only read partially */
-            if (signature[i] != ',') {
-                parse_error = "expect ','";
-                goto fail;
-            }
-            i = _next_non_white_space(signature, i+1);
-        }
-    }
-    if (cur_arg != self->nargs) {
-	parse_error = "incomplete signature: not all arguments found";
-        goto fail;
-    }
-    self->core_dim_ixs = _pya_realloc(self->core_dim_ixs,
-                                      sizeof(int) * cur_core_dim);
-    /* check for trivial core-signature, e.g. "(),()->()" */
-    if (cur_core_dim == 0)
-        self->core_enabled = 0;
-    _pya_free((void*)var_names);
-    return 0;
-fail:
-    _pya_free((void*)var_names);
-    if (parse_error) {
-	char *buf = _pya_malloc(sizeof(char) * (len + 200));
-	if (buf) {
-	    sprintf(buf, "%s at position %d in \"%s\"",
-		    parse_error, i, signature);
-	    PyErr_SetString(PyExc_ValueError, signature);
-	    _pya_free(buf);
-	}
-	else {
-	    PyErr_NoMemory();
-	}
-    }
-    return -1;
-}
-
-/* Concatenate the loop and core dimensions of
-   PyArrayMultiIterObject's iarg-th argument, to recover a full
-   dimension array (used for output arguments).
-*/
-static npy_intp*
-_compute_output_dims(PyUFuncLoopObject *loop, int iarg,
-		     int *out_nd, npy_intp *tmp_dims)
-{
-    int i;
-    PyUFuncObject *ufunc = loop->ufunc;
-    if (ufunc->core_enabled == 0) {
-        /* case of ufunc with trivial core-signature */
-        *out_nd = loop->nd;
-        return loop->dimensions;
-    }
-
-    *out_nd = loop->nd + ufunc->core_num_dims[iarg];
-    if (*out_nd > NPY_MAXARGS) {
-        PyErr_SetString(PyExc_ValueError,
-                        "dimension of output variable exceeds limit");
-        return NULL;
-    }
-
-    /* copy loop dimensions */
-    memcpy(tmp_dims, loop->dimensions, sizeof(npy_intp) * loop->nd);
-
-    /* copy core dimension */
-    for (i = 0; i < ufunc->core_num_dims[iarg]; i++)
-        tmp_dims[loop->nd + i] = loop->core_dim_sizes[1 +
-            ufunc->core_dim_ixs[ufunc->core_offsets[iarg]+i]];
-    return tmp_dims;
-}
-
-/* Check and set core_dim_sizes and core_strides for the i-th argument.
-*/
-static int
-_compute_dimension_size(PyUFuncLoopObject *loop, PyArrayObject **mps, int i)
-{
-    PyUFuncObject *ufunc = loop->ufunc;
-    int j = ufunc->core_offsets[i];
-    int k = PyArray_NDIM(mps[i]) - ufunc->core_num_dims[i];
-    int ind;
-    for (ind = 0; ind < ufunc->core_num_dims[i]; ind++, j++, k++) {
-	npy_intp dim = k<0 ? 1 : PyArray_DIM(mps[i], k);
-	/* First element of core_dim_sizes will be used for looping */
-	int dim_ix = ufunc->core_dim_ixs[j] + 1;
-	if (loop->core_dim_sizes[dim_ix] == 1) {
-	    /* broadcast core dimension  */
-	    loop->core_dim_sizes[dim_ix] = dim;
-	}
-	else if (dim != 1 && dim != loop->core_dim_sizes[dim_ix]) {
-	    PyErr_SetString(PyExc_ValueError,
-			    "core dimensions mismatch");
-	    return -1;
-	}
-        /* First ufunc->nargs elements will be used for looping */
-	loop->core_strides[ufunc->nargs + j] =
-            dim == 1 ? 0 : PyArray_STRIDE(mps[i], k);
-    }
-    return 0;
-}
-
-/* Return a view of array "ap" with "core_nd" dimensions cut from tail. */
-static PyArrayObject *
-_trunc_coredim(PyArrayObject *ap, int core_nd)
-{
-    PyArrayObject *ret;
-    int nd = ap->nd - core_nd;
-    if (nd < 0) nd = 0;
-
-    /* The following code is basically taken from PyArray_Transpose */
-    Py_INCREF(ap->descr);  /* NewFromDescr will steal this reference */
-    ret = (PyArrayObject *)
-        PyArray_NewFromDescr(ap->ob_type, ap->descr,
-                             nd, ap->dimensions,
-                             ap->strides, ap->data, ap->flags,
-                             (PyObject *)ap);
-    if (ret == NULL) return NULL;
-
-    /* point at true owner of memory: */
-    ret->base = (PyObject *)ap;
-    Py_INCREF(ap);
-
-    PyArray_UpdateFlags(ret, CONTIGUOUS | FORTRAN);
-    
-    return ret;
-}
-
-static Py_ssize_t
-construct_arrays(PyUFuncLoopObject *loop, PyObject *args, PyArrayObject **mps,
-                 PyObject *typetup)
-{
-    Py_ssize_t nargs;
-    int i;
-    int arg_types[NPY_MAXARGS];
-    PyArray_SCALARKIND scalars[NPY_MAXARGS];
-    PyArray_SCALARKIND maxarrkind, maxsckind, new;
-    PyUFuncObject *self = loop->ufunc;
-    Bool allscalars = TRUE;
-    PyTypeObject *subtype = &PyArray_Type;
-    PyObject *context = NULL;
-    PyObject *obj;
-    int flexible = 0;
-    int object = 0;
-
-    npy_intp temp_dims[NPY_MAXDIMS];
-    npy_intp *out_dims;
-    int out_nd;
-
-    /* Check number of arguments */
-    nargs = PyTuple_Size(args);
-    if ((nargs < self->nin) || (nargs > self->nargs)) {
-        PyErr_SetString(PyExc_ValueError,
-                        "invalid number of arguments");
-        return -1;
-    }
-
-    /* Get each input argument */
-    maxarrkind = PyArray_NOSCALAR;
-    maxsckind = PyArray_NOSCALAR;
-    for(i = 0; i < self->nin; i++) {
-        obj = PyTuple_GET_ITEM(args,i);
-        if (!PyArray_Check(obj) && !PyArray_IsScalar(obj, Generic)) {
-            context = Py_BuildValue("OOi", self, args, i);
-        }
-        else {
-            context = NULL;
-        }
-        mps[i] = (PyArrayObject *)PyArray_FromAny(obj, NULL, 0, 0, 0, context);
-        Py_XDECREF(context);
-        if (mps[i] == NULL) {
-            return -1;
-        }
-        arg_types[i] = PyArray_TYPE(mps[i]);
-        if (!flexible && PyTypeNum_ISFLEXIBLE(arg_types[i])) {
-            flexible = 1;
-        }
-        if (!object && PyTypeNum_ISOBJECT(arg_types[i])) {
-            object = 1;
-        }
-        /* debug
-         * fprintf(stderr, "array %d has reference %d\n", i,
-         * (mps[i])->ob_refcnt);
-         */
-
-        /*
-         * Scalars are 0-dimensional arrays at this point
-         */
-
-        /*
-         * We need to keep track of whether or not scalars
-         * are mixed with arrays of different kinds.
-         */
-
-        if (mps[i]->nd > 0) {
-            scalars[i] = PyArray_NOSCALAR;
-            allscalars = FALSE;
-            new = PyArray_ScalarKind(arg_types[i], NULL);
-            maxarrkind = NPY_MAX(new, maxarrkind);
-        }
-        else {
-            scalars[i] = PyArray_ScalarKind(arg_types[i], &(mps[i]));
-            maxsckind = NPY_MAX(scalars[i], maxsckind);
-        }
-    }
-
-    /* We don't do strings */
-    if (flexible && !object) {
-        loop->notimplemented = 1;
-        return nargs;
-    }
-
-    /*
-     * If everything is a scalar, or scalars mixed with arrays of
-     * different kinds of lesser kinds then use normal coercion rules
-     */
-    if (allscalars || (maxsckind > maxarrkind)) {
-        for(i = 0; i < self->nin; i++) {
-            scalars[i] = PyArray_NOSCALAR;
-        }
-    }
-
-    /* Select an appropriate function for these argument types. */
-    if (select_types(loop->ufunc, arg_types, &(loop->function),
-                     &(loop->funcdata), scalars, typetup) == -1)
-        return -1;
-
-    /*
-     * FAIL with NotImplemented if the other object has
-     * the __r<op>__ method and has __array_priority__ as
-     * an attribute (signalling it can handle ndarray's)
-     * and is not already an ndarray or a subtype of the same type.
-     */
-    if ((arg_types[1] == PyArray_OBJECT) &&                         \
-        (loop->ufunc->nin==2) && (loop->ufunc->nout == 1)) {
-        PyObject *_obj = PyTuple_GET_ITEM(args, 1);
-        if (!PyArray_CheckExact(_obj) &&
-	    /* If both are same subtype of object arrays, then proceed */
-	    !(_obj->ob_type == (PyTuple_GET_ITEM(args, 0))->ob_type) &&   \
-
-            PyObject_HasAttrString(_obj, "__array_priority__") && \
-            _has_reflected_op(_obj, loop->ufunc->name)) {
-            loop->notimplemented = 1;
-            return nargs;
-        }
-    }
-
-    /*
-     * Create copies for some of the arrays if they are small
-     * enough and not already contiguous
-     */
-    if (_create_copies(loop, arg_types, mps) < 0) {
-        return -1;
-    }
-
-    /* Only use loop dimensions when constructing Iterator:
-     * temporarily replace mps[i] (will be recovered below).
-     */
-    if (self->core_enabled) {
-        for (i = 0; i < self->nin; i++) {
-            PyArrayObject *ao;
-
-	    if (_compute_dimension_size(loop, mps, i) < 0)
-		return -1;
-
-            ao = _trunc_coredim(mps[i], self->core_num_dims[i]);
-            if (ao == NULL)
-                return -1;
-            mps[i] = ao;
-        }
-    }
-
-    /* Create Iterators for the Inputs */
-    for(i = 0; i < self->nin; i++) {
-        loop->iters[i] = (PyArrayIterObject *)          \
-            PyArray_IterNew((PyObject *)mps[i]);
-        if (loop->iters[i] == NULL) {
-            return -1;
-        }
-    }
-
-
-    /* Recover mps[i]. */
-    if (self->core_enabled) {
-        for (i = 0; i < self->nin; i++) {
-            PyArrayObject *ao = mps[i];
-            mps[i] = (PyArrayObject *)mps[i]->base;
-            Py_DECREF(ao);
-        }
-    }
-
-    /* Broadcast the result */
-    loop->numiter = self->nin;
-    if (PyArray_Broadcast((PyArrayMultiIterObject *)loop) < 0) {
-        return -1;
-    }
-
-    /* Get any return arguments */
-    for(i = self->nin; i < nargs; i++) {
-        mps[i] = (PyArrayObject *)PyTuple_GET_ITEM(args, i);
-        if (((PyObject *)mps[i])==Py_None) {
-            mps[i] = NULL;
-            continue;
-        }
-        Py_INCREF(mps[i]);
-        if (!PyArray_Check((PyObject *)mps[i])) {
-            PyObject *new;
-            if (PyArrayIter_Check(mps[i])) {
-                new = PyObject_CallMethod((PyObject *)mps[i],
-                                          "__array__", NULL);
-                Py_DECREF(mps[i]);
-                mps[i] = (PyArrayObject *)new;
-            }
-            else {
-                PyErr_SetString(PyExc_TypeError,
-                                "return arrays must be "\
-                                "of ArrayType");
-                Py_DECREF(mps[i]);
-                mps[i] = NULL;
-                return -1;
-            }
-        }
-        
-
-        if (self->core_enabled) {
-            if (_compute_dimension_size(loop, mps, i) < 0)
-                return -1;
-        }
-	out_dims = _compute_output_dims(loop, i, &out_nd, temp_dims);
-	if (!out_dims) return -1;
-
-        if (mps[i]->nd != out_nd ||
-            !PyArray_CompareLists(mps[i]->dimensions,
-                                  out_dims, out_nd)) {
-            PyErr_SetString(PyExc_ValueError,
-                            "invalid return array shape");
-            Py_DECREF(mps[i]);
-            mps[i] = NULL;
-            return -1;
-        }
-        if (!PyArray_ISWRITEABLE(mps[i])) {
-            PyErr_SetString(PyExc_ValueError,
-                            "return array is not writeable");
-            Py_DECREF(mps[i]);
-            mps[i] = NULL;
-            return -1;
-        }
-    }
-
-    /* construct any missing return arrays and make output iterators */
-    for(i = self->nin; i < self->nargs; i++) {
-        PyArray_Descr *ntype;
-
-        if (mps[i] == NULL) {
-	    out_dims = _compute_output_dims(loop, i, &out_nd, temp_dims);
-	    if (!out_dims) return -1;
-
-            mps[i] = (PyArrayObject *)PyArray_New(subtype,
-                                                  out_nd,
-                                                  out_dims,
-                                                  arg_types[i],
-                                                  NULL, NULL,
-                                                  0, 0, NULL);
-            if (mps[i] == NULL) {
-                return -1;
-            }
-        }
-
-        /*
-         * reset types for outputs that are equivalent
-         * -- no sense casting uselessly
-         */
-        else {
-            if (mps[i]->descr->type_num != arg_types[i]) {
-                PyArray_Descr *atype;
-                ntype = mps[i]->descr;
-                atype = PyArray_DescrFromType(arg_types[i]);
-                if (PyArray_EquivTypes(atype, ntype)) {
-                    arg_types[i] = ntype->type_num;
-                }
-                Py_DECREF(atype);
-            }
-
-            /* still not the same -- or will we have to use buffers?*/
-            if (mps[i]->descr->type_num != arg_types[i] ||
-                !PyArray_ISBEHAVED_RO(mps[i])) {
-                if (loop->size < loop->bufsize || self->core_enabled) {
-                    PyObject *new;
-                    /*
-                     * Copy the array to a temporary copy
-                     * and set the UPDATEIFCOPY flag
-                     */
-                    ntype = PyArray_DescrFromType(arg_types[i]);
-                    new = PyArray_FromAny((PyObject *)mps[i],
-                                          ntype, 0, 0,
-                                          FORCECAST | ALIGNED |
-                                          UPDATEIFCOPY, NULL);
-                    if (new == NULL) {
-                        return -1;
-                    }
-                    Py_DECREF(mps[i]);
-                    mps[i] = (PyArrayObject *)new;
-                }
-            }
-        }
-
-        if (self->core_enabled) {
-            PyArrayObject *ao;
-
-            /* computer for all output arguments, and set strides in "loop" */
-            if (_compute_dimension_size(loop, mps, i) < 0)
-                return -1;
-
-            ao = _trunc_coredim(mps[i], self->core_num_dims[i]);
-            if (ao == NULL)
-                return -1;
-            /* Temporarily modify mps[i] for constructing iterator. */
-            mps[i] = ao;
-        }
-
-        loop->iters[i] = (PyArrayIterObject *)          \
-            PyArray_IterNew((PyObject *)mps[i]);
-        if (loop->iters[i] == NULL) {
-            return -1;
-        }
-
-	/* Recover mps[i]. */
-	if (self->core_enabled) {
-	    PyArrayObject *ao = mps[i];
-	    mps[i] = (PyArrayObject *)mps[i]->base;
-	    Py_DECREF(ao);
-	}
-
-    }
-
-    /*
-     * If any of different type, or misaligned or swapped
-     * then must use buffers
-     */
-    loop->bufcnt = 0;
-    loop->obj = 0;
-
-    /* Determine looping method needed */
-    loop->meth = NO_UFUNCLOOP;
-
-    if (loop->size == 0) {
-        return nargs;
-    }
-
-    if (self->core_enabled) {
-        loop->meth = SIGNATURE_NOBUFFER_UFUNCLOOP;
-    }
-
-    for(i = 0; i < self->nargs; i++) {
-        loop->needbuffer[i] = 0;
-        if (arg_types[i] != mps[i]->descr->type_num ||
-            !PyArray_ISBEHAVED_RO(mps[i])) {
-            if (self->core_enabled) {
-                PyErr_SetString(PyExc_RuntimeError,
-                                "never reached; copy should have been made");
-                return -1;
-            }
-            loop->meth = BUFFER_UFUNCLOOP;
-            loop->needbuffer[i] = 1;
-        }
-        if (!loop->obj && ((mps[i]->descr->type_num == PyArray_OBJECT) ||
-			   (arg_types[i] == PyArray_OBJECT))) {
-            loop->obj = 1;
-        }
-    }
-
-
-    if (self->core_enabled && loop->obj) {
-	PyErr_SetString(PyExc_TypeError,
-			"Object type not allowed in ufunc with signature");
-	return -1;
-    }
-
-    if (loop->meth == NO_UFUNCLOOP) {
-        loop->meth = ONE_UFUNCLOOP;
-
-        /* All correct type and BEHAVED */
-        /* Check for non-uniform stridedness */
-        for(i = 0; i < self->nargs; i++) {
-            if (!(loop->iters[i]->contiguous)) {
-                /*
-                 * May still have uniform stride
-                 * if (broadcast result) <= 1-d
-                 */
-                if (mps[i]->nd != 0 &&                  \
-                    (loop->iters[i]->nd_m1 > 0)) {
-                    loop->meth = NOBUFFER_UFUNCLOOP;
-                    break;
-                }
-            }
-        }
-        if (loop->meth == ONE_UFUNCLOOP) {
-            for(i = 0; i < self->nargs; i++) {
-                loop->bufptr[i] = mps[i]->data;
-            }
-        }
-    }
-
-    loop->numiter = self->nargs;
-
-    /* Fill in steps  */
-    if (loop->meth == SIGNATURE_NOBUFFER_UFUNCLOOP && loop->nd == 0) {
-	/* Use default core_strides */
-    }
-    else if (loop->meth != ONE_UFUNCLOOP) {
-        int ldim;
-        intp minsum;
-        intp maxdim;
-        PyArrayIterObject *it;
-        intp stride_sum[NPY_MAXDIMS];
-        int j;
-
-        /* Fix iterators */
-
-        /*
-         * Optimize axis the iteration takes place over
-         *
-         * The first thought was to have the loop go
-         * over the largest dimension to minimize the number of loops
-         *
-         * However, on processors with slow memory bus and cache,
-         * the slowest loops occur when the memory access occurs for
-         * large strides.
-         *
-         * Thus, choose the axis for which strides of the last iterator is
-         * smallest but non-zero.
-         */
-
-        for(i = 0; i < loop->nd; i++) {
-            stride_sum[i] = 0;
-            for(j = 0; j < loop->numiter; j++) {
-                stride_sum[i] += loop->iters[j]->strides[i];
-            }
-        }
-
-        ldim = loop->nd - 1;
-        minsum = stride_sum[loop->nd-1];
-        for(i = loop->nd - 2; i >= 0; i--) {
-            if (stride_sum[i] < minsum ) {
-                ldim = i;
-                minsum = stride_sum[i];
-            }
-        }
-
-        maxdim = loop->dimensions[ldim];
-        loop->size /= maxdim;
-        loop->bufcnt = maxdim;
-        loop->lastdim = ldim;
-
-        /*
-         * Fix the iterators so the inner loop occurs over the
-         * largest dimensions -- This can be done by
-         * setting the size to 1 in that dimension
-         * (just in the iterators)
-         */
-        for(i = 0; i < loop->numiter; i++) {
-            it = loop->iters[i];
-            it->contiguous = 0;
-            it->size /= (it->dims_m1[ldim]+1);
-            it->dims_m1[ldim] = 0;
-            it->backstrides[ldim] = 0;
-
-            /*
-             * (won't fix factors because we
-             * don't use PyArray_ITER_GOTO1D
-             * so don't change them)
-             *
-             * Set the steps to the strides in that dimension
-             */
-            loop->steps[i] = it->strides[ldim];
-        }
-
-        /*
-         * Set looping part of core_dim_sizes and core_strides.
-         */
-        if (loop->meth == SIGNATURE_NOBUFFER_UFUNCLOOP) {
-            loop->core_dim_sizes[0] = maxdim;
-            for (i = 0; i < self->nargs; i++) {
-                loop->core_strides[i] = loop->steps[i];
-            }
-        }
-
-        /*
-         * fix up steps where we will be copying data to
-         * buffers and calculate the ninnerloops and leftover
-         * values -- if step size is already zero that is not changed...
-         */
-        if (loop->meth == BUFFER_UFUNCLOOP) {
-            loop->leftover = maxdim % loop->bufsize;
-            loop->ninnerloops = (maxdim / loop->bufsize) + 1;
-            for(i = 0; i < self->nargs; i++) {
-                if (loop->needbuffer[i] && loop->steps[i]) {
-                    loop->steps[i] = mps[i]->descr->elsize;
-                }
-                /* These are changed later if casting is needed */
-            }
-        }
-    }
-    else if (loop->meth == ONE_UFUNCLOOP) {
-        /* uniformly-strided case */
-        for(i = 0; i < self->nargs; i++) {
-            if (PyArray_SIZE(mps[i]) == 1)
-                loop->steps[i] = 0;
-            else
-                loop->steps[i] = mps[i]->strides[mps[i]->nd-1];
-        }
-    }
-
-
-    /* Finally, create memory for buffers if we need them */
-
-    /*
-     * Buffers for scalars are specially made small -- scalars are
-     * not copied multiple times
-     */
-    if (loop->meth == BUFFER_UFUNCLOOP) {
-        int cnt = 0, cntcast = 0; /* keeps track of bytes to allocate */
-        int scnt = 0, scntcast = 0;
-        char *castptr;
-        char *bufptr;
-        int last_was_scalar=0;
-        int last_cast_was_scalar=0;
-        int oldbufsize=0;
-        int oldsize=0;
-        int scbufsize = 4*sizeof(double);
-        int memsize;
-        PyArray_Descr *descr;
-
-        /* compute the element size */
-        for(i = 0; i < self->nargs; i++) {
-            if (!loop->needbuffer[i]) {
-                continue;
-            }
-            if (arg_types[i] != mps[i]->descr->type_num) {
-                descr = PyArray_DescrFromType(arg_types[i]);
-                if (loop->steps[i]) {
-                    cntcast += descr->elsize;
-                }
-                else {
-                    scntcast += descr->elsize;
-                }
-                if (i < self->nin) {
-                    loop->cast[i] = PyArray_GetCastFunc(mps[i]->descr,
-                                            arg_types[i]);
-                }
-                else {
-                    loop->cast[i] = PyArray_GetCastFunc \
-                        (descr, mps[i]->descr->type_num);
-                }
-                Py_DECREF(descr);
-                if (!loop->cast[i]) {
-                    return -1;
-                }
-            }
-            loop->swap[i] = !(PyArray_ISNOTSWAPPED(mps[i]));
-            if (loop->steps[i]) {
-                cnt += mps[i]->descr->elsize;
-            }
-            else {
-                scnt += mps[i]->descr->elsize;
-            }
-        }
-        memsize = loop->bufsize*(cnt+cntcast) + scbufsize*(scnt+scntcast);
-        loop->buffer[0] = PyDataMem_NEW(memsize);
-
-        /* debug
-         * fprintf(stderr, "Allocated buffer at %p of size %d, cnt=%d, cntcast=%d\n",
-         *               loop->buffer[0], loop->bufsize * (cnt + cntcast), cnt, cntcast);
-         */
-
-        if (loop->buffer[0] == NULL) {
-            PyErr_NoMemory();
-            return -1;
-        }
-        if (loop->obj) {
-            memset(loop->buffer[0], 0, memsize);
-        }
-        castptr = loop->buffer[0] + loop->bufsize*cnt + scbufsize*scnt;
-        bufptr = loop->buffer[0];
-        loop->objfunc = 0;
-        for(i = 0; i < self->nargs; i++) {
-            if (!loop->needbuffer[i]) {
-                continue;
-            }
-            loop->buffer[i] = bufptr + (last_was_scalar ? scbufsize : \
-                                        loop->bufsize)*oldbufsize;
-            last_was_scalar = (loop->steps[i] == 0);
-            bufptr = loop->buffer[i];
-            oldbufsize = mps[i]->descr->elsize;
-            /* fprintf(stderr, "buffer[%d] = %p\n", i, loop->buffer[i]); */
-            if (loop->cast[i]) {
-                PyArray_Descr *descr;
-                loop->castbuf[i] = castptr + (last_cast_was_scalar ? scbufsize : \
-                                              loop->bufsize)*oldsize;
-                last_cast_was_scalar = last_was_scalar;
-                /* fprintf(stderr, "castbuf[%d] = %p\n", i, loop->castbuf[i]); */
-                descr = PyArray_DescrFromType(arg_types[i]);
-                oldsize = descr->elsize;
-                Py_DECREF(descr);
-                loop->bufptr[i] = loop->castbuf[i];
-                castptr = loop->castbuf[i];
-                if (loop->steps[i])
-                    loop->steps[i] = oldsize;
-            }
-            else {
-                loop->bufptr[i] = loop->buffer[i];
-            }
-            if (!loop->objfunc && loop->obj) {
-                if (arg_types[i] == PyArray_OBJECT) {
-                    loop->objfunc = 1;
-                }
-            }
-        }
-    }
-    return nargs;
-}
-
-static void
-ufuncreduce_dealloc(PyUFuncReduceObject *self)
-{
-    if (self->ufunc) {
-        Py_XDECREF(self->it);
-        Py_XDECREF(self->rit);
-        Py_XDECREF(self->ret);
-        Py_XDECREF(self->errobj);
-        Py_XDECREF(self->decref);
-        if (self->buffer) PyDataMem_FREE(self->buffer);
-        Py_DECREF(self->ufunc);
-    }
-    _pya_free(self);
-}
-
-static void
-ufuncloop_dealloc(PyUFuncLoopObject *self)
-{
-    int i;
-
-    if (self->ufunc != NULL) {
-	if (self->core_dim_sizes)
-	    _pya_free(self->core_dim_sizes);
-	if (self->core_strides)
-	    _pya_free(self->core_strides);
-        for(i = 0; i < self->ufunc->nargs; i++)
-            Py_XDECREF(self->iters[i]);
-        if (self->buffer[0]) {
-            PyDataMem_FREE(self->buffer[0]);
-        }
-        Py_XDECREF(self->errobj);
-        Py_DECREF(self->ufunc);
-    }
-    _pya_free(self);
-}
-
-static PyUFuncLoopObject *
-construct_loop(PyUFuncObject *self, PyObject *args, PyObject *kwds, PyArrayObject **mps)
-{
-    PyUFuncLoopObject *loop;
-    int i;
-    PyObject *typetup = NULL;
-    PyObject *extobj = NULL;
-    char *name;
-
-    if (self == NULL) {
-        PyErr_SetString(PyExc_ValueError, "function not supported");
-        return NULL;
-    }
-    if ((loop = _pya_malloc(sizeof(PyUFuncLoopObject))) == NULL) {
-        PyErr_NoMemory();
-        return loop;
-    }
-
-    loop->index = 0;
-    loop->ufunc = self;
-    Py_INCREF(self);
-    loop->buffer[0] = NULL;
-    for(i = 0; i < self->nargs; i++) {
-        loop->iters[i] = NULL;
-        loop->cast[i] = NULL;
-    }
-    loop->errobj = NULL;
-    loop->notimplemented = 0;
-    loop->first = 1;
-    loop->core_dim_sizes = NULL;
-    loop->core_strides = NULL;
-
-    if (self->core_enabled) {
-	int num_dim_ix = 1 + self->core_num_dim_ix;
-	int nstrides = self->nargs + self->core_offsets[self->nargs-1]
-	    + self->core_num_dims[self->nargs-1];
-	loop->core_dim_sizes = _pya_malloc(sizeof(npy_intp) * num_dim_ix);
-	loop->core_strides = _pya_malloc(sizeof(npy_intp) * nstrides);
-	if (loop->core_dim_sizes == NULL || loop->core_strides == NULL) {
-	    PyErr_NoMemory();
-	    goto fail;
-	}
-	memset(loop->core_strides, 0, sizeof(npy_intp) * nstrides);
-	for (i = 0; i < num_dim_ix; i++)
-	    loop->core_dim_sizes[i] = 1;
-    }
-
-    name = self->name ? self->name : "";
-
-    /*
-     * Extract sig= keyword and extobj= keyword if present.
-     * Raise an error if anything else is present in the
-     * keyword dictionary
-     */
-    if (kwds != NULL) {
-        PyObject *key, *value;
-        Py_ssize_t pos=0;
-        while (PyDict_Next(kwds, &pos, &key, &value)) {
-            char *keystring = PyString_AsString(key);
-            if (keystring == NULL) {
-                PyErr_Clear();
-                PyErr_SetString(PyExc_TypeError, "invalid keyword");
-                goto fail;
-            }
-            if (strncmp(keystring,"extobj",6) == 0) {
-                extobj = value;
-            }
-            else if (strncmp(keystring,"sig",3) == 0) {
-                typetup = value;
-            }
-            else {
-                char *format = "'%s' is an invalid keyword to %s";
-                PyErr_Format(PyExc_TypeError,format,keystring, name);
-                goto fail;
-            }
-        }
-    }
-
-    if (extobj == NULL) {
-        if (PyUFunc_GetPyValues(name,
-                                &(loop->bufsize), &(loop->errormask),
-                                &(loop->errobj)) < 0) {
-            goto fail;
-        }
-    }
-    else {
-        if (_extract_pyvals(extobj, name,
-                            &(loop->bufsize), &(loop->errormask),
-                            &(loop->errobj)) < 0) {
-            goto fail;
-        }
-    }
-
-    /* Setup the arrays */
-    if (construct_arrays(loop, args, mps, typetup) < 0) {
-        goto fail;
-    }
-
-    PyUFunc_clearfperr();
-    return loop;
-
-fail:
-    ufuncloop_dealloc(loop);
-    return NULL;
-}
-
-
-/*
-  static void
-  _printbytebuf(PyUFuncLoopObject *loop, int bufnum)
-  {
-  int i;
-
-  fprintf(stderr, "Printing byte buffer %d\n", bufnum);
-  for(i=0; i<loop->bufcnt; i++) {
-  fprintf(stderr, "  %d\n", *(((byte *)(loop->buffer[bufnum]))+i));
-  }
-  }
-
-  static void
-  _printlongbuf(PyUFuncLoopObject *loop, int bufnum)
-  {
-  int i;
-
-  fprintf(stderr, "Printing long buffer %d\n", bufnum);
-  for(i=0; i<loop->bufcnt; i++) {
-  fprintf(stderr, "  %ld\n", *(((long *)(loop->buffer[bufnum]))+i));
-  }
-  }
-
-  static void
-  _printlongbufptr(PyUFuncLoopObject *loop, int bufnum)
-  {
-  int i;
-
-  fprintf(stderr, "Printing long buffer %d\n", bufnum);
-  for(i=0; i<loop->bufcnt; i++) {
-  fprintf(stderr, "  %ld\n", *(((long *)(loop->bufptr[bufnum]))+i));
-  }
-  }
-
-
-
-  static void
-  _printcastbuf(PyUFuncLoopObject *loop, int bufnum)
-  {
-  int i;
-
-  fprintf(stderr, "Printing long buffer %d\n", bufnum);
-  for(i=0; i<loop->bufcnt; i++) {
-  fprintf(stderr, "  %ld\n", *(((long *)(loop->castbuf[bufnum]))+i));
-  }
-  }
-
-*/
-
-
-
-
-/*
- * currently generic ufuncs cannot be built for use on flexible arrays.
- *
- * The cast functions in the generic loop would need to be fixed to pass
- * in something besides NULL, NULL.
- *
- * Also the underlying ufunc loops would not know the element-size unless
- * that was passed in as data (which could be arranged).
- *
- */
-
-/*
- * This generic function is called with the ufunc object, the arguments to it,
- * and an array of (pointers to) PyArrayObjects which are NULL.  The
- * arguments are parsed and placed in mps in construct_loop (construct_arrays)
- */
-
-/*UFUNC_API*/
-static int
-PyUFunc_GenericFunction(PyUFuncObject *self, PyObject *args, PyObject *kwds,
-                        PyArrayObject **mps)
-{
-    PyUFuncLoopObject *loop;
-    int i;
-    NPY_BEGIN_THREADS_DEF;
-
-    if (!(loop = construct_loop(self, args, kwds, mps))) {
-        return -1;
-    }
-    if (loop->notimplemented) {
-        ufuncloop_dealloc(loop);
-        return -2;
-    }
-    if (self->core_enabled && loop->meth != SIGNATURE_NOBUFFER_UFUNCLOOP) {
-	PyErr_SetString(PyExc_RuntimeError,
-			"illegal loop method for ufunc with signature");
-	goto fail;
-    }
-
-    NPY_LOOP_BEGIN_THREADS;
-    switch(loop->meth) {
-        case ONE_UFUNCLOOP:
-            /*
-             * Everything is contiguous, notswapped, aligned,
-             * and of the right type.  -- Fastest.
-             * Or if not contiguous, then a single-stride
-             * increment moves through the entire array.
-             */
-            /*fprintf(stderr, "ONE...%d\n", loop->size);*/
-            loop->function((char **)loop->bufptr, &(loop->size),
-                    loop->steps, loop->funcdata);
-            UFUNC_CHECK_ERROR(loop);
-            break;
-
-        case NOBUFFER_UFUNCLOOP:
-            /*
-             * Everything is notswapped, aligned and of the
-             * right type but not contiguous. -- Almost as fast.
-             */
-            /*fprintf(stderr, "NOBUFFER...%d\n", loop->size);*/
-            
-             while (loop->index < loop->size) {
-                for(i = 0; i < self->nargs; i++) {
-                    loop->bufptr[i] = loop->iters[i]->dataptr;
-                }
-                loop->function((char **)loop->bufptr, &(loop->bufcnt),
-                        loop->steps, loop->funcdata);
-                UFUNC_CHECK_ERROR(loop);
-
-                /* Adjust loop pointers */
-                for(i = 0; i < self->nargs; i++) {
-                    PyArray_ITER_NEXT(loop->iters[i]);
-                }
-                loop->index++;
-            }
-            break;
-
-        case SIGNATURE_NOBUFFER_UFUNCLOOP:
-	    while (loop->index < loop->size) {
-		for(i = 0; i < self->nargs; i++) {
-		    loop->bufptr[i] = loop->iters[i]->dataptr;
-		}
-		loop->function((char **)loop->bufptr, loop->core_dim_sizes,
-			       loop->core_strides, loop->funcdata);
-		UFUNC_CHECK_ERROR(loop);
-
-		/* Adjust loop pointers */
-		for(i = 0; i < self->nargs; i++) {
-		    PyArray_ITER_NEXT(loop->iters[i]);
-		}
-		loop->index++;
-	    }
-	    break;
-
-        case BUFFER_UFUNCLOOP: {
-               PyArray_CopySwapNFunc *copyswapn[NPY_MAXARGS];
-               PyArrayIterObject **iters=loop->iters;
-               int *swap=loop->swap;
-               char **dptr=loop->dptr;
-               int mpselsize[NPY_MAXARGS];
-               intp laststrides[NPY_MAXARGS];
-               int fastmemcpy[NPY_MAXARGS];
-               int *needbuffer=loop->needbuffer;
-               intp index=loop->index, size=loop->size;
-               int bufsize;
-               intp bufcnt;
-               int copysizes[NPY_MAXARGS];
-               char **bufptr = loop->bufptr;
-               char **buffer = loop->buffer;
-               char **castbuf = loop->castbuf;
-               intp *steps = loop->steps;
-               char *tptr[NPY_MAXARGS];
-               int ninnerloops = loop->ninnerloops;
-               Bool pyobject[NPY_MAXARGS];
-               int datasize[NPY_MAXARGS];
-               int j, k, stopcondition;
-               char *myptr1, *myptr2;
-
-               for(i = 0; i <self->nargs; i++) {
-                   copyswapn[i] = mps[i]->descr->f->copyswapn;
-                   mpselsize[i] = mps[i]->descr->elsize;
-                   pyobject[i] = (loop->obj && \
-                           (mps[i]->descr->type_num == PyArray_OBJECT));
-                   laststrides[i] = iters[i]->strides[loop->lastdim];
-                   if (steps[i] && laststrides[i] != mpselsize[i]) {
-                       fastmemcpy[i] = 0;
-                   }
-                   else {
-                       fastmemcpy[i] = 1;
-                   }
-               }
-               /* Do generic buffered looping here (works for any kind of
-                * arrays -- some need buffers, some don't.
-                *
-                *
-                * New algorithm: N is the largest dimension.  B is the buffer-size.
-                * quotient is loop->ninnerloops-1
-                * remainder is loop->leftover
-                *
-                * Compute N = quotient * B + remainder.
-                * quotient = N / B  # integer math
-                * (store quotient + 1) as the number of innerloops
-                * remainder = N % B # integer remainder
-                *
-                * On the inner-dimension we will have (quotient + 1) loops where
-                * the size of the inner function is B for all but the last when the niter size is
-                * remainder.
-                *
-                * So, the code looks very similar to NOBUFFER_LOOP except the inner-most loop is
-                * replaced with...
-                *
-                * for(i=0; i<quotient+1; i++) {
-                * if (i==quotient+1) make itersize remainder size
-                * copy only needed items to buffer.
-                * swap input buffers if needed
-                * cast input buffers if needed
-                * call loop_function()
-                * cast outputs in buffers if needed
-                * swap outputs in buffers if needed
-                * copy only needed items back to output arrays.
-                * update all data-pointers by strides*niter
-                * }
-                */
-
-
-               /*
-                * fprintf(stderr, "BUFFER...%d,%d,%d\n", loop->size,
-                * loop->ninnerloops, loop->leftover);
-                */
-               /*
-                * for(i=0; i<self->nargs; i++) {
-                * fprintf(stderr, "iters[%d]->dataptr = %p, %p of size %d\n", i,
-                * iters[i], iters[i]->ao->data, PyArray_NBYTES(iters[i]->ao));
-                * }
-                */
-               stopcondition = ninnerloops;
-               if (loop->leftover == 0) stopcondition--;
-               while (index < size) {
-                   bufsize=loop->bufsize;
-                   for(i = 0; i<self->nargs; i++) {
-                       tptr[i] = loop->iters[i]->dataptr;
-                       if (needbuffer[i]) {
-                           dptr[i] = bufptr[i];
-                           datasize[i] = (steps[i] ? bufsize : 1);
-                           copysizes[i] = datasize[i] * mpselsize[i];
-                       }
-                       else {
-                           dptr[i] = tptr[i];
-                       }
-                   }
-
-                   /* This is the inner function over the last dimension */
-                   for(k = 1; k<=stopcondition; k++) {
-                       if (k == ninnerloops) {
-                           bufsize = loop->leftover;
-                           for(i=0; i<self->nargs;i++) {
-                               if (!needbuffer[i]) {
-                                   continue;
-                               }
-                               datasize[i] = (steps[i] ? bufsize : 1);
-                               copysizes[i] = datasize[i] * mpselsize[i];
-                           }
-                       }
-                       for(i = 0; i < self->nin; i++) {
-                           if (!needbuffer[i]) {
-                               continue;
-                           }
-                           if (fastmemcpy[i]) {
-                               memcpy(buffer[i], tptr[i], copysizes[i]);
-                           }
-                           else {
-                               myptr1 = buffer[i];
-                               myptr2 = tptr[i];
-                               for(j = 0; j < bufsize; j++) {
-                                   memcpy(myptr1, myptr2, mpselsize[i]);
-                                   myptr1 += mpselsize[i];
-                                   myptr2 += laststrides[i];
-                               }
-                           }
-
-                           /* swap the buffer if necessary */
-                           if (swap[i]) {
-                               /* fprintf(stderr, "swapping...\n");*/
-                               copyswapn[i](buffer[i], mpselsize[i], NULL, -1,
-                                       (intp) datasize[i], 1,
-                                       mps[i]);
-                           }
-                           /* cast to the other buffer if necessary */
-                           if (loop->cast[i]) {
-                               /* fprintf(stderr, "casting... %d, %p %p\n", i, buffer[i]); */
-                               loop->cast[i](buffer[i], castbuf[i],
-                                       (intp) datasize[i],
-                                       NULL, NULL);
-                           }
-                       }
-
-                       bufcnt = (intp) bufsize;
-                       loop->function((char **)dptr, &bufcnt, steps, loop->funcdata);
-                       UFUNC_CHECK_ERROR(loop);
-
-                       for(i=self->nin; i<self->nargs; i++) {
-                           if (!needbuffer[i]) {
-                               continue;
-                           }
-                           if (loop->cast[i]) {
-                               /* fprintf(stderr, "casting back... %d, %p", i, castbuf[i]); */
-                               loop->cast[i](castbuf[i],
-                                       buffer[i],
-                                       (intp) datasize[i],
-                                       NULL, NULL);
-                           }
-                           if (swap[i]) {
-                               copyswapn[i](buffer[i], mpselsize[i], NULL, -1,
-                                       (intp) datasize[i], 1,
-                                       mps[i]);
-                           }
-                           /*
-                            * copy back to output arrays
-                            * decref what's already there for object arrays
-                            */
-                           if (pyobject[i]) {
-                               myptr1 = tptr[i];
-                               for(j = 0; j < datasize[i]; j++) {
-                                   Py_XDECREF(*((PyObject **)myptr1));
-                                   myptr1 += laststrides[i];
-                               }
-                           }
-                           if (fastmemcpy[i])
-                               memcpy(tptr[i], buffer[i], copysizes[i]);
-                           else {
-                               myptr2 = buffer[i];
-                               myptr1 = tptr[i];
-                               for(j = 0; j < bufsize; j++) {
-                                   memcpy(myptr1, myptr2,
-                                           mpselsize[i]);
-                                   myptr1 += laststrides[i];
-                                   myptr2 += mpselsize[i];
-                               }
-                           }
-                       }
-                       if (k == stopcondition) {
-                           continue;
-                       }
-                       for(i = 0; i < self->nargs; i++) {
-                           tptr[i] += bufsize * laststrides[i];
-                           if (!needbuffer[i]) {
-                               dptr[i] = tptr[i];
-                           }
-                       }
-                   }
-                   /* end inner function over last dimension */
-
-                   if (loop->objfunc) {
-                       /*
-                        * DECREF castbuf when underlying function used
-                        * object arrays and casting was needed to get
-                        * to object arrays
-                        */
-                       for(i = 0; i < self->nargs; i++) {
-                           if (loop->cast[i]) {
-                               if (steps[i] == 0) {
-                                   Py_XDECREF(*((PyObject **)castbuf[i]));
-                               }
-                               else {
-                                   int size = loop->bufsize;
-
-                                   PyObject **objptr = (PyObject **)castbuf[i];
-                                   /*
-                                    * size is loop->bufsize unless there
-                                    * was only one loop
-                                    */
-                                   if (ninnerloops == 1) {
-                                       size = loop->leftover;
-                                   }
-                                   for(j = 0; j < size; j++) {
-                                       Py_XDECREF(*objptr);
-                                       *objptr = NULL;
-                                       objptr += 1;
-                                   }
-                               }
-                           }
-                       }
-
-                   }
-                   /* fixme -- probably not needed here*/
-                   UFUNC_CHECK_ERROR(loop);
-
-                   for(i=0; i<self->nargs; i++) {
-                       PyArray_ITER_NEXT(loop->iters[i]);
-                   }
-                   index++;
-               }
-           }
-    }
-
-    NPY_LOOP_END_THREADS;
-    ufuncloop_dealloc(loop);
-    return 0;
-
-fail:
-    NPY_LOOP_END_THREADS;
-    if (loop) ufuncloop_dealloc(loop);
-    return -1;
-}
-
-static PyArrayObject *
-_getidentity(PyUFuncObject *self, int otype, char *str)
-{
-    PyObject *obj, *arr;
-    PyArray_Descr *typecode;
-
-    if (self->identity == PyUFunc_None) {
-        PyErr_Format(PyExc_ValueError,
-                     "zero-size array to ufunc.%s "      \
-                     "without identity", str);
-        return NULL;
-    }
-    if (self->identity == PyUFunc_One) {
-        obj = PyInt_FromLong((long) 1);
-    } else {
-        obj = PyInt_FromLong((long) 0);
-    }
-
-    typecode = PyArray_DescrFromType(otype);
-    arr = PyArray_FromAny(obj, typecode, 0, 0, CARRAY, NULL);
-    Py_DECREF(obj);
-    return (PyArrayObject *)arr;
-}
-
-static int
-_create_reduce_copy(PyUFuncReduceObject *loop, PyArrayObject **arr, int rtype)
-{
-    intp maxsize;
-    PyObject *new;
-    PyArray_Descr *ntype;
-
-    maxsize = PyArray_SIZE(*arr);
-
-    if (maxsize < loop->bufsize) {
-        if (!(PyArray_ISBEHAVED_RO(*arr)) ||
-            PyArray_TYPE(*arr) != rtype) {
-            ntype = PyArray_DescrFromType(rtype);
-            new = PyArray_FromAny((PyObject *)(*arr),
-                                  ntype, 0, 0,
-                                  FORCECAST | ALIGNED, NULL);
-            if (new == NULL) {
-                return -1;
-            }
-            *arr = (PyArrayObject *)new;
-            loop->decref = new;
-        }
-    }
-
-    /* Don't decref *arr before re-assigning
-       because it was not going to be DECREF'd anyway.
-
-       If a copy is made, then the copy will be removed
-       on deallocation of the loop structure by setting
-       loop->decref.
-    */
-
-    return 0;
-}
-
-static PyUFuncReduceObject *
-construct_reduce(PyUFuncObject *self, PyArrayObject **arr, PyArrayObject *out,
-                 int axis, int otype, int operation, intp ind_size, char *str)
-{
-    PyUFuncReduceObject *loop;
-    PyArrayObject *idarr;
-    PyArrayObject *aar;
-    intp loop_i[MAX_DIMS], outsize=0;
-    int arg_types[3];
-    PyArray_SCALARKIND scalars[3] = {PyArray_NOSCALAR, PyArray_NOSCALAR,
-                                     PyArray_NOSCALAR};
-    int i, j, nd;
-    int flags;
-    /* Reduce type is the type requested of the input
-       during reduction */
-
-    if (self->core_enabled) {
-        PyErr_Format(PyExc_RuntimeError,
-                     "construct_reduce not allowed on ufunc with signature");
-        return NULL;
-    }
-
-    nd = (*arr)->nd;
-    arg_types[0] = otype;
-    arg_types[1] = otype;
-    arg_types[2] = otype;
-    if ((loop = _pya_malloc(sizeof(PyUFuncReduceObject)))==NULL) {
-        PyErr_NoMemory(); return loop;
-    }
-
-    loop->retbase=0;
-    loop->swap = 0;
-    loop->index = 0;
-    loop->ufunc = self;
-    Py_INCREF(self);
-    loop->cast = NULL;
-    loop->buffer = NULL;
-    loop->ret = NULL;
-    loop->it = NULL;
-    loop->rit = NULL;
-    loop->errobj = NULL;
-    loop->first = 1;
-    loop->decref=NULL;
-    loop->N = (*arr)->dimensions[axis];
-    loop->instrides = (*arr)->strides[axis];
-
-    if (select_types(loop->ufunc, arg_types, &(loop->function),
-                     &(loop->funcdata), scalars, NULL) == -1) goto fail;
-
-    /* output type may change -- if it does
-       reduction is forced into that type
-       and we need to select the reduction function again
-    */
-    if (otype != arg_types[2]) {
-        otype = arg_types[2];
-        arg_types[0] = otype;
-        arg_types[1] = otype;
-        if (select_types(loop->ufunc, arg_types, &(loop->function),
-                         &(loop->funcdata), scalars, NULL) == -1)
-            goto fail;
-    }
-
-    /* get looping parameters from Python */
-    if (PyUFunc_GetPyValues(str, &(loop->bufsize), &(loop->errormask),
-                            &(loop->errobj)) < 0) goto fail;
-
-    /* Make copy if misbehaved or not otype for small arrays */
-    if (_create_reduce_copy(loop, arr, otype) < 0) goto fail;
-    aar = *arr;
-
-    if (loop->N == 0) {
-        loop->meth = ZERO_EL_REDUCELOOP;
-    }
-    else if (PyArray_ISBEHAVED_RO(aar) &&           \
-             otype == (aar)->descr->type_num) {
-        if (loop->N == 1) {
-            loop->meth = ONE_EL_REDUCELOOP;
-        }
-        else {
-            loop->meth = NOBUFFER_UFUNCLOOP;
-            loop->steps[1] = (aar)->strides[axis];
-            loop->N -= 1;
-        }
-    }
-    else {
-        loop->meth = BUFFER_UFUNCLOOP;
-        loop->swap = !(PyArray_ISNOTSWAPPED(aar));
-    }
-
-    /* Determine if object arrays are involved */
-    if (otype == PyArray_OBJECT || aar->descr->type_num == PyArray_OBJECT)
-        loop->obj = 1;
-    else
-        loop->obj = 0;
-
-    if (loop->meth == ZERO_EL_REDUCELOOP) {
-        idarr = _getidentity(self, otype, str);
-        if (idarr == NULL) goto fail;
-        if (idarr->descr->elsize > UFUNC_MAXIDENTITY) {
-            PyErr_Format(PyExc_RuntimeError,
-                         "UFUNC_MAXIDENTITY (%d)"           \
-                         " is too small (needs to be at least %d)",
-                         UFUNC_MAXIDENTITY, idarr->descr->elsize);
-            Py_DECREF(idarr);
-            goto fail;
-        }
-        memcpy(loop->idptr, idarr->data, idarr->descr->elsize);
-        Py_DECREF(idarr);
-    }
-
-    /* Construct return array */
-    flags = NPY_CARRAY | NPY_UPDATEIFCOPY | NPY_FORCECAST;
-    switch(operation) {
-    case UFUNC_REDUCE:
-        for(j=0, i=0; i<nd; i++) {
-            if (i != axis)
-                loop_i[j++] = (aar)->dimensions[i];
-
-        }
-        if (out == NULL) {
-            loop->ret = (PyArrayObject *)                   \
-                PyArray_New(aar->ob_type, aar->nd-1, loop_i,
-                            otype, NULL, NULL, 0, 0,
-                            (PyObject *)aar);
-        }
-        else {
-            outsize = PyArray_MultiplyList(loop_i, aar->nd-1);
-        }
-        break;
-    case UFUNC_ACCUMULATE:
-        if (out == NULL) {
-            loop->ret = (PyArrayObject *)                   \
-                PyArray_New(aar->ob_type, aar->nd, aar->dimensions,
-                            otype, NULL, NULL, 0, 0, (PyObject *)aar);
-        }
-        else {
-            outsize = PyArray_MultiplyList(aar->dimensions, aar->nd);
-        }
-        break;
-    case UFUNC_REDUCEAT:
-        memcpy(loop_i, aar->dimensions, nd*sizeof(intp));
-        /* Index is 1-d array */
-        loop_i[axis] = ind_size;
-        if (out == NULL) {
-            loop->ret = (PyArrayObject *)                   \
-                PyArray_New(aar->ob_type, aar->nd, loop_i, otype,
-                            NULL, NULL, 0, 0, (PyObject *)aar);
-        }
-        else {
-            outsize = PyArray_MultiplyList(loop_i, aar->nd);
-        }
-        if (ind_size == 0) {
-            loop->meth = ZERO_EL_REDUCELOOP;
-            return loop;
-        }
-        if (loop->meth == ONE_EL_REDUCELOOP)
-            loop->meth = NOBUFFER_REDUCELOOP;
-        break;
-    }
-    if (out) {
-        if (PyArray_SIZE(out) != outsize) {
-            PyErr_SetString(PyExc_ValueError,
-                            "wrong shape for output");
-            goto fail;
-        }
-        loop->ret = (PyArrayObject *)                   \
-            PyArray_FromArray(out, PyArray_DescrFromType(otype),
-                              flags);
-        if (loop->ret && loop->ret != out) {
-            loop->retbase = 1;
-        }
-    }
-    if (loop->ret == NULL) goto fail;
-    loop->insize = aar->descr->elsize;
-    loop->outsize = loop->ret->descr->elsize;
-    loop->bufptr[0] = loop->ret->data;
-
-    if (loop->meth == ZERO_EL_REDUCELOOP) {
-        loop->size = PyArray_SIZE(loop->ret);
-        return loop;
-    }
-
-    loop->it = (PyArrayIterObject *)PyArray_IterNew((PyObject *)aar);
-    if (loop->it == NULL) return NULL;
-
-    if (loop->meth == ONE_EL_REDUCELOOP) {
-        loop->size = loop->it->size;
-        return loop;
-    }
-
-    /* Fix iterator to loop over correct dimension */
-    /* Set size in axis dimension to 1 */
-
-    loop->it->contiguous = 0;
-    loop->it->size /= (loop->it->dims_m1[axis]+1);
-    loop->it->dims_m1[axis] = 0;
-    loop->it->backstrides[axis] = 0;
-
-
-    loop->size = loop->it->size;
-
-    if (operation == UFUNC_REDUCE) {
-        loop->steps[0] = 0;
-    }
-    else {
-        loop->rit = (PyArrayIterObject *)                       \
-            PyArray_IterNew((PyObject *)(loop->ret));
-        if (loop->rit == NULL) return NULL;
-
-        /* Fix iterator to loop over correct dimension */
-        /* Set size in axis dimension to 1 */
-
-        loop->rit->contiguous = 0;
-        loop->rit->size /= (loop->rit->dims_m1[axis]+1);
-        loop->rit->dims_m1[axis] = 0;
-        loop->rit->backstrides[axis] = 0;
-
-        if (operation == UFUNC_ACCUMULATE)
-            loop->steps[0] = loop->ret->strides[axis];
-        else
-            loop->steps[0] = 0;
-    }
-    loop->steps[2] = loop->steps[0];
-    loop->bufptr[2] = loop->bufptr[0] + loop->steps[2];
-
-
-    if (loop->meth == BUFFER_UFUNCLOOP) {
-        int _size;
-        loop->steps[1] = loop->outsize;
-        if (otype != aar->descr->type_num) {
-            _size=loop->bufsize*(loop->outsize +            \
-                                 aar->descr->elsize);
-            loop->buffer = PyDataMem_NEW(_size);
-            if (loop->buffer == NULL) goto fail;
-            if (loop->obj) memset(loop->buffer, 0, _size);
-            loop->castbuf = loop->buffer + \
-                loop->bufsize*aar->descr->elsize;
-            loop->bufptr[1] = loop->castbuf;
-            loop->cast = PyArray_GetCastFunc(aar->descr, otype);
-            if (loop->cast == NULL) goto fail;
-        }
-        else {
-            _size = loop->bufsize * loop->outsize;
-            loop->buffer = PyDataMem_NEW(_size);
-            if (loop->buffer == NULL) goto fail;
-            if (loop->obj) memset(loop->buffer, 0, _size);
-            loop->bufptr[1] = loop->buffer;
-        }
-    }
-
-
-    PyUFunc_clearfperr();
-    return loop;
-
- fail:
-    ufuncreduce_dealloc(loop);
-    return NULL;
-}
-
-
-/* We have two basic kinds of loops */
-/*  One is used when arr is not-swapped and aligned and output type
-    is the same as input type.
-    and another using buffers when one of these is not satisfied.
-
-    Zero-length and one-length axes-to-be-reduced are handled separately.
-*/
-
-    static PyObject *
-PyUFunc_Reduce(PyUFuncObject *self, PyArrayObject *arr, PyArrayObject *out,
-        int axis, int otype)
-{
-    PyArrayObject *ret=NULL;
-    PyUFuncReduceObject *loop;
-    intp i, n;
-    char *dptr;
-    NPY_BEGIN_THREADS_DEF;
-
-    /* Construct loop object */
-    loop = construct_reduce(self, &arr, out, axis, otype, UFUNC_REDUCE, 0,
-            "reduce");
-    if (!loop) return NULL;
-
-    NPY_LOOP_BEGIN_THREADS;
-        switch(loop->meth) {
-        case ZERO_EL_REDUCELOOP:
-            /* fprintf(stderr, "ZERO..%d\n", loop->size); */
-            for(i=0; i<loop->size; i++) {
-                if (loop->obj) Py_INCREF(*((PyObject **)loop->idptr));
-                memmove(loop->bufptr[0], loop->idptr, loop->outsize);
-                loop->bufptr[0] += loop->outsize;
-            }
-            break;
-        case ONE_EL_REDUCELOOP:
-            /*fprintf(stderr, "ONEDIM..%d\n", loop->size); */
-            while(loop->index < loop->size) {
-                if (loop->obj)
-                    Py_INCREF(*((PyObject **)loop->it->dataptr));
-                memmove(loop->bufptr[0], loop->it->dataptr,
-                        loop->outsize);
-                PyArray_ITER_NEXT(loop->it);
-                loop->bufptr[0] += loop->outsize;
-                loop->index++;
-            }
-            break;
-        case NOBUFFER_UFUNCLOOP:
-            /*fprintf(stderr, "NOBUFFER..%d\n", loop->size); */
-            while(loop->index < loop->size) {
-                /* Copy first element to output */
-                if (loop->obj)
-                    Py_INCREF(*((PyObject **)loop->it->dataptr));
-                memmove(loop->bufptr[0], loop->it->dataptr,
-                        loop->outsize);
-                /* Adjust input pointer */
-                loop->bufptr[1] = loop->it->dataptr+loop->steps[1];
-                loop->function((char **)loop->bufptr,
-                               &(loop->N),
-                               loop->steps, loop->funcdata);
-                UFUNC_CHECK_ERROR(loop);
-
-                PyArray_ITER_NEXT(loop->it)
-                    loop->bufptr[0] += loop->outsize;
-                loop->bufptr[2] = loop->bufptr[0];
-                loop->index++;
-            }
-            break;
-        case BUFFER_UFUNCLOOP:
-            /* use buffer for arr */
-            /*
-              For each row to reduce
-              1. copy first item over to output (casting if necessary)
-              2. Fill inner buffer
-              3. When buffer is filled or end of row
-              a. Cast input buffers if needed
-              b. Call inner function.
-              4. Repeat 2 until row is done.
-            */
-            /* fprintf(stderr, "BUFFERED..%d %d\n", loop->size,
-               loop->swap); */
-            while(loop->index < loop->size) {
-                loop->inptr = loop->it->dataptr;
-                /* Copy (cast) First term over to output */
-                if (loop->cast) {
-                    /* A little tricky because we need to
-                       cast it first */
-                    arr->descr->f->copyswap(loop->buffer,
-                                            loop->inptr,
-                                            loop->swap,
-                                            NULL);
-                    loop->cast(loop->buffer, loop->castbuf,
-                               1, NULL, NULL);
-                    if (loop->obj) {
-                        Py_XINCREF(*((PyObject **)loop->castbuf));
-                    }
-                    memcpy(loop->bufptr[0], loop->castbuf,
-                           loop->outsize);
-                }
-                else { /* Simple copy */
-                    arr->descr->f->copyswap(loop->bufptr[0],
-                                            loop->inptr,
-                                            loop->swap, NULL);
-                }
-                loop->inptr += loop->instrides;
-                n = 1;
-                while(n < loop->N) {
-                    /* Copy up to loop->bufsize elements to
-                       buffer */
-                    dptr = loop->buffer;
-                    for(i=0; i<loop->bufsize; i++, n++) {
-                        if (n == loop->N) break;
-                        arr->descr->f->copyswap(dptr,
-                                                loop->inptr,
-                                                loop->swap,
-                                                NULL);
-                        loop->inptr += loop->instrides;
-                        dptr += loop->insize;
-                    }
-                    if (loop->cast)
-                        loop->cast(loop->buffer,
-                                   loop->castbuf,
-                                   i, NULL, NULL);
-                    loop->function((char **)loop->bufptr,
-                                   &i,
-                                   loop->steps, loop->funcdata);
-                    loop->bufptr[0] += loop->steps[0]*i;
-                    loop->bufptr[2] += loop->steps[2]*i;
-                    UFUNC_CHECK_ERROR(loop);
-                }
-                PyArray_ITER_NEXT(loop->it);
-                loop->bufptr[0] += loop->outsize;
-                loop->bufptr[2] = loop->bufptr[0];
-                loop->index++;
-            }
-        }
-
-        NPY_LOOP_END_THREADS;
-
-        /* Hang on to this reference -- will be decref'd with loop */
-        if (loop->retbase) ret = (PyArrayObject *)loop->ret->base;
-        else ret = loop->ret;
-        Py_INCREF(ret);
-        ufuncreduce_dealloc(loop);
-        return (PyObject *)ret;
-
-fail:
-    NPY_LOOP_END_THREADS;
-
-        if (loop) ufuncreduce_dealloc(loop);
-    return NULL;
-}
-
-
-static PyObject *
-PyUFunc_Accumulate(PyUFuncObject *self, PyArrayObject *arr, PyArrayObject *out,
-                   int axis, int otype)
-{
-    PyArrayObject *ret=NULL;
-    PyUFuncReduceObject *loop;
-    intp i, n;
-    char *dptr;
-    NPY_BEGIN_THREADS_DEF;
-
-        /* Construct loop object */
-        loop = construct_reduce(self, &arr, out, axis, otype, UFUNC_ACCUMULATE, 0,
-                                "accumulate");
-    if (!loop) return NULL;
-
-    NPY_LOOP_BEGIN_THREADS;
-    switch(loop->meth) {
-    case ZERO_EL_REDUCELOOP: /* Accumulate */
-        /* fprintf(stderr, "ZERO..%d\n", loop->size); */
-        for(i=0; i<loop->size; i++) {
-            if (loop->obj)
-                Py_INCREF(*((PyObject **)loop->idptr));
-            memcpy(loop->bufptr[0], loop->idptr, loop->outsize);
-            loop->bufptr[0] += loop->outsize;
-        }
-        break;
-    case ONE_EL_REDUCELOOP: /* Accumulate */
-        /* fprintf(stderr, "ONEDIM..%d\n", loop->size); */
-        while(loop->index < loop->size) {
-            if (loop->obj)
-                Py_INCREF(*((PyObject **)loop->it->dataptr));
-            memmove(loop->bufptr[0], loop->it->dataptr,
-                   loop->outsize);
-            PyArray_ITER_NEXT(loop->it);
-            loop->bufptr[0] += loop->outsize;
-            loop->index++;
-        }
-        break;
-    case NOBUFFER_UFUNCLOOP: /* Accumulate */
-        /* fprintf(stderr, "NOBUFFER..%d\n", loop->size); */
-        while(loop->index < loop->size) {
-            /* Copy first element to output */
-            if (loop->obj)
-                Py_INCREF(*((PyObject **)loop->it->dataptr));
-            memmove(loop->bufptr[0], loop->it->dataptr,
-                   loop->outsize);
-            /* Adjust input pointer */
-            loop->bufptr[1] = loop->it->dataptr+loop->steps[1];
-            loop->function((char **)loop->bufptr,
-                           &(loop->N),
-                           loop->steps, loop->funcdata);
-            UFUNC_CHECK_ERROR(loop);
-
-            PyArray_ITER_NEXT(loop->it);
-            PyArray_ITER_NEXT(loop->rit);
-            loop->bufptr[0] = loop->rit->dataptr;
-            loop->bufptr[2] = loop->bufptr[0] + loop->steps[0];
-            loop->index++;
-        }
-        break;
-    case BUFFER_UFUNCLOOP:  /* Accumulate */
-        /* use buffer for arr */
-        /*
-          For each row to reduce
-          1. copy identity over to output (casting if necessary)
-          2. Fill inner buffer
-          3. When buffer is filled or end of row
-          a. Cast input buffers if needed
-          b. Call inner function.
-          4. Repeat 2 until row is done.
-        */
-        /* fprintf(stderr, "BUFFERED..%d %p\n", loop->size,
-           loop->cast); */
-        while(loop->index < loop->size) {
-            loop->inptr = loop->it->dataptr;
-            /* Copy (cast) First term over to output */
-            if (loop->cast) {
-                /* A little tricky because we need to
-                   cast it first */
-                arr->descr->f->copyswap(loop->buffer,
-                                        loop->inptr,
-                                        loop->swap,
-                                        NULL);
-                loop->cast(loop->buffer, loop->castbuf,
-                           1, NULL, NULL);
-                if (loop->obj) {
-                    Py_XINCREF(*((PyObject **)loop->castbuf));
-                }
-                memcpy(loop->bufptr[0], loop->castbuf,
-                       loop->outsize);
-            }
-            else { /* Simple copy */
-                arr->descr->f->copyswap(loop->bufptr[0],
-                                        loop->inptr,
-                                        loop->swap,
-                                        NULL);
-            }
-            loop->inptr += loop->instrides;
-            n = 1;
-            while(n < loop->N) {
-                /* Copy up to loop->bufsize elements to
-                   buffer */
-                dptr = loop->buffer;
-                for(i=0; i<loop->bufsize; i++, n++) {
-                    if (n == loop->N) break;
-                    arr->descr->f->copyswap(dptr,
-                                            loop->inptr,
-                                            loop->swap,
-                                            NULL);
-                    loop->inptr += loop->instrides;
-                    dptr += loop->insize;
-                }
-                if (loop->cast)
-                    loop->cast(loop->buffer,
-                               loop->castbuf,
-                               i, NULL, NULL);
-                loop->function((char **)loop->bufptr,
-                               &i,
-                               loop->steps, loop->funcdata);
-                loop->bufptr[0] += loop->steps[0]*i;
-                loop->bufptr[2] += loop->steps[2]*i;
-                UFUNC_CHECK_ERROR(loop);
-            }
-            PyArray_ITER_NEXT(loop->it);
-            PyArray_ITER_NEXT(loop->rit);
-            loop->bufptr[0] = loop->rit->dataptr;
-            loop->bufptr[2] = loop->bufptr[0] + loop->steps[0];
-            loop->index++;
-        }
-    }
-
-    NPY_LOOP_END_THREADS;
-
-    /* Hang on to this reference -- will be decref'd with loop */
-    if (loop->retbase) ret = (PyArrayObject *)loop->ret->base;
-    else ret = loop->ret;
-    Py_INCREF(ret);
-    ufuncreduce_dealloc(loop);
-    return (PyObject *)ret;
-
- fail:
-    NPY_LOOP_END_THREADS;
-
-        if (loop) ufuncreduce_dealloc(loop);
-    return NULL;
-}
-
-/* Reduceat performs a reduce over an axis using the indices as a guide
-
-   op.reduceat(array,indices)  computes
-   op.reduce(array[indices[i]:indices[i+1]]
-   for i=0..end with an implicit indices[i+1]=len(array)
-   assumed when i=end-1
-
-   if indices[i+1] <= indices[i]+1
-   then the result is array[indices[i]] for that value
-
-   op.accumulate(array) is the same as
-   op.reduceat(array,indices)[::2]
-   where indices is range(len(array)-1) with a zero placed in every other sample
-   indices = zeros(len(array)*2-1)
-   indices[1::2] = range(1,len(array))
-
-   output shape is based on the size of indices
-*/
-
-static PyObject *
-PyUFunc_Reduceat(PyUFuncObject *self, PyArrayObject *arr, PyArrayObject *ind,
-                 PyArrayObject *out, int axis, int otype)
-{
-    PyArrayObject *ret;
-    PyUFuncReduceObject *loop;
-    intp *ptr=(intp *)ind->data;
-    intp nn=ind->dimensions[0];
-    intp mm=arr->dimensions[axis]-1;
-    intp n, i, j;
-    char *dptr;
-    NPY_BEGIN_THREADS_DEF;
-
-    /* Check for out-of-bounds values in indices array */
-    for(i=0; i<nn; i++) {
-        if ((*ptr < 0) || (*ptr > mm)) {
-            PyErr_Format(PyExc_IndexError,
-                    "index out-of-bounds (0, %d)", (int) mm);
-            return NULL;
-        }
-        ptr++;
-    }
-
-    ptr = (intp *)ind->data;
-    /* Construct loop object */
-    loop = construct_reduce(self, &arr, out, axis, otype, UFUNC_REDUCEAT, nn,
-                            "reduceat");
-    if (!loop) return NULL;
-
-    NPY_LOOP_BEGIN_THREADS;
-    switch(loop->meth) {
-    /* zero-length index -- return array immediately */
-    case ZERO_EL_REDUCELOOP:
-        /* fprintf(stderr, "ZERO..\n"); */
-        break;
-        /* NOBUFFER -- behaved array and same type */
-    case NOBUFFER_UFUNCLOOP:                        /* Reduceat */
-        /* fprintf(stderr, "NOBUFFER..%d\n", loop->size); */
-        while(loop->index < loop->size) {
-            ptr = (intp *)ind->data;
-            for(i=0; i<nn; i++) {
-                loop->bufptr[1] = loop->it->dataptr +   \
-                                  (*ptr)*loop->instrides;
-                if (loop->obj) {
-                    Py_XINCREF(*((PyObject **)loop->bufptr[1]));
-                }
-                memcpy(loop->bufptr[0], loop->bufptr[1],
-                        loop->outsize);
-                mm = (i==nn-1 ? arr->dimensions[axis]-*ptr : \
-                        *(ptr+1) - *ptr) - 1;
-                if (mm > 0) {
-                    loop->bufptr[1] += loop->instrides;
-                    loop->bufptr[2] = loop->bufptr[0];
-                    loop->function((char **)loop->bufptr,
-                            &mm, loop->steps,
-                            loop->funcdata);
-                    UFUNC_CHECK_ERROR(loop);
-                }
-                loop->bufptr[0] += loop->ret->strides[axis];
-                ptr++;
-            }
-            PyArray_ITER_NEXT(loop->it);
-            PyArray_ITER_NEXT(loop->rit);
-            loop->bufptr[0] = loop->rit->dataptr;
-            loop->index++;
-        }
-        break;
-
-        /* BUFFER -- misbehaved array or different types */
-    case BUFFER_UFUNCLOOP:                               /* Reduceat */
-        /* fprintf(stderr, "BUFFERED..%d\n", loop->size); */
-        while(loop->index < loop->size) {
-            ptr = (intp *)ind->data;
-            for(i=0; i<nn; i++) {
-                if (loop->obj) {
-                    Py_XINCREF(*((PyObject **)loop->idptr));
-                }
-                memcpy(loop->bufptr[0], loop->idptr,
-                        loop->outsize);
-                n = 0;
-                mm = (i==nn-1 ? arr->dimensions[axis] - *ptr :\
-                        *(ptr+1) - *ptr);
-                if (mm < 1) mm = 1;
-                loop->inptr = loop->it->dataptr + \
-                              (*ptr)*loop->instrides;
-                while (n < mm) {
-                    /* Copy up to loop->bufsize elements
-                       to buffer */
-                    dptr = loop->buffer;
-                    for(j=0; j<loop->bufsize; j++, n++) {
-                        if (n == mm) break;
-                        arr->descr->f->copyswap\
-                            (dptr,
-                             loop->inptr,
-                             loop->swap, NULL);
-                        loop->inptr += loop->instrides;
-                        dptr += loop->insize;
-                    }
-                    if (loop->cast)
-                        loop->cast(loop->buffer,
-                                loop->castbuf,
-                                j, NULL, NULL);
-                    loop->bufptr[2] = loop->bufptr[0];
-                    loop->function((char **)loop->bufptr,
-                            &j, loop->steps,
-                            loop->funcdata);
-                    UFUNC_CHECK_ERROR(loop);
-                    loop->bufptr[0] += j*loop->steps[0];
-                }
-                loop->bufptr[0] += loop->ret->strides[axis];
-                ptr++;
-            }
-            PyArray_ITER_NEXT(loop->it);
-            PyArray_ITER_NEXT(loop->rit);
-            loop->bufptr[0] = loop->rit->dataptr;
-            loop->index++;
-        }
-        break;
-    }
-
-    NPY_LOOP_END_THREADS;
-
-        /* Hang on to this reference -- will be decref'd with loop */
-        if (loop->retbase) ret = (PyArrayObject *)loop->ret->base;
-        else ret = loop->ret;
-    Py_INCREF(ret);
-    ufuncreduce_dealloc(loop);
-    return (PyObject *)ret;
-
-fail:
-    NPY_LOOP_END_THREADS;
-
-    if (loop) ufuncreduce_dealloc(loop);
-    return NULL;
-}
-
-
-/* This code handles reduce, reduceat, and accumulate
-   (accumulate and reduce are special cases of the more general reduceat
-   but they are handled separately for speed)
-*/
-
-static PyObject *
-PyUFunc_GenericReduction(PyUFuncObject *self, PyObject *args,
-                         PyObject *kwds, int operation)
-{
-    int axis=0;
-    PyArrayObject *mp, *ret = NULL;
-    PyObject *op, *res=NULL;
-    PyObject *obj_ind, *context;
-    PyArrayObject *indices = NULL;
-    PyArray_Descr *otype=NULL;
-    PyArrayObject *out=NULL;
-    static char *kwlist1[] = {"array", "axis", "dtype", "out", NULL};
-    static char *kwlist2[] = {"array", "indices", "axis", "dtype", "out", NULL};
-    static char *_reduce_type[] = {"reduce", "accumulate", \
-                                   "reduceat", NULL};
-    if (self == NULL) {
-        PyErr_SetString(PyExc_ValueError, "function not supported");
-        return NULL;
-    }
-
-    if (self->core_enabled) {
-        PyErr_Format(PyExc_RuntimeError,
-                     "Reduction not defined on ufunc with signature");
-        return NULL;
-    }
-    
-    if (self->nin != 2) {
-        PyErr_Format(PyExc_ValueError,
-                     "%s only supported for binary functions",
-                     _reduce_type[operation]);
-        return NULL;
-    }
-    if (self->nout != 1) {
-        PyErr_Format(PyExc_ValueError,
-                     "%s only supported for functions " \
-                     "returning a single value",
-                     _reduce_type[operation]);
-        return NULL;
-    }
-
-    if (operation == UFUNC_REDUCEAT) {
-        PyArray_Descr *indtype;
-        indtype = PyArray_DescrFromType(PyArray_INTP);
-        if(!PyArg_ParseTupleAndKeywords(args, kwds, "OO|iO&O&", kwlist2,
-                                        &op, &obj_ind, &axis,
-                                        PyArray_DescrConverter2,
-                                        &otype,
-                                        PyArray_OutputConverter,
-                                        &out)) {
-            Py_XDECREF(otype);
-            return NULL;
-        }
-        indices = (PyArrayObject *)PyArray_FromAny(obj_ind, indtype,
-                                                   1, 1, CARRAY, NULL);
-        if (indices == NULL) {Py_XDECREF(otype); return NULL;}
-    }
-    else {
-        if(!PyArg_ParseTupleAndKeywords(args, kwds, "O|iO&O&", kwlist1,
-                                        &op, &axis,
-                                        PyArray_DescrConverter2,
-                                        &otype,
-                                        PyArray_OutputConverter,
-                                        &out)) {
-            Py_XDECREF(otype);
-            return NULL;
-        }
-    }
-
-    /* Ensure input is an array */
-    if (!PyArray_Check(op) && !PyArray_IsScalar(op, Generic)) {
-        context = Py_BuildValue("O(O)i", self, op, 0);
-    }
-    else {
-        context = NULL;
-    }
-    mp = (PyArrayObject *)PyArray_FromAny(op, NULL, 0, 0, 0, context);
-    Py_XDECREF(context);
-    if (mp == NULL) return NULL;
-
-    /* Check to see if input is zero-dimensional */
-    if (mp->nd == 0) {
-        PyErr_Format(PyExc_TypeError, "cannot %s on a scalar",
-                     _reduce_type[operation]);
-        Py_XDECREF(otype);
-        Py_DECREF(mp);
-        return NULL;
-    }
-
-    /* Check to see that type (and otype) is not FLEXIBLE */
-    if (PyArray_ISFLEXIBLE(mp) ||
-        (otype && PyTypeNum_ISFLEXIBLE(otype->type_num))) {
-        PyErr_Format(PyExc_TypeError,
-                     "cannot perform %s with flexible type",
-                     _reduce_type[operation]);
-        Py_XDECREF(otype);
-        Py_DECREF(mp);
-        return NULL;
-    }
-
-    if (axis < 0) axis += mp->nd;
-    if (axis < 0 || axis >= mp->nd) {
-        PyErr_SetString(PyExc_ValueError, "axis not in array");
-        Py_XDECREF(otype);
-        Py_DECREF(mp);
-        return NULL;
-    }
-
-    /* If out is specified it determines otype unless otype
-       already specified.
-    */
-    if (otype == NULL && out != NULL) {
-        otype = out->descr;
-        Py_INCREF(otype);
-    }
-
-    if (otype == NULL) {
-        /* For integer types --- make sure at
-           least a long is used for add and multiply
-           reduction --- to avoid overflow */
-        int typenum = PyArray_TYPE(mp);
-        if ((typenum < NPY_FLOAT) &&                \
-            ((strcmp(self->name,"add")==0) ||       \
-             (strcmp(self->name,"multiply")==0))) {
-            if (PyTypeNum_ISBOOL(typenum))
-                typenum = PyArray_LONG;
-            else if (mp->descr->elsize < sizeof(long)) {
-                if (PyTypeNum_ISUNSIGNED(typenum))
-                    typenum = PyArray_ULONG;
-                else
-                    typenum = PyArray_LONG;
-            }
-        }
-        otype = PyArray_DescrFromType(typenum);
-    }
-
-
-    switch(operation) {
-    case UFUNC_REDUCE:
-        ret = (PyArrayObject *)PyUFunc_Reduce(self, mp, out, axis,
-                                              otype->type_num);
-        break;
-    case UFUNC_ACCUMULATE:
-        ret = (PyArrayObject *)PyUFunc_Accumulate(self, mp, out, axis,
-                                                  otype->type_num);
-        break;
-    case UFUNC_REDUCEAT:
-        ret = (PyArrayObject *)PyUFunc_Reduceat(self, mp, indices, out,
-                                                axis, otype->type_num);
-        Py_DECREF(indices);
-        break;
-    }
-    Py_DECREF(mp);
-    Py_DECREF(otype);
-    if (ret==NULL) return NULL;
-    if (op->ob_type != ret->ob_type) {
-        res = PyObject_CallMethod(op, "__array_wrap__", "O", ret);
-        if (res == NULL) PyErr_Clear();
-        else if (res == Py_None) Py_DECREF(res);
-        else {
-            Py_DECREF(ret);
-            return res;
-        }
-    }
-    return PyArray_Return(ret);
-
-}
-
-/* This function analyzes the input arguments
-   and determines an appropriate __array_wrap__ function to call
-   for the outputs.
-
-   If an output argument is provided, then it is wrapped
-   with its own __array_wrap__ not with the one determined by
-   the input arguments.
-
-   if the provided output argument is already an array,
-   the wrapping function is None (which means no wrapping will
-   be done --- not even PyArray_Return).
-
-   A NULL is placed in output_wrap for outputs that
-   should just have PyArray_Return called.
-*/
-
-static void
-_find_array_wrap(PyObject *args, PyObject **output_wrap, int nin, int nout)
-{
-    Py_ssize_t nargs;
-    int i;
-    int np = 0;
-    double priority, maxpriority;
-    PyObject *with_wrap[NPY_MAXARGS], *wraps[NPY_MAXARGS];
-    PyObject *obj, *wrap = NULL;
-
-    nargs = PyTuple_GET_SIZE(args);
-    for(i = 0; i < nin; i++) {
-        obj = PyTuple_GET_ITEM(args, i);
-        if (PyArray_CheckExact(obj) ||  \
-            PyArray_IsAnyScalar(obj))
-            continue;
-        wrap = PyObject_GetAttrString(obj, "__array_wrap__");
-        if (wrap) {
-            if (PyCallable_Check(wrap)) {
-                with_wrap[np] = obj;
-                wraps[np] = wrap;
-                ++np;
-            }
-            else {
-                Py_DECREF(wrap);
-                wrap = NULL;
-            }
-        }
-        else {
-            PyErr_Clear();
-        }
-    }
-    if (np >= 2) {
-        wrap = wraps[0];
-        maxpriority = PyArray_GetPriority(with_wrap[0],
-                                          PyArray_SUBTYPE_PRIORITY);
-        for(i = 1; i < np; ++i) {
-            priority = \
-                PyArray_GetPriority(with_wrap[i],
-                                    PyArray_SUBTYPE_PRIORITY);
-            if (priority > maxpriority) {
-                maxpriority = priority;
-                Py_DECREF(wrap);
-                wrap = wraps[i];
-            } else {
-                Py_DECREF(wraps[i]);
-            }
-        }
-    }
-
-    /* Here wrap is the wrapping function determined from the
-       input arrays (could be NULL).
-
-       For all the output arrays decide what to do.
-
-       1) Use the wrap function determined from the input arrays
-       This is the default if the output array is not
-       passed in.
-
-       2) Use the __array_wrap__ method of the output object
-       passed in. -- this is special cased for
-       exact ndarray so that no PyArray_Return is
-       done in that case.
-    */
-
-    for(i=0; i<nout; i++) {
-        int j = nin + i;
-        int incref = 1;
-        output_wrap[i] = wrap;
-        if (j < nargs) {
-            obj = PyTuple_GET_ITEM(args, j);
-            if (obj == Py_None)
-                continue;
-            if (PyArray_CheckExact(obj)) {
-                output_wrap[i] = Py_None;
-            }
-            else {
-                PyObject *owrap;
-                owrap = PyObject_GetAttrString(obj,"__array_wrap__");
-                incref = 0;
-                if (!(owrap) || !(PyCallable_Check(owrap))) {
-                    Py_XDECREF(owrap);
-                    owrap = wrap;
-                    incref = 1;
-                    PyErr_Clear();
-                }
-                output_wrap[i] = owrap;
-            }
-        }
-        if (incref) {
-            Py_XINCREF(output_wrap[i]);
-        }
-    }
-
-    Py_XDECREF(wrap);
-    return;
-}
-
-static PyObject *
-ufunc_generic_call(PyUFuncObject *self, PyObject *args, PyObject *kwds)
-{
-    int i;
-    PyTupleObject *ret;
-    PyArrayObject *mps[NPY_MAXARGS];
-    PyObject *retobj[NPY_MAXARGS];
-    PyObject *wraparr[NPY_MAXARGS];
-    PyObject *res;
-    int errval;
-
-    /*
-     * Initialize all array objects to NULL to make cleanup easier
-     * if something goes wrong.
-     */
-    for(i = 0; i < self->nargs; i++) {
-        mps[i] = NULL;
-    }
-
-    errval = PyUFunc_GenericFunction(self, args, kwds, mps);
-    if (errval < 0) {
-        for(i = 0; i < self->nargs; i++) {
-            PyArray_XDECREF_ERR(mps[i]);
-        }
-        if (errval == -1)
-            return NULL;
-        else {
-            /*
-             * PyErr_SetString(PyExc_TypeError,"");
-             * return NULL;
-             */
-            Py_INCREF(Py_NotImplemented);
-            return Py_NotImplemented;
-        }
-    }
-
-    for(i = 0; i < self->nin; i++) {
-        Py_DECREF(mps[i]);
-    }
-
-
-    /*
-     * Use __array_wrap__ on all outputs
-     * if present on one of the input arguments.
-     * If present for multiple inputs:
-     * use __array_wrap__ of input object with largest
-     * __array_priority__ (default = 0.0)
-     *
-     * Exception:  we should not wrap outputs for items already
-     * passed in as output-arguments.  These items should either
-     * be left unwrapped or wrapped by calling their own __array_wrap__
-     * routine.
-     *
-     * For each output argument, wrap will be either
-     * NULL --- call PyArray_Return() -- default if no output arguments given
-     * None --- array-object passed in don't call PyArray_Return
-     * method --- the __array_wrap__ method to call.
-     */
-    _find_array_wrap(args, wraparr, self->nin, self->nout);
-
-    /* wrap outputs */
-    for(i = 0; i < self->nout; i++) {
-        int j=self->nin+i;
-        PyObject *wrap;
-
-        /*
-         * check to see if any UPDATEIFCOPY flags are set
-         * which meant that a temporary output was generated
-         */
-        if (mps[j]->flags & UPDATEIFCOPY) {
-            PyObject *old = mps[j]->base;
-            /* we want to hang on to this */
-            Py_INCREF(old);
-            /* should trigger the copyback into old */
-            Py_DECREF(mps[j]);
-            mps[j] = (PyArrayObject *)old;
-        }
-        wrap = wraparr[i];
-        if (wrap != NULL) {
-            if (wrap == Py_None) {
-                Py_DECREF(wrap);
-                retobj[i] = (PyObject *)mps[j];
-                continue;
-            }
-            res = PyObject_CallFunction(wrap, "O(OOi)",
-                                        mps[j], self, args, i);
-            if (res == NULL && \
-                PyErr_ExceptionMatches(PyExc_TypeError)) {
-                PyErr_Clear();
-                res = PyObject_CallFunctionObjArgs(wrap,
-                                                   mps[j],
-                                                   NULL);
-            }
-            Py_DECREF(wrap);
-            if (res == NULL) {
-                goto fail;
-            }
-            else if (res == Py_None) {
-                Py_DECREF(res);
-            }
-            else {
-                Py_DECREF(mps[j]);
-                retobj[i] = res;
-                continue;
-            }
-        }
-        /* default behavior */
-        retobj[i] = PyArray_Return(mps[j]);
-    }
-
-    if (self->nout == 1) {
-        return retobj[0];
-    } else {
-        ret = (PyTupleObject *)PyTuple_New(self->nout);
-        for(i = 0; i < self->nout; i++) {
-            PyTuple_SET_ITEM(ret, i, retobj[i]);
-        }
-        return (PyObject *)ret;
-    }
-fail:
-    for(i = self->nin; i < self->nargs; i++) {
-        Py_XDECREF(mps[i]);
-    }
-    return NULL;
-}
-
-static PyObject *
-ufunc_geterr(PyObject *NPY_UNUSED(dummy), PyObject *args)
-{
-    PyObject *thedict;
-    PyObject *res;
-
-    if (!PyArg_ParseTuple(args, "")) return NULL;
-
-    if (PyUFunc_PYVALS_NAME == NULL) {
-        PyUFunc_PYVALS_NAME = PyString_InternFromString(UFUNC_PYVALS_NAME);
-    }
-    thedict = PyThreadState_GetDict();
-    if (thedict == NULL) {
-        thedict = PyEval_GetBuiltins();
-    }
-    res = PyDict_GetItem(thedict, PyUFunc_PYVALS_NAME);
-    if (res != NULL) {
-        Py_INCREF(res);
-        return res;
-    }
-    /* Construct list of defaults */
-    res = PyList_New(3);
-    if (res == NULL) return NULL;
-    PyList_SET_ITEM(res, 0, PyInt_FromLong(PyArray_BUFSIZE));
-    PyList_SET_ITEM(res, 1, PyInt_FromLong(UFUNC_ERR_DEFAULT));
-    PyList_SET_ITEM(res, 2, Py_None); Py_INCREF(Py_None);
-    return res;
-}
-
-#if USE_USE_DEFAULTS==1
-/*
-  This is a strategy to buy a little speed up and avoid the dictionary
-  look-up in the default case.  It should work in the presence of
-  threads.  If it is deemed too complicated or it doesn't actually work
-  it could be taken out.
-*/
-static int
-ufunc_update_use_defaults(void)
-{
-    PyObject *errobj=NULL;
-    int errmask, bufsize;
-    int res;
-
-    PyUFunc_NUM_NODEFAULTS += 1;
-    res = PyUFunc_GetPyValues("test", &bufsize, &errmask,
-                              &errobj);
-    PyUFunc_NUM_NODEFAULTS -= 1;
-
-    if (res < 0) {Py_XDECREF(errobj); return -1;}
-
-    if ((errmask != UFUNC_ERR_DEFAULT) ||           \
-        (bufsize != PyArray_BUFSIZE) ||             \
-        (PyTuple_GET_ITEM(errobj, 1) != Py_None)) {
-        PyUFunc_NUM_NODEFAULTS += 1;
-    }
-    else if (PyUFunc_NUM_NODEFAULTS > 0) {
-        PyUFunc_NUM_NODEFAULTS -= 1;
-    }
-    Py_XDECREF(errobj);
-    return 0;
-}
-#endif
-
-static PyObject *
-ufunc_seterr(PyObject *NPY_UNUSED(dummy), PyObject *args)
-{
-    PyObject *thedict;
-    int res;
-    PyObject *val;
-    static char *msg = "Error object must be a list of length 3";
-
-    if (!PyArg_ParseTuple(args, "O", &val)) return NULL;
-
-    if (!PyList_CheckExact(val) || PyList_GET_SIZE(val) != 3) {
-        PyErr_SetString(PyExc_ValueError, msg);
-        return NULL;
-    }
-    if (PyUFunc_PYVALS_NAME == NULL) {
-        PyUFunc_PYVALS_NAME = PyString_InternFromString(UFUNC_PYVALS_NAME);
-    }
-    thedict = PyThreadState_GetDict();
-    if (thedict == NULL) {
-        thedict = PyEval_GetBuiltins();
-    }
-    res = PyDict_SetItem(thedict, PyUFunc_PYVALS_NAME, val);
-    if (res < 0) return NULL;
-#if USE_USE_DEFAULTS==1
-    if (ufunc_update_use_defaults() < 0) return NULL;
-#endif
-    Py_INCREF(Py_None);
-    return Py_None;
-}
-
-
-
-static PyUFuncGenericFunction pyfunc_functions[] = {PyUFunc_On_Om};
-
-static char
-doc_frompyfunc[] = "frompyfunc(func, nin, nout) take an arbitrary python function that takes nin objects as input and returns nout objects and return a universal function (ufunc).  This ufunc always returns PyObject arrays";
-
-static PyObject *
-ufunc_frompyfunc(PyObject *NPY_UNUSED(dummy), PyObject *args, PyObject *NPY_UNUSED(kwds)) {
-    /* Keywords are ignored for now */
-
-    PyObject *function, *pyname=NULL;
-    int nin, nout, i;
-    PyUFunc_PyFuncData *fdata;
-    PyUFuncObject *self;
-    char *fname, *str;
-    Py_ssize_t fname_len=-1;
-    int offset[2];
-
-    if (!PyArg_ParseTuple(args, "Oii", &function, &nin, &nout)) return NULL;
-
-    if (!PyCallable_Check(function)) {
-        PyErr_SetString(PyExc_TypeError, "function must be callable");
-        return NULL;
-    }
-
-    self = _pya_malloc(sizeof(PyUFuncObject));
-    if (self == NULL) return NULL;
-    PyObject_Init((PyObject *)self, &PyUFunc_Type);
-
-    self->userloops = NULL;
-    self->nin = nin;
-    self->nout = nout;
-    self->nargs = nin+nout;
-    self->identity = PyUFunc_None;
-    self->functions = pyfunc_functions;
-
-    self->ntypes = 1;
-    self->check_return = 0;
-    
-    /* generalized ufunc */
-    self->core_enabled = 0;
-    self->core_num_dim_ix = 0;
-    self->core_num_dims = NULL;
-    self->core_dim_ixs = NULL;
-    self->core_offsets = NULL;
-    self->core_signature = NULL;
-
-    pyname = PyObject_GetAttrString(function, "__name__");
-    if (pyname)
-        (void) PyString_AsStringAndSize(pyname, &fname, &fname_len);
-
-    if (PyErr_Occurred()) {
-        fname = "?";
-        fname_len = 1;
-        PyErr_Clear();
-    }
-    Py_XDECREF(pyname);
-
-
-
-    /* self->ptr holds a pointer for enough memory for
-       self->data[0] (fdata)
-       self->data
-       self->name
-       self->types
-
-       To be safest, all of these need their memory aligned on void * pointers
-       Therefore, we may need to allocate extra space.
-    */
-    offset[0] = sizeof(PyUFunc_PyFuncData);
-    i = (sizeof(PyUFunc_PyFuncData) % sizeof(void *));
-    if (i) offset[0] += (sizeof(void *) - i);
-    offset[1] = self->nargs;
-    i = (self->nargs % sizeof(void *));
-    if (i) offset[1] += (sizeof(void *)-i);
-
-    self->ptr = _pya_malloc(offset[0] + offset[1] + sizeof(void *) + \
-                            (fname_len+14));
-
-    if (self->ptr == NULL) return PyErr_NoMemory();
-    Py_INCREF(function);
-    self->obj = function;
-    fdata = (PyUFunc_PyFuncData *)(self->ptr);
-    fdata->nin = nin;
-    fdata->nout = nout;
-    fdata->callable = function;
-
-    self->data = (void **)(((char *)self->ptr) + offset[0]);
-    self->data[0] = (void *)fdata;
-
-    self->types = (char *)self->data + sizeof(void *);
-    for(i=0; i<self->nargs; i++) self->types[i] = PyArray_OBJECT;
-
-    str = self->types + offset[1];
-    memcpy(str, fname, fname_len);
-    memcpy(str+fname_len, " (vectorized)", 14);
-
-    self->name = str;
-
-    /* Do a better job someday */
-    self->doc = "dynamic ufunc based on a python function";
-
-
-    return (PyObject *)self;
-}
-
-/*UFUNC_API*/
-static int
-PyUFunc_ReplaceLoopBySignature(PyUFuncObject *func,
-                               PyUFuncGenericFunction newfunc,
-                               int *signature,
-                               PyUFuncGenericFunction *oldfunc)
-{
-    int i,j;
-    int res = -1;
-    /* Find the location of the matching signature */
-    for(i=0; i<func->ntypes; i++) {
-        for(j=0; j<func->nargs; j++) {
-            if (signature[j] != func->types[i*func->nargs+j])
-                break;
-        }
-        if (j < func->nargs) continue;
-
-        if (oldfunc != NULL) {
-            *oldfunc = func->functions[i];
-        }
-        func->functions[i] = newfunc;
-        res = 0;
-        break;
-    }
-    return res;
-}
-
-/*UFUNC_API*/
-static PyObject *
-PyUFunc_FromFuncAndData(PyUFuncGenericFunction *func, void **data,
-                        char *types, int ntypes,
-                        int nin, int nout, int identity,
-                        char *name, char *doc, int check_return)
-{
-    return PyUFunc_FromFuncAndDataAndSignature(func, data, types, ntypes,
-        nin, nout, identity, name, doc, check_return, NULL);
-}
-                        
-/*UFUNC_API*/
-static PyObject *
-PyUFunc_FromFuncAndDataAndSignature(PyUFuncGenericFunction *func, void **data,
-                                     char *types, int ntypes,
-                                     int nin, int nout, int identity,
-                                     char *name, char *doc,
-                                     int check_return, const char *signature)
-{
-    PyUFuncObject *self;
-
-    self = _pya_malloc(sizeof(PyUFuncObject));
-    if (self == NULL) return NULL;
-    PyObject_Init((PyObject *)self, &PyUFunc_Type);
-
-    self->nin = nin;
-    self->nout = nout;
-    self->nargs = nin+nout;
-    self->identity = identity;
-
-    self->functions = func;
-    self->data = data;
-    self->types = types;
-    self->ntypes = ntypes;
-    self->check_return = check_return;
-    self->ptr = NULL;
-    self->obj = NULL;
-    self->userloops=NULL;
-
-    if (name == NULL) self->name = "?";
-    else self->name = name;
-
-    if (doc == NULL) self->doc = "NULL";
-    else self->doc = doc;
-    
-    /* generalized ufunc */
-    self->core_enabled = 0;
-    self->core_num_dim_ix = 0;
-    self->core_num_dims = NULL;
-    self->core_dim_ixs = NULL;
-    self->core_offsets = NULL;
-    self->core_signature = NULL;
-    if (signature != NULL) {
-        if (_parse_signature(self, signature) != 0)
-	    return NULL;
-    }
-
-    return (PyObject *)self;
-}
-
-/* This is the first-part of the CObject structure.
-
-   I don't think this will change, but if it should, then
-   this needs to be fixed.  The exposed C-API was insufficient
-   because I needed to replace the pointer and it wouldn't
-   let me with a destructor set (even though it works fine
-   with the destructor).
-*/
-
-typedef struct {
-    PyObject_HEAD
-    void *c_obj;
-} _simple_cobj;
-
-#define _SETCPTR(cobj, val) ((_simple_cobj *)(cobj))->c_obj = (val)
-
-/* return 1 if arg1 > arg2, 0 if arg1 == arg2, and -1 if arg1 < arg2
- */
-static int
-cmp_arg_types(int *arg1, int *arg2, int n)
-{
-    for(;n>0; n--, arg1++, arg2++) {
-        if (PyArray_EquivTypenums(*arg1, *arg2)) continue;
-        if (PyArray_CanCastSafely(*arg1, *arg2))
-            return -1;
-        return 1;
-    }
-    return 0;
-}
-
-/* This frees the linked-list structure
-   when the CObject is destroyed (removed
-   from the internal dictionary)
-*/
-static void
-_loop1d_list_free(void *ptr)
-{
-    PyUFunc_Loop1d *funcdata;
-    if (ptr == NULL) return;
-    funcdata = (PyUFunc_Loop1d *)ptr;
-    if (funcdata == NULL) return;
-    _pya_free(funcdata->arg_types);
-    _loop1d_list_free(funcdata->next);
-    _pya_free(funcdata);
-}
-
-
-/*UFUNC_API*/
-static int
-PyUFunc_RegisterLoopForType(PyUFuncObject *ufunc,
-                            int usertype,
-                            PyUFuncGenericFunction function,
-                            int *arg_types,
-                            void *data)
-{
-    PyArray_Descr *descr;
-    PyUFunc_Loop1d *funcdata;
-    PyObject *key, *cobj;
-    int i;
-    int *newtypes=NULL;
-
-    descr=PyArray_DescrFromType(usertype);
-    if ((usertype < PyArray_USERDEF) || (descr==NULL)) {
-        PyErr_SetString(PyExc_TypeError,
-                        "unknown user-defined type");
-        return -1;
-    }
-    Py_DECREF(descr);
-
-    if (ufunc->userloops == NULL) {
-        ufunc->userloops = PyDict_New();
-    }
-    key = PyInt_FromLong((long) usertype);
-    if (key == NULL) return -1;
-    funcdata = _pya_malloc(sizeof(PyUFunc_Loop1d));
-    if (funcdata == NULL) goto fail;
-    newtypes = _pya_malloc(sizeof(int)*ufunc->nargs);
-    if (newtypes == NULL) goto fail;
-    if (arg_types != NULL) {
-        for(i=0; i<ufunc->nargs; i++) {
-            newtypes[i] = arg_types[i];
-        }
-    }
-    else {
-        for(i=0; i<ufunc->nargs; i++) {
-            newtypes[i] = usertype;
-        }
-    }
-
-    funcdata->func = function;
-    funcdata->arg_types = newtypes;
-    funcdata->data = data;
-    funcdata->next = NULL;
-
-    /* Get entry for this user-defined type*/
-    cobj = PyDict_GetItem(ufunc->userloops, key);
-
-    /* If it's not there, then make one and return. */
-    if (cobj == NULL) {
-        cobj = PyCObject_FromVoidPtr((void *)funcdata,
-                                     _loop1d_list_free);
-        if (cobj == NULL) goto fail;
-        PyDict_SetItem(ufunc->userloops, key, cobj);
-        Py_DECREF(cobj);
-        Py_DECREF(key);
-        return 0;
-    }
-    else {
-        PyUFunc_Loop1d *current, *prev=NULL;
-        int cmp=1;
-        /* There is already at least 1 loop. Place this one in
-           lexicographic order.  If the next one signature
-           is exactly like this one, then just replace.
-           Otherwise insert.
-        */
-        current = (PyUFunc_Loop1d *)PyCObject_AsVoidPtr(cobj);
-        while (current != NULL) {
-            cmp = cmp_arg_types(current->arg_types, newtypes,
-                                ufunc->nargs);
-            if (cmp >= 0) break;
-            prev = current;
-            current = current->next;
-        }
-        if (cmp == 0) { /* just replace it with new function */
-            current->func = function;
-            current->data = data;
-            _pya_free(newtypes);
-            _pya_free(funcdata);
-        }
-        else { /* insert it before the current one
-                  by hacking the internals of cobject to
-                  replace the function pointer ---
-                  can't use CObject API because destructor is set.
-               */
-            funcdata->next = current;
-            if (prev == NULL) { /* place this at front */
-                _SETCPTR(cobj, funcdata);
-            }
-            else {
-                prev->next = funcdata;
-            }
-        }
-    }
-    Py_DECREF(key);
-    return 0;
-
-
- fail:
-    Py_DECREF(key);
-    _pya_free(funcdata);
-    _pya_free(newtypes);
-    if (!PyErr_Occurred()) PyErr_NoMemory();
-    return -1;
-}
-
-#undef _SETCPTR
-
-
-static void
-ufunc_dealloc(PyUFuncObject *self)
-{
-    if (self->core_num_dims) _pya_free(self->core_num_dims);
-    if (self->core_dim_ixs) _pya_free(self->core_dim_ixs);
-    if (self->core_offsets) _pya_free(self->core_offsets);
-    if (self->core_signature) _pya_free(self->core_signature);
-    if (self->ptr) _pya_free(self->ptr);
-    Py_XDECREF(self->userloops);
-    Py_XDECREF(self->obj);
-    _pya_free(self);
-}
-
-static PyObject *
-ufunc_repr(PyUFuncObject *self)
-{
-    char buf[100];
-
-    sprintf(buf, "<ufunc '%.50s'>", self->name);
-
-    return PyString_FromString(buf);
-}
-
-
-/* -------------------------------------------------------- */
-
-/* op.outer(a,b) is equivalent to op(a[:,NewAxis,NewAxis,etc.],b)
-   where a has b.ndim NewAxis terms appended.
-
-   The result has dimensions a.ndim + b.ndim
-*/
-
-static PyObject *
-ufunc_outer(PyUFuncObject *self, PyObject *args, PyObject *kwds)
-{
-    int i;
-    PyObject *ret;
-    PyArrayObject *ap1=NULL, *ap2=NULL, *ap_new=NULL;
-    PyObject *new_args, *tmp;
-    PyObject *shape1, *shape2, *newshape;
-
-    if (self->core_enabled) {
-        PyErr_Format(PyExc_TypeError,
-                     "method outer is not allowed in ufunc with non-trivial"\
-                     " signature");
-        return NULL;
-    }
-    
-    if(self->nin != 2) {
-        PyErr_SetString(PyExc_ValueError,
-                        "outer product only supported "\
-                        "for binary functions");
-        return NULL;
-    }
-
-    if (PySequence_Length(args) != 2) {
-        PyErr_SetString(PyExc_TypeError,
-                        "exactly two arguments expected");
-        return NULL;
-    }
-
-    tmp = PySequence_GetItem(args, 0);
-    if (tmp == NULL) return NULL;
-    ap1 = (PyArrayObject *)                                 \
-        PyArray_FromObject(tmp, PyArray_NOTYPE, 0, 0);
-    Py_DECREF(tmp);
-    if (ap1 == NULL) return NULL;
-
-    tmp = PySequence_GetItem(args, 1);
-    if (tmp == NULL) return NULL;
-    ap2 = (PyArrayObject *)PyArray_FromObject(tmp, PyArray_NOTYPE, 0, 0);
-    Py_DECREF(tmp);
-    if (ap2 == NULL) {Py_DECREF(ap1); return NULL;}
-
-    /* Construct new shape tuple */
-    shape1 = PyTuple_New(ap1->nd);
-    if (shape1 == NULL) goto fail;
-    for(i=0; i<ap1->nd; i++)
-        PyTuple_SET_ITEM(shape1, i,
-                         PyLong_FromLongLong((longlong)ap1->    \
-                                             dimensions[i]));
-
-    shape2 = PyTuple_New(ap2->nd);
-    for(i=0; i<ap2->nd; i++)
-        PyTuple_SET_ITEM(shape2, i, PyInt_FromLong((long) 1));
-    if (shape2 == NULL) {Py_DECREF(shape1); goto fail;}
-    newshape = PyNumber_Add(shape1, shape2);
-    Py_DECREF(shape1);
-    Py_DECREF(shape2);
-    if (newshape == NULL) goto fail;
-
-    ap_new = (PyArrayObject *)PyArray_Reshape(ap1, newshape);
-    Py_DECREF(newshape);
-    if (ap_new == NULL) goto fail;
-
-    new_args = Py_BuildValue("(OO)", ap_new, ap2);
-    Py_DECREF(ap1);
-    Py_DECREF(ap2);
-    Py_DECREF(ap_new);
-    ret = ufunc_generic_call(self, new_args, kwds);
-    Py_DECREF(new_args);
-    return ret;
-
- fail:
-    Py_XDECREF(ap1);
-    Py_XDECREF(ap2);
-    Py_XDECREF(ap_new);
-    return NULL;
-}
-
-
-static PyObject *
-ufunc_reduce(PyUFuncObject *self, PyObject *args, PyObject *kwds)
-{
-
-    return PyUFunc_GenericReduction(self, args, kwds, UFUNC_REDUCE);
-}
-
-static PyObject *
-ufunc_accumulate(PyUFuncObject *self, PyObject *args, PyObject *kwds)
-{
-
-    return PyUFunc_GenericReduction(self, args, kwds, UFUNC_ACCUMULATE);
-}
-
-static PyObject *
-ufunc_reduceat(PyUFuncObject *self, PyObject *args, PyObject *kwds)
-{
-    return PyUFunc_GenericReduction(self, args, kwds, UFUNC_REDUCEAT);
-}
-
-
-static struct PyMethodDef ufunc_methods[] = {
-    {"reduce",  (PyCFunction)ufunc_reduce, METH_VARARGS | METH_KEYWORDS, NULL },
-    {"accumulate",  (PyCFunction)ufunc_accumulate,
-     METH_VARARGS | METH_KEYWORDS, NULL },
-    {"reduceat",  (PyCFunction)ufunc_reduceat,
-     METH_VARARGS | METH_KEYWORDS, NULL },
-    {"outer", (PyCFunction)ufunc_outer, METH_VARARGS | METH_KEYWORDS, NULL},
-    {NULL, NULL, 0, NULL}           /* sentinel */
-};
-
-
-
-/* construct the string
-   y1,y2,...,yn
-*/
-static PyObject *
-_makeargs(int num, char *ltr, int null_if_none)
-{
-    PyObject *str;
-    int i;
-    switch (num) {
-    case 0:
-        if (null_if_none) return NULL;
-        return PyString_FromString("");
-    case 1:
-        return PyString_FromString(ltr);
-    }
-    str = PyString_FromFormat("%s1, %s2", ltr, ltr);
-    for(i = 3; i <= num; ++i) {
-        PyString_ConcatAndDel(&str, PyString_FromFormat(", %s%d", ltr, i));
-    }
-    return str;
-}
-
-static char
-_typecharfromnum(int num) {
-    PyArray_Descr *descr;
-    char ret;
-
-    descr = PyArray_DescrFromType(num);
-    ret = descr->type;
-    Py_DECREF(descr);
-    return ret;
-}
-
-static PyObject *
-ufunc_get_doc(PyUFuncObject *self)
-{
-    /* Put docstring first or FindMethod finds it...*/
-    /* could so some introspection on name and nin + nout */
-    /* to automate the first part of it */
-    /* the doc string shouldn't need the calling convention */
-    /* construct
-       name(x1, x2, ...,[ out1, out2, ...])
-       
-       __doc__
-    */
-    PyObject *outargs, *inargs, *doc;
-    outargs = _makeargs(self->nout, "out", 1);
-    inargs = _makeargs(self->nin, "x", 0);
-    if (outargs == NULL) {
-        doc = PyString_FromFormat("%s(%s)\n\n%s",
-                                  self->name,
-                                  PyString_AS_STRING(inargs),
-                                  self->doc);
-    } else {
-        doc = PyString_FromFormat("%s(%s[, %s])\n\n%s",
-                                  self->name,
-                                  PyString_AS_STRING(inargs),
-                                  PyString_AS_STRING(outargs),
-                                  self->doc);
-        Py_DECREF(outargs);
-    }
-    Py_DECREF(inargs);
-    return doc;
-}
-
-static PyObject *
-ufunc_get_nin(PyUFuncObject *self)
-{
-    return PyInt_FromLong(self->nin);
-}
-
-static PyObject *
-ufunc_get_nout(PyUFuncObject *self)
-{
-    return PyInt_FromLong(self->nout);
-}
-
-static PyObject *
-ufunc_get_nargs(PyUFuncObject *self)
-{
-    return PyInt_FromLong(self->nargs);
-}
-
-static PyObject *
-ufunc_get_ntypes(PyUFuncObject *self)
-{
-    return PyInt_FromLong(self->ntypes);
-}
-
-static PyObject *
-ufunc_get_types(PyUFuncObject *self)
-{
-    /* return a list with types grouped
-       input->output */
-    PyObject *list;
-    PyObject *str;
-    int k, j, n, nt=self->ntypes;
-    int ni = self->nin;
-    int no = self->nout;
-    char *t;
-    list = PyList_New(nt);
-    if (list == NULL) return NULL;
-    t = _pya_malloc(no+ni+2);
-    n = 0;
-    for(k=0; k<nt; k++) {
-        for(j=0; j<ni; j++) {
-            t[j] = _typecharfromnum(self->types[n]);
-            n++;
-        }
-        t[ni] = '-';
-        t[ni+1] = '>';
-        for(j=0; j<no; j++) {
-            t[ni+2+j] =                                     \
-                _typecharfromnum(self->types[n]);
-            n++;
-        }
-        str = PyString_FromStringAndSize(t, no+ni+2);
-        PyList_SET_ITEM(list, k, str);
-    }
-    _pya_free(t);
-    return list;
-}
-
-static PyObject *
-ufunc_get_name(PyUFuncObject *self)
-{
-    return PyString_FromString(self->name);
-}
-
-static PyObject *
-ufunc_get_identity(PyUFuncObject *self)
-{
-    switch(self->identity) {
-    case PyUFunc_One:
-        return PyInt_FromLong(1);
-    case PyUFunc_Zero:
-        return PyInt_FromLong(0);
-    }
-    return Py_None;
-}
-
-static PyObject *
-ufunc_get_signature(PyUFuncObject *self)
-{
-    if (!self->core_enabled)
-        Py_RETURN_NONE;
-    return PyString_FromString(self->core_signature);
-}
-
-#undef _typecharfromnum
-
-/* Docstring is now set from python */
-/* static char *Ufunctype__doc__ = NULL; */
-
-static PyGetSetDef ufunc_getset[] = {
-    {"__doc__",  (getter)ufunc_get_doc,      NULL, "documentation string", NULL},
-    {"nin",      (getter)ufunc_get_nin,      NULL, "number of inputs", NULL},
-    {"nout",     (getter)ufunc_get_nout,     NULL, "number of outputs", NULL},
-    {"nargs",    (getter)ufunc_get_nargs,    NULL, "number of arguments", NULL},
-    {"ntypes",   (getter)ufunc_get_ntypes,   NULL, "number of types", NULL},
-    {"types",    (getter)ufunc_get_types,    NULL, "return a list with types grouped input->output", NULL},
-    {"__name__", (getter)ufunc_get_name,     NULL, "function name", NULL},
-    {"identity", (getter)ufunc_get_identity, NULL, "identity value", NULL},
-    {"signature",(getter)ufunc_get_signature,NULL, "signature"},
-    {NULL, NULL, NULL, NULL, NULL},  /* Sentinel */
-};
-
-static PyTypeObject PyUFunc_Type = {
-    PyObject_HEAD_INIT(0)
-    0,                              /*ob_size*/
-    "numpy.ufunc",                  /*tp_name*/
-    sizeof(PyUFuncObject),          /*tp_basicsize*/
-    0,                              /*tp_itemsize*/
-    /* methods */
-    (destructor)ufunc_dealloc,      /*tp_dealloc*/
-    (printfunc)0,                   /*tp_print*/
-    (getattrfunc)0,                 /*tp_getattr*/
-    (setattrfunc)0,                 /*tp_setattr*/
-    (cmpfunc)0,                     /*tp_compare*/
-    (reprfunc)ufunc_repr,           /*tp_repr*/
-    0,                             /*tp_as_number*/
-    0,                             /*tp_as_sequence*/
-    0,                             /*tp_as_mapping*/
-    (hashfunc)0,                   /*tp_hash*/
-    (ternaryfunc)ufunc_generic_call, /*tp_call*/
-    (reprfunc)ufunc_repr,          /*tp_str*/
-    0,                             /* tp_getattro */
-    0,                             /* tp_setattro */
-    0,                             /* tp_as_buffer */
-    Py_TPFLAGS_DEFAULT,            /* tp_flags */
-    NULL,                          /* tp_doc */ /* was Ufunctype__doc__ */
-    0,                             /* tp_traverse */
-    0,                             /* tp_clear */
-    0,                             /* tp_richcompare */
-    0,                             /* tp_weaklistoffset */
-    0,                             /* tp_iter */
-    0,                             /* tp_iternext */
-    ufunc_methods,                 /* tp_methods */
-    0,                             /* tp_members */
-    ufunc_getset,                  /* tp_getset */
-    0,   /* tp_base */
-    0,   /* tp_dict */
-    0,   /* tp_descr_get */
-    0,   /* tp_descr_set */
-    0,   /* tp_dictoffset */
-    0,   /* tp_init */
-    0,   /* tp_alloc */
-    0,   /* tp_new */
-    0,   /* tp_free */
-    0,   /* tp_is_gc */
-    0,   /* tp_bases */
-    0,   /* tp_mro */
-    0,   /* tp_cache */
-    0,   /* tp_subclasses */
-    0,   /* tp_weaklist */
-    0,   /* tp_del */
-
-#ifdef COUNT_ALLOCS
-	/* these must be last and never explicitly initialized */
-    0, /* tp_allocs */
-    0, /* tp_frees */
-    0, /* tp_maxalloc */
-    0, /* tp_prev */
-    0, /* *tp_next */
-#endif
-};
-
-/* End of code for ufunc objects */
-/* -------------------------------------------------------- */

Copied: trunk/numpy/core/src/umath_funcs_c99.inc.src (from rev 6087, trunk/numpy/core/src/math_c99.inc.src)
===================================================================
--- trunk/numpy/core/src/math_c99.inc.src	2008-11-21 20:49:33 UTC (rev 6087)
+++ trunk/numpy/core/src/umath_funcs_c99.inc.src	2008-11-22 01:28:52 UTC (rev 6089)
@@ -0,0 +1,377 @@
+/*
+ * vim:syntax=c
+ * A small module to implement missing C99 math capabilities required by numpy
+ *
+ * Please keep this independant of python !
+ *
+ * How to add a function to this section
+ * -------------------------------------
+ *
+ * Say you want to add `foo`, these are the steps and the reasons for them.
+ *
+ * 1) Add foo to the appropriate list in the configuration system. The
+ *    lists can be found in numpy/core/setup.py lines 63-105. Read the
+ *    comments that come with them, they are very helpful.
+ *
+ * 2) The configuration system will define a macro HAVE_FOO if your function
+ *    can be linked from the math library. The result can depend on the
+ *    optimization flags as well as the compiler, so can't be known ahead of
+ *    time. If the function can't be linked, then either it is absent, defined
+ *    as a macro, or is an intrinsic (hardware) function. If it is linkable it
+ *    may still be the case that no prototype is available. So to cover all the
+ *    cases requires the following construction.
+ *
+ *    i) Undefine any possible macros:
+ *
+ *    #ifdef foo
+ *    #undef foo
+ *    #endif
+ *
+ *    ii) Check if the function was in the library, If not, define the
+ *    function with npy_ prepended to its name to avoid conflict with any
+ *    intrinsic versions, then use a define so that the preprocessor will
+ *    replace foo with npy_foo before the compilation pass. Make the
+ *    function static to avoid poluting the module library.
+ *
+ *    #ifdef foo
+ *    #undef foo
+ *    #endif
+ *    #ifndef HAVE_FOO
+ *    static double
+ *    npy_foo(double x)
+ *    {
+ *        return x;
+ *    }
+ *    #define foo npy_foo
+ *
+ *    iii) Finally, even if foo is in the library, add a prototype. Just being
+ *    in the library doesn't guarantee a prototype in math.h, and in any case
+ *    you want to make sure the prototype is what you think it is. Count on it,
+ *    whatever can go wrong will go wrong. Think defensively! The result:
+ *
+ *    #ifdef foo
+ *    #undef foo
+ *    #endif
+ *    #ifndef HAVE_FOO
+ *    static double
+ *    npy_foo(double x)
+ *    {
+ *        return x;
+ *    }
+ *    #define foo npy_foo
+ *    #else
+ *    double foo(double x);
+ *    #end
+ *
+ *    And there you have it.
+ *
+ */
+
+/*
+ *****************************************************************************
+ **                     DISTRO VOODOO                                       **
+ *****************************************************************************
+ */
+
+
+/*
+ *****************************************************************************
+ **                     BASIC MATH FUNCTIONS                                **
+ *****************************************************************************
+ */
+
+/* Original code by Konrad Hinsen.  */
+#ifndef HAVE_EXPM1
+static double
+npy_expm1(double x)
+{
+    double u = exp(x);
+    if (u == 1.0) {
+        return x;
+    } else if (u-1.0 == -1.0) {
+        return -1;
+    } else {
+        return (u-1.0) * x/log(u);
+    }
+}
+#define expm1 npy_expm1
+#else
+double expm1(double x);
+#endif
+
+#ifndef HAVE_LOG1P
+static double
+npy_log1p(double x)
+{
+    double u = 1. + x;
+    if (u == 1.0) {
+        return x;
+    } else {
+        return log(u) * x / (u - 1);
+    }
+}
+#define log1p npy_log1p
+#else
+double log1p(double x);
+#endif
+
+#ifndef HAVE_HYPOT
+static double
+npy_hypot(double x, double y)
+{
+    double yx;
+
+    x = fabs(x);
+    y = fabs(y);
+    if (x < y) {
+        double temp = x;
+        x = y;
+        y = temp;
+    }
+    if (x == 0.)
+        return 0.;
+    else {
+        yx = y/x;
+        return x*sqrt(1.+yx*yx);
+    }
+}
+#define hypot npy_hypot
+#else
+double hypot(double x, double y);
+#endif
+
+#ifndef HAVE_ACOSH
+static double
+npy_acosh(double x)
+{
+    return 2*log(sqrt((x+1.0)/2)+sqrt((x-1.0)/2));
+}
+#define acosh npy_acosh
+#else
+double acosh(double x);
+#endif
+
+#ifndef HAVE_ASINH
+static double
+npy_asinh(double xx)
+{
+    double x, d;
+    int sign;
+    if (xx < 0.0) {
+        sign = -1;
+        x = -xx;
+    }
+    else {
+        sign = 1;
+        x = xx;
+    }
+    if (x > 1e8) {
+        d = x;
+    } else {
+        d = sqrt(x*x + 1);
+    }
+    return sign*log1p(x*(1.0 + x/(d+1)));
+}
+#define asinh npy_asinh
+#else
+double asinh(double xx);
+#endif
+
+#ifndef HAVE_ATANH
+static double
+npy_atanh(double x)
+{
+    return 0.5*log1p(2.0*x/(1.0-x));
+}
+#define atanh npy_atanh
+#else
+double atanh(double x);
+#endif
+
+#ifndef HAVE_RINT
+static double
+npy_rint(double x)
+{
+    double y, r;
+
+    y = floor(x);
+    r = x - y;
+
+    if (r > 0.5) goto rndup;
+
+    /* Round to nearest even */
+    if (r==0.5) {
+        r = y - 2.0*floor(0.5*y);
+        if (r==1.0) {
+        rndup:
+            y+=1.0;
+        }
+    }
+    return y;
+}
+#define rint npy_rint
+#else
+double rint(double x);
+#endif
+
+#ifndef HAVE_TRUNC
+static double
+npy_trunc(double x)
+{
+    return x < 0 ? ceil(x) : floor(x);
+}
+#define trunc npy_trunc
+#else
+double trunc(double x);
+#endif
+
+#ifndef HAVE_EXP2
+#define LOG2 0.69314718055994530943
+static double
+npy_exp2(double x)
+{
+    return exp(LOG2*x);
+}
+#define exp2 npy_exp2
+#undef LOG2
+#else
+double exp2(double x);
+#endif
+
+#ifndef HAVE_LOG2
+#define INVLOG2 1.4426950408889634074
+static double
+npy_log2(double x)
+{
+    return INVLOG2*log(x);
+}
+#define log2 npy_log2
+#undef INVLOG2
+#else
+double log2(double x);
+#endif
+
+/*
+ *****************************************************************************
+ **                     IEEE 754 FPU HANDLING                               **
+ *****************************************************************************
+ */
+#if !defined(HAVE_DECL_ISNAN)
+    # define isnan(x) ((x) != (x))
+#endif
+
+/* VS 2003 with /Ox optimizes (x)-(x) to 0, which is not IEEE compliant. So we
+ * force (x) + (-x), which seems to work. */
+#if !defined(HAVE_DECL_ISFINITE)
+    # define isfinite(x) !isnan((x) + (-x))
+#endif
+
+#if !defined(HAVE_DECL_ISINF)
+#define isinf(x) (!isfinite(x) && !isnan(x))
+#endif
+
+#if !defined(HAVE_DECL_SIGNBIT)
+    #include "_signbit.c"
+    # define signbit(x) \
+              (sizeof (x) == sizeof (long double) ? signbit_ld (x) \
+               : sizeof (x) == sizeof (double) ? signbit_d (x) \
+               : signbit_f (x))
+
+static int signbit_f (float x)
+{
+    return signbit_d((double)x);
+}
+
+static int signbit_ld (long double x)
+{
+    return signbit_d((double)x);
+}
+#endif
+
+/*
+ * if C99 extensions not available then define dummy functions that use the
+ * double versions for
+ *
+ * sin, cos, tan
+ * sinh, cosh, tanh,
+ * fabs, floor, ceil, rint, trunc
+ * sqrt, log10, log, exp, expm1
+ * asin, acos, atan,
+ * asinh, acosh, atanh
+ *
+ * hypot, atan2, pow, fmod, modf
+ *
+ * We assume the above are always available in their double versions.
+ *
+ * NOTE: some facilities may be available as macro only  instead of functions.
+ * For simplicity, we define our own functions and undef the macros. We could
+ * instead test for the macro, but I am lazy to do that for now.
+ */
+
+/**begin repeat
+ * #type = longdouble, float#
+ * #TYPE = LONGDOUBLE, FLOAT#
+ * #c = l,f#
+ * #C = L,F#
+ */
+
+/**begin repeat1
+ * #kind = sin,cos,tan,sinh,cosh,tanh,fabs,floor,ceil,rint,trunc,sqrt,log10,
+ *         log,exp,expm1,asin,acos,atan,asinh,acosh,atanh,log1p,exp2,log2#
+ * #KIND = SIN,COS,TAN,SINH,COSH,TANH,FABS,FLOOR,CEIL,RINT,TRUNC,SQRT,LOG10,
+ *         LOG,EXP,EXPM1,ASIN,ACOS,ATAN,ASINH,ACOSH,ATANH,LOG1P,EXP2,LOG2#
+ */
+
+#ifdef @kind@@c@
+#undef @kind@@c@
+#endif
+#ifndef HAVE_ at KIND@@C@
+static @type@
+npy_ at kind@@c@(@type@ x)
+{
+    return (@type@) @kind@((double)x);
+}
+#define @kind@@c@  npy_ at kind@@c@
+#else
+ at type@ @kind@@c@(@type@ x);
+#endif
+
+/**end repeat1**/
+
+/**begin repeat1
+ * #kind = atan2,hypot,pow,fmod#
+ * #KIND = ATAN2,HYPOT,POW,FMOD#
+ */
+#ifdef @kind@@c@
+#undef @kind@@c@
+#endif
+#ifndef HAVE_ at KIND@@C@
+static @type@
+npy_ at kind@@c@(@type@ x, @type@ y)
+{
+    return (@type@) @kind@((double)x, (double) y);
+}
+#define @kind@@c@  npy_ at kind@@c@
+#else
+ at type@ @kind@@c@(@type@ x, @type@ y);
+#endif
+/**end repeat1**/
+
+#ifdef modf at c@
+#undef modf at c@
+#endif
+#ifndef HAVE_MODF at C@
+static @type@
+npy_modf at c@(@type@ x, @type@ *iptr)
+{
+    double niptr;
+    double y = modf((double)x, &niptr);
+    *iptr = (@type@) niptr;
+    return (@type@) y;
+}
+#define modf at c@ npy_modf at c@
+#else
+ at type@ modf at c@(@type@ x, @type@ *iptr);
+#endif
+
+/**end repeat**/

Copied: trunk/numpy/core/src/umath_ufunc_object.inc (from rev 6087, trunk/numpy/core/src/ufuncobject.c)
===================================================================
--- trunk/numpy/core/src/ufuncobject.c	2008-11-21 20:49:33 UTC (rev 6087)
+++ trunk/numpy/core/src/umath_ufunc_object.inc	2008-11-22 01:28:52 UTC (rev 6089)
@@ -0,0 +1,4134 @@
+/*
+ * Python Universal Functions Object -- Math for all types, plus fast
+ * arrays math
+ *
+ * Full description
+ *
+ * This supports mathematical (and Boolean) functions on arrays and other python
+ * objects.  Math on large arrays of basic C types is rather efficient.
+ *
+ * Travis E. Oliphant  2005, 2006 oliphant at ee.byu.edu (oliphant.travis at ieee.org)
+ * Brigham Young University
+ *
+ * based on the
+ *
+ * Original Implementation:
+ * Copyright (c) 1995, 1996, 1997 Jim Hugunin, hugunin at mit.edu
+ *
+ * with inspiration and code from
+ * Numarray
+ * Space Science Telescope Institute
+ * J. Todd Miller
+ * Perry Greenfield
+ * Rick White
+ *
+ */
+
+
+#define USE_USE_DEFAULTS 1
+
+
+
+
+/* ---------------------------------------------------------------- */
+
+
+/* fpstatus is the ufunc_formatted hardware status
+   errmask is the handling mask specified by the user.
+   errobj is a Python object with (string, callable object or None)
+   or NULL
+*/
+
+/*
+  2. for each of the flags
+  determine whether to ignore, warn, raise error, or call Python function.
+  If ignore, do nothing
+  If warn, print a warning and continue
+  If raise return an error
+  If call, call a user-defined function with string
+*/
+
+static int
+_error_handler(int method, PyObject *errobj, char *errtype, int retstatus, int *first)
+{
+    PyObject *pyfunc, *ret, *args;
+    char *name=PyString_AS_STRING(PyTuple_GET_ITEM(errobj,0));
+    char msg[100];
+
+    ALLOW_C_API_DEF
+
+        ALLOW_C_API
+
+        switch(method) {
+        case UFUNC_ERR_WARN:
+            PyOS_snprintf(msg, sizeof(msg),
+                    "%s encountered in %s", errtype, name);
+            if (PyErr_Warn(PyExc_RuntimeWarning, msg) < 0) goto fail;
+            break;
+        case UFUNC_ERR_RAISE:
+            PyErr_Format(PyExc_FloatingPointError,
+                         "%s encountered in %s",
+                         errtype, name);
+            goto fail;
+        case UFUNC_ERR_CALL:
+            pyfunc = PyTuple_GET_ITEM(errobj, 1);
+
+            if (pyfunc == Py_None) {
+                PyErr_Format(PyExc_NameError,
+                             "python callback specified for %s (in " \
+                             " %s) but no function found.",
+                             errtype, name);
+                goto fail;
+            }
+            args = Py_BuildValue("NN", PyString_FromString(errtype),
+                                 PyInt_FromLong((long) retstatus));
+            if (args == NULL) goto fail;
+            ret = PyObject_CallObject(pyfunc, args);
+            Py_DECREF(args);
+            if (ret == NULL) goto fail;
+            Py_DECREF(ret);
+
+            break;
+        case UFUNC_ERR_PRINT:
+            if (*first) {
+                fprintf(stderr, "Warning: %s encountered in %s\n", errtype, name);
+                *first = 0;
+            }
+            break;
+        case UFUNC_ERR_LOG:
+            if (first) {
+                *first = 0;
+                pyfunc = PyTuple_GET_ITEM(errobj, 1);
+                if (pyfunc == Py_None) {
+                    PyErr_Format(PyExc_NameError,
+                                 "log specified for %s (in %s) but no " \
+                                 "object with write method found.",
+                                 errtype, name);
+                    goto fail;
+                }
+                PyOS_snprintf(msg, sizeof(msg),
+                        "Warning: %s encountered in %s\n", errtype, name);
+                ret = PyObject_CallMethod(pyfunc, "write", "s", msg);
+                if (ret == NULL) goto fail;
+                Py_DECREF(ret);
+            }
+            break;
+        }
+    DISABLE_C_API
+        return 0;
+
+ fail:
+    DISABLE_C_API
+        return -1;
+}
+
+
+/*UFUNC_API*/
+static int
+PyUFunc_getfperr(void)
+{
+    int retstatus;
+    UFUNC_CHECK_STATUS(retstatus);
+    return retstatus;
+}
+
+#define HANDLEIT(NAME, str) {if (retstatus & UFUNC_FPE_##NAME) {        \
+            handle = errmask & UFUNC_MASK_##NAME;                       \
+            if (handle &&                                               \
+                _error_handler(handle >> UFUNC_SHIFT_##NAME,            \
+                               errobj, str, retstatus, first) < 0)      \
+                return -1;                                              \
+        }}
+
+/*UFUNC_API*/
+static int
+PyUFunc_handlefperr(int errmask, PyObject *errobj, int retstatus, int *first)
+{
+    int handle;
+    if (errmask && retstatus) {
+        HANDLEIT(DIVIDEBYZERO, "divide by zero");
+        HANDLEIT(OVERFLOW, "overflow");
+        HANDLEIT(UNDERFLOW, "underflow");
+        HANDLEIT(INVALID, "invalid value");
+    }
+    return 0;
+}
+
+#undef HANDLEIT
+
+
+/*UFUNC_API*/
+static int
+PyUFunc_checkfperr(int errmask, PyObject *errobj, int *first)
+{
+    int retstatus;
+
+    /* 1. check hardware flag --- this is platform dependent code */
+    retstatus = PyUFunc_getfperr();
+    return PyUFunc_handlefperr(errmask, errobj, retstatus, first);
+}
+
+
+/* Checking the status flag clears it */
+/*UFUNC_API*/
+static void
+PyUFunc_clearfperr()
+{
+    PyUFunc_getfperr();
+}
+
+
+#define NO_UFUNCLOOP        0
+#define ZERO_EL_REDUCELOOP  0
+#define ONE_UFUNCLOOP       1
+#define ONE_EL_REDUCELOOP   1
+#define NOBUFFER_UFUNCLOOP  2
+#define NOBUFFER_REDUCELOOP 2
+#define BUFFER_UFUNCLOOP  3
+#define BUFFER_REDUCELOOP 3
+#define SIGNATURE_NOBUFFER_UFUNCLOOP 4
+
+
+static char
+_lowest_type(char intype)
+{
+    switch(intype) {
+    /* case PyArray_BYTE */
+    case PyArray_SHORT:
+    case PyArray_INT:
+    case PyArray_LONG:
+    case PyArray_LONGLONG:
+        return PyArray_BYTE;
+    /* case PyArray_UBYTE */
+    case PyArray_USHORT:
+    case PyArray_UINT:
+    case PyArray_ULONG:
+    case PyArray_ULONGLONG:
+        return PyArray_UBYTE;
+    /* case PyArray_FLOAT:*/
+    case PyArray_DOUBLE:
+    case PyArray_LONGDOUBLE:
+        return PyArray_FLOAT;
+    /* case PyArray_CFLOAT:*/
+    case PyArray_CDOUBLE:
+    case PyArray_CLONGDOUBLE:
+        return PyArray_CFLOAT;
+    default:
+        return intype;
+    }
+}
+
+static char *_types_msg =  "function not supported for these types, "   \
+    "and can't coerce safely to supported types";
+
+/* Called for non-NULL user-defined functions.
+   The object should be a CObject pointing to a linked-list of functions
+   storing the function, data, and signature of all user-defined functions.
+   There must be a match with the input argument types or an error
+   will occur.
+*/
+static int
+_find_matching_userloop(PyObject *obj, int *arg_types,
+                        PyArray_SCALARKIND *scalars,
+                        PyUFuncGenericFunction *function, void **data,
+                        int nargs, int nin)
+{
+    PyUFunc_Loop1d *funcdata;
+    int i;
+    funcdata = (PyUFunc_Loop1d *)PyCObject_AsVoidPtr(obj);
+    while (funcdata != NULL) {
+        for(i=0; i<nin; i++) {
+            if (!PyArray_CanCoerceScalar(arg_types[i],
+                                         funcdata->arg_types[i],
+                                         scalars[i]))
+                break;
+        }
+        if (i==nin) { /* match found */
+            *function = funcdata->func;
+            *data = funcdata->data;
+            /* Make sure actual arg_types supported
+               by the loop are used */
+            for(i=0; i<nargs; i++) {
+                arg_types[i] = funcdata->arg_types[i];
+            }
+            return 0;
+        }
+        funcdata = funcdata->next;
+    }
+    return -1;
+}
+
+/* if only one type is specified then it is the "first" output data-type
+   and the first signature matching this output data-type is returned.
+
+   if a tuple of types is specified then an exact match to the signature
+   is searched and it much match exactly or an error occurs
+*/
+static int
+extract_specified_loop(PyUFuncObject *self, int *arg_types,
+                       PyUFuncGenericFunction *function, void **data,
+                       PyObject *type_tup, int userdef)
+{
+    Py_ssize_t n=1;
+    int *rtypenums;
+    static char msg[] = "loop written to specified type(s) not found";
+    PyArray_Descr *dtype;
+    int nargs;
+    int i, j;
+    int strtype=0;
+
+    nargs = self->nargs;
+
+    if (PyTuple_Check(type_tup)) {
+        n = PyTuple_GET_SIZE(type_tup);
+        if (n != 1 && n != nargs) {
+            PyErr_Format(PyExc_ValueError,
+                         "a type-tuple must be specified " \
+                         "of length 1 or %d for %s", nargs,
+                         self->name ? self->name : "(unknown)");
+            return -1;
+        }
+    }
+    else if PyString_Check(type_tup) {
+            Py_ssize_t slen;
+            char *thestr;
+            slen = PyString_GET_SIZE(type_tup);
+            thestr = PyString_AS_STRING(type_tup);
+            for(i=0; i < slen-2; i++) {
+                if (thestr[i] == '-' && thestr[i+1] == '>')
+                    break;
+            }
+            if (i < slen-2) {
+                strtype = 1;
+                n = slen-2;
+                if (i != self->nin ||
+                    slen-2-i != self->nout) {
+                    PyErr_Format(PyExc_ValueError,
+                                 "a type-string for %s, "   \
+                                 "requires %d typecode(s) before " \
+                                 "and %d after the -> sign",
+                                 self->name ? self->name : "(unknown)",
+                                 self->nin, self->nout);
+                    return -1;
+                }
+            }
+        }
+    rtypenums = (int *)_pya_malloc(n*sizeof(int));
+    if (rtypenums==NULL) {
+        PyErr_NoMemory();
+        return -1;
+    }
+
+    if (strtype) {
+        char *ptr;
+        ptr = PyString_AS_STRING(type_tup);
+        i = 0;
+        while (i < n) {
+            if (*ptr == '-' || *ptr == '>') {
+                ptr++;
+                continue;
+            }
+            dtype = PyArray_DescrFromType((int) *ptr);
+            if (dtype == NULL) goto fail;
+            rtypenums[i] = dtype->type_num;
+            Py_DECREF(dtype);
+            ptr++; i++;
+        }
+    }
+    else if (PyTuple_Check(type_tup)) {
+        for(i=0; i<n; i++) {
+            if (PyArray_DescrConverter(PyTuple_GET_ITEM     \
+                                       (type_tup, i),
+                                       &dtype) == NPY_FAIL)
+                goto fail;
+            rtypenums[i] = dtype->type_num;
+            Py_DECREF(dtype);
+        }
+    }
+    else {
+        if (PyArray_DescrConverter(type_tup, &dtype) == NPY_FAIL) {
+            goto fail;
+        }
+        rtypenums[0] = dtype->type_num;
+        Py_DECREF(dtype);
+    }
+
+    if (userdef > 0) { /* search in the user-defined functions */
+        PyObject *key, *obj;
+        PyUFunc_Loop1d *funcdata;
+        obj = NULL;
+        key = PyInt_FromLong((long) userdef);
+        if (key == NULL) goto fail;
+        obj = PyDict_GetItem(self->userloops, key);
+        Py_DECREF(key);
+        if (obj == NULL) {
+            PyErr_SetString(PyExc_TypeError,
+                            "user-defined type used in ufunc" \
+                            " with no registered loops");
+            goto fail;
+        }
+        /* extract the correct function
+           data and argtypes
+        */
+        funcdata = (PyUFunc_Loop1d *)PyCObject_AsVoidPtr(obj);
+        while (funcdata != NULL) {
+            if (n != 1) {
+                for(i=0; i<nargs; i++) {
+                    if (rtypenums[i] != funcdata->arg_types[i])
+                        break;
+                }
+            }
+            else if (rtypenums[0] == funcdata->arg_types[self->nin]) {
+                i = nargs;
+            }
+            else i = -1;
+            if (i == nargs) {
+                *function = funcdata->func;
+                *data = funcdata->data;
+                for(i=0; i<nargs; i++) {
+                    arg_types[i] = funcdata->arg_types[i];
+                }
+                Py_DECREF(obj);
+                goto finish;
+            }
+            funcdata = funcdata->next;
+        }
+        PyErr_SetString(PyExc_TypeError, msg);
+        goto fail;
+    }
+
+    /* look for match in self->functions */
+
+    for(j=0; j<self->ntypes; j++) {
+        if (n != 1) {
+            for(i=0; i<nargs; i++) {
+                if (rtypenums[i] != self->types[j*nargs + i])
+                    break;
+            }
+        }
+        else if (rtypenums[0] == self->types[j*nargs+self->nin]) {
+            i = nargs;
+        }
+        else i = -1;
+        if (i == nargs) {
+            *function = self->functions[j];
+            *data = self->data[j];
+            for(i=0; i<nargs; i++) {
+                arg_types[i] = self->types[j*nargs+i];
+            }
+            goto finish;
+        }
+    }
+    PyErr_SetString(PyExc_TypeError, msg);
+
+
+ fail:
+    _pya_free(rtypenums);
+    return -1;
+
+ finish:
+    _pya_free(rtypenums);
+    return 0;
+
+}
+
+
+/*
+ * Called to determine coercion
+ * Can change arg_types.
+ */
+
+static int
+select_types(PyUFuncObject *self, int *arg_types,
+             PyUFuncGenericFunction *function, void **data,
+             PyArray_SCALARKIND *scalars,
+             PyObject *typetup)
+{
+    int i, j;
+    char start_type;
+    int userdef = -1;
+    int userdef_ind = -1;
+
+    if (self->userloops) {
+        for(i = 0; i < self->nin; i++) {
+            if (PyTypeNum_ISUSERDEF(arg_types[i])) {
+                userdef = arg_types[i];
+		userdef_ind = i;
+                break;
+            }
+        }
+    }
+
+    if (typetup != NULL)
+        return extract_specified_loop(self, arg_types, function, data,
+                                      typetup, userdef);
+
+    if (userdef > 0) {
+        PyObject *key, *obj;
+        int ret = -1;
+        obj = NULL;
+
+	/*
+         * Look through all the registered loops for all the user-defined
+	 * types to find a match.
+	 */
+	while (ret == -1) {
+	    if (userdef_ind >= self->nin) {
+                break;
+            }
+	    userdef = arg_types[userdef_ind++];
+	    if (!(PyTypeNum_ISUSERDEF(userdef))) {
+                continue;
+            }
+	    key = PyInt_FromLong((long) userdef);
+	    if (key == NULL) {
+                return -1;
+            }
+	    obj = PyDict_GetItem(self->userloops, key);
+	    Py_DECREF(key);
+	    if (obj == NULL) {
+                continue;
+            }
+	    /*
+             * extract the correct function
+	     * data and argtypes for this user-defined type.
+	     */
+	    ret = _find_matching_userloop(obj, arg_types, scalars,
+					  function, data, self->nargs,
+					  self->nin);
+	}
+	if (ret == 0) {
+            return ret;
+        }
+	PyErr_SetString(PyExc_TypeError, _types_msg);
+	return ret;
+    }
+
+    start_type = arg_types[0];
+    /*
+     * If the first argument is a scalar we need to place
+     * the start type as the lowest type in the class
+     */
+    if (scalars[0] != PyArray_NOSCALAR) {
+        start_type = _lowest_type(start_type);
+    }
+
+    i = 0;
+    while (i < self->ntypes && start_type > self->types[i*self->nargs]) {
+        i++;
+    }
+    for (; i < self->ntypes; i++) {
+        for (j = 0; j < self->nin; j++) {
+            if (!PyArray_CanCoerceScalar(arg_types[j],
+                                         self->types[i*self->nargs + j],
+                                         scalars[j]))
+                break;
+        }
+        if (j == self->nin) {
+            break;
+        }
+    }
+    if (i >= self->ntypes) {
+        PyErr_SetString(PyExc_TypeError, _types_msg);
+        return -1;
+    }
+    for (j = 0; j < self->nargs; j++) {
+        arg_types[j] = self->types[i*self->nargs+j];
+    }
+    if (self->data) {
+        *data = self->data[i];
+    }
+    else {
+        *data = NULL;
+    }
+    *function = self->functions[i];
+
+    return 0;
+}
+
+#if USE_USE_DEFAULTS==1
+static int PyUFunc_NUM_NODEFAULTS=0;
+#endif
+static PyObject *PyUFunc_PYVALS_NAME=NULL;
+
+
+static int
+_extract_pyvals(PyObject *ref, char *name, int *bufsize,
+                int *errmask, PyObject **errobj)
+{
+    PyObject *retval;
+
+    *errobj = NULL;
+    if (!PyList_Check(ref) || (PyList_GET_SIZE(ref)!=3)) {
+        PyErr_Format(PyExc_TypeError, "%s must be a length 3 list.",
+                     UFUNC_PYVALS_NAME);
+        return -1;
+    }
+
+    *bufsize = PyInt_AsLong(PyList_GET_ITEM(ref, 0));
+    if ((*bufsize == -1) && PyErr_Occurred()) {
+        return -1;
+    }
+    if ((*bufsize < PyArray_MIN_BUFSIZE) ||
+        (*bufsize > PyArray_MAX_BUFSIZE) ||
+        (*bufsize % 16 != 0)) {
+        PyErr_Format(PyExc_ValueError,
+                     "buffer size (%d) is not in range "
+                     "(%"INTP_FMT" - %"INTP_FMT") or not a multiple of 16",
+                     *bufsize, (intp) PyArray_MIN_BUFSIZE,
+                     (intp) PyArray_MAX_BUFSIZE);
+        return -1;
+    }
+
+    *errmask = PyInt_AsLong(PyList_GET_ITEM(ref, 1));
+    if (*errmask < 0) {
+        if (PyErr_Occurred()) {
+            return -1;
+        }
+        PyErr_Format(PyExc_ValueError,
+                     "invalid error mask (%d)",
+                     *errmask);
+        return -1;
+    }
+
+    retval = PyList_GET_ITEM(ref, 2);
+    if (retval != Py_None && !PyCallable_Check(retval)) {
+        PyObject *temp;
+        temp = PyObject_GetAttrString(retval, "write");
+        if (temp == NULL || !PyCallable_Check(temp)) {
+            PyErr_SetString(PyExc_TypeError,
+                            "python object must be callable or have " \
+                            "a callable write method");
+            Py_XDECREF(temp);
+            return -1;
+        }
+        Py_DECREF(temp);
+    }
+
+    *errobj = Py_BuildValue("NO",
+                            PyString_FromString(name),
+                            retval);
+    if (*errobj == NULL) {
+        return -1;
+    }
+
+    return 0;
+}
+
+
+
+/*UFUNC_API*/
+static int
+PyUFunc_GetPyValues(char *name, int *bufsize, int *errmask, PyObject **errobj)
+{
+    PyObject *thedict;
+    PyObject *ref = NULL;
+
+#if USE_USE_DEFAULTS==1
+    if (PyUFunc_NUM_NODEFAULTS != 0) {
+#endif
+        if (PyUFunc_PYVALS_NAME == NULL) {
+            PyUFunc_PYVALS_NAME = \
+                PyString_InternFromString(UFUNC_PYVALS_NAME);
+        }
+        thedict = PyThreadState_GetDict();
+        if (thedict == NULL) {
+            thedict = PyEval_GetBuiltins();
+        }
+        ref = PyDict_GetItem(thedict, PyUFunc_PYVALS_NAME);
+#if USE_USE_DEFAULTS==1
+    }
+#endif
+    if (ref == NULL) {
+        *errmask = UFUNC_ERR_DEFAULT;
+        *errobj = Py_BuildValue("NO",
+                                PyString_FromString(name),
+                                Py_None);
+        *bufsize = PyArray_BUFSIZE;
+        return 0;
+    }
+    return _extract_pyvals(ref, name, bufsize, errmask, errobj);
+}
+
+/* Create copies for any arrays that are less than loop->bufsize
+   in total size (or core_enabled) and are mis-behaved or in need
+   of casting.
+*/
+
+static int
+_create_copies(PyUFuncLoopObject *loop, int *arg_types, PyArrayObject **mps)
+{
+    int nin = loop->ufunc->nin;
+    int i;
+    intp size;
+    PyObject *new;
+    PyArray_Descr *ntype;
+    PyArray_Descr *atype;
+
+    for(i=0; i<nin; i++) {
+        size = PyArray_SIZE(mps[i]);
+        /* if the type of mps[i] is equivalent to arg_types[i] */
+        /* then set arg_types[i] equal to type of
+           mps[i] for later checking....
+        */
+        if (PyArray_TYPE(mps[i]) != arg_types[i]) {
+            ntype = mps[i]->descr;
+            atype = PyArray_DescrFromType(arg_types[i]);
+            if (PyArray_EquivTypes(atype, ntype)) {
+                arg_types[i] = ntype->type_num;
+            }
+            Py_DECREF(atype);
+        }
+        if (size < loop->bufsize || loop->ufunc->core_enabled) {
+            if (!(PyArray_ISBEHAVED_RO(mps[i])) ||          \
+                PyArray_TYPE(mps[i]) != arg_types[i]) {
+                ntype = PyArray_DescrFromType(arg_types[i]);
+                new = PyArray_FromAny((PyObject *)mps[i],
+                                      ntype, 0, 0,
+                                      FORCECAST | ALIGNED, NULL);
+                if (new == NULL) return -1;
+                Py_DECREF(mps[i]);
+                mps[i] = (PyArrayObject *)new;
+            }
+        }
+    }
+
+    return 0;
+}
+
+#define _GETATTR_(str, rstr) do {if (strcmp(name, #str) == 0)     \
+        return PyObject_HasAttrString(op, "__" #rstr "__");} while (0);
+
+static int
+_has_reflected_op(PyObject *op, char *name)
+{
+    _GETATTR_(add, radd);
+    _GETATTR_(subtract, rsub);
+    _GETATTR_(multiply, rmul);
+    _GETATTR_(divide, rdiv);
+    _GETATTR_(true_divide, rtruediv);
+    _GETATTR_(floor_divide, rfloordiv);
+    _GETATTR_(remainder, rmod);
+    _GETATTR_(power, rpow);
+    _GETATTR_(left_shift, rlshift);
+    _GETATTR_(right_shift, rrshift);
+    _GETATTR_(bitwise_and, rand);
+    _GETATTR_(bitwise_xor, rxor);
+    _GETATTR_(bitwise_or, ror);
+    return 0;
+}
+
+#undef _GETATTR_
+
+
+/* Return the position of next non-white-space char in the string
+*/
+static int
+_next_non_white_space(const char* str, int offset)
+{
+    int ret = offset;
+    while (str[ret] == ' ' || str[ret] == '\t') ret++;
+    return ret;
+}
+
+static int
+_is_alpha_underscore(char ch)
+{
+    return (ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z') || ch == '_';
+}
+
+static int
+_is_alnum_underscore(char ch)
+{
+    return _is_alpha_underscore(ch) || (ch >= '0' && ch <= '9');
+}
+
+/* Return the ending position of a variable name
+*/
+static int
+_get_end_of_name(const char* str, int offset)
+{
+    int ret = offset;
+    while (_is_alnum_underscore(str[ret])) ret++;
+    return ret;
+}
+
+/* Returns 1 if the dimension names pointed by s1 and s2 are the same,
+   otherwise returns 0.
+*/
+static int
+_is_same_name(const char* s1, const char* s2)
+{
+    while (_is_alnum_underscore(*s1) && _is_alnum_underscore(*s2)) {
+        if (*s1 != *s2) return 0;
+        s1++;
+        s2++;
+    }
+    return !_is_alnum_underscore(*s1) && !_is_alnum_underscore(*s2);
+}
+
+/* Sets core_num_dim_ix, core_num_dims, core_dim_ixs, core_offsets,
+   and core_signature in PyUFuncObject "self".  Returns 0 unless an
+   error occured.
+*/
+static int
+_parse_signature(PyUFuncObject *self, const char *signature)
+{
+    size_t len;
+    char const **var_names;
+    int nd = 0;             /* number of dimension of the current argument */
+    int cur_arg = 0;        /* index into core_num_dims&core_offsets */
+    int cur_core_dim = 0;   /* index into core_dim_ixs */
+    int i = 0;
+    char *parse_error = NULL;
+
+    if (signature == NULL) {
+	PyErr_SetString(PyExc_RuntimeError,
+			"_parse_signature with NULL signature");
+        return -1;
+    }
+
+    len = strlen(signature);
+    self->core_signature = _pya_malloc(sizeof(char) * (len+1));
+    if (self->core_signature)
+        strcpy(self->core_signature, signature);
+
+    /* Allocate sufficient memory to store pointers to all dimension names */
+    var_names = _pya_malloc(sizeof(char const*) * len);
+    if (var_names == NULL) {
+        PyErr_NoMemory();
+        return -1;
+    }
+    
+    self->core_enabled = 1;
+    self->core_num_dim_ix = 0;
+    self->core_num_dims = _pya_malloc(sizeof(int) * self->nargs);
+    self->core_dim_ixs = _pya_malloc(sizeof(int) * len); /* shrink this later */
+    self->core_offsets = _pya_malloc(sizeof(int) * self->nargs);
+    if (self->core_num_dims == NULL || self->core_dim_ixs == NULL ||
+        self->core_offsets == NULL) {
+        PyErr_NoMemory();
+        goto fail;
+    }
+
+    i = _next_non_white_space(signature, 0);
+
+    while (signature[i] != '\0') { /* loop over input/output arguments */
+        if (cur_arg == self->nin) {
+            /* expect "->" */
+            if (signature[i] != '-' || signature[i+1] != '>') {
+                parse_error = "expect '->'";
+                goto fail;
+            }
+            i = _next_non_white_space(signature, i+2);
+        }
+
+        /* parse core dimensions of one argument, e.g. "()", "(i)", or
+	   "(i,j)" */
+        if (signature[i] != '(') {
+            parse_error = "expect '('";
+            goto fail;
+        }
+        i = _next_non_white_space(signature, i+1);
+        while (signature[i] != ')') { /* loop over core dimensions */
+	    int j = 0;
+            if (!_is_alpha_underscore(signature[i])) {
+		parse_error = "expect dimension name";
+		goto fail;
+            }
+	    while (j < self->core_num_dim_ix) {
+		if (_is_same_name(signature+i, var_names[j])) break;
+		j++;
+	    }
+	    if (j >= self->core_num_dim_ix) {
+		var_names[j] = signature+i;
+		self->core_num_dim_ix++;
+	    }
+	    self->core_dim_ixs[cur_core_dim] = j;
+	    cur_core_dim++;
+	    nd++;
+	    i = _get_end_of_name(signature, i);
+	    i = _next_non_white_space(signature, i);
+	    if (signature[i] != ',' && signature[i] != ')') {
+		parse_error = "expect ',' or ')'";
+		goto fail;
+	    }
+	    if (signature[i] == ',')
+	    {
+		i = _next_non_white_space(signature, i+1);
+		if (signature[i] == ')') {
+		    parse_error = "',' must not be followed by ')'";
+		    goto fail;
+		}
+	    }
+        }
+	self->core_num_dims[cur_arg] = nd;
+	self->core_offsets[cur_arg] = cur_core_dim-nd;
+	cur_arg++;
+	nd = 0;
+	i = _next_non_white_space(signature, i+1);
+
+        if (cur_arg != self->nin && cur_arg != self->nargs) {
+	    /* The list of input arguments (or output arguments) was
+	       only read partially */
+            if (signature[i] != ',') {
+                parse_error = "expect ','";
+                goto fail;
+            }
+            i = _next_non_white_space(signature, i+1);
+        }
+    }
+    if (cur_arg != self->nargs) {
+	parse_error = "incomplete signature: not all arguments found";
+        goto fail;
+    }
+    self->core_dim_ixs = _pya_realloc(self->core_dim_ixs,
+                                      sizeof(int) * cur_core_dim);
+    /* check for trivial core-signature, e.g. "(),()->()" */
+    if (cur_core_dim == 0)
+        self->core_enabled = 0;
+    _pya_free((void*)var_names);
+    return 0;
+fail:
+    _pya_free((void*)var_names);
+    if (parse_error) {
+	char *buf = _pya_malloc(sizeof(char) * (len + 200));
+	if (buf) {
+	    sprintf(buf, "%s at position %d in \"%s\"",
+		    parse_error, i, signature);
+	    PyErr_SetString(PyExc_ValueError, signature);
+	    _pya_free(buf);
+	}
+	else {
+	    PyErr_NoMemory();
+	}
+    }
+    return -1;
+}
+
+/* Concatenate the loop and core dimensions of
+   PyArrayMultiIterObject's iarg-th argument, to recover a full
+   dimension array (used for output arguments).
+*/
+static npy_intp*
+_compute_output_dims(PyUFuncLoopObject *loop, int iarg,
+		     int *out_nd, npy_intp *tmp_dims)
+{
+    int i;
+    PyUFuncObject *ufunc = loop->ufunc;
+    if (ufunc->core_enabled == 0) {
+        /* case of ufunc with trivial core-signature */
+        *out_nd = loop->nd;
+        return loop->dimensions;
+    }
+
+    *out_nd = loop->nd + ufunc->core_num_dims[iarg];
+    if (*out_nd > NPY_MAXARGS) {
+        PyErr_SetString(PyExc_ValueError,
+                        "dimension of output variable exceeds limit");
+        return NULL;
+    }
+
+    /* copy loop dimensions */
+    memcpy(tmp_dims, loop->dimensions, sizeof(npy_intp) * loop->nd);
+
+    /* copy core dimension */
+    for (i = 0; i < ufunc->core_num_dims[iarg]; i++)
+        tmp_dims[loop->nd + i] = loop->core_dim_sizes[1 +
+            ufunc->core_dim_ixs[ufunc->core_offsets[iarg]+i]];
+    return tmp_dims;
+}
+
+/* Check and set core_dim_sizes and core_strides for the i-th argument.
+*/
+static int
+_compute_dimension_size(PyUFuncLoopObject *loop, PyArrayObject **mps, int i)
+{
+    PyUFuncObject *ufunc = loop->ufunc;
+    int j = ufunc->core_offsets[i];
+    int k = PyArray_NDIM(mps[i]) - ufunc->core_num_dims[i];
+    int ind;
+    for (ind = 0; ind < ufunc->core_num_dims[i]; ind++, j++, k++) {
+	npy_intp dim = k<0 ? 1 : PyArray_DIM(mps[i], k);
+	/* First element of core_dim_sizes will be used for looping */
+	int dim_ix = ufunc->core_dim_ixs[j] + 1;
+	if (loop->core_dim_sizes[dim_ix] == 1) {
+	    /* broadcast core dimension  */
+	    loop->core_dim_sizes[dim_ix] = dim;
+	}
+	else if (dim != 1 && dim != loop->core_dim_sizes[dim_ix]) {
+	    PyErr_SetString(PyExc_ValueError,
+			    "core dimensions mismatch");
+	    return -1;
+	}
+        /* First ufunc->nargs elements will be used for looping */
+	loop->core_strides[ufunc->nargs + j] =
+            dim == 1 ? 0 : PyArray_STRIDE(mps[i], k);
+    }
+    return 0;
+}
+
+/* Return a view of array "ap" with "core_nd" dimensions cut from tail. */
+static PyArrayObject *
+_trunc_coredim(PyArrayObject *ap, int core_nd)
+{
+    PyArrayObject *ret;
+    int nd = ap->nd - core_nd;
+    if (nd < 0) nd = 0;
+
+    /* The following code is basically taken from PyArray_Transpose */
+    Py_INCREF(ap->descr);  /* NewFromDescr will steal this reference */
+    ret = (PyArrayObject *)
+        PyArray_NewFromDescr(ap->ob_type, ap->descr,
+                             nd, ap->dimensions,
+                             ap->strides, ap->data, ap->flags,
+                             (PyObject *)ap);
+    if (ret == NULL) return NULL;
+
+    /* point at true owner of memory: */
+    ret->base = (PyObject *)ap;
+    Py_INCREF(ap);
+
+    PyArray_UpdateFlags(ret, CONTIGUOUS | FORTRAN);
+    
+    return ret;
+}
+
+static Py_ssize_t
+construct_arrays(PyUFuncLoopObject *loop, PyObject *args, PyArrayObject **mps,
+                 PyObject *typetup)
+{
+    Py_ssize_t nargs;
+    int i;
+    int arg_types[NPY_MAXARGS];
+    PyArray_SCALARKIND scalars[NPY_MAXARGS];
+    PyArray_SCALARKIND maxarrkind, maxsckind, new;
+    PyUFuncObject *self = loop->ufunc;
+    Bool allscalars = TRUE;
+    PyTypeObject *subtype = &PyArray_Type;
+    PyObject *context = NULL;
+    PyObject *obj;
+    int flexible = 0;
+    int object = 0;
+
+    npy_intp temp_dims[NPY_MAXDIMS];
+    npy_intp *out_dims;
+    int out_nd;
+
+    /* Check number of arguments */
+    nargs = PyTuple_Size(args);
+    if ((nargs < self->nin) || (nargs > self->nargs)) {
+        PyErr_SetString(PyExc_ValueError,
+                        "invalid number of arguments");
+        return -1;
+    }
+
+    /* Get each input argument */
+    maxarrkind = PyArray_NOSCALAR;
+    maxsckind = PyArray_NOSCALAR;
+    for(i = 0; i < self->nin; i++) {
+        obj = PyTuple_GET_ITEM(args,i);
+        if (!PyArray_Check(obj) && !PyArray_IsScalar(obj, Generic)) {
+            context = Py_BuildValue("OOi", self, args, i);
+        }
+        else {
+            context = NULL;
+        }
+        mps[i] = (PyArrayObject *)PyArray_FromAny(obj, NULL, 0, 0, 0, context);
+        Py_XDECREF(context);
+        if (mps[i] == NULL) {
+            return -1;
+        }
+        arg_types[i] = PyArray_TYPE(mps[i]);
+        if (!flexible && PyTypeNum_ISFLEXIBLE(arg_types[i])) {
+            flexible = 1;
+        }
+        if (!object && PyTypeNum_ISOBJECT(arg_types[i])) {
+            object = 1;
+        }
+        /* debug
+         * fprintf(stderr, "array %d has reference %d\n", i,
+         * (mps[i])->ob_refcnt);
+         */
+
+        /*
+         * Scalars are 0-dimensional arrays at this point
+         */
+
+        /*
+         * We need to keep track of whether or not scalars
+         * are mixed with arrays of different kinds.
+         */
+
+        if (mps[i]->nd > 0) {
+            scalars[i] = PyArray_NOSCALAR;
+            allscalars = FALSE;
+            new = PyArray_ScalarKind(arg_types[i], NULL);
+            maxarrkind = NPY_MAX(new, maxarrkind);
+        }
+        else {
+            scalars[i] = PyArray_ScalarKind(arg_types[i], &(mps[i]));
+            maxsckind = NPY_MAX(scalars[i], maxsckind);
+        }
+    }
+
+    /* We don't do strings */
+    if (flexible && !object) {
+        loop->notimplemented = 1;
+        return nargs;
+    }
+
+    /*
+     * If everything is a scalar, or scalars mixed with arrays of
+     * different kinds of lesser kinds then use normal coercion rules
+     */
+    if (allscalars || (maxsckind > maxarrkind)) {
+        for(i = 0; i < self->nin; i++) {
+            scalars[i] = PyArray_NOSCALAR;
+        }
+    }
+
+    /* Select an appropriate function for these argument types. */
+    if (select_types(loop->ufunc, arg_types, &(loop->function),
+                     &(loop->funcdata), scalars, typetup) == -1)
+        return -1;
+
+    /*
+     * FAIL with NotImplemented if the other object has
+     * the __r<op>__ method and has __array_priority__ as
+     * an attribute (signalling it can handle ndarray's)
+     * and is not already an ndarray or a subtype of the same type.
+     */
+    if ((arg_types[1] == PyArray_OBJECT) &&                         \
+        (loop->ufunc->nin==2) && (loop->ufunc->nout == 1)) {
+        PyObject *_obj = PyTuple_GET_ITEM(args, 1);
+        if (!PyArray_CheckExact(_obj) &&
+	    /* If both are same subtype of object arrays, then proceed */
+	    !(_obj->ob_type == (PyTuple_GET_ITEM(args, 0))->ob_type) &&   \
+
+            PyObject_HasAttrString(_obj, "__array_priority__") && \
+            _has_reflected_op(_obj, loop->ufunc->name)) {
+            loop->notimplemented = 1;
+            return nargs;
+        }
+    }
+
+    /*
+     * Create copies for some of the arrays if they are small
+     * enough and not already contiguous
+     */
+    if (_create_copies(loop, arg_types, mps) < 0) {
+        return -1;
+    }
+
+    /* Only use loop dimensions when constructing Iterator:
+     * temporarily replace mps[i] (will be recovered below).
+     */
+    if (self->core_enabled) {
+        for (i = 0; i < self->nin; i++) {
+            PyArrayObject *ao;
+
+	    if (_compute_dimension_size(loop, mps, i) < 0)
+		return -1;
+
+            ao = _trunc_coredim(mps[i], self->core_num_dims[i]);
+            if (ao == NULL)
+                return -1;
+            mps[i] = ao;
+        }
+    }
+
+    /* Create Iterators for the Inputs */
+    for(i = 0; i < self->nin; i++) {
+        loop->iters[i] = (PyArrayIterObject *)          \
+            PyArray_IterNew((PyObject *)mps[i]);
+        if (loop->iters[i] == NULL) {
+            return -1;
+        }
+    }
+
+
+    /* Recover mps[i]. */
+    if (self->core_enabled) {
+        for (i = 0; i < self->nin; i++) {
+            PyArrayObject *ao = mps[i];
+            mps[i] = (PyArrayObject *)mps[i]->base;
+            Py_DECREF(ao);
+        }
+    }
+
+    /* Broadcast the result */
+    loop->numiter = self->nin;
+    if (PyArray_Broadcast((PyArrayMultiIterObject *)loop) < 0) {
+        return -1;
+    }
+
+    /* Get any return arguments */
+    for(i = self->nin; i < nargs; i++) {
+        mps[i] = (PyArrayObject *)PyTuple_GET_ITEM(args, i);
+        if (((PyObject *)mps[i])==Py_None) {
+            mps[i] = NULL;
+            continue;
+        }
+        Py_INCREF(mps[i]);
+        if (!PyArray_Check((PyObject *)mps[i])) {
+            PyObject *new;
+            if (PyArrayIter_Check(mps[i])) {
+                new = PyObject_CallMethod((PyObject *)mps[i],
+                                          "__array__", NULL);
+                Py_DECREF(mps[i]);
+                mps[i] = (PyArrayObject *)new;
+            }
+            else {
+                PyErr_SetString(PyExc_TypeError,
+                                "return arrays must be "\
+                                "of ArrayType");
+                Py_DECREF(mps[i]);
+                mps[i] = NULL;
+                return -1;
+            }
+        }
+        
+
+        if (self->core_enabled) {
+            if (_compute_dimension_size(loop, mps, i) < 0)
+                return -1;
+        }
+	out_dims = _compute_output_dims(loop, i, &out_nd, temp_dims);
+	if (!out_dims) return -1;
+
+        if (mps[i]->nd != out_nd ||
+            !PyArray_CompareLists(mps[i]->dimensions,
+                                  out_dims, out_nd)) {
+            PyErr_SetString(PyExc_ValueError,
+                            "invalid return array shape");
+            Py_DECREF(mps[i]);
+            mps[i] = NULL;
+            return -1;
+        }
+        if (!PyArray_ISWRITEABLE(mps[i])) {
+            PyErr_SetString(PyExc_ValueError,
+                            "return array is not writeable");
+            Py_DECREF(mps[i]);
+            mps[i] = NULL;
+            return -1;
+        }
+    }
+
+    /* construct any missing return arrays and make output iterators */
+    for(i = self->nin; i < self->nargs; i++) {
+        PyArray_Descr *ntype;
+
+        if (mps[i] == NULL) {
+	    out_dims = _compute_output_dims(loop, i, &out_nd, temp_dims);
+	    if (!out_dims) return -1;
+
+            mps[i] = (PyArrayObject *)PyArray_New(subtype,
+                                                  out_nd,
+                                                  out_dims,
+                                                  arg_types[i],
+                                                  NULL, NULL,
+                                                  0, 0, NULL);
+            if (mps[i] == NULL) {
+                return -1;
+            }
+        }
+
+        /*
+         * reset types for outputs that are equivalent
+         * -- no sense casting uselessly
+         */
+        else {
+            if (mps[i]->descr->type_num != arg_types[i]) {
+                PyArray_Descr *atype;
+                ntype = mps[i]->descr;
+                atype = PyArray_DescrFromType(arg_types[i]);
+                if (PyArray_EquivTypes(atype, ntype)) {
+                    arg_types[i] = ntype->type_num;
+                }
+                Py_DECREF(atype);
+            }
+
+            /* still not the same -- or will we have to use buffers?*/
+            if (mps[i]->descr->type_num != arg_types[i] ||
+                !PyArray_ISBEHAVED_RO(mps[i])) {
+                if (loop->size < loop->bufsize || self->core_enabled) {
+                    PyObject *new;
+                    /*
+                     * Copy the array to a temporary copy
+                     * and set the UPDATEIFCOPY flag
+                     */
+                    ntype = PyArray_DescrFromType(arg_types[i]);
+                    new = PyArray_FromAny((PyObject *)mps[i],
+                                          ntype, 0, 0,
+                                          FORCECAST | ALIGNED |
+                                          UPDATEIFCOPY, NULL);
+                    if (new == NULL) {
+                        return -1;
+                    }
+                    Py_DECREF(mps[i]);
+                    mps[i] = (PyArrayObject *)new;
+                }
+            }
+        }
+
+        if (self->core_enabled) {
+            PyArrayObject *ao;
+
+            /* computer for all output arguments, and set strides in "loop" */
+            if (_compute_dimension_size(loop, mps, i) < 0)
+                return -1;
+
+            ao = _trunc_coredim(mps[i], self->core_num_dims[i]);
+            if (ao == NULL)
+                return -1;
+            /* Temporarily modify mps[i] for constructing iterator. */
+            mps[i] = ao;
+        }
+
+        loop->iters[i] = (PyArrayIterObject *)          \
+            PyArray_IterNew((PyObject *)mps[i]);
+        if (loop->iters[i] == NULL) {
+            return -1;
+        }
+
+	/* Recover mps[i]. */
+	if (self->core_enabled) {
+	    PyArrayObject *ao = mps[i];
+	    mps[i] = (PyArrayObject *)mps[i]->base;
+	    Py_DECREF(ao);
+	}
+
+    }
+
+    /*
+     * If any of different type, or misaligned or swapped
+     * then must use buffers
+     */
+    loop->bufcnt = 0;
+    loop->obj = 0;
+
+    /* Determine looping method needed */
+    loop->meth = NO_UFUNCLOOP;
+
+    if (loop->size == 0) {
+        return nargs;
+    }
+
+    if (self->core_enabled) {
+        loop->meth = SIGNATURE_NOBUFFER_UFUNCLOOP;
+    }
+
+    for(i = 0; i < self->nargs; i++) {
+        loop->needbuffer[i] = 0;
+        if (arg_types[i] != mps[i]->descr->type_num ||
+            !PyArray_ISBEHAVED_RO(mps[i])) {
+            if (self->core_enabled) {
+                PyErr_SetString(PyExc_RuntimeError,
+                                "never reached; copy should have been made");
+                return -1;
+            }
+            loop->meth = BUFFER_UFUNCLOOP;
+            loop->needbuffer[i] = 1;
+        }
+        if (!loop->obj && ((mps[i]->descr->type_num == PyArray_OBJECT) ||
+			   (arg_types[i] == PyArray_OBJECT))) {
+            loop->obj = 1;
+        }
+    }
+
+
+    if (self->core_enabled && loop->obj) {
+	PyErr_SetString(PyExc_TypeError,
+			"Object type not allowed in ufunc with signature");
+	return -1;
+    }
+
+    if (loop->meth == NO_UFUNCLOOP) {
+        loop->meth = ONE_UFUNCLOOP;
+
+        /* All correct type and BEHAVED */
+        /* Check for non-uniform stridedness */
+        for(i = 0; i < self->nargs; i++) {
+            if (!(loop->iters[i]->contiguous)) {
+                /*
+                 * May still have uniform stride
+                 * if (broadcast result) <= 1-d
+                 */
+                if (mps[i]->nd != 0 &&                  \
+                    (loop->iters[i]->nd_m1 > 0)) {
+                    loop->meth = NOBUFFER_UFUNCLOOP;
+                    break;
+                }
+            }
+        }
+        if (loop->meth == ONE_UFUNCLOOP) {
+            for(i = 0; i < self->nargs; i++) {
+                loop->bufptr[i] = mps[i]->data;
+            }
+        }
+    }
+
+    loop->numiter = self->nargs;
+
+    /* Fill in steps  */
+    if (loop->meth == SIGNATURE_NOBUFFER_UFUNCLOOP && loop->nd == 0) {
+	/* Use default core_strides */
+    }
+    else if (loop->meth != ONE_UFUNCLOOP) {
+        int ldim;
+        intp minsum;
+        intp maxdim;
+        PyArrayIterObject *it;
+        intp stride_sum[NPY_MAXDIMS];
+        int j;
+
+        /* Fix iterators */
+
+        /*
+         * Optimize axis the iteration takes place over
+         *
+         * The first thought was to have the loop go
+         * over the largest dimension to minimize the number of loops
+         *
+         * However, on processors with slow memory bus and cache,
+         * the slowest loops occur when the memory access occurs for
+         * large strides.
+         *
+         * Thus, choose the axis for which strides of the last iterator is
+         * smallest but non-zero.
+         */
+
+        for(i = 0; i < loop->nd; i++) {
+            stride_sum[i] = 0;
+            for(j = 0; j < loop->numiter; j++) {
+                stride_sum[i] += loop->iters[j]->strides[i];
+            }
+        }
+
+        ldim = loop->nd - 1;
+        minsum = stride_sum[loop->nd-1];
+        for(i = loop->nd - 2; i >= 0; i--) {
+            if (stride_sum[i] < minsum ) {
+                ldim = i;
+                minsum = stride_sum[i];
+            }
+        }
+
+        maxdim = loop->dimensions[ldim];
+        loop->size /= maxdim;
+        loop->bufcnt = maxdim;
+        loop->lastdim = ldim;
+
+        /*
+         * Fix the iterators so the inner loop occurs over the
+         * largest dimensions -- This can be done by
+         * setting the size to 1 in that dimension
+         * (just in the iterators)
+         */
+        for(i = 0; i < loop->numiter; i++) {
+            it = loop->iters[i];
+            it->contiguous = 0;
+            it->size /= (it->dims_m1[ldim]+1);
+            it->dims_m1[ldim] = 0;
+            it->backstrides[ldim] = 0;
+
+            /*
+             * (won't fix factors because we
+             * don't use PyArray_ITER_GOTO1D
+             * so don't change them)
+             *
+             * Set the steps to the strides in that dimension
+             */
+            loop->steps[i] = it->strides[ldim];
+        }
+
+        /*
+         * Set looping part of core_dim_sizes and core_strides.
+         */
+        if (loop->meth == SIGNATURE_NOBUFFER_UFUNCLOOP) {
+            loop->core_dim_sizes[0] = maxdim;
+            for (i = 0; i < self->nargs; i++) {
+                loop->core_strides[i] = loop->steps[i];
+            }
+        }
+
+        /*
+         * fix up steps where we will be copying data to
+         * buffers and calculate the ninnerloops and leftover
+         * values -- if step size is already zero that is not changed...
+         */
+        if (loop->meth == BUFFER_UFUNCLOOP) {
+            loop->leftover = maxdim % loop->bufsize;
+            loop->ninnerloops = (maxdim / loop->bufsize) + 1;
+            for(i = 0; i < self->nargs; i++) {
+                if (loop->needbuffer[i] && loop->steps[i]) {
+                    loop->steps[i] = mps[i]->descr->elsize;
+                }
+                /* These are changed later if casting is needed */
+            }
+        }
+    }
+    else if (loop->meth == ONE_UFUNCLOOP) {
+        /* uniformly-strided case */
+        for(i = 0; i < self->nargs; i++) {
+            if (PyArray_SIZE(mps[i]) == 1)
+                loop->steps[i] = 0;
+            else
+                loop->steps[i] = mps[i]->strides[mps[i]->nd-1];
+        }
+    }
+
+
+    /* Finally, create memory for buffers if we need them */
+
+    /*
+     * Buffers for scalars are specially made small -- scalars are
+     * not copied multiple times
+     */
+    if (loop->meth == BUFFER_UFUNCLOOP) {
+        int cnt = 0, cntcast = 0; /* keeps track of bytes to allocate */
+        int scnt = 0, scntcast = 0;
+        char *castptr;
+        char *bufptr;
+        int last_was_scalar=0;
+        int last_cast_was_scalar=0;
+        int oldbufsize=0;
+        int oldsize=0;
+        int scbufsize = 4*sizeof(double);
+        int memsize;
+        PyArray_Descr *descr;
+
+        /* compute the element size */
+        for(i = 0; i < self->nargs; i++) {
+            if (!loop->needbuffer[i]) {
+                continue;
+            }
+            if (arg_types[i] != mps[i]->descr->type_num) {
+                descr = PyArray_DescrFromType(arg_types[i]);
+                if (loop->steps[i]) {
+                    cntcast += descr->elsize;
+                }
+                else {
+                    scntcast += descr->elsize;
+                }
+                if (i < self->nin) {
+                    loop->cast[i] = PyArray_GetCastFunc(mps[i]->descr,
+                                            arg_types[i]);
+                }
+                else {
+                    loop->cast[i] = PyArray_GetCastFunc \
+                        (descr, mps[i]->descr->type_num);
+                }
+                Py_DECREF(descr);
+                if (!loop->cast[i]) {
+                    return -1;
+                }
+            }
+            loop->swap[i] = !(PyArray_ISNOTSWAPPED(mps[i]));
+            if (loop->steps[i]) {
+                cnt += mps[i]->descr->elsize;
+            }
+            else {
+                scnt += mps[i]->descr->elsize;
+            }
+        }
+        memsize = loop->bufsize*(cnt+cntcast) + scbufsize*(scnt+scntcast);
+        loop->buffer[0] = PyDataMem_NEW(memsize);
+
+        /* debug
+         * fprintf(stderr, "Allocated buffer at %p of size %d, cnt=%d, cntcast=%d\n",
+         *               loop->buffer[0], loop->bufsize * (cnt + cntcast), cnt, cntcast);
+         */
+
+        if (loop->buffer[0] == NULL) {
+            PyErr_NoMemory();
+            return -1;
+        }
+        if (loop->obj) {
+            memset(loop->buffer[0], 0, memsize);
+        }
+        castptr = loop->buffer[0] + loop->bufsize*cnt + scbufsize*scnt;
+        bufptr = loop->buffer[0];
+        loop->objfunc = 0;
+        for(i = 0; i < self->nargs; i++) {
+            if (!loop->needbuffer[i]) {
+                continue;
+            }
+            loop->buffer[i] = bufptr + (last_was_scalar ? scbufsize : \
+                                        loop->bufsize)*oldbufsize;
+            last_was_scalar = (loop->steps[i] == 0);
+            bufptr = loop->buffer[i];
+            oldbufsize = mps[i]->descr->elsize;
+            /* fprintf(stderr, "buffer[%d] = %p\n", i, loop->buffer[i]); */
+            if (loop->cast[i]) {
+                PyArray_Descr *descr;
+                loop->castbuf[i] = castptr + (last_cast_was_scalar ? scbufsize : \
+                                              loop->bufsize)*oldsize;
+                last_cast_was_scalar = last_was_scalar;
+                /* fprintf(stderr, "castbuf[%d] = %p\n", i, loop->castbuf[i]); */
+                descr = PyArray_DescrFromType(arg_types[i]);
+                oldsize = descr->elsize;
+                Py_DECREF(descr);
+                loop->bufptr[i] = loop->castbuf[i];
+                castptr = loop->castbuf[i];
+                if (loop->steps[i])
+                    loop->steps[i] = oldsize;
+            }
+            else {
+                loop->bufptr[i] = loop->buffer[i];
+            }
+            if (!loop->objfunc && loop->obj) {
+                if (arg_types[i] == PyArray_OBJECT) {
+                    loop->objfunc = 1;
+                }
+            }
+        }
+    }
+    return nargs;
+}
+
+static void
+ufuncreduce_dealloc(PyUFuncReduceObject *self)
+{
+    if (self->ufunc) {
+        Py_XDECREF(self->it);
+        Py_XDECREF(self->rit);
+        Py_XDECREF(self->ret);
+        Py_XDECREF(self->errobj);
+        Py_XDECREF(self->decref);
+        if (self->buffer) PyDataMem_FREE(self->buffer);
+        Py_DECREF(self->ufunc);
+    }
+    _pya_free(self);
+}
+
+static void
+ufuncloop_dealloc(PyUFuncLoopObject *self)
+{
+    int i;
+
+    if (self->ufunc != NULL) {
+	if (self->core_dim_sizes)
+	    _pya_free(self->core_dim_sizes);
+	if (self->core_strides)
+	    _pya_free(self->core_strides);
+        for(i = 0; i < self->ufunc->nargs; i++)
+            Py_XDECREF(self->iters[i]);
+        if (self->buffer[0]) {
+            PyDataMem_FREE(self->buffer[0]);
+        }
+        Py_XDECREF(self->errobj);
+        Py_DECREF(self->ufunc);
+    }
+    _pya_free(self);
+}
+
+static PyUFuncLoopObject *
+construct_loop(PyUFuncObject *self, PyObject *args, PyObject *kwds, PyArrayObject **mps)
+{
+    PyUFuncLoopObject *loop;
+    int i;
+    PyObject *typetup = NULL;
+    PyObject *extobj = NULL;
+    char *name;
+
+    if (self == NULL) {
+        PyErr_SetString(PyExc_ValueError, "function not supported");
+        return NULL;
+    }
+    if ((loop = _pya_malloc(sizeof(PyUFuncLoopObject))) == NULL) {
+        PyErr_NoMemory();
+        return loop;
+    }
+
+    loop->index = 0;
+    loop->ufunc = self;
+    Py_INCREF(self);
+    loop->buffer[0] = NULL;
+    for(i = 0; i < self->nargs; i++) {
+        loop->iters[i] = NULL;
+        loop->cast[i] = NULL;
+    }
+    loop->errobj = NULL;
+    loop->notimplemented = 0;
+    loop->first = 1;
+    loop->core_dim_sizes = NULL;
+    loop->core_strides = NULL;
+
+    if (self->core_enabled) {
+	int num_dim_ix = 1 + self->core_num_dim_ix;
+	int nstrides = self->nargs + self->core_offsets[self->nargs-1]
+	    + self->core_num_dims[self->nargs-1];
+	loop->core_dim_sizes = _pya_malloc(sizeof(npy_intp) * num_dim_ix);
+	loop->core_strides = _pya_malloc(sizeof(npy_intp) * nstrides);
+	if (loop->core_dim_sizes == NULL || loop->core_strides == NULL) {
+	    PyErr_NoMemory();
+	    goto fail;
+	}
+	memset(loop->core_strides, 0, sizeof(npy_intp) * nstrides);
+	for (i = 0; i < num_dim_ix; i++)
+	    loop->core_dim_sizes[i] = 1;
+    }
+
+    name = self->name ? self->name : "";
+
+    /*
+     * Extract sig= keyword and extobj= keyword if present.
+     * Raise an error if anything else is present in the
+     * keyword dictionary
+     */
+    if (kwds != NULL) {
+        PyObject *key, *value;
+        Py_ssize_t pos=0;
+        while (PyDict_Next(kwds, &pos, &key, &value)) {
+            char *keystring = PyString_AsString(key);
+            if (keystring == NULL) {
+                PyErr_Clear();
+                PyErr_SetString(PyExc_TypeError, "invalid keyword");
+                goto fail;
+            }
+            if (strncmp(keystring,"extobj",6) == 0) {
+                extobj = value;
+            }
+            else if (strncmp(keystring,"sig",3) == 0) {
+                typetup = value;
+            }
+            else {
+                char *format = "'%s' is an invalid keyword to %s";
+                PyErr_Format(PyExc_TypeError,format,keystring, name);
+                goto fail;
+            }
+        }
+    }
+
+    if (extobj == NULL) {
+        if (PyUFunc_GetPyValues(name,
+                                &(loop->bufsize), &(loop->errormask),
+                                &(loop->errobj)) < 0) {
+            goto fail;
+        }
+    }
+    else {
+        if (_extract_pyvals(extobj, name,
+                            &(loop->bufsize), &(loop->errormask),
+                            &(loop->errobj)) < 0) {
+            goto fail;
+        }
+    }
+
+    /* Setup the arrays */
+    if (construct_arrays(loop, args, mps, typetup) < 0) {
+        goto fail;
+    }
+
+    PyUFunc_clearfperr();
+    return loop;
+
+fail:
+    ufuncloop_dealloc(loop);
+    return NULL;
+}
+
+
+/*
+  static void
+  _printbytebuf(PyUFuncLoopObject *loop, int bufnum)
+  {
+  int i;
+
+  fprintf(stderr, "Printing byte buffer %d\n", bufnum);
+  for(i=0; i<loop->bufcnt; i++) {
+  fprintf(stderr, "  %d\n", *(((byte *)(loop->buffer[bufnum]))+i));
+  }
+  }
+
+  static void
+  _printlongbuf(PyUFuncLoopObject *loop, int bufnum)
+  {
+  int i;
+
+  fprintf(stderr, "Printing long buffer %d\n", bufnum);
+  for(i=0; i<loop->bufcnt; i++) {
+  fprintf(stderr, "  %ld\n", *(((long *)(loop->buffer[bufnum]))+i));
+  }
+  }
+
+  static void
+  _printlongbufptr(PyUFuncLoopObject *loop, int bufnum)
+  {
+  int i;
+
+  fprintf(stderr, "Printing long buffer %d\n", bufnum);
+  for(i=0; i<loop->bufcnt; i++) {
+  fprintf(stderr, "  %ld\n", *(((long *)(loop->bufptr[bufnum]))+i));
+  }
+  }
+
+
+
+  static void
+  _printcastbuf(PyUFuncLoopObject *loop, int bufnum)
+  {
+  int i;
+
+  fprintf(stderr, "Printing long buffer %d\n", bufnum);
+  for(i=0; i<loop->bufcnt; i++) {
+  fprintf(stderr, "  %ld\n", *(((long *)(loop->castbuf[bufnum]))+i));
+  }
+  }
+
+*/
+
+
+
+
+/*
+ * currently generic ufuncs cannot be built for use on flexible arrays.
+ *
+ * The cast functions in the generic loop would need to be fixed to pass
+ * in something besides NULL, NULL.
+ *
+ * Also the underlying ufunc loops would not know the element-size unless
+ * that was passed in as data (which could be arranged).
+ *
+ */
+
+/*
+ * This generic function is called with the ufunc object, the arguments to it,
+ * and an array of (pointers to) PyArrayObjects which are NULL.  The
+ * arguments are parsed and placed in mps in construct_loop (construct_arrays)
+ */
+
+/*UFUNC_API*/
+static int
+PyUFunc_GenericFunction(PyUFuncObject *self, PyObject *args, PyObject *kwds,
+                        PyArrayObject **mps)
+{
+    PyUFuncLoopObject *loop;
+    int i;
+    NPY_BEGIN_THREADS_DEF;
+
+    if (!(loop = construct_loop(self, args, kwds, mps))) {
+        return -1;
+    }
+    if (loop->notimplemented) {
+        ufuncloop_dealloc(loop);
+        return -2;
+    }
+    if (self->core_enabled && loop->meth != SIGNATURE_NOBUFFER_UFUNCLOOP) {
+	PyErr_SetString(PyExc_RuntimeError,
+			"illegal loop method for ufunc with signature");
+	goto fail;
+    }
+
+    NPY_LOOP_BEGIN_THREADS;
+    switch(loop->meth) {
+        case ONE_UFUNCLOOP:
+            /*
+             * Everything is contiguous, notswapped, aligned,
+             * and of the right type.  -- Fastest.
+             * Or if not contiguous, then a single-stride
+             * increment moves through the entire array.
+             */
+            /*fprintf(stderr, "ONE...%d\n", loop->size);*/
+            loop->function((char **)loop->bufptr, &(loop->size),
+                    loop->steps, loop->funcdata);
+            UFUNC_CHECK_ERROR(loop);
+            break;
+
+        case NOBUFFER_UFUNCLOOP:
+            /*
+             * Everything is notswapped, aligned and of the
+             * right type but not contiguous. -- Almost as fast.
+             */
+            /*fprintf(stderr, "NOBUFFER...%d\n", loop->size);*/
+            
+             while (loop->index < loop->size) {
+                for(i = 0; i < self->nargs; i++) {
+                    loop->bufptr[i] = loop->iters[i]->dataptr;
+                }
+                loop->function((char **)loop->bufptr, &(loop->bufcnt),
+                        loop->steps, loop->funcdata);
+                UFUNC_CHECK_ERROR(loop);
+
+                /* Adjust loop pointers */
+                for(i = 0; i < self->nargs; i++) {
+                    PyArray_ITER_NEXT(loop->iters[i]);
+                }
+                loop->index++;
+            }
+            break;
+
+        case SIGNATURE_NOBUFFER_UFUNCLOOP:
+	    while (loop->index < loop->size) {
+		for(i = 0; i < self->nargs; i++) {
+		    loop->bufptr[i] = loop->iters[i]->dataptr;
+		}
+		loop->function((char **)loop->bufptr, loop->core_dim_sizes,
+			       loop->core_strides, loop->funcdata);
+		UFUNC_CHECK_ERROR(loop);
+
+		/* Adjust loop pointers */
+		for(i = 0; i < self->nargs; i++) {
+		    PyArray_ITER_NEXT(loop->iters[i]);
+		}
+		loop->index++;
+	    }
+	    break;
+
+        case BUFFER_UFUNCLOOP: {
+               PyArray_CopySwapNFunc *copyswapn[NPY_MAXARGS];
+               PyArrayIterObject **iters=loop->iters;
+               int *swap=loop->swap;
+               char **dptr=loop->dptr;
+               int mpselsize[NPY_MAXARGS];
+               intp laststrides[NPY_MAXARGS];
+               int fastmemcpy[NPY_MAXARGS];
+               int *needbuffer=loop->needbuffer;
+               intp index=loop->index, size=loop->size;
+               int bufsize;
+               intp bufcnt;
+               int copysizes[NPY_MAXARGS];
+               char **bufptr = loop->bufptr;
+               char **buffer = loop->buffer;
+               char **castbuf = loop->castbuf;
+               intp *steps = loop->steps;
+               char *tptr[NPY_MAXARGS];
+               int ninnerloops = loop->ninnerloops;
+               Bool pyobject[NPY_MAXARGS];
+               int datasize[NPY_MAXARGS];
+               int j, k, stopcondition;
+               char *myptr1, *myptr2;
+
+               for(i = 0; i <self->nargs; i++) {
+                   copyswapn[i] = mps[i]->descr->f->copyswapn;
+                   mpselsize[i] = mps[i]->descr->elsize;
+                   pyobject[i] = (loop->obj && \
+                           (mps[i]->descr->type_num == PyArray_OBJECT));
+                   laststrides[i] = iters[i]->strides[loop->lastdim];
+                   if (steps[i] && laststrides[i] != mpselsize[i]) {
+                       fastmemcpy[i] = 0;
+                   }
+                   else {
+                       fastmemcpy[i] = 1;
+                   }
+               }
+               /* Do generic buffered looping here (works for any kind of
+                * arrays -- some need buffers, some don't.
+                *
+                *
+                * New algorithm: N is the largest dimension.  B is the buffer-size.
+                * quotient is loop->ninnerloops-1
+                * remainder is loop->leftover
+                *
+                * Compute N = quotient * B + remainder.
+                * quotient = N / B  # integer math
+                * (store quotient + 1) as the number of innerloops
+                * remainder = N % B # integer remainder
+                *
+                * On the inner-dimension we will have (quotient + 1) loops where
+                * the size of the inner function is B for all but the last when the niter size is
+                * remainder.
+                *
+                * So, the code looks very similar to NOBUFFER_LOOP except the inner-most loop is
+                * replaced with...
+                *
+                * for(i=0; i<quotient+1; i++) {
+                * if (i==quotient+1) make itersize remainder size
+                * copy only needed items to buffer.
+                * swap input buffers if needed
+                * cast input buffers if needed
+                * call loop_function()
+                * cast outputs in buffers if needed
+                * swap outputs in buffers if needed
+                * copy only needed items back to output arrays.
+                * update all data-pointers by strides*niter
+                * }
+                */
+
+
+               /*
+                * fprintf(stderr, "BUFFER...%d,%d,%d\n", loop->size,
+                * loop->ninnerloops, loop->leftover);
+                */
+               /*
+                * for(i=0; i<self->nargs; i++) {
+                * fprintf(stderr, "iters[%d]->dataptr = %p, %p of size %d\n", i,
+                * iters[i], iters[i]->ao->data, PyArray_NBYTES(iters[i]->ao));
+                * }
+                */
+               stopcondition = ninnerloops;
+               if (loop->leftover == 0) stopcondition--;
+               while (index < size) {
+                   bufsize=loop->bufsize;
+                   for(i = 0; i<self->nargs; i++) {
+                       tptr[i] = loop->iters[i]->dataptr;
+                       if (needbuffer[i]) {
+                           dptr[i] = bufptr[i];
+                           datasize[i] = (steps[i] ? bufsize : 1);
+                           copysizes[i] = datasize[i] * mpselsize[i];
+                       }
+                       else {
+                           dptr[i] = tptr[i];
+                       }
+                   }
+
+                   /* This is the inner function over the last dimension */
+                   for(k = 1; k<=stopcondition; k++) {
+                       if (k == ninnerloops) {
+                           bufsize = loop->leftover;
+                           for(i=0; i<self->nargs;i++) {
+                               if (!needbuffer[i]) {
+                                   continue;
+                               }
+                               datasize[i] = (steps[i] ? bufsize : 1);
+                               copysizes[i] = datasize[i] * mpselsize[i];
+                           }
+                       }
+                       for(i = 0; i < self->nin; i++) {
+                           if (!needbuffer[i]) {
+                               continue;
+                           }
+                           if (fastmemcpy[i]) {
+                               memcpy(buffer[i], tptr[i], copysizes[i]);
+                           }
+                           else {
+                               myptr1 = buffer[i];
+                               myptr2 = tptr[i];
+                               for(j = 0; j < bufsize; j++) {
+                                   memcpy(myptr1, myptr2, mpselsize[i]);
+                                   myptr1 += mpselsize[i];
+                                   myptr2 += laststrides[i];
+                               }
+                           }
+
+                           /* swap the buffer if necessary */
+                           if (swap[i]) {
+                               /* fprintf(stderr, "swapping...\n");*/
+                               copyswapn[i](buffer[i], mpselsize[i], NULL, -1,
+                                       (intp) datasize[i], 1,
+                                       mps[i]);
+                           }
+                           /* cast to the other buffer if necessary */
+                           if (loop->cast[i]) {
+                               /* fprintf(stderr, "casting... %d, %p %p\n", i, buffer[i]); */
+                               loop->cast[i](buffer[i], castbuf[i],
+                                       (intp) datasize[i],
+                                       NULL, NULL);
+                           }
+                       }
+
+                       bufcnt = (intp) bufsize;
+                       loop->function((char **)dptr, &bufcnt, steps, loop->funcdata);
+                       UFUNC_CHECK_ERROR(loop);
+
+                       for(i=self->nin; i<self->nargs; i++) {
+                           if (!needbuffer[i]) {
+                               continue;
+                           }
+                           if (loop->cast[i]) {
+                               /* fprintf(stderr, "casting back... %d, %p", i, castbuf[i]); */
+                               loop->cast[i](castbuf[i],
+                                       buffer[i],
+                                       (intp) datasize[i],
+                                       NULL, NULL);
+                           }
+                           if (swap[i]) {
+                               copyswapn[i](buffer[i], mpselsize[i], NULL, -1,
+                                       (intp) datasize[i], 1,
+                                       mps[i]);
+                           }
+                           /*
+                            * copy back to output arrays
+                            * decref what's already there for object arrays
+                            */
+                           if (pyobject[i]) {
+                               myptr1 = tptr[i];
+                               for(j = 0; j < datasize[i]; j++) {
+                                   Py_XDECREF(*((PyObject **)myptr1));
+                                   myptr1 += laststrides[i];
+                               }
+                           }
+                           if (fastmemcpy[i])
+                               memcpy(tptr[i], buffer[i], copysizes[i]);
+                           else {
+                               myptr2 = buffer[i];
+                               myptr1 = tptr[i];
+                               for(j = 0; j < bufsize; j++) {
+                                   memcpy(myptr1, myptr2,
+                                           mpselsize[i]);
+                                   myptr1 += laststrides[i];
+                                   myptr2 += mpselsize[i];
+                               }
+                           }
+                       }
+                       if (k == stopcondition) {
+                           continue;
+                       }
+                       for(i = 0; i < self->nargs; i++) {
+                           tptr[i] += bufsize * laststrides[i];
+                           if (!needbuffer[i]) {
+                               dptr[i] = tptr[i];
+                           }
+                       }
+                   }
+                   /* end inner function over last dimension */
+
+                   if (loop->objfunc) {
+                       /*
+                        * DECREF castbuf when underlying function used
+                        * object arrays and casting was needed to get
+                        * to object arrays
+                        */
+                       for(i = 0; i < self->nargs; i++) {
+                           if (loop->cast[i]) {
+                               if (steps[i] == 0) {
+                                   Py_XDECREF(*((PyObject **)castbuf[i]));
+                               }
+                               else {
+                                   int size = loop->bufsize;
+
+                                   PyObject **objptr = (PyObject **)castbuf[i];
+                                   /*
+                                    * size is loop->bufsize unless there
+                                    * was only one loop
+                                    */
+                                   if (ninnerloops == 1) {
+                                       size = loop->leftover;
+                                   }
+                                   for(j = 0; j < size; j++) {
+                                       Py_XDECREF(*objptr);
+                                       *objptr = NULL;
+                                       objptr += 1;
+                                   }
+                               }
+                           }
+                       }
+
+                   }
+                   /* fixme -- probably not needed here*/
+                   UFUNC_CHECK_ERROR(loop);
+
+                   for(i=0; i<self->nargs; i++) {
+                       PyArray_ITER_NEXT(loop->iters[i]);
+                   }
+                   index++;
+               }
+           }
+    }
+
+    NPY_LOOP_END_THREADS;
+    ufuncloop_dealloc(loop);
+    return 0;
+
+fail:
+    NPY_LOOP_END_THREADS;
+    if (loop) ufuncloop_dealloc(loop);
+    return -1;
+}
+
+static PyArrayObject *
+_getidentity(PyUFuncObject *self, int otype, char *str)
+{
+    PyObject *obj, *arr;
+    PyArray_Descr *typecode;
+
+    if (self->identity == PyUFunc_None) {
+        PyErr_Format(PyExc_ValueError,
+                     "zero-size array to ufunc.%s "      \
+                     "without identity", str);
+        return NULL;
+    }
+    if (self->identity == PyUFunc_One) {
+        obj = PyInt_FromLong((long) 1);
+    } else {
+        obj = PyInt_FromLong((long) 0);
+    }
+
+    typecode = PyArray_DescrFromType(otype);
+    arr = PyArray_FromAny(obj, typecode, 0, 0, CARRAY, NULL);
+    Py_DECREF(obj);
+    return (PyArrayObject *)arr;
+}
+
+static int
+_create_reduce_copy(PyUFuncReduceObject *loop, PyArrayObject **arr, int rtype)
+{
+    intp maxsize;
+    PyObject *new;
+    PyArray_Descr *ntype;
+
+    maxsize = PyArray_SIZE(*arr);
+
+    if (maxsize < loop->bufsize) {
+        if (!(PyArray_ISBEHAVED_RO(*arr)) ||
+            PyArray_TYPE(*arr) != rtype) {
+            ntype = PyArray_DescrFromType(rtype);
+            new = PyArray_FromAny((PyObject *)(*arr),
+                                  ntype, 0, 0,
+                                  FORCECAST | ALIGNED, NULL);
+            if (new == NULL) {
+                return -1;
+            }
+            *arr = (PyArrayObject *)new;
+            loop->decref = new;
+        }
+    }
+
+    /* Don't decref *arr before re-assigning
+       because it was not going to be DECREF'd anyway.
+
+       If a copy is made, then the copy will be removed
+       on deallocation of the loop structure by setting
+       loop->decref.
+    */
+
+    return 0;
+}
+
+static PyUFuncReduceObject *
+construct_reduce(PyUFuncObject *self, PyArrayObject **arr, PyArrayObject *out,
+                 int axis, int otype, int operation, intp ind_size, char *str)
+{
+    PyUFuncReduceObject *loop;
+    PyArrayObject *idarr;
+    PyArrayObject *aar;
+    intp loop_i[MAX_DIMS], outsize=0;
+    int arg_types[3];
+    PyArray_SCALARKIND scalars[3] = {PyArray_NOSCALAR, PyArray_NOSCALAR,
+                                     PyArray_NOSCALAR};
+    int i, j, nd;
+    int flags;
+    /* Reduce type is the type requested of the input
+       during reduction */
+
+    if (self->core_enabled) {
+        PyErr_Format(PyExc_RuntimeError,
+                     "construct_reduce not allowed on ufunc with signature");
+        return NULL;
+    }
+
+    nd = (*arr)->nd;
+    arg_types[0] = otype;
+    arg_types[1] = otype;
+    arg_types[2] = otype;
+    if ((loop = _pya_malloc(sizeof(PyUFuncReduceObject)))==NULL) {
+        PyErr_NoMemory(); return loop;
+    }
+
+    loop->retbase=0;
+    loop->swap = 0;
+    loop->index = 0;
+    loop->ufunc = self;
+    Py_INCREF(self);
+    loop->cast = NULL;
+    loop->buffer = NULL;
+    loop->ret = NULL;
+    loop->it = NULL;
+    loop->rit = NULL;
+    loop->errobj = NULL;
+    loop->first = 1;
+    loop->decref=NULL;
+    loop->N = (*arr)->dimensions[axis];
+    loop->instrides = (*arr)->strides[axis];
+
+    if (select_types(loop->ufunc, arg_types, &(loop->function),
+                     &(loop->funcdata), scalars, NULL) == -1) goto fail;
+
+    /* output type may change -- if it does
+       reduction is forced into that type
+       and we need to select the reduction function again
+    */
+    if (otype != arg_types[2]) {
+        otype = arg_types[2];
+        arg_types[0] = otype;
+        arg_types[1] = otype;
+        if (select_types(loop->ufunc, arg_types, &(loop->function),
+                         &(loop->funcdata), scalars, NULL) == -1)
+            goto fail;
+    }
+
+    /* get looping parameters from Python */
+    if (PyUFunc_GetPyValues(str, &(loop->bufsize), &(loop->errormask),
+                            &(loop->errobj)) < 0) goto fail;
+
+    /* Make copy if misbehaved or not otype for small arrays */
+    if (_create_reduce_copy(loop, arr, otype) < 0) goto fail;
+    aar = *arr;
+
+    if (loop->N == 0) {
+        loop->meth = ZERO_EL_REDUCELOOP;
+    }
+    else if (PyArray_ISBEHAVED_RO(aar) &&           \
+             otype == (aar)->descr->type_num) {
+        if (loop->N == 1) {
+            loop->meth = ONE_EL_REDUCELOOP;
+        }
+        else {
+            loop->meth = NOBUFFER_UFUNCLOOP;
+            loop->steps[1] = (aar)->strides[axis];
+            loop->N -= 1;
+        }
+    }
+    else {
+        loop->meth = BUFFER_UFUNCLOOP;
+        loop->swap = !(PyArray_ISNOTSWAPPED(aar));
+    }
+
+    /* Determine if object arrays are involved */
+    if (otype == PyArray_OBJECT || aar->descr->type_num == PyArray_OBJECT)
+        loop->obj = 1;
+    else
+        loop->obj = 0;
+
+    if (loop->meth == ZERO_EL_REDUCELOOP) {
+        idarr = _getidentity(self, otype, str);
+        if (idarr == NULL) goto fail;
+        if (idarr->descr->elsize > UFUNC_MAXIDENTITY) {
+            PyErr_Format(PyExc_RuntimeError,
+                         "UFUNC_MAXIDENTITY (%d)"           \
+                         " is too small (needs to be at least %d)",
+                         UFUNC_MAXIDENTITY, idarr->descr->elsize);
+            Py_DECREF(idarr);
+            goto fail;
+        }
+        memcpy(loop->idptr, idarr->data, idarr->descr->elsize);
+        Py_DECREF(idarr);
+    }
+
+    /* Construct return array */
+    flags = NPY_CARRAY | NPY_UPDATEIFCOPY | NPY_FORCECAST;
+    switch(operation) {
+    case UFUNC_REDUCE:
+        for(j=0, i=0; i<nd; i++) {
+            if (i != axis)
+                loop_i[j++] = (aar)->dimensions[i];
+
+        }
+        if (out == NULL) {
+            loop->ret = (PyArrayObject *)                   \
+                PyArray_New(aar->ob_type, aar->nd-1, loop_i,
+                            otype, NULL, NULL, 0, 0,
+                            (PyObject *)aar);
+        }
+        else {
+            outsize = PyArray_MultiplyList(loop_i, aar->nd-1);
+        }
+        break;
+    case UFUNC_ACCUMULATE:
+        if (out == NULL) {
+            loop->ret = (PyArrayObject *)                   \
+                PyArray_New(aar->ob_type, aar->nd, aar->dimensions,
+                            otype, NULL, NULL, 0, 0, (PyObject *)aar);
+        }
+        else {
+            outsize = PyArray_MultiplyList(aar->dimensions, aar->nd);
+        }
+        break;
+    case UFUNC_REDUCEAT:
+        memcpy(loop_i, aar->dimensions, nd*sizeof(intp));
+        /* Index is 1-d array */
+        loop_i[axis] = ind_size;
+        if (out == NULL) {
+            loop->ret = (PyArrayObject *)                   \
+                PyArray_New(aar->ob_type, aar->nd, loop_i, otype,
+                            NULL, NULL, 0, 0, (PyObject *)aar);
+        }
+        else {
+            outsize = PyArray_MultiplyList(loop_i, aar->nd);
+        }
+        if (ind_size == 0) {
+            loop->meth = ZERO_EL_REDUCELOOP;
+            return loop;
+        }
+        if (loop->meth == ONE_EL_REDUCELOOP)
+            loop->meth = NOBUFFER_REDUCELOOP;
+        break;
+    }
+    if (out) {
+        if (PyArray_SIZE(out) != outsize) {
+            PyErr_SetString(PyExc_ValueError,
+                            "wrong shape for output");
+            goto fail;
+        }
+        loop->ret = (PyArrayObject *)                   \
+            PyArray_FromArray(out, PyArray_DescrFromType(otype),
+                              flags);
+        if (loop->ret && loop->ret != out) {
+            loop->retbase = 1;
+        }
+    }
+    if (loop->ret == NULL) goto fail;
+    loop->insize = aar->descr->elsize;
+    loop->outsize = loop->ret->descr->elsize;
+    loop->bufptr[0] = loop->ret->data;
+
+    if (loop->meth == ZERO_EL_REDUCELOOP) {
+        loop->size = PyArray_SIZE(loop->ret);
+        return loop;
+    }
+
+    loop->it = (PyArrayIterObject *)PyArray_IterNew((PyObject *)aar);
+    if (loop->it == NULL) return NULL;
+
+    if (loop->meth == ONE_EL_REDUCELOOP) {
+        loop->size = loop->it->size;
+        return loop;
+    }
+
+    /* Fix iterator to loop over correct dimension */
+    /* Set size in axis dimension to 1 */
+
+    loop->it->contiguous = 0;
+    loop->it->size /= (loop->it->dims_m1[axis]+1);
+    loop->it->dims_m1[axis] = 0;
+    loop->it->backstrides[axis] = 0;
+
+
+    loop->size = loop->it->size;
+
+    if (operation == UFUNC_REDUCE) {
+        loop->steps[0] = 0;
+    }
+    else {
+        loop->rit = (PyArrayIterObject *)                       \
+            PyArray_IterNew((PyObject *)(loop->ret));
+        if (loop->rit == NULL) return NULL;
+
+        /* Fix iterator to loop over correct dimension */
+        /* Set size in axis dimension to 1 */
+
+        loop->rit->contiguous = 0;
+        loop->rit->size /= (loop->rit->dims_m1[axis]+1);
+        loop->rit->dims_m1[axis] = 0;
+        loop->rit->backstrides[axis] = 0;
+
+        if (operation == UFUNC_ACCUMULATE)
+            loop->steps[0] = loop->ret->strides[axis];
+        else
+            loop->steps[0] = 0;
+    }
+    loop->steps[2] = loop->steps[0];
+    loop->bufptr[2] = loop->bufptr[0] + loop->steps[2];
+
+
+    if (loop->meth == BUFFER_UFUNCLOOP) {
+        int _size;
+        loop->steps[1] = loop->outsize;
+        if (otype != aar->descr->type_num) {
+            _size=loop->bufsize*(loop->outsize +            \
+                                 aar->descr->elsize);
+            loop->buffer = PyDataMem_NEW(_size);
+            if (loop->buffer == NULL) goto fail;
+            if (loop->obj) memset(loop->buffer, 0, _size);
+            loop->castbuf = loop->buffer + \
+                loop->bufsize*aar->descr->elsize;
+            loop->bufptr[1] = loop->castbuf;
+            loop->cast = PyArray_GetCastFunc(aar->descr, otype);
+            if (loop->cast == NULL) goto fail;
+        }
+        else {
+            _size = loop->bufsize * loop->outsize;
+            loop->buffer = PyDataMem_NEW(_size);
+            if (loop->buffer == NULL) goto fail;
+            if (loop->obj) memset(loop->buffer, 0, _size);
+            loop->bufptr[1] = loop->buffer;
+        }
+    }
+
+
+    PyUFunc_clearfperr();
+    return loop;
+
+ fail:
+    ufuncreduce_dealloc(loop);
+    return NULL;
+}
+
+
+/* We have two basic kinds of loops */
+/*  One is used when arr is not-swapped and aligned and output type
+    is the same as input type.
+    and another using buffers when one of these is not satisfied.
+
+    Zero-length and one-length axes-to-be-reduced are handled separately.
+*/
+
+    static PyObject *
+PyUFunc_Reduce(PyUFuncObject *self, PyArrayObject *arr, PyArrayObject *out,
+        int axis, int otype)
+{
+    PyArrayObject *ret=NULL;
+    PyUFuncReduceObject *loop;
+    intp i, n;
+    char *dptr;
+    NPY_BEGIN_THREADS_DEF;
+
+    /* Construct loop object */
+    loop = construct_reduce(self, &arr, out, axis, otype, UFUNC_REDUCE, 0,
+            "reduce");
+    if (!loop) return NULL;
+
+    NPY_LOOP_BEGIN_THREADS;
+        switch(loop->meth) {
+        case ZERO_EL_REDUCELOOP:
+            /* fprintf(stderr, "ZERO..%d\n", loop->size); */
+            for(i=0; i<loop->size; i++) {
+                if (loop->obj) Py_INCREF(*((PyObject **)loop->idptr));
+                memmove(loop->bufptr[0], loop->idptr, loop->outsize);
+                loop->bufptr[0] += loop->outsize;
+            }
+            break;
+        case ONE_EL_REDUCELOOP:
+            /*fprintf(stderr, "ONEDIM..%d\n", loop->size); */
+            while(loop->index < loop->size) {
+                if (loop->obj)
+                    Py_INCREF(*((PyObject **)loop->it->dataptr));
+                memmove(loop->bufptr[0], loop->it->dataptr,
+                        loop->outsize);
+                PyArray_ITER_NEXT(loop->it);
+                loop->bufptr[0] += loop->outsize;
+                loop->index++;
+            }
+            break;
+        case NOBUFFER_UFUNCLOOP:
+            /*fprintf(stderr, "NOBUFFER..%d\n", loop->size); */
+            while(loop->index < loop->size) {
+                /* Copy first element to output */
+                if (loop->obj)
+                    Py_INCREF(*((PyObject **)loop->it->dataptr));
+                memmove(loop->bufptr[0], loop->it->dataptr,
+                        loop->outsize);
+                /* Adjust input pointer */
+                loop->bufptr[1] = loop->it->dataptr+loop->steps[1];
+                loop->function((char **)loop->bufptr,
+                               &(loop->N),
+                               loop->steps, loop->funcdata);
+                UFUNC_CHECK_ERROR(loop);
+
+                PyArray_ITER_NEXT(loop->it)
+                    loop->bufptr[0] += loop->outsize;
+                loop->bufptr[2] = loop->bufptr[0];
+                loop->index++;
+            }
+            break;
+        case BUFFER_UFUNCLOOP:
+            /* use buffer for arr */
+            /*
+              For each row to reduce
+              1. copy first item over to output (casting if necessary)
+              2. Fill inner buffer
+              3. When buffer is filled or end of row
+              a. Cast input buffers if needed
+              b. Call inner function.
+              4. Repeat 2 until row is done.
+            */
+            /* fprintf(stderr, "BUFFERED..%d %d\n", loop->size,
+               loop->swap); */
+            while(loop->index < loop->size) {
+                loop->inptr = loop->it->dataptr;
+                /* Copy (cast) First term over to output */
+                if (loop->cast) {
+                    /* A little tricky because we need to
+                       cast it first */
+                    arr->descr->f->copyswap(loop->buffer,
+                                            loop->inptr,
+                                            loop->swap,
+                                            NULL);
+                    loop->cast(loop->buffer, loop->castbuf,
+                               1, NULL, NULL);
+                    if (loop->obj) {
+                        Py_XINCREF(*((PyObject **)loop->castbuf));
+                    }
+                    memcpy(loop->bufptr[0], loop->castbuf,
+                           loop->outsize);
+                }
+                else { /* Simple copy */
+                    arr->descr->f->copyswap(loop->bufptr[0],
+                                            loop->inptr,
+                                            loop->swap, NULL);
+                }
+                loop->inptr += loop->instrides;
+                n = 1;
+                while(n < loop->N) {
+                    /* Copy up to loop->bufsize elements to
+                       buffer */
+                    dptr = loop->buffer;
+                    for(i=0; i<loop->bufsize; i++, n++) {
+                        if (n == loop->N) break;
+                        arr->descr->f->copyswap(dptr,
+                                                loop->inptr,
+                                                loop->swap,
+                                                NULL);
+                        loop->inptr += loop->instrides;
+                        dptr += loop->insize;
+                    }
+                    if (loop->cast)
+                        loop->cast(loop->buffer,
+                                   loop->castbuf,
+                                   i, NULL, NULL);
+                    loop->function((char **)loop->bufptr,
+                                   &i,
+                                   loop->steps, loop->funcdata);
+                    loop->bufptr[0] += loop->steps[0]*i;
+                    loop->bufptr[2] += loop->steps[2]*i;
+                    UFUNC_CHECK_ERROR(loop);
+                }
+                PyArray_ITER_NEXT(loop->it);
+                loop->bufptr[0] += loop->outsize;
+                loop->bufptr[2] = loop->bufptr[0];
+                loop->index++;
+            }
+        }
+
+        NPY_LOOP_END_THREADS;
+
+        /* Hang on to this reference -- will be decref'd with loop */
+        if (loop->retbase) ret = (PyArrayObject *)loop->ret->base;
+        else ret = loop->ret;
+        Py_INCREF(ret);
+        ufuncreduce_dealloc(loop);
+        return (PyObject *)ret;
+
+fail:
+    NPY_LOOP_END_THREADS;
+
+        if (loop) ufuncreduce_dealloc(loop);
+    return NULL;
+}
+
+
+static PyObject *
+PyUFunc_Accumulate(PyUFuncObject *self, PyArrayObject *arr, PyArrayObject *out,
+                   int axis, int otype)
+{
+    PyArrayObject *ret=NULL;
+    PyUFuncReduceObject *loop;
+    intp i, n;
+    char *dptr;
+    NPY_BEGIN_THREADS_DEF;
+
+        /* Construct loop object */
+        loop = construct_reduce(self, &arr, out, axis, otype, UFUNC_ACCUMULATE, 0,
+                                "accumulate");
+    if (!loop) return NULL;
+
+    NPY_LOOP_BEGIN_THREADS;
+    switch(loop->meth) {
+    case ZERO_EL_REDUCELOOP: /* Accumulate */
+        /* fprintf(stderr, "ZERO..%d\n", loop->size); */
+        for(i=0; i<loop->size; i++) {
+            if (loop->obj)
+                Py_INCREF(*((PyObject **)loop->idptr));
+            memcpy(loop->bufptr[0], loop->idptr, loop->outsize);
+            loop->bufptr[0] += loop->outsize;
+        }
+        break;
+    case ONE_EL_REDUCELOOP: /* Accumulate */
+        /* fprintf(stderr, "ONEDIM..%d\n", loop->size); */
+        while(loop->index < loop->size) {
+            if (loop->obj)
+                Py_INCREF(*((PyObject **)loop->it->dataptr));
+            memmove(loop->bufptr[0], loop->it->dataptr,
+                   loop->outsize);
+            PyArray_ITER_NEXT(loop->it);
+            loop->bufptr[0] += loop->outsize;
+            loop->index++;
+        }
+        break;
+    case NOBUFFER_UFUNCLOOP: /* Accumulate */
+        /* fprintf(stderr, "NOBUFFER..%d\n", loop->size); */
+        while(loop->index < loop->size) {
+            /* Copy first element to output */
+            if (loop->obj)
+                Py_INCREF(*((PyObject **)loop->it->dataptr));
+            memmove(loop->bufptr[0], loop->it->dataptr,
+                   loop->outsize);
+            /* Adjust input pointer */
+            loop->bufptr[1] = loop->it->dataptr+loop->steps[1];
+            loop->function((char **)loop->bufptr,
+                           &(loop->N),
+                           loop->steps, loop->funcdata);
+            UFUNC_CHECK_ERROR(loop);
+
+            PyArray_ITER_NEXT(loop->it);
+            PyArray_ITER_NEXT(loop->rit);
+            loop->bufptr[0] = loop->rit->dataptr;
+            loop->bufptr[2] = loop->bufptr[0] + loop->steps[0];
+            loop->index++;
+        }
+        break;
+    case BUFFER_UFUNCLOOP:  /* Accumulate */
+        /* use buffer for arr */
+        /*
+          For each row to reduce
+          1. copy identity over to output (casting if necessary)
+          2. Fill inner buffer
+          3. When buffer is filled or end of row
+          a. Cast input buffers if needed
+          b. Call inner function.
+          4. Repeat 2 until row is done.
+        */
+        /* fprintf(stderr, "BUFFERED..%d %p\n", loop->size,
+           loop->cast); */
+        while(loop->index < loop->size) {
+            loop->inptr = loop->it->dataptr;
+            /* Copy (cast) First term over to output */
+            if (loop->cast) {
+                /* A little tricky because we need to
+                   cast it first */
+                arr->descr->f->copyswap(loop->buffer,
+                                        loop->inptr,
+                                        loop->swap,
+                                        NULL);
+                loop->cast(loop->buffer, loop->castbuf,
+                           1, NULL, NULL);
+                if (loop->obj) {
+                    Py_XINCREF(*((PyObject **)loop->castbuf));
+                }
+                memcpy(loop->bufptr[0], loop->castbuf,
+                       loop->outsize);
+            }
+            else { /* Simple copy */
+                arr->descr->f->copyswap(loop->bufptr[0],
+                                        loop->inptr,
+                                        loop->swap,
+                                        NULL);
+            }
+            loop->inptr += loop->instrides;
+            n = 1;
+            while(n < loop->N) {
+                /* Copy up to loop->bufsize elements to
+                   buffer */
+                dptr = loop->buffer;
+                for(i=0; i<loop->bufsize; i++, n++) {
+                    if (n == loop->N) break;
+                    arr->descr->f->copyswap(dptr,
+                                            loop->inptr,
+                                            loop->swap,
+                                            NULL);
+                    loop->inptr += loop->instrides;
+                    dptr += loop->insize;
+                }
+                if (loop->cast)
+                    loop->cast(loop->buffer,
+                               loop->castbuf,
+                               i, NULL, NULL);
+                loop->function((char **)loop->bufptr,
+                               &i,
+                               loop->steps, loop->funcdata);
+                loop->bufptr[0] += loop->steps[0]*i;
+                loop->bufptr[2] += loop->steps[2]*i;
+                UFUNC_CHECK_ERROR(loop);
+            }
+            PyArray_ITER_NEXT(loop->it);
+            PyArray_ITER_NEXT(loop->rit);
+            loop->bufptr[0] = loop->rit->dataptr;
+            loop->bufptr[2] = loop->bufptr[0] + loop->steps[0];
+            loop->index++;
+        }
+    }
+
+    NPY_LOOP_END_THREADS;
+
+    /* Hang on to this reference -- will be decref'd with loop */
+    if (loop->retbase) ret = (PyArrayObject *)loop->ret->base;
+    else ret = loop->ret;
+    Py_INCREF(ret);
+    ufuncreduce_dealloc(loop);
+    return (PyObject *)ret;
+
+ fail:
+    NPY_LOOP_END_THREADS;
+
+        if (loop) ufuncreduce_dealloc(loop);
+    return NULL;
+}
+
+/* Reduceat performs a reduce over an axis using the indices as a guide
+
+   op.reduceat(array,indices)  computes
+   op.reduce(array[indices[i]:indices[i+1]]
+   for i=0..end with an implicit indices[i+1]=len(array)
+   assumed when i=end-1
+
+   if indices[i+1] <= indices[i]+1
+   then the result is array[indices[i]] for that value
+
+   op.accumulate(array) is the same as
+   op.reduceat(array,indices)[::2]
+   where indices is range(len(array)-1) with a zero placed in every other sample
+   indices = zeros(len(array)*2-1)
+   indices[1::2] = range(1,len(array))
+
+   output shape is based on the size of indices
+*/
+
+static PyObject *
+PyUFunc_Reduceat(PyUFuncObject *self, PyArrayObject *arr, PyArrayObject *ind,
+                 PyArrayObject *out, int axis, int otype)
+{
+    PyArrayObject *ret;
+    PyUFuncReduceObject *loop;
+    intp *ptr=(intp *)ind->data;
+    intp nn=ind->dimensions[0];
+    intp mm=arr->dimensions[axis]-1;
+    intp n, i, j;
+    char *dptr;
+    NPY_BEGIN_THREADS_DEF;
+
+    /* Check for out-of-bounds values in indices array */
+    for(i=0; i<nn; i++) {
+        if ((*ptr < 0) || (*ptr > mm)) {
+            PyErr_Format(PyExc_IndexError,
+                    "index out-of-bounds (0, %d)", (int) mm);
+            return NULL;
+        }
+        ptr++;
+    }
+
+    ptr = (intp *)ind->data;
+    /* Construct loop object */
+    loop = construct_reduce(self, &arr, out, axis, otype, UFUNC_REDUCEAT, nn,
+                            "reduceat");
+    if (!loop) return NULL;
+
+    NPY_LOOP_BEGIN_THREADS;
+    switch(loop->meth) {
+    /* zero-length index -- return array immediately */
+    case ZERO_EL_REDUCELOOP:
+        /* fprintf(stderr, "ZERO..\n"); */
+        break;
+        /* NOBUFFER -- behaved array and same type */
+    case NOBUFFER_UFUNCLOOP:                        /* Reduceat */
+        /* fprintf(stderr, "NOBUFFER..%d\n", loop->size); */
+        while(loop->index < loop->size) {
+            ptr = (intp *)ind->data;
+            for(i=0; i<nn; i++) {
+                loop->bufptr[1] = loop->it->dataptr +   \
+                                  (*ptr)*loop->instrides;
+                if (loop->obj) {
+                    Py_XINCREF(*((PyObject **)loop->bufptr[1]));
+                }
+                memcpy(loop->bufptr[0], loop->bufptr[1],
+                        loop->outsize);
+                mm = (i==nn-1 ? arr->dimensions[axis]-*ptr : \
+                        *(ptr+1) - *ptr) - 1;
+                if (mm > 0) {
+                    loop->bufptr[1] += loop->instrides;
+                    loop->bufptr[2] = loop->bufptr[0];
+                    loop->function((char **)loop->bufptr,
+                            &mm, loop->steps,
+                            loop->funcdata);
+                    UFUNC_CHECK_ERROR(loop);
+                }
+                loop->bufptr[0] += loop->ret->strides[axis];
+                ptr++;
+            }
+            PyArray_ITER_NEXT(loop->it);
+            PyArray_ITER_NEXT(loop->rit);
+            loop->bufptr[0] = loop->rit->dataptr;
+            loop->index++;
+        }
+        break;
+
+        /* BUFFER -- misbehaved array or different types */
+    case BUFFER_UFUNCLOOP:                               /* Reduceat */
+        /* fprintf(stderr, "BUFFERED..%d\n", loop->size); */
+        while(loop->index < loop->size) {
+            ptr = (intp *)ind->data;
+            for(i=0; i<nn; i++) {
+                if (loop->obj) {
+                    Py_XINCREF(*((PyObject **)loop->idptr));
+                }
+                memcpy(loop->bufptr[0], loop->idptr,
+                        loop->outsize);
+                n = 0;
+                mm = (i==nn-1 ? arr->dimensions[axis] - *ptr :\
+                        *(ptr+1) - *ptr);
+                if (mm < 1) mm = 1;
+                loop->inptr = loop->it->dataptr + \
+                              (*ptr)*loop->instrides;
+                while (n < mm) {
+                    /* Copy up to loop->bufsize elements
+                       to buffer */
+                    dptr = loop->buffer;
+                    for(j=0; j<loop->bufsize; j++, n++) {
+                        if (n == mm) break;
+                        arr->descr->f->copyswap\
+                            (dptr,
+                             loop->inptr,
+                             loop->swap, NULL);
+                        loop->inptr += loop->instrides;
+                        dptr += loop->insize;
+                    }
+                    if (loop->cast)
+                        loop->cast(loop->buffer,
+                                loop->castbuf,
+                                j, NULL, NULL);
+                    loop->bufptr[2] = loop->bufptr[0];
+                    loop->function((char **)loop->bufptr,
+                            &j, loop->steps,
+                            loop->funcdata);
+                    UFUNC_CHECK_ERROR(loop);
+                    loop->bufptr[0] += j*loop->steps[0];
+                }
+                loop->bufptr[0] += loop->ret->strides[axis];
+                ptr++;
+            }
+            PyArray_ITER_NEXT(loop->it);
+            PyArray_ITER_NEXT(loop->rit);
+            loop->bufptr[0] = loop->rit->dataptr;
+            loop->index++;
+        }
+        break;
+    }
+
+    NPY_LOOP_END_THREADS;
+
+        /* Hang on to this reference -- will be decref'd with loop */
+        if (loop->retbase) ret = (PyArrayObject *)loop->ret->base;
+        else ret = loop->ret;
+    Py_INCREF(ret);
+    ufuncreduce_dealloc(loop);
+    return (PyObject *)ret;
+
+fail:
+    NPY_LOOP_END_THREADS;
+
+    if (loop) ufuncreduce_dealloc(loop);
+    return NULL;
+}
+
+
+/* This code handles reduce, reduceat, and accumulate
+   (accumulate and reduce are special cases of the more general reduceat
+   but they are handled separately for speed)
+*/
+
+static PyObject *
+PyUFunc_GenericReduction(PyUFuncObject *self, PyObject *args,
+                         PyObject *kwds, int operation)
+{
+    int axis=0;
+    PyArrayObject *mp, *ret = NULL;
+    PyObject *op, *res=NULL;
+    PyObject *obj_ind, *context;
+    PyArrayObject *indices = NULL;
+    PyArray_Descr *otype=NULL;
+    PyArrayObject *out=NULL;
+    static char *kwlist1[] = {"array", "axis", "dtype", "out", NULL};
+    static char *kwlist2[] = {"array", "indices", "axis", "dtype", "out", NULL};
+    static char *_reduce_type[] = {"reduce", "accumulate", \
+                                   "reduceat", NULL};
+    if (self == NULL) {
+        PyErr_SetString(PyExc_ValueError, "function not supported");
+        return NULL;
+    }
+
+    if (self->core_enabled) {
+        PyErr_Format(PyExc_RuntimeError,
+                     "Reduction not defined on ufunc with signature");
+        return NULL;
+    }
+    
+    if (self->nin != 2) {
+        PyErr_Format(PyExc_ValueError,
+                     "%s only supported for binary functions",
+                     _reduce_type[operation]);
+        return NULL;
+    }
+    if (self->nout != 1) {
+        PyErr_Format(PyExc_ValueError,
+                     "%s only supported for functions " \
+                     "returning a single value",
+                     _reduce_type[operation]);
+        return NULL;
+    }
+
+    if (operation == UFUNC_REDUCEAT) {
+        PyArray_Descr *indtype;
+        indtype = PyArray_DescrFromType(PyArray_INTP);
+        if(!PyArg_ParseTupleAndKeywords(args, kwds, "OO|iO&O&", kwlist2,
+                                        &op, &obj_ind, &axis,
+                                        PyArray_DescrConverter2,
+                                        &otype,
+                                        PyArray_OutputConverter,
+                                        &out)) {
+            Py_XDECREF(otype);
+            return NULL;
+        }
+        indices = (PyArrayObject *)PyArray_FromAny(obj_ind, indtype,
+                                                   1, 1, CARRAY, NULL);
+        if (indices == NULL) {Py_XDECREF(otype); return NULL;}
+    }
+    else {
+        if(!PyArg_ParseTupleAndKeywords(args, kwds, "O|iO&O&", kwlist1,
+                                        &op, &axis,
+                                        PyArray_DescrConverter2,
+                                        &otype,
+                                        PyArray_OutputConverter,
+                                        &out)) {
+            Py_XDECREF(otype);
+            return NULL;
+        }
+    }
+
+    /* Ensure input is an array */
+    if (!PyArray_Check(op) && !PyArray_IsScalar(op, Generic)) {
+        context = Py_BuildValue("O(O)i", self, op, 0);
+    }
+    else {
+        context = NULL;
+    }
+    mp = (PyArrayObject *)PyArray_FromAny(op, NULL, 0, 0, 0, context);
+    Py_XDECREF(context);
+    if (mp == NULL) return NULL;
+
+    /* Check to see if input is zero-dimensional */
+    if (mp->nd == 0) {
+        PyErr_Format(PyExc_TypeError, "cannot %s on a scalar",
+                     _reduce_type[operation]);
+        Py_XDECREF(otype);
+        Py_DECREF(mp);
+        return NULL;
+    }
+
+    /* Check to see that type (and otype) is not FLEXIBLE */
+    if (PyArray_ISFLEXIBLE(mp) ||
+        (otype && PyTypeNum_ISFLEXIBLE(otype->type_num))) {
+        PyErr_Format(PyExc_TypeError,
+                     "cannot perform %s with flexible type",
+                     _reduce_type[operation]);
+        Py_XDECREF(otype);
+        Py_DECREF(mp);
+        return NULL;
+    }
+
+    if (axis < 0) axis += mp->nd;
+    if (axis < 0 || axis >= mp->nd) {
+        PyErr_SetString(PyExc_ValueError, "axis not in array");
+        Py_XDECREF(otype);
+        Py_DECREF(mp);
+        return NULL;
+    }
+
+    /* If out is specified it determines otype unless otype
+       already specified.
+    */
+    if (otype == NULL && out != NULL) {
+        otype = out->descr;
+        Py_INCREF(otype);
+    }
+
+    if (otype == NULL) {
+        /* For integer types --- make sure at
+           least a long is used for add and multiply
+           reduction --- to avoid overflow */
+        int typenum = PyArray_TYPE(mp);
+        if ((typenum < NPY_FLOAT) &&                \
+            ((strcmp(self->name,"add")==0) ||       \
+             (strcmp(self->name,"multiply")==0))) {
+            if (PyTypeNum_ISBOOL(typenum))
+                typenum = PyArray_LONG;
+            else if (mp->descr->elsize < sizeof(long)) {
+                if (PyTypeNum_ISUNSIGNED(typenum))
+                    typenum = PyArray_ULONG;
+                else
+                    typenum = PyArray_LONG;
+            }
+        }
+        otype = PyArray_DescrFromType(typenum);
+    }
+
+
+    switch(operation) {
+    case UFUNC_REDUCE:
+        ret = (PyArrayObject *)PyUFunc_Reduce(self, mp, out, axis,
+                                              otype->type_num);
+        break;
+    case UFUNC_ACCUMULATE:
+        ret = (PyArrayObject *)PyUFunc_Accumulate(self, mp, out, axis,
+                                                  otype->type_num);
+        break;
+    case UFUNC_REDUCEAT:
+        ret = (PyArrayObject *)PyUFunc_Reduceat(self, mp, indices, out,
+                                                axis, otype->type_num);
+        Py_DECREF(indices);
+        break;
+    }
+    Py_DECREF(mp);
+    Py_DECREF(otype);
+    if (ret==NULL) return NULL;
+    if (op->ob_type != ret->ob_type) {
+        res = PyObject_CallMethod(op, "__array_wrap__", "O", ret);
+        if (res == NULL) PyErr_Clear();
+        else if (res == Py_None) Py_DECREF(res);
+        else {
+            Py_DECREF(ret);
+            return res;
+        }
+    }
+    return PyArray_Return(ret);
+
+}
+
+/* This function analyzes the input arguments
+   and determines an appropriate __array_wrap__ function to call
+   for the outputs.
+
+   If an output argument is provided, then it is wrapped
+   with its own __array_wrap__ not with the one determined by
+   the input arguments.
+
+   if the provided output argument is already an array,
+   the wrapping function is None (which means no wrapping will
+   be done --- not even PyArray_Return).
+
+   A NULL is placed in output_wrap for outputs that
+   should just have PyArray_Return called.
+*/
+
+static void
+_find_array_wrap(PyObject *args, PyObject **output_wrap, int nin, int nout)
+{
+    Py_ssize_t nargs;
+    int i;
+    int np = 0;
+    double priority, maxpriority;
+    PyObject *with_wrap[NPY_MAXARGS], *wraps[NPY_MAXARGS];
+    PyObject *obj, *wrap = NULL;
+
+    nargs = PyTuple_GET_SIZE(args);
+    for(i = 0; i < nin; i++) {
+        obj = PyTuple_GET_ITEM(args, i);
+        if (PyArray_CheckExact(obj) ||  \
+            PyArray_IsAnyScalar(obj))
+            continue;
+        wrap = PyObject_GetAttrString(obj, "__array_wrap__");
+        if (wrap) {
+            if (PyCallable_Check(wrap)) {
+                with_wrap[np] = obj;
+                wraps[np] = wrap;
+                ++np;
+            }
+            else {
+                Py_DECREF(wrap);
+                wrap = NULL;
+            }
+        }
+        else {
+            PyErr_Clear();
+        }
+    }
+    if (np >= 2) {
+        wrap = wraps[0];
+        maxpriority = PyArray_GetPriority(with_wrap[0],
+                                          PyArray_SUBTYPE_PRIORITY);
+        for(i = 1; i < np; ++i) {
+            priority = \
+                PyArray_GetPriority(with_wrap[i],
+                                    PyArray_SUBTYPE_PRIORITY);
+            if (priority > maxpriority) {
+                maxpriority = priority;
+                Py_DECREF(wrap);
+                wrap = wraps[i];
+            } else {
+                Py_DECREF(wraps[i]);
+            }
+        }
+    }
+
+    /* Here wrap is the wrapping function determined from the
+       input arrays (could be NULL).
+
+       For all the output arrays decide what to do.
+
+       1) Use the wrap function determined from the input arrays
+       This is the default if the output array is not
+       passed in.
+
+       2) Use the __array_wrap__ method of the output object
+       passed in. -- this is special cased for
+       exact ndarray so that no PyArray_Return is
+       done in that case.
+    */
+
+    for(i=0; i<nout; i++) {
+        int j = nin + i;
+        int incref = 1;
+        output_wrap[i] = wrap;
+        if (j < nargs) {
+            obj = PyTuple_GET_ITEM(args, j);
+            if (obj == Py_None)
+                continue;
+            if (PyArray_CheckExact(obj)) {
+                output_wrap[i] = Py_None;
+            }
+            else {
+                PyObject *owrap;
+                owrap = PyObject_GetAttrString(obj,"__array_wrap__");
+                incref = 0;
+                if (!(owrap) || !(PyCallable_Check(owrap))) {
+                    Py_XDECREF(owrap);
+                    owrap = wrap;
+                    incref = 1;
+                    PyErr_Clear();
+                }
+                output_wrap[i] = owrap;
+            }
+        }
+        if (incref) {
+            Py_XINCREF(output_wrap[i]);
+        }
+    }
+
+    Py_XDECREF(wrap);
+    return;
+}
+
+static PyObject *
+ufunc_generic_call(PyUFuncObject *self, PyObject *args, PyObject *kwds)
+{
+    int i;
+    PyTupleObject *ret;
+    PyArrayObject *mps[NPY_MAXARGS];
+    PyObject *retobj[NPY_MAXARGS];
+    PyObject *wraparr[NPY_MAXARGS];
+    PyObject *res;
+    int errval;
+
+    /*
+     * Initialize all array objects to NULL to make cleanup easier
+     * if something goes wrong.
+     */
+    for(i = 0; i < self->nargs; i++) {
+        mps[i] = NULL;
+    }
+
+    errval = PyUFunc_GenericFunction(self, args, kwds, mps);
+    if (errval < 0) {
+        for(i = 0; i < self->nargs; i++) {
+            PyArray_XDECREF_ERR(mps[i]);
+        }
+        if (errval == -1)
+            return NULL;
+        else {
+            /*
+             * PyErr_SetString(PyExc_TypeError,"");
+             * return NULL;
+             */
+            Py_INCREF(Py_NotImplemented);
+            return Py_NotImplemented;
+        }
+    }
+
+    for(i = 0; i < self->nin; i++) {
+        Py_DECREF(mps[i]);
+    }
+
+
+    /*
+     * Use __array_wrap__ on all outputs
+     * if present on one of the input arguments.
+     * If present for multiple inputs:
+     * use __array_wrap__ of input object with largest
+     * __array_priority__ (default = 0.0)
+     *
+     * Exception:  we should not wrap outputs for items already
+     * passed in as output-arguments.  These items should either
+     * be left unwrapped or wrapped by calling their own __array_wrap__
+     * routine.
+     *
+     * For each output argument, wrap will be either
+     * NULL --- call PyArray_Return() -- default if no output arguments given
+     * None --- array-object passed in don't call PyArray_Return
+     * method --- the __array_wrap__ method to call.
+     */
+    _find_array_wrap(args, wraparr, self->nin, self->nout);
+
+    /* wrap outputs */
+    for(i = 0; i < self->nout; i++) {
+        int j=self->nin+i;
+        PyObject *wrap;
+
+        /*
+         * check to see if any UPDATEIFCOPY flags are set
+         * which meant that a temporary output was generated
+         */
+        if (mps[j]->flags & UPDATEIFCOPY) {
+            PyObject *old = mps[j]->base;
+            /* we want to hang on to this */
+            Py_INCREF(old);
+            /* should trigger the copyback into old */
+            Py_DECREF(mps[j]);
+            mps[j] = (PyArrayObject *)old;
+        }
+        wrap = wraparr[i];
+        if (wrap != NULL) {
+            if (wrap == Py_None) {
+                Py_DECREF(wrap);
+                retobj[i] = (PyObject *)mps[j];
+                continue;
+            }
+            res = PyObject_CallFunction(wrap, "O(OOi)",
+                                        mps[j], self, args, i);
+            if (res == NULL && \
+                PyErr_ExceptionMatches(PyExc_TypeError)) {
+                PyErr_Clear();
+                res = PyObject_CallFunctionObjArgs(wrap,
+                                                   mps[j],
+                                                   NULL);
+            }
+            Py_DECREF(wrap);
+            if (res == NULL) {
+                goto fail;
+            }
+            else if (res == Py_None) {
+                Py_DECREF(res);
+            }
+            else {
+                Py_DECREF(mps[j]);
+                retobj[i] = res;
+                continue;
+            }
+        }
+        /* default behavior */
+        retobj[i] = PyArray_Return(mps[j]);
+    }
+
+    if (self->nout == 1) {
+        return retobj[0];
+    } else {
+        ret = (PyTupleObject *)PyTuple_New(self->nout);
+        for(i = 0; i < self->nout; i++) {
+            PyTuple_SET_ITEM(ret, i, retobj[i]);
+        }
+        return (PyObject *)ret;
+    }
+fail:
+    for(i = self->nin; i < self->nargs; i++) {
+        Py_XDECREF(mps[i]);
+    }
+    return NULL;
+}
+
+static PyObject *
+ufunc_geterr(PyObject *NPY_UNUSED(dummy), PyObject *args)
+{
+    PyObject *thedict;
+    PyObject *res;
+
+    if (!PyArg_ParseTuple(args, "")) return NULL;
+
+    if (PyUFunc_PYVALS_NAME == NULL) {
+        PyUFunc_PYVALS_NAME = PyString_InternFromString(UFUNC_PYVALS_NAME);
+    }
+    thedict = PyThreadState_GetDict();
+    if (thedict == NULL) {
+        thedict = PyEval_GetBuiltins();
+    }
+    res = PyDict_GetItem(thedict, PyUFunc_PYVALS_NAME);
+    if (res != NULL) {
+        Py_INCREF(res);
+        return res;
+    }
+    /* Construct list of defaults */
+    res = PyList_New(3);
+    if (res == NULL) return NULL;
+    PyList_SET_ITEM(res, 0, PyInt_FromLong(PyArray_BUFSIZE));
+    PyList_SET_ITEM(res, 1, PyInt_FromLong(UFUNC_ERR_DEFAULT));
+    PyList_SET_ITEM(res, 2, Py_None); Py_INCREF(Py_None);
+    return res;
+}
+
+#if USE_USE_DEFAULTS==1
+/*
+  This is a strategy to buy a little speed up and avoid the dictionary
+  look-up in the default case.  It should work in the presence of
+  threads.  If it is deemed too complicated or it doesn't actually work
+  it could be taken out.
+*/
+static int
+ufunc_update_use_defaults(void)
+{
+    PyObject *errobj=NULL;
+    int errmask, bufsize;
+    int res;
+
+    PyUFunc_NUM_NODEFAULTS += 1;
+    res = PyUFunc_GetPyValues("test", &bufsize, &errmask,
+                              &errobj);
+    PyUFunc_NUM_NODEFAULTS -= 1;
+
+    if (res < 0) {Py_XDECREF(errobj); return -1;}
+
+    if ((errmask != UFUNC_ERR_DEFAULT) ||           \
+        (bufsize != PyArray_BUFSIZE) ||             \
+        (PyTuple_GET_ITEM(errobj, 1) != Py_None)) {
+        PyUFunc_NUM_NODEFAULTS += 1;
+    }
+    else if (PyUFunc_NUM_NODEFAULTS > 0) {
+        PyUFunc_NUM_NODEFAULTS -= 1;
+    }
+    Py_XDECREF(errobj);
+    return 0;
+}
+#endif
+
+static PyObject *
+ufunc_seterr(PyObject *NPY_UNUSED(dummy), PyObject *args)
+{
+    PyObject *thedict;
+    int res;
+    PyObject *val;
+    static char *msg = "Error object must be a list of length 3";
+
+    if (!PyArg_ParseTuple(args, "O", &val)) return NULL;
+
+    if (!PyList_CheckExact(val) || PyList_GET_SIZE(val) != 3) {
+        PyErr_SetString(PyExc_ValueError, msg);
+        return NULL;
+    }
+    if (PyUFunc_PYVALS_NAME == NULL) {
+        PyUFunc_PYVALS_NAME = PyString_InternFromString(UFUNC_PYVALS_NAME);
+    }
+    thedict = PyThreadState_GetDict();
+    if (thedict == NULL) {
+        thedict = PyEval_GetBuiltins();
+    }
+    res = PyDict_SetItem(thedict, PyUFunc_PYVALS_NAME, val);
+    if (res < 0) return NULL;
+#if USE_USE_DEFAULTS==1
+    if (ufunc_update_use_defaults() < 0) return NULL;
+#endif
+    Py_INCREF(Py_None);
+    return Py_None;
+}
+
+
+
+static PyUFuncGenericFunction pyfunc_functions[] = {PyUFunc_On_Om};
+
+static char
+doc_frompyfunc[] = "frompyfunc(func, nin, nout) take an arbitrary python function that takes nin objects as input and returns nout objects and return a universal function (ufunc).  This ufunc always returns PyObject arrays";
+
+static PyObject *
+ufunc_frompyfunc(PyObject *NPY_UNUSED(dummy), PyObject *args, PyObject *NPY_UNUSED(kwds)) {
+    /* Keywords are ignored for now */
+
+    PyObject *function, *pyname=NULL;
+    int nin, nout, i;
+    PyUFunc_PyFuncData *fdata;
+    PyUFuncObject *self;
+    char *fname, *str;
+    Py_ssize_t fname_len=-1;
+    int offset[2];
+
+    if (!PyArg_ParseTuple(args, "Oii", &function, &nin, &nout)) return NULL;
+
+    if (!PyCallable_Check(function)) {
+        PyErr_SetString(PyExc_TypeError, "function must be callable");
+        return NULL;
+    }
+
+    self = _pya_malloc(sizeof(PyUFuncObject));
+    if (self == NULL) return NULL;
+    PyObject_Init((PyObject *)self, &PyUFunc_Type);
+
+    self->userloops = NULL;
+    self->nin = nin;
+    self->nout = nout;
+    self->nargs = nin+nout;
+    self->identity = PyUFunc_None;
+    self->functions = pyfunc_functions;
+
+    self->ntypes = 1;
+    self->check_return = 0;
+    
+    /* generalized ufunc */
+    self->core_enabled = 0;
+    self->core_num_dim_ix = 0;
+    self->core_num_dims = NULL;
+    self->core_dim_ixs = NULL;
+    self->core_offsets = NULL;
+    self->core_signature = NULL;
+
+    pyname = PyObject_GetAttrString(function, "__name__");
+    if (pyname)
+        (void) PyString_AsStringAndSize(pyname, &fname, &fname_len);
+
+    if (PyErr_Occurred()) {
+        fname = "?";
+        fname_len = 1;
+        PyErr_Clear();
+    }
+    Py_XDECREF(pyname);
+
+
+
+    /* self->ptr holds a pointer for enough memory for
+       self->data[0] (fdata)
+       self->data
+       self->name
+       self->types
+
+       To be safest, all of these need their memory aligned on void * pointers
+       Therefore, we may need to allocate extra space.
+    */
+    offset[0] = sizeof(PyUFunc_PyFuncData);
+    i = (sizeof(PyUFunc_PyFuncData) % sizeof(void *));
+    if (i) offset[0] += (sizeof(void *) - i);
+    offset[1] = self->nargs;
+    i = (self->nargs % sizeof(void *));
+    if (i) offset[1] += (sizeof(void *)-i);
+
+    self->ptr = _pya_malloc(offset[0] + offset[1] + sizeof(void *) + \
+                            (fname_len+14));
+
+    if (self->ptr == NULL) return PyErr_NoMemory();
+    Py_INCREF(function);
+    self->obj = function;
+    fdata = (PyUFunc_PyFuncData *)(self->ptr);
+    fdata->nin = nin;
+    fdata->nout = nout;
+    fdata->callable = function;
+
+    self->data = (void **)(((char *)self->ptr) + offset[0]);
+    self->data[0] = (void *)fdata;
+
+    self->types = (char *)self->data + sizeof(void *);
+    for(i=0; i<self->nargs; i++) self->types[i] = PyArray_OBJECT;
+
+    str = self->types + offset[1];
+    memcpy(str, fname, fname_len);
+    memcpy(str+fname_len, " (vectorized)", 14);
+
+    self->name = str;
+
+    /* Do a better job someday */
+    self->doc = "dynamic ufunc based on a python function";
+
+
+    return (PyObject *)self;
+}
+
+/*UFUNC_API*/
+static int
+PyUFunc_ReplaceLoopBySignature(PyUFuncObject *func,
+                               PyUFuncGenericFunction newfunc,
+                               int *signature,
+                               PyUFuncGenericFunction *oldfunc)
+{
+    int i,j;
+    int res = -1;
+    /* Find the location of the matching signature */
+    for(i=0; i<func->ntypes; i++) {
+        for(j=0; j<func->nargs; j++) {
+            if (signature[j] != func->types[i*func->nargs+j])
+                break;
+        }
+        if (j < func->nargs) continue;
+
+        if (oldfunc != NULL) {
+            *oldfunc = func->functions[i];
+        }
+        func->functions[i] = newfunc;
+        res = 0;
+        break;
+    }
+    return res;
+}
+
+/*UFUNC_API*/
+static PyObject *
+PyUFunc_FromFuncAndData(PyUFuncGenericFunction *func, void **data,
+                        char *types, int ntypes,
+                        int nin, int nout, int identity,
+                        char *name, char *doc, int check_return)
+{
+    return PyUFunc_FromFuncAndDataAndSignature(func, data, types, ntypes,
+        nin, nout, identity, name, doc, check_return, NULL);
+}
+                        
+/*UFUNC_API*/
+static PyObject *
+PyUFunc_FromFuncAndDataAndSignature(PyUFuncGenericFunction *func, void **data,
+                                     char *types, int ntypes,
+                                     int nin, int nout, int identity,
+                                     char *name, char *doc,
+                                     int check_return, const char *signature)
+{
+    PyUFuncObject *self;
+
+    self = _pya_malloc(sizeof(PyUFuncObject));
+    if (self == NULL) return NULL;
+    PyObject_Init((PyObject *)self, &PyUFunc_Type);
+
+    self->nin = nin;
+    self->nout = nout;
+    self->nargs = nin+nout;
+    self->identity = identity;
+
+    self->functions = func;
+    self->data = data;
+    self->types = types;
+    self->ntypes = ntypes;
+    self->check_return = check_return;
+    self->ptr = NULL;
+    self->obj = NULL;
+    self->userloops=NULL;
+
+    if (name == NULL) self->name = "?";
+    else self->name = name;
+
+    if (doc == NULL) self->doc = "NULL";
+    else self->doc = doc;
+    
+    /* generalized ufunc */
+    self->core_enabled = 0;
+    self->core_num_dim_ix = 0;
+    self->core_num_dims = NULL;
+    self->core_dim_ixs = NULL;
+    self->core_offsets = NULL;
+    self->core_signature = NULL;
+    if (signature != NULL) {
+        if (_parse_signature(self, signature) != 0)
+	    return NULL;
+    }
+
+    return (PyObject *)self;
+}
+
+/* This is the first-part of the CObject structure.
+
+   I don't think this will change, but if it should, then
+   this needs to be fixed.  The exposed C-API was insufficient
+   because I needed to replace the pointer and it wouldn't
+   let me with a destructor set (even though it works fine
+   with the destructor).
+*/
+
+typedef struct {
+    PyObject_HEAD
+    void *c_obj;
+} _simple_cobj;
+
+#define _SETCPTR(cobj, val) ((_simple_cobj *)(cobj))->c_obj = (val)
+
+/* return 1 if arg1 > arg2, 0 if arg1 == arg2, and -1 if arg1 < arg2
+ */
+static int
+cmp_arg_types(int *arg1, int *arg2, int n)
+{
+    for(;n>0; n--, arg1++, arg2++) {
+        if (PyArray_EquivTypenums(*arg1, *arg2)) continue;
+        if (PyArray_CanCastSafely(*arg1, *arg2))
+            return -1;
+        return 1;
+    }
+    return 0;
+}
+
+/* This frees the linked-list structure
+   when the CObject is destroyed (removed
+   from the internal dictionary)
+*/
+static void
+_loop1d_list_free(void *ptr)
+{
+    PyUFunc_Loop1d *funcdata;
+    if (ptr == NULL) return;
+    funcdata = (PyUFunc_Loop1d *)ptr;
+    if (funcdata == NULL) return;
+    _pya_free(funcdata->arg_types);
+    _loop1d_list_free(funcdata->next);
+    _pya_free(funcdata);
+}
+
+
+/*UFUNC_API*/
+static int
+PyUFunc_RegisterLoopForType(PyUFuncObject *ufunc,
+                            int usertype,
+                            PyUFuncGenericFunction function,
+                            int *arg_types,
+                            void *data)
+{
+    PyArray_Descr *descr;
+    PyUFunc_Loop1d *funcdata;
+    PyObject *key, *cobj;
+    int i;
+    int *newtypes=NULL;
+
+    descr=PyArray_DescrFromType(usertype);
+    if ((usertype < PyArray_USERDEF) || (descr==NULL)) {
+        PyErr_SetString(PyExc_TypeError,
+                        "unknown user-defined type");
+        return -1;
+    }
+    Py_DECREF(descr);
+
+    if (ufunc->userloops == NULL) {
+        ufunc->userloops = PyDict_New();
+    }
+    key = PyInt_FromLong((long) usertype);
+    if (key == NULL) return -1;
+    funcdata = _pya_malloc(sizeof(PyUFunc_Loop1d));
+    if (funcdata == NULL) goto fail;
+    newtypes = _pya_malloc(sizeof(int)*ufunc->nargs);
+    if (newtypes == NULL) goto fail;
+    if (arg_types != NULL) {
+        for(i=0; i<ufunc->nargs; i++) {
+            newtypes[i] = arg_types[i];
+        }
+    }
+    else {
+        for(i=0; i<ufunc->nargs; i++) {
+            newtypes[i] = usertype;
+        }
+    }
+
+    funcdata->func = function;
+    funcdata->arg_types = newtypes;
+    funcdata->data = data;
+    funcdata->next = NULL;
+
+    /* Get entry for this user-defined type*/
+    cobj = PyDict_GetItem(ufunc->userloops, key);
+
+    /* If it's not there, then make one and return. */
+    if (cobj == NULL) {
+        cobj = PyCObject_FromVoidPtr((void *)funcdata,
+                                     _loop1d_list_free);
+        if (cobj == NULL) goto fail;
+        PyDict_SetItem(ufunc->userloops, key, cobj);
+        Py_DECREF(cobj);
+        Py_DECREF(key);
+        return 0;
+    }
+    else {
+        PyUFunc_Loop1d *current, *prev=NULL;
+        int cmp=1;
+        /* There is already at least 1 loop. Place this one in
+           lexicographic order.  If the next one signature
+           is exactly like this one, then just replace.
+           Otherwise insert.
+        */
+        current = (PyUFunc_Loop1d *)PyCObject_AsVoidPtr(cobj);
+        while (current != NULL) {
+            cmp = cmp_arg_types(current->arg_types, newtypes,
+                                ufunc->nargs);
+            if (cmp >= 0) break;
+            prev = current;
+            current = current->next;
+        }
+        if (cmp == 0) { /* just replace it with new function */
+            current->func = function;
+            current->data = data;
+            _pya_free(newtypes);
+            _pya_free(funcdata);
+        }
+        else { /* insert it before the current one
+                  by hacking the internals of cobject to
+                  replace the function pointer ---
+                  can't use CObject API because destructor is set.
+               */
+            funcdata->next = current;
+            if (prev == NULL) { /* place this at front */
+                _SETCPTR(cobj, funcdata);
+            }
+            else {
+                prev->next = funcdata;
+            }
+        }
+    }
+    Py_DECREF(key);
+    return 0;
+
+
+ fail:
+    Py_DECREF(key);
+    _pya_free(funcdata);
+    _pya_free(newtypes);
+    if (!PyErr_Occurred()) PyErr_NoMemory();
+    return -1;
+}
+
+#undef _SETCPTR
+
+
+static void
+ufunc_dealloc(PyUFuncObject *self)
+{
+    if (self->core_num_dims) _pya_free(self->core_num_dims);
+    if (self->core_dim_ixs) _pya_free(self->core_dim_ixs);
+    if (self->core_offsets) _pya_free(self->core_offsets);
+    if (self->core_signature) _pya_free(self->core_signature);
+    if (self->ptr) _pya_free(self->ptr);
+    Py_XDECREF(self->userloops);
+    Py_XDECREF(self->obj);
+    _pya_free(self);
+}
+
+static PyObject *
+ufunc_repr(PyUFuncObject *self)
+{
+    char buf[100];
+
+    sprintf(buf, "<ufunc '%.50s'>", self->name);
+
+    return PyString_FromString(buf);
+}
+
+
+/* -------------------------------------------------------- */
+
+/* op.outer(a,b) is equivalent to op(a[:,NewAxis,NewAxis,etc.],b)
+   where a has b.ndim NewAxis terms appended.
+
+   The result has dimensions a.ndim + b.ndim
+*/
+
+static PyObject *
+ufunc_outer(PyUFuncObject *self, PyObject *args, PyObject *kwds)
+{
+    int i;
+    PyObject *ret;
+    PyArrayObject *ap1=NULL, *ap2=NULL, *ap_new=NULL;
+    PyObject *new_args, *tmp;
+    PyObject *shape1, *shape2, *newshape;
+
+    if (self->core_enabled) {
+        PyErr_Format(PyExc_TypeError,
+                     "method outer is not allowed in ufunc with non-trivial"\
+                     " signature");
+        return NULL;
+    }
+    
+    if(self->nin != 2) {
+        PyErr_SetString(PyExc_ValueError,
+                        "outer product only supported "\
+                        "for binary functions");
+        return NULL;
+    }
+
+    if (PySequence_Length(args) != 2) {
+        PyErr_SetString(PyExc_TypeError,
+                        "exactly two arguments expected");
+        return NULL;
+    }
+
+    tmp = PySequence_GetItem(args, 0);
+    if (tmp == NULL) return NULL;
+    ap1 = (PyArrayObject *)                                 \
+        PyArray_FromObject(tmp, PyArray_NOTYPE, 0, 0);
+    Py_DECREF(tmp);
+    if (ap1 == NULL) return NULL;
+
+    tmp = PySequence_GetItem(args, 1);
+    if (tmp == NULL) return NULL;
+    ap2 = (PyArrayObject *)PyArray_FromObject(tmp, PyArray_NOTYPE, 0, 0);
+    Py_DECREF(tmp);
+    if (ap2 == NULL) {Py_DECREF(ap1); return NULL;}
+
+    /* Construct new shape tuple */
+    shape1 = PyTuple_New(ap1->nd);
+    if (shape1 == NULL) goto fail;
+    for(i=0; i<ap1->nd; i++)
+        PyTuple_SET_ITEM(shape1, i,
+                         PyLong_FromLongLong((longlong)ap1->    \
+                                             dimensions[i]));
+
+    shape2 = PyTuple_New(ap2->nd);
+    for(i=0; i<ap2->nd; i++)
+        PyTuple_SET_ITEM(shape2, i, PyInt_FromLong((long) 1));
+    if (shape2 == NULL) {Py_DECREF(shape1); goto fail;}
+    newshape = PyNumber_Add(shape1, shape2);
+    Py_DECREF(shape1);
+    Py_DECREF(shape2);
+    if (newshape == NULL) goto fail;
+
+    ap_new = (PyArrayObject *)PyArray_Reshape(ap1, newshape);
+    Py_DECREF(newshape);
+    if (ap_new == NULL) goto fail;
+
+    new_args = Py_BuildValue("(OO)", ap_new, ap2);
+    Py_DECREF(ap1);
+    Py_DECREF(ap2);
+    Py_DECREF(ap_new);
+    ret = ufunc_generic_call(self, new_args, kwds);
+    Py_DECREF(new_args);
+    return ret;
+
+ fail:
+    Py_XDECREF(ap1);
+    Py_XDECREF(ap2);
+    Py_XDECREF(ap_new);
+    return NULL;
+}
+
+
+static PyObject *
+ufunc_reduce(PyUFuncObject *self, PyObject *args, PyObject *kwds)
+{
+
+    return PyUFunc_GenericReduction(self, args, kwds, UFUNC_REDUCE);
+}
+
+static PyObject *
+ufunc_accumulate(PyUFuncObject *self, PyObject *args, PyObject *kwds)
+{
+
+    return PyUFunc_GenericReduction(self, args, kwds, UFUNC_ACCUMULATE);
+}
+
+static PyObject *
+ufunc_reduceat(PyUFuncObject *self, PyObject *args, PyObject *kwds)
+{
+    return PyUFunc_GenericReduction(self, args, kwds, UFUNC_REDUCEAT);
+}
+
+
+static struct PyMethodDef ufunc_methods[] = {
+    {"reduce",  (PyCFunction)ufunc_reduce, METH_VARARGS | METH_KEYWORDS, NULL },
+    {"accumulate",  (PyCFunction)ufunc_accumulate,
+     METH_VARARGS | METH_KEYWORDS, NULL },
+    {"reduceat",  (PyCFunction)ufunc_reduceat,
+     METH_VARARGS | METH_KEYWORDS, NULL },
+    {"outer", (PyCFunction)ufunc_outer, METH_VARARGS | METH_KEYWORDS, NULL},
+    {NULL, NULL, 0, NULL}           /* sentinel */
+};
+
+
+
+/* construct the string
+   y1,y2,...,yn
+*/
+static PyObject *
+_makeargs(int num, char *ltr, int null_if_none)
+{
+    PyObject *str;
+    int i;
+    switch (num) {
+    case 0:
+        if (null_if_none) return NULL;
+        return PyString_FromString("");
+    case 1:
+        return PyString_FromString(ltr);
+    }
+    str = PyString_FromFormat("%s1, %s2", ltr, ltr);
+    for(i = 3; i <= num; ++i) {
+        PyString_ConcatAndDel(&str, PyString_FromFormat(", %s%d", ltr, i));
+    }
+    return str;
+}
+
+static char
+_typecharfromnum(int num) {
+    PyArray_Descr *descr;
+    char ret;
+
+    descr = PyArray_DescrFromType(num);
+    ret = descr->type;
+    Py_DECREF(descr);
+    return ret;
+}
+
+static PyObject *
+ufunc_get_doc(PyUFuncObject *self)
+{
+    /* Put docstring first or FindMethod finds it...*/
+    /* could so some introspection on name and nin + nout */
+    /* to automate the first part of it */
+    /* the doc string shouldn't need the calling convention */
+    /* construct
+       name(x1, x2, ...,[ out1, out2, ...])
+       
+       __doc__
+    */
+    PyObject *outargs, *inargs, *doc;
+    outargs = _makeargs(self->nout, "out", 1);
+    inargs = _makeargs(self->nin, "x", 0);
+    if (outargs == NULL) {
+        doc = PyString_FromFormat("%s(%s)\n\n%s",
+                                  self->name,
+                                  PyString_AS_STRING(inargs),
+                                  self->doc);
+    } else {
+        doc = PyString_FromFormat("%s(%s[, %s])\n\n%s",
+                                  self->name,
+                                  PyString_AS_STRING(inargs),
+                                  PyString_AS_STRING(outargs),
+                                  self->doc);
+        Py_DECREF(outargs);
+    }
+    Py_DECREF(inargs);
+    return doc;
+}
+
+static PyObject *
+ufunc_get_nin(PyUFuncObject *self)
+{
+    return PyInt_FromLong(self->nin);
+}
+
+static PyObject *
+ufunc_get_nout(PyUFuncObject *self)
+{
+    return PyInt_FromLong(self->nout);
+}
+
+static PyObject *
+ufunc_get_nargs(PyUFuncObject *self)
+{
+    return PyInt_FromLong(self->nargs);
+}
+
+static PyObject *
+ufunc_get_ntypes(PyUFuncObject *self)
+{
+    return PyInt_FromLong(self->ntypes);
+}
+
+static PyObject *
+ufunc_get_types(PyUFuncObject *self)
+{
+    /* return a list with types grouped
+       input->output */
+    PyObject *list;
+    PyObject *str;
+    int k, j, n, nt=self->ntypes;
+    int ni = self->nin;
+    int no = self->nout;
+    char *t;
+    list = PyList_New(nt);
+    if (list == NULL) return NULL;
+    t = _pya_malloc(no+ni+2);
+    n = 0;
+    for(k=0; k<nt; k++) {
+        for(j=0; j<ni; j++) {
+            t[j] = _typecharfromnum(self->types[n]);
+            n++;
+        }
+        t[ni] = '-';
+        t[ni+1] = '>';
+        for(j=0; j<no; j++) {
+            t[ni+2+j] =                                     \
+                _typecharfromnum(self->types[n]);
+            n++;
+        }
+        str = PyString_FromStringAndSize(t, no+ni+2);
+        PyList_SET_ITEM(list, k, str);
+    }
+    _pya_free(t);
+    return list;
+}
+
+static PyObject *
+ufunc_get_name(PyUFuncObject *self)
+{
+    return PyString_FromString(self->name);
+}
+
+static PyObject *
+ufunc_get_identity(PyUFuncObject *self)
+{
+    switch(self->identity) {
+    case PyUFunc_One:
+        return PyInt_FromLong(1);
+    case PyUFunc_Zero:
+        return PyInt_FromLong(0);
+    }
+    return Py_None;
+}
+
+static PyObject *
+ufunc_get_signature(PyUFuncObject *self)
+{
+    if (!self->core_enabled)
+        Py_RETURN_NONE;
+    return PyString_FromString(self->core_signature);
+}
+
+#undef _typecharfromnum
+
+/* Docstring is now set from python */
+/* static char *Ufunctype__doc__ = NULL; */
+
+static PyGetSetDef ufunc_getset[] = {
+    {"__doc__",  (getter)ufunc_get_doc,      NULL, "documentation string", NULL},
+    {"nin",      (getter)ufunc_get_nin,      NULL, "number of inputs", NULL},
+    {"nout",     (getter)ufunc_get_nout,     NULL, "number of outputs", NULL},
+    {"nargs",    (getter)ufunc_get_nargs,    NULL, "number of arguments", NULL},
+    {"ntypes",   (getter)ufunc_get_ntypes,   NULL, "number of types", NULL},
+    {"types",    (getter)ufunc_get_types,    NULL, "return a list with types grouped input->output", NULL},
+    {"__name__", (getter)ufunc_get_name,     NULL, "function name", NULL},
+    {"identity", (getter)ufunc_get_identity, NULL, "identity value", NULL},
+    {"signature",(getter)ufunc_get_signature,NULL, "signature"},
+    {NULL, NULL, NULL, NULL, NULL},  /* Sentinel */
+};
+
+static PyTypeObject PyUFunc_Type = {
+    PyObject_HEAD_INIT(0)
+    0,                              /*ob_size*/
+    "numpy.ufunc",                  /*tp_name*/
+    sizeof(PyUFuncObject),          /*tp_basicsize*/
+    0,                              /*tp_itemsize*/
+    /* methods */
+    (destructor)ufunc_dealloc,      /*tp_dealloc*/
+    (printfunc)0,                   /*tp_print*/
+    (getattrfunc)0,                 /*tp_getattr*/
+    (setattrfunc)0,                 /*tp_setattr*/
+    (cmpfunc)0,                     /*tp_compare*/
+    (reprfunc)ufunc_repr,           /*tp_repr*/
+    0,                             /*tp_as_number*/
+    0,                             /*tp_as_sequence*/
+    0,                             /*tp_as_mapping*/
+    (hashfunc)0,                   /*tp_hash*/
+    (ternaryfunc)ufunc_generic_call, /*tp_call*/
+    (reprfunc)ufunc_repr,          /*tp_str*/
+    0,                             /* tp_getattro */
+    0,                             /* tp_setattro */
+    0,                             /* tp_as_buffer */
+    Py_TPFLAGS_DEFAULT,            /* tp_flags */
+    NULL,                          /* tp_doc */ /* was Ufunctype__doc__ */
+    0,                             /* tp_traverse */
+    0,                             /* tp_clear */
+    0,                             /* tp_richcompare */
+    0,                             /* tp_weaklistoffset */
+    0,                             /* tp_iter */
+    0,                             /* tp_iternext */
+    ufunc_methods,                 /* tp_methods */
+    0,                             /* tp_members */
+    ufunc_getset,                  /* tp_getset */
+    0,   /* tp_base */
+    0,   /* tp_dict */
+    0,   /* tp_descr_get */
+    0,   /* tp_descr_set */
+    0,   /* tp_dictoffset */
+    0,   /* tp_init */
+    0,   /* tp_alloc */
+    0,   /* tp_new */
+    0,   /* tp_free */
+    0,   /* tp_is_gc */
+    0,   /* tp_bases */
+    0,   /* tp_mro */
+    0,   /* tp_cache */
+    0,   /* tp_subclasses */
+    0,   /* tp_weaklist */
+    0,   /* tp_del */
+
+#ifdef COUNT_ALLOCS
+	/* these must be last and never explicitly initialized */
+    0, /* tp_allocs */
+    0, /* tp_frees */
+    0, /* tp_maxalloc */
+    0, /* tp_prev */
+    0, /* *tp_next */
+#endif
+};
+
+/* End of code for ufunc objects */
+/* -------------------------------------------------------- */

Modified: trunk/numpy/core/src/umathmodule.c.src
===================================================================
--- trunk/numpy/core/src/umathmodule.c.src	2008-11-21 21:50:17 UTC (rev 6088)
+++ trunk/numpy/core/src/umathmodule.c.src	2008-11-22 01:28:52 UTC (rev 6089)
@@ -21,7 +21,7 @@
 #define M_PI 3.14159265358979323846264338328
 #endif
 
-#include "math_c99.inc"
+#include "umath_funcs_c99.inc"
 
 /*
  *****************************************************************************
@@ -182,9 +182,9 @@
 
 /*
  * Don't pass structures between functions (only pointers) because how
- * structures are passed is compiler dependent and could cause
- * segfaults if ufuncobject.c is compiled with a different compiler
- * than an extension that makes use of the UFUNC API
+ * structures are passed is compiler dependent and could cause segfaults if
+ * umath_ufunc_object.inc is compiled with a different compiler than an
+ * extension that makes use of the UFUNC API
  */
 
 /**begin repeat
@@ -1939,7 +1939,7 @@
  */
 
 #include "__umath_generated.c"
-#include "ufuncobject.c"
+#include "umath_ufunc_object.inc"
 #include "__ufunc_api.c"
 
 static PyUFuncGenericFunction frexp_functions[] = {




More information about the Numpy-svn mailing list