[Scipy-svn] r4597 - trunk/scipy/cluster/tests

scipy-svn at scipy.org scipy-svn at scipy.org
Mon Aug 4 14:58:39 EDT 2008


Author: damian.eads
Date: 2008-08-04 13:58:20 -0500 (Mon, 04 Aug 2008)
New Revision: 4597

Added:
   trunk/scipy/cluster/tests/test_distance.py
Log:
forgot to commit this file.

Added: trunk/scipy/cluster/tests/test_distance.py
===================================================================
--- trunk/scipy/cluster/tests/test_distance.py	2008-08-04 14:37:45 UTC (rev 4596)
+++ trunk/scipy/cluster/tests/test_distance.py	2008-08-04 18:58:20 UTC (rev 4597)
@@ -0,0 +1,949 @@
+#! /usr/bin/env python
+#
+# Author: Damian Eads
+# Date: April 17, 2008
+#
+# Copyright (C) 2008 Damian Eads
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# 1. Redistributions of source code must retain the above copyright
+#    notice, this list of conditions and the following disclaimer.
+#
+# 2. Redistributions in binary form must reproduce the above
+#    copyright notice, this list of conditions and the following
+#    disclaimer in the documentation and/or other materials provided
+#    with the distribution.
+#
+# 3. The name of the author may not be used to endorse or promote
+#    products derived from this software without specific prior
+#    written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
+# OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
+# GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+# WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+
+import sys
+import os.path
+from scipy.testing import *
+from scipy.cluster.hierarchy import squareform, linkage, from_mlab_linkage, numobs_dm, numobs_y, numobs_linkage
+from scipy.cluster.distance import pdist, matching, jaccard, dice, sokalsneath, rogerstanimoto, russellrao, yule
+
+import numpy
+#import math
+
+#from scipy.cluster.hierarchy import pdist, euclidean
+
+_filenames = ["iris.txt",  # data files read by load_testing_files()
+              "pdist-hamming-ml.txt",
+              "pdist-boolean-inp.txt",
+              "pdist-jaccard-ml.txt",
+              "pdist-cityblock-ml-iris.txt",
+              "pdist-minkowski-3.2-ml-iris.txt",
+              "pdist-cityblock-ml.txt",
+              "pdist-correlation-ml-iris.txt",
+              "pdist-minkowski-5.8-ml-iris.txt",
+              "pdist-correlation-ml.txt",
+              "pdist-minkowski-3.2-ml.txt",
+              "pdist-cosine-ml-iris.txt",
+              "pdist-seuclidean-ml-iris.txt",
+              "pdist-cosine-ml.txt",
+              "pdist-seuclidean-ml.txt",
+              "pdist-double-inp.txt",
+              "pdist-spearman-ml.txt",
+              "pdist-euclidean-ml.txt",
+              "pdist-euclidean-ml-iris.txt",
+              "pdist-chebychev-ml.txt",
+              "pdist-chebychev-ml-iris.txt",
+              "random-bool-data.txt"]
+
+_tdist = numpy.array([[0,    662,  877,  255,  412,  996],
+                      [662,  0,    295,  468,  268,  400],
+                      [877,  295,  0,    754,  564,  138],
+                      [255,  468,  754,  0,    219,  869],
+                      [412,  268,  564,  219,  0,    669],
+                      [996,  400,  138,  869,  669,  0  ]], dtype='double')  # symmetric, zero diagonal
+
+_ytdist = squareform(_tdist)  # NOTE(review): presumably the condensed form of _tdist -- confirm
+
+# A hashmap of expected output arrays for the tests. These arrays
+# come from a list of text files, which are read prior to testing.
+
+eo = {}
+
+def load_testing_files():
+    "Loads every file named in _filenames into eo, keyed by trimmed file name."
+    for fn in _filenames:
+        name = fn.replace(".txt", "").replace("-ml", "")
+        # Give loadtxt the path so no open() handle is left for us to close.
+        eo[name] = numpy.loadtxt(os.path.join(os.path.dirname(__file__), fn))
+    eo['pdist-boolean-inp'] = numpy.bool_(eo['pdist-boolean-inp'])
+
+load_testing_files()
+
+#print eo.keys()
+
+
+#print numpy.abs(Y_test2 - Y_right).max()
+#print numpy.abs(Y_test1 - Y_right).max()
+
+class TestPdist(TestCase):
+    """
+    Test suite for the pdist function.
+    """
+
+    ################### pdist: euclidean
+    def test_pdist_euclidean_random(self):
+        "Tests pdist(X, 'euclidean') on random data."
+        # Input observations and the reference distances, both taken from eo.
+        observations = eo['pdist-double-inp']
+        expected = eo['pdist-euclidean']
+        # Distances produced by the metric under test.
+        computed = pdist(observations, 'euclidean')
+        # Compare against the reference with eps = 1e-07.
+        self.failUnless(within_tol(computed, expected, 1e-07))
+
+    def test_pdist_euclidean_random_float32(self):
+        "Tests pdist(X, 'euclidean') on random data (float32)."
+        # Input cast to float32; reference distances taken from eo unchanged.
+        observations = numpy.float32(eo['pdist-double-inp'])
+        expected = eo['pdist-euclidean']
+        # Distances produced by the metric under test.
+        computed = pdist(observations, 'euclidean')
+        # Compare against the reference with eps = 1e-07.
+        self.failUnless(within_tol(computed, expected, 1e-07))
+
+    def test_pdist_euclidean_random_nonC(self):
+        "Tests pdist(X, 'test_euclidean') [the non-C implementation] on random data."
+        # Input observations and the reference distances, both taken from eo.
+        observations = eo['pdist-double-inp']
+        expected = eo['pdist-euclidean']
+        # Distances from the 'test_'-prefixed metric (non-C, per the docstring).
+        computed = pdist(observations, 'test_euclidean')
+        self.failUnless(within_tol(computed, expected, 1e-07))
+
+    def test_pdist_euclidean_iris_double(self):
+        "Tests pdist(X, 'euclidean') on the Iris data set."
+        # Iris observations and the reference distances, both taken from eo.
+        observations = eo['iris']
+        expected = eo['pdist-euclidean-iris']
+        # Distances produced by the metric under test.
+        computed = pdist(observations, 'euclidean')
+        # Compare against the reference with eps = 1e-07.
+        self.failUnless(within_tol(computed, expected, 1e-07))
+
+    def test_pdist_euclidean_iris_float32(self):
+        "Tests pdist(X, 'euclidean') on the Iris data set. (float32)"
+        eps = 1e-06
+        # Get the data: the float32 input matrix and the right output.
+        X = numpy.float32(eo['iris'])
+        Y_right = eo['pdist-euclidean-iris']
+
+        Y_test1 = pdist(X, 'euclidean')
+        # No diagnostic output here: the assertion alone decides the result.
+        self.failUnless(within_tol(Y_test1, Y_right, eps))
+
+    def test_pdist_euclidean_iris_nonC(self):
+        "Tests pdist(X, 'test_euclidean') [the non-C implementation] on the Iris data set."
+        # Iris observations and the reference distances, both taken from eo.
+        observations = eo['iris']
+        expected = eo['pdist-euclidean-iris']
+        # Distances from the 'test_'-prefixed metric (non-C, per the docstring).
+        computed = pdist(observations, 'test_euclidean')
+        self.failUnless(within_tol(computed, expected, 1e-07))
+
+    ################### pdist: seuclidean
+    def test_pdist_seuclidean_random(self):
+        "Tests pdist(X, 'seuclidean') on random data."
+        # Input observations and the reference distances, both taken from eo.
+        observations = eo['pdist-double-inp']
+        expected = eo['pdist-seuclidean']
+        # Distances produced by the metric under test.
+        computed = pdist(observations, 'seuclidean')
+        # Compare against the reference with eps = 1e-05.
+        self.failUnless(within_tol(computed, expected, 1e-05))
+
+    def test_pdist_seuclidean_random_float32(self):
+        "Tests pdist(X, 'seuclidean') on random data (float32)."
+        # Input cast to float32; reference distances taken from eo unchanged.
+        observations = numpy.float32(eo['pdist-double-inp'])
+        expected = eo['pdist-seuclidean']
+        # Distances produced by the metric under test.
+        computed = pdist(observations, 'seuclidean')
+        # Compare against the reference with eps = 1e-05.
+        self.failUnless(within_tol(computed, expected, 1e-05))
+
+    def test_pdist_seuclidean_random_nonC(self):
+        "Tests pdist(X, 'test_sqeuclidean') [the non-C implementation] on random data."
+        # Input observations and the reference distances, both taken from eo.
+        observations = eo['pdist-double-inp']
+        expected = eo['pdist-seuclidean']
+        # NOTE(review): metric is 'test_sqeuclidean' but the reference is seuclidean -- confirm.
+        computed = pdist(observations, 'test_sqeuclidean')
+        self.failUnless(within_tol(computed, expected, 1e-05))
+
+    def test_pdist_seuclidean_iris(self):
+        "Tests pdist(X, 'seuclidean') on the Iris data set."
+        # Iris observations and the reference distances, both taken from eo.
+        observations = eo['iris']
+        expected = eo['pdist-seuclidean-iris']
+        # Distances produced by the metric under test.
+        computed = pdist(observations, 'seuclidean')
+        # Compare against the reference with eps = 1e-05.
+        self.failUnless(within_tol(computed, expected, 1e-05))
+
+    def test_pdist_seuclidean_iris_float32(self):
+        "Tests pdist(X, 'seuclidean') on the Iris data set (float32)."
+        # Iris input cast to float32; reference distances taken from eo unchanged.
+        observations = numpy.float32(eo['iris'])
+        expected = eo['pdist-seuclidean-iris']
+        # Distances produced by the metric under test.
+        computed = pdist(observations, 'seuclidean')
+        # Compare against the reference with eps = 1e-05.
+        self.failUnless(within_tol(computed, expected, 1e-05))
+
+    def test_pdist_seuclidean_iris_nonC(self):
+        "Tests pdist(X, 'test_sqeuclidean') [the non-C implementation] on the Iris data set."
+        eps = 1e-05
+        # NOTE(review): metric is 'test_sqeuclidean' but the reference is seuclidean -- confirm.
+        X = eo['iris']
+        Y_right = eo['pdist-seuclidean-iris']
+        Y_test2 = pdist(X, 'test_sqeuclidean')
+        self.failUnless(within_tol(Y_test2, Y_right, eps))
+
+    ################### pdist: cosine
+    def test_pdist_cosine_random(self):
+        "Tests pdist(X, 'cosine') on random data."
+        # Input observations and the reference distances, both taken from eo.
+        observations = eo['pdist-double-inp']
+        expected = eo['pdist-cosine']
+        # Distances produced by the metric under test.
+        computed = pdist(observations, 'cosine')
+        self.failUnless(within_tol(computed, expected, 1e-08))
+
+    def test_pdist_cosine_random_float32(self):
+        "Tests pdist(X, 'cosine') on random data. (float32)"
+        # Input cast to float32; reference distances taken from eo unchanged.
+        observations = numpy.float32(eo['pdist-double-inp'])
+        expected = eo['pdist-cosine']
+        # Distances produced by the metric under test.
+        computed = pdist(observations, 'cosine')
+        # Compare against the reference with eps = 1e-08.
+        self.failUnless(within_tol(computed, expected, 1e-08))
+
+    def test_pdist_cosine_random_nonC(self):
+        "Tests pdist(X, 'test_cosine') [the non-C implementation] on random data."
+        # Input observations and the reference distances, both taken from eo.
+        observations = eo['pdist-double-inp']
+        expected = eo['pdist-cosine']
+        # Distances from the 'test_'-prefixed metric (non-C, per the docstring).
+        computed = pdist(observations, 'test_cosine')
+        self.failUnless(within_tol(computed, expected, 1e-08))
+
+    def test_pdist_cosine_iris(self):
+        "Tests pdist(X, 'cosine') on the Iris data set."
+        # Iris observations and the reference distances, both taken from eo.
+        observations = eo['iris']
+        expected = eo['pdist-cosine-iris']
+
+        # Distances produced by the metric under test.
+        computed = pdist(observations, 'cosine')
+        # Compare against the reference with eps = 1e-08.
+        self.failUnless(within_tol(computed, expected, 1e-08))
+
+    def test_pdist_cosine_iris_float32(self):
+        "Tests pdist(X, 'cosine') on the Iris data set. (float32)"
+        # Looser tolerance than the 1e-08 of the double-precision Iris test.
+        eps = 1e-07
+        # Get the data: the input matrix (cast to float32) and the right output.
+        X = numpy.float32(eo['iris'])
+        Y_right = eo['pdist-cosine-iris']
+
+        Y_test1 = pdist(X, 'cosine')
+        # No diagnostic output here: the assertion alone decides the result.
+        self.failUnless(within_tol(Y_test1, Y_right, eps))
+
+    def test_pdist_cosine_iris_nonC(self):
+        "Tests pdist(X, 'test_cosine') [the non-C implementation] on the Iris data set."
+        # Iris observations and the reference distances, both taken from eo.
+        observations = eo['iris']
+        expected = eo['pdist-cosine-iris']
+        # Distances from the 'test_'-prefixed metric (non-C, per the docstring).
+        computed = pdist(observations, 'test_cosine')
+        self.failUnless(within_tol(computed, expected, 1e-08))
+
+    ################### pdist: cityblock
+    def test_pdist_cityblock_random(self):
+        "Tests pdist(X, 'cityblock') on random data."
+        # Input observations and the reference distances, both taken from eo.
+        observations = eo['pdist-double-inp']
+        expected = eo['pdist-cityblock']
+        # Distances produced by the metric under test.
+        computed = pdist(observations, 'cityblock')
+        # Compare against the reference with eps = 1e-06.
+        self.failUnless(within_tol(computed, expected, 1e-06))
+
+    def test_pdist_cityblock_random_float32(self):
+        "Tests pdist(X, 'cityblock') on random data. (float32)"
+        # Input cast to float32; reference distances taken from eo unchanged.
+        observations = numpy.float32(eo['pdist-double-inp'])
+        expected = eo['pdist-cityblock']
+        # Distances produced by the metric under test.
+        computed = pdist(observations, 'cityblock')
+        # Compare against the reference with eps = 1e-06.
+        self.failUnless(within_tol(computed, expected, 1e-06))
+
+    def test_pdist_cityblock_random_nonC(self):
+        "Tests pdist(X, 'test_cityblock') [the non-C implementation] on random data."
+        # Input observations and the reference distances, both taken from eo.
+        observations = eo['pdist-double-inp']
+        expected = eo['pdist-cityblock']
+        # Distances from the 'test_'-prefixed metric (non-C, per the docstring).
+        computed = pdist(observations, 'test_cityblock')
+        self.failUnless(within_tol(computed, expected, 1e-06))
+
+    def test_pdist_cityblock_iris(self):
+        "Tests pdist(X, 'cityblock') on the Iris data set."
+        # Iris observations and the reference distances, both taken from eo.
+        observations = eo['iris']
+        expected = eo['pdist-cityblock-iris']
+
+        # Distances produced by the metric under test.
+        computed = pdist(observations, 'cityblock')
+        # Compare against the reference with eps = 1e-14.
+        self.failUnless(within_tol(computed, expected, 1e-14))
+
+    def test_pdist_cityblock_iris_float32(self):
+        "Tests pdist(X, 'cityblock') on the Iris data set. (float32)"
+        eps = 1e-06
+        # Get the data: the float32 input matrix and the right output.
+        X = numpy.float32(eo['iris'])
+        Y_right = eo['pdist-cityblock-iris']
+
+        Y_test1 = pdist(X, 'cityblock')
+        # No diagnostic output here: the assertion alone decides the result.
+        self.failUnless(within_tol(Y_test1, Y_right, eps))
+
+    def test_pdist_cityblock_iris_nonC(self):
+        "Tests pdist(X, 'test_cityblock') [the non-C implementation] on the Iris data set."
+        # Iris observations and the reference distances, both taken from eo.
+        observations = eo['iris']
+        expected = eo['pdist-cityblock-iris']
+        # Distances from the 'test_'-prefixed metric (non-C, per the docstring).
+        computed = pdist(observations, 'test_cityblock')
+        self.failUnless(within_tol(computed, expected, 1e-14))
+
+    ################### pdist: correlation
+    def test_pdist_correlation_random(self):
+        "Tests pdist(X, 'correlation') on random data."
+        # Input observations and the reference distances, both taken from eo.
+        observations = eo['pdist-double-inp']
+        expected = eo['pdist-correlation']
+
+        # Distances produced by the metric under test.
+        computed = pdist(observations, 'correlation')
+        # Compare against the reference with eps = 1e-07.
+        self.failUnless(within_tol(computed, expected, 1e-07))
+
+    def test_pdist_correlation_random_float32(self):
+        "Tests pdist(X, 'correlation') on random data. (float32)"
+        # Input cast to float32; reference distances taken from eo unchanged.
+        observations = numpy.float32(eo['pdist-double-inp'])
+        expected = eo['pdist-correlation']
+
+        # Distances produced by the metric under test.
+        computed = pdist(observations, 'correlation')
+        # Compare against the reference with eps = 1e-07.
+        self.failUnless(within_tol(computed, expected, 1e-07))
+
+    def test_pdist_correlation_random_nonC(self):
+        "Tests pdist(X, 'test_correlation') [the non-C implementation] on random data."
+        # Input observations and the reference distances, both taken from eo.
+        observations = eo['pdist-double-inp']
+        expected = eo['pdist-correlation']
+        # Distances from the 'test_'-prefixed metric (non-C, per the docstring).
+        computed = pdist(observations, 'test_correlation')
+        self.failUnless(within_tol(computed, expected, 1e-07))
+
+    def test_pdist_correlation_iris(self):
+        "Tests pdist(X, 'correlation') on the Iris data set."
+        # Iris observations and the reference distances, both taken from eo.
+        observations = eo['iris']
+        expected = eo['pdist-correlation-iris']
+
+        # Distances produced by the metric under test.
+        computed = pdist(observations, 'correlation')
+        # Compare against the reference with eps = 1e-08.
+        self.failUnless(within_tol(computed, expected, 1e-08))
+
+    def test_pdist_correlation_iris_float32(self):
+        "Tests pdist(X, 'correlation') on the Iris data set. (float32)"
+        # Same tolerance as the euclidean and cityblock float32 Iris tests.
+        eps = 1e-06
+        # Get the data: the float32 input matrix and the right output.
+        X = numpy.float32(eo['iris'])
+        Y_right = eo['pdist-correlation-iris']
+
+        Y_test1 = pdist(X, 'correlation')
+        self.failUnless(within_tol(Y_test1, Y_right, eps))
+
+    def test_pdist_correlation_iris_nonC(self):
+        "Tests pdist(X, 'test_correlation') [the non-C implementation] on the Iris data set."
+        # Iris observations and the reference distances, both taken from eo.
+        observations = eo['iris']
+        expected = eo['pdist-correlation-iris']
+        # Distances from the 'test_'-prefixed metric (non-C, per the docstring).
+        computed = pdist(observations, 'test_correlation')
+        # Compare against the reference with eps = 1e-08.
+        self.failUnless(within_tol(computed, expected, 1e-08))
+
+    ################# minkowski
+
+    def test_pdist_minkowski_random(self):
+        "Tests pdist(X, 'minkowski') on random data."
+        # Input observations and the reference distances, both taken from eo.
+        observations = eo['pdist-double-inp']
+        expected = eo['pdist-minkowski-3.2']
+
+        # Distances produced by the metric under test, called with 3.2.
+        computed = pdist(observations, 'minkowski', 3.2)
+        # Compare against the reference with eps = 1e-05.
+        self.failUnless(within_tol(computed, expected, 1e-05))
+
+    def test_pdist_minkowski_random_float32(self):
+        "Tests pdist(X, 'minkowski') on random data. (float32)"
+        # Input cast to float32; reference distances taken from eo unchanged.
+        observations = numpy.float32(eo['pdist-double-inp'])
+        expected = eo['pdist-minkowski-3.2']
+
+        # Distances produced by the metric under test, called with 3.2.
+        computed = pdist(observations, 'minkowski', 3.2)
+        # Compare against the reference with eps = 1e-05.
+        self.failUnless(within_tol(computed, expected, 1e-05))
+
+    def test_pdist_minkowski_random_nonC(self):
+        "Tests pdist(X, 'test_minkowski') [the non-C implementation] on random data."
+        # Input observations and the reference distances, both taken from eo.
+        observations = eo['pdist-double-inp']
+        expected = eo['pdist-minkowski-3.2']
+        # Distances from the 'test_'-prefixed metric (non-C, per the docstring).
+        computed = pdist(observations, 'test_minkowski', 3.2)
+        self.failUnless(within_tol(computed, expected, 1e-05))
+
+    def test_pdist_minkowski_3_2_iris(self):
+        "Tests pdist(X, 'minkowski', 3.2) on iris data."
+        eps = 1e-07
+        # Get the data: the input matrix and the right output.
+        X = eo['iris']
+        Y_right = eo['pdist-minkowski-3.2-iris']
+        # 3.2 is the extra argument this test passes to the minkowski metric.
+        Y_test1 = pdist(X, 'minkowski', 3.2)
+        self.failUnless(within_tol(Y_test1, Y_right, eps))
+
+    def test_pdist_minkowski_3_2_iris_float32(self):
+        "Tests pdist(X, 'minkowski', 3.2) on iris data. (float32)"
+        # Same tolerance as the 5.8 float32 Iris test defined further down.
+        eps = 1e-06
+        # Get the data: the float32 input matrix and the right output.
+        X = numpy.float32(eo['iris'])
+        Y_right = eo['pdist-minkowski-3.2-iris']
+        Y_test1 = pdist(X, 'minkowski', 3.2)
+        self.failUnless(within_tol(Y_test1, Y_right, eps))
+
+    def test_pdist_minkowski_3_2_iris_nonC(self):
+        "Tests pdist(X, 'test_minkowski', 3.2) [the non-C implementation] on iris data."
+        eps = 1e-07
+        # Get the data: the input matrix and the right output.
+        X = eo['iris']
+        Y_right = eo['pdist-minkowski-3.2-iris']
+        Y_test2 = pdist(X, 'test_minkowski', 3.2)
+        self.failUnless(within_tol(Y_test2, Y_right, eps))
+
+    def test_pdist_minkowski_5_8_iris(self):
+        "Tests pdist(X, 'minkowski', 5.8) on iris data."
+        eps = 1e-07
+        # Get the data: the input matrix and the right output.
+        X = eo['iris']
+        Y_right = eo['pdist-minkowski-5.8-iris']
+        # 5.8 is the extra argument this test passes to the minkowski metric.
+        Y_test1 = pdist(X, 'minkowski', 5.8)
+        self.failUnless(within_tol(Y_test1, Y_right, eps))
+
+    def test_pdist_minkowski_5_8_iris_float32(self):
+        "Tests pdist(X, 'minkowski', 5.8) on iris data. (float32)"
+        # Looser than the 1e-07 used by the double-precision 5.8 Iris test.
+        eps = 1e-06
+        # Get the data: the float32 input matrix and the right output.
+        X = numpy.float32(eo['iris'])
+        Y_right = eo['pdist-minkowski-5.8-iris']
+
+        Y_test1 = pdist(X, 'minkowski', 5.8)
+        self.failUnless(within_tol(Y_test1, Y_right, eps))
+
+    def test_pdist_minkowski_5_8_iris_nonC(self):
+        "Tests pdist(X, 'test_minkowski', 5.8) [the non-C implementation] on iris data."
+        eps = 1e-07
+        # Get the data: the input matrix and the right output.
+        X = eo['iris']
+        Y_right = eo['pdist-minkowski-5.8-iris']
+        Y_test2 = pdist(X, 'test_minkowski', 5.8)
+        self.failUnless(within_tol(Y_test2, Y_right, eps))
+
+    ################### pdist: hamming
+    def test_pdist_hamming_random(self):
+        "Tests pdist(X, 'hamming') on random data."
+        # Boolean input and the reference distances, both taken from eo.
+        observations = eo['pdist-boolean-inp']
+        expected = eo['pdist-hamming']
+
+        # Distances produced by the metric under test.
+        computed = pdist(observations, 'hamming')
+        # Compare against the reference with eps = 1e-07.
+        self.failUnless(within_tol(computed, expected, 1e-07))
+
+    def test_pdist_hamming_random_float32(self):
+        "Tests pdist(X, 'hamming') on random data. (float32)"
+        eps = 1e-07
+        # Get the data: the boolean input matrix cast to float32 and the right output.
+        X = numpy.float32(eo['pdist-boolean-inp'])
+        Y_right = eo['pdist-hamming']
+
+        Y_test1 = pdist(X, 'hamming')
+        # NOTE(review): test_pdist_dhamming_random_float32 has the same body.
+        self.failUnless(within_tol(Y_test1, Y_right, eps))
+
+    def test_pdist_hamming_random_nonC(self):
+        "Tests pdist(X, 'test_hamming') [the non-C implementation] on random data."
+        # Boolean input and the reference distances, both taken from eo.
+        observations = eo['pdist-boolean-inp']
+        expected = eo['pdist-hamming']
+        # Distances from the 'test_'-prefixed metric (non-C, per the docstring).
+        computed = pdist(observations, 'test_hamming')
+        # Compare against the reference with eps = 1e-07.
+        self.failUnless(within_tol(computed, expected, 1e-07))
+
+    ################### pdist: hamming (double)
+    def test_pdist_dhamming_random(self):
+        "Tests pdist(X, 'hamming') on random data."
+        # Boolean input cast to float64; reference distances taken from eo.
+        observations = numpy.float64(eo['pdist-boolean-inp'])
+        expected = eo['pdist-hamming']
+        # Distances produced by the metric under test.
+        computed = pdist(observations, 'hamming')
+        # Compare against the reference with eps = 1e-07.
+        self.failUnless(within_tol(computed, expected, 1e-07))
+
+    def test_pdist_dhamming_random_float32(self):
+        "Tests pdist(X, 'hamming') on random data. (float32)"
+        # Boolean input cast to float32; reference distances taken from eo.
+        observations = numpy.float32(eo['pdist-boolean-inp'])
+        expected = eo['pdist-hamming']
+        # Distances produced by the metric under test.
+        computed = pdist(observations, 'hamming')
+        # Compare against the reference with eps = 1e-07.
+        self.failUnless(within_tol(computed, expected, 1e-07))
+
+    def test_pdist_dhamming_random_nonC(self):
+        "Tests pdist(X, 'test_hamming') [the non-C implementation] on random data."
+        # Boolean input cast to float64; reference distances taken from eo.
+        observations = numpy.float64(eo['pdist-boolean-inp'])
+        expected = eo['pdist-hamming']
+        # Distances from the 'test_'-prefixed metric (non-C, per the docstring).
+        computed = pdist(observations, 'test_hamming')
+        # Compare against the reference with eps = 1e-07.
+        self.failUnless(within_tol(computed, expected, 1e-07))
+
+    ################### pdist: jaccard
+    def test_pdist_jaccard_random(self):
+        "Tests pdist(X, 'jaccard') on random data."
+        # Boolean input and the reference distances, both taken from eo.
+        observations = eo['pdist-boolean-inp']
+        expected = eo['pdist-jaccard']
+
+        # Distances produced by the metric under test.
+        computed = pdist(observations, 'jaccard')
+        # Compare against the reference with eps = 1e-08.
+        self.failUnless(within_tol(computed, expected, 1e-08))
+
+    def test_pdist_jaccard_random_float32(self):
+        "Tests pdist(X, 'jaccard') on random data. (float32)"
+        # Boolean input cast to float32; reference distances taken from eo.
+        observations = numpy.float32(eo['pdist-boolean-inp'])
+        expected = eo['pdist-jaccard']
+
+        # Distances produced by the metric under test.
+        computed = pdist(observations, 'jaccard')
+        # Compare against the reference with eps = 1e-08.
+        self.failUnless(within_tol(computed, expected, 1e-08))
+
+    def test_pdist_jaccard_random_nonC(self):
+        "Tests pdist(X, 'test_jaccard') [the non-C implementation] on random data."
+        # Boolean input and the reference distances, both taken from eo.
+        observations = eo['pdist-boolean-inp']
+        expected = eo['pdist-jaccard']
+        # Distances from the 'test_'-prefixed metric (non-C, per the docstring).
+        computed = pdist(observations, 'test_jaccard')
+        # Compare against the reference with eps = 1e-08.
+        self.failUnless(within_tol(computed, expected, 1e-08))
+
+    ################### pdist: jaccard (double)
+    def test_pdist_djaccard_random(self):
+        "Tests pdist(X, 'jaccard') on random data."
+        # Boolean input cast to float64; reference distances taken from eo.
+        observations = numpy.float64(eo['pdist-boolean-inp'])
+        expected = eo['pdist-jaccard']
+
+        # Distances produced by the metric under test.
+        computed = pdist(observations, 'jaccard')
+        # Compare against the reference with eps = 1e-08.
+        self.failUnless(within_tol(computed, expected, 1e-08))
+
+    def test_pdist_djaccard_random_float32(self):
+        "Tests pdist(X, 'jaccard') on random data. (float32)"
+        # Boolean input cast to float32; reference distances taken from eo.
+        observations = numpy.float32(eo['pdist-boolean-inp'])
+        expected = eo['pdist-jaccard']
+
+        # Distances produced by the metric under test.
+        computed = pdist(observations, 'jaccard')
+        # Compare against the reference with eps = 1e-08.
+        self.failUnless(within_tol(computed, expected, 1e-08))
+
+    def test_pdist_djaccard_random_nonC(self):
+        "Tests pdist(X, 'test_jaccard') [the non-C implementation] on random data."
+        # Boolean input cast to float64; reference distances taken from eo.
+        observations = numpy.float64(eo['pdist-boolean-inp'])
+        expected = eo['pdist-jaccard']
+        # Distances from the 'test_'-prefixed metric (non-C, per the docstring).
+        computed = pdist(observations, 'test_jaccard')
+        # Compare against the reference with eps = 1e-08.
+        self.failUnless(within_tol(computed, expected, 1e-08))
+
+    ################### pdist: chebychev
+    def test_pdist_chebychev_random(self):
+        "Tests pdist(X, 'chebychev') on random data."
+        # Input observations and the reference distances, both taken from eo.
+        observations = eo['pdist-double-inp']
+        expected = eo['pdist-chebychev']
+
+        # Distances produced by the metric under test.
+        computed = pdist(observations, 'chebychev')
+        # Compare against the reference with eps = 1e-08.
+        self.failUnless(within_tol(computed, expected, 1e-08))
+
+    def test_pdist_chebychev_random_float32(self):
+        "Tests pdist(X, 'chebychev') on random data. (float32)"
+        # Looser tolerance than the 1e-08 of the double-precision random test.
+        eps = 1e-07
+        # Get the data: the float32 input matrix and the right output.
+        X = numpy.float32(eo['pdist-double-inp'])
+        Y_right = eo['pdist-chebychev']
+
+        Y_test1 = pdist(X, 'chebychev')
+        self.failUnless(within_tol(Y_test1, Y_right, eps))
+
+    def test_pdist_chebychev_random_nonC(self):
+        "Tests pdist(X, 'test_chebychev') [the non-C implementation] on random data."
+        # Input observations and the reference distances, both taken from eo.
+        observations = eo['pdist-double-inp']
+        expected = eo['pdist-chebychev']
+        # Distances from the 'test_'-prefixed metric (non-C, per the docstring).
+        computed = pdist(observations, 'test_chebychev')
+        # Compare against the reference with eps = 1e-08.
+        self.failUnless(within_tol(computed, expected, 1e-08))
+
+    def test_pdist_chebychev_iris(self):
+        "Tests pdist(X, 'chebychev') on the Iris data set."
+        # Iris observations and the reference distances, both taken from eo.
+        observations = eo['iris']
+        expected = eo['pdist-chebychev-iris']
+        # Distances produced by the metric under test.
+        computed = pdist(observations, 'chebychev')
+        # Compare against the reference with eps = 1e-15.
+        self.failUnless(within_tol(computed, expected, 1e-15))
+
+    def test_pdist_chebychev_iris_float32(self):
+        "Tests pdist(X, 'chebychev') on the Iris data set. (float32)"
+        eps = 1e-06
+        # Get the data: the float32 input matrix and the right output.
+        X = numpy.float32(eo['iris'])
+        Y_right = eo['pdist-chebychev-iris']
+        Y_test1 = pdist(X, 'chebychev')
+        # No diagnostic output here: the assertion alone decides the result.
+        self.failUnless(within_tol(Y_test1, Y_right, eps))
+
+    def test_pdist_chebychev_iris_nonC(self):
+        "Tests pdist(X, 'test_chebychev') [the non-C implementation] on the Iris data set."
+        # Iris observations and the reference distances, both taken from eo.
+        observations = eo['iris']
+        expected = eo['pdist-chebychev-iris']
+        # Distances from the 'test_'-prefixed metric (non-C, per the docstring).
+        computed = pdist(observations, 'test_chebychev')
+        # Compare against the reference with eps = 1e-15.
+        self.failUnless(within_tol(computed, expected, 1e-15))
+
+    def test_pdist_matching_mtica1(self):
+        "Tests matching(*,*) with mtica example #1 (nums)."
+        u, v = [1, 0, 1, 1, 0], [1, 1, 0, 1, 1]
+        # The same vectors are fed as default-typed and as boolean arrays.
+        as_nums = matching(numpy.array(u), numpy.array(v))
+        as_bools = matching(numpy.array(u, dtype=numpy.bool), numpy.array(v, dtype=numpy.bool))
+        self.failUnless(numpy.abs(as_nums - 0.6) <= 1e-10)
+        self.failUnless(numpy.abs(as_bools - 0.6) <= 1e-10)
+
+    def test_pdist_matching_mtica2(self):
+        "Tests matching(*,*) with mtica example #2."
+        u, v = [1, 0, 1], [1, 1, 0]
+        # The same vectors are fed as default-typed and as boolean arrays.
+        as_nums = matching(numpy.array(u), numpy.array(v))
+        as_bools = matching(numpy.array(u, dtype=numpy.bool), numpy.array(v, dtype=numpy.bool))
+        self.failUnless(numpy.abs(as_nums - (2.0/3.0)) <= 1e-10)
+        self.failUnless(numpy.abs(as_bools - (2.0/3.0)) <= 1e-10)
+
+    def test_pdist_matching_match(self):
+        "Tests pdist(X, 'matching') to see if the two implementations match on random boolean input data."
+        # D holds the data as loaded by numpy.loadtxt; B is its boolean cast.
+        D = eo['random-bool-data']
+        B = numpy.bool_(D)
+        eps = 1e-10
+        # 'matching' on booleans versus the 'test_matching' code path on
+        # boolean and on uncast input.
+        y1 = pdist(B, "matching")
+        y2 = pdist(B, "test_matching")
+        y3 = pdist(D, "test_matching")
+        self.failUnless(within_tol(y1, y2, eps))
+        self.failUnless(within_tol(y2, y3, eps))
+
+    def test_pdist_jaccard_mtica1(self):
+        "Tests jaccard(*,*) with mtica example #1."
+        u, v = [1, 0, 1, 1, 0], [1, 1, 0, 1, 1]
+        # The same vectors are fed as default-typed and as boolean arrays.
+        as_nums = jaccard(numpy.array(u), numpy.array(v))
+        as_bools = jaccard(numpy.array(u, dtype=numpy.bool), numpy.array(v, dtype=numpy.bool))
+        self.failUnless(numpy.abs(as_nums - 0.6) <= 1e-10)
+        self.failUnless(numpy.abs(as_bools - 0.6) <= 1e-10)
+
+    def test_pdist_jaccard_mtica2(self):
+        "Tests jaccard(*,*) with mtica example #2."
+        u, v = [1, 0, 1], [1, 1, 0]
+        # The same vectors are fed as default-typed and as boolean arrays.
+        as_nums = jaccard(numpy.array(u), numpy.array(v))
+        as_bools = jaccard(numpy.array(u, dtype=numpy.bool), numpy.array(v, dtype=numpy.bool))
+        self.failUnless(numpy.abs(as_nums - (2.0/3.0)) <= 1e-10)
+        self.failUnless(numpy.abs(as_bools - (2.0/3.0)) <= 1e-10)
+
+    def test_pdist_jaccard_match(self):
+        "Checks that pdist(X, 'jaccard') and pdist(X, 'test_jaccard') agree on the 'random-bool-data' fixture."
+        X = eo['random-bool-data']
+        print X.shape, X.dtype
+        tol = 1e-10
+        y_builtin = pdist(X, "jaccard")
+        y_test = pdist(X, "test_jaccard")
+        # Same metric again on the fixture cast to boolean.
+        y_test_bool = pdist(numpy.bool_(X), "test_jaccard")
+        gap_builtin_vs_test = numpy.abs(y_builtin - y_test).max()
+        print gap_builtin_vs_test
+        gap_test_vs_bool = numpy.abs(y_test - y_test_bool).max()
+        print gap_test_vs_bool
+        self.failUnless(within_tol(y_builtin, y_test, tol))
+        self.failUnless(within_tol(y_test, y_test_bool, tol))
+
+    def test_pdist_yule_mtica1(self):
+        "Checks yule(*,*) against mtica example #1 for integer and boolean vectors."
+        u, v = [1, 0, 1, 1, 0], [1, 1, 0, 1, 1]
+        expected, tol = 2.0, 1e-10
+        # Same vectors twice: default integer dtype, then explicit boolean dtype.
+        d_int = yule(numpy.array(u), numpy.array(v))
+        d_bool = yule(numpy.array(u, dtype=numpy.bool),
+                      numpy.array(v, dtype=numpy.bool))
+        print d_int
+        self.failUnless(numpy.abs(d_int - expected) <= tol)
+        self.failUnless(numpy.abs(d_bool - expected) <= tol)
+
+    def test_pdist_yule_mtica2(self):
+        "Checks yule(*,*) against mtica example #2 for integer and boolean vectors."
+        u, v = [1, 0, 1], [1, 1, 0]
+        expected, tol = 2.0, 1e-10
+        # Same vectors twice: default integer dtype, then explicit boolean dtype.
+        d_int = yule(numpy.array(u), numpy.array(v))
+        d_bool = yule(numpy.array(u, dtype=numpy.bool),
+                      numpy.array(v, dtype=numpy.bool))
+        print d_int
+        self.failUnless(numpy.abs(d_int - expected) <= tol)
+        self.failUnless(numpy.abs(d_bool - expected) <= tol)
+
+    def test_pdist_yule_match(self):
+        "Checks that pdist(X, 'yule') and pdist(X, 'test_yule') agree on the 'random-bool-data' fixture."
+        X = eo['random-bool-data']
+        print X.shape, X.dtype
+        tol = 1e-10
+        y_builtin = pdist(X, "yule")
+        y_test = pdist(X, "test_yule")
+        # Same metric again on the fixture cast to boolean.
+        y_test_bool = pdist(numpy.bool_(X), "test_yule")
+        gap_builtin_vs_test = numpy.abs(y_builtin - y_test).max()
+        print gap_builtin_vs_test
+        gap_test_vs_bool = numpy.abs(y_test - y_test_bool).max()
+        print gap_test_vs_bool
+        self.failUnless(within_tol(y_builtin, y_test, tol))
+        self.failUnless(within_tol(y_test, y_test_bool, tol))
+
+    def test_pdist_dice_mtica1(self):
+        "Checks dice(*,*) against mtica example #1 for integer and boolean vectors."
+        u, v = [1, 0, 1, 1, 0], [1, 1, 0, 1, 1]
+        expected, tol = 3.0 / 7.0, 1e-10
+        # Same vectors twice: default integer dtype, then explicit boolean dtype.
+        d_int = dice(numpy.array(u), numpy.array(v))
+        d_bool = dice(numpy.array(u, dtype=numpy.bool),
+                      numpy.array(v, dtype=numpy.bool))
+        print d_int
+        self.failUnless(numpy.abs(d_int - expected) <= tol)
+        self.failUnless(numpy.abs(d_bool - expected) <= tol)
+
+    def test_pdist_dice_mtica2(self):
+        "Checks dice(*,*) against mtica example #2 for integer and boolean vectors."
+        u, v = [1, 0, 1], [1, 1, 0]
+        expected, tol = 0.5, 1e-10
+        # Same vectors twice: default integer dtype, then explicit boolean dtype.
+        d_int = dice(numpy.array(u), numpy.array(v))
+        d_bool = dice(numpy.array(u, dtype=numpy.bool),
+                      numpy.array(v, dtype=numpy.bool))
+        print d_int
+        self.failUnless(numpy.abs(d_int - expected) <= tol)
+        self.failUnless(numpy.abs(d_bool - expected) <= tol)
+
+    def test_pdist_dice_match(self):
+        "Tests pdist(X, 'dice') against pdist(X, 'test_dice') on the 'random-bool-data' fixture, raw and cast to boolean."
+        D = eo['random-bool-data']
+        print D.shape, D.dtype
+        eps = 1e-10
+        y1 = pdist(D, "dice")
+        y2 = pdist(D, "test_dice")
+        # Cast to boolean, as the sibling *_match tests do; repeating the y2
+        # call verbatim would make the y2/y3 comparison below vacuous.
+        y3 = pdist(numpy.bool_(D), "test_dice")
+        print numpy.abs(y1-y2).max()
+        print numpy.abs(y2-y3).max()
+        self.failUnless(within_tol(y1, y2, eps))
+        self.failUnless(within_tol(y2, y3, eps))
+
+    def test_pdist_sokalsneath_mtica1(self):
+        "Checks sokalsneath(*,*) against mtica example #1 for integer and boolean vectors."
+        u, v = [1, 0, 1, 1, 0], [1, 1, 0, 1, 1]
+        expected, tol = 3.0 / 4.0, 1e-10
+        # Same vectors twice: default integer dtype, then explicit boolean dtype.
+        d_int = sokalsneath(numpy.array(u), numpy.array(v))
+        d_bool = sokalsneath(numpy.array(u, dtype=numpy.bool),
+                             numpy.array(v, dtype=numpy.bool))
+        print d_int
+        self.failUnless(numpy.abs(d_int - expected) <= tol)
+        self.failUnless(numpy.abs(d_bool - expected) <= tol)
+
+    def test_pdist_sokalsneath_mtica2(self):
+        "Checks sokalsneath(*,*) against mtica example #2 for integer and boolean vectors."
+        u, v = [1, 0, 1], [1, 1, 0]
+        expected, tol = 4.0 / 5.0, 1e-10
+        # Same vectors twice: default integer dtype, then explicit boolean dtype.
+        d_int = sokalsneath(numpy.array(u), numpy.array(v))
+        d_bool = sokalsneath(numpy.array(u, dtype=numpy.bool),
+                             numpy.array(v, dtype=numpy.bool))
+        print d_int
+        self.failUnless(numpy.abs(d_int - expected) <= tol)
+        self.failUnless(numpy.abs(d_bool - expected) <= tol)
+
+    def test_pdist_sokalsneath_match(self):
+        "Checks that pdist(X, 'sokalsneath') and pdist(X, 'test_sokalsneath') agree on the 'random-bool-data' fixture."
+        X = eo['random-bool-data']
+        print X.shape, X.dtype
+        tol = 1e-10
+        y_builtin = pdist(X, "sokalsneath")
+        y_test = pdist(X, "test_sokalsneath")
+        # Same metric again on the fixture cast to boolean.
+        y_test_bool = pdist(numpy.bool_(X), "test_sokalsneath")
+        gap_builtin_vs_test = numpy.abs(y_builtin - y_test).max()
+        print gap_builtin_vs_test
+        gap_test_vs_bool = numpy.abs(y_test - y_test_bool).max()
+        print gap_test_vs_bool
+        self.failUnless(within_tol(y_builtin, y_test, tol))
+        self.failUnless(within_tol(y_test, y_test_bool, tol))
+
+    def test_pdist_rogerstanimoto_mtica1(self):
+        "Checks rogerstanimoto(*,*) against mtica example #1 for integer and boolean vectors."
+        u, v = [1, 0, 1, 1, 0], [1, 1, 0, 1, 1]
+        expected, tol = 3.0 / 4.0, 1e-10
+        # Same vectors twice: default integer dtype, then explicit boolean dtype.
+        d_int = rogerstanimoto(numpy.array(u), numpy.array(v))
+        d_bool = rogerstanimoto(numpy.array(u, dtype=numpy.bool),
+                                numpy.array(v, dtype=numpy.bool))
+        print d_int
+        self.failUnless(numpy.abs(d_int - expected) <= tol)
+        self.failUnless(numpy.abs(d_bool - expected) <= tol)
+
+    def test_pdist_rogerstanimoto_mtica2(self):
+        "Checks rogerstanimoto(*,*) against mtica example #2 for integer and boolean vectors."
+        u, v = [1, 0, 1], [1, 1, 0]
+        expected, tol = 4.0 / 5.0, 1e-10
+        # Same vectors twice: default integer dtype, then explicit boolean dtype.
+        d_int = rogerstanimoto(numpy.array(u), numpy.array(v))
+        d_bool = rogerstanimoto(numpy.array(u, dtype=numpy.bool),
+                                numpy.array(v, dtype=numpy.bool))
+        print d_int
+        self.failUnless(numpy.abs(d_int - expected) <= tol)
+        self.failUnless(numpy.abs(d_bool - expected) <= tol)
+
+    def test_pdist_rogerstanimoto_match(self):
+        "Checks that pdist(X, 'rogerstanimoto') and pdist(X, 'test_rogerstanimoto') agree on the 'random-bool-data' fixture."
+        X = eo['random-bool-data']
+        print X.shape, X.dtype
+        tol = 1e-10
+        y_builtin = pdist(X, "rogerstanimoto")
+        y_test = pdist(X, "test_rogerstanimoto")
+        # Same metric again on the fixture cast to boolean.
+        y_test_bool = pdist(numpy.bool_(X), "test_rogerstanimoto")
+        gap_builtin_vs_test = numpy.abs(y_builtin - y_test).max()
+        print gap_builtin_vs_test
+        gap_test_vs_bool = numpy.abs(y_test - y_test_bool).max()
+        print gap_test_vs_bool
+        self.failUnless(within_tol(y_builtin, y_test, tol))
+        self.failUnless(within_tol(y_test, y_test_bool, tol))
+
+    def test_pdist_russellrao_mtica1(self):
+        "Checks russellrao(*,*) against mtica example #1 for integer and boolean vectors."
+        u, v = [1, 0, 1, 1, 0], [1, 1, 0, 1, 1]
+        expected, tol = 3.0 / 5.0, 1e-10
+        # Same vectors twice: default integer dtype, then explicit boolean dtype.
+        d_int = russellrao(numpy.array(u), numpy.array(v))
+        d_bool = russellrao(numpy.array(u, dtype=numpy.bool),
+                            numpy.array(v, dtype=numpy.bool))
+        print d_int
+        self.failUnless(numpy.abs(d_int - expected) <= tol)
+        self.failUnless(numpy.abs(d_bool - expected) <= tol)
+
+    def test_pdist_russellrao_mtica2(self):
+        "Checks russellrao(*,*) against mtica example #2 for integer and boolean vectors."
+        u, v = [1, 0, 1], [1, 1, 0]
+        expected, tol = 2.0 / 3.0, 1e-10
+        # Same vectors twice: default integer dtype, then explicit boolean dtype.
+        d_int = russellrao(numpy.array(u), numpy.array(v))
+        d_bool = russellrao(numpy.array(u, dtype=numpy.bool),
+                            numpy.array(v, dtype=numpy.bool))
+        print d_int
+        self.failUnless(numpy.abs(d_int - expected) <= tol)
+        self.failUnless(numpy.abs(d_bool - expected) <= tol)
+
+    def test_pdist_russellrao_match(self):
+        "Checks that pdist(X, 'russellrao') and pdist(X, 'test_russellrao') agree on the 'random-bool-data' fixture."
+        X = eo['random-bool-data']
+        print X.shape, X.dtype
+        tol = 1e-10
+        y_builtin = pdist(X, "russellrao")
+        y_test = pdist(X, "test_russellrao")
+        # Same metric again on the fixture cast to boolean.
+        y_test_bool = pdist(numpy.bool_(X), "test_russellrao")
+        gap_builtin_vs_test = numpy.abs(y_builtin - y_test).max()
+        print gap_builtin_vs_test
+        gap_test_vs_bool = numpy.abs(y_test - y_test_bool).max()
+        print gap_test_vs_bool
+        self.failUnless(within_tol(y_builtin, y_test, tol))
+        self.failUnless(within_tol(y_test, y_test_bool, tol))
+
+    def test_pdist_sokalmichener_match(self):
+        "Checks that pdist(X, 'sokalmichener') and pdist(X, 'test_sokalmichener') agree on the 'random-bool-data' fixture."
+        X = eo['random-bool-data']
+        print X.shape, X.dtype
+        tol = 1e-10
+        y_builtin = pdist(X, "sokalmichener")
+        y_test = pdist(X, "test_sokalmichener")
+        # Same metric again on the fixture cast to boolean.
+        y_test_bool = pdist(numpy.bool_(X), "test_sokalmichener")
+        gap_builtin_vs_test = numpy.abs(y_builtin - y_test).max()
+        print gap_builtin_vs_test
+        gap_test_vs_bool = numpy.abs(y_test - y_test_bool).max()
+        print gap_test_vs_bool
+        self.failUnless(within_tol(y_builtin, y_test, tol))
+        self.failUnless(within_tol(y_test, y_test_bool, tol))
+
+    def test_pdist_kulsinski_match(self):
+        "Tests pdist(X, 'kulsinski') against pdist(X, 'test_kulsinski') on the 'random-bool-data' fixture, raw and cast to boolean."
+        D = eo['random-bool-data']
+        print D.shape, D.dtype
+        eps = 1e-10
+        y1 = pdist(D, "kulsinski")
+        y2 = pdist(D, "test_kulsinski")
+        y3 = pdist(numpy.bool_(D), "test_kulsinski")
+        print numpy.abs(y1-y2).max()
+        # y3 was previously computed but never inspected; report and check it
+        # the same way the sibling *_match tests do.
+        print numpy.abs(y2-y3).max()
+        self.failUnless(within_tol(y1, y2, eps))
+        self.failUnless(within_tol(y2, y3, eps))
+
+def within_tol(a, b, tol):
+    "Returns whether the largest absolute element-wise difference between a and b is strictly below tol."
+    largest_gap = numpy.abs(a - b).max()
+    return largest_gap < tol




More information about the Scipy-svn mailing list