[Scipy-svn] r6984 - in trunk/scipy/stats: . tests

Fri Dec 3 01:56:21 EST 2010

Author: warren.weckesser
Date: 2010-12-03 00:56:20 -0600 (Fri, 03 Dec 2010)
New Revision: 6984

Modified:
   trunk/scipy/stats/stats.py
   trunk/scipy/stats/tests/test_stats.py
Log:
BUG: stats: Eliminate the 'TINY' hack in pearsonr, so perfect correlations return probability=0.  Also allow for numerical errors that could make the computed value of r be less than -1.
 

Modified: trunk/scipy/stats/stats.py
===================================================================

--- trunk/scipy/stats/stats.py	2010-12-01 06:07:29 UTC (rev 6983)
+++ trunk/scipy/stats/stats.py	2010-12-03 06:56:20 UTC (rev 6984)
@@ -2198,20 +2198,18 @@
     r_den = n*np.sqrt(ss(xm)*ss(ym))
     r = (r_num / r_den)
 
-    # Presumably, if r > 1, then it is only some small artifact of floating
+    # Presumably, if abs(r) > 1, then it is only some small artifact of floating
     # point arithmetic.
-    r = min(r, 1.0)
+    r = max(min(r, 1.0), -1.0)
     df = n-2
+    if abs(r) == 1.0:
+        prob = 0.0
+    else:
+        t = r * np.sqrt(df / ((1.0 - r) * (1.0 + r)))
+        prob = betai(0.5*df, 0.5, df / (df + t*t))
+    return r, prob
 
-    # Use a small floating point value to prevent divide-by-zero nonsense
-    # fixme: TINY is probably not the right value and this is probably not
-    # the way to be robust. The scheme used in spearmanr is probably better.
-    TINY = 1.0e-20
-    t = r*np.sqrt(df/((1.0-r+TINY)*(1.0+r+TINY)))
-    prob = betai(0.5*df,0.5,df/(df+t*t))
-    return r,prob
 
-
 def fisher_exact(table) :
     """Performs a Fisher exact test on a 2x2 contingency table.
 

Modified: trunk/scipy/stats/tests/test_stats.py
===================================================================
--- trunk/scipy/stats/tests/test_stats.py	2010-12-01 06:07:29 UTC (rev 6983)
+++ trunk/scipy/stats/tests/test_stats.py	2010-12-03 06:56:20 UTC (rev 6984)
@@ -319,7 +319,20 @@
         r = y[0]
         assert_approx_equal(r,1.0)
 
+    def test_r_exactly_pos1(self):
+        a = arange(3.0)
+        b = a
+        r, prob = stats.pearsonr(a,b)
+        assert_equal(r, 1.0)
+        assert_equal(prob, 0.0)
 
+    def test_r_exactly_neg1(self):
+        a = arange(3.0)
+        b = -a
+        r, prob = stats.pearsonr(a,b)
+        assert_equal(r, -1.0)
+        assert_equal(prob, 0.0)
+
 def test_fisher_exact():
     """Some tests to show that fisher_exact() works correctly.