[Scipy-svn] r5173 - in trunk/scipy/stats: . tests

Sat Nov 22 22:41:19 EST 2008

Author: josef
Date: 2008-11-22 21:41:15 -0600 (Sat, 22 Nov 2008)
New Revision: 5173

Modified:
   trunk/scipy/stats/stats.py
   trunk/scipy/stats/tests/test_stats.py
Log:
correction for friedmanchisquare ticket:113, add tests (including for mstats version)

Modified: trunk/scipy/stats/stats.py
===================================================================

--- trunk/scipy/stats/stats.py	2008-11-22 23:14:44 UTC (rev 5172)
+++ trunk/scipy/stats/stats.py	2008-11-23 03:41:15 UTC (rev 5173)
@@ -190,6 +190,9 @@
 import warnings
 import math
 
+#friedmanchisquare patch uses python sum
+pysum = sum  # save it before it gets overwritten
+
 # Scipy imports.
 from numpy import array, asarray, dot, ma, zeros, sum
 import scipy.special as special
@@ -2139,22 +2142,39 @@
     """Friedman Chi-Square is a non-parametric, one-way within-subjects
     ANOVA.  This function calculates the Friedman Chi-square test for
     repeated measures and returns the result, along with the associated
-    probability value.  It assumes 3 or more repeated measures.  Only 3
-    levels requires a minimum of 10 subjects in the study.  Four levels
-    requires 5 subjects per level(??).
+    probability value.
 
-    Returns: chi-square statistic, associated p-value
+    This function uses Chisquared aproximation of Friedman Chisquared
+    distribution. This is exact only if n > 10 and factor levels > 6. 
+
+    Returns: friedman chi-square statistic, associated p-valueIt assumes 3 or more repeated measures.  Only 3
     """
     k = len(args)
     if k < 3:
         raise ValueError, '\nLess than 3 levels.  Friedman test not appropriate.\n'
     n = len(args[0])
+    for i in range(1,k):
+        if len(args[i]) <> n:
+           raise ValueError, 'Unequal N in friedmanchisquare.  Aborting.'
+    if n < 10 and k < 6:
+        print 'Warning: friedmanchisquare test using Chisquared aproximation'
+
+    # Rank data
     data = apply(_support.abut,args)
     data = data.astype(float)
     for i in range(len(data)):
         data[i] = rankdata(data[i])
-    ssbn = np.sum(np.sum(args,1)**2,axis=0)
-    chisq = 12.0 / (k*n*(k+1)) * ssbn - 3*n*(k+1)
+
+    # Handle ties
+    ties = 0
+    for i in range(len(data)):
+        replist, repnum = scipy.stats.find_repeats(array(data[i]))
+        for t in repnum:
+            ties += t*(t*t-1)
+    c = 1 - ties / float(k*(k*k-1)*n)
+    
+    ssbn = pysum(pysum(data)**2)
+    chisq = ( 12.0 / (k*n*(k+1)) * ssbn - 3*n*(k+1) ) / c
     return chisq, chisqprob(chisq,k-1)
 
 

Modified: trunk/scipy/stats/tests/test_stats.py
===================================================================
--- trunk/scipy/stats/tests/test_stats.py	2008-11-22 23:14:44 UTC (rev 5172)
+++ trunk/scipy/stats/tests/test_stats.py	2008-11-23 03:41:15 UTC (rev 5173)
@@ -956,5 +956,74 @@
     assert_equal(stats.percentileofscore([ 10,20,30,50,60,70,80,90,100,110],0,kind = 'mean'),0.0)
 
 
+
+def test_friedmanchisquare():
+    # see ticket:113
+    # verified with matlab and R
+    #From Demsar "Statistical Comparisons of Classifiers over Multiple Data Sets"
+    #2006, Xf=9.28 (no tie handling, tie corrected Xf >=9.28)
+    x1 = [array([0.763, 0.599, 0.954, 0.628, 0.882, 0.936, 0.661, 0.583,
+                 0.775, 1.0, 0.94, 0.619, 0.972, 0.957]),
+          array([0.768, 0.591, 0.971, 0.661, 0.888, 0.931, 0.668, 0.583,
+                 0.838, 1.0, 0.962, 0.666, 0.981, 0.978]),
+          array([0.771, 0.590, 0.968, 0.654, 0.886, 0.916, 0.609, 0.563,
+                 0.866, 1.0, 0.965, 0.614, 0.9751, 0.946]),
+          array([0.798, 0.569, 0.967, 0.657, 0.898, 0.931, 0.685, 0.625,
+                 0.875, 1.0, 0.962, 0.669, 0.975, 0.970])]
+
+    #From "Bioestadistica para las ciencias de la salud" Xf=18.95 p<0.001:
+    x2 = [array([4,3,5,3,5,3,2,5,4,4,4,3]),
+          array([2,2,1,2,3,1,2,3,2,1,1,3]),
+          array([2,4,3,3,4,3,3,4,4,1,2,1]),
+          array([3,5,4,3,4,4,3,3,3,4,4,4])]
+
+    #From Jerrorl H. Zar, "Biostatistical Analysis"(example 12.6), Xf=10.68, 0.005 < p < 0.01:
+    #Probability from this example is inexact using Chisquare aproximation of Friedman Chisquare.
+    x3 = [array([7.0,9.9,8.5,5.1,10.3]),
+          array([5.3,5.7,4.7,3.5,7.7]),
+          array([4.9,7.6,5.5,2.8,8.4]),
+          array([8.8,8.9,8.1,3.3,9.1])]
+
+    # Hollander & Wolfe (1973), p. 140ff.
+    # from R help file
+    xR = array( [5.40, 5.50, 5.55,
+                 5.85, 5.70, 5.75,
+                 5.20, 5.60, 5.50,
+                 5.55, 5.50, 5.40,
+                 5.90, 5.85, 5.70,
+                 5.45, 5.55, 5.60,
+                 5.40, 5.40, 5.35,
+                 5.45, 5.50, 5.35,
+                 5.25, 5.15, 5.00,
+                 5.85, 5.80, 5.70,
+                 5.25, 5.20, 5.10,
+                 5.65, 5.55, 5.45,
+                 5.60, 5.35, 5.45,
+                 5.05, 5.00, 4.95,
+                 5.50, 5.50, 5.40,
+                 5.45, 5.55, 5.50,
+                 5.55, 5.55, 5.35,
+                 5.45, 5.50, 5.55,
+                 5.50, 5.45, 5.25,
+                 5.65, 5.60, 5.40,
+                 5.70, 5.65, 5.55,
+                 6.30, 6.30, 6.25])
+    xR = xR.reshape((len(xR)/3.0,3)).T
+
+    #assert_array_almost_equal(stats.friedmanchisquare(xR[0],xR[1],xR[2]),(11.1429, 0.003805),3)
+    assert_array_almost_equal(stats.friedmanchisquare(x1[0],x1[1],x1[2],x1[3]),(10.2283464566929, 0.0167215803284414))
+    assert_array_almost_equal(stats.friedmanchisquare(x2[0],x2[1],x2[2],x2[3]),(18.9428571428571, 0.000280938375189499))
+    assert_array_almost_equal(stats.friedmanchisquare(x3[0],x3[1],x3[2],x3[3]),(10.68, 0.0135882729582176))
+    np.testing.assert_raises(ValueError, stats.friedmanchisquare,x3[0],x3[1])
+
+    assert_array_almost_equal(stats.mstats.friedmanchisquare(xR[0],xR[1],xR[2]),(11.1429, 0.003805),4)
+    assert_array_almost_equal(stats.mstats.friedmanchisquare(x1[0],x1[1],x1[2],x1[3]),(10.2283464566929, 0.0167215803284414))
+    # the following fails
+    #assert_array_almost_equal(stats.mstats.friedmanchisquare(x2[0],x2[1],x2[2],x2[3]),(18.9428571428571, 0.000280938375189499))
+    assert_array_almost_equal(stats.mstats.friedmanchisquare(x3[0],x3[1],x3[2],x3[3]),(10.68, 0.0135882729582176))
+    np.testing.assert_raises(ValueError,stats.mstats.friedmanchisquare,x3[0],x3[1])
+
+
+
 if __name__ == "__main__":
     run_module_suite()