[Scipy-svn] r6701 - in trunk/scipy/stats: . tests

Sat Sep 11 19:16:47 EDT 2010

Author: warren.weckesser
Date: 2010-09-11 18:16:47 -0500 (Sat, 11 Sep 2010)
New Revision: 6701

Modified:
   trunk/scipy/stats/morestats.py
   trunk/scipy/stats/tests/test_morestats.py
Log:
ENH: stats.morestats: updated 'raise' statements, tweaked code a bit, added tests that incorrect arguments raise the expected exceptions.

Modified: trunk/scipy/stats/morestats.py
===================================================================

--- trunk/scipy/stats/morestats.py	2010-09-11 19:20:41 UTC (rev 6700)
+++ trunk/scipy/stats/morestats.py	2010-09-11 23:16:47 UTC (rev 6701)
@@ -157,7 +157,7 @@
     x = ravel(data)
     n = len(x)
     if (n < 2):
-        raise ValueError, "Need at least 2 data-points."
+        raise ValueError("Need at least 2 data-points.")
     xbar = x.mean()
     C = x.var()
     if (n > 1000): # gaussian approximations for large n
@@ -206,8 +206,8 @@
     The nth k-statistic is the unique symmetric unbiased estimator of the nth
     cumulant kappa_n
     """
-    if n>4 or n<1:
-        raise ValueError, "k-statistics only supported for 1<=n<=4"
+    if n > 4 or n < 1:
+        raise ValueError("k-statistics only supported for 1<=n<=4")
     n = int(n)
     S = zeros(n+1,'d')
     data = ravel(data)
@@ -225,21 +225,21 @@
                 4*N*(N+1)*S[1]*S[3] + N*N*(N+1)*S[4]) / \
                 (N*(N-1.0)*(N-2.0)*(N-3.0))
     else:
-        raise ValueError, "Should not be here."
+        raise ValueError("Should not be here.")
 
 def kstatvar(data,n=2):
     """Returns an unbiased estimator of the variance of the k-statistic:  n=1 or 2
     """
     data = ravel(data)
     N = len(data)
-    if n==1:
+    if n == 1:
         return kstat(data,n=2)*1.0/N
-    elif n==2:
+    elif n == 2:
         k2 = kstat(data,n=2)
         k4 = kstat(data,n=4)
         return (2*k2*k2*N + (N-1)*k4)/(N*(N+1))
     else:
-        raise ValueError, "Only n=1 or n=2 supported."
+        raise ValueError("Only n=1 or n=2 supported.")
 
 
 #__all__ = ['probplot','ppcc_max','ppcc_plot','boxcox','boxcox_llf',
@@ -262,9 +262,9 @@
     i = arange(2,N)
     Ui[1:-1] = (i-0.3175)/(N+0.365)
     try:
-        ppf_func = eval('distributions.%s.ppf'%dist)
+        ppf_func = eval('distributions.%s.ppf' % dist)
     except AttributeError:
-        raise dist, "is not a valid distribution with a ppf."
+        raise ValueError("%s is not a valid distribution with a ppf." % dist)
     if sparams is None:
         sparams = ()
     if isscalar(sparams):
@@ -312,7 +312,7 @@
     try:
         ppf_func = eval('distributions.%s.ppf'%dist)
     except AttributeError:
-        raise dist, "is not a valid distribution with a ppf."
+        raise ValueError("%s is not a valid distribution with a ppf." % dist)
     """
     res = inspect.getargspec(ppf_func)
     if not ('loc' == res[0][-2] and 'scale' == res[0][-1] and \
@@ -386,18 +386,18 @@
     while (rootfunc(newlm,x,target) > 0.0) and (N < 500):
         newlm += 0.1
         N +=1
-    if (N==500):
-        raise RuntimeError, "Could not find endpoint."
+    if N == 500:
+        raise RuntimeError("Could not find endpoint.")
     lmplus = optimize.brentq(rootfunc,lmax,newlm,args=(x,target))
     newlm = lmax-0.5
     N = 0
     while (rootfunc(newlm,x,target) > 0.0) and (N < 500):
         newlm += 0.1
         N +=1
-    if (N==500):
-        raise RuntimeError, "Could not find endpoint."
-    lmminus = optimize.brentq(rootfunc,newlm,lmax,args=(x,target))
-    return lmminus,lmplus
+    if N == 500:
+        raise RuntimeError("Could not find endpoint.")
+    lmminus = optimize.brentq(rootfunc, newlm, lmax, args=(x,target))
+    return lmminus, lmplus
 
 def boxcox(x,lmbda=None,alpha=None):
     """Return a positive dataset tranformed by a Box-Cox power transformation.
@@ -411,7 +411,7 @@
     lambda as the third output argument.
     """
     if any(x < 0):
-        raise ValueError, "Data must be positive."
+        raise ValueError("Data must be positive.")
     if lmbda is not None:  # single transformation
         lmbda = lmbda*(x==x)
         y = where(lmbda == 0, log(x), (x**lmbda - 1)/lmbda)
@@ -506,7 +506,7 @@
     """
     N = len(x)
     if N < 3:
-        raise ValueError, "Data must be at least length 3."
+        raise ValueError("Data must be at least length 3.")
     if a is None:
         a = zeros(N,'f')
         init = 0
@@ -603,7 +603,8 @@
 
     """
     if not dist in ['norm','expon','gumbel','extreme1','logistic']:
-        raise ValueError, "Invalid distribution."
+        raise ValueError("Invalid distribution; dist must be 'norm', "
+                            "'expon', 'gumbel', 'extreme1' or 'logistic'.")
     y = sort(x)
     xbar = np.mean(x, axis=0)
     N = len(y)
@@ -632,7 +633,7 @@
         z = distributions.logistic.cdf(w)
         sig = array([25,10,5,2.5,1,0.5])
         critical = around(_Avals_logistic / (1.0+0.25/N),3)
-    elif (dist == 'gumbel') or (dist == 'extreme1'):
+    else:  # (dist == 'gumbel') or (dist == 'extreme1'):
         #the following is incorrect, see ticket:1097
 ##        def fixedsolve(th,xj,N):
 ##            val = stats.sum(xj)*1.0/N
@@ -647,9 +648,7 @@
         z = distributions.gumbel_l.cdf(w)
         sig = array([25,10,5,2.5,1])
         critical = around(_Avals_gumbel / (1.0 + 0.2/sqrt(N)),3)
-    else:
-        raise ValueError("dist has to be one of 'norm','expon','logistic'",
-                         "'gumbel','extreme1'")
+
     i = arange(1,N+1)
     S = sum((2*i-1.0)/N*(log(z)+log(1-z[::-1])),axis=0)
     A2 = -N-S
@@ -722,10 +721,10 @@
     x,y = asarray(x),asarray(y)
     n = len(x)
     m = len(y)
-    if (m < 1):
-        raise ValueError, "Not enough other observations."
-    if (n < 1):
-        raise ValueError, "Not enough test observations."
+    if m < 1:
+        raise ValueError("Not enough other observations.")
+    if n < 1:
+        raise ValueError("Not enough test observations.")
     N = m+n
     xy = r_[x,y]  # combine
     rank = stats.rankdata(xy)
@@ -804,7 +803,7 @@
     """
     k = len(args)
     if k < 2:
-        raise ValueError, "Must enter at least two input sample vectors."
+        raise ValueError("Must enter at least two input sample vectors.")
     Ni = zeros(k)
     ssq = zeros(k,'d')
     for j in range(k):
@@ -966,13 +965,13 @@
     elif len(x) == 1:
         x = x[0]
         if n is None or n < x:
-            raise ValueError, "n must be >= x"
+            raise ValueError("n must be >= x")
         n = np.int_(n)
     else:
-        raise ValueError, "Incorrect length for x."
+        raise ValueError("Incorrect length for x.")
 
     if (p > 1.0) or (p < 0.0):
-        raise ValueError, "p must be in range [0,1]"
+        raise ValueError("p must be in range [0,1]")
 
     d = distributions.binom.pmf(x,n,p)
     rerr = 1+1e-7
@@ -1128,8 +1127,8 @@
     m = len(y)
     xy = r_[x,y]
     N = m+n
-    if (N < 3):
-        raise ValueError, "Not enough observations."
+    if N < 3:
+        raise ValueError("Not enough observations.")
     ranks = stats.rankdata(xy)
     Ri = ranks[:n]
     M = sum((Ri - (N+1.0)/2)**2,axis=0)
@@ -1162,7 +1161,7 @@
     """
     k = len(args)
     if k < 2:
-        raise ValueError, "Must enter at least two input sample vectors."
+        raise ValueError("Must enter at least two input sample vectors.")
     if 'equal_var' in kwds.keys():
         if kwds['equal_var']: evar = 1
         else: evar = 0
@@ -1229,7 +1228,7 @@
     else:
         x, y = map(asarray, (x, y))
         if len(x) <> len(y):
-            raise ValueError, 'Unequal N in wilcoxon.  Aborting.'
+            raise ValueError('Unequal N in wilcoxon.  Aborting.')
         d = x-y
     d = compress(not_equal(d,0),d,axis=-1) # Keep all non-zero differences
     count = len(d)
@@ -1275,8 +1274,8 @@
     """
     N = len(cnt)
     if N < 2:
-        raise ValueError, "At least two moments must be given to" + \
-              "approximate the pdf."
+        raise ValueError("At least two moments must be given to " +
+              "approximate the pdf.")
     totp = poly1d(1)
     sig = sqrt(cnt[1])
     mu = cnt[0]

Modified: trunk/scipy/stats/tests/test_morestats.py
===================================================================
--- trunk/scipy/stats/tests/test_morestats.py	2010-09-11 19:20:41 UTC (rev 6700)
+++ trunk/scipy/stats/tests/test_morestats.py	2010-09-11 23:16:47 UTC (rev 6701)
@@ -24,6 +24,7 @@
 g9 = [1.002, 0.998, 0.996, 0.995, 0.996, 1.004, 1.004, 0.998, 0.999, 0.991]
 g10= [0.991, 0.995, 0.984, 0.994, 0.997, 0.997, 0.991, 0.998, 1.004, 0.997]
 
+
 class TestShapiro(TestCase):
     def test_basic(self):
         x1 = [0.11,7.87,4.61,10.14,7.95,3.14,0.46,
@@ -39,6 +40,12 @@
         assert_almost_equal(w,0.9590270,6)
         assert_almost_equal(pw,0.52460,3)
 
+    def test_bad_arg(self):
+        # Length of x is less than 3.
+        x = [1]
+        assert_raises(ValueError, stats.shapiro, x)
+
+
 class TestAnderson(TestCase):
     def test_normal(self):
         rs = RandomState(1234567890)
@@ -58,7 +65,12 @@
         A,crit,sig = stats.anderson(x2,'expon')
         assert_array_less(crit[:-1], A)
 
+    def test_bad_arg(self):
+        assert_raises(ValueError, stats.anderson, [1], dist='plate_of_shrimp')
+
+
 class TestAnsari(TestCase):
+
     def test_small(self):
         x = [1,2,3,3,4]
         y = [3,2,6,1,6,1,4,1]
@@ -80,13 +92,24 @@
         assert_almost_equal(W,10.0,11)
         assert_almost_equal(pval,0.533333333333333333,7)
 
+    def test_bad_arg(self):
+        assert_raises(ValueError, stats.ansari, [], [1])
+        assert_raises(ValueError, stats.ansari, [1], [])
+
+
 class TestBartlett(TestCase):
+
     def test_data(self):
         args = [g1, g2, g3, g4, g5, g6, g7, g8, g9, g10]
         T, pval = stats.bartlett(*args)
         assert_almost_equal(T,20.78587342806484,7)
         assert_almost_equal(pval,0.0136358632781,7)
 
+    def test_bad_arg(self):
+        """Too few args raises ValueError."""
+        assert_raises(ValueError, stats.bartlett, [1])
+
+
 class TestLevene(TestCase):
 
     def test_data(self):
@@ -127,9 +150,14 @@
 
     def test_bad_center_value(self):
         x = np.linspace(-1,1,21)
-        assert_raises(ValueError, stats.levene, x, x, center='trim')        
+        assert_raises(ValueError, stats.levene, x, x, center='trim')
+        
+    def test_too_few_args(self):
+        assert_raises(ValueError, stats.levene, [1])
 
+
 class TestBinomP(TestCase):
+
     def test_data(self):
         pval = stats.binom_test(100,250)
         assert_almost_equal(pval,0.0018833009350757682,11)
@@ -138,6 +166,21 @@
         pval = stats.binom_test([682,243],p=3.0/4)
         assert_almost_equal(pval,0.38249155957481695,11)
 
+    def test_bad_len_x(self):
+        """Length of x must be 1 or 2."""
+        assert_raises(ValueError, stats.binom_test, [1,2,3])
+
+    def test_bad_n(self):
+        """len(x) is 1, but n is invalid."""
+        # Missing n
+        assert_raises(ValueError, stats.binom_test, [100])
+        # n less than x[0]
+        assert_raises(ValueError, stats.binom_test, [100], n=50)
+
+    def test_bad_p(self):
+        assert_raises(ValueError, stats.binom_test, [50, 50], p=2.0)
+
+
 class TestFindRepeats(TestCase):
     def test_basic(self):
         a = [1,2,3,4,1,2,3,4,1,2,5]
@@ -194,11 +237,60 @@
         x = np.linspace(-1,1,21)
         assert_raises(ValueError, stats.fligner, x, x, center='trim')
 
+    def test_bad_num_args(self):
+        """Too few args raises ValueError."""
+        assert_raises(ValueError, stats.fligner, [1])
+
+
 def test_mood():
     # numbers from R: mood.test in package stats
     x1 = np.arange(5)
     assert_array_almost_equal(stats.mood(x1,x1**2),
             (-1.3830857299399906, 0.16663858066771478), 11)
 
+def test_mood_bad_arg():
+    """Raise ValueError when the sum of the lengths of the args is less than 3."""
+    assert_raises(ValueError, stats.mood, [1], [])
+
+def test_oneway_bad_arg():
+    """Raise ValueError if fewer than two args are given."""
+    assert_raises(ValueError, stats.oneway, [1])
+
+def test_wilcoxon_bad_arg():
+    """Raise ValueError when two args of different lengths are given."""
+    assert_raises(ValueError, stats.wilcoxon, [1], [1,2])
+
+def test_mvsdist_bad_arg():
+    """Raise ValueError if fewer than two data points are given."""
+    data = [1]
+    assert_raises(ValueError, stats.mvsdist, data)
+
+def test_kstat_bad_arg():
+    """Raise ValueError if n > 4 or n < 1."""
+    data = [1]
+    n = 10
+    assert_raises(ValueError, stats.kstat, data, n=n)
+
+def test_kstatvar_bad_arg():
+    """Raise ValueError if n is not 1 or 2."""
+    data = [1]
+    n = 10
+    assert_raises(ValueError, stats.kstatvar, data, n=n)
+
+def test_probplot_bad_arg():
+    """Raise ValueError when given an invalid distribution."""
+    data = [1]
+    assert_raises(ValueError, stats.probplot, data, dist="plate_of_shrimp")
+
+def test_ppcc_max_bad_arg():
+    """Raise ValueError when given an invalid distribution."""
+    data = [1]
+    assert_raises(ValueError, stats.ppcc_max, data, dist="plate_of_shrimp")
+
+def test_boxcox_bad_arg():
+    """Raise ValueError if any data value is negative."""
+    x = np.array([-1])
+    assert_raises(ValueError, stats.boxcox, x)
+
 if __name__ == "__main__":
     run_module_suite()