[Scipy-svn] r5166 - in trunk/scipy/stats: . tests
scipy-svn at scipy.org
scipy-svn at scipy.org
Sat Nov 22 09:40:48 EST 2008
Author: josef
Date: 2008-11-22 08:40:43 -0600 (Sat, 22 Nov 2008)
New Revision: 5166
Modified:
trunk/scipy/stats/stats.py
trunk/scipy/stats/tests/test_stats.py
Log:
merge 2 versions of percentileofscore, "rank" as default for better backwards compatibility
Modified: trunk/scipy/stats/stats.py
===================================================================
--- trunk/scipy/stats/stats.py 2008-11-22 10:40:35 UTC (rev 5165)
+++ trunk/scipy/stats/stats.py 2008-11-22 14:40:43 UTC (rev 5166)
@@ -1014,61 +1014,19 @@
return _interpolate(values[int(idx)], values[int(idx) + 1], idx % 1)
-def percentileofscore(a, score):
- """
- Percentile of a score.
-
- The percentile of a score is the proportion of the values in a given array equal to or
- smaller than the score.
-
- Parameters
- -----------
- a: ndarray
-
- score: float or int.
-
- Returns
- -------
- pct: float (0-100), the percentile of score relative to a
-
- Examples
- --------
- >>> percentileofscore([20,80,100],80)
- 66.666666666666657
- >>>percentileofscore([20,80,100],99)
- 66.666666666666657
- >>>percentileofscore([20,80,100],100)
- 100.0
- >>>percentileofscore([20,80,100],1)
- 0.0
-
- """
- l=len(a)
- a = np.array(a)
- if not(any(a == score)):
- a = np.append(a,score)
- a_len = np.array(range(len(a)))
- else:
- a_len = np.array(range(len(a))) + 1.0
-
- a = np.sort(a)
- idx = [a == score]
- pct = (np.mean(a_len[idx])/(l))*100.0
-
- return pct
-
-def percentileofscore2(a, score, kind = 'mean' ):
+def percentileofscore(a, score, kind = 'rank' ):
'''
The percentile rank of a score relative to a list of scores a.
- A percentileofscore2 of, for example, 80% means that 80% of the scores in a
+ A percentileofscore of, for example, 80% means that 80% of the scores in a
are below the given score. In the case of gaps or ties, the exact
definition depends on the optional kind:
+ "rank": average percentage ranking of score
"weak": This kind corresponds to the definition of a cumulative
distribution function, and means that 80% have a score lower
or equal to the given score
- "strict": A percentileofscore2 of 80% means that 80% have a strictly
+ "strict": A percentileofscore of 80% means that 80% have a strictly
lower score
"mean": is the average score between "weak" and "strict" and is used in
testing
@@ -1077,97 +1035,118 @@
Parameters
----------
- a: array_like
+ a: array like
list or array of scores to which score is compared
score: int or float
score that is compared with elements in a
kind: kind/type of percentile
+ 'rank' (default): average percentile ranks of score
'weak': percent of elements in a smaller or equal to score
'strict': percent of elements in a strictly smaller than score
- 'mean' (default): average score between 'weak' and 'strict'
+ 'mean': average score between 'weak' and 'strict'
Returns
-------
float: percentile-position of score (0-100) relative to a
- >>> percentileofscore2([20,80,100],80)
+ >>> percentileofscore([20,80,100],80)
+ 66.666666666666657
+ >>> percentileofscore([20,80,100],80,kind='mean')
50.0
- >>> percentileofscore2([20,80,100],80,kind='strict')
+ >>> percentileofscore([20,80,100],80,kind='strict')
33.333333333333329
- >>> percentileofscore2([20,80,100],80,kind='weak')
+ >>> percentileofscore([20,80,100],80,kind='weak')
66.666666666666657
- >>> percentileofscore2([1,2,3,4,5,6,7,8,9,10],4) #default kind = 'mean'
+
+ >>> percentileofscore([1,2,3,4,5,6,7,8,9,10],4) #default kind = 'rank'
+ 40.0
+ >>> percentileofscore([1,2,3,4,5,6,7,8,9,10],4,kind = 'mean')
35.0
- >>> percentileofscore2([1,2,3,4,5,6,7,8,9,10],4,kind = 'strict')
+ >>> percentileofscore([1,2,3,4,5,6,7,8,9,10],4,kind = 'strict')
30.0
- >>> percentileofscore2([1,2,3,4,5,6,7,8,9,10],4,kind = 'weak')
+ >>> percentileofscore([1,2,3,4,5,6,7,8,9,10],4,kind = 'weak')
40.0
# multiple - 2
- >>> percentileofscore2([1,2,3,4,4,5,6,7,8,9],4,kind = 'strict')
+ >>> percentileofscore([1,2,3,4,4,5,6,7,8,9],4)
+ 45.0
+ >>> percentileofscore([1,2,3,4,4,5,6,7,8,9],4,kind = 'mean')
+ 40.0
+ >>> percentileofscore([1,2,3,4,4,5,6,7,8,9],4,kind = 'strict')
30.0
- >>> percentileofscore2([1,2,3,4,4,5,6,7,8,9],4,kind = 'weak')
+ >>> percentileofscore([1,2,3,4,4,5,6,7,8,9],4,kind = 'weak')
50.0
- >>> percentileofscore2([1,2,3,4,4,5,6,7,8,9],4,kind = 'mean')
- 40.0
+
# multiple - 3
- >>> percentileofscore2([1,2,3,4,4,4,5,6,7,8],4,kind = 'mean')
+ >>> percentileofscore([1,2,3,4,4,4,5,6,7,8],4)
+ 50.0
+ >>> percentileofscore([1,2,3,4,4,4,5,6,7,8],4,kind = 'mean')
45.0
- >>> percentileofscore2([1,2,3,4,4,4,5,6,7,8],4,kind = 'strict')
+ >>> percentileofscore([1,2,3,4,4,4,5,6,7,8],4,kind = 'strict')
30.0
- >>> percentileofscore2([1,2,3,4,4,4,5,6,7,8],4,kind = 'weak')
+ >>> percentileofscore([1,2,3,4,4,4,5,6,7,8],4,kind = 'weak')
60.0
# missing
- >>> percentileofscore2([1,2,3,5,6,7,8,9,10,11],4)
+ >>> percentileofscore([1,2,3,5,6,7,8,9,10,11],4)
30.0
- >>> percentileofscore2([1,2,3,5,6,7,8,9,10,11],4,kind = 'strict')
+ >>> percentileofscore([1,2,3,5,6,7,8,9,10,11],4,kind = 'mean')
30.0
- >>> percentileofscore2([1,2,3,5,6,7,8,9,10,11],4,kind = 'weak')
+ >>> percentileofscore([1,2,3,5,6,7,8,9,10,11],4,kind = 'strict')
30.0
+ >>> percentileofscore([1,2,3,5,6,7,8,9,10,11],4,kind = 'weak')
+ 30.0
#larger numbers
- >>> percentileofscore2([10,20,30,40,50,60,70,80,90,100],40)
+ >>> percentileofscore([10,20,30,40,50,60,70,80,90,100],40)
+ 40.0
+ >>> percentileofscore([10,20,30,40,50,60,70,80,90,100],40,kind = 'mean')
35.0
- >>> percentileofscore2([10,20,30,40,50,60,70,80,90,100],40,kind = 'strict')
+ >>> percentileofscore([10,20,30,40,50,60,70,80,90,100],40,kind = 'strict')
30.0
- >>> percentileofscore2([10,20,30,40,50,60,70,80,90,100],40,kind = 'weak')
+ >>> percentileofscore([10,20,30,40,50,60,70,80,90,100],40,kind = 'weak')
40.0
- >>> percentileofscore2([10,20,30,40,40,40,50,60,70,80],40,kind = 'mean')
+ >>> percentileofscore([10,20,30,40,40,40,50,60,70,80],40,kind = 'mean')
45.0
- >>> percentileofscore2([10,20,30,40,40,40,50,60,70,80],40,kind = 'strict')
+ >>> percentileofscore([10,20,30,40,40,40,50,60,70,80],40,kind = 'strict')
30.0
- >>> percentileofscore2([10,20,30,40,40,40,50,60,70,80],40,kind = 'weak')
+ >>> percentileofscore([10,20,30,40,40,40,50,60,70,80],40,kind = 'weak')
60.0
- >>> percentileofscore2([ 10,20,30,50,60,70,80,90,100,110],40,kind = 'mean')
+ >>> percentileofscore([ 10,20,30,50,60,70,80,90,100,110],40,kind = 'rank')
30.0
- >>> percentileofscore2([ 10,20,30,50,60,70,80,90,100,110],40,kind = 'strict')
+ >>> percentileofscore([ 10,20,30,50,60,70,80,90,100,110],40,kind = 'mean')
30.0
- >>> percentileofscore2([ 10,20,30,50,60,70,80,90,100,110],40,kind = 'weak')
+ >>> percentileofscore([ 10,20,30,50,60,70,80,90,100,110],40,kind = 'strict')
30.0
+ >>> percentileofscore([ 10,20,30,50,60,70,80,90,100,110],40,kind = 'weak')
+ 30.0
#boundaries
- >>> percentileofscore2([ 10,20,30,50,60,70,80,90,100,110],10)
+ >>> percentileofscore([ 10,20,30,50,60,70,80,90,100,110],10)
+ 10.0
+ >>> percentileofscore([ 10,20,30,50,60,70,80,90,100,110],10,kind = 'mean')
5.0
- >>> percentileofscore2([ 10,20,30,50,60,70,80,90,100,110],10,kind = 'strict')
+ >>> percentileofscore([ 10,20,30,50,60,70,80,90,100,110],10,kind = 'strict')
0.0
- >>> percentileofscore2([ 10,20,30,50,60,70,80,90,100,110],10,kind = 'weak')
+ >>> percentileofscore([ 10,20,30,50,60,70,80,90,100,110],10,kind = 'weak')
10.0
-
- >>> percentileofscore2([ 10,20,30,50,60,70,80,90,100,110],110)
+
+ >>> percentileofscore([ 10,20,30,50,60,70,80,90,100,110],110)
+ 100.0
+ >>> percentileofscore([ 10,20,30,50,60,70,80,90,100,110],110,kind = 'mean')
95.0
- >>> percentileofscore2([ 10,20,30,50,60,70,80,90,100,110],110,kind = 'strict')
+ >>> percentileofscore([ 10,20,30,50,60,70,80,90,100,110],110,kind = 'strict')
90.0
- >>> percentileofscore2([ 10,20,30,50,60,70,80,90,100,110],110,kind = 'weak')
+ >>> percentileofscore([ 10,20,30,50,60,70,80,90,100,110],110,kind = 'weak')
100.0
#out of bounds
- >>> percentileofscore2([ 10,20,30,50,60,70,80,90,100,110],200)
+ >>> percentileofscore([ 10,20,30,50,60,70,80,90,100,110],200)
100.0
- >>> percentileofscore2([ 10,20,30,50,60,70,80,90,100,110],0)
+ >>> percentileofscore([ 10,20,30,50,60,70,80,90,100,110],0)
0.0
'''
@@ -1175,14 +1154,27 @@
a=np.array(a)
n = len(a)
- if kind == 'strict':
+
+ if kind == 'rank':
+ if not(any(a == score)):
+ a = np.append(a,score)
+ a_len = np.array(range(len(a)))
+ else:
+ a_len = np.array(range(len(a))) + 1.0
+
+ a = np.sort(a)
+ idx = [a == score]
+ pct = (np.mean(a_len[idx])/(n))*100.0
+ return pct
+
+ elif kind == 'strict':
return sum(a<score)/float(n)*100
elif kind == 'weak':
return sum(a<=score)/float(n)*100
elif kind == 'mean':
return (sum(a<score) + sum(a<=score))*50/float(n)
else:
- raise ValueError, "kind can only be 'strong', 'weak' or 'mean'"
+ raise ValueError, "kind can only be 'rank', 'strict', 'weak' or 'mean'"
def histogram2(a, bins):
Modified: trunk/scipy/stats/tests/test_stats.py
===================================================================
--- trunk/scipy/stats/tests/test_stats.py 2008-11-22 10:40:35 UTC (rev 5165)
+++ trunk/scipy/stats/tests/test_stats.py 2008-11-22 14:40:43 UTC (rev 5166)
@@ -909,42 +909,51 @@
assert_equal(stats.scoreatpercentile(np.array([1, 10 ,100]),50,(10,100)), 55)
assert_equal(stats.scoreatpercentile(np.array([1, 10 ,100]),50,(1,10)), 5.5)
-def test_percentileofscore2():
- assert_equal(stats.percentileofscore2([1,2,3,4,5,6,7,8,9,10],4),35.0)
- assert_equal(stats.percentileofscore2([1,2,3,4,5,6,7,8,9,10],4,kind = 'strict'),30.0)
- assert_equal(stats.percentileofscore2([1,2,3,4,5,6,7,8,9,10],4,kind = 'weak'),40.0)
+def test_percentileofscore():
+ assert_equal(stats.percentileofscore([1,2,3,4,5,6,7,8,9,10],4),40.0)
+ assert_equal(stats.percentileofscore([1,2,3,4,5,6,7,8,9,10],4,kind = 'mean'),35.0)
+ assert_equal(stats.percentileofscore([1,2,3,4,5,6,7,8,9,10],4,kind = 'strict'),30.0)
+ assert_equal(stats.percentileofscore([1,2,3,4,5,6,7,8,9,10],4,kind = 'weak'),40.0)
# multiple - 2
- assert_equal(stats.percentileofscore2([1,2,3,4,4,5,6,7,8,9],4,kind = 'strict'),30.0)
- assert_equal(stats.percentileofscore2([1,2,3,4,4,5,6,7,8,9],4,kind = 'weak'),50.0)
- assert_equal(stats.percentileofscore2([1,2,3,4,4,5,6,7,8,9],4,kind = 'mean'),40.0)
+ assert_equal(stats.percentileofscore([1,2,3,4,4,5,6,7,8,9],4,kind = 'rank'),45.0)
+ assert_equal(stats.percentileofscore([1,2,3,4,4,5,6,7,8,9],4,kind = 'strict'),30.0)
+ assert_equal(stats.percentileofscore([1,2,3,4,4,5,6,7,8,9],4,kind = 'weak'),50.0)
+ assert_equal(stats.percentileofscore([1,2,3,4,4,5,6,7,8,9],4,kind = 'mean'),40.0)
# multiple - 3
- assert_equal(stats.percentileofscore2([1,2,3,4,4,4,5,6,7,8],4,kind = 'mean'),45.0)
- assert_equal(stats.percentileofscore2([1,2,3,4,4,4,5,6,7,8],4,kind = 'strict'),30.0)
- assert_equal(stats.percentileofscore2([1,2,3,4,4,4,5,6,7,8],4,kind = 'weak'),60.0)
+ assert_equal(stats.percentileofscore([1,2,3,4,4,4,5,6,7,8],4),50.0)
+ assert_equal(stats.percentileofscore([1,2,3,4,4,4,5,6,7,8],4,kind = 'rank'),50.0)
+ assert_equal(stats.percentileofscore([1,2,3,4,4,4,5,6,7,8],4,kind = 'mean'),45.0)
+ assert_equal(stats.percentileofscore([1,2,3,4,4,4,5,6,7,8],4,kind = 'strict'),30.0)
+ assert_equal(stats.percentileofscore([1,2,3,4,4,4,5,6,7,8],4,kind = 'weak'),60.0)
# missing
- assert_equal(stats.percentileofscore2([1,2,3,5,6,7,8,9,10,11],4),30.0)
- assert_equal(stats.percentileofscore2([1,2,3,5,6,7,8,9,10,11],4,kind = 'strict'),30.0)
- assert_equal(stats.percentileofscore2([1,2,3,5,6,7,8,9,10,11],4,kind = 'weak'),30.0)
+ assert_equal(stats.percentileofscore([1,2,3,5,6,7,8,9,10,11],4,kind = 'rank'),30.0)
+ assert_equal(stats.percentileofscore([1,2,3,5,6,7,8,9,10,11],4,kind = 'mean'),30.0)
+ assert_equal(stats.percentileofscore([1,2,3,5,6,7,8,9,10,11],4,kind = 'strict'),30.0)
+ assert_equal(stats.percentileofscore([1,2,3,5,6,7,8,9,10,11],4,kind = 'weak'),30.0)
#larger numbers
- assert_equal(stats.percentileofscore2([10,20,30,40,50,60,70,80,90,100],40),35.0)
- assert_equal(stats.percentileofscore2([10,20,30,40,50,60,70,80,90,100],40,kind = 'strict'),30.0)
- assert_equal(stats.percentileofscore2([10,20,30,40,50,60,70,80,90,100],40,kind = 'weak'),40.0)
- assert_equal(stats.percentileofscore2([10,20,30,40,40,40,50,60,70,80],40,kind = 'mean'),45.0)
- assert_equal(stats.percentileofscore2([10,20,30,40,40,40,50,60,70,80],40,kind = 'strict'),30.0)
- assert_equal(stats.percentileofscore2([10,20,30,40,40,40,50,60,70,80],40,kind = 'weak'),60.0)
- assert_equal(stats.percentileofscore2([ 10,20,30,50,60,70,80,90,100,110],40,kind = 'mean'),30.0)
- assert_equal(stats.percentileofscore2([ 10,20,30,50,60,70,80,90,100,110],40,kind = 'strict'),30.0)
- assert_equal(stats.percentileofscore2([ 10,20,30,50,60,70,80,90,100,110],40,kind = 'weak'),30.0)
+ assert_equal(stats.percentileofscore([10,20,30,40,50,60,70,80,90,100],40,kind = 'mean'),35.0)
+ assert_equal(stats.percentileofscore([10,20,30,40,50,60,70,80,90,100],40,kind = 'strict'),30.0)
+ assert_equal(stats.percentileofscore([10,20,30,40,50,60,70,80,90,100],40,kind = 'weak'),40.0)
+ assert_equal(stats.percentileofscore([10,20,30,40,40,40,50,60,70,80],40,kind = 'mean'),45.0)
+ assert_equal(stats.percentileofscore([10,20,30,40,40,40,50,60,70,80],40,kind = 'strict'),30.0)
+ assert_equal(stats.percentileofscore([10,20,30,40,40,40,50,60,70,80],40,kind = 'weak'),60.0)
+ assert_equal(stats.percentileofscore([ 10,20,30,50,60,70,80,90,100,110],40,kind = 'rank'),30.0)
+ assert_equal(stats.percentileofscore([ 10,20,30,50,60,70,80,90,100,110],40,kind = 'mean'),30.0)
+ assert_equal(stats.percentileofscore([ 10,20,30,50,60,70,80,90,100,110],40,kind = 'strict'),30.0)
+ assert_equal(stats.percentileofscore([ 10,20,30,50,60,70,80,90,100,110],40,kind = 'weak'),30.0)
#boundaries
- assert_equal(stats.percentileofscore2([ 10,20,30,50,60,70,80,90,100,110],10),5.0)
- assert_equal(stats.percentileofscore2([ 10,20,30,50,60,70,80,90,100,110],10,kind = 'strict'),0.0)
- assert_equal(stats.percentileofscore2([ 10,20,30,50,60,70,80,90,100,110],10,kind = 'weak'),10.0)
- assert_equal(stats.percentileofscore2([ 10,20,30,50,60,70,80,90,100,110],110),95.0)
- assert_equal(stats.percentileofscore2([ 10,20,30,50,60,70,80,90,100,110],110,kind = 'strict'),90.0)
- assert_equal(stats.percentileofscore2([ 10,20,30,50,60,70,80,90,100,110],110,kind = 'weak'),100.0)
+ assert_equal(stats.percentileofscore([ 10,20,30,50,60,70,80,90,100,110],10,kind = 'rank'),10.0)
+ assert_equal(stats.percentileofscore([ 10,20,30,50,60,70,80,90,100,110],10,kind = 'mean'),5.0)
+ assert_equal(stats.percentileofscore([ 10,20,30,50,60,70,80,90,100,110],10,kind = 'strict'),0.0)
+ assert_equal(stats.percentileofscore([ 10,20,30,50,60,70,80,90,100,110],10,kind = 'weak'),10.0)
+ assert_equal(stats.percentileofscore([ 10,20,30,50,60,70,80,90,100,110],110),100.0)
+ assert_equal(stats.percentileofscore([ 10,20,30,50,60,70,80,90,100,110],110,kind = 'mean'),95.0)
+ assert_equal(stats.percentileofscore([ 10,20,30,50,60,70,80,90,100,110],110,kind = 'strict'),90.0)
+ assert_equal(stats.percentileofscore([ 10,20,30,50,60,70,80,90,100,110],110,kind = 'weak'),100.0)
#out of bounds
- assert_equal(stats.percentileofscore2([ 10,20,30,50,60,70,80,90,100,110],200),100.0)
- assert_equal(stats.percentileofscore2([ 10,20,30,50,60,70,80,90,100,110],0),0.0)
+ assert_equal(stats.percentileofscore([ 10,20,30,50,60,70,80,90,100,110],200,kind = 'rank'),100.0)
+ assert_equal(stats.percentileofscore([ 10,20,30,50,60,70,80,90,100,110],200,kind = 'mean'),100.0)
+ assert_equal(stats.percentileofscore([ 10,20,30,50,60,70,80,90,100,110],0,kind = 'mean'),0.0)
if __name__ == "__main__":
More information about the Scipy-svn mailing list