statistical modules for python?

Paul Magwene paul.magwene at yale.edu
Wed Oct 25 18:01:25 EDT 2000


"Michael A. Miller" wrote:
> 
> Does anyone know of a python module that implements an ANOVA
> calculation.
> 
> Mike

Here's something you could start with.  As is, it requires Numeric, but
that could easily be changed.



#
#	Anova.py -- a python module for performing simple ANOVAs
#	Author:  Paul M. Magwene
#	Revision:  09 November 1999

""" A module for doing simple ANOVAs

    
-----------------------------------------------------------------------------
(c) Copyright by Paul M. Magwene, 1999 (mailto:paul.magwene at yale.edu)

    Permission to use, copy, modify, and distribute this software and its
    documentation for any purpose and without fee or royalty is hereby
    granted, provided that the above copyright notice appear in all copies
    and that both that copyright notice and this permission notice appear
    in supporting documentation or portions thereof, including
    modifications, that you make.

    THE AUTHOR PAUL M. MAGWENE DISCLAIMS ALL WARRANTIES WITH REGARD TO
    THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
    FITNESS, IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL,
    INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING
    FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
    NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION
    WITH THE USE OR PERFORMANCE OF THIS SOFTWARE !

"""

__version__ = '0.2'

#----------------------------------------------------------------------------

import Numeric

#----------------------------------------------------------------------------

def mean(data):
    """Returns the arithmetic mean of a sequence of values.

    When a matrix (a sequence of equal-length rows) is passed in, the
    means are calculated over columns, giving one mean per column.

    The divisor is converted to a float so that integer input is not
    truncated by integer division (mean([1, 2]) is 1.5, not 1).
    """
    values = Numeric.array(data)
    # add.reduce sums along the first axis: a single total for a flat
    # sequence, a vector of column totals for a matrix.
    return Numeric.add.reduce(values) / float(len(values))

def deviates(data):
    """Returns each observation's deviation from the arithmetic mean.

    For a matrix the deviations are taken from the column means.
    """
    values = Numeric.array(data)
    # Float divisor: integer data must not yield a truncated mean.
    centre = Numeric.add.reduce(values) / float(len(values))
    return values - centre

def SS(data):
    """Returns the sum of the squared deviates.

    The deviates are the differences between each observation and the
    mean of `data` (see deviates()).  For a matrix one sum of squares per
    column is returned.
    """
    return Numeric.add.reduce(deviates(data) ** 2)

def collapse(inlist, type=type, listtype=list, integers=None,
             endoflist=IndexError):
    """Non destructively flatten a list hierarchy to a single level.

    Non-recursive: a shallow copy of `inlist` is walked by index and every
    element that is itself a list is spliced into its place, repeatedly,
    until the element at that index is no longer a list.

    Parameters:
        inlist    -- the (possibly nested) list to flatten; not modified.
        type      -- callable used to obtain an element's type.
        listtype  -- the exact type that is expanded.  Compared with `is`,
                     so subclasses of list and tuples are left untouched.
        integers  -- iterable of indices to visit; None (the default) means
                     0, 1, 2, ... until the end of the list is reached.
        endoflist -- exception type that signals running off the end.

    Returns the flattened copy.
    """
    # Imported locally so the function does not rely on module-level
    # imports being present.
    import copy
    import itertools

    outlist = copy.copy(inlist)
    if integers is None:
        integers = itertools.count()
    try:
        for ind in integers:
            # Splicing may expose another list at the same index, hence
            # `while` rather than `if`.
            while type(outlist[ind]) is listtype:
                outlist[ind:ind + 1] = outlist[ind]
    except endoflist:
        pass
    # Also reached when a caller-supplied `integers` is exhausted before
    # the end of the list; the partially flattened copy is returned then.
    return outlist

class Anova:
    """Single-classification (one-way) analysis of variance.

    The data are held as a list of groups, each group being a flat list of
    numeric observations; groups may differ in size.  The sum-of-squares
    and mean methods delegate to the module-level helpers mean(), SS() and
    collapse().  The mean squares and the F ratio need at least two groups.
    """

    def __init__(self, groups=None):
        """Creates an analysis of `groups` (a list of lists of numbers).

        The default is None rather than [] so that instances created
        without data never share a single mutable list object.
        """
        if groups is None:
            groups = []
        self._groups = groups

    def groups(self, groups=None):
        """Returns the current groups, replacing them first if given.

        Called without an argument this is a plain getter.  Called with a
        list of groups it stores that list and returns it.
        """
        if groups is not None:
            self._groups = groups
        return self._groups

    def numGroups(self):
        """Returns the number of groups (a)."""
        return len(self._groups)

    def grandMean(self):
        """Returns the mean of all observations pooled across groups."""
        return mean(collapse(self._groups[:]))

    def groupMeans(self):
        """Returns a list with the mean of each group, in group order."""
        return [mean(g) for g in self._groups]

    def SSwin(self):
        """Returns the within-groups sum of squares."""
        sswin = 0.0
        for g in self._groups:
            sswin = sswin + SS(g)
        return sswin

    def SSmeans(self):
        """Returns the sum of squares of the group means."""
        return SS(self.groupMeans())

    def SSamong(self):
        """Returns the among-groups sum of squares (total minus within)."""
        return self.SStot() - self.SSwin()

    def SStot(self):
        """Returns the total sum of squares of the pooled observations."""
        return SS(collapse(self._groups[:]))

    def MSwin(self):
        """Returns the within-groups mean square, SSwin / DFwin."""
        return self.SSwin() / float(self.DFwin())

    def MSamong(self):
        """Returns the among-groups mean square, SSamong / DFamong."""
        return self.SSamong() / float(self.DFamong())

    def MStot(self):
        """Returns the total mean square, SStot / DFtot.

        The total degrees of freedom are taken from DFtot() (sum of the
        group sizes minus one) so that the result is also right when the
        groups are of unequal size.
        """
        return self.SStot() / float(self.DFtot())

    def Fs(self):
        """Returns the sample F ratio, MSamong / MSwin."""
        return self.MSamong() / self.MSwin()

    def avgSampleSz(self):
        """Returns the average sample size n0 for unequal group sizes.

        n0 = (sum(n_i) - sum(n_i**2) / sum(n_i)) / (a - 1), which equals
        the common group size when all groups are the same size.
        """
        a = self.numGroups()
        sum_ni = float(self._sum_ni())
        sum_ni2 = float(sum([len(g) ** 2 for g in self._groups]))
        return (1.0 / (a - 1)) * (sum_ni - (sum_ni2 / sum_ni))

    def _sum_ni(self):
        """Returns the total number of observations over all groups."""
        return sum([len(g) for g in self._groups])

    def DFamong(self):
        """Returns the among-groups degrees of freedom, a - 1."""
        return self.numGroups() - 1

    def DFwin(self):
        """Returns the within-groups degrees of freedom, sum(n_i) - a."""
        return self._sum_ni() - self.numGroups()

    def DFtot(self):
        """Returns the total degrees of freedom, sum(n_i) - 1."""
        return self._sum_ni() - 1

    def anovaTable(self):
        """Returns the ANOVA table as a multi-line, printable string.

        One row each for the among-groups, within-groups and total sources
        of variation, with df, SS, MS and F where applicable.
        """
        header = "Source\tdf\tSS\t\tMS\t\tF\n" + (60 * '-') + '\n'
        among = "among\t%-7d %-15.5f %-15.5f %-15.5f\n" % (
            self.DFamong(), self.SSamong(), self.MSamong(), self.Fs())
        within = "within\t%-7d %-15.5f %-15.5f\n" % (
            self.DFwin(), self.SSwin(), self.MSwin())
        total = "total\t%-7d %-15.5f\n" % (self.DFtot(), self.SStot())
        return header + among + within + total
	
if __name__ == '__main__':
    # Demonstration run: prints the group data, the group and grand means
    # and the ANOVA table for a published example.
    # Test data from Sokal & Rohlf (1995), Biometry (3rd edition),
    # table 9.1, p. 210.
    # Each print takes a single pre-formatted string so the statement is
    # valid, and produces the same output, on both Python 2 and Python 3.
    print("\n\n\nSample output of Anova.py \n\n")
    print("\nTest data from Sokal & Rohlf (1995), table 9.1, p. 210\n\n")
    _A = [380, 376, 360, 368, 372, 366, 374, 382]
    _B = [350, 356, 358, 376, 338, 342, 366, 350, 344, 364]
    _C = [354, 360, 362, 352, 366, 372, 362, 344, 342, 358,
          351, 348, 348]
    _D = [376, 344, 342, 372, 374, 360]

    av = Anova([_A, _B, _C, _D])

    # The doubled space after each label reproduces the separator that a
    # comma-separated Python 2 print statement would insert.
    print("# of groups:  %s" % (av.numGroups(),))
    print("group A:  %s" % (_A,))
    print("group B:  %s" % (_B,))
    print("group C:  %s" % (_C,))
    print("group D:  %s" % (_D,))
    print("")
    print("group means:  %s" % (av.groupMeans(),))
    print("grand mean:  %s" % (av.grandMean(),))
    print("")
    print("anova table --")
    print(av.anovaTable())


-- 

Paul Magwene
paul.magwene at yale.edu



More information about the Python-list mailing list