[Spambayes-checkins] spambayes table.py,1.1,1.2

Tim Peters tim_one@users.sourceforge.net
Mon, 21 Oct 2002 14:18:57 -0700


Update of /cvsroot/spambayes/spambayes
In directory usw-pr-cvs1:/tmp/cvs-serv11669

Modified Files:
	table.py 
Log Message:
Minor fiddling, plus a change to take the counts of total ham & spam tested
from the "all runs" histogram header line.  Previously it was picking up
a wrong value from an interior test, provided you ran a test setup
that created such an interior test.


Index: table.py
===================================================================
RCS file: /cvsroot/spambayes/spambayes/table.py,v
retrieving revision 1.1
retrieving revision 1.2
diff -C2 -d -r1.1 -r1.2
*** table.py	21 Oct 2002 05:00:05 -0000	1.1
--- table.py	21 Oct 2002 21:18:55 -0000	1.2
***************
*** 41,85 ****
      htest = 0
      stest = 0
!     
      get = f.readline
      while 1:
          line = get()
          if line.startswith('-> <stat> tested'):
              print line,
!             htest = int(line.split()[3])
!             stest = int(line.split()[6])
!         if line.find(' items; mean ') != -1:
!             # -> <stat> Ham distribution for this pair: 1000 items; mean 0.05; sample sdev 0.68
!             # and later "sample " went away
              vals = line.split(';')
              mean = float(vals[1].split()[-1])
              sdev = float(vals[2].split()[-1])
              val = (mean, sdev)
              typ = vals[0].split()[2]
              if line.find('for all runs') != -1:
                  if typ == 'Ham':
                      hamdevall = val
                  else:
                      spamdevall = val
!             continue
!         if line.startswith('-> best cost for all runs: $'):
              bestcost = float(line.split('$')[-1])
!         if line.startswith('-> <stat> all runs false positives: '):
              fp = int(line.split()[-1])
!         if line.startswith('-> <stat> all runs false negatives: '):
              fn = int(line.split()[-1])
!         if line.startswith('-> <stat> all runs unsure: '):
              un = int(line.split()[-1])
!         if line.startswith('-> <stat> all runs false positive %: '):
              fpp = float(line.split()[-1])
!         if line.startswith('-> <stat> all runs false negative %: '):
              fnp = float(line.split()[-1])
!         if line.startswith('-> <stat> all runs unsure %: '):
              unp = float(line.split()[-1])
!         if line.startswith('-> <stat> all runs cost: '):
              cost = float(line.split('$')[-1])
              break
-         if line.startswith('-> '):
-             continue
  
      return (htest, stest, fp, fn, un, fpp, fnp, unp, cost, bestcost,
--- 41,95 ----
      htest = 0
      stest = 0
! 
      get = f.readline
      while 1:
          line = get()
          if line.startswith('-> <stat> tested'):
+             # -> <stat> tested 1910 hams & 948 spams against 2741 hams & 948 spams
+             #  0      1      2    3    4 5   6
              print line,
! 
!         elif line.find(' items; mean ') > 0 and line.find('for all runs') > 0:
!             # -> <stat> Ham scores for all runs: 2741 items; mean 0.86; sdev 6.28
!             #                                             0          1          2
              vals = line.split(';')
              mean = float(vals[1].split()[-1])
              sdev = float(vals[2].split()[-1])
              val = (mean, sdev)
+             ntested = int(vals[0].split()[-2])
              typ = vals[0].split()[2]
              if line.find('for all runs') != -1:
                  if typ == 'Ham':
                      hamdevall = val
+                     htest = ntested
                  else:
                      spamdevall = val
!                     stest = ntested
! 
!         elif line.startswith('-> best cost for all runs: $'):
!             # -> best cost for all runs: $28.20
              bestcost = float(line.split('$')[-1])
! 
!         elif line.startswith('-> <stat> all runs false positives: '):
              fp = int(line.split()[-1])
! 
!         elif line.startswith('-> <stat> all runs false negatives: '):
              fn = int(line.split()[-1])
! 
!         elif line.startswith('-> <stat> all runs unsure: '):
              un = int(line.split()[-1])
! 
!         elif line.startswith('-> <stat> all runs false positive %: '):
              fpp = float(line.split()[-1])
! 
!         elif line.startswith('-> <stat> all runs false negative %: '):
              fnp = float(line.split()[-1])
! 
!         elif line.startswith('-> <stat> all runs unsure %: '):
              unp = float(line.split()[-1])
! 
!         elif line.startswith('-> <stat> all runs cost: '):
              cost = float(line.split('$')[-1])
              break
  
      return (htest, stest, fp, fn, un, fpp, fnp, unp, cost, bestcost,
***************
*** 114,126 ****
      (htest, stest, fp, fn, un, fpp, fnp, unp, cost, bestcost,
       hamdevall, spamdevall) = suck(file(filename))
!     ratio += "%8s" % ("%d:%d" % (htest, stest))
!     fptot += "%8d" % fp
      fpper += "%8.2f" % fpp
!     fntot += "%8d" % fn
      fnper += "%8.2f" % fnp
!     untot += "%8d" % un
      unper += "%8.2f" % unp
!     rcost += "%8s" % ("$%.2f" % cost)
!     bcost += "%8s" % ("$%.2f" % bestcost)
      hmean += "%8.2f" % hamdevall[0]
      hsdev += "%8.2f" % hamdevall[1]
--- 124,136 ----
      (htest, stest, fp, fn, un, fpp, fnp, unp, cost, bestcost,
       hamdevall, spamdevall) = suck(file(filename))
!     ratio += "%8s"   % ("%d:%d" % (htest, stest))
!     fptot += "%8d"   % fp
      fpper += "%8.2f" % fpp
!     fntot += "%8d"   % fn
      fnper += "%8.2f" % fnp
!     untot += "%8d"   % un
      unper += "%8.2f" % unp
!     rcost += "%8s"   % ("$%.2f" % cost)
!     bcost += "%8s"   % ("$%.2f" % bestcost)
      hmean += "%8.2f" % hamdevall[0]
      hsdev += "%8.2f" % hamdevall[1]