[Spambayes-checkins] spambayes/spambayes Stats.py,1.12,1.13

Wed Dec 22 01:25:48 CET 2004

Update of /cvsroot/spambayes/spambayes/spambayes
In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv32506/spambayes

Modified Files:
	Stats.py 
Log Message:
Simplify the constructor to take just the messageinfo db and an optionsclass object.
 We then have access to all the options we want, and (more importantly) the values
 will be updated correctly if the user changes options mid-run.

Index: Stats.py
===================================================================
RCS file: /cvsroot/spambayes/spambayes/spambayes/Stats.py,v
retrieving revision 1.12
retrieving revision 1.13
diff -C2 -d -r1.12 -r1.13
*** Stats.py	21 Dec 2004 23:19:41 -0000	1.12
--- Stats.py	22 Dec 2004 00:25:45 -0000	1.13
***************
*** 52,67 ****
  
  class Stats(object):
!     def __init__(self, spam_threshold, unsure_threshold, messageinfo_db,
!                  ham_string, unsure_string, spam_string, fp_cost, fn_cost,
!                  unsure_cost):
          self.messageinfo_db = messageinfo_db
!         self.spam_threshold = spam_threshold
!         self.unsure_threshold = unsure_threshold
!         self.ham_string = ham_string
!         self.unsure_string = unsure_string
!         self.spam_string = spam_string
!         self.fp_cost = fp_cost
!         self.fn_cost = fn_cost
!         self.unsure_cost = unsure_cost
          # Reset session stats.
          self.Reset()
--- 52,58 ----
  
  class Stats(object):
!     def __init__(self, options, messageinfo_db):
          self.messageinfo_db = messageinfo_db
!         self.options = options
          # Reset session stats.
          self.Reset()
***************
*** 87,93 ****
  
      def RecordClassification(self, score):
!         if score >= self.spam_threshold:
              self.num_spam += 1
!         elif score >= self.unsure_threshold:
              self.num_unsure += 1
          else:
--- 78,84 ----
  
      def RecordClassification(self, score):
!         if score >= self.options["Categorization", "spam_cutoff"]:
              self.num_spam += 1
!         elif score >= self.options["Categorization", "ham_cutoff"]:
              self.num_unsure += 1
          else:
***************
*** 99,103 ****
              # If we are recovering an item that is in the "spam" threshold,
              # then record it as a "false positive"
!             if old_score > self.spam_threshold:
                  self.num_trained_ham_fp += 1
          else:
--- 90,94 ----
              # If we are recovering an item that is in the "spam" threshold,
              # then record it as a "false positive"
!             if old_score > self.options["Categorization", "spam_cutoff"]:
                  self.num_trained_ham_fp += 1
          else:
***************
*** 105,109 ****
              # If we are deleting as Spam an item that was in our "good"
              # range, then record it as a false negative.
!             if old_score < self.unsure_threshold:
                  self.num_trained_spam_fn += 1
  
--- 96,100 ----
              # If we are deleting as Spam an item that was in our "good"
              # range, then record it as a false negative.
!             if old_score < self.options["Categorization", "ham_cutoff"]:
                  self.num_trained_spam_fn += 1
  
***************
*** 128,132 ****
              # Skip ones that are too old.
              if self.from_date and m.date_modified and \
!                m.date_modified > self.from_date:
                  continue
  
--- 119,123 ----
              # Skip ones that are too old.
              if self.from_date and m.date_modified and \
!                m.date_modified < self.from_date:
                  continue
  
***************
*** 134,138 ****
              trained = m.GetTrained()
              
!             if classification == self.spam_string:
                  # Classified as spam.
                  totals["num_spam"] += 1
--- 125,130 ----
              trained = m.GetTrained()
              
!             if classification == self.options["Headers",
!                                               "header_spam_string"]:
                  # Classified as spam.
                  totals["num_spam"] += 1
***************
*** 140,144 ****
                      # False positive (classified as spam, trained as ham)
                      totals["num_trained_ham_fp"] += 1
!             elif classification == self.ham_string:
                  # Classified as ham.
                  totals["num_ham"] += 1
--- 132,137 ----
                      # False positive (classified as spam, trained as ham)
                      totals["num_trained_ham_fp"] += 1
!             elif classification == self.options["Headers",
!                                                 "header_ham_string"]:
                  # Classified as ham.
                  totals["num_ham"] += 1
***************
*** 146,150 ****
                      # False negative (classified as ham, trained as spam)
                      totals["num_trained_spam_fn"] += 1
!             elif classification == self.unsure_string:
                  # Classified as unsure.
                  totals["num_unsure"] += 1
--- 139,144 ----
                      # False negative (classified as ham, trained as spam)
                      totals["num_trained_spam_fn"] += 1
!             elif classification == self.options["Headers",
!                                                 "header_unsure_string"]:
                  # Classified as unsure.
                  totals["num_unsure"] += 1
***************
*** 234,243 ****
                           data["total_spam"]
  
!         data["total_cost"] = data["num_trained_ham_fp"] * self.fp_cost + \
!                              data["num_trained_spam_fn"] * self.fn_cost + \
!                              data["num_unsure"] * self.unsure_cost
          # If there was no filtering done, what would the cost have been?
          # (Assuming that any spam in the inbox earns the cost of a fn)
!         no_filter_cost = data["num_spam"] * self.fn_cost
          data["cost_savings"] = no_filter_cost - data["total_cost"]
  
--- 228,241 ----
                           data["total_spam"]
  
!         fp_cost = self.options["TestDriver", "best_cutoff_fp_weight"]
!         fn_cost = self.options["TestDriver", "best_cutoff_fn_weight"]
!         unsure_cost = self.options["TestDriver",
!                                    "best_cutoff_unsure_weight"]
!         data["total_cost"] = data["num_trained_ham_fp"] * fp_cost + \
!                              data["num_trained_spam_fn"] * fn_cost + \
!                              data["num_unsure"] * unsure_cost
          # If there was no filtering done, what would the cost have been?
          # (Assuming that any spam in the inbox earns the cost of a fn)
!         no_filter_cost = data["num_spam"] * fn_cost
          data["cost_savings"] = no_filter_cost - data["total_cost"]