[Spambayes-checkins] spambayes Options.py,1.10,1.11 classifier.py,1.5,1.6

Thu, 12 Sep 2002 17:14:21 -0700

Update of /cvsroot/spambayes/spambayes
In directory usw-pr-cvs1:/tmp/cvs-serv21941

Modified Files:
	Options.py classifier.py 
Log Message:
Added a new options section, [Classifier], which allows changing
HAMBIAS, SPAMBIAS, MIN_SPAMPROB, MAX_SPAMPROB, UNKNOWN_SPAMPROB
and MAX_DISCRIMINATORS.  Play with them at your own risk <wink>.

Index: Options.py
===================================================================
RCS file: /cvsroot/spambayes/spambayes/Options.py,v
retrieving revision 1.10
retrieving revision 1.11
diff -C2 -d -r1.10 -r1.11
*** Options.py	12 Sep 2002 02:46:15 -0000	1.10
--- Options.py	13 Sep 2002 00:14:18 -0000	1.11
***************
*** 89,92 ****
--- 89,103 ----
  save_trained_pickles: False
  pickle_basename: class
+ 
+ [Classifier]
+ # Fiddling these can have extreme effects.  See classifier.py for comments.
+ hambias: 2.0
+ spambias: 1.0
+ 
+ min_spamprob: 0.01
+ max_spamprob: 0.99
+ unknown_spamprob: 0.5
+ 
+ max_discriminators: 16
  """

***************
*** 115,118 ****
--- 126,136 ----
                     'show_charlimit': int_cracker,
                    },
+     'Classifier': {'hambias': float_cracker,
+                    'spambias': float_cracker,
+                    'min_spamprob': float_cracker,
+                    'max_spamprob': float_cracker,
+                    'unknown_spamprob': float_cracker,
+                    'max_discriminators': int_cracker,
+                    },
  }

Index: classifier.py
===================================================================
RCS file: /cvsroot/spambayes/spambayes/classifier.py,v
retrieving revision 1.5
retrieving revision 1.6
diff -C2 -d -r1.5 -r1.6
*** classifier.py	8 Sep 2002 03:17:31 -0000	1.5
--- classifier.py	13 Sep 2002 00:14:18 -0000	1.6
***************
*** 10,13 ****
--- 10,15 ----
  from sets import Set

+ from Options import options
+ 
  # The count of each word in ham is artificially boosted by a factor of
  # HAMBIAS, and similarly for SPAMBIAS.  Graham uses 2.0 and 1.0.  Final
***************
*** 26,31 ****
  #    total unique false negatives goes up   by a factor of 2.1 (337 -> 702)

! HAMBIAS  = 2.0
! SPAMBIAS = 1.0

  # "And then there is the question of what probability to assign to words
--- 28,33 ----
  #    total unique false negatives goes up   by a factor of 2.1 (337 -> 702)

! HAMBIAS  = options.hambias  # 2.0
! SPAMBIAS = options.spambias # 1.0

  # "And then there is the question of what probability to assign to words
***************
*** 35,40 ****
  # of training data is good enough to justify probabilities of 0 or 1.  It
  # may justify probabilities outside this range, though.
! MIN_SPAMPROB = 0.01
! MAX_SPAMPROB = 0.99

  # The spam probability assigned to words never seen before.  Graham used
--- 37,42 ----
  # of training data is good enough to justify probabilities of 0 or 1.  It
  # may justify probabilities outside this range, though.
! MIN_SPAMPROB = options.min_spamprob # 0.01
! MAX_SPAMPROB = options.max_spamprob # 0.99

  # The spam probability assigned to words never seen before.  Graham used
***************
*** 50,54 ****
  # of kicking out a word with a prob in (0.2, 0.8), and that seems dubious
  # on the face of it.
! UNKNOWN_SPAMPROB = 0.5

  # "I only consider words that occur more than five times in total".
--- 52,56 ----
  # of kicking out a word with a prob in (0.2, 0.8), and that seems dubious
  # on the face of it.
! UNKNOWN_SPAMPROB = options.unknown_spamprob # 0.5

  # "I only consider words that occur more than five times in total".
***************
*** 172,176 ****
  # was a pure win, lowering the false negative rate consistently, and it even
  # managed to tickle a couple rare false positives into "not spam" terrority.
! MAX_DISCRIMINATORS = 16

  PICKLE_VERSION = 1
--- 174,178 ----
  # was a pure win, lowering the false negative rate consistently, and it even
  # managed to tickle a couple rare false positives into "not spam" terrority.
! MAX_DISCRIMINATORS = options.max_discriminators # 16

  PICKLE_VERSION = 1