[Spambayes-checkins] spambayes/Outlook2000 export.py,1.1,1.2
Mark Hammond
mhammond@users.sourceforge.net
Thu Nov 21 12:06:58 2002
Update of /cvsroot/spambayes/spambayes/Outlook2000
In directory sc8-pr-cvs1:/tmp/cvs-serv913
Modified Files:
export.py
Log Message:
Select the correct number of sets even when there is more spam than ham,
and allow the user to specify how many messages go in each directory.
Index: export.py
===================================================================
RCS file: /cvsroot/spambayes/spambayes/Outlook2000/export.py,v
retrieving revision 1.1
retrieving revision 1.2
diff -C2 -d -r1.1 -r1.2
*** export.py 21 Nov 2002 11:20:14 -0000 1.1
--- export.py 21 Nov 2002 12:06:55 -0000 1.2
***************
*** 4,25 ****
from manager import GetManager
! def BuildBuckets(manager, root_directory, folder_ids, include_sub):
store = manager.message_store
config = manager.config
! num = 0
for folder in store.GetFolderGenerator(config.training.spam_folder_ids, config.training.spam_include_sub):
for msg in folder.GetMessageGenerator():
! num += 1
! num_buckets = num / 400
dirs = []
for i in range(num_buckets):
! dir=os.path.join(root_directory, "Set%d" % (i+1,))
! dir=os.path.abspath(dir)
! if os.path.isdir(dir):
! shutil.rmtree(dir)
! os.makedirs(dir)
! dirs.append(dir)
! return dirs
def ChooseBucket(buckets):
--- 4,24 ----
from manager import GetManager
+ files_per_directory = 400
! def BuildBuckets(manager):
store = manager.message_store
config = manager.config
! num_ham = num_spam = 0
for folder in store.GetFolderGenerator(config.training.spam_folder_ids, config.training.spam_include_sub):
for msg in folder.GetMessageGenerator():
! num_spam += 1
! for folder in store.GetFolderGenerator(config.training.ham_folder_ids, config.training.ham_include_sub):
! for msg in folder.GetMessageGenerator():
! num_ham += 1
! num_buckets = min(num_ham, num_spam)/ files_per_directory
dirs = []
for i in range(num_buckets):
! dirs.append("Set%d" % (i+1,))
! return num_spam, num_ham, dirs
def ChooseBucket(buckets):
***************
*** 27,38 ****
return random.choice(buckets)
! def _export_folders(manager, dir, folder_ids, include_sub):
num = 0
store = manager.message_store
- buckets = BuildBuckets(manager, dir, folder_ids, include_sub)
for folder in store.GetFolderGenerator(folder_ids, include_sub):
print "", folder.name
for message in folder.GetMessageGenerator():
! dir = ChooseBucket(buckets)
# filename is the EID.txt
try:
--- 26,37 ----
return random.choice(buckets)
! def _export_folders(manager, dir, buckets, folder_ids, include_sub):
num = 0
store = manager.message_store
for folder in store.GetFolderGenerator(folder_ids, include_sub):
print "", folder.name
for message in folder.GetMessageGenerator():
! sub = ChooseBucket(buckets)
! this_dir = os.path.join(dir, sub)
# filename is the EID.txt
try:
***************
*** 45,49 ****
continue
! fname = os.path.join(dir, message.GetID()[1]) + ".txt"
f = open(fname, "w")
f.write(msg_text)
--- 44,48 ----
continue
! fname = os.path.join(this_dir, message.GetID()[1]) + ".txt"
f = open(fname, "w")
f.write(msg_text)
***************
*** 57,74 ****
config = manager.config
print "Exporting spam..."
! num = _export_folders(manager, os.path.join(directory, "Spam"),
config.training.spam_folder_ids, config.training.spam_include_sub)
! print "Exported", num, " spam messages."
! print "Exporting ham...",
! num = _export_folders(manager, os.path.join(directory, "Ham"),
config.training.ham_folder_ids, config.training.ham_include_sub)
! print "Exported", num, " ham messages."
def main():
import getopt
try:
! opts, args = getopt.getopt(sys.argv[1:], "q")
except getopt.error, d:
print d
--- 56,84 ----
config = manager.config
+ num_spam, num_ham, buckets = BuildBuckets(manager)
+ print "Have %d spam, and %d ham to export, spread over %d directories." \
+ % (num_spam, num_ham, len(buckets))
+
+ for sub in ["Spam", "Ham"]:
+ if os.path.exists(os.path.join(directory, sub)):
+ shutil.rmtree(os.path.join(directory, sub))
+ for b in buckets:
+ d = os.path.join(directory, sub, b)
+ os.makedirs(d)
+
print "Exporting spam..."
! num = _export_folders(manager, os.path.join(directory, "Spam"), buckets,
config.training.spam_folder_ids, config.training.spam_include_sub)
! print "Exported", num, "spam messages."
! print "Exporting ham..."
! num = _export_folders(manager, os.path.join(directory, "Ham"), buckets,
config.training.ham_folder_ids, config.training.ham_include_sub)
! print "Exported", num, "ham messages."
def main():
import getopt
try:
! opts, args = getopt.getopt(sys.argv[1:], "qn:")
except getopt.error, d:
print d
***************
*** 79,82 ****
--- 89,95 ----
if opt=='-q':
quiet = 1
+ elif opt=='-n':
+ global files_per_directory
+ files_per_directory = int(val)
if len(args) > 1:
***************
*** 106,109 ****
--- 119,123 ----
-q : quiet - don't prompt for confirmation.
+ -n : Minimum number of files to aim for in each directory, default=%d
Export the folders defined in the Outlook Plugin to a test directory.
***************
*** 115,119 ****
If 'directory' exists, it will be recursively deleted before
the export (but you will be asked to confirm unless -q is given).""" \
! % (os.path.basename(sys.argv[0]))
sys.exit(1)
--- 129,133 ----
If 'directory' exists, it will be recursively deleted before
the export (but you will be asked to confirm unless -q is given).""" \
! % (os.path.basename(sys.argv[0]), files_per_directory)
sys.exit(1)
More information about the Spambayes-checkins
mailing list