From montanaro at users.sourceforge.net Sun Nov 5 20:13:28 2006 From: montanaro at users.sourceforge.net (Skip Montanaro) Date: Sun, 05 Nov 2006 11:13:28 -0800 Subject: [Spambayes-checkins] spambayes/spambayes ImageStripper.py,1.6,1.7 Message-ID: <20061105191331.6E5CA1E4007@bag.python.org> Update of /cvsroot/spambayes/spambayes/spambayes In directory sc8-pr-cvs8.sourceforge.net:/tmp/cvs-serv5086 Modified Files: ImageStripper.py Log Message: Better (I hope) location and execution of ocrad on Windows. Index: ImageStripper.py =================================================================== RCS file: /cvsroot/spambayes/spambayes/spambayes/ImageStripper.py,v retrieving revision 1.6 retrieving revision 1.7 diff -C2 -d -r1.6 -r1.7 *** ImageStripper.py 9 Sep 2006 22:59:38 -0000 1.6 --- ImageStripper.py 5 Nov 2006 19:13:26 -0000 1.7 *************** *** 44,49 **** return log(n)/c - # I'm sure this is all wrong for Windows. Someone else can fix it. ;-) def is_executable(prog): info = os.stat(prog) return (info.st_uid == os.getuid() and (info.st_mode & 0100) or --- 44,50 ---- return log(n)/c def is_executable(prog): + if sys.platform == "win32": + return True info = os.stat(prog) return (info.st_uid == os.getuid() and (info.st_mode & 0100) or *************** *** 52,56 **** def find_program(prog): ! for directory in os.environ.get("PATH", "").split(os.pathsep): program = os.path.join(directory, prog) if os.path.exists(program) and is_executable(program): --- 53,65 ---- def find_program(prog): ! path = os.environ.get("PATH", "").split(os.pathsep) ! if sys.platform == "win32": ! # Outlook plugin puts executables in (for example): ! # C:/Program Files/SpamBayes/bin ! # so add that directory to the path and make sure we ! # look for a file ending in ".exe". ! path.append(os.path.dirname(sys.executable)) ! prog = "%s.exe" % prog ! 
for directory in path: program = os.path.join(directory, prog) if os.path.exists(program) and is_executable(program): *************** *** 180,185 **** else: self.misses += 1 ! ocr = os.popen("ocrad -s %s -c %s -x %s < %s 2>/dev/null" % ! (scale, charset, orf, pnmfile)) ctext = ocr.read().lower() ocr.close() --- 189,195 ---- else: self.misses += 1 ! ocr = os.popen("%s -s %s -c %s -x %s -f %s 2>%s" % ! (find_program("ocrad"), scale, charset, ! orf, pnmfile, os.path.devnull)) ctext = ocr.read().lower() ocr.close() From montanaro at users.sourceforge.net Sun Nov 5 20:34:20 2006 From: montanaro at users.sourceforge.net (Skip Montanaro) Date: Sun, 05 Nov 2006 11:34:20 -0800 Subject: [Spambayes-checkins] spambayes/spambayes ImageStripper.py,1.7,1.8 Message-ID: <20061105193423.1A5041E4007@bag.python.org> Update of /cvsroot/spambayes/spambayes/spambayes In directory sc8-pr-cvs8.sourceforge.net:/tmp/cvs-serv13287 Modified Files: ImageStripper.py Log Message: Bug (or feature?) in ocrad keeps it from emitting an export file when the -s flag is used. Just count the number of lines in the output instead. Index: ImageStripper.py =================================================================== RCS file: /cvsroot/spambayes/spambayes/spambayes/ImageStripper.py,v retrieving revision 1.7 retrieving revision 1.8 diff -C2 -d -r1.7 -r1.8 *** ImageStripper.py 5 Nov 2006 19:13:26 -0000 1.7 --- ImageStripper.py 5 Nov 2006 19:34:16 -0000 1.8 *************** *** 175,181 **** def extract_ocr_info(self, pnmfiles): - fd, orf = tempfile.mkstemp() - os.close(fd) - textbits = [] tokens = Set() --- 175,178 ---- *************** *** 189,209 **** else: self.misses += 1 ! ocr = os.popen("%s -s %s -c %s -x %s -f %s 2>%s" % (find_program("ocrad"), scale, charset, ! orf, pnmfile, os.path.devnull)) ctext = ocr.read().lower() ocr.close() ctokens = set() ! for line in open(orf): ! if line.startswith("lines"): ! nlines = int(line.split()[1]) ! if nlines: ! ctokens.add("image-text-lines:%d" % !
int(log2(nlines))) self.cache[fhash] = (ctext, ctokens) textbits.append(ctext) tokens |= ctokens os.unlink(pnmfile) - os.unlink(orf) return "\n".join(textbits), tokens --- 186,202 ---- else: self.misses += 1 ! ocr = os.popen("%s -s %s -c %s -f %s 2>%s" % (find_program("ocrad"), scale, charset, ! pnmfile, os.path.devnull)) ctext = ocr.read().lower() ocr.close() ctokens = set() ! nlines = len(ctext.strip().split("\n")) ! if nlines: ! ctokens.add("image-text-lines:%d" % int(log2(nlines))) self.cache[fhash] = (ctext, ctokens) textbits.append(ctext) tokens |= ctokens os.unlink(pnmfile) return "\n".join(textbits), tokens From montanaro at users.sourceforge.net Mon Nov 6 15:22:38 2006 From: montanaro at users.sourceforge.net (Skip Montanaro) Date: Mon, 06 Nov 2006 06:22:38 -0800 Subject: [Spambayes-checkins] spambayes/spambayes ImageStripper.py,1.8,1.9 Message-ID: <20061106142242.4BB4F1E400A@bag.python.org> Update of /cvsroot/spambayes/spambayes/spambayes In directory sc8-pr-cvs8.sourceforge.net:/tmp/cvs-serv20307 Modified Files: ImageStripper.py Log Message: Quote the pnmfile. While tempfile.mkstemp() is called with no arguments, the user could still set any of these environment variables to affect the directory in which the temporary file is to be created: TMPDIR, TEMP, TMP. In theory, those environment variables could contain a directory name containing whitespace. Index: ImageStripper.py =================================================================== RCS file: /cvsroot/spambayes/spambayes/spambayes/ImageStripper.py,v retrieving revision 1.8 retrieving revision 1.9 diff -C2 -d -r1.8 -r1.9 *** ImageStripper.py 5 Nov 2006 19:34:16 -0000 1.8 --- ImageStripper.py 6 Nov 2006 14:22:30 -0000 1.9 *************** *** 186,190 **** else: self.misses += 1 ! ocr = os.popen("%s -s %s -c %s -f %s 2>%s" % (find_program("ocrad"), scale, charset, pnmfile, os.path.devnull)) --- 186,190 ---- else: self.misses += 1 !
ocr = os.popen("%s -s %s -c %s -f '%s' 2>%s" % (find_program("ocrad"), scale, charset, pnmfile, os.path.devnull)) From montanaro at users.sourceforge.net Mon Nov 6 15:50:39 2006 From: montanaro at users.sourceforge.net (Skip Montanaro) Date: Mon, 06 Nov 2006 06:50:39 -0800 Subject: [Spambayes-checkins] spambayes/spambayes ImageStripper.py, 1.9, 1.10 Message-ID: <20061106145044.6E1401E400A@bag.python.org> Update of /cvsroot/spambayes/spambayes/spambayes In directory sc8-pr-cvs8.sourceforge.net:/tmp/cvs-serv18175 Modified Files: ImageStripper.py Log Message: Stinkin' Windows... Index: ImageStripper.py =================================================================== RCS file: /cvsroot/spambayes/spambayes/spambayes/ImageStripper.py,v retrieving revision 1.9 retrieving revision 1.10 diff -C2 -d -r1.9 -r1.10 *** ImageStripper.py 6 Nov 2006 14:22:30 -0000 1.9 --- ImageStripper.py 6 Nov 2006 14:50:30 -0000 1.10 *************** *** 186,190 **** else: self.misses += 1 ! ocr = os.popen("%s -s %s -c %s -f '%s' 2>%s" % (find_program("ocrad"), scale, charset, pnmfile, os.path.devnull)) --- 186,190 ---- else: self.misses += 1 ! 
ocr = os.popen('%s -s %s -c %s -f "%s" 2>%s' % (find_program("ocrad"), scale, charset, pnmfile, os.path.devnull)) From montanaro at users.sourceforge.net Fri Nov 17 12:21:10 2006 From: montanaro at users.sourceforge.net (Skip Montanaro) Date: Fri, 17 Nov 2006 03:21:10 -0800 Subject: [Spambayes-checkins] spambayes/contrib mod_spambayes.py,1.4,1.5 Message-ID: <20061117112113.2EEC91E4016@bag.python.org> Update of /cvsroot/spambayes/spambayes/contrib In directory sc8-pr-cvs8.sourceforge.net:/tmp/cvs-serv22360 Modified Files: mod_spambayes.py Log Message: typo Index: mod_spambayes.py =================================================================== RCS file: /cvsroot/spambayes/spambayes/contrib/mod_spambayes.py,v retrieving revision 1.4 retrieving revision 1.5 diff -C2 -d -r1.4 -r1.5 *** mod_spambayes.py 12 Jan 2004 08:36:15 -0000 1.4 --- mod_spambayes.py 17 Nov 2006 11:21:04 -0000 1.5 *************** *** 12,16 **** from spambayes import hammie, Options, mboxutils ! bdf = Options.get_pathname_option("Storage", "persistent_storage_file") class SpambayesFilter(BufferAllFilter): --- 12,16 ---- from spambayes import hammie, Options, mboxutils ! dbf = Options.get_pathname_option("Storage", "persistent_storage_file") class SpambayesFilter(BufferAllFilter):