[Spambayes-checkins] spambayes/spambayes ImageStripper.py, 1.13, 1.14

Mark Hammond mhammond at users.sourceforge.net
Mon Mar 26 09:50:34 CEST 2007


Update of /cvsroot/spambayes/spambayes/spambayes
In directory sc8-pr-cvs8.sourceforge.net:/tmp/cvs-serv31720

Modified Files:
	ImageStripper.py 
Log Message:
* Refactor popen() usage into one place and print a warning if the OCR engine
  fails.
* In frozen builds on Windows, only look in the 'bin' dir for OCR
  executables, and use the short version of the path to the exe to avoid
  insane weirdness regarding quotes and popen().


Index: ImageStripper.py
===================================================================
RCS file: /cvsroot/spambayes/spambayes/spambayes/ImageStripper.py,v
retrieving revision 1.13
retrieving revision 1.14
diff -C2 -d -r1.13 -r1.14
*** ImageStripper.py	14 Feb 2007 00:53:22 -0000	1.13
--- ImageStripper.py	26 Mar 2007 07:50:31 -0000	1.14
***************
*** 73,79 ****
              # so add that directory to the path and make sure we
              # look for a file ending in ".exe".
!             # Put it at the *start* of the paths we search - who knows
!             # what else me may encounter in the wild!
!             path.insert(0, os.path.dirname(sys.executable))
          else:
              # a source build - for testing, allow it in SB package dir.
--- 73,86 ----
              # so add that directory to the path and make sure we
              # look for a file ending in ".exe".
!             if sys.frozen=="dll":
!                 import win32api
!                 sentinal = win32api.GetModuleFileName(sys.frozendllhandle)
!             else:
!                 sentinal = sys.executable
!             # os.popen() trying to quote both the program and argv[1] fails.
!             # So just use the short version.
!             # For the sake of safety, in a binary build we *only* look in
!             # our bin dir.
!             path=[win32api.GetShortPathName(os.path.dirname(sentinal))]
          else:
              # a source build - for testing, allow it in SB package dir.
***************
*** 236,263 ****
      program = property(get_program)
  
! class OCREngineOCRAD(OCRExecutableEngine):
!     engine_name = "ocrad"
  
      def extract_text(self, pnmfile):
          assert self.is_enabled(), "I'm not working!"
!         scale = options["Tokenizer", "ocrad_scale"] or 1
!         charset = options["Tokenizer", "ocrad_charset"]
!         ocr = os.popen('%s -s %s -c %s -f "%s" 2>%s' %
!                        (self.program, scale, charset,
!                         pnmfile, os.path.devnull))
          ret = ocr.read()
!         ocr.close()
          return ret
  
  class OCREngineGOCR(OCRExecutableEngine):
      engine_name="gocr"
  
!     def extract_text(self, pnmfile):
!         assert self.is_enabled(), "I'm not working!"
!         ocr = os.popen('%s "%s" 2>%s' %
!                        (self.program, pnmfile, os.path.devnull))
!         ret = ocr.read()
!         ocr.close()
!         return ret
  
  # This lists all engines, with the first listed that is enabled winning.
--- 243,275 ----
      program = property(get_program)
  
!     def get_command_line(self, pnmfile):
!         raise NotImplementedError, "base classes must override"
  
      def extract_text(self, pnmfile):
+         # Generically reads output from stdout.
          assert self.is_enabled(), "I'm not working!"
!         cmdline = self.get_command_line(pnmfile)
!         ocr = os.popen(cmdline)
          ret = ocr.read()
!         exit_code = ocr.close()
!         if exit_code:
!             print "warning:", self.engine_name, "failed with exit code", exit_code
!             print "command line was:", repr(cmdline)
          return ret
  
+ class OCREngineOCRAD(OCRExecutableEngine):
+     engine_name = "ocrad"
+ 
+     def get_command_line(self, pnmfile):
+         scale = options["Tokenizer", "ocrad_scale"] or 1
+         charset = options["Tokenizer", "ocrad_charset"]
+         return '%s -s %s -c %s -f "%s" 2>%s' % \
+                 (self.program, scale, charset, pnmfile, os.path.devnull)
+ 
  class OCREngineGOCR(OCRExecutableEngine):
      engine_name="gocr"
  
!     def get_command_line(self, pnmfile):
!         return '%s "%s" 2>%s' % (self.program, pnmfile, os.path.devnull)
  
  # This lists all engines, with the first listed that is enabled winning.



More information about the Spambayes-checkins mailing list