[Python-checkins] CVS: python/dist/src/Lib mimetypes.py,1.19,1.20

Barry Warsaw bwarsaw@users.sourceforge.net
Thu, 25 Oct 2001 14:49:20 -0700


Update of /cvsroot/python/python/dist/src/Lib
In directory usw-pr-cvs1:/tmp/cvs-serv31010/Lib

Modified Files:
	mimetypes.py 
Log Message:
Applying proposed patch for bug #474583, optional support for
non-standard but common types.  Including Martin's suggestion to add
rejected non-standard types from patch #438790.  Specifically,

guess_type(), guess_extension(): Both the functions and the methods
grow an optional "strict" flag, defaulting to true, which determines
whether to recognize non-standard, but commonly found types or not.

Also, I sorted, reformatted, and culled duplicates from the big
types_map dictionary.  Note that there are a few non-equivalent
duplicates (e.g. .cdf and .xls) for which the first will just get
thrown away.  I didn't remove those though.

Finally, use of the module as a script has grown the -l and -e options
to toggle strictness and to do guess_extension(), respectively.

Doc and unittest updates too.


Index: mimetypes.py
===================================================================
RCS file: /cvsroot/python/python/dist/src/Lib/mimetypes.py,v
retrieving revision 1.19
retrieving revision 1.20
diff -C2 -d -r1.19 -r1.20
*** mimetypes.py	2001/09/07 16:49:12	1.19
--- mimetypes.py	2001/10/25 21:49:18	1.20
***************
*** 3,9 ****
  This module defines two useful functions:
  
! guess_type(url) -- guess the MIME type and encoding of a URL.
  
! guess_extension(type) -- guess the extension for a given MIME type.
  
  It also contains the following, for tuning the behavior:
--- 3,9 ----
  This module defines two useful functions:
  
! guess_type(url, strict=1) -- guess the MIME type and encoding of a URL.
  
! guess_extension(type, strict=1) -- guess the extension for a given MIME type.
  
  It also contains the following, for tuning the behavior:
***************
*** 22,25 ****
--- 22,35 ----
  read_mime_types(file) -- parse one file, return a dictionary or None
  
+ When run as a script, the following command line options are recognized:
+ 
+ Usage: mimetypes.py [options] type
+ Options:
+     --help / -h       -- print this message and exit
+     --lenient / -l    -- additionally search of some common, but non-standard
+                          types.
+     --extension / -e  -- guess extension instead of type
+ 
+ More than one type argument may be given.
  """
  
***************
*** 54,61 ****
          self.suffix_map = suffix_map.copy()
          self.types_map = types_map.copy()
          for name in filenames:
              self.read(name)
  
!     def guess_type(self, url):
          """Guess the type of a file based on its URL.
  
--- 64,72 ----
          self.suffix_map = suffix_map.copy()
          self.types_map = types_map.copy()
+         self.common_types = common_types.copy()
          for name in filenames:
              self.read(name)
  
!     def guess_type(self, url, strict=1):
          """Guess the type of a file based on its URL.
  
***************
*** 72,75 ****
--- 83,89 ----
          mapped to '.tar.gz'.  (This is table-driven too, using the
          dictionary suffix_map.)
+ 
+         Optional `strict' argument when false adds a bunch of commonly found,
+         but non-standard types.
          """
          scheme, url = urllib.splittype(url)
***************
*** 102,113 ****
              encoding = None
          types_map = self.types_map
          if types_map.has_key(ext):
              return types_map[ext], encoding
          elif types_map.has_key(ext.lower()):
              return types_map[ext.lower()], encoding
          else:
              return None, encoding
  
!     def guess_extension(self, type):
          """Guess the extension for a file based on its MIME type.
  
--- 116,134 ----
              encoding = None
          types_map = self.types_map
+         common_types = self.common_types
          if types_map.has_key(ext):
              return types_map[ext], encoding
          elif types_map.has_key(ext.lower()):
              return types_map[ext.lower()], encoding
+         elif strict:
+             return None, encoding
+         elif common_types.has_key(ext):
+             return common_types[ext], encoding
+         elif common_types.has_key(ext.lower()):
+             return common_types[ext.lower()], encoding
          else:
              return None, encoding
  
!     def guess_extension(self, type, strict=1):
          """Guess the extension for a file based on its MIME type.
  
***************
*** 118,121 ****
--- 139,145 ----
          guess_type().  If no extension can be guessed for `type', None
          is returned.
+ 
+         Optional `strict' argument when false adds a bunch of commonly found,
+         but non-standard types.
          """
          type = type.lower()
***************
*** 123,126 ****
--- 147,154 ----
              if type == stype:
                  return ext
+         if not strict:
+             for ext, stype in common_types.items():
+                 if type == stype:
+                     return ext
          return None
  
***************
*** 150,154 ****
  
  
! def guess_type(url):
      """Guess the type of a file based on its URL.
  
--- 178,182 ----
  
  
! def guess_type(url, strict=1):
      """Guess the type of a file based on its URL.
  
***************
*** 164,173 ****
      to ".tar.gz".  (This is table-driven too, using the dictionary
      suffix_map).
      """
      init()
!     return guess_type(url)
  
  
! def guess_extension(type):
      """Guess the extension for a file based on its MIME type.
  
--- 192,204 ----
      to ".tar.gz".  (This is table-driven too, using the dictionary
      suffix_map).
+ 
+     Optional `strict' argument when false adds a bunch of commonly found, but
+     non-standard types.
      """
      init()
!     return guess_type(url, strict)
  
  
! def guess_extension(type, strict=1):
      """Guess the extension for a file based on its MIME type.
  
***************
*** 177,188 ****
      MIME type `type' by guess_type().  If no extension can be guessed for
      `type', None is returned.
      """
      init()
!     return guess_extension(type)
  
  
  def init(files=None):
      global guess_extension, guess_type
!     global suffix_map, types_map, encodings_map
      global inited
      inited = 1
--- 208,222 ----
      MIME type `type' by guess_type().  If no extension can be guessed for
      `type', None is returned.
+ 
+     Optional `strict' argument when false adds a bunch of commonly found,
+     but non-standard types.
      """
      init()
!     return guess_extension(type, strict)
  
  
  def init(files=None):
      global guess_extension, guess_type
!     global suffix_map, types_map, encodings_map, common_types
      global inited
      inited = 1
***************
*** 198,201 ****
--- 232,236 ----
      guess_extension = db.guess_extension
      guess_type = db.guess_type
+     common_types = db.common_types
  
  
***************
*** 224,355 ****
  # http://www.isi.edu/in-notes/iana/assignments/media-types
  # or extensions, i.e. using the x- prefix
  types_map = {
!     '.a': 'application/octet-stream',
!     '.ai': 'application/postscript',
!     '.aif': 'audio/x-aiff',
!     '.aifc': 'audio/x-aiff',
!     '.aiff': 'audio/x-aiff',
!     '.au': 'audio/basic',
!     '.avi': 'video/x-msvideo',
!     '.bcpio': 'application/x-bcpio',
!     '.bin': 'application/octet-stream',
!     '.bmp': 'image/x-ms-bmp',
!     '.cdf': 'application/x-netcdf',
!     '.cpio': 'application/x-cpio',
!     '.csh': 'application/x-csh',
!     '.css': 'text/css',
!     '.dll': 'application/octet-stream',
!     '.doc': 'application/msword',
!     '.dvi': 'application/x-dvi',
!     '.exe': 'application/octet-stream',
!     '.eps': 'application/postscript',
!     '.etx': 'text/x-setext',
!     '.gif': 'image/gif',
!     '.gtar': 'application/x-gtar',
!     '.hdf': 'application/x-hdf',
!     '.htm': 'text/html',
!     '.html': 'text/html',
!     '.ief': 'image/ief',
!     '.jpe': 'image/jpeg',
!     '.jpeg': 'image/jpeg',
!     '.jpg': 'image/jpeg',
!     '.js': 'application/x-javascript',
!     '.latex': 'application/x-latex',
!     '.man': 'application/x-troff-man',
!     '.me': 'application/x-troff-me',
!     '.mif': 'application/x-mif',
!     '.mov': 'video/quicktime',
!     '.movie': 'video/x-sgi-movie',
!     '.mp2': 'audio/mpeg',
!     '.mp3': 'audio/mpeg',
!     '.mpe': 'video/mpeg',
!     '.mpeg': 'video/mpeg',
!     '.mpg': 'video/mpeg',
!     '.ms': 'application/x-troff-ms',
!     '.nc': 'application/x-netcdf',
!     '.o': 'application/octet-stream',
!     '.obj': 'application/octet-stream',
!     '.oda': 'application/oda',
!     '.pbm': 'image/x-portable-bitmap',
!     '.pdf': 'application/pdf',
!     '.pgm': 'image/x-portable-graymap',
!     '.pnm': 'image/x-portable-anymap',
!     '.png': 'image/png',
!     '.ppm': 'image/x-portable-pixmap',
!     '.ps': 'application/postscript',
!     '.py': 'text/x-python',
!     '.pyc': 'application/x-python-code',
!     '.pyo': 'application/x-python-code',
!     '.qt': 'video/quicktime',
!     '.ras': 'image/x-cmu-raster',
!     '.rgb': 'image/x-rgb',
!     '.rdf': 'application/xml',
!     '.roff': 'application/x-troff',
!     '.rtx': 'text/richtext',
!     '.sgm': 'text/x-sgml',
!     '.sgml': 'text/x-sgml',
!     '.sh': 'application/x-sh',
!     '.shar': 'application/x-shar',
!     '.snd': 'audio/basic',
!     '.so': 'application/octet-stream',
!     '.src': 'application/x-wais-source',
      '.sv4cpio': 'application/x-sv4cpio',
!     '.sv4crc': 'application/x-sv4crc',
!     '.t': 'application/x-troff',
!     '.tar': 'application/x-tar',
!     '.tcl': 'application/x-tcl',
!     '.tex': 'application/x-tex',
!     '.texi': 'application/x-texinfo',
      '.texinfo': 'application/x-texinfo',
!     '.tif': 'image/tiff',
!     '.tiff': 'image/tiff',
!     '.tr': 'application/x-troff',
!     '.tsv': 'text/tab-separated-values',
!     '.txt': 'text/plain',
!     '.ustar': 'application/x-ustar',
!     '.wav': 'audio/x-wav',
!     '.xbm': 'image/x-xbitmap',
!     '.xls': 'application/excel',
!     '.xml': 'text/xml',
!     '.xsl': 'application/xml',
!     '.xpm': 'image/x-xpixmap',
!     '.xwd': 'image/x-xwindowdump',
!     '.zip': 'application/zip',
!     '.mp3': 'audio/mpeg',
!     '.ra': 'audio/x-pn-realaudio',
!     '.pdf': 'application/pdf',
!     '.c': 'text/plain',
!     '.bat': 'text/plain',
!     '.h': 'text/plain',
!     '.pl': 'text/plain',
!     '.ksh': 'text/plain',
!     '.ram': 'application/x-pn-realaudio',
!     '.cdf': 'application/x-cdf',
!     '.doc': 'application/msword',
!     '.dot': 'application/msword',
!     '.wiz': 'application/msword',
!     '.xlb': 'application/vnd.ms-excel',
!     '.xls': 'application/vnd.ms-excel',
!     '.ppa': 'application/vnd.ms-powerpoint',
!     '.ppt': 'application/vnd.ms-powerpoint',
!     '.pps': 'application/vnd.ms-powerpoint',
!     '.pot': 'application/vnd.ms-powerpoint',
!     '.pwz': 'application/vnd.ms-powerpoint',
!     '.eml':   'message/rfc822',
!     '.nws':   'message/rfc822',
!     '.mht':   'message/rfc822',
!     '.mhtml': 'message/rfc822',
!     '.css': 'text/css',
!     '.p7c': 'application/pkcs7-mime',
!     '.p12': 'application/x-pkcs12',
!     '.pfx': 'application/x-pkcs12',
!     '.js':  'application/x-javascript',
!     '.m1v': 'video/mpeg',
!     '.mpa': 'video/mpeg',
!     '.vcf': 'text/x-vcard',
!     '.xml': 'text/xml',
      }
  
  if __name__ == '__main__':
      import sys
!     print guess_type(sys.argv[1])
--- 259,435 ----
  # http://www.isi.edu/in-notes/iana/assignments/media-types
  # or extensions, i.e. using the x- prefix
+ 
+ # If you add to these, please keep them sorted!
  types_map = {
!     '.a'      : 'application/octet-stream',
!     '.ai'     : 'application/postscript',
!     '.aif'    : 'audio/x-aiff',
!     '.aifc'   : 'audio/x-aiff',
!     '.aiff'   : 'audio/x-aiff',
!     '.au'     : 'audio/basic',
!     '.avi'    : 'video/x-msvideo',
!     '.bat'    : 'text/plain',
!     '.bcpio'  : 'application/x-bcpio',
!     '.bin'    : 'application/octet-stream',
!     '.bmp'    : 'image/x-ms-bmp',
!     '.c'      : 'text/plain',
!     # Duplicates :(
!     '.cdf'    : 'application/x-cdf',
!     '.cdf'    : 'application/x-netcdf',
!     '.cpio'   : 'application/x-cpio',
!     '.csh'    : 'application/x-csh',
!     '.css'    : 'text/css',
!     '.dll'    : 'application/octet-stream',
!     '.doc'    : 'application/msword',
!     '.dot'    : 'application/msword',
!     '.dvi'    : 'application/x-dvi',
!     '.eml'    : 'message/rfc822',
!     '.eps'    : 'application/postscript',
!     '.etx'    : 'text/x-setext',
!     '.exe'    : 'application/octet-stream',
!     '.gif'    : 'image/gif',
!     '.gtar'   : 'application/x-gtar',
!     '.h'      : 'text/plain',
!     '.hdf'    : 'application/x-hdf',
!     '.htm'    : 'text/html',
!     '.html'   : 'text/html',
!     '.ief'    : 'image/ief',
!     '.jpe'    : 'image/jpeg',
!     '.jpeg'   : 'image/jpeg',
!     '.jpg'    : 'image/jpeg',
!     '.js'     : 'application/x-javascript',
!     '.ksh'    : 'text/plain',
!     '.latex'  : 'application/x-latex',
!     '.m1v'    : 'video/mpeg',
!     '.man'    : 'application/x-troff-man',
!     '.me'     : 'application/x-troff-me',
!     '.mht'    : 'message/rfc822',
!     '.mhtml'  : 'message/rfc822',
!     '.mif'    : 'application/x-mif',
!     '.mov'    : 'video/quicktime',
!     '.movie'  : 'video/x-sgi-movie',
!     '.mp2'    : 'audio/mpeg',
!     '.mp3'    : 'audio/mpeg',
!     '.mpa'    : 'video/mpeg',
!     '.mpe'    : 'video/mpeg',
!     '.mpeg'   : 'video/mpeg',
!     '.mpg'    : 'video/mpeg',
!     '.ms'     : 'application/x-troff-ms',
!     '.nc'     : 'application/x-netcdf',
!     '.nws'    : 'message/rfc822',
!     '.o'      : 'application/octet-stream',
!     '.obj'    : 'application/octet-stream',
!     '.oda'    : 'application/oda',
!     '.p12'    : 'application/x-pkcs12',
!     '.p7c'    : 'application/pkcs7-mime',
!     '.pbm'    : 'image/x-portable-bitmap',
!     '.pdf'    : 'application/pdf',
!     '.pfx'    : 'application/x-pkcs12',
!     '.pgm'    : 'image/x-portable-graymap',
!     '.pl'     : 'text/plain',
!     '.png'    : 'image/png',
!     '.pnm'    : 'image/x-portable-anymap',
!     '.pot'    : 'application/vnd.ms-powerpoint',
!     '.ppa'    : 'application/vnd.ms-powerpoint',
!     '.ppm'    : 'image/x-portable-pixmap',
!     '.pps'    : 'application/vnd.ms-powerpoint',
!     '.ppt'    : 'application/vnd.ms-powerpoint',
!     '.ps'     : 'application/postscript',
!     '.pwz'    : 'application/vnd.ms-powerpoint',
!     '.py'     : 'text/x-python',
!     '.pyc'    : 'application/x-python-code',
!     '.pyo'    : 'application/x-python-code',
!     '.qt'     : 'video/quicktime',
!     '.ra'     : 'audio/x-pn-realaudio',
!     '.ram'    : 'application/x-pn-realaudio',
!     '.ras'    : 'image/x-cmu-raster',
!     '.rdf'    : 'application/xml',
!     '.rgb'    : 'image/x-rgb',
!     '.roff'   : 'application/x-troff',
!     '.rtx'    : 'text/richtext',
!     '.sgm'    : 'text/x-sgml',
!     '.sgml'   : 'text/x-sgml',
!     '.sh'     : 'application/x-sh',
!     '.shar'   : 'application/x-shar',
!     '.snd'    : 'audio/basic',
!     '.so'     : 'application/octet-stream',
!     '.src'    : 'application/x-wais-source',
      '.sv4cpio': 'application/x-sv4cpio',
!     '.sv4crc' : 'application/x-sv4crc',
!     '.t'      : 'application/x-troff',
!     '.tar'    : 'application/x-tar',
!     '.tcl'    : 'application/x-tcl',
!     '.tex'    : 'application/x-tex',
!     '.texi'   : 'application/x-texinfo',
      '.texinfo': 'application/x-texinfo',
!     '.tif'    : 'image/tiff',
!     '.tiff'   : 'image/tiff',
!     '.tr'     : 'application/x-troff',
!     '.tsv'    : 'text/tab-separated-values',
!     '.txt'    : 'text/plain',
!     '.ustar'  : 'application/x-ustar',
!     '.vcf'    : 'text/x-vcard',
!     '.wav'    : 'audio/x-wav',
!     '.wiz'    : 'application/msword',
!     '.xbm'    : 'image/x-xbitmap',
!     '.xlb'    : 'application/vnd.ms-excel',
!     # Duplicates :(
!     '.xls'    : 'application/excel',
!     '.xls'    : 'application/vnd.ms-excel',
!     '.xml'    : 'text/xml',
!     '.xpm'    : 'image/x-xpixmap',
!     '.xsl'    : 'application/xml',
!     '.xwd'    : 'image/x-xwindowdump',
!     '.zip'    : 'application/zip',
!     }
! 
! # These are non-standard types, commonly found in the wild.  They will only
! # match if strict=0 flag is given to the API methods.
! 
! # Please sort these too
! common_types = {
!     '.jpg' : 'image/jpg',
!     '.mid' : 'audio/midi',
!     '.midi': 'audio/midi',
!     '.pct' : 'image/pict',
!     '.pic' : 'image/pict',
!     '.pict': 'image/pict',
!     '.rtf' : 'application/rtf',
!     '.xul' : 'text/xul'
      }
  
+ 
+ def usage(code, msg=''):
+     print __doc__
+     if msg: print msg
+     sys.exit(code)
+ 
+ 
  if __name__ == '__main__':
      import sys
!     import getopt
! 
!     try:
!         opts, args = getopt.getopt(sys.argv[1:], 'hle',
!                                    ['help', 'lenient', 'extension'])
!     except getopt.error, msg:
!         usage(1, msg)
! 
!     strict = 1
!     extension = 0
!     for opt, arg in opts:
!         if opt in ('-h', '--help'):
!             usage(0)
!         elif opt in ('-l', '--lenient'):
!             strict = 0
!         elif opt in ('-e', '--extension'):
!             extension = 1
!     for gtype in args:
!         if extension:
!             guess = guess_extension(gtype, strict)
!             if not guess: print "I don't know anything about type", gtype
!             else: print guess
!         else:
!             guess, encoding = guess_type(gtype, strict)
!             if not guess: print "I don't know anything about type", gtype
!             else: print 'type:', guess, 'encoding:', encoding