[Python-checkins] python/dist/src/Tools/i18n pygettext.py,1.21,1.22
loewis@users.sourceforge.net
loewis@users.sourceforge.net
Fri, 22 Nov 2002 00:36:56 -0800
Update of /cvsroot/python/python/dist/src/Tools/i18n
In directory sc8-pr-cvs1:/tmp/cvs-serv18615/Tools/i18n
Modified Files:
pygettext.py
Log Message:
Patch #494845: Support string concatenation, detect non-string data,
add globbing support, find modules by name in addition to by file.
Index: pygettext.py
===================================================================
RCS file: /cvsroot/python/python/dist/src/Tools/i18n/pygettext.py,v
retrieving revision 1.21
retrieving revision 1.22
diff -C2 -d -r1.21 -r1.22
*** pygettext.py 27 Jul 2001 16:47:18 -0000 1.21
--- pygettext.py 22 Nov 2002 08:36:54 -0000 1.22
***************
*** 1,30 ****
#! /usr/bin/env python
# Originally written by Barry Warsaw <barry@zope.com>
#
# Minimally patched to make it even more xgettext compatible
# by Peter Funk <pf@artcom-gmbh.de>
! """pygettext -- Python equivalent of xgettext(1)
Many systems (Solaris, Linux, Gnu) provide extensive tools that ease the
! internationalization of C programs. Most of these tools are independent of
! the programming language and can be used from within Python programs. Martin
! von Loewis' work[1] helps considerably in this regard.
There's one problem though; xgettext is the program that scans source code
! looking for message strings, but it groks only C (or C++). Python introduces
! a few wrinkles, such as dual quoting characters, triple quoted strings, and
! raw strings. xgettext understands none of this.
! Enter pygettext, which uses Python's standard tokenize module to scan Python
! source code, generating .pot files identical to what GNU xgettext[2] generates
! for C and C++ code. From there, the standard GNU tools can be used.
! A word about marking Python strings as candidates for translation. GNU
! xgettext recognizes the following keywords: gettext, dgettext, dcgettext, and
! gettext_noop. But those can be a lot of text to include all over your code.
! C and C++ have a trick: they use the C preprocessor. Most internationalized C
! source includes a #define for gettext() to _() so that what has to be written
! in the source is much less. Thus these are both translatable strings:
gettext("Translatable String")
--- 1,48 ----
#! /usr/bin/env python
+ # -*- coding: iso-8859-1 -*-
# Originally written by Barry Warsaw <barry@zope.com>
#
# Minimally patched to make it even more xgettext compatible
# by Peter Funk <pf@artcom-gmbh.de>
+ #
+ # 2002-11-22 Jürgen Hermann <jh@web.de>
+ # Added checks that _() only contains string literals, and
+ # command line args are resolved to module lists, i.e. you
+ # can now pass a filename, a module or package name, or a
+ # directory (including globbing chars, important for Win32).
+ # Made docstring fit in 80 chars wide displays using pydoc.
+ #
! # for selftesting
! try:
! import fintl
! _ = fintl.gettext
! except ImportError:
! _ = lambda s: s
!
! __doc__ = _("""pygettext -- Python equivalent of xgettext(1)
Many systems (Solaris, Linux, Gnu) provide extensive tools that ease the
! internationalization of C programs. Most of these tools are independent of
! the programming language and can be used from within Python programs.
! Martin von Loewis' work[1] helps considerably in this regard.
There's one problem though; xgettext is the program that scans source code
! looking for message strings, but it groks only C (or C++). Python
! introduces a few wrinkles, such as dual quoting characters, triple quoted
! strings, and raw strings. xgettext understands none of this.
! Enter pygettext, which uses Python's standard tokenize module to scan
! Python source code, generating .pot files identical to what GNU xgettext[2]
! generates for C and C++ code. From there, the standard GNU tools can be
! used.
! A word about marking Python strings as candidates for translation. GNU
! xgettext recognizes the following keywords: gettext, dgettext, dcgettext,
! and gettext_noop. But those can be a lot of text to include all over your
! code. C and C++ have a trick: they use the C preprocessor. Most
! internationalized C source includes a #define for gettext() to _() so that
! what has to be written in the source is much less. Thus these are both
! translatable strings:
gettext("Translatable String")
***************
*** 38,46 ****
[2] http://www.gnu.org/software/gettext/gettext.html
! NOTE: pygettext attempts to be option and feature compatible with GNU xgettext
! where ever possible. However some options are still missing or are not fully
! implemented. Also, xgettext's use of command line switches with option
! arguments is broken, and in these cases, pygettext just defines additional
! switches.
Usage: pygettext [options] inputfile ...
--- 56,64 ----
[2] http://www.gnu.org/software/gettext/gettext.html
! NOTE: pygettext attempts to be option and feature compatible with GNU
! xgettext where ever possible. However some options are still missing or are
! not fully implemented. Also, xgettext's use of command line switches with
! option arguments is broken, and in these cases, pygettext just defines
! additional switches.
Usage: pygettext [options] inputfile ...
***************
*** 62,68 ****
-D
--docstrings
! Extract module, class, method, and function docstrings. These do not
! need to be wrapped in _() markers, and in fact cannot be for Python to
! consider them docstrings. (See also the -X option).
-h
--- 80,86 ----
-D
--docstrings
! Extract module, class, method, and function docstrings. These do
! not need to be wrapped in _() markers, and in fact cannot be for
! Python to consider them docstrings. (See also the -X option).
-h
***************
*** 136,140 ****
If `inputfile' is -, standard input is read.
! """
import os
--- 154,158 ----
If `inputfile' is -, standard input is read.
! """)
import os
***************
*** 142,156 ****
import time
import getopt
import tokenize
import operator
! # for selftesting
! try:
! import fintl
! _ = fintl.gettext
! except ImportError:
! def _(s): return s
!
! __version__ = '1.4'
default_keywords = ['_']
--- 160,168 ----
import time
import getopt
+ import token
import tokenize
import operator
! __version__ = '1.5'
default_keywords = ['_']
***************
*** 184,188 ****
def usage(code, msg=''):
! print >> sys.stderr, _(__doc__) % globals()
if msg:
print >> sys.stderr, msg
--- 196,200 ----
def usage(code, msg=''):
! print >> sys.stderr, __doc__ % globals()
if msg:
print >> sys.stderr, msg
***************
*** 243,246 ****
--- 255,355 ----
return s
+
+ def containsAny(str, set):
+ """ Check whether 'str' contains ANY of the chars in 'set'
+ """
+ return 1 in [c in str for c in set]
+
+
+ def _visit_pyfiles(list, dirname, names):
+ """ Helper for getFilesForName().
+ """
+ # get extension for python source files
+ if not globals().has_key('_py_ext'):
+ import imp
+ global _py_ext
+ _py_ext = [triple[0] for triple in imp.get_suffixes() if triple[2] == imp.PY_SOURCE][0]
+
+ # don't recurse into CVS directories
+ if 'CVS' in names:
+ names.remove('CVS')
+
+ # add all *.py files to list
+ list.extend(
+ [os.path.join(dirname, file)
+ for file in names
+ if os.path.splitext(file)[1] == _py_ext])
+
+
+ def _get_modpkg_path(dotted_name, pathlist=None):
+ """ Get the filesystem path for a module or a package.
+
+ Return the file system path to a file for a module,
+ and to a directory for a package. Return None if
+ the name is not found, or is a builtin or extension module.
+ """
+ import imp
+
+ # split off top-most name
+ parts = dotted_name.split('.', 1)
+
+ if len(parts) > 1:
+ # we have a dotted path, import top-level package
+ try:
+ file, pathname, description = imp.find_module(parts[0], pathlist)
+ if file: file.close()
+ except ImportError:
+ return None
+
+ # check if it's indeed a package
+ if description[2] == imp.PKG_DIRECTORY:
+ # recursively handle the remaining name parts
+ pathname = _get_modpkg_path(parts[1], [pathname])
+ else:
+ pathname = None
+ else:
+ # plain name
+ try:
+ file, pathname, description = imp.find_module(dotted_name, pathlist)
+ if file: file.close()
+ if description[2] not in [imp.PY_SOURCE, imp.PKG_DIRECTORY]:
+ pathname = None
+ except ImportError:
+ pathname = None
+
+ return pathname
+
+
+ def getFilesForName(name):
+ """ Get a list of module files for a filename, a module or package name,
+ or a directory.
+ """
+ import imp
+
+ if not os.path.exists(name):
+ # check for glob chars
+ if containsAny(name, "*?[]"):
+ import glob
+ files = glob.glob(name)
+ list = []
+ for file in files:
+ list.extend(getFilesForName(file))
+ return list
+
+ # try to find module or package
+ name = _get_modpkg_path(name)
+ if not name:
+ return []
+
+ if os.path.isdir(name):
+ # find all python files in directory
+ list = []
+ os.path.walk(name, _visit_pyfiles, list)
+ return list
+ elif os.path.exists(name):
+ # a single file
+ return [name]
+
+ return []
***************
*** 315,319 ****
elif ttype == tokenize.STRING:
self.__data.append(safe_eval(tstring))
! # TBD: should we warn if we seen anything else?
def __addentry(self, msg, lineno=None, isdocstring=0):
--- 424,433 ----
elif ttype == tokenize.STRING:
self.__data.append(safe_eval(tstring))
! elif ttype not in [tokenize.COMMENT, token.INDENT, token.DEDENT,
! token.NEWLINE, tokenize.NL]:
! # warn if we see anything other than STRING or whitespace
! print >>sys.stderr, _('*** %(file)s:%(lineno)s: Seen unexpected token "%(token)s"') % {
! 'token': tstring, 'file': self.__curfile, 'lineno': self.__lineno}
! self.__state = self.__waiting
def __addentry(self, msg, lineno=None, isdocstring=0):
***************
*** 496,499 ****
--- 610,622 ----
options.toexclude = []
+ # resolve args to module lists
+ expanded = []
+ for arg in args:
+ if arg == '-':
+ expanded.append(arg)
+ else:
+ expanded.extend(getFilesForName(arg))
+ args = expanded
+
# slurp through all the files
eater = TokenEater(options)
***************
*** 540,541 ****
--- 663,667 ----
# some more test strings
_(u'a unicode string')
+ _('*** Seen unexpected token "%(token)s"' % {'token': 'test'}) # this one creates a warning
+ _('more' 'than' 'one' 'string')
+