[Python-checkins] python/dist/src/Lib gettext.py,1.17,1.18

bwarsaw@users.sourceforge.net bwarsaw@users.sourceforge.net
Fri, 11 Apr 2003 11:36:07 -0700


Update of /cvsroot/python/python/dist/src/Lib
In directory sc8-pr-cvs1:/tmp/cvs-serv24888

Modified Files:
	gettext.py 
Log Message:
From http://mail.python.org/pipermail/i18n-sig/2003-April/001557.html

- Expose NullTranslations and GNUTranslations to __all__

- Set the default charset to iso-8859-1.  It used to be None, which
would cause problems with .ugettext() if the file had no charset
parameter.  Arguably, the po/mo file would be broken, but I still think
iso-8859-1 is a reasonable default.

- Add a "coerce" argument, defaulting to False, to the GNUTranslations
constructor.  The reason for this is that in Zope, we want all msgids and
msgstrs to be Unicode.  For the latter, we could use .ugettext() but there
isn't currently a mechanism for Unicode-ifying msgids.

The plan then is that the charset parameter specifies the encoding for
both the msgids and msgstrs, and both are decoded to Unicode when read. 
For example, we might encode po files with utf-8. I think the GNU
gettext tools don't care.

Since this could potentially break code [*] that wants to use the
encoded interface .gettext(), the constructor flag is added, defaulting
to False.  Most code I suspect will want to set this to True and use
.ugettext().

- A few other minor changes from the Zope project, including requiring
that the msgstr of a zero-length msgid start with a Project-ID-Version
header for it to be counted as the metadata record.


Index: gettext.py
===================================================================
RCS file: /cvsroot/python/python/dist/src/Lib/gettext.py,v
retrieving revision 1.17
retrieving revision 1.18
diff -C2 -d -r1.17 -r1.18
*** gettext.py	10 Mar 2003 16:01:43 -0000	1.17
--- gettext.py	11 Apr 2003 18:36:04 -0000	1.18
***************
*** 51,56 ****
  
  
! __all__ = ["bindtextdomain","textdomain","gettext","dgettext",
!            "find","translation","install","Catalog"]
  
  _default_localedir = os.path.join(sys.prefix, 'share', 'locale')
--- 51,58 ----
  
  
! __all__ = ['NullTranslations', 'GNUTranslations', 'Catalog',
!            'find', 'translation', 'install', 'textdomain', 'bindtextdomain',
!            'dgettext', 'dngettext', 'gettext', 'ngettext',
!            ]
  
  _default_localedir = os.path.join(sys.prefix, 'share', 'locale')
***************
*** 171,175 ****
      def __init__(self, fp=None):
          self._info = {}
!         self._charset = None
          self._fallback = None
          if fp is not None:
--- 173,177 ----
      def __init__(self, fp=None):
          self._info = {}
!         self._charset = 'iso-8859-1'
          self._fallback = None
          if fp is not None:
***************
*** 227,230 ****
--- 229,238 ----
      BE_MAGIC = 0xde120495L
  
+     def __init__(self, fp=None, coerce=False):
+         # Set this attribute before calling the base class constructor, since
+         # the latter calls _parse() which depends on self._coerce.
+         self._coerce = coerce
+         NullTranslations.__init__(self, fp)
+ 
      def _parse(self, fp):
          """Override this method to support alternative .mo formats."""
***************
*** 261,274 ****
                      msgid1, msgid2 = msg.split('\x00')
                      tmsg = tmsg.split('\x00')
                      for i in range(len(tmsg)):
                          catalog[(msgid1, i)] = tmsg[i]
                  else:
                      catalog[msg] = tmsg
              else:
                  raise IOError(0, 'File is corrupt', filename)
              # See if we're looking at GNU .mo conventions for metadata
!             if mlen == 0:
                  # Catalog description
!                 for item in tmsg.split('\n'):
                      item = item.strip()
                      if not item:
--- 269,288 ----
                      msgid1, msgid2 = msg.split('\x00')
                      tmsg = tmsg.split('\x00')
+                     if self._coerce:
+                         msgid1 = unicode(msgid1, self._charset)
+                         tmsg = [unicode(x, self._charset) for x in tmsg]
                      for i in range(len(tmsg)):
                          catalog[(msgid1, i)] = tmsg[i]
                  else:
+                     if self._coerce:
+                         msg = unicode(msg, self._charset)
+                         tmsg = unicode(tmsg, self._charset)
                      catalog[msg] = tmsg
              else:
                  raise IOError(0, 'File is corrupt', filename)
              # See if we're looking at GNU .mo conventions for metadata
!             if mlen == 0 and tmsg.lower().startswith('project-id-version:'):
                  # Catalog description
!                 for item in tmsg.splitlines():
                      item = item.strip()
                      if not item:
***************
*** 298,302 ****
              return message
  
- 
      def ngettext(self, msgid1, msgid2, n):
          try:
--- 312,315 ----
***************
*** 310,323 ****
                  return msgid2
  
- 
      def ugettext(self, message):
!         try:
!             tmsg = self._catalog[message]
!         except KeyError:
              if self._fallback:
                  return self._fallback.ugettext(message)
              tmsg = message
!         return unicode(tmsg, self._charset)
! 
  
      def ungettext(self, msgid1, msgid2, n):
--- 323,337 ----
                  return msgid2
  
      def ugettext(self, message):
!         missing = object()
!         tmsg = self._catalog.get(message, missing)
!         if tmsg is missing:
              if self._fallback:
                  return self._fallback.ugettext(message)
              tmsg = message
!         if not self._coerce:
!             return unicode(tmsg, self._charset)
!         # The msgstr is already coerced to Unicode
!         return tmsg
  
      def ungettext(self, msgid1, msgid2, n):
***************
*** 331,335 ****
              else:
                  tmsg = msgid2
!         return unicode(tmsg, self._charset)
  
  
--- 345,352 ----
              else:
                  tmsg = msgid2
!         if not self._coerce:
!             return unicode(tmsg, self._charset)
!         # The msgstr is already coerced to Unicode
!         return tmsg