[Python-checkins] python/dist/src/Lib gettext.py,1.15,1.16

Thu, 21 Nov 2002 13:45:34 -0800

Update of /cvsroot/python/python/dist/src/Lib
In directory sc8-pr-cvs1:/tmp/cvs-serv11193/Lib

Modified Files:
	gettext.py 
Log Message:
Patch #633547: Support plural forms. Do TODOs in test suite.

Index: gettext.py
===================================================================
RCS file: /cvsroot/python/python/dist/src/Lib/gettext.py,v
retrieving revision 1.15
retrieving revision 1.16
diff -C2 -d -r1.15 -r1.16
*** gettext.py	14 Aug 2002 15:09:12 -0000	1.15
--- gettext.py	21 Nov 2002 21:45:32 -0000	1.16
***************
*** 33,36 ****
--- 33,38 ----
  # module.
  #
+ # J. David Ibanez implemented plural forms.
+ #
  # TODO:
  # - Lazy loading of .mo files.  Currently the entire catalog is loaded into
***************
*** 44,53 ****
  #   find this format documented anywhere.

! import os
! import sys
! import struct
! import copy
  from errno import ENOENT

  __all__ = ["bindtextdomain","textdomain","gettext","dgettext",
             "find","translation","install","Catalog"]
--- 46,54 ----
  #   find this format documented anywhere.

! 
! import copy, os, re, struct, sys
  from errno import ENOENT

+ 
  __all__ = ["bindtextdomain","textdomain","gettext","dgettext",
             "find","translation","install","Catalog"]
***************
*** 56,59 ****
--- 57,119 ----

+ def test(condition, true, false):
+     """
+     Implements the C expression:
+ 
+       condition ? true : false
+ 
+     Required to correctly interpret plural forms.
+     """
+     if condition:
+         return true
+     else:
+         return false
+ 
+ 
+ def c2py(plural):
+     """
+     Gets a C expression as used in PO files for plural forms and
+     returns a Python lambda function that implements an equivalent
+     expression.
+     """
+     # Security check, allow only the "n" identifier
+     from StringIO import StringIO
+     import token, tokenize
+     tokens = tokenize.generate_tokens(StringIO(plural).readline)
+     danger = [ x for x in tokens if x[0] == token.NAME and x[1] != 'n' ]
+     if danger:
+         raise ValueError, 'dangerous expression'
+ 
+     # Replace some C operators by their Python equivalents
+     plural = plural.replace('&&', ' and ')
+     plural = plural.replace('||', ' or ')
+ 
+     expr = re.compile(r'\![^=]')
+     plural = expr.sub(' not ', plural)
+ 
+     # Regular expression and replacement function used to transform
+     # "a?b:c" to "test(a,b,c)".
+     expr = re.compile(r'(.*?)\?(.*?):(.*)')
+     def repl(x):
+         return "test(%s, %s, %s)" % (x.group(1), x.group(2),
+                                      expr.sub(repl, x.group(3)))
+ 
+     # Code to transform the plural expression, taking care of parentheses
+     stack = ['']
+     for c in plural:
+         if c == '(':
+             stack.append('')
+         elif c == ')':
+             if len(stack) == 0:
+                 raise ValueError, 'unbalanced parenthesis in plural form'
+             s = expr.sub(repl, stack.pop())
+             stack[-1] += '(%s)' % s
+         else:
+             stack[-1] += c
+     plural = expr.sub(repl, stack.pop())
+ 
+     return eval('lambda n: int(%s)' % plural)
+ 
+ 

  def _expand_lang(locale):
***************
*** 122,125 ****
--- 182,193 ----
          return message

+     def ngettext(self, msgid1, msgid2, n):
+         if self._fallback:
+             return self._fallback.ngettext(msgid1, msgid2, n)
+         if n == 1:
+             return msgid1
+         else:
+             return msgid2
+ 
      def ugettext(self, message):
          if self._fallback:
***************
*** 127,130 ****
--- 195,206 ----
          return unicode(message)

+     def ungettext(self, msgid1, msgid2, n):
+         if self._fallback:
+             return self._fallback.ungettext(msgid1, msgid2, n)
+         if n == 1:
+             return unicode(msgid1)
+         else:
+             return unicode(msgid2)
+ 
      def info(self):
          return self._info
***************
*** 170,175 ****
              tend = toff + tlen
              if mend < buflen and tend < buflen:
                  tmsg = buf[toff:tend]
!                 catalog[buf[moff:mend]] = tmsg
              else:
                  raise IOError(0, 'File is corrupt', filename)
--- 246,259 ----
              tend = toff + tlen
              if mend < buflen and tend < buflen:
+                 msg = buf[moff:mend]
                  tmsg = buf[toff:tend]
!                 if msg.find('\x00') >= 0:
!                     # Plural forms
!                     msgid1, msgid2 = msg.split('\x00')
!                     tmsg = tmsg.split('\x00')
!                     for i in range(len(tmsg)):
!                         catalog[(msgid1, i)] = tmsg[i]
!                 else:
!                     catalog[msg] = tmsg
              else:
                  raise IOError(0, 'File is corrupt', filename)
***************
*** 187,190 ****
--- 271,280 ----
                      if k == 'content-type':
                          self._charset = v.split('charset=')[1]
+                     elif k == 'plural-forms':
+                         v = v.split(';')
+ ##                        nplurals = v[0].split('nplurals=')[1]
+ ##                        nplurals = int(nplurals.strip())
+                         plural = v[1].split('plural=')[1]
+                         self.plural = c2py(plural)
              # advance to next entry in the seek tables
              masteridx += 8
***************
*** 199,202 ****
--- 289,305 ----
              return message

+ 
+     def ngettext(self, msgid1, msgid2, n):
+         try:
+             return self._catalog[(msgid1, self.plural(n))]
+         except KeyError:
+             if self._fallback:
+                 return self._fallback.ngettext(msgid1, msgid2, n)
+             if n == 1:
+                 return msgid1
+             else:
+                 return msgid2
+ 
+ 
      def ugettext(self, message):
          try:
***************
*** 209,212 ****
--- 312,327 ----

+     def ungettext(self, msgid1, msgid2, n):
+         try:
+             tmsg = self._catalog[(msgid1, self.plural(n))]
+         except KeyError:
+             if self._fallback:
+                 return self._fallback.ungettext(msgid1, msgid2, n)
+             if n == 1:
+                 tmsg = msgid1
+             else:
+                 tmsg = msgid2
+         return unicode(tmsg, self._charset)
+ 

  # Locate a .mo file using the gettext strategy
***************
*** 312,317 ****
--- 427,447 ----

+ def dngettext(domain, msgid1, msgid2, n):
+     try:
+         t = translation(domain, _localedirs.get(domain, None))
+     except IOError:
+         if n == 1:
+             return msgid1
+         else:
+             return msgid2
+     return t.ngettext(msgid1, msgid2, n)
+ 
+ 
  def gettext(message):
      return dgettext(_current_domain, message)
+ 
+ 
+ def ngettext(msgid1, msgid2, n):
+     return dngettext(_current_domain, msgid1, msgid2, n)