[Python-checkins] bpo-32222: Fix pygettext skipping docstrings for funcs with arg typehints (GH-4745)

Mon Feb 26 17:48:17 EST 2018

https://github.com/python/cpython/commit/eee72d4778a5513038edd5236cdd87ccce2bc60a
commit: eee72d4778a5513038edd5236cdd87ccce2bc60a
branch: master
author: Tobotimus <Tobotimus at users.noreply.github.com>
committer: Serhiy Storchaka <storchaka at gmail.com>
date: 2018-02-27T00:48:14+02:00
summary:

bpo-32222: Fix pygettext skipping docstrings for funcs with arg typehints (GH-4745)

files:
A Misc/NEWS.d/next/Tools-Demos/2017-12-07-20-51-20.bpo-32222.hPBcGT.rst
M Lib/test/test_tools/test_i18n.py
M Tools/i18n/pygettext.py

diff --git a/Lib/test/test_tools/test_i18n.py b/Lib/test/test_tools/test_i18n.py
index 5c28baea7241..3c8be27a57b4 100644
--- a/Lib/test/test_tools/test_i18n.py
+++ b/Lib/test/test_tools/test_i18n.py
@@ -3,6 +3,7 @@
 import os
 import sys
 import unittest
+import textwrap
 
 from test.support.script_helper import assert_python_ok
 from test.test_tools import skip_if_missing, toolsdir
@@ -28,6 +29,41 @@ def get_header(self, data):
             headers[key] = val.strip()
         return headers
 
+    def get_msgids(self, data):
+        """ utility: return all msgids in .po file as a list of strings """
+        msgids = []
+        reading_msgid = False
+        cur_msgid = []
+        for line in data.split('\n'):
+            if reading_msgid:
+                if line.startswith('"'):
+                    cur_msgid.append(line.strip('"'))
+                else:
+                    msgids.append('\n'.join(cur_msgid))
+                    cur_msgid = []
+                    reading_msgid = False
+                    continue
+            if line.startswith('msgid '):
+                line = line[len('msgid '):]
+                cur_msgid.append(line.strip('"'))
+                reading_msgid = True
+        else:
+            if reading_msgid:
+                msgids.append('\n'.join(cur_msgid))
+
+        return msgids
+
+    def extract_docstrings_from_str(self, module_content):
+        """ utility: return all msgids extracted from module_content """
+        filename = 'test_docstrings.py'
+        with temp_cwd(None) as cwd:
+            with open(filename, 'w') as fp:
+                fp.write(module_content)
+            assert_python_ok(self.script, '-D', filename)
+            with open('messages.pot') as fp:
+                data = fp.read()
+        return self.get_msgids(data)
+
     def test_header(self):
         """Make sure the required fields are in the header, according to:
            http://www.gnu.org/software/gettext/manual/gettext.html#Header-Entry
@@ -72,3 +108,55 @@ def test_POT_Creation_Date(self):
 
             # This will raise if the date format does not exactly match.
             datetime.strptime(creationDate, '%Y-%m-%d %H:%M%z')
+
+    def test_funcdocstring_annotated_args(self):
+        """ Test docstrings for functions with annotated args """
+        msgids = self.extract_docstrings_from_str(textwrap.dedent('''\
+        def foo(bar: str):
+            """doc"""
+        '''))
+        self.assertIn('doc', msgids)
+
+    def test_funcdocstring_annotated_return(self):
+        """ Test docstrings for functions with annotated return type """
+        msgids = self.extract_docstrings_from_str(textwrap.dedent('''\
+        def foo(bar) -> str:
+            """doc"""
+        '''))
+        self.assertIn('doc', msgids)
+
+    def test_funcdocstring_defvalue_args(self):
+        """ Test docstring for functions with default arg values """
+        msgids = self.extract_docstrings_from_str(textwrap.dedent('''\
+        def foo(bar=()):
+            """doc"""
+        '''))
+        self.assertIn('doc', msgids)
+
+    def test_funcdocstring_multiple_funcs(self):
+        """ Test docstring extraction for multiple functions combining
+        annotated args, annotated return types and default arg values
+        """
+        msgids = self.extract_docstrings_from_str(textwrap.dedent('''\
+        def foo1(bar: tuple=()) -> str:
+            """doc1"""
+
+        def foo2(bar: List[1:2]) -> (lambda x: x):
+            """doc2"""
+
+        def foo3(bar: 'func'=lambda x: x) -> {1: 2}:
+            """doc3"""
+        '''))
+        self.assertIn('doc1', msgids)
+        self.assertIn('doc2', msgids)
+        self.assertIn('doc3', msgids)
+
+    def test_classdocstring_early_colon(self):
+        """ Test docstring extraction for a class with colons occurring within
+        the parentheses.
+        """
+        msgids = self.extract_docstrings_from_str(textwrap.dedent('''\
+        class D(L[1:2], F({1: 2}), metaclass=M(lambda x: x)):
+            """doc"""
+        '''))
+        self.assertIn('doc', msgids)
diff --git a/Misc/NEWS.d/next/Tools-Demos/2017-12-07-20-51-20.bpo-32222.hPBcGT.rst b/Misc/NEWS.d/next/Tools-Demos/2017-12-07-20-51-20.bpo-32222.hPBcGT.rst
new file mode 100644
index 000000000000..b0b4c5e9357c
--- /dev/null
+++ b/Misc/NEWS.d/next/Tools-Demos/2017-12-07-20-51-20.bpo-32222.hPBcGT.rst
@@ -0,0 +1,3 @@
+Fix pygettext not extracting docstrings for functions with type-annotated
+arguments.
+Patch by Toby Harradine.
diff --git a/Tools/i18n/pygettext.py b/Tools/i18n/pygettext.py
index 8ef5ff8a3e72..0f0395a4fcab 100755
--- a/Tools/i18n/pygettext.py
+++ b/Tools/i18n/pygettext.py
@@ -320,6 +320,7 @@ def __init__(self, options):
         self.__lineno = -1
         self.__freshmodule = 1
         self.__curfile = None
+        self.__enclosurecount = 0
 
     def __call__(self, ttype, tstring, stup, etup, line):
         # dispatch
@@ -340,7 +341,7 @@ def __waiting(self, ttype, tstring, lineno):
                 elif ttype not in (tokenize.COMMENT, tokenize.NL):
                     self.__freshmodule = 0
                 return
-            # class docstring?
+            # class or func/method docstring?
             if ttype == tokenize.NAME and tstring in ('class', 'def'):
                 self.__state = self.__suiteseen
                 return
@@ -348,9 +349,15 @@ def __waiting(self, ttype, tstring, lineno):
             self.__state = self.__keywordseen
 
     def __suiteseen(self, ttype, tstring, lineno):
-        # ignore anything until we see the colon
-        if ttype == tokenize.OP and tstring == ':':
-            self.__state = self.__suitedocstring
+        # skip over any enclosure pairs until we see the colon
+        if ttype == tokenize.OP:
+            if tstring == ':' and self.__enclosurecount == 0:
+                # colon outside any enclosure: end of the class/def header
+                self.__state = self.__suitedocstring
+            elif tstring in '([{':
+                self.__enclosurecount += 1
+            elif tstring in ')]}':
+                self.__enclosurecount -= 1
 
     def __suitedocstring(self, ttype, tstring, lineno):
         # ignore any intervening noise