[Python-checkins] bpo-46307: Add string.Template.get_identifiers() method (GH-30493)

miss-islington webhook-mailer at python.org
Tue Jan 11 14:15:51 EST 2022


https://github.com/python/cpython/commit/dce642f24418c58e67fa31a686575c980c31dd37
commit: dce642f24418c58e67fa31a686575c980c31dd37
branch: main
author: Ben Kehoe <bkehoe at irobot.com>
committer: miss-islington <31488909+miss-islington at users.noreply.github.com>
date: 2022-01-11T11:15:42-08:00
summary:

bpo-46307: Add string.Template.get_identifiers() method (GH-30493)



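Add `string.Template.get_identifiers()` method that returns a list of the valid identifiers within the template, in the order they first appear; invalid identifiers are ignored (like `safe_substitute()`). Also add `string.Template.is_valid()`, which returns `False` if the template contains invalid placeholders that would cause `substitute()` to raise `ValueError`. Both methods raise `ValueError` if the pattern matches the template without any of the expected named groups matching.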

Automerge-Triggered-By: GH:warsaw

files:
A Misc/NEWS.d/next/Library/2022-01-10-07-51-43.bpo-46307.SKvOIY.rst
M Doc/library/string.rst
M Lib/string.py
M Lib/test/test_string.py

diff --git a/Doc/library/string.rst b/Doc/library/string.rst
index b27782f8d8e9b..9bc703e70cdaa 100644
--- a/Doc/library/string.rst
+++ b/Doc/library/string.rst
@@ -783,6 +783,22 @@ these rules.  The methods of :class:`Template` are:
       templates containing dangling delimiters, unmatched braces, or
       placeholders that are not valid Python identifiers.
 
+
+   .. method:: is_valid()
+
+      Returns ``False`` if the template has invalid placeholders that will cause
+      :meth:`substitute` to raise :exc:`ValueError`.
+
+      .. versionadded:: 3.11
+
+
+   .. method:: get_identifiers()
+
+      Returns a list of the valid identifiers in the template, in the order
+      they first appear, ignoring any invalid identifiers.
+
+      .. versionadded:: 3.11
+
    :class:`Template` instances also provide one public data attribute:
 
    .. attribute:: template
@@ -869,6 +885,9 @@ rule:
 * *invalid* -- This group matches any other delimiter pattern (usually a single
   delimiter), and it should appear last in the regular expression.
 
+The methods on this class will raise :exc:`ValueError` if the pattern matches
+the template without one of these named groups matching.
+
 
 Helper functions
 ----------------
diff --git a/Lib/string.py b/Lib/string.py
index 261789cc10a44..2eab6d4f595c4 100644
--- a/Lib/string.py
+++ b/Lib/string.py
@@ -141,6 +141,35 @@ def convert(mo):
                              self.pattern)
         return self.pattern.sub(convert, self.template)
 
+    def is_valid(self):
+        for mo in self.pattern.finditer(self.template):
+            if mo.group('invalid') is not None:
+                return False
+            if (mo.group('named') is None
+                and mo.group('braced') is None
+                and mo.group('escaped') is None):
+                # If all the groups are None, there must be
+                # another group we're not expecting
+                raise ValueError('Unrecognized named group in pattern',
+                    self.pattern)
+        return True
+
+    def get_identifiers(self):
+        ids = []
+        for mo in self.pattern.finditer(self.template):
+            named = mo.group('named') or mo.group('braced')
+            if named is not None and named not in ids:
+                # add a named group only the first time it appears
+                ids.append(named)
+            elif (named is None
+                and mo.group('invalid') is None
+                and mo.group('escaped') is None):
+                # If all the groups are None, there must be
+                # another group we're not expecting
+                raise ValueError('Unrecognized named group in pattern',
+                    self.pattern)
+        return ids
+
 # Initialize Template.pattern.  __init_subclass__() is automatically called
 # only for subclasses, not for the Template class itself.
 Template.__init_subclass__()
diff --git a/Lib/test/test_string.py b/Lib/test/test_string.py
index 0be28fdb609ea..824b89ad517c1 100644
--- a/Lib/test/test_string.py
+++ b/Lib/test/test_string.py
@@ -475,6 +475,57 @@ class PieDelims(Template):
         self.assertEqual(s.substitute(dict(who='tim', what='ham')),
                          'tim likes to eat a bag of ham worth $100')
 
+    def test_is_valid(self):
+        eq = self.assertEqual
+        s = Template('$who likes to eat a bag of ${what} worth $$100')
+        self.assertTrue(s.is_valid())
+
+        s = Template('$who likes to eat a bag of ${what} worth $100')
+        self.assertFalse(s.is_valid())
+
+        # if the pattern has an unrecognized capture group,
+        # it should raise ValueError like substitute and safe_substitute do
+        class BadPattern(Template):
+            pattern = r"""
+            (?P<badname>.*)                  |
+            (?P<escaped>@{2})                   |
+            @(?P<named>[_a-z][._a-z0-9]*)       |
+            @{(?P<braced>[_a-z][._a-z0-9]*)}    |
+            (?P<invalid>@)                      |
+            """
+        s = BadPattern('@bag.foo.who likes to eat a bag of @bag.what')
+        self.assertRaises(ValueError, s.is_valid)
+
+    def test_get_identifiers(self):
+        eq = self.assertEqual
+        raises = self.assertRaises
+        s = Template('$who likes to eat a bag of ${what} worth $$100')
+        ids = s.get_identifiers()
+        eq(ids, ['who', 'what'])
+
+        # repeated identifiers only included once
+        s = Template('$who likes to eat a bag of ${what} worth $$100; ${who} likes to eat a bag of $what worth $$100')
+        ids = s.get_identifiers()
+        eq(ids, ['who', 'what'])
+
+        # invalid identifiers are ignored
+        s = Template('$who likes to eat a bag of ${what} worth $100')
+        ids = s.get_identifiers()
+        eq(ids, ['who', 'what'])
+
+        # if the pattern has an unrecognized capture group,
+        # it should raise ValueError like substitute and safe_substitute do
+        class BadPattern(Template):
+            pattern = r"""
+            (?P<badname>.*)                  |
+            (?P<escaped>@{2})                   |
+            @(?P<named>[_a-z][._a-z0-9]*)       |
+            @{(?P<braced>[_a-z][._a-z0-9]*)}    |
+            (?P<invalid>@)                      |
+            """
+        s = BadPattern('@bag.foo.who likes to eat a bag of @bag.what')
+        self.assertRaises(ValueError, s.get_identifiers)
+
 
 if __name__ == '__main__':
     unittest.main()
diff --git a/Misc/NEWS.d/next/Library/2022-01-10-07-51-43.bpo-46307.SKvOIY.rst b/Misc/NEWS.d/next/Library/2022-01-10-07-51-43.bpo-46307.SKvOIY.rst
new file mode 100644
index 0000000000000..6207c424ce9c0
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2022-01-10-07-51-43.bpo-46307.SKvOIY.rst
@@ -0,0 +1 @@
+Add :meth:`string.Template.is_valid` and :meth:`string.Template.get_identifiers` methods.



More information about the Python-checkins mailing list