[Python-checkins] bpo-33453: Handle string type annotations in dataclasses. (GH-6768)

Eric V. Smith webhook-mailer at python.org
Tue May 15 22:44:30 EDT 2018


https://github.com/python/cpython/commit/2a7bacbd913cf2bf568b3c0f85a758946d3cf4e9
commit: 2a7bacbd913cf2bf568b3c0f85a758946d3cf4e9
branch: master
author: Eric V. Smith <ericvsmith at users.noreply.github.com>
committer: GitHub <noreply at github.com>
date: 2018-05-15T22:44:27-04:00
summary:

bpo-33453: Handle string type annotations in dataclasses. (GH-6768)

files:
A Lib/test/dataclass_module_1.py
A Lib/test/dataclass_module_1_str.py
A Lib/test/dataclass_module_2.py
A Lib/test/dataclass_module_2_str.py
A Misc/NEWS.d/next/Library/2018-05-12-06-01-02.bpo-33453.Fj-jMD.rst
M Lib/dataclasses.py
M Lib/test/test_dataclasses.py

diff --git a/Lib/dataclasses.py b/Lib/dataclasses.py
index 0f9041604a5a..c93aadc95e0a 100644
--- a/Lib/dataclasses.py
+++ b/Lib/dataclasses.py
@@ -1,3 +1,4 @@
+import re
 import sys
 import copy
 import types
@@ -187,6 +188,10 @@ def __repr__(self):
 # __init__.
 _POST_INIT_NAME = '__post_init__'
 
+# String regex that string annotations for ClassVar or InitVar must match.
+# Allows "identifier.identifier[" or "identifier[".
+# https://bugs.python.org/issue33453 for details.
+_MODULE_IDENTIFIER_RE = re.compile(r'^(?:\s*(\w+)\s*\.)?\s*(\w+)')
 
 class _InitVarMeta(type):
     def __getitem__(self, params):
@@ -532,6 +537,80 @@ def _hash_fn(fields):
                       [f'return hash({self_tuple})'])
 
 
+def _is_classvar(a_type, typing):
+    if typing:
+        # This test uses a typing internal class, but it's the best
+        #  way to test if this is a ClassVar.
+        return (a_type is typing.ClassVar
+                or (type(a_type) is typing._GenericAlias
+                    and a_type.__origin__ is typing.ClassVar))
+
+
+def _is_initvar(a_type, dataclasses):
+    # The module we're checking against is the module we're
+    # currently in (dataclasses.py).
+    return a_type is dataclasses.InitVar
+
+
+def _is_type(annotation, cls, a_module, a_type, is_type_predicate):
+    # Given a type annotation string, does it refer to a_type in
+    # a_module?  For example, when checking that annotation denotes a
+    # ClassVar, then a_module is typing, and a_type is
+    # typing.ClassVar.
+
+    # It's possible to look up a_module given a_type, but it involves
+    # looking in sys.modules (again!), and seems like a waste since
+    # the caller already knows a_module.
+
+    # - annotation is a string type annotation
+    # - cls is the class that this annotation was found in
+    # - a_module is the module we want to match
+    # - a_type is the type in that module we want to match
+    # - is_type_predicate is a function called with (obj, a_module)
+    #   that determines if obj is of the desired type.
+
+    # Since this test does not do a local namespace lookup (and
+    # instead only a module (global) lookup), there are some things it
+    # gets wrong.
+
+    # With string annotations, this will work:
+    #   CV = ClassVar
+    #   @dataclass
+    #   class C0:
+    #     cv0: CV
+
+    # But this will not:
+    #   @dataclass
+    #   class C1:
+    #     CV = ClassVar
+    #     cv1: CV
+
+    # In C1, the code in this function will look up "CV" in the module
+    # and not find it, so it will not consider cv1 as a ClassVar.
+    # This is a fairly obscure corner case, and the best way to fix it
+    # would be to eval() the string "CV" with the correct global and
+    # local namespaces.  However that would involve a eval() penalty
+    # for every single field of every dataclass that's defined.  It
+    # was judged not worth it.
+
+    match = _MODULE_IDENTIFIER_RE.match(annotation)
+    if match:
+        ns = None
+        module_name = match.group(1)
+        if not module_name:
+            # No module name, assume the class's module did
+            # "from dataclasses import InitVar".
+            ns = sys.modules.get(cls.__module__).__dict__
+        else:
+            # Look up module_name in the class's module.
+            module = sys.modules.get(cls.__module__)
+            if module and module.__dict__.get(module_name) is a_module:
+                ns = sys.modules.get(a_type.__module__).__dict__
+        if ns and is_type_predicate(ns.get(match.group(2)), a_module):
+            return True
+    return False
+
+
 def _get_field(cls, a_name, a_type):
     # Return a Field object for this field name and type.  ClassVars
     #  and InitVars are also returned, but marked as such (see
@@ -548,34 +627,54 @@ def _get_field(cls, a_name, a_type):
             default = MISSING
         f = field(default=default)
 
-    # Assume it's a normal field until proven otherwise.
-    f._field_type = _FIELD
-
     # Only at this point do we know the name and the type. Set them.
     f.name = a_name
     f.type = a_type
 
-    # If typing has not been imported, then it's impossible for
-    #  any annotation to be a ClassVar. So, only look for ClassVar
-    #  if typing has been imported.
+    # Assume it's a normal field until proven otherwise.  We're next
+    #  going to decide if it's a ClassVar or InitVar, everything else
+    #  is just a normal field.
+    f._field_type = _FIELD
+
+    # In addition to checking for actual types here, also check for
+    #  string annotations.  get_type_hints() won't always work for us
+    #  (see https://github.com/python/typing/issues/508 for example),
+    #  plus it's expensive and would require an eval for every stirng
+    #  annotation.  So, make a best effort to see if this is a
+    #  ClassVar or InitVar using regex's and checking that the thing
+    #  referenced is actually of the correct type.
+
+    # For the complete discussion, see https://bugs.python.org/issue33453
+
+    # If typing has not been imported, then it's impossible for any
+    #  annotation to be a ClassVar. So, only look for ClassVar if
+    #  typing has been imported by any module (not necessarily cls's
+    #  module).
     typing = sys.modules.get('typing')
-    if typing is not None:
+    if typing:
         # This test uses a typing internal class, but it's the best
         #  way to test if this is a ClassVar.
-        if (type(a_type) is typing._GenericAlias and
-                a_type.__origin__ is typing.ClassVar):
-            # This field is a ClassVar, so it's not a field.
+        if (_is_classvar(a_type, typing)
+            or (isinstance(f.type, str)
+                and _is_type(f.type, cls, typing, typing.ClassVar,
+                             _is_classvar))):
             f._field_type = _FIELD_CLASSVAR
 
+    # If the type is InitVar, or if it's a matching string annotation,
+    # then it's an InitVar.
     if f._field_type is _FIELD:
-        # Check if this is an InitVar.
-        if a_type is InitVar:
-            # InitVars are not fields, either.
+        # The module we're checking against is the module we're
+        # currently in (dataclasses.py).
+        dataclasses = sys.modules[__name__]
+        if (_is_initvar(a_type, dataclasses)
+            or (isinstance(f.type, str)
+                and _is_type(f.type, cls, dataclasses, dataclasses.InitVar,
+                             _is_initvar))):
             f._field_type = _FIELD_INITVAR
 
-    # Validations for fields.  This is delayed until now, instead of
-    # in the Field() constructor, since only here do we know the field
-    # name, which allows better error reporting.
+    # Validations for individual fields.  This is delayed until now,
+    # instead of in the Field() constructor, since only here do we
+    # know the field name, which allows for better error reporting.
 
     # Special restrictions for ClassVar and InitVar.
     if f._field_type in (_FIELD_CLASSVAR, _FIELD_INITVAR):
@@ -605,7 +704,6 @@ def _set_new_attribute(cls, name, value):
     return False
 
 
-
 # Decide if/how we're going to create a hash function.  Key is
 #  (unsafe_hash, eq, frozen, does-hash-exist).  Value is the action to
 #  take.  The common case is to do nothing, so instead of providing a
@@ -865,7 +963,7 @@ def fields(class_or_instance):
 
     # Might it be worth caching this, per class?
     try:
-        fields =  getattr(class_or_instance, _FIELDS)
+        fields = getattr(class_or_instance, _FIELDS)
     except AttributeError:
         raise TypeError('must be called with a dataclass type or instance')
 
diff --git a/Lib/test/dataclass_module_1.py b/Lib/test/dataclass_module_1.py
new file mode 100644
index 000000000000..87a33f8191d3
--- /dev/null
+++ b/Lib/test/dataclass_module_1.py
@@ -0,0 +1,32 @@
+#from __future__ import annotations
+USING_STRINGS = False
+
+# dataclass_module_1.py and dataclass_module_1_str.py are identical
+# except only the latter uses string annotations.
+
+import dataclasses
+import typing
+
+T_CV2 = typing.ClassVar[int]
+T_CV3 = typing.ClassVar
+
+T_IV2 = dataclasses.InitVar[int]
+T_IV3 = dataclasses.InitVar
+
+ at dataclasses.dataclass
+class CV:
+    T_CV4 = typing.ClassVar
+    cv0: typing.ClassVar[int] = 20
+    cv1: typing.ClassVar = 30
+    cv2: T_CV2
+    cv3: T_CV3
+    not_cv4: T_CV4  # When using string annotations, this field is not recognized as a ClassVar.
+
+ at dataclasses.dataclass
+class IV:
+    T_IV4 = dataclasses.InitVar
+    iv0: dataclasses.InitVar[int]
+    iv1: dataclasses.InitVar
+    iv2: T_IV2
+    iv3: T_IV3
+    not_iv4: T_IV4  # When using string annotations, this field is not recognized as an InitVar.
diff --git a/Lib/test/dataclass_module_1_str.py b/Lib/test/dataclass_module_1_str.py
new file mode 100644
index 000000000000..6de490b7ad78
--- /dev/null
+++ b/Lib/test/dataclass_module_1_str.py
@@ -0,0 +1,32 @@
+from __future__ import annotations
+USING_STRINGS = True
+
+# dataclass_module_1.py and dataclass_module_1_str.py are identical
+# except only the latter uses string annotations.
+
+import dataclasses
+import typing
+
+T_CV2 = typing.ClassVar[int]
+T_CV3 = typing.ClassVar
+
+T_IV2 = dataclasses.InitVar[int]
+T_IV3 = dataclasses.InitVar
+
+ at dataclasses.dataclass
+class CV:
+    T_CV4 = typing.ClassVar
+    cv0: typing.ClassVar[int] = 20
+    cv1: typing.ClassVar = 30
+    cv2: T_CV2
+    cv3: T_CV3
+    not_cv4: T_CV4  # When using string annotations, this field is not recognized as a ClassVar.
+
+ at dataclasses.dataclass
+class IV:
+    T_IV4 = dataclasses.InitVar
+    iv0: dataclasses.InitVar[int]
+    iv1: dataclasses.InitVar
+    iv2: T_IV2
+    iv3: T_IV3
+    not_iv4: T_IV4  # When using string annotations, this field is not recognized as an InitVar.
diff --git a/Lib/test/dataclass_module_2.py b/Lib/test/dataclass_module_2.py
new file mode 100644
index 000000000000..68fb733e2992
--- /dev/null
+++ b/Lib/test/dataclass_module_2.py
@@ -0,0 +1,32 @@
+#from __future__ import annotations
+USING_STRINGS = False
+
+# dataclass_module_2.py and dataclass_module_2_str.py are identical
+# except only the latter uses string annotations.
+
+from dataclasses import dataclass, InitVar
+from typing import ClassVar
+
+T_CV2 = ClassVar[int]
+T_CV3 = ClassVar
+
+T_IV2 = InitVar[int]
+T_IV3 = InitVar
+
+ at dataclass
+class CV:
+    T_CV4 = ClassVar
+    cv0: ClassVar[int] = 20
+    cv1: ClassVar = 30
+    cv2: T_CV2
+    cv3: T_CV3
+    not_cv4: T_CV4  # When using string annotations, this field is not recognized as a ClassVar.
+
+ at dataclass
+class IV:
+    T_IV4 = InitVar
+    iv0: InitVar[int]
+    iv1: InitVar
+    iv2: T_IV2
+    iv3: T_IV3
+    not_iv4: T_IV4  # When using string annotations, this field is not recognized as an InitVar.
diff --git a/Lib/test/dataclass_module_2_str.py b/Lib/test/dataclass_module_2_str.py
new file mode 100644
index 000000000000..b363d17c176c
--- /dev/null
+++ b/Lib/test/dataclass_module_2_str.py
@@ -0,0 +1,32 @@
+from __future__ import annotations
+USING_STRINGS = True
+
+# dataclass_module_2.py and dataclass_module_2_str.py are identical
+# except only the latter uses string annotations.
+
+from dataclasses import dataclass, InitVar
+from typing import ClassVar
+
+T_CV2 = ClassVar[int]
+T_CV3 = ClassVar
+
+T_IV2 = InitVar[int]
+T_IV3 = InitVar
+
+ at dataclass
+class CV:
+    T_CV4 = ClassVar
+    cv0: ClassVar[int] = 20
+    cv1: ClassVar = 30
+    cv2: T_CV2
+    cv3: T_CV3
+    not_cv4: T_CV4  # When using string annotations, this field is not recognized as a ClassVar.
+
+ at dataclass
+class IV:
+    T_IV4 = InitVar
+    iv0: InitVar[int]
+    iv1: InitVar
+    iv2: T_IV2
+    iv3: T_IV3
+    not_iv4: T_IV4  # When using string annotations, this field is not recognized as an InitVar.
diff --git a/Lib/test/test_dataclasses.py b/Lib/test/test_dataclasses.py
index 2c890a2cbe92..b251c04cb9a1 100755
--- a/Lib/test/test_dataclasses.py
+++ b/Lib/test/test_dataclasses.py
@@ -12,6 +12,9 @@
 from collections import deque, OrderedDict, namedtuple
 from functools import total_ordering
 
+import typing       # Needed for the string "typing.ClassVar[int]" to work as an annotation.
+import dataclasses  # Needed for the string "dataclasses.InitVar[int]" to work as an annotation.
+
 # Just any custom exception we can catch.
 class CustomError(Exception): pass
 
@@ -600,7 +603,6 @@ class C:
                 class C:
                     x: ClassVar[typ] = Subclass()
 
-
     def test_deliberately_mutable_defaults(self):
         # If a mutable default isn't in the known list of
         #  (list, dict, set), then it's okay.
@@ -924,14 +926,16 @@ class C:
             z: ClassVar[int] = 1000
             w: ClassVar[int] = 2000
             t: ClassVar[int] = 3000
+            s: ClassVar      = 4000
 
         c = C(5)
         self.assertEqual(repr(c), 'TestCase.test_class_var.<locals>.C(x=5, y=10)')
         self.assertEqual(len(fields(C)), 2)                 # We have 2 fields.
-        self.assertEqual(len(C.__annotations__), 5)         # And 3 ClassVars.
+        self.assertEqual(len(C.__annotations__), 6)         # And 4 ClassVars.
         self.assertEqual(c.z, 1000)
         self.assertEqual(c.w, 2000)
         self.assertEqual(c.t, 3000)
+        self.assertEqual(c.s, 4000)
         C.z += 1
         self.assertEqual(c.z, 1001)
         c = C(20)
@@ -939,6 +943,7 @@ class C:
         self.assertEqual(c.z, 1001)
         self.assertEqual(c.w, 2000)
         self.assertEqual(c.t, 3000)
+        self.assertEqual(c.s, 4000)
 
     def test_class_var_no_default(self):
         # If a ClassVar has no default value, it should not be set on the class.
@@ -2798,5 +2803,149 @@ class C:
         self.assertEqual(D.__set_name__.call_count, 1)
 
 
+class TestStringAnnotations(unittest.TestCase):
+    def test_classvar(self):
+        # Some expressions recognized as ClassVar really aren't.  But
+        #  if you're using string annotations, it's not an exact
+        #  science.
+        # These tests assume that both "import typing" and "from
+        # typing import *" have been run in this file.
+        for typestr in ('ClassVar[int]',
+                        'ClassVar [int]'
+                        ' ClassVar [int]',
+                        'ClassVar',
+                        ' ClassVar ',
+                        'typing.ClassVar[int]',
+                        'typing.ClassVar[str]',
+                        ' typing.ClassVar[str]',
+                        'typing .ClassVar[str]',
+                        'typing. ClassVar[str]',
+                        'typing.ClassVar [str]',
+                        'typing.ClassVar [ str]',
+
+                        # Not syntactically valid, but these will
+                        #  be treated as ClassVars.
+                        'typing.ClassVar.[int]',
+                        'typing.ClassVar+',
+                        ):
+            with self.subTest(typestr=typestr):
+                @dataclass
+                class C:
+                    x: typestr
+
+                # x is a ClassVar, so C() takes no args.
+                C()
+
+                # And it won't appear in the class's dict because it doesn't
+                # have a default.
+                self.assertNotIn('x', C.__dict__)
+
+    def test_isnt_classvar(self):
+        for typestr in ('CV',
+                        't.ClassVar',
+                        't.ClassVar[int]',
+                        'typing..ClassVar[int]',
+                        'Classvar',
+                        'Classvar[int]',
+                        'typing.ClassVarx[int]',
+                        'typong.ClassVar[int]',
+                        'dataclasses.ClassVar[int]',
+                        'typingxClassVar[str]',
+                        ):
+            with self.subTest(typestr=typestr):
+                @dataclass
+                class C:
+                    x: typestr
+
+                # x is not a ClassVar, so C() takes one arg.
+                self.assertEqual(C(10).x, 10)
+
+    def test_initvar(self):
+        # These tests assume that both "import dataclasses" and "from
+        #  dataclasses import *" have been run in this file.
+        for typestr in ('InitVar[int]',
+                        'InitVar [int]'
+                        ' InitVar [int]',
+                        'InitVar',
+                        ' InitVar ',
+                        'dataclasses.InitVar[int]',
+                        'dataclasses.InitVar[str]',
+                        ' dataclasses.InitVar[str]',
+                        'dataclasses .InitVar[str]',
+                        'dataclasses. InitVar[str]',
+                        'dataclasses.InitVar [str]',
+                        'dataclasses.InitVar [ str]',
+
+                        # Not syntactically valid, but these will
+                        #  be treated as InitVars.
+                        'dataclasses.InitVar.[int]',
+                        'dataclasses.InitVar+',
+                        ):
+            with self.subTest(typestr=typestr):
+                @dataclass
+                class C:
+                    x: typestr
+
+                # x is an InitVar, so doesn't create a member.
+                with self.assertRaisesRegex(AttributeError,
+                                            "object has no attribute 'x'"):
+                    C(1).x
+
+    def test_isnt_initvar(self):
+        for typestr in ('IV',
+                        'dc.InitVar',
+                        'xdataclasses.xInitVar',
+                        'typing.xInitVar[int]',
+                        ):
+            with self.subTest(typestr=typestr):
+                @dataclass
+                class C:
+                    x: typestr
+
+                # x is not an InitVar, so there will be a member x.
+                self.assertEqual(C(10).x, 10)
+
+    def test_classvar_module_level_import(self):
+        from . import dataclass_module_1
+        from . import dataclass_module_1_str
+        from . import dataclass_module_2
+        from . import dataclass_module_2_str
+
+        for m in (dataclass_module_1, dataclass_module_1_str,
+                  dataclass_module_2, dataclass_module_2_str,
+                  ):
+            with self.subTest(m=m):
+                # There's a difference in how the ClassVars are
+                # interpreted when using string annotations or
+                # not. See the imported modules for details.
+                if m.USING_STRINGS:
+                    c = m.CV(10)
+                else:
+                    c = m.CV()
+                self.assertEqual(c.cv0, 20)
+
+
+                # There's a difference in how the InitVars are
+                # interpreted when using string annotations or
+                # not. See the imported modules for details.
+                c = m.IV(0, 1, 2, 3, 4)
+
+                for field_name in ('iv0', 'iv1', 'iv2', 'iv3'):
+                    with self.subTest(field_name=field_name):
+                        with self.assertRaisesRegex(AttributeError, f"object has no attribute '{field_name}'"):
+                            # Since field_name is an InitVar, it's
+                            # not an instance field.
+                            getattr(c, field_name)
+
+                if m.USING_STRINGS:
+                    # iv4 is interpreted as a normal field.
+                    self.assertIn('not_iv4', c.__dict__)
+                    self.assertEqual(c.not_iv4, 4)
+                else:
+                    # iv4 is interpreted as an InitVar, so it
+                    # won't exist on the instance.
+                    self.assertNotIn('not_iv4', c.__dict__)
+
+
 if __name__ == '__main__':
     unittest.main()
diff --git a/Misc/NEWS.d/next/Library/2018-05-12-06-01-02.bpo-33453.Fj-jMD.rst b/Misc/NEWS.d/next/Library/2018-05-12-06-01-02.bpo-33453.Fj-jMD.rst
new file mode 100644
index 000000000000..6595b1265a4e
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2018-05-12-06-01-02.bpo-33453.Fj-jMD.rst
@@ -0,0 +1,4 @@
+Fix dataclasses to work if using literal string type annotations or if using
+PEP 563 "Postponed Evaluation of Annotations".  Only specific string
+prefixes are detected for both ClassVar ("ClassVar" and "typing.ClassVar")
+and InitVar ("InitVar" and "dataclasses.InitVar").



More information about the Python-checkins mailing list