From 4e1a4e230e47cd27d7a3a0ba564aa43b6c2ab005 Mon Sep 17 00:00:00 2001 From: "Eric V. Smith" Date: Tue, 30 Nov 2021 09:53:16 -0500 Subject: [PATCH 1/8] Use unhashability as a proxy for mutability for default arguments. --- Lib/dataclasses.py | 5 ++-- Lib/test/test_dataclasses.py | 23 +++++++++++++++++++ .../2021-11-29-19-37-20.bpo-44674.NijWLt.rst | 3 +++ 3 files changed, 29 insertions(+), 2 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2021-11-29-19-37-20.bpo-44674.NijWLt.rst diff --git a/Lib/dataclasses.py b/Lib/dataclasses.py index 8643589077a4a8..6024906ddb7fa8 100644 --- a/Lib/dataclasses.py +++ b/Lib/dataclasses.py @@ -808,8 +808,9 @@ def _get_field(cls, a_name, a_type, default_kw_only): raise TypeError(f'field {f.name} is a ClassVar but specifies ' 'kw_only') - # For real fields, disallow mutable defaults for known types. - if f._field_type is _FIELD and isinstance(f.default, (list, dict, set)): + # For real fields, disallow mutable defaults. Use unhashable as a proxy + # indicator for mutability. + if f._field_type is _FIELD and f.default.__hash__ is None: raise ValueError(f'mutable default {type(f.default)} for field ' f'{f.name} is not allowed: use default_factory') diff --git a/Lib/test/test_dataclasses.py b/Lib/test/test_dataclasses.py index bcd004f4ec3aa2..6eefa82dd4837c 100644 --- a/Lib/test/test_dataclasses.py +++ b/Lib/test/test_dataclasses.py @@ -500,6 +500,29 @@ class C: self.assertNotEqual(C(3), C(4, 10)) self.assertNotEqual(C(3, 10), C(4, 10)) + def test_no_unhashable_default(self): + # See bpo-44674. + class Unhashable: + __hash__ = None + + class Hashable: + def __hash__(self): + return 0 + + with self.assertRaisesRegex(ValueError, + f'mutable default .* for field ' + 'a is not allowed'): + @dataclass + class A: + a: dict = {} + + with self.assertRaisesRegex(ValueError, + f'mutable default .* for field ' + 'a is not allowed'): + @dataclass + class A: + a: Any = Unhashable() + def test_hash_field_rules(self): # Test all 6 cases of: # hash=True/False/None diff --git a/Misc/NEWS.d/next/Library/2021-11-29-19-37-20.bpo-44674.NijWLt.rst b/Misc/NEWS.d/next/Library/2021-11-29-19-37-20.bpo-44674.NijWLt.rst new file mode 100644 index 00000000000000..c17496e1cf8e52 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2021-11-29-19-37-20.bpo-44674.NijWLt.rst @@ -0,0 +1,3 @@ +Change how dataclasses disallows mutable default values. It used to use a +list of known types (list, dict, set). Now it disallows unhashable objects +to be defaults. It's using unhashability as a proxy for mutability. From 91732ea276759fd71afa25e1dd2e353b0772e447 Mon Sep 17 00:00:00 2001 From: "Eric V. Smith" Date: Tue, 30 Nov 2021 10:16:07 -0500 Subject: [PATCH 2/8] Document changed behavior. --- Doc/library/dataclasses.rst | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/Doc/library/dataclasses.rst b/Doc/library/dataclasses.rst index b06547074378f0..fc47b914a8a393 100644 --- a/Doc/library/dataclasses.rst +++ b/Doc/library/dataclasses.rst @@ -712,9 +712,9 @@ Mutable default values creation they also share this behavior. There is no general way for Data Classes to detect this condition. Instead, the :func:`dataclass` decorator will raise a :exc:`TypeError` if it - detects a default parameter of type ``list``, ``dict``, or ``set``. - This is a partial solution, but it does protect against many common - errors. + detects an unhashable default parameter. The assumption is that if + an parameter is unhashable, it is mutable. This is a partial + solution, but it does protect against many common errors. Using default factory functions is a way to create new instances of mutable types as default values for fields:: @@ -724,3 +724,9 @@ Mutable default values x: list = field(default_factory=list) assert D().x is not D().x + + .. versionchanged:: 3.11 + Instead of looking for and disallowing objects of type ``list``, + ``dict``, or ``set``, unhashable objects are now not allowed as + default values. Unhashability is used to approximate + mutability. From 0e1cbdf9c8619ae305aac9ee15e0853efb53e0b8 Mon Sep 17 00:00:00 2001 From: "Eric V. Smith" Date: Tue, 30 Nov 2021 20:14:33 -0500 Subject: [PATCH 3/8] Check __hash__ on the class, not the instance. --- Lib/dataclasses.py | 6 ++++-- Lib/test/test_dataclasses.py | 23 +++++++++++++---------- 2 files changed, 17 insertions(+), 12 deletions(-) diff --git a/Lib/dataclasses.py b/Lib/dataclasses.py index 6024906ddb7fa8..bc6701aa45309b 100644 --- a/Lib/dataclasses.py +++ b/Lib/dataclasses.py @@ -809,8 +809,10 @@ def _get_field(cls, a_name, a_type, default_kw_only): 'kw_only') # For real fields, disallow mutable defaults. Use unhashable as a proxy - # indicator for mutability. - if f._field_type is _FIELD and f.default.__hash__ is None: + # indicator for mutability. Read the __hash__ attribute from the class, + # not the instance. + defaults_hash_fn = getattr(f.default.__class__, '__hash__') + if f._field_type is _FIELD and defaults_hash_fn is None: raise ValueError(f'mutable default {type(f.default)} for field ' f'{f.name} is not allowed: use default_factory') diff --git a/Lib/test/test_dataclasses.py b/Lib/test/test_dataclasses.py index 6eefa82dd4837c..940fbd21a60eb5 100644 --- a/Lib/test/test_dataclasses.py +++ b/Lib/test/test_dataclasses.py @@ -505,24 +505,27 @@ def test_no_unhashable_default(self): class Unhashable: __hash__ = None - class Hashable: - def __hash__(self): - return 0 - - with self.assertRaisesRegex(ValueError, - f'mutable default .* for field ' - 'a is not allowed'): + unhashable_re = 'mutable default .* for field a is not allowed' + with self.assertRaisesRegex(ValueError, unhashable_re): @dataclass class A: a: dict = {} - with self.assertRaisesRegex(ValueError, - f'mutable default .* for field ' - 'a is not allowed'): + with self.assertRaisesRegex(ValueError, unhashable_re): @dataclass class A: a: Any = Unhashable() + # Make sure that the machinery looking for hashability is using the + # class's __hash__, not the instance's __hash__. + with self.assertRaisesRegex(ValueError, unhashable_re): + unhashable = Unhashable() + # This shouldn't make the variable hashable. + unhashable.__hash__ = lambda: 0 + @dataclass + class A: + a: Any = unhashable + def test_hash_field_rules(self): # Test all 6 cases of: # hash=True/False/None From 016e2a96e3df09da3014c410f5e7ca794d2927e8 Mon Sep 17 00:00:00 2001 From: "Eric V. Smith" Date: Wed, 1 Dec 2021 01:47:25 -0500 Subject: [PATCH 4/8] Tweak documentation. --- Doc/library/dataclasses.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Doc/library/dataclasses.rst b/Doc/library/dataclasses.rst index fc47b914a8a393..3592429d06a4ce 100644 --- a/Doc/library/dataclasses.rst +++ b/Doc/library/dataclasses.rst @@ -713,8 +713,8 @@ Mutable default values for Data Classes to detect this condition. Instead, the :func:`dataclass` decorator will raise a :exc:`TypeError` if it detects an unhashable default parameter. The assumption is that if - an parameter is unhashable, it is mutable. This is a partial - solution, but it does protect against many common errors. + a value is unhashable, it is mutable. This is a partial solution, + but it does protect against many common errors. Using default factory functions is a way to create new instances of mutable types as default values for fields:: From 1254be0c469c59a9fad916617e5a66f2542ff104 Mon Sep 17 00:00:00 2001 From: "Eric V. Smith" Date: Wed, 1 Dec 2021 11:37:05 -0500 Subject: [PATCH 5/8] No need to use getattr, just read __hash__ directly from the class. --- Lib/dataclasses.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/dataclasses.py b/Lib/dataclasses.py index bc6701aa45309b..1c288396ee4a18 100644 --- a/Lib/dataclasses.py +++ b/Lib/dataclasses.py @@ -811,7 +811,7 @@ def _get_field(cls, a_name, a_type, default_kw_only): # For real fields, disallow mutable defaults. Use unhashable as a proxy # indicator for mutability. Read the __hash__ attribute from the class, # not the instance. - defaults_hash_fn = getattr(f.default.__class__, '__hash__') + defaults_hash_fn = f.default.__class__.__hash__ if f._field_type is _FIELD and defaults_hash_fn is None: raise ValueError(f'mutable default {type(f.default)} for field ' f'{f.name} is not allowed: use default_factory') From 024cd9b43ecceaeb742d115c705c4dbbb2332fed Mon Sep 17 00:00:00 2001 From: "Eric V. Smith" Date: Wed, 1 Dec 2021 14:20:58 -0500 Subject: [PATCH 6/8] Ensure that hash-checking works with non-fields. --- Lib/dataclasses.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Lib/dataclasses.py b/Lib/dataclasses.py index 1c288396ee4a18..f93fefbab3a355 100644 --- a/Lib/dataclasses.py +++ b/Lib/dataclasses.py @@ -811,10 +811,10 @@ def _get_field(cls, a_name, a_type, default_kw_only): # For real fields, disallow mutable defaults. Use unhashable as a proxy # indicator for mutability. Read the __hash__ attribute from the class, # not the instance. - defaults_hash_fn = f.default.__class__.__hash__ - if f._field_type is _FIELD and defaults_hash_fn is None: - raise ValueError(f'mutable default {type(f.default)} for field ' - f'{f.name} is not allowed: use default_factory') + if f._field_type is _FIELD: + if f.default.__class__.__hash__ is None: + raise ValueError(f'mutable default {type(f.default)} for field ' + f'{f.name} is not allowed: use default_factory') return f From 4346a40cad9a703d0d590e3756e6321796074f83 Mon Sep 17 00:00:00 2001 From: "Eric V. Smith" Date: Fri, 3 Dec 2021 18:31:58 -0500 Subject: [PATCH 7/8] Put credits in the News file. --- .../Library/2021-11-29-19-37-20.bpo-44674.NijWLt.rst | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/Misc/NEWS.d/next/Library/2021-11-29-19-37-20.bpo-44674.NijWLt.rst b/Misc/NEWS.d/next/Library/2021-11-29-19-37-20.bpo-44674.NijWLt.rst index c17496e1cf8e52..79e7a08b3b1741 100644 --- a/Misc/NEWS.d/next/Library/2021-11-29-19-37-20.bpo-44674.NijWLt.rst +++ b/Misc/NEWS.d/next/Library/2021-11-29-19-37-20.bpo-44674.NijWLt.rst @@ -1,3 +1,6 @@ -Change how dataclasses disallows mutable default values. It used to use a -list of known types (list, dict, set). Now it disallows unhashable objects -to be defaults. It's using unhashability as a proxy for mutability. +Change how dataclasses disallows mutable default values. It used to +use a list of known types (list, dict, set). Now it disallows +unhashable objects to be defaults. It's using unhashability as a +proxy for mutability. Patch by Eric V. Smith, idea by Raymond +Hettinger. + From 174f65c9446511937b1f87817db5a5d02b179e72 Mon Sep 17 00:00:00 2001 From: "Eric V. Smith" Date: Sat, 4 Dec 2021 04:38:33 -0500 Subject: [PATCH 8/8] Simplify test. --- Lib/dataclasses.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/Lib/dataclasses.py b/Lib/dataclasses.py index f93fefbab3a355..737ab82365127c 100644 --- a/Lib/dataclasses.py +++ b/Lib/dataclasses.py @@ -811,10 +811,9 @@ def _get_field(cls, a_name, a_type, default_kw_only): # For real fields, disallow mutable defaults. Use unhashable as a proxy # indicator for mutability. Read the __hash__ attribute from the class, # not the instance. - if f._field_type is _FIELD: - if f.default.__class__.__hash__ is None: - raise ValueError(f'mutable default {type(f.default)} for field ' - f'{f.name} is not allowed: use default_factory') + if f._field_type is _FIELD and f.default.__class__.__hash__ is None: + raise ValueError(f'mutable default {type(f.default)} for field ' + f'{f.name} is not allowed: use default_factory') return f