-
-
Notifications
You must be signed in to change notification settings - Fork 32.1k
gh-114087: Speed up dataclasses._asdict_inner #114088
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
1a7bf3c
712dae3
ffc6e60
98965fa
d593af4
9f81e16
5ca7a6f
67eeeec
3d9278c
71e3033
1290c65
916715b
14abed8
75d7052
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1332,58 +1332,69 @@ class C: | |
|
||
|
||
def _asdict_inner(obj, dict_factory): | ||
if type(obj) in _ATOMIC_TYPES: | ||
obj_type = type(obj) | ||
if obj_type in _ATOMIC_TYPES: | ||
return obj | ||
elif _is_dataclass_instance(obj): | ||
# fast path for the common case | ||
elif hasattr(obj_type, _FIELDS): | ||
# dataclass instance: fast path for the common case | ||
if dict_factory is dict: | ||
return { | ||
f.name: _asdict_inner(getattr(obj, f.name), dict) | ||
for f in fields(obj) | ||
} | ||
else: | ||
result = [] | ||
for f in fields(obj): | ||
value = _asdict_inner(getattr(obj, f.name), dict_factory) | ||
result.append((f.name, value)) | ||
return dict_factory(result) | ||
elif isinstance(obj, tuple) and hasattr(obj, '_fields'): | ||
# obj is a namedtuple. Recurse into it, but the returned | ||
# object is another namedtuple of the same type. This is | ||
# similar to how other list- or tuple-derived classes are | ||
# treated (see below), but we just need to create them | ||
# differently because a namedtuple's __init__ needs to be | ||
# called differently (see bpo-34363). | ||
|
||
# I'm not using namedtuple's _asdict() | ||
# method, because: | ||
# - it does not recurse in to the namedtuple fields and | ||
# convert them to dicts (using dict_factory). | ||
# - I don't actually want to return a dict here. The main | ||
# use case here is json.dumps, and it handles converting | ||
# namedtuples to lists. Admittedly we're losing some | ||
# information here when we produce a json list instead of a | ||
# dict. Note that if we returned dicts here instead of | ||
# namedtuples, we could no longer call asdict() on a data | ||
# structure where a namedtuple was used as a dict key. | ||
|
||
return type(obj)(*[_asdict_inner(v, dict_factory) for v in obj]) | ||
elif isinstance(obj, (list, tuple)): | ||
# Assume we can create an object of this type by passing in a | ||
# generator (which is not true for namedtuples, handled | ||
# above). | ||
return type(obj)(_asdict_inner(v, dict_factory) for v in obj) | ||
elif isinstance(obj, dict): | ||
if hasattr(type(obj), 'default_factory'): | ||
return dict_factory([ | ||
(f.name, _asdict_inner(getattr(obj, f.name), dict_factory)) | ||
for f in fields(obj) | ||
]) | ||
# handle the builtin types first for speed; subclasses handled below | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. The implementation looks OK, but I am not really fond of the code duplication this introduces. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I'm not concerned about this; there isn't duplication of boilerplate, just the essential differences between generating different types of values in the most efficient way. |
||
elif obj_type is list: | ||
return [_asdict_inner(v, dict_factory) for v in obj] | ||
elif obj_type is dict: | ||
return { | ||
_asdict_inner(k, dict_factory): _asdict_inner(v, dict_factory) | ||
for k, v in obj.items() | ||
} | ||
elif obj_type is tuple: | ||
return tuple([_asdict_inner(v, dict_factory) for v in obj]) | ||
elif issubclass(obj_type, tuple): | ||
if hasattr(obj, '_fields'): | ||
# obj is a namedtuple. Recurse into it, but the returned | ||
# object is another namedtuple of the same type. This is | ||
# similar to how other list- or tuple-derived classes are | ||
# treated (see below), but we just need to create them | ||
# differently because a namedtuple's __init__ needs to be | ||
# called differently (see bpo-34363). | ||
|
||
# I'm not using namedtuple's _asdict() | ||
# method, because: | ||
# - it does not recurse in to the namedtuple fields and | ||
# convert them to dicts (using dict_factory). | ||
# - I don't actually want to return a dict here. The main | ||
# use case here is json.dumps, and it handles converting | ||
# namedtuples to lists. Admittedly we're losing some | ||
# information here when we produce a json list instead of a | ||
# dict. Note that if we returned dicts here instead of | ||
# namedtuples, we could no longer call asdict() on a data | ||
# structure where a namedtuple was used as a dict key. | ||
return obj_type(*[_asdict_inner(v, dict_factory) for v in obj]) | ||
else: | ||
return obj_type(_asdict_inner(v, dict_factory) for v in obj) | ||
elif issubclass(obj_type, dict): | ||
if hasattr(obj_type, 'default_factory'): | ||
# obj is a defaultdict, which has a different constructor from | ||
# dict as it requires the default_factory as its first arg. | ||
result = type(obj)(getattr(obj, 'default_factory')) | ||
result = obj_type(obj.default_factory) | ||
for k, v in obj.items(): | ||
result[_asdict_inner(k, dict_factory)] = _asdict_inner(v, dict_factory) | ||
return result | ||
return type(obj)((_asdict_inner(k, dict_factory), | ||
_asdict_inner(v, dict_factory)) | ||
for k, v in obj.items()) | ||
return obj_type((_asdict_inner(k, dict_factory), | ||
_asdict_inner(v, dict_factory)) | ||
for k, v in obj.items()) | ||
elif issubclass(obj_type, list): | ||
# Assume we can create an object of this type by passing in a | ||
# generator | ||
return obj_type(_asdict_inner(v, dict_factory) for v in obj) | ||
else: | ||
return copy.deepcopy(obj) | ||
|
||
|
@@ -1416,11 +1427,10 @@ def _astuple_inner(obj, tuple_factory): | |
if type(obj) in _ATOMIC_TYPES: | ||
return obj | ||
elif _is_dataclass_instance(obj): | ||
result = [] | ||
for f in fields(obj): | ||
value = _astuple_inner(getattr(obj, f.name), tuple_factory) | ||
result.append(value) | ||
return tuple_factory(result) | ||
return tuple_factory([ | ||
_astuple_inner(getattr(obj, f.name), tuple_factory) | ||
for f in fields(obj) | ||
]) | ||
elif isinstance(obj, tuple) and hasattr(obj, '_fields'): | ||
# obj is a namedtuple. Recurse into it, but the returned | ||
# object is another namedtuple of the same type. This is | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
Speed up ``dataclasses.asdict`` up to 1.35x. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This change might make the case for atomic types a tiny bit slower, since we now have to assign to the variable `obj_type`. But it is faster for all other cases, so it seems fine.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes. This is mitigated by the fact that the first pass through `_asdict_inner` is always the dataclass instance itself, so the `hasattr` speedup (removing a function call) somewhat offsets the `obj_type` slowdown.