Skip to content

Commit db6d9a5

Browse files
authored
bpo-41431: Optimize dict_merge for copy (GH-21674)
1 parent 602a971 commit db6d9a5

File tree

3 files changed

+75
-34
lines changed

3 files changed

+75
-34
lines changed

Lib/test/test_ordered_dict.py

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -740,20 +740,21 @@ def test_sizeof_exact(self):
740740
size = support.calcobjsize
741741
check = self.check_sizeof
742742

743-
basicsize = size('nQ2P' + '3PnPn2P') + calcsize('2nP2n')
743+
basicsize = size('nQ2P' + '3PnPn2P')
744+
keysize = calcsize('2nP2n')
744745

745746
entrysize = calcsize('n2P')
746747
p = calcsize('P')
747748
nodesize = calcsize('Pn2P')
748749

749750
od = OrderedDict()
750-
check(od, basicsize + 8 + 5*entrysize) # 8byte indices + 8*2//3 * entry table
751+
check(od, basicsize) # 8byte indices + 8*2//3 * entry table
751752
od.x = 1
752-
check(od, basicsize + 8 + 5*entrysize)
753+
check(od, basicsize)
753754
od.update([(i, i) for i in range(3)])
754-
check(od, basicsize + 8*p + 8 + 5*entrysize + 3*nodesize)
755+
check(od, basicsize + keysize + 8*p + 8 + 5*entrysize + 3*nodesize)
755756
od.update([(i, i) for i in range(3, 10)])
756-
check(od, basicsize + 16*p + 16 + 10*entrysize + 10*nodesize)
757+
check(od, basicsize + keysize + 16*p + 16 + 10*entrysize + 10*nodesize)
757758

758759
check(od.keys(), size('P'))
759760
check(od.items(), size('P'))
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
Optimize ``dict_merge()`` for copying a dict (e.g. ``dict(d)`` and
2+
``{}.update(d)``).

Objects/dictobject.c

Lines changed: 67 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -674,10 +674,11 @@ new_dict_with_shared_keys(PyDictKeysObject *keys)
674674
}
675675

676676

677-
static PyObject *
678-
clone_combined_dict(PyDictObject *orig)
677+
static PyDictKeysObject *
678+
clone_combined_dict_keys(PyDictObject *orig)
679679
{
680-
assert(PyDict_CheckExact(orig));
680+
assert(PyDict_Check(orig));
681+
assert(Py_TYPE(orig)->tp_iter == (getiterfunc)dict_iter);
681682
assert(orig->ma_values == NULL);
682683
assert(orig->ma_keys->dk_refcnt == 1);
683684

@@ -704,28 +705,14 @@ clone_combined_dict(PyDictObject *orig)
704705
}
705706
}
706707

707-
PyDictObject *new = (PyDictObject *)new_dict(keys, NULL);
708-
if (new == NULL) {
709-
/* In case of an error, `new_dict()` takes care of
710-
cleaning up `keys`. */
711-
return NULL;
712-
}
713-
new->ma_used = orig->ma_used;
714-
ASSERT_CONSISTENT(new);
715-
if (_PyObject_GC_IS_TRACKED(orig)) {
716-
/* Maintain tracking. */
717-
_PyObject_GC_TRACK(new);
718-
}
719-
720708
/* Since we copied the keys table we now have an extra reference
721709
in the system. Manually call increment _Py_RefTotal to signal that
722710
we have it now; calling dictkeys_incref would be an error as
723711
keys->dk_refcnt is already set to 1 (after memcpy). */
724712
#ifdef Py_REF_DEBUG
725713
_Py_RefTotal++;
726714
#endif
727-
728-
return (PyObject *)new;
715+
return keys;
729716
}
730717

731718
PyObject *
@@ -2527,12 +2514,45 @@ dict_merge(PyObject *a, PyObject *b, int override)
25272514
if (other == mp || other->ma_used == 0)
25282515
/* a.update(a) or a.update({}); nothing to do */
25292516
return 0;
2530-
if (mp->ma_used == 0)
2517+
if (mp->ma_used == 0) {
25312518
/* Since the target dict is empty, PyDict_GetItem()
25322519
* always returns NULL. Setting override to 1
25332520
* skips the unnecessary test.
25342521
*/
25352522
override = 1;
2523+
PyDictKeysObject *okeys = other->ma_keys;
2524+
2525+
// If other is clean, combined, and just allocated, just clone it.
2526+
if (other->ma_values == NULL &&
2527+
other->ma_used == okeys->dk_nentries &&
2528+
(okeys->dk_size == PyDict_MINSIZE ||
2529+
USABLE_FRACTION(okeys->dk_size/2) < other->ma_used)) {
2530+
PyDictKeysObject *keys = clone_combined_dict_keys(other);
2531+
if (keys == NULL) {
2532+
return -1;
2533+
}
2534+
2535+
dictkeys_decref(mp->ma_keys);
2536+
mp->ma_keys = keys;
2537+
if (mp->ma_values != NULL) {
2538+
if (mp->ma_values != empty_values) {
2539+
free_values(mp->ma_values);
2540+
}
2541+
mp->ma_values = NULL;
2542+
}
2543+
2544+
mp->ma_used = other->ma_used;
2545+
mp->ma_version_tag = DICT_NEXT_VERSION();
2546+
ASSERT_CONSISTENT(mp);
2547+
2548+
if (_PyObject_GC_IS_TRACKED(other) && !_PyObject_GC_IS_TRACKED(mp)) {
2549+
/* Maintain tracking. */
2550+
_PyObject_GC_TRACK(mp);
2551+
}
2552+
2553+
return 0;
2554+
}
2555+
}
25362556
/* Do one big resize at the start, rather than
25372557
* incrementally resizing as we insert new items. Expect
25382558
* that there will be no (or few) overlapping keys.
@@ -2718,12 +2738,13 @@ PyDict_Copy(PyObject *o)
27182738
return (PyObject *)split_copy;
27192739
}
27202740

2721-
if (PyDict_CheckExact(mp) && mp->ma_values == NULL &&
2741+
if (Py_TYPE(mp)->tp_iter == (getiterfunc)dict_iter &&
2742+
mp->ma_values == NULL &&
27222743
(mp->ma_used >= (mp->ma_keys->dk_nentries * 2) / 3))
27232744
{
27242745
/* Use fast-copy if:
27252746
2726-
(1) 'mp' is an instance of a subclassed dict; and
2747+
(1) type(mp) doesn't override tp_iter; and
27272748
27282749
(2) 'mp' is not a split-dict; and
27292750
@@ -2735,13 +2756,31 @@ PyDict_Copy(PyObject *o)
27352756
operations and copied after that. In cases like this, we defer to
27362757
PyDict_Merge, which produces a compacted copy.
27372758
*/
2738-
return clone_combined_dict(mp);
2759+
PyDictKeysObject *keys = clone_combined_dict_keys(mp);
2760+
if (keys == NULL) {
2761+
return NULL;
2762+
}
2763+
PyDictObject *new = (PyDictObject *)new_dict(keys, NULL);
2764+
if (new == NULL) {
2765+
/* In case of an error, `new_dict()` takes care of
2766+
cleaning up `keys`. */
2767+
return NULL;
2768+
}
2769+
2770+
new->ma_used = mp->ma_used;
2771+
ASSERT_CONSISTENT(new);
2772+
if (_PyObject_GC_IS_TRACKED(mp)) {
2773+
/* Maintain tracking. */
2774+
_PyObject_GC_TRACK(new);
2775+
}
2776+
2777+
return (PyObject *)new;
27392778
}
27402779

27412780
copy = PyDict_New();
27422781
if (copy == NULL)
27432782
return NULL;
2744-
if (PyDict_Merge(copy, o, 1) == 0)
2783+
if (dict_merge(copy, o, 1) == 0)
27452784
return copy;
27462785
Py_DECREF(copy);
27472786
return NULL;
@@ -3359,16 +3398,15 @@ dict_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
33593398
d = (PyDictObject *)self;
33603399

33613400
/* The object has been implicitly tracked by tp_alloc */
3362-
if (type == &PyDict_Type)
3401+
if (type == &PyDict_Type) {
33633402
_PyObject_GC_UNTRACK(d);
3403+
}
33643404

33653405
d->ma_used = 0;
33663406
d->ma_version_tag = DICT_NEXT_VERSION();
3367-
d->ma_keys = new_keys_object(PyDict_MINSIZE);
3368-
if (d->ma_keys == NULL) {
3369-
Py_DECREF(self);
3370-
return NULL;
3371-
}
3407+
dictkeys_incref(Py_EMPTY_KEYS);
3408+
d->ma_keys = Py_EMPTY_KEYS;
3409+
d->ma_values = empty_values;
33723410
ASSERT_CONSISTENT(d);
33733411
return self;
33743412
}

0 commit comments

Comments
 (0)