Skip to content

Commit a1ef98c

Browse files
Un-revert "bpo-40521: Per-interpreter interned strings (pythonGH-20085)".
This reverts commit 35d6540.
1 parent 1a4d1c1 commit a1ef98c

File tree

4 files changed

+30
-75
lines changed

4 files changed

+30
-75
lines changed

Include/internal/pycore_unicodeobject.h

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -50,11 +50,21 @@ struct _Py_unicode_state {
5050
PyObject *latin1[256];
5151
struct _Py_unicode_fs_codec fs_codec;
5252

53+
/* This dictionary holds all interned unicode strings. Note that references
54+
to strings in this dictionary are *not* counted in the string's ob_refcnt.
55+
When the interned string reaches a refcnt of 0 the string deallocation
56+
function will delete the reference from this dictionary.
57+
58+
Another way to look at this is to say that the actual reference
59+
count of a string is: s->ob_refcnt + (s->state ? 2 : 0)
60+
*/
61+
PyObject *interned;
62+
5363
// Unicode identifiers (_Py_Identifier): see _PyUnicode_FromId()
5464
struct _Py_unicode_ids ids;
5565
};
5666

57-
extern void _PyUnicode_ClearInterned(PyInterpreterState *interp);
67+
extern void _PyUnicode_ClearInterned(PyInterpreterState *);
5868

5969

6070
#ifdef __cplusplus

Misc/NEWS.d/next/Core and Builtins/2022-01-05-17-13-47.bpo-46006.hdH5Vn.rst

Lines changed: 0 additions & 5 deletions
This file was deleted.

Objects/typeobject.c

Lines changed: 0 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -54,11 +54,6 @@ typedef struct PySlot_Offset {
5454
} PySlot_Offset;
5555

5656

57-
/* bpo-40521: Interned strings are shared by all subinterpreters */
58-
#ifndef EXPERIMENTAL_ISOLATED_SUBINTERPRETERS
59-
# define INTERN_NAME_STRINGS
60-
#endif
61-
6257
/* alphabetical order */
6358
_Py_IDENTIFIER(__abstractmethods__);
6459
_Py_IDENTIFIER(__annotations__);
@@ -4033,7 +4028,6 @@ type_setattro(PyTypeObject *type, PyObject *name, PyObject *value)
40334028
if (name == NULL)
40344029
return -1;
40354030
}
4036-
#ifdef INTERN_NAME_STRINGS
40374031
if (!PyUnicode_CHECK_INTERNED(name)) {
40384032
PyUnicode_InternInPlace(&name);
40394033
if (!PyUnicode_CHECK_INTERNED(name)) {
@@ -4043,7 +4037,6 @@ type_setattro(PyTypeObject *type, PyObject *name, PyObject *value)
40434037
return -1;
40444038
}
40454039
}
4046-
#endif
40474040
}
40484041
else {
40494042
/* Will fail in _PyObject_GenericSetAttrWithDict. */
@@ -8431,17 +8424,10 @@ _PyTypes_InitSlotDefs(void)
84318424
for (slotdef *p = slotdefs; p->name; p++) {
84328425
/* Slots must be ordered by their offset in the PyHeapTypeObject. */
84338426
assert(!p[1].name || p->offset <= p[1].offset);
8434-
#ifdef INTERN_NAME_STRINGS
84358427
p->name_strobj = PyUnicode_InternFromString(p->name);
84368428
if (!p->name_strobj || !PyUnicode_CHECK_INTERNED(p->name_strobj)) {
84378429
return _PyStatus_NO_MEMORY();
84388430
}
8439-
#else
8440-
p->name_strobj = PyUnicode_FromString(p->name);
8441-
if (!p->name_strobj) {
8442-
return _PyStatus_NO_MEMORY();
8443-
}
8444-
#endif
84458431
}
84468432
slotdefs_initialized = 1;
84478433
return _PyStatus_OK();
@@ -8466,24 +8452,16 @@ update_slot(PyTypeObject *type, PyObject *name)
84668452
int offset;
84678453

84688454
assert(PyUnicode_CheckExact(name));
8469-
#ifdef INTERN_NAME_STRINGS
84708455
assert(PyUnicode_CHECK_INTERNED(name));
8471-
#endif
84728456

84738457
assert(slotdefs_initialized);
84748458
pp = ptrs;
84758459
for (p = slotdefs; p->name; p++) {
84768460
assert(PyUnicode_CheckExact(p->name_strobj));
84778461
assert(PyUnicode_CheckExact(name));
8478-
#ifdef INTERN_NAME_STRINGS
84798462
if (p->name_strobj == name) {
84808463
*pp++ = p;
84818464
}
8482-
#else
8483-
if (p->name_strobj == name || _PyUnicode_EQ(p->name_strobj, name)) {
8484-
*pp++ = p;
8485-
}
8486-
#endif
84878465
}
84888466
*pp = NULL;
84898467
for (pp = ptrs; *pp; pp++) {

Objects/unicodeobject.c

Lines changed: 19 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -214,22 +214,6 @@ extern "C" {
214214
# define OVERALLOCATE_FACTOR 4
215215
#endif
216216

217-
/* bpo-40521: Interned strings are shared by all interpreters. */
218-
#ifndef EXPERIMENTAL_ISOLATED_SUBINTERPRETERS
219-
# define INTERNED_STRINGS
220-
#endif
221-
222-
/* This dictionary holds all interned unicode strings. Note that references
223-
to strings in this dictionary are *not* counted in the string's ob_refcnt.
224-
When the interned string reaches a refcnt of 0 the string deallocation
225-
function will delete the reference from this dictionary.
226-
227-
Another way to look at this is to say that the actual reference
228-
count of a string is: s->ob_refcnt + (s->state ? 2 : 0)
229-
*/
230-
#ifdef INTERNED_STRINGS
231-
static PyObject *interned = NULL;
232-
#endif
233217

234218
/* Forward declaration */
235219
static inline int
@@ -1966,21 +1950,20 @@ unicode_dealloc(PyObject *unicode)
19661950

19671951
case SSTATE_INTERNED_MORTAL:
19681952
{
1969-
#ifdef INTERNED_STRINGS
1953+
struct _Py_unicode_state *state = get_unicode_state();
19701954
/* Revive the dead object temporarily. PyDict_DelItem() removes two
19711955
references (key and value) which were ignored by
19721956
PyUnicode_InternInPlace(). Use refcnt=3 rather than refcnt=2
19731957
to prevent calling unicode_dealloc() again. Adjust refcnt after
19741958
PyDict_DelItem(). */
19751959
assert(Py_REFCNT(unicode) == 0);
19761960
Py_SET_REFCNT(unicode, 3);
1977-
if (PyDict_DelItem(interned, unicode) != 0) {
1961+
if (PyDict_DelItem(state->interned, unicode) != 0) {
19781962
_PyErr_WriteUnraisableMsg("deletion of interned string failed",
19791963
NULL);
19801964
}
19811965
assert(Py_REFCNT(unicode) == 1);
19821966
Py_SET_REFCNT(unicode, 0);
1983-
#endif
19841967
break;
19851968
}
19861969

@@ -11359,13 +11342,11 @@ _PyUnicode_EqualToASCIIId(PyObject *left, _Py_Identifier *right)
1135911342
if (PyUnicode_CHECK_INTERNED(left))
1136011343
return 0;
1136111344

11362-
#ifdef INTERNED_STRINGS
1136311345
assert(_PyUnicode_HASH(right_uni) != -1);
1136411346
Py_hash_t hash = _PyUnicode_HASH(left);
1136511347
if (hash != -1 && hash != _PyUnicode_HASH(right_uni)) {
1136611348
return 0;
1136711349
}
11368-
#endif
1136911350

1137011351
return unicode_compare_eq(left, right_uni);
1137111352
}
@@ -15610,21 +15591,21 @@ PyUnicode_InternInPlace(PyObject **p)
1561015591
return;
1561115592
}
1561215593

15613-
#ifdef INTERNED_STRINGS
1561415594
if (PyUnicode_READY(s) == -1) {
1561515595
PyErr_Clear();
1561615596
return;
1561715597
}
1561815598

15619-
if (interned == NULL) {
15620-
interned = PyDict_New();
15621-
if (interned == NULL) {
15599+
struct _Py_unicode_state *state = get_unicode_state();
15600+
if (state->interned == NULL) {
15601+
state->interned = PyDict_New();
15602+
if (state->interned == NULL) {
1562215603
PyErr_Clear(); /* Don't leave an exception */
1562315604
return;
1562415605
}
1562515606
}
1562615607

15627-
PyObject *t = PyDict_SetDefault(interned, s, s);
15608+
PyObject *t = PyDict_SetDefault(state->interned, s, s);
1562815609
if (t == NULL) {
1562915610
PyErr_Clear();
1563015611
return;
@@ -15641,13 +15622,9 @@ PyUnicode_InternInPlace(PyObject **p)
1564115622
this. */
1564215623
Py_SET_REFCNT(s, Py_REFCNT(s) - 2);
1564315624
_PyUnicode_STATE(s).interned = SSTATE_INTERNED_MORTAL;
15644-
#else
15645-
// PyDict expects that interned strings have their hash
15646-
// (PyASCIIObject.hash) already computed.
15647-
(void)unicode_hash(s);
15648-
#endif
1564915625
}
1565015626

15627+
1565115628
void
1565215629
PyUnicode_InternImmortal(PyObject **p)
1565315630
{
@@ -15681,29 +15658,25 @@ PyUnicode_InternFromString(const char *cp)
1568115658
void
1568215659
_PyUnicode_ClearInterned(PyInterpreterState *interp)
1568315660
{
15684-
if (!_Py_IsMainInterpreter(interp)) {
15685-
// interned dict is shared by all interpreters
15686-
return;
15687-
}
15688-
15689-
if (interned == NULL) {
15661+
struct _Py_unicode_state *state = &interp->unicode;
15662+
if (state->interned == NULL) {
1569015663
return;
1569115664
}
15692-
assert(PyDict_CheckExact(interned));
15665+
assert(PyDict_CheckExact(state->interned));
1569315666

1569415667
/* Interned unicode strings are not forcibly deallocated; rather, we give
1569515668
them their stolen references back, and then clear and DECREF the
1569615669
interned dict. */
1569715670

1569815671
#ifdef INTERNED_STATS
1569915672
fprintf(stderr, "releasing %zd interned strings\n",
15700-
PyDict_GET_SIZE(interned));
15673+
PyDict_GET_SIZE(state->interned));
1570115674

1570215675
Py_ssize_t immortal_size = 0, mortal_size = 0;
1570315676
#endif
1570415677
Py_ssize_t pos = 0;
1570515678
PyObject *s, *ignored_value;
15706-
while (PyDict_Next(interned, &pos, &s, &ignored_value)) {
15679+
while (PyDict_Next(state->interned, &pos, &s, &ignored_value)) {
1570715680
assert(PyUnicode_IS_READY(s));
1570815681

1570915682
switch (PyUnicode_CHECK_INTERNED(s)) {
@@ -15734,8 +15707,8 @@ _PyUnicode_ClearInterned(PyInterpreterState *interp)
1573415707
mortal_size, immortal_size);
1573515708
#endif
1573615709

15737-
PyDict_Clear(interned);
15738-
Py_CLEAR(interned);
15710+
PyDict_Clear(state->interned);
15711+
Py_CLEAR(state->interned);
1573915712
}
1574015713

1574115714

@@ -16106,7 +16079,8 @@ _PyUnicode_EnableLegacyWindowsFSEncoding(void)
1610616079
static inline int
1610716080
unicode_is_finalizing(void)
1610816081
{
16109-
return (interned == NULL);
16082+
struct _Py_unicode_state *state = get_unicode_state();
16083+
return (state->interned == NULL);
1611016084
}
1611116085
#endif
1611216086

@@ -16116,10 +16090,8 @@ _PyUnicode_Fini(PyInterpreterState *interp)
1611616090
{
1611716091
struct _Py_unicode_state *state = &interp->unicode;
1611816092

16119-
if (_Py_IsMainInterpreter(interp)) {
16120-
// _PyUnicode_ClearInterned() must be called before _PyUnicode_Fini()
16121-
assert(interned == NULL);
16122-
}
16093+
// _PyUnicode_ClearInterned() must be called before _PyUnicode_Fini()
16094+
assert(state->interned == NULL);
1612316095

1612416096
_PyUnicode_FiniEncodings(&state->fs_codec);
1612516097

0 commit comments

Comments
 (0)