Skip to content

Commit d5d9a71

Browse files
methane and vstinner authored
bpo-36346: array: Don't use deprecated APIs (GH-19653)
* Py_UNICODE -> wchar_t
* Py_UNICODE -> unicode in Argument Clinic
* PyUnicode_AsUnicode -> PyUnicode_AsWideCharString
* Don't use "u#" format.

Co-authored-by: Victor Stinner <[email protected]>
1 parent 6067d4b commit d5d9a71

File tree

4 files changed

+63
-55
lines changed

4 files changed

+63
-55
lines changed

Doc/library/array.rst

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ defined:
2222
+-----------+--------------------+-------------------+-----------------------+-------+
2323
| ``'B'`` | unsigned char | int | 1 | |
2424
+-----------+--------------------+-------------------+-----------------------+-------+
25-
| ``'u'`` | Py_UNICODE | Unicode character | 2 | \(1) |
25+
| ``'u'`` | wchar_t | Unicode character | 2 | \(1) |
2626
+-----------+--------------------+-------------------+-----------------------+-------+
2727
| ``'h'`` | signed short | int | 2 | |
2828
+-----------+--------------------+-------------------+-----------------------+-------+
@@ -48,15 +48,16 @@ defined:
4848
Notes:
4949

5050
(1)
51-
The ``'u'`` type code corresponds to Python's obsolete unicode character
52-
(:c:type:`Py_UNICODE` which is :c:type:`wchar_t`). Depending on the
53-
platform, it can be 16 bits or 32 bits.
51+
It can be 16 bits or 32 bits depending on the platform.
5452

55-
``'u'`` will be removed together with the rest of the :c:type:`Py_UNICODE`
56-
API.
53+
.. versionchanged:: 3.9
54+
``array('u')`` now uses ``wchar_t`` as C type instead of deprecated
55+
``Py_UNICODE``. This change doesn't affect its behavior because
56+
``Py_UNICODE`` has been an alias of ``wchar_t`` since Python 3.3.
5757

5858
.. deprecated-removed:: 3.3 4.0
5959

60+
6061
The actual representation of values is determined by the machine architecture
6162
(strictly speaking, by the C implementation). The actual size can be accessed
6263
through the :attr:`itemsize` attribute.

Doc/whatsnew/3.9.rst

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -786,6 +786,12 @@ Changes in the Python API
786786
``PyCF_ALLOW_TOP_LEVEL_AWAIT`` was clashing with ``CO_FUTURE_DIVISION``.
787787
(Contributed by Batuhan Taskaya in :issue:`39562`)
788788

789+
* ``array('u')`` now uses ``wchar_t`` as C type instead of ``Py_UNICODE``.
790+
This change doesn't affect its behavior because ``Py_UNICODE`` has been an alias
791+
of ``wchar_t`` since Python 3.3.
792+
(Contributed by Inada Naoki in :issue:`34538`.)
793+
794+
789795
CPython bytecode changes
790796
------------------------
791797

Modules/arraymodule.c

Lines changed: 40 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -235,24 +235,31 @@ BB_setitem(arrayobject *ap, Py_ssize_t i, PyObject *v)
235235
static PyObject *
236236
u_getitem(arrayobject *ap, Py_ssize_t i)
237237
{
238-
return PyUnicode_FromOrdinal(((Py_UNICODE *) ap->ob_item)[i]);
238+
return PyUnicode_FromOrdinal(((wchar_t *) ap->ob_item)[i]);
239239
}
240240

241241
static int
242242
u_setitem(arrayobject *ap, Py_ssize_t i, PyObject *v)
243243
{
244-
Py_UNICODE *p;
245-
Py_ssize_t len;
246-
247-
if (!PyArg_Parse(v, "u#;array item must be unicode character", &p, &len))
244+
PyObject *u;
245+
if (!PyArg_Parse(v, "U;array item must be unicode character", &u)) {
248246
return -1;
249-
if (len != 1) {
247+
}
248+
249+
Py_ssize_t len = PyUnicode_AsWideChar(u, NULL, 0);
250+
if (len != 2) {
250251
PyErr_SetString(PyExc_TypeError,
251252
"array item must be unicode character");
252253
return -1;
253254
}
254-
if (i >= 0)
255-
((Py_UNICODE *)ap->ob_item)[i] = p[0];
255+
256+
wchar_t w;
257+
len = PyUnicode_AsWideChar(u, &w, 1);
258+
assert(len == 1);
259+
260+
if (i >= 0) {
261+
((wchar_t *)ap->ob_item)[i] = w;
262+
}
256263
return 0;
257264
}
258265

@@ -530,7 +537,7 @@ d_setitem(arrayobject *ap, Py_ssize_t i, PyObject *v)
530537

531538
DEFINE_COMPAREITEMS(b, signed char)
532539
DEFINE_COMPAREITEMS(BB, unsigned char)
533-
DEFINE_COMPAREITEMS(u, Py_UNICODE)
540+
DEFINE_COMPAREITEMS(u, wchar_t)
534541
DEFINE_COMPAREITEMS(h, short)
535542
DEFINE_COMPAREITEMS(HH, unsigned short)
536543
DEFINE_COMPAREITEMS(i, int)
@@ -548,7 +555,7 @@ DEFINE_COMPAREITEMS(QQ, unsigned long long)
548555
static const struct arraydescr descriptors[] = {
549556
{'b', 1, b_getitem, b_setitem, b_compareitems, "b", 1, 1},
550557
{'B', 1, BB_getitem, BB_setitem, BB_compareitems, "B", 1, 0},
551-
{'u', sizeof(Py_UNICODE), u_getitem, u_setitem, u_compareitems, "u", 0, 0},
558+
{'u', sizeof(wchar_t), u_getitem, u_setitem, u_compareitems, "u", 0, 0},
552559
{'h', sizeof(short), h_getitem, h_setitem, h_compareitems, "h", 1, 1},
553560
{'H', sizeof(short), HH_getitem, HH_setitem, HH_compareitems, "H", 1, 0},
554561
{'i', sizeof(int), i_getitem, i_setitem, i_compareitems, "i", 1, 1},
@@ -1660,7 +1667,7 @@ array_array_tobytes_impl(arrayobject *self)
16601667
/*[clinic input]
16611668
array.array.fromunicode
16621669
1663-
ustr: Py_UNICODE(zeroes=True)
1670+
ustr: unicode
16641671
/
16651672
16661673
Extends this array with data from the unicode string ustr.
@@ -1671,25 +1678,28 @@ some other type.
16711678
[clinic start generated code]*/
16721679

16731680
static PyObject *
1674-
array_array_fromunicode_impl(arrayobject *self, const Py_UNICODE *ustr,
1675-
Py_ssize_clean_t ustr_length)
1676-
/*[clinic end generated code: output=cf2f662908e2befc input=150f00566ffbca6e]*/
1681+
array_array_fromunicode_impl(arrayobject *self, PyObject *ustr)
1682+
/*[clinic end generated code: output=24359f5e001a7f2b input=025db1fdade7a4ce]*/
16771683
{
1678-
char typecode;
1679-
1680-
typecode = self->ob_descr->typecode;
1681-
if (typecode != 'u') {
1684+
if (self->ob_descr->typecode != 'u') {
16821685
PyErr_SetString(PyExc_ValueError,
16831686
"fromunicode() may only be called on "
16841687
"unicode type arrays");
16851688
return NULL;
16861689
}
1687-
if (ustr_length > 0) {
1690+
1691+
Py_ssize_t ustr_length = PyUnicode_AsWideChar(ustr, NULL, 0);
1692+
assert(ustr_length > 0);
1693+
if (ustr_length > 1) {
1694+
ustr_length--; /* trim trailing NUL character */
16881695
Py_ssize_t old_size = Py_SIZE(self);
1689-
if (array_resize(self, old_size + ustr_length) == -1)
1696+
if (array_resize(self, old_size + ustr_length) == -1) {
16901697
return NULL;
1691-
memcpy(self->ob_item + old_size * sizeof(Py_UNICODE),
1692-
ustr, ustr_length * sizeof(Py_UNICODE));
1698+
}
1699+
1700+
// must not fail
1701+
PyUnicode_AsWideChar(
1702+
ustr, ((wchar_t *)self->ob_item) + old_size, ustr_length);
16931703
}
16941704

16951705
Py_RETURN_NONE;
@@ -1709,14 +1719,12 @@ static PyObject *
17091719
array_array_tounicode_impl(arrayobject *self)
17101720
/*[clinic end generated code: output=08e442378336e1ef input=127242eebe70b66d]*/
17111721
{
1712-
char typecode;
1713-
typecode = self->ob_descr->typecode;
1714-
if (typecode != 'u') {
1722+
if (self->ob_descr->typecode != 'u') {
17151723
PyErr_SetString(PyExc_ValueError,
17161724
"tounicode() may only be called on unicode type arrays");
17171725
return NULL;
17181726
}
1719-
return PyUnicode_FromWideChar((Py_UNICODE *) self->ob_item, Py_SIZE(self));
1727+
return PyUnicode_FromWideChar((wchar_t *) self->ob_item, Py_SIZE(self));
17201728
}
17211729

17221730
/*[clinic input]
@@ -2675,30 +2683,20 @@ array_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
26752683
Py_DECREF(v);
26762684
}
26772685
else if (initial != NULL && PyUnicode_Check(initial)) {
2678-
Py_UNICODE *ustr;
26792686
Py_ssize_t n;
2680-
2681-
ustr = PyUnicode_AsUnicode(initial);
2687+
wchar_t *ustr = PyUnicode_AsWideCharString(initial, &n);
26822688
if (ustr == NULL) {
2683-
PyErr_NoMemory();
26842689
Py_DECREF(a);
26852690
return NULL;
26862691
}
26872692

2688-
n = PyUnicode_GET_DATA_SIZE(initial);
26892693
if (n > 0) {
26902694
arrayobject *self = (arrayobject *)a;
2691-
char *item = self->ob_item;
2692-
item = (char *)PyMem_Realloc(item, n);
2693-
if (item == NULL) {
2694-
PyErr_NoMemory();
2695-
Py_DECREF(a);
2696-
return NULL;
2697-
}
2698-
self->ob_item = item;
2699-
Py_SET_SIZE(self, n / sizeof(Py_UNICODE));
2700-
memcpy(item, ustr, n);
2701-
self->allocated = Py_SIZE(self);
2695+
// self->ob_item may be NULL but it is safe.
2696+
PyMem_Free(self->ob_item);
2697+
self->ob_item = (char *)ustr;
2698+
Py_SET_SIZE(self, n);
2699+
self->allocated = n;
27022700
}
27032701
}
27042702
else if (initial != NULL && array_Check(initial) && len > 0) {

Modules/clinic/arraymodule.c.h

Lines changed: 10 additions & 7 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)