From c0386340a9a66d3d695d629ef78929c856ecc16b Mon Sep 17 00:00:00 2001 From: "Erlend E. Aasland" Date: Tue, 2 Apr 2024 16:48:00 +0200 Subject: [PATCH 01/16] Draft: adapt str.{start,end}swith to Argument Clinic --- Objects/clinic/unicodeobject.c.h | 104 ++++++++++++++++++++++++++++++- Objects/unicodeobject.c | 89 +++++++++++++------------- 2 files changed, 145 insertions(+), 48 deletions(-) diff --git a/Objects/clinic/unicodeobject.c.h b/Objects/clinic/unicodeobject.c.h index 3e5167d9242fe4..9ee37cb25f2905 100644 --- a/Objects/clinic/unicodeobject.c.h +++ b/Objects/clinic/unicodeobject.c.h @@ -1369,6 +1369,108 @@ unicode_zfill(PyObject *self, PyObject *arg) return return_value; } +PyDoc_STRVAR(unicode_startswith__doc__, +"startswith($self, prefix[, start[, end]], /)\n" +"--\n" +"\n" +"Return True if the string starts with the specified prefix, False otherwise.\n" +"\n" +" prefix\n" +" A string or a tuple of strings to try.\n" +" start\n" +" Optional start position. Default 0.\n" +" end\n" +" Optional stop position. Default PY_SSIZE_T_MAX."); + +#define UNICODE_STARTSWITH_METHODDEF \ + {"startswith", _PyCFunction_CAST(unicode_startswith), METH_FASTCALL, unicode_startswith__doc__}, + +static PyObject * +unicode_startswith_impl(PyObject *self, PyObject *subobj, Py_ssize_t start, + Py_ssize_t end); + +static PyObject * +unicode_startswith(PyObject *self, PyObject *const *args, Py_ssize_t nargs) +{ + PyObject *return_value = NULL; + PyObject *subobj; + Py_ssize_t start = 0; + Py_ssize_t end = PY_SSIZE_T_MAX; + + if (!_PyArg_CheckPositional("startswith", nargs, 1, 3)) { + goto exit; + } + subobj = args[0]; + if (nargs < 2) { + goto skip_optional; + } + if (!_PyEval_SliceIndex(args[1], &start)) { + goto exit; + } + if (nargs < 3) { + goto skip_optional; + } + if (!_PyEval_SliceIndex(args[2], &end)) { + goto exit; + } +skip_optional: + return_value = unicode_startswith_impl(self, subobj, start, end); + +exit: + return return_value; +} + +PyDoc_STRVAR(unicode_endswith__doc__, +"endswith($self, prefix, start=None, end=None, /)\n" +"--\n" +"\n" +"Return True if the string ends with the specified prefix, False otherwise.\n" +"\n" +" prefix\n" +" A string or a tuple of strings to try.\n" +" start\n" +" Optional start position. Default 0.\n" +" end\n" +" Optional stop position. Default PY_SSIZE_T_MAX."); + +#define UNICODE_ENDSWITH_METHODDEF \ + {"endswith", _PyCFunction_CAST(unicode_endswith), METH_FASTCALL, unicode_endswith__doc__}, + +static PyObject * +unicode_endswith_impl(PyObject *self, PyObject *subobj, Py_ssize_t start, + Py_ssize_t end); + +static PyObject * +unicode_endswith(PyObject *self, PyObject *const *args, Py_ssize_t nargs) +{ + PyObject *return_value = NULL; + PyObject *subobj; + Py_ssize_t start = 0; + Py_ssize_t end = PY_SSIZE_T_MAX; + + if (!_PyArg_CheckPositional("endswith", nargs, 1, 3)) { + goto exit; + } + subobj = args[0]; + if (nargs < 2) { + goto skip_optional; + } + if (!_PyEval_SliceIndex(args[1], &start)) { + goto exit; + } + if (nargs < 3) { + goto skip_optional; + } + if (!_PyEval_SliceIndex(args[2], &end)) { + goto exit; + } +skip_optional: + return_value = unicode_endswith_impl(self, subobj, start, end); + +exit: + return return_value; +} + PyDoc_STRVAR(unicode___format____doc__, "__format__($self, format_spec, /)\n" "--\n" @@ -1507,4 +1609,4 @@ unicode_new(PyTypeObject *type, PyObject *args, PyObject *kwargs) exit: return return_value; } -/*[clinic end generated code: output=1aab29bab5201c78 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=c2306767984a3a86 input=a9049054013a1b77]*/ diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index e412af5f797e7a..d439463b02be7e 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -13021,30 +13021,30 @@ unicode_zfill_impl(PyObject *self, Py_ssize_t width) return u; } -PyDoc_STRVAR(startswith__doc__, - "S.startswith(prefix[, start[, end]]) -> bool\n\ -\n\ -Return True if S starts with the specified prefix, False otherwise.\n\ -With optional start, test S beginning at that position.\n\ -With optional end, stop comparing S at that position.\n\ -prefix can also be a tuple of strings to try."); +/*[clinic input] +@text_signature "($self, prefix[, start[, end]], /)" +str.startswith as unicode_startswith + + prefix as subobj: object + A string or a tuple of strings to try. + start: slice_index(accept={int, NoneType}, c_default='0') = None + Optional start position. Default 0. + end: slice_index(accept={int, NoneType}, c_default='PY_SSIZE_T_MAX') = None + Optional stop position. Default PY_SSIZE_T_MAX. + / + +Return True if the string starts with the specified prefix, False otherwise. +[clinic start generated code]*/ static PyObject * -unicode_startswith(PyObject *self, - PyObject *args) +unicode_startswith_impl(PyObject *self, PyObject *subobj, Py_ssize_t start, + Py_ssize_t end) +/*[clinic end generated code: output=4bd7cfd0803051d4 input=9c6ded2ba2e2f90b]*/ { - PyObject *subobj; - PyObject *substring; - Py_ssize_t start = 0; - Py_ssize_t end = PY_SSIZE_T_MAX; - int result; - - if (!asciilib_parse_args_finds("startswith", args, &subobj, &start, &end)) - return NULL; if (PyTuple_Check(subobj)) { Py_ssize_t i; for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) { - substring = PyTuple_GET_ITEM(subobj, i); + PyObject *substring = PyTuple_GET_ITEM(subobj, i); if (!PyUnicode_Check(substring)) { PyErr_Format(PyExc_TypeError, "tuple for startswith must only contain str, " @@ -13052,9 +13052,10 @@ unicode_startswith(PyObject *self, Py_TYPE(substring)->tp_name); return NULL; } - result = tailmatch(self, substring, start, end, -1); - if (result == -1) + int result = tailmatch(self, substring, start, end, -1); + if (result < 0) { return NULL; + } if (result) { Py_RETURN_TRUE; } @@ -13065,40 +13066,32 @@ unicode_startswith(PyObject *self, if (!PyUnicode_Check(subobj)) { PyErr_Format(PyExc_TypeError, "startswith first arg must be str or " - "a tuple of str, not %.100s", Py_TYPE(subobj)->tp_name); + "a tuple of str, not %N", subobj); return NULL; } - result = tailmatch(self, subobj, start, end, -1); - if (result == -1) + int result = tailmatch(self, subobj, start, end, -1); + if (result < 0) { return NULL; + } return PyBool_FromLong(result); } -PyDoc_STRVAR(endswith__doc__, - "S.endswith(suffix[, start[, end]]) -> bool\n\ -\n\ -Return True if S ends with the specified suffix, False otherwise.\n\ -With optional start, test S beginning at that position.\n\ -With optional end, stop comparing S at that position.\n\ -suffix can also be a tuple of strings to try."); +/*[clinic input] +str.endswith as unicode_endswith = str.startswith + +Return True if the string ends with the specified prefix, False otherwise. +[clinic start generated code]*/ static PyObject * -unicode_endswith(PyObject *self, - PyObject *args) +unicode_endswith_impl(PyObject *self, PyObject *subobj, Py_ssize_t start, + Py_ssize_t end) +/*[clinic end generated code: output=cce6f8ceb0102ca9 input=aed4beb8024d9292]*/ { - PyObject *subobj; - PyObject *substring; - Py_ssize_t start = 0; - Py_ssize_t end = PY_SSIZE_T_MAX; - int result; - - if (!asciilib_parse_args_finds("endswith", args, &subobj, &start, &end)) - return NULL; if (PyTuple_Check(subobj)) { Py_ssize_t i; for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) { - substring = PyTuple_GET_ITEM(subobj, i); + PyObject *substring = PyTuple_GET_ITEM(subobj, i); if (!PyUnicode_Check(substring)) { PyErr_Format(PyExc_TypeError, "tuple for endswith must only contain str, " @@ -13106,9 +13099,10 @@ unicode_endswith(PyObject *self, Py_TYPE(substring)->tp_name); return NULL; } - result = tailmatch(self, substring, start, end, +1); - if (result == -1) + int result = tailmatch(self, substring, start, end, +1); + if (result < 0) { return NULL; + } if (result) { Py_RETURN_TRUE; } @@ -13121,9 +13115,10 @@ unicode_endswith(PyObject *self, "a tuple of str, not %.100s", Py_TYPE(subobj)->tp_name); return NULL; } - result = tailmatch(self, subobj, start, end, +1); - if (result == -1) + int result = tailmatch(self, subobj, start, end, +1); + if (result < 0) { return NULL; + } return PyBool_FromLong(result); } @@ -13576,8 +13571,8 @@ static PyMethodDef unicode_methods[] = { UNICODE_SWAPCASE_METHODDEF UNICODE_TRANSLATE_METHODDEF UNICODE_UPPER_METHODDEF - {"startswith", (PyCFunction) unicode_startswith, METH_VARARGS, startswith__doc__}, - {"endswith", (PyCFunction) unicode_endswith, METH_VARARGS, endswith__doc__}, + UNICODE_STARTSWITH_METHODDEF + UNICODE_ENDSWITH_METHODDEF UNICODE_REMOVEPREFIX_METHODDEF UNICODE_REMOVESUFFIX_METHODDEF UNICODE_ISASCII_METHODDEF From a9d31723a519cb66a1e3e57a7dbab4dbdde6a953 Mon Sep 17 00:00:00 2001 From: "Erlend E. Aasland" Date: Tue, 2 Apr 2024 17:17:19 +0200 Subject: [PATCH 02/16] Revert unneeded and malformed change --- Objects/unicodeobject.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index d439463b02be7e..1cd3e60668010e 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -13066,7 +13066,7 @@ unicode_startswith_impl(PyObject *self, PyObject *subobj, Py_ssize_t start, if (!PyUnicode_Check(subobj)) { PyErr_Format(PyExc_TypeError, "startswith first arg must be str or " - "a tuple of str, not %N", subobj); + "a tuple of str, not %.100s", Py_TYPE(subobj)->tp_name); return NULL; } int result = tailmatch(self, subobj, start, end, -1); From 2ce5be63844534f362476cf183ba8f38950acea3 Mon Sep 17 00:00:00 2001 From: "Erlend E. Aasland" Date: Tue, 2 Apr 2024 17:23:17 +0200 Subject: [PATCH 03/16] Loosen error message tests --- Lib/test/string_tests.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Lib/test/string_tests.py b/Lib/test/string_tests.py index cecf309dca9194..5ade7013328d63 100644 --- a/Lib/test/string_tests.py +++ b/Lib/test/string_tests.py @@ -1513,9 +1513,9 @@ def test_find_etc_raise_correct_error_messages(self): x, None, None, None) self.assertRaisesRegex(TypeError, r'^count\(', s.count, x, None, None, None) - self.assertRaisesRegex(TypeError, r'^startswith\(', s.startswith, + self.assertRaisesRegex(TypeError, r'^startswith\b', s.startswith, x, None, None, None) - self.assertRaisesRegex(TypeError, r'^endswith\(', s.endswith, + self.assertRaisesRegex(TypeError, r'^endswith\b', s.endswith, x, None, None, None) # issue #15534 From 6c95a0d66c786d4f0b5bd94bba27c0f370469dea Mon Sep 17 00:00:00 2001 From: "Erlend E. Aasland" Date: Tue, 2 Apr 2024 17:37:38 +0200 Subject: [PATCH 04/16] NEWS --- .../2024-04-02-17-37-35.gh-issue-117431.vDKAOn.rst | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2024-04-02-17-37-35.gh-issue-117431.vDKAOn.rst diff --git a/Misc/NEWS.d/next/Core and Builtins/2024-04-02-17-37-35.gh-issue-117431.vDKAOn.rst b/Misc/NEWS.d/next/Core and Builtins/2024-04-02-17-37-35.gh-issue-117431.vDKAOn.rst new file mode 100644 index 00000000000000..96e14ea0c3b1bd --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2024-04-02-17-37-35.gh-issue-117431.vDKAOn.rst @@ -0,0 +1,2 @@ +Improve the performance of :meth:`str.startswith` and :meth:`str.endswith` +by adapting them to the :c:macro:`METH_FASTCALL` calling convention. From 0ee5a28b695a056791caa8c8a9f8a42acbea1090 Mon Sep 17 00:00:00 2001 From: "Erlend E. Aasland" Date: Tue, 2 Apr 2024 18:32:52 +0200 Subject: [PATCH 05/16] Adapt str.count --- Lib/test/string_tests.py | 2 +- Objects/clinic/unicodeobject.c.h | 55 ++++++++++- Objects/unicodeobject.c | 161 ++++++++++++++----------------- 3 files changed, 129 insertions(+), 89 deletions(-) diff --git a/Lib/test/string_tests.py b/Lib/test/string_tests.py index 5ade7013328d63..a188ee01d37302 100644 --- a/Lib/test/string_tests.py +++ b/Lib/test/string_tests.py @@ -1511,7 +1511,7 @@ def test_find_etc_raise_correct_error_messages(self): x, None, None, None) self.assertRaisesRegex(TypeError, r'^rindex\(', s.rindex, x, None, None, None) - self.assertRaisesRegex(TypeError, r'^count\(', s.count, + self.assertRaisesRegex(TypeError, r'^count\b', s.count, x, None, None, None) self.assertRaisesRegex(TypeError, r'^startswith\b', s.startswith, x, None, None, None) diff --git a/Objects/clinic/unicodeobject.c.h b/Objects/clinic/unicodeobject.c.h index 9ee37cb25f2905..7fa5a8637fdf0f 100644 --- a/Objects/clinic/unicodeobject.c.h +++ b/Objects/clinic/unicodeobject.c.h @@ -136,6 +136,59 @@ unicode_center(PyObject *self, PyObject *const *args, Py_ssize_t nargs) return return_value; } +PyDoc_STRVAR(unicode_count__doc__, +"count($self, sub[, start[, end]], /)\n" +"--\n" +"\n" +"Return the number of non-overlapping occurrences of substring sub in string S[start:end]."); + +#define UNICODE_COUNT_METHODDEF \ + {"count", _PyCFunction_CAST(unicode_count), METH_FASTCALL, unicode_count__doc__}, + +static Py_ssize_t +unicode_count_impl(PyObject *str, PyObject *substr, Py_ssize_t start, + Py_ssize_t end); + +static PyObject * +unicode_count(PyObject *str, PyObject *const *args, Py_ssize_t nargs) +{ + PyObject *return_value = NULL; + PyObject *substr; + Py_ssize_t start = 0; + Py_ssize_t end = PY_SSIZE_T_MAX; + Py_ssize_t _return_value; + + if (!_PyArg_CheckPositional("count", nargs, 1, 3)) { + goto exit; + } + if (!PyUnicode_Check(args[0])) { + _PyArg_BadArgument("count", "argument 1", "str", args[0]); + goto exit; + } + substr = args[0]; + if (nargs < 2) { + goto skip_optional; + } + if (!_PyEval_SliceIndex(args[1], &start)) { + goto exit; + } + if (nargs < 3) { + goto skip_optional; + } + if (!_PyEval_SliceIndex(args[2], &end)) { + goto exit; + } +skip_optional: + _return_value = unicode_count_impl(str, substr, start, end); + if ((_return_value == -1) && PyErr_Occurred()) { + goto exit; + } + return_value = PyLong_FromSsize_t(_return_value); + +exit: + return return_value; +} + PyDoc_STRVAR(unicode_encode__doc__, "encode($self, /, encoding=\'utf-8\', errors=\'strict\')\n" "--\n" @@ -1609,4 +1662,4 @@ unicode_new(PyTypeObject *type, PyObject *args, PyObject *kwargs) exit: return return_value; } -/*[clinic end generated code: output=c2306767984a3a86 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=6e0b7dda0329e54b input=a9049054013a1b77]*/ diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 1cd3e60668010e..a5389d99276409 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -9194,75 +9194,6 @@ _PyUnicode_InsertThousandsGrouping( return count; } -static Py_ssize_t -unicode_count_impl(PyObject *str, - PyObject *substr, - Py_ssize_t start, - Py_ssize_t end) -{ - assert(PyUnicode_Check(str)); - assert(PyUnicode_Check(substr)); - - Py_ssize_t result; - int kind1, kind2; - const void *buf1 = NULL, *buf2 = NULL; - Py_ssize_t len1, len2; - - kind1 = PyUnicode_KIND(str); - kind2 = PyUnicode_KIND(substr); - if (kind1 < kind2) - return 0; - - len1 = PyUnicode_GET_LENGTH(str); - len2 = PyUnicode_GET_LENGTH(substr); - ADJUST_INDICES(start, end, len1); - if (end - start < len2) - return 0; - - buf1 = PyUnicode_DATA(str); - buf2 = PyUnicode_DATA(substr); - if (kind2 != kind1) { - buf2 = unicode_askind(kind2, buf2, len2, kind1); - if (!buf2) - goto onError; - } - - // We don't reuse `anylib_count` here because of the explicit casts. - switch (kind1) { - case PyUnicode_1BYTE_KIND: - result = ucs1lib_count( - ((const Py_UCS1*)buf1) + start, end - start, - buf2, len2, PY_SSIZE_T_MAX - ); - break; - case PyUnicode_2BYTE_KIND: - result = ucs2lib_count( - ((const Py_UCS2*)buf1) + start, end - start, - buf2, len2, PY_SSIZE_T_MAX - ); - break; - case PyUnicode_4BYTE_KIND: - result = ucs4lib_count( - ((const Py_UCS4*)buf1) + start, end - start, - buf2, len2, PY_SSIZE_T_MAX - ); - break; - default: - Py_UNREACHABLE(); - } - - assert((kind2 != kind1) == (buf2 != PyUnicode_DATA(substr))); - if (kind2 != kind1) - PyMem_Free((void *)buf2); - - return result; - onError: - assert((kind2 != kind1) == (buf2 != PyUnicode_DATA(substr))); - if (kind2 != kind1) - PyMem_Free((void *)buf2); - return -1; -} - Py_ssize_t PyUnicode_Count(PyObject *str, PyObject *substr, @@ -11149,29 +11080,85 @@ parse_args_finds_unicode(const char * function_name, PyObject *args, return 0; } -PyDoc_STRVAR(count__doc__, - "S.count(sub[, start[, end]]) -> int\n\ -\n\ -Return the number of non-overlapping occurrences of substring sub in\n\ -string S[start:end]. Optional arguments start and end are\n\ -interpreted as in slice notation."); +/*[clinic input] +@text_signature "($self, sub[, start[, end]], /)" +str.count as unicode_count -> Py_ssize_t -static PyObject * -unicode_count(PyObject *self, PyObject *args) + self as str: self + sub as substr: unicode + start: slice_index(accept={int, NoneType}, c_default='0') = None + end: slice_index(accept={int, NoneType}, c_default='PY_SSIZE_T_MAX') = None + / + +Return the number of non-overlapping occurrences of substring sub in string S[start:end]. +[clinic start generated code]*/ + +static Py_ssize_t +unicode_count_impl(PyObject *str, PyObject *substr, Py_ssize_t start, + Py_ssize_t end) +/*[clinic end generated code: output=8fcc3aef0b18edbf input=9e91e81ffff6e356]*/ { - PyObject *substring = NULL; /* initialize to fix a compiler warning */ - Py_ssize_t start = 0; - Py_ssize_t end = PY_SSIZE_T_MAX; + assert(PyUnicode_Check(str)); + assert(PyUnicode_Check(substr)); + Py_ssize_t result; + int kind1, kind2; + const void *buf1 = NULL, *buf2 = NULL; + Py_ssize_t len1, len2; - if (!parse_args_finds_unicode("count", args, &substring, &start, &end)) - return NULL; + kind1 = PyUnicode_KIND(str); + kind2 = PyUnicode_KIND(substr); + if (kind1 < kind2) + return 0; - result = unicode_count_impl(self, substring, start, end); - if (result == -1) - return NULL; + len1 = PyUnicode_GET_LENGTH(str); + len2 = PyUnicode_GET_LENGTH(substr); + ADJUST_INDICES(start, end, len1); + if (end - start < len2) + return 0; - return PyLong_FromSsize_t(result); + buf1 = PyUnicode_DATA(str); + buf2 = PyUnicode_DATA(substr); + if (kind2 != kind1) { + buf2 = unicode_askind(kind2, buf2, len2, kind1); + if (!buf2) + goto onError; + } + + // We don't reuse `anylib_count` here because of the explicit casts. + switch (kind1) { + case PyUnicode_1BYTE_KIND: + result = ucs1lib_count( + ((const Py_UCS1*)buf1) + start, end - start, + buf2, len2, PY_SSIZE_T_MAX + ); + break; + case PyUnicode_2BYTE_KIND: + result = ucs2lib_count( + ((const Py_UCS2*)buf1) + start, end - start, + buf2, len2, PY_SSIZE_T_MAX + ); + break; + case PyUnicode_4BYTE_KIND: + result = ucs4lib_count( + ((const Py_UCS4*)buf1) + start, end - start, + buf2, len2, PY_SSIZE_T_MAX + ); + break; + default: + Py_UNREACHABLE(); + } + + assert((kind2 != kind1) == (buf2 != PyUnicode_DATA(substr))); + if (kind2 != kind1) + PyMem_Free((void *)buf2); + + return result; + onError: + assert((kind2 != kind1) == (buf2 != PyUnicode_DATA(substr))); + if (kind2 != kind1) + PyMem_Free((void *)buf2); + return -1; } /*[clinic input] @@ -13553,7 +13540,7 @@ static PyMethodDef unicode_methods[] = { UNICODE_CASEFOLD_METHODDEF UNICODE_TITLE_METHODDEF UNICODE_CENTER_METHODDEF - {"count", (PyCFunction) unicode_count, METH_VARARGS, count__doc__}, + UNICODE_COUNT_METHODDEF UNICODE_EXPANDTABS_METHODDEF {"find", (PyCFunction) unicode_find, METH_VARARGS, find__doc__}, UNICODE_PARTITION_METHODDEF From ec4c400e84f1ec8a84a6b3a824a1cd34ceeaba0b Mon Sep 17 00:00:00 2001 From: "Erlend E. Aasland" Date: Tue, 2 Apr 2024 18:36:53 +0200 Subject: [PATCH 06/16] Adapt str.find --- Objects/clinic/unicodeobject.c.h | 55 +++++++++++++++++++++++++++++++- Objects/unicodeobject.c | 37 +++++++-------------- 2 files changed, 65 insertions(+), 27 deletions(-) diff --git a/Objects/clinic/unicodeobject.c.h b/Objects/clinic/unicodeobject.c.h index 7fa5a8637fdf0f..8769cbcc61a3ad 100644 --- a/Objects/clinic/unicodeobject.c.h +++ b/Objects/clinic/unicodeobject.c.h @@ -354,6 +354,59 @@ unicode_expandtabs(PyObject *self, PyObject *const *args, Py_ssize_t nargs, PyOb return return_value; } +PyDoc_STRVAR(unicode_find__doc__, +"find($self, sub, start=None, end=None, /)\n" +"--\n" +"\n" +"Return the lowest index in S where substring sub is found, such that sub is contained within S[start:end]."); + +#define UNICODE_FIND_METHODDEF \ + {"find", _PyCFunction_CAST(unicode_find), METH_FASTCALL, unicode_find__doc__}, + +static Py_ssize_t +unicode_find_impl(PyObject *str, PyObject *substr, Py_ssize_t start, + Py_ssize_t end); + +static PyObject * +unicode_find(PyObject *str, PyObject *const *args, Py_ssize_t nargs) +{ + PyObject *return_value = NULL; + PyObject *substr; + Py_ssize_t start = 0; + Py_ssize_t end = PY_SSIZE_T_MAX; + Py_ssize_t _return_value; + + if (!_PyArg_CheckPositional("find", nargs, 1, 3)) { + goto exit; + } + if (!PyUnicode_Check(args[0])) { + _PyArg_BadArgument("find", "argument 1", "str", args[0]); + goto exit; + } + substr = args[0]; + if (nargs < 2) { + goto skip_optional; + } + if (!_PyEval_SliceIndex(args[1], &start)) { + goto exit; + } + if (nargs < 3) { + goto skip_optional; + } + if (!_PyEval_SliceIndex(args[2], &end)) { + goto exit; + } +skip_optional: + _return_value = unicode_find_impl(str, substr, start, end); + if ((_return_value == -1) && PyErr_Occurred()) { + goto exit; + } + return_value = PyLong_FromSsize_t(_return_value); + +exit: + return return_value; +} + PyDoc_STRVAR(unicode_isascii__doc__, "isascii($self, /)\n" "--\n" @@ -1662,4 +1715,4 @@ unicode_new(PyTypeObject *type, PyObject *args, PyObject *kwargs) exit: return return_value; } -/*[clinic end generated code: output=6e0b7dda0329e54b input=a9049054013a1b77]*/ +/*[clinic end generated code: output=85f7b2ab50398fb3 input=a9049054013a1b77]*/ diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index a5389d99276409..1d38c10fe3045c 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -11269,33 +11269,18 @@ unicode_expandtabs_impl(PyObject *self, int tabsize) return NULL; } -PyDoc_STRVAR(find__doc__, - "S.find(sub[, start[, end]]) -> int\n\ -\n\ -Return the lowest index in S where substring sub is found,\n\ -such that sub is contained within S[start:end]. Optional\n\ -arguments start and end are interpreted as in slice notation.\n\ -\n\ -Return -1 on failure."); - -static PyObject * -unicode_find(PyObject *self, PyObject *args) -{ - /* initialize variables to prevent gcc warning */ - PyObject *substring = NULL; - Py_ssize_t start = 0; - Py_ssize_t end = 0; - Py_ssize_t result; - - if (!parse_args_finds_unicode("find", args, &substring, &start, &end)) - return NULL; - - result = any_find_slice(self, substring, start, end, 1); +/*[clinic input] +str.find as unicode_find = str.count - if (result == -2) - return NULL; +Return the lowest index in S where substring sub is found, such that sub is contained within S[start:end]. +[clinic start generated code]*/ - return PyLong_FromSsize_t(result); +static Py_ssize_t +unicode_find_impl(PyObject *str, PyObject *substr, Py_ssize_t start, + Py_ssize_t end) +/*[clinic end generated code: output=51dbe6255712e278 input=37e8a66191930f45]*/ +{ + return any_find_slice(str, substr, start, end, 1); } static PyObject * @@ -13542,7 +13527,7 @@ static PyMethodDef unicode_methods[] = { UNICODE_CENTER_METHODDEF UNICODE_COUNT_METHODDEF UNICODE_EXPANDTABS_METHODDEF - {"find", (PyCFunction) unicode_find, METH_VARARGS, find__doc__}, + UNICODE_FIND_METHODDEF UNICODE_PARTITION_METHODDEF {"index", (PyCFunction) unicode_index, METH_VARARGS, index__doc__}, UNICODE_LJUST_METHODDEF From 6f05fe6fb9f043a3985d929ce5a2dcf5cc0ff615 Mon Sep 17 00:00:00 2001 From: "Erlend E. Aasland" Date: Tue, 2 Apr 2024 18:39:27 +0200 Subject: [PATCH 07/16] fixup! Adapt str.find --- Lib/test/string_tests.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/test/string_tests.py b/Lib/test/string_tests.py index a188ee01d37302..8907fec7cfef82 100644 --- a/Lib/test/string_tests.py +++ b/Lib/test/string_tests.py @@ -1503,7 +1503,7 @@ def test_find_etc_raise_correct_error_messages(self): # issue 11828 s = 'hello' x = 'x' - self.assertRaisesRegex(TypeError, r'^find\(', s.find, + self.assertRaisesRegex(TypeError, r'^find\b', s.find, x, None, None, None) self.assertRaisesRegex(TypeError, r'^rfind\(', s.rfind, x, None, None, None) From 4c465708cdaf1c355886c41dd5caffb32ecd3142 Mon Sep 17 00:00:00 2001 From: "Erlend E. Aasland" Date: Tue, 2 Apr 2024 18:53:27 +0200 Subject: [PATCH 08/16] Adapt str.index --- Lib/test/string_tests.py | 2 +- Objects/clinic/unicodeobject.c.h | 58 +++++++++++++++++++++++++++++++- Objects/unicodeobject.c | 43 +++++++++-------------- 3 files changed, 73 insertions(+), 30 deletions(-) diff --git a/Lib/test/string_tests.py b/Lib/test/string_tests.py index 8907fec7cfef82..e150e18f77fb3b 100644 --- a/Lib/test/string_tests.py +++ b/Lib/test/string_tests.py @@ -1507,7 +1507,7 @@ def test_find_etc_raise_correct_error_messages(self): x, None, None, None) self.assertRaisesRegex(TypeError, r'^rfind\(', s.rfind, x, None, None, None) - self.assertRaisesRegex(TypeError, r'^index\(', s.index, + self.assertRaisesRegex(TypeError, r'^index\b', s.index, x, None, None, None) self.assertRaisesRegex(TypeError, r'^rindex\(', s.rindex, x, None, None, None) diff --git a/Objects/clinic/unicodeobject.c.h b/Objects/clinic/unicodeobject.c.h index 8769cbcc61a3ad..b0f3e9fa169d1d 100644 --- a/Objects/clinic/unicodeobject.c.h +++ b/Objects/clinic/unicodeobject.c.h @@ -407,6 +407,62 @@ unicode_find(PyObject *str, PyObject *const *args, Py_ssize_t nargs) return return_value; } +PyDoc_STRVAR(unicode_index__doc__, +"index($self, sub, start=None, end=None, /)\n" +"--\n" +"\n" +"Return the lowest index in S where substring sub is found, such that sub is contained within S[start:end].\n" +"\n" +"Optional arguments start and end are interpreted as in slice notation.\n" +"Raises ValueError when the substring is not found."); + +#define UNICODE_INDEX_METHODDEF \ + {"index", _PyCFunction_CAST(unicode_index), METH_FASTCALL, unicode_index__doc__}, + +static Py_ssize_t +unicode_index_impl(PyObject *str, PyObject *substr, Py_ssize_t start, + Py_ssize_t end); + +static PyObject * +unicode_index(PyObject *str, PyObject *const *args, Py_ssize_t nargs) +{ + PyObject *return_value = NULL; + PyObject *substr; + Py_ssize_t start = 0; + Py_ssize_t end = PY_SSIZE_T_MAX; + Py_ssize_t _return_value; + + if (!_PyArg_CheckPositional("index", nargs, 1, 3)) { + goto exit; + } + if (!PyUnicode_Check(args[0])) { + _PyArg_BadArgument("index", "argument 1", "str", args[0]); + goto exit; + } + substr = args[0]; + if (nargs < 2) { + goto skip_optional; + } + if (!_PyEval_SliceIndex(args[1], &start)) { + goto exit; + } + if (nargs < 3) { + goto skip_optional; + } + if (!_PyEval_SliceIndex(args[2], &end)) { + goto exit; + } +skip_optional: + _return_value = unicode_index_impl(str, substr, start, end); + if ((_return_value == -1) && PyErr_Occurred()) { + goto exit; + } + return_value = PyLong_FromSsize_t(_return_value); + +exit: + return return_value; +} + PyDoc_STRVAR(unicode_isascii__doc__, "isascii($self, /)\n" "--\n" @@ -1715,4 +1771,4 @@ unicode_new(PyTypeObject *type, PyObject *args, PyObject *kwargs) exit: return return_value; } -/*[clinic end generated code: output=85f7b2ab50398fb3 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=1ee08b0d6a6377d7 input=a9049054013a1b77]*/ diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 1d38c10fe3045c..20116ef1ebf957 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -11323,38 +11323,25 @@ unicode_hash(PyObject *self) return x; } -PyDoc_STRVAR(index__doc__, - "S.index(sub[, start[, end]]) -> int\n\ -\n\ -Return the lowest index in S where substring sub is found,\n\ -such that sub is contained within S[start:end]. Optional\n\ -arguments start and end are interpreted as in slice notation.\n\ -\n\ -Raises ValueError when the substring is not found."); - -static PyObject * -unicode_index(PyObject *self, PyObject *args) -{ - /* initialize variables to prevent gcc warning */ - Py_ssize_t result; - PyObject *substring = NULL; - Py_ssize_t start = 0; - Py_ssize_t end = 0; - - if (!parse_args_finds_unicode("index", args, &substring, &start, &end)) - return NULL; +/*[clinic input] +str.index as unicode_index = str.count - result = any_find_slice(self, substring, start, end, 1); +Return the lowest index in S where substring sub is found, such that sub is contained within S[start:end]. - if (result == -2) - return NULL; +Optional arguments start and end are interpreted as in slice notation. +Raises ValueError when the substring is not found. +[clinic start generated code]*/ - if (result < 0) { +static Py_ssize_t +unicode_index_impl(PyObject *str, PyObject *substr, Py_ssize_t start, + Py_ssize_t end) +/*[clinic end generated code: output=77558288837cdf40 input=d986aeac0be14a1c]*/ +{ + Py_ssize_t result = any_find_slice(str, substr, start, end, 1); + if (result == -1) { PyErr_SetString(PyExc_ValueError, "substring not found"); - return NULL; } - - return PyLong_FromSsize_t(result); + return result; } /*[clinic input] @@ -13529,7 +13516,7 @@ static PyMethodDef unicode_methods[] = { UNICODE_EXPANDTABS_METHODDEF UNICODE_FIND_METHODDEF UNICODE_PARTITION_METHODDEF - {"index", (PyCFunction) unicode_index, METH_VARARGS, index__doc__}, + UNICODE_INDEX_METHODDEF UNICODE_LJUST_METHODDEF UNICODE_LOWER_METHODDEF UNICODE_LSTRIP_METHODDEF From df08da84061ee5226aa563d773b24220e24a43cb Mon Sep 17 00:00:00 2001 From: "Erlend E. Aasland" Date: Tue, 2 Apr 2024 18:59:39 +0200 Subject: [PATCH 09/16] Adapt str.rfind --- Lib/test/string_tests.py | 2 +- Objects/clinic/unicodeobject.c.h | 58 +++++++++++++++++++++++++++++++- Objects/unicodeobject.c | 38 +++++++-------------- 3 files changed, 71 insertions(+), 27 deletions(-) diff --git a/Lib/test/string_tests.py b/Lib/test/string_tests.py index e150e18f77fb3b..e3aa5c5a6a4808 100644 --- a/Lib/test/string_tests.py +++ b/Lib/test/string_tests.py @@ -1505,7 +1505,7 @@ def test_find_etc_raise_correct_error_messages(self): x = 'x' self.assertRaisesRegex(TypeError, r'^find\b', s.find, x, None, None, None) - self.assertRaisesRegex(TypeError, r'^rfind\(', s.rfind, + self.assertRaisesRegex(TypeError, r'^rfind\b', s.rfind, x, None, None, None) self.assertRaisesRegex(TypeError, r'^index\b', s.index, x, None, None, None) diff --git a/Objects/clinic/unicodeobject.c.h b/Objects/clinic/unicodeobject.c.h index b0f3e9fa169d1d..df34d0ffb0cc45 100644 --- a/Objects/clinic/unicodeobject.c.h +++ b/Objects/clinic/unicodeobject.c.h @@ -1054,6 +1054,62 @@ unicode_removesuffix(PyObject *self, PyObject *arg) return return_value; } +PyDoc_STRVAR(unicode_rfind__doc__, +"rfind($self, sub, start=None, end=None, /)\n" +"--\n" +"\n" +"Return the highest index in S where substring sub is found, such that sub is contained within S[start:end].\n" +"\n" +"Optional arguments start and end are interpreted as in slice notation.\n" +"Return -1 on failure."); + +#define UNICODE_RFIND_METHODDEF \ + {"rfind", _PyCFunction_CAST(unicode_rfind), METH_FASTCALL, unicode_rfind__doc__}, + +static Py_ssize_t +unicode_rfind_impl(PyObject *str, PyObject *substr, Py_ssize_t start, + Py_ssize_t end); + +static PyObject * +unicode_rfind(PyObject *str, PyObject *const *args, Py_ssize_t nargs) +{ + PyObject *return_value = NULL; + PyObject *substr; + Py_ssize_t start = 0; + Py_ssize_t end = PY_SSIZE_T_MAX; + Py_ssize_t _return_value; + + if (!_PyArg_CheckPositional("rfind", nargs, 1, 3)) { + goto exit; + } + if (!PyUnicode_Check(args[0])) { + _PyArg_BadArgument("rfind", "argument 1", "str", args[0]); + goto exit; + } + substr = args[0]; + if (nargs < 2) { + goto skip_optional; + } + if (!_PyEval_SliceIndex(args[1], &start)) { + goto exit; + } + if (nargs < 3) { + goto skip_optional; + } + if (!_PyEval_SliceIndex(args[2], &end)) { + goto exit; + } +skip_optional: + _return_value = unicode_rfind_impl(str, substr, start, end); + if ((_return_value == -1) && PyErr_Occurred()) { + goto exit; + } + return_value = PyLong_FromSsize_t(_return_value); + +exit: + return return_value; +} + PyDoc_STRVAR(unicode_rjust__doc__, "rjust($self, width, fillchar=\' \', /)\n" "--\n" @@ -1771,4 +1827,4 @@ unicode_new(PyTypeObject *type, PyObject *args, PyObject *kwargs) exit: return return_value; } -/*[clinic end generated code: output=1ee08b0d6a6377d7 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=9f100f72d363e465 input=a9049054013a1b77]*/ diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 20116ef1ebf957..70f329b1e6c304 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -12421,33 +12421,21 @@ unicode_repr(PyObject *unicode) return repr; } -PyDoc_STRVAR(rfind__doc__, - "S.rfind(sub[, start[, end]]) -> int\n\ -\n\ -Return the highest index in S where substring sub is found,\n\ -such that sub is contained within S[start:end]. Optional\n\ -arguments start and end are interpreted as in slice notation.\n\ -\n\ -Return -1 on failure."); - -static PyObject * -unicode_rfind(PyObject *self, PyObject *args) -{ - /* initialize variables to prevent gcc warning */ - PyObject *substring = NULL; - Py_ssize_t start = 0; - Py_ssize_t end = 0; - Py_ssize_t result; - - if (!parse_args_finds_unicode("rfind", args, &substring, &start, &end)) - return NULL; +/*[clinic input] +str.rfind as unicode_rfind = str.count - result = any_find_slice(self, substring, start, end, -1); +Return the highest index in S where substring sub is found, such that sub is contained within S[start:end]. - if (result == -2) - return NULL; +Optional arguments start and end are interpreted as in slice notation. +Return -1 on failure. +[clinic start generated code]*/ - return PyLong_FromSsize_t(result); +static Py_ssize_t +unicode_rfind_impl(PyObject *str, PyObject *substr, Py_ssize_t start, + Py_ssize_t end) +/*[clinic end generated code: output=880b29f01dd014c8 input=898361fb71f59294]*/ +{ + return any_find_slice(str, substr, start, end, -1); } PyDoc_STRVAR(rindex__doc__, @@ -13520,7 +13508,7 @@ static PyMethodDef unicode_methods[] = { UNICODE_LJUST_METHODDEF UNICODE_LOWER_METHODDEF UNICODE_LSTRIP_METHODDEF - {"rfind", (PyCFunction) unicode_rfind, METH_VARARGS, rfind__doc__}, + UNICODE_RFIND_METHODDEF {"rindex", (PyCFunction) unicode_rindex, METH_VARARGS, rindex__doc__}, UNICODE_RJUST_METHODDEF UNICODE_RSTRIP_METHODDEF From 9f26ea83663a9b5b51c53b40bd7f8276c595b802 Mon Sep 17 00:00:00 2001 From: "Erlend E. Aasland" Date: Tue, 2 Apr 2024 19:02:44 +0200 Subject: [PATCH 10/16] Adapt str.rindex --- Lib/test/string_tests.py | 2 +- Objects/clinic/unicodeobject.c.h | 58 +++++++++++++++++++++++++++++- Objects/unicodeobject.c | 61 ++++++++------------------------ 3 files changed, 73 insertions(+), 48 deletions(-) diff --git a/Lib/test/string_tests.py b/Lib/test/string_tests.py index e3aa5c5a6a4808..9bb0ce7bb57f8b 100644 --- a/Lib/test/string_tests.py +++ b/Lib/test/string_tests.py @@ -1509,7 +1509,7 @@ def test_find_etc_raise_correct_error_messages(self): x, None, None, None) self.assertRaisesRegex(TypeError, r'^index\b', s.index, x, None, None, None) - self.assertRaisesRegex(TypeError, r'^rindex\(', s.rindex, + self.assertRaisesRegex(TypeError, r'^rindex\b', s.rindex, x, None, None, None) self.assertRaisesRegex(TypeError, r'^count\b', s.count, x, None, None, None) diff --git a/Objects/clinic/unicodeobject.c.h b/Objects/clinic/unicodeobject.c.h index df34d0ffb0cc45..86a755f0734aac 100644 --- a/Objects/clinic/unicodeobject.c.h +++ b/Objects/clinic/unicodeobject.c.h @@ -1110,6 +1110,62 @@ unicode_rfind(PyObject *str, PyObject *const *args, Py_ssize_t nargs) return return_value; } +PyDoc_STRVAR(unicode_rindex__doc__, +"rindex($self, sub, start=None, end=None, /)\n" +"--\n" +"\n" +"Return the highest index in S where substring sub is found, such that sub is contained within S[start:end].\n" +"\n" +"Optional arguments start and end are interpreted as in slice notation.\n" +"Raises ValueError when the substring is not found."); + +#define UNICODE_RINDEX_METHODDEF \ + {"rindex", _PyCFunction_CAST(unicode_rindex), METH_FASTCALL, unicode_rindex__doc__}, + +static Py_ssize_t +unicode_rindex_impl(PyObject *str, PyObject *substr, Py_ssize_t start, + Py_ssize_t end); + +static PyObject * +unicode_rindex(PyObject *str, PyObject *const *args, Py_ssize_t nargs) +{ + PyObject *return_value = NULL; + PyObject *substr; + Py_ssize_t start = 0; + Py_ssize_t end = PY_SSIZE_T_MAX; + Py_ssize_t _return_value; + + if (!_PyArg_CheckPositional("rindex", nargs, 1, 3)) { + goto exit; + } + if (!PyUnicode_Check(args[0])) { + _PyArg_BadArgument("rindex", "argument 1", "str", args[0]); + goto exit; + } + substr = args[0]; + if (nargs < 2) { + goto skip_optional; + } + if (!_PyEval_SliceIndex(args[1], &start)) { + goto exit; + } + if (nargs < 3) { + goto skip_optional; + } + if (!_PyEval_SliceIndex(args[2], &end)) { + goto exit; + } +skip_optional: + _return_value = unicode_rindex_impl(str, substr, start, end); + if ((_return_value == -1) && PyErr_Occurred()) { + goto exit; + } + return_value = PyLong_FromSsize_t(_return_value); + +exit: + return return_value; +} + PyDoc_STRVAR(unicode_rjust__doc__, "rjust($self, width, fillchar=\' \', /)\n" "--\n" @@ -1827,4 +1883,4 @@ unicode_new(PyTypeObject *type, PyObject *args, PyObject *kwargs) exit: return return_value; } -/*[clinic end generated code: output=9f100f72d363e465 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=71a97a302f61fedb input=a9049054013a1b77]*/ diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 70f329b1e6c304..002d23ee51d189 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -11062,24 +11062,6 @@ PyUnicode_AppendAndDel(PyObject **pleft, PyObject *right) Py_XDECREF(right); } -/* -Wraps asciilib_parse_args_finds() and additionally ensures that the -first argument is a unicode object. -*/ - -static inline int -parse_args_finds_unicode(const char * function_name, PyObject *args, - PyObject **substring, - Py_ssize_t *start, Py_ssize_t *end) -{ - if (asciilib_parse_args_finds(function_name, args, substring, start, end)) { - if (ensure_unicode(*substring) < 0) - return 0; - return 1; - } - return 0; -} - /*[clinic input] @text_signature "($self, sub[, start[, end]], /)" str.count as unicode_count -> Py_ssize_t @@ -12438,38 +12420,25 @@ unicode_rfind_impl(PyObject *str, PyObject *substr, Py_ssize_t start, return any_find_slice(str, substr, start, end, -1); } -PyDoc_STRVAR(rindex__doc__, - "S.rindex(sub[, start[, end]]) -> int\n\ -\n\ -Return the highest index in S where substring sub is found,\n\ -such that sub is contained within S[start:end]. Optional\n\ -arguments start and end are interpreted as in slice notation.\n\ -\n\ -Raises ValueError when the substring is not found."); - -static PyObject * -unicode_rindex(PyObject *self, PyObject *args) -{ - /* initialize variables to prevent gcc warning */ - PyObject *substring = NULL; - Py_ssize_t start = 0; - Py_ssize_t end = 0; - Py_ssize_t result; - - if (!parse_args_finds_unicode("rindex", args, &substring, &start, &end)) - return NULL; +/*[clinic input] +str.rindex as unicode_rindex = str.count - result = any_find_slice(self, substring, start, end, -1); +Return the highest index in S where substring sub is found, such that sub is contained within S[start:end]. - if (result == -2) - return NULL; +Optional arguments start and end are interpreted as in slice notation. +Raises ValueError when the substring is not found. +[clinic start generated code]*/ - if (result < 0) { +static Py_ssize_t +unicode_rindex_impl(PyObject *str, PyObject *substr, Py_ssize_t start, + Py_ssize_t end) +/*[clinic end generated code: output=5f3aef124c867fe1 input=35943dead6c1ea9d]*/ +{ + Py_ssize_t result = any_find_slice(str, substr, start, end, -1); + if (result == -1) { PyErr_SetString(PyExc_ValueError, "substring not found"); - return NULL; } - - return PyLong_FromSsize_t(result); + return result; } /*[clinic input] @@ -13509,7 +13478,7 @@ static PyMethodDef unicode_methods[] = { UNICODE_LOWER_METHODDEF UNICODE_LSTRIP_METHODDEF UNICODE_RFIND_METHODDEF - {"rindex", (PyCFunction) unicode_rindex, METH_VARARGS, rindex__doc__}, + UNICODE_RINDEX_METHODDEF UNICODE_RJUST_METHODDEF UNICODE_RSTRIP_METHODDEF UNICODE_RPARTITION_METHODDEF From 1970887d535987bacd253aae16fd12a30a7169a8 Mon Sep 17 00:00:00 2001 From: "Erlend E. Aasland" Date: Tue, 2 Apr 2024 19:04:26 +0200 Subject: [PATCH 11/16] Fixup docstrings --- Objects/clinic/unicodeobject.c.h | 11 ++++++++--- Objects/unicodeobject.c | 9 +++++++-- 2 files changed, 15 insertions(+), 5 deletions(-) diff --git a/Objects/clinic/unicodeobject.c.h b/Objects/clinic/unicodeobject.c.h index 86a755f0734aac..5a9fe991f3fdc8 100644 --- a/Objects/clinic/unicodeobject.c.h +++ b/Objects/clinic/unicodeobject.c.h @@ -140,7 +140,9 @@ PyDoc_STRVAR(unicode_count__doc__, "count($self, sub[, start[, end]], /)\n" "--\n" "\n" -"Return the number of non-overlapping occurrences of substring sub in string S[start:end]."); +"Return the number of non-overlapping occurrences of substring sub in string S[start:end].\n" +"\n" +"Optional arguments start and end are interpreted as in slice notation."); #define UNICODE_COUNT_METHODDEF \ {"count", _PyCFunction_CAST(unicode_count), METH_FASTCALL, unicode_count__doc__}, @@ -358,7 +360,10 @@ PyDoc_STRVAR(unicode_find__doc__, "find($self, sub, start=None, end=None, /)\n" "--\n" "\n" -"Return the lowest index in S where substring sub is found, such that sub is contained within S[start:end]."); +"Return the lowest index in S where substring sub is found, such that sub is contained within S[start:end].\n" +"\n" +"Optional arguments start and end are interpreted as in slice notation.\n" +"Return -1 on failure."); #define UNICODE_FIND_METHODDEF \ {"find", _PyCFunction_CAST(unicode_find), METH_FASTCALL, unicode_find__doc__}, @@ -1883,4 +1888,4 @@ unicode_new(PyTypeObject *type, PyObject *args, PyObject *kwargs) exit: return return_value; } -/*[clinic end generated code: output=71a97a302f61fedb input=a9049054013a1b77]*/ +/*[clinic end generated code: output=c45a7065749e0d27 input=a9049054013a1b77]*/ diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 002d23ee51d189..718185c65da078 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -11073,12 +11073,14 @@ str.count as unicode_count -> Py_ssize_t / Return the number of non-overlapping occurrences of substring sub in string S[start:end]. + +Optional arguments start and end are interpreted as in slice notation. [clinic start generated code]*/ static Py_ssize_t unicode_count_impl(PyObject *str, PyObject *substr, Py_ssize_t start, Py_ssize_t end) -/*[clinic end generated code: output=8fcc3aef0b18edbf input=9e91e81ffff6e356]*/ +/*[clinic end generated code: output=8fcc3aef0b18edbf input=6f168ffd94be8785]*/ { assert(PyUnicode_Check(str)); assert(PyUnicode_Check(substr)); @@ -11255,12 +11257,15 @@ unicode_expandtabs_impl(PyObject *self, int tabsize) str.find as unicode_find = str.count Return the lowest index in S where substring sub is found, such that sub is contained within S[start:end]. + +Optional arguments start and end are interpreted as in slice notation. +Return -1 on failure. [clinic start generated code]*/ static Py_ssize_t unicode_find_impl(PyObject *str, PyObject *substr, Py_ssize_t start, Py_ssize_t end) -/*[clinic end generated code: output=51dbe6255712e278 input=37e8a66191930f45]*/ +/*[clinic end generated code: output=51dbe6255712e278 input=4a89d2d68ef57256]*/ { return any_find_slice(str, substr, start, end, 1); } From 571976d2dfb4f5a4fd087e74f5ae52923aee76c5 Mon Sep 17 00:00:00 2001 From: "Erlend E. Aasland" Date: Tue, 2 Apr 2024 19:12:26 +0200 Subject: [PATCH 12/16] Adapt NEWS --- .../2024-04-02-17-37-35.gh-issue-117431.vDKAOn.rst | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/Misc/NEWS.d/next/Core and Builtins/2024-04-02-17-37-35.gh-issue-117431.vDKAOn.rst b/Misc/NEWS.d/next/Core and Builtins/2024-04-02-17-37-35.gh-issue-117431.vDKAOn.rst index 96e14ea0c3b1bd..22008130b52fa1 100644 --- a/Misc/NEWS.d/next/Core and Builtins/2024-04-02-17-37-35.gh-issue-117431.vDKAOn.rst +++ b/Misc/NEWS.d/next/Core and Builtins/2024-04-02-17-37-35.gh-issue-117431.vDKAOn.rst @@ -1,2 +1,9 @@ -Improve the performance of :meth:`str.startswith` and :meth:`str.endswith` -by adapting them to the :c:macro:`METH_FASTCALL` calling convention. +Improve the performance of the following :class:`str` methods +by adapting them to the :c:macro:`METH_FASTCALL` calling convention: + +* :meth:`~str.endswith` +* :meth:`~str.find` +* :meth:`~str.index` +* :meth:`~str.rfind` +* :meth:`~str.rindex` +* :meth:`~str.startswith` From 154fe70785c0a00d21c268a28bc020013d3bc987 Mon Sep 17 00:00:00 2001 From: "Erlend E. Aasland" Date: Tue, 2 Apr 2024 19:53:47 +0200 Subject: [PATCH 13/16] Address review: also mention str.count in the NEWS entry --- .../2024-04-02-17-37-35.gh-issue-117431.vDKAOn.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/Misc/NEWS.d/next/Core and Builtins/2024-04-02-17-37-35.gh-issue-117431.vDKAOn.rst b/Misc/NEWS.d/next/Core and Builtins/2024-04-02-17-37-35.gh-issue-117431.vDKAOn.rst index 22008130b52fa1..83f243ae214f7d 100644 --- a/Misc/NEWS.d/next/Core and Builtins/2024-04-02-17-37-35.gh-issue-117431.vDKAOn.rst +++ b/Misc/NEWS.d/next/Core and Builtins/2024-04-02-17-37-35.gh-issue-117431.vDKAOn.rst @@ -1,6 +1,7 @@ Improve the performance of the following :class:`str` methods by adapting them to the :c:macro:`METH_FASTCALL` calling convention: +* :meth:`~str.count` * :meth:`~str.endswith` * :meth:`~str.find` * :meth:`~str.index` From 8fe04ae89e12c1f8447ac4bab44772957ab2e660 Mon Sep 17 00:00:00 2001 From: "Erlend E. Aasland" Date: Tue, 2 Apr 2024 21:08:56 +0200 Subject: [PATCH 14/16] Fixup endswith signature --- Objects/clinic/unicodeobject.c.h | 4 ++-- Objects/unicodeobject.c | 3 ++- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/Objects/clinic/unicodeobject.c.h b/Objects/clinic/unicodeobject.c.h index 9ee37cb25f2905..d46c8fb7915a8d 100644 --- a/Objects/clinic/unicodeobject.c.h +++ b/Objects/clinic/unicodeobject.c.h @@ -1421,7 +1421,7 @@ unicode_startswith(PyObject *self, PyObject *const *args, Py_ssize_t nargs) } PyDoc_STRVAR(unicode_endswith__doc__, -"endswith($self, prefix, start=None, end=None, /)\n" +"endswith($self, prefix[, start[, end]], /)\n" "--\n" "\n" "Return True if the string ends with the specified prefix, False otherwise.\n" @@ -1609,4 +1609,4 @@ unicode_new(PyTypeObject *type, PyObject *args, PyObject *kwargs) exit: return return_value; } -/*[clinic end generated code: output=c2306767984a3a86 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=44a894d803f8f81e input=a9049054013a1b77]*/ diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 1cd3e60668010e..1d640c440109f0 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -13078,6 +13078,7 @@ unicode_startswith_impl(PyObject *self, PyObject *subobj, Py_ssize_t start, /*[clinic input] +@text_signature "($self, prefix[, start[, end]], /)" str.endswith as unicode_endswith = str.startswith Return True if the string ends with the specified prefix, False otherwise. @@ -13086,7 +13087,7 @@ Return True if the string ends with the specified prefix, False otherwise. static PyObject * unicode_endswith_impl(PyObject *self, PyObject *subobj, Py_ssize_t start, Py_ssize_t end) -/*[clinic end generated code: output=cce6f8ceb0102ca9 input=aed4beb8024d9292]*/ +/*[clinic end generated code: output=cce6f8ceb0102ca9 input=82cd5ce9e7623646]*/ { if (PyTuple_Check(subobj)) { Py_ssize_t i; From c2ce49e0cbd75ed378913083eac75617af18c3d6 Mon Sep 17 00:00:00 2001 From: "Erlend E. Aasland" Date: Tue, 2 Apr 2024 21:15:34 +0200 Subject: [PATCH 15/16] Address Jelle's review: clarify default values in param docstrings --- Objects/clinic/unicodeobject.c.h | 10 +++++----- Objects/unicodeobject.c | 6 +++--- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/Objects/clinic/unicodeobject.c.h b/Objects/clinic/unicodeobject.c.h index d46c8fb7915a8d..c956bb1936776a 100644 --- a/Objects/clinic/unicodeobject.c.h +++ b/Objects/clinic/unicodeobject.c.h @@ -1378,9 +1378,9 @@ PyDoc_STRVAR(unicode_startswith__doc__, " prefix\n" " A string or a tuple of strings to try.\n" " start\n" -" Optional start position. Default 0.\n" +" Optional start position. Default: start of the string.\n" " end\n" -" Optional stop position. Default PY_SSIZE_T_MAX."); +" Optional stop position. Default: end of the string."); #define UNICODE_STARTSWITH_METHODDEF \ {"startswith", _PyCFunction_CAST(unicode_startswith), METH_FASTCALL, unicode_startswith__doc__}, @@ -1429,9 +1429,9 @@ PyDoc_STRVAR(unicode_endswith__doc__, " prefix\n" " A string or a tuple of strings to try.\n" " start\n" -" Optional start position. Default 0.\n" +" Optional start position. Default: start of the string.\n" " end\n" -" Optional stop position. Default PY_SSIZE_T_MAX."); +" Optional stop position. Default: end of the string."); #define UNICODE_ENDSWITH_METHODDEF \ {"endswith", _PyCFunction_CAST(unicode_endswith), METH_FASTCALL, unicode_endswith__doc__}, @@ -1609,4 +1609,4 @@ unicode_new(PyTypeObject *type, PyObject *args, PyObject *kwargs) exit: return return_value; } -/*[clinic end generated code: output=44a894d803f8f81e input=a9049054013a1b77]*/ +/*[clinic end generated code: output=e495e878d8283217 input=a9049054013a1b77]*/ diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 1d640c440109f0..ac59419b3c7a50 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -13028,9 +13028,9 @@ str.startswith as unicode_startswith prefix as subobj: object A string or a tuple of strings to try. start: slice_index(accept={int, NoneType}, c_default='0') = None - Optional start position. Default 0. + Optional start position. Default: start of the string. end: slice_index(accept={int, NoneType}, c_default='PY_SSIZE_T_MAX') = None - Optional stop position. Default PY_SSIZE_T_MAX. + Optional stop position. Default: end of the string. / Return True if the string starts with the specified prefix, False otherwise. @@ -13039,7 +13039,7 @@ Return True if the string starts with the specified prefix, False otherwise. static PyObject * unicode_startswith_impl(PyObject *self, PyObject *subobj, Py_ssize_t start, Py_ssize_t end) -/*[clinic end generated code: output=4bd7cfd0803051d4 input=9c6ded2ba2e2f90b]*/ +/*[clinic end generated code: output=4bd7cfd0803051d4 input=5f918b5f5f89d856]*/ { if (PyTuple_Check(subobj)) { Py_ssize_t i; From e7a55a8698aee3e33fc33951b7f53468e706bddd Mon Sep 17 00:00:00 2001 From: "Erlend E. Aasland" Date: Wed, 3 Apr 2024 09:30:02 +0200 Subject: [PATCH 16/16] any_find_slice() can return -2 on error --- Objects/unicodeobject.c | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index b18a2c69cf8271..7572a5c88e59ee 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -11267,7 +11267,11 @@ unicode_find_impl(PyObject *str, PyObject *substr, Py_ssize_t start, Py_ssize_t end) /*[clinic end generated code: output=51dbe6255712e278 input=4a89d2d68ef57256]*/ { - return any_find_slice(str, substr, start, end, 1); + Py_ssize_t result = any_find_slice(str, substr, start, end, 1); + if (result < 0) { + return -1; + } + return result; } static PyObject * @@ -11328,6 +11332,9 @@ unicode_index_impl(PyObject *str, PyObject *substr, Py_ssize_t start, if (result == -1) { PyErr_SetString(PyExc_ValueError, "substring not found"); } + else if (result < 0) { + return -1; + } return result; } @@ -12422,7 +12429,11 @@ unicode_rfind_impl(PyObject *str, PyObject *substr, Py_ssize_t start, Py_ssize_t end) /*[clinic end generated code: output=880b29f01dd014c8 input=898361fb71f59294]*/ { - return any_find_slice(str, substr, start, end, -1); + Py_ssize_t result = any_find_slice(str, substr, start, end, -1); + if (result < 0) { + return -1; + } + return result; } /*[clinic input] @@ -12443,6 +12454,9 @@ unicode_rindex_impl(PyObject *str, PyObject *substr, Py_ssize_t start, if (result == -1) { PyErr_SetString(PyExc_ValueError, "substring not found"); } + else if (result < 0) { + return -1; + } return result; }