Skip to content

Commit 2e9cd58

Browse files
bpo-30529: Fix errors for invalid whitespaces in f-string subexpressions. (#1888)
'invalid character in identifier' is now raised instead of 'f-string: empty expression not allowed' if a subexpression contains only whitespace characters that are not accepted by the Python parser.
1 parent 29adc13 commit 2e9cd58

File tree

2 files changed

+17
-24
lines changed

2 files changed

+17
-24
lines changed

Lib/test/test_fstring.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -280,6 +280,10 @@ def test_missing_expression(self):
280280
"f'{10:{ }}'",
281281
"f' { } '",
282282

283+
# The Python parser also ignores the following
284+
# whitespace characters in addition to a space.
285+
"f'''{\t\f\r\n}'''",
286+
283287
# Catch the empty expression before the
284288
# invalid conversion.
285289
"f'{!x}'",
@@ -300,6 +304,12 @@ def test_missing_expression(self):
300304
"f'{:x'",
301305
])
302306

307+
# A different error message is raised for other whitespace characters.
308+
self.assertAllRaise(SyntaxError, 'invalid character in identifier',
309+
["f'''{\xa0}'''",
310+
"\xa0",
311+
])
312+
303313
def test_parens_in_expressions(self):
304314
self.assertEqual(f'{3,}', '(3,)')
305315

Python/ast.c

Lines changed: 7 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -4274,49 +4274,32 @@ fstring_compile_expr(const char *expr_start, const char *expr_end,
42744274
struct compiling *c, const node *n)
42754275

42764276
{
4277-
int all_whitespace = 1;
4278-
int kind;
4279-
void *data;
42804277
PyCompilerFlags cf;
42814278
mod_ty mod;
42824279
char *str;
4283-
PyObject *o;
42844280
Py_ssize_t len;
4285-
Py_ssize_t i;
4281+
const char *s;
42864282

42874283
assert(expr_end >= expr_start);
42884284
assert(*(expr_start-1) == '{');
42894285
assert(*expr_end == '}' || *expr_end == '!' || *expr_end == ':');
42904286

4291-
/* We know there are no escapes here, because backslashes are not allowed,
4292-
and we know it's utf-8 encoded (per PEP 263). But, in order to check
4293-
that each char is not whitespace, we need to decode it to unicode.
4294-
Which is unfortunate, but such is life. */
4295-
42964287
/* If the substring is all whitespace, it's an error. We need to catch
42974288
this here, and not when we call PyParser_ASTFromString, because turning
42984289
the expression '' into '()' would go from being invalid to valid. */
4299-
/* Note that this code says an empty string is all whitespace. That's
4300-
important. There's a test for it: f'{}'. */
4301-
o = PyUnicode_DecodeUTF8(expr_start, expr_end-expr_start, NULL);
4302-
if (o == NULL)
4303-
return NULL;
4304-
len = PyUnicode_GET_LENGTH(o);
4305-
kind = PyUnicode_KIND(o);
4306-
data = PyUnicode_DATA(o);
4307-
for (i = 0; i < len; i++) {
4308-
if (!Py_UNICODE_ISSPACE(PyUnicode_READ(kind, data, i))) {
4309-
all_whitespace = 0;
4290+
for (s = expr_start; s != expr_end; s++) {
4291+
char c = *s;
4292+
/* The Python parser ignores only the following whitespace
4293+
characters (\r is already converted to \n). */
4294+
if (!(c == ' ' || c == '\t' || c == '\n' || c == '\f')) {
43104295
break;
43114296
}
43124297
}
4313-
Py_DECREF(o);
4314-
if (all_whitespace) {
4298+
if (s == expr_end) {
43154299
ast_error(c, n, "f-string: empty expression not allowed");
43164300
return NULL;
43174301
}
43184302

4319-
/* Reuse len to be the length of the utf-8 input string. */
43204303
len = expr_end - expr_start;
43214304
/* Allocate 3 extra bytes: open paren, close paren, null byte. */
43224305
str = PyMem_RawMalloc(len + 3);

0 commit comments

Comments
 (0)