Skip to content

Commit a0efc0c

Browse files
authored
bpo-46091: Correctly calculate indentation levels for whitespace lines with continuation characters (pythonGH-30130)
1 parent b1cb843 commit a0efc0c

File tree

5 files changed

+165
-16
lines changed

5 files changed

+165
-16
lines changed

Lib/test/test_ast.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1078,8 +1078,7 @@ def test_literal_eval_malformed_lineno(self):
10781078
ast.literal_eval(node)
10791079

10801080
def test_literal_eval_syntax_errors(self):
1081-
msg = "unexpected character after line continuation character"
1082-
with self.assertRaisesRegex(SyntaxError, msg):
1081+
with self.assertRaisesRegex(SyntaxError, "unexpected indent"):
10831082
ast.literal_eval(r'''
10841083
\
10851084
(\

Lib/test/test_syntax.py

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1613,6 +1613,36 @@ def test_empty_line_after_linecont(self):
16131613
except SyntaxError:
16141614
self.fail("Empty line after a line continuation character is valid.")
16151615

1616+
# See issue-46091
1617+
s1 = r"""\
1618+
def fib(n):
1619+
\
1620+
'''Print a Fibonacci series up to n.'''
1621+
\
1622+
a, b = 0, 1
1623+
"""
1624+
s2 = r"""\
1625+
def fib(n):
1626+
'''Print a Fibonacci series up to n.'''
1627+
a, b = 0, 1
1628+
"""
1629+
try:
1630+
self.assertEqual(compile(s1, '<string>', 'exec'), compile(s2, '<string>', 'exec'))
1631+
except SyntaxError:
1632+
self.fail("Indented statement over multiple lines is valid")
1633+
1634+
def test_continuation_bad_indentation(self):
1635+
# Check that code that breaks indentation across multiple lines raises a syntax error
1636+
1637+
code = r"""\
1638+
if x:
1639+
y = 1
1640+
\
1641+
foo = 1
1642+
"""
1643+
1644+
self.assertRaises(IndentationError, exec, code)
1645+
16161646
@support.cpython_only
16171647
def test_nested_named_except_blocks(self):
16181648
code = ""

Lib/test/test_tokenize.py

Lines changed: 99 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
NEWLINE, _generate_tokens_from_c_tokenizer)
77
from io import BytesIO, StringIO
88
import unittest
9+
from textwrap import dedent
910
from unittest import TestCase, mock
1011
from test.test_grammar import (VALID_UNDERSCORE_LITERALS,
1112
INVALID_UNDERSCORE_LITERALS)
@@ -44,7 +45,6 @@ def check_tokenize(self, s, expected):
4445
# The ENDMARKER and final NEWLINE are omitted.
4546
f = BytesIO(s.encode('utf-8'))
4647
result = stringify_tokens_from_source(tokenize(f.readline), s)
47-
4848
self.assertEqual(result,
4949
[" ENCODING 'utf-8' (0, 0) (0, 0)"] +
5050
expected.rstrip().splitlines())
@@ -2511,7 +2511,105 @@ def get_tokens(string):
25112511

25122512
self.assertRaises(SyntaxError, get_tokens, "("*1000+"a"+")"*1000)
25132513
self.assertRaises(SyntaxError, get_tokens, "]")
2514+
2515+
def test_continuation_lines_indentation(self):
2516+
def get_tokens(string):
2517+
return [(kind, string) for (kind, string, *_) in _generate_tokens_from_c_tokenizer(string)]
25142518

2519+
code = dedent("""
2520+
def fib(n):
2521+
\\
2522+
'''Print a Fibonacci series up to n.'''
2523+
\\
2524+
a, b = 0, 1
2525+
""")
2526+
2527+
self.check_tokenize(code, """\
2528+
NAME 'def' (2, 0) (2, 3)
2529+
NAME 'fib' (2, 4) (2, 7)
2530+
LPAR '(' (2, 7) (2, 8)
2531+
NAME 'n' (2, 8) (2, 9)
2532+
RPAR ')' (2, 9) (2, 10)
2533+
COLON ':' (2, 10) (2, 11)
2534+
NEWLINE '' (2, 11) (2, 11)
2535+
INDENT '' (4, -1) (4, -1)
2536+
STRING "'''Print a Fibonacci series up to n.'''" (4, 0) (4, 39)
2537+
NEWLINE '' (4, 39) (4, 39)
2538+
NAME 'a' (6, 0) (6, 1)
2539+
COMMA ',' (6, 1) (6, 2)
2540+
NAME 'b' (6, 3) (6, 4)
2541+
EQUAL '=' (6, 5) (6, 6)
2542+
NUMBER '0' (6, 7) (6, 8)
2543+
COMMA ',' (6, 8) (6, 9)
2544+
NUMBER '1' (6, 10) (6, 11)
2545+
NEWLINE '' (6, 11) (6, 11)
2546+
DEDENT '' (6, -1) (6, -1)
2547+
""")
2548+
2549+
code_no_cont = dedent("""
2550+
def fib(n):
2551+
'''Print a Fibonacci series up to n.'''
2552+
a, b = 0, 1
2553+
""")
2554+
2555+
self.assertEqual(get_tokens(code), get_tokens(code_no_cont))
2556+
2557+
code = dedent("""
2558+
pass
2559+
\\
2560+
2561+
pass
2562+
""")
2563+
2564+
self.check_tokenize(code, """\
2565+
NAME 'pass' (2, 0) (2, 4)
2566+
NEWLINE '' (2, 4) (2, 4)
2567+
NAME 'pass' (5, 0) (5, 4)
2568+
NEWLINE '' (5, 4) (5, 4)
2569+
""")
2570+
2571+
code_no_cont = dedent("""
2572+
pass
2573+
pass
2574+
""")
2575+
2576+
self.assertEqual(get_tokens(code), get_tokens(code_no_cont))
2577+
2578+
code = dedent("""
2579+
if x:
2580+
y = 1
2581+
\\
2582+
\\
2583+
\\
2584+
\\
2585+
foo = 1
2586+
""")
2587+
2588+
self.check_tokenize(code, """\
2589+
NAME 'if' (2, 0) (2, 2)
2590+
NAME 'x' (2, 3) (2, 4)
2591+
COLON ':' (2, 4) (2, 5)
2592+
NEWLINE '' (2, 5) (2, 5)
2593+
INDENT '' (3, -1) (3, -1)
2594+
NAME 'y' (3, 4) (3, 5)
2595+
EQUAL '=' (3, 6) (3, 7)
2596+
NUMBER '1' (3, 8) (3, 9)
2597+
NEWLINE '' (3, 9) (3, 9)
2598+
NAME 'foo' (8, 4) (8, 7)
2599+
EQUAL '=' (8, 8) (8, 9)
2600+
NUMBER '1' (8, 10) (8, 11)
2601+
NEWLINE '' (8, 11) (8, 11)
2602+
DEDENT '' (8, -1) (8, -1)
2603+
""")
2604+
2605+
code_no_cont = dedent("""
2606+
if x:
2607+
y = 1
2608+
foo = 1
2609+
""")
2610+
2611+
self.assertEqual(get_tokens(code), get_tokens(code_no_cont))
2612+
25152613

25162614
if __name__ == "__main__":
25172615
unittest.main()
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
Correctly calculate indentation levels for lines with whitespace characters
2+
that end with line continuation characters. Patch by Pablo Galindo.

Parser/tokenizer.c

Lines changed: 33 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1347,6 +1347,24 @@ tok_decimal_tail(struct tok_state *tok)
13471347

13481348
/* Get next token, after space stripping etc. */
13491349

1350+
static inline int
1351+
tok_continuation_line(struct tok_state *tok) {
1352+
int c = tok_nextc(tok);
1353+
if (c != '\n') {
1354+
tok->done = E_LINECONT;
1355+
return -1;
1356+
}
1357+
c = tok_nextc(tok);
1358+
if (c == EOF) {
1359+
tok->done = E_EOF;
1360+
tok->cur = tok->inp;
1361+
return -1;
1362+
} else {
1363+
tok_backup(tok, c);
1364+
}
1365+
return c;
1366+
}
1367+
13501368
static int
13511369
tok_get(struct tok_state *tok, const char **p_start, const char **p_end)
13521370
{
@@ -1363,6 +1381,7 @@ tok_get(struct tok_state *tok, const char **p_start, const char **p_end)
13631381
int col = 0;
13641382
int altcol = 0;
13651383
tok->atbol = 0;
1384+
int cont_line_col = 0;
13661385
for (;;) {
13671386
c = tok_nextc(tok);
13681387
if (c == ' ') {
@@ -1375,14 +1394,23 @@ tok_get(struct tok_state *tok, const char **p_start, const char **p_end)
13751394
else if (c == '\014') {/* Control-L (formfeed) */
13761395
col = altcol = 0; /* For Emacs users */
13771396
}
1397+
else if (c == '\\') {
1398+
// Indentation cannot be split over multiple physical lines
1399+
// using backslashes. This means that if we found a backslash
1400+
// preceded by whitespace, **the first one we find** determines
1401+
// the level of indentation of whatever comes next.
1402+
cont_line_col = cont_line_col ? cont_line_col : col;
1403+
if ((c = tok_continuation_line(tok)) == -1) {
1404+
return ERRORTOKEN;
1405+
}
1406+
}
13781407
else {
13791408
break;
13801409
}
13811410
}
13821411
tok_backup(tok, c);
1383-
if (c == '#' || c == '\n' || c == '\\') {
1412+
if (c == '#' || c == '\n') {
13841413
/* Lines with only whitespace and/or comments
1385-
and/or a line continuation character
13861414
shouldn't affect the indentation and are
13871415
not passed to the parser as NEWLINE tokens,
13881416
except *totally* empty lines in interactive
@@ -1403,6 +1431,8 @@ tok_get(struct tok_state *tok, const char **p_start, const char **p_end)
14031431
may need to skip to the end of a comment */
14041432
}
14051433
if (!blankline && tok->level == 0) {
1434+
col = cont_line_col ? cont_line_col : col;
1435+
altcol = cont_line_col ? cont_line_col : altcol;
14061436
if (col == tok->indstack[tok->indent]) {
14071437
/* No change */
14081438
if (altcol != tok->altindstack[tok->indent]) {
@@ -1964,19 +1994,9 @@ tok_get(struct tok_state *tok, const char **p_start, const char **p_end)
19641994

19651995
/* Line continuation */
19661996
if (c == '\\') {
1967-
c = tok_nextc(tok);
1968-
if (c != '\n') {
1969-
tok->done = E_LINECONT;
1997+
if ((c = tok_continuation_line(tok)) == -1) {
19701998
return ERRORTOKEN;
19711999
}
1972-
c = tok_nextc(tok);
1973-
if (c == EOF) {
1974-
tok->done = E_EOF;
1975-
tok->cur = tok->inp;
1976-
return ERRORTOKEN;
1977-
} else {
1978-
tok_backup(tok, c);
1979-
}
19802000
tok->cont_line = 1;
19812001
goto again; /* Read next line */
19822002
}

0 commit comments

Comments
 (0)