Skip to content

Commit a674cef

Browse files
committed
gh-99581: Fix a buffer overflow in the tokenizer when copying lines that fill the available buffer
Signed-off-by: Pablo Galindo <[email protected]>
1 parent b0e1f9c commit a674cef

File tree

3 files changed

+27
-3
lines changed

3 files changed

+27
-3
lines changed

Lib/test/test_tokenize.py

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,8 @@
1010
from unittest import TestCase, mock
1111
from test.test_grammar import (VALID_UNDERSCORE_LITERALS,
1212
INVALID_UNDERSCORE_LITERALS)
13+
from test.support import os_helper
14+
from test.support.script_helper import run_test_script, make_script
1315
import os
1416
import token
1517

@@ -1026,7 +1028,7 @@ def test_utf8_coding_cookie_and_utf8_bom(self):
10261028
def test_bad_coding_cookie(self):
10271029
self.assertRaises(SyntaxError, self._testFile, 'bad_coding.py')
10281030
self.assertRaises(SyntaxError, self._testFile, 'bad_coding2.py')
1029-
1031+
10301032

10311033
class Test_Tokenize(TestCase):
10321034

@@ -2631,5 +2633,19 @@ def fib(n):
26312633
self.assertEqual(get_tokens(code), get_tokens(code_no_cont))
26322634

26332635

2636+
class CTokenizerBufferTests(unittest.TestCase):
2637+
def test_newline_at_the_end_of_buffer(self):
2638+
# See issue 99581: Make sure that if we need to add a new line at the
2639+
# end of the buffer, we have enough space in the buffer, especially when
2640+
# the current line is as long as the buffer space available.
2641+
test_script = f"""\
2642+
#coding: latin-1
2643+
#{"a"*10000}
2644+
#{"a"*10002}"""
2645+
with os_helper.temp_dir() as temp_dir:
2646+
file_name = make_script(temp_dir, 'foo', test_script)
2647+
run_test_script(file_name)
2648+
2649+
26342650
if __name__ == "__main__":
2635-
unittest.main()
2651+
unittest.main()
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
Fixed a bug that was causing a buffer overflow if the tokenizer copies a
2+
line missing the newline character from a file that is as long as the
3+
available tokenizer buffer. Patch by Pablo Galindo

Parser/tokenizer.c

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -413,7 +413,11 @@ tok_readline_recode(struct tok_state *tok) {
413413
error_ret(tok);
414414
goto error;
415415
}
416-
if (!tok_reserve_buf(tok, buflen + 1)) {
416+
// Make room for the null terminator *and* potentially
417+
// an extra newline character that we may need to artificially
418+
// add.
419+
size_t buffer_size = buflen + 2;
420+
if (!tok_reserve_buf(tok, buffer_size)) {
417421
goto error;
418422
}
419423
memcpy(tok->inp, buf, buflen);
@@ -1000,6 +1004,7 @@ tok_underflow_file(struct tok_state *tok) {
10001004
return 0;
10011005
}
10021006
if (tok->inp[-1] != '\n') {
1007+
assert(tok->inp + 1 < tok->end);
10031008
/* Last line does not end in \n, fake one */
10041009
*tok->inp++ = '\n';
10051010
*tok->inp = '\0';

0 commit comments

Comments
 (0)