12
12
// file (like "_PyPegen_raise_syntax_error").
13
13
14
14
static int
15
- warn_invalid_escape_sequence (Parser * p , unsigned char first_invalid_escape_char )
15
+ warn_invalid_escape_sequence (Parser * p , unsigned char first_invalid_escape_char , Token * t )
16
16
{
17
17
PyObject * msg =
18
18
PyUnicode_FromFormat ("invalid escape sequence \\%c" , first_invalid_escape_char );
19
19
if (msg == NULL ) {
20
20
return -1 ;
21
21
}
22
22
if (PyErr_WarnExplicitObject (PyExc_DeprecationWarning , msg , p -> tok -> filename ,
23
- p -> tok -> lineno , NULL , NULL ) < 0 ) {
23
+ t -> lineno , NULL , NULL ) < 0 ) {
24
24
if (PyErr_ExceptionMatches (PyExc_DeprecationWarning )) {
25
25
/* Replace the DeprecationWarning exception with a SyntaxError
26
26
to get a more accurate error report */
27
27
PyErr_Clear ();
28
+
29
+ /* This is needed in order for the SyntaxError to point to the token t,
30
+ since _PyPegen_raise_error uses p->tokens[p->fill - 1] for the
31
+ error location, if p->known_err_token is not set. */
32
+ p -> known_err_token = t ;
28
33
RAISE_SYNTAX_ERROR ("invalid escape sequence \\%c" , first_invalid_escape_char );
29
34
}
30
35
Py_DECREF (msg );
@@ -47,7 +52,7 @@ decode_utf8(const char **sPtr, const char *end)
47
52
}
48
53
49
54
static PyObject *
50
- decode_unicode_with_escapes (Parser * parser , const char * s , size_t len )
55
+ decode_unicode_with_escapes (Parser * parser , const char * s , size_t len , Token * t )
51
56
{
52
57
PyObject * v , * u ;
53
58
char * buf ;
@@ -110,7 +115,7 @@ decode_unicode_with_escapes(Parser *parser, const char *s, size_t len)
110
115
v = _PyUnicode_DecodeUnicodeEscape (s , len , NULL , & first_invalid_escape );
111
116
112
117
if (v != NULL && first_invalid_escape != NULL ) {
113
- if (warn_invalid_escape_sequence (parser , * first_invalid_escape ) < 0 ) {
118
+ if (warn_invalid_escape_sequence (parser , * first_invalid_escape , t ) < 0 ) {
114
119
/* We have not decref'ed u earlier because first_invalid_escape points
115
120
inside u. */
116
121
Py_XDECREF (u );
@@ -123,7 +128,7 @@ decode_unicode_with_escapes(Parser *parser, const char *s, size_t len)
123
128
}
124
129
125
130
static PyObject *
126
- decode_bytes_with_escapes (Parser * p , const char * s , Py_ssize_t len )
131
+ decode_bytes_with_escapes (Parser * p , const char * s , Py_ssize_t len , Token * t )
127
132
{
128
133
const char * first_invalid_escape ;
129
134
PyObject * result = _PyBytes_DecodeEscape (s , len , NULL , & first_invalid_escape );
@@ -132,7 +137,7 @@ decode_bytes_with_escapes(Parser *p, const char *s, Py_ssize_t len)
132
137
}
133
138
134
139
if (first_invalid_escape != NULL ) {
135
- if (warn_invalid_escape_sequence (p , * first_invalid_escape ) < 0 ) {
140
+ if (warn_invalid_escape_sequence (p , * first_invalid_escape , t ) < 0 ) {
136
141
Py_DECREF (result );
137
142
return NULL ;
138
143
}
@@ -146,9 +151,14 @@ decode_bytes_with_escapes(Parser *p, const char *s, Py_ssize_t len)
146
151
If the string is an f-string, set *fstr and *fstrlen to the unparsed
147
152
string object. Return 0 if no errors occurred. */
148
153
int
149
- _PyPegen_parsestr (Parser * p , const char * s , int * bytesmode , int * rawmode , PyObject * * result ,
150
- const char * * fstr , Py_ssize_t * fstrlen )
154
+ _PyPegen_parsestr (Parser * p , int * bytesmode , int * rawmode , PyObject * * result ,
155
+ const char * * fstr , Py_ssize_t * fstrlen , Token * t )
151
156
{
157
+ const char * s = PyBytes_AsString (t -> bytes );
158
+ if (s == NULL ) {
159
+ return -1 ;
160
+ }
161
+
152
162
size_t len ;
153
163
int quote = Py_CHARMASK (* s );
154
164
int fmode = 0 ;
@@ -245,15 +255,15 @@ _PyPegen_parsestr(Parser *p, const char *s, int *bytesmode, int *rawmode, PyObje
245
255
* result = PyBytes_FromStringAndSize (s , len );
246
256
}
247
257
else {
248
- * result = decode_bytes_with_escapes (p , s , len );
258
+ * result = decode_bytes_with_escapes (p , s , len , t );
249
259
}
250
260
}
251
261
else {
252
262
if (* rawmode ) {
253
263
* result = PyUnicode_DecodeUTF8Stateful (s , len , NULL , NULL );
254
264
}
255
265
else {
256
- * result = decode_unicode_with_escapes (p , s , len );
266
+ * result = decode_unicode_with_escapes (p , s , len , t );
257
267
}
258
268
}
259
269
return * result == NULL ? -1 : 0 ;
@@ -637,7 +647,7 @@ fstring_compile_expr(Parser *p, const char *expr_start, const char *expr_end,
637
647
*/
638
648
static int
639
649
fstring_find_literal (Parser * p , const char * * str , const char * end , int raw ,
640
- PyObject * * literal , int recurse_lvl )
650
+ PyObject * * literal , int recurse_lvl , Token * t )
641
651
{
642
652
/* Get any literal string. It ends when we hit an un-doubled left
643
653
brace (which isn't part of a unicode name escape such as
@@ -660,7 +670,7 @@ fstring_find_literal(Parser *p, const char **str, const char *end, int raw,
660
670
}
661
671
break ;
662
672
}
663
- if (ch == '{' && warn_invalid_escape_sequence (p , ch ) < 0 ) {
673
+ if (ch == '{' && warn_invalid_escape_sequence (p , ch , t ) < 0 ) {
664
674
return -1 ;
665
675
}
666
676
}
@@ -704,7 +714,7 @@ fstring_find_literal(Parser *p, const char **str, const char *end, int raw,
704
714
NULL , NULL );
705
715
else
706
716
* literal = decode_unicode_with_escapes (p , literal_start ,
707
- s - literal_start );
717
+ s - literal_start , t );
708
718
if (!* literal )
709
719
return -1 ;
710
720
}
@@ -1041,7 +1051,7 @@ fstring_find_literal_and_expr(Parser *p, const char **str, const char *end, int
1041
1051
assert (* literal == NULL && * expression == NULL );
1042
1052
1043
1053
/* Get any literal string. */
1044
- result = fstring_find_literal (p , str , end , raw , literal , recurse_lvl );
1054
+ result = fstring_find_literal (p , str , end , raw , literal , recurse_lvl , t );
1045
1055
if (result < 0 )
1046
1056
goto error ;
1047
1057
0 commit comments