diff --git a/compiler/rustc_lint/src/hidden_unicode_codepoints.rs b/compiler/rustc_lint/src/hidden_unicode_codepoints.rs index 28368e1ab462b..4a7e4bf75cf3c 100644 --- a/compiler/rustc_lint/src/hidden_unicode_codepoints.rs +++ b/compiler/rustc_lint/src/hidden_unicode_codepoints.rs @@ -101,14 +101,28 @@ impl EarlyLintPass for HiddenUnicodeCodepoints { if !contains_text_flow_control_chars(text.as_str()) { return; } - let padding = match token_lit.kind { + let (padding, point_at_inner_spans) = match token_lit.kind { // account for `"` or `'` - ast::token::LitKind::Str | ast::token::LitKind::Char => 1, + ast::token::LitKind::Str | ast::token::LitKind::Char => (1, true), + // account for `c"` + ast::token::LitKind::CStr => (2, true), // account for `r###"` - ast::token::LitKind::StrRaw(n) => n as u32 + 2, - _ => return, + ast::token::LitKind::StrRaw(n) => (n as u32 + 2, true), + // account for `cr###"` + ast::token::LitKind::CStrRaw(n) => (n as u32 + 3, true), + // suppress bad literals. + ast::token::LitKind::Err(_) => return, + // Be conservative just in case new literals do support these. + _ => (0, false), }; - self.lint_text_direction_codepoint(cx, text, expr.span, padding, true, "literal"); + self.lint_text_direction_codepoint( + cx, + text, + expr.span, + padding, + point_at_inner_spans, + "literal", + ); } _ => {} }; diff --git a/tests/ui/parser/unicode-control-codepoints.rs b/tests/ui/parser/unicode-control-codepoints.rs index df099bb62ad1e..c2b9a9911ac5d 100644 --- a/tests/ui/parser/unicode-control-codepoints.rs +++ b/tests/ui/parser/unicode-control-codepoints.rs @@ -1,3 +1,5 @@ +//@ edition: 2021 + fn main() { // if access_level != "us‫e‪r" { // Check if admin //~^ ERROR unicode codepoint changing visible direction of text present in comment @@ -25,6 +27,11 @@ fn main() { //~| ERROR non-ASCII character in raw byte string literal println!("{:?}", '‮'); //~^ ERROR unicode codepoint changing visible direction of text present in literal + + let _ = c"‮"; + //~^ ERROR unicode codepoint changing visible direction of text present in literal + let _ = cr#"‮"#; + //~^ ERROR unicode codepoint changing visible direction of text present in literal } //"/*‮ } ⁦if isAdmin⁩ ⁦ begin admins only */" diff --git a/tests/ui/parser/unicode-control-codepoints.stderr b/tests/ui/parser/unicode-control-codepoints.stderr index 28de4ae72abbd..fa75df6a443ad 100644 --- a/tests/ui/parser/unicode-control-codepoints.stderr +++ b/tests/ui/parser/unicode-control-codepoints.stderr @@ -1,5 +1,5 @@ error: unicode escape in byte string - --> $DIR/unicode-control-codepoints.rs:6:26 + --> $DIR/unicode-control-codepoints.rs:8:26 | LL | println!("{:?}", b"us\u{202B}e\u{202A}r"); | ^^^^^^^^ unicode escape in byte string @@ -7,7 +7,7 @@ LL | println!("{:?}", b"us\u{202B}e\u{202A}r"); = help: unicode escape sequences cannot be used as a byte or in a byte string error: unicode escape in byte string - --> $DIR/unicode-control-codepoints.rs:6:35 + --> $DIR/unicode-control-codepoints.rs:8:35 | LL | println!("{:?}", b"us\u{202B}e\u{202A}r"); | ^^^^^^^^ unicode escape in byte string @@ -15,7 +15,7 @@ LL | println!("{:?}", b"us\u{202B}e\u{202A}r"); = help: unicode escape sequences cannot be used as a byte or in a byte string error: non-ASCII character in byte string literal - --> $DIR/unicode-control-codepoints.rs:16:26 + --> $DIR/unicode-control-codepoints.rs:18:26 | LL | println!("{:?}", b"/*� } �if isAdmin� � begin admins only "); | ^ must be ASCII but is '\u{202e}' @@ -26,7 +26,7 @@ LL | println!("{:?}", b"/*\xE2\x80\xAE } �if isAdmin� � begin admins o | ~~~~~~~~~~~~ error: non-ASCII character in byte string literal - --> $DIR/unicode-control-codepoints.rs:16:30 + --> $DIR/unicode-control-codepoints.rs:18:30 | LL | println!("{:?}", b"/*� } �if isAdmin� � begin admins only "); | ^ must be ASCII but is '\u{2066}' @@ -37,7 +37,7 @@ LL | println!("{:?}", b"/*� } \xE2\x81\xA6if isAdmin� � begin admins o | ~~~~~~~~~~~~ error: non-ASCII character in byte string literal - --> $DIR/unicode-control-codepoints.rs:16:41 + --> $DIR/unicode-control-codepoints.rs:18:41 | LL | println!("{:?}", b"/*� } �if isAdmin� � begin admins only "); | ^ must be ASCII but is '\u{2069}' @@ -48,7 +48,7 @@ LL | println!("{:?}", b"/*� } �if isAdmin\xE2\x81\xA9 � begin admins o | ~~~~~~~~~~~~ error: non-ASCII character in byte string literal - --> $DIR/unicode-control-codepoints.rs:16:43 + --> $DIR/unicode-control-codepoints.rs:18:43 | LL | println!("{:?}", b"/*� } �if isAdmin� � begin admins only "); | ^ must be ASCII but is '\u{2066}' @@ -59,31 +59,31 @@ LL | println!("{:?}", b"/*� } �if isAdmin� \xE2\x81\xA6 begin admins o | ~~~~~~~~~~~~ error: non-ASCII character in raw byte string literal - --> $DIR/unicode-control-codepoints.rs:21:29 + --> $DIR/unicode-control-codepoints.rs:23:29 | LL | println!("{:?}", br##"/*� } �if isAdmin� � begin admins only "##); | ^ must be ASCII but is '\u{202e}' error: non-ASCII character in raw byte string literal - --> $DIR/unicode-control-codepoints.rs:21:33 + --> $DIR/unicode-control-codepoints.rs:23:33 | LL | println!("{:?}", br##"/*� } �if isAdmin� � begin admins only "##); | ^ must be ASCII but is '\u{2066}' error: non-ASCII character in raw byte string literal - --> $DIR/unicode-control-codepoints.rs:21:44 + --> $DIR/unicode-control-codepoints.rs:23:44 | LL | println!("{:?}", br##"/*� } �if isAdmin� � begin admins only "##); | ^ must be ASCII but is '\u{2069}' error: non-ASCII character in raw byte string literal - --> $DIR/unicode-control-codepoints.rs:21:46 + --> $DIR/unicode-control-codepoints.rs:23:46 | LL | println!("{:?}", br##"/*� } �if isAdmin� � begin admins only "##); | ^ must be ASCII but is '\u{2066}' error: unicode codepoint changing visible direction of text present in comment - --> $DIR/unicode-control-codepoints.rs:2:5 + --> $DIR/unicode-control-codepoints.rs:4:5 | LL | // if access_level != "us�e�r" { // Check if admin | ^^^^^^^^^^^^^^^^^^^^^^^^^-^-^^^^^^^^^^^^^^^^^^^^^^ @@ -97,7 +97,7 @@ LL | // if access_level != "us�e�r" { // Check if admin = help: if their presence wasn't intentional, you can remove them error: unicode codepoint changing visible direction of text present in comment - --> $DIR/unicode-control-codepoints.rs:30:1 + --> $DIR/unicode-control-codepoints.rs:37:1 | LL | //"/*� } �if isAdmin� � begin admins only */" | ^^^^^-^^^-^^^^^^^^^^-^-^^^^^^^^^^^^^^^^^^^^^^ @@ -112,7 +112,7 @@ LL | //"/*� } �if isAdmin� � begin admins only */" = help: if their presence wasn't intentional, you can remove them error: unicode codepoint changing visible direction of text present in literal - --> $DIR/unicode-control-codepoints.rs:11:22 + --> $DIR/unicode-control-codepoints.rs:13:22 | LL | println!("{:?}", "/*� } �if isAdmin� � begin admins only "); | ^^^-^^^-^^^^^^^^^^-^-^^^^^^^^^^^^^^^^^^^^ @@ -132,7 +132,7 @@ LL | println!("{:?}", "/*\u{202e} } \u{2066}if isAdmin\u{2069} \u{2066} begi | ~~~~~~~~ ~~~~~~~~ ~~~~~~~~ ~~~~~~~~ error: unicode codepoint changing visible direction of text present in literal - --> $DIR/unicode-control-codepoints.rs:14:22 + --> $DIR/unicode-control-codepoints.rs:16:22 | LL | println!("{:?}", r##"/*� } �if isAdmin� � begin admins only "##); | ^^^^^^-^^^-^^^^^^^^^^-^-^^^^^^^^^^^^^^^^^^^^^^ @@ -151,7 +151,7 @@ LL | println!("{:?}", r##"/*\u{202e} } \u{2066}if isAdmin\u{2069} \u{2066} b | ~~~~~~~~ ~~~~~~~~ ~~~~~~~~ ~~~~~~~~ error: unicode codepoint changing visible direction of text present in literal - --> $DIR/unicode-control-codepoints.rs:26:22 + --> $DIR/unicode-control-codepoints.rs:28:22 | LL | println!("{:?}", '�'); | ^-^ @@ -166,8 +166,40 @@ help: if you want to keep them but make them visible in your source code, you ca LL | println!("{:?}", '\u{202e}'); | ~~~~~~~~ +error: unicode codepoint changing visible direction of text present in literal + --> $DIR/unicode-control-codepoints.rs:31:13 + | +LL | let _ = c"�"; + | ^^-^ + | | | + | | '\u{202e}' + | this literal contains an invisible unicode text flow control codepoint + | + = note: these kind of unicode codepoints change the way text flows on applications that support them, but can cause confusion because they change the order of characters on the screen + = help: if their presence wasn't intentional, you can remove them +help: if you want to keep them but make them visible in your source code, you can escape them + | +LL | let _ = c"\u{202e}"; + | ~~~~~~~~ + +error: unicode codepoint changing visible direction of text present in literal + --> $DIR/unicode-control-codepoints.rs:33:13 + | +LL | let _ = cr#"�"#; + | ^^^^-^^ + | | | + | | '\u{202e}' + | this literal contains an invisible unicode text flow control codepoint + | + = note: these kind of unicode codepoints change the way text flows on applications that support them, but can cause confusion because they change the order of characters on the screen + = help: if their presence wasn't intentional, you can remove them +help: if you want to keep them but make them visible in your source code, you can escape them + | +LL | let _ = cr#"\u{202e}"#; + | ~~~~~~~~ + error: unicode codepoint changing visible direction of text present in doc comment - --> $DIR/unicode-control-codepoints.rs:33:1 + --> $DIR/unicode-control-codepoints.rs:40:1 | LL | /** '�'); */fn foo() {} | ^^^^^^^^^^^^^ this doc comment contains an invisible unicode text flow control codepoint @@ -177,7 +209,7 @@ LL | /** '�'); */fn foo() {} = note: if you want to keep them but make them visible in your source code, you can escape them: '\u{202e}' error: unicode codepoint changing visible direction of text present in doc comment - --> $DIR/unicode-control-codepoints.rs:36:1 + --> $DIR/unicode-control-codepoints.rs:43:1 | LL | / /** LL | | * @@ -188,5 +220,5 @@ LL | | * '�'); */fn bar() {} = note: if their presence wasn't intentional, you can remove them = note: if you want to keep them but make them visible in your source code, you can escape them: '\u{202e}' -error: aborting due to 17 previous errors +error: aborting due to 19 previous errors