From f4b9ea7315ef2fc7bf161adff15d06328e46a036 Mon Sep 17 00:00:00 2001 From: Bruce Guenter Date: Thu, 27 Feb 2020 18:00:13 -0600 Subject: [PATCH] Add support for additional characters in group names We have an application that requires the ability to use field access notation, which includes periods and square brackets in group names. This commit adds support and tests for these additional characters. --- regex-syntax/src/ast/parse.rs | 44 +++++++++++++++++++++++++++++++++-- src/lib.rs | 2 +- 2 files changed, 43 insertions(+), 3 deletions(-) diff --git a/regex-syntax/src/ast/parse.rs b/regex-syntax/src/ast/parse.rs index f5b4548b23..11ca85e10d 100644 --- a/regex-syntax/src/ast/parse.rs +++ b/regex-syntax/src/ast/parse.rs @@ -98,10 +98,11 @@ fn is_hex(c: char) -> bool { /// Returns true if the given character is a valid in a capture group name. /// /// If `first` is true, then `c` is treated as the first character in the -/// group name (which is not allowed to be a digit). +/// group name (which must be alphabetic or underscore). fn is_capture_char(c: char, first: bool) -> bool { c == '_' - || (!first && c >= '0' && c <= '9') + || (!first + && ((c >= '0' && c <= '9') || c == '.' 
|| c == '[' || c == ']')) || (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') } @@ -3851,6 +3852,45 @@ bar })) ); + assert_eq!( + parser("(?P<a_1>z)").parse(), + Ok(Ast::Group(ast::Group { + span: span(0..10), + kind: ast::GroupKind::CaptureName(ast::CaptureName { + span: span(4..7), + name: s("a_1"), + index: 1, + }), + ast: Box::new(lit('z', 8)), + })) + ); + + assert_eq!( + parser("(?P<a.1>z)").parse(), + Ok(Ast::Group(ast::Group { + span: span(0..10), + kind: ast::GroupKind::CaptureName(ast::CaptureName { + span: span(4..7), + name: s("a.1"), + index: 1, + }), + ast: Box::new(lit('z', 8)), + })) + ); + + assert_eq!( + parser("(?P<a[1]>z)").parse(), + Ok(Ast::Group(ast::Group { + span: span(0..11), + kind: ast::GroupKind::CaptureName(ast::CaptureName { + span: span(4..8), + name: s("a[1]"), + index: 1, + }), + ast: Box::new(lit('z', 9)), + })) + ); + + assert_eq!( parser("(?P<").parse().unwrap_err(), TestError { diff --git a/src/lib.rs b/src/lib.rs index 2a74bf8185..430ab7e743 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -364,7 +364,7 @@ $ the end of text (or end-of-line with multi-line mode)
 (exp)          numbered capture group (indexed by opening parenthesis)
-(?P<name>exp)  named (also numbered) capture group (allowed chars: [_0-9a-zA-Z])
+(?P<name>exp)  named (also numbered) capture group (allowed chars: [_0-9a-zA-Z.\[\]])
 (?:exp)        non-capturing group
 (?flags)       set flags within current group
 (?flags:exp)   set flags for exp (non-capturing)