@@ -175,11 +175,8 @@ pub enum HirKind {
175
175
/// class. A class can either consist of Unicode scalar values as
176
176
/// characters, or it can use bytes.
177
177
Class ( Class ) ,
178
- /// An anchor assertion. An anchor assertion match always has zero length.
179
- Anchor ( Anchor ) ,
180
- /// A word boundary assertion, which may or may not be Unicode aware. A
181
- /// word boundary assertion match always has zero length.
182
- WordBoundary ( WordBoundary ) ,
178
+ /// A look-around assertion. A look-around match always has zero length.
179
+ Look ( Look ) ,
183
180
/// A repetition operation applied to a child expression.
184
181
Repetition ( Repetition ) ,
185
182
/// A possibly capturing group, which contains a child expression.
@@ -271,8 +268,8 @@ impl Hir {
271
268
Hir { kind : HirKind :: Class ( class) , info }
272
269
}
273
270
274
- /// Creates an anchor assertion HIR expression.
275
- pub fn anchor ( anchor : Anchor ) -> Hir {
271
+ /// Creates a look-around assertion HIR expression.
272
+ pub fn look ( look : Look ) -> Hir {
276
273
let mut info = HirInfo :: new ( ) ;
277
274
info. set_always_utf8 ( true ) ;
278
275
info. set_all_assertions ( true ) ;
@@ -282,53 +279,34 @@ impl Hir {
282
279
info. set_line_anchored_end ( false ) ;
283
280
info. set_any_anchored_start ( false ) ;
284
281
info. set_any_anchored_end ( false ) ;
282
+ // Every look-around assertion produces zero-length or "empty"
283
+ // matches. This is true even though not all of them (like \b) match
284
+ // the empty string itself. That is, '\b' does not match ''. But it
285
+ // does match the empty string between '!' and 'a' in '!a'.
285
286
info. set_match_empty ( true ) ;
286
287
info. set_literal ( false ) ;
287
288
info. set_alternation_literal ( false ) ;
288
- if let Anchor :: StartText = anchor {
289
+ if let Look :: Start = look {
289
290
info. set_anchored_start ( true ) ;
290
291
info. set_line_anchored_start ( true ) ;
291
292
info. set_any_anchored_start ( true ) ;
292
293
}
293
- if let Anchor :: EndText = anchor {
294
+ if let Look :: End = look {
294
295
info. set_anchored_end ( true ) ;
295
296
info. set_line_anchored_end ( true ) ;
296
297
info. set_any_anchored_end ( true ) ;
297
298
}
298
- if let Anchor :: StartLine = anchor {
299
+ if let Look :: StartLF = look {
299
300
info. set_line_anchored_start ( true ) ;
300
301
}
301
- if let Anchor :: EndLine = anchor {
302
+ if let Look :: EndLF = look {
302
303
info. set_line_anchored_end ( true ) ;
303
304
}
304
- Hir { kind : HirKind :: Anchor ( anchor) , info }
305
- }
306
-
307
- /// Creates a word boundary assertion HIR expression.
308
- pub fn word_boundary ( word_boundary : WordBoundary ) -> Hir {
309
- let mut info = HirInfo :: new ( ) ;
310
- info. set_always_utf8 ( true ) ;
311
- info. set_all_assertions ( true ) ;
312
- info. set_anchored_start ( false ) ;
313
- info. set_anchored_end ( false ) ;
314
- info. set_line_anchored_start ( false ) ;
315
- info. set_line_anchored_end ( false ) ;
316
- info. set_any_anchored_start ( false ) ;
317
- info. set_any_anchored_end ( false ) ;
318
- info. set_literal ( false ) ;
319
- info. set_alternation_literal ( false ) ;
320
- // A negated word boundary matches '', so that's fine. But \b does not
321
- // match \b, so why do we say it can match the empty string? Well,
322
- // because, if you search for \b against 'a', it will report [0, 0) and
323
- // [1, 1) as matches, and both of those matches correspond to the empty
324
- // string. Thus, only *certain* empty strings match \b, which similarly
325
- // applies to \B.
326
- info. set_match_empty ( true ) ;
327
- // Negated ASCII word boundaries can match invalid UTF-8.
328
- if let WordBoundary :: AsciiNegate = word_boundary {
305
+ if let Look :: WordAsciiNegate = look {
306
+ // Negated ASCII word boundaries can match invalid UTF-8.
329
307
info. set_always_utf8 ( false ) ;
330
308
}
331
- Hir { kind : HirKind :: WordBoundary ( word_boundary ) , info }
309
+ Hir { kind : HirKind :: Look ( look ) , info }
332
310
}
333
311
334
312
/// Creates a repetition HIR expression.
@@ -697,8 +675,7 @@ impl HirKind {
697
675
HirKind :: Empty
698
676
| HirKind :: Literal ( _)
699
677
| HirKind :: Class ( _)
700
- | HirKind :: Anchor ( _)
701
- | HirKind :: WordBoundary ( _) => false ,
678
+ | HirKind :: Look ( _) => false ,
702
679
HirKind :: Group ( _)
703
680
| HirKind :: Repetition ( _)
704
681
| HirKind :: Concat ( _)
@@ -1313,44 +1290,37 @@ impl core::fmt::Debug for ClassBytesRange {
1313
1290
}
1314
1291
}
1315
1292
1316
- /// The high-level intermediate representation for an anchor assertion.
1293
+ /// The high-level intermediate representation for a look-around assertion.
1317
1294
///
1318
- /// A matching anchor assertion is always zero-length.
1295
+ /// An assertion match is always zero-length; such a match is also called an "empty match."
1319
1296
#[ derive( Clone , Debug , Eq , PartialEq ) ]
1320
- pub enum Anchor {
1321
- /// Match the beginning of a line or the beginning of text. Specifically,
1322
- /// this matches at the starting position of the input, or at the position
1323
- /// immediately following a `\n` character.
1324
- StartLine ,
1325
- /// Match the end of a line or the end of text. Specifically,
1326
- /// this matches at the end position of the input, or at the position
1327
- /// immediately preceding a `\n` character.
1328
- EndLine ,
1297
+ pub enum Look {
1329
1298
/// Match the beginning of text. Specifically, this matches at the starting
1330
1299
/// position of the input.
1331
- StartText ,
1300
+ Start ,
1332
1301
/// Match the end of text. Specifically, this matches at the ending
1333
1302
/// position of the input.
1334
- EndText ,
1335
- }
1336
-
1337
- /// The high-level intermediate representation for a word-boundary assertion.
1338
- ///
1339
- /// A matching word boundary assertion is always zero-length.
1340
- #[ derive( Clone , Debug , Eq , PartialEq ) ]
1341
- pub enum WordBoundary {
1342
- /// Match a Unicode-aware word boundary. That is, this matches a position
1343
- /// where the left adjacent character and right adjacent character
1344
- /// correspond to a word and non-word or a non-word and word character.
1345
- Unicode ,
1346
- /// Match a Unicode-aware negation of a word boundary.
1347
- UnicodeNegate ,
1303
+ End ,
1304
+ /// Match the beginning of a line or the beginning of text. Specifically,
1305
+ /// this matches at the starting position of the input, or at the position
1306
+ /// immediately following a `\n` character.
1307
+ StartLF ,
1308
+ /// Match the end of a line or the end of text. Specifically, this matches
1309
+ /// at the end position of the input, or at the position immediately
1310
+ /// preceding a `\n` character.
1311
+ EndLF ,
1348
1312
/// Match an ASCII-only word boundary. That is, this matches a position
1349
1313
/// where the left adjacent character and right adjacent character
1350
1314
/// correspond to a word and non-word or a non-word and word character.
1351
- Ascii ,
1315
+ WordAscii ,
1352
1316
/// Match an ASCII-only negation of a word boundary.
1353
- AsciiNegate ,
1317
+ WordAsciiNegate ,
1318
+ /// Match a Unicode-aware word boundary. That is, this matches a position
1319
+ /// where the left adjacent character and right adjacent character
1320
+ /// correspond to a word and non-word or a non-word and word character.
1321
+ WordUnicode ,
1322
+ /// Match a Unicode-aware negation of a word boundary.
1323
+ WordUnicodeNegate ,
1354
1324
}
1355
1325
1356
1326
/// The high-level intermediate representation for a group.
@@ -1461,8 +1431,7 @@ impl Drop for Hir {
1461
1431
HirKind :: Empty
1462
1432
| HirKind :: Literal ( _)
1463
1433
| HirKind :: Class ( _)
1464
- | HirKind :: Anchor ( _)
1465
- | HirKind :: WordBoundary ( _) => return ,
1434
+ | HirKind :: Look ( _) => return ,
1466
1435
HirKind :: Group ( ref x) if !x. hir . kind . has_subexprs ( ) => return ,
1467
1436
HirKind :: Repetition ( ref x) if !x. hir . kind . has_subexprs ( ) => return ,
1468
1437
HirKind :: Concat ( ref x) if x. is_empty ( ) => return ,
@@ -1476,8 +1445,7 @@ impl Drop for Hir {
1476
1445
HirKind :: Empty
1477
1446
| HirKind :: Literal ( _)
1478
1447
| HirKind :: Class ( _)
1479
- | HirKind :: Anchor ( _)
1480
- | HirKind :: WordBoundary ( _) => { }
1448
+ | HirKind :: Look ( _) => { }
1481
1449
HirKind :: Group ( ref mut x) => {
1482
1450
stack. push ( mem:: replace ( & mut x. hir , Hir :: empty ( ) ) ) ;
1483
1451
}
0 commit comments