From 987f017fe3141eca95258b86f58247e201c19b62 Mon Sep 17 00:00:00 2001 From: Leachim <32847549+Licheam@users.noreply.github.com> Date: Fri, 21 Jul 2023 20:32:37 +0800 Subject: [PATCH 01/12] syntax: get canonicalization done in-place with constant memory. Replace the old canonicalization step with an in-place version that uses constant memory. --- regex-syntax/src/hir/interval.rs | 27 +++++++++++---------------- 1 file changed, 11 insertions(+), 16 deletions(-) diff --git a/regex-syntax/src/hir/interval.rs b/regex-syntax/src/hir/interval.rs index e063390a8..34a9f4b81 100644 --- a/regex-syntax/src/hir/interval.rs +++ b/regex-syntax/src/hir/interval.rs @@ -347,24 +347,19 @@ impl IntervalSet { self.ranges.sort(); assert!(!self.ranges.is_empty()); - // Is there a way to do this in-place with constant memory? I couldn't - // figure out a way to do it. So just append the canonicalization to - // the end of this range, and then drain it before we're done. - let drain_end = self.ranges.len(); - for oldi in 0..drain_end { - // If we've added at least one new range, then check if we can - // merge this range in the previously added range. - if self.ranges.len() > drain_end { - let (last, rest) = self.ranges.split_last_mut().unwrap(); - if let Some(union) = last.union(&rest[oldi]) { - *last = union; - continue; - } + // We consistently try to merge range with previous range + // and merge them if possible. Otherwise, we make it the + // range as the last one. + let mut newi = 0; + for oldi in 1..self.ranges.len() { + if let Some(union) = self.ranges[newi].union(&self.ranges[oldi]) { + self.ranges[newi] = union; + } else { + newi += 1; + self.ranges[newi] = self.ranges[oldi]; } - let range = self.ranges[oldi]; - self.ranges.push(range); } - self.ranges.drain(..drain_end); + self.ranges.truncate(newi + 1); } /// Returns true if and only if this class is in a canonical ordering. 
From 8f2a6de6c9c4385f0e9f5219a98e279a2ba94aa0 Mon Sep 17 00:00:00 2001 From: Leachim <32847549+Licheam@users.noreply.github.com> Date: Fri, 21 Jul 2023 20:37:26 +0800 Subject: [PATCH 02/12] removes redundant tabs --- regex-syntax/src/hir/interval.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/regex-syntax/src/hir/interval.rs b/regex-syntax/src/hir/interval.rs index 34a9f4b81..a446b9b47 100644 --- a/regex-syntax/src/hir/interval.rs +++ b/regex-syntax/src/hir/interval.rs @@ -351,7 +351,7 @@ impl IntervalSet { // and merge them if possible. Otherwise, we make it the // range as the last one. let mut newi = 0; - for oldi in 1..self.ranges.len() { + for oldi in 1..self.ranges.len() { if let Some(union) = self.ranges[newi].union(&self.ranges[oldi]) { self.ranges[newi] = union; } else { From 9e100890abbb40651bac0878679ed9c0fbbeb159 Mon Sep 17 00:00:00 2001 From: Leachim <32847549+Licheam@users.noreply.github.com> Date: Fri, 21 Jul 2023 21:40:55 +0800 Subject: [PATCH 03/12] doc: clean up the comments making it more readable. --- regex-syntax/src/hir/interval.rs | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/regex-syntax/src/hir/interval.rs b/regex-syntax/src/hir/interval.rs index a446b9b47..0cd7ba6c2 100644 --- a/regex-syntax/src/hir/interval.rs +++ b/regex-syntax/src/hir/interval.rs @@ -347,11 +347,12 @@ impl IntervalSet { self.ranges.sort(); assert!(!self.ranges.is_empty()); - // We consistently try to merge range with previous range - // and merge them if possible. Otherwise, we make it the - // range as the last one. + // We maintains the canonicalization results in-place at `0..newi`. + // `newi` will keep track of the end of the canonicalized ranges. let mut newi = 0; for oldi in 1..self.ranges.len() { + // The last new range gets merged with currnet old range when unionable. + // If not, we store it as the new range at the current `newi`. 
if let Some(union) = self.ranges[newi].union(&self.ranges[oldi]) { self.ranges[newi] = union; } else { From 21b89be64cd771363576472bdffa90973c915e7f Mon Sep 17 00:00:00 2001 From: Leachim <32847549+Licheam@users.noreply.github.com> Date: Fri, 21 Jul 2023 21:51:30 +0800 Subject: [PATCH 04/12] doc: fix some grammar --- regex-syntax/src/hir/interval.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/regex-syntax/src/hir/interval.rs b/regex-syntax/src/hir/interval.rs index 0cd7ba6c2..874d63ed9 100644 --- a/regex-syntax/src/hir/interval.rs +++ b/regex-syntax/src/hir/interval.rs @@ -347,12 +347,12 @@ impl IntervalSet { self.ranges.sort(); assert!(!self.ranges.is_empty()); - // We maintains the canonicalization results in-place at `0..newi`. + // We maintain the canonicalization results in-place at `0..newi`. // `newi` will keep track of the end of the canonicalized ranges. let mut newi = 0; for oldi in 1..self.ranges.len() { // The last new range gets merged with currnet old range when unionable. - // If not, we store it as the new range at the current `newi`. + // If not, we update `newi` and store it as a new range. if let Some(union) = self.ranges[newi].union(&self.ranges[oldi]) { self.ranges[newi] = union; } else { From ab47190f2b3765944f82e8399d4b4a87ebe62d5f Mon Sep 17 00:00:00 2001 From: Leachim <32847549+Licheam@users.noreply.github.com> Date: Sat, 22 Jul 2023 13:30:39 +0800 Subject: [PATCH 05/12] syntax: simplify the difference function in IntervalSet. --- regex-syntax/src/hir/interval.rs | 69 ++++++++------------------------ 1 file changed, 17 insertions(+), 52 deletions(-) diff --git a/regex-syntax/src/hir/interval.rs b/regex-syntax/src/hir/interval.rs index 874d63ed9..dc3aa4aac 100644 --- a/regex-syntax/src/hir/interval.rs +++ b/regex-syntax/src/hir/interval.rs @@ -192,34 +192,13 @@ impl IntervalSet { // Folks seem to suggest interval or segment trees, but I'd like to // avoid the overhead (both runtime and conceptual) of that. 
// - // The following is basically my Shitty First Draft. Therefore, in - // order to grok it, you probably need to read each line carefully. - // Simplifications are most welcome! - // // Remember, we can assume the canonical format invariant here, which // says that all ranges are sorted, not overlapping and not adjacent in // each class. let drain_end = self.ranges.len(); - let (mut a, mut b) = (0, 0); - 'LOOP: while a < drain_end && b < other.ranges.len() { - // Basically, the easy cases are when neither range overlaps with - // each other. If the `b` range is less than our current `a` - // range, then we can skip it and move on. - if other.ranges[b].upper() < self.ranges[a].lower() { - b += 1; - continue; - } - // ... similarly for the `a` range. If it's less than the smallest - // `b` range, then we can add it as-is. - if self.ranges[a].upper() < other.ranges[b].lower() { - let range = self.ranges[a]; - self.ranges.push(range); - a += 1; - continue; - } - // Otherwise, we have overlapping ranges. - assert!(!self.ranges[a].is_intersection_empty(&other.ranges[b])); + let mut b = 0; + for a in 0..drain_end { // This part is tricky and was non-obvious to me without looking // at explicit examples (see the tests). The trickiness stems from // two things: 1) subtracting a range from another range could @@ -231,45 +210,31 @@ impl IntervalSet { // For example, if our `a` range is `a-t` and our next three `b` // ranges are `a-c`, `g-i`, `r-t` and `x-z`, then we need to apply // subtraction three times before moving on to the next `a` range. - let mut range = self.ranges[a]; + self.ranges.push(self.ranges[a]); + // Only when `b` is not above `a`, `b` might apply to current + // `a` range. while b < other.ranges.len() - && !range.is_intersection_empty(&other.ranges[b]) - { - let old_range = range; - range = match range.difference(&other.ranges[b]) { - (None, None) => { - // We lost the entire range, so move on to the next - // without adding this one. 
- a += 1; - continue 'LOOP; + && other.ranges[b].lower() <= self.ranges[a].upper() { + match self.ranges.pop().unwrap().difference(&other.ranges[b]) { + (Some(range1), None) | (None, Some(range1)) => { + self.ranges.push(range1); } - (Some(range1), None) | (None, Some(range1)) => range1, (Some(range1), Some(range2)) => { self.ranges.push(range1); - range2 + self.ranges.push(range2); } - }; - // It's possible that the `b` range has more to contribute - // here. In particular, if it is greater than the original - // range, then it might impact the next `a` range *and* it - // has impacted the current `a` range as much as possible, - // so we can quit. We don't bump `b` so that the next `a` - // range can apply it. - if other.ranges[b].upper() > old_range.upper() { - break; + (None, None) => {} } - // Otherwise, the next `b` range might apply to the current + // The next `b` range might apply to the current // `a` range. b += 1; } - self.ranges.push(range); - a += 1; - } - while a < drain_end { - let range = self.ranges[a]; - self.ranges.push(range); - a += 1; + // It's possible that the last `b` range has more to + // contribute to the next `a`. We don't bump the last + // `b` so that the next `a` range can apply it. + b = b.saturating_sub(1); } + self.ranges.drain(..drain_end); self.folded = self.folded && other.folded; } From bb5cce763c8acb0108ad27c79b26f699acf7aef4 Mon Sep 17 00:00:00 2001 From: Leachim <32847549+Licheam@users.noreply.github.com> Date: Sat, 22 Jul 2023 16:33:52 +0800 Subject: [PATCH 06/12] syntax: add folded opt at difference. When ranges is empty, it is folded. 
--- regex-syntax/src/hir/interval.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/regex-syntax/src/hir/interval.rs b/regex-syntax/src/hir/interval.rs index dc3aa4aac..5a1039dcc 100644 --- a/regex-syntax/src/hir/interval.rs +++ b/regex-syntax/src/hir/interval.rs @@ -236,7 +236,7 @@ impl IntervalSet { } self.ranges.drain(..drain_end); - self.folded = self.folded && other.folded; + self.folded = self.ranges.is_empty() || (self.folded && other.folded); } /// Compute the symmetric difference of the two sets, in place. From 4ca1979f7332830292a149b2ffb2095aac5c99d0 Mon Sep 17 00:00:00 2001 From: Leachim <32847549+Licheam@users.noreply.github.com> Date: Sat, 22 Jul 2023 16:27:53 +0800 Subject: [PATCH 07/12] syntax: optimize the symmetric_difference function in IntervalSet. It now behaves like difference and others. --- regex-syntax/src/hir/interval.rs | 70 +++++++++++++++++++++++++++++--- 1 file changed, 64 insertions(+), 6 deletions(-) diff --git a/regex-syntax/src/hir/interval.rs b/regex-syntax/src/hir/interval.rs index 5a1039dcc..243c28b5f 100644 --- a/regex-syntax/src/hir/interval.rs +++ b/regex-syntax/src/hir/interval.rs @@ -247,11 +247,65 @@ impl IntervalSet { /// set. That is, the set will contain all elements in either set, /// but will not contain any elements that are in both sets. pub fn symmetric_difference(&mut self, other: &IntervalSet) { - // TODO(burntsushi): Fix this so that it amortizes allocation. - let mut intersection = self.clone(); - intersection.intersect(other); - self.union(other); - self.difference(&intersection); + if self.ranges.is_empty() { + self.ranges.extend(&other.ranges); + self.folded = other.folded; + return; + } + if other.ranges.is_empty() { + return; + } + + // There should be a way to do this in-place with constant memory, + // but I couldn't figure out a simple way to do it. So just append + // the symmetric difference to the end of this range, and then drain + // it before we're done. 
+ let drain_end = self.ranges.len(); + let mut b = 0; + let mut b_range = Some(other.ranges[b]); + for a in 0..drain_end { + self.ranges.push(self.ranges[a]); + while b_range.is_some_and(|r| r.lower() <= self.ranges[a].upper()) { + let (range1, range2) = match self.ranges.pop().unwrap() + .symmetric_difference(&b_range.as_ref().unwrap()) + { + (Some(range1), None) | (None, Some(range1)) => (Some(range1), None), + (Some(range1), Some(range2)) => (Some(range1), Some(range2)), + (None, None) => (None, None) + }; + if let Some(range) = range1 { + if self.ranges.len() > drain_end && self.ranges.last().unwrap().is_contiguous(&range){ + self.ranges.last_mut().map(|last| *last = last.union(&range).unwrap()); + } else { + self.ranges.push(range); + } + } + if let Some(range) = range2 { + self.ranges.push(range); + } + + b_range = if self.ranges.len() > drain_end + && self.ranges.last().unwrap().upper() > self.ranges[a].upper() + { + Some(*self.ranges.last().unwrap()) + } else { + b += 1; + other.ranges.get(b).cloned() + }; + } + } + while let Some(range) = b_range { + if self.ranges.len() > drain_end && self.ranges.last().unwrap().is_contiguous(&range){ + self.ranges.last_mut().map(|last| *last = last.union(&range).unwrap()); + } else { + self.ranges.push(range); + } + b += 1; + b_range = other.ranges.get(b).cloned(); + } + + self.ranges.drain(..drain_end); + self.folded = self.ranges.is_empty() || (self.folded && other.folded); } /// Negate this interval set. 
@@ -447,7 +501,11 @@ pub trait Interval: other: &Self, ) -> (Option, Option) { let union = match self.union(other) { - None => return (Some(self.clone()), Some(other.clone())), + None => return if self.upper() < other.lower() { + (Some(self.clone()), Some(other.clone())) + } else { + (Some(other.clone()), Some(self.clone())) + }, Some(union) => union, }; let intersection = match self.intersect(other) { From 6af1a180a347384694be85f51c88699c3bde8543 Mon Sep 17 00:00:00 2001 From: Leachim <32847549+Licheam@users.noreply.github.com> Date: Sat, 22 Jul 2023 17:40:06 +0800 Subject: [PATCH 08/12] syntax: get negate done in-place with constant memory. --- regex-syntax/src/hir/interval.rs | 38 +++++++++++++++++--------------- 1 file changed, 20 insertions(+), 18 deletions(-) diff --git a/regex-syntax/src/hir/interval.rs b/regex-syntax/src/hir/interval.rs index 243c28b5f..e276c861f 100644 --- a/regex-syntax/src/hir/interval.rs +++ b/regex-syntax/src/hir/interval.rs @@ -321,28 +321,29 @@ impl IntervalSet { return; } - // There should be a way to do this in-place with constant memory, - // but I couldn't figure out a simple way to do it. So just append - // the negation to the end of this range, and then drain it before - // we're done. - let drain_end = self.ranges.len(); - // We do checked arithmetic below because of the canonical ordering // invariant. 
if self.ranges[0].lower() > I::Bound::min_value() { - let upper = self.ranges[0].lower().decrement(); - self.ranges.push(I::create(I::Bound::min_value(), upper)); - } - for i in 1..drain_end { - let lower = self.ranges[i - 1].upper().increment(); - let upper = self.ranges[i].lower().decrement(); - self.ranges.push(I::create(lower, upper)); - } - if self.ranges[drain_end - 1].upper() < I::Bound::max_value() { - let lower = self.ranges[drain_end - 1].upper().increment(); - self.ranges.push(I::create(lower, I::Bound::max_value())); + let mut pre_upper = self.ranges[0].upper(); + self.ranges[0] = I::create(I::Bound::min_value(), self.ranges[0].lower().decrement()); + for i in 1..self.ranges.len() { + let lower = pre_upper.increment(); + pre_upper = self.ranges[i].upper(); + self.ranges[i] = I::create(lower, self.ranges[i].lower().decrement()); + } + if pre_upper < I::Bound::max_value() { + self.ranges.push(I::create(pre_upper.increment(), I::Bound::max_value())); + } + } else { + for i in 1..self.ranges.len() { + self.ranges[i - 1] = I::create(self.ranges[i - 1].upper().increment(), self.ranges[i].lower().decrement()); + } + if self.ranges.last().unwrap().upper() < I::Bound::max_value() { + self.ranges.last_mut().map(|range| *range = I::create(range.upper().increment(), I::Bound::max_value())); + } else { + self.ranges.pop(); + } } - self.ranges.drain(..drain_end); // We don't need to update whether this set is folded or not, because // it is conservatively preserved through negation. Namely, if a set // is not folded, then it is possible that its negation is folded, for @@ -356,6 +357,7 @@ impl IntervalSet { // of case folded characters. Negating it in turn means that all // equivalence classes in the set are negated, and any equivalence // class that was previously not in the set is now entirely in the set. + self.folded = self.ranges.is_empty() || self.folded; } /// Converts this set into a canonical ordering. 
From 3d04223a6dfa6ef371f2af8b2c974e5e0dfd64c2 Mon Sep 17 00:00:00 2001 From: Leachim <32847549+Licheam@users.noreply.github.com> Date: Sat, 22 Jul 2023 17:54:16 +0800 Subject: [PATCH 09/12] syntax: format codes. --- regex-syntax/src/hir/interval.rs | 83 +++++++++++++++++++++++--------- 1 file changed, 59 insertions(+), 24 deletions(-) diff --git a/regex-syntax/src/hir/interval.rs b/regex-syntax/src/hir/interval.rs index e276c861f..e86f2dbe2 100644 --- a/regex-syntax/src/hir/interval.rs +++ b/regex-syntax/src/hir/interval.rs @@ -214,7 +214,8 @@ impl IntervalSet { // Only when `b` is not above `a`, `b` might apply to current // `a` range. while b < other.ranges.len() - && other.ranges[b].lower() <= self.ranges[a].upper() { + && other.ranges[b].lower() <= self.ranges[a].upper() + { match self.ranges.pop().unwrap().difference(&other.ranges[b]) { (Some(range1), None) | (None, Some(range1)) => { self.ranges.push(range1); @@ -265,17 +266,29 @@ impl IntervalSet { let mut b_range = Some(other.ranges[b]); for a in 0..drain_end { self.ranges.push(self.ranges[a]); - while b_range.is_some_and(|r| r.lower() <= self.ranges[a].upper()) { - let (range1, range2) = match self.ranges.pop().unwrap() + while b_range.is_some_and(|r| r.lower() <= self.ranges[a].upper()) + { + let (range1, range2) = match self + .ranges + .pop() + .unwrap() .symmetric_difference(&b_range.as_ref().unwrap()) { - (Some(range1), None) | (None, Some(range1)) => (Some(range1), None), - (Some(range1), Some(range2)) => (Some(range1), Some(range2)), - (None, None) => (None, None) + (Some(range1), None) | (None, Some(range1)) => { + (Some(range1), None) + } + (Some(range1), Some(range2)) => { + (Some(range1), Some(range2)) + } + (None, None) => (None, None), }; if let Some(range) = range1 { - if self.ranges.len() > drain_end && self.ranges.last().unwrap().is_contiguous(&range){ - self.ranges.last_mut().map(|last| *last = last.union(&range).unwrap()); + if self.ranges.len() > drain_end + && 
self.ranges.last().unwrap().is_contiguous(&range) + { + self.ranges + .last_mut() + .map(|last| *last = last.union(&range).unwrap()); } else { self.ranges.push(range); } @@ -283,9 +296,10 @@ impl IntervalSet { if let Some(range) = range2 { self.ranges.push(range); } - - b_range = if self.ranges.len() > drain_end - && self.ranges.last().unwrap().upper() > self.ranges[a].upper() + + b_range = if self.ranges.len() > drain_end + && self.ranges.last().unwrap().upper() + > self.ranges[a].upper() { Some(*self.ranges.last().unwrap()) } else { @@ -295,15 +309,19 @@ impl IntervalSet { } } while let Some(range) = b_range { - if self.ranges.len() > drain_end && self.ranges.last().unwrap().is_contiguous(&range){ - self.ranges.last_mut().map(|last| *last = last.union(&range).unwrap()); + if self.ranges.len() > drain_end + && self.ranges.last().unwrap().is_contiguous(&range) + { + self.ranges + .last_mut() + .map(|last| *last = last.union(&range).unwrap()); } else { self.ranges.push(range); } b += 1; b_range = other.ranges.get(b).cloned(); } - + self.ranges.drain(..drain_end); self.folded = self.ranges.is_empty() || (self.folded && other.folded); } @@ -325,21 +343,36 @@ impl IntervalSet { // invariant. 
if self.ranges[0].lower() > I::Bound::min_value() { let mut pre_upper = self.ranges[0].upper(); - self.ranges[0] = I::create(I::Bound::min_value(), self.ranges[0].lower().decrement()); + self.ranges[0] = I::create( + I::Bound::min_value(), + self.ranges[0].lower().decrement(), + ); for i in 1..self.ranges.len() { let lower = pre_upper.increment(); pre_upper = self.ranges[i].upper(); - self.ranges[i] = I::create(lower, self.ranges[i].lower().decrement()); + self.ranges[i] = + I::create(lower, self.ranges[i].lower().decrement()); } if pre_upper < I::Bound::max_value() { - self.ranges.push(I::create(pre_upper.increment(), I::Bound::max_value())); + self.ranges.push(I::create( + pre_upper.increment(), + I::Bound::max_value(), + )); } } else { for i in 1..self.ranges.len() { - self.ranges[i - 1] = I::create(self.ranges[i - 1].upper().increment(), self.ranges[i].lower().decrement()); + self.ranges[i - 1] = I::create( + self.ranges[i - 1].upper().increment(), + self.ranges[i].lower().decrement(), + ); } if self.ranges.last().unwrap().upper() < I::Bound::max_value() { - self.ranges.last_mut().map(|range| *range = I::create(range.upper().increment(), I::Bound::max_value())); + self.ranges.last_mut().map(|range| { + *range = I::create( + range.upper().increment(), + I::Bound::max_value(), + ) + }); } else { self.ranges.pop(); } @@ -503,11 +536,13 @@ pub trait Interval: other: &Self, ) -> (Option, Option) { let union = match self.union(other) { - None => return if self.upper() < other.lower() { - (Some(self.clone()), Some(other.clone())) - } else { - (Some(other.clone()), Some(self.clone())) - }, + None => { + return if self.upper() < other.lower() { + (Some(self.clone()), Some(other.clone())) + } else { + (Some(other.clone()), Some(self.clone())) + } + } Some(union) => union, }; let intersection = match self.intersect(other) { From 7386e5fcaf294577eb303dc507f467eca77dafca Mon Sep 17 00:00:00 2001 From: Leachim <32847549+Licheam@users.noreply.github.com> Date: Sat, 22 Jul 
2023 18:01:46 +0800 Subject: [PATCH 10/12] syntax: replace the is_some_and func to is_some. --- regex-syntax/src/hir/interval.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/regex-syntax/src/hir/interval.rs b/regex-syntax/src/hir/interval.rs index e86f2dbe2..919efbf6f 100644 --- a/regex-syntax/src/hir/interval.rs +++ b/regex-syntax/src/hir/interval.rs @@ -266,7 +266,8 @@ impl IntervalSet { let mut b_range = Some(other.ranges[b]); for a in 0..drain_end { self.ranges.push(self.ranges[a]); - while b_range.is_some_and(|r| r.lower() <= self.ranges[a].upper()) + while b_range.is_some() + && b_range.unwrap().lower() <= self.ranges[a].upper() { let (range1, range2) = match self .ranges From a71e3c86b8e4821a333260dd86a2a2cb44c07140 Mon Sep 17 00:00:00 2001 From: Leachim <32847549+Licheam@users.noreply.github.com> Date: Sat, 22 Jul 2023 19:20:11 +0800 Subject: [PATCH 11/12] syntax: implement fast push in-place. --- regex-syntax/src/hir/interval.rs | 38 ++++++++++++++++++++++++++++---- 1 file changed, 34 insertions(+), 4 deletions(-) diff --git a/regex-syntax/src/hir/interval.rs b/regex-syntax/src/hir/interval.rs index 919efbf6f..545b6f326 100644 --- a/regex-syntax/src/hir/interval.rs +++ b/regex-syntax/src/hir/interval.rs @@ -81,14 +81,44 @@ impl IntervalSet { /// Add a new interval to this set. pub fn push(&mut self, interval: I) { - // TODO: This could be faster. e.g., Push the interval such that - // it preserves canonicalization. - self.ranges.push(interval); - self.canonicalize(); // We don't know whether the new interval added here is considered // case folded, so we conservatively assume that the entire set is // no longer case folded if it was previously. self.folded = false; + + if self.ranges.is_empty() { + self.ranges.push(interval); + return; + } + + // Find the first range that is not greater than the new interval. + // This is the first range that could possibly be unioned with the + // new interval. 
+ let mut drain_end = self.ranges.len(); + while drain_end > 0 + && self.ranges[drain_end - 1].lower() > interval.upper() + { + drain_end -= 1; + } + + // Try to union the new interval with old intervals backwards. + if drain_end > 0 && self.ranges[drain_end - 1].is_contiguous(&interval) + { + self.ranges[drain_end - 1] = + self.ranges[drain_end - 1].union(&interval).unwrap(); + for i in 0..drain_end - 1 { + if let Some(union) = + self.ranges[drain_end - 1].union(&self.ranges[i]) + { + self.ranges[drain_end - 1] = union; + } else { + self.ranges.drain(i + 1..drain_end - 1); + break; + } + } + } else { + self.ranges.insert(drain_end, interval); + } } /// Return an iterator over all intervals in this set. From dd46d346fb4090716c226e4316cd15a368b4e754 Mon Sep 17 00:00:00 2001 From: Leachim <32847549+Licheam@users.noreply.github.com> Date: Sun, 23 Jul 2023 14:56:36 +0800 Subject: [PATCH 12/12] syntax: fix a bug when pushing range in intervalset. --- regex-syntax/src/hir/interval.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/regex-syntax/src/hir/interval.rs b/regex-syntax/src/hir/interval.rs index 545b6f326..83b2e4005 100644 --- a/regex-syntax/src/hir/interval.rs +++ b/regex-syntax/src/hir/interval.rs @@ -97,6 +97,7 @@ impl IntervalSet { let mut drain_end = self.ranges.len(); while drain_end > 0 && self.ranges[drain_end - 1].lower() > interval.upper() + && !self.ranges[drain_end - 1].is_contiguous(&interval) { drain_end -= 1; } @@ -106,7 +107,7 @@ impl IntervalSet { { self.ranges[drain_end - 1] = self.ranges[drain_end - 1].union(&interval).unwrap(); - for i in 0..drain_end - 1 { + for i in (0..drain_end - 1).rev() { if let Some(union) = self.ranges[drain_end - 1].union(&self.ranges[i]) {