Skip to content

Commit 9c4ce43

Browse files
addisoncrump authored and BurntSushi committed
fuzz: improve Arbitrary impl for Unicode classes
... and add some more fuzz testing based on it. Closes #991
1 parent bede55a commit 9c4ce43

File tree

2 files changed

+178
-3
lines changed

2 files changed

+178
-3
lines changed

fuzz/fuzz_targets/ast_roundtrip.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,8 @@ struct FuzzData {
1515
impl std::fmt::Debug for FuzzData {
1616
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
1717
let mut builder = f.debug_struct("FuzzData");
18-
builder.field("ast", &format!("{}", self.ast));
18+
builder.field("ast", &self.ast);
19+
builder.field("stringified", &format!("{}", self.ast));
1920
builder.finish()
2021
}
2122
}

regex-syntax/src/ast/mod.rs

Lines changed: 176 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -840,7 +840,6 @@ impl ClassUnicode {
840840

841841
/// The available forms of Unicode character classes.
842842
#[derive(Clone, Debug, Eq, PartialEq)]
843-
#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
844843
pub enum ClassUnicodeKind {
845844
/// A one letter abbreviated class, e.g., `\pN`.
846845
OneLetter(char),
@@ -858,6 +857,153 @@ pub enum ClassUnicodeKind {
858857
},
859858
}
860859

860+
#[cfg(feature = "arbitrary")]
impl arbitrary::Arbitrary<'_> for ClassUnicodeKind {
    /// Builds an arbitrary `ClassUnicodeKind`.
    ///
    /// One of the three variants is chosen first. When at least one of the
    /// `unicode-*` table features is enabled, names and values are drawn
    /// from `PROPERTY_NAMES` and `PROPERTY_VALUES` instead of from raw
    /// fuzzer bytes. Without those features, every field is filled with
    /// unconstrained arbitrary data.
    ///
    /// # Errors
    ///
    /// Propagates any error reported by the `Unstructured` source.
    fn arbitrary(
        u: &mut arbitrary::Unstructured,
    ) -> arbitrary::Result<ClassUnicodeKind> {
        #[cfg(any(
            feature = "unicode-age",
            feature = "unicode-bool",
            feature = "unicode-gencat",
            feature = "unicode-perl",
            feature = "unicode-script",
            feature = "unicode-segment",
        ))]
        {
            use alloc::string::ToString;

            use super::unicode_tables::{
                property_names::PROPERTY_NAMES,
                property_values::PROPERTY_VALUES,
            };

            match u.choose_index(3)? {
                0 => {
                    // Candidates are the value entries whose first
                    // component is exactly one byte long. The same filtered
                    // sequence is used for counting and for selection, so
                    // `idx` always lands on a one-letter entry.
                    let one_letter = || {
                        PROPERTY_VALUES
                            .iter()
                            .flat_map(|e| e.1.iter())
                            .filter(|(name, _)| name.len() == 1)
                    };
                    let idx = u.choose_index(one_letter().count())?;
                    let value = one_letter()
                        .nth(idx)
                        .expect("index is below the number of candidates")
                        .0
                        .chars()
                        .next()
                        .expect("a one-byte name has a first character");
                    Ok(ClassUnicodeKind::OneLetter(value))
                }
                1 => {
                    // Every value entry followed by every property name
                    // entry; the second component of the chosen pair is
                    // used as the class name.
                    let all = PROPERTY_VALUES
                        .iter()
                        .map(|e| e.1.len())
                        .sum::<usize>()
                        + PROPERTY_NAMES.len();
                    let idx = u.choose_index(all)?;
                    let name = PROPERTY_VALUES
                        .iter()
                        .flat_map(|e| e.1.iter())
                        .chain(PROPERTY_NAMES)
                        .map(|(_, e)| e)
                        .nth(idx)
                        .expect("index is below the number of entries");
                    Ok(ClassUnicodeKind::Named(name.to_string()))
                }
                2 => {
                    // Pair each value with the property whose table it
                    // came from, so the generated name/value belong
                    // together.
                    let all = PROPERTY_VALUES
                        .iter()
                        .map(|e| e.1.len())
                        .sum::<usize>();
                    let idx = u.choose_index(all)?;
                    let (prop, value) = PROPERTY_VALUES
                        .iter()
                        .flat_map(|e| {
                            e.1.iter().map(|(_, value)| (e.0, value))
                        })
                        .nth(idx)
                        .expect("index is below the number of entries");
                    Ok(ClassUnicodeKind::NamedValue {
                        op: u.arbitrary()?,
                        name: prop.to_string(),
                        value: value.to_string(),
                    })
                }
                _ => unreachable!("index chosen is impossible"),
            }
        }
        #[cfg(not(any(
            feature = "unicode-age",
            feature = "unicode-bool",
            feature = "unicode-gencat",
            feature = "unicode-perl",
            feature = "unicode-script",
            feature = "unicode-segment",
        )))]
        {
            // No tables are compiled in, so fall back to unconstrained
            // data for every field.
            match u.choose_index(3)? {
                0 => Ok(ClassUnicodeKind::OneLetter(u.arbitrary()?)),
                1 => Ok(ClassUnicodeKind::Named(u.arbitrary()?)),
                2 => Ok(ClassUnicodeKind::NamedValue {
                    op: u.arbitrary()?,
                    name: u.arbitrary()?,
                    value: u.arbitrary()?,
                }),
                _ => unreachable!("index chosen is impossible"),
            }
        }
    }

    /// Reports a size hint for `arbitrary`.
    ///
    /// The table-backed configuration combines two `usize` hints with an
    /// optional `ClassUnicodeOpKind`; the fallback configuration combines
    /// one `usize` hint with the alternatives of a `char`, a `String`, or
    /// two `String`s plus a `ClassUnicodeOpKind`.
    fn size_hint(depth: usize) -> (usize, Option<usize>) {
        #[cfg(any(
            feature = "unicode-age",
            feature = "unicode-bool",
            feature = "unicode-gencat",
            feature = "unicode-perl",
            feature = "unicode-script",
            feature = "unicode-segment",
        ))]
        {
            arbitrary::size_hint::and_all(&[
                usize::size_hint(depth),
                usize::size_hint(depth),
                arbitrary::size_hint::or(
                    (0, Some(0)),
                    ClassUnicodeOpKind::size_hint(depth),
                ),
            ])
        }
        #[cfg(not(any(
            feature = "unicode-age",
            feature = "unicode-bool",
            feature = "unicode-gencat",
            feature = "unicode-perl",
            feature = "unicode-script",
            feature = "unicode-segment",
        )))]
        {
            arbitrary::size_hint::and(
                usize::size_hint(depth),
                arbitrary::size_hint::or_all(&[
                    char::size_hint(depth),
                    String::size_hint(depth),
                    arbitrary::size_hint::and_all(&[
                        String::size_hint(depth),
                        String::size_hint(depth),
                        ClassUnicodeOpKind::size_hint(depth),
                    ]),
                ]),
            )
        }
    }
}
1006+
8611007
/// The type of op used in a Unicode character class.
8621008
#[derive(Clone, Debug, Eq, PartialEq)]
8631009
#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
@@ -1238,7 +1384,6 @@ pub enum GroupKind {
12381384
/// This corresponds to the name itself between the angle brackets in, e.g.,
12391385
/// `(?P<foo>expr)`.
12401386
#[derive(Clone, Debug, Eq, PartialEq)]
1241-
#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
12421387
pub struct CaptureName {
12431388
/// The span of this capture name.
12441389
pub span: Span,
@@ -1248,6 +1393,35 @@ pub struct CaptureName {
12481393
pub index: u32,
12491394
}
12501395

1396+
#[cfg(feature = "arbitrary")]
impl arbitrary::Arbitrary<'_> for CaptureName {
    /// Builds an arbitrary `CaptureName` whose name is a non-empty run of
    /// ASCII lowercase letters.
    ///
    /// # Errors
    ///
    /// Returns `NotEnoughData` when the chosen name length is zero, and
    /// propagates any error reported by the `Unstructured` source.
    fn arbitrary(
        u: &mut arbitrary::Unstructured,
    ) -> arbitrary::Result<CaptureName> {
        let count = u.arbitrary_len::<char>()?;
        if count == 0 {
            return Err(arbitrary::Error::NotEnoughData);
        }
        // Fold each arbitrary scalar value into the range `a..=z`; the
        // first failure while drawing a char aborts the whole collection.
        let name = (0..count)
            .map(|_| -> arbitrary::Result<char> {
                let raw: char = u.arbitrary()?;
                let offset = u8::try_from(u32::from(raw) % 26).unwrap();
                Ok(char::from(b'a' + offset))
            })
            .collect::<arbitrary::Result<String>>()?;
        // The span is drawn before the index, matching field order.
        let span = u.arbitrary()?;
        let index = u.arbitrary()?;
        Ok(CaptureName { span, name, index })
    }

    /// Reports a size hint built from a `Span`, a `usize` and a `u32`.
    fn size_hint(depth: usize) -> (usize, Option<usize>) {
        let parts = [
            Span::size_hint(depth),
            usize::size_hint(depth),
            u32::size_hint(depth),
        ];
        arbitrary::size_hint::and_all(&parts)
    }
}
1424+
12511425
/// A group of flags that is not applied to a particular regular expression.
12521426
#[derive(Clone, Debug, Eq, PartialEq)]
12531427
#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]

0 commit comments

Comments
 (0)