From 7baa33efbf9b47abc6d603ebeea225811aec6767 Mon Sep 17 00:00:00 2001 From: Teymour Aldridge Date: Fri, 26 Aug 2022 20:42:09 +0100 Subject: [PATCH] Add operator overloading for building grammars. --- fuzzcheck/src/mutators/grammar/grammar.rs | 138 ++++++++++++++++----- fuzzcheck/src/mutators/grammar/mod.rs | 4 +- fuzzcheck/src/mutators/grammar/mutators.rs | 24 ++-- fuzzcheck/src/mutators/grammar/regex.rs | 9 +- fuzzcheck/tests/grammar_based_mutators.rs | 109 ++++++++-------- 5 files changed, 176 insertions(+), 108 deletions(-) diff --git a/fuzzcheck/src/mutators/grammar/grammar.rs b/fuzzcheck/src/mutators/grammar/grammar.rs index 9a737c32..d51f170d 100644 --- a/fuzzcheck/src/mutators/grammar/grammar.rs +++ b/fuzzcheck/src/mutators/grammar/grammar.rs @@ -1,4 +1,4 @@ -use std::ops::{Range, RangeBounds, RangeInclusive}; +use std::ops::{Add, BitOr, Range, RangeBounds, RangeInclusive}; use std::rc::{Rc, Weak}; #[cfg(feature = "regex_grammar")] @@ -8,46 +8,90 @@ use crate::mutators::grammar::regex::grammar_from_regex; /// A grammar which can be used for fuzzing. /// /// See [the module documentation](crate::mutators::grammar) for advice on how to create a grammar. -pub enum Grammar { +pub enum GrammarInner { Literal(Vec>), - Alternation(Vec>), - Concatenation(Vec>), - Repetition(Rc, Range), - Recurse(Weak), - Recursive(Rc), + Alternation(Vec), + Concatenation(Vec), + Repetition(Grammar, Range), + Recurse(Weak), + Recursive(Grammar), +} + +#[derive(Debug, Clone)] +/// A [`Grammar`] can be transformed into an [`ASTMutator`] (which generates +/// [`String`]s corresponding to the grammar in question) or combined with other +/// grammars to produce a more complicated grammar. +/// +/// For examples on how to use this struct, see the crate documentation +/// ([`super`]). +/// +/// [`ASTMutator`]: crate::mutators::grammar::ASTMutator +pub struct Grammar(pub(crate) Rc); + +impl From> for Grammar { + fn from(inner: Rc) -> Self { + Grammar(inner) + } +} + +impl AsRef for Grammar { + fn as_ref(&self) -> &GrammarInner { + &self.0 + } +} + +impl Add for Grammar { + type Output = Grammar; + + /// Calls [`concatenation`] on the two provided grammars. + fn add(self, rhs: Self) -> Self::Output { + concatenation([self, rhs]) + } +} + +impl BitOr for Grammar { + type Output = Grammar; + + /// Calls [`alternation`] on the two provided grammars. + fn bitor(self, rhs: Self) -> Self::Output { + alternation([self, rhs]) + } } #[cfg(feature = "regex_grammar")] #[doc(cfg(feature = "regex_grammar"))] #[no_coverage] -pub fn regex(s: &str) -> Rc { +pub fn regex(s: &str) -> Grammar { grammar_from_regex(s) } #[no_coverage] -/// Creates an [`Rc`] which outputs characters in the given range. +/// Creates a [`Grammar`] which outputs characters in the given range. /// /// For example, to generate characters in the range 'a' to 'z' (inclusive), one /// could use this code /// /// ``` -/// let a_to_z = literal_ranges('a'..='z'); +/// # use fuzzcheck::mutators::grammar::literal_ranges; +/// let a_to_z = literal_ranges(vec!['a'..='b', 'q'..='s', 't'..='w']); /// ``` -pub fn literal_ranges(ranges: Vec>) -> Rc { - Rc::new(Grammar::Literal(ranges)) +pub fn literal_ranges(ranges: Vec>) -> Grammar { + Rc::new(GrammarInner::Literal(ranges)).into() } #[no_coverage] -/// Creates an [`Rc`] which matches a single character literal. +/// Creates a [`Grammar`] which matches a single character literal. /// /// ``` +/// # use fuzzcheck::mutators::grammar::literal; /// let l = literal('l'); /// ``` -pub fn literal(l: char) -> Rc { - Rc::new(Grammar::Literal(vec![l..=l])) +pub fn literal(l: char) -> Grammar { + Rc::new(GrammarInner::Literal(vec![l..=l])).into() } + #[no_coverage] -pub fn literal_range(range: R) -> Rc +pub fn literal_range(range: R) -> Grammar where R: RangeBounds, { @@ -61,13 +105,31 @@ where std::ops::Bound::Excluded(x) => unsafe { char::from_u32_unchecked(*x as u32 - 1) }, std::ops::Bound::Unbounded => panic!("The range must have an upper bound"), }; - Rc::new(Grammar::Literal(vec![start..=end])) + Rc::new(GrammarInner::Literal(vec![start..=end])).into() } /// Produces a grammar which will choose between the provided grammars. +/// +/// For example, this grammar +/// ``` +/// # use fuzzcheck::mutators::grammar::{Grammar, alternation, regex}; +/// let fuzz_or_check: Grammar = alternation([ +/// regex("fuzz"), +/// regex("check") +/// ]); +/// ``` +/// would output either "fuzz" or "check". +/// +/// It is also possible to use the `|` operator to write alternation grammars. +/// For example, the [`Grammar`] above could be equivalently written as +/// +/// ``` +/// # use fuzzcheck::mutators::grammar::{Grammar, regex}; +/// let fuzz_or_check: Grammar = regex("fuzz") | regex("check"); +/// ``` #[no_coverage] -pub fn alternation(gs: impl IntoIterator>) -> Rc { - Rc::new(Grammar::Alternation(gs.into_iter().collect())) +pub fn alternation(gs: impl IntoIterator) -> Grammar { + Rc::new(GrammarInner::Alternation(gs.into_iter().collect())).into() } /// Produces a grammar which will concatenate the output of all the provided @@ -75,20 +137,29 @@ pub fn alternation(gs: impl IntoIterator>) -> Rc { /// /// For example, the grammar /// ``` -/// concatenation([ +/// # use fuzzcheck::mutators::grammar::{concatenation, Grammar, regex}; +/// let fuzzcheck: Grammar = concatenation([ /// regex("fuzz"), /// regex("check") -/// ]) +/// ]); /// ``` /// would output "fuzzcheck". +/// +/// It is also possible to use the `+` operator to concatenate separate grammars +/// together. For example, the grammar above could be equivalently written as +/// +/// ``` +/// # use fuzzcheck::mutators::grammar::{Grammar, regex}; +/// let fuzzcheck: Grammar = regex("fuzz") + regex("check"); +/// ``` #[no_coverage] -pub fn concatenation(gs: impl IntoIterator>) -> Rc { - Rc::new(Grammar::Concatenation(gs.into_iter().collect())) +pub fn concatenation(gs: impl IntoIterator) -> Grammar { + Rc::new(GrammarInner::Concatenation(gs.into_iter().collect())).into() } #[no_coverage] -/// Repeats the provided grammar some number of times in the given range. -pub fn repetition(gs: Rc, range: R) -> Rc +/// Repeats the provided [`Grammar`] some number of times in the given range. +pub fn repetition(gs: Grammar, range: R) -> Grammar where R: RangeBounds, { @@ -102,7 +173,7 @@ where std::ops::Bound::Excluded(x) => *x, std::ops::Bound::Unbounded => usize::MAX, }; - Rc::new(Grammar::Repetition(gs, start..end)) + Rc::new(GrammarInner::Repetition(gs, start..end)).into() } #[no_coverage] @@ -110,17 +181,18 @@ where /// [`recursive`]. /// /// See the module documentation ([`super`]) for an example on how to use it. -pub fn recurse(g: &Weak) -> Rc { - Rc::new(Grammar::Recurse(g.clone())) +pub fn recurse(g: &Weak) -> Grammar { + Rc::new(GrammarInner::Recurse(g.clone())).into() } #[no_coverage] -/// Creates a recursive grammar. This function should be combined with +/// Creates a recursive [`Grammar`]. This function should be combined with /// [`recurse`] to make recursive calls. /// /// See the module documentation ([`super`]) for an example on how to use it. -pub fn recursive(data_fn: impl Fn(&Weak) -> Rc) -> Rc { - Rc::new(Grammar::Recursive(Rc::new_cyclic(|g| { - Rc::try_unwrap(data_fn(g)).unwrap() - }))) +pub fn recursive(data_fn: impl Fn(&Weak) -> Grammar) -> Grammar { + Rc::new(GrammarInner::Recursive( + Rc::new_cyclic(|g| Rc::try_unwrap(data_fn(g).0).unwrap()).into(), + )) + .into() } diff --git a/fuzzcheck/src/mutators/grammar/mod.rs b/fuzzcheck/src/mutators/grammar/mod.rs index 1b718169..45128d09 100644 --- a/fuzzcheck/src/mutators/grammar/mod.rs +++ b/fuzzcheck/src/mutators/grammar/mod.rs @@ -76,10 +76,10 @@ pub use ast::AST; #[doc(cfg(feature = "regex_grammar"))] pub use grammar::regex; #[doc(inline)] -pub use grammar::Grammar; -#[doc(inline)] pub use grammar::{alternation, concatenation, literal, literal_range, literal_ranges, recurse, recursive, repetition}; #[doc(inline)] +pub use grammar::{Grammar, GrammarInner}; +#[doc(inline)] pub use mutators::grammar_based_ast_mutator; #[doc(inline)] pub use mutators::ASTMutator; diff --git a/fuzzcheck/src/mutators/grammar/mutators.rs b/fuzzcheck/src/mutators/grammar/mutators.rs index e393a3fb..70dcbadf 100644 --- a/fuzzcheck/src/mutators/grammar/mutators.rs +++ b/fuzzcheck/src/mutators/grammar/mutators.rs @@ -6,7 +6,7 @@ use std::rc::{Rc, Weak}; use fuzzcheck_mutators_derive::make_single_variant_mutator; -use super::grammar::Grammar; +use super::grammar::{Grammar, GrammarInner}; use crate::mutators::alternation::AlternationMutator; use crate::mutators::character_classes::CharacterMutator; use crate::mutators::either::Either3; @@ -233,7 +233,7 @@ impl Mutator for ASTMutator { } #[no_coverage] -pub fn grammar_based_ast_mutator(grammar: Rc) -> ASTMutator { +pub fn grammar_based_ast_mutator(grammar: Grammar) -> ASTMutator { ASTMutator::from_grammar(grammar) } @@ -282,19 +282,19 @@ impl ASTMutator { } #[no_coverage] - pub(crate) fn from_grammar(grammar: Rc) -> Self { + pub(crate) fn from_grammar(grammar: Grammar) -> Self { let mut others = HashMap::new(); Self::from_grammar_rec(grammar, &mut others) } #[no_coverage] pub(crate) fn from_grammar_rec( - grammar: Rc, - others: &mut HashMap<*const Grammar, Weak>, + grammar: Grammar, + others: &mut HashMap<*const GrammarInner, Weak>, ) -> Self { match grammar.as_ref() { - Grammar::Literal(l) => Self::token(CharacterMutator::new(l.clone())), - Grammar::Alternation(gs) => Self::alternation(AlternationMutator::new( + GrammarInner::Literal(l) => Self::token(CharacterMutator::new(l.clone())), + GrammarInner::Alternation(gs) => Self::alternation(AlternationMutator::new( gs.iter() .map( #[no_coverage] @@ -303,7 +303,7 @@ impl ASTMutator { .collect(), 0.0, )), - Grammar::Concatenation(gs) => { + GrammarInner::Concatenation(gs) => { let mut ms = Vec::::new(); for g in gs { let m = Self::from_grammar_rec(g.clone(), others); @@ -311,21 +311,21 @@ impl ASTMutator { } Self::concatenation(FixedLenVecMutator::new_without_inherent_complexity(ms)) } - Grammar::Repetition(g, range) => Self::repetition(VecMutator::new_without_inherent_complexity( + GrammarInner::Repetition(g, range) => Self::repetition(VecMutator::new_without_inherent_complexity( Self::from_grammar_rec(g.clone(), others), range.start..=range.end - 1, )), - Grammar::Recurse(g) => { + GrammarInner::Recurse(g) => { if let Some(m) = others.get(&g.as_ptr()) { Self::recur(RecurToMutator::from(m)) } else { panic!() } } - Grammar::Recursive(g) => Self::recursive( + GrammarInner::Recursive(g) => Self::recursive( #[no_coverage] |m| { - let weak_g = Rc::downgrade(g); + let weak_g = Rc::downgrade(&g.0); others.insert(weak_g.as_ptr(), m.clone()); Self::from_grammar_rec(g.clone(), others) }, diff --git a/fuzzcheck/src/mutators/grammar/regex.rs b/fuzzcheck/src/mutators/grammar/regex.rs index 36357976..de73a540 100644 --- a/fuzzcheck/src/mutators/grammar/regex.rs +++ b/fuzzcheck/src/mutators/grammar/regex.rs @@ -1,17 +1,16 @@ -use std::rc::Rc; - use regex_syntax::hir::{Class, HirKind, Literal, RepetitionKind, RepetitionRange}; -use crate::mutators::grammar::{alternation, concatenation, literal, literal_ranges, repetition, Grammar}; +use super::grammar::Grammar; +use crate::mutators::grammar::{alternation, concatenation, literal, literal_ranges, repetition}; #[no_coverage] -pub(crate) fn grammar_from_regex(regex: &str) -> Rc { +pub(crate) fn grammar_from_regex(regex: &str) -> Grammar { let mut parser = regex_syntax::Parser::new(); let hir = parser.parse(regex).unwrap(); grammar_from_regex_hir_kind(hir.kind()) } #[no_coverage] -pub fn grammar_from_regex_hir_kind(hir: &HirKind) -> Rc { +pub fn grammar_from_regex_hir_kind(hir: &HirKind) -> Grammar { match hir { HirKind::Empty => panic!("empty regexes are not supported"), HirKind::Literal(l) => match l { diff --git a/fuzzcheck/tests/grammar_based_mutators.rs b/fuzzcheck/tests/grammar_based_mutators.rs index 6c039168..32e947f2 100644 --- a/fuzzcheck/tests/grammar_based_mutators.rs +++ b/fuzzcheck/tests/grammar_based_mutators.rs @@ -2,84 +2,75 @@ #![allow(unused_attributes)] #![feature(no_coverage)] -use std::rc::{Rc, Weak}; +use std::rc::Weak; use fuzzcheck::mutators::grammar::*; use fuzzcheck::mutators::testing_utilities::test_mutator; -// use fuzzcheck::{DefaultMutator, Mutator}; #[no_coverage] -fn text() -> Rc { +fn text() -> Grammar { regex("([\u{0}-\u{7f}]|.)+|CDATA") } + #[no_coverage] -fn whitespace() -> Rc { +fn whitespace() -> Grammar { regex("[ \t\n\r]+") } + #[no_coverage] -fn header(md: &Weak) -> Rc { - concatenation([regex("#+"), recurse(md), regex("#*")]) +fn header(md: &Weak) -> Grammar { + regex("#+") + recurse(md) + regex("#*") } + #[no_coverage] -pub fn quote() -> Rc { +pub fn quote() -> Grammar { regex(">+") } + #[no_coverage] -pub fn list() -> Rc { +pub fn list() -> Grammar { regex("[-*+]|[0-9]*[.)]") } + #[no_coverage] -pub fn emphasis(md: &Weak) -> Rc { - concatenation([regex("[*_~`]+"), recurse(md), regex("[*_~`]+")]) +pub fn emphasis(md: &Weak) -> Grammar { + regex("[*_~`]+") + recurse(md) + regex("[*_~`]+") } + #[no_coverage] -pub fn autolink(md: &Weak) -> Rc { +pub fn autolink(md: &Weak) -> Grammar { concatenation([literal('<'), alternation([recurse(md), text(), web()]), literal('>')]) } + #[no_coverage] -pub fn reference(md: &Weak) -> Rc { +pub fn reference(md: &Weak) -> Grammar { concatenation([ regex("!?\\["), recurse(md), literal(']'), - repetition(concatenation([literal('('), recurse(md), literal(')')]), 0..=1), + repetition(literal('(') + recurse(md) + literal(')'), 0..=1), ]) } #[no_coverage] -pub fn reference_definition(md: &Weak) -> Rc { - concatenation([ - literal('['), - recurse(md), - literal(']'), - repetition(whitespace(), 0..=1), - literal(':'), - ]) +pub fn reference_definition(md: &Weak) -> Grammar { + literal('[') + recurse(md) + literal(']') + repetition(whitespace(), 0..=1) + literal(':') } #[no_coverage] -pub fn thematic_break_or_setext_or_fence() -> Rc { - alternation([ - regex("[* \t]{3,}"), - regex("[- \t]{3,}"), - regex("[= \t]{3,}"), - regex("[~ \t]{3,}"), - regex("[` \t]{3,}"), - ]) +pub fn thematic_break_or_setext_or_fence() -> Grammar { + regex("[* \t]{3,}") | regex("[- \t]{3,}") | regex("[= \t]{3,}") | regex("[~ \t]{3,}") | regex("[` \t]{3,}") } #[no_coverage] -pub fn backslash() -> Rc { +pub fn backslash() -> Grammar { literal('\\') } + #[no_coverage] -pub fn entity() -> Rc { - concatenation([ - literal('&'), - repetition(literal('#'), 0..=1), - repetition(text(), 0..=1), - repetition(literal(';'), 0..=1), - ]) +pub fn entity() -> Grammar { + literal('&') + repetition(literal('#'), 0..=1) + repetition(text(), 0..=1) + repetition(literal(';'), 0..=1) } + #[no_coverage] -pub fn task(whole: &Weak) -> Rc { +pub fn task(whole: &Weak) -> Grammar { concatenation([ regex("-|\\+"), alternation([whitespace(), text()]), @@ -88,16 +79,17 @@ pub fn task(whole: &Weak) -> Rc { literal(']'), ]) } + #[no_coverage] -pub fn indented_block(whole: &Weak) -> Rc { - concatenation([regex("[ \t]+"), recurse(whole)]) +pub fn indented_block(whole: &Weak) -> Grammar { + regex("[ \t]+") + recurse(whole) } + #[no_coverage] -pub fn html() -> Rc { - concatenation([ - regex(" Grammar { + regex(" Rc { literal('"'), ]), 0.., - ), - literal('>'), - ]) + ) + + literal('>') } + #[no_coverage] -pub fn html_comment(whole: &Weak) -> Rc { - concatenation([regex("<-+"), recurse(whole), regex("-+>")]) +pub fn html_comment(whole: &Weak) -> Grammar { + regex("<-+") + recurse(whole) + regex("-+>") } + #[no_coverage] -fn quoted(whole: &Weak) -> Rc { - concatenation([regex("[\"']"), alternation([text(), recurse(whole)]), regex("[\"']")]) +fn quoted(whole: &Weak) -> Grammar { + regex("[\"']") + (text() | recurse(whole)) + regex("[\"']") } + #[no_coverage] -fn fenced_block(whole: &Weak) -> Rc { +fn fenced_block(whole: &Weak) -> Grammar { concatenation([regex("~{3,}|`{3,}"), recurse(whole), regex("~{3,}|`{3,}")]) } + #[no_coverage] -fn table(whole: &Weak) -> Rc { +fn table(whole: &Weak) -> Grammar { repetition( // row concatenation([ @@ -142,12 +137,14 @@ fn table(whole: &Weak) -> Rc { 1..10, ) } + #[no_coverage] -fn web() -> Rc { +fn web() -> Grammar { concatenation([regex("(https?://)?(www.)?"), text(), literal('.'), text()]) } + #[no_coverage] -fn markdown() -> Rc { +fn markdown() -> Grammar { recursive(|md| { repetition( alternation([