From 7baa33efbf9b47abc6d603ebeea225811aec6767 Mon Sep 17 00:00:00 2001
From: Teymour Aldridge <teymour@reasoning.page>
Date: Fri, 26 Aug 2022 20:42:09 +0100
Subject: [PATCH] Add operator overloading for building grammars.

---
 fuzzcheck/src/mutators/grammar/grammar.rs  | 138 ++++++++++++++++-----
 fuzzcheck/src/mutators/grammar/mod.rs      |   4 +-
 fuzzcheck/src/mutators/grammar/mutators.rs |  24 ++--
 fuzzcheck/src/mutators/grammar/regex.rs    |   9 +-
 fuzzcheck/tests/grammar_based_mutators.rs  | 109 ++++++++--------
 5 files changed, 176 insertions(+), 108 deletions(-)
diff --git a/fuzzcheck/src/mutators/grammar/grammar.rs b/fuzzcheck/src/mutators/grammar/grammar.rs
index 9a737c32..d51f170d 100644
--- a/fuzzcheck/src/mutators/grammar/grammar.rs
+++ b/fuzzcheck/src/mutators/grammar/grammar.rs
@@ -1,4 +1,4 @@
-use std::ops::{Range, RangeBounds, RangeInclusive};
+use std::ops::{Add, BitOr, Range, RangeBounds, RangeInclusive};
 use std::rc::{Rc, Weak};
 
 #[cfg(feature = "regex_grammar")]
@@ -8,46 +8,90 @@ use crate::mutators::grammar::regex::grammar_from_regex;
 /// A grammar which can be used for fuzzing.
 ///
 /// See [the module documentation](crate::mutators::grammar) for advice on how to create a grammar.
-pub enum Grammar {
+pub enum GrammarInner {
     Literal(Vec<RangeInclusive<char>>),
-    Alternation(Vec<Rc<Grammar>>),
-    Concatenation(Vec<Rc<Grammar>>),
-    Repetition(Rc<Grammar>, Range<usize>),
-    Recurse(Weak<Grammar>),
-    Recursive(Rc<Grammar>),
+    Alternation(Vec<Grammar>),
+    Concatenation(Vec<Grammar>),
+    Repetition(Grammar, Range<usize>),
+    Recurse(Weak<GrammarInner>),
+    Recursive(Grammar),
+}
+
+#[derive(Debug, Clone)]
+/// A [`Grammar`] can be transformed into an [`ASTMutator`] (which generates
+/// [`AST`](super::AST)s corresponding to the grammar in question) or combined
+/// with other grammars to produce a more complicated grammar.
+///
+/// For examples of how to use this struct, see the module documentation
+/// ([`super`]).
+///
+/// [`ASTMutator`]: crate::mutators::grammar::ASTMutator
+pub struct Grammar(pub(crate) Rc<GrammarInner>);
+
+impl From<Rc<GrammarInner>> for Grammar {
+    fn from(inner: Rc<GrammarInner>) -> Self {
+        Grammar(inner)
+    }
+}
+
+impl AsRef<GrammarInner> for Grammar {
+    fn as_ref(&self) -> &GrammarInner {
+        &self.0
+    }
+}
+
+impl Add for Grammar {
+    type Output = Grammar;
+
+    /// Calls [`concatenation`] on the two provided grammars.
+    fn add(self, rhs: Self) -> Self::Output {
+        concatenation([self, rhs])
+    }
+}
+
+impl BitOr for Grammar {
+    type Output = Grammar;
+
+    /// Calls [`alternation`] on the two provided grammars.
+    fn bitor(self, rhs: Self) -> Self::Output {
+        alternation([self, rhs])
+    }
 }
 
 #[cfg(feature = "regex_grammar")]
 #[doc(cfg(feature = "regex_grammar"))]
 #[no_coverage]
-pub fn regex(s: &str) -> Rc<Grammar> {
+pub fn regex(s: &str) -> Grammar {
     grammar_from_regex(s)
 }
 
 #[no_coverage]
-/// Creates an [`Rc<Grammar>`] which outputs characters in the given range.
+/// Creates a [`Grammar`] which outputs characters in the given ranges.
 ///
 /// For example, to generate characters in the range 'a' to 'z' (inclusive), one
 /// could use this code
 ///
 /// ```
-/// let a_to_z = literal_ranges('a'..='z');
+/// # use fuzzcheck::mutators::grammar::literal_ranges;
+/// let a_to_z = literal_ranges(vec!['a'..='z']);
 /// ```
-pub fn literal_ranges(ranges: Vec<RangeInclusive<char>>) -> Rc<Grammar> {
-    Rc::new(Grammar::Literal(ranges))
+pub fn literal_ranges(ranges: Vec<RangeInclusive<char>>) -> Grammar {
+    Rc::new(GrammarInner::Literal(ranges)).into()
 }
 
 #[no_coverage]
-/// Creates an [`Rc<Grammar>`] which matches a single character literal.
+/// Creates a [`Grammar`] which matches a single character literal.
 ///
 /// ```
+/// # use fuzzcheck::mutators::grammar::literal;
 /// let l = literal('l');
 /// ```
-pub fn literal(l: char) -> Rc<Grammar> {
-    Rc::new(Grammar::Literal(vec![l..=l]))
+pub fn literal(l: char) -> Grammar {
+    Rc::new(GrammarInner::Literal(vec![l..=l])).into()
 }
+
 #[no_coverage]
-pub fn literal_range<R>(range: R) -> Rc<Grammar>
+pub fn literal_range<R>(range: R) -> Grammar
 where
     R: RangeBounds<char>,
 {
@@ -61,13 +105,31 @@ where
         std::ops::Bound::Excluded(x) => unsafe { char::from_u32_unchecked(*x as u32 - 1) },
         std::ops::Bound::Unbounded => panic!("The range must have an upper bound"),
     };
-    Rc::new(Grammar::Literal(vec![start..=end]))
+    Rc::new(GrammarInner::Literal(vec![start..=end])).into()
 }
 
 /// Produces a grammar which will choose between the provided grammars.
+///
+/// For example, this grammar
+/// ```
+/// # use fuzzcheck::mutators::grammar::{Grammar, alternation, regex};
+/// let fuzz_or_check: Grammar = alternation([
+///     regex("fuzz"),
+///     regex("check")
+/// ]);
+/// ```
+/// would output either "fuzz" or "check".
+///
+/// It is also possible to use the `|` operator to write alternation grammars.
+/// For example, the [`Grammar`] above could be equivalently written as
+///
+/// ```
+/// # use fuzzcheck::mutators::grammar::{Grammar, regex};
+/// let fuzz_or_check: Grammar = regex("fuzz") | regex("check");
+/// ```
 #[no_coverage]
-pub fn alternation(gs: impl IntoIterator<Item = Rc<Grammar>>) -> Rc<Grammar> {
-    Rc::new(Grammar::Alternation(gs.into_iter().collect()))
+pub fn alternation(gs: impl IntoIterator<Item = Grammar>) -> Grammar {
+    Rc::new(GrammarInner::Alternation(gs.into_iter().collect())).into()
 }
 
 /// Produces a grammar which will concatenate the output of all the provided
@@ -75,20 +137,29 @@ pub fn alternation(gs: impl IntoIterator<Item = Rc<Grammar>>) -> Rc<Grammar> {
 ///
 /// For example, the grammar
 /// ```
-/// concatenation([
+/// # use fuzzcheck::mutators::grammar::{concatenation, Grammar, regex};
+/// let fuzzcheck: Grammar = concatenation([
 ///     regex("fuzz"),
 ///     regex("check")
-/// ])
+/// ]);
 /// ```
 /// would output "fuzzcheck".
+///
+/// It is also possible to use the `+` operator to concatenate separate grammars
+/// together. For example, the grammar above could be equivalently written as
+///
+/// ```
+/// # use fuzzcheck::mutators::grammar::{Grammar, regex};
+/// let fuzzcheck: Grammar = regex("fuzz") + regex("check");
+/// ```
 #[no_coverage]
-pub fn concatenation(gs: impl IntoIterator<Item = Rc<Grammar>>) -> Rc<Grammar> {
-    Rc::new(Grammar::Concatenation(gs.into_iter().collect()))
+pub fn concatenation(gs: impl IntoIterator<Item = Grammar>) -> Grammar {
+    Rc::new(GrammarInner::Concatenation(gs.into_iter().collect())).into()
 }
 
 #[no_coverage]
-/// Repeats the provided grammar some number of times in the given range.
-pub fn repetition<R>(gs: Rc<Grammar>, range: R) -> Rc<Grammar>
+/// Repeats the provided [`Grammar`] some number of times in the given range.
+pub fn repetition<R>(gs: Grammar, range: R) -> Grammar
 where
     R: RangeBounds<usize>,
 {
@@ -102,7 +173,7 @@ where
         std::ops::Bound::Excluded(x) => *x,
         std::ops::Bound::Unbounded => usize::MAX,
     };
-    Rc::new(Grammar::Repetition(gs, start..end))
+    Rc::new(GrammarInner::Repetition(gs, start..end)).into()
 }
 
 #[no_coverage]
@@ -110,17 +181,18 @@ where
 /// [`recursive`].
 ///
 /// See the module documentation ([`super`]) for an example on how to use it.
-pub fn recurse(g: &Weak<Grammar>) -> Rc<Grammar> {
-    Rc::new(Grammar::Recurse(g.clone()))
+pub fn recurse(g: &Weak<GrammarInner>) -> Grammar {
+    Rc::new(GrammarInner::Recurse(g.clone())).into()
 }
 
 #[no_coverage]
-/// Creates a recursive grammar. This function should be combined with
+/// Creates a recursive [`Grammar`]. This function should be combined with
 /// [`recurse`] to make recursive calls.
 ///
 /// See the module documentation ([`super`]) for an example on how to use it.
-pub fn recursive(data_fn: impl Fn(&Weak<Grammar>) -> Rc<Grammar>) -> Rc<Grammar> {
-    Rc::new(Grammar::Recursive(Rc::new_cyclic(|g| {
-        Rc::try_unwrap(data_fn(g)).unwrap()
-    })))
+pub fn recursive(data_fn: impl Fn(&Weak<GrammarInner>) -> Grammar) -> Grammar {
+    Rc::new(GrammarInner::Recursive(
+        Rc::new_cyclic(|g| Rc::try_unwrap(data_fn(g).0).unwrap()).into(),
+    ))
+    .into()
 }
diff --git a/fuzzcheck/src/mutators/grammar/mod.rs b/fuzzcheck/src/mutators/grammar/mod.rs
index 1b718169..45128d09 100644
--- a/fuzzcheck/src/mutators/grammar/mod.rs
+++ b/fuzzcheck/src/mutators/grammar/mod.rs
@@ -76,10 +76,10 @@ pub use ast::AST;
 #[doc(cfg(feature = "regex_grammar"))]
 pub use grammar::regex;
 #[doc(inline)]
-pub use grammar::Grammar;
-#[doc(inline)]
 pub use grammar::{alternation, concatenation, literal, literal_range, literal_ranges, recurse, recursive, repetition};
 #[doc(inline)]
+pub use grammar::{Grammar, GrammarInner};
+#[doc(inline)]
 pub use mutators::grammar_based_ast_mutator;
 #[doc(inline)]
 pub use mutators::ASTMutator;
diff --git a/fuzzcheck/src/mutators/grammar/mutators.rs b/fuzzcheck/src/mutators/grammar/mutators.rs
index e393a3fb..70dcbadf 100644
--- a/fuzzcheck/src/mutators/grammar/mutators.rs
+++ b/fuzzcheck/src/mutators/grammar/mutators.rs
@@ -6,7 +6,7 @@ use std::rc::{Rc, Weak};
 
 use fuzzcheck_mutators_derive::make_single_variant_mutator;
 
-use super::grammar::Grammar;
+use super::grammar::{Grammar, GrammarInner};
 use crate::mutators::alternation::AlternationMutator;
 use crate::mutators::character_classes::CharacterMutator;
 use crate::mutators::either::Either3;
@@ -233,7 +233,7 @@ impl Mutator<AST> for ASTMutator {
 }
 
 #[no_coverage]
-pub fn grammar_based_ast_mutator(grammar: Rc<Grammar>) -> ASTMutator {
+pub fn grammar_based_ast_mutator(grammar: Grammar) -> ASTMutator {
     ASTMutator::from_grammar(grammar)
 }
 
@@ -282,19 +282,19 @@ impl ASTMutator {
     }
 
     #[no_coverage]
-    pub(crate) fn from_grammar(grammar: Rc<Grammar>) -> Self {
+    pub(crate) fn from_grammar(grammar: Grammar) -> Self {
         let mut others = HashMap::new();
         Self::from_grammar_rec(grammar, &mut others)
     }
 
     #[no_coverage]
     pub(crate) fn from_grammar_rec(
-        grammar: Rc<Grammar>,
-        others: &mut HashMap<*const Grammar, Weak<ASTMutator>>,
+        grammar: Grammar,
+        others: &mut HashMap<*const GrammarInner, Weak<ASTMutator>>,
     ) -> Self {
         match grammar.as_ref() {
-            Grammar::Literal(l) => Self::token(CharacterMutator::new(l.clone())),
-            Grammar::Alternation(gs) => Self::alternation(AlternationMutator::new(
+            GrammarInner::Literal(l) => Self::token(CharacterMutator::new(l.clone())),
+            GrammarInner::Alternation(gs) => Self::alternation(AlternationMutator::new(
                 gs.iter()
                     .map(
                         #[no_coverage]
@@ -303,7 +303,7 @@ impl ASTMutator {
                     .collect(),
                 0.0,
             )),
-            Grammar::Concatenation(gs) => {
+            GrammarInner::Concatenation(gs) => {
                 let mut ms = Vec::<ASTMutator>::new();
                 for g in gs {
                     let m = Self::from_grammar_rec(g.clone(), others);
@@ -311,21 +311,21 @@ impl ASTMutator {
                 }
                 Self::concatenation(FixedLenVecMutator::new_without_inherent_complexity(ms))
             }
-            Grammar::Repetition(g, range) => Self::repetition(VecMutator::new_without_inherent_complexity(
+            GrammarInner::Repetition(g, range) => Self::repetition(VecMutator::new_without_inherent_complexity(
                 Self::from_grammar_rec(g.clone(), others),
                 range.start..=range.end - 1,
             )),
-            Grammar::Recurse(g) => {
+            GrammarInner::Recurse(g) => {
                 if let Some(m) = others.get(&g.as_ptr()) {
                     Self::recur(RecurToMutator::from(m))
                 } else {
                     panic!()
                 }
             }
-            Grammar::Recursive(g) => Self::recursive(
+            GrammarInner::Recursive(g) => Self::recursive(
                 #[no_coverage]
                 |m| {
-                    let weak_g = Rc::downgrade(g);
+                    let weak_g = Rc::downgrade(&g.0);
                     others.insert(weak_g.as_ptr(), m.clone());
                     Self::from_grammar_rec(g.clone(), others)
                 },
diff --git a/fuzzcheck/src/mutators/grammar/regex.rs b/fuzzcheck/src/mutators/grammar/regex.rs
index 36357976..de73a540 100644
--- a/fuzzcheck/src/mutators/grammar/regex.rs
+++ b/fuzzcheck/src/mutators/grammar/regex.rs
@@ -1,17 +1,16 @@
-use std::rc::Rc;
-
 use regex_syntax::hir::{Class, HirKind, Literal, RepetitionKind, RepetitionRange};
 
-use crate::mutators::grammar::{alternation, concatenation, literal, literal_ranges, repetition, Grammar};
+use super::grammar::Grammar;
+use crate::mutators::grammar::{alternation, concatenation, literal, literal_ranges, repetition};
 
 #[no_coverage]
-pub(crate) fn grammar_from_regex(regex: &str) -> Rc<Grammar> {
+pub(crate) fn grammar_from_regex(regex: &str) -> Grammar {
     let mut parser = regex_syntax::Parser::new();
     let hir = parser.parse(regex).unwrap();
     grammar_from_regex_hir_kind(hir.kind())
 }
 #[no_coverage]
-pub fn grammar_from_regex_hir_kind(hir: &HirKind) -> Rc<Grammar> {
+pub fn grammar_from_regex_hir_kind(hir: &HirKind) -> Grammar {
     match hir {
         HirKind::Empty => panic!("empty regexes are not supported"),
         HirKind::Literal(l) => match l {
diff --git a/fuzzcheck/tests/grammar_based_mutators.rs b/fuzzcheck/tests/grammar_based_mutators.rs
index 6c039168..32e947f2 100644
--- a/fuzzcheck/tests/grammar_based_mutators.rs
+++ b/fuzzcheck/tests/grammar_based_mutators.rs
@@ -2,84 +2,75 @@
 #![allow(unused_attributes)]
 #![feature(no_coverage)]
 
-use std::rc::{Rc, Weak};
+use std::rc::Weak;
 
 use fuzzcheck::mutators::grammar::*;
 use fuzzcheck::mutators::testing_utilities::test_mutator;
-// use fuzzcheck::{DefaultMutator, Mutator};
 
 #[no_coverage]
-fn text() -> Rc<Grammar> {
+fn text() -> Grammar {
     regex("([\u{0}-\u{7f}]|.)+|CDATA")
 }
+
 #[no_coverage]
-fn whitespace() -> Rc<Grammar> {
+fn whitespace() -> Grammar {
     regex("[ \t\n\r]+")
 }
+
 #[no_coverage]
-fn header(md: &Weak<Grammar>) -> Rc<Grammar> {
-    concatenation([regex("#+"), recurse(md), regex("#*")])
+fn header(md: &Weak<GrammarInner>) -> Grammar {
+    regex("#+") + recurse(md) + regex("#*")
 }
+
 #[no_coverage]
-pub fn quote() -> Rc<Grammar> {
+pub fn quote() -> Grammar {
     regex(">+")
 }
+
 #[no_coverage]
-pub fn list() -> Rc<Grammar> {
+pub fn list() -> Grammar {
     regex("[-*+]|[0-9]*[.)]")
 }
+
 #[no_coverage]
-pub fn emphasis(md: &Weak<Grammar>) -> Rc<Grammar> {
-    concatenation([regex("[*_~`]+"), recurse(md), regex("[*_~`]+")])
+pub fn emphasis(md: &Weak<GrammarInner>) -> Grammar {
+    regex("[*_~`]+") + recurse(md) + regex("[*_~`]+")
 }
+
 #[no_coverage]
-pub fn autolink(md: &Weak<Grammar>) -> Rc<Grammar> {
+pub fn autolink(md: &Weak<GrammarInner>) -> Grammar {
     concatenation([literal('<'), alternation([recurse(md), text(), web()]), literal('>')])
 }
+
 #[no_coverage]
-pub fn reference(md: &Weak<Grammar>) -> Rc<Grammar> {
+pub fn reference(md: &Weak<GrammarInner>) -> Grammar {
     concatenation([
         regex("!?\\["),
         recurse(md),
         literal(']'),
-        repetition(concatenation([literal('('), recurse(md), literal(')')]), 0..=1),
+        repetition(literal('(') + recurse(md) + literal(')'), 0..=1),
     ])
 }
 #[no_coverage]
-pub fn reference_definition(md: &Weak<Grammar>) -> Rc<Grammar> {
-    concatenation([
-        literal('['),
-        recurse(md),
-        literal(']'),
-        repetition(whitespace(), 0..=1),
-        literal(':'),
-    ])
+pub fn reference_definition(md: &Weak<GrammarInner>) -> Grammar {
+    literal('[') + recurse(md) + literal(']') + repetition(whitespace(), 0..=1) + literal(':')
 }
 #[no_coverage]
-pub fn thematic_break_or_setext_or_fence() -> Rc<Grammar> {
-    alternation([
-        regex("[* \t]{3,}"),
-        regex("[- \t]{3,}"),
-        regex("[= \t]{3,}"),
-        regex("[~ \t]{3,}"),
-        regex("[` \t]{3,}"),
-    ])
+pub fn thematic_break_or_setext_or_fence() -> Grammar {
+    regex("[* \t]{3,}") | regex("[- \t]{3,}") | regex("[= \t]{3,}") | regex("[~ \t]{3,}") | regex("[` \t]{3,}")
 }
 #[no_coverage]
-pub fn backslash() -> Rc<Grammar> {
+pub fn backslash() -> Grammar {
     literal('\\')
 }
+
 #[no_coverage]
-pub fn entity() -> Rc<Grammar> {
-    concatenation([
-        literal('&'),
-        repetition(literal('#'), 0..=1),
-        repetition(text(), 0..=1),
-        repetition(literal(';'), 0..=1),
-    ])
+pub fn entity() -> Grammar {
+    literal('&') + repetition(literal('#'), 0..=1) + repetition(text(), 0..=1) + repetition(literal(';'), 0..=1)
 }
+
 #[no_coverage]
-pub fn task(whole: &Weak<Grammar>) -> Rc<Grammar> {
+pub fn task(whole: &Weak<GrammarInner>) -> Grammar {
     concatenation([
         regex("-|\\+"),
         alternation([whitespace(), text()]),
@@ -88,16 +79,17 @@ pub fn task(whole: &Weak<Grammar>) -> Rc<Grammar> {
         literal(']'),
     ])
 }
+
 #[no_coverage]
-pub fn indented_block(whole: &Weak<Grammar>) -> Rc<Grammar> {
-    concatenation([regex("[ \t]+"), recurse(whole)])
+pub fn indented_block(whole: &Weak<GrammarInner>) -> Grammar {
+    regex("[ \t]+") + recurse(whole)
 }
+
 #[no_coverage]
-pub fn html() -> Rc<Grammar> {
-    concatenation([
-        regex("</?"),
-        text(),
-        repetition(
+pub fn html() -> Grammar {
+    regex("</?")
+        + text()
+        + repetition(
             concatenation([
                 regex("[ \t]?"),
                 text(),
@@ -107,24 +99,27 @@ pub fn html() -> Rc<Grammar> {
                 literal('"'),
             ]),
             0..,
-        ),
-        literal('>'),
-    ])
+        )
+        + literal('>')
 }
+
 #[no_coverage]
-pub fn html_comment(whole: &Weak<Grammar>) -> Rc<Grammar> {
-    concatenation([regex("<-+"), recurse(whole), regex("-+>")])
+pub fn html_comment(whole: &Weak<GrammarInner>) -> Grammar {
+    regex("<-+") + recurse(whole) + regex("-+>")
 }
+
 #[no_coverage]
-fn quoted(whole: &Weak<Grammar>) -> Rc<Grammar> {
-    concatenation([regex("[\"']"), alternation([text(), recurse(whole)]), regex("[\"']")])
+fn quoted(whole: &Weak<GrammarInner>) -> Grammar {
+    regex("[\"']") + (text() | recurse(whole)) + regex("[\"']")
 }
+
 #[no_coverage]
-fn fenced_block(whole: &Weak<Grammar>) -> Rc<Grammar> {
+fn fenced_block(whole: &Weak<GrammarInner>) -> Grammar {
     concatenation([regex("~{3,}|`{3,}"), recurse(whole), regex("~{3,}|`{3,}")])
 }
+
 #[no_coverage]
-fn table(whole: &Weak<Grammar>) -> Rc<Grammar> {
+fn table(whole: &Weak<GrammarInner>) -> Grammar {
     repetition(
         // row
         concatenation([
@@ -142,12 +137,14 @@ fn table(whole: &Weak<Grammar>) -> Rc<Grammar> {
         1..10,
     )
 }
+
 #[no_coverage]
-fn web() -> Rc<Grammar> {
+fn web() -> Grammar {
     concatenation([regex("(https?://)?(www.)?"), text(), literal('.'), text()])
 }
+
 #[no_coverage]
-fn markdown() -> Rc<Grammar> {
+fn markdown() -> Grammar {
     recursive(|md| {
         repetition(
             alternation([