// src/parse.rs -- registered in src/lib.rs with `mod parse;`.

//! Recursive-descent parser for a small regular-expression language.
//!
//! Inspired by [Matt Might's article](https://matt.might.net/articles/parsing-regex-with-recursive-descent/)
//!
//! Parses the regular expression using the following grammar
//! ```text
//! # e.g. abc|c(de)*
//! <regex>  ::= <term> '|' <regex>
//!            | <term>
//!
//! <term>   ::= { <factor> }
//!
//! <factor> ::= <base> { '*' }
//!
//! <base>   ::= <char>
//!            | '\' <char>
//!            | '(' <regex> ')'
//! ```

use std::iter::Peekable;
use std::str::Chars;

/// Syntax tree produced by the parser.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Regex {
    /// Alternation: left-hand side `|` right-hand side.
    Choice(Box<Regex>, Box<Regex>),
    /// A (possibly empty) sequence of factors matched one after another.
    Term(Vec<Regex>),
    /// A base expression followed by `*`.
    Repetition(Box<Regex>),
    /// The character that followed a backslash, stored as-is.
    Primitive(char),
    /// A literal character.
    Char(char),
}

/// Parser state: a one-character-lookahead cursor over the pattern.
#[derive(Debug)]
struct Parser<'a> {
    chars: Peekable<Chars<'a>>,
}

/// Result of every grammar production; the error carries no detail.
type RegexResult = Result<Regex, ()>;

impl<'a> Parser<'a> {
    /// Parses `regex` into a [`Regex`] tree.
    ///
    /// # Errors
    ///
    /// Returns `Err(())` when the pattern is malformed: an unclosed `(`,
    /// an unmatched `)`, or a trailing `\` with nothing to escape.
    pub fn parse(regex: &'a str) -> Result<Regex, ()> {
        let mut parser = Self {
            chars: regex.chars().peekable(),
        };
        let parsed = parser.regex()?;

        // The top-level production only stops early on an unmatched ')';
        // accepting it would silently drop the rest of the pattern.
        match parser.peek() {
            None => Ok(parsed),
            Some(_) => Err(()),
        }
    }

    /// Consumes and returns the next character, if any.
    fn next(&mut self) -> Option<char> {
        self.chars.next()
    }

    /// Consumes `c` if it is the next character; otherwise fails without
    /// consuming anything.
    fn expect(&mut self, c: char) -> Result<(), ()> {
        if self.peek() == Some(c) {
            self.next();
            Ok(())
        } else {
            Err(())
        }
    }

    /// Returns the next character without consuming it.
    fn peek(&mut self) -> Option<char> {
        self.chars.peek().copied()
    }

    // regex term types

    /// `<regex> ::= <term> '|' <regex> | <term>`
    fn regex(&mut self) -> RegexResult {
        let term = self.term()?;
        if self.peek() == Some('|') {
            // Consume the '|' before parsing the right-hand side; leaving it
            // in place would make the recursive call see it again forever.
            self.next();
            let rhs = self.regex()?;
            Ok(Regex::Choice(Box::new(term), Box::new(rhs)))
        } else {
            Ok(term)
        }
    }

    /// `<term> ::= { <factor> }`
    fn term(&mut self) -> RegexResult {
        let mut factors = Vec::new();

        // A term ends at end of input, at the ')' closing the enclosing
        // group, or at the '|' separating alternatives. A '(' must NOT end
        // the term: it starts a group that `base` parses.
        while !matches!(self.peek(), None | Some(')') | Some('|')) {
            factors.push(self.factor()?);
        }
        Ok(Regex::Term(factors))
    }

    /// `<factor> ::= <base> { '*' }`
    ///
    /// Each trailing `*` wraps the expression in one more
    /// [`Regex::Repetition`].
    fn factor(&mut self) -> RegexResult {
        let mut base = self.base()?;

        while self.peek() == Some('*') {
            self.next();
            base = Regex::Repetition(Box::new(base));
        }

        Ok(base)
    }

    /// `<base> ::= <char> | '\' <char> | '(' <regex> ')'`
    ///
    /// A parenthesised group yields the inner expression directly; no
    /// dedicated group node is created. Any character other than `(` and
    /// `\` that reaches this production (including a leading `*`) becomes a
    /// literal [`Regex::Char`].
    fn base(&mut self) -> RegexResult {
        match self.peek() {
            Some('(') => {
                self.next();
                let regex = self.regex()?;
                self.expect(')')?;
                Ok(regex)
            }
            Some('\\') => {
                self.next();
                // A backslash at the very end has nothing to escape.
                let esc = self.next().ok_or(())?;
                Ok(Regex::Primitive(esc))
            }
            Some(c) => {
                self.next();
                Ok(Regex::Char(c))
            }
            None => Err(()),
        }
    }
}

#[cfg(test)]
mod test {}