diff --git a/src/lib.rs b/src/lib.rs index eedd5de..f055e6e 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,10 +1,5 @@ mod parse; -#[cfg(test)] -mod tests { - #[test] - fn it_works() { - let result = 2 + 2; - assert_eq!(result, 4); - } +pub fn no_unused_code(regex: &str) { + let _ = parse::Parser::parse(regex); } diff --git a/src/parse.rs b/src/parse.rs index 4d8ffb5..c9f9cfe 100644 --- a/src/parse.rs +++ b/src/parse.rs @@ -3,7 +3,7 @@ //! Inspired by [Matt Mights article](https://matt.might.net/articles/parsing-regex-with-recursive-descent/) //! //! Parses the regular expression using the following grammar -//! ```text +//! ```txt //! # e.g. abc|c(de)* //! ::= '|' //! | term @@ -15,9 +15,16 @@ //! ::= //! | '\' //! | '(' ')' +//! | '[' { } ']' +//! +//! ::= +//! | +//! +//! ::= '-' //! ``` use std::iter::Peekable; +use std::ops::Range; use std::str::Chars; #[derive(Debug, Clone, PartialEq, Eq)] @@ -25,6 +32,8 @@ pub enum Regex { Choice(Box, Box), Sequence(Vec), Repetition(Box), + Set(Vec), + Range(Range), Primitive(Primitive), Char(char), } @@ -36,7 +45,7 @@ pub enum Primitive { } #[derive(Debug)] -struct Parser<'a> { +pub struct Parser<'a> { chars: Peekable>, } @@ -51,6 +60,7 @@ impl<'a> Parser<'a> { parser.regex() } + #[must_use] fn next(&mut self) -> Option { self.chars.next() } @@ -123,6 +133,15 @@ impl<'a> Parser<'a> { _ => return Err(()), })) } + Some('[') => { + let _ = self.next(); + let mut elems = Vec::new(); + while self.peek() != Some(']') { + elems.push(self.set_elem()?); + } + let _ = self.next(); + Ok(Regex::Set(elems)) + } Some(char) => { let _ = self.next(); Ok(Regex::Char(char)) @@ -130,16 +149,24 @@ impl<'a> Parser<'a> { None => Err(()), } } + + fn set_elem(&mut self) -> RegexResult { + let first_char = self.next().ok_or(())?; + + if let Some('-') = self.peek() { + let _ = self.next(); + let second_char = self.next().ok_or(())?; + Ok(Regex::Range(first_char..second_char)) + } else { + Ok(Regex::Char(first_char)) + } + } } #[cfg(test)] mod test { use crate::parse::{Parser, Regex, Regex::*}; - fn box_seq(elements: Vec) -> Box { - Box::new(Sequence(elements)) - } - fn char_seq(char: char) -> Regex { Sequence(vec![Char(char)]) } @@ -184,4 +211,27 @@ mod test { Sequence(vec![char_seq('a'), Sequence(vec![Char('b'), Char('c')])]) ) } + + #[test] + fn set() { + let regex = "[ab]"; + let parsed = Parser::parse(regex).unwrap(); + assert_eq!( + parsed, + Sequence(vec![Regex::Set(vec![Regex::Char('a'), Regex::Char('b')])]) + ) + } + + #[test] + fn set_range() { + let regex = "[a-zA-Z]"; + let parsed = Parser::parse(regex).unwrap(); + assert_eq!( + parsed, + Sequence(vec![Regex::Set(vec![ + Regex::Range('a'..'z'), + Regex::Range('A'..'Z') + ])]) + ) + } }