pub mod parse; use self::parse::{ iter::{Peek, PeekExt, PeekableTakeWhile}, requires, Parse, ParseError, }; use std::convert::Infallible; use std::str::FromStr; #[derive(Clone, Debug, Eq, PartialEq)] pub struct Ident { name: String, } impl Parse for Ident { type Err = Infallible; fn parse>(mut iter: I) -> Result> { // rustc can't predict type, but rust-analyzer can... let name: String = PeekableTakeWhile::new(&mut iter, |c| c.is_alphabetic()).collect(); if name.is_empty() { if let Some(c) = iter.peek() { Err(ParseError::InvalidCharacter(*c)) } else { Err(ParseError::EndOfFile) } } else { Ok(Self { name }) } } } impl FromStr for Ident { type Err = ParseError<::Err>; fn from_str(s: &str) -> Result { Ident::parse(s.chars().peekable()) } } #[derive(Clone, Debug, Eq, PartialEq)] pub struct Literal { contents: String, } impl Parse for Literal { type Err = LiteralError; fn parse>(mut iter: I) -> Result> { fn parse_digit>( mut iter: I, radix: u32, ) -> Result> { Ok(iter .next_if(|c| c.is_digit(radix)) .ok_or(ParseError::EndOfFile)? .map_err(|i| ParseError::InvalidCharacter(*i.peek().unwrap()))? .to_digit(radix) .unwrap()) } /// Parse full character escape. fn parse_escape>( mut iter: I, ) -> Result> { requires(&mut iter, '\\')?; match iter.peek().ok_or(ParseError::EndOfFile)? { '\'' | '\"' | '\\' => Ok(iter.next().unwrap()), 'n' => { requires(iter, 'n')?; Ok('\n') } 'r' => { requires(iter, 'r')?; Ok('\r') } 't' => { requires(iter, 't')?; Ok('\t') } '0' => { requires(iter, '0')?; Ok('\0') } 'x' => { requires(&mut iter, 'x')?; let upper = parse_digit(&mut iter, 8)?; let ord = 16 * upper + parse_digit(&mut iter, 16)?; std::char::from_u32(ord) .ok_or(ParseError::Other(LiteralError::InvalidCharacterCode(ord))) } 'u' => { requires(&mut iter, 'u')?; requires(&mut iter, '{')?; let mut ord = 0; for _ in 1..=6 { ord *= 16; ord += parse_digit(&mut iter, 16)?; iter.advance_while(|&c| c == '_'); if iter.peek() == Some(&'}') { break; } } requires(&mut iter, '}')?; std::char::from_u32(ord) .ok_or(ParseError::Other(LiteralError::InvalidCharacterCode(ord))) } &c => Err(ParseError::InvalidCharacter(c)), } } requires(&mut iter, '\'')?; let mut s = String::new(); loop { match iter.peek().ok_or(ParseError::EndOfFile)? { '\'' => { iter.next(); return if s.is_empty() { Err(ParseError::Other(LiteralError::EmptyLiteral)) } else { Ok(Literal { contents: s }) }; } &c @ '\n' | &c @ '\r' | &c @ '\t' => return Err(ParseError::InvalidCharacter(c)), '\\' => s.push(parse_escape(&mut iter)?), _ => s.push(iter.next().unwrap()), } } } } impl FromStr for Literal { type Err = ParseError<::Err>; fn from_str(s: &str) -> Result { Literal::parse(s.chars().peekable()) } } #[derive(Clone, Copy, Debug, Eq, PartialEq)] pub enum LiteralError { EmptyLiteral, InvalidCharacterCode(u32), } #[derive(Clone, Debug, Eq, PartialEq)] pub struct Call { name: Ident, args: Vec, } #[derive(Clone, Debug, Eq, PartialEq)] pub struct Fix { arg: Ident, expr: Expression, } impl Parse for Fix { type Err = FixError; fn parse>(mut iter: I) -> Result> { requires(&mut iter, '[')?; let arg = Ident::parse(&mut iter).map_err(ParseError::map)?; requires(&mut iter, ']')?; requires(&mut iter, '(')?; iter.advance_while(|c| c.is_whitespace()); let expr = Expression::parse(Box::new(&mut iter as &mut dyn Peek)) .map_err(ParseError::map)?; requires(&mut iter, ')')?; Ok(Self { arg, expr }) } } impl FromStr for Fix { type Err = ParseError<::Err>; fn from_str(s: &str) -> Result { Fix::parse(s.chars().peekable()) } } #[derive(Clone, Debug, Eq, PartialEq)] pub struct FixError(Box); impl From for FixError { fn from(other: Infallible) -> Self { match other {} } } impl From for FixError { fn from(other: ExpressionError) -> Self { Self(Box::new(other)) } } #[derive(Clone, Debug, Eq, PartialEq)] pub enum Term { Epsilon, Ident(Ident), Literal(Literal), Call { name: Ident, args: Vec }, Fix(Fix), Parens(Expression), } impl Parse for Term { type Err = TermError; fn parse>(mut iter: I) -> Result> { match iter.peek().ok_or(ParseError::EndOfFile)? { '(' => { iter.next(); if iter.consume_if_next(&')') { Ok(Self::Epsilon) } else { let expr = Expression::parse(Box::new(&mut iter as &mut dyn Peek)) .map_err(ParseError::map)?; requires(iter, ')').map(|_| Self::Parens(expr)) } } '[' => Fix::parse(iter).map(Self::Fix).map_err(ParseError::map), '\'' => Literal::parse(iter) .map(Self::Literal) .map_err(ParseError::map), c if c.is_alphabetic() => { let ident = Ident::parse(&mut iter).map_err(ParseError::map)?; if iter.consume_if_next(&'(') { iter.advance_while(|c| c.is_whitespace()); let mut args = Vec::new(); loop { args.push( Expression::parse(Box::new(&mut iter as &mut dyn Peek)) .map_err(ParseError::::map) .map_err(ParseError::map)?, ); if iter.consume_if_next(&')') { return Ok(Self::Call { name: ident, args }); } requires(&mut iter, ',')?; iter.advance_while(|c| c.is_whitespace()); } } else { Ok(Self::Ident(ident)) } } &c => Err(ParseError::InvalidCharacter(c)), } } } impl FromStr for Term { type Err = ParseError<::Err>; fn from_str(s: &str) -> Result { Term::parse(s.chars().peekable()) } } #[derive(Clone, Debug, Eq, PartialEq)] pub struct CallError(Box); impl From for CallError { fn from(other: ExpressionError) -> Self { Self(Box::new(other)) } } #[derive(Clone, Debug, Eq, PartialEq)] pub enum TermError { Literal(LiteralError), Call(CallError), Fix(FixError), Parens(Box), } impl From for TermError { fn from(other: Infallible) -> Self { match other {} } } impl From for TermError { fn from(other: ExpressionError) -> Self { Self::Parens(Box::new(other)) } } impl From for TermError { fn from(other: FixError) -> Self { Self::Fix(other) } } impl From for TermError { fn from(other: CallError) -> Self { Self::Call(other) } } impl From for TermError { fn from(other: LiteralError) -> Self { Self::Literal(other) } } #[derive(Clone, Debug, Eq, PartialEq)] pub struct Seq { terms: Vec, } impl Parse for Seq { type Err = SeqError; fn parse>(mut iter: I) -> Result> { let mut terms = Vec::new(); terms.push(Term::parse(&mut iter).map_err(ParseError::map)?); iter.advance_while(|c| c.is_whitespace()); while iter.consume_if_next(&'.') { iter.advance_while(|c| c.is_whitespace()); terms.push(Term::parse(&mut iter).map_err(ParseError::map)?); iter.advance_while(|c| c.is_whitespace()); } Ok(Self { terms }) } } impl FromStr for Seq { type Err = ParseError<::Err>; fn from_str(s: &str) -> Result { Seq::parse(s.chars().peekable()) } } #[derive(Clone, Debug, Eq, PartialEq)] pub struct SeqError(TermError); impl From for SeqError { fn from(other: TermError) -> Self { Self(other) } } #[derive(Clone, Debug, Eq, PartialEq)] pub struct Alt { seqs: Vec, } impl Parse for Alt { type Err = AltError; fn parse>(mut iter: I) -> Result> { let mut seqs = Vec::new(); seqs.push(Seq::parse(&mut iter).map_err(ParseError::map)?); while iter.consume_if_next(&'|') { iter.advance_while(|c| c.is_whitespace()); seqs.push(Seq::parse(&mut iter).map_err(ParseError::map)?); } Ok(Self { seqs }) } } impl FromStr for Alt { type Err = ParseError<::Err>; fn from_str(s: &str) -> Result { Alt::parse(s.chars().peekable()) } } #[derive(Clone, Debug, Eq, PartialEq)] pub struct AltError(SeqError); impl From for AltError { fn from(other: SeqError) -> Self { Self(other) } } #[derive(Clone, Debug, Eq, PartialEq)] pub struct Expression { alt: Alt, } impl Parse for Expression { type Err = ExpressionError; fn parse>(iter: I) -> Result> { Ok(Self { alt: Alt::parse(iter).map_err(ParseError::map)?, }) } } impl FromStr for Expression { type Err = ParseError<::Err>; fn from_str(s: &str) -> Result { Expression::parse(s.chars().peekable()) } } #[derive(Clone, Debug, Eq, PartialEq)] pub struct ExpressionError(AltError); impl From for ExpressionError { fn from(other: AltError) -> Self { Self(other) } } #[cfg(test)] mod tests { use super::Alt; use super::Expression; use super::Fix; use super::Seq; use super::Term; use super::{ parse::{Parse, ParseError}, Ident, Literal, LiteralError, }; use std::iter; macro_rules! parse_indent { ($s:literal) => { assert_eq!( $s.parse(), Ok(Ident { name: $s.to_owned() }) ) }; } #[test] fn parse_ident_simple() { parse_indent!("x"); parse_indent!("variable"); } #[test] fn parse_indent_unicode() { parse_indent!("ℕꥇℜ"); } #[test] fn parse_indent_stops() { let s = "variable then more text"; let mut iter = s.chars().peekable(); assert_eq!( Ident::parse(&mut iter), Ok(Ident { name: "variable".to_owned() }) ); assert_eq!(iter.collect::(), " then more text"); } #[test] fn parse_indent_not_empty() { assert_eq!("".parse::(), Err(ParseError::EndOfFile)); } #[test] fn parse_indent_alphabetic_only() { assert_eq!( "123".parse::(), Err(ParseError::InvalidCharacter('1')) ); } macro_rules! parse_lit { ($str:literal) => { assert_eq!( Literal::parse( iter::once('\'') .chain($str.escape_debug()) .chain(iter::once('\''),) .peekable() ), Ok(Literal { contents: $str.to_owned() }), ) }; } #[test] fn parse_literal_basic() { parse_lit!("hello"); parse_lit!("this has whitespace"); } #[test] fn parse_literal_quotes() { parse_lit!(r#"quote ' " and backslash \ quoting"#); } #[test] fn parse_literal_ascii_escape() { parse_lit!("newline \n tab \t carriage return \r"); assert_eq!( r#"'\0'"#.parse(), Ok(Literal { contents: "\0".to_owned() }) ); } #[test] fn parse_literal_hex_escape() { assert_eq!( r#"'\x20'"#.parse(), Ok(Literal { contents: " ".to_owned() }) ); } #[test] fn parse_literal_unicode_escape() { parse_lit!("emoji ❤ escape \u{200c}"); } #[test] fn parse_literal_stops() { let mut iter = "'hi there' Not quoted".chars().peekable(); assert_eq!( Literal::parse(&mut iter), Ok(Literal { contents: "hi there".to_owned() }) ); assert_eq!(iter.collect::(), " Not quoted"); } #[test] fn parse_literal_needs_quotes() { assert_eq!( "hello".parse::(), Err(ParseError::InvalidCharacter('h')) ); assert_eq!("'hello".parse::(), Err(ParseError::EndOfFile)); } #[test] fn parse_literal_not_empty() { assert_eq!( "''".parse::(), Err(ParseError::Other(LiteralError::EmptyLiteral)) ); } #[test] fn parse_literal_no_forbidden_whitespace() { assert_eq!( "'\n".parse::(), Err(ParseError::InvalidCharacter('\n')) ); assert_eq!( "'\r".parse::(), Err(ParseError::InvalidCharacter('\r')) ); assert_eq!( "'\t".parse::(), Err(ParseError::InvalidCharacter('\t')) ); } #[test] fn parse_literal_invalid_escape() { assert_eq!( r#"'\F"#.parse::(), Err(ParseError::InvalidCharacter('F')) ); } #[test] fn parse_literal_bad_hex_escape() { assert_eq!( r#"'\xF"#.parse::(), Err(ParseError::InvalidCharacter('F')) ); assert_eq!( r#"'\x0z"#.parse::(), Err(ParseError::InvalidCharacter('z')) ); } #[test] fn parse_literal_bad_unicode_escape() { // Negative tests assert_eq!( r#"'\u0"#.parse::(), Err(ParseError::InvalidCharacter('0')) ); assert_eq!( r#"'\u{}"#.parse::(), Err(ParseError::InvalidCharacter('}')) ); assert_eq!( r#"'\u{z"#.parse::(), Err(ParseError::InvalidCharacter('z')) ); assert_eq!( r#"'\u{_"#.parse::(), Err(ParseError::InvalidCharacter('_')) ); assert_eq!( r#"'\u{0___h"#.parse::(), Err(ParseError::InvalidCharacter('h')) ); assert_eq!( r#"'\u{D800}"#.parse::(), Err(ParseError::Other(LiteralError::InvalidCharacterCode( 0xd800 ))) ); assert_eq!( r#"'\u{0000000"#.parse::(), Err(ParseError::InvalidCharacter('0')) ); } #[test] fn parse_fix_basic() { let expr = Expression { alt: Alt { seqs: vec![Seq { terms: vec![Term::Epsilon], }], }, }; assert_eq!( "[x](())".parse(), Ok(Fix { arg: Ident { name: "x".to_owned() }, expr: expr.clone() }) ); assert_eq!( "[x](() )".parse(), Ok(Fix { arg: Ident { name: "x".to_owned() }, expr: expr.clone() }) ); assert_eq!( "[x]( ())".parse(), Ok(Fix { arg: Ident { name: "x".to_owned() }, expr }) ); } #[test] fn parse_fix_needs_arg() { assert_eq!( "x]()".parse::(), Err(ParseError::InvalidCharacter('x')) ); assert_eq!( "[]()".parse::(), Err(ParseError::InvalidCharacter(']')) ); } #[test] fn parse_fix_needs_parens() { assert_eq!( "[x]x".parse::(), Err(ParseError::InvalidCharacter('x')) ); assert_eq!("[x](x".parse::(), Err(ParseError::EndOfFile)) } #[test] fn parse_fix_stops() { let mut iter = "[x](()) Not in fix".chars().peekable(); assert_eq!( Fix::parse(&mut iter), Ok(Fix { arg: Ident { name: "x".to_owned() }, expr: Expression { alt: Alt { seqs: vec![Seq { terms: vec![Term::Epsilon], }], }, }, }) ); assert_eq!(iter.collect::(), " Not in fix"); } }