pub mod parse; use crate::ast::{ self, convert::{Context, Convert}, }; use self::parse::{ iter::{Peek, PeekExt, PeekableTakeWhile}, requires, Parse, ParseError, }; use std::convert::Infallible; use std::str::FromStr; #[derive(Clone, Debug, Eq, PartialEq)] pub struct Ident { name: String, } impl Parse for Ident { type Err = Infallible; fn parse>(mut iter: I) -> Result> { // rustc can't predict type, but rust-analyzer can... let name: String = PeekableTakeWhile::new(&mut iter, |c| c.is_alphabetic()).collect(); if name.is_empty() { if let Some(c) = iter.peek() { Err(ParseError::InvalidCharacter(*c)) } else { Err(ParseError::EndOfFile) } } else { Ok(Self { name }) } } } impl FromStr for Ident { type Err = ParseError<::Err>; fn from_str(s: &str) -> Result { Ident::parse(s.chars().peekable()) } } impl Convert for Ident { fn convert(self, context: &Context) -> ast::Term { use ast::Term; if let Some(variable) = context.get(&self.name) { Term::Variable(variable) } else { Term::Call(self.name, vec![]) } } } #[derive(Clone, Debug, Eq, PartialEq)] pub struct Literal { contents: String, } impl Parse for Literal { type Err = LiteralError; fn parse>(mut iter: I) -> Result> { fn parse_digit>( mut iter: I, radix: u32, ) -> Result> { Ok(iter .next_if(|c| c.is_digit(radix)) .ok_or(ParseError::EndOfFile)? .map_err(|i| ParseError::InvalidCharacter(*i.peek().unwrap()))? .to_digit(radix) .unwrap()) } /// Parse full character escape. fn parse_escape>( mut iter: I, ) -> Result> { requires(&mut iter, '\\')?; match iter.peek().ok_or(ParseError::EndOfFile)? { '\'' | '\"' | '\\' => Ok(iter.next().unwrap()), 'n' => { requires(iter, 'n')?; Ok('\n') } 'r' => { requires(iter, 'r')?; Ok('\r') } 't' => { requires(iter, 't')?; Ok('\t') } '0' => { requires(iter, '0')?; Ok('\0') } 'x' => { requires(&mut iter, 'x')?; let upper = parse_digit(&mut iter, 8)?; let ord = 16 * upper + parse_digit(&mut iter, 16)?; std::char::from_u32(ord) .ok_or(ParseError::Other(LiteralError::InvalidCharacterCode(ord))) } 'u' => { requires(&mut iter, 'u')?; requires(&mut iter, '{')?; let mut ord = 0; for _ in 1..=6 { ord *= 16; ord += parse_digit(&mut iter, 16)?; iter.advance_while(|&c| c == '_'); if iter.peek() == Some(&'}') { break; } } requires(&mut iter, '}')?; std::char::from_u32(ord) .ok_or(ParseError::Other(LiteralError::InvalidCharacterCode(ord))) } &c => Err(ParseError::InvalidCharacter(c)), } } requires(&mut iter, '\'')?; let mut s = String::new(); loop { match iter.peek().ok_or(ParseError::EndOfFile)? { '\'' => { iter.next(); return if s.is_empty() { Err(ParseError::Other(LiteralError::EmptyLiteral)) } else { Ok(Literal { contents: s }) }; } &c @ '\n' | &c @ '\r' | &c @ '\t' => return Err(ParseError::InvalidCharacter(c)), '\\' => s.push(parse_escape(&mut iter)?), _ => s.push(iter.next().unwrap()), } } } } impl Convert for Literal { fn convert(self, _context: &Context) -> ast::Term { ast::Term::Literal(self.contents) } } impl FromStr for Literal { type Err = ParseError<::Err>; fn from_str(s: &str) -> Result { Literal::parse(s.chars().peekable()) } } #[derive(Clone, Copy, Debug, Eq, PartialEq)] pub enum LiteralError { EmptyLiteral, InvalidCharacterCode(u32), } #[derive(Clone, Debug, Eq, PartialEq)] pub struct Call { name: Ident, args: Vec, } impl Convert for Call { fn convert(self, context: &Context) -> ast::Term { use ast::Term; Term::Call( self.name.name, self.args.into_iter().map(|e| e.convert(context)).collect(), ) } } #[derive(Clone, Debug, Eq, PartialEq)] pub struct Fix { arg: Ident, expr: Expression, } impl Parse for Fix { type Err = FixError; fn parse>(mut iter: I) -> Result> { requires(&mut iter, '[')?; let arg = Ident::parse(&mut iter).map_err(ParseError::map)?; requires(&mut iter, ']')?; requires(&mut iter, '(')?; iter.advance_while(|c| c.is_whitespace()); let expr = Expression::parse(Box::new(&mut iter as &mut dyn Peek)) .map_err(ParseError::map)?; requires(&mut iter, ')')?; Ok(Self { arg, expr }) } } impl FromStr for Fix { type Err = ParseError<::Err>; fn from_str(s: &str) -> Result { Fix::parse(s.chars().peekable()) } } impl Convert for Fix { fn convert(self, context: &Context) -> ast::Term { use ast::Term; let expr = self.expr; Term::Fix(Box::new(context.push(self.arg.name, |c| expr.convert(c)))) } } #[derive(Clone, Debug, Eq, PartialEq)] pub struct FixError(Box); impl From for FixError { fn from(other: Infallible) -> Self { match other {} } } impl From for FixError { fn from(other: ExpressionError) -> Self { Self(Box::new(other)) } } #[derive(Clone, Debug, Eq, PartialEq)] pub enum Term { Epsilon, Ident(Ident), Literal(Literal), Call(Call), Fix(Fix), Parens(Expression), } impl Parse for Term { type Err = TermError; fn parse>(mut iter: I) -> Result> { match iter.peek().ok_or(ParseError::EndOfFile)? { '(' => { iter.next(); if iter.consume_if_next(&')') { Ok(Self::Epsilon) } else { let expr = Expression::parse(Box::new(&mut iter as &mut dyn Peek)) .map_err(ParseError::map)?; requires(iter, ')').map(|_| Self::Parens(expr)) } } '[' => Fix::parse(iter).map(Self::Fix).map_err(ParseError::map), '\'' => Literal::parse(iter) .map(Self::Literal) .map_err(ParseError::map), c if c.is_alphabetic() => { let ident = Ident::parse(&mut iter).map_err(ParseError::map)?; if iter.consume_if_next(&'(') { iter.advance_while(|c| c.is_whitespace()); let mut args = Vec::new(); loop { args.push( Expression::parse(Box::new(&mut iter as &mut dyn Peek)) .map_err(ParseError::::map) .map_err(ParseError::map)?, ); if iter.consume_if_next(&')') { return Ok(Self::Call(Call { name: ident, args })); } requires(&mut iter, ',')?; iter.advance_while(|c| c.is_whitespace()); } } else { Ok(Self::Ident(ident)) } } &c => Err(ParseError::InvalidCharacter(c)), } } } impl FromStr for Term { type Err = ParseError<::Err>; fn from_str(s: &str) -> Result { Term::parse(s.chars().peekable()) } } impl Convert for Term { fn convert(self, context: &Context) -> ast::Term { use ast::Term; match self { Self::Epsilon => Term::Epsilon, Self::Ident(i) => i.convert(context), Self::Literal(l) => l.convert(context), Self::Call(c) => c.convert(context), Self::Fix(f) => f.convert(context), Self::Parens(e) => e.convert(context), } } } #[derive(Clone, Debug, Eq, PartialEq)] pub struct CallError(Box); impl From for CallError { fn from(other: ExpressionError) -> Self { Self(Box::new(other)) } } #[derive(Clone, Debug, Eq, PartialEq)] pub enum TermError { Literal(LiteralError), Call(CallError), Fix(FixError), Parens(Box), } impl From for TermError { fn from(other: Infallible) -> Self { match other {} } } impl From for TermError { fn from(other: ExpressionError) -> Self { Self::Parens(Box::new(other)) } } impl From for TermError { fn from(other: FixError) -> Self { Self::Fix(other) } } impl From for TermError { fn from(other: CallError) -> Self { Self::Call(other) } } impl From for TermError { fn from(other: LiteralError) -> Self { Self::Literal(other) } } #[derive(Clone, Debug, Eq, PartialEq)] pub struct Cat { terms: Vec, } impl Parse for Cat { type Err = SeqError; fn parse>(mut iter: I) -> Result> { let mut terms = Vec::new(); terms.push(Term::parse(&mut iter).map_err(ParseError::map)?); iter.advance_while(|c| c.is_whitespace()); while iter.consume_if_next(&'.') { iter.advance_while(|c| c.is_whitespace()); terms.push(Term::parse(&mut iter).map_err(ParseError::map)?); iter.advance_while(|c| c.is_whitespace()); } Ok(Self { terms }) } } impl FromStr for Cat { type Err = ParseError<::Err>; fn from_str(s: &str) -> Result { Cat::parse(s.chars().peekable()) } } impl Convert for Cat { fn convert(self, context: &Context) -> ast::Term { use ast::Term; self.terms.into_iter().rfold(Term::Epsilon, |exp, term| { Term::Cat(Box::new(term.convert(context)), Box::new(exp)) }) } } #[derive(Clone, Debug, Eq, PartialEq)] pub struct SeqError(TermError); impl From for SeqError { fn from(other: TermError) -> Self { Self(other) } } #[derive(Clone, Debug, Eq, PartialEq)] pub struct Alt { cats: Vec, } impl Parse for Alt { type Err = AltError; fn parse>(mut iter: I) -> Result> { let mut seqs = Vec::new(); seqs.push(Cat::parse(&mut iter).map_err(ParseError::map)?); while iter.consume_if_next(&'|') { iter.advance_while(|c| c.is_whitespace()); seqs.push(Cat::parse(&mut iter).map_err(ParseError::map)?); } Ok(Self { cats: seqs }) } } impl FromStr for Alt { type Err = ParseError<::Err>; fn from_str(s: &str) -> Result { Alt::parse(s.chars().peekable()) } } impl Convert for Alt { fn convert(self, context: &Context) -> ast::Term { use ast::Term; self.cats.into_iter().rfold(Term::Bottom, |exp, cat| { Term::Alt(Box::new(cat.convert(context)), Box::new(exp)) }) } } #[derive(Clone, Debug, Eq, PartialEq)] pub struct AltError(SeqError); impl From for AltError { fn from(other: SeqError) -> Self { Self(other) } } #[derive(Clone, Debug, Eq, PartialEq)] pub struct Expression { alt: Alt, } impl Parse for Expression { type Err = ExpressionError; fn parse>(iter: I) -> Result> { Ok(Self { alt: Alt::parse(iter).map_err(ParseError::map)?, }) } } impl FromStr for Expression { type Err = ParseError<::Err>; fn from_str(s: &str) -> Result { Expression::parse(s.chars().peekable()) } } impl Convert for Expression { fn convert(self, context: &Context) -> ast::Term { self.alt.convert(context) } } #[derive(Clone, Debug, Eq, PartialEq)] pub struct ExpressionError(AltError); impl From for ExpressionError { fn from(other: AltError) -> Self { Self(other) } } #[cfg(test)] mod tests { use super::{ parse::{Parse, ParseError}, Alt, Cat, Expression, Fix, Ident, Literal, LiteralError, Term, }; use crate::ast::{ self, convert::{Context, Convert}, }; use std::iter; macro_rules! parse_indent { ($s:literal) => { assert_eq!( $s.parse(), Ok(Ident { name: $s.to_owned() }) ) }; } #[test] fn parse_ident_simple() { parse_indent!("x"); parse_indent!("variable"); } #[test] fn parse_indent_unicode() { parse_indent!("ℕꥇℜ"); } #[test] fn parse_indent_stops() { let s = "variable then more text"; let mut iter = s.chars().peekable(); assert_eq!( Ident::parse(&mut iter), Ok(Ident { name: "variable".to_owned() }) ); assert_eq!(iter.collect::(), " then more text"); } #[test] fn parse_indent_not_empty() { assert_eq!("".parse::(), Err(ParseError::EndOfFile)); } #[test] fn parse_indent_alphabetic_only() { assert_eq!( "123".parse::(), Err(ParseError::InvalidCharacter('1')) ); } #[test] fn convert_ident_variable() { use ast::Term; let context = Context::new(); context.push("x".to_owned(), |c| { assert_eq!( Ident { name: "x".to_owned() } .convert(c), Term::Variable(0) ); c.push("y".to_owned(), |c| { assert_eq!( Ident { name: "y".to_owned() } .convert(c), Term::Variable(0) ); assert_eq!( Ident { name: "x".to_owned() } .convert(c), Term::Variable(1) ); }) }) } #[test] fn convert_ident_call() { use ast::Term; let context = Context::new(); assert_eq!( Ident { name: "call".to_owned() } .convert(&context), Term::Call("call".to_owned(), vec![]) ); assert_eq!( Ident { name: "x".to_owned() } .convert(&context), Term::Call("x".to_owned(), vec![]) ); context.push("x".to_owned(), |c| { assert_eq!( Ident { name: "call".to_owned() } .convert(c), Term::Call("call".to_owned(), vec![]) ); assert_eq!( Ident { name: "x".to_owned() } .convert(c), Term::Variable(0) ); }) } macro_rules! parse_lit { ($str:literal) => { assert_eq!( Literal::parse( iter::once('\'') .chain($str.escape_debug()) .chain(iter::once('\''),) .peekable() ), Ok(Literal { contents: $str.to_owned() }), ) }; } #[test] fn parse_literal_basic() { parse_lit!("hello"); parse_lit!("this has whitespace"); } #[test] fn parse_literal_quotes() { parse_lit!(r#"quote ' " and backslash \ quoting"#); } #[test] fn parse_literal_ascii_escape() { parse_lit!("newline \n tab \t carriage return \r"); assert_eq!( r#"'\0'"#.parse(), Ok(Literal { contents: "\0".to_owned() }) ); } #[test] fn parse_literal_hex_escape() { assert_eq!( r#"'\x20'"#.parse(), Ok(Literal { contents: " ".to_owned() }) ); } #[test] fn parse_literal_unicode_escape() { parse_lit!("emoji ❤ escape \u{200c}"); } #[test] fn parse_literal_stops() { let mut iter = "'hi there' Not quoted".chars().peekable(); assert_eq!( Literal::parse(&mut iter), Ok(Literal { contents: "hi there".to_owned() }) ); assert_eq!(iter.collect::(), " Not quoted"); } #[test] fn parse_literal_needs_quotes() { assert_eq!( "hello".parse::(), Err(ParseError::InvalidCharacter('h')) ); assert_eq!("'hello".parse::(), Err(ParseError::EndOfFile)); } #[test] fn parse_literal_not_empty() { assert_eq!( "''".parse::(), Err(ParseError::Other(LiteralError::EmptyLiteral)) ); } #[test] fn parse_literal_no_forbidden_whitespace() { assert_eq!( "'\n".parse::(), Err(ParseError::InvalidCharacter('\n')) ); assert_eq!( "'\r".parse::(), Err(ParseError::InvalidCharacter('\r')) ); assert_eq!( "'\t".parse::(), Err(ParseError::InvalidCharacter('\t')) ); } #[test] fn parse_literal_invalid_escape() { assert_eq!( r#"'\F"#.parse::(), Err(ParseError::InvalidCharacter('F')) ); } #[test] fn parse_literal_bad_hex_escape() { assert_eq!( r#"'\xF"#.parse::(), Err(ParseError::InvalidCharacter('F')) ); assert_eq!( r#"'\x0z"#.parse::(), Err(ParseError::InvalidCharacter('z')) ); } #[test] fn parse_literal_bad_unicode_escape() { // Negative tests assert_eq!( r#"'\u0"#.parse::(), Err(ParseError::InvalidCharacter('0')) ); assert_eq!( r#"'\u{}"#.parse::(), Err(ParseError::InvalidCharacter('}')) ); assert_eq!( r#"'\u{z"#.parse::(), Err(ParseError::InvalidCharacter('z')) ); assert_eq!( r#"'\u{_"#.parse::(), Err(ParseError::InvalidCharacter('_')) ); assert_eq!( r#"'\u{0___h"#.parse::(), Err(ParseError::InvalidCharacter('h')) ); assert_eq!( r#"'\u{D800}"#.parse::(), Err(ParseError::Other(LiteralError::InvalidCharacterCode( 0xd800 ))) ); assert_eq!( r#"'\u{0000000"#.parse::(), Err(ParseError::InvalidCharacter('0')) ); } #[test] fn convert_literal() { use ast::Term; let context = Context::new(); assert_eq!( Literal { contents: "hello".to_owned() } .convert(&context), Term::Literal("hello".to_owned()) ); } #[test] fn parse_fix_basic() { let expr = Expression { alt: Alt { cats: vec![Cat { terms: vec![Term::Epsilon], }], }, }; assert_eq!( "[x](())".parse(), Ok(Fix { arg: Ident { name: "x".to_owned() }, expr: expr.clone() }) ); assert_eq!( "[x](() )".parse(), Ok(Fix { arg: Ident { name: "x".to_owned() }, expr: expr.clone() }) ); assert_eq!( "[x]( ())".parse(), Ok(Fix { arg: Ident { name: "x".to_owned() }, expr }) ); } #[test] fn parse_fix_needs_arg() { assert_eq!( "x]()".parse::(), Err(ParseError::InvalidCharacter('x')) ); assert_eq!( "[]()".parse::(), Err(ParseError::InvalidCharacter(']')) ); } #[test] fn parse_fix_needs_parens() { assert_eq!( "[x]x".parse::(), Err(ParseError::InvalidCharacter('x')) ); assert_eq!("[x](x".parse::(), Err(ParseError::EndOfFile)) } #[test] fn parse_fix_stops() { let mut iter = "[x](()) Not in fix".chars().peekable(); assert_eq!( Fix::parse(&mut iter), Ok(Fix { arg: Ident { name: "x".to_owned() }, expr: Expression { alt: Alt { cats: vec![Cat { terms: vec![Term::Epsilon], }], }, }, }) ); assert_eq!(iter.collect::(), " Not in fix"); } #[test] fn convert_fix_no_var() { let context = Context::new(); let expr = Expression { alt: Alt { cats: vec![Cat { terms: vec![Term::Epsilon], }], }, }; assert_eq!( Fix { arg: Ident { name: "x".to_owned() }, expr: expr.clone() } .convert(&context), ast::Term::Fix(Box::new(expr.convert(&context))) ) } #[test] fn convert_fix_var() { use ast::Term; let context = Context::new(); let expr = Expression { alt: Alt { cats: vec![Cat { terms: vec![super::Term::Ident(Ident { name: "x".to_owned(), })], }], }, }; assert_eq!( Fix { arg: Ident { name: "x".to_owned() }, expr } .convert(&context), Term::Fix(Box::new(Term::Alt( Box::new(Term::Cat( Box::new(Term::Variable(0)), Box::new(Term::Epsilon) )), Box::new(Term::Bottom) ))) ) } }