diff options
author | Greg Brown <gmb60@cam.ac.uk> | 2021-01-14 11:42:55 +0000 |
---|---|---|
committer | Greg Brown <gmb60@cam.ac.uk> | 2021-01-14 11:42:55 +0000 |
commit | aac3549a72663c523a456b2f5d7c3b77f509cdd6 (patch) | |
tree | 562824f3cfa5feca791715c733f7749197bb7e7a | |
parent | 0d01692c97ea8ca6fc4b229e5b9678cb252bceda (diff) |
Add labelled expressions.
Restructure project (again).
Convert `Cat` and `Alt` from binary to n+2-ary.
30 files changed, 2210 insertions, 2348 deletions
@@ -8,6 +8,8 @@ edition = "2018" members = ["autochomp", "chewed", "chomp-macro"] [dependencies] +heck = "0.3" +paste = "1" quote = "1" [dependencies.proc-macro2] diff --git a/autochomp/src/nibble.rs b/autochomp/src/nibble.rs index f6629f2..657a20f 100644 --- a/autochomp/src/nibble.rs +++ b/autochomp/src/nibble.rs @@ -1,8 +1,8 @@ chomp_macro::nibble! { - let opt(x) = _ | x; - let plus(x) = [plus](x . opt(plus)); + let opt(x) = _ : None | x : Some; + let plus(x) = [plus]((x : First) . (opt(plus) : Next)); let star(x) = opt(plus(x)); - let star_(base, step) = [rec](base | step . rec); + let star_(base, step) = [rec](base : Base | step . rec : Step); let Pattern_Whitespace = "\t"|"\n"|"\x0B"|"\x0c"|"\r"|" "|"\u{85}"|"\u{200e}"|"\u{200f}"|"\u{2028}"|"\u{2029}"; @@ -25,46 +25,53 @@ chomp_macro::nibble! { XID_Start | "_" | "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" ; let literal_char = - " " | "!" | "#" | "$" | "%" | "&" | "'" | - "(" | ")" | "*" | "+" | "," | "-" | "." | "/" | - "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | - "8" | "9" | ":" | ";" | "<" | "=" | ">" | "?" | - "@" | "A" | "B" | "C" | "D" | "E" | "F" | "G" | - "H" | "I" | "J" | "K" | "L" | "M" | "N" | "O" | - "P" | "Q" | "R" | "S" | "T" | "U" | "V" | "W" | - "X" | "Y" | "Z" | "[" | "]" | "^" | "_" | - "`" | "a" | "b" | "c" | "d" | "e" | "f" | "g" | - "h" | "i" | "j" | "k" | "l" | "m" | "n" | "o" | - "p" | "q" | "r" | "s" | "t" | "u" | "v" | "w" | - "x" | "y" | "z" | "{" | "|" | "}" | "~" | - "\\" . ("\"" | "'" | "n" | "r" | "t" | "\\" | "0" | - "x" . oct_digit . hex_digit | - "u{" . hex_digit . opt(hex_digit . opt(hex_digit . opt(hex_digit . opt(hex_digit . opt(hex_digit))))) . "}"); + (" " | "!" | "#" | "$" | "%" | "&" | "'" | + "(" | ")" | "*" | "+" | "," | "-" | "." | "/" | + "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | + "8" | "9" | ":" | ";" | "<" | "=" | ">" | "?" | + "@" | "A" | "B" | "C" | "D" | "E" | "F" | "G" | + "H" | "I" | "J" | "K" | "L" | "M" | "N" | "O" | + "P" | "Q" | "R" | "S" | "T" | "U" | "V" | "W" | + "X" | "Y" | "Z" | "[" | "]" | "^" | "_" | + "`" | "a" | "b" | "c" | "d" | "e" | "f" | "g" | + "h" | "i" | "j" | "k" | "l" | "m" | "n" | "o" | + "p" | "q" | "r" | "s" | "t" | "u" | "v" | "w" | + "x" | "y" | "z" | "{" | "|" | "}" | "~") : Literal | + "\\" . ( + ("\"" | "'" | "n" | "r" | "t" | "\\" | "0") : Ascii | + "x" . oct_digit . hex_digit : Oct | + "u{" .hex_digit + .opt(hex_digit + .opt(hex_digit + .opt(hex_digit + .opt(hex_digit . opt(hex_digit))))) . "}" : Unicode + ) : Escape ; let ws = plus(Pattern_Whitespace); - let punctuated(x, p) = [rec](x . opt(p . opt(ws) . rec)); - let list(x) = "(" . opt(ws) . [rec](x . opt("," . opt(ws) . opt(rec))) . ")"; + let punctuated(x, p) = [rec]((x : First) . (opt(p . opt(ws) . rec) : Next)); + let list(x) = "(" . opt(ws) . [rec]((x : First) . (opt("," . opt(ws) . opt(rec)) : Next)) . ")"; let epsilon = "_"; let ident = XID_Start . star(XID_Continue); - let literal = "\"" . plus(literal_char) . "\""; - let parens(expr) = "(" . opt(ws) . expr . ")"; - let fix(expr) = "[" . opt(ws) . ident . opt(ws) . "]" . opt(ws) . parens(expr); + let literal = "\"" . (plus(literal_char) : Contents) . "\""; + let parens(expr) = "(" . opt(ws) . (expr : Inner) . ")"; + let fix(expr) = "[" . opt(ws) . (ident : Arg) . opt(ws) . "]" . opt(ws) . (parens(expr) : Inner); let term(expr) = - epsilon . opt(ws) - | literal . opt(ws) - | parens(expr) . opt(ws) - | fix(expr) . opt(ws) - | ident . opt(ws) . opt(list(expr) . opt(ws)) + epsilon . opt(ws) : Epsilon + | literal . opt(ws) : Literal + | parens(expr) . opt(ws) : Parens + | fix(expr) . opt(ws) : Fix + | ident . opt(ws) . opt(list(expr) . opt(ws)) : CallOrVariable ; + let label = ":" . opt(ws) . (ident : Label) . opt(ws); let cat(expr) = punctuated(term(expr), "."); - let alt(expr) = punctuated(cat(expr), "|"); + let alt(expr) = punctuated((cat(expr) : Cat) . (opt(label) : Name), "|"); let expr = [expr](alt(expr)); - let let = "let" . ws . ident . opt(ws) . opt(list(ident . opt(ws)) . opt(ws)) . "=" . opt(ws) . expr . ";" . opt(ws); - let goal = "match" . ws . expr . ";" . opt(ws); + let let = "let" . ws . (ident : Name) . opt(ws) . (opt(list(ident . opt(ws)) . opt(ws)) : Args) . "=" . opt(ws) . (expr : Expr) . ";" . opt(ws); + let goal = "match" . ws . (expr : Expr) . ";" . opt(ws); - match star_(star_(goal, let), Pattern_Whitespace); + match star_(star_(goal : Goal, let : Let), Pattern_Whitespace); } diff --git a/chewed/src/parse.rs b/chewed/src/parse.rs index 65e9272..2d01757 100644 --- a/chewed/src/parse.rs +++ b/chewed/src/parse.rs @@ -1,3 +1,5 @@ +use std::fmt::Display; + use super::{ error::{ParseError, TakeError}, position::LineCol, @@ -102,7 +104,7 @@ impl<I: ?Sized + Iterator<Item = char>> Parser for IterWrapper<I> { } } -pub trait Parse: Sized { +pub trait Parse: Display + Sized { fn take<P: Parser + ?Sized>(input: &mut P) -> Result<Self, TakeError>; fn parse<P: Parser>(mut input: P) -> Result<Self, ParseError> { @@ -124,23 +126,18 @@ pub trait Parse: Sized { } } -impl Parse for () { - fn take<P: Parser + ?Sized>(_: &mut P) -> Result<Self, TakeError> { - Ok(()) - } -} +#[derive(Clone, Copy, Debug, Default, Eq, Hash, PartialEq)] +pub struct Epsilon; -impl<A: Parse, B: Parse> Parse for (A, B) { - fn take<P: Parser + ?Sized>(input: &mut P) -> Result<Self, TakeError> { - let a = input.take()?; - let b = input.take()?; - Ok((a, b)) +impl Display for Epsilon { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "") } +} - fn parse<P: Parser>(mut input: P) -> Result<Self, ParseError> { - let a = A::take(&mut input)?; - let b = B::parse(input)?; - Ok((a, b)) +impl Parse for Epsilon { + fn take<P: Parser + ?Sized>(_: &mut P) -> Result<Self, TakeError> { + Ok(Epsilon) } } diff --git a/chomp-macro/src/lib.rs b/chomp-macro/src/lib.rs index d58bbdc..91e1527 100644 --- a/chomp-macro/src/lib.rs +++ b/chomp-macro/src/lib.rs @@ -1,33 +1,33 @@ use chomp::{ chomp::{ - check::{InlineCalls, TypeCheck}, - context::Context, + ast::substitute::InlineCalls, + typed::{ + context::Context, + lower::{Backend, GenerateCode}, + TypeInfer, + }, visit::Visitable, }, - lower::{rust::RustBackend, Backend, GenerateCode}, + lower::RustBackend, nibble::cst::File, }; -use proc_macro::{Span, TokenStream}; +use proc_macro::TokenStream; use syn::Error; #[proc_macro] pub fn nibble(item: TokenStream) -> TokenStream { syn::parse(item) - .and_then(|nibble: File| { - nibble - .convert() - .ok_or_else(|| todo!()) - }) + .and_then(|nibble: File| nibble.convert().map_err(Error::from)) .and_then(|(funs, goal)| { funs.into_iter() .try_rfold(goal, |goal, function| { - goal.fold(&mut InlineCalls::new(function)) + goal.fold(&mut InlineCalls { function }) }) .map_err(Error::from) }) .and_then(|expr| { let mut context = Context::default(); - expr.fold(&mut TypeCheck { + expr.fold(&mut TypeInfer { context: &mut context, }) .map_err(Error::from) @@ -35,7 +35,7 @@ pub fn nibble(item: TokenStream) -> TokenStream { .map(|typed| { let mut backend = RustBackend::default(); let id = typed.gen(&mut backend); - backend.emit_code(id) + backend.emit_code(None, None, id) }) .unwrap_or_else(Error::into_compile_error) .into() diff --git a/src/chomp/ast/error.rs b/src/chomp/ast/error.rs new file mode 100644 index 0000000..d2c49cd --- /dev/null +++ b/src/chomp/ast/error.rs @@ -0,0 +1,27 @@ +use std::{error::Error, fmt::{self, Display}}; + +use proc_macro2::Span; + +use crate::chomp::Name; + +use super::{Call, Parameter}; + +#[derive(Debug)] +pub enum SubstituteError { + FreeParameter { param: Parameter, span: Option<Span>, name: Option<Name> }, + WrongArgCount { call: Call, expected: usize, span: Option<Span> }, +} + +impl From<SubstituteError> for syn::Error { + fn from(e: SubstituteError) -> Self { + todo!() + } +} + +impl Display for SubstituteError { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + todo!() + } +} + +impl Error for SubstituteError {} diff --git a/src/chomp/ast.rs b/src/chomp/ast/mod.rs index 76d75ae..110e5c0 100644 --- a/src/chomp/ast.rs +++ b/src/chomp/ast/mod.rs @@ -1,127 +1,46 @@ -use std::{ - fmt::{self, Display}, - hash, -}; +use std::fmt::{self, Display}; use proc_macro2::Span; -use syn::{Ident, LitStr, Token}; +use syn::Token; use super::Name; -pub type Epsilon = Option<Token![_]>; +pub mod error; +pub mod substitute; -#[derive(Clone, Debug)] -pub enum Literal { - Spanned(LitStr), - Spanless(String), -} +#[derive(Clone, Debug, Default, Eq, PartialEq, Hash)] +pub struct Epsilon; -impl Literal { - pub fn value(&self) -> String { - match self { - Self::Spanned(l) => l.value(), - Self::Spanless(s) => s.clone(), - } - } - - pub fn span(&self) -> Option<Span> { - match self { - Self::Spanned(l) => Some(l.span()), - Self::Spanless(_) => None, - } - } - - pub fn as_litstr(self, span: Span) -> LitStr { - match self { - Self::Spanned(l) => l, - Self::Spanless(s) => LitStr::new(&s, span), - } - } -} - -impl PartialEq for Literal { - fn eq(&self, other: &Self) -> bool { - match (self, other) { - (Self::Spanned(me), Self::Spanned(them)) => me == them, - (Self::Spanned(me), Self::Spanless(them)) => &me.value() == them, - (Self::Spanless(me), Self::Spanned(them)) => me == &them.value(), - (Self::Spanless(me), Self::Spanless(them)) => me == them, - } - } -} - -impl Eq for Literal {} - -impl hash::Hash for Literal { - fn hash<H: hash::Hasher>(&self, state: &mut H) { - self.value().hash(state) - } -} - -impl Display for Literal { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - write!(f, "{:?}", self.value()) - } -} - -impl From<LitStr> for Literal { - fn from(l: LitStr) -> Self { - Self::Spanned(l) - } -} - -impl From<String> for Literal { - fn from(s: String) -> Self { - Self::Spanless(s) - } -} +pub type Literal = String; #[derive(Clone, Debug)] pub struct Cat { - pub fst: Box<Expression>, + pub first: Box<NamedExpression>, pub punct: Option<Token![.]>, - pub snd: Box<Expression>, -} - -impl Cat { - pub fn new(fst: Expression, punct: Option<Token![.]>, snd: Expression) -> Self { - Self { - fst: Box::new(fst), - punct, - snd: Box::new(snd), - } - } - - pub fn first(&self) -> &Expression { - &self.fst - } - - pub fn first_mut(&mut self) -> &mut Expression { - &mut self.fst - } - - pub fn punct(&self) -> Option<Token![.]> { - self.punct - } - - pub fn second(&self) -> &Expression { - &self.snd - } - - pub fn second_mut(&mut self) -> &mut Expression { - &mut self.snd - } + pub second: Box<NamedExpression>, + pub rest: Vec<(Option<Token![.]>, NamedExpression)>, } impl Display for Cat { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - write!(f, "({}.{})", self.fst, self.snd) + write!(f, "({} . {}", self.first, self.second)?; + for (_, other) in &self.rest { + write!(f, " . {}", other)?; + } + write!(f, ")") } } impl PartialEq for Cat { fn eq(&self, other: &Self) -> bool { - self.first() == other.first() && self.second() == other.second() + self.first == other.first + && self.second == other.second + && self.rest.len() == other.rest.len() + && self + .rest + .iter() + .zip(other.rest.iter()) + .all(|((_, me), (_, them))| me == them) } } @@ -129,50 +48,32 @@ impl Eq for Cat {} #[derive(Clone, Debug)] pub struct Alt { - pub left: Box<Expression>, + pub first: Box<NamedExpression>, pub punct: Option<Token![|]>, - pub right: Box<Expression>, -} - -impl Alt { - pub fn new(left: Expression, punct: Option<Token![|]>, right: Expression) -> Self { - Self { - left: Box::new(left), - punct, - right: Box::new(right), - } - } - - pub fn left(&self) -> &Expression { - &self.left - } - - pub fn left_mut(&mut self) -> &mut Expression { - &mut self.left - } - - pub fn punct(&self) -> Option<Token![|]> { - self.punct - } - - pub fn right(&self) -> &Expression { - &self.right - } - - pub fn right_mut(&mut self) -> &mut Expression { - &mut self.right - } + pub second: Box<NamedExpression>, + pub rest: Vec<(Option<Token![|]>, NamedExpression)> } impl Display for Alt { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - write!(f, "({}|{})", self.left, self.right) + write!(f, "({} | {}", self.first, self.second)?; + for (_, other) in &self.rest { + write!(f, " | {}", other)?; + } + write!(f, ")") } } impl PartialEq for Alt { fn eq(&self, other: &Self) -> bool { - self.left() == other.left() && self.right() == other.right() + self.first == other.first + && self.second == other.second + && self.rest.len() == other.rest.len() + && self + .rest + .iter() + .zip(other.rest.iter()) + .all(|((_, me), (_, them))| me == them) } } @@ -180,46 +81,22 @@ impl Eq for Alt {} #[derive(Clone, Debug)] pub struct Fix { - pub arg: Name, - pub inner: Box<Expression>, - pub span: Option<Span>, -} - -impl Fix { - pub fn new(arg: Name, inner: Expression, span: Option<Span>) -> Self { - Self { - arg, - inner: Box::new(inner), - span, - } - } - - pub fn arg(&self) -> &Name { - &self.arg - } - - pub fn inner(&self) -> &Expression { - &self.inner - } - - pub fn inner_mut(&mut self) -> &mut Expression { - &mut self.inner - } - - pub fn span(&self) -> Option<Span> { - self.span - } + pub arg: Option<Name>, + pub inner: Box<NamedExpression>, } impl Display for Fix { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - write!(f, "[{}]({})", self.arg, self.inner) + match &self.arg { + Some(arg) => write!(f, "[{}]({})", arg, self.inner), + None => write!(f, "[]({})", self.inner), + } } } impl PartialEq for Fix { fn eq(&self, other: &Self) -> bool { - self.inner() == other.inner() + self.inner == other.inner } } @@ -227,92 +104,31 @@ impl Eq for Fix {} #[derive(Clone, Debug, Eq, Hash, PartialEq)] pub struct Variable { - pub name: Name, pub index: usize, } -impl Variable { - pub fn new(name: Name, index: usize) -> Self { - Self { name, index } - } - - pub fn name(&self) -> &Name { - &self.name - } - - pub fn index(&self) -> usize { - self.index - } - - pub fn index_mut(&mut self) -> &mut usize { - &mut self.index - } -} - impl Display for Variable { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - self.name.fmt(f) + write!(f, "'{}", self.index) } } #[derive(Clone, Debug, Eq, Hash, PartialEq)] pub struct Parameter { - pub name: Name, pub index: usize, } -impl Parameter { - pub const fn new(name: Name, index: usize) -> Self { - Self { name, index } - } - - pub fn name(&self) -> &Name { - &self.name - } - - pub fn index(&self) -> usize { - self.index - } - - pub fn index_mut(&mut self) -> &mut usize { - &mut self.index - } -} - impl Display for Parameter { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - write!(f, "{}", self.name) + write!(f, "<{}>", self.index) } } /// A macro invocation. -#[derive(Clone, Debug)] +#[derive(Clone, Debug, Eq, PartialEq)] pub struct Call { pub name: Name, - pub args: Vec<Expression>, - pub span: Option<Span>, -} - -impl Call { - pub fn new(name: Name, args: Vec<Expression>, span: Option<Span>) -> Self { - Self { name, args, span } - } - - pub fn name(&self) -> &Name { - &self.name - } - - pub fn args(&self) -> &[Expression] { - &self.args - } - - pub fn args_mut(&mut self) -> &mut [Expression] { - &mut self.args - } - - pub fn span(&self) -> Option<Span> { - self.span - } + pub args: Vec<NamedExpression>, } impl Display for Call { @@ -335,14 +151,6 @@ impl Display for Call { } } -impl PartialEq for Call { - fn eq(&self, other: &Self) -> bool { - self.name() == other.name() && self.args() == other.args() - } -} - -impl Eq for Call {} - #[derive(Clone, Debug)] pub enum Expression { /// Matches the empty string. @@ -486,20 +294,33 @@ impl From<Call> for Expression { } #[derive(Clone, Debug)] -pub struct Function { - pub name: Name, - pub params: usize, +pub struct NamedExpression { + pub name: Option<Name>, pub expr: Expression, pub span: Option<Span>, } -impl Function { - pub const fn new(name: Name, params: usize, expr: Expression, span: Option<Span>) -> Self { - Self { - name, - params, - expr, - span, +impl Display for NamedExpression { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match &self.name { + Some(name) => write!(f, "({} : {})", self.expr, name), + None => self.expr.fmt(f), } } } + +impl PartialEq for NamedExpression { + fn eq(&self, other: &Self) -> bool { + self.expr == other.expr + } +} + +impl Eq for NamedExpression {} + +#[derive(Clone, Debug)] +pub struct Function { + pub name: Name, + pub params: Vec<Option<Name>>, + pub expr: NamedExpression, + pub span: Option<Span>, +} diff --git a/src/chomp/ast/substitute.rs b/src/chomp/ast/substitute.rs new file mode 100644 index 0000000..1a622e1 --- /dev/null +++ b/src/chomp/ast/substitute.rs @@ -0,0 +1,439 @@ +use proc_macro2::Span; + +use crate::chomp::{ + visit::{Folder, Mapper, Visitable}, + Name, +}; + +use super::{ + error::SubstituteError, Alt, Call, Cat, Epsilon, Fix, Function, Literal, NamedExpression, + Parameter, Variable, +}; + +#[derive(Clone, Copy, Debug, Default)] +pub struct DeepenVars { + pub depth: usize, +} + +impl Mapper for DeepenVars { + type Out = (); + + fn map_epsilon( + &mut self, + _name: &mut Option<Name>, + _span: Option<Span>, + _eps: &mut Epsilon, + ) -> Self::Out { + } + + fn map_literal( + &mut self, + _name: &mut Option<Name>, + _span: Option<Span>, + _lit: &mut Literal, + ) -> Self::Out { + } + + fn map_cat( + &mut self, + _name: &mut Option<Name>, + _span: Option<Span>, + cat: &mut Cat, + ) -> Self::Out { + cat.first.map(self); + cat.second.map(self); + + for (_, term) in &mut cat.rest { + term.map(self); + } + } + + fn map_alt( + &mut self, + _name: &mut Option<Name>, + _span: Option<Span>, + alt: &mut Alt, + ) -> Self::Out { + alt.first.map(self); + alt.second.map(self); + + for (_, term) in &mut alt.rest { + term.map(self); + } + } + + fn map_fix( + &mut self, + _name: &mut Option<Name>, + _span: Option<Span>, + fix: &mut Fix, + ) -> Self::Out { + self.depth += 1; + fix.inner.map(self); + self.depth -= 1; + } + + fn map_variable( + &mut self, + _name: &mut Option<Name>, + _span: Option<Span>, + var: &mut Variable, + ) -> Self::Out { + if var.index >= self.depth { + var.index += 1; + } + } + + fn map_parameter( + &mut self, + _name: &mut Option<Name>, + _span: Option<Span>, + _param: &mut Parameter, + ) -> Self::Out { + } + + fn map_call( + &mut self, + _name: &mut Option<Name>, + _span: Option<Span>, + call: &mut Call, + ) -> Self::Out { + for arg in &mut call.args { + arg.map(self) + } + } +} + +#[derive(Clone, Copy, Debug, Default)] +pub struct ShallowVars { + pub depth: usize, +} + +impl Mapper for ShallowVars { + type Out = (); + + fn map_epsilon( + &mut self, + _name: &mut Option<Name>, + _span: Option<Span>, + _eps: &mut Epsilon, + ) -> Self::Out { + } + + fn map_literal( + &mut self, + _name: &mut Option<Name>, + _span: Option<Span>, + _lit: &mut Literal, + ) -> Self::Out { + } + + fn map_cat( + &mut self, + _name: &mut Option<Name>, + _span: Option<Span>, + cat: &mut Cat, + ) -> Self::Out { + cat.first.map(self); + cat.second.map(self); + + for (_, term) in &mut cat.rest { + term.map(self); + } + } + + fn map_alt( + &mut self, + _name: &mut Option<Name>, + _span: Option<Span>, + alt: &mut Alt, + ) -> Self::Out { + alt.first.map(self); + alt.second.map(self); + + for (_, term) in &mut alt.rest { + term.map(self); + } + } + + fn map_fix( + &mut self, + _name: &mut Option<Name>, + _span: Option<Span>, + fix: &mut Fix, + ) -> Self::Out { + self.depth += 1; + fix.inner.map(self); + self.depth -= 1; + } + + fn map_variable( + &mut self, + _name: &mut Option<Name>, + _span: Option<Span>, + var: &mut Variable, + ) -> Self::Out { + if var.index >= self.depth { + var.index -= 1; + } + } + + fn map_parameter( + &mut self, + _name: &mut Option<Name>, + _span: Option<Span>, + _param: &mut Parameter, + ) -> Self::Out { + } + + fn map_call( + &mut self, + _name: &mut Option<Name>, + _span: Option<Span>, + call: &mut Call, + ) -> Self::Out { + for arg in &mut call.args { + arg.map(self) + } + } +} + +#[derive(Clone, Debug)] +pub struct SubstituteParams { + pub params: Vec<NamedExpression>, +} + +impl Folder for SubstituteParams { + type Out = Result<NamedExpression, SubstituteError>; + + fn fold_epsilon(&mut self, name: Option<Name>, span: Option<Span>, eps: Epsilon) -> Self::Out { + Ok(NamedExpression { + name, + expr: eps.into(), + span, + }) + } + + fn fold_literal(&mut self, name: Option<Name>, span: Option<Span>, lit: Literal) -> Self::Out { + Ok(NamedExpression { + name, + expr: lit.into(), + span, + }) + } + + fn fold_cat(&mut self, name: Option<Name>, span: Option<Span>, mut cat: Cat) -> Self::Out { + cat.first = Box::new(cat.first.fold(self)?); + cat.second = Box::new(cat.second.fold(self)?); + cat.rest = cat + .rest + .into_iter() + .map(|(p, term)| Ok((p, term.fold(self)?))) + .collect::<Result<_, _>>()?; + Ok(NamedExpression { + name, + expr: cat.into(), + span, + }) + } + + fn fold_alt(&mut self, name: Option<Name>, span: Option<Span>, mut alt: Alt) -> Self::Out { + alt.first = Box::new(alt.first.fold(self)?); + alt.second = Box::new(alt.second.fold(self)?); + alt.rest = alt + .rest + .into_iter() + .map(|(p, term)| Ok((p, term.fold(self)?))) + .collect::<Result<_, _>>()?; + Ok(NamedExpression { + name, + expr: alt.into(), + span, + }) + } + + fn fold_fix(&mut self, name: Option<Name>, span: Option<Span>, mut fix: Fix) -> Self::Out { + for param in &mut self.params { + param.map(&mut DeepenVars::default()); + } + + fix.inner = Box::new(fix.inner.fold(self)?); + + for param in &mut self.params { + param.map(&mut ShallowVars::default()); + } + + Ok(NamedExpression { + name, + expr: fix.into(), + span, + }) + } + + fn fold_variable( + &mut self, + name: Option<Name>, + span: Option<Span>, + var: Variable, + ) -> Self::Out { + Ok(NamedExpression { + name, + expr: var.into(), + span, + }) + } + + fn fold_parameter( + &mut self, + name: Option<Name>, + span: Option<Span>, + param: Parameter, + ) -> Self::Out { + let mut expr = self + .params + .get(param.index) + .cloned() + .ok_or_else(|| SubstituteError::FreeParameter { param, span, name: name.clone() })?; + expr.name = expr.name.or(name); + expr.span = expr.span.or(span); + Ok(expr) + } + + fn fold_call(&mut self, name: Option<Name>, span: Option<Span>, mut call: Call) -> Self::Out { + call.args = call + .args + .into_iter() + .map(|arg| arg.fold(self)) + .collect::<Result<_, _>>()?; + Ok(NamedExpression { + name, + expr: call.into(), + span, + }) + } +} + +#[derive(Clone, Debug)] +pub struct InlineCalls { + pub function: Function, +} + +impl Folder for InlineCalls { + type Out = Result<NamedExpression, SubstituteError>; + + fn fold_epsilon(&mut self, name: Option<Name>, span: Option<Span>, eps: Epsilon) -> Self::Out { + Ok(NamedExpression { + name, + expr: eps.into(), + span, + }) + } + + fn fold_literal(&mut self, name: Option<Name>, span: Option<Span>, lit: Literal) -> Self::Out { + Ok(NamedExpression { + name, + expr: lit.into(), + span, + }) + } + + fn fold_cat(&mut self, name: Option<Name>, span: Option<Span>, mut cat: Cat) -> Self::Out { + cat.first = Box::new(cat.first.fold(self)?); + cat.second = Box::new(cat.second.fold(self)?); + cat.rest = cat + .rest + .into_iter() + .map(|(punct, term)| Ok((punct, term.fold(self)?))) + .collect::<Result<_, _>>()?; + + Ok(NamedExpression { + name, + expr: cat.into(), + span, + }) + } + + fn fold_alt(&mut self, name: Option<Name>, span: Option<Span>, mut alt: Alt) -> Self::Out { + alt.first = Box::new(alt.first.fold(self)?); + alt.second = Box::new(alt.second.fold(self)?); + alt.rest = alt + .rest + .into_iter() + .map(|(punct, term)| Ok((punct, term.fold(self)?))) + .collect::<Result<_, _>>()?; + + Ok(NamedExpression { + name, + expr: alt.into(), + span, + }) + } + + fn fold_fix(&mut self, name: Option<Name>, span: Option<Span>, mut fix: Fix) -> Self::Out { + fix.inner = Box::new(fix.inner.fold(self)?); + + Ok(NamedExpression { + name, + expr: fix.into(), + span, + }) + } + + fn fold_variable( + &mut self, + name: Option<Name>, + span: Option<Span>, + var: Variable, + ) -> Self::Out { + Ok(NamedExpression { + name, + expr: var.into(), + span, + }) + } + + fn fold_parameter( + &mut self, + name: Option<Name>, + span: Option<Span>, + param: Parameter, + ) -> Self::Out { + Ok(NamedExpression { + name, + expr: param.into(), + span, + }) + } + + fn fold_call(&mut self, name: Option<Name>, span: Option<Span>, mut call: Call) -> Self::Out { + call.args = call + .args + .into_iter() + .map(|arg| arg.fold(self)) + .collect::<Result<_, _>>()?; + + if call.name != self.function.name { + Ok(NamedExpression { + name, + expr: call.into(), + span, + }) + } else if call.args.len() != self.function.params.len() { + Err(SubstituteError::WrongArgCount { + call, + expected: self.function.params.len(), + span, + }) + } else { + let mut expr = self + .function + .expr + .clone() + .fold(&mut SubstituteParams { params: call.args })?; + expr.name = Some(expr.name.or(name).unwrap_or(call.name)); + expr.span = expr.span.or(span); + + Ok(expr) + } + } +} diff --git a/src/chomp/check/check.rs b/src/chomp/check/check.rs deleted file mode 100644 index 8729565..0000000 --- a/src/chomp/check/check.rs +++ /dev/null @@ -1,81 +0,0 @@ -use super::{ - super::{ - ast::{Alt, Call, Cat, Epsilon, Fix, Literal, Parameter, Variable}, - context::Context, - error::TypeError, - typed::{self, TypedExpression}, - visit::{Folder, Visitable, Visitor}, - }, - TypeInfer, -}; - -#[derive(Debug)] -pub struct TypeCheck<'a> { - pub context: &'a mut Context, -} - -impl Folder for TypeCheck<'_> { - type Out = Result<TypedExpression, TypeError>; - - fn fold_epsilon(&mut self, eps: Epsilon) -> Self::Out { - Ok(typed::Epsilon::from(eps).into()) - } - - fn fold_literal(&mut self, lit: Literal) -> Self::Out { - Ok(typed::Literal::from(lit).into()) - } - - fn fold_cat(&mut self, cat: Cat) -> Self::Out { - let ty = TypeInfer { - context: self.context, - } - .visit_cat(&cat)?; - let fst = cat.fst.fold(self)?; - let snd = cat.snd; - let snd = self - .context - .with_unguard(|context| snd.fold(&mut TypeCheck { context }))?; - - Ok(typed::Cat::new(fst, cat.punct, snd, ty).into()) - } - - fn fold_alt(&mut self, alt: Alt) -> Self::Out { - let ty = TypeInfer { - context: self.context, - } - .visit_alt(&alt)?; - let left = alt.left.fold(self)?; - let right = alt.right.fold(self)?; - - Ok(typed::Alt::new(left, alt.punct, right, ty).into()) - } - - fn fold_fix(&mut self, fix: Fix) -> Self::Out { - let ty = TypeInfer { - context: self.context, - } - .visit_fix(&fix)?; - let inner = fix.inner; - let inner = self - .context - .with_variable_type(ty.clone(), |context| inner.fold(&mut TypeCheck { context }))?; - - Ok(typed::Fix::new(fix.arg, inner, fix.span, ty).into()) - } - - fn fold_variable(&mut self, var: Variable) -> Self::Out { - let ty = TypeInfer { - context: self.context, - } - .visit_variable(&var)?; - Ok(typed::Variable::new(var, ty).into()) - } - - fn fold_parameter(&mut self, _param: Parameter) -> Self::Out { - todo!() - } - - fn fold_call(&mut self, _call: Call) -> Self::Out { - todo!() - } -} diff --git a/src/chomp/check/closed.rs b/src/chomp/check/closed.rs deleted file mode 100644 index 07ef7ac..0000000 --- a/src/chomp/check/closed.rs +++ /dev/null @@ -1,50 +0,0 @@ -use super::super::{ - ast::{Alt, Call, Cat, Epsilon, Fix, Literal, Parameter, Variable}, - visit::{Visitable, Visitor}, -}; - -/// Test if term is closed for a context with `depth` variables. -#[derive(Copy, Clone, Debug, Default)] -pub struct Closed { - depth: usize, - params: usize, -} - -impl Visitor for Closed { - type Out = bool; - - fn visit_epsilon(&mut self, _eps: &Epsilon) -> Self::Out { - true - } - - fn visit_literal(&mut self, _lit: &Literal) -> Self::Out { - true - } - - fn visit_cat(&mut self, cat: &Cat) -> Self::Out { - cat.first().visit(self) && cat.second().visit(self) - } - - fn visit_alt(&mut self, alt: &Alt) -> Self::Out { - alt.left().visit(self) && alt.right().visit(self) - } - - fn visit_fix(&mut self, fix: &Fix) -> Self::Out { - self.depth += 1; - let res = fix.inner().visit(self); - self.depth -= 1; - res - } - - fn visit_variable(&mut self, var: &Variable) -> Self::Out { - var.index() < self.depth - } - - fn visit_parameter(&mut self, param: &Parameter) -> Self::Out { - param.index() < self.params - } - - fn visit_call(&mut self, call: &Call) -> Self::Out { - call.args().iter().all(|arg| arg.visit(self)) - } -} diff --git a/src/chomp/check/deepen.rs b/src/chomp/check/deepen.rs deleted file mode 100644 index b9f606d..0000000 --- a/src/chomp/check/deepen.rs +++ /dev/null @@ -1,47 +0,0 @@ -use super::super::{ - ast::{Alt, Call, Cat, Epsilon, Fix, Literal, Parameter, Variable}, - visit::{Mapper, Visitable}, -}; - -#[derive(Clone, Copy, Debug, Default)] -pub struct DeepenVars { - depth: usize, -} - -impl Mapper for DeepenVars { - type Out = (); - - fn map_epsilon(&mut self, _: &mut Epsilon) -> Self::Out {} - - fn map_literal(&mut self, _: &mut Literal) -> Self::Out {} - - fn map_cat(&mut self, cat: &mut Cat) -> Self::Out { - cat.first_mut().map(self); - cat.second_mut().map(self); - } - - fn map_alt(&mut self, alt: &mut Alt) -> Self::Out { - alt.left_mut().map(self); - alt.right_mut().map(self); - } - - fn map_fix(&mut self, fix: &mut Fix) -> Self::Out { - self.depth += 1; - fix.inner_mut().map(self); - self.depth -= 1; - } - - fn map_variable(&mut self, bind: &mut Variable) -> Self::Out { - if bind.index() >= self.depth { - *bind.index_mut() += 1; - } - } - - fn map_parameter(&mut self, _param: &mut Parameter) -> Self::Out {} - - fn map_call(&mut self, call: &mut Call) -> Self::Out { - for arg in call.args_mut() { - arg.map(self); - } - } -} diff --git a/src/chomp/check/infer.rs b/src/chomp/check/infer.rs deleted file mode 100644 index 941ddba..0000000 --- a/src/chomp/check/infer.rs +++ /dev/null @@ -1,92 +0,0 @@ -use super::super::{ - ast::{Alt, Call, Cat, Epsilon, Fix, Literal, Parameter, Variable}, - context::Context, - error::{AltError, CatError, FixError, TypeError}, - set::{FirstSet, FlastSet}, - typed::Type, - visit::{Visitable, Visitor}, -}; - -#[derive(Debug)] -pub struct TypeInfer<'a> { - pub context: &'a mut Context, -} - -impl Visitor for TypeInfer<'_> { - type Out = Result<Type, TypeError>; - - fn visit_epsilon(&mut self, _eps: &Epsilon) -> Self::Out { - Ok(Type::new(true, FirstSet::default(), FlastSet::default())) - } - - fn visit_literal(&mut self, lit: &Literal) -> Self::Out { - Ok(Type::of_str(&lit.value())) - } - - fn visit_cat(&mut self, cat: &Cat) -> Self::Out { - let first = cat.first().visit(self)?; - let second = self - .context - .with_unguard(|context| cat.second().visit(&mut TypeInfer { context }))?; - - if first.nullable() { - Err(TypeError::Cat(CatError::FirstNullable(cat.clone()))) - } else if !first - .flast_set() - .intersect_first(second.first_set()) - .is_empty() - { - Err(TypeError::Cat(CatError::FirstFlastOverlap(cat.clone()))) - } else { - Ok(first.cat(second)) - } - } - - fn visit_alt(&mut self, alt: &Alt) -> Self::Out { - let left = alt.left().visit(self)?; - let right = alt.right().visit(self)?; - - if left.nullable() && right.nullable() { - Err(TypeError::Alt(AltError::BothNullable(alt.clone()))) - } else if !left.first_set().intersect(right.first_set()).is_empty() { - Err(TypeError::Alt(AltError::FirstOverlap(alt.clone()))) - } else { - Ok(left.alt(right)) - } - } - - fn visit_fix(&mut self, fix: &Fix) -> Self::Out { - let mut res = Type::default(); - let mut last = None; - - while last.map(|r| r != res).unwrap_or(true) { - last = Some(res); - res = self - .context - .with_variable_type(last.as_ref().cloned().unwrap(), |context| { - fix.inner().visit(&mut TypeInfer { context }) - }) - .map_err(|e| { - TypeError::Fix(FixError( - fix.clone(), - last.as_ref().cloned().unwrap(), - Box::new(e), - )) - })?; - } - - Ok(res) - } - - fn visit_variable(&mut self, var: &Variable) -> Self::Out { - Ok(self.context.get_variable_type(&var)?.clone()) - } - - fn visit_parameter(&mut self, _param: &Parameter) -> Self::Out { - todo!() - } - - fn visit_call(&mut self, _call: &Call) -> Self::Out { - todo!() - } -} diff --git a/src/chomp/check/inline.rs b/src/chomp/check/inline.rs deleted file mode 100644 index da501f1..0000000 --- a/src/chomp/check/inline.rs +++ /dev/null @@ -1,349 +0,0 @@ -use super::{ - super::{ - ast::{Alt, Call, Cat, Epsilon, Expression, Fix, Function, Literal, Parameter, Variable}, - error::SubstituteError, - visit::{Folder, Visitable}, - }, - SubstituteParams, -}; - -#[derive(Clone, Debug)] -pub struct InlineCalls { - function: Function, -} - -impl InlineCalls { - pub fn new(function: Function) -> Self { - Self { function } - } -} - -impl Folder for InlineCalls { - type Out = Result<Expression, SubstituteError>; - - fn fold_epsilon(&mut self, eps: Epsilon) -> Self::Out { - Ok(eps.into()) - } - - fn fold_literal(&mut self, lit: Literal) -> Self::Out { - Ok(lit.into()) - } - - fn fold_cat(&mut self, mut cat: Cat) -> Self::Out { - cat.fst = Box::new(cat.fst.fold(self)?); - cat.snd = Box::new(cat.snd.fold(self)?); - Ok(cat.into()) - } - - fn fold_alt(&mut self, mut alt: Alt) -> Self::Out { - alt.left = Box::new(alt.left.fold(self)?); - alt.right = Box::new(alt.right.fold(self)?); - Ok(alt.into()) - } - - fn fold_fix(&mut self, mut fix: Fix) -> Self::Out { - fix.inner = Box::new(fix.inner.fold(self)?); - Ok(fix.into()) - } - - fn fold_variable(&mut self, var: Variable) -> Self::Out { - Ok(var.into()) - } - - fn fold_parameter(&mut self, param: Parameter) -> Self::Out { - Ok(param.into()) - } - - fn fold_call(&mut self, mut call: Call) -> Self::Out { - call.args = call - .args - .into_iter() - .map(|arg| arg.fold(self)) - .collect::<Result<_, _>>()?; - - if call.name != self.function.name { - Ok(call.into()) - } else if call.args.len() != self.function.params { - Err(SubstituteError::WrongArgCount { - call, - expected: self.function.params, - }) - } else { - self.function - .expr - .clone() - .fold(&mut SubstituteParams::new(call.args)) - } - } -} - -#[cfg(test)] -mod tests { - use crate::chomp::Name; - - use super::*; - - fn opt() -> Function { - Function::new( - Name::Spanless("opt".to_string()), - 1, - Expression::Alt(Alt::new( - Expression::Epsilon(None), - None, - Parameter::new(Name::Spanless("x".to_string()), 0).into(), - )), - None, - ) - } - - #[test] - fn test_inline_absent() { - let expr = Epsilon::default(); - let inlined = expr.fold(&mut InlineCalls::new(opt())); - assert_eq!(inlined, Ok(Epsilon::default().into())) - } - - #[test] - fn test_inline_in_fix() { - let expr = Fix::new( - Name::Spanless("rec".to_string()), - Call::new( - Name::Spanless("opt".to_string()), - vec![Variable::new(Name::Spanless("rec".to_string()), 0).into()], - None, - ) - .into(), - None, - ); - let inlined = expr.fold(&mut InlineCalls::new(opt())); - assert_eq!( - inlined, - Ok(Fix::new( - Name::Spanless("rec".to_string()), - Alt::new( - Epsilon::default().into(), - None, - Variable::new(Name::Spanless("rec".to_string()), 0).into() - ) - .into(), - None - ) - .into()) - ) - } - - #[test] - fn test_inline_deepens_vars() { - let function = Function::new( - Name::Spanless("plus".into()), - 1, - Fix::new( - Name::Spanless("rec".to_string()), - Cat::new( - Parameter::new(Name::Spanless("x".to_string()), 0).into(), - None, - Variable::new(Name::Spanless("rec".to_string()), 0).into(), - ) - .into(), - None, - ) - .into(), - None, - ); - let expr = Fix::new( - Name::Spanless("var".to_string()), - Call::new( - Name::Spanless("plus".into()), - vec![Variable::new(Name::Spanless("var".to_string()), 0).into()], - None, - ) - .into(), - None, - ); - let inlined = expr.fold(&mut InlineCalls::new(function)); - assert_eq!( - inlined, - Ok(Fix::new( - Name::Spanless("var".to_string()), - Fix::new( - Name::Spanless("rec".to_string()), - Cat::new( - Variable::new(Name::Spanless("var".to_string()), 1).into(), - None, - Variable::new(Name::Spanless("rec".to_string()), 0).into(), - ) - .into(), - None, - ) - .into(), - None - ) - .into()) - ) - } - - #[test] - fn test_inline_resets_vars() { - let function = Function::new( - Name::Spanless("plus".into()), - 1, - Cat::new( - Fix::new( - Name::Spanless("rec".to_string()), - Epsilon::default().into(), - None, - ) - .into(), - None, - Parameter::new(Name::Spanless("x".to_string()), 0).into(), - ) - .into(), - None, - ); - let expr = Fix::new( - Name::Spanless("var".to_string()), - Call::new( - Name::Spanless("plus".into()), - vec![Variable::new(Name::Spanless("var".to_string()), 0).into()], - None, - ) - .into(), - None, - ); - let inlined = expr.fold(&mut InlineCalls::new(function)); - assert_eq!( - inlined, - Ok(Fix::new( - Name::Spanless("var".to_string()), - Cat::new( - Fix::new( - Name::Spanless("rec".to_string()), - Epsilon::default().into(), - None, - ) - .into(), - None, - Variable::new(Name::Spanless("var".to_string()), 0).into(), - ) - .into(), - None - ) - .into()) - ) - } - - #[test] - fn test_inline_double_subst() { - let expr = Call::new( - Name::Spanless("opt".to_string()), - vec![Call::new( - Name::Spanless("opt".to_string()), - vec![Literal::Spanless("x".to_string()).into()], - None, - ) - .into()], - None, - ); - let inlined = expr.fold(&mut InlineCalls::new(opt())); - assert_eq!( - inlined, - Ok(Alt::new( - Epsilon::default().into(), - None, - Alt::new( - Epsilon::default().into(), - None, - Literal::Spanless("x".to_string()).into() - ) - .into() - ) - .into()) - ) - } - - #[test] - fn test_inline_call_args() { - let expr = Fix::new( - Name::Spanless("rec".to_string()), - Cat::new( - Literal::Spanless("a".to_string()).into(), - None, - Call::new( - Name::Spanless("opt".to_string()), - vec![Cat::new( - Cat::new( - Literal::Spanless("a".to_string()).into(), - None, - Fix::new( - Name::Spanless("star".to_string()), - Call::new( - Name::Spanless("opt".to_string()), - vec![Cat::new( - Literal::Spanless(" ".to_string()).into(), - None, - Variable::new(Name::Spanless("star".to_string()), 0).into(), - ) - .into()], - None, - ) - .into(), - None, - ) - .into(), - ) - .into(), - None, - Variable::new(Name::Spanless("rec".to_string()), 0).into(), - ) - .into()], - None, - ) - .into(), - ) - .into(), - None, - ); - let inlined = expr.fold(&mut InlineCalls::new(opt())); - assert_eq!(inlined, - Ok(Fix::new( - Name::Spanless("rec".to_string()), - Cat::new( - Literal::Spanless("a".to_string()).into(), - None, - Alt::new( - Epsilon::default().into(), - None, - Cat::new( - Cat::new( - Literal::Spanless("a".to_string()).into(), - None, - Fix::new( - Name::Spanless("star".to_string()), - Alt::new( - Epsilon::default().into(), - None, - Cat::new( - Literal::Spanless(" ".to_string()).into(), - None, - Variable::new(Name::Spanless("star".to_string()), 0).into(), - ) - .into() - ) - .into(), - None, - ) - .into(), - ) - .into(), - None, - Variable::new(Name::Spanless("rec".to_string()), 0).into(), - ) - .into(), - ) - .into(), - ) - .into(), - None, - ).into())) - } -} diff --git a/src/chomp/check/mod.rs b/src/chomp/check/mod.rs deleted file mode 100644 index c9aeda4..0000000 --- a/src/chomp/check/mod.rs +++ /dev/null @@ -1,17 +0,0 @@ -mod check; -mod closed; -mod deepen; -mod infer; -mod inline; -mod shallow; -mod spanning; -mod substitute; - -pub use check::TypeCheck; -pub use closed::Closed; -pub use deepen::DeepenVars; -pub use infer::TypeInfer; -pub use inline::InlineCalls; -pub use shallow::ShallowVars; -pub use spanning::Spanning; -pub use substitute::SubstituteParams; diff --git a/src/chomp/check/shallow.rs b/src/chomp/check/shallow.rs deleted file mode 100644 index e5cc1a1..0000000 --- a/src/chomp/check/shallow.rs +++ /dev/null @@ -1,47 +0,0 @@ -use super::super::{ - ast::{Alt, Call, Cat, Epsilon, Fix, Literal, Parameter, Variable}, - visit::{Mapper, Visitable}, -}; - -#[derive(Clone, Copy, Debug, Default)] -pub struct ShallowVars { - depth: usize, -} - -impl Mapper for ShallowVars { - type Out = (); - - fn map_epsilon(&mut self, _: &mut Epsilon) -> Self::Out {} - - fn map_literal(&mut self, _: &mut Literal) -> Self::Out {} - - fn map_cat(&mut self, cat: &mut Cat) -> Self::Out { - cat.first_mut().map(self); - cat.second_mut().map(self); - } - - fn map_alt(&mut self, alt: &mut Alt) -> Self::Out { - alt.left_mut().map(self); - alt.right_mut().map(self); - } - - fn map_fix(&mut self, fix: &mut Fix) -> Self::Out { - self.depth += 1; - fix.inner_mut().map(self); - self.depth -= 1; - } - - fn map_variable(&mut self, bind: &mut Variable) -> Self::Out { - if bind.index() > self.depth { - *bind.index_mut() -= 1; - } - } - - fn map_parameter(&mut self, _param: &mut Parameter) -> Self::Out {} - - fn map_call(&mut self, call: &mut Call) -> Self::Out { - for arg in call.args_mut() { - arg.map(self); - } - } -} diff --git a/src/chomp/check/spanning.rs b/src/chomp/check/spanning.rs deleted file mode 100644 index 59c3811..0000000 --- a/src/chomp/check/spanning.rs +++ /dev/null @@ -1,59 +0,0 @@ -use proc_macro2::Span; - -use super::super::{ - ast::{Alt, Call, Cat, Epsilon, Fix, Literal, Parameter, Variable}, - visit::{Visitable, Visitor}, -}; - -#[derive(Clone, Copy, Debug)] -pub struct Spanning; - -impl Visitor for Spanning { - type Out = Option<Span>; - - fn visit_epsilon(&mut self, eps: &Epsilon) -> Self::Out { - eps.map(|e| e.span) - } - - fn visit_literal(&mut self, lit: &Literal) -> Self::Out { - lit.span() - } - - fn visit_cat(&mut self, cat: &Cat) -> Self::Out { - let fst = cat.first().visit(self); - let snd = cat.second().visit(self); - - match (fst, snd) { - (None, snd) => snd, - (Some(fst), None) => Some(fst), - (Some(fst), Some(snd)) => fst.join(snd), - } - } - - fn visit_alt(&mut self, alt: &Alt) -> Self::Out { - let left = alt.left().visit(self); - let right = alt.right().visit(self); - - match (left, right) { - (None, right) => right, - (Some(left), None) => Some(left), - (Some(left), Some(right)) => left.join(right), - } - } - - fn visit_fix(&mut self, fix: &Fix) -> Self::Out { - fix.span() - } - - fn visit_variable(&mut self, var: &Variable) -> Self::Out { - var.name().span() - } - - fn visit_parameter(&mut self, param: &Parameter) -> Self::Out { - param.name().span() - } - - fn visit_call(&mut self, call: &Call) -> Self::Out { - call.span() - } -} diff --git a/src/chomp/check/substitute.rs b/src/chomp/check/substitute.rs deleted file mode 100644 index 32595b1..0000000 --- a/src/chomp/check/substitute.rs +++ /dev/null @@ -1,77 +0,0 @@ -use super::{ - super::{ - ast::{Alt, Call, Cat, Epsilon, Expression, Fix, Literal, Parameter, Variable}, - error::SubstituteError, - visit::{Folder, Visitable}, - }, - DeepenVars, ShallowVars, -}; - -#[derive(Clone, Debug)] -pub struct SubstituteParams { - params: Vec<Expression>, -} - -impl SubstituteParams { - pub fn new(params: Vec<Expression>) -> Self { - Self { params } - } -} - -impl Folder for SubstituteParams { - type Out = Result<Expression, SubstituteError>; - - fn fold_epsilon(&mut self, eps: Epsilon) -> Self::Out { - Ok(eps.into()) - } - - fn fold_literal(&mut self, lit: Literal) -> Self::Out { - Ok(lit.into()) - } - - fn fold_cat(&mut self, mut cat: Cat) -> Self::Out { - cat.fst = Box::new(cat.fst.fold(self)?); - cat.snd = Box::new(cat.snd.fold(self)?); - Ok(cat.into()) - } - - fn fold_alt(&mut self, mut alt: Alt) -> Self::Out { - alt.left = Box::new(alt.left.fold(self)?); - alt.right = Box::new(alt.right.fold(self)?); - Ok(alt.into()) - } - - fn fold_fix(&mut self, mut fix: Fix) -> Self::Out { - for param in &mut self.params { - param.map(&mut DeepenVars::default()); - } - - fix.inner = Box::new(fix.inner.fold(self)?); - - for param in &mut self.params { - param.map(&mut ShallowVars::default()); - } - - Ok(fix.into()) - } - - fn fold_variable(&mut self, var: Variable) -> Self::Out { - Ok(Expression::Variable(var)) - } - - fn fold_call(&mut self, mut call: Call) -> Self::Out { - call.args = call - .args - .into_iter() - .map(|arg| arg.fold(self)) - .collect::<Result<_, _>>()?; - Ok(call.into()) - } - - fn fold_parameter(&mut self, param: Parameter) -> Self::Out { - self.params - .get(param.index()) - .cloned() - .ok_or(SubstituteError::FreeParameter(param)) - } -} diff --git a/src/chomp/error.rs b/src/chomp/error.rs deleted file mode 100644 index e7e4660..0000000 --- a/src/chomp/error.rs +++ /dev/null @@ -1,402 +0,0 @@ -use std::{ - error::Error, - fmt::{self, Display}, -}; - -use proc_macro2::Span; - -use super::{ - ast::{Alt, Call, Cat, Fix, Parameter, Variable}, - check::Spanning, - typed::Type, - visit::Visitable, -}; - -/// A type error when using a fix point variable. -#[derive(Debug, Eq, PartialEq)] -pub enum VariableError { - /// Usage of a free variable. - FreeVariable(Variable), - /// Usage of a guarded variable. - GuardedVariable(Variable), -} - -impl From<VariableError> for syn::Error { - fn from(other: VariableError) -> Self { - match other { - VariableError::FreeVariable(_) => todo!(), - VariableError::GuardedVariable(_) => todo!(), - } - } -} - -impl Display for VariableError { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - match self { - Self::FreeVariable(var) => { - let start = var.name().span().unwrap_or_else(Span::call_site).start(); - write!( - f, - "{}:{}: unbound variable '{}'", - start.line, - start.column, - var.name() - ) - } - Self::GuardedVariable(var) => { - let start = var.name().span().unwrap_or_else(Span::call_site).start(); - write!( - f, - "{}:{}: variable '{}' is guarded", - start.line, - start.column, - var.name() - ) - } - } - } -} - -impl Error for VariableError {} - -/// A type error when concatenating two terms. -#[derive(Debug, Eq, PartialEq)] -pub enum CatError { - /// The first term was unexpectedly nullable. - FirstNullable(Cat), - /// The flast set of the first term intersects the first set of the second. - FirstFlastOverlap(Cat), -} - -impl From<CatError> for syn::Error { - fn from(other: CatError) -> Self { - match other { - CatError::FirstNullable(cat) => { - let mut err = Self::new( - cat.punct().map_or_else(Span::call_site, |p| p.span), - "first item in sequence cannot accept the empty string", - ); - err.combine(Self::new( - cat.first() - .visit(&mut Spanning) - .unwrap_or_else(Span::call_site), - "this can accept empty string", - )); - err - } - CatError::FirstFlastOverlap(cat) => { - let mut err = Self::new( - cat.punct().map_or_else(Span::call_site, |p| p.span), - "cannot decide whether to repeat first or start accepting second", - ); - err.combine(Self::new( - cat.first() - .visit(&mut Spanning) - .unwrap_or_else(Span::call_site), - "a repetition of this", - )); - err.combine(Self::new( - cat.second() - .visit(&mut Spanning) - .unwrap_or_else(Span::call_site), - "collides with the start of this", - )); - err - } - } - } -} - -impl Display for CatError { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - match self { - Self::FirstNullable(cat) => { - let start = cat.punct().map_or_else(Span::call_site, |p| p.span).start(); - let fst_start = cat - .first() - .visit(&mut Spanning) - .unwrap_or_else(Span::call_site) - .start(); - write!( - f, - "{}:{}: term `{}' ({}:{}) accepts the empty string", - start.line, - start.column, - cat.first(), - fst_start.line, - fst_start.column - ) - } - Self::FirstFlastOverlap(cat) => { - let start = cat.punct().map_or_else(Span::call_site, |p| p.span).start(); - let fst_start = cat - .first() - .visit(&mut Spanning) - .unwrap_or_else(Span::call_site) - .start(); - let snd_start = cat - .second() - .visit(&mut Spanning) - .unwrap_or_else(Span::call_site) - .start(); - write!( - f, - "{}:{}: cannot decide whether to repeat `{}' ({}:{}) or start accepting `{}' ({}:{}).", - start.line, - start.column, - cat.first(), - fst_start.line, - fst_start.column, - cat.second(), - snd_start.line, - snd_start.column - ) - } - } - } -} - -impl Error for CatError {} - -/// A type error when alternating two terms. -#[derive(Debug, Eq, PartialEq)] -pub enum AltError { - /// Both terms are nullable. - BothNullable(Alt), - /// The first sets of the two terms intersect. - FirstOverlap(Alt), -} - -impl From<AltError> for syn::Error { - fn from(other: AltError) -> Self { - match other { - AltError::BothNullable(alt) => { - let mut err = Self::new( - alt.punct().map_or_else(Span::call_site, |p| p.span), - "both branches accept the empty string", - ); - err.combine(Self::new( - alt.left() - .visit(&mut Spanning) - .unwrap_or_else(Span::call_site), - "left branch accepts the empty string", - )); - err.combine(Self::new( - alt.right() - .visit(&mut Spanning) - .unwrap_or_else(Span::call_site), - "right branch accepts the empty string", - )); - err - } - AltError::FirstOverlap(alt) => { - let mut err = Self::new( - alt.punct().map_or_else(Span::call_site, |p| p.span), - "cannot decide whether to accept the left or right branch", - ); - err.combine(Self::new( - alt.left() - .visit(&mut Spanning) - .unwrap_or_else(Span::call_site), - "left branch starts with a character", - )); - err.combine(Self::new( - alt.right() - .visit(&mut Spanning) - .unwrap_or_else(Span::call_site), - "right branch starts with the same character", - )); - err - } - } - } -} - -impl Display for AltError { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - match self { - Self::BothNullable(alt) => { - let start = alt.punct().map_or_else(Span::call_site, |p| p.span).start(); - let left_start = alt - .left() - .visit(&mut Spanning) - .unwrap_or_else(Span::call_site) - .start(); - let right_start = alt - .right() - .visit(&mut Spanning) - .unwrap_or_else(Span::call_site) - .start(); - write!( - f, - "{}:{}: both `{}' ({}:{}) and `{}' ({}:{}) accept the empty string.", - start.line, - start.column, - alt.left(), - left_start.line, - left_start.column, - alt.right(), - right_start.line, - right_start.column, - ) - } - Self::FirstOverlap(alt) => { - let start = alt.punct().map_or_else(Span::call_site, |p| p.span).start(); - let left_start = alt - .left() - .visit(&mut Spanning) - .unwrap_or_else(Span::call_site) - .start(); - let right_start = alt - .right() - .visit(&mut Spanning) - .unwrap_or_else(Span::call_site) - .start(); - write!( - f, - "{}:{}: cannot decide whether to accept `{}' ({}:{}) or `{}' ({}:{}).", - start.line, - start.column, - alt.left(), - left_start.line, - left_start.column, - alt.right(), - right_start.line, - right_start.column, - ) - } - } - } -} - -impl Error for AltError {} - -#[derive(Debug, Eq, PartialEq)] -pub struct FixError(pub Fix, pub Type, pub Box<TypeError>); - -impl From<FixError> for syn::Error { - fn from(e: FixError) -> Self { - let mut error = Self::from(*e.2); - error.combine(Self::new( - e.0.span().unwrap_or_else(Span::call_site), - format!("assuming `{}' has type {:?}.", e.0.arg(), e.1), - )); - error - } -} - -impl Display for FixError { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - self.2.fmt(f)?; - let start = self.0.span().unwrap_or_else(Span::call_site).start(); - write!( - f, - "\n{}:{}: assuming `{}' has type {:?}.", - start.line, - start.column, - self.0.arg(), - self.1 - ) - } -} - -impl Error for FixError {} - -#[derive(Debug, Eq, PartialEq)] -pub enum TypeError { - Cat(CatError), - Alt(AltError), - Variable(VariableError), - Fix(FixError), -} - -impl From<CatError> for TypeError { - fn from(other: CatError) -> Self { - Self::Cat(other) - } -} - -impl From<AltError> for TypeError { - fn from(other: AltError) -> Self { - Self::Alt(other) - } -} - -impl From<VariableError> for TypeError { - fn from(other: VariableError) -> Self { - Self::Variable(other) - } -} - -impl From<TypeError> for syn::Error { - fn from(other: TypeError) -> Self { - match other { - TypeError::Cat(e) => e.into(), - TypeError::Alt(e) => e.into(), - TypeError::Variable(e) => e.into(), - TypeError::Fix(e) => e.into(), - } - } -} - -impl Display for TypeError { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - match self { - Self::Cat(e) => e.fmt(f), - Self::Alt(e) => e.fmt(f), - Self::Variable(e) => e.fmt(f), - Self::Fix(e) => e.fmt(f), - } - } -} - -impl Error for TypeError {} - -#[derive(Debug, Eq, PartialEq)] -pub enum SubstituteError { - FreeParameter(Parameter), - WrongArgCount { call: Call, expected: usize }, -} - -impl From<SubstituteError> for syn::Error { - fn from(e: SubstituteError) -> Self { - match e { - SubstituteError::FreeParameter(param) => { - Self::new(param.name().span().unwrap_or_else(Span::call_site), format!("undeclared variable `{}'", param.name())) - } - SubstituteError::WrongArgCount { call, expected } => { - Self::new(call.span().unwrap_or_else(Span::call_site), format!("wrong number of arguments. Expected {}, got {}", expected, call.args().len())) - } - } - } -} - -impl Display for SubstituteError { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - match self { - Self::FreeParameter(param) => { - let start = param.name().span().unwrap_or_else(Span::call_site).start(); - write!( - f, - "{}:{}: undeclared variable `{}'", - start.line, - start.column, - param.name() - ) - } - SubstituteError::WrongArgCount { call, expected } => { - let start = call.span().unwrap_or_else(Span::call_site).start(); - write!( - f, - "{}:{}: wrong number of arguments. Expected {}, got {}", - start.line, - start.column, - expected, - call.args().len() - ) - } - } - } -} - -impl Error for SubstituteError {} diff --git a/src/chomp/mod.rs b/src/chomp/mod.rs index 1e30738..79b4fac 100644 --- a/src/chomp/mod.rs +++ b/src/chomp/mod.rs @@ -1,12 +1,10 @@ use std::{fmt, hash}; +use heck::{CamelCase, SnakeCase}; use proc_macro2::{Ident, Span}; use syn::ext::IdentExt; pub mod ast; -pub mod check; -pub mod context; -pub mod error; pub mod set; pub mod typed; pub mod visit; @@ -25,7 +23,7 @@ impl Name { } } - pub fn as_ident(self, span: Span) -> Ident { + pub fn into_ident(self, span: Span) -> Ident { match self { Self::Spanned(i) => i, Self::Spanless(s) => Ident::new(&s, span), @@ -33,6 +31,36 @@ impl Name { } } +impl CamelCase for Name { + fn to_camel_case(&self) -> Self::Owned { + match self { + Self::Spanned(ident) => { + let span = ident.span(); + let name = ident.unraw().to_string(); + Ident::new(&name.to_camel_case(), span).into() + } + Name::Spanless(name) => { + name.to_camel_case().into() + } + } + } +} + +impl SnakeCase for Name { + fn to_snake_case(&self) -> Self::Owned { + match self { + Self::Spanned(ident) => { + let span = ident.span(); + let name = ident.unraw().to_string(); + Ident::new(&name.to_snake_case(), span).into() + } + Name::Spanless(name) => { + name.to_snake_case().into() + } + } + } +} + impl PartialEq for Name { fn eq(&self, other: &Self) -> bool { match (self, other) { diff --git a/src/chomp/typed.rs b/src/chomp/typed.rs deleted file mode 100644 index db07552..0000000 --- a/src/chomp/typed.rs +++ /dev/null @@ -1,323 +0,0 @@ -use std::hash::{Hash, Hasher}; - -use proc_macro2::Span; -use syn::Token; - -use super::{ - ast, - set::{FirstSet, FlastSet}, - Name, -}; - -#[derive(Debug, Default, Clone, Eq, Hash, PartialEq)] -pub struct Type { - nullable: bool, - first_set: FirstSet, - flast_set: FlastSet, -} - -impl Type { - pub const fn new(nullable: bool, first_set: FirstSet, flast_set: FlastSet) -> Self { - Self { - nullable, - first_set, - flast_set, - } - } - - pub fn of_str(s: &str) -> Self { - Self { - nullable: s.is_empty(), - first_set: FirstSet::of_str(s), - flast_set: FlastSet::default(), - } - } - - pub fn nullable(&self) -> bool { - self.nullable - } - - pub fn first_set(&self) -> &FirstSet { - &self.first_set - } - - pub fn flast_set(&self) -> &FlastSet { - &self.flast_set - } - - pub fn cat(self, other: Self) -> Self { - Self { - nullable: self.nullable && other.nullable, - first_set: self.first_set.union(if self.nullable { - other.first_set.clone() - } else { - FirstSet::default() - }), - flast_set: other.flast_set.union(if other.nullable { - self.flast_set.union_first(other.first_set) - } else { - FlastSet::default() - }), - } - } - - pub fn alt(self, other: Self) -> Self { - Self { - nullable: self.nullable || other.nullable, - first_set: self.first_set.union(other.first_set), - flast_set: self.flast_set.union(other.flast_set), - } - } -} - -#[derive(Debug, Eq, Hash, PartialEq)] -pub struct Epsilon { - inner: Option<Token![_]>, - ty: Type, -} - -impl From<ast::Epsilon> for Epsilon { - fn from(inner: ast::Epsilon) -> Self { - Self { - inner, - ty: Type::new(true, FirstSet::default(), FlastSet::default()), - } - } -} - -#[derive(Debug, Eq, Hash, PartialEq)] -pub struct Literal { - inner: ast::Literal, - ty: Type, -} - -impl Literal { - pub fn inner(&self) -> &ast::Literal { - &self.inner - } - - pub fn span(&self) -> Option<Span> { - self.inner.span() - } -} - -impl From<ast::Literal> for Literal { - fn from(inner: ast::Literal) -> Self { - let ty = Type::of_str(&inner.value()); - Self { inner, ty } - } -} - -impl From<Literal> for ast::Literal { - fn from(lit: Literal) -> Self { - lit.inner - } -} - -#[derive(Debug, Eq, Hash, PartialEq)] -pub struct Cat { - fst: Box<TypedExpression>, - punct: Option<Token![.]>, - snd: Box<TypedExpression>, - ty: Type, -} - -impl Cat { - pub(crate) fn new( - fst: TypedExpression, - punct: Option<Token![.]>, - snd: TypedExpression, - ty: Type, - ) -> Self { - Self { - fst: Box::new(fst), - punct, - snd: Box::new(snd), - ty, - } - } - - pub fn unwrap(self) -> (TypedExpression, Option<Token![.]>, TypedExpression) { - (*self.fst, self.punct, *self.snd) - } -} - -#[derive(Debug, Eq, Hash, PartialEq)] -pub struct Alt { - left: Box<TypedExpression>, - punct: Option<Token![|]>, - right: Box<TypedExpression>, - ty: Type, -} - -impl Alt { - pub(crate) fn new( - left: TypedExpression, - punct: Option<Token![|]>, - right: TypedExpression, - ty: Type, - ) -> Self { - Self { - left: Box::new(left), - punct, - right: Box::new(right), - ty, - } - } - - pub fn unwrap(self) -> (TypedExpression, Option<Token![|]>, TypedExpression) { - (*self.left, self.punct, *self.right) - } -} - -#[derive(Debug)] -pub struct Fix { - arg: Name, - inner: Box<TypedExpression>, - span: Option<Span>, - ty: Type, -} - -impl Fix { - pub(crate) fn new(arg: Name, inner: TypedExpression, span: Option<Span>, ty: Type) -> Self { - Self { - arg, - inner: Box::new(inner), - span, - ty, - } - } - - pub fn unwrap(self) -> (Name, TypedExpression, Option<Span>) { - (self.arg, *self.inner, self.span) - } -} - -impl PartialEq for Fix { - fn eq(&self, other: &Self) -> bool { - self.arg == other.arg && self.inner == other.inner && self.ty == other.ty - } -} - -impl Eq for Fix {} - -impl Hash for Fix { - fn hash<H: Hasher>(&self, state: &mut H) { - self.arg.hash(state); - self.inner.hash(state); - self.ty.hash(state); - } -} - -#[derive(Debug, Eq, Hash, PartialEq)] -pub struct Variable { - inner: ast::Variable, - ty: Type, -} - -impl Variable { - pub(crate) fn new(inner: ast::Variable, ty: Type) -> Self { - Self { inner, ty } - } - - pub fn index(&self) -> usize { - self.inner.index - } -} - -#[derive(Debug, Eq, Hash, PartialEq)] -pub(crate) enum RawTypedExpression { - Epsilon(Epsilon), - Literal(Literal), - Cat(Cat), - Alt(Alt), - Fix(Fix), - Variable(Variable), -} - -#[derive(Debug, Eq, Hash, PartialEq)] -pub struct TypedExpression { - pub(crate) inner: RawTypedExpression, -} - -impl From<Epsilon> for TypedExpression { - fn from(eps: Epsilon) -> Self { - Self { - inner: RawTypedExpression::Epsilon(eps), - } - } -} - -impl From<Literal> for TypedExpression { - fn from(lit: Literal) -> Self { - Self { - inner: RawTypedExpression::Literal(lit), - } - } -} - -impl From<Cat> for TypedExpression { - fn from(cat: Cat) -> Self { - Self { - inner: RawTypedExpression::Cat(cat), - } - } -} - -impl From<Alt> for TypedExpression { - fn from(alt: Alt) -> Self { - Self { - inner: RawTypedExpression::Alt(alt), - } - } -} - -impl From<Fix> for TypedExpression { - fn from(fix: Fix) -> Self { - Self { - inner: RawTypedExpression::Fix(fix), - } - } -} - -impl From<Variable> for TypedExpression { - fn from(var: Variable) -> Self { - Self { - inner: RawTypedExpression::Variable(var), - } - } -} - -pub trait Typed { - fn get_type(&self) -> &Type; -} - -macro_rules! leaf_typed { - ($ty:ty, $field:ident) => { - impl Typed for $ty { - fn get_type(&self) -> &Type { - &self.$field - } - } - }; -} - -leaf_typed!(Epsilon, ty); -leaf_typed!(Literal, ty); -leaf_typed!(Cat, ty); -leaf_typed!(Alt, ty); -leaf_typed!(Fix, ty); -leaf_typed!(Variable, ty); - -impl Typed for TypedExpression { - fn get_type(&self) -> &Type { - match &self.inner { - RawTypedExpression::Epsilon(e) => e.get_type(), - RawTypedExpression::Literal(l) => l.get_type(), - RawTypedExpression::Cat(c) => c.get_type(), - RawTypedExpression::Alt(a) => a.get_type(), - RawTypedExpression::Fix(f) => f.get_type(), - RawTypedExpression::Variable(v) => v.get_type(), - } - } -} diff --git a/src/chomp/context.rs b/src/chomp/typed/context.rs index 392023f..5c8d398 100644 --- a/src/chomp/context.rs +++ b/src/chomp/typed/context.rs @@ -1,4 +1,6 @@ -use super::{ast::Variable, error::VariableError, typed::Type}; +use crate::chomp::ast::Variable; + +use super::Type; #[derive(Debug, Default)] pub struct Context { @@ -16,14 +18,12 @@ impl Context { } pub fn is_unguarded(&self, var: &Variable) -> Option<bool> { - if self.vars.len() <= var.index() { + if self.vars.len() <= var.index { None } else if self.unguard_points.is_empty() { Some(false) } else { - Some( - self.unguard_points[self.unguard_points.len() - 1] + var.index() >= self.vars.len(), - ) + Some(self.unguard_points[self.unguard_points.len() - 1] + var.index >= self.vars.len()) } } @@ -34,11 +34,11 @@ impl Context { res } - pub fn get_variable_type(&self, var: &Variable) -> Result<&Type, VariableError> { + pub fn get_variable_type(&self, var: &Variable) -> Result<&Type, GetVariableError> { match self.is_unguarded(var) { - None => Err(VariableError::FreeVariable(var.clone())), - Some(false) => Err(VariableError::GuardedVariable(var.clone())), - Some(true) => Ok(&self.vars[self.vars.len() - var.index() - 1]), + None => Err(GetVariableError::FreeVariable), + Some(false) => Err(GetVariableError::GuardedVariable), + Some(true) => Ok(&self.vars[self.vars.len() - var.index - 1]), } } @@ -49,3 +49,9 @@ impl Context { res } } + +#[derive(Clone, Debug, Eq, PartialEq)] +pub enum GetVariableError { + FreeVariable, + GuardedVariable, +} diff --git a/src/chomp/typed/error.rs b/src/chomp/typed/error.rs new file mode 100644 index 0000000..bb807fc --- /dev/null +++ b/src/chomp/typed/error.rs @@ -0,0 +1,150 @@ +use std::{ + error::Error, + fmt::{self, Display}, +}; + +use proc_macro2::Span; + +use crate::chomp::{ast::Variable, Name}; + +use super::{context::GetVariableError, TypedExpression}; + +/// A type error when using a fix point variable. +#[derive(Debug)] +pub struct VariableError { + pub inner: GetVariableError, + pub var: Variable, + pub span: Option<Span>, + pub name: Option<Name>, +} + +impl PartialEq for VariableError { + fn eq(&self, other: &Self) -> bool { + todo!() + } +} + +impl Eq for VariableError {} + +impl From<VariableError> for syn::Error { + fn from(other: VariableError) -> Self { + todo!() + } +} + +impl Display for VariableError { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + todo!() + } +} + +impl Error for VariableError {} + +/// A type error when concatenating two terms. +#[derive(Debug)] +pub enum CatError { + /// The first term was unexpectedly nullable. + FirstNullable(TypedExpression, Option<Span>), + /// The flast set of the first term intersects the first set of the second. + FirstFlastOverlap(Vec<TypedExpression>, Option<Span>, TypedExpression), +} + +impl PartialEq for CatError { + fn eq(&self, other: &Self) -> bool { + todo!() + } +} + +impl Eq for CatError {} + +impl From<CatError> for syn::Error { + fn from(other: CatError) -> Self { + todo!() + } +} + +impl Display for CatError { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + todo!() + } +} + +impl Error for CatError {} + +/// A type error when alternating two terms. +#[derive(Debug)] +pub enum AltError { + /// Both terms are nullable. + BothNullable(Vec<TypedExpression>, Option<Span>, TypedExpression), + /// The first sets of the two terms intersect. + FirstOverlap(Vec<TypedExpression>, Option<Span>, TypedExpression), +} + +impl PartialEq for AltError { + fn eq(&self, other: &Self) -> bool { + todo!() + } +} + +impl Eq for AltError {} + +impl From<AltError> for syn::Error { + fn from(other: AltError) -> Self { + todo!() + } +} + +impl Display for AltError { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + todo!() + } +} + +impl Error for AltError {} + +#[derive(Debug, Eq, PartialEq)] +pub enum TypeError { + Cat(CatError), + Alt(AltError), + Variable(VariableError), +} + +impl From<CatError> for TypeError { + fn from(other: CatError) -> Self { + Self::Cat(other) + } +} + +impl From<AltError> for TypeError { + fn from(other: AltError) -> Self { + Self::Alt(other) + } +} + +impl From<VariableError> for TypeError { + fn from(other: VariableError) -> Self { + Self::Variable(other) + } +} + +impl From<TypeError> for syn::Error { + fn from(other: TypeError) -> Self { + match other { + TypeError::Cat(e) => e.into(), + TypeError::Alt(e) => e.into(), + TypeError::Variable(e) => e.into(), + } + } +} + +impl Display for TypeError { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Self::Cat(e) => e.fmt(f), + Self::Alt(e) => e.fmt(f), + Self::Variable(e) => e.fmt(f), + } + } +} + +impl Error for TypeError {} diff --git a/src/chomp/typed/infer.rs b/src/chomp/typed/infer.rs new file mode 100644 index 0000000..0161471 --- /dev/null +++ b/src/chomp/typed/infer.rs @@ -0,0 +1,145 @@ +use proc_macro2::Span; + +use crate::chomp::{ + ast::{Alt, Call, Cat, Epsilon, Fix, Literal, Parameter, Variable}, + visit::{Folder, Visitable}, + Name, +}; + +use super::{ + context::Context, + error::{TypeError, VariableError}, + Type, Typed, TypedExpression, +}; + +#[derive(Debug)] +pub struct TypeInfer<'a> { + pub context: &'a mut Context, +} + +impl Folder for TypeInfer<'_> { + type Out = Result<TypedExpression, TypeError>; + + fn fold_epsilon(&mut self, name: Option<Name>, span: Option<Span>, eps: Epsilon) -> Self::Out { + Ok(TypedExpression { + inner: super::Epsilon::from(eps).into(), + name, + span, + }) + } + + fn fold_literal(&mut self, name: Option<Name>, span: Option<Span>, lit: Literal) -> Self::Out { + Ok(TypedExpression { + inner: super::Literal::from(lit).into(), + name, + span, + }) + } + + fn fold_cat(&mut self, name: Option<Name>, span: Option<Span>, cat: Cat) -> Self::Out { + let first = cat.first.fold(self)?; + let second = cat.second; + let rest = cat.rest; + let punct = cat.punct; + self.context + .with_unguard(|context| -> Result<TypedExpression, TypeError> { + let mut infer = TypeInfer { context }; + let second = second.fold(&mut infer)?; + let rest = rest + .into_iter() + .map(|(punct, term)| -> Result<_, TypeError> { + Ok((punct.map(|p| p.span), term.fold(&mut infer)?)) + }) + .collect::<Result<Vec<_>, _>>()?; + Ok(TypedExpression { + inner: super::Cat::new(first, punct.map(|p| p.span), second, rest)?.into(), + name, + span, + }) + }) + } + + fn fold_alt(&mut self, name: Option<Name>, span: Option<Span>, alt: Alt) -> Self::Out { + let first = alt.first.fold(self)?; + let second = alt.second; + let rest = alt.rest; + let punct = alt.punct; + self.context + .with_unguard(|context| -> Result<TypedExpression, TypeError> { + let mut infer = TypeInfer { context }; + let second = second.fold(&mut infer)?; + let rest = rest + .into_iter() + .map(|(punct, term)| -> Result<_, TypeError> { + Ok((punct.map(|p| p.span), term.fold(&mut infer)?)) + }) + .collect::<Result<Vec<_>, _>>()?; + Ok(TypedExpression { + inner: super::Alt::new(first, punct.map(|p| p.span), second, rest)?.into(), + name, + span, + }) + }) + } + + fn fold_fix(&mut self, name: Option<Name>, span: Option<Span>, fix: Fix) -> Self::Out { + let mut ty = Type::default(); + + loop { + let last = ty; + let res = self.context.with_variable_type(last.clone(), |context| { + fix.inner.clone().fold(&mut TypeInfer { context }) + })?; + ty = res.get_type().clone(); + + if last == ty { + return Ok(TypedExpression { + inner: super::Fix { + inner: Box::new(res), + } + .into(), + name, + span, + }); + } + } + } + + fn fold_variable( + &mut self, + name: Option<Name>, + span: Option<Span>, + var: Variable, + ) -> Self::Out { + let ty = match self.context.get_variable_type(&var) { + Ok(ty) => ty.clone(), + Err(inner) => { + return Err(VariableError { + inner, + var, + span, + name, + } + .into()) + } + }; + Ok(TypedExpression { + inner: super::Variable { inner: var, ty }.into(), + name, + span, + }) + } + + fn fold_parameter( + &mut self, + _name: Option<Name>, + _span: Option<Span>, + _param: Parameter, + ) -> Self::Out { + todo!() + } + + fn fold_call(&mut self, _name: Option<Name>,_span: Option<Span>, _call: Call) -> Self::Out { + todo!() + } +} diff --git a/src/chomp/typed/lower.rs b/src/chomp/typed/lower.rs new file mode 100644 index 0000000..37589a1 --- /dev/null +++ b/src/chomp/typed/lower.rs @@ -0,0 +1,41 @@ +use proc_macro2::Span; + +use crate::chomp::Name; + +use super::{Alt, Cat, Epsilon, Fix, Literal, RawTypedExpression, TypedExpression, Variable}; + +pub trait Backend: Default { + type Id; + type Code; + + fn gen_epsilon(&mut self, name: Option<Name>, span: Option<Span>, eps: Epsilon) -> Self::Id; + + fn gen_literal(&mut self, name: Option<Name>, span: Option<Span>, lit: Literal) -> Self::Id; + + fn gen_cat(&mut self, name: Option<Name>, span: Option<Span>, cat: Cat) -> Self::Id; + + fn gen_alt(&mut self, name: Option<Name>, span: Option<Span>, alt: Alt) -> Self::Id; + + fn gen_fix(&mut self, name: Option<Name>, span: Option<Span>, fix: Fix) -> Self::Id; + + fn gen_variable(&mut self, name: Option<Name>, span: Option<Span>, var: Variable) -> Self::Id; + + fn emit_code(self, name: Option<Name>, span: Option<Span>, id: Self::Id) -> Self::Code; +} + +pub trait GenerateCode { + fn gen<B: Backend>(self, backend: &mut B) -> B::Id; +} + +impl GenerateCode for TypedExpression { + fn gen<B: Backend>(self, backend: &mut B) -> B::Id { + match self.inner { + RawTypedExpression::Epsilon(e) => backend.gen_epsilon(self.name, self.span, e), + RawTypedExpression::Literal(l) => backend.gen_literal(self.name, self.span, l), + RawTypedExpression::Cat(c) => backend.gen_cat(self.name, self.span, c), + RawTypedExpression::Alt(a) => backend.gen_alt(self.name, self.span, a), + RawTypedExpression::Fix(f) => backend.gen_fix(self.name, self.span, f), + RawTypedExpression::Variable(v) => backend.gen_variable(self.name, self.span, v), + } + } +} diff --git a/src/chomp/typed/mod.rs b/src/chomp/typed/mod.rs new file mode 100644 index 0000000..e9aed79 --- /dev/null +++ b/src/chomp/typed/mod.rs @@ -0,0 +1,343 @@ +use std::{hash, iter}; + +use proc_macro2::Span; + +use super::{ + ast, + set::{FirstSet, FlastSet}, + Name, +}; + +pub mod context; +pub mod error; +pub mod lower; + +mod infer; + +pub use self::infer::TypeInfer; +use self::error::{AltError, CatError}; + +#[derive(Debug, Default, Clone, Eq, Hash, PartialEq)] +pub struct Type { + nullable: bool, + first_set: FirstSet, + flast_set: FlastSet, +} + +impl Type { + pub const fn new(nullable: bool, first_set: FirstSet, flast_set: FlastSet) -> Self { + Self { + nullable, + first_set, + flast_set, + } + } + + pub fn of_str(s: &str) -> Self { + Self { + nullable: s.is_empty(), + first_set: FirstSet::of_str(s), + flast_set: FlastSet::default(), + } + } + + pub fn nullable(&self) -> bool { + self.nullable + } + + pub fn first_set(&self) -> &FirstSet { + &self.first_set + } + + pub fn flast_set(&self) -> &FlastSet { + &self.flast_set + } + + pub fn cat(self, other: Self) -> Self { + Self { + nullable: self.nullable && other.nullable, + first_set: self.first_set.union(if self.nullable { + other.first_set.clone() + } else { + FirstSet::default() + }), + flast_set: other.flast_set.union(if other.nullable { + self.flast_set.union_first(other.first_set) + } else { + FlastSet::default() + }), + } + } + + pub fn alt(self, other: Self) -> Self { + Self { + nullable: self.nullable || other.nullable, + first_set: self.first_set.union(other.first_set), + flast_set: self.flast_set.union(other.flast_set), + } + } +} + +#[derive(Clone, Debug, Eq, Hash, PartialEq)] +pub struct Epsilon { + ty: Type, +} + +impl From<ast::Epsilon> for Epsilon { + fn from(_: ast::Epsilon) -> Self { + Self { + ty: Type::new(true, FirstSet::default(), FlastSet::default()), + } + } +} + +#[derive(Clone, Debug, Eq, Hash, PartialEq)] +pub struct Literal { + inner: ast::Literal, + ty: Type, +} + +impl Literal { + pub fn inner(&self) -> &ast::Literal { + &self.inner + } +} + +impl From<ast::Literal> for Literal { + fn from(inner: ast::Literal) -> Self { + let ty = Type::of_str(&inner); + Self { inner, ty } + } +} + +impl From<Literal> for ast::Literal { + fn from(lit: Literal) -> Self { + lit.inner + } +} + +#[derive(Clone, Debug, Eq, Hash, PartialEq)] +pub struct Cat { + terms: Vec<TypedExpression>, + ty: Type, +} + +impl Cat { + fn new<I: IntoIterator<Item = (Option<Span>, TypedExpression)>>( + first: TypedExpression, + punct: Option<Span>, + second: TypedExpression, + rest: I, + ) -> Result<Self, CatError> { + if first.get_type().nullable() { + return Err(CatError::FirstNullable(first, punct)); + } + + iter::once((punct, second)) + .chain(rest) + .try_fold( + (first.get_type().clone(), vec![first]), + |(ty, mut terms), (punct, right)| { + if !ty + .flast_set() + .intersect_first(right.get_type().first_set()) + .is_empty() + { + Err(CatError::FirstFlastOverlap(terms, punct, right)) + } else { + let ty = ty.cat(right.get_type().clone()); + terms.push(right); + Ok((ty, terms)) + } + }, + ) + .map(|(ty, terms)| Self { ty, terms }) + } +} + +impl IntoIterator for Cat { + type Item = TypedExpression; + + type IntoIter = <Vec<TypedExpression> as IntoIterator>::IntoIter; + + fn into_iter(self) -> Self::IntoIter { + self.terms.into_iter() + } +} + +#[derive(Clone, Debug, Eq, Hash, PartialEq)] +pub struct Alt { + terms: Vec<TypedExpression>, + ty: Type, +} + +impl Alt { + pub fn new<I: IntoIterator<Item = (Option<Span>, TypedExpression)>>( + first: TypedExpression, + punct: Option<Span>, + second: TypedExpression, + rest: I, + ) -> Result<Self, AltError> { + iter::once((punct, second)) + .chain(rest) + .try_fold( + (first.get_type().clone(), vec![first]), + |(ty, mut terms), (punct, right)| { + if ty.nullable() && right.get_type().nullable() { + Err(AltError::BothNullable(terms, punct, right)) + } else if !ty + .first_set() + .intersect(right.get_type().first_set()) + .is_empty() + { + Err(AltError::FirstOverlap(terms, punct, right)) + } else { + let ty = ty.alt(right.get_type().clone()); + terms.push(right); + Ok((ty, terms)) + } + }, + ) + .map(|(ty, terms)| Self { ty, terms }) + } +} + +impl IntoIterator for Alt { + type Item = TypedExpression; + + type IntoIter = <Vec<TypedExpression> as IntoIterator>::IntoIter; + + fn into_iter(self) -> Self::IntoIter { + self.terms.into_iter() + } +} + +#[derive(Clone, Debug, Eq, Hash, PartialEq)] +pub struct Fix { + inner: Box<TypedExpression>, +} + +impl Fix { + pub fn unwrap(self) -> TypedExpression { + *self.inner + } +} + +#[derive(Clone, Debug, Eq, Hash, PartialEq)] +pub struct Variable { + inner: ast::Variable, + ty: Type, +} + +impl Variable { + pub fn index(&self) -> usize { + self.inner.index + } +} + +#[derive(Clone, Debug, Eq, Hash, PartialEq)] +enum RawTypedExpression { + Epsilon(Epsilon), + Literal(Literal), + Cat(Cat), + Alt(Alt), + Fix(Fix), + Variable(Variable), +} + +impl From<Epsilon> for RawTypedExpression { + fn from(eps: Epsilon) -> Self { + Self::Epsilon(eps) + } +} + +impl From<Literal> for RawTypedExpression { + fn from(lit: Literal) -> Self { + Self::Literal(lit) + } +} + +impl From<Cat> for RawTypedExpression { + fn from(cat: Cat) -> Self { + Self::Cat(cat) + } +} + +impl From<Alt> for RawTypedExpression { + fn from(alt: Alt) -> Self { + Self::Alt(alt) + } +} + +impl From<Fix> for RawTypedExpression { + fn from(fix: Fix) -> Self { + Self::Fix(fix) + } +} + +impl From<Variable> for RawTypedExpression { + fn from(var: Variable) -> Self { + Self::Variable(var) + } +} + +#[derive(Clone, Debug)] +pub struct TypedExpression { + inner: RawTypedExpression, + pub name: Option<Name>, + pub span: Option<Span>, +} + +impl PartialEq for TypedExpression { + fn eq(&self, other: &Self) -> bool { + self.inner == other.inner && self.name == other.name + } +} + +impl Eq for TypedExpression {} + +impl hash::Hash for TypedExpression { + fn hash<H: hash::Hasher>(&self, state: &mut H) { + self.inner.hash(state); + self.name.hash(state); + } +} + +pub trait Typed { + fn get_type(&self) -> &Type; +} + +macro_rules! leaf_typed { + ($ty:ty, $field:ident) => { + impl Typed for $ty { + fn get_type(&self) -> &Type { + &self.$field + } + } + }; +} + +leaf_typed!(Epsilon, ty); +leaf_typed!(Literal, ty); +leaf_typed!(Cat, ty); +leaf_typed!(Alt, ty); +leaf_typed!(Variable, ty); + +impl Typed for Fix { + fn get_type(&self) -> &Type { + self.inner.get_type() + } +} + +impl Typed for TypedExpression { + fn get_type(&self) -> &Type { + match &self.inner { + RawTypedExpression::Epsilon(e) => e.get_type(), + RawTypedExpression::Literal(l) => l.get_type(), + RawTypedExpression::Cat(c) => c.get_type(), + RawTypedExpression::Alt(a) => a.get_type(), + RawTypedExpression::Fix(f) => f.get_type(), + RawTypedExpression::Variable(v) => v.get_type(), + } + } +} diff --git a/src/chomp/visit.rs b/src/chomp/visit.rs index 4113b64..bdef30e 100644 --- a/src/chomp/visit.rs +++ b/src/chomp/visit.rs @@ -1,62 +1,120 @@ -use super::ast::{Alt, Call, Cat, Epsilon, Expression, Fix, Literal, Parameter, Variable}; +use proc_macro2::Span; + +use super::{ + ast::{ + Alt, Call, Cat, Epsilon, Expression, Fix, Literal, NamedExpression, Parameter, Variable, + }, + Name, +}; pub trait Visitor { type Out; - fn visit_epsilon(&mut self, eps: &Epsilon) -> Self::Out; - - fn visit_literal(&mut self, lit: &Literal) -> Self::Out; - - fn visit_cat(&mut self, cat: &Cat) -> Self::Out; - - fn visit_alt(&mut self, alt: &Alt) -> Self::Out; - - fn visit_fix(&mut self, fix: &Fix) -> Self::Out; - - fn visit_variable(&mut self, var: &Variable) -> Self::Out; - - fn visit_parameter(&mut self, param: &Parameter) -> Self::Out; - - fn visit_call(&mut self, call: &Call) -> Self::Out; + fn visit_epsilon( + &mut self, + name: Option<&Name>, + span: Option<Span>, + eps: &Epsilon, + ) -> Self::Out; + + fn visit_literal( + &mut self, + name: Option<&Name>, + span: Option<Span>, + lit: &Literal, + ) -> Self::Out; + + fn visit_cat(&mut self, name: Option<&Name>, span: Option<Span>, cat: &Cat) -> Self::Out; + + fn visit_alt(&mut self, name: Option<&Name>, span: Option<Span>, alt: &Alt) -> Self::Out; + + fn visit_fix(&mut self, name: Option<&Name>, span: Option<Span>, fix: &Fix) -> Self::Out; + + fn visit_variable( + &mut self, + name: Option<&Name>, + span: Option<Span>, + var: &Variable, + ) -> Self::Out; + + fn visit_parameter( + &mut self, + name: Option<&Name>, + span: Option<Span>, + param: &Parameter, + ) -> Self::Out; + + fn visit_call(&mut self, name: Option<&Name>, span: Option<Span>, call: &Call) -> Self::Out; } pub trait Mapper { type Out; - fn map_epsilon(&mut self, eps: &mut Epsilon) -> Self::Out; - - fn map_literal(&mut self, lit: &mut Literal) -> Self::Out; - - fn map_cat(&mut self, cat: &mut Cat) -> Self::Out; - - fn map_alt(&mut self, alt: &mut Alt) -> Self::Out; - - fn map_fix(&mut self, fix: &mut Fix) -> Self::Out; - - fn map_variable(&mut self, var: &mut Variable) -> Self::Out; - - fn map_parameter(&mut self, param: &mut Parameter) -> Self::Out; - - fn map_call(&mut self, call: &mut Call) -> Self::Out; + fn map_epsilon( + &mut self, + name: &mut Option<Name>, + span: Option<Span>, + eps: &mut Epsilon, + ) -> Self::Out; + + fn map_literal( + &mut self, + name: &mut Option<Name>, + span: Option<Span>, + lit: &mut Literal, + ) -> Self::Out; + + fn map_cat(&mut self, name: &mut Option<Name>, span: Option<Span>, cat: &mut Cat) -> Self::Out; + + fn map_alt(&mut self, name: &mut Option<Name>, span: Option<Span>, alt: &mut Alt) -> Self::Out; + + fn map_fix(&mut self, name: &mut Option<Name>, span: Option<Span>, fix: &mut Fix) -> Self::Out; + + fn map_variable( + &mut self, + name: &mut Option<Name>, + span: Option<Span>, + var: &mut Variable, + ) -> Self::Out; + + fn map_parameter( + &mut self, + name: &mut Option<Name>, + span: Option<Span>, + param: &mut Parameter, + ) -> Self::Out; + + fn map_call( + &mut self, + name: &mut Option<Name>, + span: Option<Span>, + call: &mut Call, + ) -> Self::Out; } pub trait Folder { type Out; - fn fold_epsilon(&mut self, eps: Epsilon) -> Self::Out; + fn fold_epsilon(&mut self, name: Option<Name>, span: Option<Span>, eps: Epsilon) -> Self::Out; - fn fold_literal(&mut self, lit: Literal) -> Self::Out; + fn fold_literal(&mut self, name: Option<Name>, span: Option<Span>, lit: Literal) -> Self::Out; - fn fold_cat(&mut self, cat: Cat) -> Self::Out; + fn fold_cat(&mut self, name: Option<Name>, span: Option<Span>, cat: Cat) -> Self::Out; - fn fold_alt(&mut self, alt: Alt) -> Self::Out; + fn fold_alt(&mut self, name: Option<Name>, span: Option<Span>, alt: Alt) -> Self::Out; - fn fold_fix(&mut self, fix: Fix) -> Self::Out; + fn fold_fix(&mut self, name: Option<Name>, span: Option<Span>, fix: Fix) -> Self::Out; - fn fold_variable(&mut self, var: Variable) -> Self::Out; + fn fold_variable(&mut self, name: Option<Name>, span: Option<Span>, var: Variable) + -> Self::Out; - fn fold_parameter(&mut self, param: Parameter) -> Self::Out; + fn fold_parameter( + &mut self, + name: Option<Name>, + span: Option<Span>, + param: Parameter, + ) -> Self::Out; - fn fold_call(&mut self, call: Call) -> Self::Out; + fn fold_call(&mut self, name: Option<Name>, span: Option<Span>, call: Call) -> Self::Out; } pub trait Visitable { @@ -67,70 +125,43 @@ pub trait Visitable { fn fold<F: Folder>(self, folder: &mut F) -> <F as Folder>::Out; } -macro_rules! visitable_leaf { - ($ty:ty, $visit:ident, $map:ident, $fold:ident) => { - impl Visitable for $ty { - fn visit<V: Visitor>(&self, visitor: &mut V) -> <V as Visitor>::Out { - visitor.$visit(self) - } - - fn map<M: Mapper>(&mut self, mapper: &mut M) -> <M as Mapper>::Out { - mapper.$map(self) - } - - fn fold<F: Folder>(self, folder: &mut F) -> <F as Folder>::Out { - folder.$fold(self) - } - } - }; -} - -visitable_leaf!(Epsilon, visit_epsilon, map_epsilon, fold_epsilon); -visitable_leaf!(Literal, visit_literal, map_literal, fold_literal); -visitable_leaf!(Cat, visit_cat, map_cat, fold_cat); -visitable_leaf!(Alt, visit_alt, map_alt, fold_alt); -visitable_leaf!(Fix, visit_fix, map_fix, fold_fix); -visitable_leaf!(Variable, visit_variable, map_variable, fold_variable); -visitable_leaf!(Parameter, visit_parameter, map_parameter, fold_parameter); -visitable_leaf!(Call, visit_call, map_call, fold_call); - -impl Visitable for Expression { +impl Visitable for NamedExpression { fn visit<V: Visitor>(&self, visitor: &mut V) -> <V as Visitor>::Out { - match self { - Self::Epsilon(e) => visitor.visit_epsilon(e), - Self::Literal(l) => visitor.visit_literal(l), - Self::Cat(c) => visitor.visit_cat(c), - Self::Alt(a) => visitor.visit_alt(a), - Self::Fix(f) => visitor.visit_fix(f), - Self::Variable(v) => visitor.visit_variable(v), - Self::Parameter(p) => visitor.visit_parameter(p), - Self::Call(c) => visitor.visit_call(c), + match &self.expr { + Expression::Epsilon(e) => visitor.visit_epsilon(self.name.as_ref(), self.span, e), + Expression::Literal(l) => visitor.visit_literal(self.name.as_ref(), self.span, l), + Expression::Cat(c) => visitor.visit_cat(self.name.as_ref(), self.span, c), + Expression::Alt(a) => visitor.visit_alt(self.name.as_ref(), self.span, a), + Expression::Fix(f) => visitor.visit_fix(self.name.as_ref(), self.span, f), + Expression::Variable(v) => visitor.visit_variable(self.name.as_ref(), self.span, v), + Expression::Parameter(p) => visitor.visit_parameter(self.name.as_ref(), self.span, p), + Expression::Call(c) => visitor.visit_call(self.name.as_ref(), self.span, c), } } fn map<M: Mapper>(&mut self, mapper: &mut M) -> <M as Mapper>::Out { - match self { - Self::Epsilon(e) => mapper.map_epsilon(e), - Self::Literal(l) => mapper.map_literal(l), - Self::Cat(c) => mapper.map_cat(c), - Self::Alt(a) => mapper.map_alt(a), - Self::Fix(f) => mapper.map_fix(f), - Self::Variable(v) => mapper.map_variable(v), - Self::Parameter(p) => mapper.map_parameter(p), - Self::Call(c) => mapper.map_call(c), + match &mut self.expr { + Expression::Epsilon(e) => mapper.map_epsilon(&mut self.name, self.span, e), + Expression::Literal(l) => mapper.map_literal(&mut self.name, self.span, l), + Expression::Cat(c) => mapper.map_cat(&mut self.name, self.span, c), + Expression::Alt(a) => mapper.map_alt(&mut self.name, self.span, a), + Expression::Fix(f) => mapper.map_fix(&mut self.name, self.span, f), + Expression::Variable(v) => mapper.map_variable(&mut self.name, self.span, v), + Expression::Parameter(p) => mapper.map_parameter(&mut self.name, self.span, p), + Expression::Call(c) => mapper.map_call(&mut self.name, self.span, c), } } fn fold<F: Folder>(self, folder: &mut F) -> <F as Folder>::Out { - match self { - Self::Epsilon(e) => folder.fold_epsilon(e), - Self::Literal(l) => folder.fold_literal(l), - Self::Cat(c) => folder.fold_cat(c), - Self::Alt(a) => folder.fold_alt(a), - Self::Fix(f) => folder.fold_fix(f), - Self::Variable(v) => folder.fold_variable(v), - Self::Parameter(p) => folder.fold_parameter(p), - Self::Call(c) => folder.fold_call(c), + match self.expr { + Expression::Epsilon(e) => folder.fold_epsilon(self.name, self.span, e), + Expression::Literal(l) => folder.fold_literal(self.name, self.span, l), + Expression::Cat(c) => folder.fold_cat(self.name, self.span, c), + Expression::Alt(a) => folder.fold_alt(self.name, self.span, a), + Expression::Fix(f) => folder.fold_fix(self.name, self.span, f), + Expression::Variable(v) => folder.fold_variable(self.name, self.span, v), + Expression::Parameter(p) => folder.fold_parameter(self.name, self.span, p), + Expression::Call(c) => folder.fold_call(self.name, self.span, c), } } } diff --git a/src/lower/mod.rs b/src/lower/mod.rs index 242fa47..06099e3 100644 --- a/src/lower/mod.rs +++ b/src/lower/mod.rs @@ -1,58 +1,488 @@ -use crate::chomp::typed::{ - Alt, Cat, Epsilon, Fix, Literal, RawTypedExpression, TypedExpression, Variable, +use std::{ + collections::{BTreeSet, HashMap}, + convert::TryInto, }; -pub mod rust; +use heck::{CamelCase, SnakeCase}; +use proc_macro2::{Ident, Span, TokenStream, TokenTree}; +use quote::{format_ident, quote, quote_spanned}; +use syn::{Index, LitStr}; -pub trait Backend: Default { - type Id; - type Code; +use crate::chomp::{ + ast, + set::FirstSet, + typed::{ + lower::{Backend, GenerateCode}, + Alt, Cat, Epsilon, Fix, Literal, Type, Typed, TypedExpression, Variable, + }, + Name, +}; - fn gen_epsilon(&mut self, eps: Epsilon) -> Self::Id; +#[derive(Clone, Debug)] +struct Ty { + name: TokenStream, + rest: TokenStream, + deps: BTreeSet<usize>, +} - fn gen_literal(&mut self, lit: Literal) -> Self::Id; +#[derive(Debug)] +pub struct RustBackend { + // Indexed by ID, stores type, impl and dependencies + data: Vec<Ty>, + eps_id: Option<usize>, + lit_map: HashMap<String, usize>, + cat_map: HashMap<Vec<usize>, usize>, + alt_map: HashMap<Vec<usize>, usize>, + fix_map: HashMap<TypedExpression, usize>, + var_map: HashMap<usize, usize>, // Key is fix point ID + name_map: HashMap<Ident, usize>, + can_char: BTreeSet<usize>, + context: Vec<usize>, +} - fn gen_cat(&mut self, cat: Cat) -> Self::Id; +impl RustBackend { + fn new_type_name(&mut self, prefix: &str, name: Option<Name>, span: Span) -> (usize, Ident) { + let id = self.data.len(); - fn gen_alt(&mut self, alt: Alt) -> Self::Id; + match name { + None => (id, format_ident!("{}{}", prefix, id + 1, span = span)), + Some(name) => { + let name = name.to_camel_case().into_ident(span); + let count = self.name_map.entry(name.clone()).or_insert(0); + *count += 1; + (id, format_ident!("{}{}", name, count)) + } + } + } - fn gen_fix(&mut self, fix: Fix) -> Self::Id; + fn recurse_exprs<I: IntoIterator<Item = TypedExpression>>( + &mut self, + iter: I, + ) -> (Vec<Type>, Vec<(Option<Name>, Span)>, Vec<usize>) { + let (ty_name_span, ids): (Vec<_>, _) = iter + .into_iter() + .map(|e| { + ( + ( + e.get_type().clone(), + (e.name.clone(), e.span.unwrap_or_else(Span::call_site)), + ), + e.gen(self), + ) + }) + .unzip(); + let (ty, name_span) = ty_name_span.into_iter().unzip(); + (ty, name_span, ids) + } + + fn indexes<'a, I: 'a + IntoIterator<Item = Span>>( + prefix: &'a str, + iter: I, + ) -> impl Iterator<Item = (Index, Ident)> + '_ { + iter.into_iter().enumerate().map(move |(idx, span)| { + ( + Index { + index: idx.try_into().unwrap(), + span, + }, + format_ident!("{}{}", prefix, idx, span = span), + ) + }) + } + + fn ty_names<'a, I: 'a + IntoIterator<Item = usize>>( + &'a self, + iter: I, + ) -> impl Iterator<Item = TokenStream> + '_ { + iter.into_iter().map(move |id| self.data[id].name.clone()) + } - fn gen_variable(&mut self, var: Variable) -> Self::Id; + fn name_parts_snake<'a, I: 'a + IntoIterator<Item = (Option<Name>, Span)>>( + default: &'a str, + iter: I, + ) -> impl Iterator<Item = Ident> + '_ { + let mut name_map = HashMap::new(); + iter.into_iter() + .enumerate() + .map(move |(index, (name, span))| match name { + None => format_ident!("{}{}", default, index + 1, span = span), + Some(name) => { + let name = name.to_snake_case().into_ident(span); + let count = name_map.entry(name.clone()).or_insert(0usize); + *count += 1; + format_ident!("{}{}", name, count) + } + }) + } - fn emit_code(self, id: Self::Id) -> Self::Code; + fn name_parts_camel<'a, I: 'a + IntoIterator<Item = (Option<Name>, Span)>>( + default: &'a str, + iter: I, + ) -> impl Iterator<Item = Ident> + '_ { + let mut name_map = HashMap::new(); + iter.into_iter() + .enumerate() + .map(move |(index, (name, span))| match name { + None => format_ident!("{}{}", default, index + 1, span = span), + Some(name) => { + let name = name.to_camel_case().into_ident(span); + let count = name_map.entry(name.clone()).or_insert(0usize); + *count += 1; + format_ident!("{}{}", name, count) + } + }) + } } -pub trait GenerateCode { - fn gen<B: Backend>(self, backend: &mut B) -> B::Id; +impl Default for RustBackend { + fn default() -> Self { + Self { + data: Vec::new(), + eps_id: None, + lit_map: HashMap::new(), + cat_map: HashMap::new(), + alt_map: HashMap::new(), + fix_map: HashMap::new(), + var_map: HashMap::new(), + name_map: HashMap::new(), + can_char: BTreeSet::new(), + context: Vec::new(), + } + } } -macro_rules! generate_leaf { - ($ty:ty, $gen:ident) => { - impl GenerateCode for $ty { - fn gen<B: Backend>(self, backend: &mut B) -> B::Id { - backend.$gen(self) +impl Backend for RustBackend { + type Id = usize; + + type Code = TokenStream; + + fn gen_epsilon(&mut self, _name: Option<Name>, _span: Option<Span>, _eps: Epsilon) -> Self::Id { + if let Some(id) = self.eps_id { + return id; + } + + let id = self.data.len(); + self.data.push(Ty { + name: quote! {Epsilon}, + rest: TokenStream::new(), + deps: BTreeSet::new(), + }); + id + } + + fn gen_literal(&mut self, name: Option<Name>, span: Option<Span>, lit: Literal) -> Self::Id { + let span = span.unwrap_or_else(Span::call_site); + + let lit: ast::Literal = lit.into(); + if let Some(&id) = self.lit_map.get(&lit) { + return id; + } + + let (id, name) = self.new_type_name("Lit", name, span); + let doc_name = format!(r#"The literal string `{:?}`."#, lit); + let lit = LitStr::new(&lit, span); + + let mut rest = quote_spanned! {span=> + #[doc=#doc_name] + #[derive(Clone, Debug, Eq, Hash, PartialEq)] + pub struct #name; + + impl ::std::fmt::Display for #name { + fn fmt(&self, f: &mut ::std::fmt::Formatter<'_>) -> ::std::fmt::Result { + write!(f, "{}", #lit) + } + } + + impl Parse for #name { + fn take<P: Parser + ?Sized>(input: &mut P) -> Result<Self, TakeError> { + input.consume_str(#lit).map(|()| #name) + } } + }; + + if lit.value().len() == 1 { + self.can_char.insert(id); + let c = lit.value().chars().next().unwrap(); + rest.extend(quote_spanned! {span=> + impl From<#name> for char { + fn from(_: #name) -> Self { + #c + } + } + }); } - }; -} -generate_leaf!(Epsilon, gen_epsilon); -generate_leaf!(Literal, gen_literal); -generate_leaf!(Cat, gen_cat); -generate_leaf!(Alt, gen_alt); -generate_leaf!(Fix, gen_fix); -generate_leaf!(Variable, gen_variable); - -impl GenerateCode for TypedExpression { - fn gen<B: Backend>(self, backend: &mut B) -> B::Id { - match self.inner { - RawTypedExpression::Epsilon(e) => e.gen(backend), - RawTypedExpression::Literal(l) => l.gen(backend), - RawTypedExpression::Cat(c) => c.gen(backend), - RawTypedExpression::Alt(a) => a.gen(backend), - RawTypedExpression::Fix(f) => f.gen(backend), - RawTypedExpression::Variable(v) => v.gen(backend), + self.lit_map.insert(lit.value(), id); + self.data.push(Ty { + name: TokenTree::from(name).into(), + rest, + deps: BTreeSet::new(), + }); + + id + } + + fn gen_cat(&mut self, name: Option<Name>, span: Option<Span>, cat: Cat) -> Self::Id { + let span = span.unwrap_or_else(Span::call_site); + let (_, name_spans, ids) = self.recurse_exprs(cat); + + if let Some(&id) = self.cat_map.get(&ids) { + return id; } + + let (id, name) = self.new_type_name("Cat", name, span); + let ty_names: Vec<_> = self.ty_names(ids.iter().copied()).collect(); + let named = name_spans.iter().any(|(name, _)| name.is_some()); + let (indexes, number_parts): (Vec<_>, Vec<_>) = + Self::indexes("part", name_spans.iter().cloned().map(|(_, span)| span)).unzip(); + let name_parts: Vec<_> = Self::name_parts_snake("part", name_spans).collect(); + let rest = if named { + quote_spanned! {span=> + #[derive(Clone, Debug, Eq, Hash, PartialEq)] + pub struct #name { + #(pub #name_parts: #ty_names),* + } + + impl ::std::fmt::Display for #name { + fn fmt(&self, f: &mut ::std::fmt::Formatter<'_>) -> ::std::fmt::Result { + #(self.#name_parts.fmt(f)?;)* + Ok(()) + } + } + + impl Parse for #name { + fn take<P: Parser + ?Sized>(input: &mut P) -> Result<Self, TakeError> { + #(let #number_parts = input.take()?;)* + Ok(Self {#(#name_parts: #number_parts),*}) + } + } + } + } else { + quote_spanned! {span=> + #[derive(Clone, Debug, Eq, Hash, PartialEq)] + pub struct #name(#(pub #ty_names),*); + + impl ::std::fmt::Display for #name { + fn fmt(&self, f: &mut ::std::fmt::Formatter<'_>) -> ::std::fmt::Result { + #(self.#indexes.fmt(f)?;)* + Ok(()) + } + } + + impl Parse for #name { + fn take<P: Parser + ?Sized>(input: &mut P) -> Result<Self, TakeError> { + #(let #number_parts = input.take()?;)* + Ok(Self(#(#number_parts),*)) + } + } + } + }; + + self.cat_map.insert(ids.clone(), id); + self.data.push(Ty { + name: TokenTree::from(name).into(), + rest, + deps: ids.into_iter().collect(), + }); + id + } + + fn gen_alt(&mut self, name: Option<Name>, span: Option<Span>, alt: Alt) -> Self::Id { + let span = span.unwrap_or_else(Span::call_site); + let (tys, name_spans, ids) = self.recurse_exprs(alt); + + if let Some(&id) = self.alt_map.get(&ids) { + return id; + } + + let (id, name) = self.new_type_name("Alt", name, span); + let ty_names: Vec<_> = self.ty_names(ids.iter().copied()).collect(); + let name_parts: Vec<_> = Self::name_parts_camel("Branch", name_spans).collect(); + + let nullable = tys + .iter() + .enumerate() + .find(|(_, ty)| ty.nullable()) + .map(|(idx, _)| name_parts[idx].clone()); + let (first_alts, firsts): (Vec<_>, Vec<_>) = tys + .iter() + .map(|ty| ty.first_set()) + .cloned() + .enumerate() + .filter(|(_, fs)| !fs.is_empty()) + .map(|(idx, fs)| { + let fs = fs.into_iter(); + (name_parts[idx].clone(), quote! {#(Some(#fs))|*}) + }) + .unzip(); + let all_firsts = tys + .iter() + .map(|ty| ty.first_set()) + .cloned() + .flat_map(FirstSet::into_iter); + + let mut rest = quote_spanned! {span=> + #[derive(Clone, Debug, Eq, Hash, PartialEq)] + pub enum #name { + #(#name_parts(#ty_names)),* + } + + impl ::std::fmt::Display for #name { + fn fmt(&self, f: &mut ::std::fmt::Formatter<'_>) -> ::std::fmt::Result { + match self { + #(Self::#name_parts(inner) => inner.fmt(f)),* + } + } + } + }; + + rest.extend(if let Some(nullable) = nullable { + quote_spanned! {span=> + impl Parse for #name { + fn take<P: Parser + ?Sized>(input: &mut P) -> Result<Self, TakeError> { + match input.peek() { + #(#firsts => Ok(Self::#first_alts(input.take()?)),)* + _ => Ok(Self::#nullable(input.take()?)) + } + } + } + } + } else { + quote_spanned! {span=> + impl Parse for #name { + fn take<P: Parser + ?Sized>(input: &mut P) -> Result<Self, TakeError> { + match input.peek() { + #(#firsts => Ok(Self::#first_alts(input.take()?)),)* + Some(c) => Err(TakeError::BadBranch(input.pos(), c, &[#(#all_firsts),*])), + None => Err(TakeError::EndOfStream(input.pos())) + } + } + } + } + }); + + if ids.iter().all(|id| self.can_char.contains(id)) { + self.can_char.insert(id); + rest.extend(quote_spanned! {span=> + impl From<#name> for char { + fn from(alt: #name) -> Self { + match alt { + #(#name::#name_parts(inner) => inner.into()),* + } + } + } + }) + } + + self.alt_map.insert(ids.clone(), id); + self.data.push(Ty { + name: TokenTree::from(name).into(), + rest, + deps: ids.into_iter().collect(), + }); + id + } + + fn gen_fix(&mut self, name: Option<Name>, span: Option<Span>, fix: Fix) -> Self::Id { + let span = span.unwrap_or_else(Span::call_site); + let inner = fix.unwrap(); + + if let Some(&id) = self.fix_map.get(&inner) { + return id; + } + + let (id, name) = self.new_type_name("Fix", name, span); + self.fix_map.insert(inner.clone(), id); + + self.data.push(Ty { + name: TokenTree::from(name.clone()).into(), + rest: TokenStream::new(), + deps: BTreeSet::new(), + }); + + self.context.push(id); + let inner = inner.gen(self); + self.context.pop(); + + let inner_ty = self.data[inner].name.clone(); + let rest = quote_spanned! {span=> + #[derive(Clone, Debug, Eq, Hash, PartialEq)] + pub struct #name(pub #inner_ty); + + impl ::std::fmt::Display for #name { + fn fmt(&self, f: &mut ::std::fmt::Formatter<'_>) -> ::std::fmt::Result { + self.0.fmt(f) + } + } + + impl Parse for #name { + fn take<P: Parser + ?Sized>(input: &mut P) -> Result<Self, TakeError> { + input.take().map(Self) + } + } + }; + self.data[id].rest = rest; + self.data[id].deps = [inner].iter().cloned().collect(); + id + } + + fn gen_variable(&mut self, name: Option<Name>, span: Option<Span>, var: Variable) -> Self::Id { + let span = span.unwrap_or_else(Span::call_site); + let fix_id = self.context[self.context.len() - var.index() - 1]; + + if let Some(&id) = self.var_map.get(&fix_id) { + return id; + } + + let id = self.data.len(); + let fix_ty = self.data[fix_id].name.clone(); + let name = quote! {Box<#fix_ty>}; + self.var_map.insert(fix_id, id); + self.data.push(Ty { + name, + rest: TokenStream::new(), + deps: BTreeSet::new(), + }); + + id + } + + fn emit_code(self, name: Option<Name>, span: Option<Span>, id: Self::Id) -> Self::Code { + let span = span.unwrap_or_else(Span::call_site); + + let mut tokens = quote_spanned! {span=> + use ::chewed::*; + }; + + let mut todo = [id].iter().copied().collect::<BTreeSet<_>>(); + let mut completed = BTreeSet::new(); + + while !todo.is_empty() { + let next = todo.clone(); + completed.append(&mut todo); + + for ty in next { + let ty = self.data[ty].clone(); + tokens.extend(ty.rest); + todo.extend(ty.deps.into_iter().filter(|id| !completed.contains(id))); + } + } + + let root_name = self.data[id].name.clone(); + + tokens.extend(if let Some(name) = name { + let name = name.into_ident(span); + let count = self.name_map.get(&name).copied().unwrap_or_default() + 1; + let name = format_ident!("{}{}", name, count); + quote_spanned! {span=> + pub type #name = #root_name; + } + } else { + quote_spanned! {span=> + pub type Ast = #root_name; + } + }); + + tokens } } diff --git a/src/lower/rust.rs b/src/lower/rust.rs deleted file mode 100644 index 7931306..0000000 --- a/src/lower/rust.rs +++ /dev/null @@ -1,273 +0,0 @@ -use std::collections::{BTreeSet, HashMap}; - -use proc_macro2::{Span, TokenStream, TokenTree}; -use quote::{format_ident, quote}; - -use crate::chomp::{ - ast, - typed::{Alt, Cat, Epsilon, Fix, Literal, Typed, TypedExpression, Variable}, -}; - -use super::{Backend, GenerateCode}; - -#[derive(Debug)] -pub struct RustBackend { - // Indexed by ID, stores type, impl and dependencies - data: Vec<(TokenStream, TokenStream, BTreeSet<usize>)>, - eps_id: Option<usize>, - lit_map: HashMap<String, usize>, - cat_map: HashMap<(usize, usize), usize>, - alt_map: HashMap<(usize, usize), usize>, - fix_map: HashMap<Box<TypedExpression>, usize>, - var_map: HashMap<usize, usize>, // Key is fix point ID - context: Vec<usize>, -} - -impl Default for RustBackend { - fn default() -> Self { - Self { - data: Vec::new(), - eps_id: None, - lit_map: HashMap::new(), - cat_map: HashMap::new(), - alt_map: HashMap::new(), - fix_map: HashMap::new(), - var_map: HashMap::new(), - context: Vec::new(), - } - } -} - -impl Backend for RustBackend { - type Id = usize; - - type Code = TokenStream; - - fn gen_epsilon(&mut self, _eps: Epsilon) -> Self::Id { - match self.eps_id { - Some(id) => id, - None => { - let id = self.data.len(); - let ty = quote! { () }; - let tokens = TokenStream::new(); - self.data.push((ty, tokens, BTreeSet::new())); - id - } - } - } - - fn gen_literal(&mut self, lit: Literal) -> Self::Id { - let lit: ast::Literal = lit.into(); - if let Some(&id) = self.lit_map.get(&lit.value()) { - id - } else { - let id = self.data.len(); - let name = format_ident!("Lit{}", id); - let doc_name = format!( - r#"The literal string `"{}"`."#, - lit.value().escape_debug().collect::<String>() - ); - let lit = lit.as_litstr(Span::call_site()); - let tokens = quote! { - #[doc=#doc_name] - #[derive(Clone, Debug, Eq, Hash, PartialEq)] - pub struct #name; - - impl Parse for #name { - fn take<P: Parser + ?Sized>(input: &mut P) -> Result<Self, TakeError> { - input.consume_str(#lit).map(|()| #name) - } - } - }; - - self.data - .push((TokenTree::from(name).into(), tokens, BTreeSet::new())); - self.lit_map.insert(lit.value(), id); - - id - } - } - - fn gen_cat(&mut self, cat: Cat) -> Self::Id { - let (fst, _punct, snd) = cat.unwrap(); - let fst = fst.gen(self); - let snd = snd.gen(self); - - if let Some(&id) = self.cat_map.get(&(fst, snd)) { - id - } else { - let id = self.data.len(); - let fst_ty = self.data[fst].0.clone(); - let snd_ty = self.data[snd].0.clone(); - self.data.push(( - quote! {(#fst_ty, #snd_ty)}, - TokenStream::new(), - [fst, snd].iter().cloned().collect(), - )); - self.cat_map.insert((fst, snd), id); - id - } - } - - fn gen_alt(&mut self, alt: Alt) -> Self::Id { - let iter_first = alt.get_type().first_set().clone().into_iter(); - - let (left, _punct, right) = alt.unwrap(); - let left_ty = left.get_type(); - let right_ty = right.get_type(); - - let left_null = left_ty.nullable(); - let left_first = left_ty.first_set().clone(); - - let right_null = right_ty.nullable(); - let right_first = right_ty.first_set().clone(); - - let left = left.gen(self); - let right = right.gen(self); - - if let Some(&id) = self.alt_map.get(&(left, right)) { - id - } else { - let id = self.data.len(); - let left_ty = self.data[left].0.clone(); - let right_ty = self.data[right].0.clone(); - let name = format_ident!("Alt{}", id); - let mut tokens = quote! { - #[derive(Clone, Debug, Eq, Hash, PartialEq)] - pub enum #name { - Left(#left_ty), - Right(#right_ty), - } - }; - - let other = if left_null { - let iter = right_first.into_iter(); - - quote! { - impl Parse for #name { - fn take<P: Parser + ?Sized>(input: &mut P) -> Result<Self, TakeError> { - match input.peek() { - #(Some(#iter))|* => input.take().map(Self::Right), - _ => input.take().map(Self::Left), - } - } - } - } - } else if right_null { - let iter = left_first.into_iter(); - - quote! { - impl Parse for #name { - fn take<P: Parser + ?Sized>(input: &mut P) -> Result<Self, TakeError> { - match input.peek() { - #(Some(#iter))|* => input.take().map(Self::Left), - _ => input.take().map(Self::Right), - } - } - } - } - } else { - let iter_left = left_first.into_iter(); - let iter_right = right_first.into_iter(); - - quote! { - impl Parse for #name { - fn take<P: Parser + ?Sized>(input: &mut P) -> Result<Self, TakeError> { - match input.peek().ok_or_else(|| TakeError::EndOfStream(input.pos()))? { - #(#iter_left)|* => input.take().map(Self::Left), - #(#iter_right)|* => input.take().map(Self::Right), - c => Err(TakeError::BadBranch(input.pos(), c, &[#(#iter_first),*])) - } - } - } - } - }; - - tokens.extend(other); - self.data.push(( - TokenTree::from(name).into(), - tokens, - [left, right].iter().cloned().collect(), - )); - self.alt_map.insert((left, right), id); - id - } - } - - fn gen_fix(&mut self, fix: Fix) -> Self::Id { - let (_arg, inner, _span) = fix.unwrap(); - if let Some(&id) = self.fix_map.get(&inner) { - id - } else { - let id = self.data.len(); - let name = format_ident!("Fix{}", id); - self.data.push(( - TokenTree::from(name.clone()).into(), - TokenStream::new(), - BTreeSet::new(), - )); - - self.context.push(id); - let inner = inner.gen(self); - self.context.pop(); - - let inner_ty = self.data[inner].0.clone(); - let tokens = quote! { - #[derive(Clone, Debug, Eq, Hash, PartialEq)] - pub struct #name(#inner_ty); - - impl Parse for #name { - fn take<P: Parser + ?Sized>(input: &mut P) -> Result<Self, TakeError> { - input.take().map(Self) - } - } - }; - self.data[id].1 = tokens; - self.data[id].2 = [inner].iter().cloned().collect(); - id - } - } - - fn gen_variable(&mut self, var: Variable) -> Self::Id { - let fix_id = self.context[self.context.len() - var.index() - 1]; - if let Some(&id) = self.var_map.get(&fix_id) { - id - } else { - let id = self.data.len(); - let fix_ty = self.data[fix_id].0.clone(); - let name = quote! {Box<#fix_ty>}; - self.data.push((name, TokenStream::new(), BTreeSet::new())); - self.var_map.insert(fix_id, id); - id - } - } - - fn emit_code(self, id: Self::Id) -> Self::Code { - let mut tokens = quote! { - use ::chewed::*; - }; - - let (root_ty, root_impl, mut todo) = self.data[id].clone(); - tokens.extend(root_impl); - let mut completed = [id].iter().cloned().collect::<BTreeSet<_>>(); - - while !todo.is_empty() { - let mut next = BTreeSet::new(); - completed.extend(todo.clone()); - - for ty in todo { - let ty = self.data[ty].clone(); - tokens.extend(ty.1); - next.extend(ty.2.into_iter().filter(|id| !completed.contains(id))); - } - - todo = next; - } - - tokens.extend(quote! { - pub type Ast = #root_ty; - }); - - tokens - } -} diff --git a/src/main.rs b/src/main.rs index a255590..100bb68 100644 --- a/src/main.rs +++ b/src/main.rs @@ -5,15 +5,7 @@ use std::{ process::exit, }; -use chomp::{ - chomp::{ - check::{InlineCalls, TypeCheck}, - context::Context, - visit::Visitable, - }, - lower::{rust::RustBackend, Backend, GenerateCode}, - nibble::cst::File, -}; +use chomp::{chomp::{ast::substitute::InlineCalls, typed::{TypeInfer, context::Context, lower::{Backend, GenerateCode}}, visit::Visitable}, lower::RustBackend, nibble::cst::File}; #[derive(Debug)] struct UndecVar; @@ -31,17 +23,17 @@ fn main() { .read_to_string(&mut input) .map_err(|e| Box::new(e) as Box<dyn Error>) .and_then(|_| syn::parse_str(&input).map_err(|e| Box::new(e) as Box<dyn Error>)) - .and_then(|nibble: File| nibble.convert().ok_or(Box::new(UndecVar) as Box<dyn Error>)) + .and_then(|nibble: File| nibble.convert().map_err(|e| Box::new(e) as Box<dyn Error>)) .and_then(|(funs, goal)| { funs.into_iter() .try_rfold(goal, |goal, function| { - goal.fold(&mut InlineCalls::new(function)) + goal.fold(&mut InlineCalls { function }) }) .map_err(|e| Box::new(e) as Box<dyn Error>) }) .and_then(|term| { let mut context = Context::default(); - term.fold(&mut TypeCheck { + term.fold(&mut TypeInfer { context: &mut context, }) .map_err(|e| Box::new(e) as Box<dyn Error>) @@ -49,7 +41,7 @@ fn main() { .map(|typed| { let mut backend = RustBackend::default(); let id = typed.gen(&mut backend); - backend.emit_code(id) + backend.emit_code(None, None, id) }) .and_then(|code| { write!(io::stdout(), "{:#}", code).map_err(|e| Box::new(e) as Box<dyn Error>) diff --git a/src/nibble/convert.rs b/src/nibble/convert.rs index 422d100..5cbf5e2 100644 --- a/src/nibble/convert.rs +++ b/src/nibble/convert.rs @@ -1,10 +1,10 @@ -use std::collections::HashMap; +use std::{collections::HashMap, fmt}; use syn::punctuated::Pair; -use crate::chomp::ast; +use crate::chomp::ast::{self, NamedExpression}; -use super::cst::{Alt, Call, Cat, Fix, Ident, ParenExpression, Term}; +use super::cst::{Alt, Call, Cat, Fix, Ident, Labelled, ParenExpression, Term}; #[derive(Clone, Copy, Debug)] pub enum Binding { @@ -13,7 +13,7 @@ pub enum Binding { Global, } -#[derive(Debug)] +#[derive(Debug, Default)] pub struct Context { names: HashMap<String, Binding>, vars: usize, @@ -62,55 +62,132 @@ impl Context { } } +#[derive(Clone, Debug)] +pub enum ConvertError { + UndeclaredName(Ident), +} + +impl From<ConvertError> for syn::Error { + fn from(e: ConvertError) -> Self { + match e { + ConvertError::UndeclaredName(ident) => { + Self::new(ident.span(), "undeclared name") + } + } + } +} + +impl fmt::Display for ConvertError { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Self::UndeclaredName(i) => { + let start = i.span().start(); + write!( + f, + "{}:{}: undeclared name `{}'", + start.line, start.column, i + ) + } + } + } +} + +impl std::error::Error for ConvertError {} + pub trait Convert { - fn convert(self, context: &mut Context) -> Option<ast::Expression>; + fn convert(self, context: &mut Context) -> Result<NamedExpression, ConvertError>; } impl Convert for Ident { - fn convert(self, context: &mut Context) -> Option<ast::Expression> { - let span = self.span(); + fn convert(self, context: &mut Context) -> Result<NamedExpression, ConvertError> { + let span = Some(self.span()); + let binding = match context.lookup(&self) { + Some(b) => b, + None => return Err(ConvertError::UndeclaredName(self)), + }; + let name = self.into(); - match context.lookup(&self)? { - Binding::Variable(index) => Some(ast::Variable::new(self.into(), index).into()), - Binding::Parameter(index) => Some(ast::Parameter::new(self.into(), index).into()), - Binding::Global => Some(ast::Call::new(self.into(), Vec::new(), Some(span)).into()), - } + Ok(match binding { + Binding::Variable(index) => NamedExpression { + name: Some(name), + expr: ast::Variable { index }.into(), + span, + }, + Binding::Parameter(index) => NamedExpression { + name: Some(name), + expr: ast::Parameter { index }.into(), + span, + }, + Binding::Global => NamedExpression { + name: None, + expr: ast::Call { + name, + args: Vec::new(), + } + .into(), + span, + }, + }) } } impl Convert for Call { - fn convert(self, context: &mut Context) -> Option<ast::Expression> { + fn convert(self, context: &mut Context) -> Result<NamedExpression, ConvertError> { let span = self.span(); let args = self .args .into_iter() .map(|arg| arg.convert(context)) - .collect::<Option<_>>()?; - Some(ast::Call::new(self.name.into(), args, span).into()) + .collect::<Result<_, _>>()?; + Ok(NamedExpression { + name: None, + expr: ast::Call { + name: self.name.into(), + args, + } + .into(), + span, + }) } } impl Convert for Fix { - fn convert(self, context: &mut Context) -> Option<ast::Expression> { + fn convert(self, context: &mut Context) -> Result<NamedExpression, ConvertError> { let span = self.span(); let expr = self.expr; let inner = context.with_variable(&self.arg, |context| expr.convert(context))?; - Some(ast::Fix::new(self.arg.into(), inner, span).into()) + Ok(NamedExpression { + name: None, + expr: ast::Fix { + arg: Some(self.arg.into()), + inner: Box::new(inner), + } + .into(), + span, + }) } } impl Convert for ParenExpression { - fn convert(self, context: &mut Context) -> Option<ast::Expression> { + fn convert(self, context: &mut Context) -> Result<NamedExpression, ConvertError> { self.expr.convert(context) } } impl Convert for Term { - fn convert(self, context: &mut Context) -> Option<ast::Expression> { + fn convert(self, context: &mut Context) -> Result<NamedExpression, ConvertError> { match self { - Self::Epsilon(e) => Some(Some(e).into()), + Self::Epsilon(e) => Ok(NamedExpression { + name: None, + expr: ast::Epsilon.into(), + span: Some(e.span), + }), Self::Ident(i) => i.convert(context), - Self::Literal(l) => Some(ast::Literal::from(l).into()), + Self::Literal(l) => Ok(NamedExpression { + name: None, + expr: l.value().into(), + span: Some(l.span()), + }), Self::Call(c) => c.convert(context), Self::Fix(f) => f.convert(context), Self::Parens(p) => p.convert(context), @@ -119,47 +196,111 @@ impl Convert for Term { } impl Convert for Cat { - fn convert(self, context: &mut Context) -> Option<ast::Expression> { + fn convert(self, context: &mut Context) -> Result<NamedExpression, ConvertError> { let mut iter = self.0.into_pairs(); - let mut out = match iter.next().unwrap() { + + let (first, punct) = match iter.next().unwrap() { Pair::Punctuated(t, p) => (t.convert(context)?, Some(p)), Pair::End(t) => (t.convert(context)?, None), }; - for pair in iter { - let (fst, punct) = out; - out = match pair { - Pair::Punctuated(t, p) => ( - ast::Cat::new(fst, punct, t.convert(context)?).into(), - Some(p), - ), - Pair::End(t) => (ast::Cat::new(fst, punct, t.convert(context)?).into(), None), - }; + let mut rest = Vec::new(); + let (span, _) = iter.try_fold( + ( + first.span.and_then(|s| punct.and_then(|p| s.join(p.span))), + punct, + ), + |(span, punct), pair| { + let (snd, p) = match pair { + Pair::Punctuated(t, p) => (t.convert(context)?, Some(p)), + Pair::End(t) => (t.convert(context)?, None), + }; + + let span = span + .and_then(|s| snd.span.and_then(|t| s.join(t))) + .and_then(|s| p.and_then(|p| s.join(p.span))); + rest.push((punct, snd)); + Ok((span, p)) + }, + )?; + + let mut iter = rest.into_iter(); + if let Some((punct, second)) = iter.next() { + Ok(NamedExpression { + name: None, + expr: ast::Cat { + first: Box::new(first), + punct, + second: Box::new(second), + rest: iter.collect(), + } + .into(), + span, + }) + } else { + Ok(first) } + } +} - Some(out.0) +impl Convert for Labelled { + fn convert(self, context: &mut Context) -> Result<NamedExpression, ConvertError> { + let span = self.span(); + let named = self.cat.convert(context)?; + let name = self.label.map(|l| l.label.into()).or(named.name); + + Ok(NamedExpression { + name, + expr: named.expr, + span, + }) } } impl Convert for Alt { - fn convert(self, context: &mut Context) -> Option<ast::Expression> { + fn convert(self, context: &mut Context) -> Result<NamedExpression, ConvertError> { let mut iter = self.0.into_pairs(); - let mut out = match iter.next().unwrap() { + + let (first, punct) = match iter.next().unwrap() { Pair::Punctuated(t, p) => (t.convert(context)?, Some(p)), Pair::End(t) => (t.convert(context)?, None), }; - for pair in iter { - let (fst, punct) = out; - out = match pair { - Pair::Punctuated(t, p) => ( - ast::Alt::new(fst, punct, t.convert(context)?).into(), - Some(p), - ), - Pair::End(t) => (ast::Alt::new(fst, punct, t.convert(context)?).into(), None), - }; - } + let mut rest = Vec::new(); + let (span, _) = iter.try_fold( + ( + first.span.and_then(|s| punct.and_then(|p| s.join(p.span))), + punct, + ), + |(span, punct), pair| { + let (snd, p) = match pair { + Pair::Punctuated(t, p) => (t.convert(context)?, Some(p)), + Pair::End(t) => (t.convert(context)?, None), + }; - Some(out.0) + let span = span + .and_then(|s| snd.span.and_then(|t| s.join(t))) + .and_then(|s| p.and_then(|p| s.join(p.span))); + rest.push((punct, snd)); + Ok((span, p)) + }, + )?; + + let mut iter = rest.into_iter(); + if let Some((punct, second)) = iter.next() { + Ok(NamedExpression { + name: None, + expr: ast::Alt { + first: Box::new(first), + punct, + second: Box::new(second), + rest: iter.collect(), + } + .into(), + span, + }) + } else { + Ok(first) + } } } diff --git a/src/nibble/cst.rs b/src/nibble/cst.rs index 2b52678..f6fa51b 100644 --- a/src/nibble/cst.rs +++ b/src/nibble/cst.rs @@ -4,14 +4,14 @@ use syn::{ ext::IdentExt, parenthesized, parse::{Parse, ParseStream}, - punctuated::Punctuated, + punctuated::{Pair, Punctuated}, token::{Bracket, Comma, Let, Match, Paren}, LitStr, Token, }; -use crate::chomp::ast; +use crate::chomp::{Name, ast}; -use super::convert::{Context, Convert}; +use super::convert::{Context, Convert, ConvertError}; pub type Epsilon = Token![_]; @@ -134,6 +134,19 @@ pub enum Term { Parens(ParenExpression), } +impl Term { + pub fn span(&self) -> Option<Span> { + match self { + Self::Epsilon(e) => Some(e.span), + Self::Ident(i) => Some(i.span()), + Self::Literal(l) => Some(l.span()), + Self::Call(c) => c.span(), + Self::Fix(f) => f.span(), + Self::Parens(p) => Some(p.paren_token.span), + } + } +} + impl Parse for Term { fn parse(input: ParseStream<'_>) -> syn::Result<Self> { let lookahead = input.lookahead1(); @@ -169,8 +182,74 @@ impl Parse for Cat { } } +impl Cat { + pub fn span(&self) -> Option<Span> { + let mut iter = self.0.pairs(); + let span = match iter.next()? { + Pair::Punctuated(t, p) => t.span().and_then(|s| s.join(p.span)), + Pair::End(t) => t.span(), + }?; + + iter.try_fold(span, |span, pair| match pair { + Pair::Punctuated(t, p) => t + .span() + .and_then(|s| span.join(s)) + .and_then(|s| s.join(p.span)), + Pair::End(t) => t.span().and_then(|s| span.join(s)), + }) + } +} + +#[derive(Clone, Debug, Eq, PartialEq)] +pub struct Label { + colon_tok: Token![:], + pub label: Ident, +} + +impl Label { + pub fn span(&self) -> Option<Span> { + self.colon_tok.span.join(self.label.span()) + } +} + +impl Parse for Label { + fn parse(input: ParseStream<'_>) -> syn::Result<Self> { + let colon_tok = input.parse()?; + let label = input.call(Ident::parse_any)?; + Ok(Self { colon_tok, label }) + } +} + +#[derive(Clone, Debug, Eq, PartialEq)] +pub struct Labelled { + pub cat: Cat, + pub label: Option<Label>, +} + +impl Labelled { + pub fn span(&self) -> Option<Span> { + self.cat.span().and_then(|s| { + self.label + .as_ref() + .and_then(|l| l.span().and_then(|t| s.join(t))) + }) + } +} + +impl Parse for Labelled { + fn parse(input: ParseStream<'_>) -> syn::Result<Self> { + let cat = input.parse()?; + let label = if input.peek(Token![:]) { + Some(input.parse()?) + } else { + None + }; + Ok(Self { cat, label }) + } +} + #[derive(Clone, Debug, Eq, PartialEq)] -pub struct Alt(pub Punctuated<Cat, Token![|]>); +pub struct Alt(pub Punctuated<Labelled, Token![|]>); impl Parse for Alt { fn parse(input: ParseStream<'_>) -> syn::Result<Self> { @@ -248,28 +327,28 @@ pub struct File { } impl File { - pub fn convert(self) -> Option<(Vec<ast::Function>, ast::Expression)> { + pub fn convert(self) -> Result<(Vec<ast::Function>, ast::NamedExpression), ConvertError> { let mut names = Vec::new(); let mut map = Vec::new(); for stmt in self.lets { - let count = stmt.args.as_ref().map(ArgList::len).unwrap_or_default(); let span = stmt.span(); - let mut context = Context::new( - &names, - stmt.args.into_iter().flat_map(|args| args.into_iter()), - ); + let params = stmt.args.into_iter().flat_map(|args| args.into_iter()); + let mut context = Context::new(&names, params.clone()); names.push(stmt.name.clone()); - map.push(ast::Function::new( - stmt.name.into(), - count, - stmt.expr.convert(&mut context)?, + let mut expr = stmt.expr.convert(&mut context)?; + let name: Name = stmt.name.into(); + expr.name = Some(name.clone()); + map.push(ast::Function { + name, + params: params.map(|name| Some(name.into())).collect(), + expr, span, - )); + }); } let mut context = Context::new(&names, Vec::new()); let goal = self.goal.expr.convert(&mut context)?; - Some((map, goal)) + Ok((map, goal)) } } |