diff options
author | Greg Brown <gmb60@cam.ac.uk> | 2021-01-06 14:56:11 +0000 |
---|---|---|
committer | Greg Brown <gmb60@cam.ac.uk> | 2021-01-06 14:56:11 +0000 |
commit | dc10a278cca74d737e4af0fe034a1caa8abb291d (patch) | |
tree | 3fbb9efc0bdc9fd3bc24d2ad743585071b7006a4 | |
parent | 4fb6b740e79c1942fd0bfde9b167ea273c7d0b4b (diff) |
Restructure code base to separate compilation phases.
-rw-r--r-- | src/ast/convert.rs | 153 | ||||
-rw-r--r-- | src/ast/mod.rs | 1647 | ||||
-rw-r--r-- | src/ast/typed.rs | 284 | ||||
-rw-r--r-- | src/chomp/' | 475 | ||||
-rw-r--r-- | src/chomp/ast.rs | 345 | ||||
-rw-r--r-- | src/chomp/check.rs | 481 | ||||
-rw-r--r-- | src/chomp/context.rs | 51 | ||||
-rw-r--r-- | src/chomp/error.rs | 389 | ||||
-rw-r--r-- | src/chomp/mod.rs | 7 | ||||
-rw-r--r-- | src/chomp/set.rs | 91 | ||||
-rw-r--r-- | src/chomp/typed.rs | 321 | ||||
-rw-r--r-- | src/chomp/visit.rs | 136 | ||||
-rw-r--r-- | src/lib.rs | 3 | ||||
-rw-r--r-- | src/lower/mod.rs | 58 | ||||
-rw-r--r-- | src/lower/rust.rs | 315 | ||||
-rw-r--r-- | src/main.rs | 48 | ||||
-rw-r--r-- | src/nibble/convert.rs | 165 | ||||
-rw-r--r-- | src/nibble/cst.rs | 294 | ||||
-rw-r--r-- | src/nibble/mod.rs | 539 |
19 files changed, 3215 insertions, 2587 deletions
diff --git a/src/ast/convert.rs b/src/ast/convert.rs deleted file mode 100644 index 8051c90..0000000 --- a/src/ast/convert.rs +++ /dev/null @@ -1,153 +0,0 @@ -use std::borrow::Borrow; -use std::collections::HashMap; -use std::hash::Hash; - -use super::Term; - -#[derive(Debug, Default)] -/// The variable context for converting a concrete syntax tree to an abstract -/// one. -/// -/// There are two name scopes in a context. Bindings are used by fixed points. -/// Variables are formal parameters of macros. -pub struct Context { - bindings: Vec<String>, - variables: HashMap<String, usize>, -} - -impl Context { - /// Creates a new variable context. No names are bound. - pub fn new() -> Self { - Self::default() - } - - /// Returns [`Some`] index of a binding `name`. - /// - /// # Errors - /// Returns [`None`] if `name.is_empty()` or if `name` is unbound. - /// - /// # Examples - /// ``` - /// use chomp::ast::convert::Context; - /// - /// let mut context = Context::new(); - /// assert_eq!(context.get_binding("x"), None); - /// - /// context.with_binding("x".to_owned(), |c| { - /// assert_eq!(c.get_binding("x"), Some(0)); - /// - /// c.with_binding("y".to_owned(), |c| { - /// assert_eq!(c.get_binding("x"), Some(1)); - /// }) - /// }); - /// - /// assert_eq!(context.get_binding("x"), None); - /// ``` - pub fn get_binding<T: ?Sized + PartialEq<str>>(&self, name: &T) -> Option<usize> { - let mut iter = self.bindings.iter(); - let mut pos = 0; - - if name == "" { - return None; - } - - while let Some(var) = iter.next_back() { - if T::eq(&name, &var) { - return Some(pos); - } else { - pos += 1; - } - } - - None - } - - /// Adds a binding `name` for the duration of the function `f`. - /// - /// # Panics - /// If `name.is_empty()`. - /// - /// # Examples - /// ``` - /// use chomp::ast::convert::Context; - /// - /// let mut context = Context::new(); - /// assert_eq!(context.get_binding("x"), None); - /// - /// context.with_binding("x".to_owned(), |c| { - /// assert_eq!(c.get_binding("x"), Some(0)); - /// }); - /// - /// assert_eq!(context.get_binding("x"), None); - /// ``` - pub fn with_binding<F: FnOnce(&mut Self) -> T, T>(&mut self, name: String, f: F) -> T { - if name.is_empty() { - panic!() - } - - self.bindings.push(name); - let res = f(self); - self.bindings.pop(); - res - } - - /// Returns [`Some`] index of a variable `name`. - /// - /// # Errors - /// If `name == "".to_owned().borrow()` or `name` is unbound. - /// - /// # Examples - /// ``` - /// use chomp::ast::convert::Context; - /// - /// let mut ctx = Context::new(); - /// assert_eq!(ctx.get_variable("x"), None); - /// - /// ctx.set_variables(vec!["x".to_owned(), "y".to_owned()]); - /// assert_eq!(ctx.get_variable("x"), Some(0)); - /// assert_eq!(ctx.get_variable("y"), Some(1)); - /// - /// ctx.set_variables(vec![]); - /// assert_eq!(ctx.get_variable("x"), None); - /// ``` - pub fn get_variable<T: ?Sized + Hash + Eq>(&self, name: &T) -> Option<usize> - where - String: Borrow<T>, - { - if name == "".to_owned().borrow() { - return None; - } - - self.variables.get(name).copied() - } - - /// Reassigns all variable indices based off of their position in `args`. - /// - /// # Examples - /// ``` - /// use chomp::ast::convert::Context; - /// - /// let mut ctx = Context::new(); - /// assert_eq!(ctx.get_variable("x"), None); - /// - /// ctx.set_variables(vec!["x".to_owned(), "y".to_owned()]); - /// assert_eq!(ctx.get_variable("x"), Some(0)); - /// assert_eq!(ctx.get_variable("y"), Some(1)); - /// - /// ctx.set_variables(vec![]); - /// assert_eq!(ctx.get_variable("x"), None); - /// ``` - pub fn set_variables<I: IntoIterator<Item = String>>(&mut self, args: I) { - self.variables = args - .into_iter() - .enumerate() - .map(|(index, name)| (name, index)) - .collect(); - } -} - -/// Used to convert a concrete term into an abstract term. -pub trait Convert { - /// Performs the conversion. - fn convert(&self, context: &mut Context) -> Term; -} diff --git a/src/ast/mod.rs b/src/ast/mod.rs deleted file mode 100644 index b4baa9d..0000000 --- a/src/ast/mod.rs +++ /dev/null @@ -1,1647 +0,0 @@ -use self::typed::{FirstContext, FlastContext, NullContext}; -use proc_macro2::{Span, TokenStream, TokenTree}; -use quote::{format_ident, quote}; -use std::collections::{BTreeSet, HashMap}; -use std::error::Error; -use std::fmt::{self, Display}; -use syn::{Ident, LitStr, Token}; - -pub mod convert; -pub mod typed; - -/// A literal epsilon. -pub type Epsilon = Token![_]; - -/// A literal string. -pub type Literal = LitStr; - -/// The concatenation of two terms. -#[derive(Clone, Debug)] -pub struct Cat { - fst: Box<Term>, - punct: Token![.], - snd: Box<Term>, -} - -impl Cat { - /// Creates a new `Cat` from the two sub-terms and the joining punctuation. - pub fn new(fst: Term, punct: Token![.], snd: Term) -> Self { - Self { - fst: Box::new(fst), - punct, - snd: Box::new(snd), - } - } -} - -/// A type error when concatenating two terms. -#[derive(Debug)] -pub enum CatError { - /// The first term was unexpectedly nullable. - FirstNullable { - /// The punctuation of the concatenation. - punct: Span, - /// The span of the first term. - fst_span: Span, - /// The first term. - fst: Typed, - }, - /// The flast set of the first term intersects the first set of the second. - FirstFlastOverlap { - /// The punctuation of the concatenation. - punct: Span, - /// The span of the first term. - fst_span: Span, - /// The first term. - fst: Typed, - /// The span of the second term. - snd_span: Span, - /// The second term. - snd: Typed, - }, -} - -impl From<CatError> for syn::Error { - fn from(other: CatError) -> Self { - match other { - CatError::FirstNullable { - punct, fst_span, .. - } => { - let mut err = Self::new( - punct, - "first item in sequence cannot accept the empty string", - ); - err.combine(Self::new(fst_span, "this can accept empty string")); - err - } - CatError::FirstFlastOverlap { - punct, - fst_span, - snd_span, - .. - } => { - let mut err = Self::new( - punct, - "cannot decide whether to repeat first or start accepting second", - ); - err.combine(Self::new(fst_span, "a repetition of this")); - err.combine(Self::new(snd_span, "collides with the start of this")); - err - } - } - } -} - -impl Display for CatError { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - match self { - Self::FirstNullable { - punct, - fst_span, - fst, - .. - } => { - let start = punct.start(); - let fst_start = fst_span.start(); - write!( - f, - "{}:{}: term '{}' ({}:{}) accepts the empty string", - start.line, start.column, fst, fst_start.line, fst_start.column - ) - } - Self::FirstFlastOverlap { - punct, - fst, - fst_span, - snd, - snd_span, - .. - } => { - let start = punct.start(); - let fst_start = fst_span.start(); - let snd_start = snd_span.start(); - write!( - f, - "{}:{}: cannot decide whether to repeat '{}' ({}:{}) or start accepting '{}' ({}:{}).", - start.line, start.column, fst, fst_start.line, fst_start.column, snd, snd_start.line, snd_start.column - ) - } - } - } -} - -impl Error for CatError {} - -/// An alternation of two terms. -#[derive(Clone, Debug)] -pub struct Alt { - left: Box<Term>, - punct: Token![|], - right: Box<Term>, -} - -impl Alt { - /// Creates a new `Alt` from the two terms and the joining punctuation. - pub fn new(left: Term, punct: Token![|], right: Term) -> Self { - Self { - left: Box::new(left), - punct, - right: Box::new(right), - } - } -} - -/// A type error when alternating two terms. -#[derive(Debug)] -pub enum AltError { - /// Both terms are nullable. - BothNullable { - /// The punctuation of the alternation. - punct: Span, - /// The span of the left term. - left_span: Span, - /// The left term. - left: Typed, - /// The span of the right term. - right_span: Span, - /// The right term. - right: Typed, - }, - /// The first sets of the two terms intersect. - FirstOverlap { - /// The punctuation of the alternation. - punct: Span, - /// The span of the left term. - left_span: Span, - /// The left term. - left: Typed, - /// The span of the right term. - right_span: Span, - /// The right term. - right: Typed, - }, -} - -impl From<AltError> for syn::Error { - fn from(other: AltError) -> Self { - match other { - AltError::BothNullable { - punct, - left_span, - right_span, - .. - } => { - let mut err = Self::new(punct, "both branches accept the empty string"); - err.combine(Self::new(left_span, "left branch accepts the empty string")); - err.combine(Self::new( - right_span, - "right branch accepts the empty string", - )); - err - } - AltError::FirstOverlap { - punct, - left_span, - right_span, - .. - } => { - let mut err = Self::new( - punct, - "cannot decide whether to accept the left or right branch", - ); - err.combine(Self::new(left_span, "left branch starts with a character")); - err.combine(Self::new( - right_span, - "right branch starts with the same character", - )); - err - } - } - } -} - -impl Display for AltError { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - match self { - Self::BothNullable { - punct, - left_span, - left, - right_span, - right, - .. - } => { - let start = punct.start(); - let left_start = left_span.start(); - let right_start = right_span.start(); - write!( - f, - "{}:{}: both '{}' ({}:{}) and '{}' ({}:{}) accept the empty string.", - start.line, - start.column, - left, - left_start.line, - left_start.column, - right, - right_start.line, - right_start.column, - ) - } - Self::FirstOverlap { - punct, - left_span, - left, - right_span, - right, - .. - } => { - let start = punct.start(); - let left_start = left_span.start(); - let right_start = right_span.start(); - write!( - f, - "{}:{}: cannot decide whether to accept '{}' ({}:{}) or '{}' ({}:{}).", - start.line, - start.column, - left, - left_start.line, - left_start.column, - right, - right_start.line, - right_start.column, - ) - } - } - } -} - -impl Error for AltError {} - -/// The least fixed point of a term. -#[derive(Clone, Debug)] -pub struct Fix { - span: Span, - arg: Ident, - inner: Box<Term>, -} - -impl Fix { - /// Creates a new `Fix` from the argument name, inner term and overall span. - pub fn new(arg: Ident, inner: Term, span: Span) -> Self { - Self { - arg, - inner: Box::new(inner), - span, - } - } - - /// Iteratively computes the least fixed point of a function from an initial - /// state. - pub fn fixpoint<F: FnMut(&Self, &T) -> Result<T, E>, T: Default + PartialEq, E>( - &self, - mut step: F, - ) -> Result<T, E> { - let mut res = T::default(); - let mut last = None; - while last.map(|r| r != res).unwrap_or(true) { - last = Some(res); - res = step(self, last.as_ref().unwrap())?; - } - - Ok(res) - } -} - -/// A variable usage -#[derive(Clone, Debug)] -pub struct Variable { - name: Ident, - index: usize, -} - -impl Variable { - /// Creates a new `Variable` from the name and index. - pub fn new(name: Ident, index: usize) -> Self { - Self { name, index } - } -} - -/// A type error when using a fix point variable. -#[derive(Debug)] -pub enum BindingError { - /// Usage of a free binding variable. - FreeBinding(Variable), - /// Usage of a guarded binding variable. - GuardedBinding(Variable), -} - -impl From<BindingError> for syn::Error { - fn from(other: BindingError) -> Self { - match other { - BindingError::FreeBinding(_) => todo!(), - BindingError::GuardedBinding(_) => todo!(), - } - } -} - -impl Display for BindingError { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - match self { - Self::FreeBinding(var) => { - let start = var.name.span().start(); - write!( - f, - "{}:{}: unbound binding '{}'", - start.line, start.column, var.name - ) - } - Self::GuardedBinding(var) => { - let start = var.name.span().start(); - write!( - f, - "{}:{}: binding '{}' is guarded", - start.line, start.column, var.name - ) - } - } - } -} - -impl Error for BindingError {} - -/// A macro invocation. -#[derive(Clone, Debug)] -pub struct Call { - span: Span, - name: Ident, - args: Vec<Term>, -} - -impl Call { - /// Create a new `Call` from the macro name, actual parameters and overall span. - pub fn new(name: Ident, args: Vec<Term>, span: Span) -> Self { - Self { name, args, span } - } -} - -/// An abstract Nibble term. -#[derive(Clone, Debug)] -pub enum Term { - /// Matches the empty string. - Epsilon(Epsilon), - /// Matches the literal string. - Literal(Literal), - /// Matches one term followed by another. - Cat(Cat), - /// Matches either one term or another. - Alt(Alt), - /// The least fix point of a term. - Fix(Fix), - /// A fixed point variable. - Binding(Variable), - /// A formal parameter. - Variable(Variable), - /// A macro invocation. - Call(Call), -} - -impl Term { - /// Dispatches a [`Visitor`] to the underlying term. - pub fn visit<V: Visitor + ?Sized>(&self, visitor: &mut V) -> <V as Visitor>::Out { - match self { - Self::Epsilon(eps) => visitor.visit_epsilon(eps), - Self::Literal(lit) => visitor.visit_literal(lit), - Self::Cat(cat) => visitor.visit_cat(cat), - Self::Alt(alt) => visitor.visit_alt(alt), - Self::Fix(fix) => visitor.visit_fix(fix), - Self::Binding(bind) => visitor.visit_binding(bind), - Self::Variable(var) => visitor.visit_variable(var), - Self::Call(call) => visitor.visit_call(call), - } - } - - /// Dispatches a [`VisitorMut`] to the underlying term. - pub fn visit_mut<V: VisitorMut>(&mut self, visitor: &mut V) -> <V as VisitorMut>::Out { - match self { - Self::Epsilon(eps) => visitor.visit_mut_epsilon(eps), - Self::Literal(lit) => visitor.visit_mut_literal(lit), - Self::Cat(cat) => visitor.visit_mut_cat(cat), - Self::Alt(alt) => visitor.visit_mut_alt(alt), - Self::Fix(fix) => visitor.visit_mut_fix(fix), - Self::Binding(bind) => visitor.visit_mut_binding(bind), - Self::Variable(var) => visitor.visit_mut_variable(var), - Self::Call(call) => visitor.visit_mut_call(call), - } - } - - /// Dispatches a [`Folder`] to the underlying term. - pub fn fold<F: Folder>(self, folder: &mut F) -> <F as Folder>::Out { - match self { - Self::Epsilon(eps) => folder.fold_epsilon(eps), - Self::Literal(lit) => folder.fold_literal(lit), - Self::Cat(cat) => folder.fold_cat(cat), - Self::Alt(alt) => folder.fold_alt(alt), - Self::Fix(fix) => folder.fold_fix(fix), - Self::Binding(bind) => folder.fold_binding(bind), - Self::Variable(var) => folder.fold_variable(var), - Self::Call(call) => folder.fold_call(call), - } - } -} - -#[derive(Debug)] -/// An error when checking whether a term is well typed. -pub enum TermError { - /// An error with a [`Cat`]. - Cat(CatError), - /// An error with an [`Alt`]. - Alt(AltError), - /// An error with a binding [`Variable`]. - Binding(BindingError), -} - -impl From<CatError> for TermError { - fn from(other: CatError) -> Self { - Self::Cat(other) - } -} - -impl From<AltError> for TermError { - fn from(other: AltError) -> Self { - Self::Alt(other) - } -} - -impl From<BindingError> for TermError { - fn from(other: BindingError) -> Self { - Self::Binding(other) - } -} - -impl From<TermError> for syn::Error { - fn from(other: TermError) -> Self { - match other { - TermError::Cat(e) => e.into(), - TermError::Alt(e) => e.into(), - TermError::Binding(e) => e.into(), - } - } -} - -impl Display for TermError { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - match self { - Self::Cat(e) => e.fmt(f), - Self::Alt(e) => e.fmt(f), - Self::Binding(e) => e.fmt(f), - } - } -} - -impl Error for TermError {} - -/// Immutably visit parts of a [`Term`]. -pub trait Visitor { - /// The result of visiting terms. - type Out; - fn visit_epsilon(&mut self, eps: &Epsilon) -> Self::Out; - - fn visit_literal(&mut self, lit: &Literal) -> Self::Out; - - fn visit_cat(&mut self, cat: &Cat) -> Self::Out; - - fn visit_alt(&mut self, alt: &Alt) -> Self::Out; - - fn visit_fix(&mut self, fix: &Fix) -> Self::Out; - - fn visit_binding(&mut self, bind: &Variable) -> Self::Out; - - fn visit_variable(&mut self, var: &Variable) -> Self::Out; - - fn visit_call(&mut self, call: &Call) -> Self::Out; -} - -pub trait VisitorMut { - type Out; - fn visit_mut_epsilon(&mut self, eps: &mut Epsilon) -> Self::Out; - fn visit_mut_literal(&mut self, lit: &mut Literal) -> Self::Out; - fn visit_mut_cat(&mut self, cat: &mut Cat) -> Self::Out; - fn visit_mut_alt(&mut self, alt: &mut Alt) -> Self::Out; - fn visit_mut_fix(&mut self, fix: &mut Fix) -> Self::Out; - fn visit_mut_binding(&mut self, bind: &mut Variable) -> Self::Out; - fn visit_mut_variable(&mut self, var: &mut Variable) -> Self::Out; - fn visit_mut_call(&mut self, call: &mut Call) -> Self::Out; -} - -pub trait Folder { - type Out; - fn fold_epsilon(&mut self, eps: Epsilon) -> Self::Out; - fn fold_literal(&mut self, lit: Literal) -> Self::Out; - fn fold_cat(&mut self, cat: Cat) -> Self::Out; - fn fold_alt(&mut self, alt: Alt) -> Self::Out; - fn fold_fix(&mut self, fix: Fix) -> Self::Out; - fn fold_binding(&mut self, bind: Variable) -> Self::Out; - fn fold_variable(&mut self, var: Variable) -> Self::Out; - fn fold_call(&mut self, call: Call) -> Self::Out; -} - -struct Closed(usize); - -impl Visitor for Closed { - type Out = bool; - - fn visit_epsilon(&mut self, _eps: &Epsilon) -> Self::Out { - true - } - - fn visit_literal(&mut self, _lit: &Literal) -> Self::Out { - true - } - - fn visit_cat(&mut self, cat: &Cat) -> Self::Out { - cat.fst.visit(self) && cat.snd.visit(self) - } - - fn visit_alt(&mut self, alt: &Alt) -> Self::Out { - alt.left.visit(self) && alt.right.visit(self) - } - - fn visit_fix(&mut self, fix: &Fix) -> Self::Out { - self.0 += 1; - let res = fix.inner.visit(self); - self.0 -= 1; - res - } - - fn visit_binding(&mut self, bind: &Variable) -> Self::Out { - self.0 > bind.index - } - - fn visit_variable(&mut self, _var: &Variable) -> Self::Out { - true - } - - fn visit_call(&mut self, call: &Call) -> Self::Out { - call.args.iter().all(|arg| arg.visit(self)) - } -} - -struct Nullable<'a>(NullContext<'a>); - -impl Visitor for Nullable<'_> { - type Out = Result<bool, BindingError>; - - fn visit_epsilon(&mut self, _: &Epsilon) -> Self::Out { - Ok(true) - } - - fn visit_literal(&mut self, lit: &Literal) -> Self::Out { - Ok(lit.value().is_empty()) - } - - fn visit_cat(&mut self, cat: &Cat) -> Self::Out { - if !cat.fst.visit(self)? { - return Ok(false); - } - - self.0.unguard(); - let res = cat.snd.visit(self)?; - self.0.guard(); - Ok(res) - } - - fn visit_alt(&mut self, alt: &Alt) -> Self::Out { - if alt.left.visit(self)? { - Ok(true) - } else { - alt.right.visit(self) - } - } - - fn visit_fix(&mut self, fix: &Fix) -> Self::Out { - fix.fixpoint(|fix, last| { - self.0.push_nullable(*last); - let res = fix.inner.visit(self)?; - self.0.pop_nullable(); - Ok(res) - }) - } - - fn visit_binding(&mut self, bind: &Variable) -> Self::Out { - match self.0.is_guarded(bind.index) { - None => Err(BindingError::FreeBinding(bind.clone())), - Some(true) => Err(BindingError::GuardedBinding(bind.clone())), - Some(false) => Ok(self.0.is_nullable(bind.index).unwrap()), - } - } - - fn visit_variable(&mut self, _var: &Variable) -> Self::Out { - todo!() - } - - fn visit_call(&mut self, _call: &Call) -> Self::Out { - todo!() - } -} - -struct FirstSet<'a>(FirstContext<'a>); - -impl Visitor for FirstSet<'_> { - type Out = Result<typed::FirstSet, BindingError>; - - fn visit_epsilon(&mut self, _: &Epsilon) -> Self::Out { - Ok(typed::FirstSet::new()) - } - - fn visit_literal(&mut self, lit: &Literal) -> Self::Out { - Ok(typed::FirstSet::of_str(&lit.value())) - } - - fn visit_cat(&mut self, cat: &Cat) -> Self::Out { - let mut set = cat.fst.visit(self)?; - - if cat.fst.visit(&mut Nullable(self.0.as_null()))? { - self.0.unguard(); - set.union(cat.snd.visit(self)?); - self.0.guard(); - } - - Ok(set) - } - - fn visit_alt(&mut self, alt: &Alt) -> Self::Out { - let mut set = alt.left.visit(self)?; - set.union(alt.right.visit(self)?); - Ok(set) - } - - fn visit_fix(&mut self, fix: &Fix) -> Self::Out { - let nullable = Nullable(self.0.as_null()).visit_fix(fix)?; - fix.fixpoint(|fix, last: &typed::FirstSet| { - self.0.push_first_set(nullable, last.clone()); - let res = fix.inner.visit(self)?; - self.0.pop_first_set(); - Ok(res) - }) - } - - fn visit_binding(&mut self, bind: &Variable) -> Self::Out { - match self.0.is_guarded(bind.index) { - None => Err(BindingError::FreeBinding(bind.clone())), - Some(true) => Err(BindingError::GuardedBinding(bind.clone())), - Some(false) => Ok(self.0.first_set(bind.index).unwrap().clone()), - } - } - - fn visit_variable(&mut self, _var: &Variable) -> Self::Out { - todo!() - } - - fn visit_call(&mut self, _call: &Call) -> Self::Out { - todo!() - } -} - -struct FlastSet<'a>(&'a mut FlastContext); - -impl Visitor for FlastSet<'_> { - type Out = Result<typed::FlastSet, BindingError>; - - fn visit_epsilon(&mut self, _: &Epsilon) -> Self::Out { - Ok(typed::FlastSet::new()) - } - - fn visit_literal(&mut self, _: &Literal) -> Self::Out { - Ok(typed::FlastSet::new()) - } - - fn visit_cat(&mut self, cat: &Cat) -> Self::Out { - self.0.unguard(); - let mut set = cat.snd.visit(self)?; - let nullable = cat.snd.visit(&mut Nullable(self.0.as_null()))?; - self.0.guard(); - - if nullable { - self.0.unguard(); - set.union_first(cat.snd.visit(&mut FirstSet(self.0.as_first()))?); - self.0.guard(); - set.union(cat.fst.visit(self)?); - } - - Ok(set) - } - - fn visit_alt(&mut self, alt: &Alt) -> Self::Out { - let mut set = alt.left.visit(self)?; - set.union(alt.right.visit(self)?); - Ok(set) - } - - fn visit_fix(&mut self, fix: &Fix) -> Self::Out { - let nullable = Nullable(self.0.as_null()).visit_fix(fix)?; - let first_set = FirstSet(self.0.as_first()).visit_fix(fix)?; - fix.fixpoint(|fix, last: &typed::FlastSet| { - self.0 - .push_flast_set(nullable, first_set.clone(), last.clone()); - let res = fix.inner.visit(self)?; - self.0.pop_flast_set(); - Ok(res) - }) - } - - fn visit_binding(&mut self, bind: &Variable) -> Self::Out { - match self.0.is_guarded(bind.index) { - None => Err(BindingError::FreeBinding(bind.clone())), - Some(true) => Err(BindingError::GuardedBinding(bind.clone())), - Some(false) => Ok(self.0.flast_set(bind.index).unwrap().clone()), - } - } - - fn visit_variable(&mut self, _var: &Variable) -> Self::Out { - todo!() - } - - fn visit_call(&mut self, _call: &Call) -> Self::Out { - todo!() - } -} - -struct Spanning; - -impl Visitor for Spanning { - type Out = Span; - - fn visit_epsilon(&mut self, eps: &Epsilon) -> Self::Out { - eps.span - } - - fn visit_literal(&mut self, lit: &Literal) -> Self::Out { - lit.span() - } - - fn visit_cat(&mut self, cat: &Cat) -> Self::Out { - let fst = cat.fst.visit(self); - let snd = cat.snd.visit(self); - fst.join(snd).unwrap_or_else(Span::call_site) - } - - fn visit_alt(&mut self, alt: &Alt) -> Self::Out { - let left = alt.left.visit(self); - let right = alt.right.visit(self); - left.join(right).unwrap_or_else(Span::call_site) - } - - fn visit_fix(&mut self, fix: &Fix) -> Self::Out { - fix.span - } - - fn visit_binding(&mut self, var: &Variable) -> Self::Out { - var.name.span() - } - - fn visit_variable(&mut self, var: &Variable) -> Self::Out { - var.name.span() - } - - fn visit_call(&mut self, call: &Call) -> Self::Out { - call.span - } -} - -#[derive(Debug, Default)] -pub struct TypeCheck(FlastContext); - -impl TypeCheck { - pub fn new() -> Self { - Self::default() - } -} - -impl Folder for TypeCheck { - type Out = Result<(Typed, Span), TermError>; - - fn fold_epsilon(&mut self, eps: Epsilon) -> Self::Out { - let nullable = Nullable(self.0.as_null()).visit_epsilon(&eps)?; - let first_set = FirstSet(self.0.as_first()).visit_epsilon(&eps)?; - let flast_set = FlastSet(&mut self.0).visit_epsilon(&eps)?; - let span = Spanning.visit_epsilon(&eps); - let kind = TypeKind::Epsilon; - Ok(( - Typed { - kind, - nullable, - first_set, - flast_set, - }, - span, - )) - } - - fn fold_literal(&mut self, lit: Literal) -> Self::Out { - let nullable = Nullable(self.0.as_null()).visit_literal(&lit)?; - let first_set = FirstSet(self.0.as_first()).visit_literal(&lit)?; - let flast_set = FlastSet(&mut self.0).visit_literal(&lit)?; - let span = Spanning.visit_literal(&lit); - let kind = TypeKind::Literal(lit.value()); - Ok(( - Typed { - kind, - nullable, - first_set, - flast_set, - }, - span, - )) - } - - fn fold_cat(&mut self, cat: Cat) -> Self::Out { - let nullable = Nullable(self.0.as_null()).visit_cat(&cat)?; - let first_set = FirstSet(self.0.as_first()).visit_cat(&cat)?; - let flast_set = FlastSet(&mut self.0).visit_cat(&cat)?; - let span = Spanning.visit_cat(&cat); - - let (fst, fst_span) = cat.fst.fold(self)?; - - self.0.unguard(); - let (snd, snd_span) = cat.snd.fold(self)?; - self.0.guard(); - - if fst.is_nullable() { - Err(TermError::Cat(CatError::FirstNullable { - punct: cat.punct.span, - fst_span, - fst, - })) - } else if !fst.flast_set().intersect_first(&snd.first_set()).is_empty() { - Err(TermError::Cat(CatError::FirstFlastOverlap { - punct: cat.punct.span, - fst_span, - fst, - snd_span, - snd, - })) - } else { - let kind = TypeKind::Cat(Box::new(fst), Box::new(snd)); - Ok(( - Typed { - kind, - nullable, - first_set, - flast_set, - }, - span, - )) - } - } - - fn fold_alt(&mut self, alt: Alt) -> Self::Out { - let nullable = Nullable(self.0.as_null()).visit_alt(&alt)?; - let first_set = FirstSet(self.0.as_first()).visit_alt(&alt)?; - let flast_set = FlastSet(&mut self.0).visit_alt(&alt)?; - let span = Spanning.visit_alt(&alt); - - let (left, left_span) = alt.left.fold(self)?; - let (right, right_span) = alt.right.fold(self)?; - - if left.is_nullable() && right.is_nullable() { - Err(TermError::Alt(AltError::BothNullable { - punct: alt.punct.span, - left_span, - left, - right_span, - right, - })) - } else if !left.first_set().intersect(&right.first_set()).is_empty() { - Err(TermError::Alt(AltError::FirstOverlap { - punct: alt.punct.span, - left_span, - left, - right_span, - right, - })) - } else { - let kind = TypeKind::Alt(Box::new(left), Box::new(right)); - Ok(( - Typed { - kind, - nullable, - first_set, - flast_set, - }, - span, - )) - } - } - - fn fold_fix(&mut self, fix: Fix) -> Self::Out { - let nullable = Nullable(self.0.as_null()).visit_fix(&fix)?; - let first_set = FirstSet(self.0.as_first()).visit_fix(&fix)?; - let flast_set = FlastSet(&mut self.0).visit_fix(&fix)?; - let span = Spanning.visit_fix(&fix); - - self.0 - .push_flast_set(nullable, first_set.clone(), flast_set.clone()); - let (inner, _) = fix.inner.fold(self)?; - self.0.pop_flast_set(); - - let kind = TypeKind::Fix(Box::new(inner)); - - Ok(( - Typed { - kind, - nullable, - first_set, - flast_set, - }, - span, - )) - } - - fn fold_binding(&mut self, bind: Variable) -> Self::Out { - let nullable = Nullable(self.0.as_null()).visit_binding(&bind)?; - let first_set = FirstSet(self.0.as_first()).visit_binding(&bind)?; - let flast_set = FlastSet(&mut self.0).visit_binding(&bind)?; - let span = Spanning.visit_binding(&bind); - let kind = TypeKind::Binding(bind.index); - - Ok(( - Typed { - kind, - nullable, - first_set, - flast_set, - }, - span, - )) - } - - fn fold_variable(&mut self, _var: Variable) -> Self::Out { - todo!() - } - - fn fold_call(&mut self, _call: Call) -> Self::Out { - todo!() - } -} - -struct DeepenVars(usize); - -impl VisitorMut for DeepenVars { - type Out = (); - - fn visit_mut_epsilon(&mut self, _: &mut Epsilon) -> Self::Out {} - - fn visit_mut_literal(&mut self, _: &mut Literal) -> Self::Out {} - - fn visit_mut_cat(&mut self, cat: &mut Cat) -> Self::Out { - cat.fst.visit_mut(self); - cat.snd.visit_mut(self); - } - - fn visit_mut_alt(&mut self, alt: &mut Alt) -> Self::Out { - alt.left.visit_mut(self); - alt.right.visit_mut(self); - } - - fn visit_mut_fix(&mut self, fix: &mut Fix) -> Self::Out { - self.0 += 1; - fix.inner.visit_mut(self); - self.0 -= 1; - } - - fn visit_mut_binding(&mut self, bind: &mut Variable) -> Self::Out { - if bind.index >= self.0 { - bind.index += 1; - } - } - - fn visit_mut_variable(&mut self, _: &mut Variable) -> Self::Out {} - - fn visit_mut_call(&mut self, call: &mut Call) -> Self::Out { - for arg in &mut call.args { - arg.visit_mut(self); - } - } -} - -struct ShallowVars(usize); - -impl VisitorMut for ShallowVars { - type Out = (); - - fn visit_mut_epsilon(&mut self, _: &mut Epsilon) -> Self::Out {} - - fn visit_mut_literal(&mut self, _: &mut Literal) -> Self::Out {} - - fn visit_mut_cat(&mut self, cat: &mut Cat) -> Self::Out { - cat.fst.visit_mut(self); - cat.snd.visit_mut(self); - } - - fn visit_mut_alt(&mut self, alt: &mut Alt) -> Self::Out { - alt.left.visit_mut(self); - alt.right.visit_mut(self); - } - - fn visit_mut_fix(&mut self, fix: &mut Fix) -> Self::Out { - self.0 += 1; - fix.inner.visit_mut(self); - self.0 -= 1; - } - - fn visit_mut_binding(&mut self, bind: &mut Variable) -> Self::Out { - if bind.index > self.0 { - bind.index -= 1; - } - } - - fn visit_mut_variable(&mut self, _: &mut Variable) -> Self::Out { - todo!() - } - - fn visit_mut_call(&mut self, call: &mut Call) -> Self::Out { - for arg in &mut call.args { - arg.visit_mut(self); - } - } -} - -struct Substitute(Vec<Term>); - -impl Folder for Substitute { - type Out = Result<Term, SubstituteError>; - - fn fold_epsilon(&mut self, eps: Epsilon) -> Self::Out { - Ok(Term::Epsilon(eps)) - } - - fn fold_literal(&mut self, lit: Literal) -> Self::Out { - Ok(Term::Literal(lit)) - } - - fn fold_cat(&mut self, cat: Cat) -> Self::Out { - let fst = cat.fst.fold(self)?; - let snd = cat.snd.fold(self)?; - Ok(Term::Cat(Cat { - fst: Box::new(fst), - punct: cat.punct, - snd: Box::new(snd), - })) - } - - fn fold_alt(&mut self, alt: Alt) -> Self::Out { - let left = alt.left.fold(self)?; - let right = alt.right.fold(self)?; - Ok(Term::Alt(Alt { - left: Box::new(left), - punct: alt.punct, - right: Box::new(right), - })) - } - - fn fold_fix(&mut self, fix: Fix) -> Self::Out { - for var in &mut self.0 { - var.visit_mut(&mut DeepenVars(0)); - } - let inner = fix.inner.fold(self)?; - for var in &mut self.0 { - var.visit_mut(&mut ShallowVars(0)); - } - Ok(Term::Fix(Fix { - span: fix.span, - arg: fix.arg, - inner: Box::new(inner), - })) - } - - fn fold_binding(&mut self, var: Variable) -> Self::Out { - Ok(Term::Binding(var)) - } - - fn fold_call(&mut self, call: Call) -> Self::Out { - let args = call - .args - .into_iter() - .map(|arg| arg.fold(self)) - .collect::<Result<_, _>>()?; - Ok(Term::Call(Call { - span: call.span, - name: call.name, - args, - })) - } - - fn fold_variable(&mut self, var: Variable) -> Self::Out { - self.0 - .get(var.index) - .cloned() - .ok_or(SubstituteError::FreeVariable(var)) - } -} - -#[derive(Clone, Debug)] -pub struct InlineCall { - name: String, - term: Term, - args: usize, -} - -impl InlineCall { - pub fn new(name: Ident, term: Term, args: usize) -> Self { - Self { - name: name.to_string(), - term, - args, - } - } -} - -impl Folder for InlineCall { - type Out = Result<Term, SubstituteError>; - - fn fold_epsilon(&mut self, eps: Epsilon) -> Self::Out { - Ok(Term::Epsilon(eps)) - } - - fn fold_literal(&mut self, lit: Literal) -> Self::Out { - Ok(Term::Literal(lit)) - } - - fn fold_cat(&mut self, cat: Cat) -> Self::Out { - let fst = cat.fst.fold(self)?; - let snd = cat.snd.fold(self)?; - Ok(Term::Cat(Cat { - fst: Box::new(fst), - punct: cat.punct, - snd: Box::new(snd), - })) - } - - fn fold_alt(&mut self, alt: Alt) -> Self::Out { - let left = alt.left.fold(self)?; - let right = alt.right.fold(self)?; - Ok(Term::Alt(Alt { - left: Box::new(left), - punct: alt.punct, - right: Box::new(right), - })) - } - - fn fold_fix(&mut self, fix: Fix) -> Self::Out { - let inner = fix.inner.fold(self)?; - Ok(Term::Fix(Fix { - span: fix.span, - arg: fix.arg, - inner: Box::new(inner), - })) - } - - fn fold_binding(&mut self, bind: Variable) -> Self::Out { - Ok(Term::Binding(bind)) - } - - fn fold_variable(&mut self, var: Variable) -> Self::Out { - Ok(Term::Variable(var)) - } - - fn fold_call(&mut self, call: Call) -> Self::Out { - if call.name != self.name { - let args = call - .args - .into_iter() - .map(|arg| arg.fold(self)) - .collect::<Result<_, _>>()?; - Ok(Term::Call(Call { - span: call.span, - name: call.name, - args, - })) - } else if call.args.len() != self.args { - Err(SubstituteError::WrongArgCount { - call, - expected: self.args, - }) - } else { - let args = call - .args - .into_iter() - .map(|arg| arg.fold(self)) - .collect::<Result<_, _>>()?; - self.term.clone().fold(&mut Substitute(args)) - } - } -} - -#[derive(Debug)] -pub enum SubstituteError { - FreeVariable(Variable), - WrongArgCount { call: Call, expected: usize }, -} - -impl Display for SubstituteError { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - match self { - Self::FreeVariable(var) => { - let start = var.name.span().start(); - write!( - f, - "{}:{}: undeclared variable '{}'", - start.line, start.column, var.name - ) - } - SubstituteError::WrongArgCount { call, expected } => { - let start = call.span.start(); - write!( - f, - "{}:{}: wrong number of arguments. Expected {}, got {}", - start.line, - start.column, - call.args.len(), - expected - ) - } - } - } -} - -impl Error for SubstituteError {} - -#[derive(Clone, Debug, Eq, PartialEq, Hash)] -enum TypeKind { - Epsilon, - Literal(String), - Cat(Box<Typed>, Box<Typed>), - Alt(Box<Typed>, Box<Typed>), - Fix(Box<Typed>), - Binding(usize), -} - -#[derive(Debug)] -pub struct Code { - id: usize, -} - -impl TypeKind { - /// Produces ident for the type and token stream for implementation - fn emit_code(self, context: &mut CodeContext) -> Code { - match self { - Self::Epsilon => context.epsilon(), - Self::Literal(s) => context.literal(s), - Self::Cat(fst, snd) => { - let Typed { kind: fst_kind, .. } = *fst; - let Typed { kind: snd_kind, .. } = *snd; - let fst_code = fst_kind.emit_code(context); - let snd_code = snd_kind.emit_code(context); - context.cat(fst_code, snd_code) - } - Self::Alt(left, right) => { - let Typed { - kind: left_kind, - nullable: left_null, - first_set: left_first, - .. - } = *left; - let Typed { - kind: right_kind, - nullable: right_null, - first_set: right_first, - .. - } = *right; - let left_code = left_kind.emit_code(context); - let right_code = right_kind.emit_code(context); - context.alt( - left_code, - left_null, - left_first, - right_code, - right_null, - right_first, - ) - } - Self::Fix(inner) => { - let Typed { - kind: inner_kind, .. - } = *inner; - context.fix(inner_kind) - } - Self::Binding(index) => context.variable(index), - } - } -} - -impl Display for TypeKind { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - match self { - Self::Epsilon => write!(f, "_"), - Self::Literal(s) => write!(f, "{:?}", s), - Self::Cat(fst, snd) => write!(f, "{}.{}", fst, snd), - Self::Alt(left, right) => write!(f, "({} | {})", left, right), - Self::Fix(inner) => write!(f, "[]({})", inner), - Self::Binding(index) => write!(f, "{}", index), - } - } -} - -#[derive(Clone, Debug, Eq, PartialEq, Hash)] -pub struct Typed { - kind: TypeKind, - nullable: bool, - first_set: typed::FirstSet, - flast_set: typed::FlastSet, -} - -impl Typed { - pub fn is_nullable(&self) -> bool { - self.nullable - } - - pub fn first_set(&self) -> &typed::FirstSet { - &self.first_set - } - - pub fn flast_set(&self) -> &typed::FlastSet { - &self.flast_set - } - - pub fn emit_code(self, name: Ident) -> TokenStream { - let mut context = CodeContext::new(); - let code = self.kind.emit_code(&mut context); - context.all_code(name, code) - } -} - -impl Display for Typed { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - self.kind.fmt(f) - } -} - -#[derive(Debug)] -pub struct CodeContext { - data: Vec<(TokenStream, TokenStream, BTreeSet<usize>)>, - lit_map: HashMap<String, usize>, - cat_map: HashMap<(usize, usize), usize>, - alt_map: HashMap<(usize, usize), usize>, - fix_map: HashMap<TypeKind, usize>, - var_map: HashMap<usize, usize>, // Key is fix point ID - context: Vec<usize>, -} - -impl CodeContext { - fn new() -> Self { - let eps = (quote! {()}, TokenStream::new(), BTreeSet::new()); - - Self { - data: vec![eps], - lit_map: HashMap::new(), - cat_map: HashMap::new(), - alt_map: HashMap::new(), - fix_map: HashMap::new(), - var_map: HashMap::new(), - context: Vec::new(), - } - } - - fn epsilon(&mut self) -> Code { - Code { id: 0 } - } - - fn literal(&mut self, lit: String) -> Code { - if let Some(&id) = self.lit_map.get(&lit) { - Code { id } - } else { - let id = self.data.len(); - let name = format_ident!("Lit{}", id); - let doc_name = format!( - r#"The literal string `"{}"`."#, - lit.escape_debug().collect::<String>() - ); - let tokens = quote! { - #[doc=#doc_name] - pub struct #name; - - impl Parse for #name { - fn parse<P: Parser>(input: &mut P) -> Result<Self> { - input.take_str(#lit).map(|()| #name) - } - } - }; - - self.data - .push((TokenTree::from(name).into(), tokens, BTreeSet::new())); - self.lit_map.insert(lit, id); - - Code { id } - } - } - - fn cat(&mut self, fst: Code, snd: Code) -> Code { - if let Some(&id) = self.cat_map.get(&(fst.id, snd.id)) { - Code { id } - } else { - let id = self.data.len(); - let fst_ty = self.data[fst.id].0.clone(); - let snd_ty = self.data[snd.id].0.clone(); - self.data.push(( - quote! {(#fst_ty, #snd_ty)}, - TokenStream::new(), - [fst.id, snd.id].iter().cloned().collect(), - )); - self.cat_map.insert((fst.id, snd.id), id); - Code { id } - } - } - - fn alt( - &mut self, - left_code: Code, - left_null: bool, - left_first: typed::FirstSet, - right_code: Code, - right_null: bool, - right_first: typed::FirstSet, - ) -> Code { - if let Some(&id) = self.alt_map.get(&(left_code.id, right_code.id)) { - Code { id } - } else { - let id = self.data.len(); - let left_ty = self.data[left_code.id].0.clone(); - let right_ty = self.data[right_code.id].0.clone(); - let name = format_ident!("Alt{}", id); - let mut tokens = quote! { - pub enum #name { - Left(#left_ty), - Right(#right_ty), - } - }; - - let other = if left_null { - let iter = right_first.into_iter(); - - quote! { - impl Parse for #name { - fn parse<P: Parser>(input: &mut P) -> Result<Self> { - match input.peek() { - #(Some(#iter))|* => input.parse().map(Self::Right), - _ => input.parse().map(Self::Left), - } - } - } - } - } else if right_null { - let iter = left_first.into_iter(); - - quote! { - impl Parse for #name { - fn parse<P: Parser>(input: &mut P) -> Result<Self> { - match input.peek() { - #(Some(#iter))|* => input.parse().map(Self::Left), - _ => input.parse().map(Self::Right), - } - } - } - } - } else { - let mut first = left_first.clone(); - first.union(right_first.clone()); - let iter_first = first.into_iter(); - let iter_left = left_first.into_iter(); - let iter_right = right_first.into_iter(); - - quote! { - impl Parse for #name { - fn parse<P: Parser>(input: &mut P) -> Result<Self> { - match input.peek().ok_or(Error::EndOfStream)? { - #(#iter_left)|* => input.parse().map(Self::Left), - #(#iter_right)|* => input.parse().map(Self::Right), - c => input.error(Error::BadBranch(c, &[#(#iter_first),*])) - } - } - } - } - }; - - tokens.extend(other); - self.data.push(( - TokenTree::from(name).into(), - tokens, - [left_code.id, right_code.id].iter().cloned().collect(), - )); - self.alt_map.insert((left_code.id, right_code.id), id); - Code { id } - } - } - - fn fix(&mut self, inner: TypeKind) -> Code { - if let Some(&id) = self.fix_map.get(&inner) { - Code { id } - } else { - let id = self.data.len(); - let name = format_ident!("Fix{}", id); - self.data.push(( - TokenTree::from(name.clone()).into(), - TokenStream::new(), - BTreeSet::new(), - )); - self.context.push(id); - let inner = inner.emit_code(self).id; - let inner_ty = self.data[inner].0.clone(); - let tokens = quote! { - pub struct #name(#inner_ty); - - impl Parse for #name { - fn parse<P: Parser>(input: &mut P) -> Result<Self> { - input.parse().map(Self) - } - } - }; - self.data[id].1 = tokens; - self.data[id].2 = [inner].iter().cloned().collect(); - Code { id } - } - } - - fn variable(&mut self, index: usize) -> Code { - let fix_id = self.context[self.context.len() - index - 1]; - if let Some(&id) = self.var_map.get(&fix_id) { - Code { id } - } else { - let id = self.data.len(); - let fix_ty = self.data[fix_id].0.clone(); - let name = quote! {Box<#fix_ty>}; - self.data.push((name, TokenStream::new(), BTreeSet::new())); - self.var_map.insert(fix_id, id); - Code { id } - } - } - - pub fn all_code(&mut self, name: Ident, code: Code) -> TokenStream { - let root = self.data[code.id].clone(); - let mut tokens = root.1; - let mut completed = [code.id].iter().cloned().collect::<BTreeSet<_>>(); - let mut todo = root.2; - - while !todo.is_empty() { - let mut next = BTreeSet::new(); - completed.extend(todo.clone()); - - for ty in todo { - let ty = self.data[ty].clone(); - tokens.extend(ty.1); - next.extend(ty.2.into_iter().filter(|id| !completed.contains(id))); - } - - todo = next; - } - - let root_ty = root.0; - tokens.extend(quote! { - pub type #name = #root_ty; - - pub enum Error { - BadBranch(char, &'static [char]), - EndOfStream, - } - - pub type Result<T> = std::result::Result<T, Error>; - - pub trait Parser: Iterator<Item = char> { - fn peek(&mut self) -> Option<char>; - - fn parse<T: Parse>(&mut self) -> Result<T> where Self: Sized { - T::parse(self) - } - - fn take_str(&mut self, s: &str) -> Result<()>; - - fn error<T>(&mut self, e: Error) -> Result<T>; - } - - pub trait Parse: Sized { - fn parse<P: Parser>(input: &mut P) -> Result<Self>; - } - - impl Parse for () { - fn parse<P: Parser>(_input: &mut P) -> Result<Self> { - Ok(()) - } - } - - impl<A: Parse, B: Parse> Parse for (A, B) { - fn parse<P: Parser>(input: &mut P) -> Result<Self> { - let a = input.parse()?; - let b = input.parse()?; - Ok((a, b)) - } - } - - impl<T: Parse + Sized> Parse for Box<T> { - fn parse<P: Parser>(input: &mut P) -> Result<Self> { - input.parse().map(Box::new) - } - } - }); - - tokens - } -} diff --git a/src/ast/typed.rs b/src/ast/typed.rs deleted file mode 100644 index 7c3f89e..0000000 --- a/src/ast/typed.rs +++ /dev/null @@ -1,284 +0,0 @@ -use std::collections::BTreeSet; - -#[derive(Clone, Debug, Default, Eq, PartialEq, Hash)] -pub struct FirstSet { - inner: BTreeSet<char>, -} - -impl FirstSet { - pub fn new() -> Self { - Self::default() - } - - pub fn of_str(s: &str) -> Self { - let mut inner = BTreeSet::new(); - s.chars().next().map(|c| inner.insert(c)); - - Self { inner } - } - - pub fn is_empty(&self) -> bool { - self.inner.is_empty() - } - - pub fn union(&mut self, mut other: Self) { - self.inner.append(&mut other.inner); - } - - pub fn intersect(&self, other: &Self) -> Self { - Self { - inner: self.inner.intersection(&other.inner).copied().collect(), - } - } -} - -impl IntoIterator for FirstSet { - type Item = char; - - type IntoIter = <BTreeSet<char> as IntoIterator>::IntoIter; - - fn into_iter(self) -> Self::IntoIter { - self.inner.into_iter() - } -} - -#[derive(Clone, Debug, Default, Eq, PartialEq, Hash)] -pub struct FlastSet { - inner: BTreeSet<char>, -} - -impl FlastSet { - pub fn new() -> Self { - Self::default() - } - - pub fn is_empty(&self) -> bool { - self.inner.is_empty() - } - - pub fn union_first(&mut self, mut other: FirstSet) { - self.inner.append(&mut other.inner); - } - - pub fn union(&mut self, mut other: Self) { - self.inner.append(&mut other.inner); - } - - pub fn intersect_first(&self, other: &FirstSet) -> Self { - Self { - inner: self.inner.intersection(&other.inner).copied().collect(), - } - } - - pub fn intersect(&self, other: &Self) -> Self { - Self { - inner: self.inner.intersection(&other.inner).copied().collect(), - } - } -} - -#[derive(Debug)] -pub struct NullContext<'a> { - inner: &'a mut FlastContext, -} - -impl NullContext<'_> { - pub fn depth(&self) -> usize { - self.inner.depth() - } - - pub fn is_guarded(&self, index: usize) -> Option<bool> { - self.inner.is_guarded(index) - } - - pub fn with_unguard<F: FnOnce(&mut Self) -> R, R>(&mut self, f: F) -> R { - self.unguard(); - let res = f(self); - self.guard(); - res - } - - pub(crate) fn unguard(&mut self) { - self.inner.unguard() - } - - pub(crate) fn guard(&mut self) { - self.inner.guard() - } - - pub fn is_nullable(&self, index: usize) -> Option<bool> { - self.inner.is_nullable(index) - } - - pub fn with_nullable<F: FnOnce(&mut Self) -> R, R>(&mut self, nullable: bool, f: F) -> R { - self.push_nullable(nullable); - let res = f(self); - self.guard(); - res - } - - pub(crate) fn push_nullable(&mut self, nullable: bool) { - self.inner.push_nullable(nullable) - } - - pub(crate) fn pop_nullable(&mut self) { - self.inner.pop_nullable() - } -} - -#[derive(Debug)] -pub struct FirstContext<'a> { - inner: &'a mut FlastContext, -} - -impl FirstContext<'_> { - pub fn depth(&self) -> usize { - self.inner.depth() - } - - pub fn is_guarded(&self, index: usize) -> Option<bool> { - self.inner.is_guarded(index) - } - - pub fn with_unguard<F: FnOnce(&mut Self) -> R, R>(&mut self, f: F) -> R { - self.unguard(); - let res = f(self); - self.guard(); - res - } - - pub(crate) fn unguard(&mut self) { - self.inner.unguard() - } - - pub(crate) fn guard(&mut self) { - self.inner.guard() - } - - pub fn first_set(&self, index: usize) -> Option<&FirstSet> { - self.inner.first_set(index) - } - - pub fn with_first_set<F: FnOnce(&mut Self) -> R, R>( - &mut self, - nullable: bool, - first_set: FirstSet, - f: F, - ) -> R { - self.push_first_set(nullable, first_set); - let res = f(self); - self.pop_first_set(); - res - } - - pub(crate) fn push_first_set(&mut self, nullable: bool, first_set: FirstSet) { - self.inner.push_first_set(nullable, first_set) - } - - pub(crate) fn pop_first_set(&mut self) { - self.inner.pop_first_set() - } - - pub fn as_null(&mut self) -> NullContext<'_> { - NullContext { inner: self.inner } - } -} - -#[derive(Debug, Default)] -pub struct FlastContext { - binds: Vec<(bool, FirstSet, FlastSet)>, - unguard_points: Vec<usize>, -} - -impl FlastContext { - pub fn new() -> Self { - Self::default() - } - - pub fn depth(&self) -> usize { - self.binds.len() - } - - pub fn is_guarded(&self, index: usize) -> Option<bool> { - if self.binds.len() <= index { - None - } else if self.unguard_points.is_empty() { - Some(true) - } else { - Some(self.unguard_points[self.unguard_points.len() - 1] + index < self.binds.len()) - } - } - - pub fn with_unguard<F: FnOnce(&mut Self) -> R, R>(&mut self, f: F) -> R { - self.unguard(); - let res = f(self); - self.guard(); - res - } - - pub(crate) fn unguard(&mut self) { - self.unguard_points.push(self.binds.len()); - } - - pub(crate) fn guard(&mut self) { - self.unguard_points.pop(); - } - - fn is_nullable(&self, index: usize) -> Option<bool> { - self.binds.get(self.binds.len() - index - 1).map(|(null, _, _)| *null) - } - - fn push_nullable(&mut self, nullable: bool) { - self.binds - .push((nullable, FirstSet::default(), FlastSet::default())) - } - - fn pop_nullable(&mut self) { - self.binds.pop(); - } - - fn first_set(&self, index: usize) -> Option<&FirstSet> { - self.binds.get(self.binds.len() - index - 1).map(|(_, first, _)| first) - } - - fn push_first_set(&mut self, nullable: bool, first_set: FirstSet) { - self.binds.push((nullable, first_set, FlastSet::default())) - } - - fn pop_first_set(&mut self) { - self.binds.pop(); - } - - pub fn flast_set(&self, index: usize) -> Option<&FlastSet> { - self.binds.get(self.binds.len() - index - 1).map(|(_, _, flast)| flast) - } - - pub fn with_flast_set<F: FnOnce(&mut Self) -> R, R>( - &mut self, - nullable: bool, - first_set: FirstSet, - flast_set: FlastSet, - f: F, - ) -> R { - self.push_flast_set(nullable, first_set, flast_set); - let res = f(self); - self.pop_flast_set(); - res - } - - pub(crate) fn push_flast_set(&mut self, nullable: bool, first_set: FirstSet, flast_set: FlastSet) { - self.binds.push((nullable, first_set, flast_set)); - } - - pub(crate) fn pop_flast_set(&mut self) { - self.binds.pop(); - } - - pub fn as_first(&mut self) -> FirstContext<'_> { - FirstContext { inner: self } - } - - pub fn as_null(&mut self) -> NullContext<'_> { - NullContext { inner: self } - } -} diff --git a/src/chomp/' b/src/chomp/' new file mode 100644 index 0000000..f2f6588 --- /dev/null +++ b/src/chomp/' @@ -0,0 +1,475 @@ +use proc_macro2::{Ident, Span}; + +use super::{ast::{Alt, Call, Cat, Epsilon, Expression, Fix, Function, Literal, Parameter, Variable}, context::Context, error::{AltError, CatError, FixError, SubstituteError, TypeError}, set::{FirstSet, FlastSet}, typed::{self, Type, TypedExpression}, visit::{Folder, Mapper, Visitable, Visitor}}; + +/// Test if term is closed for a context with `depth` variables. +#[derive(Debug, Default)] +struct Closed { + depth: usize, +} + +impl Visitor for Closed { + type Out = bool; + + fn visit_epsilon(&mut self, _eps: &Epsilon) -> Self::Out { + true + } + + fn visit_literal(&mut self, _lit: &Literal) -> Self::Out { + true + } + + fn visit_cat(&mut self, cat: &Cat) -> Self::Out { + cat.first().visit(self) && cat.second().visit(self) + } + + fn visit_alt(&mut self, alt: &Alt) -> Self::Out { + alt.left().visit(self) && alt.right().visit(self) + } + + fn visit_fix(&mut self, fix: &Fix) -> Self::Out { + self.depth += 1; + let res = fix.inner().visit(self); + self.depth -= 1; + res + } + + fn visit_variable(&mut self, var: &Variable) -> Self::Out { + var.index() < self.depth + } + + fn visit_parameter(&mut self, _param: &Parameter) -> Self::Out { + true + } + + fn visit_call(&mut self, call: &Call) -> Self::Out { + call.args().iter().all(|arg| arg.visit(self)) + } +} + +#[derive(Clone, Copy, Debug)] +pub struct Spanning; + +impl Visitor for Spanning { + type Out = Option<Span>; + + fn visit_epsilon(&mut self, eps: &Epsilon) -> Self::Out { + Some(eps.span) + } + + fn visit_literal(&mut self, lit: &Literal) -> Self::Out { + Some(lit.span()) + } + + fn visit_cat(&mut self, cat: &Cat) -> Self::Out { + let fst = cat.first().visit(self); + let snd = cat.second().visit(self); + + match (fst, snd) { + (None, snd) => snd, + (Some(fst), None) => Some(fst), + (Some(fst), Some(snd)) => fst.join(snd), + } + } + + fn visit_alt(&mut self, alt: &Alt) -> Self::Out { + let left = alt.left().visit(self); + let right = alt.right().visit(self); + + match (left, right) { + (None, right) => right, + (Some(left), None) => Some(left), + (Some(left), Some(right)) => left.join(right), + } + } + + fn visit_fix(&mut self, fix: &Fix) -> Self::Out { + fix.span() + } + + fn visit_variable(&mut self, var: &Variable) -> Self::Out { + Some(var.name().span()) + } + + fn visit_parameter(&mut self, param: &Parameter) -> Self::Out { + Some(param.name().span()) + } + + fn visit_call(&mut self, call: &Call) -> Self::Out { + call.span() + } +} + +#[derive(Debug)] +pub struct TypeInfer<'a> { + context: &'a mut Context, +} + +impl Visitor for TypeInfer<'_> { + type Out = Result<Type, TypeError>; + + fn visit_epsilon(&mut self, _eps: &Epsilon) -> Self::Out { + Ok(Type::new(true, FirstSet::default(), FlastSet::default())) + } + + fn visit_literal(&mut self, lit: &Literal) -> Self::Out { + Ok(Type::of_str(&lit.value())) + } + + fn visit_cat(&mut self, cat: &Cat) -> Self::Out { + let first = cat.first().visit(self)?; + let second = self + .context + .with_unguard(|context| cat.second().visit(&mut TypeInfer { context }))?; + + if first.nullable() { + Err(TypeError::Cat(CatError::FirstNullable(cat.clone()))) + } else if !first + .flast_set() + .intersect_first(second.first_set()) + .is_empty() + { + Err(TypeError::Cat(CatError::FirstFlastOverlap(cat.clone()))) + } else { + Ok(first.cat(second)) + } + } + + fn visit_alt(&mut self, alt: &Alt) -> Self::Out { + let left = alt.left().visit(self)?; + let right = alt.right().visit(self)?; + + if left.nullable() && right.nullable() { + Err(TypeError::Alt(AltError::BothNullable(alt.clone()))) + } else if !left.first_set().intersect(right.first_set()).is_empty() { + Err(TypeError::Alt(AltError::FirstOverlap(alt.clone()))) + } else { + Ok(left.alt(right)) + } + } + + fn visit_fix(&mut self, fix: &Fix) -> Self::Out { + let mut res = Type::default(); + let mut last = None; + + while last.map(|r| r != res).unwrap_or(true) { + last = Some(res); + res = self + .context + .with_variable_type(last.as_ref().cloned().unwrap(), |context| { + fix.inner().visit(&mut TypeInfer { context }) + }) + .map_err(|e| { + TypeError::Fix(FixError( + fix.clone(), + last.as_ref().cloned().unwrap(), + Box::new(e), + )) + })?; + } + + Ok(res) + } + + fn visit_variable(&mut self, var: &Variable) -> Self::Out { + Ok(self.context.get_variable_type(&var)?.clone()) + } + + fn visit_parameter(&mut self, _param: &Parameter) -> Self::Out { + todo!() + } + + fn visit_call(&mut self, _call: &Call) -> Self::Out { + todo!() + } +} + +#[derive(Debug)] +pub struct TypeCheck<'a> { + pub context: &'a mut Context, +} + +impl Folder for TypeCheck<'_> { + type Out = Result<TypedExpression, TypeError>; + + fn fold_epsilon(&mut self, eps: Epsilon) -> Self::Out { + Ok(typed::Epsilon::from(eps).into()) + } + + fn fold_literal(&mut self, lit: Literal) -> Self::Out { + Ok(typed::Literal::from(lit).into()) + } + + fn fold_cat(&mut self, cat: Cat) -> Self::Out { + let ty = TypeInfer { + context: self.context, + } + .visit_cat(&cat)?; + let fst = cat.fst.fold(self)?; + let snd = cat.snd; + let snd = self + .context + .with_unguard(|context| snd.fold(&mut TypeCheck { context }))?; + + Ok(typed::Cat::new(fst, cat.punct, snd, ty).into()) + } + + fn fold_alt(&mut self, alt: Alt) -> Self::Out { + let ty = TypeInfer { + context: self.context, + } + .visit_alt(&alt)?; + let left = alt.left.fold(self)?; + let right = alt.right.fold(self)?; + + Ok(typed::Alt::new(left, alt.punct, right, ty).into()) + } + + fn fold_fix(&mut self, fix: Fix) -> Self::Out { + let ty = TypeInfer { + context: self.context, + } + .visit_fix(&fix)?; + let inner = fix.inner; + let inner = self + .context + .with_variable_type(ty.clone(), |context| inner.fold(&mut TypeCheck { context }))?; + + Ok(typed::Fix::new(fix.arg, inner, fix.span, ty).into()) + } + + fn fold_variable(&mut self, var: Variable) -> Self::Out { + let ty = TypeInfer { + context: self.context, + } + .visit_variable(&var)?; + Ok(typed::Variable::new(var, ty).into()) + } + + fn fold_parameter(&mut self, _param: Parameter) -> Self::Out { + todo!() + } + + fn fold_call(&mut self, _call: Call) -> Self::Out { + todo!() + } +} + +#[derive(Debug, Default)] +struct DeepenVars { + depth: usize, +} + +impl Mapper for DeepenVars { + type Out = (); + + fn map_epsilon(&mut self, _: &mut Epsilon) -> Self::Out {} + + fn map_literal(&mut self, _: &mut Literal) -> Self::Out {} + + fn map_cat(&mut self, cat: &mut Cat) -> Self::Out { + cat.first_mut().map(self); + cat.second_mut().map(self); + } + + fn map_alt(&mut self, alt: &mut Alt) -> Self::Out { + alt.left_mut().map(self); + alt.right_mut().map(self); + } + + fn map_fix(&mut self, fix: &mut Fix) -> Self::Out { + self.depth += 1; + fix.inner_mut().map(self); + self.depth -= 1; + } + + fn map_variable(&mut self, bind: &mut Variable) -> Self::Out { + if bind.index() >= self.depth { + *bind.index_mut() += 1; + } + } + + fn map_parameter(&mut self, _param: &mut Parameter) -> Self::Out {} + + fn map_call(&mut self, call: &mut Call) -> Self::Out { + for arg in call.args_mut() { + arg.map(self); + } + } +} + +#[derive(Debug, Default)] +struct ShallowVars { + depth: usize, +} + +impl Mapper for ShallowVars { + type Out = (); + + fn map_epsilon(&mut self, _: &mut Epsilon) -> Self::Out {} + + fn map_literal(&mut self, _: &mut Literal) -> Self::Out {} + + fn map_cat(&mut self, cat: &mut Cat) -> Self::Out { + cat.first_mut().map(self); + cat.second_mut().map(self); + } + + fn map_alt(&mut self, alt: &mut Alt) -> Self::Out { + alt.left_mut().map(self); + alt.right_mut().map(self); + } + + fn map_fix(&mut self, fix: &mut Fix) -> Self::Out { + self.depth += 1; + fix.inner_mut().map(self); + self.depth -= 1; + } + + fn map_variable(&mut self, bind: &mut Variable) -> Self::Out { + if bind.index() > self.depth { + *bind.index_mut() -= 1; + } + } + + fn map_parameter(&mut self, _param: &mut Parameter) -> Self::Out {} + + fn map_call(&mut self, call: &mut Call) -> Self::Out { + for arg in call.args_mut() { + arg.map(self); + } + } +} + +struct Substitute { + params: Vec<Expression>, +} + +impl Folder for Substitute { + type Out = Result<Expression, SubstituteError>; + + fn fold_epsilon(&mut self, eps: Epsilon) -> Self::Out { + Ok(eps.into()) + } + + fn fold_literal(&mut self, lit: Literal) -> Self::Out { + Ok(lit.into()) + } + + fn fold_cat(&mut self, mut cat: Cat) -> Self::Out { + cat.fst = Box::new(cat.fst.fold(self)?); + cat.snd = Box::new(cat.snd.fold(self)?); + Ok(cat.into()) + } + + fn fold_alt(&mut self, mut alt: Alt) -> Self::Out { + alt.left = Box::new(alt.left.fold(self)?); + alt.right = Box::new(alt.right.fold(self)?); + Ok(alt.into()) + } + + fn fold_fix(&mut self, mut fix: Fix) -> Self::Out { + for param in &mut self.params { + param.map(&mut DeepenVars::default()); + } + + fix.inner = Box::new(fix.inner.fold(self)?); + + for param in &mut self.params { + param.map(&mut ShallowVars::default()); + } + + Ok(fix.into()) + } + + fn fold_variable(&mut self, var: Variable) -> Self::Out { + Ok(Expression::Variable(var)) + } + + fn fold_call(&mut self, mut call: Call) -> Self::Out { + call.args = call + .args + .into_iter() + .map(|arg| arg.fold(self)) + .collect::<Result<_, _>>()?; + Ok(call.into()) + } + + fn fold_parameter(&mut self, param: Parameter) -> Self::Out { + self.params + .get(param.index()) + .cloned() + .ok_or(SubstituteError::FreeParameter(param)) + } +} + +#[derive(Clone, Debug)] +pub struct InlineCall { + function: Function, +} + +impl InlineCall { + pub fn new(function: Function) -> Self { + Self { + function + } + } +} + +impl Folder for InlineCall { + type Out = Result<Expression, SubstituteError>; + + fn fold_epsilon(&mut self, eps: Epsilon) -> Self::Out { + Ok(eps.into()) + } + + fn fold_literal(&mut self, lit: Literal) -> Self::Out { + Ok(lit.into()) + } + + fn fold_cat(&mut self, mut cat: Cat) -> Self::Out { + cat.fst = Box::new(cat.fst.fold(self)?); + cat.snd = Box::new(cat.snd.fold(self)?); + Ok(cat.into()) + } + + fn fold_alt(&mut self, mut alt: Alt) -> Self::Out { + alt.left = Box::new(alt.left.fold(self)?); + alt.right = Box::new(alt.right.fold(self)?); + Ok(alt.into()) + } + + fn fold_fix(&mut self, mut fix: Fix) -> Self::Out { + fix.inner = Box::new(fix.inner.fold(self)?); + Ok(fix.into()) + } + + fn fold_variable(&mut self, var: Variable) -> Self::Out { + Ok(var.into()) + } + + fn fold_parameter(&mut self, param: Parameter) -> Self::Out { + Ok(param.into()) + } + + fn fold_call(&mut self, mut call: Call) -> Self::Out { + call.args = call + .args + .into_iter() + .map(|arg| arg.fold(self)) + .collect::<Result<_, _>>()?; + + if call.name != self.function.name { + Ok(call.into()) + } else if call.args.len() != self.function.params { + Err(SubstituteError::WrongArgCount { + call, + expected: self.args, + }) + } else { + self.term + .clone() + .fold(&mut Substitute { params: call.args }) + } + } +} diff --git a/src/chomp/ast.rs b/src/chomp/ast.rs new file mode 100644 index 0000000..e8e3309 --- /dev/null +++ b/src/chomp/ast.rs @@ -0,0 +1,345 @@ +use std::fmt::{self, Display}; + +use proc_macro2::Span; +use syn::{Ident, LitStr, Token}; + +pub type Epsilon = Token![_]; + +pub type Literal = LitStr; + +#[derive(Clone, Debug)] +pub struct Cat { + pub fst: Box<Expression>, + pub punct: Option<Token![.]>, + pub snd: Box<Expression>, +} + +impl Cat { + pub fn new(fst: Expression, punct: Option<Token![.]>, snd: Expression) -> Self { + Self { + fst: Box::new(fst), + punct, + snd: Box::new(snd), + } + } + + pub fn first(&self) -> &Expression { + &self.fst + } + + pub fn first_mut(&mut self) -> &mut Expression { + &mut self.fst + } + + pub fn punct(&self) -> Option<Token![.]> { + self.punct + } + + pub fn second(&self) -> &Expression { + &self.snd + } + + pub fn second_mut(&mut self) -> &mut Expression { + &mut self.snd + } +} + +impl Display for Cat { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "({}.{})", self.fst, self.snd) + } +} + +#[derive(Clone, Debug)] +pub struct Alt { + pub left: Box<Expression>, + pub punct: Option<Token![|]>, + pub right: Box<Expression>, +} + +impl Alt { + pub fn new(left: Expression, punct: Option<Token![|]>, right: Expression) -> Self { + Self { + left: Box::new(left), + punct, + right: Box::new(right), + } + } + + pub fn left(&self) -> &Expression { + &self.left + } + + pub fn left_mut(&mut self) -> &mut Expression { + &mut self.left + } + + pub fn punct(&self) -> Option<Token![|]> { + self.punct + } + + pub fn right(&self) -> &Expression { + &self.right + } + + pub fn right_mut(&mut self) -> &mut Expression { + &mut self.right + } +} + +impl Display for Alt { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "({}|{})", self.left, self.right) + } +} + +#[derive(Clone, Debug)] +pub struct Fix { + pub arg: Ident, + pub inner: Box<Expression>, + pub span: Option<Span>, +} + +impl Fix { + pub fn new(arg: Ident, inner: Expression, span: Option<Span>) -> Self { + Self { + arg, + inner: Box::new(inner), + span, + } + } + + pub fn arg(&self) -> &Ident { + &self.arg + } + + pub fn inner(&self) -> &Expression { + &self.inner + } + + pub fn inner_mut(&mut self) -> &mut Expression { + &mut self.inner + } + + pub fn span(&self) -> Option<Span> { + self.span + } +} + +impl Display for Fix { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "[{}]({})", self.arg, self.inner) + } +} + +#[derive(Clone, Debug)] +pub struct Variable { + pub name: Ident, + pub index: usize, +} + +impl Variable { + pub fn new(name: Ident, index: usize) -> Self { + Self { name, index } + } + + pub fn name(&self) -> &Ident { + &self.name + } + + pub fn index(&self) -> usize { + self.index + } + + pub fn index_mut(&mut self) -> &mut usize { + &mut self.index + } +} + +impl Display for Variable { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + self.name.fmt(f) + } +} + +#[derive(Clone, Debug)] +pub struct Parameter { + pub name: Ident, + pub index: usize, +} + +impl Parameter { + pub fn new(name: Ident, index: usize) -> Self { + Self { name, index } + } + + pub fn name(&self) -> &Ident { + &self.name + } + + pub fn index(&self) -> usize { + self.index + } + + pub fn index_mut(&mut self) -> &mut usize { + &mut self.index + } +} + +impl Display for Parameter { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + self.name.fmt(f) + } +} + +/// A macro invocation. +#[derive(Clone, Debug)] +pub struct Call { + pub name: Ident, + pub args: Vec<Expression>, + pub span: Option<Span>, +} + +impl Call { + pub fn new(name: Ident, args: Vec<Expression>, span: Option<Span>) -> Self { + Self { name, args, span } + } + + pub fn name(&self) -> &Ident { + &self.name + } + + pub fn args(&self) -> &[Expression] { + &self.args + } + + pub fn args_mut(&mut self) -> &mut [Expression] { + &mut self.args + } + + pub fn span(&self) -> Option<Span> { + self.span + } +} + +impl Display for Call { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{}", self.name)?; + + let mut iter = self.args.iter(); + + if let Some(arg) = iter.next() { + write!(f, "({}", arg)?; + + for arg in iter { + write!(f, ", {}", arg)?; + } + + write!(f, ")") + } else { + Ok(()) + } + } +} + +#[derive(Clone, Debug)] +pub enum Expression { + /// Matches the empty string. + Epsilon(Epsilon), + /// Matches the literal string. + Literal(Literal), + /// Matches one term followed by another. + Cat(Cat), + /// Matches either one term or another. + Alt(Alt), + /// The least fix point of a term. + Fix(Fix), + /// A fixed point variable. + Variable(Variable), + /// A formal parameter. + Parameter(Parameter), + /// A macro invocation. + Call(Call), +} + +impl Display for Expression { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Self::Epsilon(_) => write!(f, "_"), + Self::Literal(l) => write!(f, "{:?}", l.value()), + Self::Cat(c) => c.fmt(f), + Self::Alt(a) => a.fmt(f), + Self::Fix(x) => x.fmt(f), + Self::Variable(v) => v.fmt(f), + Self::Parameter(p) => p.fmt(f), + Self::Call(c) => c.fmt(f), + } + } +} + +impl From<Epsilon> for Expression { + fn from(eps: Epsilon) -> Self { + Self::Epsilon(eps) + } +} + +impl From<Literal> for Expression { + fn from(lit: Literal) -> Self { + Self::Literal(lit) + } +} + +impl From<Cat> for Expression { + fn from(cat: Cat) -> Self { + Self::Cat(cat) + } +} + +impl From<Alt> for Expression { + fn from(alt: Alt) -> Self { + Self::Alt(alt) + } +} + +impl From<Fix> for Expression { + fn from(fix: Fix) -> Self { + Self::Fix(fix) + } +} + +impl From<Variable> for Expression { + fn from(var: Variable) -> Self { + Self::Variable(var) + } +} + +impl From<Parameter> for Expression { + fn from(param: Parameter) -> Self { + Self::Parameter(param) + } +} + +impl From<Call> for Expression { + fn from(call: Call) -> Self { + Self::Call(call) + } +} + +#[derive(Clone, Debug)] +pub struct Function { + pub name: Ident, + pub params: usize, + pub expr: Expression, + pub span: Option<Span>, +} + +impl Function { + pub fn new(name: Ident, params: usize, expr: Expression, span: Option<Span>) -> Self { + Self { + name, + params, + expr, + span, + } + } +} diff --git a/src/chomp/check.rs b/src/chomp/check.rs new file mode 100644 index 0000000..cb4798c --- /dev/null +++ b/src/chomp/check.rs @@ -0,0 +1,481 @@ +use proc_macro2::Span; + +use super::{ + ast::{Alt, Call, Cat, Epsilon, Expression, Fix, Function, Literal, Parameter, Variable}, + context::Context, + error::{AltError, CatError, FixError, SubstituteError, TypeError}, + set::{FirstSet, FlastSet}, + typed::{self, Type, TypedExpression}, + visit::{Folder, Mapper, Visitable, Visitor}, +}; + +/// Test if term is closed for a context with `depth` variables. +#[derive(Debug, Default)] +struct Closed { + depth: usize, +} + +impl Visitor for Closed { + type Out = bool; + + fn visit_epsilon(&mut self, _eps: &Epsilon) -> Self::Out { + true + } + + fn visit_literal(&mut self, _lit: &Literal) -> Self::Out { + true + } + + fn visit_cat(&mut self, cat: &Cat) -> Self::Out { + cat.first().visit(self) && cat.second().visit(self) + } + + fn visit_alt(&mut self, alt: &Alt) -> Self::Out { + alt.left().visit(self) && alt.right().visit(self) + } + + fn visit_fix(&mut self, fix: &Fix) -> Self::Out { + self.depth += 1; + let res = fix.inner().visit(self); + self.depth -= 1; + res + } + + fn visit_variable(&mut self, var: &Variable) -> Self::Out { + var.index() < self.depth + } + + fn visit_parameter(&mut self, _param: &Parameter) -> Self::Out { + true + } + + fn visit_call(&mut self, call: &Call) -> Self::Out { + call.args().iter().all(|arg| arg.visit(self)) + } +} + +#[derive(Clone, Copy, Debug)] +pub struct Spanning; + +impl Visitor for Spanning { + type Out = Option<Span>; + + fn visit_epsilon(&mut self, eps: &Epsilon) -> Self::Out { + Some(eps.span) + } + + fn visit_literal(&mut self, lit: &Literal) -> Self::Out { + Some(lit.span()) + } + + fn visit_cat(&mut self, cat: &Cat) -> Self::Out { + let fst = cat.first().visit(self); + let snd = cat.second().visit(self); + + match (fst, snd) { + (None, snd) => snd, + (Some(fst), None) => Some(fst), + (Some(fst), Some(snd)) => fst.join(snd), + } + } + + fn visit_alt(&mut self, alt: &Alt) -> Self::Out { + let left = alt.left().visit(self); + let right = alt.right().visit(self); + + match (left, right) { + (None, right) => right, + (Some(left), None) => Some(left), + (Some(left), Some(right)) => left.join(right), + } + } + + fn visit_fix(&mut self, fix: &Fix) -> Self::Out { + fix.span() + } + + fn visit_variable(&mut self, var: &Variable) -> Self::Out { + Some(var.name().span()) + } + + fn visit_parameter(&mut self, param: &Parameter) -> Self::Out { + Some(param.name().span()) + } + + fn visit_call(&mut self, call: &Call) -> Self::Out { + call.span() + } +} + +#[derive(Debug)] +pub struct TypeInfer<'a> { + context: &'a mut Context, +} + +impl Visitor for TypeInfer<'_> { + type Out = Result<Type, TypeError>; + + fn visit_epsilon(&mut self, _eps: &Epsilon) -> Self::Out { + Ok(Type::new(true, FirstSet::default(), FlastSet::default())) + } + + fn visit_literal(&mut self, lit: &Literal) -> Self::Out { + Ok(Type::of_str(&lit.value())) + } + + fn visit_cat(&mut self, cat: &Cat) -> Self::Out { + let first = cat.first().visit(self)?; + let second = self + .context + .with_unguard(|context| cat.second().visit(&mut TypeInfer { context }))?; + + if first.nullable() { + Err(TypeError::Cat(CatError::FirstNullable(cat.clone()))) + } else if !first + .flast_set() + .intersect_first(second.first_set()) + .is_empty() + { + Err(TypeError::Cat(CatError::FirstFlastOverlap(cat.clone()))) + } else { + Ok(first.cat(second)) + } + } + + fn visit_alt(&mut self, alt: &Alt) -> Self::Out { + let left = alt.left().visit(self)?; + let right = alt.right().visit(self)?; + + if left.nullable() && right.nullable() { + Err(TypeError::Alt(AltError::BothNullable(alt.clone()))) + } else if !left.first_set().intersect(right.first_set()).is_empty() { + Err(TypeError::Alt(AltError::FirstOverlap(alt.clone()))) + } else { + Ok(left.alt(right)) + } + } + + fn visit_fix(&mut self, fix: &Fix) -> Self::Out { + let mut res = Type::default(); + let mut last = None; + + while last.map(|r| r != res).unwrap_or(true) { + last = Some(res); + res = self + .context + .with_variable_type(last.as_ref().cloned().unwrap(), |context| { + fix.inner().visit(&mut TypeInfer { context }) + }) + .map_err(|e| { + TypeError::Fix(FixError( + fix.clone(), + last.as_ref().cloned().unwrap(), + Box::new(e), + )) + })?; + } + + Ok(res) + } + + fn visit_variable(&mut self, var: &Variable) -> Self::Out { + Ok(self.context.get_variable_type(&var)?.clone()) + } + + fn visit_parameter(&mut self, _param: &Parameter) -> Self::Out { + todo!() + } + + fn visit_call(&mut self, _call: &Call) -> Self::Out { + todo!() + } +} + +#[derive(Debug)] +pub struct TypeCheck<'a> { + pub context: &'a mut Context, +} + +impl Folder for TypeCheck<'_> { + type Out = Result<TypedExpression, TypeError>; + + fn fold_epsilon(&mut self, eps: Epsilon) -> Self::Out { + Ok(typed::Epsilon::from(eps).into()) + } + + fn fold_literal(&mut self, lit: Literal) -> Self::Out { + Ok(typed::Literal::from(lit).into()) + } + + fn fold_cat(&mut self, cat: Cat) -> Self::Out { + let ty = TypeInfer { + context: self.context, + } + .visit_cat(&cat)?; + let fst = cat.fst.fold(self)?; + let snd = cat.snd; + let snd = self + .context + .with_unguard(|context| snd.fold(&mut TypeCheck { context }))?; + + Ok(typed::Cat::new(fst, cat.punct, snd, ty).into()) + } + + fn fold_alt(&mut self, alt: Alt) -> Self::Out { + let ty = TypeInfer { + context: self.context, + } + .visit_alt(&alt)?; + let left = alt.left.fold(self)?; + let right = alt.right.fold(self)?; + + Ok(typed::Alt::new(left, alt.punct, right, ty).into()) + } + + fn fold_fix(&mut self, fix: Fix) -> Self::Out { + let ty = TypeInfer { + context: self.context, + } + .visit_fix(&fix)?; + let inner = fix.inner; + let inner = self + .context + .with_variable_type(ty.clone(), |context| inner.fold(&mut TypeCheck { context }))?; + + Ok(typed::Fix::new(fix.arg, inner, fix.span, ty).into()) + } + + fn fold_variable(&mut self, var: Variable) -> Self::Out { + let ty = TypeInfer { + context: self.context, + } + .visit_variable(&var)?; + Ok(typed::Variable::new(var, ty).into()) + } + + fn fold_parameter(&mut self, _param: Parameter) -> Self::Out { + todo!() + } + + fn fold_call(&mut self, _call: Call) -> Self::Out { + todo!() + } +} + +#[derive(Debug, Default)] +struct DeepenVars { + depth: usize, +} + +impl Mapper for DeepenVars { + type Out = (); + + fn map_epsilon(&mut self, _: &mut Epsilon) -> Self::Out {} + + fn map_literal(&mut self, _: &mut Literal) -> Self::Out {} + + fn map_cat(&mut self, cat: &mut Cat) -> Self::Out { + cat.first_mut().map(self); + cat.second_mut().map(self); + } + + fn map_alt(&mut self, alt: &mut Alt) -> Self::Out { + alt.left_mut().map(self); + alt.right_mut().map(self); + } + + fn map_fix(&mut self, fix: &mut Fix) -> Self::Out { + self.depth += 1; + fix.inner_mut().map(self); + self.depth -= 1; + } + + fn map_variable(&mut self, bind: &mut Variable) -> Self::Out { + if bind.index() >= self.depth { + *bind.index_mut() += 1; + } + } + + fn map_parameter(&mut self, _param: &mut Parameter) -> Self::Out {} + + fn map_call(&mut self, call: &mut Call) -> Self::Out { + for arg in call.args_mut() { + arg.map(self); + } + } +} + +#[derive(Debug, Default)] +struct ShallowVars { + depth: usize, +} + +impl Mapper for ShallowVars { + type Out = (); + + fn map_epsilon(&mut self, _: &mut Epsilon) -> Self::Out {} + + fn map_literal(&mut self, _: &mut Literal) -> Self::Out {} + + fn map_cat(&mut self, cat: &mut Cat) -> Self::Out { + cat.first_mut().map(self); + cat.second_mut().map(self); + } + + fn map_alt(&mut self, alt: &mut Alt) -> Self::Out { + alt.left_mut().map(self); + alt.right_mut().map(self); + } + + fn map_fix(&mut self, fix: &mut Fix) -> Self::Out { + self.depth += 1; + fix.inner_mut().map(self); + self.depth -= 1; + } + + fn map_variable(&mut self, bind: &mut Variable) -> Self::Out { + if bind.index() > self.depth { + *bind.index_mut() -= 1; + } + } + + fn map_parameter(&mut self, _param: &mut Parameter) -> Self::Out {} + + fn map_call(&mut self, call: &mut Call) -> Self::Out { + for arg in call.args_mut() { + arg.map(self); + } + } +} + +struct Substitute { + params: Vec<Expression>, +} + +impl Folder for Substitute { + type Out = Result<Expression, SubstituteError>; + + fn fold_epsilon(&mut self, eps: Epsilon) -> Self::Out { + Ok(eps.into()) + } + + fn fold_literal(&mut self, lit: Literal) -> Self::Out { + Ok(lit.into()) + } + + fn fold_cat(&mut self, mut cat: Cat) -> Self::Out { + cat.fst = Box::new(cat.fst.fold(self)?); + cat.snd = Box::new(cat.snd.fold(self)?); + Ok(cat.into()) + } + + fn fold_alt(&mut self, mut alt: Alt) -> Self::Out { + alt.left = Box::new(alt.left.fold(self)?); + alt.right = Box::new(alt.right.fold(self)?); + Ok(alt.into()) + } + + fn fold_fix(&mut self, mut fix: Fix) -> Self::Out { + for param in &mut self.params { + param.map(&mut DeepenVars::default()); + } + + fix.inner = Box::new(fix.inner.fold(self)?); + + for param in &mut self.params { + param.map(&mut ShallowVars::default()); + } + + Ok(fix.into()) + } + + fn fold_variable(&mut self, var: Variable) -> Self::Out { + Ok(Expression::Variable(var)) + } + + fn fold_call(&mut self, mut call: Call) -> Self::Out { + call.args = call + .args + .into_iter() + .map(|arg| arg.fold(self)) + .collect::<Result<_, _>>()?; + Ok(call.into()) + } + + fn fold_parameter(&mut self, param: Parameter) -> Self::Out { + self.params + .get(param.index()) + .cloned() + .ok_or(SubstituteError::FreeParameter(param)) + } +} + +#[derive(Clone, Debug)] +pub struct InlineCall { + function: Function, +} + +impl InlineCall { + pub fn new(function: Function) -> Self { + Self { function } + } +} + +impl Folder for InlineCall { + type Out = Result<Expression, SubstituteError>; + + fn fold_epsilon(&mut self, eps: Epsilon) -> Self::Out { + Ok(eps.into()) + } + + fn fold_literal(&mut self, lit: Literal) -> Self::Out { + Ok(lit.into()) + } + + fn fold_cat(&mut self, mut cat: Cat) -> Self::Out { + cat.fst = Box::new(cat.fst.fold(self)?); + cat.snd = Box::new(cat.snd.fold(self)?); + Ok(cat.into()) + } + + fn fold_alt(&mut self, mut alt: Alt) -> Self::Out { + alt.left = Box::new(alt.left.fold(self)?); + alt.right = Box::new(alt.right.fold(self)?); + Ok(alt.into()) + } + + fn fold_fix(&mut self, mut fix: Fix) -> Self::Out { + fix.inner = Box::new(fix.inner.fold(self)?); + Ok(fix.into()) + } + + fn fold_variable(&mut self, var: Variable) -> Self::Out { + Ok(var.into()) + } + + fn fold_parameter(&mut self, param: Parameter) -> Self::Out { + Ok(param.into()) + } + + fn fold_call(&mut self, mut call: Call) -> Self::Out { + call.args = call + .args + .into_iter() + .map(|arg| arg.fold(self)) + .collect::<Result<_, _>>()?; + + if call.name != self.function.name { + Ok(call.into()) + } else if call.args.len() != self.function.params { + Err(SubstituteError::WrongArgCount { + call, + expected: self.function.params, + }) + } else { + self.function + .expr + .clone() + .fold(&mut Substitute { params: call.args }) + } + } +} diff --git a/src/chomp/context.rs b/src/chomp/context.rs new file mode 100644 index 0000000..392023f --- /dev/null +++ b/src/chomp/context.rs @@ -0,0 +1,51 @@ +use super::{ast::Variable, error::VariableError, typed::Type}; + +#[derive(Debug, Default)] +pub struct Context { + vars: Vec<Type>, + unguard_points: Vec<usize>, +} + +impl Context { + pub fn new() -> Self { + Self::default() + } + + pub fn depth(&self) -> usize { + self.vars.len() + } + + pub fn is_unguarded(&self, var: &Variable) -> Option<bool> { + if self.vars.len() <= var.index() { + None + } else if self.unguard_points.is_empty() { + Some(false) + } else { + Some( + self.unguard_points[self.unguard_points.len() - 1] + var.index() >= self.vars.len(), + ) + } + } + + pub fn with_unguard<F: FnOnce(&mut Self) -> R, R>(&mut self, f: F) -> R { + self.unguard_points.push(self.vars.len()); + let res = f(self); + self.unguard_points.pop(); + res + } + + pub fn get_variable_type(&self, var: &Variable) -> Result<&Type, VariableError> { + match self.is_unguarded(var) { + None => Err(VariableError::FreeVariable(var.clone())), + Some(false) => Err(VariableError::GuardedVariable(var.clone())), + Some(true) => Ok(&self.vars[self.vars.len() - var.index() - 1]), + } + } + + pub fn with_variable_type<F: FnOnce(&mut Self) -> R, R>(&mut self, ty: Type, f: F) -> R { + self.vars.push(ty); + let res = f(self); + self.vars.pop(); + res + } +} diff --git a/src/chomp/error.rs b/src/chomp/error.rs new file mode 100644 index 0000000..6733d06 --- /dev/null +++ b/src/chomp/error.rs @@ -0,0 +1,389 @@ +use std::{ + error::Error, + fmt::{self, Display}, +}; + +use proc_macro2::Span; + +use super::{ + ast::{Alt, Call, Cat, Fix, Parameter, Variable}, + check::Spanning, + typed::Type, + visit::Visitable, +}; + +/// A type error when using a fix point variable. +#[derive(Debug)] +pub enum VariableError { + /// Usage of a free variable. + FreeVariable(Variable), + /// Usage of a guarded variable. + GuardedVariable(Variable), +} + +impl From<VariableError> for syn::Error { + fn from(other: VariableError) -> Self { + match other { + VariableError::FreeVariable(_) => todo!(), + VariableError::GuardedVariable(_) => todo!(), + } + } +} + +impl Display for VariableError { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Self::FreeVariable(var) => { + let start = var.name().span().start(); + write!( + f, + "{}:{}: unbound variable '{}'", + start.line, + start.column, + var.name() + ) + } + Self::GuardedVariable(var) => { + let start = var.name().span().start(); + write!( + f, + "{}:{}: variable '{}' is guarded", + start.line, + start.column, + var.name() + ) + } + } + } +} + +impl Error for VariableError {} + +/// A type error when concatenating two terms. +#[derive(Debug)] +pub enum CatError { + /// The first term was unexpectedly nullable. + FirstNullable(Cat), + /// The flast set of the first term intersects the first set of the second. + FirstFlastOverlap(Cat), +} + +impl From<CatError> for syn::Error { + fn from(other: CatError) -> Self { + match other { + CatError::FirstNullable(cat) => { + let mut err = Self::new( + cat.punct().map_or_else(Span::call_site, |p| p.span), + "first item in sequence cannot accept the empty string", + ); + err.combine(Self::new( + cat.first() + .visit(&mut Spanning) + .unwrap_or_else(Span::call_site), + "this can accept empty string", + )); + err + } + CatError::FirstFlastOverlap(cat) => { + let mut err = Self::new( + cat.punct().map_or_else(Span::call_site, |p| p.span), + "cannot decide whether to repeat first or start accepting second", + ); + err.combine(Self::new( + cat.first() + .visit(&mut Spanning) + .unwrap_or_else(Span::call_site), + "a repetition of this", + )); + err.combine(Self::new( + cat.second() + .visit(&mut Spanning) + .unwrap_or_else(Span::call_site), + "collides with the start of this", + )); + err + } + } + } +} + +impl Display for CatError { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Self::FirstNullable(cat) => { + let start = cat.punct().map_or_else(Span::call_site, |p| p.span).start(); + let fst_start = cat + .first() + .visit(&mut Spanning) + .unwrap_or_else(Span::call_site) + .start(); + write!( + f, + "{}:{}: term `{}' ({}:{}) accepts the empty string", + start.line, + start.column, + cat.first(), + fst_start.line, + fst_start.column + ) + } + Self::FirstFlastOverlap(cat) => { + let start = cat.punct().map_or_else(Span::call_site, |p| p.span).start(); + let fst_start = cat + .first() + .visit(&mut Spanning) + .unwrap_or_else(Span::call_site) + .start(); + let snd_start = cat + .second() + .visit(&mut Spanning) + .unwrap_or_else(Span::call_site) + .start(); + write!( + f, + "{}:{}: cannot decide whether to repeat `{}' ({}:{}) or start accepting `{}' ({}:{}).", + start.line, + start.column, + cat.first(), + fst_start.line, + fst_start.column, + cat.second(), + snd_start.line, + snd_start.column + ) + } + } + } +} + +impl Error for CatError {} + +/// A type error when alternating two terms. +#[derive(Debug)] +pub enum AltError { + /// Both terms are nullable. + BothNullable(Alt), + /// The first sets of the two terms intersect. + FirstOverlap(Alt), +} + +impl From<AltError> for syn::Error { + fn from(other: AltError) -> Self { + match other { + AltError::BothNullable(alt) => { + let mut err = Self::new( + alt.punct().map_or_else(Span::call_site, |p| p.span), + "both branches accept the empty string", + ); + err.combine(Self::new( + alt.left() + .visit(&mut Spanning) + .unwrap_or_else(Span::call_site), + "left branch accepts the empty string", + )); + err.combine(Self::new( + alt.right() + .visit(&mut Spanning) + .unwrap_or_else(Span::call_site), + "right branch accepts the empty string", + )); + err + } + AltError::FirstOverlap(alt) => { + let mut err = Self::new( + alt.punct().map_or_else(Span::call_site, |p| p.span), + "cannot decide whether to accept the left or right branch", + ); + err.combine(Self::new( + alt.left() + .visit(&mut Spanning) + .unwrap_or_else(Span::call_site), + "left branch starts with a character", + )); + err.combine(Self::new( + alt.right() + .visit(&mut Spanning) + .unwrap_or_else(Span::call_site), + "right branch starts with the same character", + )); + err + } + } + } +} + +impl Display for AltError { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Self::BothNullable(alt) => { + let start = alt.punct().map_or_else(Span::call_site, |p| p.span).start(); + let left_start = alt + .left() + .visit(&mut Spanning) + .unwrap_or_else(Span::call_site) + .start(); + let right_start = alt + .right() + .visit(&mut Spanning) + .unwrap_or_else(Span::call_site) + .start(); + write!( + f, + "{}:{}: both `{}' ({}:{}) and `{}' ({}:{}) accept the empty string.", + start.line, + start.column, + alt.left(), + left_start.line, + left_start.column, + alt.right(), + right_start.line, + right_start.column, + ) + } + Self::FirstOverlap(alt) => { + let start = alt.punct().map_or_else(Span::call_site, |p| p.span).start(); + let left_start = alt + .left() + .visit(&mut Spanning) + .unwrap_or_else(Span::call_site) + .start(); + let right_start = alt + .right() + .visit(&mut Spanning) + .unwrap_or_else(Span::call_site) + .start(); + write!( + f, + "{}:{}: cannot decide whether to accept `{}' ({}:{}) or `{}' ({}:{}).", + start.line, + start.column, + alt.left(), + left_start.line, + left_start.column, + alt.right(), + right_start.line, + right_start.column, + ) + } + } + } +} + +impl Error for AltError {} + +#[derive(Debug)] +pub struct FixError(pub Fix, pub Type, pub Box<TypeError>); + +impl From<FixError> for syn::Error { + fn from(e: FixError) -> Self { + let mut error = Self::from(*e.2); + error.combine(Self::new( + e.0.span().unwrap_or_else(Span::call_site), + format!("assuming `{}' has type {:?}.", e.0.arg(), e.1), + )); + error + } +} + +impl Display for FixError { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + self.2.fmt(f)?; + let start = self.0.span().unwrap_or_else(Span::call_site).start(); + write!( + f, + "\n({}:{}) assuming `{}' has type {:?}.", + start.line, + start.column, + self.0.arg(), + self.1 + ) + } +} + +impl Error for FixError {} + +#[derive(Debug)] +pub enum TypeError { + Cat(CatError), + Alt(AltError), + Variable(VariableError), + Fix(FixError), +} + +impl From<CatError> for TypeError { + fn from(other: CatError) -> Self { + Self::Cat(other) + } +} + +impl From<AltError> for TypeError { + fn from(other: AltError) -> Self { + Self::Alt(other) + } +} + +impl From<VariableError> for TypeError { + fn from(other: VariableError) -> Self { + Self::Variable(other) + } +} + +impl From<TypeError> for syn::Error { + fn from(other: TypeError) -> Self { + match other { + TypeError::Cat(e) => e.into(), + TypeError::Alt(e) => e.into(), + TypeError::Variable(e) => e.into(), + TypeError::Fix(e) => e.into(), + } + } +} + +impl Display for TypeError { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Self::Cat(e) => e.fmt(f), + Self::Alt(e) => e.fmt(f), + Self::Variable(e) => e.fmt(f), + Self::Fix(e) => e.fmt(f), + } + } +} + +impl Error for TypeError {} + +#[derive(Debug)] +pub enum SubstituteError { + FreeParameter(Parameter), + WrongArgCount { call: Call, expected: usize }, +} + +impl Display for SubstituteError { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Self::FreeParameter(param) => { + let start = param.name().span().start(); + write!( + f, + "{}:{}: undeclared variable `{}'", + start.line, + start.column, + param.name() + ) + } + SubstituteError::WrongArgCount { call, expected } => { + let start = call.span().unwrap_or_else(Span::call_site).start(); + write!( + f, + "{}:{}: wrong number of arguments. Expected {}, got {}", + start.line, + start.column, + call.args().len(), + expected + ) + } + } + } +} + +impl Error for SubstituteError {} diff --git a/src/chomp/mod.rs b/src/chomp/mod.rs new file mode 100644 index 0000000..bb31b6f --- /dev/null +++ b/src/chomp/mod.rs @@ -0,0 +1,7 @@ +pub mod ast; +pub mod check; +pub mod context; +pub mod error; +pub mod set; +pub mod typed; +pub mod visit; diff --git a/src/chomp/set.rs b/src/chomp/set.rs new file mode 100644 index 0000000..0661ab6 --- /dev/null +++ b/src/chomp/set.rs @@ -0,0 +1,91 @@ +use std::collections::BTreeSet; + +#[derive(Clone, Debug, Default, Eq, PartialEq, Hash)] +pub struct FirstSet { + inner: BTreeSet<char>, +} + +impl FirstSet { + pub fn new() -> Self { + Self::default() + } + + pub fn of_str(s: &str) -> Self { + let mut inner = BTreeSet::new(); + s.chars().next().map(|c| inner.insert(c)); + + Self { inner } + } + + pub fn is_empty(&self) -> bool { + self.inner.is_empty() + } + + pub fn union(mut self, mut other: Self) -> Self { + self.inner.append(&mut other.inner); + self + } + + pub fn intersect(&self, other: &Self) -> Self { + Self { + inner: self.inner.intersection(&other.inner).copied().collect(), + } + } +} + +impl IntoIterator for FirstSet { + type Item = char; + + type IntoIter = <BTreeSet<char> as IntoIterator>::IntoIter; + + fn into_iter(self) -> Self::IntoIter { + self.inner.into_iter() + } +} + +#[derive(Clone, Debug, Default, Eq, PartialEq, Hash)] +pub struct FlastSet { + inner: BTreeSet<char>, +} + +impl FlastSet { + pub fn new() -> Self { + Self::default() + } + + pub fn is_empty(&self) -> bool { + self.inner.is_empty() + } + + pub fn union_first(mut self, mut other: FirstSet) -> Self { + self.inner.append(&mut other.inner); + self + } + + pub fn union(mut self, mut other: Self) -> Self { + self.inner.append(&mut other.inner); + self + } + + pub fn intersect_first(&self, other: &FirstSet) -> Self { + Self { + inner: self.inner.intersection(&other.inner).copied().collect(), + } + } + + pub fn intersect(&self, other: &Self) -> Self { + Self { + inner: self.inner.intersection(&other.inner).copied().collect(), + } + } +} + +impl IntoIterator for FlastSet { + type Item = char; + + type IntoIter = <BTreeSet<char> as IntoIterator>::IntoIter; + + fn into_iter(self) -> Self::IntoIter { + self.inner.into_iter() + } +} diff --git a/src/chomp/typed.rs b/src/chomp/typed.rs new file mode 100644 index 0000000..69108ae --- /dev/null +++ b/src/chomp/typed.rs @@ -0,0 +1,321 @@ +use std::hash::{Hash, Hasher}; + +use proc_macro2::Span; +use syn::{Ident, LitStr, Token}; + +use super::{ + ast, + set::{FirstSet, FlastSet}, +}; + +#[derive(Debug, Default, Clone, Eq, Hash, PartialEq)] +pub struct Type { + nullable: bool, + first_set: FirstSet, + flast_set: FlastSet, +} + +impl Type { + pub const fn new(nullable: bool, first_set: FirstSet, flast_set: FlastSet) -> Self { + Self { + nullable, + first_set, + flast_set, + } + } + + pub fn of_str(s: &str) -> Self { + Self { + nullable: s.is_empty(), + first_set: FirstSet::of_str(s), + flast_set: FlastSet::default(), + } + } + + pub fn nullable(&self) -> bool { + self.nullable + } + + pub fn first_set(&self) -> &FirstSet { + &self.first_set + } + + pub fn flast_set(&self) -> &FlastSet { + &self.flast_set + } + + pub fn cat(self, other: Self) -> Self { + Self { + nullable: self.nullable && other.nullable, + first_set: self.first_set.union(if self.nullable { + other.first_set.clone() + } else { + FirstSet::default() + }), + flast_set: other.flast_set.union(if other.nullable { + self.flast_set.union_first(other.first_set) + } else { + FlastSet::default() + }), + } + } + + pub fn alt(self, other: Self) -> Self { + Self { + nullable: self.nullable || other.nullable, + first_set: self.first_set.union(other.first_set), + flast_set: self.flast_set.union(other.flast_set), + } + } +} + +#[derive(Debug, Eq, Hash, PartialEq)] +pub struct Epsilon { + inner: Token![_], + ty: Type, +} + +impl From<ast::Epsilon> for Epsilon { + fn from(inner: ast::Epsilon) -> Self { + Self { + inner, + ty: Type::new(true, FirstSet::default(), FlastSet::default()), + } + } +} + +#[derive(Debug, Eq, Hash, PartialEq)] +pub struct Literal { + inner: LitStr, + ty: Type, +} + +impl Literal { + pub fn inner(&self) -> &LitStr { + &self.inner + } + + pub fn span(&self) -> Span { + self.inner.span() + } +} + +impl From<ast::Literal> for Literal { + fn from(inner: ast::Literal) -> Self { + let ty = Type::of_str(&inner.value()); + Self { inner, ty } + } +} + +#[derive(Debug, Eq, Hash, PartialEq)] +pub struct Cat { + fst: Box<TypedExpression>, + punct: Option<Token![.]>, + snd: Box<TypedExpression>, + ty: Type, +} + +impl Cat { + pub(crate) fn new( + fst: TypedExpression, + punct: Option<Token![.]>, + snd: TypedExpression, + ty: Type, + ) -> Self { + Self { + fst: Box::new(fst), + punct, + snd: Box::new(snd), + ty, + } + } + + pub fn unwrap(self) -> (TypedExpression, Option<Token![.]>, TypedExpression) { + (*self.fst, self.punct, *self.snd) + } +} + +#[derive(Debug, Eq, Hash, PartialEq)] +pub struct Alt { + left: Box<TypedExpression>, + punct: Option<Token![|]>, + right: Box<TypedExpression>, + ty: Type, +} + +impl Alt { + pub(crate) fn new( + left: TypedExpression, + punct: Option<Token![|]>, + right: TypedExpression, + ty: Type, + ) -> Self { + Self { + left: Box::new(left), + punct, + right: Box::new(right), + ty, + } + } + + pub fn unwrap(self) -> (TypedExpression, Option<Token![|]>, TypedExpression) { + (*self.left, self.punct, *self.right) + } +} + +#[derive(Debug)] +pub struct Fix { + arg: Ident, + inner: Box<TypedExpression>, + span: Option<Span>, + ty: Type, +} + +impl Fix { + pub(crate) fn new(arg: Ident, inner: TypedExpression, span: Option<Span>, ty: Type) -> Self { + Self { + arg, + inner: Box::new(inner), + span, + ty, + } + } + + pub fn unwrap(self) -> (Ident, TypedExpression, Option<Span>) { + (self.arg, *self.inner, self.span) + } +} + +impl PartialEq for Fix { + fn eq(&self, other: &Self) -> bool { + self.arg == other.arg && self.inner == other.inner && self.ty == other.ty + } +} + +impl Eq for Fix {} + +impl Hash for Fix { + fn hash<H: Hasher>(&self, state: &mut H) { + self.arg.hash(state); + self.inner.hash(state); + self.ty.hash(state); + } +} + +#[derive(Debug, Eq, Hash, PartialEq)] +pub struct Variable { + ident: Ident, + index: usize, + ty: Type, +} + +impl Variable { + pub(crate) fn new(var: ast::Variable, ty: Type) -> Self { + Self { + ident: var.name, + index: var.index, + ty, + } + } + + pub fn index(&self) -> usize { + self.index + } +} + +#[derive(Debug, Eq, Hash, PartialEq)] +pub(crate) enum RawTypedExpression { + Epsilon(Epsilon), + Literal(Literal), + Cat(Cat), + Alt(Alt), + Fix(Fix), + Variable(Variable), +} + +#[derive(Debug, Eq, Hash, PartialEq)] +pub struct TypedExpression { + pub(crate) inner: RawTypedExpression, +} + +impl From<Epsilon> for TypedExpression { + fn from(eps: Epsilon) -> Self { + Self { + inner: RawTypedExpression::Epsilon(eps), + } + } +} + +impl From<Literal> for TypedExpression { + fn from(lit: Literal) -> Self { + Self { + inner: RawTypedExpression::Literal(lit), + } + } +} + +impl From<Cat> for TypedExpression { + fn from(cat: Cat) -> Self { + Self { + inner: RawTypedExpression::Cat(cat), + } + } +} + +impl From<Alt> for TypedExpression { + fn from(alt: Alt) -> Self { + Self { + inner: RawTypedExpression::Alt(alt), + } + } +} + +impl From<Fix> for TypedExpression { + fn from(fix: Fix) -> Self { + Self { + inner: RawTypedExpression::Fix(fix), + } + } +} + +impl From<Variable> for TypedExpression { + fn from(var: Variable) -> Self { + Self { + inner: RawTypedExpression::Variable(var), + } + } +} + +pub trait Typed { + fn get_type(&self) -> &Type; +} + +macro_rules! leaf_typed { + ($ty:ty, $field:ident) => { + impl Typed for $ty { + fn get_type(&self) -> &Type { + &self.$field + } + } + }; +} + +leaf_typed!(Epsilon, ty); +leaf_typed!(Literal, ty); +leaf_typed!(Cat, ty); +leaf_typed!(Alt, ty); +leaf_typed!(Fix, ty); +leaf_typed!(Variable, ty); + +impl Typed for TypedExpression { + fn get_type(&self) -> &Type { + match &self.inner { + RawTypedExpression::Epsilon(e) => e.get_type(), + RawTypedExpression::Literal(l) => l.get_type(), + RawTypedExpression::Cat(c) => c.get_type(), + RawTypedExpression::Alt(a) => a.get_type(), + RawTypedExpression::Fix(f) => f.get_type(), + RawTypedExpression::Variable(v) => v.get_type(), + } + } +} diff --git a/src/chomp/visit.rs b/src/chomp/visit.rs new file mode 100644 index 0000000..4113b64 --- /dev/null +++ b/src/chomp/visit.rs @@ -0,0 +1,136 @@ +use super::ast::{Alt, Call, Cat, Epsilon, Expression, Fix, Literal, Parameter, Variable}; + +pub trait Visitor { + type Out; + fn visit_epsilon(&mut self, eps: &Epsilon) -> Self::Out; + + fn visit_literal(&mut self, lit: &Literal) -> Self::Out; + + fn visit_cat(&mut self, cat: &Cat) -> Self::Out; + + fn visit_alt(&mut self, alt: &Alt) -> Self::Out; + + fn visit_fix(&mut self, fix: &Fix) -> Self::Out; + + fn visit_variable(&mut self, var: &Variable) -> Self::Out; + + fn visit_parameter(&mut self, param: &Parameter) -> Self::Out; + + fn visit_call(&mut self, call: &Call) -> Self::Out; +} + +pub trait Mapper { + type Out; + + fn map_epsilon(&mut self, eps: &mut Epsilon) -> Self::Out; + + fn map_literal(&mut self, lit: &mut Literal) -> Self::Out; + + fn map_cat(&mut self, cat: &mut Cat) -> Self::Out; + + fn map_alt(&mut self, alt: &mut Alt) -> Self::Out; + + fn map_fix(&mut self, fix: &mut Fix) -> Self::Out; + + fn map_variable(&mut self, var: &mut Variable) -> Self::Out; + + fn map_parameter(&mut self, param: &mut Parameter) -> Self::Out; + + fn map_call(&mut self, call: &mut Call) -> Self::Out; +} + +pub trait Folder { + type Out; + + fn fold_epsilon(&mut self, eps: Epsilon) -> Self::Out; + + fn fold_literal(&mut self, lit: Literal) -> Self::Out; + + fn fold_cat(&mut self, cat: Cat) -> Self::Out; + + fn fold_alt(&mut self, alt: Alt) -> Self::Out; + + fn fold_fix(&mut self, fix: Fix) -> Self::Out; + + fn fold_variable(&mut self, var: Variable) -> Self::Out; + + fn fold_parameter(&mut self, param: Parameter) -> Self::Out; + + fn fold_call(&mut self, call: Call) -> Self::Out; +} + +pub trait Visitable { + fn visit<V: Visitor>(&self, visitor: &mut V) -> <V as Visitor>::Out; + + fn map<M: Mapper>(&mut self, mapper: &mut M) -> <M as Mapper>::Out; + + fn fold<F: Folder>(self, folder: &mut F) -> <F as Folder>::Out; +} + +macro_rules! visitable_leaf { + ($ty:ty, $visit:ident, $map:ident, $fold:ident) => { + impl Visitable for $ty { + fn visit<V: Visitor>(&self, visitor: &mut V) -> <V as Visitor>::Out { + visitor.$visit(self) + } + + fn map<M: Mapper>(&mut self, mapper: &mut M) -> <M as Mapper>::Out { + mapper.$map(self) + } + + fn fold<F: Folder>(self, folder: &mut F) -> <F as Folder>::Out { + folder.$fold(self) + } + } + }; +} + +visitable_leaf!(Epsilon, visit_epsilon, map_epsilon, fold_epsilon); +visitable_leaf!(Literal, visit_literal, map_literal, fold_literal); +visitable_leaf!(Cat, visit_cat, map_cat, fold_cat); +visitable_leaf!(Alt, visit_alt, map_alt, fold_alt); +visitable_leaf!(Fix, visit_fix, map_fix, fold_fix); +visitable_leaf!(Variable, visit_variable, map_variable, fold_variable); +visitable_leaf!(Parameter, visit_parameter, map_parameter, fold_parameter); +visitable_leaf!(Call, visit_call, map_call, fold_call); + +impl Visitable for Expression { + fn visit<V: Visitor>(&self, visitor: &mut V) -> <V as Visitor>::Out { + match self { + Self::Epsilon(e) => visitor.visit_epsilon(e), + Self::Literal(l) => visitor.visit_literal(l), + Self::Cat(c) => visitor.visit_cat(c), + Self::Alt(a) => visitor.visit_alt(a), + Self::Fix(f) => visitor.visit_fix(f), + Self::Variable(v) => visitor.visit_variable(v), + Self::Parameter(p) => visitor.visit_parameter(p), + Self::Call(c) => visitor.visit_call(c), + } + } + + fn map<M: Mapper>(&mut self, mapper: &mut M) -> <M as Mapper>::Out { + match self { + Self::Epsilon(e) => mapper.map_epsilon(e), + Self::Literal(l) => mapper.map_literal(l), + Self::Cat(c) => mapper.map_cat(c), + Self::Alt(a) => mapper.map_alt(a), + Self::Fix(f) => mapper.map_fix(f), + Self::Variable(v) => mapper.map_variable(v), + Self::Parameter(p) => mapper.map_parameter(p), + Self::Call(c) => mapper.map_call(c), + } + } + + fn fold<F: Folder>(self, folder: &mut F) -> <F as Folder>::Out { + match self { + Self::Epsilon(e) => folder.fold_epsilon(e), + Self::Literal(l) => folder.fold_literal(l), + Self::Cat(c) => folder.fold_cat(c), + Self::Alt(a) => folder.fold_alt(a), + Self::Fix(f) => folder.fold_fix(f), + Self::Variable(v) => folder.fold_variable(v), + Self::Parameter(p) => folder.fold_parameter(p), + Self::Call(c) => folder.fold_call(c), + } + } +} @@ -23,5 +23,6 @@ #![warn(unused_qualifications)] #![warn(variant_size_differences)] +pub mod chomp; +pub mod lower; pub mod nibble; -pub mod ast; diff --git a/src/lower/mod.rs b/src/lower/mod.rs new file mode 100644 index 0000000..242fa47 --- /dev/null +++ b/src/lower/mod.rs @@ -0,0 +1,58 @@ +use crate::chomp::typed::{ + Alt, Cat, Epsilon, Fix, Literal, RawTypedExpression, TypedExpression, Variable, +}; + +pub mod rust; + +pub trait Backend: Default { + type Id; + type Code; + + fn gen_epsilon(&mut self, eps: Epsilon) -> Self::Id; + + fn gen_literal(&mut self, lit: Literal) -> Self::Id; + + fn gen_cat(&mut self, cat: Cat) -> Self::Id; + + fn gen_alt(&mut self, alt: Alt) -> Self::Id; + + fn gen_fix(&mut self, fix: Fix) -> Self::Id; + + fn gen_variable(&mut self, var: Variable) -> Self::Id; + + fn emit_code(self, id: Self::Id) -> Self::Code; +} + +pub trait GenerateCode { + fn gen<B: Backend>(self, backend: &mut B) -> B::Id; +} + +macro_rules! generate_leaf { + ($ty:ty, $gen:ident) => { + impl GenerateCode for $ty { + fn gen<B: Backend>(self, backend: &mut B) -> B::Id { + backend.$gen(self) + } + } + }; +} + +generate_leaf!(Epsilon, gen_epsilon); +generate_leaf!(Literal, gen_literal); +generate_leaf!(Cat, gen_cat); +generate_leaf!(Alt, gen_alt); +generate_leaf!(Fix, gen_fix); +generate_leaf!(Variable, gen_variable); + +impl GenerateCode for TypedExpression { + fn gen<B: Backend>(self, backend: &mut B) -> B::Id { + match self.inner { + RawTypedExpression::Epsilon(e) => e.gen(backend), + RawTypedExpression::Literal(l) => l.gen(backend), + RawTypedExpression::Cat(c) => c.gen(backend), + RawTypedExpression::Alt(a) => a.gen(backend), + RawTypedExpression::Fix(f) => f.gen(backend), + RawTypedExpression::Variable(v) => v.gen(backend), + } + } +} diff --git a/src/lower/rust.rs b/src/lower/rust.rs new file mode 100644 index 0000000..e47fa8a --- /dev/null +++ b/src/lower/rust.rs @@ -0,0 +1,315 @@ +use std::collections::{BTreeSet, HashMap}; + +use proc_macro2::{TokenStream, TokenTree}; +use quote::{format_ident, quote}; + +use crate::chomp::typed::{Alt, Cat, Epsilon, Fix, Literal, Typed, TypedExpression, Variable}; + +use super::{Backend, GenerateCode}; + +#[derive(Debug)] +pub struct RustBackend { + // Indexed by ID, stores type, impl and dependencies + data: Vec<(TokenStream, TokenStream, BTreeSet<usize>)>, + eps_id: Option<usize>, + lit_map: HashMap<String, usize>, + cat_map: HashMap<(usize, usize), usize>, + alt_map: HashMap<(usize, usize), usize>, + fix_map: HashMap<Box<TypedExpression>, usize>, + var_map: HashMap<usize, usize>, // Key is fix point ID + context: Vec<usize>, +} + +impl Default for RustBackend { + fn default() -> Self { + Self { + data: Vec::new(), + eps_id: None, + lit_map: HashMap::new(), + cat_map: HashMap::new(), + alt_map: HashMap::new(), + fix_map: HashMap::new(), + var_map: HashMap::new(), + context: Vec::new(), + } + } +} + +impl Backend for RustBackend { + type Id = usize; + + type Code = TokenStream; + + fn gen_epsilon(&mut self, _eps: Epsilon) -> Self::Id { + match self.eps_id { + Some(id) => id, + None => { + let id = self.data.len(); + let ty = quote! { () }; + let tokens = quote! { + impl Parse for () { + fn parse<P: Parser>(_input: &mut P) -> Result<Self> { + Ok(()) + } + } + }; + self.data.push((ty, tokens, BTreeSet::new())); + id + } + } + } + + fn gen_literal(&mut self, lit: Literal) -> Self::Id { + let lit = lit.inner(); + if let Some(&id) = self.lit_map.get(&lit.value()) { + id + } else { + let id = self.data.len(); + let name = format_ident!("Lit{}", id); + let doc_name = format!( + r#"The literal string `"{}"`."#, + lit.value().escape_debug().collect::<String>() + ); + let tokens = quote! { + #[doc=#doc_name] + pub struct #name; + + impl Parse for #name { + fn parse<P: Parser>(input: &mut P) -> Result<Self> { + input.take_str(#lit).map(|()| #name) + } + } + }; + + self.data + .push((TokenTree::from(name).into(), tokens, BTreeSet::new())); + self.lit_map.insert(lit.value(), id); + + id + } + } + + fn gen_cat(&mut self, cat: Cat) -> Self::Id { + let (fst, _punct, snd) = cat.unwrap(); + let fst = fst.gen(self); + let snd = snd.gen(self); + + if let Some(&id) = self.cat_map.get(&(fst, snd)) { + id + } else { + let id = self.data.len(); + let fst_ty = self.data[fst].0.clone(); + let snd_ty = self.data[snd].0.clone(); + self.data.push(( + quote! {(#fst_ty, #snd_ty)}, + TokenStream::new(), + [fst, snd].iter().cloned().collect(), + )); + self.cat_map.insert((fst, snd), id); + id + } + } + + fn gen_alt(&mut self, alt: Alt) -> Self::Id { + let iter_first = alt.get_type().first_set().clone().into_iter(); + + let (left, _punct, right) = alt.unwrap(); + let left_ty = left.get_type(); + let right_ty = right.get_type(); + + let left_null = left_ty.nullable(); + let left_first = left_ty.first_set().clone(); + + let right_null = right_ty.nullable(); + let right_first = right_ty.first_set().clone(); + + let left = left.gen(self); + let right = right.gen(self); + + if let Some(&id) = self.alt_map.get(&(left, right)) { + id + } else { + let id = self.data.len(); + let left_ty = self.data[left].0.clone(); + let right_ty = self.data[right].0.clone(); + let name = format_ident!("Alt{}", id); + let mut tokens = quote! { + pub enum #name { + Left(#left_ty), + Right(#right_ty), + } + }; + + let other = if left_null { + let iter = right_first.into_iter(); + + quote! { + impl Parse for #name { + fn parse<P: Parser>(input: &mut P) -> Result<Self> { + match input.peek() { + #(Some(#iter))|* => input.parse().map(Self::Right), + _ => input.parse().map(Self::Left), + } + } + } + } + } else if right_null { + let iter = left_first.into_iter(); + + quote! { + impl Parse for #name { + fn parse<P: Parser>(input: &mut P) -> Result<Self> { + match input.peek() { + #(Some(#iter))|* => input.parse().map(Self::Left), + _ => input.parse().map(Self::Right), + } + } + } + } + } else { + let iter_left = left_first.into_iter(); + let iter_right = right_first.into_iter(); + + quote! { + impl Parse for #name { + fn parse<P: Parser>(input: &mut P) -> Result<Self> { + match input.peek().ok_or(Error::EndOfStream)? { + #(#iter_left)|* => input.parse().map(Self::Left), + #(#iter_right)|* => input.parse().map(Self::Right), + c => input.error(Error::BadBranch(c, &[#(#iter_first),*])) + } + } + } + } + }; + + tokens.extend(other); + self.data.push(( + TokenTree::from(name).into(), + tokens, + [left, right].iter().cloned().collect(), + )); + self.alt_map.insert((left, right), id); + id + } + } + + fn gen_fix(&mut self, fix: Fix) -> Self::Id { + let (_arg, inner, _span) = fix.unwrap(); + if let Some(&id) = self.fix_map.get(&inner) { + id + } else { + let id = self.data.len(); + let name = format_ident!("Fix{}", id); + self.data.push(( + TokenTree::from(name.clone()).into(), + TokenStream::new(), + BTreeSet::new(), + )); + self.context.push(id); + let inner = inner.gen(self); + let inner_ty = self.data[inner].0.clone(); + let tokens = quote! { + pub struct #name(#inner_ty); + + impl Parse for #name { + fn parse<P: Parser>(input: &mut P) -> Result<Self> { + input.parse().map(Self) + } + } + }; + self.data[id].1 = tokens; + self.data[id].2 = [inner].iter().cloned().collect(); + id + } + } + + fn gen_variable(&mut self, var: Variable) -> Self::Id { + let fix_id = self.context[self.context.len() - var.index() - 1]; + if let Some(&id) = self.var_map.get(&fix_id) { + id + } else { + let id = self.data.len(); + let fix_ty = self.data[fix_id].0.clone(); + let name = quote! {Box<#fix_ty>}; + self.data.push((name, TokenStream::new(), BTreeSet::new())); + self.var_map.insert(fix_id, id); + id + } + } + + fn emit_code(self, id: Self::Id) -> Self::Code { + let root = self.data[id].clone(); + let mut tokens = root.1; + let mut completed = [id].iter().cloned().collect::<BTreeSet<_>>(); + let mut todo = root.2; + + while !todo.is_empty() { + let mut next = BTreeSet::new(); + completed.extend(todo.clone()); + + for ty in todo { + let ty = self.data[ty].clone(); + tokens.extend(ty.1); + next.extend(ty.2.into_iter().filter(|id| !completed.contains(id))); + } + + todo = next; + } + + let root_ty = root.0; + tokens.extend(quote! { + pub type Ast = #root_ty; + + pub enum Error { + BadBranch(char, &'static [char]), + EndOfStream, + } + + pub type Result<T> = std::result::Result<T, Error>; + + pub trait Parser: Iterator<Item = char> { + fn peek(&mut self) -> Option<char>; + + fn parse<T: Parse>(&mut self) -> Result<T> where Self: Sized { + T::parse(self) + } + + fn take_str(&mut self, s: &str) -> Result<()>; + + fn error<T>(&mut self, e: Error) -> Result<T>; + } + + pub trait Parse: Sized { + fn parse<P: Parser>(input: &mut P) -> Result<Self>; + } + + }); + + // Good enough guess of whether we need concatenation rules + if !self.cat_map.is_empty() { + tokens.extend(quote! { + impl<A: Parse, B: Parse> Parse for (A, B) { + fn parse<P: Parser>(input: &mut P) -> Result<Self> { + let a = input.parse()?; + let b = input.parse()?; + Ok((a, b)) + } + } + }); + } + + // Good enough guess of whether we need variable rules + if !self.fix_map.is_empty() { + tokens.extend(quote! { + impl<T: Parse + Sized> Parse for Box<T> { + fn parse<P: Parser>(input: &mut P) -> Result<Self> { + input.parse().map(Box::new) + } + } + }); + } + + tokens + } +} diff --git a/src/main.rs b/src/main.rs index 5026085..418f99f 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,12 +1,29 @@ -use chomp::ast::InlineCall; -use chomp::{ast::TypeCheck, nibble::File}; -use proc_macro2::Span; use std::{ error::Error, + fmt::Display, io::{self, Read, Write}, process::exit, }; -use syn::Ident; + +use chomp::{ + chomp::{ + check::{InlineCall, TypeCheck}, + context::Context, + visit::Visitable, + }, + lower::{rust::RustBackend, Backend, GenerateCode}, + nibble::cst::File, +}; + +#[derive(Debug)] +struct UndecVar; + +impl Display for UndecVar { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "Undeclared variable somewhere.") + } +} +impl Error for UndecVar {} fn main() { let mut input = String::new(); @@ -14,27 +31,32 @@ fn main() { .read_to_string(&mut input) .map_err(|e| Box::new(e) as Box<dyn Error>) .and_then(|_| syn::parse_str(&input).map_err(|e| Box::new(e) as Box<dyn Error>)) - .and_then(|nibble: File| { - let (funs, goal) = nibble.convert(); - + .and_then(|nibble: File| nibble.convert().ok_or(Box::new(UndecVar) as Box<dyn Error>)) + .and_then(|(funs, goal)| { funs.into_iter() - .try_rfold(goal, |goal, (name, term, args)| { - goal.fold(&mut InlineCall::new(name, term, args)) + .try_rfold(goal, |goal, function| { + goal.fold(&mut InlineCall::new(function)) }) .map_err(|e| Box::new(e) as Box<dyn Error>) }) .and_then(|term| { - term.fold(&mut TypeCheck::new()) - .map_err(|e| Box::new(e) as Box<dyn Error>) + let mut context = Context::default(); + term.fold(&mut TypeCheck { + context: &mut context, + }) + .map_err(|e| Box::new(e) as Box<dyn Error>) + }) + .map(|typed| { + let mut backend = RustBackend::default(); + let id = typed.gen(&mut backend); + backend.emit_code(id) }) - .map(|(typed, _)| typed.emit_code(Ident::new("Ast", Span::call_site()))) .and_then(|code| { write!(io::stdout(), "{:#}", code).map_err(|e| Box::new(e) as Box<dyn Error>) }); if let Err(e) = res { eprintln!("{}", e); - drop(e); exit(1) } } diff --git a/src/nibble/convert.rs b/src/nibble/convert.rs new file mode 100644 index 0000000..3e47208 --- /dev/null +++ b/src/nibble/convert.rs @@ -0,0 +1,165 @@ +use std::collections::HashMap; + +use syn::punctuated::Pair; + +use crate::chomp::ast; + +use super::cst::{Alt, Call, Cat, Fix, Ident, ParenExpression, Term}; + +#[derive(Clone, Copy, Debug)] +pub enum Binding { + Variable(usize), + Parameter(usize), + Global, +} + +#[derive(Debug)] +pub struct Context { + names: HashMap<String, Binding>, + vars: usize, +} + +impl Context { + pub fn new<I: IntoIterator<Item = Ident>>(globals: &[Ident], params: I) -> Self { + let mut names = HashMap::new(); + for global in globals { + names.insert(global.to_string(), Binding::Global); + } + + for (index, param) in params.into_iter().enumerate() { + names.insert(param.to_string(), Binding::Parameter(index)); + } + + Self { names, vars: 0 } + } + + pub fn lookup(&self, name: &Ident) -> Option<Binding> { + // we make variable binding cheaper by inserting wrong and pulling right. + match self.names.get(&name.to_string()).copied() { + Some(Binding::Variable(index)) => Some(Binding::Variable(self.vars - index - 1)), + x => x, + } + } + + pub fn with_variable<F: FnOnce(&mut Self) -> R, R>(&mut self, name: &Ident, f: F) -> R { + let old = self + .names + .insert(name.to_string(), Binding::Variable(self.vars)); + + // we make variable binding cheaper by inserting wrong and pulling right. + // we should increment all values in names instead, but that's slow + self.vars += 1; + let res = f(self); + self.vars -= 1; + + if let Some(val) = old { + self.names.insert(name.to_string(), val); + } else { + self.names.remove(&name.to_string()); + } + + res + } +} + +pub trait Convert { + fn convert(self, context: &mut Context) -> Option<ast::Expression>; +} + +impl Convert for Ident { + fn convert(self, context: &mut Context) -> Option<ast::Expression> { + let span = self.span(); + + match context.lookup(&self)? { + Binding::Variable(index) => Some(ast::Variable::new(self, index).into()), + Binding::Parameter(index) => Some(ast::Parameter::new(self, index).into()), + Binding::Global => Some(ast::Call::new(self, Vec::new(), Some(span)).into()), + } + } +} + +impl Convert for Call { + fn convert(self, context: &mut Context) -> Option<ast::Expression> { + let span = self.span(); + let args = self + .args + .into_iter() + .map(|arg| arg.convert(context)) + .collect::<Option<_>>()?; + Some(ast::Call::new(self.name, args, span).into()) + } +} + +impl Convert for Fix { + fn convert(self, context: &mut Context) -> Option<ast::Expression> { + let span = self.span(); + let expr = self.expr; + let inner = context.with_variable(&self.arg, |context| expr.convert(context))?; + Some(ast::Fix::new(self.arg, inner, span).into()) + } +} + +impl Convert for ParenExpression { + fn convert(self, context: &mut Context) -> Option<ast::Expression> { + self.expr.convert(context) + } +} + +impl Convert for Term { + fn convert(self, context: &mut Context) -> Option<ast::Expression> { + match self { + Self::Epsilon(e) => Some(e.into()), + Self::Ident(i) => i.convert(context), + Self::Literal(l) => Some(l.into()), + Self::Call(c) => c.convert(context), + Self::Fix(f) => f.convert(context), + Self::Parens(p) => p.convert(context), + } + } +} + +impl Convert for Cat { + fn convert(self, context: &mut Context) -> Option<ast::Expression> { + let mut iter = self.0.into_pairs(); + let mut out = match iter.next().unwrap() { + Pair::Punctuated(t, p) => (t.convert(context)?, Some(p)), + Pair::End(t) => (t.convert(context)?, None), + }; + + for pair in iter { + let (fst, punct) = out; + out = match pair { + Pair::Punctuated(t, p) => ( + ast::Cat::new(fst, punct, t.convert(context)?).into(), + Some(p), + ), + Pair::End(t) => (ast::Cat::new(fst, punct, t.convert(context)?).into(), None), + }; + } + + Some(out.0) + } +} + +impl Convert for Alt { + fn convert(self, context: &mut Context) -> Option<ast::Expression> { + let mut iter = self.0.into_pairs(); + let mut out = match iter.next().unwrap() { + Pair::Punctuated(t, p) => (t.convert(context)?, Some(p)), + Pair::End(t) => (t.convert(context)?, None), + }; + + for pair in iter { + let (fst, punct) = out; + out = match pair { + Pair::Punctuated(t, p) => ( + ast::Alt::new(fst, punct, t.convert(context)?).into(), + Some(p), + ), + Pair::End(t) => (ast::Alt::new(fst, punct, t.convert(context)?).into(), None), + }; + } + + Some(out.0) + } +} diff --git a/src/nibble/cst.rs b/src/nibble/cst.rs new file mode 100644 index 0000000..383eae9 --- /dev/null +++ b/src/nibble/cst.rs @@ -0,0 +1,294 @@ +use proc_macro2::Span; +use syn::{ + bracketed, + ext::IdentExt, + parenthesized, + parse::{Parse, ParseStream}, + punctuated::Punctuated, + token::{Bracket, Comma, Let, Match, Paren}, + LitStr, Token, +}; + +use crate::chomp::ast; + +use super::convert::{Context, Convert}; + +pub type Epsilon = Token![_]; + +pub type Ident = syn::Ident; + +pub type Literal = LitStr; + +#[derive(Clone, Debug, Eq, PartialEq)] +pub struct ArgList<T> { + paren_token: Paren, + args: Punctuated<T, Comma>, +} + +impl<T> ArgList<T> { + pub fn span(&self) -> Span { + self.paren_token.span + } + + pub fn len(&self) -> usize { + self.args.len() + } + + pub fn is_empty(&self) -> bool { + self.args.is_empty() + } +} + +impl<T> IntoIterator for ArgList<T> { + type Item = T; + + type IntoIter = <Punctuated<T, Comma> as IntoIterator>::IntoIter; + + fn into_iter(self) -> Self::IntoIter { + self.args.into_iter() + } +} + +impl<T: Parse> Parse for ArgList<T> { + fn parse(input: ParseStream<'_>) -> syn::Result<Self> { + let args; + let paren_token = parenthesized!(args in input); + let args = args.call(Punctuated::parse_terminated)?; + Ok(Self { paren_token, args }) + } +} + +#[derive(Clone, Debug, Eq, PartialEq)] +pub struct Call { + pub name: Ident, + pub args: ArgList<Expression>, +} + +impl Call { + pub fn span(&self) -> Option<Span> { + self.name.span().join(self.args.span()) + } +} + +impl Parse for Call { + fn parse(input: ParseStream<'_>) -> syn::Result<Self> { + let name = input.call(Ident::parse_any)?; + let args = input.parse()?; + Ok(Self { name, args }) + } +} + +#[derive(Clone, Debug, Eq, PartialEq)] +pub struct Fix { + bracket_token: Bracket, + pub arg: Ident, + paren_token: Paren, + pub expr: Expression, +} + +impl Fix { + pub fn span(&self) -> Option<Span> { + self.bracket_token.span.join(self.paren_token.span) + } +} + +impl Parse for Fix { + fn parse(input: ParseStream<'_>) -> syn::Result<Self> { + let arg; + let bracket_token = bracketed!(arg in input); + let arg = arg.call(Ident::parse_any)?; + let expr; + let paren_token = parenthesized!(expr in input); + let expr = expr.parse()?; + Ok(Self { + bracket_token, + arg, + paren_token, + expr, + }) + } +} + +#[derive(Clone, Debug, Eq, PartialEq)] +pub struct ParenExpression { + paren_token: Paren, + pub expr: Expression, +} + +impl Parse for ParenExpression { + fn parse(input: ParseStream<'_>) -> syn::Result<Self> { + let expr; + let paren_token = parenthesized!(expr in input); + let expr = expr.parse()?; + Ok(Self { paren_token, expr }) + } +} + +#[derive(Clone, Debug, Eq, PartialEq)] +pub enum Term { + Epsilon(Epsilon), + Ident(Ident), + Literal(Literal), + Call(Call), + Fix(Fix), + Parens(ParenExpression), +} + +impl Parse for Term { + fn parse(input: ParseStream<'_>) -> syn::Result<Self> { + let lookahead = input.lookahead1(); + + if lookahead.peek(Token![_]) { + input.parse().map(Self::Epsilon) + } else if lookahead.peek(LitStr) { + input.parse().map(Self::Literal) + } else if lookahead.peek(Bracket) { + input.parse().map(Self::Fix) + } else if lookahead.peek(Paren) { + input.parse().map(Self::Parens) + } else if lookahead.peek(Ident::peek_any) { + let name = input.call(Ident::parse_any)?; + + if input.peek(Paren) { + input.parse().map(|args| Self::Call(Call { name, args })) + } else { + Ok(Self::Ident(name)) + } + } else { + Err(lookahead.error()) + } + } +} + +#[derive(Clone, Debug, Eq, PartialEq)] +pub struct Cat(pub Punctuated<Term, Token![.]>); + +impl Parse for Cat { + fn parse(input: ParseStream<'_>) -> syn::Result<Self> { + input.call(Punctuated::parse_separated_nonempty).map(Self) + } +} + +#[derive(Clone, Debug, Eq, PartialEq)] +pub struct Alt(pub Punctuated<Cat, Token![|]>); + +impl Parse for Alt { + fn parse(input: ParseStream<'_>) -> syn::Result<Self> { + input.call(Punctuated::parse_separated_nonempty).map(Self) + } +} + +pub type Expression = Alt; + +#[derive(Clone, Debug, Eq, PartialEq)] +pub struct LetStatement { + let_token: Token![let], + name: Ident, + args: Option<ArgList<Ident>>, + eq_token: Token![=], + expr: Expression, + semi_token: Token![;], +} + +impl LetStatement { + pub fn span(&self) -> Option<Span> { + self.let_token.span.join(self.semi_token.span) + } +} + +impl Parse for LetStatement { + fn parse(input: ParseStream<'_>) -> syn::Result<Self> { + let let_token = input.parse()?; + let name = input.call(Ident::parse_any)?; + let args = if input.peek(Paren) { + Some(input.parse()?) + } else { + None + }; + let eq_token = input.parse()?; + let expr = input.parse()?; + let semi_token = input.parse()?; + + Ok(Self { + let_token, + name, + args, + eq_token, + expr, + semi_token, + }) + } +} + +#[derive(Clone, Debug, Eq, PartialEq)] +pub struct GoalStatement { + match_token: Token![match], + expr: Expression, + semi_token: Token![;], +} + +impl Parse for GoalStatement { + fn parse(input: ParseStream<'_>) -> syn::Result<Self> { + let match_token = input.parse()?; + let expr = input.parse()?; + let semi_token = input.parse()?; + + Ok(Self { + match_token, + expr, + semi_token, + }) + } +} + +#[derive(Clone, Debug, Eq, PartialEq)] +pub struct File { + lets: Vec<LetStatement>, + goal: GoalStatement, +} + +impl File { + pub fn convert(self) -> Option<(Vec<ast::Function>, ast::Expression)> { + let mut names = Vec::new(); + let mut map = Vec::new(); + for stmt in self.lets { + let count = stmt.args.as_ref().map(ArgList::len).unwrap_or_default(); + let span = stmt.span(); + let mut context = Context::new( + &names, + stmt.args.into_iter().flat_map(|args| args.into_iter()), + ); + names.push(stmt.name.clone()); + map.push(ast::Function::new( + stmt.name.clone(), + count, + stmt.expr.convert(&mut context)?, + span, + )); + } + + let mut context = Context::new(&names, Vec::new()); + let goal = self.goal.expr.convert(&mut context)?; + Some((map, goal)) + } +} + +impl Parse for File { + fn parse(input: ParseStream<'_>) -> syn::Result<Self> { + let mut lets = Vec::new(); + let mut lookahead = input.lookahead1(); + + while lookahead.peek(Let) { + lets.push(input.parse()?); + lookahead = input.lookahead1(); + } + + let goal = if lookahead.peek(Match) { + input.parse()? + } else { + return Err(lookahead.error()); + }; + + Ok(Self { lets, goal }) + } +} diff --git a/src/nibble/mod.rs b/src/nibble/mod.rs index 071b41a..14791fd 100644 --- a/src/nibble/mod.rs +++ b/src/nibble/mod.rs @@ -1,489 +1,50 @@ -use std::collections::HashMap; -use std::rc::Rc; - -use crate::ast::{ - self, - convert::{Context, Convert}, -}; -use proc_macro2::Span; -use syn::{ - ext::IdentExt, - parse::{Parse, ParseStream}, - punctuated::{Pair, Punctuated}, - token, Result, Token, -}; - -pub type Epsilon = Token![_]; - -impl Convert for Epsilon { - fn convert(&self, _: &mut Context) -> ast::Term { - ast::Term::Epsilon(*self) - } -} - -type Ident = syn::Ident; - -impl Convert for Ident { - fn convert(&self, context: &mut Context) -> ast::Term { - use ast::Term; - let name = self.to_string(); - - if let Some(binding) = context.get_binding(&name) { - Term::Binding(ast::Variable::new(self.clone(), binding)) - } else if let Some(variable) = context.get_variable(&name) { - Term::Variable(ast::Variable::new(self.clone(), variable)) - } else { - let span = self.span(); - Term::Call(ast::Call::new(self.clone(), Vec::new(), span)) - } - } -} - -type Literal = syn::LitStr; - -impl Convert for Literal { - fn convert(&self, _: &mut Context) -> ast::Term { - ast::Term::Literal(self.clone()) - } -} - -#[derive(Clone, Debug, Eq, PartialEq)] -pub struct ArgList<T> { - paren_token: token::Paren, - args: Punctuated<T, token::Comma>, -} - -impl<T> ArgList<T> { - fn span(&self) -> Span { - self.paren_token.span - } - - fn len(&self) -> usize { - self.args.len() - } -} - -impl<T> IntoIterator for ArgList<T> { - type Item = T; - type IntoIter = <Punctuated<T, Token![,]> as IntoIterator>::IntoIter; - - fn into_iter(self) -> Self::IntoIter { - self.args.into_iter() - } -} - -impl<T: Parse> Parse for ArgList<T> { - fn parse(input: ParseStream<'_>) -> Result<Self> { - let args; - let paren_token = syn::parenthesized!(args in input); - let args = args.call(Punctuated::parse_terminated)?; - Ok(Self { paren_token, args }) - } -} - -#[derive(Clone, Debug, Eq, PartialEq)] -pub struct Call { - name: Ident, - args: ArgList<Expression>, -} - -impl Call { - fn span(&self) -> Span { - self.name - .span() - .join(self.args.span()) - .unwrap_or_else(Span::call_site) - } -} - -impl Parse for Call { - fn parse(input: ParseStream<'_>) -> Result<Self> { - let name = input.call(Ident::parse_any)?; - let args = input.parse()?; - Ok(Self { name, args }) - } -} - -impl Convert for Call { - fn convert(&self, context: &mut Context) -> ast::Term { - use ast::Term; - let args = self - .args - .clone() - .into_iter() - .map(|arg| arg.convert(context)) - .collect::<Vec<_>>(); - Term::Call(ast::Call::new(self.name.clone(), args, self.span())) - } -} - -#[derive(Clone, Debug, Eq, PartialEq)] -pub struct Fix { - bracket_token: token::Bracket, - arg: Ident, - paren_token: token::Paren, - expr: Expression, -} - -impl Fix { - fn span(&self) -> Span { - self.bracket_token - .span - .join(self.paren_token.span) - .unwrap_or_else(Span::call_site) - } -} - -impl Parse for Fix { - fn parse(input: ParseStream<'_>) -> Result<Self> { - let arg; - let bracket_token = syn::bracketed!(arg in input); - let arg = arg.call(Ident::parse_any)?; - let expr; - let paren_token = syn::parenthesized!(expr in input); - let expr = expr.parse()?; - Ok(Self { - bracket_token, - arg, - paren_token, - expr, - }) - } -} - -impl Convert for Fix { - fn convert(&self, context: &mut Context) -> ast::Term { - use ast::Term; - let span = self.span(); - let expr = &self.expr; - let arg_name = self.arg.to_string(); - Term::Fix(ast::Fix::new( - self.arg.clone(), - context.with_binding(arg_name, |ctx| expr.convert(ctx)), - span, - )) - } -} - -#[derive(Clone, Debug, Eq, PartialEq)] -pub struct ParenExpression { - paren_tok: token::Paren, - expr: Expression, -} - -impl ParenExpression { - pub fn span(&self) -> Span { - self.paren_tok.span - } -} - -impl Parse for ParenExpression { - fn parse(input: ParseStream<'_>) -> Result<Self> { - let expr; - let paren_tok = syn::parenthesized!(expr in input); - let expr = expr.parse()?; - Ok(Self { paren_tok, expr }) - } -} - -impl Convert for ParenExpression { - fn convert(&self, context: &mut Context) -> ast::Term { - self.expr.convert(context) - } -} - -#[derive(Clone, Debug, Eq, PartialEq)] -pub enum Term { - Epsilon(Epsilon), - Ident(Ident), - Literal(Literal), - Call(Call), - Fix(Fix), - Parens(ParenExpression), -} - -impl Term { - pub fn span(&self) -> Span { - match self { - Self::Epsilon(eps) => eps.span, - Self::Ident(ident) => ident.span(), - Self::Literal(lit) => lit.span(), - Self::Call(call) => call.span(), - Self::Fix(fix) => fix.span(), - Self::Parens(paren) => paren.span(), - } - } -} - -impl Parse for Term { - fn parse(input: ParseStream<'_>) -> Result<Self> { - let lookahead = input.lookahead1(); - if lookahead.peek(Token![_]) { - input.parse().map(Self::Epsilon) - } else if lookahead.peek(syn::LitStr) { - input.parse().map(Self::Literal) - } else if lookahead.peek(token::Bracket) { - input.parse().map(Self::Fix) - } else if lookahead.peek(token::Paren) { - let content; - let paren_tok = syn::parenthesized!(content in input); - content - .parse() - .map(|expr| Self::Parens(ParenExpression { paren_tok, expr })) - } else if lookahead.peek(Ident::peek_any) { - let name = input.call(Ident::parse_any)?; - if input.peek(token::Paren) { - input.parse().map(|args| Self::Call(Call { name, args })) - } else { - Ok(Self::Ident(name)) - } - } else { - Err(lookahead.error()) - } - } -} - -impl Convert for Term { - fn convert(&self, context: &mut Context) -> ast::Term { - match self { - Self::Epsilon(e) => e.convert(context), - Self::Ident(i) => i.convert(context), - Self::Literal(l) => l.convert(context), - Self::Call(c) => c.convert(context), - Self::Fix(f) => f.convert(context), - Self::Parens(e) => e.convert(context), - } - } -} - -#[derive(Clone, Debug, Eq, PartialEq)] -pub struct Cat { - terms: Punctuated<Term, Token![.]>, -} - -impl Cat { - pub fn span(&self) -> Span { - let mut pairs = self.terms.pairs(); - let mut span = pairs.next().and_then(|pair| match pair { - Pair::Punctuated(term, punct) => term.span().join(punct.span), - Pair::End(term) => Some(term.span()), - }); - - for pair in pairs { - span = span.and_then(|span| match pair { - Pair::Punctuated(term, punct) => { - span.join(term.span()).and_then(|s| s.join(punct.span)) - } - Pair::End(term) => span.join(term.span()), - }) - } - - span.unwrap_or_else(Span::call_site) - } -} - -impl Parse for Cat { - fn parse(input: ParseStream<'_>) -> Result<Self> { - Ok(Self { - terms: input.call(Punctuated::parse_separated_nonempty)?, - }) - } -} - -impl Convert for Cat { - fn convert(&self, context: &mut Context) -> ast::Term { - use ast::Term; - let mut iter = self.terms.pairs(); - let init = match iter.next().unwrap() { - Pair::Punctuated(t, p) => Ok((t.convert(context), p)), - Pair::End(t) => Err(t.convert(context)), - }; - iter.fold(init, |term, pair| { - let (fst, punct) = term.unwrap(); - match pair { - Pair::Punctuated(t, p) => { - Ok((Term::Cat(ast::Cat::new(fst, *punct, t.convert(context))), p)) - } - Pair::End(t) => Err(Term::Cat(ast::Cat::new(fst, *punct, t.convert(context)))), - } - }) - .unwrap_err() - } -} - -#[derive(Clone, Debug, Eq, PartialEq)] -pub struct Alt { - cats: Punctuated<Cat, Token![|]>, -} - -impl Alt { - pub fn span(&self) -> Span { - let mut pairs = self.cats.pairs(); - let mut span = pairs.next().and_then(|pair| match pair { - Pair::Punctuated(cat, punct) => cat.span().join(punct.span), - Pair::End(cat) => Some(cat.span()), - }); - - for pair in pairs { - span = span.and_then(|span| match pair { - Pair::Punctuated(cat, punct) => { - span.join(cat.span()).and_then(|s| s.join(punct.span)) - } - Pair::End(cat) => span.join(cat.span()), - }) - } - - span.unwrap_or_else(Span::call_site) - } -} - -impl Parse for Alt { - fn parse(input: ParseStream<'_>) -> Result<Self> { - Ok(Self { - cats: input.call(Punctuated::parse_separated_nonempty)?, - }) - } -} - -impl Convert for Alt { - fn convert(&self, context: &mut Context) -> ast::Term { - use ast::Term; - let mut iter = self.cats.pairs(); - let init = match iter.next().unwrap() { - Pair::Punctuated(t, p) => Ok((t.convert(context), p)), - Pair::End(t) => Err(t.convert(context)), - }; - iter.fold(init, |cat, pair| { - let (left, punct) = cat.unwrap(); - match pair { - Pair::Punctuated(t, p) => Ok(( - Term::Alt(ast::Alt::new(left, *punct, t.convert(context))), - p, - )), - Pair::End(t) => Err(Term::Alt(ast::Alt::new(left, *punct, t.convert(context)))), - } - }) - .unwrap_err() - } -} - -pub type Expression = Alt; - -#[derive(Clone, Debug, Eq, PartialEq)] -pub struct LetStatement { - let_token: Token![let], - name: Ident, - args: Option<ArgList<Ident>>, - eq_token: Token![=], - expr: Expression, - semi_token: Token![;], -} - -impl Parse for LetStatement { - fn parse(input: ParseStream<'_>) -> Result<Self> { - let let_token = input.parse()?; - let name = input.call(Ident::parse_any)?; - let args = if input.peek(token::Paren) { - Some(input.parse()?) - } else { - None - }; - let eq_token = input.parse()?; - let expr = input.parse()?; - let semi_token = input.parse()?; - - Ok(Self { - let_token, - name, - args, - eq_token, - expr, - semi_token, - }) - } -} - -#[derive(Clone, Debug, Eq, PartialEq)] -pub struct Goal { - match_token: Token![match], - expr: Expression, - semi_token: Token![;], -} - -impl Parse for Goal { - fn parse(input: ParseStream<'_>) -> Result<Self> { - let match_token = input.parse()?; - let expr = input.parse()?; - let semi_token = input.parse()?; - - Ok(Self { match_token, expr, semi_token }) - } -} - -#[derive(Clone, Debug, Eq, PartialEq)] -pub struct File { - lets: Vec<LetStatement>, - goal: Goal, -} - -impl File { - /// Returns function list and the goal. The function list consists of an - /// [`Ident`], the converted [`ast::Term`] and the number of arguments. - pub fn convert(self) -> (Vec<(Ident, ast::Term, usize)>, ast::Term) { - let mut context = Context::new(); - let map = self - .lets - .into_iter() - .map(|stmt| { - let count = stmt.args.as_ref().map(ArgList::len).unwrap_or_default(); - context.set_variables( - stmt.args - .into_iter() - .flat_map(|args| args.into_iter().map(|arg| arg.to_string())), - ); - (stmt.name, stmt.expr.convert(&mut context), count) - }) - .collect(); - let goal = self.goal.expr.convert(&mut context); - (map, goal) - } -} - -impl Parse for File { - fn parse(input: ParseStream<'_>) -> Result<Self> { - let mut lets = Vec::new(); - while input.peek(Token![let]) { - lets.push(input.parse()?); - } - - let goal = input.parse()?; - - Ok(Self { lets, goal }) - } -} - -#[cfg(test)] -mod tests { - use super::{Epsilon, Ident, Literal}; - use syn::parse_str; - - #[test] - fn parse_epsilon() { - assert!(parse_str::<Epsilon>("_").is_ok()); - } - - #[test] - fn parse_ident() { - assert_eq!(parse_str::<Ident>("x").unwrap().to_string(), "x"); - assert_eq!(parse_str::<Ident>("x_yz").unwrap().to_string(), "x_yz"); - assert_eq!(parse_str::<Ident>("a123").unwrap().to_string(), "a123"); - assert_eq!(parse_str::<Ident>("𝒢𝒢").unwrap().to_string(), "𝒢𝒢"); - assert!(parse_str::<Ident>("1").is_err()); - assert!(parse_str::<Ident>("_").is_err()); - } - - #[test] - fn parse_literal() { - assert_eq!(parse_str::<Literal>(r#""hello""#).unwrap().value(), "hello") - } -} +pub mod convert; +pub mod cst; +// impl File { +// /// Returns function list and the goal. The function list consists of an +// /// [`Ident`], the converted [`ast::Expression`] and the number of arguments. +// pub fn convert(self) -> (Vec<(Ident, ast::Expression, usize)>, ast::Expression) { +// let mut context = Context::new(); +// let map = self +// .lets +// .into_iter() +// .map(|stmt| { +// let count = stmt.args.as_ref().map(ArgList::len).unwrap_or_default(); +// context.set_variables( +// stmt.args +// .into_iter() +// .flat_map(|args| args.into_iter().map(|arg| arg.to_string())), +// ); +// (stmt.name, stmt.expr.convert(&mut context), count) +// }) +// .collect(); +// let goal = self.goal.expr.convert(&mut context); +// (map, goal) +// } +// } + +// #[cfg(test)] +// mod tests { +// use super::{Epsilon, Ident, Literal}; +// use syn::parse_str; + +// #[test] +// fn parse_epsilon() { +// assert!(parse_str::<Epsilon>("_").is_ok()); +// } + +// #[test] +// fn parse_ident() { +// assert_eq!(parse_str::<Ident>("x").unwrap().to_string(), "x"); +// assert_eq!(parse_str::<Ident>("x_yz").unwrap().to_string(), "x_yz"); +// assert_eq!(parse_str::<Ident>("a123").unwrap().to_string(), "a123"); +// assert_eq!(parse_str::<Ident>("𝒢𝒢").unwrap().to_string(), "𝒢𝒢"); +// assert!(parse_str::<Ident>("1").is_err()); +// assert!(parse_str::<Ident>("_").is_err()); +// } + +// #[test] +// fn parse_literal() { +// assert_eq!(parse_str::<Literal>(r#""hello""#).unwrap().value(), "hello") +// } +// } |