From 8939af712e6c3ae5d23019ec5dd941f1dae6c7fb Mon Sep 17 00:00:00 2001 From: Greg Brown Date: Thu, 19 Nov 2020 17:11:09 +0000 Subject: Switch typing to use traits --- src/ast/mod.rs | 892 +++++++++++++++++++++++++++++++++++------------------- src/ast/typed.rs | 132 ++++++++ src/nibble/mod.rs | 67 ++-- 3 files changed, 753 insertions(+), 338 deletions(-) create mode 100644 src/ast/typed.rs diff --git a/src/ast/mod.rs b/src/ast/mod.rs index e8b4d57..0e3feea 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -1,400 +1,654 @@ -use std::collections::BTreeSet; +use std::convert::Infallible; +use std::fmt::Display; + +use proc_macro2::Span; +use syn::{Ident, LitStr, Token}; +use typed::FirstSetContext; +use typed::FlastSetContext; +use typed::NullContext; + +use self::typed::FirstSet; +use self::typed::FlastSet; +use self::typed::Type; pub mod convert; +pub mod typed; -const ITER_LIMIT: usize = 16; +fn fix R>(init: R, mut step: F) -> R { + let mut res = init; + let mut last = None; -type Ident = String; + while last.map(|v| v != res).unwrap_or(true) { + last = Some(res); + res = step(last.as_ref().unwrap()); + } -#[derive(Clone, Debug, Eq, PartialEq)] -pub enum Term { - Epsilon, - Bottom, - Literal(String), - Cat(Box, Box), - Alt(Box, Box), - Fix(Box), // Uses de Bruijn indices - Variable(usize), - Call(Ident, Vec), + res } -fn first_to_null(vars: &[(bool, BTreeSet)]) -> Vec { - vars.iter().map(|(null, _)| *null).collect::>() +#[derive(Copy, Clone, Debug)] +pub struct Epsilon { + span: Span, } -fn flast_to_null(vars: &[(bool, BTreeSet, BTreeSet)]) -> Vec { - vars.iter().map(|(null, _, _)| *null).collect::>() +impl Epsilon { + pub fn new(span: Span) -> Self { + Self { span } + } } -fn flast_to_first(vars: &[(bool, BTreeSet, BTreeSet)]) -> Vec<(bool, BTreeSet)> { - vars.iter() - .map(|(null, first, _)| (*null, first.clone())) - .collect::>() +impl Type for Epsilon { + type Err = Infallible; + + fn is_nullable(&self, _context: &mut C) -> Option { + Some(true) + } + + fn first_set(&self, _context: &mut C) -> Option { + Some(FirstSet::new()) + } + + fn flast_set(&self, _context: &mut C) -> Option { + Some(FlastSet::new()) + } + + fn well_typed(self, _context: &mut C) -> Result { + Ok(Typed::Epsilon) + } } -fn ctx_to_flast(ctx: &[Term]) -> Vec<(bool, BTreeSet, BTreeSet)> { - match ctx { - [] => Vec::new(), - [.., term] => { - let mut rest = ctx_to_flast(&ctx[..ctx.len() - 1]); - let null = term.null(&flast_to_null(&rest)); - let first = term.first(&flast_to_first(&rest)); - let flast = term.flast(&rest); - rest.push((null, first, flast)); - rest - } +pub type Literal = LitStr; + +impl Type for Literal { + type Err = Infallible; + + fn is_nullable(&self, _context: &mut C) -> Option { + Some(self.value().is_empty()) + } + + fn first_set(&self, _context: &mut C) -> Option { + Some(FirstSet::of_str(&self.value())) + } + + fn flast_set(&self, _context: &mut C) -> Option { + Some(FlastSet::new()) + } + + fn well_typed(self, _context: &mut C) -> Result { + Ok(Typed::Literal(self.value())) } } -fn ctx_to_first(ctx: &[Term]) -> Vec<(bool, BTreeSet)> { - match ctx { - [] => Vec::new(), - [.., term] => { - let mut rest = ctx_to_first(&ctx[..ctx.len() - 1]); - let null = term.null(&first_to_null(&rest)); - let first = term.first(&rest); - rest.push((null, first)); - rest +#[derive(Clone, Debug)] +pub struct Cat { + fst: Box, + punct: Token![.], + snd: Box, +} + +impl Cat { + pub fn new(fst: Term, punct: Token![.], snd: Term) -> Self { + Self { + fst: Box::new(fst), + punct, + snd: Box::new(snd), } } } -/// NOTE: This assumes the variables are well-formed. Need to fix in general -impl Term { - pub fn null(&self, vars: &[bool]) -> bool { - match self { - Self::Epsilon => true, - Self::Bottom => false, - Self::Literal(s) => s.is_empty(), - Self::Cat(fst, snd) => fst.null(vars) && snd.null(vars), - Self::Alt(fst, snd) => fst.null(vars) || snd.null(vars), - Self::Fix(inner) => { - let mut res = false; - let mut last = None; - let mut vars = vars.to_owned(); - let mut i = 0; - - while last.map(|s| s != res).unwrap_or(true) { - if i >= ITER_LIMIT { - panic!("Too many iterations") - } else { - i += 1 - } - - last = Some(res); - vars.push(res); - res = inner.null(&vars); - vars.pop(); - } - - res - } - Self::Variable(index) => vars[vars.len() - index - 1], - Self::Call(_ident, _args) => unimplemented!(), +impl Type for Cat { + type Err = CatError; + + fn is_nullable(&self, context: &mut C) -> Option { + Some(self.fst.is_nullable(context)? && self.snd.is_nullable(context)?) + } + + fn first_set(&self, context: &mut C) -> Option { + let set = self.fst.first_set(context)?; + + if self.fst.is_nullable(context)? { + Some(set.union(self.snd.first_set(context)?)) + } else { + Some(set) } } - pub fn first(&self, vars: &[(bool, BTreeSet)]) -> BTreeSet { - match self { - Self::Epsilon => BTreeSet::new(), - Self::Bottom => BTreeSet::new(), - Self::Literal(s) => { - let mut set = BTreeSet::new(); - if let Some(c) = s.chars().next() { - set.insert(c); - } - set - } - Self::Cat(fst, snd) => { - let mut set = fst.first(vars); - if fst.null(&first_to_null(vars)) { - set.append(&mut snd.first(vars)) - } - set - } - Self::Alt(fst, snd) => { - let mut set = fst.first(vars); - set.append(&mut snd.first(vars)); - set - } - Self::Fix(inner) => { - let mut res = BTreeSet::new(); - let mut last = None; - let null = self.null(&first_to_null(vars)); - let mut vars = vars.to_owned(); - let mut i = 0; - - while last.map(|s| s != res).unwrap_or(true) { - if i >= ITER_LIMIT { - panic!("Too many iterations") - } else { - i += 1 - } - - last = Some(res.clone()); - vars.push((null, res)); - res = inner.first(&vars); - vars.pop(); - } - - res - } - Self::Variable(index) => vars[vars.len() - index - 1].1.clone(), - Self::Call(_, _) => unimplemented!(), + fn flast_set(&self, context: &mut C) -> Option { + let set = self.snd.flast_set(context)?; + + if self.snd.is_nullable(context)? { + Some( + set.union_first(self.snd.first_set(context)?) + .union(self.fst.flast_set(context)?), + ) + } else { + Some(set) } } - pub fn flast(&self, vars: &[(bool, BTreeSet, BTreeSet)]) -> BTreeSet { - match self { - Self::Epsilon => BTreeSet::new(), - Self::Bottom => BTreeSet::new(), - Self::Literal(_) => BTreeSet::new(), - Self::Cat(fst, snd) => { - let mut set = snd.flast(vars); - if snd.null(&flast_to_null(vars)) { - set.append(&mut snd.first(&flast_to_first(vars))); - set.append(&mut fst.flast(vars)); - } - set - } - Self::Alt(fst, snd) => { - let mut set = fst.flast(vars); - set.append(&mut snd.flast(vars)); - set - } - Self::Fix(inner) => { - let mut res = BTreeSet::new(); - let mut last = None; - let null = self.null(&flast_to_null(vars)); - let first = self.first(&flast_to_first(vars)); - let mut vars = vars.to_owned(); - let mut i = 0; - - while last.map(|s| s != res).unwrap_or(true) { - if i >= ITER_LIMIT { - panic!("Too many iterations") - } else { - i += 1 - } - - last = Some(res.clone()); - vars.push((null, first.clone(), res)); - res = inner.flast(&vars); - vars.pop(); - } - - res - } - Self::Variable(index) => vars[vars.len() - index - 1].2.clone(), - Self::Call(_, _) => unimplemented!(), + fn well_typed(self, context: &mut C) -> Result { + let fst = self + .fst + .well_typed(context) + .map_err(|e| CatError::First(Box::new(e)))?; + let snd = self + .snd + .well_typed(context) + .map_err(|e| CatError::Second(Box::new(e)))?; + + if fst.is_nullable() { + Err(CatError::FirstNullable(fst)) + } else if !fst.flast_set().intersect_first(&snd.first_set()).is_empty() { + Err(CatError::FirstFlastOverlap(fst, snd)) + } else { + Ok(Typed::Cat(Box::new(fst), Box::new(snd))) } } +} - pub fn type_check(self, ctx: &[Self]) -> Result { - match self { - Self::Epsilon => Ok(Typed::Epsilon), - Self::Bottom => Ok(Typed::Bottom), - Self::Literal(s) => Ok(Typed::Literal(s)), - Self::Cat(fst, snd) => { - let vars = ctx_to_flast(ctx); - if fst.null(&flast_to_null(&vars)) { - Err(TypeError::CatFirstNull(*fst, *snd)) - } else if fst.flast(&vars).intersection(&snd.first(&flast_to_first(&vars))).next().is_some() { - Err(TypeError::CatFirstFlastOverlap(*fst, *snd)) - } else { - Ok(Typed::Cat(Box::new(fst.type_check(ctx)?), Box::new(snd.type_check(ctx)?))) - } - } - Self::Alt(fst, snd) => { - let vars = ctx_to_first(ctx); - let null = first_to_null(&vars); - if fst.null(&null) && snd.null(&null) { - Err(TypeError::AltBothNull(*fst, *snd)) - } else if fst.first(&vars).intersection(&snd.first(&vars)).next().is_some() { - Err(TypeError::AltFirstOverlap(*fst, *snd)) - } else { - Ok(Typed::Alt(Box::new(fst.type_check(ctx)?), Box::new(snd.type_check(ctx)?))) - } - } - Self::Fix(inner) => { - let mut ctx = ctx.to_owned(); - ctx.push(Self::Fix(inner.clone())); - Ok(Typed::Fix(Box::new(inner.type_check(&ctx)?))) - } - Self::Variable(index) => { - if index < ctx.len() { - Ok(Typed::Variable(index)) - } else { - Err(TypeError::FreeVariable(index)) - } - } - Self::Call(_, _) => unimplemented!("No functions yet") +#[derive(Debug)] +pub enum CatError { + First(Box), + Second(Box), + FirstNullable(Typed), + FirstFlastOverlap(Typed, Typed), +} + +impl Display for CatError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + todo!() + } +} + +#[derive(Clone, Debug)] +pub struct Alt { + left: Box, + punct: Token![|], + right: Box, +} + +impl Alt { + pub fn new(left: Term, punct: Token![|], right: Term) -> Self { + Self { + left: Box::new(left), + punct, + right: Box::new(right), } } } -#[derive(Clone, Debug, Eq, PartialEq)] -pub enum TypeError { - CatFirstNull(Term, Term), - CatFirstFlastOverlap(Term, Term), - AltBothNull(Term, Term), - AltFirstOverlap(Term, Term), - FreeVariable(usize) +impl Type for Alt { + type Err = AltError; + + fn is_nullable(&self, context: &mut C) -> Option { + Some(self.left.is_nullable(context)? || self.right.is_nullable(context)?) + } + + fn first_set(&self, context: &mut C) -> Option { + Some( + self.left + .first_set(context)? + .union(self.right.first_set(context)?), + ) + } + + fn flast_set(&self, context: &mut C) -> Option { + Some( + self.left + .flast_set(context)? + .union(self.right.flast_set(context)?), + ) + } + + fn well_typed(self, context: &mut C) -> Result { + let left = self + .left + .well_typed(context) + .map_err(|e| AltError::Left(Box::new(e)))?; + let right = self + .right + .well_typed(context) + .map_err(|e| AltError::Right(Box::new(e)))?; + + if left.is_nullable() && right.is_nullable() { + Err(AltError::BothNullable(left, right)) + } else if !left.first_set().intersect(&right.first_set()).is_empty() { + Err(AltError::FirstOverlap(left, right)) + } else { + Ok(Typed::Alt(Box::new(left), Box::new(right))) + } + } } -#[derive(Clone, Debug, Eq, PartialEq)] -pub enum Typed { - Epsilon, - Bottom, - Literal(String), - Cat(Box, Box), - Alt(Box, Box), - Fix(Box), - Variable(usize), - Call(Ident, Vec), +#[derive(Debug)] +pub enum AltError { + Left(Box), + Right(Box), + BothNullable(Typed, Typed), + FirstOverlap(Typed, Typed), } -#[cfg(test)] -mod tests { - use super::*; +impl Display for AltError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + todo!() + } +} - #[test] - fn null_epsilon() { - assert!(Term::Epsilon.null(&[])); +#[derive(Clone, Debug)] +pub struct Fix { + span: Span, + arg: Ident, + inner: Box, +} + +impl Fix { + pub fn new(arg: Ident, inner: Term, span: Span) -> Self { + Self { + arg, + inner: Box::new(inner), + span, + } } +} + +impl Type for Fix { + type Err = TermError; - #[test] - fn not_null_bottom() { - assert!(!Term::Bottom.null(&[])); + fn is_nullable(&self, context: &mut C) -> Option { + fix(Some(false), |last| { + last.as_ref() + .copied() + .and_then(|null| context.push_nullable(null, |ctx| self.inner.is_nullable(ctx))) + }) } - #[test] - fn not_null_normal_literal() { - assert!(!Term::Literal("x".to_owned()).null(&[])) + fn first_set(&self, context: &mut C) -> Option { + let nullable = self.is_nullable(context)?; + fix(Some(FirstSet::new()), |last| { + last.as_ref().cloned().and_then(|first_set| { + context.push_first_set(nullable, first_set, |ctx| self.inner.first_set(ctx)) + }) + }) } - #[test] - fn null_empty_literal() { - assert!(Term::Literal("".to_owned()).null(&[])) + fn flast_set(&self, context: &mut C) -> Option { + let nullable = self.is_nullable(context)?; + let first_set = self.first_set(context)?; + fix(Some(FlastSet::new()), |last| { + last.as_ref().cloned().and_then(|flast_set| { + context.push_flast_set(nullable, first_set.clone(), flast_set, |ctx| { + self.inner.flast_set(ctx) + }) + }) + }) } - #[test] - fn null_cat_both_null() { - assert!(Term::Cat(Box::new(Term::Epsilon), Box::new(Term::Epsilon)).null(&[])) + fn well_typed(self, context: &mut C) -> Result { + // FIXME: free variables cause panic + let nullable = self.is_nullable(context).unwrap(); + let first_set = self.first_set(context).unwrap(); + let flast_set = self.flast_set(context).unwrap(); + + context + .push_flast_set(nullable, first_set, flast_set, |ctx| { + self.inner.well_typed(ctx) + }) + .map(|inner| Typed::Fix(Box::new(inner))) } +} + +#[derive(Clone, Debug)] +pub struct Variable { + name: Ident, + index: usize, +} - #[test] - fn not_null_cat_first_not_null() { - assert!(!Term::Cat(Box::new(Term::Bottom), Box::new(Term::Epsilon)).null(&[])) +impl Variable { + pub fn new(name: Ident, index: usize) -> Self { + Self { name, index } } +} - #[test] - fn not_null_cat_second_not_null() { - assert!(!Term::Cat(Box::new(Term::Epsilon), Box::new(Term::Bottom)).null(&[])) +impl Type for Variable { + type Err = VariableError; + + fn is_nullable(&self, context: &mut C) -> Option { + context.get_nullable(self.index) + } + + fn first_set(&self, context: &mut C) -> Option { + context.get_first_set(self.index) } - #[test] - fn not_null_alt_both_not_null() { - assert!(!Term::Alt(Box::new(Term::Bottom), Box::new(Term::Bottom)).null(&[])) + fn flast_set(&self, context: &mut C) -> Option { + context.get_flast_set(self.index) } - #[test] - fn null_alt_first_null() { - assert!(Term::Alt(Box::new(Term::Epsilon), Box::new(Term::Bottom)).null(&[])) + fn well_typed(self, context: &mut C) -> Result { + match context.get_flast_set(self.index) { + Some(_) => Ok(Typed::Variable(self.index)), + None => Err(VariableError::FreeVariable(self)), + } } +} + +#[derive(Debug)] +pub enum VariableError { + FreeVariable(Variable), +} - #[test] - fn null_alt_second_null() { - assert!(Term::Alt(Box::new(Term::Bottom), Box::new(Term::Epsilon)).null(&[])) +impl Display for VariableError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + todo!() } +} - #[test] - fn not_null_fix_simple_inner() { - assert!(!Term::Fix(Box::new(Term::Bottom)).null(&[])) +#[derive(Clone, Debug)] +pub struct Call { + span: Span, + name: Ident, + args: Vec, +} + +impl Call { + pub fn new(name: Ident, args: Vec, span: Span) -> Self { + Self { name, args, span } } +} - #[test] - fn null_fix_simple_inner() { - assert!(Term::Fix(Box::new(Term::Epsilon)).null(&[])) +impl Type for Call { + type Err = Infallible; + + fn is_nullable(&self, _context: &mut C) -> Option { + todo!() } - #[test] - fn not_null_fix_var_inner() { - assert!(!Term::Fix(Box::new(Term::Variable(0))).null(&[])) + fn first_set(&self, _context: &mut C) -> Option { + todo!() } - #[test] - fn null_fix_var_inner() { - assert!(Term::Fix(Box::new(Term::Alt( - Box::new(Term::Epsilon), - Box::new(Term::Variable(0)) - ))) - .null(&[])) + fn flast_set(&self, _context: &mut C) -> Option { + todo!() } - #[test] - fn first_epsilon_empty() { - assert_eq!(Term::Epsilon.first(&[]), BTreeSet::new()) + fn well_typed(self, _context: &mut C) -> Result { + todo!() } +} + +#[derive(Clone, Debug)] +pub enum Term { + Epsilon(Epsilon), + Literal(Literal), + Cat(Cat), + Alt(Alt), + Fix(Fix), + Variable(Variable), + Call(Call), +} + +impl Type for Term { + type Err = TermError; - #[test] - fn first_bottom_empty() { - assert_eq!(Term::Bottom.first(&[]), BTreeSet::new()) + fn is_nullable(&self, context: &mut C) -> Option { + match self { + Self::Epsilon(e) => e.is_nullable(context), + Self::Literal(e) => e.is_nullable(context), + Self::Cat(e) => e.is_nullable(context), + Self::Alt(e) => e.is_nullable(context), + Self::Fix(e) => e.is_nullable(context), + Self::Variable(e) => e.is_nullable(context), + Self::Call(e) => e.is_nullable(context), + } } - #[test] - fn first_literal_first_char() { - let set = vec!['x'].into_iter().collect(); - assert_eq!(Term::Literal("x".to_owned()).first(&[]), set); - let set = vec!['h'].into_iter().collect(); - assert_eq!(Term::Literal("hello".to_owned()).first(&[]), set); - assert_eq!(Term::Literal("".to_owned()).first(&[]), BTreeSet::new()); + fn first_set(&self, context: &mut C) -> Option { + match self { + Self::Epsilon(e) => e.first_set(context), + Self::Literal(e) => e.first_set(context), + Self::Cat(e) => e.first_set(context), + Self::Alt(e) => e.first_set(context), + Self::Fix(e) => e.first_set(context), + Self::Variable(e) => e.first_set(context), + Self::Call(e) => e.first_set(context), + } } - #[test] - fn first_cat_first_not_null() { - let set = vec!['x'].into_iter().collect(); - assert_eq!( - Term::Cat( - Box::new(Term::Literal("x".to_owned())), - Box::new(Term::Literal("y".to_owned())) - ) - .first(&[]), - set - ); - } - - #[test] - fn first_cat_first_null() { - let set = vec!['x', 'y'].into_iter().collect(); - assert_eq!( - Term::Cat( - Box::new(Term::Alt( - Box::new(Term::Epsilon), - Box::new(Term::Literal("x".to_owned())) - )), - Box::new(Term::Literal("y".to_owned())) - ) - .first(&[]), - set - ); + fn flast_set(&self, context: &mut C) -> Option { + match self { + Self::Epsilon(e) => e.flast_set(context), + Self::Literal(e) => e.flast_set(context), + Self::Cat(e) => e.flast_set(context), + Self::Alt(e) => e.flast_set(context), + Self::Fix(e) => e.flast_set(context), + Self::Variable(e) => e.flast_set(context), + Self::Call(e) => e.flast_set(context), + } } - // TODO: test flast + fn well_typed(self, context: &mut C) -> Result { + match self { + Self::Epsilon(e) => e.well_typed(context).map_err(TermError::from), + Self::Literal(e) => e.well_typed(context).map_err(TermError::from), + Self::Cat(e) => e.well_typed(context).map_err(TermError::from), + Self::Alt(e) => e.well_typed(context).map_err(TermError::from), + Self::Fix(e) => e.well_typed(context).map_err(TermError::from), + Self::Variable(e) => e.well_typed(context).map_err(TermError::from), + Self::Call(e) => e.well_typed(context).map_err(TermError::from), + } + } +} + +#[derive(Debug)] +pub enum TermError { + Cat(CatError), + Alt(AltError), + Variable(VariableError), +} + +impl From for TermError { + fn from(other: Infallible) -> Self { + match other {} + } +} + +impl From for TermError { + fn from(other: CatError) -> Self{ + Self::Cat(other) + } +} + +impl From for TermError { + fn from(other: AltError) -> Self{ + Self::Alt(other) + } +} + +impl From for TermError { + fn from(other: VariableError) -> Self{ + Self::Variable(other) + } +} - #[test] - fn types_epsilon() { - assert_eq!(Term::Epsilon.type_check(&[]), Ok(Typed::Epsilon)) +impl Display for TermError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + todo!() } +} - #[test] - fn types_bottom() { - assert_eq!(Term::Bottom.type_check(&[]), Ok(Typed::Bottom)) +#[derive(Clone, Debug, Eq, PartialEq)] +pub enum Typed { + Epsilon, + Bottom, + Literal(String), + Cat(Box, Box), + Alt(Box, Box), + Fix(Box), + Variable(usize), + Call(Ident, Vec), +} + +impl Typed { + pub fn is_nullable(&self) -> bool { + todo!() + } + + pub fn first_set(&self) -> FirstSet { + todo!() } - #[test] - fn types_literal() { - assert_eq!(Term::Literal("x".to_owned()).type_check(&[]), Ok(Typed::Literal("x".to_owned()))); - assert_eq!(Term::Literal("".to_owned()).type_check(&[]), Ok(Typed::Literal("".to_owned()))) + pub fn flast_set(&self) -> FlastSet { + todo!() } } + +// #[cfg(test)] +// mod tests { +// use super::*; + +// #[test] +// fn null_epsilon() { +// assert!(Term::Epsilon.null(&[])); +// } + +// #[test] +// fn not_null_bottom() { +// assert!(!Term::Bottom.null(&[])); +// } + +// #[test] +// fn not_null_normal_literal() { +// assert!(!Term::Literal("x".to_owned()).null(&[])) +// } + +// #[test] +// fn null_empty_literal() { +// assert!(Term::Literal("".to_owned()).null(&[])) +// } + +// #[test] +// fn null_cat_both_null() { +// assert!(Term::Cat(Box::new(Term::Epsilon), Box::new(Term::Epsilon)).null(&[])) +// } + +// #[test] +// fn not_null_cat_first_not_null() { +// assert!(!Term::Cat(Box::new(Term::Bottom), Box::new(Term::Epsilon)).null(&[])) +// } + +// #[test] +// fn not_null_cat_second_not_null() { +// assert!(!Term::Cat(Box::new(Term::Epsilon), Box::new(Term::Bottom)).null(&[])) +// } + +// #[test] +// fn not_null_alt_both_not_null() { +// assert!(!Term::Alt(Box::new(Term::Bottom), Box::new(Term::Bottom)).null(&[])) +// } + +// #[test] +// fn null_alt_first_null() { +// assert!(Term::Alt(Box::new(Term::Epsilon), Box::new(Term::Bottom)).null(&[])) +// } + +// #[test] +// fn null_alt_second_null() { +// assert!(Term::Alt(Box::new(Term::Bottom), Box::new(Term::Epsilon)).null(&[])) +// } + +// #[test] +// fn not_null_fix_simple_inner() { +// assert!(!Term::Fix(Box::new(Term::Bottom)).null(&[])) +// } + +// #[test] +// fn null_fix_simple_inner() { +// assert!(Term::Fix(Box::new(Term::Epsilon)).null(&[])) +// } + +// #[test] +// fn not_null_fix_var_inner() { +// assert!(!Term::Fix(Box::new(Term::Variable(0))).null(&[])) +// } + +// #[test] +// fn null_fix_var_inner() { +// assert!(Term::Fix(Box::new(Term::Alt( +// Box::new(Term::Epsilon), +// Box::new(Term::Variable(0)) +// ))) +// .null(&[])) +// } + +// #[test] +// fn first_epsilon_empty() { +// assert_eq!(Term::Epsilon.first(&[]), BTreeSet::new()) +// } + +// #[test] +// fn first_bottom_empty() { +// assert_eq!(Term::Bottom.first(&[]), BTreeSet::new()) +// } + +// #[test] +// fn first_literal_first_char() { +// let set = vec!['x'].into_iter().collect(); +// assert_eq!(Term::Literal("x".to_owned()).first(&[]), set); +// let set = vec!['h'].into_iter().collect(); +// assert_eq!(Term::Literal("hello".to_owned()).first(&[]), set); +// assert_eq!(Term::Literal("".to_owned()).first(&[]), BTreeSet::new()); +// } + +// #[test] +// fn first_cat_first_not_null() { +// let set = vec!['x'].into_iter().collect(); +// assert_eq!( +// Term::Cat( +// Box::new(Term::Literal("x".to_owned())), +// Box::new(Term::Literal("y".to_owned())) +// ) +// .first(&[]), +// set +// ); +// } + +// #[test] +// fn first_cat_first_null() { +// let set = vec!['x', 'y'].into_iter().collect(); +// assert_eq!( +// Term::Cat( +// Box::new(Term::Alt( +// Box::new(Term::Epsilon), +// Box::new(Term::Literal("x".to_owned())) +// )), +// Box::new(Term::Literal("y".to_owned())) +// ) +// .first(&[]), +// set +// ); +// } + +// // TODO: test flast + +// #[test] +// fn types_epsilon() { +// assert_eq!(Term::Epsilon.type_check(&[]), Ok(Typed::Epsilon)) +// } + +// #[test] +// fn types_bottom() { +// assert_eq!(Term::Bottom.type_check(&[]), Ok(Typed::Bottom)) +// } + +// #[test] +// fn types_literal() { +// assert_eq!( +// Term::Literal("x".to_owned()).type_check(&[]), +// Ok(Typed::Literal("x".to_owned())) +// ); +// assert_eq!( +// Term::Literal("".to_owned()).type_check(&[]), +// Ok(Typed::Literal("".to_owned())) +// ) +// } +// } diff --git a/src/ast/typed.rs b/src/ast/typed.rs new file mode 100644 index 0000000..48ca740 --- /dev/null +++ b/src/ast/typed.rs @@ -0,0 +1,132 @@ +use super::Typed; +use std::collections::BTreeSet; +use std::fmt::Display; + +#[derive(Clone, Debug, Eq, PartialEq)] +pub struct FirstSet { + inner: BTreeSet, +} + +impl FirstSet { + pub fn new() -> Self { + Self { + inner: BTreeSet::new(), + } + } + + pub fn of_str(s: &str) -> Self { + let mut inner = BTreeSet::new(); + s.chars().next().map(|c| inner.insert(c)); + + Self { inner } + } + + pub fn is_empty(&self) -> bool { + self.inner.is_empty() + } + + pub fn union(mut self, mut other: Self) -> Self { + self.inner.append(&mut other.inner); + self + } + + pub fn intersect(&self, other: &Self) -> Self { + Self { + inner: self.inner.intersection(&other.inner).copied().collect(), + } + } +} + +#[derive(Clone, Debug, Eq, PartialEq)] +pub struct FlastSet { + inner: BTreeSet, +} + +impl FlastSet { + pub fn new() -> Self { + Self { + inner: BTreeSet::new(), + } + } + + pub fn is_empty(&self) -> bool { + self.inner.is_empty() + } + + pub fn union_first(mut self, mut other: FirstSet) -> Self { + self.inner.append(&mut other.inner); + self + } + + pub fn union(mut self, mut other: Self) -> Self { + self.inner.append(&mut other.inner); + self + } + + pub fn intersect_first(&self, other: &FirstSet) -> Self { + Self { + inner: self.inner.intersection(&other.inner).copied().collect(), + } + } + + pub fn intersect(&self, other: &Self) -> Self { + Self { + inner: self.inner.intersection(&other.inner).copied().collect(), + } + } +} + +pub trait NullContext { + type PushNull: NullContext; + + fn get_nullable(&self, index: usize) -> Option; + + fn push_nullable R, R>(&mut self, nullable: bool, f: F) -> R; +} + +pub trait FirstSetContext: NullContext { + type PushFirstSet: FirstSetContext; + + fn get_first_set(&self, index: usize) -> Option; + + fn push_first_set R, R>( + &mut self, + nullable: bool, + first_set: FirstSet, + f: F, + ) -> R; +} + +pub trait FlastSetContext: FirstSetContext { + type PushFlastSet: FlastSetContext; + + fn get_flast_set(&self, index: usize) -> Option; + + fn push_flast_set R, R>( + &mut self, + nullable: bool, + first_set: FirstSet, + flast_set: FlastSet, + f: F, + ) -> R; +} + +pub trait Type { + type Err: Display; + + /// # Errors + /// Returns [`None`] if the nullity cannot be determined. + fn is_nullable(&self, context: &mut C) -> Option; + + /// # Errors + /// Returns [`None`] if the first set cannot be determined. + fn first_set(&self, context: &mut C) -> Option; + + /// # Errors + /// Returns [`None`] if the flast set cannot be determined. + fn flast_set(&self, context: &mut C) -> Option; + + /// # Errors + /// Returns an [`Err`] if this term is not well typed. + fn well_typed(self, context: &mut C) -> Result; +} diff --git a/src/nibble/mod.rs b/src/nibble/mod.rs index 478e0cf..b93f97c 100644 --- a/src/nibble/mod.rs +++ b/src/nibble/mod.rs @@ -1,10 +1,10 @@ -pub mod parse; - use crate::ast::{ self, convert::{Context, Convert}, }; +use proc_macro2::Span; use syn::ext::IdentExt; +use syn::punctuated::Pair; use syn::punctuated::Punctuated; use syn::{ parse::{Parse, ParseStream}, @@ -30,7 +30,7 @@ impl Parse for Epsilon { impl Convert for Epsilon { fn convert(self, _: &Context) -> ast::Term { - ast::Term::Epsilon + ast::Term::Epsilon(ast::Epsilon::new(self.paren_tok.span)) } } @@ -42,9 +42,10 @@ impl Convert for Ident { let name = self.to_string(); if let Some(variable) = context.get(&name) { - Term::Variable(variable) + Term::Variable(ast::Variable::new(self, variable)) } else { - Term::Call(name, vec![]) + let span = self.span(); + Term::Call(ast::Call::new(self, Vec::new(), span)) } } } @@ -53,7 +54,7 @@ type Literal = syn::LitStr; impl Convert for Literal { fn convert(self, _context: &Context) -> ast::Term { - ast::Term::Literal(self.value()) + ast::Term::Literal(self) } } @@ -81,10 +82,21 @@ impl Parse for Call { impl Convert for Call { fn convert(self, context: &Context) -> ast::Term { use ast::Term; - Term::Call( - self.name.to_string(), - self.args.into_iter().map(|e| e.convert(context)).collect(), - ) + let span = self.name + .span() + .join(self.paren_tok.span) + .unwrap_or_else(Span::call_site); + Term::Call(ast::Call::new( + self.name, + self.args + .into_pairs() + .map(|pair| match pair { + Pair::Punctuated(t, _) => t.convert(context), + Pair::End(t) => t.convert(context), + }) + .collect(), + span, + )) } } @@ -118,8 +130,14 @@ impl Convert for Fix { fn convert(self, context: &Context) -> ast::Term { use ast::Term; let expr = self.expr; - Term::Fix(Box::new( - context.push(self.arg.to_string(), |c| expr.convert(c)), + let arg_name = self.arg.to_string(); + Term::Fix(ast::Fix::new( + self.arg, + context.push(arg_name, |c| expr.convert(c)), + self.bracket_token + .span + .join(self.paren_token.span) + .unwrap_or_else(Span::call_site), )) } } @@ -195,9 +213,8 @@ impl Parse for Term { impl Convert for Term { fn convert(self, context: &Context) -> ast::Term { - use ast::Term; match self { - Self::Epsilon(_) => Term::Epsilon, + Self::Epsilon(e) => e.convert(context), Self::Ident(i) => i.convert(context), Self::Literal(l) => l.convert(context), Self::Call(c) => c.convert(context), @@ -223,8 +240,14 @@ impl Parse for Cat { impl Convert for Cat { fn convert(self, context: &Context) -> ast::Term { use ast::Term; - self.terms.into_iter().rfold(Term::Epsilon, |exp, term| { - Term::Cat(Box::new(term.convert(context)), Box::new(exp)) + let mut iter = self.terms.into_pairs(); + let init = match iter.next_back().unwrap() { + Pair::Punctuated(_, _) => unreachable!(), + Pair::End(t) => t.convert(context), + }; + iter.rfold(init, |term, pair| match pair { + Pair::Punctuated(t, p) => Term::Cat(ast::Cat::new(t.convert(context), p, term)), + Pair::End(_) => unreachable!(), }) } } @@ -245,8 +268,14 @@ impl Parse for Alt { impl Convert for Alt { fn convert(self, context: &Context) -> ast::Term { use ast::Term; - self.cats.into_iter().rfold(Term::Bottom, |exp, cat| { - Term::Alt(Box::new(cat.convert(context)), Box::new(exp)) + let mut iter = self.cats.into_pairs(); + let init = match iter.next_back().unwrap() { + Pair::Punctuated(_, _) => unreachable!(), + Pair::End(t) => t.convert(context), + }; + iter.rfold(init, |term, pair| match pair { + Pair::Punctuated(t, p) => Term::Alt(ast::Alt::new(t.convert(context), p, term)), + Pair::End(_) => unreachable!(), }) } } @@ -255,8 +284,8 @@ type Expression = Alt; #[cfg(test)] mod tests { - use syn::parse_str; use super::{Epsilon, Ident, Literal}; + use syn::parse_str; #[test] fn parse_epsilon() { -- cgit v1.2.3