diff options
author | Greg Brown <gmb60@cam.ac.uk> | 2021-01-14 11:42:55 +0000 |
---|---|---|
committer | Greg Brown <gmb60@cam.ac.uk> | 2021-01-14 11:42:55 +0000 |
commit | aac3549a72663c523a456b2f5d7c3b77f509cdd6 (patch) | |
tree | 562824f3cfa5feca791715c733f7749197bb7e7a /src/chomp/typed | |
parent | 0d01692c97ea8ca6fc4b229e5b9678cb252bceda (diff) |
Add labelled expressions.
Restructure project (again).
Convert `Cat` and `Alt` from binary to n+2-ary.
Diffstat (limited to 'src/chomp/typed')
-rw-r--r-- | src/chomp/typed/context.rs | 57 | ||||
-rw-r--r-- | src/chomp/typed/error.rs | 150 | ||||
-rw-r--r-- | src/chomp/typed/infer.rs | 145 | ||||
-rw-r--r-- | src/chomp/typed/lower.rs | 41 | ||||
-rw-r--r-- | src/chomp/typed/mod.rs | 343 |
5 files changed, 736 insertions, 0 deletions
diff --git a/src/chomp/typed/context.rs b/src/chomp/typed/context.rs new file mode 100644 index 0000000..5c8d398 --- /dev/null +++ b/src/chomp/typed/context.rs @@ -0,0 +1,57 @@ +use crate::chomp::ast::Variable; + +use super::Type; + +#[derive(Debug, Default)] +pub struct Context { + vars: Vec<Type>, + unguard_points: Vec<usize>, +} + +impl Context { + pub fn new() -> Self { + Self::default() + } + + pub fn depth(&self) -> usize { + self.vars.len() + } + + pub fn is_unguarded(&self, var: &Variable) -> Option<bool> { + if self.vars.len() <= var.index { + None + } else if self.unguard_points.is_empty() { + Some(false) + } else { + Some(self.unguard_points[self.unguard_points.len() - 1] + var.index >= self.vars.len()) + } + } + + pub fn with_unguard<F: FnOnce(&mut Self) -> R, R>(&mut self, f: F) -> R { + self.unguard_points.push(self.vars.len()); + let res = f(self); + self.unguard_points.pop(); + res + } + + pub fn get_variable_type(&self, var: &Variable) -> Result<&Type, GetVariableError> { + match self.is_unguarded(var) { + None => Err(GetVariableError::FreeVariable), + Some(false) => Err(GetVariableError::GuardedVariable), + Some(true) => Ok(&self.vars[self.vars.len() - var.index - 1]), + } + } + + pub fn with_variable_type<F: FnOnce(&mut Self) -> R, R>(&mut self, ty: Type, f: F) -> R { + self.vars.push(ty); + let res = f(self); + self.vars.pop(); + res + } +} + +#[derive(Clone, Debug, Eq, PartialEq)] +pub enum GetVariableError { + FreeVariable, + GuardedVariable, +} diff --git a/src/chomp/typed/error.rs b/src/chomp/typed/error.rs new file mode 100644 index 0000000..bb807fc --- /dev/null +++ b/src/chomp/typed/error.rs @@ -0,0 +1,150 @@ +use std::{ + error::Error, + fmt::{self, Display}, +}; + +use proc_macro2::Span; + +use crate::chomp::{ast::Variable, Name}; + +use super::{context::GetVariableError, TypedExpression}; + +/// A type error when using a fix point variable. +#[derive(Debug)] +pub struct VariableError { + pub inner: GetVariableError, + pub var: Variable, + pub span: Option<Span>, + pub name: Option<Name>, +} + +impl PartialEq for VariableError { + fn eq(&self, other: &Self) -> bool { + todo!() + } +} + +impl Eq for VariableError {} + +impl From<VariableError> for syn::Error { + fn from(other: VariableError) -> Self { + todo!() + } +} + +impl Display for VariableError { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + todo!() + } +} + +impl Error for VariableError {} + +/// A type error when concatenating two terms. +#[derive(Debug)] +pub enum CatError { + /// The first term was unexpectedly nullable. + FirstNullable(TypedExpression, Option<Span>), + /// The flast set of the first term intersects the first set of the second. + FirstFlastOverlap(Vec<TypedExpression>, Option<Span>, TypedExpression), +} + +impl PartialEq for CatError { + fn eq(&self, other: &Self) -> bool { + todo!() + } +} + +impl Eq for CatError {} + +impl From<CatError> for syn::Error { + fn from(other: CatError) -> Self { + todo!() + } +} + +impl Display for CatError { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + todo!() + } +} + +impl Error for CatError {} + +/// A type error when alternating two terms. +#[derive(Debug)] +pub enum AltError { + /// Both terms are nullable. + BothNullable(Vec<TypedExpression>, Option<Span>, TypedExpression), + /// The first sets of the two terms intersect. + FirstOverlap(Vec<TypedExpression>, Option<Span>, TypedExpression), +} + +impl PartialEq for AltError { + fn eq(&self, other: &Self) -> bool { + todo!() + } +} + +impl Eq for AltError {} + +impl From<AltError> for syn::Error { + fn from(other: AltError) -> Self { + todo!() + } +} + +impl Display for AltError { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + todo!() + } +} + +impl Error for AltError {} + +#[derive(Debug, Eq, PartialEq)] +pub enum TypeError { + Cat(CatError), + Alt(AltError), + Variable(VariableError), +} + +impl From<CatError> for TypeError { + fn from(other: CatError) -> Self { + Self::Cat(other) + } +} + +impl From<AltError> for TypeError { + fn from(other: AltError) -> Self { + Self::Alt(other) + } +} + +impl From<VariableError> for TypeError { + fn from(other: VariableError) -> Self { + Self::Variable(other) + } +} + +impl From<TypeError> for syn::Error { + fn from(other: TypeError) -> Self { + match other { + TypeError::Cat(e) => e.into(), + TypeError::Alt(e) => e.into(), + TypeError::Variable(e) => e.into(), + } + } +} + +impl Display for TypeError { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Self::Cat(e) => e.fmt(f), + Self::Alt(e) => e.fmt(f), + Self::Variable(e) => e.fmt(f), + } + } +} + +impl Error for TypeError {} diff --git a/src/chomp/typed/infer.rs b/src/chomp/typed/infer.rs new file mode 100644 index 0000000..0161471 --- /dev/null +++ b/src/chomp/typed/infer.rs @@ -0,0 +1,145 @@ +use proc_macro2::Span; + +use crate::chomp::{ + ast::{Alt, Call, Cat, Epsilon, Fix, Literal, Parameter, Variable}, + visit::{Folder, Visitable}, + Name, +}; + +use super::{ + context::Context, + error::{TypeError, VariableError}, + Type, Typed, TypedExpression, +}; + +#[derive(Debug)] +pub struct TypeInfer<'a> { + pub context: &'a mut Context, +} + +impl Folder for TypeInfer<'_> { + type Out = Result<TypedExpression, TypeError>; + + fn fold_epsilon(&mut self, name: Option<Name>, span: Option<Span>, eps: Epsilon) -> Self::Out { + Ok(TypedExpression { + inner: super::Epsilon::from(eps).into(), + name, + span, + }) + } + + fn fold_literal(&mut self, name: Option<Name>, span: Option<Span>, lit: Literal) -> Self::Out { + Ok(TypedExpression { + inner: super::Literal::from(lit).into(), + name, + span, + }) + } + + fn fold_cat(&mut self, name: Option<Name>, span: Option<Span>, cat: Cat) -> Self::Out { + let first = cat.first.fold(self)?; + let second = cat.second; + let rest = cat.rest; + let punct = cat.punct; + self.context + .with_unguard(|context| -> Result<TypedExpression, TypeError> { + let mut infer = TypeInfer { context }; + let second = second.fold(&mut infer)?; + let rest = rest + .into_iter() + .map(|(punct, term)| -> Result<_, TypeError> { + Ok((punct.map(|p| p.span), term.fold(&mut infer)?)) + }) + .collect::<Result<Vec<_>, _>>()?; + Ok(TypedExpression { + inner: super::Cat::new(first, punct.map(|p| p.span), second, rest)?.into(), + name, + span, + }) + }) + } + + fn fold_alt(&mut self, name: Option<Name>, span: Option<Span>, alt: Alt) -> Self::Out { + let first = alt.first.fold(self)?; + let second = alt.second; + let rest = alt.rest; + let punct = alt.punct; + self.context + .with_unguard(|context| -> Result<TypedExpression, TypeError> { + let mut infer = TypeInfer { context }; + let second = second.fold(&mut infer)?; + let rest = rest + .into_iter() + .map(|(punct, term)| -> Result<_, TypeError> { + Ok((punct.map(|p| p.span), term.fold(&mut infer)?)) + }) + .collect::<Result<Vec<_>, _>>()?; + Ok(TypedExpression { + inner: super::Alt::new(first, punct.map(|p| p.span), second, rest)?.into(), + name, + span, + }) + }) + } + + fn fold_fix(&mut self, name: Option<Name>, span: Option<Span>, fix: Fix) -> Self::Out { + let mut ty = Type::default(); + + loop { + let last = ty; + let res = self.context.with_variable_type(last.clone(), |context| { + fix.inner.clone().fold(&mut TypeInfer { context }) + })?; + ty = res.get_type().clone(); + + if last == ty { + return Ok(TypedExpression { + inner: super::Fix { + inner: Box::new(res), + } + .into(), + name, + span, + }); + } + } + } + + fn fold_variable( + &mut self, + name: Option<Name>, + span: Option<Span>, + var: Variable, + ) -> Self::Out { + let ty = match self.context.get_variable_type(&var) { + Ok(ty) => ty.clone(), + Err(inner) => { + return Err(VariableError { + inner, + var, + span, + name, + } + .into()) + } + }; + Ok(TypedExpression { + inner: super::Variable { inner: var, ty }.into(), + name, + span, + }) + } + + fn fold_parameter( + &mut self, + _name: Option<Name>, + _span: Option<Span>, + _param: Parameter, + ) -> Self::Out { + todo!() + } + + fn fold_call(&mut self, _name: Option<Name>,_span: Option<Span>, _call: Call) -> Self::Out { + todo!() + } +} diff --git a/src/chomp/typed/lower.rs b/src/chomp/typed/lower.rs new file mode 100644 index 0000000..37589a1 --- /dev/null +++ b/src/chomp/typed/lower.rs @@ -0,0 +1,41 @@ +use proc_macro2::Span; + +use crate::chomp::Name; + +use super::{Alt, Cat, Epsilon, Fix, Literal, RawTypedExpression, TypedExpression, Variable}; + +pub trait Backend: Default { + type Id; + type Code; + + fn gen_epsilon(&mut self, name: Option<Name>, span: Option<Span>, eps: Epsilon) -> Self::Id; + + fn gen_literal(&mut self, name: Option<Name>, span: Option<Span>, lit: Literal) -> Self::Id; + + fn gen_cat(&mut self, name: Option<Name>, span: Option<Span>, cat: Cat) -> Self::Id; + + fn gen_alt(&mut self, name: Option<Name>, span: Option<Span>, alt: Alt) -> Self::Id; + + fn gen_fix(&mut self, name: Option<Name>, span: Option<Span>, fix: Fix) -> Self::Id; + + fn gen_variable(&mut self, name: Option<Name>, span: Option<Span>, var: Variable) -> Self::Id; + + fn emit_code(self, name: Option<Name>, span: Option<Span>, id: Self::Id) -> Self::Code; +} + +pub trait GenerateCode { + fn gen<B: Backend>(self, backend: &mut B) -> B::Id; +} + +impl GenerateCode for TypedExpression { + fn gen<B: Backend>(self, backend: &mut B) -> B::Id { + match self.inner { + RawTypedExpression::Epsilon(e) => backend.gen_epsilon(self.name, self.span, e), + RawTypedExpression::Literal(l) => backend.gen_literal(self.name, self.span, l), + RawTypedExpression::Cat(c) => backend.gen_cat(self.name, self.span, c), + RawTypedExpression::Alt(a) => backend.gen_alt(self.name, self.span, a), + RawTypedExpression::Fix(f) => backend.gen_fix(self.name, self.span, f), + RawTypedExpression::Variable(v) => backend.gen_variable(self.name, self.span, v), + } + } +} diff --git a/src/chomp/typed/mod.rs b/src/chomp/typed/mod.rs new file mode 100644 index 0000000..e9aed79 --- /dev/null +++ b/src/chomp/typed/mod.rs @@ -0,0 +1,343 @@ +use std::{hash, iter}; + +use proc_macro2::Span; + +use super::{ + ast, + set::{FirstSet, FlastSet}, + Name, +}; + +pub mod context; +pub mod error; +pub mod lower; + +mod infer; + +pub use self::infer::TypeInfer; +use self::error::{AltError, CatError}; + +#[derive(Debug, Default, Clone, Eq, Hash, PartialEq)] +pub struct Type { + nullable: bool, + first_set: FirstSet, + flast_set: FlastSet, +} + +impl Type { + pub const fn new(nullable: bool, first_set: FirstSet, flast_set: FlastSet) -> Self { + Self { + nullable, + first_set, + flast_set, + } + } + + pub fn of_str(s: &str) -> Self { + Self { + nullable: s.is_empty(), + first_set: FirstSet::of_str(s), + flast_set: FlastSet::default(), + } + } + + pub fn nullable(&self) -> bool { + self.nullable + } + + pub fn first_set(&self) -> &FirstSet { + &self.first_set + } + + pub fn flast_set(&self) -> &FlastSet { + &self.flast_set + } + + pub fn cat(self, other: Self) -> Self { + Self { + nullable: self.nullable && other.nullable, + first_set: self.first_set.union(if self.nullable { + other.first_set.clone() + } else { + FirstSet::default() + }), + flast_set: other.flast_set.union(if other.nullable { + self.flast_set.union_first(other.first_set) + } else { + FlastSet::default() + }), + } + } + + pub fn alt(self, other: Self) -> Self { + Self { + nullable: self.nullable || other.nullable, + first_set: self.first_set.union(other.first_set), + flast_set: self.flast_set.union(other.flast_set), + } + } +} + +#[derive(Clone, Debug, Eq, Hash, PartialEq)] +pub struct Epsilon { + ty: Type, +} + +impl From<ast::Epsilon> for Epsilon { + fn from(_: ast::Epsilon) -> Self { + Self { + ty: Type::new(true, FirstSet::default(), FlastSet::default()), + } + } +} + +#[derive(Clone, Debug, Eq, Hash, PartialEq)] +pub struct Literal { + inner: ast::Literal, + ty: Type, +} + +impl Literal { + pub fn inner(&self) -> &ast::Literal { + &self.inner + } +} + +impl From<ast::Literal> for Literal { + fn from(inner: ast::Literal) -> Self { + let ty = Type::of_str(&inner); + Self { inner, ty } + } +} + +impl From<Literal> for ast::Literal { + fn from(lit: Literal) -> Self { + lit.inner + } +} + +#[derive(Clone, Debug, Eq, Hash, PartialEq)] +pub struct Cat { + terms: Vec<TypedExpression>, + ty: Type, +} + +impl Cat { + fn new<I: IntoIterator<Item = (Option<Span>, TypedExpression)>>( + first: TypedExpression, + punct: Option<Span>, + second: TypedExpression, + rest: I, + ) -> Result<Self, CatError> { + if first.get_type().nullable() { + return Err(CatError::FirstNullable(first, punct)); + } + + iter::once((punct, second)) + .chain(rest) + .try_fold( + (first.get_type().clone(), vec![first]), + |(ty, mut terms), (punct, right)| { + if !ty + .flast_set() + .intersect_first(right.get_type().first_set()) + .is_empty() + { + Err(CatError::FirstFlastOverlap(terms, punct, right)) + } else { + let ty = ty.cat(right.get_type().clone()); + terms.push(right); + Ok((ty, terms)) + } + }, + ) + .map(|(ty, terms)| Self { ty, terms }) + } +} + +impl IntoIterator for Cat { + type Item = TypedExpression; + + type IntoIter = <Vec<TypedExpression> as IntoIterator>::IntoIter; + + fn into_iter(self) -> Self::IntoIter { + self.terms.into_iter() + } +} + +#[derive(Clone, Debug, Eq, Hash, PartialEq)] +pub struct Alt { + terms: Vec<TypedExpression>, + ty: Type, +} + +impl Alt { + pub fn new<I: IntoIterator<Item = (Option<Span>, TypedExpression)>>( + first: TypedExpression, + punct: Option<Span>, + second: TypedExpression, + rest: I, + ) -> Result<Self, AltError> { + iter::once((punct, second)) + .chain(rest) + .try_fold( + (first.get_type().clone(), vec![first]), + |(ty, mut terms), (punct, right)| { + if ty.nullable() && right.get_type().nullable() { + Err(AltError::BothNullable(terms, punct, right)) + } else if !ty + .first_set() + .intersect(right.get_type().first_set()) + .is_empty() + { + Err(AltError::FirstOverlap(terms, punct, right)) + } else { + let ty = ty.alt(right.get_type().clone()); + terms.push(right); + Ok((ty, terms)) + } + }, + ) + .map(|(ty, terms)| Self { ty, terms }) + } +} + +impl IntoIterator for Alt { + type Item = TypedExpression; + + type IntoIter = <Vec<TypedExpression> as IntoIterator>::IntoIter; + + fn into_iter(self) -> Self::IntoIter { + self.terms.into_iter() + } +} + +#[derive(Clone, Debug, Eq, Hash, PartialEq)] +pub struct Fix { + inner: Box<TypedExpression>, +} + +impl Fix { + pub fn unwrap(self) -> TypedExpression { + *self.inner + } +} + +#[derive(Clone, Debug, Eq, Hash, PartialEq)] +pub struct Variable { + inner: ast::Variable, + ty: Type, +} + +impl Variable { + pub fn index(&self) -> usize { + self.inner.index + } +} + +#[derive(Clone, Debug, Eq, Hash, PartialEq)] +enum RawTypedExpression { + Epsilon(Epsilon), + Literal(Literal), + Cat(Cat), + Alt(Alt), + Fix(Fix), + Variable(Variable), +} + +impl From<Epsilon> for RawTypedExpression { + fn from(eps: Epsilon) -> Self { + Self::Epsilon(eps) + } +} + +impl From<Literal> for RawTypedExpression { + fn from(lit: Literal) -> Self { + Self::Literal(lit) + } +} + +impl From<Cat> for RawTypedExpression { + fn from(cat: Cat) -> Self { + Self::Cat(cat) + } +} + +impl From<Alt> for RawTypedExpression { + fn from(alt: Alt) -> Self { + Self::Alt(alt) + } +} + +impl From<Fix> for RawTypedExpression { + fn from(fix: Fix) -> Self { + Self::Fix(fix) + } +} + +impl From<Variable> for RawTypedExpression { + fn from(var: Variable) -> Self { + Self::Variable(var) + } +} + +#[derive(Clone, Debug)] +pub struct TypedExpression { + inner: RawTypedExpression, + pub name: Option<Name>, + pub span: Option<Span>, +} + +impl PartialEq for TypedExpression { + fn eq(&self, other: &Self) -> bool { + self.inner == other.inner && self.name == other.name + } +} + +impl Eq for TypedExpression {} + +impl hash::Hash for TypedExpression { + fn hash<H: hash::Hasher>(&self, state: &mut H) { + self.inner.hash(state); + self.name.hash(state); + } +} + +pub trait Typed { + fn get_type(&self) -> &Type; +} + +macro_rules! leaf_typed { + ($ty:ty, $field:ident) => { + impl Typed for $ty { + fn get_type(&self) -> &Type { + &self.$field + } + } + }; +} + +leaf_typed!(Epsilon, ty); +leaf_typed!(Literal, ty); +leaf_typed!(Cat, ty); +leaf_typed!(Alt, ty); +leaf_typed!(Variable, ty); + +impl Typed for Fix { + fn get_type(&self) -> &Type { + self.inner.get_type() + } +} + +impl Typed for TypedExpression { + fn get_type(&self) -> &Type { + match &self.inner { + RawTypedExpression::Epsilon(e) => e.get_type(), + RawTypedExpression::Literal(l) => l.get_type(), + RawTypedExpression::Cat(c) => c.get_type(), + RawTypedExpression::Alt(a) => a.get_type(), + RawTypedExpression::Fix(f) => f.get_type(), + RawTypedExpression::Variable(v) => v.get_type(), + } + } +} |