diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/ast/mod.rs | 489 | ||||
-rw-r--r-- | src/ast/typed.rs | 8 | ||||
-rw-r--r-- | src/main.rs | 22 | ||||
-rw-r--r-- | src/nibble/mod.rs | 2 |
4 files changed, 366 insertions, 155 deletions
diff --git a/src/ast/mod.rs b/src/ast/mod.rs index 280a646..07d32ec 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -1,7 +1,14 @@ +use std::collections::BTreeSet; +use std::collections::HashMap; use std::convert::Infallible; +use std::error::Error; use std::fmt::Display; use proc_macro2::Span; +use proc_macro2::TokenStream; +use proc_macro2::TokenTree; +use quote::format_ident; +use quote::quote; use syn::{Ident, LitStr, Token}; use typed::FirstSetContext; use typed::FlastSetContext; @@ -567,7 +574,9 @@ impl Display for TermError { } } -#[derive(Clone, Debug, Eq, PartialEq)] +impl Error for TermError {} + +#[derive(Clone, Debug, Eq, PartialEq, Hash)] enum TypeKind { Epsilon, Literal(String), @@ -577,7 +586,60 @@ enum TypeKind { Variable(usize), } -#[derive(Clone, Debug, Eq, PartialEq)] +#[derive(Debug)] +pub struct Code { + id: usize, +} + +impl TypeKind { + /// Produces ident for the type and token stream for implementation + fn emit_code(self, context: &mut CodeContext) -> Code { + match self { + Self::Epsilon => context.epsilon(), + Self::Literal(s) => context.literal(s), + Self::Cat(fst, snd) => { + let Typed { kind: fst_kind, .. } = *fst; + let Typed { kind: snd_kind, .. } = *snd; + let fst_code = fst_kind.emit_code(context); + let snd_code = snd_kind.emit_code(context); + context.cat(fst_code, snd_code) + } + Self::Alt(left, right) => { + let Typed { + kind: left_kind, + nullable: left_null, + first_set: left_first, + .. + } = *left; + let Typed { + kind: right_kind, + nullable: right_null, + first_set: right_first, + .. + } = *right; + let left_code = left_kind.emit_code(context); + let right_code = right_kind.emit_code(context); + context.alt( + left_code, + left_null, + left_first, + right_code, + right_null, + right_first, + ) + } + Self::Fix(inner) => { + let Typed { + kind: inner_kind, .. + } = *inner; + context.fix(inner_kind) + } + Self::Variable(index) => context.variable(index), + } + } +} + +#[derive(Clone, Debug, Eq, PartialEq, Hash)] pub struct Typed { kind: TypeKind, nullable: bool, @@ -597,155 +659,278 @@ impl Typed { pub fn flast_set(&self) -> &FlastSet { &self.flast_set } + + pub fn emit_code(self, name: Ident) -> TokenStream { + let mut context = CodeContext::new(); + let code = self.kind.emit_code(&mut context); + context.all_code(name, code) + } +} + +#[derive(Debug)] +pub struct CodeContext { + data: Vec<(TokenStream, TokenStream, BTreeSet<usize>)>, + lit_map: HashMap<String, usize>, + cat_map: HashMap<(usize, usize), usize>, + alt_map: HashMap<(usize, usize), usize>, + fix_map: HashMap<TypeKind, usize>, + var_map: HashMap<usize, usize>, // Key is fix point ID + context: Vec<usize>, } -// #[cfg(test)] -// mod tests { -// use super::*; - -// #[test] -// fn null_epsilon() { -// assert!(Term::Epsilon.null(&[])); -// } - -// #[test] -// fn not_null_bottom() { -// assert!(!Term::Bottom.null(&[])); -// } - -// #[test] -// fn not_null_normal_literal() { -// assert!(!Term::Literal("x".to_owned()).null(&[])) -// } - -// #[test] -// fn null_empty_literal() { -// assert!(Term::Literal("".to_owned()).null(&[])) -// } - -// #[test] -// fn null_cat_both_null() { -// assert!(Term::Cat(Box::new(Term::Epsilon), Box::new(Term::Epsilon)).null(&[])) -// } - -// #[test] -// fn not_null_cat_first_not_null() { -// assert!(!Term::Cat(Box::new(Term::Bottom), Box::new(Term::Epsilon)).null(&[])) -// } - -// #[test] -// fn not_null_cat_second_not_null() { -// assert!(!Term::Cat(Box::new(Term::Epsilon), Box::new(Term::Bottom)).null(&[])) -// } - -// #[test] -// fn not_null_alt_both_not_null() { -// assert!(!Term::Alt(Box::new(Term::Bottom), Box::new(Term::Bottom)).null(&[])) -// } - -// #[test] -// fn null_alt_first_null() { -// assert!(Term::Alt(Box::new(Term::Epsilon), Box::new(Term::Bottom)).null(&[])) -// } - -// #[test] -// fn null_alt_second_null() { -// assert!(Term::Alt(Box::new(Term::Bottom), Box::new(Term::Epsilon)).null(&[])) -// } - -// #[test] -// fn not_null_fix_simple_inner() { -// assert!(!Term::Fix(Box::new(Term::Bottom)).null(&[])) -// } - -// #[test] -// fn null_fix_simple_inner() { -// assert!(Term::Fix(Box::new(Term::Epsilon)).null(&[])) -// } - -// #[test] -// fn not_null_fix_var_inner() { -// assert!(!Term::Fix(Box::new(Term::Variable(0))).null(&[])) -// } - -// #[test] -// fn null_fix_var_inner() { -// assert!(Term::Fix(Box::new(Term::Alt( -// Box::new(Term::Epsilon), -// Box::new(Term::Variable(0)) -// ))) -// .null(&[])) -// } - -// #[test] -// fn first_epsilon_empty() { -// assert_eq!(Term::Epsilon.first(&[]), BTreeSet::new()) -// } - -// #[test] -// fn first_bottom_empty() { -// assert_eq!(Term::Bottom.first(&[]), BTreeSet::new()) -// } - -// #[test] -// fn first_literal_first_char() { -// let set = vec!['x'].into_iter().collect(); -// assert_eq!(Term::Literal("x".to_owned()).first(&[]), set); -// let set = vec!['h'].into_iter().collect(); -// assert_eq!(Term::Literal("hello".to_owned()).first(&[]), set); -// assert_eq!(Term::Literal("".to_owned()).first(&[]), BTreeSet::new()); -// } - -// #[test] -// fn first_cat_first_not_null() { -// let set = vec!['x'].into_iter().collect(); -// assert_eq!( -// Term::Cat( -// Box::new(Term::Literal("x".to_owned())), -// Box::new(Term::Literal("y".to_owned())) -// ) -// .first(&[]), -// set -// ); -// } - -// #[test] -// fn first_cat_first_null() { -// let set = vec!['x', 'y'].into_iter().collect(); -// assert_eq!( -// Term::Cat( -// Box::new(Term::Alt( -// Box::new(Term::Epsilon), -// Box::new(Term::Literal("x".to_owned())) -// )), -// Box::new(Term::Literal("y".to_owned())) -// ) -// .first(&[]), -// set -// ); -// } - -// // TODO: test flast - -// #[test] -// fn types_epsilon() { -// assert_eq!(Term::Epsilon.type_check(&[]), Ok(Typed::Epsilon)) -// } - -// #[test] -// fn types_bottom() { -// assert_eq!(Term::Bottom.type_check(&[]), Ok(Typed::Bottom)) -// } - -// #[test] -// fn types_literal() { -// assert_eq!( -// Term::Literal("x".to_owned()).type_check(&[]), -// Ok(Typed::Literal("x".to_owned())) -// ); -// assert_eq!( -// Term::Literal("".to_owned()).type_check(&[]), -// Ok(Typed::Literal("".to_owned())) -// ) -// } -// } +impl CodeContext { + fn new() -> Self { + let eps = (quote! {()}, TokenStream::new(), BTreeSet::new()); + + Self { + data: vec![eps], + lit_map: HashMap::new(), + cat_map: HashMap::new(), + alt_map: HashMap::new(), + fix_map: HashMap::new(), + var_map: HashMap::new(), + context: Vec::new(), + } + } + + fn epsilon(&mut self) -> Code { + Code { id: 0 } + } + + fn literal(&mut self, lit: String) -> Code { + if let Some(&id) = self.lit_map.get(&lit) { + Code { id } + } else { + let id = self.data.len(); + let name = format_ident!("Lit{}", id); + let doc_name = format!( + r#"The literal string `"{}"`."#, + lit.escape_debug().collect::<String>() + ); + let tokens = quote! { + #[doc=#doc_name] + pub struct #name; + + impl Parse for #name { + fn parse<P: Parser>(input: &mut P) -> Result<Self> { + input.take_str(#lit).map(|()| #name) + } + } + }; + + self.data + .push((TokenTree::from(name).into(), tokens, BTreeSet::new())); + self.lit_map.insert(lit, id); + + Code { id } + } + } + + fn cat(&mut self, fst: Code, snd: Code) -> Code { + if let Some(&id) = self.cat_map.get(&(fst.id, snd.id)) { + Code { id } + } else { + let id = self.data.len(); + let fst_ty = self.data[fst.id].0.clone(); + let snd_ty = self.data[snd.id].0.clone(); + self.data.push(( + quote! {(#fst_ty, #snd_ty)}, + TokenStream::new(), + [fst.id, snd.id].iter().cloned().collect(), + )); + self.cat_map.insert((fst.id, snd.id), id); + Code { id } + } + } + + fn alt( + &mut self, + left_code: Code, + left_null: bool, + left_first: FirstSet, + right_code: Code, + right_null: bool, + right_first: FirstSet, + ) -> Code { + if let Some(&id) = self.alt_map.get(&(left_code.id, right_code.id)) { + Code { id } + } else { + let id = self.data.len(); + let left_ty = self.data[left_code.id].0.clone(); + let right_ty = self.data[right_code.id].0.clone(); + let name = format_ident!("Alt{}", id); + let mut tokens = quote! { + pub enum #name { + Left(#left_ty), + Right(#right_ty), + } + }; + + let other = if left_null { + let iter = right_first.into_iter(); + + quote! { + impl Parse for #name { + fn parse<P: Parser>(input: &mut P) -> Result<Self> { + match input.peek() { + #(Some(#iter))|* => input.parse().map(Self::Right), + _ => input.parse().map(Self::Left), + } + } + } + } + } else if right_null { + let iter = left_first.into_iter(); + + quote! { + impl Parse for #name { + fn parse<P: Parser>(input: &mut P) -> Result<Self> { + match input.peek() { + #(Some(#iter))|* => input.parse().map(Self::Left), + _ => input.parse().map(Self::Right), + } + } + } + } + } else { + let iter_first = left_first.clone().union(right_first.clone()).into_iter(); + let iter_left = left_first.into_iter(); + let iter_right = right_first.into_iter(); + + quote! { + impl Parse for #name { + fn parse<P: Parser>(input: &mut P) -> Result<Self> { + match input.peek().ok_or(Error::EndOfStream)? { + #(#iter_left)|* => input.parse().map(Self::Left), + #(#iter_right)|* => input.parse().map(Self::Right), + c => input.error(Error::BadBranch(c, &[#(#iter_first),*])) + } + } + } + } + }; + + tokens.extend(other); + self.data.push(( + TokenTree::from(name).into(), + tokens, + [left_code.id, right_code.id].iter().cloned().collect(), + )); + self.alt_map.insert((left_code.id, right_code.id), id); + Code { id } + } + } + + fn fix(&mut self, inner: TypeKind) -> Code { + if let Some(&id) = self.fix_map.get(&inner) { + Code { id } + } else { + let id = self.data.len(); + let name = format_ident!("Fix{}", id); + self.data.push(( + TokenTree::from(name.clone()).into(), + TokenStream::new(), + BTreeSet::new(), + )); + self.context.push(id); + let inner = inner.emit_code(self).id; + let inner_ty = self.data[inner].0.clone(); + let tokens = quote! { + pub struct #name(#inner_ty); + + impl Parse for #name { + fn parse<P: Parser>(input: &mut P) -> Result<Self> { + input.parse().map(Self) + } + } + }; + self.data[id].1 = tokens; + self.data[id].2 = [inner].iter().cloned().collect(); + Code { id } + } + } + + fn variable(&mut self, index: usize) -> Code { + let fix_id = self.context[self.context.len() - index - 1]; + if let Some(&id) = self.var_map.get(&fix_id) { + Code { id } + } else { + let id = self.data.len(); + let fix_ty = self.data[fix_id].0.clone(); + let name = quote! {Box<#fix_ty>}; + self.data.push((name, TokenStream::new(), BTreeSet::new())); + self.var_map.insert(fix_id, id); + Code { id } + } + } + + pub fn all_code(&mut self, name: Ident, code: Code) -> TokenStream { + let root = self.data[code.id].clone(); + let mut tokens = root.1; + let mut completed = [code.id].iter().cloned().collect::<BTreeSet<_>>(); + let mut todo = root.2; + + while !todo.is_empty() { + let mut next = BTreeSet::new(); + completed.extend(todo.clone()); + + for ty in todo { + let ty = self.data[ty].clone(); + tokens.extend(ty.1); + next.extend(ty.2.into_iter().filter(|id| !completed.contains(id))); + } + + todo = next; + } + + let root_ty = root.0; + tokens.extend(quote! { + pub type #name = #root_ty; + + pub enum Error { + BadBranch(char, &'static [char]), + EndOfStream, + } + + pub type Result<T> = std::result::Result<T, Error>; + + pub trait Parser: Iterator<Item = char> { + fn peek(&mut self) -> Option<char>; + + fn parse<T: Parse>(&mut self) -> Result<T> where Self: Sized { + T::parse(self) + } + + fn take_str(&mut self, s: &str) -> Result<()>; + + fn error<T>(&mut self, e: Error) -> Result<T>; + } + + pub trait Parse: Sized { + fn parse<P: Parser>(input: &mut P) -> Result<Self>; + } + + impl Parse for () { + fn parse<P: Parser>(_input: &mut P) -> Result<Self> { + Ok(()) + } + } + + impl<A: Parse, B: Parse> Parse for (A, B) { + fn parse<P: Parser>(input: &mut P) -> Result<Self> { + let a = input.parse()?; + let b = input.parse()?; + Ok((a, b)) + } + } + + impl<T: Parse + Sized> Parse for Box<T> { + fn parse<P: Parser>(input: &mut P) -> Result<Self> { + input.parse().map(Box::new) + } + } + }); + + tokens + } +} diff --git a/src/ast/typed.rs b/src/ast/typed.rs index dee13be..a0b8d1b 100644 --- a/src/ast/typed.rs +++ b/src/ast/typed.rs @@ -3,7 +3,7 @@ use super::VariableError; use std::collections::BTreeSet; use std::fmt::Display; -#[derive(Clone, Debug, Eq, PartialEq)] +#[derive(Clone, Debug, Eq, PartialEq, Hash)] pub struct FirstSet { inner: BTreeSet<char>, } @@ -36,9 +36,13 @@ impl FirstSet { inner: self.inner.intersection(&other.inner).copied().collect(), } } + + pub fn into_iter(self) -> impl Iterator<Item = char> { + self.inner.into_iter() + } } -#[derive(Clone, Debug, Eq, PartialEq)] +#[derive(Clone, Debug, Eq, PartialEq, Hash)] pub struct FlastSet { inner: BTreeSet<char>, } diff --git a/src/main.rs b/src/main.rs new file mode 100644 index 0000000..9eb762a --- /dev/null +++ b/src/main.rs @@ -0,0 +1,22 @@ +use chomp::{ + ast::{ + convert::{Context, Convert}, + typed::Type, + }, + nibble::Expression, +}; +use proc_macro2::Span; +use std::io::{self, Error, ErrorKind, Read, Write}; +use syn::Ident; + +fn main() -> io::Result<()> { + let mut input = String::new(); + io::stdin().read_to_string(&mut input)?; + let nibble: Expression = + syn::parse_str(&input).map_err(|e| Error::new(ErrorKind::InvalidData, e))?; + let term = nibble.convert(&Context::new()); + // FIXME: better error handling here + let typed = term.well_typed(&mut Vec::new()).unwrap(); + let code = typed.emit_code(Ident::new("Ast", Span::call_site())); + write!(io::stdout(), "{:#}", code) +} diff --git a/src/nibble/mod.rs b/src/nibble/mod.rs index b93f97c..18c0da7 100644 --- a/src/nibble/mod.rs +++ b/src/nibble/mod.rs @@ -280,7 +280,7 @@ impl Convert for Alt { } } -type Expression = Alt; +pub type Expression = Alt; #[cfg(test)] mod tests { |