summaryrefslogtreecommitdiff
path: root/src/lower/mod.rs
diff options
context:
space:
mode:
Diffstat (limited to 'src/lower/mod.rs')
-rw-r--r--src/lower/mod.rs506
1 files changed, 468 insertions, 38 deletions
diff --git a/src/lower/mod.rs b/src/lower/mod.rs
index 242fa47..06099e3 100644
--- a/src/lower/mod.rs
+++ b/src/lower/mod.rs
@@ -1,58 +1,488 @@
-use crate::chomp::typed::{
- Alt, Cat, Epsilon, Fix, Literal, RawTypedExpression, TypedExpression, Variable,
+use std::{
+ collections::{BTreeSet, HashMap},
+ convert::TryInto,
};
-pub mod rust;
+use heck::{CamelCase, SnakeCase};
+use proc_macro2::{Ident, Span, TokenStream, TokenTree};
+use quote::{format_ident, quote, quote_spanned};
+use syn::{Index, LitStr};
-pub trait Backend: Default {
- type Id;
- type Code;
+use crate::chomp::{
+ ast,
+ set::FirstSet,
+ typed::{
+ lower::{Backend, GenerateCode},
+ Alt, Cat, Epsilon, Fix, Literal, Type, Typed, TypedExpression, Variable,
+ },
+ Name,
+};
- fn gen_epsilon(&mut self, eps: Epsilon) -> Self::Id;
+#[derive(Clone, Debug)]
+struct Ty {
+ name: TokenStream,
+ rest: TokenStream,
+ deps: BTreeSet<usize>,
+}
- fn gen_literal(&mut self, lit: Literal) -> Self::Id;
+#[derive(Debug)]
+pub struct RustBackend {
+ // Indexed by ID, stores type, impl and dependencies
+ data: Vec<Ty>,
+ eps_id: Option<usize>,
+ lit_map: HashMap<String, usize>,
+ cat_map: HashMap<Vec<usize>, usize>,
+ alt_map: HashMap<Vec<usize>, usize>,
+ fix_map: HashMap<TypedExpression, usize>,
+ var_map: HashMap<usize, usize>, // Key is fix point ID
+ name_map: HashMap<Ident, usize>,
+ can_char: BTreeSet<usize>,
+ context: Vec<usize>,
+}
- fn gen_cat(&mut self, cat: Cat) -> Self::Id;
+impl RustBackend {
+ fn new_type_name(&mut self, prefix: &str, name: Option<Name>, span: Span) -> (usize, Ident) {
+ let id = self.data.len();
- fn gen_alt(&mut self, alt: Alt) -> Self::Id;
+ match name {
+ None => (id, format_ident!("{}{}", prefix, id + 1, span = span)),
+ Some(name) => {
+ let name = name.to_camel_case().into_ident(span);
+ let count = self.name_map.entry(name.clone()).or_insert(0);
+ *count += 1;
+ (id, format_ident!("{}{}", name, count))
+ }
+ }
+ }
- fn gen_fix(&mut self, fix: Fix) -> Self::Id;
+ fn recurse_exprs<I: IntoIterator<Item = TypedExpression>>(
+ &mut self,
+ iter: I,
+ ) -> (Vec<Type>, Vec<(Option<Name>, Span)>, Vec<usize>) {
+ let (ty_name_span, ids): (Vec<_>, _) = iter
+ .into_iter()
+ .map(|e| {
+ (
+ (
+ e.get_type().clone(),
+ (e.name.clone(), e.span.unwrap_or_else(Span::call_site)),
+ ),
+ e.gen(self),
+ )
+ })
+ .unzip();
+ let (ty, name_span) = ty_name_span.into_iter().unzip();
+ (ty, name_span, ids)
+ }
+
+ fn indexes<'a, I: 'a + IntoIterator<Item = Span>>(
+ prefix: &'a str,
+ iter: I,
+ ) -> impl Iterator<Item = (Index, Ident)> + '_ {
+ iter.into_iter().enumerate().map(move |(idx, span)| {
+ (
+ Index {
+ index: idx.try_into().unwrap(),
+ span,
+ },
+ format_ident!("{}{}", prefix, idx, span = span),
+ )
+ })
+ }
+
+ fn ty_names<'a, I: 'a + IntoIterator<Item = usize>>(
+ &'a self,
+ iter: I,
+ ) -> impl Iterator<Item = TokenStream> + '_ {
+ iter.into_iter().map(move |id| self.data[id].name.clone())
+ }
- fn gen_variable(&mut self, var: Variable) -> Self::Id;
+ fn name_parts_snake<'a, I: 'a + IntoIterator<Item = (Option<Name>, Span)>>(
+ default: &'a str,
+ iter: I,
+ ) -> impl Iterator<Item = Ident> + '_ {
+ let mut name_map = HashMap::new();
+ iter.into_iter()
+ .enumerate()
+ .map(move |(index, (name, span))| match name {
+ None => format_ident!("{}{}", default, index + 1, span = span),
+ Some(name) => {
+ let name = name.to_snake_case().into_ident(span);
+ let count = name_map.entry(name.clone()).or_insert(0usize);
+ *count += 1;
+ format_ident!("{}{}", name, count)
+ }
+ })
+ }
- fn emit_code(self, id: Self::Id) -> Self::Code;
+ fn name_parts_camel<'a, I: 'a + IntoIterator<Item = (Option<Name>, Span)>>(
+ default: &'a str,
+ iter: I,
+ ) -> impl Iterator<Item = Ident> + '_ {
+ let mut name_map = HashMap::new();
+ iter.into_iter()
+ .enumerate()
+ .map(move |(index, (name, span))| match name {
+ None => format_ident!("{}{}", default, index + 1, span = span),
+ Some(name) => {
+ let name = name.to_camel_case().into_ident(span);
+ let count = name_map.entry(name.clone()).or_insert(0usize);
+ *count += 1;
+ format_ident!("{}{}", name, count)
+ }
+ })
+ }
}
-pub trait GenerateCode {
- fn gen<B: Backend>(self, backend: &mut B) -> B::Id;
+impl Default for RustBackend {
+ fn default() -> Self {
+ Self {
+ data: Vec::new(),
+ eps_id: None,
+ lit_map: HashMap::new(),
+ cat_map: HashMap::new(),
+ alt_map: HashMap::new(),
+ fix_map: HashMap::new(),
+ var_map: HashMap::new(),
+ name_map: HashMap::new(),
+ can_char: BTreeSet::new(),
+ context: Vec::new(),
+ }
+ }
}
-macro_rules! generate_leaf {
- ($ty:ty, $gen:ident) => {
- impl GenerateCode for $ty {
- fn gen<B: Backend>(self, backend: &mut B) -> B::Id {
- backend.$gen(self)
+impl Backend for RustBackend {
+ type Id = usize;
+
+ type Code = TokenStream;
+
+ fn gen_epsilon(&mut self, _name: Option<Name>, _span: Option<Span>, _eps: Epsilon) -> Self::Id {
+ if let Some(id) = self.eps_id {
+ return id;
+ }
+
+ let id = self.data.len();
+ self.data.push(Ty {
+ name: quote! {Epsilon},
+ rest: TokenStream::new(),
+ deps: BTreeSet::new(),
+ });
+ id
+ }
+
+ fn gen_literal(&mut self, name: Option<Name>, span: Option<Span>, lit: Literal) -> Self::Id {
+ let span = span.unwrap_or_else(Span::call_site);
+
+ let lit: ast::Literal = lit.into();
+ if let Some(&id) = self.lit_map.get(&lit) {
+ return id;
+ }
+
+ let (id, name) = self.new_type_name("Lit", name, span);
+ let doc_name = format!(r#"The literal string `{:?}`."#, lit);
+ let lit = LitStr::new(&lit, span);
+
+ let mut rest = quote_spanned! {span=>
+ #[doc=#doc_name]
+ #[derive(Clone, Debug, Eq, Hash, PartialEq)]
+ pub struct #name;
+
+ impl ::std::fmt::Display for #name {
+ fn fmt(&self, f: &mut ::std::fmt::Formatter<'_>) -> ::std::fmt::Result {
+ write!(f, "{}", #lit)
+ }
+ }
+
+ impl Parse for #name {
+ fn take<P: Parser + ?Sized>(input: &mut P) -> Result<Self, TakeError> {
+ input.consume_str(#lit).map(|()| #name)
+ }
}
+ };
+
+ if lit.value().len() == 1 {
+ self.can_char.insert(id);
+ let c = lit.value().chars().next().unwrap();
+ rest.extend(quote_spanned! {span=>
+ impl From<#name> for char {
+ fn from(_: #name) -> Self {
+ #c
+ }
+ }
+ });
}
- };
-}
-generate_leaf!(Epsilon, gen_epsilon);
-generate_leaf!(Literal, gen_literal);
-generate_leaf!(Cat, gen_cat);
-generate_leaf!(Alt, gen_alt);
-generate_leaf!(Fix, gen_fix);
-generate_leaf!(Variable, gen_variable);
-
-impl GenerateCode for TypedExpression {
- fn gen<B: Backend>(self, backend: &mut B) -> B::Id {
- match self.inner {
- RawTypedExpression::Epsilon(e) => e.gen(backend),
- RawTypedExpression::Literal(l) => l.gen(backend),
- RawTypedExpression::Cat(c) => c.gen(backend),
- RawTypedExpression::Alt(a) => a.gen(backend),
- RawTypedExpression::Fix(f) => f.gen(backend),
- RawTypedExpression::Variable(v) => v.gen(backend),
+ self.lit_map.insert(lit.value(), id);
+ self.data.push(Ty {
+ name: TokenTree::from(name).into(),
+ rest,
+ deps: BTreeSet::new(),
+ });
+
+ id
+ }
+
+ fn gen_cat(&mut self, name: Option<Name>, span: Option<Span>, cat: Cat) -> Self::Id {
+ let span = span.unwrap_or_else(Span::call_site);
+ let (_, name_spans, ids) = self.recurse_exprs(cat);
+
+ if let Some(&id) = self.cat_map.get(&ids) {
+ return id;
}
+
+ let (id, name) = self.new_type_name("Cat", name, span);
+ let ty_names: Vec<_> = self.ty_names(ids.iter().copied()).collect();
+ let named = name_spans.iter().any(|(name, _)| name.is_some());
+ let (indexes, number_parts): (Vec<_>, Vec<_>) =
+ Self::indexes("part", name_spans.iter().cloned().map(|(_, span)| span)).unzip();
+ let name_parts: Vec<_> = Self::name_parts_snake("part", name_spans).collect();
+ let rest = if named {
+ quote_spanned! {span=>
+ #[derive(Clone, Debug, Eq, Hash, PartialEq)]
+ pub struct #name {
+ #(pub #name_parts: #ty_names),*
+ }
+
+ impl ::std::fmt::Display for #name {
+ fn fmt(&self, f: &mut ::std::fmt::Formatter<'_>) -> ::std::fmt::Result {
+ #(self.#name_parts.fmt(f)?;)*
+ Ok(())
+ }
+ }
+
+ impl Parse for #name {
+ fn take<P: Parser + ?Sized>(input: &mut P) -> Result<Self, TakeError> {
+ #(let #number_parts = input.take()?;)*
+ Ok(Self {#(#name_parts: #number_parts),*})
+ }
+ }
+ }
+ } else {
+ quote_spanned! {span=>
+ #[derive(Clone, Debug, Eq, Hash, PartialEq)]
+ pub struct #name(#(pub #ty_names),*);
+
+ impl ::std::fmt::Display for #name {
+ fn fmt(&self, f: &mut ::std::fmt::Formatter<'_>) -> ::std::fmt::Result {
+ #(self.#indexes.fmt(f)?;)*
+ Ok(())
+ }
+ }
+
+ impl Parse for #name {
+ fn take<P: Parser + ?Sized>(input: &mut P) -> Result<Self, TakeError> {
+ #(let #number_parts = input.take()?;)*
+ Ok(Self(#(#number_parts),*))
+ }
+ }
+ }
+ };
+
+ self.cat_map.insert(ids.clone(), id);
+ self.data.push(Ty {
+ name: TokenTree::from(name).into(),
+ rest,
+ deps: ids.into_iter().collect(),
+ });
+ id
+ }
+
+ fn gen_alt(&mut self, name: Option<Name>, span: Option<Span>, alt: Alt) -> Self::Id {
+ let span = span.unwrap_or_else(Span::call_site);
+ let (tys, name_spans, ids) = self.recurse_exprs(alt);
+
+ if let Some(&id) = self.alt_map.get(&ids) {
+ return id;
+ }
+
+ let (id, name) = self.new_type_name("Alt", name, span);
+ let ty_names: Vec<_> = self.ty_names(ids.iter().copied()).collect();
+ let name_parts: Vec<_> = Self::name_parts_camel("Branch", name_spans).collect();
+
+ let nullable = tys
+ .iter()
+ .enumerate()
+ .find(|(_, ty)| ty.nullable())
+ .map(|(idx, _)| name_parts[idx].clone());
+ let (first_alts, firsts): (Vec<_>, Vec<_>) = tys
+ .iter()
+ .map(|ty| ty.first_set())
+ .cloned()
+ .enumerate()
+ .filter(|(_, fs)| !fs.is_empty())
+ .map(|(idx, fs)| {
+ let fs = fs.into_iter();
+ (name_parts[idx].clone(), quote! {#(Some(#fs))|*})
+ })
+ .unzip();
+ let all_firsts = tys
+ .iter()
+ .map(|ty| ty.first_set())
+ .cloned()
+ .flat_map(FirstSet::into_iter);
+
+ let mut rest = quote_spanned! {span=>
+ #[derive(Clone, Debug, Eq, Hash, PartialEq)]
+ pub enum #name {
+ #(#name_parts(#ty_names)),*
+ }
+
+ impl ::std::fmt::Display for #name {
+ fn fmt(&self, f: &mut ::std::fmt::Formatter<'_>) -> ::std::fmt::Result {
+ match self {
+ #(Self::#name_parts(inner) => inner.fmt(f)),*
+ }
+ }
+ }
+ };
+
+ rest.extend(if let Some(nullable) = nullable {
+ quote_spanned! {span=>
+ impl Parse for #name {
+ fn take<P: Parser + ?Sized>(input: &mut P) -> Result<Self, TakeError> {
+ match input.peek() {
+ #(#firsts => Ok(Self::#first_alts(input.take()?)),)*
+ _ => Ok(Self::#nullable(input.take()?))
+ }
+ }
+ }
+ }
+ } else {
+ quote_spanned! {span=>
+ impl Parse for #name {
+ fn take<P: Parser + ?Sized>(input: &mut P) -> Result<Self, TakeError> {
+ match input.peek() {
+ #(#firsts => Ok(Self::#first_alts(input.take()?)),)*
+ Some(c) => Err(TakeError::BadBranch(input.pos(), c, &[#(#all_firsts),*])),
+ None => Err(TakeError::EndOfStream(input.pos()))
+ }
+ }
+ }
+ }
+ });
+
+ if ids.iter().all(|id| self.can_char.contains(id)) {
+ self.can_char.insert(id);
+ rest.extend(quote_spanned! {span=>
+ impl From<#name> for char {
+ fn from(alt: #name) -> Self {
+ match alt {
+ #(#name::#name_parts(inner) => inner.into()),*
+ }
+ }
+ }
+ })
+ }
+
+ self.alt_map.insert(ids.clone(), id);
+ self.data.push(Ty {
+ name: TokenTree::from(name).into(),
+ rest,
+ deps: ids.into_iter().collect(),
+ });
+ id
+ }
+
+ fn gen_fix(&mut self, name: Option<Name>, span: Option<Span>, fix: Fix) -> Self::Id {
+ let span = span.unwrap_or_else(Span::call_site);
+ let inner = fix.unwrap();
+
+ if let Some(&id) = self.fix_map.get(&inner) {
+ return id;
+ }
+
+ let (id, name) = self.new_type_name("Fix", name, span);
+ self.fix_map.insert(inner.clone(), id);
+
+ self.data.push(Ty {
+ name: TokenTree::from(name.clone()).into(),
+ rest: TokenStream::new(),
+ deps: BTreeSet::new(),
+ });
+
+ self.context.push(id);
+ let inner = inner.gen(self);
+ self.context.pop();
+
+ let inner_ty = self.data[inner].name.clone();
+ let rest = quote_spanned! {span=>
+ #[derive(Clone, Debug, Eq, Hash, PartialEq)]
+ pub struct #name(pub #inner_ty);
+
+ impl ::std::fmt::Display for #name {
+ fn fmt(&self, f: &mut ::std::fmt::Formatter<'_>) -> ::std::fmt::Result {
+ self.0.fmt(f)
+ }
+ }
+
+ impl Parse for #name {
+ fn take<P: Parser + ?Sized>(input: &mut P) -> Result<Self, TakeError> {
+ input.take().map(Self)
+ }
+ }
+ };
+ self.data[id].rest = rest;
+ self.data[id].deps = [inner].iter().cloned().collect();
+ id
+ }
+
+ fn gen_variable(&mut self, name: Option<Name>, span: Option<Span>, var: Variable) -> Self::Id {
+ let span = span.unwrap_or_else(Span::call_site);
+ let fix_id = self.context[self.context.len() - var.index() - 1];
+
+ if let Some(&id) = self.var_map.get(&fix_id) {
+ return id;
+ }
+
+ let id = self.data.len();
+ let fix_ty = self.data[fix_id].name.clone();
+ let name = quote! {Box<#fix_ty>};
+ self.var_map.insert(fix_id, id);
+ self.data.push(Ty {
+ name,
+ rest: TokenStream::new(),
+ deps: BTreeSet::new(),
+ });
+
+ id
+ }
+
+ fn emit_code(self, name: Option<Name>, span: Option<Span>, id: Self::Id) -> Self::Code {
+ let span = span.unwrap_or_else(Span::call_site);
+
+ let mut tokens = quote_spanned! {span=>
+ use ::chewed::*;
+ };
+
+ let mut todo = [id].iter().copied().collect::<BTreeSet<_>>();
+ let mut completed = BTreeSet::new();
+
+ while !todo.is_empty() {
+ let next = todo.clone();
+ completed.append(&mut todo);
+
+ for ty in next {
+ let ty = self.data[ty].clone();
+ tokens.extend(ty.rest);
+ todo.extend(ty.deps.into_iter().filter(|id| !completed.contains(id)));
+ }
+ }
+
+ let root_name = self.data[id].name.clone();
+
+ tokens.extend(if let Some(name) = name {
+ let name = name.into_ident(span);
+ let count = self.name_map.get(&name).copied().unwrap_or_default() + 1;
+ let name = format_ident!("{}{}", name, count);
+ quote_spanned! {span=>
+ pub type #name = #root_name;
+ }
+ } else {
+ quote_spanned! {span=>
+ pub type Ast = #root_name;
+ }
+ });
+
+ tokens
}
}