summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGreg Brown <gmb60@cam.ac.uk>2021-03-01 13:36:55 +0000
committerGreg Brown <gmb60@cam.ac.uk>2021-03-01 13:36:55 +0000
commit7ba3f96d49489aec9ff4a536c2df416287e49aca (patch)
tree02e5334a179b2103063d91ed8d1ef8ce658427b8
parent7934aa9af22e8fa3c33a45bc08e732a73c0cabf5 (diff)
Initial work on charsetscharset
-rw-r--r--src/chomp/set.rs165
-rw-r--r--src/lower/mod.rs56
2 files changed, 176 insertions, 45 deletions
diff --git a/src/chomp/set.rs b/src/chomp/set.rs
index 0661ab6..8f97e8a 100644
--- a/src/chomp/set.rs
+++ b/src/chomp/set.rs
@@ -1,8 +1,31 @@
-use std::collections::BTreeSet;
+use std::{collections::BTreeSet, convert::TryInto};
+
+// From 1.50 std::core::iter::range lines 425--450
+fn step_char_forward(c : char) -> Option<char> {
+ let start = u32::from(c);
+ let mut res = start.checked_add(1)?;
+
+ if start < 0xD800 && 0xD800 <= res {
+ res = start.checked_add(0x800)?;
+ }
+
+ res.try_into().ok()
+}
+
+fn step_char_backward(c : char) -> Option<char> {
+ let start = u32::from(c);
+ let mut res = start.checked_sub(1)?;
+
+ if start >= 0xE000 && 0xE000 > res {
+ res = start.checked_sub(0x800)?;
+ }
+
+ res.try_into().ok()
+}
#[derive(Clone, Debug, Default, Eq, PartialEq, Hash)]
pub struct FirstSet {
- inner: BTreeSet<char>,
+ inner: Vec<(char, char)>,
}
impl FirstSet {
@@ -11,32 +34,108 @@ impl FirstSet {
}
pub fn of_str(s: &str) -> Self {
- let mut inner = BTreeSet::new();
- s.chars().next().map(|c| inner.insert(c));
-
- Self { inner }
+ Self {
+ inner: s.chars().next().into_iter().map(|c| (c, c)).collect(),
+ }
}
pub fn is_empty(&self) -> bool {
self.inner.is_empty()
}
- pub fn union(mut self, mut other: Self) -> Self {
- self.inner.append(&mut other.inner);
- self
+ pub fn union(self, mut other: Self) -> Self {
+ let mut out = Vec::new();
+ let mut iters = [self.into_iter().peekable(), other.into_iter().peekable()];
+
+ 'outer: loop {
+ // Look at the next two ranges, and find the minimum next character
+ let (min, mut max) = match (iters[0].peek(), iters[1].peek()) {
+ // Both inputs exhausted
+ (None, None) => break,
+ // Add remainder to output
+ (None, Some(_)) => {
+ out.extend(iters[1]);
+ break
+ }
+ (Some(_), None) => {
+ out.extend(iters[0]);
+ break
+ }
+ // The interesting case
+ (Some((left, left_max)), Some((right, right_max))) => {
+ if left > right {
+ iters.swap(0, 1);
+ (*right, *right_max)
+ } else {
+ (*left, *left_max)
+ }
+ }
+ };
+
+ max = match step_char_forward(max) {
+ Some(c) => c,
+ None => {
+ // assert!(max == char::MAX)
+ // This swallows all other ranges
+ out.push((min, max));
+ break;
+ }
+ };
+
+ // There are a few cases:
+ // - No overlap:
+ // ----
+ // ----
+ // - Extend:
+ // ----
+ // ----
+ // - Swallow:
+ // ---------
+ // ----
+ // We loop until there is no overlap.
+ // For extend, we step the left and swap.
+ // For swallow, we step the right only
+ while let Some((other_min, other_max)) = iters[1].peek() {
+ if *other_min > max {
+ // No overlap
+ break;
+ }
+
+ // max is one past range, other_max is in range, so we use >=
+ if *other_max >= max {
+ // Extend
+ iters[0].next();
+ iters.swap(0, 1);
+ max = match step_char_forward(*other_max) {
+ Some(c) => c,
+ None => { out.push((min, max)); break 'outer }
+ };
+ } else {
+ // Swallow
+ iters[1].next();
+ }
+ }
+
+ let end = step_char_backward(max).expect("char + 1 - 1 != char");
+ out.push((min, end));
+ iters[0].next();
+ }
+
+ Self {inner: out}
}
pub fn intersect(&self, other: &Self) -> Self {
- Self {
- inner: self.inner.intersection(&other.inner).copied().collect(),
- }
+ todo!()
+ // Self {
+ // inner: self.inner.intersection(&other.inner).copied().collect(),
+ // }
}
}
impl IntoIterator for FirstSet {
- type Item = char;
+ type Item = (char, char);
- type IntoIter = <BTreeSet<char> as IntoIterator>::IntoIter;
+ type IntoIter = <Vec<(char, char)> as IntoIterator>::IntoIter;
fn into_iter(self) -> Self::IntoIter {
self.inner.into_iter()
@@ -58,8 +157,9 @@ impl FlastSet {
}
pub fn union_first(mut self, mut other: FirstSet) -> Self {
- self.inner.append(&mut other.inner);
- self
+ // self.inner.append(&mut other.inner);
+ // self
+ todo!()
}
pub fn union(mut self, mut other: Self) -> Self {
@@ -68,9 +168,10 @@ impl FlastSet {
}
pub fn intersect_first(&self, other: &FirstSet) -> Self {
- Self {
- inner: self.inner.intersection(&other.inner).copied().collect(),
- }
+ // Self {
+ // inner: self.inner.intersection(&other.inner).copied().collect(),
+ // }
+ todo!()
}
pub fn intersect(&self, other: &Self) -> Self {
@@ -89,3 +190,29 @@ impl IntoIterator for FlastSet {
self.inner.into_iter()
}
}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+
+ #[test]
+ fn test_first_union_none() {
+ let left = FirstSet { inner: vec![('A', 'Z'), ('a', 'z')]};
+ let right = FirstSet {inner: vec![]};
+ assert_eq!(left.union(right), left)
+ }
+
+ #[test]
+ fn test_first_union_shallow() {
+ let left = FirstSet { inner: vec![('A', 'Z'), ('a', 'z')]};
+ let right = FirstSet {inner: vec![('b', 'y')]};
+ assert_eq!(left.union(right), left)
+ }
+
+ #[test]
+ fn test_first_union_extend() {
+ let left = FirstSet { inner: vec![('A', 'Z'), ('a', 'z')]};
+ let right = FirstSet {inner: vec![('[', '`')]};
+ assert_eq!(left.union(right), FirstSet {inner: vec![('A', 'z')]})
+ }
+}
diff --git a/src/lower/mod.rs b/src/lower/mod.rs
index dcd0f1f..45be7aa 100644
--- a/src/lower/mod.rs
+++ b/src/lower/mod.rs
@@ -300,17 +300,19 @@ impl Backend for RustBackend {
.enumerate()
.find(|(_, ty)| ty.nullable())
.map(|(idx, _)| name_parts[idx].clone());
- let (first_alts, firsts): (Vec<_>, Vec<_>) = tys
- .iter()
- .map(Type::first_set)
- .cloned()
- .enumerate()
- .filter(|(_, fs)| !fs.is_empty())
- .map(|(idx, fs)| {
- let fs = fs.into_iter();
- (name_parts[idx].clone(), quote! {#(Some(#fs))|*})
- })
- .unzip();
+ todo!();
+ // let (first_alts, firsts): (Vec<_>, Vec<_>) = tys
+ // .iter()
+ // .map(Type::first_set)
+ // .cloned()
+ // .enumerate()
+ // .filter(|(_, fs)| !fs.is_empty())
+ // .map(|(idx, fs)| {
+ // let fs = fs.into_iter();
+ // // (name_parts[idx].clone(), quote! {#(Some(#fs))|*})
+ // todo!()
+ // })
+ // .unzip();
let all_firsts = tys
.iter()
.map(Type::first_set)
@@ -336,25 +338,27 @@ impl Backend for RustBackend {
quote_spanned! {span=>
impl Parse for #name {
fn take<P: Parser + ?Sized>(input: &mut P) -> Result<Self, TakeError> {
- match input.peek() {
- #(#firsts => Ok(Self::#first_alts(input.take()?)),)*
- _ => Ok(Self::#nullable(input.take()?))
- }
+ todo!()
+ // match input.peek() {
+ // #(#firsts => Ok(Self::#first_alts(input.take()?)),)*
+ // _ => Ok(Self::#nullable(input.take()?))
+ // }
}
}
}
} else {
- quote_spanned! {span=>
- impl Parse for #name {
- fn take<P: Parser + ?Sized>(input: &mut P) -> Result<Self, TakeError> {
- match input.peek() {
- #(#firsts => Ok(Self::#first_alts(input.take()?)),)*
- Some(c) => Err(TakeError::BadBranch(input.pos(), c, &[#(#all_firsts),*])),
- None => Err(TakeError::EndOfStream(input.pos()))
- }
- }
- }
- }
+ todo!()
+ // quote_spanned! {span=>
+ // impl Parse for #name {
+ // fn take<P: Parser + ?Sized>(input: &mut P) -> Result<Self, TakeError> {
+ // match input.peek() {
+ // #(#firsts => Ok(Self::#first_alts(input.take()?)),)*
+ // Some(c) => Err(TakeError::BadBranch(input.pos(), c, &[#(#all_firsts),*])),
+ // None => Err(TakeError::EndOfStream(input.pos()))
+ // }
+ // }
+ // }
+ // }
});
if ids.iter().all(|id| self.can_char.contains(id)) {