summaryrefslogtreecommitdiff
path: root/src/chomp
diff options
context:
space:
mode:
authorGreg Brown <gmb60@cam.ac.uk>2021-03-01 13:36:55 +0000
committerGreg Brown <gmb60@cam.ac.uk>2021-03-01 13:36:55 +0000
commit7ba3f96d49489aec9ff4a536c2df416287e49aca (patch)
tree02e5334a179b2103063d91ed8d1ef8ce658427b8 /src/chomp
parent7934aa9af22e8fa3c33a45bc08e732a73c0cabf5 (diff)
Initial work on charsetscharset
Diffstat (limited to 'src/chomp')
-rw-r--r--src/chomp/set.rs165
1 files changed, 146 insertions, 19 deletions
diff --git a/src/chomp/set.rs b/src/chomp/set.rs
index 0661ab6..8f97e8a 100644
--- a/src/chomp/set.rs
+++ b/src/chomp/set.rs
@@ -1,8 +1,31 @@
-use std::collections::BTreeSet;
+use std::{collections::BTreeSet, convert::TryInto};
+
+// From 1.50 std::core::iter::range lines 425--450
+fn step_char_forward(c : char) -> Option<char> {
+ let start = u32::from(c);
+ let mut res = start.checked_add(1)?;
+
+ if start < 0xD800 && 0xD800 <= res {
+ res = start.checked_add(0x800)?;
+ }
+
+ res.try_into().ok()
+}
+
+fn step_char_backward(c : char) -> Option<char> {
+ let start = u32::from(c);
+ let mut res = start.checked_sub(1)?;
+
+ if start >= 0xE000 && 0xE000 > res {
+ res = start.checked_sub(0x800)?;
+ }
+
+ res.try_into().ok()
+}
#[derive(Clone, Debug, Default, Eq, PartialEq, Hash)]
pub struct FirstSet {
- inner: BTreeSet<char>,
+ inner: Vec<(char, char)>,
}
impl FirstSet {
@@ -11,32 +34,108 @@ impl FirstSet {
}
pub fn of_str(s: &str) -> Self {
- let mut inner = BTreeSet::new();
- s.chars().next().map(|c| inner.insert(c));
-
- Self { inner }
+ Self {
+ inner: s.chars().next().into_iter().map(|c| (c, c)).collect(),
+ }
}
pub fn is_empty(&self) -> bool {
self.inner.is_empty()
}
- pub fn union(mut self, mut other: Self) -> Self {
- self.inner.append(&mut other.inner);
- self
+ pub fn union(self, mut other: Self) -> Self {
+ let mut out = Vec::new();
+ let mut iters = [self.into_iter().peekable(), other.into_iter().peekable()];
+
+ 'outer: loop {
+ // Look at the next two ranges, and find the minimum next character
+ let (min, mut max) = match (iters[0].peek(), iters[1].peek()) {
+ // Both inputs exhausted
+ (None, None) => break,
+ // Add remainder to output
+ (None, Some(_)) => {
+ out.extend(iters[1]);
+ break
+ }
+ (Some(_), None) => {
+ out.extend(iters[0]);
+ break
+ }
+ // The interesting case
+ (Some((left, left_max)), Some((right, right_max))) => {
+ if left > right {
+ iters.swap(0, 1);
+ (*right, *right_max)
+ } else {
+ (*left, *left_max)
+ }
+ }
+ };
+
+ max = match step_char_forward(max) {
+ Some(c) => c,
+ None => {
+ // assert!(max == char::MAX)
+ // This swallows all other ranges
+ out.push((min, max));
+ break;
+ }
+ };
+
+ // There are a few cases:
+ // - No overlap:
+ // ----
+ // ----
+ // - Extend:
+ // ----
+ // ----
+ // - Swallow:
+ // ---------
+ // ----
+ // We loop until there is no overlap.
+ // For extend, we step the left and swap.
+ // For swallow, we step the right only
+ while let Some((other_min, other_max)) = iters[1].peek() {
+ if *other_min > max {
+ // No overlap
+ break;
+ }
+
+ // max is one past range, other_max is in range, so we use >=
+ if *other_max >= max {
+ // Extend
+ iters[0].next();
+ iters.swap(0, 1);
+ max = match step_char_forward(*other_max) {
+ Some(c) => c,
+ None => { out.push((min, max)); break 'outer }
+ };
+ } else {
+ // Swallow
+ iters[1].next();
+ }
+ }
+
+ let end = step_char_backward(max).expect("char + 1 - 1 != char");
+ out.push((min, end));
+ iters[0].next();
+ }
+
+ Self {inner: out}
}
pub fn intersect(&self, other: &Self) -> Self {
- Self {
- inner: self.inner.intersection(&other.inner).copied().collect(),
- }
+ todo!()
+ // Self {
+ // inner: self.inner.intersection(&other.inner).copied().collect(),
+ // }
}
}
impl IntoIterator for FirstSet {
- type Item = char;
+ type Item = (char, char);
- type IntoIter = <BTreeSet<char> as IntoIterator>::IntoIter;
+ type IntoIter = <Vec<(char, char)> as IntoIterator>::IntoIter;
fn into_iter(self) -> Self::IntoIter {
self.inner.into_iter()
@@ -58,8 +157,9 @@ impl FlastSet {
}
pub fn union_first(mut self, mut other: FirstSet) -> Self {
- self.inner.append(&mut other.inner);
- self
+ // self.inner.append(&mut other.inner);
+ // self
+ todo!()
}
pub fn union(mut self, mut other: Self) -> Self {
@@ -68,9 +168,10 @@ impl FlastSet {
}
pub fn intersect_first(&self, other: &FirstSet) -> Self {
- Self {
- inner: self.inner.intersection(&other.inner).copied().collect(),
- }
+ // Self {
+ // inner: self.inner.intersection(&other.inner).copied().collect(),
+ // }
+ todo!()
}
pub fn intersect(&self, other: &Self) -> Self {
@@ -89,3 +190,29 @@ impl IntoIterator for FlastSet {
self.inner.into_iter()
}
}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+
+ #[test]
+ fn test_first_union_none() {
+ let left = FirstSet { inner: vec![('A', 'Z'), ('a', 'z')]};
+ let right = FirstSet {inner: vec![]};
+ assert_eq!(left.union(right), left)
+ }
+
+ #[test]
+ fn test_first_union_shallow() {
+ let left = FirstSet { inner: vec![('A', 'Z'), ('a', 'z')]};
+ let right = FirstSet {inner: vec![('b', 'y')]};
+ assert_eq!(left.union(right), left)
+ }
+
+ #[test]
+ fn test_first_union_extend() {
+ let left = FirstSet { inner: vec![('A', 'Z'), ('a', 'z')]};
+ let right = FirstSet {inner: vec![('[', '`')]};
+ assert_eq!(left.union(right), FirstSet {inner: vec![('A', 'z')]})
+ }
+}