use std::{collections::BTreeSet, convert::TryInto}; // From 1.50 std::core::iter::range lines 425--450 fn step_char_forward(c : char) -> Option { let start = u32::from(c); let mut res = start.checked_add(1)?; if start < 0xD800 && 0xD800 <= res { res = start.checked_add(0x800)?; } res.try_into().ok() } fn step_char_backward(c : char) -> Option { let start = u32::from(c); let mut res = start.checked_sub(1)?; if start >= 0xE000 && 0xE000 > res { res = start.checked_sub(0x800)?; } res.try_into().ok() } #[derive(Clone, Debug, Default, Eq, PartialEq, Hash)] pub struct FirstSet { inner: Vec<(char, char)>, } impl FirstSet { pub fn new() -> Self { Self::default() } pub fn of_str(s: &str) -> Self { Self { inner: s.chars().next().into_iter().map(|c| (c, c)).collect(), } } pub fn is_empty(&self) -> bool { self.inner.is_empty() } pub fn union(self, mut other: Self) -> Self { let mut out = Vec::new(); let mut iters = [self.into_iter().peekable(), other.into_iter().peekable()]; 'outer: loop { // Look at the next two ranges, and find the minimum next character let (min, mut max) = match (iters[0].peek(), iters[1].peek()) { // Both inputs exhausted (None, None) => break, // Add remainder to output (None, Some(_)) => { out.extend(iters[1]); break } (Some(_), None) => { out.extend(iters[0]); break } // The interesting case (Some((left, left_max)), Some((right, right_max))) => { if left > right { iters.swap(0, 1); (*right, *right_max) } else { (*left, *left_max) } } }; max = match step_char_forward(max) { Some(c) => c, None => { // assert!(max == char::MAX) // This swallows all other ranges out.push((min, max)); break; } }; // There are a few cases: // - No overlap: // ---- // ---- // - Extend: // ---- // ---- // - Swallow: // --------- // ---- // We loop until there is no overlap. // For extend, we step the left and swap. // For swallow, we step the right only while let Some((other_min, other_max)) = iters[1].peek() { if *other_min > max { // No overlap break; } // max is one past range, other_max is in range, so we use >= if *other_max >= max { // Extend iters[0].next(); iters.swap(0, 1); max = match step_char_forward(*other_max) { Some(c) => c, None => { out.push((min, max)); break 'outer } }; } else { // Swallow iters[1].next(); } } let end = step_char_backward(max).expect("char + 1 - 1 != char"); out.push((min, end)); iters[0].next(); } Self {inner: out} } pub fn intersect(&self, other: &Self) -> Self { todo!() // Self { // inner: self.inner.intersection(&other.inner).copied().collect(), // } } } impl IntoIterator for FirstSet { type Item = (char, char); type IntoIter = as IntoIterator>::IntoIter; fn into_iter(self) -> Self::IntoIter { self.inner.into_iter() } } #[derive(Clone, Debug, Default, Eq, PartialEq, Hash)] pub struct FlastSet { inner: BTreeSet, } impl FlastSet { pub fn new() -> Self { Self::default() } pub fn is_empty(&self) -> bool { self.inner.is_empty() } pub fn union_first(mut self, mut other: FirstSet) -> Self { // self.inner.append(&mut other.inner); // self todo!() } pub fn union(mut self, mut other: Self) -> Self { self.inner.append(&mut other.inner); self } pub fn intersect_first(&self, other: &FirstSet) -> Self { // Self { // inner: self.inner.intersection(&other.inner).copied().collect(), // } todo!() } pub fn intersect(&self, other: &Self) -> Self { Self { inner: self.inner.intersection(&other.inner).copied().collect(), } } } impl IntoIterator for FlastSet { type Item = char; type IntoIter = as IntoIterator>::IntoIter; fn into_iter(self) -> Self::IntoIter { self.inner.into_iter() } } #[cfg(test)] mod tests { use super::*; #[test] fn test_first_union_none() { let left = FirstSet { inner: vec![('A', 'Z'), ('a', 'z')]}; let right = FirstSet {inner: vec![]}; assert_eq!(left.union(right), left) } #[test] fn test_first_union_shallow() { let left = FirstSet { inner: vec![('A', 'Z'), ('a', 'z')]}; let right = FirstSet {inner: vec![('b', 'y')]}; assert_eq!(left.union(right), left) } #[test] fn test_first_union_extend() { let left = FirstSet { inner: vec![('A', 'Z'), ('a', 'z')]}; let right = FirstSet {inner: vec![('[', '`')]}; assert_eq!(left.union(right), FirstSet {inner: vec![('A', 'z')]}) } }