diff options
author | Greg Brown <gmb60@cam.ac.uk> | 2021-02-06 13:37:03 +0000 |
---|---|---|
committer | Greg Brown <gmb60@cam.ac.uk> | 2021-02-06 13:37:03 +0000 |
commit | 51ec48e03b2e02be02ccb9826b4638d312df67b4 (patch) | |
tree | d284d34eea27e086658170fd3cc56999cfa931c0 /chomp-macro | |
parent | bd6d4e22a3980ff937ab3e2df48c064bdc2918bd (diff) |
Add `chomp-bench` crate so benchmarks can use a build script.
Diffstat (limited to 'chomp-macro')
-rw-r--r-- | chomp-macro/Cargo.toml | 8 | ||||
-rw-r--r-- | chomp-macro/benches/json/main.rs | 285 | ||||
-rw-r--r-- | chomp-macro/benches/json/nibble.rs | 233 |
3 files changed, 0 insertions, 526 deletions
diff --git a/chomp-macro/Cargo.toml b/chomp-macro/Cargo.toml index 3bae0ef..ae6df66 100644 --- a/chomp-macro/Cargo.toml +++ b/chomp-macro/Cargo.toml @@ -11,13 +11,5 @@ chomp = {path = ".."} version = "1.0.58" features = ["parsing", "proc-macro"] -[dev-dependencies] -chewed = {path = "../chewed"} -criterion = "0.3.3" - [lib] proc_macro = true - -[[bench]] -name = "json" -harness = false diff --git a/chomp-macro/benches/json/main.rs b/chomp-macro/benches/json/main.rs deleted file mode 100644 index f5587b3..0000000 --- a/chomp-macro/benches/json/main.rs +++ /dev/null @@ -1,285 +0,0 @@ -use std::{collections::HashMap, convert::TryInto, fmt, ops::RangeInclusive}; - -use chewed::{IterWrapper, Parse, Parser, TakeError}; -use criterion::{ - criterion_group, criterion_main, AxisScale, BenchmarkId, Criterion, PlotConfiguration, - Throughput, -}; - -mod nibble; - -fn decode_pair(one: u16, other: u16) -> u32 { - // Ranges are confusingly backwards - const LOW_SURROGATE_RANGE: RangeInclusive<u16> = 0xDC00..=0xDFFF; - const HIGH_SURROGATE_RANGE: RangeInclusive<u16> = 0xD800..=0xDBFF; - - let (low, high) = if LOW_SURROGATE_RANGE.contains(&one) { - assert!(HIGH_SURROGATE_RANGE.contains(&other)); - (one, other) - } else { - assert!(LOW_SURROGATE_RANGE.contains(&other)); - assert!(HIGH_SURROGATE_RANGE.contains(&one)); - (other, one) - }; - - u32::from(high - 0xD800) * 0x400 + u32::from(low - 0xDC00) + 0x10000 -} - -#[derive(Debug)] -enum Value { - Null, - Bool(bool), - Number(f64), - String(String), - Array(Vec<Value>), - Object(HashMap<String, Value>), -} - -impl fmt::Display for Value { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - fn write_str(s: &str, f: &mut fmt::Formatter<'_>) -> fmt::Result { - write!(f, "\"")?; - for c in s.chars() { - match c { - '\x20'..='\x21' | '\x23'..='\x5B' | '\x5D'..='\u{10FFFF}' => write!(f, "{}", c), - '\x22' => write!(f, r#"\""#), - '\x5C' => write!(f, r#"\\"#), - // '\x2F' => write!(f, r#"\/"#), - '\x08' => write!(f, r#"\b"#), - '\x0C' => write!(f, r#"\f"#), - '\x0A' => write!(f, r#"\n"#), - '\x0D' => write!(f, r#"\r"#), - '\x09' => write!(f, r#"\t"#), - _ => { - let codepoint = u32::from(c) - 0x10000; - let high: u16 = (codepoint / 0x400 + 0xD800).try_into().unwrap(); - let low: u16 = (codepoint % 0x400 + 0xDC00).try_into().unwrap(); - write!(f, r#"\u{:04X}\u{:04X}"#, high, low) - } - }?; - } - write!(f, "\"") - } - - match self { - Self::Null => write!(f, "null"), - Self::Bool(b) => write!(f, "{}", b), - Self::Number(n) => write!(f, "{}", n), - Self::String(s) => write_str(s, f), - Self::Array(a) => { - write!(f, "[")?; - let mut iter = a.iter(); - if let Some(last) = iter.next_back() { - for val in iter { - write!(f, "{}, ", val)?; - } - - write!(f, "{}", last)?; - } - write!(f, "]") - } - Self::Object(o) => { - '{'.fmt(f)?; - let mut iter = o.iter(); - if let Some((last_key, last_val)) = iter.next() { - for (key, val) in iter { - write_str(key, f)?; - write!(f, " : {}, ", val)?; - } - - write_str(last_key, f)?; - write!(f, " : {}", last_val)?; - } - '}'.fmt(f) - } - } - } -} - -impl Parse for Value { - fn take<P: chewed::Parser + ?Sized>(input: &mut P) -> Result<Self, TakeError> { - const WS: &[char] = &[' ', '\t', '\n', '\r']; - const FIRST: &[char] = &[ - 't', 'f', 'n', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '-', '"', '[', '{', - ]; - const FIRST_WS: &[char] = &[ - 't', 'f', 'n', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '-', '"', '[', '{', - ' ', '\t', '\n', '\r', - ]; - - fn skip_ws<P: chewed::Parser + ?Sized>(input: &mut P) { - input.skip_while(|c| WS.contains(&c)) - } - - fn parse_u16<P: chewed::Parser + ?Sized>(input: &mut P) -> Result<u16, TakeError> { - const HEX: &[char] = &[ - '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f', - 'A', 'B', 'C', 'D', 'E', 'F', - ]; - let mut chars = ['\0'; 4]; - input.take_chars_from(HEX, &mut chars)?; - let mut out = 0; - for c in &chars { - match c { - '0' => out *= 16, - '1' => out *= 16 + 1, - '2' => out *= 16 + 2, - '3' => out *= 16 + 3, - '4' => out *= 16 + 4, - '5' => out *= 16 + 5, - '6' => out *= 16 + 6, - '7' => out *= 16 + 7, - '8' => out *= 16 + 8, - '9' => out *= 16 + 9, - 'a' | 'A' => out *= 16 + 10, - 'b' | 'B' => out *= 16 + 11, - 'c' | 'C' => out *= 16 + 12, - 'd' | 'D' => out *= 16 + 13, - 'e' | 'E' => out *= 16 + 14, - 'f' | 'F' => out *= 16 + 15, - _ => unreachable!(), - }; - } - Ok(out) - } - - fn parse_str<P: chewed::Parser + ?Sized>(input: &mut P) -> Result<String, TakeError> { - input.consume_str("\"")?; - let mut s = String::new(); - loop { - match input - .next() - .ok_or_else(|| TakeError::EndOfStream(input.pos()))? - { - '"' => return Ok(s), - c @ '\x20'..='\x21' | c @ '\x23'..='\x5B' | c @ '\x5D'..='\u{10FFFF}' => { - s.push(c) - } - '\\' => { - match input - .next() - .ok_or_else(|| TakeError::EndOfStream(input.pos()))? - { - '\"' => s.push('\x22'), - '\\' => s.push('\x5C'), - '/' => s.push('\x2F'), - 'b' => s.push('\x08'), - 'f' => s.push('\x0C'), - 'n' => s.push('\x0A'), - 'r' => s.push('\x0D'), - 't' => s.push('\x09'), - 'u' => { - let v = parse_u16(input)?; - let codepoint = if (0xD800..=0xDFFF).contains(&v) { - input.consume_str(r#"\u"#)?; - let other = parse_u16(input)?; - decode_pair(v, other) - } else { - u32::from(v) - }; - s.push(codepoint.try_into().unwrap()) - } - c => return Err(TakeError::BadBranch(input.pos(), c, todo!())), - } - } - c => return Err(TakeError::BadBranch(input.pos(), c, todo!())), - } - } - } - - skip_ws(input); - - let res = match input - .peek() - .ok_or_else(|| TakeError::EndOfStream(input.pos()))? - { - 'n' => input.consume_str("null").map(|_| Value::Null), - 't' => input.consume_str("true").map(|_| Value::Bool(true)), - 'f' => input.consume_str("false").map(|_| Value::Bool(false)), - '0'..='9' | '-' => { - let mut s = String::new(); - s.push(input.next().unwrap()); - while input.peek().map_or(false, |c| { - matches!(c, '0'..='9' | '+' | '-' | '.' | 'e' | 'E') - }) { - s.push(input.next().unwrap()); - } - s.parse().map(Value::Number).map_err(|_| todo!()) - } - '"' => parse_str(input).map(Value::String), - '[' => { - const ARRAY_TAIL: &[char] = &[',', ']']; - input.consume_str("[")?; - let a = input - .iter_strict(Self::take, ',', ']', ARRAY_TAIL, FIRST_WS) - .collect::<Result<_, _>>() - .map(Value::Array)?; - input.consume_str("]")?; - Ok(a) - } - '{' => { - const OBJECT_TAIL: &[char] = &[',', '}']; - input.consume_str("{")?; - let o = input - .iter_strict( - |p| { - skip_ws(p); - let key = parse_str(p)?; - skip_ws(p); - p.consume_str(":")?; - p.take().map(|val| (key, val)) - }, - ',', - '}', - OBJECT_TAIL, - &['"', ' ', '\t', '\n', '\r'], - ) - .collect::<Result<_, _>>() - .map(Value::Object)?; - input.consume_str("}")?; - Ok(o) - } - c => Err(TakeError::BadBranch(input.pos(), c, FIRST)), - }?; - - skip_ws(input); - Ok(res) - } -} - -const INPUTS: &[&str] = &[ - r#"true"#, - r#"[true, false]"#, - r#"{"first" : null, "second" : 123}"#, - r#"{"first": [ true, "Hello there" ], "second": [123, -12.4e-7]}"#, - r#"{"first": [ true, "Hello there" ], "second": [123, -12.4e-7], "third": {"left": "Random text", "right": ["\ud83c\udf24\ufe0f"]}}"#, -]; - -fn parse_chewed(input: &str) -> Value { - IterWrapper::new(input.chars()) - .parse::<nibble::Ast>() - .unwrap() - .into() -} - -fn parse_handwritten(input: &str) -> Value { - IterWrapper::new(input.chars()).parse().unwrap() -} - -fn bench_parse(c: &mut Criterion) { - let plot_config = PlotConfiguration::default().summary_scale(AxisScale::Logarithmic); - let mut group = c.benchmark_group("JSON"); - group.plot_config(plot_config); - for (i, input) in INPUTS.iter().enumerate() { - group.throughput(Throughput::Bytes(input.len() as u64)); - group.bench_with_input(BenchmarkId::new("Chewed", i), *input, |b, i| { - b.iter(|| parse_chewed(i)) - }); - group.bench_with_input(BenchmarkId::new("Handwritten", i), *input, |b, i| { - b.iter(|| parse_handwritten(i)) - }); - } -} - -criterion_group!(benches, bench_parse); -criterion_main!(benches); diff --git a/chomp-macro/benches/json/nibble.rs b/chomp-macro/benches/json/nibble.rs deleted file mode 100644 index 511c54a..0000000 --- a/chomp-macro/benches/json/nibble.rs +++ /dev/null @@ -1,233 +0,0 @@ -use super::{decode_pair, Value}; - -use std::{collections::HashMap, convert::TryInto, ops::RangeInclusive}; - -use chomp_macro::nibble; - -// Note: this is only an ASCII subset. Need to add character sets. -nibble! { - let opt(x) = _ : None | x : Some; - let plus(x) = [rec](x . opt(rec)); - let star(x) = [rec](opt(x . rec)); - let sep(x, p) = [rec](x . opt(p . rec)); - - let ws_char = " " | "\t" | "\n" | "\r"; - let ws = star(ws_char); - let digit_1_9 = "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9"; - let digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9"; - - let unsigned_number = (("0" | digit_1_9 . star(digit)) : Int) - . (opt("." . plus(digit)) : Frac) - . (opt(("e" | "E") . opt("+" | "-") . plus(digit)) : Exp); - let number = unsigned_number : Positive | "-" . unsigned_number : Negative; - - let hex = - "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" | - "a" | "b" | "c" | "d" | "e" | "f" | - "A" | "B" | "C" | "D" | "E" | "F" ; - let char = - (" " | "!" | "#" | "$" | "%" | "&" | "'" | - "(" | ")" | "*" | "+" | "," | "-" | "." | "/" | - "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | - "8" | "9" | ":" | ";" | "<" | "=" | ">" | "?" | - "@" | "A" | "B" | "C" | "D" | "E" | "F" | "G" | - "H" | "I" | "J" | "K" | "L" | "M" | "N" | "O" | - "P" | "Q" | "R" | "S" | "T" | "U" | "V" | "W" | - "X" | "Y" | "Z" | "[" | "]" | "^" | "_" | - "`" | "a" | "b" | "c" | "d" | "e" | "f" | "g" | - "h" | "i" | "j" | "k" | "l" | "m" | "n" | "o" | - "p" | "q" | "r" | "s" | "t" | "u" | "v" | "w" | - "x" | "y" | "z" | "{" | "|" | "}" | "~") : Literal | - "\\" . ( - ("\"" | "\\" | "/" | "b" | "f" | "n" | "r" | "t") : Ascii | - "u" . hex . hex . hex . hex : Unicode - ) : Escape; - - let string = "\"" . star(char) . "\""; - - let member(value) = (string : Key) . ws . ":" . ws . (value : Value) . ws; - let object(value) = "{" . ws . opt(sep(member(value), "," . ws)) . "}"; - - let array(value) = "[" . ws . opt(sep(value . ws, "," . ws)) . "]"; - - let value = [value]( - "true" : True | - "false" : False | - "null" : Null | - number : Number | - string : String | - object(value) : Object | - array(value) : Array - ); - - match [rec](ws_char . rec | value . ws); -} - -impl From<Ast> for Value { - fn from(mut ast: Ast) -> Self { - loop { - match ast.0 { - Alt184::Branch1(cat) => ast = *cat.rec1, - Alt184::Branch2(cat) => return cat.value1.into(), - } - } - } -} - -impl From<Value1> for Value { - fn from(value: Value1) -> Self { - match value.0 { - Alt182::Null1(_) => Self::Null, - Alt182::True1(_) => Self::Bool(true), - Alt182::False1(_) => Self::Bool(false), - Alt182::Number1(n) => Self::Number(n.into()), - Alt182::String1(s) => Self::String(s.into()), - Alt182::Object1(o) => Self::Object(o.into()), - Alt182::Array1(a) => Self::Array(a.into()), - } - } -} - -impl From<Number1> for f64 { - fn from(number: Number1) -> Self { - number.to_string().parse().unwrap() - } -} - -impl From<String1> for String { - fn from(string: String1) -> Self { - string.star1.into_iter().collect() - } -} - -impl Iterator for Star2 { - type Item = char; - - fn next(&mut self) -> Option<Self::Item> { - fn next(star: &mut Star2) -> Option<Char1> { - match std::mem::replace(star, Star2(Opt9::None1(Epsilon))).0 { - Opt9::None1(_) => None, - Opt9::Some1(s) => { - *star = *s.rec1; - Some(s.char1) - } - } - } - - fn decode(u: Unicode1) -> u16 { - let chars: [char; 4] = [u.hex1.into(), u.hex2.into(), u.hex3.into(), u.hex4.into()]; - u16::from_str_radix(&chars.iter().collect::<String>(), 16).unwrap() - } - - const SURROGATE_PAIR_RANGE: RangeInclusive<u16> = 0xD800..=0xDFFF; - - match next(self) { - None => None, - Some(Char1::Literal1(l)) => Some(l.into()), - Some(Char1::Escape1(e)) => match e.1 { - Alt148::Ascii1(Ascii1::Branch1(_)) => Some('\"'), - Alt148::Ascii1(Ascii1::Branch2(_)) => Some('\\'), - Alt148::Ascii1(Ascii1::Branch3(_)) => Some('/'), - Alt148::Ascii1(Ascii1::Branch4(_)) => Some('\x08'), - Alt148::Ascii1(Ascii1::Branch5(_)) => Some('\x0c'), - Alt148::Ascii1(Ascii1::Branch6(_)) => Some('\n'), - Alt148::Ascii1(Ascii1::Branch7(_)) => Some('\r'), - Alt148::Ascii1(Ascii1::Branch8(_)) => Some('\t'), - Alt148::Unicode1(u) => { - let codepoint = decode(u); - - if SURROGATE_PAIR_RANGE.contains(&codepoint) { - let other = if let Some(Char1::Escape1(e)) = next(self) { - if let Alt148::Unicode1(u) = e.1 { - decode(u) - } else { - panic!() - } - } else { - panic!() - }; - - assert!(SURROGATE_PAIR_RANGE.contains(&other)); - - Some(decode_pair(codepoint, other).try_into().unwrap()) - } else { - Some(u32::from(codepoint).try_into().unwrap()) - } - } - }, - } - } -} - -impl From<Object1> for HashMap<String, Value> { - fn from(object: Object1) -> Self { - match object.opt1 { - Opt12::None1(_) => HashMap::new(), - Opt12::Sep1(s) => s - .into_iter() - .map(|m| (m.string1.into(), (*m.value1).into())) - .collect(), - } - } -} - -impl IntoIterator for Sep1 { - type Item = Member1; - - type IntoIter = Sep1Iter; - - fn into_iter(self) -> Self::IntoIter { - Sep1Iter(Some(self)) - } -} - -pub struct Sep1Iter(Option<Sep1>); - -impl Iterator for Sep1Iter { - type Item = Member1; - - fn next(&mut self) -> Option<Self::Item> { - let inner = self.0.take()?.0; - let res = inner.member1; - self.0 = match inner.opt1 { - Opt11::None1(_) => None, - Opt11::Some1(s) => Some(*s.rec1), - }; - Some(res) - } -} - -impl From<Array1> for Vec<Value> { - fn from(array: Array1) -> Self { - match array.opt1 { - Opt14::None1(_) => Vec::new(), - Opt14::Sep1(s) => s.into_iter().map(|x| (*x.value1).into()).collect(), - } - } -} - -impl IntoIterator for Sep2 { - type Item = X1; - - type IntoIter = Sep2Iter; - - fn into_iter(self) -> Self::IntoIter { - Sep2Iter(Some(self)) - } -} - -pub struct Sep2Iter(Option<Sep2>); - -impl Iterator for Sep2Iter { - type Item = X1; - - fn next(&mut self) -> Option<Self::Item> { - let inner = self.0.take()?.0; - let res = inner.x1; - self.0 = match inner.opt1 { - Opt13::None1(_) => None, - Opt13::Some1(s) => Some(*s.rec1), - }; - Some(res) - } -} |