summary | refs | log | tree | commit | diff
path: root/chomp-macro/benches/json
diff options
context:
space:
mode:
Diffstat (limited to 'chomp-macro/benches/json')
-rw-r--r-- chomp-macro/benches/json/main.rs 285
-rw-r--r-- chomp-macro/benches/json/nibble.rs 233
2 files changed, 0 insertions, 518 deletions
diff --git a/chomp-macro/benches/json/main.rs b/chomp-macro/benches/json/main.rs
deleted file mode 100644
index f5587b3..0000000
--- a/chomp-macro/benches/json/main.rs
+++ /dev/null
@@ -1,285 +0,0 @@
-use std::{collections::HashMap, convert::TryInto, fmt, ops::RangeInclusive};
-
-use chewed::{IterWrapper, Parse, Parser, TakeError};
-use criterion::{
- criterion_group, criterion_main, AxisScale, BenchmarkId, Criterion, PlotConfiguration,
- Throughput,
-};
-
-mod nibble;
-
/// Combines the two halves of a UTF-16 surrogate pair into a single code point.
///
/// The halves may be passed in either order: whichever argument lies in the
/// low-surrogate range is used as the trailing unit and the other one as the
/// leading unit.
///
/// # Panics
///
/// Panics unless the arguments are exactly one high surrogate
/// (`0xD800..=0xDBFF`) and one low surrogate (`0xDC00..=0xDFFF`).
fn decode_pair(one: u16, other: u16) -> u32 {
    // Despite the names, the *high* surrogate occupies the numerically lower range.
    const HIGH_SURROGATE_RANGE: RangeInclusive<u16> = 0xD800..=0xDBFF;
    const LOW_SURROGATE_RANGE: RangeInclusive<u16> = 0xDC00..=0xDFFF;

    let one_is_low = LOW_SURROGATE_RANGE.contains(&one);
    if one_is_low {
        assert!(HIGH_SURROGATE_RANGE.contains(&other));
    } else {
        assert!(LOW_SURROGATE_RANGE.contains(&other));
        assert!(HIGH_SURROGATE_RANGE.contains(&one));
    }
    let (trail, lead) = if one_is_low {
        (one, other)
    } else {
        (other, one)
    };

    // Each half contributes ten bits; the pair encodes an offset above U+10000.
    let lead_bits = u32::from(lead - 0xD800);
    let trail_bits = u32::from(trail - 0xDC00);
    lead_bits * 0x400 + trail_bits + 0x10000
}
-
/// An in-memory JSON value, the common output type of both benchmarked parsers.
#[derive(Debug)]
enum Value {
    /// The literal `null`.
    Null,
    /// The literals `true` and `false`.
    Bool(bool),
    /// A number, held as an `f64`.
    Number(f64),
    /// A string with all escape sequences already decoded.
    String(String),
    /// An ordered sequence of values.
    Array(Vec<Value>),
    /// Named members; they are visited in the `HashMap`'s (unspecified) order.
    Object(HashMap<String, Value>),
}

/// Writes the value as JSON text.
///
/// Array elements are separated by `", "` and object members are written as
/// `"key" : value`, also separated by `", "`.
impl fmt::Display for Value {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        /// Writes `s` as a double-quoted JSON string.
        ///
        /// `"` and `\` are backslash-escaped, the five control characters with
        /// short forms use them (`\b`, `\f`, `\n`, `\r`, `\t`), every other
        /// character below U+0020 becomes `\u00XX`, and everything else is
        /// written unchanged.
        fn write_json_string(s: &str, f: &mut fmt::Formatter<'_>) -> fmt::Result {
            f.write_str("\"")?;
            for c in s.chars() {
                match c {
                    '\x22' => f.write_str(r#"\""#),
                    '\x5C' => f.write_str(r#"\\"#),
                    '\x08' => f.write_str(r#"\b"#),
                    '\x0C' => f.write_str(r#"\f"#),
                    '\x0A' => f.write_str(r#"\n"#),
                    '\x0D' => f.write_str(r#"\r"#),
                    '\x09' => f.write_str(r#"\t"#),
                    // The remaining control characters have no short escape.
                    // They are all below U+0020, so one `\uXXXX` unit is enough
                    // (no surrogate pair, and no subtraction that could underflow).
                    '\x00'..='\x1F' => write!(f, "\\u{:04X}", u32::from(c)),
                    _ => write!(f, "{}", c),
                }?;
            }
            f.write_str("\"")
        }

        match self {
            Self::Null => f.write_str("null"),
            Self::Bool(b) => write!(f, "{}", b),
            Self::Number(n) => write!(f, "{}", n),
            Self::String(s) => write_json_string(s, f),
            Self::Array(items) => {
                f.write_str("[")?;
                for (index, item) in items.iter().enumerate() {
                    if index > 0 {
                        f.write_str(", ")?;
                    }
                    write!(f, "{}", item)?;
                }
                f.write_str("]")
            }
            Self::Object(members) => {
                // Written with `write_str` rather than `'{'.fmt(f)`, which needs
                // the `Display` trait itself (not just the `fmt` module) in scope.
                f.write_str("{")?;
                for (index, (key, val)) in members.iter().enumerate() {
                    if index > 0 {
                        f.write_str(", ")?;
                    }
                    write_json_string(key, f)?;
                    write!(f, " : {}", val)?;
                }
                f.write_str("}")
            }
        }
    }
}
-
-impl Parse for Value {
- fn take<P: chewed::Parser + ?Sized>(input: &mut P) -> Result<Self, TakeError> {
- const WS: &[char] = &[' ', '\t', '\n', '\r'];
- const FIRST: &[char] = &[
- 't', 'f', 'n', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '-', '"', '[', '{',
- ];
- const FIRST_WS: &[char] = &[
- 't', 'f', 'n', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '-', '"', '[', '{',
- ' ', '\t', '\n', '\r',
- ];
-
- fn skip_ws<P: chewed::Parser + ?Sized>(input: &mut P) {
- input.skip_while(|c| WS.contains(&c))
- }
-
- fn parse_u16<P: chewed::Parser + ?Sized>(input: &mut P) -> Result<u16, TakeError> {
- const HEX: &[char] = &[
- '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f',
- 'A', 'B', 'C', 'D', 'E', 'F',
- ];
- let mut chars = ['\0'; 4];
- input.take_chars_from(HEX, &mut chars)?;
- let mut out = 0;
- for c in &chars {
- match c {
- '0' => out *= 16,
- '1' => out *= 16 + 1,
- '2' => out *= 16 + 2,
- '3' => out *= 16 + 3,
- '4' => out *= 16 + 4,
- '5' => out *= 16 + 5,
- '6' => out *= 16 + 6,
- '7' => out *= 16 + 7,
- '8' => out *= 16 + 8,
- '9' => out *= 16 + 9,
- 'a' | 'A' => out *= 16 + 10,
- 'b' | 'B' => out *= 16 + 11,
- 'c' | 'C' => out *= 16 + 12,
- 'd' | 'D' => out *= 16 + 13,
- 'e' | 'E' => out *= 16 + 14,
- 'f' | 'F' => out *= 16 + 15,
- _ => unreachable!(),
- };
- }
- Ok(out)
- }
-
- fn parse_str<P: chewed::Parser + ?Sized>(input: &mut P) -> Result<String, TakeError> {
- input.consume_str("\"")?;
- let mut s = String::new();
- loop {
- match input
- .next()
- .ok_or_else(|| TakeError::EndOfStream(input.pos()))?
- {
- '"' => return Ok(s),
- c @ '\x20'..='\x21' | c @ '\x23'..='\x5B' | c @ '\x5D'..='\u{10FFFF}' => {
- s.push(c)
- }
- '\\' => {
- match input
- .next()
- .ok_or_else(|| TakeError::EndOfStream(input.pos()))?
- {
- '\"' => s.push('\x22'),
- '\\' => s.push('\x5C'),
- '/' => s.push('\x2F'),
- 'b' => s.push('\x08'),
- 'f' => s.push('\x0C'),
- 'n' => s.push('\x0A'),
- 'r' => s.push('\x0D'),
- 't' => s.push('\x09'),
- 'u' => {
- let v = parse_u16(input)?;
- let codepoint = if (0xD800..=0xDFFF).contains(&v) {
- input.consume_str(r#"\u"#)?;
- let other = parse_u16(input)?;
- decode_pair(v, other)
- } else {
- u32::from(v)
- };
- s.push(codepoint.try_into().unwrap())
- }
- c => return Err(TakeError::BadBranch(input.pos(), c, todo!())),
- }
- }
- c => return Err(TakeError::BadBranch(input.pos(), c, todo!())),
- }
- }
- }
-
- skip_ws(input);
-
- let res = match input
- .peek()
- .ok_or_else(|| TakeError::EndOfStream(input.pos()))?
- {
- 'n' => input.consume_str("null").map(|_| Value::Null),
- 't' => input.consume_str("true").map(|_| Value::Bool(true)),
- 'f' => input.consume_str("false").map(|_| Value::Bool(false)),
- '0'..='9' | '-' => {
- let mut s = String::new();
- s.push(input.next().unwrap());
- while input.peek().map_or(false, |c| {
- matches!(c, '0'..='9' | '+' | '-' | '.' | 'e' | 'E')
- }) {
- s.push(input.next().unwrap());
- }
- s.parse().map(Value::Number).map_err(|_| todo!())
- }
- '"' => parse_str(input).map(Value::String),
- '[' => {
- const ARRAY_TAIL: &[char] = &[',', ']'];
- input.consume_str("[")?;
- let a = input
- .iter_strict(Self::take, ',', ']', ARRAY_TAIL, FIRST_WS)
- .collect::<Result<_, _>>()
- .map(Value::Array)?;
- input.consume_str("]")?;
- Ok(a)
- }
- '{' => {
- const OBJECT_TAIL: &[char] = &[',', '}'];
- input.consume_str("{")?;
- let o = input
- .iter_strict(
- |p| {
- skip_ws(p);
- let key = parse_str(p)?;
- skip_ws(p);
- p.consume_str(":")?;
- p.take().map(|val| (key, val))
- },
- ',',
- '}',
- OBJECT_TAIL,
- &['"', ' ', '\t', '\n', '\r'],
- )
- .collect::<Result<_, _>>()
- .map(Value::Object)?;
- input.consume_str("}")?;
- Ok(o)
- }
- c => Err(TakeError::BadBranch(input.pos(), c, FIRST)),
- }?;
-
- skip_ws(input);
- Ok(res)
- }
-}
-
/// JSON documents fed to both parsers, each one longer than the one before.
///
/// The last entry contains `\u` escapes, including a surrogate pair.
const INPUTS: &[&str] = &[
    "true",
    "[true, false]",
    r#"{"first" : null, "second" : 123}"#,
    r#"{"first": [ true, "Hello there" ], "second": [123, -12.4e-7]}"#,
    r#"{"first": [ true, "Hello there" ], "second": [123, -12.4e-7], "third": {"left": "Random text", "right": ["\ud83c\udf24\ufe0f"]}}"#,
];
-
-fn parse_chewed(input: &str) -> Value {
- IterWrapper::new(input.chars())
- .parse::<nibble::Ast>()
- .unwrap()
- .into()
-}
-
-fn parse_handwritten(input: &str) -> Value {
- IterWrapper::new(input.chars()).parse().unwrap()
-}
-
-fn bench_parse(c: &mut Criterion) {
- let plot_config = PlotConfiguration::default().summary_scale(AxisScale::Logarithmic);
- let mut group = c.benchmark_group("JSON");
- group.plot_config(plot_config);
- for (i, input) in INPUTS.iter().enumerate() {
- group.throughput(Throughput::Bytes(input.len() as u64));
- group.bench_with_input(BenchmarkId::new("Chewed", i), *input, |b, i| {
- b.iter(|| parse_chewed(i))
- });
- group.bench_with_input(BenchmarkId::new("Handwritten", i), *input, |b, i| {
- b.iter(|| parse_handwritten(i))
- });
- }
-}
-
-criterion_group!(benches, bench_parse);
-criterion_main!(benches);
diff --git a/chomp-macro/benches/json/nibble.rs b/chomp-macro/benches/json/nibble.rs
deleted file mode 100644
index 511c54a..0000000
--- a/chomp-macro/benches/json/nibble.rs
+++ /dev/null
@@ -1,233 +0,0 @@
-use super::{decode_pair, Value};
-
-use std::{collections::HashMap, convert::TryInto, ops::RangeInclusive};
-
-use chomp_macro::nibble;
-
-// Note: this is only an ASCII subset. Need to add character sets.
-nibble! {
- let opt(x) = _ : None | x : Some;
- let plus(x) = [rec](x . opt(rec));
- let star(x) = [rec](opt(x . rec));
- let sep(x, p) = [rec](x . opt(p . rec));
-
- let ws_char = " " | "\t" | "\n" | "\r";
- let ws = star(ws_char);
- let digit_1_9 = "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9";
- let digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9";
-
- let unsigned_number = (("0" | digit_1_9 . star(digit)) : Int)
- . (opt("." . plus(digit)) : Frac)
- . (opt(("e" | "E") . opt("+" | "-") . plus(digit)) : Exp);
- let number = unsigned_number : Positive | "-" . unsigned_number : Negative;
-
- let hex =
- "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" |
- "a" | "b" | "c" | "d" | "e" | "f" |
- "A" | "B" | "C" | "D" | "E" | "F" ;
- let char =
- (" " | "!" | "#" | "$" | "%" | "&" | "'" |
- "(" | ")" | "*" | "+" | "," | "-" | "." | "/" |
- "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" |
- "8" | "9" | ":" | ";" | "<" | "=" | ">" | "?" |
- "@" | "A" | "B" | "C" | "D" | "E" | "F" | "G" |
- "H" | "I" | "J" | "K" | "L" | "M" | "N" | "O" |
- "P" | "Q" | "R" | "S" | "T" | "U" | "V" | "W" |
- "X" | "Y" | "Z" | "[" | "]" | "^" | "_" |
- "`" | "a" | "b" | "c" | "d" | "e" | "f" | "g" |
- "h" | "i" | "j" | "k" | "l" | "m" | "n" | "o" |
- "p" | "q" | "r" | "s" | "t" | "u" | "v" | "w" |
- "x" | "y" | "z" | "{" | "|" | "}" | "~") : Literal |
- "\\" . (
- ("\"" | "\\" | "/" | "b" | "f" | "n" | "r" | "t") : Ascii |
- "u" . hex . hex . hex . hex : Unicode
- ) : Escape;
-
- let string = "\"" . star(char) . "\"";
-
- let member(value) = (string : Key) . ws . ":" . ws . (value : Value) . ws;
- let object(value) = "{" . ws . opt(sep(member(value), "," . ws)) . "}";
-
- let array(value) = "[" . ws . opt(sep(value . ws, "," . ws)) . "]";
-
- let value = [value](
- "true" : True |
- "false" : False |
- "null" : Null |
- number : Number |
- string : String |
- object(value) : Object |
- array(value) : Array
- );
-
- match [rec](ws_char . rec | value . ws);
-}
-
-impl From<Ast> for Value {
- fn from(mut ast: Ast) -> Self {
- loop {
- match ast.0 {
- Alt184::Branch1(cat) => ast = *cat.rec1,
- Alt184::Branch2(cat) => return cat.value1.into(),
- }
- }
- }
-}
-
-impl From<Value1> for Value {
- fn from(value: Value1) -> Self {
- match value.0 {
- Alt182::Null1(_) => Self::Null,
- Alt182::True1(_) => Self::Bool(true),
- Alt182::False1(_) => Self::Bool(false),
- Alt182::Number1(n) => Self::Number(n.into()),
- Alt182::String1(s) => Self::String(s.into()),
- Alt182::Object1(o) => Self::Object(o.into()),
- Alt182::Array1(a) => Self::Array(a.into()),
- }
- }
-}
-
-impl From<Number1> for f64 {
- fn from(number: Number1) -> Self {
- number.to_string().parse().unwrap()
- }
-}
-
-impl From<String1> for String {
- fn from(string: String1) -> Self {
- string.star1.into_iter().collect()
- }
-}
-
-impl Iterator for Star2 {
- type Item = char;
-
- fn next(&mut self) -> Option<Self::Item> {
- fn next(star: &mut Star2) -> Option<Char1> {
- match std::mem::replace(star, Star2(Opt9::None1(Epsilon))).0 {
- Opt9::None1(_) => None,
- Opt9::Some1(s) => {
- *star = *s.rec1;
- Some(s.char1)
- }
- }
- }
-
- fn decode(u: Unicode1) -> u16 {
- let chars: [char; 4] = [u.hex1.into(), u.hex2.into(), u.hex3.into(), u.hex4.into()];
- u16::from_str_radix(&chars.iter().collect::<String>(), 16).unwrap()
- }
-
- const SURROGATE_PAIR_RANGE: RangeInclusive<u16> = 0xD800..=0xDFFF;
-
- match next(self) {
- None => None,
- Some(Char1::Literal1(l)) => Some(l.into()),
- Some(Char1::Escape1(e)) => match e.1 {
- Alt148::Ascii1(Ascii1::Branch1(_)) => Some('\"'),
- Alt148::Ascii1(Ascii1::Branch2(_)) => Some('\\'),
- Alt148::Ascii1(Ascii1::Branch3(_)) => Some('/'),
- Alt148::Ascii1(Ascii1::Branch4(_)) => Some('\x08'),
- Alt148::Ascii1(Ascii1::Branch5(_)) => Some('\x0c'),
- Alt148::Ascii1(Ascii1::Branch6(_)) => Some('\n'),
- Alt148::Ascii1(Ascii1::Branch7(_)) => Some('\r'),
- Alt148::Ascii1(Ascii1::Branch8(_)) => Some('\t'),
- Alt148::Unicode1(u) => {
- let codepoint = decode(u);
-
- if SURROGATE_PAIR_RANGE.contains(&codepoint) {
- let other = if let Some(Char1::Escape1(e)) = next(self) {
- if let Alt148::Unicode1(u) = e.1 {
- decode(u)
- } else {
- panic!()
- }
- } else {
- panic!()
- };
-
- assert!(SURROGATE_PAIR_RANGE.contains(&other));
-
- Some(decode_pair(codepoint, other).try_into().unwrap())
- } else {
- Some(u32::from(codepoint).try_into().unwrap())
- }
- }
- },
- }
- }
-}
-
-impl From<Object1> for HashMap<String, Value> {
- fn from(object: Object1) -> Self {
- match object.opt1 {
- Opt12::None1(_) => HashMap::new(),
- Opt12::Sep1(s) => s
- .into_iter()
- .map(|m| (m.string1.into(), (*m.value1).into()))
- .collect(),
- }
- }
-}
-
-impl IntoIterator for Sep1 {
- type Item = Member1;
-
- type IntoIter = Sep1Iter;
-
- fn into_iter(self) -> Self::IntoIter {
- Sep1Iter(Some(self))
- }
-}
-
-pub struct Sep1Iter(Option<Sep1>);
-
-impl Iterator for Sep1Iter {
- type Item = Member1;
-
- fn next(&mut self) -> Option<Self::Item> {
- let inner = self.0.take()?.0;
- let res = inner.member1;
- self.0 = match inner.opt1 {
- Opt11::None1(_) => None,
- Opt11::Some1(s) => Some(*s.rec1),
- };
- Some(res)
- }
-}
-
-impl From<Array1> for Vec<Value> {
- fn from(array: Array1) -> Self {
- match array.opt1 {
- Opt14::None1(_) => Vec::new(),
- Opt14::Sep1(s) => s.into_iter().map(|x| (*x.value1).into()).collect(),
- }
- }
-}
-
-impl IntoIterator for Sep2 {
- type Item = X1;
-
- type IntoIter = Sep2Iter;
-
- fn into_iter(self) -> Self::IntoIter {
- Sep2Iter(Some(self))
- }
-}
-
-pub struct Sep2Iter(Option<Sep2>);
-
-impl Iterator for Sep2Iter {
- type Item = X1;
-
- fn next(&mut self) -> Option<Self::Item> {
- let inner = self.0.take()?.0;
- let res = inner.x1;
- self.0 = match inner.opt1 {
- Opt13::None1(_) => None,
- Opt13::Some1(s) => Some(*s.rec1),
- };
- Some(res)
- }
-}