diff options
author | Greg Brown <gmb60@cam.ac.uk> | 2021-02-06 14:52:06 +0000 |
---|---|---|
committer | Greg Brown <gmb60@cam.ac.uk> | 2021-02-06 14:52:06 +0000 |
commit | a1faa3688f37f0498b9569304bc13ec0f729db07 (patch) | |
tree | 48d8d0f3c4075a94fd9a1b8d1ced377eb676cb5c | |
parent | c24c0b0955116928c75db404f431ea845bec0602 (diff) |
Add LALRPOP parser for JSON.
-rw-r--r-- | chomp-bench/src/json/lalr.lalrpop | 66 | ||||
-rw-r--r-- | chomp-bench/src/json/mod.rs | 2 |
2 files changed, 68 insertions, 0 deletions
diff --git a/chomp-bench/src/json/lalr.lalrpop b/chomp-bench/src/json/lalr.lalrpop
new file mode 100644
index 0000000..99f86ab
--- /dev/null
+++ b/chomp-bench/src/json/lalr.lalrpop
@@ -0,0 +1,66 @@
+use super::{decode_pair, Value};
+
+use std::{collections::HashMap, convert::TryInto, str::FromStr};
+
+grammar; // LALRPOP grammar for JSON values, numbers and strings.
+
+pub RawValue: Value = { // Any JSON value.
+    "null" => Value::Null,
+    "true" => Value::Bool(true),
+    "false" => Value::Bool(false),
+    Num => Value::Number(<>),
+    Str => Value::String(<>),
+    "[" <Comma<RawValue>> "]" => Value::Array(<>),
+    "{" <Comma<(<Str> ":" <RawValue>)>> "}" => Value::Object(<>.into_iter().collect()),
+};
+
+pub Num: f64 = r"-?(0|[1-9][0-9]*)(\.[0-9]+)?([eE][-+]?[0-9]+)?" => f64::from_str(<>).unwrap(); // Fraction and exponent are optional.
+
+pub Str: String = <s:r#""([^\\"]|\\.)*""#> => { // s is the whole literal including both quotes
+    let mut out = String::new();
+    let mut iter = s[1..s.len() - 1].chars(); // Skip the enclosing quotes.
+    while let Some(c) = iter.next() {
+        let c = match c {
+            c @ '\x20'..='\x21' | c @ '\x23'..='\x5B' | c @ '\x5D'..='\u{10FFFF}' => c,
+            '\\' => match iter.next().unwrap() {
+                '\"' => '\x22',
+                '\\' => '\x5C',
+                '/' => '\x2F',
+                'b' => '\x08',
+                'f' => '\x0C',
+                'n' => '\x0A',
+                'r' => '\x0D',
+                't' => '\x09',
+                'u' => {
+                    let v = iter.by_ref().take(4).collect::<String>();
+                    assert_eq!(v.len(), 4);
+                    let v = u16::from_str_radix(&v, 16).unwrap();
+                    let codepoint = if (0xD800..=0xDFFF).contains(&v) { // Surrogate: the other half must follow as a second escape.
+                        let skipped = iter.by_ref().take(2).collect::<String>();
+                        assert_eq!(skipped, "\\u");
+                        let o = iter.by_ref().take(4).collect::<String>();
+                        assert_eq!(o.len(), 4);
+                        let o = u16::from_str_radix(&o, 16).unwrap();
+                        decode_pair(v, o)
+                    } else {
+                        u32::from(v)
+                    };
+                    codepoint.try_into().unwrap()
+                }
+                _ => panic!(), // Unknown escape sequence.
+            }
+            _ => panic!(), // Unescaped control character below U+0020.
+        };
+        out.push(c);
+    }
+    out
+};
+
+Comma<T>: Vec<T> = { // Zero or more `T`s separated by commas; no trailing comma.
+    T? => <>.into_iter().collect(),
+    <v : (<T> ",")+> <e : T> => {
+        let mut v = v;
+        v.push(e);
+        v
+    }
+}
diff --git a/chomp-bench/src/json/mod.rs b/chomp-bench/src/json/mod.rs
index 442200c..2279010 100644
--- a/chomp-bench/src/json/mod.rs
+++ b/chomp-bench/src/json/mod.rs
@@ -1,8 +1,10 @@
 use std::{collections::HashMap, convert::TryInto, fmt, ops::RangeInclusive};
 
 use chewed::{Parse, TakeError};
+use lalrpop_util::lalrpop_mod;
 
 pub mod nibble;
+lalrpop_mod!(pub lalr, "/json/lalr.rs");
 
 fn decode_pair(one: u16, other: u16) -> u32 {
     // Ranges are confusingly backwards