summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGreg Brown <gmb60@cam.ac.uk>2021-02-06 14:52:06 +0000
committerGreg Brown <gmb60@cam.ac.uk>2021-02-06 14:52:06 +0000
commita1faa3688f37f0498b9569304bc13ec0f729db07 (patch)
tree48d8d0f3c4075a94fd9a1b8d1ced377eb676cb5c
parentc24c0b0955116928c75db404f431ea845bec0602 (diff)
Add LALRPOP parser for JSON.
-rw-r--r--chomp-bench/src/json/lalr.lalrpop66
-rw-r--r--chomp-bench/src/json/mod.rs2
2 files changed, 68 insertions, 0 deletions
diff --git a/chomp-bench/src/json/lalr.lalrpop b/chomp-bench/src/json/lalr.lalrpop
new file mode 100644
index 0000000..99f86ab
--- /dev/null
+++ b/chomp-bench/src/json/lalr.lalrpop
@@ -0,0 +1,66 @@
+use super::{decode_pair, Value};
+
+use std::{collections::HashMap, convert::TryInto, str::FromStr};
+
+grammar;
+
+pub RawValue: Value = { // entry point: any single JSON value
+    "null" => Value::Null,
+    "true" => Value::Bool(true),
+    "false" => Value::Bool(false),
+    <number:Num> => Value::Number(number),
+    <text:Str> => Value::String(text),
+    "[" <elements:Comma<RawValue>> "]" => Value::Array(elements), // comma-separated values
+    "{" <members:Comma<(<Str> ":" <RawValue>)>> "}" => Value::Object(members.into_iter().collect()), // key/value pairs collected into the map
+};
+
+pub Num: f64 = r"-?(0|[1-9][0-9]*)(\.[0-9]+)?([eE][-+]?[0-9]+)?" => f64::from_str(<>).unwrap(); // JSON number: the fraction and the exponent are each optional, and a fraction needs at least one digit
+
+pub Str: String = r#""([^\\"]|\\.)*""# => { // decode one quoted string token into the text it denotes
+    let mut out = String::new();
+    let mut iter = <>.strip_prefix('\"').and_then(|s| s.strip_suffix('\"')).unwrap().chars(); // the token regex keeps both delimiting quotes - drop them before decoding
+    while let Some(c) = iter.next() {
+        let c = match c {
+            c @ '\x20'..='\x21' | c @ '\x23'..='\x5B' | c @ '\x5D'..='\u{10FFFF}' => c, // unescaped character, copied as is
+            '\\' => match iter.next().unwrap() { // escape sequence - the regex guarantees a following character
+                '\"' => '\x22',
+                '\\' => '\x5C',
+                '/' => '\x2F',
+                'b' => '\x08',
+                'f' => '\x0C',
+                'n' => '\x0A',
+                'r' => '\x0D',
+                't' => '\x09',
+                'u' => {
+                    let v = iter.by_ref().take(4).collect::<String>(); // four hex digits of the code unit
+                    assert_eq!(v.len(), 4);
+                    let v = u16::from_str_radix(&v, 16).unwrap();
+                    let codepoint = if (0xD800..=0xDFFF).contains(&v) { // surrogate: a second unicode escape must follow
+                        let skipped = iter.by_ref().take(2).collect::<String>();
+                        assert_eq!(skipped, "\\u");
+                        let o = iter.by_ref().take(4).collect::<String>();
+                        assert_eq!(o.len(), 4);
+                        let o = u16::from_str_radix(&o, 16).unwrap();
+                        decode_pair(v, o)
+                    } else {
+                        u32::from(v)
+                    };
+                    codepoint.try_into().unwrap() // panics if the value is not a valid scalar
+                }
+                _ => panic!(), // unknown escape letter
+            }
+            _ => panic!(), // remaining characters are control characters below U+0020, which need escaping
+        };
+        out.push(c);
+    }
+    out
+};
+
+Comma<T>: Vec<T> = { // comma-separated list of T with no trailing comma
+    <single:T?> => single.into_iter().collect(), // zero or one element
+    <leading:(<T> ",")+> <last:T> => { // two or more elements
+        let mut items = leading;
+        items.push(last);
+        items
+    }
+}
diff --git a/chomp-bench/src/json/mod.rs b/chomp-bench/src/json/mod.rs
index 442200c..2279010 100644
--- a/chomp-bench/src/json/mod.rs
+++ b/chomp-bench/src/json/mod.rs
@@ -1,8 +1,10 @@
use std::{collections::HashMap, convert::TryInto, fmt, ops::RangeInclusive};
use chewed::{Parse, TakeError};
+use lalrpop_util::lalrpop_mod;
pub mod nibble;
+lalrpop_mod!(pub lalr, "/json/lalr.rs");
fn decode_pair(one: u16, other: u16) -> u32 {
// Ranges are confusingly backwards