diff options
author | Greg Brown <greg.brown01@ed.ac.uk> | 2022-12-21 15:47:12 +0000 |
---|---|---|
committer | Greg Brown <greg.brown01@ed.ac.uk> | 2022-12-21 15:47:12 +0000 |
commit | 4bad0e68e4b984db004d75ab87ca5a181d223190 (patch) | |
tree | 95ac5fe8a30215c7c47cded8d017701b4095d02f /src/Obs/Parser.idr | |
parent | cb4ec1e7c1551c1d28f00ba11b062daa78e8c4e3 (diff) |
Improve parsing.
Diffstat (limited to 'src/Obs/Parser.idr')
-rw-r--r-- | src/Obs/Parser.idr | 319 |
1 files changed, 163 insertions, 156 deletions
diff --git a/src/Obs/Parser.idr b/src/Obs/Parser.idr index 95d8e96..0ccba86 100644 --- a/src/Obs/Parser.idr +++ b/src/Obs/Parser.idr @@ -1,6 +1,7 @@ module Obs.Parser import Data.String +import Data.Vect import Obs.Logging import Obs.Sort @@ -15,8 +16,8 @@ import Text.Parser %default total data ObsTokenKind - = OTNewline - | OTIgnore + = OTNewlines + | OTSpaces | OTIdent -- Keywords | OTProp @@ -49,8 +50,8 @@ data ObsTokenKind | OTColon Eq ObsTokenKind where - OTNewline == OTNewline = True - OTIgnore == OTIgnore = True + OTNewlines == OTNewlines = True + OTSpaces == OTSpaces = True OTIdent == OTIdent = True OTProp == OTProp = True OTSet == OTSet = True @@ -84,8 +85,8 @@ TokenKind ObsTokenKind where TokType OTNat = Nat TokType _ = () - tokValue OTNewline s = () - tokValue OTIgnore s = () + tokValue OTNewlines s = () + tokValue OTSpaces s = () tokValue OTIdent s = s tokValue OTProp s = () tokValue OTSet s = () @@ -116,9 +117,35 @@ TokenKind ObsTokenKind where ObsToken : Type ObsToken = Token ObsTokenKind -ignored : WithBounds ObsToken -> Bool -ignored (MkBounded (Tok OTIgnore _) _ _) = True -ignored _ = False +Show ObsToken where + show (Tok OTNewlines s) = "\\n" + show (Tok OTSpaces s) = " " + show (Tok OTIdent s) = s + show (Tok OTProp s) = "Prop" + show (Tok OTSet s) = "Set" + show (Tok OTNat s) = s + show (Tok OTFst s) = "fst" + show (Tok OTSnd s) = "snd" + show (Tok OTPoint s) = "tt" + show (Tok OTVoid s) = "Void" + show (Tok OTAbsurd s) = "absurd" + show (Tok OTRefl s) = "refl" + show (Tok OTTransp s) = "transp" + show (Tok OTCast s) = "cast" + show (Tok OTCastRefl s) = "castRefl" + show (Tok OTUnit s) = "()" + show (Tok OTPOpen s) = "(" + show (Tok OTPClose s) = ")" + show (Tok OTAOpen s) = "<" + show (Tok OTAClose s) = ">" + show (Tok OTBackslash s) = "\\" + show (Tok OTThinArrow s) = "->" + show (Tok OTFatArrow s) = "=>" + show (Tok OTProduct s) = "**" + show (Tok OTComma s) = "," + show (Tok OTTilde s) = "~" + show (Tok OTEqual s) = "=" + show (Tok OTColon s) = ":" identifier : Lexer identifier = alpha <+> many alphaNum @@ -139,7 +166,7 @@ keywords = ] obsTokenMap : TokenMap ObsToken -obsTokenMap = toTokenMap [(newline, OTNewline), (spaces, OTIgnore)] ++ +obsTokenMap = toTokenMap [(newlines, OTNewlines), (spaces, OTSpaces)] ++ [ (identifier, \s => case lookup s keywords of (Just kind) => Tok kind s @@ -160,175 +187,154 @@ obsTokenMap = toTokenMap [(newline, OTNewline), (spaces, OTIgnore)] ++ , (exact ":", OTColon) ] -newlines : Grammar state ObsToken False () -newlines = map (\_ => ()) $ many (match OTNewline) +op : Type -> Type -> Nat -> Type +op a b 0 = b +op a b (S n) = a -> op a b n + +uncurry : (n : _) -> op a b n -> Vect n a -> b +uncurry 0 f [] = f +uncurry (S n) f (x :: xs) = uncurry n (f x) xs + +headForms : List (ObsTokenKind, (n ** (Vect n (WithBounds Syntax) -> Syntax))) +headForms = + [ (OTFst, (1 ** uncurry 1 Fst)) + , (OTSnd, (1 ** uncurry 1 Snd)) + , (OTPoint, (0 ** uncurry 0 Point)) + , (OTVoid, (0 ** uncurry 0 Bottom)) + , (OTAbsurd, (2 ** uncurry 2 Absurd)) + , (OTRefl, (1 ** uncurry 1 Refl)) + , (OTTransp, (5 ** uncurry 5 Transp)) + , (OTCast, (3 ** uncurry 3 Cast)) + , (OTCastRefl, (1 ** uncurry 1 CastId)) + , (OTUnit, (0 ** uncurry 0 Top)) + ] + +declForms : List (ObsTokenKind, (WithBounds String -> op (WithBounds Syntax) Syntax 2)) +declForms = [(OTThinArrow, Pi), (OTProduct, Sigma)] -parens : {c : _} -> Grammar state ObsToken c a -> Grammar state ObsToken True a -parens g = - match OTPOpen *> - newlines *> - g <* - newlines <* - match OTPClose +count : (n : _) -> Grammar state tok True a -> Grammar state tok (isSucc n) (Vect n a) +count 0 p = pure [] +count (S n) p = [| p :: count n p |] ident : Grammar state ObsToken True (WithBounds String) ident = bounds $ match OTIdent -var : Grammar state ObsToken True Syntax -var = [| Var (ident <* commit) |] - -prop : Grammar state ObsToken True Syntax -prop = do - prop <- bounds $ match OTProp <* commit - pure (Sort prop.bounds Prop) - -set0 : Grammar state ObsToken True Syntax -set0 = do - set <- bounds $ match OTSet <* commit - pure (Sort set.bounds (Set 0)) - -top : Grammar state ObsToken True Syntax -top = do - unit <- bounds $ match OTUnit <* commit - pure (Top unit.bounds) - -point : Grammar state ObsToken True Syntax -point = do - point <- bounds $ match OTPoint <* commit - pure (Point point.bounds) - -bottom : Grammar state ObsToken True Syntax -bottom = do - void <- bounds $ match OTVoid <* commit - pure (Bottom void.bounds) - -atomicNoSet : Grammar state ObsToken True Syntax -atomicNoSet = (var <|> prop <|> top <|> point <|> bottom) <* commit - -atomic : Grammar state ObsToken True Syntax -atomic = (atomicNoSet <|> set0) <* commit - -set : Grammar state ObsToken True Syntax -set = do - i <- [| mergeBounds (bounds $ match OTSet) (bounds $ map Set $ option 0 $ match OTNat) |] <* commit - pure (Sort i.bounds i.val) - -lambda : Grammar state ObsToken True (Syntax -> Syntax) -lambda = do - name <- bounds $ match OTBackslash *> commit *> ident <* newlines <* match OTFatArrow <* newlines - pure (Lambda name.bounds name.val) - -projFst : Grammar state ObsToken True (Syntax -> Syntax) -projFst = do - fst <- bounds $ match OTFst <* commit - pure (Fst fst.bounds) - -projSnd : Grammar state ObsToken True (Syntax -> Syntax) -projSnd = do - snd <- bounds $ match OTSnd <* commit - pure (Snd snd.bounds) - -botElim : Grammar state ObsToken True (Syntax -> Syntax -> Syntax) -botElim = do - absurd <- bounds $ match OTAbsurd <* commit - pure (Absurd absurd.bounds) - -refl : Grammar state ObsToken True (Syntax -> Syntax) -refl = do - refl <- bounds $ match OTRefl <* commit - pure (Refl refl.bounds) - -transp : Grammar state ObsToken True (Syntax -> Syntax -> Syntax -> Syntax -> Syntax -> Syntax) -transp = do - transp <- bounds $ match OTTransp <* commit - pure (Transp transp.bounds) - -cast : Grammar state ObsToken True (Syntax -> Syntax -> Syntax -> Syntax) -cast = do - cast <- bounds $ match OTCast <* commit - pure (Cast cast.bounds) - -castRefl : Grammar state ObsToken True (Syntax -> Syntax) -castRefl = do - castRefl <- bounds $ match OTCastRefl <* commit - pure (CastId castRefl.bounds) - --- NOTE: these functions are all total. -partial -pi : WithBounds (WithBounds String, Syntax) -> Grammar state ObsToken True (Syntax -> Syntax) -partial -sigma : WithBounds (WithBounds String, Syntax) -> Grammar state ObsToken True (Syntax -> Syntax) -partial -pair : Grammar state ObsToken True Syntax -> Grammar state ObsToken True Syntax -> Grammar state ObsToken True Syntax +whitespace : Grammar state ObsToken True () +whitespace = map (\_ => ()) $ some (optional (match OTNewlines) *> match OTSpaces) + +parens : Lazy (Grammar state ObsToken True a) -> Grammar state ObsToken True a +parens p = match OTPOpen *> optional whitespace *> p <* optional whitespace <* match OTPClose + +var : Grammar state ObsToken True (WithBounds Syntax) +var = map (map Var) ident + +noArgSort : Grammar state ObsToken True (WithBounds Sort) +noArgSort = bounds $ + map (const Prop) (match OTProp <* commit) <|> + map (const (Set 0)) (match OTSet <* commit) + +sort : Grammar state ObsToken True (WithBounds Sort) +sort = bounds $ + map (const Prop) (match OTProp <* commit) <|> + map Set (match OTSet *> commit *> option 0 (match OTNat <* commit)) + +decl : Lazy (Grammar state ObsToken True a) -> Grammar state ObsToken True (WithBounds String, a) +decl p = [| MkPair (ident <* whitespace <* match OTColon <* whitespace) p |] + +pair : Lazy (Grammar state ObsToken True a) -> Grammar state ObsToken True (a, a) +pair p = + match OTAOpen *> commit *> + [| MkPair + (optional whitespace *> p <* optional whitespace <* match OTComma <* commit) + (optional whitespace *> p <* optional whitespace) |] + <* match OTAClose <* commit + partial -term : Grammar state ObsToken True Syntax +noSortsTerm : Grammar state ObsToken True (WithBounds Syntax) partial -precGteApp : Grammar state ObsToken True Syntax +term : Grammar state ObsToken True (WithBounds Syntax) partial -precGteEqual : Grammar state ObsToken True Syntax +head : Grammar state ObsToken True (WithBounds Syntax) partial -exp' : Grammar state ObsToken True Syntax +spine : Grammar state ObsToken True (WithBounds Syntax) partial -exp : Grammar state ObsToken True Syntax +equals : Grammar state ObsToken True (WithBounds Syntax) partial -decl : Grammar state ObsToken True (WithBounds String, Syntax) - -pi decl = do - _ <- match OTThinArrow <* commit <* newlines - Parser.Core.pure (uncurry (Pi decl.bounds) decl.val) - -sigma decl = do - _ <- match OTProduct <* commit <* newlines - Parser.Core.pure (uncurry (Sigma decl.bounds) decl.val) - -pair fst snd = do - pair <- bounds $ [| MkPair (match OTAOpen *> fst <* newlines <* match OTComma <* newlines) (snd <* newlines <* match OTAClose) |] - pure (uncurry (Pair pair.bounds) pair.val) - -term = atomic <|> pair exp exp <|> parens exp -precGteApp = atomicNoSet <|> set <|> pair exp exp <|> parens exp -precGteEqual = - map (\ty => let (t, t') = ty.val in maybe t (Equal ty.bounds t) t') $ - bounds [| MkPair precGteApp (optional (newlines *> match OTTilde *> commit *> newlines *> precGteApp)) |] -exp' = - precGteEqual <|> - (projFst <*> term) <|> - (projSnd <*> term) <|> - (botElim <*> term <*> term) <|> - (refl <*> term) <|> - (transp <*> term <*> term <*> term <*> term <*> term) <|> - (Parser.cast <*> term <*> term <*> term) <|> - (castRefl <*> term) +exp : Grammar state ObsToken True (WithBounds Syntax) + +noSortsTerm = + parens exp <|> + var <|> + bounds (map (uncurry Pair) (pair exp)) + +term = noSortsTerm <|> map (map Sort) noArgSort + +head = + noSortsTerm <|> + map (map Sort) sort <|> + bounds + (choiceMap + (\(hd, (n ** f)) => match hd *> commit *> [| f (count n (whitespace *> term)) |]) + headForms) + +spine = map (uncurry $ foldl (\t, u => joinBounds (map (\_ => map (\_ => App t u) u) t))) $ + [| MkPair head (many (whitespace *> term)) |] + +equals = map (\(t, u) => maybe t (\u => joinBounds (map (\_ => map (\_ => Equal t u) u) t)) u) $ + [| MkPair spine (optional (whitespace *> match OTTilde *> commit *> whitespace *> spine)) |] + exp = - (bounds (parens decl) <* many (match OTNewline) >>= (\decl => (pi decl <|> sigma decl) <*> exp)) <|> - (lambda <*> exp) <|> - (map (\(t, ts) => foldl1 App (t :: ts)) [| MkPair exp' (many term) |]) -decl = [| MkPair ident (match OTColon *> commit *> exp) |] + equals <|> + bounds + (map (\((var, a), (b, f)) => f var a b) $ + [| MkPair + (parens (decl exp) <* whitespace) + (choiceMap + (\(op, f) => match op *> commit *> whitespace *> [| MkPair exp (pure f) |]) + declForms) + |]) <|> + bounds + (map (uncurry Lambda) $ + match OTBackslash *> commit *> + [| MkPair (ident <* whitespace <* match OTFatArrow <* whitespace) exp |]) partial -def : Grammar state ObsToken True (WithBounds String, Syntax) -def = [| MkPair ident (match OTEqual *> exp) |] +def : Grammar state ObsToken True (WithBounds String, WithBounds Syntax) +def = [| MkPair (ident <* whitespace <* match OTEqual <* whitespace) exp |] partial fullDef : Grammar state ObsToken True Definition fullDef = seq - [| MkPair decl (match OTNewline *> def) |] + [| MkPair (decl exp) (optional whitespace *> match OTNewlines *> def) |] (\((name, ty), (name', tm)) => if name.val == name'.val - then pure (MkDefinition {bounds = name.bounds, name = name.val, ty, tm}) + then pure (MkDefinition {name = name, ty, tm}) else fatalLoc name.bounds "name mismatch for declaration and definition") partial file : Grammar state ObsToken False (List Definition) -file = many (match OTNewline) *> sepBy (some (match OTNewline)) fullDef <* many (match OTNewline) <* eof - -postprocess : List (WithBounds ObsToken) -> List (WithBounds ObsToken) -postprocess [] = [] -postprocess [tok] = if ignored tok then [] else [tok] -postprocess (tok :: tail@(tok' :: toks)) = case (tok.val.kind, tok'.val.kind) of - (OTPOpen, OTPClose) => map (\_ => Tok OTUnit "()") (mergeBounds tok tok') :: postprocess toks - _ => if ignored tok then postprocess tail else tok :: postprocess tail +file = + optional (whitespace *> match OTNewlines) *> + sepEndBy (optional whitespace *> match OTNewlines) fullDef <* + eof + +whitespaceIrrelevant : WithBounds ObsToken -> WithBounds ObsToken +whitespaceIrrelevant a = case (a.val.kind) of + OTSpaces => removeIrrelevance a + OTNewlines => removeIrrelevance a + _ => a + +-- Combines tokens for nicer parsing +mergeToks : List (WithBounds ObsToken) -> List (WithBounds ObsToken) +mergeToks (a :: tail@(b :: rest)) = case (a.val.kind, b.val.kind) of + (OTSpaces, OTNat) => mergeBounds a b :: mergeToks rest + (OTPOpen, OTPClose) => map (\_ => (Tok OTUnit "")) (mergeBounds a b) :: mergeToks rest + _ => a :: mergeToks tail +mergeToks toks = toks + +preprocess : List (WithBounds ObsToken) -> List (WithBounds ObsToken) +preprocess = mergeToks . map whitespaceIrrelevant castErr : Either (List1 (ParsingError tok)) a -> Logging ann a castErr (Right x) = pure x @@ -348,6 +354,7 @@ parse : String -> Logging ann (List Definition) parse str = do let (toks, _, _, "") = lex obsTokenMap str | (_, l, c, rem) => inScope "lex" $ fatal "failed to lex input" - (defs, []) <- inScope "parse" $ castErr $ parse file $ postprocess toks - | (_, (t :: _)) => inScope "parse" $ fatal "unparsed tokens" + let toks = preprocess toks + (defs, []) <- inScope "parse" $ castErr $ parse file $ toks + | (_, ts) => inScope "parse" $ fatal "unparsed tokens" pure defs |