From 43463df9899d00f0f86c3b655b84a5d5ce75402d Mon Sep 17 00:00:00 2001 From: Chloe Brown Date: Sat, 25 Mar 2023 16:52:15 +0000 Subject: Extract parsing and evaluation to new modules. --- src/CC/Term/Parse.idr | 146 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 146 insertions(+) create mode 100644 src/CC/Term/Parse.idr (limited to 'src/CC/Term/Parse.idr') diff --git a/src/CC/Term/Parse.idr b/src/CC/Term/Parse.idr new file mode 100644 index 0000000..cef4f0e --- /dev/null +++ b/src/CC/Term/Parse.idr @@ -0,0 +1,146 @@ +module CC.Term.Parse + +import CC.Name +import CC.Term +import CC.Term.Raw + +import Data.List + +import Text.Lexer +import Text.Parser + +-- Tokens ---------------------------------------------------------------------- + +data CCTokenKind + = TIgnore + | TIdent + -- Keywords + | TLet + -- Symbols + | TLambda + | TPOpen + | TPClose + | TComma + | TFatArrow + | TDef + | TSemi + +%name CCTokenKind k + +Eq CCTokenKind where + TIgnore == TIgnore = True + TIdent == TIdent = True + TLet == TLet = True + TLambda == TLambda = True + TPOpen == TPOpen = True + TPClose == TPClose = True + TComma == TComma = True + TFatArrow == TFatArrow = True + TDef == TDef = True + TSemi == TSemi = True + _ == _ = False + +TokenKind CCTokenKind where + TokType TIdent = String + TokType _ = () + + tokValue TIgnore s = () + tokValue TIdent s = s + tokValue TLet s = () + tokValue TLambda s = () + tokValue TPOpen s = () + tokValue TPClose s = () + tokValue TComma s = () + tokValue TFatArrow s = () + tokValue TDef s = () + tokValue TSemi s = () + +export +CCToken : Type +CCToken = Token CCTokenKind + +-- Lexer ----------------------------------------------------------------------- + +ignored : List Lexer +ignored = [spaces, lineComment (exact "--"), blockComment (exact "{-") (exact "-}")] + +identifier : Lexer +identifier = alpha <+> many alphaNum + +keywords : List (String, CCTokenKind) +keywords = [("let", TLet)] + +tokenMap : TokenMap CCToken +tokenMap = + toTokenMap (map (, TIgnore) ignored) ++ + [(identifier, \s => + case lookup s keywords of + Just kind => Tok kind s + Nothing => Tok TIdent s) + ] ++ toTokenMap [ + (exact "\\", TLambda), + (exact "\x03BB", TLambda), + (exact "(", TPOpen), + (exact ")", TPClose), + (exact ",", TComma), + (exact "=>", TFatArrow), + (exact "=", TDef), + (exact ";", TSemi) + ] + +-- Parser ---------------------------------------------------------------------- + +%tcinline +parens : {b : Bool} -> Grammar state CCToken b ty -> Grammar state CCToken True ty +parens p = match TPOpen >> commit >> p >>= (\t => match TPClose *> commit *> pure t) + +parseTerm : Grammar state CCToken True RawTerm +parseLambda : Grammar state CCToken True RawTerm +parseLet : Grammar state CCToken True RawTerm +parseSpine : Grammar state CCToken True RawTerm +parseAtom : Grammar state CCToken True RawTerm + +parseTerm = parseLambda <|> parseLet <|> parseSpine + +parseLambda = do + match TLambda + commit + names <- sepBy1 (match TComma >> commit) (MkName <$> (bounds $ match TIdent)) + commit + match TFatArrow + commit + t <- parseTerm + pure $ foldr1By RAbs (flip RAbs t) names + +parseLet = do + match TLet + commit + n <- bounds $ match TIdent + commit + match TDef + commit + t <- parseTerm + match TSemi + commit + u <- parseTerm + pure $ RLet (MkName n) t u + +parseSpine = foldl1 RApp <$> some parseAtom + +parseAtom = + (RVar <$> MkName <$> (bounds $ match TIdent <* commit)) <|> + (parens parseTerm) + +parseSrc : Grammar state CCToken True RawTerm +parseSrc = parseTerm <* eof + +export +parseString : String -> Either (List1 (ParsingError CCToken)) RawTerm +parseString str = do + let (tokens, _, _, "") = lex tokenMap str + | (_, line, col, rest) => + let bounds = MkBounds { startCol = col, startLine = line, endCol = col, endLine = line } in + Left (Error "failed to lex input" (Just bounds) ::: []) + (val, []) <- parse parseSrc $ filter (\tok => tok.val.kind /= TIgnore) tokens + | (_, fst :: rest) => Left (Error "unexpected token" (Just fst.bounds) ::: []) + pure val -- cgit v1.2.3