module CC.Term.Parse import CC.Name import CC.Term import CC.Term.Raw import Data.Bool import Data.List import Text.Lexer import Text.Parser %hide Text.Parser.match -- Utilities ------------------------------------------------------------------- expandBindList : Functor f => (f (Maybe Name), RawTerm) -> f RawBinding expandBindList (xs, t) = map (`Of` t) xs -- Tokens ---------------------------------------------------------------------- data CCTokenKind = TIgnore -- Variables | TIdent -- Let Syntax | TLet | TDef | TSemi -- Universes | TSet -- Abstraction | TThinArrow | TLambda | TComma | TFatArrow -- Symbols | TUnderscore | TColon | TPOpen | TPClose %name CCTokenKind k Eq CCTokenKind where TIgnore == TIgnore = True TIdent == TIdent = True TLet == TLet = True TDef == TDef = True TSemi == TSemi = True TSet == TSet = True TThinArrow == TThinArrow = True TLambda == TLambda = True TComma == TComma = True TFatArrow == TFatArrow = True TUnderscore == TUnderscore = True TColon == TColon = True TPOpen == TPOpen = True TPClose == TPClose = True _ == _ = False TokenKind CCTokenKind where TokType TIdent = String TokType _ = () tokValue TIgnore s = () tokValue TIdent s = s tokValue TLet s = () tokValue TDef s = () tokValue TSemi s = () tokValue TSet s = () tokValue TThinArrow s = () tokValue TLambda s = () tokValue TComma s = () tokValue TFatArrow s = () tokValue TUnderscore s = () tokValue TColon s = () tokValue TPOpen s = () tokValue TPClose s = () Interpolation CCTokenKind where interpolate TIgnore = "whitespace" interpolate TIdent = "identifier" interpolate TLet = #""let""# interpolate TDef = #""=""# interpolate TSemi = #"";""# interpolate TSet = #""Set""# interpolate TThinArrow = #""->""# interpolate TLambda = #""\""# interpolate TComma = #"",""# interpolate TFatArrow = #""=>""# interpolate TUnderscore = #""_""# interpolate TColon = #"":""# interpolate TPOpen = #""(""# interpolate TPClose = #"")""# export CCToken : Type CCToken = Token CCTokenKind -- Lexer ----------------------------------------------------------------------- ignored : List Lexer ignored = [spaces, lineComment (exact "--"), blockComment (exact "{-") (exact "-}")] identifier : Lexer identifier = alpha <+> many alphaNum keywords : List (String, CCTokenKind) keywords = [("let", TLet), ("Set", TSet)] tokenMap : TokenMap CCToken tokenMap = -- Ignored toTokenMap (map (, TIgnore) ignored) ++ -- Identifiers [(identifier, \s => case lookup s keywords of Just kind => Tok kind s Nothing => Tok TIdent s) ] ++ toTokenMap [ -- Abstraction (exact "->", TThinArrow), (exact "\\", TLambda), (exact "\x03BB", TLambda), (exact ",", TComma), (exact "=>", TFatArrow), -- Symbols (exact "_", TUnderscore), (exact ":", TColon), (exact "(", TPOpen), (exact ")", TPClose), -- Let syntax (has to be after fat arrow) (exact "=", TDef), (exact ";", TSemi) ] -- Parser ---------------------------------------------------------------------- match : (kind : CCTokenKind) -> Grammar state CCToken True (TokType kind) match k = terminal "expected \{k}" $ \t => if t.kind == k then Just $ tokValue k t.text else Nothing %tcinline parens : {b : Bool} -> Grammar state CCToken b ty -> Grammar state CCToken True ty parens p = match TPOpen >> p >>= (\t => match TPClose *> pure t) parseIdent : Grammar state CCToken True Name parseIdent = MkName <$> (bounds $ match TIdent) parseBinder : Grammar state CCToken True (Maybe Name) parseBinder = either Just (const Nothing) <$> (parseIdent <||> match TUnderscore) %tcinline withBounds : Grammar state CCToken True RawTerm -> Grammar state CCToken True RawTerm withBounds p = Bounded <$> bounds p parseBindList : Grammar state CCToken True (List1 (Maybe Name)) parseBindList = sepBy1 (match TComma) parseBinder parseAnnot : (commit : Bool) -> Grammar state CCToken True RawTerm parseMaybeAnnot : (commit : Bool) -> Grammar state CCToken False (Maybe RawTerm) parseAnnotArgs : (commit : Bool) -> Grammar state CCToken True (List1 (Maybe Name), RawTerm) parseOptArrow : (commit : Bool) -> Grammar state CCToken False (Maybe RawTerm) parseTerm : Grammar state CCToken True RawTerm parsePi : Grammar state CCToken True RawTerm parseLambda : Grammar state CCToken True RawTerm parseLet : Grammar state CCToken True RawTerm parseChain : Grammar state CCToken True RawTerm parseSpine : Grammar state CCToken True RawTerm parseAtom : Grammar state CCToken True RawTerm parseAnnot False = match TColon >> parseTerm parseAnnot True = match TColon >> commit >> parseTerm parseMaybeAnnot b = Just <$> parseAnnot b <|> pure Nothing parseAnnotArgs b = [| MkPair parseBindList (parseAnnot b) |] parseOptArrow False = Just <$> (match TThinArrow >> parseTerm) <|> pure Nothing parseOptArrow True = Just <$> (match TThinArrow >> commit >> parseTerm) <|> pure Nothing parseTerm = withBounds (parsePi <|> parseLambda <|> parseLet <|> parseChain) parsePi = do dom <- parens (parseAnnotArgs False) match TThinArrow cod <- parseTerm pure $ foldr1By Pi (flip Pi cod) $ expandBindList dom parseLambda = do match TLambda commit names <- parseBindList commit match TFatArrow commit t <- parseTerm pure $ foldr1By Abs (flip Abs t) names parseLet = do match TLet commit n <- parseIdent commit ty <- parseMaybeAnnot True match TDef commit t <- parseTerm match TSemi commit u <- parseTerm pure $ Let n ty t u parseChain = do spine <- parseSpine maybeCod <- parseOptArrow True pure $ case maybeCod of Just cod => Pi (Nothing `Of` spine) cod Nothing => spine parseSpine = foldl1 App <$> some parseAtom parseAtom = withBounds (Var <$> parseIdent <* commit) <|> withBounds (Set <$ match TSet <* commit) <|> parens parseTerm parseSrc : Grammar state CCToken True RawTerm parseSrc = parseTerm <* eof export parseString : String -> Either (List1 (ParsingError CCToken)) RawTerm parseString str = do let (tokens, _, _, "") = lex tokenMap str | (_, line, col, rest) => let bounds = MkBounds { startCol = col, startLine = line, endCol = col, endLine = line } in Left (Error "failed to lex input" (Just bounds) ::: []) (val, []) <- parse parseSrc $ filter (\tok => tok.val.kind /= TIgnore) tokens | (_, fst :: rest) => Left (Error "unexpected token" (Just fst.bounds) ::: []) pure val