summaryrefslogtreecommitdiff
path: root/src/CC/Term/Parse.idr
blob: 5c8956a1acfedf6def81d18736b91d2899fdf633 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
module CC.Term.Parse

import CC.Name
import CC.Term
import CC.Term.Raw

import Data.Bool
import Data.List

import Text.Lexer
import Text.Parser

%hide Text.Parser.match

-- Utilities -------------------------------------------------------------------

expandBindList : Functor f => (f (Maybe Name), RawTerm) -> f RawBinding
expandBindList (xs, t) = map (`Of` t) xs

-- Tokens ----------------------------------------------------------------------

data CCTokenKind
  = TIgnore
  -- Variables
  | TIdent
  -- Let Syntax
  | TLet
  | TDef
  | TSemi
  -- Universes
  | TSet
  -- Abstraction
  | TThinArrow
  | TLambda
  | TComma
  | TFatArrow
  -- Symbols
  | TUnderscore
  | TColon
  | TPOpen
  | TPClose

%name CCTokenKind k

Eq CCTokenKind where
  TIgnore == TIgnore = True

  TIdent == TIdent = True

  TLet == TLet = True
  TDef == TDef = True
  TSemi == TSemi = True

  TSet == TSet = True

  TThinArrow == TThinArrow = True
  TLambda == TLambda = True
  TComma == TComma = True
  TFatArrow == TFatArrow = True

  TUnderscore == TUnderscore = True
  TColon == TColon = True
  TPOpen == TPOpen = True
  TPClose == TPClose = True

  _ == _ = False

TokenKind CCTokenKind where
  TokType TIdent = String
  TokType _ = ()

  tokValue TIgnore s = ()

  tokValue TIdent s = s

  tokValue TLet s = ()
  tokValue TDef s = ()
  tokValue TSemi s = ()

  tokValue TSet s = ()

  tokValue TThinArrow s = ()
  tokValue TLambda s = ()
  tokValue TComma s = ()
  tokValue TFatArrow s = ()

  tokValue TUnderscore s = ()
  tokValue TColon s = ()
  tokValue TPOpen s = ()
  tokValue TPClose s = ()

Interpolation CCTokenKind where
  interpolate TIgnore = "whitespace"

  interpolate TIdent = "identifier"

  interpolate TLet = #""let""#
  interpolate TDef = #""=""#
  interpolate TSemi = #"";""#

  interpolate TSet = #""Set""#

  interpolate TThinArrow = #""->""#
  interpolate TLambda = #""\""#
  interpolate TComma = #"",""#
  interpolate TFatArrow = #""=>""#

  interpolate TUnderscore = #""_""#
  interpolate TColon = #"":""#
  interpolate TPOpen = #""(""#
  interpolate TPClose = #"")""#

export
CCToken : Type
CCToken = Token CCTokenKind

-- Lexer -----------------------------------------------------------------------

ignored : List Lexer
ignored = [spaces, lineComment (exact "--"), blockComment (exact "{-") (exact "-}")]

identifier : Lexer
identifier = alpha <+> many alphaNum

keywords : List (String, CCTokenKind)
keywords = [("let", TLet), ("Set", TSet)]

tokenMap : TokenMap CCToken
tokenMap =
  -- Ignored
  toTokenMap (map (, TIgnore) ignored) ++
  -- Identifiers
  [(identifier, \s =>
    case lookup s keywords of
      Just kind => Tok kind s
      Nothing => Tok TIdent s)
  ] ++ toTokenMap [
    -- Abstraction
    (exact "->", TThinArrow),
    (exact "\\", TLambda),
    (exact "\x03BB", TLambda),
    (exact ",", TComma),
    (exact "=>", TFatArrow),
    -- Symbols
    (exact "_", TUnderscore),
    (exact ":", TColon),
    (exact "(", TPOpen),
    (exact ")", TPClose),
    -- Let syntax (has to be after fat arrow)
    (exact "=", TDef),
    (exact ";", TSemi)
  ]

-- Parser ----------------------------------------------------------------------

match : (kind : CCTokenKind) -> Grammar state CCToken True (TokType kind)
match k = terminal "expected \{k}" $
  \t => if t.kind == k then Just $ tokValue k t.text else Nothing

%tcinline
parens : {b : Bool} -> Grammar state CCToken b ty -> Grammar state CCToken True ty
parens p = match TPOpen >> p >>= (\t => match TPClose *> pure t)

parseIdent : Grammar state CCToken True Name
parseIdent = MkName <$> (bounds $ match TIdent)

parseBinder : Grammar state CCToken True (Maybe Name)
parseBinder = either Just (const Nothing) <$> (parseIdent <||> match TUnderscore)

%tcinline
withBounds : Grammar state CCToken True RawTerm -> Grammar state CCToken True RawTerm
withBounds p = Bounded <$> bounds p

parseBindList : Grammar state CCToken True (List1 (Maybe Name))
parseBindList = sepBy1 (match TComma) parseBinder

parseAnnot : (commit : Bool) -> Grammar state CCToken True RawTerm
parseMaybeAnnot : (commit : Bool) -> Grammar state CCToken False (Maybe RawTerm)
parseAnnotArgs : (commit : Bool) -> Grammar state CCToken True (List1 (Maybe Name), RawTerm)
parseOptArrow : (commit : Bool) -> Grammar state CCToken False (Maybe RawTerm)

parseTerm : Grammar state CCToken True RawTerm
parsePi : Grammar state CCToken True RawTerm
parseLambda : Grammar state CCToken True RawTerm
parseLet : Grammar state CCToken True RawTerm
parseChain : Grammar state CCToken True RawTerm
parseSpine : Grammar state CCToken True RawTerm
parseAtom : Grammar state CCToken True RawTerm

parseAnnot False = match TColon >> parseTerm
parseAnnot True = match TColon >> commit >> parseTerm

parseMaybeAnnot b = Just <$> parseAnnot b <|> pure Nothing

parseAnnotArgs b = [| MkPair parseBindList (parseAnnot b) |]

parseOptArrow False = Just <$> (match TThinArrow >> parseTerm) <|> pure Nothing
parseOptArrow True = Just <$> (match TThinArrow >> commit >> parseTerm) <|> pure Nothing

parseTerm = withBounds (parsePi <|> parseLambda <|> parseLet <|> parseChain)

parsePi = do
  dom <- parens (parseAnnotArgs False)
  match TThinArrow
  cod <- parseTerm
  pure $ foldr1By Pi (flip Pi cod) $ expandBindList dom

parseLambda = do
  match TLambda
  commit
  names <- parseBindList
  commit
  match TFatArrow
  commit
  t <- parseTerm
  pure $ foldr1By Abs (flip Abs t) names

parseLet = do
  match TLet
  commit
  n <- parseIdent
  commit
  ty <- parseMaybeAnnot True
  match TDef
  commit
  t <- parseTerm
  match TSemi
  commit
  u <- parseTerm
  pure $ Let n ty t u

parseChain = do
  spine <- parseSpine
  maybeCod <- parseOptArrow True
  pure $ case maybeCod of
    Just cod => Pi (Nothing `Of` spine) cod
    Nothing => spine

parseSpine = foldl1 App <$> some parseAtom

parseAtom =
  withBounds (Var <$> parseIdent <* commit) <|>
  withBounds (Set <$ match TSet <* commit) <|>
  parens parseTerm

parseSrc : Grammar state CCToken True RawTerm
parseSrc = parseTerm <* eof

export
parseString : String -> Either (List1 (ParsingError CCToken)) RawTerm
parseString str = do
  let (tokens, _, _, "") = lex tokenMap str
    | (_, line, col, rest) =>
      let bounds = MkBounds { startCol = col, startLine = line, endCol = col, endLine = line } in
      Left (Error "failed to lex input" (Just bounds) ::: [])
  (val, []) <- parse parseSrc $ filter (\tok => tok.val.kind /= TIgnore) tokens
    | (_, fst :: rest) => Left (Error "unexpected token" (Just fst.bounds) ::: [])
  pure val