Skip to content

Commit

Permalink
improve parser more
Browse files Browse the repository at this point in the history
  • Loading branch information
robertmuth committed Apr 26, 2024
1 parent 9def3e3 commit ebc7faf
Show file tree
Hide file tree
Showing 2 changed files with 84 additions and 63 deletions.
2 changes: 1 addition & 1 deletion FrontEnd/ConcteteSyntax/print_argv.concrete.cw
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ import fmt
fun strlen(s ^u8) uint:
let! i uint = 0
-- pinc adds an integer to a pointer; it also takes an optional bound
while pinc(s, i)^!= 0:
while pinc(s, i)^ != 0:
set i += 1
return i

Expand Down
145 changes: 83 additions & 62 deletions FrontEnd/parse.py
Original file line number Diff line number Diff line change
Expand Up @@ -161,6 +161,7 @@ class TK_KIND(enum.Enum):

# Operators that may also carry a "!" suffix in the concrete syntax
# (e.g. "^" vs "^!" — presumably the mutable variant; confirm against lexer use).
_OPERATORS_WITH_EXCL_SUFFIX = [
    "&",
    "^",
]

_OPERATORS_WITH_EQ_SUFFIX = [
Expand Down Expand Up @@ -343,7 +344,7 @@ def next(self) -> TK:
out.comments.append(tk)
else:
self._peek_cache_small = tk
print(out)
# print(out)
for a in annotations:
if a.srcloc.lineno == out.srcloc.lineno:
out.column = a.column
Expand All @@ -370,14 +371,6 @@ def match_or_die(self, kind: TK_KIND, text=None):
return self.next()


def _ExpectToken(inp: Lexer, kind: TK_KIND, text=None) -> TK:
    """Consume the next token and report an error unless it matches.

    The token must have kind `kind`; when `text` is given, its text must
    match as well.  The (possibly mismatching) token is returned.

    NOTE(review): the result of cwast.CompilerError is neither raised nor
    returned — presumably it terminates compilation itself; confirm.
    """
    tk = inp.next()
    kind_ok = tk.kind == kind
    text_ok = text is None or tk.text == text
    if not (kind_ok and text_ok):
        cwast.CompilerError(
            tk.srcloc, f"Expected {kind}, got {tk.kind} [{tk.text}]")
    return tk


_OPERATOR_LIKE = {
"unwrap": "E",
"uniontypetag": "E",
Expand Down Expand Up @@ -434,6 +427,7 @@ def _PParseArrayType(inp: Lexer, tk: TK, _precedence) -> Any:


_FUN_LIKE = {
"len": (cwast.ExprLen, "E"),
"pinc": (cwast.ExprPointer, "EE"),
"pdec": (cwast.ExprPointer, "EE"),
"offsetof": (cwast.ExprOffsetof, "TF"),
Expand Down Expand Up @@ -473,17 +467,25 @@ def _PParseKeywordConstants(inp: Lexer, tk: TK, _precedence) -> Any:
return ctor(*params)

else:
assert False
assert False, f"{tk}"


def _PParseStr(_inp: Lexer, tk: TK, _precedence) -> Any:
    """Prefix parser: wrap a string-literal token in a ValString node."""
    raw = tk.text
    return cwast.ValString(raw)


def _PParseChar(_inp: Lexer, tk: TK, _precedence) -> Any:
    """Prefix parser: wrap a character-literal token in a ValNum node."""
    raw = tk.text
    return cwast.ValNum(raw)


# Pratt-parser dispatch table for tokens that can START an expression.
# Maps token kind -> (binding power, prefix parse function).
_PREFIX_EXPR_PARSERS = {
    TK_KIND.KW: (10, _PParseKeywordConstants),
    TK_KIND.ID: (10, _PParseId),
    TK_KIND.NUM: (10, _PParseNum),
    TK_KIND.SQUARE_OPEN: (10, _PParseArrayType),
    TK_KIND.STR: (10, _PParseStr),
    TK_KIND.CHAR: (10, _PParseChar),

}


Expand All @@ -495,11 +497,12 @@ def _PParserInfixOp(inp: Lexer, lhs, tk: TK, precedence) -> Any:
def _PParseFunctionCall(inp: Lexer, callee, tk: TK, precedence) -> Any:
assert tk.kind is TK_KIND.PAREN_OPEN
args = []
if not inp.match(TK_KIND.PAREN_CLOSED):
first = True
while not inp.match(TK_KIND.PAREN_CLOSED):
if not first:
inp.match_or_die(TK_KIND.COMMA)
first = False
args.append(_ParseExpr(inp))
while inp.match(TK_KIND.COMMA):
args.append(_ParseExpr(inp))
inp.match_or_die(TK_KIND.PAREN_CLOSED)
return cwast.ExprCall(callee, args)


Expand Down Expand Up @@ -538,6 +541,11 @@ def _PParseDeref(_inp: Lexer, pointer, _tk: TK, _precedence) -> Any:
return cwast.ExprDeref(pointer)


def _PParseFieldAccess(inp: Lexer, rec, _tk: TK, _precedence) -> Any:
    """Infix parser for '.': consume the field name following a record expr."""
    field_tk = inp.match_or_die(TK_KIND.ID)
    return cwast.ExprField(rec, field_tk.text)


_INFIX_EXPR_PARSERS = {
"<": (10, _PParserInfixOp),
"<=": (10, _PParserInfixOp),
Expand All @@ -552,12 +560,16 @@ def _PParseDeref(_inp: Lexer, pointer, _tk: TK, _precedence) -> Any:
"/": (10, _PParserInfixOp),
"*": (10, _PParserInfixOp),
#
"||": (10, _PParserInfixOp),
"&&": (10, _PParserInfixOp),
#
"&-&": (10, _PParserInfixOp),
#
"(": (10, _PParseFunctionCall),
"{": (10, _PParseInitializer),
"[": (10, _PParseIndex),
"^": (10, _PParseDeref),
".": (10, _PParseFieldAccess)

}

Expand All @@ -567,58 +579,66 @@ def _ParseTypeExpr(inp: Lexer):
if tk.kind is TK_KIND.ID:
return cwast.Id(tk.text)
elif tk.kind is TK_KIND.KW:

if tk.text == "auto":
return cwast.TypeAuto()
elif tk.text in ("slice", "slice!"):
inp.match_or_die(TK_KIND.PAREN_OPEN)
type = _ParseTypeExpr(inp)
inp.match_or_die(TK_KIND.PAREN_CLOSED)
return cwast.TypeSlice(type, mut=tk.text.endswith("!"))
elif tk.text == "union":
inp.match_or_die(TK_KIND.PAREN_OPEN)
members = []
first = True
while not inp.match(TK_KIND.PAREN_CLOSED):
if not first:
inp.match_or_die(TK_KIND.COMMA)
first = False
members.append(_ParseTypeExpr(inp))
return cwast.TypeUnion(members)
kind = cwast.KeywordToBaseTypeKind(tk.text)
assert kind is not cwast.BASE_TYPE_KIND.INVALID
assert kind is not cwast.BASE_TYPE_KIND.INVALID, f"{tk}"
return cwast.TypeBase(kind)
elif tk.text == '[':
if inp.peek().text == "]":
_ExpectToken(inp, TK_KIND.SQUARE_CLOSED)
type = _ParseTypeExpr(inp)
return cwast.TypeSlice(type, False)
elif inp.peek().text == "!":
_ExpectToken(inp, TK_KIND.OP, "!")
_ExpectToken(inp, TK_KIND.SQUARE_CLOSED)
type = _ParseTypeExpr(inp)
return cwast.TypeSlice(type, True)
else:
dim = _ParseTypeExpr(inp)
_ExpectToken(inp, TK_KIND.SQUARE_CLOSED)
type = _ParseTypeExpr(inp)
return cwast.TypeArray(dim, type)
dim = _ParseExpr(inp)
inp.match_or_die(TK_KIND.SQUARE_CLOSED)
type = _ParseTypeExpr(inp)
return cwast.TypeArray(dim, type)
elif tk.text == "sig":
assert False

elif tk.text == "^":
rest = _ParseTypeExpr(inp)
return cwast.TypePtr(rest)
elif tk.text == "^!":
rest = _ParseTypeExpr(inp)
return cwast.TypePtr(rest, mut=True)
else:
assert False, f"unexpected token {tk}"


def _ParseFormalParams(inp: Lexer):
out = []
inp.match_or_die(TK_KIND.PAREN_OPEN)
if not inp.match(TK_KIND.PAREN_CLOSED):
first = True
while not inp.match(TK_KIND.PAREN_CLOSED):
if not first:
inp.match_or_die(TK_KIND.COMMA)
first = False
name = inp.match_or_die(TK_KIND.ID)
type = _ParseTypeExpr(inp)
out.append(cwast.FunParam(name.text, type))
while inp.match(TK_KIND.COMMA):
name = inp.match_or_die(TK_KIND.ID)
type = _ParseTypeExpr(inp)
out.append(cwast.FunParam(name.text, type))
inp.match_or_die(TK_KIND.PAREN_CLOSED)
return out


def _ParseMacroCall(inp: Lexer) -> Any:
args = []
if not inp.match(TK_KIND.PAREN_CLOSED):
first = True
while not inp.match(TK_KIND.PAREN_CLOSED):
if not first:
inp.match_or_die(TK_KIND.COMMA)
first = False
args.append(_ParseExpr(inp))
while inp.match(TK_KIND.COMMA):
args.append(_ParseExpr(inp))
inp.match_or_die(TK_KIND.PAREN_CLOSED)
return args


Expand Down Expand Up @@ -677,13 +697,6 @@ def _ParseStatement(inp: Lexer):
return cwast.StmtIf(cond, stmts_t, stmts_f)
elif kw.text in ("trylet", "trylet!"):
assert False
tokens.append(_ExpectToken(inp, TK_KIND.ID))
_ParseTypeExpr(inp)
_ExpectToken(inp, TK_KIND.OP, text="=")
_ParseExpr(inp)
_ExpectToken(inp, TK_KIND.COMMA)
_ExpectToken(inp, TK_KIND.ID)
_ParseStatementList(inp)
elif kw.text == "set":
lhs = _ParseExpr(inp)
kind = inp.match_or_die(TK_KIND.OP)
Expand All @@ -694,7 +707,10 @@ def _ParseStatement(inp: Lexer):
op = cwast.ASSIGNMENT_SHORTCUT[kind.text]
return cwast.StmtCompoundAssignment(op, lhs, rhs)
elif kw.text == "return":
val = _ParseExpr(inp)
if inp.peek().srcloc.lineno == kw.srcloc.lineno:
val = _ParseExpr(inp)
else:
val = cwast.ValVoid()
return cwast.StmtReturn(val)
elif kw.text == "for":
name = inp.match_or_die(TK_KIND.ID)
Expand Down Expand Up @@ -738,26 +754,32 @@ def _ParseStatementList(inp: Lexer):


def _ParseCondList(inp: Lexer):
_ExpectToken(inp, TK_KIND.COLON)
inp.match_or_die(TK_KIND.COLON)
indent = inp.peek().column
cases = []
while True:
tk = inp.peek()
if tk.column < indent:
break
_ExpectToken(inp, TK_KIND.KW, text="case")
_ParseExpr(inp)
_ParseStatementList(inp)
inp.match_or_die(TK_KIND.KW, "case")
cond = _ParseExpr(inp)
stmts = _ParseStatementList(inp)
cases.append(cwast.Case(cond, stmts))
return cwast.StmtCond(cases)


def _ParseFieldList(inp: Lexer):
_ExpectToken(inp, TK_KIND.COLON)
inp.match_or_die(TK_KIND.COLON)
indent = inp.peek().column
out = []
while True:
tk = inp.peek()
if tk.column < indent:
break
_ExpectToken(inp, TK_KIND.ID)
_ParseTypeExpr(inp)
name = inp.match_or_die(TK_KIND.ID)
type = _ParseTypeExpr(inp)
out.append(cwast.RecField(name.text, type))
return out


def _ParseFun(kw, inp: Lexer):
Expand All @@ -771,16 +793,15 @@ def _ParseFun(kw, inp: Lexer):
def _ParseTopLevel(inp: Lexer):
kw = inp.next()
if kw.text == "import":
name = _ExpectToken(inp, TK_KIND.ID)
name = inp.match_or_die(TK_KIND.ID)
out = cwast.Import(name.text, "", [])
return out
elif kw.text == "fun":
return _ParseFun(kw, inp)
elif kw.text == "rec":
name = _ExpectToken(inp, TK_KIND.ID)
name = inp.match_or_die(TK_KIND.ID)
fields = _ParseFieldList(inp)
out = cwast.DefRec(name.text, fields)
return out
return cwast.DefRec(name.text, fields)
elif kw.text in ("global", "global!"):
name = inp.match_or_die(TK_KIND.ID)
if inp.match(TK_KIND.OP, "="):
Expand All @@ -800,9 +821,9 @@ def _ParseTopLevel(inp: Lexer):
def _ParseModule(inp: Lexer):
# comments, annotations = _ParseOptionalCommentsAttributes(inp)
# print(comments, annotations)
kw = _ExpectToken(inp, TK_KIND.KW, "module")
name = _ExpectToken(inp, TK_KIND.ID)
_ExpectToken(inp, TK_KIND.COLON)
kw = inp.match_or_die(TK_KIND.KW, "module")
name = inp.match_or_die(TK_KIND.ID)
kw = inp.match_or_die(TK_KIND.COLON)
out = cwast.DefMod(name.text, [], [])

while True:
Expand Down

0 comments on commit ebc7faf

Please sign in to comment.