From ebc7fafe56a74a81f1041d0c995ba178be0cebda Mon Sep 17 00:00:00 2001 From: Robert Muth Date: Thu, 25 Apr 2024 21:22:15 -0400 Subject: [PATCH] improve parser more --- .../ConcteteSyntax/print_argv.concrete.cw | 2 +- FrontEnd/parse.py | 145 ++++++++++-------- 2 files changed, 84 insertions(+), 63 deletions(-) diff --git a/FrontEnd/ConcteteSyntax/print_argv.concrete.cw b/FrontEnd/ConcteteSyntax/print_argv.concrete.cw index 732afc26..7782151c 100644 --- a/FrontEnd/ConcteteSyntax/print_argv.concrete.cw +++ b/FrontEnd/ConcteteSyntax/print_argv.concrete.cw @@ -6,7 +6,7 @@ import fmt fun strlen(s ^u8) uint: let! i uint = 0 -- pinc is adds an integer to a pointer it also has an options bound - while pinc(s, i)^!= 0: + while pinc(s, i)^ != 0: set i += 1 return i diff --git a/FrontEnd/parse.py b/FrontEnd/parse.py index 85799473..2575b383 100755 --- a/FrontEnd/parse.py +++ b/FrontEnd/parse.py @@ -161,6 +161,7 @@ class TK_KIND(enum.Enum): _OPERATORS_WITH_EXCL_SUFFIX = [ "&", + "^", ] _OPERATORS_WITH_EQ_SUFFIX = [ @@ -343,7 +344,7 @@ def next(self) -> TK: out.comments.append(tk) else: self._peek_cache_small = tk - print(out) + # print(out) for a in annotations: if a.srcloc.lineno == out.srcloc.lineno: out.column = a.column @@ -370,14 +371,6 @@ def match_or_die(self, kind: TK_KIND, text=None): return self.next() -def _ExpectToken(inp: Lexer, kind: TK_KIND, text=None) -> TK: - tk = inp.next() - if tk.kind != kind or text is not None and tk.text != text: - cwast.CompilerError( - tk.srcloc, f"Expected {kind}, got {tk.kind} [{tk.text}]") - return tk - - _OPERATOR_LIKE = { "unwrap": "E", "uniontypetag": "E", @@ -434,6 +427,7 @@ def _PParseArrayType(inp: Lexer, tk: TK, _precedence) -> Any: _FUN_LIKE = { + "len": (cwast.ExprLen, "E"), "pinc": (cwast.ExprPointer, "EE"), "pdec": (cwast.ExprPointer, "EE"), "offsetof": (cwast.ExprOffsetof, "TF"), @@ -473,17 +467,25 @@ def _PParseKeywordConstants(inp: Lexer, tk: TK, _precedence) -> Any: return ctor(*params) else: - assert False + assert False, f"{tk}" + def _PParseStr(_inp: Lexer, tk: TK, _precedence) -> Any: return cwast.ValString(tk.text) + +def _PParseChar(_inp: Lexer, tk: TK, _precedence) -> Any: + return cwast.ValNum(tk.text) + + _PREFIX_EXPR_PARSERS = { TK_KIND.KW: (10, _PParseKeywordConstants), TK_KIND.ID: (10, _PParseId), TK_KIND.NUM: (10, _PParseNum), TK_KIND.SQUARE_OPEN: (10, _PParseArrayType), TK_KIND.STR: (10, _PParseStr), + TK_KIND.CHAR: (10, _PParseChar), + } @@ -495,11 +497,12 @@ def _PParserInfixOp(inp: Lexer, lhs, tk: TK, precedence) -> Any: def _PParseFunctionCall(inp: Lexer, callee, tk: TK, precedence) -> Any: assert tk.kind is TK_KIND.PAREN_OPEN args = [] - if not inp.match(TK_KIND.PAREN_CLOSED): + first = True + while not inp.match(TK_KIND.PAREN_CLOSED): + if not first: + inp.match_or_die(TK_KIND.COMMA) + first = False args.append(_ParseExpr(inp)) - while inp.match(TK_KIND.COMMA): - args.append(_ParseExpr(inp)) - inp.match_or_die(TK_KIND.PAREN_CLOSED) return cwast.ExprCall(callee, args) @@ -538,6 +541,11 @@ def _PParseDeref(_inp: Lexer, pointer, _tk: TK, _precedence) -> Any: return cwast.ExprDeref(pointer) +def _PParseFieldAccess(inp: Lexer, rec, _tk: TK, _precedence) -> Any: + field = inp.match_or_die(TK_KIND.ID) + return cwast.ExprField(rec, field.text) + + _INFIX_EXPR_PARSERS = { "<": (10, _PParserInfixOp), "<=": (10, _PParserInfixOp), @@ -552,12 +560,16 @@ def _PParseDeref(_inp: Lexer, pointer, _tk: TK, _precedence) -> Any: "/": (10, _PParserInfixOp), "*": (10, _PParserInfixOp), # + "||": (10, _PParserInfixOp), + "&&": (10, _PParserInfixOp), + # "&-&": (10, _PParserInfixOp), # "(": (10, _PParseFunctionCall), "{": (10, _PParseInitializer), "[": (10, _PParseIndex), "^": (10, _PParseDeref), + ".": (10, _PParseFieldAccess) } @@ -567,32 +579,40 @@ def _ParseTypeExpr(inp: Lexer): if tk.kind is TK_KIND.ID: return cwast.Id(tk.text) elif tk.kind is TK_KIND.KW: - if tk.text == "auto": return cwast.TypeAuto() + elif tk.text in ("slice", "slice!"): + inp.match_or_die(TK_KIND.PAREN_OPEN) + type = _ParseTypeExpr(inp) + inp.match_or_die(TK_KIND.PAREN_CLOSED) + return cwast.TypeSlice(type, mut=tk.text.endswith("!")) + elif tk.text == "union": + inp.match_or_die(TK_KIND.PAREN_OPEN) + members = [] + first = True + while not inp.match(TK_KIND.PAREN_CLOSED): + if not first: + inp.match_or_die(TK_KIND.COMMA) + first = False + members.append(_ParseTypeExpr(inp)) + return cwast.TypeUnion(members) kind = cwast.KeywordToBaseTypeKind(tk.text) - assert kind is not cwast.BASE_TYPE_KIND.INVALID + assert kind is not cwast.BASE_TYPE_KIND.INVALID, f"{tk}" return cwast.TypeBase(kind) elif tk.text == '[': - if inp.peek().text == "]": - _ExpectToken(inp, TK_KIND.SQUARE_CLOSED) - type = _ParseTypeExpr(inp) - return cwast.TypeSlice(type, False) - elif inp.peek().text == "!": - _ExpectToken(inp, TK_KIND.OP, "!") - _ExpectToken(inp, TK_KIND.SQUARE_CLOSED) - type = _ParseTypeExpr(inp) - return cwast.TypeSlice(type, True) - else: - dim = _ParseTypeExpr(inp) - _ExpectToken(inp, TK_KIND.SQUARE_CLOSED) - type = _ParseTypeExpr(inp) - return cwast.TypeArray(dim, type) + dim = _ParseExpr(inp) + inp.match_or_die(TK_KIND.SQUARE_CLOSED) + type = _ParseTypeExpr(inp) + return cwast.TypeArray(dim, type) elif tk.text == "sig": assert False + elif tk.text == "^": rest = _ParseTypeExpr(inp) return cwast.TypePtr(rest) + elif tk.text == "^!": + rest = _ParseTypeExpr(inp) + return cwast.TypePtr(rest, mut=True) else: assert False, f"unexpected token {tk}" @@ -600,25 +620,25 @@ def _ParseTypeExpr(inp: Lexer): def _ParseFormalParams(inp: Lexer): out = [] inp.match_or_die(TK_KIND.PAREN_OPEN) - if not inp.match(TK_KIND.PAREN_CLOSED): + first = True + while not inp.match(TK_KIND.PAREN_CLOSED): + if not first: + inp.match_or_die(TK_KIND.COMMA) + first = False name = inp.match_or_die(TK_KIND.ID) type = _ParseTypeExpr(inp) out.append(cwast.FunParam(name.text, type)) - while inp.match(TK_KIND.COMMA): - name = inp.match_or_die(TK_KIND.ID) - type = _ParseTypeExpr(inp) - out.append(cwast.FunParam(name.text, type)) - inp.match_or_die(TK_KIND.PAREN_CLOSED) return out def _ParseMacroCall(inp: Lexer) -> Any: args = [] - if not inp.match(TK_KIND.PAREN_CLOSED): + first = True + while not inp.match(TK_KIND.PAREN_CLOSED): + if not first: + inp.match_or_die(TK_KIND.COMMA) + first = False args.append(_ParseExpr(inp)) - while inp.match(TK_KIND.COMMA): - args.append(_ParseExpr(inp)) - inp.match_or_die(TK_KIND.PAREN_CLOSED) return args @@ -677,13 +697,6 @@ def _ParseStatement(inp: Lexer): return cwast.StmtIf(cond, stmts_t, stmts_f) elif kw.text in ("trylet", "trylet!"): assert False - tokens.append(_ExpectToken(inp, TK_KIND.ID)) - _ParseTypeExpr(inp) - _ExpectToken(inp, TK_KIND.OP, text="=") - _ParseExpr(inp) - _ExpectToken(inp, TK_KIND.COMMA) - _ExpectToken(inp, TK_KIND.ID) - _ParseStatementList(inp) elif kw.text == "set": lhs = _ParseExpr(inp) kind = inp.match_or_die(TK_KIND.OP) @@ -694,7 +707,10 @@ def _ParseStatement(inp: Lexer): op = cwast.ASSIGNMENT_SHORTCUT[kind.text] return cwast.StmtCompoundAssignment(op, lhs, rhs) elif kw.text == "return": - val = _ParseExpr(inp) + if inp.peek().srcloc.lineno == kw.srcloc.lineno: + val = _ParseExpr(inp) + else: + val = cwast.ValVoid() return cwast.StmtReturn(val) elif kw.text == "for": name = inp.match_or_die(TK_KIND.ID) @@ -738,26 +754,32 @@ def _ParseStatementList(inp: Lexer): def _ParseCondList(inp: Lexer): - _ExpectToken(inp, TK_KIND.COLON) + inp.match_or_die(TK_KIND.COLON) indent = inp.peek().column + cases = [] while True: tk = inp.peek() if tk.column < indent: break - _ExpectToken(inp, TK_KIND.KW, text="case") - _ParseExpr(inp) - _ParseStatementList(inp) + inp.match_or_die(TK_KIND.KW, "case") + cond = _ParseExpr(inp) + stmts = _ParseStatementList(inp) + cases.append(cwast.Case(cond, stmts)) + return cwast.StmtCond(cases) def _ParseFieldList(inp: Lexer): - _ExpectToken(inp, TK_KIND.COLON) + inp.match_or_die(TK_KIND.COLON) indent = inp.peek().column + out = [] while True: tk = inp.peek() if tk.column < indent: break - _ExpectToken(inp, TK_KIND.ID) - _ParseTypeExpr(inp) + name = inp.match_or_die(TK_KIND.ID) + type = _ParseTypeExpr(inp) + out.append(cwast.RecField(name.text, type)) + return out def _ParseFun(kw, inp: Lexer): @@ -771,16 +793,15 @@ def _ParseFun(kw, inp: Lexer): def _ParseTopLevel(inp: Lexer): kw = inp.next() if kw.text == "import": - name = _ExpectToken(inp, TK_KIND.ID) + name = inp.match_or_die(TK_KIND.ID) out = cwast.Import(name.text, "", []) return out elif kw.text == "fun": return _ParseFun(kw, inp) elif kw.text == "rec": - name = _ExpectToken(inp, TK_KIND.ID) + name = inp.match_or_die(TK_KIND.ID) fields = _ParseFieldList(inp) - out = cwast.DefRec(name.text, fields) - return out + return cwast.DefRec(name.text, fields) elif kw.text in ("global", "global!"): name = inp.match_or_die(TK_KIND.ID) if inp.match(TK_KIND.OP, "="): @@ -800,9 +821,9 @@ def _ParseTopLevel(inp: Lexer): def _ParseModule(inp: Lexer): # comments, annotations = _ParseOptionalCommentsAttributes(inp) # print(comments, annotations) - kw = _ExpectToken(inp, TK_KIND.KW, "module") - name = _ExpectToken(inp, TK_KIND.ID) - _ExpectToken(inp, TK_KIND.COLON) + kw = inp.match_or_die(TK_KIND.KW, "module") + name = inp.match_or_die(TK_KIND.ID) + kw = inp.match_or_die(TK_KIND.COLON) out = cwast.DefMod(name.text, [], []) while True: