Skip to content

Commit

Permalink
improve parser more
Browse files Browse the repository at this point in the history
  • Loading branch information
robertmuth committed Apr 26, 2024
1 parent ebc7faf commit 2dc110c
Showing 1 changed file with 77 additions and 35 deletions.
112 changes: 77 additions & 35 deletions FrontEnd/parse.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,8 @@ class TK_KIND(enum.Enum):
KW = enum.auto()
ANNOTATION = enum.auto()
COMMENT = enum.auto()
OP = enum.auto()
OP1 = enum.auto()
OP2 = enum.auto()
COMMA = enum.auto()
COLON = enum.auto()
EOL = enum.auto()
Expand Down Expand Up @@ -145,27 +146,19 @@ class TK_KIND(enum.Enum):
for k in _KEYWORDS_WITH_EXCL_SUFFIX:
KEYWORDS[k] = TK_KIND.SPECIAL_MUT
for k in _KEYWORDS_OPERATOR_EQ_SUFFIX:
KEYWORDS[k] = TK_KIND.OP
KEYWORDS[k] = TK_KIND.OP2

# Note, order is important: e.g. >> must come before >
_OPERATORS_SIMPLE = [
"~",
_OPERATORS_SIMPLE2 = [
"&&",
"||",
"^",
".",
"!=",
"&-&",
"!",
]


_OPERATORS_WITH_EXCL_SUFFIX = [
"&",
"^",
]

_OPERATORS_WITH_EQ_SUFFIX = [
"!",
"=",
">>>",
"<<<",
Expand All @@ -180,17 +173,30 @@ class TK_KIND(enum.Enum):
"%"
]

# Operators registered in OPERATORS as TK_KIND.OP1. _PREFIX_EXPR_PARSERS maps
# OP1 tokens to _PParsePrefix, i.e. these are the operators that may start an
# expression. "-" is currently disabled.
_OPERATORS_SIMPLE1 = [
# "-",
"^",
"!",
]

# Operators registered in OPERATORS as TK_KIND.SPECIAL_MUT.
# NOTE(review): presumably these may be followed by a "!" suffix (per the
# name); the code handling that suffix is not visible here - confirm.
# "^" is also listed in _OPERATORS_SIMPLE1. Because this list is applied to
# OPERATORS after _OPERATORS_SIMPLE1, OPERATORS["^"] ends up as SPECIAL_MUT.
_OPERATORS_WITH_EXCL_SUFFIX = [
"&",
"^",
]

OPERATORS = {}
for o in _OPERATORS_SIMPLE:
OPERATORS[o] = TK_KIND.OP
for o in _OPERATORS_SIMPLE2:
OPERATORS[o] = TK_KIND.OP2
for o in _OPERATORS_WITH_EQ_SUFFIX:
OPERATORS[o] = TK_KIND.SPECIAL_ASSIGN
for o in _KEYWORDS_OPERATOR_EQ_SUFFIX:
OPERATORS[o] = TK_KIND.SPECIAL_ASSIGN
for o in _OPERATORS_SIMPLE1:
OPERATORS[o] = TK_KIND.OP1
for o in _OPERATORS_WITH_EXCL_SUFFIX:
OPERATORS[o] = TK_KIND.SPECIAL_MUT


ANNOTATION_RE = r"@[a-zA-Z]+"
ID_RE = r"[$_a-zA-Z](?:[_a-zA-Z0-9]|::)*#?"
NUM_RE = r"[0-9](?:[_0-9a-f.xp])*(?:sint|uint|[sru][0-9]+)?"
Expand All @@ -202,9 +208,12 @@ class TK_KIND(enum.Enum):
_R_STR_START_RE = r'r"(?:[^"])*'
_STR_END_RE = '(?:"|$)' # Note, this also covers the unterminated case

_operators = ([re.escape(x) for x in _OPERATORS_SIMPLE] +
[re.escape(x) for x in _OPERATORS_WITH_EQ_SUFFIX] +
[re.escape(x) for x in _OPERATORS_WITH_EXCL_SUFFIX])
_operators2 = ([re.escape(x) for x in _OPERATORS_SIMPLE2] +
[re.escape(x) for x in _OPERATORS_WITH_EQ_SUFFIX])

_operators1 = (
[re.escape(x) for x in _OPERATORS_WITH_EXCL_SUFFIX] +
[re.escape(x) for x in _OPERATORS_SIMPLE1])

_token_spec = [
(TK_KIND.ANNOTATION.name, ANNOTATION_RE),
Expand All @@ -223,7 +232,8 @@ class TK_KIND(enum.Enum):
(TK_KIND.WS.name, "[ \t]+"),
(TK_KIND.STR.name, "(?:" + _R_STR_START_RE + \
"|" + _STR_START_RE + ")" + _STR_END_RE),
(TK_KIND.OP.name, "|".join(_operators)),
(TK_KIND.OP2.name, "|".join(_operators2)),
(TK_KIND.OP1.name, "|".join(_operators1)),
(TK_KIND.CHAR.name, CHAR_RE),
]

Expand All @@ -238,6 +248,7 @@ class TK_KIND(enum.Enum):
assert TOKEN_RE.fullmatch("<<<")
assert not TOKEN_RE.fullmatch("<<<<")
assert TOKEN_RE.fullmatch("aa")
#assert TOKEN_RE.fullmatch("^!")

# print(TOKEN_RE.findall("zzzzz+aa*7u8 <<<<"))

Expand Down Expand Up @@ -292,7 +303,7 @@ def next_token(self) -> TK:
kind = TK_KIND.KW
if self._current_line.startswith("!", len(token)):
token = token + "!"
elif kind == TK_KIND.OP:
elif kind in (TK_KIND.OP2, TK_KIND.OP1):
k = OPERATORS[token]
if k == TK_KIND.SPECIAL_ASSIGN:
if self._current_line.startswith("=", len(token)):
Expand Down Expand Up @@ -398,7 +409,7 @@ def _ParseExpr(inp: Lexer, precedence=0):
tk = inp.next()
prec, parser = _PREFIX_EXPR_PARSERS.get(tk.kind, (0, None))
if not parser:
raise RuntimeError(f"could not parse '{tk.kind}'")
raise RuntimeError(f"could not parse '{tk}'")
lhs = parser(inp, tk, prec)
while True:
tk = inp.peek()
Expand Down Expand Up @@ -478,8 +489,15 @@ def _PParseChar(_inp: Lexer, tk: TK, _precedence) -> Any:
return cwast.ValNum(tk.text)


def _PParsePrefix(inp: Lexer, tk: TK, precedence) -> Any:
    """Parse a prefix (unary) operator expression.

    `tk` is the operator token that `_ParseExpr` has already consumed. The
    operand is parsed by calling `_ParseExpr` with the given `precedence`,
    and the result is wrapped in a `cwast.Expr1` whose kind is taken from
    `cwast.UNARY_EXPR_SHORTCUT`, indexed by the operator's text.
    """
    # Parse the operand first; the kind lookup happens only afterwards.
    operand = _ParseExpr(inp, precedence)
    return cwast.Expr1(cwast.UNARY_EXPR_SHORTCUT[tk.text], operand)


_PREFIX_EXPR_PARSERS = {
TK_KIND.KW: (10, _PParseKeywordConstants),
TK_KIND.OP1: (10, _PParsePrefix),
TK_KIND.ID: (10, _PParseId),
TK_KIND.NUM: (10, _PParseNum),
TK_KIND.SQUARE_OPEN: (10, _PParseArrayType),
Expand Down Expand Up @@ -514,19 +532,35 @@ def _ParseArrayInit(inp: Lexer) -> Any:
return cwast.IndexVal(_ParseExpr(inp), index)


def _ParseRecInit(inp: Lexer) -> Any:
    """Parse one entry of a record initializer: `[<.field> =] <expr>`.

    This is the record counterpart of `_ParseArrayInit`, which produces
    `cwast.IndexVal` nodes for array literals. It must carry its own name:
    defining it as `_ParseArrayInit` would shadow the array variant and
    leave the `_ParseRecInit` call in `_PParseInitializer` unresolved.

    If the next token's text starts with ".", that token is consumed as the
    field designator (its text is stored verbatim) and must be followed by
    "=". Otherwise the entry has no designator and the field is "".

    Returns:
        A `cwast.FieldVal` holding the parsed value expression and the
        field designator text.
    """
    field = ""
    if inp.peek().text.startswith("."):
        # Explicit designator: consume it together with the mandatory "=".
        field = inp.next().text
        inp.match_or_die(TK_KIND.OP2, "=")
    val = _ParseExpr(inp)
    return cwast.FieldVal(val, field)


def _PParseInitializer(inp: Lexer, type, tk: TK, _precedence) -> Any:
assert tk.kind is TK_KIND.CURLY_OPEN
if isinstance(type, cwast.Id):
assert False, "NYI - record initializer"
inits = []
first = True
while not inp.match(TK_KIND.CURLY_CLOSED):
if not first:
inp.match_or_die(TK_KIND.COMMA)
first = False
inits.append(_ParseRecInit(inp))
return cwast.ValRec(type, inits)
else:
assert isinstance(type, cwast.TypeArray)
inits = []
if not inp.match(TK_KIND.CURLY_CLOSED):
first = True
while not inp.match(TK_KIND.CURLY_CLOSED):
if not first:
inp.match_or_die(TK_KIND.COMMA)
first = False
inits.append(_ParseArrayInit(inp))
while inp.match(TK_KIND.COMMA):
inits.append(_ParseArrayInit(inp))
inp.match_or_die(TK_KIND.CURLY_CLOSED)

return cwast.ValArray(type.size, type.type, inits)


Expand Down Expand Up @@ -672,12 +706,12 @@ def _ParseStatement(inp: Lexer):
assert kw.kind is TK_KIND.KW, f"{kw}"
if kw.text in ("let", "let!"):
name = inp.match_or_die(TK_KIND.ID)
if inp.match(TK_KIND.OP, "="):
if inp.match(TK_KIND.OP2, "="):
type = cwast.TypeAuto()
init = _ParseExpr(inp)
else:
type = _ParseTypeExpr(inp)
if inp.match(TK_KIND.OP, "="):
if inp.match(TK_KIND.OP2, "="):
init = _ParseExpr(inp)
else:
init = cwast.ValAuto()
Expand All @@ -696,10 +730,19 @@ def _ParseStatement(inp: Lexer):
stmts_f = _ParseStatementList(inp)
return cwast.StmtIf(cond, stmts_t, stmts_f)
elif kw.text in ("trylet", "trylet!"):
assert False
name = inp.match_or_die(TK_KIND.ID)
type = _ParseTypeExpr(inp)
inp.match_or_die(TK_KIND.OP2, "=")
expr = _ParseExpr(inp)
inp.match_or_die(TK_KIND.COMMA)
name2 = inp.match_or_die(TK_KIND.ID)
stmts = _ParseStatementList(inp)
return cwast.MacroInvoke(cwast.Id(kw.text),
[cwast.Id(name), type, expr, cwast.Id(name2),
cwast.EphemeralList(stmts, colon=True)])
elif kw.text == "set":
lhs = _ParseExpr(inp)
kind = inp.match_or_die(TK_KIND.OP)
kind = inp.match_or_die(TK_KIND.OP2)
rhs = _ParseExpr(inp)
if kind.text == "=":
return cwast.StmtAssignment(lhs, rhs)
Expand All @@ -714,7 +757,7 @@ def _ParseStatement(inp: Lexer):
return cwast.StmtReturn(val)
elif kw.text == "for":
name = inp.match_or_die(TK_KIND.ID)
inp.match_or_die(TK_KIND.OP, "=")
inp.match_or_die(TK_KIND.OP2, "=")
start = _ParseExpr(inp)
inp.match_or_die(TK_KIND.COMMA)
end = _ParseExpr(inp)
Expand All @@ -732,8 +775,7 @@ def _ParseStatement(inp: Lexer):
stmts = _ParseStatementList(inp)
return cwast.StmtBlock(label, stmts)
elif kw.text == "cond":
assert False
_ParseCondList(inp)
return _ParseCondList(inp)
else:
assert False, f"{kw}"

Expand Down Expand Up @@ -804,12 +846,12 @@ def _ParseTopLevel(inp: Lexer):
return cwast.DefRec(name.text, fields)
elif kw.text in ("global", "global!"):
name = inp.match_or_die(TK_KIND.ID)
if inp.match(TK_KIND.OP, "="):
if inp.match(TK_KIND.OP2, "="):
type = cwast.TypeAuto()
init = _ParseExpr(inp)
else:
type = _ParseTypeExpr(inp)
if inp.match(TK_KIND.OP, "="):
if inp.match(TK_KIND.OP2, "="):
init = _ParseExpr(inp)
else:
init = cwast.ValAuto()
Expand Down

0 comments on commit 2dc110c

Please sign in to comment.