Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added Python3 target support for the Rust Grammar. #4204

Merged
merged 4 commits into from
Aug 19, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions rust/Cpp/transformGrammar.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ def transform_grammar(file_path):
line = re.sub(r"(this\.)", 'this->', line)
line = re.sub(r"(_input\.)", '_input->', line)
line = re.sub(r"(\.getType\(\))", '->getType()', line)
line = re.sub(r"(\.next)", '->next', line)
output_file.write(line)

print("Writing ...")
Expand Down
14 changes: 7 additions & 7 deletions rust/Java/RustLexerBase.java
Original file line number Diff line number Diff line change
Expand Up @@ -5,17 +5,17 @@ public RustLexerBase(CharStream input){
super(input);
}

Token lt1;
Token lt2;
Token current;
Token previous;

@Override
public Token nextToken() {
Token next = super.nextToken();

if (next.getChannel() == Token.DEFAULT_CHANNEL) {
// Keep track of the last token on the default channel.
this.lt2 = this.lt1;
this.lt1 = next;
this.previous = this.current;
this.current = next;
}

return next;
Expand Down Expand Up @@ -46,9 +46,9 @@ public boolean floatDotPossible(){
}

public boolean floatLiteralPossible(){
if(this.lt1 == null || this.lt2 == null) return true;
if(this.lt1.getType() != RustLexer.DOT) return true;
switch (this.lt2.getType()){
if(this.current == null || this.previous == null) return true;
if(this.current.getType() != RustLexer.DOT) return true;
switch (this.previous.getType()){
case RustLexer.CHAR_LITERAL:
case RustLexer.STRING_LITERAL:
case RustLexer.RAW_STRING_LITERAL:
Expand Down
90 changes: 90 additions & 0 deletions rust/Python3/RustLexerBase.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
from typing import TextIO, Optional
from antlr4 import *
from antlr4.InputStream import InputStream

class RustLexerBase(Lexer):
    """Base class of the generated Rust lexer for the Python3 target.

    It supplies the helper methods that the grammar's semantic predicates
    call (``SOF``, ``next``, ``floatDotPossible``, ``floatLiteralPossible``)
    and remembers the last two tokens emitted on the default channel.
    """

    # Token types after which a following ``.`` makes ``floatLiteralPossible``
    # return False. Filled in by ``__init__`` once the generated lexer class
    # can be imported.
    flt_mp = set()
    # The generated ``RustLexer`` class; resolved in ``__init__``.
    RustLexer = None

    def __init__(self, input: InputStream, output: TextIO = ...) -> None:
        """Create the lexer and initialise the look-behind state.

        Args:
            input: Character stream to tokenize.
            output: Stream handed to the runtime ``Lexer``. When left at its
                ``...`` placeholder default, ``sys.stdout`` is used instead of
                forwarding the placeholder object.
        """
        if output is ...:
            import sys
            output = sys.stdout
        super().__init__(input, output)
        # (previous, current) tokens seen on the default channel.
        self.token_lookbehind: tuple[Optional[Token], Optional[Token]] = (None, None)
        # NOTE(review): imported here rather than at module level, presumably
        # because the generated RustLexer module imports this one - confirm.
        try:
            from .RustLexer import RustLexer
        except ImportError:
            from RustLexer import RustLexer
        RustLexerBase.RustLexer = RustLexer
        RustLexerBase.flt_mp = {
            # CHAR_LITERAL is part of the same list in the Java base class.
            RustLexer.CHAR_LITERAL,
            RustLexer.STRING_LITERAL,
            RustLexer.RAW_STRING_LITERAL,
            RustLexer.BYTE_LITERAL,
            RustLexer.BYTE_STRING_LITERAL,
            RustLexer.RAW_BYTE_STRING_LITERAL,
            RustLexer.INTEGER_LITERAL,
            RustLexer.DEC_LITERAL,
            RustLexer.HEX_LITERAL,
            RustLexer.OCT_LITERAL,
            RustLexer.BIN_LITERAL,
            RustLexer.KW_SUPER,
            RustLexer.KW_SELFVALUE,
            RustLexer.KW_SELFTYPE,
            RustLexer.KW_CRATE,
            RustLexer.KW_DOLLARCRATE,
            RustLexer.RCURLYBRACE,
            RustLexer.RSQUAREBRACKET,
            RustLexer.RPAREN,
            RustLexer.KW_AWAIT,
            RustLexer.NON_KEYWORD_IDENTIFIER,
            RustLexer.RAW_IDENTIFIER,
            RustLexer.KW_MACRORULES,
            RustLexer.GT,
        }

    def _peek_char(self, offset: int) -> Optional[str]:
        """Return the look-ahead character at *offset*, or None at end of input.

        ``LA`` reports end of input with a negative value, which ``chr``
        rejects with ``ValueError``; this helper shields callers from that.
        """
        code_point = self._input.LA(offset)
        if code_point < 0:
            return None
        return chr(code_point)

    # Look-behind tokens
    def nextToken(self):
        """Return the next token, recording it when it is on the default channel."""
        token: Token = super().nextToken()

        if token.channel == Token.DEFAULT_CHANNEL:
            # Shift the window: the old "current" becomes "previous".
            self.token_lookbehind = self.token_lookbehind[1], token

        return token

    def SOF(self):
        """Return True when no character precedes the current input position."""
        return self._input.LA(-1) <= 0

    def next(self, expect) -> bool:
        """Return whether the next input character matches *expect*.

        Args:
            expect: Either a string, compared with the next character, or a
                value compared directly with ``LA(1)``.

        Returns:
            True on a match. A string never matches at end of input.
        """
        if isinstance(expect, str):
            upcoming = self._peek_char(1)
            return upcoming is not None and upcoming == expect
        return self._input.LA(1) == expect

    def floatDotPossible(self):
        """Decide from the upcoming characters whether a float may end in a dot.

        Returns:
            False when the next character is ``.``, ``_`` or a letter, except
            that an ``f32`` / ``f64`` suffix is accepted; True otherwise,
            including at end of input.
        """
        upcoming = self._peek_char(1)
        if upcoming is None:
            # End of input: nothing follows that could veto the float.
            return True
        if upcoming in (".", "_"):
            return False
        if upcoming == "f":
            # Only the f32 / f64 suffixes may follow directly.
            suffix = (self._peek_char(2), self._peek_char(3))
            return suffix in (("3", "2"), ("6", "4"))
        return not upcoming.isalpha()

    def floatLiteralPossible(self):
        """Decide from the two previous tokens whether a float literal may start.

        Returns:
            True when fewer than two default-channel tokens have been seen or
            the latest one is not ``DOT``; otherwise True only if the token
            before the dot is not listed in ``flt_mp``.
        """
        prev, current = self.token_lookbehind

        if prev is None or current is None:
            return True
        if current.type != RustLexerBase.RustLexer.DOT:
            return True
        return prev.type not in RustLexerBase.flt_mp
10 changes: 10 additions & 0 deletions rust/Python3/RustParserBase.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
from antlr4 import *
try:
from .RustLexer import RustLexer
except ImportError:
from RustLexer import RustLexer


class RustParserBase(Parser):
    """Parser base class providing the helper used by grammar predicates."""

    def next(self, expect: str) -> bool:
        """Return whether the upcoming token is the literal *expect*.

        The literal is wrapped in single quotes and looked up in
        ``RustLexer.literalNames``; its position in that list is compared
        with the value of ``self._input.LA(1)``.

        Raises:
            ValueError: If the quoted literal is not in ``literalNames``.
        """
        upcoming_type = self._input.LA(1)
        quoted_literal = f"'{expect}'"
        expected_type = RustLexer.literalNames.index(quoted_literal)
        return upcoming_type == expected_type
50 changes: 50 additions & 0 deletions rust/Python3/testing_script.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
"""
Run this script from the Python directory to ensure examples work.


Timo Barnard ([email protected])
"""
from antlr4 import *
import os
from io import StringIO
from RustLexer import RustLexer
from RustParser import RustParser
import traceback
import time
# Paths of every entry in ../examples whose name ends with '.rs'.
examples_clean = [l.path for l in os.scandir('../examples') if l.name.endswith('.rs')]
# Paths of every entry in ../examples whose name ends with '.rs.tree'.
# NOTE(review): not referenced by the rest of the visible script.
examples_tree = [l.path for l in os.scandir('../examples') if l.name.endswith('.rs.tree')]

def test_grammar(filepath, should_pass = True):
    """Parse one Rust source file and print a pass/fail report for it.

    The file is lexed and parsed from the ``crate`` rule, and the resulting
    tree is printed. The lexer and parser share one in-memory output buffer,
    and a traceback is appended to it if parsing raises. The run counts as a
    pass only when that buffer holds nothing but spaces and newlines.

    Args:
        filepath: Path of the source file, read as UTF-8.
        should_pass: Accepted for callers but not consulted by this function.
    """
    char_stream = FileStream(filepath,encoding='utf-8')
    sink = StringIO()
    rust_lexer = RustLexer(char_stream, sink)
    tokens = CommonTokenStream(rust_lexer)
    rust_parser = RustParser(tokens, sink)

    started, finished = None, None
    try:
        started = time.time_ns()
        tree = rust_parser.crate()
        print(tree.toStringTree(rust_parser.ruleNames))
    except Exception:
        finished = time.time_ns()
        # Banner separating earlier output from the traceback.
        sink.write('\n'*2 + ' *'*10 + ' EXCEPTION ' + '* '*20 + '\n')
        traceback.print_exc(file=sink)
        sink.write("\n"*2)
    else:
        finished = time.time_ns()

    report = sink.getvalue()
    # Anything other than spaces/newlines in the buffer means failure.
    leftover = report.replace(" ",'').replace('\n','')
    verdict = 'fail' if leftover else 'pass'

    elapsed = ""
    if finished is not None:
        elapsed = f'{(finished - started)/ 1_000_000_000}s'
    print(f"{filepath} -> {verdict} {elapsed}")
    print(report)

# When run as a script, parse every collected '.rs' example and report on it.
if __name__ == "__main__":
    for example in examples_clean:
        test_grammar(example)
32 changes: 32 additions & 0 deletions rust/Python3/transformGrammar.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
import sys, os, re, shutil
from glob import glob
from pathlib import Path

def main(argv):
    """Rewrite every ``*.g4`` grammar file found in the current directory.

    Args:
        argv: Command-line arguments; accepted but not used.
    """
    grammar_paths = glob("*.g4")
    for grammar_path in grammar_paths:
        fix(grammar_path)

def fix(file_path):
    """Rewrite ``this.`` references in a grammar file to ``self.``.

    The original file is first moved to ``<file_path>.bak``; the transformed
    text is then written back to ``file_path``. Only occurrences of ``this.``
    that are not preceded by a word character are replaced.

    Args:
        file_path: Path of the grammar file to transform in place.

    Raises:
        SystemExit: With status 1 when ``file_path`` does not exist.
    """
    print("Altering " + file_path)
    if not os.path.exists(file_path):
        print(f"Could not find file: {file_path}")
        sys.exit(1)

    backup_path = file_path + ".bak"
    shutil.move(file_path, backup_path)

    # Context managers close both files even if reading or writing fails.
    with open(backup_path, 'r', encoding="utf-8") as input_file, \
            open(file_path, 'w', encoding="utf-8") as output_file:
        for line in input_file:
            if 'this.' in line:
                line = re.sub(r"(?<!\w)this\.", "self.", line)
            output_file.write(line)

    print("Writing ...")

# Script entry point: hand the command-line arguments to main().
if __name__ == '__main__':
    main(sys.argv)
3 changes: 2 additions & 1 deletion rust/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,4 +15,5 @@ Install the parser into the local Maven repository with `mvn install`.
## Known limitation

- Only stable features of Rust 2018 and later are implemented.
- Checks for isolated `\r` are not implemented.
- Checks for isolated `\r` are not implemented.
- The Python3 target uses separately transformed grammar files because the semantic predicates contain target-specific code.
Loading
Loading