antlr · teverett · Aug 19, 2024 · Aug 13, 2024 · Aug 15, 2024 · Aug 16, 2024
diff --git a/rust/Cpp/transformGrammar.py b/rust/Cpp/transformGrammar.py
@@ -28,6 +28,7 @@ def transform_grammar(file_path):
                 line = re.sub(r"(this\.)", 'this->', line)
                 line = re.sub(r"(_input\.)", '_input->', line)
                 line = re.sub(r"(\.getType\(\))", '->getType()', line)
+                line = re.sub(r"(\.next)", '->next', line)
                 output_file.write(line)
 
     print("Writing ...")

diff --git a/rust/Java/RustLexerBase.java b/rust/Java/RustLexerBase.java
@@ -5,17 +5,17 @@ public RustLexerBase(CharStream input){
         super(input);
     }
 
-    Token lt1;
-    Token lt2;
+    Token current;
+    Token previous;
 
     @Override
     public Token nextToken() {
         Token next = super.nextToken();
 
         if (next.getChannel() == Token.DEFAULT_CHANNEL) {
             // Keep track of the last token on the default channel.
-            this.lt2 = this.lt1;
-            this.lt1 = next;
+            this.previous = this.current;
+            this.current = next;
         }
 
         return next;
@@ -46,9 +46,9 @@ public boolean floatDotPossible(){
     }
 
     public boolean floatLiteralPossible(){
-        if(this.lt1 == null || this.lt2 == null) return true;
-        if(this.lt1.getType() != RustLexer.DOT) return true;
-        switch (this.lt2.getType()){
+        if(this.current == null || this.previous == null) return true;
+        if(this.current.getType() != RustLexer.DOT) return true;
+        switch (this.previous.getType()){
             case RustLexer.CHAR_LITERAL:
             case RustLexer.STRING_LITERAL:
             case RustLexer.RAW_STRING_LITERAL:

diff --git a/rust/Python3/RustLexerBase.py b/rust/Python3/RustLexerBase.py
@@ -0,0 +1,90 @@
+from typing import TextIO, Optional
+from antlr4 import *
+from antlr4.InputStream import InputStream
+
+class RustLexerBase(Lexer):
+    """Lexer base class supplying look-behind state and lookahead helper methods."""
+
+    # Token types that, directly before a DOT, make floatLiteralPossible() return False.
+    flt_mp = set()
+    RustLexer = None
+
+    def __init__(self, input: InputStream, output: TextIO = ...) -> None:
+        super().__init__(input, output)
+        # (previous, current): the last two tokens seen on the default channel.
+        self.token_lookbehind: tuple[Optional[Token], Optional[Token]] = (None, None)
+        # Local import: presumably avoids a circular import with the RustLexer module.
+        try:
+            from .RustLexer import RustLexer
+        except ImportError:
+            from RustLexer import RustLexer
+        RustLexerBase.RustLexer = RustLexer
+        RustLexerBase.flt_mp = {
+            RustLexer.CHAR_LITERAL,
+            RustLexer.STRING_LITERAL,
+            RustLexer.RAW_STRING_LITERAL,
+            RustLexer.BYTE_LITERAL,
+            RustLexer.BYTE_STRING_LITERAL,
+            RustLexer.RAW_BYTE_STRING_LITERAL,
+            RustLexer.INTEGER_LITERAL,
+            RustLexer.DEC_LITERAL,
+            RustLexer.HEX_LITERAL,
+            RustLexer.OCT_LITERAL,
+            RustLexer.BIN_LITERAL,
+            RustLexer.KW_SUPER,
+            RustLexer.KW_SELFVALUE,
+            RustLexer.KW_SELFTYPE,
+            RustLexer.KW_CRATE,
+            RustLexer.KW_DOLLARCRATE,
+            RustLexer.RCURLYBRACE,
+            RustLexer.RSQUAREBRACKET,
+            RustLexer.RPAREN,
+            RustLexer.KW_AWAIT,
+            RustLexer.NON_KEYWORD_IDENTIFIER,
+            RustLexer.RAW_IDENTIFIER,
+            RustLexer.KW_MACRORULES,
+            RustLexer.GT,
+        }
+
+    def nextToken(self):
+        """Return the next token, remembering the last two default-channel tokens."""
+        token: Token = super().nextToken()
+        if token.channel == Token.DEFAULT_CHANNEL:
+            self.token_lookbehind = self.token_lookbehind[1], token
+        return token
+
+    def _peek(self, offset: int = 1) -> str:
+        """Return the lookahead character at *offset*, or "" at end of input."""
+        code = self._input.LA(offset)
+        # LA() reports end of input as a negative code (Token.EOF), which chr() rejects.
+        return chr(code) if code >= 0 else ""
+
+    def SOF(self):
+        """Return True when LA(-1) yields no character before the current position."""
+        return self._input.LA(-1) <= 0
+
+    def next(self, expect) -> bool:
+        """Return True if the next input character equals *expect* (str or code point)."""
+        if isinstance(expect, str):
+            return self._peek() == expect
+        return self._input.LA(1) == expect
+
+    def floatDotPossible(self):
+        """Return False if the next character is '.', '_' or a letter (f32/f64 excepted)."""
+        upcoming = self._peek()
+        if upcoming in (".", "_"):
+            return False
+        if upcoming == "f":
+            # 'f' is acceptable only as the start of an f32 or f64 suffix.
+            return (self._peek(2), self._peek(3)) in (("3", "2"), ("6", "4"))
+        # Any other letter returns False; everything else (including EOF) returns True.
+        return not upcoming.isalpha()
+
+    def floatLiteralPossible(self):
+        """Return False only when the last two tokens are a flt_mp token followed by DOT."""
+        prev, current = self.token_lookbehind
+        if prev is None or current is None:
+            return True
+        if current.type != RustLexerBase.RustLexer.DOT:
+            return True
+        return prev.type not in RustLexerBase.flt_mp
diff --git a/rust/Python3/RustParserBase.py b/rust/Python3/RustParserBase.py
@@ -0,0 +1,10 @@
+from antlr4 import *
+try:
+    from .RustLexer import RustLexer
+except ImportError:
+    from RustLexer import RustLexer
+
+
+class RustParserBase(Parser):  # Parser subclass that adds a lookahead test helper
+    def next(self, expect: str) -> bool:  # True if LA(1) equals the literalNames index of 'expect'
+        return self._input.LA(1) == RustLexer.literalNames.index(f"'{expect}'")
diff --git a/rust/Python3/testing_script.py b/rust/Python3/testing_script.py
@@ -0,0 +1,50 @@
+"""
+Run this script from the Python directory to ensure examples work.
+
+
+Timo Barnard ([email protected])
+"""
+from antlr4 import *
+import os
+from io import StringIO
+from RustLexer import RustLexer
+from RustParser import RustParser
+import traceback
+import time
+examples_clean = [entry.path for entry in os.scandir('../examples') if entry.name.endswith('.rs')]
+examples_tree = [entry.path for entry in os.scandir('../examples') if entry.name.endswith('.rs.tree')]
+
+def test_grammar(filepath, should_pass = True):
+    """Lex and parse one Rust file, then print its tree, a pass/fail line and errors.
+
+    The file passes when the parser counted no syntax errors and nothing but
+    spaces/newlines was written to ``output`` (an exception writes a traceback).
+    ``should_pass`` is accepted for callers but is not used yet.
+    """
+    inputstream = FileStream(filepath, encoding='utf-8')
+    output = StringIO()
+    lexer = RustLexer(inputstream, output)
+    tokenstream = CommonTokenStream(lexer)
+    parser = RustParser(tokenstream, output)
+    # Monotonic clock: wall-clock adjustments cannot skew the measured duration.
+    start = time.perf_counter_ns()
+    try:
+        print(parser.crate().toStringTree(parser.ruleNames))
+    except Exception:
+        elapsed = time.perf_counter_ns() - start
+        output.write('\n'*2 + ' *'*10 + ' EXCEPTION ' + '* '*20 + '\n')
+        traceback.print_exc(file=output)
+        output.write("\n"*2)
+    else:
+        elapsed = time.perf_counter_ns() - start
+    result = output.getvalue()
+    # Syntax errors the parser recovers from raise nothing, so ask for its error count.
+    clean = parser.getNumberOfSyntaxErrors() == 0
+    did_pass = clean and result.replace(" ",'').replace('\n','') == ''
+    d = f'{elapsed / 1_000_000_000}s'
+    print(f"{filepath} -> {'pass' if did_pass else 'fail'} {d}")
+    print(result)
+
+if __name__ == "__main__":  # script entry: run test_grammar on every path in examples_clean
+    for example in examples_clean:
+        test_grammar(example)
diff --git a/rust/Python3/transformGrammar.py b/rust/Python3/transformGrammar.py
@@ -0,0 +1,32 @@
+import sys, os, re, shutil
+from glob import glob
+from pathlib import Path
+
+def main(argv):  # argv is accepted for the script entry point but is not read
+    for grammar_path in glob("*.g4"):
+        fix(grammar_path)
+
+def fix(file_path):
+    """Rewrite ``this.`` to ``self.`` in a grammar, keeping the original as ``.bak``.
+
+    Exits with status 1 if *file_path* does not exist.
+    """
+    print("Altering " + file_path)
+    if not os.path.exists(file_path):
+        print(f"Could not find file: {file_path}")
+        sys.exit(1)
+
+    backup_path = file_path + ".bak"
+    shutil.move(file_path, backup_path)
+    # Context managers close both files even if reading or writing fails.
+    with open(backup_path, 'r') as input_file, open(file_path, 'w') as output_file:
+        for line in input_file:
+            if 'this.' in line:
+                # The lookbehind skips identifiers that merely end in "this".
+                line = re.sub(r"(?<!\w)this\.", "self.", line)
+            output_file.write(line)
+
+    print("Writing ...")
+
+if __name__ == '__main__':  # script entry point: hand the command line to main()
+    main(sys.argv)
diff --git a/rust/README.md b/rust/README.md
@@ -15,4 +15,5 @@ Install the parser into the local Maven repository with `mvn install`.
 ## Known limitation
 
 - Only v2018+ stable feature is implemented.
-- Checks about isolated `\r` is not implemented. 
+- Checks about isolated `\r` is not implemented. 
+- The Python target uses separately transformed grammar files, because the semantic predicates must be rewritten in Python syntax.