Added structure for token position tests. #1914

Merged: 5 commits, Aug 29, 2024
docs/4.-Adding-New-Languages.md (25 additions, 0 deletions)
@@ -510,6 +510,31 @@ protected File getTestFileLocation() {
}
```

## Testing token positions

The precise position of a token can be relevant for the visualization in the report viewer. To make sure token positions are extracted correctly, language modules should include tests for this.

Such tests are written using a dedicated syntax directly in the test sources. A test file can look like this:
```java
>class Test {
> int test;
$ | J_VARDEF 8
>}
```

Every line prefixed with '>' is interpreted as a line of test source code.

Every line starting with '$' describes one expected token. The token is expected in the first source line above it.
The '|' marks the column the token should be in. It is followed by one space, then the name of the token (the name of the enum constant), and, separated by another space, the length of the token.
A single file may contain any number of expected tokens.
For each expected token, the test verifies that at least one extracted token with exactly these properties exists.
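
For example, a file with several expected tokens could look like this (an illustrative sketch that simply repeats the pattern from above; the actual columns and lengths depend on the tokens your language module emits):
```java
>class Test {
> int test;
$ | J_VARDEF 8
> int demo;
$ | J_VARDEF 8
>}
```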

These test files have to be registered in the `TestDataCollector`. Put all test files in a single directory and register it via `collector.addTokenPositionTests("<directory>")`.
If the directory is in the default location for test files, a relative path is enough; otherwise, a full path has to be specified.
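
For example, in a language module's test class the registration could look like this (a minimal sketch; the directory name `tokenPositions` mirrors the Java module's setup in this PR):
```java
@Override
protected void collectTestData(TestDataCollector collector) {
    // Register every file in the "tokenPositions" directory as a token position test.
    // The directory is resolved relative to the module's test file location.
    collector.addTokenPositionTests("tokenPositions");
}
```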


# Adding code highlighting to the report-viewer
To ensure your language gets properly registered and its code is correctly highlighted in the report-viewer:
1) Add your language to the `ParserLanguage` enum in 'src/model/Language.ts'. As the value for the entry, use its frontend name.
@@ -14,6 +14,7 @@
import java.util.Collection;
import java.util.List;

import org.junit.jupiter.api.AfterAll;
import org.junit.jupiter.api.Assumptions;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.DisplayName;
@@ -32,6 +33,7 @@
import de.jplag.testutils.datacollector.TestData;
import de.jplag.testutils.datacollector.TestDataCollector;
import de.jplag.testutils.datacollector.TestSourceIgnoredLinesCollector;
import de.jplag.testutils.datacollector.TokenPositionTestData;

/**
* Base class for language module tests. Automatically adds all common test types for JPlag languages.
Expand Down Expand Up @@ -174,7 +176,7 @@ final List<TestDataCollector.TokenListTest> testTokensContainedData() {
final void testTokenSequence(TestDataCollector.TokenListTest test) throws ParsingException, IOException {
List<TokenType> actual = extractTokenTypes(test.data());
List<TokenType> expected = new ArrayList<>(test.tokens());
if (expected.get(expected.size() - 1) != SharedTokenType.FILE_END) {
if (expected.getLast() != SharedTokenType.FILE_END) {
expected.add(SharedTokenType.FILE_END);
}
assertTokensMatch(expected, actual, "Extracted token from " + test.data().describeTestSource() + " does not match expected sequence.");
@@ -196,6 +198,45 @@ final List<TestDataCollector.TokenListTest> testTokenSequenceData() {
return ignoreEmptyTestType(this.collector.getTokenSequenceTest());
}

/**
* Tests if the tokens specified for the token position tests are present in the sources
* @param testData The specifications of the expected tokens and the test source
* @throws ParsingException If the parsing fails
* @throws IOException If IO operations fail. If this happens, that should be unrelated to the test itself.
*/
@ParameterizedTest
@MethodSource("getTokenPositionTestData")
@DisplayName("Tests if the extracted tokens contain the tokens specified in the test files.")
final void testTokenPositions(TokenPositionTestData testData) throws ParsingException, IOException {
List<Token> extractedTokens = parseTokens(testData);
List<TokenPositionTestData.TokenData> failedTokens = new ArrayList<>();

for (TokenPositionTestData.TokenData expectedToken : testData.getExpectedTokens()) {
TokenType expectedType = this.languageTokens.stream().filter(type -> type.toString().equals(expectedToken.typeName())).findFirst()
.orElseThrow(() -> new IOException(String.format("The token type %s does not exist.", expectedToken.typeName())));

if (extractedTokens.stream().noneMatch(token -> token.getType() == expectedType && token.getLine() == expectedToken.lineNumber()
&& token.getColumn() == expectedToken.columnNumber() && token.getLength() == expectedToken.length())) {
failedTokens.add(expectedToken);
}
}

if (!failedTokens.isEmpty()) {
String failureDescriptors = String.join(System.lineSeparator(),
failedTokens.stream().map(
token -> token.typeName() + " at (" + token.lineNumber() + ":" + token.columnNumber() + ") with length " + token.length())
.toList());
fail("Some tokens weren't extracted with the correct properties:" + System.lineSeparator() + failureDescriptors);
}
}

/**
* @return All token positions tests that are configured
*/
final List<TokenPositionTestData> getTokenPositionTestData() {
return ignoreEmptyTestType(this.collector.getTokenPositionTestData());
}

/**
* Tests all configured test sources for a monotone order of tokens
* @param data The test source
@@ -231,8 +272,7 @@ final void testMonotoneTokenOrder(TestData data) throws ParsingException, IOExce
final void testTokenSequencesEndsWithFileEnd(TestData data) throws ParsingException, IOException {
List<Token> tokens = parseTokens(data);

assertEquals(SharedTokenType.FILE_END, tokens.get(tokens.size() - 1).getType(),
"Last token in " + data.describeTestSource() + " is not file end.");
assertEquals(SharedTokenType.FILE_END, tokens.getLast().getType(), "Last token in " + data.describeTestSource() + " is not file end.");
}

/**
@@ -251,6 +291,11 @@ final void collectTestData() {
collectTestData(this.collector);
}

@AfterAll
final void deleteTemporaryFiles() {
TemporaryFileHolder.deleteTemporaryFiles();
}

private List<Token> parseTokens(TestData source) throws ParsingException, IOException {
List<Token> tokens = source.parseTokens(this.language);
logger.info(TokenPrinter.printTokens(tokens));
@@ -0,0 +1,20 @@
package de.jplag.testutils;

import java.io.File;
import java.util.ArrayList;
import java.util.List;

/**
* Stores all temporary files that are created for a {@link LanguageModuleTest} and provides the option to delete them
*/
public class TemporaryFileHolder {
public static List<File> temporaryFiles = new ArrayList<>();

/**
* Deletes all temporary files that have been created up to this point
*/
public static void deleteTemporaryFiles() {
temporaryFiles.forEach(File::delete);
temporaryFiles.clear();
}
}
@@ -8,6 +8,7 @@
import de.jplag.Language;
import de.jplag.ParsingException;
import de.jplag.Token;
import de.jplag.testutils.TemporaryFileHolder;
import de.jplag.util.FileUtils;

/**
@@ -25,7 +26,7 @@ public List<Token> parseTokens(Language language) throws ParsingException, IOExc
File file = File.createTempFile("testSource", language.suffixes()[0]);
FileUtils.write(file, this.testData);
List<Token> tokens = language.parse(Collections.singleton(file));
file.delete();
TemporaryFileHolder.temporaryFiles.add(file);
return tokens;
}

@@ -1,10 +1,13 @@
package de.jplag.testutils.datacollector;

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Objects;
import java.util.Set;
import java.util.stream.Collectors;

@@ -18,6 +21,7 @@ public class TestDataCollector {
private final List<TestData> tokenCoverageData;
private final List<TokenListTest> containedTokenData;
private final List<TokenListTest> tokenSequenceTest;
private final List<TokenPositionTestData> tokenPositionTestData;

private final List<TestData> allTestData;

@@ -34,6 +38,7 @@ public TestDataCollector(File testFileLocation) {
this.tokenCoverageData = new ArrayList<>();
this.containedTokenData = new ArrayList<>();
this.tokenSequenceTest = new ArrayList<>();
this.tokenPositionTestData = new ArrayList<>();

this.allTestData = new ArrayList<>();
}
@@ -73,6 +78,28 @@ public TestDataContext inlineSource(String... sources) {
return new TestDataContext(data);
}

/**
* Adds all files from the given directory for token position tests. The sources can still be used for other tests,
* using the returned {@link TestDataContext}
* @param directoryName The name of the directory containing the token position tests.
* @return The context containing the added sources
* @throws IOException If the files cannot be read
*/
public TestDataContext addTokenPositionTests(String directoryName) {
File directory = new File(this.testFileLocation, directoryName);
Set<TestData> allTestsInDirectory = new HashSet<>();
for (File file : Objects.requireNonNull(directory.listFiles())) {
try {
TokenPositionTestData data = new TokenPositionTestData(file);
allTestsInDirectory.add(data);
this.tokenPositionTestData.add(data);
} catch (IOException e) {
throw new RuntimeException(e);
}
}
return new TestDataContext(allTestsInDirectory);
}

/**
* @return The test data that should be checked for source coverage
*/
@@ -101,6 +128,10 @@ public List<TokenListTest> getTokenSequenceTest() {
return Collections.unmodifiableList(tokenSequenceTest);
}

public List<TokenPositionTestData> getTokenPositionTestData() {
return Collections.unmodifiableList(this.tokenPositionTestData);
}

/**
* @return The list of all test data
*/
@@ -0,0 +1,92 @@
package de.jplag.testutils.datacollector;

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;

import de.jplag.Language;
import de.jplag.ParsingException;
import de.jplag.Token;
import de.jplag.testutils.TemporaryFileHolder;
import de.jplag.util.FileUtils;

/**
* Test sources with token information. Reads token position test specifications from a file and provides the token
* information for tests. The sources can be used as regular test sources.
*/
public class TokenPositionTestData implements TestData {
private final List<String> sourceLines;
private final List<TokenData> expectedTokens;

private final String descriptor;

/**
* @param testFile The file containing the test specifications
* @throws IOException If the file cannot be read
*/
public TokenPositionTestData(File testFile) throws IOException {
this.sourceLines = new ArrayList<>();
this.expectedTokens = new ArrayList<>();
this.descriptor = "(Token position file: " + testFile.getName() + ")";
this.readFile(testFile);
}

private void readFile(File testFile) throws IOException {
List<String> testFileLines = FileUtils.readFileContent(testFile).lines().toList();
int currentLine = 0;

for (String sourceLine : testFileLines) {
if (sourceLine.charAt(0) == '>') {
this.sourceLines.add(sourceLine.substring(1));
currentLine++;
}

if (sourceLine.charAt(0) == '$') {
int column = sourceLine.indexOf('|');
String[] tokenDescriptionParts = sourceLine.split(" ", 0);

String typeName = tokenDescriptionParts[tokenDescriptionParts.length - 2];
int length = Integer.parseInt(tokenDescriptionParts[tokenDescriptionParts.length - 1]);
this.expectedTokens.add(new TokenData(typeName, currentLine, column, length));
}
}
}

@Override
public List<Token> parseTokens(Language language) throws ParsingException, IOException {
File file = File.createTempFile("testSource", language.suffixes()[0]);
FileUtils.write(file, String.join(System.lineSeparator(), sourceLines));
List<Token> tokens = language.parse(Collections.singleton(file));
TemporaryFileHolder.temporaryFiles.add(file);
return tokens;
}

@Override
public String[] getSourceLines() {
return this.sourceLines.toArray(new String[0]);
}

@Override
public String describeTestSource() {
return this.descriptor;
}

/**
* @return A list of the expected tokens for this test source
*/
public List<TokenData> getExpectedTokens() {
return expectedTokens;
}

/**
* Information about a single token
* @param typeName The name of the token type
* @param lineNumber The line the token is in (1 based)
* @param columnNumber The column the token is in (1 based)
* @param length The length of the token
*/
public record TokenData(String typeName, int lineNumber, int columnNumber, int length) {
}
}
@@ -73,6 +73,8 @@ protected void collectTestData(TestDataCollector collector) {

collector.testFile("AnonymousVariables.java").testTokenSequence(J_CLASS_BEGIN, J_METHOD_BEGIN, J_VARDEF, J_IF_BEGIN, J_IF_END, J_METHOD_END,
J_CLASS_END);

collector.addTokenPositionTests("tokenPositions");
}

@Override
@@ -0,0 +1,4 @@
>class Test {
> int test;
$ | J_VARDEF 8
>}
@@ -83,8 +83,8 @@ private void parseFile(File file) throws ParsingException {
/**
* Adds a new {@link Token} to the current token list.
* @param type the type of the new {@link Token}
* @param line the line of the Token in the current file
* @param start the start column of the Token in the line
* @param line the lineNumber of the Token in the current file
* @param start the start column of the Token in the lineNumber
* @param length the length of the Token
*/
/* package-private */ void addToken(TokenType type, int line, int start, int length) {