Skip to content

Commit

Permalink
refactor(analyzer): Port Java's walkFileTree() to Kotlin's walk()
Browse files Browse the repository at this point in the history
This also implicitly improves the logging scope, as the `logger` is no
longer called on an anonymous `SimpleFileVisitor` instance.

Signed-off-by: Sebastian Schuberth <[email protected]>
  • Loading branch information
sschuberth committed Nov 13, 2023
1 parent ccabd1f commit 7eb2ffe
Showing 1 changed file with 39 additions and 53 deletions.
92 changes: 39 additions & 53 deletions analyzer/src/main/kotlin/PackageManager.kt
Original file line number Diff line number Diff line change
Expand Up @@ -21,11 +21,7 @@ package org.ossreviewtoolkit.analyzer

import java.io.File
import java.nio.file.FileSystems
import java.nio.file.FileVisitResult
import java.nio.file.Files
import java.nio.file.Path
import java.nio.file.SimpleFileVisitor
import java.nio.file.attribute.BasicFileAttributes

import kotlin.io.path.invariantSeparatorsPathString
import kotlin.time.measureTime
Expand Down Expand Up @@ -115,63 +111,53 @@ abstract class PackageManager(

val result = mutableMapOf<PackageManagerFactory, MutableList<File>>()
val rootPath = directory.toPath()
val distinctPackageManagers = packageManagers.distinct()

Files.walkFileTree(
rootPath,
object : SimpleFileVisitor<Path>() {
override fun preVisitDirectory(dir: Path, attributes: BasicFileAttributes): FileVisitResult {
if (IGNORED_DIRECTORY_MATCHERS.any { it.matches(dir) }) {
logger.info {
"Not analyzing directory '$dir' as it is hard-coded to be ignored."
}
directory.walk().onEnter { dir ->
val dirAsPath = dir.toPath()

return FileVisitResult.SKIP_SUBTREE
}

if (excludes.isPathExcluded(rootPath, dir)) {
logger.info {
"Not analyzing directory '$dir' as it is excluded."
}

return FileVisitResult.SKIP_SUBTREE
}
when {
IGNORED_DIRECTORY_MATCHERS.any { it.matches(dirAsPath) } -> {
logger.info { "Not analyzing directory '$dir' as it is hard-coded to be ignored." }
false
}

val dirAsFile = dir.toFile()
excludes.isPathExcluded(rootPath, dirAsPath) -> {
logger.info { "Not analyzing directory '$dir' as it is excluded." }
false
}

// Note that although FileVisitOption.FOLLOW_LINKS is not set, this would still follow junctions
// on Windows, so do a better check here.
if (dirAsFile.isSymbolicLink()) {
logger.info { "Not following symbolic link to directory '$dir'." }
return FileVisitResult.SKIP_SUBTREE
}
dir.isSymbolicLink() -> {
logger.info { "Not following symbolic link to directory '$dir'." }
false
}

val filesInDir = dirAsFile.walk().maxDepth(1).filter {
it.isFile && !excludes.isPathExcluded(rootPath, it.toPath())
}.toList()

packageManagers.distinct().forEach { manager ->
// Create a list of lists of matching files per glob.
val matchesPerGlob = manager.matchersForDefinitionFiles.mapNotNull { glob ->
// Create a list of files in the current directory that match the current glob.
val filesMatchingGlob = filesInDir.filter { glob.matches(it.toPath()) }
filesMatchingGlob.takeIf { it.isNotEmpty() }
}

if (matchesPerGlob.isNotEmpty()) {
// Only consider all matches for the first glob that has matches. This is because globs
// are defined in order of priority, and multiple globs may just be alternative ways to
// detect the exact same project.
// That is, at the example of a PIP project, if a directory contains all three files
// "requirements-py2.txt", "requirements-py3.txt" and "setup.py", only consider the
// former two as they match the glob with the highest priority, but ignore "setup.py".
result.getOrPut(manager) { mutableListOf() } += matchesPerGlob.first()
}
}
else -> true
}
}.filter { it.isDirectory }.forEach { dir ->
val filesInCurrentDir = dir.walk().maxDepth(1).filter {
it.isFile && !excludes.isPathExcluded(rootPath, it.toPath())
}.toList()

distinctPackageManagers.forEach { manager ->
// Create a list of lists of matching files per glob.
val matchesPerGlob = manager.matchersForDefinitionFiles.mapNotNull { glob ->
// Create a list of files in the current directory that match the current glob.
val filesMatchingGlob = filesInCurrentDir.filter { glob.matches(it.toPath()) }
filesMatchingGlob.takeIf { it.isNotEmpty() }
}

return FileVisitResult.CONTINUE
if (matchesPerGlob.isNotEmpty()) {
// Only consider all matches for the first glob that has matches. This is because globs
// are defined in order of priority, and multiple globs may just be alternative ways to
// detect the exact same project.
// That is, at the example of a PIP project, if a directory contains all three files
// "requirements-py2.txt", "requirements-py3.txt" and "setup.py", only consider the
// former two as they match the glob with the highest priority, but ignore "setup.py".
result.getOrPut(manager) { mutableListOf() } += matchesPerGlob.first()
}
}
)
}

return result
}
Expand Down

0 comments on commit 7eb2ffe

Please sign in to comment.