diff --git a/csaf-retrieval/src/main/kotlin/io/github/csaf/sbom/retrieval/CsafLoader.kt b/csaf-retrieval/src/main/kotlin/io/github/csaf/sbom/retrieval/CsafLoader.kt index e5dcc5f8..412f94ba 100644 --- a/csaf-retrieval/src/main/kotlin/io/github/csaf/sbom/retrieval/CsafLoader.kt +++ b/csaf-retrieval/src/main/kotlin/io/github/csaf/sbom/retrieval/CsafLoader.kt @@ -19,6 +19,7 @@ package io.github.csaf.sbom.retrieval import io.github.csaf.sbom.schema.generated.Aggregator import io.github.csaf.sbom.schema.generated.Csaf import io.github.csaf.sbom.schema.generated.Provider +import io.github.csaf.sbom.schema.generated.ROLIEFeed import io.ktor.client.* import io.ktor.client.call.* import io.ktor.client.engine.* @@ -96,6 +97,19 @@ class CsafLoader(engine: HttpClientEngine = Java.create()) { suspend fun fetchDocument(url: String, ctx: RetrievalContext): Result = Result.of { get(url, ctx.responseCallback()).also(ctx.jsonCallback()) } + /** + * Fetch and parse a ROLE feed from a given URL. + * + * @param url the URL where the ROLIE feed is found + * @param responseCallback An optional callback to further evaluate the [HttpResponse]. + * @return The resulting [ROLIEFeed], wrapped in a [Result] monad, if successful. A failed + * [Result] wrapping the thrown [Throwable] in case of an error. + */ + suspend fun fetchROLIEFeed( + url: String, + responseCallback: ((HttpResponse) -> Unit)? = null + ): Result = Result.of { get(url, responseCallback ?: {}) } + /** * Fetch an arbitrary URL's content as plain text [String], falling back to UTF-8 if no charset * is provided. diff --git a/csaf-retrieval/src/main/kotlin/io/github/csaf/sbom/retrieval/Main.kt b/csaf-retrieval/src/main/kotlin/io/github/csaf/sbom/retrieval/Main.kt index 9c1198a4..5246597b 100644 --- a/csaf-retrieval/src/main/kotlin/io/github/csaf/sbom/retrieval/Main.kt +++ b/csaf-retrieval/src/main/kotlin/io/github/csaf/sbom/retrieval/Main.kt @@ -25,6 +25,7 @@ fun main(args: Array) { RetrievedProvider.from(args[0]) .onSuccess { provider -> println("Discovered provider-metadata.json @ ${provider.json.canonical_url}") + println("Expected documents: ${provider.countExpectedDocuments()}") // Retrieve all documents from all feeds. Note: we currently only support index.txt for (result in provider.fetchDocuments()) { result.onSuccess { doc -> diff --git a/csaf-retrieval/src/main/kotlin/io/github/csaf/sbom/retrieval/RetrievedProvider.kt b/csaf-retrieval/src/main/kotlin/io/github/csaf/sbom/retrieval/RetrievedProvider.kt index 20044b8d..4918f778 100644 --- a/csaf-retrieval/src/main/kotlin/io/github/csaf/sbom/retrieval/RetrievedProvider.kt +++ b/csaf-retrieval/src/main/kotlin/io/github/csaf/sbom/retrieval/RetrievedProvider.kt @@ -21,14 +21,14 @@ import io.github.csaf.sbom.retrieval.roles.CSAFProviderRole import io.github.csaf.sbom.retrieval.roles.CSAFPublisherRole import io.github.csaf.sbom.retrieval.roles.CSAFTrustedProviderRole import io.github.csaf.sbom.schema.generated.Provider +import io.github.csaf.sbom.schema.generated.Provider.Feed +import io.github.csaf.sbom.schema.generated.ROLIEFeed import java.util.* import java.util.concurrent.CompletableFuture import java.util.stream.Stream import java.util.stream.StreamSupport import kotlinx.coroutines.* -import kotlinx.coroutines.channels.ReceiveChannel -import kotlinx.coroutines.channels.produce -import kotlinx.coroutines.channels.toList +import kotlinx.coroutines.channels.* import kotlinx.coroutines.future.future /** @@ -36,6 +36,7 @@ import kotlinx.coroutines.future.future * "trusted provider"), including its metadata (in the form of [Provider]) as well as functionality * to retrieve its documents. */ +@OptIn(ExperimentalCoroutinesApi::class) class RetrievedProvider(val json: Provider) : Validatable { /** @@ -51,21 +52,22 @@ class RetrievedProvider(val json: Provider) : Validatable { } /** - * This function fetches all directory indices referenced by this provider. + * This function fetches all directory indices referenced by this provider and sends them to a + * [ReceiveChannel]. * * @param loader The instance of [CsafLoader] used for fetching of online resources. * @param channelCapacity The capacity of the channels used to buffer parallel fetches. Defaults * to [DEFAULT_CHANNEL_CAPACITY]. - * @return The fetched [Result]s, representing index contents or fetch errors. + * @return A [ReceiveChannel] containing the fetched [Result]s, representing index contents or + * fetch errors. */ - @OptIn(ExperimentalCoroutinesApi::class) fun fetchDocumentIndices( loader: CsafLoader = lazyLoader, channelCapacity: Int = DEFAULT_CHANNEL_CAPACITY ): ReceiveChannel>> { - @Suppress("SimpleRedundantLet") val directoryUrls = (json.distributions ?: emptySet()).mapNotNull { distribution -> + @Suppress("SimpleRedundantLet") distribution.directory_url?.let { it.toString().trimEnd('/') } } // This channel collects up to `channelCapacity` directory indices concurrently. @@ -83,6 +85,37 @@ class RetrievedProvider(val json: Provider) : Validatable { } } + /** + * This function fetches all ROLIE feeds referenced by this provider and sends them to a + * [ReceiveChannel]. + * + * @param loader The instance of [CsafLoader] used for fetching of online resources. + * @param channelCapacity The capacity of the channels used to buffer parallel fetches. Defaults + * to [DEFAULT_CHANNEL_CAPACITY]. + * @return A [ReceiveChannel] containing the fetched [Result]s, representing ROLIE feeds' + * contents (as [ROLIEFeed]) or fetch errors. + */ + fun fetchRolieFeeds( + loader: CsafLoader = lazyLoader, + channelCapacity: Int = DEFAULT_CHANNEL_CAPACITY + ): ReceiveChannel>> { + val feeds = json.distributions?.mapNotNull { it.rolie }?.flatMap { it.feeds } ?: listOf() + + // This channel collects up to `channelCapacity` feeds concurrently. + val rolieChannel = + ioScope.produce(capacity = channelCapacity) { + for (feed in feeds) { + send(feed to async { loader.fetchROLIEFeed(feed.url.toString()) }) + } + } + // This terminal channel is a simple "rendezvous channel" for awaiting the Results. + return ioScope.produce { + for ((feed, feedDeferred) in rolieChannel) { + send(feed to feedDeferred.await()) + } + } + } + /** * This function sums up the expected number of [RetrievedDocument]s that will be fetched from * this Provider. @@ -92,22 +125,11 @@ class RetrievedProvider(val json: Provider) : Validatable { * to [DEFAULT_CHANNEL_CAPACITY]. * @return The expected number of [RetrievedDocument]s provided. */ - @OptIn(ExperimentalCoroutinesApi::class) suspend fun countExpectedDocuments( loader: CsafLoader = lazyLoader, channelCapacity: Int = DEFAULT_CHANNEL_CAPACITY ): Int { - val indexChannel = fetchDocumentIndices(loader, channelCapacity) - // This second channel collects up to `channelCapacity` Results concurrently, which - // represent CSAF Documents or errors from fetching or validation. - val documentCountChannel = - ioScope.produce(capacity = channelCapacity) { - for ((_, indexResult) in indexChannel) { - indexResult.onSuccess { send(it.lines().size) } - } - } - // This terminal channel is a simple "rendezvous channel" for awaiting the Results. - return documentCountChannel.toList().sum() + return fetchAllDocumentUrls(loader, channelCapacity).toList().filter { it.isSuccess }.size } /** @@ -123,35 +145,15 @@ class RetrievedProvider(val json: Provider) : Validatable { loader: CsafLoader = lazyLoader, channelCapacity: Int = DEFAULT_CHANNEL_CAPACITY ): ReceiveChannel> { - val indexChannel = fetchDocumentIndices(loader, channelCapacity) + val documentUrlChannel = fetchAllDocumentUrls(loader, channelCapacity) // This second channel collects up to `channelCapacity` Results concurrently, which // represent CSAF Documents or errors from fetching or validation. val documentJobChannel = ioScope.produce>>(capacity = channelCapacity) { - for ((directoryUrl, indexResult) in indexChannel) { - indexResult.fold( - { index -> - index.lines().map { line -> - send( - async { - val csafUrl = "$directoryUrl/$line" - RetrievedDocument.from(csafUrl, loader, role) - } - ) - } - }, - { e -> - send( - async { - Result.failure( - Exception( - "Failed to fetch index.txt from directory at $directoryUrl", - e - ) - ) - } - ) - } + for (result in documentUrlChannel) { + result.fold( + { send(async { RetrievedDocument.from(it, loader, role) }) }, + { send(async { Result.failure(it) }) } ) } } @@ -163,6 +165,93 @@ class RetrievedProvider(val json: Provider) : Validatable { } } + /** + * Returns a channel that produces all URLs from ROLIE feeds and directory indices without + * duplicates. + * + * @param loader The instance of [CsafLoader] used for fetching of online resources. + * @param channelCapacity The capacity of the channels used to buffer parallel fetches. Defaults + * to [DEFAULT_CHANNEL_CAPACITY]. + * @return + */ + @OptIn(ExperimentalCoroutinesApi::class) + fun fetchAllDocumentUrls( + loader: CsafLoader = lazyLoader, + channelCapacity: Int = DEFAULT_CHANNEL_CAPACITY + ): ReceiveChannel> { + val urlResultChannel = + ioScope.produce(capacity = channelCapacity) { + fetchDocumentUrlsFromIndices(fetchDocumentIndices(loader, channelCapacity)) + fetchDocumentUrlsFromRolieFeeds(fetchRolieFeeds(loader, channelCapacity)) + } + return ioScope.produce { + val seenUrls = mutableSetOf() + for (urlResult in urlResultChannel) { + urlResult.fold( + { + if (seenUrls.add(it)) { + send(Result.success(it)) + } + }, + { send(Result.failure(it)) } + ) + } + } + } + + /** + * Sends the URLs obtained from the [indexChannel] to the given [ProducerScope]. + * + * @param indexChannel The source channel providing directory index data. + * @receiver urlChannel The target channel where URLs are sent to. + */ + private suspend fun SendChannel>.fetchDocumentUrlsFromIndices( + indexChannel: ReceiveChannel>> + ) { + for ((directoryUrl, indexResult) in indexChannel) { + indexResult.fold( + { index -> + index.lines().map { line -> send(Result.success("$directoryUrl/$line")) } + }, + { e -> + send( + Result.failure( + Exception( + "Failed to fetch index.txt from directory at $directoryUrl", + e + ) + ) + ) + } + ) + } + } + + /** + * Sends the URLs obtained from the [rolieChannel] feeds to the given [ProducerScope]. + * + * @param rolieChannel The source channel providing ROLIE feed data. + * @receiver urlChannel The target channel where URLs are sent to. + */ + private suspend fun SendChannel>.fetchDocumentUrlsFromRolieFeeds( + rolieChannel: ReceiveChannel>> + ) { + for ((feed, rolieResult) in rolieChannel) { + rolieResult.fold( + { rolie -> + rolie.feed.entry.map { entry -> + send(Result.success(entry.content.src.toString())) + } + }, + { e -> + send( + Result.failure(Exception("Failed to fetch ROLIE feed from ${feed.url}", e)) + ) + } + ) + } + } + /** * This method provides the [Result]s of [fetchDocuments] as a Java [Stream] for usage in * non-Kotlin environments. diff --git a/csaf-retrieval/src/test/kotlin/io/github/csaf/sbom/retrieval/CsafLoaderTest.kt b/csaf-retrieval/src/test/kotlin/io/github/csaf/sbom/retrieval/CsafLoaderTest.kt index 08581758..b8ed85bf 100644 --- a/csaf-retrieval/src/test/kotlin/io/github/csaf/sbom/retrieval/CsafLoaderTest.kt +++ b/csaf-retrieval/src/test/kotlin/io/github/csaf/sbom/retrieval/CsafLoaderTest.kt @@ -102,4 +102,13 @@ class CsafLoaderTest { } assertFalse { result.isSuccess } } + + @Test + fun testFetchROLIEFeed() = runTest { + val result = + loader.fetchROLIEFeed("does-not-really-exist.json") { + assertSame(HttpStatusCode.NotFound, it.status) + } + assertFalse { result.isSuccess } + } } diff --git a/csaf-retrieval/src/test/kotlin/io/github/csaf/sbom/retrieval/RetrievedProviderTest.kt b/csaf-retrieval/src/test/kotlin/io/github/csaf/sbom/retrieval/RetrievedProviderTest.kt index 13519f03..5d4b002f 100644 --- a/csaf-retrieval/src/test/kotlin/io/github/csaf/sbom/retrieval/RetrievedProviderTest.kt +++ b/csaf-retrieval/src/test/kotlin/io/github/csaf/sbom/retrieval/RetrievedProviderTest.kt @@ -20,13 +20,14 @@ import io.github.csaf.sbom.validation.ValidationException import kotlin.test.* import kotlinx.coroutines.channels.toList import kotlinx.coroutines.test.runTest +import org.junit.jupiter.api.assertThrows class RetrievedProviderTest { init { CsafLoader.defaultLoaderFactory = { CsafLoader(mockEngine()) } } - @Test fun testRetrievedProviderFrom() = runTest { providerTest("example.com") } + @Test fun testRetrievedProviderFrom() = runTest { providerTest("example.com", 4) } @Test fun testRetrievedProviderFromSecurityTxt() = runTest { @@ -70,16 +71,20 @@ class RetrievedProviderTest { assertFalse(documentIndexResults[1].second.isSuccess) } - private suspend fun providerTest(domain: String) { + @Test + fun testFetchRolieFeeds() = runTest { + val provider = RetrievedProvider.from("example.com").getOrThrow() + val rolieFeedsResults = provider.fetchRolieFeeds().toList() + assertEquals(1, rolieFeedsResults.size, "Expected exactly 1 result: One parsed ROLIE feed") + assertTrue(rolieFeedsResults[0].second.isSuccess) + } + + private suspend fun providerTest(domain: String, numResults: Int = 5) { val provider = RetrievedProvider.from(domain).getOrThrow() val expectedDocumentCount = provider.countExpectedDocuments() assertEquals(3, expectedDocumentCount, "Expected 3 documents") val documentResults = provider.fetchDocuments().toList() - assertEquals( - 4, - documentResults.size, - "Expected exactly 4 results: One document, two document errors, one index error" - ) + assertEquals(numResults, documentResults.size, "Expected exactly $numResults results") // Check some random property on successful document assertEquals( "Bundesamt für Sicherheit in der Informationstechnik", @@ -106,4 +111,28 @@ class RetrievedProviderTest { documentResults[3].exceptionOrNull()?.message ) } + + @Test + fun testFetchAllDocumentUrls() = runTest { + val provider = RetrievedProvider.from("example.com").getOrThrow() + val urlResults = provider.fetchAllDocumentUrls().toList() + + assertEquals(4, urlResults.size, "Expected exactly 4 results") + assertEquals( + "https://example.com/directory/2022/bsi-2022-0001.json", + urlResults[0].getOrThrow() + ) + assertEquals( + "https://example.com/directory/2022/bsi-2022_2-01.json", + urlResults[1].getOrThrow() + ) + assertEquals( + "https://example.com/directory/2024/does-not-exist.json", + urlResults[2].getOrThrow() + ) + assertEquals( + "Failed to fetch index.txt from directory at https://example.com/invalid-directory", + (assertThrows { urlResults[3].getOrThrow() }).message + ) + } } diff --git a/csaf-retrieval/src/test/resources/example.com/.well-known/csaf/feed-tlp-white.json b/csaf-retrieval/src/test/resources/example.com/.well-known/csaf/feed-tlp-white.json new file mode 100644 index 00000000..51a9d159 --- /dev/null +++ b/csaf-retrieval/src/test/resources/example.com/.well-known/csaf/feed-tlp-white.json @@ -0,0 +1,52 @@ +{ + "feed": { + "id": "example-csaf-feed-tlp-white", + "title": "Example CSAF feed (TLP:WHITE)", + "link": [ + { + "rel": "self", + "href": "https://example.com/.well-known/csaf/feed-tlp-white.json" + } + ], + "category": [ + { + "scheme": "urn:ietf:params:rolie:category:information-type", + "term": "csaf" + } + ], + "updated": "2021-01-01T12:00:00.000Z", + "entry": [ + { + "id": "2020-ESA-001", + "title": "Example Security Advisory 001", + "link": [ + { + "rel": "self", + "href": "https://example.com/directory/2022/bsi-2022-0001.json" + }, + { + "rel": "hash", + "href": "https://example.com/directory/2022/bsi-2022-0001.json.sha512" + }, + { + "rel": "signature", + "href": "https://example.com/directory/2022/bsi-2022-0001.json.asc" + } + ], + "published": "2021-01-01T11:00:00.000Z", + "updated": "2021-01-01T12:00:00.000Z", + "summary": { + "content": "Vulnerabilities fixed in ABC 0.0.1" + }, + "content": { + "type": "application/json", + "src": "https://example.com/directory/2022/bsi-2022-0001.json" + }, + "format": { + "schema": "https://docs.oasis-open.org/csaf/csaf/v2.0/csaf_json_schema.json", + "version": "2.0" + } + } + ] + } +} \ No newline at end of file diff --git a/csaf-schema/src/main/resources/schema/ROLIE_feed_json_schema.json b/csaf-schema/src/main/resources/schema/ROLIE_feed_json_schema.json new file mode 100644 index 00000000..8afe6460 --- /dev/null +++ b/csaf-schema/src/main/resources/schema/ROLIE_feed_json_schema.json @@ -0,0 +1,291 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://raw.githubusercontent.com/oasis-tcs/csaf/master/csaf_2.0/json_schema/ROLIE_feed_json_schema.json", + "title": "ROLIE Feed auxiliary Schema", + "description": "Representation of CSAF ROLIE feed as a JSON document.", + "$defs": { + "json_link_t": { + "title": "JSON Link", + "description": "Contains the URL of the JSON file.", + "type": "string", + "format": "uri", + "pattern": "^https://.+\\.json$" + }, + "link_t": { + "title": "List of Links", + "description": "Contains a list of links related to the current context.", + "type": "array", + "prefixItems": [ + { + "title": "Link", + "description": "Specifies the JSON link.", + "type": "object", + "required": [ + "rel", + "href" + ], + "properties": { + "href": { + "title": "Hyper reference", + "description": "Contains the URL of the JSON file.", + "$ref": "#/$defs/json_link_t" + }, + "rel": { + "title": "Relationship", + "description": "Contains the relationship value of the link.", + "type": "string", + "enum": [ + "self" + ] + } + } + } + ], + "minItems": 1, + "uniqueItems": true, + "items": { + "title": "Link", + "description": "Specifies a single link.", + "type": "object", + "required": [ + "rel", + "href" + ], + "properties": { + "href": { + "title": "Hyper reference", + "description": "Contains the URL of the link.", + "type": "string", + "format": "uri" + }, + "rel": { + "title": "Relationship", + "description": "Contains the relationship value of the link.", + "type": "string", + "minLength": 1 + } + } + } + } + }, + "type": "object", + "required": [ + "feed" + ], + "properties": { + "feed": { + "title": "CSAF ROLIE feed", + "description": "Contains all information of the feed.", + "type": "object", + "required": [ + "id", + "title", + "link", + "category", + "updated", + "entry" + ], + "properties": { + "id": { + "title": "ID", + "description": "Contains a unique identifier for this ROLIE feed.", + "type": "string", + "pattern": "^[a-zA-Z0-9+\\-_\\.]+$", + "minLength": 1 + }, + "title": { + "title": "Feed title", + "description": "Contains the title for this ROLIE feed.", + "type": "string", + "minLength": 1 + }, + "link": { + "title": "List of Links", + "description": "Contains a list of links related to this feed.", + "$ref": "#/$defs/link_t" + }, + "category": { + "title": "List of Categories", + "description": "Contains a list of categories related to this feed.", + "type": "array", + "prefixItems": [ + { + "title": "CSAF ROLIE category", + "description": "Contains the required ROLIE category value.", + "type": "object", + "required": [ + "scheme", + "term" + ], + "properties": { + "scheme": { + "title": "Scheme", + "description": "Contains the URI of the scheme to use.", + "type": "string", + "enum": [ + "urn:ietf:params:rolie:category:information-type" + ] + }, + "term": { + "title": "Term", + "description": "Contains the term that is valid in the context of the scheme.", + "type": "string", + "enum": [ + "csaf" + ] + } + } + } + ], + "minItems": 1, + "uniqueItems": true, + "items": { + "title": "Category", + "description": "Specifies a single category.", + "type": "object", + "required": [ + "scheme", + "term" + ], + "properties": { + "scheme": { + "title": "Scheme", + "description": "Contains the URI of the scheme to use.", + "type": "string", + "format": "uri" + }, + "term": { + "title": "Term", + "description": "Contains the term that is valid in the context of the scheme.", + "type": "string", + "minLength": 1 + } + } + } + }, + "updated": { + "title": "Updated", + "description": "Contains the date and time this feed was updated the last time.", + "type": "string", + "format": "date-time" + }, + "entry": { + "title": "List of Entries", + "description": "Contains a list of feed entries.", + "type": "array", + "uniqueItems": true, + "items": { + "title": "Entry", + "description": "Contains all information for a single feed entry.", + "type": "object", + "required": [ + "id", + "title", + "link", + "published", + "updated", + "content", + "format" + ], + "properties": { + "id": { + "title": "ID", + "description": "Contains the document tracking ID of the CSAF document.", + "type": "string", + "pattern": "^[\\S](.*[\\S])?$", + "minLength": 1 + }, + "title": { + "title": "Title", + "description": "Contains the document title of the CSAF document.", + "type": "string", + "minLength": 1 + }, + "link": { + "title": "List of Links", + "description": "Contains a list of links related to this entry.", + "$ref": "#/$defs/link_t" + }, + "published": { + "title": "Published", + "description": "Contains the date and time this entry was initially added to the feed.", + "type": "string", + "format": "date-time" + }, + "updated": { + "title": "Updated", + "description": "Contains the date and time this entry was the last time updated in the feed.", + "type": "string", + "format": "date-time" + }, + "summary": { + "title": "Summary", + "description": "Contains the summary of the CSAF document.", + "type": "object", + "properties": { + "content": { + "title": "Content", + "description": "Contains the actual text of the summary.", + "type": "string", + "minLength": 1 + } + } + }, + "content": { + "title": "Content of the entry", + "description": "Contains information about the content.", + "type": "object", + "required": [ + "type", + "src" + ], + "properties": { + "src": { + "title": "Source Code", + "description": "Contains a link to the source code of the file", + "$ref": "#/$defs/json_link_t" + }, + "type": { + "title": "MIME type", + "description": "Contains the MIME type of the content.", + "type": "string", + "enum": [ + "application/json" + ] + } + } + }, + "format": { + "title": "Format", + "description": "Contains information about the format of the entry.", + "type": "object", + "required": [ + "schema", + "version" + ], + "properties": { + "schema": { + "title": "Schema of the entry", + "description": "Contains the schema the CSAF document is valid against.", + "type": "string", + "enum": [ + "https://docs.oasis-open.org/csaf/csaf/v2.0/csaf_json_schema.json" + ] + }, + "version": { + "title": "CSAF Version", + "description": "Contains the CSAF version the document was written in.", + "type": "string", + "enum": [ + "2.0" + ] + } + } + } + } + } + } + } + } + } +} \ No newline at end of file