From 77751a6604cee7c0897fb422e5641420143649a7 Mon Sep 17 00:00:00 2001 From: Benjamin Benoist Date: Thu, 10 Aug 2023 09:44:47 +0200 Subject: [PATCH] Configuration --- build.sbt | 3 + http4s/src/main/resources/reference.conf | 82 +++++ .../CollectorApp.scala | 35 +- .../CollectorService.scala | 47 ++- .../Config.scala | 304 ++++++++++++++++++ .../model.scala | 24 +- .../CollectorServiceSpec.scala | 63 ++-- .../TestUtils.scala | 89 ++++- project/Dependencies.scala | 27 +- .../PrintingSink.scala | 28 +- .../StdoutCollector.scala | 53 +-- .../sinks/PrintingSinkSpec.scala | 26 +- 12 files changed, 593 insertions(+), 188 deletions(-) create mode 100644 http4s/src/main/resources/reference.conf create mode 100644 http4s/src/main/scala/com.snowplowanalytics.snowplow.collectors.scalastream/Config.scala diff --git a/build.sbt b/build.sbt index 78112e2bf..23ea94c6c 100644 --- a/build.sbt +++ b/build.sbt @@ -134,6 +134,9 @@ lazy val http4s = project Dependencies.Libraries.badRows, Dependencies.Libraries.collectorPayload, Dependencies.Libraries.slf4j, + Dependencies.Libraries.decline, + Dependencies.Libraries.circeGeneric, + Dependencies.Libraries.circeConfig, Dependencies.Libraries.specs2 ) ) diff --git a/http4s/src/main/resources/reference.conf b/http4s/src/main/resources/reference.conf new file mode 100644 index 000000000..32db682a7 --- /dev/null +++ b/http4s/src/main/resources/reference.conf @@ -0,0 +1,82 @@ +collector { + paths {} + + p3p { + policyRef = "/w3c/p3p.xml" + CP = "NOI DSP COR NID PSA OUR IND COM NAV STA" + } + + crossDomain { + enabled = false + domains = [ "*" ] + secure = true + } + + cookie { + enabled = true + expiration = 365 days + name = sp + secure = true + httpOnly = true + sameSite = "None" + } + + doNotTrackCookie { + enabled = false + name = "" + value = "" + } + + cookieBounce { + enabled = false + name = "n3pc" + fallbackNetworkUserId = "00000000-0000-4000-A000-000000000000" + } + + redirectMacro { + enabled = false + } + + rootResponse { + 
enabled = false + statusCode = 302 + headers = {} + body = "" + } + + cors { + accessControlMaxAge = 60 minutes + } + + streams { + useIpAddressAsPartitionKey = false + + buffer { + byteLimit = 3145728 + recordLimit = 500 + timeLimit = 5000 + } + } + + monitoring { + metrics { + statsd { + enabled = false + hostname = localhost + port = 8125 + period = 10 seconds + prefix = snowplow.collector + } + } + } + + ssl { + enable = false + redirect = false + port = 443 + } + + enableDefaultRedirect = false + + redirectDomains = [] +} \ No newline at end of file diff --git a/http4s/src/main/scala/com.snowplowanalytics.snowplow.collectors.scalastream/CollectorApp.scala b/http4s/src/main/scala/com.snowplowanalytics.snowplow.collectors.scalastream/CollectorApp.scala index 82074116d..a4f9f2292 100644 --- a/http4s/src/main/scala/com.snowplowanalytics.snowplow.collectors.scalastream/CollectorApp.scala +++ b/http4s/src/main/scala/com.snowplowanalytics.snowplow.collectors.scalastream/CollectorApp.scala @@ -1,20 +1,26 @@ package com.snowplowanalytics.snowplow.collectors.scalastream +import java.net.InetSocketAddress + +import org.typelevel.log4cats.Logger +import org.typelevel.log4cats.slf4j.Slf4jLogger + +import scala.concurrent.duration.{DurationLong, FiniteDuration} + import cats.implicits._ + import cats.effect.{Async, ExitCode, Sync} import cats.effect.kernel.Resource + import fs2.io.net.Network + import com.comcast.ip4s.IpLiteralSyntax + import org.http4s.HttpApp import org.http4s.server.Server import org.http4s.ember.server.EmberServerBuilder import org.http4s.blaze.server.BlazeServerBuilder import org.http4s.netty.server.NettyServerBuilder -import org.typelevel.log4cats.Logger -import org.typelevel.log4cats.slf4j.Slf4jLogger - -import java.net.InetSocketAddress -import scala.concurrent.duration.{DurationLong, FiniteDuration} import com.snowplowanalytics.snowplow.collectors.scalastream.model._ @@ -24,18 +30,21 @@ object CollectorApp { Slf4jLogger.getLogger[F] def run[F[_]: 
Async]( - mkGood: Resource[F, Sink[F]], - mkBad: Resource[F, Sink[F]], - config: CollectorConfig, + args: List[String], appName: String, - appVersion: String + appVersion: String, + mkSinks: Config.Streams => Resource[F, Sinks[F]] ): F[ExitCode] = { val resources = for { - bad <- mkBad - good <- mkGood + config <- Resource.eval(Config.parse(args)) + sinks <- mkSinks(config.streams) _ <- withGracefulShutdown(610.seconds) { - val sinks = CollectorSinks(good, bad) - val collectorService: CollectorService[F] = new CollectorService[F](config, sinks, appName, appVersion) + val collectorService: CollectorService[F] = new CollectorService[F]( + config, + Sinks(sinks.good, sinks.bad), + appName, + appVersion + ) buildHttpServer[F](new CollectorRoutes[F](collectorService).value) } } yield () diff --git a/http4s/src/main/scala/com.snowplowanalytics.snowplow.collectors.scalastream/CollectorService.scala b/http4s/src/main/scala/com.snowplowanalytics.snowplow.collectors.scalastream/CollectorService.scala index 75cddc2e9..a61ac7e0b 100644 --- a/http4s/src/main/scala/com.snowplowanalytics.snowplow.collectors.scalastream/CollectorService.scala +++ b/http4s/src/main/scala/com.snowplowanalytics.snowplow.collectors.scalastream/CollectorService.scala @@ -39,8 +39,8 @@ trait Service[F[_]] { } class CollectorService[F[_]: Sync]( - config: CollectorConfig, - sinks: CollectorSinks[F], + config: Config, + sinks: Sinks[F], appName: String, appVersion: String ) extends Service[F] { @@ -68,8 +68,7 @@ class CollectorService[F[_]: Sync]( for { body <- body hostname <- hostname - // TODO: Get ipAsPartitionKey from config - (ipAddress, partitionKey) = ipAndPartitionKey(ip, ipAsPartitionKey = false) + (ipAddress, partitionKey) = ipAndPartitionKey(ip, config.streams.useIpAddressAsPartitionKey) // TODO: nuid should be set properly nuid = UUID.randomUUID().toString event = buildEvent( @@ -87,7 +86,7 @@ class CollectorService[F[_]: Sync]( now <- Clock[F].realTime setCookie = cookieHeader( headers = 
request.headers, - cookieConfig = config.cookieConfig, + cookieConfig = config.cookie, networkUserId = nuid, doNotTrack = doNotTrack, spAnonymous = spAnonymous, @@ -176,32 +175,28 @@ class CollectorService[F[_]: Sync]( */ def cookieHeader( headers: Headers, - cookieConfig: Option[CookieConfig], + cookieConfig: Config.Cookie, networkUserId: String, doNotTrack: Boolean, spAnonymous: Option[String], now: FiniteDuration ): Option[`Set-Cookie`] = - if (doNotTrack) { - None - } else { - spAnonymous match { - case Some(_) => None - case None => - cookieConfig.map { config => - val responseCookie = ResponseCookie( - name = config.name, - content = networkUserId, - expires = Some(HttpDate.unsafeFromEpochSecond((now + config.expiration).toSeconds)), - domain = cookieDomain(headers, config.domains, config.fallbackDomain), - path = Some("/"), - sameSite = config.sameSite, - secure = config.secure, - httpOnly = config.httpOnly - ) - `Set-Cookie`(responseCookie) - } - } + (doNotTrack, cookieConfig.enabled, spAnonymous) match { + case (true, _, _) => None + case (_, false, _) => None + case (_, _, Some(_)) => None + case _ => + val responseCookie = ResponseCookie( + name = cookieConfig.name, + content = networkUserId, + expires = Some(HttpDate.unsafeFromEpochSecond((now + cookieConfig.expiration).toSeconds)), + domain = cookieDomain(headers, cookieConfig.domains, cookieConfig.fallbackDomain), + path = Some("/"), + sameSite = cookieConfig.sameSite, + secure = cookieConfig.secure, + httpOnly = cookieConfig.httpOnly + ) + Some(`Set-Cookie`(responseCookie)) } /** diff --git a/http4s/src/main/scala/com.snowplowanalytics.snowplow.collectors.scalastream/Config.scala b/http4s/src/main/scala/com.snowplowanalytics.snowplow.collectors.scalastream/Config.scala new file mode 100644 index 000000000..ae89f23b9 --- /dev/null +++ b/http4s/src/main/scala/com.snowplowanalytics.snowplow.collectors.scalastream/Config.scala @@ -0,0 +1,304 @@ +package 
com.snowplowanalytics.snowplow.collectors.scalastream + +import java.nio.file.{Files, Path} + +import scala.concurrent.duration.FiniteDuration +import scala.collection.JavaConverters._ + +import com.typesafe.config.ConfigFactory + +import io.circe.config.syntax._ +import io.circe.generic.semiauto._ +import io.circe.Decoder +import io.circe._ + +import cats.implicits._ + +import cats.effect.Sync + +import org.http4s.SameSite + +import com.monovore.decline.{Command, Opts} + +object Config { + + def parse[F[_]: Sync]( + args: List[String] + ): F[Config] = { /* Reads the --config / -c path from args, then loads and decodes that file; failures are raised in F as IllegalArgumentException. */ + val appConfig = Opts.option[Path]("config", "Path to HOCON configuration", "c", "config.hocon") + val parser = Command[Path]("collector", "Snowplow application that collects tracking events")(appConfig) + for { + path <- parser.parse(args) match { + case Left(help) => + Sync[F].raiseError(new IllegalArgumentException(s"can't read CLI arguments. $help")) + case Right(path) => + Sync[F].pure(path) + } + config <- parseConfigFile(path) + } yield config + } + + private def parseConfigFile[F[_]: Sync](path: Path): F[Config] = /* Layers: user file first, then application.conf, then reference.conf as fallbacks. */ + Either.catchNonFatal(Files.readAllLines(path).asScala.mkString("\n")) match { + case Right(raw) => + val config = ConfigFactory + .parseString(raw) + .withFallback(ConfigFactory.load("application.conf")) + .withFallback(ConfigFactory.load("reference.conf")) + .resolve() /* Resolve only after all fallbacks are merged, so substitutions in the user file can see fallback values. */ + config.as[CollectorConfig] match { + case Right(parsed) => + Sync[F].pure(parsed.collector) + case Left(err) => + Sync[F].raiseError(new IllegalArgumentException(s"can't parse config file $path. Error: $err")) + } + case Left(err) => + Sync[F].raiseError(new IllegalArgumentException(s"can't read content of file $path. 
Error: $err")) + } + + final case class CollectorConfig( + collector: Config + ) + final case class P3P( + policyRef: String, + CP: String + ) + final case class CrossDomain( + enabled: Boolean, + domains: List[String], + secure: Boolean + ) + final case class Cookie( + enabled: Boolean, + name: String, + expiration: FiniteDuration, + domains: List[String], + fallbackDomain: Option[String], + secure: Boolean, + httpOnly: Boolean, + sameSite: Option[SameSite] + ) + final case class DoNotTrackCookie( + enabled: Boolean, + name: String, + value: String + ) + final case class CookieBounce( + enabled: Boolean, + name: String, + fallbackNetworkUserId: String, + forwardedProtocolHeader: Option[String] + ) + final case class RedirectMacro( + enabled: Boolean, + placeholder: Option[String] + ) + final case class RootResponse( + enabled: Boolean, + statusCode: Int, + headers: Map[String, String], + body: String + ) + final case class CORS( + accessControlMaxAge: FiniteDuration + ) + final case class Streams( + good: String, + bad: String, + useIpAddressAsPartitionKey: Boolean, + sink: Sink, + buffer: Buffer + ) + sealed trait Sink { + val maxBytes: Int + } + object Sink { + final case class Kinesis( + maxBytes: Int, + region: String, + threadPoolSize: Int, + aws: AWSCreds, + backoffPolicy: BackoffPolicy, + customEndpoint: Option[String], + sqsGoodBuffer: Option[String], + sqsBadBuffer: Option[String], + sqsMaxBytes: Int, + startupCheckInterval: FiniteDuration + ) extends Sink { + val endpoint = customEndpoint.getOrElse(region match { + case cn @ "cn-north-1" => s"https://kinesis.$cn.amazonaws.com.cn" + case cn @ "cn-northwest-1" => s"https://kinesis.$cn.amazonaws.com.cn" + case _ => s"https://kinesis.$region.amazonaws.com" + }) + } + final case class Sqs( + maxBytes: Int, + region: String, + threadPoolSize: Int, + aws: AWSCreds, + backoffPolicy: BackoffPolicy, + startupCheckInterval: FiniteDuration + ) extends Sink + final case class PubSub( + maxBytes: Int, + 
googleProjectId: String, + backoffPolicy: PubSubBackoffPolicy, + startupCheckInterval: FiniteDuration, + retryInterval: FiniteDuration + ) extends Sink + final case class Kafka( + maxBytes: Int, + brokers: String, + retries: Int, + producerConf: Option[Map[String, String]] + ) extends Sink + final case class Nsq( + maxBytes: Int, + host: String, + port: Int + ) extends Sink + final case class Rabbitmq( + maxBytes: Int, + username: String, + password: String, + virtualHost: String, + host: String, + port: Int, + backoffPolicy: RabbitMQBackoffPolicy, + routingKeyGood: String, + routingKeyBad: String, + threadPoolSize: Option[Int] + ) extends Sink + final case class Stdout( + maxBytes: Int + ) extends Sink + final case class AWSCreds( + accessKey: String, + secretKey: String + ) + final case class BackoffPolicy( + minBackoff: Long, + maxBackoff: Long, + maxRetries: Int + ) + final case class PubSubBackoffPolicy( + minBackoff: Long, + maxBackoff: Long, + totalBackoff: Long, + multiplier: Double, + initialRpcTimeout: Long, + maxRpcTimeout: Long, + rpcTimeoutMultiplier: Double + ) + final case class RabbitMQBackoffPolicy( + minBackoff: Long, + maxBackoff: Long, + multiplier: Double + ) + } + final case class Buffer( + byteLimit: Long, + recordLimit: Long, + timeLimit: Long + ) + final case class Monitoring( + metrics: Metrics + ) + final case class Metrics( + statsd: Statsd + ) + final case class Statsd( + enabled: Boolean, + hostname: String, + port: Int, + period: FiniteDuration, + prefix: String + ) + final case class SSL( + enable: Boolean, + redirect: Boolean, + port: Int + ) + implicit val p3pDecoder: Decoder[P3P] = deriveDecoder[P3P] + implicit val crossDomainDecoder: Decoder[CrossDomain] = deriveDecoder[CrossDomain] + implicit val sameSiteDecoder: Decoder[SameSite] = Decoder.instance { cur => + cur.as[String].map(_.toLowerCase) match { + case Right("none") => Right(SameSite.None) + case Right("strict") => Right(SameSite.Strict) + case Right("lax") => 
Right(SameSite.Lax) + case Right(other) => + Left(DecodingFailure(s"sameSite $other is not supported. Accepted values: None, Strict, Lax", cur.history)) + case Left(err) => Left(err) + } + } + implicit val cookieDecoder: Decoder[Cookie] = deriveDecoder[Cookie] + implicit val doNotTrackCookieDecoder: Decoder[DoNotTrackCookie] = deriveDecoder[DoNotTrackCookie] + implicit val cookieBounceDecoder: Decoder[CookieBounce] = deriveDecoder[CookieBounce] + implicit val redirectMacroDecoder: Decoder[RedirectMacro] = deriveDecoder[RedirectMacro] + implicit val rootResponseDecoder: Decoder[RootResponse] = deriveDecoder[RootResponse] + implicit val corsDecoder: Decoder[CORS] = deriveDecoder[CORS] + implicit val bufferDecoder: Decoder[Buffer] = deriveDecoder[Buffer] + implicit val awsCredsDecoder: Decoder[Sink.AWSCreds] = deriveDecoder[Sink.AWSCreds] + implicit val backoffPolicyDecoder: Decoder[Sink.BackoffPolicy] = deriveDecoder[Sink.BackoffPolicy] + implicit val kinesisDecoder: Decoder[Sink.Kinesis] = deriveDecoder[Sink.Kinesis] + implicit val sqsDecoder: Decoder[Sink.Sqs] = deriveDecoder[Sink.Sqs] + implicit val pubSubBackoffPolicyDecoder: Decoder[Sink.PubSubBackoffPolicy] = deriveDecoder[Sink.PubSubBackoffPolicy] + implicit val pubSubDecoder: Decoder[Sink.PubSub] = deriveDecoder[Sink.PubSub] + implicit val kafkaDecoder: Decoder[Sink.Kafka] = deriveDecoder[Sink.Kafka] + implicit val nsqDecoder: Decoder[Sink.Nsq] = deriveDecoder[Sink.Nsq] + implicit val rabbitMQBackoffPolicyDecoder: Decoder[Sink.RabbitMQBackoffPolicy] = + deriveDecoder[Sink.RabbitMQBackoffPolicy] + implicit val rabbitMQDecoder: Decoder[Sink.Rabbitmq] = deriveDecoder[Sink.Rabbitmq] + implicit val stdoutDecoder: Decoder[Sink.Stdout] = deriveDecoder[Sink.Stdout] + implicit val sinkDecoder: Decoder[Sink] = Decoder.instance { cur => + val sinkType = cur.downField("enabled") + sinkType.as[String].map(_.toLowerCase) match { + case Right("kinesis") => + cur.as[Sink.Kinesis] + case Right("sqs") => + cur.as[Sink.Sqs] + 
case Right("google-pub-sub") => + cur.as[Sink.PubSub] + case Right("rabbitmq") => + cur.as[Sink.Rabbitmq] + case Right("nsq") => + cur.as[Sink.Nsq] + case Right("kafka") => + cur.as[Sink.Kafka] /* "kafka" must decode its own settings (brokers, retries, producerConf), not the stdout shape. */ + case Right("stdout") => + cur.as[Sink.Stdout] + case Right(other) => + Left(DecodingFailure(s"Enabled sink type $other is not supported", sinkType.history)) + case Left(DecodingFailure(_, List(CursorOp.DownField("enabled")))) => + Left(DecodingFailure("Cannot find 'enabled' string in sink configuration", sinkType.history)) + case Left(other) => + Left(other) + } + } + implicit val streamsDecoder: Decoder[Streams] = deriveDecoder[Streams] + implicit val statsdDecoder: Decoder[Statsd] = deriveDecoder[Statsd] + implicit val metricsDecoder: Decoder[Metrics] = deriveDecoder[Metrics] + implicit val monitoringDecoder: Decoder[Monitoring] = deriveDecoder[Monitoring] + implicit val sslDecoder: Decoder[SSL] = deriveDecoder[SSL] + implicit val configDecoder: Decoder[Config] = deriveDecoder[Config] + implicit val collectorConfigDecoder: Decoder[CollectorConfig] = deriveDecoder[CollectorConfig] +} + +final case class Config( + interface: String, + port: Int, + paths: Map[String, String], + p3p: Config.P3P, + crossDomain: Config.CrossDomain, + cookie: Config.Cookie, + doNotTrackCookie: Config.DoNotTrackCookie, + cookieBounce: Config.CookieBounce, + redirectMacro: Config.RedirectMacro, + rootResponse: Config.RootResponse, + cors: Config.CORS, + streams: Config.Streams, + monitoring: Config.Monitoring, + ssl: Config.SSL, + enableDefaultRedirect: Boolean, + redirectDomains: Set[String] +) diff --git a/http4s/src/main/scala/com.snowplowanalytics.snowplow.collectors.scalastream/model.scala b/http4s/src/main/scala/com.snowplowanalytics.snowplow.collectors.scalastream/model.scala index 18c0b4563..66ab01fbe 100644 --- a/http4s/src/main/scala/com.snowplowanalytics.snowplow.collectors.scalastream/model.scala +++ 
b/http4s/src/main/scala/com.snowplowanalytics.snowplow.collectors.scalastream/model.scala @@ -1,9 +1,5 @@ package com.snowplowanalytics.snowplow.collectors.scalastream -import scala.concurrent.duration._ - -import org.http4s.SameSite - import io.circe.Json object model { @@ -12,7 +8,7 @@ object model { * Case class for holding both good and * bad sinks for the Stream Collector. */ - final case class CollectorSinks[F[_]](good: Sink[F], bad: Sink[F]) + final case class Sinks[F[_]](good: Sink[F], bad: Sink[F]) /** * Case class for holding the results of @@ -30,22 +26,4 @@ object model { * @param failedBigEvents List of events that were too large */ final case class SplitBatchResult(goodBatches: List[List[Json]], failedBigEvents: List[Json]) - - final case class CookieConfig( - enabled: Boolean, - name: String, - expiration: FiniteDuration, - domains: List[String], - fallbackDomain: Option[String], - secure: Boolean, - httpOnly: Boolean, - sameSite: Option[SameSite] - ) - - final case class CollectorConfig( - paths: Map[String, String], - cookie: CookieConfig - ) { - val cookieConfig = if (cookie.enabled) Some(cookie) else None - } } diff --git a/http4s/src/test/scala/com.snowplowanalytics.snowplow.collectors.scalastream/CollectorServiceSpec.scala b/http4s/src/test/scala/com.snowplowanalytics.snowplow.collectors.scalastream/CollectorServiceSpec.scala index 08720df71..58b64852b 100644 --- a/http4s/src/test/scala/com.snowplowanalytics.snowplow.collectors.scalastream/CollectorServiceSpec.scala +++ b/http4s/src/test/scala/com.snowplowanalytics.snowplow.collectors.scalastream/CollectorServiceSpec.scala @@ -2,25 +2,34 @@ package com.snowplowanalytics.snowplow.collectors.scalastream import scala.concurrent.duration._ import scala.collection.JavaConverters._ + +import org.specs2.mutable.Specification + +import org.typelevel.ci._ + +import org.apache.thrift.{TDeserializer, TSerializer} + +import com.comcast.ip4s.IpAddress + +import cats.data.NonEmptyList + import 
cats.effect.{Clock, IO} import cats.effect.unsafe.implicits.global -import cats.data.NonEmptyList -import com.snowplowanalytics.snowplow.CollectorPayload.thrift.model1.CollectorPayload + import org.http4s._ import org.http4s.headers._ import org.http4s.implicits._ -import org.typelevel.ci._ -import com.comcast.ip4s.IpAddress -import org.specs2.mutable.Specification + +import com.snowplowanalytics.snowplow.CollectorPayload.thrift.model1.CollectorPayload + import com.snowplowanalytics.snowplow.collectors.scalastream.model._ -import org.apache.thrift.{TDeserializer, TSerializer} class CollectorServiceSpec extends Specification { case class ProbeService(service: CollectorService[IO], good: TestSink, bad: TestSink) val service = new CollectorService[IO]( - config = TestUtils.testConf, - sinks = CollectorSinks[IO](new TestSink, new TestSink), + config = TestUtils.testConfig, + sinks = Sinks[IO](new TestSink, new TestSink), appName = "appName", appVersion = "appVersion" ) @@ -31,8 +40,8 @@ class CollectorServiceSpec extends Specification { val good = new TestSink val bad = new TestSink val service = new CollectorService[IO]( - config = TestUtils.testConf, - sinks = CollectorSinks[IO](good, bad), + config = TestUtils.testConfig, + sinks = Sinks[IO](good, bad), appName = "appName", appVersion = "appVersion" ) @@ -254,7 +263,7 @@ class CollectorServiceSpec extends Specification { } "cookieHeader" in { - val testCookieConfig = CookieConfig( + val testCookieConfig = Config.Cookie( enabled = true, name = "name", expiration = 5.seconds, @@ -268,30 +277,29 @@ class CollectorServiceSpec extends Specification { "give back a cookie header with the appropriate configuration" in { val nuid = "nuid" - val conf = testCookieConfig val Some(`Set-Cookie`(cookie)) = service.cookieHeader( headers = Headers.empty, - cookieConfig = Some(conf), + cookieConfig = testCookieConfig, networkUserId = nuid, doNotTrack = false, spAnonymous = None, now = now ) - cookie.name shouldEqual conf.name + 
cookie.name shouldEqual testCookieConfig.name cookie.content shouldEqual nuid cookie.domain shouldEqual None cookie.path shouldEqual Some("/") cookie.expires must beSome - (cookie.expires.get.toDuration - now).toMillis must beCloseTo(conf.expiration.toMillis, 1000L) + (cookie.expires.get.toDuration - now).toMillis must beCloseTo(testCookieConfig.expiration.toMillis, 1000L) cookie.secure must beFalse cookie.httpOnly must beFalse cookie.extension must beEmpty } - "give back None if no configuration is given" in { + "give back None if cookie is not enabled" in { service.cookieHeader( headers = Headers.empty, - cookieConfig = None, + cookieConfig = testCookieConfig.copy(enabled = false), networkUserId = "nuid", doNotTrack = false, spAnonymous = None, @@ -299,10 +307,9 @@ class CollectorServiceSpec extends Specification { ) shouldEqual None } "give back None if doNoTrack is true" in { - val conf = testCookieConfig service.cookieHeader( headers = Headers.empty, - cookieConfig = Some(conf), + cookieConfig = testCookieConfig, networkUserId = "nuid", doNotTrack = true, spAnonymous = None, @@ -310,10 +317,9 @@ class CollectorServiceSpec extends Specification { ) shouldEqual None } "give back None if SP-Anonymous header is present" in { - val conf = testCookieConfig service.cookieHeader( headers = Headers.empty, - cookieConfig = Some(conf), + cookieConfig = testCookieConfig, networkUserId = "nuid", doNotTrack = true, spAnonymous = Some("*"), @@ -330,7 +336,7 @@ class CollectorServiceSpec extends Specification { val Some(`Set-Cookie`(cookie)) = service.cookieHeader( headers = Headers.empty, - cookieConfig = Some(conf), + cookieConfig = conf, networkUserId = nuid, doNotTrack = false, spAnonymous = None, @@ -342,7 +348,7 @@ class CollectorServiceSpec extends Specification { cookie.extension must beNone service.cookieHeader( headers = Headers.empty, - cookieConfig = Some(conf), + cookieConfig = conf, networkUserId = nuid, doNotTrack = true, spAnonymous = None, @@ -352,7 +358,7 @@ 
class CollectorServiceSpec extends Specification { } "cookieDomain" in { - val testCookieConfig = CookieConfig( + val testCookieConfig = Config.Cookie( enabled = true, name = "name", expiration = 5.seconds, @@ -364,9 +370,8 @@ class CollectorServiceSpec extends Specification { ) "not return a domain" in { "if a list of domains is not supplied in the config and there is no fallback domain" in { - val headers = Headers.empty - val cookieConfig = testCookieConfig - service.cookieDomain(headers, cookieConfig.domains, cookieConfig.fallbackDomain) shouldEqual None + val headers = Headers.empty + service.cookieDomain(headers, testCookieConfig.domains, testCookieConfig.fallbackDomain) shouldEqual None } "if a list of domains is supplied in the config but the Origin request header is empty and there is no fallback domain" in { val headers = Headers.empty @@ -519,8 +524,8 @@ class CollectorServiceSpec extends Specification { "should pass on the original path if no mapping for it can be found" in { val service = new CollectorService( - TestUtils.testConf.copy(paths = Map.empty[String, String]), - CollectorSinks(new TestSink, new TestSink), + TestUtils.testConfig.copy(paths = Map.empty[String, String]), + Sinks(new TestSink, new TestSink), "", "" ) diff --git a/http4s/src/test/scala/com.snowplowanalytics.snowplow.collectors.scalastream/TestUtils.scala b/http4s/src/test/scala/com.snowplowanalytics.snowplow.collectors.scalastream/TestUtils.scala index a60a79c0a..9fd0e8869 100644 --- a/http4s/src/test/scala/com.snowplowanalytics.snowplow.collectors.scalastream/TestUtils.scala +++ b/http4s/src/test/scala/com.snowplowanalytics.snowplow.collectors.scalastream/TestUtils.scala @@ -1,25 +1,90 @@ package com.snowplowanalytics.snowplow.collectors.scalastream import scala.concurrent.duration._ -import com.snowplowanalytics.snowplow.collectors.scalastream.model._ + +import org.http4s.SameSite + +import com.snowplowanalytics.snowplow.collectors.scalastream.Config._ object TestUtils { - val 
testConf = CollectorConfig( + val testConfig = Config( + interface = "0.0.0.0", + port = 8080, paths = Map( "/com.acme/track" -> "/com.snowplowanalytics.snowplow/tp2", "/com.acme/redirect" -> "/r/tp2", "/com.acme/iglu" -> "/com.snowplowanalytics.iglu/v1" ), - cookie = CookieConfig( - enabled = true, - name = "sp", - expiration = 365.days, - domains = List.empty, - fallbackDomain = None, - secure = false, - httpOnly = false, - sameSite = None - ) + p3p = P3P( + "/w3c/p3p.xml", + "NOI DSP COR NID PSA OUR IND COM NAV STA" + ), + crossDomain = CrossDomain( + false, + List("*"), + true + ), + cookie = Cookie( + true, + "sp", + 365.days, + Nil, + None, + true, + true, + Some(SameSite.None) + ), + doNotTrackCookie = DoNotTrackCookie( + false, + "", + "" + ), + cookieBounce = CookieBounce( + false, + "n3pc", + "00000000-0000-4000-A000-000000000000", + None + ), + redirectMacro = RedirectMacro( + false, + None + ), + rootResponse = RootResponse( + false, + 302, + Map.empty[String, String], + "" + ), + cors = CORS(60.minutes), + streams = Streams( + "raw", + "bad-1", + false, + Sink.Stdout(1000000000), + Buffer( + 3145728, + 500, + 5000 + ) + ), + monitoring = Monitoring( + Metrics( + Statsd( + false, + "localhost", + 8125, + 10.seconds, + "snowplow.collector" + ) + ) + ), + ssl = SSL( + false, + false, + 443 + ), + enableDefaultRedirect = false, + redirectDomains = Set.empty[String] ) } diff --git a/project/Dependencies.scala b/project/Dependencies.scala index 6f225e087..f0b442c06 100644 --- a/project/Dependencies.scala +++ b/project/Dependencies.scala @@ -46,16 +46,19 @@ object Dependencies { val scopt = "4.0.1" val pureconfig = "0.17.2" val akkaHttpMetrics = "1.7.1" - val badRows = "2.1.1" + val badRows = "2.2.1" val log4cats = "2.6.0" + val http4s = "0.23.23" + val blaze = "0.23.15" + val http4sNetty = "0.5.9" + val decline = "2.4.1" + val circe = "0.14.1" + val circeConfig = "0.10.0" // Scala (test only) val specs2 = "4.11.0" val specs2CE = "0.4.1" val testcontainers = 
"0.40.10" val catsRetry = "2.1.0" - val http4s = "0.23.23" - val blaze = "0.23.15" - val http4sNetty = "0.5.9" val http4sIT = "0.21.33" } @@ -92,13 +95,15 @@ object Dependencies { val akkaHttpMetrics = "fr.davit" %% "akka-http-metrics-datadog" % V.akkaHttpMetrics val log4cats = "org.typelevel" %% "log4cats-slf4j" % V.log4cats - - //http4s - val http4sDsl = "org.http4s" %% "http4s-dsl" % V.http4s - val http4sEmber = "org.http4s" %% "http4s-ember-server" % V.http4s - val http4sBlaze = "org.http4s" %% "http4s-blaze-server" % V.blaze - val http4sNetty = "org.http4s" %% "http4s-netty-server" % V.http4sNetty - + // http4s + val http4sDsl = "org.http4s" %% "http4s-dsl" % V.http4s + val http4sEmber = "org.http4s" %% "http4s-ember-server" % V.http4s + val http4sBlaze = "org.http4s" %% "http4s-blaze-server" % V.blaze + val http4sNetty = "org.http4s" %% "http4s-netty-server" % V.http4sNetty + val decline = "com.monovore" %% "decline" % V.decline + val circeGeneric = "io.circe" %% "circe-generic" % V.circe + val circeConfig = "io.circe" %% "circe-config" % V.circeConfig + // Scala (test only) val specs2 = "org.specs2" %% "specs2-core" % V.specs2 % Test val specs2It = "org.specs2" %% "specs2-core" % V.specs2 % IntegrationTest diff --git a/stdout/src/main/scala/com.snowplowanalytics.snowplow.collectors.scalastream/PrintingSink.scala b/stdout/src/main/scala/com.snowplowanalytics.snowplow.collectors.scalastream/PrintingSink.scala index e6117809d..661839903 100644 --- a/stdout/src/main/scala/com.snowplowanalytics.snowplow.collectors.scalastream/PrintingSink.scala +++ b/stdout/src/main/scala/com.snowplowanalytics.snowplow.collectors.scalastream/PrintingSink.scala @@ -1,29 +1,19 @@ -/* - * Copyright (c) 2013-2023 Snowplow Analytics Ltd. All rights reserved. - * - * This program is licensed to you under the Apache License Version 2.0, and - * you may not use this file except in compliance with the Apache License - * Version 2.0. 
You may obtain a copy of the Apache License Version 2.0 at - * http://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the Apache License Version 2.0 is distributed on an "AS - * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - * implied. See the Apache License Version 2.0 for the specific language - * governing permissions and limitations there under. - */ package com.snowplowanalytics.snowplow.collectors.scalastream -import cats.effect.Sync -import cats.implicits._ - import java.io.PrintStream import java.util.Base64 -class PrintingSink[F[_]: Sync](stream: PrintStream) extends Sink[F] { +import cats.implicits._ + +import cats.effect.Sync + +class PrintingSink[F[_]: Sync]( + maxByteS: Int, + stream: PrintStream +) extends Sink[F] { private val encoder: Base64.Encoder = Base64.getEncoder.withoutPadding() - override val maxBytes: Int = Int.MaxValue // TODO: configurable? + override val maxBytes: Int = maxByteS override def isHealthy: F[Boolean] = Sync[F].pure(true) override def storeRawEvents(events: List[Array[Byte]], key: String): F[Unit] = diff --git a/stdout/src/main/scala/com.snowplowanalytics.snowplow.collectors.scalastream/StdoutCollector.scala b/stdout/src/main/scala/com.snowplowanalytics.snowplow.collectors.scalastream/StdoutCollector.scala index 7a1a7f592..e97b8983f 100644 --- a/stdout/src/main/scala/com.snowplowanalytics.snowplow.collectors.scalastream/StdoutCollector.scala +++ b/stdout/src/main/scala/com.snowplowanalytics.snowplow.collectors.scalastream/StdoutCollector.scala @@ -1,48 +1,29 @@ -/* - * Copyright (c) 2013-2022 Snowplow Analytics Ltd. All rights reserved. - * - * This program is licensed to you under the Apache License Version 2.0, and - * you may not use this file except in compliance with the Apache License - * Version 2.0. 
You may obtain a copy of the Apache License Version 2.0 at - * http://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the Apache License Version 2.0 is distributed on an "AS - * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - * implied. See the Apache License Version 2.0 for the specific language - * governing permissions and limitations there under. - */ package com.snowplowanalytics.snowplow.collectors.scalastream -import scala.concurrent.duration._ +import cats.effect.{ExitCode, IO, IOApp, Sync} import cats.effect.kernel.Resource -import cats.effect.{ExitCode, IO, IOApp} + +import com.snowplowanalytics.snowplow.collectors.scalastream.Config.Sink.Stdout import com.snowplowanalytics.snowplow.collectors.scalastream.generated.BuildInfo import com.snowplowanalytics.snowplow.collectors.scalastream.model._ object StdoutCollector extends IOApp { - def run(args: List[String]): IO[ExitCode] = { - val good = Resource.pure[IO, Sink[IO]](new PrintingSink[IO](System.out)) - val bad = Resource.pure[IO, Sink[IO]](new PrintingSink[IO](System.err)) + def run(args: List[String]): IO[ExitCode] = CollectorApp.run[IO]( - good, - bad, - CollectorConfig( - Map.empty, - cookie = CookieConfig( - enabled = true, - name = "sp", - expiration = 365.days, - domains = List.empty, - fallbackDomain = None, - secure = false, - httpOnly = false, - sameSite = None - ) - ), + args, BuildInfo.shortName, - BuildInfo.version + BuildInfo.version, + mkSinks ) - } + + def mkSinks[F[_]: Sync](config: Config.Streams): Resource[F, Sinks[F]] = + config.sink match { + case Stdout(maxBytes) => + val good = new PrintingSink(maxBytes, System.out) + val bad = new PrintingSink(maxBytes, System.err) + Resource.pure(Sinks(good, bad)) + case other => + Resource.eval(Sync[F].raiseError(new IllegalArgumentException(s"sink $other is not stdout"))) + } } diff --git 
a/stdout/src/test/scala/com.snowplowanalytics.snowplow.collectors.scalastream/sinks/PrintingSinkSpec.scala b/stdout/src/test/scala/com.snowplowanalytics.snowplow.collectors.scalastream/sinks/PrintingSinkSpec.scala index 359006c92..a7c6a69be 100644 --- a/stdout/src/test/scala/com.snowplowanalytics.snowplow.collectors.scalastream/sinks/PrintingSinkSpec.scala +++ b/stdout/src/test/scala/com.snowplowanalytics.snowplow.collectors.scalastream/sinks/PrintingSinkSpec.scala @@ -1,33 +1,21 @@ -/* - * Copyright (c) 2013-2022 Snowplow Analytics Ltd. All rights reserved. - * - * This program is licensed to you under the Apache License Version 2.0, and - * you may not use this file except in compliance with the Apache License - * Version 2.0. You may obtain a copy of the Apache License Version 2.0 at - * http://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the Apache License Version 2.0 is distributed on an "AS - * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - * implied. See the Apache License Version 2.0 for the specific language - * governing permissions and limitations there under. 
- */ package com.snowplowanalytics.snowplow.collectors.scalastream.sinks +import java.io.{ByteArrayOutputStream, PrintStream} +import java.nio.charset.StandardCharsets + +import org.specs2.mutable.Specification + import cats.effect.IO import cats.effect.unsafe.implicits.global -import com.snowplowanalytics.snowplow.collectors.scalastream.PrintingSink -import org.specs2.mutable.Specification -import java.io.{ByteArrayOutputStream, PrintStream} -import java.nio.charset.StandardCharsets +import com.snowplowanalytics.snowplow.collectors.scalastream.PrintingSink class PrintingSinkSpec extends Specification { "Printing sink" should { "print provided bytes encoded as BASE64 string" in { val baos = new ByteArrayOutputStream() - val sink = new PrintingSink[IO](new PrintStream(baos)) + val sink = new PrintingSink[IO](Integer.MAX_VALUE, new PrintStream(baos)) val input = "Something" sink.storeRawEvents(List(input.getBytes(StandardCharsets.UTF_8)), "key").unsafeRunSync()