Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Release 0.24.0 #208

Merged
merged 2 commits into from
Aug 2, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions CHANGELOG
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
Version 0.24.0 (2024-08-02)
---------------------------
Limit JSON depth of the schema in the validateSchema (#207)

Version 0.23.0 (2024-05-08)
---------------------------
Vector instead of List for parquet types and values (#205)
Expand Down
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ Schema DDL itself does not provide any CLI and expose only Scala API.
Schema DDL is compiled against Scala 2.12 and 2.13 and is available on Maven Central. In order to use it with SBT, include the following module:

```scala
libraryDependencies += "com.snowplowanalytics" %% "schema-ddl" % "0.23.0"
libraryDependencies += "com.snowplowanalytics" %% "schema-ddl" % "0.24.0"
```


Expand Down Expand Up @@ -64,5 +64,5 @@ limitations under the License.
[contributing]: https://docs.snowplow.io/docs/contributing/
[contributing-image]: https://d3i6fms1cm1j0i.cloudfront.net/github/images/contributing.png

[api-reference]: https://snowplow.github.io/schema-ddl/0.23.0/com/snowplowanalytics/iglu/schemaddl/index.html
[api-reference]: https://snowplow.github.io/schema-ddl/0.24.0/com/snowplowanalytics/iglu/schemaddl/index.html
[api-reference-image]: https://d3i6fms1cm1j0i.cloudfront.net/github/images/techdocs.png
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,12 @@ import scala.jdk.CollectionConverters._

import cats.data.{Validated, ValidatedNel, NonEmptyList}
import cats.syntax.validated._
import cats.syntax.either._

import com.fasterxml.jackson.databind.ObjectMapper
import com.networknt.schema.{SpecVersion, JsonSchema, JsonSchemaFactory, SchemaValidatorsConfig}

import io.circe.jackson.circeToJackson
import io.circe.jackson.schemaddl.{circeToJackson, CirceToJsonError}

import com.snowplowanalytics.iglu.core.SelfDescribingSchema.SelfDescribingUri
import com.snowplowanalytics.iglu.core.circe.MetaSchemas
Expand Down Expand Up @@ -187,42 +188,53 @@ object SelfSyntaxChecker {
.build()
.getSchema(new ObjectMapper().readTree(SelfSchemaText))

def validateSchema(schema: Json): ValidatedNel[Message, Unit] = {
val jacksonJson = circeToJackson(schema)
val laxValidation = V4SchemaIgluCore
.validate(jacksonJson)
.asScala
.map(_ -> Linter.Level.Error) // It is an error to fail validation against v4 spec
.toMap
val selfValidation = V4SchemaSelfSyntax
.validate(jacksonJson)
.asScala
.map(_ -> Linter.Level.Error) // It is an error to fail validation of Iglu's `$schema` and `self` properties
.toMap
val strictValidation = V4SchemaStrict
.validate(jacksonJson)
.asScala
.map(_ -> Linter.Level.Warning) // It is a warning to fail the strict validation
.toMap
/**
 * Validates `schema` without an effective JSON depth limit.
 *
 * Delegates to `validateSchema(schema, maxJsonDepth)` with `Int.MaxValue` as the
 * depth limit.
 *
 * @param schema JSON Schema to validate
 * @return whatever the two-argument overload returns for the same `schema`
 */
@deprecated("Use `validateSchema(schema, maxJsonDepth)`", "0.24.0")
def validateSchema(schema: Json): ValidatedNel[Message, Unit] =
validateSchema(schema, Int.MaxValue)

(strictValidation ++ laxValidation ++ selfValidation) // Order is important: Errors override Warnings for identical messages
.toList
.map { case (message, level) =>
val pointer = JsonPath.parse(message.getPath).map(JsonPath.toPointer) match {
case Right(Right(value)) => value
case Right(Left(inComplete)) => inComplete
case Left(_) => Pointer.Root
def validateSchema(schema: Json, maxJsonDepth: Int): ValidatedNel[Message, Unit] =
circeToJackson(schema, maxJsonDepth).toValidated
.leftMap {
case CirceToJsonError.MaxDepthExceeded =>
NonEmptyList.one(
Message(Pointer.Root, CirceToJsonError.MaxDepthExceeded.message, Linter.Level.Error)
)
}
.andThen { jacksonJson =>
val laxValidation = V4SchemaIgluCore
.validate(jacksonJson)
.asScala
.map(_ -> Linter.Level.Error) // It is an error to fail validation against v4 spec
.toMap
val selfValidation = V4SchemaSelfSyntax
.validate(jacksonJson)
.asScala
.map(_ -> Linter.Level.Error) // It is an error to fail validation of Iglu's `$schema` and `self` properties
.toMap
val strictValidation = V4SchemaStrict
.validate(jacksonJson)
.asScala
.map(_ -> Linter.Level.Warning) // It is a warning to fail the strict validation
.toMap

(strictValidation ++ laxValidation ++ selfValidation) // Order is important: Errors override Warnings for identical messages
.toList
.map { case (message, level) =>
val pointer = JsonPath.parse(message.getPath).map(JsonPath.toPointer) match {
case Right(Right(value)) => value
case Right(Left(inComplete)) => inComplete
case Left(_) => Pointer.Root
}
Message(pointer, message.getMessage, level)
}.valid.swap match {
case Validated.Invalid(Nil) =>
().validNel
case Validated.Invalid(h :: t) =>
NonEmptyList(h, t).invalid
case Validated.Valid(_) =>
().validNel
}
Message(pointer, message.getMessage, level)
}.valid.swap match {
case Validated.Invalid(Nil) =>
().validNel
case Validated.Invalid(h :: t) =>
NonEmptyList(h, t).invalid
case Validated.Valid(_) =>
().validNel
}
}
}

/**
* Validates that a self-describing JSON contains the correct schema keyword.
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
/*
* Copyright (c) 2014-2024 Snowplow Analytics Ltd. All rights reserved.
*
* This program is licensed to you under the Apache License Version 2.0,
* and you may not use this file except in compliance with the Apache License Version 2.0.
* You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0.
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the Apache License Version 2.0 is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the Apache License Version 2.0 for the specific language governing permissions and limitations there under.
*/

package io.circe.jackson.schemaddl

/** Failure raised while turning a circe `Json` value into a Jackson node. */
sealed trait CirceToJsonError extends Product with Serializable {

  /** Human-readable description of the failure. */
  def message: String
}

object CirceToJsonError {

  /** The JSON value was nested deeper than the allowed maximum depth. */
  case object MaxDepthExceeded extends CirceToJsonError {
    override val message: String = "Maximum allowed JSON depth exceeded"
  }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
/*
* Copyright (c) 2014-2024 Snowplow Analytics Ltd. All rights reserved.
*
* This program is licensed to you under the Apache License Version 2.0,
* and you may not use this file except in compliance with the Apache License Version 2.0.
* You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0.
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the Apache License Version 2.0 is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the Apache License Version 2.0 for the specific language governing permissions and limitations there under.
*/

package io.circe
package jackson

import cats.syntax.either._
import cats.syntax.traverse._

import scala.jdk.CollectionConverters._

import java.math.{BigDecimal => JBigDecimal}

import com.fasterxml.jackson.databind.JsonNode
import com.fasterxml.jackson.databind.node._

/**
 * A hack to add max json depth check to circeToJackson.
 *
 * The conversion below spends one unit of a depth budget on every value it visits, so
 * overly nested documents are rejected instead of being converted.
 */
package object schemaddl {
  // Reference value used to recognise negative zero, which is emitted as a DoubleNode
  // instead of going through the per-representation number cases.
  private val negativeZeroJson: Json = Json.fromDoubleOrNull(-0.0)

  /**
   * Converts given circe's Json instance to Jackson's JsonNode, failing once the nesting
   * budget `maxJsonDepth` is used up.
   *
   * Every visited value consumes one level of the budget, scalars included. With
   * `maxJsonDepth = 1` an empty object converts, but `{"a": 1}` does not, because the
   * nested `1` is visited with a budget of 0.
   *
   * Numbers that circe cannot expose as a BigDecimal (`toBigDecimal` is empty), or whose
   * decimal text is rejected by `java.math.BigDecimal`, are converted to text nodes.
   *
   * @param json instance of circe's Json
   * @param maxJsonDepth number of nesting levels still allowed, counting `json` itself;
   *                     any value `<= 0` fails immediately
   * @return `Right` with the converted JsonNode, or
   *         `Left(CirceToJsonError.MaxDepthExceeded)` when the budget is exhausted
   */
  def circeToJackson(json: Json, maxJsonDepth: Int): Either[CirceToJsonError, JsonNode] =
    if (maxJsonDepth <= 0) CirceToJsonError.MaxDepthExceeded.asLeft
    else
      json.fold(
        NullNode.instance.asRight,
        BooleanNode.valueOf(_).asRight,
        number => {
          if (json == negativeZeroJson)
            DoubleNode.valueOf(number.toDouble)
          else
            number match {
              case _: JsonBiggerDecimal | _: JsonBigDecimal =>
                number.toBigDecimal
                  .map(bigDecimal => DecimalNode.valueOf(bigDecimal.underlying))
                  .getOrElse(TextNode.valueOf(number.toString))
              case JsonLong(x) => LongNode.valueOf(x)
              case JsonDouble(x) => DoubleNode.valueOf(x)
              case JsonFloat(x) => FloatNode.valueOf(x)
              case JsonDecimal(x) =>
                try {
                  DecimalNode.valueOf(new JBigDecimal(x))
                } catch {
                  // Fall back to the raw text when it is not a parseable decimal.
                  case _: NumberFormatException => TextNode.valueOf(x)
                }
            }
        }.asRight,
        TextNode.valueOf(_).asRight,
        // Children are converted with one level less of budget.
        array =>
          array
            .traverse(circeToJackson(_, maxJsonDepth - 1))
            .map { l => JsonNodeFactory.instance.arrayNode.addAll(l.asJava) },
        obj =>
          obj.toList
            .traverse { case (k, v) => circeToJackson(v, maxJsonDepth - 1).map((k, _)) }
            .map { l =>
              // Collect into a LinkedHashMap so the fields reach Jackson in the order
              // `obj.toList` yields them; an immutable Scala Map gives no such guarantee.
              val orderedFields = new java.util.LinkedHashMap[String, JsonNode]()
              l.foreach { case (k, v) => orderedFields.put(k, v) }
              objectNodeSetAll(JsonNodeFactory.instance.objectNode, orderedFields)
            }
      )

  /**
   * Copies all `fields` into `node` and returns the node.
   *
   * The explicit type argument on `setAll` fixes the result type to `JsonNode`.
   *
   * @param node object node that receives the fields
   * @param fields field names mapped to the values to set
   * @return the result of `node.setAll`, typed as JsonNode
   */
  def objectNodeSetAll(node: ObjectNode, fields: java.util.Map[String, JsonNode]): JsonNode =
    node.setAll[JsonNode](fields)
}
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@ import com.snowplowanalytics.iglu.schemaddl.jsonschema.Linter.Level.{Error, Warn
import com.snowplowanalytics.iglu.schemaddl.jsonschema.Linter.Message

class SelfSyntaxCheckerSpec extends Specification {
val DefaultMaxJsonDepth = 10

"validateSchema" should {
"recognize invalid schema property" in {
val jsonSchema =
Expand Down Expand Up @@ -72,7 +74,7 @@ class SelfSyntaxCheckerSpec extends Specification {
]
}"""

SelfSyntaxChecker.validateSchema(jsonSchema).toEither must beLeft.like {
SelfSyntaxChecker.validateSchema(jsonSchema, DefaultMaxJsonDepth).toEither must beLeft.like {
case NonEmptyList
(Message(
pointer,
Expand Down Expand Up @@ -103,7 +105,7 @@ class SelfSyntaxCheckerSpec extends Specification {
}
}"""

SelfSyntaxChecker.validateSchema(jsonSchema).toEither must beLeft.like {
SelfSyntaxChecker.validateSchema(jsonSchema, DefaultMaxJsonDepth).toEither must beLeft.like {
case NonEmptyList(
Message(
pointer,
Expand All @@ -129,7 +131,7 @@ class SelfSyntaxCheckerSpec extends Specification {
"properties": { }
}"""

SelfSyntaxChecker.validateSchema(jsonSchema).toEither must beLeft.like {
SelfSyntaxChecker.validateSchema(jsonSchema, DefaultMaxJsonDepth).toEither must beLeft.like {
case NonEmptyList
(Message(
pointer,
Expand Down Expand Up @@ -171,7 +173,7 @@ class SelfSyntaxCheckerSpec extends Specification {
"properties": { }
}"""

SelfSyntaxChecker.validateSchema(jsonSchema).toEither must beLeft.like {
SelfSyntaxChecker.validateSchema(jsonSchema, DefaultMaxJsonDepth).toEither must beLeft.like {
case NonEmptyList(Message(_, msg, Error), Nil) =>
msg must contain("does not match the regex pattern")
}
Expand Down Expand Up @@ -205,7 +207,7 @@ class SelfSyntaxCheckerSpec extends Specification {
"properties": { }
}"""

SelfSyntaxChecker.validateSchema(jsonSchema).toEither must beLeft.like {
SelfSyntaxChecker.validateSchema(jsonSchema, DefaultMaxJsonDepth).toEither must beLeft.like {
case NonEmptyList(Message(_, msg, Error), Nil) =>
msg must contain("does not match the regex pattern")
}
Expand All @@ -229,7 +231,7 @@ class SelfSyntaxCheckerSpec extends Specification {
"properties": { }
}"""

SelfSyntaxChecker.validateSchema(jsonSchema).toEither must beLeft.like {
SelfSyntaxChecker.validateSchema(jsonSchema, DefaultMaxJsonDepth).toEither must beLeft.like {
case NonEmptyList(Message(_, msg, Error), Nil) =>
msg must contain("does not match the regex pattern")
}
Expand All @@ -251,7 +253,7 @@ class SelfSyntaxCheckerSpec extends Specification {
"properties": { }
}"""

SelfSyntaxChecker.validateSchema(jsonSchema).toEither must beLeft.like {
SelfSyntaxChecker.validateSchema(jsonSchema, DefaultMaxJsonDepth).toEither must beLeft.like {
case NonEmptyList(Message(_, msg, Error), Nil) =>
msg must contain("does not match the regex pattern")
}
Expand All @@ -274,7 +276,46 @@ class SelfSyntaxCheckerSpec extends Specification {
"properties": { }
}"""

SelfSyntaxChecker.validateSchema(jsonSchema).toEither must beRight
SelfSyntaxChecker.validateSchema(jsonSchema, DefaultMaxJsonDepth).toEither must beRight
}

"disallow schema that exceeds maximum allowed JSON depth" in {
  // The innermost `name` object sits six objects deep
  // (root -> properties -> example_field -> users -> properties -> name),
  // which is beyond the limit used below.
  val maxDepth = 5
  val deeplyNestedSchema =
    json"""{
"$$schema" : "http://iglucentral.com/schemas/com.snowplowanalytics.self-desc/schema/jsonschema/1-0-0#",
"description": "Schema for an example event",
"self": {
"vendor": "com.snowplowanalytics",
"name": "example_event",
"format": "jsonschema",
"version": "1-0-0"
},
"type": "object",
"properties": {
"example_field": {
"type": "array",
"description": "the example_field is a collection of user names",
"users": {
"type": "object",
"properties": {
"name": {
"type": "string",
"maxLength": 128
}
},
"required": [
"id"
],
"additionalProperties": false
}
}
}
}"""

  val outcome = SelfSyntaxChecker.validateSchema(deeplyNestedSchema, maxDepth).toEither

  // Expect exactly one Error-level message reporting the depth violation.
  outcome must beLeft.like {
    case NonEmptyList(Message(_, text, Error), Nil) =>
      text must contain("Maximum allowed JSON depth exceeded")
  }
}
}
}
Loading