From 3f9ad4d1d1e848f4dec07e248c68b863b60919d8 Mon Sep 17 00:00:00 2001
From: voropaevp
Date: Fri, 2 Jun 2023 15:07:44 +0100
Subject: [PATCH] Refactor Redshift migrations (close #173)

---
 build.sbt | 17 -
 .../iglu.schemaddl/StringUtils.scala | 61 +-
 .../iglu.schemaddl/experimental/Bumps.scala | 91 --
 .../experimental/VersionTree.scala | 263 -----
 .../iglu.schemaddl/jsonschema/Delta.scala | 92 --
 .../iglu.schemaddl/jsonschema/Pointer.scala | 4 +-
 .../iglu.schemaddl/migrations/FlatData.scala | 81 --
 .../iglu.schemaddl/migrations/Migration.scala | 77 --
 .../migrations/SchemaDiff.scala | 120 --
 .../migrations/SchemaList.scala | 281 -----
 .../iglu.schemaddl/package.scala | 70 +-
 .../iglu.schemaddl/redshift/AlterTable.scala | 100 --
 .../iglu.schemaddl/redshift/Begin.scala | 33 -
 .../iglu.schemaddl/redshift/Column.scala | 74 --
 .../redshift/ColumnAttribute.scala | 73 --
 .../redshift/ColumnConstraint.scala | 38 -
 .../redshift/CommentBlock.scala | 40 -
 .../iglu.schemaddl/redshift/CommentOn.scala | 26 -
 .../redshift/CreateSchema.scala | 17 -
 .../iglu.schemaddl/redshift/CreateTable.scala | 79 --
 .../iglu.schemaddl/redshift/DataType.scala | 78 --
 .../iglu.schemaddl/redshift/Ddl.scala | 44 -
 .../iglu.schemaddl/redshift/Empty.scala | 18 -
 .../iglu.schemaddl/redshift/End.scala | 17 -
 .../iglu.schemaddl/redshift/RefTable.scala | 26 -
 .../iglu.schemaddl/redshift/ShredModel.scala | 188 +++
 .../redshift/ShredModelEntry.scala | 191 +++
 .../iglu.schemaddl/redshift/Statement.scala | 36 -
 .../redshift/TableAttribute.scala | 53 -
 .../redshift/TableConstraint.scala | 37 -
 .../redshift/generators/DdlFile.scala | 85 --
 .../redshift/generators/DdlGenerator.scala | 226 ----
 .../generators/EncodeSuggestions.scala | 47 -
 .../generators/JsonPathGenerator.scala | 118 --
 .../generators/MigrationGenerator.scala | 159 ---
 .../ColumnTypeSuggestions.scala} | 71 +-
 .../internal}/FlatSchema.scala | 109 +-
 .../redshift/internal/Migrations.scala | 105 ++
 .../iglu.schemaddl/redshift/package.scala | 104 ++
 .../iglu/schemaddl/SpecHelpers.scala | 15 +-
 .../schemaddl/experimental/BumpsSpec.scala | 107 --
 .../experimental/VersionTreeSpec.scala | 225 ----
 .../schemaddl/migrations/FlatDataSpec.scala | 211 ----
 .../schemaddl/migrations/FlatSchemaSpec.scala | 1049 -----------------
 .../schemaddl/migrations/MigrationSpec.scala | 496 --------
 .../schemaddl/migrations/SchemaDiffSpec.scala | 91 --
 .../schemaddl/migrations/SchemaListSpec.scala | 317 -----
 .../schemaddl/redshift/ShredModelSpec.scala | 706 +++++++++
 .../redshift/generators/DdlFileSpec.scala | 339 ------
 .../generators/DdlGeneratorSpec.scala | 172 ---
 .../generators/MigrationGeneratorSpec.scala | 324 -----
 .../generators/TypeSuggestionsSpec.scala | 83 --
 .../redshift/internal/FlatSchemaSpec.scala | 531 +++++++++
 .../internal/ShredModelEntrySpec.scala | 214 ++++
 .../jsonschema/json4s/ArraySerializers.scala | 86 --
 .../jsonschema/json4s/CommonSerializers.scala | 114 --
 .../jsonschema/json4s/Formats.scala | 41 -
 .../jsonschema/json4s/NumberSerializers.scala | 63 -
 .../jsonschema/json4s/ObjectSerializers.scala | 103 --
 .../jsonschema/json4s/StringSerializers.scala | 69 --
 .../jsonschema/json4s/implicits.scala | 49 -
 .../jsonschema/json4s/ArraySpec.scala | 50 -
 .../jsonschema/json4s/CommonSpec.scala | 127 --
 .../jsonschema/json4s/NumberSpec.scala | 84 --
 .../jsonschema/json4s/ObjectSpec.scala | 91 --
 .../jsonschema/json4s/StringSpec.scala | 62 -
 66 files changed, 2139 insertions(+), 7029 deletions(-)
 delete mode 100644 modules/core/src/main/scala/com.snowplowanalytics/iglu.schemaddl/experimental/Bumps.scala
 delete mode 100644 modules/core/src/main/scala/com.snowplowanalytics/iglu.schemaddl/experimental/VersionTree.scala
 delete mode 100644 modules/core/src/main/scala/com.snowplowanalytics/iglu.schemaddl/jsonschema/Delta.scala
 delete mode 100644 modules/core/src/main/scala/com.snowplowanalytics/iglu.schemaddl/migrations/FlatData.scala
 delete mode 100644 modules/core/src/main/scala/com.snowplowanalytics/iglu.schemaddl/migrations/Migration.scala
 delete mode 100644 modules/core/src/main/scala/com.snowplowanalytics/iglu.schemaddl/migrations/SchemaDiff.scala
 delete mode 100644 modules/core/src/main/scala/com.snowplowanalytics/iglu.schemaddl/migrations/SchemaList.scala
 delete mode 100644 modules/core/src/main/scala/com.snowplowanalytics/iglu.schemaddl/redshift/AlterTable.scala
 delete mode 100644 modules/core/src/main/scala/com.snowplowanalytics/iglu.schemaddl/redshift/Begin.scala
 delete mode 100644 modules/core/src/main/scala/com.snowplowanalytics/iglu.schemaddl/redshift/Column.scala
 delete mode 100644 modules/core/src/main/scala/com.snowplowanalytics/iglu.schemaddl/redshift/ColumnAttribute.scala
 delete mode 100644 modules/core/src/main/scala/com.snowplowanalytics/iglu.schemaddl/redshift/ColumnConstraint.scala
 delete mode 100644 modules/core/src/main/scala/com.snowplowanalytics/iglu.schemaddl/redshift/CommentBlock.scala
 delete mode 100644 modules/core/src/main/scala/com.snowplowanalytics/iglu.schemaddl/redshift/CommentOn.scala
 delete mode 100644 modules/core/src/main/scala/com.snowplowanalytics/iglu.schemaddl/redshift/CreateSchema.scala
 delete mode 100644 modules/core/src/main/scala/com.snowplowanalytics/iglu.schemaddl/redshift/CreateTable.scala
 delete mode 100644 modules/core/src/main/scala/com.snowplowanalytics/iglu.schemaddl/redshift/DataType.scala
 delete mode 100644 modules/core/src/main/scala/com.snowplowanalytics/iglu.schemaddl/redshift/Ddl.scala
 delete mode 100644 modules/core/src/main/scala/com.snowplowanalytics/iglu.schemaddl/redshift/Empty.scala
 delete mode 100644 modules/core/src/main/scala/com.snowplowanalytics/iglu.schemaddl/redshift/End.scala
 delete mode 100644 modules/core/src/main/scala/com.snowplowanalytics/iglu.schemaddl/redshift/RefTable.scala
 create mode 100644 modules/core/src/main/scala/com.snowplowanalytics/iglu.schemaddl/redshift/ShredModel.scala
 create mode 100644 modules/core/src/main/scala/com.snowplowanalytics/iglu.schemaddl/redshift/ShredModelEntry.scala
 delete mode 100644 modules/core/src/main/scala/com.snowplowanalytics/iglu.schemaddl/redshift/Statement.scala
 delete mode 100644 modules/core/src/main/scala/com.snowplowanalytics/iglu.schemaddl/redshift/TableAttribute.scala
 delete mode 100644 modules/core/src/main/scala/com.snowplowanalytics/iglu.schemaddl/redshift/TableConstraint.scala
 delete mode 100644 modules/core/src/main/scala/com.snowplowanalytics/iglu.schemaddl/redshift/generators/DdlFile.scala
 delete mode 100644 modules/core/src/main/scala/com.snowplowanalytics/iglu.schemaddl/redshift/generators/DdlGenerator.scala
 delete mode 100644 modules/core/src/main/scala/com.snowplowanalytics/iglu.schemaddl/redshift/generators/EncodeSuggestions.scala
 delete mode 100644 modules/core/src/main/scala/com.snowplowanalytics/iglu.schemaddl/redshift/generators/JsonPathGenerator.scala
 delete mode 100644 modules/core/src/main/scala/com.snowplowanalytics/iglu.schemaddl/redshift/generators/MigrationGenerator.scala
 rename modules/core/src/main/scala/com.snowplowanalytics/iglu.schemaddl/redshift/{generators/TypeSuggestions.scala => internal/ColumnTypeSuggestions.scala} (82%)
 rename modules/core/src/main/scala/com.snowplowanalytics/iglu.schemaddl/{migrations => redshift/internal}/FlatSchema.scala (62%)
 create mode 100644 modules/core/src/main/scala/com.snowplowanalytics/iglu.schemaddl/redshift/internal/Migrations.scala
 create mode 100644 modules/core/src/main/scala/com.snowplowanalytics/iglu.schemaddl/redshift/package.scala
 delete mode 100644 modules/core/src/test/scala/com/snowplowanalytics/iglu/schemaddl/experimental/BumpsSpec.scala
 delete mode 100644 modules/core/src/test/scala/com/snowplowanalytics/iglu/schemaddl/experimental/VersionTreeSpec.scala
 delete mode 100644 modules/core/src/test/scala/com/snowplowanalytics/iglu/schemaddl/migrations/FlatDataSpec.scala
 delete mode 100644 modules/core/src/test/scala/com/snowplowanalytics/iglu/schemaddl/migrations/FlatSchemaSpec.scala
 delete mode 100644 modules/core/src/test/scala/com/snowplowanalytics/iglu/schemaddl/migrations/MigrationSpec.scala
 delete mode 100644 modules/core/src/test/scala/com/snowplowanalytics/iglu/schemaddl/migrations/SchemaDiffSpec.scala
 delete mode 100644 modules/core/src/test/scala/com/snowplowanalytics/iglu/schemaddl/migrations/SchemaListSpec.scala
 create mode 100644 modules/core/src/test/scala/com/snowplowanalytics/iglu/schemaddl/redshift/ShredModelSpec.scala
 delete mode 100644 modules/core/src/test/scala/com/snowplowanalytics/iglu/schemaddl/redshift/generators/DdlFileSpec.scala
 delete mode 100644 modules/core/src/test/scala/com/snowplowanalytics/iglu/schemaddl/redshift/generators/DdlGeneratorSpec.scala
 delete mode 100644 modules/core/src/test/scala/com/snowplowanalytics/iglu/schemaddl/redshift/generators/MigrationGeneratorSpec.scala
 delete mode 100644 modules/core/src/test/scala/com/snowplowanalytics/iglu/schemaddl/redshift/generators/TypeSuggestionsSpec.scala
 create mode 100644 modules/core/src/test/scala/com/snowplowanalytics/iglu/schemaddl/redshift/internal/FlatSchemaSpec.scala
 create mode 100644 modules/core/src/test/scala/com/snowplowanalytics/iglu/schemaddl/redshift/internal/ShredModelEntrySpec.scala
 delete mode 100644 modules/json4s/src/main/scala/com.snowplowanalytics.iglu.schemaddl/jsonschema/json4s/ArraySerializers.scala
 delete mode 100644 modules/json4s/src/main/scala/com.snowplowanalytics.iglu.schemaddl/jsonschema/json4s/CommonSerializers.scala
 delete mode 100644 modules/json4s/src/main/scala/com.snowplowanalytics.iglu.schemaddl/jsonschema/json4s/Formats.scala
 delete mode 100644 modules/json4s/src/main/scala/com.snowplowanalytics.iglu.schemaddl/jsonschema/json4s/NumberSerializers.scala
 delete mode 100644 modules/json4s/src/main/scala/com.snowplowanalytics.iglu.schemaddl/jsonschema/json4s/ObjectSerializers.scala
 delete mode 100644 modules/json4s/src/main/scala/com.snowplowanalytics.iglu.schemaddl/jsonschema/json4s/StringSerializers.scala
 delete mode 100644 modules/json4s/src/main/scala/com.snowplowanalytics.iglu.schemaddl/jsonschema/json4s/implicits.scala
 delete mode 100644 modules/json4s/src/test/scala/com.snowplowanalytics.iglu.schemaddl/jsonschema/json4s/ArraySpec.scala
 delete mode 100644 modules/json4s/src/test/scala/com.snowplowanalytics.iglu.schemaddl/jsonschema/json4s/CommonSpec.scala
 delete mode 100644 modules/json4s/src/test/scala/com.snowplowanalytics.iglu.schemaddl/jsonschema/json4s/NumberSpec.scala
 delete mode 100644
modules/json4s/src/test/scala/com.snowplowanalytics.iglu.schemaddl/jsonschema/json4s/ObjectSpec.scala delete mode 100644 modules/json4s/src/test/scala/com.snowplowanalytics.iglu.schemaddl/jsonschema/json4s/StringSpec.scala diff --git a/build.sbt b/build.sbt index 13d450a1..ca71a717 100644 --- a/build.sbt +++ b/build.sbt @@ -39,20 +39,3 @@ lazy val core = project.in(file("modules/core")) Dependencies.Libraries.specs2Scalacheck, Dependencies.Libraries.specs2Cats )) - -lazy val json4s = project.in(file("modules/json4s")) - .settings( - name := "schema-ddl-json4s", - description := "Json4s-compatible entities for Schema DDL", - ) - .settings(BuildSettings.basicSettigns) - .settings(BuildSettings.commonSettings) - .settings(BuildSettings.publishSettings) - .settings(libraryDependencies ++= Seq( - Dependencies.Libraries.igluCoreJson4s, - Dependencies.Libraries.specs2, - Dependencies.Libraries.scalaCheck, - Dependencies.Libraries.specs2Scalacheck, - Dependencies.Libraries.specs2Cats - )) - .dependsOn(core) diff --git a/modules/core/src/main/scala/com.snowplowanalytics/iglu.schemaddl/StringUtils.scala b/modules/core/src/main/scala/com.snowplowanalytics/iglu.schemaddl/StringUtils.scala index 2f0307ef..be8ae920 100644 --- a/modules/core/src/main/scala/com.snowplowanalytics/iglu.schemaddl/StringUtils.scala +++ b/modules/core/src/main/scala/com.snowplowanalytics/iglu.schemaddl/StringUtils.scala @@ -12,72 +12,19 @@ */ package com.snowplowanalytics.iglu.schemaddl -// IgluCore -import com.snowplowanalytics.iglu.core.SchemaMap /** * Utilities for manipulating Strings */ object StringUtils { - /** - * Create a Redshift Table name from a schema - * - * "iglu:com.acme/PascalCase/jsonschema/13-0-0" -> "com_acme_pascal_case_13" - * - * @param schemaMap full Schema description - * @return the Redshift Table name - */ - def getTableName(schemaMap: SchemaMap): String = { - // Split the vendor's reversed domain name using underscores rather than dots - val snakeCaseOrganization = schemaMap - .schemaKey - .vendor - .replaceAll( """\.""", "_") - .replaceAll("-", "_") - .toLowerCase - - // Change the name from PascalCase to snake_case if necessary - val snakeCaseName = snakeCase(schemaMap.schemaKey.name) - - s"${snakeCaseOrganization}_${snakeCaseName}_${schemaMap.schemaKey.version.model}" - } - /** * Transforms CamelCase string into snake_case * Also replaces all hyphens with underscores */ val snakeCase: String => String = str => str.replaceAll("([A-Z]+)([A-Z][a-z])", "$1_$2") - .replaceAll("([a-z\\d])([A-Z])", "$1_$2") - .replaceAll("-", "_") - .toLowerCase - - /** - * Checks if comma-delimited string contains only integers (including negative) - * - * @param string string with items delimited by comma - * @return true if string contains only integers - */ - def isIntegerList(string: String): Boolean = { - val elems = string.split(",").toList - if (elems.isEmpty) { false } - else { - elems.forall { s => - s.headOption match { - case Some('-') if s.length > 1 => s.tail.forall(_.isDigit) - case _ => s.forall(_.isDigit) } - } - } - } - - /** - * Utility object to match convertible strings - */ - object IntegerAsString { - def unapply(s : String) : Option[Int] = try { - Some(s.toInt) - } catch { - case _: java.lang.NumberFormatException => None - } - } + .replaceAll("([a-z\\d])([A-Z])", "$1_$2") + .replaceAll("-", "_") + .toLowerCase + } diff --git a/modules/core/src/main/scala/com.snowplowanalytics/iglu.schemaddl/experimental/Bumps.scala 
b/modules/core/src/main/scala/com.snowplowanalytics/iglu.schemaddl/experimental/Bumps.scala deleted file mode 100644 index d17b3bac..00000000 --- a/modules/core/src/main/scala/com.snowplowanalytics/iglu.schemaddl/experimental/Bumps.scala +++ /dev/null @@ -1,91 +0,0 @@ -/* - * Copyright (c) 2016-2023 Snowplow Analytics Ltd. All rights reserved. - * - * This program is licensed to you under the Apache License Version 2.0, - * and you may not use this file except in compliance with the Apache License Version 2.0. - * You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the Apache License Version 2.0 is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. - */ -package com.snowplowanalytics.iglu.schemaddl.experimental - -import com.snowplowanalytics.iglu.core.VersionKind -import com.snowplowanalytics.iglu.schemaddl.jsonschema.Delta -import com.snowplowanalytics.iglu.schemaddl.migrations.SchemaDiff - -/** Module responsible for version-bump recognition */ -object Bumps { - - /** A function deciding if `SchemaDiff` matches certain SchemaVer convention */ - type VersionPredicate = SchemaDiff => Boolean - - val ModelChecks: List[VersionPredicate] = - List(required, typeChange) - val RevisionChecks: List[VersionPredicate] = - List(typeWidening, constraintWidening) - val AdditionChecks: List[VersionPredicate] = - List(optionalAdded) - - /** Get a difference between two schemas and return a version part that must be bumped */ - def getPointer(diff: SchemaDiff): Option[VersionKind] = - if (ModelChecks.exists(p => p(diff))) Some(VersionKind.Model) - else if (RevisionChecks.exists(p => p(diff))) Some(VersionKind.Revision) - else if (AdditionChecks.exists(p => p(diff))) Some(VersionKind.Addition) - else None - - /** New required property added or existing one became required */ - def required(diff: SchemaDiff): Boolean = { - val newProperties = !diff.added.forall { case (_, schema) => schema.canBeNull } - val becameRequiredType = diff.modified.exists(becameRequired(_.`type`)) - val becameRequiredEnum = diff.modified.exists(becameRequired(_.enum)) - newProperties || becameRequiredType || becameRequiredEnum - } - - /** Changed or restricted type */ - def typeChange(diff: SchemaDiff): Boolean = - diff.modified.exists { modified => - modified.getDelta.`type` match { - case Delta.Changed(Some(from), Some(to)) => to.isSubsetOf(from) && from != to - case Delta.Changed(None, Some(_)) => true - case _ => false - } - } - - /** Revisioned type */ - def typeWidening(diff: SchemaDiff): Boolean = - diff.modified.exists { modified => - modified.getDelta.`type` match { - case Delta.Changed(Some(_), None) => true - case Delta.Changed(Some(from), Some(to)) => from.isSubsetOf(to) && from != to - case Delta.Changed(None, None) => false - case Delta.Changed(None, Some(_)) => false - } - } - - /** Any constraints changed */ - def constraintWidening(diff: SchemaDiff): Boolean = - diff.modified - .map(_.getDelta) - .exists { delta => - delta.multipleOf.nonEmpty || - delta.minimum.nonEmpty || - delta.maximum.nonEmpty || - delta.maxLength.nonEmpty || - delta.minLength.nonEmpty - } - - def becameRequired[A](getter: Delta => Delta.Changed[A])(m: SchemaDiff.Modified): Boolean = - getter(m.getDelta) match { - case d @ 
Delta.Changed(_, _) if d.nonEmpty => - val wasOptional = m.from.canBeNull - val becameRequired = !m.to.canBeNull - wasOptional && becameRequired - case _ => false - } - - def optionalAdded(diff: SchemaDiff): Boolean = - diff.added.forall(_._2.canBeNull) -} diff --git a/modules/core/src/main/scala/com.snowplowanalytics/iglu.schemaddl/experimental/VersionTree.scala b/modules/core/src/main/scala/com.snowplowanalytics/iglu.schemaddl/experimental/VersionTree.scala deleted file mode 100644 index da614535..00000000 --- a/modules/core/src/main/scala/com.snowplowanalytics/iglu.schemaddl/experimental/VersionTree.scala +++ /dev/null @@ -1,263 +0,0 @@ -/* - * Copyright (c) 2016-2023 Snowplow Analytics Ltd. All rights reserved. - * - * This program is licensed to you under the Apache License Version 2.0, - * and you may not use this file except in compliance with the Apache License Version 2.0. - * You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the Apache License Version 2.0 is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. - */ -package com.snowplowanalytics.iglu.schemaddl.experimental - -import cats.data.NonEmptyList -import cats.implicits._ - -import com.snowplowanalytics.iglu.core.SchemaVer -import com.snowplowanalytics.iglu.schemaddl.experimental.VersionTree._ - -/** - * The order preserving tree, containing all versions and satisfying following properties: - * - A version is _clustered_ with previous ones if higher group matches - * e.g. for 1-0-0 and 1-0-1 both higher groups (MODEL and REVISION) match - * e.g. for 1-0-1 and 1-1-0 only MODEL matches, so same MODEL cluster, but new REVISION cluster - * - A version spawns a new cluster if previous higher group is either smaller or larger - * e.g. 1-0-0, 1-1-0, 1-0-1 is a valid version list, but has three separate REVISION clusters - * - There's no gaps between versions (e.g. [1-0-0, 1-0-2] is impossible) - * - Tree is non-empty and always starts with 1-0-0 - * - * @param models list of MODEL clusters in reverse order (latest one is head) - * e.g. 
(1, [0-0, 0-1]), (2, [0-0]), (1, [0-2, 1-0]) - */ -final case class VersionTree private(models: NonEmptyList[(Model, Revisions)]) extends AnyVal { - // TODO: name all things consistently - - /** Get all versions in their natural order */ - def versionList: VersionTree.VersionList = { - val list = for { - (model, revisions) <- models.toList - (revision, additions) <- revisions.revisions.toList - addition <- additions.values.toList - } yield SchemaVer.Full(model, revision, addition) - - VersionTree.VersionList(NonEmptyList.fromListUnsafe(list.reverse)) - } - - /** Get all SchemaVers in particular MODEL (used for migrations) */ - def modelGroupList(model: Model): Option[NonEmptyList[SchemaVer]] = { - val list = for { - (model, revisions) <- models.filter { case (m, _) => m == model } - (revision, additions) <- revisions.revisions.toList - addition <- additions.values.toList - } yield SchemaVer.Full(model, revision, addition) - - NonEmptyList.fromList(list.reverse) - } - - /** Try to add a next version to the tree, which can be rejected if any properties don't hold */ - def add(version: SchemaVer.Full): Either[AddingError, VersionTree] = { - for { - placement <- getSetPlacement(modelsSet, version.model, false) - aggregatedAdditions = getAdditions(version.model, version.revision) - revision <- placement match { - case SetPlacement.ContinueCurrent => - NonEmptyList.fromList(getRevisions(version.model)) match { - case Some(aggregated) => - latestRevision.add(aggregated, aggregatedAdditions, version).map { revisions => - VersionTree(NonEmptyList((version.model, revisions), this.models.tail)) - } - case None => - throw new IllegalArgumentException(s"Invalid state of VersionTree, ${version.model} revisions cannot be empty in ${this}") - } - case SetPlacement.SpawnNext => - val revisionSet = NonEmptyList.fromList(getRevisions(version.model)) match { - case Some(aggregated) => aggregated - case None => NonEmptyList.of(version.revision) - } - for { - additionGroup <- Additions.spawn(aggregatedAdditions, version.addition) // just 1-0-x - revisionsGroup = Revisions(NonEmptyList.of((version.revision, additionGroup))) - _ <- getSetPlacement(revisionSet, version.revision, true) - } yield VersionTree((version.model, revisionsGroup) :: this.models) - - } - } yield revision - } - - def show: String = models.map { case (model, revisions) => s"+ $model\n${revisions.show}" }.toList.mkString("\n") - - /** Get all revisions for a particular `model` (duplicates are possible) */ - private def getRevisions(model: Model) = - models - .collect { case (m, group) if m == model => group } - .flatMap(_.revisions.map(_._1).toList) - - /** Get all revisions for a particular `model` (duplicates are possible) */ - private def getAdditions(model: Model, revision: Revision) = - models - .toList - .collect { case (m, group) if m == model => group } - .flatMap { revisions => revisions.getAdditions(revision) } - - private def latestRevision = models.head._2 - private def modelsSet = models.map(_._1) -} - - -/** - * Group - continuous, but possibly *unclosed* sequence of child versions (opposed to always closed Set?), - * e.g. 2,3 additions of 0 revision (but 0,1,4,5 are "outside") - * - * Set - continuous and always closed sequence of child versions (opposed to possibly *unclosed* Group) - * e.g. 
0,1,2,3,4,5 additions of 0 revision (nothing else in the revision) - * - * Highest - largest number in a whole Set (5th addition) - * Latest - largest number in a whole Group (3rd addition) - * - * case class X is a group, it has information about all its Xs and children Ys - */ -object VersionTree { - - type Model = Int - type Revision = Int - type Addition = Int - - /** List of consistent naturally ordered versions, entirely isomorphic to the original tree */ - case class VersionList private(versions: NonEmptyList[SchemaVer.Full]) extends AnyVal { - def toTree: VersionTree = - build(versions.toList.reverse) - .toOption - .getOrElse(throw new IllegalStateException(s"VersionList $versions is not isomorphic to the tree")) - } - - /** A tree with only 1-0-0 */ - val Root = VersionTree(NonEmptyList.of( - (1, Revisions(NonEmptyList.of( - (0, Additions( - NonEmptyList.of(0)) - )) - ))) - ) - - /** Error happened during tree building */ - sealed trait BuildingError - object BuildingError { - final case object EmptyTree extends BuildingError - final case class InvalidInit(remaining: NonEmptyList[SchemaVer.Full]) extends BuildingError - final case class InvalidTree(addingError: AddingError, tree: VersionTree, problem: SchemaVer.Full) extends BuildingError - } - - /** Error happened during adding a particular version to the tree */ - sealed trait AddingError - object AddingError { - final case object AlreadyExists extends AddingError - final case class AdditionGaps(elements: NonEmptyList[Addition]) extends AddingError - final case class RevisionGaps(elements: NonEmptyList[Revision]) extends AddingError - final case class ModelGaps(elements: NonEmptyList[Model]) extends AddingError - } - - def build(versions: List[SchemaVer.Full]): Either[BuildingError, VersionTree] = - versions match { - case Nil => BuildingError.EmptyTree.asLeft - case SchemaVer.Full(1,0,0) :: other => - other.foldLeft(Root.asRight[BuildingError]) { (acc, cur) => - acc match { - case Right(tree) => tree.add(cur) match { - case Right(result) => result.asRight - case Left(error) => BuildingError.InvalidTree(error, tree, cur).asLeft - } - case Left(error) => error.asLeft - - } - } - case init :: other => BuildingError.InvalidInit(NonEmptyList(init, other)).asLeft - } - - final case class Revisions private(revisions: NonEmptyList[(Revision, Additions)]) extends AnyVal { - /** - * Add `version` to this cluster. 
MODEL is irrelevant - * @param revisionSet all REVISIONs in parent MODEL (`version.model`) - * that can reside in other groups - * @param additions all additions in this `MODEL-REVISION` group - * @param version SchemaVer to add - * @return updated REVISIONs cluster if version can be added, error otherwise - */ - def add(revisionSet: NonEmptyList[Revision], additions: List[Addition], version: SchemaVer.Full): Either[AddingError, Revisions] = { - for { - positionInRevision <- getSetPlacement(revisionSet, version.revision, true) - updated <- positionInRevision match { - case SetPlacement.ContinueCurrent => - latestAddition.add(additions, version.addition).map { additionsGroup => - Revisions(NonEmptyList((version.revision, additionsGroup), revisions.tail)) - } - case SetPlacement.SpawnNext => - for { - additionGroups <- Additions.spawn(additions, version.addition) - } yield Revisions((version.revision, additionGroups) :: this.revisions) - } - } yield updated - } - - def show: String = revisions.map { case (rev, additions) => s" - $rev ${additions.show}" }.toList.mkString("\n") - - /** Return ADDITIONs only in current MODEL group */ - private[schemaddl] def getAdditions(revision: Int): List[Addition] = - revisions // Unlike getRevisions it can be empty list - .collect { case (r, group) if r == revision => group } - .flatMap(_.values.toList) - - private def latestAddition = revisions.head._2 - } - - final case class Additions private(values: NonEmptyList[Addition]) extends AnyVal { - /** - * Add `addition` to `group` - * @param aggregated additions across whole Set - * @param addition version to add - */ - private[schemaddl] def add(aggregated: List[Int], addition: Int): Either[AddingError, Additions] = { - getAdditionPosition(aggregated, addition).as(Additions(addition :: values)) - } - - def show: String = values.mkString_("[", ",", "]") - } - - object Additions { - private[schemaddl] def spawn(additionSet: List[Addition], addition: Int): Either[AddingError, Additions] = - additionSet match { - case Nil if addition == 0 => Additions(NonEmptyList.of(addition)).asRight - case list => - getAdditionPosition(list, addition).as(Additions(NonEmptyList.of(addition))) - } - } - - private[schemaddl] sealed trait SetPlacement - private[schemaddl] object SetPlacement { - /** Latest version is smaller, need to spawn new cluster */ - case object SpawnNext extends SetPlacement - /** Latest version is what we're looking for, need to continue the cluster */ - case object ContinueCurrent extends SetPlacement - } - - /** Check if there are any gaps in `set` (MODEL or REVISION) */ - private[schemaddl] def getSetPlacement(set: NonEmptyList[Int], versionNumber: Int, zeroBased: Boolean): Either[AddingError, SetPlacement] = { - val placement = if (set.head == versionNumber) SetPlacement.ContinueCurrent else SetPlacement.SpawnNext - val check = gapCheck(versionNumber :: set, zeroBased).as(placement) - check.leftMap { gaps => if (zeroBased) AddingError.RevisionGaps(gaps) else AddingError.ModelGaps(gaps) } - } - - private[schemaddl] def getAdditionPosition(set: List[Int], addition: Int): Either[AddingError, Unit] = - for { - _ <- gapCheck(NonEmptyList(addition, set), true).leftMap(AddingError.AdditionGaps.apply) - result <- if (addition == set.maximumOption.getOrElse(0) + 1) ().asRight else AddingError.AlreadyExists.asLeft - } yield result - - private def gapCheck(elements: NonEmptyList[Int], zeroBased: Boolean): Either[NonEmptyList[Int], Unit] = { - val start = if (zeroBased) 0 else 1 - val max = elements.maximum - 
val diff = Range(start, max).diff(elements.toList).toList - NonEmptyList.fromList(diff).toRight(()).swap - } -} diff --git a/modules/core/src/main/scala/com.snowplowanalytics/iglu.schemaddl/jsonschema/Delta.scala b/modules/core/src/main/scala/com.snowplowanalytics/iglu.schemaddl/jsonschema/Delta.scala deleted file mode 100644 index 868bbce8..00000000 --- a/modules/core/src/main/scala/com.snowplowanalytics/iglu.schemaddl/jsonschema/Delta.scala +++ /dev/null @@ -1,92 +0,0 @@ -/* - * Copyright (c) 2016-2023 Snowplow Analytics Ltd. All rights reserved. - * - * This program is licensed to you under the Apache License Version 2.0, - * and you may not use this file except in compliance with the Apache License Version 2.0. - * You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the Apache License Version 2.0 is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. - */ -package com.snowplowanalytics.iglu.schemaddl.jsonschema - -// Shadow Java Enum -import java.lang.{ Enum => _} - -// This library -import properties._ - -import Delta._ -case class Delta(multipleOf: Changed[NumberProperty.MultipleOf] = unchanged, - minimum: Changed[NumberProperty.Minimum] = unchanged, - maximum: Changed[NumberProperty.Maximum] = unchanged, - - maxLength: Changed[StringProperty.MaxLength] = unchanged, - minLength: Changed[StringProperty.MinLength] = unchanged, - pattern: Changed[StringProperty.Pattern] = unchanged, - format: Changed[StringProperty.Format] = unchanged, - - items: Changed[ArrayProperty.Items] = unchanged, - additionalItems: Changed[ArrayProperty.AdditionalItems] = unchanged, - minItems: Changed[ArrayProperty.MinItems] = unchanged, - maxItems: Changed[ArrayProperty.MaxItems] = unchanged, - - properties: Changed[ObjectProperty.Properties] = unchanged, - additionalProperties: Changed[ObjectProperty.AdditionalProperties] = unchanged, - required: Changed[ObjectProperty.Required] = unchanged, - patternProperties: Changed[ObjectProperty.PatternProperties] = unchanged, - - `type`: Changed[CommonProperties.Type] = unchanged, - enum: Changed[CommonProperties.Enum] = unchanged, - oneOf: Changed[CommonProperties.OneOf] = unchanged, - description: Changed[CommonProperties.Description] = unchanged) { - - private[iglu] val allProperties: List[Changed[JsonSchemaProperty]] = - List(multipleOf, minimum, maximum, maxLength, minLength, - pattern, format, items, additionalItems, minItems, maxItems, properties, - additionalProperties, required, patternProperties, `type`, enum, oneOf, description) - - def getChanged: List[Changed[JsonSchemaProperty]] = - allProperties.filter(_.nonEmpty) - -} - -object Delta { - case class Changed[+A](was: Option[A], became: Option[A]) { - def nonEmpty: Boolean = was.isDefined || became.isDefined - } - - def build(original: Schema, target: Schema): Delta = - Delta( - check(original.multipleOf, target.multipleOf), - check(original.minimum, target.minimum), - check(original.maximum, target.maximum), - - check(original.maxLength, target.maxLength), - check(original.minLength, target.minLength), - check(original.pattern, target.pattern), - check(original.format, target.format), - - check(original.items, target.items), - check(original.additionalItems, target.additionalItems), - 
check(original.minItems, target.minItems), - check(original.maxItems, target.maxItems), - - check(original.properties, target.properties), - check(original.additionalProperties, target.additionalProperties), - check(original.required, target.required), - check(original.patternProperties, target.patternProperties), - - check(original.`type`, target.`type`), - check(original.enum, target.enum), - check(original.oneOf, target.oneOf), - check(original.description, target.description)) - - def check[A <: JsonSchemaProperty](one: Option[A], two: Option[A]): Changed[A] = - if (one == two) unchanged[A] else Changed(one, two) - - def unchanged[A <: JsonSchemaProperty]: Changed[A] = - Changed[A](None, None) -} diff --git a/modules/core/src/main/scala/com.snowplowanalytics/iglu.schemaddl/jsonschema/Pointer.scala b/modules/core/src/main/scala/com.snowplowanalytics/iglu.schemaddl/jsonschema/Pointer.scala index df5f55df..0e1c06e0 100644 --- a/modules/core/src/main/scala/com.snowplowanalytics/iglu.schemaddl/jsonschema/Pointer.scala +++ b/modules/core/src/main/scala/com.snowplowanalytics/iglu.schemaddl/jsonschema/Pointer.scala @@ -13,9 +13,9 @@ package com.snowplowanalytics.iglu.schemaddl.jsonschema import cats.syntax.either._ +import com.snowplowanalytics.iglu.schemaddl.StringUtils import scala.annotation.tailrec - import com.snowplowanalytics.iglu.schemaddl.jsonschema.Pointer._ sealed trait Pointer extends Product with Serializable { @@ -64,6 +64,8 @@ object Pointer { def downProperty(schemaProperty: SchemaProperty): SchemaPointer = SchemaPointer(Cursor.DownProperty(schemaProperty) :: value) + def getName: String = forData.path.map(StringUtils.snakeCase).mkString(".") + /** Filter out all Schema-specific properties and `oneOf`s */ def forData: JsonPointer = { val result = value.reverse.foldLeft(List.empty[Cursor]) { (acc, cur) => diff --git a/modules/core/src/main/scala/com.snowplowanalytics/iglu.schemaddl/migrations/FlatData.scala b/modules/core/src/main/scala/com.snowplowanalytics/iglu.schemaddl/migrations/FlatData.scala deleted file mode 100644 index 26b37b31..00000000 --- a/modules/core/src/main/scala/com.snowplowanalytics/iglu.schemaddl/migrations/FlatData.scala +++ /dev/null @@ -1,81 +0,0 @@ -/* - * Copyright (c) 2016-2023 Snowplow Analytics Ltd. All rights reserved. - * - * This program is licensed to you under the Apache License Version 2.0, - * and you may not use this file except in compliance with the Apache License Version 2.0. - * You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the Apache License Version 2.0 is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. 
- */ -package com.snowplowanalytics.iglu.schemaddl.migrations - -import io.circe.{ACursor, Json, JsonObject} - -import cats.syntax.show._ - -import com.snowplowanalytics.iglu.schemaddl.jsonschema.Pointer.JsonPointer -import com.snowplowanalytics.iglu.schemaddl.jsonschema.Pointer - -object FlatData { - - /** - * Transform JSON to TSV, where columns order match the table - * @param data actual JSON data to transform - * @param source state of schema, providing proper order - * @param getValue function used to extract a custom type from JSON - * @param default in case JsonPointer points to a missing key - */ - def flatten[A](data: Json, source: SchemaList, getValue: Json => A, default: A): List[A] = - FlatSchema.extractProperties(source).map { case (pointer, _) => getPath(pointer.forData, data, getValue, default) } - - /** Extract data from JSON payload using JsonPointer */ - def getPath[A](pointer: JsonPointer, json: Json, getValue: Json => A, default: A): A = { - def go(cursor: List[Pointer.Cursor], data: ACursor): A = - cursor match { - case Nil => - data.focus.map(getValue).getOrElse(default) - case Pointer.Cursor.DownField(field) :: t => - go(t, data.downField(field)) - case Pointer.Cursor.At(i) :: t => - go(t, data.downN(i)) - case Pointer.Cursor.DownProperty(_) :: _ => - throw new IllegalStateException(s"Iglu Schema DDL tried to use invalid pointer ${pointer.show} for payload ${json.noSpaces}") - } - - go(pointer.get, json.hcursor) - } - - /** Example of `getValue` for `flatten`. Makes no difference between empty string and null */ - def getString(escapeString: Option[String => String])(json: Json): String = - escapeString match { - case None => json.fold("", transformBool, _ => json.show, identity, _ => json.noSpaces, _ => json.noSpaces) - case Some(f) => - json.fold("", - transformBool, - _ => json.show, - f, - a => Json.fromValues(escapeArray(f)(a)).noSpaces, - o => Json.fromJsonObject(escapeObject(f)(o)).noSpaces) - } - - - def escapeJson(f: String => String)(json: Json): Json = - json.fold( - Json.Null, - Json.fromBoolean, - Json.fromJsonNumber, - x => Json.fromString(f(x)), - x => Json.fromValues(escapeArray(f)(x)), - x => Json.fromJsonObject(escapeObject(f)(x))) - - def escapeArray(f: String => String)(array: Vector[Json]): Vector[Json] = - array.map(escapeJson(f)) - - def escapeObject(f: String => String)(obj: JsonObject): JsonObject = - obj.mapValues(escapeJson(f)) - - def transformBool(b: Boolean): String = if (b) "1" else "0" -} diff --git a/modules/core/src/main/scala/com.snowplowanalytics/iglu.schemaddl/migrations/Migration.scala b/modules/core/src/main/scala/com.snowplowanalytics/iglu.schemaddl/migrations/Migration.scala deleted file mode 100644 index efd259e8..00000000 --- a/modules/core/src/main/scala/com.snowplowanalytics/iglu.schemaddl/migrations/Migration.scala +++ /dev/null @@ -1,77 +0,0 @@ -/* - * Copyright (c) 2014-2023 Snowplow Analytics Ltd. All rights reserved. - * - * This program is licensed to you under the Apache License Version 2.0, - * and you may not use this file except in compliance with the Apache License Version 2.0. - * You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the Apache License Version 2.0 is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. - */ -package com.snowplowanalytics.iglu.schemaddl.migrations - -// cats -import cats.implicits._ - -import com.snowplowanalytics.iglu.core.{SchemaKey, SchemaVer} - -/** - * [[SchemaDiff]] with its metadata - * - * @param vendor vendor of a schema being changed - * @param name name of a schema being changed - * @param from source schema version - * @param to target schema version - * @param diff actuall migration data. Ordered map of added Schema properties - */ -case class Migration(vendor: String, name: String, from: SchemaVer.Full, to: SchemaVer.Full, diff: SchemaDiff) { - override def toString: String = s"Migration of $vendor/$name from ${from.asString} to ${to.asString} with $diff" -} - -object Migration { - - /** - * Get a migration from current state to the latest known schema - * where error can be if schema key does not belong to these schemas - * or schema key is already a latest state - * @param current schemaKey of current state - * @param schemas schemas of model group which ordered according to - * their version - * @return return Either.left in case of error cases which is specified - * above or Migration as Either.right - */ - def migrateFrom(current: SchemaKey, schemas: SchemaList.Full): Either[BuildError, Migration] = - schemas.schemas.dropWhile_(_.self.schemaKey != current) match { - case Nil => BuildError.UnknownSchemaKey.asLeft - case _ :: Nil => BuildError.NoOp.asLeft - case _ => - val i = schemas.schemas.toList.map(_.self.schemaKey).indexOf(current) - schemas.afterIndex(i) - .toRight(BuildError.InvalidState) - .map(fromSegment) - } - - /** Build migration from a [[SchemaList.Segment]] to the last schema in list of successive schemas */ - def fromSegment(source: SchemaList.Segment): Migration = { - val base = source.schemas.head.self.schemaKey - val diff = SchemaDiff.build(source) - Migration(base.vendor, base.name, base.version, source.schemas.last.self.schemaKey.version, diff) - } - - /** Represents error cases which can be get from `MigrateFrom` function */ - sealed trait BuildError extends Product with Serializable - - case object BuildError { - - /** Returned when current schema is not found in the given schemas */ - case object UnknownSchemaKey extends BuildError - - /** Schema is last version of given schemas - no migration required */ - case object NoOp extends BuildError - - /** Unknown error */ - case object InvalidState extends BuildError - } -} diff --git a/modules/core/src/main/scala/com.snowplowanalytics/iglu.schemaddl/migrations/SchemaDiff.scala b/modules/core/src/main/scala/com.snowplowanalytics/iglu.schemaddl/migrations/SchemaDiff.scala deleted file mode 100644 index 1a8b81b3..00000000 --- a/modules/core/src/main/scala/com.snowplowanalytics/iglu.schemaddl/migrations/SchemaDiff.scala +++ /dev/null @@ -1,120 +0,0 @@ -/* - * Copyright (c) 2014-2023 Snowplow Analytics Ltd. All rights reserved. - * - * This program is licensed to you under the Apache License Version 2.0, - * and you may not use this file except in compliance with the Apache License Version 2.0. - * You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the Apache License Version 2.0 is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. - */ -package com.snowplowanalytics.iglu.schemaddl.migrations - -import com.snowplowanalytics.iglu.core.SelfDescribingSchema - -import com.snowplowanalytics.iglu.schemaddl._ -import com.snowplowanalytics.iglu.schemaddl.jsonschema.{Pointer, Schema} -import com.snowplowanalytics.iglu.schemaddl.migrations.SchemaList._ - -/** - * This class represents differences between *two* Schemas. Preserves no order. - * The essence of [[Migration]] - * - * @param added list of properties sorted by their appearance in JSON Schemas - * @param modified list of properties changed in target Schema; - * if some property was added in successive Schema and modified - * after that, it should appear in [[added]] - * @param removed set of keys removed in target Schema - */ -case class SchemaDiff(added: List[(Pointer.SchemaPointer, Schema)], - modified: Set[SchemaDiff.Modified], - removed: List[(Pointer.SchemaPointer, Schema)]) { - - def merge(other: SchemaDiff): SchemaDiff = - SchemaDiff(added ++ FlatSchema.postProcess(other.added.toSet), modified ++ other.modified, removed ++ other.removed) -} - -object SchemaDiff { - - val empty = SchemaDiff(List.empty, Set.empty, List.empty) - - case class Modified(pointer: Pointer.SchemaPointer, from: Schema, to: Schema) { - /** Show only properties that were changed */ - def getDelta = jsonschema.Delta.build(from, to) - } - - // We should assume a property that if two particular schemas delta result in X pointer, - // No two schemas between them can give pointer higher than X - - /** - * Generate diff from source list of properties to target though sequence of intermediate - * - * @param source source list of JSON Schema properties - * @param target non-empty list of successive JSON Schema properties including target - * @return diff between two Schmea - */ - def diff(source: SubSchemas, target: SubSchemas): SchemaDiff = { - val addedKeys = getAddedKeys(source, target).toList - val modified = getModifiedProperties(source, target) - val removedKeys = getRemovedProperties(source, target).toList - SchemaDiff(addedKeys, modified, removedKeys) - } - - /** Build `SchemaDiff` from list of schemas */ - def build(source: Segment): SchemaDiff = { - val result = source.schemas.tail.foldLeft(DiffMerge.init(source.schemas.head)) { - case (acc, SelfDescribingSchema(_, schema)) => - val subschemas = FlatSchema.build(schema).subschemas - val diff = SchemaDiff.diff(acc.previous, subschemas) - DiffMerge(acc.diff.merge(diff), subschemas) - } - result.diff - } - - /** - * Get list of new properties in order they appear in subsequent Schemas - * - * @param source original Schema - * @param successive all subsequent Schemas - * @return possibly empty list of keys in correct order - */ - def getAddedKeys(source: SubSchemas, successive: SubSchemas): Set[(Pointer.SchemaPointer, Schema)] = { - val sourceKeys = source.map(_._1) - successive.foldLeft(Set.empty[(Pointer.SchemaPointer, Schema)]) { case (acc, (pointer, schema)) => - if (sourceKeys.contains(pointer)) acc - else acc + (pointer -> schema) - } - } - - /** - * Get list of JSON Schema properties modified between two versions - * - * @param source original list of JSON Schema properties - * @param target final list of JSON Schema properties - * @return set of properties changed in target Schema - */ - def getModifiedProperties(source: SubSchemas, target: SubSchemas): Set[Modified] = - target.flatMap { case (pointer, targetSchema) => 
- source.find { case (p, _) => p == pointer } match { - case None => Set.empty[Modified] - case Some((_, sourceSchema)) if sourceSchema == targetSchema => Set.empty[Modified] - case Some((_, sourceSchema)) => Set(Modified(pointer, sourceSchema, targetSchema)) - } - } - - def getRemovedProperties(source: SubSchemas, target: SubSchemas): SubSchemas = - source.foldLeft(Set.empty[(Pointer.SchemaPointer, Schema)]) { - case (acc, (pointer, s)) => - val removed = !target.exists { case (p, _) => pointer == p } - if (removed) acc + (pointer -> s) else acc - } - - private case class DiffMerge(diff: SchemaDiff, previous: SubSchemas) - - private object DiffMerge { - def init(origin: IgluSchema): DiffMerge = - DiffMerge(SchemaDiff.empty, FlatSchema.build(origin.schema).subschemas) - } -} diff --git a/modules/core/src/main/scala/com.snowplowanalytics/iglu.schemaddl/migrations/SchemaList.scala b/modules/core/src/main/scala/com.snowplowanalytics/iglu.schemaddl/migrations/SchemaList.scala deleted file mode 100644 index 0c09b9d0..00000000 --- a/modules/core/src/main/scala/com.snowplowanalytics/iglu.schemaddl/migrations/SchemaList.scala +++ /dev/null @@ -1,281 +0,0 @@ -/* - * Copyright (c) 2014-2023 Snowplow Analytics Ltd. All rights reserved. - * - * This program is licensed to you under the Apache License Version 2.0, - * and you may not use this file except in compliance with the Apache License Version 2.0. - * You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the Apache License Version 2.0 is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. 
- */ - -package com.snowplowanalytics.iglu.schemaddl.migrations - -// cats -import cats.{ Functor, Monad } -import cats.data.{ EitherT, NonEmptyList, Ior } -import cats.implicits._ -import cats.kernel.Order - -import com.snowplowanalytics.iglu.core.{SchemaKey, SchemaMap, SchemaVer, SchemaList => SchemaKeyList} - -import com.snowplowanalytics.iglu.schemaddl.{IgluSchema, ModelGroup} -import com.snowplowanalytics.iglu.schemaddl.experimental.VersionTree - -/** - * Properly grouped and ordered list of schemas - * Core migration data structure- no valid migrations can be constructed - * without knowing it Schemas should always belong to the same vendor/name/model - * and sorted by schema-creation time (static order can be known only for - * unambiguous groups) and have all known versions (no gaps) Isomorphic to - * Iglu Core's `SchemaList` - */ -sealed trait SchemaList extends Product with Serializable { - /** Get the latest `SchemaMap` in a group */ - def latest: SchemaMap = this match { - case SchemaList.Full(schemas) => schemas.last.self - case SchemaList.Single(schema) => schema.self - } - - /** Drop all schemas *after* certain version; Return `None` if vendor/name do not match */ - def until(schemaKey: SchemaKey): Option[SchemaList] = { - val vendor = latest.schemaKey.vendor - val name = latest.schemaKey.name - val model = latest.schemaKey.version.model - - if (vendor != schemaKey.vendor || name != schemaKey.name || model != schemaKey.version.model) None - else this match { - case _: SchemaList.Single => this.some - case SchemaList.Full(schemas) => - val list = schemas.toList.span(_.self.schemaKey != schemaKey) match { - case (before, after) => NonEmptyList.fromListUnsafe(before ::: after.take(1)) - } - SchemaList.Full(list).some - } - } -} - -object SchemaList { - - /** Multiple schemas, grouped by model and in canonical order */ - sealed abstract case class Full(schemas: NonEmptyList[IgluSchema]) extends SchemaList { - - /** Create [[Segment]] from schemas in current [[SchemaList.Full]] */ - def toSegment: Segment = Segment(schemas) - - /** Create segments from all possible combinations of NonEmptyList(source,between,destination), */ - def extractSegments: NonEmptyList[Segment] = { - val res = SchemaList.buildMatrix(schemas.toList).map { case (_, _, nel) => Segment(nel) } - NonEmptyList.fromListUnsafe(res) - } - - /** Create new segment with items after index */ - private[migrations] def afterIndex(i: Int): Option[Segment] = - schemas.zipWithIndex.collect { case (s, c) if c >= i => s } match { - case h :: t => Some(Segment(NonEmptyList(h, t))) - case _ => None - } - } - object Full { - private[migrations] def apply(schemas: NonEmptyList[IgluSchema]): Full = new Full(schemas) {} - } - - /** Single init schema (e.g. `1-0-0`, `3-0-0`). 
No migrations should be involved */ - sealed abstract case class Single(schema: IgluSchema) extends SchemaList - object Single { - private[migrations] def apply(schema: IgluSchema): Single = new Single(schema) {} - } - - // Incomplete SchemaLists - - /** Has all properties of [[SchemaList.Full]], except absence of gaps */ - sealed abstract case class Segment(schemas: NonEmptyList[IgluSchema]) - object Segment { - private[migrations] def apply(schemas: NonEmptyList[IgluSchema]): Segment = new Segment(schemas) {} - } - - /** Has all properties of [[SchemaList.Full]], except canonical order */ - sealed abstract case class ModelGroupSet(schemas: NonEmptyList[IgluSchema]) - object ModelGroupSet { - /** Split schemas into a lists grouped by model group (still no order implied) */ - def groupSchemas(schemas: NonEmptyList[IgluSchema]): NonEmptyList[ModelGroupSet] = - schemas.groupByNem(schema => getModelGroup(schema.self)).toNel.map { - case (_, nel) => new ModelGroupSet(nel) {} - } - } - - // Constructors - - /** - * Fetch from Iglu Server and parse each schema from `SchemaKeyList`, using generic resolution function - * (IO-dependent) valid constructor of `SchemaList` - * @param keys non-empty properly ordered list of `SchemaKey`s, fetched from Iglu Server - * @param fetch resolution function - * @return properly ordered list of parsed JSON Schemas - */ - def fromSchemaList[F[_]: Monad, E](keys: SchemaKeyList, fetch: SchemaKey => EitherT[F, E, IgluSchema]): EitherT[F, E, SchemaList] = - keys.schemas.traverse(key => fetch(key)).map { - case Nil => throw new IllegalStateException("Result list can not be empty") - case h :: Nil => Single(h) - case h :: t => Full(NonEmptyList(h, t)) - } - - /** - * Build SchemaLists from fetched IgluSchemas. Given EitherT should - * wrap fetching schemas from /schemas endpoint of Iglu Server - * because they need to ordered. - * @param fetch EitherT which wraps list of ordered Iglu Schemas - * @return list of SchemaLists which created from fetched schemas - */ - def fromFetchedSchemas[F[_]: Functor, E](fetch: EitherT[F, E, NonEmptyList[IgluSchema]]): EitherT[F, E, NonEmptyList[SchemaList]] = - fetch.map(ModelGroupSet.groupSchemas(_).map(buildWithoutReorder)) - - /** - * Construct [[SchemaList]] from list of schemas, but only if order is unambiguous and no gaps - * If order is ambiguous (left returned) then the only safe order can be retrieved from - * Iglu Server (by `fromSchemaList`), use other constructors on your own risk - * @param modelGroup non-empty list of schema belonging to the same [[ModelGroup]] - * @return error object as Either.left in case of transformation is not successful or - * created SchemaList as Either.right if everything is okay - */ - def fromUnambiguous(modelGroup: ModelGroupSet): Either[BuildError, SchemaList] = - modelGroup.schemas match { - case NonEmptyList(h, Nil) => - Single(h).asRight - case schemas if ambiguos(schemas.map(key)) => - BuildError.AmbiguousOrder(modelGroup).asLeft - case schemas if !noGapsInModelGroup(schemas.map(key)) => - BuildError.GapInModelGroup(modelGroup).asLeft - case schemas if withinRevision(schemas.map(key)) => - Full(schemas.sortBy(_.self.schemaKey.version.addition)).asRight - case schemas if onlyInits(schemas.map(key)) => - Full(schemas.sortBy(_.self.schemaKey.version.revision)).asRight - case _ => BuildError.UnexpectedState(modelGroup).asLeft - } - - /** - * Construct `SchemaList` from list of schemas, if there is no gaps. 
- * Order given model group according to their schema key and resulting - * ordering might not be correct if given schema list ambiguous - * therefore it is not safe to use this function with ambiguous schema list. - * @param modelGroup non-empty list of schema belonging to the same `ModelGroup` - * @return error object as Either.left in case of transformation is not successful or - * created SchemaList as Either.right if everything is okay - */ - def unsafeBuildWithReorder(modelGroup: ModelGroupSet): Either[BuildError, SchemaList] = { - val sortedSchemas = modelGroup.schemas.sortBy(_.self.schemaKey)(Order.fromOrdering(SchemaKey.ordering)) - sortedSchemas match { - case NonEmptyList(h, Nil) => - Single(h).asRight - case schemas if !noGapsInModelGroup(schemas.map(key)) => - BuildError.GapInModelGroup(modelGroup).asLeft - case _ => - Full(sortedSchemas).asRight - } - } - - /** - * Construct `SchemaList`s from unordered list - * - * @param schemas non-empty list of schemas which can belong to different model groups - * @return non-empty list of errors while creating SchemaLists in Ior.left and - * non-empty list of SchemaList which created from given schemas in Ior.right - */ - def buildMultiple(schemas: NonEmptyList[IgluSchema]): Ior[NonEmptyList[BuildError], NonEmptyList[SchemaList]] = - ModelGroupSet.groupSchemas(schemas).nonEmptyPartition(fromUnambiguous) - - /** - * Construct SingleSchema from given Schema if it is first version of its model group - * @param schema IgluSchems to create SingleSchema - * @return None if given schema is not first version of its model group - * Some(SingleSchema(schema)) otherwise - */ - def buildSingleSchema(schema: IgluSchema): Option[SchemaList] = { - val version = schema.self.schemaKey.version - if (version.model >= 1 && version.revision == 0 && version.addition == 0) - Some(Single(schema)) - else - None - } - - /** - * Construct SchemaList from given model group without reordering - * its schema list - * @param modelGroup ModelGroup to create SchemaList - * @return created SchemaList from given model group - */ - private def buildWithoutReorder(modelGroup: ModelGroupSet): SchemaList = - modelGroup.schemas match { - case NonEmptyList(h, Nil) => Single(h) - case schemas => Full(schemas) - } - - /** [[SchemaList]] construction errors */ - sealed trait BuildError extends Product with Serializable - - object BuildError { - /** - * Given model group have schemas which could not be ordered unambiguously. - * For example, [1-0-0, 1-1-0, 1-0-1] schema list could not be ordered - * unambiguously because it could be either `[1-0-0, 1-0-1, 1-1-0]` or - * `[1-0-0, 1-1-0, 1-0-1]` - */ - case class AmbiguousOrder(schemas: ModelGroupSet) extends BuildError - - /** Gap in the schema list, e.g. 
`[1-0-0, 1-0-2]` is missing 1-0-1 version */ - case class GapInModelGroup(schemas: ModelGroupSet) extends BuildError - - /** Unknown error, should never be reached */ - case class UnexpectedState(schemas: ModelGroupSet) extends BuildError - } - - /** - * Get list of all possible combinations of (source, destination, List(source,between,destination)), - * - * {{{ - * >>> buildMatrix(List(1,2,3) - * List((1,2,NonEmptyList(1, 2)), (1,3,NonEmptyList(1, 2, 3)), (2,3,NonEmptyList(2, 3))) - * }}} - */ - private def buildMatrix[A](as: List[A]) = { - val ordered = as.zipWithIndex - for { - (from, fromIdx) <- ordered - (to, toIdx) <- ordered - cell <- NonEmptyList.fromList(ordered.filter { case (_, i) => i >= fromIdx && i <= toIdx }) match { - case None => Nil - case Some(NonEmptyList(_, Nil)) => Nil - case Some(nel) => List((from, to, nel.map(_._1))) - } - } yield cell - } - - /** Extract meaningful schema group */ - private def getModelGroup(schemaMap: SchemaMap): ModelGroup = - (schemaMap.schemaKey.vendor, schemaMap.schemaKey.name, schemaMap.schemaKey.version.model) - - // helper functions - private def ambiguos(keys: NonEmptyList[SchemaKey]): Boolean = - !withinRevision(keys) && !onlyInits(keys) - private def noGapsInModelGroup(keys: NonEmptyList[SchemaKey]): Boolean = { - val initialSchema = SchemaVer.Full(1, 0, 0) - val initialVersions = keys.map(_.version).toList - // since gaps in the model groups tried to be detected in this function, - // some of the groups' model number can be 2, in that case initial schema (1-0-0) - // is added in order to not get missing init schema error - val versions = if (keys.map(model).toList.distinct.contains(1)) initialVersions else initialSchema :: initialVersions - VersionTree.build(versions).fold(_ => false, _ => true) - } - private def withinRevision(keys: NonEmptyList[SchemaKey]): Boolean = - keys.map(model).toList.distinct.lengthCompare(1) == 0 && - keys.map(revision).toList.distinct.lengthCompare(1) == 0 - private def onlyInits(keys: NonEmptyList[SchemaKey]): Boolean = - keys.map(model).toList.distinct.lengthCompare(1) == 0 && - keys.map(addition).forall(a => a == 0) - private def model(key: SchemaKey): Int = key.version.model - private def revision(key: SchemaKey): Int = key.version.revision - private def addition(key: SchemaKey): Int = key.version.addition - private def key(schema: IgluSchema): SchemaKey = schema.self.schemaKey -} diff --git a/modules/core/src/main/scala/com.snowplowanalytics/iglu.schemaddl/package.scala b/modules/core/src/main/scala/com.snowplowanalytics/iglu.schemaddl/package.scala index 432c74ec..76677ae1 100644 --- a/modules/core/src/main/scala/com.snowplowanalytics/iglu.schemaddl/package.scala +++ b/modules/core/src/main/scala/com.snowplowanalytics/iglu.schemaddl/package.scala @@ -1,77 +1,11 @@ -/* - * Copyright (c) 2012-2023 Snowplow Analytics Ltd. All rights reserved. - * - * This program is licensed to you under the Apache License Version 2.0, - * and you may not use this file except in compliance with the Apache License Version 2.0. - * You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the Apache License Version 2.0 is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. 
- */ package com.snowplowanalytics.iglu -import cats.data.NonEmptyList - -import core.{SchemaMap, SelfDescribingSchema} - -import schemaddl.jsonschema.{Pointer, Schema} -import com.snowplowanalytics.iglu.schemaddl.migrations.Migration - +import com.snowplowanalytics.iglu.core.SelfDescribingSchema +import com.snowplowanalytics.iglu.schemaddl.jsonschema.Schema package object schemaddl { - /** - * Set of Schemas properties attached to corresponding JSON Pointers - * Unlike their original Schemas, these have `null` among types if they're not required - */ - type SubSchemas = Set[(Pointer.SchemaPointer, Schema)] - - /** - * List of Schemas properties attached to corresponding JSON Pointers - * Unlike SubSchemas, they are ordered according to nullness of field, - * name of field and version which field is added - */ - type Properties = List[(Pointer.SchemaPointer, Schema)] - - /** - * Map of Schemas to all its possible target schemas - * Examples: - * com.acme/event/1-0-0 -> [1-0-0/1-0-1, 1-0-0/1-0-2, 1-0-0/1-0-3] - * com.acme/event/1-0-1 -> [1-0-1/1-0-2, 1-0-1/1-0-3] - * com.acme/event/1-0-2 -> [1-0-2/1-0-3] - * com.acme/config/1-1-0 -> [1-1-0/1-0-1] - */ - type MigrationMap = Map[SchemaMap, NonEmptyList[Migration]] - - /** - * Schema criterion restricted to revision: vendor/name/m-r-* - * Tuple using as root key to bunch of Schemas differing only by addition - * (vendor, name, model, revision) - * Hypothetical "lower" AdditionGroup could contain only one Schema - */ - type RevisionGroup = (String, String, Int, Int) - - /** - * Schema criterion restricted to model: vendor/name/m-*-* - * Tuple using as root key to bunch of Schemas differing only by addition - * (vendor, name, model) - */ - type ModelGroup = (String, String, Int) - - /** - * Intermediate nested structure used to group schemas by revision - * Examples: - * com.acme/event/1-0-* -> [[MigrationMap]] - * com.acme/event/1-1-* -> [[MigrationMap]] - * com.acme/config/1-1-* -> [[MigrationMap]] - * com.google/schema/1-0-* -> [[MigrationMap]] - */ - type RevisionMigrationMap = Map[RevisionGroup, MigrationMap] - /** * Self-describing Schema container for JValue */ type IgluSchema = SelfDescribingSchema[Schema] - } diff --git a/modules/core/src/main/scala/com.snowplowanalytics/iglu.schemaddl/redshift/AlterTable.scala b/modules/core/src/main/scala/com.snowplowanalytics/iglu.schemaddl/redshift/AlterTable.scala deleted file mode 100644 index 82d14bc6..00000000 --- a/modules/core/src/main/scala/com.snowplowanalytics/iglu.schemaddl/redshift/AlterTable.scala +++ /dev/null @@ -1,100 +0,0 @@ -/* - * Copyright (c) 2014-2023 Snowplow Analytics Ltd. All rights reserved. - * - * This program is licensed to you under the Apache License Version 2.0, - * and you may not use this file except in compliance with the Apache License Version 2.0. - * You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the Apache License Version 2.0 is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. 
- */ -package com.snowplowanalytics.iglu.schemaddl.redshift - -/** - * Class holding data to alter some table with single [[AlterTableStatement]] - * @see http://docs.aws.amazon.com/redshift/latest/dg/r_ALTER_TABLE.html - * - * ALTER TABLE table_name - * { - * ADD table_constraint | - * DROP CONSTRAINT constraint_name [ RESTRICT | CASCADE ] | - * OWNER TO new_owner | - * RENAME TO new_name | - * RENAME COLUMN column_name TO new_name | - * ADD [ COLUMN ] column_name column_type - * [ DEFAULT default_expr ] - * [ ENCODE encoding ] - * [ NOT NULL | NULL ] | - * DROP [ COLUMN ] column_name [ RESTRICT | CASCADE ] } - * - * where table_constraint is: - * - * [ CONSTRAINT constraint_name ] - * { UNIQUE ( column_name [, ... ] ) | - * PRIMARY KEY ( column_name [, ... ] ) | - * FOREIGN KEY (column_name [, ... ] ) - * REFERENCES reftable [ ( refcolumn ) ]} - */ -case class AlterTable(tableName: String, statement: AlterTableStatement) extends Statement { - def toDdl = s"ALTER TABLE $tableName ${statement.toDdl}" -} - -/** - * Sum-type to represent some statement - */ -sealed trait AlterTableStatement extends Ddl - -sealed trait DropModeValue extends Ddl -case object CascadeDrop extends DropModeValue { def toDdl = "CASCADE" } -case object RestrictDrop extends DropModeValue { def toDdl = "RESTRICT" } - -case class DropMode(value: DropModeValue) extends Ddl { - def toDdl = value.toDdl -} - -case class AddConstraint(tableConstraint: TableConstraint) extends AlterTableStatement { - def toDdl = s"ADD ${tableConstraint.toDdl}" -} - -case class DropConstraint(constraintName: String, mode: Option[DropMode]) extends AlterTableStatement { - def toDdl = s"DROP $constraintName${mode.map(" " + _.toDdl).getOrElse("")}" -} - -case class OwnerTo(newOwner: String) extends AlterTableStatement { - def toDdl = s"OWNER TO $newOwner" -} - -case class RenameTo(newName: String) extends AlterTableStatement { - def toDdl = s"RENAME TO $newName" -} - -case class RenameColumn(columnName: String, newName: String) extends AlterTableStatement { - def toDdl = s"RENAME COLUMN $columnName TO $newName" -} - -case class AddColumn( - columnName: String, - columnType: DataType, - default: Option[Default], - encode: Option[CompressionEncoding], - nullability: Option[Nullability] -) extends AlterTableStatement { - def toDdl = { - val attrs = List(nullability, encode, default).flatten.map(_.toDdl).mkString(" ") - s"""ADD COLUMN "$columnName" ${columnType.toDdl} $attrs""" - } -} - -case class DropColumn(columnName: String, mode: Option[DropMode]) extends Ddl { - def toDdl = s"DROP COLUMN $columnName${mode.map(" " + _.toDdl).getOrElse("")}" -} - -case class AlterType( - columnName: String, - dataType: DataType -) extends AlterTableStatement { - def toDdl = - s"""ALTER "$columnName" TYPE ${dataType.toDdl}""" -} diff --git a/modules/core/src/main/scala/com.snowplowanalytics/iglu.schemaddl/redshift/Begin.scala b/modules/core/src/main/scala/com.snowplowanalytics/iglu.schemaddl/redshift/Begin.scala deleted file mode 100644 index d6d7ceef..00000000 --- a/modules/core/src/main/scala/com.snowplowanalytics/iglu.schemaddl/redshift/Begin.scala +++ /dev/null @@ -1,33 +0,0 @@ -/* - * Copyright (c) 2012-2023 Snowplow Analytics Ltd. All rights reserved. - * - * This program is licensed to you under the Apache License Version 2.0, - * and you may not use this file except in compliance with the Apache License Version 2.0. - * You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0. 
- * - * Unless required by applicable law or agreed to in writing, - * software distributed under the Apache License Version 2.0 is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. - */ -package com.snowplowanalytics.iglu.schemaddl.redshift - -case class Begin(isolationLevel: Option[IsolationLevel.type], permission: Option[Permission]) extends Statement { - def toDdl = { - val attrs = List(isolationLevel, permission).flatten.map(_.toDdl) - s"BEGIN TRANSACTION${envelope(attrs)}" - } - - private def envelope(attrs: List[String]): String = attrs match { - case _ :: _ => attrs.mkString(" ") - case Nil => "" - } -} - -sealed trait Permission extends Ddl -case object ReadWriteIsolation extends Permission { def toDdl = "READ WRITE" } -case object ReadOnly extends Permission { def toDdl = "READ ONLY" } - -case object IsolationLevel extends Ddl { - def toDdl = "ISOLATION LEVEL SERIALIZABLE" -} diff --git a/modules/core/src/main/scala/com.snowplowanalytics/iglu.schemaddl/redshift/Column.scala b/modules/core/src/main/scala/com.snowplowanalytics/iglu.schemaddl/redshift/Column.scala deleted file mode 100644 index f418366e..00000000 --- a/modules/core/src/main/scala/com.snowplowanalytics/iglu.schemaddl/redshift/Column.scala +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Copyright (c) 2014-2023 Snowplow Analytics Ltd. All rights reserved. - * - * This program is licensed to you under the Apache License Version 2.0, - * and you may not use this file except in compliance with the Apache License Version 2.0. - * You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the Apache License Version 2.0 is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. 
- */ -package com.snowplowanalytics.iglu.schemaddl.redshift - -/** - * Class holding all information about Redshift's column - * - * @param columnName column_name - * @param dataType data_type such as INTEGER, VARCHAR, etc - * @param columnAttributes set of column_attributes such as ENCODE - * @param columnConstraints set of column_constraints such as NOT NULL - */ -case class Column( - columnName: String, - dataType: DataType, - columnAttributes: Set[ColumnAttribute] = Set.empty[ColumnAttribute], - columnConstraints: Set[ColumnConstraint] = Set.empty[ColumnConstraint] -) extends Ddl { - - /** - * Formatted column's DDL - * Calling method must provide length for each tab via Tuple5 - * - * @param tabs tuple of lengths (prepend, table_name, data_type, etc) - * @return formatted DDL - */ - def toFormattedDdl(tabs: (Int, Int, Int, Int, Int)): String = - withTabs(tabs._1, " ") + - withTabs(tabs._2, nameDdl) + - withTabs(tabs._3, dataTypeDdl) + - withTabs(tabs._4, attributesDdl) + - withTabs(tabs._5, constraintsDdl) - - /** - * Compact way to output column - * - * @return string representing column without formatting - */ - def toDdl = toFormattedDdl((1, 1, 1, 1, 1)) - - // Get warnings only from data types suggestions - override val warnings = dataType.warnings - - /** - * column_name ready to output with surrounding quotes to prevent odd chars - * from breaking the table - */ - val nameDdl = "\"" + columnName + "\" " - - /** - * data_type ready to output - */ - val dataTypeDdl = dataType.toDdl - - /** - * column_attributes ready to output if exists - */ - val attributesDdl = columnAttributes.map(" " + _.toDdl).mkString(" ") - - /** - * column_constraints ready to output if exists - */ - val constraintsDdl = columnConstraints.map(" " + _.toDdl).mkString(" ") -} diff --git a/modules/core/src/main/scala/com.snowplowanalytics/iglu.schemaddl/redshift/ColumnAttribute.scala b/modules/core/src/main/scala/com.snowplowanalytics/iglu.schemaddl/redshift/ColumnAttribute.scala deleted file mode 100644 index 0799de11..00000000 --- a/modules/core/src/main/scala/com.snowplowanalytics/iglu.schemaddl/redshift/ColumnAttribute.scala +++ /dev/null @@ -1,73 +0,0 @@ -/* - * Copyright (c) 2012-2023 Snowplow Analytics Ltd. All rights reserved. - * - * This program is licensed to you under the Apache License Version 2.0, - * and you may not use this file except in compliance with the Apache License Version 2.0. - * You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the Apache License Version 2.0 is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. 
- */ -package com.snowplowanalytics.iglu.schemaddl.redshift - -/** - * column_attributes are: - * [ DEFAULT default_expr ] - * [ IDENTITY ( seed, step ) ] - * [ ENCODE encoding ] - * [ DISTKEY ] - * [ SORTKEY ] - */ -sealed trait ColumnAttribute extends Ddl - -case class Default(value: String) extends ColumnAttribute { - def toDdl = s"DEFAULT $value" -} - -case class Identity(seed: Int, step: Int) extends ColumnAttribute { - def toDdl = s"IDENTITY ($seed, $step)" -} - -case object DistKey extends ColumnAttribute { - def toDdl = "DISTKEY" -} - -case object SortKey extends ColumnAttribute { - def toDdl = "SORTKEY" -} - -/** - * Compression encodings - * http://docs.aws.amazon.com/redshift/latest/dg/c_Compression_encodings.html - */ -case class CompressionEncoding(value: CompressionEncodingValue) extends ColumnAttribute { - def toDdl = s"ENCODE ${value.toDdl}" -} - -sealed trait CompressionEncodingValue extends Ddl - -case object RawEncoding extends CompressionEncodingValue { def toDdl = "RAW" } - -case object ByteDictEncoding extends CompressionEncodingValue { def toDdl = "BYTEDICT" } - -case object DeltaEncoding extends CompressionEncodingValue { def toDdl = "DELTA" } - -case object Delta32kEncoding extends CompressionEncodingValue { def toDdl = "DELTA32K" } - -case object LzoEncoding extends CompressionEncodingValue { def toDdl = "LZO" } - -case object Mostly8Encoding extends CompressionEncodingValue { def toDdl = "MOSTLY8ENCODING" } - -case object Mostly16Encoding extends CompressionEncodingValue { def toDdl = "MOSTLY16ENCODING" } - -case object Mostly32Encoding extends CompressionEncodingValue { def toDdl = "MOSTLY32ENCODING" } - -case object RunLengthEncoding extends CompressionEncodingValue { def toDdl = "RUNLENGTH" } - -case object Text255Encoding extends CompressionEncodingValue { def toDdl = "TEXT255" } - -case object Text32KEncoding extends CompressionEncodingValue { def toDdl = "TEXT32K" } - -case object ZstdEncoding extends CompressionEncodingValue { def toDdl = "ZSTD"} diff --git a/modules/core/src/main/scala/com.snowplowanalytics/iglu.schemaddl/redshift/ColumnConstraint.scala b/modules/core/src/main/scala/com.snowplowanalytics/iglu.schemaddl/redshift/ColumnConstraint.scala deleted file mode 100644 index 34f60a1a..00000000 --- a/modules/core/src/main/scala/com.snowplowanalytics/iglu.schemaddl/redshift/ColumnConstraint.scala +++ /dev/null @@ -1,38 +0,0 @@ -/* - * Copyright (c) 2014-2023 Snowplow Analytics Ltd. All rights reserved. - * - * This program is licensed to you under the Apache License Version 2.0, - * and you may not use this file except in compliance with the Apache License Version 2.0. - * You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the Apache License Version 2.0 is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. 
- */ -package com.snowplowanalytics.iglu.schemaddl.redshift - -/** - * column_constraints are: - * [ { NOT NULL | NULL } ] - * [ { UNIQUE | PRIMARY KEY } ] - * [ REFERENCES reftable [ ( refcolumn ) ] ] - */ -sealed trait ColumnConstraint extends Ddl - -sealed trait NullabilityValue extends Ddl -case object Null extends NullabilityValue { def toDdl = "NULL" } -case object NotNull extends NullabilityValue { def toDdl = "NOT NULL" } - -case class Nullability(value: NullabilityValue) extends ColumnConstraint { - def toDdl = value.toDdl -} - -sealed trait KeyConstraintValue extends Ddl -case object Unique extends KeyConstraintValue { def toDdl = "UNIQUE" } -case object PrimaryKey extends KeyConstraintValue { def toDdl = "PRIMARY KEY" } - -case class KeyConstaint(value: KeyConstraintValue) extends ColumnConstraint { - def toDdl = value.toDdl -} - diff --git a/modules/core/src/main/scala/com.snowplowanalytics/iglu.schemaddl/redshift/CommentBlock.scala b/modules/core/src/main/scala/com.snowplowanalytics/iglu.schemaddl/redshift/CommentBlock.scala deleted file mode 100644 index 4e9d93ef..00000000 --- a/modules/core/src/main/scala/com.snowplowanalytics/iglu.schemaddl/redshift/CommentBlock.scala +++ /dev/null @@ -1,40 +0,0 @@ -/* - * Copyright (c) 2014-2023 Snowplow Analytics Ltd. All rights reserved. - * - * This program is licensed to you under the Apache License Version 2.0, - * and you may not use this file except in compliance with the Apache License Version 2.0. - * You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the Apache License Version 2.0 is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. - */ -package com.snowplowanalytics.iglu.schemaddl.redshift - -/** - * Class representing comment block in Ddl file - * Can be rendered into file along with other Ddl-statements - * - * @param lines sequence of lines - * @param prepend optional amount of spaces to prepend delimiter (--) - */ -case class CommentBlock(lines: Vector[String], prepend: Int = 0) extends Statement { - import CommentBlock._ - - override val separator = "" - - def toDdl = lines.map(l => "--" + emptyOrSpace(l)).mkString("\n") -} - -object CommentBlock { - def apply(line: String, prepend: Int): CommentBlock = - CommentBlock(Vector(line), prepend) - - /** - * Don't prepend empty strings with space - */ - private def emptyOrSpace(line: String): String = - if (line.nonEmpty) s" $line" - else "" -} diff --git a/modules/core/src/main/scala/com.snowplowanalytics/iglu.schemaddl/redshift/CommentOn.scala b/modules/core/src/main/scala/com.snowplowanalytics/iglu.schemaddl/redshift/CommentOn.scala deleted file mode 100644 index 0b38d4aa..00000000 --- a/modules/core/src/main/scala/com.snowplowanalytics/iglu.schemaddl/redshift/CommentOn.scala +++ /dev/null @@ -1,26 +0,0 @@ -/* - * Copyright (c) 2014-2023 Snowplow Analytics Ltd. All rights reserved. - * - * This program is licensed to you under the Apache License Version 2.0, - * and you may not use this file except in compliance with the Apache License Version 2.0. - * You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0. 
- * - * Unless required by applicable law or agreed to in writing, - * software distributed under the Apache License Version 2.0 is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. - */ -package com.snowplowanalytics.iglu.schemaddl.redshift - -/** - * COMMENT ON - * { TABLE object_name | COLUMN object_name.column_name | - * CONSTRAINT constraint_name ON table_name | - * DATABASE object_name | - * VIEW object_name } - * IS 'text' - */ -case class CommentOn(tableName: String, comment: String) extends Statement { - override val separator = ";" - def toDdl = s"COMMENT ON TABLE $tableName IS '$comment'" -} diff --git a/modules/core/src/main/scala/com.snowplowanalytics/iglu.schemaddl/redshift/CreateSchema.scala b/modules/core/src/main/scala/com.snowplowanalytics/iglu.schemaddl/redshift/CreateSchema.scala deleted file mode 100644 index 3750487b..00000000 --- a/modules/core/src/main/scala/com.snowplowanalytics/iglu.schemaddl/redshift/CreateSchema.scala +++ /dev/null @@ -1,17 +0,0 @@ -/* - * Copyright (c) 2014-2023 Snowplow Analytics Ltd. All rights reserved. - * - * This program is licensed to you under the Apache License Version 2.0, - * and you may not use this file except in compliance with the Apache License Version 2.0. - * You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the Apache License Version 2.0 is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. - */ -package com.snowplowanalytics.iglu.schemaddl.redshift - -case class CreateSchema(schemaName: String) extends Statement { - def toDdl = s"CREATE SCHEMA IF NOT EXISTS $schemaName" -} diff --git a/modules/core/src/main/scala/com.snowplowanalytics/iglu.schemaddl/redshift/CreateTable.scala b/modules/core/src/main/scala/com.snowplowanalytics/iglu.schemaddl/redshift/CreateTable.scala deleted file mode 100644 index 2039693c..00000000 --- a/modules/core/src/main/scala/com.snowplowanalytics/iglu.schemaddl/redshift/CreateTable.scala +++ /dev/null @@ -1,79 +0,0 @@ -/* - * Copyright (c) 2014-2023 Snowplow Analytics Ltd. All rights reserved. - * - * This program is licensed to you under the Apache License Version 2.0, - * and you may not use this file except in compliance with the Apache License Version 2.0. - * You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the Apache License Version 2.0 is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. 
- */ -package com.snowplowanalytics.iglu.schemaddl.redshift - -/** - * Class holding all information about Redshift's table - * - * @param tableName table_name - * @param columns iterable of all columns DDLs - * @param tableConstraints set of table_constraints such as PRIMARY KEY - * @param tableAttributes set of table_attributes such as DISTSTYLE - */ -case class CreateTable( - tableName: String, - columns: List[Column], - tableConstraints: Set[TableConstraint] = Set.empty[TableConstraint], - tableAttributes: Set[TableAttribute] = Set.empty[TableAttribute] -) extends Statement { - - def toDdl = { - val columnsDdl = columns.map(_.toFormattedDdl(tabulation) - .replaceAll("\\s+$", "")) - .mkString(",\n") - s"""CREATE TABLE IF NOT EXISTS $tableName ( - |$columnsDdl$getConstraints - |)$getAttributes""".stripMargin - } - - // Collect warnings from every column - override val warnings = columns.flatMap(_.warnings) - - // Tuple with lengths of each column in formatted DDL file - private val tabulation = { - def getLength(f: Column => Int): Int = - columns.foldLeft(0)((acc, b) => if (acc > f(b)) acc else f(b)) - - val prepend = 4 - val first = getLength(_.nameDdl.length) - val second = getLength(_.dataType.toDdl.length) - val third = getLength(_.attributesDdl.length) - val fourth = getLength(_.constraintsDdl.length) - - (prepend, first, second, third, fourth) - } - - /** - * Format constraints for table - * - * @return string with formatted table_constaints - */ - private def getConstraints: String = { - if (tableConstraints.isEmpty) "" - else ",\n" + tableConstraints.map(c => withTabs(tabulation._1, " ") + c.toDdl). - - mkString("\n") - } - /** - * Format attributes for table - * - * @return string with formatted table_attributes - */ - private def getAttributes: String = { - if (tableConstraints.isEmpty) "" - else "\n" + tableAttributes.map(_.toDdl). - - mkString("\n") - } -} - diff --git a/modules/core/src/main/scala/com.snowplowanalytics/iglu.schemaddl/redshift/DataType.scala b/modules/core/src/main/scala/com.snowplowanalytics/iglu.schemaddl/redshift/DataType.scala deleted file mode 100644 index 9f048541..00000000 --- a/modules/core/src/main/scala/com.snowplowanalytics/iglu.schemaddl/redshift/DataType.scala +++ /dev/null @@ -1,78 +0,0 @@ -/* - * Copyright (c) 2014-2023 Snowplow Analytics Ltd. All rights reserved. - * - * This program is licensed to you under the Apache License Version 2.0, - * and you may not use this file except in compliance with the Apache License Version 2.0. - * You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the Apache License Version 2.0 is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. 
- */ -package com.snowplowanalytics.iglu.schemaddl.redshift - -/** - * Data types - * http://docs.aws.amazon.com/redshift/latest/dg/c_Supported_data_types.html - */ -sealed trait DataType extends Ddl - -case object RedshiftTimestamp extends DataType { - def toDdl = "TIMESTAMP" -} - -case object RedshiftDate extends DataType { - def toDdl = "DATE" -} - -case object RedshiftSmallInt extends DataType { - def toDdl = "SMALLINT" -} - -case object RedshiftInteger extends DataType { - def toDdl = "INT" -} - -case object RedshiftBigInt extends DataType { - def toDdl = "BIGINT" -} - -case object RedshiftReal extends DataType { - def toDdl = "REAL" -} - -case object RedshiftDouble extends DataType { - def toDdl = "DOUBLE PRECISION" -} - -case class RedshiftDecimal(precision: Option[Int], scale: Option[Int]) extends DataType { - def toDdl = (precision, scale) match { - case (Some(p), Some(s)) => s"DECIMAL ($p, $s)" - case _ => "DECIMAL" - } -} - -case object RedshiftBoolean extends DataType { - def toDdl = "BOOLEAN" -} - -case class RedshiftVarchar(size: Int) extends DataType { - def toDdl = s"VARCHAR($size)" -} - -case class RedshiftChar(size: Int) extends DataType { - def toDdl = s"CHAR($size)" -} - -// CUSTOM - -/** - * These predefined data types assembles into usual Redshift data types, but - * can store additional information such as warnings. - * Using to prevent output on DDL-generation step. - */ -case class ProductType(override val warnings: List[String], size: Option[Int]) extends DataType { - def toDdl = s"VARCHAR(${size.getOrElse(4096)})" -} - diff --git a/modules/core/src/main/scala/com.snowplowanalytics/iglu.schemaddl/redshift/Ddl.scala b/modules/core/src/main/scala/com.snowplowanalytics/iglu.schemaddl/redshift/Ddl.scala deleted file mode 100644 index d33dcf8b..00000000 --- a/modules/core/src/main/scala/com.snowplowanalytics/iglu.schemaddl/redshift/Ddl.scala +++ /dev/null @@ -1,44 +0,0 @@ -/* - * Copyright (c) 2014-2023 Snowplow Analytics Ltd. All rights reserved. - * - * This program is licensed to you under the Apache License Version 2.0, - * and you may not use this file except in compliance with the Apache License Version 2.0. - * You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the Apache License Version 2.0 is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. 
- */ -package com.snowplowanalytics.iglu.schemaddl -package redshift - -/** - * Base class for everything that can be represented as Redshift DDL - */ -trait Ddl { - /** - * Output actual DDL as string - * - * @return valid DDL - */ - def toDdl: String - - /** - * Aggregates all warnings from child elements - */ - val warnings: List[String] = Nil - - /** - * Append specified amount of ``spaces`` to the string to produce formatted DDL - * - * @param spaces amount of spaces - * @param str string itself - * @return string with spaces - */ - def withTabs(spaces: Int, str: String): String = - if (str.isEmpty) " " * spaces - else if (spaces <= str.length) str - else str + (" " * (spaces - str.length)) -} - diff --git a/modules/core/src/main/scala/com.snowplowanalytics/iglu.schemaddl/redshift/Empty.scala b/modules/core/src/main/scala/com.snowplowanalytics/iglu.schemaddl/redshift/Empty.scala deleted file mode 100644 index 47710ceb..00000000 --- a/modules/core/src/main/scala/com.snowplowanalytics/iglu.schemaddl/redshift/Empty.scala +++ /dev/null @@ -1,18 +0,0 @@ -/* - * Copyright (c) 2014-2023 Snowplow Analytics Ltd. All rights reserved. - * - * This program is licensed to you under the Apache License Version 2.0, - * and you may not use this file except in compliance with the Apache License Version 2.0. - * You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the Apache License Version 2.0 is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. - */ -package com.snowplowanalytics.iglu.schemaddl.redshift - -case object Empty extends Statement { - override val separator = "" - def toDdl = "" -} diff --git a/modules/core/src/main/scala/com.snowplowanalytics/iglu.schemaddl/redshift/End.scala b/modules/core/src/main/scala/com.snowplowanalytics/iglu.schemaddl/redshift/End.scala deleted file mode 100644 index 1d5bef78..00000000 --- a/modules/core/src/main/scala/com.snowplowanalytics/iglu.schemaddl/redshift/End.scala +++ /dev/null @@ -1,17 +0,0 @@ -/* - * Copyright (c) 2014-2023 Snowplow Analytics Ltd. All rights reserved. - * - * This program is licensed to you under the Apache License Version 2.0, - * and you may not use this file except in compliance with the Apache License Version 2.0. - * You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the Apache License Version 2.0 is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. 
- */ -package com.snowplowanalytics.iglu.schemaddl.redshift - -case object End extends Statement { - def toDdl = "END TRANSACTION" -} diff --git a/modules/core/src/main/scala/com.snowplowanalytics/iglu.schemaddl/redshift/RefTable.scala b/modules/core/src/main/scala/com.snowplowanalytics/iglu.schemaddl/redshift/RefTable.scala deleted file mode 100644 index e7aa5ca3..00000000 --- a/modules/core/src/main/scala/com.snowplowanalytics/iglu.schemaddl/redshift/RefTable.scala +++ /dev/null @@ -1,26 +0,0 @@ -/* - * Copyright (c) 2014-2023 Snowplow Analytics Ltd. All rights reserved. - * - * This program is licensed to you under the Apache License Version 2.0, - * and you may not use this file except in compliance with the Apache License Version 2.0. - * You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the Apache License Version 2.0 is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. - */ -package com.snowplowanalytics.iglu.schemaddl.redshift - -/** - * Reference table. Used in foreign key and table constraint - * - * @param reftable name of table - * @param refcolumn optional column - */ -case class RefTable(reftable: String, refcolumn: Option[String]) extends Ddl { - def toDdl = { - val column = refcolumn.map("(" + _ + ")").getOrElse("") - s"REFERENCES $reftable$column" - } -} diff --git a/modules/core/src/main/scala/com.snowplowanalytics/iglu.schemaddl/redshift/ShredModel.scala b/modules/core/src/main/scala/com.snowplowanalytics/iglu.schemaddl/redshift/ShredModel.scala new file mode 100644 index 00000000..bb7239f6 --- /dev/null +++ b/modules/core/src/main/scala/com.snowplowanalytics/iglu.schemaddl/redshift/ShredModel.scala @@ -0,0 +1,188 @@ +package com.snowplowanalytics.iglu.schemaddl.redshift + +import cats.data.NonEmptyList +import cats.syntax.parallel._ +import cats.syntax.show._ +import cats.syntax.either._ +import io.circe.Json +import com.snowplowanalytics.iglu.core.SchemaKey +import com.snowplowanalytics.iglu.schemaddl.IgluSchema +import com.snowplowanalytics.iglu.schemaddl.StringUtils.snakeCase +import com.snowplowanalytics.iglu.schemaddl.jsonschema.{Pointer, Schema} +import com.snowplowanalytics.iglu.schemaddl.redshift.internal.{FlatSchema, Migrations} +import ShredModelEntry.ColumnType +import com.snowplowanalytics.iglu.schemaddl.redshift.internal.Migrations._ + +import math.abs + +/** + * Shredded table model for redshift. + * + * Extracting data from json into tsv-ready List[String] + * Warehouse table representation. "Good" version of this trait also supports migrations. 
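For orientation, a minimal sketch of the simplest use of this model (the igluSchema value is assumed to be an already-parsed self-describing schema; it is not defined in this patch):

    import com.snowplowanalytics.iglu.schemaddl.redshift.ShredModel

    val model = ShredModel.good(igluSchema)        // GoodModel for a single schema version
    // Renders "CREATE TABLE IF NOT EXISTS atomic.<vendor>_<name>_<model> (...)"
    // together with the FOREIGN KEY to atomic.events and the COMMENT ON TABLE statement
    val createSql: String = model.toTableSql("atomic")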
+ * + * entries - list of model entries, containing the schema pointers and bottom level sub schemas + * schemaKey - schema key of corresponding top level schema + */ +sealed trait ShredModel extends Product with Serializable { + def entries: List[ShredModelEntry] + + def schemaKey: SchemaKey + + def tableName: String + + + final lazy val baseTableName: String = { + // Split the vendor's reversed domain name using underscores rather than dots + val snakeCaseOrganization = schemaKey + .vendor + .replaceAll("""\.""", "_") + .replaceAll("-", "_") + .toLowerCase + + // Change the name from PascalCase to snake_case if necessary + val snakeCaseName = snakeCase(schemaKey.name) + + s"${snakeCaseOrganization}_${snakeCaseName}_${schemaKey.version.model}" + } + + // use this for the loader column expansion + final def columnNamesQuoted: List[String] = entries.map(e => s""""${e.columnName}"""") + + /** + * + * @param dbSchema - name of the warehouse schema + * @return + */ + final def toTableSql(dbSchema: String): String = + s"""CREATE TABLE IF NOT EXISTS $dbSchema.$tableName ( + |${entries.show}, + | FOREIGN KEY (root_id) REFERENCES $dbSchema.events(event_id) + |) + |DISTSTYLE KEY + |DISTKEY (root_id) + |SORTKEY (root_tstamp); + | + |COMMENT ON TABLE $dbSchema.$tableName IS '${schemaKey.toSchemaUri}'; + |""".stripMargin + + final def jsonToStrings(json: Json): List[String] = entries.map(e => e.stringFactory(json)) + +} + +object ShredModel { + + case class GoodModel(entries: List[ShredModelEntry], + schemaKey: SchemaKey, + migrations: Migrations + ) extends ShredModel { + + /** + * Generates a sql snippet for migration between lower and upper bounds, if no bounds provided migrates from the + * first schema in family to the last + * + * @param dbSchema - name of the warehouse schema + * @param maybeLowerBound - lower bound for schema versions + * @param maybeUpperBound - upper bound for schema versions + * @return SQL script for upgrading schema + */ + def migrationSql(dbSchema: String, maybeLowerBound: Option[SchemaKey] = None, maybeUpperBound: Option[SchemaKey] = None): String = migrations.toSql(tableName, dbSchema, maybeLowerBound, maybeUpperBound) + + def migrationsInTransaction(maybeLowerBound: Option[SchemaKey] = None, maybeUpperBound: Option[SchemaKey] = None): List[ColumnAddition] = migrations.inTransaction(maybeLowerBound, maybeUpperBound) + + def migrationsOutTransaction(maybeLowerBound: Option[SchemaKey] = None, maybeUpperBound: Option[SchemaKey] = None): List[VarcharExtension] = migrations.outTransaction(maybeLowerBound, maybeUpperBound) + + def allMigrations: List[NonBreaking] = migrations.values.toList + + + def getMigrationsFor(key: SchemaKey): List[NonBreaking] = migrations.getMigrationsFor(key) + + /** + * Merge two good models, evaluating feasibility of this merge and updating migrations. + * Change vector could be column additions or varchar size expansion. + * + * @param that next schema model in the family, that would merge on top of this + * @return either + * Left ModelShred of the that schema tupled with non emtpy list of breaking changes that prevented the merge + * Right merged ModelShred of this with that schema tupled with list of non breaking changes required to make a + * perform a merge. 
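A hedged sketch of how two versions of the same model group might be combined (model100 and model101 below stand for GoodModels built from consecutive schema versions; they are assumptions for this example, not values defined in the patch):

    model100.merge(model101) match {
      case Right(merged) =>
        // non-breaking evolution: ALTER TABLE statements (column additions and
        // VARCHAR widenings) needed to bring an existing table up to date
        println(merged.migrationSql("atomic"))
      case Left(recovery) =>
        // breaking change: loading falls back to a separate *_recovered_* table,
        // and errorAsStrings reports why the merge was rejected
        println(recovery.toTableSql("atomic"))
        recovery.errorAsStrings.toList.foreach(println)
    }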
+ */ + def merge(that: GoodModel): Either[RecoveryModel, GoodModel] = { + val thisLookup = entries.map { e => (e.columnName, e) }.toMap + val thatLookup = that.entries.map { e => (e.columnName, e) }.toMap + val additions: List[ShredModelEntry] = + that.entries + .filter(col => !thisLookup.contains(col.columnName)) + .map(entry => (entry.ptr, entry.subSchema)) + .foldLeft(Set.empty[(Pointer.SchemaPointer, Schema)])((acc, s) => acc + s) + // this fold, toList preserves the order as it was in the older library versions < 0.18.0 + .toList + .map { case (ptr, subSchema) => ShredModelEntry(ptr, subSchema, isLateAddition = true) } + val additionsMigration: List[ColumnAddition] = additions.map(ColumnAddition.apply) + val removals: Either[NonEmptyList[Breaking], List[NonBreaking]] = entries + .filter(col => !thatLookup.contains(col.columnName)) + .parTraverse { + case s if !s.isNullable => NullableRequired(s).asLeft.toEitherNel + case _ => NoChanges.asRight + } + val modifications: Either[NonEmptyList[Breaking], List[NonBreaking]] = + that.entries + .filter(col => thisLookup.contains(col.columnName)) + .parTraverse(newCol => { + val oldCol = thisLookup(newCol.columnName) + val (newType, newNullability, newEncoding) = (newCol.columnType, newCol.isNullable, newCol.compressionEncoding) + val (oldType, oldNullability, oldEncoding) = (oldCol.columnType, oldCol.isNullable, oldCol.compressionEncoding) + if (!oldNullability & newNullability) + NullableRequired(oldCol).asLeft.toEitherNel + else if (newEncoding != oldEncoding) + IncompatibleEncoding(oldCol, newCol).asLeft.toEitherNel + else newType match { + case ColumnType.RedshiftVarchar(newSize) => oldType match { + case ColumnType.RedshiftVarchar(oldSize) if newSize > oldSize => VarcharExtension(oldCol, newCol).asRight + case ColumnType.RedshiftVarchar(oldSize) if newSize <= oldSize => NoChanges.asRight + case _ => IncompatibleTypes(oldCol, newCol).asLeft.toEitherNel + } + case _ if newType == oldType => NoChanges.asRight + case _ => IncompatibleTypes(oldCol, newCol).asLeft.toEitherNel + } + }) + val allChanges: Either[NonEmptyList[Breaking], List[NonBreaking]] = (modifications, removals) match { + case (Right(x), Right(y)) => (x ++ y).asRight[NonEmptyList[Breaking]] + case (Right(_), l@Left(_)) => l + case (l@Left(_), Right(_)) => l + case (Left(x), Left(y)) => (x ::: y).asLeft[List[NonBreaking]] + } + (for { + changes <- allChanges + extensions = changes.collect { case e: VarcharExtension => e } + modifedEntries = entries.map( + entry => extensions.collectFirst { + case s if s.old == entry => s.newEntry + }.getOrElse(entry) + ) + } yield GoodModel( + modifedEntries ++ additions, + that.schemaKey, + migrations ++ Migrations(that.schemaKey, extensions ++ additionsMigration) + )) + .leftMap(that.makeRecovery) + } + + val tableName: String = baseTableName + + private[redshift] def makeRecovery(errors: NonEmptyList[Breaking]): RecoveryModel = new RecoveryModel(entries, schemaKey, errors) + } + + case class RecoveryModel(entries: List[ShredModelEntry], + schemaKey: SchemaKey, + errors: NonEmptyList[Breaking]) extends ShredModel { + def errorAsStrings: NonEmptyList[String] = errors.map(_.report) + + val tableName = s"${baseTableName}_${schemaKey.version.addition}_${schemaKey.version.revision}_recovered_${abs(entries.show.hashCode())}" + } + + def good(s: IgluSchema): GoodModel = good(s.self.schemaKey, s.schema) + + def good(k: SchemaKey, s: Schema): GoodModel = new GoodModel(FlatSchema.extractProperties(s), k, Migrations.empty(k)) + +} \ No newline at end of 
file diff --git a/modules/core/src/main/scala/com.snowplowanalytics/iglu.schemaddl/redshift/ShredModelEntry.scala b/modules/core/src/main/scala/com.snowplowanalytics/iglu.schemaddl/redshift/ShredModelEntry.scala new file mode 100644 index 00000000..5db26c11 --- /dev/null +++ b/modules/core/src/main/scala/com.snowplowanalytics/iglu.schemaddl/redshift/ShredModelEntry.scala @@ -0,0 +1,191 @@ +package com.snowplowanalytics.iglu.schemaddl.redshift + +import cats.Show +import cats.syntax.show._ +import com.snowplowanalytics.iglu.schemaddl.jsonschema.Pointer.SchemaPointer +import com.snowplowanalytics.iglu.schemaddl.jsonschema.{Pointer, Schema} +import com.snowplowanalytics.iglu.schemaddl.redshift.internal.ColumnTypeSuggestions.columnTypeSuggestions +import io.circe.{ACursor, Json} + +import scala.annotation.tailrec + +/** + * Single bottom level entry of the schema model. Each entry matches a single column in warehouse. + * + * @param ptr - json pointer. A cursor that could be used to extract the data from json event. + * @param subSchema - jsonschema of the element to where pointer is directed. + * @param isLateAddition - entry added as a result of migration, so it must be NOT NULL. + */ +case class ShredModelEntry( + ptr: SchemaPointer, + subSchema: Schema, + isLateAddition: Boolean + ) { + + /** + * columnName, nullability, columnType and compressionEncoding are used for SQL statement definition of corresponding + * redshift column. + */ + lazy val columnName: String = ptr.getName + + lazy val isNullable: Boolean = isLateAddition || subSchema.canBeNull + + lazy val columnType: ShredModelEntry.ColumnType = columnTypeSuggestions + .find(_.apply(subSchema).isDefined) + .flatMap(_.apply(subSchema)) + .getOrElse(ShredModelEntry.ColumnType.RedshiftVarchar(ShredModelEntry.VARCHAR_SIZE)) + + lazy val compressionEncoding: ShredModelEntry.CompressionEncoding = (subSchema.`enum`, columnType) match { + case (Some(_), ShredModelEntry.ColumnType.RedshiftVarchar(size)) if size <= 255 => + ShredModelEntry.CompressionEncoding.Text255Encoding + case (_, ShredModelEntry.ColumnType.RedshiftBoolean) => ShredModelEntry.CompressionEncoding.RunLengthEncoding + case (_, ShredModelEntry.ColumnType.RedshiftDouble) => ShredModelEntry.CompressionEncoding.RawEncoding + case _ => ShredModelEntry.CompressionEncoding.ZstdEncoding + } + + /** + * Extract the string representation of this entry from the event body. Factory relies on the validation done by the + * enrich. So column type is not validated against the jsonTypes. 
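As an illustration of the extraction path (the event JSON and the model value are invented for this example; circe's parser is used only to build a Json value):

    import io.circe.parser.parse
    import io.circe.Json

    val event: Json = parse("""{"field_a": "with\ttab", "field_b": true, "field_c": null}""")
      .getOrElse(Json.Null)

    // `model` is assumed to be a ShredModel built for the schema this event conforms to.
    // One TSV-ready string per column, in entry order: tabs and newlines are replaced
    // with spaces, booleans become "1"/"0", null or missing values become "\N".
    val cells: List[String] = model.jsonToStrings(event)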
+ * + * @param json body of json event + * @return Either a casting error (pointer was incompatible with the event) or string serialization of the payload, + */ + def stringFactory(json: Json): String = { + @tailrec + def go(cursor: List[Pointer.Cursor], data: ACursor): String = + cursor match { + case Nil => data.focus.map( + json => json.fold( + jsonNull = ShredModelEntry.NullCharacter, + jsonBoolean = if (_) "1" else "0", + jsonNumber = _ => json.show, + jsonString = ShredModelEntry.escapeTsv, + jsonArray = _ => ShredModelEntry.escapeTsv(json.noSpaces), + jsonObject = _ => ShredModelEntry.escapeTsv(json.noSpaces) + ) + ).getOrElse(ShredModelEntry.NullCharacter) + case Pointer.Cursor.DownField(field) :: t => + go(t, data.downField(field)) + case Pointer.Cursor.At(i) :: t => + go(t, data.downN(i)) + case Pointer.Cursor.DownProperty(_) :: t => go(t, data) + } + + go(ptr.get, json.hcursor) + } +} + +object ShredModelEntry { + + def apply(ptr: SchemaPointer, subSchema: Schema): ShredModelEntry = + ShredModelEntry(ptr, subSchema, isLateAddition = false) + + val VARCHAR_SIZE = 4096 + + val NullCharacter: String = "\\N" + + private def escapeTsv(s: String): String = + if (s == NullCharacter) "\\\\N" + else s.replace('\t', ' ').replace('\n', ' ') + + + sealed trait ColumnType + + implicit val showProps: Show[List[ShredModelEntry]] = Show.show(props => { + val colsAsString = props.map(prop => + (s""""${prop.columnName}"""", prop.columnType.show, prop.compressionEncoding.show, if (prop.isNullable) "" else "NOT NULL") + ) + val extraCols = List( + (""""schema_vendor"""", "VARCHAR(128)", "ENCODE ZSTD", "NOT NULL"), + (""""schema_name"""", "VARCHAR(128)", "ENCODE ZSTD", "NOT NULL"), + (""""schema_format"""", "VARCHAR(128)", "ENCODE ZSTD", "NOT NULL"), + (""""schema_version"""", "VARCHAR(128)", "ENCODE ZSTD", "NOT NULL"), + (""""root_id"""", "CHAR(36)", "ENCODE RAW", "NOT NULL"), + (""""root_tstamp"""", "TIMESTAMP", "ENCODE ZSTD", "NOT NULL"), + (""""ref_root"""", "VARCHAR(255)", "ENCODE ZSTD", "NOT NULL"), + (""""ref_tree"""", "VARCHAR(1500)", "ENCODE ZSTD", "NOT NULL"), + (""""ref_parent"""", "VARCHAR(255)", "ENCODE ZSTD", "NOT NULL") + ) + val allCols = extraCols ++ colsAsString + val (mName, mType, mComp) = allCols.foldLeft((0, 0, 0))( + (acc, col) => ( + math.max(col._1.length, acc._1), + math.max(col._2.length, acc._2), + math.max(col._3.length, acc._3), + )) + val fmtStr = s" %-${mName}s %${-mType}s %-${mComp}s %s" + + allCols + .map(cols => fmtStr.format(cols._1, cols._2, cols._3, cols._4).replaceAll("""\s+$""", "")) + .mkString(",\n") + }) + + object ColumnType { + + implicit val typeShow: Show[ColumnType] = Show.show { + case RedshiftTimestamp => "TIMESTAMP" + case RedshiftDate => "DATE" + case RedshiftSmallInt => "SMALLINT" + case RedshiftInteger => "INT" + case RedshiftBigInt => "BIGINT" + case RedshiftDouble => "DOUBLE PRECISION" + case RedshiftDecimal(precision, scale) => (precision, scale) match { + case (Some(p), Some(s)) => s"DECIMAL ($p, $s)" + case _ => "DECIMAL" + } + case RedshiftBoolean => "BOOLEAN" + case RedshiftVarchar(size) => s"VARCHAR($size)" + case RedshiftChar(size) => s"CHAR($size)" + case ProductType(size) => s"VARCHAR(${size.getOrElse(VARCHAR_SIZE)})" + } + + case object RedshiftTimestamp extends ColumnType + + case object RedshiftDate extends ColumnType + + case object RedshiftSmallInt extends ColumnType + + case object RedshiftInteger extends ColumnType + + case object RedshiftBigInt extends ColumnType + + case object RedshiftDouble extends ColumnType + + case class 
RedshiftDecimal(precision: Option[Int], scale: Option[Int]) extends ColumnType + + case object RedshiftBoolean extends ColumnType + + case class RedshiftVarchar(size: Int) extends ColumnType + + case class RedshiftChar(size: Int) extends ColumnType + + /** + * These predefined data types assembles into usual Redshift data types, but + * can store additional information such as warnings. + * Using to prevent output on DDL-generation step. + */ + case class ProductType(size: Option[Int]) extends ColumnType + } + + + sealed trait CompressionEncoding + + object CompressionEncoding { + + implicit val compressionEncodingShow: Show[CompressionEncoding] = Show.show { + case RawEncoding => s"ENCODE RAW" + case Text255Encoding => s"ENCODE TEXT255" + case ZstdEncoding => s"ENCODE ZSTD" + case RunLengthEncoding => "ENCODE RUNLENGTH" + } + + case object RawEncoding extends CompressionEncoding + + case object RunLengthEncoding extends CompressionEncoding + + case object Text255Encoding extends CompressionEncoding + + case object ZstdEncoding extends CompressionEncoding + } + +} \ No newline at end of file diff --git a/modules/core/src/main/scala/com.snowplowanalytics/iglu.schemaddl/redshift/Statement.scala b/modules/core/src/main/scala/com.snowplowanalytics/iglu.schemaddl/redshift/Statement.scala deleted file mode 100644 index 9d943533..00000000 --- a/modules/core/src/main/scala/com.snowplowanalytics/iglu.schemaddl/redshift/Statement.scala +++ /dev/null @@ -1,36 +0,0 @@ -/* - * Copyright (c) 2014-2023 Snowplow Analytics Ltd. All rights reserved. - * - * This program is licensed to you under the Apache License Version 2.0, - * and you may not use this file except in compliance with the Apache License Version 2.0. - * You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the Apache License Version 2.0 is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. - */ -package com.snowplowanalytics.iglu.schemaddl.redshift - -/** - * Trait for *independent* SQL DDL statements. - * Unlike simple Ddl objects, these can be used as stand-alone - * commands and be content of file. - * We're always using semicolon in the end of statements - */ -trait Statement extends Ddl with Product with Serializable { - /** - * Symbol used to separate statement from other. - * Usually it is a semicolon, however special statements, like - * empty line or comment don't use separators - * Container class (not Statement) handles separators as well as newlines - */ - val separator: String = ";" - - /** - * Properly render statement with separator - * Use it instead `toDdl` on Statement objects - */ - def render: String = - toDdl + separator -} diff --git a/modules/core/src/main/scala/com.snowplowanalytics/iglu.schemaddl/redshift/TableAttribute.scala b/modules/core/src/main/scala/com.snowplowanalytics/iglu.schemaddl/redshift/TableAttribute.scala deleted file mode 100644 index e9e1bdb9..00000000 --- a/modules/core/src/main/scala/com.snowplowanalytics/iglu.schemaddl/redshift/TableAttribute.scala +++ /dev/null @@ -1,53 +0,0 @@ -/* - * Copyright (c) 2014-2023 Snowplow Analytics Ltd. All rights reserved. 
- * - * This program is licensed to you under the Apache License Version 2.0, - * and you may not use this file except in compliance with the Apache License Version 2.0. - * You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the Apache License Version 2.0 is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. - */ -package com.snowplowanalytics.iglu.schemaddl.redshift - -// cats -import cats.data.NonEmptyList - -/** - * table_attributes are: - * [ DISTSTYLE { EVEN | KEY | ALL } ] - * [ DISTKEY ( column_name ) ] - * [ [COMPOUND | INTERLEAVED ] SORTKEY ( column_name [, ...] ) ] - */ -sealed trait TableAttribute extends Ddl - -sealed trait DiststyleValue extends Ddl -case object Even extends DiststyleValue { def toDdl = "EVEN" } -case object Key extends DiststyleValue { def toDdl = "KEY" } -case object All extends DiststyleValue { def toDdl = "ALL" } - -sealed trait Sortstyle extends Ddl - -case object CompoundSortstyle extends Sortstyle { - def toDdl = "COMPOUND" -} - -case object InterleavedSortstyle extends Sortstyle { - def toDdl = "INTERLEAVED" -} - -case class Diststyle(diststyle: DiststyleValue) extends TableAttribute { - def toDdl = "DISTSTYLE " + diststyle.toDdl -} - -// Don't confuse with redshift.DistKey which is applicable for columns -case class DistKeyTable(columnName: String) extends TableAttribute { - def toDdl = s"DISTKEY ($columnName)" -} - -// Don't confuse with redshift.SortKey which is applicable for columns -case class SortKeyTable(sortstyle: Option[Sortstyle], columns: NonEmptyList[String]) extends TableAttribute { - def toDdl = sortstyle.map(_.toDdl + " ").getOrElse("") + "SORTKEY (" + columns.toList.mkString(",") + ")" -} diff --git a/modules/core/src/main/scala/com.snowplowanalytics/iglu.schemaddl/redshift/TableConstraint.scala b/modules/core/src/main/scala/com.snowplowanalytics/iglu.schemaddl/redshift/TableConstraint.scala deleted file mode 100644 index b4608adc..00000000 --- a/modules/core/src/main/scala/com.snowplowanalytics/iglu.schemaddl/redshift/TableConstraint.scala +++ /dev/null @@ -1,37 +0,0 @@ -/* - * Copyright (c) 2014-2023 Snowplow Analytics Ltd. All rights reserved. - * - * This program is licensed to you under the Apache License Version 2.0, - * and you may not use this file except in compliance with the Apache License Version 2.0. - * You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the Apache License Version 2.0 is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. - */ -package com.snowplowanalytics.iglu.schemaddl.redshift - -// cats -import cats.data.NonEmptyList - -/** - * table_constraints are: - * [ UNIQUE ( column_name [, ... ] ) ] - * [ PRIMARY KEY ( column_name [, ... ] ) ] - * [ FOREIGN KEY (column_name [, ... 
] ) REFERENCES reftable [ ( refcolumn ) ] - */ -sealed trait TableConstraint extends Ddl - -case class UniqueKeyTable(columns: NonEmptyList[String]) extends TableConstraint { - def toDdl = s"UNIQUE (${columns.toList.mkString(", ")})" -} - -case class PrimaryKeyTable(columns: NonEmptyList[String]) extends TableConstraint { - def toDdl = s"PRIMARY KEY (${columns.toList.mkString(", ")})" -} - -case class ForeignKeyTable(columns: NonEmptyList[String], reftable: RefTable) extends TableConstraint { - def toDdl = s"FOREIGN KEY (${columns.toList.mkString(", ")}) ${reftable.toDdl}" -} - diff --git a/modules/core/src/main/scala/com.snowplowanalytics/iglu.schemaddl/redshift/generators/DdlFile.scala b/modules/core/src/main/scala/com.snowplowanalytics/iglu.schemaddl/redshift/generators/DdlFile.scala deleted file mode 100644 index 0ef68b45..00000000 --- a/modules/core/src/main/scala/com.snowplowanalytics/iglu.schemaddl/redshift/generators/DdlFile.scala +++ /dev/null @@ -1,85 +0,0 @@ -/* - * Copyright (c) 2014-2023 Snowplow Analytics Ltd. All rights reserved. - * - * This program is licensed to you under the Apache License Version 2.0, - * and you may not use this file except in compliance with the Apache License Version 2.0. - * You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the Apache License Version 2.0 is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. - */ -package com.snowplowanalytics.iglu.schemaddl.redshift -package generators - -/** - * File representing sequence of valid DDL statements able to be printed - * - * @todo Redshift table definition is tightly coupled with JSONPath file - * and client code needs to reprocess it to have same order, but - * different column names - * - * @param statements sequence of valid independent DDL statements - */ -case class DdlFile(statements: List[Statement]) { - import DdlFile._ - - /** - * Convert content of file into string - * - * @return most concise string representation - */ - def render: String = render(List(formatAlterTable, formatCommentOn)) - - /** - * Convert content of file into string using list of predefined formatters - * WARNING: setting delimiters is on behalf of formatters, - * so empty `formatters` list produce invalid output - * - * @param formatters list of partial functions applicable for some subsets of DDL statements - * @return formatted representation - */ - def render(formatters: List[StatementFormatter]): String = { - val format: Statement => String = chooseFormatter(formatters) - val formatted = statements.foldLeft(List.empty[String]) { (acc, cur) => format(cur) :: acc } - formatted.reverse.mkString("\n") - } - - /** - * Aggregates all warnings from child statements - */ - def warnings: List[String] = statements.flatMap(_.warnings) -} - -object DdlFile { - - /** - * Type alias representing some partial function able to format particular DDL statement - */ - // TODO: refactor formatters being able to format whole File, not just Statement - type StatementFormatter = PartialFunction[Statement, String] - - /** - * Helper function choosing first formatter defined on some statement type - * - * @param formatters list of partial functions able to reformat statement - * @param statement actual DDL statement - * 
@return string representation - */ - def chooseFormatter(formatters: List[StatementFormatter])(statement: Statement): String = - formatters - .find(_.isDefinedAt(statement)) - .map(formatter => formatter(statement)) - .getOrElse(statement.toDdl + statement.separator) - - // TODO: formatter shouldn't know anything about actual string-representation, only spaces and newlines - val formatAlterTable: StatementFormatter = { - case ddl: AlterTable => - s" ALTER TABLE ${ddl.tableName}\n ${ddl.statement.toDdl};" - } - - val formatCommentOn: StatementFormatter = { - case ddl: CommentOn => s" ${ddl.toDdl};" - } -} diff --git a/modules/core/src/main/scala/com.snowplowanalytics/iglu.schemaddl/redshift/generators/DdlGenerator.scala b/modules/core/src/main/scala/com.snowplowanalytics/iglu.schemaddl/redshift/generators/DdlGenerator.scala deleted file mode 100644 index 88e91017..00000000 --- a/modules/core/src/main/scala/com.snowplowanalytics/iglu.schemaddl/redshift/generators/DdlGenerator.scala +++ /dev/null @@ -1,226 +0,0 @@ -/* - * Copyright (c) 2014-2023 Snowplow Analytics Ltd. All rights reserved. - * - * This program is licensed to you under the Apache License Version 2.0, - * and you may not use this file except in compliance with the Apache License Version 2.0. - * You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the Apache License Version 2.0 is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. - */ -package com.snowplowanalytics.iglu.schemaddl.redshift.generators - -import scala.annotation.tailrec - -import cats.data.NonEmptyList - -import com.snowplowanalytics.iglu.core.SchemaMap - -import com.snowplowanalytics.iglu.schemaddl.Properties - -import com.snowplowanalytics.iglu.schemaddl.redshift._ -import com.snowplowanalytics.iglu.schemaddl.redshift.generators.EncodeSuggestions._ -import com.snowplowanalytics.iglu.schemaddl.redshift.generators.TypeSuggestions._ - -import com.snowplowanalytics.iglu.schemaddl.migrations.FlatSchema - -import com.snowplowanalytics.iglu.schemaddl.jsonschema.Schema -import com.snowplowanalytics.iglu.schemaddl.jsonschema.Pointer - - -/** Generates a Redshift DDL File from a Flattened JsonSchema */ -object DdlGenerator { - - /** - * Make a DDL header from the self-describing info - * - * @param schemaMap self-describing info - * @param schemaName optional schema name - * @return SQL comment - */ - def getTableComment(tableName: String, schemaName: Option[String], schemaMap: SchemaMap): CommentOn = { - val schema = schemaName.map(_ + ".").getOrElse("") - CommentOn(schema + tableName, schemaMap.schemaKey.toSchemaUri) - } - - /** - * Generates Redshift CreateTable object with all columns, attributes and constraints - * - * @param orderedSubSchemas subschemas which are ordered wrt to updates, nullness and alphabetic order - * @param name table name - * @param dbSchema optional redshift schema name - * @param raw do not produce any Snowplow specific columns (like root_id) - * @param size default length for VARCHAR - * @return CreateTable object with all data about table creation - */ - def generateTableDdl(orderedSubSchemas: Properties, name: String, dbSchema: Option[String], size: Int, raw: Boolean): CreateTable = { - val columns = 
getColumnsDdl(orderedSubSchemas, size) - if (raw) getRawTableDdl(dbSchema, name, columns) - else getAtomicTableDdl(dbSchema, name, columns) - } - - // Columns with data taken from self-describing schema - private[redshift] val selfDescSchemaColumns = List( - Column("schema_vendor", RedshiftVarchar(128), Set(CompressionEncoding(ZstdEncoding)), Set(Nullability(NotNull))), - Column("schema_name", RedshiftVarchar(128), Set(CompressionEncoding(ZstdEncoding)), Set(Nullability(NotNull))), - Column("schema_format", RedshiftVarchar(128), Set(CompressionEncoding(ZstdEncoding)), Set(Nullability(NotNull))), - Column("schema_version", RedshiftVarchar(128), Set(CompressionEncoding(ZstdEncoding)), Set(Nullability(NotNull))) - ) - - // Snowplow-specific columns - private[redshift] val parentageColumns = List( - Column("root_id", RedshiftChar(36), Set(CompressionEncoding(RawEncoding)), Set(Nullability(NotNull))), - Column("root_tstamp", RedshiftTimestamp, Set(CompressionEncoding(ZstdEncoding)), Set(Nullability(NotNull))), - Column("ref_root", RedshiftVarchar(255), Set(CompressionEncoding(ZstdEncoding)), Set(Nullability(NotNull))), - Column("ref_tree", RedshiftVarchar(1500), Set(CompressionEncoding(ZstdEncoding)), Set(Nullability(NotNull))), - Column("ref_parent", RedshiftVarchar(255), Set(CompressionEncoding(ZstdEncoding)), Set(Nullability(NotNull))) - ) - - - /** - * Generate DDL for atomic (with Snowplow-specific columns and attributes) table - * - * @param dbSchema optional redshift schema name - * @param name table name - * @param columns list of generated DDLs for columns - * @return full CREATE TABLE statement ready to be rendered - */ - private def getAtomicTableDdl(dbSchema: Option[String], name: String, columns: List[Column]): CreateTable = { - val schema = dbSchema.getOrElse("atomic") - val fullTableName = schema + "." + name - val tableConstraints = Set[TableConstraint](RedshiftDdlDefaultForeignKey(schema)) - val tableAttributes = Set[TableAttribute]( // Snowplow-specific attributes - Diststyle(Key), - DistKeyTable("root_id"), - SortKeyTable(None, NonEmptyList.of("root_tstamp")) - ) - - CreateTable( - fullTableName, - selfDescSchemaColumns ++ parentageColumns ++ columns, - tableConstraints, - tableAttributes - ) - } - - /** - * Generate DDL forraw (without Snowplow-specific columns and attributes) table - * - * @param dbSchema optional redshift schema name - * @param name table name - * @param columns list of generated DDLs for columns - * @return full CREATE TABLE statement ready to be rendered - */ - private def getRawTableDdl(dbSchema: Option[String], name: String, columns: List[Column]): CreateTable = { - val fullTableName = dbSchema.map(_ + "." 
+ name).getOrElse(name) - CreateTable(fullTableName, columns) - } - - /** - * Get DDL for Foreign Key for specified schema - * - * @param schemaName Redshift's schema - * @return ForeignKey constraint - */ - private def RedshiftDdlDefaultForeignKey(schemaName: String) = { - val reftable = RefTable(schemaName + ".events", Some("event_id")) - ForeignKeyTable(NonEmptyList.of("root_id"), reftable) - } - - /** - * Processes the Map of Data elements pulled from the JsonSchema and - * generates DDL object for it with it's name, constrains, attributes - * data type, etc - */ - private[schemaddl] def getColumnsDdl(orderedSubSchemas: Properties, varcharSize: Int): List[Column] = - for { - (jsonPointer, schema) <- orderedSubSchemas.filter { case (p, _) => !p.equals(Pointer.Root) } - columnName = FlatSchema.getName(jsonPointer) - dataType = getDataType(schema, varcharSize, columnName) - encoding = getEncoding(schema, dataType, columnName) - constraints = getConstraints(!schema.canBeNull) - } yield Column(columnName, dataType, columnAttributes = Set(encoding), columnConstraints = constraints) - - - // List of data type suggestions - val dataTypeSuggestions: List[DataTypeSuggestion] = List( - complexEnumSuggestion, - productSuggestion, - timestampSuggestion, - dateSuggestion, - arraySuggestion, - integerSuggestion, - numberSuggestion, - booleanSuggestion, - charSuggestion, - uuidSuggestion, - varcharSuggestion - ) - - // List of compression encoding suggestions - val encodingSuggestions: List[EncodingSuggestion] = List(text255Suggestion, lzoSuggestion, zstdSuggestion) - - - /** - * Takes each suggestion out of ``dataTypeSuggesions`` and decide whether - * current properties satisfy it, then return the data type - * If nothing suggested VARCHAR with ``varcharSize`` returned as default - * - * @param properties is a string we need to recognize - * @param varcharSize default size for unhandled properties and strings - * without any data about length - * @param columnName to produce warning - * @param suggestions list of functions can recognize encode type - * @return some format or none if nothing suites - */ - @tailrec private[schemaddl] def getDataType( - properties: Schema, - varcharSize: Int, - columnName: String, - suggestions: List[DataTypeSuggestion] = dataTypeSuggestions) - : DataType = { - - suggestions match { - case Nil => RedshiftVarchar(varcharSize) // Generic - case suggestion :: tail => suggestion(properties, columnName) match { - case Some(format) => format - case None => getDataType(properties, varcharSize, columnName, tail) - } - } - } - - /** - * Takes each suggestion out of ``compressionEncodingSuggestions`` and - * decide whether current properties satisfy it, then return the compression - * encoding. 
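The getDataType helper above tries each suggestion in order and takes the first match, falling back to a plain VARCHAR of the default size. A minimal, self-contained sketch of that first-match-wins pattern (MiniSchema, MiniType and the two sample suggestions are illustrative stand-ins, not the real schemaddl types):

    object SuggestionSketch {
      final case class MiniSchema(format: Option[String], maxLength: Option[Int])

      sealed trait MiniType
      case object Timestamp extends MiniType
      final case class Varchar(size: Int) extends MiniType

      // Same shape as DataTypeSuggestion: inspect the schema, maybe suggest a type
      type Suggestion = MiniSchema => Option[MiniType]

      val timestampSuggestion: Suggestion =
        s => if (s.format.contains("date-time")) Some(Timestamp) else None

      val varcharSuggestion: Suggestion =
        s => s.maxLength.map(Varchar)

      // First suggestion that fires wins; otherwise fall back to VARCHAR(defaultSize)
      @annotation.tailrec
      def pick(schema: MiniSchema, defaultSize: Int, suggestions: List[Suggestion]): MiniType =
        suggestions match {
          case Nil => Varchar(defaultSize)
          case head :: tail => head(schema) match {
            case Some(tpe) => tpe
            case None      => pick(schema, defaultSize, tail)
          }
        }
    }

With suggestions List(timestampSuggestion, varcharSuggestion), a schema carrying format "date-time" resolves to Timestamp, while a schema with neither format nor maxLength falls through to Varchar(defaultSize), mirroring the VARCHAR default above.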
- * If nothing suggested ZSTD Encoding returned as default - * - * @param properties is a string we need to recognize - * @param dataType redshift data type for current column - * @param columnName to produce warning - * @param suggestions list of functions can recognize encode type - * @return some format or none if nothing suites - */ - @tailrec private[schemaddl] def getEncoding( - properties: Schema, - dataType: DataType, - columnName: String, - suggestions: List[EncodingSuggestion] = encodingSuggestions) - : CompressionEncoding = { - - suggestions match { - case Nil => CompressionEncoding(ZstdEncoding) // ZSTD is default for user-generated - case suggestion :: tail => suggestion(properties, dataType, columnName) match { - case Some(encoding) => CompressionEncoding(encoding) - case None => getEncoding(properties, dataType, columnName, tail) - } - } - } - - private[schemaddl] def getConstraints(notNull: Boolean) = { - if (notNull) Set[ColumnConstraint](Nullability(NotNull)) - else Set.empty[ColumnConstraint] - } -} diff --git a/modules/core/src/main/scala/com.snowplowanalytics/iglu.schemaddl/redshift/generators/EncodeSuggestions.scala b/modules/core/src/main/scala/com.snowplowanalytics/iglu.schemaddl/redshift/generators/EncodeSuggestions.scala deleted file mode 100644 index f06adee9..00000000 --- a/modules/core/src/main/scala/com.snowplowanalytics/iglu.schemaddl/redshift/generators/EncodeSuggestions.scala +++ /dev/null @@ -1,47 +0,0 @@ -/* - * Copyright (c) 2014-2023 Snowplow Analytics Ltd. All rights reserved. - * - * This program is licensed to you under the Apache License Version 2.0, - * and you may not use this file except in compliance with the Apache License Version 2.0. - * You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the Apache License Version 2.0 is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. 
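Once a data type, a compression encoding and the nullability constraint have been chosen by the helpers above, they are assembled into a single column definition. A simplified illustration of that assembly (ColumnSpec and the rendering below are stand-ins, not the real Column/DataType classes, and the exact whitespace differs):

    object ColumnSketch {
      final case class ColumnSpec(name: String, sqlType: String, encoding: String, notNull: Boolean)

      // One column line of a CREATE TABLE: name, type, compression encoding, constraint
      def render(c: ColumnSpec): String = {
        val nullability = if (c.notNull) " NOT NULL" else ""
        s""""${c.name}" ${c.sqlType} ENCODE ${c.encoding}$nullability"""
      }
    }

For example, render(ColumnSketch.ColumnSpec("user_id", "VARCHAR(4096)", "ZSTD", notNull = true)) yields "user_id" VARCHAR(4096) ENCODE ZSTD NOT NULL.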
- */ -package com.snowplowanalytics.iglu.schemaddl.redshift -package generators - -import com.snowplowanalytics.iglu.schemaddl.jsonschema.Schema - -object EncodeSuggestions { - /** - * Type alias for function suggesting an compression encoding based on map of - * JSON Schema properties - */ - type EncodingSuggestion = (Schema, DataType, String) => Option[CompressionEncodingValue] - - // Often repeated strings benefit from text255 - val text255Suggestion: EncodingSuggestion = (schema, dataType, _) => - (schema.`enum`, dataType) match { - case (Some(_), RedshiftVarchar(size)) if size <= 255 => - Some(Text255Encoding) - case _ => None - } - - // Suggest LZO Encoding for boolean, double precision and real - val lzoSuggestion: EncodingSuggestion = (_, dataType, _) => - dataType match { - case RedshiftBoolean => Some(RunLengthEncoding) - case RedshiftDouble => Some(RawEncoding) - case RedshiftReal => Some(RawEncoding) - case _ => None - } - - val zstdSuggestion: EncodingSuggestion = (_, dataType, _) => - dataType match { - case RedshiftVarchar(_) => Some(ZstdEncoding) - case _ => None - } -} diff --git a/modules/core/src/main/scala/com.snowplowanalytics/iglu.schemaddl/redshift/generators/JsonPathGenerator.scala b/modules/core/src/main/scala/com.snowplowanalytics/iglu.schemaddl/redshift/generators/JsonPathGenerator.scala deleted file mode 100644 index b0892466..00000000 --- a/modules/core/src/main/scala/com.snowplowanalytics/iglu.schemaddl/redshift/generators/JsonPathGenerator.scala +++ /dev/null @@ -1,118 +0,0 @@ -/* - * Copyright (c) 2014-2023 Snowplow Analytics Ltd. All rights reserved. - * - * This program is licensed to you under the Apache License Version 2.0, - * and you may not use this file except in compliance with the Apache License Version 2.0. - * You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the Apache License Version 2.0 is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. - */ -package com.snowplowanalytics.iglu.schemaddl.redshift -package generators - -// This project -import DdlGenerator._ -import com.snowplowanalytics.iglu.schemaddl.Properties -import com.snowplowanalytics.iglu.schemaddl.jsonschema.Pointer - -/** - * Converts lists of keys into a JsonPath file. - */ -object JsonPathGenerator { - - private object JsonPathPrefix { - val Schema = "$.schema." - val Hierarchy = "$.hierarchy." - val Data = "$.data." - } - - private val JsonPathSchemaFields = List( - "vendor", - "name", - "format", - "version" - ) - - private val JsonPathHierarchyFields = List( - "rootId", - "rootTstamp", - "refRoot", - "refTree", - "refParent" - ) - - private val JsonPathFileHeader = List( - "{", - " \"jsonpaths\": [" - ) - - private val JsonPathFileFooter = List( - " ]", - "}" - ) - - /** - * Returns a validated JsonPath file based on the list of DDL columns. - * This function should be tied to constructed CreateTable's DDL to preserve - * correct order of columns (for example they could be rearranged). 
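Taken together, the encoding suggestions above amount to a small decision table: short enums get TEXT255, booleans run-length, floating-point columns raw, and everything else (VARCHARs in particular) defaults to ZSTD. A simplified sketch of that table, using stand-in types rather than the real DataType/encoding ADTs:

    object EncodingSketch {
      sealed trait ColType
      final case class Varchar(size: Int) extends ColType
      case object Bool                    extends ColType
      case object DoublePrecision         extends ColType

      def suggestEncoding(hasEnum: Boolean, tpe: ColType): String = (hasEnum, tpe) match {
        case (true, Varchar(size)) if size <= 255 => "TEXT255"   // short enum values
        case (_, Bool)                            => "RUNLENGTH" // booleans compress well run-length
        case (_, DoublePrecision)                 => "RAW"       // floating point is left unencoded
        case _                                    => "ZSTD"      // default for everything else
      }
    }

So an enum column typed VARCHAR(12) comes out as TEXT255, while a free-form VARCHAR(4096) column falls through to ZSTD.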
- * - * @param orderedSubSchemas subschemas which are ordered wrt to updates, nullness and alphabetic order - * @param rawMode decide whether snowplow-specific columns expected - * @return a JsonPath String containing all of the relevant fields - */ - def getJsonPathsFile(orderedSubSchemas: Properties, rawMode: Boolean = false): String = { - val columnNames: List[String] = - if (rawMode) { orderedSubSchemas.map { case (p, _) => JsonPathPrefix.Data + pointerToJsonPath(p) } } // everything is data in raw mode - else { // add schema and hierarchy otherwise - val dataColumns = orderedSubSchemas - .map { case (p, _) => pointerToJsonPath(p) } - .filterNot(selfDescSchemaColumns.map(_.columnName).contains(_)) - .filterNot(parentageColumns.map(_.columnName).contains(_)) - - val schemaFieldList = JsonPathSchemaFields.map(JsonPathPrefix.Schema + _) - val hierarchyFieldList = JsonPathHierarchyFields.map(JsonPathPrefix.Hierarchy + _) - val dataFieldList = dataColumns.map(JsonPathPrefix.Data + _) - - schemaFieldList ++ hierarchyFieldList ++ dataFieldList - } - - (JsonPathFileHeader ++ formatFields(columnNames) ++ JsonPathFileFooter).mkString("\n") - } - - /** - * Adds whitespace to the front of each string in the list for formatting - * purposes. - * - * @param fields The fields that need to have white space added - * @return the formatted fields - */ - private[schemaddl] def formatFields(fields: List[String]): List[String] = { - val prefix = "".padTo(8, ' ') - for { - field <- fields - } yield { - val suffix = if (isLast(fields, field)) "" else "," - prefix + "\"" + field + "\"" + suffix - } - } - - /** - * Calculates whether or not the string passed is the last string of a list - * - * @param list The list of strings that need to be tested - * @param test The test string which needs to be assessed - * @return a boolean stating whether or not it is the last string - */ - private[schemaddl] def isLast(list: List[String], test: String): Boolean = - if (list.last == test) true else false - - - /** - * Convert SchemaPointer to json path - */ - def pointerToJsonPath(jsonPointer: Pointer.SchemaPointer): String = - jsonPointer.forData.path.mkString(".") -} diff --git a/modules/core/src/main/scala/com.snowplowanalytics/iglu.schemaddl/redshift/generators/MigrationGenerator.scala b/modules/core/src/main/scala/com.snowplowanalytics/iglu.schemaddl/redshift/generators/MigrationGenerator.scala deleted file mode 100644 index a2a76d52..00000000 --- a/modules/core/src/main/scala/com.snowplowanalytics/iglu.schemaddl/redshift/generators/MigrationGenerator.scala +++ /dev/null @@ -1,159 +0,0 @@ -/* - * Copyright (c) 2014-2023 Snowplow Analytics Ltd. All rights reserved. - * - * This program is licensed to you under the Apache License Version 2.0, - * and you may not use this file except in compliance with the Apache License Version 2.0. - * You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the Apache License Version 2.0 is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. 
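For a table with a single data column foo (raw mode off), the JSONPaths file produced by getJsonPathsFile above renders roughly as follows; the exact indentation comes from formatFields and the header/footer constants, and the trailing commas are added for every field except the last:

    {
      "jsonpaths": [
            "$.schema.vendor",
            "$.schema.name",
            "$.schema.format",
            "$.schema.version",
            "$.hierarchy.rootId",
            "$.hierarchy.rootTstamp",
            "$.hierarchy.refRoot",
            "$.hierarchy.refTree",
            "$.hierarchy.refParent",
            "$.data.foo"
      ]
    }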
- */ -package com.snowplowanalytics.iglu.schemaddl.redshift -package generators - -// Iglu Core -import com.snowplowanalytics.iglu.core._ -import com.snowplowanalytics.iglu.schemaddl.migrations.{ Migration, FlatSchema } - -// This library -import com.snowplowanalytics.iglu.schemaddl.StringUtils._ -import com.snowplowanalytics.iglu.schemaddl.jsonschema._ -import com.snowplowanalytics.iglu.schemaddl.jsonschema.properties.CommonProperties -import com.snowplowanalytics.iglu.schemaddl.migrations.SchemaDiff - -// This library -import DdlGenerator._ - -/** - * Module containing all logic to generate DDL files with information required - * to migration from one version of Schema to another - */ -object MigrationGenerator { - - /** - * Generate full ready to be rendered DDL file containing all migration - * statements and additional data like previous version of table - * - * @param migration common JSON Schema migration object with - * path (from-to) and diff - * @param varcharSize size VARCHARs by default - * @param tableSchema DB schema for table (atomic by default) - * @return DDL file containing list of statements ready to be printed - */ - def generateMigration(migration: Migration, varcharSize: Int, tableSchema: Option[String]): DdlFile = { - val schemaMap = SchemaMap(migration.vendor, migration.name, "jsonschema", migration.to) - val oldSchemaUri = SchemaMap(migration.vendor, migration.name, "jsonschema", migration.from).schemaKey.toSchemaUri - val tableName = getTableName(schemaMap) // e.g. com_acme_event_1 - val tableNameFull = tableSchema.map(_ + ".").getOrElse("") + tableName // e.g. atomic.com_acme_event_1 - - val added = - if (migration.diff.added.nonEmpty) - migration.diff.added.map { - case (pointer, schema) => - buildAlterTableAdd(tableNameFull, varcharSize, (pointer, schema)) - } - else Nil - - val modified = - migration.diff.modified.toList.flatMap { - case modified if maxLengthIncreased(modified) || enumLonger(modified) => - buildAlterTableMaxLength(tableNameFull, varcharSize, modified) - case _ => - None - } - - val header = getHeader(tableName, oldSchemaUri) - val comment = CommentOn(tableNameFull, schemaMap.schemaKey.toSchemaUri) - - val statements = - if (modified.isEmpty && added.isEmpty) List(EmptyAdded, Empty, comment, Empty) - else if (modified.isEmpty) List(Begin(None, None), Empty) ++ added :+ Empty :+ comment :+ Empty :+ End - else if (added.isEmpty) modified ++ List(Empty, comment, Empty) - else (modified :+ Empty) ++ List(Begin(None, None), Empty) ++ added :+ Empty :+ comment :+ Empty :+ End - - - DdlFile(List(header, Empty) ++ statements) - } - - val EmptyAdded = CommentBlock("NO ADDED COLUMNS CAN BE EXPRESSED IN SQL MIGRATION", 3) - - /** - * Generate comment block for for migration file with information about - * previous version of table - * - * @param tableName name of migrating table - * @param oldSchemaUri Schema URI extracted from internal database store - * @return DDL statement with header - */ - def getHeader(tableName: String, oldSchemaUri: String): CommentBlock = - CommentBlock(Vector( - "WARNING: only apply this file to your database if the following SQL returns the expected:", - "", - s"SELECT pg_catalog.obj_description(c.oid) FROM pg_catalog.pg_class c WHERE c.relname = '$tableName';", - " obj_description", - "-----------------", - s" $oldSchemaUri", - " (1 row)")) - - /** - * Generate single ALTER TABLE statement to add a new property - * - * @param tableName name of migrating table - * @param varcharSize default size for VARCHAR - * @param pair 
pair of property name and its Schema properties like - * length, maximum, etc - * @return DDL statement altering single column in table by adding new property - */ - def buildAlterTableAdd(tableName: String, varcharSize: Int, pair: (Pointer.SchemaPointer, Schema)): AlterTable = - pair match { - case (pointer, properties) => - val columnName = FlatSchema.getName(pointer) - val dataType = getDataType(properties, varcharSize, columnName) - val encoding = getEncoding(properties, dataType, columnName) - AlterTable(tableName, AddColumn(snakeCase(columnName), dataType, None, Some(encoding), None)) - } - - /** - * Generate single ALTER TABLE statement that increases the length of a varchar in-place - * - * @param tableName name of migrating table - * @param varcharSize default size for VARCHAR - * @param modified field whose length gets increased - * @return DDL statement altering single column in table by increasing the sieadding new property - */ - def buildAlterTableMaxLength(tableName: String, varcharSize: Int, modified: SchemaDiff.Modified): Option[AlterTable] = { - val columnName = FlatSchema.getName(modified.pointer) - val dataType = getDataType(modified.to, varcharSize, columnName) - val encodingFrom = getEncoding(modified.to, dataType, columnName) - val encodingTo = getEncoding(modified.to, dataType, columnName) - if (EncodingsForbiddingAlter.contains(encodingFrom.value) || EncodingsForbiddingAlter.contains(encodingTo.value)) None - else Some(AlterTable(tableName, AlterType(columnName, dataType))) - } - - /** - * List of column encodings that don't support length extension - * @see https://docs.aws.amazon.com/redshift/latest/dg/r_ALTER_TABLE.html - */ - val EncodingsForbiddingAlter: List[CompressionEncodingValue] = - List(ByteDictEncoding, RunLengthEncoding, Text255Encoding, Text32KEncoding) - - /** @return true if the field is string and its maxLength got increased */ - private[generators] def maxLengthIncreased(modified: SchemaDiff.Modified): Boolean = - modified.from.`type`.exists(_.possiblyWithNull(CommonProperties.Type.String)) && - modified.to.`type`.exists(_.possiblyWithNull(CommonProperties.Type.String)) && - modified.getDelta.maxLength.was.exists { was => - modified.getDelta.maxLength.became.exists { became => - became.value > was.value - } - } - - /** @return true if the field is enum with new value longer than the existing ones */ - private[generators] def enumLonger(modified: SchemaDiff.Modified): Boolean = - modified.getDelta.enum.was.exists { was => - modified.getDelta.enum.became.exists { became => - became.value.map(_.asString).collect { case Some(s) => s.length }.max > was.value.map(_.asString).collect { case Some(s) => s.length }.max - } - } -} diff --git a/modules/core/src/main/scala/com.snowplowanalytics/iglu.schemaddl/redshift/generators/TypeSuggestions.scala b/modules/core/src/main/scala/com.snowplowanalytics/iglu.schemaddl/redshift/internal/ColumnTypeSuggestions.scala similarity index 82% rename from modules/core/src/main/scala/com.snowplowanalytics/iglu.schemaddl/redshift/generators/TypeSuggestions.scala rename to modules/core/src/main/scala/com.snowplowanalytics/iglu.schemaddl/redshift/internal/ColumnTypeSuggestions.scala index 0b4f1bf7..b24435f3 100644 --- a/modules/core/src/main/scala/com.snowplowanalytics/iglu.schemaddl/redshift/generators/TypeSuggestions.scala +++ b/modules/core/src/main/scala/com.snowplowanalytics/iglu.schemaddl/redshift/internal/ColumnTypeSuggestions.scala @@ -10,32 +10,32 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
either express or implied. * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. */ -package com.snowplowanalytics.iglu.schemaddl.redshift.generators +package com.snowplowanalytics.iglu.schemaddl.redshift.internal // cats -import cats.instances.option._ -import cats.instances.list._ -import cats.instances.int._ import cats.instances.bigInt._ +import cats.instances.int._ +import cats.instances.list._ +import cats.instances.option._ import cats.syntax.eq._ -import cats.syntax.traverse._ import cats.syntax.foldable._ - +import cats.syntax.traverse._ +import com.snowplowanalytics.iglu.schemaddl.redshift.ShredModelEntry import io.circe.Json +import scala.annotation.tailrec + // This project import com.snowplowanalytics.iglu.schemaddl.jsonschema.Schema -import com.snowplowanalytics.iglu.schemaddl.jsonschema.properties.StringProperty.Format import com.snowplowanalytics.iglu.schemaddl.jsonschema.properties.CommonProperties.Type -import com.snowplowanalytics.iglu.schemaddl.jsonschema.properties.NumberProperty.{ MultipleOf, Maximum } -import com.snowplowanalytics.iglu.schemaddl.jsonschema.properties.StringProperty.{ MinLength, MaxLength } - -import com.snowplowanalytics.iglu.schemaddl.redshift._ +import com.snowplowanalytics.iglu.schemaddl.jsonschema.properties.NumberProperty.{Maximum, MultipleOf} +import com.snowplowanalytics.iglu.schemaddl.jsonschema.properties.StringProperty.{Format, MaxLength, MinLength} +import com.snowplowanalytics.iglu.schemaddl.redshift.ShredModelEntry.ColumnType._ /** * Module containing functions for data type suggestions */ -object TypeSuggestions { +private[redshift] object ColumnTypeSuggestions { val DefaultArraySize: Int = 65535 @@ -43,10 +43,10 @@ object TypeSuggestions { * Type alias for function suggesting an encode type based on map of * JSON Schema properties */ - type DataTypeSuggestion = (Schema, String) => Option[DataType] + type TypeSuggestion = Schema => Option[ShredModelEntry.ColumnType] // For complex enums Suggest VARCHAR with length of longest element - val complexEnumSuggestion: DataTypeSuggestion = (properties, _) => + val complexEnumSuggestion: TypeSuggestion = properties=> properties.enum match { case Some(enums) if isComplexEnum(enums.value) => val longest = excludeNull(enums.value).map(_.noSpaces.length).maximumOption.getOrElse(16) @@ -55,39 +55,39 @@ object TypeSuggestions { } // Suggest VARCHAR(4096) for all product types. 
Should be in the beginning - val productSuggestion: DataTypeSuggestion = (properties, columnName) => + val productSuggestion: TypeSuggestion = properties => properties.`type` match { case Some(t: Type.Union) if t.isUnion => val typeSet = t.value - Type.Null if (typeSet == Set(Type.Boolean, Type.Integer)) - Some(ProductType(List(s"Product type ${t.asJson.noSpaces} encountered in $columnName"), Some(Int.MaxValue.toString.length))) + Some(ProductType( Some(Int.MaxValue.toString.length))) else - Some(ProductType(List(s"Product type ${t.asJson.noSpaces} encountered in $columnName"), None)) + Some(ProductType(None)) case _ => None } - val timestampSuggestion: DataTypeSuggestion = (properties, _) => + val timestampSuggestion: TypeSuggestion = properties=> (properties.`type`, properties.format) match { case (Some(types), Some(Format.DateTimeFormat)) if types.possiblyWithNull(Type.String) => Some(RedshiftTimestamp) case _ => None } - val dateSuggestion: DataTypeSuggestion = (properties, _) => + val dateSuggestion: TypeSuggestion = properties=> (properties.`type`, properties.format) match { case (Some(types), Some(Format.DateFormat)) if types.possiblyWithNull(Type.String) => Some(RedshiftDate) case _ => None } - val arraySuggestion: DataTypeSuggestion = (properties, _) => + val arraySuggestion: TypeSuggestion = properties=> properties.`type` match { case Some(types) if types.possiblyWithNull(Type.Array) => Some(RedshiftVarchar(DefaultArraySize)) case _ => None } - val numberSuggestion: DataTypeSuggestion = (properties, _) => + val numberSuggestion: TypeSuggestion = properties=> (properties.`type`, properties.multipleOf) match { case (Some(types), Some(MultipleOf.NumberMultipleOf(m))) if types.possiblyWithNull(Type.Number) && m == BigDecimal(1,2) => Some(RedshiftDecimal(Some(36), Some(2))) @@ -99,7 +99,7 @@ object TypeSuggestions { None } - val integerSuggestion: DataTypeSuggestion = (properties, _) => { + val integerSuggestion: TypeSuggestion = properties=> { (properties.`type`, properties.maximum, properties.enum, properties.multipleOf) match { case (Some(types), Some(max), _, _) if types.possiblyWithNull(Type.Integer) => getIntSize(max) @@ -114,7 +114,7 @@ object TypeSuggestions { } } - val charSuggestion: DataTypeSuggestion = (properties, _) => { + val charSuggestion: TypeSuggestion = properties=> { (properties.`type`, properties.minLength, properties.maxLength) match { case (Some(types), Some(MinLength(min)), Some(MaxLength(max))) if min === max && types.possiblyWithNull(Type.String) => @@ -123,14 +123,14 @@ object TypeSuggestions { } } - val booleanSuggestion: DataTypeSuggestion = (properties, _) => { + val booleanSuggestion: TypeSuggestion = properties=> { properties.`type` match { case Some(types) if types.possiblyWithNull(Type.Boolean) => Some(RedshiftBoolean) case _ => None } } - val uuidSuggestion: DataTypeSuggestion = (properties, _) => { + val uuidSuggestion: TypeSuggestion = properties=> { (properties.`type`, properties.format) match { case (Some(types), Some(Format.UuidFormat)) if types.possiblyWithNull(Type.String) => Some(RedshiftChar(36)) @@ -138,7 +138,7 @@ object TypeSuggestions { } } - val varcharSuggestion: DataTypeSuggestion = (properties, _) => { + val varcharSuggestion: TypeSuggestion = properties=> { (properties.`type`, properties.maxLength, properties.enum, properties.format) match { case (Some(types), _, _, Some(Format.Ipv6Format)) if types.possiblyWithNull(Type.String) => Some(RedshiftVarchar(39)) @@ -160,6 +160,20 @@ object TypeSuggestions { } } + val 
columnTypeSuggestions: List[TypeSuggestion] = List( + complexEnumSuggestion, + productSuggestion, + timestampSuggestion, + dateSuggestion, + arraySuggestion, + integerSuggestion, + numberSuggestion, + booleanSuggestion, + charSuggestion, + uuidSuggestion, + varcharSuggestion + ) + private def jsonLength(json: Json): Int = json.fold(0, b => b.toString.length, _ => json.noSpaces.length, _.length, _ => json.noSpaces.length, _ => json.noSpaces.length) @@ -178,13 +192,13 @@ object TypeSuggestions { * @param max upper bound * @return Long representing biggest possible value or None if it's not Int */ - private def getIntSize(max: Maximum): Option[DataType] = + private def getIntSize(max: Maximum): Option[ShredModelEntry.ColumnType] = max match { case Maximum.IntegerMaximum(bigInt) => getIntSize(bigInt) case Maximum.NumberMaximum(_) => Some(RedshiftDecimal(None, None)) } - private def getIntSize(max: BigInt): Option[DataType] = + private def getIntSize(max: BigInt): Option[ShredModelEntry.ColumnType] = if (max <= Short.MaxValue.toInt) Some(RedshiftSmallInt) else if (max <= Int.MaxValue) Some(RedshiftInteger) else Some(RedshiftBigInt) @@ -210,6 +224,7 @@ object TypeSuggestions { * @param predicates list of predicates to check * @param quantity required quantity */ + @tailrec private def somePredicates(instances: List[Json], predicates: List[Json => Boolean], quantity: Int): Boolean = if (quantity == 0) true else predicates match { diff --git a/modules/core/src/main/scala/com.snowplowanalytics/iglu.schemaddl/migrations/FlatSchema.scala b/modules/core/src/main/scala/com.snowplowanalytics/iglu.schemaddl/redshift/internal/FlatSchema.scala similarity index 62% rename from modules/core/src/main/scala/com.snowplowanalytics/iglu.schemaddl/migrations/FlatSchema.scala rename to modules/core/src/main/scala/com.snowplowanalytics/iglu.schemaddl/redshift/internal/FlatSchema.scala index ab2a24b2..dcb9e494 100644 --- a/modules/core/src/main/scala/com.snowplowanalytics/iglu.schemaddl/migrations/FlatSchema.scala +++ b/modules/core/src/main/scala/com.snowplowanalytics/iglu.schemaddl/redshift/internal/FlatSchema.scala @@ -10,41 +10,40 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. */ -package com.snowplowanalytics.iglu.schemaddl.migrations - -import scala.annotation.tailrec -import scala.collection.immutable.ListMap +package com.snowplowanalytics.iglu.schemaddl.redshift.internal import cats.data.State import cats.instances.list._ import cats.instances.option._ import cats.syntax.alternative._ - -import io.circe.Json - -import com.snowplowanalytics.iglu.schemaddl.{ SubSchemas, StringUtils, Properties } import com.snowplowanalytics.iglu.schemaddl.jsonschema.Pointer.SchemaPointer -import com.snowplowanalytics.iglu.schemaddl.jsonschema.circe.implicits._ import com.snowplowanalytics.iglu.schemaddl.jsonschema.properties.CommonProperties -import com.snowplowanalytics.iglu.schemaddl.jsonschema.{Pointer, Schema} import com.snowplowanalytics.iglu.schemaddl.jsonschema.properties.CommonProperties.Type +import com.snowplowanalytics.iglu.schemaddl.jsonschema.{Pointer, Schema} +import com.snowplowanalytics.iglu.schemaddl.redshift.ShredModelEntry + +import scala.annotation.tailrec /** - * An object represents flattened JSON Schema, i.e. 
JSON Schema processed with an algorithm - * that unfolds nested JSON structure into sequence of typed pointers - JSON Pointers, - * pointing to leaf schemas - schemas that cannot be flattened further. - * Leaf schemas are mostly primitive values (strings, booleans etc), but also can be something - * that could not be flattened - * - * This is mostly a transitive tool and should not be used by user-code and instead user - * should be creating *ordered* list of typed pointers from multiple schema via `FlatSchema.extractProperties` - * - * @param subschemas set of typed pointers (order should not matter at this point) - * @param required keys listed in `required` property, whose parents also listed in `required` - * some of parent properties still can be `null` and thus not required - * @param parents keys that are not primitive, but can contain important information (e.g. nullability) - */ -final case class FlatSchema(subschemas: SubSchemas, required: Set[SchemaPointer], parents: SubSchemas) { + * An object represents flattened JSON Schema, i.e. JSON Schema processed with an algorithm + * that unfolds nested JSON structure into sequence of typed pointers - JSON Pointers, + * pointing to leaf schemas - schemas that cannot be flattened further. + * Leaf schemas are mostly primitive values (strings, booleans etc), but also can be something + * that could not be flattened + * + * This is mostly a transitive tool and should not be used by user-code and instead user + * should be creating *ordered* list of typed pointers from multiple schema via `FlatSchema.extractProperties` + * + * @param subschemas set of typed pointers (order should not matter at this point) + * @param required keys listed in `required` property, whose parents also listed in `required` + * some of parent properties still can be `null` and thus not required + * @param parents keys that are not primitive, but can contain important information (e.g. 
nullability) + */ +private[redshift] final case class FlatSchema( + subschemas: FlatSchema.SubSchemas, + required: Set[SchemaPointer], + parents: FlatSchema.SubSchemas + ) { // TODO: remove parents - we can preserve nullability without them /** Add a JSON Pointer that can be converted into a separate column */ @@ -72,34 +71,26 @@ final case class FlatSchema(subschemas: SubSchemas, required: Set[SchemaPointer] /** All parents are required */ @tailrec def nestedRequired(current: SchemaPointer): Boolean = current.parent.flatMap(_.parent) match { - case None | Some(Pointer.Root) => true // Technically None should not be reached + case None | Some(Pointer.Root) => true // Technically None should not be reached case Some(parent) => required.contains(parent) && nestedRequired(parent) } - /** Any parent properties contain `null` in `type` or `enum` */ - def nestedNullable(pointer: SchemaPointer): Boolean = - parents - .filter { case (p, _) => p.isParentOf(pointer) } - .foldLeft(false) { case (acc, (_, schema)) => - schema.`type`.exists(_.nullable) || schema.enum.exists(_.value.contains(Json.Null)) || acc - } - def checkUnionSubSchema(pointer: SchemaPointer): Boolean = subschemas .filter { case (p, _) => p.isParentOf(pointer) } .foldLeft(false) { case (acc, (_, schema)) => schema.`type`.exists(_.isUnion) || acc } - - def toMap: Map[SchemaPointer, Schema] = ListMap(subschemas.toList: _*) - - def show: String = subschemas - .map { case (pointer, schema) => s"${pointer.show} -> ${Schema.normalize[Json](schema).noSpaces}" } - .mkString("\n") } object FlatSchema { + /** + * Set of Schemas properties attached to corresponding JSON Pointers + * Unlike their original Schemas, these have `null` among types if they're not required + */ + private[redshift] type SubSchemas = Set[(Pointer.SchemaPointer, Schema)] + /** * Main function for flattening multiple schemas, preserving their lineage * in their properties. If user software (e.g. RDB Shredder) produces @@ -107,36 +98,30 @@ object FlatSchema { * * Builds subschemas which are ordered according to nullness of field, * name of field and which version field is added - * @param source List of ordered schemas to create ordered subschemas + * + * @param schema Top level schema to create ordered subschemas * @return list of typed pointers which are ordered according to criterias specified * above */ - def extractProperties(source: SchemaList): Properties = - source match { - case s: SchemaList.Single => - val origin = build(s.schema.schema) - postProcess(origin.subschemas) - case s: SchemaList.Full => - val origin = build(s.schemas.head.schema) - val originColumns = postProcess(origin.subschemas) - val addedColumns = Migration.fromSegment(s.toSegment).diff.added - originColumns ++ addedColumns - } + def extractProperties(schema: Schema): List[ShredModelEntry] = postProcess(build(schema).subschemas) + .map(pair => ShredModelEntry.apply(pair._1, pair._2)) - /** Build [[FlatSchema]] from a single schema. Must be used only if there's only one schema */ + + /** Build FlatSchema from a single schema. 
Must be used only if there's only one schema */ def build(schema: Schema): FlatSchema = Schema.traverse(schema, FlatSchema.save).runS(FlatSchema.empty).value /** Check if `current` JSON Pointer has all parent elements also required */ + /** Redshift-specific */ // TODO: type object with properties can be primitive if properties are empty - def isLeaf(schema: Schema): Boolean = { + private def isLeaf(schema: Schema): Boolean = { val isNested = schema.withType(CommonProperties.Type.Object) && schema.properties.isDefined isHeterogeneousUnion(schema) || !isNested } /** This property shouldn't have been added (FlatSchemaSpec.e4) */ - def shouldBeIgnored(pointer: SchemaPointer, flatSchema: FlatSchema): Boolean = + private def shouldBeIgnored(pointer: SchemaPointer, flatSchema: FlatSchema): Boolean = pointer.value.exists { case Pointer.Cursor.DownProperty(Pointer.SchemaProperty.Items) => true case Pointer.Cursor.DownProperty(Pointer.SchemaProperty.PatternProperties) => true @@ -149,7 +134,7 @@ object FlatSchema { case _ => false }) || flatSchema.checkUnionSubSchema(pointer) - def getRequired(cur: SchemaPointer, schema: Schema): Set[SchemaPointer] = + private def getRequired(cur: SchemaPointer, schema: Schema): Set[SchemaPointer] = schema .required.map(_.value.toSet) .getOrElse(Set.empty) @@ -157,7 +142,7 @@ object FlatSchema { val empty: FlatSchema = FlatSchema(Set.empty, Set.empty, Set.empty) - def save(pointer: SchemaPointer, schema: Schema): State[FlatSchema, Unit] = + private def save(pointer: SchemaPointer, schema: Schema): State[FlatSchema, Unit] = State.modify[FlatSchema] { flatSchema => if (shouldBeIgnored(pointer, flatSchema)) flatSchema @@ -172,19 +157,15 @@ object FlatSchema { } /** Sort and clean-up */ - def postProcess(subschemas: SubSchemas): List[(Pointer.SchemaPointer, Schema)] = + private def postProcess(subschemas: SubSchemas): List[(Pointer.SchemaPointer, Schema)] = subschemas.toList.sortBy { case (pointer, schema) => - (schema.canBeNull, getName(pointer)) + (schema.canBeNull, pointer.getName) } match { case List((SchemaPointer(Nil), s)) if s.properties.forall(_.value.isEmpty) => List() case other => other } - /** Get normalized name */ - def getName(jsonPointer: Pointer.SchemaPointer): String = - jsonPointer.forData.path.map(StringUtils.snakeCase).mkString(".") - /** Check if schema contains `oneOf` with different types */ private[schemaddl] def isHeterogeneousUnion(schema: Schema): Boolean = { val oneOfCheck = schema.oneOf match { @@ -193,7 +174,7 @@ object FlatSchema { case CommonProperties.Type.Union(set) => set.toList case singular => List(singular) } - types.distinct .filterNot(_ == CommonProperties.Type.Null) .length > 1 + types.distinct.filterNot(_ == CommonProperties.Type.Null).length > 1 case None => false } val unionTypeCheck = schema.`type`.forall(t => t.isUnion) diff --git a/modules/core/src/main/scala/com.snowplowanalytics/iglu.schemaddl/redshift/internal/Migrations.scala b/modules/core/src/main/scala/com.snowplowanalytics/iglu.schemaddl/redshift/internal/Migrations.scala new file mode 100644 index 00000000..3532e909 --- /dev/null +++ b/modules/core/src/main/scala/com.snowplowanalytics/iglu.schemaddl/redshift/internal/Migrations.scala @@ -0,0 +1,105 @@ +package com.snowplowanalytics.iglu.schemaddl.redshift.internal + +import cats.syntax.show._ +import com.snowplowanalytics.iglu.core.SchemaKey +import com.snowplowanalytics.iglu.schemaddl.redshift.ShredModelEntry +import com.snowplowanalytics.iglu.schemaddl.redshift.internal.Migrations._ +import 
com.snowplowanalytics.iglu.schemaddl.redshift.ShredModelEntry.ColumnType._ + + +private[redshift] case class Migrations(private[Migrations] val migrations: List[(SchemaKey, List[Migrations.NonBreaking])]) { + + def values: Iterable[NonBreaking] = migrations.flatMap(_._2) + + def getMigrationsFor(key: SchemaKey): List[NonBreaking] = migrations.collectFirst { + case (k, nonBreaking) if k == key => nonBreaking + }.get + + def inTransaction(maybeExclLowerBound: Option[SchemaKey], maybeIncUpperBound: Option[SchemaKey] = None): List[Migrations.ColumnAddition] = + migrations + .reverse + .dropWhile { case (k, _) => maybeIncUpperBound.getOrElse(migrations.last._1) != k } + .takeWhile { case (k, _) => maybeExclLowerBound.getOrElse(migrations.head._1) != k } + .reverse + .flatMap { case (_, a) => a } + .collect { case a: Migrations.ColumnAddition => a } + + def outTransaction(maybeExclLowerBound: Option[SchemaKey], maybeIncUpperBound: Option[SchemaKey] = None): List[Migrations.VarcharExtension] = + migrations + .reverse + .dropWhile { case (k, _) => maybeIncUpperBound.getOrElse(migrations.last._1) != k } + .takeWhile { case (k, _) => maybeExclLowerBound.getOrElse(migrations.head._1) != k } + .reverse + .flatMap { case (_, a) => a } + .collect { case a: Migrations.VarcharExtension => a } + + def toSql(tableName: String, dbSchema: String, maybeExclLowerBound: Option[SchemaKey] = None, maybeIncUpperBound: Option[SchemaKey] = None): String = + s"""|-- WARNING: only apply this file to your database if the following SQL returns the expected: + |-- + |-- SELECT pg_catalog.obj_description(c.oid) FROM pg_catalog.pg_class c WHERE c.relname = '$tableName'; + |-- obj_description + |-- ----------------- + |-- ${maybeExclLowerBound.getOrElse(migrations.head._1).toSchemaUri} + |-- (1 row) + | + |""".stripMargin + + outTransaction(maybeExclLowerBound, maybeIncUpperBound).map { case Migrations.VarcharExtension(old, newEntry) => + s""" ALTER TABLE $dbSchema.$tableName + | ALTER COLUMN "${old.columnName}" TYPE ${newEntry.columnType.show}; + |""".stripMargin + }.mkString + + (inTransaction(maybeExclLowerBound, maybeIncUpperBound).map { case Migrations.ColumnAddition(column) => + s""" ALTER TABLE $dbSchema.$tableName + | ADD COLUMN "${column.columnName}" ${column.columnType.show} ${column.compressionEncoding.show}; + |""".stripMargin + } match { + case Nil => s"""| + |-- NO ADDED COLUMNS CAN BE EXPRESSED IN SQL MIGRATION + | + |COMMENT ON TABLE $dbSchema.$tableName IS '${maybeIncUpperBound.getOrElse(migrations.last._1).toSchemaUri}'; + |""".stripMargin + case h :: t => s"""| + |BEGIN TRANSACTION; + | + |${(h :: t).mkString} + | COMMENT ON TABLE $dbSchema.$tableName IS '${maybeIncUpperBound.getOrElse(migrations.last._1).toSchemaUri}'; + | + |END TRANSACTION;""".stripMargin + }) + + def ++(that: Migrations): Migrations = Migrations(migrations ++ that.migrations) +} + +object Migrations { + + def empty(k: SchemaKey): Migrations = Migrations(k, Nil) + + def apply(schemaKey: SchemaKey, migrations: List[Migrations.NonBreaking]): Migrations = + Migrations(List((schemaKey, migrations))) + + implicit val ord: Ordering[SchemaKey] = SchemaKey.ordering + + sealed trait NonBreaking extends Product with Serializable + + case class VarcharExtension(old: ShredModelEntry, newEntry: ShredModelEntry) extends NonBreaking + + case class ColumnAddition(column: ShredModelEntry) extends NonBreaking + + case object NoChanges extends NonBreaking + + sealed trait Breaking extends Product with Serializable { + def report: String = this match { + 
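Rendered for a hypothetical table com_acme_event_1 in the atomic schema, with one column added between versions 1-0-0 and 1-0-1 and no varchar extensions, toSql above produces SQL of roughly this shape (the table name, column name and encoding string here are illustrative, not taken from a real schema):

    -- WARNING: only apply this file to your database if the following SQL returns the expected:
    --
    -- SELECT pg_catalog.obj_description(c.oid) FROM pg_catalog.pg_class c WHERE c.relname = 'com_acme_event_1';
    --  obj_description
    -- -----------------
    --  iglu:com.acme/event/jsonschema/1-0-0
    -- (1 row)

    BEGIN TRANSACTION;

      ALTER TABLE atomic.com_acme_event_1
        ADD COLUMN "status" VARCHAR(4096) ENCODE ZSTD;

      COMMENT ON TABLE atomic.com_acme_event_1 IS 'iglu:com.acme/event/jsonschema/1-0-1';

    END TRANSACTION;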
case IncompatibleTypes(old, changed) => + s"Incompatible types in column ${old.columnName} old ${old.columnType} new ${changed.columnType}" + case IncompatibleEncoding(old, changed) => + s"Incompatible encoding in column ${old.columnName} old type ${old.columnType}/${old.compressionEncoding} new type ${changed.columnType}/${changed.compressionEncoding}" + case NullableRequired(old) => s"Making required column nullable ${old.columnName}" + } + } + + case class IncompatibleTypes(old: ShredModelEntry, changed: ShredModelEntry) extends Breaking + + case class IncompatibleEncoding(old: ShredModelEntry, changed: ShredModelEntry) extends Breaking + + case class NullableRequired(old: ShredModelEntry) extends Breaking +} \ No newline at end of file diff --git a/modules/core/src/main/scala/com.snowplowanalytics/iglu.schemaddl/redshift/package.scala b/modules/core/src/main/scala/com.snowplowanalytics/iglu.schemaddl/redshift/package.scala new file mode 100644 index 00000000..af376a2f --- /dev/null +++ b/modules/core/src/main/scala/com.snowplowanalytics/iglu.schemaddl/redshift/package.scala @@ -0,0 +1,104 @@ +package com.snowplowanalytics.iglu.schemaddl + +import cats.data.NonEmptyList +import cats.syntax.option._ +import cats.syntax.either._ +import com.snowplowanalytics.iglu.core.SchemaKey +import com.snowplowanalytics.iglu.schemaddl.redshift.ShredModel.GoodModel +import com.snowplowanalytics.iglu.schemaddl.redshift.internal.Migrations + +import scala.collection.mutable + +package object redshift { + + // See the merge method scala doc for reference + def assessRedshiftMigration( + src: IgluSchema, + tgt: IgluSchema + ): Either[NonEmptyList[Migrations.Breaking], List[Migrations.NonBreaking]] = + ShredModel.good(src).merge(ShredModel.good(tgt)).map(_.allMigrations).leftMap(_.errors) + + def assessRedshiftMigration( + src: List[IgluSchema], + tgt: IgluSchema + ): Either[NonEmptyList[Migrations.Breaking], List[Migrations.NonBreaking]] = + src match { + case Nil => Nil.asRight + case ::(head, tl) => getFinalMergedModel(NonEmptyList(head, tl)) + .merge(ShredModel.good(tgt)) + .leftMap(_.errors) + .map(_.getMigrationsFor(tgt.self.schemaKey)) + } + + def isRedshiftMigrationBreaking(src: List[IgluSchema], + tgt: IgluSchema): Boolean = + assessRedshiftMigration(src, tgt).isLeft + + def isRedshiftMigrationBreaking(src: IgluSchema, tgt: IgluSchema): Boolean = + assessRedshiftMigration(src, tgt).isLeft + + def getFinalMergedModel(schemas: NonEmptyList[IgluSchema]): GoodModel = + foldMapMergeRedshiftSchemas(schemas).values.collectFirst { + case model: GoodModel => model + }.get // first schema always would be there due to Nel, so `get` is safe + + /** + * Build a map between schema key and their models. + * + * @param schemas - ordered list of schemas for the same family + * @return + */ + def foldMapRedshiftSchemas(schemas: NonEmptyList[IgluSchema]): collection.Map[SchemaKey, ShredModel] = { + val acc = mutable.Map.empty[SchemaKey, ShredModel] + var maybeLastGoodModel = Option.empty[GoodModel] + val models = schemas.map(ShredModel.good) + + // first pass to build the mapping between key and corresponding model + models.toList.foreach(model => maybeLastGoodModel match { + case Some(lastModel) => lastModel.merge(model) match { + case Left(badModel) => acc.update(model.schemaKey, badModel) + // We map original model here, as opposed to merged one. 
+ case Right(mergedModel) => acc.update(model.schemaKey, model) + maybeLastGoodModel = mergedModel.some + } + case None => + acc.update(model.schemaKey, model) + maybeLastGoodModel = model.some + }) + + acc + } + + + /** + * Build a map between schema key and a merged or recovered model. For example if schemas X and Y and mergable, both + * would link to schema XY (product). + * + * @param schemas - ordered list of schemas for the same family + * @return + */ + def foldMapMergeRedshiftSchemas(schemas: NonEmptyList[IgluSchema]): collection.Map[SchemaKey, ShredModel] = { + val models = schemas.map(ShredModel.good) + var lastGoodModel = models.head + val acc: mutable.Map[SchemaKey, ShredModel] = mutable.Map(models.head.schemaKey -> models.head) + + // first pass to build the mapping between key and accumulated model + models.tail.foreach { model => + lastGoodModel.merge(model) match { + case Left(badModel) => + acc.update(model.schemaKey, badModel) + case Right(mergedModel) => + acc.update(mergedModel.schemaKey, mergedModel) + lastGoodModel = mergedModel + } + } + + // seconds pass to backfill the last model version for initial keys. + acc.map { + case (k, model) => (k, if (model.isInstanceOf[GoodModel]) + lastGoodModel + else + model) + }.toMap + } +} diff --git a/modules/core/src/test/scala/com/snowplowanalytics/iglu/schemaddl/SpecHelpers.scala b/modules/core/src/test/scala/com/snowplowanalytics/iglu/schemaddl/SpecHelpers.scala index 4fde7dc0..043b53c8 100644 --- a/modules/core/src/test/scala/com/snowplowanalytics/iglu/schemaddl/SpecHelpers.scala +++ b/modules/core/src/test/scala/com/snowplowanalytics/iglu/schemaddl/SpecHelpers.scala @@ -14,15 +14,11 @@ package com.snowplowanalytics.iglu.schemaddl import cats.syntax.either._ import cats.syntax.show._ - import io.circe.Json -import io.circe.parser.{ parse => parseJson } - -import jsonschema.{ Schema, Pointer } +import io.circe.parser.{parse => parseJson} +import jsonschema.{Pointer, Schema} import jsonschema.circe.implicits._ -import com.snowplowanalytics.iglu.schemaddl.migrations.FlatSchema - object SpecHelpers { def parseSchema(string: String): Schema = parseJson(string) @@ -30,11 +26,6 @@ object SpecHelpers { .flatMap(json => Schema.parse[Json](json).toRight("SpecHelpers.parseSchema received invalid JSON Schema")) .fold(s => throw new RuntimeException(s), identity) - def extractOrder(orderedSubSchemas: Properties): List[String] = - orderedSubSchemas.map { - case (p, _) => FlatSchema.getName(p) - } - implicit class JsonOps(json: Json) { def schema: Schema = Schema.parse(json).getOrElse(throw new RuntimeException("SpecHelpers.parseSchema received invalid JSON Schema")) @@ -42,6 +33,6 @@ object SpecHelpers { implicit class StringOps(str: String) { def jsonPointer: Pointer.SchemaPointer = - Pointer.parseSchemaPointer(str).fold(x => x, x => x) + Pointer.parseSchemaPointer(str).fold(identity, identity) } } diff --git a/modules/core/src/test/scala/com/snowplowanalytics/iglu/schemaddl/experimental/BumpsSpec.scala b/modules/core/src/test/scala/com/snowplowanalytics/iglu/schemaddl/experimental/BumpsSpec.scala deleted file mode 100644 index 0c308054..00000000 --- a/modules/core/src/test/scala/com/snowplowanalytics/iglu/schemaddl/experimental/BumpsSpec.scala +++ /dev/null @@ -1,107 +0,0 @@ -/* - * Copyright (c) 2014-2023 Snowplow Analytics Ltd. All rights reserved. - * - * This program is licensed to you under the Apache License Version 2.0, - * and you may not use this file except in compliance with the Apache License Version 2.0. 
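The package object above is the surface the rest of the pipeline is expected to call. A hypothetical usage sketch: schema100 and schema101 stand for two parsed versions of the same schema family, only the entry points shown in this patch are used, and the IgluSchema alias is assumed to come from the schemaddl package object:

    import cats.data.NonEmptyList
    import com.snowplowanalytics.iglu.schemaddl.IgluSchema
    import com.snowplowanalytics.iglu.schemaddl.redshift._

    object MigrationCheckSketch {
      def check(schema100: IgluSchema, schema101: IgluSchema): Unit = {
        // Pairwise assessment: Left lists breaking changes, Right lists non-breaking migrations
        assessRedshiftMigration(schema100, schema101) match {
          case Left(breaking)     => breaking.toList.foreach(b => println(b.report))
          case Right(nonBreaking) => println(s"${nonBreaking.size} non-breaking change(s)")
        }

        // Map every version of the family onto its merged (or recovered) model
        val models = foldMapMergeRedshiftSchemas(NonEmptyList.of(schema100, schema101))
        models.foreach { case (key, model) =>
          println(s"${key.toSchemaUri} -> ${model.getClass.getSimpleName}")
        }
      }
    }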
- * You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the Apache License Version 2.0 is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. - */ -package com.snowplowanalytics.iglu.schemaddl.experimental - -import com.snowplowanalytics.iglu.core.VersionKind -import com.snowplowanalytics.iglu.schemaddl.SpecHelpers._ -import com.snowplowanalytics.iglu.schemaddl.migrations.SchemaDiff - -import io.circe.literal._ -import org.specs2.mutable.Specification - - -class BumpsSpec extends Specification { - "required" should { - "recognize added required property" >> { - val diff = SchemaDiff.empty.copy(added = List( - "/properties/foo".jsonPointer -> json"""{"type": "string"}""".schema - )) - - Bumps.required(diff) must beTrue - } - - "skip added optional (via enum) property" >> { - val diff = SchemaDiff.empty.copy(added = List( - "/properties/foo".jsonPointer -> json"""{"type": "string", "enum": ["foo", null]}""".schema - )) - - Bumps.required(diff) must beFalse - } - - "recognize changed property being made required" >> { - val modified = SchemaDiff.Modified( - "/properties/foo".jsonPointer, - json"""{"type": "string", "enum": ["foo", null]}""".schema, - json"""{"type": "string"}""".schema) - - val diff = SchemaDiff.empty.copy(modified = Set(modified)) - - Bumps.required(diff) must beTrue - } - - "skip changed optional -> optional property" >> { - val modified = SchemaDiff.Modified( - "/properties/foo".jsonPointer, - json"""{"type": "string"}""".schema, - json"""{"type": ["string", "integer"]}""".schema) - - val diff = SchemaDiff.empty.copy(modified = Set(modified)) - - Bumps.required(diff) must beFalse - } - } - - "getPointer" should { - "identify type widening as a revision" >> { - val modified = SchemaDiff.Modified( - "/properties/foo".jsonPointer, - json"""{"type": "string"}""".schema, - json"""{"type": ["string", "integer"]}""".schema) - - val diff = SchemaDiff.empty.copy(modified = Set(modified)) - - Bumps.getPointer(diff) must beSome(VersionKind.Revision: VersionKind) - } - - "identify constraint change as a revision" >> { - val modified = SchemaDiff.Modified( - "/properties/foo".jsonPointer, - json"""{"type": "string", "maxLength": 10}""".schema, - json"""{"type": "string", "maxLength": 12}""".schema) - - val diff = SchemaDiff.empty.copy(modified = Set(modified)) - - Bumps.getPointer(diff) must beSome(VersionKind.Revision: VersionKind) - } - - "identify added optional property AND constraint change as a revision" >> { - val addedProps = "/properties/bar".jsonPointer -> json"""{"type": ["string", "null"]}""".schema - - val modified = SchemaDiff.Modified( - "/properties/foo".jsonPointer, - json"""{"type": "string", "maxLength": 10}""".schema, - json"""{"type": "string", "maxLength": 12}""".schema) - - val diff = SchemaDiff.empty.copy(added = List(addedProps), modified = Set(modified)) - - Bumps.getPointer(diff) must beSome(VersionKind.Revision: VersionKind) - } - - "identify added optional property as an addition" >> { - val addedProps = "/properties/bar".jsonPointer -> json"""{"type": ["string", "null"]}""".schema - val diff = SchemaDiff.empty.copy(added = List(addedProps)) - - Bumps.getPointer(diff) must beSome(VersionKind.Addition: VersionKind) - } - } -} diff --git 
a/modules/core/src/test/scala/com/snowplowanalytics/iglu/schemaddl/experimental/VersionTreeSpec.scala b/modules/core/src/test/scala/com/snowplowanalytics/iglu/schemaddl/experimental/VersionTreeSpec.scala deleted file mode 100644 index 3552eafb..00000000 --- a/modules/core/src/test/scala/com/snowplowanalytics/iglu/schemaddl/experimental/VersionTreeSpec.scala +++ /dev/null @@ -1,225 +0,0 @@ -/* - * Copyright (c) 2016-2023 Snowplow Analytics Ltd. All rights reserved. - * - * This program is licensed to you under the Apache License Version 2.0, - * and you may not use this file except in compliance with the Apache License Version 2.0. - * You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the Apache License Version 2.0 is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. - */ -package com.snowplowanalytics.iglu.schemaddl.experimental - -import cats.data.NonEmptyList - -import com.snowplowanalytics.iglu.core.SchemaVer - -import org.scalacheck.{Arbitrary, Gen} -import org.specs2.{ScalaCheck, Specification} - -class VersionTreeSpec extends Specification with ScalaCheck { def is = s2""" - add an addition after new revision $e1 - add two new additions $e2 - Models.add merges in next version $e3 - Revisions.add merges in next version $e4 - Additions.add merges in next version $e5 - Models.add merges versions in random order $e6 - VersionTree.build fails to build a tree with duplicate entry $e7 - VersionTree.build fails to build a tree with missing addition $e8 - VersionTree.build fails to build a tree with two missing additions $e9 - VersionTree.build fails to build a tree with missing revision $e10 - VersionTree.build builds an isomorphic VersionList $e11 - """ - - def e1 = { - val next = for { - one <- VersionTree.Root.add(SchemaVer.Full(1,0,1)) - two <- one.add(SchemaVer.Full(1,1,0)) - four <- two.add(SchemaVer.Full(1,0,2)) - } yield four - - val expected = List(SchemaVer.Full(1,0,0), SchemaVer.Full(1,0,1), SchemaVer.Full(1,1,0), SchemaVer.Full(1,0,2)) - next.map(_.versionList.versions.toList) must beRight(expected) - } - - def e2 = { - val next = for { - first <- VersionTree.Root.add(SchemaVer.Full(1,0,1)) - second <- first.add(SchemaVer.Full(1,0,2)) - } yield second - val expected = List(SchemaVer.Full(1,0,0), SchemaVer.Full(1,0,1), SchemaVer.Full(1,0,2)) - - next.map(_.versionList.versions.toList) must beRight(expected) - } - - def e3 = { - import VersionTree._ - import cats.data.NonEmptyList - - val init = VersionTree.Root - - // Two additions in the same revision group - val expected = VersionTree(NonEmptyList.of( - (1, Revisions(NonEmptyList.of( - (0, Additions(NonEmptyList.of(1, 0)))) // 1, 0 likely - ))) - ) - - val result = init.add(SchemaVer.Full(1,0,1)) - result must beRight(expected) - } - - def e4 = { - import VersionTree._ - import cats.data.NonEmptyList - - val init = VersionTree.Root.models.head._2 - - // Two additions in the same revision group - val expected = Revisions(NonEmptyList.of( - (0, Additions(NonEmptyList.of(1, 0)))) - ) - - init.add(NonEmptyList.of(0), List(0), SchemaVer.Full(1,0,1)) must beRight(expected) - } - - def e5 = { - import VersionTree._ - import cats.data.NonEmptyList - - val init = Additions(NonEmptyList.of(0)) - val expected = 
Additions(NonEmptyList.of(1, 0)) - - init.add(List(0), 1) must beRight(expected) - } - - def e6 = { - val next = for { - one <- VersionTree.Root.add(SchemaVer.Full(1,0,1)) - two <- one.add(SchemaVer.Full(1,1,0)) - seven <- two.add(SchemaVer.Full(2,0,0)) - thr <- seven.add(SchemaVer.Full(2,1,0)) - foru <- thr.add(SchemaVer.Full(2,2,0)) - } yield foru - - val expected = List(SchemaVer.Full(1,0,0), SchemaVer.Full(1,0,1), SchemaVer.Full(1,1,0), - SchemaVer.Full(2,0,0), SchemaVer.Full(2,1,0), SchemaVer.Full(2,2,0)) - next.map(_.versionList.versions.toList) must beRight(expected) - } - - - def e7 = { - val original = List(SchemaVer.Full(1,0,0), - SchemaVer.Full(1,0,1), SchemaVer.Full(1,1,0), - SchemaVer.Full(2,0,0), SchemaVer.Full(2,1,0), SchemaVer.Full(2,2,0), - SchemaVer.Full(2,1,0) - ) - - VersionTree.build(original).map(_.versionList.versions.toList) must beLeft.like { - case VersionTree.BuildingError.InvalidTree(VersionTree.AddingError.AlreadyExists, _, SchemaVer.Full(2,1,0)) => ok - case _ => ko - } - } - - def e8 = { - import VersionTree._ - - val original = List(SchemaVer.Full(1,0,0), SchemaVer.Full(2,0,1)) - - VersionTree.build(original).map(_.versionList.versions.toList) must beLeft.like { - case BuildingError.InvalidTree(AddingError.AdditionGaps(NonEmptyList(0, Nil)), _, SchemaVer.Full(2,0,1)) => ok - case _ => ko - } - } - - def e9 = { - import VersionTree._ - - val original = List(SchemaVer.Full(1,0,0), SchemaVer.Full(1,0,3)) - - VersionTree.build(original).map(_.versionList.versions.toList) must beLeft.like { - case BuildingError.InvalidTree(AddingError.AdditionGaps(NonEmptyList(1, List(2))), _, SchemaVer.Full(1,0,3)) => ok - case _ => ko - } - } - - def e10 = { - import VersionTree._ - - val original = List(SchemaVer.Full(1,0,0), SchemaVer.Full(1,2,0)) - - VersionTree.build(original).map(_.versionList.versions.toList) must beLeft.like { - case BuildingError.InvalidTree(AddingError.RevisionGaps(NonEmptyList(1, Nil)), _, SchemaVer.Full(1,2,0)) => ok - case _ => ko - } - } - - def e11 = { - import VersionTreeSpec._ - - prop { versions: NonEmptyList[SchemaVer.Full] => - val result = VersionTree.build(versions.toList) - result must beRight.like { - case tree => tree.versionList.versions must beEqualTo(versions) - } - } - } - -} - -object VersionTreeSpec { - sealed trait SchemaVerPoint extends Product with Serializable - object SchemaVerPoint { - case object Model extends SchemaVerPoint - case object Revision extends SchemaVerPoint - case object Addition extends SchemaVerPoint - - val gen = Gen.frequency((1, Model), (2, Revision), (10, Addition)) - } - - def shuffled(start: SchemaVer.Full): Gen[NonEmptyList[SchemaVer.Full]] = { - def go(acc: List[SchemaVer.Full]): Gen[NonEmptyList[SchemaVer.Full]] = - for { - stop <- Gen.frequency((10, false), (1, true)) - previous <- Gen.oneOf(acc) - nextPoint <- SchemaVerPoint.gen - next = nextPoint match { - case SchemaVerPoint.Model => - previous.copy(model = previous.model + 1, revision = 0, addition = 0) - case SchemaVerPoint.Revision => - previous.copy(revision = previous.revision + 1, addition = 0) - case SchemaVerPoint.Addition => - previous.copy(addition = previous.addition + 1) - } - updated = if (acc.contains(next)) acc else next :: acc - result <- if (stop) Gen.const(NonEmptyList.fromListUnsafe(updated)) else go(updated) - } yield result - - go(List(start)) - } - - - def sequential(start: SchemaVer.Full): Gen[NonEmptyList[SchemaVer.Full]] = { - def go(acc: List[SchemaVer.Full], previous: SchemaVer.Full): Gen[NonEmptyList[SchemaVer.Full]] = 
- for { - stop <- Gen.frequency((10, false), (1, true)) - nextPoint <- SchemaVerPoint.gen - next = nextPoint match { - case SchemaVerPoint.Model => - previous.copy(model = previous.model + 1, revision = 0, addition = 0) - case SchemaVerPoint.Revision => - previous.copy(revision = previous.revision + 1, addition = 0) - case SchemaVerPoint.Addition => - previous.copy(addition = previous.addition + 1) - } - result <- if (stop) Gen.const(NonEmptyList(previous, acc)) else go(previous :: acc, next) - } yield result - - go(Nil, start) - } - - implicit val versionListArb: Arbitrary[NonEmptyList[SchemaVer.Full]] = - Arbitrary(VersionTreeSpec.shuffled(SchemaVer.Full(1,0,0)).map(_.reverse)) -} diff --git a/modules/core/src/test/scala/com/snowplowanalytics/iglu/schemaddl/migrations/FlatDataSpec.scala b/modules/core/src/test/scala/com/snowplowanalytics/iglu/schemaddl/migrations/FlatDataSpec.scala deleted file mode 100644 index 1de07fc6..00000000 --- a/modules/core/src/test/scala/com/snowplowanalytics/iglu/schemaddl/migrations/FlatDataSpec.scala +++ /dev/null @@ -1,211 +0,0 @@ -/* - * Copyright (c) 2016-2023 Snowplow Analytics Ltd. All rights reserved. - * - * This program is licensed to you under the Apache License Version 2.0, - * and you may not use this file except in compliance with the Apache License Version 2.0. - * You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the Apache License Version 2.0 is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. 
- */ -package com.snowplowanalytics.iglu.schemaddl.migrations - -import cats.data.NonEmptyList - -import io.circe.literal._ - -import com.snowplowanalytics.iglu.core.{SchemaKey, SchemaMap, SchemaVer, SelfDescribingSchema} - -import org.specs2.mutable.Specification - -import com.snowplowanalytics.iglu.schemaddl.SpecHelpers._ - -class FlatDataSpec extends Specification { - "getPath" should { - "getPath goes into nested data" >> { - val path = "/properties/foo/properties/bar".jsonPointer.forData - val data = json"""{"foo": {"bar": "success"}}""" - - FlatData.getPath(path, data, FlatData.getString(None), "") must beEqualTo("success") - } - } - - "flatten" should { - "process complex schema list" >> { - val schemaA = - json"""{"type": "object", "additionalProperties": false, "properties": { - "a": {"type": "string"} - }}""".schema - val schemaB = - json"""{"type": "object", "additionalProperties": false, "properties": { - "a": {"type": "string"}, - "b": {"type": "object", "additionalProperties": false, "properties": {"ba": {}}} - }}""".schema - val schemaC = - json"""{"type": "object", "additionalProperties": false, "properties": { - "a": {"type": "string"}, - "b": {"type": "object", "additionalProperties": false, "properties": {"ba": {}}}, - "c": {"type": "integer"} - }}""".schema - val schemaD = - json"""{"type": "object", "additionalProperties": false, "properties": { - "a": {"type": "string"}, - "b": {"type": "object", "additionalProperties": false, "properties": {"ba": {}, "bb": {}}}, - "c": {"type": "integer"} - }}""".schema - - val schemas = NonEmptyList.of( - SelfDescribingSchema(SchemaMap(SchemaKey("com.acme", "test", "jsonschema", SchemaVer.Full(1,0,0))), schemaA), - SelfDescribingSchema(SchemaMap(SchemaKey("com.acme", "test", "jsonschema", SchemaVer.Full(1,0,1))), schemaB), - SelfDescribingSchema(SchemaMap(SchemaKey("com.acme", "test", "jsonschema", SchemaVer.Full(1,0,2))), schemaC), - SelfDescribingSchema(SchemaMap(SchemaKey("com.acme", "test", "jsonschema", SchemaVer.Full(1,0,3))), schemaD) - ) - - val schemaList = SchemaList.buildMultiple(schemas).right.get.head - - val expected = List("one", "two", "three", "four") - - val data = json"""{"a": "one", "b": {"ba": "two", "bb": "four"}, "c": "three"}""" - val result = FlatData.flatten(data, schemaList, FlatData.getString(None), "") - - result must beEqualTo(expected) - } - - "process schema which contains oneOf" >> { - val json = json""" - { - "type": "object", - "properties": { - "union": { - "oneOf": [ - { - "type": "object", - "properties": { - "object_without_properties": { "type": "object" } - } - }, - { - "type": "string" - } - ] - } - }, - "required": ["union"], - "additionalProperties": false - } - """.schema - - val schema = - SelfDescribingSchema(SchemaMap(SchemaKey("com.acme", "test", "jsonschema", SchemaVer.Full(1,0,0))), json) - - val schemaList = SchemaList.buildSingleSchema(schema).get - - val data1 = json"""{"union": "union_value"}""" - val result1 = FlatData.flatten(data1, schemaList, FlatData.getString(None), "") - val expected1 = List("union_value") - val comp1 = result1 must beEqualTo(expected1) - - val data2 = json"""{"union": {"foo": "foo_val", "bar": "bar_val"}}""" - val result2 = FlatData.flatten(data2, schemaList, FlatData.getString(None), "") - val expected2 = List("""{"foo":"foo_val","bar":"bar_val"}""") - val comp2 = result2 must beEqualTo(expected2) - - val expected3 = List("""{"object_without_properties":"val"}""") - val data3 = json"""{"union": {"object_without_properties": "val"}}""" - val result3 = 
FlatData.flatten(data3, schemaList, FlatData.getString(None), "") - val comp3 = result3 must beEqualTo(expected3) - - comp1 and comp2 and comp3 - } - - "process schema which contains list array" >> { - val schema = json""" - { - "type": "object", - "properties": { - "someBool": { "type": "boolean" }, - "someArray": { - "type": "array", - "items": [{"type": "integer"}, {"type": "string"}] - } - } - } """.schema - - val schemas = NonEmptyList.of( - SelfDescribingSchema(SchemaMap(SchemaKey("com.acme", "test", "jsonschema", SchemaVer.Full(1,0,0))), schema) - ) - val schemaList = SchemaList.buildMultiple(schemas).right.get.head - - val data = json"""{"someBool": true, "someArray": ["item1", "item2", "item3"]}""" - val result = FlatData.flatten(data, schemaList, FlatData.getString(None), "") - val expected = List("""["item1","item2","item3"]""", "1") - - result must beEqualTo(expected) - } - - "process schema which contains tuple array" >> { - val schema = json""" - { - "type": "object", - "properties": { - "someBool": { "type": "boolean" }, - "someArray": { - "type": "array", - "items": {"type": "integer"} - } - } - } """.schema - - val schemas = NonEmptyList.of( - SelfDescribingSchema(SchemaMap(SchemaKey("com.acme", "test", "jsonschema", SchemaVer.Full(1,0,0))), schema) - ) - val schemaList = SchemaList.buildMultiple(schemas).right.get.head - - val data = json"""{"someBool": true, "someArray": ["item1","item2","item3"]}""" - val result = FlatData.flatten(data, schemaList, FlatData.getString(None), "") - val expected = List("""["item1","item2","item3"]""", "1") - - result must beEqualTo(expected) - } - - "process schema which contains union type" >> { - val schema = json""" - { - "type": "object", - "properties": { - "a": { - "type": ["integer", "object"], - "properties": { - "b": { "type": "string" }, - "c": { "type": "integer" } - } - } - } - } """.schema - - val schemas = NonEmptyList.of( - SelfDescribingSchema(SchemaMap(SchemaKey("com.acme", "test", "jsonschema", SchemaVer.Full(1,0,0))), schema) - ) - val schemaList = SchemaList.buildMultiple(schemas).right.get.head - - val data1 = json"""{"a": 2}""" - val result1 = FlatData.flatten(data1, schemaList, FlatData.getString(None), "") - val expected1 = List("2") - val comp1 = result1 must beEqualTo(expected1) - - val data2 = json"""{"a":{"b":1,"c":"val"}}""" - val result2 = FlatData.flatten(data2, schemaList, FlatData.getString(None), "") - val expected2 = List("""{"b":1,"c":"val"}""") - val comp2 = result2 must beEqualTo(expected2) - - val data3 = json"""{"a":{"key1":"value1","key2":"value2"}}""" - val result3 = FlatData.flatten(data3, schemaList, FlatData.getString(None), "") - val expected3 = List("""{"key1":"value1","key2":"value2"}""") - val comp3 = result3 must beEqualTo(expected3) - - comp1 and comp2 and comp3 - } - } -} diff --git a/modules/core/src/test/scala/com/snowplowanalytics/iglu/schemaddl/migrations/FlatSchemaSpec.scala b/modules/core/src/test/scala/com/snowplowanalytics/iglu/schemaddl/migrations/FlatSchemaSpec.scala deleted file mode 100644 index 9e36d1e0..00000000 --- a/modules/core/src/test/scala/com/snowplowanalytics/iglu/schemaddl/migrations/FlatSchemaSpec.scala +++ /dev/null @@ -1,1049 +0,0 @@ -/* - * Copyright (c) 2014-2023 Snowplow Analytics Ltd. All rights reserved. - * - * This program is licensed to you under the Apache License Version 2.0, - * and you may not use this file except in compliance with the Apache License Version 2.0. 
- * You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the Apache License Version 2.0 is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. - */ -package com.snowplowanalytics.iglu.schemaddl.migrations - -import cats.implicits._ -import cats.data._ - -import io.circe.literal._ - -import org.specs2.mutable.Specification -import org.specs2.matcher.Matcher - -import com.snowplowanalytics.iglu.core.SelfDescribingSchema -import com.snowplowanalytics.iglu.core.{SchemaMap, SchemaVer} -import com.snowplowanalytics.iglu.schemaddl.SubSchemas -import com.snowplowanalytics.iglu.schemaddl.jsonschema.properties.CommonProperties.{Description, Type} -import com.snowplowanalytics.iglu.schemaddl.jsonschema.{Pointer, Schema} -import com.snowplowanalytics.iglu.schemaddl.SpecHelpers._ - - -class FlatSchemaSpec extends Specification { - - "build" should { - "recognize a JSON schema without properties" >> { - val schema = json"""{"type": "object"}""".schema - val expected = FlatSchema( - Set(Pointer.Root -> Schema.empty.copy(`type` = Some(Type.Object))), - Set.empty, - Set.empty) - - FlatSchema.build(schema) must beEqualTo(expected) - } - - "recognize an object property without 'properties' as primitive" >> { - val json = json""" - { - "type": "object", - "properties": { - "nested": { - "type": "object", - "properties": { - "object_without_properties": { - "type": "object" - } - } - } - } - } - """.schema - - val subSchemas = Set( - "/properties/nested/properties/object_without_properties".jsonPointer -> - json"""{"type": ["object", "null"]}""".schema) - - val result = FlatSchema.build(json) - - val parentsExpectation = result.parents.map(_._1) must contain(Pointer.Root, "/properties/nested".jsonPointer) - - (result.subschemas must beEqualTo(subSchemas)) and (result.required must beEmpty) and parentsExpectation - } - - "recognize an empty self-describing schema as empty FlatSchema" >> { - val json = json""" - { - "description": "Wildcard schema #1 to match any valid JSON instance", - "self": { - "vendor": "com.snowplowanalytics.iglu", - "name": "anything-a", - "format": "jsonschema", - "version": "1-0-0" - } - } - """.schema - val description = "Wildcard schema #1 to match any valid JSON instance" - val expected = FlatSchema(Set( - Pointer.Root -> Schema.empty.copy(description = Some(Description(description)))), - Set.empty, - Set.empty) - - val res = FlatSchema.build(json) - - res must beEqualTo(expected) - } - - "recognize an array as primitive" >> { - val schema = json""" - { - "type": "object", - "properties": { - "foo": { - "type": "array", - "items": { - "type": "string" - } - } - }, - "additionalProperties": false - } - """.schema - - val expected = Set( - "/properties/foo".jsonPointer -> - json"""{"type": ["array", "null"], "items": {"type": "string"}}""".schema - ) - - val result = FlatSchema.build(schema) - - val subschemasExpectation = result.subschemas must beEqualTo(expected) - val requiredExpectation = result.required must beEmpty - val parentsExpectation = result.parents.map(_._1) must contain(Pointer.Root) - - subschemasExpectation and requiredExpectation and parentsExpectation - } - - "transform [object,string] union type into single primitive" >> { - val schema = json""" - { 
- "type": "object", - "properties": { - "foo": { - "type": ["string", "object"], - "properties": { - "one": { - "type": "string" - }, - "two": { - "type": "integer" - } - } - }, - "a_field": { - "type": ["string", "integer"] - }, - "b_field": { - "type": "string" - }, - "c_field": { - "type": ["integer", "number"] - }, - "d_field": { - "type": "object", - "properties": { - "one": { - "type": ["integer", "object"], - "properties": { - "two": { - "type": "string" - }, - "three": { - "type": "integer" - } - } - } - } - } - }, - "additionalProperties": false - }""".schema - - val result = FlatSchema.build(schema) - - val expectedSubschemas = Set( - "/properties/foo".jsonPointer -> - json"""{ - "type": ["string", "object", "null"], - "properties": { - "one": { - "type": "string" - }, - "two": { - "type": "integer" - } - } - }""".schema, - "/properties/d_field/properties/one".jsonPointer -> - json"""{ - "type": ["integer", "object", "null"], - "properties": { - "two": { - "type": "string" - }, - "three": { - "type": "integer" - } - } - }""".schema, - "/properties/a_field".jsonPointer -> json"""{"type": ["string", "integer", "null"]}""".schema, - "/properties/b_field".jsonPointer -> json"""{"type": ["string", "null"]}""".schema, - "/properties/c_field".jsonPointer -> json"""{"type": ["integer", "number", "null"]}""".schema - ) - - result.subschemas must beEqualTo(expectedSubschemas) and (result.required must beEmpty) - } - - "recognize oneOf with object and string as primitive" >> { - val json = json""" - { - "type": "object", - "properties": { - "union": { - "oneOf": [ - { - "type": "object", - "properties": { - "object_without_properties": { "type": "object" } - } - }, - { - "type": "string" - } - ] - } - }, - "additionalProperties": false - } - """.schema - - val subSchemas = Set( - "/properties/union".jsonPointer -> - json"""{ - "oneOf": [ - { - "type": "object", - "properties": { - "object_without_properties": { "type": "object" } - } - }, - { - "type": "string" - } - ] - }""".schema.copy(`type` = Some(Type.Null)) - ) - - val result = FlatSchema.build(json) - - (result.subschemas must beEqualTo(subSchemas)) and (result.required must beEmpty) - } - - "recognize an optional enum field" >> { - val schema = json""" - { - "type": "object", - "properties": { - "enum_field": { - "enum": [ - "event", - "exception", - "item" - ] - }, - "nonInteractionHit": { - "type": ["boolean", "null"] - } - }, - "additionalProperties": false - } - """.schema - - val expectedSubSchemas = Set( - "/properties/enum_field".jsonPointer -> - json"""{"enum": ["event","exception","item"]}""".schema.copy(`type` = Some(Type.Null)), - "/properties/nonInteractionHit".jsonPointer -> - json"""{"type": ["boolean", "null"]}""".schema) - - val result = FlatSchema.build(schema) - - (result.subschemas must beEqualTo(expectedSubSchemas)) and (result.required must beEmpty) - } - - "recognize an optional nested enum field" >> { - val schema = json""" - { - "type": "object", - "properties": { - "a_field": { - "type": "object", - "properties": { - "enum_field": { - "enum": [ - "event", - "exception", - "item" - ] - } - } - }, - "nonInteractionHit": { - "type": ["boolean", "null"] - } - }, - "additionalProperties": false - } - """.schema - - val expectedSubSchemas = Set( - "/properties/a_field/properties/enum_field".jsonPointer -> - json"""{"enum": ["event","exception","item"]}""".schema.copy(`type` = Some(Type.Null)), - "/properties/nonInteractionHit".jsonPointer -> - json"""{"type": ["boolean", "null"]}""".schema) - - val result = 
FlatSchema.build(schema) - - (result.subschemas must beEqualTo(expectedSubSchemas)) and (result.required must beEmpty) - } - - "recognize a field without type" >> { - val schema = json""" - { - "type": "object", - "properties": { - "a_field": { "type": "string" }, - "b_field": {} - } - } - """.schema - - val expectedSubSchemas = Set( - "/properties/a_field".jsonPointer -> json"""{"type": ["string", "null"]}""".schema, - "/properties/b_field".jsonPointer -> Schema.empty.copy(`type` = Some(Type.Null)) - ) - - val result = FlatSchema.build(schema) - - (result.subschemas must beEqualTo(expectedSubSchemas)) and (result.required must beEmpty) - } - - "add all required properties and skips not-nested required" >> { - val schema = json""" - { - "type": "object", - "required": ["foo"], - "properties": { - "foo": { - "type": "object", - "required": ["one"], - "properties": { - "one": { - "type": "string" - }, - "nonRequiredNested": { - "type": "object", - "required": ["nestedRequired"], - "properties": { - "nestedRequired": {"type": "integer"} - } - } - } - } - }, - "additionalProperties": false - } - """.schema - - val result = FlatSchema.build(schema) - - val expectedRequired = Set("/properties/foo".jsonPointer, "/properties/foo/properties/one".jsonPointer) - val expectedSubschemas = Set( - "/properties/foo/properties/nonRequiredNested/properties/nestedRequired".jsonPointer -> - json"""{"type": ["integer", "null"]}""".schema, - "/properties/foo/properties/one".jsonPointer -> - json"""{"type": "string"}""".schema - ) - - val required = result.required must bePointers(expectedRequired) - val subschemas = result.subschemas must beEqualTo(expectedSubschemas) - - required and subschemas - } - - "skip properties inside patternProperties" >> { - val schema = json""" - { - "type": "object", - "required": ["one"], - "properties": { - "one": { - "type": "object", - "required": ["two"], - "properties": { - "two": { - "type": "string" - }, - "withProps": { - "type": "object", - "patternProperties": { - ".excluded": {"type": "string"}, - ".excluded-with-required": { - "type": "object", - "properties": { - "also-excluded": {"type": "integer"} - } - } - }, - "properties": { - "included": {"type": "integer"} - } - } - } - } - }, - "additionalProperties": false - } - """.schema - - val result = FlatSchema.build(schema) - - val expectedRequired = Set("/properties/one".jsonPointer, "/properties/one/properties/two".jsonPointer) - val expectedSubschemas = Set( - "/properties/one/properties/two".jsonPointer -> - json"""{"type": "string"}""".schema, - "/properties/one/properties/withProps/properties/included".jsonPointer -> - json"""{"type": ["integer", "null"]}""".schema - ) - - val required = result.required must bePointers(expectedRequired) - val subschemas = result.subschemas must beEqualTo(expectedSubschemas) - - required and subschemas - } - - "recognize an oneOf as sum type" >> { - val json = json""" - { - "type": "object", - "properties": { - "union": { - "oneOf": [ - { - "type": "object", - "properties": { - "one": { "type": "integer" } - } - }, - { - "type": "object", - "properties": { - "two": { "type": "string" } - } - } - ] - } - }, - "additionalProperties": false - } - """.schema - - val subSchemas = Set( - "/properties/union".jsonPointer -> - json"""{ - "oneOf": [ - { - "type": "object", - "properties": { - "one": { "type": "integer" } - } - }, - { - "type": "object", - "properties": { - "two": { "type": "string" } - } - } - ] - }""".schema.copy(`type` = Some(Type.Null)) - ) - - val result = 
FlatSchema.build(json) - - (result.subschemas must beEqualTo(subSchemas)) and (result.required must beEmpty) - - } - } - - "nestedRequired" should { - "return true if all parent properties are required (no null in type)" >> { - val subschemas: SubSchemas = - Set("/deeply".jsonPointer, "/deeply/nested".jsonPointer, "/other/property".jsonPointer) - .map((p: Pointer.SchemaPointer) => p -> Schema.empty) - - val schema = FlatSchema(subschemas, Set("/deeply".jsonPointer, "/deeply/nested".jsonPointer), Set.empty[(Pointer.SchemaPointer, Schema)]) - val result = schema.nestedRequired("/deeply/nested/property".jsonPointer) - - result must beTrue - } - } - - "isHeterogeneousUnion" should { - "recognize a Schema with oneOf" >> { - val json = json""" - { - "oneOf": [ - { - "type": "object", - "properties": { - "object_without_properties": { "type": "object" } - } - }, - { - "type": "string" - } - ] - } - """.schema - - FlatSchema.isHeterogeneousUnion(json) must beTrue - } - } - - "extractProperties" should { - "create correct ordered subschemas from 1-0-0 to 1-0-1" >> { - val initial = json""" - { - "type": "object", - "properties": { - "foo": { - "type": "string" - } - }, - "additionalProperties": false - } - """.schema - val initialSchema = SelfDescribingSchema(SchemaMap("com.acme", "example", "jsonschema", SchemaVer.Full(1,0,0)), initial) - - val second = json""" - { - "type": "object", - "properties": { - "foo": { - "type": "string", - "maxLength": 20 - }, - "a_field": { - "type": "integer" - }, - "b_field": { - "type": "integer" - } - }, - "required": ["b_field"], - "additionalProperties": false - } - """.schema - val secondSchema = SelfDescribingSchema(SchemaMap("com.acme", "example", "jsonschema", SchemaVer.Full(1,0,1)), second) - - val schemaList = SchemaList.Full(NonEmptyList.of(initialSchema, secondSchema)) - - val res = extractOrder(FlatSchema.extractProperties(schemaList)) - - val expected = List("foo", "b_field", "a_field") - - res must beEqualTo(expected) - } - - "create correct ordered subschemas from 1-0-0 to 1-0-2" >> { - val initial = json""" - { - "type": "object", - "properties": { - "foo": { - "type": "string" - } - }, - "additionalProperties": false - } - """.schema - val initialSchema = SelfDescribingSchema(SchemaMap("com.acme", "example", "jsonschema", SchemaVer.Full(1,0,0)), initial) - - val second = json""" - { - "type": "object", - "properties": { - "foo": { - "type": "string", - "maxLength": 20 - }, - "bar": { - "type": "integer", - "maximum": 4000 - } - }, - "additionalProperties": false - } - """.schema - val secondSchema = SelfDescribingSchema(SchemaMap("com.acme", "example", "jsonschema", SchemaVer.Full(1,0,1)), second) - - val third = json""" - { - "type": "object", - "properties": { - "foo": { - "type": "string", - "maxLength": 20 - }, - "bar": { - "type": "integer", - "maximum": 4000 - }, - "aField": { - "type": "integer" - }, - "cField": { - "type": "integer" - }, - "dField": { - "type": "string" - } - }, - "required": ["bar", "cField"], - "additionalProperties": false - } - """.schema - val thirdSchema = SelfDescribingSchema(SchemaMap("com.acme", "example", "jsonschema", SchemaVer.Full(1,0,2)), third) - - val schemaList = SchemaList.Full(NonEmptyList.of(initialSchema, secondSchema, thirdSchema)) - - val res = extractOrder(FlatSchema.extractProperties(schemaList)) - - val expected = List("foo", "bar", "c_field", "a_field", "d_field") - - res must beEqualTo(expected) - } - - "create two properties if a property became union [integer,object]" >> { - val initial = 
json""" - { - "type": "object", - "properties": { - "bar": { - "type": "string" - }, - "foo": { - "type": "object", - "maximum": 4000, - "properties": { - "insideObject": {} - } - } - }, - "additionalProperties": false - } - """.schema - val initialSchema = SelfDescribingSchema(SchemaMap("com.acme", "example", "jsonschema", SchemaVer.Full(1,0,0)), initial) - - val second = json""" - { - "type": "object", - "properties": { - "bar": { - "type": "string", - "maxLength": 20 - }, - "foo": { - "type": ["integer", "object"], - "maximum": 4000, - "properties": { - "insideObject": {} - } - } - }, - "additionalProperties": false - } - """.schema - val secondSchema = SelfDescribingSchema(SchemaMap("com.acme", "example", "jsonschema", SchemaVer.Full(1,0,1)), second) - - val schemaListOne = SchemaList.Full(NonEmptyList.of(initialSchema, secondSchema)) - val res = extractOrder(FlatSchema.extractProperties(schemaListOne)) - val expected = List("bar", "foo.inside_object", "foo") - - res must beEqualTo(expected) - } - - "create correct ordered subschemas for complex schema" >> { - val initial = json""" - { - "type": "object", - "properties": { - "foo": { - "type": "string" - }, - "bar": { - "type": "integer", - "maximum": 4000 - } - }, - "additionalProperties": false - } - """.schema - val initialSchema = SelfDescribingSchema(SchemaMap("com.acme", "example", "jsonschema", SchemaVer.Full(1,0,0)), initial) - - val second = json""" - { - "type": "object", - "properties": { - "foo": { - "type": "string", - "maxLength": 20 - }, - "bar": { - "type": "integer", - "maximum": 4000 - }, - "a_field": { - "type": "object", - "properties": { - "b_field": { - "type": "string" - }, - "c_field": { - "type": "object", - "properties": { - "d_field": { - "type": "string" - }, - "e_field": { - "type": "string" - } - } - }, - "d_field": { - "type": "object" - } - }, - "required": ["d_field"] - }, - "b_field": { - "type": "integer" - }, - "c_field": { - "type": "integer" - }, - "d_field": { - "type": "object", - "properties": { - "e_field": { - "type": "string" - }, - "f_field": { - "type": "string" - } - } - } - }, - "required": ["a_field"], - "additionalProperties": false - } - """.schema - val secondSchema = SelfDescribingSchema(SchemaMap("com.acme", "example", "jsonschema", SchemaVer.Full(1,0,1)), second) - - val schemaList = SchemaList.Full(NonEmptyList.of(initialSchema, secondSchema)) - - val res = extractOrder(FlatSchema.extractProperties(schemaList)) - - val expected = List( - "bar", - "foo", - "a_field.d_field", - "a_field.b_field", - "a_field.c_field.d_field", - "a_field.c_field.e_field", - "b_field", - "c_field", - "d_field.e_field", - "d_field.f_field" - ) - - res must beEqualTo(expected) - } - - "create correct ordered subschemas for complex schema" >> { - val initial = json""" - { - "type": "object", - "properties": { - "foo": { - "type": "string" - }, - "bar": { - "type": "integer", - "maximum": 4000 - } - }, - "additionalProperties": false - } - """.schema - val initialSchema = SelfDescribingSchema(SchemaMap("com.acme", "example", "jsonschema", SchemaVer.Full(1,0,0)), initial) - - val second = json""" - { - "type": "object", - "properties": { - "foo": { - "type": "string", - "maxLength": 20 - }, - "bar": { - "type": "integer", - "maximum": 4000 - }, - "a_field": { - "type": "object", - "properties": { - "b_field": { - "type": "string" - }, - "c_field": { - "type": "object", - "properties": { - "d_field": { - "type": "string" - }, - "e_field": { - "type": "string" - } - } - }, - "d_field": { - "type": "object" - 
} - }, - "required": ["d_field"] - }, - "b_field": { - "type": "integer" - }, - "c_field": { - "type": "integer" - }, - "d_field": { - "type": "object", - "properties": { - "e_field": { - "type": "string" - }, - "f_field": { - "type": "string" - } - } - } - }, - "required": ["a_field"], - "additionalProperties": false - } - """.schema - val secondSchema = SelfDescribingSchema(SchemaMap("com.acme", "example", "jsonschema", SchemaVer.Full(1,0,1)), second) - - val third = json""" - { - "type": "object", - "properties": { - "foo": { - "type": "string", - "maxLength": 20 - }, - "bar": { - "type": "integer", - "maximum": 4000 - }, - "a_field": { - "type": "object", - "properties": { - "b_field": { - "type": "string" - }, - "c_field": { - "type": "object", - "properties": { - "d_field": { - "type": "string" - }, - "e_field": { - "type": "string" - } - } - }, - "d_field": { - "type": "object" - } - }, - "required": ["d_field"] - }, - "b_field": { - "type": "integer" - }, - "c_field": { - "type": "integer" - }, - "d_field": { - "type": "object", - "properties": { - "e_field": { - "type": "string" - }, - "f_field": { - "type": "string" - } - } - }, - "e_field": { - "type": "object", - "properties": { - "f_field": { - "type": "string" - }, - "g_field": { - "type": "string" - } - }, - "required": ["g_field"] - }, - "f_field": { - "type": "string" - }, - "g_field": { - "type": "string" - } - }, - "required": ["a_field", "f_field", "e_field"], - "additionalProperties": false - } - """.schema - val thirdSchema = SelfDescribingSchema(SchemaMap("com.acme", "example", "jsonschema", SchemaVer.Full(1,0,2)), third) - - val schemaList = SchemaList.Full(NonEmptyList.of(initialSchema, secondSchema, thirdSchema)) - - val res = extractOrder(FlatSchema.extractProperties(schemaList)) - - val expected = List( - "bar", - "foo", - "a_field.d_field", - "a_field.b_field", - "a_field.c_field.d_field", - "a_field.c_field.e_field", - "b_field", - "c_field", - "d_field.e_field", - "d_field.f_field", - "e_field.g_field", - "f_field", - "e_field.f_field", - "g_field" - ) - - res must beEqualTo(expected) - } - - "duplicate columns if convention has been changed between camelCase and snake_case" >> { - // Cases like this should not be ADDITIONs in a first place and filtered out at schema creation time, - // but there are legacy schemas with cases like this - val initial = json""" - { - "type": "object", - "properties": { - "foo_bar": { - "type": "string" - } - }, - "additionalProperties": false - } - """.schema - val initialSchema = SelfDescribingSchema(SchemaMap("com.acme", "example", "jsonschema", SchemaVer.Full(1,0,0)), initial) - - val second = json""" - { - "type": "object", - "properties": { - "fooBar": { - "type": "string" - } - }, - "additionalProperties": false - } - """.schema - val secondSchema = SelfDescribingSchema(SchemaMap("com.acme", "example", "jsonschema", SchemaVer.Full(1,0,1)), second) - - val schemaList = SchemaList.Full(NonEmptyList.of(initialSchema, secondSchema)) - - val res = extractOrder(FlatSchema.extractProperties(schemaList)) - - val expected = List("foo_bar", "foo_bar") - - res must beEqualTo(expected) - } - - "shred object-only oneOf into multiple columns" >> { - val input = json"""{ - "type": "object", - "properties": { - "errors": { - "oneOf": [ - { - "required": ["type", "parsingError"], - "properties": { - "parsingError": { - "type": "string" - }, - "type": { - "enum": ["Parsing"] - } - } - }, - { - "properties": { - "resolutionError": { }, - "type": { - "enum": ["SchemaResolution", 
"SchemaListResolution"] - } - } - } - ] - } - } - }""".schema - - val initialSchema = SelfDescribingSchema(SchemaMap("com.acme", "example", "jsonschema", SchemaVer.Full(1,0,0)), input) - val schemaList = SchemaList.Full(NonEmptyList.of(initialSchema)) - val res = extractOrder(FlatSchema.extractProperties(schemaList)) - - skipped("Not supported yet") - res must beEqualTo(List("errors.parsingError", "errors.resolutionError", "errors.type")) - } - - "shred an empty schema into 0 columns" >> { - val initial = json""" {"type": "object"}""".schema - val initialSchema = SelfDescribingSchema(SchemaMap("com.acme", "example", "jsonschema", SchemaVer.Full(1,0,0)), initial) - val schemaList = SchemaList.Full(NonEmptyList.of(initialSchema)) - val properties = FlatSchema.extractProperties(schemaList) - val res = extractOrder(properties) - res must beEmpty - } - } - - def bePointers(expected: Set[Pointer.SchemaPointer]): Matcher[Set[Pointer.SchemaPointer]] = { actual: Set[Pointer.SchemaPointer] => - val result = s"""|actual: ${actual.toList.map(_.show).sortBy(_.length).mkString(", ")} - |expected: ${expected.toList.map(_.show).sortBy(_.length).mkString(", ")}""".stripMargin - (actual == expected, result) - } -} diff --git a/modules/core/src/test/scala/com/snowplowanalytics/iglu/schemaddl/migrations/MigrationSpec.scala b/modules/core/src/test/scala/com/snowplowanalytics/iglu/schemaddl/migrations/MigrationSpec.scala deleted file mode 100644 index 21e2b88e..00000000 --- a/modules/core/src/test/scala/com/snowplowanalytics/iglu/schemaddl/migrations/MigrationSpec.scala +++ /dev/null @@ -1,496 +0,0 @@ -/* - * Copyright (c) 2012-2023 Snowplow Analytics Ltd. All rights reserved. - * - * This program is licensed to you under the Apache License Version 2.0, - * and you may not use this file except in compliance with the Apache License Version 2.0. - * You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the Apache License Version 2.0 is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. 
- */ -package com.snowplowanalytics.iglu.schemaddl.migrations - -import io.circe.literal._ -import cats.data._ -import com.snowplowanalytics.iglu.core.{SchemaKey, SchemaMap, SchemaVer, SelfDescribingSchema} -import com.snowplowanalytics.iglu.schemaddl.SpecHelpers._ -import com.snowplowanalytics.iglu.schemaddl.migrations.Migration.BuildError -import com.snowplowanalytics.iglu.schemaddl.IgluSchema -import SchemaList._ -import org.specs2.Specification - -class MigrationSpec extends Specification { def is = s2""" - Check common Schema migrations - create correct addition migration from 1-0-0 to 1-0-1 $e1 - create correct addition migrations from 1-0-0 to 1-0-2 $e2 - create correct addition/modification migrations from 1-0-0 to 1-0-2 $e3 - migrateFrom function in Migration - return error when schemaKey not found in given schemas $e9 - return error when schemaKey is latest state of given schemas $e10 - create migration as expected when schemaKey is initial version of given schemas $e11 - create migration as expected when schemaKey is second version of given schemas $e12 - """ - - private def createSchemaListFull(schemas: NonEmptyList[IgluSchema]) = { - val schemaList = SchemaList.buildMultiple(schemas).right.get.collect { case s: Full => s} - NonEmptyList.fromListUnsafe(schemaList) - } - - def e1 = { - val initial = json""" - { - "type": "object", - "properties": { - "foo": { - "type": "string" - } - }, - "additionalProperties": false - } - """.schema - val initialSchema = SelfDescribingSchema(SchemaMap("com.acme", "example", "jsonschema", SchemaVer.Full(1,0,0)), initial) - - val second = json""" - { - "type": "object", - "properties": { - "foo": { - "type": "string" - }, - "bar": { - "type": "integer", - "maximum": 4000 - } - }, - "additionalProperties": false - } - """.schema - - val secondSchema = SelfDescribingSchema(SchemaMap("com.acme", "example", "jsonschema", SchemaVer.Full(1,0,1)), second) - - val fromSchema = json"""{"type": ["integer", "null"], "maximum": 4000}""".schema - val fromPointer = "/properties/bar".jsonPointer - - val migration = Migration( - "com.acme", - "example", - SchemaVer.Full(1,0,0), - SchemaVer.Full(1,0,1), - SchemaDiff( - List(fromPointer -> fromSchema), - Set.empty, - List.empty - ) - ) - - val segment = SchemaList.Segment(NonEmptyList.of(initialSchema, secondSchema)) - - Migration.fromSegment(segment) must beEqualTo(migration) - } - - def e2 = { - val initial = json""" - { - "type": "object", - "properties": { - "foo": { - "type": "string" - } - }, - "additionalProperties": false - } - """.schema - val initialSchema = SelfDescribingSchema(SchemaMap("com.acme", "example", "jsonschema", SchemaVer.Full(1,0,0)), initial) - - val second = json""" - { - "type": "object", - "properties": { - "foo": { - "type": "string" - }, - "bar": { - "type": "integer", - "maximum": 4000 - } - }, - "additionalProperties": false - } - """.schema - val secondSchema = SelfDescribingSchema(SchemaMap("com.acme", "example", "jsonschema", SchemaVer.Full(1,0,1)), second) - - val third = json""" - { - "type": "object", - "properties": { - "foo": { - "type": "string" - }, - "bar": { - "type": "integer", - "maximum": 4000 - }, - "baz": { - "type": "array" - } - }, - "additionalProperties": false - } - """.schema - val thirdSchema = SelfDescribingSchema(SchemaMap("com.acme", "example", "jsonschema", SchemaVer.Full(1,0,2)), third) - - val migration1 = Migration( - "com.acme", - "example", - SchemaVer.Full(1,0,0), - SchemaVer.Full(1,0,1), - SchemaDiff( - List("/properties/bar".jsonPointer -> 
json"""{"type": ["integer", "null"], "maximum": 4000}""".schema), - Set.empty, - List.empty - ) - ) - val comp1 = Migration.fromSegment(SchemaList.Segment(NonEmptyList.of(initialSchema, secondSchema))) must beEqualTo(migration1) - - val migration2 = Migration( - "com.acme", - "example", - SchemaVer.Full(1,0,0), - SchemaVer.Full(1,0,2), - SchemaDiff( - List( - "/properties/bar".jsonPointer -> json"""{"type": ["integer", "null"], "maximum": 4000}""".schema, - "/properties/baz".jsonPointer -> json"""{"type": ["array", "null"]}""".schema), - Set.empty, - List.empty - ) - ) - val comp2 = Migration.fromSegment(SchemaList.Segment(NonEmptyList.of(initialSchema, secondSchema, thirdSchema))) must beEqualTo(migration2) - - val migration3 = Migration( - "com.acme", - "example", - SchemaVer.Full(1,0,1), - SchemaVer.Full(1,0,2), - SchemaDiff( - List("/properties/baz".jsonPointer -> json"""{"type": ["array", "null"]}""".schema), - Set.empty, - List.empty - ) - ) - val comp3 = Migration.fromSegment(SchemaList.Segment(NonEmptyList.of(secondSchema, thirdSchema))) must beEqualTo(migration3) - - comp1 and comp2 and comp3 - } - - def e3 = { - val initial = json""" - { - "type": "object", - "properties": { - "foo": { - "type": "string" - } - }, - "additionalProperties": false - } - """.schema - val initialSchema = SelfDescribingSchema(SchemaMap("com.acme", "example", "jsonschema", SchemaVer.Full(1,0,0)), initial) - - val second = json""" - { - "type": "object", - "properties": { - "foo": { - "type": "string", - "maxLength": 20 - }, - "bar": { - "type": "integer", - "maximum": 4000 - } - }, - "additionalProperties": false - } - """.schema - val secondSchema = SelfDescribingSchema(SchemaMap("com.acme", "example", "jsonschema", SchemaVer.Full(1,0,1)), second) - - val migration = Migration( - "com.acme", - "example", - SchemaVer.Full(1,0,0), - SchemaVer.Full(1,0,1), - SchemaDiff( - List("/properties/bar".jsonPointer -> json"""{"type": ["integer", "null"], "maximum": 4000}""".schema), - Set( - SchemaDiff.Modified( - "/properties/foo".jsonPointer, - json"""{"type":["string","null"]}""".schema, - json"""{"type":["string","null"],"maxLength": 20}""".schema) - ), - List.empty - ) - ) - - Migration.fromSegment(SchemaList.Segment(NonEmptyList.of(initialSchema, secondSchema))) must beEqualTo(migration) - } - - def e9 = { - val initial = json""" - { - "type": "object", - "properties": { - "foo": { - "type": "string" - } - }, - "additionalProperties": false - } - """.schema - val initialSchema = SelfDescribingSchema(SchemaMap("com.acme", "example", "jsonschema", SchemaVer.Full(1,0,0)), initial) - - val second = json""" - { - "type": "object", - "properties": { - "foo": { - "type": "string" - }, - "bar": { - "type": "integer", - "maximum": 4000 - } - }, - "additionalProperties": false - } - """.schema - val secondSchema = SelfDescribingSchema(SchemaMap("com.acme", "example", "jsonschema", SchemaVer.Full(1,0,1)), second) - - val third = json""" - { - "type": "object", - "properties": { - "foo": { - "type": "string" - }, - "bar": { - "type": "integer", - "maximum": 4000 - }, - "baz": { - "type": "array" - } - }, - "additionalProperties": false - } - """.schema - val thirdSchema = SelfDescribingSchema(SchemaMap("com.acme", "example", "jsonschema", SchemaVer.Full(1,0,2)), third) - - val nonExistingSchemaKey = SchemaKey("com.acme", "non-existing", "jsonschema", SchemaVer.Full(1,0,0)) - val orderedSchemas = createSchemaListFull(NonEmptyList.of(initialSchema, secondSchema, thirdSchema)).head - - val res = 
Migration.migrateFrom(nonExistingSchemaKey, orderedSchemas) - - res must beLeft(BuildError.UnknownSchemaKey: BuildError) - } - - def e10 = { - val initial = json""" - { - "type": "object", - "properties": { - "foo": { - "type": "string" - } - }, - "additionalProperties": false - } - """.schema - val initialSchema = SelfDescribingSchema(SchemaMap("com.acme", "example", "jsonschema", SchemaVer.Full(1,0,0)), initial) - - val second = json""" - { - "type": "object", - "properties": { - "foo": { - "type": "string" - }, - "bar": { - "type": "integer", - "maximum": 4000 - } - }, - "additionalProperties": false - } - """.schema - val secondSchema = SelfDescribingSchema(SchemaMap("com.acme", "example", "jsonschema", SchemaVer.Full(1,0,1)), second) - - val third = json""" - { - "type": "object", - "properties": { - "foo": { - "type": "string" - }, - "bar": { - "type": "integer", - "maximum": 4000 - }, - "baz": { - "type": "array" - } - }, - "additionalProperties": false - } - """.schema - val thirdSchema = SelfDescribingSchema(SchemaMap("com.acme", "example", "jsonschema", SchemaVer.Full(1,0,2)), third) - - val orderedSchemas = createSchemaListFull(NonEmptyList.of(initialSchema, secondSchema, thirdSchema)).head - - val res = Migration.migrateFrom(thirdSchema.self.schemaKey, orderedSchemas) - - res must beLeft(BuildError.NoOp: BuildError) - } - - def e11 = { - val initial = json""" - { - "type": "object", - "properties": { - "foo": { - "type": "string" - } - }, - "additionalProperties": false - } - """.schema - val initialSchema = SelfDescribingSchema(SchemaMap("com.acme", "example", "jsonschema", SchemaVer.Full(1,0,0)), initial) - - val second = json""" - { - "type": "object", - "properties": { - "foo": { - "type": "string" - }, - "bar": { - "type": "integer", - "maximum": 4000 - } - }, - "additionalProperties": false - } - """.schema - val secondSchema = SelfDescribingSchema(SchemaMap("com.acme", "example", "jsonschema", SchemaVer.Full(1,0,1)), second) - - val third = json""" - { - "type": "object", - "properties": { - "foo": { - "type": "string" - }, - "bar": { - "type": "integer", - "maximum": 4000 - }, - "baz": { - "type": "array" - } - }, - "additionalProperties": false - } - """.schema - val thirdSchema = SelfDescribingSchema(SchemaMap("com.acme", "example", "jsonschema", SchemaVer.Full(1,0,2)), third) - - val migration = Migration( - "com.acme", - "example", - SchemaVer.Full(1,0,0), - SchemaVer.Full(1,0,2), - SchemaDiff( - List( - "/properties/bar".jsonPointer -> json"""{"type": ["integer", "null"], "maximum": 4000}""".schema, - "/properties/baz".jsonPointer -> json"""{"type": ["array", "null"]}""".schema), - Set.empty, - List.empty) - ) - - val orderedSchemas = createSchemaListFull(NonEmptyList.of(initialSchema, secondSchema, thirdSchema)).head - - val res = Migration.migrateFrom(initialSchema.self.schemaKey, orderedSchemas) - - res must beRight(migration) - } - - def e12 = { - val initial = json""" - { - "type": "object", - "properties": { - "foo": { - "type": "string" - } - }, - "additionalProperties": false - } - """.schema - val initialSchema = SelfDescribingSchema(SchemaMap("com.acme", "example", "jsonschema", SchemaVer.Full(1,0,0)), initial) - - val second = json""" - { - "type": "object", - "properties": { - "foo": { - "type": "string" - }, - "bar": { - "type": "integer", - "maximum": 4000 - } - }, - "additionalProperties": false - } - """.schema - val secondSchema = SelfDescribingSchema(SchemaMap("com.acme", "example", "jsonschema", SchemaVer.Full(1,0,1)), second) - - val third = 
json""" - { - "type": "object", - "properties": { - "foo": { - "type": "string" - }, - "bar": { - "type": "integer", - "maximum": 4000 - }, - "baz": { - "type": "array" - } - }, - "additionalProperties": false - } - """.schema - val thirdSchema = SelfDescribingSchema(SchemaMap("com.acme", "example", "jsonschema", SchemaVer.Full(1,0,2)), third) - - val migration = Migration( - "com.acme", - "example", - SchemaVer.Full(1,0,1), - SchemaVer.Full(1,0,2), - SchemaDiff( - List("/properties/baz".jsonPointer -> json"""{"type": ["array", "null"]}""".schema), - Set.empty, - List.empty) - ) - - val orderedSchemas = createSchemaListFull(NonEmptyList.of(initialSchema, secondSchema, thirdSchema)).head - - val res = Migration.migrateFrom(secondSchema.self.schemaKey, orderedSchemas) - - res must beRight(migration) - } -} diff --git a/modules/core/src/test/scala/com/snowplowanalytics/iglu/schemaddl/migrations/SchemaDiffSpec.scala b/modules/core/src/test/scala/com/snowplowanalytics/iglu/schemaddl/migrations/SchemaDiffSpec.scala deleted file mode 100644 index d9b1d615..00000000 --- a/modules/core/src/test/scala/com/snowplowanalytics/iglu/schemaddl/migrations/SchemaDiffSpec.scala +++ /dev/null @@ -1,91 +0,0 @@ -/* - * Copyright (c) 2012-2023 Snowplow Analytics Ltd. All rights reserved. - * - * This program is licensed to you under the Apache License Version 2.0, - * and you may not use this file except in compliance with the Apache License Version 2.0. - * You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the Apache License Version 2.0 is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. 
- */ -package com.snowplowanalytics.iglu.schemaddl.migrations - -import cats.data.NonEmptyList - -import io.circe.literal._ - -import com.snowplowanalytics.iglu.core.{SchemaVer, SchemaMap, SelfDescribingSchema} - -import com.snowplowanalytics.iglu.schemaddl.IgluSchema -import com.snowplowanalytics.iglu.schemaddl.SpecHelpers.{JsonOps, StringOps} -import com.snowplowanalytics.iglu.schemaddl.jsonschema.Pointer -import com.snowplowanalytics.iglu.schemaddl.migrations.SchemaDiff.Modified -import com.snowplowanalytics.iglu.schemaddl.migrations.SchemaList.ModelGroupSet - -import org.specs2.mutable.Specification - -class SchemaDiffSpec extends Specification { - "build" should { - "recognize schemas with increased length" in { - val segment = SchemaDiffSpec.unsafeSchemaList(List( - SelfDescribingSchema( - SchemaMap("v", "n", "jsonschema", SchemaVer.Full(1,0,0)), - json"""{"properties": {"one": {"type": "string", "maxLength": 32}}}""".schema - ), - SelfDescribingSchema( - SchemaMap("v", "n", "jsonschema", SchemaVer.Full(1,0,1)), - json"""{"properties": {"one": {"type": "string", "maxLength": 64}}}""".schema - ), - )).toSegment - - val expected = - Set( - Modified( - Pointer.Root, - json"""{"properties": {"one": {"type": "string", "maxLength": 32}}}""".schema, - json"""{"properties": {"one": {"type": "string", "maxLength": 64}}}""".schema - ), - Modified( - "/properties/one".jsonPointer, - json"""{"type": ["string","null"], "maxLength": 32}""".schema, - json"""{"type": ["string","null"], "maxLength": 64}""".schema - ), - ) - - SchemaDiff.build(segment).modified must beEqualTo(expected) - } - } - - "getModifiedProperties" should { - "recognize schemas with increased length" in { - val source = Set( - "/properties/bar".jsonPointer -> - json"""{"type": ["string"], "maxLength": 32}""".schema - ) - val target = Set( - "/properties/bar".jsonPointer -> - json"""{"type": ["string"], "maxLength": 64}""".schema - ) - val expected = Set(SchemaDiff.Modified( - "/properties/bar".jsonPointer, - json"""{"type": ["string"], "maxLength": 32}""".schema, - json"""{"type": ["string"], "maxLength": 64}""".schema - )) - - SchemaDiff.getModifiedProperties(source, target) must beEqualTo(expected) - } - } -} - -object SchemaDiffSpec { - def unsafeSchemaList(list: List[IgluSchema]): SchemaList.Full = { - val modelGroup = ModelGroupSet.groupSchemas(NonEmptyList.fromListUnsafe(list)).head - SchemaList.fromUnambiguous(modelGroup) match { - case Right(f: SchemaList.Full) => f - case Left(value) => throw new RuntimeException(value.toString) - case Right(_) => throw new RuntimeException("Not Full") - } - } -} diff --git a/modules/core/src/test/scala/com/snowplowanalytics/iglu/schemaddl/migrations/SchemaListSpec.scala b/modules/core/src/test/scala/com/snowplowanalytics/iglu/schemaddl/migrations/SchemaListSpec.scala deleted file mode 100644 index 0c851a50..00000000 --- a/modules/core/src/test/scala/com/snowplowanalytics/iglu/schemaddl/migrations/SchemaListSpec.scala +++ /dev/null @@ -1,317 +0,0 @@ -/* - * Copyright (c) 2014-2023 Snowplow Analytics Ltd. All rights reserved. - * - * This program is licensed to you under the Apache License Version 2.0, - * and you may not use this file except in compliance with the Apache License Version 2.0. - * You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0. 
- * - * Unless required by applicable law or agreed to in writing, - * software distributed under the Apache License Version 2.0 is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. - */ - -package com.snowplowanalytics.iglu.schemaddl.migrations - -import scala.util.Random -import io.circe.literal._ -import cats.data._ -import com.snowplowanalytics.iglu.core.{SchemaMap, SchemaVer, SelfDescribingSchema} -import com.snowplowanalytics.iglu.schemaddl.SpecHelpers._ -import com.snowplowanalytics.iglu.schemaddl.migrations.SchemaList.BuildError._ -import SchemaList._ -import org.specs2.Specification - -class SchemaListSpec extends Specification { def is = s2""" - Check SchemaList - extract correct segments $e1 - afterIndex returns correct segment when given index in the range $e2 - create correct groups $e3 - safe build from model group function return error when given list contains ambiguous schema list $e4 - safe build from model group function return error when given list contains gaps $e5 - safe build from model group function creates SchemaList correctly when everything is okay $e6 - unsafe build from model group with reordering function return error when given list contains gaps $e7 - unsafe build from model group with reordering function creates SchemaList correctly when everything is okay $e8 - multiple build function return as expected $e9 - single schema build function return as expected $e10 - """ - - def e1 = { - val schemaMap = SchemaMap("com.acme", "example", "jsonschema", SchemaVer.Full(1,0,0)) - - val schemaListFull1 = Full(createSchemas(schemaMap, 2)) - - val expected1 = NonEmptyList.of( - NonEmptyList.of( - schemaMap.version(SchemaVer.Full(1,0,0)), - schemaMap.version(SchemaVer.Full(1,0,1)) - ) - ) - - val comp1 = schemaListFull1.extractSegments.extractSchemaMaps must beEqualTo(expected1) - - val schemaListFull2 = Full(createSchemas(schemaMap, 4)) - - val expected2 = NonEmptyList.of( - NonEmptyList.of( - schemaMap.version(SchemaVer.Full(1,0,0)), - schemaMap.version(SchemaVer.Full(1,0,1)) - ), - NonEmptyList.of( - schemaMap.version(SchemaVer.Full(1,0,0)), - schemaMap.version(SchemaVer.Full(1,0,1)), - schemaMap.version(SchemaVer.Full(1,0,2)) - ), - NonEmptyList.of( - schemaMap.version(SchemaVer.Full(1,0,0)), - schemaMap.version(SchemaVer.Full(1,0,1)), - schemaMap.version(SchemaVer.Full(1,0,2)), - schemaMap.version(SchemaVer.Full(1,0,3)) - ), - NonEmptyList.of( - schemaMap.version(SchemaVer.Full(1,0,1)), - schemaMap.version(SchemaVer.Full(1,0,2)) - ), - NonEmptyList.of( - schemaMap.version(SchemaVer.Full(1,0,1)), - schemaMap.version(SchemaVer.Full(1,0,2)), - schemaMap.version(SchemaVer.Full(1,0,3)) - ), - NonEmptyList.of( - schemaMap.version(SchemaVer.Full(1,0,2)), - schemaMap.version(SchemaVer.Full(1,0,3)) - ) - ) - - val comp2 = schemaListFull2.extractSegments.extractSchemaMaps must beEqualTo(expected2) - - comp1 and comp2 - } - - def e2 = { - val schemaMap = SchemaMap("com.acme", "example", "jsonschema", SchemaVer.Full(1,0,0)) - val schemas = createSchemas(schemaMap, 4) - - val schemaListFull = Full(schemas) - - val res1 = schemaListFull.afterIndex(1).extractSchemaMaps must beSome( - NonEmptyList.of( - schemaMap.version(SchemaVer.Full(1,0,1)), - schemaMap.version(SchemaVer.Full(1,0,2)), - schemaMap.version(SchemaVer.Full(1,0,3)) - ) - ) - - val res2 = schemaListFull.afterIndex(3).extractSchemaMaps must 
beSome( - NonEmptyList.of( - schemaMap.version(SchemaVer.Full(1,0,3)) - ) - ) - - val res3 = schemaListFull.afterIndex(5).extractSchemaMaps must beNone - - res1 and res2 and res3 - } - - def e3 = { - val schemaMap1 = SchemaMap("com.acme", "example1", "jsonschema", SchemaVer.Full(1,0,0)) - val schemaMap2 = SchemaMap("com.acme", "example2", "jsonschema", SchemaVer.Full(1,0,0)) - val schemaMap3 = SchemaMap("com.acme", "example3", "jsonschema", SchemaVer.Full(1,0,0)) - val schemaMap4 = SchemaMap("com.acme", "example4", "jsonschema", SchemaVer.Full(1,0,0)) - - val group1 = createSchemas(schemaMap1, 2) - val group2 = createSchemas(schemaMap2, 4) - val group3 = createSchemas(schemaMap3, 6) - val group4 = createSchemas(schemaMap4, 1) - - val output = ModelGroupSet.groupSchemas( - group1.concatNel(group2).concatNel(group3).concatNel(group4) - ).map(_.schemas) - - output.toList.toSet must beEqualTo(Set(group1, group2, group3, group4)) - } - - def e4 = { - val schemaMap = SchemaMap("com.acme", "example1", "jsonschema", SchemaVer.Full(1,0,0)) - - val tempGroup = createSchemas(schemaMap, 2) - val group = tempGroup.append(tempGroup.last.copy(self = schemaMap.version(SchemaVer.Full(1,1,0)))) - - val modelGroup = ModelGroupSet.groupSchemas(group).head - - val output = SchemaList.fromUnambiguous(modelGroup) - - output must beLeft(AmbiguousOrder(modelGroup)) - } - - def e5 = { - val schemaMap = SchemaMap("com.acme", "example1", "jsonschema", SchemaVer.Full(1,0,0)) - - val tempGroup1 = createSchemas(schemaMap, 2) - val modelGroupWithGapInAddition = ModelGroupSet.groupSchemas( - tempGroup1 - .append(tempGroup1.last.copy(self = schemaMap.version(SchemaVer.Full(1,0,2)))) - .append(tempGroup1.last.copy(self = schemaMap.version(SchemaVer.Full(1,0,4)))) - ).head - - val comp1 = SchemaList.fromUnambiguous(modelGroupWithGapInAddition) must beLeft(GapInModelGroup(modelGroupWithGapInAddition)) - - val tempGroup2 = createSchemas(schemaMap, 2, addition = false) - val modelGroupWithGapInRevision = ModelGroupSet.groupSchemas( - tempGroup2 - .append(tempGroup2.last.copy(self = schemaMap.version(SchemaVer.Full(1,1,0)))) - .append(tempGroup2.last.copy(self = schemaMap.version(SchemaVer.Full(1,3,0)))) - ).head - - val comp2 = SchemaList.fromUnambiguous(modelGroupWithGapInRevision) must beLeft(GapInModelGroup(modelGroupWithGapInRevision)) - - comp1 and comp2 - } - - def e6 = { - val schemaMap = SchemaMap("com.acme", "example1", "jsonschema", SchemaVer.Full(1,0,0)) - - val modelGroup1 = ModelGroupSet.groupSchemas(createSchemas(schemaMap, 4)).head - val schemaListFullComp = SchemaList.fromUnambiguous(modelGroup1) must beRight(Full(modelGroup1.schemas)) - - val modelGroup2 = ModelGroupSet.groupSchemas(createSchemas(schemaMap, 1)).head - val singleSchemaComp = SchemaList.fromUnambiguous(modelGroup2) must beRight(Single(modelGroup2.schemas.head)) - - schemaListFullComp and singleSchemaComp - } - - def e7 = { - val schemaMap = SchemaMap("com.acme", "example1", "jsonschema", SchemaVer.Full(1,0,0)) - - val tempGroup1 = createSchemas(schemaMap, 2) - val modelGroupWithGapInAddition = ModelGroupSet.groupSchemas( - tempGroup1 - .append(tempGroup1.last.copy(self = schemaMap.version(SchemaVer.Full(1,0,2)))) - .append(tempGroup1.last.copy(self = schemaMap.version(SchemaVer.Full(1,0,4)))) - ).head - - val comp1 = SchemaList.unsafeBuildWithReorder(modelGroupWithGapInAddition) must beLeft(GapInModelGroup(modelGroupWithGapInAddition)) - - val tempGroup2 = createSchemas(schemaMap, 2, addition = false) - val modelGroupWithGapInRevision = 
ModelGroupSet.groupSchemas( - tempGroup2 - .append(tempGroup2.last.copy(self = schemaMap.version(SchemaVer.Full(1,1,0)))) - .append(tempGroup2.last.copy(self = schemaMap.version(SchemaVer.Full(1,3,0)))) - ).head - - val comp2 = SchemaList.unsafeBuildWithReorder(modelGroupWithGapInRevision) must beLeft(GapInModelGroup(modelGroupWithGapInRevision)) - - comp1 and comp2 - } - - def e8 = { - val schemaMap = SchemaMap("com.acme", "example1", "jsonschema", SchemaVer.Full(1,0,0)) - - val tempGroup = createSchemas(schemaMap, 2, true, 2) - val group = tempGroup - .append(tempGroup.last.copy(self = schemaMap.version(SchemaVer.Full(2,1,0)))) - .append(tempGroup.last.copy(self = schemaMap.version(SchemaVer.Full(2,1,1)))) - val shuffled = NonEmptyList.fromListUnsafe(Random.shuffle(group.toList)) - val shuffledModelGroup = ModelGroupSet.groupSchemas(shuffled).head - val notShuffledModelGroup = ModelGroupSet.groupSchemas(group).head - val comp1 = SchemaList.unsafeBuildWithReorder(shuffledModelGroup) must beRight(Full(notShuffledModelGroup.schemas)) - - val modelGroupWithSingleItem = ModelGroupSet.groupSchemas(createSchemas(schemaMap, 1)).head - val comp2 = SchemaList.unsafeBuildWithReorder(modelGroupWithSingleItem) must beRight(Single(modelGroupWithSingleItem.schemas.head)) - - comp1 and comp2 - } - - def e9 = { - val schemaMap1 = SchemaMap("com.acme", "example1", "jsonschema", SchemaVer.Full(1,0,0)) - val schemaMap2 = SchemaMap("com.acme", "example2", "jsonschema", SchemaVer.Full(1,0,0)) - val schemaMap3 = SchemaMap("com.acme", "example3", "jsonschema", SchemaVer.Full(1,0,0)) - val schemaMap4 = SchemaMap("com.acme", "example4", "jsonschema", SchemaVer.Full(1,0,0)) - - val tempGroup1 = createSchemas(schemaMap1, 2) - val ambiguousGroup = tempGroup1.append(tempGroup1.last.copy(self = schemaMap1.version(SchemaVer.Full(1,1,0)))) - val ambiguousModelGroup = ModelGroupSet.groupSchemas(ambiguousGroup).head - - val tempGroup2 = createSchemas(schemaMap2, 2) - val groupWithGap = tempGroup2 - .append(tempGroup2.last.copy(self = schemaMap2.version(SchemaVer.Full(1,0,2)))) - .append(tempGroup2.last.copy(self = schemaMap2.version(SchemaVer.Full(1,0,4)))) - val modelGroupWithGap = ModelGroupSet.groupSchemas(groupWithGap).head - - val correctMultiple = createSchemas(schemaMap3, 4) - val correctSingle = createSchemas(schemaMap4, 1) - - val res = SchemaList.buildMultiple( - ambiguousGroup.concatNel(groupWithGap).concatNel(correctMultiple).concatNel(correctSingle) - ) - - val expected = Ior.both( - NonEmptyList.of( - AmbiguousOrder(ambiguousModelGroup), - GapInModelGroup(modelGroupWithGap) - ), - NonEmptyList.of( - Full(correctMultiple), - Single(correctSingle.head) - ) - ) - - res must beEqualTo(expected) - } - - def e10 = { - val schemaMap = SchemaMap("com.acme", "example", "jsonschema", SchemaVer.Full(1,0,0)) - val schema = createSchemas(schemaMap, 1).head - - val schemaWithModelZero = schema.copy(self = schemaMap.version(SchemaVer.Full(0,0,0))) - val schemaWithModelZeroComp = SchemaList.buildSingleSchema(schemaWithModelZero) must beNone - - val schemaWithNonZeroRevision = schema.copy(self = schemaMap.version(SchemaVer.Full(1,1,0))) - val schemaWithNonZeroRevisionComp = SchemaList.buildSingleSchema(schemaWithNonZeroRevision) must beNone - - val schemaWithNonZeroAddition = schema.copy(self = schemaMap.version(SchemaVer.Full(1,0,1))) - val schemaWithNonZeroAdditionComp = SchemaList.buildSingleSchema(schemaWithNonZeroAddition) must beNone - - val schemaWithCorrectVersion = schema.copy(self = 
schemaMap.version(SchemaVer.Full(1,0,0))) - val schemaWithCorrectVersionComp = SchemaList.buildSingleSchema(schemaWithCorrectVersion) must beSome(Single(schemaWithCorrectVersion)) - - schemaWithModelZeroComp - .and(schemaWithNonZeroRevisionComp) - .and(schemaWithNonZeroAdditionComp) - .and(schemaWithCorrectVersionComp) - } - - private def createSchemas(schemaMap: SchemaMap, count: Int, addition: Boolean = true, model: Int = 1) = { - val schemaJson = json""" - { - "type": "object", - "properties": { - "foo": { - "type": "string" - } - }, - "additionalProperties": false - } - """.schema - val res = (0 until count).map { i => - val version = if (addition) SchemaVer.Full(model, 0, i) else SchemaVer.Full(model, i, 0) - SelfDescribingSchema(schemaMap.version(version), schemaJson) - } - NonEmptyList.fromListUnsafe(res.toList) - } - - private implicit class ChangeSchemaMapVersion(val schemaMap: SchemaMap) { - def version(version: SchemaVer.Full): SchemaMap = - SchemaMap(schemaMap.schemaKey.copy(version = version)) - } - - private implicit class ExtractSchemaMapsFromSegments(val schemaListSegments: NonEmptyList[Segment]) { - def extractSchemaMaps: NonEmptyList[NonEmptyList[SchemaMap]] = - schemaListSegments.map(_.schemas.map(_.self)) - } - - private implicit class ExtractSchemaMapsFromOptionalSegment(val schemaListSegment: Option[Segment]) { - def extractSchemaMaps: Option[NonEmptyList[SchemaMap]] = - schemaListSegment.map(_.schemas.map(_.self)) - } -} diff --git a/modules/core/src/test/scala/com/snowplowanalytics/iglu/schemaddl/redshift/ShredModelSpec.scala b/modules/core/src/test/scala/com/snowplowanalytics/iglu/schemaddl/redshift/ShredModelSpec.scala new file mode 100644 index 00000000..852dd927 --- /dev/null +++ b/modules/core/src/test/scala/com/snowplowanalytics/iglu/schemaddl/redshift/ShredModelSpec.scala @@ -0,0 +1,706 @@ +package com.snowplowanalytics.iglu.schemaddl.redshift + +import cats.data.NonEmptyList +import cats.syntax.either._ +import com.snowplowanalytics.iglu.core.{SchemaKey, SchemaMap, SchemaVer, SelfDescribingSchema} +import org.specs2.mutable.Specification +import com.snowplowanalytics.iglu.schemaddl.SpecHelpers._ +import com.snowplowanalytics.iglu.schemaddl.redshift.ShredModel.{GoodModel, RecoveryModel} +import com.snowplowanalytics.iglu.schemaddl.redshift.ShredModelSpec.{ModelMergeOps, dummyKey, dummyKey1, dummyKey2, dummyModel} +import com.snowplowanalytics.iglu.schemaddl.redshift.internal.Migrations.NullableRequired +import io.circe.literal._ + +class ShredModelSpec extends Specification { + "model sql representation" should { + "render shred table, ordering column by nullability" in { + dummyModel.toTableSql("custom") must beEqualTo( + """CREATE TABLE IF NOT EXISTS custom.com_acme_example_1 ( + | "schema_vendor" VARCHAR(128) ENCODE ZSTD NOT NULL, + | "schema_name" VARCHAR(128) ENCODE ZSTD NOT NULL, + | "schema_format" VARCHAR(128) ENCODE ZSTD NOT NULL, + | "schema_version" VARCHAR(128) ENCODE ZSTD NOT NULL, + | "root_id" CHAR(36) ENCODE RAW NOT NULL, + | "root_tstamp" TIMESTAMP ENCODE ZSTD NOT NULL, + | "ref_root" VARCHAR(255) ENCODE ZSTD NOT NULL, + | "ref_tree" VARCHAR(1500) ENCODE ZSTD NOT NULL, + | "ref_parent" VARCHAR(255) ENCODE ZSTD NOT NULL, + | "a_field.d_field" VARCHAR(4096) ENCODE ZSTD NOT NULL, + | "e_field.g_field" VARCHAR(4096) ENCODE ZSTD NOT NULL, + | "a_field.b_field" VARCHAR(4096) ENCODE ZSTD, + | "a_field.c_field.d_field" VARCHAR(4096) ENCODE ZSTD, + | "a_field.c_field.e_field" VARCHAR(4096) ENCODE ZSTD, + | "b_field" BIGINT ENCODE ZSTD, + | "bar" 
SMALLINT ENCODE ZSTD, + | "c_field" BIGINT ENCODE ZSTD, + | "d_field.e_field" VARCHAR(4096) ENCODE ZSTD, + | "d_field.f_field" VARCHAR(4096) ENCODE ZSTD, + | "e_field.f_field" VARCHAR(4096) ENCODE ZSTD, + | "f_field" VARCHAR(4096) ENCODE ZSTD, + | "foo" VARCHAR(20) ENCODE ZSTD, + | "g_field" VARCHAR(4096) ENCODE ZSTD, + | FOREIGN KEY (root_id) REFERENCES custom.events(event_id) + |) + |DISTSTYLE KEY + |DISTKEY (root_id) + |SORTKEY (root_tstamp); + | + |COMMENT ON TABLE custom.com_acme_example_1 IS 'iglu:com.acme/example/jsonschema/1-0-0'; + |""".stripMargin) + } + "render recovery table" in { + dummyModel.makeRecovery(NonEmptyList.one(NullableRequired( + ShredModelEntry("/".jsonPointer, json"""{"type": "string"}""".schema) + ))).toTableSql("custom") must beEqualTo( + """CREATE TABLE IF NOT EXISTS custom.com_acme_example_1_0_0_recovered_235658654 ( + | "schema_vendor" VARCHAR(128) ENCODE ZSTD NOT NULL, + | "schema_name" VARCHAR(128) ENCODE ZSTD NOT NULL, + | "schema_format" VARCHAR(128) ENCODE ZSTD NOT NULL, + | "schema_version" VARCHAR(128) ENCODE ZSTD NOT NULL, + | "root_id" CHAR(36) ENCODE RAW NOT NULL, + | "root_tstamp" TIMESTAMP ENCODE ZSTD NOT NULL, + | "ref_root" VARCHAR(255) ENCODE ZSTD NOT NULL, + | "ref_tree" VARCHAR(1500) ENCODE ZSTD NOT NULL, + | "ref_parent" VARCHAR(255) ENCODE ZSTD NOT NULL, + | "a_field.d_field" VARCHAR(4096) ENCODE ZSTD NOT NULL, + | "e_field.g_field" VARCHAR(4096) ENCODE ZSTD NOT NULL, + | "a_field.b_field" VARCHAR(4096) ENCODE ZSTD, + | "a_field.c_field.d_field" VARCHAR(4096) ENCODE ZSTD, + | "a_field.c_field.e_field" VARCHAR(4096) ENCODE ZSTD, + | "b_field" BIGINT ENCODE ZSTD, + | "bar" SMALLINT ENCODE ZSTD, + | "c_field" BIGINT ENCODE ZSTD, + | "d_field.e_field" VARCHAR(4096) ENCODE ZSTD, + | "d_field.f_field" VARCHAR(4096) ENCODE ZSTD, + | "e_field.f_field" VARCHAR(4096) ENCODE ZSTD, + | "f_field" VARCHAR(4096) ENCODE ZSTD, + | "foo" VARCHAR(20) ENCODE ZSTD, + | "g_field" VARCHAR(4096) ENCODE ZSTD, + | FOREIGN KEY (root_id) REFERENCES custom.events(event_id) + |) + |DISTSTYLE KEY + |DISTKEY (root_id) + |SORTKEY (root_tstamp); + | + |COMMENT ON TABLE custom.com_acme_example_1_0_0_recovered_235658654 IS 'iglu:com.acme/example/jsonschema/1-0-0'; + |""".stripMargin) + } + } + + "factory method" should { + "transform events" in { + + dummyModel.jsonToStrings( + json"""{ + "a_field": {"d_field": "zzzz", "g_field": "gggg"}, + "e_field": {"g_field": "xxxx"}, + "bar": "ssss" + }""") must beEqualTo(List( + "zzzz", "xxxx", "\\N", "\\N", "\\N", "\\N", "ssss", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N")) + } + "transform events with special characters" in { + dummyModel.jsonToStrings( + json"""{ + "a_field": {"d_field": "z\tzzz", "g_field": "gggg"}, + "e_field": {"g_field": "xxxx"}, + "bar": "ssss" + }""") must beEqualTo(List( + "z zzz", "xxxx", "\\N", "\\N", "\\N", "\\N", "ssss", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N")) + } + } + + "model migrations" should { + "should merge with varchar widening" in { + val s1 = ShredModel.good(dummyKey, + json"""{ + "type": "object", + "properties": { + "foo": { + "type": "string", + "maxLength": 20 + }} + }""".schema) + val s2 = ShredModel.good(dummyKey1, + json"""{ + "type": "object", + "properties": { + "foo": { + "type": "string", + "maxLength": 30 + }} + }""".schema) + s1.merge(s2).toTestString must beRight(( + """CREATE TABLE IF NOT EXISTS s.com_acme_example_1 ( + | "schema_vendor" VARCHAR(128) ENCODE ZSTD NOT NULL, + | "schema_name" VARCHAR(128) ENCODE ZSTD NOT NULL, + | "schema_format" VARCHAR(128) ENCODE 
ZSTD NOT NULL, + | "schema_version" VARCHAR(128) ENCODE ZSTD NOT NULL, + | "root_id" CHAR(36) ENCODE RAW NOT NULL, + | "root_tstamp" TIMESTAMP ENCODE ZSTD NOT NULL, + | "ref_root" VARCHAR(255) ENCODE ZSTD NOT NULL, + | "ref_tree" VARCHAR(1500) ENCODE ZSTD NOT NULL, + | "ref_parent" VARCHAR(255) ENCODE ZSTD NOT NULL, + | "foo" VARCHAR(30) ENCODE ZSTD, + | FOREIGN KEY (root_id) REFERENCES s.events(event_id) + |) + |DISTSTYLE KEY + |DISTKEY (root_id) + |SORTKEY (root_tstamp); + | + |COMMENT ON TABLE s.com_acme_example_1 IS 'iglu:com.acme/example/jsonschema/1-0-1'; + | + |-- WARNING: only apply this file to your database if the following SQL returns the expected: + |-- + |-- SELECT pg_catalog.obj_description(c.oid) FROM pg_catalog.pg_class c WHERE c.relname = 'com_acme_example_1'; + |-- obj_description + |-- ----------------- + |-- iglu:com.acme/example/jsonschema/1-0-0 + |-- (1 row) + | + | ALTER TABLE s.com_acme_example_1 + | ALTER COLUMN "foo" TYPE VARCHAR(30); + | + |-- NO ADDED COLUMNS CAN BE EXPRESSED IN SQL MIGRATION + | + |COMMENT ON TABLE s.com_acme_example_1 IS 'iglu:com.acme/example/jsonschema/1-0-1'; + |""".stripMargin + )) + } + + "should detect int to str conversion errors" in { + val s1 = SelfDescribingSchema( + SchemaMap(SchemaKey("dummy-vendor", "dummy-name", "jsonschema", SchemaVer.Full(1, 0, 0))), + json"""{ + "type": "object", + "properties": { + "stringKey": { + "type": "string" + } + }, + "self": { + "vendor": "com.snowplowanalytics", + "name": "json1", + "format": "jsonschema", + "version": "1-0-0" + }, + "$$schema": "http://iglucentral.com/schemas/com.snowplowanalytics.self-desc/schema/jsonschema/1-0-0#" + }""".schema) + + + val s2 = SelfDescribingSchema( + SchemaMap(SchemaKey("test.lukasz", "snowman", "jsonschema", SchemaVer.Full(1, 0, 0))), + json"""{ + "type": "object", + "properties": { + "stringKey": { + "type": "integer" + } + }, + "self": { + "vendor": "dummy-vendor", + "name": "dummy-name", + "format": "jsonschema", + "version": "1-0-0" + }, + "$$schema": "http://iglucentral.com/schemas/com.snowplowanalytics.self-desc/schema/jsonschema/1-0-0#" + }""".schema) + + isRedshiftMigrationBreaking(List(s1), s2) must beTrue + } + + "should make a recovery model when incompatible encodings are merged" in { + + val s1 = ShredModel.good(dummyKey, + json"""{ + "type": "object", + "properties": { + "foo": { + "type": "string", + "maxLength": 20 + }} + }""".schema) + val s2 = ShredModel.good(dummyKey1, + json"""{ + "type": "object", + "properties": { + "foo": { + "type": "number" + }} + }""".schema) + + s1.merge(s2).toTestString must beLeft( + """CREATE TABLE IF NOT EXISTS s.com_acme_example_1_1_0_recovered_194359593 ( + | "schema_vendor" VARCHAR(128) ENCODE ZSTD NOT NULL, + | "schema_name" VARCHAR(128) ENCODE ZSTD NOT NULL, + | "schema_format" VARCHAR(128) ENCODE ZSTD NOT NULL, + | "schema_version" VARCHAR(128) ENCODE ZSTD NOT NULL, + | "root_id" CHAR(36) ENCODE RAW NOT NULL, + | "root_tstamp" TIMESTAMP ENCODE ZSTD NOT NULL, + | "ref_root" VARCHAR(255) ENCODE ZSTD NOT NULL, + | "ref_tree" VARCHAR(1500) ENCODE ZSTD NOT NULL, + | "ref_parent" VARCHAR(255) ENCODE ZSTD NOT NULL, + | "foo" DOUBLE PRECISION ENCODE RAW, + | FOREIGN KEY (root_id) REFERENCES s.events(event_id) + |) + |DISTSTYLE KEY + |DISTKEY (root_id) + |SORTKEY (root_tstamp); + | + |COMMENT ON TABLE s.com_acme_example_1_1_0_recovered_194359593 IS 'iglu:com.acme/example/jsonschema/1-0-1'; + | + |Incompatible encoding in column foo old type RedshiftVarchar(20)/ZstdEncoding new type 
RedshiftDouble/RawEncoding""".stripMargin + ) + } + + "should make a recovery model when not null field is removed" in { + val s1 = ShredModel.good(dummyKey, + json"""{ + "type": "object", + "properties": { + "foo": { + "type": "string", + "maxLength": 20 + }}, + "required": ["foo"] + }""".schema) + val s2 = ShredModel.good(dummyKey1, + json"""{ + "type": "object", + "properties": { + "foo1": { + "type": "number" + }} + }""".schema) + + s1.merge(s2).toTestString must beLeft( + """CREATE TABLE IF NOT EXISTS s.com_acme_example_1_1_0_recovered_1202144068 ( + | "schema_vendor" VARCHAR(128) ENCODE ZSTD NOT NULL, + | "schema_name" VARCHAR(128) ENCODE ZSTD NOT NULL, + | "schema_format" VARCHAR(128) ENCODE ZSTD NOT NULL, + | "schema_version" VARCHAR(128) ENCODE ZSTD NOT NULL, + | "root_id" CHAR(36) ENCODE RAW NOT NULL, + | "root_tstamp" TIMESTAMP ENCODE ZSTD NOT NULL, + | "ref_root" VARCHAR(255) ENCODE ZSTD NOT NULL, + | "ref_tree" VARCHAR(1500) ENCODE ZSTD NOT NULL, + | "ref_parent" VARCHAR(255) ENCODE ZSTD NOT NULL, + | "foo1" DOUBLE PRECISION ENCODE RAW, + | FOREIGN KEY (root_id) REFERENCES s.events(event_id) + |) + |DISTSTYLE KEY + |DISTKEY (root_id) + |SORTKEY (root_tstamp); + | + |COMMENT ON TABLE s.com_acme_example_1_1_0_recovered_1202144068 IS 'iglu:com.acme/example/jsonschema/1-0-1'; + | + |Making required column nullable foo""".stripMargin + ) + } + + + "should make a ignore varchar narrowing" in { + val s1 = ShredModel.good(dummyKey, + json"""{ + "type": "object", + "properties": { + "foo": { + "type": "string", + "maxLength": 20 + }} + }""".schema) + val s2 = ShredModel.good(dummyKey1, + json"""{ + "type": "object", + "properties": { + "foo": { + "type": "string", + "maxLength": 10 + }} + }""".schema) + + s1.merge(s2).toTestString must beRight( + """CREATE TABLE IF NOT EXISTS s.com_acme_example_1 ( + | "schema_vendor" VARCHAR(128) ENCODE ZSTD NOT NULL, + | "schema_name" VARCHAR(128) ENCODE ZSTD NOT NULL, + | "schema_format" VARCHAR(128) ENCODE ZSTD NOT NULL, + | "schema_version" VARCHAR(128) ENCODE ZSTD NOT NULL, + | "root_id" CHAR(36) ENCODE RAW NOT NULL, + | "root_tstamp" TIMESTAMP ENCODE ZSTD NOT NULL, + | "ref_root" VARCHAR(255) ENCODE ZSTD NOT NULL, + | "ref_tree" VARCHAR(1500) ENCODE ZSTD NOT NULL, + | "ref_parent" VARCHAR(255) ENCODE ZSTD NOT NULL, + | "foo" VARCHAR(20) ENCODE ZSTD, + | FOREIGN KEY (root_id) REFERENCES s.events(event_id) + |) + |DISTSTYLE KEY + |DISTKEY (root_id) + |SORTKEY (root_tstamp); + | + |COMMENT ON TABLE s.com_acme_example_1 IS 'iglu:com.acme/example/jsonschema/1-0-1'; + | + |-- WARNING: only apply this file to your database if the following SQL returns the expected: + |-- + |-- SELECT pg_catalog.obj_description(c.oid) FROM pg_catalog.pg_class c WHERE c.relname = 'com_acme_example_1'; + |-- obj_description + |-- ----------------- + |-- iglu:com.acme/example/jsonschema/1-0-0 + |-- (1 row) + | + | + |-- NO ADDED COLUMNS CAN BE EXPRESSED IN SQL MIGRATION + | + |COMMENT ON TABLE s.com_acme_example_1 IS 'iglu:com.acme/example/jsonschema/1-0-1'; + |""".stripMargin + ) + } + + "should merge multiple schemas" in { + val s1 = ShredModel.good(dummyKey, + json"""{ + "type": "object", + "properties": { + "foo": { + "type": "string", + "maxLength": 20 + }} + }""".schema) + val s2 = ShredModel.good(dummyKey1, + json"""{ + "type": "object", + "properties": { + "bar": { + "type": "string", + "maxLength": 10 + }} + }""".schema) + val s3 = ShredModel.good(dummyKey2, + json"""{ + "type": "object", + "properties": { + "foo": { + "type": "string", + "maxLength": 30 + }} + 
}""".schema) + s1.merge(s2).flatMap(_.merge(s3)).toTestString must beRight( + """CREATE TABLE IF NOT EXISTS s.com_acme_example_1 ( + | "schema_vendor" VARCHAR(128) ENCODE ZSTD NOT NULL, + | "schema_name" VARCHAR(128) ENCODE ZSTD NOT NULL, + | "schema_format" VARCHAR(128) ENCODE ZSTD NOT NULL, + | "schema_version" VARCHAR(128) ENCODE ZSTD NOT NULL, + | "root_id" CHAR(36) ENCODE RAW NOT NULL, + | "root_tstamp" TIMESTAMP ENCODE ZSTD NOT NULL, + | "ref_root" VARCHAR(255) ENCODE ZSTD NOT NULL, + | "ref_tree" VARCHAR(1500) ENCODE ZSTD NOT NULL, + | "ref_parent" VARCHAR(255) ENCODE ZSTD NOT NULL, + | "foo" VARCHAR(30) ENCODE ZSTD, + | "bar" VARCHAR(10) ENCODE ZSTD, + | FOREIGN KEY (root_id) REFERENCES s.events(event_id) + |) + |DISTSTYLE KEY + |DISTKEY (root_id) + |SORTKEY (root_tstamp); + | + |COMMENT ON TABLE s.com_acme_example_1 IS 'iglu:com.acme/example/jsonschema/1-0-2'; + | + |-- WARNING: only apply this file to your database if the following SQL returns the expected: + |-- + |-- SELECT pg_catalog.obj_description(c.oid) FROM pg_catalog.pg_class c WHERE c.relname = 'com_acme_example_1'; + |-- obj_description + |-- ----------------- + |-- iglu:com.acme/example/jsonschema/1-0-0 + |-- (1 row) + | + | ALTER TABLE s.com_acme_example_1 + | ALTER COLUMN "foo" TYPE VARCHAR(30); + | + |BEGIN TRANSACTION; + | + | ALTER TABLE s.com_acme_example_1 + | ADD COLUMN "bar" VARCHAR(10) ENCODE ZSTD; + | + | COMMENT ON TABLE s.com_acme_example_1 IS 'iglu:com.acme/example/jsonschema/1-0-2'; + | + |END TRANSACTION;""".stripMargin + ) + } + + "should merge multiple schemas when only adding columns" in { + val s1 = SelfDescribingSchema(SchemaMap(dummyKey), + json"""{ + "type": "object", + "properties": { + "foo": { + "type": "string", + "maxLength": 20 + }} + }""".schema) + val s2 = SelfDescribingSchema(SchemaMap(dummyKey1), + json"""{ + "type": "object", + "properties": { + "zoo": { + "type": "number" + }} + }""".schema) + val s3 = SelfDescribingSchema(SchemaMap(dummyKey2), + json"""{ + "type": "object", + "properties": { + "foo1": { + "type": "string", + "maxLength": 30 + }} + }""".schema) + + getFinalMergedModel(NonEmptyList.of(s1, s2, s3)) + .asRight[RecoveryModel].toTestString must beRight( + """CREATE TABLE IF NOT EXISTS s.com_acme_example_1 ( + | "schema_vendor" VARCHAR(128) ENCODE ZSTD NOT NULL, + | "schema_name" VARCHAR(128) ENCODE ZSTD NOT NULL, + | "schema_format" VARCHAR(128) ENCODE ZSTD NOT NULL, + | "schema_version" VARCHAR(128) ENCODE ZSTD NOT NULL, + | "root_id" CHAR(36) ENCODE RAW NOT NULL, + | "root_tstamp" TIMESTAMP ENCODE ZSTD NOT NULL, + | "ref_root" VARCHAR(255) ENCODE ZSTD NOT NULL, + | "ref_tree" VARCHAR(1500) ENCODE ZSTD NOT NULL, + | "ref_parent" VARCHAR(255) ENCODE ZSTD NOT NULL, + | "foo" VARCHAR(20) ENCODE ZSTD, + | "zoo" DOUBLE PRECISION ENCODE RAW, + | "foo1" VARCHAR(30) ENCODE ZSTD, + | FOREIGN KEY (root_id) REFERENCES s.events(event_id) + |) + |DISTSTYLE KEY + |DISTKEY (root_id) + |SORTKEY (root_tstamp); + | + |COMMENT ON TABLE s.com_acme_example_1 IS 'iglu:com.acme/example/jsonschema/1-0-2'; + | + |-- WARNING: only apply this file to your database if the following SQL returns the expected: + |-- + |-- SELECT pg_catalog.obj_description(c.oid) FROM pg_catalog.pg_class c WHERE c.relname = 'com_acme_example_1'; + |-- obj_description + |-- ----------------- + |-- iglu:com.acme/example/jsonschema/1-0-0 + |-- (1 row) + | + | + |BEGIN TRANSACTION; + | + | ALTER TABLE s.com_acme_example_1 + | ADD COLUMN "zoo" DOUBLE PRECISION ENCODE RAW; + | ALTER TABLE s.com_acme_example_1 + | ADD COLUMN 
"foo1" VARCHAR(30) ENCODE ZSTD; + | + | COMMENT ON TABLE s.com_acme_example_1 IS 'iglu:com.acme/example/jsonschema/1-0-2'; + | + |END TRANSACTION;""".stripMargin + ) + } + + "should merge multiple schemas skipping broken one in the middle" in { + val s1 = SelfDescribingSchema(SchemaMap(dummyKey), + json"""{ + "type": "object", + "properties": { + "foo": { + "type": "string", + "maxLength": 20 + }} + }""".schema) + val s2 = SelfDescribingSchema(SchemaMap(dummyKey1), + json"""{ + "type": "object", + "properties": { + "foo": { + "type": "number" + }} + }""".schema) + val s3 = SelfDescribingSchema(SchemaMap(dummyKey2), + json"""{ + "type": "object", + "properties": { + "foo": { + "type": "string", + "maxLength": 30 + }} + }""".schema) + + getFinalMergedModel(NonEmptyList.of(s1, s2, s3)).asInstanceOf[GoodModel] + .asRight[RecoveryModel].toTestString must beRight( + """CREATE TABLE IF NOT EXISTS s.com_acme_example_1 ( + | "schema_vendor" VARCHAR(128) ENCODE ZSTD NOT NULL, + | "schema_name" VARCHAR(128) ENCODE ZSTD NOT NULL, + | "schema_format" VARCHAR(128) ENCODE ZSTD NOT NULL, + | "schema_version" VARCHAR(128) ENCODE ZSTD NOT NULL, + | "root_id" CHAR(36) ENCODE RAW NOT NULL, + | "root_tstamp" TIMESTAMP ENCODE ZSTD NOT NULL, + | "ref_root" VARCHAR(255) ENCODE ZSTD NOT NULL, + | "ref_tree" VARCHAR(1500) ENCODE ZSTD NOT NULL, + | "ref_parent" VARCHAR(255) ENCODE ZSTD NOT NULL, + | "foo" VARCHAR(30) ENCODE ZSTD, + | FOREIGN KEY (root_id) REFERENCES s.events(event_id) + |) + |DISTSTYLE KEY + |DISTKEY (root_id) + |SORTKEY (root_tstamp); + | + |COMMENT ON TABLE s.com_acme_example_1 IS 'iglu:com.acme/example/jsonschema/1-0-2'; + | + |-- WARNING: only apply this file to your database if the following SQL returns the expected: + |-- + |-- SELECT pg_catalog.obj_description(c.oid) FROM pg_catalog.pg_class c WHERE c.relname = 'com_acme_example_1'; + |-- obj_description + |-- ----------------- + |-- iglu:com.acme/example/jsonschema/1-0-0 + |-- (1 row) + | + | ALTER TABLE s.com_acme_example_1 + | ALTER COLUMN "foo" TYPE VARCHAR(30); + | + |-- NO ADDED COLUMNS CAN BE EXPRESSED IN SQL MIGRATION + | + |COMMENT ON TABLE s.com_acme_example_1 IS 'iglu:com.acme/example/jsonschema/1-0-2'; + |""".stripMargin + ) + } + + "should merge multiple schemas merged with broken one in the middle and it should get a recovery model" in { + val s1 = SelfDescribingSchema(SchemaMap(dummyKey), + json"""{ + "type": "object", + "properties": { + "foo": { + "type": "string", + "maxLength": 20 + }} + }""".schema) + val s2 = SelfDescribingSchema(SchemaMap(dummyKey1), + json"""{ + "type": "object", + "properties": { + "foo": { + "type": "number" + }} + }""".schema) + val s3 = SelfDescribingSchema(SchemaMap(dummyKey2), + json"""{ + "type": "object", + "properties": { + "foo": { + "type": "string", + "maxLength": 30 + }} + }""".schema) + + foldMapRedshiftSchemas(NonEmptyList.of(s1, s2, s3))(dummyKey1).asInstanceOf[RecoveryModel] + .asLeft[GoodModel].toTestString must beLeft( + """CREATE TABLE IF NOT EXISTS s.com_acme_example_1_1_0_recovered_194359593 ( + | "schema_vendor" VARCHAR(128) ENCODE ZSTD NOT NULL, + | "schema_name" VARCHAR(128) ENCODE ZSTD NOT NULL, + | "schema_format" VARCHAR(128) ENCODE ZSTD NOT NULL, + | "schema_version" VARCHAR(128) ENCODE ZSTD NOT NULL, + | "root_id" CHAR(36) ENCODE RAW NOT NULL, + | "root_tstamp" TIMESTAMP ENCODE ZSTD NOT NULL, + | "ref_root" VARCHAR(255) ENCODE ZSTD NOT NULL, + | "ref_tree" VARCHAR(1500) ENCODE ZSTD NOT NULL, + | "ref_parent" VARCHAR(255) ENCODE ZSTD NOT NULL, + | "foo" DOUBLE PRECISION 
ENCODE RAW, + | FOREIGN KEY (root_id) REFERENCES s.events(event_id) + |) + |DISTSTYLE KEY + |DISTKEY (root_id) + |SORTKEY (root_tstamp); + | + |COMMENT ON TABLE s.com_acme_example_1_1_0_recovered_194359593 IS 'iglu:com.acme/example/jsonschema/1-0-1'; + | + |Incompatible encoding in column foo old type RedshiftVarchar(20)/ZstdEncoding new type RedshiftDouble/RawEncoding""".stripMargin + ) + } + + } +} + +object ShredModelSpec { + val dummyKey = SchemaKey("com.acme", "example", "jsonschema", SchemaVer.Full(1, 0, 0)) + val dummyKey1 = SchemaKey("com.acme", "example", "jsonschema", SchemaVer.Full(1, 0, 1)) + val dummyKey2 = SchemaKey("com.acme", "example", "jsonschema", SchemaVer.Full(1, 0, 2)) + val dummyModel = ShredModel.good(dummyKey, + json"""{ + "type": "object", + "properties": { + "foo": { + "type": "string", + "maxLength": 20 + }, + "bar": { + "type": "integer", + "maximum": 4000 + }, + "a_field": { + "type": "object", + "properties": { + "b_field": { + "type": "string" + }, + "c_field": { + "type": "object", + "properties": { + "d_field": { + "type": "string" + }, + "e_field": { + "type": "string" + } + } + }, + "d_field": { + "type": "string" + } + }, + "required": ["d_field"] + }, + "b_field": { + "type": "integer" + }, + "c_field": { + "type": "integer" + }, + "d_field": { + "type": "object", + "properties": { + "e_field": { + "type": "string" + }, + "f_field": { + "type": "string" + } + } + }, + "e_field": { + "type": "object", + "properties": { + "f_field": { + "type": "string" + }, + "g_field": { + "type": "string" + } + }, + "required": ["g_field"] + }, + "f_field": { + "type": "string" + }, + "g_field": { + "type": "string" + } + }, + "required": ["a_field", "e_field"] +}""".schema) + + implicit class ModelMergeOps(result: Either[RecoveryModel, GoodModel]) { + def toTestString: Either[String, String] = result.leftMap(badModel => + (badModel.toTableSql("s") + "\n" + badModel.errorAsStrings.toList.mkString("\n"))) + .map(goodModel => goodModel.toTableSql("s") + "\n" + goodModel.migrationSql("s", None)) + } + +} diff --git a/modules/core/src/test/scala/com/snowplowanalytics/iglu/schemaddl/redshift/generators/DdlFileSpec.scala b/modules/core/src/test/scala/com/snowplowanalytics/iglu/schemaddl/redshift/generators/DdlFileSpec.scala deleted file mode 100644 index 8724e803..00000000 --- a/modules/core/src/test/scala/com/snowplowanalytics/iglu/schemaddl/redshift/generators/DdlFileSpec.scala +++ /dev/null @@ -1,339 +0,0 @@ -/* - * Copyright (c) 2012-2023 Snowplow Analytics Ltd. All rights reserved. - * - * This program is licensed to you under the Apache License Version 2.0, - * and you may not use this file except in compliance with the Apache License Version 2.0. - * You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the Apache License Version 2.0 is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. 
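// ---------------------------------------------------------------------------------------------
// Illustrative usage sketch, not part of the patch itself: how the ShredModel API exercised by
// ShredModelSpec above is expected to be driven downstream. Only calls shown in the spec are
// used (ShredModel.good, toTableSql, jsonToStrings, merge, migrationSql); the schema literals,
// the "atomic" schema name and the object/value names here are invented for the example, and
// the `.schema` json syntax is the helper from the test-side SpecHelpers.
object ShredModelUsageSketch {
  import com.snowplowanalytics.iglu.core.{SchemaKey, SchemaVer}
  import com.snowplowanalytics.iglu.schemaddl.SpecHelpers._
  import com.snowplowanalytics.iglu.schemaddl.redshift.ShredModel
  import io.circe.literal._

  val key100 = SchemaKey("com.acme", "example", "jsonschema", SchemaVer.Full(1, 0, 0))
  val key101 = SchemaKey("com.acme", "example", "jsonschema", SchemaVer.Full(1, 0, 1))

  // A "good" model is built straight from a schema; required properties become NOT NULL columns
  // and are ordered ahead of the nullable ones in the generated CREATE TABLE statement.
  val v100 = ShredModel.good(key100,
    json"""{"type": "object", "properties": {"foo": {"type": "string", "maxLength": 20}}}""".schema)
  val createTableSql = v100.toTableSql("atomic")

  // Shredding an event: properties are flattened into column order; absent values render as \N.
  val tsvCells = v100.jsonToStrings(json"""{"foo": "hello"}""")

  // Merging the next version yields either a good model carrying ALTER TABLE migration SQL
  // (non-breaking change such as VARCHAR widening or added columns) or a recovery model when the
  // change is breaking (such as a type change on an existing column), which renders a
  // *_recovered_* table instead.
  val v101 = ShredModel.good(key101,
    json"""{"type": "object", "properties": {"foo": {"type": "string", "maxLength": 30}}}""".schema)

  val migrationOrRecovery =
    v100.merge(v101) match {
      case Right(good)    => good.migrationSql("atomic", None)
      case Left(recovery) => recovery.toTableSql("atomic")
    }
}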
- */ -package com.snowplowanalytics.iglu.schemaddl.redshift -package generators - -import cats.data.NonEmptyList - -import io.circe.literal._ - -// specs2 -import org.specs2.Specification - -import com.snowplowanalytics.iglu.core.{SchemaMap, SchemaVer} -import com.snowplowanalytics.iglu.schemaddl.SpecHelpers._ -import com.snowplowanalytics.iglu.schemaddl.migrations.FlatSchema - -class DdlFileSpec extends Specification { def is = s2""" - Check DDL File specification - render correct table definition $e1 - render correct table definition when given schema contains oneOf $e2 - render correct table definition when given schema contains oneOf $e3 - render correct table definition when given schema contains union type $e4 - render correct table definition with table constraints $e5 - render correct table definition when given schema contains additional properties $e6 - """ - - def e1 = { - val header = CommentBlock(Vector( - "AUTO-GENERATED BY schema-ddl DO NOT EDIT", - "Generator: schema-ddl 0.2.0", - "Generated: 2016-03-31 15:52" - )) - val schemaCreate = CreateSchema("atomic") - - val createTable = CreateTable( - "launch_missles_1", - List( - Column("status", RedshiftVarchar(64), Set(DistKey), Set(Nullability(NotNull))), - Column("missionName", RedshiftVarchar(128), Set(), Set(Nullability(NotNull))), - Column("geo_longitude", RedshiftDouble, Set(), Set()), - Column("geo_latitude", RedshiftDouble, Set(), Set()), - Column("rocket.model", RedshiftInteger, Set(), Set(Nullability(NotNull))), - Column("rocket.series", RedshiftInteger, Set(), Set(Nullability(Null))) - ) - ) - val commentOn = DdlGenerator.getTableComment( - "launch_missles_1", - Some("atomic"), - SchemaMap("com.acme", "event", "jsonschema", SchemaVer.Full(1,2,1)) - ) - - // no formatters - val ddl = DdlFile(List(header, schemaCreate, createTable, commentOn)).render(Nil) - - // ordering happens in DdlGenerator.getTableDdl function - ddl must beEqualTo( - """|-- AUTO-GENERATED BY schema-ddl DO NOT EDIT - |-- Generator: schema-ddl 0.2.0 - |-- Generated: 2016-03-31 15:52 - |CREATE SCHEMA IF NOT EXISTS atomic; - |CREATE TABLE IF NOT EXISTS launch_missles_1 ( - | "status" VARCHAR(64) DISTKEY NOT NULL, - | "missionName" VARCHAR(128) NOT NULL, - | "geo_longitude" DOUBLE PRECISION, - | "geo_latitude" DOUBLE PRECISION, - | "rocket.model" INT NOT NULL, - | "rocket.series" INT NULL - |); - |COMMENT ON TABLE atomic.launch_missles_1 IS 'iglu:com.acme/event/jsonschema/1-2-1';""".stripMargin) - } - - def e2 = { - val json = json""" - { - "type": "object", - "properties": { - "union": { - "oneOf": [ - { - "type": "object", - "properties": { - "object_without_properties": { "type": "object" } - } - }, - { - "type": "string" - } - ] - } - }, - "additionalProperties": false - } - """.schema - val expected = - """CREATE TABLE IF NOT EXISTS atomic.table_name ( - | "schema_vendor" VARCHAR(128) ENCODE ZSTD NOT NULL, - | "schema_name" VARCHAR(128) ENCODE ZSTD NOT NULL, - | "schema_format" VARCHAR(128) ENCODE ZSTD NOT NULL, - | "schema_version" VARCHAR(128) ENCODE ZSTD NOT NULL, - | "root_id" CHAR(36) ENCODE RAW NOT NULL, - | "root_tstamp" TIMESTAMP ENCODE ZSTD NOT NULL, - | "ref_root" VARCHAR(255) ENCODE ZSTD NOT NULL, - | "ref_tree" VARCHAR(1500) ENCODE ZSTD NOT NULL, - | "ref_parent" VARCHAR(255) ENCODE ZSTD NOT NULL, - | "union" VARCHAR(1024) ENCODE ZSTD, - | FOREIGN KEY (root_id) REFERENCES atomic.events(event_id) - |) - |DISTSTYLE KEY - |DISTKEY (root_id) - |SORTKEY (root_tstamp);""".stripMargin - - val flatSchema = FlatSchema.build(json) - val 
orderedSubSchemas = FlatSchema.postProcess(flatSchema.subschemas) - val schemaCreate = DdlGenerator.generateTableDdl(orderedSubSchemas, "table_name", None, 1024, false) - val ddl = DdlFile(List(schemaCreate)).render(Nil) - ddl must beEqualTo(expected) - } - - def e3 = { - val json = json""" - { - "type": "object", - "properties": { - "union": { - "oneOf": [ - { - "type": "integer" - }, - { - "type": "string" - } - ] - } - }, - "additionalProperties": false - } - """.schema - val expected = - """CREATE TABLE IF NOT EXISTS atomic.table_name ( - | "schema_vendor" VARCHAR(128) ENCODE ZSTD NOT NULL, - | "schema_name" VARCHAR(128) ENCODE ZSTD NOT NULL, - | "schema_format" VARCHAR(128) ENCODE ZSTD NOT NULL, - | "schema_version" VARCHAR(128) ENCODE ZSTD NOT NULL, - | "root_id" CHAR(36) ENCODE RAW NOT NULL, - | "root_tstamp" TIMESTAMP ENCODE ZSTD NOT NULL, - | "ref_root" VARCHAR(255) ENCODE ZSTD NOT NULL, - | "ref_tree" VARCHAR(1500) ENCODE ZSTD NOT NULL, - | "ref_parent" VARCHAR(255) ENCODE ZSTD NOT NULL, - | "union" VARCHAR(1024) ENCODE ZSTD, - | FOREIGN KEY (root_id) REFERENCES atomic.events(event_id) - |) - |DISTSTYLE KEY - |DISTKEY (root_id) - |SORTKEY (root_tstamp);""".stripMargin - - val flatSchema = FlatSchema.build(json) - val orderedSubSchemas = FlatSchema.postProcess(flatSchema.subschemas) - val schemaCreate = DdlGenerator.generateTableDdl(orderedSubSchemas, "table_name", None, 1024, false) - val ddl = DdlFile(List(schemaCreate)).render(Nil) - ddl must beEqualTo(expected) - } - - def e4 = { - val json = json""" - { - "type": "object", - "properties": { - "union": { - "type": ["string", "object"], - "properties": { - "one": { "type": "string" }, - "second": { "type": "string" } - } - }, - "union2": { - "type": ["string", "object"], - "properties": { - "one": { "type": "string" }, - "second": { "type": "string" } - } - } - }, - "additionalProperties": false - } - """.schema - val expected = - """CREATE TABLE IF NOT EXISTS atomic.table_name ( - | "schema_vendor" VARCHAR(128) ENCODE ZSTD NOT NULL, - | "schema_name" VARCHAR(128) ENCODE ZSTD NOT NULL, - | "schema_format" VARCHAR(128) ENCODE ZSTD NOT NULL, - | "schema_version" VARCHAR(128) ENCODE ZSTD NOT NULL, - | "root_id" CHAR(36) ENCODE RAW NOT NULL, - | "root_tstamp" TIMESTAMP ENCODE ZSTD NOT NULL, - | "ref_root" VARCHAR(255) ENCODE ZSTD NOT NULL, - | "ref_tree" VARCHAR(1500) ENCODE ZSTD NOT NULL, - | "ref_parent" VARCHAR(255) ENCODE ZSTD NOT NULL, - | "union" VARCHAR(4096) ENCODE ZSTD, - | "union2" VARCHAR(4096) ENCODE ZSTD, - | FOREIGN KEY (root_id) REFERENCES atomic.events(event_id) - |) - |DISTSTYLE KEY - |DISTKEY (root_id) - |SORTKEY (root_tstamp);""".stripMargin - - val flatSchema = FlatSchema.build(json) - val orderedSubSchemas = FlatSchema.postProcess(flatSchema.subschemas) - val schemaCreate = DdlGenerator.generateTableDdl(orderedSubSchemas, "table_name", None, 1024, false) - val ddl = DdlFile(List(schemaCreate)).render(Nil) - ddl must beEqualTo(expected) - } - - def e5 = { - val schemaCreate = CreateTable( - "atomic.table_name", - List( - Column("schema_vendor",RedshiftVarchar(128),Set(CompressionEncoding(ZstdEncoding)),Set(Nullability(NotNull))), - Column("schema_name",RedshiftVarchar(128),Set(CompressionEncoding(ZstdEncoding)),Set(Nullability(NotNull))), - Column("schema_format",RedshiftVarchar(128),Set(CompressionEncoding(ZstdEncoding)),Set(Nullability(NotNull))), - Column("schema_version",RedshiftVarchar(128),Set(CompressionEncoding(ZstdEncoding)),Set(Nullability(NotNull))), - 
Column("root_id",RedshiftChar(36),Set(CompressionEncoding(RawEncoding)),Set(Nullability(NotNull))), - Column("root_tstamp",RedshiftTimestamp,Set(CompressionEncoding(ZstdEncoding)),Set(Nullability(NotNull))), - Column("ref_root",RedshiftVarchar(255),Set(CompressionEncoding(ZstdEncoding)),Set(Nullability(NotNull))), - Column("ref_tree",RedshiftVarchar(1500),Set(CompressionEncoding(ZstdEncoding)),Set(Nullability(NotNull))), - Column("ref_parent",RedshiftVarchar(255),Set(CompressionEncoding(ZstdEncoding)),Set(Nullability(NotNull))), - Column("union",ProductType(List("Product type [\"string\",\"object\",\"null\"] encountered in union"),None),Set(CompressionEncoding(ZstdEncoding)),Set()), - Column("union2",ProductType(List("Product type [\"string\",\"object\",\"null\"] encountered in union2"),None),Set(CompressionEncoding(ZstdEncoding)),Set()) - ), - Set( - ForeignKeyTable(NonEmptyList.of("root_id", "root_tstamp"),RefTable("atomic.events",Some("event_id"))), - PrimaryKeyTable(NonEmptyList.of("root_id", "root_tstamp")), - UniqueKeyTable(NonEmptyList.of("root_id", "root_tstamp")), - ), - Set(Diststyle(Key), DistKeyTable("root_id"), SortKeyTable(None,NonEmptyList.one("root_tstamp"))) - ) - - val expected = - """CREATE TABLE IF NOT EXISTS atomic.table_name ( - | "schema_vendor" VARCHAR(128) ENCODE ZSTD NOT NULL, - | "schema_name" VARCHAR(128) ENCODE ZSTD NOT NULL, - | "schema_format" VARCHAR(128) ENCODE ZSTD NOT NULL, - | "schema_version" VARCHAR(128) ENCODE ZSTD NOT NULL, - | "root_id" CHAR(36) ENCODE RAW NOT NULL, - | "root_tstamp" TIMESTAMP ENCODE ZSTD NOT NULL, - | "ref_root" VARCHAR(255) ENCODE ZSTD NOT NULL, - | "ref_tree" VARCHAR(1500) ENCODE ZSTD NOT NULL, - | "ref_parent" VARCHAR(255) ENCODE ZSTD NOT NULL, - | "union" VARCHAR(4096) ENCODE ZSTD, - | "union2" VARCHAR(4096) ENCODE ZSTD, - | FOREIGN KEY (root_id, root_tstamp) REFERENCES atomic.events(event_id) - | PRIMARY KEY (root_id, root_tstamp) - | UNIQUE (root_id, root_tstamp) - |) - |DISTSTYLE KEY - |DISTKEY (root_id) - |SORTKEY (root_tstamp);""".stripMargin - - - val ddl = DdlFile(List(schemaCreate)).render(Nil) - ddl must beEqualTo(expected) - } - - def e6 = { - val json = json""" - { - "self": { - "vendor": "com.snowplowanalytics.snowplow", - "name": "site_search", - "format": "jsonschema", - "version": "1-0-0" - }, - "type": "object", - "properties": { - "terms": { - "type": "array", - "items": { - "type": "string" - }, - "minItems": 1 - }, - "filters": { - "type": "object", - "additionalProperties": { - "type": [ - "string", - "boolean" - ] - } - }, - "totalResults": { - "type": "integer", - "minimum": 0, - "maximum": 2147483647 - }, - "pageResults": { - "type": "integer", - "minimum": 0, - "maximum": 2147483647 - } - } - } - """.schema - val expected = - """CREATE TABLE IF NOT EXISTS atomic.table_name ( - | "schema_vendor" VARCHAR(128) ENCODE ZSTD NOT NULL, - | "schema_name" VARCHAR(128) ENCODE ZSTD NOT NULL, - | "schema_format" VARCHAR(128) ENCODE ZSTD NOT NULL, - | "schema_version" VARCHAR(128) ENCODE ZSTD NOT NULL, - | "root_id" CHAR(36) ENCODE RAW NOT NULL, - | "root_tstamp" TIMESTAMP ENCODE ZSTD NOT NULL, - | "ref_root" VARCHAR(255) ENCODE ZSTD NOT NULL, - | "ref_tree" VARCHAR(1500) ENCODE ZSTD NOT NULL, - | "ref_parent" VARCHAR(255) ENCODE ZSTD NOT NULL, - | "filters" VARCHAR(1024) ENCODE ZSTD, - | "page_results" INT ENCODE ZSTD, - | "terms" VARCHAR(65535) ENCODE ZSTD, - | "total_results" INT ENCODE ZSTD, - | FOREIGN KEY (root_id) REFERENCES atomic.events(event_id) - |) - |DISTSTYLE KEY - |DISTKEY (root_id) - |SORTKEY 
(root_tstamp);""".stripMargin - - val flatSchema = FlatSchema.build(json) - val orderedSubSchemas = FlatSchema.postProcess(flatSchema.subschemas) - val schemaCreate = DdlGenerator.generateTableDdl(orderedSubSchemas, "table_name", None, 1024, false) - val ddl = DdlFile(List(schemaCreate)).render(Nil) - ddl must beEqualTo(expected) - } -} diff --git a/modules/core/src/test/scala/com/snowplowanalytics/iglu/schemaddl/redshift/generators/DdlGeneratorSpec.scala b/modules/core/src/test/scala/com/snowplowanalytics/iglu/schemaddl/redshift/generators/DdlGeneratorSpec.scala deleted file mode 100644 index 287c9fb4..00000000 --- a/modules/core/src/test/scala/com/snowplowanalytics/iglu/schemaddl/redshift/generators/DdlGeneratorSpec.scala +++ /dev/null @@ -1,172 +0,0 @@ -/* - * Copyright (c) 2012-2023 Snowplow Analytics Ltd. All rights reserved. - * - * This program is licensed to you under the Apache License Version 2.0, - * and you may not use this file except in compliance with the Apache License Version 2.0. - * You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the Apache License Version 2.0 is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. - */ -package com.snowplowanalytics.iglu.schemaddl.redshift.generators - - -import cats.data.NonEmptyList - -import io.circe.literal._ - -// Specs2 -import org.specs2.Specification - -// This library -import com.snowplowanalytics.iglu.schemaddl.SpecHelpers._ - -import com.snowplowanalytics.iglu.schemaddl.jsonschema.Pointer -import com.snowplowanalytics.iglu.schemaddl.jsonschema.Schema -import com.snowplowanalytics.iglu.schemaddl.jsonschema.properties.CommonProperties.Type - -import com.snowplowanalytics.iglu.schemaddl.redshift._ - -// TODO: union type specs (string, object) - -class DdlGeneratorSpec extends Specification { def is = s2""" - Check DDL generation specification - Generate correct DDL for atomic table $e1 - Generate correct DDL for with runlength encoding for booleans $e2 - Generate correct DDL when enum schema is nullable $e3 - Generate correct DDL when only pointer is root pointer $e4 - Generate correct DDL with sum type $e5 - """ - - def e1 = { - val orderedSubSchemas = List( - "/foo".jsonPointer -> json"""{"type": "string", "maxLength": 30}""".schema, - "/bar".jsonPointer -> json"""{"enum": ["one","two","three",null]}""".schema - ) - - val resultDdl = CreateTable( - "atomic.launch_missles", - DdlGenerator.selfDescSchemaColumns ++ - DdlGenerator.parentageColumns ++ - List( - Column("foo",RedshiftVarchar(30),Set(CompressionEncoding(ZstdEncoding)),Set(Nullability(NotNull))), - Column("bar",RedshiftVarchar(5),Set(CompressionEncoding(Text255Encoding)),Set()) - ), - Set(ForeignKeyTable(NonEmptyList.of("root_id"),RefTable("atomic.events",Some("event_id")))), - Set(Diststyle(Key), DistKeyTable("root_id"),SortKeyTable(None,NonEmptyList.of("root_tstamp"))) - ) - - val ddl = DdlGenerator.generateTableDdl(orderedSubSchemas, "launch_missles", None, 4096, false) - - ddl must beEqualTo(resultDdl) - } - - def e2 = { - val orderedSubSchemas = List( - "/foo".jsonPointer -> json"""{"type": "boolean"}""".schema, - "/baz".jsonPointer -> json"""{"type": "boolean"}""".schema, - "/bar".jsonPointer -> json"""{"enum": ["one","two","three"]}""".schema 
- ) - - val resultDdl = CreateTable( - "atomic.launch_missles", - DdlGenerator.selfDescSchemaColumns ++ - DdlGenerator.parentageColumns ++ - List( - Column("foo",RedshiftBoolean,Set(CompressionEncoding(RunLengthEncoding)),Set(Nullability(NotNull))), - Column("baz",RedshiftBoolean,Set(CompressionEncoding(RunLengthEncoding)),Set(Nullability(NotNull))), - Column("bar",RedshiftVarchar(5),Set(CompressionEncoding(Text255Encoding)),Set(Nullability(NotNull))) - ), - Set(ForeignKeyTable(NonEmptyList.of("root_id"),RefTable("atomic.events",Some("event_id")))), - Set(Diststyle(Key), DistKeyTable("root_id"),SortKeyTable(None,NonEmptyList.of("root_tstamp"))) - ) - - val ddl = DdlGenerator.generateTableDdl(orderedSubSchemas, "launch_missles", None, 4096, false) - - ddl must beEqualTo(resultDdl) - } - - def e3 = { - val enumSchemaWithNull = json"""{"enum": ["one","two","three"]}""".schema.copy(`type` = Some(Type.Null)) - val orderedSubSchemas = List( - "/foo".jsonPointer -> json"""{"type": "boolean"}""".schema, - "/baz".jsonPointer -> json"""{"type": "boolean"}""".schema, - "/enumField".jsonPointer -> enumSchemaWithNull - ) - - val resultDdl = CreateTable( - "atomic.launch_missles", - DdlGenerator.selfDescSchemaColumns ++ - DdlGenerator.parentageColumns ++ - List( - Column("foo",RedshiftBoolean,Set(CompressionEncoding(RunLengthEncoding)),Set(Nullability(NotNull))), - Column("baz",RedshiftBoolean,Set(CompressionEncoding(RunLengthEncoding)),Set(Nullability(NotNull))), - Column("enum_field",RedshiftVarchar(5),Set(CompressionEncoding(Text255Encoding)),Set()) - ), - Set(ForeignKeyTable(NonEmptyList.of("root_id"),RefTable("atomic.events",Some("event_id")))), - Set(Diststyle(Key), DistKeyTable("root_id"),SortKeyTable(None,NonEmptyList.of("root_tstamp"))) - ) - - val ddl = DdlGenerator.generateTableDdl(orderedSubSchemas, "launch_missles", None, 4096, false) - - ddl must beEqualTo(resultDdl) - } - - def e4 = { - val orderedSubSchemas = List( - Pointer.Root -> Schema.empty - ) - - val ddl = DdlGenerator.generateTableDdl(orderedSubSchemas, "launch_missles", None, 4096, false) - - val resultDdl = CreateTable( - "atomic.launch_missles", - DdlGenerator.selfDescSchemaColumns ++ - DdlGenerator.parentageColumns, - Set(ForeignKeyTable(NonEmptyList.of("root_id"),RefTable("atomic.events",Some("event_id")))), - Set(Diststyle(Key), DistKeyTable("root_id"),SortKeyTable(None,NonEmptyList.of("root_tstamp"))) - ) - - ddl must beEqualTo(resultDdl) - } - - def e5 = { - - val subSchemas = List( - "/properties/union".jsonPointer -> - json"""{ - "oneOf": [ - { - "type": "object", - "properties": { - "one": { "type": "integer" } - } - }, - { - "type": "object", - "properties": { - "two": { "type": "string" } - } - } - ] - }""".schema.copy(`type` = Some(Type.Null)) - ) - - val ddl = DdlGenerator - .generateTableDdl(subSchemas, "launch_missles", None, 4096, false) - .columns - - val resultDdl = CreateTable( - "atomic.launch_missles", - DdlGenerator.selfDescSchemaColumns ++ - DdlGenerator.parentageColumns :+ - Column("union",RedshiftVarchar(4096),Set(CompressionEncoding(ZstdEncoding)),Set()), - Set(ForeignKeyTable(NonEmptyList.of("root_id"),RefTable("atomic.events",Some("event_id")))), - Set(Diststyle(Key), DistKeyTable("root_id"),SortKeyTable(None,NonEmptyList.of("root_tstamp"))) - ).columns - - ddl must beEqualTo(resultDdl) - } -} diff --git a/modules/core/src/test/scala/com/snowplowanalytics/iglu/schemaddl/redshift/generators/MigrationGeneratorSpec.scala 
b/modules/core/src/test/scala/com/snowplowanalytics/iglu/schemaddl/redshift/generators/MigrationGeneratorSpec.scala deleted file mode 100644 index f25448af..00000000 --- a/modules/core/src/test/scala/com/snowplowanalytics/iglu/schemaddl/redshift/generators/MigrationGeneratorSpec.scala +++ /dev/null @@ -1,324 +0,0 @@ -/* - * Copyright (c) 2012-2023 Snowplow Analytics Ltd. All rights reserved. - * - * This program is licensed to you under the Apache License Version 2.0, - * and you may not use this file except in compliance with the Apache License Version 2.0. - * You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the Apache License Version 2.0 is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. - */ -package com.snowplowanalytics.iglu.schemaddl.redshift -package generators - -import com.snowplowanalytics.iglu.schemaddl.migrations.{Migration, SchemaDiff} -import io.circe.literal._ - -// specs2 -import org.specs2.Specification - -// Iglu -import com.snowplowanalytics.iglu.core.SchemaVer - -// This library -import com.snowplowanalytics.iglu.schemaddl.SpecHelpers._ -import com.snowplowanalytics.iglu.schemaddl.jsonschema.{ Pointer, Schema } - -class MigrationGeneratorSpec extends Specification { def is = s2""" - Redshift migration - generates addition migration with one new column $e1 - generates addition migration without visible changes $e2 - generates addition migration with three new columns $e3 - generates migration when increasing maxLength $e4 - generates migration when increasing maxLength and adding a field $e5 - generates migration when increasing maxLength of nullable field $e6 - doesn't generate migration when altering column with unsupported encoding $e7 - maxLengthIncreased - correctly detects when maxLength has been increased $e8 - enumLonger - correctly detects when enum gets new longer value $e9 - """ - - val emptyModified = Set.empty[SchemaDiff.Modified] - val emptySubschemas = List.empty[(Pointer.SchemaPointer, Schema)] - - def e1 = { - val diff = SchemaDiff(List("status".jsonPointer -> json"""{"type": ["string", "null"]}""".schema), emptyModified, emptySubschemas) - val schemaMigration = Migration("com.acme", "launch_missles", SchemaVer.Full(1,0,0), SchemaVer.Full(1,0,1), diff) - val ddlMigration = MigrationGenerator.generateMigration(schemaMigration, 4096, Some("atomic")).render - - val result = - """|-- WARNING: only apply this file to your database if the following SQL returns the expected: - |-- - |-- SELECT pg_catalog.obj_description(c.oid) FROM pg_catalog.pg_class c WHERE c.relname = 'com_acme_launch_missles_1'; - |-- obj_description - |-- ----------------- - |-- iglu:com.acme/launch_missles/jsonschema/1-0-0 - |-- (1 row) - | - |BEGIN TRANSACTION; - | - | ALTER TABLE atomic.com_acme_launch_missles_1 - | ADD COLUMN "status" VARCHAR(4096) ENCODE ZSTD; - | - | COMMENT ON TABLE atomic.com_acme_launch_missles_1 IS 'iglu:com.acme/launch_missles/jsonschema/1-0-1'; - | - |END TRANSACTION;""".stripMargin - - ddlMigration must beEqualTo(result) - } - - def e2 = { - val diff = SchemaDiff.empty - val schemaMigration = Migration("com.acme", "launch_missles", SchemaVer.Full(2,0,0), SchemaVer.Full(2,0,1), diff) - val ddlMigration = 
MigrationGenerator.generateMigration(schemaMigration, 4096, Some("atomic")).render - - val result = - """|-- WARNING: only apply this file to your database if the following SQL returns the expected: - |-- - |-- SELECT pg_catalog.obj_description(c.oid) FROM pg_catalog.pg_class c WHERE c.relname = 'com_acme_launch_missles_2'; - |-- obj_description - |-- ----------------- - |-- iglu:com.acme/launch_missles/jsonschema/2-0-0 - |-- (1 row) - | - |-- NO ADDED COLUMNS CAN BE EXPRESSED IN SQL MIGRATION - | - | COMMENT ON TABLE atomic.com_acme_launch_missles_2 IS 'iglu:com.acme/launch_missles/jsonschema/2-0-1'; - |""".stripMargin - - ddlMigration must beEqualTo(result) - } - - def e3 = { - val newProps = List( - "/status".jsonPointer -> json"""{"type": ["string", "null"]}""".schema, - "/launch_time".jsonPointer -> json"""{"type": ["string", "null"], "format": "date-time"}""".schema, - "/latitude".jsonPointer -> json"""{"type": "number", "minimum": -90, "maximum": 90}""".schema, - "/longitude".jsonPointer ->json"""{"type": "number", "minimum": -180, "maximum": 180}""".schema) - - val diff = SchemaDiff(newProps, emptyModified, emptySubschemas) - val schemaMigration = Migration("com.acme", "launch_missles", SchemaVer.Full(1,0,2), SchemaVer.Full(1,0,3), diff) - val ddlMigration = MigrationGenerator.generateMigration(schemaMigration, 4096, Some("atomic")).render - - // TODO: NOT NULL columns should be first - val result = - """|-- WARNING: only apply this file to your database if the following SQL returns the expected: - |-- - |-- SELECT pg_catalog.obj_description(c.oid) FROM pg_catalog.pg_class c WHERE c.relname = 'com_acme_launch_missles_1'; - |-- obj_description - |-- ----------------- - |-- iglu:com.acme/launch_missles/jsonschema/1-0-2 - |-- (1 row) - | - |BEGIN TRANSACTION; - | - | ALTER TABLE atomic.com_acme_launch_missles_1 - | ADD COLUMN "status" VARCHAR(4096) ENCODE ZSTD; - | ALTER TABLE atomic.com_acme_launch_missles_1 - | ADD COLUMN "launch_time" TIMESTAMP ENCODE ZSTD; - | ALTER TABLE atomic.com_acme_launch_missles_1 - | ADD COLUMN "latitude" DOUBLE PRECISION ENCODE RAW; - | ALTER TABLE atomic.com_acme_launch_missles_1 - | ADD COLUMN "longitude" DOUBLE PRECISION ENCODE RAW; - | - | COMMENT ON TABLE atomic.com_acme_launch_missles_1 IS 'iglu:com.acme/launch_missles/jsonschema/1-0-3'; - | - |END TRANSACTION;""".stripMargin - - ddlMigration must beEqualTo(result) - } - - def e4 = { - val added = List.empty[(Pointer.SchemaPointer, Schema)] - val modified = Set( - SchemaDiff.Modified( - "/foo".jsonPointer, - json"""{"type": "string", "maxLength": "1024"}""".schema, - json"""{"type": "string", "maxLength": "2048"}""".schema - ) - ) - val removed = List.empty[(Pointer.SchemaPointer, Schema)] - - val diff = SchemaDiff(added, modified, removed) - val schemaMigration = Migration("com.acme", "example", SchemaVer.Full(1,0,0), SchemaVer.Full(1,0,1), diff) - val ddlMigration = MigrationGenerator.generateMigration(schemaMigration, 4096, Some("atomic")).render - - val result = - """|-- WARNING: only apply this file to your database if the following SQL returns the expected: - |-- - |-- SELECT pg_catalog.obj_description(c.oid) FROM pg_catalog.pg_class c WHERE c.relname = 'com_acme_example_1'; - |-- obj_description - |-- ----------------- - |-- iglu:com.acme/example/jsonschema/1-0-0 - |-- (1 row) - | - | ALTER TABLE atomic.com_acme_example_1 - | ALTER "foo" TYPE VARCHAR(2048); - | - | COMMENT ON TABLE atomic.com_acme_example_1 IS 'iglu:com.acme/example/jsonschema/1-0-1'; - |""".stripMargin - - ddlMigration must 
beEqualTo(result) - } - - def e5 = { - val added = List( - "/foo2".jsonPointer -> json"""{"type": ["string", "null"], "maxLength": "1024"}""".schema - ) - val modified = Set( - SchemaDiff.Modified( - "/foo".jsonPointer, - json"""{"type": "string", "maxLength": "1024"}""".schema, - json"""{"type": "string", "maxLength": "2048"}""".schema - ) - ) - val removed = List.empty[(Pointer.SchemaPointer, Schema)] - - val diff = SchemaDiff(added, modified, removed) - val schemaMigration = Migration("com.acme", "example", SchemaVer.Full(1,0,0), SchemaVer.Full(1,0,1), diff) - val ddlMigration = MigrationGenerator.generateMigration(schemaMigration, 4096, Some("atomic")).render - - val result = - """|-- WARNING: only apply this file to your database if the following SQL returns the expected: - |-- - |-- SELECT pg_catalog.obj_description(c.oid) FROM pg_catalog.pg_class c WHERE c.relname = 'com_acme_example_1'; - |-- obj_description - |-- ----------------- - |-- iglu:com.acme/example/jsonschema/1-0-0 - |-- (1 row) - | - | ALTER TABLE atomic.com_acme_example_1 - | ALTER "foo" TYPE VARCHAR(2048); - | - |BEGIN TRANSACTION; - | - | ALTER TABLE atomic.com_acme_example_1 - | ADD COLUMN "foo2" VARCHAR(1024) ENCODE ZSTD; - | - | COMMENT ON TABLE atomic.com_acme_example_1 IS 'iglu:com.acme/example/jsonschema/1-0-1'; - | - |END TRANSACTION;""".stripMargin - - ddlMigration must beEqualTo(result) - } - - def e6 = { - val added = List( - "/foo2".jsonPointer -> json"""{"type": ["string", "null"], "maxLength": "1024"}""".schema - ) - val modified = Set( - SchemaDiff.Modified( - "/foo".jsonPointer, - json"""{"type": ["string","null"], "maxLength": "1024"}""".schema, - json"""{"type": ["string","null"], "maxLength": "2048"}""".schema - ) - ) - val removed = List.empty[(Pointer.SchemaPointer, Schema)] - - val diff = SchemaDiff(added, modified, removed) - val schemaMigration = Migration("com.acme", "example", SchemaVer.Full(1,0,0), SchemaVer.Full(1,0,1), diff) - val ddlMigration = MigrationGenerator.generateMigration(schemaMigration, 4096, Some("atomic")).render - - val result = - """|-- WARNING: only apply this file to your database if the following SQL returns the expected: - |-- - |-- SELECT pg_catalog.obj_description(c.oid) FROM pg_catalog.pg_class c WHERE c.relname = 'com_acme_example_1'; - |-- obj_description - |-- ----------------- - |-- iglu:com.acme/example/jsonschema/1-0-0 - |-- (1 row) - | - | ALTER TABLE atomic.com_acme_example_1 - | ALTER "foo" TYPE VARCHAR(2048); - | - |BEGIN TRANSACTION; - | - | ALTER TABLE atomic.com_acme_example_1 - | ADD COLUMN "foo2" VARCHAR(1024) ENCODE ZSTD; - | - | COMMENT ON TABLE atomic.com_acme_example_1 IS 'iglu:com.acme/example/jsonschema/1-0-1'; - | - |END TRANSACTION;""".stripMargin - - ddlMigration must beEqualTo(result) - } - - def e7 = { - val added = List.empty[(Pointer.SchemaPointer, Schema)] - val modified = Set( - SchemaDiff.Modified( - "/foo".jsonPointer, - json"""{"type": "string", "enum": ["FOO", "BAR"]}""".schema, - json"""{"type": "string", "enum": ["FOO", "BAR", "FOOBAR"]}""".schema - ) - ) - val removed = List.empty[(Pointer.SchemaPointer, Schema)] - - val diff = SchemaDiff(added, modified, removed) - val schemaMigration = Migration("com.acme", "example", SchemaVer.Full(1,0,0), SchemaVer.Full(1,0,1), diff) - val ddlMigration = MigrationGenerator.generateMigration(schemaMigration, 4096, Some("atomic")).render - - val result = - """|-- WARNING: only apply this file to your database if the following SQL returns the expected: - |-- - |-- SELECT 
pg_catalog.obj_description(c.oid) FROM pg_catalog.pg_class c WHERE c.relname = 'com_acme_example_1'; - |-- obj_description - |-- ----------------- - |-- iglu:com.acme/example/jsonschema/1-0-0 - |-- (1 row) - | - |-- NO ADDED COLUMNS CAN BE EXPRESSED IN SQL MIGRATION - | - | COMMENT ON TABLE atomic.com_acme_example_1 IS 'iglu:com.acme/example/jsonschema/1-0-1'; - |""".stripMargin - - ddlMigration must beEqualTo(result) - } - - def e8 = { - val modifiedY = SchemaDiff.Modified( - "/foo".jsonPointer, - json"""{"type": "string", "maxLength": "1024"}""".schema, - json"""{"type": "string", "maxLength": "2048"}""".schema - ) - val yes = MigrationGenerator.maxLengthIncreased(modifiedY) must beTrue - - val modifiedN = SchemaDiff.Modified( - "/foo".jsonPointer, - json"""{"type": "string", "maxLength": "2048"}""".schema, - json"""{"type": "string", "maxLength": "1024"}""".schema - ) - val no = MigrationGenerator.maxLengthIncreased(modifiedN) must beFalse - - yes and no - } - - def e9 = { - val modifiedY1 = SchemaDiff.Modified( - "/foo".jsonPointer, - json"""{"type": "string", "enum": ["FOO", "BAR"]}""".schema, - json"""{"type": "string", "enum": ["FOO", "BAR", "FOOBAR"]}""".schema - ) - val yes1 = MigrationGenerator.enumLonger(modifiedY1) must beTrue - - val modifiedY2 = SchemaDiff.Modified( - "/foo".jsonPointer, - json"""{"enum": ["FOO", "BAR"]}""".schema, - json"""{"enum": ["FOO", "BAR", "FOOBAR"]}""".schema - ) - val yes2 = MigrationGenerator.enumLonger(modifiedY2) must beTrue - - val modifiedN = SchemaDiff.Modified( - "/foo".jsonPointer, - json"""{"type": "string", "enum": ["FOO", "BAR"]}""".schema, - json"""{"type": "string", "enum": ["FO", "BA"]}""".schema - ) - val no = MigrationGenerator.enumLonger(modifiedN) must beFalse - - yes1 and yes2 and no - } - -} diff --git a/modules/core/src/test/scala/com/snowplowanalytics/iglu/schemaddl/redshift/generators/TypeSuggestionsSpec.scala b/modules/core/src/test/scala/com/snowplowanalytics/iglu/schemaddl/redshift/generators/TypeSuggestionsSpec.scala deleted file mode 100644 index 1cf49642..00000000 --- a/modules/core/src/test/scala/com/snowplowanalytics/iglu/schemaddl/redshift/generators/TypeSuggestionsSpec.scala +++ /dev/null @@ -1,83 +0,0 @@ -/* - * Copyright (c) 2014-2023 Snowplow Analytics Ltd. All rights reserved. - * - * This program is licensed to you under the Apache License Version 2.0, - * and you may not use this file except in compliance with the Apache License Version 2.0. - * You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the Apache License Version 2.0 is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. 
- */ -package com.snowplowanalytics.iglu.schemaddl.redshift.generators - -import io.circe.literal._ - -import com.snowplowanalytics.iglu.schemaddl.redshift._ - -import com.snowplowanalytics.iglu.schemaddl.SpecHelpers._ - -// specs2 -import org.specs2.mutable.Specification - -class TypeSuggestionsSpec extends Specification { - "getDataType" should { - "suggest decimal for multipleOf == 0.01" in { - val props = json"""{"type": "number", "multipleOf": 0.01}""".schema - DdlGenerator.getDataType(props, 16, "somecolumn") must beEqualTo(RedshiftDecimal(Some(36), Some(2))) - } - "suggest integer for multipleOf == 1" in { - val props = json"""{"type": "number", "multipleOf": 1}""".schema - DdlGenerator.getDataType(props, 16, "somecolumn") must beEqualTo(RedshiftInteger) - } - "handle string" in { - val props = json"""{"type": "string"}""".schema - DdlGenerator.getDataType(props, 16, "somecolumn") must beEqualTo(RedshiftVarchar(16)) - } - "handle string with maxLength" in { - val props = json"""{"type": "string", "maxLength": 42}""".schema - DdlGenerator.getDataType(props, 16, "somecolumn") must beEqualTo(RedshiftVarchar(42)) - } - "handle string with enum" in { - val props = json"""{"type": "string", "enum": ["one", "two"]}""".schema - DdlGenerator.getDataType(props, 16, "somecolumn") must beEqualTo(RedshiftVarchar(3)) - } - "handle string with enum and maxLength" in { - val props = json"""{"type": "string", "enum": ["one", "two"], "maxLength": 42}""".schema - DdlGenerator.getDataType(props, 16, "somecolumn") must beEqualTo(RedshiftVarchar(3)) - } - "handle invalid enum" in { - val props = json"""{"type": "integer", "multipleOf": 1, "enum": [2,3,5,"hello",32]}""".schema - DdlGenerator.getDataType(props, 16, "somecolumn") must beEqualTo(RedshiftVarchar(7)) - } - "recognize string,null maxLength == minLength as CHAR" in { - val props = json"""{"type": ["string","null"], "minLength": "12", "maxLength": "12"}""".schema - DdlGenerator.getDataType(props, 16, "somecolumn") must beEqualTo(RedshiftChar(12)) - } - "recognize number with product type" in { - val props = json"""{"type": ["number","null"]}""".schema - DdlGenerator.getDataType(props, 16, "somecolumn") must beEqualTo(RedshiftDouble) - } - "recognize integer with product type" in { - val props = json"""{"type": ["integer","null"]}""".schema - DdlGenerator.getDataType(props, 16, "somecolumn") must beEqualTo(RedshiftBigInt) - } - "recognize timestamp" in { - val props = json"""{"type": "string", "format": "date-time"}""".schema - DdlGenerator.getDataType(props, 16, "somecolumn") must beEqualTo(RedshiftTimestamp) - } - "recognize full date" in { - val props = json"""{"type": "string", "format": "date"}""".schema - DdlGenerator.getDataType(props, 16, "somecolumn") must beEqualTo(RedshiftDate) - } - "recogninze numbers bigger than Long.MaxValue" in { - val props = json"""{"type": "integer", "maximum": 9223372036854775808}""".schema - DdlGenerator.getDataType(props, 16, "somecolumn") must beEqualTo(RedshiftBigInt) - } - "fallback to VARCHAR(65535) for arrays" in { - val props = json"""{"type": "array"}""".schema - DdlGenerator.getDataType(props, 16, "somecolumn") must beEqualTo(RedshiftVarchar(65535)) - } - } -} diff --git a/modules/core/src/test/scala/com/snowplowanalytics/iglu/schemaddl/redshift/internal/FlatSchemaSpec.scala b/modules/core/src/test/scala/com/snowplowanalytics/iglu/schemaddl/redshift/internal/FlatSchemaSpec.scala new file mode 100644 index 00000000..ee987e6f --- /dev/null +++ 
b/modules/core/src/test/scala/com/snowplowanalytics/iglu/schemaddl/redshift/internal/FlatSchemaSpec.scala @@ -0,0 +1,531 @@ +/* + * Copyright (c) 2014-2023 Snowplow Analytics Ltd. All rights reserved. + * + * This program is licensed to you under the Apache License Version 2.0, + * and you may not use this file except in compliance with the Apache License Version 2.0. + * You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the Apache License Version 2.0 is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. + */ +package com.snowplowanalytics.iglu.schemaddl.redshift.internal + +import cats.implicits._ +import com.snowplowanalytics.iglu.schemaddl.SpecHelpers._ +import com.snowplowanalytics.iglu.schemaddl.jsonschema.properties.CommonProperties.{Description, Type} +import com.snowplowanalytics.iglu.schemaddl.jsonschema.{Pointer, Schema} +import io.circe.literal._ +import org.specs2.matcher.Matcher +import org.specs2.mutable.Specification + + +class FlatSchemaSpec extends Specification { + + "build" should { + "recognize a JSON schema without properties" >> { + val schema = json"""{"type": "object"}""".schema + val expected = FlatSchema( + Set(Pointer.Root -> Schema.empty.copy(`type` = Some(Type.Object))), + Set.empty, + Set.empty) + + FlatSchema.build(schema) must beEqualTo(expected) + } + + "recognize an object property without 'properties' as primitive" >> { + val json = + json""" + { + "type": "object", + "properties": { + "nested": { + "type": "object", + "properties": { + "object_without_properties": { + "type": "object" + } + } + } + } + } + """.schema + + val subSchemas = Set( + "/properties/nested/properties/object_without_properties".jsonPointer -> + json"""{"type": ["object", "null"]}""".schema) + + val result = FlatSchema.build(json) + + val parentsExpectation = result.parents.map(_._1) must contain(Pointer.Root, "/properties/nested".jsonPointer) + + (result.subschemas must beEqualTo(subSchemas)) and (result.required must beEmpty) and parentsExpectation + } + + "recognize an empty self-describing schema as empty FlatSchema" >> { + val json = + json""" + { + "description": "Wildcard schema #1 to match any valid JSON instance", + "self": { + "vendor": "com.snowplowanalytics.iglu", + "name": "anything-a", + "format": "jsonschema", + "version": "1-0-0" + } + } + """.schema + val description = "Wildcard schema #1 to match any valid JSON instance" + val expected = FlatSchema(Set( + Pointer.Root -> Schema.empty.copy(description = Some(Description(description)))), + Set.empty, + Set.empty) + + val res = FlatSchema.build(json) + + res must beEqualTo(expected) + } + + "recognize an array as primitive" >> { + val schema = + json""" + { + "type": "object", + "properties": { + "foo": { + "type": "array", + "items": { + "type": "string" + } + } + }, + "additionalProperties": false + } + """.schema + + val expected = Set( + "/properties/foo".jsonPointer -> + json"""{"type": ["array", "null"], "items": {"type": "string"}}""".schema + ) + + val result = FlatSchema.build(schema) + + val subschemasExpectation = result.subschemas must beEqualTo(expected) + val requiredExpectation = result.required must beEmpty + val parentsExpectation = result.parents.map(_._1) must 
contain(Pointer.Root) + + subschemasExpectation and requiredExpectation and parentsExpectation + } + + "transform [object,string] union type into single primitive" >> { + val schema = + json""" + { + "type": "object", + "properties": { + "foo": { + "type": ["string", "object"], + "properties": { + "one": { + "type": "string" + }, + "two": { + "type": "integer" + } + } + }, + "a_field": { + "type": ["string", "integer"] + }, + "b_field": { + "type": "string" + }, + "c_field": { + "type": ["integer", "number"] + }, + "d_field": { + "type": "object", + "properties": { + "one": { + "type": ["integer", "object"], + "properties": { + "two": { + "type": "string" + }, + "three": { + "type": "integer" + } + } + } + } + } + }, + "additionalProperties": false + }""".schema + + val result = FlatSchema.build(schema) + + val expectedSubschemas = Set( + "/properties/foo".jsonPointer -> + json"""{ + "type": ["string", "object", "null"], + "properties": { + "one": { + "type": "string" + }, + "two": { + "type": "integer" + } + } + }""".schema, + "/properties/d_field/properties/one".jsonPointer -> + json"""{ + "type": ["integer", "object", "null"], + "properties": { + "two": { + "type": "string" + }, + "three": { + "type": "integer" + } + } + }""".schema, + "/properties/a_field".jsonPointer -> json"""{"type": ["string", "integer", "null"]}""".schema, + "/properties/b_field".jsonPointer -> json"""{"type": ["string", "null"]}""".schema, + "/properties/c_field".jsonPointer -> json"""{"type": ["integer", "number", "null"]}""".schema + ) + + result.subschemas must beEqualTo(expectedSubschemas) and (result.required must beEmpty) + } + + "recognize oneOf with object and string as primitive" >> { + val json = + json""" + { + "type": "object", + "properties": { + "union": { + "oneOf": [ + { + "type": "object", + "properties": { + "object_without_properties": { "type": "object" } + } + }, + { + "type": "string" + } + ] + } + }, + "additionalProperties": false + } + """.schema + + val subSchemas = Set( + "/properties/union".jsonPointer -> + json"""{ + "oneOf": [ + { + "type": "object", + "properties": { + "object_without_properties": { "type": "object" } + } + }, + { + "type": "string" + } + ] + }""".schema.copy(`type` = Some(Type.Null)) + ) + + val result = FlatSchema.build(json) + + (result.subschemas must beEqualTo(subSchemas)) and (result.required must beEmpty) + } + + "recognize an optional enum field" >> { + val schema = + json""" + { + "type": "object", + "properties": { + "enum_field": { + "enum": [ + "event", + "exception", + "item" + ] + }, + "nonInteractionHit": { + "type": ["boolean", "null"] + } + }, + "additionalProperties": false + } + """.schema + + val expectedSubSchemas = Set( + "/properties/enum_field".jsonPointer -> + json"""{"enum": ["event","exception","item"]}""".schema.copy(`type` = Some(Type.Null)), + "/properties/nonInteractionHit".jsonPointer -> + json"""{"type": ["boolean", "null"]}""".schema) + + val result = FlatSchema.build(schema) + + (result.subschemas must beEqualTo(expectedSubSchemas)) and (result.required must beEmpty) + } + + "recognize an optional nested enum field" >> { + val schema = + json""" + { + "type": "object", + "properties": { + "a_field": { + "type": "object", + "properties": { + "enum_field": { + "enum": [ + "event", + "exception", + "item" + ] + } + } + }, + "nonInteractionHit": { + "type": ["boolean", "null"] + } + }, + "additionalProperties": false + } + """.schema + + val expectedSubSchemas = Set( + "/properties/a_field/properties/enum_field".jsonPointer -> + 
json"""{"enum": ["event","exception","item"]}""".schema.copy(`type` = Some(Type.Null)), + "/properties/nonInteractionHit".jsonPointer -> + json"""{"type": ["boolean", "null"]}""".schema) + + val result = FlatSchema.build(schema) + + (result.subschemas must beEqualTo(expectedSubSchemas)) and (result.required must beEmpty) + } + + "recognize a field without type" >> { + val schema = + json""" + { + "type": "object", + "properties": { + "a_field": { "type": "string" }, + "b_field": {} + } + } + """.schema + + val expectedSubSchemas = Set( + "/properties/a_field".jsonPointer -> json"""{"type": ["string", "null"]}""".schema, + "/properties/b_field".jsonPointer -> Schema.empty.copy(`type` = Some(Type.Null)) + ) + + val result = FlatSchema.build(schema) + + (result.subschemas must beEqualTo(expectedSubSchemas)) and (result.required must beEmpty) + } + + "add all required properties and skips not-nested required" >> { + val schema = + json""" + { + "type": "object", + "required": ["foo"], + "properties": { + "foo": { + "type": "object", + "required": ["one"], + "properties": { + "one": { + "type": "string" + }, + "nonRequiredNested": { + "type": "object", + "required": ["nestedRequired"], + "properties": { + "nestedRequired": {"type": "integer"} + } + } + } + } + }, + "additionalProperties": false + } + """.schema + + val result = FlatSchema.build(schema) + + val expectedRequired = Set("/properties/foo".jsonPointer, "/properties/foo/properties/one".jsonPointer) + val expectedSubschemas = Set( + "/properties/foo/properties/nonRequiredNested/properties/nestedRequired".jsonPointer -> + json"""{"type": ["integer", "null"]}""".schema, + "/properties/foo/properties/one".jsonPointer -> + json"""{"type": "string"}""".schema + ) + + val required = result.required must bePointers(expectedRequired) + val subschemas = result.subschemas must beEqualTo(expectedSubschemas) + + required and subschemas + } + + "skip properties inside patternProperties" >> { + val schema = + json""" + { + "type": "object", + "required": ["one"], + "properties": { + "one": { + "type": "object", + "required": ["two"], + "properties": { + "two": { + "type": "string" + }, + "withProps": { + "type": "object", + "patternProperties": { + ".excluded": {"type": "string"}, + ".excluded-with-required": { + "type": "object", + "properties": { + "also-excluded": {"type": "integer"} + } + } + }, + "properties": { + "included": {"type": "integer"} + } + } + } + } + }, + "additionalProperties": false + } + """.schema + + val result = FlatSchema.build(schema) + + val expectedRequired = Set("/properties/one".jsonPointer, "/properties/one/properties/two".jsonPointer) + val expectedSubschemas = Set( + "/properties/one/properties/two".jsonPointer -> + json"""{"type": "string"}""".schema, + "/properties/one/properties/withProps/properties/included".jsonPointer -> + json"""{"type": ["integer", "null"]}""".schema + ) + + val required = result.required must bePointers(expectedRequired) + val subschemas = result.subschemas must beEqualTo(expectedSubschemas) + + required and subschemas + } + + "recognize an oneOf as sum type" >> { + val json = + json""" + { + "type": "object", + "properties": { + "union": { + "oneOf": [ + { + "type": "object", + "properties": { + "one": { "type": "integer" } + } + }, + { + "type": "object", + "properties": { + "two": { "type": "string" } + } + } + ] + } + }, + "additionalProperties": false + } + """.schema + + val subSchemas = Set( + "/properties/union".jsonPointer -> + json"""{ + "oneOf": [ + { + "type": "object", + 
"properties": { + "one": { "type": "integer" } + } + }, + { + "type": "object", + "properties": { + "two": { "type": "string" } + } + } + ] + }""".schema.copy(`type` = Some(Type.Null)) + ) + + val result = FlatSchema.build(json) + + (result.subschemas must beEqualTo(subSchemas)) and (result.required must beEmpty) + + } + } + + "nestedRequired" should { + "return true if all parent properties are required (no null in type)" >> { + val subschemas: FlatSchema.SubSchemas = + Set("/deeply".jsonPointer, "/deeply/nested".jsonPointer, "/other/property".jsonPointer) + .map((p: Pointer.SchemaPointer) => p -> Schema.empty) + + val schema = FlatSchema(subschemas, Set("/deeply".jsonPointer, "/deeply/nested".jsonPointer), Set.empty[(Pointer.SchemaPointer, Schema)]) + val result = schema.nestedRequired("/deeply/nested/property".jsonPointer) + + result must beTrue + } + } + + "isHeterogeneousUnion" should { + "recognize a Schema with oneOf" >> { + val json = + json""" + { + "oneOf": [ + { + "type": "object", + "properties": { + "object_without_properties": { "type": "object" } + } + }, + { + "type": "string" + } + ] + } + """.schema + + FlatSchema.isHeterogeneousUnion(json) must beTrue + } + } + + def bePointers(expected: Set[Pointer.SchemaPointer]): Matcher[Set[Pointer.SchemaPointer]] = { actual: Set[Pointer.SchemaPointer] => + val result = + s"""|actual: ${actual.toList.map(_.show).sortBy(_.length).mkString(", ")} + |expected: ${expected.toList.map(_.show).sortBy(_.length).mkString(", ")}""".stripMargin + (actual == expected, result) + } +} diff --git a/modules/core/src/test/scala/com/snowplowanalytics/iglu/schemaddl/redshift/internal/ShredModelEntrySpec.scala b/modules/core/src/test/scala/com/snowplowanalytics/iglu/schemaddl/redshift/internal/ShredModelEntrySpec.scala new file mode 100644 index 00000000..3d4e6408 --- /dev/null +++ b/modules/core/src/test/scala/com/snowplowanalytics/iglu/schemaddl/redshift/internal/ShredModelEntrySpec.scala @@ -0,0 +1,214 @@ +/* + * Copyright (c) 2014-2023 Snowplow Analytics Ltd. All rights reserved. + * + * This program is licensed to you under the Apache License Version 2.0, + * and you may not use this file except in compliance with the Apache License Version 2.0. + * You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the Apache License Version 2.0 is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. 
+ */
+package com.snowplowanalytics.iglu.schemaddl.redshift.internal
+
+import com.snowplowanalytics.iglu.schemaddl.SpecHelpers._
+import com.snowplowanalytics.iglu.schemaddl.redshift.internal.ShredModelEntrySpec.{dummyPtr, refJson}
+import io.circe.literal._
+import cats.syntax.show._
+import com.snowplowanalytics.iglu.schemaddl.redshift.ShredModelEntry
+
+// specs2
+import org.specs2.mutable.Specification
+import com.snowplowanalytics.iglu.schemaddl.redshift.ShredModelEntry.ColumnType._
+import com.snowplowanalytics.iglu.schemaddl.redshift.ShredModelEntry.CompressionEncoding._
+
+class ShredModelEntrySpec extends Specification {
+
+  "type inference" should {
+    "suggest decimal for multipleOf == 0.01" in {
+      val props = json"""{"type": "number", "multipleOf": 0.01}""".schema
+      ShredModelEntry(dummyPtr, props).columnType must beEqualTo(RedshiftDecimal(Some(36), Some(2)))
+    }
+    "suggest integer for multipleOf == 1" in {
+      val props = json"""{"type": "number", "multipleOf": 1}""".schema
+      ShredModelEntry(dummyPtr, props).columnType must beEqualTo(RedshiftInteger)
+    }
+    "handle string" in {
+      val props = json"""{"type": "string"}""".schema
+      ShredModelEntry(dummyPtr, props).columnType must beEqualTo(RedshiftVarchar(ShredModelEntry.VARCHAR_SIZE))
+    }
+    "handle string with maxLength" in {
+      val props = json"""{"type": "string", "maxLength": 42}""".schema
+      ShredModelEntry(dummyPtr, props).columnType must beEqualTo(RedshiftVarchar(42))
+    }
+    "handle string with enum" in {
+      val props = json"""{"type": "string", "enum": ["one", "two"]}""".schema
+      ShredModelEntry(dummyPtr, props).columnType must beEqualTo(RedshiftVarchar(3))
+    }
+    "handle string with enum and maxLength" in {
+      val props = json"""{"type": "string", "enum": ["one", "two"], "maxLength": 42}""".schema
+      ShredModelEntry(dummyPtr, props).columnType must beEqualTo(RedshiftVarchar(3))
+    }
+    "handle invalid enum" in {
+      val props = json"""{"type": "integer", "multipleOf": 1, "enum": [2,3,5,"hello",32]}""".schema
+      ShredModelEntry(dummyPtr, props).columnType must beEqualTo(RedshiftVarchar(7))
+    }
+    "recognize string,null maxLength == minLength as CHAR" in {
+      val props = json"""{"type": ["string","null"], "minLength": "12", "maxLength": "12"}""".schema
+      ShredModelEntry(dummyPtr, props).columnType must beEqualTo(RedshiftChar(12))
+    }
+    "recognize number with product type" in {
+      val props = json"""{"type": ["number","null"]}""".schema
+      ShredModelEntry(dummyPtr, props).columnType must beEqualTo(RedshiftDouble)
+    }
+    "recognize integer with product type" in {
+      val props = json"""{"type": ["integer","null"]}""".schema
+      ShredModelEntry(dummyPtr, props).columnType must beEqualTo(RedshiftBigInt)
+    }
+    "recognize timestamp" in {
+      val props = json"""{"type": "string", "format": "date-time"}""".schema
+      ShredModelEntry(dummyPtr, props).columnType must beEqualTo(RedshiftTimestamp)
+    }
+    "recognize full date" in {
+      val props = json"""{"type": "string", "format": "date"}""".schema
+      ShredModelEntry(dummyPtr, props).columnType must beEqualTo(RedshiftDate)
+    }
+    "recognize numbers bigger than Long.MaxValue" in {
+      val props = json"""{"type": "integer", "maximum": 9223372036854775808}""".schema
+      ShredModelEntry(dummyPtr, props).columnType must beEqualTo(RedshiftBigInt)
+    }
+    "fallback to VARCHAR(65535) for arrays" in {
+      val props = json"""{"type": "array"}""".schema
+      ShredModelEntry(dummyPtr, props).columnType must beEqualTo(RedshiftVarchar(65535))
+    }
+  }
+
+  "suggest compression" should {
+    "suggest Text255Encoding for enums less than 255 in 
length" in { + val props = json"""{"type": "string", "enum": ["one", "two"], "maxLength": 42}""".schema + ShredModelEntry(dummyPtr, props).compressionEncoding must beEqualTo(Text255Encoding) + } + + "suggest RunLengthEncoding for booleans" in { + val props = json"""{"type": "boolean"}""".schema + ShredModelEntry(dummyPtr, props).compressionEncoding must beEqualTo(RunLengthEncoding) + } + "suggest RawEncoding for doubles" in { + val props = json"""{"type": ["number","null"]}""".schema + ShredModelEntry(dummyPtr, props).compressionEncoding must beEqualTo(RawEncoding) + } + "suggest ZstdEncoding for everything else" in { + val props = json"""{"type": "string"}""".schema + ShredModelEntry(dummyPtr, props).compressionEncoding must beEqualTo(ZstdEncoding) + } + } + + "convert to column definition" should { + "align column width" in { + val cols = List( + ShredModelEntry("/ptr".jsonPointer, json"""{"type": "array"}""".schema), + ShredModelEntry("/ptrlooong".jsonPointer, json"""{"type": "boolean"}""".schema), + ShredModelEntry("/ptrlooon".jsonPointer, json"""{"type": "string"}""".schema), + ShredModelEntry("/ptrs".jsonPointer, json"""{"type": "string", "format": "date-time"}""".schema), + ShredModelEntry("/ptrlooong1111".jsonPointer, json"""{"type": ["number","null"]}""".schema), + ) + cols.show must beEqualTo( + """ "schema_vendor" VARCHAR(128) ENCODE ZSTD NOT NULL, + | "schema_name" VARCHAR(128) ENCODE ZSTD NOT NULL, + | "schema_format" VARCHAR(128) ENCODE ZSTD NOT NULL, + | "schema_version" VARCHAR(128) ENCODE ZSTD NOT NULL, + | "root_id" CHAR(36) ENCODE RAW NOT NULL, + | "root_tstamp" TIMESTAMP ENCODE ZSTD NOT NULL, + | "ref_root" VARCHAR(255) ENCODE ZSTD NOT NULL, + | "ref_tree" VARCHAR(1500) ENCODE ZSTD NOT NULL, + | "ref_parent" VARCHAR(255) ENCODE ZSTD NOT NULL, + | "ptr" VARCHAR(65535) ENCODE ZSTD NOT NULL, + | "ptrlooong" BOOLEAN ENCODE RUNLENGTH NOT NULL, + | "ptrlooon" VARCHAR(4096) ENCODE ZSTD NOT NULL, + | "ptrs" TIMESTAMP ENCODE ZSTD NOT NULL, + | "ptrlooong1111" DOUBLE PRECISION ENCODE RAW""".stripMargin) + } + "align column width to extra columns when other columns are smaller" in { + val cols = List( + ShredModelEntry("/ptr".jsonPointer, json"""{"type": "string"}""".schema) + ) + cols.show must beEqualTo( + """| "schema_vendor" VARCHAR(128) ENCODE ZSTD NOT NULL, + | "schema_name" VARCHAR(128) ENCODE ZSTD NOT NULL, + | "schema_format" VARCHAR(128) ENCODE ZSTD NOT NULL, + | "schema_version" VARCHAR(128) ENCODE ZSTD NOT NULL, + | "root_id" CHAR(36) ENCODE RAW NOT NULL, + | "root_tstamp" TIMESTAMP ENCODE ZSTD NOT NULL, + | "ref_root" VARCHAR(255) ENCODE ZSTD NOT NULL, + | "ref_tree" VARCHAR(1500) ENCODE ZSTD NOT NULL, + | "ref_parent" VARCHAR(255) ENCODE ZSTD NOT NULL, + | "ptr" VARCHAR(4096) ENCODE ZSTD NOT NULL""".stripMargin) + } + } + + "String factory" should { + "extract jsonNull" in { + ShredModelEntry( + "/nullPtr".jsonPointer, + json"""{"type": ["number","null"]}""".schema + ).stringFactory(refJson) must beEqualTo("\\N") + } + "extract jsonBoolean" in { + ShredModelEntry( + "/boolPtr".jsonPointer, + json"""{"type": "boolean"}""".schema + ).stringFactory(refJson) must beEqualTo("1") + } + "extract jsonNumber" in { + ShredModelEntry( + "/numPtr".jsonPointer, + json"""{"type": "number"}""".schema + ).stringFactory(refJson) must beEqualTo("9999") + } + "extract jsonString" in { + ShredModelEntry( + "/strPtr".jsonPointer, + json"""{"type": "string"}""".schema + ).stringFactory(refJson) must beEqualTo("a") + } + "extract jsonArray" in { + ShredModelEntry( + 
"/arrayPtr".jsonPointer, + json"""{"type": "array"}""".schema + ).stringFactory(refJson) must beEqualTo("[\"a\",\"b\"]") + } + "extract jsonObject" in { + ShredModelEntry( + "/objPtr".jsonPointer, + json"""{"type": "object"}""".schema + ).stringFactory(refJson) must beEqualTo("{\"a\":\"b\"}") + } + "extract nested field" in { + ShredModelEntry( + "/objPtr/a".jsonPointer, + json"""{"type": "string"}""".schema + ).stringFactory(refJson) must beEqualTo("b") + } + "extract null from missing nested field" in { + ShredModelEntry( + "/objPtr/b/s/c".jsonPointer, + json"""{"type": "string"}""".schema + ).stringFactory(refJson) must beEqualTo("\\N") + } + } +} + +object ShredModelEntrySpec { + val refJson = + json"""{ + "arrayPtr": ["a", "b"], + "objPtr": {"a": "b"}, + "strPtr": "a", + "numPtr": 9999, + "nullPtr": null, + "boolPtr": true + }""" + val dummyPtr = "/ptr".jsonPointer + val dummySchema = json"""{"type": "string"}""".schema +} \ No newline at end of file diff --git a/modules/json4s/src/main/scala/com.snowplowanalytics.iglu.schemaddl/jsonschema/json4s/ArraySerializers.scala b/modules/json4s/src/main/scala/com.snowplowanalytics.iglu.schemaddl/jsonschema/json4s/ArraySerializers.scala deleted file mode 100644 index 6a326078..00000000 --- a/modules/json4s/src/main/scala/com.snowplowanalytics.iglu.schemaddl/jsonschema/json4s/ArraySerializers.scala +++ /dev/null @@ -1,86 +0,0 @@ -/* - * Copyright (c) 2014-2023 Snowplow Analytics Ltd. All rights reserved. - * - * This program is licensed to you under the Apache License Version 2.0, - * and you may not use this file except in compliance with the Apache License Version 2.0. - * You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the Apache License Version 2.0 is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. 
- */ -package com.snowplowanalytics.iglu.schemaddl -package jsonschema.json4s - -// json4s -import org.json4s._ -import org.json4s.jackson.compactJson - -// this library -import jsonschema.Schema -import jsonschema.properties.ArrayProperty._ - -object ArraySerializers { - - implicit val formats = DefaultFormats - import implicits._ - - object ItemsSerializer extends CustomSerializer[Items](_ => ( - { - case schema: JObject => - Schema.parse(schema.asInstanceOf[JValue]) match { - case Some(s) => Items.ListItems(s) - case None => throw new MappingException(compactJson(schema) + " isn't Schema") - } - case tuple: JArray => - val schemas: List[Option[Schema]] = tuple.arr.map(Schema.parse(_)) - if (schemas.forall(_.isDefined)) Items.TupleItems(schemas.map(_.get)) - else throw new MappingException(compactJson(tuple) + " need to be array of Schemas") - case x => throw new MappingException(compactJson(x) + " isn't valid items") - }, - - { - case Items.ListItems(schema) => Schema.normalize(schema) - case Items.TupleItems(schemas) => JArray(schemas.map(Schema.normalize(_))) - } - )) - - object AdditionalPropertiesSerializer extends CustomSerializer[AdditionalItems](_ => ( - { - case JBool(bool) => AdditionalItems.AdditionalItemsAllowed(bool) - case obj: JObject => Schema.parse(obj.asInstanceOf[JValue]) match { - case Some(schema) => AdditionalItems.AdditionalItemsSchema(schema) - case _ => throw new MappingException(compactJson(obj) + " isn't Schema") - } - case x => throw new MappingException(compactJson(x) + " isn't bool") - }, - - { - case AdditionalItems.AdditionalItemsAllowed(value) => JBool(value) - case AdditionalItems.AdditionalItemsSchema(schema) => Schema.normalize(schema) - } - )) - - object MinItemsSerializer extends CustomSerializer[MinItems](_ => ( - { - case JInt(value) => MinItems(value) - case x => throw new MappingException(compactJson(x) + " isn't minLength") - }, - - { - case MinItems(value) => JInt(value) - } - )) - - object MaxItemsSerializer extends CustomSerializer[MaxItems](_ => ( - { - case JInt(value) => MaxItems(value) - case x => throw new MappingException(compactJson(x) + " isn't maxItems") - }, - - { - case MaxItems(value) => JInt(value) - } - )) -} diff --git a/modules/json4s/src/main/scala/com.snowplowanalytics.iglu.schemaddl/jsonschema/json4s/CommonSerializers.scala b/modules/json4s/src/main/scala/com.snowplowanalytics.iglu.schemaddl/jsonschema/json4s/CommonSerializers.scala deleted file mode 100644 index ca2fdd90..00000000 --- a/modules/json4s/src/main/scala/com.snowplowanalytics.iglu.schemaddl/jsonschema/json4s/CommonSerializers.scala +++ /dev/null @@ -1,114 +0,0 @@ -/* - * Copyright (c) 2014-2023 Snowplow Analytics Ltd. All rights reserved. - * - * This program is licensed to you under the Apache License Version 2.0, - * and you may not use this file except in compliance with the Apache License Version 2.0. - * You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the Apache License Version 2.0 is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. 
- */ -package com.snowplowanalytics.iglu.schemaddl -package jsonschema -package json4s - -// json4s -import org.json4s._ -import org.json4s.jackson.JsonMethods.{ parse, compact } - -import cats.instances.either._ -import cats.instances.list._ -import cats.syntax.traverse._ - -// Circe -import io.circe.Json - -// This library -import com.snowplowanalytics.iglu.schemaddl.jsonschema.properties.CommonProperties._ - -import implicits._ - -object CommonSerializers { - - // TODO: replace with AST-based - private def fromCirce(json: Json): JValue = - parse(json.noSpaces) - - private def toCirce(json: JValue): Json = - json match { - case JString(string) => Json.fromString(string) - case JInt(int) => Json.fromBigInt(int) - case JLong(long) => Json.fromBigInt(long) - case JBool(bool) => Json.fromBoolean(bool) - case JArray(arr) => Json.fromValues(arr.map(toCirce)) - case JSet(set) => Json.fromValues(set.map(toCirce)) - case JDouble(num) => Json.fromDoubleOrNull(num) - case JDecimal(decimal) => Json.fromBigDecimal(decimal) - case JObject(fields) => Json.fromFields(fields.map { case (k, v) => (k, toCirce(v)) }) - case JNull => Json.Null - case JNothing => Json.Null - } - - object TypeSerializer extends CustomSerializer[Type](_ => ( - { - case JArray(ts) => - val types = ts.map { - case JString(s) => Type.fromString(s) - case s => Left(compact(s)) - } - types.sequence[Either[String, *], Type] match { - case Right(List(t)) => t - case Right(u) => Type.Union(u.toSet) - case Left(invalid) => throw new MappingException(invalid + " is not valid list of types") - } - case JString(t) => - Type.fromString(t) match { - case Right(singleType) => singleType - case Left(invalid) => throw new MappingException(invalid + " is not valid list of types") - } - case x => throw new MappingException(compact(x) + " is not valid list of types") - }, - - { - case t: Type => fromCirce(t.asJson) - } - )) - - object DescriptionSerializer extends CustomSerializer[Description](_ => ( - { - case JString(value) => Description(value) - case x => throw new MappingException(compact(x) + " isn't valid description") - }, - - { - case Description(value) => JString(value) - } - )) - - - object EnumSerializer extends CustomSerializer[Enum](_ => ( - { - case JArray(values) => Enum(values.map(toCirce)) - case x => throw new MappingException(compact(x) + " isn't valid enum") - }, - - { - case Enum(values) => JArray(values.map(fromCirce)) - } - )) - - object OneOfSerializer extends CustomSerializer[OneOf](_ => ( - { - case JArray(values) => - val schemas: List[Option[Schema]] = values.map(Schema.parse(_)) - if (schemas.forall(_.isDefined)) OneOf(schemas.map(_.get)) - else throw new MappingException(compact(JArray(values)) + " need to be array of Schemas") - }, - - { - case OneOf(schemas) => JArray(schemas.map(Schema.normalize(_))) - } - )) -} diff --git a/modules/json4s/src/main/scala/com.snowplowanalytics.iglu.schemaddl/jsonschema/json4s/Formats.scala b/modules/json4s/src/main/scala/com.snowplowanalytics.iglu.schemaddl/jsonschema/json4s/Formats.scala deleted file mode 100644 index c7ae3399..00000000 --- a/modules/json4s/src/main/scala/com.snowplowanalytics.iglu.schemaddl/jsonschema/json4s/Formats.scala +++ /dev/null @@ -1,41 +0,0 @@ -/* - * Copyright (c) 2016-2023 Snowplow Analytics Ltd. All rights reserved. - * - * This program is licensed to you under the Apache License Version 2.0, - * and you may not use this file except in compliance with the Apache License Version 2.0. 
- * You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the Apache License Version 2.0 is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. - */ -package com.snowplowanalytics.iglu.schemaddl.jsonschema -package json4s - -object Formats { - /** - * json4s formats for all JSON Schema properties - */ - implicit lazy val allFormats: org.json4s.Formats = - org.json4s.DefaultFormats ++ List( - StringSerializers.FormatSerializer, - StringSerializers.MinLengthSerializer, - StringSerializers.MaxLengthSerializer, - StringSerializers.PatternSerializer, - ObjectSerializers.PropertiesSerializer, - ObjectSerializers.AdditionalPropertiesSerializer, - ObjectSerializers.RequiredSerializer, - ObjectSerializers.PatternPropertiesSerializer, - CommonSerializers.TypeSerializer, - CommonSerializers.EnumSerializer, - CommonSerializers.OneOfSerializer, - CommonSerializers.DescriptionSerializer, - NumberSerializers.MaximumSerializer, - NumberSerializers.MinimumSerializer, - NumberSerializers.MultipleOfSerializer, - ArraySerializers.AdditionalPropertiesSerializer, - ArraySerializers.MaxItemsSerializer, - ArraySerializers.MinItemsSerializer, - ArraySerializers.ItemsSerializer) -} diff --git a/modules/json4s/src/main/scala/com.snowplowanalytics.iglu.schemaddl/jsonschema/json4s/NumberSerializers.scala b/modules/json4s/src/main/scala/com.snowplowanalytics.iglu.schemaddl/jsonschema/json4s/NumberSerializers.scala deleted file mode 100644 index 191dec76..00000000 --- a/modules/json4s/src/main/scala/com.snowplowanalytics.iglu.schemaddl/jsonschema/json4s/NumberSerializers.scala +++ /dev/null @@ -1,63 +0,0 @@ -/* - * Copyright (c) 2014-2023 Snowplow Analytics Ltd. All rights reserved. - * - * This program is licensed to you under the Apache License Version 2.0, - * and you may not use this file except in compliance with the Apache License Version 2.0. - * You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the Apache License Version 2.0 is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. 
- */ -package com.snowplowanalytics.iglu.schemaddl -package jsonschema.json4s - -// json4s -import org.json4s._ -import org.json4s.jackson.JsonMethods.compact - -// This library -import jsonschema.properties.NumberProperty._ - -object NumberSerializers { - - object MultipleOfSerializer extends CustomSerializer[MultipleOf](_ => ( - { - case JInt(value) => MultipleOf.IntegerMultipleOf(value) - case JDouble(value) => MultipleOf.NumberMultipleOf(value) - case x => throw new MappingException(compact(x) + " isn't a numeric value") - }, - - { - case MultipleOf.NumberMultipleOf(value) => JDouble(value.toDouble) - case MultipleOf.IntegerMultipleOf(value) => JInt(value) - } - )) - - object MaximumSerializer extends CustomSerializer[Maximum](_ => ( - { - case JInt(value) => Maximum.IntegerMaximum(value) - case JDouble(value) => Maximum.NumberMaximum(value) - case x => throw new MappingException(compact(x) + " isn't a numeric value") - }, - - { - case Maximum.NumberMaximum(value) => JDouble(value.toDouble) - case Maximum.IntegerMaximum(value) => JInt(value) - } - )) - - object MinimumSerializer extends CustomSerializer[Minimum](_ => ( - { - case JInt(value) => Minimum.IntegerMinimum(value) - case JDouble(value) => Minimum.NumberMinimum(value) - case x => throw new MappingException(compact(x) + " isn't numeric value") - }, - - { - case Minimum.NumberMinimum(value) => JDouble(value.toDouble) - case Minimum.IntegerMinimum(value) => JInt(value) - } - )) -} diff --git a/modules/json4s/src/main/scala/com.snowplowanalytics.iglu.schemaddl/jsonschema/json4s/ObjectSerializers.scala b/modules/json4s/src/main/scala/com.snowplowanalytics.iglu.schemaddl/jsonschema/json4s/ObjectSerializers.scala deleted file mode 100644 index 4033d31c..00000000 --- a/modules/json4s/src/main/scala/com.snowplowanalytics.iglu.schemaddl/jsonschema/json4s/ObjectSerializers.scala +++ /dev/null @@ -1,103 +0,0 @@ -/* - * Copyright (c) 2014-2023 Snowplow Analytics Ltd. All rights reserved. - * - * This program is licensed to you under the Apache License Version 2.0, - * and you may not use this file except in compliance with the Apache License Version 2.0. - * You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the Apache License Version 2.0 is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. 
- */ -package com.snowplowanalytics.iglu.schemaddl -package jsonschema.json4s - -// Scala -import scala.annotation.tailrec - -// json4s -import org.json4s._ -import org.json4s.jackson.JsonMethods.compact - -// This library -import jsonschema.Schema -import jsonschema.properties.ObjectProperty._ - - -object ObjectSerializers { - import ArraySerializers._ - import implicits._ - - @tailrec private def allString(keys: List[JValue], acc: List[String] = Nil): Option[List[String]] = { - keys match { - case Nil => Some(acc.reverse) - case JString(h) :: t => allString(t, h :: acc) - case _ => None - } - } - - object PropertiesSerializer extends CustomSerializer[Properties](_ => ( - { - case obj: JObject => - obj.extractOpt[Map[String, JObject]].map { f => - f.map { case (key, v) => (key, Schema.parse(v: JValue).get)} - } match { - case Some(p) => Properties(p) - case None => throw new MappingException("Isn't properties") - } - case x => throw new MappingException(compact(x) + " isn't properties") - }, - - { - case Properties(fields) => JObject(fields.toList.map { case (k, v) => k -> Schema.normalize(v) }) - } - )) - - object AdditionalPropertiesSerializer extends CustomSerializer[AdditionalProperties](_ => ( - { - case JBool(bool) => AdditionalProperties.AdditionalPropertiesAllowed(bool) - case obj: JObject => Schema.parse(obj: JValue) match { - case Some(schema) => AdditionalProperties.AdditionalPropertiesSchema(schema) - case None => throw new MappingException(compact(obj) + " isn't additionalProperties") - } - case x => throw new MappingException(compact(x) + " isn't bool") - }, - - { - case AdditionalProperties.AdditionalPropertiesAllowed(value) => JBool(value) - case AdditionalProperties.AdditionalPropertiesSchema(value) => Schema.normalize(value) - } - )) - - object RequiredSerializer extends CustomSerializer[Required](_ => ( - { - case JArray(keys) => allString(keys) match { - case Some(k) => Required(k) - case None => throw new MappingException("required array can contain only strings") - } - case x => throw new MappingException(compact(x) + " isn't bool") - }, - - { - case Required(keys) => JArray(keys.map(JString)) - } - )) - - object PatternPropertiesSerializer extends CustomSerializer[PatternProperties](_ => ( - { - case obj: JObject => - obj.extractOpt[Map[String, JObject]].map { f => - f.map { case (key, v) => (key, Schema.parse(v: JValue).get)} - } match { - case Some(p) => PatternProperties(p) - case None => throw new MappingException("Isn't patternProperties") - } - case x => throw new MappingException(compact(x) + " isn't patternProperties") - }, - - { - case PatternProperties(fields) => JObject(fields.toList.map { case (k, v) => k -> Schema.normalize(v) } ) - } - )) -} diff --git a/modules/json4s/src/main/scala/com.snowplowanalytics.iglu.schemaddl/jsonschema/json4s/StringSerializers.scala b/modules/json4s/src/main/scala/com.snowplowanalytics.iglu.schemaddl/jsonschema/json4s/StringSerializers.scala deleted file mode 100644 index baead5a9..00000000 --- a/modules/json4s/src/main/scala/com.snowplowanalytics.iglu.schemaddl/jsonschema/json4s/StringSerializers.scala +++ /dev/null @@ -1,69 +0,0 @@ -/* - * Copyright (c) 2014-2023 Snowplow Analytics Ltd. All rights reserved. - * - * This program is licensed to you under the Apache License Version 2.0, - * and you may not use this file except in compliance with the Apache License Version 2.0. - * You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0. 
- * - * Unless required by applicable law or agreed to in writing, - * software distributed under the Apache License Version 2.0 is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. - */ -package com.snowplowanalytics.iglu.schemaddl -package jsonschema.json4s - -// json4s -import org.json4s._ -import org.json4s.jackson.JsonMethods.compact - -// This library -import jsonschema.properties.StringProperty._ - -object StringSerializers { - - object FormatSerializer extends CustomSerializer[Format](_ => ( - { - case JString(format) => Format.fromString(format) - case _ => throw new MappingException("Format must be string") - }, - { - case f: Format => JString(f.asString) - } - - )) - - object MinLengthSerializer extends CustomSerializer[MinLength](_ => ( - { - case JInt(value) if value >= 0 => MinLength(value) - case x => throw new MappingException(compact(x) + " isn't minLength") - }, - - { - case MinLength(value) => JInt(value) - } - )) - - - object MaxLengthSerializer extends CustomSerializer[MaxLength](_ => ( - { - case JInt(value) if value >= 0 => MaxLength(value) - case x => throw new MappingException(compact(x) + " isn't maxLength") - }, - - { - case MaxLength(value) => JInt(value) - } - )) - - object PatternSerializer extends CustomSerializer[Pattern](_ => ( - { - case JString(value) => Pattern(value) - case x => throw new MappingException(compact(x) + " isn't valid regex") - }, - - { - case Pattern(value) => JString(value) - } - )) -} diff --git a/modules/json4s/src/main/scala/com.snowplowanalytics.iglu.schemaddl/jsonschema/json4s/implicits.scala b/modules/json4s/src/main/scala/com.snowplowanalytics.iglu.schemaddl/jsonschema/json4s/implicits.scala deleted file mode 100644 index e59a4658..00000000 --- a/modules/json4s/src/main/scala/com.snowplowanalytics.iglu.schemaddl/jsonschema/json4s/implicits.scala +++ /dev/null @@ -1,49 +0,0 @@ -/* - * Copyright (c) 2016-2023 Snowplow Analytics Ltd. All rights reserved. - * - * This program is licensed to you under the Apache License Version 2.0, - * and you may not use this file except in compliance with the Apache License Version 2.0. - * You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the Apache License Version 2.0 is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. 
- */ -package com.snowplowanalytics.iglu.schemaddl.jsonschema -package json4s - -import org.json4s._ - -object implicits { - - import com.snowplowanalytics.iglu.schemaddl.jsonschema.json4s.Formats.allFormats - - /** - * Type class instance allowing to convert json4s JValue - * into JSON Schema class - * - * So far this is single implementation, but still need - * to be imported into scope to get Schema.parse method work - */ - implicit lazy val json4sToSchema: ToSchema[JValue] = new ToSchema[JValue] { - def parse(json: JValue): Option[Schema] = - json match { - case _: JObject => - val mf = implicitly[Manifest[Schema]] - Some(json.extract[Schema](allFormats, mf)) - case _ => None - } - } - - /** - * Type class instance allowing to convert `Schema` to JValue - * - * So far this is single implementation, but still need - * to be imported into scope to get Schema.parse method work - */ - implicit lazy val json4sFromSchema: FromSchema[JValue] = new FromSchema[JValue] { - def normalize(schema: Schema): JValue = - Extraction.decompose(schema) - } -} diff --git a/modules/json4s/src/test/scala/com.snowplowanalytics.iglu.schemaddl/jsonschema/json4s/ArraySpec.scala b/modules/json4s/src/test/scala/com.snowplowanalytics.iglu.schemaddl/jsonschema/json4s/ArraySpec.scala deleted file mode 100644 index 146b6611..00000000 --- a/modules/json4s/src/test/scala/com.snowplowanalytics.iglu.schemaddl/jsonschema/json4s/ArraySpec.scala +++ /dev/null @@ -1,50 +0,0 @@ -/* - * Copyright (c) 2012-2023 Snowplow Analytics Ltd. All rights reserved. - * - * This program is licensed to you under the Apache License Version 2.0, - * and you may not use this file except in compliance with the Apache License Version 2.0. - * You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the Apache License Version 2.0 is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. - */ -package com.snowplowanalytics.iglu.schemaddl.jsonschema.json4s - -// json4s -import org.json4s._ -import org.json4s.jackson.JsonMethods.parse - -import com.snowplowanalytics.iglu.schemaddl.jsonschema.Schema -import com.snowplowanalytics.iglu.schemaddl.jsonschema.properties.ArrayProperty.{MaxItems, MinItems} - -import implicits._ - -// specs2 -import org.specs2.Specification - -class ArraySpec extends Specification { def is = s2""" - Check JSON Schema string specification - parse correct minItems $e1 - parse incorrect (negative) minItems (DECIDE IF THIS DESIRED) $e2 - """ - - def e1 = { - val schema = parse( - """ - |{"minItems": 32} - """.stripMargin) - - Schema.parse(schema) must beSome(Schema(minItems = Some(MinItems(32)))) - } - - def e2 = { - val schema = parse( - """ - |{"maxItems": -32} - """.stripMargin) - - Schema.parse(schema) must beSome(Schema(maxItems = Some(MaxItems(-32)))) - } -} diff --git a/modules/json4s/src/test/scala/com.snowplowanalytics.iglu.schemaddl/jsonschema/json4s/CommonSpec.scala b/modules/json4s/src/test/scala/com.snowplowanalytics.iglu.schemaddl/jsonschema/json4s/CommonSpec.scala deleted file mode 100644 index df24f762..00000000 --- a/modules/json4s/src/test/scala/com.snowplowanalytics.iglu.schemaddl/jsonschema/json4s/CommonSpec.scala +++ /dev/null @@ -1,127 +0,0 @@ -/* - * Copyright (c) 2012-2023 Snowplow Analytics Ltd. 
All rights reserved. - * - * This program is licensed to you under the Apache License Version 2.0, - * and you may not use this file except in compliance with the Apache License Version 2.0. - * You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the Apache License Version 2.0 is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. - */ -package com.snowplowanalytics.iglu.schemaddl.jsonschema.json4s - -// json4s -import org.json4s._ -import org.json4s.jackson.JsonMethods.parse -import com.snowplowanalytics.iglu.schemaddl.jsonschema.Schema -import com.snowplowanalytics.iglu.schemaddl.jsonschema.properties.CommonProperties -import com.snowplowanalytics.iglu.schemaddl.jsonschema.properties.StringProperty.Format -import implicits._ - -// specs2 -import org.specs2.Specification - -class CommonSpec extends Specification { def is = s2""" - Check JSON Schema common properties - parse string-typed Schema $e1 - parse union-typed Schema $e2 - skip unknown type $e3 - parse oneOf property $e4 - """ - - def e1 = { - - val schema = parse( - """ - |{ - | "type": "string" - |} - """.stripMargin) - - Schema.parse(schema) must beSome(Schema(`type` = Some(CommonProperties.Type.String))) - } - - - def e2 = { - - val schema = parse( - """ - |{ - | "type": ["string", "null"] - |} - """.stripMargin) - - Schema.parse(schema) must beSome(Schema(`type` = Some(CommonProperties.Type.Union(Set(CommonProperties.Type.String, CommonProperties.Type.Null))))) - } - - def e3 = { - - val schema = parse( - """ - |{ - | "type": ["unknown", "string"], - | "format": "ipv4" - |} - """.stripMargin) - - Schema.parse(schema) must beSome(Schema(format = Some(Format.Ipv4Format))) - } - - def e4 = { - - val schema = parse( - """ - |{ - | "type": "object", - | "oneOf": [ - | - | { - | "properties": { - | "embedded": { - | "type": "object", - | "properties": { - | "path": { - | "type": "string" - | } - | }, - | "required": ["path"], - | "additionalProperties": false - | } - | }, - | "required": ["embedded"], - | "additionalProperties": false - | }, - | - | { - | "properties": { - | "http": { - | "type": "object", - | "properties": { - | "uri": { - | "type": "string", - | "format": "uri" - | }, - | "apikey": { - | "type": ["string", "null"] - | } - | }, - | "required": ["uri"], - | "additionalProperties": false - | } - | }, - | "required": ["http"], - | "additionalProperties": false - | } - | ] - |} - | - """.stripMargin - ) - - Schema.parse(schema).flatMap(_.oneOf) must beSome.like { - case oneOf => oneOf.value.length must beEqualTo(2) - } - } -} diff --git a/modules/json4s/src/test/scala/com.snowplowanalytics.iglu.schemaddl/jsonschema/json4s/NumberSpec.scala b/modules/json4s/src/test/scala/com.snowplowanalytics.iglu.schemaddl/jsonschema/json4s/NumberSpec.scala deleted file mode 100644 index 8be65944..00000000 --- a/modules/json4s/src/test/scala/com.snowplowanalytics.iglu.schemaddl/jsonschema/json4s/NumberSpec.scala +++ /dev/null @@ -1,84 +0,0 @@ -/* - * Copyright (c) 2012-2023 Snowplow Analytics Ltd. All rights reserved. - * - * This program is licensed to you under the Apache License Version 2.0, - * and you may not use this file except in compliance with the Apache License Version 2.0. 
- * You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the Apache License Version 2.0 is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. - */ -package com.snowplowanalytics.iglu.schemaddl.jsonschema.json4s - -// json4s -import org.json4s._ -import org.json4s.jackson.JsonMethods.parse - -import com.snowplowanalytics.iglu.schemaddl.jsonschema.Schema - -import implicits._ - -// specs2 -import org.specs2.Specification - -class NumberSpec extends Specification { def is = s2""" - Check JSON Schema number-specific properties - correctly transform big BigInt to BigDecimal $e1 - correctly transform small BigInt to BigDecimal $e2 - correctly extract and compare number and integer equal values $e3 - don't extract non-numeric values (null) $e4 - """ - - def e1 = { - val json = parse( - """ - |{ - | "maximum": 9223372036854775807 - |} - """.stripMargin) - - Schema.parse(json).flatMap(_.maximum).map(_.getAsDecimal) must beSome(BigDecimal(9223372036854775807L)) - } - - def e2 = { - val json = parse( - """ - |{ - | "minimum": -9223372036854775806 - |} - """.stripMargin) - - Schema.parse(json).flatMap(_.minimum).map(_.getAsDecimal) must beSome(BigDecimal(-9223372036854775806L)) - } - - def e3 = { - val json = parse( - """ - |{ - | "minimum": 25, - | "maximum": 25.0 - |} - """.stripMargin) - - val minimum = Schema.parse(json).flatMap(_.minimum).map(_.getAsDecimal).get - val maximum = Schema.parse(json).flatMap(_.maximum).map(_.getAsDecimal).get - - minimum must beEqualTo(maximum) - } - - def e4 = { - val json = parse( - """ - |{ - | "minimum": null - |} - """.stripMargin) - - val minimum = Schema.parse(json).flatMap(_.minimum).map(_.getAsDecimal) - - minimum must beNone - } - -} diff --git a/modules/json4s/src/test/scala/com.snowplowanalytics.iglu.schemaddl/jsonschema/json4s/ObjectSpec.scala b/modules/json4s/src/test/scala/com.snowplowanalytics.iglu.schemaddl/jsonschema/json4s/ObjectSpec.scala deleted file mode 100644 index 8ce1e0f8..00000000 --- a/modules/json4s/src/test/scala/com.snowplowanalytics.iglu.schemaddl/jsonschema/json4s/ObjectSpec.scala +++ /dev/null @@ -1,91 +0,0 @@ -/* - * Copyright (c) 2012-2023 Snowplow Analytics Ltd. All rights reserved. - * - * This program is licensed to you under the Apache License Version 2.0, - * and you may not use this file except in compliance with the Apache License Version 2.0. - * You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the Apache License Version 2.0 is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. 
- */ -package com.snowplowanalytics.iglu.schemaddl.jsonschema.json4s - -// json4s -import org.json4s._ -import org.json4s.jackson.JsonMethods.parse - -import com.snowplowanalytics.iglu.schemaddl.jsonschema.Schema -import com.snowplowanalytics.iglu.schemaddl.jsonschema.properties.StringProperty -import com.snowplowanalytics.iglu.schemaddl.jsonschema.properties.ObjectProperty.{Properties, Required} - -import implicits._ - -// specs2 -import org.specs2.Specification - -class ObjectSpec extends Specification { def is = s2""" - Check JSON Schema object specification - parse object with empty properties $e1 - parse object with one property $e2 - parse object with several subschemas $e3 - parse object with required property $e4 - """ - - def e1 = { - - val schema = parse( - """ - |{ - | "properties": { - | } - |} - """.stripMargin) - - Schema.parse(schema) must beSome(Schema(properties = Some(Properties(Map.empty[String, Schema])))) - } - - def e2 = { - - val schema = parse( - """ - |{ - | "properties": { - | "key": {} - | } - |} - """.stripMargin) - - Schema.parse(schema) must beSome(Schema(properties = Some(Properties(Map("key" -> Schema()))))) - } - - - def e3 = { - - val schema = parse( - """ - |{ - | "properties": { - | "innerKey": { - | "minLength": 32 - | } - | } - |} - """.stripMargin) - - Schema.parse(schema) must beSome(Schema(properties = Some(Properties(Map("innerKey" -> Schema(minLength = Some(StringProperty.MinLength(32)))))))) - } - - def e4 = { - val schema = parse( - """ - |{ - | "required": ["one", "key", "23"] - |} - """.stripMargin) - - Schema.parse(schema) must beSome(Schema(required = Some(Required(List("one", "key", "23"))))) - } - -} diff --git a/modules/json4s/src/test/scala/com.snowplowanalytics.iglu.schemaddl/jsonschema/json4s/StringSpec.scala b/modules/json4s/src/test/scala/com.snowplowanalytics.iglu.schemaddl/jsonschema/json4s/StringSpec.scala deleted file mode 100644 index 14d95986..00000000 --- a/modules/json4s/src/test/scala/com.snowplowanalytics.iglu.schemaddl/jsonschema/json4s/StringSpec.scala +++ /dev/null @@ -1,62 +0,0 @@ -/* - * Copyright (c) 2012-2023 Snowplow Analytics Ltd. All rights reserved. - * - * This program is licensed to you under the Apache License Version 2.0, - * and you may not use this file except in compliance with the Apache License Version 2.0. - * You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the Apache License Version 2.0 is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. 
- */ -package com.snowplowanalytics.iglu.schemaddl.jsonschema.json4s - -// json4s -import org.json4s._ -import org.json4s.jackson.JsonMethods.parse - -import com.snowplowanalytics.iglu.schemaddl.jsonschema.Schema -import com.snowplowanalytics.iglu.schemaddl.jsonschema.ToSchema -import com.snowplowanalytics.iglu.schemaddl.jsonschema.properties.StringProperty.{Format, MaxLength, MinLength} - -import com.snowplowanalytics.iglu.schemaddl.jsonschema.json4s.implicits._ - -// specs2 -import org.specs2.Specification - -class StringSpec extends Specification { def is = s2""" - Check JSON Schema string specification - parse correct minLength $e1 - parse maxLength with ipv4 format $e2 - parse unknown format $e3 - """ - - def e1 = { - val schema = parse( - """ - |{"minLength": 32} - """.stripMargin) - - Schema.parse(schema) must beSome(Schema(minLength = Some(MinLength(32)))) - } - - def e2 = { - val schema = parse( - """ - |{"maxLength": 32, "format": "ipv4"} - """.stripMargin) - - Schema.parse(schema) must beSome(Schema(maxLength = Some(MaxLength(32)), format = Some(Format.Ipv4Format))) - } - - def e3 = { - implicitly[ToSchema[JValue]] - val schema = parse( - """ - |{"maxLength": 32, "format": "unknown"} - """.stripMargin) - - Schema.parse(schema) must beSome(Schema(maxLength = Some(MaxLength(32)), format = Some(Format.CustomFormat("unknown")))) - } -}
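
Reviewer note (not part of the patch): a minimal sketch of how the new ShredModelEntry API introduced above might be exercised, mirroring the ShredModelEntrySpec cases in this diff. It assumes the test-only SpecHelpers syntax (`.jsonPointer` / `.schema`) from this patch; the object name and the "/properties/..." pointers are hypothetical, and the expected values are taken from the "handle string with maxLength" and "suggest RunLengthEncoding for booleans" specs.

import io.circe.literal._
import cats.syntax.show._
import com.snowplowanalytics.iglu.schemaddl.SpecHelpers._ // test-only `.jsonPointer` / `.schema` extensions (assumption)
import com.snowplowanalytics.iglu.schemaddl.redshift.ShredModelEntry
import com.snowplowanalytics.iglu.schemaddl.redshift.ShredModelEntry.ColumnType._
import com.snowplowanalytics.iglu.schemaddl.redshift.ShredModelEntry.CompressionEncoding._

object ShredModelEntryExample extends App {
  // A bounded string property and a boolean property, as in the specs above (pointers are illustrative)
  val bounded = ShredModelEntry("/properties/foo".jsonPointer, json"""{"type": "string", "maxLength": 42}""".schema)
  val flag    = ShredModelEntry("/properties/bar".jsonPointer, json"""{"type": "boolean"}""".schema)

  // Expected per the specs: maxLength 42 string -> VARCHAR(42); boolean -> RUNLENGTH encoding
  assert(bounded.columnType == RedshiftVarchar(42))
  assert(flag.compressionEncoding == RunLengthEncoding)

  // Rendering a list of entries yields the column definitions, prefixed by the standard ref_ columns,
  // as shown in the "convert to column definition" specs
  println(List(bounded, flag).show)
}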