Skip to content

Commit

Permalink
Use 'Numeric' instead of 'Float' as BQ column type when field type in…
Browse files Browse the repository at this point in the history
… the json schema is number (close #112)
  • Loading branch information
voropaevp committed Apr 27, 2023
1 parent 539067b commit eb9089a
Show file tree
Hide file tree
Showing 10 changed files with 305 additions and 96 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ object Row {
value.asNumber.flatMap(_.toLong).fold(WrongType(value, fieldType).invalidNel[Row])(Primitive(_).validNel)
case Type.Float =>
value.asNumber.flatMap(_.toBigDecimal.map(_.bigDecimal)).fold(WrongType(value, fieldType).invalidNel[Row])(Primitive(_).validNel)
case Type.Numeric =>
case Type.Numeric(_, _) =>
value.asNumber.flatMap(_.toBigDecimal.map(_.bigDecimal)).fold(WrongType(value, fieldType).invalidNel[Row])(Primitive(_).validNel)
case Type.Timestamp =>
value.asString.fold(WrongType(value, fieldType).invalidNel[Row])(Primitive(_).validNel)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,10 @@
package com.snowplowanalytics.iglu.schemaddl.bigquery

import io.circe._

import com.snowplowanalytics.iglu.schemaddl.jsonschema.Schema
import com.snowplowanalytics.iglu.schemaddl.jsonschema.properties.{CommonProperties, StringProperty}
import com.snowplowanalytics.iglu.schemaddl.jsonschema.suggestion.decimals
import com.snowplowanalytics.iglu.schemaddl.jsonschema.properties.{CommonProperties, StringProperty, NumberProperty}
import com.snowplowanalytics.iglu.schemaddl.jsonschema.suggestion.numericType.NullableWrapper

object Suggestion {

Expand All @@ -24,7 +25,7 @@ object Suggestion {
case Some(CommonProperties.Type.String) =>
Some(name => Field(name, Type.String, Mode.required(required)))
case Some(types) if types.nullable(CommonProperties.Type.String) =>
Some(name => Field(name, Type.String, Mode.Nullable) )
Some(name => Field(name, Type.String, Mode.Nullable))
case _ => None
}

Expand All @@ -40,19 +41,55 @@ object Suggestion {
val integerSuggestion: Suggestion = (schema, required) =>
schema.`type` match {
case Some(CommonProperties.Type.Integer) =>
Some(name => Field(name, Type.Integer, Mode.required(required)))
Some(name => Field(name, Type.fromGenericType(decimals.integerType(schema)), Mode.required(required)))
case Some(CommonProperties.Type.Union(types)) if withNull(types, CommonProperties.Type.Integer) =>
Some(name => Field(name, Type.Integer, Mode.Nullable))
Some(name => Field(name, Type.fromGenericType(decimals.integerType(schema)), Mode.Nullable))
case _ => None
}

val numericSuggestion: Suggestion = (schema, required) => schema.`type` match {
case Some(CommonProperties.Type.Number) =>
schema.multipleOf match {
case Some(NumberProperty.MultipleOf.IntegerMultipleOf(_)) =>
Some(name => Field(name, Type.fromGenericType(decimals.integerType(schema)), Mode.required(required)))
case Some(mult: NumberProperty.MultipleOf.NumberMultipleOf) =>
Some(name => Field(name, numericWithMultiple(mult, schema.maximum, schema.minimum), Mode.required(required)))
case None => None
}
case Some(CommonProperties.Type.Union(types)) if withNull(types, CommonProperties.Type.Number) =>
schema.multipleOf match {
case Some(NumberProperty.MultipleOf.IntegerMultipleOf(_)) =>
Some(name => Field(name, Type.fromGenericType(decimals.integerType(schema)), Mode.Nullable))
case Some(mult: NumberProperty.MultipleOf.NumberMultipleOf) =>
Some(name => Field(name, numericWithMultiple(mult, schema.maximum, schema.minimum), Mode.Nullable))
case None => None
}
case Some(CommonProperties.Type.Union(types)) if onlyNumeric(types, true) =>
schema.multipleOf match {
case Some(NumberProperty.MultipleOf.IntegerMultipleOf(_)) =>
Some(name => Field(name, Type.fromGenericType(decimals.integerType(schema)), Mode.Nullable))
case Some(mult: NumberProperty.MultipleOf.NumberMultipleOf) =>
Some(name => Field(name, numericWithMultiple(mult, schema.maximum, schema.minimum), Mode.Nullable))
case None => None
}
case Some(CommonProperties.Type.Union(types)) if onlyNumeric(types, false) =>
schema.multipleOf match {
case Some(NumberProperty.MultipleOf.IntegerMultipleOf(_)) =>
Some(name => Field(name, Type.fromGenericType(decimals.integerType(schema)), Mode.required(required)))
case Some(mult: NumberProperty.MultipleOf.NumberMultipleOf) =>
Some(name => Field(name, numericWithMultiple(mult, schema.maximum, schema.minimum), Mode.required(required)))
case None => None
}
case _ => None
}

val floatSuggestion: Suggestion = (schema, required) =>
schema.`type` match {
case Some(CommonProperties.Type.Number) =>
Some(name => Field(name, Type.Float, Mode.required(required)))
case Some(CommonProperties.Type.Union(types)) if onlyNumeric(types, true) =>
Some(name => Field(name, Type.Float, Mode.Nullable))
case Some(CommonProperties.Type.Union(types)) if onlyNumeric(types, false) =>
case Some(CommonProperties.Type.Union(types)) if onlyNumeric(types, false) =>
Some(name => Field(name, Type.Float, Mode.required(required)))
case Some(CommonProperties.Type.Union(types)) if withNull(types, CommonProperties.Type.Number) =>
Some(name => Field(name, Type.Float, Mode.Nullable))
Expand All @@ -66,6 +103,15 @@ object Suggestion {
case _ => None
}

private def numericWithMultiple(mult: NumberProperty.MultipleOf.NumberMultipleOf,
maximum: Option[NumberProperty.Maximum],
minimum: Option[NumberProperty.Minimum]): Type =
Type.fromGenericType(decimals.numericWithMultiple(mult, maximum, minimum))


// (Field.JsonNullability.fromNullableWrapper)


// `date-time` format usually means zoned format, which corresponds to BQ Timestamp
val timestampSuggestion: Suggestion = (schema, required) =>
(schema.`type`, schema.format) match {
Expand Down Expand Up @@ -95,22 +141,34 @@ object Suggestion {
booleanSuggestion,
stringSuggestion,
integerSuggestion,
numericSuggestion,
floatSuggestion,
complexEnumSuggestion
)

private[iglu] def fromEnum(enums: List[Json], required: Boolean): String => Field = {
def isString(json: Json) = json.isString || json.isNull

def isInteger(json: Json) = json.asNumber.exists(_.toBigInt.isDefined) || json.isNull

def isNumeric(json: Json) = json.isNumber || json.isNull

val noNull: Boolean = !enums.contains(Json.Null)

if (enums.forall(isString)) {
name => Field(name, Type.String, Mode.required(required && noNull))
} else if (enums.forall(isInteger)) {
name => Field(name, Type.Integer, Mode.required(required && noNull))
} else if (enums.forall(isNumeric)) {
name => Field(name, Type.Float, Mode.required(required && noNull))
name =>
decimals.numericEnum(enums).map {
case NullableWrapper.NullableValue(t) => Field(name, Type.fromGenericType(t), Mode.required(required && noNull))
case NullableWrapper.NotNullValue(t) => Field(name, Type.fromGenericType(t), Mode.required(required && noNull))
} match {
case Some(value) => value
// Unreachable as `None` here would mean that some `enums.forall(isNumeric)` did not work.
case None => Field(name, Type.Float, Mode.required(required && noNull))
}
} else {
name => Field(name, Type.String, Mode.required(required && noNull))
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,18 +12,34 @@
*/
package com.snowplowanalytics.iglu.schemaddl.bigquery

import com.snowplowanalytics.iglu.schemaddl.jsonschema.suggestion.numericType._
/** BigQuery field type; "array" and "null" are expressed via `Mode` */
sealed trait Type extends Product with Serializable

object Type {
case object String extends Type

case object Boolean extends Type

case object Integer extends Type

case object Float extends Type
case object Numeric extends Type

case class Numeric(precision: Int, scale: Int) extends Type

case object Date extends Type

case object DateTime extends Type

case object Timestamp extends Type

case class Record(fields: List[Field]) extends Type

def fromGenericType(`type`: NumericType) = `type` match {
case NumericType.Double => Float
case NumericType.Int32 => Integer
case NumericType.Int64 => Integer
case NumericType.Decimal(precision, scale) => Numeric(precision, scale)
}
}

Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
package com.snowplowanalytics.iglu.schemaddl.jsonschema.suggestion

import numericType._
import io.circe.Json
import com.snowplowanalytics.iglu.schemaddl.jsonschema.properties.NumberProperty
import com.snowplowanalytics.iglu.schemaddl.jsonschema.Schema

private[schemaddl] object decimals {

def integerType(schema: Schema): NumericType =
(schema.minimum, schema.maximum) match {
case (Some(min), Some(max)) =>
val minDecimal = min.getAsDecimal
val maxDecimal = max.getAsDecimal
if (maxDecimal <= Int.MaxValue && minDecimal >= Int.MinValue) NumericType.Int32
else if (maxDecimal <= Long.MaxValue && minDecimal >= Long.MinValue) NumericType.Int64
else NumericType.Decimal(
(maxDecimal.precision - maxDecimal.scale).max(minDecimal.precision - minDecimal.scale), 0
)
case _ => NumericType.Int64
}

def numericWithMultiple(mult: NumberProperty.MultipleOf.NumberMultipleOf,
maximum: Option[NumberProperty.Maximum],
minimum: Option[NumberProperty.Minimum]): NumericType =
(maximum, minimum) match {
case (Some(max), Some(min)) =>
val topPrecision = max match {
case NumberProperty.Maximum.IntegerMaximum(max) =>
BigDecimal(max).precision + mult.value.scale
case NumberProperty.Maximum.NumberMaximum(max) =>
max.precision - max.scale + mult.value.scale
}
val bottomPrecision = min match {
case NumberProperty.Minimum.IntegerMinimum(min) =>
BigDecimal(min).precision + mult.value.scale
case NumberProperty.Minimum.NumberMinimum(min) =>
min.precision - min.scale + mult.value.scale
}

NumericType.Decimal(topPrecision.max(bottomPrecision), mult.value.scale)

case _ =>
NumericType.Double
}


def numericEnum(enums: List[Json]): Option[NullableWrapper] = {
def go(scale: Int, max: BigDecimal, nullable: Boolean, enums: List[Json]): Option[NullableWrapper] =
enums match {
case Nil =>
val t = if ((scale == 0) && (max <= Int.MaxValue)) NumericType.Int32
else if ((scale == 0) && (max <= Long.MaxValue)) NumericType.Int64
else NumericType.Decimal(max.precision - max.scale + scale, scale)

Some(if (nullable) NullableWrapper.NullableValue(t)
else NullableWrapper.NotNullValue(t))

case Json.Null :: tail => go(scale, max, true, tail)
case h :: tail =>
h.asNumber.flatMap(_.toBigDecimal) match {
case Some(bigDecimal) =>
val nextScale = scale.max(bigDecimal.scale)
val nextMax = (if (bigDecimal > 0) bigDecimal else -bigDecimal).max(max)
go(nextScale, nextMax, nullable, tail)
case None => None
}
}

go(0, 0, false, enums)
}

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
package com.snowplowanalytics.iglu.schemaddl.jsonschema.suggestion

private[schemaddl] object numericType {
sealed trait NumericType extends Product with Serializable

object NumericType {
case object Double extends NumericType

case object Int32 extends NumericType

case object Int64 extends NumericType

case class Decimal(precision: Int, scale: Int) extends NumericType
}

sealed trait NullableWrapper

object NullableWrapper {

case class NullableValue(value: NumericType) extends NullableWrapper

case class NotNullValue(value: NumericType) extends NullableWrapper

}
}
Original file line number Diff line number Diff line change
Expand Up @@ -12,13 +12,15 @@
*/
package com.snowplowanalytics.iglu.schemaddl.parquet

import Type._
import com.snowplowanalytics.iglu.schemaddl.StringUtils
import com.snowplowanalytics.iglu.schemaddl.jsonschema.Schema
import com.snowplowanalytics.iglu.schemaddl.jsonschema.properties.{ArrayProperty, CommonProperties}
import com.snowplowanalytics.iglu.schemaddl.jsonschema.mutate.Mutate
import com.snowplowanalytics.iglu.schemaddl.jsonschema.suggestion.numericType.NullableWrapper

case class Field(name: String,
fieldType: Type,
case class Field(name: String,
fieldType: Type,
nullability: Type.Nullability)

object Field {
Expand Down Expand Up @@ -67,8 +69,20 @@ object Field {

private[parquet] object JsonNullability {
case object ExplicitlyNullable extends JsonNullability

case object NoExplicitNull extends JsonNullability

def fromNullableWrapper(wrapper: NullableWrapper): NullableType = wrapper match {
case NullableWrapper.NullableValue(t) => NullableType(
value = fromGenericType(t),
nullability = JsonNullability.ExplicitlyNullable
)
case NullableWrapper.NotNullValue(t) => NullableType(
value = fromGenericType(t),
nullability = JsonNullability.NoExplicitNull
)
}

def extractFrom(`type`: CommonProperties.Type): JsonNullability = {
if (`type`.nullable) {
JsonNullability.ExplicitlyNullable
Expand All @@ -81,13 +95,13 @@ object Field {
topSchema.`type` match {
case Some(types) if types.possiblyWithNull(CommonProperties.Type.Object) =>
NullableType(
value = buildObjectType(topSchema),
value = buildObjectType(topSchema),
nullability = JsonNullability.extractFrom(types)
)

case Some(types) if types.possiblyWithNull(CommonProperties.Type.Array) =>
NullableType(
value = buildArrayType(topSchema),
NullableType(
value = buildArrayType(topSchema),
nullability = JsonNullability.extractFrom(types)
)

Expand Down
Loading

0 comments on commit eb9089a

Please sign in to comment.