Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use 'Numeric' instead of 'Float' as BQ column type when field type in the json schema is number #162

Open
wants to merge 4 commits into
base: develop
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -42,27 +42,27 @@ object Field {
case Some(types) if types.possiblyWithNull(CommonProperties.Type.Object) =>
val subfields = topSchema.properties.map(_.value).getOrElse(Map.empty)
if (subfields.isEmpty) {
Suggestion.finalSuggestion(topSchema, required)(name)
Suggestion.finalSuggestion(topSchema, required )(name)
} else {
val requiredKeys = topSchema.required.toList.flatMap(_.value)
val fields = subfields.map { case (key, schema) =>
build(key, schema, requiredKeys.contains(key))
}
val subFields = fields.toList.sortBy(field => (Mode.sort(field.mode), field.name))
Field(name, Type.Record(subFields), Mode.required(required))
Field(name, Type.Record(subFields), Mode.required(required ))
}
case Some(types) if types.possiblyWithNull(CommonProperties.Type.Array) =>
topSchema.items match {
case Some(ArrayProperty.Items.ListItems(schema)) =>
build(name, schema, false).copy(mode = Mode.Repeated)
case _ =>
Suggestion.finalSuggestion(topSchema, required)(name)
Suggestion.finalSuggestion(topSchema, required )(name)
}
case _ =>
Suggestion.suggestions
.find(suggestion => suggestion(topSchema, required).isDefined)
.flatMap(_.apply(topSchema, required))
.getOrElse(Suggestion.finalSuggestion(topSchema, required))
.getOrElse(Suggestion.finalSuggestion(topSchema, required = false))
.apply(name)
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ object Row {
value.asNumber.flatMap(_.toLong).fold(WrongType(value, fieldType).invalidNel[Row])(Primitive(_).validNel)
case Type.Float =>
value.asNumber.flatMap(_.toBigDecimal.map(_.bigDecimal)).fold(WrongType(value, fieldType).invalidNel[Row])(Primitive(_).validNel)
case Type.Numeric =>
case Type.Numeric(_, _) =>
value.asNumber.flatMap(_.toBigDecimal.map(_.bigDecimal)).fold(WrongType(value, fieldType).invalidNel[Row])(Primitive(_).validNel)
case Type.Timestamp =>
value.asString.fold(WrongType(value, fieldType).invalidNel[Row])(Primitive(_).validNel)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,10 @@
package com.snowplowanalytics.iglu.schemaddl.bigquery

import io.circe._

import com.snowplowanalytics.iglu.schemaddl.jsonschema.Schema
import com.snowplowanalytics.iglu.schemaddl.jsonschema.properties.{CommonProperties, StringProperty}
import com.snowplowanalytics.iglu.schemaddl.jsonschema.suggestion.decimals
import com.snowplowanalytics.iglu.schemaddl.jsonschema.properties.{CommonProperties, StringProperty, NumberProperty}
import com.snowplowanalytics.iglu.schemaddl.jsonschema.suggestion.numericType.NullableWrapper

object Suggestion {

Expand All @@ -24,7 +25,7 @@ object Suggestion {
case Some(CommonProperties.Type.String) =>
Some(name => Field(name, Type.String, Mode.required(required)))
case Some(types) if types.nullable(CommonProperties.Type.String) =>
Some(name => Field(name, Type.String, Mode.Nullable) )
Some(name => Field(name, Type.String, Mode.Nullable))
case _ => None
}

Expand All @@ -40,19 +41,55 @@ object Suggestion {
val integerSuggestion: Suggestion = (schema, required) =>
schema.`type` match {
case Some(CommonProperties.Type.Integer) =>
Some(name => Field(name, Type.Integer, Mode.required(required)))
Some(name => Field(name, Type.fromGenericType(decimals.integerType(schema)), Mode.required(required)))
case Some(CommonProperties.Type.Union(types)) if withNull(types, CommonProperties.Type.Integer) =>
Some(name => Field(name, Type.Integer, Mode.Nullable))
Some(name => Field(name, Type.fromGenericType(decimals.integerType(schema)), Mode.Nullable))
case _ => None
}

val numericSuggestion: Suggestion = (schema, required) => schema.`type` match {
case Some(CommonProperties.Type.Number) =>
schema.multipleOf match {
case Some(NumberProperty.MultipleOf.IntegerMultipleOf(_)) =>
Some(name => Field(name, Type.fromGenericType(decimals.integerType(schema)), Mode.required(required)))
case Some(mult: NumberProperty.MultipleOf.NumberMultipleOf) =>
Some(name => Field(name, numericWithMultiple(mult, schema.maximum, schema.minimum), Mode.required(required)))
case None => None
}
case Some(CommonProperties.Type.Union(types)) if withNull(types, CommonProperties.Type.Number) =>
schema.multipleOf match {
case Some(NumberProperty.MultipleOf.IntegerMultipleOf(_)) =>
Some(name => Field(name, Type.fromGenericType(decimals.integerType(schema)), Mode.Nullable))
case Some(mult: NumberProperty.MultipleOf.NumberMultipleOf) =>
Some(name => Field(name, numericWithMultiple(mult, schema.maximum, schema.minimum), Mode.Nullable))
case None => None
}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could you remove this entire block starting from line 59, and let everything fall through to the next case on line 67?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It won't fall though there, check the onlyNumeric implementation L180.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah, I see. OK how about this then:

case Some(CommonProperties.Type.Union(types)) if withNull(types, CommonProperties.Type.Number) || onlyNumeric(types, true) =>

case Some(CommonProperties.Type.Union(types)) if onlyNumeric(types, true) =>
schema.multipleOf match {
case Some(NumberProperty.MultipleOf.IntegerMultipleOf(_)) =>
Some(name => Field(name, Type.fromGenericType(decimals.integerType(schema)), Mode.Nullable))
case Some(mult: NumberProperty.MultipleOf.NumberMultipleOf) =>
Some(name => Field(name, numericWithMultiple(mult, schema.maximum, schema.minimum), Mode.Nullable))
case None => None
}
case Some(CommonProperties.Type.Union(types)) if onlyNumeric(types, false) =>
schema.multipleOf match {
case Some(NumberProperty.MultipleOf.IntegerMultipleOf(_)) =>
Some(name => Field(name, Type.fromGenericType(decimals.integerType(schema)), Mode.required(required)))
case Some(mult: NumberProperty.MultipleOf.NumberMultipleOf) =>
Some(name => Field(name, numericWithMultiple(mult, schema.maximum, schema.minimum), Mode.required(required)))
case None => None
}
case _ => None
}

val floatSuggestion: Suggestion = (schema, required) =>
schema.`type` match {
case Some(CommonProperties.Type.Number) =>
Some(name => Field(name, Type.Float, Mode.required(required)))
case Some(CommonProperties.Type.Union(types)) if onlyNumeric(types, true) =>
Some(name => Field(name, Type.Float, Mode.Nullable))
case Some(CommonProperties.Type.Union(types)) if onlyNumeric(types, false) =>
case Some(CommonProperties.Type.Union(types)) if onlyNumeric(types, false) =>
Some(name => Field(name, Type.Float, Mode.required(required)))
case Some(CommonProperties.Type.Union(types)) if withNull(types, CommonProperties.Type.Number) =>
Some(name => Field(name, Type.Float, Mode.Nullable))
Expand All @@ -66,6 +103,15 @@ object Suggestion {
case _ => None
}

private def numericWithMultiple(mult: NumberProperty.MultipleOf.NumberMultipleOf,
maximum: Option[NumberProperty.Maximum],
minimum: Option[NumberProperty.Minimum]): Type =
Type.fromGenericType(decimals.numericWithMultiple(mult, maximum, minimum))


// (Field.JsonNullability.fromNullableWrapper)


// `date-time` format usually means zoned format, which corresponds to BQ Timestamp
val timestampSuggestion: Suggestion = (schema, required) =>
(schema.`type`, schema.format) match {
Expand Down Expand Up @@ -95,22 +141,34 @@ object Suggestion {
booleanSuggestion,
stringSuggestion,
integerSuggestion,
numericSuggestion,
floatSuggestion,
complexEnumSuggestion
)

private[iglu] def fromEnum(enums: List[Json], required: Boolean): String => Field = {
def isString(json: Json) = json.isString || json.isNull

def isInteger(json: Json) = json.asNumber.exists(_.toBigInt.isDefined) || json.isNull

def isNumeric(json: Json) = json.isNumber || json.isNull

val noNull: Boolean = !enums.contains(Json.Null)

if (enums.forall(isString)) {
name => Field(name, Type.String, Mode.required(required && noNull))
} else if (enums.forall(isInteger)) {
name => Field(name, Type.Integer, Mode.required(required && noNull))
} else if (enums.forall(isNumeric)) {
name => Field(name, Type.Float, Mode.required(required && noNull))
name =>
decimals.numericEnum(enums).map {
case NullableWrapper.NullableValue(t) => Field(name, Type.fromGenericType(t), Mode.required(required && noNull))
case NullableWrapper.NotNullValue(t) => Field(name, Type.fromGenericType(t), Mode.required(required && noNull))
} match {
case Some(value) => value
// Unreachable as `None` here would mean that some `enums.forall(isNumeric)` did not work.
case None => Field(name, Type.Float, Mode.required(required && noNull))
}
} else {
name => Field(name, Type.String, Mode.required(required && noNull))
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,18 +12,34 @@
*/
package com.snowplowanalytics.iglu.schemaddl.bigquery

import com.snowplowanalytics.iglu.schemaddl.jsonschema.suggestion.numericType._
/** BigQuery field type; "array" and "null" are expressed via `Mode` */
sealed trait Type extends Product with Serializable

object Type {
case object String extends Type

case object Boolean extends Type

case object Integer extends Type

case object Float extends Type
case object Numeric extends Type

case class Numeric(precision: Int, scale: Int) extends Type
istreeter marked this conversation as resolved.
Show resolved Hide resolved

case object Date extends Type

case object DateTime extends Type

case object Timestamp extends Type

case class Record(fields: List[Field]) extends Type

def fromGenericType(`type`: NumericType) = `type` match {
case NumericType.Double => Float
case NumericType.Int32 => Integer
case NumericType.Int64 => Integer
case NumericType.Decimal(precision, scale) => Numeric(precision, scale)
}
}

Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
package com.snowplowanalytics.iglu.schemaddl.jsonschema.suggestion

import numericType._
import io.circe.Json
import com.snowplowanalytics.iglu.schemaddl.jsonschema.properties.NumberProperty
import com.snowplowanalytics.iglu.schemaddl.jsonschema.Schema

private[schemaddl] object decimals {

def integerType(schema: Schema): NumericType =
(schema.minimum, schema.maximum) match {
case (Some(min), Some(max)) =>
val minDecimal = min.getAsDecimal
val maxDecimal = max.getAsDecimal
if (maxDecimal <= Int.MaxValue && minDecimal >= Int.MinValue) NumericType.Int32
else if (maxDecimal <= Long.MaxValue && minDecimal >= Long.MinValue) NumericType.Int64
else NumericType.Decimal(
(maxDecimal.precision - maxDecimal.scale).max(minDecimal.precision - minDecimal.scale), 0
)
case _ => NumericType.Int64
}

def numericWithMultiple(mult: NumberProperty.MultipleOf.NumberMultipleOf,
maximum: Option[NumberProperty.Maximum],
minimum: Option[NumberProperty.Minimum]): NumericType =
(maximum, minimum) match {
case (Some(max), Some(min)) =>
val topPrecision = max match {
case NumberProperty.Maximum.IntegerMaximum(max) =>
BigDecimal(max).precision + mult.value.scale
case NumberProperty.Maximum.NumberMaximum(max) =>
max.precision - max.scale + mult.value.scale
}
val bottomPrecision = min match {
case NumberProperty.Minimum.IntegerMinimum(min) =>
BigDecimal(min).precision + mult.value.scale
case NumberProperty.Minimum.NumberMinimum(min) =>
min.precision - min.scale + mult.value.scale
}

NumericType.Decimal(topPrecision.max(bottomPrecision), mult.value.scale)

case _ =>
NumericType.Double
}


def numericEnum(enums: List[Json]): Option[NullableWrapper] = {
def go(scale: Int, max: BigDecimal, nullable: Boolean, enums: List[Json]): Option[NullableWrapper] =
enums match {
case Nil =>
val t = if ((scale == 0) && (max <= Int.MaxValue)) NumericType.Int32
else if ((scale == 0) && (max <= Long.MaxValue)) NumericType.Int64
else NumericType.Decimal(max.precision - max.scale + scale, scale)

Some(if (nullable) NullableWrapper.NullableValue(t)
else NullableWrapper.NotNullValue(t))

case Json.Null :: tail => go(scale, max, true, tail)
case h :: tail =>
h.asNumber.flatMap(_.toBigDecimal) match {
case Some(bigDecimal) =>
val nextScale = scale.max(bigDecimal.scale)
val nextMax = (if (bigDecimal > 0) bigDecimal else -bigDecimal).max(max)
go(nextScale, nextMax, nullable, tail)
case None => None
}
}

go(0, 0, false, enums)
}

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
package com.snowplowanalytics.iglu.schemaddl.jsonschema.suggestion

private[schemaddl] object numericType {
sealed trait NumericType extends Product with Serializable

object NumericType {
case object Double extends NumericType

case object Int32 extends NumericType

case object Int64 extends NumericType

case class Decimal(precision: Int, scale: Int) extends NumericType
}

sealed trait NullableWrapper

object NullableWrapper {

case class NullableValue(value: NumericType) extends NullableWrapper

case class NotNullValue(value: NumericType) extends NullableWrapper

}
}
Original file line number Diff line number Diff line change
Expand Up @@ -12,13 +12,15 @@
*/
package com.snowplowanalytics.iglu.schemaddl.parquet

import Type._
import com.snowplowanalytics.iglu.schemaddl.StringUtils
import com.snowplowanalytics.iglu.schemaddl.jsonschema.Schema
import com.snowplowanalytics.iglu.schemaddl.jsonschema.properties.{ArrayProperty, CommonProperties}
import com.snowplowanalytics.iglu.schemaddl.jsonschema.mutate.Mutate
import com.snowplowanalytics.iglu.schemaddl.jsonschema.suggestion.numericType.NullableWrapper

case class Field(name: String,
fieldType: Type,
case class Field(name: String,
fieldType: Type,
nullability: Type.Nullability)

object Field {
Expand Down Expand Up @@ -67,8 +69,20 @@ object Field {

private[parquet] object JsonNullability {
case object ExplicitlyNullable extends JsonNullability

case object NoExplicitNull extends JsonNullability

def fromNullableWrapper(wrapper: NullableWrapper): NullableType = wrapper match {
case NullableWrapper.NullableValue(t) => NullableType(
value = fromGenericType(t),
nullability = JsonNullability.ExplicitlyNullable
)
case NullableWrapper.NotNullValue(t) => NullableType(
value = fromGenericType(t),
nullability = JsonNullability.NoExplicitNull
)
}

def extractFrom(`type`: CommonProperties.Type): JsonNullability = {
if (`type`.nullable) {
JsonNullability.ExplicitlyNullable
Expand All @@ -81,13 +95,13 @@ object Field {
topSchema.`type` match {
case Some(types) if types.possiblyWithNull(CommonProperties.Type.Object) =>
NullableType(
value = buildObjectType(topSchema),
value = buildObjectType(topSchema),
nullability = JsonNullability.extractFrom(types)
)

case Some(types) if types.possiblyWithNull(CommonProperties.Type.Array) =>
NullableType(
value = buildArrayType(topSchema),
NullableType(
value = buildArrayType(topSchema),
nullability = JsonNullability.extractFrom(types)
)

Expand Down
Loading