From 05acd648872af0677d46cb107e4d326d38757bf7 Mon Sep 17 00:00:00 2001 From: Nikolaos Veneti Date: Sun, 24 Nov 2019 19:02:14 +0100 Subject: [PATCH] find array type recursively & nulls always [0] --- target_bigquery/schema.py | 36 ++++++++++++++++++++++++++---------- 1 file changed, 26 insertions(+), 10 deletions(-) diff --git a/target_bigquery/schema.py b/target_bigquery/schema.py index 503a8cd..c5a23e2 100644 --- a/target_bigquery/schema.py +++ b/target_bigquery/schema.py @@ -1,6 +1,28 @@ from google.cloud.bigquery import SchemaField +def defineArrayType(field, name): + schema_type = field.get("items").get("type") + schema_mode = "REPEATED" + schema_description = None + schema_fields = () + + if schema_type == 'array': + return defineArrayType(field['items'], name) + if isinstance(schema_type, list): + if "null" in schema_type and schema_type.index('null') != 0: + schema_type.remove('null') + schema_type.insert(0, 'null') + schema_type = schema_type[-1] + else: + schema_type = schema_type[-1] + if schema_type == "object": + schema_type = "RECORD" + schema_fields = tuple(build_schema(field.get("items"))) + + return (name, schema_type, schema_mode, schema_description, schema_fields) + + def define_schema(field, name): schema_name = name schema_type = "STRING" @@ -22,23 +44,14 @@ def define_schema(field, name): types.remove("null") else: schema_mode = "required" - single_type = list(types) - schema_type = single_type[-1] else: schema_type = field["type"] + if schema_type == "object": schema_type = "RECORD" schema_fields = tuple(build_schema(field)) - if schema_type == "array": - schema_type = field.get("items").get("type") - if isinstance(schema_type, list): - schema_type = schema_type[-1] - schema_mode = "REPEATED" - if schema_type == "object": - schema_type = "RECORD" - schema_fields = tuple(build_schema(field.get("items"))) if schema_type == "string": if "format" in field: @@ -50,6 +63,9 @@ def define_schema(field, name): if schema_type == "number": schema_type = "FLOAT" + if schema_type == "array": + return defineArrayType(field, name) + return (schema_name, schema_type, schema_mode, schema_description, schema_fields)