diff --git a/config/README.md b/config/README.md
new file mode 100644
index 00000000..e69de29b
diff --git a/config/external_config.json b/config/external_config.json
new file mode 100644
index 00000000..1b3c3ad5
--- /dev/null
+++ b/config/external_config.json
@@ -0,0 +1,165 @@
+{
+  "id": "dbt-common/external-catalog-config-v0",
+  "$schema": "http://json-schema.org/draft-07/schema#",
+  "description": "Schema for the dbt external catalog configuration: a list of catalogs, each with a name, a type, a type-specific configuration, and management settings.",
+  "title": "External Catalog Config",
+  "type": "object",
+  "required": [
+    "catalogs"
+  ],
+  "properties": {
+    "catalogs": {
+      "type": "array",
+      "items": {
+        "$ref": "#/$defs/externalCatalog"
+      }
+    }
+  },
+  "$defs": {
+    "icebergConfiguration": {
+      "type": "object",
+      "required": [
+        "table_format",
+        "internal_namespace"
+      ],
+      "properties": {
+        "table_format": {
+          "type": "string",
+          "description": "The table format",
+          "default": "iceberg"
+        },
+        "catalog_namespace": {
+          "type": "string",
+          "description": "The namespace",
+          "default": "dbt"
+        },
+        "internal_namespace": {
+          "type": "object",
+          "required": [
+            "database",
+            "schema"
+          ],
+          "properties": {
+            "database": {
+              "type": "string",
+              "description": "The database"
+            },
+            "schema": {
+              "type": "string",
+              "description": "The schema"
+            }
+          }
+        },
+        "external_volume": {
+          "type": "object",
+          "properties": {
+            "external_location": {
+              "type": "string",
+              "description": "The external location URI",
+              "format": "uri"
+            },
+            "name": {
+              "type": "string",
+              "description": "The name of the volume"
+            }
+          }
+        }
+      }
+    },
+    "glueConfiguration": {
+      "type": "object",
+      "required": [
+        "table_format",
+        "external_location"
+      ],
+      "properties": {
+        "table_format": {
+          "type": "string",
+          "description": "The table format"
+        },
+        "namespace": {
+          "type": "string",
+          "description": "The namespace",
+          "default": "dbt"
+        },
+        "external_location": {
+          "type": "string",
+          "description": "The external location",
+          "format": "uri"
+        },
+        "aws_account_id": {
+          "type": "string",
+          "description": "The AWS account ID"
+        },
+        "role_arn": {
+          "type": "string",
+          "description": "The role ARN"
+        }
+      }
+    },
+    "management": {
+      "type": "object",
+      "properties": {
+        "enabled": {
+          "type": "boolean",
+          "default": true,
+          "description": "Whether management is enabled"
+        },
+        "create_if_not_exists": {
+          "type": "boolean",
+          "default": false,
+          "description": "Whether to create the external catalog if it does not exist"
+        },
+        "alter_if_different": {
+          "type": "boolean",
+          "default": false,
+          "description": "Whether to alter the external catalog if it exists"
+        },
+        "read_only": {
+          "type": "boolean",
+          "default": true,
+          "description": "Whether the external catalog is read-only"
+        },
+        "refresh": {
+          "type": "string",
+          "enum": [
+            "on-start",
+            "never",
+            "just-in-time"
+          ],
+          "default": "on-start",
+          "description": "Whether to refresh the external catalog"
+        }
+      }
+    },
+    "externalCatalog": {
+      "type": "object",
+      "required": [
+        "type",
+        "name",
+        "configuration",
+        "management"
+      ],
+      "properties": {
+        "name": {
+          "type": "string",
+          "description": "The name of the external catalog"
+        },
+        "type": {
+          "type": "string",
+          "enum": [
+            "iceberg",
+            "glue"
+          ]
+        },
+        "configuration": {
+          "type": "object",
+          "oneOf": [{"$ref": "#/$defs/icebergConfiguration"}, {"$ref": "#/$defs/glueConfiguration"}]
+        },
+        "management": {
+          "$ref": "#/$defs/management"
+        }
+      }
+    }
+  }
+}
diff --git a/config/pyproject.toml b/config/pyproject.toml
new file mode 100644
index 00000000..83fdd0db
--- /dev/null
+++ b/config/pyproject.toml
@@ -0,0 +1,172 @@
+[project]
+name = "dbt-config"
+dynamic = ["version"]
+description = "The shared configuration interfaces"
+readme = "README.md"
+requires-python = ">=3.8"
+license = "Apache-2.0"
+keywords = []
+authors = [
+    { name = "dbt Labs", email = "info@dbtlabs.com" },
+]
+maintainers = [
+    { name = "dbt Labs", email = "info@dbtlabs.com" },
+]
+classifiers = [
+    "Development Status :: 2
- Pre-Alpha",
+    "License :: OSI Approved :: Apache Software License",
+    "Operating System :: MacOS :: MacOS X",
+    "Operating System :: Microsoft :: Windows",
+    "Operating System :: POSIX :: Linux",
+    "Programming Language :: Python",
+    "Programming Language :: Python :: 3.8",
+    "Programming Language :: Python :: 3.9",
+    "Programming Language :: Python :: 3.10",
+    "Programming Language :: Python :: 3.11",
+    "Programming Language :: Python :: 3.12",
+    "Programming Language :: Python :: Implementation :: CPython",
+    "Programming Language :: Python :: Implementation :: PyPy",
+]
+dependencies = [
+    "pydantic>=2.0,<3.0",  # the test suite calls model_validate, a pydantic v2 API
+]
+
+[project.optional-dependencies]
+lint = [
+    "black>=23.3,<24.0",
+    "flake8",
+    "flake8-pyproject",
+    "flake8-docstrings",
+    "mypy>=1.3,<2.0",
+    "pytest>=7.3,<8.0",  # needed for linting tests
+    "types-Jinja2>=2.11,<3.0",
+    "types-jsonschema>=4.17,<5.0",
+    "types-protobuf>=4.24,<5.0",
+    "types-python-dateutil>=2.8,<3.0",
+    "types-PyYAML>=6.0,<7.0",
+    "types-requests"
+]
+test = [
+    "pytest>=7.3,<8.0",
+    "pytest-mock",
+    "pytest-xdist>=3.2,<4.0",
+    "pytest-cov>=4.1,<5.0",
+    "hypothesis>=6.87,<7.0",
+    "pyyaml",  # imported by test/test_external_config.py
+]
+build = [
+    "wheel",
+    "twine",
+    "check-wheel-contents",
+]
+
+[project.urls]
+Homepage = "https://github.com/dbt-labs/dbt-common/tree/main/config"
+Repository = "https://github.com/dbt-labs/dbt-common.git"
+Issues = "https://github.com/dbt-labs/dbt-common/issues"
+Changelog = "https://github.com/dbt-labs/dbt-common/blob/main/CHANGELOG.md"
+
+[tool.hatch.version]
+path = "src/dbt_config/__about__.py"
+
+### Default env & scripts
+
+[tool.hatch.envs.default]
+description = "Default environment with dependencies for running dbt-config"
+features = ["lint", "test"]
+
+### Test settings, envs & scripts
+[tool.hatch.envs.test]
+description = "Env for running development commands for testing"
+features = ["test"]
+
+[tool.hatch.envs.test.scripts]
+unit = "python -m pytest --cov=dbt_config --cov-report=xml test/"
+
+### Linting settings, envs & scripts
+
+[tool.hatch.envs.lint]
+type = "virtual"
+description = "Env for running development commands for linting"
+features = ["lint"]
+
+[tool.hatch.envs.lint.scripts]
+all = [
+    "- black",
+    "- flake8",
+    "- mypy",
+]
+black = "python -m black ."
+flake8 = "python -m flake8 ."
+mypy = "python -m mypy ."
+
+[tool.black]
+line-length = 99
+target-version = ['py38']
+
+[tool.flake8]
+max-line-length = 99
+select = ["E", "W", "F"]
+ignore = ["E203", "E501", "E741", "W503", "W504"]
+exclude = [
+    "venv",
+    ".venv",
+    "env*",
+    ".hatch/*",
+]
+per-file-ignores = ["*/__init__.py: F401", "*/conftest.py: F401"]
+docstring-convention = "google"
+
+[tool.mypy]
+mypy_path = "third-party-stubs/"
+namespace_packages = true
+warn_unused_configs = true
+show_error_codes = true
+disable_error_code = "attr-defined"  # TODO: revisit once other mypy errors resolved
+disallow_untyped_defs = false  # TODO: add type annotations everywhere
+warn_redundant_casts = true
+ignore_missing_imports = true
+exclude = [
+    "env*",
+    "third-party-stubs/*",
+]
+
+### Build settings, envs & scripts
+
+[build-system]
+requires = ["hatchling"]
+build-backend = "hatchling.build"
+
+[tool.hatch.build.targets.sdist]
+exclude = [
+    "/.github",
+    "/.changes",
+    ".changie.yaml",
+    ".gitignore",
+    ".pre-commit-config.yaml",
+    "CONTRIBUTING.md",
+    "/test",
+]
+
+[tool.hatch.build.targets.wheel]
+packages = ["src/dbt_config"]  # wheel contents are a build-target option, not an env option
+
+[tool.hatch.envs.build]
+description = "Env for running development commands for building"
+features = ["build"]
+
+[tool.hatch.envs.build.scripts]
+check-all = [
+    "- check-wheel",
+    "- check-sdist",
+]
+check-wheel = [
+    "twine check dist/*",
+    "find ./dist/dbt_config-*.whl -maxdepth 1 -type f | xargs python -m pip install --force-reinstall --find-links=dist/",
+    "pip freeze | grep dbt-config",
+]
+check-sdist = [
+    "check-wheel-contents dist/*.whl --ignore W007,W008",
+    "find ./dist/dbt_config-*.gz -maxdepth 1 -type f | xargs python -m pip install --force-reinstall --find-links=dist/",
+    "pip freeze | grep
dbt-config",
+]
diff --git a/config/src/dbt_config/__about__.py b/config/src/dbt_config/__about__.py
new file mode 100644
index 00000000..06fbe7e6
--- /dev/null
+++ b/config/src/dbt_config/__about__.py
@@ -0,0 +1 @@
+version = "0.0.1"
\ No newline at end of file
diff --git a/config/src/dbt_config/__init__.py b/config/src/dbt_config/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/config/src/dbt_config/catalog_config.py b/config/src/dbt_config/catalog_config.py
new file mode 100644
index 00000000..01f10b89
--- /dev/null
+++ b/config/src/dbt_config/catalog_config.py
@@ -0,0 +1,79 @@
+# generated by datamodel-codegen:
+#   filename:  external_config.json
+#   timestamp: 2024-10-11T03:41:52+00:00
+
+from __future__ import annotations
+
+from enum import Enum
+from typing import List, Optional, Union
+
+from pydantic import AnyUrl, BaseModel, Field
+
+
+class InternalNamespace(BaseModel):
+    database: str = Field(..., description='The database')
+    # Populated from the "schema" key via the alias; the attribute itself is schema_.
+    schema_: str = Field(..., alias='schema', description='The schema')
+
+
+class ExternalVolume(BaseModel):
+    external_location: Optional[AnyUrl] = Field(None, description='The external location')
+    name: Optional[str] = Field(None, description='The name of the volume')
+
+
+class IcebergConfiguration(BaseModel):
+    table_format: str = Field(..., description='The table format')
+    catalog_namespace: Optional[str] = Field('dbt', description='The namespace')
+    internal_namespace: InternalNamespace
+    external_volume: Optional[ExternalVolume] = None
+
+
+class GlueConfiguration(BaseModel):
+    table_format: str = Field(..., description='The table format')
+    namespace: Optional[str] = Field('dbt', description='The namespace')
+    external_location: AnyUrl = Field(..., description='The external location')
+    aws_account_id: Optional[str] = Field(None, description='The AWS account ID')
+    role_arn: Optional[str] = Field(None, description='The role ARN')
+
+
+class Refresh(Enum):
+    on_start = 'on-start'
+    never = 'never'
+    just_in_time = 'just-in-time'
+
+
+class Management(BaseModel):
+    enabled: Optional[bool] = Field(True, description='Whether management is enabled')
+    create_if_not_exists: Optional[bool] = Field(
+        False, description='Whether to create the external catalog if it does not exist'
+    )
+    alter_if_different: Optional[bool] = Field(
+        False, description='Whether to alter the external catalog if it exists'
+    )
+    read_only: Optional[bool] = Field(
+        True, description='Whether the external catalog is read-only'
+    )
+    # Hand-edited: the default is the enum member rather than the raw 'on-start' string,
+    # because pydantic does not validate defaults and would otherwise expose a plain str.
+    # Regenerate with datamodel-codegen's --set-default-enum-member to keep this.
+    refresh: Optional[Refresh] = Field(
+        Refresh.on_start, description='Whether to refresh the external catalog'
+    )
+
+
+class Type(Enum):
+    iceberg = 'iceberg'
+    glue = 'glue'
+
+
+class ExternalCatalog(BaseModel):
+    name: str = Field(..., description='The name of the external catalog')
+    type: Type
+    # NOTE(review): no discriminator ties `type` to the configuration model, so an
+    # "iceberg" catalog can carry a Glue-shaped configuration -- confirm this is intended.
+    configuration: Union[IcebergConfiguration, GlueConfiguration]
+    management: Management
+
+
+class ExternalCatalogConfig(BaseModel):
+    catalogs: List[ExternalCatalog]
diff --git a/config/src/dbt_config/py.typed b/config/src/dbt_config/py.typed
new file mode 100644
index 00000000..e69de29b
diff --git a/config/test/test_external_config.py b/config/test/test_external_config.py
new file mode 100644
index 00000000..53ed56cd
--- /dev/null
+++ b/config/test/test_external_config.py
@@ -0,0 +1,41 @@
+import yaml
+
+from dbt_config.catalog_config import ExternalCatalogConfig, Refresh
+
+__EXAMPLE_VALID_CONFIG = """
+catalogs:  # list of objects
+  - name: "titanic"  # p0 name of the catalog
+    type: iceberg  # p0
+    management:  # Not P0, this governs how dbt manages the catalog integration
+      enabled: True  # p0
+      create_if_not_exists: True  # we will likely default this to false as it typically requires admin privileges
+      alter_if_different: False
+      refresh: "on-start"  # one of: "on-start" | "never" | "just-in-time"
+    configuration:
+      table_format: "iceberg"  # p0 delta/hudi etc
+      namespace: "default"
+      external_location: 'azfs://external-location-bucket-path/directory'
+
+  - name: "elmers"
+    type: glue
+    management:  # Not P0, this governs how dbt manages the catalog integration
+      create_if_not_exists: True
+      alter_if_different: False
+      read_only: True  # if we try to persist a model here dbt raises an exception
+    configuration:
+      namespace: "awsdatacatalog"
+      external_location: 's3://external-location-bucket-path/directory'
+      aws_account_id: "123456089"
+      role_arn: "someRole"
+      table_format: "iceberg"
+"""
+
+
+def test_parse_external_config():
+    """The example YAML parses into two catalogs with the expected names and refresh policy."""
+    unparsed_config = yaml.safe_load(__EXAMPLE_VALID_CONFIG)
+    config = ExternalCatalogConfig.model_validate(unparsed_config)
+    assert [catalog.name for catalog in config.catalogs] == ["titanic", "elmers"]
+    # Explicit value is coerced to the enum; the omitted one falls back to the default member.
+    assert config.catalogs[0].management.refresh == Refresh.on_start
+    assert config.catalogs[1].management.refresh == Refresh.on_start
diff --git a/dbt_common/config/__init__.py b/dbt_common/config/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/dbt_common/events/common_types_pb2.py b/dbt_common/events/common_types_pb2.py
new file mode 100644
index 00000000..aa03438c
--- /dev/null
+++ b/dbt_common/events/common_types_pb2.py
@@ -0,0 +1,71 @@
+# -*- coding: utf-8 -*-
+# Generated by the protocol buffer compiler. DO NOT EDIT!
+# source: common_types.proto +"""Generated protocol buffer code.""" +from google.protobuf import descriptor as _descriptor +from google.protobuf import descriptor_pool as _descriptor_pool +from google.protobuf import symbol_database as _symbol_database +from google.protobuf.internal import builder as _builder + +# @@protoc_insertion_point(imports) + +_sym_db = _symbol_database.Default() + + +from google.protobuf import timestamp_pb2 as google_dot_protobuf_dot_timestamp__pb2 + + +DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile( + b'\n\x12\x63ommon_types.proto\x12\x0bproto_types\x1a\x1fgoogle/protobuf/timestamp.proto"\x91\x02\n\tEventInfo\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x0c\n\x04\x63ode\x18\x02 \x01(\t\x12\x0b\n\x03msg\x18\x03 \x01(\t\x12\r\n\x05level\x18\x04 \x01(\t\x12\x15\n\rinvocation_id\x18\x05 \x01(\t\x12\x0b\n\x03pid\x18\x06 \x01(\x05\x12\x0e\n\x06thread\x18\x07 \x01(\t\x12&\n\x02ts\x18\x08 \x01(\x0b\x32\x1a.google.protobuf.Timestamp\x12\x30\n\x05\x65xtra\x18\t \x03(\x0b\x32!.proto_types.EventInfo.ExtraEntry\x12\x10\n\x08\x63\x61tegory\x18\n \x01(\t\x1a,\n\nExtraEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01"6\n\x0eGenericMessage\x12$\n\x04info\x18\x01 \x01(\x0b\x32\x16.proto_types.EventInfo"1\n\x11RetryExternalCall\x12\x0f\n\x07\x61ttempt\x18\x01 \x01(\x05\x12\x0b\n\x03max\x18\x02 \x01(\x05"j\n\x14RetryExternalCallMsg\x12$\n\x04info\x18\x01 \x01(\x0b\x32\x16.proto_types.EventInfo\x12,\n\x04\x64\x61ta\x18\x02 \x01(\x0b\x32\x1e.proto_types.RetryExternalCall"#\n\x14RecordRetryException\x12\x0b\n\x03\x65xc\x18\x01 \x01(\t"p\n\x17RecordRetryExceptionMsg\x12$\n\x04info\x18\x01 \x01(\x0b\x32\x16.proto_types.EventInfo\x12/\n\x04\x64\x61ta\x18\x02 \x01(\x0b\x32!.proto_types.RecordRetryException"@\n\x13SystemCouldNotWrite\x12\x0c\n\x04path\x18\x01 \x01(\t\x12\x0e\n\x06reason\x18\x02 \x01(\t\x12\x0b\n\x03\x65xc\x18\x03 \x01(\t"n\n\x16SystemCouldNotWriteMsg\x12$\n\x04info\x18\x01 
\x01(\x0b\x32\x16.proto_types.EventInfo\x12.\n\x04\x64\x61ta\x18\x02 \x01(\x0b\x32 .proto_types.SystemCouldNotWrite"!\n\x12SystemExecutingCmd\x12\x0b\n\x03\x63md\x18\x01 \x03(\t"l\n\x15SystemExecutingCmdMsg\x12$\n\x04info\x18\x01 \x01(\x0b\x32\x16.proto_types.EventInfo\x12-\n\x04\x64\x61ta\x18\x02 \x01(\x0b\x32\x1f.proto_types.SystemExecutingCmd"\x1c\n\x0cSystemStdOut\x12\x0c\n\x04\x62msg\x18\x01 \x01(\t"`\n\x0fSystemStdOutMsg\x12$\n\x04info\x18\x01 \x01(\x0b\x32\x16.proto_types.EventInfo\x12\'\n\x04\x64\x61ta\x18\x02 \x01(\x0b\x32\x19.proto_types.SystemStdOut"\x1c\n\x0cSystemStdErr\x12\x0c\n\x04\x62msg\x18\x01 \x01(\t"`\n\x0fSystemStdErrMsg\x12$\n\x04info\x18\x01 \x01(\x0b\x32\x16.proto_types.EventInfo\x12\'\n\x04\x64\x61ta\x18\x02 \x01(\x0b\x32\x19.proto_types.SystemStdErr",\n\x16SystemReportReturnCode\x12\x12\n\nreturncode\x18\x01 \x01(\x05"t\n\x19SystemReportReturnCodeMsg\x12$\n\x04info\x18\x01 \x01(\x0b\x32\x16.proto_types.EventInfo\x12\x31\n\x04\x64\x61ta\x18\x02 \x01(\x0b\x32#.proto_types.SystemReportReturnCode"\x19\n\nFormatting\x12\x0b\n\x03msg\x18\x01 \x01(\t"\\\n\rFormattingMsg\x12$\n\x04info\x18\x01 \x01(\x0b\x32\x16.proto_types.EventInfo\x12%\n\x04\x64\x61ta\x18\x02 \x01(\x0b\x32\x17.proto_types.Formatting"\x13\n\x04Note\x12\x0b\n\x03msg\x18\x01 \x01(\t"P\n\x07NoteMsg\x12$\n\x04info\x18\x01 \x01(\x0b\x32\x16.proto_types.EventInfo\x12\x1f\n\x04\x64\x61ta\x18\x02 \x01(\x0b\x32\x11.proto_types.Noteb\x06proto3' +) + +_globals = globals() +_builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) +_builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, "common_types_pb2", _globals) +if _descriptor._USE_C_DESCRIPTORS == False: + DESCRIPTOR._options = None + _EVENTINFO_EXTRAENTRY._options = None + _EVENTINFO_EXTRAENTRY._serialized_options = b"8\001" + _globals["_EVENTINFO"]._serialized_start = 69 + _globals["_EVENTINFO"]._serialized_end = 342 + _globals["_EVENTINFO_EXTRAENTRY"]._serialized_start = 298 + _globals["_EVENTINFO_EXTRAENTRY"]._serialized_end = 342 
+ _globals["_GENERICMESSAGE"]._serialized_start = 344 + _globals["_GENERICMESSAGE"]._serialized_end = 398 + _globals["_RETRYEXTERNALCALL"]._serialized_start = 400 + _globals["_RETRYEXTERNALCALL"]._serialized_end = 449 + _globals["_RETRYEXTERNALCALLMSG"]._serialized_start = 451 + _globals["_RETRYEXTERNALCALLMSG"]._serialized_end = 557 + _globals["_RECORDRETRYEXCEPTION"]._serialized_start = 559 + _globals["_RECORDRETRYEXCEPTION"]._serialized_end = 594 + _globals["_RECORDRETRYEXCEPTIONMSG"]._serialized_start = 596 + _globals["_RECORDRETRYEXCEPTIONMSG"]._serialized_end = 708 + _globals["_SYSTEMCOULDNOTWRITE"]._serialized_start = 710 + _globals["_SYSTEMCOULDNOTWRITE"]._serialized_end = 774 + _globals["_SYSTEMCOULDNOTWRITEMSG"]._serialized_start = 776 + _globals["_SYSTEMCOULDNOTWRITEMSG"]._serialized_end = 886 + _globals["_SYSTEMEXECUTINGCMD"]._serialized_start = 888 + _globals["_SYSTEMEXECUTINGCMD"]._serialized_end = 921 + _globals["_SYSTEMEXECUTINGCMDMSG"]._serialized_start = 923 + _globals["_SYSTEMEXECUTINGCMDMSG"]._serialized_end = 1031 + _globals["_SYSTEMSTDOUT"]._serialized_start = 1033 + _globals["_SYSTEMSTDOUT"]._serialized_end = 1061 + _globals["_SYSTEMSTDOUTMSG"]._serialized_start = 1063 + _globals["_SYSTEMSTDOUTMSG"]._serialized_end = 1159 + _globals["_SYSTEMSTDERR"]._serialized_start = 1161 + _globals["_SYSTEMSTDERR"]._serialized_end = 1189 + _globals["_SYSTEMSTDERRMSG"]._serialized_start = 1191 + _globals["_SYSTEMSTDERRMSG"]._serialized_end = 1287 + _globals["_SYSTEMREPORTRETURNCODE"]._serialized_start = 1289 + _globals["_SYSTEMREPORTRETURNCODE"]._serialized_end = 1333 + _globals["_SYSTEMREPORTRETURNCODEMSG"]._serialized_start = 1335 + _globals["_SYSTEMREPORTRETURNCODEMSG"]._serialized_end = 1451 + _globals["_FORMATTING"]._serialized_start = 1453 + _globals["_FORMATTING"]._serialized_end = 1478 + _globals["_FORMATTINGMSG"]._serialized_start = 1480 + _globals["_FORMATTINGMSG"]._serialized_end = 1572 + _globals["_NOTE"]._serialized_start = 1574 + 
_globals["_NOTE"]._serialized_end = 1593 + _globals["_NOTEMSG"]._serialized_start = 1595 + _globals["_NOTEMSG"]._serialized_end = 1675 +# @@protoc_insertion_point(module_scope)