Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Liquid Clustering config for table materialization #398

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -24,11 +24,16 @@
- Fix issue where the show tables extended command is limited to 2048 characters. ([#326](https://github.com/databricks/dbt-databricks/pull/326))
- Extend python model support to cover the same config options as SQL ([#379](https://github.com/databricks/dbt-databricks/pull/379))

### Features

- Add `liquid_clustered_by` config to enable Liquid Clustering for Delta-based dbt models. ([#398](https://github.com/databricks/dbt-databricks/pull/398))

### Other

- Drop support for Python 3.7
- Support for revamped `dbt debug`


## dbt-databricks 1.5.5 (July 7, 2023)

### Fixes
Expand Down
1 change: 1 addition & 0 deletions dbt/adapters/databricks/impl.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@ class DatabricksConfig(AdapterConfig):
location_root: Optional[str] = None
partition_by: Optional[Union[List[str], str]] = None
clustered_by: Optional[Union[List[str], str]] = None
liquid_clustered_by: Optional[Union[List[str], str]] = None
buckets: Optional[int] = None
options: Optional[Dict[str, str]] = None
merge_update_columns: Optional[str] = None
Expand Down
20 changes: 20 additions & 0 deletions dbt/include/databricks/macros/adapters.sql
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,25 @@
{%- endif %}
{%- endmacro -%}

{#
  Entry point for the liquid-clustering column clause.
  Forwards `label` and `required` unchanged to the `liquid_clustered_cols`
  implementation resolved through adapter.dispatch and returns its result.
#}
{% macro liquid_clustered_cols(label, required=false) -%}
{{ return(adapter.dispatch('liquid_clustered_cols', 'dbt')(label, required)) }}
{%- endmacro -%}

{#
  Databricks implementation of `liquid_clustered_cols`.
  Reads the `liquid_clustered_by` config value (validated as a list or a
  string). When the value is not none it renders `<label> (col1,col2,...)`,
  joining the columns with a bare comma; a single string value is first
  wrapped in a one-element list. Renders nothing when the config is none.
  NOTE(review): `required` is accepted but never read in this macro body.
#}
{% macro databricks__liquid_clustered_cols(label, required=false) -%}
{%- set cols = config.get('liquid_clustered_by', validator=validation.any[list, basestring]) -%}
{%- if cols is not none %}
{%- if cols is string -%}
{%- set cols = [cols] -%}
{%- endif -%}
{{ label }} (
{%- for item in cols -%}
{{ item }}
{%- if not loop.last -%},{%- endif -%}
{%- endfor -%}
)
{%- endif %}
{%- endmacro -%}


{% macro databricks__create_table_as(temporary, relation, compiled_code, language='sql') -%}
{%- if language == 'sql' -%}
Expand All @@ -62,6 +81,7 @@
{{ file_format_clause() }}
{{ options_clause() }}
{{ partition_cols(label="partitioned by") }}
{{ liquid_clustered_cols(label="cluster by") }}
{{ clustered_cols(label="clustered by") }}
{{ location_clause() }}
{{ comment_clause() }}
Expand Down
92 changes: 79 additions & 13 deletions tests/unit/macros/test_adapters_macros.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,26 @@ def test_macros_create_table_as_clusters(self):
"using delta clustered by (cluster_1,cluster_2) into 1 buckets as select 1",
)

def test_macros_create_table_as_liquid_cluster(self):
    # A single string value for `liquid_clustered_by` should render a
    # one-column `cluster by (...)` clause.
    self.config["liquid_clustered_by"] = "cluster_1"

    expected_sql = (
        "create or replace table my_table "
        "using delta cluster by (cluster_1) as select 1"
    )
    rendered_sql = self._render_create_table_as()

    self.assertEqual(rendered_sql, expected_sql)

def test_macros_create_table_as_liquid_clusters(self):
    # A list value for `liquid_clustered_by` should render every column,
    # comma-separated, inside a single `cluster by (...)` clause.
    # `buckets` is set as well; the expected SQL below contains no
    # `into N buckets` clause.
    self.config["liquid_clustered_by"] = ["cluster_1", "cluster_2"]
    self.config["buckets"] = "1"

    expected_sql = (
        "create or replace table my_table "
        "using delta cluster by (cluster_1,cluster_2) as select 1"
    )
    rendered_sql = self._render_create_table_as()

    self.assertEqual(rendered_sql, expected_sql)

def test_macros_create_table_as_location(self):
self.config["location_root"] = "/mnt/root"
sql = self._render_create_table_as()
Expand Down Expand Up @@ -140,6 +160,7 @@ def test_macros_create_table_as_tblproperties(self):
def test_macros_create_table_as_all_delta(self):
self.config["location_root"] = "/mnt/root"
self.config["partition_by"] = ["partition_1", "partition_2"]
self.config["liquid_clustered_by"] = ["cluster_1", "cluster_2"]
self.config["clustered_by"] = ["cluster_1", "cluster_2"]
self.config["buckets"] = "1"
self.config["persist_docs"] = {"relation": True}
Expand All @@ -154,6 +175,7 @@ def test_macros_create_table_as_all_delta(self):
"create or replace table my_table "
"using delta "
"partitioned by (partition_1,partition_2) "
"cluster by (cluster_1,cluster_2) "
"clustered by (cluster_1,cluster_2) into 1 buckets "
"location '/mnt/root/my_table' "
"comment 'Description Test' "
Expand Down Expand Up @@ -434,15 +456,22 @@ def __model(self):
def test_macros_get_constraint_sql_not_null_with_columns(self):
    # A not_null constraint that lists two columns is expected to render
    # one `alter table ... set not null` statement per listed column.
    model = self.__model()
    r = self.__render_constraint_sql({"type": "not_null", "columns": ["id", "name"]}, model)
    # The literal is wrapped to stay within the line-length limit, so no
    # E501 suppression is needed; the concatenated value is unchanged.
    expected = (
        "['alter table `some_database`.`some_schema`.`some_table` change column id "
        "set not null ;', 'alter table `some_database`.`some_schema`.`some_table` "
        "change column name set not null ;']"
    )

    assert expected in r

def test_macros_get_constraint_sql_not_null_with_column(self):
    # A not_null constraint with no `columns` key, rendered together with
    # the model's `id` column, is expected to target that column.
    model = self.__model()
    r = self.__render_constraint_sql({"type": "not_null"}, model, model["columns"]["id"])

    # The literal is wrapped to stay within the line-length limit, so no
    # E501 suppression is needed; the concatenated value is unchanged.
    expected = (
        "['alter table `some_database`.`some_schema`.`some_table` change column id "
        "set not null ;']"
    )
    assert expected in r

def test_macros_get_constraint_sql_not_null_mismatched_columns(self):
Expand All @@ -451,7 +480,10 @@ def test_macros_get_constraint_sql_not_null_mismatched_columns(self):
{"type": "not_null", "columns": ["name"]}, model, model["columns"]["id"]
)

expected = "['alter table `some_database`.`some_schema`.`some_table` change column name set not null ;']" # noqa: E501
expected = (
"['alter table `some_database`.`some_schema`.`some_table` change column name "
"set not null ;']"
) # noqa: E501
assert expected in r

def test_macros_get_constraint_sql_check(self):
Expand All @@ -464,7 +496,10 @@ def test_macros_get_constraint_sql_check(self):
}
r = self.__render_constraint_sql(constraint, model)

expected = "['alter table `some_database`.`some_schema`.`some_table` add constraint myconstraint check (id != name);']" # noqa: E501
expected = (
"['alter table `some_database`.`some_schema`.`some_table` add constraint "
"myconstraint check (id != name);']"
) # noqa: E501
assert expected in r

def test_macros_get_constraint_sql_check_named_constraint(self):
Expand All @@ -476,7 +511,10 @@ def test_macros_get_constraint_sql_check_named_constraint(self):
}
r = self.__render_constraint_sql(constraint, model)

expected = "['alter table `some_database`.`some_schema`.`some_table` add constraint myconstraint check (id != name);']" # noqa: E501
expected = (
"['alter table `some_database`.`some_schema`.`some_table` add constraint "
"myconstraint check (id != name);']"
) # noqa: E501
assert expected in r

def test_macros_get_constraint_sql_check_none_constraint(self):
Expand All @@ -487,7 +525,10 @@ def test_macros_get_constraint_sql_check_none_constraint(self):
}
r = self.__render_constraint_sql(constraint, model)

expected = "['alter table `some_database`.`some_schema`.`some_table` add constraint None check (id != name);']" # noqa: E501
expected = (
"['alter table `some_database`.`some_schema`.`some_table` add constraint None "
"check (id != name);']"
) # noqa: E501
assert expected in r

def test_macros_get_constraint_sql_check_missing_expression(self):
Expand All @@ -509,7 +550,10 @@ def test_macros_get_constraint_sql_primary_key(self):
}
r = self.__render_constraint_sql(constraint, model)

expected = "['alter table `some_database`.`some_schema`.`some_table` add constraint myconstraint primary key(name);']" # noqa: E501
expected = (
"['alter table `some_database`.`some_schema`.`some_table` add constraint "
"myconstraint primary key(name);']"
) # noqa: E501
assert expected in r

def test_macros_get_constraint_sql_primary_key_with_specified_column(self):
Expand All @@ -522,7 +566,10 @@ def test_macros_get_constraint_sql_primary_key_with_specified_column(self):
column = {"name": "id"}
r = self.__render_constraint_sql(constraint, model, column)

expected = "['alter table `some_database`.`some_schema`.`some_table` add constraint myconstraint primary key(name);']" # noqa: E501
expected = (
"['alter table `some_database`.`some_schema`.`some_table` add constraint "
"myconstraint primary key(name);']"
) # noqa: E501
assert expected in r

def test_macros_get_constraint_sql_primary_key_with_name(self):
Expand All @@ -534,7 +581,10 @@ def test_macros_get_constraint_sql_primary_key_with_name(self):
column = {"name": "id"}
r = self.__render_constraint_sql(constraint, model, column)

expected = "['alter table `some_database`.`some_schema`.`some_table` add constraint myconstraint primary key(id);']" # noqa: E501
expected = (
"['alter table `some_database`.`some_schema`.`some_table` add constraint "
"myconstraint primary key(id);']"
) # noqa: E501
assert expected in r

def test_macros_get_constraint_sql_foreign_key(self):
Expand All @@ -547,7 +597,11 @@ def test_macros_get_constraint_sql_foreign_key(self):
}
r = self.__render_constraint_sql(constraint, model)

expected = "['alter table `some_database`.`some_schema`.`some_table` add constraint myconstraint foreign key(name) references some_schema.parent_table;']" # noqa: E501
expected = (
"['alter table `some_database`.`some_schema`.`some_table` add "
"constraint myconstraint foreign key(name) references "
"some_schema.parent_table;']"
) # noqa: E501
assert expected in r

def test_macros_get_constraint_sql_foreign_key_parent_column(self):
Expand All @@ -561,7 +615,11 @@ def test_macros_get_constraint_sql_foreign_key_parent_column(self):
}
r = self.__render_constraint_sql(constraint, model)

expected = "['alter table `some_database`.`some_schema`.`some_table` add constraint myconstraint foreign key(name) references some_schema.parent_table(parent_name);']" # noqa: E501
expected = (
"['alter table `some_database`.`some_schema`.`some_table` add "
"constraint myconstraint foreign key(name) references "
"some_schema.parent_table(parent_name);']"
) # noqa: E501
assert expected in r

def test_macros_get_constraint_sql_foreign_key_multiple_columns(self):
Expand All @@ -575,7 +633,11 @@ def test_macros_get_constraint_sql_foreign_key_multiple_columns(self):
}
r = self.__render_constraint_sql(constraint, model)

expected = "['alter table `some_database`.`some_schema`.`some_table` add constraint myconstraint foreign key(name, id) references some_schema.parent_table(parent_name, parent_id);']" # noqa: E501
expected = (
"['alter table `some_database`.`some_schema`.`some_table` add constraint "
"myconstraint foreign key(name, id) "
"references some_schema.parent_table(parent_name, parent_id);']"
) # noqa: E501
assert expected in r

def test_macros_get_constraint_sql_foreign_key_columns_supplied_separately(self):
Expand All @@ -589,5 +651,9 @@ def test_macros_get_constraint_sql_foreign_key_columns_supplied_separately(self)
column = {"name": "id"}
r = self.__render_constraint_sql(constraint, model, column)

expected = "['alter table `some_database`.`some_schema`.`some_table` add constraint myconstraint foreign key(id) references some_schema.parent_table(parent_name);']" # noqa: E501
expected = (
"['alter table `some_database`.`some_schema`.`some_table` add constraint "
"myconstraint foreign key(id) references "
"some_schema.parent_table(parent_name);']"
) # noqa: E501
assert expected in r
Loading