From a17d66f332a782a7e81a59f8d249237f3c29612c Mon Sep 17 00:00:00 2001
From: yauhen-sobaleu
Date: Tue, 25 Jul 2023 21:30:54 +0300
Subject: [PATCH] Use QUALIFY clause in `deduplicate` macro for Redshift (#811)

* refactor: Use QUALIFY syntax in deduplicate() macro for Redshift

* chore: Update changelog

---------

Co-authored-by: Yauhen Sobaleu
---
NOTE(review): this patch was recovered from a copy in which every line break
had been collapsed. Blank context lines and leading indentation were
reconstructed from the hunk line counts (-11,9 +11,11 and -29,10 +29,17);
confirm them against the upstream files before applying.

Summary of the code change: redshift__deduplicate no longer delegates to
dbt_utils.default__deduplicate; it now emits a single SELECT over the relation
(aliased as tt) filtered with QUALIFY row_number() over (...) = 1.

 CHANGELOG.md               |  2 ++
 macros/sql/deduplicate.sql | 11 +++++++++--
 2 files changed, 11 insertions(+), 2 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index fc0092d1..b7799278 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -11,9 +11,11 @@
 # Unreleased
 ## Fixes
 - deduplicate macro for Databricks now uses the QUALIFY clause, which fixes NULL columns issues from the default natural join logic
+- deduplicate macro for Redshift now uses the QUALIFY clause, which fixes NULL columns issues from the default natural join logic
 
 ## Contributors:
 [@graciegoheen](https://github.com/graciegoheen)
+[@yauhen-sobaleu](https://github.com/yauhen-sobaleu)
 
 # dbt utils v1.1.1
 ## New features
diff --git a/macros/sql/deduplicate.sql b/macros/sql/deduplicate.sql
index 8d372abe..3e75579c 100644
--- a/macros/sql/deduplicate.sql
+++ b/macros/sql/deduplicate.sql
@@ -29,10 +29,17 @@
 
 {%- endmacro -%}
 
-{# Redshift should use default instead of Postgres #}
+-- Redshift has the `QUALIFY` syntax:
+-- https://docs.aws.amazon.com/redshift/latest/dg/r_QUALIFY_clause.html
 {% macro redshift__deduplicate(relation, partition_by, order_by) -%}
 
-    {{ return(dbt_utils.default__deduplicate(relation, partition_by, order_by=order_by)) }}
+    select *
+    from {{ relation }} as tt
+    qualify
+        row_number() over (
+            partition by {{ partition_by }}
+            order by {{ order_by }}
+        ) = 1
 
 {% endmacro %}
 