From a17d66f332a782a7e81a59f8d249237f3c29612c Mon Sep 17 00:00:00 2001
From: yauhen-sobaleu <yauhen.sobaleu@gmail.com>
Date: Tue, 25 Jul 2023 21:30:54 +0300
Subject: [PATCH] Use QUALIFY clause in `deduplicate` macro for Redshift (#811)

* refactor: Use QUALIFY syntax in deduplicate() macro for Redshift

* chore: Update changelog

---------

Co-authored-by: Yauhen Sobaleu <yauhen.sobaleu@adevinta.com>
---
 CHANGELOG.md               |  2 ++
 macros/sql/deduplicate.sql | 11 +++++++++--
 2 files changed, 11 insertions(+), 2 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index fc0092d1..b7799278 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -11,9 +11,11 @@
 # Unreleased
 ## Fixes
 - deduplicate macro for Databricks now uses the QUALIFY clause, which fixes NULL columns issues from the default natural join logic
+- deduplicate macro for Redshift now uses the QUALIFY clause, which fixes NULL columns issues from the default natural join logic
 
 ## Contributors:
 [@graciegoheen](https://github.com/graciegoheen)
+[@yauhen-sobaleu](https://github.com/yauhen-sobaleu)
 
 # dbt utils v1.1.1
 ## New features
diff --git a/macros/sql/deduplicate.sql b/macros/sql/deduplicate.sql
index 8d372abe..3e75579c 100644
--- a/macros/sql/deduplicate.sql
+++ b/macros/sql/deduplicate.sql
@@ -29,10 +29,17 @@
 
 {%- endmacro -%}
 
-{# Redshift should use default instead of Postgres #}
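+-- Redshift supports the `QUALIFY` clause, so duplicates are filtered on `row_number()` directly instead of via the default natural join: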
+-- https://docs.aws.amazon.com/redshift/latest/dg/r_QUALIFY_clause.html
 {% macro redshift__deduplicate(relation, partition_by, order_by) -%}
 
-    {{ return(dbt_utils.default__deduplicate(relation, partition_by, order_by=order_by)) }}
+    select *
+    from {{ relation }} as tt
+    qualify
+        row_number() over (
+            partition by {{ partition_by }}
+            order by {{ order_by }}
+        ) = 1
 
 {% endmacro %}