Skip to content

Commit

Permalink
Merge pull request #208 from dbt-labs/let-there-be-median
Browse files Browse the repository at this point in the history
Let There Be Median
  • Loading branch information
callum-mcdata authored Jan 11, 2023
2 parents 1f1100c + 3591522 commit 9eddb15
Show file tree
Hide file tree
Showing 29 changed files with 518 additions and 163 deletions.
7 changes: 7 additions & 0 deletions .changes/unreleased/Features-20230109-145530.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
kind: Features
body: Adding median
time: 2023-01-09T14:55:30.09271-06:00
custom:
  Author: callum-mcdata
  Issue: "180"
  PR: "208"
2 changes: 1 addition & 1 deletion integration_tests/dbt_project.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ version: "1.0.0"
config-version: 2

# This setting configures which "profile" dbt uses for this project.
profile: "dbt_metrics_integration_tests_postgres"
profile: "dbt_metrics_integration_tests_bigquery"

model-paths: ["models"]
analysis-paths: ["analyses"]
Expand Down
14 changes: 14 additions & 0 deletions integration_tests/models/metric_definitions/base_median_metric.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
version: 2
metrics:
  # Integration-test metric exercising the `median` calculation method:
  # the median of `discount_total` over the `fact_orders` model.
  - name: base_median_metric
    model: ref('fact_orders')
    # The label describes a median, not a total, matching calculation_method.
    label: Median Discount ($)
    timestamp: order_date
    time_grains: [day, week, month, all_time]
    calculation_method: median
    expression: discount_total
    dimensions:
      - had_discount
      - order_country


Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
{# Queries base_median_metric at month grain, split by the had_discount dimension. #}
select *
from {{
    metrics.calculate(
        metric('base_median_metric'),
        grain='month',
        dimensions=['had_discount']
    )
}}
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
{# Queries base_median_metric with no grain and no dimensions passed to calculate. #}
select *
from {{ metrics.calculate(metric('base_median_metric')) }}
2 changes: 1 addition & 1 deletion macros/calculate.sql
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
{#- Need this here, since the actual ref is nested within loops/conditions: -#}
-- depends on: {{ ref(var('dbt_metrics_calendar_model', 'dbt_metrics_default_calendar')) }}

{# ############
{#- ############
VARIABLE SETTING - Creating the metric tree and making sure metric list is a list!
############ -#}

Expand Down
3 changes: 0 additions & 3 deletions macros/get_metric_sql.sql
Original file line number Diff line number Diff line change
Expand Up @@ -48,13 +48,10 @@ metrics there are -#}
start_date=start_date,
end_date=end_date)
}}

{#- Next we check if it is a composite metric or single metric by checking the length of the list -#}
{#- This filter forms the basis of how we construct the SQL -#}

{#- If composite, we begin by looping through each of the metric names that make
up the composite metric. -#}

{%- for metric_name in metric_tree["parent_set"] -%}

{{ metrics.build_metric_sql(
Expand Down
5 changes: 2 additions & 3 deletions macros/sql_gen/build_metric_sql.sql
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{%- macro build_metric_sql(metric_dictionary, grain, dimensions, secondary_calculations, start_date, end_date, calendar_tbl, relevant_periods, calendar_dimensions, dimensions_provided, total_dimension_count) %}

{%- set treat_null_values_as_zero = metric_dictionary.get("config").get("treat_null_values_as_zero", True) -%}
{#- This is the SQL Gen part - we've broken each component out into individual macros -#}
{#- We broke this out so it can loop for composite metrics -#}
Expand Down Expand Up @@ -28,7 +28,6 @@

{%- endif -%}


{{ metrics.gen_spine_time_cte(
metric_name=metric_dictionary.name,
grain=grain,
Expand All @@ -53,4 +52,4 @@
treat_null_values_as_zero=treat_null_values_as_zero
)}}

{% endmacro -%}
{%- endmacro -%}
10 changes: 4 additions & 6 deletions macros/sql_gen/gen_aggregate_cte.sql
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
and THEN aggregating, we are instead aggregating from the beginning and then
joining downstream for performance. Additionally, we're using a subquery instead
of a CTE, which was significantly more performant during our testing. -#}
{#- #}
select
{%- if grain %}
Expand Down Expand Up @@ -37,12 +38,11 @@

{%- if grain %}
{{ bool_or('metric_date_day is not null') }} as has_data,
{% endif %}
{%- endif %}

{#- This line performs the relevant aggregation by calling the
gen_primary_metric_aggregate macro. Take a look at that one if you're curious -#}
{{ metrics.gen_primary_metric_aggregate(metric_dictionary.calculation_method, 'property_to_aggregate') }} as {{ metric_dictionary.name }}
from ({{ metrics.gen_base_query(
metric_dictionary=metric_dictionary,
grain=grain,
Expand All @@ -57,11 +57,9 @@
) as base_query
where 1=1
{% if metric_dictionary.window is not none and grain %}
{%- if metric_dictionary.window is not none and grain %}
and date_{{grain}} = window_filter_date
{% endif %}
{%- endif %}
{{ metrics.gen_group_by(grain, dimensions, calendar_dimensions, relevant_periods) }}
)
Expand Down
43 changes: 14 additions & 29 deletions macros/sql_gen/gen_base_query.sql
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,10 @@
{% endmacro %}

{% macro default__gen_base_query(metric_dictionary, grain, dimensions, secondary_calculations, start_date, end_date, calendar_tbl, relevant_periods, calendar_dimensions, total_dimension_count) %}

{# This is the "base" CTE which selects the fields we need to correctly
calculate the metric. #}
calculate the metric. -#}
select

{% if grain %}
{% if grain -%}
cast(base_model.{{metric_dictionary.timestamp}} as date) as metric_date_day,
calendar_table.date_{{ grain }} as date_{{grain}},
calendar_table.date_day as window_filter_date,
Expand All @@ -17,36 +15,23 @@
calendar_table.date_{{ period }},
{% endfor -%}
{%- endif -%}
{%- endif -%}

{% for dim in dimensions %}
{%- endif %}
{#- -#}
{%- for dim in dimensions -%}
base_model.{{ dim }},
{% endfor %}

{% for calendar_dim in calendar_dimensions %}
{%- endfor -%}
{%- for calendar_dim in calendar_dimensions -%}
calendar_table.{{ calendar_dim }},
{% endfor %}


{%- if metric_dictionary.expression and metric_dictionary.expression | replace('*', '') | trim != '' %}

({{ metric_dictionary.expression }}) as property_to_aggregate
{%- elif metric_dictionary.calculation_method == 'count' -%}
{# We use 1 as the property to aggregate in count so that it matches count(*) #}
1 as property_to_aggregate
{%- else -%}
{%- do exceptions.raise_compiler_error("Expression to aggregate is required for non-count aggregation in metric `" ~ metric_dictionary.name ~ "`") -%}
{%- endif %}


{%- endfor -%}
{{ metrics.gen_property_to_aggregate(metric_dictionary, grain, dimensions, calendar_dimensions) }}
from {{ metric_dictionary.metric_model }} base_model

{% if grain or calendar_dimensions|length > 0 %}
{# -#}
{%- if grain or calendar_dimensions|length > 0 -%}
{{ metrics.gen_calendar_table_join(metric_dictionary, calendar_tbl) }}
{% endif %}

{%- endif -%}
{# #}
where 1=1

{#- -#}
{{ metrics.gen_filters(metric_dictionary, start_date, end_date) }}

{%- endmacro -%}
3 changes: 1 addition & 2 deletions macros/sql_gen/gen_calendar_cte.sql
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
{%- macro default__gen_calendar_cte(calendar_tbl, start_date, end_date) %}

with calendar as (

{# This CTE creates our base calendar and then limits the date range for the
start and end date provided by the macro call -#}
select
Expand All @@ -23,4 +22,4 @@ with calendar as (
{% endif %}
)

{% endmacro %}
{%- endmacro -%}
34 changes: 13 additions & 21 deletions macros/sql_gen/gen_calendar_table_join.sql
Original file line number Diff line number Diff line change
@@ -1,51 +1,43 @@
{% macro gen_calendar_table_join(metric_dictionary, calendar_tbl) %}
{{ return(adapter.dispatch('gen_calendar_table_join', 'metrics')(metric_dictionary, calendar_tbl)) }}
{% endmacro %}
{%- endmacro -%}

{% macro default__gen_calendar_table_join(metric_dictionary, calendar_tbl) %}

left join {{calendar_tbl}} calendar_table
{% if metric_dictionary.window is not none %}
{%- if metric_dictionary.window is not none %}
on cast(base_model.{{metric_dictionary.timestamp}} as date) > dateadd({{metric_dictionary.window.period}}, -{{metric_dictionary.window.count}}, calendar_table.date_day)
and cast(base_model.{{metric_dictionary.timestamp}} as date) <= calendar_table.date_day
{% else %}
{%- else %}
on cast(base_model.{{metric_dictionary.timestamp}} as date) = calendar_table.date_day
{% endif %}

{% endif -%}
{% endmacro %}

{% macro bigquery__gen_calendar_table_join(metric_dictionary, calendar_tbl) %}

left join {{calendar_tbl}} calendar_table
{% if metric_dictionary.window is not none %}
{%- if metric_dictionary.window is not none %}
on cast(base_model.{{metric_dictionary.timestamp}} as date) > date_sub(calendar_table.date_day, interval {{metric_dictionary.window.count}} {{metric_dictionary.window.period}})
and cast(base_model.{{metric_dictionary.timestamp}} as date) <= calendar_table.date_day
{% else %}
{%- else %}
on cast(base_model.{{metric_dictionary.timestamp}} as date) = calendar_table.date_day
{% endif %}

{% endif -%}
{% endmacro %}

{% macro postgres__gen_calendar_table_join(metric_dictionary, calendar_tbl) %}

left join {{calendar_tbl}} calendar_table
{% if metric_dictionary.window is not none %}
{%- if metric_dictionary.window is not none %}
on cast(base_model.{{metric_dictionary.timestamp}} as date) > calendar_table.date_day - interval '{{metric_dictionary.window.count}} {{metric_dictionary.window.period}}'
and cast(base_model.{{metric_dictionary.timestamp}} as date) <= calendar_table.date_day
{% else %}
{%- else %}
on cast(base_model.{{metric_dictionary.timestamp}} as date) = calendar_table.date_day
{% endif %}

{% endif -%}
{% endmacro %}

{% macro redshift__gen_calendar_table_join(metric_dictionary, calendar_tbl) %}

left join {{calendar_tbl}} calendar_table
{% if metric_dictionary.window is not none %}
{%- if metric_dictionary.window is not none %}
on cast(base_model.{{metric_dictionary.timestamp}} as date) > dateadd({{metric_dictionary.window.period}}, -{{metric_dictionary.window.count}}, calendar_table.date_day)
and cast(base_model.{{metric_dictionary.timestamp}} as date) <= calendar_table.date_day
{% else %}
{%- else %}
on cast(base_model.{{metric_dictionary.timestamp}} as date) = calendar_table.date_day
{% endif %}

{% endif -%}
{% endmacro %}
90 changes: 43 additions & 47 deletions macros/sql_gen/gen_joined_metrics_cte.sql
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,7 @@
{%- endfor -%}
{%- endif -%}

{% set dimension_count = (dimensions | length + calendar_dimensions | length) | int %}

{%- set dimension_count = (dimensions | length + calendar_dimensions | length) | int %}
, first_join_metrics as (

select
Expand Down Expand Up @@ -67,56 +66,55 @@
coalesce({{metric_name}},0) as {{metric_name}} {%- if not loop.last -%}, {%- endif -%}
{%- endif %}
{%- endfor %}

from
{#- Loop through leaf metric list -#}
{%- for metric_name in metric_tree.parent_set -%}
{%- if loop.first %}
{{ metric_name }}__final
{%- else %}
{%- if grain %}
full outer join {{metric_name}}__final
using (
date_{{grain}}
{%- for calendar_dim in calendar_dimensions %}
, {{ calendar_dim }}
{% endfor %}
{%- for dim in dimensions %}
, {{ dim }}
{%- endfor %}
)
{#- Loop through leaf metric list -#}
{% for metric_name in metric_tree.parent_set %}
{%- if loop.first %}
from {{ metric_name }}__final
{%- else %}
{%- if grain %}
full outer join {{metric_name}}__final
using (
date_{{grain}}
{%- for calendar_dim in calendar_dimensions %}
, {{ calendar_dim }}
{% endfor %}
{%- for dim in dimensions %}
, {{ dim }}
{%- endfor %}
)
{%- else -%}
{% if dimension_count != 0 %}
full outer join {{metric_name}}__final
using (
{%- for calendar_dim in calendar_dimensions -%}
{%- if not loop.first -%},{%- endif -%} {{ calendar_dim }}
{%- endfor -%}

{%- for dim in dimensions %}
{%- if loop.first and calendar_dimensions | length == 0 -%}
{{ dim }}
{%- elif not loop.first and calendar_dimensions | length == 0 -%}
, {{ dim }}
{%- else -%}
{% if dimension_count != 0 %}
full outer join {{metric_name}}__final
using (
{%- for calendar_dim in calendar_dimensions %}
{%- if not loop.first -%},{%- endif -%} {{ calendar_dim }}
{% endfor -%}

{%- for dim in dimensions %}
{%- if loop.first and calendar_dimensions | length == 0 -%}
{{ dim }}
{%- elif not loop.first and calendar_dimensions | length == 0 -%}
, {{ dim }}
{%- else -%}
, {{ dim }}
{%- endif -%}
{%- endfor -%}
)
{%- elif dimension_count == 0 %}
cross join {{metric_name}}__final
{%- endif %}
, {{ dim }}
{%- endif -%}
{%- endfor -%}
)
{%- elif dimension_count == 0 %}
cross join {{metric_name}}__final
{%- endif %}
{%- endif -%}
{%- endfor %}
{%- endif %}
{%- endif -%}
{%- endfor %}
{# #}
)

{%- for cte_number in cte_numbers | unique | sort %}
{% set previous_cte_number = cte_number - 1 %}
{% set previous_cte_number = cte_number - 1 %}
, join_metrics__{{cte_number}} as (

select
{% if loop.first %}
{%- if loop.first %}
first_join_metrics.*
{%- else %}
join_metrics__{{previous_cte_number}}.*
Expand All @@ -137,14 +135,12 @@
, ({{ expression | replace(".metric_value","") }}) as {{ metrics_dictionary[metric].name }}
{%- endif -%}
{%- endfor -%}

{% if loop.first %}
from first_join_metrics
{%- else %}
from join_metrics__{{previous_cte_number}}
{%- endif %}


{# #}
)

{%- endfor %}
Expand Down
Loading

0 comments on commit 9eddb15

Please sign in to comment.