Skip to content

Commit

Permalink
Add option to use refer in channel classification if mkt is null (#42)
Browse files Browse the repository at this point in the history
  • Loading branch information
rlh1994 committed May 13, 2024
1 parent 63d8b16 commit fd7fee9
Show file tree
Hide file tree
Showing 5 changed files with 114 additions and 41 deletions.
14 changes: 14 additions & 0 deletions CHANGELOG
Original file line number Diff line number Diff line change
@@ -1,3 +1,17 @@
snowplow-unified 0.4.1 (2024-0X-XX)
---------------------------------------
## Summary
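This release adds an option to fall back to the `refr_` fields when the `mkt_` fields are null in the default channel group classification, and fixes an issue where direct channels were sometimes not classified correctly.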

## Features
- New `snowplow__use_refr_if_mkt_null` variable (default `false`): when enabled, the default channel group classification falls back to `refr_source`/`refr_medium` whenever the corresponding `mkt_` field is null

## Fixes
- Fix an issue in the channel group classification where direct channels were sometimes ignored due to string checks

## Upgrading
Bump the snowplow-unified version in your `packages.yml` file.

snowplow-unified 0.4.0 (2024-03-25)
---------------------------------------
## Summary
Expand Down
1 change: 1 addition & 0 deletions dbt_project.yml
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@ vars:
snowplow__start_date: '2020-01-01'
# snowplow__total_all_conversions: false
snowplow__upsert_lookback_days: 30
snowplow__use_refr_if_mkt_null: false

# please refer to the macros within identifiers.sql for default values
snowplow__session_identifiers: []
Expand Down
126 changes: 87 additions & 39 deletions macros/field_definitions/channel_group_query.sql
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,26 @@ You may obtain a copy of the Snowplow Personal and Academic License Version 1.0


{% macro bigquery__channel_group_query() %}
{#
  Columns used for the channel classification below.
  When snowplow__use_refr_if_mkt_null is enabled, the refr_ column is used
  whenever the matching mkt_ column is null; otherwise only mkt_ is used.
  Expression assignments (rather than set/endset blocks) keep stray newlines
  and indentation out of the compiled SQL.
  Note that mkt_campaign has no refr_ equivalent, so it is always used as-is.
#}
{% set use_refr = var('snowplow__use_refr_if_mkt_null', false) %}
{% set src_field = 'coalesce(mkt_source, refr_source)' if use_refr else 'mkt_source' %}
{% set medium_field = 'coalesce(mkt_medium, refr_medium)' if use_refr else 'mkt_medium' %}

case
when lower(trim(mkt_source)) = 'direct' and lower(trim(mkt_medium)) in ('not set', 'none') then 'Direct'
when lower(trim(mkt_medium)) like '%cross-network%' then 'Cross-network'
when regexp_contains(trim(mkt_medium), r'(?i)^(.*cp.*|ppc|retargeting|paid.*)$') then
when lower(trim({{ src_field }})) = 'direct' and lower(trim({{ medium_field }})) in ('not set', 'none') then 'Direct'
when lower(trim({{ medium_field }})) like '%cross-network%' then 'Cross-network'
when regexp_contains(trim({{ medium_field }}), r'(?i)^(.*cp.*|ppc|retargeting|paid.*)$') then
case
when upper(source_category) = 'SOURCE_CATEGORY_SHOPPING'
or regexp_contains(trim(mkt_campaign), r'(?i)^(.*(([^a-df-z]|^)shop|shopping).*)$') then 'Paid Shopping'
Expand All @@ -23,28 +39,44 @@ case
when upper(source_category) = 'SOURCE_CATEGORY_VIDEO' then 'Paid Video'
else 'Paid Other'
end
when lower(trim(mkt_medium)) in ('display', 'banner', 'expandable', 'interstitial', 'cpm') then 'Display'
when lower(trim({{ medium_field }})) in ('display', 'banner', 'expandable', 'interstitial', 'cpm') then 'Display'
when upper(source_category) = 'SOURCE_CATEGORY_SHOPPING'
or regexp_contains(trim(mkt_campaign), r'(?i)^(.*(([^a-df-z]|^)shop|shopping).*)$') then 'Organic Shopping'
when upper(source_category) = 'SOURCE_CATEGORY_SOCIAL' or lower(trim(mkt_medium)) in ('social', 'social-network', 'sm', 'social network', 'social media') then 'Organic Social'
when upper(source_category) = 'SOURCE_CATEGORY_SOCIAL' or lower(trim({{ medium_field }})) in ('social', 'social-network', 'sm', 'social network', 'social media') then 'Organic Social'
when upper(source_category) = 'SOURCE_CATEGORY_VIDEO'
or regexp_contains(trim(mkt_medium), r'(?i)^(.*video.*)$') then 'Organic Video'
when upper(source_category) = 'SOURCE_CATEGORY_SEARCH' or lower(trim(mkt_medium)) = 'organic' then 'Organic Search'
when lower(trim(mkt_medium)) in ('referral', 'app', 'link') then 'Referral'
when lower(trim(mkt_source)) in ('email', 'e-mail', 'e_mail', 'e mail') or lower(trim(mkt_medium)) in ('email', 'e-mail', 'e_mail', 'e mail') then 'Email'
when lower(trim(mkt_medium)) = 'affiliate' then 'Affiliates'
when lower(trim(mkt_medium)) = 'audio' then 'Audio'
when lower(trim(mkt_source)) = 'sms' or lower(trim(mkt_medium)) = 'sms' then 'SMS'
when lower(trim(mkt_medium)) like '%push' or regexp_contains(trim(mkt_medium), r'(?i).*(mobile|notification).*') or lower(trim(mkt_source)) = 'firebase' then 'Mobile Push Notifications'
or regexp_contains(trim({{ medium_field }}), r'(?i)^(.*video.*)$') then 'Organic Video'
when upper(source_category) = 'SOURCE_CATEGORY_SEARCH' or lower(trim({{ medium_field }})) = 'organic' then 'Organic Search'
when lower(trim({{ medium_field }})) in ('referral', 'app', 'link') then 'Referral'
when lower(trim({{ src_field }})) in ('email', 'e-mail', 'e_mail', 'e mail') or lower(trim({{ medium_field }})) in ('email', 'e-mail', 'e_mail', 'e mail') then 'Email'
when lower(trim({{ medium_field }})) = 'affiliate' then 'Affiliates'
when lower(trim({{ medium_field }})) = 'audio' then 'Audio'
when lower(trim({{ src_field }})) = 'sms' or lower(trim({{ medium_field }})) = 'sms' then 'SMS'
when lower(trim({{ medium_field }})) like '%push' or regexp_contains(trim({{ medium_field }}), r'(?i).*(mobile|notification).*') or lower(trim({{ src_field }})) = 'firebase' then 'Mobile Push Notifications'
else 'Unassigned'
end
{% endmacro %}

{% macro default__channel_group_query() %}
{#
  Columns used for the channel classification below.
  When snowplow__use_refr_if_mkt_null is enabled, the refr_ column is used
  whenever the matching mkt_ column is null; otherwise only mkt_ is used.
  Expression assignments (rather than set/endset blocks) keep stray newlines
  and indentation out of the compiled SQL.
  Note that mkt_campaign has no refr_ equivalent, so it is always used as-is.
#}
{% set use_refr = var('snowplow__use_refr_if_mkt_null', false) %}
{% set src_field = 'coalesce(mkt_source, refr_source)' if use_refr else 'mkt_source' %}
{% set medium_field = 'coalesce(mkt_medium, refr_medium)' if use_refr else 'mkt_medium' %}

case
when lower(trim(mkt_source)) = 'direct' and lower(trim(mkt_medium)) in ('not set', 'none') then 'Direct'
when lower(trim(mkt_medium)) like '%cross-network%' then 'Cross-network'
when regexp_like(lower(trim(mkt_medium)), '^(.*cp.*|ppc|retargeting|paid.*)$') then
when lower(trim({{ src_field }})) = 'direct' and lower(trim({{ medium_field }})) in ('not set', 'none') then 'Direct'
when lower(trim({{ medium_field }})) like '%cross-network%' then 'Cross-network'
when regexp_like(lower(trim({{ medium_field }})), '^(.*cp.*|ppc|retargeting|paid.*)$') then
case
when upper(source_category) = 'SOURCE_CATEGORY_SHOPPING'
or regexp_like(lower(trim(mkt_campaign)), '^(.*(([^a-df-z]|^)shop|shopping).*)$') then 'Paid Shopping'
Expand All @@ -53,28 +85,44 @@ case
when upper(source_category) = 'SOURCE_CATEGORY_VIDEO' then 'Paid Video'
else 'Paid Other'
end
when lower(trim(mkt_medium)) in ('display', 'banner', 'expandable', 'intersitial', 'cpm') then 'Display'
{# Medium values classified as Display ('interstitial' was previously misspelled, so it never matched) #}
when lower(trim({{ medium_field }})) in ('display', 'banner', 'expandable', 'interstitial', 'cpm') then 'Display'
when upper(source_category) = 'SOURCE_CATEGORY_SHOPPING'
or regexp_like(lower(trim(mkt_campaign)), '^(.*(([^a-df-z]|^)shop|shopping).*)$') then 'Organic Shopping'
when upper(source_category) = 'SOURCE_CATEGORY_SOCIAL' or lower(trim(mkt_medium)) in ('social', 'social-network', 'sm', 'social network', 'social media') then 'Organic Social'
when upper(source_category) = 'SOURCE_CATEGORY_SOCIAL' or lower(trim({{ medium_field }})) in ('social', 'social-network', 'sm', 'social network', 'social media') then 'Organic Social'
when upper(source_category) = 'SOURCE_CATEGORY_VIDEO'
or regexp_like(lower(trim(mkt_medium)), '^(.*video.*)$') then 'Organic Video'
when upper(source_category) = 'SOURCE_CATEGORY_SEARCH' or lower(trim(mkt_medium)) = 'organic' then 'Organic Search'
when lower(trim(mkt_medium)) in ('referral', 'app', 'link') then 'Referral'
when lower(trim(mkt_source)) in ('email', 'e-mail', 'e_mail', 'e mail') or lower(trim(mkt_medium)) in ('email', 'e-mail', 'e_mail', 'e mail') then 'Email'
when lower(trim(mkt_medium)) = 'affiliate' then 'Affiliates'
when lower(trim(mkt_medium)) = 'audio' then 'Audio'
when lower(trim(mkt_source)) = 'sms' or lower(trim(mkt_medium)) = 'sms' then 'SMS'
when lower(trim(mkt_medium)) like '%push' or regexp_like(lower(trim(mkt_medium)), '.*(mobile|notification).*') or lower(trim(mkt_source)) = 'firebase' then 'Mobile Push Notifications'
or regexp_like(lower(trim({{ medium_field }})), '^(.*video.*)$') then 'Organic Video'
when upper(source_category) = 'SOURCE_CATEGORY_SEARCH' or lower(trim({{ medium_field }})) = 'organic' then 'Organic Search'
when lower(trim({{ medium_field }})) in ('referral', 'app', 'link') then 'Referral'
when lower(trim({{ src_field }})) in ('email', 'e-mail', 'e_mail', 'e mail') or lower(trim({{ medium_field }})) in ('email', 'e-mail', 'e_mail', 'e mail') then 'Email'
when lower(trim({{ medium_field }})) = 'affiliate' then 'Affiliates'
when lower(trim({{ medium_field }})) = 'audio' then 'Audio'
when lower(trim({{ src_field }})) = 'sms' or lower(trim({{ medium_field }})) = 'sms' then 'SMS'
when lower(trim({{ medium_field }})) like '%push' or regexp_like(lower(trim({{ medium_field }})), '.*(mobile|notification).*') or lower(trim({{ src_field }})) = 'firebase' then 'Mobile Push Notifications'
else 'Unassigned'
end
{% endmacro %}

{% macro redshift__channel_group_query() %}
{#
  Columns used for the channel classification below.
  When snowplow__use_refr_if_mkt_null is enabled, the refr_ column is used
  whenever the matching mkt_ column is null; otherwise only mkt_ is used.
  Expression assignments (rather than set/endset blocks) keep stray newlines
  and indentation out of the compiled SQL.
  Note that mkt_campaign has no refr_ equivalent, so it is always used as-is.
#}
{% set use_refr = var('snowplow__use_refr_if_mkt_null', false) %}
{% set src_field = 'coalesce(mkt_source, refr_source)' if use_refr else 'mkt_source' %}
{% set medium_field = 'coalesce(mkt_medium, refr_medium)' if use_refr else 'mkt_medium' %}

case
when lower(trim(mkt_source)) = 'direct' and lower(trim(mkt_medium)) in ('not set', 'none') then 'Direct'
when lower(trim(mkt_medium)) like '%cross-network%' then 'Cross-network'
when regexp_instr(lower(trim(mkt_medium)), '^(.*cp.*|ppc|retargeting|paid.*)$') then
when lower(trim({{ src_field }})) = 'direct' and lower(trim({{ medium_field }})) in ('not set', 'none') then 'Direct'
when lower(trim({{ medium_field }})) like '%cross-network%' then 'Cross-network'
when regexp_instr(lower(trim({{ medium_field }})), '^(.*cp.*|ppc|retargeting|paid.*)$') then
case
when upper(source_category) = 'SOURCE_CATEGORY_SHOPPING'
or regexp_instr(lower(trim(mkt_campaign)), '^(.*(([^a-df-z]|^)shop|shopping).*)$') then 'Paid Shopping'
Expand All @@ -83,19 +131,19 @@ case
when upper(source_category) = 'SOURCE_CATEGORY_VIDEO' then 'Paid Video'
else 'Paid Other'
end
when lower(trim(mkt_medium)) in ('display', 'banner', 'expandable', 'intersitial', 'cpm') then 'Display'
{# Medium values classified as Display ('interstitial' was previously misspelled, so it never matched) #}
when lower(trim({{ medium_field }})) in ('display', 'banner', 'expandable', 'interstitial', 'cpm') then 'Display'
when upper(source_category) = 'SOURCE_CATEGORY_SHOPPING'
or regexp_instr(lower(trim(mkt_campaign)), '^(.*(([^a-df-z]|^)shop|shopping).*)$') then 'Organic Shopping'
when upper(source_category) = 'SOURCE_CATEGORY_SOCIAL' or lower(trim(mkt_medium)) in ('social', 'social-network', 'sm', 'social network', 'social media') then 'Organic Social'
when upper(source_category) = 'SOURCE_CATEGORY_SOCIAL' or lower(trim({{ medium_field }})) in ('social', 'social-network', 'sm', 'social network', 'social media') then 'Organic Social'
when upper(source_category) = 'SOURCE_CATEGORY_VIDEO'
or regexp_instr(lower(trim(mkt_medium)), '^(.*video.*)$') then 'Organic Video'
when upper(source_category) = 'SOURCE_CATEGORY_SEARCH' or lower(trim(mkt_medium)) = 'organic' then 'Organic Search'
when lower(trim(mkt_medium)) in ('referral', 'app', 'link') then 'Referral'
when lower(trim(mkt_source)) in ('email', 'e-mail', 'e_mail', 'e mail') or lower(trim(mkt_medium)) in ('email', 'e-mail', 'e_mail', 'e mail') then 'Email'
when lower(trim(mkt_medium)) = 'affiliate' then 'Affiliates'
when lower(trim(mkt_medium)) = 'audio' then 'Audio'
when lower(trim(mkt_source)) = 'sms' or lower(trim(mkt_medium)) = 'sms' then 'SMS'
when lower(trim(mkt_medium)) like '%push' or regexp_instr(lower(trim(mkt_medium)), '.*(mobile|notification).*') or lower(trim(mkt_source)) = 'firebase' then 'Mobile Push Notifications'
or regexp_instr(lower(trim({{ medium_field }})), '^(.*video.*)$') then 'Organic Video'
when upper(source_category) = 'SOURCE_CATEGORY_SEARCH' or lower(trim({{ medium_field }})) = 'organic' then 'Organic Search'
when lower(trim({{ medium_field }})) in ('referral', 'app', 'link') then 'Referral'
when lower(trim({{ src_field }})) in ('email', 'e-mail', 'e_mail', 'e mail') or lower(trim({{ medium_field }})) in ('email', 'e-mail', 'e_mail', 'e mail') then 'Email'
when lower(trim({{ medium_field }})) = 'affiliate' then 'Affiliates'
when lower(trim({{ medium_field }})) = 'audio' then 'Audio'
when lower(trim({{ src_field }})) = 'sms' or lower(trim({{ medium_field }})) = 'sms' then 'SMS'
when lower(trim({{ medium_field }})) like '%push' or regexp_instr(lower(trim({{ medium_field }})), '.*(mobile|notification).*') or lower(trim({{ src_field }})) = 'firebase' then 'Mobile Push Notifications'
else 'Unassigned'
end
{% endmacro %}
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,12 @@ with session_firsts as (

from {{ ref('snowplow_unified_events_this_run') }} ev
left join
{{ ref(var('snowplow__ga4_categories_seed')) }} c on lower(trim(ev.mkt_source)) = lower(c.source)
{{ ref(var('snowplow__ga4_categories_seed')) }} c on
{# Match the GA4 source category on mkt_source; optionally fall back to refr_source when mkt_source is null #}
{% if var('snowplow__use_refr_if_mkt_null', false) %}
    lower(trim(coalesce(ev.mkt_source, ev.refr_source))) = lower(c.source)
{% else %}
    lower(trim(ev.mkt_source)) = lower(c.source)
{% endif %}
left join
{{ ref(var('snowplow__rfc_5646_seed')) }} l on lower(ev.br_lang) = lower(l.lang_tag)
left join
Expand Down
7 changes: 6 additions & 1 deletion models/views/scratch/snowplow_unified_views_this_run.sql
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,12 @@ with prep as (

from {{ ref('snowplow_unified_events_this_run') }} as ev

left join {{ ref(var('snowplow__ga4_categories_seed')) }} c on lower(trim(ev.mkt_source)) = lower(c.source)
left join {{ ref(var('snowplow__ga4_categories_seed')) }} c on
{# Match the GA4 source category on mkt_source; optionally fall back to refr_source when mkt_source is null #}
{% if var('snowplow__use_refr_if_mkt_null', false) %}
    lower(trim(coalesce(ev.mkt_source, ev.refr_source))) = lower(c.source)
{% else %}
    lower(trim(ev.mkt_source)) = lower(c.source)
{% endif %}

where ev.event_name in ('page_view', 'screen_view')
and ev.view_id is not null
Expand Down

0 comments on commit fd7fee9

Please sign in to comment.