Skip to content

Commit

Permalink
Merge pull request #45 from ddxv/adtech-categories
Browse files Browse the repository at this point in the history
Adtech categories
  • Loading branch information
ddxv authored Mar 24, 2024
2 parents 42a1b0c + 8640464 commit 2c7a335
Show file tree
Hide file tree
Showing 21 changed files with 294 additions and 186 deletions.
69 changes: 59 additions & 10 deletions backend/api_app/controllers/companies.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

from typing import Self

import pandas as pd
from litestar import Controller, get
from litestar.exceptions import NotFoundException

Expand All @@ -16,6 +17,38 @@
logger = get_logger(__name__)


def append_overall_categories(df: pd.DataFrame) -> pd.DataFrame:
    """Append per-company ``overall`` rows, then the consolidated games rows.

    The metric column is ``installs`` when the frame has it, otherwise
    ``app_count``. The metric and its ``total_<metric>`` companion are summed
    per ``name``; the result is labelled ``overall`` and given a ``percent``
    equal to summed metric divided by summed total.

    Args:
        df: Frame with ``name``, the metric column and ``total_<metric>``.

    Returns:
        The input rows followed by the ``overall`` rows, passed through
        ``append_games_category``.
    """
    if "installs" in df.columns:
        metric = "installs"
    else:
        metric = "app_count"
    total_col = f"total_{metric}"

    # One aggregated row per company name across every category present.
    per_company = df.groupby("name")[[metric, total_col]].sum().reset_index()
    per_company["mapped_category"] = "overall"
    per_company["percent"] = per_company[metric] / per_company[total_col]

    combined = pd.concat([df, per_company])
    return append_games_category(combined, metric)


def append_games_category(df: pd.DataFrame, metric: str) -> pd.DataFrame:
    """Append a consolidated ``games`` category.

    Rows whose ``mapped_category`` starts with ``game`` are summed per company
    ``name`` and appended to ``df`` as extra rows labelled ``games``, with
    ``percent`` recomputed as summed metric divided by summed total.

    Original author's note: this is not needed for Apple (presumably because
    no ``game*`` sub-categories exist there -- TODO confirm).

    Args:
        df: Frame with ``mapped_category``, ``name``, ``<metric>`` and
            ``total_<metric>`` columns.
        metric: Name of the value column to aggregate, e.g. ``installs``.

    Returns:
        ``df`` followed by one ``games`` row per company that has game rows,
        or ``df`` itself when no row matches.
    """
    total_col = f"total_{metric}"

    # na=False: a missing mapped_category would otherwise put NaN in the mask,
    # and boolean indexing with a NaN-containing mask raises ValueError.
    is_game = df["mapped_category"].str.contains(r"^game", na=False)
    if not is_game.any():
        # Nothing to consolidate; avoid concatenating an empty frame.
        return df

    games_df = df[is_game].groupby("name")[[metric, total_col]].sum().reset_index()
    games_df["mapped_category"] = "games"
    games_df["percent"] = games_df[metric] / games_df[total_col]
    return pd.concat([df, games_df])


def companies_overview(categories: list[int]) -> TopCompanies:
"""Process networks and return TopCompanies class."""
df = get_top_companies(categories=categories, group_by_parent=False)
Expand All @@ -26,11 +59,9 @@ def companies_overview(categories: list[int]) -> TopCompanies:
group_by_parent=True,
)

total_installs = mdf["total_installs"].to_numpy()[0]

mdf = mdf[~mdf["company_name"].isna()].rename(columns={"company_name": "name"})
monthly_all = (
mdf.groupby("name")[["installs", "total_installs"]]
mdf.groupby(["mapped_category", "name"])[["installs", "total_installs"]]
.agg(
{"installs": "sum", "total_installs": "first"},
)
Expand All @@ -39,27 +70,45 @@ def companies_overview(categories: list[int]) -> TopCompanies:

monthly_parents = monthly_parents.rename(columns={"company_name": "name"})

monthly_all["percent"] = monthly_all["installs"] / total_installs
monthly_parents["percent"] = monthly_parents["installs"] / total_installs
monthly_all["percent"] = monthly_all["installs"] / monthly_all["total_installs"]
monthly_parents["percent"] = (
monthly_parents["installs"] / monthly_all["total_installs"]
)

pdf = get_top_companies(categories=categories, group_by_parent=True)
df = df[~df["name"].isna()]
pdf = pdf[~pdf["name"].isna()]

df = append_overall_categories(df)
pdf = append_overall_categories(pdf)

monthly_all = append_overall_categories(monthly_all)
monthly_parents = append_overall_categories(monthly_parents)

# Function to transform each group into a list of dictionaries
def transform_group(group: pd.Grouper) -> dict:
return group.drop(columns="mapped_category").to_dict(orient="records")

df = df.sort_values("app_count", ascending=False)
pdf = pdf.sort_values("app_count", ascending=False)
monthly_all = monthly_all.sort_values("installs", ascending=False)
monthly_parents = monthly_parents.sort_values("installs", ascending=False)
top = TopCompanies(
all_companies=df.to_dict(orient="records"),
parent_companies=pdf.to_dict(orient="records"),
monthly_all_companies=monthly_all.to_dict(orient="records"),
monthly_parent_companies=monthly_parents.to_dict(orient="records"),
all_companies=df.groupby("mapped_category").apply(transform_group).to_dict(),
parent_companies=pdf.groupby("mapped_category")
.apply(transform_group)
.to_dict(),
monthly_all_companies=monthly_all.groupby("mapped_category")
.apply(transform_group)
.to_dict(),
monthly_parent_companies=monthly_parents.groupby("mapped_category")
.apply(transform_group)
.to_dict(),
)
return top


class CompaniesController(Controller):

"""API EndPoint return for all ad tech companies."""

path = "/api/"
Expand Down
31 changes: 22 additions & 9 deletions backend/dbcon/sql/query_top_companies.sql
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
WITH
counts AS (
SELECT
cm.mapped_category,
sac.company_id,
COUNT(DISTINCT sac.store_app) AS app_count
FROM
Expand All @@ -9,31 +10,43 @@ counts AS (
sac.company_id = cc.company_id
LEFT JOIN adtech.categories cat ON
cc.category_id = cat.id
LEFT JOIN store_apps sa ON
sac.store_app = sa.id
LEFT JOIN category_mapping cm ON
sa.category = cm.original_category
WHERE
cat.id IN :categories
GROUP BY
cm.mapped_category,
sac.company_id
),

total_app_count AS (
SELECT
cm.mapped_category,
COUNT(DISTINCT store_app)
FROM
adtech.store_apps_companies
adtech.store_apps_companies sac
LEFT JOIN store_apps sa ON
sac.store_app = sa.id
LEFT JOIN category_mapping cm ON
sa.category = cm.original_category
GROUP BY
cm.mapped_category
)

SELECT
tc.mapped_category,
c.name AS name,
tc.app_count,
total_app_count.count AS total_app_count,
tac.count AS total_app_count,
(
tc.app_count / total_app_count.count::decimal
) AS percent
tc.app_count / tac.count::decimal
) AS PERCENT
FROM
counts AS tc
LEFT JOIN adtech.companies AS c ON
tc.company_id = c.id
INNER JOIN total_app_count ON
TRUE
INNER JOIN total_app_count tac ON
tc.mapped_category = tac.mapped_category
ORDER BY
tc.app_count DESC;
tc.app_count DESC
;
35 changes: 25 additions & 10 deletions backend/dbcon/sql/query_top_parent_companies.sql
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
WITH
parent_counts AS (
SELECT
cm.mapped_category,
COALESCE(
ccom.parent_company_id,
sac.company_id
Expand All @@ -13,34 +14,48 @@ ON
sac.company_id = cc.company_id
LEFT JOIN adtech.categories cat ON
cc.category_id = cat.id
LEFT JOIN adtech.companies ccom
ON
LEFT JOIN adtech.companies ccom ON
sac.company_id = ccom.id
LEFT JOIN store_apps sa ON
sac.store_app = sa.id
LEFT JOIN category_mapping cm ON
sa.category = cm.original_category
WHERE
cat.id IN :categories
GROUP BY
cm.mapped_category,
parent_or_self_id
),

total_app_count AS (
SELECT
cm.mapped_category,
COUNT(DISTINCT store_app)
FROM
adtech.store_apps_companies
adtech.store_apps_companies sac
LEFT JOIN store_apps sa ON
sac.store_app = sa.id
LEFT JOIN category_mapping cm ON
sa.category = cm.original_category
GROUP BY
cm.mapped_category
)

SELECT
tc.mapped_category,
c.name AS name,
tc.app_count,
total_app_count.count AS total_app_count,
tac.count AS total_app_count,
(
tc.app_count / total_app_count.count::decimal
tc.app_count / tac.count::decimal
) AS PERCENT
FROM
FROM
parent_counts AS tc
LEFT JOIN adtech.companies AS c
LEFT JOIN adtech.companies AS c
ON
tc.parent_or_self_id = c.id
INNER JOIN total_app_count
INNER JOIN total_app_count tac
ON
TRUE
ORDER BY
tc.mapped_category = tac.mapped_category
ORDER BY
tc.app_count DESC;
14 changes: 7 additions & 7 deletions frontend/src/lib/AdtechNav.svelte
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,8 @@
goto(`${path}?${url.searchParams.toString()}`);
}
const networksURL: string = '/adtech/networks';
const trackersURL: string = '/adtech/trackers';
// const networksURL: string = '/adtech/networks';
// const trackersURL: string = '/adtech/trackers';
const groupByParents: string = 'parents';
const groupByBrands: string = 'brands';
const timeMonth: string = 'month';
Expand All @@ -38,7 +38,7 @@
<div class="card">
<div class="flex">
<div class="card ml-2 md:ml-4 p-2 md:p-4">
<h3 class="h5 md:h4 p-2">Time Period</h3>
<h3 class="h5 md:h4 p-2">Metric</h3>
<nav class="list-nav">
<ul class="rounded-md variant-outline-primary">
<li class="variant-soft-primary">
Expand All @@ -47,7 +47,7 @@
class={paramTimeClassesActive('time', timeMonth)}
on:click|preventDefault={() => navigateWithParams(currentPath, 'time', timeMonth)}
>
<span class="flex-auto">Most Recent 30 Days</span>
<span class="flex-auto">Installs Past 30 Days</span>
</a>
</li>
<li class="variant-soft-primary">
Expand All @@ -56,13 +56,13 @@
class={paramTimeClassesActive('time', timeAlltime)}
on:click|preventDefault={() => navigateWithParams(currentPath, 'time', timeAlltime)}
>
<span class="flex-auto">Alltime</span>
<span class="flex-auto">Count of Apps</span>
</a>
</li>
</ul>
</nav>
</div>
<div class="card mr-2 md:mr-4 p-2 md:p-4">
<!-- <div class="card mr-2 md:mr-4 p-2 md:p-4">
<h3 class="h5 md:h4 p-2">Company Category</h3>
<nav class="list-nav">
<ul class="rounded-md variant-outline-primary">
Expand All @@ -86,7 +86,7 @@
</li>
</ul>
</nav>
</div>
</div> -->

<div class="card ml-2 md:ml-4 p-2 md:p-4">
<h3 class="h5 md:h4 p-2">Granularity</h3>
Expand Down
Loading

0 comments on commit 2c7a335

Please sign in to comment.