From e0487149dfbf11d7eec8151910ea9ddf5fd7b3af Mon Sep 17 00:00:00 2001 From: james Date: Thu, 11 Jan 2024 15:53:11 +0800 Subject: [PATCH] Added pyproject and new ruff rules --- .pre-commit-config.yaml | 10 +++- backend/api_app/controllers/apps.py | 64 +++++++++++++---------- backend/api_app/controllers/categories.py | 18 +++---- backend/api_app/controllers/rankings.py | 59 ++++++++++++--------- backend/app.py | 20 +++---- backend/config/__init__.py | 6 ++- backend/dbcon/connections.py | 10 ++-- backend/dbcon/queries.py | 47 +++++++++++------ backend/pyproject.toml | 40 ++++++++++++++ 9 files changed, 181 insertions(+), 93 deletions(-) create mode 100644 backend/pyproject.toml diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 6bcc12f..360e547 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -9,6 +9,7 @@ repos: rev: v0.1.11 hooks: - id: ruff + args: [--fix] # Mypy - repo: https://github.com/pre-commit/mirrors-mypy rev: v1.8.0 @@ -20,7 +21,14 @@ repos: rev: v4.0.0-alpha.8 hooks: - id: prettier - args: ['--config', 'frontend/.prettierrc', '--ignore-unknown', '--write', '--check'] + args: + [ + "--config", + "frontend/.prettierrc", + "--ignore-unknown", + "--write", + "--check", + ] language: node entry: frontend/node_modules/.bin/prettier require_serial: true diff --git a/backend/api_app/controllers/apps.py b/backend/api_app/controllers/apps.py index 730aba2..6464cfe 100644 --- a/backend/api_app/controllers/apps.py +++ b/backend/api_app/controllers/apps.py @@ -1,27 +1,28 @@ import datetime -import pandas as pd +import urllib.parse + import numpy as np +import pandas as pd +from litestar import Controller, get +from litestar.exceptions import NotFoundException from api_app.models import ( AppDetail, - Collection, AppGroup, + AppRank, Category, + Collection, DeveloperApps, - AppRank, ) from config import get_logger from dbcon.queries import ( get_single_app, - query_recent_apps, query_app_history, - search_apps, - 
query_single_developer, query_ranks_for_app, + query_recent_apps, + query_single_developer, + search_apps, ) -from litestar import Controller, get -from litestar.exceptions import NotFoundException -import urllib.parse logger = get_logger(__name__) @@ -46,7 +47,7 @@ def get_app_history(app_dict: dict) -> dict: app_hist = query_app_history(store_app) app_dict["histogram"] = app_hist.sort_values(["id"]).tail(1)["histogram"].values[0] app_dict["history_table"] = app_hist.drop(["id", "store_app"], axis=1).to_dict( - orient="records" + orient="records", ) app_hist["group"] = app_name app_hist = app_hist[ @@ -58,7 +59,7 @@ def get_app_history(app_dict: dict) -> dict: app_hist = app_hist.sort_values(xaxis_col) app_hist["date_change"] = app_hist[xaxis_col] - app_hist[xaxis_col].shift(1) app_hist["days_changed"] = app_hist["date_change"].apply( - lambda x: np.nan if pd.isnull(x) else x.days + lambda x: np.nan if pd.isnull(x) else x.days, ) change_metrics = [] for metric in metrics: @@ -90,7 +91,7 @@ def get_app_history(app_dict: dict) -> dict: "installs_rate_of_change": "Installs Rate of Change", "rating_count_rate_of_change": "Rating Count Rate of Change", "review_count_rate_of_change": "Review Count Rate of Change", - } + }, ) ) # TODO: KEEP? 
@@ -145,10 +146,11 @@ def get_app_overview_dict(collection: str) -> Collection: key=category_key, google=AppGroup(title="Google", apps=google_dicts), ios=AppGroup(title="iOS", apps=ios_dicts), - ) + ), ) response_collection = Collection( - title=COLLECTIONS[collection]["title"], categories=categories_dicts + title=COLLECTIONS[collection]["title"], + categories=categories_dicts, ) return response_collection @@ -158,13 +160,14 @@ class AppController(Controller): @get(path="/collections/{collection:str}", cache=3600) async def get_apps_overview(self, collection: str) -> Collection: - """ - Handles a GET request for a list of apps + """Handles a GET request for a list of apps Args: + ---- collection:collection Returns: + ------- A dictionary representation of the list of apps for homepasge """ logger.info(f"{self.path} start") @@ -176,19 +179,20 @@ async def get_apps_overview(self, collection: str) -> Collection: @get(path="/{store_id:str}", cache=3600) async def get_app_detail(self, store_id: str) -> AppDetail: - """ - Handles a GET request for a specific app. + """Handles a GET request for a specific app. store_id (str): The id of the app to retrieve. - Returns: + Returns + ------- json """ logger.info(f"{self.path} start") app_df = get_single_app(store_id) if app_df.empty: raise NotFoundException( - f"Store ID not found: {store_id!r}", status_code=404 + f"Store ID not found: {store_id!r}", + status_code=404, ) app_dict = app_df.to_dict(orient="records")[0] app_hist_dict = get_app_history(app_dict) @@ -197,20 +201,22 @@ async def get_app_detail(self, store_id: str) -> AppDetail: @get(path="/{store_id:str}/ranks", cache=3600) async def app_ranks(self, store_id: str) -> AppRank: - """ - Handles a GET request for a specific app ranks. + """Handles a GET request for a specific app ranks. Args: + ---- store_id (str): The id of the store to retrieve. 
Returns: + ------- json """ logger.info(f"{self.path} start") df = query_ranks_for_app(store_id=store_id) if df.empty: raise NotFoundException( - f"Ranks not found for {store_id!r}", status_code=404 + f"Ranks not found for {store_id!r}", + status_code=404, ) df["rank_group"] = df["collection"] + ": " + df["category"] latest_dict = df[df["crawled_date"].max() == df["crawled_date"]][ @@ -229,13 +235,14 @@ async def app_ranks(self, store_id: str) -> AppRank: @get(path="/developers/{developer_id:str}", cache=3600) async def get_developer_apps(self, developer_id: str) -> DeveloperApps: - """ - Handles a GET request for a specific developer. + """Handles a GET request for a specific developer. Args: + ---- developer_id (str): The id of the developer to retrieve. Returns: + ------- json """ logger.info(f"{self.path} start") @@ -243,13 +250,16 @@ async def get_developer_apps(self, developer_id: str) -> DeveloperApps: if apps_df.empty: raise NotFoundException( - f"Store ID not found: {developer_id!r}", status_code=404 + f"Store ID not found: {developer_id!r}", + status_code=404, ) developer_name = apps_df.to_dict(orient="records")[0]["developer_name"] apps_dict = apps_df.to_dict(orient="records") developer_apps = DeveloperApps( - developer_id=developer_id, title=developer_name, apps=apps_dict + developer_id=developer_id, + title=developer_name, + apps=apps_dict, ) return developer_apps diff --git a/backend/api_app/controllers/categories.py b/backend/api_app/controllers/categories.py index a86c37a..d39f57f 100644 --- a/backend/api_app/controllers/categories.py +++ b/backend/api_app/controllers/categories.py @@ -1,11 +1,9 @@ import numpy as np +from litestar import Controller, get -from api_app.models import CategoriesOverview, Category, AppGroup +from api_app.models import AppGroup, CategoriesOverview, Category from config import get_logger - - from dbcon.queries import get_appstore_categories, get_category_top_apps_by_installs -from litestar import Controller, get logger = 
get_logger(__name__) @@ -54,10 +52,10 @@ class CategoryController(Controller): @get(path="/", cache=True) async def get_categories_overview(self) -> CategoriesOverview: - """ - Handles a GET request for a list of categories + """Handles a GET request for a list of categories - Returns: + Returns + ------- A dictionary representation of the list of categories each with an id, name, type and total of apps """ @@ -68,10 +66,10 @@ async def get_categories_overview(self) -> CategoriesOverview: @get(path="/{category_id:str}", cache=3600) async def get_category(self, category_id: str) -> Category: - """ - Handles a GET request for a single category + """Handles a GET request for a single category - Returns: + Returns + ------- A dictionary representation of a category with ios and google apps """ diff --git a/backend/api_app/controllers/rankings.py b/backend/api_app/controllers/rankings.py index 33af534..411c3bb 100644 --- a/backend/api_app/controllers/rankings.py +++ b/backend/api_app/controllers/rankings.py @@ -1,19 +1,20 @@ +import datetime + +import pandas as pd +from litestar import Controller, get + from api_app.models import ( RankingOverview, - StoreCollections, StoreCategoryDetail, + StoreCollections, StoreRankings, ) from config import get_logger -import pandas as pd -import datetime - from dbcon.queries import ( - get_store_collection_category_map, - get_most_recent_top_ranks, get_history_top_ranks, + get_most_recent_top_ranks, + get_store_collection_category_map, ) -from litestar import Controller, get logger = get_logger(__name__) @@ -47,7 +48,7 @@ def ranking_map() -> RankingOverview: collection_id=collection_id, collection_name=collection_name, categories=category_details, - ) + ), ) overview.stores_rankings.append(rankings) return overview @@ -58,10 +59,10 @@ class RankingsController(Controller): @get(path="/", cache=True) async def get_ranking_overview(self) -> RankingOverview: - """ - Handles a GET request for a list of ranking collecitons and categories 
+ """Handles a GET request for a list of ranking collecitons and categories - Returns: + Returns + ------- A dictionary representation of the list of categories each with an id, name, type and total of apps """ @@ -72,12 +73,15 @@ async def get_ranking_overview(self) -> RankingOverview: @get(path="/{store:int}/{collection:int}/{category:int}/short", cache=40000) async def get_short_ranks_for_category( - self, store: int, collection: int, category: int + self, + store: int, + collection: int, + category: int, ) -> dict: - """ - Handles a GET request for a store/collection/category rank + """Handles a GET request for a store/collection/category rank - Returns: + Returns + ------- A dictionary representation of a category with ios and google apps """ @@ -93,12 +97,15 @@ async def get_short_ranks_for_category( @get(path="/{store:int}/{collection:int}/{category:int}", cache=3600) async def get_ranks_for_category( - self, store: int, collection: int, category: int + self, + store: int, + collection: int, + category: int, ) -> dict: - """ - Handles a GET request for a store/collection/category rank + """Handles a GET request for a store/collection/category rank - Returns: + Returns + ------- A dictionary representation of a category with ios and google apps """ @@ -114,12 +121,15 @@ async def get_ranks_for_category( @get(path="/{store:int}/{collection:int}/{category:int}/history", cache=3600) async def get_ranks_history_for_category( - self, store: int, collection: int, category: int + self, + store: int, + collection: int, + category: int, ) -> dict: - """ - Handles a GET request for a store/collection/category rank + """Handles a GET request for a store/collection/category rank - Returns: + Returns + ------- A list of dictionary representation of a category history with ios or google apps """ @@ -143,8 +153,7 @@ async def get_ranks_history_for_category( .reset_index() ) df.loc[ - df["crawled_date"].dt.date - >= datetime.datetime.now(datetime.timezone.utc).date(), + 
df["crawled_date"].dt.date >= datetime.datetime.now(datetime.UTC).date(), "crawled_date", ] = last_crawled_date df["crawled_date"] = pd.to_datetime(df["crawled_date"]).dt.strftime("%Y-%m-%d") diff --git a/backend/app.py b/backend/app.py index acf1998..8efcbf7 100644 --- a/backend/app.py +++ b/backend/app.py @@ -1,18 +1,18 @@ -from api_app.controllers.apps import AppController -from api_app.controllers.categories import CategoryController -from api_app.controllers.rankings import RankingsController +import logging + from litestar import Litestar from litestar.config.cors import CORSConfig from litestar.logging import LoggingConfig - -import logging - from litestar.openapi import OpenAPIConfig, OpenAPIController +from api_app.controllers.apps import AppController +from api_app.controllers.categories import CategoryController +from api_app.controllers.rankings import RankingsController + cors_config = CORSConfig( allow_origins=[ "localhost", - ] + ], ) @@ -23,7 +23,7 @@ class MyOpenAPIController(OpenAPIController): logging_config = LoggingConfig( root={"level": logging.getLevelName(logging.INFO), "handlers": ["console"]}, formatters={ - "standard": {"format": "%(asctime)s - %(name)s - %(levelname)s - %(message)s"} + "standard": {"format": "%(asctime)s - %(name)s - %(levelname)s - %(message)s"}, }, ) @@ -32,7 +32,9 @@ class MyOpenAPIController(OpenAPIController): route_handlers=[AppController, CategoryController, RankingsController], cors_config=cors_config, openapi_config=OpenAPIConfig( - title="App Store API", version="0.0.1", openapi_controller=MyOpenAPIController + title="App Store API", + version="0.0.1", + openapi_controller=MyOpenAPIController, ), logging_config=logging_config, debug=True, diff --git a/backend/config/__init__.py b/backend/config/__init__.py index 2d3fbda..645ff0c 100644 --- a/backend/config/__init__.py +++ b/backend/config/__init__.py @@ -42,7 +42,9 @@ def get_logger(mod_name: str, log_name: str = "dash"): filename = f"{log_dir}/{log_name}.log" 
# Writes to file rotate_handler = RotatingFileHandler( - filename=filename, maxBytes=50000000, backupCount=5 + filename=filename, + maxBytes=50000000, + backupCount=5, ) logging.basicConfig( format=format, @@ -64,7 +66,7 @@ def get_logger(mod_name: str, log_name: str = "dash"): # create formatter formatter = logging.Formatter( - "%(asctime)s - %(name)s - %(levelname)s - %(message)s" + "%(asctime)s - %(name)s - %(levelname)s - %(message)s", ) # add formatter to ch diff --git a/backend/dbcon/connections.py b/backend/dbcon/connections.py index a63b4b5..e095ea6 100644 --- a/backend/dbcon/connections.py +++ b/backend/dbcon/connections.py @@ -1,7 +1,8 @@ -from config import CONFIG, get_logger from sqlalchemy import create_engine from sshtunnel import SSHTunnelForwarder +from config import CONFIG, get_logger + logger = get_logger(__name__) @@ -46,8 +47,9 @@ def get_postgres_server_ips(server_name: str) -> tuple[str, str]: class PostgresCon: """Class for managing the connection to postgres - Parameters: - ---------------- + + Parameters + ---------- my_db: String, passed on init, string name of db my_env: String, passed on init, string name of env, 'staging' or 'prod' """ @@ -83,6 +85,6 @@ def set_engine(self): logger.info(f"Created PostgreSQL Engine {self.db_name}") except Exception as error: logger.exception( - f"PostgresCon failed to connect to {self.db_name}@{self.db_ip} {error=}" + f"PostgresCon failed to connect to {self.db_name}@{self.db_ip} {error=}", ) self.db_name = None diff --git a/backend/dbcon/queries.py b/backend/dbcon/queries.py index e4ad8b8..4a5d154 100644 --- a/backend/dbcon/queries.py +++ b/backend/dbcon/queries.py @@ -1,14 +1,16 @@ +import datetime + import numpy as np import pandas as pd -import datetime +from sqlalchemy import TextClause, text + from config import get_logger from dbcon.connections import get_db_connection -from sqlalchemy import text, TextClause logger = get_logger(__name__) -def query_recent_apps(collection: str, limit=20): +def 
query_recent_apps(collection: str, limit: int = 20) -> pd.DataFrame: logger.info(f"Query app_store for recent apps {collection=}") if collection == "new_weekly": table_name = "apps_new_weekly" @@ -34,7 +36,7 @@ def query_recent_apps(collection: str, limit=20): "featured_image_url", "phone_image_url_1", "tablet_image_url_1", - ] + ], ) sel_query = f""" ( @@ -72,7 +74,7 @@ def query_recent_apps(collection: str, limit=20): groups = df.groupby("store") for _store, group in groups: overall = group.sort_values(["installs", "rating_count"], ascending=False).head( - limit + limit, ) overall["mapped_category"] = "overall" df = pd.concat([df, overall], axis=0) @@ -141,7 +143,8 @@ def query_developer_updated_timestamps(start_date: str = "2021-01-01") -> pd.Dat def query_app_updated_timestamps( - table_name: str, start_date: str = "2021-01-01" + table_name: str, + start_date: str = "2021-01-01", ) -> pd.DataFrame: logger.info(f"Query updated times: {table_name=} {start_date=}") audit_join, audit_select = "", "" @@ -209,7 +212,8 @@ def query_app_updated_timestamps( def query_updated_timestamps( - table_name: str, start_date: str = "2021-01-01" + table_name: str, + start_date: str = "2021-01-01", ) -> pd.DataFrame: logger.info(f"Query updated times: {table_name=}") sel_query = f"""WITH my_dates AS ( @@ -255,7 +259,7 @@ def query_updated_timestamps( return df -def get_all_tables_in_schema(schema_name: str): +def get_all_tables_in_schema(schema_name: str) -> list[str]: logger.info("Get checks tables") sel_schema = f"""SELECT table_name FROM information_schema.tables @@ -292,7 +296,11 @@ def get_appstore_categories() -> pd.DataFrame: df = pd.read_sql(sel_query, DBCON.engine) df["store"] = df["store"].replace({1: "android", 2: "ios"}) df = pd.pivot_table( - data=df, index="category", values="app_count", columns="store", fill_value=0 + data=df, + index="category", + values="app_count", + columns="store", + fill_value=0, ).reset_index() df["total_apps"] = df["android"] + df["ios"] df 
= df.sort_values("total_apps", ascending=False) @@ -302,7 +310,7 @@ def get_appstore_categories() -> pd.DataFrame: def query_ranks_for_app(store_id: str, days=30) -> pd.DataFrame: start_date = ( - datetime.datetime.now(datetime.timezone.utc) - datetime.timedelta(days=days) + datetime.datetime.now(datetime.UTC) - datetime.timedelta(days=days) ).strftime("%Y-%m-%d") sel_query = f"""SELECT ar.crawled_date, @@ -331,7 +339,10 @@ def query_ranks_for_app(store_id: str, days=30) -> pd.DataFrame: def get_most_recent_top_ranks( - store: int, collection_id: int, category_id: int, limit: int = 25 + store: int, + collection_id: int, + category_id: int, + limit: int = 25, ) -> pd.DataFrame: sel_query = f"""SELECT ar.rank, @@ -358,10 +369,14 @@ def get_most_recent_top_ranks( def get_history_top_ranks( - store: int, collection_id: int, category_id: int, limit: int = 25, days=30 + store: int, + collection_id: int, + category_id: int, + limit: int = 25, + days=30, ) -> pd.DataFrame: start_date = ( - datetime.datetime.now(datetime.timezone.utc) - datetime.timedelta(days=days) + datetime.datetime.now(datetime.UTC) - datetime.timedelta(days=days) ).strftime("%Y-%m-%d") sel_query = f"""SELECT arr.crawled_date, @@ -474,7 +489,7 @@ def clean_app_df(df: pd.DataFrame) -> pd.DataFrame: for col in string_nums: df[f"{col}_num"] = df[col] df[col] = df[col].apply( - lambda x: "N/A" if (x is None or np.isnan(x)) else "{:,.0f}".format(x) + lambda x: "N/A" if (x is None or np.isnan(x)) else f"{x:,.0f}", ) df["rating"] = df["rating"].apply(lambda x: round(x, 2) if x else 0) ios_link = "https://apps.apple.com/us/app/-/id" @@ -487,7 +502,9 @@ def clean_app_df(df: pd.DataFrame) -> pd.DataFrame: ) if "developer_id" in df.columns: df["store_developer_link"] = np.where( - df["store"].str.contains("Google"), play_dev_link, ios_dev_link + df["store"].str.contains("Google"), + play_dev_link, + ios_dev_link, ) + df["developer_id"].astype(str) date_cols = ["created_at", "store_last_updated", "updated_at"] diff 
--git a/backend/pyproject.toml b/backend/pyproject.toml
new file mode 100644
index 0000000..f8c9399
--- /dev/null
+++ b/backend/pyproject.toml
@@ -0,0 +1,40 @@
+[project]
+authors = [{ name = "James O'Claire" }]
+name = "app-store-dash-backend"
+description = "Backend server for app store dashboard"
+version = "0.0.1"
+# Runtime requirements: must live in [project], not under optional-dependencies.
+dependencies = ["uvicorn", "litestar[standard]"]
+
+[project.optional-dependencies]
+dev = ["pre-commit", "psycopg2-binary"]
+
+[build-system]
+requires = ["setuptools", "wheel"]
+
+
+[tool.ruff]
+#select = ["E", "W", "F", "B", "I", "N", "UP"]
+select = ["ALL"]
+
+target-version = 'py312'
+
+ignore = [
+    "E501", # line length
+    "W291", # trailing space at end of line
+    "G", # flake8-logging-format: ignored because it flags f-strings in log messages.
+    "RET504", # Unnecessary assignment to variable before return, but I prefer for readability.
+    "PD901", # avoid 'df' for dataframe, but I prefer this as the main df in each function
+]
+
+# Avoid trying to fix flake8-bugbear (`B`) violations.
+unfixable = ["B"]
+
+fixable = ["ALL"]
+
+exclude = ["tests/*"]
+
+[tool.mypy]
+warn_return_any = true
+warn_unused_configs = true
+disallow_untyped_defs = true