-
Notifications
You must be signed in to change notification settings - Fork 0
/
app_pandas.py
43 lines (34 loc) · 1.01 KB
/
app_pandas.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
import glob
import os
from pathlib import Path
import pandas as pd
import typer
from utils import cols, with_res_logger
# CLI application object; commands are registered on it with @app.command().
app = typer.Typer()

# Truncate printed frames/series once they exceed 10 rows.
pd.set_option("display.max_rows", 10)

# Translation table from the short type tags used as values of `cols`
# to the dtype names handed to pandas when reading the CSV files.
pandas_types = {
    "int": "Int64",  # pandas nullable integer dtype
    "float": "Float64",  # pandas nullable float dtype
    "cat": "category",
    "str": "object",
}
def load(year, nrows):
    """Read one or several CSV files from the ``DATA`` directory.

    The file path is ``$DATA/<year>.csv``. When ``year`` contains ``*`` the
    path is treated as a glob pattern: every matching file is read and the
    results are concatenated into a single frame with a fresh index.

    Column handling is driven by ``cols``: entries tagged ``"date"`` are
    parsed as dates, every other entry is mapped through ``pandas_types``
    to get its dtype. Malformed lines only produce a warning.

    Args:
        year: File stem to load, or a pattern containing ``*``.
        nrows: Maximum number of rows to read, forwarded to ``pd.read_csv``.
            In the glob case the limit applies to each file separately.

    Returns:
        The loaded ``pandas.DataFrame``.

    Raises:
        KeyError: If the ``DATA`` environment variable is not set.
        FileNotFoundError: If a glob pattern matches no file.
    """
    read_options = dict(
        dtype={k: pandas_types[v] for k, v in cols.items() if v != "date"},
        parse_dates=[k for k, v in cols.items() if v == "date"],
        on_bad_lines="warn",
        nrows=nrows,
    )
    p = Path(os.environ["DATA"]) / f"{year}.csv"

    if "*" not in year:
        return pd.read_csv(p, **read_options)

    # glob.glob returns matches in arbitrary order; sort them so the row
    # order of the concatenated frame is reproducible between runs.
    matches = sorted(glob.glob(str(p)))
    if not matches:
        # Without this guard pd.concat fails with the opaque
        # "No objects to concatenate"; name the pattern instead.
        raise FileNotFoundError(f"no CSV file matches {p}")
    return pd.concat(
        (pd.read_csv(f, **read_options) for f in matches), ignore_index=True
    )
@app.command()
@with_res_logger
def top_flop(year: str, nrows: int = None):
    # Prints the 10 postal codes with the most distinct `id_mutation` values
    # and the 10 with the fewest, for the data selected by `year`
    # (`nrows` is forwarded to `load` unchanged).
    # Deliberately a comment rather than a docstring: Typer would surface a
    # docstring as the command's help text and so change the CLI output.
    unique_per_postcode = (
        load(year, nrows).groupby(["code_postal"])["id_mutation"].nunique()
    )
    ranking = unique_per_postcode.sort_values(ascending=False)

    # Highest counts first, lowest counts second.
    for extract in (ranking.head(10), ranking.tail(10)):
        print(extract)