Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Form to provide metadata: headquarters, unit, currency, company name #74

Merged
merged 15 commits into from
Apr 24, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 4 additions & 3 deletions app/menu.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ def display_pages_menu() -> None:
st.markdown("# Pipeline steps")
st.page_link("pages/0_Import_File.py", label="Upload PDF")
st.page_link("pages/1_Selected_Pages.py", label="Pages selection")
st.page_link("pages/2_Merge_Tables.py", label="Merge tables")
st.page_link("pages/3_Clean_Headers.py", label="Headers setup")
st.page_link("pages/4_Clean_Tables.py", label="Tables customization")
st.page_link("pages/2_Metadata.py", label="Metadata")
st.page_link("pages/3_Merge_Tables.py", label="Merge tables")
st.page_link("pages/4_Clean_Headers.py", label="Headers setup")
st.page_link("pages/5_Clean_Tables.py", label="Tables customization")
2 changes: 1 addition & 1 deletion app/pages/1_Selected_Pages.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,4 +70,4 @@ def set_validate() -> None:
st.session_state["working_file_pdf"].name,
[i - 1 for i in selected_pages],
)
st.switch_page("pages/2_Merge_Tables.py")
st.switch_page("pages/2_Metadata.py")
101 changes: 101 additions & 0 deletions app/pages/2_Metadata.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
import streamlit as st
from utils import set_algorithm_name, get_pdf_iframe
from menu import display_pages_menu
from country_by_country.utils.constants import (
JURIDICTIONS,
CURRENCIES,
SECTORS,
COMPANIES,
)
from Levenshtein import distance
import sys
import logging
import pandas as pd
import numpy as np
import re

# Metadata page: shows the validated PDF next to a form where the user records
# the report's company, sector, year, currency, unit and headquarter. The
# submitted values are stored in st.session_state["metadata"].

# Send log records to stdout using a bare "%(message)s" format.
logging.basicConfig(stream=sys.stdout, level=logging.INFO, format="%(message)s")


def _option_index(options: list, value: object) -> int:
    """Return the position of *value* in *options*, defaulting to 0.

    Falls back to the first option when *value* is unset (falsy) or is not
    one of *options*, so a stale value kept in the session state cannot crash
    the page with a ``ValueError``.
    """
    if not value:
        return 0
    try:
        return options.index(value)
    except ValueError:
        return 0


st.set_page_config(layout="wide", page_title="Report metadata")
st.title("Country by Country Tax Reporting analysis : Metadata")
st.subheader(
    "This page will allow you to fill in metadata about the report : company name, headquarter, currency, unit, ...",
)
display_pages_menu()


# The form is only rendered once a PDF has been stored under
# "pdf_after_page_validation" in the session state.
if "pdf_after_page_validation" in st.session_state:
    col1, col2 = st.columns(2)
    with col1:
        # Left column: the PDF, so it can be read while filling in the form.
        st.markdown(
            get_pdf_iframe(st.session_state["pdf_after_page_validation"]),
            unsafe_allow_html=True,
        )
    with col2:
        with st.form("metadata_form"):
            # Pre-fill the widgets with a previous submission, if there is one.
            if "metadata" in st.session_state:
                saved = st.session_state["metadata"]
            else:
                saved = {}

            companies = list(COMPANIES.keys())
            company_name = st.selectbox(
                "Company name",
                companies,
                index=_option_index(companies, saved.get("company_name")),
            )

            sector = st.selectbox(
                "Sector",
                SECTORS,
                index=_option_index(SECTORS, saved.get("sector")),
            )

            year = st.text_input("Year", value=saved.get("year", ""))

            # Build the unique "CODE - Name" labels, ordered by alphabetic code.
            # A set is used so identical (code, name) pairs appear only once.
            currency_pairs = {
                (
                    CURRENCIES[entry]["AlphabeticCode"],
                    CURRENCIES[entry]["Currency"],
                )
                for entry in CURRENCIES
            }
            currencies = [
                f"{code} - {name}"
                for code, name in sorted(currency_pairs, key=lambda pair: pair[0])
            ]
            currency = st.selectbox(
                "Currency",
                currencies,
                index=_option_index(currencies, saved.get("currency")),
            )

            units = ["thousands", "millions", "10 millions", "100 millions", "billions"]
            unit = st.selectbox(
                "Unit",
                units,
                index=_option_index(units, saved.get("unit")),
            )

            headquarters = list(JURIDICTIONS.keys())
            headquarter = st.selectbox(
                "Headquarter location",
                headquarters,
                index=_option_index(headquarters, saved.get("headquarter")),
            )

            submitted = st.form_submit_button(
                label="Submit",
            )
            if submitted:
                # Persist the answers, then move on to the next pipeline step.
                st.session_state["metadata"] = {
                    "company_name": company_name,
                    "sector": sector,
                    "year": year,
                    "currency": currency,
                    "unit": unit,
                    "headquarter": headquarter,
                }
                st.switch_page("pages/3_Merge_Tables.py")
4 changes: 2 additions & 2 deletions app/pages/2_Merge_Tables.py → app/pages/3_Merge_Tables.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ def remove_table(key: str) -> None:

if "first_time_merge" not in st.session_state:
st.session_state["first_time_merge"] = False
st.switch_page("pages/3_Clean_Headers.py")
st.switch_page("pages/4_Clean_Headers.py")

col1, col2, col3 = st.columns([3, 1, 3])
with col1:
Expand Down Expand Up @@ -127,7 +127,7 @@ def remove_table(key: str) -> None:
args=(table_extractor,),
)
if validated:
st.switch_page("pages/3_Clean_Headers.py")
st.switch_page("pages/4_Clean_Headers.py")

with col3:
if merged is True:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,7 @@ def set_headers(algorithm_name: str) -> None:
)

if submitted:
st.switch_page("pages/4_Clean_Tables.py")
st.switch_page("pages/5_Clean_Tables.py")

st.markdown("# Current extraction")
st.markdown("The extracted table is displaye below")
Expand Down
File renamed without changes.
18 changes: 18 additions & 0 deletions app/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,4 +20,22 @@ def set_algorithm_name(my_key: str) -> None:

# Maps each column appended to the exported CSV to the key holding its value
# in st.session_state["metadata"]. Insertion order is the column order.
_METADATA_COLUMNS = {
    "company": "company_name",
    "sector": "sector",
    "year": "year",
    "currency": "currency",
    "unit": "unit",
    "headquarter": "headquarter",
}


@st.cache_data
def _render_csv(df: pd.DataFrame, metadata: dict) -> bytes:
    """Append the metadata columns to *df* and return it as UTF-8 CSV bytes.

    *metadata* is an explicit argument so that it takes part in the cache
    key: a change of metadata produces a fresh CSV instead of a cached one.
    Keys missing from *metadata* yield an empty-string column.
    """
    extra_columns = {
        column: metadata.get(key, "") for column, key in _METADATA_COLUMNS.items()
    }
    return df.assign(**extra_columns).to_csv(index=False).encode("utf-8")


def to_csv_file(df: pd.DataFrame) -> bytes:
    """Return *df* as UTF-8 encoded CSV bytes, with the report metadata appended.

    The columns ``company``, ``sector``, ``year``, ``currency``, ``unit`` and
    ``headquarter`` are populated from ``st.session_state["metadata"]``. They
    are left empty when no metadata is stored, which happens if the user
    skipped the metadata page by not clicking on Submit.

    Args:
        df: The table to export. It is not modified.

    Returns:
        The CSV content, without the index column, encoded as UTF-8.
    """
    # Read the session state outside of the cached helper: a cached function
    # is keyed on its arguments, so reading it inside would freeze the
    # metadata at whatever it was the first time a given table was exported.
    if "metadata" in st.session_state:
        metadata = dict(st.session_state["metadata"])
    else:
        metadata = {}
    return _render_csv(df, metadata)
Loading
Loading