diff --git a/app/pages/0_Import_File.py b/app/pages/0_Import_File.py index 60fc6cf..893e2d6 100644 --- a/app/pages/0_Import_File.py +++ b/app/pages/0_Import_File.py @@ -7,7 +7,7 @@ import copy from menu import display_pages_menu, display_config from pypdf import PdfReader -from utils import get_pdf_iframe, set_state +from utils import get_pdf_iframe, set_state, generate_assets from country_by_country.processor import ReportProcessor @@ -27,28 +27,6 @@ def initiate_configuration() -> None: "pagefilter" ]["type"] -def generate_assets() -> None: - assets = { - "pagefilter": {}, - "table_extractors": [], - } - - # Filtering the pages - st.session_state["proc"].page_filter( - st.session_state["working_file_pdf"].name, - assets, - ) - - logging.info(f"Assets : {assets}") - - if len(assets["pagefilter"]["selected_pages"]) == 0: - # No page has been automatically selected by the page filter - # Hence, we display the full pdf, letting the user select the pages - pdfreader = PdfReader(st.session_state["working_file_pdf"]) - number_pages = len(PdfReader(st.session_state["working_file_pdf"]).pages) - assets["pagefilter"]["selected_pages"] = list(range(number_pages)) - st.session_state["assets"] = assets - def on_pdf_file_upload() -> None: # Change states related to the pdf file upload mytmpfile.write(st.session_state.original_pdf.read()) diff --git a/app/pages/1_Selected_Pages.py b/app/pages/1_Selected_Pages.py index 9b6093f..6a352e9 100644 --- a/app/pages/1_Selected_Pages.py +++ b/app/pages/1_Selected_Pages.py @@ -1,6 +1,6 @@ import streamlit as st from country_by_country.processor import ReportProcessor -from utils import get_pdf_iframe, set_state +from utils import get_pdf_iframe, set_state, generate_assets from country_by_country.utils.utils import keep_pages from pypdf import PdfReader from menu import display_pages_menu, display_config @@ -29,7 +29,7 @@ def set_extractors() -> None: ] set_state(["config", "table_extraction"], selected_extractors_dict) st.session_state["proc"] = ReportProcessor(st.session_state["config"]) - + generate_assets() st.set_page_config(layout="wide", page_title="Pages selection") # page_icon="📈" st.title("Country by Country Tax Reporting analysis : Selected Pages") diff --git a/app/utils.py b/app/utils.py index 2c79b3f..b09d6aa 100644 --- a/app/utils.py +++ b/app/utils.py @@ -1,8 +1,10 @@ import base64 +import logging from pathlib import Path from typing import Any import pandas as pd +from pypdf import PdfReader import streamlit as st @@ -41,7 +43,6 @@ def to_csv_file(df: pd.DataFrame) -> bytes: return df.to_csv(index=False).encode("utf-8") - def set_state(key: Any, value: Any) -> None: """ Sets the session_state[key] to value. @@ -61,3 +62,24 @@ def set_state(key: Any, value: Any) -> None: nested_value[key_list[-1]] = value else: st.session_state[key] = value + +def generate_assets() -> None: + assets = { + "pagefilter": {}, + "table_extractors": [], + } + + # Filtering the pages + st.session_state["proc"].page_filter( + st.session_state["working_file_pdf"].name, + assets, + ) + + logging.info(f"Assets : {assets}") + + if len(assets["pagefilter"]["selected_pages"]) == 0: + # No page has been automatically selected by the page filter + # Hence, we display the full pdf, letting the user select the pages + number_pages = len(PdfReader(st.session_state["working_file_pdf"]).pages) + assets["pagefilter"]["selected_pages"] = list(range(number_pages)) + st.session_state["assets"] = assets \ No newline at end of file