Skip to content

Commit

Permalink
fix assets generation when selecting extractor
Browse files Browse the repository at this point in the history
  • Loading branch information
Qouentinne committed May 5, 2024
1 parent c1b870d commit 69da052
Show file tree
Hide file tree
Showing 3 changed files with 26 additions and 26 deletions.
24 changes: 1 addition & 23 deletions app/pages/0_Import_File.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
import copy
from menu import display_pages_menu, display_config
from pypdf import PdfReader
from utils import get_pdf_iframe, set_state
from utils import get_pdf_iframe, set_state, generate_assets

from country_by_country.processor import ReportProcessor

Expand All @@ -27,28 +27,6 @@ def initiate_configuration() -> None:
"pagefilter"
]["type"]

def generate_assets() -> None:
    """
    Runs the page filter of the current processor on the working pdf file
    and stores the resulting assets in st.session_state["assets"].

    When the page filter selects no page at all, every page of the pdf is
    marked as selected so that the user can choose the pages manually.
    """
    assets = {
        "pagefilter": {},
        "table_extractors": [],
    }

    # Filtering the pages
    # (page_filter is expected to populate `assets` in place, since
    # assets["pagefilter"]["selected_pages"] is read right after the call)
    st.session_state["proc"].page_filter(
        st.session_state["working_file_pdf"].name,
        assets,
    )

    logging.info(f"Assets : {assets}")

    if len(assets["pagefilter"]["selected_pages"]) == 0:
        # No page has been automatically selected by the page filter
        # Hence, we display the full pdf, letting the user select the pages
        # The pdf is parsed only once here: the reader is needed solely to
        # count the pages.
        number_pages = len(PdfReader(st.session_state["working_file_pdf"]).pages)
        assets["pagefilter"]["selected_pages"] = list(range(number_pages))
    st.session_state["assets"] = assets

def on_pdf_file_upload() -> None:
# Change states related to the pdf file upload
mytmpfile.write(st.session_state.original_pdf.read())
Expand Down
4 changes: 2 additions & 2 deletions app/pages/1_Selected_Pages.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import streamlit as st
from country_by_country.processor import ReportProcessor
from utils import get_pdf_iframe, set_state
from utils import get_pdf_iframe, set_state, generate_assets
from country_by_country.utils.utils import keep_pages
from pypdf import PdfReader
from menu import display_pages_menu, display_config
Expand Down Expand Up @@ -29,7 +29,7 @@ def set_extractors() -> None:
]
set_state(["config", "table_extraction"], selected_extractors_dict)
st.session_state["proc"] = ReportProcessor(st.session_state["config"])

generate_assets()

st.set_page_config(layout="wide", page_title="Pages selection") # page_icon="📈"
st.title("Country by Country Tax Reporting analysis : Selected Pages")
Expand Down
24 changes: 23 additions & 1 deletion app/utils.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
import base64
import logging
from pathlib import Path
from typing import Any

import pandas as pd
from pypdf import PdfReader
import streamlit as st


Expand Down Expand Up @@ -41,7 +43,6 @@ def to_csv_file(df: pd.DataFrame) -> bytes:

return df.to_csv(index=False).encode("utf-8")


def set_state(key: Any, value: Any) -> None:
"""
Sets the session_state[key] to value.
Expand All @@ -61,3 +62,24 @@ def set_state(key: Any, value: Any) -> None:
nested_value[key_list[-1]] = value
else:
st.session_state[key] = value

def generate_assets() -> None:
    """
    Builds the assets of the working pdf file and saves them in
    st.session_state["assets"].

    The page filter of st.session_state["proc"] is run on the working pdf.
    If it selects no page, all the pages of the pdf are selected instead,
    so that the user can pick the relevant ones manually.
    """
    state = st.session_state
    assets = {"pagefilter": {}, "table_extractors": []}

    # Filtering the pages; page_filter is expected to populate `assets`
    # in place, as "selected_pages" is read from it just below.
    state["proc"].page_filter(state["working_file_pdf"].name, assets)

    logging.info(f"Assets : {assets}")

    page_filter_assets = assets["pagefilter"]
    if len(page_filter_assets["selected_pages"]) == 0:
        # The page filter did not select any page automatically:
        # fall back to the full pdf and let the user select the pages.
        page_count = len(PdfReader(state["working_file_pdf"]).pages)
        page_filter_assets["selected_pages"] = list(range(page_count))

    state["assets"] = assets

0 comments on commit 69da052

Please sign in to comment.