|
import streamlit as st |
|
import pandas as pd |
|
import sys |
|
import logging |
|
|
|
logging.basicConfig(stream=sys.stdout, level=logging.INFO, format="%(message)s") |
|
|
|
from country_by_country.utils.utils import ( |
|
gather_tables, |
|
check_if_many, |
|
filled_table_extractors, |
|
gather_tables_with_merge, |
|
) |
|
from menu import display_pages_menu |
|
from dotenv import load_dotenv |
|
|
|
|
|
def merge_table(table_extractor: str) -> None: |
|
first_df_columns = pd.Series([]) |
|
table_list = [] |
|
for key, table in st.session_state["tables"].items(): |
|
if table_extractor in key: |
|
if first_df_columns.empty: |
|
first_df_columns = table.columns |
|
|
|
table.columns = first_df_columns |
|
table_list.append(table) |
|
|
|
st.session_state["new_tables"] = pd.concat( |
|
table_list, ignore_index=True, sort=False |
|
) |
|
|
|
|
|
def save_merge(table_extractor: str) -> None: |
|
tables_extracted_by_name = gather_tables_with_merge( |
|
st.session_state["assets"], |
|
st.session_state["new_tables"], |
|
table_extractor, |
|
) |
|
st.session_state["tables"] = tables_extracted_by_name |
|
st.session_state["algorithm_name"] = table_extractor |
|
|
|
|
|
def remove_table(key: str) -> None: |
|
del st.session_state["tables"][key] |
|
if ( |
|
"algorithm_name" in st.session_state |
|
and st.session_state["algorithm_name"] == key |
|
): |
|
del st.session_state["algorithm_name"] |
|
|
|
|
|
st.set_page_config(layout="wide", page_title="Merge Tables") |
|
st.title("Country by Country Tax Reporting analysis : Headers") |
|
st.subheader( |
|
"This page will allow you to modify the headers and to remove columns", |
|
) |
|
display_pages_menu() |
|
load_dotenv() |
|
|
|
|
|
if "tables" not in st.session_state: |
|
st.markdown( |
|
"# !! Don't change the page while the algorithms are runing, else they will start again" |
|
) |
|
|
|
|
|
if ( |
|
st.session_state.get("validate_selected_pages", False) |
|
and "pdf_after_page_validation" in st.session_state |
|
): |
|
if "tables" not in st.session_state: |
|
for table_extractor in st.session_state["proc"].table_extractors: |
|
new_asset = table_extractor(st.session_state["pdf_after_page_validation"]) |
|
st.session_state["assets"]["table_extractors"].append(new_asset) |
|
tables_extracted_by_name = gather_tables(st.session_state["assets"]) |
|
logging.info(f"Table extracted : {tables_extracted_by_name}") |
|
|
|
st.session_state["tables"] = tables_extracted_by_name |
|
|
|
if not check_if_many(st.session_state["assets"]): |
|
st.markdown("# !! Nothing to merge") |
|
|
|
if "first_time_merge" not in st.session_state: |
|
st.session_state["first_time_merge"] = False |
|
st.switch_page("pages/4_Clean_Headers.py") |
|
|
|
col1, col2, col3 = st.columns([3, 1, 3]) |
|
is_equal = True |
|
with col1: |
|
table_extractor = st.selectbox( |
|
"Choose an algorithm :", |
|
filled_table_extractors(st.session_state["assets"]), |
|
args=("selectbox2",), |
|
key="selectbox2", |
|
) |
|
|
|
number_column = None |
|
if table_extractor is not None: |
|
for key, table in st.session_state["tables"].items(): |
|
if table_extractor in key: |
|
with st.container(border=True): |
|
if not number_column: |
|
number_column = table.shape[1] |
|
else: |
|
if number_column != table.shape[1]: |
|
is_equal = False |
|
st.markdown("Table shape :" + str(table.shape)) |
|
st.markdown("Table name : " + key) |
|
st.dataframe( |
|
table, |
|
) |
|
st.button( |
|
"Remove this table", |
|
type="primary", |
|
on_click=remove_table, |
|
args=(key,), |
|
key=key, |
|
) |
|
|
|
with col2: |
|
st.markdown( |
|
"You won't be able to merge if the number of columns is not the same for each tables !!" |
|
) |
|
merged = st.button( |
|
"Merge", |
|
type="primary", |
|
on_click=merge_table, |
|
args=(table_extractor,), |
|
disabled=(False if is_equal else True), |
|
) |
|
validated = st.button( |
|
"Sauver le merge", |
|
on_click=save_merge, |
|
args=(table_extractor,), |
|
) |
|
if validated: |
|
st.switch_page("pages/4_Clean_Headers.py") |
|
|
|
with col3: |
|
if merged is True: |
|
edited_df = st.dataframe( |
|
st.session_state["new_tables"], |
|
) |
|
|