File size: 2,939 Bytes
ec6dd69
dd6a24d
ec6dd69
 
 
 
 
dd6a24d
ec6dd69
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
dd6a24d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
import base64
import logging
from pathlib import Path
from typing import Any

import pandas as pd
import streamlit as st
from pypdf import PdfReader


def get_pdf_iframe(pdf_to_process: str) -> str:
    """Return an HTML ``<iframe>`` snippet embedding a PDF as a base64 data URI.

    Args:
        pdf_to_process: Path of the PDF file to read from disk.

    Returns:
        The HTML markup, with the whole file content inlined in the
        ``src`` attribute of the iframe.
    """
    raw_bytes = Path(pdf_to_process).read_bytes()
    encoded = base64.b64encode(raw_bytes).decode("utf-8")
    # The surrounding whitespace and line breaks are part of the returned
    # markup and are kept exactly as callers have always received them.
    return (
        "\n"
        f'    <iframe src="data:application/pdf;base64,{encoded}\n'
        '    " width="100%" height="1000px" type="application/pdf"></iframe>\n'
        "    "
    )


def set_algorithm_name(my_key: str) -> None:
    """Copy the session-state value stored under ``my_key`` to ``"algorithm_name"``.

    Args:
        my_key: Session-state key whose current value is copied.
    """
    chosen_value = st.session_state[my_key]
    st.session_state["algorithm_name"] = chosen_value


# Maps each metadata column written to the CSV to the key that holds its
# value in st.session_state["metadata"].
_METADATA_COLUMNS = {
    "company": "company_name",
    "sector": "sector",
    "year": "year",
    "currency": "currency",
    "unit": "unit",
    "headquarter": "headquarter",
}


@st.cache_data
def _to_csv_bytes(df: pd.DataFrame, metadata: Any) -> bytes:
    """Serialize ``df`` plus the metadata columns to UTF-8 encoded CSV.

    ``metadata`` is an explicit argument so that it takes part in the
    cache key: a change of metadata yields a fresh CSV instead of a
    stale cached one.

    Args:
        df: Table to export.
        metadata: Mapping holding the values for the metadata columns,
            or ``None`` to fill those columns with empty strings.

    Returns:
        The CSV content (without the index), encoded as UTF-8.

    Raises:
        KeyError: If ``metadata`` is given but lacks an expected key.
    """
    if metadata is None:
        extra_columns = {column: "" for column in _METADATA_COLUMNS}
    else:
        extra_columns = {
            column: metadata[source_key]
            for column, source_key in _METADATA_COLUMNS.items()
        }
    return df.assign(**extra_columns).to_csv(index=False).encode("utf-8")


def to_csv_file(df: pd.DataFrame) -> bytes:
    """Export ``df`` as CSV bytes, enriched with the session metadata.

    The columns company, sector, year, currency, unit and headquarter are
    filled from ``st.session_state["metadata"]`` when it exists. They are
    left empty otherwise, which happens when the user skipped the
    metadata page by not clicking on Submit.

    Args:
        df: Table to export.

    Returns:
        The CSV content (without the index), encoded as UTF-8.
    """
    # Read the session state outside of the cached helper: a cached function
    # is keyed on its arguments only, so reading the metadata inside it would
    # keep returning the CSV built with the first metadata seen for this df.
    metadata = (
        st.session_state["metadata"] if "metadata" in st.session_state else None
    )
    return _to_csv_bytes(df, metadata)


def set_state(key: Any, value: Any) -> None:
    """Store ``value`` in the Streamlit session state under ``key``.

    When ``key`` is a list, it is read as a path into nested containers:
    ``["key1", "key2"]`` targets ``session_state["key1"]["key2"]``.
    Every level except the last one must already exist.

    Args:
        key: A single session-state key, or a list of keys forming a path.
        value: The value to store.

    Raises:
        KeyError: If an intermediate level of a list path does not exist.
    """
    if not isinstance(key, list):
        st.session_state[key] = value
        return

    container = st.session_state
    path_so_far = "session_state"
    # Walk down to the parent of the final key, tracking the path travelled
    # so far so that the error names the exact level that is missing.
    for part in key[:-1]:
        path_so_far = f"{path_so_far}['{part}']"
        try:
            container = container[part]
        except KeyError as err:
            raise KeyError(f"{path_so_far} does not exist") from err
    container[key[-1]] = value


def generate_assets() -> None:
    """Run the page filter on the working PDF and store the resulting assets.

    A fresh ``assets`` dict is handed to ``page_filter`` of the object stored
    in ``st.session_state["proc"]``. The code then reads
    ``assets["pagefilter"]["selected_pages"]``, so the filter is expected to
    fill it in. The final dict is saved in ``st.session_state["assets"]``.
    """
    session = st.session_state
    assets = {"pagefilter": {}, "table_extractors": []}

    # Filtering the pages
    run_page_filter = session["proc"].page_filter
    run_page_filter(session["working_file_pdf"].name, assets)

    logging.info(f"Assets : {assets}")

    page_filter_assets = assets["pagefilter"]
    if len(page_filter_assets["selected_pages"]) == 0:
        # The page filter selected nothing automatically: fall back to every
        # page of the PDF so that the user can pick the pages by hand.
        reader = PdfReader(session["working_file_pdf"])
        page_count = len(reader.pages)
        page_filter_assets["selected_pages"] = list(range(page_count))

    session["assets"] = assets