File size: 3,712 Bytes
01b8e8e
4107940
dd7488f
01b8e8e
39503cb
01b8e8e
dd7488f
01b8e8e
 
39503cb
101be32
39503cb
 
 
01b8e8e
f65e26a
 
 
 
6c3736e
 
 
 
 
f65e26a
 
 
6c3736e
39503cb
01b8e8e
dd7488f
01b8e8e
39503cb
dd7488f
 
39503cb
 
01b8e8e
 
 
 
 
 
39503cb
101be32
01b8e8e
 
39503cb
01b8e8e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
39503cb
01b8e8e
39503cb
1b47089
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4107940
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
import streamlit as st
from interface.utils import get_pipelines, extract_text_from_url, extract_text_from_file
from interface.draw_pipelines import get_pipeline_graph


def component_select_pipeline(container):
    """Draw the pipeline selector and keep the chosen pipeline in session state.

    A "Select pipeline" selectbox is rendered inside ``container``, listing
    the names returned by ``get_pipelines()``. "Keyword Search" is
    preselected when it is among those names; otherwise the first entry is.

    When no pipeline is stored yet, or the stored pipeline's name differs
    from the current selection, the factory at the same position as the
    selected name is called and its ``(search_pipeline, index_pipeline)``
    result is stored under ``st.session_state["pipeline"]`` together with
    the selected name.

    Args:
        container: Object used as a context manager around the selectbox.
    """
    pipeline_names, pipeline_funcs = get_pipelines()
    default_index = (
        pipeline_names.index("Keyword Search")
        if "Keyword Search" in pipeline_names
        else 0
    )
    with container:
        selected_pipeline = st.selectbox(
            "Select pipeline",
            pipeline_names,
            index=default_index,
        )
        # Use .get() so a session in which "pipeline" was never initialised
        # is treated like an explicit None instead of raising KeyError.
        current = st.session_state.get("pipeline")
        if current is not None and current["name"] == selected_pipeline:
            # The stored pipeline already matches the selection; keep it.
            return
        factory = pipeline_funcs[pipeline_names.index(selected_pipeline)]
        search_pipeline, index_pipeline = factory()
        st.session_state["pipeline"] = {
            "name": selected_pipeline,
            "search_pipeline": search_pipeline,
            "index_pipeline": index_pipeline,
        }


def component_show_pipeline(pipeline):
    """Render the graph of ``pipeline`` inside a "Show pipeline" expander.

    The figure comes from ``get_pipeline_graph(pipeline)`` and is drawn with
    ``st.plotly_chart`` using the full container width.
    """
    expander = st.expander("Show pipeline")
    with expander:
        st.plotly_chart(
            get_pipeline_graph(pipeline),
            use_container_width=True,
        )


def component_show_search_result(container, results):
    """Render each search result as a markdown section inside ``container``.

    For every entry of ``results`` a "Match N" heading (N counted from 1),
    the entry's ``"text"`` and ``"id"`` values, and, when ``"score"`` is not
    None, the score with three decimals are written, followed by a
    horizontal rule.

    Args:
        container: Object used as a context manager around the output.
        results: Iterable of mappings with "text", "id" and "score" keys.
    """
    render = st.markdown
    with container:
        for idx, document in enumerate(results):
            render(f"### Match {idx+1}")
            render(f"**Text**: {document['text']}")
            render(f"**Document**: {document['id']}")
            score_missing = document["score"] is None
            if not score_missing:
                render(f"**Score**: {document['score']:.3f}")
            # Rule separating this match from the next one.
            render("---")


def component_text_input(container):
    """Collect free-text documents through a growing list of text inputs.

    Inside an "Enter documents" expander, text inputs labelled
    "Document 1", "Document 2", ... are drawn one after another. Each
    non-empty value is recorded and followed by a horizontal rule; the first
    empty input ends the list.

    Args:
        container: Object used as a context manager around the widgets.

    Returns:
        A list of ``{"text": ..., "id": ...}`` dicts in entry order, where
        ``id`` is the zero-based position of the text.
    """
    with container:
        entered = []
        with st.expander("Enter documents"):
            doc_id = 1
            while True:
                text = st.text_input(f"Document {doc_id}", key=doc_id)
                if text == "":
                    # First blank field marks the end of the entered documents.
                    break
                entered.append(text)
                doc_id += 1
                st.markdown("---")
        return [
            {"text": content, "id": position}
            for position, content in enumerate(entered)
        ]


def component_article_url(container):
    """Collect article texts through a growing list of URL inputs.

    Inside an "Enter URLs" expander, text inputs labelled "URL 1",
    "URL 2", ... are drawn one after another. Each non-empty URL is passed
    to ``extract_text_from_url`` and the result is recorded, followed by a
    horizontal rule; the first empty input ends the list.

    Args:
        container: Object used as a context manager around the widgets.

    Returns:
        A list of ``{"text": ..., "id": ...}`` dicts in entry order, where
        ``text`` is whatever ``extract_text_from_url`` returned and ``id`` is
        the zero-based position of the URL.
    """
    with container:
        article_texts = []
        with st.expander("Enter URLs"):
            doc_id = 1
            while True:
                url = st.text_input(f"URL {doc_id}", key=doc_id)
                if url == "":
                    # First blank field marks the end of the entered URLs.
                    break
                article_texts.append(extract_text_from_url(url))
                doc_id += 1
                st.markdown("---")
        return [
            {"text": content, "id": position}
            for position, content in enumerate(article_texts)
        ]


def component_file_input(container):
    """Collect document texts through a growing list of file uploaders.

    Inside an "Enter Files" expander, file uploaders keyed 1, 2, ... are
    drawn one after another. Each uploaded file is passed to
    ``extract_text_from_file``; a non-None result is recorded and followed by
    a horizontal rule. The list ends at the first uploader without a file,
    or at the first file for which extraction returned None.

    Args:
        container: Object used as a context manager around the widgets.

    Returns:
        A list of ``{"text": ..., "id": ...}`` dicts in upload order, where
        ``id`` is the zero-based position of the extracted text.
    """
    with container:
        extracted_texts = []
        doc_id = 1
        with st.expander("Enter Files"):
            while True:
                file = st.file_uploader("Upload a .txt, .pdf, .csv file", key=doc_id)
                # Identity checks against None (PEP 8): `!= None` would go
                # through the object's own comparison methods.
                if file is None:
                    break
                extracted_text = extract_text_from_file(file)
                if extracted_text is None:
                    # Nothing could be extracted; stop offering more uploaders.
                    break
                extracted_texts.append(extracted_text)
                doc_id += 1
                st.markdown("---")
        return [
            {"text": content, "id": position}
            for position, content in enumerate(extracted_texts)
        ]