File size: 5,932 Bytes
f5a20b7
f560388
7c1c2ae
f560388
 
 
 
 
 
 
 
f5a20b7
 
f560388
 
 
 
 
 
 
 
 
 
0675d90
f560388
0675d90
 
 
 
 
 
 
 
 
 
1eece80
0675d90
 
f560388
 
 
 
 
 
 
 
3b77f3a
 
 
2ecca1e
 
451d492
 
 
2ecca1e
 
3149505
 
a692993
db0606a
 
a692993
 
3149505
 
f5a20b7
3db05f5
edb4bd4
3db05f5
f5a20b7
9446adc
 
f8e69ac
451d492
9446adc
f560388
 
bae639d
7c1c2ae
 
 
 
ecab2ea
7c1c2ae
 
 
1f08ed4
f560388
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d896bfe
 
4e18d60
 
f560388
d896bfe
 
f560388
d896bfe
 
f560388
d896bfe
 
 
 
 
 
 
f2d0673
 
 
 
 
 
 
 
 
d896bfe
 
9446adc
451d492
f8e69ac
 
 
 
 
9446adc
 
f8e69ac
 
9446adc
f8e69ac
b7de3e2
d896bfe
 
b7de3e2
2741dff
b7de3e2
f2d0673
d896bfe
b7de3e2
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
import math
import os
from datetime import datetime

import openai
import PyPDF2
import streamlit as st
from openai import OpenAI

from helper.utils import *

# Page chrome: wide layout, browser-tab title, page header and a divider.
# set_page_config is kept as the first Streamlit call in the script.
st.set_page_config(
    page_title="Document Search using QIMπŸ€–πŸ“–",
    layout="wide",
)
st.header("Document Search using Quantized Influence Measure (QIM)πŸ€–πŸ“–")
st.write("---")  # markdown horizontal rule under the header


# Streamlit sidebar: instruction manual, document upload and the retrieval /
# model settings. The widget values bound here (uploaded_files,
# chunk_size_input, q_levels, top_n, option, clear_button) are read by the
# main page logic further down the script.
with st.sidebar:
    # Create an expandable instruction manual section in the sidebar
    with st.expander("Instruction Manual πŸ“–"):
        # Display the instruction manual in formatted markdown
        st.markdown(
            """
            # Document Search App Instruction Manual πŸ“–πŸ€–
            
            Welcome to the Document Search App! This guide will help you quickly start using the app to find information in your documents.
            
            ## Quick Start Guide
            
            1. **Upload Document**: Click on the "Upload documents" button in the sidebar and select your PDF or text files. Multiple files can be uploaded at once.
            2. **Enter Keywords**: After your documents are uploaded, use the chat input at the bottom of the app to type your query. For example, you could type keywords or questions related to the content you're interested in.
            3. **Review Results**: Hit 'Enter' to submit your query. The app will process your input and display the most relevant information from your documents in the form of a table right within the chat interface.
            
            ## Credits
            
            This app (URL [here](https://huggingface.co/spaces/eagle0504/document-search-q-series)) was created by Yiqiao Yin. For more about his work, visit his [website](https://www.y-yin.io/) or connect with him on [LinkedIn](https://www.linkedin.com/in/yiqiaoyin/).
            
            Thank you for using the Document Search App! We hope it serves your information retrieval needs effectively. πŸš€πŸ“ˆ
            """
        )

    # File uploader widget allowing users to upload text and PDF documents.
    # accept_multiple_files=True makes the widget return a list.
    uploaded_files = st.file_uploader(
        "Upload documents", accept_multiple_files=True, type=["txt", "pdf"]
    )

    # Inform the user how many documents have been loaded
    st.success(f"{len(uploaded_files)} document(s) loaded...")

    # Chunk size (sentences per chunk). A chunk needs at least one sentence,
    # so zero/negative values are rejected at the widget.
    chunk_size_input = st.number_input(
        "Insert an integer (for size of chunks, i.e. 2 means 2 sentences a chunk):",
        value=2,
        step=1,
        min_value=1,
    )

    # Quantization levels
    q_levels = st.number_input(
        "Insert an integer for levels of quantization:",
        value=2,
        step=1,
        min_value=2,
        max_value=31,
    )

    # Number of top-ranked rows to keep. Bounded below by 1 because the value
    # is later passed to DataFrame.head(), where a negative count would drop
    # rows from the end instead of selecting the first n.
    top_n = st.number_input(
        "Insert a number (top n rows to be selected):",
        value=3,
        step=1,
        min_value=1,
    )

    # Select FM
    option = st.selectbox(
        "Which foundational model would you like?", ("GPT4", "LLAMA3", "LLAMA2")
    )

    # Clear button
    clear_button = st.sidebar.button("Clear Conversation", key="clear")

    # Credit footer. current_year is not defined in this script (presumably
    # provided by helper.utils through the star import); its result is stored
    # under a different name so the helper itself is not shadowed.
    year_now = current_year()
    st.markdown(
        f"""
            <h6 style='text-align: left;'>Copyright Β© 2010-{year_now} Present <a href="https://www.y-yin.io/">Yiqiao Yin</a></h6>
        """,
        unsafe_allow_html=True,
    )


# Chat history lives in session state so it survives Streamlit reruns.
# Start from an empty list on the first run, or whenever the user pressed
# the "Clear Conversation" button.
if clear_button or "messages" not in st.session_state:
    st.session_state.messages = []


# Replay the stored conversation, one chat bubble per entry
for past_entry in st.session_state.messages:
    st.chat_message(past_entry["role"]).markdown(past_entry["content"])


# Main panel: process the uploaded documents and answer chat queries.
# The uploader is created with accept_multiple_files=True, so "nothing
# uploaded" is an empty list rather than None; test truthiness so the upload
# hint is actually shown.
if not uploaded_files:
    # Display a message prompting the user to upload files
    st.info("Upload files to analyze")

else:
    with st.spinner("Wait for it... πŸ€”"):
        # Process the uploaded files and separate the output into documents
        # (text) and their corresponding sources
        documents, sources = read_and_textify_advanced(
            uploaded_files, chunk_size=chunk_size_input
        )

        # Numeric form of the documents used by the search below
        # (list_to_nums comes from outside this script — presumably
        # helper.utils; confirm what representation it produces)
        query_database = list_to_nums(documents)

        # React to user input
        if prompt := st.chat_input("What is up?"):
            # Display user message in chat message container
            st.chat_message("user").markdown(prompt)
            # Add user message to chat history
            st.session_state.messages.append({"role": "user", "content": prompt})

            # Create reference table and keep only the first top_n rows
            # (assumes query_search returns rows ordered best-first — confirm)
            result = query_search(
                prompt,
                documents,
                query_database,
                sources,
                q_levels,
            ).head(math.ceil(top_n))

            # Concatenate the retained sentences into the model context
            content = " ".join(result.sentences)
            custom_prompt = f"""
                Answer the question: {prompt} 
                
                Use the following information: {content}
                """

            # Call FM: GPT4 receives question and context separately, the
            # LLAMA variants receive the combined prompt; any option other
            # than GPT4/LLAMA2 (i.e. LLAMA3) goes to call_llama
            if option == "GPT4":
                response = call_gpt(prompt, content)
            elif option == "LLAMA2":
                response = call_llama2(custom_prompt)
            else:
                response = call_llama(custom_prompt)

            # Display assistant response in chat message container
            with st.chat_message("assistant"):
                st.write(response)
                with st.expander("See reference:"):
                    st.table(result)

            # Add assistant response to chat history
            st.session_state.messages.append({"role": "assistant", "content": response})