"""Streamlit app: search uploaded PDF/text documents with the Quantized
Influence Measure (QIM) and answer questions with a selectable foundation model.
"""
import math
import os
from datetime import datetime
import openai
import PyPDF2
import streamlit as st
from openai import OpenAI
from helper.utils import *
# Page chrome: wide layout, browser-tab title, main heading and a divider.
# NOTE(review): the title/header emoji were mis-encoded in the source; the
# thinking-face emoji was recoverable, a second trailing emoji was not and has
# been dropped -- re-add it if the original is known.
st.set_page_config(layout="wide", page_title="Document Search using QIM🤔")
st.header("Document Search using Quantized Influence Measure (QIM)🤔")
st.write("---")
# Instruction manual shown in the sidebar expander. Defined at module level so
# the Markdown source can be written at column 0.
# NOTE(review): several emoji in these user-facing strings were unrecoverable
# from the mis-encoded source and were dropped; re-add them if known.
INSTRUCTION_MANUAL = """
# Document Search App Instruction Manual 🤔

Welcome to the Document Search App! This guide will help you quickly start using the app to find information in your documents.

## Quick Start Guide

1. **Upload Document**: Click on the "Upload documents" button in the sidebar and select your PDF or text files. Multiple files can be uploaded at once.
2. **Enter Keywords**: After your documents are uploaded, use the chat input at the bottom of the app to type your query. For example, you could type keywords or questions related to the content you're interested in.
3. **Review Results**: Hit 'Enter' to submit your query. The app will process your input and display the most relevant information from your documents in the form of a table right within the chat interface.

## Credits

This app (URL [here](https://huggingface.co/spaces/eagle0504/document-search-q-series)) was created by Yiqiao Yin. For more about his work, visit his [website](https://www.y-yin.io/) or connect with him on [LinkedIn](https://www.linkedin.com/in/yiqiaoyin/).

Thank you for using the Document Search App! We hope it serves your information retrieval needs effectively.
"""

# Sidebar: instructions, document upload, search parameters, model choice,
# the clear-conversation button and the credit line.
with st.sidebar:
    with st.expander("Instruction Manual"):
        st.markdown(INSTRUCTION_MANUAL)

    # Text and PDF documents to search; several may be uploaded at once.
    uploaded_files = st.file_uploader(
        "Upload documents", accept_multiple_files=True, type=["txt", "pdf"]
    )
    st.success(f"{len(uploaded_files)} document(s) loaded...")

    # Number of sentences per chunk; a chunk needs at least one sentence.
    chunk_size_input = st.number_input(
        "Insert an integer (for size of chunks, i.e. 2 means 2 sentences a chunk):",
        value=2,
        step=1,
        min_value=1,
    )

    # Number of quantization levels passed to the search.
    q_levels = st.number_input(
        "Insert an integer for levels of quantization:",
        value=2,
        step=1,
        min_value=2,
        max_value=31,
    )

    # How many of the best-matching rows to keep; at least one.
    top_n = st.number_input(
        "Insert a number (top n rows to be selected):",
        value=3,
        step=1,
        min_value=1,
    )

    # Foundation model used to phrase the answer.
    option = st.selectbox(
        "Which foundational model would you like?", ("GPT4", "LLAMA3", "LLAMA2")
    )

    clear_button = st.button("Clear Conversation", key="clear")

    # Use a distinct name so the imported current_year() helper is not shadowed.
    copyright_year = current_year()
    st.markdown(
        f"<h6 style='text-align: left;'>Copyright © 2010-{copyright_year} Present "
        f'<a href="https://www.y-yin.io/">Yiqiao Yin</a></h6>',
        unsafe_allow_html=True,
    )
# Start from an empty conversation on the first run, or whenever the user
# pressed the clear button.
if clear_button or "messages" not in st.session_state:
    st.session_state.messages = []

# Replay the stored conversation so it survives Streamlit's script reruns.
for past_message in st.session_state.messages:
    role, text = past_message["role"], past_message["content"]
    with st.chat_message(role):
        st.markdown(text)
# Main flow: index the uploaded documents, then answer each chat prompt from
# the best-matching chunks.
# With accept_multiple_files=True the uploader yields a list (empty when
# nothing is uploaded), so test for emptiness rather than `is None`.
if not uploaded_files:
    st.info("Upload files to analyze")
else:
    with st.spinner("Wait for it... 🤔"):
        # Split the files into text chunks and remember each chunk's source.
        documents, sources = read_and_textify_advanced(
            uploaded_files, chunk_size=chunk_size_input
        )
        # Numeric representation of the chunks used by query_search.
        query_database = list_to_nums(documents)

    # React to user input
    if prompt := st.chat_input("What is up?"):
        # Show the user's message and record it in the chat history.
        st.chat_message("user").markdown(prompt)
        st.session_state.messages.append({"role": "user", "content": prompt})

        # Rank the chunks against the prompt and keep the top rows.
        refs_tab = query_search(
            prompt,
            documents,
            query_database,
            sources,
            q_levels,
        )
        refs_tab = refs_tab.head(math.ceil(top_n))

        # Concatenate the selected chunks as context for the model.
        content = " ".join(refs_tab["sentences"])

        if option == "GPT4":
            response = call_gpt(prompt, content)
        else:
            # The LLaMA helpers take a single combined prompt string.
            custom_prompt = (
                f"\nAnswer the question: {prompt}\n"
                f"Use the following information: {content}\n"
            )
            if option == "LLAMA2":
                response = call_llama2(custom_prompt)
            else:
                response = call_llama(custom_prompt)

        # Show the answer together with the reference rows it was built from.
        with st.chat_message("assistant"):
            st.write(response)
            with st.expander("See reference:"):
                st.table(refs_tab)

        # Record the answer in the chat history.
        st.session_state.messages.append({"role": "assistant", "content": response})
|