|
import math |
|
import os |
|
from datetime import datetime |
|
|
|
import openai |
|
import PyPDF2 |
|
import streamlit as st |
|
from openai import OpenAI |
|
|
|
from helper.utils import * |
|
|
|
# Page chrome: wide layout, browser-tab title, on-page header and a divider.
# NOTE(review): the trailing "π€π" looks like mis-decoded emoji -- confirm the
# intended glyphs before editing these strings.
_PAGE_TITLE = "Document Search using QIMπ€π"
_PAGE_HEADER = "Document Search using Quantized Influence Measure (QIM)π€π"

st.set_page_config(page_title=_PAGE_TITLE, layout="wide")
st.header(_PAGE_HEADER)
st.write("---")
|
|
|
|
|
|
|
# Markdown rendered inside the sidebar's "Instruction Manual" expander.
# Kept at column 0 so the rendered output never depends on code indentation.
# NOTE(review): the stray "π" / "π€" characters look like mis-decoded emoji;
# they are kept verbatim until the intended glyphs are confirmed.
_INSTRUCTION_MANUAL = """
# Document Search App Instruction Manual ππ€

Welcome to the Document Search App! This guide will help you quickly start using the app to find information in your documents.

## Quick Start Guide

1. **Upload Document**: Click on the "Upload documents" button in the sidebar and select your PDF or text files. Multiple files can be uploaded at once.
2. **Enter Keywords**: After your documents are uploaded, use the chat input at the bottom of the app to type your query. For example, you could type keywords or questions related to the content you're interested in.
3. **Review Results**: Hit 'Enter' to submit your query. The app will process your input and display the most relevant information from your documents in the form of a table right within the chat interface.

## Credits

This app (URL [here](https://huggingface.co/spaces/eagle0504/document-search-q-series)) was created by Yiqiao Yin. For more about his work, visit his [website](https://www.y-yin.io/) or connect with him on [LinkedIn](https://www.linkedin.com/in/yiqiaoyin/).

Thank you for using the Document Search App! We hope it serves your information retrieval needs effectively. ππ
"""

# Sidebar: instructions, document upload, retrieval settings and footer.
# Every name bound here (uploaded_files, chunk_size_input, q_levels, top_n,
# option, clear_button) is read by the main-panel code further down.
with st.sidebar:
    with st.expander("Instruction Manual π"):
        st.markdown(_INSTRUCTION_MANUAL)

    # With accept_multiple_files=True the uploader returns a list, which is
    # empty until the user uploads something.
    uploaded_files = st.file_uploader(
        "Upload documents", accept_multiple_files=True, type=["txt", "pdf"]
    )
    st.success(f"{len(uploaded_files)} document(s) loaded...")

    # Sentences per chunk; a chunk needs at least one sentence, so bound it.
    chunk_size_input = st.number_input(
        "Insert an integer (for size of chunks, i.e. 2 means 2 sentences a chunk):",
        value=2,
        step=1,
        min_value=1,
    )

    # Number of quantization levels handed to query_search.
    q_levels = st.number_input(
        "Insert an integer for levels of quantization:",
        value=2,
        step=1,
        min_value=2,
        max_value=31,
    )

    # How many of the best-matching rows to keep; zero or negative values
    # would yield an empty or tail-trimmed reference table, so bound it.
    top_n = st.number_input(
        "Insert a number (top n rows to be selected):",
        value=3,
        step=1,
        min_value=1,
    )

    # Which LLM backend answers the question.
    option = st.selectbox(
        "Which foundational model would you like?", ("GPT4", "LLAMA3")
    )

    clear_button = st.sidebar.button("Clear Conversation", key="clear")

    # current_year() is provided by the star import from helper.utils
    # (presumably returns the calendar year -- verify there). Store the result
    # under a different name so the helper itself is not shadowed.
    copyright_year = current_year()
    footer_html = (
        "<h6 style='text-align: left;'>"
        f"Copyright &copy; 2010-{copyright_year} Present "
        '<a href="https://www.y-yin.io/">Yiqiao Yin</a></h6>'
    )
    st.markdown(footer_html, unsafe_allow_html=True)
|
|
|
|
|
|
|
# The transcript lives in session state so it survives Streamlit reruns.
# Start with an empty one on first load, or whenever "Clear Conversation"
# was pressed during this run.
if clear_button or "messages" not in st.session_state:
    st.session_state["messages"] = []

# Replay the stored turns so earlier messages stay visible after a rerun.
for past_turn in st.session_state["messages"]:
    st.chat_message(past_turn["role"]).markdown(past_turn["content"])
|
|
|
|
|
|
|
# Main panel: either ask for documents, or index them and answer chat queries.
#
# The uploader is created with accept_multiple_files=True, so "nothing
# uploaded" is an empty list rather than None; test emptiness so the hint is
# actually shown (this also still covers a None value).
if not uploaded_files:
    st.info("Upload files to analyze")
else:
    # NOTE(review): "π€" below looks like a mis-decoded emoji -- confirm glyph.
    with st.spinner("Wait for it... π€"):
        # The helper's result is unpacked as a (documents, sources) pair.
        documents, sources = read_and_textify_advanced(
            uploaded_files, chunk_size=chunk_size_input
        )
        # Presumably a numeric encoding of each chunk used for matching --
        # see helper.utils.list_to_nums to confirm.
        query_database = list_to_nums(documents)

    if prompt := st.chat_input("What is up?"):
        # Echo the user's turn and record it in the transcript.
        st.chat_message("user").markdown(prompt)
        st.session_state.messages.append({"role": "user", "content": prompt})

        # Rank the chunks against the query, then keep the best top_n rows.
        # The result is used like a table with a `sentences` column
        # (presumably a pandas DataFrame -- verify in helper.utils).
        refs_tab = query_search(
            prompt,
            documents,
            query_database,
            sources,
            q_levels,
        )
        # head() with zero or a negative count would give an empty or
        # tail-trimmed table, so always keep at least one row.
        result = refs_tab.head(max(1, math.ceil(top_n)))

        # Concatenate the selected sentences into one context string.
        content = " ".join(result.sentences)

        if option == "GPT4":
            response = call_gpt(prompt, content)
        else:
            llama_prompt = (
                f"\nAnswer the question: {prompt}\n\n"
                f"Use the following information: {content}\n"
            )
            response = call_llama(llama_prompt)

        # Show the answer together with the supporting reference rows.
        with st.chat_message("assistant"):
            st.write(response)
            with st.expander("See reference:"):
                st.table(result)

        st.session_state.messages.append({"role": "assistant", "content": response})
|
|