RAG / app.py
mahynski's picture
debug
dccc55e
raw
history blame
7.02 kB
import tempfile
import os
import tiktoken
import streamlit as st
from llama_index.llms.gemini import Gemini
from llama_index.llms.huggingface import HuggingFaceLLM
from llama_index.llms.huggingface_api import HuggingFaceInferenceAPI
from llama_index.llms.mistralai import MistralAI
from llama_index.llms.openai import OpenAI
from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.embeddings.huggingface import HuggingFaceEmbedding, HuggingFaceInferenceAPIEmbedding
from llama_index.core import (
VectorStoreIndex,
Settings,
)
from llama_parse import LlamaParse
from transformers import AutoTokenizer
from streamlit_pdf_viewer import pdf_viewer
# Output-token budget shared by every provider branch in main(): it is passed
# as ``max_tokens`` to the LLM constructors and assigned to
# ``Settings.num_output``.
MAX_OUTPUT_TOKENS = 2048
# Model names offered in the "Select LLM Model" drop-down, keyed by provider.
# Hugging Face is absent on purpose: its model name is typed in free-form.
_LLM_CHOICES = {
    'google': ['gemini'],
    'mistralai': [],
    'openai': ['gpt-3.5-turbo', 'gpt-4', 'gpt-4-turbo', 'gpt-4o', 'gpt-4o-mini'],
}

# Markdown shown above the query box. Kept at column 0 so that no line is
# indented far enough to be rendered as a code block.
_INSTRUCTIONS = """
# Instructions
1. Obtain an [API Key](https://cloud.llamaindex.ai/api-key) from LlamaParse to parse your document.
2. Obtain a similar API Key from your preferred LLM provider. Note, if you are using [Hugging Face](https://huggingface.co/models) you may need to request access to a model if it is gated.
3. Make selections at the left and upload a document to use as context.
4. Begin asking questions below!
"""


def _configure_llm(provider, llm_name, embed_name, llm_key, temperature):
    """Point the global llama_index ``Settings`` at the selected provider.

    Sets ``Settings.llm``, ``Settings.tokenizer``, ``Settings.num_output`` and
    ``Settings.embed_model`` (plus ``Settings.context_window`` for OpenAI).
    The global tokenizer needs to be consistent with the LLM for token
    counting, see
    https://docs.llamaindex.ai/en/stable/module_guides/models/llms/

    Args:
        provider: One of the provider names offered in the sidebar.
        llm_name: Model identifier for the chosen provider.
        embed_name: Embedding model identifier (only used for Hugging Face).
        llm_key: Non-empty API key / token for the provider.
        temperature: Sampling temperature forwarded to the LLM.

    Raises:
        NotImplementedError: If ``provider`` is neither 'openai' nor
            'huggingface'.
    """
    if provider == 'openai':
        os.environ["OPENAI_API_KEY"] = str(llm_key)
        Settings.llm = OpenAI(
            model=llm_name,
            temperature=temperature,
            max_tokens=MAX_OUTPUT_TOKENS,
        )
        Settings.tokenizer = tiktoken.encoding_for_model(llm_name).encode
        Settings.num_output = MAX_OUTPUT_TOKENS
        Settings.embed_model = OpenAIEmbedding()
        # NOTE(review): fixed at 4096 for every OpenAI model in the list;
        # confirm this is still intended for the larger-context models.
        Settings.context_window = 4096
    elif provider == 'huggingface':
        # A cleared text box yields an empty string, so test truthiness
        # rather than ``is not None``.
        if llm_name and embed_name:
            hf_token = str(llm_key)
            os.environ['HFTOKEN'] = hf_token
            Settings.llm = HuggingFaceInferenceAPI(
                model_name=llm_name,
                token=hf_token,
                temperature=temperature,
                max_tokens=MAX_OUTPUT_TOKENS,
            )
            Settings.tokenizer = AutoTokenizer.from_pretrained(
                llm_name,
                token=hf_token,
            )
            Settings.num_output = MAX_OUTPUT_TOKENS
            # Pass the token here as well; otherwise the embedding requests
            # are sent unauthenticated while the LLM requests are not.
            Settings.embed_model = HuggingFaceInferenceAPIEmbedding(
                model_name=embed_name,
                token=hf_token,
            )
    else:
        raise NotImplementedError(f"{provider} is not supported yet")


def _parse_pdf(uploaded_file, parse_key):
    """Run the uploaded PDF through LlamaParse and return the parsed documents.

    The upload is written to a temporary directory because the parser is
    given a file path; the directory is removed even if parsing raises.
    """
    parser = LlamaParse(
        api_key=parse_key,  # Can also be set in your env as LLAMA_CLOUD_API_KEY
        result_type="text",  # "markdown" and "text" are available
    )
    with tempfile.TemporaryDirectory() as temp_dir:
        # basename() keeps the write inside temp_dir even if the reported
        # upload name contains path separators.
        temp_filename = os.path.join(
            temp_dir, os.path.basename(uploaded_file.name)
        )
        with open(temp_filename, "wb") as f:
            f.write(uploaded_file.getvalue())
        return parser.load_data(temp_filename)


def main() -> None:
    """Render the Streamlit document summarization / QA page.

    The sidebar collects the provider, model, temperature, API keys and the
    PDF to use as context. The main area shows the instructions and query box
    on the left and the uploaded / parsed document on the right. Pressing
    "Answer" builds a vector index over the parsed document and queries it.

    Raises:
        NotImplementedError: If an LLM key is entered for a provider that has
            no implementation yet.
    """
    with st.sidebar:
        st.title('Document Summarization and QA System')

        # Select Provider
        provider = st.selectbox(
            label="Select LLM Provider",
            options=['google', 'huggingface', 'mistralai', 'openai'],
            index=3
        )

        # Select LLM. embed_name is bound up front so it exists for every
        # provider, not only on the Hugging Face path.
        embed_name = None
        if provider == 'huggingface':
            llm_name = st.text_input(
                "Enter LLM namespace/model-name",
                value="microsoft/Phi-3-mini-4k-instruct",
            )
            # Also give the user the option for different embedding models, too
            embed_name = st.text_input(
                label="Enter embedding namespace/model-name",
                value="BAAI/bge-small-en-v1.5",
            )
        else:
            llm_name = st.selectbox(
                label="Select LLM Model",
                options=_LLM_CHOICES.get(provider, []),
                index=0
            )

        # Temperature
        temperature = st.slider(
            "Temperature",
            min_value=0.0,
            max_value=1.0,
            value=0.0,
            step=0.05,
        )

        # Enter Parsing API Key. Never ship a real key as the default value:
        # fall back to the environment instead and mask what is typed.
        parse_key = st.text_input(
            "Enter your LlamaParse API Key",
            value=os.environ.get("LLAMA_CLOUD_API_KEY"),
            type="password",
        )

        # Enter LLM API Key
        llm_key = st.text_input(
            "Enter your LLM provider API Key",
            value=None,
            type="password",
        )

        # Create LLM only once a non-empty key has been supplied.
        if llm_key:
            _configure_llm(provider, llm_name, embed_name, llm_key, temperature)

        uploaded_file = st.file_uploader(
            "Choose a PDF file to upload",
            type=['pdf'],
            accept_multiple_files=False
        )

        parsed_document = None
        if uploaded_file is not None:
            if parse_key:
                parsed_document = _parse_pdf(uploaded_file, parse_key)
            else:
                st.warning(
                    "Enter your LlamaParse API Key to parse the uploaded document."
                )

    col1, col2 = st.columns(2)

    with col2:
        tab1, tab2 = st.tabs(["Uploaded File", "Parsed File",])
        with tab1:
            if uploaded_file is not None:  # Display the pdf
                pdf_viewer(input=uploaded_file.getvalue(), width=700)
        with tab2:
            if parsed_document is not None:  # Show the raw parsing result
                st.write(parsed_document)

    with col1:
        st.markdown(_INSTRUCTIONS)
        st.divider()

        prompt_txt = 'You are a trusted scientific expert that only responds truthfully to inquiries. Summarize this document in a 3-5 sentences.'
        prompt = st.text_area(
            label="Enter your query.",
            key="prompt_widget",
            value=prompt_txt
        )

        run = st.button("Answer", type="primary")
        if run:
            if parsed_document is None:
                # Tell the user why nothing happened instead of staying silent.
                st.warning("Upload and parse a document before asking a question.")
            else:
                index = VectorStoreIndex.from_documents(parsed_document)
                query_engine = index.as_query_engine()
                response = query_engine.query(prompt)
                st.write(response.response)
if __name__ == "__main__":
    # Script entry point: apply the page-wide layout, then render the app.
    #
    # Optional global tracing hook, left disabled (it also needs its own
    # API key when enabled):
    #   from llama_index.core import set_global_handler
    #   set_global_handler("langfuse")
    st.set_page_config(layout="wide")
    main()