RAG / app.py
mahynski's picture
updated token names
e9802f8
raw
history blame
5.52 kB
import tempfile
import os
import tiktoken
import streamlit as st
from llama_index.llms.gemini import Gemini
from llama_index.llms.huggingface import HuggingFaceLLM
from llama_index.llms.mistralai import MistralAI
from llama_index.llms.openai import OpenAI
from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.core import (
VectorStoreIndex,
Settings,
)
from llama_parse import LlamaParse
from streamlit_pdf_viewer import pdf_viewer
def _parse_uploaded_pdf(uploaded_file, parse_key):
    """Parse an uploaded PDF with LlamaParse and return the parser's result.

    The upload is written to a file inside a temporary directory because the
    parser is given a path to load from. The directory is removed when the
    ``with`` block exits, including when writing or parsing raises.

    Args:
        uploaded_file: Object returned by ``st.file_uploader``; its ``name``
            and ``getvalue()`` are used.
        parse_key: LlamaParse API key, or ``None``.

    Returns:
        Whatever ``LlamaParse.load_data`` returns for the saved file.
    """
    parser = LlamaParse(
        api_key=parse_key,  # Can also be set in your env as LLAMA_CLOUD_API_KEY
        result_type="text",  # "markdown" and "text" are available
    )

    with tempfile.TemporaryDirectory() as temp_dir:
        # Keep only the final path component so a client-supplied name can
        # never place the file outside the temporary directory.
        safe_name = os.path.basename(uploaded_file.name)
        temp_filename = os.path.join(temp_dir, safe_name)
        with open(temp_filename, "wb") as f:
            f.write(uploaded_file.getvalue())
        return parser.load_data(temp_filename)


def main():
    """Render the document summarization / QA page.

    The sidebar collects the LLM provider, model, temperature, the LLM and
    LlamaParse API keys, and a PDF upload. Only the 'openai' provider
    configures the global LlamaIndex ``Settings`` (LLM, tokenizer, output
    size, context window, embedding model); 'huggingface' only stores the
    key in the ``HFTOKEN`` environment variable, and the other providers are
    not configured here.

    When a PDF is uploaded it is parsed, indexed into a ``VectorStoreIndex``
    and queried with the prompt from the text area. The left column shows the
    instructions, the prompt and the answer; the right column shows the
    uploaded PDF and the raw parsing result in two tabs.
    """
    # Models offered for each provider; an empty list means none are offered.
    models_by_provider = {
        'google': ['gemini'],
        'huggingface': [],
        'mistralai': [],
        'openai': ['gpt-3.5-turbo', 'gpt-4', 'gpt-4-turbo', 'gpt-4o', 'gpt-4o-mini'],
    }

    with st.sidebar:
        st.title('Document Summarization and QA System')

        # st.markdown('''
        # ## About this application
        # Upload a pdf to ask questions about it. This retrieval-augmented generation (RAG) workflow uses:
        # - [Streamlit](https://streamlit.io/)
        # - [LlamaIndex](https://docs.llamaindex.ai/en/stable/)
        # - [OpenAI](https://platform.openai.com/docs/models)
        # ''')

        # st.write('Made by ***Nate Mahynski***')
        # st.write('nathan.mahynski@nist.gov')

        # Select Provider
        provider = st.selectbox(
            label="Select LLM Provider",
            options=['google', 'huggingface', 'mistralai', 'openai'],
            index=3
        )

        # Select LLM
        llm_list = models_by_provider.get(provider, [])
        llm_name = st.selectbox(
            label="Select LLM Model",
            options=llm_list,
            index=0
        )

        # Temperature
        temperature = st.slider(
            "Temperature",
            min_value=0.0,
            max_value=1.0,
            value=0.0,
            step=0.05,
        )

        max_output_tokens = 2048

        # Enter LLM API Key (masked so the secret is not shown on screen)
        llm_key = st.text_input(
            "Enter your LLM API Key",
            value=None,
            type="password",
        )

        # Create LLM
        # Global tokenization needs to be consistent with LLM for token counting
        # https://docs.llamaindex.ai/en/stable/module_guides/models/llms/
        # Truthiness check: neither None nor an empty string is exported as a key.
        if llm_key:
            if provider == 'openai':
                os.environ["OPENAI_API_KEY"] = str(llm_key)
                Settings.llm = OpenAI(
                    model=llm_name,
                    temperature=temperature,
                    max_tokens=max_output_tokens
                )
                Settings.tokenizer = tiktoken.encoding_for_model(llm_name).encode
                Settings.num_output = max_output_tokens
                # NOTE(review): fixed value regardless of the selected model;
                # confirm it is appropriate for every model in the list.
                Settings.context_window = 4096  # max possible
                Settings.embed_model = OpenAIEmbedding()
            elif provider == 'huggingface':
                os.environ['HFTOKEN'] = str(llm_key)

        # Enter Parsing API Key (masked so the secret is not shown on screen)
        parse_key = st.text_input(
            "Enter your LlamaParse API Key",
            value=None,
            type="password",
        )

        uploaded_file = st.file_uploader(
            "Choose a PDF file to upload",
            type=['pdf'],
            accept_multiple_files=False
        )

    parsed_document = None
    if uploaded_file is not None:
        # Save the upload to a temporary file, then load and parse it
        parsed_document = _parse_uploaded_pdf(uploaded_file, parse_key)

    col1, col2 = st.columns(2)

    with col1:
        st.markdown(
            """
# Instructions
1. Obtain an [API Key](https://cloud.llamaindex.ai/api-key) from LlamaParse to parse your document.
2. Obtain a similar API Key from your preferred LLM provider.
3. Make selections at the left and upload a document to use a context.
4. Begin asking questions below!
"""
        )

        st.divider()

        prompt_txt = 'Summarize this document in a 3-5 sentences.'
        prompt = st.text_area(
            label="Enter your query.",
            key="prompt_widget",
            value=prompt_txt
        )

        # Skip indexing and querying when there is no document or the prompt
        # is blank: there is nothing meaningful to ask.
        if parsed_document is not None and prompt and prompt.strip():
            index = VectorStoreIndex.from_documents(parsed_document)
            query_engine = index.as_query_engine()
            response = query_engine.query(prompt)
            st.write(response.response)

    with col2:
        tab1, tab2 = st.tabs(["Uploaded File", "Parsed File",])

        with tab1:
            if uploaded_file is not None:  # Display the pdf
                bytes_data = uploaded_file.getvalue()
                pdf_viewer(input=bytes_data, width=700)

        with tab2:
            if parsed_document is not None:  # Show the raw parsing result
                st.write(parsed_document)


if __name__ == "__main__":
    # Script entry point: global configuration first, then the page itself.

    # Imported here rather than at the top of the file, so the name is only
    # bound when the file is executed as a script.
    from llama_index.core import set_global_handler

    # Install the "langfuse" global handler for LlamaIndex.
    set_global_handler("langfuse")

    # Use the wide page layout, configured before main() draws any widget.
    st.set_page_config(layout="wide")

    main()