import streamlit as st
import tiktoken  # needed for Settings.tokenizer below
from llama_index.llms.gemini import Gemini
from llama_index.llms.huggingface import HuggingFaceLLM
from llama_index.llms.mistralai import MistralAI
from llama_index.llms.openai import OpenAI
from llama_index.core import (
    VectorStoreIndex,
    Settings,
)
from streamlit_pdf_viewer import pdf_viewer

# Global configurations
from llama_index.core import set_global_handler
set_global_handler("langfuse")
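# NOTE (assumption, not in the original): the Langfuse handler reads its
# credentials from the LANGFUSE_PUBLIC_KEY, LANGFUSE_SECRET_KEY, and
# LANGFUSE_HOST environment variables; these must be set for tracing to work.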
st.set_page_config(layout="wide")
with st.sidebar:
    st.title('Document Summarization and QA System')

    # st.markdown('''
    # ## About this application
    # Upload a pdf to ask questions about it. This retrieval-augmented generation (RAG) workflow uses:
    # - [Streamlit](https://streamlit.io/)
    # - [LlamaIndex](https://docs.llamaindex.ai/en/stable/)
    # - [OpenAI](https://platform.openai.com/docs/models)
    # ''')

    # st.write('Made by ***Nate Mahynski***')
    # st.write('nathan.mahynski@nist.gov')

    # Select Provider
    provider = st.selectbox(
        label="Select LLM Provider",
        options=['google', 'huggingface', 'mistralai', 'openai'],
        index=0
    )

    # Select LLM
    if provider == 'google':
        llm_list = ['gemini']
    elif provider == 'huggingface':
        llm_list = []
    elif provider == 'mistralai':
        llm_list = []
    elif provider == 'openai':
        llm_list = ['gpt-3.5-turbo', 'gpt-4', 'gpt-4-turbo', 'gpt-4o']
    else:
        llm_list = []

    llm_name = st.selectbox(
        label="Select LLM Model",
        options=llm_list,
        index=0
    )

    # Temperature
    temperature = st.slider(
        "Temperature",
        min_value=0.0,
        max_value=1.0,
        value=0.0,
        step=0.05,
    )

    max_output_tokens = 4096
    # Create LLM
    if provider == 'openai':
        llm = OpenAI(
            model=llm_name,
            temperature=temperature,
            max_tokens=max_output_tokens
        )

        # Global tokenization needs to be consistent with LLM
        # https://docs.llamaindex.ai/en/stable/module_guides/models/llms/
        Settings.tokenizer = tiktoken.encoding_for_model(llm_name).encode
        Settings.num_output = max_output_tokens
        Settings.context_window = 4096  # max possible
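    # Hedged sketch (not in the original): analogous constructors for the
    # remaining providers. Exact parameter names can vary between
    # llama-index versions, so treat these as illustrative assumptions.
    elif provider == 'google':
        # Mapping the single 'gemini' list entry to a concrete model id
        # (models/gemini-pro) is an assumption.
        llm = Gemini(
            model="models/gemini-pro",
            temperature=temperature,
            max_tokens=max_output_tokens
        )
    elif provider == 'mistralai':
        llm = MistralAI(
            model=llm_name,
            temperature=temperature,
            max_tokens=max_output_tokens
        )
    # The huggingface branch is left unimplemented, matching the empty
    # model list above; a HuggingFaceLLM would be constructed here.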
    # Enter Token
    token = st.text_input(
        "Enter your token",
        value=None
    )
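    # NOTE: as written, the token is collected but never passed to the LLM
    # constructors above; wiring it in (e.g., as an api_key argument) is
    # left as an assumption about the author's intent.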
    uploaded_file = st.file_uploader(
        "Choose a PDF file to upload",
        type=['pdf'],
        accept_multiple_files=False
    )

    if uploaded_file is not None:
        # Parse the file
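        # Hedged sketch (not in the original): persist the upload to a
        # temporary directory, then build a VectorStoreIndex over it.
        # SimpleDirectoryReader and the tempfile round-trip are assumptions;
        # embeddings default to Settings.embed_model (OpenAI unless changed).
        import pathlib
        import tempfile
        from llama_index.core import SimpleDirectoryReader

        with tempfile.TemporaryDirectory() as tmpdir:
            pathlib.Path(tmpdir, uploaded_file.name).write_bytes(
                uploaded_file.getvalue()
            )
            documents = SimpleDirectoryReader(tmpdir).load_data()
        index = VectorStoreIndex.from_documents(documents)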
col1, col2 = st.columns(2)

with col1:
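    # Hedged sketch (not in the original): question answering over the
    # index built in the sidebar. st.text_input and as_query_engine(llm=...)
    # are assumptions about how the author intended to wire this up, and
    # `llm` is assumed to be defined for the selected provider.
    if uploaded_file is not None:
        query_engine = index.as_query_engine(llm=llm)
        question = st.text_input("Ask a question about the document")
        if question:
            response = query_engine.query(question)
            st.write(str(response))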
with col2:
    if uploaded_file is not None:
        # Display the pdf
        bytes_data = uploaded_file.getvalue()
        pdf_viewer(input=bytes_data, width=700)