|
import tempfile |
|
import os |
|
import streamlit as st |
|
|
|
from llama_index.llms.gemini import Gemini |
|
from llama_index.llms.huggingface import HuggingFaceLLM |
|
from llama_index.llms.mistralai import MistralAI |
|
from llama_index.llms.openai import OpenAI |
|
from llama_index.core import ( |
|
VectorStoreIndex, |
|
Settings, |
|
) |
|
|
|
from llama_parse import LlamaParse |
|
|
|
from streamlit_pdf_viewer import pdf_viewer |
|
|
|
|
|
# Route all llama_index instrumentation (LLM calls, retrievals) to Langfuse.
# NOTE(review): assumes Langfuse credentials are provided via environment
# variables — confirm deployment config.
from llama_index.core import set_global_handler

set_global_handler("langfuse")

# Use the full browser width; st.set_page_config must be the first
# Streamlit call in the script.
st.set_page_config(layout="wide")
|
|
|
with st.sidebar:
    # Sidebar collects everything needed to configure the LLM:
    # provider, model, temperature, and the API tokens.
    st.title('Document Summarization and QA System')

    provider = st.selectbox(
        label="Select LLM Provider",
        options=['google', 'huggingface', 'mistralai', 'openai'],
        index=0,
    )

    # Models offered per provider.
    # NOTE(review): huggingface and mistralai have no models wired up yet,
    # so their model selectbox is empty.
    if provider == 'google':
        llm_list = ['gemini']
    elif provider == 'openai':
        llm_list = ['gpt-3.5-turbo', 'gpt-4', 'gpt-4-turbo', 'gpt-4o']
    else:
        llm_list = []

    llm_name = st.selectbox(
        label="Select LLM Model",
        options=llm_list,
        index=0,
    )

    # Sampling temperature forwarded to the LLM (0.0 = deterministic).
    temperature = st.slider(
        "Temperature",
        min_value=0.0,
        max_value=1.0,
        value=0.0,
        step=0.05,
    )

    # Upper bound on tokens generated per completion.
    max_output_tokens = 4096

    llm_token = st.text_input(
        "Enter your LLM token",
        value=None,
    )

    parse_token = st.text_input(
        "Enter your LlamaParse token",
        value=None,
    )

    # Build and register the LLM once all inputs above are available.
    # BUG FIX: the original referenced the undefined name `max_tokens`
    # (the constant is `max_output_tokens`), used `tiktoken` without
    # importing it, and never used the collected `llm_token`.
    if provider == 'openai':
        import tiktoken  # local import: only the OpenAI path needs it

        llm = OpenAI(
            model=llm_name,
            temperature=temperature,
            max_tokens=max_output_tokens,
            api_key=llm_token,  # falls back to OPENAI_API_KEY env var if None
        )
        # Register the LLM and tokenizer globally so VectorStoreIndex
        # queries actually use the model configured here.
        Settings.llm = llm
        Settings.tokenizer = tiktoken.encoding_for_model(llm_name).encode
        Settings.num_output = max_output_tokens
        Settings.context_window = 4096
|
|
|
uploaded_file = st.file_uploader(
    "Choose a PDF file to upload",
    type=['pdf'],
    accept_multiple_files=False,
)

if uploaded_file is not None:
    # LlamaParse extracts plain text from the uploaded PDF.
    parser = LlamaParse(
        api_key=parse_token,
        result_type="text",
    )

    # BUG FIX: the original created a TemporaryDirectory but wrote the
    # upload next to the script ('./'), so cleanup() never removed the
    # file and it leaked onto disk. Write into the temp dir instead, and
    # use a context manager so cleanup happens even if parsing raises.
    with tempfile.TemporaryDirectory() as temp_dir:
        filename = os.path.join(temp_dir, uploaded_file.name)
        with open(filename, "wb") as f:
            f.write(uploaded_file.getvalue())

        # Parse while the file still exists; the result outlives the
        # temp dir because load_data returns in-memory documents.
        parsed_document = parser.load_data(filename)
|
|
|
# Side-by-side view: parsed text on the left, rendered PDF on the right.
col1, col2 = st.columns(2)

with col1:
    # BUG FIX: `parsed_document` is only bound after a successful upload;
    # the original wrote it unconditionally, raising NameError on the
    # first render (before any file was chosen). Guard like col2 does.
    if uploaded_file is not None:
        st.write(uploaded_file)
        st.write(parsed_document)

with col2:
    if uploaded_file is not None:
        bytes_data = uploaded_file.getvalue()
        pdf_viewer(input=bytes_data, width=700)