import streamlit as st
import tiktoken

from llama_index.llms.gemini import Gemini
from llama_index.llms.huggingface import HuggingFaceLLM
from llama_index.llms.mistralai import MistralAI
from llama_index.llms.openai import OpenAI

from llama_index.core import (
    VectorStoreIndex,
    Settings,
)

from streamlit_pdf_viewer import pdf_viewer

# Global configurations
from llama_index.core import set_global_handler
set_global_handler("langfuse")
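# Assumption worth noting: the langfuse handler typically reads its credentials
# (LANGFUSE_PUBLIC_KEY, LANGFUSE_SECRET_KEY, LANGFUSE_HOST) from environment variables.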
st.set_page_config(layout="wide")

with st.sidebar:
    st.title('Document Summarization and QA System')
    # st.markdown('''
    # ## About this application
    # Upload a pdf to ask questions about it. This retrieval-augmented generation (RAG) workflow uses:
    # - [Streamlit](https://streamlit.io/)
    # - [LlamaIndex](https://docs.llamaindex.ai/en/stable/)
    # - [OpenAI](https://platform.openai.com/docs/models)
    # ''')

    # st.write('Made by ***Nate Mahynski***')
    # st.write('nathan.mahynski@nist.gov')

    # Select Provider
    provider = st.selectbox(
        label="Select LLM Provider",
        options=['google', 'huggingface', 'mistralai', 'openai'],
        index=0
    )

    # Select LLM
    if provider == 'google':
        llm_list = ['gemini']
    elif provider == 'huggingface':
        llm_list = []
    elif provider == 'mistralai':
        llm_list = []
    elif provider == 'openai':
        llm_list = ['gpt-3.5-turbo', 'gpt-4', 'gpt-4-turbo', 'gpt-4o']
    else:
        llm_list = []

    llm_name = st.selectbox(
        label="Select LLM Model",
        options=llm_list,
        index=0
    )

    # Temperature
    temperature = st.slider(
        "Temperature",
        min_value=0.0, 
        max_value=1.0, 
        value=0.0, 
        step=0.05, 
    )

    max_output_tokens = 4096

    # Create LLM
    if provider == 'openai':
        llm = OpenAI(
            model=llm_name, 
            temperature=temperature,
            max_tokens=max_output_tokens
        )
        # Global tokenization needs to be consistent with the LLM
        # https://docs.llamaindex.ai/en/stable/module_guides/models/llms/
        Settings.tokenizer = tiktoken.encoding_for_model(llm_name).encode
        Settings.num_output = max_output_tokens
        Settings.context_window = 4096 # max possible
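    # Hedged sketch (not in the original file): the remaining providers imported
    # above could be wired up the same way. The Gemini constructor arguments below
    # are an assumption based on the LlamaIndex docs; MistralAI and HuggingFaceLLM
    # are left out because their model lists above are still empty, and
    # HuggingFaceLLM takes a different set of arguments (e.g. model_name).
    elif provider == 'google':
        llm = Gemini(
            temperature=temperature,
            max_tokens=max_output_tokens
        )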
    

    # Enter Token
    token = st.text_input(
        "Enter your token",
        value=None
    )
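
    # Note: the token collected here is not yet passed to the LLM constructed above;
    # in practice it would be supplied via the provider's api_key argument (e.g.
    # OpenAI(api_key=token)) or an environment variable such as OPENAI_API_KEY.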

    uploaded_file = st.file_uploader(
        "Choose a PDF file to upload", 
        type=['pdf'], 
        accept_multiple_files=False
    )

    if uploaded_file is not None:
        # Parse the file
        pass
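        # Hedged sketch (assumption, not part of the original file): persist the
        # upload to a temporary file, load it with SimpleDirectoryReader, and build
        # a VectorStoreIndex so the main panel can answer questions against it.
        # LlamaIndex typically defaults to OpenAI embeddings here, which expect
        # OPENAI_API_KEY to be set.
        import tempfile
        from llama_index.core import SimpleDirectoryReader

        with tempfile.NamedTemporaryFile(suffix='.pdf', delete=False) as tmp:
            tmp.write(uploaded_file.getvalue())
            tmp_path = tmp.name
        documents = SimpleDirectoryReader(input_files=[tmp_path]).load_data()
        index = VectorStoreIndex.from_documents(documents)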

col1, col2 = st.columns(2)

with col1:
    pass
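    # Hedged sketch (assumption): a minimal question box wired to the index built
    # in the sidebar. `index` and `llm` only exist once a PDF has been uploaded
    # and an LLM has been configured above.
    if uploaded_file is not None:
        question = st.text_input("Ask a question about the document")
        if question:
            query_engine = index.as_query_engine(llm=llm)
            response = query_engine.query(question)
            st.write(str(response))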

with col2:
    if uploaded_file is not None:
        # Display the pdf
        bytes_data = uploaded_file.getvalue()
        pdf_viewer(input=bytes_data, width=700)