import streamlit as st
import tiktoken

from llama_index.llms.gemini import Gemini
from llama_index.llms.huggingface import HuggingFaceLLM
from llama_index.llms.mistralai import MistralAI
from llama_index.llms.openai import OpenAI

from llama_index.core import (
    VectorStoreIndex,
    Settings,
)

from streamlit_pdf_viewer import pdf_viewer

# Global configurations
from llama_index.core import set_global_handler
set_global_handler("langfuse")
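# Assumption worth noting: the langfuse handler typically reads its credentials
# (LANGFUSE_PUBLIC_KEY, LANGFUSE_SECRET_KEY, LANGFUSE_HOST) from environment variables.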
st.set_page_config(layout="wide")

with st.sidebar:
    st.title('Document Summarization and QA System')
    # st.markdown('''
    # ## About this application
    # Upload a pdf to ask questions about it. This retrieval-augmented generation (RAG) workflow uses:
    # - [Streamlit](https://streamlit.io/)
    # - [LlamaIndex](https://docs.llamaindex.ai/en/stable/)
    # - [OpenAI](https://platform.openai.com/docs/models)
    # ''')

    # st.write('Made by ***Nate Mahynski***')
    # st.write('nathan.mahynski@nist.gov')

    # Select Provider
    provider = st.selectbox(
        label="Select LLM Provider",
        options=['google', 'huggingface', 'mistralai', 'openai'],
        index=0
    )

    # Select LLM
    if provider == 'google':
        llm_list = ['gemini']
    elif provider == 'huggingface':
        llm_list = []
    elif provider == 'mistralai':
        llm_list = []
    elif provider == 'openai':
        llm_list = ['gpt-3.5-turbo', 'gpt-4', 'gpt-4-turbo', 'gpt-4o']
    else:
        llm_list = []

    llm_name = st.selectbox(
        label="Select LLM Model",
        options=llm_list,
        index=0
    )

    # Temperature
    temperature = st.slider(
        "Temperature",
        min_value=0.0, 
        max_value=1.0, 
        value=0.0, 
        step=0.05, 
    )

    max_output_tokens = 4096

    # Create LLM
    if provider == 'openai':
        llm = OpenAI(
            model=llm_name, 
            temperature=temperature,
            max_tokens=max_output_tokens
        )
        # Global tokenization needs to be consistent with the LLM
        # https://docs.llamaindex.ai/en/stable/module_guides/models/llms/
        Settings.tokenizer = tiktoken.encoding_for_model(llm_name).encode
        Settings.num_output = max_output_tokens
        Settings.context_window = 4096 # max possible
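    # Hedged sketch (not in the original file): the remaining providers imported
    # above could be wired up the same way. The Gemini constructor arguments below
    # are an assumption based on the LlamaIndex docs; MistralAI and HuggingFaceLLM
    # are left out because their model lists above are still empty, and
    # HuggingFaceLLM takes a different set of arguments (e.g. model_name).
    elif provider == 'google':
        llm = Gemini(
            temperature=temperature,
            max_tokens=max_output_tokens
        )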
    

    # Enter Token
    token = st.text_input(
        "Enter your token",
        value=None
    )
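
    # Note: the token collected here is not yet passed to the LLM constructed above;
    # in practice it would be supplied via the provider's api_key argument (e.g.
    # OpenAI(api_key=token)) or an environment variable such as OPENAI_API_KEY.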

    uploaded_file = st.file_uploader(
        "Choose a PDF file to upload", 
        type=['pdf'], 
        accept_multiple_files=False
    )

    if uploaded_file is not None:
        # Parse the file
        pass
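        # Hedged sketch (assumption, not part of the original file): persist the
        # upload to a temporary file, load it with SimpleDirectoryReader, and build
        # a VectorStoreIndex so the main panel can answer questions against it.
        # LlamaIndex typically defaults to OpenAI embeddings here, which expect
        # OPENAI_API_KEY to be set.
        import tempfile
        from llama_index.core import SimpleDirectoryReader

        with tempfile.NamedTemporaryFile(suffix='.pdf', delete=False) as tmp:
            tmp.write(uploaded_file.getvalue())
            tmp_path = tmp.name
        documents = SimpleDirectoryReader(input_files=[tmp_path]).load_data()
        index = VectorStoreIndex.from_documents(documents)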

col1, col2 = st.columns(2)

with col1:
    pass
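    # Hedged sketch (assumption): a minimal question box wired to the index built
    # in the sidebar. `index` and `llm` only exist once a PDF has been uploaded
    # and an LLM has been configured above.
    if uploaded_file is not None:
        question = st.text_input("Ask a question about the document")
        if question:
            query_engine = index.as_query_engine(llm=llm)
            response = query_engine.query(question)
            st.write(str(response))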

with col2:
    if uploaded_file is not None:
        # Display the pdf
        bytes_data = uploaded_file.getvalue()
        pdf_viewer(input=bytes_data, width=700)