# Hugging Face Space: mahynski / RAG (status: Running)
# File size: 7,022 Bytes

import tempfile
import os
import tiktoken
import streamlit as st

from llama_index.llms.gemini import Gemini
from llama_index.llms.huggingface import HuggingFaceLLM
from llama_index.llms.huggingface_api import HuggingFaceInferenceAPI 
from llama_index.llms.mistralai import MistralAI
from llama_index.llms.openai import OpenAI

from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.embeddings.huggingface import HuggingFaceEmbedding, HuggingFaceInferenceAPIEmbedding

from llama_index.core import (
    VectorStoreIndex,
    Settings,
)

from llama_parse import LlamaParse

from transformers import AutoTokenizer

from streamlit_pdf_viewer import pdf_viewer

# Cap on generated tokens per LLM response: passed as ``max_tokens`` to the LLM
# constructors in main() and mirrored into ``Settings.num_output``.
MAX_OUTPUT_TOKENS = 2048  

def _configure_llm(provider, llm_name, embed_name, llm_key, temperature):
    """Configure the global LlamaIndex ``Settings`` for the chosen provider.

    Sets ``Settings.llm``, ``Settings.tokenizer``, ``Settings.num_output`` and
    ``Settings.embed_model`` (plus ``Settings.context_window`` for OpenAI), and
    exports the API key through the process environment.

    Args:
        provider: Provider identifier selected in the sidebar.
        llm_name: Model name (OpenAI) or ``namespace/model-name`` (Hugging Face).
        embed_name: Embedding ``namespace/model-name``; only used for Hugging Face.
        llm_key: API key / token for the provider.
        temperature: Sampling temperature forwarded to the LLM.

    Raises:
        NotImplementedError: If ``provider`` is neither ``'openai'`` nor
            ``'huggingface'``.
    """
    # Global tokenization needs to be consistent with LLM for token counting
    # https://docs.llamaindex.ai/en/stable/module_guides/models/llms/
    if provider == 'openai':
        os.environ["OPENAI_API_KEY"] = str(llm_key)
        Settings.llm = OpenAI(
            model=llm_name,
            temperature=temperature,
            max_tokens=MAX_OUTPUT_TOKENS,
        )
        Settings.tokenizer = tiktoken.encoding_for_model(llm_name).encode
        Settings.num_output = MAX_OUTPUT_TOKENS
        Settings.embed_model = OpenAIEmbedding()
        # NOTE(review): value kept from the original ("max possible" per the
        # author); confirm it suits the selected model.
        Settings.context_window = 4096
    elif provider == 'huggingface':
        # Truthiness (not ``is not None``): a cleared text box yields "" which
        # is not a usable model name.
        if llm_name and embed_name:
            os.environ['HFTOKEN'] = str(llm_key)
            hf_token = os.environ['HFTOKEN']
            Settings.llm = HuggingFaceInferenceAPI(
                model_name=llm_name,
                token=hf_token,
                temperature=temperature,
                max_tokens=MAX_OUTPUT_TOKENS,
            )
            Settings.tokenizer = AutoTokenizer.from_pretrained(
                llm_name,
                token=hf_token,
            )
            Settings.num_output = MAX_OUTPUT_TOKENS
            # Pass the token here as well so the embedding endpoint is
            # authenticated the same way as the LLM endpoint.
            Settings.embed_model = HuggingFaceInferenceAPIEmbedding(
                model_name=embed_name,
                token=hf_token,
            )
    else:
        raise NotImplementedError(f"{provider} is not supported yet")


def _parse_pdf(uploaded_file, parse_key):
    """Parse an uploaded PDF with LlamaParse and return the parsed documents.

    The upload is written to a temporary directory because the parser is given
    a file path; the directory is removed even if parsing raises.

    Args:
        uploaded_file: Object returned by ``st.file_uploader``; its ``name``
            attribute and ``getvalue()`` method are used.
        parse_key: LlamaParse API key.

    Returns:
        Whatever ``LlamaParse.load_data`` returns for the saved file.
    """
    parser = LlamaParse(
        api_key=parse_key,
        result_type="text",  # "markdown" and "text" are available
    )

    with tempfile.TemporaryDirectory() as temp_dir:
        # basename() guards against a client-supplied name with path parts.
        temp_filename = os.path.join(
            temp_dir, os.path.basename(uploaded_file.name)
        )
        with open(temp_filename, "wb") as f:
            f.write(uploaded_file.getvalue())
        return parser.load_data(temp_filename)


def main():
    """Render the Streamlit document summarization / QA application.

    The sidebar collects the provider, model, temperature, API keys and the PDF
    to use as context. The right column shows the uploaded PDF and its parsed
    text; the left column shows instructions, the query box and the answer.

    Raises:
        NotImplementedError: If an LLM API key is entered for a provider other
            than ``'openai'`` or ``'huggingface'``.
    """
    with st.sidebar:
        st.title('Document Summarization and QA System')

        # Select Provider
        provider = st.selectbox(
            label="Select LLM Provider",
            options=['google', 'huggingface', 'mistralai', 'openai'],
            index=3
        )

        # Models offered per provider; providers without an entry get an empty
        # list (Hugging Face models are typed in free-form below).
        llm_options = {
            'google': ['gemini'],
            'openai': ['gpt-3.5-turbo', 'gpt-4', 'gpt-4-turbo', 'gpt-4o', 'gpt-4o-mini'],
        }
        llm_list = llm_options.get(provider, [])

        embed_name = None
        if provider == 'huggingface':
            llm_name = st.text_input(
                "Enter LLM namespace/model-name",
                value="microsoft/Phi-3-mini-4k-instruct",
            )

            # Also give the user the option for different embedding models, too
            embed_name = st.text_input(
                label="Enter embedding namespace/model-name",
                value="BAAI/bge-small-en-v1.5",
            )
        else:
            llm_name = st.selectbox(
                label="Select LLM Model",
                options=llm_list,
                index=0
            )

        # Temperature
        temperature = st.slider(
            "Temperature",
            min_value=0.0,
            max_value=1.0,
            value=0.0,
            step=0.05,
        )

        # Enter Parsing API Key. No default is embedded in the source: a key
        # placed in a widget's ``value`` is sent to every visitor's browser.
        parse_key_input = st.text_input(
            "Enter your LlamaParse API Key",
            value=None,
            type="password",
        )
        # Fall back to the environment on the server side only.
        parse_key = parse_key_input or os.environ.get("LLAMA_CLOUD_API_KEY")

        # Enter LLM API Key
        llm_key = st.text_input(
            "Enter your LLM provider API Key",
            value=None,
            type="password",
        )

        # Create LLM once a key has been provided.
        if llm_key:
            _configure_llm(provider, llm_name, embed_name, llm_key, temperature)

        uploaded_file = st.file_uploader(
            "Choose a PDF file to upload",
            type=['pdf'],
            accept_multiple_files=False
        )

        parsed_document = None
        if uploaded_file is not None:
            if not parse_key:
                st.warning("Enter your LlamaParse API Key to parse the uploaded document.")
            else:
                parsed_document = _parse_pdf(uploaded_file, parse_key)
                if not parsed_document:
                    # Nothing usable came back; do not build an index from it.
                    st.error("The document could not be parsed. Check your LlamaParse API Key and the uploaded file.")
                    parsed_document = None

    col1, col2 = st.columns(2)

    with col2:
        tab1, tab2 = st.tabs(["Uploaded File", "Parsed File",])

        with tab1:
            if uploaded_file is not None: # Display the pdf
                bytes_data = uploaded_file.getvalue()
                pdf_viewer(input=bytes_data, width=700)

        with tab2:
            if parsed_document is not None: # Show the raw parsing result
                st.write(parsed_document)

    with col1:
        st.markdown(
            """
            # Instructions

            1. Obtain an [API Key](https://cloud.llamaindex.ai/api-key) from LlamaParse to parse your document. 
            2. Obtain a similar API Key from your preferred LLM provider. Note, if you are using [Hugging Face](https://huggingface.co/models) you may need to request access to a model if it is gated.
            3. Make selections at the left and upload a document to use as context.
            4. Begin asking questions below!
            """
        )

        st.divider()

        prompt_txt = 'You are a trusted scientific expert that only responds truthfully to inquiries. Summarize this document in a 3-5 sentences.'
        prompt = st.text_area(
            label="Enter your query.",
            key="prompt_widget",
            value=prompt_txt
        )

        run = st.button("Answer", type="primary")

        if run:
            if parsed_document is None:
                st.warning("Upload and parse a document before asking a question.")
            else:
                index = VectorStoreIndex.from_documents(parsed_document)
                query_engine = index.as_query_engine()
                response = query_engine.query(prompt)
                st.write(response.response)

if __name__ == "__main__":
    # Optional global tracing hook, currently disabled. To turn it on,
    # uncomment the two lines below and also supply the matching API key:
    #   from llama_index.core import set_global_handler
    #   set_global_handler("langfuse")

    # Page-level configuration is applied before main() renders any widgets.
    st.set_page_config(layout="wide")
    main()