import tempfile
import os

import tiktoken
import streamlit as st

from llama_index.core import (
    VectorStoreIndex,
    Settings,
)

from llama_parse import LlamaParse
from streamlit_pdf_viewer import pdf_viewer
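
# Rough dependency note (a sketch; exact package names and versions may differ): this app
# expects streamlit, streamlit-pdf-viewer, tiktoken, llama-index, and llama-parse to be
# installed, plus the provider-specific llama-index integrations (and mistral-common /
# vertexai / transformers for local token counting) that are imported lazily below.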


class MistralTokens:
    """
    Returns tokens for MistralAI models.

    See: https://docs.mistral.ai/guides/tokenization/
    """
    def __init__(self, llm_name):
        from mistral_common.tokens.tokenizers.mistral import MistralTokenizer

        # open-mistral-nemo models use the Tekken tokenizer (see the tokenization guide above).
        if 'open-mistral-nemo' in llm_name:
            self.tokenizer = MistralTokenizer.v3(is_tekken=True)
        else:
            self.tokenizer = MistralTokenizer.from_model(llm_name)

    def __call__(self, input):
        """Return all the token indices in a list, since LlamaIndex seems to count tokens by calling `len()` on the tokenizer's output."""
        from mistral_common.protocol.instruct.messages import UserMessage
        from mistral_common.protocol.instruct.request import ChatCompletionRequest

        return self.tokenizer.encode_chat_completion(
            ChatCompletionRequest(
                tools=[],
                messages=[
                    UserMessage(content=input)
                ]
            )
        ).tokens


class GeminiTokens:
    """
    Returns tokens for Gemini models.

    See: https://medium.com/google-cloud/counting-gemini-text-tokens-locally-with-the-vertex-ai-sdk-78979fea6244
    """
    def __init__(self, llm_name):
        from vertexai.preview import tokenization

        self.tokenizer = tokenization.get_tokenizer_for_model(llm_name)

    def __call__(self, input):
        """Return all the tokens in a list, since LlamaIndex seems to count tokens by calling `len()` on the tokenizer's output."""
        tokens = []
        # Flatten the token lists from each message's token info (avoid shadowing the built-in `list`).
        for info in self.tokenizer.compute_tokens(input).token_info_list:
            tokens += info.tokens
        return tokens
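

# Illustrative sketch (not part of the original app): as the docstrings above note, LlamaIndex
# seems to count tokens by calling `len()` on whatever Settings.tokenizer returns, so any
# callable that maps a string to a list of tokens works, e.g.
#
#     tokenizer = GeminiTokens("gemini-1.5-flash")
#     n_tokens = len(tokenizer("How many tokens am I?"))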


def main():
    with st.sidebar:
        st.title('Document Summarization and QA System')

        provider = st.selectbox(
            label="Select LLM Provider",
            options=['google', 'huggingface', 'mistralai', 'openai'],
            index=3
        )

        if provider == 'google':
            llm_list = ['gemini-1.0-pro', 'gemini-1.5-flash', 'gemini-1.5-pro']
        elif provider == 'huggingface':
            llm_list = []
        elif provider == 'mistralai':
            llm_list = ["mistral-large-latest", "open-mistral-nemo-latest"]
        elif provider == 'openai':
            llm_list = ['gpt-3.5-turbo', 'gpt-4', 'gpt-4-turbo', 'gpt-4o', 'gpt-4o-mini']
        else:
            llm_list = []

        if provider == 'huggingface':
            llm_name = st.text_input(
                "Enter LLM namespace/model-name",
                value="HuggingFaceH4/zephyr-7b-alpha",
            )
            embed_name = st.text_input(
                label="Enter embedding namespace/model-name",
                value="BAAI/bge-small-en-v1.5",
            )
        else:
            llm_name = st.selectbox(
                label="Select LLM Model",
                options=llm_list,
                index=0
            )

        temperature = st.slider(
            "Temperature",
            min_value=0.0,
            max_value=1.0,
            value=0.0,
            step=0.05,
        )

        parse_key = st.text_input(
            "Enter your LlamaParse API Key",
            value=None
        )

        llm_key = st.text_input(
            "Enter your LLM provider API Key",
            value=None,
        )

        if llm_key is not None:
            if provider == 'google':
                from llama_index.llms.gemini import Gemini
                from llama_index.embeddings.gemini import GeminiEmbedding
                max_output_tokens = 8192

                os.environ['GOOGLE_API_KEY'] = str(llm_key)
                Settings.llm = Gemini(
                    model=f"models/{llm_name}",
                    api_key=os.environ.get("GOOGLE_API_KEY"),
                    temperature=temperature,
                    max_tokens=max_output_tokens
                )
                Settings.tokenizer = GeminiTokens(llm_name)
                Settings.num_output = max_output_tokens
                Settings.embed_model = GeminiEmbedding(
                    model_name="models/text-embedding-004", api_key=os.environ.get("GOOGLE_API_KEY")
                )
                # Reserve the output budget out of the model's total token limit.
                if llm_name == 'gemini-1.0-pro':
                    total_token_limit = 32760
                else:
                    total_token_limit = 1_000_000
                Settings.context_window = total_token_limit - max_output_tokens
            elif provider == 'huggingface':
                if llm_name is not None and embed_name is not None:
                    from llama_index.llms.huggingface_api import HuggingFaceInferenceAPI
                    from llama_index.embeddings.huggingface import HuggingFaceInferenceAPIEmbedding
                    from transformers import AutoTokenizer

                    max_output_tokens = 2048

                    os.environ['HFTOKEN'] = str(llm_key)
                    Settings.llm = HuggingFaceInferenceAPI(
                        model_name=llm_name,
                        token=os.environ.get("HFTOKEN"),
                        temperature=temperature,
                        max_tokens=max_output_tokens
                    )
                    Settings.tokenizer = AutoTokenizer.from_pretrained(
                        llm_name,
                        token=os.environ.get("HFTOKEN"),
                    )
                    Settings.num_output = max_output_tokens
                    Settings.embed_model = HuggingFaceInferenceAPIEmbedding(
                        model_name=embed_name
                    )
                    Settings.context_window = 4096
            elif provider == 'mistralai':
                from llama_index.llms.mistralai import MistralAI
                from llama_index.embeddings.mistralai import MistralAIEmbedding
                max_output_tokens = 8192

                os.environ['MISTRAL_API_KEY'] = str(llm_key)
                Settings.llm = MistralAI(
                    model=llm_name,
                    temperature=temperature,
                    max_tokens=max_output_tokens,
                    random_seed=42,
                    safe_mode=True
                )
                Settings.tokenizer = MistralTokens(llm_name)
                Settings.num_output = max_output_tokens
                Settings.embed_model = MistralAIEmbedding(
                    model_name="mistral-embed",
                    api_key=os.environ.get("MISTRAL_API_KEY")
                )
                Settings.context_window = 128000
            elif provider == 'openai':
                from llama_index.llms.openai import OpenAI
                from llama_index.embeddings.openai import OpenAIEmbedding

                # Per-model output and context limits.
                if llm_name == 'gpt-3.5-turbo':
                    max_output_tokens = 4096
                    context_window = 16385
                elif llm_name == 'gpt-4':
                    max_output_tokens = 8192
                    context_window = 8192
                elif llm_name == 'gpt-4-turbo':
                    max_output_tokens = 4096
                    context_window = 128000
                elif llm_name == 'gpt-4o':
                    max_output_tokens = 4096
                    context_window = 128000
                elif llm_name == 'gpt-4o-mini':
                    max_output_tokens = 16384
                    context_window = 128000

                os.environ["OPENAI_API_KEY"] = str(llm_key)
                Settings.llm = OpenAI(
                    model=llm_name,
                    temperature=temperature,
                    max_tokens=max_output_tokens
                )
                Settings.tokenizer = tiktoken.encoding_for_model(llm_name).encode
                Settings.num_output = max_output_tokens
                Settings.embed_model = OpenAIEmbedding()
                Settings.context_window = context_window
            else:
                raise NotImplementedError(f"{provider} is not supported yet")
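
        # Once an LLM key is provided, the global LlamaIndex `Settings` above carry everything
        # the query engine needs: the provider LLM, embedding model, tokenizer, num_output,
        # and context_window.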

        uploaded_file = st.file_uploader(
            "Choose a PDF file to upload",
            type=['pdf'],
            accept_multiple_files=False
        )

        parsed_document = None
        if uploaded_file is not None:
            parser = LlamaParse(
                api_key=parse_key,
                result_type="text"
            )

            # Write the uploaded bytes to a temporary file so LlamaParse can read them from a path.
            temp_dir = tempfile.TemporaryDirectory()
            temp_filename = os.path.join(temp_dir.name, uploaded_file.name)
            with open(temp_filename, "wb") as f:
                f.write(uploaded_file.getvalue())
            parsed_document = parser.load_data(temp_filename)
            temp_dir.cleanup()
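
            # parser.load_data() returns a list of LlamaIndex Document objects; these are what
            # VectorStoreIndex.from_documents() indexes when the "Answer" button is pressed below.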

    col1, col2 = st.columns(2)

    with col2:
        tab1, tab2 = st.tabs(["Uploaded File", "Parsed File"])

        with tab1:
            if uploaded_file is not None:
                bytes_data = uploaded_file.getvalue()
                pdf_viewer(input=bytes_data, width=700)

        with tab2:
            if parsed_document is not None:
                st.write(parsed_document)

    with col1:
        st.markdown(
            """
            # Instructions

            1. Obtain an [API Key](https://cloud.llamaindex.ai/api-key) from LlamaParse to parse your document.
            2. Obtain a similar API key from your preferred LLM provider. Note: if you are using [Hugging Face](https://huggingface.co/models), you may need to request access to a model if it is gated.
            3. Make your selections at the left and upload a document to use as context.
            4. Begin asking questions below!
            """
        )

        st.divider()

        prompt_txt = 'You are a trusted scientific expert that only responds truthfully to inquiries. Summarize this document in 3-5 sentences.'
        prompt = st.text_area(
            label="Enter your query.",
            key="prompt_widget",
            value=prompt_txt
        )

        run = st.button("Answer", type="primary")

        if parsed_document is not None and run:
            index = VectorStoreIndex.from_documents(parsed_document)
            query_engine = index.as_query_engine()
            response = query_engine.query(prompt)
            st.write(response.response)
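

# Possible extension (a sketch, not part of the original app): `as_query_engine()` accepts
# retrieval options such as `similarity_top_k` to control how many chunks are retrieved per
# query, e.g. `index.as_query_engine(similarity_top_k=5)`.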


if __name__ == '__main__':
    st.set_page_config(layout="wide")
    main()
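
# To run locally (assuming this file is saved as app.py, a hypothetical name, and Streamlit is installed):
#   streamlit run app.py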