import spaces  # Hugging Face Spaces helper (provides the @spaces.GPU decorator on ZeroGPU hardware)
import gradio as gr
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.core import (
    StorageContext,
    load_index_from_storage, Settings, PromptHelper
)
from llama_index.core.indices.vector_store import VectorIndexRetriever
from llama_index.core.query_engine import RetrieverQueryEngine
from llama_index.core.postprocessor import SentenceTransformerRerank, SimilarityPostprocessor
from llama_index.llms.huggingface import HuggingFaceLLM
import torch
PERSIST_DIR = './storage'

# Configure the settings
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")  # currently informational only
# Keep the embedder on CPU (leaving GPU memory for the LLM)
Settings.embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-base-en-v1.5", device="cpu")
Settings.llm = HuggingFaceLLM(
    model_name="meta-llama/Meta-Llama-3-8B-Instruct",
    tokenizer_name="meta-llama/Meta-Llama-3-8B-Instruct",
    context_window=2048,
    max_new_tokens=256,
    generate_kwargs={"temperature": 0.7, "top_k": 50, "top_p": 0.95},
    device_map="auto",
)
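# NOTE: the Meta-Llama-3 weights are gated on the Hugging Face Hub; loading them
# assumes the license has been accepted and an HF token is available to the Space
# (e.g. via an HF_TOKEN secret).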
# Load the prebuilt vector index persisted under ./storage
storage_context = StorageContext.from_defaults(persist_dir=PERSIST_DIR)
index = load_index_from_storage(storage_context)
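# The persisted index is assumed to have been built offline, roughly like this
# (a sketch; the corpus directory name is an assumption):
#
# from llama_index.core import SimpleDirectoryReader, VectorStoreIndex
# documents = SimpleDirectoryReader("./lore_documents").load_data()
# index = VectorStoreIndex.from_documents(documents)
# index.storage_context.persist(persist_dir=PERSIST_DIR)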
# Alternative pipeline with an explicit retriever, similarity cutoff,
# and prompt helper (kept for reference):
# prompt_helper = PromptHelper(
#     context_window=4096,
#     num_output=512,
#     chunk_overlap_ratio=0.1,
#     chunk_size_limit=None
# )
# retriever = VectorIndexRetriever(
#     index=index,
#     similarity_top_k=5,
# )
# query_engine = RetrieverQueryEngine.from_args(
#     retriever,
#     node_postprocessors=[SimilarityPostprocessor(similarity_cutoff=0.7)],
#     prompt_helper=prompt_helper
# )
# Rerank retrieved nodes with a cross-encoder before answer synthesis
rerank = SentenceTransformerRerank(
    model="BAAI/bge-reranker-large", top_n=5
)
# Retrieve more candidates than top_n so the reranker has something to reorder;
# with the previous similarity_top_k=1 the reranker only ever saw a single node
query_engine = index.as_query_engine(streaming=True, similarity_top_k=10, node_postprocessors=[rerank])
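# Quick smoke test without the UI (a sketch):
# query_engine.query("Who is Vivec?").print_response_stream()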
# def chatbot_response(message, history):
#     # Add a custom prompt template
#     prompt = f"Based on the Elder Scrolls lore, please answer the following question:\n\n{message}\n\nAnswer:"
#     response = query_engine.query(prompt)
#     return str(response)
def chatbot_response(message, history):
    response = query_engine.query(message)
    # str() drains the StreamingResponse token generator into the full answer,
    # so the reply is returned in one piece rather than streamed to the UI
    return str(response)
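# A streaming variant (a sketch): gr.ChatInterface also accepts a generator that
# yields progressively longer strings, and a StreamingResponse exposes its token
# stream as response.response_gen.
#
# def chatbot_response(message, history):
#     response = query_engine.query(message)
#     partial = ""
#     for token in response.response_gen:
#         partial += token
#         yield partial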
iface = gr.ChatInterface(
    fn=chatbot_response,
    title="UESP Lore Chatbot: running on Meta-Llama-3-8B-Instruct (currently). It works 'okay'.",
    description="GitHub page for use case details, general information, local installs, etc.: https://github.com/emarron/UESP-lore",
    examples=["Who is Zaraphus?", "What is the relation between Vivec and CHIM?", "What is the Lunar Lorkhan?"],
    cache_examples=True,
)
if __name__ == "__main__":
    iface.launch()