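"""UESP Lore Chatbot (Hugging Face Space).

Retrieval-augmented chat over a persisted llama_index vector store in
./storage, answered with Meta-Llama-3-8B-Instruct and served through a
Gradio ChatInterface. @spaces.GPU requests a ZeroGPU slot per call.
"""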
import spaces
import gradio as gr
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.core import (
    StorageContext,
    Settings,
    PromptHelper,  # only used by the commented-out retriever pipeline below
    load_index_from_storage,
)
# VectorIndexRetriever, RetrieverQueryEngine and SimilarityPostprocessor are
# likewise only needed by the commented-out pipeline kept below for reference.
from llama_index.core.indices.vector_store import VectorIndexRetriever
from llama_index.core.query_engine import RetrieverQueryEngine
from llama_index.core.postprocessor import SentenceTransformerRerank, SimilarityPostprocessor
from llama_index.llms.huggingface import HuggingFaceLLM
import torch
PERSIST_DIR = './storage'

# Configure the global llama_index settings.
# NOTE: DEVICE is currently unused; the LLM places itself with device_map="auto".
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# The embedding model is pinned to CPU, presumably so start-up work (which runs
# outside the @spaces.GPU context) never touches CUDA.
Settings.embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-base-en-v1.5", device="cpu")
Settings.llm = HuggingFaceLLM(
    model_name="meta-llama/Meta-Llama-3-8B-Instruct",
    tokenizer_name="meta-llama/Meta-Llama-3-8B-Instruct",
    context_window=2048,
    max_new_tokens=256,
    # do_sample=True is required for temperature/top_k/top_p to take effect;
    # without it transformers falls back to greedy decoding and warns.
    generate_kwargs={"do_sample": True, "temperature": 0.7, "top_k": 50, "top_p": 0.95},
    device_map="auto",
)
# Load the prebuilt vector index from disk; ./storage must already contain a
# persisted index (see the GitHub repo linked below for how it is built).
storage_context = StorageContext.from_defaults(persist_dir=PERSIST_DIR)
index = load_index_from_storage(storage_context)
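
# Alternative pipeline, kept for reference: an explicit retriever with a
# similarity cutoff instead of the reranker configured further down.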
# prompt_helper = PromptHelper(
# context_window=4096,
# num_output=512,
# chunk_overlap_ratio=0.1,
# chunk_size_limit=None
# )
# retriever = VectorIndexRetriever(
# index=index,
# similarity_top_k=5,
# )
# query_engine = RetrieverQueryEngine.from_args(
# retriever,
# node_postprocessors=[SimilarityPostprocessor(similarity_cutoff=0.7)],
# prompt_helper=prompt_helper
# )
# Rerank the retrieved nodes with a cross-encoder and keep the best 5.
rerank = SentenceTransformerRerank(
    model="BAAI/bge-reranker-large", top_n=5
)
# similarity_top_k should be >= the reranker's top_n; retrieving a single node
# would make the rerank step a no-op, so fetch a larger candidate pool here.
query_engine = index.as_query_engine(
    streaming=True,
    similarity_top_k=10,
    node_postprocessors=[rerank],
)
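
# Kept for reference: wrapping the question in a custom lore prompt.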
# def chatbot_response(message, history):
# # Add a custom prompt template
# prompt = f"Based on the Elder Scrolls lore, please answer the following question:\n\n{message}\n\nAnswer:"
# response = query_engine.query(prompt)
# return str(response)
@spaces.GPU  # run each call on a ZeroGPU-allocated GPU
def chatbot_response(message, history):
    response = query_engine.query(message)
    # str() drains the whole token stream, so despite streaming=True the
    # reply is returned to Gradio in one piece.
    return str(response)
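
# A minimal sketch of true token-by-token streaming, assuming a Gradio version
# whose ChatInterface accepts generator functions:
#
# @spaces.GPU
# def chatbot_response(message, history):
#     streaming_response = query_engine.query(message)
#     partial = ""
#     for token in streaming_response.response_gen:
#         partial += token
#         yield partial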
iface = gr.ChatInterface(
    fn=chatbot_response,
    title="UESP Lore Chatbot: running on Meta-Llama-3-8B-Instruct (currently). It works 'okay'.",
    description="GitHub page for use cases, general information, local installs, etc.: https://github.com/emarron/UESP-lore",
    examples=["Who is Zaraphus?", "What is the relation between Vivec and CHIM?", "What is the Lunar Lorkhan?"],
    cache_examples=True,  # answers to the examples are generated once at startup and reused
)
if __name__ == "__main__":
iface.launch()
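
# To build the ./storage index locally, a rough sketch (the corpus path is an
# assumption; see https://github.com/emarron/UESP-lore for the actual steps,
# and use the same Settings.embed_model as above when building):
#
# from llama_index.core import SimpleDirectoryReader, VectorStoreIndex
# documents = SimpleDirectoryReader("lore_pages").load_data()
# index = VectorStoreIndex.from_documents(documents)
# index.storage_context.persist(persist_dir="./storage")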