import spaces
import gradio as gr
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.core import (
    StorageContext,
    load_index_from_storage,
    Settings,
    PromptHelper,
)
from llama_index.core.indices.vector_store import VectorIndexRetriever
from llama_index.core.query_engine import RetrieverQueryEngine
from llama_index.core.postprocessor import SentenceTransformerRerank, SimilarityPostprocessor
from llama_index.llms.huggingface import HuggingFaceLLM
PERSIST_DIR = "./storage"

# Configure global LlamaIndex settings. The embedding model is pinned to CPU
# so the GPU (allocated on demand via @spaces.GPU) is reserved for the LLM.
Settings.embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-base-en-v1.5", device="cpu")
Settings.llm = HuggingFaceLLM(
    model_name="meta-llama/Meta-Llama-3-8B-Instruct",
    tokenizer_name="meta-llama/Meta-Llama-3-8B-Instruct",
    context_window=2048,
    max_new_tokens=256,
    # do_sample=True is needed for temperature/top_k/top_p to take effect;
    # without it, transformers decodes greedily and ignores the sampling kwargs.
    generate_kwargs={"do_sample": True, "temperature": 0.7, "top_k": 50, "top_p": 0.95},
    device_map="auto",
)

storage_context = StorageContext.from_defaults(persist_dir=PERSIST_DIR)
index = load_index_from_storage(storage_context)
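
# The index under PERSIST_DIR is assumed to be prebuilt. A minimal sketch of how
# it could have been created (the "./lore_documents" path is hypothetical):
#
# from llama_index.core import SimpleDirectoryReader, VectorStoreIndex
# documents = SimpleDirectoryReader("./lore_documents").load_data()
# index = VectorStoreIndex.from_documents(documents)
# index.storage_context.persist(persist_dir=PERSIST_DIR)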
# Alternative pipeline kept for reference: a manual retriever with a raw
# similarity cutoff instead of the cross-encoder reranker used below.
# prompt_helper = PromptHelper(
#     context_window=4096,
#     num_output=512,
#     chunk_overlap_ratio=0.1,
#     chunk_size_limit=None,
# )
# retriever = VectorIndexRetriever(
#     index=index,
#     similarity_top_k=5,
# )
# query_engine = RetrieverQueryEngine.from_args(
#     retriever,
#     node_postprocessors=[SimilarityPostprocessor(similarity_cutoff=0.7)],
#     prompt_helper=prompt_helper,
# )

# Cross-encoder reranker keeps the top_n best of the retrieved nodes.
rerank = SentenceTransformerRerank(model="BAAI/bge-reranker-large", top_n=5)
# Retrieve more candidates than top_n, otherwise the reranker has nothing to re-rank.
query_engine = index.as_query_engine(
    streaming=True, similarity_top_k=10, node_postprocessors=[rerank]
)

# Earlier variant that wrapped the question in a lore-specific prompt template:
# def chatbot_response(message, history):
#     prompt = f"Based on the Elder Scrolls lore, please answer the following question:\n\n{message}\n\nAnswer:"
#     response = query_engine.query(prompt)
#     return str(response)

# @spaces.GPU requests a GPU for the duration of each call (ZeroGPU Spaces).
@spaces.GPU
def chatbot_response(message, history):
    # str() consumes the streaming response and returns the full answer text.
    response = query_engine.query(message)
    return str(response)
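
# Since the query engine is built with streaming=True, the handler could stream
# partial answers instead of blocking on the full response; gr.ChatInterface
# accepts a generator that yields the accumulated text so far. Untested sketch:
#
# @spaces.GPU
# def chatbot_response(message, history):
#     streaming_response = query_engine.query(message)
#     partial = ""
#     for token in streaming_response.response_gen:
#         partial += token
#         yield partial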

iface = gr.ChatInterface(
    fn=chatbot_response,
    title="UESP Lore Chatbot: running on Meta-Llama-3-8B-Instruct (currently). It works 'okay'.",
    description="GitHub page for use cases, general information, local installs, etc.: https://github.com/emarron/UESP-lore",
    examples=["Who is Zaraphus?", "What is the relation between Vivec and Chim?", "What is the Lunar Lorkhan?"],
    # cache_examples runs the model on the examples once at startup and caches the answers.
    cache_examples=True,
)

if __name__ == "__main__":
    iface.launch()