import spaces
import gradio as gr
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.core import (
    StorageContext,
    load_index_from_storage,
    Settings,
    PromptHelper,
)
from llama_index.core.indices.vector_store import VectorIndexRetriever
from llama_index.core.query_engine import RetrieverQueryEngine
from llama_index.core.postprocessor import SentenceTransformerRerank, SimilarityPostprocessor
from llama_index.llms.huggingface import HuggingFaceLLM
import torch

PERSIST_DIR = './storage'

# Configure the global LlamaIndex settings.
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# The embedding model is pinned to CPU: on ZeroGPU Spaces the GPU is only
# available inside functions decorated with @spaces.GPU.
Settings.embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-base-en-v1.5", device="cpu")
Settings.llm = HuggingFaceLLM(
    model_name="meta-llama/Meta-Llama-3-8B-Instruct",
    tokenizer_name="meta-llama/Meta-Llama-3-8B-Instruct",
    context_window=2048,
    max_new_tokens=256,
    # do_sample=True is required for temperature/top_k/top_p to take effect;
    # without it, transformers ignores the sampling parameters and warns.
    generate_kwargs={"do_sample": True, "temperature": 0.7, "top_k": 50, "top_p": 0.95},
    device_map="auto",
)

# Load the pre-built index persisted under ./storage.
storage_context = StorageContext.from_defaults(persist_dir=PERSIST_DIR)
index = load_index_from_storage(storage_context)

# Alternative query engine with an explicit retriever, a similarity cutoff,
# and a custom PromptHelper (kept for reference):
# prompt_helper = PromptHelper(
#     context_window=4096,
#     num_output=512,
#     chunk_overlap_ratio=0.1,
#     chunk_size_limit=None,
# )
# retriever = VectorIndexRetriever(
#     index=index,
#     similarity_top_k=5,
# )
# query_engine = RetrieverQueryEngine.from_args(
#     retriever,
#     node_postprocessors=[SimilarityPostprocessor(similarity_cutoff=0.7)],
#     prompt_helper=prompt_helper,
# )

# Cross-encoder reranker: keeps the top_n highest-scoring nodes after reranking.
rerank = SentenceTransformerRerank(
    model="BAAI/bge-reranker-large",
    top_n=5,
)

# NOTE: with similarity_top_k=1 only a single node ever reaches the reranker,
# so top_n=5 has no effect; raise similarity_top_k (e.g. to 10) if the
# reranker should actually narrow down candidates.
query_engine = index.as_query_engine(streaming=True, similarity_top_k=1, node_postprocessors=[rerank])

# Variant with an explicit lore-focused prompt (kept for reference):
# def chatbot_response(message, history):
#     prompt = f"Based on the Elder Scrolls lore, please answer the following question:\n\n{message}\n\nAnswer:"
#     response = query_engine.query(prompt)
#     return str(response)

@spaces.GPU
def chatbot_response(message, history):
    # str() on a streaming response drains the token generator into plain text.
    response = query_engine.query(message)
    return str(response)

iface = gr.ChatInterface(
    fn=chatbot_response,
    title="UESP Lore Chatbot: running on Meta-Llama-3-8B-Instruct (currently). It works 'okay'.",
    description="GitHub page for use case, general information, local installs, etc: https://github.com/emarron/UESP-lore",
    examples=["Who is Zaraphus?", "What is the relation between Vivec and Chim?", "What is the Lunar Lorkhan?"],
    cache_examples=True,
)

if __name__ == "__main__":
    iface.launch()
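
# --- Sketch: building the persisted index ---
# load_index_from_storage() assumes ./storage already holds a persisted index.
# A minimal sketch of how one could be built, assuming the lore documents live
# in a hypothetical ./data directory (not part of the original script):
#
#     from llama_index.core import SimpleDirectoryReader, VectorStoreIndex
#
#     documents = SimpleDirectoryReader('./data').load_data()
#     index = VectorStoreIndex.from_documents(documents)  # embeds via Settings.embed_model
#     index.storage_context.persist(persist_dir=PERSIST_DIR)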
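
# --- Sketch: streaming tokens to the UI ---
# Because the query engine is created with streaming=True, the handler could
# yield partial answers instead of blocking on the full response; Gradio's
# ChatInterface accepts generator functions. A sketch under that assumption
# (cache_examples=True would need rechecking with a generator handler):
#
#     @spaces.GPU
#     def chatbot_response(message, history):
#         response = query_engine.query(message)
#         partial = ""
#         for token in response.response_gen:
#             partial += token
#             yield partial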