Spaces:

emarron
/

elderscrolls_lore_bot

Sleeping

App Files Files Community

emar committed 18 days ago

Commit

94f0475

•

1 Parent(s): 3d9b1a5

updated stuff switched to a smaller model

Browse files

Files changed (5) hide show

app.py +58 -40
requirements.txt +11 -3
storage/default__vector_store.json +1 -1
storage/docstore.json +1 -1
storage/index_store.json +1 -1

app.py CHANGED Viewed

@@ -1,6 +1,6 @@
 import os
 import gradio as gr
-import torch
 from llama_index.embeddings.huggingface import HuggingFaceEmbedding
 from llama_index.core import (
     VectorStoreIndex,
@@ -8,59 +8,77 @@ from llama_index.core import (
     StorageContext,
     load_index_from_storage, Settings,
 )
-from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
 hf_token = os.getenv('HF_TOKEN')
-# Configure the settings
-Settings.embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-base-en-v1.5")
-# Load the Llama 3 model and tokenizer
-model_id = "meta-llama/Meta-Llama-3-8B-Instruct"
-tokenizer = AutoTokenizer.from_pretrained(model_id, use_auth_token=hf_token)
-model = AutoModelForCausalLM.from_pretrained(model_id, use_auth_token=hf_token, torch_dtype=torch.float16)
-# Initialize the Llama 3 pipeline
-llama3_pipeline = pipeline(
-    "text-generation",
-    model=model,
-    tokenizer=tokenizer,
-    device=0 if torch.cuda.is_available() else -1,
 )
-# Path to your local corpus directory
-PERSIST_DIR = './storage'
-# Load the existing index
-storage_context = StorageContext.from_defaults(persist_dir=PERSIST_DIR)
-index = load_index_from_storage(storage_context)
 query_engine = index.as_query_engine()
-def chatbot_response(user_input):
-    # Retrieve context from the vector store
-    context = query_engine.query(user_input)
-    context_str = str(context)
-    # Combine user input with retrieved context
-    combined_input = f"{context_str}\n\nUser: {user_input}\nAssistant:"
-    # Generate a response using the Llama 3 pipeline
-    outputs = llama3_pipeline(
-        combined_input,
-        max_new_tokens=256,
-        eos_token_id=tokenizer.eos_token_id,
-        do_sample=True,
-        temperature=0.6,
-        top_p=0.9,
-    )
-    assistant_response = outputs[0]["generated_text"].split("Assistant:")[1].strip()
-    return assistant_response
-# Create a Gradio interface
-interface = gr.Interface(fn=chatbot_response, inputs="text", outputs="text", title="Elder Scrolls Lore Chatbox - This one sucks more than the local and chatgpt one because of the way the data is passed.")
 # Launch the interface
 if __name__ == "__main__":
-    interface.launch()

 import os
 import gradio as gr
+from dotenv import load_dotenv
 from llama_index.embeddings.huggingface import HuggingFaceEmbedding
 from llama_index.core import (
     VectorStoreIndex,
     StorageContext,
     load_index_from_storage, Settings,
 )
+from llama_index.llms.huggingface import HuggingFaceLLM
+from transformers import BitsAndBytesConfig
+load_dotenv()
 hf_token = os.getenv('HF_TOKEN')
+# Path to your local corpus directory
+PERSIST_DIR = './storage'
+corpus_directory = 'articles'
+quantization_config = BitsAndBytesConfig(
+    load_in_4bit=True,
+    # bnb_4bit_compute_dtype=torch.float16,
+    # bnb_4bit_quant_type="nf4",
+    # bnb_4bit_use_double_quant=True,
 )
+# Configure the settings
+Settings.embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-base-en-v1.5")
+Settings.llm = HuggingFaceLLM(
+    model_name="microsoft/Phi-3-small-8k-instruct",
+    tokenizer_name="microsoft/Phi-3-small-8k-instruct",
+    context_window=3900,
+    max_new_tokens=500,
+    model_kwargs={"quantization_config": quantization_config},
+    generate_kwargs={"temperature": 0.7, "top_k": 50, "top_p": 0.95},
+    # messages_to_prompt=messages_to_prompt,
+    # completion_to_prompt=completion_to_prompt,
+    device_map="auto",
+)
+if not os.path.exists(PERSIST_DIR):
+    # load the documents and create the index
+    documents = SimpleDirectoryReader(corpus_directory).load_data()
+    index = VectorStoreIndex.from_documents(documents)
+    # store it for later
+    index.storage_context.persist(persist_dir=PERSIST_DIR)
+else:
+    # load the existing index
+    storage_context = StorageContext.from_defaults(persist_dir=PERSIST_DIR)
+    index = load_index_from_storage(storage_context)
 query_engine = index.as_query_engine()
+# def chat():
+#     print("Chatbot is ready. Type 'exit' to end the conversation.")
+#     while True:
+#         user_input = input("You: ")
+#         if user_input.lower() == 'exit':
+#             print("Ending the chat. Goodbye!")
+#             break
+#         response = query_engine.query(user_input)
+#         print(f"Chatbot: {response}")
+def chatbot_response(message, history):
+    response = query_engine.query(message)
+    return str(response)
+iface = gr.ChatInterface(
+    fn=chatbot_response,
+    title="UESP Lore Chatbot",
+    description="Ask questions about The Elder Scrolls lore!",
+    examples=["Who is Vivec?", "Tell me about the Oblivion Crisis", "Who is King Edward?"],
+    cache_examples=True,
+)
 # Launch the interface
 if __name__ == "__main__":
+    # chat()
+    iface.launch()

requirements.txt CHANGED Viewed

@@ -1,6 +1,14 @@
 llama_index
-llama-index-llms-ollama
 llama-index-embeddings-huggingface
 gradio
-torch
-transformers

+python-dotenv~=1.0.1
 llama_index
 llama-index-embeddings-huggingface
+llama_index-llms-huggingface
+torch~=2.3.1
+transformers~=4.41.2
+numpy~=1.23.2
+openai~=1.35.3
+scikit-learn~=1.5.0
+requests~=2.32.3
+protobuf==3.20.1
 gradio
+bitsandbytes
+flash-attn==0.2.4

storage/default__vector_store.json CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f0da22f26551b6605b28f9262e36c0952d1d907c2bc2170aa31ee6d923087650
 size 200618254

 version https://git-lfs.github.com/spec/v1
+oid sha256:0b7f240d19ce80f9b2cdc3fe4819dbf024b823cfdccf53af1ad904d9a629e66e
 size 200618254

storage/docstore.json CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b99f747606c5b6661654f45b174819907ade179ea87d7b160e296a5ee93d28d9
 size 55587608

 version https://git-lfs.github.com/spec/v1
+oid sha256:135f2765537408ee1299e62888e82f2f07d42a00e8dc97dadd089ae46c17f64f
 size 55587608

storage/index_store.json CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:47cb58780a732145dd0bf65a2f59d40e8c0efe79bee17dbae61b860edba853ac
 size 958771

 version https://git-lfs.github.com/spec/v1
+oid sha256:407c0a3877b2d21c624d4aadcce49644edb452fe2b917c39cdce9a630e5ef0d5
 size 958771