emar committed · Commit 25a6760 · Parent(s): 4b94278
some modifications to hopefully fix gradio errors
app.py CHANGED
@@ -1,22 +1,23 @@
 import spaces
 import gradio as gr
 from llama_index.embeddings.huggingface import HuggingFaceEmbedding
-from llama_index.core import (
-    StorageContext,
-    load_index_from_storage, Settings,
-)
+from llama_index.core import StorageContext, load_index_from_storage, Settings
 from llama_index.llms.huggingface import HuggingFaceLLM
 import torch
+from pydantic import BaseModel

 PERSIST_DIR = './storage'

 # Configure the settings
 DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

+# Pydantic config to avoid protected namespace warning
+class Config(BaseModel):
+    model_config = {'protected_namespaces': ()}
+
 @spaces.GPU(duration=240)
 def setup():
-    Settings.embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-base-en-v1.5", device=
-
+    Settings.embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-base-en-v1.5", device=DEVICE)
     Settings.llm = HuggingFaceLLM(
         model_name="TinyLlama/TinyLlama-1.1B-Chat-v1.0",
         tokenizer_name="TinyLlama/TinyLlama-1.1B-Chat-v1.0",
@@ -28,7 +29,7 @@ def setup():

 setup()

-#
+# Load the existing index
 @spaces.GPU
 def load_context():
     storage_context = StorageContext.from_defaults(persist_dir=PERSIST_DIR)
@@ -36,21 +37,31 @@ def load_context():
     query_engine = index.as_query_engine()
     return query_engine

+query_engine = None
+
+def initialize_query_engine():
+    global query_engine
+    query_engine = load_context()

+# Initialize query engine at the start
+initialize_query_engine()

+# Chatbot response function
 @spaces.GPU
 def chatbot_response(message, history):
+    if query_engine is None:
+        initialize_query_engine()
     response = query_engine.query(message)
     return str(response)

-
-query_engine = load_context()
-
-
+# Initialize Gradio interface
 iface = gr.ChatInterface(
     fn=chatbot_response,
     title="UESP Lore Chatbot: CPU bound version of Phi-3-mini",
-    description=
+    description=(
+        "Low quality and extremely slow version of the ones you can find on the github page: "
+        "https://github.com/emarron/UESP-lore. I am not paying to have Llama3 on here."
+    ),
     examples=["Who is Zaraphus?"],
     cache_examples=True,
 )
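Note on the @spaces.GPU decorator that wraps setup(), load_context(), and chatbot_response(): on a Hugging Face ZeroGPU Space, a decorated function is handed a GPU only for the duration of the call, and duration= requests a longer allocation window in seconds. A minimal sketch of the pattern, assuming the standard `spaces` package; the `generate` function here is hypothetical, not from this commit:

import spaces
import torch

@spaces.GPU(duration=240)
def generate(prompt: str) -> str:
    # On ZeroGPU, CUDA is typically only available inside the decorated call
    device = "cuda" if torch.cuda.is_available() else "cpu"
    return f"running on {device}: {prompt}"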
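The added Config class targets Pydantic v2's protected-namespace warning: Pydantic warns whenever a field name starts with the reserved "model_" prefix, which model_name-style fields in LLM wrappers trigger. A minimal sketch of the mechanism, assuming Pydantic v2; the class names Warns and Quiet are illustrative only:

from pydantic import BaseModel

class Warns(BaseModel):
    model_name: str  # emits UserWarning: conflicts with protected namespace "model_"

class Quiet(BaseModel):
    model_config = {'protected_namespaces': ()}  # disables the namespace check
    model_name: str  # no warning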
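For reference, the gr.ChatInterface contract used above is: fn receives (message, history) and returns the reply as a string. A self-contained sketch under that assumption, with `echo` as a hypothetical stand-in for chatbot_response:

import gradio as gr

def echo(message, history):
    # history is the prior conversation; a reply string is returned
    return f"You said: {message}"

demo = gr.ChatInterface(
    fn=echo,
    title="Echo demo",
    examples=["Hello"],
)

if __name__ == "__main__":
    demo.launch()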