emar committed
Commit
6c79873
1 Parent(s): 4b52a00

reduce to simplicity?

Files changed (1)
  app.py  +18 -42
app.py CHANGED
@@ -1,67 +1,43 @@
-import spaces
+import os
 import gradio as gr
 from llama_index.embeddings.huggingface import HuggingFaceEmbedding
-from llama_index.core import StorageContext, load_index_from_storage, Settings
+from llama_index.core import (
+    StorageContext,
+    load_index_from_storage, Settings,
+)
 from llama_index.llms.huggingface import HuggingFaceLLM
 import torch
-from pydantic import BaseModel
-
 PERSIST_DIR = './storage'
 
 # Configure the settings
 DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 
-# Pydantic config to avoid protected namespace warning
-class Config(BaseModel):
-    model_config = {'protected_namespaces': ()}
-
-# @spaces.GPU(duration=240)
-def setup():
-    Settings.embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-base-en-v1.5", device=DEVICE)
-    Settings.llm = HuggingFaceLLM(
-        model_name="TinyLlama/TinyLlama-1.1B-Chat-v1.0",
-        tokenizer_name="TinyLlama/TinyLlama-1.1B-Chat-v1.0",
-        context_window=2048,
-        max_new_tokens=256,
-        generate_kwargs={"temperature": 0.7, "top_k": 50, "top_p": 0.95},
-        device_map="auto",
-    )
 
-setup()
+Settings.embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-base-en-v1.5", device="cpu")
 
-# Load the existing index
-# @spaces.GPU
-def load_context():
-    storage_context = StorageContext.from_defaults(persist_dir=PERSIST_DIR)
-    index = load_index_from_storage(storage_context)
-    query_engine = index.as_query_engine()
-    return query_engine
-
-query_engine = None
+Settings.llm = HuggingFaceLLM(
+    model_name="TinyLlama/TinyLlama-1.1B-Chat-v1.0",
+    tokenizer_name="TinyLlama/TinyLlama-1.1B-Chat-v1.0",
+    context_window=2048,
+    max_new_tokens=256,
+    generate_kwargs={"temperature": 0.7, "top_k": 50, "top_p": 0.95},
+    device_map="auto",
+)
 
-def initialize_query_engine():
-    global query_engine
-    query_engine = load_context()
+storage_context = StorageContext.from_defaults(persist_dir=PERSIST_DIR)
+index = load_index_from_storage(storage_context)
+query_engine = index.as_query_engine()
 
-# Initialize query engine at the start
-initialize_query_engine()
 
-# Chatbot response function
 @spaces.GPU
 def chatbot_response(message, history):
-    if query_engine is None:
-        initialize_query_engine()
     response = query_engine.query(message)
     return str(response)
 
-# Initialize Gradio interface
 iface = gr.ChatInterface(
     fn=chatbot_response,
     title="UESP Lore Chatbot: CPU bound version of Phi-3-mini",
-    description=(
-        "Low quality and extremely slow version of the ones you can find on the github page: "
-        "https://github.com/emarron/UESP-lore. I am not paying to have Llama3 on here."
-    ),
+    description="Low quality and extremely slow version of the ones you can find on the github page.: https://github.com/emarron/UESP-lore I am not paying to have Llama3 on here.",
    examples=["Who is Zaraphus?"],
    cache_examples=True,
 )
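
Note on the new app.py: the commit drops the `import spaces` line (replacing it with `import os`, which is not used anywhere in the file) while keeping the `@spaces.GPU` decorator on `chatbot_response`, so the module as committed would raise `NameError: name 'spaces' is not defined` when the Space starts. A minimal sketch of a possible follow-up, assuming the app is still meant to run on a ZeroGPU Space (the decorated function body is unchanged; the rest of the file stays as in this commit):

# sketch of a follow-up fix, not part of this commit
import spaces          # restores the name used by the @spaces.GPU decorator below
import gradio as gr    # unchanged
# ... remaining imports, Settings, and index/query_engine setup as in the new app.py ...

@spaces.GPU            # resolves only if `spaces` is imported; otherwise NameError at import time
def chatbot_response(message, history):
    response = query_engine.query(message)
    return str(response)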