emar committed · Commit 25a6760 · Parent(s): 4b94278
some modifications to hopefully fix gradio errors
app.py CHANGED
@@ -1,22 +1,23 @@
 import spaces
 import gradio as gr
 from llama_index.embeddings.huggingface import HuggingFaceEmbedding
-from llama_index.core import (
-    StorageContext,
-    load_index_from_storage, Settings,
-)
+from llama_index.core import StorageContext, load_index_from_storage, Settings
 from llama_index.llms.huggingface import HuggingFaceLLM
 import torch
+from pydantic import BaseModel

 PERSIST_DIR = './storage'

 # Configure the settings
 DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

+# Pydantic config to avoid protected namespace warning
+class Config(BaseModel):
+    model_config = {'protected_namespaces': ()}
+
 @spaces.GPU(duration=240)
 def setup():
-    Settings.embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-base-en-v1.5", device=
-
+    Settings.embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-base-en-v1.5", device=DEVICE)
     Settings.llm = HuggingFaceLLM(
         model_name="TinyLlama/TinyLlama-1.1B-Chat-v1.0",
         tokenizer_name="TinyLlama/TinyLlama-1.1B-Chat-v1.0",
@@ -28,7 +29,7 @@ def setup():

 setup()

-#
+# Load the existing index
 @spaces.GPU
 def load_context():
     storage_context = StorageContext.from_defaults(persist_dir=PERSIST_DIR)
@@ -36,21 +37,31 @@ def load_context():
     query_engine = index.as_query_engine()
     return query_engine

+query_engine = None
+
+def initialize_query_engine():
+    global query_engine
+    query_engine = load_context()

+# Initialize query engine at the start
+initialize_query_engine()

+# Chatbot response function
 @spaces.GPU
 def chatbot_response(message, history):
+    if query_engine is None:
+        initialize_query_engine()
     response = query_engine.query(message)
     return str(response)

-
-query_engine = load_context()
-
-
+# Initialize Gradio interface
 iface = gr.ChatInterface(
     fn=chatbot_response,
     title="UESP Lore Chatbot: CPU bound version of Phi-3-mini",
-    description=
+    description=(
+        "Low quality and extremely slow version of the ones you can find on the github page: "
+        "https://github.com/emarron/UESP-lore. I am not paying to have Llama3 on here."
+    ),
     examples=["Who is Zaraphus?"],
     cache_examples=True,
 )
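Note on the @spaces.GPU decorator that wraps setup(), load_context(), and chatbot_response(): on a Hugging Face ZeroGPU Space, a decorated function is handed a GPU only for the duration of the call, and duration= requests a longer allocation window in seconds. A minimal sketch of the pattern, assuming the standard `spaces` package; the `generate` function here is hypothetical, not from this commit:

import spaces
import torch

@spaces.GPU(duration=240)
def generate(prompt: str) -> str:
    # On ZeroGPU, CUDA is typically only available inside the decorated call
    device = "cuda" if torch.cuda.is_available() else "cpu"
    return f"running on {device}: {prompt}"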
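The added Config class targets Pydantic v2's protected-namespace warning: Pydantic warns whenever a field name starts with the reserved "model_" prefix, which model_name-style fields in LLM wrappers trigger. A minimal sketch of the mechanism, assuming Pydantic v2; the class names Warns and Quiet are illustrative only:

from pydantic import BaseModel

class Warns(BaseModel):
    model_name: str  # emits UserWarning: conflicts with protected namespace "model_"

class Quiet(BaseModel):
    model_config = {'protected_namespaces': ()}  # disables the namespace check
    model_name: str  # no warning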
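For reference, the gr.ChatInterface contract used above is: fn receives (message, history) and returns the reply as a string. A self-contained sketch under that assumption, with `echo` as a hypothetical stand-in for chatbot_response:

import gradio as gr

def echo(message, history):
    # history is the prior conversation; a reply string is returned
    return f"You said: {message}"

demo = gr.ChatInterface(
    fn=echo,
    title="Echo demo",
    examples=["Hello"],
)

if __name__ == "__main__":
    demo.launch()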