Spaces:
Sleeping
Sleeping
emar
commited on
Commit
•
b0752bd
1
Parent(s):
09070fb
back to shitty cpu for now
Browse files
app.py
CHANGED
@@ -13,46 +13,46 @@ PERSIST_DIR = './storage'
|
|
13 |
# Configure the settings
|
14 |
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
15 |
|
16 |
-
|
17 |
-
|
18 |
-
|
19 |
|
20 |
-
|
21 |
-
|
22 |
-
|
23 |
-
|
24 |
-
|
25 |
-
|
26 |
-
|
27 |
-
|
28 |
|
29 |
|
30 |
-
#
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
-
|
37 |
|
38 |
|
39 |
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
|
45 |
-
|
46 |
-
|
47 |
|
48 |
|
49 |
-
|
50 |
-
|
51 |
-
|
52 |
-
|
53 |
-
|
54 |
-
|
55 |
-
|
56 |
|
57 |
-
|
58 |
-
|
|
|
13 |
# Configure the settings
|
14 |
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
15 |
|
16 |
@spaces.GPU
def setup():
    """Install the embedding model and LLM on llama-index's global Settings.

    The embedder is pinned to CPU; LLM placement is left to ``device_map``.
    """
    Settings.embed_model = HuggingFaceEmbedding(
        model_name="BAAI/bge-base-en-v1.5",
        device="cpu",
    )
    checkpoint = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
    Settings.llm = HuggingFaceLLM(
        model_name=checkpoint,
        tokenizer_name=checkpoint,
        context_window=2048,
        max_new_tokens=256,
        generate_kwargs={"temperature": 0.7, "top_k": 50, "top_p": 0.95},
        device_map="auto",
    )
|
28 |
|
29 |
|
30 |
# load the existing index
@spaces.GPU
def load_context():
    """Rehydrate the index persisted under PERSIST_DIR and return a query engine."""
    storage = StorageContext.from_defaults(persist_dir=PERSIST_DIR)
    return load_index_from_storage(storage).as_query_engine()
|
37 |
|
38 |
|
39 |
|
40 |
@spaces.GPU
def chatbot_response(message, history):
    """Gradio chat callback: answer *message* via the module-level query engine.

    *history* is supplied by gr.ChatInterface and is intentionally unused.
    """
    answer = query_engine.query(message)
    return str(answer)
|
44 |
|
45 |
# Import-time wiring: configure the models, then open the persisted index.
setup()
query_engine = load_context()

iface = gr.ChatInterface(
    chatbot_response,
    title="UESP Lore Chatbot: CPU bound version of Phi-3-mini",
    description="Low quality and extremely slow version of the ones you can find on the github page.: https://github.com/emarron/UESP-lore I am not paying to have Llama3 on here.",
    examples=["Who is Zaraphus?"],
    cache_examples=True,
)

if __name__ == "__main__":
    iface.launch()
|