emar committed
Commit b0752bd
1 Parent(s): 09070fb

back to shitty cpu for now

Files changed (1)
  1. app.py +33 -33
app.py CHANGED
@@ -13,46 +13,46 @@ PERSIST_DIR = './storage'
 # Configure the settings
 DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

-# @spaces.GPU
-# def setup():
-#     Settings.embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-base-en-v1.5", device="cpu")
+@spaces.GPU
+def setup():
+    Settings.embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-base-en-v1.5", device="cpu")

-#     Settings.llm = HuggingFaceLLM(
-#         model_name="TinyLlama/TinyLlama-1.1B-Chat-v1.0",
-#         tokenizer_name="TinyLlama/TinyLlama-1.1B-Chat-v1.0",
-#         context_window=2048,
-#         max_new_tokens=256,
-#         generate_kwargs={"temperature": 0.7, "top_k": 50, "top_p": 0.95},
-#         device_map="auto",
-#     )
+    Settings.llm = HuggingFaceLLM(
+        model_name="TinyLlama/TinyLlama-1.1B-Chat-v1.0",
+        tokenizer_name="TinyLlama/TinyLlama-1.1B-Chat-v1.0",
+        context_window=2048,
+        max_new_tokens=256,
+        generate_kwargs={"temperature": 0.7, "top_k": 50, "top_p": 0.95},
+        device_map="auto",
+    )


-# # load the existing index
-# @spaces.GPU
-# def load_context():
-#     storage_context = StorageContext.from_defaults(persist_dir=PERSIST_DIR)
-#     index = load_index_from_storage(storage_context)
-#     query_engine = index.as_query_engine()
-#     return query_engine
+# load the existing index
+@spaces.GPU
+def load_context():
+    storage_context = StorageContext.from_defaults(persist_dir=PERSIST_DIR)
+    index = load_index_from_storage(storage_context)
+    query_engine = index.as_query_engine()
+    return query_engine



-# @spaces.GPU
-# def chatbot_response(message, history):
-#     response = query_engine.query(message)
-#     return str(response)
+@spaces.GPU
+def chatbot_response(message, history):
+    response = query_engine.query(message)
+    return str(response)

-# setup()
-# query_engine = load_context()
+setup()
+query_engine = load_context()


-# iface = gr.ChatInterface(
-#     fn=chatbot_response,
-#     title="UESP Lore Chatbot: CPU bound version of Phi-3-mini",
-#     description="Low quality and extremely slow version of the ones you can find on the github page.: https://github.com/emarron/UESP-lore I am not paying to have Llama3 on here.",
-#     examples=["Who is Zaraphus?"],
-#     cache_examples=True,
-# )
+iface = gr.ChatInterface(
+    fn=chatbot_response,
+    title="UESP Lore Chatbot: CPU bound version of Phi-3-mini",
+    description="Low quality and extremely slow version of the ones you can find on the github page.: https://github.com/emarron/UESP-lore I am not paying to have Llama3 on here.",
+    examples=["Who is Zaraphus?"],
+    cache_examples=True,
+)

-# if __name__ == "__main__":
-#     iface.launch()
+if __name__ == "__main__":
+    iface.launch()
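
The re-enabled load_context() assumes a LlamaIndex index has already been persisted to PERSIST_DIR ('./storage'); load_index_from_storage() will fail if nothing is there. A minimal sketch of how such an index could have been built, assuming the same llama_index.core layout used above; the "lore_docs" source directory is a hypothetical placeholder, not a path from this repo:

# Hypothetical one-off build script; "lore_docs" is an assumed source directory.
from llama_index.core import SimpleDirectoryReader, VectorStoreIndex

documents = SimpleDirectoryReader("lore_docs").load_data()  # read the raw lore files
index = VectorStoreIndex.from_documents(documents)          # embeds using Settings.embed_model
index.storage_context.persist(persist_dir="./storage")      # writes what load_context() reads

Note that cache_examples=True makes gr.ChatInterface run chatbot_response on the example prompt at build time, which is why setup() and load_context() are called at module level before the interface is constructed.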