emar committed
Commit b0752bd
1 Parent(s): 09070fb

back to shitty cpu for now

Files changed (1)
  1. app.py +33 -33
app.py CHANGED
@@ -13,46 +13,46 @@ PERSIST_DIR = './storage'
 # Configure the settings
 DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

-# @spaces.GPU
-# def setup():
-#     Settings.embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-base-en-v1.5", device="cpu")
+@spaces.GPU
+def setup():
+    Settings.embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-base-en-v1.5", device="cpu")

-#     Settings.llm = HuggingFaceLLM(
-#         model_name="TinyLlama/TinyLlama-1.1B-Chat-v1.0",
-#         tokenizer_name="TinyLlama/TinyLlama-1.1B-Chat-v1.0",
-#         context_window=2048,
-#         max_new_tokens=256,
-#         generate_kwargs={"temperature": 0.7, "top_k": 50, "top_p": 0.95},
-#         device_map="auto",
-#     )
+    Settings.llm = HuggingFaceLLM(
+        model_name="TinyLlama/TinyLlama-1.1B-Chat-v1.0",
+        tokenizer_name="TinyLlama/TinyLlama-1.1B-Chat-v1.0",
+        context_window=2048,
+        max_new_tokens=256,
+        generate_kwargs={"temperature": 0.7, "top_k": 50, "top_p": 0.95},
+        device_map="auto",
+    )


-# # load the existing index
-# @spaces.GPU
-# def load_context():
-#     storage_context = StorageContext.from_defaults(persist_dir=PERSIST_DIR)
-#     index = load_index_from_storage(storage_context)
-#     query_engine = index.as_query_engine()
-#     return query_engine
+# load the existing index
+@spaces.GPU
+def load_context():
+    storage_context = StorageContext.from_defaults(persist_dir=PERSIST_DIR)
+    index = load_index_from_storage(storage_context)
+    query_engine = index.as_query_engine()
+    return query_engine



-# @spaces.GPU
-# def chatbot_response(message, history):
-#     response = query_engine.query(message)
-#     return str(response)
+@spaces.GPU
+def chatbot_response(message, history):
+    response = query_engine.query(message)
+    return str(response)

-# setup()
-# query_engine = load_context()
+setup()
+query_engine = load_context()


-# iface = gr.ChatInterface(
-#     fn=chatbot_response,
-#     title="UESP Lore Chatbot: CPU bound version of Phi-3-mini",
-#     description="Low quality and extremely slow version of the ones you can find on the github page.: https://github.com/emarron/UESP-lore I am not paying to have Llama3 on here.",
-#     examples=["Who is Zaraphus?"],
-#     cache_examples=True,
-# )
+iface = gr.ChatInterface(
+    fn=chatbot_response,
+    title="UESP Lore Chatbot: CPU bound version of Phi-3-mini",
+    description="Low quality and extremely slow version of the ones you can find on the github page.: https://github.com/emarron/UESP-lore I am not paying to have Llama3 on here.",
+    examples=["Who is Zaraphus?"],
+    cache_examples=True,
+)

-# if __name__ == "__main__":
-#     iface.launch()
+if __name__ == "__main__":
+    iface.launch()
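
The re-enabled load_context() assumes a LlamaIndex index has already been persisted to PERSIST_DIR ('./storage'); load_index_from_storage() will fail if nothing is there. A minimal sketch of how such an index could have been built, assuming the same llama_index.core layout used above; the "lore_docs" source directory is a hypothetical placeholder, not a path from this repo:

# Hypothetical one-off build script; "lore_docs" is an assumed source directory.
from llama_index.core import SimpleDirectoryReader, VectorStoreIndex

documents = SimpleDirectoryReader("lore_docs").load_data()  # read the raw lore files
index = VectorStoreIndex.from_documents(documents)          # embeds using Settings.embed_model
index.storage_context.persist(persist_dir="./storage")      # writes what load_context() reads

Note that cache_examples=True makes gr.ChatInterface run chatbot_response on the example prompt at build time, which is why setup() and load_context() are called at module level before the interface is constructed.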