emar committed
Commit 09070fb
1 Parent(s): e0e7586
Files changed (1)
  1. app.py +33 -33
app.py CHANGED
@@ -13,46 +13,46 @@ PERSIST_DIR = './storage'
 # Configure the settings
 DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 
-@spaces.GPU
-def setup():
-    Settings.embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-base-en-v1.5", device="cpu")
+# @spaces.GPU
+# def setup():
+#     Settings.embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-base-en-v1.5", device="cpu")
 
-    Settings.llm = HuggingFaceLLM(
-        model_name="TinyLlama/TinyLlama-1.1B-Chat-v1.0",
-        tokenizer_name="TinyLlama/TinyLlama-1.1B-Chat-v1.0",
-        context_window=2048,
-        max_new_tokens=256,
-        generate_kwargs={"temperature": 0.7, "top_k": 50, "top_p": 0.95},
-        device_map="auto",
-    )
+#     Settings.llm = HuggingFaceLLM(
+#         model_name="TinyLlama/TinyLlama-1.1B-Chat-v1.0",
+#         tokenizer_name="TinyLlama/TinyLlama-1.1B-Chat-v1.0",
+#         context_window=2048,
+#         max_new_tokens=256,
+#         generate_kwargs={"temperature": 0.7, "top_k": 50, "top_p": 0.95},
+#         device_map="auto",
+#     )
 
 
-# load the existing index
-@spaces.GPU
-def load_context():
-    storage_context = StorageContext.from_defaults(persist_dir=PERSIST_DIR)
-    index = load_index_from_storage(storage_context)
-    query_engine = index.as_query_engine()
-    return query_engine
+# # load the existing index
+# @spaces.GPU
+# def load_context():
+#     storage_context = StorageContext.from_defaults(persist_dir=PERSIST_DIR)
+#     index = load_index_from_storage(storage_context)
+#     query_engine = index.as_query_engine()
+#     return query_engine
 
 
 
-@spaces.GPU
-def chatbot_response(message, history):
-    response = query_engine.query(message)
-    return str(response)
+# @spaces.GPU
+# def chatbot_response(message, history):
+#     response = query_engine.query(message)
+#     return str(response)
 
-setup()
-query_engine = load_context()
+# setup()
+# query_engine = load_context()
 
 
-iface = gr.ChatInterface(
-    fn=chatbot_response,
-    title="UESP Lore Chatbot: CPU bound version of Phi-3-mini",
-    description="Low quality and extremely slow version of the ones you can find on the github page: https://github.com/emarron/UESP-lore I am not paying to have Llama3 on here.",
-    examples=["Who is Zaraphus?"],
-    cache_examples=True,
-)
+# iface = gr.ChatInterface(
+#     fn=chatbot_response,
+#     title="UESP Lore Chatbot: CPU bound version of Phi-3-mini",
+#     description="Low quality and extremely slow version of the ones you can find on the github page: https://github.com/emarron/UESP-lore I am not paying to have Llama3 on here.",
+#     examples=["Who is Zaraphus?"],
+#     cache_examples=True,
+# )
 
-if __name__ == "__main__":
-    iface.launch()
+# if __name__ == "__main__":
+#     iface.launch()
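
Note: load_context() assumes a LlamaIndex vector index has already been persisted to ./storage; the commit never shows how that index was built. Below is a minimal sketch of the one-time build step, assuming LlamaIndex's standard persistence API and a hypothetical data/ directory holding the UESP lore documents (the script name and directory are illustrative, not from this repo):

# build_index.py -- one-time index build (sketch; "data/" is a hypothetical source directory)
from llama_index.core import SimpleDirectoryReader, Settings, VectorStoreIndex
from llama_index.embeddings.huggingface import HuggingFaceEmbedding

PERSIST_DIR = './storage'

# Use the same embedding model as app.py so the stored vectors match at query time
Settings.embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-base-en-v1.5", device="cpu")

# Read raw documents, embed them into a vector index, and persist it to disk
documents = SimpleDirectoryReader("data").load_data()
index = VectorStoreIndex.from_documents(documents)
index.storage_context.persist(persist_dir=PERSIST_DIR)

Run once before launching the Space; app.py then only reloads the persisted index via StorageContext.from_defaults(persist_dir=PERSIST_DIR) instead of re-embedding the corpus on every startup.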