Spaces:
Sleeping
Sleeping
emar
committed on
Commit
•
5561bd8
1
Parent(s):
67b46b4
added sliders
Browse files
app.py
CHANGED
@@ -15,6 +15,7 @@ PERSIST_DIR = './storage'
|
|
15 |
# Configure the settings
|
16 |
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
17 |
|
|
|
18 |
Settings.embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-base-en-v1.5", device="cpu")
|
19 |
|
20 |
Settings.llm = HuggingFaceLLM(
|
@@ -22,7 +23,7 @@ Settings.llm = HuggingFaceLLM(
|
|
22 |
tokenizer_name="meta-llama/Meta-Llama-3-8B-Instruct",
|
23 |
context_window=2048,
|
24 |
max_new_tokens=256,
|
25 |
-
generate_kwargs={"temperature": 0.
|
26 |
device_map="auto",
|
27 |
)
|
28 |
|
@@ -38,28 +39,35 @@ query_engine = index.as_query_engine(streaming=True, similarity_top_k=1, node_po
|
|
38 |
|
39 |
|
40 |
@spaces.GPU
|
41 |
-
def chatbot_response(message, context_window, max_new_tokens, temperature, top_k, top_p):
|
42 |
Settings.llm.context_window = context_window
|
43 |
Settings.llm.max_new_tokens = max_new_tokens
|
44 |
-
Settings.llm.generate_kwargs = {"temperature": temperature, "top_k": top_k, "top_p": top_p, "
|
45 |
response = query_engine.query(message)
|
46 |
return str(response)
|
47 |
|
48 |
-
|
49 |
-
|
50 |
-
|
51 |
-
|
52 |
-
|
53 |
-
gr.
|
54 |
-
|
55 |
-
|
56 |
-
|
57 |
-
|
58 |
-
|
59 |
-
|
60 |
-
)
|
61 |
-
|
|
|
|
|
|
|
|
|
62 |
|
|
|
|
|
|
|
63 |
|
64 |
if __name__ == "__main__":
|
65 |
iface.launch()
|
|
|
15 |
# Configure the settings
|
16 |
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
17 |
|
18 |
+
|
19 |
Settings.embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-base-en-v1.5", device="cpu")
|
20 |
|
21 |
Settings.llm = HuggingFaceLLM(
|
|
|
23 |
tokenizer_name="meta-llama/Meta-Llama-3-8B-Instruct",
|
24 |
context_window=2048,
|
25 |
max_new_tokens=256,
|
26 |
+
generate_kwargs={"temperature": 0.5, "top_k": 50, "top_p": 0.95},
|
27 |
device_map="auto",
|
28 |
)
|
29 |
|
|
|
39 |
|
40 |
|
41 |
@spaces.GPU
def chatbot_response(message, history, context_window, max_new_tokens, temperature, top_k, top_p):
    """Answer ``message`` through the RAG query engine using the slider values.

    ``history`` is part of the gr.ChatInterface callback signature but is not
    used here: each question is answered statelessly against the index.
    """
    # Push the UI-selected generation parameters onto the shared LLM config.
    # NOTE(review): mutating module-level Settings per request is not
    # concurrency-safe if several users hit the Space at once — confirm.
    Settings.llm.context_window = context_window
    Settings.llm.max_new_tokens = max_new_tokens
    Settings.llm.generate_kwargs = {"temperature": temperature, "top_k": top_k, "top_p": top_p, "do_sample": True}
    answer = query_engine.query(message)
    return str(answer)
|
48 |
|
49 |
+
# Build the Gradio UI: chat on the left, generation-parameter sliders on the
# right. The sliders are declared with render=False so they can be handed to
# the ChatInterface as additional_inputs before their column renders — the
# previous `slider.change(lambda x: x, ...)` loop was a no-op and the slider
# values never reached chatbot_response.
with gr.Blocks() as iface:
    gr.Markdown("# UESP Lore Chatbot: Running on top of Meta-Llama-3-8B-Instruct + BGE")
    gr.Markdown("Github page for use case, general information, local installs, etc: https://github.com/emarron/UESP-lore")

    context_window = gr.Slider(minimum=512, maximum=4096, step=256, value=2048, render=False, label="Context Window: (Default 2048): How many tokens you can ask.")
    max_new_tokens = gr.Slider(minimum=32, maximum=512, step=32, value=256, render=False, label="Max New Tokens:( Default: 256) How many tokens the response can be at max.")
    temperature = gr.Slider(minimum=0.1, maximum=1.0, step=0.1, value=0.7, render=False, label="(Default: 7) Temperature: How random are we allowed to be? Higher Number = More Random")
    top_k = gr.Slider(minimum=1, maximum=100, step=1, value=50, render=False, label="Top K: (Default: 50) Helps the model find a natural stopping point. Higher Number = More words/More likely to go off the rails.")
    top_p = gr.Slider(minimum=0.5, maximum=1.0, step=0.05, value=0.95, render=False, label="Top P: (Default: 0.95) Higher P = more diversity/randomness ")

    with gr.Row():
        with gr.Column(scale=3):
            # Connect the sliders to the chatbot: additional_inputs forwards
            # their live values to chatbot_response on every message.
            chatbot = gr.ChatInterface(
                fn=chatbot_response,
                additional_inputs=[context_window, max_new_tokens, temperature, top_k, top_p],
                examples=["Who is Zaraphus?", "What is the relation between Dragonbreak and CHIM?", "What is the Lunar Lorkhan?"],
                cache_examples=True,
            )
        with gr.Column(scale=1):
            # Render the pre-declared sliders in the side column.
            context_window.render()
            max_new_tokens.render()
            temperature.render()
            top_k.render()
            top_p.render()
|
71 |
|
72 |
# Launch the Gradio app only when executed as a script (not on import).
if __name__ == "__main__":
    iface.launch()
|