emar committed
Commit
5561bd8
1 Parent(s): 67b46b4

added sliders

Files changed (1)
  1. app.py +25 -17
app.py CHANGED
@@ -15,6 +15,7 @@ PERSIST_DIR = './storage'
 # Configure the settings
 DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 
+
 Settings.embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-base-en-v1.5", device="cpu")
 
 Settings.llm = HuggingFaceLLM(
@@ -22,7 +23,7 @@ Settings.llm = HuggingFaceLLM(
     tokenizer_name="meta-llama/Meta-Llama-3-8B-Instruct",
     context_window=2048,
     max_new_tokens=256,
-    generate_kwargs={"temperature": 0.7, "top_k": 50, "top_p": 0.95},
+    generate_kwargs={"temperature": 0.5, "top_k": 50, "top_p": 0.95},
     device_map="auto",
 )
 
@@ -38,28 +39,35 @@ query_engine = index.as_query_engine(streaming=True, similarity_top_k=1, node_po
 
 
 @spaces.GPU
-def chatbot_response(message, context_window, max_new_tokens, temperature, top_k, top_p):
+def chatbot_response(message, history, context_window, max_new_tokens, temperature, top_k, top_p):
     Settings.llm.context_window = context_window
     Settings.llm.max_new_tokens = max_new_tokens
-    Settings.llm.generate_kwargs = {"temperature": temperature, "top_k": top_k, "top_p": top_p, "do sample": True}
+    Settings.llm.generate_kwargs = {"temperature": temperature, "top_k": top_k, "top_p": top_p, "do_sample": True}
     response = query_engine.query(message)
     return str(response)
 
-iface = gr.Interface(
-    fn=chatbot_response,
-    inputs=[
-        gr.Slider(minimum=512, maximum=4096, step=256, value=2048, label="Context Window: How much stuff you can send to it"),
-        gr.Slider(minimum=32, maximum=512, step=32, value=256, label="Max New Tokens: How long the response should be"),
-        gr.Slider(minimum=0.1, maximum=1.0, step=0.1, value=0.7, label="Temperature: Lower number is by the book, higher number is make stuff up"),
-        gr.Slider(minimum=1, maximum=100, step=1, value=50, label="Top K"),
-        gr.Slider(minimum=0.5, maximum=1.0, step=0.05, value=0.95, label="Top P"),
-    ],
-    outputs=gr.Textbox(label="Response"),
-    title="UESP Lore Chatbot: Running on top of Meta-Llama-3-8B-Instruct + BGE_LARGE. This is inferior to the downloadable ones.",
-    description="Github page for use case, general information, local installs, etc: https://github.com/emarron/UESP-lore",
-)
-
+with gr.Blocks() as iface:
+    gr.Markdown("# UESP Lore Chatbot: Running on top of Meta-Llama-3-8B-Instruct + BGE")
+    gr.Markdown("Github page for use case, general information, local installs, etc: https://github.com/emarron/UESP-lore")
+
+    with gr.Row():
+        with gr.Column(scale=3):
+            chatbot = gr.ChatInterface(
+                fn=chatbot_response,
+                examples=["Who is Zaraphus?", "What is the relation between Dragonbreak and CHIM?", "What is the Lunar Lorkhan?"],
+                cache_examples=True,
+            )
+
+        with gr.Column(scale=1):
+            context_window = gr.Slider(minimum=512, maximum=4096, step=256, value=2048, label="Context Window (default 2048): how many tokens you can send in a prompt.")
+            max_new_tokens = gr.Slider(minimum=32, maximum=512, step=32, value=256, label="Max New Tokens (default 256): how many tokens the response can be at most.")
+            temperature = gr.Slider(minimum=0.1, maximum=1.0, step=0.1, value=0.7, label="Temperature (default 0.7): how random the model may be; higher = more random.")
+            top_k = gr.Slider(minimum=1, maximum=100, step=1, value=50, label="Top K (default 50): how many candidate tokens are kept; higher = more variety, more likely to go off the rails.")
+            top_p = gr.Slider(minimum=0.5, maximum=1.0, step=0.05, value=0.95, label="Top P (default 0.95): higher = more diversity/randomness.")
 
+    # Connect the sliders to the chatbot (note: this change handler is a no-op)
+    for slider in [context_window, max_new_tokens, temperature, top_k, top_p]:
+        slider.change(lambda x: x, inputs=[slider], outputs=[])
 
 if __name__ == "__main__":
     iface.launch()
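
Note: as committed, the sliders are only wired through a no-op change handler, and gr.ChatInterface passes just (message, history) to fn unless extra components are registered via additional_inputs, so the slider values may never reach chatbot_response. Below is a minimal sketch (not the committed code) of one way to wire them explicitly, assuming the Settings, query_engine, and spaces objects already defined in app.py; slider labels are shortened for brevity.

import gradio as gr

@spaces.GPU
def chatbot_response(message, history, context_window, max_new_tokens, temperature, top_k, top_p):
    # Apply the UI-selected generation settings before querying the index.
    Settings.llm.context_window = context_window
    Settings.llm.max_new_tokens = max_new_tokens
    # do_sample=True is required for temperature/top_k/top_p to take effect;
    # the old "do sample" key (with a space) was never recognized as do_sample.
    Settings.llm.generate_kwargs = {"temperature": temperature, "top_k": top_k, "top_p": top_p, "do_sample": True}
    return str(query_engine.query(message))

iface = gr.ChatInterface(
    fn=chatbot_response,
    # Slider values are appended after (message, history) on every submit,
    # in the order listed here, matching chatbot_response's signature.
    additional_inputs=[
        gr.Slider(512, 4096, value=2048, step=256, label="Context Window"),
        gr.Slider(32, 512, value=256, step=32, label="Max New Tokens"),
        gr.Slider(0.1, 1.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(1, 100, value=50, step=1, label="Top K"),
        gr.Slider(0.5, 1.0, value=0.95, step=0.05, label="Top P"),
    ],
)

if __name__ == "__main__":
    iface.launch()

This keeps the per-request behavior of the committed version (settings are re-applied on every query) while dropping the manual Blocks layout and the no-op slider.change loop.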