emar committed
Commit
5561bd8
1 Parent(s): 67b46b4

added sliders

Files changed (1)
  1. app.py +25 -17
app.py CHANGED
@@ -15,6 +15,7 @@ PERSIST_DIR = './storage'
 # Configure the settings
 DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 
+
 Settings.embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-base-en-v1.5", device="cpu")
 
 Settings.llm = HuggingFaceLLM(
@@ -22,7 +23,7 @@ Settings.llm = HuggingFaceLLM(
     tokenizer_name="meta-llama/Meta-Llama-3-8B-Instruct",
     context_window=2048,
     max_new_tokens=256,
-    generate_kwargs={"temperature": 0.7, "top_k": 50, "top_p": 0.95},
+    generate_kwargs={"temperature": 0.5, "top_k": 50, "top_p": 0.95},
     device_map="auto",
 )
 
@@ -38,28 +39,35 @@ query_engine = index.as_query_engine(streaming=True, similarity_top_k=1, node_po
 
 
 @spaces.GPU
-def chatbot_response(message, context_window, max_new_tokens, temperature, top_k, top_p):
+def chatbot_response(message, history, context_window, max_new_tokens, temperature, top_k, top_p):
     Settings.llm.context_window = context_window
     Settings.llm.max_new_tokens = max_new_tokens
-    Settings.llm.generate_kwargs = {"temperature": temperature, "top_k": top_k, "top_p": top_p, "do sample": True}
+    Settings.llm.generate_kwargs = {"temperature": temperature, "top_k": top_k, "top_p": top_p, "do_sample": True}
     response = query_engine.query(message)
     return str(response)
 
-iface = gr.Interface(
-    fn=chatbot_response,
-    inputs=[
-        gr.Slider(minimum=512, maximum=4096, step=256, value=2048, label="Context Window: How much stuff you can send to it"),
-        gr.Slider(minimum=32, maximum=512, step=32, value=256, label="Max New Tokens: How long the response should be"),
-        gr.Slider(minimum=0.1, maximum=1.0, step=0.1, value=0.7, label="Temperature: Lower number is by the book, higher number is make stuff up"),
-        gr.Slider(minimum=1, maximum=100, step=1, value=50, label="Top K"),
-        gr.Slider(minimum=0.5, maximum=1.0, step=0.05, value=0.95, label="Top P"),
-    ],
-    outputs=gr.Textbox(label="Response"),
-    title="UESP Lore Chatbot: Running on top of Meta-Llama-3-8B-Instruct + BGE_LARGE. This is inferior to the downloadable ones.",
-    description="Github page for use case, general information, local installs, etc: https://github.com/emarron/UESP-lore",
-)
-
+with gr.Blocks() as iface:
+    gr.Markdown("# UESP Lore Chatbot: Running on top of Meta-Llama-3-8B-Instruct + BGE")
+    gr.Markdown("Github page for use case, general information, local installs, etc: https://github.com/emarron/UESP-lore")
+
+    with gr.Row():
+        with gr.Column(scale=3):
+            chatbot = gr.ChatInterface(
+                fn=chatbot_response,
+                examples=["Who is Zaraphus?", "What is the relation between Dragonbreak and CHIM?", "What is the Lunar Lorkhan?"],
+                cache_examples=True,
+            )
+
+        with gr.Column(scale=1):
+            context_window = gr.Slider(minimum=512, maximum=4096, step=256, value=2048, label="Context Window (default 2048): how many tokens you can send in a prompt.")
+            max_new_tokens = gr.Slider(minimum=32, maximum=512, step=32, value=256, label="Max New Tokens (default 256): how many tokens the response can be at most.")
+            temperature = gr.Slider(minimum=0.1, maximum=1.0, step=0.1, value=0.7, label="Temperature (default 0.7): how random the model may be; higher = more random.")
+            top_k = gr.Slider(minimum=1, maximum=100, step=1, value=50, label="Top K (default 50): how many candidate tokens are kept; higher = more variety, more likely to go off the rails.")
+            top_p = gr.Slider(minimum=0.5, maximum=1.0, step=0.05, value=0.95, label="Top P (default 0.95): higher = more diversity/randomness.")
 
+    # Connect the sliders to the chatbot (note: this change handler is a no-op)
+    for slider in [context_window, max_new_tokens, temperature, top_k, top_p]:
+        slider.change(lambda x: x, inputs=[slider], outputs=[])
 
 if __name__ == "__main__":
     iface.launch()
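
Note: as committed, the sliders are only wired through a no-op change handler, and gr.ChatInterface passes just (message, history) to fn unless extra components are registered via additional_inputs, so the slider values may never reach chatbot_response. Below is a minimal sketch (not the committed code) of one way to wire them explicitly, assuming the Settings, query_engine, and spaces objects already defined in app.py; slider labels are shortened for brevity.

import gradio as gr

@spaces.GPU
def chatbot_response(message, history, context_window, max_new_tokens, temperature, top_k, top_p):
    # Apply the UI-selected generation settings before querying the index.
    Settings.llm.context_window = context_window
    Settings.llm.max_new_tokens = max_new_tokens
    # do_sample=True is required for temperature/top_k/top_p to take effect;
    # the old "do sample" key (with a space) was never recognized as do_sample.
    Settings.llm.generate_kwargs = {"temperature": temperature, "top_k": top_k, "top_p": top_p, "do_sample": True}
    return str(query_engine.query(message))

iface = gr.ChatInterface(
    fn=chatbot_response,
    # Slider values are appended after (message, history) on every submit,
    # in the order listed here, matching chatbot_response's signature.
    additional_inputs=[
        gr.Slider(512, 4096, value=2048, step=256, label="Context Window"),
        gr.Slider(32, 512, value=256, step=32, label="Max New Tokens"),
        gr.Slider(0.1, 1.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(1, 100, value=50, step=1, label="Top K"),
        gr.Slider(0.5, 1.0, value=0.95, step=0.05, label="Top P"),
    ],
)

if __name__ == "__main__":
    iface.launch()

This keeps the per-request behavior of the committed version (settings are re-applied on every query) while dropping the manual Blocks layout and the no-op slider.change loop.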