cetusian committed
Commit 5ffb836 · verified · 1 Parent(s): 00b9af3

Update app.py

Files changed (1):
  1. app.py +90 -37
app.py CHANGED
@@ -1,18 +1,18 @@
 import os
 import gradio as gr
-from huggingface_hub import login
-from huggingface_hub import InferenceClient
+from huggingface_hub import login, InferenceClient
 import spaces
 
-# Retrieve API key and authenticate
+# Authenticate with Hugging Face API key
 api_key = os.getenv("LLAMA")
 login(api_key)
 
-# Initialize InferenceClient for the Llama model
-client = InferenceClient("meta-llama/Llama-3.1-70B-Instruct")
+# Initialize InferenceClients for multiple models
+client1 = InferenceClient("meta-llama/Llama-3.1-70B-Instruct")
+client2 = InferenceClient("bigscience/bloom")
 
 @spaces.GPU
-def respond(
+def compare_models(
     message,
     history: list[dict],
     system_message,
@@ -22,17 +22,26 @@ def respond(
 ):
     # Start with the system message
     messages = [{"role": "system", "content": system_message}]
-
-    # Add the conversation history
-    messages += history
-
-    # Add the latest user message
-    messages.append({"role": "user", "content": message})
-
-    response = ""
-
-    # Send the conversation to the model and stream the response
-    for message in client.chat_completion(
+    messages += history  # Add conversation history
+    messages.append({"role": "user", "content": message})  # Add user message
+
+    # Fetch responses from both models
+    response1 = ""
+    response2 = ""
+
+    # Stream responses for Model 1
+    for message in client1.chat_completion(
+        messages,
+        max_tokens=max_tokens,
+        stream=True,
+        temperature=temperature,
+        top_p=top_p,
+    ):
+        token = message.choices[0].delta.content
+        response1 += token
+
+    # Stream responses for Model 2
+    for message in client2.chat_completion(
         messages,
         max_tokens=max_tokens,
         stream=True,
@@ -40,29 +49,73 @@ def respond(
         top_p=top_p,
     ):
         token = message.choices[0].delta.content
-        response += token
-        yield response
-
-# Initialize the Gradio ChatInterface with the new format
-demo = gr.ChatInterface(
-    respond,
-    type="messages",  # Use the OpenAI-style format
-    additional_inputs=[
-        gr.Textbox(
-            value="You are a helpful Customer Support assistant that specializes in the low-code software company: 'Plant an App' and tech-related topics.",
-            label="System message"
-        ),
-        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
-        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
-        gr.Slider(
-            minimum=0.1,
-            maximum=1.0,
-            value=0.95,
-            step=0.05,
-            label="Top-p (nucleus sampling)"
-        ),
-    ],
-)
+        response2 += token
+
+    # Return responses side-by-side
+    return response1, response2
+
+def handle_vote(vote, current_votes):
+    """Handle user votes."""
+    current_votes[vote] += 1
+    return f"Model 1: {current_votes['model1']} votes | Model 2: {current_votes['model2']} votes"
+
+# Initialize voting state
+votes = {"model1": 0, "model2": 0}
+
+# Create Gradio interface
+with gr.Blocks() as demo:
+    gr.Markdown("# AI Model Comparison Tool")
+    with gr.Row():
+        system_message = gr.Textbox(
+            value="You are a helpful assistant specializing in tech-related topics.",
+            label="System message",
+        )
+        max_tokens = gr.Slider(
+            minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"
+        )
+        temperature = gr.Slider(
+            minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"
+        )
+        top_p = gr.Slider(
+            minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p"
+        )
+
+    with gr.Row():
+        message = gr.Textbox(label="Enter your message")
+
+    compare_btn = gr.Button("Compare Models")
+
+    with gr.Row():
+        response1 = gr.Textbox(label="Response from Model 1")
+        response2 = gr.Textbox(label="Response from Model 2")
+
+    with gr.Row():
+        vote_model1 = gr.Button("Vote for Model 1")
+        vote_model2 = gr.Button("Vote for Model 2")
+
+    vote_status = gr.Textbox(
+        value=f"Model 1: {votes['model1']} votes | Model 2: {votes['model2']} votes",
+        label="Voting Results",
+    )
+
+    # Link components
+    compare_btn.click(
+        compare_models,
+        inputs=[message, [], system_message, max_tokens, temperature, top_p],
+        outputs=[response1, response2],
+    )
+
+    vote_model1.click(
+        handle_vote,
+        inputs=["model1", votes],
+        outputs=vote_status,
+    )
+
+    vote_model2.click(
+        handle_vote,
+        inputs=["model2", votes],
+        outputs=vote_status,
+    )
 
 if __name__ == "__main__":
     demo.launch()
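
A note on the new streaming loops: with stream=True, huggingface_hub's chat_completion yields chunks whose choices[0].delta.content can be None (on the final chunk in particular), so the unguarded `response1 += token` can raise a TypeError mid-stream; the loop variable is also named `message`, shadowing the user's message argument. Below is a minimal sketch of a safer accumulation, assuming the same LLAMA token, model id, and sampling values as the commit; it is not the code the author shipped.

    import os
    from huggingface_hub import login, InferenceClient

    login(os.getenv("LLAMA"))
    client1 = InferenceClient("meta-llama/Llama-3.1-70B-Instruct")

    messages = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "Hello!"},
    ]

    response1 = ""
    # Name the loop variable `chunk` so it does not shadow the user's message.
    for chunk in client1.chat_completion(
        messages,
        max_tokens=512,
        stream=True,
        temperature=0.7,
        top_p=0.95,
    ):
        token = chunk.choices[0].delta.content
        if token:  # delta.content can be None, e.g. on the final chunk
            response1 += token

The same guard applies to the client2 loop; and since compare_models returns only after both loops finish, the two models run sequentially rather than concurrently.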
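
Separately, the new event wiring passes raw Python values through inputs=: a literal [] for the history, the strings "model1"/"model2", and the module-level votes dict. Gradio's inputs= accepts components only, so these .click() calls will fail at runtime, and a module-level dict would in any case share one tally across all visitors. A minimal sketch of one way to rewire the voting, using gr.State for per-session state and functools.partial to identify the button; the reworked handle_vote signature here is my variation, not the committed one.

    import functools
    import gradio as gr

    def handle_vote(model_key, current_votes):
        # current_votes arrives from, and is written back to, a gr.State
        # component, so each browser session keeps its own tally.
        current_votes[model_key] += 1
        status = (f"Model 1: {current_votes['model1']} votes | "
                  f"Model 2: {current_votes['model2']} votes")
        return current_votes, status

    with gr.Blocks() as demo:
        votes = gr.State({"model1": 0, "model2": 0})  # per-session vote tally
        history = gr.State([])  # would replace the raw [] in compare_btn.click

        vote_status = gr.Textbox(label="Voting Results")
        vote_model1 = gr.Button("Vote for Model 1")
        vote_model2 = gr.Button("Vote for Model 2")

        # functools.partial bakes the button identity into the callback
        # instead of passing the string "model1" through inputs=.
        vote_model1.click(
            functools.partial(handle_vote, "model1"),
            inputs=[votes],
            outputs=[votes, vote_status],
        )
        vote_model2.click(
            functools.partial(handle_vote, "model2"),
            inputs=[votes],
            outputs=[votes, vote_status],
        )

    if __name__ == "__main__":
        demo.launch()

The same pattern would fix compare_btn.click: pass the history State in place of the raw [], and return the updated history as an extra output so the conversation persists across turns.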