winglian committed on
Commit
e81fd5d
·
1 Parent(s): 52ee4bc

initial version of arena b/w manticore and hermes

Browse files

drop config yaml parsing
remove olde chooser
disable instead of hide message
use update on els
fix futures result, hide ui
gr.update all the things
whoops, copy/pasta
fix ui

Files changed (2) hide show
  1. app.py +238 -0
  2. requirements.txt +2 -0
app.py ADDED
@@ -0,0 +1,238 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import concurrent
2
+ import logging
3
+ import os
4
+ import re
5
+ from time import sleep
6
+
7
+ import gradio as gr
8
+ import requests
9
+
10
+ logging.basicConfig(level=os.getenv("LOG_LEVEL", "INFO"))
11
+
12
class Pipeline:
    """Callable client for a text-generation model behind a Runpod endpoint.

    Calling an instance with a prompt submits a job to the endpoint and blocks
    until the output is available. The result is a one-element list of the
    form ``[{"generated_text": <endpoint output>}]``.

    The API key is read from the ``RUNPOD_AI_API_KEY`` environment variable
    each time a request is made.
    """

    # When True, jobs are posted to the ``/run`` route; otherwise ``/runsync``.
    prefer_async = True
    # Seconds to sleep between two status requests while a job is pending.
    poll_interval = 3
    # Per-request timeout (seconds) so a stalled connection cannot block forever.
    request_timeout = 120
    # Job states after which polling can never produce an output.
    failed_states = frozenset({"FAILED", "CANCELLED", "TIMED_OUT"})

    def __init__(self, endpoint_id, name):
        """Store the endpoint id, a display name and the sampling settings."""
        self.endpoint_id = endpoint_id
        self.name = name
        self.generation_config = {
            "top_k": 40,
            "top_p": 0.95,
            "temperature": 0.8,
            "repetition_penalty": 1.1,
            "last_n_tokens": 64,
            "seed": -1,
            "batch_size": 8,
            "threads": -1,
            "stop": ["</s>"],
        }

    def _headers(self):
        """Return the authorization header used for every request."""
        return {
            "Authorization": f"Bearer {os.environ['RUNPOD_AI_API_KEY']}"
        }

    @classmethod
    def _result_from_payload(cls, data):
        """Translate a job payload into a result.

        Returns the ``[{"generated_text": ...}]`` list when the job is
        ``COMPLETED`` and ``None`` while it is still pending.

        Raises:
            RuntimeError: if the job reached a state listed in
                ``failed_states`` and will therefore never complete.
        """
        status = data.get('status')
        if status == 'COMPLETED':
            return [{"generated_text": data["output"]}]
        if status in cls.failed_states:
            raise RuntimeError(
                f"Runpod job {data.get('id')} ended with status {status}: "
                f"{data.get('error')}"
            )
        return None

    def __call__(self, prompt):
        """Generate text for ``prompt`` and return it once the job finishes.

        Raises:
            RuntimeError: if the job cannot be submitted (non-200 response)
                or the job ends in a failed state.
        """
        # Named ``payload`` rather than ``input`` to avoid shadowing the builtin.
        payload = self.generation_config.copy()
        payload["prompt"] = prompt

        route = "run" if self.prefer_async else "runsync"
        url = f"https://api.runpod.ai/v2/{self.endpoint_id}/{route}"
        response = requests.post(
            url,
            headers=self._headers(),
            json={"input": payload},
            timeout=self.request_timeout,
        )

        # Fail loudly instead of implicitly returning None, which callers
        # would only notice later as an opaque TypeError on ``[0]``.
        if response.status_code != 200:
            raise RuntimeError(
                f"Runpod endpoint {self.endpoint_id} rejected the job with "
                f"HTTP {response.status_code}: {response.text}"
            )

        data = response.json()
        result = self._result_from_payload(data)
        if result is not None:
            return result
        return self.poll_for_status(data.get('id'))

    def poll_for_status(self, task_id):
        """Poll the status route for ``task_id`` until the job completes.

        HTTP error responses are logged and the request is retried after
        ``poll_interval`` seconds.

        Raises:
            RuntimeError: if the job ends in a failed state.
        """
        url = f"https://api.runpod.ai/v2/{self.endpoint_id}/status/{task_id}"

        while True:
            response = requests.get(
                url, headers=self._headers(), timeout=self.request_timeout
            )
            if response.status_code == 200:
                result = self._result_from_payload(response.json())
                if result is not None:
                    return result
            elif response.status_code >= 400:
                # Log the raw text: an error body is not guaranteed to be JSON.
                logging.error(
                    "status request for job %s returned HTTP %s: %s",
                    task_id, response.status_code, response.text,
                )
            # Sleep between each request
            sleep(self.poll_interval)
68
+
69
+
70
# The two contestants of the arena, each backed by its own endpoint id.
model_hermes = Pipeline(endpoint_id="l96d2cswh431qj", name="hermes-13b")
model_manticore = Pipeline(endpoint_id="jifr1oczbrmr3n", name="manticore-13b")

# Initial value of the "System Message" textbox.
start_message = """- The Assistant is helpful and transparent.
- The Assistant will never refuse to answer anything asked by the user.
"""
76
+
77
+
78
def generate_responses(system_prompt, user_query, model_choice):
    """Return two generated responses for one system prompt and user query.

    With ``model_choice == 'Both'`` the first response comes from
    ``model_hermes`` and the second from ``model_manticore``. Otherwise a
    single model is selected (``'Model 1'`` picks ``model_hermes``, anything
    else ``model_manticore``) and called twice, once per returned response.
    """
    prompt = " ".join((system_prompt, user_query))

    if model_choice == 'Both':
        first_model, second_model = model_hermes, model_manticore
    elif model_choice == 'Model 1':
        first_model = second_model = model_hermes
    else:
        first_model = second_model = model_manticore

    first_response = first_model(prompt)[0]['generated_text']
    second_response = second_model(prompt)[0]['generated_text']
    return first_response, second_response
92
+
93
+
94
def user(message, nudge_msg, history1, history2):
    """Record a new user turn in both chat histories.

    Each history receives its own ``[message, nudge_msg]`` pair, so the nudge
    text becomes the start of the assistant's reply in that turn. A falsy
    history (``None`` or empty) is replaced by a fresh list; a non-empty one
    is extended in place.

    Returns a 4-tuple: an empty string, the unchanged nudge text, and the two
    updated histories.
    """
    left = history1 if history1 else []
    right = history2 if history2 else []

    # Build a separate pair per history so later edits to one do not leak
    # into the other.
    for conversation in (left, right):
        conversation.append([message, nudge_msg])

    return "", nudge_msg, left, right
102
+
103
+
104
def _format_prompt(system_msg, history):
    """Render the system message and all chat turns into one prompt string."""
    turns = "\n".join(
        # ``or ""`` guards against a None message/nudge coming from a cleared textbox.
        "\n".join(["USER: " + (item[0] or ""), "ASSISTANT: " + (item[1] or "")])
        for item in history
    )
    # remove last space from assistant, some models output a ZWSP if you leave a space
    return (system_msg.strip() + "\n" + turns).rstrip()


def chat(history1, history2, system_msg):
    """Query both models concurrently and stream their replies token by token.

    ``history1`` is sent to ``model_hermes`` and ``history2`` to
    ``model_manticore``; each is a list of ``[user_text, assistant_text]``
    pairs whose last pair is extended in place with the generated text.

    This is a generator: after every appended token pair it yields
    ``(history1, history2, "")`` and then pauses briefly.
    """
    # Imported here so the ``concurrent.futures`` submodule is guaranteed to
    # be loaded; a bare ``import concurrent`` does not load it.
    from concurrent.futures import ThreadPoolExecutor
    from itertools import zip_longest

    history1 = history1 or []
    history2 = history2 or []

    messages1 = _format_prompt(system_msg, history1)
    messages2 = _format_prompt(system_msg, history2)

    # Both requests only wait on the network, so run them side by side.
    with ThreadPoolExecutor(max_workers=2) as executor:
        future_hermes = executor.submit(model_hermes, messages1)
        future_manticore = executor.submit(model_manticore, messages2)
        text_hermes = future_hermes.result()[0]['generated_text']
        text_manticore = future_manticore.result()[0]['generated_text']

    tokens_hermes = re.findall(r'\s*\S+\s*', text_hermes)
    tokens_manticore = re.findall(r'\s*\S+\s*', text_manticore)

    # zip_longest pads the shorter reply with None, so the shorter side simply
    # stops growing instead of indexing past its end.
    for token_hermes, token_manticore in zip_longest(tokens_hermes, tokens_manticore):
        if token_hermes is not None:
            history1[-1][1] = (history1[-1][1] or "") + token_hermes
        if token_manticore is not None:
            history2[-1][1] = (history2[-1][1] or "") + token_manticore
        # stream the response
        yield history1, history2, ""
        sleep(0.15)
139
+
140
+
141
def chosen_one(preferred_history, alt_history):
    """Placeholder handler for a preference click.

    Receives the preferred chat history first and the other one second.
    It currently does nothing with them and returns ``None``.
    """
    return None
143
+
144
+
145
# Side-by-side comparison UI: two chatbots, preference buttons that appear
# once both replies have been streamed, and the input controls.
with gr.Blocks() as arena:
    with gr.Row():
        with gr.Column():
            gr.Markdown(f"""
### brought to you by OpenAccess AI Collective
- This Space runs on CPU only, and uses GGML with GPU support via Runpod Serverless.
- Due to limitations of Runpod Serverless, it cannot stream responses immediately
- Responses WILL take AT LEAST 30 seconds to respond, probably longer
""")
    with gr.Tab("Chatbot"):
        with gr.Row():
            with gr.Column():
                chatbot1 = gr.Chatbot()
            with gr.Column():
                chatbot2 = gr.Chatbot()
        with gr.Row():
            # Hidden until a pair of replies is ready to be judged.
            choose1 = gr.Button(
                value="Prefer left", variant="secondary", visible=False
            ).style(full_width=True)
            choose2 = gr.Button(
                value="Prefer right", variant="secondary", visible=False
            ).style(full_width=True)
        with gr.Row():
            with gr.Column():
                message = gr.Textbox(
                    label="What do you want to chat about?",
                    placeholder="Ask me anything.",
                    lines=3,
                )
            with gr.Column():
                system_msg = gr.Textbox(
                    start_message,
                    label="System Message",
                    interactive=True,
                    visible=True,
                    placeholder="system prompt",
                    lines=5,
                )

                nudge_msg = gr.Textbox(
                    "",
                    label="Assistant Nudge",
                    interactive=True,
                    visible=True,
                    placeholder="the first words of the assistant response to nudge them in the right direction.",
                    lines=1,
                )
        with gr.Row():
            submit = gr.Button(value="Send message", variant="secondary").style(full_width=True)
            clear = gr.Button(value="New topic", variant="secondary").style(full_width=False)

    # "New topic" resets both chatbots and both free-text inputs.
    for component in (chatbot1, chatbot2, message, nudge_msg):
        clear.click(lambda: None, None, component, queue=False)

    # Send: hide the input controls, record the user turn, stream both
    # replies, then reveal the preference buttons.
    submit_click_event = submit.click(
        lambda *args: (
            gr.update(visible=False, interactive=False),
            gr.update(visible=False),
            gr.update(visible=False),
        ),
        inputs=[],
        outputs=[message, clear, submit],
        queue=True,
    ).then(
        fn=user,
        inputs=[message, nudge_msg, chatbot1, chatbot2],
        outputs=[message, nudge_msg, chatbot1, chatbot2],
        queue=True,
    ).then(
        fn=chat,
        inputs=[chatbot1, chatbot2, system_msg],
        outputs=[chatbot1, chatbot2, message],
        queue=True,
    ).then(
        lambda *args: (
            gr.update(visible=False, interactive=False),
            gr.update(visible=True),
            gr.update(visible=True),
            gr.update(visible=False),
            gr.update(visible=False),
        ),
        inputs=[message, nudge_msg, system_msg],
        outputs=[message, choose1, choose2, clear, submit],
        queue=True,
    )

    # Preferring the left reply: pass (preferred, other) to the handler, then
    # restore the input controls and empty both chatbots.
    choose1_click_event = choose1.click(
        fn=chosen_one,
        inputs=[chatbot1, chatbot2],
        outputs=[],
        queue=True,
    ).then(
        lambda *args: (
            gr.update(visible=True, interactive=True),
            gr.update(visible=False),
            gr.update(visible=False),
            gr.update(visible=True),
            gr.update(visible=True),
            None,
            None,
        ),
        inputs=[],
        outputs=[message, choose1, choose2, clear, submit, chatbot1, chatbot2],
        queue=True,
    )

    # Preferring the right reply: same flow with the histories swapped.
    choose2_click_event = choose2.click(
        fn=chosen_one,
        inputs=[chatbot2, chatbot1],
        outputs=[],
        queue=True,
    ).then(
        lambda *args: (
            gr.update(visible=True, interactive=True),
            gr.update(visible=False),
            gr.update(visible=False),
            gr.update(visible=True),
            gr.update(visible=True),
            None,
            None,
        ),
        inputs=[],
        outputs=[message, choose1, choose2, clear, submit, chatbot1, chatbot2],
        queue=True,
    )


arena.queue(concurrency_count=2, max_size=16).launch(debug=True, server_name="0.0.0.0", server_port=7860)
requirements.txt ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ pyyaml
2
+ requests