Haofei Yu
committed on
Commit
•
d98f331
1
Parent(s):
084fe8e
update UI (#23)
Browse files- app.py +72 -63
- images/banner.jpg +0 -0
- images/{CTM-AI.png → icon.png} +0 -0
- ui_constants.py +191 -0
app.py
CHANGED
@@ -12,6 +12,7 @@ ctm.add_supervisor("gpt4_supervisor")
|
|
12 |
DEPLOYED = os.getenv("DEPLOYED", "true").lower() == "true"
|
13 |
|
14 |
|
|
|
15 |
def convert_base64(image_array):
|
16 |
image = Image.fromarray(image_array)
|
17 |
buffer = io.BytesIO()
|
@@ -23,12 +24,7 @@ def convert_base64(image_array):
|
|
23 |
|
24 |
def introduction():
|
25 |
with gr.Column(scale=2):
|
26 |
-
gr.Image("images/
|
27 |
-
with gr.Column(scale=5):
|
28 |
-
gr.Markdown(
|
29 |
-
"""Consciousness Turing Machine Demo
|
30 |
-
"""
|
31 |
-
)
|
32 |
|
33 |
|
34 |
def add_processor(processor_name, display_name, state):
|
@@ -36,7 +32,10 @@ def add_processor(processor_name, display_name, state):
|
|
36 |
ctm.add_processor(processor_name)
|
37 |
print(ctm.processor_group_map)
|
38 |
print(len(ctm.processor_list))
|
39 |
-
return
|
|
|
|
|
|
|
40 |
|
41 |
|
42 |
def processor_tab():
|
@@ -55,10 +54,9 @@ def processor_tab():
|
|
55 |
"gpt4v_scene_location_processor",
|
56 |
]
|
57 |
|
58 |
-
with gr.
|
59 |
with gr.Row():
|
60 |
-
with gr.
|
61 |
-
gr.Markdown("### Text Processors")
|
62 |
for model_name in text_processors:
|
63 |
display_name = (
|
64 |
model_name.replace("processor", "")
|
@@ -66,7 +64,10 @@ def processor_tab():
|
|
66 |
.title()
|
67 |
)
|
68 |
|
69 |
-
button = gr.Button(
|
|
|
|
|
|
|
70 |
processor_name = gr.Textbox(
|
71 |
value=model_name, visible=False
|
72 |
)
|
@@ -79,8 +80,6 @@ def processor_tab():
|
|
79 |
outputs=[button],
|
80 |
)
|
81 |
|
82 |
-
with gr.Column(scale=1):
|
83 |
-
gr.Markdown("### Vision Processors")
|
84 |
for model_name in vision_processors:
|
85 |
display_name = (
|
86 |
model_name.replace("processor", "")
|
@@ -88,7 +87,10 @@ def processor_tab():
|
|
88 |
.title()
|
89 |
)
|
90 |
|
91 |
-
button = gr.Button(
|
|
|
|
|
|
|
92 |
processor_name = gr.Textbox(
|
93 |
value=model_name, visible=False
|
94 |
)
|
@@ -117,6 +119,10 @@ def forward(query, content, image, state):
|
|
117 |
uptree_competition_output_info,
|
118 |
ask_supervisor_output_info,
|
119 |
state,
|
|
|
|
|
|
|
|
|
120 |
)
|
121 |
|
122 |
|
@@ -129,7 +135,8 @@ def ask_processors(query, text, image, state):
|
|
129 |
)
|
130 |
output_info = ""
|
131 |
for name, info in processor_output.items():
|
132 |
-
|
|
|
133 |
state["processor_output"] = processor_output
|
134 |
return output_info, state
|
135 |
|
@@ -138,8 +145,8 @@ def uptree_competition(state):
|
|
138 |
winning_output = ctm.uptree_competition(state["processor_output"])
|
139 |
state["winning_output"] = winning_output
|
140 |
output_info = (
|
141 |
-
"
|
142 |
-
winning_output["name"], winning_output["gist"]
|
143 |
)
|
144 |
)
|
145 |
return output_info, state
|
@@ -149,66 +156,68 @@ def ask_supervisor(state):
|
|
149 |
question = state["question"]
|
150 |
winning_output = state["winning_output"]
|
151 |
answer, score = ctm.ask_supervisor(question, winning_output)
|
152 |
-
output_info =
|
153 |
state["answer"] = answer
|
154 |
state["score"] = score
|
155 |
return output_info, state
|
156 |
|
157 |
|
158 |
-
def
|
159 |
-
|
160 |
-
|
161 |
-
|
162 |
-
with gr.
|
163 |
-
|
164 |
-
|
165 |
-
|
166 |
-
|
167 |
-
|
168 |
-
|
169 |
-
|
170 |
-
|
171 |
-
|
172 |
-
|
173 |
-
|
174 |
-
|
175 |
-
|
176 |
-
|
177 |
-
|
178 |
-
|
179 |
-
|
180 |
-
|
181 |
-
|
182 |
-
|
183 |
-
|
184 |
-
|
185 |
-
|
186 |
-
|
187 |
-
|
188 |
-
|
189 |
-
|
190 |
-
|
191 |
-
supervisor_output,
|
192 |
-
state,
|
193 |
-
],
|
194 |
-
)
|
195 |
-
return interface_tab
|
196 |
|
197 |
|
198 |
def main():
|
199 |
with gr.Blocks(
|
200 |
css="""#chat_container {height: 820px; width: 1000px; margin-left: auto; margin-right: auto;}
|
201 |
-
|
202 |
-
|
203 |
-
|
204 |
-
|
|
|
|
|
|
|
|
|
|
|
205 |
) as demo:
|
206 |
with gr.Row():
|
207 |
introduction()
|
208 |
with gr.Row():
|
209 |
-
|
210 |
-
|
211 |
-
|
|
|
|
|
212 |
return demo
|
213 |
|
214 |
|
|
|
12 |
DEPLOYED = os.getenv("DEPLOYED", "true").lower() == "true"
|
13 |
|
14 |
|
15 |
+
|
16 |
def convert_base64(image_array):
|
17 |
image = Image.fromarray(image_array)
|
18 |
buffer = io.BytesIO()
|
|
|
24 |
|
25 |
def introduction():
|
26 |
with gr.Column(scale=2):
|
27 |
+
gr.Image("images/banner.jpg", elem_id="banner-image", show_label=False)
|
|
|
|
|
|
|
|
|
|
|
28 |
|
29 |
|
30 |
def add_processor(processor_name, display_name, state):
|
|
|
32 |
ctm.add_processor(processor_name)
|
33 |
print(ctm.processor_group_map)
|
34 |
print(len(ctm.processor_list))
|
35 |
+
return gr.Button(
|
36 |
+
value=display_name,
|
37 |
+
elem_id="selected"
|
38 |
+
)
|
39 |
|
40 |
|
41 |
def processor_tab():
|
|
|
54 |
"gpt4v_scene_location_processor",
|
55 |
]
|
56 |
|
57 |
+
with gr.Accordion('Select your processors here.'):
|
58 |
with gr.Row():
|
59 |
+
with gr.Blocks():
|
|
|
60 |
for model_name in text_processors:
|
61 |
display_name = (
|
62 |
model_name.replace("processor", "")
|
|
|
64 |
.title()
|
65 |
)
|
66 |
|
67 |
+
button = gr.Button(
|
68 |
+
value=display_name,
|
69 |
+
elem_id="unselected"
|
70 |
+
)
|
71 |
processor_name = gr.Textbox(
|
72 |
value=model_name, visible=False
|
73 |
)
|
|
|
80 |
outputs=[button],
|
81 |
)
|
82 |
|
|
|
|
|
83 |
for model_name in vision_processors:
|
84 |
display_name = (
|
85 |
model_name.replace("processor", "")
|
|
|
87 |
.title()
|
88 |
)
|
89 |
|
90 |
+
button = gr.Button(
|
91 |
+
value=display_name,
|
92 |
+
elem_id="unselected"
|
93 |
+
)
|
94 |
processor_name = gr.Textbox(
|
95 |
value=model_name, visible=False
|
96 |
)
|
|
|
119 |
uptree_competition_output_info,
|
120 |
ask_supervisor_output_info,
|
121 |
state,
|
122 |
+
gr.Button(
|
123 |
+
value="Update CTM",
|
124 |
+
elem_id="selected-ctm",
|
125 |
+
)
|
126 |
)
|
127 |
|
128 |
|
|
|
135 |
)
|
136 |
output_info = ""
|
137 |
for name, info in processor_output.items():
|
138 |
+
gist = info["gist"].replace("\n", "").strip()
|
139 |
+
output_info += f"<{name}>\n{gist}\n\n"
|
140 |
state["processor_output"] = processor_output
|
141 |
return output_info, state
|
142 |
|
|
|
145 |
winning_output = ctm.uptree_competition(state["processor_output"])
|
146 |
state["winning_output"] = winning_output
|
147 |
output_info = (
|
148 |
+
"<{}>\n{}".format(
|
149 |
+
winning_output["name"], winning_output["gist"].replace("\n", "").strip()
|
150 |
)
|
151 |
)
|
152 |
return output_info, state
|
|
|
156 |
question = state["question"]
|
157 |
winning_output = state["winning_output"]
|
158 |
answer, score = ctm.ask_supervisor(question, winning_output)
|
159 |
+
output_info = answer
|
160 |
state["answer"] = answer
|
161 |
state["score"] = score
|
162 |
return output_info, state
|
163 |
|
164 |
|
165 |
+
def input_tab():
|
166 |
+
state = gr.State({}) # State to hold and pass values
|
167 |
+
|
168 |
+
with gr.Accordion("Enter your input here."):
|
169 |
+
with gr.Row():
|
170 |
+
query = gr.Textbox(label="Query", placeholder="Type your query here", lines=3)
|
171 |
+
|
172 |
+
with gr.Row():
|
173 |
+
text = gr.Textbox(label="Text Input", placeholder="Input text data", lines=11)
|
174 |
+
image = gr.Image(label="Image Input")
|
175 |
+
return query, text, image, state
|
176 |
+
|
177 |
+
def output_tab(query, text, image, state):
|
178 |
+
|
179 |
+
with gr.Accordion("Check your outputs here."):
|
180 |
+
processors_output = gr.Textbox(label="STM Chunks", visible=True, lines=5)
|
181 |
+
competition_output = gr.Textbox(label="Winning Chunk", visible=True, lines=3)
|
182 |
+
supervisor_output = gr.Textbox(label="Answer", visible=True, lines=2)
|
183 |
+
|
184 |
+
forward_button = gr.Button("Launch CTM", elem_id="unselected-ctm")
|
185 |
+
|
186 |
+
forward_button.click(
|
187 |
+
fn=forward,
|
188 |
+
inputs=[query, text, image, state],
|
189 |
+
outputs=[
|
190 |
+
processors_output,
|
191 |
+
competition_output,
|
192 |
+
supervisor_output,
|
193 |
+
state,
|
194 |
+
forward_button,
|
195 |
+
],
|
196 |
+
)
|
197 |
+
|
|
|
|
|
|
|
|
|
|
|
198 |
|
199 |
|
200 |
def main():
|
201 |
with gr.Blocks(
|
202 |
css="""#chat_container {height: 820px; width: 1000px; margin-left: auto; margin-right: auto;}
|
203 |
+
#chatbot {height: 600px; overflow: auto;}
|
204 |
+
#create_container {height: 750px; margin-left: 0px; margin-right: 0px;}
|
205 |
+
#tokenizer_renderer span {white-space: pre-wrap}
|
206 |
+
#selected {background-color: orange; width: 180px}
|
207 |
+
#unselected {width: 180px;}
|
208 |
+
#selected-ctm {background-color: orange;}
|
209 |
+
#unselected-ctm {}
|
210 |
+
""",
|
211 |
+
theme="gradio/monochrome",
|
212 |
) as demo:
|
213 |
with gr.Row():
|
214 |
introduction()
|
215 |
with gr.Row():
|
216 |
+
with gr.Column():
|
217 |
+
processor_tab()
|
218 |
+
query, text, image, state = input_tab()
|
219 |
+
with gr.Column():
|
220 |
+
output_tab(query, text, image, state)
|
221 |
return demo
|
222 |
|
223 |
|
images/banner.jpg
ADDED
images/{CTM-AI.png → icon.png}
RENAMED
File without changes
|
ui_constants.py
ADDED
@@ -0,0 +1,191 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from pathlib import Path
|
2 |
+
|
3 |
+
DEFAULT_LP = 0.5
|
4 |
+
|
5 |
+
# Banner image shown at the top of the page. GitHub "/blob/" URLs return the
# HTML file viewer, so "?raw=true" is required for the link to resolve to the
# actual image bytes when used as an <img src>.
banner_url = "https://github.com/sotopia-lab/sotopia-website/blob/main/public/bg_xl.png?raw=true" # the same repo here.
|
6 |
+
BANNER = f'<div style="display: flex; justify-content: flex-start;"><img src="{banner_url}" alt="Banner" style="width: 40vw; min-width: 300px; max-width: 800px;"> </div>'
|
7 |
+
|
8 |
+
# Centered page heading as a standalone HTML snippet; the <h1> must be closed
# by a matching </h1> (it was previously closed with a stray </b>).
TITLE = "<html> <head> <style> h1 {text-align: center;} </style> </head> <body> <h1> 🦁 AI2 sotopia Leaderboard </h1> </body> </html>"
|
9 |
+
|
10 |
+
WINRATE_HEATMAP = "<div><img src='https://github.com/WildEval/sotopia-Leaderboard/blob/main/gradio/pairwise_win_fractions.png?raw=true' style='width:100%;'></div>"
|
11 |
+
|
12 |
+
CITATION_TEXT = """@inproceedings{
|
13 |
+
zhou2024sotopia,
|
14 |
+
title={{SOTOPIA}: Interactive Evaluation for Social Intelligence in Language Agents},
|
15 |
+
author={Xuhui Zhou and Hao Zhu and Leena Mathur and Ruohong Zhang and Haofei Yu and Zhengyang Qi and Louis-Philippe Morency and Yonatan Bisk and Daniel Fried and Graham Neubig and Maarten Sap},
|
16 |
+
booktitle={The Twelfth International Conference on Learning Representations},
|
17 |
+
year={2024},
|
18 |
+
url={https://openreview.net/forum?id=mM7VurbA4r}
|
19 |
+
}
|
20 |
+
"""
|
21 |
+
|
22 |
+
|
23 |
+
column_names = {
|
24 |
+
"model name ": "Model",
|
25 |
+
"elo overall": "Overall Elo",
|
26 |
+
'Information seeking': 'InfoSek',
|
27 |
+
'Creative Writing': 'CrtWrt',
|
28 |
+
'Coding & Debugging': 'Code',
|
29 |
+
'Reasoning': 'Reason',
|
30 |
+
'Editing': 'Edit',
|
31 |
+
'Math': 'Math',
|
32 |
+
'Planning': 'Plan',
|
33 |
+
'Brainstorming': 'Brnstrm',
|
34 |
+
'Role playing': 'RolPly',
|
35 |
+
'Advice seeking': 'AdvSek',
|
36 |
+
'Data Analysis': 'DataAna',
|
37 |
+
'Others': 'Misc',
|
38 |
+
"average": "Task-Avg Elo",
|
39 |
+
}
|
40 |
+
|
41 |
+
all_task_types = [
|
42 |
+
'Information seeking',
|
43 |
+
'Creative Writing',
|
44 |
+
'Coding & Debugging',
|
45 |
+
'Reasoning',
|
46 |
+
'Editing',
|
47 |
+
'Math',
|
48 |
+
'Planning',
|
49 |
+
'Brainstorming',
|
50 |
+
'Role playing',
|
51 |
+
'Advice seeking',
|
52 |
+
'Data Analysis',
|
53 |
+
'Others'
|
54 |
+
]
|
55 |
+
|
56 |
+
|
57 |
+
|
58 |
+
js_light = """
|
59 |
+
function refresh() {
|
60 |
+
const url = new URL(window.location);
|
61 |
+
if (url.searchParams.get('__theme') !== 'light') {
|
62 |
+
url.searchParams.set('__theme', 'light');
|
63 |
+
window.location.href = url.href;
|
64 |
+
}
|
65 |
+
}
|
66 |
+
"""
|
67 |
+
|
68 |
+
js_code = """
|
69 |
+
function scroll_top() {
|
70 |
+
console.log("Hello from Gradio!");
|
71 |
+
const bubbles = document.querySelectorAll('.bubble-wrap');
|
72 |
+
bubbles.forEach((bubble, index) => {
|
73 |
+
setTimeout(() => {
|
74 |
+
bubble.scrollTop = 0;
|
75 |
+
}, index * 100); // Delay of 100ms between each iteration
|
76 |
+
});
|
77 |
+
}
|
78 |
+
"""
|
79 |
+
|
80 |
+
|
81 |
+
TASK_TYPE_STR = "**Tasks**: Info seeking (**InfoSek**), Creative Writing (**CrtWrt**), Coding&Debugging (**Code**), Reasoning (**Reason**), Editing (**Edit**), **Math**, Planning (**Plan**), Brainstorming (**Brnstrm**), Role playing (**RolPly**), Advice seeking (**AdvSek**), Data Analysis (**DataAna**)"
|
82 |
+
|
83 |
+
# Custom stylesheet string for the Gradio UI.
# Fixes relative to the previous version:
#   * dropped `font-color: red;` -- not a CSS property (browsers ignore it);
#     the text colour is already set by `color: red;` in the same rule.
#   * `font-weight: italic;` -> `font-style: italic;` -- italic is a font
#     style, not a weight, so the old declaration was discarded as invalid.
css = """
code {
font-size: large;
}
footer {visibility: hidden}
.top-left-LP{
margin-top: 6px;
margin-left: 5px;
}
.markdown-text{font-size: 14pt}
.markdown-text-small{font-size: 13pt}
.markdown-text-tiny{font-size: 12pt}
.markdown-text-tiny-red{
font-size: 12pt;
color: red;
background-color: yellow;
font-weight: bold;
}
th {
text-align: center;
font-size: 17px; /* Adjust the font size as needed */
}
td {
font-size: 15px; /* Adjust the font size as needed */
text-align: center;
}
.sample_button{
border: 1px solid #000000;
border-radius: 5px;
padding: 5px;
font-size: 15pt;
font-weight: bold;
margin: 5px;
}
.chat-common{
height: auto;
max-height: 400px;
min-height: 100px;
}
.chat-specific{
height: auto;
max-height: 600px;
min-height: 200px;
}
#od-benchmark-tab-table-button{
font-size: 15pt;
font-weight: bold;
}
.btn_boderline{
border: 1px solid #000000;
border-radius: 5px;
padding: 5px;
margin: 5px;
font-size: 15pt;
font-weight: bold;
}
.btn_boderline_next{
border: 0.1px solid #000000;
border-radius: 5px;
padding: 5px;
margin: 5px;
font-size: 15pt;
font-weight: bold;
}
.btn_boderline_gray{
border: 0.5px solid gray;
border-radius: 5px;
padding: 5px;
margin: 5px;
font-size: 15pt;
font-style: italic;
}
.btn_boderline_selected{
border: 2px solid purple;
background-color: #f2f2f2;
border-radius: 5px;
padding: 5px;
margin: 5px;
font-size: 15pt;
font-weight: bold;
}
.accordion-label button span{
font-size: 14pt;
font-weight: bold;
}
#select-models span{
font-size: 10pt;
}
#select-tasks span{
font-size: 10pt;
}
.markdown-text-details{
margin: 10px;
padding: 10px;
}
button.selected[role="tab"][aria-selected="true"] {
font-size: 18px; /* or any other size you prefer */
font-weight: bold;
}
#od-benchmark-tab-table-ablation-button {
font-size: larger; /* Adjust the font size as needed */
}
.plotly-plot{
height: auto;
max-height: 600px;
min-height: 600px;
}
"""
|