BreakLee committed on
Commit
c4d90ef
·
1 Parent(s): 6d72d0f

SEED Benchmark Leaderboard Update

Browse files
__pycache__/constants.cpython-38.pyc ADDED
Binary file (7.6 kB). View file
 
app.py CHANGED
@@ -126,6 +126,9 @@ def add_new_eval(
126
  model_type,
127
  model_name,
128
  LLM_name,
 
 
 
129
  each_task_accuracy[1],
130
  each_task_accuracy[2],
131
  each_task_accuracy[3],
@@ -135,19 +138,25 @@ def add_new_eval(
135
  each_task_accuracy[7],
136
  each_task_accuracy[8],
137
  each_task_accuracy[9],
138
- average_accuracy_image,
139
  each_task_accuracy[10],
140
  each_task_accuracy[11],
141
  each_task_accuracy[12],
142
- average_accuracy_video,
143
- overall_accuracy]
144
- # pdb.set_trace()
145
  csv_data.loc[col] = new_data
146
  csv_data = csv_data.to_csv(CSV_DIR, index=False)
147
  return 0
148
 
149
  def get_baseline_df():
 
 
 
 
 
 
 
 
150
  df = pd.read_csv(CSV_DIR)
 
151
  return df
152
 
153
  block = gr.Blocks()
@@ -173,8 +182,8 @@ with block:
173
 
174
  # selection for column part:
175
  checkbox_group = gr.CheckboxGroup(
176
- choices=TASK_INFO,
177
- value=TASK_INFO,
178
  label="Select options",
179
  interactive=True,
180
  )
@@ -191,9 +200,9 @@ with block:
191
 
192
  def on_checkbox_group_change(selected_columns):
193
  # pdb.set_trace()
194
- selected_columns = [item for item in TASK_INFO if item in selected_columns]
195
  present_columns = MODEL_INFO + selected_columns
196
- updated_data = get_baseline_df()[present_columns]
197
  updated_headers = present_columns
198
  update_datatype = [DATA_TITILE_TYPE[COLUMN_NAMES.index(x)] for x in updated_headers]
199
 
@@ -229,10 +238,10 @@ with block:
229
  with gr.Row():
230
  with gr.Column():
231
  model_name_textbox = gr.Textbox(
232
- label="Model Name", placeholder="LLaMA-7B"
233
  )
234
  revision_name_textbox = gr.Textbox(
235
- label="Revision Model Name", placeholder="LLaMA"
236
  )
237
  model_type = gr.Dropdown(
238
  choices=[
@@ -241,7 +250,7 @@ with block:
241
  "VideoLLM",
242
  "Other",
243
  ],
244
- label="Model Type",
245
  multiselect=False,
246
  value="ImageLLM",
247
  interactive=True,
@@ -254,18 +263,18 @@ with block:
254
 
255
  LLM_type = gr.Dropdown(
256
  choices=["Vicuna-7B", "Flan-T5-XL", "LLaMA-7B", "Other"],
257
- label="LLM Type",
258
  multiselect=False,
259
  value="LLaMA-7B",
260
  interactive=True,
261
  )
262
  LLM_name_textbox = gr.Textbox(
263
- label="LLM Model (for Other)",
264
  placeholder="LLaMA-13B"
265
  )
266
  Evaluation_dimension = gr.Dropdown(
267
  choices=["All", "Image", "Video"],
268
- label="Evaluation Dimension",
269
  multiselect=False,
270
  value="All",
271
  interactive=True,
 
126
  model_type,
127
  model_name,
128
  LLM_name,
129
+ overall_accuracy,
130
+ average_accuracy_image,
131
+ average_accuracy_video,
132
  each_task_accuracy[1],
133
  each_task_accuracy[2],
134
  each_task_accuracy[3],
 
138
  each_task_accuracy[7],
139
  each_task_accuracy[8],
140
  each_task_accuracy[9],
 
141
  each_task_accuracy[10],
142
  each_task_accuracy[11],
143
  each_task_accuracy[12],
144
+ ]
 
 
145
  csv_data.loc[col] = new_data
146
  csv_data = csv_data.to_csv(CSV_DIR, index=False)
147
  return 0
148
 
149
  def get_baseline_df():
150
+ # pdb.set_trace()
151
+ df = pd.read_csv(CSV_DIR)
152
+ df = df.sort_values(by="Avg. All", ascending=False)
153
+ present_columns = MODEL_INFO + checkbox_group.value
154
+ df = df[present_columns]
155
+ return df
156
+
157
+ def get_all_df():
158
  df = pd.read_csv(CSV_DIR)
159
+ df = df.sort_values(by="Avg. All", ascending=False)
160
  return df
161
 
162
  block = gr.Blocks()
 
182
 
183
  # selection for column part:
184
  checkbox_group = gr.CheckboxGroup(
185
+ choices=TASK_INFO_v2,
186
+ value=AVG_INFO,
187
  label="Select options",
188
  interactive=True,
189
  )
 
200
 
201
  def on_checkbox_group_change(selected_columns):
202
  # pdb.set_trace()
203
+ selected_columns = [item for item in TASK_INFO_v2 if item in selected_columns]
204
  present_columns = MODEL_INFO + selected_columns
205
+ updated_data = get_all_df()[present_columns]
206
  updated_headers = present_columns
207
  update_datatype = [DATA_TITILE_TYPE[COLUMN_NAMES.index(x)] for x in updated_headers]
208
 
 
238
  with gr.Row():
239
  with gr.Column():
240
  model_name_textbox = gr.Textbox(
241
+ label="Model name", placeholder="LLaMA-7B"
242
  )
243
  revision_name_textbox = gr.Textbox(
244
+ label="Revision Model Name", placeholder="LLaMA-7B"
245
  )
246
  model_type = gr.Dropdown(
247
  choices=[
 
250
  "VideoLLM",
251
  "Other",
252
  ],
253
+ label="Model type",
254
  multiselect=False,
255
  value="ImageLLM",
256
  interactive=True,
 
263
 
264
  LLM_type = gr.Dropdown(
265
  choices=["Vicuna-7B", "Flan-T5-XL", "LLaMA-7B", "Other"],
266
+ label="LLM type",
267
  multiselect=False,
268
  value="LLaMA-7B",
269
  interactive=True,
270
  )
271
  LLM_name_textbox = gr.Textbox(
272
+ label="LLM model (for Other)",
273
  placeholder="LLaMA-13B"
274
  )
275
  Evaluation_dimension = gr.Dropdown(
276
  choices=["All", "Image", "Video"],
277
+ label="Evaluation dimension",
278
  multiselect=False,
279
  value="All",
280
  interactive=True,
constants.py CHANGED
@@ -1,11 +1,15 @@
1
  # this is .py for store constants
2
  MODEL_INFO = ["Model Type", "Model", "Language Model"]
3
  TASK_INFO = ["Scene Understanding", "Instance Identity", "Instance Attributes", "Instance Localization", "Instance Counting", "Spatial Relation", "Instance Interaction", "Visual Reasoning", "Text Recognition", "Avg. Img", "Action Recognition", "Action Prediction", "Procedure Understanding", "Avg. Video", "Avg. All"]
4
- AVG_INFO = ["Avg. Img", "Avg. Video", "Avg. All"]
 
 
5
  DATA_TITILE_TYPE = ["markdown", "markdown", "markdown", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number"]
6
  CSV_DIR = "./file/result.csv"
7
 
8
- COLUMN_NAMES = MODEL_INFO + TASK_INFO
 
 
9
  DATA_NUM = [3158, 1831, 4649, 978, 2447, 657, 97, 331, 85, 1740, 2077, 1192]
10
 
11
  UNTUNED_MODEL_RESULTS = '''LLM & Flan-T5 & Flan-T5-XL &23.0 &29.0 &32.8 &31.8 &20.5 &31.8 &33.0 &18.2 &19.4 &23.2 &34.9 &25.4 \\
 
1
  # this is .py for store constants
2
  MODEL_INFO = ["Model Type", "Model", "Language Model"]
3
  TASK_INFO = ["Scene Understanding", "Instance Identity", "Instance Attributes", "Instance Localization", "Instance Counting", "Spatial Relation", "Instance Interaction", "Visual Reasoning", "Text Recognition", "Avg. Img", "Action Recognition", "Action Prediction", "Procedure Understanding", "Avg. Video", "Avg. All"]
4
+ TASK_INFO_v2 = ["Avg. All", "Avg. Img", "Avg. Video", "Scene Understanding", "Instance Identity", "Instance Attributes", "Instance Localization", "Instance Counting", "Spatial Relation", "Instance Interaction", "Visual Reasoning", "Text Recognition", "Action Recognition", "Action Prediction", "Procedure Understanding"]
5
+
6
+ AVG_INFO = ["Avg. All", "Avg. Img", "Avg. Video"]
7
  DATA_TITILE_TYPE = ["markdown", "markdown", "markdown", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number"]
8
  CSV_DIR = "./file/result.csv"
9
 
10
+ # COLUMN_NAMES = MODEL_INFO + TASK_INFO
11
+ COLUMN_NAMES = MODEL_INFO + TASK_INFO_v2
12
+
13
  DATA_NUM = [3158, 1831, 4649, 978, 2447, 657, 97, 331, 85, 1740, 2077, 1192]
14
 
15
  UNTUNED_MODEL_RESULTS = '''LLM & Flan-T5 & Flan-T5-XL &23.0 &29.0 &32.8 &31.8 &20.5 &31.8 &33.0 &18.2 &19.4 &23.2 &34.9 &25.4 \\
file/result.csv CHANGED
@@ -1,22 +1,22 @@
1
- Model Type,Model,Language Model,Scene Understanding,Instance Identity,Instance Attributes,Instance Localization,Instance Counting,Spatial Relation,Instance Interaction,Visual Reasoning,Text Recognition,Avg. Img,Action Recognition,Action Prediction,Procedure Understanding,Avg. Video,Avg. All
2
- LLM,[Flan-T5](https://huggingface.co/google/flan-t5-xl),Flan-T5-XL,23.0,29.0,32.8,31.8,20.5,31.8,33.0,18.2,19.4,27.3,23.2,34.9,25.4,28.6,27.7
3
- LLM,[Vicuna](https://huggingface.co/lmsys/vicuna-7b-v1.3),Vicuna-7B,23.4,30.7,29.7,30.9,30.8,28.6,29.8,18.5,13.4,28.2,27.3,34.5,23.8,29.5,28.5
4
- LLM,[LLaMA](https://research.facebook.com/publications/llama-open-and-efficient-foundation-language-models/),LLaMA-7B,26.3,27.4,26.2,28.3,25.1,28.8,19.2,37.0,9.0,26.6,33.0,23.1,26.2,27.3,26.8
5
- ImageLLM,[BLIP-2](https://github.com/salesforce/LAVIS),Flan-T5-XL,59.1,53.9,49.2,42.3,43.2,36.7,55.7,45.6,25.9,49.7,32.6,47.5,24.0,36.7,46.4
6
- ImageLLM,[InstructBLIP](https://github.com/salesforce/LAVIS),Flan-T5-XL,60.3,58.5,63.4,40.6,58.4,38.7,51.6,45.9,25.9,57.8,33.1,49.1,27.1,38.3,52.7
7
- ImageLLM,[InstructBLIP-Vicuna](https://github.com/salesforce/LAVIS),Vicuna-7B,60.2,58.9,65.6,43.6,57.2,40.3,52.6,47.7,43.5,58.8,34.5,49.6,23.1,38.1,53.4
8
- ImageLLM,[LLaVA](https://github.com/haotian-liu/LLaVA),LLaMA-7B,42.7,34.9,33.5,28.4,41.9,30.8,27.8,46.8,27.7,37.0,29.7,21.4,19.1,23.8,33.5
9
- ImageLLM,[MiniGPT-4](https://github.com/Vision-CAIR/MiniGPT-4),Flan-T5-XL,56.3,49.2,45.8,37.9,45.3,32.6,47.4,57.1,11.8,47.4,38.2,24.5,27.1,29.9,42.8
10
- ImageLLM,[VPGTrans](https://github.com/VPGTrans/VPGTrans),LLaMA-7B,51.9,44.1,39.9,36.1,33.7,36.4,32.0,53.2,30.6,41.8,39.5,24.3,31.9,31.4,39.1
11
- ImageLLM,[MultiModal-GPT](https://github.com/open-mmlab/Multimodal-GPT),LLaMA-7B,43.6,37.9,31.5,30.8,27.3,30.1,29.9,51.4,18.8,34.5,36.9,25.8,24.0,29.2,33.2
12
- ImageLLM,[Otter](https://github.com/Luodian/Otter),LLaMA-7B,44.9,38.6,32.2,30.9,26.3,31.8,32.0,51.4,31.8,35.2,37.9,27.2,24.8,30.4,33.9
13
- ImageLLM,[Otter](https://github.com/Luodian/Otter),MPT-7B,51.3,43.5,42.3,34.2,38.4,30.9,40.2,55.3,24.7,42.9,36.8,29.2,23.8,30.6,39.7
14
- ImageLLM,[OpenFlamingo](https://github.com/mlfoundations/open_flamingo),LLaMA-7B,43.9,38.1,31.3,30.1,27.3,30.6,29.9,50.2,20.0,34.5,37.2,25.4,24.2,29.3,33.1
15
- ImageLLM,[OpenFlamingo](https://github.com/mlfoundations/open_flamingo),MPT-7B,53.2,45.3,40.0,31.2,39.3,32.6,36.1,51.4,25.9,42.7,42.9,34.7,26.9,35.7,40.9
16
- ImageLLM,[LLaMA-AdapterV2](https://github.com/OpenGVLab/LLaMA-Adapter),LLaMA-7B,45.2,38.5,29.3,33.0,29.7,35.5,39.2,52.0,24.7,35.2,38.6,18.5,19.6,25.8,32.7
17
- ImageLLM,[GVT](https://github.com/TencentARC/GVT),Vicuna-7B,41.7,35.5,31.8,29.5,36.2,32.0,32.0,51.1,27.1,35.5,33.9,25.4,23.0,27.8,33.5
18
- ImageLLM,[mPLUG-Owl](https://github.com/X-PLUG/mPLUG-Owl),LLaMA-7B,49.7,45.3,32.5,36.7,27.3,32.7,44.3,54.7,28.8,37.9,26.7,17.9,26.5,23.0,34.0
19
- ImageLLM,[Kosmos-2](https://github.com/microsoft/unilm/tree/master/kosmos-2),Decoder Only 1.3B,63.4,57.1,58.5,44.0,41.4,37.9,55.7,60.7,25.9,54.4,41.3,40.4,27.0,37.5,50.0
20
- VideoLLM,[VideoChat](https://github.com/OpenGVLab/Ask-Anything),Vicuna-7B,47.1,43.8,34.9,40.0,32.8,34.6,42.3,50.5,17.7,39.0,34.9,36.4,27.3,33.7,37.6
21
- VideoLLM,[Video-ChatGPT](https://github.com/mbzuai-oryx/Video-ChatGPT),LLaMA-7B,37.2,31.4,33.2,28.4,35.5,29.5,23.7,42.3,25.9,33.9,27.6,21.3,21.1,23.5,31.2
22
- VideoLLM,[Valley](https://github.com/RupertLuo/Valley),LLaMA-13B,39.3,32.9,31.6,27.9,24.2,30.1,27.8,43.8,11.8,32.0,31.3,23.2,20.7,25.4,30.3
 
1
+ Model Type,Model,Language Model,Avg. All,Avg. Img,Avg. Video,Scene Understanding,Instance Identity,Instance Attributes,Instance Localization,Instance Counting,Spatial Relation,Instance Interaction,Visual Reasoning,Text Recognition,Action Recognition,Action Prediction,Procedure Understanding
2
+ LLM,[Flan-T5](https://huggingface.co/google/flan-t5-xl),Flan-T5-XL,27.7,27.3,28.6,23.0,29.0,32.8,31.8,20.5,31.8,33.0,18.2,19.4,23.2,34.9,25.4
3
+ LLM,[Vicuna](https://huggingface.co/lmsys/vicuna-7b-v1.3),Vicuna-7B,28.5,28.2,29.5,23.4,30.7,29.7,30.9,30.8,28.6,29.8,18.5,13.4,27.3,34.5,23.8
4
+ LLM,[LLaMA](https://research.facebook.com/publications/llama-open-and-efficient-foundation-language-models/),LLaMA-7B,26.8,26.6,27.3,26.3,27.4,26.2,28.3,25.1,28.8,19.2,37.0,9.0,33.0,23.1,26.2
5
+ ImageLLM,[BLIP-2](https://github.com/salesforce/LAVIS),Flan-T5-XL,46.4,49.7,36.7,59.1,53.9,49.2,42.3,43.2,36.7,55.7,45.6,25.9,32.6,47.5,24.0
6
+ ImageLLM,[InstructBLIP](https://github.com/salesforce/LAVIS),Flan-T5-XL,52.7,57.8,38.3,60.3,58.5,63.4,40.6,58.4,38.7,51.6,45.9,25.9,33.1,49.1,27.1
7
+ ImageLLM,[InstructBLIP-Vicuna](https://github.com/salesforce/LAVIS),Vicuna-7B,53.4,58.8,38.1,60.2,58.9,65.6,43.6,57.2,40.3,52.6,47.7,43.5,34.5,49.6,23.1
8
+ ImageLLM,[LLaVA](https://github.com/haotian-liu/LLaVA),LLaMA-7B,33.5,37.0,23.8,42.7,34.9,33.5,28.4,41.9,30.8,27.8,46.8,27.7,29.7,21.4,19.1
9
+ ImageLLM,[MiniGPT-4](https://github.com/Vision-CAIR/MiniGPT-4),Flan-T5-XL,42.8,47.4,29.9,56.3,49.2,45.8,37.9,45.3,32.6,47.4,57.1,11.8,38.2,24.5,27.1
10
+ ImageLLM,[VPGTrans](https://github.com/VPGTrans/VPGTrans),LLaMA-7B,39.1,41.8,31.4,51.9,44.1,39.9,36.1,33.7,36.4,32.0,53.2,30.6,39.5,24.3,31.9
11
+ ImageLLM,[MultiModal-GPT](https://github.com/open-mmlab/Multimodal-GPT),LLaMA-7B,33.2,34.5,29.2,43.6,37.9,31.5,30.8,27.3,30.1,29.9,51.4,18.8,36.9,25.8,24.0
12
+ ImageLLM,[Otter](https://github.com/Luodian/Otter),LLaMA-7B,33.9,35.2,30.4,44.9,38.6,32.2,30.9,26.3,31.8,32.0,51.4,31.8,37.9,27.2,24.8
13
+ ImageLLM,[Otter](https://github.com/Luodian/Otter),MPT-7B,39.7,42.9,30.6,51.3,43.5,42.3,34.2,38.4,30.9,40.2,55.3,24.7,36.8,29.2,23.8
14
+ ImageLLM,[OpenFlamingo](https://github.com/mlfoundations/open_flamingo),LLaMA-7B,33.1,34.5,29.3,43.9,38.1,31.3,30.1,27.3,30.6,29.9,50.2,20.0,37.2,25.4,24.2
15
+ ImageLLM,[OpenFlamingo](https://github.com/mlfoundations/open_flamingo),MPT-7B,40.9,42.7,35.7,53.2,45.3,40.0,31.2,39.3,32.6,36.1,51.4,25.9,42.9,34.7,26.9
16
+ ImageLLM,[LLaMA-AdapterV2](https://github.com/OpenGVLab/LLaMA-Adapter),LLaMA-7B,32.7,35.2,25.8,45.2,38.5,29.3,33.0,29.7,35.5,39.2,52.0,24.7,38.6,18.5,19.6
17
+ ImageLLM,[GVT](https://github.com/TencentARC/GVT),Vicuna-7B,33.5,35.5,27.8,41.7,35.5,31.8,29.5,36.2,32.0,32.0,51.1,27.1,33.9,25.4,23.0
18
+ ImageLLM,[mPLUG-Owl](https://github.com/X-PLUG/mPLUG-Owl),LLaMA-7B,34.0,37.9,23.0,49.7,45.3,32.5,36.7,27.3,32.7,44.3,54.7,28.8,26.7,17.9,26.5
19
+ ImageLLM,[Kosmos-2](https://github.com/microsoft/unilm/tree/master/kosmos-2),Decoder Only 1.3B,50.0,54.4,37.5,63.4,57.1,58.5,44.0,41.4,37.9,55.7,60.7,25.9,41.3,40.4,27.0
20
+ VideoLLM,[VideoChat](https://github.com/OpenGVLab/Ask-Anything),Vicuna-7B,37.6,39.0,33.7,47.1,43.8,34.9,40.0,32.8,34.6,42.3,50.5,17.7,34.9,36.4,27.3
21
+ VideoLLM,[Video-ChatGPT](https://github.com/mbzuai-oryx/Video-ChatGPT),LLaMA-7B,31.2,33.9,23.5,37.2,31.4,33.2,28.4,35.5,29.5,23.7,42.3,25.9,27.6,21.3,21.1
22
+ VideoLLM,[Valley](https://github.com/RupertLuo/Valley),LLaMA-13B,30.3,32.0,25.4,39.3,32.9,31.6,27.9,24.2,30.1,27.8,43.8,11.8,31.3,23.2,20.7
src/__pycache__/utils_display.cpython-38.pyc CHANGED
Binary files a/src/__pycache__/utils_display.cpython-38.pyc and b/src/__pycache__/utils_display.cpython-38.pyc differ
 
src/auto_leaderboard/__pycache__/model_metadata_type.cpython-38.pyc CHANGED
Binary files a/src/auto_leaderboard/__pycache__/model_metadata_type.cpython-38.pyc and b/src/auto_leaderboard/__pycache__/model_metadata_type.cpython-38.pyc differ