BreakLee committed
Commit 75d4504 • Parent: fec9185

Update app.py

Files changed (1): app.py (+43, -25)
app.py CHANGED
@@ -68,7 +68,14 @@ def add_new_eval(
     else:
         content = input_file.decode("utf-8")
     prediction = prediction_analyse(content)
-    each_task_accuracy = {i: round(prediction[i]["correct"] / prediction[i]["total"] * 100, 1) for i in range(1, 13)}
+    csv_data = pd.read_csv(CSV_DIR)
+
+    Start_dimension, End_dimension = 1, 13
+    if Evaluation_dimension == 'Image':
+        End_dimension = 10
+    elif Evaluation_dimension == 'Video':
+        Start_dimension = 10
+    each_task_accuracy = {i: round(prediction[i]["correct"] / prediction[i]["total"] * 100, 1) if i >= Start_dimension and i < End_dimension else 0 for i in range(1, 13)}
 
     # count for average image\video\all
     total_correct_image = sum(prediction[i]["correct"] for i in range(1, 10))
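Note: the new `Start_dimension`/`End_dimension` gating keeps all 12 task columns but zeroes the ones outside the submitted split. A minimal standalone sketch of that logic, assuming (as in the surrounding code) tasks 1-9 are image tasks and 10-12 are video tasks; the `prediction` dict here is a stand-in for `prediction_analyse()` output:

# Standalone sketch of the dimension-gated accuracy logic from this hunk.
prediction = {i: {"correct": 8, "total": 10} for i in range(1, 13)}  # stand-in data

Evaluation_dimension = 'Image'          # one of 'All', 'Image', 'Video'
Start_dimension, End_dimension = 1, 13  # defaults cover all 12 tasks
if Evaluation_dimension == 'Image':
    End_dimension = 10                  # tasks 1-9 only
elif Evaluation_dimension == 'Video':
    Start_dimension = 10                # tasks 10-12 only

# Tasks outside the selected range get 0 rather than being omitted,
# so the leaderboard row always has 12 task columns.
each_task_accuracy = {
    i: round(prediction[i]["correct"] / prediction[i]["total"] * 100, 1)
    if Start_dimension <= i < End_dimension else 0
    for i in range(1, 13)
}
print(each_task_accuracy)  # tasks 10-12 print as 0 when only 'Image' is evaluated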
@@ -77,20 +84,43 @@ def add_new_eval(
     total_image = sum(prediction[i]["total"] for i in range(1, 10))
     total_video = sum(prediction[i]["total"] for i in range(10, 13))
 
-    average_accuracy_image = round(total_correct_image / total_image * 100, 1)
-    average_accuracy_video = round(total_correct_video / total_video * 100, 1)
-    overall_accuracy = round((total_correct_image + total_correct_video) / (total_image + total_video) * 100, 1)
+    if Evaluation_dimension != 'Video':
+        average_accuracy_image = round(total_correct_image / total_image * 100, 1)
+    else:
+        average_accuracy_image = 0
+
+    if Evaluation_dimension != 'Image':
+        average_accuracy_video = round(total_correct_video / total_video * 100, 1)
+    else:
+        average_accuracy_video = 0
+
+    if Evaluation_dimension == 'All':
+        overall_accuracy = round((total_correct_image + total_correct_video) / (total_image + total_video) * 100, 1)
+    else:
+        overall_accuracy = 0
 
-    if LLM_type == 'other':
+    if LLM_type == 'Other':
         LLM_name = LLM_name_textbox
     else:
         LLM_name = LLM_type
 
+    if revision_name_textbox == '':
+        col = csv_data.shape[0]
+        model_name = model_name_textbox
+    else:
+        model_name = revision_name_textbox
+        model_name_list = csv_data['Model']
+        name_list = [name.split(']')[0][1:] for name in model_name_list]
+        if revision_name_textbox not in name_list:
+            col = csv_data.shape[0]
+        else:
+            col = name_list.index(revision_name_textbox)
 
     if model_link == '':
-        model_name = model_name_textbox # no url
+        model_name = model_name  # no url
     else:
-        model_name = '[' + model_name_textbox + '](' + model_link + ')'
+        model_name = '[' + model_name + '](' + model_link + ')'
+
     # add new data
     new_data = [
         model_type,
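Note: the revision branch resolves `col` by recovering bare model names from the leaderboard's `Model` column, where entries are stored as `[name](url)` markdown; `name.split(']')[0][1:]` peels off that wrapper. A small illustrative sketch (the sample values are made up; on a bare, unlinked cell this slicing would also drop the first character, so the lookup assumes link-form entries):

# Sketch: recover the bare model name from a '[name](url)' leaderboard cell.
cells = ['[LLaVA](https://example.com/llava)', '[MiniGPT-4](https://example.com/minigpt4)']  # made-up cells
name_list = [cell.split(']')[0][1:] for cell in cells]
print(name_list)  # ['LLaVA', 'MiniGPT-4']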
@@ -112,22 +142,8 @@ def add_new_eval(
         average_accuracy_video,
         overall_accuracy]
     # pdb.set_trace()
-    csv_data = pd.read_csv(CSV_DIR)
-
-    # pdb.set_trace()
-    if revision_name_textbox == '':
-        col = csv_data.shape[0]
-        csv_data.loc[col] = new_data
-        csv_data = csv_data.to_csv(CSV_DIR, index=False)
-    else:
-        model_name_list = csv_data['Model']
-        name_list = [name.split(']')[0][1:] for name in model_name_list]
-        if revision_name_textbox not in name_list:
-            col = csv_data.shape[0]
-        else:
-            col = name_list.index(revision_name_textbox)
-        csv_data.loc[col] = new_data
-        csv_data = csv_data.to_csv(CSV_DIR, index=False)
+    csv_data.loc[col] = new_data
+    csv_data = csv_data.to_csv(CSV_DIR, index=False)
     return 0
 
 def get_baseline_df():
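Note: the write path collapses to a single upsert. `col` arrives from the earlier branch as either an existing row index (revision) or `csv_data.shape[0]` (append), and assigning through `DataFrame.loc` grows the frame when the label is new. A minimal sketch of the same pandas pattern, with made-up column names and file path; `to_csv` returns `None` when given a path, so the final `csv_data = csv_data.to_csv(...)` leaves `csv_data` as `None` (harmless here, since the function returns immediately after):

import pandas as pd

# Sketch of the loc-based upsert used above.
df = pd.DataFrame({'Model': ['A'], 'Acc': [50.0]})  # made-up columns

col = df.shape[0]            # next free integer label -> append
df.loc[col] = ['B', 61.5]

col = 0                      # existing label -> overwrite (revision)
df.loc[col] = ['A-v2', 55.0]

df.to_csv('leaderboard.csv', index=False)  # writes the file; returns None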
@@ -204,6 +220,8 @@ with block:
         with gr.TabItem("🚀 Submit here! ", elem_id="seed-benchmark-tab-table", id=3):
             gr.Markdown(LEADERBORAD_INTRODUCTION, elem_classes="markdown-text")
 
+            with gr.Row():
+                gr.Markdown(SUBMIT_INTRODUCTION, elem_classes="markdown-text")
 
             with gr.Row():
                 gr.Markdown("# ✉️✨ Submit your model evaluation json file here!", elem_classes="markdown-text")
@@ -235,14 +253,14 @@
             with gr.Column():
 
                 LLM_type = gr.Dropdown(
-                    choices=["Vicuna-7B", "Flan-T5-XL", "LLaMA-7B", "other"],
+                    choices=["Vicuna-7B", "Flan-T5-XL", "LLaMA-7B", "Other"],
                     label="LLM type",
                     multiselect=False,
                     value="LLaMA-7B",
                     interactive=True,
                 )
                 LLM_name_textbox = gr.Textbox(
-                    label="LLM model (for other)",
+                    label="LLM model (for Other)",
                     placeholder="LLaMA-13B"
                 )
                 Evaluation_dimension = gr.Dropdown(
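Note: the capitalized "Other" choice has to stay in sync with the `LLM_type == 'Other'` comparison earlier in this diff, which is capitalized in the same commit. A minimal standalone Gradio sketch of the dropdown-plus-fallback-textbox pattern, with made-up component and function names:

import gradio as gr

def resolve_llm_name(llm_type, llm_name_textbox):
    # Same fallback as in the diff: free text is used only for 'Other'.
    return llm_name_textbox if llm_type == 'Other' else llm_type

with gr.Blocks() as demo:
    llm_type = gr.Dropdown(
        choices=["Vicuna-7B", "Flan-T5-XL", "LLaMA-7B", "Other"],
        label="LLM type", value="LLaMA-7B", interactive=True,
    )
    llm_name = gr.Textbox(label="LLM model (for Other)", placeholder="LLaMA-13B")
    resolved = gr.Textbox(label="Resolved LLM name")
    gr.Button("Resolve").click(resolve_llm_name, inputs=[llm_type, llm_name], outputs=resolved)

# demo.launch()  # uncomment to try locally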
 