BreakLee committed on
Commit 8fd167a
1 Parent(s): ff6b794

Upload 14 files

.gitattributes CHANGED
@@ -1,35 +1,35 @@
-*.7z filter=lfs diff=lfs merge=lfs -text
-*.arrow filter=lfs diff=lfs merge=lfs -text
-*.bin filter=lfs diff=lfs merge=lfs -text
-*.bz2 filter=lfs diff=lfs merge=lfs -text
-*.ckpt filter=lfs diff=lfs merge=lfs -text
-*.ftz filter=lfs diff=lfs merge=lfs -text
-*.gz filter=lfs diff=lfs merge=lfs -text
-*.h5 filter=lfs diff=lfs merge=lfs -text
-*.joblib filter=lfs diff=lfs merge=lfs -text
-*.lfs.* filter=lfs diff=lfs merge=lfs -text
-*.mlmodel filter=lfs diff=lfs merge=lfs -text
-*.model filter=lfs diff=lfs merge=lfs -text
-*.msgpack filter=lfs diff=lfs merge=lfs -text
-*.npy filter=lfs diff=lfs merge=lfs -text
-*.npz filter=lfs diff=lfs merge=lfs -text
-*.onnx filter=lfs diff=lfs merge=lfs -text
-*.ot filter=lfs diff=lfs merge=lfs -text
-*.parquet filter=lfs diff=lfs merge=lfs -text
-*.pb filter=lfs diff=lfs merge=lfs -text
-*.pickle filter=lfs diff=lfs merge=lfs -text
-*.pkl filter=lfs diff=lfs merge=lfs -text
-*.pt filter=lfs diff=lfs merge=lfs -text
-*.pth filter=lfs diff=lfs merge=lfs -text
-*.rar filter=lfs diff=lfs merge=lfs -text
-*.safetensors filter=lfs diff=lfs merge=lfs -text
-saved_model/**/* filter=lfs diff=lfs merge=lfs -text
-*.tar.* filter=lfs diff=lfs merge=lfs -text
-*.tar filter=lfs diff=lfs merge=lfs -text
-*.tflite filter=lfs diff=lfs merge=lfs -text
-*.tgz filter=lfs diff=lfs merge=lfs -text
-*.wasm filter=lfs diff=lfs merge=lfs -text
-*.xz filter=lfs diff=lfs merge=lfs -text
-*.zip filter=lfs diff=lfs merge=lfs -text
-*.zst filter=lfs diff=lfs merge=lfs -text
-*tfevents* filter=lfs diff=lfs merge=lfs -text
+*.7z filter=lfs diff=lfs merge=lfs -text
+*.arrow filter=lfs diff=lfs merge=lfs -text
+*.bin filter=lfs diff=lfs merge=lfs -text
+*.bz2 filter=lfs diff=lfs merge=lfs -text
+*.ckpt filter=lfs diff=lfs merge=lfs -text
+*.ftz filter=lfs diff=lfs merge=lfs -text
+*.gz filter=lfs diff=lfs merge=lfs -text
+*.h5 filter=lfs diff=lfs merge=lfs -text
+*.joblib filter=lfs diff=lfs merge=lfs -text
+*.lfs.* filter=lfs diff=lfs merge=lfs -text
+*.mlmodel filter=lfs diff=lfs merge=lfs -text
+*.model filter=lfs diff=lfs merge=lfs -text
+*.msgpack filter=lfs diff=lfs merge=lfs -text
+*.npy filter=lfs diff=lfs merge=lfs -text
+*.npz filter=lfs diff=lfs merge=lfs -text
+*.onnx filter=lfs diff=lfs merge=lfs -text
+*.ot filter=lfs diff=lfs merge=lfs -text
+*.parquet filter=lfs diff=lfs merge=lfs -text
+*.pb filter=lfs diff=lfs merge=lfs -text
+*.pickle filter=lfs diff=lfs merge=lfs -text
+*.pkl filter=lfs diff=lfs merge=lfs -text
+*.pt filter=lfs diff=lfs merge=lfs -text
+*.pth filter=lfs diff=lfs merge=lfs -text
+*.rar filter=lfs diff=lfs merge=lfs -text
+*.safetensors filter=lfs diff=lfs merge=lfs -text
+saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+*.tar.* filter=lfs diff=lfs merge=lfs -text
+*.tar filter=lfs diff=lfs merge=lfs -text
+*.tflite filter=lfs diff=lfs merge=lfs -text
+*.tgz filter=lfs diff=lfs merge=lfs -text
+*.wasm filter=lfs diff=lfs merge=lfs -text
+*.xz filter=lfs diff=lfs merge=lfs -text
+*.zip filter=lfs diff=lfs merge=lfs -text
+*.zst filter=lfs diff=lfs merge=lfs -text
+*tfevents* filter=lfs diff=lfs merge=lfs -text
__pycache__/constants.cpython-311.pyc ADDED
Binary file (4.39 kB).
 
app.py CHANGED
@@ -7,7 +7,7 @@ import tempfile
 import re
 from constants import *
 from src.auto_leaderboard.model_metadata_type import ModelType
-
+import dask.dataframe as dd
 
 global data_component, filter_component
 
@@ -26,15 +26,12 @@ def prediction_analyse(prediction_content):
     # pdb.set_trace()
     predictions = prediction_content.split("\n")
 
-    # read the ground_truth JSON file
-    with open("./file/SEED-Bench-1.json", "r") as file:
-        ground_truth_data = json.load(file)["questions"]
-
-    # convert the ground_truth data into a dict keyed by question_id
-    ground_truth = {item["question_id"]: item for item in ground_truth_data}
+    # read the ground_truth file
+    df = dd.read_parquet("./file/av_odyssey.parquet")
+    ground_truth = {row[0]: row[6] for row in df.itertuples(index=False, name=None)}
 
     # initialise the result-counting dict
-    results = {i: {"correct": 0, "total": 0} for i in range(1, 13)}
+    results = {i: {"correct": 0, "total": 0} for i in range(1, 27)}
 
     # iterate over predictions and count correct and total predictions per question_type_id
     for prediction in predictions:
@@ -48,15 +45,15 @@
             print(f"Warning: Skipping invalid JSON data in line: {prediction}")
             continue
         question_id = prediction["question_id"]
-        if question_id not in ground_truth:
+        if question_id not in ground_truth.keys():
            continue
         gt_item = ground_truth[question_id]
-        question_type_id = gt_item["question_type_id"]
+        question_type_id = question_id.split("_")[0]
 
-        if prediction["prediction"] == gt_item["answer"]:
-            results[question_type_id]["correct"] += 1
+        if prediction["prediction"] == gt_item:
+            results[int(question_type_id)]["correct"] += 1
 
-        results[question_type_id]["total"] += 1
+        results[int(question_type_id)]["total"] += 1
 
     return results
 
@@ -70,45 +67,23 @@ def add_new_eval(
     if input_file is None:
         return "Error! Empty file!"
     else:
-        model_size = validate_model_size(model_size)
         # v1 evaluation
         content = input_file.decode("utf-8")
        prediction = prediction_analyse(content)
         csv_data = pd.read_csv(CSV_DIR)
+        # pdb.set_trace()
 
-        Start_dimension, End_dimension = 1, 13
-        if Evaluation_dimension == 'Image':
-            End_dimension = 10
-        elif Evaluation_dimension == 'Video':
-            Start_dimension = 10
-        each_task_accuracy = {i: round(prediction[i]["correct"] / prediction[i]["total"] * 100, 1) if i >= Start_dimension and i < End_dimension else 0 for i in range(1, 13)}
+        each_task_accuracy = {i: round(prediction[i]["correct"] / prediction[i]["total"] * 100, 1) for i in range(1, 27)}
 
         # count for average image\video\all
-        total_correct_image = sum(prediction[i]["correct"] for i in range(1, 10))
-        total_correct_video = sum(prediction[i]["correct"] for i in range(10, 13))
-
-        total_image = sum(prediction[i]["total"] for i in range(1, 10))
-        total_video = sum(prediction[i]["total"] for i in range(10, 13))
-
-        if Evaluation_dimension != 'Video':
-            average_accuracy_image = round(total_correct_image / total_image * 100, 1)
-        else:
-            average_accuracy_image = 0
-
-        if Evaluation_dimension != 'Image':
-            average_accuracy_video = round(total_correct_video / total_video * 100, 1)
-        else:
-            average_accuracy_video = 0
-
-        if Evaluation_dimension == 'All':
-            overall_accuracy = round((total_correct_image + total_correct_video) / (total_image + total_video) * 100, 1)
-        else:
-            overall_accuracy = 0
-
-        if LLM_type == 'Other':
-            LLM_name = LLM_name_textbox
-        else:
-            LLM_name = LLM_type
+        total_correct_timbre = round(sum(prediction[i]["correct"] for i in range(timbre_task[0], timbre_task[1] + 1)) / sum(prediction[i]["total"] for i in range(timbre_task[0], timbre_task[1] + 1)) * 100, 1)
+        total_correct_tone = round(sum(prediction[i]["correct"] for i in range(tone_task[0], tone_task[1] + 1)) / sum(prediction[i]["total"] for i in range(tone_task[0], tone_task[1] + 1)) * 100, 1)
+        total_correct_melody = round(sum(prediction[i]["correct"] for i in range(melody_task[0], melody_task[1] + 1)) / sum(prediction[i]["total"] for i in range(melody_task[0], melody_task[1] + 1)) * 100, 1)
+        total_correct_space = round(sum(prediction[i]["correct"] for i in range(space_task[0], space_task[1] + 1)) / sum(prediction[i]["total"] for i in range(space_task[0], space_task[1] + 1)) * 100, 1)
+        total_correct_time = round(sum(prediction[i]["correct"] for i in range(time_task[0], time_task[1] + 1)) / sum(prediction[i]["total"] for i in range(time_task[0], time_task[1] + 1)) * 100, 1)
+        total_correct_hallucination = round(sum(prediction[i]["correct"] for i in range(hallucination_task[0], hallucination_task[1] + 1)) / sum(prediction[i]["total"] for i in range(hallucination_task[0], hallucination_task[1] + 1)) * 100, 1)
+        total_correct_intricay = round(sum(prediction[i]["correct"] for i in range(intricay_task[0], intricay_task[1] + 1)) / sum(prediction[i]["total"] for i in range(intricay_task[0], intricay_task[1] + 1)) * 100, 1)
+        all_average = round(sum(prediction[i]["correct"] for i in range(1, 27)) / sum(prediction[i]["total"] for i in range(1, 27)) * 100, 1)
 
         if revision_name_textbox == '':
             col = csv_data.shape[0]
@@ -130,11 +105,14 @@
         # add new data
         new_data = [
             model_name,
-            LLM_name,
-            model_size,
-            overall_accuracy,
-            average_accuracy_image,
-            average_accuracy_video,
+            all_average,
+            total_correct_timbre,
+            total_correct_tone,
+            total_correct_melody,
+            total_correct_space,
+            total_correct_time,
+            total_correct_hallucination,
+            total_correct_intricay,
             each_task_accuracy[1],
             each_task_accuracy[2],
             each_task_accuracy[3],
@@ -146,13 +124,25 @@
             each_task_accuracy[9],
             each_task_accuracy[10],
             each_task_accuracy[11],
-            each_task_accuracy[12],
+            each_task_accuracy[12],
+            each_task_accuracy[13],
+            each_task_accuracy[14],
+            each_task_accuracy[15],
+            each_task_accuracy[16],
+            each_task_accuracy[17],
+            each_task_accuracy[18],
+            each_task_accuracy[19],
+            each_task_accuracy[20],
+            each_task_accuracy[21],
+            each_task_accuracy[22],
+            each_task_accuracy[23],
+            each_task_accuracy[24],
+            each_task_accuracy[25],
+            each_task_accuracy[26],
         ]
         csv_data.loc[col] = new_data
         csv_data = csv_data.to_csv(CSV_DIR, index=False)
 
-        csv_task_data.loc[col] = new_data
-        csv_task_data = csv_task_data.to_csv(CSV_TASK_DIR, index=False)
         return 0
 
 def get_baseline_df():
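Taken together, the reworked prediction_analyse builds the answer key from the parquet file and buckets each prediction by the task-type prefix of its question_id. Below is a minimal, self-contained sketch of that flow, not the exact app code. It assumes, as the row[0]/row[6] indexing in the diff implies, that the parquet's first column holds the question_id and its seventh column the ground-truth answer, and that question_ids look like "<task_type>_<index>" (e.g. "3_0042", an illustrative id). The score_submission name is introduced here only for illustration.

```python
# Sketch of the scoring path in the diff above, under the stated assumptions.
import json

import dask.dataframe as dd

NUM_TASK_TYPES = 26  # question_type_ids 1..26 after this commit


def score_submission(prediction_lines, parquet_path="./file/av_odyssey.parquet"):
    # Build question_id -> answer from the parquet ground truth
    # (column positions 0 and 6 assumed, matching row[0]/row[6] in app.py).
    df = dd.read_parquet(parquet_path)
    ground_truth = {row[0]: row[6] for row in df.itertuples(index=False, name=None)}

    results = {i: {"correct": 0, "total": 0} for i in range(1, NUM_TASK_TYPES + 1)}
    for line in prediction_lines:
        try:
            pred = json.loads(line)
        except json.JSONDecodeError:
            print(f"Warning: Skipping invalid JSON data in line: {line}")
            continue
        qid = pred["question_id"]
        if qid not in ground_truth:
            continue
        task_type = int(qid.split("_")[0])  # task type encoded as the id prefix
        results[task_type]["total"] += 1
        if pred["prediction"] == ground_truth[qid]:
            results[task_type]["correct"] += 1
    return results
```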
constants.py CHANGED
@@ -13,6 +13,14 @@ AVG_INFO = ["Avg. All", "Avg. Timbre", "Avg. Tone", "Avg. Melody", "Avg. Space",
 DATA_TITILE_TYPE = ["markdown"] * len(MODEL_INFO) + ["number"] * len(TASK_INFO)
 CSV_DIR = "./file/AV-Odyssey_performance.csv"
 
+timbre_task = [1, 11]
+tone_task = [12, 13]
+melody_task = [14, 18]
+space_task = [19, 20]
+time_task = [21, 23]
+hallucination_task = [24, 24]
+intricay_task = [25, 26]
+
 COLUMN_NAMES = MODEL_INFO + TASK_INFO
 
 DATA_NUM = [200, 200, 200, 200, 200, 200, 200, 200, 108, 196, 200, 200, 20, 97, 200, 200, 200, 200, 20, 20, 200, 200, 200, 200, 199, 195]
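The new *_task pairs read as inclusive [first_type_id, last_type_id] ranges, which app.py uses to roll the 26 per-task counters up into the seven category averages (Avg. Timbre, Avg. Tone, and so on). A small sketch of that rollup follows; category_accuracy is an illustrative helper name, since app.py inlines these sums.

```python
# Sketch: derive one leaderboard category average from the inclusive task-id
# ranges defined in constants.py (same rounding as app.py).
timbre_task = [1, 11]  # as in constants.py


def category_accuracy(prediction, task_range):
    first, last = task_range
    correct = sum(prediction[i]["correct"] for i in range(first, last + 1))
    total = sum(prediction[i]["total"] for i in range(first, last + 1))
    return round(correct / total * 100, 1)


# e.g. the "Avg. Timbre" column is category_accuracy(prediction, timbre_task),
# where `prediction` is the dict returned by prediction_analyse.
```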
file/AV-Odyssey_performance.csv CHANGED
@@ -1,4 +1,4 @@
-Model,Avg. All,Avg. Timbre,Avg. Tone,Avg. Melody,Avg. Space,Avg. Time,Avg. Hallucination,Avg. Intricacy,Instrument Recognition,Singer Recognition,Gunshot Recognition,Bird Recognition,Animal Recognition,Transportation Recognition,Material Recognition,Scene Recognition,Hazard Recognition,Action Recognition,Eating Sound Recognition,Speech Sentiment Analysis,Meme Understanding,Music Sentiment Analysis,Music Genre Classification,Dance and Music Matching,Film and Music Matching,Music Score Matching,Audio 3D Angle Estimation,Audio Distance Estimation,Audio Time Estimation,Audio-Visual Synchronization,Action Sequencing,Hallucination Evaluation,Action Prediction,Action Tracing
+Model,Avg. All,Avg. Timbre,Avg. Tone,Avg. Melody,Avg. Space,Avg. Time,Avg. Hallucination,Avg. Intricacy,Instrument Recognition,Singer Recognition,Gunshot Recognition,Bird Recognition,Animal Recognition,Transportation Recognition,Material Recognition,Scene Recognition,Hazard Recognition,Action Recognition,Eating Sound Recognition,Speech Sentiment Analysis,Meme Understanding,Music Sentiment Analysis,Music Genre Classification,Dance and Music Matching,Film and Music Matching,Music Score Matching,Audio 3D Angle Estimation,Audio Distance Estimation,Audio Time Estimation,Audio-Visual Synchronization,Action Sequencing,Hallucination Evaluation,Action Prediction,Action Tracing
 [Unified-IO-2 L](https://unified-io-2.allenai.org/),26.0,23.8,24.1,28.8,15.0,26.8,30.0,30.4,20.5,22.5,25.5,18.5,27.0,26.5,23.0,28.0,21.3,20.9,26.5,24.5,20.0,27.9,31.0,27.5,32.5,24.5,15.0,15.0,28.0,25.5,27.0,30.0,27.1,33.8
 [Unified-IO-2 XL](https://unified-io-2.allenai.org/),26.3,24.3,23.2,27.8,22.5,25.3,31.5,34.8,20.0,23.5,24.0,20.5,27.5,26.0,27.5,30.0,19.4,19.9,26.5,23.0,25.0,26.9,30.5,27.0,31.5,22.5,30.0,15.0,26.5,25.5,24.0,31.5,35.7,33.8
 [Unified-IO-2 XXL](https://unified-io-2.allenai.org/),27.2,26.3,22.7,26.4,32.5,26.8,24.5,33.8,29.5,24.0,23.5,29.0,23.5,25.5,30.5,26.5,23.1,27.0,25.5,23.0,20.0,23.9,31.5,27.5,24.5,23.5,50.0,15.0,28.0,25.0,27.5,24.5,33.2,34.4
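Because add_new_eval appends new_data positionally via csv_data.loc[col] = new_data, the row length has to match this header exactly: 1 model column, 8 average columns (Avg. All plus the seven categories), and 26 per-task columns, i.e. 35 columns in total. A quick, illustrative consistency check:

```python
# Illustrative check that the leaderboard CSV schema matches the row built in app.py:
# [model_name, all_average, 7 category averages, each_task_accuracy[1..26]] -> 35 values.
import pandas as pd

csv_data = pd.read_csv("./file/AV-Odyssey_performance.csv")
assert len(csv_data.columns) == 35, list(csv_data.columns)
```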
file/av_odyssey.parquet ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c065933c9dff31e8d16c9684570fb4d2e90ddec621bef3a138bd1d44d56e82a0
+size 251176
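The parquet ground truth is committed as a Git LFS pointer, so the scoring code's positional indexing (row[0] for the question id, row[6] for the answer) can only be checked against the real file after `git lfs pull`. An illustrative inspection, assuming the file has been fetched locally:

```python
# Illustrative: confirm the column layout that app.py's row[0]/row[6] indexing relies on.
import dask.dataframe as dd

df = dd.read_parquet("./file/av_odyssey.parquet")
print(list(df.columns))  # positions 0 and 6 should be the question id and the answer
print(df.head(3))        # head() eagerly computes a small sample for inspection
```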
requirements.txt CHANGED
@@ -68,3 +68,5 @@ urllib3==1.26.15
 uvicorn==0.21.1
 websockets==11.0.1
 yarl==1.8.2
+fastparquet
+dask
src/__pycache__/utils_display.cpython-311.pyc CHANGED
Binary files a/src/__pycache__/utils_display.cpython-311.pyc and b/src/__pycache__/utils_display.cpython-311.pyc differ
 
src/auto_leaderboard/__pycache__/model_metadata_type.cpython-311.pyc CHANGED
Binary files a/src/auto_leaderboard/__pycache__/model_metadata_type.cpython-311.pyc and b/src/auto_leaderboard/__pycache__/model_metadata_type.cpython-311.pyc differ