Upload 14 files
Browse files
- .gitattributes +35 -35
- __pycache__/constants.cpython-311.pyc +0 -0
- app.py +43 -53
- constants.py +8 -0
- file/AV-Odyssey_performance.csv +1 -1
- file/av_odyssey.parquet +3 -0
- requirements.txt +2 -0
- src/__pycache__/utils_display.cpython-311.pyc +0 -0
- src/auto_leaderboard/__pycache__/model_metadata_type.cpython-311.pyc +0 -0
.gitattributes
CHANGED
@@ -1,35 +1,35 @@
|
|
1 |
-
*.7z filter=lfs diff=lfs merge=lfs -text
|
2 |
-
*.arrow filter=lfs diff=lfs merge=lfs -text
|
3 |
-
*.bin filter=lfs diff=lfs merge=lfs -text
|
4 |
-
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
5 |
-
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
6 |
-
*.ftz filter=lfs diff=lfs merge=lfs -text
|
7 |
-
*.gz filter=lfs diff=lfs merge=lfs -text
|
8 |
-
*.h5 filter=lfs diff=lfs merge=lfs -text
|
9 |
-
*.joblib filter=lfs diff=lfs merge=lfs -text
|
10 |
-
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
11 |
-
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
12 |
-
*.model filter=lfs diff=lfs merge=lfs -text
|
13 |
-
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
14 |
-
*.npy filter=lfs diff=lfs merge=lfs -text
|
15 |
-
*.npz filter=lfs diff=lfs merge=lfs -text
|
16 |
-
*.onnx filter=lfs diff=lfs merge=lfs -text
|
17 |
-
*.ot filter=lfs diff=lfs merge=lfs -text
|
18 |
-
*.parquet filter=lfs diff=lfs merge=lfs -text
|
19 |
-
*.pb filter=lfs diff=lfs merge=lfs -text
|
20 |
-
*.pickle filter=lfs diff=lfs merge=lfs -text
|
21 |
-
*.pkl filter=lfs diff=lfs merge=lfs -text
|
22 |
-
*.pt filter=lfs diff=lfs merge=lfs -text
|
23 |
-
*.pth filter=lfs diff=lfs merge=lfs -text
|
24 |
-
*.rar filter=lfs diff=lfs merge=lfs -text
|
25 |
-
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
26 |
-
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
27 |
-
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
28 |
-
*.tar filter=lfs diff=lfs merge=lfs -text
|
29 |
-
*.tflite filter=lfs diff=lfs merge=lfs -text
|
30 |
-
*.tgz filter=lfs diff=lfs merge=lfs -text
|
31 |
-
*.wasm filter=lfs diff=lfs merge=lfs -text
|
32 |
-
*.xz filter=lfs diff=lfs merge=lfs -text
|
33 |
-
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
-
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
-
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
1 |
+
*.7z filter=lfs diff=lfs merge=lfs -text
|
2 |
+
*.arrow filter=lfs diff=lfs merge=lfs -text
|
3 |
+
*.bin filter=lfs diff=lfs merge=lfs -text
|
4 |
+
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
5 |
+
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
6 |
+
*.ftz filter=lfs diff=lfs merge=lfs -text
|
7 |
+
*.gz filter=lfs diff=lfs merge=lfs -text
|
8 |
+
*.h5 filter=lfs diff=lfs merge=lfs -text
|
9 |
+
*.joblib filter=lfs diff=lfs merge=lfs -text
|
10 |
+
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
11 |
+
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
12 |
+
*.model filter=lfs diff=lfs merge=lfs -text
|
13 |
+
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
14 |
+
*.npy filter=lfs diff=lfs merge=lfs -text
|
15 |
+
*.npz filter=lfs diff=lfs merge=lfs -text
|
16 |
+
*.onnx filter=lfs diff=lfs merge=lfs -text
|
17 |
+
*.ot filter=lfs diff=lfs merge=lfs -text
|
18 |
+
*.parquet filter=lfs diff=lfs merge=lfs -text
|
19 |
+
*.pb filter=lfs diff=lfs merge=lfs -text
|
20 |
+
*.pickle filter=lfs diff=lfs merge=lfs -text
|
21 |
+
*.pkl filter=lfs diff=lfs merge=lfs -text
|
22 |
+
*.pt filter=lfs diff=lfs merge=lfs -text
|
23 |
+
*.pth filter=lfs diff=lfs merge=lfs -text
|
24 |
+
*.rar filter=lfs diff=lfs merge=lfs -text
|
25 |
+
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
26 |
+
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
27 |
+
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
28 |
+
*.tar filter=lfs diff=lfs merge=lfs -text
|
29 |
+
*.tflite filter=lfs diff=lfs merge=lfs -text
|
30 |
+
*.tgz filter=lfs diff=lfs merge=lfs -text
|
31 |
+
*.wasm filter=lfs diff=lfs merge=lfs -text
|
32 |
+
*.xz filter=lfs diff=lfs merge=lfs -text
|
33 |
+
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
+
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
+
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
__pycache__/constants.cpython-311.pyc
ADDED
Binary file (4.39 kB). View file
|
|
app.py
CHANGED
@@ -7,7 +7,7 @@ import tempfile
|
|
7 |
import re
|
8 |
from constants import *
|
9 |
from src.auto_leaderboard.model_metadata_type import ModelType
|
10 |
-
|
11 |
|
12 |
global data_component, filter_component
|
13 |
|
@@ -26,15 +26,12 @@ def prediction_analyse(prediction_content):
|
|
26 |
# pdb.set_trace()
|
27 |
predictions = prediction_content.split("\n")
|
28 |
|
29 |
-
# 读取 ground_truth
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
# 将 ground_truth 数据转换为以 question_id 为键的字典
|
34 |
-
ground_truth = {item["question_id"]: item for item in ground_truth_data}
|
35 |
|
36 |
# 初始化结果统计字典
|
37 |
-
results = {i: {"correct": 0, "total": 0} for i in range(1,
|
38 |
|
39 |
# 遍历 predictions,计算每个 question_type_id 的正确预测数和总预测数
|
40 |
for prediction in predictions:
|
@@ -48,15 +45,15 @@ def prediction_analyse(prediction_content):
|
|
48 |
print(f"Warning: Skipping invalid JSON data in line: {prediction}")
|
49 |
continue
|
50 |
question_id = prediction["question_id"]
|
51 |
-
if question_id not in ground_truth:
|
52 |
continue
|
53 |
gt_item = ground_truth[question_id]
|
54 |
-
question_type_id =
|
55 |
|
56 |
-
if prediction["prediction"] == gt_item
|
57 |
-
results[question_type_id]["correct"] += 1
|
58 |
|
59 |
-
results[question_type_id]["total"] += 1
|
60 |
|
61 |
return results
|
62 |
|
@@ -70,45 +67,23 @@ def add_new_eval(
|
|
70 |
if input_file is None:
|
71 |
return "Error! Empty file!"
|
72 |
else:
|
73 |
-
model_size = validate_model_size(model_size)
|
74 |
# v1 evaluation
|
75 |
content = input_file.decode("utf-8")
|
76 |
prediction = prediction_analyse(content)
|
77 |
csv_data = pd.read_csv(CSV_DIR)
|
|
|
78 |
|
79 |
-
|
80 |
-
if Evaluation_dimension == 'Image':
|
81 |
-
End_dimension = 10
|
82 |
-
elif Evaluation_dimension == 'Video':
|
83 |
-
Start_dimension = 10
|
84 |
-
each_task_accuracy = {i: round(prediction[i]["correct"] / prediction[i]["total"] * 100, 1) if i >= Start_dimension and i < End_dimension else 0 for i in range(1, 13)}
|
85 |
|
86 |
# count for average image\video\all
|
87 |
-
|
88 |
-
|
89 |
-
|
90 |
-
|
91 |
-
|
92 |
-
|
93 |
-
|
94 |
-
|
95 |
-
else:
|
96 |
-
average_accuracy_image = 0
|
97 |
-
|
98 |
-
if Evaluation_dimension != 'Image':
|
99 |
-
average_accuracy_video = round(total_correct_video / total_video * 100, 1)
|
100 |
-
else:
|
101 |
-
average_accuracy_video = 0
|
102 |
-
|
103 |
-
if Evaluation_dimension == 'All':
|
104 |
-
overall_accuracy = round((total_correct_image + total_correct_video) / (total_image + total_video) * 100, 1)
|
105 |
-
else:
|
106 |
-
overall_accuracy = 0
|
107 |
-
|
108 |
-
if LLM_type == 'Other':
|
109 |
-
LLM_name = LLM_name_textbox
|
110 |
-
else:
|
111 |
-
LLM_name = LLM_type
|
112 |
|
113 |
if revision_name_textbox == '':
|
114 |
col = csv_data.shape[0]
|
@@ -130,11 +105,14 @@ def add_new_eval(
|
|
130 |
# add new data
|
131 |
new_data = [
|
132 |
model_name,
|
133 |
-
|
134 |
-
|
135 |
-
|
136 |
-
|
137 |
-
|
|
|
|
|
|
|
138 |
each_task_accuracy[1],
|
139 |
each_task_accuracy[2],
|
140 |
each_task_accuracy[3],
|
@@ -146,13 +124,25 @@ def add_new_eval(
|
|
146 |
each_task_accuracy[9],
|
147 |
each_task_accuracy[10],
|
148 |
each_task_accuracy[11],
|
149 |
-
each_task_accuracy[12],
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
150 |
]
|
151 |
csv_data.loc[col] = new_data
|
152 |
csv_data = csv_data.to_csv(CSV_DIR, index=False)
|
153 |
|
154 |
-
csv_task_data.loc[col] = new_data
|
155 |
-
csv_task_data = csv_task_data.to_csv(CSV_TASK_DIR, index=False)
|
156 |
return 0
|
157 |
|
158 |
def get_baseline_df():
|
|
|
7 |
import re
|
8 |
from constants import *
|
9 |
from src.auto_leaderboard.model_metadata_type import ModelType
|
10 |
+
import dask.dataframe as dd
|
11 |
|
12 |
global data_component, filter_component
|
13 |
|
|
|
26 |
# pdb.set_trace()
|
27 |
predictions = prediction_content.split("\n")
|
28 |
|
29 |
+
# 读取 ground_truth 文件
|
30 |
+
df = dd.read_parquet("./file/av_odyssey.parquet")
|
31 |
+
ground_truth = {row[0]: row[6] for row in df.itertuples(index=False, name=None)}
|
|
|
|
|
|
|
32 |
|
33 |
# 初始化结果统计字典
|
34 |
+
results = {i: {"correct": 0, "total": 0} for i in range(1, 27)}
|
35 |
|
36 |
# 遍历 predictions,计算每个 question_type_id 的正确预测数和总预测数
|
37 |
for prediction in predictions:
|
|
|
45 |
print(f"Warning: Skipping invalid JSON data in line: {prediction}")
|
46 |
continue
|
47 |
question_id = prediction["question_id"]
|
48 |
+
if question_id not in ground_truth.keys():
|
49 |
continue
|
50 |
gt_item = ground_truth[question_id]
|
51 |
+
question_type_id = question_id.split("_")[0]
|
52 |
|
53 |
+
if prediction["prediction"] == gt_item:
|
54 |
+
results[int(question_type_id)]["correct"] += 1
|
55 |
|
56 |
+
results[int(question_type_id)]["total"] += 1
|
57 |
|
58 |
return results
|
59 |
|
|
|
67 |
if input_file is None:
|
68 |
return "Error! Empty file!"
|
69 |
else:
|
|
|
70 |
# v1 evaluation
|
71 |
content = input_file.decode("utf-8")
|
72 |
prediction = prediction_analyse(content)
|
73 |
csv_data = pd.read_csv(CSV_DIR)
|
74 |
+
# pdb.set_trace()
|
75 |
|
76 |
+
each_task_accuracy = {i: round(prediction[i]["correct"] / prediction[i]["total"] * 100, 1) for i in range(1, 27)}
|
|
|
|
|
|
|
|
|
|
|
77 |
|
78 |
# count for average timbre\tone\melody\space\time\hallucination\intricacy\all
|
79 |
+
total_correct_timbre = round(sum(prediction[i]["correct"] for i in range(timbre_task[0], timbre_task[1] + 1)) / sum(prediction[i]["total"] for i in range(timbre_task[0], timbre_task[1] + 1)) * 100, 1)
|
80 |
+
total_correct_tone = round(sum(prediction[i]["correct"] for i in range(tone_task[0], tone_task[1] + 1)) / sum(prediction[i]["total"] for i in range(tone_task[0], tone_task[1] + 1)) * 100, 1)
|
81 |
+
total_correct_melody = round(sum(prediction[i]["correct"] for i in range(melody_task[0], melody_task[1] + 1)) / sum(prediction[i]["total"] for i in range(melody_task[0], melody_task[1] + 1)) * 100, 1)
|
82 |
+
total_correct_space = round(sum(prediction[i]["correct"] for i in range(space_task[0], space_task[1] + 1)) / sum(prediction[i]["total"] for i in range(space_task[0], space_task[1] + 1)) * 100, 1)
|
83 |
+
total_correct_time = round(sum(prediction[i]["correct"] for i in range(time_task[0], time_task[1] + 1)) / sum(prediction[i]["total"] for i in range(time_task[0], time_task[1] + 1)) * 100, 1)
|
84 |
+
total_correct_hallucination = round(sum(prediction[i]["correct"] for i in range(hallucination_task[0], hallucination_task[1] + 1)) / sum(prediction[i]["total"] for i in range(hallucination_task[0], hallucination_task[1] + 1)) * 100, 1)
|
85 |
+
total_correct_intricay = round(sum(prediction[i]["correct"] for i in range(intricay_task[0], intricay_task[1] + 1)) / sum(prediction[i]["total"] for i in range(intricay_task[0], intricay_task[1] + 1)) * 100, 1)
|
86 |
+
all_average = round(sum(prediction[i]["correct"] for i in range(1, 27)) / sum(prediction[i]["total"] for i in range(1, 27)) * 100, 1)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
87 |
|
88 |
if revision_name_textbox == '':
|
89 |
col = csv_data.shape[0]
|
|
|
105 |
# add new data
|
106 |
new_data = [
|
107 |
model_name,
|
108 |
+
all_average,
|
109 |
+
total_correct_timbre,
|
110 |
+
total_correct_tone,
|
111 |
+
total_correct_melody,
|
112 |
+
total_correct_space,
|
113 |
+
total_correct_time,
|
114 |
+
total_correct_hallucination,
|
115 |
+
total_correct_intricay,
|
116 |
each_task_accuracy[1],
|
117 |
each_task_accuracy[2],
|
118 |
each_task_accuracy[3],
|
|
|
124 |
each_task_accuracy[9],
|
125 |
each_task_accuracy[10],
|
126 |
each_task_accuracy[11],
|
127 |
+
each_task_accuracy[12],
|
128 |
+
each_task_accuracy[13],
|
129 |
+
each_task_accuracy[14],
|
130 |
+
each_task_accuracy[15],
|
131 |
+
each_task_accuracy[16],
|
132 |
+
each_task_accuracy[17],
|
133 |
+
each_task_accuracy[18],
|
134 |
+
each_task_accuracy[19],
|
135 |
+
each_task_accuracy[20],
|
136 |
+
each_task_accuracy[21],
|
137 |
+
each_task_accuracy[22],
|
138 |
+
each_task_accuracy[23],
|
139 |
+
each_task_accuracy[24],
|
140 |
+
each_task_accuracy[25],
|
141 |
+
each_task_accuracy[26],
|
142 |
]
|
143 |
csv_data.loc[col] = new_data
|
144 |
csv_data = csv_data.to_csv(CSV_DIR, index=False)
|
145 |
|
|
|
|
|
146 |
return 0
|
147 |
|
148 |
def get_baseline_df():
|
constants.py
CHANGED
@@ -13,6 +13,14 @@ AVG_INFO = ["Avg. All", "Avg. Timbre", "Avg. Tone", "Avg. Melody", "Avg. Space",
|
|
13 |
DATA_TITILE_TYPE = ["markdown"] * len(MODEL_INFO) + ["number"] * len(TASK_INFO)
|
14 |
CSV_DIR = "./file/AV-Odyssey_performance.csv"
|
15 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
16 |
COLUMN_NAMES = MODEL_INFO + TASK_INFO
|
17 |
|
18 |
DATA_NUM = [200, 200, 200, 200, 200, 200, 200, 200, 108, 196, 200, 200, 20, 97, 200, 200, 200, 200, 20, 20, 200, 200, 200, 200, 199, 195]
|
|
|
13 |
DATA_TITILE_TYPE = ["markdown"] * len(MODEL_INFO) + ["number"] * len(TASK_INFO)
|
14 |
CSV_DIR = "./file/AV-Odyssey_performance.csv"
|
15 |
|
16 |
+
timbre_task = [1, 11]
|
17 |
+
tone_task = [12, 13]
|
18 |
+
melody_task = [14, 18]
|
19 |
+
space_task = [19, 20]
|
20 |
+
time_task = [21, 23]
|
21 |
+
hallucination_task = [24, 24]
|
22 |
+
intricay_task = [25, 26]
|
23 |
+
|
24 |
COLUMN_NAMES = MODEL_INFO + TASK_INFO
|
25 |
|
26 |
DATA_NUM = [200, 200, 200, 200, 200, 200, 200, 200, 108, 196, 200, 200, 20, 97, 200, 200, 200, 200, 20, 20, 200, 200, 200, 200, 199, 195]
|
file/AV-Odyssey_performance.csv
CHANGED
@@ -1,4 +1,4 @@
|
|
1 |
-
|
2 |
[Unified-IO-2 L](https://unified-io-2.allenai.org/),26.0,23.8,24.1,28.8,15.0,26.8,30.0,30.4,20.5,22.5,25.5,18.5,27.0,26.5,23.0,28.0,21.3,20.9,26.5,24.5,20.0,27.9,31.0,27.5,32.5,24.5,15.0,15.0,28.0,25.5,27.0,30.0,27.1,33.8
|
3 |
[Unified-IO-2 XL](https://unified-io-2.allenai.org/),26.3,24.3,23.2,27.8,22.5,25.3,31.5,34.8,20.0,23.5,24.0,20.5,27.5,26.0,27.5,30.0,19.4,19.9,26.5,23.0,25.0,26.9,30.5,27.0,31.5,22.5,30.0,15.0,26.5,25.5,24.0,31.5,35.7,33.8
|
4 |
[Unified-IO-2 XXL](https://unified-io-2.allenai.org/),27.2,26.3,22.7,26.4,32.5,26.8,24.5,33.8,29.5,24.0,23.5,29.0,23.5,25.5,30.5,26.5,23.1,27.0,25.5,23.0,20.0,23.9,31.5,27.5,24.5,23.5,50.0,15.0,28.0,25.0,27.5,24.5,33.2,34.4
|
|
|
1 |
+
Model,Avg. All,Avg. Timbre,Avg. Tone,Avg. Melody,Avg. Space,Avg. Time,Avg. Hallucination,Avg. Intricacy,Instrument Recognition,Singer Recognition,Gunshot Recognition,Bird Recognition,Animal Recognition,Transportation Recognition,Material Recognition,Scene Recognition,Hazard Recognition,Action Recognition,Eating Sound Recognition,Speech Sentiment Analysis,Meme Understanding,Music Sentiment Analysis,Music Genre Classification,Dance and Music Matching,Film and Music Matching,Music Score Matching,Audio 3D Angle Estimation,Audio Distance Estimation,Audio Time Estimation,Audio-Visual Synchronization,Action Sequencing,Hallucination Evaluation,Action Prediction,Action Tracing
|
2 |
[Unified-IO-2 L](https://unified-io-2.allenai.org/),26.0,23.8,24.1,28.8,15.0,26.8,30.0,30.4,20.5,22.5,25.5,18.5,27.0,26.5,23.0,28.0,21.3,20.9,26.5,24.5,20.0,27.9,31.0,27.5,32.5,24.5,15.0,15.0,28.0,25.5,27.0,30.0,27.1,33.8
|
3 |
[Unified-IO-2 XL](https://unified-io-2.allenai.org/),26.3,24.3,23.2,27.8,22.5,25.3,31.5,34.8,20.0,23.5,24.0,20.5,27.5,26.0,27.5,30.0,19.4,19.9,26.5,23.0,25.0,26.9,30.5,27.0,31.5,22.5,30.0,15.0,26.5,25.5,24.0,31.5,35.7,33.8
|
4 |
[Unified-IO-2 XXL](https://unified-io-2.allenai.org/),27.2,26.3,22.7,26.4,32.5,26.8,24.5,33.8,29.5,24.0,23.5,29.0,23.5,25.5,30.5,26.5,23.1,27.0,25.5,23.0,20.0,23.9,31.5,27.5,24.5,23.5,50.0,15.0,28.0,25.0,27.5,24.5,33.2,34.4
|
file/av_odyssey.parquet
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c065933c9dff31e8d16c9684570fb4d2e90ddec621bef3a138bd1d44d56e82a0
|
3 |
+
size 251176
|
requirements.txt
CHANGED
@@ -68,3 +68,5 @@ urllib3==1.26.15
|
|
68 |
uvicorn==0.21.1
|
69 |
websockets==11.0.1
|
70 |
yarl==1.8.2
|
|
|
|
|
|
68 |
uvicorn==0.21.1
|
69 |
websockets==11.0.1
|
70 |
yarl==1.8.2
|
71 |
+
fastparquet
|
72 |
+
dask
|
src/__pycache__/utils_display.cpython-311.pyc
CHANGED
Binary files a/src/__pycache__/utils_display.cpython-311.pyc and b/src/__pycache__/utils_display.cpython-311.pyc differ
|
|
src/auto_leaderboard/__pycache__/model_metadata_type.cpython-311.pyc
CHANGED
Binary files a/src/auto_leaderboard/__pycache__/model_metadata_type.cpython-311.pyc and b/src/auto_leaderboard/__pycache__/model_metadata_type.cpython-311.pyc differ
|
|