Spaces:
Running
Running
import argparse
import ast
import csv
import glob
import json
import os
def chatgpt_json(merge_file):
    """Compute the accuracy (in percent) of every dataset in a merged result file.

    Args:
        merge_file: UTF-8 encoded bytes describing a nested mapping
            ``dataset name -> sample id -> dimension -> list of results``.
            Every result is a mapping with a numeric ``'rating'`` entry
            (presumably 0 or 1 -- this function only sums it).

    Returns:
        dict mapping each dataset name to ``sum(rating) / count * 100``
        rounded to two decimals. A dataset without any result scores 0.0
        instead of raising ``ZeroDivisionError``.

    Raises:
        ValueError, SyntaxError: if the payload is neither valid JSON nor a
            plain Python literal.
    """
    text = merge_file.decode("utf-8")
    try:
        # Real JSON parsing understands true/false/null wherever they occur
        # and never executes the uploaded content (the previous eval() did).
        merge_data = json.loads(text)
    except json.JSONDecodeError:
        # Legacy fallback for Python-literal style payloads (e.g. single
        # quoted keys). literal_eval only accepts literals, so arbitrary
        # expressions are rejected rather than evaluated.
        for token in ("true", "false", "null"):
            text = text.replace(f': {token},', f': "{token}",')
        merge_data = ast.literal_eval(text)

    dataset_scores_dict = {}
    for dataset_name, dataset_results in merge_data.items():
        # Flatten sample -> dimension -> results into one list of ratings.
        ratings = [
            result['rating']
            for dims in dataset_results.values()
            for results in dims.values()
            for result in results
        ]
        dataset_scores_dict[dataset_name] = (
            round(sum(ratings) / len(ratings) * 100, 2) if ratings else 0.0
        )
    return dataset_scores_dict
def compute_scores(merge_file):
    """Build the two-row score table (header + values) from merged results.

    Args:
        merge_file: UTF-8 encoded bytes describing a nested mapping
            ``dataset name -> sample id -> dimension -> list of results``.
            Every result is a mapping with a numeric ``'rating'`` entry
            (presumably 0 or 1 -- this function only sums it). The table
            reads the dataset names ``multi-choice``, ``yes_no``,
            ``caption_matching`` and ``captioning`` and the dimensions
            ``action``, ``speed``, ``direction``, ``order`` and
            ``attribute_change``.

    Returns:
        ``[header, row]``: two lists of 30 entries each. ``header`` holds
        the column labels and ``row`` the matching accuracies in percent,
        rounded to two decimals. A dataset/dimension pair without any
        result scores 0.0 instead of raising ``ZeroDivisionError``.

    Raises:
        KeyError: if one of the four datasets listed above is missing, or
            if a result is filed under an unknown dimension.
        ValueError, SyntaxError: if the payload is neither valid JSON nor a
            plain Python literal.
    """

    def percentage(correct, total):
        # Guard against dimensions (or whole payloads) without any result.
        return round(correct / total * 100, 2) if total else 0.0

    text = merge_file.decode("utf-8")
    try:
        # Real JSON parsing understands true/false/null wherever they occur
        # and never executes the uploaded content (the previous eval() did).
        merge_data = json.loads(text)
    except json.JSONDecodeError:
        # Legacy fallback for Python-literal style payloads (e.g. single
        # quoted keys). literal_eval only accepts literals, so arbitrary
        # expressions are rejected rather than evaluated.
        for token in ("true", "false", "null"):
            text = text.replace(f': {token},', f': "{token}",')
        merge_data = ast.literal_eval(text)

    eval_dims = ['action', 'speed', 'direction', 'order', 'attribute_change', 'avg']
    aspects = [dim for dim in eval_dims if dim != 'avg']

    dataset_scores_dict = {}
    total_correct, total_num = 0, 0
    # Per-dimension tallies accumulated across all datasets.
    dim_correct = dict.fromkeys(aspects, 0)
    dim_total = dict.fromkeys(aspects, 0)

    for dataset_name, dataset_results in merge_data.items():
        # Per-dataset tallies; 'avg' collects every result of the dataset.
        dataset_correct = dict.fromkeys(eval_dims, 0)
        dataset_num = dict.fromkeys(eval_dims, 0)
        for dims in dataset_results.values():
            for dim, results in dims.items():
                for result in results:
                    rating = result['rating']
                    dataset_correct['avg'] += rating
                    dataset_correct[dim] += rating
                    dim_correct[dim] += rating
                    dataset_num['avg'] += 1
                    dataset_num[dim] += 1
                    dim_total[dim] += 1
        total_correct += dataset_correct['avg']
        total_num += dataset_num['avg']
        for dim in eval_dims:
            dataset_scores_dict[f"{dim}_{dataset_name}"] = percentage(
                dataset_correct[dim], dataset_num[dim]
            )

    for dim in aspects:
        dataset_scores_dict[f"avg_{dim}"] = percentage(dim_correct[dim], dim_total[dim])
    dataset_scores_dict["avg_all"] = percentage(total_correct, total_num)

    # (score-key fragment, column label) pairs in the column order of the
    # table. Note the dimension order here differs from ``eval_dims``.
    table_dims = [
        ("action", "Action"),
        ("direction", "Direction"),
        ("speed", "Speed"),
        ("order", "Event Order"),
        ("attribute_change", "Attribute Change"),
    ]
    table_tasks = [
        ("multi-choice", "Multi-Choice"),
        ("yes_no", "Yes/No"),
        ("caption_matching", "Caption Matching"),
        ("captioning", "Caption Generation"),
    ]

    # Each column is a (label, score key) pair so header and values cannot
    # drift apart.
    columns = [("Avg. All", "avg_all")]
    columns += [(f"Avg. {label}", f"avg_{key}") for key, label in table_dims]
    columns += [(f"Avg. {label}", f"avg_{key}") for key, label in table_tasks]
    columns += [
        (f"{dim_label}. {task_label}", f"{dim_key}_{task_key}")
        for dim_key, dim_label in table_dims
        for task_key, task_label in table_tasks
    ]

    header = [label for label, _ in columns]
    row = [dataset_scores_dict[key] for _, key in columns]
    return [header, row]