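# NOTE: the three lines below appear to be hosting-page banner text captured
# together with the file; they are not program source.
# Spaces:
# Runtime error
# Runtime error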
# Names exported by a star-import of this module.
# NOTE(review): only 'block' is defined in the visible code; the other three
# names are not (they may come from the `constants` star-import) - confirm,
# since a star-import of a module whose __all__ lists a missing name fails.
__all__ = ['block', 'make_clickable_model', 'make_clickable_user', 'get_submissions']
import gradio as gr | |
import pandas as pd | |
import json | |
import pdb | |
import tempfile | |
from constants import * | |
from src.auto_leaderboard.model_metadata_type import ModelType | |
# NOTE(review): a `global` statement at module level has no effect; names
# assigned at module scope are already global. `filter_component` is only
# ever assigned as a local inside on_checkbox_group_change.
global data_component, filter_component
def upload_file(files):
    """Return the ``name`` attribute of every uploaded file object.

    Args:
        files: Iterable of objects exposing a ``name`` attribute, or ``None``
            when nothing has been uploaded.

    Returns:
        List with one ``name`` per file; empty when ``files`` is ``None`` or
        empty.
    """
    # An empty upload arrives as None; iterating it would raise TypeError.
    if not files:
        return []
    return [file.name for file in files]
def prediction_analyse(prediction_content, ground_truth_path="./file/SEED-Bench.json"):
    """Score newline-delimited JSON predictions against the ground-truth file.

    Args:
        prediction_content: Text holding one JSON object per line. Each object
            is read for its "question_id" and "prediction" keys.
        ground_truth_path: Path of the JSON file whose "questions" list holds
            items with "question_id", "question_type_id" and "answer" keys.

    Returns:
        Dict mapping each question type id in 1..12 to
        ``{"correct": int, "total": int}``.

    Lines that are blank, are not valid JSON objects, lack the required keys,
    or refer to a question that is not in the ground truth are reported with a
    warning and skipped instead of aborting the whole evaluation.
    """
    # Load the ground truth and index it by question_id.
    with open(ground_truth_path, "r", encoding="utf-8") as file:
        ground_truth_data = json.load(file)["questions"]
    ground_truth = {item["question_id"]: item for item in ground_truth_data}

    # One counter pair per question type id.
    results = {i: {"correct": 0, "total": 0} for i in range(1, 13)}

    for line in prediction_content.split("\n"):
        line = line.strip()
        if not line:
            continue
        try:
            prediction = json.loads(line)
        except json.JSONDecodeError:
            print(f"Warning: Skipping invalid JSON data in line: {line}")
            continue

        # A valid JSON line may still be a list/number/string, or an object
        # without the fields we need.
        if (
            not isinstance(prediction, dict)
            or "question_id" not in prediction
            or "prediction" not in prediction
        ):
            print(f"Warning: Skipping line without question_id/prediction: {line}")
            continue

        try:
            gt_item = ground_truth[prediction["question_id"]]
        except (KeyError, TypeError):
            # TypeError covers unhashable ids such as lists.
            print(f"Warning: Skipping unknown question_id in line: {line}")
            continue

        counters = results.get(gt_item["question_type_id"])
        if counters is None:
            print(f"Warning: Skipping unsupported question_type_id in line: {line}")
            continue

        if prediction["prediction"] == gt_item["answer"]:
            counters["correct"] += 1
        counters["total"] += 1

    return results
def _accuracy_percent(correct, total):
    """Return correct/total as a percentage rounded to one decimal (0 if total is 0)."""
    if not total:
        return 0
    return round(correct / total * 100, 1)


def _plain_model_name(cell):
    """Return the display name of a 'Model' cell, unwrapping '[name](link)' markdown."""
    text = str(cell)
    if text.startswith('[') and ']' in text:
        return text[1:text.index(']')]
    return text


def add_new_eval(
    input_file,
    model_name_textbox: str,
    revision_name_textbox: str,
    model_type: str,
    model_link: str,
    LLM_type: str,
    LLM_name_textbox: str,
    Evaluation_dimension: str,
):
    """Score an uploaded prediction file and write the result row to the CSV.

    Args:
        input_file: Raw bytes of the uploaded prediction file, or None.
        model_name_textbox: Model name used for a new entry.
        revision_name_textbox: Name of an existing entry to overwrite; when
            empty a new row is appended. If no existing row has this name a
            new row is appended under the revision name.
        model_type: Value stored in the first column of the row.
        model_link: Optional URL; when given, the model name is stored as a
            markdown link ``[name](link)``.
        LLM_type: Chosen LLM; the value 'Other' selects ``LLM_name_textbox``.
        LLM_name_textbox: Free-text LLM name used when ``LLM_type`` is 'Other'.
        Evaluation_dimension: 'Image' scores dimensions 1-9 only, 'Video'
            scores 10-12 only, any other value scores all twelve. The overall
            accuracy is only filled in for 'All'.

    Returns:
        The string "Error! Empty file!" when no file was given, otherwise 0.
    """
    if input_file is None:
        return "Error! Empty file!"

    content = input_file.decode("utf-8")
    prediction = prediction_analyse(content)
    csv_data = pd.read_csv(CSV_DIR)

    # Half-open range [start, end) of dimensions covered by this submission.
    start_dimension, end_dimension = 1, 13
    if Evaluation_dimension == 'Image':
        end_dimension = 10
    elif Evaluation_dimension == 'Video':
        start_dimension = 10

    # Dimensions outside the evaluated range, or with no scored predictions,
    # are reported as 0 rather than dividing by zero.
    each_task_accuracy = {
        i: _accuracy_percent(prediction[i]["correct"], prediction[i]["total"])
        if start_dimension <= i < end_dimension
        else 0
        for i in range(1, 13)
    }

    # Aggregate counts: dimensions 1-9 are image tasks, 10-12 are video tasks.
    total_correct_image = sum(prediction[i]["correct"] for i in range(1, 10))
    total_correct_video = sum(prediction[i]["correct"] for i in range(10, 13))
    total_image = sum(prediction[i]["total"] for i in range(1, 10))
    total_video = sum(prediction[i]["total"] for i in range(10, 13))

    if Evaluation_dimension != 'Video':
        average_accuracy_image = _accuracy_percent(total_correct_image, total_image)
    else:
        average_accuracy_image = 0

    if Evaluation_dimension != 'Image':
        average_accuracy_video = _accuracy_percent(total_correct_video, total_video)
    else:
        average_accuracy_video = 0

    if Evaluation_dimension == 'All':
        overall_accuracy = _accuracy_percent(
            total_correct_image + total_correct_video,
            total_image + total_video,
        )
    else:
        overall_accuracy = 0

    LLM_name = LLM_name_textbox if LLM_type == 'Other' else LLM_type

    # Pick the row to write: append by default, overwrite when the revision
    # name matches an existing entry.
    row_index = csv_data.shape[0]
    if revision_name_textbox == '':
        model_name = model_name_textbox
    else:
        model_name = revision_name_textbox
        # Stored names are either plain text or '[name](link)'; compare on the
        # bare name in both cases.
        name_list = [_plain_model_name(name) for name in csv_data['Model']]
        if revision_name_textbox in name_list:
            row_index = name_list.index(revision_name_textbox)

    if model_link != '':
        model_name = '[' + model_name + '](' + model_link + ')'

    # Column order: type, model, LLM, image tasks 1-9, image average,
    # video tasks 10-12, video average, overall.
    new_data = [
        model_type,
        model_name,
        LLM_name,
        each_task_accuracy[1],
        each_task_accuracy[2],
        each_task_accuracy[3],
        each_task_accuracy[4],
        each_task_accuracy[5],
        each_task_accuracy[6],
        each_task_accuracy[7],
        each_task_accuracy[8],
        each_task_accuracy[9],
        average_accuracy_image,
        each_task_accuracy[10],
        each_task_accuracy[11],
        each_task_accuracy[12],
        average_accuracy_video,
        overall_accuracy,
    ]
    csv_data.loc[row_index] = new_data
    csv_data.to_csv(CSV_DIR, index=False)
    return 0
def get_baseline_df():
    """Read the CSV file at ``CSV_DIR`` and return it as a pandas DataFrame."""
    return pd.read_csv(CSV_DIR)
# Build the Gradio UI: a leaderboard tab, an "About" tab and a submission tab,
# followed by a refresh button, then launch the app.
block = gr.Blocks()
with block:
    gr.Markdown(
        LEADERBORAD_INTRODUCTION
    )
    with gr.Tabs(elem_classes="tab-buttons") as tabs:
        # Tab 1: leaderboard table with selectable task columns.
        with gr.TabItem("🏅 SEED Benchmark", elem_id="seed-benchmark-tab-table", id=0):
            with gr.Row():
                with gr.Accordion("Citation", open=False):
                    citation_button = gr.Textbox(
                        value=CITATION_BUTTON_TEXT,
                        label=CITATION_BUTTON_LABEL,
                        elem_id="citation-button",
                    ).style(show_copy_button=True)

            gr.Markdown(
                TABLE_INTRODUCTION
            )

            # Column selector: every task column is ticked initially.
            checkbox_group = gr.CheckboxGroup(
                choices=TASK_INFO,
                value=TASK_INFO,
                label="Select options",
                interactive=True,
            )

            # Create the dataframe component. The function itself (not its
            # result) is passed as the value.
            data_component = gr.components.Dataframe(
                value=get_baseline_df,
                headers=COLUMN_NAMES,
                type="pandas",
                datatype=DATA_TITILE_TYPE,
                interactive=False,
                visible=True,
            )

            def on_checkbox_group_change(selected_columns):
                """Return the table value restricted to MODEL_INFO plus the selected task columns."""
                # Re-derive the selection from TASK_INFO so the columns keep
                # TASK_INFO's order regardless of the order they were ticked.
                selected_columns = [item for item in TASK_INFO if item in selected_columns]
                present_columns = MODEL_INFO + selected_columns
                updated_data = get_baseline_df()[present_columns]
                updated_headers = present_columns
                # Look up each kept column's datatype by its position in COLUMN_NAMES.
                update_datatype = [DATA_TITILE_TYPE[COLUMN_NAMES.index(x)] for x in updated_headers]

                # Local, throwaway component: only its .value is returned.
                filter_component = gr.components.Dataframe(
                    value=updated_data,
                    headers=updated_headers,
                    type="pandas",
                    datatype=update_datatype,
                    interactive=False,
                    visible=True,
                )

                return filter_component.value

            # Wire the checkbox group to the handler; the result goes to data_component.
            checkbox_group.change(fn=on_checkbox_group_change, inputs=checkbox_group, outputs=data_component)

        # Tab 2: static information page.
        with gr.TabItem("📝 About", elem_id="seed-benchmark-tab-table", id=2):
            gr.Markdown(LEADERBORAD_INFO, elem_classes="markdown-text")

        # Tab 3: submission form.
        with gr.TabItem("🚀 Submit here! ", elem_id="seed-benchmark-tab-table", id=3):
            gr.Markdown(LEADERBORAD_INTRODUCTION, elem_classes="markdown-text")

            with gr.Row():
                gr.Markdown(SUBMIT_INTRODUCTION, elem_classes="markdown-text")

            with gr.Row():
                gr.Markdown("# ✉️✨ Submit your model evaluation json file here!", elem_classes="markdown-text")

            with gr.Row():
                with gr.Column():
                    model_name_textbox = gr.Textbox(
                        label="Model Name", placeholder="LLaMA-7B"
                    )
                    revision_name_textbox = gr.Textbox(
                        label="Revision Model Name", placeholder="LLaMA"
                    )
                    model_type = gr.Dropdown(
                        choices=[
                            "LLM",
                            "ImageLLM",
                            "VideoLLM",
                            "Other",
                        ],
                        label="Model Type",
                        multiselect=False,
                        value="ImageLLM",
                        interactive=True,
                    )
                    model_link = gr.Textbox(
                        label="Model Link", placeholder="https://huggingface.co/decapoda-research/llama-7b-hf"
                    )

                with gr.Column():
                    LLM_type = gr.Dropdown(
                        choices=["Vicuna-7B", "Flan-T5-XL", "LLaMA-7B", "Other"],
                        label="LLM Type",
                        multiselect=False,
                        value="LLaMA-7B",
                        interactive=True,
                    )
                    # Read by add_new_eval only when LLM_type is "Other".
                    LLM_name_textbox = gr.Textbox(
                        label="LLM Model (for Other)",
                        placeholder="LLaMA-13B"
                    )
                    Evaluation_dimension = gr.Dropdown(
                        choices=["All", "Image", "Video"],
                        label="Evaluation Dimension",
                        multiselect=False,
                        value="All",
                        interactive=True,
                    )

            with gr.Column():
                # type='binary' matches add_new_eval, which calls .decode() on the upload.
                input_file = gr.inputs.File(label = "Click to Upload a json File", file_count="single", type='binary')
                submit_button = gr.Button("Submit Eval")

                submission_result = gr.Markdown()
                # The input order must match add_new_eval's parameter order.
                # NOTE(review): no outputs are wired (the line below is
                # commented out), so add_new_eval's return value, including
                # "Error! Empty file!", is not routed to submission_result.
                submit_button.click(
                    add_new_eval,
                    inputs = [
                        input_file,
                        model_name_textbox,
                        revision_name_textbox,
                        model_type,
                        model_link,
                        LLM_type,
                        LLM_name_textbox,
                        Evaluation_dimension,
                    ],
                    # outputs = submission_result,
                )

    # Reload the CSV into the leaderboard table on demand.
    with gr.Row():
        data_run = gr.Button("Refresh")
        data_run.click(
            get_baseline_df, outputs=data_component
        )

    # block.load(get_baseline_df, outputs=data_title)
# NOTE(review): launch() runs at import time because there is no
# `if __name__ == "__main__":` guard - confirm that is intended.
block.launch()