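"""Batch-evaluate answer files against the leaderboard gold answers.

Downloads the gold-answer files from the Hugging Face Hub, scores every
decodable file under an input directory, and writes one leaderboard JSON
result per file into the output directory.
"""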
import os
import json
import argparse

import streamlit as st
from huggingface_hub import hf_hub_download

from utils.Evaluation_answer_txt import Evaluation_answer_txt
from utils.upload_hub import file_name_decode


def download_gold_answer(repo, filename, token, force_download=False):
    # Fetch a gold-answer file from the answer dataset repo and return the
    # local cached path.
    ret = hf_hub_download(repo_id=repo, repo_type='dataset', filename=filename,
                          token=token, force_download=force_download)
    return ret


HUB_TOKEN = st.secrets['hf']
ANSWER_REPO = 'zhaorui-nb/leaderboard-answer'

# Local paths of the downloaded gold-answer files, keyed by setting name.
GET_GOLD_ANSWER_PATH = {
    'Setting1': download_gold_answer(ANSWER_REPO, 'dataset/Setting1_test_answer.txt', HUB_TOKEN),
    'Setting2': download_gold_answer(ANSWER_REPO, 'dataset/Setting2_test_answer.txt', HUB_TOKEN),
    'Setting3': download_gold_answer(ANSWER_REPO, 'dataset/Setting3_test_answer.txt', HUB_TOKEN),
}


def eval_answer_txt(set_name, uploaded_file_path):
    # Score an uploaded answer file against the gold answers for the given
    # setting; returns the score dict, or None for an unknown setting.
    if set_name not in GET_GOLD_ANSWER_PATH:
        return None
    gold_answer_txt = GET_GOLD_ANSWER_PATH[set_name]
    evaluator = Evaluation_answer_txt(gold_answer_txt, uploaded_file_path)  # renamed from `eval` to avoid shadowing the built-in
    score_json = evaluator.eval()
    return score_json

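# The score dict is assumed (from the keys used below; the actual schema is
# defined by Evaluation_answer_txt) to look like:
#   {"MICRO_AVERAGE": {"precision": ..., "recall": ..., "f1": ...},
#    "MACRO_AVERAGE": {"precision": ..., "recall": ..., "f1": ...}, ...}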
def evaluate_directory(input_dir, output_dir='./.output'):
    os.makedirs(output_dir, exist_ok=True)
    for root, _, files in os.walk(input_dir):
        for file in files:
            # file_name_decode returns a dict of the fields encoded in the
            # file name (or a falsy value if the name cannot be decoded).
            filename_info = file_name_decode(file)
            if filename_info:
                model_name_input = filename_info['model_name']
                dataset_input = filename_info['dataset']
                method_input = filename_info['method']
                file_name = filename_info['file_name']

                file_path = os.path.abspath(os.path.join(root, file))
                score_json = eval_answer_txt(dataset_input, file_path)

                if score_json:
                    leaderboard_dict = {
                        "model name": model_name_input,
                        "dataset": dataset_input,
                        "method": method_input,
                        "file name": file_name,
                        "submitter": 'zhaorui',

                        "MICRO precision": score_json["MICRO_AVERAGE"]["precision"],
                        "MICRO recall": score_json["MICRO_AVERAGE"]["recall"],
                        "MICRO f1": score_json["MICRO_AVERAGE"]["f1"],
                        "MACRO precision": score_json["MACRO_AVERAGE"]["precision"],
                        "MACRO recall": score_json["MACRO_AVERAGE"]["recall"],
                        "MACRO f1": score_json["MACRO_AVERAGE"]["f1"],
                        "detail result": json.dumps(score_json, indent=4)
                    }

                    repo_file_name = f'train-[{model_name_input}][{dataset_input}][{method_input}][{file_name}].json'
                    output_path = os.path.join(output_dir, repo_file_name)
                    with open(output_path, 'w') as f:
                        json.dump(leaderboard_dict, f, indent=4)
                else:
                    print(f"Failed to evaluate {file_path}")


def main():
    parser = argparse.ArgumentParser(description="Evaluate all text files in the given directory.")
    parser.add_argument('input_dir', type=str, help='Path to the directory containing text files.')
    parser.add_argument('--output_dir', type=str, default='./.output', help='Path to the directory to save the output json files.')

    args = parser.parse_args()

    evaluate_directory(args.input_dir, args.output_dir)

    # Results are written as one JSON file per evaluated input file, not a
    # single combined file.
    print(f"Evaluation completed. Results saved to {args.output_dir}")


if __name__ == "__main__":
    main()
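# Example invocation (script name and paths are placeholders; note that
# st.secrets['hf'] must be configured, since the gold answers are downloaded
# at import time):
#   python evaluate_directory.py ./submissions --output_dir ./.output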