zhao rui committed
Commit • db33c1e
1 Parent(s): e1ad072
[fixbug] huggingface load dataset

Files changed:
- .gitignore +3 -0
- README.md +6 -0
- app.py +2 -2
- batch_eval_script.py +94 -0
.gitignore CHANGED
@@ -6,4 +6,7 @@ __pycache__
 *.tsv
 *.csv
 *.json
+*.txt
+
+
README.md CHANGED
@@ -41,3 +41,9 @@ Setting2
 Setting3
 ```
 
+
+# cli batch eval tool
+```
+python .\batch_eval_script.py ..\deid_resaut
+```
+
app.py CHANGED
@@ -85,7 +85,7 @@ try:
     color = [st.success, st.info, st.warning, st.error]
     color[i % 4](dis)
 
-    dic = df.iloc[dind]['detail result']
+    dic = json.loads(df.iloc[dind]['detail result'])
     dt_df = pd.DataFrame(dic).T
     st.dataframe(dt_df)
 
@@ -210,7 +210,7 @@ if st.session_state['score_json']:
     "MACRO precision": score_json["MACRO_AVERAGE"]["precision"],
     "MACRO recall": score_json["MACRO_AVERAGE"]["recall"],
     "MACRO f1": score_json["MACRO_AVERAGE"]["f1"],
-    "detail result": score_json
+    "detail result": json.dumps(score_json, indent=4)  # score_json
 }
 
 repo_file_path = f'data/train-[{model_name_input}][{dataset_input}][{method_input}][{file_name}].json'
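The app.py change stores the nested per-entity scores as a JSON string column instead of a raw dict, presumably so the leaderboard files keep a flat string schema when they are read back as a Hugging Face dataset (the bug named in the commit message), and the display code then parses the string before building the detail table. A minimal sketch of that round-trip, using hypothetical score values and a standalone pandas frame rather than the Hub-backed dataset:

```
import json
import pandas as pd

# Hypothetical scores shaped like the score_json that app.py stores per submission.
score_json = {
    "MICRO_AVERAGE": {"precision": 0.91, "recall": 0.88, "f1": 0.89},
    "MACRO_AVERAGE": {"precision": 0.87, "recall": 0.84, "f1": 0.85},
}

# Write side: keep the column a plain string by serializing the nested dict.
row = {"model name": "demo-model", "detail result": json.dumps(score_json, indent=4)}
df = pd.DataFrame([row])

# Read side (as in the patched app.py): parse the string back before tabulating.
dic = json.loads(df.iloc[0]["detail result"])
dt_df = pd.DataFrame(dic).T  # rows: MICRO_AVERAGE / MACRO_AVERAGE
print(dt_df)
```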
batch_eval_script.py ADDED
@@ -0,0 +1,94 @@
+# An argparse script: set the eval dir, run the evaluation on each file in it,
+# and save the resulting JSON files to the output dir.
+# usage: python .\batch_eval_script.py ..\deid_resaut
+
+import os
+import json
+import argparse
+import streamlit as st
+
+from huggingface_hub import hf_hub_download
+
+from utils.Evaluation_answer_txt import Evaluation_answer_txt
+from utils.upload_hub import file_name_decode
+
+# Function to download the gold answer file for a dataset setting
+def download_gold_answer(repo, filename, token, force_download=False):
+    ret = hf_hub_download(repo_id=repo, repo_type='dataset', filename=filename, token=token, force_download=force_download)
+    return ret
+
+HUB_TOKEN = st.secrets['hf']
+ANSWER_REPO = 'zhaorui-nb/leaderboard-answer'
+GET_GOLD_ANSWER_PATH = {
+    'Setting1': download_gold_answer(ANSWER_REPO, 'dataset/Setting1_test_answer.txt', HUB_TOKEN),
+    'Setting2': download_gold_answer(ANSWER_REPO, 'dataset/Setting2_test_answer.txt', HUB_TOKEN),
+    'Setting3': download_gold_answer(ANSWER_REPO, 'dataset/Setting3_test_answer.txt', HUB_TOKEN)
+}
+
+# Function to evaluate an answer text file against the gold answer
+def eval_answer_txt(set_name, uploaded_file_path):
+    if set_name not in GET_GOLD_ANSWER_PATH:
+        return None
+    gold_answer_txt = GET_GOLD_ANSWER_PATH[set_name]
+    eval = Evaluation_answer_txt(gold_answer_txt, uploaded_file_path)
+    score_json = eval.eval()
+    return score_json
+
+# Function to traverse the input directory and evaluate each decodable file
+def evaluate_directory(input_dir, output_dir='./.output'):
+    os.makedirs(output_dir, exist_ok=True)
+    for root, _, files in os.walk(input_dir):
+        for file in files:
+            filename_info = file_name_decode(file)
+            if filename_info:
+                model_name_input = filename_info['model_name']
+                dataset_input = filename_info['dataset']
+                method_input = filename_info['method']
+                file_name = filename_info['file_name']
+
+                file_path = os.path.join(root, file)
+                # get the full path of the file
+                file_path = os.path.abspath(file_path)
+                score_json = eval_answer_txt(dataset_input, file_path)
+                # print(f"sss", GET_GOLD_ANSWER_PATH[dataset_input], file_path)
+                if score_json:
+                    leaderboard_dict = {
+                        "model name": model_name_input,
+                        "dataset": dataset_input,
+                        "method": method_input,
+                        "file name": file_name,
+                        "submitter": 'zhaorui',
+
+                        "MICRO precision": score_json["MICRO_AVERAGE"]["precision"],
+                        "MICRO recall": score_json["MICRO_AVERAGE"]["recall"],
+                        "MICRO f1": score_json["MICRO_AVERAGE"]["f1"],
+                        "MACRO precision": score_json["MACRO_AVERAGE"]["precision"],
+                        "MACRO recall": score_json["MACRO_AVERAGE"]["recall"],
+                        "MACRO f1": score_json["MACRO_AVERAGE"]["f1"],
+                        "detail result": json.dumps(score_json, indent=4)  # score_json
+                    }
+
+                    # e.g. train-[01-ai@Yi-1.5-6B-Chat][Setting1][icl][answer.txt].json
+                    repo_file_name = f'train-[{model_name_input}][{dataset_input}][{method_input}][{file_name}].json'
+                    output_path = os.path.join(output_dir, repo_file_name)
+                    with open(output_path, 'w') as f:
+                        json.dump(leaderboard_dict, f, indent=4)
+                else:
+                    print(f"Failed to evaluate {file_path}")
+
+
+
+# Main entry point: parse arguments and run the batch evaluation
+def main():
+    parser = argparse.ArgumentParser(description="Evaluate all text files in the given directory.")
+    parser.add_argument('input_dir', type=str, help='Path to the directory containing text files.')
+    parser.add_argument('--output_dir', type=str, default='./.output', help='Path to the directory to save the output json files.')
+
+    args = parser.parse_args()
+
+    evaluate_directory(args.input_dir, args.output_dir)
+
+    print(f"Evaluation completed. Results saved to {args.output_dir}")
+
+if __name__ == "__main__":
+    main()
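For each input file that file_name_decode can parse, the batch tool writes one leaderboard JSON into the output directory, named with the same train-[model][dataset][method][file].json pattern that app.py uploads to the Hub, and with "detail result" stored as a JSON string just like the app.py change above. A hedged sketch of consuming one of those output files; the path is the example from the script's own comment and is only illustrative:

```
import json

# Illustrative path: one of the files written by evaluate_directory()
# after e.g. `python batch_eval_script.py ../deid_resaut --output_dir ./.output`.
path = "./.output/train-[01-ai@Yi-1.5-6B-Chat][Setting1][icl][answer.txt].json"

with open(path) as f:
    leaderboard_dict = json.load(f)

# Top-level metrics are plain numbers; the nested breakdown comes back
# by parsing the serialized "detail result" field.
detail = json.loads(leaderboard_dict["detail result"])
print(leaderboard_dict["MACRO f1"], detail["MACRO_AVERAGE"]["f1"])
```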