zhaorui-nb committed
Commit 2f22782
Parent(s): db33c1e
no change
Browse files
- .gitattributes +35 -35
- .gitignore +12 -12
- README.md +49 -49
- app.py +224 -224
- batch_eval_script.py +94 -94
- utils/Evaluation_answer_txt.py +179 -179
- utils/upload_hub.py +56 -56
.gitattributes
CHANGED
@@ -1,35 +1,35 @@
*.7z filter=lfs diff=lfs merge=lfs -text
*.arrow filter=lfs diff=lfs merge=lfs -text
*.bin filter=lfs diff=lfs merge=lfs -text
*.bz2 filter=lfs diff=lfs merge=lfs -text
*.ckpt filter=lfs diff=lfs merge=lfs -text
*.ftz filter=lfs diff=lfs merge=lfs -text
*.gz filter=lfs diff=lfs merge=lfs -text
*.h5 filter=lfs diff=lfs merge=lfs -text
*.joblib filter=lfs diff=lfs merge=lfs -text
*.lfs.* filter=lfs diff=lfs merge=lfs -text
*.mlmodel filter=lfs diff=lfs merge=lfs -text
*.model filter=lfs diff=lfs merge=lfs -text
*.msgpack filter=lfs diff=lfs merge=lfs -text
*.npy filter=lfs diff=lfs merge=lfs -text
*.npz filter=lfs diff=lfs merge=lfs -text
*.onnx filter=lfs diff=lfs merge=lfs -text
*.ot filter=lfs diff=lfs merge=lfs -text
*.parquet filter=lfs diff=lfs merge=lfs -text
*.pb filter=lfs diff=lfs merge=lfs -text
*.pickle filter=lfs diff=lfs merge=lfs -text
*.pkl filter=lfs diff=lfs merge=lfs -text
*.pt filter=lfs diff=lfs merge=lfs -text
*.pth filter=lfs diff=lfs merge=lfs -text
*.rar filter=lfs diff=lfs merge=lfs -text
*.safetensors filter=lfs diff=lfs merge=lfs -text
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
*.tar.* filter=lfs diff=lfs merge=lfs -text
*.tar filter=lfs diff=lfs merge=lfs -text
*.tflite filter=lfs diff=lfs merge=lfs -text
*.tgz filter=lfs diff=lfs merge=lfs -text
*.wasm filter=lfs diff=lfs merge=lfs -text
*.xz filter=lfs diff=lfs merge=lfs -text
*.zip filter=lfs diff=lfs merge=lfs -text
*.zst filter=lfs diff=lfs merge=lfs -text
*tfevents* filter=lfs diff=lfs merge=lfs -text
.gitignore
CHANGED
@@ -1,12 +1,12 @@
secrets.toml
__pycache__

# *.txt
*.tsv
*.csv
*.json
*.txt
README.md
CHANGED
@@ -1,49 +1,49 @@
---
title: De Identification Leaderboard
emoji: 🏃
colorFrom: pink
colorTo: yellow
sdk: streamlit
sdk_version: 1.35.0
app_file: app.py
pinned: false
license: mit
---

Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

# de-identification-leaderboard

## leaderboard data
Scores are saved to the Hugging Face dataset
[zhaorui-nb/leaderboard-score](https://huggingface.co/datasets/zhaorui-nb/leaderboard-score)

## submit
### filename format
Replace '/' in the model name with '@'.
```
[{Organization@Model}][{Dataset}][{Method}]{Filename}.txt
```
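For example, the sample filename used in `utils/upload_hub.py`:
```
[mistralai@Mistral-7B-Instruct-v0.3][Setting3][icl]answer.txt
```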

### line in answer txt (tsv)
```
{file_name}\t{label_type}\t{label_start}\t{label_end}\t{label_text}\n
```
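For example, one prediction line (illustrative file id, label type, and offsets; the actual label set depends on the dataset):
```
file10	DOCTOR	112	121	John Smith
```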

## Supported datasets
```
Setting1
Setting2
Setting3
```

# cli batch eval tool
```
python .\batch_eval_script.py ..\deid_resaut
```
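Each input file whose name matches the pattern above yields one JSON score file in `./.output` (configurable with `--output_dir`), named like the example given in `batch_eval_script.py`:
```
train-[01-ai@Yi-1.5-6B-Chat][Setting1][icl][answer.txt].json
```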
app.py
CHANGED
@@ -1,224 +1,224 @@
import streamlit as st
import pandas as pd
import os
from utils.Evaluation_answer_txt import Evaluation_answer_txt
from utils.upload_hub import upload_scores_to_hub, file_name_decode
import time
import json
import datasets
from huggingface_hub import HfApi
from huggingface_hub import hf_hub_download

# st.set_page_config(layout="wide")
st.set_page_config(layout="centered")
st.markdown(
    f"""
    <style>
    .appview-container .main .block-container{{
        max-width: 80%;
        padding: 50px;
    }}
    </style>
    """,
    unsafe_allow_html=True
)

@st.cache_data
def download_gold_answer(repo, filename, token, force_download=False):
    ret = hf_hub_download(repo_id=repo, repo_type='dataset', filename=filename, token=token, force_download=force_download)
    return ret


HUB_TOKEN = st.secrets['hf']
HUB_API = HfApi(token=HUB_TOKEN)

LEADERBOARD_DATASET_REPO = 'zhaorui-nb/leaderboard-score'
# Setting1 Setting2 Setting3

ANSWER_REPO = 'zhaorui-nb/leaderboard-answer'
GET_GOLD_ANSWER_PATH = {
    'Setting1': download_gold_answer(ANSWER_REPO, 'dataset/Setting1_test_answer.txt', HUB_TOKEN),
    'Setting2': download_gold_answer(ANSWER_REPO, 'dataset/Setting2_test_answer.txt', HUB_TOKEN),
    'Setting3': download_gold_answer(ANSWER_REPO, 'dataset/Setting3_test_answer.txt', HUB_TOKEN)
}


# cache the dataset in the session state
def get_leaderboard_df():
    with st.spinner('Loading leaderboard data...'):
        if st.session_state.get('leaderboard_df') is None:
            dataset = datasets.load_dataset(LEADERBOARD_DATASET_REPO)
            df = pd.DataFrame(dataset['train'])
            st.session_state['leaderboard_df'] = df
            return df
        else:
            return st.session_state['leaderboard_df']


st.title('De-identification Model Leaderboard')

try:
    with st.container():
        # columns:
        # ['model name', 'dataset', 'method', 'file name', 'submitter',
        #  'MICRO precision', 'MICRO recall', 'MICRO f1', 'MACRO precision',
        #  'MACRO recall', 'MACRO f1', 'detail result']

        df = get_leaderboard_df()
        # restore '/' in the model name column (filenames use '@' in its place)
        df['model name'] = df['model name'].str.replace('@', '/')

        # hide the detail result column by default
        default_columns = [c for c in df.columns if c not in ['detail result']]
        selected_columns = st.multiselect('Select columns to display', df.columns, default=default_columns)

        leaderboard_df = st.dataframe(df[selected_columns], selection_mode='multi-row', on_select='rerun', key='leaderboard')

        st.subheader("Detail Result")
        det_ind = st.session_state.leaderboard['selection']['rows']
        if len(det_ind) == 0:
            st.write('Please check the boxes to view the detailed results.')
        else:
            col_detail = st.columns(len(det_ind))
            for i, dind in enumerate(det_ind):
                with col_detail[i]:
                    dis = f"{df.iloc[dind]['model name']}___{df.iloc[dind]['dataset']}___{df.iloc[dind]['method']}"
                    color = [st.success, st.info, st.warning, st.error]
                    color[i % 4](dis)

                    dic = json.loads(df.iloc[dind]['detail result'])
                    dt_df = pd.DataFrame(dic).T
                    st.dataframe(dt_df)

except Exception as e:
    st.error(f"Error: {e}")

st.markdown("---")

# ############################################################################################################
# ############################################### Evaluation_answer_txt
# ############################################################################################################

model_name_input = ''
dataset_input = ''
method_input = ''
file_name = ''
submitter_input = ''

if 'score_json' not in st.session_state:
    st.session_state['score_json'] = None

@st.cache_data()
def get_file_info(uploaded_file):
    filename_info = file_name_decode(uploaded_file.name)
    return filename_info

@st.cache_data()
def eval_answer_txt(set_name, uploaded_file):
    print(f"eval_answer_txt: {time.time()}", set_name)

    if set_name not in GET_GOLD_ANSWER_PATH:
        return None
    gold_answer_txt = GET_GOLD_ANSWER_PATH[set_name]
    evaluator = Evaluation_answer_txt(gold_answer_txt, uploaded_file)
    score_json = evaluator.eval()
    return score_json

def clear_score_json():
    st.session_state['score_json'] = None

st.title("Model Evaluation")
st.write("Supported file naming: [{Organization@Model}][{Dataset}][{Method}]{Filename}.txt")

col_upload = st.columns([3, 1])
with col_upload[0]:
    uploaded_file = st.file_uploader("Please upload the answer.txt file", type=["txt"], key="uploaded_file", on_change=clear_score_json)
with col_upload[1]:
    if not uploaded_file:
        st.warning("please upload a file")
        st.session_state['score_json'] = None
    else:
        st.success("file uploaded successfully")

        filename_info = get_file_info(uploaded_file)
        if filename_info:
            model_name_input = filename_info['model_name']
            dataset_input = filename_info['dataset']
            method_input = filename_info['method']
            file_name = filename_info['file_name']

col_score = st.columns([7, 5])
if uploaded_file:
    with col_score[1], st.container(border=True):
        model_name_input = st.text_input("model name", model_name_input)
        dataset_input = st.text_input("dataset", dataset_input)
        method_input = st.text_input("method", method_input)
        file_name = st.text_input("file name", file_name)
        submitter_input = st.text_input("submitter", submitter_input)
        check_all_fill_in = model_name_input and dataset_input and method_input and file_name and submitter_input

        col_submit_and_recalculate = st.columns(2)
        with col_submit_and_recalculate[0]:
            calculate_btn = st.button("calculate", type='secondary', use_container_width=True)
        with col_submit_and_recalculate[1]:
            submit_btn = st.button("SUBMIT", type='primary', use_container_width=True, disabled=not check_all_fill_in)

    if calculate_btn or st.session_state['score_json'] is None:
        set_name = dataset_input
        st.session_state['score_json'] = eval_answer_txt(set_name, uploaded_file)
        if st.session_state['score_json']:
            st.success("evaluation succeeded")
        else:
            st.error("evaluation failed, please check the file content or set the correct dataset name.")

    if st.session_state['score_json']:
        with col_score[0], st.container(border=True):
            df = pd.DataFrame(st.session_state['score_json']).T
            # split the MICRO_AVERAGE and MACRO_AVERAGE rows into a separate dataframe
            tag_df = df.drop(["MICRO_AVERAGE", "MACRO_AVERAGE"], axis=0)
            avg_df = df.loc[["MICRO_AVERAGE", "MACRO_AVERAGE"]]

            col_sort_func = st.columns(2)

            with col_sort_func[0]:
                sorted_column = st.selectbox("Select column to sort by", df.columns)

            with col_sort_func[1]:
                ascending = st.radio("Sort Order", ["Ascending", "Descending"])

            tag_df = tag_df.sort_values(by=sorted_column, ascending=ascending == "Ascending")

            st.dataframe(pd.concat([tag_df, avg_df]), use_container_width=True)


    if not check_all_fill_in:
        st.warning("Please fill in the complete information.")

    if submit_btn:
        if st.session_state['score_json']:
            score_json = st.session_state['score_json']

            leaderboard_dict = {
                "model name": model_name_input,
                "dataset": dataset_input,
                "method": method_input,
                "file name": file_name,
                "submitter": submitter_input,

                "MICRO precision": score_json["MICRO_AVERAGE"]["precision"],
                "MICRO recall": score_json["MICRO_AVERAGE"]["recall"],
                "MICRO f1": score_json["MICRO_AVERAGE"]["f1"],
                "MACRO precision": score_json["MACRO_AVERAGE"]["precision"],
                "MACRO recall": score_json["MACRO_AVERAGE"]["recall"],
                "MACRO f1": score_json["MACRO_AVERAGE"]["f1"],
                "detail result": json.dumps(score_json, indent=4)
            }

            repo_file_path = f'data/train-[{model_name_input}][{dataset_input}][{method_input}][{file_name}].json'
            upload_res = upload_scores_to_hub(HUB_API, leaderboard_dict, repo_file_path, hub_repo=LEADERBOARD_DATASET_REPO)
            if upload_res:
                st.success("submit success")
                st.success(f"your score is here: {upload_res}")
            else:
                st.error("submit failed")
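To run the app locally, a minimal sketch (it assumes a Streamlit `secrets.toml` providing the `hf` token, which the repo's `.gitignore` deliberately keeps out of version control):
```
streamlit run app.py
```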
batch_eval_script.py
CHANGED
@@ -1,94 +1,94 @@
# An argparse script: point it at an eval dir, run the eval on every answer file
# in that dir, and save the result JSON files to an output dir.
# usage: python .\batch_eval_script.py ..\deid_resaut

import os
import json
import argparse
import streamlit as st

from huggingface_hub import hf_hub_download

from utils.Evaluation_answer_txt import Evaluation_answer_txt
from utils.upload_hub import file_name_decode

# Function to download the gold answer for a dataset
def download_gold_answer(repo, filename, token, force_download=False):
    ret = hf_hub_download(repo_id=repo, repo_type='dataset', filename=filename, token=token, force_download=force_download)
    return ret

HUB_TOKEN = st.secrets['hf']
ANSWER_REPO = 'zhaorui-nb/leaderboard-answer'
GET_GOLD_ANSWER_PATH = {
    'Setting1': download_gold_answer(ANSWER_REPO, 'dataset/Setting1_test_answer.txt', HUB_TOKEN),
    'Setting2': download_gold_answer(ANSWER_REPO, 'dataset/Setting2_test_answer.txt', HUB_TOKEN),
    'Setting3': download_gold_answer(ANSWER_REPO, 'dataset/Setting3_test_answer.txt', HUB_TOKEN)
}

# Function to evaluate an answer text file
def eval_answer_txt(set_name, uploaded_file_path):
    if set_name not in GET_GOLD_ANSWER_PATH:
        return None
    gold_answer_txt = GET_GOLD_ANSWER_PATH[set_name]
    evaluator = Evaluation_answer_txt(gold_answer_txt, uploaded_file_path)
    score_json = evaluator.eval()
    return score_json

# Function to traverse a directory and evaluate its files
def evaluate_directory(input_dir, output_dir='./.output'):
    os.makedirs(output_dir, exist_ok=True)
    for root, _, files in os.walk(input_dir):
        for file in files:
            filename_info = file_name_decode(file)
            if filename_info:
                model_name_input = filename_info['model_name']
                dataset_input = filename_info['dataset']
                method_input = filename_info['method']
                file_name = filename_info['file_name']

                # get the full path of the file
                file_path = os.path.join(root, file)
                file_path = os.path.abspath(file_path)
                score_json = eval_answer_txt(dataset_input, file_path)
                if score_json:
                    leaderboard_dict = {
                        "model name": model_name_input,
                        "dataset": dataset_input,
                        "method": method_input,
                        "file name": file_name,
                        "submitter": 'zhaorui',

                        "MICRO precision": score_json["MICRO_AVERAGE"]["precision"],
                        "MICRO recall": score_json["MICRO_AVERAGE"]["recall"],
                        "MICRO f1": score_json["MICRO_AVERAGE"]["f1"],
                        "MACRO precision": score_json["MACRO_AVERAGE"]["precision"],
                        "MACRO recall": score_json["MACRO_AVERAGE"]["recall"],
                        "MACRO f1": score_json["MACRO_AVERAGE"]["f1"],
                        "detail result": json.dumps(score_json, indent=4)
                    }

                    # e.g. train-[01-ai@Yi-1.5-6B-Chat][Setting1][icl][answer.txt].json
                    repo_file_name = f'train-[{model_name_input}][{dataset_input}][{method_input}][{file_name}].json'
                    output_path = os.path.join(output_dir, repo_file_name)
                    with open(output_path, 'w') as f:
                        json.dump(leaderboard_dict, f, indent=4)
                else:
                    print(f"Failed to evaluate {file_path}")


# Main function to handle argparse
def main():
    parser = argparse.ArgumentParser(description="Evaluate all text files in the given directory.")
    parser.add_argument('input_dir', type=str, help='Path to the directory containing text files.')
    parser.add_argument('--output_dir', type=str, default='./.output', help='Path to the directory to save the output json files.')

    args = parser.parse_args()

    evaluate_directory(args.input_dir, args.output_dir)

    print(f"Evaluation completed. Results saved to {args.output_dir}")

if __name__ == "__main__":
    main()
utils/Evaluation_answer_txt.py
CHANGED
@@ -1,180 +1,180 @@
import re
import os
from collections import Counter
import json


class Tag:
    def __init__(self, txt_line: str):
        # | file_name | label_type | label_start | label_end | label_text |
        # match = re.match(r'(.+)\t(\w+)\t(\d+)\t(\d+)\t(.+)', txt_line)
        try:
            sep = txt_line.strip().split('\t')
            self.file_id = sep[0]
            self.type = sep[1]
            self.start = sep[2]  # int(sep[2])
            self.end = sep[3]    # int(sep[3])
            self.text = sep[4]
        except Exception:
            raise ValueError('The format of the input line is not correct. Please check the input line format.')

    def get_type(self):
        return self.type

    def get_file_id(self):
        return self.file_id

    def __eq__(self, other: 'Tag'):
        # True if file_id, type, start, and end are all the same;
        # text is not considered in the comparison
        ck_file_id = self.file_id == other.file_id
        ck_type = self.type == other.type
        ck_start = self.start == other.start
        ck_end = self.end == other.end
        return ck_file_id and ck_type and ck_start and ck_end

    def __repr__(self):
        return f'<{self.__class__.__name__} {self.file_id:10} {self.type:10} s:{self.start:5} e:{self.end:5} {self.text}>\n'

    def __hash__(self):
        return hash((self.file_id, self.type, self.start, self.end))

class Evaluation_answer_txt:
    def __init__(self, gold_answer, pred_answer):
        self.gold_answer = gold_answer
        self.pred_answer = pred_answer

        self.gold_set = set()  # set of Tag
        self.pred_set = set()  # set of Tag

        self.type_set = set()  # set of label type str
        self.gold_label_counter = Counter()  # Counter of gold label types

        self.result_score = {}

    def _lines_to_tag_set(self, lines, set_type):  # set_type: 'gold' or 'pred'
        tags = []
        for i, line in enumerate(lines):
            try:
                tags.append(Tag(line))
            except Exception:
                print(f'Error at {set_type} answer line: {i+1}, {line}')
        return set(tags)

    def _set_filter(self, tag_set, type):
        # filter a tag set by label type
        return {tag for tag in tag_set if tag.get_type() == type}

    def _division(self, a, b):
        try:
            return a / b
        except ZeroDivisionError:
            return 0.0

    def _f1_score(self, TP=None, FP=None, FN=None):
        if TP is None or FP is None or FN is None:
            raise ValueError('TP, FP, FN should be given.')

        precision = self._division(TP, TP + FP)
        recall = self._division(TP, TP + FN)
        f1 = self._division(2 * precision * recall, precision + recall)

        return {'precision': precision, 'recall': recall, 'f1': f1}


    def eval(self, ignore_no_gold_tag_file=True):
        with open(self.gold_answer, 'r') as f:
            gold_line = f.readlines()
        # support the prediction input being either a path or a file object
        if isinstance(self.pred_answer, str):
            with open(self.pred_answer, 'r') as f:
                pred_line = f.readlines()
        else:
            pred_line = self.pred_answer.readlines()
            # the lines are bytes and need to be decoded
            pred_line = [line.decode('utf-8') for line in pred_line]

        self.gold_set = self._lines_to_tag_set(gold_line, 'gold')
        self.pred_set = self._lines_to_tag_set(pred_line, 'pred')

        # the ISLab AICUP program ignores files that have no gold tags;
        # it only considers the files listed in the gold answer.txt
        if ignore_no_gold_tag_file:
            # drop predictions for files that have no gold tags
            gold_files = {tag.get_file_id() for tag in self.gold_set}
            self.pred_set = {tag for tag in self.pred_set if tag.get_file_id() in gold_files}

        # collect tag statistics and label types
        for tag in self.gold_set:
            self.type_set.add(tag.get_type())
            self.gold_label_counter[tag.get_type()] += 1
        for tag in self.pred_set:
            self.type_set.add(tag.get_type())

        TP_set = self.gold_set & self.pred_set
        FP_set = self.pred_set - self.gold_set
        FN_set = self.gold_set - self.pred_set

        # score each label type separately
        for label in self.type_set:
            filter_TP = self._set_filter(TP_set, label)
            filter_FP = self._set_filter(FP_set, label)
            filter_FN = self._set_filter(FN_set, label)
            score = self._f1_score(len(filter_TP), len(filter_FP), len(filter_FN))
            self.result_score[label] = score

        # MICRO_AVERAGE
        self.result_score['MICRO_AVERAGE'] = self._f1_score(len(TP_set), len(FP_set), len(FN_set))

        # MACRO_AVERAGE
        precision_sum = 0
        recall_sum = 0
        # f1_sum = 0  # at AICUP, f1 is calculated from the MACRO_AVERAGE precision and recall
        for label in self.type_set:
            precision_sum += self.result_score[label]['precision']
            recall_sum += self.result_score[label]['recall']
            # f1_sum += self.result_score[label]['f1']

        precision = self._division(precision_sum, len(self.type_set))
        recall = self._division(recall_sum, len(self.type_set))
        f1 = self._division(2 * precision * recall, precision + recall)

        self.result_score['MACRO_AVERAGE'] = {'precision': precision, 'recall': recall, 'f1': f1}

        # add support counts for each label type
        for label in self.type_set:
            self.result_score[label]['support'] = self.gold_label_counter[label]
        self.result_score['MICRO_AVERAGE']['support'] = len(self.gold_set)
        self.result_score['MACRO_AVERAGE']['support'] = len(self.gold_set)

        # return json.dumps(self.result_score, indent=4)
        return self.result_score


if __name__ == "__main__":
    gold_path = 'dataset/Setting3_test_answer.txt'
    pred_path = '.output/[meta-llama@Llama-2-7b-hf][Setting3][icl]answer.txt'

    evaluator = Evaluation_answer_txt(gold_path, pred_path)
    res = evaluator.eval()
    print(res)
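A minimal usage sketch of the evaluator above, assuming a gold/pred pair in the TSV format from the README (the file paths here are hypothetical):
```python
from utils.Evaluation_answer_txt import Evaluation_answer_txt

# both arguments may be file paths; the prediction may also be a binary file object
scores = Evaluation_answer_txt('gold_answer.txt', 'pred_answer.txt').eval()
print(scores['MICRO_AVERAGE'])  # {'precision': ..., 'recall': ..., 'f1': ..., 'support': ...}
print(scores['MACRO_AVERAGE'])
```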
utils/upload_hub.py
CHANGED
@@ -1,56 +1,56 @@
import json
import uuid
import os
import re
from huggingface_hub import HfApi
from huggingface_hub import hf_hub_download



def file_name_decode(file_name):
    # model_name, dataset, method, answer.txt
    # input file name example: [mistralai@Mistral-7B-Instruct-v0.3][Setting3][icl]answer.txt

    match = re.match(r'\[([^\[\]]+)\]\[([^\[\]]+)\]\[([^\[\]]+)\]([^\[\]]+)', file_name)

    if match:
        model_name, dataset, method, file_name = match.groups()
        ret_dict = {
            'model_name': model_name,
            'dataset': dataset,
            'method': method,
            'file_name': file_name
        }
        return ret_dict
    return None

def upload_scores_to_hub(api, scores_dict, path_in_repo, hub_repo='zhaorui-nb/test_json'):
    # id = str(uuid.uuid4())
    save_json_path = '.output/upload.json'
    os.makedirs(os.path.dirname(save_json_path), exist_ok=True)
    with open(save_json_path, 'w') as f:
        json.dump(scores_dict, f, indent=4)

    # save the JSON to the hub
    res = api.upload_file(
        path_or_fileobj=save_json_path,
        path_in_repo=path_in_repo,  # f'data/train,{os.path.basename(save_json_path)}'
        repo_id=hub_repo,
        repo_type="dataset",
    )

    return res


if __name__ == "__main__":
    pass
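A quick check of `file_name_decode` against the sample filename from the comment above (a sketch; run from the repo root so `utils` is importable):
```python
from utils.upload_hub import file_name_decode

info = file_name_decode('[mistralai@Mistral-7B-Instruct-v0.3][Setting3][icl]answer.txt')
print(info)
# {'model_name': 'mistralai@Mistral-7B-Instruct-v0.3', 'dataset': 'Setting3',
#  'method': 'icl', 'file_name': 'answer.txt'}
```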