Spaces:
Sleeping
Sleeping
add examples
Browse files- generation_evaluator.py +15 -14
- gradio_tst.py +12 -2
- requirements.txt +0 -1
generation_evaluator.py
CHANGED
@@ -1,10 +1,10 @@
|
|
1 |
import datasets
|
2 |
import evaluate
|
|
|
3 |
import numpy as np
|
4 |
import spacy
|
5 |
import torch
|
6 |
from alignscore import AlignScore
|
7 |
-
import nltk
|
8 |
|
9 |
_CITATION = """\
|
10 |
@inproceedings{lin-2004-rouge,
|
@@ -150,8 +150,8 @@ class GenerationEvaluator(evaluate.Metric):
|
|
150 |
spacy.cli.download("en_core_web_sm")
|
151 |
|
152 |
# Download punkt for AlignScore
|
153 |
-
nltk.download(
|
154 |
-
|
155 |
# Download AlignScore model and move to GPU if possible
|
156 |
model_path = dl_manager.download(ALIGNSCORE_ARGS["ckpt_path"])
|
157 |
ALIGNSCORE_ARGS["ckpt_path"] = model_path
|
@@ -160,29 +160,31 @@ class GenerationEvaluator(evaluate.Metric):
|
|
160 |
)
|
161 |
self.align_scorer = AlignScore(**ALIGNSCORE_ARGS)
|
162 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
163 |
def _compute(self, predictions, references):
|
164 |
# Compute ROUGE
|
165 |
-
|
166 |
-
|
167 |
-
rouge_results = rouge_score.compute(
|
168 |
predictions=predictions, references=references
|
169 |
)
|
170 |
|
171 |
# Compute BLEU
|
172 |
-
|
173 |
-
bleu_results = bleu_score.compute(
|
174 |
predictions=predictions, references=references
|
175 |
)
|
176 |
|
177 |
# Compute Exact Match
|
178 |
-
|
179 |
-
exact_match_results = exact_match_score.compute(
|
180 |
predictions=predictions, references=references
|
181 |
)
|
182 |
|
183 |
# Compute BERTScore
|
184 |
-
|
185 |
-
bert_score_results = bert_score.compute(
|
186 |
predictions=predictions, references=references, lang="en"
|
187 |
)
|
188 |
|
@@ -203,8 +205,7 @@ class GenerationEvaluator(evaluate.Metric):
|
|
203 |
)
|
204 |
|
205 |
# Compute CHRF
|
206 |
-
|
207 |
-
chrf_results = chrf.compute(
|
208 |
predictions=predictions, references=references
|
209 |
)
|
210 |
|
|
|
1 |
import datasets
|
2 |
import evaluate
|
3 |
+
import nltk
|
4 |
import numpy as np
|
5 |
import spacy
|
6 |
import torch
|
7 |
from alignscore import AlignScore
|
|
|
8 |
|
9 |
_CITATION = """\
|
10 |
@inproceedings{lin-2004-rouge,
|
|
|
150 |
spacy.cli.download("en_core_web_sm")
|
151 |
|
152 |
# Download punkt for AlignScore
|
153 |
+
nltk.download("punkt_tab")
|
154 |
+
|
155 |
# Download AlignScore model and move to GPU if possible
|
156 |
model_path = dl_manager.download(ALIGNSCORE_ARGS["ckpt_path"])
|
157 |
ALIGNSCORE_ARGS["ckpt_path"] = model_path
|
|
|
160 |
)
|
161 |
self.align_scorer = AlignScore(**ALIGNSCORE_ARGS)
|
162 |
|
163 |
+
# Prepare scorers
|
164 |
+
self.rouge_scorer = evaluate.load("rouge")
|
165 |
+
self.bleu_scorer = evaluate.load("bleu")
|
166 |
+
self.exact_match_scorer = evaluate.load("exact_match")
|
167 |
+
self.bert_scorer = evaluate.load("bertscore")
|
168 |
+
self.chrf_scorer = evaluate.load("chrf")
|
169 |
+
|
170 |
def _compute(self, predictions, references):
|
171 |
# Compute ROUGE
|
172 |
+
rouge_results = self.rouge_scorer.compute(
|
|
|
|
|
173 |
predictions=predictions, references=references
|
174 |
)
|
175 |
|
176 |
# Compute BLEU
|
177 |
+
bleu_results = self.bleu_scorer.compute(
|
|
|
178 |
predictions=predictions, references=references
|
179 |
)
|
180 |
|
181 |
# Compute Exact Match
|
182 |
+
exact_match_results = self.exact_match_scorer.compute(
|
|
|
183 |
predictions=predictions, references=references
|
184 |
)
|
185 |
|
186 |
# Compute BERTScore
|
187 |
+
bert_score_results = self.bert_scorer.compute(
|
|
|
188 |
predictions=predictions, references=references, lang="en"
|
189 |
)
|
190 |
|
|
|
205 |
)
|
206 |
|
207 |
# Compute CHRF
|
208 |
+
chrf_results = self.chrf_scorer.compute(
|
|
|
209 |
predictions=predictions, references=references
|
210 |
)
|
211 |
|
gradio_tst.py
CHANGED
@@ -117,6 +117,15 @@ def launch_gradio_widget2(metric):
|
|
117 |
def compute(data):
|
118 |
return metric.compute(**parse_gradio_data(data, gradio_input_types))
|
119 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
120 |
iface = gr.Interface(
|
121 |
fn=compute,
|
122 |
inputs=gr.Dataframe(
|
@@ -132,8 +141,9 @@ def launch_gradio_widget2(metric):
|
|
132 |
),
|
133 |
title=f"Metric: {metric.name}",
|
134 |
article=parse_readme(local_path / "README.md"),
|
135 |
-
|
136 |
-
|
|
|
137 |
)
|
138 |
|
139 |
iface.launch(share=True)
|
|
|
117 |
def compute(data):
|
118 |
return metric.compute(**parse_gradio_data(data, gradio_input_types))
|
119 |
|
120 |
+
test_cases = [
|
121 |
+
{
|
122 |
+
"predictions": [
|
123 |
+
"You are so good",
|
124 |
+
"Madrid is the capital of Spain",
|
125 |
+
],
|
126 |
+
"references": ["You are so bad", "Paris is the capital of France"],
|
127 |
+
}
|
128 |
+
]
|
129 |
iface = gr.Interface(
|
130 |
fn=compute,
|
131 |
inputs=gr.Dataframe(
|
|
|
141 |
),
|
142 |
title=f"Metric: {metric.name}",
|
143 |
article=parse_readme(local_path / "README.md"),
|
144 |
+
examples=[
|
145 |
+
parse_test_cases(test_cases, feature_names, gradio_input_types)
|
146 |
+
],
|
147 |
)
|
148 |
|
149 |
iface.launch(share=True)
|
requirements.txt
CHANGED
@@ -5,7 +5,6 @@ gradio
|
|
5 |
bert_score
|
6 |
rouge_score
|
7 |
numpy
|
8 |
-
git+https://github.com/huggingface/evaluate@a4bdc10c48a450b978d91389a48dbb5297835c7d
|
9 |
sacrebleu
|
10 |
git+https://github.com/yuh-zha/AlignScore.git
|
11 |
spacy
|
|
|
5 |
bert_score
|
6 |
rouge_score
|
7 |
numpy
|
|
|
8 |
sacrebleu
|
9 |
git+https://github.com/yuh-zha/AlignScore.git
|
10 |
spacy
|