jogonba2 committed on
Commit
cb18316
·
1 Parent(s): e315877

add examples

Browse files
Files changed (3) hide show
  1. generation_evaluator.py +15 -14
  2. gradio_tst.py +12 -2
  3. requirements.txt +0 -1
generation_evaluator.py CHANGED
@@ -1,10 +1,10 @@
1
  import datasets
2
  import evaluate
 
3
  import numpy as np
4
  import spacy
5
  import torch
6
  from alignscore import AlignScore
7
- import nltk
8
 
9
  _CITATION = """\
10
  @inproceedings{lin-2004-rouge,
@@ -150,8 +150,8 @@ class GenerationEvaluator(evaluate.Metric):
150
  spacy.cli.download("en_core_web_sm")
151
 
152
  # Download punkt for AlignScore
153
- nltk.download('punkt_tab')
154
-
155
  # Download AlignScore model and move to GPU if possible
156
  model_path = dl_manager.download(ALIGNSCORE_ARGS["ckpt_path"])
157
  ALIGNSCORE_ARGS["ckpt_path"] = model_path
@@ -160,29 +160,31 @@ class GenerationEvaluator(evaluate.Metric):
160
  )
161
  self.align_scorer = AlignScore(**ALIGNSCORE_ARGS)
162
 
 
 
 
 
 
 
 
163
  def _compute(self, predictions, references):
164
  # Compute ROUGE
165
- rouge_score = evaluate.load("rouge")
166
-
167
- rouge_results = rouge_score.compute(
168
  predictions=predictions, references=references
169
  )
170
 
171
  # Compute BLEU
172
- bleu_score = evaluate.load("bleu")
173
- bleu_results = bleu_score.compute(
174
  predictions=predictions, references=references
175
  )
176
 
177
  # Compute Exact Match
178
- exact_match_score = evaluate.load("exact_match")
179
- exact_match_results = exact_match_score.compute(
180
  predictions=predictions, references=references
181
  )
182
 
183
  # Compute BERTScore
184
- bert_score = evaluate.load("bertscore")
185
- bert_score_results = bert_score.compute(
186
  predictions=predictions, references=references, lang="en"
187
  )
188
 
@@ -203,8 +205,7 @@ class GenerationEvaluator(evaluate.Metric):
203
  )
204
 
205
  # Compute CHRF
206
- chrf = evaluate.load("chrf")
207
- chrf_results = chrf.compute(
208
  predictions=predictions, references=references
209
  )
210
 
 
1
  import datasets
2
  import evaluate
3
+ import nltk
4
  import numpy as np
5
  import spacy
6
  import torch
7
  from alignscore import AlignScore
 
8
 
9
  _CITATION = """\
10
  @inproceedings{lin-2004-rouge,
 
150
  spacy.cli.download("en_core_web_sm")
151
 
152
  # Download punkt for AlignScore
153
+ nltk.download("punkt_tab")
154
+
155
  # Download AlignScore model and move to GPU if possible
156
  model_path = dl_manager.download(ALIGNSCORE_ARGS["ckpt_path"])
157
  ALIGNSCORE_ARGS["ckpt_path"] = model_path
 
160
  )
161
  self.align_scorer = AlignScore(**ALIGNSCORE_ARGS)
162
 
163
+ # Prepare scorers
164
+ self.rouge_scorer = evaluate.load("rouge")
165
+ self.bleu_scorer = evaluate.load("bleu")
166
+ self.exact_match_scorer = evaluate.load("exact_match")
167
+ self.bert_scorer = evaluate.load("bertscore")
168
+ self.chrf_scorer = evaluate.load("chrf")
169
+
170
  def _compute(self, predictions, references):
171
  # Compute ROUGE
172
+ rouge_results = self.rouge_scorer.compute(
 
 
173
  predictions=predictions, references=references
174
  )
175
 
176
  # Compute BLEU
177
+ bleu_results = self.bleu_scorer.compute(
 
178
  predictions=predictions, references=references
179
  )
180
 
181
  # Compute Exact Match
182
+ exact_match_results = self.exact_match_scorer.compute(
 
183
  predictions=predictions, references=references
184
  )
185
 
186
  # Compute BERTScore
187
+ bert_score_results = self.bert_scorer.compute(
 
188
  predictions=predictions, references=references, lang="en"
189
  )
190
 
 
205
  )
206
 
207
  # Compute CHRF
208
+ chrf_results = self.chrf_scorer.compute(
 
209
  predictions=predictions, references=references
210
  )
211
 
gradio_tst.py CHANGED
@@ -117,6 +117,15 @@ def launch_gradio_widget2(metric):
117
  def compute(data):
118
  return metric.compute(**parse_gradio_data(data, gradio_input_types))
119
 
 
 
 
 
 
 
 
 
 
120
  iface = gr.Interface(
121
  fn=compute,
122
  inputs=gr.Dataframe(
@@ -132,8 +141,9 @@ def launch_gradio_widget2(metric):
132
  ),
133
  title=f"Metric: {metric.name}",
134
  article=parse_readme(local_path / "README.md"),
135
- # TODO: load test cases and use them to populate examples
136
- # examples=[parse_test_cases(test_cases, feature_names, gradio_input_types)]
 
137
  )
138
 
139
  iface.launch(share=True)
 
117
  def compute(data):
118
  return metric.compute(**parse_gradio_data(data, gradio_input_types))
119
 
120
+ test_cases = [
121
+ {
122
+ "predictions": [
123
+ "You are so good",
124
+ "Madrid is the capital of Spain",
125
+ ],
126
+ "references": ["You are so bad", "Paris is the capital of France"],
127
+ }
128
+ ]
129
  iface = gr.Interface(
130
  fn=compute,
131
  inputs=gr.Dataframe(
 
141
  ),
142
  title=f"Metric: {metric.name}",
143
  article=parse_readme(local_path / "README.md"),
144
+ examples=[
145
+ parse_test_cases(test_cases, feature_names, gradio_input_types)
146
+ ],
147
  )
148
 
149
  iface.launch(share=True)
requirements.txt CHANGED
@@ -5,7 +5,6 @@ gradio
5
  bert_score
6
  rouge_score
7
  numpy
8
- git+https://github.com/huggingface/evaluate@a4bdc10c48a450b978d91389a48dbb5297835c7d
9
  sacrebleu
10
  git+https://github.com/yuh-zha/AlignScore.git
11
  spacy
 
5
  bert_score
6
  rouge_score
7
  numpy
 
8
  sacrebleu
9
  git+https://github.com/yuh-zha/AlignScore.git
10
  spacy