|
import copy |
|
import json |
|
|
|
import numpy as np |
|
import fire |
|
|
|
|
|
def evaluate_annotation(key2refs, scorer):
    """Leave-one-out scoring of human annotations against each other.

    Each round holds out one reference caption per audio clip, treats it
    as the "candidate", and scores it against the remaining references.
    The returned score is the mean over all rounds.  ``key2refs`` is
    rotated in place, so callers should pass a copy if they need the
    original lists afterwards.
    """
    is_bleu = scorer.method() == "Bleu"
    # Bleu yields a 4-tuple (Bleu-1..Bleu-4); everything else a scalar.
    accumulated = np.zeros(4) if is_bleu else 0

    n_rounds = len(next(iter(key2refs.values())))
    for round_idx in range(n_rounds):
        # After the first round, rotate the previously held-out caption
        # back to the front of every reference list.
        if round_idx > 0:
            for key in key2refs:
                key2refs[key].insert(0, held_out[key][0])
        # Hold out the last reference of each list as this round's candidate.
        held_out = {key: [refs.pop()] for key, refs in key2refs.items()}
        round_score, _ = scorer.compute_score(key2refs, held_out)

        if is_bleu:
            accumulated += np.array(round_score)
        else:
            accumulated += round_score

    return accumulated / n_rounds
|
|
|
def evaluate_prediction(key2pred, key2refs, scorer):
    """Score predictions against references, averaging over subsets.

    For each reference index ``i``, the i-th reference of every clip is
    dropped and ``key2pred`` is scored against the remaining references;
    the returned score is the mean over all such reduced reference sets.
    Neither input mapping is modified.
    """
    is_bleu = scorer.method() == "Bleu"
    # Bleu yields a 4-tuple (Bleu-1..Bleu-4); everything else a scalar.
    accumulated = np.zeros(4) if is_bleu else 0

    n_refs = len(next(iter(key2refs.values())))
    for left_out in range(n_refs):
        # Drop the `left_out`-th reference from every clip's list.
        reduced = {
            key: refs[:left_out] + refs[left_out + 1:]
            for key, refs in key2refs.items()
        }
        subset_score, _ = scorer.compute_score(reduced, key2pred)

        if is_bleu:
            accumulated += np.array(subset_score)
        else:
            accumulated += subset_score

    return accumulated / n_refs
|
|
|
|
|
class Evaluator(object):
    """Audio-captioning evaluation CLI.

    Exposed through ``fire.Fire`` so each public method becomes a
    sub-command.  Both commands write one metric per line to ``output``,
    followed by SPIDEr = (CIDEr + SPICE) / 2.

    Fixes over the previous revision: JSON files are opened with context
    managers (the old ``json.load(open(...))`` leaked file handles), and
    the loading / formatting / scoring / reporting steps shared by both
    commands are factored into private helpers instead of being
    duplicated.
    """

    @staticmethod
    def _load_key2refs(annotation):
        """Read an annotation JSON file and map audio_id -> [caption, ...]."""
        with open(annotation, "r") as f:
            captions = json.load(f)["audios"]
        key2refs = {}
        for audio in captions:
            key2refs[audio["audio_id"]] = [
                item["caption"] for item in audio["captions"]
            ]
        return key2refs

    @staticmethod
    def _to_coco_format(key2caps):
        """Wrap plain caption lists in the dict format PTBTokenizer expects."""
        formatted = {}
        for key, caps in key2caps.items():
            formatted[key] = [
                {"audio_id": key, "id": idx, "caption": cap}
                for idx, cap in enumerate(caps)
            ]
        return formatted

    @staticmethod
    def _coco_scorers():
        """Instantiate the pycocoevalcap scorers.

        Imported lazily (matching the file's existing style) so the
        heavyweight dependencies are only loaded when scoring runs.
        """
        from pycocoevalcap.bleu.bleu import Bleu
        from pycocoevalcap.cider.cider import Cider
        from pycocoevalcap.rouge.rouge import Rouge
        from pycocoevalcap.meteor.meteor import Meteor
        from pycocoevalcap.spice.spice import Spice
        return [Bleu(), Rouge(), Cider(), Meteor(), Spice()]

    @staticmethod
    def _write_scores(scores, output):
        """Write metric scores plus the derived SPIDEr score to `output`."""
        spider = 0
        with open(output, "w") as f:
            for name, score in scores.items():
                if name == "Bleu":
                    # Bleu's score is a 4-tuple: Bleu-1 .. Bleu-4.
                    for n in range(4):
                        f.write("Bleu-{}: {:6.3f}\n".format(n + 1, score[n]))
                else:
                    f.write("{}: {:6.3f}\n".format(name, score))
                if name in ["CIDEr", "SPICE"]:
                    spider += score
            f.write("SPIDEr: {:6.3f}\n".format(spider / 2))

    def eval_annotation(self, annotation, output):
        """Score human annotations against each other (leave-one-out).

        annotation: path to a JSON file with an "audios" list of
            {"audio_id": ..., "captions": [{"caption": ...}, ...]} entries.
        output: path the per-metric report is written to.
        """
        key2refs = self._load_key2refs(annotation)

        from fense.fense import Fense
        scores = {}
        scorer = Fense()
        # FENSE runs on the raw (untokenized) captions; deepcopy because
        # evaluate_annotation mutates its reference lists.
        scores[scorer.method()] = evaluate_annotation(
            copy.deepcopy(key2refs), scorer)

        from pycocoevalcap.tokenizer.ptbtokenizer import PTBTokenizer
        tokenizer = PTBTokenizer()
        key2refs = tokenizer.tokenize(self._to_coco_format(key2refs))

        for scorer in self._coco_scorers():
            scores[scorer.method()] = evaluate_annotation(
                copy.deepcopy(key2refs), scorer)

        self._write_scores(scores, output)

    def eval_prediction(self, prediction, annotation, output):
        """Score system predictions against reference annotations.

        prediction: path to a JSON file with a "predictions" list of
            {"filename": ..., "tokens": ...} entries.
        annotation: path to the reference JSON (same schema as
            eval_annotation).
        output: path the per-metric report is written to.
        """
        key2refs = self._load_key2refs(annotation)

        with open(prediction, "r") as f:
            pred_captions = json.load(f)["predictions"]
        key2pred = {
            item["filename"]: [item["tokens"]] for item in pred_captions
        }

        from fense.fense import Fense
        scores = {}
        scorer = Fense()
        # FENSE runs on the raw (untokenized) captions.
        scores[scorer.method()] = evaluate_prediction(key2pred, key2refs, scorer)

        from pycocoevalcap.tokenizer.ptbtokenizer import PTBTokenizer
        tokenizer = PTBTokenizer()
        key2refs = tokenizer.tokenize(self._to_coco_format(key2refs))
        key2pred = tokenizer.tokenize(self._to_coco_format(key2pred))

        for scorer in self._coco_scorers():
            scores[scorer.method()] = evaluate_prediction(key2pred, key2refs, scorer)

        self._write_scores(scores, output)
|
|
|
|
|
if __name__ == "__main__":
    # Expose Evaluator's public methods as CLI sub-commands via python-fire,
    # e.g. `python <script>.py eval_prediction <prediction> <annotation> <output>`.
    fire.Fire(Evaluator)
|
|