Spaces:
Runtime error
Runtime error
# Copyright 2022 The OFA-Sys Team.
# All rights reserved.
# This source code is licensed under the Apache 2.0 license
# found in the LICENSE file in the root directory.
import string | |
import math | |
import json | |
from itertools import chain | |
import os | |
import torch | |
import torch.distributed as dist | |
from fairseq import utils | |
from data import data_utils | |
from tasks.nlg_tasks.gigaword import fix_tokenization | |
def get_symbols_to_strip_from_output(generator):
    """Return the set of symbols that should be removed from decoded output.

    If ``generator`` exposes a ``symbols_to_strip_from_output`` attribute it
    is returned as-is; otherwise the fallback is the set made of the
    generator's ``bos`` and ``eos`` attributes.
    """
    if not hasattr(generator, "symbols_to_strip_from_output"):
        return {generator.bos, generator.eos}
    return generator.symbols_to_strip_from_output
def decode_fn(x, tgt_dict, bpe, generator, tokenizer=None):
    """Turn a sequence of token ids into a decoded string.

    Args:
        x: Token ids; must support ``.int().cpu()`` (presumably a torch
            tensor -- confirm against callers).
        tgt_dict: Dictionary whose ``string`` method renders the ids as text.
        bpe: Optional object with a ``decode`` method, applied first when
            not ``None``.
        generator: Used to look up the symbols to drop from the output via
            ``get_symbols_to_strip_from_output``.
        tokenizer: Optional object with a ``decode`` method, applied after
            ``bpe`` when not ``None``.

    Returns:
        The text produced by ``tgt_dict.string`` after the optional decoders
        have been applied.
    """
    token_ids = x.int().cpu()
    ignored_symbols = get_symbols_to_strip_from_output(generator)
    text = tgt_dict.string(token_ids, extra_symbols_to_ignore=ignored_symbols)

    # Apply the optional post-processors in their fixed order: BPE, then tokenizer.
    for decoder in (bpe, tokenizer):
        if decoder is not None:
            text = decoder.decode(text)
    return text
def eval_ocr(task, generator, models, sample, **kwargs):
    """Run OCR inference on a batch and score it when references are present.

    Args:
        task: Object providing ``inference_step``, ``tgt_dict`` and ``bpe``.
        generator: Generator forwarded to ``task.inference_step`` and
            ``decode_fn``.
        models: Models forwarded to ``task.inference_step``.
        sample: Batch mapping. ``sample["id"]`` must support ``.tolist()``;
            ``sample["target"]`` is either ``None``/empty (no references) or
            an indexable batch of target token sequences.
        **kwargs: Accepted for interface compatibility; not used here.

    Returns:
        A ``(results, acc)`` tuple. ``results`` is a list of
        ``{"image_id": str, "ocr": str}`` dicts, one per sample. ``acc`` is a
        list of ``1.0``/``0.0`` exact-match scores (hypothesis vs. reference,
        both with spaces removed), or ``None`` when no references were
        collected.
    """
    gen_out = task.inference_step(generator, models, sample)

    # A batched target tensor has no well-defined truth value (PyTorch raises
    # on ``bool()`` of a multi-element tensor), so test for presence
    # explicitly instead of relying on truthiness.
    targets = sample["target"]
    has_targets = targets is not None and len(targets) > 0
    pad_idx = task.tgt_dict.pad() if has_targets else None

    hyps, refs, results = [], [], []
    for i, sample_id in enumerate(sample["id"].tolist()):
        decoded = decode_fn(
            gen_out[i][0]["tokens"], task.tgt_dict, task.bpe, generator
        )
        # Spaces are dropped so comparison is insensitive to token spacing.
        hyp = decoded.strip().replace(" ", "")
        hyps.append(hyp)

        if has_targets:
            ref = decode_fn(
                utils.strip_pad(targets[i], pad_idx),
                task.tgt_dict,
                task.bpe,
                generator,
            )
            refs.append(ref.strip().replace(" ", ""))

        results.append({"image_id": str(sample_id), "ocr": hyp})

    if refs:
        acc = [1.0 if hyp == ref else 0.0 for hyp, ref in zip(hyps, refs)]
    else:
        acc = None
    return results, acc
def eval_step(task, generator, models, sample, **kwargs):
    """Dispatch one evaluation step according to the task's configured name.

    Only ``task.cfg._name == "ocr"`` is handled, by delegating to
    ``eval_ocr`` with all arguments passed through.

    Raises:
        NotImplementedError: If the task name is anything other than "ocr".
    """
    if task.cfg._name != "ocr":
        raise NotImplementedError
    return eval_ocr(task, generator, models, sample, **kwargs)