"""Evaluate CelebBot answers against reference answers.

For every celebrity in ``celeb_names`` the script fetches a Britannica
biography, splits it into sentences, asks CelebBot each question stored for
that celebrity, writes the answers to ``predictions.txt`` and finally prints
BLEU, METEOR, ROUGE-L and BERTScore-F1 for the predictions.
"""
import itertools
import json
import re

import evaluate
import spacy
import torch
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, AutoModel

# NOTE(review): get_celeb_data / get_article are presumably provided by this
# star import -- confirm against utils.
from utils import *
from celebbot import CelebBot

QA_MODEL_ID = "google/flan-t5-large"
SENTTR_MODEL_ID = "sentence-transformers/all-mpnet-base-v2"

# Britannica biography slugs that are not simply the name joined with dashes.
BRITANNICA_SLUG_OVERRIDES = {
    "Madonna": "Madonna-American-singer-and-actress",
    "Anne Hathaway": "Anne-Hathaway-American-actress",
}

celeb_names = [
    "Cate Blanchett",
    "David Beckham",
    "Emma Watson",
    "Lady Gaga",
    "Madonna",
    "Mark Zuckerberg",
]
celeb_data = get_celeb_data("data.json")

# Collect references in celeb_names order -- the same order the predictions
# are produced in below -- so that prediction i is always scored against
# reference i, regardless of the key order inside data.json.
references = list(
    itertools.chain.from_iterable(
        celeb_data[celeb_name]["answers"] for celeb_name in celeb_names
    )
)
predictions = []

device = 'cpu'
QA_tokenizer = AutoTokenizer.from_pretrained(QA_MODEL_ID)
QA_model = AutoModelForSeq2SeqLM.from_pretrained(QA_MODEL_ID).to(device)
sentTr_tokenizer = AutoTokenizer.from_pretrained(SENTTR_MODEL_ID)
sentTr_model = AutoModel.from_pretrained(SENTTR_MODEL_ID).to(device)

# Loading the spaCy pipeline is loop-invariant, so do it once up front
# instead of once per celebrity.
spacy_model = spacy.load("en_core_web_lg")

for celeb_name in celeb_names:
    gender = celeb_data[celeb_name]["gender"]
    name = BRITANNICA_SLUG_OVERRIDES.get(celeb_name, "-".join(celeb_name.split(" ")))

    knowledge = get_article(f"https://www.britannica.com/biography/{name}")
    knowledge_sents = [sent.text.strip() for sent in spacy_model(knowledge).sents]

    ai = CelebBot(
        celeb_name,
        gender,
        QA_tokenizer,
        QA_model,
        sentTr_tokenizer,
        sentTr_model,
        spacy_model,
        knowledge_sents,
    )

    for q in celeb_data[celeb_name]["questions"]:
        # The question is handed to the bot through its `text` attribute.
        ai.text = q
        response = ai.question_answer()
        print("response:", response)
        predictions.append(response)

# Persist one prediction per line; the context manager closes the file even
# if a write fails.
with open('predictions.txt', 'w', encoding="utf-8") as file:
    file.writelines(prediction + "\n" for prediction in predictions)

bleu = evaluate.load("bleu")
results = bleu.compute(predictions=predictions, references=references, max_order=4)
print(f"BLEU: {round(results['bleu'], 2)}")

meteor = evaluate.load("meteor")
results = meteor.compute(predictions=predictions, references=references)
print(f"METEOR: {round(results['meteor'], 2)}")

rouge = evaluate.load("rouge")
results = rouge.compute(predictions=predictions, references=references)
print(f"ROUGE: {round(results['rougeL'], 2)}")

bertscore = evaluate.load("bertscore")
results = bertscore.compute(
    predictions=predictions,
    references=references,
    rescale_with_baseline=True,
    lang="en",
)
# BERTScore returns one F1 per example; report the mean.
print(f"F1: {round(sum(results['f1'])/len(results['f1']), 2)}")