import os
from random import sample

import matplotlib.pyplot as plt
import pandas as pd
import streamlit as st
from datasets import Dataset, load_dataset

from utils.metric import Regard
from utils.model import gpt2

# Keyword arguments shared by every generation call in this module, kept in
# one place so the single-category and pairwise paths cannot drift apart.
# 50256 is GPT-2's end-of-text token id, reused here as the padding token.
_GENERATION_KWARGS = dict(
    pad_token_id=50256,
    max_length=50,
    do_sample=False,
    truncation=True,
)


def _records_in_category(dataset_: Dataset, category: str) -> list:
    """Return every record of *dataset_* whose ``'category'`` equals *category*."""
    return [record for record in dataset_ if record['category'] == category]


def _strip_prompt(text: str, prompt: str) -> str:
    """Return *text* with the first occurrence of *prompt* removed.

    Only the first occurrence is dropped: an unbounded ``str.replace`` would
    also delete later repetitions of the prompt, which would silently cut
    text out of the middle of a continuation before it is scored.
    """
    return text.replace(prompt, '', 1)


def _continuations_and_references(model, records: list):
    """Generate continuations for *records* and build their reference texts.

    Args:
        model: Object exposing ``text_generation(prompts, **kwargs)``; each
            result is indexed as ``result[0]['generated_text']``.
        records: Records providing ``'prompts'`` and ``'wikipedia'`` strings.

    Returns:
        A ``(continuations, references)`` pair of equally long lists, where
        each entry is the generated / Wikipedia text with its prompt removed.
    """
    prompts = [record['prompts'] for record in records]
    references = [
        _strip_prompt(record['wikipedia'], record['prompts'])
        for record in records
    ]
    generations = model.text_generation(prompts, **_GENERATION_KWARGS)
    continuations = [
        _strip_prompt(generation[0]['generated_text'], prompt)
        for generation, prompt in zip(generations, prompts)
    ]
    return continuations, references


def one_regard_computation(category: str, dataset_: Dataset, sample_size: int):
    """Score model continuations for one category against Wikipedia text.

    Up to *sample_size* records of *category* are drawn without replacement
    via ``random.sample``, continued by the model returned from ``gpt2()``,
    and compared with their Wikipedia references by
    ``Regard("inner_compare")``.

    Args:
        category: Value matched against each record's ``'category'`` field.
        dataset_: Iterable of records with ``'category'``, ``'prompts'`` and
            ``'wikipedia'`` fields.
        sample_size: Upper bound on the number of records sampled; fewer are
            used when the category has fewer records.

    Returns:
        Whatever ``Regard.compute`` returns for the sampled records.

    Raises:
        ValueError: If *sample_size* is negative (raised by ``random.sample``).
    """
    candidates = _records_in_category(dataset_, category)
    data_size = min(len(candidates), sample_size)
    sampled = sample(candidates, data_size)

    model = gpt2()
    continuations, references = _continuations_and_references(model, sampled)

    regard = Regard("inner_compare")
    return regard.compute(data=continuations, references=references)


def pairwise_comparison(category_one: str, category_two: str, dataset_: Dataset, sample_size: int):
    """Score model continuations for two categories on equally sized samples.

    Both categories are sampled to the same size -- the smallest of the two
    category sizes and *sample_size* -- so their results are computed over
    the same number of prompts. One model instance and one ``Regard``
    instance are shared by both categories.

    Args:
        category_one: First ``'category'`` value to evaluate.
        category_two: Second ``'category'`` value to evaluate.
        dataset_: Iterable of records with ``'category'``, ``'prompts'`` and
            ``'wikipedia'`` fields.
        sample_size: Upper bound on the number of records sampled per
            category.

    Returns:
        A ``(regard_one_results, regard_two_results)`` tuple holding the
        ``Regard.compute`` output for each category, in argument order.

    Raises:
        ValueError: If *sample_size* is negative (raised by ``random.sample``).
    """
    candidates_one = _records_in_category(dataset_, category_one)
    candidates_two = _records_in_category(dataset_, category_two)
    data_size = min(len(candidates_one), len(candidates_two), sample_size)

    # Draw both samples before any model work so the sequence of calls into
    # the `random` module stays the same regardless of generation.
    sampled_one = sample(candidates_one, data_size)
    sampled_two = sample(candidates_two, data_size)

    model = gpt2()
    continuations_one, references_one = _continuations_and_references(model, sampled_one)
    continuations_two, references_two = _continuations_and_references(model, sampled_two)

    regard = Regard("inner_compare")
    regard_one_results = regard.compute(data=continuations_one, references=references_one)
    regard_two_results = regard.compute(data=continuations_two, references=references_two)
    return regard_one_results, regard_two_results