Spaces:
Runtime error
Runtime error
from typing import List | |
import pandas as pd | |
from sentence_transformers.util import cos_sim | |
from utils.models import ModelWithPooling | |
def p0_originality(df: pd.DataFrame, model_name: str, pooling: str) -> pd.DataFrame:
    """
    Score every row's originality as ``1 - cos_sim(prompt, response)``.

    row-wise: each row is scored independently. The score is written to a new
    ``originality`` column on ``df`` itself, so the caller's frame is modified
    in place; the same object is also returned.

    :param df: frame containing at least the ``prompt`` and ``response`` columns
    :param model_name: name handed to ``ModelWithPooling`` to build the encoder
    :param pooling: pooling option forwarded to every encoder call
    :return: ``df`` with the ``originality`` column added
    """
    assert 'prompt' in df.columns, "df must contain a 'prompt' column"
    assert 'response' in df.columns, "df must contain a 'response' column"
    model = ModelWithPooling(model_name)

    def get_cos_sim(prompt: str, response: str) -> float:
        """Return the cosine similarity between the two encoded texts."""
        prompt_vec = model(text=prompt, pooling=pooling)
        response_vec = model(text=response, pooling=pooling)
        return cos_sim(prompt_vec, response_vec).item()

    # Walk the two columns directly instead of DataFrame.apply(axis=1): on an
    # empty frame apply() hands back a DataFrame (and may probe the callback
    # with a placeholder row), which cannot be stored in a single column.
    scores = [
        1 - get_cos_sim(prompt, response)
        for prompt, response in zip(df['prompt'], df['response'])
    ]
    # Assign by position (plain array) so no index alignment is involved, and
    # pin the dtype so an empty result is still a float column.
    df['originality'] = pd.Series(scores, dtype='float64').to_numpy()
    return df
def p1_flexibility(df: pd.DataFrame, model_name: str, pooling: str) -> pd.DataFrame:
    """
    Compute one flexibility score per ``(id, prompt)`` group.

    group-wise: within a group, the responses are encoded in the order they
    appear and the score is the sum of ``1 - cos_sim`` over each pair of
    consecutive responses. A group with fewer than two responses scores 0.

    :param df: frame containing the ``id``, ``prompt`` and ``response`` columns
    :param model_name: name handed to ``ModelWithPooling`` to build the encoder
    :param pooling: pooling option forwarded to every encoder call
    :return: new frame with columns ``id``, ``prompt`` and ``flexibility``,
        one row per group, on a fresh default index
    """
    assert 'prompt' in df.columns
    assert 'response' in df.columns
    assert 'id' in df.columns

    encoder = ModelWithPooling(model_name)

    def get_flexibility(responses: List[str]) -> float:
        """Sum the cosine distances between neighbouring responses."""
        vectors = [encoder(text=text, pooling=pooling) for text in responses]
        neighbours = zip(vectors, vectors[1:])
        return sum(1 - cos_sim(left, right).item() for left, right in neighbours)

    grouped = df.groupby(by=['id', 'prompt'])
    aggregated = grouped.agg(
        {'id': 'first', 'prompt': 'first', 'response': get_flexibility}
    )
    renamed = aggregated.rename(columns={'response': 'flexibility'})
    # The group keys are already carried as regular columns, so the
    # (id, prompt) index is discarded rather than turned into columns.
    df_out = renamed.reset_index(drop=True)
    return df_out
if __name__ == '__main__':
    # Manual smoke run: score the example CSV with both metrics.
    _model_name = 'paraphrase-multilingual-MiniLM-L12-v2'
    # Both scorers take `pooling` as a required third argument; calling them
    # with only two arguments raises TypeError.
    # NOTE(review): 'mean' is an assumed value — confirm it is a pooling name
    # that utils.models.ModelWithPooling actually accepts.
    _pooling = 'mean'

    _df_input = pd.read_csv('data/tmp/example_3.csv')
    _df_0 = p0_originality(_df_input, _model_name, _pooling)
    _df_1 = p1_flexibility(_df_input, _model_name, _pooling)