Spaces:
Runtime error
Runtime error
import random | |
import math | |
import pandas as pd | |
import numpy as np | |
from typing import List | |
from llm import OpenAI3 | |
# Module-wide LLM client; the functions in this file call it with a rendered
# prompt string and use the text it returns.
llm = OpenAI3()
def _scores_recalculate(scores: List):
    """Build a sampling pool in which scores near the mean appear more often.

    Each score is repeated ``ceil(len(scores) / (d + mean(d)))`` times, where
    ``d`` is its absolute distance from the mean of ``scores``. Drawing
    uniformly from the returned list therefore favours central scores.

    Args:
        scores: Numeric scores, one entry per available score level.

    Returns:
        A new list containing the original score objects, repeated according
        to their weight. An empty input yields an empty list; when every score
        is identical (including a single score) the scores are returned
        unweighted, because there is no spread to weight by.
    """
    if len(scores) == 0:
        return []

    scores_array = np.array(scores)
    distances = np.abs(scores_array - scores_array.mean())
    mean_distance = distances.mean()

    # Zero spread would make the weight 1 / 0 = inf, whose integer cast is
    # garbage (typically negative) and silently empties the pool.
    if mean_distance == 0:
        return list(scores)

    counts = np.ceil(1 / (distances + mean_distance) * len(scores)).astype(int)

    new_scores = []
    for score, count in zip(scores, counts):
        new_scores.extend([score] * int(count))
    return new_scores
def generate_scores_and_comments(standard_file_path, rewrite_prompt, topic, num=10):
    """Generate ``num`` random assessments, each with scores and an LLM comment.

    The standard file is an Excel sheet with a '得分' (score) column; every
    other column is scored independently by drawing from a pool weighted by
    ``_scores_recalculate``. The cell text found at (drawn score, column) is
    collected, joined with ';', and handed to the LLM through the prompt
    template.

    Args:
        standard_file_path: Path of the Excel file holding the standard table.
        rewrite_prompt: Prompt template, rendered as an f-string against this
            function's local variables.
        topic: Not used directly here; it is in scope for the template.
        num: Number of assessments to generate.

    Returns:
        A tuple ``(assessments, df)`` where ``assessments`` is a list of
        ``{'scores': {column: score}, 'comment': str}`` dicts and ``df`` is
        the loaded standard table.
    """
    df = pd.read_excel(standard_file_path)
    # Lift the cell-width limit so to_string() below returns full cell text.
    pd.set_option('display.max_colwidth', None)
    standard_index = df.columns.to_list()
    standard_index.remove('得分')
    standard_scores = df['得分'].to_list()
    scores_choice = _scores_recalculate(standard_scores)

    assessments = []
    for i in range(num):
        assessment = dict()
        scores = dict()
        stand_comment = []
        for index in standard_index:
            score = random.choice(scores_choice)
            scores[index] = score
            content = df[df['得分'] == score][index].to_string(index=False)
            stand_comment.append(content)
        # Despite its name this is the array of drawn scores, not their mean.
        # It is kept because the prompt template may reference it.
        mean_score = np.array(list(scores.values()))
        assessment['scores'] = scores
        stand_comment = ';'.join(stand_comment)
        # NOTE(security): the template is evaluated as an f-string, so it can
        # run arbitrary expressions; only pass trusted templates. The template
        # resolves names against the locals above, so do not rename them.
        prompt = 'f"""' + rewrite_prompt + '"""'
        r_comment = llm(eval(prompt))
        # Keep only the last line of the reply. Strip first so that a trailing
        # newline does not turn the result into an empty string.
        r_comment = r_comment.strip().split('\n')[-1]
        assessment['comment'] = r_comment
        assessments.append(assessment)
    return assessments, df
def load_scores_and_comments(comments_file_path):
    """Load previously saved assessments from an Excel file.

    The sheet must contain a 'comments' column; all remaining columns are
    taken as the scores of that row.

    Args:
        comments_file_path: Path of the Excel file to read.

    Returns:
        A list with one ``{'scores': {column: value}, 'comment': value}`` dict
        per row, in file order.
    """
    table = pd.read_excel(comments_file_path)
    pd.set_option('display.max_colwidth', None)

    # pop() removes the column from the table and hands it back in one step.
    comment_column = table.pop('comments')
    score_rows = table.to_dict(orient='records')

    return [
        {'scores': row, 'comment': text}
        for row, text in zip(score_rows, comment_column.to_list())
    ]
def medium_score_rewrite(standard_file, rewrite_prompt, topic, assessments):
    """Write one comment that matches the median score of each column.

    The per-column median of all assessment scores is computed and truncated
    to an integer. The cell text found at (median score, column) in the
    standard table is collected, joined with ';', and handed to the LLM
    through the prompt template.

    Args:
        standard_file: The standard table as a DataFrame with a '得分' (score)
            column plus one column per scored item.
        rewrite_prompt: Prompt template, rendered as an f-string against this
            function's local variables.
        topic: Not used directly here; it is in scope for the template.
        assessments: List of dicts that each carry a 'scores' mapping.

    Returns:
        Whatever the LLM returns for the rendered prompt.
    """
    scores = [i['scores'] for i in assessments]
    scores = pd.DataFrame(scores)
    try:
        medium = scores.quantile(0.5)
    except Exception:  # in case the values in one column are all None
        scores = scores.fillna(0)
        medium = scores.quantile(0.5)
    # Truncates toward zero, so the ceil() below has no further effect.
    medium = medium.astype(int)

    stand_comment = []
    df = standard_file
    # Lift the cell-width limit locally: without it to_string() truncates long
    # cells unless another function happened to change the global option.
    with pd.option_context('display.max_colwidth', None):
        for index in medium.index:
            score = math.ceil(medium[index])
            content = df[df['得分'] == score][index].to_string(index=False)
            stand_comment.append(content)
    stand_comment = ';'.join(stand_comment)

    # NOTE(security): the template is evaluated as an f-string, so it can run
    # arbitrary expressions; only pass trusted templates. The template
    # resolves names against the locals above, so do not rename them.
    prompt = 'f"""' + rewrite_prompt + '"""'
    r_comment = llm(eval(prompt))
    return r_comment
def quantile_summary(summary_prompt, assessments):
    """Summarise the comments of the assessments that sit between the quartiles.

    If the first element of ``assessments`` is not a dict, the whole list is
    joined with newlines as-is. Otherwise each element must carry a 'scores'
    mapping and a 'comment'; comments are selected by comparing every score
    with the integer-truncated 25% and 75% quantiles of its column.

    Args:
        summary_prompt: Prompt template, rendered as an f-string against this
            function's local variables (presumably it references ``comment``;
            verify against the templates in use).
        assessments: Non-empty list of strings or of assessment dicts.

    Returns:
        Whatever the LLM returns for the rendered prompt.
    """
    if not isinstance(assessments[0], dict):
        comment = '\n'.join(assessments)
    else:
        comments = [i['comment'] for i in assessments]
        comments = pd.Series(comments)
        scores = [i['scores'] for i in assessments]
        scores = pd.DataFrame(scores)
        try:
            quartiles = scores.quantile([0.25, 0.75])
        except Exception as e:  # in case the values in one column are all None
            scores = scores.fillna(0)
            quartiles = scores.quantile([0.25, 0.75])
        quartiles = quartiles.astype(int)
        # Strict comparisons: a score equal to either quartile is excluded.
        up = (scores - quartiles.loc[0.75]) < 0
        down = (scores - quartiles.loc[0.25]) > 0
        # First choice: rows where every column lies between the quartiles.
        select_index = (up & down).all(axis=1)
        if not select_index.any():
            # Fallback: rows where at least one column lies between them.
            select_index = (up & down).any(axis=1)
        if select_index.any():
            select_comments = comments[select_index].to_list()
        else:
            # Nothing qualifies at all, so summarise every comment.
            select_comments = comments.to_list()
        comment = '\n'.join(select_comments)
    # NOTE(security): the template is evaluated as an f-string, so it can run
    # arbitrary expressions; only pass trusted templates. The template
    # resolves names against the locals above, so do not rename them.
    prompt = 'f"""' + summary_prompt + '"""'
    s_comment = llm(eval(prompt))
    return s_comment