Roleplay_LLM_Arena / utils.py
zzc0208's picture
Upload 7 files
4182c57 verified
raw
history blame
3.87 kB
import time
from pymongo import MongoClient
import pandas as pd
import math
import os
# MongoDB connection configuration.
# The connection string is read from the "client_link" environment variable.
# NOTE(review): os.getenv returns None when the variable is unset; presumably
# MongoClient then falls back to its default host -- confirm this is intended.
client = MongoClient(os.getenv("client_link"))
# Handle to the 'roleplay' database.
db = client.get_database('roleplay')
# Collection that update_model_stats() writes match records to and that
# load_dataframe() reads them back from.
collection = db.get_collection('model_stats')
def update_model_stats(model1_name, model2_name, winner, turn, anony, language):
    """Store the result of one comparison between two models in MongoDB.

    A single document is inserted into the module-level ``collection``.

    Args:
        model1_name: Value stored under "Model 1".
        model2_name: Value stored under "Model 2".
        winner: Value stored under "Winner". load_dataframe() recognises
            'Model 1', 'Model 2', 'tie' and 'bothbad'.
        turn: Value stored under "Turn".
        anony: Value stored under "Anony".
        language: Value stored under "Language".
    """
    # Timestamp (seconds since the epoch) taken just before the insert.
    recorded_at = time.time()
    record = {
        "Model 1": model1_name,
        "Model 2": model2_name,
        "Winner": winner,
        "Turn": turn,
        "Anony": anony,
        "Language": language,
        "tstamp": recorded_at,
    }
    collection.insert_one(record)
def calculate_elo(winner_elo, loser_elo, k=30, outcome=1):
    """Return the new Elo rating of the first player after one game.

    Only the rating passed as ``winner_elo`` is updated; despite the
    parameter names, ``outcome`` decides how that player actually scored,
    so the same function is used for winners, losers and draws.

    Args:
        winner_elo: Pre-game rating of the player being updated.
        loser_elo: Pre-game rating of the opponent.
        k: K-factor, i.e. the maximum rating change for one game.
        outcome: Score achieved by the player being updated
            (1 for a win, 0.5 for a draw, 0 for a loss).

    Returns:
        The updated rating of the player given as ``winner_elo``.
    """
    # Opponent's rating advantage on the 400-point logistic scale.
    rating_gap = (loser_elo - winner_elo) / 400
    # Score the rating difference predicts for the player being updated.
    expected_score = 1 / (1 + math.pow(10, rating_gap))
    adjustment = k * (outcome - expected_score)
    return winner_elo + adjustment
def load_dataframe() -> pd.DataFrame:
    """Build the leaderboard table from every match stored in MongoDB.

    All documents of the module-level ``collection`` are read and replayed
    in the order the cursor yields them. Every model starts at an Elo of
    1000 and is updated with ``calculate_elo`` after each match:

    * 'Model 1' / 'Model 2': the named side scores 1, the other side 0.
    * 'tie': both sides score 0.8.
    * 'bothbad': both sides score 0.1.
    * any other ``Winner`` value leaves the ratings untouched.

    Both sides of a match are updated from their *pre-match* ratings, so the
    result does not depend on which model happens to be listed first.

    Returns:
        A DataFrame with the columns '模型名称' (model name), '参赛次数'
        (matches played), '胜利次数' (wins, where a 'tie' counts as a win
        for both sides), 'ELO' (rounded rating) and '模型胜率' (win rate as
        a percentage string), sorted by win rate in descending order. When
        no matches are stored, an empty DataFrame with these columns is
        returned.
    """
    columns = ['模型名称', '参赛次数', '胜利次数', 'ELO', '模型胜率']

    # Read every stored match into a DataFrame.
    # NOTE(review): no sort is applied, so the replay order (which Elo
    # depends on) is whatever order the cursor returns.
    data = pd.DataFrame(list(collection.find({})))
    if data.empty:
        # Nothing recorded yet: the column lookups below would raise KeyError.
        return pd.DataFrame(columns=columns)

    # Unique model names across both sides of every match.
    models = pd.unique(data[['Model 1', 'Model 2']].values.ravel('K'))

    # (score for Model 1, score for Model 2) for each recognised verdict.
    # 'tie' and 'bothbad' are intentionally not zero-sum (kept as designed).
    outcome_scores = {
        'Model 1': (1, 0),
        'Model 2': (0, 1),
        'tie': (0.8, 0.8),
        'bothbad': (0.1, 0.1),
    }

    elo_dict = {model: 1000 for model in models}  # every model starts at 1000
    for model1, model2, winner in zip(data['Model 1'], data['Model 2'], data['Winner']):
        scores = outcome_scores.get(winner)
        if scores is None:
            continue
        # Snapshot both ratings first so neither update sees the other's
        # post-match value.
        rating1, rating2 = elo_dict[model1], elo_dict[model2]
        delta1 = calculate_elo(rating1, rating2, outcome=scores[0]) - rating1
        delta2 = calculate_elo(rating2, rating1, outcome=scores[1]) - rating2
        # Apply as increments so a row where both sides are the same model
        # accumulates both changes instead of overwriting one with the other.
        elo_dict[model1] += delta1
        elo_dict[model2] += delta2

    # Loop-invariant aggregates, computed once instead of once per model.
    first_counts = data['Model 1'].value_counts()
    second_counts = data['Model 2'].value_counts()
    first_won = data['Winner'] == 'Model 1'
    second_won = data['Winner'] == 'Model 2'
    tied = data['Winner'] == 'tie'

    results = {'模型名称': [], '参赛次数': [], '胜利次数': [], 'ELO': []}
    for model in models:
        as_first = data['Model 1'] == model
        as_second = data['Model 2'] == model
        count = first_counts.get(model, 0) + second_counts.get(model, 0)
        win_count = (
            int((first_won & as_first).sum())
            + int((second_won & as_second).sum())
            + int((tied & (as_first | as_second)).sum())
        )
        results['模型名称'].append(model)
        results['参赛次数'].append(count)
        results['胜利次数'].append(win_count)
        results['ELO'].append(round(elo_dict[model]))

    # Convert the per-model statistics into a DataFrame.
    result_df = pd.DataFrame(results)
    # Compute the win rate, sort by it, then render it as a percentage string.
    result_df["模型胜率"] = (result_df['胜利次数'] / result_df['参赛次数']) * 100
    result_df = result_df.sort_values(by="模型胜率", ascending=False)
    result_df["模型胜率"] = result_df["模型胜率"].map("{:.2f}%".format)
    return result_df
def change_name(old, new):
    """Rename a model in every stored match record.

    Documents whose "Model 1" field equals ``old`` are updated first, then
    documents whose "Model 2" field equals ``old``.

    Args:
        old: Model name currently stored in the collection.
        new: Name to store in its place.
    """
    for field in ("Model 1", "Model 2"):
        collection.update_many({field: old}, {"$set": {field: new}})