File size: 4,297 Bytes
3d13fcf 1fdf681 3d13fcf 1fdf681 af6c470 6cf5862 af6c470 1fdf681 af6c470 1fdf681 af6c470 1fdf681 af6c470 1fdf681 3d13fcf |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 |
import os
import gistyc
import requests
from dataclasses import dataclass
import re
import streamlit as st
@dataclass
class GistInfo:
    """One leaderboard entry: a benchmark gist plus the scores read from it."""

    # Where the results came from.
    gist_id: str      # the gist's 'id' field
    filename: str     # name of the gist file that was parsed
    url: str          # the gist's 'html_url' field

    # Which model the results belong to.
    model_name: str   # link text of the model cell, or 'Unknown'
    model_id: str     # "<owner>/<repo>" taken from the link target, or 'Unknown'
    model: str        # the raw (stripped) model cell from the markdown table

    # Scores read from the markdown table row.
    agieval: float
    gpt4all: float
    truthfulqa: float
    bigbench: float
    average: float    # rounded mean computed by the code that builds the entry
def update_gist(content, gist_id, access_token,
                filename="YALL - Yet Another LLM Leaderboard.md", timeout=30):
    """
    Update the content of one file in a GitHub Gist.

    The outcome is reported on stdout. Nothing is returned, and neither HTTP
    errors nor transport errors (connection failure, timeout) are raised.

    Args:
        content (str): The new content of the gist file.
        gist_id (str): The ID of the gist to update.
        access_token (str): GitHub personal access token with gist permissions.
        filename (str): Name of the file inside the gist to overwrite.
            Defaults to the YALL leaderboard file.
        timeout (float): Seconds to wait for GitHub before giving up.
    """
    api_url = f"https://api.github.com/gists/{gist_id}"
    headers = {
        "Authorization": f"token {access_token}",
        "Accept": "application/vnd.github.v3+json",
    }
    data = {"files": {filename: {"content": content}}}

    # Without a timeout, requests waits forever on a stalled connection.
    try:
        response = requests.patch(api_url, json=data, headers=headers, timeout=timeout)
    except requests.RequestException as exc:
        print("Failed to update gist. Request error:", exc)
        return

    if response.status_code == 200:
        print("Gist updated successfully.")
        return

    print("Failed to update gist. Status code:", response.status_code)
    # Error bodies are not guaranteed to be JSON (e.g. proxy/gateway HTML pages);
    # fall back to the raw text so the real failure is still visible.
    try:
        details = response.json()
    except ValueError:
        details = response.text
    print("Response:", details)
def _parse_model_cell(cell):
    """Return ``(model_name, model_id)`` from a markdown link to huggingface.co.

    Both values are ``'Unknown'`` when the cell is not such a link.
    """
    model_match = re.search(r'\[([^\]]+)\]\(https://huggingface.co/([^/]+)/([^)]+)\)', cell)
    if not model_match:
        return 'Unknown', 'Unknown'
    model_name = model_match.group(1)
    model_id = f"{model_match.group(2)}/{model_match.group(3)}"
    print(cell)
    print(model_name)
    print(model_id)
    print("=============")
    return model_name, model_id


def _parse_scores(cells):
    """Return cells 1-4 (AGIEval, GPT4All, TruthfulQA, Bigbench) as a tuple of floats.

    Returns ``None``, after printing the reason, when a cell is missing or
    is not numeric.
    """
    try:
        agieval, gpt4all, truthfulqa, bigbench = (float(cells[i]) for i in range(1, 5))
    except (ValueError, IndexError) as e:
        print(f"Error processing values: {e}")
        return None
    return agieval, gpt4all, truthfulqa, bigbench


@st.cache_data
def create_yall():
    """Build the YALL leaderboard markdown table and publish it to its gist.

    Every gist of the authenticated user whose first file is named
    ``*-Nous.md`` is downloaded, the third line of the file is read as a
    markdown table row (model link followed by four scores), and the rows
    are ranked by average score. Gists that cannot be fetched or parsed are
    skipped. The resulting table is pushed to the leaderboard gist via
    ``update_gist`` and returned.

    Because of ``st.cache_data``, Streamlit reuses the returned table, so the
    body (including the gist update) does not run again on a cache hit.

    Returns:
        str: The leaderboard as a markdown table.
    """
    # Get token
    GITHUB_API_TOKEN = os.environ.get("github")

    # Retrieve all gists
    gist_api = gistyc.GISTyc(auth_token=GITHUB_API_TOKEN)
    data = gist_api.get_gists()

    # List to store the GistInfo objects
    gist_infos = []
    for data_dict in data:
        if 'files' not in data_dict or not data_dict['files']:
            continue

        # Only the first file of each gist is inspected.
        file_info = next(iter(data_dict['files'].values()))
        filename = file_info['filename']
        if not filename.endswith("-Nous.md"):
            continue

        # A transport failure is treated like a non-200 answer: skip this gist
        # instead of aborting the whole leaderboard build.
        try:
            response = requests.get(file_info['raw_url'], timeout=30)
        except requests.RequestException as e:
            print(f"Error fetching {filename}: {e}")
            continue
        if response.status_code != 200 or "Error: File does not exist" in response.text:
            continue

        # Parse the markdown table: line 0 is the header, line 1 the
        # separator, line 2 the single data row.
        lines = response.text.split('\n')
        if len(lines) < 3:
            continue
        cells = [cell.strip() for cell in lines[2].split('|')[1:-1]]
        if not cells:
            # The third line is not a table row; nothing to parse.
            continue

        # Extract model name and model id using regular expression
        model_name, model_id = _parse_model_cell(cells[0])

        # All four scores are validated together so that a malformed
        # TruthfulQA cell skips the row just like any other malformed cell.
        scores = _parse_scores(cells)
        if scores is None:
            continue
        agieval, gpt4all, truthfulqa, bigbench = scores

        # NOTE(review): the average deliberately mirrors the existing formula,
        # which leaves TruthfulQA out — confirm this is intended.
        average = round((agieval + gpt4all + bigbench) / 3, 2)

        # Create a GistInfo object and add it to the list
        gist_infos.append(
            GistInfo(
                gist_id=data_dict['id'],
                filename=filename,
                url=data_dict['html_url'],
                model_name=model_name,
                model_id=model_id,
                model=cells[0],
                agieval=agieval,
                gpt4all=gpt4all,
                truthfulqa=truthfulqa,
                bigbench=bigbench,
                average=average,
            )
        )

    # Sort the list by average (best first; ties keep discovery order)
    gist_infos.sort(key=lambda info: info.average, reverse=True)

    # Create markdown table
    # NOTE(review): the 'π' link label looks like a mis-decoded emoji — confirm
    # against the original file before changing it.
    table_rows = [
        "| Model | Average | AGIEval | GPT4All | TruthfulQA | Bigbench |\n",
        "|---|---:|---:|---:|---:|---:|\n",
    ]
    for gist in gist_infos:
        model_link = f"[{gist.model_id}](https://huggingface.co/{gist.model_id})"
        table_rows.append(
            f"| {model_link} [π]({gist.url}) | {gist.average} | {gist.agieval} | {gist.gpt4all} | {gist.truthfulqa} | {gist.bigbench} |\n"
        )
    markdown_table = "".join(table_rows)

    # Update YALL's gist
    update_gist(content=markdown_table, gist_id="90294929a2dbcb8877f9696f28105fdf", access_token=GITHUB_API_TOKEN)
    return markdown_table