import logging
import os
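# Ensure a writable temp dir exists before modules that read TMP_DIR are imported.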
os.makedirs("tmp", exist_ok=True)
os.environ['TMP_DIR'] = "tmp"
import subprocess
import shutil
import gradio as gr
import numpy as np
from src.radial.radial import create_plot
from apscheduler.schedulers.background import BackgroundScheduler
from gradio_leaderboard import Leaderboard, SelectColumns
from gradio_space_ci import enable_space_ci
import glob
import json
from io import BytesIO

from src.display.about import INTRODUCTION_TEXT, TITLE, LLM_BENCHMARKS_TEXT
from src.display.css_html_js import custom_css
from src.display.utils import AutoEvalColumn, fields
from src.envs import API, H4_TOKEN, HF_HOME, REPO_ID, RESET_JUDGEMENT_ENV
from src.leaderboard.build_leaderboard import build_leadearboard_df, download_openbench, download_dataset
import huggingface_hub
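
# Cache the uploaded lm-eval-harness results JSON in gr.State until the user
# presses "Submit File".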
def handle_file_upload(file):
    file_path = os.path.basename(file.name)
logging.info("File uploaded: %s", file_path)
with open(file.name, "r") as f:
v = json.load(f)
return v, file_path
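
# Validate a submission, flatten its metrics, upload it to the evaluation
# dataset repo, and request a leaderboard rebuild.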
def submit_file(v, file_path, mn, profile: gr.OAuthProfile | None):
if profile is None:
return "Hub Login Required"
new_file = v['results']
new_file['model'] = profile.username + "/" + mn
new_file['moviesmc'] = new_file['moviemc']["acc,none"]
new_file['musicmc'] = new_file['musicmc']["acc,none"]
new_file['booksmc'] = new_file['bookmc']["acc,none"]
new_file['mmluproru'] = new_file['mmluproru']["acc,none"]
new_file['lawmc'] = new_file['lawmc']["acc,none"]
new_file['model_dtype'] = v['config']["model_dtype"]
new_file['ppl'] = 0
new_file.pop('moviemc')
new_file.pop('bookmc')
    buf = BytesIO()
    buf.write(json.dumps(new_file).encode('utf-8'))
    buf.seek(0)  # rewind: upload_file reads from the current position
    API.upload_file(
        path_or_fileobj=buf,
        path_in_repo="model_data/external/" + profile.username + mn + ".json",
        repo_id="Vikhrmodels/s-openbench-eval",
        repo_type="dataset",
    )
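    # Ask the scheduler to rebuild the leaderboard on its next tick.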
os.environ[RESET_JUDGEMENT_ENV] = "1"
return "Success!"
# huggingface_hub.login(token=H4_TOKEN)
os.environ["GRADIO_ANALYTICS_ENABLED"] = "false"
# Configure logging
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
# Start ephemeral Spaces on PRs (see config in README.md)
enable_space_ci()
# download_openbench()
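
# Restart the Space so it reboots with the freshly published leaderboard data.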
def restart_space():
API.restart_space(repo_id=REPO_ID)
download_openbench()
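
# Redraw the radial plot for the models selected in the Analytics tab.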
def update_plot(selected_models):
return create_plot(selected_models)
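
# Build the Gradio UI: leaderboard table, submission form, and analytics plot.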
def build_demo():
download_openbench()
demo = gr.Blocks(title="Small Shlepa", css=custom_css)
leaderboard_df = build_leadearboard_df()
with demo:
gr.HTML(TITLE)
gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
with gr.Tabs(elem_classes="tab-buttons"):
with gr.TabItem("🏅 LLM Benchmark", elem_id="llm-benchmark-tab-table", id=0):
Leaderboard(
value=leaderboard_df,
datatype=[c.type for c in fields(AutoEvalColumn)],
select_columns=SelectColumns(
default_selection=[c.name for c in fields(AutoEvalColumn) if c.displayed_by_default],
cant_deselect=[c.name for c in fields(AutoEvalColumn) if c.never_hidden or c.dummy],
label="Select Columns to Display:",
),
search_columns=[
AutoEvalColumn.model.name,
# AutoEvalColumn.fullname.name,
# AutoEvalColumn.license.name
],
)
# with gr.TabItem("📝 About", elem_id="llm-benchmark-tab-table", id=1):
# gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
# with gr.TabItem("❗FAQ", elem_id="llm-benchmark-tab-table", id=2):
# gr.Markdown(FAQ_TEXT, elem_classes="markdown-text")
with gr.TabItem("🚀 Submit ", elem_id="llm-benchmark-tab-table", id=3):
with gr.Row():
gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
with gr.Row():
gr.Markdown("# ✨ Submit your model here!", elem_classes="markdown-text")
with gr.Column():
model_name_textbox = gr.Textbox(label="Model name")
                    file_output = gr.File(label="Drag and drop your JSON results file here", type="filepath")
uploaded_file = gr.State()
file_path = gr.State()
with gr.Row():
with gr.Column():
                            out = gr.Textbox("Submission status")
with gr.Column():
login_button = gr.LoginButton(elem_id="oauth-button")
submit_button = gr.Button("Submit File", elem_id="submit_button", variant='primary')
file_output.upload(
handle_file_upload,
file_output,
[uploaded_file, file_path]
)
submit_button.click(
submit_file,
[uploaded_file, file_path, model_name_textbox],
[out]
)
with gr.TabItem("📊 Analytics", elem_id="llm-benchmark-tab-table", id=4):
with gr.Column():
model_dropdown = gr.Dropdown(
choices=leaderboard_df["model"].tolist(),
label="Models",
value=leaderboard_df["model"].tolist(),
multiselect=True,
info="Select models"
)
with gr.Column():
plot = gr.Plot(update_plot(model_dropdown.value))
model_dropdown.change(
fn=update_plot,
inputs=[model_dropdown],
outputs=[plot]
)
return demo
# print(os.system('cd src/gen && ../../.venv/bin/python gen_judgment.py'))
# print(os.system('cd src/gen/ && python show_result.py --output'))
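
# Scheduled job: rebuild leaderboard.json from all external submissions and
# restart the Space. Runs every minute; a submission sets RESET_JUDGEMENT_ENV
# to "1" to request a refresh.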
def update_board():
    need_reset = os.environ.get(RESET_JUDGEMENT_ENV)
    logging.info("Leaderboard refresh requested: %s", need_reset)
    if need_reset != "1":
        return
    os.environ[RESET_JUDGEMENT_ENV] = "0"
    # Clear stale local copies before re-downloading the submissions dataset.
    shutil.rmtree("./m_data", ignore_errors=True)
    shutil.rmtree("./data", ignore_errors=True)
    download_dataset("Vikhrmodels/s-openbench-eval", "m_data")
    # Seed with a placeholder entry so the leaderboard is never empty; it is
    # dropped below once at least one real submission exists.
    data_list = [
        {
            "model": "apsys/saiga_3_8b",
            "model_dtype": "torch.float16",
            "musicmc": 0.3021276595744681,
            "lawmc": 0.2800829875518672,
            "moviesmc": 0.3472222222222222,
            "booksmc": 0.2800829875518672,
            "mmluproru": 0,
            "ppl": 0,
        }
    ]
    for file in glob.glob("./m_data/model_data/external/*.json"):
        with open(file) as f:
            try:
                data_list.append(json.load(f))
            except Exception:
                # A badly formatted submission should not fail the whole job.
                logging.warning("Skipping malformed submission file: %s", file)
    logging.info("Aggregated submissions: %s", data_list)
    # Drop the placeholder once real submissions are present.
    if len(data_list) > 1:
        data_list.pop(0)
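    # Publish the aggregated results and restart the Space to serve them.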
with open("genned.json", "w") as f:
json.dump(data_list, f)
API.upload_file(
path_or_fileobj="genned.json",
path_in_repo="leaderboard.json",
repo_id="Vikhrmodels/s-shlepa-metainfo",
repo_type="dataset",
)
restart_space()
# gen_judgement_file = os.path.join(HF_HOME, "src/gen/gen_judgement.py")
# subprocess.run(["python3", gen_judgement_file], check=True)
if __name__ == "__main__":
os.environ[RESET_JUDGEMENT_ENV] = "1"
scheduler = BackgroundScheduler()
# update_board()
scheduler.add_job(update_board, "interval", minutes=1)
scheduler.start()
demo_app = build_demo()
    demo_app.launch(debug=True, share=True)