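"""Gradio Space for the "Chatbot Arena Leaderboard".

Builds the leaderboard UI from precomputed evaluation results, accepts
answer-file submissions (uploaded to the Vikhrmodels/openbench-eval dataset
repo), and periodically re-runs judgement generation via a background
scheduler.
"""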
import logging
import os
import subprocess
import gradio as gr
from apscheduler.schedulers.background import BackgroundScheduler
from gradio_leaderboard import Leaderboard, SelectColumns
from gradio_space_ci import enable_space_ci
from src.display.about import (
    INTRODUCTION_TEXT,
    TITLE,
)
from src.display.css_html_js import custom_css
from src.display.utils import (
    AutoEvalColumn,
    fields,
)
from src.envs import (
    API,
    H4_TOKEN,
    REPO_ID,
    RESET_JUDGEMENT_ENV,
)
from src.leaderboard.build_leaderboard import build_leadearboard_df

os.environ['GRADIO_ANALYTICS_ENABLED'] = 'false'

# Configure logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

# Start ephemeral Spaces on PRs (see config in README.md)
enable_space_ci()


def restart_space():
    API.restart_space(repo_id=REPO_ID, token=H4_TOKEN)


def build_demo():
    demo = gr.Blocks(
        title="Chatbot Arena Leaderboard",
        css=custom_css,
    )
    leaderboard_df = build_leadearboard_df()
    with demo:
        gr.HTML(TITLE)
        gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
        with gr.Tabs(elem_classes="tab-buttons") as tabs:
            with gr.TabItem("🏅 LLM Benchmark", elem_id="llm-benchmark-tab-table", id=0):
                leaderboard = Leaderboard(
                    value=leaderboard_df,
                    datatype=[c.type for c in fields(AutoEvalColumn)],
                    select_columns=SelectColumns(
                        default_selection=[
                            c.name
                            for c in fields(AutoEvalColumn)
                            if c.displayed_by_default
                        ],
                        cant_deselect=[c.name for c in fields(AutoEvalColumn) if c.never_hidden or c.dummy],
                        label="Select Columns to Display:",
                    ),
                    search_columns=[
                        AutoEvalColumn.model.name,
                        # AutoEvalColumn.fullname.name,
                        # AutoEvalColumn.license.name
                    ],
                )

            # with gr.TabItem("📝 About", elem_id="llm-benchmark-tab-table", id=1):
            #     gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
            # with gr.TabItem("❗FAQ", elem_id="llm-benchmark-tab-table", id=2):
            #     gr.Markdown(FAQ_TEXT, elem_classes="markdown-text")
with gr.TabItem("🚀 Submit ", elem_id="llm-benchmark-tab-table", id=3):
with gr.Row():
gr.Markdown("# ✨ Submit your model here!", elem_classes="markdown-text")
with gr.Column():
model_name_textbox = gr.Textbox(label="Model name")
def upload_file(file):
file_path = file.name.split('/')[-1] if '/' in file.name else file.name
logging.info("New submition: file saved to %s", file_path)
API.upload_file(path_or_fileobj=file.name,path_in_repo='./external/'+file_path,repo_id='Vikhrmodels/openbench-eval',repo_type='dataset')
os.environ[RESET_JUDGEMENT_ENV] = '1'
return file.name
if model_name_textbox:
file_output = gr.File()
upload_button = gr.UploadButton("Click to Upload & Submit Answers", file_types=['*'], file_count="single")
upload_button.upload(upload_file, upload_button, file_output)
return demo


# print(os.system('cd src/gen && ../../.venv/bin/python gen_judgment.py'))
# print(os.system('cd src/gen/ && python show_result.py --output'))


def update_board():
    # Only rebuild judgements when a new submission has set the reset flag.
    need_reset = os.environ.get(RESET_JUDGEMENT_ENV)
    if need_reset != '1':
        return
    os.environ[RESET_JUDGEMENT_ENV] = '0'
    # Regenerate judgements, then refresh the aggregated results.
    subprocess.run(['python', 'src/gen/gen_judgement.py'], check=False)
    subprocess.Popen(['python3', 'src/gen/show_result.py', '--output'])


if __name__ == "__main__":
    os.environ[RESET_JUDGEMENT_ENV] = '1'

    # Re-check the reset flag and refresh judgements every 10 minutes.
    scheduler = BackgroundScheduler()
    scheduler.add_job(update_board, "interval", minutes=10)
    scheduler.start()

    demo_app = build_demo()
    demo_app.launch(debug=True)