kaz-llm-lb

Running

App Files Files Community

kaz-llm-lb / app.py

apsys

fix

2274e1b 6 months ago

raw

history blame

6.68 kB

	import logging
	import os
	# Create the local scratch directory and publish its path through TMP_DIR.
	# NOTE(review): this sits between the imports presumably so that modules
	# imported below can read TMP_DIR at import time - confirm before moving it.
	os.makedirs("tmp", exist_ok=True)
	os.environ['TMP_DIR'] = "tmp"
	import subprocess

	import gradio as gr
	from apscheduler.schedulers.background import BackgroundScheduler
	from gradio_leaderboard import Leaderboard, SelectColumns
	from gradio_space_ci import enable_space_ci
	import json
	from io import BytesIO


	from src.display.about import (
	INTRODUCTION_TEXT,
	TITLE,
	)
	from src.display.css_html_js import custom_css
	from src.display.utils import (
	AutoEvalColumn,
	fields,
	)
	from src.envs import API, H4_TOKEN, HF_HOME, REPO_ID, RESET_JUDGEMENT_ENV
	from src.leaderboard.build_leaderboard import build_leadearboard_df, download_openbench, download_dataset
	import huggingface_hub
	# huggingface_hub.login(token=H4_TOKEN)

	# Switch off Gradio's analytics through its environment variable.
	os.environ["GRADIO_ANALYTICS_ENABLED"] = "false"

	# Configure logging
	logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")

	# Start ephemeral Spaces on PRs (see config in README.md)
	enable_space_ci()


	# Runs once at import time, before any UI is built.
	# NOTE(review): presumably fetches the benchmark data that
	# build_leadearboard_df() reads later - confirm in src.leaderboard.
	download_openbench()

	def restart_space():
	    """Restart the Space identified by REPO_ID, then call download_openbench().

	    Both steps are delegated to project helpers; this function only fixes
	    their order (restart request first, download second).
	    """
	    API.restart_space(
	        repo_id=REPO_ID,
	    )
	    download_openbench()


	def _build_submission_record(report, model_name, username):
	    """Flatten an uploaded evaluation report into one leaderboard row.

	    Args:
	        report: Parsed JSON of the uploaded file. It must provide
	            ``results`` (with ``moviemc``, ``musicmc``, ``bookmc`` and
	            ``lawmc`` entries, each holding an ``"acc,none"`` value) and
	            ``config["model_dtype"]``.
	        model_name: Text typed into the "Model name" box.
	        username: Text typed into the "Username" box.

	    Returns:
	        The ``results`` dict of ``report``, modified in place: the four
	        task entries are replaced by their ``"acc,none"`` value (movie and
	        book are renamed to ``moviesmc`` / ``booksmc``), and ``model``,
	        ``model_dtype`` and ``ppl`` are added.

	    Raises:
	        KeyError, TypeError: If ``report`` does not have the expected shape.
	    """
	    record = report['results']
	    record['model'] = username + "/" + model_name
	    record['moviesmc'] = record['moviemc']["acc,none"]
	    record['musicmc'] = record['musicmc']["acc,none"]
	    record['booksmc'] = record['bookmc']["acc,none"]
	    record['lawmc'] = record['lawmc']["acc,none"]
	    # name = v['config']["model_args"].split('=')[1].split(',')[0]
	    record['model_dtype'] = report['config']["model_dtype"]
	    record['ppl'] = 0
	    # The raw per-task dicts were copied under the plural keys above.
	    record.pop('moviemc')
	    record.pop('bookmc')
	    return record


	def build_demo():
	    """Build the Gradio UI: a leaderboard tab and a submission tab.

	    The leaderboard tab renders the frame returned by
	    ``build_leadearboard_df()``. The submission tab takes a model name, a
	    username and a JSON results file; the file is converted to a leaderboard
	    row, uploaded to the ``Vikhrmodels/s-openbench-eval`` dataset, and the
	    reset flag (``RESET_JUDGEMENT_ENV``) is set to ``"1"``.

	    Returns:
	        The assembled ``gr.Blocks`` app (not yet launched).
	    """
	    demo = gr.Blocks(title="Small Shlepa", css=custom_css)
	    leaderboard_df = build_leadearboard_df()
	    with demo:
	        gr.HTML(TITLE)
	        gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")

	        with gr.Tabs(elem_classes="tab-buttons"):
	            with gr.TabItem("🏅 LLM Benchmark", elem_id="llm-benchmark-tab-table", id=0):
	                Leaderboard(
	                    value=leaderboard_df,
	                    datatype=[c.type for c in fields(AutoEvalColumn)],
	                    select_columns=SelectColumns(
	                        default_selection=[c.name for c in fields(AutoEvalColumn) if c.displayed_by_default],
	                        cant_deselect=[c.name for c in fields(AutoEvalColumn) if c.never_hidden or c.dummy],
	                        label="Select Columns to Display:",
	                    ),
	                    search_columns=[
	                        AutoEvalColumn.model.name,
	                        # AutoEvalColumn.fullname.name,
	                        # AutoEvalColumn.license.name
	                    ],
	                )

	            # with gr.TabItem("📝 About", elem_id="llm-benchmark-tab-table", id=1):
	            # gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
	            # with gr.TabItem("❗FAQ", elem_id="llm-benchmark-tab-table", id=2):
	            # gr.Markdown(FAQ_TEXT, elem_classes="markdown-text")

	            with gr.TabItem("🚀 Submit ", elem_id="llm-benchmark-tab-table", id=3):
	                with gr.Row():
	                    gr.Markdown("# ✨ Submit your model here!", elem_classes="markdown-text")

	                with gr.Column():
	                    model_name_textbox = gr.Textbox(label="Model name")
	                    submitter_username = gr.Textbox(label="Username")

	                    def upload_file(file, model_name, username):
	                        """Validate, convert and upload one submitted results file.

	                        Parameters arrive positionally in the order of the
	                        ``inputs`` list given to ``upload_button.upload`` below:
	                        the uploaded file, the model name, the username.

	                        Returns:
	                            ``file.name``, shown in the ``gr.File`` output.

	                        Raises:
	                            gr.Error: If a name is blank or the file is not
	                                JSON of the expected structure.
	                        """
	                        file_path = file.name.split("/")[-1]
	                        logging.info("New submition: file saved to %s", file_path)

	                        if not (model_name or "").strip() or not (username or "").strip():
	                            raise gr.Error("Please provide both a model name and a username.")

	                        try:
	                            with open(file.name, "r", encoding="utf-8") as f:
	                                report = json.load(f)
	                            record = _build_submission_record(report, model_name, username)
	                        except (ValueError, KeyError, TypeError) as err:
	                            # ValueError covers invalid JSON and undecodable bytes.
	                            raise gr.Error(
	                                "Could not read the submission: expected JSON with "
	                                "'results' (moviemc, musicmc, bookmc, lawmc) and "
	                                "'config.model_dtype'."
	                            ) from err

	                        # The names are user input. A path separator would place the
	                        # file in a sub-folder (or outside "external/"), where the
	                        # non-recursive "external/*.json" scan never finds it.
	                        file_stem = (model_name + username).replace("/", "_").replace("\\", "_")

	                        API.upload_file(
	                            # Raw bytes avoid handing over a buffer whose cursor
	                            # is left at the end after writing.
	                            path_or_fileobj=json.dumps(record).encode('utf-8'),
	                            path_in_repo="model_data/external/" + file_stem + ".json",
	                            repo_id="Vikhrmodels/s-openbench-eval",
	                            repo_type="dataset",
	                        )
	                        # Ask the periodic update_board() job to rebuild the board.
	                        os.environ[RESET_JUDGEMENT_ENV] = "1"
	                        return file.name

	                    # NOTE(review): both operands are component objects, which are
	                    # presumably always truthy, so this guard looks like a no-op
	                    # kept from the original - confirm before removing.
	                    if model_name_textbox and submitter_username:
	                        file_output = gr.File()
	                        upload_button = gr.UploadButton(
	                            "Click to Upload & Submit Answers", file_types=["*"], file_count="single"
	                        )
	                        upload_button.upload(
	                            upload_file,
	                            [upload_button, model_name_textbox, submitter_username],
	                            file_output,
	                        )

	    return demo


	# print(os.system('cd src/gen && ../../.venv/bin/python gen_judgment.py'))
	# print(os.system('cd src/gen/ && python show_result.py --output'))


	def update_board():
	    """Rebuild and publish the leaderboard when a reset has been requested.

	    Does nothing unless the environment variable named by
	    ``RESET_JUDGEMENT_ENV`` equals ``"1"``. Otherwise it clears the flag,
	    removes the local ``m_data`` and ``data`` directories, downloads the
	    ``Vikhrmodels/s-openbench-eval`` dataset into ``m_data``, merges every
	    parseable JSON file under ``m_data/model_data/external`` into one list,
	    writes it to ``genned.json``, uploads that file as ``leaderboard.json``
	    to ``Vikhrmodels/s-shlepa-metainfo`` and finally calls ``restart_space()``.
	    """
	    import glob
	    import shutil

	    need_reset = os.environ.get(RESET_JUDGEMENT_ENV)
	    logging.info("Updating the judgement: %s", need_reset)
	    if need_reset != "1":
	        return
	    os.environ[RESET_JUDGEMENT_ENV] = "0"

	    # A directory that is already absent must not abort the refresh: the
	    # flag was cleared above, so an exception here would drop the update.
	    for stale_dir in ("m_data", "data"):
	        try:
	            shutil.rmtree(stale_dir)
	        except FileNotFoundError:
	            logging.info("Directory %s not present, nothing to remove", stale_dir)

	    download_dataset("Vikhrmodels/s-openbench-eval", "m_data")

	    submissions = []
	    for path in glob.glob("./m_data/model_data/external/*.json"):
	        with open(path, encoding="utf-8") as f:
	            try:
	                submissions.append(json.load(f))
	            except ValueError:
	                # Invalid JSON or undecodable bytes: skip this file, keep the rest.
	                logging.warning("Skipping unreadable submission file %s", path)

	    # Fall back to a single built-in row only when no submission could be read.
	    data_list = submissions or [{"musicmc": 0.3021276595744681, "lawmc": 0.2800829875518672, "model": "apsys/saiga_3_8b", "moviesmc": 0.3472222222222222, "booksmc": 0.2800829875518672, "model_dtype": "torch.float16", "ppl": 0}]

	    with open("genned.json", "w", encoding="utf-8") as f:
	        json.dump(data_list, f)

	    API.upload_file(
	        path_or_fileobj="genned.json",
	        path_in_repo="leaderboard.json",
	        repo_id="Vikhrmodels/s-shlepa-metainfo",
	        repo_type="dataset",
	    )
	    restart_space()

	# gen_judgement_file = os.path.join(HF_HOME, "src/gen/gen_judgement.py")
	# subprocess.run(["python3", gen_judgement_file], check=True)



	if __name__ == "__main__":
	    # Raise the reset flag so the first update_board() run does a full rebuild.
	    os.environ[RESET_JUDGEMENT_ENV] = "1"

	    # Schedule update_board() once a minute on a BackgroundScheduler.
	    scheduler = BackgroundScheduler()
	    scheduler.add_job(
	        update_board,
	        trigger="interval",
	        minutes=1,
	    )
	    scheduler.start()

	    # Build the UI and serve it.
	    demo_app = build_demo()
	    demo_app.launch(debug=True)