"""Gradio app that serves the BookSQL leaderboard and accepts new submissions."""

import os
from typing import Optional, Sequence

import gradio as gr
import pandas as pd
from apscheduler.schedulers.background import BackgroundScheduler
from huggingface_hub import HfApi

from uploads import add_new_eval

CITATION_BUTTON_LABEL = "Copy the following snippet to cite these results"
CITATION_BUTTON_TEXT = r"""@inproceedings{kumar-etal-2024-booksql,
    title = "BookSQL: A Large Scale Text-to-SQL Dataset for Accounting Domain",
    author = "Kumar, Rahul and Raja, Amar and Harsola, Shrutendra and Subrahmaniam, Vignesh and Modi, Ashutosh",
    booktitle = "Proceedings of the 2024 Conference of the North American Chapter of the Association for Computational Linguistics",
    month = "march",
    year = "2024",
    address = "Mexico City, Mexico",
    publisher = "Association for Computational Linguistics"
}"""

# Markdown shown at the top of the page.
INTRODUCTION_TEXT = """
## 🥇 BookSQL Leaderboard

Given the importance and wide prevalence of business databases across the world, the proposed dataset, [BookSQL](https://arxiv.org/abs/2406.07860) focuses on the finance and accounting domain. Accounting databases are used across a wide spectrum of industries like construction, healthcare, retail, educational services, insurance, restaurant, real estate, etc. Business in these industries arranges their financial transactions into their own different set of categories (called a chart of accounts Industry Details in accounting terminology. Text-to-SQL system developed on BookSQL will be robust at handling various types of accounting databases. The total size of the dataset is 1 million. The dataset is prepared under financial experts' supervision, and the dataset's statistics are provided in below table. The dataset consists of 27 businesses, and each business has around 35k - 40k transactions

Read more at [https://github.com/Exploration-Lab/BookSQL](https://github.com/Exploration-Lab/BookSQL).

Please follow this format for uploading prediction file (https://huggingface.co/spaces/Exploration-Lab/BookSQL-Leaderboard/blob/main/sample_prediction.csv)
"""

# Markdown shown at the bottom of the page.
QUICK_LINKS_TEXT = """
## Quick Links

- [**GitHub Repository**](https://github.com/exploration-lab/BookSQL): Access the source code, fine-tuning scripts, and additional resources for the BookSQL dataset.
- [**arXiv Paper**](https://arxiv.org/abs/2406.07860): Detailed information about the BookSQL dataset and its significance in unlearning tasks.
- [**Dataset on Hugging Face**](https://huggingface.co/datasets/Exploration-Lab/BookSQL): Direct link to download the BookSQL dataset.
"""

api = HfApi()
TOKEN = os.environ.get("TOKEN", None)
LEADERBOARD_PATH = "Exploration-lab/BookSQL-Leaderboard"

# CSV file holding the baseline results displayed in the table.
BASELINE_CSV_PATH = "submissions/baseline/baseline.csv"
# Columns that are always displayed, followed by the selectable metric columns
# in their display order.
BASE_COLUMNS = ["Method", "Submitted By"]
TASK_COLUMNS = ["EMA", "EX", "BLEU-4", "ROUGE-L"]


def restart_space():
    """Ask the Hub API to restart the Space identified by LEADERBOARD_PATH."""
    api.restart_space(repo_id=LEADERBOARD_PATH, token=TOKEN)


def baseline_load_data(tasks: Optional[Sequence[str]]) -> pd.DataFrame:
    """Load the baseline CSV restricted to the selected metric columns.

    Args:
        tasks: Metric column names to keep (any of TASK_COLUMNS). ``None`` is
            treated as an empty selection, so only the base columns are
            returned.

    Returns:
        A DataFrame with the base columns followed by the selected metric
        columns, keeping only the first row for each "Method" value.
    """
    # A missing selection used to drop into the debugger (which hangs a
    # headless server) and then fail; treat it as "no metric selected".
    if tasks is None:
        tasks = []

    column_names = BASE_COLUMNS + [col for col in TASK_COLUMNS if col in tasks]

    df = pd.read_csv(BASELINE_CSV_PATH)
    df = df[column_names]
    return df.drop_duplicates(subset=["Method"], keep="first")


def load_data(tasks: Optional[Sequence[str]]) -> pd.DataFrame:
    """Return the leaderboard data for the selected tasks."""
    return baseline_load_data(tasks)


def search_leaderboard(df: pd.DataFrame, query: Optional[str]) -> pd.DataFrame:
    """Return the rows of ``df`` whose "Method" contains ``query``.

    An empty (or missing) query returns ``df`` unchanged. The query is matched
    as a literal, case-sensitive substring.
    """
    if not query:
        return df
    # regex=False: the text comes straight from the search box, so characters
    # such as "(" or "*" must not be parsed as a pattern (and must not raise).
    # na=False: rows without a method name simply do not match.
    matches = df["Method"].str.contains(query, regex=False, na=False)
    return df[matches]


def change_version(tasks: Optional[Sequence[str]]) -> pd.DataFrame:
    """Reload the leaderboard for a new task selection."""
    return load_data(tasks)


def _refresh_leaderboard(
    tasks: Optional[Sequence[str]], query: Optional[str]
) -> pd.DataFrame:
    """Rebuild the table from the CSV for ``tasks`` and apply ``query``.

    Filtering the data loaded from disk, rather than the rows currently shown,
    lets rows hidden by an earlier search reappear when the query is shortened
    or cleared, and keeps the active search applied when the task selection
    changes.
    """
    return search_leaderboard(load_data(tasks), query)


# NOTE: the nesting of the layout below follows the usual structure of such
# Gradio leaderboards: header, citation, leaderboard tab, submission form,
# quick links.
demo = gr.Blocks()
with demo:
    gr.Markdown(INTRODUCTION_TEXT)

    with gr.Row():
        with gr.Accordion("📙 Citation", open=False):
            citation_button = gr.Textbox(
                value=CITATION_BUTTON_TEXT,
                label=CITATION_BUTTON_LABEL,
                elem_id="citation-button",
                show_copy_button=True,
            )

    with gr.Tabs():
        with gr.TabItem("Leaderboard"):
            with gr.Row():
                tasks_checkbox = gr.CheckboxGroup(
                    label="Select Tasks",
                    choices=list(TASK_COLUMNS),
                    value=list(TASK_COLUMNS),
                )

            with gr.Row():
                search_bar = gr.Textbox(
                    placeholder="Search for methods...",
                    show_label=False,
                )

            leaderboard_table = gr.components.Dataframe(
                value=load_data(list(TASK_COLUMNS)),
                interactive=True,
                visible=True,
            )

            # Both controls recompute the table from the same two inputs so
            # the task selection and the search text always stay in sync.
            search_bar.change(
                _refresh_leaderboard,
                inputs=[tasks_checkbox, search_bar],
                outputs=leaderboard_table,
            )
            tasks_checkbox.change(
                _refresh_leaderboard,
                inputs=[tasks_checkbox, search_bar],
                outputs=leaderboard_table,
            )

    with gr.Accordion("Submit a new model for evaluation"):
        with gr.Row():
            with gr.Column():
                method_name_textbox = gr.Textbox(label="Method name")
                url_textbox = gr.Textbox(label="Url to model information")
            with gr.Column():
                organisation = gr.Textbox(label="Organisation")
                mail = gr.Textbox(label="Contact email")
                file_output = gr.File()

        submit_button = gr.Button("Submit Eval")
        submission_result = gr.Markdown()
        submit_button.click(
            add_new_eval,
            [
                method_name_textbox,
                url_textbox,
                file_output,
                organisation,
                mail,
            ],
            submission_result,
        )

    gr.Markdown(QUICK_LINKS_TEXT)

# Restart the Space once an hour.
scheduler = BackgroundScheduler()
scheduler.add_job(restart_space, "interval", seconds=3600)
scheduler.start()

demo.launch(share=True)