"""Gradio app that displays the GEO-Bench leaderboard read from ``leaderboard.jsonl``."""

import itertools
import os
import subprocess

import gradio as gr
import pandas as pd

from constants import metric_dict, tags, columns

SEARCH_PLACEHOLDER = " 🔍 Search for your Method (separate multiple queries with `,`) and press ENTER..."

TAG_WISE_INTRO = (
    "## Tag-Wise Results\n"
    "- The following table shows the results for each tag.\n"
    "- The tags are sorted in the order of their performance.\n"
    "- The table is sorted in the order of the overall score.\n"
)

ABOUT_INTRO = (
    "## About GEO-bench\n"
    "- GEO-bench is a benchmarking platform for content optimization Methods for generative engines.\n"
    "- It is a part of the work released under [GEO](https://arxiv.org/abs/2311.09735)\n"
    "- The benchmark comprises of 9 datasets, 7 of which were publicly available, while 2 have been released by us.\n"
    "- Dataset can be downloaded from [here](https://huggingface.co/datasets/GEO-optim/geo-bench)"
)

SUBMIT_INTRO = (
    "## Submit\n"
    "- To submit your Method, please check "
    "[here](https://github.com/Pranjal2041/GEO/GEO-Bench/leaderboard/Readme.md)"
)


# Download from github and load the data
# TODO: Download every x hours
def download_data(
    url="https://github.com/Pranjal2041/GEO/GEO-Bench/leaderboard/leaderboard.jsonl",
    path="leaderboard.jsonl",
):
    """Fetch the leaderboard file with ``wget`` and move it into place.

    The download goes to ``{path}_tmp`` first and is only renamed to ``path``
    once ``wget`` has succeeded, so a failed download does not overwrite an
    existing file.

    Args:
        url: Location to download from.
        path: Destination file name.

    Returns:
        0 on success, a non-zero code if the download or the rename failed.
    """
    tmp_path = f"{path}_tmp"
    try:
        # Argument list (no shell) so url/path are never interpreted by a shell.
        ret_code = subprocess.run(["wget", url, "-O", tmp_path], check=False).returncode
    except FileNotFoundError:
        # wget is not installed; 127 is the conventional "command not found" code.
        return 127
    if ret_code != 0:
        return ret_code
    try:
        os.replace(tmp_path, path)
    except OSError:
        return 1
    return 0


def search_leaderboard(df, queries):
    """Return the rows of ``df`` whose ``Method`` contains any of ``queries``.

    Matching is case-insensitive and literal (queries are not treated as
    regular expressions, so characters such as ``(`` are safe to search for).
    Rows are ordered by query, then by their order in ``df``; duplicate rows
    are dropped.

    Args:
        df: DataFrame with a ``Method`` column.
        queries: Iterable of search strings.

    Returns:
        A DataFrame with the matching rows (empty if nothing matched).
    """
    matches = [
        df[df["Method"].str.contains(query, case=False, na=False, regex=False)]
        for query in queries
    ]
    if not matches:
        # pd.concat raises on an empty list; return an empty frame with the same columns.
        return df.iloc[0:0]
    return pd.concat(matches).drop_duplicates()


def search_tags_leaderboard(df, tag_blocks, queries):
    """Filter ``df`` by ``tag_blocks``, aggregate it, then search the result by ``queries``."""
    return search_leaderboard(filter_tags(df, tag_blocks), queries)


def filter_tags(df, tag_blocks):
    """Keep the rows of ``df`` that match every tag block and aggregate them.

    A row matches a block when the block contains ``'any'`` or when one of the
    row's tags (lower-cased) is a substring of one of the block's selected
    tags (lower-cased). A row is kept only if it matches all blocks.

    Args:
        df: Per-sample DataFrame with a ``tags`` column.
            NOTE(review): each ``tags`` entry is assumed to be an iterable of
            strings - confirm against the data file.
        tag_blocks: One collection of selected tag names per filter group.

    Returns:
        The aggregated leaderboard produced by ``prepare_complete_dt`` for the
        kept rows.
    """
    selected_sets = [{tag.lower() for tag in block} for block in tag_blocks]

    def row_matches(row_tags):
        lowered = [row_tag.lower() for row_tag in row_tags]
        return all(
            "any" in selected
            or any(row_tag in choice for row_tag in lowered for choice in selected)
            for selected in selected_sets
        )

    # Iterate the frame that was passed in so the positions line up with df.iloc.
    keep = [pos for pos, row_tags in enumerate(df["tags"]) if row_matches(row_tags)]
    return prepare_complete_dt(df.iloc[keep])


def prepare_complete_dt(complete_dt):
    """Aggregate per-sample results into one leaderboard row per Method.

    For every Method the row holds the Method name, the mean of each column
    named by the values of ``metric_dict`` (in ``metric_dict`` order), and the
    first ``source`` value seen for that Method. The result uses ``columns``
    as its column names and is sorted by ``'WordPos Overall'`` in descending
    order when that column exists.

    Args:
        complete_dt: Per-sample DataFrame with ``Method``, ``source`` and the
            metric columns.

    Returns:
        The aggregated DataFrame.
    """
    rows = []
    for method, group in complete_dt.groupby("Method", sort=False):
        row = [method]
        row.extend(group[metric_dict[metric]].mean() for metric in metric_dict)
        row.append(group["source"].iloc[0])
        rows.append(row)

    overall = pd.DataFrame(rows, columns=columns)
    try:
        overall.sort_values(by=["WordPos Overall"], inplace=True, ascending=False)
    except KeyError:
        # No such column in `columns`: leave the rows unsorted.
        pass
    return overall


def format_df_for_leaderboard(df):
    """Return a display copy of ``df``; the input frame is left untouched.

    ``Method`` is converted to text, every ``metric_dict`` column is scaled
    by 100 and rounded to one decimal, and the ``source`` column is dropped.

    NOTE(review): the original comment says the source should be embedded into
    the Method column using markdown, but the visible format string only
    emits the Method name - confirm whether link markup is missing here.
    """
    df_copy = df.copy()
    # Column-wise map (rather than a row-wise apply) also works on an empty frame.
    df_copy["Method"] = df_copy["Method"].map(lambda name: f"{name}")
    # Convert all float metrics to 1 decimal
    for metric in metric_dict:
        df_copy[metric] = df_copy[metric].map(lambda x: float(f"{(100 * x):.1f}"))
    # drop the source column
    return df_copy.drop(columns=["source"])


def _split_queries(query):
    """Split a comma-separated search string into stripped query terms."""
    return [term.strip() for term in query.split(",")]


def _leaderboard_table():
    """Create the read-only Dataframe component showing the overall leaderboard."""
    return gr.components.Dataframe(
        format_df_for_leaderboard(DATA_OVERALL),
        datatype=["markdown"] + ["number"] * (len(DATA_OVERALL.columns) - 2) + ["markdown"],
        type="pandas",
        wrap=True,
        interactive=False,
    )


def _tag_checkbox(tag):
    """Create the boxed CheckboxGroup for one tag group, with every choice selected."""
    with gr.Box(elem_id="box-filter"):
        return gr.CheckboxGroup(
            label=tag,
            choices=tags[tag],
            value=tags[tag],
            interactive=True,
            elem_id=f"filter-{tag}",
        )


ret_code = 0
# ret_code = download_data()
if ret_code != 0:
    print("Leaderboard Download failed")

complete_dt = pd.read_json("leaderboard.jsonl", lines=True, orient="records")
DATA_OVERALL = prepare_complete_dt(complete_dt)

with gr.Blocks() as demo:
    # NOTE(review): this text is passed to gr.HTML but contains no markup -
    # confirm whether HTML tags were lost from it.
    demo_content = """

GEO-Bench Leaderboard

- For benchmarking content optimization Methods for Generative Engines.
- GEO-Bench evaluates Methods for optimizing website content to improve visibility in generative engine responses. Benchmark contains 10K queries across 9 datasets covering diverse domains and intents.
- Refer to GEO paper for more details

"""
    gr.HTML(demo_content)

    with gr.Tabs():
        with gr.TabItem('Overall 📊'):
            with gr.Row():
                gr.Markdown('## Overall Leaderboard')
            with gr.Row():
                data_overall = _leaderboard_table()
            with gr.Row():
                search_bar = gr.Textbox(
                    placeholder=SEARCH_PLACEHOLDER,
                    show_label=False,
                    elem_id="search-bar",
                )

            def search_button_click(query):
                """Search the overall leaderboard and return the formatted matches."""
                filtered_data = search_leaderboard(DATA_OVERALL, _split_queries(query))
                return format_df_for_leaderboard(filtered_data)

        with gr.TabItem('Tag-Wise Results 📊'):
            with gr.Row():
                gr.Markdown(TAG_WISE_INTRO)
            with gr.Row():
                search_bar_tag = gr.Textbox(
                    placeholder=SEARCH_PLACEHOLDER,
                    show_label=False,
                    elem_id="search-bar",
                )

            tag_names = list(tags.keys())
            with gr.Row():
                boxes = dict()
                # First three tag groups in the left column, groups after the
                # fourth in the right column.
                with gr.Column(min_width=320):
                    for tag in tag_names[:3]:
                        boxes[tag] = _tag_checkbox(tag)
                with gr.Column(min_width=320):
                    for tag in tag_names[4:]:
                        boxes[tag] = _tag_checkbox(tag)
            # The fourth tag group gets a row of its own.
            with gr.Row():
                tag = tag_names[3]
                boxes[tag] = _tag_checkbox(tag)

            with gr.Row():
                data_tag_wise = _leaderboard_table()

            def filter_tag_click(*boxes):
                """Rebuild the tag-wise table from the current checkbox selections."""
                return format_df_for_leaderboard(filter_tags(complete_dt, list(boxes)))

            def search_tag_click(query, *boxes):
                """Apply the checkbox selections, then the search query, to the tag-wise table."""
                return format_df_for_leaderboard(
                    search_tags_leaderboard(complete_dt, list(boxes), _split_queries(query))
                )

            # Any checkbox change re-filters using the state of all checkbox groups.
            for box in boxes:
                boxes[box].change(fn=filter_tag_click, inputs=list(boxes.values()), outputs=data_tag_wise)
            search_bar_tag.submit(
                fn=search_tag_click,
                inputs=[search_bar_tag] + list(boxes.values()),
                outputs=data_tag_wise,
            )

        with gr.TabItem('About GEO-bench 📖'):
            with gr.Row():
                gr.Markdown(ABOUT_INTRO)
            with gr.Row():
                # Goal of benchmarking content optimization for generative engines
                # Contains 10K carefully curated queries
                # Queries are diverse and cover many domains/intents
                # Annotated with tags/dimensions like domain, difficulty, etc.
                # Above list in HTML format
                gr.HTML(f"""

Key-Highlights of GEO-bench

Goal of benchmarking content optimization for generative engines
Contains 10K carefully curated queries
Queries are diverse and cover many domains/intents
Annotated with tags/dimensions like domain, difficulty, etc.

""")
                # Benchmark Link:
                # gr.Markdown(f"""### Benchmark Link: [GEO-bench](huggingface.co/datasets/GEO-optim/geo-bench)""")
                # Info about tags and other statistics

        with gr.TabItem('Submit 📝'):
            with gr.Row():
                gr.Markdown(SUBMIT_INTRO)
            # Create a form to submit, the response should be sent to a google form

    search_bar.submit(fn=search_button_click, inputs=search_bar, outputs=data_overall)

if __name__ == "__main__":
    demo.launch(share=True)