import gradio as gr
import pandas as pd
import numpy as np

# Dictionary mapping file extensions to full language names
extension_to_language = {
    "clj": "Clojure",
    "cpp": "C++",
    "cs": "C#",
    "d": "D",
    "dart": "Dart",
    "elixir": "Elixir",
    "go": "Go",
    "hs": "Haskell",
    "java": "Java",
    "jl": "Julia",
    "js": "JavaScript",
    "lua": "Lua",
    "ml": "OCaml",
    "php": "PHP",
    "pl": "Perl",
    "r": "R",
    "rb": "Ruby",
    "rkt": "Racket",
    "rs": "Rust",
    "scala": "Scala",
    "sh": "Shell",
    "swift": "Swift",
    "ts": "TypeScript",
}

# Read the CSV file of pass@k estimates
df = pd.read_csv('passk.csv')

# Function to extract language and model from a Dataset name
# (assumes dash-separated names with the language extension in the second field)
def extract_info(dataset):
    parts = dataset.split('-')
    language = parts[1]
    model = '-'.join(parts[2:-2])
    return pd.Series({'Language': language, 'Model': model})

# Extract language and model information
df[['Language', 'Model']] = df['Dataset'].apply(extract_info)

# Create a dictionary to map models to friendly names
model_to_friendly = {
    "starcoder2_15b": "StarCoder2-15B",
    "deepseekcoder_v2lite_base": "DeepSeekCoder2-Lite-Base",
}

# Function to get the friendly name, or the original name if not in the dictionary
def get_friendly_name(model):
    return model_to_friendly.get(model, model)

# Create a pivot table of estimates by model and language
pivot = df.pivot(index='Model', columns='Language', values='Estimate')

# Get unique languages and models
languages = sorted(pivot.columns)
models = sorted(pivot.index)

# Function to update the table based on the selected languages
def update_table(selected_languages):
    if not selected_languages:
        return pd.DataFrame({'Model': [get_friendly_name(model) for model in models]})

    display_data = pivot[selected_languages].replace(np.nan, "-")
    display_data = display_data.applymap(
        lambda x: f"{x:.2f}" if isinstance(x, (int, float)) else x
    )
    # Add the Model column as the first column
    display_data.insert(0, 'Model', [get_friendly_name(model) for model in display_data.index])
    # Reset the index to remove the model names from the index
    display_data = display_data.reset_index(drop=True)
    # Rename columns to full language names
    display_data.columns = ['Model'] + [
        extension_to_language.get(lang, lang) for lang in selected_languages
    ]
    return display_data

# Function to get the initial table data
def get_initial_table():
    return update_table(languages)

# Create the Gradio interface
with gr.Blocks() as app:
    gr.Markdown("""
# MultiPL-E Results

[MultiPL-E](https://huggingface.co/datasets/nuprl/MultiPL-E) is a dataset for evaluating large
language models for code generation that supports several programming languages. It takes the
OpenAI HumanEval and the Mostly Basic Python Programs (MBPP) benchmarks and uses little compilers
to translate them to other languages. It is easy to add support for new languages and benchmarks.

This table shows how some recent Code LLMs perform on MultiPL-HumanEval. We use the MultiPL-E 3.0
problems, which incorporate several fixes and support several new programming languages.
""")

    with gr.Row():
        language_checkboxes = gr.CheckboxGroup(
            choices=[f"{extension_to_language[lang]} ({lang})" for lang in languages],
            label="Select Languages",
            value=[f"{extension_to_language[lang]} ({lang})" for lang in languages],
        )

    table = gr.Dataframe(
        value=get_initial_table,
        headers=['Model'] + [extension_to_language.get(lang, lang) for lang in languages],
        type="pandas",
    )

    def update_table_wrapper(selected_languages):
        # Extract the language codes from the selected full names
        selected_codes = [lang.split('(')[-1].strip(')') for lang in selected_languages]
        return update_table(selected_codes)

    language_checkboxes.change(update_table_wrapper, inputs=[language_checkboxes], outputs=[table])

# Launch the app
if __name__ == "__main__":
    app.launch()