Spaces:

nuprl
/

MultiPL-E

Sleeping

App Files Files Community

MultiPL-E / app.py

arjunguha

Two digits of precision

1d6280a 3 months ago

raw

history blame

4 kB

	import gradio as gr
	import pandas as pd
	import numpy as np

	# Lookup table: language code (file extension) -> full language name shown
	# in the UI. Codes missing from this table are displayed as-is by callers
	# that use ``.get(code, code)``.
	extension_to_language = {
	    "clj": "Clojure",
	    "cpp": "C++",
	    "cs": "C#",
	    "d": "D",
	    "elixir": "Elixir",
	    "go": "Go",
	    "hs": "Haskell",
	    "java": "Java",
	    "jl": "Julia",
	    "js": "JavaScript",
	    "lua": "Lua",
	    "ml": "OCaml",
	    "php": "PHP",
	    "pl": "Perl",
	    "r": "R",
	    "rb": "Ruby",
	    "rkt": "Racket",
	    "rs": "Rust",
	    "scala": "Scala",
	    "sh": "Shell",
	    "swift": "Swift",
	    "ts": "TypeScript",
	}

	# Load the raw results table. The path is relative, so it is resolved
	# against the process's current working directory.
	df = pd.read_csv("passk.csv")

	def extract_info(dataset):
	    """Pull the language code and model id out of a dataset name.

	    The name is split on ``-``. The second field is the language; every
	    field from the third up to (but excluding) the last two is re-joined
	    with ``-`` to form the model id, so model ids may contain hyphens.

	    Args:
	        dataset: Hyphen-separated dataset name.

	    Returns:
	        A ``pd.Series`` with the entries ``Language`` and ``Model``.

	    Raises:
	        IndexError: If the name has fewer than two hyphen-separated fields.
	    """
	    fields = dataset.split("-")
	    info = {
	        "Language": fields[1],
	        "Model": "-".join(fields[2:-2]),
	    }
	    return pd.Series(info)

	# Derive the Language and Model columns from each row's Dataset name.
	# ``extract_info`` returns a two-entry Series per row, so ``apply`` yields a
	# two-column frame that is assigned to both new columns at once.
	df[["Language", "Model"]] = df["Dataset"].apply(extract_info)

	# Display names for known model ids. Ids that are not listed here are shown
	# unchanged (see ``get_friendly_name``).
	model_to_friendly = {
	    "starcoder2_15b": "StarCoder2-15B",
	    "deepseekcoder_v2lite_base": "DeepSeekCoder2-Lite-Base",
	}

	def get_friendly_name(model):
	    """Return the display name for *model*, or *model* itself if none is registered."""
	    if model in model_to_friendly:
	        return model_to_friendly[model]
	    return model

	# Reshape the long-format results into a grid: one row per model, one column
	# per language code, each cell holding that pair's Estimate.
	pivot = df.pivot(values="Estimate", index="Model", columns="Language")

	# Sorted language codes (grid columns) and model ids (grid rows).
	languages = sorted(pivot.columns)
	models = sorted(pivot.index)

	def update_table(selected_languages):
	    """Build the results table restricted to the selected languages.

	    Args:
	        selected_languages: Language codes (column labels of ``pivot``) to
	            include, in the order they should appear.

	    Returns:
	        A ``pd.DataFrame`` whose first column, ``Model``, holds the friendly
	        model names, followed by one column per selected language labelled
	        with its full language name. Numeric scores are rendered as strings
	        with two decimals and missing scores as ``"-"``. When nothing is
	        selected, only the ``Model`` column is returned.

	    Raises:
	        KeyError: If a selected code is not a column of ``pivot``.
	    """
	    if not selected_languages:
	        return pd.DataFrame({'Model': [get_friendly_name(model) for model in models]})

	    def format_cell(value):
	        # Missing model/language combinations are shown as a dash.
	        if pd.isna(value):
	            return "-"
	        if isinstance(value, (int, float)):
	            return f"{value:.2f}"
	        return value

	    # Format column by column with Series.map: DataFrame.applymap is
	    # deprecated since pandas 2.1, while Series.map behaves the same on
	    # old and new pandas versions.
	    display_data = pivot[selected_languages].apply(
	        lambda column: column.map(format_cell)
	    )

	    # Add the Model column as the first column
	    display_data.insert(0, 'Model', [get_friendly_name(model) for model in display_data.index])

	    # Reset the index to remove the model names from the index
	    display_data = display_data.reset_index(drop=True)

	    # Rename columns to full language names
	    display_data.columns = ['Model'] + [extension_to_language.get(lang, lang) for lang in selected_languages]

	    return display_data

	def get_initial_table():
	    """Return the table shown on first load: every language selected."""
	    all_languages = languages
	    return update_table(all_languages)

	# Create the Gradio interface: an introduction, a language selector, and a
	# results table that is rebuilt whenever the selection changes.
	with gr.Blocks() as app:
	    gr.Markdown("""
	# MultiPL-E Results

	[MultiPL-E](https://huggingface.co/datasets/nuprl/MultiPL-E) is a dataset for
	evaluating large language models for code generation that supports several
	programming languages. It takes the OpenAI HumanEval and the Mostly Basic
	Python Programs (MBPP) benchmarks and uses little compilers to translate them
	to other languages. It is easy to add support for new languages and benchmarks.

	This table shows how some recent Code LLMs perform on MultiPL-HumanEval.

	We use the MultiPL-E 3.0 problems, which incorporates several fixes and
	supports several new programming languages.

	""")

	    # Checkbox labels have the form "<full name> (<code>)". Use .get() so a
	    # language present in the data but missing from extension_to_language
	    # falls back to its code instead of raising KeyError, matching how the
	    # table headers are built.
	    language_labels = [
	        f"{extension_to_language.get(lang, lang)} ({lang})" for lang in languages
	    ]

	    with gr.Row():
	        language_checkboxes = gr.CheckboxGroup(
	            choices=language_labels,
	            label="Select Languages",
	            # Pass a separate list so choices and value never share state.
	            value=list(language_labels),
	        )

	    table = gr.Dataframe(
	        # A callable is passed so the initial table is computed by Gradio
	        # rather than at definition time.
	        value=get_initial_table,
	        headers=['Model'] + [extension_to_language.get(lang, lang) for lang in languages],
	        type="pandas",
	    )

	    def update_table_wrapper(selected_languages):
	        """Convert selected checkbox labels to language codes and rebuild the table."""
	        # The code is the text between the last "(" and the closing ")".
	        selected_codes = [lang.split('(')[-1].strip(')') for lang in selected_languages]
	        return update_table(selected_codes)

	    language_checkboxes.change(update_table_wrapper, inputs=[language_checkboxes], outputs=[table])

	# Start the Gradio server only when this file is run as a script, not when
	# it is imported.
	if __name__ == "__main__":
	    app.launch()