Describe MultiPL-E
Browse files
app.py
CHANGED
@@ -44,7 +44,7 @@ df[['Language', 'Model']] = df['Dataset'].apply(extract_info)
|
|
44 |
# Create a dictionary to map models to friendly names
|
45 |
model_to_friendly = {
|
46 |
"starcoder2_15b": "StarCoder2-15B",
|
47 |
-
"
|
48 |
}
|
49 |
|
50 |
# Function to get friendly name or original name if not in the dictionary
|
@@ -83,7 +83,21 @@ def get_initial_table():
|
|
83 |
|
84 |
# Create the Gradio interface
|
85 |
with gr.Blocks() as app:
|
86 |
-
gr.Markdown("
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
87 |
|
88 |
with gr.Row():
|
89 |
language_checkboxes = gr.CheckboxGroup(
|
|
|
44 |
# Create a dictionary to map models to friendly names
|
45 |
model_to_friendly = {
|
46 |
"starcoder2_15b": "StarCoder2-15B",
|
47 |
+
"deepseekcoder_v2lite_base": "DeepSeekCoder2-Lite-Base"
|
48 |
}
|
49 |
|
50 |
# Function to get friendly name or original name if not in the dictionary
|
|
|
83 |
|
84 |
# Create the Gradio interface
|
85 |
with gr.Blocks() as app:
|
86 |
+
gr.Markdown("""
|
87 |
+
# MultiPL-E Results
|
88 |
+
|
89 |
+
[MultiPL-E](https://huggingface.co/datasets/nuprl/MultiPL-E) is a dataset for
|
90 |
+
evaluating large language models for code generation that supports several
|
91 |
+
programming languages. It takes the OpenAI HumanEval and the Mostly Basic
|
92 |
+
Python Programs (MBPP) benchmarks and uses little compilers to translate them
|
93 |
+
to other languages. It is easy to add support for new languages and benchmarks.
|
94 |
+
|
95 |
+
This table shows how some recent Code LLMs perform on MultiPL-HumanEval.
|
96 |
+
|
97 |
+
We use the MultiPL-E 3.0 problems, which incorporate several fixes and
|
98 |
+
support several new programming languages.
|
99 |
+
|
100 |
+
""")
|
101 |
|
102 |
with gr.Row():
|
103 |
language_checkboxes = gr.CheckboxGroup(
|