arjunguha committed
Commit: 8cbb73d
Parent: 99a9bf4

Describe MultiPL-E

Files changed (1):
  1. app.py +16 -2
app.py CHANGED
@@ -44,7 +44,7 @@ df[['Language', 'Model']] = df['Dataset'].apply(extract_info)
 # Create a dictionary to map models to friendly names
 model_to_friendly = {
     "starcoder2_15b": "StarCoder2-15B",
-    "deepseekcoder_v2lite": "DeepSeekCoder2-Lite"
+    "deepseekcoder_v2lite_base": "DeepSeekCoder2-Lite-Base"
 }

 # Function to get friendly name or original name if not in the dictionary
@@ -83,7 +83,21 @@ def get_initial_table():

 # Create the Gradio interface
 with gr.Blocks() as app:
-    gr.Markdown("# Model Leaderboard")
+    gr.Markdown("""
+    # MultiPL-E Results
+
+    [MultiPL-E](https://huggingface.co/datasets/nuprl/MultiPL-E) is a dataset for
+    evaluating large language models for code generation that supports several
+    programming languages. It takes the OpenAI HumanEval and the Mostly Basic
+    Python Programs (MBPP) benchmarks and uses little compilers to translate them
+    to other languages. It is easy to add support for new languages and benchmarks.
+
+    This table shows how some recent Code LLMs perform on MultiPL-HumanEval.
+
+    We use the MultiPL-E 3.0 problems, which incorporate several fixes and
+    support several new programming languages.
+
+    """)

     with gr.Row():
         language_checkboxes = gr.CheckboxGroup(
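
The comment kept as context at the end of the first hunk refers to a lookup helper whose body falls outside the diff. As a minimal sketch of that lookup-with-fallback pattern (the name get_friendly_name is hypothetical, not taken from the commit):

    # Hypothetical helper matching the comment in the first hunk; the actual
    # function body is not shown in this diff, so this is only a sketch.
    def get_friendly_name(model: str) -> str:
        # dict.get falls back to the raw identifier when no friendly
        # name has been registered in model_to_friendly.
        return model_to_friendly.get(model, model)

With the updated dictionary, get_friendly_name("deepseekcoder_v2lite_base") returns "DeepSeekCoder2-Lite-Base", while an unmapped identifier is passed through unchanged.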
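
The new header text links to the MultiPL-E dataset on the Hugging Face Hub. For readers who want the underlying problems, a minimal sketch of loading one translated HumanEval split with the datasets library; the configuration name "humaneval-lua" and the record fields are assumptions based on the dataset card, not part of this commit:

    from datasets import load_dataset

    # MultiPL-E publishes one configuration per benchmark/language pair;
    # "humaneval-lua" (assumed name) is HumanEval translated to Lua.
    problems = load_dataset("nuprl/MultiPL-E", "humaneval-lua", split="test")

    # Each record is assumed to carry a language-specific prompt and the
    # tests used to check generated completions.
    first = problems[0]
    print(first["name"])
    print(first["prompt"])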