File size: 4,140 Bytes
1700f59
 
 
 
 
 
64c6ce8
249c6e0
1d809c2
bb5768c
249c6e0
bb5768c
1700f59
 
44b82da
79284c5
1700f59
44b82da
1700f59
 
 
 
 
 
 
 
 
44b82da
1700f59
 
 
 
 
 
 
2ee60e6
1700f59
 
64c6ce8
1700f59
 
2b13d05
486f0f6
7580a4a
44b82da
2696f89
1700f59
64c6ce8
1700f59
2696f89
 
1700f59
 
2ee60e6
6c7d2a6
1700f59
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
import runpod
import gradio as gr

# RunPod GPU type IDs offered in the "GPU" dropdown (one entry per line for
# easier diffing when the catalogue changes).
GPU_LIST = [
    "NVIDIA A100 80GB PCIe",
    "NVIDIA A100-SXM4-80GB",
    "NVIDIA A30",
    "NVIDIA A40",
    "NVIDIA GeForce RTX 3070",
    "NVIDIA GeForce RTX 3080",
    "NVIDIA GeForce RTX 3080 Ti",
    "NVIDIA GeForce RTX 3090",
    "NVIDIA GeForce RTX 3090 Ti",
    "NVIDIA GeForce RTX 4070 Ti",
    "NVIDIA GeForce RTX 4080",
    "NVIDIA GeForce RTX 4090",
    "NVIDIA H100 80GB HBM3",
    "NVIDIA H100 PCIe",
    "NVIDIA L4",
    "NVIDIA L40",
    "NVIDIA RTX 4000 Ada Generation",
    "NVIDIA RTX 4000 SFF Ada Generation",
    "NVIDIA RTX 5000 Ada Generation",
    "NVIDIA RTX 6000 Ada Generation",
    "NVIDIA RTX A2000",
    "NVIDIA RTX A4000",
    "NVIDIA RTX A4500",
    "NVIDIA RTX A5000",
    "NVIDIA RTX A6000",
    "Tesla V100-FHHL-16GB",
    "Tesla V100-PCIE-16GB",
    "Tesla V100-SXM2-16GB",
    "Tesla V100-SXM2-32GB",
]

# Raw HTML header rendered at the top of the Gradio app (title, GitHub/Colab
# links, RunPod referral note). This is user-facing markup, so the string must
# be kept verbatim.
TITLE = """
<div align="center">
    <p style="font-size: 36px;">🧐 LLM AutoEval</p>
    <p style="font-size: 20px;">πŸ’» <a href="https://github.com/mlabonne/llm-autoeval">GitHub</a>β€€β€’β€€πŸ“ <a href="https://colab.research.google.com/drive/1Igs3WZuXAIv9X0vwqiE90QlEPys8e8Oa?usp=sharing">Colab notebook</a></p>
</div><br/>
<p><strong>Automatically evaluate your LLMs</strong> using <a href="https://www.runpod.io/console/pods">RunPod</a>. If you don't have an account, please consider using my <a href="https://runpod.io?ref=9nvk2srl">referral link</a>.</p>
<p><em>Once a pod has started, you can safely close this tab. The results are then privately uploaded to <a href="https://gist.github.com/">GitHub Gist</a>, and the pod is automatically destroyed.</em></p>
"""

def autoeval(BENCHMARK, MODEL_ID, GPU, NUMBER_OF_GPUS, CONTAINER_DISK, CLOUD_TYPE, REPO, TRUST_REMOTE_CODE, DEBUG, GITHUB_API_TOKEN, RUNPOD_TOKEN):
    """Launch a RunPod pod that evaluates ``MODEL_ID`` on ``BENCHMARK``.

    The parameters arrive positionally from the Gradio ``inputs`` list, in the
    same order as the widgets are declared. The pod is created from a fixed
    PyTorch image and template; the evaluation itself is driven by the
    environment variables passed below (consumed by the LLM AutoEval repo).

    Returns:
        str: a short status message displayed in the output textbox. The app
        intentionally does not wait for the evaluation to finish — results are
        uploaded to a GitHub Gist by the pod itself.
    """
    runpod.api_key = RUNPOD_TOKEN
    runpod.create_pod(  # return value was previously bound but never used
        name=f"Eval {MODEL_ID.split('/')[-1]} on {BENCHMARK.capitalize()}",
        image_name="runpod/pytorch:2.0.1-py3.10-cuda11.8.0-devel-ubuntu22.04",
        gpu_type_id=GPU,
        cloud_type=CLOUD_TYPE,
        gpu_count=NUMBER_OF_GPUS,
        volume_in_gb=0,
        container_disk_in_gb=CONTAINER_DISK,
        template_id="au6nz6emhk",
        env={
            "BENCHMARK": BENCHMARK,
            "MODEL_ID": MODEL_ID,
            "REPO": REPO,
            # Environment variables are strings; the checkboxes yield Python
            # bools, so stringify explicitly ("True"/"False") instead of
            # relying on the SDK's implicit f-string conversion.
            "TRUST_REMOTE_CODE": str(TRUST_REMOTE_CODE),
            "DEBUG": str(DEBUG),
            "GITHUB_API_TOKEN": GITHUB_API_TOKEN,
        }
    )

    return "Evaluation started!"

# Build and launch the Gradio UI. The order of widgets in `inputs` must match
# autoeval's positional parameter order exactly.
with gr.Blocks() as demo:
    gr.HTML(TITLE)
    inputs = [
        gr.Dropdown(["nous", "openllm"], label="Benchmark", info="Select your benchmark suite", value="nous"),
        # BUG FIX: gr.Textbox's first positional argument *is* `value`, so the
        # original `gr.Textbox("", ..., value="mlabonne/NeuralBeagle14-7B")`
        # raised "TypeError: got multiple values for argument 'value'" at
        # startup. Pass the default via the keyword only.
        gr.Textbox(value="mlabonne/NeuralBeagle14-7B", label="Model", info="ID of the model you want to evaluate", placeholder="mlabonne/NeuralBeagle14-7B"),
        gr.Dropdown(GPU_LIST, label="GPU", value="NVIDIA GeForce RTX 3090", info="Select your GPU to run the evaluation"),
        gr.Slider(minimum=1, maximum=8, value=1, step=1, label="Number of GPUs", info="Number of GPUs to use"),
        gr.Slider(minimum=50, maximum=500, value=75, step=25, label="Container disk", info="Size of the container disk in GB"),
        gr.Dropdown(["COMMUNITY", "SECURE"], value="COMMUNITY", label="Cloud type", info="Select your cloud type"),
        gr.Textbox("https://github.com/mlabonne/llm-autoeval.git", label="LLM AutoEval repo", info="Link to your LLM AutoEval repo"),
        gr.Checkbox(label="Trust remote code", value=False, info="Required for some models like phi-2"),
        gr.Checkbox(label="Debug", value=False, info="Don't kill the pod after evaluation if activated"),
        gr.Textbox("", label="Github API Token", info="Your Github API token", placeholder="hf_XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX"),
        gr.Textbox("", label="Runpod API Token", info="Your Runpod API token", placeholder="XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX"),
    ]
    btn = gr.Button("Evaluate!")
    outputs = gr.Textbox(label="Output", autofocus=True)
    gr.HTML('<div align="center"><p style="font-size: 20px;">β†’ Find your pods: <a href="https://www.runpod.io/console/pods">https://www.runpod.io/console/pods</a></p></div>')
    btn.click(autoeval, inputs, outputs)

demo.launch()