model-index:
- name: mosaic-14b-sce
  results:
  - task:
      type: text-generation
      name: Text Generation
    dataset:
      name: IFEval (0-Shot)
      type: HuggingFaceH4/ifeval
      args:
        num_few_shot: 0
    metrics:
    - type: inst_level_strict_acc and prompt_level_strict_acc
      value: 68.76
      name: strict accuracy
    source:
      url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=Quazim0t0/mosaic-14b-sce
      name: Open LLM Leaderboard
  - task:
      type: text-generation
      name: Text Generation
    dataset:
      name: BBH (3-Shot)
      type: BBH
      args:
        num_few_shot: 3
    metrics:
    - type: acc_norm
      value: 55.69
      name: normalized accuracy
    source:
      url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=Quazim0t0/mosaic-14b-sce
      name: Open LLM Leaderboard
  - task:
      type: text-generation
      name: Text Generation
    dataset:
      name: MATH Lvl 5 (4-Shot)
      type: hendrycks/competition_math
      args:
        num_few_shot: 4
    metrics:
    - type: exact_match
      value: 38.29
      name: exact match
    source:
      url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=Quazim0t0/mosaic-14b-sce
      name: Open LLM Leaderboard
  - task:
      type: text-generation
      name: Text Generation
    dataset:
      name: GPQA (0-shot)
      type: Idavidrein/gpqa
      args:
        num_few_shot: 0
    metrics:
    - type: acc_norm
      value: 14.99
      name: acc_norm
    source:
      url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=Quazim0t0/mosaic-14b-sce
      name: Open LLM Leaderboard
  - task:
      type: text-generation
      name: Text Generation
    dataset:
      name: MuSR (0-shot)
      type: TAUR-Lab/MuSR
      args:
        num_few_shot: 0
    metrics:
    - type: acc_norm
      value: 16.44
      name: acc_norm
    source:
      url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=Quazim0t0/mosaic-14b-sce
      name: Open LLM Leaderboard
  - task:
      type: text-generation
      name: Text Generation
    dataset:
      name: MMLU-PRO (5-shot)
      type: TIGER-Lab/MMLU-Pro
      config: main
      split: test
      args:
        num_few_shot: 5
    metrics:
    - type: acc
      value: 48.85
      name: accuracy
    source:
      url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=Quazim0t0/mosaic-14b-sce
      name: Open LLM Leaderboard
# [Open LLM Leaderboard Evaluation Results](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard)
Detailed results can be found [here](https://huggingface.co/datasets/open-llm-leaderboard/Quazim0t0__mosaic-14b-sce-details).
| Metric             |Value|
|--------------------|----:|
|Avg.                |40.50|
|IFEval (0-Shot)     |68.76|
|BBH (3-Shot)        |55.69|
|MATH Lvl 5 (4-Shot) |38.29|
|GPQA (0-shot)       |14.99|
|MuSR (0-shot)       |16.44|
|MMLU-PRO (5-shot)   |48.85|