Add Aquila model series which have gsm8k test set contamination
#21
by
bpHigh
- opened
- README.md +1 -1
- contamination_report.csv +4 -0
README.md
CHANGED
@@ -4,7 +4,7 @@ emoji: 🏭
|
|
4 |
colorFrom: green
|
5 |
colorTo: blue
|
6 |
sdk: gradio
|
7 |
-
python_version: 3.
|
8 |
sdk_version: 4.19.1
|
9 |
app_file: app.py
|
10 |
app_port: 7860
|
|
|
4 |
colorFrom: green
|
5 |
colorTo: blue
|
6 |
sdk: gradio
|
7 |
+
python_version: 3.11
|
8 |
sdk_version: 4.19.1
|
9 |
app_file: app.py
|
10 |
app_port: 7860
|
contamination_report.csv
CHANGED
@@ -148,6 +148,8 @@ gigaword;;allenai/c4;;corpus;;;0.15;data-based;https://arxiv.org/abs/2310.20707;
|
|
148 |
gigaword;;oscar-corpus/OSCAR-2301;;corpus;;;0.36;data-based;https://arxiv.org/abs/2310.20707;2
|
149 |
gigaword;;togethercomputer/RedPajama-Data-V2;;corpus;;;2.82;data-based;https://arxiv.org/abs/2310.20707;2
|
150 |
|
|
|
|
|
151 |
gsm8k;;GPT-4;;model;100.0;;1.0;data-based;https://arxiv.org/abs/2303.08774;11
|
152 |
gsm8k;;GPT-4;;model;79.00;;;model-based;https://arxiv.org/abs/2311.06233;8
|
153 |
|
@@ -664,6 +666,8 @@ wmt/wmt16;fr-en;GPT-3;;model;;;14.0;data-based;https://arxiv.org/abs/2005.14165;
|
|
664 |
wmt/wmt16;ro-en;FLAN;;model;;;12.4;data-based;https://arxiv.org/abs/2109.01652;13
|
665 |
wmt/wmt16;ro-en;GPT-3;;model;;;21.0;data-based;https://arxiv.org/abs/2005.14165;13
|
666 |
|
|
|
|
|
667 |
xnli;en;EleutherAI/pile;;corpus;;;0.36;data-based;https://arxiv.org/abs/2310.20707;2
|
668 |
xnli;en;allenai/c4;;corpus;;;0.12;data-based;https://arxiv.org/abs/2310.20707;2
|
669 |
xnli;en;oscar-corpus/OSCAR-2301;;corpus;;;0.24;data-based;https://arxiv.org/abs/2310.20707;2
|
|
|
148 |
gigaword;;oscar-corpus/OSCAR-2301;;corpus;;;0.36;data-based;https://arxiv.org/abs/2310.20707;2
|
149 |
gigaword;;togethercomputer/RedPajama-Data-V2;;corpus;;;2.82;data-based;https://arxiv.org/abs/2310.20707;2
|
150 |
|
151 |
+
gsm8k;;BAAI/Aquila2-34B;;model;;;100.0;model-based;https://huggingface.co/BAAI/Aquila2-34B/blob/main/README.md;21
|
152 |
+
gsm8k;;BAAI/AquilaChat2-34B;;model;;;100.0;model-based;https://huggingface.co/BAAI/AquilaChat2-34B/blob/main/README.md;21
|
153 |
gsm8k;;GPT-4;;model;100.0;;1.0;data-based;https://arxiv.org/abs/2303.08774;11
|
154 |
gsm8k;;GPT-4;;model;79.00;;;model-based;https://arxiv.org/abs/2311.06233;8
|
155 |
|
|
|
666 |
wmt/wmt16;ro-en;FLAN;;model;;;12.4;data-based;https://arxiv.org/abs/2109.01652;13
|
667 |
wmt/wmt16;ro-en;GPT-3;;model;;;21.0;data-based;https://arxiv.org/abs/2005.14165;13
|
668 |
|
669 |
+
xlangai/spider;;GPT-3.5;;model;;11.3;;model-based;https://arxiv.org/abs/2402.08100;18
|
670 |
+
|
671 |
xnli;en;EleutherAI/pile;;corpus;;;0.36;data-based;https://arxiv.org/abs/2310.20707;2
|
672 |
xnli;en;allenai/c4;;corpus;;;0.12;data-based;https://arxiv.org/abs/2310.20707;2
|
673 |
xnli;en;oscar-corpus/OSCAR-2301;;corpus;;;0.24;data-based;https://arxiv.org/abs/2310.20707;2
|