Add the Aquila model series, which has GSM8K test-set contamination

#21
Files changed (2) hide show
  1. README.md +1 -1
  2. contamination_report.csv +4 -0
README.md CHANGED
@@ -4,7 +4,7 @@ emoji: 🏭
4
  colorFrom: green
5
  colorTo: blue
6
  sdk: gradio
7
- python_version: 3.10
8
  sdk_version: 4.19.1
9
  app_file: app.py
10
  app_port: 7860
 
4
  colorFrom: green
5
  colorTo: blue
6
  sdk: gradio
7
+ python_version: 3.11
8
  sdk_version: 4.19.1
9
  app_file: app.py
10
  app_port: 7860
contamination_report.csv CHANGED
@@ -148,6 +148,8 @@ gigaword;;allenai/c4;;corpus;;;0.15;data-based;https://arxiv.org/abs/2310.20707;
148
  gigaword;;oscar-corpus/OSCAR-2301;;corpus;;;0.36;data-based;https://arxiv.org/abs/2310.20707;2
149
  gigaword;;togethercomputer/RedPajama-Data-V2;;corpus;;;2.82;data-based;https://arxiv.org/abs/2310.20707;2
150
 
 
 
151
  gsm8k;;GPT-4;;model;100.0;;1.0;data-based;https://arxiv.org/abs/2303.08774;11
152
  gsm8k;;GPT-4;;model;79.00;;;model-based;https://arxiv.org/abs/2311.06233;8
153
 
@@ -664,6 +666,8 @@ wmt/wmt16;fr-en;GPT-3;;model;;;14.0;data-based;https://arxiv.org/abs/2005.14165;
664
  wmt/wmt16;ro-en;FLAN;;model;;;12.4;data-based;https://arxiv.org/abs/2109.01652;13
665
  wmt/wmt16;ro-en;GPT-3;;model;;;21.0;data-based;https://arxiv.org/abs/2005.14165;13
666
 
 
 
667
  xnli;en;EleutherAI/pile;;corpus;;;0.36;data-based;https://arxiv.org/abs/2310.20707;2
668
  xnli;en;allenai/c4;;corpus;;;0.12;data-based;https://arxiv.org/abs/2310.20707;2
669
  xnli;en;oscar-corpus/OSCAR-2301;;corpus;;;0.24;data-based;https://arxiv.org/abs/2310.20707;2
 
148
  gigaword;;oscar-corpus/OSCAR-2301;;corpus;;;0.36;data-based;https://arxiv.org/abs/2310.20707;2
149
  gigaword;;togethercomputer/RedPajama-Data-V2;;corpus;;;2.82;data-based;https://arxiv.org/abs/2310.20707;2
150
 
151
+ gsm8k;;BAAI/Aquila2-34B;;model;;;100.0;model-based;https://huggingface.co/BAAI/Aquila2-34B/blob/main/README.md;21
152
+ gsm8k;;BAAI/AquilaChat2-34B;;model;;;100.0;model-based;https://huggingface.co/BAAI/AquilaChat2-34B/blob/main/README.md;21
153
  gsm8k;;GPT-4;;model;100.0;;1.0;data-based;https://arxiv.org/abs/2303.08774;11
154
  gsm8k;;GPT-4;;model;79.00;;;model-based;https://arxiv.org/abs/2311.06233;8
155
 
 
666
  wmt/wmt16;ro-en;FLAN;;model;;;12.4;data-based;https://arxiv.org/abs/2109.01652;13
667
  wmt/wmt16;ro-en;GPT-3;;model;;;21.0;data-based;https://arxiv.org/abs/2005.14165;13
668
 
669
+ xlangai/spider;;GPT-3.5;;model;;11.3;;model-based;https://arxiv.org/abs/2402.08100;18
670
+
671
  xnli;en;EleutherAI/pile;;corpus;;;0.36;data-based;https://arxiv.org/abs/2310.20707;2
672
  xnli;en;allenai/c4;;corpus;;;0.12;data-based;https://arxiv.org/abs/2310.20707;2
673
  xnli;en;oscar-corpus/OSCAR-2301;;corpus;;;0.24;data-based;https://arxiv.org/abs/2310.20707;2