hynky HF staff committed on
Commit
2d03fbb
1 Parent(s): 43ec909

remove old files

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. banner.png +0 -0
  2. bibliography.bib +0 -321
  3. data/clustering/data.csv +0 -0
  4. data/clustering/info.csv +0 -106
  5. data/plots/c4_filters_hellaswag/agg_score.json +0 -1
  6. data/plots/c4_filters_hellaswag/arc_acc_norm.json +0 -1
  7. data/plots/c4_filters_hellaswag/commonsense_qa_acc_norm.json +0 -1
  8. data/plots/c4_filters_hellaswag/hellaswag_acc_norm.json +0 -1
  9. data/plots/c4_filters_hellaswag/index.json +0 -2
  10. data/plots/c4_filters_hellaswag/mmlu_acc_norm.json +0 -1
  11. data/plots/c4_filters_hellaswag/openbookqa_acc_norm.json +0 -1
  12. data/plots/c4_filters_hellaswag/piqa_acc_norm.json +0 -1
  13. data/plots/c4_filters_hellaswag/winogrande_acc_norm.json +0 -1
  14. data/plots/cross_ind_unfiltered_comparison/agg_score.json +0 -1
  15. data/plots/cross_ind_unfiltered_comparison/arc_acc_norm.json +0 -1
  16. data/plots/cross_ind_unfiltered_comparison/commonsense_qa_acc_norm.json +0 -1
  17. data/plots/cross_ind_unfiltered_comparison/hellaswag_acc_norm.json +0 -1
  18. data/plots/cross_ind_unfiltered_comparison/index.json +0 -1
  19. data/plots/cross_ind_unfiltered_comparison/mmlu_acc_norm.json +0 -1
  20. data/plots/cross_ind_unfiltered_comparison/openbookqa_acc_norm.json +0 -1
  21. data/plots/cross_ind_unfiltered_comparison/piqa_acc_norm.json +0 -1
  22. data/plots/cross_ind_unfiltered_comparison/winogrande_acc_norm.json +0 -1
  23. data/plots/custom-filters/agg_score.json +0 -1
  24. data/plots/custom-filters/arc_acc_norm.json +0 -1
  25. data/plots/custom-filters/commonsense_qa_acc_norm.json +0 -1
  26. data/plots/custom-filters/hellaswag_acc_norm.json +0 -1
  27. data/plots/custom-filters/index.json +0 -1
  28. data/plots/custom-filters/mmlu_acc_norm.json +0 -1
  29. data/plots/custom-filters/openbookqa_acc_norm.json +0 -1
  30. data/plots/custom-filters/piqa_acc_norm.json +0 -1
  31. data/plots/custom-filters/winogrande_acc_norm.json +0 -1
  32. data/plots/dataset_ablations/agg_score.json +0 -1
  33. data/plots/dataset_ablations/arc_acc_norm.json +0 -1
  34. data/plots/dataset_ablations/commonsense_qa_acc_norm.json +0 -1
  35. data/plots/dataset_ablations/hellaswag_acc_norm.json +0 -1
  36. data/plots/dataset_ablations/index.json +0 -1
  37. data/plots/dataset_ablations/mmlu_acc_norm.json +0 -1
  38. data/plots/dataset_ablations/openbookqa_acc_norm.json +0 -1
  39. data/plots/dataset_ablations/piqa_acc_norm.json +0 -1
  40. data/plots/dataset_ablations/winogrande_acc_norm.json +0 -1
  41. data/plots/dededup_difference/big-run-fineweb-cross-dedup-fixed.json +0 -1
  42. data/plots/dededup_difference/big-run-refinedweb.json +0 -1
  43. data/plots/dededup_difference/big-run-sampled_full_filtered_no_dedup.json +0 -1
  44. data/plots/dededup_difference/index.json +0 -1
  45. data/plots/dedup_all_dumps_bad/agg_score.json +0 -1
  46. data/plots/dedup_all_dumps_bad/arc_acc_norm.json +0 -1
  47. data/plots/dedup_all_dumps_bad/commonsense_qa_acc_norm.json +0 -1
  48. data/plots/dedup_all_dumps_bad/hellaswag_acc_norm.json +0 -1
  49. data/plots/dedup_all_dumps_bad/index.json +0 -1
  50. data/plots/dedup_all_dumps_bad/mmlu_acc_norm.json +0 -1
banner.png DELETED
Binary file (353 kB)
 
bibliography.bib DELETED
@@ -1,321 +0,0 @@
1
- @inproceedings{barbaresi-2021-trafilatura,
2
- title = {Trafilatura: A Web Scraping Library and Command-Line Tool for Text Discovery and Extraction},
3
- author = "Barbaresi, Adrien",
4
- booktitle = "Proceedings of the Joint Conference of the 59th Annual Meeting of the Association for Computational Linguistics and the 11th International Joint Conference on Natural Language Processing: System Demonstrations",
5
- pages = "122--131",
6
- publisher = "Association for Computational Linguistics",
7
- url = "https://aclanthology.org/2021.acl-demo.15",
8
- year = 2021,
9
- }
10
- @misc{penedo2023refinedweb,
11
- title={The RefinedWeb Dataset for Falcon LLM: Outperforming Curated Corpora with Web Data, and Web Data Only},
12
- author={Guilherme Penedo and Quentin Malartic and Daniel Hesslow and Ruxandra Cojocaru and Alessandro Cappelli and Hamza Alobeidli and Baptiste Pannier and Ebtesam Almazrouei and Julien Launay},
13
- year={2023},
14
- eprint={2306.01116},
15
- archivePrefix={arXiv},
16
- primaryClass={cs.CL}
17
- }
18
- @article{joulin2016fasttext,
19
- title={FastText.zip: Compressing text classification models},
20
- author={Joulin, Armand and Grave, Edouard and Bojanowski, Piotr and Douze, Matthijs and J{\'e}gou, Herv{\'e} and Mikolov, Tomas},
21
- journal={arXiv preprint arXiv:1612.03651},
22
- year={2016}
23
- }
24
- @article{joulin2016bag,
25
- title={Bag of Tricks for Efficient Text Classification},
26
- author={Joulin, Armand and Grave, Edouard and Bojanowski, Piotr and Mikolov, Tomas},
27
- journal={arXiv preprint arXiv:1607.01759},
28
- year={2016}
29
- }
30
- @misc{penedo2024datatrove,
31
- author = {Penedo, Guilherme and Kydlíček, Hynek and Cappelli, Alessandro and Wolf, Thomas and Sasko, Mario},
32
- title = {DataTrove: large scale data processing},
33
- year = {2024},
34
- publisher = {GitHub},
35
- journal = {GitHub repository},
36
- url = {https://github.com/huggingface/datatrove}
37
- }
38
- @misc{chiang2024chatbot,
39
- title={Chatbot Arena: An Open Platform for Evaluating LLMs by Human Preference},
40
- author={Wei-Lin Chiang and Lianmin Zheng and Ying Sheng and Anastasios Nikolas Angelopoulos and Tianle Li and Dacheng Li and Hao Zhang and Banghua Zhu and Michael Jordan and Joseph E. Gonzalez and Ion Stoica},
41
- year={2024},
42
- eprint={2403.04132},
43
- archivePrefix={arXiv},
44
- primaryClass={cs.AI}
45
- }
46
- @misc{rae2022scaling,
47
- title={Scaling Language Models: Methods, Analysis \& Insights from Training Gopher},
48
- author={Jack W. Rae and Sebastian Borgeaud and Trevor Cai and Katie Millican and Jordan Hoffmann and Francis Song and John Aslanides and Sarah Henderson and Roman Ring and Susannah Young and Eliza Rutherford and Tom Hennigan and Jacob Menick and Albin Cassirer and Richard Powell and George van den Driessche and Lisa Anne Hendricks and Maribeth Rauh and Po-Sen Huang and Amelia Glaese and Johannes Welbl and Sumanth Dathathri and Saffron Huang and Jonathan Uesato and John Mellor and Irina Higgins and Antonia Creswell and Nat McAleese and Amy Wu and Erich Elsen and Siddhant Jayakumar and Elena Buchatskaya and David Budden and Esme Sutherland and Karen Simonyan and Michela Paganini and Laurent Sifre and Lena Martens and Xiang Lorraine Li and Adhiguna Kuncoro and Aida Nematzadeh and Elena Gribovskaya and Domenic Donato and Angeliki Lazaridou and Arthur Mensch and Jean-Baptiste Lespiau and Maria Tsimpoukelli and Nikolai Grigorev and Doug Fritz and Thibault Sottiaux and Mantas Pajarskas and Toby Pohlen and Zhitao Gong and Daniel Toyama and Cyprien de Masson d'Autume and Yujia Li and Tayfun Terzi and Vladimir Mikulik and Igor Babuschkin and Aidan Clark and Diego de Las Casas and Aurelia Guy and Chris Jones and James Bradbury and Matthew Johnson and Blake Hechtman and Laura Weidinger and Iason Gabriel and William Isaac and Ed Lockhart and Simon Osindero and Laura Rimell and Chris Dyer and Oriol Vinyals and Kareem Ayoub and Jeff Stanway and Lorrayne Bennett and Demis Hassabis and Koray Kavukcuoglu and Geoffrey Irving},
49
- year={2022},
50
- eprint={2112.11446},
51
- archivePrefix={arXiv},
52
- primaryClass={cs.CL}
53
- }
54
- @misc{lee2022deduplicating,
55
- title={Deduplicating Training Data Makes Language Models Better},
56
- author={Katherine Lee and Daphne Ippolito and Andrew Nystrom and Chiyuan Zhang and Douglas Eck and Chris Callison-Burch and Nicholas Carlini},
57
- year={2022},
58
- eprint={2107.06499},
59
- archivePrefix={arXiv},
60
- primaryClass={cs.CL}
61
- }
62
- @misc{carlini2023quantifying,
63
- title={Quantifying Memorization Across Neural Language Models},
64
- author={Nicholas Carlini and Daphne Ippolito and Matthew Jagielski and Katherine Lee and Florian Tramer and Chiyuan Zhang},
65
- year={2023},
66
- eprint={2202.07646},
67
- archivePrefix={arXiv},
68
- primaryClass={cs.LG}
69
- }
70
- @misc{raffel2023exploring,
71
- title={Exploring the Limits of Transfer Learning with a Unified Text-to-Text Transformer},
72
- author={Colin Raffel and Noam Shazeer and Adam Roberts and Katherine Lee and Sharan Narang and Michael Matena and Yanqi Zhou and Wei Li and Peter J. Liu},
73
- year={2023},
74
- eprint={1910.10683},
75
- archivePrefix={arXiv},
76
- primaryClass={cs.LG}
77
- }
78
- @misc{touvron2023llama,
79
- title={LLaMA: Open and Efficient Foundation Language Models},
80
- author={Hugo Touvron and Thibaut Lavril and Gautier Izacard and Xavier Martinet and Marie-Anne Lachaux and Timothée Lacroix and Baptiste Rozière and Naman Goyal and Eric Hambro and Faisal Azhar and Aurelien Rodriguez and Armand Joulin and Edouard Grave and Guillaume Lample},
81
- year={2023},
82
- eprint={2302.13971},
83
- archivePrefix={arXiv},
84
- primaryClass={cs.CL}
85
- }
86
- @article{dolma,
87
- title = {Dolma: an Open Corpus of Three Trillion Tokens for Language Model Pretraining Research},
88
- author={
89
- Luca Soldaini and Rodney Kinney and Akshita Bhagia and Dustin Schwenk and David Atkinson and
90
- Russell Authur and Ben Bogin and Khyathi Chandu and Jennifer Dumas and Yanai Elazar and
91
- Valentin Hofmann and Ananya Harsh Jha and Sachin Kumar and Li Lucy and Xinxi Lyu and
92
- Nathan Lambert and Ian Magnusson and Jacob Morrison and Niklas Muennighoff and Aakanksha Naik and
93
- Crystal Nam and Matthew E. Peters and Abhilasha Ravichander and Kyle Richardson and Zejiang Shen and
94
- Emma Strubell and Nishant Subramani and Oyvind Tafjord and Pete Walsh and Luke Zettlemoyer and
95
- Noah A. Smith and Hannaneh Hajishirzi and Iz Beltagy and Dirk Groeneveld and Jesse Dodge and Kyle Lo
96
- },
97
- year = {2024},
98
- journal={arXiv preprint},
99
- }
100
- @article{gao2020pile,
101
- title={The {P}ile: An 800{GB} dataset of diverse text for language modeling},
102
- author={Gao, Leo and Biderman, Stella and Black, Sid and Golding, Laurence and Hoppe, Travis and Foster, Charles and Phang, Jason and He, Horace and Thite, Anish and Nabeshima, Noa and others},
103
- journal={arXiv preprint arXiv:2101.00027},
104
- year={2020}
105
- }
106
- @misc{cerebras2023slimpajama,
107
- author = {Soboleva, Daria and Al-Khateeb, Faisal and Myers, Robert and Steeves, Jacob R and Hestness, Joel and Dey, Nolan},
108
- title = {SlimPajama: A 627B token cleaned and deduplicated version of RedPajama},
109
- month = {June},
110
- year = 2023,
111
- url = {https://huggingface.co/datasets/cerebras/SlimPajama-627B},
112
- }
113
- @software{together2023redpajama,
114
- author = {Together Computer},
115
- title = {RedPajama: an Open Dataset for Training Large Language Models},
116
- month = {October},
117
- year = 2023,
118
- url = {https://github.com/togethercomputer/RedPajama-Data}
119
- }
120
- @article{jaccard1912distribution,
121
- title={The distribution of the flora in the alpine zone. 1},
122
- author={Jaccard, Paul},
123
- journal={New phytologist},
124
- volume={11},
125
- number={2},
126
- pages={37--50},
127
- year={1912},
128
- publisher={Wiley Online Library}
129
- }
130
- @misc{albalak2024survey,
131
- title={A Survey on Data Selection for Language Models},
132
- author={Alon Albalak and Yanai Elazar and Sang Michael Xie and Shayne Longpre and Nathan Lambert and Xinyi Wang and Niklas Muennighoff and Bairu Hou and Liangming Pan and Haewon Jeong and Colin Raffel and Shiyu Chang and Tatsunori Hashimoto and William Yang Wang},
133
- year={2024},
134
- eprint={2402.16827},
135
- archivePrefix={arXiv},
136
- primaryClass={cs.CL}
137
- }
138
- @misc{longpre2023pretrainers,
139
- title={A Pretrainer's Guide to Training Data: Measuring the Effects of Data Age, Domain Coverage, Quality, \& Toxicity},
140
- author={Shayne Longpre and Gregory Yauney and Emily Reif and Katherine Lee and Adam Roberts and Barret Zoph and Denny Zhou and Jason Wei and Kevin Robinson and David Mimno and Daphne Ippolito},
141
- year={2023},
142
- eprint={2305.13169},
143
- archivePrefix={arXiv},
144
- primaryClass={cs.CL}
145
- }
146
- @misc{wenzek2019ccnet,
147
- title={CCNet: Extracting High Quality Monolingual Datasets from Web Crawl Data},
148
- author={Guillaume Wenzek and Marie-Anne Lachaux and Alexis Conneau and Vishrav Chaudhary and Francisco Guzmán and Armand Joulin and Edouard Grave},
149
- year={2019},
150
- eprint={1911.00359},
151
- archivePrefix={arXiv},
152
- primaryClass={cs.CL}
153
- }
154
- @misc{soldaini2024dolma,
155
- title={Dolma: an Open Corpus of Three Trillion Tokens for Language Model Pretraining Research},
156
- author={Luca Soldaini and Rodney Kinney and Akshita Bhagia and Dustin Schwenk and David Atkinson and Russell Authur and Ben Bogin and Khyathi Chandu and Jennifer Dumas and Yanai Elazar and Valentin Hofmann and Ananya Harsh Jha and Sachin Kumar and Li Lucy and Xinxi Lyu and Nathan Lambert and Ian Magnusson and Jacob Morrison and Niklas Muennighoff and Aakanksha Naik and Crystal Nam and Matthew E. Peters and Abhilasha Ravichander and Kyle Richardson and Zejiang Shen and Emma Strubell and Nishant Subramani and Oyvind Tafjord and Pete Walsh and Luke Zettlemoyer and Noah A. Smith and Hannaneh Hajishirzi and Iz Beltagy and Dirk Groeneveld and Jesse Dodge and Kyle Lo},
157
- year={2024},
158
- eprint={2402.00159},
159
- archivePrefix={arXiv},
160
- primaryClass={cs.CL}
161
- }
162
- @misc{ouyang2022training,
163
- title={Training language models to follow instructions with human feedback},
164
- author={Long Ouyang and Jeff Wu and Xu Jiang and Diogo Almeida and Carroll L. Wainwright and Pamela Mishkin and Chong Zhang and Sandhini Agarwal and Katarina Slama and Alex Ray and John Schulman and Jacob Hilton and Fraser Kelton and Luke Miller and Maddie Simens and Amanda Askell and Peter Welinder and Paul Christiano and Jan Leike and Ryan Lowe},
165
- year={2022},
166
- eprint={2203.02155},
167
- archivePrefix={arXiv},
168
- primaryClass={cs.CL}
169
- }
170
- @misc{hoffmann2022training,
171
- title={Training Compute-Optimal Large Language Models},
172
- author={Jordan Hoffmann and Sebastian Borgeaud and Arthur Mensch and Elena Buchatskaya and Trevor Cai and Eliza Rutherford and Diego de Las Casas and Lisa Anne Hendricks and Johannes Welbl and Aidan Clark and Tom Hennigan and Eric Noland and Katie Millican and George van den Driessche and Bogdan Damoc and Aurelia Guy and Simon Osindero and Karen Simonyan and Erich Elsen and Jack W. Rae and Oriol Vinyals and Laurent Sifre},
173
- year={2022},
174
- eprint={2203.15556},
175
- archivePrefix={arXiv},
176
- primaryClass={cs.CL}
177
- }
178
- @misc{muennighoff2023scaling,
179
- title={Scaling Data-Constrained Language Models},
180
- author={Niklas Muennighoff and Alexander M. Rush and Boaz Barak and Teven Le Scao and Aleksandra Piktus and Nouamane Tazi and Sampo Pyysalo and Thomas Wolf and Colin Raffel},
181
- year={2023},
182
- eprint={2305.16264},
183
- archivePrefix={arXiv},
184
- primaryClass={cs.CL}
185
- }
186
- @misc{hernandez2022scaling,
187
- title={Scaling Laws and Interpretability of Learning from Repeated Data},
188
- author={Danny Hernandez and Tom Brown and Tom Conerly and Nova DasSarma and Dawn Drain and Sheer El-Showk and Nelson Elhage and Zac Hatfield-Dodds and Tom Henighan and Tristan Hume and Scott Johnston and Ben Mann and Chris Olah and Catherine Olsson and Dario Amodei and Nicholas Joseph and Jared Kaplan and Sam McCandlish},
189
- year={2022},
190
- eprint={2205.10487},
191
- archivePrefix={arXiv},
192
- primaryClass={cs.LG}
193
- }
194
- @article{llama3modelcard,
195
-
196
- title={Llama 3 Model Card},
197
-
198
- author={AI@Meta},
199
-
200
- year={2024},
201
-
202
- url = {https://github.com/meta-llama/llama3/blob/main/MODEL_CARD.md}
203
-
204
- }
205
- @misc{jiang2024mixtral,
206
- title={Mixtral of Experts},
207
- author={Albert Q. Jiang and Alexandre Sablayrolles and Antoine Roux and Arthur Mensch and Blanche Savary and Chris Bamford and Devendra Singh Chaplot and Diego de las Casas and Emma Bou Hanna and Florian Bressand and Gianna Lengyel and Guillaume Bour and Guillaume Lample and Lélio Renard Lavaud and Lucile Saulnier and Marie-Anne Lachaux and Pierre Stock and Sandeep Subramanian and Sophia Yang and Szymon Antoniak and Teven Le Scao and Théophile Gervet and Thibaut Lavril and Thomas Wang and Timothée Lacroix and William El Sayed},
208
- year={2024},
209
- eprint={2401.04088},
210
- archivePrefix={arXiv},
211
- primaryClass={cs.LG}
212
- }
213
- @article{yuan2024self,
214
- title={Self-rewarding language models},
215
- author={Yuan, Weizhe and Pang, Richard Yuanzhe and Cho, Kyunghyun and Sukhbaatar, Sainbayar and Xu, Jing and Weston, Jason},
216
- journal={arXiv preprint arXiv:2401.10020},
217
- year={2024}
218
- }
219
- @article{verga2024replacing,
220
- title={Replacing Judges with Juries: Evaluating LLM Generations with a Panel of Diverse Models},
221
- author={Verga, Pat and Hofstatter, Sebastian and Althammer, Sophia and Su, Yixuan and Piktus, Aleksandra and Arkhangorodsky, Arkady and Xu, Minjie and White, Naomi and Lewis, Patrick},
222
- journal={arXiv preprint arXiv:2404.18796},
223
- year={2024}
224
- }
225
- @article{abdin2024phi,
226
- title={Phi-3 technical report: A highly capable language model locally on your phone},
227
- author={Abdin, Marah and Jacobs, Sam Ade and Awan, Ammar Ahmad and Aneja, Jyoti and Awadallah, Ahmed and Awadalla, Hany and Bach, Nguyen and Bahree, Amit and Bakhtiari, Arash and Behl, Harkirat and others},
228
- journal={arXiv preprint arXiv:2404.14219},
229
- year={2024}
230
- }
231
- @misc{meta2024responsible,
232
- title = {Our responsible approach to Meta AI and Meta Llama 3},
233
- author = {Meta},
234
- year = {2024},
235
- url = {https://ai.meta.com/blog/meta-llama-3-meta-ai-responsibility/},
236
- note = {Accessed: 2024-05-31}
237
- }
238
- @inproceedings{talmor-etal-2019-commonsenseqa,
239
- title = "CommonsenseQA: A Question Answering Challenge Targeting Commonsense Knowledge",
240
- author = "Talmor, Alon and
241
- Herzig, Jonathan and
242
- Lourie, Nicholas and
243
- Berant, Jonathan",
244
- booktitle = "Proceedings of the 2019 Conference of the North {A}merican Chapter of the Association for Computational Linguistics: Human Language Technologies, Volume 1 (Long and Short Papers)",
245
- month = jun,
246
- year = "2019",
247
- address = "Minneapolis, Minnesota",
248
- publisher = "Association for Computational Linguistics",
249
- url = "https://aclanthology.org/N19-1421",
250
- doi = "10.18653/v1/N19-1421",
251
- pages = "4149--4158",
252
- archivePrefix = "arXiv",
253
- eprint = "1811.00937",
254
- primaryClass = "cs",
255
- }
256
- @inproceedings{zellers-etal-2019-hellaswag,
257
- title = "HellaSwag: Can a Machine Really Finish Your Sentence?",
258
- author = "Zellers, Rowan and
259
- Holtzman, Ari and
260
- Bisk, Yonatan and
261
- Farhadi, Ali and
262
- Choi, Yejin",
263
- editor = "Korhonen, Anna and
264
- Traum, David and
265
- M{\`a}rquez, Llu{\'\i}s",
266
- booktitle = "Proceedings of the 57th Annual Meeting of the Association for Computational Linguistics",
267
- month = jul,
268
- year = "2019",
269
- address = "Florence, Italy",
270
- publisher = "Association for Computational Linguistics",
271
- url = "https://aclanthology.org/P19-1472",
272
- doi = "10.18653/v1/P19-1472",
273
- pages = "4791--4800",
274
- abstract = "Recent work by Zellers et al. (2018) introduced a new task of commonsense natural language inference: given an event description such as {``}A woman sits at a piano,{''} a machine must select the most likely followup: {``}She sets her fingers on the keys.{''} With the introduction of BERT, near human-level performance was reached. Does this mean that machines can perform human level commonsense inference? In this paper, we show that commonsense inference still proves difficult for even state-of-the-art models, by presenting HellaSwag, a new challenge dataset. Though its questions are trivial for humans ({\textgreater}95{\%} accuracy), state-of-the-art models struggle ({\textless}48{\%}). We achieve this via Adversarial Filtering (AF), a data collection paradigm wherein a series of discriminators iteratively select an adversarial set of machine-generated wrong answers. AF proves to be surprisingly robust. The key insight is to scale up the length and complexity of the dataset examples towards a critical {`}Goldilocks{'} zone wherein generated text is ridiculous to humans, yet often misclassified by state-of-the-art models. Our construction of HellaSwag, and its resulting difficulty, sheds light on the inner workings of deep pretrained models. More broadly, it suggests a new path forward for NLP research, in which benchmarks co-evolve with the evolving state-of-the-art in an adversarial way, so as to present ever-harder challenges.",
275
- }
276
- @inproceedings{OpenBookQA2018,
277
- title={Can a Suit of Armor Conduct Electricity? A New Dataset for Open Book Question Answering},
278
- author={Todor Mihaylov and Peter Clark and Tushar Khot and Ashish Sabharwal},
279
- booktitle={EMNLP},
280
- year={2018}
281
- }
282
- @misc{bisk2019piqa,
283
- title={PIQA: Reasoning about Physical Commonsense in Natural Language},
284
- author={Yonatan Bisk and Rowan Zellers and Ronan Le Bras and Jianfeng Gao and Yejin Choi},
285
- year={2019},
286
- eprint={1911.11641},
287
- archivePrefix={arXiv},
288
- primaryClass={cs.CL}
289
- }
290
- @misc{sap2019socialiqa,
291
- title={SocialIQA: Commonsense Reasoning about Social Interactions},
292
- author={Maarten Sap and Hannah Rashkin and Derek Chen and Ronan LeBras and Yejin Choi},
293
- year={2019},
294
- eprint={1904.09728},
295
- archivePrefix={arXiv},
296
- primaryClass={cs.CL}
297
- }
298
- @misc{sakaguchi2019winogrande,
299
- title={WinoGrande: An Adversarial Winograd Schema Challenge at Scale},
300
- author={Keisuke Sakaguchi and Ronan Le Bras and Chandra Bhagavatula and Yejin Choi},
301
- year={2019},
302
- eprint={1907.10641},
303
- archivePrefix={arXiv},
304
- primaryClass={cs.CL}
305
- }
306
- @misc{clark2018think,
307
- title={Think you have Solved Question Answering? Try ARC, the AI2 Reasoning Challenge},
308
- author={Peter Clark and Isaac Cowhey and Oren Etzioni and Tushar Khot and Ashish Sabharwal and Carissa Schoenick and Oyvind Tafjord},
309
- year={2018},
310
- eprint={1803.05457},
311
- archivePrefix={arXiv},
312
- primaryClass={cs.AI}
313
- }
314
- @misc{hendrycks2021measuring,
315
- title={Measuring Massive Multitask Language Understanding},
316
- author={Dan Hendrycks and Collin Burns and Steven Basart and Andy Zou and Mantas Mazeika and Dawn Song and Jacob Steinhardt},
317
- year={2021},
318
- eprint={2009.03300},
319
- archivePrefix={arXiv},
320
- primaryClass={cs.CY}
321
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
data/clustering/data.csv DELETED
The diff for this file is too large to render. See raw diff
 
data/clustering/info.csv DELETED
@@ -1,106 +0,0 @@
1
- ,cluster_id,cluster_summaries,cluster_position_x,cluster_position_y
2
- 0,-1,None,9.926462,4.7121987
3
- 1,0,Philosophical/Spiritual Introspection,10.312462,1.2666532
4
- 2,1,"Scholarships",8.167274,4.8995786
5
- 3,2,Politics,8.81142,2.4859838
6
- 4,3,Theology,9.615214,0.3783942
7
- 5,4,Dating,4.985182,1.8439052
8
- 6,5,Accommodation,11.457769,5.080919
9
- 7,6,Football,6.6154537,-1.6859366
10
- 8,7,Film Festival,6.9734483,1.4548192
11
- 9,8,Culinary,13.426296,4.5412893
12
- 10,9,Music,6.0653744,0.7536916
13
- 11,10,Gambling,3.124241,3.2533677
14
- 12,11,Baseball,7.133596,-2.4256644
15
- 13,12,Technology,6.4929094,6.768577
16
- 14,13,Website Policies,4.873843,5.771508
17
- 15,14,Weddings,11.815845,3.7894728
18
- 16,15,Gaming,5.529167,2.9530518
19
- 17,16,Commodities/Services Provision,10.453564,5.8489122
20
- 18,17,Crafts,13.287651,6.4237967
21
- 19,18,Automobiles,9.9531145,8.840178
22
- 20,19,Watches,13.893139,9.859185
23
- 21,20,Dogs,12.595798,3.5351615
24
- 22,21,Photography,10.7942295,3.5504062
25
- 23,22,Legalities,8.942016,4.72733
26
- 24,23,Consumer Electronics,7.078649,8.338984
27
- 25,24,Insulation,10.520957,7.914946
28
- 26,25,Cannabis,14.317424,3.2114828
29
- 27,26,Footwear,15.052116,7.6956415
30
- 28,27,Real Estate,9.536316,6.103533
31
- 29,28,Relocation,10.205071,7.1883316
32
- 30,29,Sports betting,3.2779586,2.443366
33
- 31,30,Narratives,7.613535,1.8300554
34
- 32,31,Dating,4.788838,2.1900373
35
- 33,32,Apparel/Clothing,14.394226,7.3073387
36
- 34,33,User Authentication,5.265638,6.4014487
37
- 35,34,Academicwriting,6.9187264,3.4357684
38
- 36,35,Sports,7.4969172,-2.086585
39
- 37,36,Fashion/Lifestyle Products,13.821669,7.7150764
40
- 38,37,Diverse events,9.437052,2.2438836
41
- 39,38,Blockchain/Cryptocurrency,7.7586045,6.9439344
42
- 40,39,Online Businesses/Marketing,6.522259,5.219268
43
- 41,40,Healthcare,11.425277,2.3801014
44
- 42,41,Home Decor,12.878046,7.2632184
45
- 43,42,Biomedicine,12.789575,2.3376262
46
- 44,43,Jewelry,14.259997,8.653363
47
- 45,44,Addiction,11.561383,1.3774762
48
- 46,45,Products,11.711758,8.423251
49
- 47,46,Multi-purposefulness,11.080702,7.4574013
50
- 48,47,"Mass transit",9.910158,5.4402313
51
- 49,48,Ethernet,6.9763823,7.7909245
52
- 50,49,Legal,9.516912,4.636553
53
- 51,50,E-commerce,13.263438,8.6548195
54
- 52,51,Audio,7.717162,8.903019
55
- 53,52,Infrastructure,10.52904,5.369669
56
- 54,53,Firearms,11.062812,9.268473
57
- 55,54,Freight/Logistics,9.551044,7.0336204
58
- 56,55,Products,12.073747,7.645973
59
- 57,56,Vaccinations,11.9387045,2.7824683
60
- 58,57,Artwork,11.019163,4.1677165
61
- 59,58,Viticulture,14.223523,5.0761614
62
- 60,59,WordPress,5.9597983,5.824579
63
- 61,60,Cosmetics/Dermatology,15.093273,3.4669027
64
- 62,61,Software,6.375921,6.4298844
65
- 63,62,Dentistry,14.76626,1.1620314
66
- 64,63,Pest Control,13.201735,3.6806118
67
- 65,64,SEO,5.720493,5.238112
68
- 66,65,Lottery,1.7142816,2.9782674
69
- 67,66,Narratives,8.460977,1.0804662
70
- 68,67,Waste Reduction & Recycling,10.634534,6.959523
71
- 69,68,Communication,6.438943,5.9467845
72
- 70,69,Orthopedics,13.005415,1.1908791
73
- 71,70,Home Decor & Furniture,12.732457,7.876862
74
- 72,71,Education,7.6568975,3.4944353
75
- 73,72,Sports,7.295141,-0.7343214
76
- 74,73,Social Media Advertising,6.133886,4.8547883
77
- 75,74,Privacy,4.756733,6.3598356
78
- 76,75,Website design,6.1168823,5.465095
79
- 77,76,Roofing,11.389448,8.080609
80
- 78,77,Nutrition/Supplements,13.631578,2.5334294
81
- 79,78,Haircare/Hairstyling,15.544645,4.54254
82
- 80,79,Cookies,4.341592,6.819268
83
- 81,80,International Trade,8.993828,6.4757586
84
- 82,81,Entrepreneurial Resources,9.435777,5.3340797
85
- 83,82,Cricket,6.5171986,-1.245905
86
- 84,83,Crafts,13.852216,7.049825
87
- 85,84,Floristry,13.407425,5.8741536
88
- 86,85,Genealogy,9.530803,1.6548243
89
- 87,86,Mental Health,11.074349,1.6069281
90
- 88,87,Volunteerism,10.145443,3.6734574
91
- 89,88,Lighting,11.385381,8.93693
92
- 90,89,Artificial Intelligence,6.5306387,6.2178063
93
- 91,90,Business,7.471462,6.4142885
94
- 92,91,E-commerce,13.638669,6.5098934
95
- 93,92,Urbanization/Over-tourism,10.221115,6.100654
96
- 94,93,Events,10.8449,3.9822264
97
- 95,94,Pharmaceuticals/Biotechnology,12.318266,2.4331784
98
- 96,95,Professional Wrestling,6.856304,-0.65598303
99
- 97,96,Various,9.3211975,3.4894605
100
- 98,97,Medicine,13.17882,2.1281319
101
- 99,98,Community Engagement,9.848856,3.5187004
102
- 100,99,Fitness,12.504849,0.9134393
103
- 101,100,Bathroom Design & Toilet Engineering,11.779076,7.2920136
104
- 102,101,Business Development,7.328447,5.659843
105
- 103,102,Sports,7.6370654,-1.0701839
106
- 104,103,Sexuality,13.817207,1.6510898
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
data/plots/c4_filters_hellaswag/agg_score.json DELETED
@@ -1 +0,0 @@
1
- {"data":{"filtering-baseline-2019-18-60gt":{"x":[0.0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,28.311552000000002],"y":[0.3308296035975218,0.35613923892378807,0.3746252153068781,0.38806260935962195,0.39690930768847466,0.4043668694794178,0.40220927633345127,0.41070565767586226,0.41399387270212173,0.4170555509626865,0.42098715901374817,0.4210818205028772,0.42051274701952934,0.424176013097167,0.4225243702530861],"label":"baseline"},"filtering-c4-curly_bracket":{"x":[0.0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,28.311552000000002],"y":[0.3308933284133672,0.3583905678242445,0.38119001872837543,0.3873079549521208,0.39723034016788,0.4043100867420435,0.40908974781632423,0.4140731003135443,0.41894380562007427,0.41736695170402527,0.4232212919741869,0.4229240976274013,0.4236308634281158,0.42750727012753487,0.4268195778131485],"label":"curly_bracket filter"},"filtering-c4-terminal_punct":{"x":[0.0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,28.311552000000002],"y":[0.3308933284133672,0.36182260885834694,0.3764855917543173,0.3928546328097582,0.3978128544986248,0.4073755294084549,0.4112890623509884,0.41486112400889397,0.4196756165474653,0.4235504809767008,0.42218128964304924,0.4228535555303097,0.4249562546610832,0.42740595713257784,0.42711055465042586],"label":"terminal_punct 
filter"},"filtering-c4-word_lengths":{"x":[0.0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,28.311552000000002],"y":[0.3308933284133672,0.36000680737197394,0.37551611103117466,0.38802069239318365,0.3933942876756191,0.4043118376284838,0.40780537389218807,0.4112964067608118,0.4137573726475239,0.41791345551609993,0.4173779133707285,0.42117033526301384,0.42073468305170536,0.42412591539323324,0.4260616712272167],"label":"word_lengths filter"},"filtering-c4-all":{"x":[0.0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,28.311552000000002],"y":[0.3308933284133672,0.36066408455371857,0.3812380563467741,0.394003426656127,0.40062618628144264,0.4117735456675291,0.4165923688560724,0.4175422675907612,0.42100309208035464,0.42246321588754654,0.42360376194119453,0.42823668196797365,0.4299001637846231,0.4302353039383888,0.4310380257666111],"label":"All filters"},"filtering-c4-all-except-terminal_punct":{"x":[0.0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,28.311552000000002],"y":[0.330924579873681,0.35825083684176207,0.37912008538842196,0.38942993618547916,0.3983491826802492,0.4053049590438604,0.4079726096242666,0.4135104585438967,0.41717425361275673,0.41904263757169247,0.4211529679596424,0.4212619122117758,0.42373160831630224,0.42435371689498425,0.4279126934707165],"label":"All filters except 
terminal_punct"},"sm-baseline-c4":{"x":[0.0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,28.311552000000002],"y":[0.3308933284133672,0.3580685469011466,0.3740996705989043,0.39048008372386295,0.39857714250683784,0.40837346265713376,0.4111154315372308,0.41773712386687595,0.4196594481666882,0.42379963273803395,0.4276047808428605,0.42980752388636273,0.43098293244838715,0.43155378103256226,0.4327609067161878],"label":"C4"}},"layout":{"xaxis":{"title":{"text":"Training tokens (Billions)"}},"title":{"text":"C4 filtering effect on HellaSwag"}}}
 
 
data/plots/c4_filters_hellaswag/arc_acc_norm.json DELETED
@@ -1 +0,0 @@
1
- {"data":{"filtering-baseline-2019-18-60gt":{"x":[0.0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,28.311552000000002],"y":[0.2509999871253967,0.2905000001192093,0.32549999654293055,0.3307500034570694,0.3467499911785126,0.3500000089406967,0.3452499955892563,0.3622500002384185,0.35999999940395355,0.37024998664855957,0.3684999942779541,0.3675000071525574,0.37249998748302454,0.37675000727176666,0.3760000020265579],"label":"baseline"},"filtering-c4-curly_bracket":{"x":[0.0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,28.311552000000002],"y":[0.2509999871253967,0.29250000417232513,0.3184999972581863,0.3297500014305115,0.34450000524520874,0.3512499928474426,0.35724999010562897,0.36375001072883606,0.3665000051259994,0.3684999942779541,0.3712499886751175,0.37375000119209284,0.37800000607967377,0.3840000033378601,0.37950000166893005],"label":"curly_bracket filter"},"filtering-c4-terminal_punct":{"x":[0.0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,28.311552000000002],"y":[0.2509999871253967,0.2947500050067901,0.31974999606609344,0.3344999998807907,0.3445000052452087,0.351500004529953,0.35199999809265137,0.35925000905990595,0.3634999990463257,0.36374999582767487,0.36550000309944153,0.36775000393390656,0.3677499890327453,0.36900000274181366,0.36650000512599945],"label":"terminal_punct 
filter"},"filtering-c4-word_lengths":{"x":[0.0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,28.311552000000002],"y":[0.2509999871253967,0.28949999809265137,0.3187499940395355,0.33825001120567316,0.35074999928474426,0.3604999929666519,0.36274999380111694,0.3634999990463257,0.3645000010728836,0.3644999861717224,0.3669999986886978,0.3642500042915344,0.3722499907016754,0.37499999999999994,0.37549999356269836],"label":"word_lengths filter"},"filtering-c4-all":{"x":[0.0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,28.311552000000002],"y":[0.2509999871253967,0.30024999380111694,0.32724998891353607,0.33374999463558197,0.34574998915195465,0.351749986410141,0.36124999821186066,0.3527500033378601,0.3582500070333481,0.35850000381469727,0.36075000464916224,0.364750012755394,0.37049999833106995,0.3729999959468841,0.36974999308586115],"label":"All filters"},"filtering-c4-all-except-terminal_punct":{"x":[0.0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,28.311552000000002],"y":[0.2512499988079071,0.294500008225441,0.32725000381469727,0.3352499902248382,0.3504999876022339,0.3487499952316284,0.3557500094175339,0.35324999690055847,0.36374999582767487,0.36474999785423273,0.372749999165535,0.36775000393390656,0.3707500100135803,0.3734999895095825,0.375],"label":"All filters except 
terminal_punct"},"sm-baseline-c4":{"x":[0.0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,28.311552000000002],"y":[0.2509999871253967,0.2928333381811778,0.3191666702429453,0.3451666633288066,0.342166672150294,0.35983332991600037,0.35483332475026447,0.3643333315849304,0.3631666700045268,0.3698333303133647,0.3696666657924652,0.37433333198229474,0.3805000086625417,0.3800000051657359,0.3798333406448364],"label":"C4"}},"layout":{"xaxis":{"title":{"text":"Training tokens (Billions)"}},"title":{"text":"C4 filtering effect on HellaSwag"}}}
 
 
data/plots/c4_filters_hellaswag/commonsense_qa_acc_norm.json DELETED
@@ -1 +0,0 @@
1
- {"data":{"filtering-baseline-2019-18-60gt":{"x":[0.0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,28.311552000000002],"y":[0.2329999953508377,0.2584999948740005,0.2850000113248825,0.30850000679492945,0.30149999260902405,0.31049999594688416,0.3079999983310699,0.3150000125169754,0.32199999690055847,0.3244999945163727,0.3205000013113022,0.3244999945163727,0.3279999941587448,0.33149999380111694,0.32850000262260437],"label":"baseline"},"filtering-c4-curly_bracket":{"x":[0.0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,28.311552000000002],"y":[0.2329999953508377,0.2619999945163727,0.288000002503395,0.29749999940395355,0.30399999022483826,0.3149999976158142,0.3245000094175339,0.3230000138282776,0.3240000009536743,0.3245000094175339,0.33550000190734863,0.335999995470047,0.32999999821186066,0.3375000059604645,0.34049999713897705],"label":"curly_bracket filter"},"filtering-c4-terminal_punct":{"x":[0.0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,28.311552000000002],"y":[0.2329999953508377,0.2650000005960464,0.28599999845027924,0.3110000044107437,0.2944999933242798,0.3085000067949295,0.32199999690055847,0.31949999928474426,0.3240000009536743,0.32500000298023224,0.3245000094175339,0.32199999690055847,0.3265000134706497,0.3295000046491623,0.32999999821186066],"label":"terminal_punct 
filter"},"filtering-c4-word_lengths":{"x":[0.0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,28.311552000000002],"y":[0.2329999953508377,0.26349999010562897,0.2824999988079071,0.2985000014305115,0.3050000071525574,0.3119999915361404,0.3110000044107437,0.3164999932050705,0.32199999690055847,0.3279999941587448,0.3365000039339065,0.3375000059604645,0.3384999930858612,0.340499997138977,0.341499999165535],"label":"word_lengths filter"},"filtering-c4-all":{"x":[0.0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,28.311552000000002],"y":[0.2329999953508377,0.26299999654293055,0.2864999920129776,0.2944999933242798,0.2985000014305115,0.3165000081062317,0.3194999992847442,0.318000003695488,0.32500000298023224,0.32899999618530273,0.3254999965429306,0.33150000870227814,0.3330000042915344,0.33200000226497645,0.3330000042915344],"label":"All filters"},"filtering-c4-all-except-terminal_punct":{"x":[0.0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,28.311552000000002],"y":[0.2329999953508377,0.25800000131130213,0.2849999964237213,0.29200001060962677,0.289000004529953,0.30349999666213984,0.30400000512599945,0.3139999955892563,0.3139999955892563,0.318000003695488,0.32299999892711634,0.3174999952316284,0.3215000033378601,0.32250000536441803,0.32549999654293055],"label":"All filters except 
terminal_punct"},"sm-baseline-c4":{"x":[0.0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,28.311552000000002],"y":[0.2329999953508377,0.25700000921885174,0.2786666651566823,0.2960000038146972,0.3049999972184499,0.3053333262602488,0.3120000064373016,0.31733333071072894,0.3163333336512248,0.3186666667461395,0.3226666748523712,0.3286666671435038,0.3240000009536743,0.32900000611941016,0.3283333381017049],"label":"C4"}},"layout":{"xaxis":{"title":{"text":"Training tokens (Billions)"}},"title":{"text":"C4 filtering effect on HellaSwag"}}}
 
 
data/plots/c4_filters_hellaswag/hellaswag_acc_norm.json DELETED
@@ -1 +0,0 @@
1
- {"data":{"filtering-baseline-2019-18-60gt":{"x":[0.0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,28.311552000000002],"y":[0.257999986410141,0.28949999809265137,0.32599999010562897,0.34450000524520874,0.3725000023841858,0.38500000536441803,0.39499999582767487,0.408500000834465,0.41700001060962677,0.4174999892711639,0.4284999966621399,0.42849999666213984,0.43150000274181366,0.4399999976158142,0.4375],"label":"baseline"},"filtering-c4-curly_bracket":{"x":[0.0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,28.311552000000002],"y":[0.257999986410141,0.29749999940395355,0.3240000009536743,0.34849999845027924,0.3725000023841858,0.3895000070333481,0.39800000190734863,0.41000001132488245,0.4214999973773956,0.42149999737739563,0.42499999701976776,0.42750000953674316,0.4364999979734421,0.4354999959468841,0.4385000020265579],"label":"curly_bracket filter"},"filtering-c4-terminal_punct":{"x":[0.0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,28.311552000000002],"y":[0.257999986410141,0.2939999997615814,0.3295000046491623,0.3684999942779541,0.38449999690055847,0.398499995470047,0.3959999978542328,0.4204999953508377,0.4335000067949295,0.445499986410141,0.443000003695488,0.455499991774559,0.45250000059604645,0.4529999941587448,0.4545000046491623],"label":"terminal_punct 
filter"},"filtering-c4-word_lengths":{"x":[0.0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,28.311552000000002],"y":[0.257999986410141,0.29100000858306885,0.32400000095367426,0.3439999967813492,0.3575000017881393,0.3800000101327896,0.40049999952316284,0.4134999960660934,0.42099998891353607,0.4204999953508377,0.4280000030994415,0.44099999964237213,0.43799999356269836,0.44200000166893005,0.44600000977516174],"label":"word_lengths filter"},"filtering-c4-all":{"x":[0.0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,28.311552000000002],"y":[0.257999986410141,0.29950000345706934,0.33799999952316284,0.3789999932050705,0.3970000147819519,0.42149999737739563,0.431999996304512,0.4440000057220459,0.4490000009536743,0.45949999988079065,0.4714999943971634,0.48000000417232513,0.47749999165534973,0.48100000619888306,0.48950000107288355],"label":"All filters"},"filtering-c4-all-except-terminal_punct":{"x":[0.0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,28.311552000000002],"y":[0.257999986410141,0.2955000102519989,0.3385000079870224,0.36800000071525574,0.40099999308586115,0.4099999964237213,0.41700001060962677,0.42400000989437103,0.4389999955892563,0.4414999932050705,0.4484999924898147,0.455499991774559,0.45799998939037323,0.4660000056028366,0.471000000834465],"label":"All filters except 
terminal_punct"},"sm-baseline-c4":{"x":[0.0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,28.311552000000002],"y":[0.257999986410141,0.29699999094009394,0.3369999925295512,0.3699999948342641,0.3930000066757202,0.41233333945274353,0.42733333508173627,0.43799999356269836,0.4506666660308838,0.454666664203008,0.47166667381922406,0.47766666611035663,0.476666659116745,0.48366666833559663,0.4853333334128062],"label":"C4"}},"layout":{"xaxis":{"title":{"text":"Training tokens (Billions)"}},"title":{"text":"C4 filtering effect on HellaSwag"}}}
 
 
data/plots/c4_filters_hellaswag/index.json DELETED
@@ -1,2 +0,0 @@
1
- {"files":{"agg_score":{"file":"agg_score.json"},"commonsense_qa/acc_norm":{"file":"commonsense_qa_acc_norm.json"},"hellaswag/acc_norm":{"file":"hellaswag_acc_norm.json"},"openbookqa/acc_norm":{"file":"openbookqa_acc_norm.json"},"piqa/acc_norm":{"file":"piqa_acc_norm.json"},"winogrande/acc_norm":{"file":"winogrande_acc_norm.json"},"arc/acc_norm":{"file":"arc_acc_norm.json"},"mmlu/acc_norm":{"file":"mmlu_acc_norm.json"}},"settings":{"slider":{"min":0,"max":10,"default":3}, "defaultMetric": "hellaswag/acc_norm"}}
2
-
 
 
 
data/plots/c4_filters_hellaswag/mmlu_acc_norm.json DELETED
@@ -1 +0,0 @@
1
- {"data":{"filtering-baseline-2019-18-60gt":{"x":[0.0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,28.311552000000002],"y":[0.25013685226440424,0.25661391019821167,0.2620016932487488,0.2657508552074432,0.2710244506597519,0.2744349539279938,0.27642421424388885,0.2818952649831772,0.2794509679079056,0.2831944525241852,0.28439727425575256,0.2866545617580414,0.2866020053625107,0.28615814447402954,0.2871949374675751],"label":"baseline"},"filtering-c4-curly_bracket":{"x":[0.0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,28.311552000000002],"y":[0.2501466572284698,0.25762456655502314,0.2630201578140259,0.2672136425971985,0.27234274148941034,0.2702306807041168,0.27446796000003815,0.27583475410938263,0.2770504504442215,0.2794356495141983,0.28302033245563507,0.28214274346828455,0.2855468988418579,0.2840581685304642,0.28505663573741913],"label":"curly_bracket filter"},"filtering-c4-terminal_punct":{"x":[0.0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,28.311552000000002],"y":[0.2501466572284698,0.2583308666944504,0.2611347585916519,0.26333703100681305,0.2685028165578842,0.2725042402744293,0.27531248331069946,0.27463899552822113,0.2784048914909363,0.27915388345718384,0.27945026755332947,0.28207844495773315,0.281900018453598,0.2822476774454117,0.28188446164131165],"label":"terminal_punct 
filter"},"filtering-c4-word_lengths":{"x":[0.0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,28.311552000000002],"y":[0.2501466572284698,0.25205445289611816,0.2613788843154907,0.26891554892063135,0.2724043130874634,0.27449470758438105,0.27719296514987946,0.27587129175662994,0.2815589904785156,0.2833077013492584,0.2830233126878738,0.28461267054080963,0.2871275246143341,0.28650729358196253,0.2869933694601059],"label":"word_lengths filter"},"filtering-c4-all":{"x":[0.0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,28.311552000000002],"y":[0.2501466572284698,0.25806266069412226,0.26165445148944855,0.26727744936943054,0.2677594721317291,0.2689383774995804,0.2724889665842056,0.27308812737464905,0.27327476441860193,0.27370570600032806,0.277080088853836,0.27814342081546783,0.2782013118267059,0.27888238430023193,0.2795541882514953],"label":"All filters"},"filtering-c4-all-except-terminal_punct":{"x":[0.0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,28.311552000000002],"y":[0.2501466572284698,0.25500668585300446,0.26221066713333124,0.26368947327136993,0.2702934741973877,0.27218967676162714,0.27553085982799524,0.27833363413810724,0.2786440253257751,0.2810910940170288,0.2834737300872803,0.2833452969789505,0.2836028486490249,0.28682972490787506,0.2868015915155411],"label":"All filters except 
terminal_punct"},"sm-baseline-c4":{"x":[0.0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,28.311552000000002],"y":[0.2501466572284698,0.2557150324185689,0.25763070583343506,0.2643406589825948,0.26745049158732087,0.2721543808778127,0.2737567722797394,0.2732303539911906,0.27877557277679443,0.27923040588696796,0.2798382341861725,0.2831268608570099,0.28203009565671283,0.2810969154040019,0.28292057911554974],"label":"C4"}},"layout":{"xaxis":{"title":{"text":"Training tokens (Billions)"}},"title":{"text":"C4 filtering effect on HellaSwag"}}}
 
 
data/plots/c4_filters_hellaswag/openbookqa_acc_norm.json DELETED
@@ -1 +0,0 @@
1
- {"data":{"filtering-baseline-2019-18-60gt":{"x":[0.0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,28.311552000000002],"y":[0.2860000133514404,0.2559999972581863,0.27699999511241913,0.288000002503395,0.2980000078678131,0.31199999153614044,0.29500000178813934,0.3139999955892563,0.31199999153614044,0.31200000643730164,0.3369999974966049,0.32899999618530273,0.3200000077486038,0.3310000002384186,0.3330000042915344],"label":"baseline"},"filtering-c4-curly_bracket":{"x":[0.0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,28.311552000000002],"y":[0.2860000133514404,0.25800000131130213,0.29899999499320984,0.27900001406669617,0.296999990940094,0.2980000078678131,0.3149999976158142,0.3179999887943268,0.32500000298023224,0.3079999983310699,0.32900001108646393,0.32599999010562897,0.3190000057220459,0.3279999941587448,0.3229999989271164],"label":"curly_bracket filter"},"filtering-c4-terminal_punct":{"x":[0.0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,28.311552000000002],"y":[0.2860000133514404,0.26900000870227814,0.27400000393390656,0.2929999977350235,0.29600000381469727,0.306999996304512,0.3199999928474426,0.3190000057220459,0.31299999356269836,0.3229999989271164,0.3210000097751617,0.3270000070333481,0.3230000138282776,0.33399999141693115,0.3260000050067901],"label":"terminal_punct 
filter"},"filtering-c4-word_lengths":{"x":[0.0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,28.311552000000002],"y":[0.2860000133514404,0.2690000087022781,0.27300000190734863,0.28599999845027924,0.28299999237060547,0.3050000071525574,0.30900000035762787,0.31199999153614044,0.3200000077486038,0.33200000226497645,0.31200000643730164,0.3230000138282776,0.32299999892711634,0.32899999618530273,0.3320000022649765],"label":"word_lengths filter"},"filtering-c4-all":{"x":[0.0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,28.311552000000002],"y":[0.2860000133514404,0.2590000033378601,0.278999999165535,0.2979999929666519,0.29899999499320984,0.3270000070333481,0.32800000905990595,0.32899999618530273,0.3369999974966049,0.33200000226497645,0.3260000050067901,0.33599999547004694,0.335999995470047,0.33500000834465027,0.3330000042915344],"label":"All filters"},"filtering-c4-all-except-terminal_punct":{"x":[0.0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,28.311552000000002],"y":[0.2860000133514404,0.2500000074505806,0.2759999930858612,0.2800000011920929,0.29099999368190765,0.3070000112056732,0.3070000112056732,0.3229999989271164,0.3240000009536743,0.31700000166893005,0.3100000023841858,0.31300000846385956,0.31700000166893005,0.3100000023841858,0.3189999908208847],"label":"All filters except 
terminal_punct"},"sm-baseline-c4":{"x":[0.0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,28.311552000000002],"y":[0.2860000133514404,0.2526666720708211,0.26533332467079157,0.26600000262260437,0.29333333174387616,0.3059999942779541,0.30933333436648053,0.31600000460942584,0.31466667850812274,0.32933333516120905,0.3346666693687439,0.3366666634877522,0.3386666675408681,0.33799999952316284,0.33066666126251215],"label":"C4"}},"layout":{"xaxis":{"title":{"text":"Training tokens (Billions)"}},"title":{"text":"C4 filtering effect on HellaSwag"}}}
 
 
data/plots/c4_filters_hellaswag/piqa_acc_norm.json DELETED
@@ -1 +0,0 @@
1
- {"data":{"filtering-baseline-2019-18-60gt":{"x":[0.0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,28.311552000000002],"y":[0.5099999904632568,0.6105000078678131,0.6350000202655792,0.6620000004768372,0.675000011920929,0.6940000057220459,0.6974999904632568,0.7054999768733978,0.7060000002384186,0.7059999704360962,0.7084999978542328,0.7060000002384186,0.7084999978542328,0.7144999802112579,0.7134999930858612],"label":"baseline"},"filtering-c4-curly_bracket":{"x":[0.0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,28.311552000000002],"y":[0.5099999904632568,0.6149999797344208,0.6520000100135803,0.6789999902248383,0.69200000166893,0.6949999928474426,0.6955000162124634,0.7055000066757202,0.7150000035762787,0.7169999778270721,0.7184999883174896,0.7235000133514404,0.7240000069141388,0.723499983549118,0.7249999940395355],"label":"curly_bracket filter"},"filtering-c4-terminal_punct":{"x":[0.0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,28.311552000000002],"y":[0.5099999904632568,0.621999979019165,0.6549999713897705,0.6695000231266022,0.6860000193119049,0.6994999945163727,0.6980000138282776,0.7084999978542328,0.7120000123977661,0.7124999761581421,0.7160000205039978,0.7179999947547913,0.7195000052452087,0.7229999899864197,0.723499983549118],"label":"terminal_punct 
filter"},"filtering-c4-word_lengths":{"x":[0.0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,28.311552000000002],"y":[0.5099999904632568,0.6229999959468842,0.6590000092983246,0.6714999973773956,0.6820000112056732,0.6949999928474426,0.6940000057220459,0.7064999938011169,0.7005000114440918,0.6989999711513519,0.7084999978542328,0.7060000002384186,0.7099999785423279,0.7160000205039978,0.7150000035762787],"label":"word_lengths filter"},"filtering-c4-all":{"x":[0.0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,28.311552000000002],"y":[0.5099999904632568,0.6215000152587891,0.6580000221729279,0.6784999966621399,0.69200000166893,0.703499972820282,0.7029999792575836,0.710999995470047,0.7139999866485596,0.7179999947547913,0.7150000035762787,0.715499997138977,0.7184999883174896,0.7160000205039978,0.7224999964237213],"label":"All filters"},"filtering-c4-all-except-terminal_punct":{"x":[0.0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,28.311552000000002],"y":[0.5099999904632568,0.621999979019165,0.6520000100135803,0.6800000071525574,0.6895000040531158,0.6949999928474426,0.6990000009536743,0.7045000195503235,0.7114999890327454,0.710999995470047,0.7159999907016754,0.7199999988079071,0.7199999988079071,0.7204999923706055,0.7254999876022339],"label":"All filters except 
terminal_punct"},"sm-baseline-c4":{"x":[0.0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,28.311552000000002],"y":[0.5099999904632568,0.6196666558583578,0.6583333412806193,0.6833333373069763,0.6829999883969625,0.6983333230018616,0.702999989191691,0.7056666612625122,0.7076666553815206,0.7139999866485596,0.7209999958674113,0.7179999947547913,0.7273333470026652,0.7209999958674113,0.7273333271344503],"label":"C4"}},"layout":{"xaxis":{"title":{"text":"Training tokens (Billions)"}},"title":{"text":"C4 filtering effect on HellaSwag"}}}
 
 
data/plots/c4_filters_hellaswag/winogrande_acc_norm.json DELETED
@@ -1 +0,0 @@
1
- {"data":{"filtering-baseline-2019-18-60gt":{"x":[0.0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,28.311552000000002],"y":[0.4970000088214874,0.48950000107288355,0.48950000107288355,0.5049999952316284,0.5125000178813934,0.5004999935626984,0.5065000057220459,0.5055000185966492,0.511000007390976,0.5160000026226044,0.5209999978542328,0.5270000100135803,0.5219999849796295,0.5149999856948853,0.5125000178813934],"label":"baseline"},"filtering-c4-curly_bracket":{"x":[0.0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,28.311552000000002],"y":[0.4970000088214874,0.48350000381469727,0.5024999976158142,0.5039999932050705,0.5049999952316284,0.5115000009536743,0.50450000166893,0.5120000243186951,0.5144999921321869,0.5194999873638153,0.5250000059604645,0.5170000195503235,0.5180000066757202,0.527999997138977,0.5259999930858612],"label":"curly_bracket filter"},"filtering-c4-terminal_punct":{"x":[0.0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,28.311552000000002],"y":[0.4970000088214874,0.49050000309944153,0.48900000751018524,0.5080000162124634,0.50450000166893,0.5185000002384186,0.5175000131130219,0.5099999904632568,0.526500016450882,0.5320000052452087,0.5230000019073486,0.5105000138282776,0.5214999914169312,0.523499995470047,0.5264999866485596],"label":"terminal_punct 
filter"},"filtering-c4-word_lengths":{"x":[0.0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,28.311552000000002],"y":[0.4970000088214874,0.49050000309944153,0.49000000953674316,0.4999999850988388,0.4989999830722809,0.5115000009536743,0.5105000138282776,0.5069999992847443,0.5109999775886536,0.5164999961853027,0.5059999823570251,0.5129999816417694,0.5059999823570251,0.5115000009536743,0.5164999961853027],"label":"word_lengths filter"},"filtering-c4-all":{"x":[0.0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,28.311552000000002],"y":[0.4970000088214874,0.4884999990463257,0.4989999979734421,0.5064999908208847,0.49800001084804535,0.5040000081062317,0.5139999985694885,0.5160000026226044,0.5109999775886536,0.5070000141859055,0.5115000009536743,0.5105000138282776,0.5175000131130219,0.5200000107288361,0.5135000050067902],"label":"All filters"},"filtering-c4-all-except-terminal_punct":{"x":[0.0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,28.311552000000002],"y":[0.4970000088214874,0.49150000512599945,0.49900001287460327,0.49300000071525574,0.5015000104904175,0.5094999969005585,0.5109999775886536,0.5085000097751617,0.507500022649765,0.5205000042915344,0.5125000178813934,0.5160000026226044,0.5175000131130219,0.5150000154972076,0.5179999768733978],"label":"All filters except 
terminal_punct"},"sm-baseline-c4":{"x":[0.0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,28.311552000000002],"y":[0.4970000088214874,0.4933333396911621,0.48733333746592206,0.5056666731834412,0.5066666503747305,0.5116666754086813,0.5076666871706644,0.5213333169619242,0.5150000055631002,0.5183333357175192,0.5169999996821085,0.515333334604899,0.5193333427111307,0.5143333276112875,0.5196666717529297],"label":"C4"}},"layout":{"xaxis":{"title":{"text":"Training tokens (Billions)"}},"title":{"text":"C4 filtering effect on HellaSwag"}}}
 
 
data/plots/cross_ind_unfiltered_comparison/agg_score.json DELETED
@@ -1 +0,0 @@
1
- {"data":{"big-run-refinedweb":{"x":[0.0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,28.311552000000002,29.360128000000003,31.45728,33.554432000000006,35.651584,37.748736,39.845888,41.94304,44.040192000000005,46.137344000000006,48.234496,50.331648,52.4288,54.525952000000004,56.623104000000005,58.720256000000006,60.817408,62.91456,65.011712,67.10886400000001,69.206016,71.303168,73.40032000000001,75.497472,77.59462400000001,79.691776,81.788928,83.88608,85.983232,88.08038400000001,90.177536,92.27468800000001,94.37184,96.468992,98.56614400000001,100.663296,102.76044800000001,104.8576,106.95475200000001,109.05190400000001,111.149056,113.24620800000001,115.34336,117.44051200000001,119.537664,121.634816,123.73196800000001,125.82912,127.92627200000001,130.023424,132.120576,134.21772800000002,136.31488000000002,138.412032,140.509184,142.606336,144.70348800000002,146.80064000000002,148.897792,150.994944,153.092096,155.18924800000002,157.28640000000001,159.383552,161.480704,163.577856,165.67500800000002,167.77216,169.869312,171.966464,174.06361600000002,176.16076800000002,178.25792,180.355072,182.452224,184.54937600000002,186.64652800000002,188.74368,190.840832,192.937984,195.03513600000002,197.13228800000002,199.22944,201.326592,203.423744,205.52089600000002,207.61804800000002,209.7152,211.812352,213.90950400000003,216.00665600000002,218.10380800000001,220.20096,222.298112,224.39526400000003,226.49241600000002,228.589568,230.68672,232.783872,234.88102400000002,236.97817600000002,239.075328,241.17248,243.269632,245.36678400000002,247.46393600000002,249.561088,251.65824,253.75539200000003,255.85254400000002,257.949696,260.046848,262.144,264.241152,266.338304,268.43545600000004,270.53260800000004,272.62976000000003,274.726912,276.824064,278.921216,281.018368,283.11552,285.212672,287.309824,289.40697600000004,291.50412800000004,293.60128
000000003,295.698432,297.795584,299.892736,301.989888,304.08704,306.184192,308.28134400000005,310.37849600000004,312.47564800000004,314.57280000000003,316.669952,318.767104,320.864256,322.961408,325.05856,327.155712,329.25286400000005,331.35001600000004,333.44716800000003,335.54432,337.641472,339.738624,341.835776,343.932928,346.03008,348.12723200000005,350.22438400000004],"y":[0.3308933284133672,0.3534814938902855,0.3764607086777687,0.38782499730587,0.3981050960719585,0.4028486795723438,0.4125883243978023,0.4117814563214779,0.414029736071825,0.4197172522544861,0.4211113378405571,0.4279881417751312,0.4280137903988361,0.4280424378812313,0.4291964024305343,0.4326301179826259,0.4371833503246307,0.4346669465303421,0.4336562640964985,0.4432648755609989,0.4401291646063328,0.4394684173166752,0.4476612061262131,0.4465444348752498,0.4472153298556804,0.4433343075215816,0.4510187618434429,0.4459567815065384,0.4460812956094742,0.4498684890568256,0.4529943652451038,0.4528274349868297,0.4551213420927524,0.4549156539142132,0.4564928151667118,0.4576693661510944,0.4557182416319847,0.4536240361630916,0.457439012825489,0.4570476822555065,0.4589823484420776,0.462024375796318,0.4540738053619861,0.4550252184271812,0.4576593860983848,0.4573238864541054,0.4575810581445694,0.4622134491801262,0.4592566937208175,0.4614734016358852,0.4637473002076149,0.4625372551381588,0.4613912180066108,0.4597448222339153,0.4594792164862156,0.4662549719214439,0.4634026065468788,0.4633508697152138,0.4635734222829342,0.4628961533308029,0.4670135043561458,0.4639505892992019,0.4631133340299129,0.4665167145431041,0.4672448337078094,0.4693268723785877,0.4630668573081493,0.4676454700529575,0.4646359197795391,0.4621579721570015,0.4692446552217006,0.4704835228621959,0.4663223996758461,0.4680556617677212,0.466339822858572,0.4682099223136902,0.4711195565760135,0.4722655527293682,0.4727961830794811,0.4676857478916645,0.4719390422105789,0.4713102728128433,0.4712141714990139,0.4721613004803657,0.4713456854224205,0.46829709
03813839,0.4679934531450271,0.4685162976384163,0.4679946713149547,0.4681242071092129,0.4702276065945625,0.472664151340723,0.4730790853500366,0.4731674715876579,0.4718914777040481,0.4719801284372806,0.4761029370129108,0.4735167175531387,0.4730370938777923,0.4730173237621784,0.4735377207398414,0.4777223989367485,0.4796326830983162,0.4734170883893966,0.4739485755562782,0.4748299159109592,0.4765299335122108,0.4745025858283043,0.4754423759877682,0.4784592799842357,0.4761341325938701,0.4760282784700393,0.4769757278263569,0.47154351323843,0.4786738082766533,0.4804279990494251,0.4777076803147793,0.4798569902777672,0.4759011939167976,0.4784621745347976,0.479673832654953,0.4780617095530033,0.48076206818223,0.47995800152421,0.4790860973298549,0.4817167408764362,0.4811586998403072,0.482547752559185,0.4816697351634502,0.4809327870607376,0.4816545359790325,0.4804601892828941,0.4776877984404564,0.4813711903989315,0.4844604581594467,0.4819537848234176,0.4820829331874847,0.4778126627206802,0.482935007661581,0.48230691999197,0.4826001971960068,0.4823969900608063,0.4811219945549965,0.4789146520197391,0.484035175293684,0.4848698377609253,0.4855728335678577,0.4825376532971859,0.485215101391077,0.4824351668357849,0.4835342466831207,0.4822137206792831,0.4838785007596016,0.4837255179882049,0.4853012599050998,0.4857851006090641,0.4863366298377514,0.4856646582484245,0.4842503517866134,0.4838776960968971,0.4846346862614155,0.4837041422724724,0.4813097268342972,0.4873070046305656,0.4841253720223903,0.4837464913725853,0.483069509267807,0.4851242564618587,0.4861010462045669],"label":"RefinedWeb"},"big-run-fineweb-cross-dedup-fixed":{"x":[0.0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,29.360128000000003,31.45728,33.554432000000006,35.651584,37.748736,39.845888,41.94304,44.040192000000005,46.137344000000006,48.234496,50.331648,52.4288,54.525952000000004,5
6.623104000000005,58.720256000000006,60.817408,62.91456,65.011712,67.10886400000001,69.206016,71.303168,73.40032000000001,75.497472,77.59462400000001,79.691776,81.788928,83.88608,85.983232,88.08038400000001,90.177536,92.27468800000001,94.37184,96.468992,98.56614400000001,100.663296,102.76044800000001,104.8576,106.95475200000001,109.05190400000001,111.149056,113.24620800000001,115.34336,117.44051200000001,119.537664,121.634816,123.73196800000001,125.82912,127.92627200000001,130.023424,132.120576,134.21772800000002,136.31488000000002,138.412032,140.509184,142.606336,144.70348800000002,146.80064000000002,148.897792,150.994944,153.092096,155.18924800000002,157.28640000000001,159.383552,161.480704,163.577856,165.67500800000002,167.77216,169.869312,171.966464,174.06361600000002,176.16076800000002,178.25792,180.355072,182.452224,184.54937600000002,186.64652800000002,188.74368,190.840832,192.937984,195.03513600000002,197.13228800000002,199.22944,201.326592,203.423744,205.52089600000002,207.61804800000002,209.7152,211.812352,213.90950400000003,216.00665600000002,218.10380800000001,220.20096,222.298112,224.39526400000003,226.49241600000002,228.589568,230.68672,232.783872,234.88102400000002,236.97817600000002,239.075328,241.17248,243.269632,245.36678400000002,247.46393600000002,249.561088,251.65824,253.75539200000003,255.85254400000002,257.949696,260.046848,262.144,264.241152,266.338304,268.43545600000004,270.53260800000004,272.62976000000003,274.726912,276.824064,278.921216,281.018368,283.11552,285.212672,287.309824,289.40697600000004,291.50412800000004,293.60128000000003,295.698432,297.795584,299.892736,301.989888,304.08704,306.184192,308.28134400000005,310.37849600000004,312.47564800000004,314.57280000000003,316.669952,318.767104,320.864256,322.961408,325.05856,327.155712,329.25286400000005,331.35001600000004,333.44716800000003,335.54432,337.641472,339.738624,341.835776,343.932928,346.03008,348.12723200000005,350.22438400000004],"y":[0.3308933284133672,0.3551952373236418,0.
3736435137689113,0.3814037963747978,0.3948809280991554,0.3996850810945034,0.4089604057371616,0.4100853353738785,0.4119834117591381,0.4168377220630646,0.4186493046581745,0.4169826358556747,0.4234288297593593,0.4229162000119686,0.4273439794778824,0.4290364980697632,0.4291782416403293,0.4296907968819141,0.4311576783657074,0.4326641112565994,0.430318683385849,0.430436260998249,0.4339037239551544,0.4363459683954716,0.4357402548193931,0.4342963136732578,0.4366712383925915,0.4363959729671478,0.436981026083231,0.4447868093848228,0.4411709941923618,0.4406092017889023,0.4424176625907421,0.4423875361680984,0.4422253370285034,0.4410557933151722,0.4447037056088447,0.4454837813973427,0.4435960277915001,0.4468514993786812,0.4479999616742134,0.4428562931716442,0.445764634758234,0.4456562362611294,0.4488007053732872,0.4475954286754131,0.4468922987580299,0.4548408314585686,0.4511027485132217,0.4530330970883369,0.4483681954443455,0.4531726539134979,0.45334542542696,0.4544384703040123,0.4530758671462536,0.4540613554418087,0.4510113634169101,0.4538320265710354,0.4518541917204857,0.4536847211420536,0.4532708041369915,0.4552236869931221,0.455034039914608,0.4562875479459762,0.4532428197562694,0.4574853852391243,0.4517738744616508,0.4579889141023159,0.4538268558681011,0.456730306148529,0.4526018649339676,0.4562746733427048,0.4560015797615051,0.4555426277220249,0.4561501257121563,0.4524396173655987,0.4557023830711841,0.4589769169688225,0.4581078588962555,0.4620813727378845,0.4586601965129375,0.4568093195557594,0.4569808952510357,0.4567535072565079,0.4575250148773193,0.4606908001005649,0.4603964723646641,0.4622848592698574,0.4594669193029403,0.4640629850327968,0.4604269936680794,0.4634841009974479,0.4644578285515308,0.4642514958977699,0.4666304066777229,0.4616626128554344,0.4588956907391548,0.4620226770639419,0.4628621749579906,0.4595407098531723,0.4635516740381717,0.46005355194211,0.4601523540914058,0.4644204638898372,0.4620639197528362,0.46614545956254,0.4636696502566337,0.4610077403485775,
0.4640897810459137,0.4636163525283336,0.4630545899271965,0.466012816876173,0.4650349207222461,0.4613720141351223,0.4644323363900184,0.4647249802947044,0.4656480401754379,0.4651664271950722,0.4622530452907085,0.4655019529163837,0.4650313258171081,0.466718140989542,0.4661559611558914,0.4661237150430679,0.4664223715662956,0.4640601389110088,0.4642657749354839,0.4633881188929081,0.4629989042878151,0.4685831367969513,0.4675870984792709,0.467183344066143,0.4678030684590339,0.4660939238965511,0.4691914953291416,0.4670972637832165,0.468262892216444,0.4672016054391861,0.4676182121038437,0.4698677137494087,0.4658828042447567,0.4701816700398922,0.4684622809290886,0.466015312820673,0.4675401039421558,0.4693200923502445,0.4702670983970165,0.4679145030677318,0.4676233418285846,0.4674933589994907,0.4678357951343059,0.4669915996491909,0.4657857678830623,0.4666901864111423,0.4669371582567692,0.4672787226736545,0.4684535376727581,0.4685697965323925,0.4694835692644119,0.4683254994451999,0.4712230190634727,0.4683987610042095,0.4707653746008873,0.4663059376180172,0.4683133698999882,0.4686385430395603,0.4657671600580215,0.4692615270614624],"label":"FineWeb full 
MinHash"},"big-run-sampled_full_filtered_no_dedup":{"x":[0.0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,28.311552000000002,29.360128000000003,31.45728,33.554432000000006,35.651584,37.748736,39.845888,41.94304,44.040192000000005,46.137344000000006,48.234496,50.331648,52.4288,54.525952000000004,56.623104000000005,58.720256000000006,60.817408,62.91456,65.011712,67.10886400000001,69.206016,71.303168,73.40032000000001,75.497472,77.59462400000001,79.691776,81.788928,83.88608,85.983232,88.08038400000001,90.177536,92.27468800000001,94.37184,96.468992,98.56614400000001,100.663296,102.76044800000001,104.8576,106.95475200000001,109.05190400000001,111.149056,113.24620800000001,115.34336,117.44051200000001,119.537664,121.634816,123.73196800000001,125.82912,127.92627200000001,130.023424,132.120576,134.21772800000002,136.31488000000002,138.412032,140.509184,142.606336,144.70348800000002,146.80064000000002,148.897792,150.994944,153.092096,155.18924800000002,157.28640000000001,159.383552,161.480704,163.577856,165.67500800000002,167.77216,169.869312,171.966464,174.06361600000002,176.16076800000002,178.25792,180.355072,182.452224,184.54937600000002,186.64652800000002,188.74368,190.840832,192.937984,195.03513600000002,197.13228800000002,199.22944,201.326592,203.423744,205.52089600000002,207.61804800000002,209.7152,211.812352,213.90950400000003,216.00665600000002,218.10380800000001,220.20096,222.298112,224.39526400000003,226.49241600000002,228.589568,230.68672,232.783872,234.88102400000002,236.97817600000002,239.075328,241.17248,243.269632,245.36678400000002,247.46393600000002,249.561088,251.65824,253.75539200000003,255.85254400000002,257.949696,260.046848,262.144,264.241152,266.338304,268.43545600000004,270.53260800000004,272.62976000000003,274.726912,276.824064,278.921216,281.018368,283.11552,285.212672,287.309824,289.40697600000004,291.50412
800000004,293.60128000000003,295.698432,297.795584,299.892736,301.989888,304.08704,306.184192,308.28134400000005,310.37849600000004,312.47564800000004,314.57280000000003,316.669952,318.767104,320.864256,322.961408,325.05856,327.155712,329.25286400000005,331.35001600000004,333.44716800000003,335.54432,337.641472,339.738624,341.835776,343.932928,346.03008,348.12723200000005,350.22438400000004],"y":[0.3308933284133672,0.3605199865996837,0.3733148723840713,0.3882005847990513,0.3934122696518898,0.3947227671742439,0.4042885974049568,0.3974800482392311,0.4055779427289963,0.4133470430970192,0.4117913842201233,0.4113653488457203,0.4149517640471458,0.4187851920723915,0.4252083078026771,0.4206527359783649,0.4240428246557712,0.422003373503685,0.4280910938978195,0.4244147576391697,0.4316282644867897,0.4295645765960216,0.4310102686285972,0.4360743537545204,0.4313482865691185,0.4350991360843181,0.4378576353192329,0.4335876516997814,0.4347924515604973,0.4348904751241207,0.436600212007761,0.430036511272192,0.4350974671542644,0.4399556629359722,0.4371416717767715,0.4363861419260502,0.4376698136329651,0.4405004419386387,0.4373639523983001,0.4379038028419018,0.4371281825006008,0.4393439553678036,0.440426729619503,0.4401675276458263,0.4429537951946258,0.4449137263000011,0.4434786736965179,0.4450470842421055,0.4454202279448509,0.4394537284970283,0.442185215651989,0.4461225643754005,0.4427758157253265,0.4430646039545536,0.4476901069283485,0.4478763341903686,0.4493869319558143,0.4448477327823639,0.450044184923172,0.4498609118163585,0.4457665979862213,0.4506924152374267,0.449855338782072,0.448790930211544,0.4474099352955818,0.4546772800385952,0.4529431238770485,0.452015146613121,0.4502020999789238,0.4493804536759853,0.4523266032338142,0.4551868587732315,0.4501944817602634,0.4493303671479225,0.4526805207133293,0.4533850513398647,0.4518048763275146,0.4518973492085933,0.4531301632523536,0.4518006071448326,0.4553494565188885,0.4528752230107784,0.4536322727799415,0.4561733976006508,0.45494912564
75448,0.4574789106845855,0.4577847123146057,0.4563642293214798,0.4578686729073524,0.4561499990522861,0.4537816494703293,0.4542164430022239,0.4559455662965774,0.4554723873734474,0.4575514122843742,0.4575202167034149,0.4592722058296203,0.4585275091230869,0.4580587856471538,0.456934317946434,0.4577495418488979,0.4540119916200638,0.4570806957781315,0.4608120545744896,0.4588425755500793,0.4578334167599678,0.4610816091299057,0.4598177038133144,0.461849745362997,0.4631866924464702,0.4601576402783394,0.4646804705262184,0.4632389545440674,0.4604574106633663,0.4602976888418197,0.4581312239170074,0.4654182009398937,0.4655338563024997,0.4616620391607284,0.461054053157568,0.4613021649420261,0.4658613465726375,0.4633531905710697,0.4613638147711754,0.4643996246159076,0.462500050663948,0.4650798961520195,0.4648764543235302,0.4639869071543216,0.4634246975183487,0.46585888043046,0.4639799632132053,0.4630857892334461,0.4644265696406364,0.4642998576164245,0.4686848931014538,0.4687492996454239,0.4650243632495403,0.4627032242715359,0.4665953740477562,0.4660026729106903,0.4664581045508384,0.4676475040614605,0.4657339677214622,0.4664678275585174,0.4673498086631298,0.4676674827933311,0.4680955372750759,0.4681585058569908,0.4659864418208599,0.4686457589268684,0.4661462865769863,0.4658931568264961,0.4674226939678192,0.46805215254426,0.4682257212698459,0.4689070098102093,0.4699570722877979,0.4655096270143986,0.4688013233244419,0.4707522802054882,0.4661469310522079,0.4688841328024864,0.4671329781413078,0.4662554152309894,0.4697433896362781,0.4698473587632179,0.4676505327224731,0.4696521013975143],"label":"FineWeb filtered 
only"},"big-run-sampled_full_ind_minhash":{"x":[0.0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,28.311552000000002,29.360128000000003,31.45728,33.554432000000006,35.651584,37.748736,39.845888,41.94304,44.040192000000005,46.137344000000006,48.234496,50.331648,52.4288,54.525952000000004,56.623104000000005,58.720256000000006,60.817408,62.91456,65.011712,67.10886400000001,69.206016,71.303168,73.40032000000001,75.497472,77.59462400000001,79.691776,81.788928,83.88608,85.983232,88.08038400000001,90.177536,92.27468800000001,94.37184,96.468992,98.56614400000001,100.663296,102.76044800000001,104.8576,106.95475200000001,109.05190400000001,111.149056,113.24620800000001,115.34336,117.44051200000001,119.537664,121.634816,123.73196800000001,125.82912,127.92627200000001,130.023424,132.120576,134.21772800000002,136.31488000000002,138.412032,140.509184,142.606336,144.70348800000002,146.80064000000002,148.897792,150.994944,153.092096,155.18924800000002,157.28640000000001,159.383552,161.480704,163.577856,165.67500800000002,167.77216,169.869312,171.966464,174.06361600000002,176.16076800000002,178.25792,180.355072,182.452224,184.54937600000002,186.64652800000002,188.74368,190.840832,192.937984,195.03513600000002,197.13228800000002,199.22944,201.326592,203.423744,205.52089600000002,207.61804800000002,209.7152,211.812352,213.90950400000003,216.00665600000002,218.10380800000001,220.20096,222.298112,224.39526400000003,226.49241600000002,228.589568,230.68672,232.783872,234.88102400000002,236.97817600000002,239.075328,241.17248,243.269632,245.36678400000002,247.46393600000002,249.561088,251.65824,253.75539200000003,255.85254400000002,257.949696,260.046848,262.144,264.241152,266.338304,268.43545600000004,270.53260800000004,272.62976000000003,274.726912,276.824064,278.921216,281.018368,283.11552,285.212672,287.309824,289.40697600000004,291.50412800000004
,293.60128000000003,295.698432,297.795584,299.892736,301.989888,304.08704,306.184192,308.28134400000005,310.37849600000004,312.47564800000004,314.57280000000003,316.669952,318.767104,320.864256,322.961408,325.05856,327.155712,329.25286400000005,331.35001600000004,333.44716800000003,335.54432,337.641472,339.738624,341.835776,343.932928,346.03008,348.12723200000005,350.22438400000004],"y":[0.3308933284133672,0.3608616776764393,0.3745453506708145,0.3862277194857597,0.3989979773759842,0.406296543776989,0.4094927236437797,0.4138859286904335,0.4177777022123337,0.4208802655339241,0.4254550077021122,0.4283009432256222,0.429458349943161,0.4330311268568039,0.4303463362157345,0.4349483698606491,0.4348161295056343,0.438955657184124,0.4389265701174736,0.4393925778567791,0.4383306242525577,0.4436748661100864,0.4423373565077781,0.4460027255117893,0.4440812170505523,0.4476902261376381,0.4465879611670971,0.4497823156416416,0.4513350501656532,0.4518667235970497,0.45149727165699,0.4513994492590427,0.4521937072277069,0.4520382955670357,0.4530793912708759,0.4516105614602566,0.4530563354492187,0.4495660625398159,0.4520940892398357,0.4561133235692978,0.4522969461977482,0.4575686641037464,0.4589144177734852,0.4582882039248943,0.457970168441534,0.4554797261953354,0.4622044861316681,0.4596928395330906,0.4624353349208832,0.4619148448109627,0.461100060492754,0.458431463688612,0.4620467089116573,0.4562215581536293,0.4620163068175316,0.4631462283432483,0.4600549824535846,0.4620365314185619,0.458735141903162,0.461642112582922,0.461245734244585,0.4645131677389145,0.4629777930676937,0.4651660025119781,0.4653937108814716,0.4676259346306324,0.4667201824486255,0.4650012850761413,0.4676916748285293,0.4708514772355556,0.4673572592437267,0.4689626581966877,0.4678038358688354,0.4667215310037136,0.4646228328347206,0.4662510119378567,0.4674677737057209,0.4690804108977318,0.4634581170976162,0.4701276533305645,0.4676450751721859,0.4672758504748344,0.4674397967755794,0.4656238108873367,0.4690065123140812,0.467
7213467657566,0.4678985886275768,0.4735414572060108,0.4705612398684025,0.4703374318778515,0.4704933613538742,0.4688010476529598,0.4699571952223778,0.4674785658717155,0.4701188169419765,0.4682065695524215,0.4729971997439861,0.4748715870082378,0.4745333231985569,0.4737020246684551,0.4747246317565441,0.4771635122597217,0.4740425907075405,0.475264236330986,0.4744705818593502,0.474684040993452,0.4721556939184665,0.475641455501318,0.476833701133728,0.4746401384472847,0.4742486327886581,0.4730467088520527,0.4773029200732708,0.4760043211281299,0.4770320989191532,0.4742161482572555,0.4780259765684604,0.4806670732796192,0.4784667380154133,0.4788618609309196,0.4762138128280639,0.4777246937155723,0.4796081893146038,0.4798486456274986,0.475479181855917,0.4779988899827003,0.4765858314931392,0.4772914499044418,0.47843898832798,0.4799034222960472,0.4803600236773491,0.4751846008002758,0.4777872562408447,0.4779460839927196,0.4787487275898456,0.4808406494557857,0.4810357913374901,0.4797308407723903,0.4800078608095646,0.4806460626423359,0.4810502976179123,0.4797912389039993,0.477332629263401,0.4818884879350662,0.482621606439352,0.4833096489310264,0.4821632876992225,0.4831674285233021,0.4830279909074306,0.4849893450736999,0.4845218025147915,0.4825541749596596,0.4833571836352348,0.4853803217411041,0.483093187212944,0.4850797094404697,0.485261783003807,0.4837660938501358,0.4835929833352566,0.4855643883347511,0.4832059442996979,0.484714712947607,0.4839249886572361,0.4829078912734985,0.4818423055112362,0.482727088034153,0.4824129492044449,0.4820138849318027,0.4865870922803879],"label":"FineWeb independent MinHash"}},"layout":{"xaxis":{"title":{"text":"Training tokens (billions)"}},"yaxis":{"title":{"text":"Agg Score"}},"title":{"text":"Independent dedup outperforms dedup across dumps"}}}
 
 
data/plots/cross_ind_unfiltered_comparison/arc_acc_norm.json DELETED
@@ -1 +0,0 @@
1
- {"data":{"big-run-refinedweb":{"x":[0.0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,28.311552000000002,29.360128000000003,31.45728,33.554432000000006,35.651584,37.748736,39.845888,41.94304,44.040192000000005,46.137344000000006,48.234496,50.331648,52.4288,54.525952000000004,56.623104000000005,58.720256000000006,60.817408,62.91456,65.011712,67.10886400000001,69.206016,71.303168,73.40032000000001,75.497472,77.59462400000001,79.691776,81.788928,83.88608,85.983232,88.08038400000001,90.177536,92.27468800000001,94.37184,96.468992,98.56614400000001,100.663296,102.76044800000001,104.8576,106.95475200000001,109.05190400000001,111.149056,113.24620800000001,115.34336,117.44051200000001,119.537664,121.634816,123.73196800000001,125.82912,127.92627200000001,130.023424,132.120576,134.21772800000002,136.31488000000002,138.412032,140.509184,142.606336,144.70348800000002,146.80064000000002,148.897792,150.994944,153.092096,155.18924800000002,157.28640000000001,159.383552,161.480704,163.577856,165.67500800000002,167.77216,169.869312,171.966464,174.06361600000002,176.16076800000002,178.25792,180.355072,182.452224,184.54937600000002,186.64652800000002,188.74368,190.840832,192.937984,195.03513600000002,197.13228800000002,199.22944,201.326592,203.423744,205.52089600000002,207.61804800000002,209.7152,211.812352,213.90950400000003,216.00665600000002,218.10380800000001,220.20096,222.298112,224.39526400000003,226.49241600000002,228.589568,230.68672,232.783872,234.88102400000002,236.97817600000002,239.075328,241.17248,243.269632,245.36678400000002,247.46393600000002,249.561088,251.65824,253.75539200000003,255.85254400000002,257.949696,260.046848,262.144,264.241152,266.338304,268.43545600000004,270.53260800000004,272.62976000000003,274.726912,276.824064,278.921216,281.018368,283.11552,285.212672,287.309824,289.40697600000004,291.50412800000004,293.60128
000000003,295.698432,297.795584,299.892736,301.989888,304.08704,306.184192,308.28134400000005,310.37849600000004,312.47564800000004,314.57280000000003,316.669952,318.767104,320.864256,322.961408,325.05856,327.155712,329.25286400000005,331.35001600000004,333.44716800000003,335.54432,337.641472,339.738624,341.835776,343.932928,346.03008,348.12723200000005,350.22438400000004],"y":[0.2509999871253967,0.2899999916553497,0.31700000166893,0.3409999907016754,0.3425000011920929,0.3485000133514404,0.3555000126361847,0.3574999868869781,0.3585000038146972,0.363999992609024,0.3619999885559082,0.3675000071525574,0.3865000009536743,0.3810000121593475,0.3740000128746032,0.3810000121593475,0.3810000121593475,0.3860000073909759,0.3810000121593475,0.3894999921321869,0.3849999904632568,0.3855000138282776,0.3989999890327453,0.3980000019073486,0.3995000123977661,0.395000010728836,0.4084999859333038,0.4040000140666961,0.4004999995231628,0.3955000042915344,0.4135000109672546,0.4070000052452087,0.4104999899864197,0.4014999866485595,0.4099999964237213,0.4199999868869781,0.414000004529953,0.402999997138977,0.4214999973773956,0.4095000028610229,0.4059999883174896,0.4090000092983246,0.4074999988079071,0.4120000004768371,0.4154999852180481,0.4189999997615814,0.4149999916553497,0.429500013589859,0.4154999852180481,0.4214999973773956,0.4244999885559082,0.4205000102519989,0.4269999861717224,0.4214999973773956,0.4180000126361847,0.4415000081062317,0.4320000112056732,0.4350000023841858,0.4259999990463257,0.4300000071525574,0.4259999990463257,0.4189999997615814,0.4269999861717224,0.4199999868869781,0.426499992609024,0.4350000023841858,0.4289999902248382,0.4345000088214874,0.4259999990463257,0.426499992609024,0.4395000040531158,0.4395000040531158,0.4359999895095825,0.4280000030994415,0.4370000064373016,0.4329999983310699,0.4309999942779541,0.4490000009536743,0.4399999976158142,0.4339999854564667,0.4399999976158142,0.4345000088214874,0.429500013589859,0.4370000064373016,0.4379999935626983,0.428499996662
1399,0.4309999942779541,0.4350000023841858,0.4399999976158142,0.4314999878406524,0.4300000071525574,0.4410000145435333,0.4345000088214874,0.4410000145435333,0.4345000088214874,0.4339999854564667,0.4460000097751617,0.4410000145435333,0.4469999969005584,0.4480000138282776,0.4435000121593475,0.4375,0.4519999921321869,0.4480000138282776,0.4429999887943268,0.4519999921321869,0.4435000121593475,0.4334999918937683,0.4460000097751617,0.4564999938011169,0.4469999969005584,0.453000009059906,0.4485000073909759,0.4410000145435333,0.4444999992847442,0.4485000073909759,0.457500010728836,0.4469999969005584,0.4535000026226043,0.4535000026226043,0.4485000073909759,0.4490000009536743,0.4505000114440918,0.4595000147819519,0.4544999897480011,0.453000009059906,0.4605000019073486,0.4620000123977661,0.457500010728836,0.453000009059906,0.4550000131130218,0.460999995470047,0.4449999928474426,0.4474999904632568,0.457500010728836,0.4584999978542328,0.4494999945163727,0.4474999904632568,0.4625000059604645,0.4639999866485595,0.4555000066757202,0.4469999969005584,0.4600000083446502,0.453000009059906,0.4629999995231628,0.4589999914169311,0.4614999890327453,0.4555000066757202,0.4560000002384186,0.4580000042915344,0.4584999978542328,0.4560000002384186,0.4605000019073486,0.4595000147819519,0.4639999866485595,0.4614999890327453,0.4564999938011169,0.4634999930858612,0.4625000059604645,0.4614999890327453,0.4679999947547912,0.4584999978542328,0.4595000147819519,0.4505000114440918,0.4544999897480011,0.4595000147819519,0.4620000123977661,0.4670000076293945,0.4555000066757202],"label":"RefinedWeb"},"big-run-fineweb-cross-dedup-fixed":{"x":[0.0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,29.360128000000003,31.45728,33.554432000000006,35.651584,37.748736,39.845888,41.94304,44.040192000000005,46.137344000000006,48.234496,50.331648,52.4288,54.525952000000004,56.6231040
00000005,58.720256000000006,60.817408,62.91456,65.011712,67.10886400000001,69.206016,71.303168,73.40032000000001,75.497472,77.59462400000001,79.691776,81.788928,83.88608,85.983232,88.08038400000001,90.177536,92.27468800000001,94.37184,96.468992,98.56614400000001,100.663296,102.76044800000001,104.8576,106.95475200000001,109.05190400000001,111.149056,113.24620800000001,115.34336,117.44051200000001,119.537664,121.634816,123.73196800000001,125.82912,127.92627200000001,130.023424,132.120576,134.21772800000002,136.31488000000002,138.412032,140.509184,142.606336,144.70348800000002,146.80064000000002,148.897792,150.994944,153.092096,155.18924800000002,157.28640000000001,159.383552,161.480704,163.577856,165.67500800000002,167.77216,169.869312,171.966464,174.06361600000002,176.16076800000002,178.25792,180.355072,182.452224,184.54937600000002,186.64652800000002,188.74368,190.840832,192.937984,195.03513600000002,197.13228800000002,199.22944,201.326592,203.423744,205.52089600000002,207.61804800000002,209.7152,211.812352,213.90950400000003,216.00665600000002,218.10380800000001,220.20096,222.298112,224.39526400000003,226.49241600000002,228.589568,230.68672,232.783872,234.88102400000002,236.97817600000002,239.075328,241.17248,243.269632,245.36678400000002,247.46393600000002,249.561088,251.65824,253.75539200000003,255.85254400000002,257.949696,260.046848,262.144,264.241152,266.338304,268.43545600000004,270.53260800000004,272.62976000000003,274.726912,276.824064,278.921216,281.018368,283.11552,285.212672,287.309824,289.40697600000004,291.50412800000004,293.60128000000003,295.698432,297.795584,299.892736,301.989888,304.08704,306.184192,308.28134400000005,310.37849600000004,312.47564800000004,314.57280000000003,316.669952,318.767104,320.864256,322.961408,325.05856,327.155712,329.25286400000005,331.35001600000004,333.44716800000003,335.54432,337.641472,339.738624,341.835776,343.932928,346.03008,348.12723200000005,350.22438400000004],"y":[0.2509999871253967,0.2904999852180481,0.328999996
1853027,0.3379999995231628,0.3400000035762787,0.3535000085830688,0.3700000047683716,0.3619999885559082,0.3695000112056732,0.3625000119209289,0.3745000064373016,0.3804999887943268,0.3835000097751617,0.3810000121593475,0.3785000145435333,0.3799999952316284,0.3885000050067901,0.3919999897480011,0.3899999856948852,0.3939999938011169,0.4004999995231628,0.3889999985694885,0.4000000059604645,0.3930000066757202,0.4025000035762787,0.398499995470047,0.3939999938011169,0.3989999890327453,0.4020000100135803,0.4079999923706054,0.4129999876022339,0.4014999866485595,0.4129999876022339,0.4079999923706054,0.4115000069141388,0.4070000052452087,0.4095000028610229,0.4199999868869781,0.4165000021457672,0.4239999949932098,0.4129999876022339,0.4034999907016754,0.4050000011920929,0.4135000109672546,0.4189999997615814,0.418500006198883,0.4199999868869781,0.4365000128746032,0.4320000112056732,0.4255000054836273,0.4259999990463257,0.4244999885559082,0.4275000095367431,0.4259999990463257,0.4210000038146972,0.421999990940094,0.4099999964237213,0.4305000007152557,0.4239999949932098,0.4194999933242798,0.4205000102519989,0.4255000054836273,0.414000004529953,0.4210000038146972,0.4180000126361847,0.4429999887943268,0.429500013589859,0.4165000021457672,0.4239999949932098,0.4255000054836273,0.4180000126361847,0.4325000047683716,0.4305000007152557,0.4329999983310699,0.4325000047683716,0.4320000112056732,0.4375,0.4410000145435333,0.4395000040531158,0.4379999935626983,0.4280000030994415,0.4365000128746032,0.4205000102519989,0.426499992609024,0.4280000030994415,0.4354999959468841,0.4314999878406524,0.429500013589859,0.421999990940094,0.4345000088214874,0.429500013589859,0.4354999959468841,0.4314999878406524,0.4404999911785126,0.4384999871253967,0.4359999895095825,0.4345000088214874,0.4320000112056732,0.4345000088214874,0.4375,0.4410000145435333,0.4280000030994415,0.4320000112056732,0.44200000166893,0.4460000097751617,0.4390000104904175,0.4314999878406524,0.4339999854564667,0.4390000104904175,0.44600000977
51617,0.4309999942779541,0.4444999992847442,0.44200000166893,0.4404999911785126,0.4395000040531158,0.4370000064373016,0.4519999921321869,0.4429999887943268,0.4395000040531158,0.4415000081062317,0.4384999871253967,0.4494999945163727,0.4469999969005584,0.4375,0.4395000040531158,0.4345000088214874,0.4390000104904175,0.4375,0.4309999942779541,0.4320000112056732,0.4415000081062317,0.4354999959468841,0.445499986410141,0.4404999911785126,0.4429999887943268,0.4395000040531158,0.4354999959468841,0.4429999887943268,0.4410000145435333,0.4494999945163727,0.4429999887943268,0.4460000097751617,0.445499986410141,0.4429999887943268,0.4429999887943268,0.4350000023841858,0.4474999904632568,0.4415000081062317,0.4424999952316284,0.4375,0.4444999992847442,0.4424999952316284,0.4354999959468841,0.445499986410141,0.4379999935626983,0.4449999928474426,0.4365000128746032,0.4474999904632568,0.4440000057220459,0.4465000033378601,0.445499986410141,0.4474999904632568,0.4494999945163727,0.4449999928474426,0.4444999992847442,0.44200000166893,0.4345000088214874,0.4404999911785126],"label":"FineWeb full 
MinHash"},"big-run-sampled_full_filtered_no_dedup":{"x":[0.0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,28.311552000000002,29.360128000000003,31.45728,33.554432000000006,35.651584,37.748736,39.845888,41.94304,44.040192000000005,46.137344000000006,48.234496,50.331648,52.4288,54.525952000000004,56.623104000000005,58.720256000000006,60.817408,62.91456,65.011712,67.10886400000001,69.206016,71.303168,73.40032000000001,75.497472,77.59462400000001,79.691776,81.788928,83.88608,85.983232,88.08038400000001,90.177536,92.27468800000001,94.37184,96.468992,98.56614400000001,100.663296,102.76044800000001,104.8576,106.95475200000001,109.05190400000001,111.149056,113.24620800000001,115.34336,117.44051200000001,119.537664,121.634816,123.73196800000001,125.82912,127.92627200000001,130.023424,132.120576,134.21772800000002,136.31488000000002,138.412032,140.509184,142.606336,144.70348800000002,146.80064000000002,148.897792,150.994944,153.092096,155.18924800000002,157.28640000000001,159.383552,161.480704,163.577856,165.67500800000002,167.77216,169.869312,171.966464,174.06361600000002,176.16076800000002,178.25792,180.355072,182.452224,184.54937600000002,186.64652800000002,188.74368,190.840832,192.937984,195.03513600000002,197.13228800000002,199.22944,201.326592,203.423744,205.52089600000002,207.61804800000002,209.7152,211.812352,213.90950400000003,216.00665600000002,218.10380800000001,220.20096,222.298112,224.39526400000003,226.49241600000002,228.589568,230.68672,232.783872,234.88102400000002,236.97817600000002,239.075328,241.17248,243.269632,245.36678400000002,247.46393600000002,249.561088,251.65824,253.75539200000003,255.85254400000002,257.949696,260.046848,262.144,264.241152,266.338304,268.43545600000004,270.53260800000004,272.62976000000003,274.726912,276.824064,278.921216,281.018368,283.11552,285.212672,287.309824,289.40697600000004,291.50412
800000004,293.60128000000003,295.698432,297.795584,299.892736,301.989888,304.08704,306.184192,308.28134400000005,310.37849600000004,312.47564800000004,314.57280000000003,316.669952,318.767104,320.864256,322.961408,325.05856,327.155712,329.25286400000005,331.35001600000004,333.44716800000003,335.54432,337.641472,339.738624,341.835776,343.932928,346.03008,348.12723200000005,350.22438400000004],"y":[0.2509999871253967,0.2894999980926513,0.3235000073909759,0.3389999866485595,0.3384999930858612,0.3459999859333038,0.359499990940094,0.3429999947547912,0.3619999885559082,0.3564999997615814,0.3625000119209289,0.363999992609024,0.3680000007152557,0.3680000007152557,0.3785000145435333,0.3684999942779541,0.375,0.3734999895095825,0.3849999904632568,0.3944999873638153,0.3865000009536743,0.395000010728836,0.3935000002384186,0.3980000019073486,0.3910000026226043,0.3885000050067901,0.3914999961853027,0.3815000057220459,0.395000010728836,0.3894999921321869,0.395000010728836,0.3935000002384186,0.4034999907016754,0.4004999995231628,0.3970000147819519,0.3975000083446502,0.3995000123977661,0.3980000019073486,0.4034999907016754,0.3959999978542328,0.3989999890327453,0.402999997138977,0.3880000114440918,0.3980000019073486,0.4040000140666961,0.3989999890327453,0.3970000147819519,0.3925000131130218,0.4120000004768371,0.3935000002384186,0.395000010728836,0.4070000052452087,0.3935000002384186,0.4034999907016754,0.4189999997615814,0.4129999876022339,0.4160000085830688,0.4149999916553497,0.418500006198883,0.4225000143051147,0.4174999892711639,0.4210000038146972,0.4045000076293945,0.4079999923706054,0.4124999940395355,0.4144999980926513,0.4169999957084656,0.4194999933242798,0.4154999852180481,0.4169999957084656,0.4225000143051147,0.4225000143051147,0.4230000078678131,0.4160000085830688,0.4325000047683716,0.4325000047683716,0.4199999868869781,0.4199999868869781,0.4189999997615814,0.4269999861717224,0.4259999990463257,0.4230000078678131,0.4144999980926513,0.4329999983310699,0.4275000095367431,0.4305
000007152557,0.4289999902248382,0.4235000014305115,0.4235000014305115,0.4325000047683716,0.4244999885559082,0.4314999878406524,0.4194999933242798,0.4350000023841858,0.4269999861717224,0.4235000014305115,0.4300000071525574,0.4284999966621399,0.4255000054836273,0.4280000030994415,0.4345000088214874,0.4225000143051147,0.4334999918937683,0.4300000071525574,0.4350000023841858,0.429500013589859,0.4325000047683716,0.4384999871253967,0.4345000088214874,0.4354999959468841,0.4359999895095825,0.4354999959468841,0.4424999952316284,0.4424999952316284,0.4320000112056732,0.4280000030994415,0.4390000104904175,0.4480000138282776,0.4415000081062317,0.4384999871253967,0.4390000104904175,0.4494999945163727,0.4449999928474426,0.4384999871253967,0.4424999952316284,0.4359999895095825,0.445499986410141,0.4399999976158142,0.4375,0.4410000145435333,0.4384999871253967,0.4375,0.4329999983310699,0.4370000064373016,0.4354999959468841,0.4440000057220459,0.4384999871253967,0.4384999871253967,0.4390000104904175,0.4424999952316284,0.4379999935626983,0.4345000088214874,0.4354999959468841,0.4440000057220459,0.4395000040531158,0.4465000033378601,0.4404999911785126,0.4505000114440918,0.4480000138282776,0.4449999928474426,0.445499986410141,0.4410000145435333,0.4485000073909759,0.4460000097751617,0.4480000138282776,0.4465000033378601,0.4460000097751617,0.4460000097751617,0.4395000040531158,0.4474999904632568,0.4469999969005584,0.4404999911785126,0.4440000057220459,0.4435000121593475,0.4435000121593475,0.4514999985694885,0.4474999904632568,0.4474999904632568,0.445499986410141],"label":"FineWeb filtered 
only"},"big-run-sampled_full_ind_minhash":{"x":[0.0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,28.311552000000002,29.360128000000003,31.45728,33.554432000000006,35.651584,37.748736,39.845888,41.94304,44.040192000000005,46.137344000000006,48.234496,50.331648,52.4288,54.525952000000004,56.623104000000005,58.720256000000006,60.817408,62.91456,65.011712,67.10886400000001,69.206016,71.303168,73.40032000000001,75.497472,77.59462400000001,79.691776,81.788928,83.88608,85.983232,88.08038400000001,90.177536,92.27468800000001,94.37184,96.468992,98.56614400000001,100.663296,102.76044800000001,104.8576,106.95475200000001,109.05190400000001,111.149056,113.24620800000001,115.34336,117.44051200000001,119.537664,121.634816,123.73196800000001,125.82912,127.92627200000001,130.023424,132.120576,134.21772800000002,136.31488000000002,138.412032,140.509184,142.606336,144.70348800000002,146.80064000000002,148.897792,150.994944,153.092096,155.18924800000002,157.28640000000001,159.383552,161.480704,163.577856,165.67500800000002,167.77216,169.869312,171.966464,174.06361600000002,176.16076800000002,178.25792,180.355072,182.452224,184.54937600000002,186.64652800000002,188.74368,190.840832,192.937984,195.03513600000002,197.13228800000002,199.22944,201.326592,203.423744,205.52089600000002,207.61804800000002,209.7152,211.812352,213.90950400000003,216.00665600000002,218.10380800000001,220.20096,222.298112,224.39526400000003,226.49241600000002,228.589568,230.68672,232.783872,234.88102400000002,236.97817600000002,239.075328,241.17248,243.269632,245.36678400000002,247.46393600000002,249.561088,251.65824,253.75539200000003,255.85254400000002,257.949696,260.046848,262.144,264.241152,266.338304,268.43545600000004,270.53260800000004,272.62976000000003,274.726912,276.824064,278.921216,281.018368,283.11552,285.212672,287.309824,289.40697600000004,291.50412800000004
,293.60128000000003,295.698432,297.795584,299.892736,301.989888,304.08704,306.184192,308.28134400000005,310.37849600000004,312.47564800000004,314.57280000000003,316.669952,318.767104,320.864256,322.961408,325.05856,327.155712,329.25286400000005,331.35001600000004,333.44716800000003,335.54432,337.641472,339.738624,341.835776,343.932928,346.03008,348.12723200000005,350.22438400000004],"y":[0.2509999871253967,0.2939999997615814,0.3174999952316284,0.3294999897480011,0.3510000109672546,0.3485000133514404,0.3634999990463257,0.3700000047683716,0.3524999916553497,0.375,0.3804999887943268,0.37950000166893,0.3824999928474426,0.3799999952316284,0.3865000009536743,0.395000010728836,0.3844999969005584,0.3894999921321869,0.3855000138282776,0.3955000042915344,0.3995000123977661,0.4009999930858612,0.3939999938011169,0.3970000147819519,0.3955000042915344,0.3955000042915344,0.4079999923706054,0.3959999978542328,0.4090000092983246,0.4045000076293945,0.3930000066757202,0.4099999964237213,0.4054999947547912,0.4124999940395355,0.4160000085830688,0.4149999916553497,0.4070000052452087,0.4110000133514404,0.4144999980926513,0.4120000004768371,0.4050000011920929,0.4165000021457672,0.4180000126361847,0.4050000011920929,0.4120000004768371,0.4135000109672546,0.4320000112056732,0.4284999966621399,0.4269999861717224,0.414000004529953,0.4255000054836273,0.4165000021457672,0.4144999980926513,0.4079999923706054,0.4205000102519989,0.4180000126361847,0.4244999885559082,0.4235000014305115,0.4244999885559082,0.4300000071525574,0.4160000085830688,0.4205000102519989,0.4329999983310699,0.4280000030994415,0.4244999885559082,0.4375,0.4244999885559082,0.4365000128746032,0.4329999983310699,0.4424999952316284,0.4390000104904175,0.4449999928474426,0.445499986410141,0.4320000112056732,0.4365000128746032,0.4244999885559082,0.429500013589859,0.4395000040531158,0.4284999966621399,0.44200000166893,0.4370000064373016,0.4399999976158142,0.4334999918937683,0.4429999887943268,0.44200000166893,0.4334999918937683,0.43849998
71253967,0.4365000128746032,0.4390000104904175,0.4354999959468841,0.44200000166893,0.4350000023841858,0.4390000104904175,0.4404999911785126,0.4410000145435333,0.4305000007152557,0.4490000009536743,0.4510000050067901,0.4605000019073486,0.4490000009536743,0.449999988079071,0.4595000147819519,0.4514999985694885,0.4490000009536743,0.4474999904632568,0.4444999992847442,0.4524999856948852,0.4465000033378601,0.4519999921321869,0.4550000131130218,0.4524999856948852,0.4429999887943268,0.4550000131130218,0.4510000050067901,0.4560000002384186,0.4465000033378601,0.4485000073909759,0.4524999856948852,0.4440000057220459,0.457500010728836,0.4544999897480011,0.4480000138282776,0.4584999978542328,0.4544999897480011,0.4569999873638153,0.4584999978542328,0.4444999992847442,0.4629999995231628,0.457500010728836,0.4555000066757202,0.4569999873638153,0.4474999904632568,0.4564999938011169,0.4595000147819519,0.4634999930858612,0.4555000066757202,0.453000009059906,0.457500010728836,0.4614999890327453,0.460999995470047,0.4539999961853027,0.4595000147819519,0.4629999995231628,0.4670000076293945,0.4580000042915344,0.4639999866485595,0.457500010728836,0.4595000147819519,0.4665000140666961,0.4584999978542328,0.4629999995231628,0.4595000147819519,0.4659999907016754,0.4645000100135803,0.4675000011920929,0.4690000116825104,0.4715000092983246,0.4634999930858612,0.4634999930858612,0.4639999866485595,0.465499997138977,0.4675000011920929,0.4670000076293945,0.4600000083446502,0.4595000147819519,0.4625000059604645,0.4600000083446502,0.4645000100135803,0.4715000092983246],"label":"FineWeb independent MinHash"}},"layout":{"xaxis":{"title":{"text":"Training tokens (billions)"}},"yaxis":{"title":{"text":"Agg Score"}},"title":{"text":"Independent dedup outperforms dedup across dumps"}}}
 
 
data/plots/cross_ind_unfiltered_comparison/commonsense_qa_acc_norm.json DELETED
@@ -1 +0,0 @@
1
- {"data":{"big-run-refinedweb":{"x":[0.0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,28.311552000000002,29.360128000000003,31.45728,33.554432000000006,35.651584,37.748736,39.845888,41.94304,44.040192000000005,46.137344000000006,48.234496,50.331648,52.4288,54.525952000000004,56.623104000000005,58.720256000000006,60.817408,62.91456,65.011712,67.10886400000001,69.206016,71.303168,73.40032000000001,75.497472,77.59462400000001,79.691776,81.788928,83.88608,85.983232,88.08038400000001,90.177536,92.27468800000001,94.37184,96.468992,98.56614400000001,100.663296,102.76044800000001,104.8576,106.95475200000001,109.05190400000001,111.149056,113.24620800000001,115.34336,117.44051200000001,119.537664,121.634816,123.73196800000001,125.82912,127.92627200000001,130.023424,132.120576,134.21772800000002,136.31488000000002,138.412032,140.509184,142.606336,144.70348800000002,146.80064000000002,148.897792,150.994944,153.092096,155.18924800000002,157.28640000000001,159.383552,161.480704,163.577856,165.67500800000002,167.77216,169.869312,171.966464,174.06361600000002,176.16076800000002,178.25792,180.355072,182.452224,184.54937600000002,186.64652800000002,188.74368,190.840832,192.937984,195.03513600000002,197.13228800000002,199.22944,201.326592,203.423744,205.52089600000002,207.61804800000002,209.7152,211.812352,213.90950400000003,216.00665600000002,218.10380800000001,220.20096,222.298112,224.39526400000003,226.49241600000002,228.589568,230.68672,232.783872,234.88102400000002,236.97817600000002,239.075328,241.17248,243.269632,245.36678400000002,247.46393600000002,249.561088,251.65824,253.75539200000003,255.85254400000002,257.949696,260.046848,262.144,264.241152,266.338304,268.43545600000004,270.53260800000004,272.62976000000003,274.726912,276.824064,278.921216,281.018368,283.11552,285.212672,287.309824,289.40697600000004,291.50412800000004,293.60128
000000003,295.698432,297.795584,299.892736,301.989888,304.08704,306.184192,308.28134400000005,310.37849600000004,312.47564800000004,314.57280000000003,316.669952,318.767104,320.864256,322.961408,325.05856,327.155712,329.25286400000005,331.35001600000004,333.44716800000003,335.54432,337.641472,339.738624,341.835776,343.932928,346.03008,348.12723200000005,350.22438400000004],"y":[0.2329999953508377,0.2529999911785126,0.2800000011920929,0.2870000004768371,0.3179999887943268,0.3129999935626983,0.3210000097751617,0.3160000145435333,0.3210000097751617,0.31700000166893,0.3330000042915344,0.3389999866485595,0.3289999961853027,0.3429999947547912,0.3370000123977661,0.3379999995231628,0.3459999859333038,0.3490000069141388,0.3470000028610229,0.3600000143051147,0.3569999933242798,0.3449999988079071,0.3650000095367431,0.3499999940395355,0.3540000021457672,0.3569999933242798,0.3619999885559082,0.3619999885559082,0.3580000102519989,0.3740000128746032,0.3709999918937683,0.3720000088214874,0.3759999871253967,0.3720000088214874,0.3659999966621399,0.3790000081062317,0.3610000014305115,0.3650000095367431,0.3650000095367431,0.3720000088214874,0.3729999959468841,0.3790000081062317,0.3680000007152557,0.3659999966621399,0.3680000007152557,0.3619999885559082,0.3619999885559082,0.3729999959468841,0.3720000088214874,0.3650000095367431,0.3759999871253967,0.367000013589859,0.3650000095367431,0.3680000007152557,0.3580000102519989,0.3589999973773956,0.3700000047683716,0.3680000007152557,0.367000013589859,0.3709999918937683,0.3880000114440918,0.3810000121593475,0.375,0.4040000140666961,0.3860000073909759,0.3840000033378601,0.3779999911785126,0.3729999959468841,0.3720000088214874,0.3799999952316284,0.3799999952316284,0.3779999911785126,0.3689999878406524,0.3770000040531158,0.3740000128746032,0.3819999992847442,0.3899999856948852,0.3799999952316284,0.3919999897480011,0.3720000088214874,0.3770000040531158,0.3930000066757202,0.3849999904632568,0.3899999856948852,0.3740000128746032,0.3740000128746032,0.
3799999952316284,0.3779999911785126,0.3880000114440918,0.3709999918937683,0.3810000121593475,0.3880000114440918,0.3980000019073486,0.3819999992847442,0.3849999904632568,0.3810000121593475,0.3819999992847442,0.3889999985694885,0.3840000033378601,0.3910000026226043,0.3899999856948852,0.3959999978542328,0.3880000114440918,0.3869999945163727,0.3779999911785126,0.3819999992847442,0.3919999897480011,0.3849999904632568,0.3860000073909759,0.3919999897480011,0.3819999992847442,0.3819999992847442,0.3889999985694885,0.3889999985694885,0.3860000073909759,0.3880000114440918,0.3889999985694885,0.3939999938011169,0.3899999856948852,0.3869999945163727,0.3910000026226043,0.3910000026226043,0.3910000026226043,0.3970000147819519,0.3970000147819519,0.3970000147819519,0.3970000147819519,0.3939999938011169,0.4000000059604645,0.3970000147819519,0.402999997138977,0.3959999978542328,0.3959999978542328,0.4000000059604645,0.4040000140666961,0.4020000100135803,0.3989999890327453,0.3919999897480011,0.3930000066757202,0.3930000066757202,0.3980000019073486,0.4000000059604645,0.395000010728836,0.3899999856948852,0.4059999883174896,0.4020000100135803,0.4020000100135803,0.4059999883174896,0.3970000147819519,0.4110000133514404,0.4050000011920929,0.4000000059604645,0.4090000092983246,0.3989999890327453,0.402999997138977,0.4009999930858612,0.3980000019073486,0.4090000092983246,0.4079999923706054,0.4079999923706054,0.4020000100135803,0.402999997138977,0.402999997138977,0.4059999883174896,0.4040000140666961,0.4059999883174896,0.3989999890327453,0.4070000052452087,0.4059999883174896],"label":"RefinedWeb"},"big-run-fineweb-cross-dedup-fixed":{"x":[0.0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,29.360128000000003,31.45728,33.554432000000006,35.651584,37.748736,39.845888,41.94304,44.040192000000005,46.137344000000006,48.234496,50.331648,52.4288,54.525952000000004,56
.623104000000005,58.720256000000006,60.817408,62.91456,65.011712,67.10886400000001,69.206016,71.303168,73.40032000000001,75.497472,77.59462400000001,79.691776,81.788928,83.88608,85.983232,88.08038400000001,90.177536,92.27468800000001,94.37184,96.468992,98.56614400000001,100.663296,102.76044800000001,104.8576,106.95475200000001,109.05190400000001,111.149056,113.24620800000001,115.34336,117.44051200000001,119.537664,121.634816,123.73196800000001,125.82912,127.92627200000001,130.023424,132.120576,134.21772800000002,136.31488000000002,138.412032,140.509184,142.606336,144.70348800000002,146.80064000000002,148.897792,150.994944,153.092096,155.18924800000002,157.28640000000001,159.383552,161.480704,163.577856,165.67500800000002,167.77216,169.869312,171.966464,174.06361600000002,176.16076800000002,178.25792,180.355072,182.452224,184.54937600000002,186.64652800000002,188.74368,190.840832,192.937984,195.03513600000002,197.13228800000002,199.22944,201.326592,203.423744,205.52089600000002,207.61804800000002,209.7152,211.812352,213.90950400000003,216.00665600000002,218.10380800000001,220.20096,222.298112,224.39526400000003,226.49241600000002,228.589568,230.68672,232.783872,234.88102400000002,236.97817600000002,239.075328,241.17248,243.269632,245.36678400000002,247.46393600000002,249.561088,251.65824,253.75539200000003,255.85254400000002,257.949696,260.046848,262.144,264.241152,266.338304,268.43545600000004,270.53260800000004,272.62976000000003,274.726912,276.824064,278.921216,281.018368,283.11552,285.212672,287.309824,289.40697600000004,291.50412800000004,293.60128000000003,295.698432,297.795584,299.892736,301.989888,304.08704,306.184192,308.28134400000005,310.37849600000004,312.47564800000004,314.57280000000003,316.669952,318.767104,320.864256,322.961408,325.05856,327.155712,329.25286400000005,331.35001600000004,333.44716800000003,335.54432,337.641472,339.738624,341.835776,343.932928,346.03008,348.12723200000005,350.22438400000004],"y":[0.2329999953508377,0.2540000081062317,0.2
870000004768371,0.2829999923706054,0.3210000097751617,0.3079999983310699,0.3230000138282776,0.3179999887943268,0.3160000145435333,0.3289999961853027,0.3199999928474426,0.324999988079071,0.3310000002384186,0.3260000050067901,0.335999995470047,0.335999995470047,0.3310000002384186,0.335999995470047,0.3339999914169311,0.3459999859333038,0.3330000042915344,0.3449999988079071,0.3429999947547912,0.3479999899864197,0.3420000076293945,0.3479999899864197,0.3459999859333038,0.3339999914169311,0.3350000083446502,0.3519999980926513,0.3440000116825104,0.3490000069141388,0.3379999995231628,0.3420000076293945,0.3610000014305115,0.3409999907016754,0.356000006198883,0.3630000054836273,0.3519999980926513,0.3510000109672546,0.3619999885559082,0.3569999933242798,0.3479999899864197,0.3529999852180481,0.3569999933242798,0.3529999852180481,0.3519999980926513,0.3549999892711639,0.356000006198883,0.3499999940395355,0.3479999899864197,0.3619999885559082,0.3459999859333038,0.3519999980926513,0.3529999852180481,0.3680000007152557,0.3519999980926513,0.3580000102519989,0.3549999892711639,0.3490000069141388,0.3499999940395355,0.3600000143051147,0.3709999918937683,0.3659999966621399,0.3569999933242798,0.3510000109672546,0.3600000143051147,0.367000013589859,0.3529999852180481,0.363999992609024,0.3630000054836273,0.3619999885559082,0.356000006198883,0.367000013589859,0.3600000143051147,0.3540000021457672,0.3589999973773956,0.3610000014305115,0.356000006198883,0.3680000007152557,0.3519999980926513,0.3549999892711639,0.3479999899864197,0.3549999892711639,0.3519999980926513,0.367000013589859,0.3600000143051147,0.3600000143051147,0.3680000007152557,0.356000006198883,0.3610000014305115,0.3689999878406524,0.367000013589859,0.3689999878406524,0.3720000088214874,0.3680000007152557,0.3569999933242798,0.3650000095367431,0.363999992609024,0.3610000014305115,0.3709999918937683,0.3569999933242798,0.3540000021457672,0.3619999885559082,0.3549999892711639,0.3650000095367431,0.3680000007152557,0.3589999973773956,0.35
6000006198883,0.3610000014305115,0.3619999885559082,0.3740000128746032,0.3700000047683716,0.3650000095367431,0.3819999992847442,0.3770000040531158,0.3810000121593475,0.3729999959468841,0.3680000007152557,0.3689999878406524,0.3740000128746032,0.3779999911785126,0.3720000088214874,0.3740000128746032,0.367000013589859,0.363999992609024,0.367000013589859,0.3689999878406524,0.3709999918937683,0.3709999918937683,0.375,0.3680000007152557,0.375,0.3630000054836273,0.3720000088214874,0.3819999992847442,0.3729999959468841,0.3689999878406524,0.363999992609024,0.3709999918937683,0.3659999966621399,0.3700000047683716,0.367000013589859,0.3709999918937683,0.3759999871253967,0.3759999871253967,0.3729999959468841,0.3729999959468841,0.3729999959468841,0.3779999911785126,0.375,0.3700000047683716,0.3659999966621399,0.3759999871253967,0.3779999911785126,0.3709999918937683,0.3840000033378601,0.3720000088214874,0.375,0.367000013589859,0.3770000040531158,0.3709999918937683,0.375,0.3709999918937683,0.3740000128746032,0.3740000128746032,0.375,0.3770000040531158],"label":"FineWeb full 
MinHash"},"big-run-sampled_full_filtered_no_dedup":{"x":[0.0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,28.311552000000002,29.360128000000003,31.45728,33.554432000000006,35.651584,37.748736,39.845888,41.94304,44.040192000000005,46.137344000000006,48.234496,50.331648,52.4288,54.525952000000004,56.623104000000005,58.720256000000006,60.817408,62.91456,65.011712,67.10886400000001,69.206016,71.303168,73.40032000000001,75.497472,77.59462400000001,79.691776,81.788928,83.88608,85.983232,88.08038400000001,90.177536,92.27468800000001,94.37184,96.468992,98.56614400000001,100.663296,102.76044800000001,104.8576,106.95475200000001,109.05190400000001,111.149056,113.24620800000001,115.34336,117.44051200000001,119.537664,121.634816,123.73196800000001,125.82912,127.92627200000001,130.023424,132.120576,134.21772800000002,136.31488000000002,138.412032,140.509184,142.606336,144.70348800000002,146.80064000000002,148.897792,150.994944,153.092096,155.18924800000002,157.28640000000001,159.383552,161.480704,163.577856,165.67500800000002,167.77216,169.869312,171.966464,174.06361600000002,176.16076800000002,178.25792,180.355072,182.452224,184.54937600000002,186.64652800000002,188.74368,190.840832,192.937984,195.03513600000002,197.13228800000002,199.22944,201.326592,203.423744,205.52089600000002,207.61804800000002,209.7152,211.812352,213.90950400000003,216.00665600000002,218.10380800000001,220.20096,222.298112,224.39526400000003,226.49241600000002,228.589568,230.68672,232.783872,234.88102400000002,236.97817600000002,239.075328,241.17248,243.269632,245.36678400000002,247.46393600000002,249.561088,251.65824,253.75539200000003,255.85254400000002,257.949696,260.046848,262.144,264.241152,266.338304,268.43545600000004,270.53260800000004,272.62976000000003,274.726912,276.824064,278.921216,281.018368,283.11552,285.212672,287.309824,289.40697600000004,291.50412
800000004,293.60128000000003,295.698432,297.795584,299.892736,301.989888,304.08704,306.184192,308.28134400000005,310.37849600000004,312.47564800000004,314.57280000000003,316.669952,318.767104,320.864256,322.961408,325.05856,327.155712,329.25286400000005,331.35001600000004,333.44716800000003,335.54432,337.641472,339.738624,341.835776,343.932928,346.03008,348.12723200000005,350.22438400000004],"y":[0.2329999953508377,0.2599999904632568,0.277999997138977,0.2910000085830688,0.3070000112056732,0.3140000104904175,0.3019999861717224,0.3059999942779541,0.3210000097751617,0.3230000138282776,0.324999988079071,0.3149999976158142,0.3109999895095825,0.3339999914169311,0.3289999961853027,0.3319999873638153,0.3319999873638153,0.3300000131130218,0.3370000123977661,0.3219999969005584,0.3370000123977661,0.328000009059906,0.3339999914169311,0.3420000076293945,0.3400000035762787,0.3440000116825104,0.3510000109672546,0.3409999907016754,0.3449999988079071,0.3339999914169311,0.3540000021457672,0.3339999914169311,0.3470000028610229,0.3470000028610229,0.3440000116825104,0.3589999973773956,0.3569999933242798,0.3630000054836273,0.3549999892711639,0.3589999973773956,0.3449999988079071,0.3549999892711639,0.3449999988079071,0.3389999866485595,0.3499999940395355,0.3610000014305115,0.3619999885559082,0.3600000143051147,0.3519999980926513,0.3479999899864197,0.356000006198883,0.3519999980926513,0.3440000116825104,0.3490000069141388,0.3519999980926513,0.3470000028610229,0.3589999973773956,0.3449999988079071,0.3490000069141388,0.356000006198883,0.3619999885559082,0.3569999933242798,0.3659999966621399,0.3610000014305115,0.3549999892711639,0.3700000047683716,0.363999992609024,0.3600000143051147,0.3580000102519989,0.3549999892711639,0.3619999885559082,0.3689999878406524,0.3630000054836273,0.363999992609024,0.3700000047683716,0.367000013589859,0.3630000054836273,0.3630000054836273,0.3700000047683716,0.3589999973773956,0.3540000021457672,0.3540000021457672,0.3659999966621399,0.3619999885559082,0.3589999973
773956,0.3650000095367431,0.3709999918937683,0.3680000007152557,0.3689999878406524,0.3650000095367431,0.3729999959468841,0.3619999885559082,0.3689999878406524,0.3569999933242798,0.3510000109672546,0.3680000007152557,0.363999992609024,0.3700000047683716,0.3659999966621399,0.3659999966621399,0.363999992609024,0.3619999885559082,0.3659999966621399,0.3680000007152557,0.3610000014305115,0.3720000088214874,0.3729999959468841,0.3810000121593475,0.3630000054836273,0.3689999878406524,0.3709999918937683,0.3759999871253967,0.382999986410141,0.3729999959468841,0.3720000088214874,0.3680000007152557,0.3659999966621399,0.3650000095367431,0.363999992609024,0.3589999973773956,0.356000006198883,0.3650000095367431,0.3659999966621399,0.367000013589859,0.3729999959468841,0.3720000088214874,0.375,0.3740000128746032,0.3700000047683716,0.3569999933242798,0.3759999871253967,0.3740000128746032,0.367000013589859,0.3770000040531158,0.3759999871253967,0.3709999918937683,0.3779999911785126,0.3709999918937683,0.3689999878406524,0.3799999952316284,0.3630000054836273,0.375,0.3700000047683716,0.3700000047683716,0.3729999959468841,0.3720000088214874,0.3790000081062317,0.375,0.3729999959468841,0.3770000040531158,0.3799999952316284,0.3779999911785126,0.3720000088214874,0.3799999952316284,0.3759999871253967,0.3799999952316284,0.3790000081062317,0.375,0.3740000128746032,0.3729999959468841,0.3840000033378601,0.3659999966621399,0.3759999871253967,0.3720000088214874,0.3720000088214874,0.3759999871253967,0.375,0.3650000095367431,0.3729999959468841],"label":"FineWeb filtered 
only"},"big-run-sampled_full_ind_minhash":{"x":[0.0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,28.311552000000002,29.360128000000003,31.45728,33.554432000000006,35.651584,37.748736,39.845888,41.94304,44.040192000000005,46.137344000000006,48.234496,50.331648,52.4288,54.525952000000004,56.623104000000005,58.720256000000006,60.817408,62.91456,65.011712,67.10886400000001,69.206016,71.303168,73.40032000000001,75.497472,77.59462400000001,79.691776,81.788928,83.88608,85.983232,88.08038400000001,90.177536,92.27468800000001,94.37184,96.468992,98.56614400000001,100.663296,102.76044800000001,104.8576,106.95475200000001,109.05190400000001,111.149056,113.24620800000001,115.34336,117.44051200000001,119.537664,121.634816,123.73196800000001,125.82912,127.92627200000001,130.023424,132.120576,134.21772800000002,136.31488000000002,138.412032,140.509184,142.606336,144.70348800000002,146.80064000000002,148.897792,150.994944,153.092096,155.18924800000002,157.28640000000001,159.383552,161.480704,163.577856,165.67500800000002,167.77216,169.869312,171.966464,174.06361600000002,176.16076800000002,178.25792,180.355072,182.452224,184.54937600000002,186.64652800000002,188.74368,190.840832,192.937984,195.03513600000002,197.13228800000002,199.22944,201.326592,203.423744,205.52089600000002,207.61804800000002,209.7152,211.812352,213.90950400000003,216.00665600000002,218.10380800000001,220.20096,222.298112,224.39526400000003,226.49241600000002,228.589568,230.68672,232.783872,234.88102400000002,236.97817600000002,239.075328,241.17248,243.269632,245.36678400000002,247.46393600000002,249.561088,251.65824,253.75539200000003,255.85254400000002,257.949696,260.046848,262.144,264.241152,266.338304,268.43545600000004,270.53260800000004,272.62976000000003,274.726912,276.824064,278.921216,281.018368,283.11552,285.212672,287.309824,289.40697600000004,291.50412800000004
,293.60128000000003,295.698432,297.795584,299.892736,301.989888,304.08704,306.184192,308.28134400000005,310.37849600000004,312.47564800000004,314.57280000000003,316.669952,318.767104,320.864256,322.961408,325.05856,327.155712,329.25286400000005,331.35001600000004,333.44716800000003,335.54432,337.641472,339.738624,341.835776,343.932928,346.03008,348.12723200000005,350.22438400000004],"y":[0.2329999953508377,0.2639999985694885,0.2790000140666961,0.296999990940094,0.3109999895095825,0.3240000009536743,0.3070000112056732,0.3210000097751617,0.31700000166893,0.3339999914169311,0.324999988079071,0.3260000050067901,0.3330000042915344,0.3409999907016754,0.3350000083446502,0.3400000035762787,0.3529999852180481,0.3400000035762787,0.3490000069141388,0.3529999852180481,0.3499999940395355,0.3459999859333038,0.3370000123977661,0.356000006198883,0.3490000069141388,0.3429999947547912,0.3490000069141388,0.3610000014305115,0.3499999940395355,0.3569999933242798,0.3610000014305115,0.3619999885559082,0.3449999988079071,0.3409999907016754,0.3420000076293945,0.3449999988079071,0.3409999907016754,0.3379999995231628,0.3420000076293945,0.3569999933242798,0.3529999852180481,0.3610000014305115,0.363999992609024,0.3600000143051147,0.3540000021457672,0.3499999940395355,0.3689999878406524,0.367000013589859,0.3569999933242798,0.3610000014305115,0.3680000007152557,0.3630000054836273,0.3709999918937683,0.3540000021457672,0.3580000102519989,0.367000013589859,0.3529999852180481,0.356000006198883,0.3569999933242798,0.3610000014305115,0.3700000047683716,0.375,0.3709999918937683,0.3819999992847442,0.3709999918937683,0.3650000095367431,0.3709999918937683,0.3650000095367431,0.3709999918937683,0.3840000033378601,0.3740000128746032,0.375,0.356000006198883,0.3689999878406524,0.3700000047683716,0.3819999992847442,0.3799999952316284,0.3779999911785126,0.3729999959468841,0.3709999918937683,0.3759999871253967,0.3709999918937683,0.3759999871253967,0.3779999911785126,0.3779999911785126,0.3689999878406524,0.384000003
3378601,0.3860000073909759,0.3849999904632568,0.3790000081062317,0.375,0.3849999904632568,0.3720000088214874,0.3770000040531158,0.3799999952316284,0.3810000121593475,0.382999986410141,0.3650000095367431,0.3740000128746032,0.382999986410141,0.3689999878406524,0.3759999871253967,0.3869999945163727,0.3889999985694885,0.3860000073909759,0.3819999992847442,0.3689999878406524,0.3860000073909759,0.3810000121593475,0.382999986410141,0.3819999992847442,0.3840000033378601,0.3889999985694885,0.3880000114440918,0.3849999904632568,0.3799999952316284,0.3910000026226043,0.3989999890327453,0.3880000114440918,0.3880000114440918,0.3840000033378601,0.3880000114440918,0.3860000073909759,0.3919999897480011,0.3880000114440918,0.3939999938011169,0.3869999945163727,0.3919999897480011,0.3910000026226043,0.382999986410141,0.3930000066757202,0.3840000033378601,0.3880000114440918,0.3840000033378601,0.3819999992847442,0.382999986410141,0.3880000114440918,0.3860000073909759,0.3860000073909759,0.3869999945163727,0.3860000073909759,0.3899999856948852,0.3819999992847442,0.3860000073909759,0.3889999985694885,0.3840000033378601,0.395000010728836,0.3899999856948852,0.3899999856948852,0.3910000026226043,0.3959999978542328,0.3959999978542328,0.3919999897480011,0.3980000019073486,0.3880000114440918,0.3930000066757202,0.4000000059604645,0.3919999897480011,0.3919999897480011,0.4040000140666961,0.3930000066757202,0.3970000147819519,0.3889999985694885,0.3959999978542328,0.3930000066757202,0.3939999938011169,0.3970000147819519,0.3910000026226043,0.4020000100135803],"label":"FineWeb independent MinHash"}},"layout":{"xaxis":{"title":{"text":"Training tokens (billions)"}},"yaxis":{"title":{"text":"Agg Score"}},"title":{"text":"Independent dedup outperforms dedup across dumps"}}}
 
 
data/plots/cross_ind_unfiltered_comparison/hellaswag_acc_norm.json DELETED
@@ -1 +0,0 @@
1
- {"data":{"big-run-refinedweb":{"x":[0.0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,28.311552000000002,29.360128000000003,31.45728,33.554432000000006,35.651584,37.748736,39.845888,41.94304,44.040192000000005,46.137344000000006,48.234496,50.331648,52.4288,54.525952000000004,56.623104000000005,58.720256000000006,60.817408,62.91456,65.011712,67.10886400000001,69.206016,71.303168,73.40032000000001,75.497472,77.59462400000001,79.691776,81.788928,83.88608,85.983232,88.08038400000001,90.177536,92.27468800000001,94.37184,96.468992,98.56614400000001,100.663296,102.76044800000001,104.8576,106.95475200000001,109.05190400000001,111.149056,113.24620800000001,115.34336,117.44051200000001,119.537664,121.634816,123.73196800000001,125.82912,127.92627200000001,130.023424,132.120576,134.21772800000002,136.31488000000002,138.412032,140.509184,142.606336,144.70348800000002,146.80064000000002,148.897792,150.994944,153.092096,155.18924800000002,157.28640000000001,159.383552,161.480704,163.577856,165.67500800000002,167.77216,169.869312,171.966464,174.06361600000002,176.16076800000002,178.25792,180.355072,182.452224,184.54937600000002,186.64652800000002,188.74368,190.840832,192.937984,195.03513600000002,197.13228800000002,199.22944,201.326592,203.423744,205.52089600000002,207.61804800000002,209.7152,211.812352,213.90950400000003,216.00665600000002,218.10380800000001,220.20096,222.298112,224.39526400000003,226.49241600000002,228.589568,230.68672,232.783872,234.88102400000002,236.97817600000002,239.075328,241.17248,243.269632,245.36678400000002,247.46393600000002,249.561088,251.65824,253.75539200000003,255.85254400000002,257.949696,260.046848,262.144,264.241152,266.338304,268.43545600000004,270.53260800000004,272.62976000000003,274.726912,276.824064,278.921216,281.018368,283.11552,285.212672,287.309824,289.40697600000004,291.50412800000004,293.60128
000000003,295.698432,297.795584,299.892736,301.989888,304.08704,306.184192,308.28134400000005,310.37849600000004,312.47564800000004,314.57280000000003,316.669952,318.767104,320.864256,322.961408,325.05856,327.155712,329.25286400000005,331.35001600000004,333.44716800000003,335.54432,337.641472,339.738624,341.835776,343.932928,346.03008,348.12723200000005,350.22438400000004],"y":[0.257999986410141,0.2759999930858612,0.328000009059906,0.3499999940395355,0.3889999985694885,0.3910000026226043,0.402999997138977,0.4210000038146972,0.4280000030994415,0.4359999895095825,0.4469999969005584,0.4440000057220459,0.4600000083446502,0.4690000116825104,0.4600000083446502,0.4679999947547912,0.4729999899864197,0.4760000109672546,0.4839999973773956,0.4939999878406524,0.488999992609024,0.4990000128746032,0.4979999959468841,0.4979999959468841,0.5009999871253967,0.5,0.5090000033378601,0.5070000290870667,0.5180000066757202,0.5199999809265137,0.5109999775886536,0.5130000114440918,0.5249999761581421,0.5149999856948853,0.5299999713897705,0.5339999794960022,0.5189999938011169,0.5289999842643738,0.5249999761581421,0.5320000052452087,0.5460000038146973,0.5419999957084656,0.5260000228881836,0.5289999842643738,0.546999990940094,0.5419999957084656,0.5419999957084656,0.5460000038146973,0.5419999957084656,0.5389999747276306,0.5440000295639038,0.5569999814033508,0.5450000166893005,0.5329999923706055,0.5580000281333923,0.5339999794960022,0.5540000200271606,0.5460000038146973,0.5479999780654907,0.5529999732971191,0.5540000200271606,0.5619999766349792,0.5490000247955322,0.5410000085830688,0.5490000247955322,0.5569999814033508,0.550000011920929,0.5479999780654907,0.5630000233650208,0.546999990940094,0.5559999942779541,0.5600000023841858,0.5509999990463257,0.5569999814033508,0.5569999814033508,0.5580000281333923,0.5619999766349792,0.5580000281333923,0.5669999718666077,0.5569999814033508,0.5709999799728394,0.5529999732971191,0.5649999976158142,0.5659999847412109,0.5659999847412109,0.5690000057220459,0.56000
00023841858,0.5580000281333923,0.5540000200271606,0.5640000104904175,0.5680000185966492,0.5709999799728394,0.5649999976158142,0.5680000185966492,0.5730000138282776,0.5640000104904175,0.5799999833106995,0.5699999928474426,0.5669999718666077,0.5680000185966492,0.5770000219345093,0.5709999799728394,0.5759999752044678,0.5690000057220459,0.5789999961853027,0.5740000009536743,0.5709999799728394,0.5789999961853027,0.5709999799728394,0.5770000219345093,0.5770000219345093,0.5730000138282776,0.5809999704360962,0.5720000267028809,0.5849999785423279,0.5820000171661377,0.5799999833106995,0.5830000042915344,0.5759999752044678,0.5730000138282776,0.5799999833106995,0.5830000042915344,0.5860000252723694,0.5789999961853027,0.5789999961853027,0.5860000252723694,0.5979999899864197,0.5920000076293945,0.5820000171661377,0.5870000123977661,0.5889999866485596,0.5839999914169312,0.5849999785423279,0.5899999737739563,0.5920000076293945,0.593999981880188,0.597000002861023,0.5889999866485596,0.5889999866485596,0.5849999785423279,0.5899999737739563,0.5989999771118164,0.5899999737739563,0.5839999914169312,0.5910000205039978,0.5910000205039978,0.5929999947547913,0.5920000076293945,0.5929999947547913,0.5889999866485596,0.5899999737739563,0.593999981880188,0.5910000205039978,0.5960000157356262,0.5920000076293945,0.5889999866485596,0.593999981880188,0.5879999995231628,0.5960000157356262,0.5920000076293945,0.5960000157356262,0.5960000157356262,0.5920000076293945,0.6010000109672546,0.5920000076293945,0.5899999737739563,0.5889999866485596,0.5920000076293945,0.6019999980926514],"label":"RefinedWeb"},"big-run-fineweb-cross-dedup-fixed":{"x":[0.0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,29.360128000000003,31.45728,33.554432000000006,35.651584,37.748736,39.845888,41.94304,44.040192000000005,46.137344000000006,48.234496,50.331648,52.4288,54.525952000000004,56.623
104000000005,58.720256000000006,60.817408,62.91456,65.011712,67.10886400000001,69.206016,71.303168,73.40032000000001,75.497472,77.59462400000001,79.691776,81.788928,83.88608,85.983232,88.08038400000001,90.177536,92.27468800000001,94.37184,96.468992,98.56614400000001,100.663296,102.76044800000001,104.8576,106.95475200000001,109.05190400000001,111.149056,113.24620800000001,115.34336,117.44051200000001,119.537664,121.634816,123.73196800000001,125.82912,127.92627200000001,130.023424,132.120576,134.21772800000002,136.31488000000002,138.412032,140.509184,142.606336,144.70348800000002,146.80064000000002,148.897792,150.994944,153.092096,155.18924800000002,157.28640000000001,159.383552,161.480704,163.577856,165.67500800000002,167.77216,169.869312,171.966464,174.06361600000002,176.16076800000002,178.25792,180.355072,182.452224,184.54937600000002,186.64652800000002,188.74368,190.840832,192.937984,195.03513600000002,197.13228800000002,199.22944,201.326592,203.423744,205.52089600000002,207.61804800000002,209.7152,211.812352,213.90950400000003,216.00665600000002,218.10380800000001,220.20096,222.298112,224.39526400000003,226.49241600000002,228.589568,230.68672,232.783872,234.88102400000002,236.97817600000002,239.075328,241.17248,243.269632,245.36678400000002,247.46393600000002,249.561088,251.65824,253.75539200000003,255.85254400000002,257.949696,260.046848,262.144,264.241152,266.338304,268.43545600000004,270.53260800000004,272.62976000000003,274.726912,276.824064,278.921216,281.018368,283.11552,285.212672,287.309824,289.40697600000004,291.50412800000004,293.60128000000003,295.698432,297.795584,299.892736,301.989888,304.08704,306.184192,308.28134400000005,310.37849600000004,312.47564800000004,314.57280000000003,316.669952,318.767104,320.864256,322.961408,325.05856,327.155712,329.25286400000005,331.35001600000004,333.44716800000003,335.54432,337.641472,339.738624,341.835776,343.932928,346.03008,348.12723200000005,350.22438400000004],"y":[0.257999986410141,0.3009999990463257,0.314999
9976158142,0.3400000035762787,0.3610000014305115,0.3680000007152557,0.3799999952316284,0.4020000100135803,0.4180000126361847,0.4129999876022339,0.4259999990463257,0.4239999949932098,0.4440000057220459,0.44200000166893,0.4440000057220459,0.4580000042915344,0.4510000050067901,0.4560000002384186,0.4650000035762787,0.4569999873638153,0.460999995470047,0.4659999907016754,0.4679999947547912,0.4779999852180481,0.4740000069141388,0.4600000083446502,0.4860000014305115,0.4790000021457672,0.4880000054836273,0.4930000007152557,0.4860000014305115,0.4850000143051147,0.4900000095367431,0.4850000143051147,0.4900000095367431,0.4959999918937683,0.492000013589859,0.4850000143051147,0.4970000088214874,0.4900000095367431,0.4979999959468841,0.503000020980835,0.5040000081062317,0.4990000128746032,0.4979999959468841,0.5080000162124634,0.5019999742507935,0.4970000088214874,0.4939999878406524,0.5120000243186951,0.5070000290870667,0.503000020980835,0.5070000290870667,0.503000020980835,0.5109999775886536,0.5080000162124634,0.5009999871253967,0.5090000033378601,0.5,0.5149999856948853,0.5109999775886536,0.5099999904632568,0.5130000114440918,0.5080000162124634,0.5080000162124634,0.5109999775886536,0.5099999904632568,0.5239999890327454,0.5180000066757202,0.5130000114440918,0.5120000243186951,0.5180000066757202,0.515999972820282,0.5260000228881836,0.5199999809265137,0.5239999890327454,0.5220000147819519,0.527999997138977,0.5249999761581421,0.5270000100135803,0.5249999761581421,0.5189999938011169,0.5230000019073486,0.5249999761581421,0.5199999809265137,0.5230000019073486,0.5299999713897705,0.5350000262260437,0.5339999794960022,0.5329999923706055,0.5249999761581421,0.5299999713897705,0.5360000133514404,0.5329999923706055,0.5410000085830688,0.5249999761581421,0.5289999842643738,0.5360000133514404,0.5360000133514404,0.5370000004768372,0.5389999747276306,0.5289999842643738,0.5299999713897705,0.5410000085830688,0.5329999923706055,0.5419999957084656,0.5410000085830688,0.527999997138977,0.5370000004768372,
0.5429999828338623,0.5419999957084656,0.5389999747276306,0.5320000052452087,0.5350000262260437,0.5419999957084656,0.5410000085830688,0.5339999794960022,0.5440000295639038,0.5329999923706055,0.5429999828338623,0.5460000038146973,0.5400000214576721,0.5429999828338623,0.5479999780654907,0.550000011920929,0.5490000247955322,0.5410000085830688,0.5450000166893005,0.5429999828338623,0.550000011920929,0.5529999732971191,0.5490000247955322,0.5450000166893005,0.5450000166893005,0.5519999861717224,0.5569999814033508,0.5460000038146973,0.546999990940094,0.5509999990463257,0.5509999990463257,0.5450000166893005,0.5440000295639038,0.5440000295639038,0.546999990940094,0.5479999780654907,0.546999990940094,0.5460000038146973,0.546999990940094,0.5479999780654907,0.5460000038146973,0.5460000038146973,0.5440000295639038,0.5410000085830688,0.5440000295639038,0.5389999747276306,0.5410000085830688,0.546999990940094,0.546999990940094,0.5479999780654907,0.546999990940094,0.550000011920929,0.546999990940094,0.5460000038146973,0.546999990940094,0.5479999780654907,0.5479999780654907,0.5519999861717224,0.550000011920929],"label":"FineWeb full 
MinHash"},"big-run-sampled_full_filtered_no_dedup":{"x":[0.0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,28.311552000000002,29.360128000000003,31.45728,33.554432000000006,35.651584,37.748736,39.845888,41.94304,44.040192000000005,46.137344000000006,48.234496,50.331648,52.4288,54.525952000000004,56.623104000000005,58.720256000000006,60.817408,62.91456,65.011712,67.10886400000001,69.206016,71.303168,73.40032000000001,75.497472,77.59462400000001,79.691776,81.788928,83.88608,85.983232,88.08038400000001,90.177536,92.27468800000001,94.37184,96.468992,98.56614400000001,100.663296,102.76044800000001,104.8576,106.95475200000001,109.05190400000001,111.149056,113.24620800000001,115.34336,117.44051200000001,119.537664,121.634816,123.73196800000001,125.82912,127.92627200000001,130.023424,132.120576,134.21772800000002,136.31488000000002,138.412032,140.509184,142.606336,144.70348800000002,146.80064000000002,148.897792,150.994944,153.092096,155.18924800000002,157.28640000000001,159.383552,161.480704,163.577856,165.67500800000002,167.77216,169.869312,171.966464,174.06361600000002,176.16076800000002,178.25792,180.355072,182.452224,184.54937600000002,186.64652800000002,188.74368,190.840832,192.937984,195.03513600000002,197.13228800000002,199.22944,201.326592,203.423744,205.52089600000002,207.61804800000002,209.7152,211.812352,213.90950400000003,216.00665600000002,218.10380800000001,220.20096,222.298112,224.39526400000003,226.49241600000002,228.589568,230.68672,232.783872,234.88102400000002,236.97817600000002,239.075328,241.17248,243.269632,245.36678400000002,247.46393600000002,249.561088,251.65824,253.75539200000003,255.85254400000002,257.949696,260.046848,262.144,264.241152,266.338304,268.43545600000004,270.53260800000004,272.62976000000003,274.726912,276.824064,278.921216,281.018368,283.11552,285.212672,287.309824,289.40697600000004,291.50412
800000004,293.60128000000003,295.698432,297.795584,299.892736,301.989888,304.08704,306.184192,308.28134400000005,310.37849600000004,312.47564800000004,314.57280000000003,316.669952,318.767104,320.864256,322.961408,325.05856,327.155712,329.25286400000005,331.35001600000004,333.44716800000003,335.54432,337.641472,339.738624,341.835776,343.932928,346.03008,348.12723200000005,350.22438400000004],"y":[0.257999986410141,0.2809999883174896,0.3230000138282776,0.3409999907016754,0.3600000143051147,0.3569999933242798,0.3889999985694885,0.395000010728836,0.4199999868869781,0.4180000126361847,0.421999990940094,0.4289999902248382,0.4350000023841858,0.4359999895095825,0.4469999969005584,0.4350000023841858,0.4480000138282776,0.4480000138282776,0.453000009059906,0.4550000131130218,0.4589999914169311,0.4639999866485595,0.4600000083446502,0.460999995470047,0.4589999914169311,0.481000006198883,0.4769999980926513,0.4709999859333038,0.4740000069141388,0.4679999947547912,0.4790000021457672,0.4729999899864197,0.4819999933242798,0.4850000143051147,0.4819999933242798,0.4819999933242798,0.4880000054836273,0.4869999885559082,0.4959999918937683,0.4850000143051147,0.4959999918937683,0.492000013589859,0.503000020980835,0.4930000007152557,0.5099999904632568,0.5040000081062317,0.5009999871253967,0.4970000088214874,0.4979999959468841,0.5059999823570251,0.5070000290870667,0.5040000081062317,0.5059999823570251,0.5049999952316284,0.5080000162124634,0.5049999952316284,0.5019999742507935,0.5120000243186951,0.5170000195503235,0.5170000195503235,0.5090000033378601,0.5239999890327454,0.527999997138977,0.5230000019073486,0.5210000276565552,0.5149999856948853,0.5189999938011169,0.5270000100135803,0.5149999856948853,0.5099999904632568,0.5299999713897705,0.5199999809265137,0.5230000019073486,0.5260000228881836,0.5249999761581421,0.5239999890327454,0.5329999923706055,0.5210000276565552,0.5260000228881836,0.5170000195503235,0.531000018119812,0.5289999842643738,0.531000018119812,0.5270000100135803,0.5299999713897
705,0.5370000004768372,0.5379999876022339,0.5419999957084656,0.5329999923706055,0.5360000133514404,0.5299999713897705,0.5360000133514404,0.5270000100135803,0.5450000166893005,0.5410000085830688,0.546999990940094,0.5329999923706055,0.5329999923706055,0.5379999876022339,0.5299999713897705,0.5429999828338623,0.5360000133514404,0.5339999794960022,0.5419999957084656,0.5410000085830688,0.5370000004768372,0.5389999747276306,0.527999997138977,0.5400000214576721,0.5400000214576721,0.531000018119812,0.5440000295639038,0.5460000038146973,0.5479999780654907,0.5460000038146973,0.5410000085830688,0.5509999990463257,0.5479999780654907,0.5410000085830688,0.5389999747276306,0.550000011920929,0.5569999814033508,0.550000011920929,0.5490000247955322,0.5490000247955322,0.5569999814033508,0.5519999861717224,0.5479999780654907,0.5559999942779541,0.5550000071525574,0.5460000038146973,0.5540000200271606,0.5460000038146973,0.5460000038146973,0.5509999990463257,0.5460000038146973,0.5550000071525574,0.5479999780654907,0.5479999780654907,0.5540000200271606,0.5550000071525574,0.5529999732971191,0.5529999732971191,0.5509999990463257,0.5509999990463257,0.5419999957084656,0.546999990940094,0.5509999990463257,0.5559999942779541,0.5490000247955322,0.5509999990463257,0.5529999732971191,0.550000011920929,0.5540000200271606,0.5550000071525574,0.5580000281333923,0.550000011920929,0.5569999814033508,0.5490000247955322,0.5519999861717224,0.5519999861717224,0.5559999942779541,0.5569999814033508,0.5559999942779541,0.5550000071525574,0.5559999942779541,0.5490000247955322,0.5550000071525574,0.5600000023841858],"label":"FineWeb filtered 
only"},"big-run-sampled_full_ind_minhash":{"x":[0.0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,28.311552000000002,29.360128000000003,31.45728,33.554432000000006,35.651584,37.748736,39.845888,41.94304,44.040192000000005,46.137344000000006,48.234496,50.331648,52.4288,54.525952000000004,56.623104000000005,58.720256000000006,60.817408,62.91456,65.011712,67.10886400000001,69.206016,71.303168,73.40032000000001,75.497472,77.59462400000001,79.691776,81.788928,83.88608,85.983232,88.08038400000001,90.177536,92.27468800000001,94.37184,96.468992,98.56614400000001,100.663296,102.76044800000001,104.8576,106.95475200000001,109.05190400000001,111.149056,113.24620800000001,115.34336,117.44051200000001,119.537664,121.634816,123.73196800000001,125.82912,127.92627200000001,130.023424,132.120576,134.21772800000002,136.31488000000002,138.412032,140.509184,142.606336,144.70348800000002,146.80064000000002,148.897792,150.994944,153.092096,155.18924800000002,157.28640000000001,159.383552,161.480704,163.577856,165.67500800000002,167.77216,169.869312,171.966464,174.06361600000002,176.16076800000002,178.25792,180.355072,182.452224,184.54937600000002,186.64652800000002,188.74368,190.840832,192.937984,195.03513600000002,197.13228800000002,199.22944,201.326592,203.423744,205.52089600000002,207.61804800000002,209.7152,211.812352,213.90950400000003,216.00665600000002,218.10380800000001,220.20096,222.298112,224.39526400000003,226.49241600000002,228.589568,230.68672,232.783872,234.88102400000002,236.97817600000002,239.075328,241.17248,243.269632,245.36678400000002,247.46393600000002,249.561088,251.65824,253.75539200000003,255.85254400000002,257.949696,260.046848,262.144,264.241152,266.338304,268.43545600000004,270.53260800000004,272.62976000000003,274.726912,276.824064,278.921216,281.018368,283.11552,285.212672,287.309824,289.40697600000004,291.50412800000004
,293.60128000000003,295.698432,297.795584,299.892736,301.989888,304.08704,306.184192,308.28134400000005,310.37849600000004,312.47564800000004,314.57280000000003,316.669952,318.767104,320.864256,322.961408,325.05856,327.155712,329.25286400000005,331.35001600000004,333.44716800000003,335.54432,337.641472,339.738624,341.835776,343.932928,346.03008,348.12723200000005,350.22438400000004],"y":[0.257999986410141,0.3019999861717224,0.3059999942779541,0.335999995470047,0.3610000014305115,0.3819999992847442,0.4009999930858612,0.4020000100135803,0.4250000119209289,0.4309999942779541,0.4469999969005584,0.4519999921321869,0.453000009059906,0.4580000042915344,0.4729999899864197,0.4749999940395355,0.4699999988079071,0.4799999892711639,0.4749999940395355,0.4769999980926513,0.481000006198883,0.4839999973773956,0.4959999918937683,0.5040000081062317,0.4970000088214874,0.4979999959468841,0.5070000290870667,0.5049999952316284,0.5109999775886536,0.515999972820282,0.5120000243186951,0.5120000243186951,0.515999972820282,0.5120000243186951,0.5249999761581421,0.5170000195503235,0.5199999809265137,0.5270000100135803,0.5170000195503235,0.5220000147819519,0.5260000228881836,0.5360000133514404,0.5339999794960022,0.5370000004768372,0.5339999794960022,0.5329999923706055,0.531000018119812,0.5329999923706055,0.5400000214576721,0.5429999828338623,0.5389999747276306,0.5419999957084656,0.5429999828338623,0.5360000133514404,0.5299999713897705,0.546999990940094,0.5360000133514404,0.5450000166893005,0.5440000295639038,0.5350000262260437,0.5339999794960022,0.5419999957084656,0.5450000166893005,0.5460000038146973,0.5370000004768372,0.5490000247955322,0.5440000295639038,0.550000011920929,0.5490000247955322,0.5450000166893005,0.5490000247955322,0.5559999942779541,0.5559999942779541,0.5410000085830688,0.5419999957084656,0.5529999732971191,0.5460000038146973,0.5540000200271606,0.5379999876022339,0.5509999990463257,0.5540000200271606,0.5419999957084656,0.546999990940094,0.5479999780654907,0.5460000038146973,0.54
60000038146973,0.5519999861717224,0.5600000023841858,0.5540000200271606,0.5509999990463257,0.5609999895095825,0.5619999766349792,0.5590000152587891,0.5559999942779541,0.5580000281333923,0.5640000104904175,0.5649999976158142,0.5590000152587891,0.5550000071525574,0.5630000233650208,0.5630000233650208,0.5609999895095825,0.5559999942779541,0.5609999895095825,0.5630000233650208,0.5680000185966492,0.5630000233650208,0.5690000057220459,0.5609999895095825,0.5590000152587891,0.5640000104904175,0.5690000057220459,0.5640000104904175,0.5630000233650208,0.574999988079071,0.5630000233650208,0.5619999766349792,0.5690000057220459,0.5770000219345093,0.5690000057220459,0.5609999895095825,0.5649999976158142,0.5680000185966492,0.5590000152587891,0.5600000023841858,0.5619999766349792,0.5799999833106995,0.5619999766349792,0.5699999928474426,0.5709999799728394,0.5669999718666077,0.5680000185966492,0.5609999895095825,0.5649999976158142,0.5680000185966492,0.5730000138282776,0.5720000267028809,0.5709999799728394,0.5770000219345093,0.574999988079071,0.5730000138282776,0.5690000057220459,0.5740000009536743,0.578000009059906,0.574999988079071,0.5820000171661377,0.5730000138282776,0.5740000009536743,0.574999988079071,0.5770000219345093,0.5789999961853027,0.5759999752044678,0.5720000267028809,0.5770000219345093,0.5759999752044678,0.5789999961853027,0.5789999961853027,0.5730000138282776,0.5789999961853027,0.5759999752044678,0.5690000057220459,0.5849999785423279,0.5759999752044678,0.5699999928474426,0.5789999961853027,0.5820000171661377,0.5730000138282776,0.5730000138282776,0.5789999961853027],"label":"FineWeb independent MinHash"}},"layout":{"xaxis":{"title":{"text":"Training tokens (billions)"}},"yaxis":{"title":{"text":"Agg Score"}},"title":{"text":"Independent dedup outperforms dedup across dumps"}}}
 
 
data/plots/cross_ind_unfiltered_comparison/index.json DELETED
@@ -1 +0,0 @@
1
- {"files":{"agg_score":{"file":"agg_score.json"},"commonsense_qa/acc_norm":{"file":"commonsense_qa_acc_norm.json"},"hellaswag/acc_norm":{"file":"hellaswag_acc_norm.json"},"openbookqa/acc_norm":{"file":"openbookqa_acc_norm.json"},"piqa/acc_norm":{"file":"piqa_acc_norm.json"},"winogrande/acc_norm":{"file":"winogrande_acc_norm.json"},"arc/acc_norm":{"file":"arc_acc_norm.json"},"mmlu/acc_norm":{"file":"mmlu_acc_norm.json"}},"settings":{"slider":{"min":0,"max":30,"default":5}}}
 
 
data/plots/cross_ind_unfiltered_comparison/mmlu_acc_norm.json DELETED
@@ -1 +0,0 @@
1
- {"data":{"big-run-refinedweb":{"x":[0.0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,28.311552000000002,29.360128000000003,31.45728,33.554432000000006,35.651584,37.748736,39.845888,41.94304,44.040192000000005,46.137344000000006,48.234496,50.331648,52.4288,54.525952000000004,56.623104000000005,58.720256000000006,60.817408,62.91456,65.011712,67.10886400000001,69.206016,71.303168,73.40032000000001,75.497472,77.59462400000001,79.691776,81.788928,83.88608,85.983232,88.08038400000001,90.177536,92.27468800000001,94.37184,96.468992,98.56614400000001,100.663296,102.76044800000001,104.8576,106.95475200000001,109.05190400000001,111.149056,113.24620800000001,115.34336,117.44051200000001,119.537664,121.634816,123.73196800000001,125.82912,127.92627200000001,130.023424,132.120576,134.21772800000002,136.31488000000002,138.412032,140.509184,142.606336,144.70348800000002,146.80064000000002,148.897792,150.994944,153.092096,155.18924800000002,157.28640000000001,159.383552,161.480704,163.577856,165.67500800000002,167.77216,169.869312,171.966464,174.06361600000002,176.16076800000002,178.25792,180.355072,182.452224,184.54937600000002,186.64652800000002,188.74368,190.840832,192.937984,195.03513600000002,197.13228800000002,199.22944,201.326592,203.423744,205.52089600000002,207.61804800000002,209.7152,211.812352,213.90950400000003,216.00665600000002,218.10380800000001,220.20096,222.298112,224.39526400000003,226.49241600000002,228.589568,230.68672,232.783872,234.88102400000002,236.97817600000002,239.075328,241.17248,243.269632,245.36678400000002,247.46393600000002,249.561088,251.65824,253.75539200000003,255.85254400000002,257.949696,260.046848,262.144,264.241152,266.338304,268.43545600000004,270.53260800000004,272.62976000000003,274.726912,276.824064,278.921216,281.018368,283.11552,285.212672,287.309824,289.40697600000004,291.50412800000004,293.60128
000000003,295.698432,297.795584,299.892736,301.989888,304.08704,306.184192,308.28134400000005,310.37849600000004,312.47564800000004,314.57280000000003,316.669952,318.767104,320.864256,322.961408,325.05856,327.155712,329.25286400000005,331.35001600000004,333.44716800000003,335.54432,337.641472,339.738624,341.835776,343.932928,346.03008,348.12723200000005,350.22438400000004],"y":[0.2501466572284698,0.2528519630432129,0.2616856694221496,0.2665999829769134,0.2683407664299011,0.2742894291877746,0.2762066125869751,0.2807516455650329,0.2767378389835357,0.2807380557060241,0.2788906991481781,0.2844051718711853,0.2856102883815765,0.2883394360542297,0.2875711619853973,0.2890409529209137,0.2894668281078338,0.2883355319499969,0.2872501015663147,0.291619062423706,0.2900333702564239,0.2962473034858703,0.2962896525859833,0.297355443239212,0.2932226359844208,0.2886744439601898,0.29665008187294,0.2976542115211487,0.2991503179073334,0.3004479110240936,0.3044549524784088,0.2976194322109222,0.3014707863330841,0.3048252463340759,0.3039425611495971,0.303354948759079,0.3027459383010864,0.2999922931194305,0.3050121665000915,0.2998814284801483,0.2978588044643402,0.3041949570178985,0.3010904192924499,0.3022017180919647,0.2997751235961914,0.3015910983085632,0.3096485137939453,0.3012076020240783,0.3065535724163055,0.3042872548103332,0.3104783594608307,0.2997980415821075,0.3051296770572662,0.303458571434021,0.3088337182998657,0.3145398199558258,0.3032208085060119,0.310806930065155,0.3075874149799347,0.3101692199707031,0.310107946395874,0.3066047430038452,0.3109066784381866,0.3081336915493011,0.3084586262702942,0.3086149394512176,0.3085348606109619,0.3136637806892395,0.3110873103141784,0.31076380610466,0.3084572553634643,0.3133681714534759,0.3125792145729065,0.3124453127384186,0.3097185790538788,0.3106793165206909,0.3089564740657806,0.3111244142055511,0.3123694658279419,0.3144859969615936,0.3135123550891876,0.311982125043869,0.3142133951187134,0.3122903704643249,0.3147654831409454,0.3078767359256
744,0.314947634935379,0.3171303570270538,0.3129573762416839,0.3154936134815216,0.3158208429813385,0.3153132200241089,0.3141326904296875,0.3163397014141083,0.3166318237781524,0.3168410360813141,0.3198235332965851,0.3201336860656738,0.3212967813014984,0.3191385567188263,0.3178017139434814,0.3192791938781738,0.323061466217041,0.320336639881134,0.3165886104106903,0.3206393420696258,0.3167395293712616,0.3135207295417785,0.315539002418518,0.3191742599010467,0.321073055267334,0.3222262561321258,0.3193058371543884,0.3213480710983276,0.3198905289173126,0.3219239711761474,0.3211614489555359,0.318855881690979,0.3177095353603363,0.324197381734848,0.3208906352519989,0.3264936804771423,0.3245965242385864,0.3231639564037323,0.3221887946128845,0.3277338445186615,0.3227696120738983,0.3263820111751556,0.3258577883243561,0.3264622390270233,0.3222362995147705,0.3286814987659454,0.3235024213790893,0.32446950674057,0.3311836123466491,0.328130304813385,0.3271634578704834,0.3250012993812561,0.3309800624847412,0.3274554014205932,0.3273015916347503,0.3261759579181671,0.32697594165802,0.3303172886371612,0.3282814025878906,0.3289586305618286,0.3260826468467712,0.3258011937141418,0.3297208249568939,0.3254813551902771,0.3287739753723144,0.3287097811698913,0.3275279700756073,0.3293041586875915,0.3314100801944732,0.3287808299064636,0.3251930773258209,0.3288172781467438,0.3265027701854706,0.3275215625762939,0.3290774822235107,0.3261331617832184,0.3299777805805206,0.331955999135971,0.3305029273033142,0.3274719417095184,0.3235560953617096,0.3269940316677093,0.3323083519935608],"label":"RefinedWeb"},"big-run-fineweb-cross-dedup-fixed":{"x":[0.0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,29.360128000000003,31.45728,33.554432000000006,35.651584,37.748736,39.845888,41.94304,44.040192000000005,46.137344000000006,48.234496,50.331648,52.4288,54.525952000000004,56.6
23104000000005,58.720256000000006,60.817408,62.91456,65.011712,67.10886400000001,69.206016,71.303168,73.40032000000001,75.497472,77.59462400000001,79.691776,81.788928,83.88608,85.983232,88.08038400000001,90.177536,92.27468800000001,94.37184,96.468992,98.56614400000001,100.663296,102.76044800000001,104.8576,106.95475200000001,109.05190400000001,111.149056,113.24620800000001,115.34336,117.44051200000001,119.537664,121.634816,123.73196800000001,125.82912,127.92627200000001,130.023424,132.120576,134.21772800000002,136.31488000000002,138.412032,140.509184,142.606336,144.70348800000002,146.80064000000002,148.897792,150.994944,153.092096,155.18924800000002,157.28640000000001,159.383552,161.480704,163.577856,165.67500800000002,167.77216,169.869312,171.966464,174.06361600000002,176.16076800000002,178.25792,180.355072,182.452224,184.54937600000002,186.64652800000002,188.74368,190.840832,192.937984,195.03513600000002,197.13228800000002,199.22944,201.326592,203.423744,205.52089600000002,207.61804800000002,209.7152,211.812352,213.90950400000003,216.00665600000002,218.10380800000001,220.20096,222.298112,224.39526400000003,226.49241600000002,228.589568,230.68672,232.783872,234.88102400000002,236.97817600000002,239.075328,241.17248,243.269632,245.36678400000002,247.46393600000002,249.561088,251.65824,253.75539200000003,255.85254400000002,257.949696,260.046848,262.144,264.241152,266.338304,268.43545600000004,270.53260800000004,272.62976000000003,274.726912,276.824064,278.921216,281.018368,283.11552,285.212672,287.309824,289.40697600000004,291.50412800000004,293.60128000000003,295.698432,297.795584,299.892736,301.989888,304.08704,306.184192,308.28134400000005,310.37849600000004,312.47564800000004,314.57280000000003,316.669952,318.767104,320.864256,322.961408,325.05856,327.155712,329.25286400000005,331.35001600000004,333.44716800000003,335.54432,337.641472,339.738624,341.835776,343.932928,346.03008,348.12723200000005,350.22438400000004],"y":[0.2501466572284698,0.2510619163513183,0.262
1481418609619,0.2632303833961487,0.2720474302768707,0.2719806432723999,0.2726832032203674,0.2786827087402344,0.2823672890663147,0.276201844215393,0.2816944718360901,0.280361145734787,0.2819306254386902,0.2823295891284942,0.2892518043518066,0.2872919738292694,0.2859259247779846,0.2885263860225677,0.2862614393234253,0.2933129370212555,0.2930494546890259,0.2884900867938995,0.2942298054695129,0.2927677929401397,0.2954220175743103,0.2918704748153686,0.2943699061870575,0.2891678512096405,0.291848212480545,0.2942944765090942,0.2973679602146148,0.2953736186027527,0.2963412702083587,0.297100305557251,0.2963026762008667,0.2944463491439819,0.2971296310424804,0.293870210647583,0.2982682287693023,0.2978119254112243,0.2989997565746307,0.2993503510951996,0.298117071390152,0.2977498769760132,0.3004056811332702,0.3012634217739105,0.3001384139060974,0.3052266240119934,0.3038219809532165,0.3037647306919098,0.3009455502033233,0.3038812279701233,0.303263396024704,0.3025077581405639,0.3056069612503052,0.3024908602237701,0.3050909340381622,0.3001562356948852,0.303833544254303,0.3019777834415436,0.3036664128303528,0.3022894859313965,0.3042722940444946,0.3023003339767456,0.3069425821304321,0.307883083820343,0.3026910126209259,0.3054113090038299,0.3046148121356964,0.305342435836792,0.3048149049282074,0.3066973984241485,0.3055126965045929,0.3063409924507141,0.307701051235199,0.3075169324874878,0.3091190159320831,0.3098153173923492,0.31436288356781,0.3096509575843811,0.3022815883159637,0.3119745552539825,0.3083471357822418,0.3085280954837799,0.3082001209259033,0.3080264329910278,0.3116717934608459,0.3097788393497467,0.3117353916168213,0.3170038759708404,0.3099159002304077,0.3133728504180908,0.3161626160144806,0.3095119595527649,0.3135432302951813,0.3103009164333343,0.3126655519008636,0.3121814131736755,0.3123973608016968,0.3148256838321686,0.3144133985042572,0.3124284744262695,0.3102188408374786,0.3123636841773987,0.3115113973617553,0.3151636719703674,0.3148572146892547,0.315061867237091,0.312
7182424068451,0.3139308094978332,0.3134367167949676,0.3136025071144104,0.3172793388366699,0.3134761154651642,0.3109587132930755,0.3127998411655426,0.3161843717098236,0.3163313865661621,0.3145243525505066,0.3155156075954437,0.3127505779266357,0.3182451128959656,0.3162476718425751,0.3124897480010986,0.3128789663314819,0.3119811117649078,0.314126193523407,0.3136049509048462,0.3149912655353546,0.3146650791168213,0.3151968121528625,0.3179666996002197,0.3169245719909668,0.3202513754367828,0.3185319602489471,0.3202781081199646,0.3186031281948089,0.3166128396987915,0.3199457228183746,0.3194417059421539,0.3170624077320099,0.3184532523155212,0.3191981911659241,0.3191225528717041,0.3173209130764007,0.3195607960224151,0.3166368305683136,0.3188160359859466,0.3174867630004883,0.3184468746185303,0.3211863338947296,0.3184327483177185,0.3177861273288727,0.3180214762687683,0.3194973170757293,0.3212297558784485,0.3211282789707184,0.3200584352016449,0.3168685734272003,0.3211040198802948,0.3222841620445251,0.3196901082992553,0.3236229419708252,0.3204475045204162,0.3210069537162781,0.3191083669662475,0.31863734126091,0.3195922076702118],"label":"FineWeb full 
MinHash"},"big-run-sampled_full_filtered_no_dedup":{"x":[0.0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,28.311552000000002,29.360128000000003,31.45728,33.554432000000006,35.651584,37.748736,39.845888,41.94304,44.040192000000005,46.137344000000006,48.234496,50.331648,52.4288,54.525952000000004,56.623104000000005,58.720256000000006,60.817408,62.91456,65.011712,67.10886400000001,69.206016,71.303168,73.40032000000001,75.497472,77.59462400000001,79.691776,81.788928,83.88608,85.983232,88.08038400000001,90.177536,92.27468800000001,94.37184,96.468992,98.56614400000001,100.663296,102.76044800000001,104.8576,106.95475200000001,109.05190400000001,111.149056,113.24620800000001,115.34336,117.44051200000001,119.537664,121.634816,123.73196800000001,125.82912,127.92627200000001,130.023424,132.120576,134.21772800000002,136.31488000000002,138.412032,140.509184,142.606336,144.70348800000002,146.80064000000002,148.897792,150.994944,153.092096,155.18924800000002,157.28640000000001,159.383552,161.480704,163.577856,165.67500800000002,167.77216,169.869312,171.966464,174.06361600000002,176.16076800000002,178.25792,180.355072,182.452224,184.54937600000002,186.64652800000002,188.74368,190.840832,192.937984,195.03513600000002,197.13228800000002,199.22944,201.326592,203.423744,205.52089600000002,207.61804800000002,209.7152,211.812352,213.90950400000003,216.00665600000002,218.10380800000001,220.20096,222.298112,224.39526400000003,226.49241600000002,228.589568,230.68672,232.783872,234.88102400000002,236.97817600000002,239.075328,241.17248,243.269632,245.36678400000002,247.46393600000002,249.561088,251.65824,253.75539200000003,255.85254400000002,257.949696,260.046848,262.144,264.241152,266.338304,268.43545600000004,270.53260800000004,272.62976000000003,274.726912,276.824064,278.921216,281.018368,283.11552,285.212672,287.309824,289.40697600000004,291.50412
800000004,293.60128000000003,295.698432,297.795584,299.892736,301.989888,304.08704,306.184192,308.28134400000005,310.37849600000004,312.47564800000004,314.57280000000003,316.669952,318.767104,320.864256,322.961408,325.05856,327.155712,329.25286400000005,331.35001600000004,333.44716800000003,335.54432,337.641472,339.738624,341.835776,343.932928,346.03008,348.12723200000005,350.22438400000004],"y":[0.2501466572284698,0.2516599297523498,0.2610189318656921,0.2666046619415283,0.2667981088161468,0.2667821645736694,0.2708088159561157,0.2738403379917145,0.2726235687732696,0.2762763500213623,0.2768311202526092,0.2809228301048279,0.2836140990257263,0.2822815179824829,0.2831664383411407,0.2797218561172485,0.286342591047287,0.2855269610881805,0.2847287058830261,0.2888180613517761,0.286526083946228,0.2865165770053863,0.294582188129425,0.2925947606563568,0.2947863042354584,0.2892930805683136,0.2903610467910766,0.288201242685318,0.2873396277427673,0.2916238009929657,0.2908017039299011,0.2907920777797699,0.2952797412872314,0.2941452264785766,0.2921333611011505,0.2925891280174255,0.2968584895133972,0.2980035543441772,0.2964116632938385,0.2962304651737213,0.2950254380702972,0.2977516651153564,0.2944138348102569,0.3003402054309845,0.2976303696632385,0.3013098239898681,0.302829384803772,0.3018766045570373,0.305361807346344,0.2971298694610595,0.3014816343784332,0.3019805550575256,0.3037064969539642,0.2970167994499206,0.2995208501815796,0.2970106601715088,0.2990955114364624,0.3027818500995636,0.3048534691333771,0.2993872463703155,0.2986327707767486,0.3015393316745758,0.3003426790237427,0.3003274798393249,0.3017795085906982,0.3019182682037353,0.3015450537204742,0.3046211004257202,0.3031167984008789,0.3020436763763428,0.3011128306388855,0.3029948472976684,0.3045558631420135,0.301642894744873,0.3029441833496094,0.3035804331302643,0.3004390001296997,0.3021787703037262,0.306041270494461,0.3064048886299133,0.3087956011295318,0.3070018291473388,0.3065581619739532,0.3093871772289276,0.3060930073
261261,0.3033313155174255,0.3072777390480041,0.306413859128952,0.3104493916034698,0.3056999444961548,0.3077532052993774,0.309231549501419,0.3070645034313202,0.3117790520191192,0.3114112913608551,0.312661737203598,0.3181777000427246,0.3117201030254364,0.3099702894687652,0.3074746131896972,0.3064963519573211,0.3105958700180053,0.3111456036567688,0.3084964454174042,0.3087405860424042,0.3121673166751861,0.3121528625488281,0.3100416660308838,0.3142979145050049,0.3129935264587402,0.3112611472606659,0.3119436800479889,0.3154115974903106,0.3091593086719513,0.3103814721107483,0.3130497634410858,0.3133455514907837,0.3152708411216736,0.3137963414192199,0.3099324703216553,0.3164172768592834,0.3133907914161682,0.3128255009651184,0.3134104907512665,0.3106969892978668,0.3130004107952118,0.3131391704082489,0.3130116462707519,0.3143952488899231,0.3143975436687469,0.3143710494041443,0.3163396418094635,0.3166862726211548,0.3184126019477844,0.3178988993167877,0.317479133605957,0.3184944093227386,0.316694974899292,0.3176258206367492,0.3182629346847534,0.3200214207172394,0.3181648552417755,0.320680022239685,0.3178716897964477,0.3182425796985626,0.3182984292507171,0.3158398568630218,0.3152642548084259,0.3132680356502533,0.3178914785385132,0.3156660795211792,0.3161703050136566,0.3176451921463012,0.3173815906047821,0.3194171786308288,0.3193057179450989,0.3172560334205627,0.317656546831131,0.3155770003795624,0.3199106156826019,0.3170182108879089,0.3156754970550537,0.3180731236934662,0.3205638229846954,0.3175432682037353,0.3184471428394317,0.3192788958549499,0.3197042346000671,0.3177168369293213],"label":"FineWeb filtered 
only"},"big-run-sampled_full_ind_minhash":{"x":[0.0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,28.311552000000002,29.360128000000003,31.45728,33.554432000000006,35.651584,37.748736,39.845888,41.94304,44.040192000000005,46.137344000000006,48.234496,50.331648,52.4288,54.525952000000004,56.623104000000005,58.720256000000006,60.817408,62.91456,65.011712,67.10886400000001,69.206016,71.303168,73.40032000000001,75.497472,77.59462400000001,79.691776,81.788928,83.88608,85.983232,88.08038400000001,90.177536,92.27468800000001,94.37184,96.468992,98.56614400000001,100.663296,102.76044800000001,104.8576,106.95475200000001,109.05190400000001,111.149056,113.24620800000001,115.34336,117.44051200000001,119.537664,121.634816,123.73196800000001,125.82912,127.92627200000001,130.023424,132.120576,134.21772800000002,136.31488000000002,138.412032,140.509184,142.606336,144.70348800000002,146.80064000000002,148.897792,150.994944,153.092096,155.18924800000002,157.28640000000001,159.383552,161.480704,163.577856,165.67500800000002,167.77216,169.869312,171.966464,174.06361600000002,176.16076800000002,178.25792,180.355072,182.452224,184.54937600000002,186.64652800000002,188.74368,190.840832,192.937984,195.03513600000002,197.13228800000002,199.22944,201.326592,203.423744,205.52089600000002,207.61804800000002,209.7152,211.812352,213.90950400000003,216.00665600000002,218.10380800000001,220.20096,222.298112,224.39526400000003,226.49241600000002,228.589568,230.68672,232.783872,234.88102400000002,236.97817600000002,239.075328,241.17248,243.269632,245.36678400000002,247.46393600000002,249.561088,251.65824,253.75539200000003,255.85254400000002,257.949696,260.046848,262.144,264.241152,266.338304,268.43545600000004,270.53260800000004,272.62976000000003,274.726912,276.824064,278.921216,281.018368,283.11552,285.212672,287.309824,289.40697600000004,291.50412800000004
,293.60128000000003,295.698432,297.795584,299.892736,301.989888,304.08704,306.184192,308.28134400000005,310.37849600000004,312.47564800000004,314.57280000000003,316.669952,318.767104,320.864256,322.961408,325.05856,327.155712,329.25286400000005,331.35001600000004,333.44716800000003,335.54432,337.641472,339.738624,341.835776,343.932928,346.03008,348.12723200000005,350.22438400000004],"y":[0.2501466572284698,0.2558934390544891,0.2618628144264221,0.2683217823505401,0.2699837982654571,0.2738722860813141,0.2744417488574981,0.2740873992443084,0.2807216048240661,0.2820421457290649,0.2891400754451751,0.2879075407981872,0.2881667613983154,0.2892490327358246,0.2882707118988037,0.2935869693756103,0.2870290875434875,0.2911452651023865,0.2949125170707702,0.2916406095027923,0.2981449663639068,0.2953989207744598,0.2946988642215729,0.297021746635437,0.3001497685909271,0.3010218441486358,0.2977036237716675,0.2992585003376007,0.2986803948879242,0.2994338274002075,0.2989781498908996,0.3041955828666687,0.3030496537685394,0.303806334733963,0.3036351203918457,0.3058845102787018,0.300450712442398,0.3025284707546234,0.3072526752948761,0.3039065897464752,0.3073755502700805,0.3070493042469024,0.3083153367042541,0.3123056292533874,0.307761400938034,0.3053378164768219,0.3116358816623688,0.3080427348613739,0.308482676744461,0.307318776845932,0.3083004653453827,0.3089516758918762,0.3088736236095428,0.3077724277973175,0.3126304149627685,0.3101697862148285,0.3159398734569549,0.314792275428772,0.3103811144828796,0.3111368715763092,0.3129658997058868,0.311605304479599,0.3118223249912262,0.3133279979228973,0.3146496713161468,0.3195074200630188,0.3142614662647247,0.3125102519989013,0.3115333616733551,0.3183117806911468,0.3168580532073974,0.3187012672424316,0.3179306983947754,0.3157722651958465,0.3214826583862304,0.3145081698894501,0.3172421753406524,0.3151432573795318,0.3181649446487427,0.3180212080478668,0.3171605765819549,0.3212067782878876,0.3180184066295624,0.3209905624389648,0.319052129983902,0.3
212707936763763,0.3196887373924255,0.3188316226005554,0.3164899051189422,0.3241994678974151,0.3179469406604767,0.3214083909988403,0.3206575512886047,0.3263285160064697,0.3219505250453949,0.3181525468826294,0.3219776451587677,0.3259726762771606,0.3197665512561798,0.3236161768436432,0.3177970349788666,0.3258080780506134,0.3208407461643219,0.3251138925552368,0.3242645859718323,0.3229723274707794,0.3227455914020538,0.3206316232681274,0.3256695866584778,0.3241210877895355,0.3224890530109405,0.3263737261295318,0.3214233517646789,0.3240345120429992,0.3222567737102508,0.3242291808128357,0.3257078528404236,0.3278365731239319,0.3277338743209839,0.3253948092460632,0.3232105076313019,0.3267974853515625,0.3263654410839081,0.3262891769409179,0.3238334357738495,0.3294911682605743,0.3261866867542267,0.3243315815925598,0.3250119090080261,0.326727420091629,0.3268802464008331,0.3269768059253692,0.3257980346679687,0.3280686736106872,0.3274897634983063,0.3282252252101898,0.3272863030433655,0.328346699476242,0.325562834739685,0.3301684856414795,0.3284023404121399,0.3268299400806427,0.3286610245704651,0.3291078805923462,0.324972927570343,0.3314772248268127,0.3278062343597412,0.326839417219162,0.3277239501476288,0.330414742231369,0.3271744549274444,0.3279334008693695,0.3288575112819671,0.3285425007343292,0.3282454907894134,0.3296376466751098,0.3305942714214325,0.3276287615299225,0.3292438983917236,0.329515129327774,0.3281475007534027,0.3282177448272705,0.3333999514579773,0.3302631080150604,0.330238401889801,0.3323166668415069,0.3313035368919372,0.32961106300354,0.3321967124938965],"label":"FineWeb independent MinHash"}},"layout":{"xaxis":{"title":{"text":"Training tokens (billions)"}},"yaxis":{"title":{"text":"Agg Score"}},"title":{"text":"Independent dedup outperforms dedup across dumps"}}}
 
 
data/plots/cross_ind_unfiltered_comparison/openbookqa_acc_norm.json DELETED
@@ -1 +0,0 @@
1
- {"data":{"big-run-refinedweb":{"x":[0.0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,28.311552000000002,29.360128000000003,31.45728,33.554432000000006,35.651584,37.748736,39.845888,41.94304,44.040192000000005,46.137344000000006,48.234496,50.331648,52.4288,54.525952000000004,56.623104000000005,58.720256000000006,60.817408,62.91456,65.011712,67.10886400000001,69.206016,71.303168,73.40032000000001,75.497472,77.59462400000001,79.691776,81.788928,83.88608,85.983232,88.08038400000001,90.177536,92.27468800000001,94.37184,96.468992,98.56614400000001,100.663296,102.76044800000001,104.8576,106.95475200000001,109.05190400000001,111.149056,113.24620800000001,115.34336,117.44051200000001,119.537664,121.634816,123.73196800000001,125.82912,127.92627200000001,130.023424,132.120576,134.21772800000002,136.31488000000002,138.412032,140.509184,142.606336,144.70348800000002,146.80064000000002,148.897792,150.994944,153.092096,155.18924800000002,157.28640000000001,159.383552,161.480704,163.577856,165.67500800000002,167.77216,169.869312,171.966464,174.06361600000002,176.16076800000002,178.25792,180.355072,182.452224,184.54937600000002,186.64652800000002,188.74368,190.840832,192.937984,195.03513600000002,197.13228800000002,199.22944,201.326592,203.423744,205.52089600000002,207.61804800000002,209.7152,211.812352,213.90950400000003,216.00665600000002,218.10380800000001,220.20096,222.298112,224.39526400000003,226.49241600000002,228.589568,230.68672,232.783872,234.88102400000002,236.97817600000002,239.075328,241.17248,243.269632,245.36678400000002,247.46393600000002,249.561088,251.65824,253.75539200000003,255.85254400000002,257.949696,260.046848,262.144,264.241152,266.338304,268.43545600000004,270.53260800000004,272.62976000000003,274.726912,276.824064,278.921216,281.018368,283.11552,285.212672,287.309824,289.40697600000004,291.50412800000004,293.60128
000000003,295.698432,297.795584,299.892736,301.989888,304.08704,306.184192,308.28134400000005,310.37849600000004,312.47564800000004,314.57280000000003,316.669952,318.767104,320.864256,322.961408,325.05856,327.155712,329.25286400000005,331.35001600000004,333.44716800000003,335.54432,337.641472,339.738624,341.835776,343.932928,346.03008,348.12723200000005,350.22438400000004],"y":[0.2860000133514404,0.2560000121593475,0.2840000092983246,0.3059999942779541,0.3059999942779541,0.2980000078678131,0.3240000009536743,0.3100000023841858,0.3000000119209289,0.3160000145435333,0.3140000104904175,0.3260000050067901,0.3199999928474426,0.2980000078678131,0.3179999887943268,0.3179999887943268,0.3319999873638153,0.3019999861717224,0.2939999997615814,0.3319999873638153,0.3319999873638153,0.3219999969005584,0.3379999995231628,0.3379999995231628,0.3339999914169311,0.3240000009536743,0.3479999899864197,0.3300000131130218,0.3240000009536743,0.3300000131130218,0.3400000035762787,0.3459999859333038,0.3319999873638153,0.3379999995231628,0.356000006198883,0.3339999914169311,0.3459999859333038,0.3440000116825104,0.3519999980926513,0.3479999899864197,0.3339999914169311,0.3400000035762787,0.3479999899864197,0.3379999995231628,0.3479999899864197,0.3499999940395355,0.3400000035762787,0.3499999940395355,0.3420000076293945,0.3659999966621399,0.3400000035762787,0.3459999859333038,0.3499999940395355,0.356000006198883,0.3400000035762787,0.356000006198883,0.3339999914169311,0.3339999914169311,0.3479999899864197,0.3420000076293945,0.3580000102519989,0.3339999914169311,0.3440000116825104,0.3400000035762787,0.3499999940395355,0.3540000021457672,0.3479999899864197,0.3499999940395355,0.3420000076293945,0.3379999995231628,0.335999995470047,0.356000006198883,0.3459999859333038,0.3499999940395355,0.3400000035762787,0.3440000116825104,0.356000006198883,0.3519999980926513,0.3400000035762787,0.3440000116825104,0.356000006198883,0.3400000035762787,0.356000006198883,0.3600000143051147,0.3540000021457672,0.3479999899
864197,0.3379999995231628,0.3440000116825104,0.3300000131130218,0.3400000035762787,0.3459999859333038,0.3339999914169311,0.3499999940395355,0.3600000143051147,0.3440000116825104,0.3499999940395355,0.356000006198883,0.3420000076293945,0.3479999899864197,0.3379999995231628,0.3379999995231628,0.3459999859333038,0.356000006198883,0.328000009059906,0.3459999859333038,0.3519999980926513,0.3499999940395355,0.3519999980926513,0.3420000076293945,0.3499999940395355,0.3420000076293945,0.3339999914169311,0.335999995470047,0.3379999995231628,0.3379999995231628,0.3540000021457672,0.356000006198883,0.356000006198883,0.335999995470047,0.363999992609024,0.363999992609024,0.3499999940395355,0.356000006198883,0.3519999980926513,0.3519999980926513,0.3540000021457672,0.3459999859333038,0.3479999899864197,0.3519999980926513,0.3519999980926513,0.3420000076293945,0.3440000116825104,0.3379999995231628,0.3519999980926513,0.356000006198883,0.3420000076293945,0.3580000102519989,0.3499999940395355,0.3619999885559082,0.3519999980926513,0.3600000143051147,0.3459999859333038,0.3519999980926513,0.3519999980926513,0.3499999940395355,0.3580000102519989,0.356000006198883,0.3580000102519989,0.3600000143051147,0.3440000116825104,0.3600000143051147,0.3440000116825104,0.3479999899864197,0.3479999899864197,0.3580000102519989,0.3600000143051147,0.3580000102519989,0.3540000021457672,0.3519999980926513,0.3459999859333038,0.3459999859333038,0.3540000021457672,0.335999995470047,0.3540000021457672,0.3540000021457672,0.3519999980926513,0.356000006198883,0.3499999940395355,0.356000006198883],"label":"RefinedWeb"},"big-run-fineweb-cross-dedup-fixed":{"x":[0.0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,29.360128000000003,31.45728,33.554432000000006,35.651584,37.748736,39.845888,41.94304,44.040192000000005,46.137344000000006,48.234496,50.331648,52.4288,54.525952000000004,56.
623104000000005,58.720256000000006,60.817408,62.91456,65.011712,67.10886400000001,69.206016,71.303168,73.40032000000001,75.497472,77.59462400000001,79.691776,81.788928,83.88608,85.983232,88.08038400000001,90.177536,92.27468800000001,94.37184,96.468992,98.56614400000001,100.663296,102.76044800000001,104.8576,106.95475200000001,109.05190400000001,111.149056,113.24620800000001,115.34336,117.44051200000001,119.537664,121.634816,123.73196800000001,125.82912,127.92627200000001,130.023424,132.120576,134.21772800000002,136.31488000000002,138.412032,140.509184,142.606336,144.70348800000002,146.80064000000002,148.897792,150.994944,153.092096,155.18924800000002,157.28640000000001,159.383552,161.480704,163.577856,165.67500800000002,167.77216,169.869312,171.966464,174.06361600000002,176.16076800000002,178.25792,180.355072,182.452224,184.54937600000002,186.64652800000002,188.74368,190.840832,192.937984,195.03513600000002,197.13228800000002,199.22944,201.326592,203.423744,205.52089600000002,207.61804800000002,209.7152,211.812352,213.90950400000003,216.00665600000002,218.10380800000001,220.20096,222.298112,224.39526400000003,226.49241600000002,228.589568,230.68672,232.783872,234.88102400000002,236.97817600000002,239.075328,241.17248,243.269632,245.36678400000002,247.46393600000002,249.561088,251.65824,253.75539200000003,255.85254400000002,257.949696,260.046848,262.144,264.241152,266.338304,268.43545600000004,270.53260800000004,272.62976000000003,274.726912,276.824064,278.921216,281.018368,283.11552,285.212672,287.309824,289.40697600000004,291.50412800000004,293.60128000000003,295.698432,297.795584,299.892736,301.989888,304.08704,306.184192,308.28134400000005,310.37849600000004,312.47564800000004,314.57280000000003,316.669952,318.767104,320.864256,322.961408,325.05856,327.155712,329.25286400000005,331.35001600000004,333.44716800000003,335.54432,337.641472,339.738624,341.835776,343.932928,346.03008,348.12723200000005,350.22438400000004],"y":[0.2860000133514404,0.2460000067949295,0.27
20000147819519,0.270000010728836,0.2939999997615814,0.2960000038146972,0.3240000009536743,0.3019999861717224,0.2879999876022339,0.3179999887943268,0.3059999942779541,0.2899999916553497,0.3100000023841858,0.3179999887943268,0.3219999969005584,0.3219999969005584,0.3300000131130218,0.3140000104904175,0.3240000009536743,0.3079999983310699,0.3260000050067901,0.3120000064373016,0.3160000145435333,0.3179999887943268,0.3260000050067901,0.3260000050067901,0.3240000009536743,0.3379999995231628,0.3219999969005584,0.3319999873638153,0.3379999995231628,0.3339999914169311,0.328000009059906,0.3319999873638153,0.3199999928474426,0.3000000119209289,0.3260000050067901,0.3240000009536743,0.328000009059906,0.3240000009536743,0.328000009059906,0.3260000050067901,0.3440000116825104,0.3199999928474426,0.3319999873638153,0.3219999969005584,0.335999995470047,0.3519999980926513,0.3379999995231628,0.328000009059906,0.3300000131130218,0.335999995470047,0.3479999899864197,0.3459999859333038,0.3479999899864197,0.3540000021457672,0.3479999899864197,0.3300000131130218,0.356000006198883,0.3479999899864197,0.356000006198883,0.335999995470047,0.335999995470047,0.3479999899864197,0.3339999914169311,0.3540000021457672,0.3300000131130218,0.3479999899864197,0.3499999940395355,0.3400000035762787,0.3459999859333038,0.3339999914169311,0.3479999899864197,0.335999995470047,0.3400000035762787,0.3179999887943268,0.335999995470047,0.328000009059906,0.328000009059906,0.3540000021457672,0.3479999899864197,0.3420000076293945,0.3580000102519989,0.3459999859333038,0.3420000076293945,0.3459999859333038,0.3440000116825104,0.3499999940395355,0.335999995470047,0.3540000021457672,0.356000006198883,0.3400000035762787,0.3600000143051147,0.3580000102519989,0.3519999980926513,0.3499999940395355,0.3540000021457672,0.3519999980926513,0.3499999940395355,0.3440000116825104,0.356000006198883,0.3479999899864197,0.3479999899864197,0.3440000116825104,0.3499999940395355,0.3440000116825104,0.3519999980926513,0.3440000116825104,0.356000
006198883,0.3459999859333038,0.3580000102519989,0.356000006198883,0.3519999980926513,0.3420000076293945,0.3379999995231628,0.3479999899864197,0.3459999859333038,0.3499999940395355,0.3400000035762787,0.3440000116825104,0.3420000076293945,0.3420000076293945,0.3499999940395355,0.3459999859333038,0.3420000076293945,0.3459999859333038,0.3459999859333038,0.3479999899864197,0.3440000116825104,0.3720000088214874,0.3619999885559082,0.356000006198883,0.3519999980926513,0.3459999859333038,0.3440000116825104,0.3420000076293945,0.3580000102519989,0.3600000143051147,0.3519999980926513,0.3600000143051147,0.3440000116825104,0.3600000143051147,0.3619999885559082,0.3499999940395355,0.3499999940395355,0.363999992609024,0.3580000102519989,0.3499999940395355,0.3479999899864197,0.3479999899864197,0.3580000102519989,0.3540000021457672,0.3600000143051147,0.3420000076293945,0.3519999980926513,0.3440000116825104,0.3519999980926513,0.3540000021457672,0.356000006198883,0.3459999859333038,0.3499999940395355,0.3519999980926513,0.3580000102519989,0.3440000116825104,0.3499999940395355,0.3580000102519989,0.3479999899864197,0.3479999899864197],"label":"FineWeb full 
MinHash"},"big-run-sampled_full_filtered_no_dedup":{"x":[0.0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,28.311552000000002,29.360128000000003,31.45728,33.554432000000006,35.651584,37.748736,39.845888,41.94304,44.040192000000005,46.137344000000006,48.234496,50.331648,52.4288,54.525952000000004,56.623104000000005,58.720256000000006,60.817408,62.91456,65.011712,67.10886400000001,69.206016,71.303168,73.40032000000001,75.497472,77.59462400000001,79.691776,81.788928,83.88608,85.983232,88.08038400000001,90.177536,92.27468800000001,94.37184,96.468992,98.56614400000001,100.663296,102.76044800000001,104.8576,106.95475200000001,109.05190400000001,111.149056,113.24620800000001,115.34336,117.44051200000001,119.537664,121.634816,123.73196800000001,125.82912,127.92627200000001,130.023424,132.120576,134.21772800000002,136.31488000000002,138.412032,140.509184,142.606336,144.70348800000002,146.80064000000002,148.897792,150.994944,153.092096,155.18924800000002,157.28640000000001,159.383552,161.480704,163.577856,165.67500800000002,167.77216,169.869312,171.966464,174.06361600000002,176.16076800000002,178.25792,180.355072,182.452224,184.54937600000002,186.64652800000002,188.74368,190.840832,192.937984,195.03513600000002,197.13228800000002,199.22944,201.326592,203.423744,205.52089600000002,207.61804800000002,209.7152,211.812352,213.90950400000003,216.00665600000002,218.10380800000001,220.20096,222.298112,224.39526400000003,226.49241600000002,228.589568,230.68672,232.783872,234.88102400000002,236.97817600000002,239.075328,241.17248,243.269632,245.36678400000002,247.46393600000002,249.561088,251.65824,253.75539200000003,255.85254400000002,257.949696,260.046848,262.144,264.241152,266.338304,268.43545600000004,270.53260800000004,272.62976000000003,274.726912,276.824064,278.921216,281.018368,283.11552,285.212672,287.309824,289.40697600000004,291.50412
800000004,293.60128000000003,295.698432,297.795584,299.892736,301.989888,304.08704,306.184192,308.28134400000005,310.37849600000004,312.47564800000004,314.57280000000003,316.669952,318.767104,320.864256,322.961408,325.05856,327.155712,329.25286400000005,331.35001600000004,333.44716800000003,335.54432,337.641472,339.738624,341.835776,343.932928,346.03008,348.12723200000005,350.22438400000004],"y":[0.2860000133514404,0.2560000121593475,0.2720000147819519,0.2980000078678131,0.2840000092983246,0.2879999876022339,0.3039999902248382,0.2860000133514404,0.2899999916553497,0.3019999861717224,0.2960000038146972,0.3039999902248382,0.3100000023841858,0.3160000145435333,0.3260000050067901,0.3160000145435333,0.3260000050067901,0.3179999887943268,0.3420000076293945,0.3219999969005584,0.328000009059906,0.3240000009536743,0.3300000131130218,0.328000009059906,0.3199999928474426,0.3379999995231628,0.3400000035762787,0.3240000009536743,0.3120000064373016,0.3319999873638153,0.3260000050067901,0.3120000064373016,0.3160000145435333,0.3140000104904175,0.3179999887943268,0.3160000145435333,0.3199999928474426,0.3240000009536743,0.3260000050067901,0.3179999887943268,0.3300000131130218,0.3179999887943268,0.328000009059906,0.3240000009536743,0.328000009059906,0.3260000050067901,0.3199999928474426,0.3400000035762787,0.3339999914169311,0.328000009059906,0.328000009059906,0.3339999914169311,0.328000009059906,0.328000009059906,0.335999995470047,0.3580000102519989,0.3499999940395355,0.3260000050067901,0.3499999940395355,0.3420000076293945,0.3160000145435333,0.3339999914169311,0.335999995470047,0.3400000035762787,0.3240000009536743,0.3319999873638153,0.3379999995231628,0.3400000035762787,0.3379999995231628,0.3319999873638153,0.3319999873638153,0.3440000116825104,0.3300000131130218,0.3219999969005584,0.3260000050067901,0.3219999969005584,0.3339999914169311,0.328000009059906,0.3300000131130218,0.3219999969005584,0.3379999995231628,0.3400000035762787,0.3319999873638153,0.328000009059906,0.34400001168251
04,0.3339999914169311,0.328000009059906,0.3379999995231628,0.3499999940395355,0.3339999914169311,0.3300000131130218,0.328000009059906,0.335999995470047,0.3240000009536743,0.335999995470047,0.3240000009536743,0.3400000035762787,0.3400000035762787,0.3420000076293945,0.3319999873638153,0.3339999914169311,0.3300000131130218,0.3400000035762787,0.3459999859333038,0.3400000035762787,0.3379999995231628,0.3459999859333038,0.3379999995231628,0.3300000131130218,0.3519999980926513,0.3379999995231628,0.356000006198883,0.335999995470047,0.3420000076293945,0.3400000035762787,0.328000009059906,0.3540000021457672,0.3499999940395355,0.3479999899864197,0.3440000116825104,0.3519999980926513,0.356000006198883,0.3540000021457672,0.3440000116825104,0.3499999940395355,0.356000006198883,0.356000006198883,0.356000006198883,0.363999992609024,0.3600000143051147,0.356000006198883,0.3479999899864197,0.356000006198883,0.3459999859333038,0.3479999899864197,0.3619999885559082,0.363999992609024,0.3499999940395355,0.3379999995231628,0.3479999899864197,0.3499999940395355,0.356000006198883,0.3519999980926513,0.3540000021457672,0.3619999885559082,0.3580000102519989,0.3540000021457672,0.356000006198883,0.3479999899864197,0.3519999980926513,0.356000006198883,0.3499999940395355,0.3379999995231628,0.3479999899864197,0.3499999940395355,0.3440000116825104,0.3580000102519989,0.356000006198883,0.3499999940395355,0.3479999899864197,0.3580000102519989,0.3519999980926513,0.3540000021457672,0.3519999980926513,0.3540000021457672,0.356000006198883,0.363999992609024,0.356000006198883,0.356000006198883],"label":"FineWeb filtered 
only"},"big-run-sampled_full_ind_minhash":{"x":[0.0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,28.311552000000002,29.360128000000003,31.45728,33.554432000000006,35.651584,37.748736,39.845888,41.94304,44.040192000000005,46.137344000000006,48.234496,50.331648,52.4288,54.525952000000004,56.623104000000005,58.720256000000006,60.817408,62.91456,65.011712,67.10886400000001,69.206016,71.303168,73.40032000000001,75.497472,77.59462400000001,79.691776,81.788928,83.88608,85.983232,88.08038400000001,90.177536,92.27468800000001,94.37184,96.468992,98.56614400000001,100.663296,102.76044800000001,104.8576,106.95475200000001,109.05190400000001,111.149056,113.24620800000001,115.34336,117.44051200000001,119.537664,121.634816,123.73196800000001,125.82912,127.92627200000001,130.023424,132.120576,134.21772800000002,136.31488000000002,138.412032,140.509184,142.606336,144.70348800000002,146.80064000000002,148.897792,150.994944,153.092096,155.18924800000002,157.28640000000001,159.383552,161.480704,163.577856,165.67500800000002,167.77216,169.869312,171.966464,174.06361600000002,176.16076800000002,178.25792,180.355072,182.452224,184.54937600000002,186.64652800000002,188.74368,190.840832,192.937984,195.03513600000002,197.13228800000002,199.22944,201.326592,203.423744,205.52089600000002,207.61804800000002,209.7152,211.812352,213.90950400000003,216.00665600000002,218.10380800000001,220.20096,222.298112,224.39526400000003,226.49241600000002,228.589568,230.68672,232.783872,234.88102400000002,236.97817600000002,239.075328,241.17248,243.269632,245.36678400000002,247.46393600000002,249.561088,251.65824,253.75539200000003,255.85254400000002,257.949696,260.046848,262.144,264.241152,266.338304,268.43545600000004,270.53260800000004,272.62976000000003,274.726912,276.824064,278.921216,281.018368,283.11552,285.212672,287.309824,289.40697600000004,291.50412800000004
,293.60128000000003,295.698432,297.795584,299.892736,301.989888,304.08704,306.184192,308.28134400000005,310.37849600000004,312.47564800000004,314.57280000000003,316.669952,318.767104,320.864256,322.961408,325.05856,327.155712,329.25286400000005,331.35001600000004,333.44716800000003,335.54432,337.641472,339.738624,341.835776,343.932928,346.03008,348.12723200000005,350.22438400000004],"y":[0.2860000133514404,0.2660000026226043,0.277999997138977,0.2820000052452087,0.3079999983310699,0.3140000104904175,0.3260000050067901,0.3039999902248382,0.3319999873638153,0.3240000009536743,0.3199999928474426,0.3379999995231628,0.3339999914169311,0.3319999873638153,0.3179999887943268,0.3319999873638153,0.3219999969005584,0.3319999873638153,0.3379999995231628,0.3199999928474426,0.3179999887943268,0.3400000035762787,0.3219999969005584,0.335999995470047,0.3339999914169311,0.3420000076293945,0.3240000009536743,0.3440000116825104,0.3420000076293945,0.3379999995231628,0.3459999859333038,0.328000009059906,0.3420000076293945,0.3459999859333038,0.3479999899864197,0.3379999995231628,0.356000006198883,0.3379999995231628,0.3440000116825104,0.3400000035762787,0.3379999995231628,0.3499999940395355,0.3540000021457672,0.3479999899864197,0.3479999899864197,0.3440000116825104,0.3459999859333038,0.3440000116825104,0.3519999980926513,0.356000006198883,0.3600000143051147,0.3379999995231628,0.356000006198883,0.3400000035762787,0.3519999980926513,0.3479999899864197,0.3479999899864197,0.3400000035762787,0.3459999859333038,0.3519999980926513,0.3440000116825104,0.3400000035762787,0.356000006198883,0.3420000076293945,0.356000006198883,0.3540000021457672,0.3600000143051147,0.3339999914169311,0.3499999940395355,0.3580000102519989,0.3440000116825104,0.3479999899864197,0.3580000102519989,0.3519999980926513,0.3339999914169311,0.3540000021457672,0.3459999859333038,0.3459999859333038,0.3400000035762787,0.356000006198883,0.356000006198883,0.3420000076293945,0.3420000076293945,0.3400000035762787,0.3479999899864197,0.35
19999980926513,0.3319999873638153,0.3580000102519989,0.356000006198883,0.356000006198883,0.3499999940395355,0.3479999899864197,0.3400000035762787,0.3440000116825104,0.3339999914169311,0.3379999995231628,0.3479999899864197,0.3680000007152557,0.3619999885559082,0.3440000116825104,0.3619999885559082,0.3580000102519989,0.356000006198883,0.3600000143051147,0.3519999980926513,0.3519999980926513,0.3459999859333038,0.3540000021457672,0.3600000143051147,0.356000006198883,0.3540000021457672,0.3519999980926513,0.356000006198883,0.3600000143051147,0.3540000021457672,0.3540000021457672,0.363999992609024,0.3580000102519989,0.3680000007152557,0.3580000102519989,0.356000006198883,0.3519999980926513,0.3519999980926513,0.3519999980926513,0.3459999859333038,0.3499999940395355,0.356000006198883,0.3540000021457672,0.3540000021457672,0.3659999966621399,0.3619999885559082,0.3420000076293945,0.363999992609024,0.3580000102519989,0.3619999885559082,0.3759999871253967,0.3740000128746032,0.363999992609024,0.3580000102519989,0.3700000047683716,0.3700000047683716,0.363999992609024,0.3440000116825104,0.3580000102519989,0.3680000007152557,0.3700000047683716,0.3740000128746032,0.3619999885559082,0.3619999885559082,0.3700000047683716,0.363999992609024,0.363999992609024,0.363999992609024,0.3700000047683716,0.3600000143051147,0.3680000007152557,0.363999992609024,0.3659999966621399,0.363999992609024,0.3680000007152557,0.3580000102519989,0.363999992609024,0.3659999966621399,0.363999992609024,0.3580000102519989,0.3600000143051147,0.3600000143051147,0.3580000102519989,0.3600000143051147],"label":"FineWeb independent MinHash"}},"layout":{"xaxis":{"title":{"text":"Training tokens (billions)"}},"yaxis":{"title":{"text":"Agg Score"}},"title":{"text":"Independent dedup outperforms dedup across dumps"}}}
 
 
data/plots/cross_ind_unfiltered_comparison/piqa_acc_norm.json DELETED
@@ -1 +0,0 @@
1
- {"data":{"big-run-refinedweb":{"x":[0.0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,28.311552000000002,29.360128000000003,31.45728,33.554432000000006,35.651584,37.748736,39.845888,41.94304,44.040192000000005,46.137344000000006,48.234496,50.331648,52.4288,54.525952000000004,56.623104000000005,58.720256000000006,60.817408,62.91456,65.011712,67.10886400000001,69.206016,71.303168,73.40032000000001,75.497472,77.59462400000001,79.691776,81.788928,83.88608,85.983232,88.08038400000001,90.177536,92.27468800000001,94.37184,96.468992,98.56614400000001,100.663296,102.76044800000001,104.8576,106.95475200000001,109.05190400000001,111.149056,113.24620800000001,115.34336,117.44051200000001,119.537664,121.634816,123.73196800000001,125.82912,127.92627200000001,130.023424,132.120576,134.21772800000002,136.31488000000002,138.412032,140.509184,142.606336,144.70348800000002,146.80064000000002,148.897792,150.994944,153.092096,155.18924800000002,157.28640000000001,159.383552,161.480704,163.577856,165.67500800000002,167.77216,169.869312,171.966464,174.06361600000002,176.16076800000002,178.25792,180.355072,182.452224,184.54937600000002,186.64652800000002,188.74368,190.840832,192.937984,195.03513600000002,197.13228800000002,199.22944,201.326592,203.423744,205.52089600000002,207.61804800000002,209.7152,211.812352,213.90950400000003,216.00665600000002,218.10380800000001,220.20096,222.298112,224.39526400000003,226.49241600000002,228.589568,230.68672,232.783872,234.88102400000002,236.97817600000002,239.075328,241.17248,243.269632,245.36678400000002,247.46393600000002,249.561088,251.65824,253.75539200000003,255.85254400000002,257.949696,260.046848,262.144,264.241152,266.338304,268.43545600000004,270.53260800000004,272.62976000000003,274.726912,276.824064,278.921216,281.018368,283.11552,285.212672,287.309824,289.40697600000004,291.50412800000004,293.60128
000000003,295.698432,297.795584,299.892736,301.989888,304.08704,306.184192,308.28134400000005,310.37849600000004,312.47564800000004,314.57280000000003,316.669952,318.767104,320.864256,322.961408,325.05856,327.155712,329.25286400000005,331.35001600000004,333.44716800000003,335.54432,337.641472,339.738624,341.835776,343.932928,346.03008,348.12723200000005,350.22438400000004],"y":[0.5099999904632568,0.6019999980926514,0.652999997138977,0.6710000038146973,0.6740000247955322,0.6899999976158142,0.6919999718666077,0.6909999847412109,0.7070000171661377,0.7089999914169312,0.7129999995231628,0.7229999899864197,0.7120000123977661,0.7200000286102295,0.7300000190734863,0.7279999852180481,0.7369999885559082,0.7390000224113464,0.7350000143051147,0.7319999933242798,0.7279999852180481,0.7269999980926514,0.7459999918937683,0.7400000095367432,0.7390000224113464,0.7319999933242798,0.7390000224113464,0.7379999756813049,0.7390000224113464,0.7360000014305115,0.7440000176429749,0.7400000095367432,0.7360000014305115,0.7480000257492065,0.7360000014305115,0.7440000176429749,0.7459999918937683,0.7409999966621399,0.746999979019165,0.7440000176429749,0.7450000047683716,0.753000020980835,0.7390000224113464,0.7490000128746033,0.7419999837875366,0.7390000224113464,0.7559999823570251,0.7519999742507935,0.7549999952316284,0.7419999837875366,0.7490000128746033,0.7540000081062317,0.7480000257492065,0.7450000047683716,0.7429999709129333,0.7509999871253967,0.7549999952316284,0.7490000128746033,0.7490000128746033,0.7400000095367432,0.753000020980835,0.75,0.7509999871253967,0.7570000290870667,0.7590000033378601,0.7570000290870667,0.7329999804496765,0.7540000081062317,0.746999979019165,0.7409999966621399,0.7590000033378601,0.7509999871253967,0.7570000290870667,0.75,0.7540000081062317,0.7480000257492065,0.7580000162124634,0.7639999985694885,0.7630000114440918,0.7590000033378601,0.7549999952316284,0.7480000257492065,0.7509999871253967,0.7570000290870667,0.75,0.7540000081062317,0.7480000257492065,0.75499999523
16284,0.7559999823570251,0.7580000162124634,0.7580000162124634,0.753000020980835,0.7490000128746033,0.7540000081062317,0.7639999985694885,0.7580000162124634,0.7519999742507935,0.7590000033378601,0.75,0.7570000290870667,0.7620000243186951,0.7710000276565552,0.7739999890327454,0.7620000243186951,0.7549999952316284,0.7599999904632568,0.765999972820282,0.7680000066757202,0.7639999985694885,0.7540000081062317,0.7649999856948853,0.7649999856948853,0.7609999775886536,0.7549999952316284,0.765999972820282,0.7639999985694885,0.7580000162124634,0.7710000276565552,0.7570000290870667,0.7630000114440918,0.7580000162124634,0.7599999904632568,0.7649999856948853,0.7670000195503235,0.7699999809265137,0.7710000276565552,0.7559999823570251,0.7609999775886536,0.7620000243186951,0.7620000243186951,0.7609999775886536,0.753000020980835,0.7570000290870667,0.7620000243186951,0.7609999775886536,0.7609999775886536,0.7559999823570251,0.7540000081062317,0.7570000290870667,0.7639999985694885,0.7590000033378601,0.7680000066757202,0.7680000066757202,0.765999972820282,0.765999972820282,0.7670000195503235,0.7739999890327454,0.7649999856948853,0.7749999761581421,0.7699999809265137,0.7639999985694885,0.7680000066757202,0.7630000114440918,0.7680000066757202,0.7699999809265137,0.7739999890327454,0.7749999761581421,0.765999972820282,0.7680000066757202,0.7710000276565552,0.7680000066757202,0.765999972820282,0.7689999938011169,0.7760000228881836,0.7710000276565552,0.7680000066757202,0.7649999856948853,0.7720000147819519,0.7730000019073486],"label":"RefinedWeb"},"big-run-fineweb-cross-dedup-fixed":{"x":[0.0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,29.360128000000003,31.45728,33.554432000000006,35.651584,37.748736,39.845888,41.94304,44.040192000000005,46.137344000000006,48.234496,50.331648,52.4288,54.525952000000004,56.623104000000005,58.720256000000006,60.817408,6
2.91456,65.011712,67.10886400000001,69.206016,71.303168,73.40032000000001,75.497472,77.59462400000001,79.691776,81.788928,83.88608,85.983232,88.08038400000001,90.177536,92.27468800000001,94.37184,96.468992,98.56614400000001,100.663296,102.76044800000001,104.8576,106.95475200000001,109.05190400000001,111.149056,113.24620800000001,115.34336,117.44051200000001,119.537664,121.634816,123.73196800000001,125.82912,127.92627200000001,130.023424,132.120576,134.21772800000002,136.31488000000002,138.412032,140.509184,142.606336,144.70348800000002,146.80064000000002,148.897792,150.994944,153.092096,155.18924800000002,157.28640000000001,159.383552,161.480704,163.577856,165.67500800000002,167.77216,169.869312,171.966464,174.06361600000002,176.16076800000002,178.25792,180.355072,182.452224,184.54937600000002,186.64652800000002,188.74368,190.840832,192.937984,195.03513600000002,197.13228800000002,199.22944,201.326592,203.423744,205.52089600000002,207.61804800000002,209.7152,211.812352,213.90950400000003,216.00665600000002,218.10380800000001,220.20096,222.298112,224.39526400000003,226.49241600000002,228.589568,230.68672,232.783872,234.88102400000002,236.97817600000002,239.075328,241.17248,243.269632,245.36678400000002,247.46393600000002,249.561088,251.65824,253.75539200000003,255.85254400000002,257.949696,260.046848,262.144,264.241152,266.338304,268.43545600000004,270.53260800000004,272.62976000000003,274.726912,276.824064,278.921216,281.018368,283.11552,285.212672,287.309824,289.40697600000004,291.50412800000004,293.60128000000003,295.698432,297.795584,299.892736,301.989888,304.08704,306.184192,308.28134400000005,310.37849600000004,312.47564800000004,314.57280000000003,316.669952,318.767104,320.864256,322.961408,325.05856,327.155712,329.25286400000005,331.35001600000004,333.44716800000003,335.54432,337.641472,339.738624,341.835776,343.932928,346.03008,348.12723200000005,350.22438400000004],"y":[0.5099999904632568,0.6169999837875366,0.6359999775886536,0.6769999861717224,0.6769999861
717224,0.6970000267028809,0.6990000009536743,0.6970000267028809,0.6959999799728394,0.7049999833106995,0.7089999914169312,0.7179999947547913,0.7099999785423279,0.7160000205039978,0.7260000109672546,0.7229999899864197,0.7179999947547913,0.7210000157356262,0.7200000286102295,0.734000027179718,0.7089999914169312,0.7229999899864197,0.7239999771118164,0.7310000061988831,0.7300000190734863,0.7260000109672546,0.7250000238418579,0.7239999771118164,0.7289999723434448,0.7390000224113464,0.7229999899864197,0.7310000061988831,0.7350000143051147,0.7289999723434448,0.734000027179718,0.7289999723434448,0.7329999804496765,0.7300000190734863,0.7319999933242798,0.7440000176429749,0.746999979019165,0.7310000061988831,0.7329999804496765,0.7480000257492065,0.7429999709129333,0.7369999885559082,0.7269999980926514,0.7269999980926514,0.7379999756813049,0.75,0.7360000014305115,0.746999979019165,0.7409999966621399,0.7369999885559082,0.7459999918937683,0.7400000095367432,0.7409999966621399,0.746999979019165,0.7360000014305115,0.7459999918937683,0.7400000095367432,0.7429999709129333,0.7350000143051147,0.7390000224113464,0.7379999756813049,0.7480000257492065,0.7329999804496765,0.734000027179718,0.7390000224113464,0.7459999918937683,0.7360000014305115,0.7419999837875366,0.7429999709129333,0.7400000095367432,0.7379999756813049,0.7310000061988831,0.7360000014305115,0.7390000224113464,0.75,0.7369999885559082,0.7570000290870667,0.7409999966621399,0.7459999918937683,0.7350000143051147,0.7459999918937683,0.7509999871253967,0.7429999709129333,0.7419999837875366,0.7419999837875366,0.75,0.7440000176429749,0.7450000047683716,0.75,0.7409999966621399,0.7490000128746033,0.7409999966621399,0.7419999837875366,0.7429999709129333,0.7490000128746033,0.7419999837875366,0.7419999837875366,0.75,0.753000020980835,0.75,0.746999979019165,0.7519999742507935,0.746999979019165,0.7570000290870667,0.7549999952316284,0.75,0.7540000081062317,0.7480000257492065,0.7490000128746033,0.7419999837875366,0.7419999837875366,0.74699997
9019165,0.746999979019165,0.75,0.7519999742507935,0.7580000162124634,0.7549999952316284,0.7490000128746033,0.7480000257492065,0.7519999742507935,0.7590000033378601,0.7450000047683716,0.75,0.7440000176429749,0.7419999837875366,0.7519999742507935,0.7450000047683716,0.753000020980835,0.7450000047683716,0.7440000176429749,0.7559999823570251,0.7509999871253967,0.7540000081062317,0.7440000176429749,0.7509999871253967,0.753000020980835,0.7490000128746033,0.7570000290870667,0.7490000128746033,0.746999979019165,0.746999979019165,0.7509999871253967,0.7509999871253967,0.7519999742507935,0.7570000290870667,0.7540000081062317,0.7440000176429749,0.7480000257492065,0.7509999871253967,0.7509999871253967,0.7509999871253967,0.7549999952316284,0.75,0.7559999823570251,0.746999979019165,0.7609999775886536,0.7549999952316284,0.746999979019165,0.7490000128746033,0.753000020980835,0.753000020980835,0.7609999775886536,0.746999979019165,0.7580000162124634],"label":"FineWeb full MinHash"},"big-run-sampled_full_filtered_no_dedup":{"x":[0.0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,28.311552000000002,29.360128000000003,31.45728,33.554432000000006,35.651584,37.748736,39.845888,41.94304,44.040192000000005,46.137344000000006,48.234496,50.331648,52.4288,54.525952000000004,56.623104000000005,58.720256000000006,60.817408,62.91456,65.011712,67.10886400000001,69.206016,71.303168,73.40032000000001,75.497472,77.59462400000001,79.691776,81.788928,83.88608,85.983232,88.08038400000001,90.177536,92.27468800000001,94.37184,96.468992,98.56614400000001,100.663296,102.76044800000001,104.8576,106.95475200000001,109.05190400000001,111.149056,113.24620800000001,115.34336,117.44051200000001,119.537664,121.634816,123.73196800000001,125.82912,127.92627200000001,130.023424,132.120576,134.21772800000002,136.31488000000002,138.412032,140.509184,142.606336,144.70348800000002,146
.80064000000002,148.897792,150.994944,153.092096,155.18924800000002,157.28640000000001,159.383552,161.480704,163.577856,165.67500800000002,167.77216,169.869312,171.966464,174.06361600000002,176.16076800000002,178.25792,180.355072,182.452224,184.54937600000002,186.64652800000002,188.74368,190.840832,192.937984,195.03513600000002,197.13228800000002,199.22944,201.326592,203.423744,205.52089600000002,207.61804800000002,209.7152,211.812352,213.90950400000003,216.00665600000002,218.10380800000001,220.20096,222.298112,224.39526400000003,226.49241600000002,228.589568,230.68672,232.783872,234.88102400000002,236.97817600000002,239.075328,241.17248,243.269632,245.36678400000002,247.46393600000002,249.561088,251.65824,253.75539200000003,255.85254400000002,257.949696,260.046848,262.144,264.241152,266.338304,268.43545600000004,270.53260800000004,272.62976000000003,274.726912,276.824064,278.921216,281.018368,283.11552,285.212672,287.309824,289.40697600000004,291.50412800000004,293.60128000000003,295.698432,297.795584,299.892736,301.989888,304.08704,306.184192,308.28134400000005,310.37849600000004,312.47564800000004,314.57280000000003,316.669952,318.767104,320.864256,322.961408,325.05856,327.155712,329.25286400000005,331.35001600000004,333.44716800000003,335.54432,337.641472,339.738624,341.835776,343.932928,346.03008,348.12723200000005,350.22438400000004],"y":[0.5099999904632568,0.621999979019165,0.6439999938011169,0.6700000166893005,0.6790000200271606,0.6869999766349792,0.6959999799728394,0.6790000200271606,0.6880000233650208,0.7049999833106995,0.699999988079071,0.6990000009536743,0.6940000057220459,0.7110000252723694,0.7089999914169312,0.7120000123977661,0.7070000171661377,0.7070000171661377,0.6990000009536743,0.7009999752044678,0.7160000205039978,0.7200000286102295,0.7149999737739563,0.7250000238418579,0.7210000157356262,0.722000002861023,0.7310000061988831,0.7289999723434448,0.7319999933242798,0.7250000238418579,0.722000002861023,0.7210000157356262,0.7170000076293945,0.72600001
09672546,0.7250000238418579,0.7210000157356262,0.7200000286102295,0.7379999756813049,0.7239999771118164,0.7239999771118164,0.7080000042915344,0.7289999723434448,0.7289999723434448,0.7300000190734863,0.7329999804496765,0.7319999933242798,0.7350000143051147,0.7390000224113464,0.7350000143051147,0.7289999723434448,0.734000027179718,0.7329999804496765,0.7400000095367432,0.7409999966621399,0.7310000061988831,0.7350000143051147,0.7360000014305115,0.7360000014305115,0.7409999966621399,0.7319999933242798,0.7409999966621399,0.7400000095367432,0.7390000224113464,0.7329999804496765,0.7459999918937683,0.753000020980835,0.746999979019165,0.734000027179718,0.7369999885559082,0.7419999837875366,0.734000027179718,0.7419999837875366,0.7289999723434448,0.7350000143051147,0.7300000190734863,0.7519999742507935,0.7390000224113464,0.7400000095367432,0.7409999966621399,0.7429999709129333,0.7450000047683716,0.7329999804496765,0.7260000109672546,0.7570000290870667,0.7360000014305115,0.7519999742507935,0.7419999837875366,0.7379999756813049,0.7390000224113464,0.7490000128746033,0.734000027179718,0.7360000014305115,0.7390000224113464,0.7440000176429749,0.7450000047683716,0.7319999933242798,0.7429999709129333,0.7519999742507935,0.7540000081062317,0.7519999742507935,0.753000020980835,0.7480000257492065,0.7440000176429749,0.7459999918937683,0.7369999885559082,0.7419999837875366,0.7480000257492065,0.7419999837875366,0.765999972820282,0.746999979019165,0.7459999918937683,0.7570000290870667,0.7390000224113464,0.7409999966621399,0.7459999918937683,0.75,0.7570000290870667,0.753000020980835,0.7549999952316284,0.7519999742507935,0.7490000128746033,0.746999979019165,0.7459999918937683,0.7459999918937683,0.746999979019165,0.7409999966621399,0.7419999837875366,0.7459999918937683,0.7440000176429749,0.7459999918937683,0.7490000128746033,0.7450000047683716,0.7409999966621399,0.7419999837875366,0.7490000128746033,0.7590000033378601,0.7549999952316284,0.7549999952316284,0.746999979019165,0.753000020980835,0.754
9999952316284,0.746999979019165,0.7580000162124634,0.7490000128746033,0.753000020980835,0.75,0.75,0.7540000081062317,0.7540000081062317,0.7490000128746033,0.7570000290870667,0.7570000290870667,0.7590000033378601,0.7559999823570251,0.7620000243186951,0.7590000033378601,0.7509999871253967,0.7639999985694885,0.7580000162124634,0.7599999904632568,0.7620000243186951,0.7590000033378601,0.7609999775886536,0.7559999823570251,0.75,0.7509999871253967,0.7549999952316284,0.7540000081062317,0.7540000081062317],"label":"FineWeb filtered only"},"big-run-sampled_full_ind_minhash":{"x":[0.0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,28.311552000000002,29.360128000000003,31.45728,33.554432000000006,35.651584,37.748736,39.845888,41.94304,44.040192000000005,46.137344000000006,48.234496,50.331648,52.4288,54.525952000000004,56.623104000000005,58.720256000000006,60.817408,62.91456,65.011712,67.10886400000001,69.206016,71.303168,73.40032000000001,75.497472,77.59462400000001,79.691776,81.788928,83.88608,85.983232,88.08038400000001,90.177536,92.27468800000001,94.37184,96.468992,98.56614400000001,100.663296,102.76044800000001,104.8576,106.95475200000001,109.05190400000001,111.149056,113.24620800000001,115.34336,117.44051200000001,119.537664,121.634816,123.73196800000001,125.82912,127.92627200000001,130.023424,132.120576,134.21772800000002,136.31488000000002,138.412032,140.509184,142.606336,144.70348800000002,146.80064000000002,148.897792,150.994944,153.092096,155.18924800000002,157.28640000000001,159.383552,161.480704,163.577856,165.67500800000002,167.77216,169.869312,171.966464,174.06361600000002,176.16076800000002,178.25792,180.355072,182.452224,184.54937600000002,186.64652800000002,188.74368,190.840832,192.937984,195.03513600000002,197.13228800000002,199.22944,201.326592,203.423744,205.52089600000002,207.61804800000002,209.7152,211.812352,213.9095
0400000003,216.00665600000002,218.10380800000001,220.20096,222.298112,224.39526400000003,226.49241600000002,228.589568,230.68672,232.783872,234.88102400000002,236.97817600000002,239.075328,241.17248,243.269632,245.36678400000002,247.46393600000002,249.561088,251.65824,253.75539200000003,255.85254400000002,257.949696,260.046848,262.144,264.241152,266.338304,268.43545600000004,270.53260800000004,272.62976000000003,274.726912,276.824064,278.921216,281.018368,283.11552,285.212672,287.309824,289.40697600000004,291.50412800000004,293.60128000000003,295.698432,297.795584,299.892736,301.989888,304.08704,306.184192,308.28134400000005,310.37849600000004,312.47564800000004,314.57280000000003,316.669952,318.767104,320.864256,322.961408,325.05856,327.155712,329.25286400000005,331.35001600000004,333.44716800000003,335.54432,337.641472,339.738624,341.835776,343.932928,346.03008,348.12723200000005,350.22438400000004],"y":[0.5099999904632568,0.6209999918937683,0.6549999713897705,0.6800000071525574,0.6830000281333923,0.703000009059906,0.7020000219345093,0.7110000252723694,0.7160000205039978,0.7129999995231628,0.7210000157356262,0.7250000238418579,0.7210000157356262,0.7310000061988831,0.7269999980926514,0.7269999980926514,0.7329999804496765,0.7459999918937683,0.734000027179718,0.7409999966621399,0.7390000224113464,0.7350000143051147,0.7509999871253967,0.7440000176429749,0.7379999756813049,0.7599999904632568,0.7400000095367432,0.7409999966621399,0.7590000033378601,0.7409999966621399,0.7440000176429749,0.7400000095367432,0.7450000047683716,0.75,0.7440000176429749,0.7409999966621399,0.7429999709129333,0.7440000176429749,0.7440000176429749,0.7559999823570251,0.7459999918937683,0.7559999823570251,0.7540000081062317,0.7599999904632568,0.7559999823570251,0.7490000128746033,0.7490000128746033,0.7429999709129333,0.7609999775886536,0.7519999742507935,0.7480000257492065,0.7490000128746033,0.7620000243186951,0.7580000162124634,0.7580000162124634,0.7540000081062317,0.7509999871253967,0.75199997425
07935,0.7440000176429749,0.7459999918937683,0.7559999823570251,0.7620000243186951,0.746999979019165,0.7570000290870667,0.7620000243186951,0.7570000290870667,0.7540000081062317,0.7540000081062317,0.7570000290870667,0.7590000033378601,0.7519999742507935,0.75,0.7559999823570251,0.7590000033378601,0.7559999823570251,0.7519999742507935,0.7639999985694885,0.7620000243186951,0.7549999952316284,0.7490000128746033,0.7559999823570251,0.7639999985694885,0.7609999775886536,0.7609999775886536,0.7519999742507935,0.7549999952316284,0.7570000290870667,0.7620000243186951,0.7599999904632568,0.7639999985694885,0.7559999823570251,0.753000020980835,0.7649999856948853,0.753000020980835,0.7549999952316284,0.7609999775886536,0.7599999904632568,0.7680000066757202,0.7540000081062317,0.7559999823570251,0.7590000033378601,0.7590000033378601,0.7649999856948853,0.7639999985694885,0.7710000276565552,0.7699999809265137,0.7609999775886536,0.765999972820282,0.7670000195503235,0.7720000147819519,0.7639999985694885,0.7609999775886536,0.7549999952316284,0.7630000114440918,0.7670000195503235,0.7599999904632568,0.765999972820282,0.7670000195503235,0.7670000195503235,0.7670000195503235,0.7720000147819519,0.7760000228881836,0.7710000276565552,0.7829999923706055,0.7630000114440918,0.7720000147819519,0.7649999856948853,0.7630000114440918,0.7699999809265137,0.7720000147819519,0.7720000147819519,0.7689999938011169,0.777999997138977,0.7689999938011169,0.7760000228881836,0.7730000019073486,0.7799999713897705,0.7720000147819519,0.7760000228881836,0.7710000276565552,0.7770000100135803,0.777999997138977,0.7670000195503235,0.7789999842643738,0.7799999713897705,0.7749999761581421,0.7730000019073486,0.777999997138977,0.777999997138977,0.7799999713897705,0.7770000100135803,0.7770000100135803,0.7789999842643738,0.7760000228881836,0.7770000100135803,0.7770000100135803,0.7770000100135803,0.7739999890327454,0.7689999938011169,0.7760000228881836,0.777999997138977,0.7699999809265137,0.7739999890327454,0.7670000195503235,0.76
99999809265137,0.7710000276565552,0.7730000019073486,0.7739999890327454,0.7680000066757202],"label":"FineWeb independent MinHash"}},"layout":{"xaxis":{"title":{"text":"Training tokens (billions)"}},"yaxis":{"title":{"text":"Agg Score"}},"title":{"text":"Independent dedup outperforms dedup across dumps"}}}
 
 
data/plots/cross_ind_unfiltered_comparison/winogrande_acc_norm.json DELETED
@@ -1 +0,0 @@
1
- {"data":{"big-run-refinedweb":{"x":[0.0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,28.311552000000002,29.360128000000003,31.45728,33.554432000000006,35.651584,37.748736,39.845888,41.94304,44.040192000000005,46.137344000000006,48.234496,50.331648,52.4288,54.525952000000004,56.623104000000005,58.720256000000006,60.817408,62.91456,65.011712,67.10886400000001,69.206016,71.303168,73.40032000000001,75.497472,77.59462400000001,79.691776,81.788928,83.88608,85.983232,88.08038400000001,90.177536,92.27468800000001,94.37184,96.468992,98.56614400000001,100.663296,102.76044800000001,104.8576,106.95475200000001,109.05190400000001,111.149056,113.24620800000001,115.34336,117.44051200000001,119.537664,121.634816,123.73196800000001,125.82912,127.92627200000001,130.023424,132.120576,134.21772800000002,136.31488000000002,138.412032,140.509184,142.606336,144.70348800000002,146.80064000000002,148.897792,150.994944,153.092096,155.18924800000002,157.28640000000001,159.383552,161.480704,163.577856,165.67500800000002,167.77216,169.869312,171.966464,174.06361600000002,176.16076800000002,178.25792,180.355072,182.452224,184.54937600000002,186.64652800000002,188.74368,190.840832,192.937984,195.03513600000002,197.13228800000002,199.22944,201.326592,203.423744,205.52089600000002,207.61804800000002,209.7152,211.812352,213.90950400000003,216.00665600000002,218.10380800000001,220.20096,222.298112,224.39526400000003,226.49241600000002,228.589568,230.68672,232.783872,234.88102400000002,236.97817600000002,239.075328,241.17248,243.269632,245.36678400000002,247.46393600000002,249.561088,251.65824,253.75539200000003,255.85254400000002,257.949696,260.046848,262.144,264.241152,266.338304,268.43545600000004,270.53260800000004,272.62976000000003,274.726912,276.824064,278.921216,281.018368,283.11552,285.212672,287.309824,289.40697600000004,291.50412800000004,293.60128
000000003,295.698432,297.795584,299.892736,301.989888,304.08704,306.184192,308.28134400000005,310.37849600000004,312.47564800000004,314.57280000000003,316.669952,318.767104,320.864256,322.961408,325.05856,327.155712,329.25286400000005,331.35001600000004,333.44716800000003,335.54432,337.641472,339.738624,341.835776,343.932928,346.03008,348.12723200000005,350.22438400000004],"y":[0.4970000088214874,0.5,0.4979999959468841,0.4950000047683716,0.4950000047683716,0.5049999952316284,0.5329999923706055,0.5220000147819519,0.5139999985694885,0.5339999794960022,0.5130000114440918,0.5389999747276306,0.5400000214576721,0.5270000100135803,0.5320000052452087,0.5260000228881836,0.5370000004768372,0.527999997138977,0.5289999842643738,0.5339999794960022,0.5270000100135803,0.531000018119812,0.527999997138977,0.5400000214576721,0.5479999780654907,0.550000011920929,0.5400000214576721,0.5350000262260437,0.5410000085830688,0.5379999876022339,0.5299999713897705,0.5490000247955322,0.5509999990463257,0.5519999861717224,0.5429999828338623,0.5429999828338623,0.5440000295639038,0.5379999876022339,0.5379999876022339,0.5419999957084656,0.5609999895095825,0.5540000200271606,0.5370000004768372,0.5440000295639038,0.5410000085830688,0.5379999876022339,0.5329999923706055,0.5419999957084656,0.5419999957084656,0.5519999861717224,0.550000011920929,0.5509999990463257,0.5400000214576721,0.5450000166893005,0.5509999990463257,0.5569999814033508,0.5550000071525574,0.5590000152587891,0.5479999780654907,0.5550000071525574,0.5440000295639038,0.5460000038146973,0.546999990940094,0.5559999942779541,0.5550000071525574,0.5490000247955322,0.5440000295639038,0.546999990940094,0.5450000166893005,0.546999990940094,0.5649999976158142,0.5490000247955322,0.5519999861717224,0.550000011920929,0.5509999990463257,0.5519999861717224,0.5519999861717224,0.5529999732971191,0.5490000247955322,0.546999990940094,0.550000011920929,0.5720000267028809,0.5619999766349792,0.5490000247955322,0.5680000185966492,0.5519999861717224,0.556999981
4033508,0.5509999990463257,0.5619999766349792,0.5630000233650208,0.5529999732971191,0.5619999766349792,0.5609999895095825,0.550000011920929,0.5479999780654907,0.5529999732971191,0.5519999861717224,0.5580000281333923,0.5590000152587891,0.5529999732971191,0.550000011920929,0.5680000185966492,0.5580000281333923,0.5630000233650208,0.5630000233650208,0.5559999942779541,0.5649999976158142,0.5569999814033508,0.5649999976158142,0.5659999847412109,0.5559999942779541,0.5659999847412109,0.5630000233650208,0.5509999990463257,0.5669999718666077,0.5669999718666077,0.5479999780654907,0.5540000200271606,0.5580000281333923,0.5519999861717224,0.5590000152587891,0.5590000152587891,0.5619999766349792,0.5509999990463257,0.546999990940094,0.5609999895095825,0.5540000200271606,0.5630000233650208,0.5580000281333923,0.5559999942779541,0.5680000185966492,0.5649999976158142,0.5619999766349792,0.5580000281333923,0.5630000233650208,0.5559999942779541,0.5540000200271606,0.5540000200271606,0.5569999814033508,0.5619999766349792,0.5559999942779541,0.5600000023841858,0.5460000038146973,0.5429999828338623,0.5580000281333923,0.5550000071525574,0.5580000281333923,0.5540000200271606,0.5609999895095825,0.5519999861717224,0.550000011920929,0.5519999861717224,0.5590000152587891,0.5619999766349792,0.5600000023841858,0.5590000152587891,0.5690000057220459,0.5640000104904175,0.5580000281333923,0.5559999942779541,0.5569999814033508,0.5569999814033508,0.5540000200271606,0.5640000104904175,0.5600000023841858,0.5550000071525574,0.5640000104904175,0.5600000023841858,0.5540000200271606],"label":"RefinedWeb"},"big-run-fineweb-cross-dedup-fixed":{"x":[0.0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,29.360128000000003,31.45728,33.554432000000006,35.651584,37.748736,39.845888,41.94304,44.040192000000005,46.137344000000006,48.234496,50.331648,52.4288,54.525952000000004,56.6231040
00000005,58.720256000000006,60.817408,62.91456,65.011712,67.10886400000001,69.206016,71.303168,73.40032000000001,75.497472,77.59462400000001,79.691776,81.788928,83.88608,85.983232,88.08038400000001,90.177536,92.27468800000001,94.37184,96.468992,98.56614400000001,100.663296,102.76044800000001,104.8576,106.95475200000001,109.05190400000001,111.149056,113.24620800000001,115.34336,117.44051200000001,119.537664,121.634816,123.73196800000001,125.82912,127.92627200000001,130.023424,132.120576,134.21772800000002,136.31488000000002,138.412032,140.509184,142.606336,144.70348800000002,146.80064000000002,148.897792,150.994944,153.092096,155.18924800000002,157.28640000000001,159.383552,161.480704,163.577856,165.67500800000002,167.77216,169.869312,171.966464,174.06361600000002,176.16076800000002,178.25792,180.355072,182.452224,184.54937600000002,186.64652800000002,188.74368,190.840832,192.937984,195.03513600000002,197.13228800000002,199.22944,201.326592,203.423744,205.52089600000002,207.61804800000002,209.7152,211.812352,213.90950400000003,216.00665600000002,218.10380800000001,220.20096,222.298112,224.39526400000003,226.49241600000002,228.589568,230.68672,232.783872,234.88102400000002,236.97817600000002,239.075328,241.17248,243.269632,245.36678400000002,247.46393600000002,249.561088,251.65824,253.75539200000003,255.85254400000002,257.949696,260.046848,262.144,264.241152,266.338304,268.43545600000004,270.53260800000004,272.62976000000003,274.726912,276.824064,278.921216,281.018368,283.11552,285.212672,287.309824,289.40697600000004,291.50412800000004,293.60128000000003,295.698432,297.795584,299.892736,301.989888,304.08704,306.184192,308.28134400000005,310.37849600000004,312.47564800000004,314.57280000000003,316.669952,318.767104,320.864256,322.961408,325.05856,327.155712,329.25286400000005,331.35001600000004,333.44716800000003,335.54432,337.641472,339.738624,341.835776,343.932928,346.03008,348.12723200000005,350.22438400000004],"y":[0.4970000088214874,0.4869999885559082,0.495999991
8937683,0.4979999959468841,0.5099999904632568,0.515999972820282,0.5080000162124634,0.5249999761581421,0.5239999890327454,0.5299999713897705,0.5239999890327454,0.5149999856948853,0.5270000100135803,0.5249999761581421,0.5180000066757202,0.5220000147819519,0.5329999923706055,0.5289999842643738,0.5239999890327454,0.5299999713897705,0.5230000019073486,0.5130000114440918,0.5180000066757202,0.5299999713897705,0.5199999809265137,0.5270000100135803,0.5230000019073486,0.5299999713897705,0.5320000052452087,0.5429999828338623,0.527999997138977,0.5379999876022339,0.527999997138977,0.5419999957084656,0.5329999923706055,0.5450000166893005,0.5320000052452087,0.5410000085830688,0.5249999761581421,0.5400000214576721,0.5249999761581421,0.5289999842643738,0.5320000052452087,0.5339999794960022,0.5320000052452087,0.5350000262260437,0.5400000214576721,0.5450000166893005,0.5440000295639038,0.5400000214576721,0.5379999876022339,0.5350000262260437,0.5410000085830688,0.5490000247955322,0.531000018119812,0.5389999747276306,0.546999990940094,0.5529999732971191,0.5370000004768372,0.5440000295639038,0.5400000214576721,0.5490000247955322,0.550000011920929,0.5580000281333923,0.5609999895095825,0.5429999828338623,0.5529999732971191,0.5519999861717224,0.5450000166893005,0.550000011920929,0.5379999876022339,0.5490000247955322,0.5460000038146973,0.5419999957084656,0.5569999814033508,0.5509999990463257,0.5490000247955322,0.5529999732971191,0.5479999780654907,0.5590000152587891,0.5479999780654907,0.5509999990463257,0.5440000295639038,0.5509999990463257,0.5540000200271606,0.5559999942779541,0.5630000233650208,0.5649999976158142,0.5640000104904175,0.5649999976158142,0.5490000247955322,0.5709999799728394,0.5659999847412109,0.5630000233650208,0.5640000104904175,0.5580000281333923,0.546999990940094,0.5550000071525574,0.5580000281333923,0.5429999828338623,0.5440000295639038,0.5569999814033508,0.5569999814033508,0.5540000200271606,0.5550000071525574,0.5649999976158142,0.5540000200271606,0.5630000233650208,0.560
9999895095825,0.5580000281333923,0.5509999990463257,0.5550000071525574,0.5550000071525574,0.5519999861717224,0.5609999895095825,0.5630000233650208,0.5509999990463257,0.550000011920929,0.5490000247955322,0.5540000200271606,0.550000011920929,0.5529999732971191,0.5460000038146973,0.550000011920929,0.5529999732971191,0.5519999861717224,0.5529999732971191,0.5609999895095825,0.5590000152587891,0.5550000071525574,0.550000011920929,0.5609999895095825,0.5619999766349792,0.5609999895095825,0.5540000200271606,0.550000011920929,0.5600000023841858,0.5559999942779541,0.5609999895095825,0.5569999814033508,0.5600000023841858,0.5680000185966492,0.5580000281333923,0.5559999942779541,0.5569999814033508,0.5669999718666077,0.5709999799728394,0.5640000104904175,0.5569999814033508,0.5600000023841858,0.5569999814033508,0.5649999976158142,0.5600000023841858,0.5580000281333923,0.5609999895095825,0.5590000152587891,0.5640000104904175,0.5529999732971191,0.5640000104904175,0.5649999976158142,0.5659999847412109,0.5630000233650208,0.5630000233650208,0.5619999766349792,0.5609999895095825,0.5559999942779541,0.5529999732971191,0.5600000023841858],"label":"FineWeb full 
MinHash"},"big-run-sampled_full_filtered_no_dedup":{"x":[0.0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,28.311552000000002,29.360128000000003,31.45728,33.554432000000006,35.651584,37.748736,39.845888,41.94304,44.040192000000005,46.137344000000006,48.234496,50.331648,52.4288,54.525952000000004,56.623104000000005,58.720256000000006,60.817408,62.91456,65.011712,67.10886400000001,69.206016,71.303168,73.40032000000001,75.497472,77.59462400000001,79.691776,81.788928,83.88608,85.983232,88.08038400000001,90.177536,92.27468800000001,94.37184,96.468992,98.56614400000001,100.663296,102.76044800000001,104.8576,106.95475200000001,109.05190400000001,111.149056,113.24620800000001,115.34336,117.44051200000001,119.537664,121.634816,123.73196800000001,125.82912,127.92627200000001,130.023424,132.120576,134.21772800000002,136.31488000000002,138.412032,140.509184,142.606336,144.70348800000002,146.80064000000002,148.897792,150.994944,153.092096,155.18924800000002,157.28640000000001,159.383552,161.480704,163.577856,165.67500800000002,167.77216,169.869312,171.966464,174.06361600000002,176.16076800000002,178.25792,180.355072,182.452224,184.54937600000002,186.64652800000002,188.74368,190.840832,192.937984,195.03513600000002,197.13228800000002,199.22944,201.326592,203.423744,205.52089600000002,207.61804800000002,209.7152,211.812352,213.90950400000003,216.00665600000002,218.10380800000001,220.20096,222.298112,224.39526400000003,226.49241600000002,228.589568,230.68672,232.783872,234.88102400000002,236.97817600000002,239.075328,241.17248,243.269632,245.36678400000002,247.46393600000002,249.561088,251.65824,253.75539200000003,255.85254400000002,257.949696,260.046848,262.144,264.241152,266.338304,268.43545600000004,270.53260800000004,272.62976000000003,274.726912,276.824064,278.921216,281.018368,283.11552,285.212672,287.309824,289.40697600000004,291.50412
800000004,293.60128000000003,295.698432,297.795584,299.892736,301.989888,304.08704,306.184192,308.28134400000005,310.37849600000004,312.47564800000004,314.57280000000003,316.669952,318.767104,320.864256,322.961408,325.05856,327.155712,329.25286400000005,331.35001600000004,333.44716800000003,335.54432,337.641472,339.738624,341.835776,343.932928,346.03008,348.12723200000005,350.22438400000004],"y":[0.4970000088214874,0.5239999890327454,0.4900000095367431,0.5040000081062317,0.5099999904632568,0.4990000128746032,0.5170000195503235,0.5040000081062317,0.5009999871253967,0.5230000019073486,0.5109999775886536,0.5059999823570251,0.5130000114440918,0.5090000033378601,0.5180000066757202,0.5220000147819519,0.5189999938011169,0.5180000066757202,0.5220000147819519,0.5120000243186951,0.5460000038146973,0.5239999890327454,0.5289999842643738,0.5440000295639038,0.5339999794960022,0.5299999713897705,0.5260000228881836,0.5360000133514404,0.5339999794960022,0.5360000133514404,0.5299999713897705,0.5180000066757202,0.5249999761581421,0.5440000295639038,0.5299999713897705,0.5339999794960022,0.5239999890327454,0.527999997138977,0.5139999985694885,0.5289999842643738,0.5360000133514404,0.5260000228881836,0.5389999747276306,0.5460000038146973,0.5270000100135803,0.5339999794960022,0.5320000052452087,0.5329999923706055,0.5260000228881836,0.5220000147819519,0.5260000228881836,0.5379999876022339,0.5410000085830688,0.5350000262260437,0.5389999747276306,0.5320000052452087,0.5389999747276306,0.5379999876022339,0.5329999923706055,0.5270000100135803,0.5170000195503235,0.5329999923706055,0.5370000004768372,0.5379999876022339,0.5249999761581421,0.5479999780654907,0.546999990940094,0.5400000214576721,0.5440000295639038,0.5360000133514404,0.5450000166893005,0.5440000295639038,0.5370000004768372,0.5370000004768372,0.5479999780654907,0.5379999876022339,0.5400000214576721,0.5479999780654907,0.5379999876022339,0.5509999990463257,0.5440000295639038,0.5379999876022339,0.550000011920929,0.5389999747276306,0.53700
00004768372,0.5379999876022339,0.5419999957084656,0.5360000133514404,0.5509999990463257,0.5360000133514404,0.5419999957084656,0.5419999957084656,0.550000011920929,0.5360000133514404,0.5519999861717224,0.5540000200271606,0.546999990940094,0.5370000004768372,0.5379999876022339,0.5519999861717224,0.5329999923706055,0.5400000214576721,0.5429999828338623,0.550000011920929,0.5490000247955322,0.5360000133514404,0.550000011920929,0.5569999814033508,0.5490000247955322,0.5490000247955322,0.5479999780654907,0.5350000262260437,0.5490000247955322,0.5370000004768372,0.5440000295639038,0.5329999923706055,0.5440000295639038,0.5429999828338623,0.5389999747276306,0.5450000166893005,0.5320000052452087,0.5450000166893005,0.5400000214576721,0.5419999957084656,0.5460000038146973,0.5370000004768372,0.5400000214576721,0.5460000038146973,0.5370000004768372,0.5370000004768372,0.5460000038146973,0.5400000214576721,0.5490000247955322,0.5529999732971191,0.5379999876022339,0.5460000038146973,0.5450000166893005,0.5429999828338623,0.5460000038146973,0.5400000214576721,0.5479999780654907,0.5460000038146973,0.5540000200271606,0.5400000214576721,0.5350000262260437,0.5490000247955322,0.5460000038146973,0.5460000038146973,0.5509999990463257,0.5410000085830688,0.5429999828338623,0.5379999876022339,0.5450000166893005,0.5389999747276306,0.5400000214576721,0.5400000214576721,0.550000011920929,0.5440000295639038,0.5389999747276306,0.5450000166893005,0.5400000214576721,0.5389999747276306,0.5419999957084656,0.5410000085830688,0.5440000295639038,0.5519999861717224,0.5479999780654907,0.5450000166893005,0.5569999814033508],"label":"FineWeb filtered 
only"},"big-run-sampled_full_ind_minhash":{"x":[0.0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,28.311552000000002,29.360128000000003,31.45728,33.554432000000006,35.651584,37.748736,39.845888,41.94304,44.040192000000005,46.137344000000006,48.234496,50.331648,52.4288,54.525952000000004,56.623104000000005,58.720256000000006,60.817408,62.91456,65.011712,67.10886400000001,69.206016,71.303168,73.40032000000001,75.497472,77.59462400000001,79.691776,81.788928,83.88608,85.983232,88.08038400000001,90.177536,92.27468800000001,94.37184,96.468992,98.56614400000001,100.663296,102.76044800000001,104.8576,106.95475200000001,109.05190400000001,111.149056,113.24620800000001,115.34336,117.44051200000001,119.537664,121.634816,123.73196800000001,125.82912,127.92627200000001,130.023424,132.120576,134.21772800000002,136.31488000000002,138.412032,140.509184,142.606336,144.70348800000002,146.80064000000002,148.897792,150.994944,153.092096,155.18924800000002,157.28640000000001,159.383552,161.480704,163.577856,165.67500800000002,167.77216,169.869312,171.966464,174.06361600000002,176.16076800000002,178.25792,180.355072,182.452224,184.54937600000002,186.64652800000002,188.74368,190.840832,192.937984,195.03513600000002,197.13228800000002,199.22944,201.326592,203.423744,205.52089600000002,207.61804800000002,209.7152,211.812352,213.90950400000003,216.00665600000002,218.10380800000001,220.20096,222.298112,224.39526400000003,226.49241600000002,228.589568,230.68672,232.783872,234.88102400000002,236.97817600000002,239.075328,241.17248,243.269632,245.36678400000002,247.46393600000002,249.561088,251.65824,253.75539200000003,255.85254400000002,257.949696,260.046848,262.144,264.241152,266.338304,268.43545600000004,270.53260800000004,272.62976000000003,274.726912,276.824064,278.921216,281.018368,283.11552,285.212672,287.309824,289.40697600000004,291.50412800000004
,293.60128000000003,295.698432,297.795584,299.892736,301.989888,304.08704,306.184192,308.28134400000005,310.37849600000004,312.47564800000004,314.57280000000003,316.669952,318.767104,320.864256,322.961408,325.05856,327.155712,329.25286400000005,331.35001600000004,333.44716800000003,335.54432,337.641472,339.738624,341.835776,343.932928,346.03008,348.12723200000005,350.22438400000004],"y":[0.4970000088214874,0.4880000054836273,0.492000013589859,0.5059999823570251,0.5139999985694885,0.5070000290870667,0.5090000033378601,0.5230000019073486,0.5189999938011169,0.5189999938011169,0.5220000147819519,0.5149999856948853,0.5260000228881836,0.5329999923706055,0.5230000019073486,0.5180000066757202,0.5289999842643738,0.5400000214576721,0.5410000085830688,0.5440000295639038,0.5329999923706055,0.550000011920929,0.5419999957084656,0.5360000133514404,0.5429999828338623,0.5429999828338623,0.5450000166893005,0.5490000247955322,0.5400000214576721,0.5509999990463257,0.5559999942779541,0.5479999780654907,0.5540000200271606,0.5490000247955322,0.5400000214576721,0.5429999828338623,0.5460000038146973,0.5370000004768372,0.5479999780654907,0.5550000071525574,0.5490000247955322,0.5400000214576721,0.5410000085830688,0.5460000038146973,0.546999990940094,0.5479999780654907,0.546999990940094,0.5509999990463257,0.5450000166893005,0.5590000152587891,0.5419999957084656,0.5540000200271606,0.5440000295639038,0.5450000166893005,0.5580000281333923,0.5540000200271606,0.5440000295639038,0.5619999766349792,0.5450000166893005,0.5600000023841858,0.5559999942779541,0.5600000023841858,0.5400000214576721,0.5569999814033508,0.5600000023841858,0.5619999766349792,0.5529999732971191,0.5649999976158142,0.5609999895095825,0.5550000071525574,0.5609999895095825,0.5580000281333923,0.5550000071525574,0.5619999766349792,0.5550000071525574,0.5519999861717224,0.5600000023841858,0.5550000071525574,0.5550000071525574,0.5590000152587891,0.5490000247955322,0.5580000281333923,0.5600000023841858,0.5419999957084656,0.555999994277954
1,0.5559999942779541,0.5529999732971191,0.5609999895095825,0.5519999861717224,0.5569999814033508,0.5569999814033508,0.5509999990463257,0.5619999766349792,0.546999990940094,0.5619999766349792,0.5460000038146973,0.5529999732971191,0.5619999766349792,0.5690000057220459,0.5680000185966492,0.5720000267028809,0.5640000104904175,0.5550000071525574,0.5509999990463257,0.550000011920929,0.5600000023841858,0.5609999895095825,0.5630000233650208,0.5649999976158142,0.5529999732971191,0.5540000200271606,0.5529999732971191,0.5659999847412109,0.5600000023841858,0.5590000152587891,0.5619999766349792,0.5600000023841858,0.5730000138282776,0.5569999814033508,0.5690000057220459,0.5619999766349792,0.5680000185966492,0.578000009059906,0.5730000138282776,0.5550000071525574,0.5529999732971191,0.5600000023841858,0.5630000233650208,0.5590000152587891,0.5659999847412109,0.5669999718666077,0.5609999895095825,0.5630000233650208,0.5569999814033508,0.5490000247955322,0.5619999766349792,0.5550000071525574,0.5630000233650208,0.5559999942779541,0.5559999942779541,0.5649999976158142,0.5569999814033508,0.5619999766349792,0.5559999942779541,0.5669999718666077,0.5609999895095825,0.5690000057220459,0.5770000219345093,0.5690000057220459,0.5720000267028809,0.5619999766349792,0.5649999976158142,0.5669999718666077,0.5680000185966492,0.5699999928474426,0.5640000104904175,0.5609999895095825,0.5740000009536743,0.5690000057220459,0.5669999718666077,0.5720000267028809,0.5699999928474426,0.5709999799728394,0.5740000009536743,0.5680000185966492,0.5619999766349792,0.5690000057220459,0.5659999847412109,0.574999988079071],"label":"FineWeb independent MinHash"}},"layout":{"xaxis":{"title":{"text":"Training tokens (billions)"}},"yaxis":{"title":{"text":"Agg Score"}},"title":{"text":"Independent dedup outperforms dedup across dumps"}}}
 
 
data/plots/custom-filters/agg_score.json DELETED
@@ -1 +0,0 @@
1
- {"data":{"filtering-baseline-2019-18-40gt":{"x":[0.0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,28.311552000000002],"y":[0.3308596294373274,0.35654734168201685,0.3758235517889261,0.38752372190356255,0.39841264486312866,0.4040419068187475,0.4097859803587198,0.41541148349642754,0.416892247274518,0.41986062191426754,0.4234193116426468,0.4218583852052688,0.4243287574499845,0.42519346065819263,0.42440339736640453],"label":"Baseline"},"filtering-custom-line-char-duplicated-v2-0.01":{"x":[0.0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,28.311552000000002],"y":[0.3308979943394661,0.35727922804653645,0.3758955802768469,0.39312327839434147,0.3984657619148493,0.4037223849445581,0.40907647646963596,0.41408527828752995,0.42114910110831255,0.42039695382118225,0.4248786196112633,0.42590542137622833,0.4263712782412767,0.42797840014100075,0.4277621991932392],"label":"Line duplicates filter"},"filtering-custom-lines-punc-0.12":{"x":[0.0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,28.311552000000002],"y":[0.3308933284133672,0.35735468938946724,0.3787423223257065,0.391122592613101,0.3976811040192842,0.4041402228176594,0.4110417179763317,0.4150725454092026,0.42221225984394545,0.4235249478369951,0.42567262239754194,0.42764298990368843,0.4280493911355734,0.42981273680925364,0.42845905013382435],"label":"Punctuation 
filter"},"filtering-custom-short-line-ratio-0.67":{"x":[0.0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,28.311552000000002],"y":[0.33087017294019455,0.35839469730854034,0.379800958558917,0.3909519836306572,0.3985003251582384,0.4028578344732523,0.4080309104174375,0.411550747230649,0.4152813777327537,0.41849316097795963,0.42109199613332743,0.4223319999873638,0.42558939941227436,0.42717534117400646,0.426479609683156],"label":"Short lines filter"},"filtering-custom-punc0.12-short-lines0.67-line_char_dup0.1":{"x":[0.0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,28.311552000000002],"y":[0.3308933284133672,0.35955795273184776,0.3757704347372055,0.3934198468923569,0.398214865475893,0.4062729831784963,0.41363069601356983,0.41463132016360754,0.41851891577243805,0.4239445272833109,0.42439557053148746,0.4273625332862139,0.4289980959147215,0.4327357914298773,0.43017333932220936],"label":"Filters combined"}},"layout":{"xaxis":{"title":{"text":"Training tokens (Billions)"}},"title":{"text":"Custom filters performance"},"yaxis":{"range":[0.38,0.44]}}}
 
 
data/plots/custom-filters/arc_acc_norm.json DELETED
@@ -1 +0,0 @@
1
- {"data":{"filtering-baseline-2019-18-40gt":{"x":[0.0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,28.311552000000002],"y":[0.2512499988079071,0.29224999248981476,0.3267499953508377,0.34375,0.34800000488758087,0.35224999487400055,0.3567499965429306,0.36450000107288355,0.369499996304512,0.3712500035762787,0.3722500056028366,0.37325000762939453,0.377250000834465,0.37624999880790705,0.3764999955892563],"label":"Baseline"},"filtering-custom-line-char-duplicated-v2-0.01":{"x":[0.0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,28.311552000000002],"y":[0.2509999871253967,0.29474999010562897,0.3184999972581863,0.3392500132322311,0.35074999928474426,0.35300000011920923,0.35750000178813934,0.3684999942779541,0.3817500025033951,0.37800000607967377,0.38199999928474426,0.38600000739097595,0.38525000214576716,0.39000000059604645,0.38850000500679016],"label":"Line duplicates filter"},"filtering-custom-lines-punc-0.12":{"x":[0.0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,28.311552000000002],"y":[0.2509999871253967,0.29100000858306885,0.31949999928474426,0.33675000071525574,0.34524999558925623,0.35850000381469727,0.3557499945163727,0.36124999821186066,0.3599999994039535,0.36800000071525574,0.36775000393390656,0.3770000040531158,0.37025000154972076,0.37424999475479126,0.37299999594688416],"label":"Punctuation 
filter"},"filtering-custom-short-line-ratio-0.67":{"x":[0.0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,28.311552000000002],"y":[0.2509999871253967,0.2892500013113022,0.3190000057220459,0.3385000079870224,0.3449999988079071,0.3495000004768371,0.36374999582767487,0.3604999929666519,0.36549998819828033,0.37074999511241913,0.37150000035762787,0.3722500056028366,0.37774999439716334,0.3774999976158142,0.37899999320507044],"label":"Short lines filter"},"filtering-custom-punc0.12-short-lines0.67-line_char_dup0.1":{"x":[0.0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,28.311552000000002],"y":[0.2509999871253967,0.2905000001192093,0.3199999928474426,0.3397499918937683,0.3467499911785126,0.3540000021457672,0.3662499934434891,0.36374999582767487,0.3647499978542328,0.3675000071525574,0.371749997138977,0.37074999511241913,0.375,0.3787499964237213,0.38099999725818634],"label":"Filters combined"}},"layout":{"xaxis":{"title":{"text":"Training tokens (Billions)"}},"title":{"text":"Custom filters performance"},"yaxis":{"range":[0.38,0.44]}}}
 
 
data/plots/custom-filters/commonsense_qa_acc_norm.json DELETED
@@ -1 +0,0 @@
1
- {"data":{"filtering-baseline-2019-18-40gt":{"x":[0.0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,28.311552000000002],"y":[0.2329999953508377,0.2644999921321869,0.2850000113248825,0.2875000089406967,0.31049999594688416,0.3135000020265579,0.3279999941587448,0.32999999821186066,0.32349999248981476,0.3229999989271164,0.32350000739097595,0.32900001108646393,0.3264999985694885,0.3349999934434891,0.32999999821186066],"label":"Baseline"},"filtering-custom-line-char-duplicated-v2-0.01":{"x":[0.0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,28.311552000000002],"y":[0.2329999953508377,0.2644999921321869,0.2750000059604645,0.2989999949932098,0.2974999994039535,0.31599999964237213,0.3149999976158142,0.3199999928474426,0.3244999945163727,0.3269999921321869,0.33550000190734863,0.3275000005960464,0.33599999547004694,0.3349999934434891,0.33849999308586115],"label":"Line duplicates filter"},"filtering-custom-lines-punc-0.12":{"x":[0.0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,28.311552000000002],"y":[0.2329999953508377,0.26349999010562897,0.28849999606609344,0.29600000381469727,0.30650000274181366,0.31900000572204584,0.3229999989271164,0.3150000125169754,0.3244999945163727,0.3310000002384186,0.3310000002384186,0.32999999821186066,0.3334999978542328,0.3344999998807907,0.32999999821186066],"label":"Punctuation 
filter"},"filtering-custom-short-line-ratio-0.67":{"x":[0.0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,28.311552000000002],"y":[0.2329999953508377,0.2620000094175339,0.28949999809265137,0.2974999994039535,0.30550000071525574,0.30900000035762787,0.31200000643730164,0.3190000057220459,0.32999999821186066,0.3254999965429306,0.3344999998807907,0.3320000022649765,0.3374999910593033,0.3369999974966049,0.33949999511241913],"label":"Short lines filter"},"filtering-custom-punc0.12-short-lines0.67-line_char_dup0.1":{"x":[0.0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,28.311552000000002],"y":[0.2329999953508377,0.2649999856948852,0.2790000140666961,0.29649999737739563,0.3135000020265579,0.3164999932050705,0.32099999487400055,0.3210000097751617,0.3305000066757202,0.3205000013113022,0.32549999654293055,0.3295000046491623,0.33050000667572016,0.335999995470047,0.33200000226497645],"label":"Filters combined"}},"layout":{"xaxis":{"title":{"text":"Training tokens (Billions)"}},"title":{"text":"Custom filters performance"},"yaxis":{"range":[0.38,0.44]}}}
 
 
data/plots/custom-filters/hellaswag_acc_norm.json DELETED
@@ -1 +0,0 @@
1
- {"data":{"filtering-baseline-2019-18-40gt":{"x":[0.0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,28.311552000000002],"y":[0.257999986410141,0.2900000065565109,0.3279999941587448,0.3550000041723251,0.375,0.38850000500679016,0.40350000560283655,0.41200000047683716,0.4194999933242798,0.42249999940395355,0.4329999983310699,0.43449999392032623,0.43700000643730164,0.4395000040531158,0.43950000405311584],"label":"Baseline"},"filtering-custom-line-char-duplicated-v2-0.01":{"x":[0.0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,28.311552000000002],"y":[0.257999986410141,0.28900000452995295,0.3310000002384186,0.3505000025033951,0.3790000081062317,0.39250001311302185,0.40549999475479126,0.4224999994039535,0.4284999966621399,0.43050000071525574,0.43799999356269836,0.4459999948740005,0.4495000094175339,0.4564999938011169,0.4529999941587448],"label":"Line duplicates filter"},"filtering-custom-lines-punc-0.12":{"x":[0.0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,28.311552000000002],"y":[0.257999986410141,0.29449999332427973,0.33550000190734863,0.34800000488758087,0.3764999955892563,0.3824999928474426,0.3955000042915344,0.41799999773502344,0.4270000010728836,0.43400000035762787,0.44450001418590546,0.45049999654293055,0.45450000464916224,0.45449998974800104,0.4550000131130218],"label":"Punctuation 
filter"},"filtering-custom-short-line-ratio-0.67":{"x":[0.0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,28.311552000000002],"y":[0.257999986410141,0.3020000010728836,0.3310000002384186,0.357000008225441,0.37899999320507044,0.38850000500679016,0.3994999974966049,0.40349999070167536,0.4175000041723251,0.42400000989437103,0.4245000034570694,0.4335000067949295,0.4360000044107437,0.44750000536441803,0.44200000166893],"label":"Short lines filter"},"filtering-custom-punc0.12-short-lines0.67-line_char_dup0.1":{"x":[0.0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,28.311552000000002],"y":[0.257999986410141,0.29349999129772186,0.3210000097751617,0.36150000989437103,0.3734999895095825,0.39599999785423273,0.4125000089406967,0.4234999865293503,0.42749999463558197,0.44699999690055847,0.4549999982118606,0.4660000056028366,0.46600000560283655,0.47050000727176666,0.4675000011920929],"label":"Filters combined"}},"layout":{"xaxis":{"title":{"text":"Training tokens (Billions)"}},"title":{"text":"Custom filters performance"},"yaxis":{"range":[0.38,0.44]}}}
 
 
data/plots/custom-filters/index.json DELETED
@@ -1 +0,0 @@
1
- {"files":{"agg_score":{"file":"agg_score.json"},"commonsense_qa/acc_norm":{"file":"commonsense_qa_acc_norm.json"},"hellaswag/acc_norm":{"file":"hellaswag_acc_norm.json"},"openbookqa/acc_norm":{"file":"openbookqa_acc_norm.json"},"piqa/acc_norm":{"file":"piqa_acc_norm.json"},"winogrande/acc_norm":{"file":"winogrande_acc_norm.json"},"arc/acc_norm":{"file":"arc_acc_norm.json"},"mmlu/acc_norm":{"file":"mmlu_acc_norm.json"}},"settings":{"slider":{"min":0,"max":10,"default":3}}}
 
 
data/plots/custom-filters/mmlu_acc_norm.json DELETED
@@ -1 +0,0 @@
1
- {"data":{"filtering-baseline-2019-18-40gt":{"x":[0.0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,28.311552000000002],"y":[0.2501270473003387,0.25162875652313227,0.26033842563629145,0.26643975079059595,0.26930116117000574,0.27358523011207575,0.27403785288333893,0.2792918980121612,0.28113801777362823,0.2826349586248398,0.2856044620275497,0.2851170748472214,0.28488004207611084,0.2877976596355438,0.28672714531421656],"label":"Baseline"},"filtering-custom-line-char-duplicated-v2-0.01":{"x":[0.0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,28.311552000000002],"y":[0.25018398463726044,0.2544838488101959,0.2611646503210068,0.2652362138032913,0.2704761028289795,0.2737790495157242,0.276611790060997,0.2786822021007538,0.281442791223526,0.2816756069660187,0.2860289514064789,0.28624334931373596,0.2867202013731003,0.28732720017433167,0.28609761595726013],"label":"Line duplicates filter"},"filtering-custom-lines-punc-0.12":{"x":[0.0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,28.311552000000002],"y":[0.2501466572284698,0.2563375234603882,0.26243858039379114,0.26873072981834406,0.27219884097576136,0.27462176978588104,0.27908372879028315,0.2813303619623184,0.28369809687137604,0.28319956362247467,0.28563097119331354,0.28614395856857294,0.28564512729644775,0.2862519174814224,0.2876724004745483],"label":"Punctuation 
filter"},"filtering-custom-short-line-ratio-0.67":{"x":[0.0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,28.311552000000002],"y":[0.24996141344308848,0.25390757620334625,0.26540763676166534,0.27061584591865534,0.27150256931781763,0.2718626409769058,0.27449728548526764,0.2784059643745422,0.28175103664398193,0.28019529581069946,0.2827359586954117,0.2814059555530548,0.2844651788473129,0.28390273451805115,0.2838368713855743],"label":"Short lines filter"},"filtering-custom-punc0.12-short-lines0.67-line_char_dup0.1":{"x":[0.0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,28.311552000000002],"y":[0.2501466572284698,0.2534636557102203,0.2621634304523468,0.2661087810993194,0.2704689502716064,0.27318383753299713,0.2757955640554428,0.2758005559444427,0.28340134024620056,0.2835562080144882,0.28641459345817566,0.28565025329589844,0.28998473286628723,0.29013633728027344,0.2888867110013962],"label":"Filters combined"}},"layout":{"xaxis":{"title":{"text":"Training tokens (Billions)"}},"title":{"text":"Custom filters performance"},"yaxis":{"range":[0.38,0.44]}}}
 
 
data/plots/custom-filters/openbookqa_acc_norm.json DELETED
@@ -1 +0,0 @@
1
- {"data":{"filtering-baseline-2019-18-40gt":{"x":[0.0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,28.311552000000002],"y":[0.2860000133514404,0.25099999457597727,0.2629999965429306,0.2810000032186508,0.2939999997615814,0.2900000065565109,0.3100000023841858,0.3129999935626983,0.3149999976158142,0.3229999989271164,0.3310000002384186,0.32100000977516174,0.32999999821186066,0.32100000977516174,0.32100000977516174],"label":"Baseline"},"filtering-custom-line-char-duplicated-v2-0.01":{"x":[0.0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,28.311552000000002],"y":[0.2860000133514404,0.26900000870227814,0.2670000046491623,0.306999996304512,0.2939999997615814,0.2999999970197677,0.306999996304512,0.31200000643730164,0.31299999356269836,0.3200000077486038,0.3229999989271164,0.32099999487400055,0.32500000298023224,0.3240000009536743,0.3219999969005584],"label":"Line duplicates filter"},"filtering-custom-lines-punc-0.12":{"x":[0.0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,28.311552000000002],"y":[0.2860000133514404,0.2559999972581863,0.2849999964237213,0.3110000044107437,0.2979999929666519,0.3009999990463257,0.318000003695488,0.3140000104904175,0.32899999618530273,0.32899999618530273,0.3369999974966049,0.33599999547004694,0.32900001108646393,0.3299999982118606,0.3330000042915344],"label":"Punctuation 
filter"},"filtering-custom-short-line-ratio-0.67":{"x":[0.0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,28.311552000000002],"y":[0.2860000133514404,0.25999999046325684,0.28200000524520874,0.28599999845027924,0.289000004529953,0.29999999701976776,0.31300000846385956,0.31900000572204584,0.3149999976158142,0.32099999487400055,0.3139999955892563,0.3190000057220459,0.32200001180171967,0.3229999989271164,0.3240000009536743],"label":"Short lines filter"},"filtering-custom-punc0.12-short-lines0.67-line_char_dup0.1":{"x":[0.0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,28.311552000000002],"y":[0.2860000133514404,0.2719999998807907,0.277999997138977,0.3039999902248382,0.28199999034404755,0.30200000107288355,0.3050000071525574,0.31299999356269836,0.32099999487400055,0.3269999921321869,0.31599999964237213,0.3260000050067901,0.32600000500679016,0.3299999982118606,0.32500000298023224],"label":"Filters combined"}},"layout":{"xaxis":{"title":{"text":"Training tokens (Billions)"}},"title":{"text":"Custom filters performance"},"yaxis":{"range":[0.38,0.44]}}}
 
 
data/plots/custom-filters/piqa_acc_norm.json DELETED
@@ -1 +0,0 @@
1
- {"data":{"filtering-baseline-2019-18-40gt":{"x":[0.0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,28.311552000000002],"y":[0.5099999904632568,0.613999992609024,0.6504999995231628,0.6649999916553497,0.6870000064373016,0.6915000081062317,0.6974999904632568,0.7035000026226044,0.7129999995231628,0.7055000066757202,0.7080000042915344,0.7084999978542328,0.7114999890327454,0.714000016450882,0.7115000188350677],"label":"Baseline"},"filtering-custom-line-char-duplicated-v2-0.01":{"x":[0.0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,28.311552000000002],"y":[0.5099999904632568,0.609499990940094,0.652999997138977,0.6744999885559082,0.68299999833107,0.6809999942779541,0.6965000033378601,0.6995000243186951,0.7145000100135803,0.7100000083446503,0.7105000019073486,0.7134999930858612,0.7159999907016754,0.7170000076293945,0.7199999988079071],"label":"Line duplicates filter"},"filtering-custom-lines-punc-0.12":{"x":[0.0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,28.311552000000002],"y":[0.5099999904632568,0.6155000030994415,0.648499995470047,0.6649999916553497,0.6865000128746033,0.690500020980835,0.6965000033378601,0.7029999792575836,0.7139999866485596,0.7105000019073486,0.7089999914169312,0.7139999866485596,0.7144999802112579,0.7229999899864197,0.7175000011920929],"label":"Punctuation 
filter"},"filtering-custom-short-line-ratio-0.67":{"x":[0.0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,28.311552000000002],"y":[0.5099999904632568,0.6254999935626984,0.6530000269412994,0.6665000021457672,0.6860000193119049,0.6980000138282776,0.695499986410141,0.7084999978542328,0.7080000042915344,0.7064999938011169,0.7095000147819519,0.7129999995231628,0.7159999907016754,0.7179999947547913,0.718500018119812],"label":"Short lines filter"},"filtering-custom-punc0.12-short-lines0.67-line_char_dup0.1":{"x":[0.0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,28.311552000000002],"y":[0.5099999904632568,0.6274999976158142,0.656000018119812,0.6665000021457672,0.6854999959468842,0.6895000040531158,0.7035000026226044,0.7060000002384186,0.7100000083446503,0.7195000052452087,0.7159999907016754,0.715499997138977,0.7170000076293945,0.7274999916553497,0.7199999988079071],"label":"Filters combined"}},"layout":{"xaxis":{"title":{"text":"Training tokens (Billions)"}},"title":{"text":"Custom filters performance"},"yaxis":{"range":[0.38,0.44]}}}
 
 
data/plots/custom-filters/winogrande_acc_norm.json DELETED
@@ -1 +0,0 @@
1
- {"data":{"filtering-baseline-2019-18-40gt":{"x":[0.0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,28.311552000000002],"y":[0.4970000088214874,0.4884999990463257,0.49699999392032623,0.5035000145435333,0.5054999887943268,0.5200000107288361,0.5115000009536743,0.5154999792575836,0.507999986410141,0.515500009059906,0.5160000026226044,0.5139999985694885,0.5165000259876251,0.5160000026226044,0.523499995470047],"label":"Baseline"},"filtering-custom-line-char-duplicated-v2-0.01":{"x":[0.0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,28.311552000000002],"y":[0.4970000088214874,0.48749999701976776,0.5024999976158142,0.5035000145435333,0.5099999904632568,0.5080000162124634,0.5050000250339508,0.5069999992847443,0.5180000066757202,0.5085000097751617,0.515500009059906,0.5165000259876251,0.5080000162124634,0.5090000033378601,0.5104999840259552],"label":"Line duplicates filter"},"filtering-custom-lines-punc-0.12":{"x":[0.0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,28.311552000000002],"y":[0.4970000088214874,0.48899999260902405,0.49300000071525574,0.5030000060796738,0.49799999594688416,0.5024999976158142,0.5150000154972076,0.5259999930858612,0.527999997138977,0.5245000123977661,0.5275000035762787,0.5199999809265137,0.5300000011920929,0.5300000011920929,0.5289999842643738],"label":"Punctuation 
filter"},"filtering-custom-short-line-ratio-0.67":{"x":[0.0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,28.311552000000002],"y":[0.4970000088214874,0.471000000834465,0.494499996304512,0.511000007390976,0.511000007390976,0.5070000290870667,0.50450000166893,0.5060000121593475,0.5074999928474426,0.5169999897480011,0.5264999866485596,0.526500016450882,0.5290000140666962,0.5275000035762787,0.5259999930858612],"label":"Short lines filter"},"filtering-custom-punc0.12-short-lines0.67-line_char_dup0.1":{"x":[0.0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,28.311552000000002],"y":[0.4970000088214874,0.4854999929666519,0.4860000014305115,0.5094999969005585,0.5090000033378601,0.5195000171661377,0.5185000002384186,0.5090000182390213,0.5084999799728394,0.5209999978542328,0.5164999961853027,0.5254999995231628,0.5250000059604645,0.5250000059604645,0.5254999995231628],"label":"Filters combined"}},"layout":{"xaxis":{"title":{"text":"Training tokens (Billions)"}},"title":{"text":"Custom filters performance"},"yaxis":{"range":[0.38,0.44]}}}
 
 
data/plots/dataset_ablations/agg_score.json DELETED
@@ -1 +0,0 @@
1
- {"data":{"FineWeb (ours)":{"x":[0.0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,29.360128000000003,31.45728,33.554432000000006,35.651584,37.748736,39.845888,41.94304,44.040192000000005,46.137344000000006,48.234496,50.331648,52.4288,54.525952000000004,56.623104000000005,58.720256000000006,60.817408,62.91456,65.011712,67.10886400000001,69.206016,71.303168,73.40032000000001,75.497472,77.59462400000001,79.691776,81.788928,83.88608,85.983232,88.08038400000001,90.177536,92.27468800000001,94.37184,96.468992,98.56614400000001,100.663296,102.76044800000001,104.8576,106.95475200000001,109.05190400000001,111.149056,113.24620800000001,115.34336,117.44051200000001,119.537664,121.634816,123.73196800000001,125.82912,127.92627200000001,130.023424,132.120576,134.21772800000002,136.31488000000002,138.412032,140.509184,142.606336,144.70348800000002,146.80064000000002,148.897792,150.994944,153.092096,155.18924800000002,157.28640000000001,159.383552,161.480704,163.577856,165.67500800000002,167.77216,169.869312,171.966464,174.06361600000002,176.16076800000002,178.25792,180.355072,182.452224,184.54937600000002,186.64652800000002,188.74368,190.840832,192.937984,195.03513600000002,197.13228800000002,199.22944,201.326592,203.423744,205.52089600000002,207.61804800000002,209.7152,211.812352,213.90950400000003,216.00665600000002,218.10380800000001,220.20096,222.298112,224.39526400000003,226.49241600000002,228.589568,230.68672,232.783872,234.88102400000002,236.97817600000002,239.075328,241.17248,243.269632,245.36678400000002,247.46393600000002,249.561088,251.65824,253.75539200000003,255.85254400000002,257.949696,260.046848,262.144,264.241152,266.338304,268.43545600000004,270.53260800000004,272.62976000000003,274.726912,276.824064,278.921216,281.018368,283.11552,285.212672,287.309824,289.40697600000004,291.50412800000004,293.60128000000003,295.698432,29
7.795584,299.892736,301.989888,304.08704,306.184192,308.28134400000005,310.37849600000004,312.47564800000004,314.57280000000003,316.669952,318.767104,320.864256,322.961408,325.05856,327.155712,329.25286400000005,331.35001600000004,333.44716800000003,335.54432,337.641472,339.738624,341.835776,343.932928,346.03008,348.12723200000005,350.22438400000004],"y":[0.3308933284133672,0.3552836012095213,0.3781493119895458,0.3866849727928638,0.4050675220787525,0.4032807648181915,0.4174600429832935,0.4206059761345386,0.427497424185276,0.4316632784903049,0.4385909177362919,0.4334069043397903,0.4360812865197658,0.4404293224215507,0.4385774843394756,0.4407080821692943,0.4467254020273685,0.4470436163246631,0.4486658610403538,0.4459679573774338,0.4454015754163265,0.4515932314097881,0.4482216536998749,0.4484201297163963,0.455057855695486,0.4526158757507801,0.453176885843277,0.450159091502428,0.4516039006412029,0.4549933448433876,0.4555377587676048,0.4575010798871517,0.4577344059944153,0.4540543705224991,0.4537974074482918,0.4611785635352134,0.4586966186761856,0.4594406597316265,0.4598931074142456,0.457538403570652,0.4591932781040668,0.4636382386088371,0.4582749158143997,0.4625946804881096,0.4633439630270004,0.4666871763765812,0.4649887941777706,0.4671247974038124,0.4665776938199997,0.4672530107200145,0.4666078947484493,0.4666155055165291,0.4727727174758911,0.467480719089508,0.4681386984884739,0.4651658721268177,0.4668439887464046,0.4671731516718864,0.4719251021742821,0.4699816256761551,0.4723306186497211,0.4686817973852157,0.468911949545145,0.4714248068630695,0.4724191203713417,0.4700912088155746,0.4685601107776165,0.4716645181179046,0.4724556542932987,0.4670086726546287,0.4703365340828895,0.4698334187269211,0.471625205129385,0.4688323326408863,0.4735309742391109,0.4729253277182579,0.4747676998376846,0.4723741039633751,0.4764323942363262,0.4737579710781574,0.4758132360875606,0.4755662642419338,0.4730159305036068,0.4787128046154976,0.4740134924650192,0.4785312972962856,0.47835772857069
97,0.4752367511391639,0.474204134196043,0.4737414345145225,0.4780189953744411,0.477523285895586,0.4751617163419723,0.4776186011731624,0.4769949465990066,0.4790891669690609,0.479917362332344,0.4771673306822777,0.4825278185307979,0.4811677671968937,0.4787211790680885,0.4817796200513839,0.4819813556969166,0.4802381917834282,0.4810985140502453,0.481117732822895,0.4791575670242309,0.4798801243305206,0.4829155020415783,0.4822122864425182,0.4827562272548675,0.4839778505265713,0.4820474348962307,0.4858015961945057,0.4826803356409073,0.4831027314066887,0.4827458150684833,0.4819435514509678,0.4836879819631576,0.4835174195468426,0.4855972006917,0.4871680215001106,0.4840429238975048,0.4827739149332046,0.4881435632705688,0.4871019721031189,0.486987367272377,0.4836358055472374,0.4867987409234047,0.4869474284350872,0.4886575266718864,0.4855775311589241,0.4863000251352787,0.4841057248413563,0.488163661211729,0.4904011823236942,0.4870587214827537,0.4884037151932716,0.4873756393790245,0.4925794936716556,0.4874482750892639,0.4898910224437713,0.4893574342131614,0.4888269044458866,0.4887814335525036,0.4876748844981193,0.4853886738419533,0.4878034777939319,0.4911742769181728,0.4905468784272671,0.4896938055753708,0.4875142201781273,0.4900367334485054,0.4900274313986301,0.4905461706221103,0.4891181476414203,0.4881824217736721,0.4902780950069427,0.4895042479038238,0.4890727028250694,0.4897591508924961,0.4879062548279762,0.4897833876311779,0.4902243539690971,0.4884885586798191,0.4880276583135128,0.4927133433520794,0.4899616949260235],"label":"FineWeb 
(ours)"},"C4":{"x":[0.0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,29.360128000000003,31.45728,33.554432000000006,35.651584,37.748736,39.845888,41.94304,44.040192000000005,46.137344000000006,48.234496,50.331648,52.4288,54.525952000000004,56.623104000000005,58.720256000000006,60.817408,62.91456,65.011712,67.10886400000001,69.206016,71.303168,73.40032000000001,75.497472,77.59462400000001,79.691776,81.788928,83.88608,85.983232,88.08038400000001,90.177536,92.27468800000001,94.37184,96.468992,98.56614400000001,100.663296,102.76044800000001,104.8576,106.95475200000001,109.05190400000001,111.149056,113.24620800000001,115.34336,117.44051200000001,119.537664,121.634816,123.73196800000001,125.82912,127.92627200000001,130.023424,132.120576,134.21772800000002,136.31488000000002,138.412032,140.509184,142.606336,144.70348800000002,146.80064000000002,148.897792,150.994944,153.092096,155.18924800000002,157.28640000000001,159.383552,161.480704,163.577856,165.67500800000002,167.77216,169.869312,171.966464,174.06361600000002,176.16076800000002,178.25792,180.355072,182.452224,184.54937600000002,186.64652800000002,188.74368,190.840832,192.937984,195.03513600000002,197.13228800000002,199.22944,201.326592,203.423744,205.52089600000002,207.61804800000002,209.7152,211.812352,213.90950400000003,216.00665600000002,218.10380800000001,220.20096,222.298112,224.39526400000003,226.49241600000002,228.589568,230.68672,232.783872,234.88102400000002,236.97817600000002,239.075328,241.17248,243.269632,245.36678400000002,247.46393600000002,249.561088,251.65824,253.75539200000003,255.85254400000002,257.949696,260.046848,262.144,264.241152,266.338304,268.43545600000004,270.53260800000004,272.62976000000003,274.726912,276.824064,278.921216,281.018368,283.11552,285.212672,287.309824,289.40697600000004,291.50412800000004,293.60128000000003,295.698432,297.795584,299.8
92736,301.989888,304.08704,306.184192,308.28134400000005,310.37849600000004,312.47564800000004,314.57280000000003,316.669952,318.767104,320.864256,322.961408,325.05856,327.155712,329.25286400000005,331.35001600000004,333.44716800000003,335.54432,337.641472,339.738624,341.835776,343.932928,346.03008,348.12723200000005,350.22438400000004],"y":[0.3308933284133672,0.3551123738288879,0.3784352876245975,0.3877953104674816,0.3993203267455101,0.4099095128476619,0.4136184640228748,0.414347343146801,0.4190568178892135,0.4218908287584781,0.4231295175850391,0.4287125319242477,0.4297344498336315,0.4332403875887394,0.4319765940308571,0.4359219521284103,0.4346421919763088,0.4424981512129307,0.4414303861558437,0.444732554256916,0.4384104907512665,0.444181352853775,0.4503846168518066,0.4436752013862133,0.4460380338132381,0.4498310275375843,0.4561186209321022,0.4558072946965694,0.4517050087451935,0.45280721783638,0.4511029534041881,0.4538496136665344,0.4561821073293686,0.4553975760936737,0.4567296579480171,0.4564522393047809,0.4547750055789947,0.4563902877271175,0.4566588178277015,0.4564423710107803,0.4597547873854637,0.4611989110708236,0.458673931658268,0.4615549780428409,0.4594683423638344,0.459637489169836,0.4626501463353634,0.4586587473750114,0.4627759084105491,0.4645364582538605,0.4662919379770756,0.4669810235500335,0.4629590921103954,0.4611541703343391,0.4635159634053707,0.4617790356278419,0.4662701040506363,0.4603991322219372,0.4632007218897342,0.4649621024727821,0.4670373052358627,0.4684801176190376,0.4635576270520687,0.4640897139906883,0.4648366048932075,0.467750646173954,0.4661705493927002,0.4673017747700214,0.4670937992632389,0.4679652377963066,0.4677082300186157,0.4677698649466038,0.4682595692574978,0.4695373848080635,0.466829814016819,0.4746575504541397,0.4714464470744133,0.4712707810103893,0.4668035432696342,0.4732451289892196,0.4715676791965961,0.4724206030368805,0.4714202284812927,0.4740159995853901,0.4751642681658268,0.474029790610075,0.47599982842803,0.4718085192143
917,0.4766124375164509,0.4732356742024421,0.4746463894844055,0.473610121756792,0.4765255078673363,0.4751369915902614,0.477124772965908,0.4740747064352035,0.4751597791910171,0.4746479839086532,0.4750863499939441,0.474069282412529,0.4766325205564499,0.4751966968178749,0.480234831571579,0.4829998835921287,0.4779119342565536,0.4755960702896118,0.4754855707287788,0.4781345650553703,0.4780046604573726,0.4784741662442684,0.4773235991597175,0.4779680110514164,0.4786801375448704,0.4763363562524318,0.4761211909353733,0.4790357053279876,0.4782482571899891,0.4772635623812675,0.4800884462893009,0.4762507900595665,0.4809304289519787,0.4789686501026153,0.4788952358067035,0.4807513877749443,0.4819636456668377,0.4813393056392669,0.4799563512206077,0.4804293029010296,0.4832860343158245,0.4821714237332344,0.485592633485794,0.4815996848046779,0.4831583350896835,0.4846071004867553,0.4833417683839798,0.4807553999125957,0.4866329692304134,0.4829108603298664,0.4853781275451183,0.4814088977873325,0.4803747944533825,0.4820726066827774,0.4840803518891334,0.4835550002753734,0.4805133901536464,0.4861049503087997,0.4835067577660084,0.4856174662709236,0.4859548546373844,0.4868494793772697,0.4867915287613868,0.4881403036415577,0.4869705513119697,0.4862060099840164,0.4862068481743336,0.4862393103539943,0.4863550774753093,0.4858086891472339,0.4857287481427192,0.4880005866289139,0.4846996292471885,0.4863993115723133,0.4876768328249454,0.4873999394476414,0.4885325841605663,0.4876705072820186,0.485390305519104,0.4839635267853737],"label":"C4"},"Dolma":{"x":[0.0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,29.360128000000003,31.45728,33.554432000000006,35.651584,37.748736,39.845888,41.94304,44.040192000000005,46.137344000000006,48.234496,50.331648,52.4288,54.525952000000004,56.623104000000005,58.720256000000006,60.817408,62.91456,65.011712,67.10886400000001,69.20
6016,71.303168,73.40032000000001,75.497472,77.59462400000001,79.691776,81.788928,83.88608,85.983232,88.08038400000001,90.177536,92.27468800000001,94.37184,96.468992,98.56614400000001,100.663296,102.76044800000001,104.8576,106.95475200000001,109.05190400000001,111.149056,113.24620800000001,115.34336,117.44051200000001,119.537664,121.634816,123.73196800000001,125.82912,127.92627200000001,130.023424,132.120576,134.21772800000002,136.31488000000002,138.412032,140.509184,142.606336,144.70348800000002,146.80064000000002,148.897792,150.994944,153.092096,155.18924800000002,157.28640000000001,159.383552,161.480704,163.577856,165.67500800000002,167.77216,169.869312,171.966464,174.06361600000002,176.16076800000002,178.25792,180.355072,182.452224,184.54937600000002,186.64652800000002,188.74368,190.840832,192.937984,195.03513600000002,197.13228800000002,199.22944,201.326592,203.423744,205.52089600000002,207.61804800000002,209.7152,211.812352,213.90950400000003,216.00665600000002,218.10380800000001,220.20096,222.298112,224.39526400000003,226.49241600000002,228.589568,230.68672,232.783872,234.88102400000002,236.97817600000002,239.075328,241.17248,243.269632,245.36678400000002,247.46393600000002,249.561088,251.65824,253.75539200000003,255.85254400000002,257.949696,260.046848,262.144,264.241152,266.338304,268.43545600000004,270.53260800000004,272.62976000000003,274.726912,276.824064,278.921216,281.018368,283.11552,285.212672,287.309824,289.40697600000004,291.50412800000004,293.60128000000003,295.698432,297.795584,299.892736,301.989888,304.08704,306.184192,308.28134400000005,310.37849600000004,312.47564800000004,314.57280000000003,316.669952,318.767104,320.864256,322.961408,325.05856,327.155712,329.25286400000005,331.35001600000004,333.44716800000003,335.54432,337.641472,339.738624,341.835776,343.932928,346.03008,348.12723200000005,350.22438400000004],"y":[0.3307658787816763,0.3523229286074638,0.377311572432518,0.3858824856579304,0.3951764293015003,0.402766715735197,0.404947079718112
9,0.4120629839599132,0.4156050495803356,0.4159501679241657,0.4160743616521358,0.4245675876736641,0.4277330711483955,0.4266031645238399,0.4327213019132614,0.4314779937267303,0.4336568377912044,0.4369685687124729,0.4366284385323524,0.4392882287502289,0.4405222535133362,0.435525432229042,0.4436039961874485,0.4407493844628334,0.441445555537939,0.4410557225346565,0.4371193572878837,0.4451764188706875,0.4450704641640186,0.4473482742905617,0.4460285790264606,0.4495329968631267,0.4506766386330127,0.4496022351086139,0.4495359361171722,0.4517026245594024,0.4506071843206882,0.4504962339997291,0.4534401223063469,0.4526695124804973,0.4503647126257419,0.4501906409859657,0.4478448294103145,0.4534101262688637,0.4531553275883198,0.4552497416734695,0.4568525813519954,0.4575119316577911,0.4590418413281441,0.4530587382614612,0.4584306105971336,0.4584567248821258,0.4616208709776401,0.4618218578398227,0.4582518599927425,0.4551334418356418,0.4586207643151283,0.4616809487342834,0.4611648134887218,0.4634306691586971,0.4613924361765384,0.4588174410164356,0.4602113999426365,0.4626984223723411,0.4556249380111694,0.4615240134298801,0.4615156538784504,0.4632683917880058,0.4620018191635608,0.4673102460801601,0.4642679207026958,0.4684422351419925,0.4641293548047542,0.4620067216455936,0.463960450142622,0.4660330079495907,0.4692153520882129,0.4669661112129688,0.466122068464756,0.4682283326983452,0.4685749150812626,0.4688928835093975,0.465722806751728,0.4659770168364048,0.4635110311210155,0.4644729532301426,0.4663772545754909,0.4708714000880718,0.4700784012675285,0.465810552239418,0.4693214185535908,0.4660605490207672,0.4695423729717731,0.4688450992107391,0.4721916541457176,0.4698034971952438,0.4665254801511764,0.4703435115516186,0.4714724421501159,0.4702155850827694,0.4694059453904629,0.471682820469141,0.4736889898777008,0.4703876934945583,0.4701756276190281,0.4680277854204178,0.4743972420692444,0.4708037711679935,0.4717469103634357,0.4740131162106991,0.4727639146149158,0.4723498672246933,0.47247848
28722477,0.4736377336084842,0.4709004536271095,0.4707547724246979,0.469988003373146,0.4711141660809517,0.4780926555395126,0.4735368900001049,0.4735525399446487,0.472091656178236,0.4721037782728672,0.4745764546096325,0.4754389114677906,0.4705884419381618,0.4724388048052788,0.4759692996740341,0.4773353897035122,0.4758949503302574,0.4740513376891613,0.4749866165220737,0.4769214987754822,0.4776637814939022,0.4756806269288063,0.4760831333696842,0.4764537550508976,0.4760236926376819,0.4765783883631229,0.4751994907855987,0.4746053963899612,0.4780152216553688,0.4798212572932243,0.4787088483572006,0.4781070649623871,0.478924810886383,0.4792283922433853,0.4830445684492588,0.477554626762867,0.4838457219302654,0.4780268892645836,0.4787273816764354,0.4787089973688125,0.4800758995115757,0.4792372398078441,0.480698712170124,0.4782215058803558,0.4793966896831989,0.4793987721204757,0.4804111570119858,0.4809239841997623,0.480060052126646,0.4797034226357937,0.4830792918801307,0.4778680615127086,0.4814067967236042,0.4812476821243763,0.4816214181482792],"label":"Dolma"},"RefinedWeb":{"x":[0.0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,29.360128000000003,31.45728,33.554432000000006,35.651584,37.748736,39.845888,41.94304,44.040192000000005,46.137344000000006,48.234496,50.331648,52.4288,54.525952000000004,56.623104000000005,58.720256000000006,60.817408,62.91456,65.011712,67.10886400000001,69.206016,71.303168,73.40032000000001,75.497472,77.59462400000001,79.691776,81.788928,83.88608,85.983232,88.08038400000001,90.177536,92.27468800000001,94.37184,96.468992,98.56614400000001,100.663296,102.76044800000001,104.8576,106.95475200000001,109.05190400000001,111.149056,113.24620800000001,115.34336,117.44051200000001,119.537664,121.634816,123.73196800000001,125.82912,127.92627200000001,130.023424,132.120576,134.21772800000002,136.31488000000002,138.412032,14
0.509184,142.606336,144.70348800000002,146.80064000000002,148.897792,150.994944,153.092096,155.18924800000002,157.28640000000001,159.383552,161.480704,163.577856,165.67500800000002,167.77216,169.869312,171.966464,174.06361600000002,176.16076800000002,178.25792,180.355072,182.452224,184.54937600000002,186.64652800000002,188.74368,190.840832,192.937984,195.03513600000002,197.13228800000002,199.22944,201.326592,203.423744,205.52089600000002,207.61804800000002,209.7152,211.812352,213.90950400000003,216.00665600000002,218.10380800000001,220.20096,222.298112,224.39526400000003,226.49241600000002,228.589568,230.68672,232.783872,234.88102400000002,236.97817600000002,239.075328,241.17248,243.269632,245.36678400000002,247.46393600000002,249.561088,251.65824,253.75539200000003,255.85254400000002,257.949696,260.046848,262.144,264.241152,266.338304,268.43545600000004,270.53260800000004,272.62976000000003,274.726912,276.824064,278.921216,281.018368,283.11552,285.212672,287.309824,289.40697600000004,291.50412800000004,293.60128000000003,295.698432,297.795584,299.892736,301.989888,304.08704,306.184192,308.28134400000005,310.37849600000004,312.47564800000004,314.57280000000003,316.669952,318.767104,320.864256,322.961408,325.05856,327.155712,329.25286400000005,331.35001600000004,333.44716800000003,335.54432,337.641472,339.738624,341.835776,343.932928,346.03008,348.12723200000005,350.22438400000004],"y":[0.3308933284133672,0.3534814938902855,0.3764607086777687,0.38782499730587,0.3981050960719585,0.4028486795723438,0.4125883243978023,0.4117814563214779,0.414029736071825,0.4197172522544861,0.4211113378405571,0.4279881417751312,0.4280137903988361,0.4280424378812313,0.4326301179826259,0.4371833503246307,0.4346669465303421,0.4336562640964985,0.4432648755609989,0.4401291646063328,0.4394684173166752,0.4476612061262131,0.4465444348752498,0.4472153298556804,0.4433343075215816,0.4510187618434429,0.4459567815065384,0.4460812956094742,0.4498684890568256,0.4529943652451038,0.4528274349868297,0.455
1213420927524,0.4549156539142132,0.4564928151667118,0.4576693661510944,0.4557182416319847,0.4536240361630916,0.457439012825489,0.4570476822555065,0.4589823484420776,0.462024375796318,0.4540738053619861,0.4550252184271812,0.4576593860983848,0.4573238864541054,0.4575810581445694,0.4622134491801262,0.4592566937208175,0.4614734016358852,0.4637473002076149,0.4625372551381588,0.4613912180066108,0.4597448222339153,0.4594792164862156,0.4662549719214439,0.4634026065468788,0.4633508697152138,0.4635734222829342,0.4628961533308029,0.4670135043561458,0.4639505892992019,0.4631133340299129,0.4665167145431041,0.4672448337078094,0.4693268723785877,0.4630668573081493,0.4676454700529575,0.4646359197795391,0.4621579721570015,0.4692446552217006,0.4704835228621959,0.4663223996758461,0.4680556617677212,0.466339822858572,0.4682099223136902,0.4711195565760135,0.4722655527293682,0.4727961830794811,0.4676857478916645,0.4719390422105789,0.4713102728128433,0.4712141714990139,0.4721613004803657,0.4713456854224205,0.4682970903813839,0.4679934531450271,0.4685162976384163,0.4679946713149547,0.4681242071092129,0.4702276065945625,0.472664151340723,0.4730790853500366,0.4731674715876579,0.4718914777040481,0.4719801284372806,0.4761029370129108,0.4735167175531387,0.4730370938777923,0.4730173237621784,0.4735377207398414,0.4777223989367485,0.4796326830983162,0.4734170883893966,0.4739485755562782,0.4748299159109592,0.4765299335122108,0.4745025858283043,0.4754423759877682,0.4784592799842357,0.4761341325938701,0.4760282784700393,0.4769757278263569,0.47154351323843,0.4786738082766533,0.4804279990494251,0.4777076803147793,0.4798569902777672,0.4759011939167976,0.4784621745347976,0.479673832654953,0.4780617095530033,0.48076206818223,0.47995800152421,0.4790860973298549,0.4817167408764362,0.4811586998403072,0.482547752559185,0.4816697351634502,0.4809327870607376,0.4816545359790325,0.4804601892828941,0.4776877984404564,0.4813711903989315,0.4844604581594467,0.4819537848234176,0.4820829331874847,0.4778126627206802,0.4
82935007661581,0.48230691999197,0.4826001971960068,0.4823969900608063,0.4811219945549965,0.4789146520197391,0.484035175293684,0.4848698377609253,0.4855728335678577,0.4825376532971859,0.485215101391077,0.4824351668357849,0.4835342466831207,0.4822137206792831,0.4838785007596016,0.4837255179882049,0.4853012599050998,0.4857851006090641,0.4863366298377514,0.4856646582484245,0.4842503517866134,0.4838776960968971,0.4846346862614155,0.4837041422724724,0.4813097268342972,0.4873070046305656,0.4841253720223903,0.4837464913725853,0.483069509267807,0.4851242564618587,0.4861010462045669],"label":"RefinedWeb"},"SlimPajama":{"x":[0.0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,29.360128000000003,31.45728,33.554432000000006,35.651584,37.748736,39.845888,41.94304,44.040192000000005,46.137344000000006,48.234496,50.331648,52.4288,54.525952000000004,56.623104000000005,58.720256000000006,60.817408,62.91456,65.011712,67.10886400000001,69.206016,71.303168,73.40032000000001,75.497472,77.59462400000001,79.691776,81.788928,83.88608,85.983232,88.08038400000001,90.177536,92.27468800000001,94.37184,96.468992,98.56614400000001,100.663296,102.76044800000001,104.8576,106.95475200000001,109.05190400000001,111.149056,113.24620800000001,115.34336,117.44051200000001,119.537664,121.634816,123.73196800000001,125.82912,127.92627200000001,130.023424,132.120576,134.21772800000002,136.31488000000002,138.412032,140.509184,142.606336,144.70348800000002,146.80064000000002,148.897792,150.994944,153.092096,155.18924800000002,157.28640000000001,159.383552,161.480704,163.577856,165.67500800000002,167.77216,169.869312,171.966464,174.06361600000002,176.16076800000002,178.25792,180.355072,182.452224,184.54937600000002,186.64652800000002,188.74368,190.840832,192.937984,195.03513600000002,197.13228800000002,199.22944,201.326592,203.423744,205.52089600000002,207.61804800000002,20
9.7152,211.812352,213.90950400000003,216.00665600000002,218.10380800000001,220.20096,222.298112,224.39526400000003,226.49241600000002,228.589568,230.68672,232.783872,234.88102400000002,236.97817600000002,239.075328,241.17248,243.269632,245.36678400000002,247.46393600000002,249.561088,251.65824,253.75539200000003,255.85254400000002,257.949696,260.046848,262.144,264.241152,266.338304,268.43545600000004,270.53260800000004,272.62976000000003,274.726912,276.824064,278.921216,281.018368,283.11552,285.212672,287.309824,289.40697600000004,291.50412800000004,293.60128000000003,295.698432,297.795584,299.892736,301.989888,304.08704,306.184192,308.28134400000005,310.37849600000004,312.47564800000004,314.57280000000003,316.669952,318.767104,320.864256,322.961408,325.05856,327.155712,329.25286400000005,331.35001600000004,333.44716800000003,335.54432,337.641472,339.738624,341.835776,343.932928,346.03008,348.12723200000005,350.22438400000004],"y":[0.3304025065153837,0.3485326766967773,0.366294227540493,0.3754063658416271,0.380060039460659,0.3842084035277366,0.3941901586949825,0.396852757781744,0.4074499122798443,0.405584454536438,0.4073434360325336,0.4124714508652687,0.4134335741400718,0.4161443412303924,0.4188667088747024,0.4202426932752132,0.4203570708632469,0.4227871932089329,0.4275586046278476,0.4272583276033401,0.4236343242228031,0.4221611768007278,0.4248477220535278,0.4282951094210148,0.4292030818760395,0.4363042339682579,0.4320153258740902,0.4338943809270859,0.4330463260412216,0.4384616464376449,0.4358800426125526,0.4390238709747791,0.4387876056134701,0.4379991367459297,0.4407520480453968,0.4365722797811031,0.4373247250914573,0.4419326409697532,0.4368139393627643,0.4383439570665359,0.4416647292673588,0.4406861551105976,0.4423066079616546,0.4422457814216614,0.4484159983694553,0.4441563449800014,0.4470763392746448,0.4423048347234726,0.446647435426712,0.4459831714630127,0.4476513750851154,0.4466814696788788,0.4435278102755546,0.4461003206670284,0.4472127594053745,0.448939822614
1929,0.4500008933246135,0.4516164027154445,0.4502749890089035,0.4490774273872375,0.4485588259994983,0.4482260681688785,0.4513203538954258,0.4532071612775326,0.4522806704044342,0.4535516202449798,0.4556184969842434,0.4512116685509681,0.4526651911437511,0.4534229151904583,0.4510319754481315,0.4549613930284977,0.4524864666163921,0.452243909239769,0.4522925950586796,0.4506081640720367,0.4515948034822941,0.4542848505079746,0.4496320001780987,0.4584742784500122,0.4562755897641182,0.4584121964871883,0.4543888121843338,0.4571178145706653,0.4539541527628898,0.4583615288138389,0.4560770355165005,0.4565840028226375,0.456016231328249,0.4589645341038704,0.4549507163465023,0.4590726271271705,0.4600294604897499,0.4643459767103195,0.4621279649436474,0.4603357166051864,0.4565914608538151,0.4588591121137142,0.4605486840009689,0.4612153358757496,0.4666493646800518,0.4652941562235355,0.4635471254587173,0.4646625965833664,0.4616324640810489,0.4620275981724262,0.4599555470049381,0.4569032154977321,0.463778618723154,0.4630071707069874,0.4675904884934425,0.467460885643959,0.4701412692666054,0.463244054466486,0.4643578268587589,0.4655150510370731,0.4679873175919056,0.4627011120319366,0.4645654186606407,0.4667633399367332,0.4651090800762176,0.4674227461218834,0.463763378560543,0.4683923609554767,0.4667338877916336,0.4650763012468815,0.4685031399130821,0.4682512991130352,0.4704002998769283,0.4690291061997413,0.4723447039723396,0.4700597859919071,0.4730553664267063,0.4665385261178016,0.4713114872574806,0.4723366685211658,0.4753480590879917,0.4702173508703708,0.4711348637938499,0.4730475284159183,0.4687437377870083,0.472866803407669,0.4722364842891693,0.4734555780887604,0.4718337096273899,0.474613182246685,0.4756719246506691,0.4706077054142952,0.4729462638497352,0.4753362536430359,0.4749811328947544,0.4718534983694553,0.4764903634786606,0.4737220667302608,0.473462775349617,0.4746402017772198,0.4707343839108944,0.4728488773107528,0.4778959937393665,0.4751430302858352,0.4726544991135597,0.4728592
745959759,0.4748839288949966,0.4711641781032085,0.4739051833748817,0.4720781221985817,0.4751792773604393,0.4769017845392227],"label":"SlimPajama"},"The Pile":{"x":[0.0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,29.360128000000003,31.45728,33.554432000000006,35.651584,37.748736,39.845888,41.94304,44.040192000000005,46.137344000000006,48.234496,50.331648,52.4288,54.525952000000004,56.623104000000005,58.720256000000006,60.817408,62.91456,65.011712,67.10886400000001,69.206016,71.303168,73.40032000000001,75.497472,77.59462400000001,79.691776,81.788928,83.88608,85.983232,88.08038400000001,90.177536,92.27468800000001,94.37184,96.468992,98.56614400000001,100.663296,102.76044800000001,104.8576,106.95475200000001,109.05190400000001,111.149056,113.24620800000001,115.34336,117.44051200000001,119.537664,121.634816,123.73196800000001,125.82912,127.92627200000001,130.023424,132.120576,134.21772800000002,136.31488000000002,138.412032,140.509184,142.606336,144.70348800000002,146.80064000000002,148.897792,150.994944,153.092096,155.18924800000002,157.28640000000001,159.383552,161.480704,163.577856,165.67500800000002,167.77216,169.869312,171.966464,174.06361600000002,176.16076800000002,178.25792,180.355072,182.452224,184.54937600000002,186.64652800000002,188.74368,190.840832,192.937984,195.03513600000002,197.13228800000002,199.22944,201.326592,203.423744,205.52089600000002,207.61804800000002,209.7152,211.812352,213.90950400000003,216.00665600000002,218.10380800000001,220.20096,222.298112,224.39526400000003,226.49241600000002,228.589568,230.68672,232.783872,234.88102400000002,236.97817600000002,239.075328,241.17248,243.269632,245.36678400000002,247.46393600000002,249.561088,251.65824,253.75539200000003,255.85254400000002,257.949696,260.046848,262.144,264.241152,266.338304,268.43545600000004,270.53260800000004,272.62976000000003,274.726912,276.8
24064,278.921216,281.018368,283.11552,285.212672,287.309824,289.40697600000004,291.50412800000004,293.60128000000003,295.698432,297.795584,299.892736,301.989888,304.08704,306.184192,308.28134400000005,310.37849600000004,312.47564800000004,314.57280000000003,316.669952,318.767104,320.864256,322.961408,325.05856,327.155712,329.25286400000005,331.35001600000004,333.44716800000003,335.54432,337.641472,339.738624,341.835776,343.932928,346.03008,348.12723200000005,350.22438400000004],"y":[0.3304216358810663,0.3466135319322347,0.3567665815353393,0.3609300442039966,0.3754195682704448,0.3760548382997513,0.3804825097322464,0.3870464153587818,0.3920954465866089,0.3908583559095859,0.3928747698664665,0.3923895694315433,0.3968513160943985,0.404996283352375,0.4021198935806751,0.4044422507286072,0.4055487178266048,0.4043267257511616,0.4051039405167103,0.4053148292005062,0.4118910208344459,0.4129008501768112,0.4153450205922127,0.4157540574669838,0.4165645688772201,0.4158085733652115,0.4154625944793224,0.4205422177910805,0.4168646782636642,0.4214760512113571,0.4222277626395225,0.4179082997143268,0.416801854968071,0.4236912615597248,0.4248885214328766,0.4242431484162807,0.4248657301068306,0.4258512482047081,0.4243130981922149,0.4244629479944706,0.430818609893322,0.4303862266242504,0.4287605956196785,0.432219460606575,0.4292010366916656,0.4285963587462902,0.4321340434253216,0.4298995658755302,0.4349256418645382,0.4324938207864761,0.4351008906960487,0.4330397509038448,0.4322203621268272,0.4351420737802982,0.4295673854649067,0.4331545792520046,0.4371312223374843,0.4368601404130459,0.4334152191877365,0.435929175466299,0.4348413497209549,0.4362935796380043,0.4395272135734558,0.4395910315215587,0.4397818148136139,0.4375653453171253,0.4350125305354595,0.4383638128638267,0.4414386712014675,0.4406127110123634,0.4395736493170261,0.4400510974228382,0.4349951185286045,0.4433234259486198,0.4408213645219803,0.4408195316791534,0.4454181902110576,0.4432171359658241,0.4386095143854618,0.44241926819086
07,0.4406412802636623,0.4441004805266857,0.4434664808213711,0.4409965090453625,0.4398404918611049,0.4429648593068123,0.4477721899747848,0.4449514634907245,0.4450686313211918,0.4429425410926342,0.4468786306679249,0.4498275183141231,0.4463700018823147,0.4454979188740253,0.4485073313117027,0.4498474262654781,0.4488017149269581,0.4474775716662407,0.4488831833004951,0.4498938769102096,0.4441156759858131,0.4483954235911369,0.454746376723051,0.4491635039448738,0.4470739066600799,0.451224073767662,0.4524858966469764,0.4501192942261696,0.4518791697919369,0.4503650553524494,0.4545648172497749,0.4524221830070019,0.4521367736160755,0.4533489346504211,0.4547304809093475,0.4522802866995334,0.451341975480318,0.4508697539567947,0.4542875029146671,0.4543761536478996,0.456620555371046,0.4563915356993675,0.4555570594966411,0.4565647505223751,0.4527720846235752,0.4573541656136513,0.4586507454514503,0.4574597366154194,0.4572464860975742,0.4601885713636875,0.4597031660377979,0.4591619409620762,0.457712460309267,0.4584441147744655,0.4599815532565117,0.4610339701175689,0.4604088515043258,0.4583280719816685,0.4597837403416633,0.4560487046837806,0.4594802744686603,0.4635319598019123,0.4591141827404499,0.4597685001790523,0.4643094949424267,0.4597949869930744,0.4616970308125019,0.4608079977333545,0.461796186864376,0.4613191820681095,0.4638922624289989,0.4597249217331409,0.4629687368869781,0.4651569910347461,0.4653593450784683,0.4654904417693615,0.4689781554043293,0.4642409011721611,0.466390497982502,0.4667546525597572,0.4650019332766533,0.4624537453055382,0.4668834805488586,0.4637890830636024,0.4627578742802143,0.465025994926691,0.4623493291437626,0.4645387642085552],"label":"The 
Pile"},"RedPajama2":{"x":[0.0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,29.360128000000003,31.45728,33.554432000000006,35.651584,37.748736,39.845888,41.94304,44.040192000000005,46.137344000000006,48.234496,50.331648,52.4288,54.525952000000004,56.623104000000005,58.720256000000006,60.817408,62.91456,65.011712,67.10886400000001,69.206016,71.303168,73.40032000000001,75.497472,77.59462400000001,79.691776,81.788928,83.88608,85.983232,88.08038400000001,90.177536,92.27468800000001,94.37184,96.468992,98.56614400000001,100.663296,102.76044800000001,104.8576,106.95475200000001,109.05190400000001,111.149056,113.24620800000001,115.34336,117.44051200000001,119.537664,121.634816,123.73196800000001,125.82912,127.92627200000001,130.023424,132.120576,134.21772800000002,136.31488000000002,138.412032,140.509184,142.606336,144.70348800000002,146.80064000000002,148.897792,150.994944,153.092096,155.18924800000002,157.28640000000001,159.383552,161.480704,163.577856,165.67500800000002,167.77216,169.869312,171.966464,174.06361600000002,176.16076800000002,178.25792,180.355072,182.452224,184.54937600000002,186.64652800000002,188.74368,190.840832,192.937984,195.03513600000002,197.13228800000002,199.22944,201.326592,203.423744,205.52089600000002,207.61804800000002,209.7152,211.812352,213.90950400000003,216.00665600000002,218.10380800000001,220.20096,222.298112,224.39526400000003,226.49241600000002,228.589568,230.68672,232.783872,234.88102400000002,236.97817600000002,239.075328,241.17248,243.269632,245.36678400000002,247.46393600000002,249.561088,251.65824,253.75539200000003,255.85254400000002,257.949696,260.046848,262.144,264.241152,266.338304,268.43545600000004,270.53260800000004,272.62976000000003,274.726912,276.824064,278.921216,281.018368,283.11552,285.212672,287.309824,289.40697600000004,291.50412800000004,293.60128000000003,295.698432,297.795584
,299.892736,301.989888,304.08704,306.184192,308.28134400000005,310.37849600000004,312.47564800000004,314.57280000000003,316.669952,318.767104,320.864256,322.961408,325.05856,327.155712,329.25286400000005,331.35001600000004,333.44716800000003,335.54432,337.641472,339.738624,341.835776,343.932928,346.03008,348.12723200000005,350.22438400000004],"y":[0.3302722573280334,0.3470507562160492,0.364705353975296,0.3764069005846977,0.3825778141617775,0.3896523900330066,0.3974571377038955,0.397537350654602,0.4001355320215225,0.4036932997405529,0.4104340709745884,0.4086671769618988,0.413622997701168,0.4122740626335144,0.4195037446916103,0.4190866313874721,0.4231311045587063,0.4235090501606464,0.4237663596868515,0.4246972687542438,0.4299561083316803,0.4258239455521106,0.4314707778394222,0.4274507761001587,0.4299160614609718,0.4354668110609054,0.4302525334060192,0.4337679930031299,0.4337638393044472,0.4404422976076603,0.4347392916679382,0.4343290999531746,0.4375873804092407,0.4372701570391655,0.4380103722214699,0.4379842169582844,0.4400746375322342,0.4394551366567611,0.4451456516981125,0.4420723840594291,0.4404123835265636,0.4446186311542988,0.4421855062246322,0.44248116761446,0.4423649460077286,0.4426446110010147,0.4418415017426014,0.4442390464246273,0.4463883489370346,0.4474505893886089,0.446966927498579,0.4456985853612423,0.4466467499732971,0.4440925717353821,0.4471017979085445,0.4474578313529491,0.4471569992601871,0.449719063937664,0.453827504068613,0.449543334543705,0.451267059892416,0.4523019045591354,0.4507706724107265,0.4516403637826442,0.4510826356709003,0.4538759067654609,0.453817319124937,0.4493415988981724,0.4524289667606354,0.4563530124723911,0.4525565430521965,0.4559375420212745,0.4579579904675483,0.4522393532097339,0.4529666379094124,0.4572585858404636,0.4526477642357349,0.4548618048429489,0.4535989835858345,0.4557950794696808,0.4576374888420105,0.4536071494221687,0.4567363113164902,0.4559534676373005,0.455357126891613,0.4581229574978351,0.4584124349057674,0.4594293
19947958,0.4609483703970909,0.4585786387324333,0.4584725610911846,0.460013099014759,0.4675985686480999,0.4608690924942493,0.4615997225046158,0.4654040858149528,0.4610586836934089,0.4633209407329559,0.4612604938447475,0.460259061306715,0.4624678529798984,0.4620500393211841,0.459990244358778,0.4619648195803165,0.4650140479207039,0.4641274213790893,0.4607139900326729,0.466593112796545,0.4665224589407444,0.4634186550974846,0.4623883478343487,0.4616753794252872,0.46418297290802,0.4637203030288219,0.4639860466122627,0.4644428603351116,0.4654988572001457,0.4639867171645164,0.4652636311948299,0.4673995152115822,0.4623220227658748,0.4664290770888328,0.4666106030344963,0.4653977937996387,0.464808851480484,0.468203954398632,0.4619068317115307,0.461650725454092,0.4648414589464664,0.465693786740303,0.4655340574681759,0.4663631655275821,0.4668397903442383,0.4658713564276695,0.4650161266326904,0.4697218723595142,0.4669433757662773,0.4679506830871105,0.4668639451265335,0.4648850038647651,0.4669442251324653,0.4653140194714069,0.4670411869883537,0.4648204818367958,0.4657435193657875,0.4680184945464134,0.4664600379765033,0.4672395847737789,0.4657266996800899,0.4675477854907512,0.4670163989067077,0.4690168797969818,0.4676221422851085,0.4677147716283798,0.4668192155659199,0.4684425666928291,0.467963095754385,0.4691894799470901,0.4651848673820495,0.4664440341293812,0.467386495321989,0.4652351178228855,0.4669347554445267,0.4688444659113884,0.467147346585989,0.4683484248816967,0.4699504524469375,0.4709850251674652],"label":"RedPajama2"}},"layout":{"xaxis":{"title":{"text":"Training tokens (billions)"}},"yaxis":{"range":[0.39,0.5]},"title":{"text":"Dataset Ablations"}}}
 
 
data/plots/dataset_ablations/arc_acc_norm.json DELETED
@@ -1 +0,0 @@
1
- {"data":{"FineWeb (ours)":{"x":[0.0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,29.360128000000003,31.45728,33.554432000000006,35.651584,37.748736,39.845888,41.94304,44.040192000000005,46.137344000000006,48.234496,50.331648,52.4288,54.525952000000004,56.623104000000005,58.720256000000006,60.817408,62.91456,65.011712,67.10886400000001,69.206016,71.303168,73.40032000000001,75.497472,77.59462400000001,79.691776,81.788928,83.88608,85.983232,88.08038400000001,90.177536,92.27468800000001,94.37184,96.468992,98.56614400000001,100.663296,102.76044800000001,104.8576,106.95475200000001,109.05190400000001,111.149056,113.24620800000001,115.34336,117.44051200000001,119.537664,121.634816,123.73196800000001,125.82912,127.92627200000001,130.023424,132.120576,134.21772800000002,136.31488000000002,138.412032,140.509184,142.606336,144.70348800000002,146.80064000000002,148.897792,150.994944,153.092096,155.18924800000002,157.28640000000001,159.383552,161.480704,163.577856,165.67500800000002,167.77216,169.869312,171.966464,174.06361600000002,176.16076800000002,178.25792,180.355072,182.452224,184.54937600000002,186.64652800000002,188.74368,190.840832,192.937984,195.03513600000002,197.13228800000002,199.22944,201.326592,203.423744,205.52089600000002,207.61804800000002,209.7152,211.812352,213.90950400000003,216.00665600000002,218.10380800000001,220.20096,222.298112,224.39526400000003,226.49241600000002,228.589568,230.68672,232.783872,234.88102400000002,236.97817600000002,239.075328,241.17248,243.269632,245.36678400000002,247.46393600000002,249.561088,251.65824,253.75539200000003,255.85254400000002,257.949696,260.046848,262.144,264.241152,266.338304,268.43545600000004,270.53260800000004,272.62976000000003,274.726912,276.824064,278.921216,281.018368,283.11552,285.212672,287.309824,289.40697600000004,291.50412800000004,293.60128000000003,295.698432,29
7.795584,299.892736,301.989888,304.08704,306.184192,308.28134400000005,310.37849600000004,312.47564800000004,314.57280000000003,316.669952,318.767104,320.864256,322.961408,325.05856,327.155712,329.25286400000005,331.35001600000004,333.44716800000003,335.54432,337.641472,339.738624,341.835776,343.932928,346.03008,348.12723200000005,350.22438400000004],"y":[0.2509999871253967,0.296999990940094,0.3219999969005584,0.3305000066757202,0.3555000126361847,0.351500004529953,0.3600000143051147,0.363999992609024,0.3680000007152557,0.3785000145435333,0.3765000104904175,0.382999986410141,0.3785000145435333,0.3835000097751617,0.3819999992847442,0.3935000002384186,0.387499988079071,0.3935000002384186,0.3959999978542328,0.3860000073909759,0.3935000002384186,0.3885000050067901,0.3810000121593475,0.3880000114440918,0.3964999914169311,0.4054999947547912,0.3935000002384186,0.3944999873638153,0.3989999890327453,0.3980000019073486,0.4050000011920929,0.4054999947547912,0.4009999930858612,0.4110000133514404,0.4054999947547912,0.4180000126361847,0.4110000133514404,0.4050000011920929,0.4079999923706054,0.4120000004768371,0.402999997138977,0.4205000102519989,0.4129999876022339,0.4120000004768371,0.4169999957084656,0.4269999861717224,0.4230000078678131,0.4225000143051147,0.4300000071525574,0.4180000126361847,0.4284999966621399,0.4165000021457672,0.4325000047683716,0.4235000014305115,0.4210000038146972,0.4239999949932098,0.4235000014305115,0.421999990940094,0.4280000030994415,0.4300000071525574,0.4275000095367431,0.4305000007152557,0.4244999885559082,0.4314999878406524,0.4325000047683716,0.4395000040531158,0.4325000047683716,0.4300000071525574,0.4399999976158142,0.4320000112056732,0.4370000064373016,0.4280000030994415,0.4309999942779541,0.4314999878406524,0.4370000064373016,0.4280000030994415,0.4325000047683716,0.4300000071525574,0.4334999918937683,0.4334999918937683,0.4379999935626983,0.4399999976158142,0.4350000023841858,0.4395000040531158,0.4375,0.4390000104904175,0.4365000128746032,0.443500
0121593475,0.4365000128746032,0.445499986410141,0.4440000057220459,0.4460000097751617,0.4415000081062317,0.4415000081062317,0.4339999854564667,0.4429999887943268,0.4399999976158142,0.4359999895095825,0.4370000064373016,0.4469999969005584,0.4404999911785126,0.4435000121593475,0.445499986410141,0.4424999952316284,0.4480000138282776,0.4370000064373016,0.4444999992847442,0.4465000033378601,0.4309999942779541,0.4440000057220459,0.4469999969005584,0.4539999961853027,0.4440000057220459,0.4555000066757202,0.4519999921321869,0.4510000050067901,0.4519999921321869,0.4544999897480011,0.4494999945163727,0.4584999978542328,0.4580000042915344,0.4544999897480011,0.4514999985694885,0.4550000131130218,0.4560000002384186,0.4600000083446502,0.4589999914169311,0.4560000002384186,0.457500010728836,0.4679999947547912,0.4494999945163727,0.4505000114440918,0.4440000057220459,0.4539999961853027,0.4535000026226043,0.4514999985694885,0.457500010728836,0.4620000123977661,0.4564999938011169,0.4595000147819519,0.4564999938011169,0.4550000131130218,0.4539999961853027,0.4544999897480011,0.4569999873638153,0.457500010728836,0.4539999961853027,0.4595000147819519,0.4665000140666961,0.465499997138977,0.4625000059604645,0.4629999995231628,0.4580000042915344,0.4569999873638153,0.4620000123977661,0.457500010728836,0.4550000131130218,0.4645000100135803,0.4629999995231628,0.4584999978542328,0.465499997138977,0.460999995470047,0.4634999930858612,0.4605000019073486,0.4584999978542328,0.4550000131130218,0.4564999938011169,0.4600000083446502],"label":"FineWeb 
(ours)"},"C4":{"x":[0.0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,29.360128000000003,31.45728,33.554432000000006,35.651584,37.748736,39.845888,41.94304,44.040192000000005,46.137344000000006,48.234496,50.331648,52.4288,54.525952000000004,56.623104000000005,58.720256000000006,60.817408,62.91456,65.011712,67.10886400000001,69.206016,71.303168,73.40032000000001,75.497472,77.59462400000001,79.691776,81.788928,83.88608,85.983232,88.08038400000001,90.177536,92.27468800000001,94.37184,96.468992,98.56614400000001,100.663296,102.76044800000001,104.8576,106.95475200000001,109.05190400000001,111.149056,113.24620800000001,115.34336,117.44051200000001,119.537664,121.634816,123.73196800000001,125.82912,127.92627200000001,130.023424,132.120576,134.21772800000002,136.31488000000002,138.412032,140.509184,142.606336,144.70348800000002,146.80064000000002,148.897792,150.994944,153.092096,155.18924800000002,157.28640000000001,159.383552,161.480704,163.577856,165.67500800000002,167.77216,169.869312,171.966464,174.06361600000002,176.16076800000002,178.25792,180.355072,182.452224,184.54937600000002,186.64652800000002,188.74368,190.840832,192.937984,195.03513600000002,197.13228800000002,199.22944,201.326592,203.423744,205.52089600000002,207.61804800000002,209.7152,211.812352,213.90950400000003,216.00665600000002,218.10380800000001,220.20096,222.298112,224.39526400000003,226.49241600000002,228.589568,230.68672,232.783872,234.88102400000002,236.97817600000002,239.075328,241.17248,243.269632,245.36678400000002,247.46393600000002,249.561088,251.65824,253.75539200000003,255.85254400000002,257.949696,260.046848,262.144,264.241152,266.338304,268.43545600000004,270.53260800000004,272.62976000000003,274.726912,276.824064,278.921216,281.018368,283.11552,285.212672,287.309824,289.40697600000004,291.50412800000004,293.60128000000003,295.698432,297.795584,299.8
92736,301.989888,304.08704,306.184192,308.28134400000005,310.37849600000004,312.47564800000004,314.57280000000003,316.669952,318.767104,320.864256,322.961408,325.05856,327.155712,329.25286400000005,331.35001600000004,333.44716800000003,335.54432,337.641472,339.738624,341.835776,343.932928,346.03008,348.12723200000005,350.22438400000004],"y":[0.2509999871253967,0.293500006198883,0.3230000138282776,0.3339999914169311,0.3470000028610229,0.3614999949932098,0.3555000126361847,0.3655000030994415,0.3605000078678131,0.359499990940094,0.3580000102519989,0.3680000007152557,0.375,0.3790000081062317,0.3790000081062317,0.3880000114440918,0.3860000073909759,0.3914999961853027,0.3835000097751617,0.3980000019073486,0.3759999871253967,0.3889999985694885,0.3944999873638153,0.3869999945163727,0.3910000026226043,0.3840000033378601,0.3894999921321869,0.402999997138977,0.3930000066757202,0.390500009059906,0.3899999856948852,0.395000010728836,0.4040000140666961,0.3860000073909759,0.4025000035762787,0.398499995470047,0.4020000100135803,0.4004999995231628,0.3939999938011169,0.402999997138977,0.3995000123977661,0.4040000140666961,0.3970000147819519,0.3975000083446502,0.4000000059604645,0.4034999907016754,0.4104999899864197,0.398499995470047,0.4074999988079071,0.4045000076293945,0.414000004529953,0.4095000028610229,0.418500006198883,0.4050000011920929,0.4025000035762787,0.418500006198883,0.4230000078678131,0.4090000092983246,0.402999997138977,0.4149999916553497,0.4120000004768371,0.4194999933242798,0.4104999899864197,0.4074999988079071,0.4104999899864197,0.4165000021457672,0.4189999997615814,0.4070000052452087,0.4135000109672546,0.4250000119209289,0.4235000014305115,0.4269999861717224,0.4214999973773956,0.4320000112056732,0.4189999997615814,0.4255000054836273,0.429500013589859,0.4250000119209289,0.4189999997615814,0.4280000030994415,0.4269999861717224,0.4370000064373016,0.4225000143051147,0.4239999949932098,0.4314999878406524,0.4415000081062317,0.4239999949932098,0.4239999949932098,0.43299999
83310699,0.4325000047683716,0.4305000007152557,0.4305000007152557,0.4305000007152557,0.4345000088214874,0.4305000007152557,0.429500013589859,0.4309999942779541,0.4354999959468841,0.4345000088214874,0.4354999959468841,0.4390000104904175,0.4354999959468841,0.4289999902248382,0.4384999871253967,0.4259999990463257,0.4300000071525574,0.421999990940094,0.4395000040531158,0.4334999918937683,0.4350000023841858,0.4329999983310699,0.4339999854564667,0.4415000081062317,0.4469999969005584,0.4494999945163727,0.4345000088214874,0.4375,0.4359999895095825,0.4284999966621399,0.4305000007152557,0.4449999928474426,0.4390000104904175,0.4334999918937683,0.4375,0.4435000121593475,0.4370000064373016,0.4444999992847442,0.4395000040531158,0.4435000121593475,0.445499986410141,0.4449999928474426,0.4399999976158142,0.4490000009536743,0.4415000081062317,0.4429999887943268,0.4339999854564667,0.4429999887943268,0.4309999942779541,0.4429999887943268,0.4384999871253967,0.4449999928474426,0.4390000104904175,0.4429999887943268,0.4435000121593475,0.4435000121593475,0.44200000166893,0.44200000166893,0.4435000121593475,0.44200000166893,0.4485000073909759,0.4440000057220459,0.4485000073909759,0.44200000166893,0.4490000009536743,0.4494999945163727,0.44200000166893,0.4465000033378601,0.4365000128746032,0.44200000166893,0.4384999871253967,0.4440000057220459,0.4390000104904175,0.4399999976158142,0.4444999992847442,0.4449999928474426,0.4440000057220459,0.4469999969005584,0.4435000121593475],"label":"C4"},"Dolma":{"x":[0.0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,29.360128000000003,31.45728,33.554432000000006,35.651584,37.748736,39.845888,41.94304,44.040192000000005,46.137344000000006,48.234496,50.331648,52.4288,54.525952000000004,56.623104000000005,58.720256000000006,60.817408,62.91456,65.011712,67.10886400000001,69.206016,71.303168,73.40032000000001,75.497472,77.5
9462400000001,79.691776,81.788928,83.88608,85.983232,88.08038400000001,90.177536,92.27468800000001,94.37184,96.468992,98.56614400000001,100.663296,102.76044800000001,104.8576,106.95475200000001,109.05190400000001,111.149056,113.24620800000001,115.34336,117.44051200000001,119.537664,121.634816,123.73196800000001,125.82912,127.92627200000001,130.023424,132.120576,134.21772800000002,136.31488000000002,138.412032,140.509184,142.606336,144.70348800000002,146.80064000000002,148.897792,150.994944,153.092096,155.18924800000002,157.28640000000001,159.383552,161.480704,163.577856,165.67500800000002,167.77216,169.869312,171.966464,174.06361600000002,176.16076800000002,178.25792,180.355072,182.452224,184.54937600000002,186.64652800000002,188.74368,190.840832,192.937984,195.03513600000002,197.13228800000002,199.22944,201.326592,203.423744,205.52089600000002,207.61804800000002,209.7152,211.812352,213.90950400000003,216.00665600000002,218.10380800000001,220.20096,222.298112,224.39526400000003,226.49241600000002,228.589568,230.68672,232.783872,234.88102400000002,236.97817600000002,239.075328,241.17248,243.269632,245.36678400000002,247.46393600000002,249.561088,251.65824,253.75539200000003,255.85254400000002,257.949696,260.046848,262.144,264.241152,266.338304,268.43545600000004,270.53260800000004,272.62976000000003,274.726912,276.824064,278.921216,281.018368,283.11552,285.212672,287.309824,289.40697600000004,291.50412800000004,293.60128000000003,295.698432,297.795584,299.892736,301.989888,304.08704,306.184192,308.28134400000005,310.37849600000004,312.47564800000004,314.57280000000003,316.669952,318.767104,320.864256,322.961408,325.05856,327.155712,329.25286400000005,331.35001600000004,333.44716800000003,335.54432,337.641472,339.738624,341.835776,343.932928,346.03008,348.12723200000005,350.22438400000004],"y":[0.2509999871253967,0.2815000116825104,0.3215000033378601,0.3375000059604645,0.3490000069141388,0.351500004529953,0.3504999876022339,0.3650000095367431,0.3544999957084656,0.3569
999933242798,0.3644999861717224,0.3704999983310699,0.3745000064373016,0.3725000023841858,0.3700000047683716,0.3770000040531158,0.37950000166893,0.3899999856948852,0.3855000138282776,0.3989999890327453,0.3910000026226043,0.3804999887943268,0.3844999969005584,0.3925000131130218,0.3889999985694885,0.3849999904632568,0.3840000033378601,0.3855000138282776,0.3810000121593475,0.3989999890327453,0.3799999952316284,0.4004999995231628,0.4084999859333038,0.3894999921321869,0.3930000066757202,0.4020000100135803,0.4020000100135803,0.3980000019073486,0.4065000116825104,0.4000000059604645,0.4014999866485595,0.3955000042915344,0.3959999978542328,0.4110000133514404,0.4149999916553497,0.4180000126361847,0.414000004529953,0.4135000109672546,0.418500006198883,0.4074999988079071,0.4054999947547912,0.4070000052452087,0.4259999990463257,0.4095000028610229,0.4230000078678131,0.4025000035762787,0.4180000126361847,0.4230000078678131,0.421999990940094,0.4169999957084656,0.426499992609024,0.4154999852180481,0.421999990940094,0.414000004529953,0.4074999988079071,0.4160000085830688,0.4144999980926513,0.4225000143051147,0.418500006198883,0.4205000102519989,0.4084999859333038,0.4275000095367431,0.4235000014305115,0.4144999980926513,0.4275000095367431,0.4334999918937683,0.4314999878406524,0.4320000112056732,0.4199999868869781,0.4269999861717224,0.4250000119209289,0.4174999892711639,0.421999990940094,0.4180000126361847,0.4160000085830688,0.4104999899864197,0.4144999980926513,0.4259999990463257,0.4205000102519989,0.414000004529953,0.4210000038146972,0.4189999997615814,0.4160000085830688,0.4174999892711639,0.4275000095367431,0.4325000047683716,0.4250000119209289,0.4235000014305115,0.4300000071525574,0.4320000112056732,0.4329999983310699,0.429500013589859,0.4365000128746032,0.4410000145435333,0.4354999959468841,0.4275000095367431,0.4370000064373016,0.4239999949932098,0.4415000081062317,0.4444999992847442,0.4354999959468841,0.4309999942779541,0.4325000047683716,0.4359999895095825,0.4329999983310699,0.42
55000054836273,0.4300000071525574,0.4339999854564667,0.4375,0.4325000047683716,0.4309999942779541,0.4305000007152557,0.4334999918937683,0.4365000128746032,0.4375,0.4300000071525574,0.4329999983310699,0.4345000088214874,0.4404999911785126,0.4384999871253967,0.4399999976158142,0.445499986410141,0.4435000121593475,0.44200000166893,0.4365000128746032,0.4375,0.4399999976158142,0.4390000104904175,0.44200000166893,0.4334999918937683,0.4284999966621399,0.4284999966621399,0.4375,0.4359999895095825,0.4390000104904175,0.4390000104904175,0.4415000081062317,0.4370000064373016,0.4345000088214874,0.4435000121593475,0.44200000166893,0.4440000057220459,0.4365000128746032,0.4404999911785126,0.44200000166893,0.4395000040531158,0.4339999854564667,0.4370000064373016,0.4395000040531158,0.44200000166893,0.4440000057220459,0.4399999976158142,0.4384999871253967,0.4449999928474426,0.4339999854564667,0.4485000073909759,0.4444999992847442,0.44200000166893],"label":"Dolma"},"RefinedWeb":{"x":[0.0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,29.360128000000003,31.45728,33.554432000000006,35.651584,37.748736,39.845888,41.94304,44.040192000000005,46.137344000000006,48.234496,50.331648,52.4288,54.525952000000004,56.623104000000005,58.720256000000006,60.817408,62.91456,65.011712,67.10886400000001,69.206016,71.303168,73.40032000000001,75.497472,77.59462400000001,79.691776,81.788928,83.88608,85.983232,88.08038400000001,90.177536,92.27468800000001,94.37184,96.468992,98.56614400000001,100.663296,102.76044800000001,104.8576,106.95475200000001,109.05190400000001,111.149056,113.24620800000001,115.34336,117.44051200000001,119.537664,121.634816,123.73196800000001,125.82912,127.92627200000001,130.023424,132.120576,134.21772800000002,136.31488000000002,138.412032,140.509184,142.606336,144.70348800000002,146.80064000000002,148.897792,150.994944,153.092096,155.18924800000
002,157.28640000000001,159.383552,161.480704,163.577856,165.67500800000002,167.77216,169.869312,171.966464,174.06361600000002,176.16076800000002,178.25792,180.355072,182.452224,184.54937600000002,186.64652800000002,188.74368,190.840832,192.937984,195.03513600000002,197.13228800000002,199.22944,201.326592,203.423744,205.52089600000002,207.61804800000002,209.7152,211.812352,213.90950400000003,216.00665600000002,218.10380800000001,220.20096,222.298112,224.39526400000003,226.49241600000002,228.589568,230.68672,232.783872,234.88102400000002,236.97817600000002,239.075328,241.17248,243.269632,245.36678400000002,247.46393600000002,249.561088,251.65824,253.75539200000003,255.85254400000002,257.949696,260.046848,262.144,264.241152,266.338304,268.43545600000004,270.53260800000004,272.62976000000003,274.726912,276.824064,278.921216,281.018368,283.11552,285.212672,287.309824,289.40697600000004,291.50412800000004,293.60128000000003,295.698432,297.795584,299.892736,301.989888,304.08704,306.184192,308.28134400000005,310.37849600000004,312.47564800000004,314.57280000000003,316.669952,318.767104,320.864256,322.961408,325.05856,327.155712,329.25286400000005,331.35001600000004,333.44716800000003,335.54432,337.641472,339.738624,341.835776,343.932928,346.03008,348.12723200000005,350.22438400000004],"y":[0.2509999871253967,0.2899999916553497,0.31700000166893,0.3409999907016754,0.3425000011920929,0.3485000133514404,0.3555000126361847,0.3574999868869781,0.3585000038146972,0.363999992609024,0.3619999885559082,0.3675000071525574,0.3865000009536743,0.3810000121593475,0.3810000121593475,0.3810000121593475,0.3860000073909759,0.3810000121593475,0.3894999921321869,0.3849999904632568,0.3855000138282776,0.3989999890327453,0.3980000019073486,0.3995000123977661,0.395000010728836,0.4084999859333038,0.4040000140666961,0.4004999995231628,0.3955000042915344,0.4135000109672546,0.4070000052452087,0.4104999899864197,0.4014999866485595,0.4099999964237213,0.4199999868869781,0.414000004529953,0.402999997138977,
0.4214999973773956,0.4095000028610229,0.4059999883174896,0.4090000092983246,0.4074999988079071,0.4120000004768371,0.4154999852180481,0.4189999997615814,0.4149999916553497,0.429500013589859,0.4154999852180481,0.4214999973773956,0.4244999885559082,0.4205000102519989,0.4269999861717224,0.4214999973773956,0.4180000126361847,0.4415000081062317,0.4320000112056732,0.4350000023841858,0.4259999990463257,0.4300000071525574,0.4259999990463257,0.4189999997615814,0.4269999861717224,0.4199999868869781,0.426499992609024,0.4350000023841858,0.4289999902248382,0.4345000088214874,0.4259999990463257,0.426499992609024,0.4395000040531158,0.4395000040531158,0.4359999895095825,0.4280000030994415,0.4370000064373016,0.4329999983310699,0.4309999942779541,0.4490000009536743,0.4399999976158142,0.4339999854564667,0.4399999976158142,0.4345000088214874,0.429500013589859,0.4370000064373016,0.4379999935626983,0.4284999966621399,0.4309999942779541,0.4350000023841858,0.4399999976158142,0.4314999878406524,0.4300000071525574,0.4410000145435333,0.4345000088214874,0.4410000145435333,0.4345000088214874,0.4339999854564667,0.4460000097751617,0.4410000145435333,0.4469999969005584,0.4480000138282776,0.4435000121593475,0.4375,0.4519999921321869,0.4480000138282776,0.4429999887943268,0.4519999921321869,0.4435000121593475,0.4334999918937683,0.4460000097751617,0.4564999938011169,0.4469999969005584,0.453000009059906,0.4485000073909759,0.4410000145435333,0.4444999992847442,0.4485000073909759,0.457500010728836,0.4469999969005584,0.4535000026226043,0.4535000026226043,0.4485000073909759,0.4490000009536743,0.4505000114440918,0.4595000147819519,0.4544999897480011,0.453000009059906,0.4605000019073486,0.4620000123977661,0.457500010728836,0.453000009059906,0.4550000131130218,0.460999995470047,0.4449999928474426,0.4474999904632568,0.457500010728836,0.4584999978542328,0.4494999945163727,0.4474999904632568,0.4625000059604645,0.4639999866485595,0.4555000066757202,0.4469999969005584,0.4600000083446502,0.453000009059906,0.46299999
95231628,0.4589999914169311,0.4614999890327453,0.4555000066757202,0.4560000002384186,0.4580000042915344,0.4584999978542328,0.4560000002384186,0.4605000019073486,0.4595000147819519,0.4639999866485595,0.4614999890327453,0.4564999938011169,0.4634999930858612,0.4625000059604645,0.4614999890327453,0.4679999947547912,0.4584999978542328,0.4595000147819519,0.4505000114440918,0.4544999897480011,0.4595000147819519,0.4620000123977661,0.4670000076293945,0.4555000066757202],"label":"RefinedWeb"},"SlimPajama":{"x":[0.0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,29.360128000000003,31.45728,33.554432000000006,35.651584,37.748736,39.845888,41.94304,44.040192000000005,46.137344000000006,48.234496,50.331648,52.4288,54.525952000000004,56.623104000000005,58.720256000000006,60.817408,62.91456,65.011712,67.10886400000001,69.206016,71.303168,73.40032000000001,75.497472,77.59462400000001,79.691776,81.788928,83.88608,85.983232,88.08038400000001,90.177536,92.27468800000001,94.37184,96.468992,98.56614400000001,100.663296,102.76044800000001,104.8576,106.95475200000001,109.05190400000001,111.149056,113.24620800000001,115.34336,117.44051200000001,119.537664,121.634816,123.73196800000001,125.82912,127.92627200000001,130.023424,132.120576,134.21772800000002,136.31488000000002,138.412032,140.509184,142.606336,144.70348800000002,146.80064000000002,148.897792,150.994944,153.092096,155.18924800000002,157.28640000000001,159.383552,161.480704,163.577856,165.67500800000002,167.77216,169.869312,171.966464,174.06361600000002,176.16076800000002,178.25792,180.355072,182.452224,184.54937600000002,186.64652800000002,188.74368,190.840832,192.937984,195.03513600000002,197.13228800000002,199.22944,201.326592,203.423744,205.52089600000002,207.61804800000002,209.7152,211.812352,213.90950400000003,216.00665600000002,218.10380800000001,220.20096,222.298112,224.39526400000003,
226.49241600000002,228.589568,230.68672,232.783872,234.88102400000002,236.97817600000002,239.075328,241.17248,243.269632,245.36678400000002,247.46393600000002,249.561088,251.65824,253.75539200000003,255.85254400000002,257.949696,260.046848,262.144,264.241152,266.338304,268.43545600000004,270.53260800000004,272.62976000000003,274.726912,276.824064,278.921216,281.018368,283.11552,285.212672,287.309824,289.40697600000004,291.50412800000004,293.60128000000003,295.698432,297.795584,299.892736,301.989888,304.08704,306.184192,308.28134400000005,310.37849600000004,312.47564800000004,314.57280000000003,316.669952,318.767104,320.864256,322.961408,325.05856,327.155712,329.25286400000005,331.35001600000004,333.44716800000003,335.54432,337.641472,339.738624,341.835776,343.932928,346.03008,348.12723200000005,350.22438400000004],"y":[0.2535000145435333,0.2739999890327453,0.3190000057220459,0.3300000131130218,0.3490000069141388,0.3449999988079071,0.3574999868869781,0.3529999852180481,0.3610000014305115,0.3610000014305115,0.3580000102519989,0.3714999854564667,0.3785000145435333,0.3659999966621399,0.3785000145435333,0.3655000030994415,0.3765000104904175,0.3704999983310699,0.3774999976158142,0.3709999918937683,0.3745000064373016,0.3799999952316284,0.3804999887943268,0.390500009059906,0.3939999938011169,0.3995000123977661,0.390500009059906,0.3939999938011169,0.3889999985694885,0.3944999873638153,0.4000000059604645,0.4074999988079071,0.402999997138977,0.3840000033378601,0.387499988079071,0.3914999961853027,0.3939999938011169,0.3860000073909759,0.387499988079071,0.3995000123977661,0.4065000116825104,0.3925000131130218,0.4020000100135803,0.3980000019073486,0.4004999995231628,0.4000000059604645,0.4169999957084656,0.4025000035762787,0.418500006198883,0.4034999907016754,0.4040000140666961,0.4099999964237213,0.4129999876022339,0.4110000133514404,0.4165000021457672,0.4079999923706054,0.4054999947547912,0.4104999899864197,0.4160000085830688,0.4199999868869781,0.4059999883174896,0.41949999332427
98,0.4074999988079071,0.4210000038146972,0.4205000102519989,0.4239999949932098,0.4239999949932098,0.4165000021457672,0.4329999983310699,0.4269999861717224,0.4120000004768371,0.4289999902248382,0.4230000078678131,0.4154999852180481,0.4235000014305115,0.4174999892711639,0.4189999997615814,0.4314999878406524,0.4259999990463257,0.4284999966621399,0.4284999966621399,0.4444999992847442,0.4300000071525574,0.4250000119209289,0.4235000014305115,0.4359999895095825,0.4239999949932098,0.4255000054836273,0.421999990940094,0.4375,0.4235000014305115,0.4314999878406524,0.4339999854564667,0.4309999942779541,0.4410000145435333,0.4395000040531158,0.4255000054836273,0.4334999918937683,0.4350000023841858,0.429500013589859,0.4514999985694885,0.4404999911785126,0.4460000097751617,0.4494999945163727,0.4435000121593475,0.4365000128746032,0.4490000009536743,0.4440000057220459,0.4605000019073486,0.4494999945163727,0.4535000026226043,0.4555000066757202,0.4595000147819519,0.4465000033378601,0.453000009059906,0.4600000083446502,0.4510000050067901,0.445499986410141,0.4444999992847442,0.4494999945163727,0.4424999952316284,0.4539999961853027,0.453000009059906,0.4544999897480011,0.4560000002384186,0.4449999928474426,0.4584999978542328,0.4474999904632568,0.4595000147819519,0.4465000033378601,0.4589999914169311,0.449999988079071,0.4469999969005584,0.4469999969005584,0.4490000009536743,0.453000009059906,0.4595000147819519,0.4535000026226043,0.4544999897480011,0.4614999890327453,0.4535000026226043,0.4564999938011169,0.453000009059906,0.4620000123977661,0.4544999897480011,0.4569999873638153,0.4620000123977661,0.4510000050067901,0.4474999904632568,0.4589999914169311,0.4584999978542328,0.4514999985694885,0.4645000100135803,0.4569999873638153,0.4550000131130218,0.4600000083446502,0.4505000114440918,0.4519999921321869,0.4675000011920929,0.4650000035762787,0.457500010728836,0.4634999930858612,0.4650000035762787,0.4580000042915344,0.4704999923706054,0.449999988079071,0.4539999961853027,0.4605000019073486],"lab
el":"SlimPajama"},"The Pile":{"x":[0.0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,29.360128000000003,31.45728,33.554432000000006,35.651584,37.748736,39.845888,41.94304,44.040192000000005,46.137344000000006,48.234496,50.331648,52.4288,54.525952000000004,56.623104000000005,58.720256000000006,60.817408,62.91456,65.011712,67.10886400000001,69.206016,71.303168,73.40032000000001,75.497472,77.59462400000001,79.691776,81.788928,83.88608,85.983232,88.08038400000001,90.177536,92.27468800000001,94.37184,96.468992,98.56614400000001,100.663296,102.76044800000001,104.8576,106.95475200000001,109.05190400000001,111.149056,113.24620800000001,115.34336,117.44051200000001,119.537664,121.634816,123.73196800000001,125.82912,127.92627200000001,130.023424,132.120576,134.21772800000002,136.31488000000002,138.412032,140.509184,142.606336,144.70348800000002,146.80064000000002,148.897792,150.994944,153.092096,155.18924800000002,157.28640000000001,159.383552,161.480704,163.577856,165.67500800000002,167.77216,169.869312,171.966464,174.06361600000002,176.16076800000002,178.25792,180.355072,182.452224,184.54937600000002,186.64652800000002,188.74368,190.840832,192.937984,195.03513600000002,197.13228800000002,199.22944,201.326592,203.423744,205.52089600000002,207.61804800000002,209.7152,211.812352,213.90950400000003,216.00665600000002,218.10380800000001,220.20096,222.298112,224.39526400000003,226.49241600000002,228.589568,230.68672,232.783872,234.88102400000002,236.97817600000002,239.075328,241.17248,243.269632,245.36678400000002,247.46393600000002,249.561088,251.65824,253.75539200000003,255.85254400000002,257.949696,260.046848,262.144,264.241152,266.338304,268.43545600000004,270.53260800000004,272.62976000000003,274.726912,276.824064,278.921216,281.018368,283.11552,285.212672,287.309824,289.40697600000004,291.50412800000004,293.60128000000003,295.698432,2
97.795584,299.892736,301.989888,304.08704,306.184192,308.28134400000005,310.37849600000004,312.47564800000004,314.57280000000003,316.669952,318.767104,320.864256,322.961408,325.05856,327.155712,329.25286400000005,331.35001600000004,333.44716800000003,335.54432,337.641472,339.738624,341.835776,343.932928,346.03008,348.12723200000005,350.22438400000004],"y":[0.2535000145435333,0.2694999873638153,0.2939999997615814,0.3064999878406524,0.3269999921321869,0.3264999985694885,0.3355000019073486,0.3540000021457672,0.3449999988079071,0.3454999923706054,0.3540000021457672,0.3600000143051147,0.3574999868869781,0.3569999933242798,0.3650000095367431,0.3659999966621399,0.3625000119209289,0.3689999878406524,0.3555000126361847,0.359499990940094,0.3855000138282776,0.3729999959468841,0.37950000166893,0.3720000088214874,0.3774999976158142,0.3740000128746032,0.3840000033378601,0.3765000104904175,0.3734999895095825,0.3855000138282776,0.3844999969005584,0.3804999887943268,0.3849999904632568,0.3939999938011169,0.3804999887943268,0.3919999897480011,0.3819999992847442,0.3955000042915344,0.3880000114440918,0.387499988079071,0.3925000131130218,0.4009999930858612,0.3935000002384186,0.3959999978542328,0.398499995470047,0.3914999961853027,0.3970000147819519,0.402999997138977,0.4070000052452087,0.3860000073909759,0.398499995470047,0.4009999930858612,0.3914999961853027,0.402999997138977,0.4025000035762787,0.3975000083446502,0.3930000066757202,0.4104999899864197,0.4054999947547912,0.395000010728836,0.4180000126361847,0.3955000042915344,0.4009999930858612,0.4000000059604645,0.4004999995231628,0.390500009059906,0.3995000123977661,0.4004999995231628,0.4045000076293945,0.4045000076293945,0.4004999995231628,0.4065000116825104,0.4084999859333038,0.4065000116825104,0.3980000019073486,0.4014999866485595,0.4095000028610229,0.3964999914169311,0.4040000140666961,0.3970000147819519,0.3970000147819519,0.4059999883174896,0.4004999995231628,0.4099999964237213,0.4115000069141388,0.4135000109672546,0.416000008583068
8,0.4120000004768371,0.4124999940395355,0.4180000126361847,0.4099999964237213,0.4135000109672546,0.4160000085830688,0.4095000028610229,0.4169999957084656,0.4314999878406524,0.4280000030994415,0.4205000102519989,0.4210000038146972,0.4174999892711639,0.4090000092983246,0.4169999957084656,0.4149999916553497,0.4210000038146972,0.4210000038146972,0.426499992609024,0.4225000143051147,0.4189999997615814,0.4160000085830688,0.418500006198883,0.4339999854564667,0.4280000030994415,0.4244999885559082,0.4269999861717224,0.4235000014305115,0.4300000071525574,0.426499992609024,0.4180000126361847,0.4275000095367431,0.4275000095367431,0.4399999976158142,0.4284999966621399,0.4424999952316284,0.4435000121593475,0.4284999966621399,0.426499992609024,0.4305000007152557,0.4280000030994415,0.4339999854564667,0.4375,0.4375,0.4309999942779541,0.4365000128746032,0.4314999878406524,0.4329999983310699,0.4284999966621399,0.4305000007152557,0.4329999983310699,0.4280000030994415,0.426499992609024,0.4314999878406524,0.4365000128746032,0.4359999895095825,0.4410000145435333,0.4390000104904175,0.4404999911785126,0.4350000023841858,0.4390000104904175,0.4474999904632568,0.4415000081062317,0.445499986410141,0.4329999983310699,0.4490000009536743,0.4440000057220459,0.4510000050067901,0.4474999904632568,0.4595000147819519,0.4440000057220459,0.4469999969005584,0.4384999871253967,0.4375,0.4415000081062317,0.4480000138282776,0.4449999928474426,0.4354999959468841,0.445499986410141,0.4384999871253967,0.4375],"label":"The 
Pile"},"RedPajama2":{"x":[0.0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,29.360128000000003,31.45728,33.554432000000006,35.651584,37.748736,39.845888,41.94304,44.040192000000005,46.137344000000006,48.234496,50.331648,52.4288,54.525952000000004,56.623104000000005,58.720256000000006,60.817408,62.91456,65.011712,67.10886400000001,69.206016,71.303168,73.40032000000001,75.497472,77.59462400000001,79.691776,81.788928,83.88608,85.983232,88.08038400000001,90.177536,92.27468800000001,94.37184,96.468992,98.56614400000001,100.663296,102.76044800000001,104.8576,106.95475200000001,109.05190400000001,111.149056,113.24620800000001,115.34336,117.44051200000001,119.537664,121.634816,123.73196800000001,125.82912,127.92627200000001,130.023424,132.120576,134.21772800000002,136.31488000000002,138.412032,140.509184,142.606336,144.70348800000002,146.80064000000002,148.897792,150.994944,153.092096,155.18924800000002,157.28640000000001,159.383552,161.480704,163.577856,165.67500800000002,167.77216,169.869312,171.966464,174.06361600000002,176.16076800000002,178.25792,180.355072,182.452224,184.54937600000002,186.64652800000002,188.74368,190.840832,192.937984,195.03513600000002,197.13228800000002,199.22944,201.326592,203.423744,205.52089600000002,207.61804800000002,209.7152,211.812352,213.90950400000003,216.00665600000002,218.10380800000001,220.20096,222.298112,224.39526400000003,226.49241600000002,228.589568,230.68672,232.783872,234.88102400000002,236.97817600000002,239.075328,241.17248,243.269632,245.36678400000002,247.46393600000002,249.561088,251.65824,253.75539200000003,255.85254400000002,257.949696,260.046848,262.144,264.241152,266.338304,268.43545600000004,270.53260800000004,272.62976000000003,274.726912,276.824064,278.921216,281.018368,283.11552,285.212672,287.309824,289.40697600000004,291.50412800000004,293.60128000000003,295.698432,297.795584
,299.892736,301.989888,304.08704,306.184192,308.28134400000005,310.37849600000004,312.47564800000004,314.57280000000003,316.669952,318.767104,320.864256,322.961408,325.05856,327.155712,329.25286400000005,331.35001600000004,333.44716800000003,335.54432,337.641472,339.738624,341.835776,343.932928,346.03008,348.12723200000005,350.22438400000004],"y":[0.2535000145435333,0.2854999899864197,0.3145000040531158,0.332500010728836,0.3454999923706054,0.3540000021457672,0.359499990940094,0.3479999899864197,0.3535000085830688,0.3544999957084656,0.3684999942779541,0.3714999854564667,0.37950000166893,0.3790000081062317,0.3880000114440918,0.3765000104904175,0.3804999887943268,0.3880000114440918,0.3810000121593475,0.3819999992847442,0.3959999978542328,0.3925000131130218,0.4054999947547912,0.3849999904632568,0.3935000002384186,0.4000000059604645,0.3849999904632568,0.3989999890327453,0.3930000066757202,0.4129999876022339,0.4034999907016754,0.3980000019073486,0.4059999883174896,0.4004999995231628,0.4169999957084656,0.4065000116825104,0.4004999995231628,0.3970000147819519,0.4090000092983246,0.4079999923706054,0.402999997138977,0.4025000035762787,0.4009999930858612,0.4004999995231628,0.4079999923706054,0.4095000028610229,0.3959999978542328,0.4045000076293945,0.4065000116825104,0.4079999923706054,0.4154999852180481,0.4059999883174896,0.4160000085830688,0.4129999876022339,0.4115000069141388,0.4034999907016754,0.4059999883174896,0.4054999947547912,0.4239999949932098,0.4149999916553497,0.4180000126361847,0.4199999868869781,0.418500006198883,0.4289999902248382,0.4144999980926513,0.4180000126361847,0.4199999868869781,0.4275000095367431,0.414000004529953,0.4199999868869781,0.4124999940395355,0.4305000007152557,0.4210000038146972,0.4235000014305115,0.426499992609024,0.4269999861717224,0.421999990940094,0.4165000021457672,0.421999990940094,0.421999990940094,0.4250000119209289,0.4255000054836273,0.4275000095367431,0.4269999861717224,0.4329999983310699,0.4354999959468841,0.4275000095367431,0.441000
0145435333,0.4325000047683716,0.4354999959468841,0.4345000088214874,0.4379999935626983,0.4550000131130218,0.4494999945163727,0.44200000166893,0.4490000009536743,0.4415000081062317,0.44200000166893,0.4345000088214874,0.4404999911785126,0.4395000040531158,0.429500013589859,0.4390000104904175,0.4399999976158142,0.4384999871253967,0.4384999871253967,0.4444999992847442,0.4429999887943268,0.4449999928474426,0.4404999911785126,0.4429999887943268,0.445499986410141,0.4424999952316284,0.4415000081062317,0.4300000071525574,0.4354999959468841,0.4375,0.4395000040531158,0.4390000104904175,0.4469999969005584,0.4365000128746032,0.4485000073909759,0.4404999911785126,0.4514999985694885,0.4415000081062317,0.4494999945163727,0.4354999959468841,0.44200000166893,0.4444999992847442,0.4469999969005584,0.4424999952316284,0.4449999928474426,0.4490000009536743,0.4444999992847442,0.4404999911785126,0.4435000121593475,0.4404999911785126,0.4465000033378601,0.4514999985694885,0.4354999959468841,0.4539999961853027,0.4465000033378601,0.4519999921321869,0.4465000033378601,0.4480000138282776,0.4494999945163727,0.4469999969005584,0.4474999904632568,0.4444999992847442,0.4460000097751617,0.4444999992847442,0.4524999856948852,0.4505000114440918,0.4485000073909759,0.4465000033378601,0.4485000073909759,0.4465000033378601,0.4480000138282776,0.4384999871253967,0.4535000026226043,0.4469999969005584,0.4490000009536743,0.4539999961853027,0.4494999945163727,0.4519999921321869,0.4494999945163727,0.4564999938011169,0.4494999945163727],"label":"RedPajama2"}},"layout":{"xaxis":{"title":{"text":"Training tokens (billions)"}},"yaxis":{"range":[0.39,0.5]},"title":{"text":"Dataset Ablations"}}}
 
 
data/plots/dataset_ablations/commonsense_qa_acc_norm.json DELETED
@@ -1 +0,0 @@
1
- {"data":{"FineWeb (ours)":{"x":[0.0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,29.360128000000003,31.45728,33.554432000000006,35.651584,37.748736,39.845888,41.94304,44.040192000000005,46.137344000000006,48.234496,50.331648,52.4288,54.525952000000004,56.623104000000005,58.720256000000006,60.817408,62.91456,65.011712,67.10886400000001,69.206016,71.303168,73.40032000000001,75.497472,77.59462400000001,79.691776,81.788928,83.88608,85.983232,88.08038400000001,90.177536,92.27468800000001,94.37184,96.468992,98.56614400000001,100.663296,102.76044800000001,104.8576,106.95475200000001,109.05190400000001,111.149056,113.24620800000001,115.34336,117.44051200000001,119.537664,121.634816,123.73196800000001,125.82912,127.92627200000001,130.023424,132.120576,134.21772800000002,136.31488000000002,138.412032,140.509184,142.606336,144.70348800000002,146.80064000000002,148.897792,150.994944,153.092096,155.18924800000002,157.28640000000001,159.383552,161.480704,163.577856,165.67500800000002,167.77216,169.869312,171.966464,174.06361600000002,176.16076800000002,178.25792,180.355072,182.452224,184.54937600000002,186.64652800000002,188.74368,190.840832,192.937984,195.03513600000002,197.13228800000002,199.22944,201.326592,203.423744,205.52089600000002,207.61804800000002,209.7152,211.812352,213.90950400000003,216.00665600000002,218.10380800000001,220.20096,222.298112,224.39526400000003,226.49241600000002,228.589568,230.68672,232.783872,234.88102400000002,236.97817600000002,239.075328,241.17248,243.269632,245.36678400000002,247.46393600000002,249.561088,251.65824,253.75539200000003,255.85254400000002,257.949696,260.046848,262.144,264.241152,266.338304,268.43545600000004,270.53260800000004,272.62976000000003,274.726912,276.824064,278.921216,281.018368,283.11552,285.212672,287.309824,289.40697600000004,291.50412800000004,293.60128000000003,295.698432,29
7.795584,299.892736,301.989888,304.08704,306.184192,308.28134400000005,310.37849600000004,312.47564800000004,314.57280000000003,316.669952,318.767104,320.864256,322.961408,325.05856,327.155712,329.25286400000005,331.35001600000004,333.44716800000003,335.54432,337.641472,339.738624,341.835776,343.932928,346.03008,348.12723200000005,350.22438400000004],"y":[0.2329999953508377,0.2630000114440918,0.2879999876022339,0.296999990940094,0.2960000038146972,0.3039999902248382,0.3129999935626983,0.3149999976158142,0.3300000131130218,0.3300000131130218,0.3350000083446502,0.3379999995231628,0.3370000123977661,0.3330000042915344,0.3370000123977661,0.3389999866485595,0.3429999947547912,0.3659999966621399,0.3459999859333038,0.3479999899864197,0.3440000116825104,0.3470000028610229,0.3569999933242798,0.3510000109672546,0.3680000007152557,0.3529999852180481,0.3680000007152557,0.3549999892711639,0.3540000021457672,0.3529999852180481,0.3499999940395355,0.3569999933242798,0.3529999852180481,0.3499999940395355,0.3540000021457672,0.3659999966621399,0.3600000143051147,0.3680000007152557,0.3659999966621399,0.3600000143051147,0.3659999966621399,0.3540000021457672,0.3580000102519989,0.367000013589859,0.3549999892711639,0.3729999959468841,0.3580000102519989,0.3619999885559082,0.3659999966621399,0.3680000007152557,0.3650000095367431,0.3619999885559082,0.3759999871253967,0.3689999878406524,0.3689999878406524,0.3619999885559082,0.3630000054836273,0.3650000095367431,0.3799999952316284,0.3729999959468841,0.3740000128746032,0.367000013589859,0.3720000088214874,0.3600000143051147,0.3650000095367431,0.3729999959468841,0.3589999973773956,0.3799999952316284,0.3589999973773956,0.3799999952316284,0.3680000007152557,0.367000013589859,0.367000013589859,0.3700000047683716,0.3790000081062317,0.3729999959468841,0.3770000040531158,0.3709999918937683,0.3759999871253967,0.3759999871253967,0.3700000047683716,0.3720000088214874,0.3840000033378601,0.3770000040531158,0.3770000040531158,0.3790000081062317,0.38600000739
09759,0.3759999871253967,0.3650000095367431,0.3700000047683716,0.3819999992847442,0.3819999992847442,0.3630000054836273,0.3689999878406524,0.3759999871253967,0.3759999871253967,0.3779999911785126,0.3740000128746032,0.3860000073909759,0.3619999885559082,0.3740000128746032,0.3799999952316284,0.3819999992847442,0.3740000128746032,0.3770000040531158,0.375,0.3810000121593475,0.3729999959468841,0.3880000114440918,0.3840000033378601,0.3840000033378601,0.3770000040531158,0.3740000128746032,0.382999986410141,0.3840000033378601,0.3770000040531158,0.3869999945163727,0.3729999959468841,0.3770000040531158,0.3759999871253967,0.3840000033378601,0.3880000114440918,0.3759999871253967,0.3740000128746032,0.3720000088214874,0.3790000081062317,0.3740000128746032,0.3630000054836273,0.3810000121593475,0.3720000088214874,0.3729999959468841,0.3720000088214874,0.3840000033378601,0.3759999871253967,0.3840000033378601,0.3790000081062317,0.3819999992847442,0.3689999878406524,0.3700000047683716,0.3790000081062317,0.3729999959468841,0.3799999952316284,0.3799999952316284,0.3740000128746032,0.3689999878406524,0.3810000121593475,0.3720000088214874,0.382999986410141,0.3819999992847442,0.3720000088214874,0.3799999952316284,0.3740000128746032,0.3729999959468841,0.3790000081062317,0.3720000088214874,0.3680000007152557,0.3779999911785126,0.3799999952316284,0.3729999959468841,0.3740000128746032,0.3729999959468841,0.3759999871253967,0.3790000081062317,0.3689999878406524,0.3680000007152557,0.3659999966621399,0.3729999959468841,0.3680000007152557],"label":"FineWeb 
(ours)"},"C4":{"x":[0.0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,29.360128000000003,31.45728,33.554432000000006,35.651584,37.748736,39.845888,41.94304,44.040192000000005,46.137344000000006,48.234496,50.331648,52.4288,54.525952000000004,56.623104000000005,58.720256000000006,60.817408,62.91456,65.011712,67.10886400000001,69.206016,71.303168,73.40032000000001,75.497472,77.59462400000001,79.691776,81.788928,83.88608,85.983232,88.08038400000001,90.177536,92.27468800000001,94.37184,96.468992,98.56614400000001,100.663296,102.76044800000001,104.8576,106.95475200000001,109.05190400000001,111.149056,113.24620800000001,115.34336,117.44051200000001,119.537664,121.634816,123.73196800000001,125.82912,127.92627200000001,130.023424,132.120576,134.21772800000002,136.31488000000002,138.412032,140.509184,142.606336,144.70348800000002,146.80064000000002,148.897792,150.994944,153.092096,155.18924800000002,157.28640000000001,159.383552,161.480704,163.577856,165.67500800000002,167.77216,169.869312,171.966464,174.06361600000002,176.16076800000002,178.25792,180.355072,182.452224,184.54937600000002,186.64652800000002,188.74368,190.840832,192.937984,195.03513600000002,197.13228800000002,199.22944,201.326592,203.423744,205.52089600000002,207.61804800000002,209.7152,211.812352,213.90950400000003,216.00665600000002,218.10380800000001,220.20096,222.298112,224.39526400000003,226.49241600000002,228.589568,230.68672,232.783872,234.88102400000002,236.97817600000002,239.075328,241.17248,243.269632,245.36678400000002,247.46393600000002,249.561088,251.65824,253.75539200000003,255.85254400000002,257.949696,260.046848,262.144,264.241152,266.338304,268.43545600000004,270.53260800000004,272.62976000000003,274.726912,276.824064,278.921216,281.018368,283.11552,285.212672,287.309824,289.40697600000004,291.50412800000004,293.60128000000003,295.698432,297.795584,299.8
92736,301.989888,304.08704,306.184192,308.28134400000005,310.37849600000004,312.47564800000004,314.57280000000003,316.669952,318.767104,320.864256,322.961408,325.05856,327.155712,329.25286400000005,331.35001600000004,333.44716800000003,335.54432,337.641472,339.738624,341.835776,343.932928,346.03008,348.12723200000005,350.22438400000004],"y":[0.2329999953508377,0.2599999904632568,0.277999997138977,0.2949999868869781,0.2980000078678131,0.3009999990463257,0.3149999976158142,0.3120000064373016,0.3219999969005584,0.3260000050067901,0.3240000009536743,0.3339999914169311,0.3310000002384186,0.3370000123977661,0.3289999961853027,0.3240000009536743,0.3350000083446502,0.3379999995231628,0.3420000076293945,0.3529999852180481,0.3370000123977661,0.3420000076293945,0.3650000095367431,0.3490000069141388,0.335999995470047,0.3490000069141388,0.3580000102519989,0.3580000102519989,0.3519999980926513,0.3549999892711639,0.356000006198883,0.3519999980926513,0.3549999892711639,0.3569999933242798,0.3569999933242798,0.3600000143051147,0.3490000069141388,0.3619999885559082,0.356000006198883,0.3459999859333038,0.3580000102519989,0.3449999988079071,0.3540000021457672,0.3540000021457672,0.3580000102519989,0.3510000109672546,0.3630000054836273,0.3470000028610229,0.356000006198883,0.356000006198883,0.3630000054836273,0.3659999966621399,0.3479999899864197,0.3549999892711639,0.3680000007152557,0.3580000102519989,0.3490000069141388,0.3519999980926513,0.3549999892711639,0.3519999980926513,0.3529999852180481,0.3659999966621399,0.3529999852180481,0.3619999885559082,0.3650000095367431,0.3600000143051147,0.3619999885559082,0.3770000040531158,0.3600000143051147,0.3580000102519989,0.3569999933242798,0.3659999966621399,0.3729999959468841,0.375,0.367000013589859,0.3650000095367431,0.3600000143051147,0.3650000095367431,0.3700000047683716,0.3689999878406524,0.3689999878406524,0.3659999966621399,0.3689999878406524,0.363999992609024,0.3709999918937683,0.367000013589859,0.3810000121593475,0.3619999885559082,0.3659
999966621399,0.3610000014305115,0.3680000007152557,0.3600000143051147,0.3680000007152557,0.3700000047683716,0.3709999918937683,0.363999992609024,0.3700000047683716,0.3630000054836273,0.3689999878406524,0.3689999878406524,0.3650000095367431,0.3650000095367431,0.3680000007152557,0.3650000095367431,0.3779999911785126,0.3610000014305115,0.3630000054836273,0.375,0.3709999918937683,0.3630000054836273,0.3630000054836273,0.3619999885559082,0.3689999878406524,0.363999992609024,0.3610000014305115,0.3650000095367431,0.3689999878406524,0.3700000047683716,0.3630000054836273,0.3569999933242798,0.3630000054836273,0.356000006198883,0.3600000143051147,0.3580000102519989,0.3740000128746032,0.3589999973773956,0.3619999885559082,0.363999992609024,0.3720000088214874,0.3740000128746032,0.3689999878406524,0.3729999959468841,0.3810000121593475,0.3770000040531158,0.3709999918937683,0.3709999918937683,0.3779999911785126,0.3779999911785126,0.3779999911785126,0.3689999878406524,0.367000013589859,0.3650000095367431,0.3650000095367431,0.3619999885559082,0.363999992609024,0.3700000047683716,0.3720000088214874,0.3709999918937683,0.375,0.3759999871253967,0.3729999959468841,0.3790000081062317,0.3700000047683716,0.3700000047683716,0.3729999959468841,0.3709999918937683,0.3759999871253967,0.3709999918937683,0.3729999959468841,0.3709999918937683,0.3720000088214874,0.3700000047683716,0.3790000081062317,0.375,0.3790000081062317,0.3779999911785126,0.3650000095367431,0.3680000007152557],"label":"C4"},"Dolma":{"x":[0.0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,29.360128000000003,31.45728,33.554432000000006,35.651584,37.748736,39.845888,41.94304,44.040192000000005,46.137344000000006,48.234496,50.331648,52.4288,54.525952000000004,56.623104000000005,58.720256000000006,60.817408,62.91456,65.011712,67.10886400000001,69.206016,71.303168,73.40032000000001,75.497472,77.594
62400000001,79.691776,81.788928,83.88608,85.983232,88.08038400000001,90.177536,92.27468800000001,94.37184,96.468992,98.56614400000001,100.663296,102.76044800000001,104.8576,106.95475200000001,109.05190400000001,111.149056,113.24620800000001,115.34336,117.44051200000001,119.537664,121.634816,123.73196800000001,125.82912,127.92627200000001,130.023424,132.120576,134.21772800000002,136.31488000000002,138.412032,140.509184,142.606336,144.70348800000002,146.80064000000002,148.897792,150.994944,153.092096,155.18924800000002,157.28640000000001,159.383552,161.480704,163.577856,165.67500800000002,167.77216,169.869312,171.966464,174.06361600000002,176.16076800000002,178.25792,180.355072,182.452224,184.54937600000002,186.64652800000002,188.74368,190.840832,192.937984,195.03513600000002,197.13228800000002,199.22944,201.326592,203.423744,205.52089600000002,207.61804800000002,209.7152,211.812352,213.90950400000003,216.00665600000002,218.10380800000001,220.20096,222.298112,224.39526400000003,226.49241600000002,228.589568,230.68672,232.783872,234.88102400000002,236.97817600000002,239.075328,241.17248,243.269632,245.36678400000002,247.46393600000002,249.561088,251.65824,253.75539200000003,255.85254400000002,257.949696,260.046848,262.144,264.241152,266.338304,268.43545600000004,270.53260800000004,272.62976000000003,274.726912,276.824064,278.921216,281.018368,283.11552,285.212672,287.309824,289.40697600000004,291.50412800000004,293.60128000000003,295.698432,297.795584,299.892736,301.989888,304.08704,306.184192,308.28134400000005,310.37849600000004,312.47564800000004,314.57280000000003,316.669952,318.767104,320.864256,322.961408,325.05856,327.155712,329.25286400000005,331.35001600000004,333.44716800000003,335.54432,337.641472,339.738624,341.835776,343.932928,346.03008,348.12723200000005,350.22438400000004],"y":[0.2329999953508377,0.2529999911785126,0.277999997138977,0.2939999997615814,0.3030000030994415,0.3179999887943268,0.3160000145435333,0.3179999887943268,0.3219999969005584,0.328000
009059906,0.3289999961853027,0.335999995470047,0.3350000083446502,0.3400000035762787,0.3350000083446502,0.3389999866485595,0.3429999947547912,0.3459999859333038,0.3379999995231628,0.3440000116825104,0.3379999995231628,0.3420000076293945,0.3420000076293945,0.3389999866485595,0.335999995470047,0.3409999907016754,0.3370000123977661,0.3400000035762787,0.3470000028610229,0.3449999988079071,0.3529999852180481,0.3540000021457672,0.3600000143051147,0.3519999980926513,0.3589999973773956,0.3569999933242798,0.3549999892711639,0.3519999980926513,0.3610000014305115,0.356000006198883,0.3440000116825104,0.3619999885559082,0.3499999940395355,0.356000006198883,0.3610000014305115,0.3580000102519989,0.3589999973773956,0.367000013589859,0.3740000128746032,0.3490000069141388,0.3740000128746032,0.3549999892711639,0.3519999980926513,0.3610000014305115,0.3619999885559082,0.3569999933242798,0.3610000014305115,0.3720000088214874,0.3650000095367431,0.3630000054836273,0.3790000081062317,0.3610000014305115,0.3610000014305115,0.3569999933242798,0.356000006198883,0.3680000007152557,0.3600000143051147,0.3770000040531158,0.3659999966621399,0.3689999878406524,0.3729999959468841,0.3700000047683716,0.3569999933242798,0.3700000047683716,0.3700000047683716,0.3700000047683716,0.3650000095367431,0.3600000143051147,0.3680000007152557,0.3759999871253967,0.3709999918937683,0.375,0.3619999885559082,0.3650000095367431,0.3680000007152557,0.3729999959468841,0.3709999918937683,0.375,0.3759999871253967,0.3630000054836273,0.3860000073909759,0.3759999871253967,0.3770000040531158,0.3740000128746032,0.382999986410141,0.3740000128746032,0.363999992609024,0.3740000128746032,0.3840000033378601,0.3770000040531158,0.3709999918937683,0.3799999952316284,0.375,0.3740000128746032,0.3740000128746032,0.3619999885559082,0.3740000128746032,0.3680000007152557,0.3740000128746032,0.3799999952316284,0.3720000088214874,0.3779999911785126,0.3720000088214874,0.3709999918937683,0.367000013589859,0.3700000047683716,0.3759999871253967,0.377
0000040531158,0.3790000081062317,0.3799999952316284,0.3740000128746032,0.3790000081062317,0.3770000040531158,0.3709999918937683,0.3849999904632568,0.3680000007152557,0.3759999871253967,0.3849999904632568,0.367000013589859,0.3790000081062317,0.382999986410141,0.3770000040531158,0.3860000073909759,0.3779999911785126,0.3729999959468841,0.3689999878406524,0.3799999952316284,0.3819999992847442,0.3740000128746032,0.375,0.3729999959468841,0.3779999911785126,0.3759999871253967,0.3779999911785126,0.3799999952316284,0.3860000073909759,0.3849999904632568,0.3840000033378601,0.3779999911785126,0.3899999856948852,0.375,0.3810000121593475,0.3880000114440918,0.3790000081062317,0.3799999952316284,0.3779999911785126,0.3700000047683716,0.375,0.375,0.3759999871253967,0.3819999992847442,0.375,0.375,0.3790000081062317,0.3759999871253967,0.3790000081062317,0.3779999911785126,0.3709999918937683],"label":"Dolma"},"RefinedWeb":{"x":[0.0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,29.360128000000003,31.45728,33.554432000000006,35.651584,37.748736,39.845888,41.94304,44.040192000000005,46.137344000000006,48.234496,50.331648,52.4288,54.525952000000004,56.623104000000005,58.720256000000006,60.817408,62.91456,65.011712,67.10886400000001,69.206016,71.303168,73.40032000000001,75.497472,77.59462400000001,79.691776,81.788928,83.88608,85.983232,88.08038400000001,90.177536,92.27468800000001,94.37184,96.468992,98.56614400000001,100.663296,102.76044800000001,104.8576,106.95475200000001,109.05190400000001,111.149056,113.24620800000001,115.34336,117.44051200000001,119.537664,121.634816,123.73196800000001,125.82912,127.92627200000001,130.023424,132.120576,134.21772800000002,136.31488000000002,138.412032,140.509184,142.606336,144.70348800000002,146.80064000000002,148.897792,150.994944,153.092096,155.18924800000002,157.28640000000001,159.383552,161.480704,163.577856,16
5.67500800000002,167.77216,169.869312,171.966464,174.06361600000002,176.16076800000002,178.25792,180.355072,182.452224,184.54937600000002,186.64652800000002,188.74368,190.840832,192.937984,195.03513600000002,197.13228800000002,199.22944,201.326592,203.423744,205.52089600000002,207.61804800000002,209.7152,211.812352,213.90950400000003,216.00665600000002,218.10380800000001,220.20096,222.298112,224.39526400000003,226.49241600000002,228.589568,230.68672,232.783872,234.88102400000002,236.97817600000002,239.075328,241.17248,243.269632,245.36678400000002,247.46393600000002,249.561088,251.65824,253.75539200000003,255.85254400000002,257.949696,260.046848,262.144,264.241152,266.338304,268.43545600000004,270.53260800000004,272.62976000000003,274.726912,276.824064,278.921216,281.018368,283.11552,285.212672,287.309824,289.40697600000004,291.50412800000004,293.60128000000003,295.698432,297.795584,299.892736,301.989888,304.08704,306.184192,308.28134400000005,310.37849600000004,312.47564800000004,314.57280000000003,316.669952,318.767104,320.864256,322.961408,325.05856,327.155712,329.25286400000005,331.35001600000004,333.44716800000003,335.54432,337.641472,339.738624,341.835776,343.932928,346.03008,348.12723200000005,350.22438400000004],"y":[0.2329999953508377,0.2529999911785126,0.2800000011920929,0.2870000004768371,0.3179999887943268,0.3129999935626983,0.3210000097751617,0.3160000145435333,0.3210000097751617,0.31700000166893,0.3330000042915344,0.3389999866485595,0.3289999961853027,0.3429999947547912,0.3379999995231628,0.3459999859333038,0.3490000069141388,0.3470000028610229,0.3600000143051147,0.3569999933242798,0.3449999988079071,0.3650000095367431,0.3499999940395355,0.3540000021457672,0.3569999933242798,0.3619999885559082,0.3619999885559082,0.3580000102519989,0.3740000128746032,0.3709999918937683,0.3720000088214874,0.3759999871253967,0.3720000088214874,0.3659999966621399,0.3790000081062317,0.3610000014305115,0.3650000095367431,0.3650000095367431,0.3720000088214874,0.37299999594688
41,0.3790000081062317,0.3680000007152557,0.3659999966621399,0.3680000007152557,0.3619999885559082,0.3619999885559082,0.3729999959468841,0.3720000088214874,0.3650000095367431,0.3759999871253967,0.367000013589859,0.3650000095367431,0.3680000007152557,0.3580000102519989,0.3589999973773956,0.3700000047683716,0.3680000007152557,0.367000013589859,0.3709999918937683,0.3880000114440918,0.3810000121593475,0.375,0.4040000140666961,0.3860000073909759,0.3840000033378601,0.3779999911785126,0.3729999959468841,0.3720000088214874,0.3799999952316284,0.3799999952316284,0.3779999911785126,0.3689999878406524,0.3770000040531158,0.3740000128746032,0.3819999992847442,0.3899999856948852,0.3799999952316284,0.3919999897480011,0.3720000088214874,0.3770000040531158,0.3930000066757202,0.3849999904632568,0.3899999856948852,0.3740000128746032,0.3740000128746032,0.3799999952316284,0.3779999911785126,0.3880000114440918,0.3709999918937683,0.3810000121593475,0.3880000114440918,0.3980000019073486,0.3819999992847442,0.3849999904632568,0.3810000121593475,0.3819999992847442,0.3889999985694885,0.3840000033378601,0.3910000026226043,0.3899999856948852,0.3959999978542328,0.3880000114440918,0.3869999945163727,0.3779999911785126,0.3819999992847442,0.3919999897480011,0.3849999904632568,0.3860000073909759,0.3919999897480011,0.3819999992847442,0.3819999992847442,0.3889999985694885,0.3889999985694885,0.3860000073909759,0.3880000114440918,0.3889999985694885,0.3939999938011169,0.3899999856948852,0.3869999945163727,0.3910000026226043,0.3910000026226043,0.3910000026226043,0.3970000147819519,0.3970000147819519,0.3970000147819519,0.3970000147819519,0.3939999938011169,0.4000000059604645,0.3970000147819519,0.402999997138977,0.3959999978542328,0.3959999978542328,0.4000000059604645,0.4040000140666961,0.4020000100135803,0.3989999890327453,0.3919999897480011,0.3930000066757202,0.3930000066757202,0.3980000019073486,0.4000000059604645,0.395000010728836,0.3899999856948852,0.4059999883174896,0.4020000100135803,0.4020000100135803,
0.4059999883174896,0.3970000147819519,0.4110000133514404,0.4050000011920929,0.4000000059604645,0.4090000092983246,0.3989999890327453,0.402999997138977,0.4009999930858612,0.3980000019073486,0.4090000092983246,0.4079999923706054,0.4079999923706054,0.4020000100135803,0.402999997138977,0.402999997138977,0.4059999883174896,0.4040000140666961,0.4059999883174896,0.3989999890327453,0.4070000052452087,0.4059999883174896],"label":"RefinedWeb"},"SlimPajama":{"x":[0.0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,29.360128000000003,31.45728,33.554432000000006,35.651584,37.748736,39.845888,41.94304,44.040192000000005,46.137344000000006,48.234496,50.331648,52.4288,54.525952000000004,56.623104000000005,58.720256000000006,60.817408,62.91456,65.011712,67.10886400000001,69.206016,71.303168,73.40032000000001,75.497472,77.59462400000001,79.691776,81.788928,83.88608,85.983232,88.08038400000001,90.177536,92.27468800000001,94.37184,96.468992,98.56614400000001,100.663296,102.76044800000001,104.8576,106.95475200000001,109.05190400000001,111.149056,113.24620800000001,115.34336,117.44051200000001,119.537664,121.634816,123.73196800000001,125.82912,127.92627200000001,130.023424,132.120576,134.21772800000002,136.31488000000002,138.412032,140.509184,142.606336,144.70348800000002,146.80064000000002,148.897792,150.994944,153.092096,155.18924800000002,157.28640000000001,159.383552,161.480704,163.577856,165.67500800000002,167.77216,169.869312,171.966464,174.06361600000002,176.16076800000002,178.25792,180.355072,182.452224,184.54937600000002,186.64652800000002,188.74368,190.840832,192.937984,195.03513600000002,197.13228800000002,199.22944,201.326592,203.423744,205.52089600000002,207.61804800000002,209.7152,211.812352,213.90950400000003,216.00665600000002,218.10380800000001,220.20096,222.298112,224.39526400000003,226.49241600000002,228.589568,230.68672,232.783872
,234.88102400000002,236.97817600000002,239.075328,241.17248,243.269632,245.36678400000002,247.46393600000002,249.561088,251.65824,253.75539200000003,255.85254400000002,257.949696,260.046848,262.144,264.241152,266.338304,268.43545600000004,270.53260800000004,272.62976000000003,274.726912,276.824064,278.921216,281.018368,283.11552,285.212672,287.309824,289.40697600000004,291.50412800000004,293.60128000000003,295.698432,297.795584,299.892736,301.989888,304.08704,306.184192,308.28134400000005,310.37849600000004,312.47564800000004,314.57280000000003,316.669952,318.767104,320.864256,322.961408,325.05856,327.155712,329.25286400000005,331.35001600000004,333.44716800000003,335.54432,337.641472,339.738624,341.835776,343.932928,346.03008,348.12723200000005,350.22438400000004],"y":[0.2300000041723251,0.238999992609024,0.2619999945163727,0.2899999916553497,0.2680000066757202,0.2870000004768371,0.2910000085830688,0.3149999976158142,0.3260000050067901,0.3030000030994415,0.3129999935626983,0.3109999895095825,0.3219999969005584,0.3260000050067901,0.3230000138282776,0.3409999907016754,0.3219999969005584,0.3319999873638153,0.3350000083446502,0.3429999947547912,0.3269999921321869,0.3330000042915344,0.3330000042915344,0.3310000002384186,0.335999995470047,0.3499999940395355,0.3319999873638153,0.3440000116825104,0.3379999995231628,0.3440000116825104,0.3370000123977661,0.3490000069141388,0.3510000109672546,0.3470000028610229,0.3429999947547912,0.3440000116825104,0.3540000021457672,0.3510000109672546,0.3429999947547912,0.3569999933242798,0.3339999914169311,0.3429999947547912,0.3459999859333038,0.3499999940395355,0.3459999859333038,0.3510000109672546,0.3389999866485595,0.3350000083446502,0.3370000123977661,0.3470000028610229,0.3569999933242798,0.3459999859333038,0.335999995470047,0.3420000076293945,0.3449999988079071,0.3529999852180481,0.3580000102519989,0.363999992609024,0.3540000021457672,0.3470000028610229,0.3499999940395355,0.3610000014305115,0.3440000116825104,0.3540000021457672,0.35600
0006198883,0.3630000054836273,0.3650000095367431,0.3499999940395355,0.3490000069141388,0.3459999859333038,0.3470000028610229,0.3600000143051147,0.3490000069141388,0.3569999933242798,0.3490000069141388,0.3600000143051147,0.3569999933242798,0.3449999988079071,0.3429999947547912,0.3619999885559082,0.3549999892711639,0.3700000047683716,0.3569999933242798,0.3600000143051147,0.3610000014305115,0.3650000095367431,0.3619999885559082,0.356000006198883,0.3580000102519989,0.356000006198883,0.3519999980926513,0.367000013589859,0.3499999940395355,0.3720000088214874,0.3630000054836273,0.3610000014305115,0.3630000054836273,0.3529999852180481,0.3540000021457672,0.3549999892711639,0.363999992609024,0.3689999878406524,0.3700000047683716,0.3729999959468841,0.3610000014305115,0.3630000054836273,0.3650000095367431,0.3619999885559082,0.3580000102519989,0.3580000102519989,0.3740000128746032,0.3619999885559082,0.3680000007152557,0.367000013589859,0.3610000014305115,0.375,0.3659999966621399,0.3600000143051147,0.367000013589859,0.367000013589859,0.3740000128746032,0.3720000088214874,0.3709999918937683,0.3700000047683716,0.3759999871253967,0.3720000088214874,0.3720000088214874,0.3650000095367431,0.3680000007152557,0.3680000007152557,0.3659999966621399,0.375,0.3709999918937683,0.3689999878406524,0.3720000088214874,0.3680000007152557,0.3779999911785126,0.3729999959468841,0.3740000128746032,0.3689999878406524,0.382999986410141,0.3709999918937683,0.3759999871253967,0.3770000040531158,0.3770000040531158,0.3700000047683716,0.3729999959468841,0.3779999911785126,0.3779999911785126,0.3720000088214874,0.3709999918937683,0.3720000088214874,0.3779999911785126,0.3709999918937683,0.3770000040531158,0.3709999918937683,0.3700000047683716,0.3770000040531158,0.3779999911785126,0.3740000128746032,0.3779999911785126,0.3659999966621399,0.3729999959468841,0.3779999911785126,0.3740000128746032,0.375,0.382999986410141,0.382999986410141],"label":"SlimPajama"},"The 
Pile":{"x":[0.0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,29.360128000000003,31.45728,33.554432000000006,35.651584,37.748736,39.845888,41.94304,44.040192000000005,46.137344000000006,48.234496,50.331648,52.4288,54.525952000000004,56.623104000000005,58.720256000000006,60.817408,62.91456,65.011712,67.10886400000001,69.206016,71.303168,73.40032000000001,75.497472,77.59462400000001,79.691776,81.788928,83.88608,85.983232,88.08038400000001,90.177536,92.27468800000001,94.37184,96.468992,98.56614400000001,100.663296,102.76044800000001,104.8576,106.95475200000001,109.05190400000001,111.149056,113.24620800000001,115.34336,117.44051200000001,119.537664,121.634816,123.73196800000001,125.82912,127.92627200000001,130.023424,132.120576,134.21772800000002,136.31488000000002,138.412032,140.509184,142.606336,144.70348800000002,146.80064000000002,148.897792,150.994944,153.092096,155.18924800000002,157.28640000000001,159.383552,161.480704,163.577856,165.67500800000002,167.77216,169.869312,171.966464,174.06361600000002,176.16076800000002,178.25792,180.355072,182.452224,184.54937600000002,186.64652800000002,188.74368,190.840832,192.937984,195.03513600000002,197.13228800000002,199.22944,201.326592,203.423744,205.52089600000002,207.61804800000002,209.7152,211.812352,213.90950400000003,216.00665600000002,218.10380800000001,220.20096,222.298112,224.39526400000003,226.49241600000002,228.589568,230.68672,232.783872,234.88102400000002,236.97817600000002,239.075328,241.17248,243.269632,245.36678400000002,247.46393600000002,249.561088,251.65824,253.75539200000003,255.85254400000002,257.949696,260.046848,262.144,264.241152,266.338304,268.43545600000004,270.53260800000004,272.62976000000003,274.726912,276.824064,278.921216,281.018368,283.11552,285.212672,287.309824,289.40697600000004,291.50412800000004,293.60128000000003,295.698432,297.795584,299.892736,30
1.989888,304.08704,306.184192,308.28134400000005,310.37849600000004,312.47564800000004,314.57280000000003,316.669952,318.767104,320.864256,322.961408,325.05856,327.155712,329.25286400000005,331.35001600000004,333.44716800000003,335.54432,337.641472,339.738624,341.835776,343.932928,346.03008,348.12723200000005,350.22438400000004],"y":[0.2300000041723251,0.2460000067949295,0.257999986410141,0.270000010728836,0.2840000092983246,0.2770000100135803,0.2829999923706054,0.2939999997615814,0.2949999868869781,0.3050000071525574,0.3019999861717224,0.2939999997615814,0.3059999942779541,0.3219999969005584,0.3129999935626983,0.3149999976158142,0.3079999983310699,0.3160000145435333,0.3230000138282776,0.3179999887943268,0.3120000064373016,0.3140000104904175,0.3190000057220459,0.3260000050067901,0.3289999961853027,0.3300000131130218,0.3190000057220459,0.3210000097751617,0.328000009059906,0.324999988079071,0.3230000138282776,0.3240000009536743,0.324999988079071,0.3289999961853027,0.3370000123977661,0.3269999921321869,0.3400000035762787,0.3350000083446502,0.3289999961853027,0.3330000042915344,0.3510000109672546,0.3429999947547912,0.3540000021457672,0.3429999947547912,0.3319999873638153,0.3389999866485595,0.3440000116825104,0.3319999873638153,0.3479999899864197,0.3429999947547912,0.3459999859333038,0.3420000076293945,0.3379999995231628,0.3370000123977661,0.3310000002384186,0.3339999914169311,0.3310000002384186,0.3310000002384186,0.3339999914169311,0.3300000131130218,0.3339999914169311,0.3449999988079071,0.3479999899864197,0.3330000042915344,0.3409999907016754,0.3499999940395355,0.3490000069141388,0.3350000083446502,0.3420000076293945,0.3510000109672546,0.3459999859333038,0.3449999988079071,0.3389999866485595,0.3470000028610229,0.3540000021457672,0.3470000028610229,0.3370000123977661,0.3370000123977661,0.3310000002384186,0.3440000116825104,0.335999995470047,0.3479999899864197,0.3379999995231628,0.3409999907016754,0.3330000042915344,0.3479999899864197,0.3339999914169311,0.340999990701675
4,0.3350000083446502,0.3269999921321869,0.3289999961853027,0.3350000083446502,0.335999995470047,0.3289999961853027,0.3400000035762787,0.3339999914169311,0.3389999866485595,0.3350000083446502,0.3400000035762787,0.3479999899864197,0.3409999907016754,0.3519999980926513,0.3510000109672546,0.3449999988079071,0.3300000131130218,0.3370000123977661,0.3449999988079071,0.3420000076293945,0.3319999873638153,0.3569999933242798,0.3449999988079071,0.3470000028610229,0.3429999947547912,0.3499999940395355,0.3549999892711639,0.3449999988079071,0.3389999866485595,0.3409999907016754,0.3540000021457672,0.3449999988079071,0.3589999973773956,0.3389999866485595,0.3379999995231628,0.3409999907016754,0.3479999899864197,0.3479999899864197,0.3490000069141388,0.3470000028610229,0.3529999852180481,0.3479999899864197,0.3499999940395355,0.3470000028610229,0.3470000028610229,0.3449999988079071,0.3459999859333038,0.3499999940395355,0.3510000109672546,0.3470000028610229,0.3529999852180481,0.3470000028610229,0.3499999940395355,0.3449999988079071,0.3459999859333038,0.3449999988079071,0.3499999940395355,0.3499999940395355,0.3459999859333038,0.3529999852180481,0.3499999940395355,0.3429999947547912,0.3479999899864197,0.3529999852180481,0.3470000028610229,0.3589999973773956,0.3389999866485595,0.356000006198883,0.3600000143051147,0.3490000069141388,0.3470000028610229,0.3490000069141388,0.3470000028610229,0.3490000069141388,0.3549999892711639,0.3490000069141388,0.3440000116825104,0.3499999940395355,0.3459999859333038,0.3540000021457672],"label":"The 
Pile"},"RedPajama2":{"x":[0.0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,29.360128000000003,31.45728,33.554432000000006,35.651584,37.748736,39.845888,41.94304,44.040192000000005,46.137344000000006,48.234496,50.331648,52.4288,54.525952000000004,56.623104000000005,58.720256000000006,60.817408,62.91456,65.011712,67.10886400000001,69.206016,71.303168,73.40032000000001,75.497472,77.59462400000001,79.691776,81.788928,83.88608,85.983232,88.08038400000001,90.177536,92.27468800000001,94.37184,96.468992,98.56614400000001,100.663296,102.76044800000001,104.8576,106.95475200000001,109.05190400000001,111.149056,113.24620800000001,115.34336,117.44051200000001,119.537664,121.634816,123.73196800000001,125.82912,127.92627200000001,130.023424,132.120576,134.21772800000002,136.31488000000002,138.412032,140.509184,142.606336,144.70348800000002,146.80064000000002,148.897792,150.994944,153.092096,155.18924800000002,157.28640000000001,159.383552,161.480704,163.577856,165.67500800000002,167.77216,169.869312,171.966464,174.06361600000002,176.16076800000002,178.25792,180.355072,182.452224,184.54937600000002,186.64652800000002,188.74368,190.840832,192.937984,195.03513600000002,197.13228800000002,199.22944,201.326592,203.423744,205.52089600000002,207.61804800000002,209.7152,211.812352,213.90950400000003,216.00665600000002,218.10380800000001,220.20096,222.298112,224.39526400000003,226.49241600000002,228.589568,230.68672,232.783872,234.88102400000002,236.97817600000002,239.075328,241.17248,243.269632,245.36678400000002,247.46393600000002,249.561088,251.65824,253.75539200000003,255.85254400000002,257.949696,260.046848,262.144,264.241152,266.338304,268.43545600000004,270.53260800000004,272.62976000000003,274.726912,276.824064,278.921216,281.018368,283.11552,285.212672,287.309824,289.40697600000004,291.50412800000004,293.60128000000003,295.698432,297.795584
,299.892736,301.989888,304.08704,306.184192,308.28134400000005,310.37849600000004,312.47564800000004,314.57280000000003,316.669952,318.767104,320.864256,322.961408,325.05856,327.155712,329.25286400000005,331.35001600000004,333.44716800000003,335.54432,337.641472,339.738624,341.835776,343.932928,346.03008,348.12723200000005,350.22438400000004],"y":[0.2300000041723251,0.2469999939203262,0.2739999890327453,0.2989999949932098,0.2980000078678131,0.3149999976158142,0.3160000145435333,0.3079999983310699,0.3109999895095825,0.3070000112056732,0.31700000166893,0.3149999976158142,0.324999988079071,0.3260000050067901,0.3310000002384186,0.3490000069141388,0.3490000069141388,0.3409999907016754,0.3420000076293945,0.3440000116825104,0.3470000028610229,0.3510000109672546,0.3420000076293945,0.3510000109672546,0.3479999899864197,0.3569999933242798,0.3479999899864197,0.3470000028610229,0.3600000143051147,0.356000006198883,0.3389999866485595,0.3529999852180481,0.3529999852180481,0.3549999892711639,0.3459999859333038,0.3490000069141388,0.3510000109672546,0.3589999973773956,0.3610000014305115,0.3540000021457672,0.3569999933242798,0.3720000088214874,0.3580000102519989,0.363999992609024,0.3479999899864197,0.3449999988079071,0.3619999885559082,0.3589999973773956,0.3580000102519989,0.3589999973773956,0.3569999933242798,0.367000013589859,0.3650000095367431,0.3540000021457672,0.3549999892711639,0.3689999878406524,0.363999992609024,0.3700000047683716,0.3819999992847442,0.3790000081062317,0.3650000095367431,0.3709999918937683,0.375,0.3720000088214874,0.3689999878406524,0.3709999918937683,0.3709999918937683,0.3610000014305115,0.3720000088214874,0.3770000040531158,0.3680000007152557,0.3759999871253967,0.3729999959468841,0.3680000007152557,0.3689999878406524,0.3630000054836273,0.3549999892711639,0.3729999959468841,0.3680000007152557,0.3790000081062317,0.3659999966621399,0.3700000047683716,0.3720000088214874,0.367000013589859,0.3779999911785126,0.382999986410141,0.3799999952316284,0.3740000128746032,
0.367000013589859,0.3709999918937683,0.3759999871253967,0.3849999904632568,0.3819999992847442,0.3720000088214874,0.3779999911785126,0.3740000128746032,0.3759999871253967,0.3860000073909759,0.3799999952316284,0.3709999918937683,0.3770000040531158,0.3819999992847442,0.3810000121593475,0.3819999992847442,0.3919999897480011,0.375,0.3720000088214874,0.3709999918937683,0.3819999992847442,0.3720000088214874,0.3720000088214874,0.3770000040531158,0.3819999992847442,0.3869999945163727,0.3860000073909759,0.3759999871253967,0.3860000073909759,0.3790000081062317,0.3790000081062317,0.3849999904632568,0.3790000081062317,0.3880000114440918,0.3899999856948852,0.3819999992847442,0.3790000081062317,0.3810000121593475,0.3709999918937683,0.375,0.3819999992847442,0.3860000073909759,0.3799999952316284,0.3810000121593475,0.3860000073909759,0.3790000081062317,0.3840000033378601,0.382999986410141,0.3790000081062317,0.3729999959468841,0.3799999952316284,0.375,0.3759999871253967,0.3740000128746032,0.3770000040531158,0.382999986410141,0.3720000088214874,0.3810000121593475,0.3849999904632568,0.3779999911785126,0.375,0.3790000081062317,0.3790000081062317,0.3880000114440918,0.3849999904632568,0.3919999897480011,0.3810000121593475,0.382999986410141,0.3759999871253967,0.3869999945163727,0.375,0.3810000121593475,0.382999986410141,0.3799999952316284,0.3799999952316284,0.3860000073909759,0.3770000040531158,0.3849999904632568,0.3899999856948852,0.3889999985694885],"label":"RedPajama2"}},"layout":{"xaxis":{"title":{"text":"Training tokens (billions)"}},"yaxis":{"range":[0.39,0.5]},"title":{"text":"Dataset Ablations"}}}
 
 
data/plots/dataset_ablations/hellaswag_acc_norm.json DELETED
@@ -1 +0,0 @@
1
- {"data":{"FineWeb (ours)":{"x":[0.0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,29.360128000000003,31.45728,33.554432000000006,35.651584,37.748736,39.845888,41.94304,44.040192000000005,46.137344000000006,48.234496,50.331648,52.4288,54.525952000000004,56.623104000000005,58.720256000000006,60.817408,62.91456,65.011712,67.10886400000001,69.206016,71.303168,73.40032000000001,75.497472,77.59462400000001,79.691776,81.788928,83.88608,85.983232,88.08038400000001,90.177536,92.27468800000001,94.37184,96.468992,98.56614400000001,100.663296,102.76044800000001,104.8576,106.95475200000001,109.05190400000001,111.149056,113.24620800000001,115.34336,117.44051200000001,119.537664,121.634816,123.73196800000001,125.82912,127.92627200000001,130.023424,132.120576,134.21772800000002,136.31488000000002,138.412032,140.509184,142.606336,144.70348800000002,146.80064000000002,148.897792,150.994944,153.092096,155.18924800000002,157.28640000000001,159.383552,161.480704,163.577856,165.67500800000002,167.77216,169.869312,171.966464,174.06361600000002,176.16076800000002,178.25792,180.355072,182.452224,184.54937600000002,186.64652800000002,188.74368,190.840832,192.937984,195.03513600000002,197.13228800000002,199.22944,201.326592,203.423744,205.52089600000002,207.61804800000002,209.7152,211.812352,213.90950400000003,216.00665600000002,218.10380800000001,220.20096,222.298112,224.39526400000003,226.49241600000002,228.589568,230.68672,232.783872,234.88102400000002,236.97817600000002,239.075328,241.17248,243.269632,245.36678400000002,247.46393600000002,249.561088,251.65824,253.75539200000003,255.85254400000002,257.949696,260.046848,262.144,264.241152,266.338304,268.43545600000004,270.53260800000004,272.62976000000003,274.726912,276.824064,278.921216,281.018368,283.11552,285.212672,287.309824,289.40697600000004,291.50412800000004,293.60128000000003,295.698432,29
7.795584,299.892736,301.989888,304.08704,306.184192,308.28134400000005,310.37849600000004,312.47564800000004,314.57280000000003,316.669952,318.767104,320.864256,322.961408,325.05856,327.155712,329.25286400000005,331.35001600000004,333.44716800000003,335.54432,337.641472,339.738624,341.835776,343.932928,346.03008,348.12723200000005,350.22438400000004],"y":[0.257999986410141,0.2919999957084656,0.3310000002384186,0.3549999892711639,0.3939999938011169,0.4149999916553497,0.4329999983310699,0.4460000097751617,0.4589999914169311,0.4819999933242798,0.4769999980926513,0.4830000102519989,0.4909999966621399,0.5059999823570251,0.5059999823570251,0.503000020980835,0.5170000195503235,0.5049999952316284,0.5210000276565552,0.5130000114440918,0.5189999938011169,0.5360000133514404,0.5320000052452087,0.5460000038146973,0.5400000214576721,0.5379999876022339,0.531000018119812,0.5460000038146973,0.5509999990463257,0.5519999861717224,0.5559999942779541,0.5609999895095825,0.5559999942779541,0.5580000281333923,0.5450000166893005,0.5509999990463257,0.5590000152587891,0.5649999976158142,0.5619999766349792,0.5680000185966492,0.5669999718666077,0.5709999799728394,0.5569999814033508,0.5640000104904175,0.5690000057220459,0.5720000267028809,0.5759999752044678,0.5839999914169312,0.5699999928474426,0.5740000009536743,0.5830000042915344,0.5839999914169312,0.5799999833106995,0.5830000042915344,0.574999988079071,0.5910000205039978,0.5799999833106995,0.5879999995231628,0.6039999723434448,0.578000009059906,0.5849999785423279,0.5889999866485596,0.5849999785423279,0.6019999980926514,0.5929999947547913,0.5820000171661377,0.5860000252723694,0.5910000205039978,0.5849999785423279,0.5849999785423279,0.5839999914169312,0.5860000252723694,0.5979999899864197,0.5849999785423279,0.597000002861023,0.5960000157356262,0.6019999980926514,0.6060000061988831,0.5989999771118164,0.5889999866485596,0.5920000076293945,0.5960000157356262,0.5950000286102295,0.6060000061988831,0.5960000157356262,0.6000000238418579,0.606999993324
2798,0.6039999723434448,0.6069999933242798,0.6010000109672546,0.6060000061988831,0.6129999756813049,0.5989999771118164,0.6200000047683716,0.5979999899864197,0.609000027179718,0.6029999852180481,0.609000027179718,0.6179999709129333,0.6150000095367432,0.6060000061988831,0.6069999933242798,0.6119999885559082,0.6190000176429749,0.6079999804496765,0.6150000095367432,0.6079999804496765,0.6190000176429749,0.6079999804496765,0.609000027179718,0.6079999804496765,0.6179999709129333,0.6140000224113464,0.6200000047683716,0.621999979019165,0.6129999756813049,0.6200000047683716,0.6129999756813049,0.6110000014305115,0.6069999933242798,0.609000027179718,0.6159999966621399,0.6169999837875366,0.6129999756813049,0.6169999837875366,0.6159999966621399,0.6200000047683716,0.6150000095367432,0.6240000128746033,0.6179999709129333,0.6179999709129333,0.6129999756813049,0.6179999709129333,0.6110000014305115,0.6190000176429749,0.6200000047683716,0.6150000095367432,0.6159999966621399,0.621999979019165,0.6209999918937683,0.6230000257492065,0.6200000047683716,0.6240000128746033,0.6159999966621399,0.6200000047683716,0.6159999966621399,0.6179999709129333,0.6119999885559082,0.6269999742507935,0.6230000257492065,0.6200000047683716,0.6240000128746033,0.6190000176429749,0.6169999837875366,0.6299999952316284,0.625,0.6179999709129333,0.6150000095367432,0.6259999871253967,0.621999979019165,0.625,0.6190000176429749,0.6259999871253967,0.6340000033378601,0.628000020980835,0.6290000081062317,0.628000020980835,0.6269999742507935],"label":"FineWeb 
(ours)"},"C4":{"x":[0.0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,29.360128000000003,31.45728,33.554432000000006,35.651584,37.748736,39.845888,41.94304,44.040192000000005,46.137344000000006,48.234496,50.331648,52.4288,54.525952000000004,56.623104000000005,58.720256000000006,60.817408,62.91456,65.011712,67.10886400000001,69.206016,71.303168,73.40032000000001,75.497472,77.59462400000001,79.691776,81.788928,83.88608,85.983232,88.08038400000001,90.177536,92.27468800000001,94.37184,96.468992,98.56614400000001,100.663296,102.76044800000001,104.8576,106.95475200000001,109.05190400000001,111.149056,113.24620800000001,115.34336,117.44051200000001,119.537664,121.634816,123.73196800000001,125.82912,127.92627200000001,130.023424,132.120576,134.21772800000002,136.31488000000002,138.412032,140.509184,142.606336,144.70348800000002,146.80064000000002,148.897792,150.994944,153.092096,155.18924800000002,157.28640000000001,159.383552,161.480704,163.577856,165.67500800000002,167.77216,169.869312,171.966464,174.06361600000002,176.16076800000002,178.25792,180.355072,182.452224,184.54937600000002,186.64652800000002,188.74368,190.840832,192.937984,195.03513600000002,197.13228800000002,199.22944,201.326592,203.423744,205.52089600000002,207.61804800000002,209.7152,211.812352,213.90950400000003,216.00665600000002,218.10380800000001,220.20096,222.298112,224.39526400000003,226.49241600000002,228.589568,230.68672,232.783872,234.88102400000002,236.97817600000002,239.075328,241.17248,243.269632,245.36678400000002,247.46393600000002,249.561088,251.65824,253.75539200000003,255.85254400000002,257.949696,260.046848,262.144,264.241152,266.338304,268.43545600000004,270.53260800000004,272.62976000000003,274.726912,276.824064,278.921216,281.018368,283.11552,285.212672,287.309824,289.40697600000004,291.50412800000004,293.60128000000003,295.698432,297.795584,299.8
92736,301.989888,304.08704,306.184192,308.28134400000005,310.37849600000004,312.47564800000004,314.57280000000003,316.669952,318.767104,320.864256,322.961408,325.05856,327.155712,329.25286400000005,331.35001600000004,333.44716800000003,335.54432,337.641472,339.738624,341.835776,343.932928,346.03008,348.12723200000005,350.22438400000004],"y":[0.257999986410141,0.2879999876022339,0.3300000131130218,0.3799999952316284,0.4059999883174896,0.4129999876022339,0.421999990940094,0.4370000064373016,0.4390000104904175,0.4539999961853027,0.4709999859333038,0.4860000014305115,0.4790000021457672,0.4839999973773956,0.4959999918937683,0.5019999742507935,0.4939999878406524,0.5,0.5090000033378601,0.5080000162124634,0.5260000228881836,0.5289999842643738,0.5320000052452087,0.5360000133514404,0.5370000004768372,0.5440000295639038,0.5389999747276306,0.5419999957084656,0.5379999876022339,0.5580000281333923,0.5440000295639038,0.5479999780654907,0.5490000247955322,0.5569999814033508,0.5590000152587891,0.546999990940094,0.550000011920929,0.5450000166893005,0.5569999814033508,0.5609999895095825,0.5609999895095825,0.5720000267028809,0.5690000057220459,0.5630000233650208,0.5630000233650208,0.5640000104904175,0.5659999847412109,0.5709999799728394,0.5789999961853027,0.578000009059906,0.5789999961853027,0.5789999961853027,0.5730000138282776,0.5759999752044678,0.5770000219345093,0.5759999752044678,0.5870000123977661,0.5759999752044678,0.5820000171661377,0.5849999785423279,0.5820000171661377,0.5879999995231628,0.5809999704360962,0.5860000252723694,0.5849999785423279,0.5839999914169312,0.5910000205039978,0.5849999785423279,0.5860000252723694,0.5989999771118164,0.5899999737739563,0.593999981880188,0.5929999947547913,0.5870000123977661,0.5849999785423279,0.5910000205039978,0.5889999866485596,0.5910000205039978,0.5830000042915344,0.597000002861023,0.5879999995231628,0.5929999947547913,0.6010000109672546,0.6050000190734863,0.6000000238418579,0.6039999723434448,0.5929999947547913,0.5950000286102295,0.6000
000238418579,0.6010000109672546,0.6069999933242798,0.6039999723434448,0.6129999756813049,0.5920000076293945,0.6029999852180481,0.5950000286102295,0.5989999771118164,0.6010000109672546,0.6050000190734863,0.5950000286102295,0.6010000109672546,0.6050000190734863,0.6000000238418579,0.6119999885559082,0.6129999756813049,0.6069999933242798,0.6079999804496765,0.6010000109672546,0.6100000143051147,0.6069999933242798,0.6100000143051147,0.6179999709129333,0.6050000190734863,0.609000027179718,0.6060000061988831,0.6140000224113464,0.6129999756813049,0.6069999933242798,0.6119999885559082,0.6150000095367432,0.6140000224113464,0.6150000095367432,0.6179999709129333,0.6209999918937683,0.6309999823570251,0.6169999837875366,0.6209999918937683,0.6159999966621399,0.6150000095367432,0.6169999837875366,0.6240000128746033,0.6179999709129333,0.6159999966621399,0.621999979019165,0.625,0.621999979019165,0.6169999837875366,0.6179999709129333,0.6330000162124634,0.621999979019165,0.625,0.621999979019165,0.6309999823570251,0.6299999952316284,0.6230000257492065,0.625,0.621999979019165,0.6259999871253967,0.621999979019165,0.628000020980835,0.6320000290870667,0.625,0.6380000114440918,0.6269999742507935,0.6349999904632568,0.625,0.6340000033378601,0.6309999823570251,0.6359999775886536,0.6330000162124634,0.6299999952316284,0.6349999904632568,0.6299999952316284,0.6389999985694885,0.6430000066757202,0.6330000162124634,0.6320000290870667,0.6389999985694885],"label":"C4"},"Dolma":{"x":[0.0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,29.360128000000003,31.45728,33.554432000000006,35.651584,37.748736,39.845888,41.94304,44.040192000000005,46.137344000000006,48.234496,50.331648,52.4288,54.525952000000004,56.623104000000005,58.720256000000006,60.817408,62.91456,65.011712,67.10886400000001,69.206016,71.303168,73.40032000000001,75.497472,77.59462400000001,79.691776,81.788
928,83.88608,85.983232,88.08038400000001,90.177536,92.27468800000001,94.37184,96.468992,98.56614400000001,100.663296,102.76044800000001,104.8576,106.95475200000001,109.05190400000001,111.149056,113.24620800000001,115.34336,117.44051200000001,119.537664,121.634816,123.73196800000001,125.82912,127.92627200000001,130.023424,132.120576,134.21772800000002,136.31488000000002,138.412032,140.509184,142.606336,144.70348800000002,146.80064000000002,148.897792,150.994944,153.092096,155.18924800000002,157.28640000000001,159.383552,161.480704,163.577856,165.67500800000002,167.77216,169.869312,171.966464,174.06361600000002,176.16076800000002,178.25792,180.355072,182.452224,184.54937600000002,186.64652800000002,188.74368,190.840832,192.937984,195.03513600000002,197.13228800000002,199.22944,201.326592,203.423744,205.52089600000002,207.61804800000002,209.7152,211.812352,213.90950400000003,216.00665600000002,218.10380800000001,220.20096,222.298112,224.39526400000003,226.49241600000002,228.589568,230.68672,232.783872,234.88102400000002,236.97817600000002,239.075328,241.17248,243.269632,245.36678400000002,247.46393600000002,249.561088,251.65824,253.75539200000003,255.85254400000002,257.949696,260.046848,262.144,264.241152,266.338304,268.43545600000004,270.53260800000004,272.62976000000003,274.726912,276.824064,278.921216,281.018368,283.11552,285.212672,287.309824,289.40697600000004,291.50412800000004,293.60128000000003,295.698432,297.795584,299.892736,301.989888,304.08704,306.184192,308.28134400000005,310.37849600000004,312.47564800000004,314.57280000000003,316.669952,318.767104,320.864256,322.961408,325.05856,327.155712,329.25286400000005,331.35001600000004,333.44716800000003,335.54432,337.641472,339.738624,341.835776,343.932928,346.03008,348.12723200000005,350.22438400000004],"y":[0.257999986410141,0.2910000085830688,0.3300000131130218,0.3510000109672546,0.3759999871253967,0.3939999938011169,0.3980000019073486,0.4099999964237213,0.4309999942779541,0.4350000023841858,0.449000000953674
3,0.4580000042915344,0.4720000028610229,0.4760000109672546,0.4740000069141388,0.481000006198883,0.4930000007152557,0.4950000047683716,0.4959999918937683,0.4939999878406524,0.4990000128746032,0.5080000162124634,0.5170000195503235,0.5059999823570251,0.5199999809265137,0.5130000114440918,0.5199999809265137,0.5249999761581421,0.5189999938011169,0.5299999713897705,0.5370000004768372,0.5429999828338623,0.5410000085830688,0.5360000133514404,0.5440000295639038,0.5370000004768372,0.5419999957084656,0.5389999747276306,0.5320000052452087,0.5490000247955322,0.5429999828338623,0.5419999957084656,0.5479999780654907,0.550000011920929,0.5479999780654907,0.5550000071525574,0.5559999942779541,0.5519999861717224,0.5540000200271606,0.5529999732971191,0.5619999766349792,0.5580000281333923,0.5630000233650208,0.5540000200271606,0.5630000233650208,0.5580000281333923,0.5669999718666077,0.5600000023841858,0.5630000233650208,0.5740000009536743,0.5590000152587891,0.5630000233650208,0.5600000023841858,0.5680000185966492,0.5630000233650208,0.5630000233650208,0.5720000267028809,0.5699999928474426,0.5699999928474426,0.5709999799728394,0.5770000219345093,0.574999988079071,0.578000009059906,0.5759999752044678,0.5730000138282776,0.5839999914169312,0.5809999704360962,0.578000009059906,0.5839999914169312,0.5820000171661377,0.5870000123977661,0.5820000171661377,0.5770000219345093,0.5870000123977661,0.574999988079071,0.5730000138282776,0.5879999995231628,0.5910000205039978,0.5799999833106995,0.5860000252723694,0.5910000205039978,0.5809999704360962,0.593999981880188,0.5899999737739563,0.5809999704360962,0.5870000123977661,0.5950000286102295,0.5929999947547913,0.5950000286102295,0.593999981880188,0.5879999995231628,0.5879999995231628,0.593999981880188,0.6000000238418579,0.5929999947547913,0.5809999704360962,0.5920000076293945,0.6019999980926514,0.597000002861023,0.597000002861023,0.5910000205039978,0.5960000157356262,0.5879999995231628,0.5960000157356262,0.6019999980926514,0.5960000157356262,0.600000023841
8579,0.6029999852180481,0.6019999980926514,0.6010000109672546,0.6010000109672546,0.6010000109672546,0.5960000157356262,0.6019999980926514,0.597000002861023,0.593999981880188,0.5979999899864197,0.6000000238418579,0.6010000109672546,0.6100000143051147,0.6010000109672546,0.5920000076293945,0.6019999980926514,0.6060000061988831,0.6110000014305115,0.6159999966621399,0.6100000143051147,0.6019999980926514,0.6079999804496765,0.6169999837875366,0.6140000224113464,0.6190000176429749,0.6150000095367432,0.6079999804496765,0.6110000014305115,0.6119999885559082,0.6110000014305115,0.6240000128746033,0.6140000224113464,0.6179999709129333,0.6110000014305115,0.6169999837875366,0.6179999709129333,0.6159999966621399,0.6129999756813049,0.6169999837875366,0.6179999709129333,0.6129999756813049,0.6129999756813049,0.6150000095367432,0.6110000014305115,0.6179999709129333,0.6110000014305115,0.6159999966621399,0.6169999837875366,0.6159999966621399,0.6140000224113464,0.6159999966621399],"label":"Dolma"},"RefinedWeb":{"x":[0.0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,29.360128000000003,31.45728,33.554432000000006,35.651584,37.748736,39.845888,41.94304,44.040192000000005,46.137344000000006,48.234496,50.331648,52.4288,54.525952000000004,56.623104000000005,58.720256000000006,60.817408,62.91456,65.011712,67.10886400000001,69.206016,71.303168,73.40032000000001,75.497472,77.59462400000001,79.691776,81.788928,83.88608,85.983232,88.08038400000001,90.177536,92.27468800000001,94.37184,96.468992,98.56614400000001,100.663296,102.76044800000001,104.8576,106.95475200000001,109.05190400000001,111.149056,113.24620800000001,115.34336,117.44051200000001,119.537664,121.634816,123.73196800000001,125.82912,127.92627200000001,130.023424,132.120576,134.21772800000002,136.31488000000002,138.412032,140.509184,142.606336,144.70348800000002,146.80064000000002,148.897792,150.994
944,153.092096,155.18924800000002,157.28640000000001,159.383552,161.480704,163.577856,165.67500800000002,167.77216,169.869312,171.966464,174.06361600000002,176.16076800000002,178.25792,180.355072,182.452224,184.54937600000002,186.64652800000002,188.74368,190.840832,192.937984,195.03513600000002,197.13228800000002,199.22944,201.326592,203.423744,205.52089600000002,207.61804800000002,209.7152,211.812352,213.90950400000003,216.00665600000002,218.10380800000001,220.20096,222.298112,224.39526400000003,226.49241600000002,228.589568,230.68672,232.783872,234.88102400000002,236.97817600000002,239.075328,241.17248,243.269632,245.36678400000002,247.46393600000002,249.561088,251.65824,253.75539200000003,255.85254400000002,257.949696,260.046848,262.144,264.241152,266.338304,268.43545600000004,270.53260800000004,272.62976000000003,274.726912,276.824064,278.921216,281.018368,283.11552,285.212672,287.309824,289.40697600000004,291.50412800000004,293.60128000000003,295.698432,297.795584,299.892736,301.989888,304.08704,306.184192,308.28134400000005,310.37849600000004,312.47564800000004,314.57280000000003,316.669952,318.767104,320.864256,322.961408,325.05856,327.155712,329.25286400000005,331.35001600000004,333.44716800000003,335.54432,337.641472,339.738624,341.835776,343.932928,346.03008,348.12723200000005,350.22438400000004],"y":[0.257999986410141,0.2759999930858612,0.328000009059906,0.3499999940395355,0.3889999985694885,0.3910000026226043,0.402999997138977,0.4210000038146972,0.4280000030994415,0.4359999895095825,0.4469999969005584,0.4440000057220459,0.4600000083446502,0.4690000116825104,0.4679999947547912,0.4729999899864197,0.4760000109672546,0.4839999973773956,0.4939999878406524,0.488999992609024,0.4990000128746032,0.4979999959468841,0.4979999959468841,0.5009999871253967,0.5,0.5090000033378601,0.5070000290870667,0.5180000066757202,0.5199999809265137,0.5109999775886536,0.5130000114440918,0.5249999761581421,0.5149999856948853,0.5299999713897705,0.5339999794960022,0.5189999938011169,0.
5289999842643738,0.5249999761581421,0.5320000052452087,0.5460000038146973,0.5419999957084656,0.5260000228881836,0.5289999842643738,0.546999990940094,0.5419999957084656,0.5419999957084656,0.5460000038146973,0.5419999957084656,0.5389999747276306,0.5440000295639038,0.5569999814033508,0.5450000166893005,0.5329999923706055,0.5580000281333923,0.5339999794960022,0.5540000200271606,0.5460000038146973,0.5479999780654907,0.5529999732971191,0.5540000200271606,0.5619999766349792,0.5490000247955322,0.5410000085830688,0.5490000247955322,0.5569999814033508,0.550000011920929,0.5479999780654907,0.5630000233650208,0.546999990940094,0.5559999942779541,0.5600000023841858,0.5509999990463257,0.5569999814033508,0.5569999814033508,0.5580000281333923,0.5619999766349792,0.5580000281333923,0.5669999718666077,0.5569999814033508,0.5709999799728394,0.5529999732971191,0.5649999976158142,0.5659999847412109,0.5659999847412109,0.5690000057220459,0.5600000023841858,0.5580000281333923,0.5540000200271606,0.5640000104904175,0.5680000185966492,0.5709999799728394,0.5649999976158142,0.5680000185966492,0.5730000138282776,0.5640000104904175,0.5799999833106995,0.5699999928474426,0.5669999718666077,0.5680000185966492,0.5770000219345093,0.5709999799728394,0.5759999752044678,0.5690000057220459,0.5789999961853027,0.5740000009536743,0.5709999799728394,0.5789999961853027,0.5709999799728394,0.5770000219345093,0.5770000219345093,0.5730000138282776,0.5809999704360962,0.5720000267028809,0.5849999785423279,0.5820000171661377,0.5799999833106995,0.5830000042915344,0.5759999752044678,0.5730000138282776,0.5799999833106995,0.5830000042915344,0.5860000252723694,0.5789999961853027,0.5789999961853027,0.5860000252723694,0.5979999899864197,0.5920000076293945,0.5820000171661377,0.5870000123977661,0.5889999866485596,0.5839999914169312,0.5849999785423279,0.5899999737739563,0.5920000076293945,0.593999981880188,0.597000002861023,0.5889999866485596,0.5889999866485596,0.5849999785423279,0.5899999737739563,0.5989999771118164,0.5899999737
739563,0.5839999914169312,0.5910000205039978,0.5910000205039978,0.5929999947547913,0.5920000076293945,0.5929999947547913,0.5889999866485596,0.5899999737739563,0.593999981880188,0.5910000205039978,0.5960000157356262,0.5920000076293945,0.5889999866485596,0.593999981880188,0.5879999995231628,0.5960000157356262,0.5920000076293945,0.5960000157356262,0.5960000157356262,0.5920000076293945,0.6010000109672546,0.5920000076293945,0.5899999737739563,0.5889999866485596,0.5920000076293945,0.6019999980926514],"label":"RefinedWeb"},"SlimPajama":{"x":[0.0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,29.360128000000003,31.45728,33.554432000000006,35.651584,37.748736,39.845888,41.94304,44.040192000000005,46.137344000000006,48.234496,50.331648,52.4288,54.525952000000004,56.623104000000005,58.720256000000006,60.817408,62.91456,65.011712,67.10886400000001,69.206016,71.303168,73.40032000000001,75.497472,77.59462400000001,79.691776,81.788928,83.88608,85.983232,88.08038400000001,90.177536,92.27468800000001,94.37184,96.468992,98.56614400000001,100.663296,102.76044800000001,104.8576,106.95475200000001,109.05190400000001,111.149056,113.24620800000001,115.34336,117.44051200000001,119.537664,121.634816,123.73196800000001,125.82912,127.92627200000001,130.023424,132.120576,134.21772800000002,136.31488000000002,138.412032,140.509184,142.606336,144.70348800000002,146.80064000000002,148.897792,150.994944,153.092096,155.18924800000002,157.28640000000001,159.383552,161.480704,163.577856,165.67500800000002,167.77216,169.869312,171.966464,174.06361600000002,176.16076800000002,178.25792,180.355072,182.452224,184.54937600000002,186.64652800000002,188.74368,190.840832,192.937984,195.03513600000002,197.13228800000002,199.22944,201.326592,203.423744,205.52089600000002,207.61804800000002,209.7152,211.812352,213.90950400000003,216.00665600000002,218.10380800000001,220.20
096,222.298112,224.39526400000003,226.49241600000002,228.589568,230.68672,232.783872,234.88102400000002,236.97817600000002,239.075328,241.17248,243.269632,245.36678400000002,247.46393600000002,249.561088,251.65824,253.75539200000003,255.85254400000002,257.949696,260.046848,262.144,264.241152,266.338304,268.43545600000004,270.53260800000004,272.62976000000003,274.726912,276.824064,278.921216,281.018368,283.11552,285.212672,287.309824,289.40697600000004,291.50412800000004,293.60128000000003,295.698432,297.795584,299.892736,301.989888,304.08704,306.184192,308.28134400000005,310.37849600000004,312.47564800000004,314.57280000000003,316.669952,318.767104,320.864256,322.961408,325.05856,327.155712,329.25286400000005,331.35001600000004,333.44716800000003,335.54432,337.641472,339.738624,341.835776,343.932928,346.03008,348.12723200000005,350.22438400000004],"y":[0.2610000073909759,0.2829999923706054,0.3109999895095825,0.3240000009536743,0.3429999947547912,0.3510000109672546,0.367000013589859,0.3729999959468841,0.3930000066757202,0.3970000147819519,0.4000000059604645,0.4169999957084656,0.4210000038146972,0.4269999861717224,0.4339999854564667,0.4379999935626983,0.4429999887943268,0.4490000009536743,0.4560000002384186,0.4560000002384186,0.4620000123977661,0.453000009059906,0.4670000076293945,0.4679999947547912,0.4679999947547912,0.4819999933242798,0.4749999940395355,0.4779999852180481,0.4720000028610229,0.4869999885559082,0.4869999885559082,0.4880000054836273,0.4939999878406524,0.4979999959468841,0.4979999959468841,0.4959999918937683,0.4860000014305115,0.5109999775886536,0.4939999878406524,0.4909999966621399,0.5049999952316284,0.4990000128746032,0.4939999878406524,0.5,0.5149999856948853,0.503000020980835,0.5139999985694885,0.5059999823570251,0.5130000114440918,0.5230000019073486,0.5210000276565552,0.5109999775886536,0.5230000019073486,0.515999972820282,0.5270000100135803,0.5329999923706055,0.531000018119812,0.5389999747276306,0.5299999713897705,0.5239999890327454,0.5320000052452
087,0.5329999923706055,0.5370000004768372,0.5350000262260437,0.5379999876022339,0.5350000262260437,0.5370000004768372,0.5329999923706055,0.527999997138977,0.5289999842643738,0.5400000214576721,0.5490000247955322,0.5410000085830688,0.5299999713897705,0.5410000085830688,0.527999997138977,0.5360000133514404,0.5389999747276306,0.5320000052452087,0.5329999923706055,0.5460000038146973,0.5410000085830688,0.5360000133514404,0.5640000104904175,0.5400000214576721,0.5429999828338623,0.550000011920929,0.5509999990463257,0.5529999732971191,0.5529999732971191,0.550000011920929,0.5490000247955322,0.5609999895095825,0.5640000104904175,0.5479999780654907,0.5580000281333923,0.5519999861717224,0.5659999847412109,0.5630000233650208,0.5649999976158142,0.5649999976158142,0.5709999799728394,0.5580000281333923,0.5550000071525574,0.5580000281333923,0.5509999990463257,0.5429999828338623,0.5490000247955322,0.5559999942779541,0.5580000281333923,0.5590000152587891,0.5669999718666077,0.5659999847412109,0.5559999942779541,0.5669999718666077,0.5600000023841858,0.574999988079071,0.5709999799728394,0.5690000057220459,0.5609999895095825,0.5690000057220459,0.5730000138282776,0.5699999928474426,0.5789999961853027,0.5720000267028809,0.5680000185966492,0.5699999928474426,0.5709999799728394,0.5709999799728394,0.5709999799728394,0.5720000267028809,0.5690000057220459,0.5740000009536743,0.5730000138282776,0.5879999995231628,0.578000009059906,0.5809999704360962,0.5759999752044678,0.5730000138282776,0.5799999833106995,0.5730000138282776,0.5690000057220459,0.5720000267028809,0.5740000009536743,0.574999988079071,0.5789999961853027,0.574999988079071,0.5699999928474426,0.5759999752044678,0.5770000219345093,0.5820000171661377,0.5820000171661377,0.5860000252723694,0.5799999833106995,0.5830000042915344,0.5830000042915344,0.5730000138282776,0.5879999995231628,0.5879999995231628,0.578000009059906,0.5720000267028809,0.5830000042915344,0.5889999866485596,0.5820000171661377,0.574999988079071,0.5820000171661377,0.588999986
6485596,0.5839999914169312],"label":"SlimPajama"},"The Pile":{"x":[0.0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,29.360128000000003,31.45728,33.554432000000006,35.651584,37.748736,39.845888,41.94304,44.040192000000005,46.137344000000006,48.234496,50.331648,52.4288,54.525952000000004,56.623104000000005,58.720256000000006,60.817408,62.91456,65.011712,67.10886400000001,69.206016,71.303168,73.40032000000001,75.497472,77.59462400000001,79.691776,81.788928,83.88608,85.983232,88.08038400000001,90.177536,92.27468800000001,94.37184,96.468992,98.56614400000001,100.663296,102.76044800000001,104.8576,106.95475200000001,109.05190400000001,111.149056,113.24620800000001,115.34336,117.44051200000001,119.537664,121.634816,123.73196800000001,125.82912,127.92627200000001,130.023424,132.120576,134.21772800000002,136.31488000000002,138.412032,140.509184,142.606336,144.70348800000002,146.80064000000002,148.897792,150.994944,153.092096,155.18924800000002,157.28640000000001,159.383552,161.480704,163.577856,165.67500800000002,167.77216,169.869312,171.966464,174.06361600000002,176.16076800000002,178.25792,180.355072,182.452224,184.54937600000002,186.64652800000002,188.74368,190.840832,192.937984,195.03513600000002,197.13228800000002,199.22944,201.326592,203.423744,205.52089600000002,207.61804800000002,209.7152,211.812352,213.90950400000003,216.00665600000002,218.10380800000001,220.20096,222.298112,224.39526400000003,226.49241600000002,228.589568,230.68672,232.783872,234.88102400000002,236.97817600000002,239.075328,241.17248,243.269632,245.36678400000002,247.46393600000002,249.561088,251.65824,253.75539200000003,255.85254400000002,257.949696,260.046848,262.144,264.241152,266.338304,268.43545600000004,270.53260800000004,272.62976000000003,274.726912,276.824064,278.921216,281.018368,283.11552,285.212672,287.309824,289.40697600000004,291.50412800000004
,293.60128000000003,295.698432,297.795584,299.892736,301.989888,304.08704,306.184192,308.28134400000005,310.37849600000004,312.47564800000004,314.57280000000003,316.669952,318.767104,320.864256,322.961408,325.05856,327.155712,329.25286400000005,331.35001600000004,333.44716800000003,335.54432,337.641472,339.738624,341.835776,343.932928,346.03008,348.12723200000005,350.22438400000004],"y":[0.2610000073909759,0.2649999856948852,0.2919999957084656,0.296999990940094,0.3260000050067901,0.3140000104904175,0.3339999914169311,0.3310000002384186,0.3519999980926513,0.3490000069141388,0.3540000021457672,0.3700000047683716,0.356000006198883,0.3619999885559082,0.3840000033378601,0.3849999904632568,0.382999986410141,0.3939999938011169,0.3939999938011169,0.3930000066757202,0.4040000140666961,0.4070000052452087,0.4110000133514404,0.4149999916553497,0.4259999990463257,0.4210000038146972,0.4250000119209289,0.4289999902248382,0.4269999861717224,0.4309999942779541,0.4259999990463257,0.4370000064373016,0.4390000104904175,0.4399999976158142,0.44200000166893,0.4449999928474426,0.449999988079071,0.4510000050067901,0.4569999873638153,0.4560000002384186,0.4620000123977661,0.4629999995231628,0.4580000042915344,0.460999995470047,0.4580000042915344,0.4650000035762787,0.4639999866485595,0.4629999995231628,0.4709999859333038,0.4720000028610229,0.4760000109672546,0.4679999947547912,0.4740000069141388,0.4769999980926513,0.4620000123977661,0.4769999980926513,0.4839999973773956,0.4839999973773956,0.4850000143051147,0.4839999973773956,0.4760000109672546,0.4839999973773956,0.4850000143051147,0.492000013589859,0.4779999852180481,0.4880000054836273,0.4790000021457672,0.4850000143051147,0.4909999966621399,0.4860000014305115,0.4819999933242798,0.4939999878406524,0.492000013589859,0.492000013589859,0.5040000081062317,0.4930000007152557,0.5070000290870667,0.5080000162124634,0.492000013589859,0.5040000081062317,0.4909999966621399,0.4979999959468841,0.5099999904632568,0.4939999878406524,0.4990000128746032,0.500
9999871253967,0.5130000114440918,0.5059999823570251,0.5019999742507935,0.5080000162124634,0.5130000114440918,0.5180000066757202,0.5080000162124634,0.5139999985694885,0.5180000066757202,0.5040000081062317,0.5149999856948853,0.5139999985694885,0.5149999856948853,0.5210000276565552,0.515999972820282,0.5139999985694885,0.5249999761581421,0.5199999809265137,0.5170000195503235,0.5189999938011169,0.5210000276565552,0.5189999938011169,0.5210000276565552,0.515999972820282,0.5180000066757202,0.5239999890327454,0.5249999761581421,0.5230000019073486,0.5220000147819519,0.5170000195503235,0.5170000195503235,0.5130000114440918,0.5260000228881836,0.5270000100135803,0.5220000147819519,0.5299999713897705,0.5239999890327454,0.5260000228881836,0.5230000019073486,0.5339999794960022,0.5350000262260437,0.5429999828338623,0.5400000214576721,0.5329999923706055,0.5329999923706055,0.5379999876022339,0.5440000295639038,0.5410000085830688,0.5400000214576721,0.5410000085830688,0.5350000262260437,0.531000018119812,0.5440000295639038,0.5389999747276306,0.5419999957084656,0.550000011920929,0.5400000214576721,0.5429999828338623,0.5580000281333923,0.5479999780654907,0.5540000200271606,0.550000011920929,0.5559999942779541,0.5460000038146973,0.5580000281333923,0.5569999814033508,0.5619999766349792,0.5619999766349792,0.5619999766349792,0.5580000281333923,0.5519999861717224,0.5529999732971191,0.5580000281333923,0.5609999895095825,0.5659999847412109,0.550000011920929,0.5559999942779541,0.5550000071525574,0.5519999861717224,0.5529999732971191,0.5569999814033508,0.5569999814033508],"label":"The 
Pile"},"RedPajama2":{"x":[0.0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,29.360128000000003,31.45728,33.554432000000006,35.651584,37.748736,39.845888,41.94304,44.040192000000005,46.137344000000006,48.234496,50.331648,52.4288,54.525952000000004,56.623104000000005,58.720256000000006,60.817408,62.91456,65.011712,67.10886400000001,69.206016,71.303168,73.40032000000001,75.497472,77.59462400000001,79.691776,81.788928,83.88608,85.983232,88.08038400000001,90.177536,92.27468800000001,94.37184,96.468992,98.56614400000001,100.663296,102.76044800000001,104.8576,106.95475200000001,109.05190400000001,111.149056,113.24620800000001,115.34336,117.44051200000001,119.537664,121.634816,123.73196800000001,125.82912,127.92627200000001,130.023424,132.120576,134.21772800000002,136.31488000000002,138.412032,140.509184,142.606336,144.70348800000002,146.80064000000002,148.897792,150.994944,153.092096,155.18924800000002,157.28640000000001,159.383552,161.480704,163.577856,165.67500800000002,167.77216,169.869312,171.966464,174.06361600000002,176.16076800000002,178.25792,180.355072,182.452224,184.54937600000002,186.64652800000002,188.74368,190.840832,192.937984,195.03513600000002,197.13228800000002,199.22944,201.326592,203.423744,205.52089600000002,207.61804800000002,209.7152,211.812352,213.90950400000003,216.00665600000002,218.10380800000001,220.20096,222.298112,224.39526400000003,226.49241600000002,228.589568,230.68672,232.783872,234.88102400000002,236.97817600000002,239.075328,241.17248,243.269632,245.36678400000002,247.46393600000002,249.561088,251.65824,253.75539200000003,255.85254400000002,257.949696,260.046848,262.144,264.241152,266.338304,268.43545600000004,270.53260800000004,272.62976000000003,274.726912,276.824064,278.921216,281.018368,283.11552,285.212672,287.309824,289.40697600000004,291.50412800000004,293.60128000000003,295.698432,297.795584
,299.892736,301.989888,304.08704,306.184192,308.28134400000005,310.37849600000004,312.47564800000004,314.57280000000003,316.669952,318.767104,320.864256,322.961408,325.05856,327.155712,329.25286400000005,331.35001600000004,333.44716800000003,335.54432,337.641472,339.738624,341.835776,343.932928,346.03008,348.12723200000005,350.22438400000004],"y":[0.2610000073909759,0.2870000004768371,0.3070000112056732,0.3230000138282776,0.3449999988079071,0.3540000021457672,0.3659999966621399,0.3880000114440918,0.402999997138977,0.4070000052452087,0.4079999923706054,0.4129999876022339,0.4230000078678131,0.4300000071525574,0.4269999861717224,0.4429999887943268,0.4519999921321869,0.4490000009536743,0.4460000097751617,0.4449999928474426,0.4519999921321869,0.4550000131130218,0.4569999873638153,0.460999995470047,0.4569999873638153,0.4670000076293945,0.4659999907016754,0.4749999940395355,0.4659999907016754,0.4869999885559082,0.488999992609024,0.4779999852180481,0.4749999940395355,0.488999992609024,0.4799999892711639,0.4860000014305115,0.4850000143051147,0.503000020980835,0.503000020980835,0.4939999878406524,0.5009999871253967,0.5019999742507935,0.4970000088214874,0.5059999823570251,0.5080000162124634,0.4990000128746032,0.4979999959468841,0.5009999871253967,0.5049999952316284,0.5080000162124634,0.5080000162124634,0.5090000033378601,0.5109999775886536,0.5109999775886536,0.5180000066757202,0.5220000147819519,0.5270000100135803,0.5270000100135803,0.5109999775886536,0.5189999938011169,0.5189999938011169,0.5210000276565552,0.5220000147819519,0.5180000066757202,0.5199999809265137,0.5170000195503235,0.5249999761581421,0.5130000114440918,0.527999997138977,0.5379999876022339,0.531000018119812,0.5210000276565552,0.531000018119812,0.5260000228881836,0.5289999842643738,0.5299999713897705,0.5270000100135803,0.5329999923706055,0.5329999923706055,0.5249999761581421,0.5370000004768372,0.5389999747276306,0.5410000085830688,0.5220000147819519,0.5329999923706055,0.527999997138977,0.5329999923706055,0.53700
00004768372,0.5429999828338623,0.5329999923706055,0.5410000085830688,0.5299999713897705,0.5519999861717224,0.5479999780654907,0.5490000247955322,0.5460000038146973,0.5440000295639038,0.5379999876022339,0.5350000262260437,0.5490000247955322,0.5440000295639038,0.5429999828338623,0.5509999990463257,0.5360000133514404,0.5460000038146973,0.5479999780654907,0.5410000085830688,0.5529999732971191,0.5529999732971191,0.550000011920929,0.5490000247955322,0.5479999780654907,0.5509999990463257,0.5529999732971191,0.546999990940094,0.5519999861717224,0.550000011920929,0.5490000247955322,0.5540000200271606,0.5540000200271606,0.5550000071525574,0.5529999732971191,0.5540000200271606,0.5580000281333923,0.5580000281333923,0.5580000281333923,0.5450000166893005,0.5569999814033508,0.5529999732971191,0.546999990940094,0.5550000071525574,0.5540000200271606,0.5519999861717224,0.5540000200271606,0.5569999814033508,0.5600000023841858,0.5550000071525574,0.5540000200271606,0.5609999895095825,0.5559999942779541,0.5659999847412109,0.5509999990463257,0.5580000281333923,0.5569999814033508,0.5559999942779541,0.5600000023841858,0.5569999814033508,0.5590000152587891,0.5640000104904175,0.5580000281333923,0.5519999861717224,0.5569999814033508,0.5569999814033508,0.5559999942779541,0.5569999814033508,0.5590000152587891,0.5590000152587891,0.5619999766349792,0.5559999942779541,0.5580000281333923,0.5529999732971191,0.5519999861717224,0.5550000071525574,0.5590000152587891,0.5619999766349792,0.5540000200271606,0.5529999732971191,0.5600000023841858],"label":"RedPajama2"}},"layout":{"xaxis":{"title":{"text":"Training tokens (billions)"}},"yaxis":{"range":[0.39,0.5]},"title":{"text":"Dataset Ablations"}}}
 
 
data/plots/dataset_ablations/index.json DELETED
@@ -1 +0,0 @@
1
- {"files":{"agg_score":{"file":"agg_score.json"},"commonsense_qa/acc_norm":{"file":"commonsense_qa_acc_norm.json"},"hellaswag/acc_norm":{"file":"hellaswag_acc_norm.json"},"openbookqa/acc_norm":{"file":"openbookqa_acc_norm.json"},"piqa/acc_norm":{"file":"piqa_acc_norm.json"},"winogrande/acc_norm":{"file":"winogrande_acc_norm.json"},"arc/acc_norm":{"file":"arc_acc_norm.json"},"mmlu/acc_norm":{"file":"mmlu_acc_norm.json"}},"settings":{"slider":{"min":0,"max":30,"default":5}}}
 
 
data/plots/dataset_ablations/mmlu_acc_norm.json DELETED
@@ -1 +0,0 @@
1
- {"data":{"FineWeb (ours)":{"x":[0.0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,29.360128000000003,31.45728,33.554432000000006,35.651584,37.748736,39.845888,41.94304,44.040192000000005,46.137344000000006,48.234496,50.331648,52.4288,54.525952000000004,56.623104000000005,58.720256000000006,60.817408,62.91456,65.011712,67.10886400000001,69.206016,71.303168,73.40032000000001,75.497472,77.59462400000001,79.691776,81.788928,83.88608,85.983232,88.08038400000001,90.177536,92.27468800000001,94.37184,96.468992,98.56614400000001,100.663296,102.76044800000001,104.8576,106.95475200000001,109.05190400000001,111.149056,113.24620800000001,115.34336,117.44051200000001,119.537664,121.634816,123.73196800000001,125.82912,127.92627200000001,130.023424,132.120576,134.21772800000002,136.31488000000002,138.412032,140.509184,142.606336,144.70348800000002,146.80064000000002,148.897792,150.994944,153.092096,155.18924800000002,157.28640000000001,159.383552,161.480704,163.577856,165.67500800000002,167.77216,169.869312,171.966464,174.06361600000002,176.16076800000002,178.25792,180.355072,182.452224,184.54937600000002,186.64652800000002,188.74368,190.840832,192.937984,195.03513600000002,197.13228800000002,199.22944,201.326592,203.423744,205.52089600000002,207.61804800000002,209.7152,211.812352,213.90950400000003,216.00665600000002,218.10380800000001,220.20096,222.298112,224.39526400000003,226.49241600000002,228.589568,230.68672,232.783872,234.88102400000002,236.97817600000002,239.075328,241.17248,243.269632,245.36678400000002,247.46393600000002,249.561088,251.65824,253.75539200000003,255.85254400000002,257.949696,260.046848,262.144,264.241152,266.338304,268.43545600000004,270.53260800000004,272.62976000000003,274.726912,276.824064,278.921216,281.018368,283.11552,285.212672,287.309824,289.40697600000004,291.50412800000004,293.60128000000003,295.698432,29
7.795584,299.892736,301.989888,304.08704,306.184192,308.28134400000005,310.37849600000004,312.47564800000004,314.57280000000003,316.669952,318.767104,320.864256,322.961408,325.05856,327.155712,329.25286400000005,331.35001600000004,333.44716800000003,335.54432,337.641472,339.738624,341.835776,343.932928,346.03008,348.12723200000005,350.22438400000004],"y":[0.2501466572284698,0.2562687695026397,0.264194518327713,0.2659797668457031,0.2690401375293731,0.2707462012767792,0.2736803293228149,0.2808477580547333,0.2819793820381164,0.2818062305450439,0.2852273285388946,0.2852552533149719,0.293150246143341,0.2869345247745514,0.2926198840141296,0.2911646664142608,0.2883031964302063,0.2938489317893982,0.2923268675804138,0.2927436530590057,0.2957125902175903,0.2942458391189575,0.2957732379436493,0.2933609783649444,0.2939628064632416,0.2984270751476288,0.2989151179790497,0.3007727265357971,0.2968312501907348,0.2969468235969543,0.3013020753860473,0.3045085966587066,0.3018752634525299,0.3014349043369293,0.2988792657852173,0.3034284710884094,0.3015728890895843,0.3065252900123596,0.3021449446678161,0.3043071627616882,0.303546279668808,0.3056059181690216,0.2971993386745453,0.3057574033737182,0.3057517111301422,0.3124973773956299,0.3139103651046753,0.3144983947277069,0.3126215636730194,0.3140240907669067,0.3113631308078766,0.3124240636825561,0.3126817643642425,0.3123457431793213,0.3111095428466797,0.3113269805908203,0.3142518699169159,0.3163851797580719,0.3134008049964905,0.3138530254364013,0.3171449303627014,0.3119543790817261,0.3147956132888794,0.3138984441757202,0.3178529143333435,0.3162296414375305,0.315980851650238,0.3123161196708679,0.3166452944278717,0.3140694200992584,0.3176922798156738,0.3176673054695129,0.3150016367435455,0.3161586821079254,0.3222477436065674,0.3194025754928589,0.3176416158676147,0.3159928619861603,0.3169592320919037,0.3135637938976288,0.3155058920383453,0.3215300440788269,0.3201274275779724,0.3192023932933807,0.3156079053878784,0.3212503492832184,0.3163617849
349975,0.3223940432071686,0.3191330432891845,0.3194314539432525,0.3221519589424133,0.3211863040924072,0.3197937309741974,0.3174488544464111,0.3159596025943756,0.3157133460044861,0.3193388879299164,0.3163386285305023,0.3202225565910339,0.3163421154022217,0.3212694227695465,0.3187369704246521,0.3203508555889129,0.3224054872989654,0.3207881152629852,0.3219418525695801,0.3197605609893799,0.3255409598350525,0.3253240585327148,0.319698303937912,0.3250498473644256,0.3228228390216827,0.3213794529438019,0.3219127357006073,0.3214426934719085,0.3238218128681183,0.3229665458202362,0.3220484256744385,0.3240038454532623,0.3246393501758575,0.3237775564193725,0.3258441984653473,0.322843462228775,0.3241913020610809,0.324148565530777,0.3238157927989959,0.3248989582061767,0.3280864655971527,0.3288898766040802,0.3265794515609741,0.3277602791786194,0.3231202363967895,0.3224002718925476,0.323845773935318,0.3278093039989471,0.3247094452381134,0.3289697468280792,0.3272296786308288,0.3275051414966583,0.3271359801292419,0.3280861675739288,0.3281281590461731,0.327859491109848,0.3281152546405792,0.3282515406608581,0.3258990049362182,0.3271094560623169,0.3259278535842895,0.3258941769599914,0.3278749883174896,0.3300504386425018,0.326113760471344,0.3242938220500946,0.3262194991111755,0.3263693153858185,0.3274452090263366,0.3254594206809997,0.3287247717380523,0.3250340223312378,0.3270816206932068,0.3275731801986694,0.3282500207424164,0.3257671594619751,0.3272948265075683,0.3274084031581878,0.3302212655544281,0.3322067260742187,0.3296935856342315],"label":"FineWeb 
(ours)"},"C4":{"x":[0.0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,29.360128000000003,31.45728,33.554432000000006,35.651584,37.748736,39.845888,41.94304,44.040192000000005,46.137344000000006,48.234496,50.331648,52.4288,54.525952000000004,56.623104000000005,58.720256000000006,60.817408,62.91456,65.011712,67.10886400000001,69.206016,71.303168,73.40032000000001,75.497472,77.59462400000001,79.691776,81.788928,83.88608,85.983232,88.08038400000001,90.177536,92.27468800000001,94.37184,96.468992,98.56614400000001,100.663296,102.76044800000001,104.8576,106.95475200000001,109.05190400000001,111.149056,113.24620800000001,115.34336,117.44051200000001,119.537664,121.634816,123.73196800000001,125.82912,127.92627200000001,130.023424,132.120576,134.21772800000002,136.31488000000002,138.412032,140.509184,142.606336,144.70348800000002,146.80064000000002,148.897792,150.994944,153.092096,155.18924800000002,157.28640000000001,159.383552,161.480704,163.577856,165.67500800000002,167.77216,169.869312,171.966464,174.06361600000002,176.16076800000002,178.25792,180.355072,182.452224,184.54937600000002,186.64652800000002,188.74368,190.840832,192.937984,195.03513600000002,197.13228800000002,199.22944,201.326592,203.423744,205.52089600000002,207.61804800000002,209.7152,211.812352,213.90950400000003,216.00665600000002,218.10380800000001,220.20096,222.298112,224.39526400000003,226.49241600000002,228.589568,230.68672,232.783872,234.88102400000002,236.97817600000002,239.075328,241.17248,243.269632,245.36678400000002,247.46393600000002,249.561088,251.65824,253.75539200000003,255.85254400000002,257.949696,260.046848,262.144,264.241152,266.338304,268.43545600000004,270.53260800000004,272.62976000000003,274.726912,276.824064,278.921216,281.018368,283.11552,285.212672,287.309824,289.40697600000004,291.50412800000004,293.60128000000003,295.698432,297.795584,299.8
92736,301.989888,304.08704,306.184192,308.28134400000005,310.37849600000004,312.47564800000004,314.57280000000003,316.669952,318.767104,320.864256,322.961408,325.05856,327.155712,329.25286400000005,331.35001600000004,333.44716800000003,335.54432,337.641472,339.738624,341.835776,343.932928,346.03008,348.12723200000005,350.22438400000004],"y":[0.2501466572284698,0.2503989636898041,0.2554822564125061,0.267362505197525,0.2635625898838043,0.2717761397361755,0.2754477560520172,0.275278776884079,0.2799545228481293,0.2766266465187073,0.27903613448143,0.2817002832889557,0.2798756062984466,0.2839230597019195,0.2838127315044403,0.2853756248950958,0.2841375172138214,0.2884852290153503,0.2859430313110351,0.2888604700565338,0.2842839062213897,0.2834508419036865,0.2905769944190979,0.291401594877243,0.2923042178153991,0.2906481623649597,0.2864490151405334,0.2894583344459533,0.2936400771141052,0.289957731962204,0.2898236215114593,0.2837969064712524,0.2944568693637848,0.287180632352829,0.2923372685909271,0.2921179831027984,0.2892000675201416,0.2896223366260528,0.2912705242633819,0.2905389666557312,0.2875383198261261,0.2915911972522735,0.2923913896083832,0.2919397950172424,0.2927466332912445,0.2955999374389648,0.2927011847496032,0.2947700321674347,0.2957072854042053,0.2927916347980499,0.2963354587554931,0.2973482012748718,0.2991726994514465,0.2962333858013153,0.2936276495456695,0.2937322854995727,0.3001607954502105,0.2961930930614471,0.2966057658195495,0.2966968417167663,0.3002983927726745,0.2983409464359283,0.2959610521793365,0.2962177395820617,0.2951928675174713,0.2985051274299621,0.30136439204216,0.2984142005443573,0.3012503385543823,0.299721896648407,0.3021658658981323,0.2981589436531067,0.2985765635967254,0.2982990145683288,0.3036385178565979,0.3027603924274444,0.3010715246200561,0.3001661598682403,0.298428326845169,0.3019610047340393,0.3065414726734161,0.29936483502388,0.3038617968559265,0.3041279911994934,0.3058141767978668,0.3057383298873901,0.3019986450672149,0.30346816778182
98,0.3048995435237884,0.3043853640556335,0.3066711127758026,0.30138099193573,0.3047040700912475,0.3035959601402282,0.3064981997013092,0.3050976097583771,0.3082782626152038,0.3046838641166687,0.3041907548904419,0.3060542941093445,0.3100601136684418,0.3060735464096069,0.3048785924911499,0.3074990510940552,0.3062954843044281,0.3097685873508453,0.3098846077919006,0.3045764863491058,0.3065372407436371,0.308793306350708,0.3075888156890869,0.3107441067695617,0.3069410920143127,0.3096908032894134,0.3064695000648498,0.3067855834960937,0.3074861168861389,0.3091084659099579,0.3072076141834259,0.3065063059329986,0.3104434311389923,0.3067492246627807,0.3116618990898132,0.3115111291408539,0.3112091422080993,0.3067144453525543,0.3121508359909057,0.3089344203472137,0.3137882351875305,0.3108713626861572,0.312741070985794,0.3097975254058838,0.3122666180133819,0.3133568465709686,0.3127341866493225,0.3080432415008545,0.3120637834072113,0.3122869431972503,0.3120250403881073,0.3117712736129761,0.3109983205795288,0.3125808835029602,0.3116428554058075,0.3129400014877319,0.3116071224212646,0.3128395676612854,0.3080540597438812,0.3104397654533386,0.3116388320922851,0.3122957944869995,0.3113322257995605,0.313622385263443,0.313764363527298,0.3136481344699859,0.3141548335552215,0.3139144778251648,0.3123406767845154,0.314969539642334,0.3138300478458404,0.3155047297477722,0.3115970492362976,0.3151944875717163,0.3144146800041199,0.3156995475292206,0.3192606270313263,0.3183640241622925,0.3151223957538605,0.3162081837654114],"label":"C4"},"Dolma":{"x":[0.0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,29.360128000000003,31.45728,33.554432000000006,35.651584,37.748736,39.845888,41.94304,44.040192000000005,46.137344000000006,48.234496,50.331648,52.4288,54.525952000000004,56.623104000000005,58.720256000000006,60.817408,62.91456,65.011712,67.10886400000001,69.2060
16,71.303168,73.40032000000001,75.497472,77.59462400000001,79.691776,81.788928,83.88608,85.983232,88.08038400000001,90.177536,92.27468800000001,94.37184,96.468992,98.56614400000001,100.663296,102.76044800000001,104.8576,106.95475200000001,109.05190400000001,111.149056,113.24620800000001,115.34336,117.44051200000001,119.537664,121.634816,123.73196800000001,125.82912,127.92627200000001,130.023424,132.120576,134.21772800000002,136.31488000000002,138.412032,140.509184,142.606336,144.70348800000002,146.80064000000002,148.897792,150.994944,153.092096,155.18924800000002,157.28640000000001,159.383552,161.480704,163.577856,165.67500800000002,167.77216,169.869312,171.966464,174.06361600000002,176.16076800000002,178.25792,180.355072,182.452224,184.54937600000002,186.64652800000002,188.74368,190.840832,192.937984,195.03513600000002,197.13228800000002,199.22944,201.326592,203.423744,205.52089600000002,207.61804800000002,209.7152,211.812352,213.90950400000003,216.00665600000002,218.10380800000001,220.20096,222.298112,224.39526400000003,226.49241600000002,228.589568,230.68672,232.783872,234.88102400000002,236.97817600000002,239.075328,241.17248,243.269632,245.36678400000002,247.46393600000002,249.561088,251.65824,253.75539200000003,255.85254400000002,257.949696,260.046848,262.144,264.241152,266.338304,268.43545600000004,270.53260800000004,272.62976000000003,274.726912,276.824064,278.921216,281.018368,283.11552,285.212672,287.309824,289.40697600000004,291.50412800000004,293.60128000000003,295.698432,297.795584,299.892736,301.989888,304.08704,306.184192,308.28134400000005,310.37849600000004,312.47564800000004,314.57280000000003,316.669952,318.767104,320.864256,322.961408,325.05856,327.155712,329.25286400000005,331.35001600000004,333.44716800000003,335.54432,337.641472,339.738624,341.835776,343.932928,346.03008,348.12723200000005,350.22438400000004],"y":[0.2501270473003387,0.2560833990573883,0.2599925696849823,0.2675598859786987,0.2684114277362823,0.2716337144374847,0.274076670408248
9,0.2775039076805115,0.2783404588699341,0.2796013355255127,0.2820949554443359,0.2810406684875488,0.2823644876480102,0.2833253145217895,0.2857703566551208,0.2808239161968231,0.2917547225952148,0.2897486090660095,0.2875275015830993,0.2943058311939239,0.2891780138015747,0.2897033989429474,0.2903319895267486,0.2924950718879699,0.2875644862651825,0.2924458384513855,0.293954849243164,0.2929113805294037,0.2915636599063873,0.2917861938476562,0.2942286729812622,0.2967639863491058,0.291913092136383,0.2953178882598877,0.2972874641418457,0.2956210374832153,0.3018575012683868,0.2929699420928955,0.2950209975242615,0.3003560602664947,0.2974177300930023,0.2950251400470733,0.296758621931076,0.2972809970378876,0.2982426285743713,0.3019979298114776,0.3008206784725189,0.3005954325199127,0.2938346862792969,0.3009699583053589,0.2989449501037597,0.2996537983417511,0.3019669651985168,0.3000748455524444,0.3000148236751556,0.2995675504207611,0.298966109752655,0.3004475831985473,0.3023184835910797,0.3034453392028808,0.3016394674777984,0.3060395121574402,0.3066911995410919,0.3065873384475708,0.29949951171875,0.3031920790672302,0.3026251792907715,0.3046470880508423,0.3025145232677459,0.306982010602951,0.302643358707428,0.304037868976593,0.3015348613262176,0.299553781747818,0.3041835427284241,0.3027640879154205,0.3082228600978851,0.3047288656234741,0.3039765655994415,0.3048267066478729,0.3035992980003357,0.3056430518627167,0.3037824630737304,0.3048160970211029,0.3040882349014282,0.3052836060523987,0.3065180480480194,0.307971179485321,0.3091272115707397,0.3034843504428863,0.3075712919235229,0.3104844093322754,0.3063389658927917,0.3032608032226562,0.3080332279205322,0.3079279065132141,0.3062038123607635,0.3072481155395508,0.3117794990539551,0.3067246973514557,0.3082475662231445,0.3099625706672668,0.3070119023323059,0.3101015090942383,0.3109049797058105,0.3087223470211029,0.3101778626441955,0.3094301521778106,0.3144752383232116,0.3116049468517303,0.3116113245487213,0.3137989044189453,0.315327763557
4341,0.3151018321514129,0.3132036030292511,0.3145381212234497,0.3129039704799652,0.3149133026599884,0.3102412819862366,0.3117950558662414,0.3154202997684479,0.3132332563400268,0.3113301992416382,0.3101116418838501,0.3130112290382385,0.3137075006961822,0.315510481595993,0.3132544159889221,0.3161831498146057,0.3116595447063446,0.3104106485843658,0.3173929154872894,0.314871996641159,0.316310316324234,0.3159449696540832,0.3161650598049164,0.3156299889087677,0.3161895871162414,0.3156271278858185,0.3160959482192993,0.317343145608902,0.3176217675209045,0.3190700709819793,0.3196708261966705,0.3158564865589142,0.3173984587192535,0.316327154636383,0.3143565356731415,0.3159370124340057,0.3192657828330993,0.3172151446342468,0.3148190677165985,0.3171719908714294,0.3181072175502777,0.3178979456424713,0.3210897445678711,0.3147720694541931,0.3171736001968384,0.3176901936531067,0.3162892162799835,0.3163918852806091,0.3174804747104645,0.3201273679733276,0.3196343183517456,0.3189445436000824,0.3197543323040008,0.3184814155101776,0.3209713697433471],"label":"Dolma"},"RefinedWeb":{"x":[0.0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,29.360128000000003,31.45728,33.554432000000006,35.651584,37.748736,39.845888,41.94304,44.040192000000005,46.137344000000006,48.234496,50.331648,52.4288,54.525952000000004,56.623104000000005,58.720256000000006,60.817408,62.91456,65.011712,67.10886400000001,69.206016,71.303168,73.40032000000001,75.497472,77.59462400000001,79.691776,81.788928,83.88608,85.983232,88.08038400000001,90.177536,92.27468800000001,94.37184,96.468992,98.56614400000001,100.663296,102.76044800000001,104.8576,106.95475200000001,109.05190400000001,111.149056,113.24620800000001,115.34336,117.44051200000001,119.537664,121.634816,123.73196800000001,125.82912,127.92627200000001,130.023424,132.120576,134.21772800000002,136.31488000000002,138.412032,140.5
09184,142.606336,144.70348800000002,146.80064000000002,148.897792,150.994944,153.092096,155.18924800000002,157.28640000000001,159.383552,161.480704,163.577856,165.67500800000002,167.77216,169.869312,171.966464,174.06361600000002,176.16076800000002,178.25792,180.355072,182.452224,184.54937600000002,186.64652800000002,188.74368,190.840832,192.937984,195.03513600000002,197.13228800000002,199.22944,201.326592,203.423744,205.52089600000002,207.61804800000002,209.7152,211.812352,213.90950400000003,216.00665600000002,218.10380800000001,220.20096,222.298112,224.39526400000003,226.49241600000002,228.589568,230.68672,232.783872,234.88102400000002,236.97817600000002,239.075328,241.17248,243.269632,245.36678400000002,247.46393600000002,249.561088,251.65824,253.75539200000003,255.85254400000002,257.949696,260.046848,262.144,264.241152,266.338304,268.43545600000004,270.53260800000004,272.62976000000003,274.726912,276.824064,278.921216,281.018368,283.11552,285.212672,287.309824,289.40697600000004,291.50412800000004,293.60128000000003,295.698432,297.795584,299.892736,301.989888,304.08704,306.184192,308.28134400000005,310.37849600000004,312.47564800000004,314.57280000000003,316.669952,318.767104,320.864256,322.961408,325.05856,327.155712,329.25286400000005,331.35001600000004,333.44716800000003,335.54432,337.641472,339.738624,341.835776,343.932928,346.03008,348.12723200000005,350.22438400000004],"y":[0.2501466572284698,0.2528519630432129,0.2616856694221496,0.2665999829769134,0.2683407664299011,0.2742894291877746,0.2762066125869751,0.2807516455650329,0.2767378389835357,0.2807380557060241,0.2788906991481781,0.2844051718711853,0.2856102883815765,0.2883394360542297,0.2890409529209137,0.2894668281078338,0.2883355319499969,0.2872501015663147,0.291619062423706,0.2900333702564239,0.2962473034858703,0.2962896525859833,0.297355443239212,0.2932226359844208,0.2886744439601898,0.29665008187294,0.2976542115211487,0.2991503179073334,0.3004479110240936,0.3044549524784088,0.2976194322109222,0.3014707
863330841,0.3048252463340759,0.3039425611495971,0.303354948759079,0.3027459383010864,0.2999922931194305,0.3050121665000915,0.2998814284801483,0.2978588044643402,0.3041949570178985,0.3010904192924499,0.3022017180919647,0.2997751235961914,0.3015910983085632,0.3096485137939453,0.3012076020240783,0.3065535724163055,0.3042872548103332,0.3104783594608307,0.2997980415821075,0.3051296770572662,0.303458571434021,0.3088337182998657,0.3145398199558258,0.3032208085060119,0.310806930065155,0.3075874149799347,0.3101692199707031,0.310107946395874,0.3066047430038452,0.3109066784381866,0.3081336915493011,0.3084586262702942,0.3086149394512176,0.3085348606109619,0.3136637806892395,0.3110873103141784,0.31076380610466,0.3084572553634643,0.3133681714534759,0.3125792145729065,0.3124453127384186,0.3097185790538788,0.3106793165206909,0.3089564740657806,0.3111244142055511,0.3123694658279419,0.3144859969615936,0.3135123550891876,0.311982125043869,0.3142133951187134,0.3122903704643249,0.3147654831409454,0.3078767359256744,0.314947634935379,0.3171303570270538,0.3129573762416839,0.3154936134815216,0.3158208429813385,0.3153132200241089,0.3141326904296875,0.3163397014141083,0.3166318237781524,0.3168410360813141,0.3198235332965851,0.3201336860656738,0.3212967813014984,0.3191385567188263,0.3178017139434814,0.3192791938781738,0.323061466217041,0.320336639881134,0.3165886104106903,0.3206393420696258,0.3167395293712616,0.3135207295417785,0.315539002418518,0.3191742599010467,0.321073055267334,0.3222262561321258,0.3193058371543884,0.3213480710983276,0.3198905289173126,0.3219239711761474,0.3211614489555359,0.318855881690979,0.3177095353603363,0.324197381734848,0.3208906352519989,0.3264936804771423,0.3245965242385864,0.3231639564037323,0.3221887946128845,0.3277338445186615,0.3227696120738983,0.3263820111751556,0.3258577883243561,0.3264622390270233,0.3222362995147705,0.3286814987659454,0.3235024213790893,0.32446950674057,0.3311836123466491,0.328130304813385,0.3271634578704834,0.3250012993812561,0.3309800624
847412,0.3274554014205932,0.3273015916347503,0.3261759579181671,0.32697594165802,0.3303172886371612,0.3282814025878906,0.3289586305618286,0.3260826468467712,0.3258011937141418,0.3297208249568939,0.3254813551902771,0.3287739753723144,0.3287097811698913,0.3275279700756073,0.3293041586875915,0.3314100801944732,0.3287808299064636,0.3251930773258209,0.3288172781467438,0.3265027701854706,0.3275215625762939,0.3290774822235107,0.3261331617832184,0.3299777805805206,0.331955999135971,0.3305029273033142,0.3274719417095184,0.3235560953617096,0.3269940316677093,0.3323083519935608],"label":"RefinedWeb"},"SlimPajama":{"x":[0.0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,29.360128000000003,31.45728,33.554432000000006,35.651584,37.748736,39.845888,41.94304,44.040192000000005,46.137344000000006,48.234496,50.331648,52.4288,54.525952000000004,56.623104000000005,58.720256000000006,60.817408,62.91456,65.011712,67.10886400000001,69.206016,71.303168,73.40032000000001,75.497472,77.59462400000001,79.691776,81.788928,83.88608,85.983232,88.08038400000001,90.177536,92.27468800000001,94.37184,96.468992,98.56614400000001,100.663296,102.76044800000001,104.8576,106.95475200000001,109.05190400000001,111.149056,113.24620800000001,115.34336,117.44051200000001,119.537664,121.634816,123.73196800000001,125.82912,127.92627200000001,130.023424,132.120576,134.21772800000002,136.31488000000002,138.412032,140.509184,142.606336,144.70348800000002,146.80064000000002,148.897792,150.994944,153.092096,155.18924800000002,157.28640000000001,159.383552,161.480704,163.577856,165.67500800000002,167.77216,169.869312,171.966464,174.06361600000002,176.16076800000002,178.25792,180.355072,182.452224,184.54937600000002,186.64652800000002,188.74368,190.840832,192.937984,195.03513600000002,197.13228800000002,199.22944,201.326592,203.423744,205.52089600000002,207.61804800000002,209.7152
,211.812352,213.90950400000003,216.00665600000002,218.10380800000001,220.20096,222.298112,224.39526400000003,226.49241600000002,228.589568,230.68672,232.783872,234.88102400000002,236.97817600000002,239.075328,241.17248,243.269632,245.36678400000002,247.46393600000002,249.561088,251.65824,253.75539200000003,255.85254400000002,257.949696,260.046848,262.144,264.241152,266.338304,268.43545600000004,270.53260800000004,272.62976000000003,274.726912,276.824064,278.921216,281.018368,283.11552,285.212672,287.309824,289.40697600000004,291.50412800000004,293.60128000000003,295.698432,297.795584,299.892736,301.989888,304.08704,306.184192,308.28134400000005,310.37849600000004,312.47564800000004,314.57280000000003,316.669952,318.767104,320.864256,322.961408,325.05856,327.155712,329.25286400000005,331.35001600000004,333.44716800000003,335.54432,337.641472,339.738624,341.835776,343.932928,346.03008,348.12723200000005,350.22438400000004],"y":[0.2497200071811676,0.2592614293098449,0.2593538165092468,0.2662509083747864,0.2664803266525268,0.2686671912670135,0.2750212550163269,0.2728220522403717,0.2735992670059204,0.2816755771636963,0.2807474434375763,0.2832716107368469,0.2859686017036438,0.2851547598838806,0.2844336628913879,0.2834415435791015,0.2903566062450409,0.2867975533008575,0.2869687974452972,0.2880666553974151,0.2895745635032654,0.2932893931865692,0.285281777381897,0.29486083984375,0.2966246604919433,0.2939338088035583,0.2906226515769958,0.2981550693511963,0.2983705997467041,0.2941931188106537,0.3010403513908386,0.2986909449100494,0.3023008406162262,0.2999930381774902,0.2975163459777832,0.3010782599449157,0.3005977869033813,0.3014611005783081,0.2960115671157837,0.3022516369819641,0.3078178465366363,0.3019892573356628,0.3054529130458832,0.3049662113189697,0.3028279542922973,0.3042507171630859,0.3116106986999511,0.3049386143684387,0.3076794445514679,0.3053653836250305,0.3102109432220459,0.3084518015384674,0.3062224984169006,0.3118025958538055,0.3052020072937011,0.3125185668468475
,0.3075071275234222,0.3104312717914581,0.3031999468803406,0.3096194863319397,0.3074706494808197,0.3123086094856262,0.3070628046989441,0.3116572499275207,0.3127453923225403,0.3144129812717438,0.3129480183124542,0.3091934025287628,0.3163215219974518,0.3153833448886871,0.3132557868957519,0.3166911005973816,0.3108917474746704,0.3084513247013092,0.3118407726287842,0.3133653402328491,0.3157584071159363,0.3147788345813751,0.311055988073349,0.3152942359447479,0.3187046945095062,0.3157975673675537,0.3121104836463928,0.3179425001144409,0.3151332139968872,0.3168922960758209,0.3146162927150726,0.3181720376014709,0.3161299228668213,0.3202162981033325,0.3161057233810425,0.3180809915065765,0.3162356913089752,0.315767765045166,0.3190236985683441,0.3171856999397278,0.3202316761016845,0.3133728802204132,0.3163894414901733,0.31822270154953,0.3266949653625488,0.3198533058166504,0.3233769834041595,0.3238007426261902,0.3205596804618835,0.3247207403182983,0.3206443786621094,0.323225736618042,0.3217288851737976,0.3235573768615722,0.3252238631248474,0.324187159538269,0.3246301114559173,0.3234524428844452,0.3228626251220703,0.3241204023361206,0.3248985707759857,0.3271089494228363,0.3260233104228973,0.3256067335605621,0.3253726661205292,0.3233819603919983,0.3251070082187652,0.3276388943195343,0.3218710720539093,0.3246103525161743,0.3265250921249389,0.3275103867053985,0.3297024071216583,0.3277328908443451,0.3267576098442077,0.327478289604187,0.3304429948329925,0.3273082375526428,0.3264918923377991,0.3236933350563049,0.3272844851016998,0.3272388875484466,0.3295789361000061,0.3278802335262298,0.3284499049186706,0.3294344544410705,0.3298918604850769,0.3306445777416229,0.3281697332859039,0.3339054882526397,0.3293753862380981,0.3298616707324981,0.3280701637268066,0.3316899836063385,0.3303491175174713,0.330327957868576,0.3314228653907776,0.3317765891551971,0.3307021856307983,0.330121636390686,0.3283750414848327,0.3317910432815552,0.3326679468154907,0.3311441838741302,0.330735981464386,0.331374198198
3185,0.3320714235305786,0.3333134055137634,0.3337414860725403,0.3286249935626983,0.3344342410564422,0.3337143063545227],"label":"SlimPajama"},"The Pile":{"x":[0.0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,29.360128000000003,31.45728,33.554432000000006,35.651584,37.748736,39.845888,41.94304,44.040192000000005,46.137344000000006,48.234496,50.331648,52.4288,54.525952000000004,56.623104000000005,58.720256000000006,60.817408,62.91456,65.011712,67.10886400000001,69.206016,71.303168,73.40032000000001,75.497472,77.59462400000001,79.691776,81.788928,83.88608,85.983232,88.08038400000001,90.177536,92.27468800000001,94.37184,96.468992,98.56614400000001,100.663296,102.76044800000001,104.8576,106.95475200000001,109.05190400000001,111.149056,113.24620800000001,115.34336,117.44051200000001,119.537664,121.634816,123.73196800000001,125.82912,127.92627200000001,130.023424,132.120576,134.21772800000002,136.31488000000002,138.412032,140.509184,142.606336,144.70348800000002,146.80064000000002,148.897792,150.994944,153.092096,155.18924800000002,157.28640000000001,159.383552,161.480704,163.577856,165.67500800000002,167.77216,169.869312,171.966464,174.06361600000002,176.16076800000002,178.25792,180.355072,182.452224,184.54937600000002,186.64652800000002,188.74368,190.840832,192.937984,195.03513600000002,197.13228800000002,199.22944,201.326592,203.423744,205.52089600000002,207.61804800000002,209.7152,211.812352,213.90950400000003,216.00665600000002,218.10380800000001,220.20096,222.298112,224.39526400000003,226.49241600000002,228.589568,230.68672,232.783872,234.88102400000002,236.97817600000002,239.075328,241.17248,243.269632,245.36678400000002,247.46393600000002,249.561088,251.65824,253.75539200000003,255.85254400000002,257.949696,260.046848,262.144,264.241152,266.338304,268.43545600000004,270.53260800000004,272.62976000000003,274.726912,276.824064
,278.921216,281.018368,283.11552,285.212672,287.309824,289.40697600000004,291.50412800000004,293.60128000000003,295.698432,297.795584,299.892736,301.989888,304.08704,306.184192,308.28134400000005,310.37849600000004,312.47564800000004,314.57280000000003,316.669952,318.767104,320.864256,322.961408,325.05856,327.155712,329.25286400000005,331.35001600000004,333.44716800000003,335.54432,337.641472,339.738624,341.835776,343.932928,346.03008,348.12723200000005,350.22438400000004],"y":[0.2498730421066284,0.2534083127975464,0.2561326622962951,0.2599403262138366,0.2633565366268158,0.2649387121200561,0.2713600993156433,0.2703712880611419,0.2707636058330536,0.2733668386936188,0.2729981541633606,0.274116575717926,0.2803106009960174,0.2809702754020691,0.2779591083526611,0.2805380225181579,0.2798897624015808,0.2806138396263122,0.2803314626216888,0.2810186445713043,0.2816281020641327,0.2882067859172821,0.2882601618766784,0.2830324470996856,0.2890165150165558,0.2814686000347137,0.2897007167339325,0.2888377606868744,0.2834174335002899,0.2923084199428558,0.2913220524787903,0.2947664260864258,0.2914148271083832,0.2925300896167755,0.293608158826828,0.2889452278614044,0.2899258136749267,0.2933099865913391,0.2945047914981842,0.2932035624980926,0.2960488498210907,0.2930898070335388,0.2955847680568695,0.2957557141780853,0.2971082925796509,0.2952709197998047,0.2950723767280578,0.2981964945793152,0.2964051365852356,0.2939505577087402,0.2963071465492248,0.2983180284500122,0.2972628474235534,0.3001365661621094,0.2960390746593475,0.2977366149425506,0.2980498075485229,0.2993811666965484,0.2978217303752899,0.2964333891868591,0.3007307946681976,0.3038485944271087,0.3062177002429962,0.2997282147407532,0.3027545511722564,0.3020226955413818,0.3046002388000488,0.3044104874134063,0.3070093989372253,0.2984016835689544,0.301089197397232,0.3019088506698608,0.2994609773159027,0.3090873956680298,0.3015709221363067,0.3070562481880188,0.3078455030918121,0.3082370460033417,0.3078760802745819,0.3083541393280029,
0.3041301965713501,0.3008038699626922,0.308231920003891,0.3079721331596374,0.3082239031791687,0.3112189173698425,0.3071774840354919,0.303611695766449,0.3100490272045135,0.306540310382843,0.3090290427207947,0.3081201016902923,0.3089599609375,0.3034833371639251,0.3090586364269256,0.3152794539928436,0.3124137222766876,0.308320552110672,0.3110654950141907,0.3116510510444641,0.3129254281520843,0.312163382768631,0.3099710345268249,0.3123080432415008,0.3135911822319031,0.3122925460338592,0.3133870661258697,0.3189542889595032,0.3170333206653595,0.3164204955101013,0.3185184895992279,0.319377452135086,0.3195942044258117,0.3197914958000183,0.315343827009201,0.3192422688007355,0.3192358016967773,0.3179579675197601,0.319799929857254,0.3205091953277588,0.3259644210338592,0.3236323595046997,0.3209564685821533,0.3220179975032806,0.3186767101287842,0.3223333358764648,0.3227059245109558,0.3206779062747955,0.3189719617366791,0.324008584022522,0.3231253325939178,0.3192954957485199,0.3221996128559112,0.3230528831481933,0.3278523981571197,0.3217717707157135,0.3207707703113556,0.3236245810985565,0.3222698867321014,0.3228896558284759,0.3253422081470489,0.3267556726932525,0.3249134719371795,0.32914799451828,0.3244758546352386,0.327859878540039,0.3285762071609497,0.3294639587402344,0.3258695304393768,0.3260534405708313,0.3286381065845489,0.3287994265556335,0.3297498822212219,0.3312559723854065,0.3358747959136963,0.3314234912395477,0.3303252458572387,0.3299272060394287,0.3301239311695099,0.3315372467041015,0.330515444278717,0.3301299214363098,0.331067830324173,0.3263126313686371,0.3315630555152893,0.331708014011383,0.3312946856021881,0.3308100700378418],"label":"The 
Pile"},"RedPajama2":{"x":[0.0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,29.360128000000003,31.45728,33.554432000000006,35.651584,37.748736,39.845888,41.94304,44.040192000000005,46.137344000000006,48.234496,50.331648,52.4288,54.525952000000004,56.623104000000005,58.720256000000006,60.817408,62.91456,65.011712,67.10886400000001,69.206016,71.303168,73.40032000000001,75.497472,77.59462400000001,79.691776,81.788928,83.88608,85.983232,88.08038400000001,90.177536,92.27468800000001,94.37184,96.468992,98.56614400000001,100.663296,102.76044800000001,104.8576,106.95475200000001,109.05190400000001,111.149056,113.24620800000001,115.34336,117.44051200000001,119.537664,121.634816,123.73196800000001,125.82912,127.92627200000001,130.023424,132.120576,134.21772800000002,136.31488000000002,138.412032,140.509184,142.606336,144.70348800000002,146.80064000000002,148.897792,150.994944,153.092096,155.18924800000002,157.28640000000001,159.383552,161.480704,163.577856,165.67500800000002,167.77216,169.869312,171.966464,174.06361600000002,176.16076800000002,178.25792,180.355072,182.452224,184.54937600000002,186.64652800000002,188.74368,190.840832,192.937984,195.03513600000002,197.13228800000002,199.22944,201.326592,203.423744,205.52089600000002,207.61804800000002,209.7152,211.812352,213.90950400000003,216.00665600000002,218.10380800000001,220.20096,222.298112,224.39526400000003,226.49241600000002,228.589568,230.68672,232.783872,234.88102400000002,236.97817600000002,239.075328,241.17248,243.269632,245.36678400000002,247.46393600000002,249.561088,251.65824,253.75539200000003,255.85254400000002,257.949696,260.046848,262.144,264.241152,266.338304,268.43545600000004,270.53260800000004,272.62976000000003,274.726912,276.824064,278.921216,281.018368,283.11552,285.212672,287.309824,289.40697600000004,291.50412800000004,293.60128000000003,295.698432,297.795584
,299.892736,301.989888,304.08704,306.184192,308.28134400000005,310.37849600000004,312.47564800000004,314.57280000000003,316.669952,318.767104,320.864256,322.961408,325.05856,327.155712,329.25286400000005,331.35001600000004,333.44716800000003,335.54432,337.641472,339.738624,341.835776,343.932928,346.03008,348.12723200000005,350.22438400000004],"y":[0.2496780008077621,0.2519060671329498,0.2591428160667419,0.2637551724910736,0.2671225368976593,0.2742190659046173,0.2761570811271667,0.27329882979393,0.2755842506885528,0.2790464162826538,0.2869725823402405,0.2838374674320221,0.2864839732646942,0.2891924679279327,0.2910299301147461,0.2891930639743805,0.288548856973648,0.29207244515419,0.2911308705806732,0.2965781390666961,0.2966488599777221,0.2910915017127991,0.2942662537097931,0.2906062006950378,0.2968285381793976,0.299734503030777,0.2920202910900116,0.3011439740657806,0.2991106510162353,0.2955383658409118,0.2984142899513244,0.3006328046321869,0.3036990761756897,0.2976612746715545,0.2990829646587372,0.2943736910820007,0.300097107887268,0.2956410348415375,0.3071651458740234,0.3055790960788727,0.3042990863323211,0.3054490685462951,0.3014840185642242,0.298349380493164,0.3029196262359619,0.3046568930149078,0.306732028722763,0.3014123737812042,0.3036067485809326,0.3066047132015228,0.3052354753017425,0.3075886964797973,0.3091740012168884,0.3017406165599823,0.3043143153190613,0.3051626980304718,0.3052559792995453,0.3032524585723877,0.3126200735569,0.3073466420173645,0.3121364712715149,0.3114152550697326,0.3056653439998626,0.3071228861808777,0.3121611177921295,0.3070071935653686,0.3095386028289795,0.3122327923774719,0.3104317188262939,0.3108241260051727,0.3109523057937622,0.3170003592967987,0.3106639087200165,0.3124147951602936,0.3132331371307373,0.3180687129497528,0.3111820816993713,0.3153944313526153,0.3127918243408203,0.3113606572151184,0.3180998861789703,0.3133571743965149,0.314390480518341,0.3146277070045471,0.3148570358753204,0.3144837021827698,0.3187994956970215,0.32043457
03125,0.3150869607925415,0.3201290965080261,0.3202804625034332,0.319104790687561,0.3177886009216308,0.3194527924060821,0.3167977631092071,0.3182326853275299,0.3169693946838379,0.3235675394535064,0.3185839354991913,0.3215724229812622,0.3182428181171417,0.3219003081321716,0.3199219107627868,0.3217185437679291,0.3206124603748321,0.3205193877220154,0.318211942911148,0.3197448849678039,0.3191796839237213,0.3248491883277893,0.3221067786216736,0.3249030709266662,0.3219638168811798,0.3252624571323395,0.3218883872032165,0.3220428824424743,0.3214907944202423,0.3213936984539032,0.3221090137958526,0.3221961259841919,0.3210761547088623,0.3239326179027557,0.321384847164154,0.3226822912693023,0.3249708116054535,0.3271316289901733,0.3257546424865722,0.323205828666687,0.3232316970825195,0.3275502622127533,0.3237724900245666,0.3249053359031677,0.3257182836532593,0.3244708180427551,0.3246290981769562,0.3252749741077423,0.3240469694137573,0.329105406999588,0.3294115364551544,0.3245800733566284,0.3285538256168365,0.3260121047496795,0.3243294656276703,0.3270638883113861,0.3259481191635132,0.3246479034423828,0.321680337190628,0.3224166929721832,0.3243135511875152,0.3283822238445282,0.3236311376094818,0.327635109424591,0.3274770975112915,0.3242181539535522,0.3290536999702453,0.3260405361652374,0.3252047896385193,0.3275159001350403,0.3269789516925812,0.3300522267818451,0.3290919959545135,0.3258809447288513,0.3274780511856079,0.3302557468414306,0.3261787891387939,0.3282874226570129,0.3261036276817322,0.3293801844120025],"label":"RedPajama2"}},"layout":{"xaxis":{"title":{"text":"Training tokens (billions)"}},"yaxis":{"range":[0.39,0.5]},"title":{"text":"Dataset Ablations"}}}
 
 
data/plots/dataset_ablations/openbookqa_acc_norm.json DELETED
@@ -1 +0,0 @@
1
- {"data":{"FineWeb (ours)":{"x":[0.0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,29.360128000000003,31.45728,33.554432000000006,35.651584,37.748736,39.845888,41.94304,44.040192000000005,46.137344000000006,48.234496,50.331648,52.4288,54.525952000000004,56.623104000000005,58.720256000000006,60.817408,62.91456,65.011712,67.10886400000001,69.206016,71.303168,73.40032000000001,75.497472,77.59462400000001,79.691776,81.788928,83.88608,85.983232,88.08038400000001,90.177536,92.27468800000001,94.37184,96.468992,98.56614400000001,100.663296,102.76044800000001,104.8576,106.95475200000001,109.05190400000001,111.149056,113.24620800000001,115.34336,117.44051200000001,119.537664,121.634816,123.73196800000001,125.82912,127.92627200000001,130.023424,132.120576,134.21772800000002,136.31488000000002,138.412032,140.509184,142.606336,144.70348800000002,146.80064000000002,148.897792,150.994944,153.092096,155.18924800000002,157.28640000000001,159.383552,161.480704,163.577856,165.67500800000002,167.77216,169.869312,171.966464,174.06361600000002,176.16076800000002,178.25792,180.355072,182.452224,184.54937600000002,186.64652800000002,188.74368,190.840832,192.937984,195.03513600000002,197.13228800000002,199.22944,201.326592,203.423744,205.52089600000002,207.61804800000002,209.7152,211.812352,213.90950400000003,216.00665600000002,218.10380800000001,220.20096,222.298112,224.39526400000003,226.49241600000002,228.589568,230.68672,232.783872,234.88102400000002,236.97817600000002,239.075328,241.17248,243.269632,245.36678400000002,247.46393600000002,249.561088,251.65824,253.75539200000003,255.85254400000002,257.949696,260.046848,262.144,264.241152,266.338304,268.43545600000004,270.53260800000004,272.62976000000003,274.726912,276.824064,278.921216,281.018368,283.11552,285.212672,287.309824,289.40697600000004,291.50412800000004,293.60128000000003,295.698432,29
7.795584,299.892736,301.989888,304.08704,306.184192,308.28134400000005,310.37849600000004,312.47564800000004,314.57280000000003,316.669952,318.767104,320.864256,322.961408,325.05856,327.155712,329.25286400000005,331.35001600000004,333.44716800000003,335.54432,337.641472,339.738624,341.835776,343.932928,346.03008,348.12723200000005,350.22438400000004],"y":[0.2860000133514404,0.2440000027418136,0.270000010728836,0.2720000147819519,0.3000000119209289,0.2919999957084656,0.3160000145435333,0.3160000145435333,0.3179999887943268,0.3199999928474426,0.3440000116825104,0.3179999887943268,0.3240000009536743,0.3300000131130218,0.3240000009536743,0.3199999928474426,0.335999995470047,0.3339999914169311,0.3440000116825104,0.3459999859333038,0.3400000035762787,0.3440000116825104,0.335999995470047,0.3379999995231628,0.3519999980926513,0.3379999995231628,0.3420000076293945,0.3319999873638153,0.3479999899864197,0.3459999859333038,0.3339999914169311,0.3440000116825104,0.3420000076293945,0.3219999969005584,0.3319999873638153,0.3479999899864197,0.3379999995231628,0.335999995470047,0.3499999940395355,0.3420000076293945,0.3319999873638153,0.3400000035762787,0.3400000035762787,0.3519999980926513,0.3479999899864197,0.3379999995231628,0.335999995470047,0.3400000035762787,0.3319999873638153,0.3580000102519989,0.3499999940395355,0.3700000047683716,0.3680000007152557,0.335999995470047,0.3600000143051147,0.3499999940395355,0.356000006198883,0.3499999940395355,0.356000006198883,0.3619999885559082,0.363999992609024,0.3519999980926513,0.3540000021457672,0.3600000143051147,0.3600000143051147,0.3540000021457672,0.356000006198883,0.363999992609024,0.363999992609024,0.3499999940395355,0.3659999966621399,0.356000006198883,0.363999992609024,0.3540000021457672,0.3540000021457672,0.3619999885559082,0.3740000128746032,0.3519999980926513,0.3659999966621399,0.3680000007152557,0.3700000047683716,0.3580000102519989,0.3499999940395355,0.3740000128746032,0.3659999966621399,0.3659999966621399,0.3580000102519989,0.3
479999899864197,0.363999992609024,0.3519999980926513,0.3580000102519989,0.356000006198883,0.3740000128746032,0.363999992609024,0.3700000047683716,0.363999992609024,0.3700000047683716,0.363999992609024,0.3799999952316284,0.3860000073909759,0.3680000007152557,0.3779999911785126,0.3740000128746032,0.3600000143051147,0.3659999966621399,0.3680000007152557,0.3619999885559082,0.3700000047683716,0.3759999871253967,0.363999992609024,0.3740000128746032,0.3799999952316284,0.3779999911785126,0.3659999966621399,0.3600000143051147,0.3740000128746032,0.3600000143051147,0.363999992609024,0.363999992609024,0.363999992609024,0.3779999911785126,0.3700000047683716,0.3799999952316284,0.3720000088214874,0.3819999992847442,0.3759999871253967,0.3799999952316284,0.3740000128746032,0.3860000073909759,0.3779999911785126,0.3959999978542328,0.3880000114440918,0.3799999952316284,0.3860000073909759,0.3759999871253967,0.3939999938011169,0.3779999911785126,0.3959999978542328,0.3779999911785126,0.3899999856948852,0.3860000073909759,0.3959999978542328,0.3759999871253967,0.3720000088214874,0.3799999952316284,0.3740000128746032,0.3759999871253967,0.3799999952316284,0.3819999992847442,0.3840000033378601,0.3720000088214874,0.363999992609024,0.3840000033378601,0.3919999897480011,0.3819999992847442,0.3819999992847442,0.3779999911785126,0.3799999952316284,0.3840000033378601,0.3819999992847442,0.3899999856948852,0.3860000073909759,0.3819999992847442,0.3840000033378601,0.3720000088214874,0.3799999952316284,0.3819999992847442,0.3959999978542328],"label":"FineWeb 
(ours)"},"C4":{"x":[0.0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,29.360128000000003,31.45728,33.554432000000006,35.651584,37.748736,39.845888,41.94304,44.040192000000005,46.137344000000006,48.234496,50.331648,52.4288,54.525952000000004,56.623104000000005,58.720256000000006,60.817408,62.91456,65.011712,67.10886400000001,69.206016,71.303168,73.40032000000001,75.497472,77.59462400000001,79.691776,81.788928,83.88608,85.983232,88.08038400000001,90.177536,92.27468800000001,94.37184,96.468992,98.56614400000001,100.663296,102.76044800000001,104.8576,106.95475200000001,109.05190400000001,111.149056,113.24620800000001,115.34336,117.44051200000001,119.537664,121.634816,123.73196800000001,125.82912,127.92627200000001,130.023424,132.120576,134.21772800000002,136.31488000000002,138.412032,140.509184,142.606336,144.70348800000002,146.80064000000002,148.897792,150.994944,153.092096,155.18924800000002,157.28640000000001,159.383552,161.480704,163.577856,165.67500800000002,167.77216,169.869312,171.966464,174.06361600000002,176.16076800000002,178.25792,180.355072,182.452224,184.54937600000002,186.64652800000002,188.74368,190.840832,192.937984,195.03513600000002,197.13228800000002,199.22944,201.326592,203.423744,205.52089600000002,207.61804800000002,209.7152,211.812352,213.90950400000003,216.00665600000002,218.10380800000001,220.20096,222.298112,224.39526400000003,226.49241600000002,228.589568,230.68672,232.783872,234.88102400000002,236.97817600000002,239.075328,241.17248,243.269632,245.36678400000002,247.46393600000002,249.561088,251.65824,253.75539200000003,255.85254400000002,257.949696,260.046848,262.144,264.241152,266.338304,268.43545600000004,270.53260800000004,272.62976000000003,274.726912,276.824064,278.921216,281.018368,283.11552,285.212672,287.309824,289.40697600000004,291.50412800000004,293.60128000000003,295.698432,297.795584,299.8
92736,301.989888,304.08704,306.184192,308.28134400000005,310.37849600000004,312.47564800000004,314.57280000000003,316.669952,318.767104,320.864256,322.961408,325.05856,327.155712,329.25286400000005,331.35001600000004,333.44716800000003,335.54432,337.641472,339.738624,341.835776,343.932928,346.03008,348.12723200000005,350.22438400000004],"y":[0.2860000133514404,0.25,0.2759999930858612,0.2739999890327453,0.2820000052452087,0.2980000078678131,0.3019999861717224,0.2899999916553497,0.3000000119209289,0.3240000009536743,0.3120000064373016,0.3260000050067901,0.3319999873638153,0.3440000116825104,0.3240000009536743,0.3400000035762787,0.3300000131130218,0.3459999859333038,0.3540000021457672,0.3319999873638153,0.3219999969005584,0.3499999940395355,0.3479999899864197,0.3300000131130218,0.3519999980926513,0.3499999940395355,0.3680000007152557,0.3479999899864197,0.3580000102519989,0.3519999980926513,0.3479999899864197,0.3600000143051147,0.3540000021457672,0.363999992609024,0.3619999885559082,0.3619999885559082,0.3540000021457672,0.3499999940395355,0.3540000021457672,0.3540000021457672,0.356000006198883,0.3519999980926513,0.3580000102519989,0.3720000088214874,0.3600000143051147,0.3700000047683716,0.3580000102519989,0.3600000143051147,0.3659999966621399,0.3759999871253967,0.3580000102519989,0.3779999911785126,0.356000006198883,0.3680000007152557,0.3680000007152557,0.356000006198883,0.363999992609024,0.3619999885559082,0.3740000128746032,0.3720000088214874,0.3740000128746032,0.3740000128746032,0.3740000128746032,0.3700000047683716,0.3600000143051147,0.3799999952316284,0.3659999966621399,0.3819999992847442,0.3779999911785126,0.3619999885559082,0.363999992609024,0.356000006198883,0.356000006198883,0.3600000143051147,0.3580000102519989,0.3779999911785126,0.3759999871253967,0.3700000047683716,0.363999992609024,0.3700000047683716,0.3700000047683716,0.3600000143051147,0.3700000047683716,0.3619999885559082,0.3700000047683716,0.3659999966621399,0.3799999952316284,0.3720000088214874,0.37799
99911785126,0.3580000102519989,0.3600000143051147,0.3659999966621399,0.3819999992847442,0.3619999885559082,0.3680000007152557,0.3860000073909759,0.3860000073909759,0.3700000047683716,0.363999992609024,0.3720000088214874,0.3740000128746032,0.3700000047683716,0.3840000033378601,0.3980000019073486,0.3740000128746032,0.3659999966621399,0.3779999911785126,0.3919999897480011,0.3759999871253967,0.3740000128746032,0.3819999992847442,0.3659999966621399,0.3740000128746032,0.3799999952316284,0.363999992609024,0.3799999952316284,0.3779999911785126,0.3740000128746032,0.3980000019073486,0.3899999856948852,0.3840000033378601,0.3819999992847442,0.3779999911785126,0.3759999871253967,0.3860000073909759,0.3899999856948852,0.3700000047683716,0.3799999952316284,0.3840000033378601,0.3840000033378601,0.3919999897480011,0.3779999911785126,0.3740000128746032,0.3899999856948852,0.3759999871253967,0.3819999992847442,0.3980000019073486,0.3899999856948852,0.3899999856948852,0.3779999911785126,0.3779999911785126,0.3919999897480011,0.3880000114440918,0.3840000033378601,0.3700000047683716,0.3819999992847442,0.3880000114440918,0.3819999992847442,0.3939999938011169,0.3860000073909759,0.3899999856948852,0.3980000019073486,0.4000000059604645,0.4059999883174896,0.3880000114440918,0.4000000059604645,0.3919999897480011,0.3980000019073486,0.3899999856948852,0.3899999856948852,0.3899999856948852,0.3939999938011169,0.3899999856948852,0.3899999856948852,0.3860000073909759,0.3880000114440918,0.3840000033378601,0.3720000088214874],"label":"C4"},"Dolma":{"x":[0.0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,29.360128000000003,31.45728,33.554432000000006,35.651584,37.748736,39.845888,41.94304,44.040192000000005,46.137344000000006,48.234496,50.331648,52.4288,54.525952000000004,56.623104000000005,58.720256000000006,60.817408,62.91456,65.011712,67.10886400000001,69.206016,71
.303168,73.40032000000001,75.497472,77.59462400000001,79.691776,81.788928,83.88608,85.983232,88.08038400000001,90.177536,92.27468800000001,94.37184,96.468992,98.56614400000001,100.663296,102.76044800000001,104.8576,106.95475200000001,109.05190400000001,111.149056,113.24620800000001,115.34336,117.44051200000001,119.537664,121.634816,123.73196800000001,125.82912,127.92627200000001,130.023424,132.120576,134.21772800000002,136.31488000000002,138.412032,140.509184,142.606336,144.70348800000002,146.80064000000002,148.897792,150.994944,153.092096,155.18924800000002,157.28640000000001,159.383552,161.480704,163.577856,165.67500800000002,167.77216,169.869312,171.966464,174.06361600000002,176.16076800000002,178.25792,180.355072,182.452224,184.54937600000002,186.64652800000002,188.74368,190.840832,192.937984,195.03513600000002,197.13228800000002,199.22944,201.326592,203.423744,205.52089600000002,207.61804800000002,209.7152,211.812352,213.90950400000003,216.00665600000002,218.10380800000001,220.20096,222.298112,224.39526400000003,226.49241600000002,228.589568,230.68672,232.783872,234.88102400000002,236.97817600000002,239.075328,241.17248,243.269632,245.36678400000002,247.46393600000002,249.561088,251.65824,253.75539200000003,255.85254400000002,257.949696,260.046848,262.144,264.241152,266.338304,268.43545600000004,270.53260800000004,272.62976000000003,274.726912,276.824064,278.921216,281.018368,283.11552,285.212672,287.309824,289.40697600000004,291.50412800000004,293.60128000000003,295.698432,297.795584,299.892736,301.989888,304.08704,306.184192,308.28134400000005,310.37849600000004,312.47564800000004,314.57280000000003,316.669952,318.767104,320.864256,322.961408,325.05856,327.155712,329.25286400000005,331.35001600000004,333.44716800000003,335.54432,337.641472,339.738624,341.835776,343.932928,346.03008,348.12723200000005,350.22438400000004],"y":[0.2860000133514404,0.2619999945163727,0.2860000133514404,0.2800000011920929,0.2919999957084656,0.3000000119209289,0.3059999942779541,0.3
079999983310699,0.3100000023841858,0.3000000119209289,0.3039999902248382,0.328000009059906,0.3260000050067901,0.328000009059906,0.3540000021457672,0.3319999873638153,0.3219999969005584,0.328000009059906,0.3339999914169311,0.3479999899864197,0.3540000021457672,0.335999995470047,0.3499999940395355,0.3479999899864197,0.3420000076293945,0.3379999995231628,0.3300000131130218,0.363999992609024,0.3440000116825104,0.3519999980926513,0.3319999873638153,0.3400000035762787,0.3459999859333038,0.3479999899864197,0.3440000116825104,0.3379999995231628,0.3459999859333038,0.3459999859333038,0.3440000116825104,0.3519999980926513,0.3519999980926513,0.3420000076293945,0.3400000035762787,0.3499999940395355,0.3540000021457672,0.3459999859333038,0.3519999980926513,0.3459999859333038,0.3540000021457672,0.3659999966621399,0.3540000021457672,0.3600000143051147,0.3619999885559082,0.3659999966621399,0.3479999899864197,0.3519999980926513,0.3420000076293945,0.363999992609024,0.3580000102519989,0.3519999980926513,0.3440000116825104,0.3400000035762787,0.3499999940395355,0.3619999885559082,0.3519999980926513,0.3619999885559082,0.3479999899864197,0.356000006198883,0.3540000021457672,0.363999992609024,0.3459999859333038,0.3580000102519989,0.3619999885559082,0.3479999899864197,0.363999992609024,0.3459999859333038,0.3619999885559082,0.3600000143051147,0.3600000143051147,0.3519999980926513,0.3600000143051147,0.3680000007152557,0.3659999966621399,0.3659999966621399,0.3600000143051147,0.3580000102519989,0.3580000102519989,0.3720000088214874,0.3619999885559082,0.363999992609024,0.3479999899864197,0.3659999966621399,0.3600000143051147,0.3680000007152557,0.3700000047683716,0.3580000102519989,0.3420000076293945,0.3619999885559082,0.3479999899864197,0.3540000021457672,0.3499999940395355,0.3600000143051147,0.3720000088214874,0.3420000076293945,0.3580000102519989,0.3680000007152557,0.3600000143051147,0.3600000143051147,0.3440000116825104,0.3619999885559082,0.356000006198883,0.3580000102519989,0.3600000143051147,
0.3540000021457672,0.3519999980926513,0.3499999940395355,0.3479999899864197,0.3540000021457672,0.3619999885559082,0.3540000021457672,0.3600000143051147,0.3519999980926513,0.3540000021457672,0.3600000143051147,0.3659999966621399,0.3479999899864197,0.3540000021457672,0.3459999859333038,0.3759999871253967,0.3580000102519989,0.3540000021457672,0.3659999966621399,0.3659999966621399,0.3659999966621399,0.3680000007152557,0.356000006198883,0.3600000143051147,0.3619999885559082,0.3700000047683716,0.3600000143051147,0.3600000143051147,0.3680000007152557,0.3799999952316284,0.3860000073909759,0.3700000047683716,0.3700000047683716,0.3659999966621399,0.3840000033378601,0.3659999966621399,0.3659999966621399,0.3659999966621399,0.3680000007152557,0.3700000047683716,0.3720000088214874,0.363999992609024,0.363999992609024,0.3740000128746032,0.3779999911785126,0.3659999966621399,0.3700000047683716,0.3720000088214874,0.3680000007152557,0.3740000128746032,0.3819999992847442,0.3619999885559082,0.3619999885559082,0.3740000128746032,0.3799999952316284],"label":"Dolma"},"RefinedWeb":{"x":[0.0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,29.360128000000003,31.45728,33.554432000000006,35.651584,37.748736,39.845888,41.94304,44.040192000000005,46.137344000000006,48.234496,50.331648,52.4288,54.525952000000004,56.623104000000005,58.720256000000006,60.817408,62.91456,65.011712,67.10886400000001,69.206016,71.303168,73.40032000000001,75.497472,77.59462400000001,79.691776,81.788928,83.88608,85.983232,88.08038400000001,90.177536,92.27468800000001,94.37184,96.468992,98.56614400000001,100.663296,102.76044800000001,104.8576,106.95475200000001,109.05190400000001,111.149056,113.24620800000001,115.34336,117.44051200000001,119.537664,121.634816,123.73196800000001,125.82912,127.92627200000001,130.023424,132.120576,134.21772800000002,136.31488000000002,138.412032,140.5091
84,142.606336,144.70348800000002,146.80064000000002,148.897792,150.994944,153.092096,155.18924800000002,157.28640000000001,159.383552,161.480704,163.577856,165.67500800000002,167.77216,169.869312,171.966464,174.06361600000002,176.16076800000002,178.25792,180.355072,182.452224,184.54937600000002,186.64652800000002,188.74368,190.840832,192.937984,195.03513600000002,197.13228800000002,199.22944,201.326592,203.423744,205.52089600000002,207.61804800000002,209.7152,211.812352,213.90950400000003,216.00665600000002,218.10380800000001,220.20096,222.298112,224.39526400000003,226.49241600000002,228.589568,230.68672,232.783872,234.88102400000002,236.97817600000002,239.075328,241.17248,243.269632,245.36678400000002,247.46393600000002,249.561088,251.65824,253.75539200000003,255.85254400000002,257.949696,260.046848,262.144,264.241152,266.338304,268.43545600000004,270.53260800000004,272.62976000000003,274.726912,276.824064,278.921216,281.018368,283.11552,285.212672,287.309824,289.40697600000004,291.50412800000004,293.60128000000003,295.698432,297.795584,299.892736,301.989888,304.08704,306.184192,308.28134400000005,310.37849600000004,312.47564800000004,314.57280000000003,316.669952,318.767104,320.864256,322.961408,325.05856,327.155712,329.25286400000005,331.35001600000004,333.44716800000003,335.54432,337.641472,339.738624,341.835776,343.932928,346.03008,348.12723200000005,350.22438400000004],"y":[0.2860000133514404,0.2560000121593475,0.2840000092983246,0.3059999942779541,0.3059999942779541,0.2980000078678131,0.3240000009536743,0.3100000023841858,0.3000000119209289,0.3160000145435333,0.3140000104904175,0.3260000050067901,0.3199999928474426,0.2980000078678131,0.3179999887943268,0.3319999873638153,0.3019999861717224,0.2939999997615814,0.3319999873638153,0.3319999873638153,0.3219999969005584,0.3379999995231628,0.3379999995231628,0.3339999914169311,0.3240000009536743,0.3479999899864197,0.3300000131130218,0.3240000009536743,0.3300000131130218,0.3400000035762787,0.3459999859333038,0.331999
9873638153,0.3379999995231628,0.356000006198883,0.3339999914169311,0.3459999859333038,0.3440000116825104,0.3519999980926513,0.3479999899864197,0.3339999914169311,0.3400000035762787,0.3479999899864197,0.3379999995231628,0.3479999899864197,0.3499999940395355,0.3400000035762787,0.3499999940395355,0.3420000076293945,0.3659999966621399,0.3400000035762787,0.3459999859333038,0.3499999940395355,0.356000006198883,0.3400000035762787,0.356000006198883,0.3339999914169311,0.3339999914169311,0.3479999899864197,0.3420000076293945,0.3580000102519989,0.3339999914169311,0.3440000116825104,0.3400000035762787,0.3499999940395355,0.3540000021457672,0.3479999899864197,0.3499999940395355,0.3420000076293945,0.3379999995231628,0.335999995470047,0.356000006198883,0.3459999859333038,0.3499999940395355,0.3400000035762787,0.3440000116825104,0.356000006198883,0.3519999980926513,0.3400000035762787,0.3440000116825104,0.356000006198883,0.3400000035762787,0.356000006198883,0.3600000143051147,0.3540000021457672,0.3479999899864197,0.3379999995231628,0.3440000116825104,0.3300000131130218,0.3400000035762787,0.3459999859333038,0.3339999914169311,0.3499999940395355,0.3600000143051147,0.3440000116825104,0.3499999940395355,0.356000006198883,0.3420000076293945,0.3479999899864197,0.3379999995231628,0.3379999995231628,0.3459999859333038,0.356000006198883,0.328000009059906,0.3459999859333038,0.3519999980926513,0.3499999940395355,0.3519999980926513,0.3420000076293945,0.3499999940395355,0.3420000076293945,0.3339999914169311,0.335999995470047,0.3379999995231628,0.3379999995231628,0.3540000021457672,0.356000006198883,0.356000006198883,0.335999995470047,0.363999992609024,0.363999992609024,0.3499999940395355,0.356000006198883,0.3519999980926513,0.3519999980926513,0.3540000021457672,0.3459999859333038,0.3479999899864197,0.3519999980926513,0.3519999980926513,0.3420000076293945,0.3440000116825104,0.3379999995231628,0.3519999980926513,0.356000006198883,0.3420000076293945,0.3580000102519989,0.3499999940395355,0.36199998855
59082,0.3519999980926513,0.3600000143051147,0.3459999859333038,0.3519999980926513,0.3519999980926513,0.3499999940395355,0.3580000102519989,0.356000006198883,0.3580000102519989,0.3600000143051147,0.3440000116825104,0.3600000143051147,0.3440000116825104,0.3479999899864197,0.3479999899864197,0.3580000102519989,0.3600000143051147,0.3580000102519989,0.3540000021457672,0.3519999980926513,0.3459999859333038,0.3459999859333038,0.3540000021457672,0.335999995470047,0.3540000021457672,0.3540000021457672,0.3519999980926513,0.356000006198883,0.3499999940395355,0.356000006198883],"label":"RefinedWeb"},"SlimPajama":{"x":[0.0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,29.360128000000003,31.45728,33.554432000000006,35.651584,37.748736,39.845888,41.94304,44.040192000000005,46.137344000000006,48.234496,50.331648,52.4288,54.525952000000004,56.623104000000005,58.720256000000006,60.817408,62.91456,65.011712,67.10886400000001,69.206016,71.303168,73.40032000000001,75.497472,77.59462400000001,79.691776,81.788928,83.88608,85.983232,88.08038400000001,90.177536,92.27468800000001,94.37184,96.468992,98.56614400000001,100.663296,102.76044800000001,104.8576,106.95475200000001,109.05190400000001,111.149056,113.24620800000001,115.34336,117.44051200000001,119.537664,121.634816,123.73196800000001,125.82912,127.92627200000001,130.023424,132.120576,134.21772800000002,136.31488000000002,138.412032,140.509184,142.606336,144.70348800000002,146.80064000000002,148.897792,150.994944,153.092096,155.18924800000002,157.28640000000001,159.383552,161.480704,163.577856,165.67500800000002,167.77216,169.869312,171.966464,174.06361600000002,176.16076800000002,178.25792,180.355072,182.452224,184.54937600000002,186.64652800000002,188.74368,190.840832,192.937984,195.03513600000002,197.13228800000002,199.22944,201.326592,203.423744,205.52089600000002,207.61804800000002,209.7152,2
11.812352,213.90950400000003,216.00665600000002,218.10380800000001,220.20096,222.298112,224.39526400000003,226.49241600000002,228.589568,230.68672,232.783872,234.88102400000002,236.97817600000002,239.075328,241.17248,243.269632,245.36678400000002,247.46393600000002,249.561088,251.65824,253.75539200000003,255.85254400000002,257.949696,260.046848,262.144,264.241152,266.338304,268.43545600000004,270.53260800000004,272.62976000000003,274.726912,276.824064,278.921216,281.018368,283.11552,285.212672,287.309824,289.40697600000004,291.50412800000004,293.60128000000003,295.698432,297.795584,299.892736,301.989888,304.08704,306.184192,308.28134400000005,310.37849600000004,312.47564800000004,314.57280000000003,316.669952,318.767104,320.864256,322.961408,325.05856,327.155712,329.25286400000005,331.35001600000004,333.44716800000003,335.54432,337.641472,339.738624,341.835776,343.932928,346.03008,348.12723200000005,350.22438400000004],"y":[0.2860000133514404,0.2540000081062317,0.2560000121593475,0.2919999957084656,0.2840000092983246,0.2739999890327453,0.2960000038146972,0.3019999861717224,0.3140000104904175,0.3100000023841858,0.3120000064373016,0.3339999914169311,0.3160000145435333,0.3240000009536743,0.3260000050067901,0.3240000009536743,0.3120000064373016,0.3120000064373016,0.3400000035762787,0.3379999995231628,0.3240000009536743,0.3059999942779541,0.3300000131130218,0.3160000145435333,0.3260000050067901,0.3300000131130218,0.3379999995231628,0.335999995470047,0.3240000009536743,0.3260000050067901,0.3339999914169311,0.3459999859333038,0.335999995470047,0.3400000035762787,0.3440000116825104,0.3219999969005584,0.3420000076293945,0.335999995470047,0.3440000116825104,0.328000009059906,0.3379999995231628,0.3420000076293945,0.3379999995231628,0.3300000131130218,0.3499999940395355,0.3459999859333038,0.3400000035762787,0.3300000131130218,0.3499999940395355,0.335999995470047,0.3400000035762787,0.3379999995231628,0.3260000050067901,0.335999995470047,0.328000009059906,0.335999995470047,0.3379
999995231628,0.3420000076293945,0.335999995470047,0.3519999980926513,0.3400000035762787,0.3319999873638153,0.3499999940395355,0.3400000035762787,0.3240000009536743,0.3300000131130218,0.3479999899864197,0.3319999873638153,0.3499999940395355,0.3400000035762787,0.3499999940395355,0.328000009059906,0.3339999914169311,0.3339999914169311,0.3379999995231628,0.328000009059906,0.328000009059906,0.335999995470047,0.3260000050067901,0.3420000076293945,0.335999995470047,0.335999995470047,0.3420000076293945,0.3400000035762787,0.3260000050067901,0.3339999914169311,0.3400000035762787,0.328000009059906,0.3319999873638153,0.3479999899864197,0.3339999914169311,0.3339999914169311,0.3519999980926513,0.3440000116825104,0.3519999980926513,0.3420000076293945,0.328000009059906,0.3400000035762787,0.3440000116825104,0.3400000035762787,0.3499999940395355,0.3479999899864197,0.3499999940395355,0.3400000035762787,0.3420000076293945,0.3459999859333038,0.3420000076293945,0.3300000131130218,0.3440000116825104,0.3499999940395355,0.356000006198883,0.3519999980926513,0.3600000143051147,0.3400000035762787,0.3400000035762787,0.3339999914169311,0.3540000021457672,0.3339999914169311,0.3339999914169311,0.3540000021457672,0.3499999940395355,0.3479999899864197,0.3339999914169311,0.3420000076293945,0.3339999914169311,0.3420000076293945,0.3440000116825104,0.3600000143051147,0.3499999940395355,0.3619999885559082,0.3580000102519989,0.3479999899864197,0.356000006198883,0.3420000076293945,0.3519999980926513,0.3519999980926513,0.3600000143051147,0.3459999859333038,0.3459999859333038,0.356000006198883,0.3459999859333038,0.3540000021457672,0.3499999940395355,0.3499999940395355,0.3540000021457672,0.356000006198883,0.3580000102519989,0.356000006198883,0.3519999980926513,0.356000006198883,0.363999992609024,0.3499999940395355,0.3519999980926513,0.3479999899864197,0.3499999940395355,0.3540000021457672,0.3479999899864197,0.335999995470047,0.3519999980926513,0.3479999899864197,0.3479999899864197,0.3459999859333038,0.3499999
940395355,0.3420000076293945,0.3540000021457672,0.3479999899864197,0.3479999899864197,0.3459999859333038],"label":"SlimPajama"},"The Pile":{"x":[0.0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,29.360128000000003,31.45728,33.554432000000006,35.651584,37.748736,39.845888,41.94304,44.040192000000005,46.137344000000006,48.234496,50.331648,52.4288,54.525952000000004,56.623104000000005,58.720256000000006,60.817408,62.91456,65.011712,67.10886400000001,69.206016,71.303168,73.40032000000001,75.497472,77.59462400000001,79.691776,81.788928,83.88608,85.983232,88.08038400000001,90.177536,92.27468800000001,94.37184,96.468992,98.56614400000001,100.663296,102.76044800000001,104.8576,106.95475200000001,109.05190400000001,111.149056,113.24620800000001,115.34336,117.44051200000001,119.537664,121.634816,123.73196800000001,125.82912,127.92627200000001,130.023424,132.120576,134.21772800000002,136.31488000000002,138.412032,140.509184,142.606336,144.70348800000002,146.80064000000002,148.897792,150.994944,153.092096,155.18924800000002,157.28640000000001,159.383552,161.480704,163.577856,165.67500800000002,167.77216,169.869312,171.966464,174.06361600000002,176.16076800000002,178.25792,180.355072,182.452224,184.54937600000002,186.64652800000002,188.74368,190.840832,192.937984,195.03513600000002,197.13228800000002,199.22944,201.326592,203.423744,205.52089600000002,207.61804800000002,209.7152,211.812352,213.90950400000003,216.00665600000002,218.10380800000001,220.20096,222.298112,224.39526400000003,226.49241600000002,228.589568,230.68672,232.783872,234.88102400000002,236.97817600000002,239.075328,241.17248,243.269632,245.36678400000002,247.46393600000002,249.561088,251.65824,253.75539200000003,255.85254400000002,257.949696,260.046848,262.144,264.241152,266.338304,268.43545600000004,270.53260800000004,272.62976000000003,274.726912,276.824064,278.921216,28
1.018368,283.11552,285.212672,287.309824,289.40697600000004,291.50412800000004,293.60128000000003,295.698432,297.795584,299.892736,301.989888,304.08704,306.184192,308.28134400000005,310.37849600000004,312.47564800000004,314.57280000000003,316.669952,318.767104,320.864256,322.961408,325.05856,327.155712,329.25286400000005,331.35001600000004,333.44716800000003,335.54432,337.641472,339.738624,341.835776,343.932928,346.03008,348.12723200000005,350.22438400000004],"y":[0.2860000133514404,0.2759999930858612,0.2759999930858612,0.2720000147819519,0.2840000092983246,0.3019999861717224,0.2919999957084656,0.2980000078678131,0.2960000038146972,0.2899999916553497,0.2980000078678131,0.2939999997615814,0.3059999942779541,0.3199999928474426,0.3120000064373016,0.3140000104904175,0.3140000104904175,0.3019999861717224,0.3079999983310699,0.3100000023841858,0.3120000064373016,0.3120000064373016,0.3179999887943268,0.3260000050067901,0.3160000145435333,0.3319999873638153,0.3240000009536743,0.3379999995231628,0.3339999914169311,0.3300000131130218,0.3400000035762787,0.3240000009536743,0.3039999902248382,0.335999995470047,0.3219999969005584,0.3120000064373016,0.328000009059906,0.3140000104904175,0.3179999887943268,0.3240000009536743,0.3120000064373016,0.3379999995231628,0.3039999902248382,0.328000009059906,0.3260000050067901,0.3100000023841858,0.3339999914169311,0.328000009059906,0.3260000050067901,0.3319999873638153,0.328000009059906,0.328000009059906,0.3300000131130218,0.3440000116825104,0.328000009059906,0.3319999873638153,0.3440000116825104,0.3459999859333038,0.3240000009536743,0.3400000035762787,0.328000009059906,0.3499999940395355,0.3440000116825104,0.3499999940395355,0.3580000102519989,0.3420000076293945,0.3319999873638153,0.3499999940395355,0.3379999995231628,0.3479999899864197,0.3400000035762787,0.3319999873638153,0.3379999995231628,0.3339999914169311,0.3420000076293945,0.3420000076293945,0.3479999899864197,0.3499999940395355,0.3400000035762787,0.3479999899864197,0.3519999980926513,
0.3379999995231628,0.3459999859333038,0.328000009059906,0.3339999914169311,0.3300000131130218,0.356000006198883,0.356000006198883,0.3540000021457672,0.3420000076293945,0.3499999940395355,0.3659999966621399,0.3519999980926513,0.3519999980926513,0.3479999899864197,0.356000006198883,0.3440000116825104,0.3499999940395355,0.3420000076293945,0.3400000035762787,0.3339999914169311,0.3440000116825104,0.3519999980926513,0.3420000076293945,0.3499999940395355,0.3540000021457672,0.3519999980926513,0.356000006198883,0.3519999980926513,0.3499999940395355,0.3580000102519989,0.3619999885559082,0.3540000021457672,0.3580000102519989,0.3540000021457672,0.3499999940395355,0.356000006198883,0.3540000021457672,0.3580000102519989,0.3499999940395355,0.3440000116825104,0.363999992609024,0.3479999899864197,0.3600000143051147,0.3540000021457672,0.3519999980926513,0.3600000143051147,0.3580000102519989,0.3459999859333038,0.3540000021457672,0.356000006198883,0.3580000102519989,0.3519999980926513,0.3519999980926513,0.3619999885559082,0.3619999885559082,0.356000006198883,0.3519999980926513,0.3600000143051147,0.3459999859333038,0.3499999940395355,0.3619999885559082,0.3519999980926513,0.3519999980926513,0.3580000102519989,0.3479999899864197,0.3479999899864197,0.3479999899864197,0.3540000021457672,0.3580000102519989,0.3499999940395355,0.3479999899864197,0.3499999940395355,0.3580000102519989,0.3600000143051147,0.3580000102519989,0.3659999966621399,0.3580000102519989,0.3540000021457672,0.3619999885559082,0.3540000021457672,0.3519999980926513,0.3600000143051147,0.3659999966621399,0.3479999899864197,0.3519999980926513,0.3459999859333038,0.356000006198883],"label":"The 
Pile"},"RedPajama2":{"x":[0.0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,29.360128000000003,31.45728,33.554432000000006,35.651584,37.748736,39.845888,41.94304,44.040192000000005,46.137344000000006,48.234496,50.331648,52.4288,54.525952000000004,56.623104000000005,58.720256000000006,60.817408,62.91456,65.011712,67.10886400000001,69.206016,71.303168,73.40032000000001,75.497472,77.59462400000001,79.691776,81.788928,83.88608,85.983232,88.08038400000001,90.177536,92.27468800000001,94.37184,96.468992,98.56614400000001,100.663296,102.76044800000001,104.8576,106.95475200000001,109.05190400000001,111.149056,113.24620800000001,115.34336,117.44051200000001,119.537664,121.634816,123.73196800000001,125.82912,127.92627200000001,130.023424,132.120576,134.21772800000002,136.31488000000002,138.412032,140.509184,142.606336,144.70348800000002,146.80064000000002,148.897792,150.994944,153.092096,155.18924800000002,157.28640000000001,159.383552,161.480704,163.577856,165.67500800000002,167.77216,169.869312,171.966464,174.06361600000002,176.16076800000002,178.25792,180.355072,182.452224,184.54937600000002,186.64652800000002,188.74368,190.840832,192.937984,195.03513600000002,197.13228800000002,199.22944,201.326592,203.423744,205.52089600000002,207.61804800000002,209.7152,211.812352,213.90950400000003,216.00665600000002,218.10380800000001,220.20096,222.298112,224.39526400000003,226.49241600000002,228.589568,230.68672,232.783872,234.88102400000002,236.97817600000002,239.075328,241.17248,243.269632,245.36678400000002,247.46393600000002,249.561088,251.65824,253.75539200000003,255.85254400000002,257.949696,260.046848,262.144,264.241152,266.338304,268.43545600000004,270.53260800000004,272.62976000000003,274.726912,276.824064,278.921216,281.018368,283.11552,285.212672,287.309824,289.40697600000004,291.50412800000004,293.60128000000003,295.698432,297.795584
,299.892736,301.989888,304.08704,306.184192,308.28134400000005,310.37849600000004,312.47564800000004,314.57280000000003,316.669952,318.767104,320.864256,322.961408,325.05856,327.155712,329.25286400000005,331.35001600000004,333.44716800000003,335.54432,337.641472,339.738624,341.835776,343.932928,346.03008,348.12723200000005,350.22438400000004],"y":[0.2860000133514404,0.2460000067949295,0.2720000147819519,0.2800000011920929,0.2660000026226043,0.2820000052452087,0.2879999876022339,0.2899999916553497,0.2980000078678131,0.2840000092983246,0.3019999861717224,0.3019999861717224,0.3100000023841858,0.2960000038146972,0.3260000050067901,0.3160000145435333,0.3079999983310699,0.3059999942779541,0.3199999928474426,0.3100000023841858,0.3240000009536743,0.3079999983310699,0.3219999969005584,0.3140000104904175,0.3199999928474426,0.3300000131130218,0.3219999969005584,0.3260000050067901,0.3160000145435333,0.3420000076293945,0.3160000145435333,0.3219999969005584,0.3260000050067901,0.328000009059906,0.3219999969005584,0.3160000145435333,0.3219999969005584,0.3240000009536743,0.3540000021457672,0.3319999873638153,0.3240000009536743,0.335999995470047,0.3140000104904175,0.3260000050067901,0.3240000009536743,0.3400000035762787,0.335999995470047,0.3339999914169311,0.3400000035762787,0.3240000009536743,0.3400000035762787,0.3319999873638153,0.3379999995231628,0.3260000050067901,0.328000009059906,0.3199999928474426,0.3219999969005584,0.3140000104904175,0.3420000076293945,0.3339999914169311,0.3420000076293945,0.3339999914169311,0.328000009059906,0.3300000131130218,0.3400000035762787,0.3519999980926513,0.3459999859333038,0.3260000050067901,0.335999995470047,0.335999995470047,0.328000009059906,0.335999995470047,0.3479999899864197,0.3379999995231628,0.3339999914169311,0.3400000035762787,0.3400000035762787,0.3379999995231628,0.3260000050067901,0.3400000035762787,0.3379999995231628,0.3379999995231628,0.3240000009536743,0.3499999940395355,0.335999995470047,0.3379999995231628,0.328000009059906,0.331999
9873638153,0.3479999899864197,0.3459999859333038,0.3199999928474426,0.3440000116825104,0.3459999859333038,0.3319999873638153,0.3440000116825104,0.3499999940395355,0.328000009059906,0.3479999899864197,0.3440000116825104,0.3300000131130218,0.3339999914169311,0.3379999995231628,0.328000009059906,0.335999995470047,0.3499999940395355,0.3519999980926513,0.3379999995231628,0.3580000102519989,0.356000006198883,0.3519999980926513,0.3379999995231628,0.3339999914169311,0.3300000131130218,0.328000009059906,0.3519999980926513,0.3540000021457672,0.3420000076293945,0.3459999859333038,0.3440000116825104,0.3440000116825104,0.3420000076293945,0.3540000021457672,0.3459999859333038,0.3319999873638153,0.3339999914169311,0.3440000116825104,0.3379999995231628,0.3379999995231628,0.3379999995231628,0.3499999940395355,0.3400000035762787,0.3479999899864197,0.3400000035762787,0.3459999859333038,0.3459999859333038,0.356000006198883,0.3499999940395355,0.3580000102519989,0.3400000035762787,0.3479999899864197,0.3400000035762787,0.3459999859333038,0.3400000035762787,0.335999995470047,0.3519999980926513,0.3440000116825104,0.3379999995231628,0.3499999940395355,0.3400000035762787,0.3499999940395355,0.3440000116825104,0.3499999940395355,0.3420000076293945,0.335999995470047,0.3479999899864197,0.3420000076293945,0.3519999980926513,0.3459999859333038,0.3479999899864197,0.3400000035762787,0.3479999899864197,0.3479999899864197,0.3479999899864197,0.3499999940395355,0.3519999980926513,0.3479999899864197,0.356000006198883,0.3540000021457672],"label":"RedPajama2"}},"layout":{"xaxis":{"title":{"text":"Training tokens (billions)"}},"yaxis":{"range":[0.39,0.5]},"title":{"text":"Dataset Ablations"}}}
 
 
data/plots/dataset_ablations/piqa_acc_norm.json DELETED
@@ -1 +0,0 @@
1
- {"data":{"FineWeb (ours)":{"x":[0.0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,29.360128000000003,31.45728,33.554432000000006,35.651584,37.748736,39.845888,41.94304,44.040192000000005,46.137344000000006,48.234496,50.331648,52.4288,54.525952000000004,56.623104000000005,58.720256000000006,60.817408,62.91456,65.011712,67.10886400000001,69.206016,71.303168,73.40032000000001,75.497472,77.59462400000001,79.691776,81.788928,83.88608,85.983232,88.08038400000001,90.177536,92.27468800000001,94.37184,96.468992,98.56614400000001,100.663296,102.76044800000001,104.8576,106.95475200000001,109.05190400000001,111.149056,113.24620800000001,115.34336,117.44051200000001,119.537664,121.634816,123.73196800000001,125.82912,127.92627200000001,130.023424,132.120576,134.21772800000002,136.31488000000002,138.412032,140.509184,142.606336,144.70348800000002,146.80064000000002,148.897792,150.994944,153.092096,155.18924800000002,157.28640000000001,159.383552,161.480704,163.577856,165.67500800000002,167.77216,169.869312,171.966464,174.06361600000002,176.16076800000002,178.25792,180.355072,182.452224,184.54937600000002,186.64652800000002,188.74368,190.840832,192.937984,195.03513600000002,197.13228800000002,199.22944,201.326592,203.423744,205.52089600000002,207.61804800000002,209.7152,211.812352,213.90950400000003,216.00665600000002,218.10380800000001,220.20096,222.298112,224.39526400000003,226.49241600000002,228.589568,230.68672,232.783872,234.88102400000002,236.97817600000002,239.075328,241.17248,243.269632,245.36678400000002,247.46393600000002,249.561088,251.65824,253.75539200000003,255.85254400000002,257.949696,260.046848,262.144,264.241152,266.338304,268.43545600000004,270.53260800000004,272.62976000000003,274.726912,276.824064,278.921216,281.018368,283.11552,285.212672,287.309824,289.40697600000004,291.50412800000004,293.60128000000003,295.698432,29
7.795584,299.892736,301.989888,304.08704,306.184192,308.28134400000005,310.37849600000004,312.47564800000004,314.57280000000003,316.669952,318.767104,320.864256,322.961408,325.05856,327.155712,329.25286400000005,331.35001600000004,333.44716800000003,335.54432,337.641472,339.738624,341.835776,343.932928,346.03008,348.12723200000005,350.22438400000004],"y":[0.5099999904632568,0.6190000176429749,0.6549999713897705,0.6769999861717224,0.6899999976158142,0.6869999766349792,0.7149999737739563,0.7179999947547913,0.7179999947547913,0.7319999933242798,0.7390000224113464,0.7350000143051147,0.7480000257492065,0.7440000176429749,0.7409999966621399,0.7440000176429749,0.7580000162124634,0.7419999837875366,0.7440000176429749,0.75,0.734000027179718,0.746999979019165,0.7459999918937683,0.7390000224113464,0.7490000128746033,0.7379999756813049,0.7429999709129333,0.7390000224113464,0.7360000014305115,0.7419999837875366,0.7480000257492065,0.7480000257492065,0.7490000128746033,0.7440000176429749,0.75,0.7540000081062317,0.7490000128746033,0.7549999952316284,0.7429999709129333,0.7540000081062317,0.753000020980835,0.7540000081062317,0.7440000176429749,0.7570000290870667,0.7400000095367432,0.7490000128746033,0.7549999952316284,0.7559999823570251,0.7580000162124634,0.7609999775886536,0.7480000257492065,0.7490000128746033,0.7599999904632568,0.7609999775886536,0.7540000081062317,0.753000020980835,0.7490000128746033,0.7480000257492065,0.7440000176429749,0.7549999952316284,0.7540000081062317,0.7559999823570251,0.7490000128746033,0.7409999966621399,0.7580000162124634,0.75,0.746999979019165,0.7400000095367432,0.7559999823570251,0.7490000128746033,0.7429999709129333,0.7519999742507935,0.7549999952316284,0.7559999823570251,0.753000020980835,0.753000020980835,0.746999979019165,0.746999979019165,0.7559999823570251,0.7549999952316284,0.7549999952316284,0.7570000290870667,0.7599999904632568,0.7599999904632568,0.7549999952316284,0.765999972820282,0.7649999856948853,0.7630000114440918,0.7580000162124634,0.7
599999904632568,0.7559999823570251,0.7490000128746033,0.7620000243186951,0.7519999742507935,0.7580000162124634,0.7559999823570251,0.7580000162124634,0.7670000195503235,0.7599999904632568,0.7559999823570251,0.7580000162124634,0.7570000290870667,0.7649999856948853,0.7590000033378601,0.7649999856948853,0.7649999856948853,0.7609999775886536,0.7519999742507935,0.7639999985694885,0.7699999809265137,0.7689999938011169,0.7609999775886536,0.765999972820282,0.7710000276565552,0.7590000033378601,0.7710000276565552,0.7639999985694885,0.7710000276565552,0.7730000019073486,0.7680000066757202,0.7590000033378601,0.7639999985694885,0.7609999775886536,0.7559999823570251,0.7749999761581421,0.7680000066757202,0.7599999904632568,0.7609999775886536,0.7599999904632568,0.7580000162124634,0.7599999904632568,0.7649999856948853,0.765999972820282,0.7580000162124634,0.7739999890327454,0.7739999890327454,0.7739999890327454,0.7620000243186951,0.7749999761581421,0.7699999809265137,0.7670000195503235,0.7720000147819519,0.7739999890327454,0.7739999890327454,0.7649999856948853,0.7710000276565552,0.7649999856948853,0.7699999809265137,0.7760000228881836,0.7730000019073486,0.7699999809265137,0.7739999890327454,0.7720000147819519,0.7670000195503235,0.7720000147819519,0.7749999761581421,0.7699999809265137,0.7689999938011169,0.7639999985694885,0.7760000228881836,0.7670000195503235,0.7670000195503235,0.7689999938011169,0.7760000228881836,0.7670000195503235,0.7649999856948853,0.7720000147819519,0.7609999775886536],"label":"FineWeb 
(ours)"},"C4":{"x":[0.0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,29.360128000000003,31.45728,33.554432000000006,35.651584,37.748736,39.845888,41.94304,44.040192000000005,46.137344000000006,48.234496,50.331648,52.4288,54.525952000000004,56.623104000000005,58.720256000000006,60.817408,62.91456,65.011712,67.10886400000001,69.206016,71.303168,73.40032000000001,75.497472,77.59462400000001,79.691776,81.788928,83.88608,85.983232,88.08038400000001,90.177536,92.27468800000001,94.37184,96.468992,98.56614400000001,100.663296,102.76044800000001,104.8576,106.95475200000001,109.05190400000001,111.149056,113.24620800000001,115.34336,117.44051200000001,119.537664,121.634816,123.73196800000001,125.82912,127.92627200000001,130.023424,132.120576,134.21772800000002,136.31488000000002,138.412032,140.509184,142.606336,144.70348800000002,146.80064000000002,148.897792,150.994944,153.092096,155.18924800000002,157.28640000000001,159.383552,161.480704,163.577856,165.67500800000002,167.77216,169.869312,171.966464,174.06361600000002,176.16076800000002,178.25792,180.355072,182.452224,184.54937600000002,186.64652800000002,188.74368,190.840832,192.937984,195.03513600000002,197.13228800000002,199.22944,201.326592,203.423744,205.52089600000002,207.61804800000002,209.7152,211.812352,213.90950400000003,216.00665600000002,218.10380800000001,220.20096,222.298112,224.39526400000003,226.49241600000002,228.589568,230.68672,232.783872,234.88102400000002,236.97817600000002,239.075328,241.17248,243.269632,245.36678400000002,247.46393600000002,249.561088,251.65824,253.75539200000003,255.85254400000002,257.949696,260.046848,262.144,264.241152,266.338304,268.43545600000004,270.53260800000004,272.62976000000003,274.726912,276.824064,278.921216,281.018368,283.11552,285.212672,287.309824,289.40697600000004,291.50412800000004,293.60128000000003,295.698432,297.795584,299.8
92736,301.989888,304.08704,306.184192,308.28134400000005,310.37849600000004,312.47564800000004,314.57280000000003,316.669952,318.767104,320.864256,322.961408,325.05856,327.155712,329.25286400000005,331.35001600000004,333.44716800000003,335.54432,337.641472,339.738624,341.835776,343.932928,346.03008,348.12723200000005,350.22438400000004],"y":[0.5099999904632568,0.6190000176429749,0.6650000214576721,0.6690000295639038,0.6909999847412109,0.7099999785423279,0.7089999914169312,0.7099999785423279,0.7129999995231628,0.7099999785423279,0.7160000205039978,0.722000002861023,0.7260000109672546,0.7200000286102295,0.7200000286102295,0.7250000238418579,0.7279999852180481,0.7379999756813049,0.7300000190734863,0.7409999966621399,0.7400000095367432,0.7329999804496765,0.7379999756813049,0.7350000143051147,0.734000027179718,0.7350000143051147,0.7519999742507935,0.7599999904632568,0.7379999756813049,0.75,0.7509999871253967,0.7459999918937683,0.746999979019165,0.7440000176429749,0.7480000257492065,0.7429999709129333,0.7429999709129333,0.7450000047683716,0.7570000290870667,0.75,0.7509999871253967,0.7620000243186951,0.7570000290870667,0.7559999823570251,0.7490000128746033,0.7519999742507935,0.753000020980835,0.7599999904632568,0.7549999952316284,0.7540000081062317,0.7590000033378601,0.7509999871253967,0.7549999952316284,0.7480000257492065,0.7580000162124634,0.7490000128746033,0.753000020980835,0.753000020980835,0.7599999904632568,0.7480000257492065,0.7590000033378601,0.7450000047683716,0.7440000176429749,0.746999979019165,0.75,0.7670000195503235,0.7590000033378601,0.7519999742507935,0.7620000243186951,0.7639999985694885,0.7599999904632568,0.7549999952316284,0.7540000081062317,0.753000020980835,0.7609999775886536,0.7699999809265137,0.7630000114440918,0.7620000243186951,0.7570000290870667,0.7670000195503235,0.7599999904632568,0.7599999904632568,0.753000020980835,0.7549999952316284,0.7649999856948853,0.7490000128746033,0.7639999985694885,0.7559999823570251,0.7609999775886536,0.76899999380111
69,0.7689999938011169,0.7630000114440918,0.7580000162124634,0.7649999856948853,0.7699999809265137,0.7620000243186951,0.7590000033378601,0.7680000066757202,0.7630000114440918,0.7609999775886536,0.7739999890327454,0.7649999856948853,0.7720000147819519,0.7689999938011169,0.7630000114440918,0.765999972820282,0.7649999856948853,0.7620000243186951,0.7620000243186951,0.7670000195503235,0.7609999775886536,0.7680000066757202,0.7670000195503235,0.7620000243186951,0.7710000276565552,0.7680000066757202,0.765999972820282,0.7670000195503235,0.7739999890327454,0.7630000114440918,0.7730000019073486,0.7699999809265137,0.7620000243186951,0.765999972820282,0.7670000195503235,0.777999997138977,0.7649999856948853,0.7689999938011169,0.7710000276565552,0.7620000243186951,0.7720000147819519,0.7710000276565552,0.7620000243186951,0.7670000195503235,0.7710000276565552,0.7670000195503235,0.7670000195503235,0.7699999809265137,0.7639999985694885,0.7689999938011169,0.7710000276565552,0.7739999890327454,0.7799999713897705,0.7720000147819519,0.7699999809265137,0.7720000147819519,0.7699999809265137,0.777999997138977,0.7760000228881836,0.7760000228881836,0.7739999890327454,0.7739999890327454,0.7689999938011169,0.7689999938011169,0.7699999809265137,0.7730000019073486,0.7699999809265137,0.7689999938011169,0.7699999809265137,0.7730000019073486,0.7699999809265137,0.7770000100135803,0.7820000052452087,0.7639999985694885,0.7730000019073486,0.7749999761581421,0.7760000228881836,0.7710000276565552],"label":"C4"},"Dolma":{"x":[0.0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,29.360128000000003,31.45728,33.554432000000006,35.651584,37.748736,39.845888,41.94304,44.040192000000005,46.137344000000006,48.234496,50.331648,52.4288,54.525952000000004,56.623104000000005,58.720256000000006,60.817408,62.91456,65.011712,67.10886400000001,69.206016,71.303168,73.40032000000001,75.49
7472,77.59462400000001,79.691776,81.788928,83.88608,85.983232,88.08038400000001,90.177536,92.27468800000001,94.37184,96.468992,98.56614400000001,100.663296,102.76044800000001,104.8576,106.95475200000001,109.05190400000001,111.149056,113.24620800000001,115.34336,117.44051200000001,119.537664,121.634816,123.73196800000001,125.82912,127.92627200000001,130.023424,132.120576,134.21772800000002,136.31488000000002,138.412032,140.509184,142.606336,144.70348800000002,146.80064000000002,148.897792,150.994944,153.092096,155.18924800000002,157.28640000000001,159.383552,161.480704,163.577856,165.67500800000002,167.77216,169.869312,171.966464,174.06361600000002,176.16076800000002,178.25792,180.355072,182.452224,184.54937600000002,186.64652800000002,188.74368,190.840832,192.937984,195.03513600000002,197.13228800000002,199.22944,201.326592,203.423744,205.52089600000002,207.61804800000002,209.7152,211.812352,213.90950400000003,216.00665600000002,218.10380800000001,220.20096,222.298112,224.39526400000003,226.49241600000002,228.589568,230.68672,232.783872,234.88102400000002,236.97817600000002,239.075328,241.17248,243.269632,245.36678400000002,247.46393600000002,249.561088,251.65824,253.75539200000003,255.85254400000002,257.949696,260.046848,262.144,264.241152,266.338304,268.43545600000004,270.53260800000004,272.62976000000003,274.726912,276.824064,278.921216,281.018368,283.11552,285.212672,287.309824,289.40697600000004,291.50412800000004,293.60128000000003,295.698432,297.795584,299.892736,301.989888,304.08704,306.184192,308.28134400000005,310.37849600000004,312.47564800000004,314.57280000000003,316.669952,318.767104,320.864256,322.961408,325.05856,327.155712,329.25286400000005,331.35001600000004,333.44716800000003,335.54432,337.641472,339.738624,341.835776,343.932928,346.03008,348.12723200000005,350.22438400000004],"y":[0.5099999904632568,0.6000000238418579,0.6489999890327454,0.671999990940094,0.6710000038146973,0.6890000104904175,0.6949999928474426,0.7049999833106995,0.70099997520446
78,0.7080000042915344,0.7099999785423279,0.7080000042915344,0.7110000252723694,0.7089999914169312,0.7200000286102295,0.7210000157356262,0.718999981880188,0.7289999723434448,0.7160000205039978,0.7250000238418579,0.7269999980926514,0.7200000286102295,0.7369999885559082,0.7260000109672546,0.7279999852180481,0.7379999756813049,0.7170000076293945,0.7300000190734863,0.7350000143051147,0.7350000143051147,0.7300000190734863,0.7360000014305115,0.7350000143051147,0.7360000014305115,0.7289999723434448,0.7379999756813049,0.7369999885559082,0.7279999852180481,0.7429999709129333,0.7360000014305115,0.7379999756813049,0.7279999852180481,0.7260000109672546,0.7329999804496765,0.7310000061988831,0.7310000061988831,0.7459999918937683,0.7369999885559082,0.746999979019165,0.7409999966621399,0.7379999756813049,0.7379999756813049,0.7559999823570251,0.7540000081062317,0.734000027179718,0.7429999709129333,0.7409999966621399,0.7440000176429749,0.7409999966621399,0.7570000290870667,0.7490000128746033,0.7450000047683716,0.7570000290870667,0.7490000128746033,0.746999979019165,0.746999979019165,0.7580000162124634,0.7409999966621399,0.7409999966621399,0.7490000128746033,0.7570000290870667,0.7559999823570251,0.7519999742507935,0.7369999885559082,0.7390000224113464,0.7540000081062317,0.7620000243186951,0.7549999952316284,0.7509999871253967,0.7609999775886536,0.7559999823570251,0.7509999871253967,0.7480000257492065,0.7440000176429749,0.7480000257492065,0.7590000033378601,0.7509999871253967,0.75,0.7620000243186951,0.7490000128746033,0.7580000162124634,0.7490000128746033,0.7540000081062317,0.7620000243186951,0.753000020980835,0.7580000162124634,0.753000020980835,0.7559999823570251,0.7590000033378601,0.7639999985694885,0.7549999952316284,0.7509999871253967,0.753000020980835,0.7609999775886536,0.7540000081062317,0.7559999823570251,0.7670000195503235,0.753000020980835,0.7549999952316284,0.7649999856948853,0.7609999775886536,0.7630000114440918,0.7710000276565552,0.7630000114440918,0.7559999823570251,0.7620
000243186951,0.7540000081062317,0.753000020980835,0.7689999938011169,0.7630000114440918,0.7590000033378601,0.7559999823570251,0.753000020980835,0.7649999856948853,0.7570000290870667,0.7580000162124634,0.7609999775886536,0.7649999856948853,0.765999972820282,0.753000020980835,0.7620000243186951,0.7609999775886536,0.7639999985694885,0.7609999775886536,0.7630000114440918,0.7649999856948853,0.7639999985694885,0.7609999775886536,0.7649999856948853,0.7559999823570251,0.7599999904632568,0.765999972820282,0.7639999985694885,0.7639999985694885,0.7620000243186951,0.7639999985694885,0.7639999985694885,0.7689999938011169,0.7649999856948853,0.7680000066757202,0.765999972820282,0.7559999823570251,0.7549999952316284,0.765999972820282,0.7689999938011169,0.765999972820282,0.7699999809265137,0.7749999761581421,0.7699999809265137,0.7680000066757202,0.7730000019073486,0.7689999938011169,0.7720000147819519,0.7680000066757202,0.7720000147819519,0.7680000066757202,0.7710000276565552,0.7689999938011169],"label":"Dolma"},"RefinedWeb":{"x":[0.0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,29.360128000000003,31.45728,33.554432000000006,35.651584,37.748736,39.845888,41.94304,44.040192000000005,46.137344000000006,48.234496,50.331648,52.4288,54.525952000000004,56.623104000000005,58.720256000000006,60.817408,62.91456,65.011712,67.10886400000001,69.206016,71.303168,73.40032000000001,75.497472,77.59462400000001,79.691776,81.788928,83.88608,85.983232,88.08038400000001,90.177536,92.27468800000001,94.37184,96.468992,98.56614400000001,100.663296,102.76044800000001,104.8576,106.95475200000001,109.05190400000001,111.149056,113.24620800000001,115.34336,117.44051200000001,119.537664,121.634816,123.73196800000001,125.82912,127.92627200000001,130.023424,132.120576,134.21772800000002,136.31488000000002,138.412032,140.509184,142.606336,144.70348800000002,146.800640000000
02,148.897792,150.994944,153.092096,155.18924800000002,157.28640000000001,159.383552,161.480704,163.577856,165.67500800000002,167.77216,169.869312,171.966464,174.06361600000002,176.16076800000002,178.25792,180.355072,182.452224,184.54937600000002,186.64652800000002,188.74368,190.840832,192.937984,195.03513600000002,197.13228800000002,199.22944,201.326592,203.423744,205.52089600000002,207.61804800000002,209.7152,211.812352,213.90950400000003,216.00665600000002,218.10380800000001,220.20096,222.298112,224.39526400000003,226.49241600000002,228.589568,230.68672,232.783872,234.88102400000002,236.97817600000002,239.075328,241.17248,243.269632,245.36678400000002,247.46393600000002,249.561088,251.65824,253.75539200000003,255.85254400000002,257.949696,260.046848,262.144,264.241152,266.338304,268.43545600000004,270.53260800000004,272.62976000000003,274.726912,276.824064,278.921216,281.018368,283.11552,285.212672,287.309824,289.40697600000004,291.50412800000004,293.60128000000003,295.698432,297.795584,299.892736,301.989888,304.08704,306.184192,308.28134400000005,310.37849600000004,312.47564800000004,314.57280000000003,316.669952,318.767104,320.864256,322.961408,325.05856,327.155712,329.25286400000005,331.35001600000004,333.44716800000003,335.54432,337.641472,339.738624,341.835776,343.932928,346.03008,348.12723200000005,350.22438400000004],"y":[0.5099999904632568,0.6019999980926514,0.652999997138977,0.6710000038146973,0.6740000247955322,0.6899999976158142,0.6919999718666077,0.6909999847412109,0.7070000171661377,0.7089999914169312,0.7129999995231628,0.7229999899864197,0.7120000123977661,0.7200000286102295,0.7279999852180481,0.7369999885559082,0.7390000224113464,0.7350000143051147,0.7319999933242798,0.7279999852180481,0.7269999980926514,0.7459999918937683,0.7400000095367432,0.7390000224113464,0.7319999933242798,0.7390000224113464,0.7379999756813049,0.7390000224113464,0.7360000014305115,0.7440000176429749,0.7400000095367432,0.7360000014305115,0.7480000257492065,0.7360000014305115,0
.7440000176429749,0.7459999918937683,0.7409999966621399,0.746999979019165,0.7440000176429749,0.7450000047683716,0.753000020980835,0.7390000224113464,0.7490000128746033,0.7419999837875366,0.7390000224113464,0.7559999823570251,0.7519999742507935,0.7549999952316284,0.7419999837875366,0.7490000128746033,0.7540000081062317,0.7480000257492065,0.7450000047683716,0.7429999709129333,0.7509999871253967,0.7549999952316284,0.7490000128746033,0.7490000128746033,0.7400000095367432,0.753000020980835,0.75,0.7509999871253967,0.7570000290870667,0.7590000033378601,0.7570000290870667,0.7329999804496765,0.7540000081062317,0.746999979019165,0.7409999966621399,0.7590000033378601,0.7509999871253967,0.7570000290870667,0.75,0.7540000081062317,0.7480000257492065,0.7580000162124634,0.7639999985694885,0.7630000114440918,0.7590000033378601,0.7549999952316284,0.7480000257492065,0.7509999871253967,0.7570000290870667,0.75,0.7540000081062317,0.7480000257492065,0.7549999952316284,0.7559999823570251,0.7580000162124634,0.7580000162124634,0.753000020980835,0.7490000128746033,0.7540000081062317,0.7639999985694885,0.7580000162124634,0.7519999742507935,0.7590000033378601,0.75,0.7570000290870667,0.7620000243186951,0.7710000276565552,0.7739999890327454,0.7620000243186951,0.7549999952316284,0.7599999904632568,0.765999972820282,0.7680000066757202,0.7639999985694885,0.7540000081062317,0.7649999856948853,0.7649999856948853,0.7609999775886536,0.7549999952316284,0.765999972820282,0.7639999985694885,0.7580000162124634,0.7710000276565552,0.7570000290870667,0.7630000114440918,0.7580000162124634,0.7599999904632568,0.7649999856948853,0.7670000195503235,0.7699999809265137,0.7710000276565552,0.7559999823570251,0.7609999775886536,0.7620000243186951,0.7620000243186951,0.7609999775886536,0.753000020980835,0.7570000290870667,0.7620000243186951,0.7609999775886536,0.7609999775886536,0.7559999823570251,0.7540000081062317,0.7570000290870667,0.7639999985694885,0.7590000033378601,0.7680000066757202,0.7680000066757202,0.76599997282
0282,0.765999972820282,0.7670000195503235,0.7739999890327454,0.7649999856948853,0.7749999761581421,0.7699999809265137,0.7639999985694885,0.7680000066757202,0.7630000114440918,0.7680000066757202,0.7699999809265137,0.7739999890327454,0.7749999761581421,0.765999972820282,0.7680000066757202,0.7710000276565552,0.7680000066757202,0.765999972820282,0.7689999938011169,0.7760000228881836,0.7710000276565552,0.7680000066757202,0.7649999856948853,0.7720000147819519,0.7730000019073486],"label":"RefinedWeb"},"SlimPajama":{"x":[0.0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,29.360128000000003,31.45728,33.554432000000006,35.651584,37.748736,39.845888,41.94304,44.040192000000005,46.137344000000006,48.234496,50.331648,52.4288,54.525952000000004,56.623104000000005,58.720256000000006,60.817408,62.91456,65.011712,67.10886400000001,69.206016,71.303168,73.40032000000001,75.497472,77.59462400000001,79.691776,81.788928,83.88608,85.983232,88.08038400000001,90.177536,92.27468800000001,94.37184,96.468992,98.56614400000001,100.663296,102.76044800000001,104.8576,106.95475200000001,109.05190400000001,111.149056,113.24620800000001,115.34336,117.44051200000001,119.537664,121.634816,123.73196800000001,125.82912,127.92627200000001,130.023424,132.120576,134.21772800000002,136.31488000000002,138.412032,140.509184,142.606336,144.70348800000002,146.80064000000002,148.897792,150.994944,153.092096,155.18924800000002,157.28640000000001,159.383552,161.480704,163.577856,165.67500800000002,167.77216,169.869312,171.966464,174.06361600000002,176.16076800000002,178.25792,180.355072,182.452224,184.54937600000002,186.64652800000002,188.74368,190.840832,192.937984,195.03513600000002,197.13228800000002,199.22944,201.326592,203.423744,205.52089600000002,207.61804800000002,209.7152,211.812352,213.90950400000003,216.00665600000002,218.10380800000001,220.20096,222.298112,224.395
26400000003,226.49241600000002,228.589568,230.68672,232.783872,234.88102400000002,236.97817600000002,239.075328,241.17248,243.269632,245.36678400000002,247.46393600000002,249.561088,251.65824,253.75539200000003,255.85254400000002,257.949696,260.046848,262.144,264.241152,266.338304,268.43545600000004,270.53260800000004,272.62976000000003,274.726912,276.824064,278.921216,281.018368,283.11552,285.212672,287.309824,289.40697600000004,291.50412800000004,293.60128000000003,295.698432,297.795584,299.892736,301.989888,304.08704,306.184192,308.28134400000005,310.37849600000004,312.47564800000004,314.57280000000003,316.669952,318.767104,320.864256,322.961408,325.05856,327.155712,329.25286400000005,331.35001600000004,333.44716800000003,335.54432,337.641472,339.738624,341.835776,343.932928,346.03008,348.12723200000005,350.22438400000004],"y":[0.5049999952316284,0.597000002861023,0.6169999837875366,0.6140000224113464,0.640999972820282,0.6690000295639038,0.6669999957084656,0.6610000133514404,0.6840000152587891,0.6800000071525574,0.6800000071525574,0.6880000233650208,0.6869999766349792,0.6909999847412109,0.6990000009536743,0.6890000104904175,0.7049999833106995,0.7129999995231628,0.7080000042915344,0.7049999833106995,0.7020000219345093,0.703000009059906,0.7049999833106995,0.7139999866485596,0.6990000009536743,0.7110000252723694,0.7129999995231628,0.7160000205039978,0.7229999899864197,0.7160000205039978,0.7089999914169312,0.703000009059906,0.7120000123977661,0.7210000157356262,0.7260000109672546,0.7289999723434448,0.7149999737739563,0.7200000286102295,0.7179999947547913,0.7160000205039978,0.7239999771118164,0.7239999771118164,0.7229999899864197,0.7250000238418579,0.734000027179718,0.7160000205039978,0.7260000109672546,0.7250000238418579,0.7160000205039978,0.7300000190734863,0.7269999980926514,0.7250000238418579,0.7239999771118164,0.7269999980926514,0.734000027179718,0.7210000157356262,0.7300000190734863,0.7229999899864197,0.7239999771118164,0.718999981880188,0.7310000061988831,0.717
9999947547913,0.7390000224113464,0.7350000143051147,0.7360000014305115,0.7379999756813049,0.7289999723434448,0.7379999756813049,0.718999981880188,0.7459999918937683,0.7300000190734863,0.7179999947547913,0.7379999756813049,0.7310000061988831,0.7269999980926514,0.7329999804496765,0.7229999899864197,0.7229999899864197,0.7210000157356262,0.7350000143051147,0.722000002861023,0.722000002861023,0.7260000109672546,0.7239999771118164,0.7229999899864197,0.7379999756813049,0.7239999771118164,0.7369999885559082,0.7379999756813049,0.7300000190734863,0.7319999933242798,0.7360000014305115,0.7300000190734863,0.7490000128746033,0.7319999933242798,0.7289999723434448,0.722000002861023,0.7300000190734863,0.7269999980926514,0.7329999804496765,0.7379999756813049,0.7329999804496765,0.7310000061988831,0.7310000061988831,0.7319999933242798,0.7310000061988831,0.7310000061988831,0.718999981880188,0.7350000143051147,0.7319999933242798,0.7310000061988831,0.7319999933242798,0.7250000238418579,0.7269999980926514,0.7319999933242798,0.734000027179718,0.7379999756813049,0.7310000061988831,0.7300000190734863,0.7409999966621399,0.7279999852180481,0.7269999980926514,0.722000002861023,0.7360000014305115,0.7390000224113464,0.7319999933242798,0.7300000190734863,0.7350000143051147,0.7409999966621399,0.7300000190734863,0.7329999804496765,0.7360000014305115,0.7429999709129333,0.7279999852180481,0.7269999980926514,0.7400000095367432,0.7400000095367432,0.7429999709129333,0.7310000061988831,0.7350000143051147,0.734000027179718,0.7360000014305115,0.7450000047683716,0.7440000176429749,0.7429999709129333,0.7419999837875366,0.7540000081062317,0.7409999966621399,0.7429999709129333,0.7549999952316284,0.7519999742507935,0.7429999709129333,0.7540000081062317,0.7480000257492065,0.7480000257492065,0.75,0.7509999871253967,0.746999979019165,0.7490000128746033,0.7480000257492065,0.7480000257492065,0.753000020980835,0.7490000128746033,0.7419999837875366,0.7360000014305115,0.7540000081062317,0.7490000128746033,0.7570000290870
667],"label":"SlimPajama"},"The Pile":{"x":[0.0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,29.360128000000003,31.45728,33.554432000000006,35.651584,37.748736,39.845888,41.94304,44.040192000000005,46.137344000000006,48.234496,50.331648,52.4288,54.525952000000004,56.623104000000005,58.720256000000006,60.817408,62.91456,65.011712,67.10886400000001,69.206016,71.303168,73.40032000000001,75.497472,77.59462400000001,79.691776,81.788928,83.88608,85.983232,88.08038400000001,90.177536,92.27468800000001,94.37184,96.468992,98.56614400000001,100.663296,102.76044800000001,104.8576,106.95475200000001,109.05190400000001,111.149056,113.24620800000001,115.34336,117.44051200000001,119.537664,121.634816,123.73196800000001,125.82912,127.92627200000001,130.023424,132.120576,134.21772800000002,136.31488000000002,138.412032,140.509184,142.606336,144.70348800000002,146.80064000000002,148.897792,150.994944,153.092096,155.18924800000002,157.28640000000001,159.383552,161.480704,163.577856,165.67500800000002,167.77216,169.869312,171.966464,174.06361600000002,176.16076800000002,178.25792,180.355072,182.452224,184.54937600000002,186.64652800000002,188.74368,190.840832,192.937984,195.03513600000002,197.13228800000002,199.22944,201.326592,203.423744,205.52089600000002,207.61804800000002,209.7152,211.812352,213.90950400000003,216.00665600000002,218.10380800000001,220.20096,222.298112,224.39526400000003,226.49241600000002,228.589568,230.68672,232.783872,234.88102400000002,236.97817600000002,239.075328,241.17248,243.269632,245.36678400000002,247.46393600000002,249.561088,251.65824,253.75539200000003,255.85254400000002,257.949696,260.046848,262.144,264.241152,266.338304,268.43545600000004,270.53260800000004,272.62976000000003,274.726912,276.824064,278.921216,281.018368,283.11552,285.212672,287.309824,289.40697600000004,291.50412800000004,293.60128000000003,295
.698432,297.795584,299.892736,301.989888,304.08704,306.184192,308.28134400000005,310.37849600000004,312.47564800000004,314.57280000000003,316.669952,318.767104,320.864256,322.961408,325.05856,327.155712,329.25286400000005,331.35001600000004,333.44716800000003,335.54432,337.641472,339.738624,341.835776,343.932928,346.03008,348.12723200000005,350.22438400000004],"y":[0.5049999952316284,0.5809999704360962,0.6010000109672546,0.6190000176429749,0.6259999871253967,0.6439999938011169,0.6449999809265137,0.6420000195503235,0.6570000052452087,0.6520000100135803,0.6579999923706055,0.6629999876022339,0.6589999794960022,0.6690000295639038,0.6570000052452087,0.6759999990463257,0.671999990940094,0.6589999794960022,0.6690000295639038,0.6669999957084656,0.675000011920929,0.6740000247955322,0.675000011920929,0.6830000281333923,0.6669999957084656,0.6669999957084656,0.6809999942779541,0.6819999814033508,0.6710000038146973,0.6859999895095825,0.6830000281333923,0.6759999990463257,0.6830000281333923,0.6850000023841858,0.6899999976158142,0.6909999847412109,0.6880000233650208,0.6909999847412109,0.6840000152587891,0.6899999976158142,0.6959999799728394,0.6890000104904175,0.6869999766349792,0.6919999718666077,0.6949999928474426,0.699999988079071,0.7039999961853027,0.6880000233650208,0.699999988079071,0.6990000009536743,0.699999988079071,0.6919999718666077,0.6970000267028809,0.6930000185966492,0.6859999895095825,0.6970000267028809,0.7049999833106995,0.6980000138282776,0.699999988079071,0.6990000009536743,0.6919999718666077,0.6970000267028809,0.6990000009536743,0.7089999914169312,0.7120000123977661,0.7020000219345093,0.6949999928474426,0.7020000219345093,0.699999988079071,0.6990000009536743,0.6940000057220459,0.7059999704360962,0.6959999799728394,0.7170000076293945,0.7059999704360962,0.7170000076293945,0.7099999785423279,0.7120000123977661,0.703000009059906,0.7080000042915344,0.7120000123977661,0.7080000042915344,0.7059999704360962,0.7099999785423279,0.703000009059906,0.7009999752044678,0.712999
9995231628,0.703000009059906,0.7110000252723694,0.7139999866485596,0.7160000205039978,0.7110000252723694,0.7120000123977661,0.7099999785423279,0.7170000076293945,0.7179999947547913,0.7200000286102295,0.7200000286102295,0.7139999866485596,0.7239999771118164,0.7179999947547913,0.722000002861023,0.7200000286102295,0.7229999899864197,0.7250000238418579,0.7210000157356262,0.7200000286102295,0.7160000205039978,0.7269999980926514,0.7179999947547913,0.7200000286102295,0.7200000286102295,0.7170000076293945,0.7039999961853027,0.7179999947547913,0.7179999947547913,0.7200000286102295,0.7200000286102295,0.7160000205039978,0.7179999947547913,0.7279999852180481,0.722000002861023,0.7210000157356262,0.722000002861023,0.7149999737739563,0.7260000109672546,0.7229999899864197,0.7229999899864197,0.7279999852180481,0.7360000014305115,0.7200000286102295,0.7170000076293945,0.7200000286102295,0.7210000157356262,0.7210000157356262,0.7229999899864197,0.7269999980926514,0.7279999852180481,0.7250000238418579,0.7210000157356262,0.7279999852180481,0.7239999771118164,0.7300000190734863,0.7229999899864197,0.7250000238418579,0.7200000286102295,0.7300000190734863,0.722000002861023,0.7260000109672546,0.7300000190734863,0.7229999899864197,0.722000002861023,0.7210000157356262,0.7239999771118164,0.7269999980926514,0.7210000157356262,0.7269999980926514,0.734000027179718,0.7350000143051147,0.7289999723434448,0.7300000190734863,0.7269999980926514,0.7269999980926514,0.7210000157356262,0.7229999899864197,0.7310000061988831,0.7239999771118164,0.7200000286102295],"label":"The 
Pile"},"RedPajama2":{"x":[0.0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,29.360128000000003,31.45728,33.554432000000006,35.651584,37.748736,39.845888,41.94304,44.040192000000005,46.137344000000006,48.234496,50.331648,52.4288,54.525952000000004,56.623104000000005,58.720256000000006,60.817408,62.91456,65.011712,67.10886400000001,69.206016,71.303168,73.40032000000001,75.497472,77.59462400000001,79.691776,81.788928,83.88608,85.983232,88.08038400000001,90.177536,92.27468800000001,94.37184,96.468992,98.56614400000001,100.663296,102.76044800000001,104.8576,106.95475200000001,109.05190400000001,111.149056,113.24620800000001,115.34336,117.44051200000001,119.537664,121.634816,123.73196800000001,125.82912,127.92627200000001,130.023424,132.120576,134.21772800000002,136.31488000000002,138.412032,140.509184,142.606336,144.70348800000002,146.80064000000002,148.897792,150.994944,153.092096,155.18924800000002,157.28640000000001,159.383552,161.480704,163.577856,165.67500800000002,167.77216,169.869312,171.966464,174.06361600000002,176.16076800000002,178.25792,180.355072,182.452224,184.54937600000002,186.64652800000002,188.74368,190.840832,192.937984,195.03513600000002,197.13228800000002,199.22944,201.326592,203.423744,205.52089600000002,207.61804800000002,209.7152,211.812352,213.90950400000003,216.00665600000002,218.10380800000001,220.20096,222.298112,224.39526400000003,226.49241600000002,228.589568,230.68672,232.783872,234.88102400000002,236.97817600000002,239.075328,241.17248,243.269632,245.36678400000002,247.46393600000002,249.561088,251.65824,253.75539200000003,255.85254400000002,257.949696,260.046848,262.144,264.241152,266.338304,268.43545600000004,270.53260800000004,272.62976000000003,274.726912,276.824064,278.921216,281.018368,283.11552,285.212672,287.309824,289.40697600000004,291.50412800000004,293.60128000000003,295.698432,297.795584
,299.892736,301.989888,304.08704,306.184192,308.28134400000005,310.37849600000004,312.47564800000004,314.57280000000003,316.669952,318.767104,320.864256,322.961408,325.05856,327.155712,329.25286400000005,331.35001600000004,333.44716800000003,335.54432,337.641472,339.738624,341.835776,343.932928,346.03008,348.12723200000005,350.22438400000004],"y":[0.5040000081062317,0.5799999833106995,0.6079999804496765,0.625,0.6389999985694885,0.6510000228881836,0.6610000133514404,0.6579999923706055,0.6669999957084656,0.6809999942779541,0.6830000281333923,0.6769999861717224,0.6800000071525574,0.6840000152587891,0.6890000104904175,0.6919999718666077,0.6890000104904175,0.6859999895095825,0.6919999718666077,0.699999988079071,0.6909999847412109,0.6980000138282776,0.699999988079071,0.6940000057220459,0.6949999928474426,0.6940000057220459,0.6890000104904175,0.6859999895095825,0.6930000185966492,0.699999988079071,0.6970000267028809,0.7039999961853027,0.7039999961853027,0.6990000009536743,0.7070000171661377,0.7120000123977661,0.7239999771118164,0.7129999995231628,0.7020000219345093,0.703000009059906,0.7039999961853027,0.7009999752044678,0.7170000076293945,0.7129999995231628,0.7049999833106995,0.7139999866485596,0.7080000042915344,0.722000002861023,0.6970000267028809,0.7210000157356262,0.7099999785423279,0.7129999995231628,0.7020000219345093,0.718999981880188,0.7110000252723694,0.7239999771118164,0.7129999995231628,0.7300000190734863,0.722000002861023,0.7110000252723694,0.7170000076293945,0.7279999852180481,0.7279999852180481,0.7210000157356262,0.7110000252723694,0.7210000157356262,0.7200000286102295,0.7129999995231628,0.7170000076293945,0.7170000076293945,0.7179999947547913,0.7229999899864197,0.7229999899864197,0.7149999737739563,0.7129999995231628,0.7310000061988831,0.7160000205039978,0.7200000286102295,0.7250000238418579,0.7239999771118164,0.7250000238418579,0.7160000205039978,0.7279999852180481,0.7229999899864197,0.7170000076293945,0.7229999899864197,0.7329999804496765,0.728999972343444
8,0.734000027179718,0.7250000238418579,0.7269999980926514,0.7120000123977661,0.7239999771118164,0.7229999899864197,0.7120000123977661,0.7289999723434448,0.7269999980926514,0.7229999899864197,0.7289999723434448,0.7200000286102295,0.7239999771118164,0.7289999723434448,0.7260000109672546,0.7289999723434448,0.7149999737739563,0.7210000157356262,0.7279999852180481,0.7300000190734863,0.7329999804496765,0.7300000190734863,0.7279999852180481,0.7300000190734863,0.7319999933242798,0.7279999852180481,0.7279999852180481,0.7250000238418579,0.7310000061988831,0.7390000224113464,0.7319999933242798,0.7310000061988831,0.722000002861023,0.7319999933242798,0.7300000190734863,0.7350000143051147,0.7379999756813049,0.7279999852180481,0.7310000061988831,0.7269999980926514,0.734000027179718,0.7250000238418579,0.7269999980926514,0.7279999852180481,0.7250000238418579,0.734000027179718,0.7289999723434448,0.7350000143051147,0.7350000143051147,0.7300000190734863,0.7250000238418579,0.7279999852180481,0.7210000157356262,0.7260000109672546,0.7329999804496765,0.7239999771118164,0.722000002861023,0.7260000109672546,0.7319999933242798,0.7300000190734863,0.7310000061988831,0.7279999852180481,0.7350000143051147,0.7279999852180481,0.7300000190734863,0.7350000143051147,0.7200000286102295,0.7319999933242798,0.7279999852180481,0.722000002861023,0.7200000286102295,0.7179999947547913,0.7260000109672546,0.7300000190734863,0.7260000109672546,0.7289999723434448,0.7210000157356262,0.7269999980926514,0.7260000109672546,0.7310000061988831],"label":"RedPajama2"}},"layout":{"xaxis":{"title":{"text":"Training tokens (billions)"}},"yaxis":{"range":[0.39,0.5]},"title":{"text":"Dataset Ablations"}}}
 
 
data/plots/dataset_ablations/winogrande_acc_norm.json DELETED
@@ -1 +0,0 @@
1
- {"data":{"FineWeb (ours)":{"x":[0.0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,29.360128000000003,31.45728,33.554432000000006,35.651584,37.748736,39.845888,41.94304,44.040192000000005,46.137344000000006,48.234496,50.331648,52.4288,54.525952000000004,56.623104000000005,58.720256000000006,60.817408,62.91456,65.011712,67.10886400000001,69.206016,71.303168,73.40032000000001,75.497472,77.59462400000001,79.691776,81.788928,83.88608,85.983232,88.08038400000001,90.177536,92.27468800000001,94.37184,96.468992,98.56614400000001,100.663296,102.76044800000001,104.8576,106.95475200000001,109.05190400000001,111.149056,113.24620800000001,115.34336,117.44051200000001,119.537664,121.634816,123.73196800000001,125.82912,127.92627200000001,130.023424,132.120576,134.21772800000002,136.31488000000002,138.412032,140.509184,142.606336,144.70348800000002,146.80064000000002,148.897792,150.994944,153.092096,155.18924800000002,157.28640000000001,159.383552,161.480704,163.577856,165.67500800000002,167.77216,169.869312,171.966464,174.06361600000002,176.16076800000002,178.25792,180.355072,182.452224,184.54937600000002,186.64652800000002,188.74368,190.840832,192.937984,195.03513600000002,197.13228800000002,199.22944,201.326592,203.423744,205.52089600000002,207.61804800000002,209.7152,211.812352,213.90950400000003,216.00665600000002,218.10380800000001,220.20096,222.298112,224.39526400000003,226.49241600000002,228.589568,230.68672,232.783872,234.88102400000002,236.97817600000002,239.075328,241.17248,243.269632,245.36678400000002,247.46393600000002,249.561088,251.65824,253.75539200000003,255.85254400000002,257.949696,260.046848,262.144,264.241152,266.338304,268.43545600000004,270.53260800000004,272.62976000000003,274.726912,276.824064,278.921216,281.018368,283.11552,285.212672,287.309824,289.40697600000004,291.50412800000004,293.60128000000003,295.698432,29
7.795584,299.892736,301.989888,304.08704,306.184192,308.28134400000005,310.37849600000004,312.47564800000004,314.57280000000003,316.669952,318.767104,320.864256,322.961408,325.05856,327.155712,329.25286400000005,331.35001600000004,333.44716800000003,335.54432,337.641472,339.738624,341.835776,343.932928,346.03008,348.12723200000005,350.22438400000004],"y":[0.4970000088214874,0.4760000109672546,0.4979999959468841,0.503000020980835,0.531000018119812,0.515999972820282,0.5220000147819519,0.5210000276565552,0.5260000228881836,0.5289999842643738,0.5249999761581421,0.5239999890327454,0.5189999938011169,0.5260000228881836,0.5139999985694885,0.5299999713897705,0.5370000004768372,0.5350000262260437,0.5329999923706055,0.531000018119812,0.5299999713897705,0.550000011920929,0.5329999923706055,0.5260000228881836,0.5320000052452087,0.5339999794960022,0.5429999828338623,0.5440000295639038,0.5379999876022339,0.5509999990463257,0.5529999732971191,0.5440000295639038,0.5479999780654907,0.5419999957084656,0.5339999794960022,0.5440000295639038,0.5419999957084656,0.5370000004768372,0.5289999842643738,0.5220000147819519,0.5429999828338623,0.5519999861717224,0.5419999957084656,0.5370000004768372,0.546999990940094,0.5509999990463257,0.5509999990463257,0.5460000038146973,0.5519999861717224,0.5429999828338623,0.5419999957084656,0.5379999876022339,0.5450000166893005,0.5440000295639038,0.5440000295639038,0.5239999890327454,0.5450000166893005,0.550000011920929,0.5550000071525574,0.5429999828338623,0.5540000200271606,0.5410000085830688,0.5429999828338623,0.5550000071525574,0.5509999990463257,0.5460000038146973,0.550000011920929,0.546999990940094,0.5429999828338623,0.5299999713897705,0.550000011920929,0.5550000071525574,0.5440000295639038,0.5410000085830688,0.5450000166893005,0.550000011920929,0.546999990940094,0.5519999861717224,0.5529999732971191,0.550000011920929,0.5519999861717224,0.5540000200271606,0.5379999876022339,0.5590000152587891,0.5440000295639038,0.5540000200271606,0.5540000200271606,0.
5429999828338623,0.5450000166893005,0.5440000295639038,0.5519999861717224,0.546999990940094,0.5519999861717224,0.5559999942779541,0.5659999847412109,0.5649999976158142,0.5600000023841858,0.5569999814033508,0.5550000071525574,0.5630000233650208,0.5559999942779541,0.5669999718666077,0.5550000071525574,0.5609999895095825,0.5580000281333923,0.5699999928474426,0.5580000281333923,0.5490000247955322,0.5619999766349792,0.5609999895095825,0.5529999732971191,0.5490000247955322,0.5540000200271606,0.5590000152587891,0.5600000023841858,0.5509999990463257,0.5569999814033508,0.5509999990463257,0.5580000281333923,0.5580000281333923,0.5580000281333923,0.5619999766349792,0.5649999976158142,0.5540000200271606,0.5619999766349792,0.5659999847412109,0.5759999752044678,0.5709999799728394,0.5550000071525574,0.5659999847412109,0.5659999847412109,0.5680000185966492,0.5669999718666077,0.5600000023841858,0.5619999766349792,0.5640000104904175,0.5580000281333923,0.5580000281333923,0.5540000200271606,0.5789999961853027,0.5600000023841858,0.5509999990463257,0.5690000057220459,0.5709999799728394,0.5669999718666077,0.5600000023841858,0.5619999766349792,0.5600000023841858,0.5590000152587891,0.5600000023841858,0.5690000057220459,0.5690000057220459,0.5789999961853027,0.5669999718666077,0.5690000057220459,0.5649999976158142,0.5690000057220459,0.5699999928474426,0.5669999718666077,0.5649999976158142,0.5630000233650208,0.5559999942779541,0.5669999718666077,0.5669999718666077,0.5720000267028809,0.5690000057220459,0.5830000042915344,0.5640000104904175],"label":"FineWeb 
(ours)"},"C4":{"x":[0.0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,29.360128000000003,31.45728,33.554432000000006,35.651584,37.748736,39.845888,41.94304,44.040192000000005,46.137344000000006,48.234496,50.331648,52.4288,54.525952000000004,56.623104000000005,58.720256000000006,60.817408,62.91456,65.011712,67.10886400000001,69.206016,71.303168,73.40032000000001,75.497472,77.59462400000001,79.691776,81.788928,83.88608,85.983232,88.08038400000001,90.177536,92.27468800000001,94.37184,96.468992,98.56614400000001,100.663296,102.76044800000001,104.8576,106.95475200000001,109.05190400000001,111.149056,113.24620800000001,115.34336,117.44051200000001,119.537664,121.634816,123.73196800000001,125.82912,127.92627200000001,130.023424,132.120576,134.21772800000002,136.31488000000002,138.412032,140.509184,142.606336,144.70348800000002,146.80064000000002,148.897792,150.994944,153.092096,155.18924800000002,157.28640000000001,159.383552,161.480704,163.577856,165.67500800000002,167.77216,169.869312,171.966464,174.06361600000002,176.16076800000002,178.25792,180.355072,182.452224,184.54937600000002,186.64652800000002,188.74368,190.840832,192.937984,195.03513600000002,197.13228800000002,199.22944,201.326592,203.423744,205.52089600000002,207.61804800000002,209.7152,211.812352,213.90950400000003,216.00665600000002,218.10380800000001,220.20096,222.298112,224.39526400000003,226.49241600000002,228.589568,230.68672,232.783872,234.88102400000002,236.97817600000002,239.075328,241.17248,243.269632,245.36678400000002,247.46393600000002,249.561088,251.65824,253.75539200000003,255.85254400000002,257.949696,260.046848,262.144,264.241152,266.338304,268.43545600000004,270.53260800000004,272.62976000000003,274.726912,276.824064,278.921216,281.018368,283.11552,285.212672,287.309824,289.40697600000004,291.50412800000004,293.60128000000003,295.698432,297.795584,299.8
92736,301.989888,304.08704,306.184192,308.28134400000005,310.37849600000004,312.47564800000004,314.57280000000003,316.669952,318.767104,320.864256,322.961408,325.05856,327.155712,329.25286400000005,331.35001600000004,333.44716800000003,335.54432,337.641472,339.738624,341.835776,343.932928,346.03008,348.12723200000005,350.22438400000004],"y":[0.4970000088214874,0.4760000109672546,0.5,0.4959999918937683,0.503000020980835,0.5199999809265137,0.515999972820282,0.5149999856948853,0.527999997138977,0.5170000195503235,0.5239999890327454,0.5199999809265137,0.5189999938011169,0.5189999938011169,0.5239999890327454,0.5149999856948853,0.5130000114440918,0.5210000276565552,0.5180000066757202,0.5149999856948853,0.5149999856948853,0.5220000147819519,0.5299999713897705,0.5149999856948853,0.5220000147819519,0.527999997138977,0.5419999957084656,0.5350000262260437,0.5260000228881836,0.5189999938011169,0.5230000019073486,0.5400000214576721,0.5289999842643738,0.5299999713897705,0.5299999713897705,0.5429999828338623,0.5289999842643738,0.5389999747276306,0.531000018119812,0.5440000295639038,0.5479999780654907,0.5540000200271606,0.5320000052452087,0.550000011920929,0.5350000262260437,0.5379999876022339,0.5529999732971191,0.5299999713897705,0.5419999957084656,0.5360000133514404,0.5490000247955322,0.5450000166893005,0.5440000295639038,0.5329999923706055,0.5350000262260437,0.5370000004768372,0.5379999876022339,0.5299999713897705,0.5320000052452087,0.5379999876022339,0.5400000214576721,0.5479999780654907,0.546999990940094,0.5339999794960022,0.546999990940094,0.5440000295639038,0.5320000052452087,0.5350000262260437,0.5379999876022339,0.531000018119812,0.5450000166893005,0.5360000133514404,0.5429999828338623,0.5450000166893005,0.5400000214576721,0.5609999895095825,0.5440000295639038,0.5450000166893005,0.5389999747276306,0.5419999957084656,0.5429999828338623,0.5529999732971191,0.5429999828338623,0.5630000233650208,0.5479999780654907,0.5509999990463257,0.5559999942779541,0.5569999814033508,0.561999
9766349792,0.550000011920929,0.550000011920929,0.5479999780654907,0.5519999861717224,0.5569999814033508,0.5630000233650208,0.5479999780654907,0.5419999957084656,0.5569999814033508,0.550000011920929,0.5479999780654907,0.5490000247955322,0.5540000200271606,0.5720000267028809,0.5580000281333923,0.550000011920929,0.5590000152587891,0.5460000038146973,0.5460000038146973,0.5580000281333923,0.5649999976158142,0.5519999861717224,0.5540000200271606,0.5529999732971191,0.5460000038146973,0.5509999990463257,0.5550000071525574,0.550000011920929,0.5540000200271606,0.5519999861717224,0.5450000166893005,0.5519999861717224,0.5529999732971191,0.5649999976158142,0.5640000104904175,0.5429999828338623,0.5580000281333923,0.5590000152587891,0.5550000071525574,0.5609999895095825,0.5590000152587891,0.5609999895095825,0.5569999814033508,0.5580000281333923,0.5529999732971191,0.5619999766349792,0.5569999814033508,0.5659999847412109,0.5540000200271606,0.546999990940094,0.5569999814033508,0.5450000166893005,0.5519999861717224,0.546999990940094,0.5529999732971191,0.5559999942779541,0.5640000104904175,0.5580000281333923,0.5619999766349792,0.5559999942779541,0.5519999861717224,0.5609999895095825,0.5580000281333923,0.5590000152587891,0.5529999732971191,0.5519999861717224,0.5590000152587891,0.5529999732971191,0.5609999895095825,0.5600000023841858,0.5709999799728394,0.5540000200271606,0.5569999814033508,0.5600000023841858,0.5649999976158142,0.5580000281333923,0.5630000233650208,0.5619999766349792,0.5609999895095825],"label":"C4"},"Dolma":{"x":[0.0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,29.360128000000003,31.45728,33.554432000000006,35.651584,37.748736,39.845888,41.94304,44.040192000000005,46.137344000000006,48.234496,50.331648,52.4288,54.525952000000004,56.623104000000005,58.720256000000006,60.817408,62.91456,65.011712,67.10886400000001,69.206016,71.30316
8,73.40032000000001,75.497472,77.59462400000001,79.691776,81.788928,83.88608,85.983232,88.08038400000001,90.177536,92.27468800000001,94.37184,96.468992,98.56614400000001,100.663296,102.76044800000001,104.8576,106.95475200000001,109.05190400000001,111.149056,113.24620800000001,115.34336,117.44051200000001,119.537664,121.634816,123.73196800000001,125.82912,127.92627200000001,130.023424,132.120576,134.21772800000002,136.31488000000002,138.412032,140.509184,142.606336,144.70348800000002,146.80064000000002,148.897792,150.994944,153.092096,155.18924800000002,157.28640000000001,159.383552,161.480704,163.577856,165.67500800000002,167.77216,169.869312,171.966464,174.06361600000002,176.16076800000002,178.25792,180.355072,182.452224,184.54937600000002,186.64652800000002,188.74368,190.840832,192.937984,195.03513600000002,197.13228800000002,199.22944,201.326592,203.423744,205.52089600000002,207.61804800000002,209.7152,211.812352,213.90950400000003,216.00665600000002,218.10380800000001,220.20096,222.298112,224.39526400000003,226.49241600000002,228.589568,230.68672,232.783872,234.88102400000002,236.97817600000002,239.075328,241.17248,243.269632,245.36678400000002,247.46393600000002,249.561088,251.65824,253.75539200000003,255.85254400000002,257.949696,260.046848,262.144,264.241152,266.338304,268.43545600000004,270.53260800000004,272.62976000000003,274.726912,276.824064,278.921216,281.018368,283.11552,285.212672,287.309824,289.40697600000004,291.50412800000004,293.60128000000003,295.698432,297.795584,299.892736,301.989888,304.08704,306.184192,308.28134400000005,310.37849600000004,312.47564800000004,314.57280000000003,316.669952,318.767104,320.864256,322.961408,325.05856,327.155712,329.25286400000005,331.35001600000004,333.44716800000003,335.54432,337.641472,339.738624,341.835776,343.932928,346.03008,348.12723200000005,350.22438400000004],"y":[0.4970000088214874,0.4830000102519989,0.4909999966621399,0.5,0.5049999952316284,0.5009999871253967,0.5059999823570251,0.5189999938011169,0.528
9999842643738,0.5149999856948853,0.5019999742507935,0.5130000114440918,0.5260000228881836,0.5139999985694885,0.531000018119812,0.5210000276565552,0.5189999938011169,0.5239999890327454,0.5379999876022339,0.5149999856948853,0.5239999890327454,0.5120000243186951,0.5289999842643738,0.5260000228881836,0.5370000004768372,0.5289999842643738,0.5199999809265137,0.5249999761581421,0.5400000214576721,0.5210000276565552,0.5389999747276306,0.5239999890327454,0.5320000052452087,0.5239999890327454,0.5320000052452087,0.5379999876022339,0.5189999938011169,0.5389999747276306,0.5379999876022339,0.5220000147819519,0.5220000147819519,0.5360000133514404,0.531000018119812,0.527999997138977,0.5180000066757202,0.5339999794960022,0.5329999923706055,0.5400000214576721,0.5260000228881836,0.515999972820282,0.5289999842643738,0.5519999861717224,0.5370000004768372,0.546999990940094,0.531000018119812,0.5329999923706055,0.5400000214576721,0.5389999747276306,0.527999997138977,0.5389999747276306,0.5329999923706055,0.5429999828338623,0.5329999923706055,0.5360000133514404,0.5249999761581421,0.531000018119812,0.5320000052452087,0.5350000262260437,0.5350000262260437,0.5429999828338623,0.5519999861717224,0.5440000295639038,0.5360000133514404,0.5490000247955322,0.5350000262260437,0.5329999923706055,0.5389999747276306,0.546999990940094,0.5460000038146973,0.5389999747276306,0.5410000085830688,0.5440000295639038,0.5389999747276306,0.5329999923706055,0.5339999794960022,0.5320000052452087,0.5320000052452087,0.5479999780654907,0.550000011920929,0.5490000247955322,0.5400000214576721,0.531000018119812,0.546999990940094,0.5389999747276306,0.5460000038146973,0.5400000214576721,0.5479999780654907,0.5429999828338623,0.5419999957084656,0.5389999747276306,0.550000011920929,0.546999990940094,0.5460000038146973,0.5329999923706055,0.5400000214576721,0.5419999957084656,0.5460000038146973,0.546999990940094,0.5450000166893005,0.5320000052452087,0.5479999780654907,0.5419999957084656,0.5440000295639038,0.5540000200271606,0.5440
000295639038,0.5490000247955322,0.5440000295639038,0.5350000262260437,0.5569999814033508,0.5440000295639038,0.5519999861717224,0.546999990940094,0.5509999990463257,0.5519999861717224,0.5440000295639038,0.5440000295639038,0.5410000085830688,0.5559999942779541,0.5429999828338623,0.5490000247955322,0.5440000295639038,0.5440000295639038,0.5429999828338623,0.5529999732971191,0.5410000085830688,0.5440000295639038,0.5440000295639038,0.5429999828338623,0.5460000038146973,0.5460000038146973,0.546999990940094,0.550000011920929,0.5519999861717224,0.5379999876022339,0.5419999957084656,0.5450000166893005,0.546999990940094,0.5519999861717224,0.5410000085830688,0.5590000152587891,0.5519999861717224,0.5460000038146973,0.5419999957084656,0.5509999990463257,0.5450000166893005,0.5600000023841858,0.5400000214576721,0.5339999794960022,0.5490000247955322,0.5540000200271606,0.5479999780654907,0.5529999732971191,0.5509999990463257,0.5540000200271606,0.5479999780654907,0.5580000281333923,0.5509999990463257,0.5550000071525574],"label":"Dolma"},"RefinedWeb":{"x":[0.0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,29.360128000000003,31.45728,33.554432000000006,35.651584,37.748736,39.845888,41.94304,44.040192000000005,46.137344000000006,48.234496,50.331648,52.4288,54.525952000000004,56.623104000000005,58.720256000000006,60.817408,62.91456,65.011712,67.10886400000001,69.206016,71.303168,73.40032000000001,75.497472,77.59462400000001,79.691776,81.788928,83.88608,85.983232,88.08038400000001,90.177536,92.27468800000001,94.37184,96.468992,98.56614400000001,100.663296,102.76044800000001,104.8576,106.95475200000001,109.05190400000001,111.149056,113.24620800000001,115.34336,117.44051200000001,119.537664,121.634816,123.73196800000001,125.82912,127.92627200000001,130.023424,132.120576,134.21772800000002,136.31488000000002,138.412032,140.509184,142.606336,144.70348800
000002,146.80064000000002,148.897792,150.994944,153.092096,155.18924800000002,157.28640000000001,159.383552,161.480704,163.577856,165.67500800000002,167.77216,169.869312,171.966464,174.06361600000002,176.16076800000002,178.25792,180.355072,182.452224,184.54937600000002,186.64652800000002,188.74368,190.840832,192.937984,195.03513600000002,197.13228800000002,199.22944,201.326592,203.423744,205.52089600000002,207.61804800000002,209.7152,211.812352,213.90950400000003,216.00665600000002,218.10380800000001,220.20096,222.298112,224.39526400000003,226.49241600000002,228.589568,230.68672,232.783872,234.88102400000002,236.97817600000002,239.075328,241.17248,243.269632,245.36678400000002,247.46393600000002,249.561088,251.65824,253.75539200000003,255.85254400000002,257.949696,260.046848,262.144,264.241152,266.338304,268.43545600000004,270.53260800000004,272.62976000000003,274.726912,276.824064,278.921216,281.018368,283.11552,285.212672,287.309824,289.40697600000004,291.50412800000004,293.60128000000003,295.698432,297.795584,299.892736,301.989888,304.08704,306.184192,308.28134400000005,310.37849600000004,312.47564800000004,314.57280000000003,316.669952,318.767104,320.864256,322.961408,325.05856,327.155712,329.25286400000005,331.35001600000004,333.44716800000003,335.54432,337.641472,339.738624,341.835776,343.932928,346.03008,348.12723200000005,350.22438400000004],"y":[0.4970000088214874,0.5,0.4979999959468841,0.4950000047683716,0.4950000047683716,0.5049999952316284,0.5329999923706055,0.5220000147819519,0.5139999985694885,0.5339999794960022,0.5130000114440918,0.5389999747276306,0.5400000214576721,0.5270000100135803,0.5260000228881836,0.5370000004768372,0.527999997138977,0.5289999842643738,0.5339999794960022,0.5270000100135803,0.531000018119812,0.527999997138977,0.5400000214576721,0.5479999780654907,0.550000011920929,0.5400000214576721,0.5350000262260437,0.5410000085830688,0.5379999876022339,0.5299999713897705,0.5490000247955322,0.5509999990463257,0.5519999861717224,0.5429999828338
623,0.5429999828338623,0.5440000295639038,0.5379999876022339,0.5379999876022339,0.5419999957084656,0.5609999895095825,0.5540000200271606,0.5370000004768372,0.5440000295639038,0.5410000085830688,0.5379999876022339,0.5329999923706055,0.5419999957084656,0.5419999957084656,0.5519999861717224,0.550000011920929,0.5509999990463257,0.5400000214576721,0.5450000166893005,0.5509999990463257,0.5569999814033508,0.5550000071525574,0.5590000152587891,0.5479999780654907,0.5550000071525574,0.5440000295639038,0.5460000038146973,0.546999990940094,0.5559999942779541,0.5550000071525574,0.5490000247955322,0.5440000295639038,0.546999990940094,0.5450000166893005,0.546999990940094,0.5649999976158142,0.5490000247955322,0.5519999861717224,0.550000011920929,0.5509999990463257,0.5519999861717224,0.5519999861717224,0.5529999732971191,0.5490000247955322,0.546999990940094,0.550000011920929,0.5720000267028809,0.5619999766349792,0.5490000247955322,0.5680000185966492,0.5519999861717224,0.5569999814033508,0.5509999990463257,0.5619999766349792,0.5630000233650208,0.5529999732971191,0.5619999766349792,0.5609999895095825,0.550000011920929,0.5479999780654907,0.5529999732971191,0.5519999861717224,0.5580000281333923,0.5590000152587891,0.5529999732971191,0.550000011920929,0.5680000185966492,0.5580000281333923,0.5630000233650208,0.5630000233650208,0.5559999942779541,0.5649999976158142,0.5569999814033508,0.5649999976158142,0.5659999847412109,0.5559999942779541,0.5659999847412109,0.5630000233650208,0.5509999990463257,0.5669999718666077,0.5669999718666077,0.5479999780654907,0.5540000200271606,0.5580000281333923,0.5519999861717224,0.5590000152587891,0.5590000152587891,0.5619999766349792,0.5509999990463257,0.546999990940094,0.5609999895095825,0.5540000200271606,0.5630000233650208,0.5580000281333923,0.5559999942779541,0.5680000185966492,0.5649999976158142,0.5619999766349792,0.5580000281333923,0.5630000233650208,0.5559999942779541,0.5540000200271606,0.5540000200271606,0.5569999814033508,0.5619999766349792,0.555999994
2779541,0.5600000023841858,0.5460000038146973,0.5429999828338623,0.5580000281333923,0.5550000071525574,0.5580000281333923,0.5540000200271606,0.5609999895095825,0.5519999861717224,0.550000011920929,0.5519999861717224,0.5590000152587891,0.5619999766349792,0.5600000023841858,0.5590000152587891,0.5690000057220459,0.5640000104904175,0.5580000281333923,0.5559999942779541,0.5569999814033508,0.5569999814033508,0.5540000200271606,0.5640000104904175,0.5600000023841858,0.5550000071525574,0.5640000104904175,0.5600000023841858,0.5540000200271606],"label":"RefinedWeb"},"SlimPajama":{"x":[0.0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,29.360128000000003,31.45728,33.554432000000006,35.651584,37.748736,39.845888,41.94304,44.040192000000005,46.137344000000006,48.234496,50.331648,52.4288,54.525952000000004,56.623104000000005,58.720256000000006,60.817408,62.91456,65.011712,67.10886400000001,69.206016,71.303168,73.40032000000001,75.497472,77.59462400000001,79.691776,81.788928,83.88608,85.983232,88.08038400000001,90.177536,92.27468800000001,94.37184,96.468992,98.56614400000001,100.663296,102.76044800000001,104.8576,106.95475200000001,109.05190400000001,111.149056,113.24620800000001,115.34336,117.44051200000001,119.537664,121.634816,123.73196800000001,125.82912,127.92627200000001,130.023424,132.120576,134.21772800000002,136.31488000000002,138.412032,140.509184,142.606336,144.70348800000002,146.80064000000002,148.897792,150.994944,153.092096,155.18924800000002,157.28640000000001,159.383552,161.480704,163.577856,165.67500800000002,167.77216,169.869312,171.966464,174.06361600000002,176.16076800000002,178.25792,180.355072,182.452224,184.54937600000002,186.64652800000002,188.74368,190.840832,192.937984,195.03513600000002,197.13228800000002,199.22944,201.326592,203.423744,205.52089600000002,207.61804800000002,209.7152,211.812352,213.90950400000003,216.
00665600000002,218.10380800000001,220.20096,222.298112,224.39526400000003,226.49241600000002,228.589568,230.68672,232.783872,234.88102400000002,236.97817600000002,239.075328,241.17248,243.269632,245.36678400000002,247.46393600000002,249.561088,251.65824,253.75539200000003,255.85254400000002,257.949696,260.046848,262.144,264.241152,266.338304,268.43545600000004,270.53260800000004,272.62976000000003,274.726912,276.824064,278.921216,281.018368,283.11552,285.212672,287.309824,289.40697600000004,291.50412800000004,293.60128000000003,295.698432,297.795584,299.892736,301.989888,304.08704,306.184192,308.28134400000005,310.37849600000004,312.47564800000004,314.57280000000003,316.669952,318.767104,320.864256,322.961408,325.05856,327.155712,329.25286400000005,331.35001600000004,333.44716800000003,335.54432,337.641472,339.738624,341.835776,343.932928,346.03008,348.12723200000005,350.22438400000004],"y":[0.4970000088214874,0.4979999959468841,0.5040000081062317,0.4979999959468841,0.5,0.4950000047683716,0.4970000088214874,0.503000020980835,0.5139999985694885,0.5120000243186951,0.5120000243186951,0.5040000081062317,0.5099999904632568,0.5189999938011169,0.5149999856948853,0.5270000100135803,0.5239999890327454,0.5130000114440918,0.5130000114440918,0.5139999985694885,0.5099999904632568,0.5090000033378601,0.5139999985694885,0.5170000195503235,0.5180000066757202,0.5220000147819519,0.515999972820282,0.5149999856948853,0.5220000147819519,0.5490000247955322,0.5149999856948853,0.5220000147819519,0.5139999985694885,0.5120000243186951,0.5210000276565552,0.5230000019073486,0.5120000243186951,0.5230000019073486,0.5249999761581421,0.5139999985694885,0.5230000019073486,0.5239999890327454,0.5289999842643738,0.527999997138977,0.5440000295639038,0.5289999842643738,0.5270000100135803,0.5400000214576721,0.5410000085830688,0.5249999761581421,0.5270000100135803,0.5389999747276306,0.5260000228881836,0.5339999794960022,0.5270000100135803,0.531000018119812,0.5339999794960022,0.527999997138977,0.53100001811
9812,0.5249999761581421,0.5299999713897705,0.5299999713897705,0.5289999842643738,0.5329999923706055,0.5479999780654907,0.5389999747276306,0.5339999794960022,0.5460000038146973,0.5350000262260437,0.5350000262260437,0.5329999923706055,0.5400000214576721,0.5320000052452087,0.5460000038146973,0.5339999794960022,0.5389999747276306,0.5410000085830688,0.5410000085830688,0.5429999828338623,0.5429999828338623,0.5440000295639038,0.5339999794960022,0.5289999842643738,0.5360000133514404,0.5479999780654907,0.5419999957084656,0.5370000004768372,0.5479999780654907,0.5419999957084656,0.5379999876022339,0.531000018119812,0.5460000038146973,0.5419999957084656,0.5450000166893005,0.5440000295639038,0.5320000052452087,0.5360000133514404,0.5400000214576721,0.5479999780654907,0.5559999942779541,0.5519999861717224,0.5460000038146973,0.5379999876022339,0.5490000247955322,0.5379999876022339,0.5440000295639038,0.5419999957084656,0.5299999713897705,0.5400000214576721,0.5389999747276306,0.5419999957084656,0.5479999780654907,0.5550000071525574,0.5429999828338623,0.5360000133514404,0.5389999747276306,0.5370000004768372,0.5299999713897705,0.5419999957084656,0.5329999923706055,0.5379999876022339,0.5410000085830688,0.5400000214576721,0.5370000004768372,0.5370000004768372,0.5490000247955322,0.5419999957084656,0.5370000004768372,0.5460000038146973,0.5479999780654907,0.5580000281333923,0.550000011920929,0.5619999766349792,0.5479999780654907,0.5519999861717224,0.5519999861717224,0.5569999814033508,0.5389999747276306,0.5529999732971191,0.5509999990463257,0.5429999828338623,0.5609999895095825,0.5509999990463257,0.550000011920929,0.5479999780654907,0.5569999814033508,0.5559999942779541,0.5429999828338623,0.5540000200271606,0.550000011920929,0.5429999828338623,0.5440000295639038,0.5410000085830688,0.5529999732971191,0.5479999780654907,0.5479999780654907,0.5490000247955322,0.550000011920929,0.5509999990463257,0.5540000200271606,0.5479999780654907,0.5410000085830688,0.5410000085830688,0.5400000214576721,0.551
9999861717224,0.546999990940094,0.5509999990463257,0.5479999780654907],"label":"SlimPajama"},"The Pile":{"x":[0.0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,29.360128000000003,31.45728,33.554432000000006,35.651584,37.748736,39.845888,41.94304,44.040192000000005,46.137344000000006,48.234496,50.331648,52.4288,54.525952000000004,56.623104000000005,58.720256000000006,60.817408,62.91456,65.011712,67.10886400000001,69.206016,71.303168,73.40032000000001,75.497472,77.59462400000001,79.691776,81.788928,83.88608,85.983232,88.08038400000001,90.177536,92.27468800000001,94.37184,96.468992,98.56614400000001,100.663296,102.76044800000001,104.8576,106.95475200000001,109.05190400000001,111.149056,113.24620800000001,115.34336,117.44051200000001,119.537664,121.634816,123.73196800000001,125.82912,127.92627200000001,130.023424,132.120576,134.21772800000002,136.31488000000002,138.412032,140.509184,142.606336,144.70348800000002,146.80064000000002,148.897792,150.994944,153.092096,155.18924800000002,157.28640000000001,159.383552,161.480704,163.577856,165.67500800000002,167.77216,169.869312,171.966464,174.06361600000002,176.16076800000002,178.25792,180.355072,182.452224,184.54937600000002,186.64652800000002,188.74368,190.840832,192.937984,195.03513600000002,197.13228800000002,199.22944,201.326592,203.423744,205.52089600000002,207.61804800000002,209.7152,211.812352,213.90950400000003,216.00665600000002,218.10380800000001,220.20096,222.298112,224.39526400000003,226.49241600000002,228.589568,230.68672,232.783872,234.88102400000002,236.97817600000002,239.075328,241.17248,243.269632,245.36678400000002,247.46393600000002,249.561088,251.65824,253.75539200000003,255.85254400000002,257.949696,260.046848,262.144,264.241152,266.338304,268.43545600000004,270.53260800000004,272.62976000000003,274.726912,276.824064,278.921216,281.018368,283.11552,285.212672,287.3
09824,289.40697600000004,291.50412800000004,293.60128000000003,295.698432,297.795584,299.892736,301.989888,304.08704,306.184192,308.28134400000005,310.37849600000004,312.47564800000004,314.57280000000003,316.669952,318.767104,320.864256,322.961408,325.05856,327.155712,329.25286400000005,331.35001600000004,333.44716800000003,335.54432,337.641472,339.738624,341.835776,343.932928,346.03008,348.12723200000005,350.22438400000004],"y":[0.4970000088214874,0.4950000047683716,0.4850000143051147,0.4819999933242798,0.4950000047683716,0.4970000088214874,0.4900000095367431,0.5080000162124634,0.5149999856948853,0.5149999856948853,0.5130000114440918,0.5019999742507935,0.5109999775886536,0.5289999842643738,0.5120000243186951,0.5090000033378601,0.5239999890327454,0.531000018119812,0.5170000195503235,0.5180000066757202,0.5230000019073486,0.5289999842643738,0.5289999842643738,0.515999972820282,0.5210000276565552,0.5149999856948853,0.5170000195503235,0.5239999890327454,0.5180000066757202,0.5189999938011169,0.5270000100135803,0.5199999809265137,0.5130000114440918,0.5170000195503235,0.5350000262260437,0.5320000052452087,0.5230000019073486,0.5239999890327454,0.5189999938011169,0.5210000276565552,0.5419999957084656,0.5180000066757202,0.5450000166893005,0.5410000085830688,0.5320000052452087,0.5289999842643738,0.5199999809265137,0.5270000100135803,0.5270000100135803,0.531000018119812,0.527999997138977,0.5370000004768372,0.5329999923706055,0.527999997138977,0.5329999923706055,0.5360000133514404,0.5429999828338623,0.5339999794960022,0.5350000262260437,0.5419999957084656,0.5329999923706055,0.531000018119812,0.5379999876022339,0.5360000133514404,0.5339999794960022,0.5350000262260437,0.5370000004768372,0.5379999876022339,0.5529999732971191,0.5370000004768372,0.5559999942779541,0.5429999828338623,0.5360000133514404,0.5479999780654907,0.5320000052452087,0.5270000100135803,0.5419999957084656,0.5400000214576721,0.5370000004768372,0.5339999794960022,0.5400000214576721,0.5550000071525574,0.537999987602
2339,0.546999990940094,0.5400000214576721,0.5429999828338623,0.5440000295639038,0.5450000166893005,0.5490000247955322,0.5370000004768372,0.5429999828338623,0.5559999942779541,0.5410000085830688,0.5440000295639038,0.5419999957084656,0.5479999780654907,0.5389999747276306,0.5419999957084656,0.5569999814033508,0.5389999747276306,0.5350000262260437,0.5410000085830688,0.5659999847412109,0.5419999957084656,0.5360000133514404,0.5440000295639038,0.5490000247955322,0.5350000262260437,0.5590000152587891,0.5389999747276306,0.5400000214576721,0.527999997138977,0.5440000295639038,0.5519999861717224,0.550000011920929,0.5509999990463257,0.5429999828338623,0.5450000166893005,0.5440000295639038,0.5540000200271606,0.5440000295639038,0.5479999780654907,0.5540000200271606,0.5479999780654907,0.5410000085830688,0.5460000038146973,0.5460000038146973,0.5429999828338623,0.5389999747276306,0.5519999861717224,0.5519999861717224,0.5590000152587891,0.5460000038146973,0.5540000200271606,0.5490000247955322,0.5590000152587891,0.5640000104904175,0.5569999814033508,0.546999990940094,0.550000011920929,0.5580000281333923,0.5600000023841858,0.5479999780654907,0.5490000247955322,0.5580000281333923,0.5490000247955322,0.5590000152587891,0.5540000200271606,0.5429999828338623,0.5479999780654907,0.5590000152587891,0.546999990940094,0.5490000247955322,0.5509999990463257,0.5559999942779541,0.5609999895095825,0.5609999895095825,0.5519999861717224,0.5600000023841858,0.5630000233650208,0.5550000071525574,0.5580000281333923,0.5649999976158142,0.5600000023841858,0.5669999718666077,0.5559999942779541,0.5580000281333923,0.5590000152587891],"label":"The 
Pile"},"RedPajama2":{"x":[0.0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,29.360128000000003,31.45728,33.554432000000006,35.651584,37.748736,39.845888,41.94304,44.040192000000005,46.137344000000006,48.234496,50.331648,52.4288,54.525952000000004,56.623104000000005,58.720256000000006,60.817408,62.91456,65.011712,67.10886400000001,69.206016,71.303168,73.40032000000001,75.497472,77.59462400000001,79.691776,81.788928,83.88608,85.983232,88.08038400000001,90.177536,92.27468800000001,94.37184,96.468992,98.56614400000001,100.663296,102.76044800000001,104.8576,106.95475200000001,109.05190400000001,111.149056,113.24620800000001,115.34336,117.44051200000001,119.537664,121.634816,123.73196800000001,125.82912,127.92627200000001,130.023424,132.120576,134.21772800000002,136.31488000000002,138.412032,140.509184,142.606336,144.70348800000002,146.80064000000002,148.897792,150.994944,153.092096,155.18924800000002,157.28640000000001,159.383552,161.480704,163.577856,165.67500800000002,167.77216,169.869312,171.966464,174.06361600000002,176.16076800000002,178.25792,180.355072,182.452224,184.54937600000002,186.64652800000002,188.74368,190.840832,192.937984,195.03513600000002,197.13228800000002,199.22944,201.326592,203.423744,205.52089600000002,207.61804800000002,209.7152,211.812352,213.90950400000003,216.00665600000002,218.10380800000001,220.20096,222.298112,224.39526400000003,226.49241600000002,228.589568,230.68672,232.783872,234.88102400000002,236.97817600000002,239.075328,241.17248,243.269632,245.36678400000002,247.46393600000002,249.561088,251.65824,253.75539200000003,255.85254400000002,257.949696,260.046848,262.144,264.241152,266.338304,268.43545600000004,270.53260800000004,272.62976000000003,274.726912,276.824064,278.921216,281.018368,283.11552,285.212672,287.309824,289.40697600000004,291.50412800000004,293.60128000000003,295.698432,297.795584
,299.892736,301.989888,304.08704,306.184192,308.28134400000005,310.37849600000004,312.47564800000004,314.57280000000003,316.669952,318.767104,320.864256,322.961408,325.05856,327.155712,329.25286400000005,331.35001600000004,333.44716800000003,335.54432,337.641472,339.738624,341.835776,343.932928,346.03008,348.12723200000005,350.22438400000004],"y":[0.4970000088214874,0.4950000047683716,0.4860000014305115,0.492000013589859,0.5019999742507935,0.4990000128746032,0.5070000290870667,0.5139999985694885,0.4909999966621399,0.515999972820282,0.5199999809265137,0.5080000162124634,0.5,0.4880000054836273,0.503000020980835,0.4909999966621399,0.5059999823570251,0.515999972820282,0.5040000081062317,0.5170000195503235,0.5220000147819519,0.5040000081062317,0.527999997138977,0.5180000066757202,0.5199999809265137,0.5299999713897705,0.5429999828338623,0.5249999761581421,0.5270000100135803,0.5260000228881836,0.5350000262260437,0.5210000276565552,0.5329999923706055,0.5339999794960022,0.5260000228881836,0.5370000004768372,0.5379999876022339,0.5260000228881836,0.527999997138977,0.5370000004768372,0.5320000052452087,0.5350000262260437,0.546999990940094,0.5299999713897705,0.5299999713897705,0.5239999890327454,0.5239999890327454,0.5360000133514404,0.5519999861717224,0.5429999828338623,0.5249999761581421,0.5199999809265137,0.5199999809265137,0.5189999938011169,0.5450000166893005,0.5370000004768372,0.5350000262260437,0.5350000262260437,0.5239999890327454,0.5270000100135803,0.546999990940094,0.5289999842643738,0.5220000147819519,0.5249999761581421,0.5299999713897705,0.5410000085830688,0.5389999747276306,0.5339999794960022,0.5329999923706055,0.5419999957084656,0.5440000295639038,0.5379999876022339,0.5450000166893005,0.5260000228881836,0.531000018119812,0.5360000133514404,0.5440000295639038,0.5370000004768372,0.5450000166893005,0.5410000085830688,0.5460000038146973,0.5320000052452087,0.5429999828338623,0.5350000262260437,0.5329999923706055,0.5389999747276306,0.5370000004768372,0.5360000133514404,0.
5429999828338623,0.5350000262260437,0.5400000214576721,0.5450000166893005,0.546999990940094,0.5370000004768372,0.5379999876022339,0.5460000038146973,0.5440000295639038,0.5410000085830688,0.5379999876022339,0.5450000166893005,0.5509999990463257,0.5440000295639038,0.5360000133514404,0.5400000214576721,0.5389999747276306,0.5460000038146973,0.5389999747276306,0.5490000247955322,0.5400000214576721,0.5400000214576721,0.5329999923706055,0.5339999794960022,0.5529999732971191,0.5429999828338623,0.5419999957084656,0.5410000085830688,0.5440000295639038,0.5379999876022339,0.5419999957084656,0.5479999780654907,0.5339999794960022,0.527999997138977,0.5379999876022339,0.5440000295639038,0.5379999876022339,0.5479999780654907,0.5410000085830688,0.5410000085830688,0.5389999747276306,0.5400000214576721,0.5429999828338623,0.5389999747276306,0.5490000247955322,0.5370000004768372,0.5379999876022339,0.5419999957084656,0.5450000166893005,0.5460000038146973,0.5460000038146973,0.5419999957084656,0.546999990940094,0.5490000247955322,0.5490000247955322,0.5400000214576721,0.5450000166893005,0.5550000071525574,0.550000011920929,0.5479999780654907,0.5490000247955322,0.5440000295639038,0.5490000247955322,0.5429999828338623,0.5490000247955322,0.5460000038146973,0.550000011920929,0.546999990940094,0.5479999780654907,0.5479999780654907,0.5519999861717224,0.550000011920929,0.5450000166893005,0.5410000085830688,0.5429999828338623,0.5410000085830688,0.5410000085830688,0.546999990940094,0.550000011920929,0.5490000247955322],"label":"RedPajama2"}},"layout":{"xaxis":{"title":{"text":"Training tokens (billions)"}},"yaxis":{"range":[0.39,0.5]},"title":{"text":"Dataset Ablations"}}}
 
 
data/plots/dededup_difference/big-run-fineweb-cross-dedup-fixed.json DELETED
@@ -1 +0,0 @@
1
- {"data":{"x":[0.0,10.48576,20.97152,31.45728,41.94304,52.4288,62.91456,73.40032000000001,83.88608,94.37184,104.8576,115.34336,125.82912,136.31488000000002,146.80064000000002,157.28640000000001,167.77216,178.25792,188.74368,199.22944,209.7152,220.20096,230.68672,241.17248,251.65824,262.144,272.62976000000003,283.11552,293.60128000000003,304.08704,314.57280000000003,325.05856,335.54432,346.03008],"y":[null,null,null,null,0.40171657912433145,0.42239717617630956,0.43069435879588125,0.4351756565272808,0.43896834924817085,0.4424236983060837,0.4452380746603012,0.44781614691019056,0.45025914907455444,0.4521562337875366,0.4531575210392475,0.45397180542349813,0.4548915736377239,0.4563755728304386,0.45696389451622965,0.458776044100523,0.4609984554350376,0.4624955080449581,0.4629682660102844,0.4638278633356093,0.4645016059279441,0.4646032989025116,0.46489162668585776,0.4657001614570618,0.46593172624707224,0.4667894795536995,0.4675446107983589,0.46748293563723564,0.4683080866932869,0.46885923445224764],"label":"FineWeb full MinHash"},"layout":{"xaxis":{"title":{"text":"Training tokens (billions)"}},"yaxis":{"title":{"text":"Agg Score"},"range":[0.35,0.5]},"title":{"text":"Dataset Ablations"}}}
 
 
data/plots/dededup_difference/big-run-refinedweb.json DELETED
@@ -1 +0,0 @@
1
- {"data":{"x":[0.0,10.48576,20.97152,28.311552000000002,31.45728,41.94304,52.4288,62.91456,73.40032000000001,83.88608,94.37184,104.8576,115.34336,125.82912,136.31488000000002,146.80064000000002,157.28640000000001,167.77216,178.25792,188.74368,199.22944,209.7152,220.20096,230.68672,241.17248,251.65824,262.144,272.62976000000003,283.11552,293.60128000000003,304.08704,314.57280000000003,325.05856,335.54432,346.03008],"y":[null,null,null,null,0.40424661971628667,0.42596163749694826,0.43559565395116806,0.4419388733804226,0.4472432412207127,0.4522114463150501,0.45583397448062896,0.45813767313957215,0.460252707451582,0.4618991769850254,0.46210767328739166,0.46468816623091697,0.46640462651848785,0.46798615977168073,0.4687947325408458,0.4707141913473606,0.47183807417750356,0.4731586426496506,0.474202574789524,0.47580953985452645,0.4768182456493378,0.47721000015735626,0.477897260338068,0.47868331149220467,0.4798942424356937,0.48083210438489904,0.48233432918786995,0.4825453333556652,0.48372062146663664,0.48404486328363416,0.48417936712503434],"label":"RefinedWeb"},"layout":{"xaxis":{"title":{"text":"Training tokens (billions)"}},"yaxis":{"title":{"text":"Agg Score"},"range":[0.35,0.5]},"title":{"text":"Dataset Ablations"}}}
 
 
data/plots/dededup_difference/big-run-sampled_full_filtered_no_dedup.json DELETED
@@ -1 +0,0 @@
1
- {"data":{"x":[0.0,10.48576,20.97152,28.311552000000002,31.45728,41.94304,52.4288,62.91456,73.40032000000001,83.88608,94.37184,104.8576,115.34336,125.82912,136.31488000000002,146.80064000000002,157.28640000000001,167.77216,178.25792,188.74368,199.22944,209.7152,220.20096,230.68672,241.17248,251.65824,262.144,272.62976000000003,283.11552,293.60128000000003,304.08704,314.57280000000003,325.05856,335.54432,346.03008],"y":[null,null,null,null,0.39733172245323656,0.4170659720897675,0.42569294571876515,0.42934197112917893,0.4318342722952365,0.43489449843764305,0.43767731785774233,0.43933030366897585,0.4432003878057003,0.44580490812659257,0.44852474182844154,0.4508663788437842,0.45200284123420714,0.45270049944519997,0.45411895886063575,0.45437362268567083,0.4551906920969486,0.45563211515545843,0.4572733923792839,0.45865254402160643,0.4608928956091404,0.46221072375774386,0.464424304664135,0.4650039754807949,0.465817741304636,0.46602572202682496,0.4663869492709637,0.466600227355957,0.4675856366753578,0.4670651629567145,0.46774301379919053],"label":"FineWeb filtered only"},"layout":{"xaxis":{"title":{"text":"Training tokens (billions)"}},"yaxis":{"title":{"text":"Agg Score"},"range":[0.35,0.5]},"title":{"text":"Dataset Ablations"}}}
 
 
data/plots/dededup_difference/index.json DELETED
@@ -1 +0,0 @@
1
- {"files":{"big-run-fineweb-cross-dedup-fixed":{"file":"big-run-fineweb-cross-dedup-fixed.json"},"big-run-refinedweb":{"file":"big-run-refinedweb.json"},"big-run-sampled_full_filtered_no_dedup":{"file":"big-run-sampled_full_filtered_no_dedup.json"}}}
 
 
data/plots/dedup_all_dumps_bad/agg_score.json DELETED
@@ -1 +0,0 @@
1
- {"data":{"big-run-refinedweb":{"x":[0.0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,28.311552000000002,29.360128000000003,31.45728,33.554432000000006,35.651584,37.748736,39.845888,41.94304,44.040192000000005,46.137344000000006,48.234496,50.331648,52.4288,54.525952000000004,56.623104000000005,58.720256000000006,60.817408,62.91456,65.011712,67.10886400000001,69.206016,71.303168,73.40032000000001,75.497472,77.59462400000001,79.691776,81.788928,83.88608,85.983232,88.08038400000001,90.177536,92.27468800000001,94.37184,96.468992,98.56614400000001,100.663296,102.76044800000001,104.8576,106.95475200000001,109.05190400000001,111.149056,113.24620800000001,115.34336,117.44051200000001,119.537664,121.634816,123.73196800000001,125.82912,127.92627200000001,130.023424,132.120576,134.21772800000002,136.31488000000002,138.412032,140.509184,142.606336,144.70348800000002,146.80064000000002,148.897792,150.994944,153.092096,155.18924800000002,157.28640000000001,159.383552,161.480704,163.577856,165.67500800000002,167.77216,169.869312,171.966464,174.06361600000002,176.16076800000002,178.25792,180.355072,182.452224,184.54937600000002,186.64652800000002,188.74368,190.840832,192.937984,195.03513600000002,197.13228800000002,199.22944,201.326592,203.423744,205.52089600000002,207.61804800000002,209.7152,211.812352,213.90950400000003,216.00665600000002,218.10380800000001,220.20096,222.298112,224.39526400000003,226.49241600000002,228.589568,230.68672,232.783872,234.88102400000002,236.97817600000002,239.075328,241.17248,243.269632,245.36678400000002,247.46393600000002,249.561088,251.65824,253.75539200000003,255.85254400000002,257.949696,260.046848,262.144,264.241152,266.338304,268.43545600000004,270.53260800000004,272.62976000000003,274.726912,276.824064,278.921216,281.018368,283.11552,285.212672,287.309824,289.40697600000004,291.50412800000004,293.60128
000000003,295.698432,297.795584,299.892736,301.989888,304.08704,306.184192,308.28134400000005,310.37849600000004,312.47564800000004,314.57280000000003,316.669952,318.767104,320.864256,322.961408,325.05856,327.155712,329.25286400000005,331.35001600000004,333.44716800000003,335.54432,337.641472,339.738624,341.835776,343.932928,346.03008,348.12723200000005,350.22438400000004],"y":[0.3308933284133672,0.3534814938902855,0.3764607086777687,0.38782499730587,0.3981050960719585,0.4028486795723438,0.4125883243978023,0.4117814563214779,0.414029736071825,0.4197172522544861,0.4211113378405571,0.4279881417751312,0.4280137903988361,0.4280424378812313,0.4291964024305343,0.4326301179826259,0.4371833503246307,0.4346669465303421,0.4336562640964985,0.4432648755609989,0.4401291646063328,0.4394684173166752,0.4476612061262131,0.4465444348752498,0.4472153298556804,0.4433343075215816,0.4510187618434429,0.4459567815065384,0.4460812956094742,0.4498684890568256,0.4529943652451038,0.4528274349868297,0.4551213420927524,0.4549156539142132,0.4564928151667118,0.4576693661510944,0.4557182416319847,0.4536240361630916,0.457439012825489,0.4570476822555065,0.4589823484420776,0.462024375796318,0.4540738053619861,0.4550252184271812,0.4576593860983848,0.4573238864541054,0.4575810581445694,0.4622134491801262,0.4592566937208175,0.4614734016358852,0.4637473002076149,0.4625372551381588,0.4613912180066108,0.4597448222339153,0.4594792164862156,0.4662549719214439,0.4634026065468788,0.4633508697152138,0.4635734222829342,0.4628961533308029,0.4670135043561458,0.4639505892992019,0.4631133340299129,0.4665167145431041,0.4672448337078094,0.4693268723785877,0.4630668573081493,0.4676454700529575,0.4646359197795391,0.4621579721570015,0.4692446552217006,0.4704835228621959,0.4663223996758461,0.4680556617677212,0.466339822858572,0.4682099223136902,0.4711195565760135,0.4722655527293682,0.4727961830794811,0.4676857478916645,0.4719390422105789,0.4713102728128433,0.4712141714990139,0.4721613004803657,0.4713456854224205,0.46829709
03813839,0.4679934531450271,0.4685162976384163,0.4679946713149547,0.4681242071092129,0.4702276065945625,0.472664151340723,0.4730790853500366,0.4731674715876579,0.4718914777040481,0.4719801284372806,0.4761029370129108,0.4735167175531387,0.4730370938777923,0.4730173237621784,0.4735377207398414,0.4777223989367485,0.4796326830983162,0.4734170883893966,0.4739485755562782,0.4748299159109592,0.4765299335122108,0.4745025858283043,0.4754423759877682,0.4784592799842357,0.4761341325938701,0.4760282784700393,0.4769757278263569,0.47154351323843,0.4786738082766533,0.4804279990494251,0.4777076803147793,0.4798569902777672,0.4759011939167976,0.4784621745347976,0.479673832654953,0.4780617095530033,0.48076206818223,0.47995800152421,0.4790860973298549,0.4817167408764362,0.4811586998403072,0.482547752559185,0.4816697351634502,0.4809327870607376,0.4816545359790325,0.4804601892828941,0.4776877984404564,0.4813711903989315,0.4844604581594467,0.4819537848234176,0.4820829331874847,0.4778126627206802,0.482935007661581,0.48230691999197,0.4826001971960068,0.4823969900608063,0.4811219945549965,0.4789146520197391,0.484035175293684,0.4848698377609253,0.4855728335678577,0.4825376532971859,0.485215101391077,0.4824351668357849,0.4835342466831207,0.4822137206792831,0.4838785007596016,0.4837255179882049,0.4853012599050998,0.4857851006090641,0.4863366298377514,0.4856646582484245,0.4842503517866134,0.4838776960968971,0.4846346862614155,0.4837041422724724,0.4813097268342972,0.4873070046305656,0.4841253720223903,0.4837464913725853,0.483069509267807,0.4851242564618587,0.4861010462045669],"label":"RefinedWeb"},"big-run-sampled_full_filtered_no_dedup":{"x":[0.0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,28.311552000000002,29.360128000000003,31.45728,33.554432000000006,35.651584,37.748736,39.845888,41.94304,44.040192000000005,46.137344000000006,48.234496,50.331648,52.4
288,54.525952000000004,56.623104000000005,58.720256000000006,60.817408,62.91456,65.011712,67.10886400000001,69.206016,71.303168,73.40032000000001,75.497472,77.59462400000001,79.691776,81.788928,83.88608,85.983232,88.08038400000001,90.177536,92.27468800000001,94.37184,96.468992,98.56614400000001,100.663296,102.76044800000001,104.8576,106.95475200000001,109.05190400000001,111.149056,113.24620800000001,115.34336,117.44051200000001,119.537664,121.634816,123.73196800000001,125.82912,127.92627200000001,130.023424,132.120576,134.21772800000002,136.31488000000002,138.412032,140.509184,142.606336,144.70348800000002,146.80064000000002,148.897792,150.994944,153.092096,155.18924800000002,157.28640000000001,159.383552,161.480704,163.577856,165.67500800000002,167.77216,169.869312,171.966464,174.06361600000002,176.16076800000002,178.25792,180.355072,182.452224,184.54937600000002,186.64652800000002,188.74368,190.840832,192.937984,195.03513600000002,197.13228800000002,199.22944,201.326592,203.423744,205.52089600000002,207.61804800000002,209.7152,211.812352,213.90950400000003,216.00665600000002,218.10380800000001,220.20096,222.298112,224.39526400000003,226.49241600000002,228.589568,230.68672,232.783872,234.88102400000002,236.97817600000002,239.075328,241.17248,243.269632,245.36678400000002,247.46393600000002,249.561088,251.65824,253.75539200000003,255.85254400000002,257.949696,260.046848,262.144,264.241152,266.338304,268.43545600000004,270.53260800000004,272.62976000000003,274.726912,276.824064,278.921216,281.018368,283.11552,285.212672,287.309824,289.40697600000004,291.50412800000004,293.60128000000003,295.698432,297.795584,299.892736,301.989888,304.08704,306.184192,308.28134400000005,310.37849600000004,312.47564800000004,314.57280000000003,316.669952,318.767104,320.864256,322.961408,325.05856,327.155712,329.25286400000005,331.35001600000004,333.44716800000003,335.54432,337.641472,339.738624,341.835776,343.932928,346.03008,348.12723200000005,350.22438400000004],"y":[0.33089332841336
72,0.3605199865996837,0.3733148723840713,0.3882005847990513,0.3934122696518898,0.3947227671742439,0.4042885974049568,0.3974800482392311,0.4055779427289963,0.4133470430970192,0.4117913842201233,0.4113653488457203,0.4149517640471458,0.4187851920723915,0.4252083078026771,0.4206527359783649,0.4240428246557712,0.422003373503685,0.4280910938978195,0.4244147576391697,0.4316282644867897,0.4295645765960216,0.4310102686285972,0.4360743537545204,0.4313482865691185,0.4350991360843181,0.4378576353192329,0.4335876516997814,0.4347924515604973,0.4348904751241207,0.436600212007761,0.430036511272192,0.4350974671542644,0.4399556629359722,0.4371416717767715,0.4363861419260502,0.4376698136329651,0.4405004419386387,0.4373639523983001,0.4379038028419018,0.4371281825006008,0.4393439553678036,0.440426729619503,0.4401675276458263,0.4429537951946258,0.4449137263000011,0.4434786736965179,0.4450470842421055,0.4454202279448509,0.4394537284970283,0.442185215651989,0.4461225643754005,0.4427758157253265,0.4430646039545536,0.4476901069283485,0.4478763341903686,0.4493869319558143,0.4448477327823639,0.450044184923172,0.4498609118163585,0.4457665979862213,0.4506924152374267,0.449855338782072,0.448790930211544,0.4474099352955818,0.4546772800385952,0.4529431238770485,0.452015146613121,0.4502020999789238,0.4493804536759853,0.4523266032338142,0.4551868587732315,0.4501944817602634,0.4493303671479225,0.4526805207133293,0.4533850513398647,0.4518048763275146,0.4518973492085933,0.4531301632523536,0.4518006071448326,0.4553494565188885,0.4528752230107784,0.4536322727799415,0.4561733976006508,0.4549491256475448,0.4574789106845855,0.4577847123146057,0.4563642293214798,0.4578686729073524,0.4561499990522861,0.4537816494703293,0.4542164430022239,0.4559455662965774,0.4554723873734474,0.4575514122843742,0.4575202167034149,0.4592722058296203,0.4585275091230869,0.4580587856471538,0.456934317946434,0.4577495418488979,0.4540119916200638,0.4570806957781315,0.4608120545744896,0.4588425755500793,0.4578334167599678,0.4610816091
299057,0.4598177038133144,0.461849745362997,0.4631866924464702,0.4601576402783394,0.4646804705262184,0.4632389545440674,0.4604574106633663,0.4602976888418197,0.4581312239170074,0.4654182009398937,0.4655338563024997,0.4616620391607284,0.461054053157568,0.4613021649420261,0.4658613465726375,0.4633531905710697,0.4613638147711754,0.4643996246159076,0.462500050663948,0.4650798961520195,0.4648764543235302,0.4639869071543216,0.4634246975183487,0.46585888043046,0.4639799632132053,0.4630857892334461,0.4644265696406364,0.4642998576164245,0.4686848931014538,0.4687492996454239,0.4650243632495403,0.4627032242715359,0.4665953740477562,0.4660026729106903,0.4664581045508384,0.4676475040614605,0.4657339677214622,0.4664678275585174,0.4673498086631298,0.4676674827933311,0.4680955372750759,0.4681585058569908,0.4659864418208599,0.4686457589268684,0.4661462865769863,0.4658931568264961,0.4674226939678192,0.46805215254426,0.4682257212698459,0.4689070098102093,0.4699570722877979,0.4655096270143986,0.4688013233244419,0.4707522802054882,0.4661469310522079,0.4688841328024864,0.4671329781413078,0.4662554152309894,0.4697433896362781,0.4698473587632179,0.4676505327224731,0.4696521013975143],"label":"FineWeb filtered 
only"},"big-run-fineweb-cross-dedup-fixed":{"x":[0.0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,29.360128000000003,31.45728,33.554432000000006,35.651584,37.748736,39.845888,41.94304,44.040192000000005,46.137344000000006,48.234496,50.331648,52.4288,54.525952000000004,56.623104000000005,58.720256000000006,60.817408,62.91456,65.011712,67.10886400000001,69.206016,71.303168,73.40032000000001,75.497472,77.59462400000001,79.691776,81.788928,83.88608,85.983232,88.08038400000001,90.177536,92.27468800000001,94.37184,96.468992,98.56614400000001,100.663296,102.76044800000001,104.8576,106.95475200000001,109.05190400000001,111.149056,113.24620800000001,115.34336,117.44051200000001,119.537664,121.634816,123.73196800000001,125.82912,127.92627200000001,130.023424,132.120576,134.21772800000002,136.31488000000002,138.412032,140.509184,142.606336,144.70348800000002,146.80064000000002,148.897792,150.994944,153.092096,155.18924800000002,157.28640000000001,159.383552,161.480704,163.577856,165.67500800000002,167.77216,169.869312,171.966464,174.06361600000002,176.16076800000002,178.25792,180.355072,182.452224,184.54937600000002,186.64652800000002,188.74368,190.840832,192.937984,195.03513600000002,197.13228800000002,199.22944,201.326592,203.423744,205.52089600000002,207.61804800000002,209.7152,211.812352,213.90950400000003,216.00665600000002,218.10380800000001,220.20096,222.298112,224.39526400000003,226.49241600000002,228.589568,230.68672,232.783872,234.88102400000002,236.97817600000002,239.075328,241.17248,243.269632,245.36678400000002,247.46393600000002,249.561088,251.65824,253.75539200000003,255.85254400000002,257.949696,260.046848,262.144,264.241152,266.338304,268.43545600000004,270.53260800000004,272.62976000000003,274.726912,276.824064,278.921216,281.018368,283.11552,285.212672,287.309824,289.40697600000004,291.50412800000004,293.6012800000000
3,295.698432,297.795584,299.892736,301.989888,304.08704,306.184192,308.28134400000005,310.37849600000004,312.47564800000004,314.57280000000003,316.669952,318.767104,320.864256,322.961408,325.05856,327.155712,329.25286400000005,331.35001600000004,333.44716800000003,335.54432,337.641472,339.738624,341.835776,343.932928,346.03008,348.12723200000005,350.22438400000004],"y":[0.3308933284133672,0.3551952373236418,0.3736435137689113,0.3814037963747978,0.3948809280991554,0.3996850810945034,0.4089604057371616,0.4100853353738785,0.4119834117591381,0.4168377220630646,0.4186493046581745,0.4169826358556747,0.4234288297593593,0.4229162000119686,0.4273439794778824,0.4290364980697632,0.4291782416403293,0.4296907968819141,0.4311576783657074,0.4326641112565994,0.430318683385849,0.430436260998249,0.4339037239551544,0.4363459683954716,0.4357402548193931,0.4342963136732578,0.4366712383925915,0.4363959729671478,0.436981026083231,0.4447868093848228,0.4411709941923618,0.4406092017889023,0.4424176625907421,0.4423875361680984,0.4422253370285034,0.4410557933151722,0.4447037056088447,0.4454837813973427,0.4435960277915001,0.4468514993786812,0.4479999616742134,0.4428562931716442,0.445764634758234,0.4456562362611294,0.4488007053732872,0.4475954286754131,0.4468922987580299,0.4548408314585686,0.4511027485132217,0.4530330970883369,0.4483681954443455,0.4531726539134979,0.45334542542696,0.4544384703040123,0.4530758671462536,0.4540613554418087,0.4510113634169101,0.4538320265710354,0.4518541917204857,0.4536847211420536,0.4532708041369915,0.4552236869931221,0.455034039914608,0.4562875479459762,0.4532428197562694,0.4574853852391243,0.4517738744616508,0.4579889141023159,0.4538268558681011,0.456730306148529,0.4526018649339676,0.4562746733427048,0.4560015797615051,0.4555426277220249,0.4561501257121563,0.4524396173655987,0.4557023830711841,0.4589769169688225,0.4581078588962555,0.4620813727378845,0.4586601965129375,0.4568093195557594,0.4569808952510357,0.4567535072565079,0.4575250148773193,0.4606908001005649,0
.4603964723646641,0.4622848592698574,0.4594669193029403,0.4640629850327968,0.4604269936680794,0.4634841009974479,0.4644578285515308,0.4642514958977699,0.4666304066777229,0.4616626128554344,0.4588956907391548,0.4620226770639419,0.4628621749579906,0.4595407098531723,0.4635516740381717,0.46005355194211,0.4601523540914058,0.4644204638898372,0.4620639197528362,0.46614545956254,0.4636696502566337,0.4610077403485775,0.4640897810459137,0.4636163525283336,0.4630545899271965,0.466012816876173,0.4650349207222461,0.4613720141351223,0.4644323363900184,0.4647249802947044,0.4656480401754379,0.4651664271950722,0.4622530452907085,0.4655019529163837,0.4650313258171081,0.466718140989542,0.4661559611558914,0.4661237150430679,0.4664223715662956,0.4640601389110088,0.4642657749354839,0.4633881188929081,0.4629989042878151,0.4685831367969513,0.4675870984792709,0.467183344066143,0.4678030684590339,0.4660939238965511,0.4691914953291416,0.4670972637832165,0.468262892216444,0.4672016054391861,0.4676182121038437,0.4698677137494087,0.4658828042447567,0.4701816700398922,0.4684622809290886,0.466015312820673,0.4675401039421558,0.4693200923502445,0.4702670983970165,0.4679145030677318,0.4676233418285846,0.4674933589994907,0.4678357951343059,0.4669915996491909,0.4657857678830623,0.4666901864111423,0.4669371582567692,0.4672787226736545,0.4684535376727581,0.4685697965323925,0.4694835692644119,0.4683254994451999,0.4712230190634727,0.4683987610042095,0.4707653746008873,0.4663059376180172,0.4683133698999882,0.4686385430395603,0.4657671600580215,0.4692615270614624],"label":"FineWeb full MinHash"}},"layout":{"xaxis":{"title":{"text":"Training tokens (billions)"}},"title":{"text":"Dedup across all dumps does not improve performance"}}}
 
 
data/plots/dedup_all_dumps_bad/arc_acc_norm.json DELETED
@@ -1 +0,0 @@
1
- {"data":{"big-run-refinedweb":{"x":[0.0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,28.311552000000002,29.360128000000003,31.45728,33.554432000000006,35.651584,37.748736,39.845888,41.94304,44.040192000000005,46.137344000000006,48.234496,50.331648,52.4288,54.525952000000004,56.623104000000005,58.720256000000006,60.817408,62.91456,65.011712,67.10886400000001,69.206016,71.303168,73.40032000000001,75.497472,77.59462400000001,79.691776,81.788928,83.88608,85.983232,88.08038400000001,90.177536,92.27468800000001,94.37184,96.468992,98.56614400000001,100.663296,102.76044800000001,104.8576,106.95475200000001,109.05190400000001,111.149056,113.24620800000001,115.34336,117.44051200000001,119.537664,121.634816,123.73196800000001,125.82912,127.92627200000001,130.023424,132.120576,134.21772800000002,136.31488000000002,138.412032,140.509184,142.606336,144.70348800000002,146.80064000000002,148.897792,150.994944,153.092096,155.18924800000002,157.28640000000001,159.383552,161.480704,163.577856,165.67500800000002,167.77216,169.869312,171.966464,174.06361600000002,176.16076800000002,178.25792,180.355072,182.452224,184.54937600000002,186.64652800000002,188.74368,190.840832,192.937984,195.03513600000002,197.13228800000002,199.22944,201.326592,203.423744,205.52089600000002,207.61804800000002,209.7152,211.812352,213.90950400000003,216.00665600000002,218.10380800000001,220.20096,222.298112,224.39526400000003,226.49241600000002,228.589568,230.68672,232.783872,234.88102400000002,236.97817600000002,239.075328,241.17248,243.269632,245.36678400000002,247.46393600000002,249.561088,251.65824,253.75539200000003,255.85254400000002,257.949696,260.046848,262.144,264.241152,266.338304,268.43545600000004,270.53260800000004,272.62976000000003,274.726912,276.824064,278.921216,281.018368,283.11552,285.212672,287.309824,289.40697600000004,291.50412800000004,293.60128
000000003,295.698432,297.795584,299.892736,301.989888,304.08704,306.184192,308.28134400000005,310.37849600000004,312.47564800000004,314.57280000000003,316.669952,318.767104,320.864256,322.961408,325.05856,327.155712,329.25286400000005,331.35001600000004,333.44716800000003,335.54432,337.641472,339.738624,341.835776,343.932928,346.03008,348.12723200000005,350.22438400000004],"y":[0.2509999871253967,0.2899999916553497,0.31700000166893,0.3409999907016754,0.3425000011920929,0.3485000133514404,0.3555000126361847,0.3574999868869781,0.3585000038146972,0.363999992609024,0.3619999885559082,0.3675000071525574,0.3865000009536743,0.3810000121593475,0.3740000128746032,0.3810000121593475,0.3810000121593475,0.3860000073909759,0.3810000121593475,0.3894999921321869,0.3849999904632568,0.3855000138282776,0.3989999890327453,0.3980000019073486,0.3995000123977661,0.395000010728836,0.4084999859333038,0.4040000140666961,0.4004999995231628,0.3955000042915344,0.4135000109672546,0.4070000052452087,0.4104999899864197,0.4014999866485595,0.4099999964237213,0.4199999868869781,0.414000004529953,0.402999997138977,0.4214999973773956,0.4095000028610229,0.4059999883174896,0.4090000092983246,0.4074999988079071,0.4120000004768371,0.4154999852180481,0.4189999997615814,0.4149999916553497,0.429500013589859,0.4154999852180481,0.4214999973773956,0.4244999885559082,0.4205000102519989,0.4269999861717224,0.4214999973773956,0.4180000126361847,0.4415000081062317,0.4320000112056732,0.4350000023841858,0.4259999990463257,0.4300000071525574,0.4259999990463257,0.4189999997615814,0.4269999861717224,0.4199999868869781,0.426499992609024,0.4350000023841858,0.4289999902248382,0.4345000088214874,0.4259999990463257,0.426499992609024,0.4395000040531158,0.4395000040531158,0.4359999895095825,0.4280000030994415,0.4370000064373016,0.4329999983310699,0.4309999942779541,0.4490000009536743,0.4399999976158142,0.4339999854564667,0.4399999976158142,0.4345000088214874,0.429500013589859,0.4370000064373016,0.4379999935626983,0.428499996662
1399,0.4309999942779541,0.4350000023841858,0.4399999976158142,0.4314999878406524,0.4300000071525574,0.4410000145435333,0.4345000088214874,0.4410000145435333,0.4345000088214874,0.4339999854564667,0.4460000097751617,0.4410000145435333,0.4469999969005584,0.4480000138282776,0.4435000121593475,0.4375,0.4519999921321869,0.4480000138282776,0.4429999887943268,0.4519999921321869,0.4435000121593475,0.4334999918937683,0.4460000097751617,0.4564999938011169,0.4469999969005584,0.453000009059906,0.4485000073909759,0.4410000145435333,0.4444999992847442,0.4485000073909759,0.457500010728836,0.4469999969005584,0.4535000026226043,0.4535000026226043,0.4485000073909759,0.4490000009536743,0.4505000114440918,0.4595000147819519,0.4544999897480011,0.453000009059906,0.4605000019073486,0.4620000123977661,0.457500010728836,0.453000009059906,0.4550000131130218,0.460999995470047,0.4449999928474426,0.4474999904632568,0.457500010728836,0.4584999978542328,0.4494999945163727,0.4474999904632568,0.4625000059604645,0.4639999866485595,0.4555000066757202,0.4469999969005584,0.4600000083446502,0.453000009059906,0.4629999995231628,0.4589999914169311,0.4614999890327453,0.4555000066757202,0.4560000002384186,0.4580000042915344,0.4584999978542328,0.4560000002384186,0.4605000019073486,0.4595000147819519,0.4639999866485595,0.4614999890327453,0.4564999938011169,0.4634999930858612,0.4625000059604645,0.4614999890327453,0.4679999947547912,0.4584999978542328,0.4595000147819519,0.4505000114440918,0.4544999897480011,0.4595000147819519,0.4620000123977661,0.4670000076293945,0.4555000066757202],"label":"RefinedWeb"},"big-run-sampled_full_filtered_no_dedup":{"x":[0.0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,28.311552000000002,29.360128000000003,31.45728,33.554432000000006,35.651584,37.748736,39.845888,41.94304,44.040192000000005,46.137344000000006,48.234496,50.331648,52.4288,54.52
5952000000004,56.623104000000005,58.720256000000006,60.817408,62.91456,65.011712,67.10886400000001,69.206016,71.303168,73.40032000000001,75.497472,77.59462400000001,79.691776,81.788928,83.88608,85.983232,88.08038400000001,90.177536,92.27468800000001,94.37184,96.468992,98.56614400000001,100.663296,102.76044800000001,104.8576,106.95475200000001,109.05190400000001,111.149056,113.24620800000001,115.34336,117.44051200000001,119.537664,121.634816,123.73196800000001,125.82912,127.92627200000001,130.023424,132.120576,134.21772800000002,136.31488000000002,138.412032,140.509184,142.606336,144.70348800000002,146.80064000000002,148.897792,150.994944,153.092096,155.18924800000002,157.28640000000001,159.383552,161.480704,163.577856,165.67500800000002,167.77216,169.869312,171.966464,174.06361600000002,176.16076800000002,178.25792,180.355072,182.452224,184.54937600000002,186.64652800000002,188.74368,190.840832,192.937984,195.03513600000002,197.13228800000002,199.22944,201.326592,203.423744,205.52089600000002,207.61804800000002,209.7152,211.812352,213.90950400000003,216.00665600000002,218.10380800000001,220.20096,222.298112,224.39526400000003,226.49241600000002,228.589568,230.68672,232.783872,234.88102400000002,236.97817600000002,239.075328,241.17248,243.269632,245.36678400000002,247.46393600000002,249.561088,251.65824,253.75539200000003,255.85254400000002,257.949696,260.046848,262.144,264.241152,266.338304,268.43545600000004,270.53260800000004,272.62976000000003,274.726912,276.824064,278.921216,281.018368,283.11552,285.212672,287.309824,289.40697600000004,291.50412800000004,293.60128000000003,295.698432,297.795584,299.892736,301.989888,304.08704,306.184192,308.28134400000005,310.37849600000004,312.47564800000004,314.57280000000003,316.669952,318.767104,320.864256,322.961408,325.05856,327.155712,329.25286400000005,331.35001600000004,333.44716800000003,335.54432,337.641472,339.738624,341.835776,343.932928,346.03008,348.12723200000005,350.22438400000004],"y":[0.2509999871253967,0.2894
999980926513,0.3235000073909759,0.3389999866485595,0.3384999930858612,0.3459999859333038,0.359499990940094,0.3429999947547912,0.3619999885559082,0.3564999997615814,0.3625000119209289,0.363999992609024,0.3680000007152557,0.3680000007152557,0.3785000145435333,0.3684999942779541,0.375,0.3734999895095825,0.3849999904632568,0.3944999873638153,0.3865000009536743,0.395000010728836,0.3935000002384186,0.3980000019073486,0.3910000026226043,0.3885000050067901,0.3914999961853027,0.3815000057220459,0.395000010728836,0.3894999921321869,0.395000010728836,0.3935000002384186,0.4034999907016754,0.4004999995231628,0.3970000147819519,0.3975000083446502,0.3995000123977661,0.3980000019073486,0.4034999907016754,0.3959999978542328,0.3989999890327453,0.402999997138977,0.3880000114440918,0.3980000019073486,0.4040000140666961,0.3989999890327453,0.3970000147819519,0.3925000131130218,0.4120000004768371,0.3935000002384186,0.395000010728836,0.4070000052452087,0.3935000002384186,0.4034999907016754,0.4189999997615814,0.4129999876022339,0.4160000085830688,0.4149999916553497,0.418500006198883,0.4225000143051147,0.4174999892711639,0.4210000038146972,0.4045000076293945,0.4079999923706054,0.4124999940395355,0.4144999980926513,0.4169999957084656,0.4194999933242798,0.4154999852180481,0.4169999957084656,0.4225000143051147,0.4225000143051147,0.4230000078678131,0.4160000085830688,0.4325000047683716,0.4325000047683716,0.4199999868869781,0.4199999868869781,0.4189999997615814,0.4269999861717224,0.4259999990463257,0.4230000078678131,0.4144999980926513,0.4329999983310699,0.4275000095367431,0.4305000007152557,0.4289999902248382,0.4235000014305115,0.4235000014305115,0.4325000047683716,0.4244999885559082,0.4314999878406524,0.4194999933242798,0.4350000023841858,0.4269999861717224,0.4235000014305115,0.4300000071525574,0.4284999966621399,0.4255000054836273,0.4280000030994415,0.4345000088214874,0.4225000143051147,0.4334999918937683,0.4300000071525574,0.4350000023841858,0.429500013589859,0.4325000047683716,0.438499987125
3967,0.4345000088214874,0.4354999959468841,0.4359999895095825,0.4354999959468841,0.4424999952316284,0.4424999952316284,0.4320000112056732,0.4280000030994415,0.4390000104904175,0.4480000138282776,0.4415000081062317,0.4384999871253967,0.4390000104904175,0.4494999945163727,0.4449999928474426,0.4384999871253967,0.4424999952316284,0.4359999895095825,0.445499986410141,0.4399999976158142,0.4375,0.4410000145435333,0.4384999871253967,0.4375,0.4329999983310699,0.4370000064373016,0.4354999959468841,0.4440000057220459,0.4384999871253967,0.4384999871253967,0.4390000104904175,0.4424999952316284,0.4379999935626983,0.4345000088214874,0.4354999959468841,0.4440000057220459,0.4395000040531158,0.4465000033378601,0.4404999911785126,0.4505000114440918,0.4480000138282776,0.4449999928474426,0.445499986410141,0.4410000145435333,0.4485000073909759,0.4460000097751617,0.4480000138282776,0.4465000033378601,0.4460000097751617,0.4460000097751617,0.4395000040531158,0.4474999904632568,0.4469999969005584,0.4404999911785126,0.4440000057220459,0.4435000121593475,0.4435000121593475,0.4514999985694885,0.4474999904632568,0.4474999904632568,0.445499986410141],"label":"FineWeb filtered 
only"},"big-run-fineweb-cross-dedup-fixed":{"x":[0.0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,29.360128000000003,31.45728,33.554432000000006,35.651584,37.748736,39.845888,41.94304,44.040192000000005,46.137344000000006,48.234496,50.331648,52.4288,54.525952000000004,56.623104000000005,58.720256000000006,60.817408,62.91456,65.011712,67.10886400000001,69.206016,71.303168,73.40032000000001,75.497472,77.59462400000001,79.691776,81.788928,83.88608,85.983232,88.08038400000001,90.177536,92.27468800000001,94.37184,96.468992,98.56614400000001,100.663296,102.76044800000001,104.8576,106.95475200000001,109.05190400000001,111.149056,113.24620800000001,115.34336,117.44051200000001,119.537664,121.634816,123.73196800000001,125.82912,127.92627200000001,130.023424,132.120576,134.21772800000002,136.31488000000002,138.412032,140.509184,142.606336,144.70348800000002,146.80064000000002,148.897792,150.994944,153.092096,155.18924800000002,157.28640000000001,159.383552,161.480704,163.577856,165.67500800000002,167.77216,169.869312,171.966464,174.06361600000002,176.16076800000002,178.25792,180.355072,182.452224,184.54937600000002,186.64652800000002,188.74368,190.840832,192.937984,195.03513600000002,197.13228800000002,199.22944,201.326592,203.423744,205.52089600000002,207.61804800000002,209.7152,211.812352,213.90950400000003,216.00665600000002,218.10380800000001,220.20096,222.298112,224.39526400000003,226.49241600000002,228.589568,230.68672,232.783872,234.88102400000002,236.97817600000002,239.075328,241.17248,243.269632,245.36678400000002,247.46393600000002,249.561088,251.65824,253.75539200000003,255.85254400000002,257.949696,260.046848,262.144,264.241152,266.338304,268.43545600000004,270.53260800000004,272.62976000000003,274.726912,276.824064,278.921216,281.018368,283.11552,285.212672,287.309824,289.40697600000004,291.50412800000004,293.6012800000000
3,295.698432,297.795584,299.892736,301.989888,304.08704,306.184192,308.28134400000005,310.37849600000004,312.47564800000004,314.57280000000003,316.669952,318.767104,320.864256,322.961408,325.05856,327.155712,329.25286400000005,331.35001600000004,333.44716800000003,335.54432,337.641472,339.738624,341.835776,343.932928,346.03008,348.12723200000005,350.22438400000004],"y":[0.2509999871253967,0.2904999852180481,0.3289999961853027,0.3379999995231628,0.3400000035762787,0.3535000085830688,0.3700000047683716,0.3619999885559082,0.3695000112056732,0.3625000119209289,0.3745000064373016,0.3804999887943268,0.3835000097751617,0.3810000121593475,0.3785000145435333,0.3799999952316284,0.3885000050067901,0.3919999897480011,0.3899999856948852,0.3939999938011169,0.4004999995231628,0.3889999985694885,0.4000000059604645,0.3930000066757202,0.4025000035762787,0.398499995470047,0.3939999938011169,0.3989999890327453,0.4020000100135803,0.4079999923706054,0.4129999876022339,0.4014999866485595,0.4129999876022339,0.4079999923706054,0.4115000069141388,0.4070000052452087,0.4095000028610229,0.4199999868869781,0.4165000021457672,0.4239999949932098,0.4129999876022339,0.4034999907016754,0.4050000011920929,0.4135000109672546,0.4189999997615814,0.418500006198883,0.4199999868869781,0.4365000128746032,0.4320000112056732,0.4255000054836273,0.4259999990463257,0.4244999885559082,0.4275000095367431,0.4259999990463257,0.4210000038146972,0.421999990940094,0.4099999964237213,0.4305000007152557,0.4239999949932098,0.4194999933242798,0.4205000102519989,0.4255000054836273,0.414000004529953,0.4210000038146972,0.4180000126361847,0.4429999887943268,0.429500013589859,0.4165000021457672,0.4239999949932098,0.4255000054836273,0.4180000126361847,0.4325000047683716,0.4305000007152557,0.4329999983310699,0.4325000047683716,0.4320000112056732,0.4375,0.4410000145435333,0.4395000040531158,0.4379999935626983,0.4280000030994415,0.4365000128746032,0.4205000102519989,0.426499992609024,0.4280000030994415,0.4354999959468841,0.431499987
8406524,0.429500013589859,0.421999990940094,0.4345000088214874,0.429500013589859,0.4354999959468841,0.4314999878406524,0.4404999911785126,0.4384999871253967,0.4359999895095825,0.4345000088214874,0.4320000112056732,0.4345000088214874,0.4375,0.4410000145435333,0.4280000030994415,0.4320000112056732,0.44200000166893,0.4460000097751617,0.4390000104904175,0.4314999878406524,0.4339999854564667,0.4390000104904175,0.4460000097751617,0.4309999942779541,0.4444999992847442,0.44200000166893,0.4404999911785126,0.4395000040531158,0.4370000064373016,0.4519999921321869,0.4429999887943268,0.4395000040531158,0.4415000081062317,0.4384999871253967,0.4494999945163727,0.4469999969005584,0.4375,0.4395000040531158,0.4345000088214874,0.4390000104904175,0.4375,0.4309999942779541,0.4320000112056732,0.4415000081062317,0.4354999959468841,0.445499986410141,0.4404999911785126,0.4429999887943268,0.4395000040531158,0.4354999959468841,0.4429999887943268,0.4410000145435333,0.4494999945163727,0.4429999887943268,0.4460000097751617,0.445499986410141,0.4429999887943268,0.4429999887943268,0.4350000023841858,0.4474999904632568,0.4415000081062317,0.4424999952316284,0.4375,0.4444999992847442,0.4424999952316284,0.4354999959468841,0.445499986410141,0.4379999935626983,0.4449999928474426,0.4365000128746032,0.4474999904632568,0.4440000057220459,0.4465000033378601,0.445499986410141,0.4474999904632568,0.4494999945163727,0.4449999928474426,0.4444999992847442,0.44200000166893,0.4345000088214874,0.4404999911785126],"label":"FineWeb full MinHash"}},"layout":{"xaxis":{"title":{"text":"Training tokens (billions)"}},"title":{"text":"Dedup across all dumps does not improve performance"}}}
 
 
data/plots/dedup_all_dumps_bad/commonsense_qa_acc_norm.json DELETED
@@ -1 +0,0 @@
1
- {"data":{"big-run-refinedweb":{"x":[0.0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,28.311552000000002,29.360128000000003,31.45728,33.554432000000006,35.651584,37.748736,39.845888,41.94304,44.040192000000005,46.137344000000006,48.234496,50.331648,52.4288,54.525952000000004,56.623104000000005,58.720256000000006,60.817408,62.91456,65.011712,67.10886400000001,69.206016,71.303168,73.40032000000001,75.497472,77.59462400000001,79.691776,81.788928,83.88608,85.983232,88.08038400000001,90.177536,92.27468800000001,94.37184,96.468992,98.56614400000001,100.663296,102.76044800000001,104.8576,106.95475200000001,109.05190400000001,111.149056,113.24620800000001,115.34336,117.44051200000001,119.537664,121.634816,123.73196800000001,125.82912,127.92627200000001,130.023424,132.120576,134.21772800000002,136.31488000000002,138.412032,140.509184,142.606336,144.70348800000002,146.80064000000002,148.897792,150.994944,153.092096,155.18924800000002,157.28640000000001,159.383552,161.480704,163.577856,165.67500800000002,167.77216,169.869312,171.966464,174.06361600000002,176.16076800000002,178.25792,180.355072,182.452224,184.54937600000002,186.64652800000002,188.74368,190.840832,192.937984,195.03513600000002,197.13228800000002,199.22944,201.326592,203.423744,205.52089600000002,207.61804800000002,209.7152,211.812352,213.90950400000003,216.00665600000002,218.10380800000001,220.20096,222.298112,224.39526400000003,226.49241600000002,228.589568,230.68672,232.783872,234.88102400000002,236.97817600000002,239.075328,241.17248,243.269632,245.36678400000002,247.46393600000002,249.561088,251.65824,253.75539200000003,255.85254400000002,257.949696,260.046848,262.144,264.241152,266.338304,268.43545600000004,270.53260800000004,272.62976000000003,274.726912,276.824064,278.921216,281.018368,283.11552,285.212672,287.309824,289.40697600000004,291.50412800000004,293.60128
000000003,295.698432,297.795584,299.892736,301.989888,304.08704,306.184192,308.28134400000005,310.37849600000004,312.47564800000004,314.57280000000003,316.669952,318.767104,320.864256,322.961408,325.05856,327.155712,329.25286400000005,331.35001600000004,333.44716800000003,335.54432,337.641472,339.738624,341.835776,343.932928,346.03008,348.12723200000005,350.22438400000004],"y":[0.2329999953508377,0.2529999911785126,0.2800000011920929,0.2870000004768371,0.3179999887943268,0.3129999935626983,0.3210000097751617,0.3160000145435333,0.3210000097751617,0.31700000166893,0.3330000042915344,0.3389999866485595,0.3289999961853027,0.3429999947547912,0.3370000123977661,0.3379999995231628,0.3459999859333038,0.3490000069141388,0.3470000028610229,0.3600000143051147,0.3569999933242798,0.3449999988079071,0.3650000095367431,0.3499999940395355,0.3540000021457672,0.3569999933242798,0.3619999885559082,0.3619999885559082,0.3580000102519989,0.3740000128746032,0.3709999918937683,0.3720000088214874,0.3759999871253967,0.3720000088214874,0.3659999966621399,0.3790000081062317,0.3610000014305115,0.3650000095367431,0.3650000095367431,0.3720000088214874,0.3729999959468841,0.3790000081062317,0.3680000007152557,0.3659999966621399,0.3680000007152557,0.3619999885559082,0.3619999885559082,0.3729999959468841,0.3720000088214874,0.3650000095367431,0.3759999871253967,0.367000013589859,0.3650000095367431,0.3680000007152557,0.3580000102519989,0.3589999973773956,0.3700000047683716,0.3680000007152557,0.367000013589859,0.3709999918937683,0.3880000114440918,0.3810000121593475,0.375,0.4040000140666961,0.3860000073909759,0.3840000033378601,0.3779999911785126,0.3729999959468841,0.3720000088214874,0.3799999952316284,0.3799999952316284,0.3779999911785126,0.3689999878406524,0.3770000040531158,0.3740000128746032,0.3819999992847442,0.3899999856948852,0.3799999952316284,0.3919999897480011,0.3720000088214874,0.3770000040531158,0.3930000066757202,0.3849999904632568,0.3899999856948852,0.3740000128746032,0.3740000128746032,0.
3799999952316284,0.3779999911785126,0.3880000114440918,0.3709999918937683,0.3810000121593475,0.3880000114440918,0.3980000019073486,0.3819999992847442,0.3849999904632568,0.3810000121593475,0.3819999992847442,0.3889999985694885,0.3840000033378601,0.3910000026226043,0.3899999856948852,0.3959999978542328,0.3880000114440918,0.3869999945163727,0.3779999911785126,0.3819999992847442,0.3919999897480011,0.3849999904632568,0.3860000073909759,0.3919999897480011,0.3819999992847442,0.3819999992847442,0.3889999985694885,0.3889999985694885,0.3860000073909759,0.3880000114440918,0.3889999985694885,0.3939999938011169,0.3899999856948852,0.3869999945163727,0.3910000026226043,0.3910000026226043,0.3910000026226043,0.3970000147819519,0.3970000147819519,0.3970000147819519,0.3970000147819519,0.3939999938011169,0.4000000059604645,0.3970000147819519,0.402999997138977,0.3959999978542328,0.3959999978542328,0.4000000059604645,0.4040000140666961,0.4020000100135803,0.3989999890327453,0.3919999897480011,0.3930000066757202,0.3930000066757202,0.3980000019073486,0.4000000059604645,0.395000010728836,0.3899999856948852,0.4059999883174896,0.4020000100135803,0.4020000100135803,0.4059999883174896,0.3970000147819519,0.4110000133514404,0.4050000011920929,0.4000000059604645,0.4090000092983246,0.3989999890327453,0.402999997138977,0.4009999930858612,0.3980000019073486,0.4090000092983246,0.4079999923706054,0.4079999923706054,0.4020000100135803,0.402999997138977,0.402999997138977,0.4059999883174896,0.4040000140666961,0.4059999883174896,0.3989999890327453,0.4070000052452087,0.4059999883174896],"label":"RefinedWeb"},"big-run-fineweb-cross-dedup-fixed":{"x":[0.0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,29.360128000000003,31.45728,33.554432000000006,35.651584,37.748736,39.845888,41.94304,44.040192000000005,46.137344000000006,48.234496,50.331648,52.4288,54.525952000000004,56
.623104000000005,58.720256000000006,60.817408,62.91456,65.011712,67.10886400000001,69.206016,71.303168,73.40032000000001,75.497472,77.59462400000001,79.691776,81.788928,83.88608,85.983232,88.08038400000001,90.177536,92.27468800000001,94.37184,96.468992,98.56614400000001,100.663296,102.76044800000001,104.8576,106.95475200000001,109.05190400000001,111.149056,113.24620800000001,115.34336,117.44051200000001,119.537664,121.634816,123.73196800000001,125.82912,127.92627200000001,130.023424,132.120576,134.21772800000002,136.31488000000002,138.412032,140.509184,142.606336,144.70348800000002,146.80064000000002,148.897792,150.994944,153.092096,155.18924800000002,157.28640000000001,159.383552,161.480704,163.577856,165.67500800000002,167.77216,169.869312,171.966464,174.06361600000002,176.16076800000002,178.25792,180.355072,182.452224,184.54937600000002,186.64652800000002,188.74368,190.840832,192.937984,195.03513600000002,197.13228800000002,199.22944,201.326592,203.423744,205.52089600000002,207.61804800000002,209.7152,211.812352,213.90950400000003,216.00665600000002,218.10380800000001,220.20096,222.298112,224.39526400000003,226.49241600000002,228.589568,230.68672,232.783872,234.88102400000002,236.97817600000002,239.075328,241.17248,243.269632,245.36678400000002,247.46393600000002,249.561088,251.65824,253.75539200000003,255.85254400000002,257.949696,260.046848,262.144,264.241152,266.338304,268.43545600000004,270.53260800000004,272.62976000000003,274.726912,276.824064,278.921216,281.018368,283.11552,285.212672,287.309824,289.40697600000004,291.50412800000004,293.60128000000003,295.698432,297.795584,299.892736,301.989888,304.08704,306.184192,308.28134400000005,310.37849600000004,312.47564800000004,314.57280000000003,316.669952,318.767104,320.864256,322.961408,325.05856,327.155712,329.25286400000005,331.35001600000004,333.44716800000003,335.54432,337.641472,339.738624,341.835776,343.932928,346.03008,348.12723200000005,350.22438400000004],"y":[0.2329999953508377,0.2540000081062317,0.2
870000004768371,0.2829999923706054,0.3210000097751617,0.3079999983310699,0.3230000138282776,0.3179999887943268,0.3160000145435333,0.3289999961853027,0.3199999928474426,0.324999988079071,0.3310000002384186,0.3260000050067901,0.335999995470047,0.335999995470047,0.3310000002384186,0.335999995470047,0.3339999914169311,0.3459999859333038,0.3330000042915344,0.3449999988079071,0.3429999947547912,0.3479999899864197,0.3420000076293945,0.3479999899864197,0.3459999859333038,0.3339999914169311,0.3350000083446502,0.3519999980926513,0.3440000116825104,0.3490000069141388,0.3379999995231628,0.3420000076293945,0.3610000014305115,0.3409999907016754,0.356000006198883,0.3630000054836273,0.3519999980926513,0.3510000109672546,0.3619999885559082,0.3569999933242798,0.3479999899864197,0.3529999852180481,0.3569999933242798,0.3529999852180481,0.3519999980926513,0.3549999892711639,0.356000006198883,0.3499999940395355,0.3479999899864197,0.3619999885559082,0.3459999859333038,0.3519999980926513,0.3529999852180481,0.3680000007152557,0.3519999980926513,0.3580000102519989,0.3549999892711639,0.3490000069141388,0.3499999940395355,0.3600000143051147,0.3709999918937683,0.3659999966621399,0.3569999933242798,0.3510000109672546,0.3600000143051147,0.367000013589859,0.3529999852180481,0.363999992609024,0.3630000054836273,0.3619999885559082,0.356000006198883,0.367000013589859,0.3600000143051147,0.3540000021457672,0.3589999973773956,0.3610000014305115,0.356000006198883,0.3680000007152557,0.3519999980926513,0.3549999892711639,0.3479999899864197,0.3549999892711639,0.3519999980926513,0.367000013589859,0.3600000143051147,0.3600000143051147,0.3680000007152557,0.356000006198883,0.3610000014305115,0.3689999878406524,0.367000013589859,0.3689999878406524,0.3720000088214874,0.3680000007152557,0.3569999933242798,0.3650000095367431,0.363999992609024,0.3610000014305115,0.3709999918937683,0.3569999933242798,0.3540000021457672,0.3619999885559082,0.3549999892711639,0.3650000095367431,0.3680000007152557,0.3589999973773956,0.35
6000006198883,0.3610000014305115,0.3619999885559082,0.3740000128746032,0.3700000047683716,0.3650000095367431,0.3819999992847442,0.3770000040531158,0.3810000121593475,0.3729999959468841,0.3680000007152557,0.3689999878406524,0.3740000128746032,0.3779999911785126,0.3720000088214874,0.3740000128746032,0.367000013589859,0.363999992609024,0.367000013589859,0.3689999878406524,0.3709999918937683,0.3709999918937683,0.375,0.3680000007152557,0.375,0.3630000054836273,0.3720000088214874,0.3819999992847442,0.3729999959468841,0.3689999878406524,0.363999992609024,0.3709999918937683,0.3659999966621399,0.3700000047683716,0.367000013589859,0.3709999918937683,0.3759999871253967,0.3759999871253967,0.3729999959468841,0.3729999959468841,0.3729999959468841,0.3779999911785126,0.375,0.3700000047683716,0.3659999966621399,0.3759999871253967,0.3779999911785126,0.3709999918937683,0.3840000033378601,0.3720000088214874,0.375,0.367000013589859,0.3770000040531158,0.3709999918937683,0.375,0.3709999918937683,0.3740000128746032,0.3740000128746032,0.375,0.3770000040531158],"label":"FineWeb full 
MinHash"},"big-run-sampled_full_filtered_no_dedup":{"x":[0.0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,28.311552000000002,29.360128000000003,31.45728,33.554432000000006,35.651584,37.748736,39.845888,41.94304,44.040192000000005,46.137344000000006,48.234496,50.331648,52.4288,54.525952000000004,56.623104000000005,58.720256000000006,60.817408,62.91456,65.011712,67.10886400000001,69.206016,71.303168,73.40032000000001,75.497472,77.59462400000001,79.691776,81.788928,83.88608,85.983232,88.08038400000001,90.177536,92.27468800000001,94.37184,96.468992,98.56614400000001,100.663296,102.76044800000001,104.8576,106.95475200000001,109.05190400000001,111.149056,113.24620800000001,115.34336,117.44051200000001,119.537664,121.634816,123.73196800000001,125.82912,127.92627200000001,130.023424,132.120576,134.21772800000002,136.31488000000002,138.412032,140.509184,142.606336,144.70348800000002,146.80064000000002,148.897792,150.994944,153.092096,155.18924800000002,157.28640000000001,159.383552,161.480704,163.577856,165.67500800000002,167.77216,169.869312,171.966464,174.06361600000002,176.16076800000002,178.25792,180.355072,182.452224,184.54937600000002,186.64652800000002,188.74368,190.840832,192.937984,195.03513600000002,197.13228800000002,199.22944,201.326592,203.423744,205.52089600000002,207.61804800000002,209.7152,211.812352,213.90950400000003,216.00665600000002,218.10380800000001,220.20096,222.298112,224.39526400000003,226.49241600000002,228.589568,230.68672,232.783872,234.88102400000002,236.97817600000002,239.075328,241.17248,243.269632,245.36678400000002,247.46393600000002,249.561088,251.65824,253.75539200000003,255.85254400000002,257.949696,260.046848,262.144,264.241152,266.338304,268.43545600000004,270.53260800000004,272.62976000000003,274.726912,276.824064,278.921216,281.018368,283.11552,285.212672,287.309824,289.40697600000004,291.50412
800000004,293.60128000000003,295.698432,297.795584,299.892736,301.989888,304.08704,306.184192,308.28134400000005,310.37849600000004,312.47564800000004,314.57280000000003,316.669952,318.767104,320.864256,322.961408,325.05856,327.155712,329.25286400000005,331.35001600000004,333.44716800000003,335.54432,337.641472,339.738624,341.835776,343.932928,346.03008,348.12723200000005,350.22438400000004],"y":[0.2329999953508377,0.2599999904632568,0.277999997138977,0.2910000085830688,0.3070000112056732,0.3140000104904175,0.3019999861717224,0.3059999942779541,0.3210000097751617,0.3230000138282776,0.324999988079071,0.3149999976158142,0.3109999895095825,0.3339999914169311,0.3289999961853027,0.3319999873638153,0.3319999873638153,0.3300000131130218,0.3370000123977661,0.3219999969005584,0.3370000123977661,0.328000009059906,0.3339999914169311,0.3420000076293945,0.3400000035762787,0.3440000116825104,0.3510000109672546,0.3409999907016754,0.3449999988079071,0.3339999914169311,0.3540000021457672,0.3339999914169311,0.3470000028610229,0.3470000028610229,0.3440000116825104,0.3589999973773956,0.3569999933242798,0.3630000054836273,0.3549999892711639,0.3589999973773956,0.3449999988079071,0.3549999892711639,0.3449999988079071,0.3389999866485595,0.3499999940395355,0.3610000014305115,0.3619999885559082,0.3600000143051147,0.3519999980926513,0.3479999899864197,0.356000006198883,0.3519999980926513,0.3440000116825104,0.3490000069141388,0.3519999980926513,0.3470000028610229,0.3589999973773956,0.3449999988079071,0.3490000069141388,0.356000006198883,0.3619999885559082,0.3569999933242798,0.3659999966621399,0.3610000014305115,0.3549999892711639,0.3700000047683716,0.363999992609024,0.3600000143051147,0.3580000102519989,0.3549999892711639,0.3619999885559082,0.3689999878406524,0.3630000054836273,0.363999992609024,0.3700000047683716,0.367000013589859,0.3630000054836273,0.3630000054836273,0.3700000047683716,0.3589999973773956,0.3540000021457672,0.3540000021457672,0.3659999966621399,0.3619999885559082,0.3589999973
773956,0.3650000095367431,0.3709999918937683,0.3680000007152557,0.3689999878406524,0.3650000095367431,0.3729999959468841,0.3619999885559082,0.3689999878406524,0.3569999933242798,0.3510000109672546,0.3680000007152557,0.363999992609024,0.3700000047683716,0.3659999966621399,0.3659999966621399,0.363999992609024,0.3619999885559082,0.3659999966621399,0.3680000007152557,0.3610000014305115,0.3720000088214874,0.3729999959468841,0.3810000121593475,0.3630000054836273,0.3689999878406524,0.3709999918937683,0.3759999871253967,0.382999986410141,0.3729999959468841,0.3720000088214874,0.3680000007152557,0.3659999966621399,0.3650000095367431,0.363999992609024,0.3589999973773956,0.356000006198883,0.3650000095367431,0.3659999966621399,0.367000013589859,0.3729999959468841,0.3720000088214874,0.375,0.3740000128746032,0.3700000047683716,0.3569999933242798,0.3759999871253967,0.3740000128746032,0.367000013589859,0.3770000040531158,0.3759999871253967,0.3709999918937683,0.3779999911785126,0.3709999918937683,0.3689999878406524,0.3799999952316284,0.3630000054836273,0.375,0.3700000047683716,0.3700000047683716,0.3729999959468841,0.3720000088214874,0.3790000081062317,0.375,0.3729999959468841,0.3770000040531158,0.3799999952316284,0.3779999911785126,0.3720000088214874,0.3799999952316284,0.3759999871253967,0.3799999952316284,0.3790000081062317,0.375,0.3740000128746032,0.3729999959468841,0.3840000033378601,0.3659999966621399,0.3759999871253967,0.3720000088214874,0.3720000088214874,0.3759999871253967,0.375,0.3650000095367431,0.3729999959468841],"label":"FineWeb filtered only"}},"layout":{"xaxis":{"title":{"text":"Training tokens (billions)"}},"title":{"text":"Dedup across all dumps does not improve performance"}}}
 
 
data/plots/dedup_all_dumps_bad/hellaswag_acc_norm.json DELETED
@@ -1 +0,0 @@
1
- {"data":{"big-run-refinedweb":{"x":[0.0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,28.311552000000002,29.360128000000003,31.45728,33.554432000000006,35.651584,37.748736,39.845888,41.94304,44.040192000000005,46.137344000000006,48.234496,50.331648,52.4288,54.525952000000004,56.623104000000005,58.720256000000006,60.817408,62.91456,65.011712,67.10886400000001,69.206016,71.303168,73.40032000000001,75.497472,77.59462400000001,79.691776,81.788928,83.88608,85.983232,88.08038400000001,90.177536,92.27468800000001,94.37184,96.468992,98.56614400000001,100.663296,102.76044800000001,104.8576,106.95475200000001,109.05190400000001,111.149056,113.24620800000001,115.34336,117.44051200000001,119.537664,121.634816,123.73196800000001,125.82912,127.92627200000001,130.023424,132.120576,134.21772800000002,136.31488000000002,138.412032,140.509184,142.606336,144.70348800000002,146.80064000000002,148.897792,150.994944,153.092096,155.18924800000002,157.28640000000001,159.383552,161.480704,163.577856,165.67500800000002,167.77216,169.869312,171.966464,174.06361600000002,176.16076800000002,178.25792,180.355072,182.452224,184.54937600000002,186.64652800000002,188.74368,190.840832,192.937984,195.03513600000002,197.13228800000002,199.22944,201.326592,203.423744,205.52089600000002,207.61804800000002,209.7152,211.812352,213.90950400000003,216.00665600000002,218.10380800000001,220.20096,222.298112,224.39526400000003,226.49241600000002,228.589568,230.68672,232.783872,234.88102400000002,236.97817600000002,239.075328,241.17248,243.269632,245.36678400000002,247.46393600000002,249.561088,251.65824,253.75539200000003,255.85254400000002,257.949696,260.046848,262.144,264.241152,266.338304,268.43545600000004,270.53260800000004,272.62976000000003,274.726912,276.824064,278.921216,281.018368,283.11552,285.212672,287.309824,289.40697600000004,291.50412800000004,293.60128
000000003,295.698432,297.795584,299.892736,301.989888,304.08704,306.184192,308.28134400000005,310.37849600000004,312.47564800000004,314.57280000000003,316.669952,318.767104,320.864256,322.961408,325.05856,327.155712,329.25286400000005,331.35001600000004,333.44716800000003,335.54432,337.641472,339.738624,341.835776,343.932928,346.03008,348.12723200000005,350.22438400000004],"y":[0.257999986410141,0.2759999930858612,0.328000009059906,0.3499999940395355,0.3889999985694885,0.3910000026226043,0.402999997138977,0.4210000038146972,0.4280000030994415,0.4359999895095825,0.4469999969005584,0.4440000057220459,0.4600000083446502,0.4690000116825104,0.4600000083446502,0.4679999947547912,0.4729999899864197,0.4760000109672546,0.4839999973773956,0.4939999878406524,0.488999992609024,0.4990000128746032,0.4979999959468841,0.4979999959468841,0.5009999871253967,0.5,0.5090000033378601,0.5070000290870667,0.5180000066757202,0.5199999809265137,0.5109999775886536,0.5130000114440918,0.5249999761581421,0.5149999856948853,0.5299999713897705,0.5339999794960022,0.5189999938011169,0.5289999842643738,0.5249999761581421,0.5320000052452087,0.5460000038146973,0.5419999957084656,0.5260000228881836,0.5289999842643738,0.546999990940094,0.5419999957084656,0.5419999957084656,0.5460000038146973,0.5419999957084656,0.5389999747276306,0.5440000295639038,0.5569999814033508,0.5450000166893005,0.5329999923706055,0.5580000281333923,0.5339999794960022,0.5540000200271606,0.5460000038146973,0.5479999780654907,0.5529999732971191,0.5540000200271606,0.5619999766349792,0.5490000247955322,0.5410000085830688,0.5490000247955322,0.5569999814033508,0.550000011920929,0.5479999780654907,0.5630000233650208,0.546999990940094,0.5559999942779541,0.5600000023841858,0.5509999990463257,0.5569999814033508,0.5569999814033508,0.5580000281333923,0.5619999766349792,0.5580000281333923,0.5669999718666077,0.5569999814033508,0.5709999799728394,0.5529999732971191,0.5649999976158142,0.5659999847412109,0.5659999847412109,0.5690000057220459,0.56000
00023841858,0.5580000281333923,0.5540000200271606,0.5640000104904175,0.5680000185966492,0.5709999799728394,0.5649999976158142,0.5680000185966492,0.5730000138282776,0.5640000104904175,0.5799999833106995,0.5699999928474426,0.5669999718666077,0.5680000185966492,0.5770000219345093,0.5709999799728394,0.5759999752044678,0.5690000057220459,0.5789999961853027,0.5740000009536743,0.5709999799728394,0.5789999961853027,0.5709999799728394,0.5770000219345093,0.5770000219345093,0.5730000138282776,0.5809999704360962,0.5720000267028809,0.5849999785423279,0.5820000171661377,0.5799999833106995,0.5830000042915344,0.5759999752044678,0.5730000138282776,0.5799999833106995,0.5830000042915344,0.5860000252723694,0.5789999961853027,0.5789999961853027,0.5860000252723694,0.5979999899864197,0.5920000076293945,0.5820000171661377,0.5870000123977661,0.5889999866485596,0.5839999914169312,0.5849999785423279,0.5899999737739563,0.5920000076293945,0.593999981880188,0.597000002861023,0.5889999866485596,0.5889999866485596,0.5849999785423279,0.5899999737739563,0.5989999771118164,0.5899999737739563,0.5839999914169312,0.5910000205039978,0.5910000205039978,0.5929999947547913,0.5920000076293945,0.5929999947547913,0.5889999866485596,0.5899999737739563,0.593999981880188,0.5910000205039978,0.5960000157356262,0.5920000076293945,0.5889999866485596,0.593999981880188,0.5879999995231628,0.5960000157356262,0.5920000076293945,0.5960000157356262,0.5960000157356262,0.5920000076293945,0.6010000109672546,0.5920000076293945,0.5899999737739563,0.5889999866485596,0.5920000076293945,0.6019999980926514],"label":"RefinedWeb"},"big-run-sampled_full_filtered_no_dedup":{"x":[0.0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,28.311552000000002,29.360128000000003,31.45728,33.554432000000006,35.651584,37.748736,39.845888,41.94304,44.040192000000005,46.137344000000006,48.234496,50.331648,52.4288,5
4.525952000000004,56.623104000000005,58.720256000000006,60.817408,62.91456,65.011712,67.10886400000001,69.206016,71.303168,73.40032000000001,75.497472,77.59462400000001,79.691776,81.788928,83.88608,85.983232,88.08038400000001,90.177536,92.27468800000001,94.37184,96.468992,98.56614400000001,100.663296,102.76044800000001,104.8576,106.95475200000001,109.05190400000001,111.149056,113.24620800000001,115.34336,117.44051200000001,119.537664,121.634816,123.73196800000001,125.82912,127.92627200000001,130.023424,132.120576,134.21772800000002,136.31488000000002,138.412032,140.509184,142.606336,144.70348800000002,146.80064000000002,148.897792,150.994944,153.092096,155.18924800000002,157.28640000000001,159.383552,161.480704,163.577856,165.67500800000002,167.77216,169.869312,171.966464,174.06361600000002,176.16076800000002,178.25792,180.355072,182.452224,184.54937600000002,186.64652800000002,188.74368,190.840832,192.937984,195.03513600000002,197.13228800000002,199.22944,201.326592,203.423744,205.52089600000002,207.61804800000002,209.7152,211.812352,213.90950400000003,216.00665600000002,218.10380800000001,220.20096,222.298112,224.39526400000003,226.49241600000002,228.589568,230.68672,232.783872,234.88102400000002,236.97817600000002,239.075328,241.17248,243.269632,245.36678400000002,247.46393600000002,249.561088,251.65824,253.75539200000003,255.85254400000002,257.949696,260.046848,262.144,264.241152,266.338304,268.43545600000004,270.53260800000004,272.62976000000003,274.726912,276.824064,278.921216,281.018368,283.11552,285.212672,287.309824,289.40697600000004,291.50412800000004,293.60128000000003,295.698432,297.795584,299.892736,301.989888,304.08704,306.184192,308.28134400000005,310.37849600000004,312.47564800000004,314.57280000000003,316.669952,318.767104,320.864256,322.961408,325.05856,327.155712,329.25286400000005,331.35001600000004,333.44716800000003,335.54432,337.641472,339.738624,341.835776,343.932928,346.03008,348.12723200000005,350.22438400000004],"y":[0.257999986410141,0.2
809999883174896,0.3230000138282776,0.3409999907016754,0.3600000143051147,0.3569999933242798,0.3889999985694885,0.395000010728836,0.4199999868869781,0.4180000126361847,0.421999990940094,0.4289999902248382,0.4350000023841858,0.4359999895095825,0.4469999969005584,0.4350000023841858,0.4480000138282776,0.4480000138282776,0.453000009059906,0.4550000131130218,0.4589999914169311,0.4639999866485595,0.4600000083446502,0.460999995470047,0.4589999914169311,0.481000006198883,0.4769999980926513,0.4709999859333038,0.4740000069141388,0.4679999947547912,0.4790000021457672,0.4729999899864197,0.4819999933242798,0.4850000143051147,0.4819999933242798,0.4819999933242798,0.4880000054836273,0.4869999885559082,0.4959999918937683,0.4850000143051147,0.4959999918937683,0.492000013589859,0.503000020980835,0.4930000007152557,0.5099999904632568,0.5040000081062317,0.5009999871253967,0.4970000088214874,0.4979999959468841,0.5059999823570251,0.5070000290870667,0.5040000081062317,0.5059999823570251,0.5049999952316284,0.5080000162124634,0.5049999952316284,0.5019999742507935,0.5120000243186951,0.5170000195503235,0.5170000195503235,0.5090000033378601,0.5239999890327454,0.527999997138977,0.5230000019073486,0.5210000276565552,0.5149999856948853,0.5189999938011169,0.5270000100135803,0.5149999856948853,0.5099999904632568,0.5299999713897705,0.5199999809265137,0.5230000019073486,0.5260000228881836,0.5249999761581421,0.5239999890327454,0.5329999923706055,0.5210000276565552,0.5260000228881836,0.5170000195503235,0.531000018119812,0.5289999842643738,0.531000018119812,0.5270000100135803,0.5299999713897705,0.5370000004768372,0.5379999876022339,0.5419999957084656,0.5329999923706055,0.5360000133514404,0.5299999713897705,0.5360000133514404,0.5270000100135803,0.5450000166893005,0.5410000085830688,0.546999990940094,0.5329999923706055,0.5329999923706055,0.5379999876022339,0.5299999713897705,0.5429999828338623,0.5360000133514404,0.5339999794960022,0.5419999957084656,0.5410000085830688,0.5370000004768372,0.5389999747276306,
0.527999997138977,0.5400000214576721,0.5400000214576721,0.531000018119812,0.5440000295639038,0.5460000038146973,0.5479999780654907,0.5460000038146973,0.5410000085830688,0.5509999990463257,0.5479999780654907,0.5410000085830688,0.5389999747276306,0.550000011920929,0.5569999814033508,0.550000011920929,0.5490000247955322,0.5490000247955322,0.5569999814033508,0.5519999861717224,0.5479999780654907,0.5559999942779541,0.5550000071525574,0.5460000038146973,0.5540000200271606,0.5460000038146973,0.5460000038146973,0.5509999990463257,0.5460000038146973,0.5550000071525574,0.5479999780654907,0.5479999780654907,0.5540000200271606,0.5550000071525574,0.5529999732971191,0.5529999732971191,0.5509999990463257,0.5509999990463257,0.5419999957084656,0.546999990940094,0.5509999990463257,0.5559999942779541,0.5490000247955322,0.5509999990463257,0.5529999732971191,0.550000011920929,0.5540000200271606,0.5550000071525574,0.5580000281333923,0.550000011920929,0.5569999814033508,0.5490000247955322,0.5519999861717224,0.5519999861717224,0.5559999942779541,0.5569999814033508,0.5559999942779541,0.5550000071525574,0.5559999942779541,0.5490000247955322,0.5550000071525574,0.5600000023841858],"label":"FineWeb filtered 
only"},"big-run-fineweb-cross-dedup-fixed":{"x":[0.0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,29.360128000000003,31.45728,33.554432000000006,35.651584,37.748736,39.845888,41.94304,44.040192000000005,46.137344000000006,48.234496,50.331648,52.4288,54.525952000000004,56.623104000000005,58.720256000000006,60.817408,62.91456,65.011712,67.10886400000001,69.206016,71.303168,73.40032000000001,75.497472,77.59462400000001,79.691776,81.788928,83.88608,85.983232,88.08038400000001,90.177536,92.27468800000001,94.37184,96.468992,98.56614400000001,100.663296,102.76044800000001,104.8576,106.95475200000001,109.05190400000001,111.149056,113.24620800000001,115.34336,117.44051200000001,119.537664,121.634816,123.73196800000001,125.82912,127.92627200000001,130.023424,132.120576,134.21772800000002,136.31488000000002,138.412032,140.509184,142.606336,144.70348800000002,146.80064000000002,148.897792,150.994944,153.092096,155.18924800000002,157.28640000000001,159.383552,161.480704,163.577856,165.67500800000002,167.77216,169.869312,171.966464,174.06361600000002,176.16076800000002,178.25792,180.355072,182.452224,184.54937600000002,186.64652800000002,188.74368,190.840832,192.937984,195.03513600000002,197.13228800000002,199.22944,201.326592,203.423744,205.52089600000002,207.61804800000002,209.7152,211.812352,213.90950400000003,216.00665600000002,218.10380800000001,220.20096,222.298112,224.39526400000003,226.49241600000002,228.589568,230.68672,232.783872,234.88102400000002,236.97817600000002,239.075328,241.17248,243.269632,245.36678400000002,247.46393600000002,249.561088,251.65824,253.75539200000003,255.85254400000002,257.949696,260.046848,262.144,264.241152,266.338304,268.43545600000004,270.53260800000004,272.62976000000003,274.726912,276.824064,278.921216,281.018368,283.11552,285.212672,287.309824,289.40697600000004,291.50412800000004,293.6012800000000
3,295.698432,297.795584,299.892736,301.989888,304.08704,306.184192,308.28134400000005,310.37849600000004,312.47564800000004,314.57280000000003,316.669952,318.767104,320.864256,322.961408,325.05856,327.155712,329.25286400000005,331.35001600000004,333.44716800000003,335.54432,337.641472,339.738624,341.835776,343.932928,346.03008,348.12723200000005,350.22438400000004],"y":[0.257999986410141,0.3009999990463257,0.3149999976158142,0.3400000035762787,0.3610000014305115,0.3680000007152557,0.3799999952316284,0.4020000100135803,0.4180000126361847,0.4129999876022339,0.4259999990463257,0.4239999949932098,0.4440000057220459,0.44200000166893,0.4440000057220459,0.4580000042915344,0.4510000050067901,0.4560000002384186,0.4650000035762787,0.4569999873638153,0.460999995470047,0.4659999907016754,0.4679999947547912,0.4779999852180481,0.4740000069141388,0.4600000083446502,0.4860000014305115,0.4790000021457672,0.4880000054836273,0.4930000007152557,0.4860000014305115,0.4850000143051147,0.4900000095367431,0.4850000143051147,0.4900000095367431,0.4959999918937683,0.492000013589859,0.4850000143051147,0.4970000088214874,0.4900000095367431,0.4979999959468841,0.503000020980835,0.5040000081062317,0.4990000128746032,0.4979999959468841,0.5080000162124634,0.5019999742507935,0.4970000088214874,0.4939999878406524,0.5120000243186951,0.5070000290870667,0.503000020980835,0.5070000290870667,0.503000020980835,0.5109999775886536,0.5080000162124634,0.5009999871253967,0.5090000033378601,0.5,0.5149999856948853,0.5109999775886536,0.5099999904632568,0.5130000114440918,0.5080000162124634,0.5080000162124634,0.5109999775886536,0.5099999904632568,0.5239999890327454,0.5180000066757202,0.5130000114440918,0.5120000243186951,0.5180000066757202,0.515999972820282,0.5260000228881836,0.5199999809265137,0.5239999890327454,0.5220000147819519,0.527999997138977,0.5249999761581421,0.5270000100135803,0.5249999761581421,0.5189999938011169,0.5230000019073486,0.5249999761581421,0.5199999809265137,0.5230000019073486,0.5299999713897705
,0.5350000262260437,0.5339999794960022,0.5329999923706055,0.5249999761581421,0.5299999713897705,0.5360000133514404,0.5329999923706055,0.5410000085830688,0.5249999761581421,0.5289999842643738,0.5360000133514404,0.5360000133514404,0.5370000004768372,0.5389999747276306,0.5289999842643738,0.5299999713897705,0.5410000085830688,0.5329999923706055,0.5419999957084656,0.5410000085830688,0.527999997138977,0.5370000004768372,0.5429999828338623,0.5419999957084656,0.5389999747276306,0.5320000052452087,0.5350000262260437,0.5419999957084656,0.5410000085830688,0.5339999794960022,0.5440000295639038,0.5329999923706055,0.5429999828338623,0.5460000038146973,0.5400000214576721,0.5429999828338623,0.5479999780654907,0.550000011920929,0.5490000247955322,0.5410000085830688,0.5450000166893005,0.5429999828338623,0.550000011920929,0.5529999732971191,0.5490000247955322,0.5450000166893005,0.5450000166893005,0.5519999861717224,0.5569999814033508,0.5460000038146973,0.546999990940094,0.5509999990463257,0.5509999990463257,0.5450000166893005,0.5440000295639038,0.5440000295639038,0.546999990940094,0.5479999780654907,0.546999990940094,0.5460000038146973,0.546999990940094,0.5479999780654907,0.5460000038146973,0.5460000038146973,0.5440000295639038,0.5410000085830688,0.5440000295639038,0.5389999747276306,0.5410000085830688,0.546999990940094,0.546999990940094,0.5479999780654907,0.546999990940094,0.550000011920929,0.546999990940094,0.5460000038146973,0.546999990940094,0.5479999780654907,0.5479999780654907,0.5519999861717224,0.550000011920929],"label":"FineWeb full MinHash"}},"layout":{"xaxis":{"title":{"text":"Training tokens (billions)"}},"title":{"text":"Dedup across all dumps does not improve performance"}}}
 
 
data/plots/dedup_all_dumps_bad/index.json DELETED
@@ -1 +0,0 @@
1
- {"files":{"agg_score":{"file":"agg_score.json"},"commonsense_qa/acc_norm":{"file":"commonsense_qa_acc_norm.json"},"hellaswag/acc_norm":{"file":"hellaswag_acc_norm.json"},"openbookqa/acc_norm":{"file":"openbookqa_acc_norm.json"},"piqa/acc_norm":{"file":"piqa_acc_norm.json"},"winogrande/acc_norm":{"file":"winogrande_acc_norm.json"},"arc/acc_norm":{"file":"arc_acc_norm.json"},"mmlu/acc_norm":{"file":"mmlu_acc_norm.json"}},"settings":{"slider":{"min":0,"max":30,"default":5}}}
 
 
data/plots/dedup_all_dumps_bad/mmlu_acc_norm.json DELETED
@@ -1 +0,0 @@
1
- {"data":{"big-run-refinedweb":{"x":[0.0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,28.311552000000002,29.360128000000003,31.45728,33.554432000000006,35.651584,37.748736,39.845888,41.94304,44.040192000000005,46.137344000000006,48.234496,50.331648,52.4288,54.525952000000004,56.623104000000005,58.720256000000006,60.817408,62.91456,65.011712,67.10886400000001,69.206016,71.303168,73.40032000000001,75.497472,77.59462400000001,79.691776,81.788928,83.88608,85.983232,88.08038400000001,90.177536,92.27468800000001,94.37184,96.468992,98.56614400000001,100.663296,102.76044800000001,104.8576,106.95475200000001,109.05190400000001,111.149056,113.24620800000001,115.34336,117.44051200000001,119.537664,121.634816,123.73196800000001,125.82912,127.92627200000001,130.023424,132.120576,134.21772800000002,136.31488000000002,138.412032,140.509184,142.606336,144.70348800000002,146.80064000000002,148.897792,150.994944,153.092096,155.18924800000002,157.28640000000001,159.383552,161.480704,163.577856,165.67500800000002,167.77216,169.869312,171.966464,174.06361600000002,176.16076800000002,178.25792,180.355072,182.452224,184.54937600000002,186.64652800000002,188.74368,190.840832,192.937984,195.03513600000002,197.13228800000002,199.22944,201.326592,203.423744,205.52089600000002,207.61804800000002,209.7152,211.812352,213.90950400000003,216.00665600000002,218.10380800000001,220.20096,222.298112,224.39526400000003,226.49241600000002,228.589568,230.68672,232.783872,234.88102400000002,236.97817600000002,239.075328,241.17248,243.269632,245.36678400000002,247.46393600000002,249.561088,251.65824,253.75539200000003,255.85254400000002,257.949696,260.046848,262.144,264.241152,266.338304,268.43545600000004,270.53260800000004,272.62976000000003,274.726912,276.824064,278.921216,281.018368,283.11552,285.212672,287.309824,289.40697600000004,291.50412800000004,293.60128
000000003,295.698432,297.795584,299.892736,301.989888,304.08704,306.184192,308.28134400000005,310.37849600000004,312.47564800000004,314.57280000000003,316.669952,318.767104,320.864256,322.961408,325.05856,327.155712,329.25286400000005,331.35001600000004,333.44716800000003,335.54432,337.641472,339.738624,341.835776,343.932928,346.03008,348.12723200000005,350.22438400000004],"y":[0.2501466572284698,0.2528519630432129,0.2616856694221496,0.2665999829769134,0.2683407664299011,0.2742894291877746,0.2762066125869751,0.2807516455650329,0.2767378389835357,0.2807380557060241,0.2788906991481781,0.2844051718711853,0.2856102883815765,0.2883394360542297,0.2875711619853973,0.2890409529209137,0.2894668281078338,0.2883355319499969,0.2872501015663147,0.291619062423706,0.2900333702564239,0.2962473034858703,0.2962896525859833,0.297355443239212,0.2932226359844208,0.2886744439601898,0.29665008187294,0.2976542115211487,0.2991503179073334,0.3004479110240936,0.3044549524784088,0.2976194322109222,0.3014707863330841,0.3048252463340759,0.3039425611495971,0.303354948759079,0.3027459383010864,0.2999922931194305,0.3050121665000915,0.2998814284801483,0.2978588044643402,0.3041949570178985,0.3010904192924499,0.3022017180919647,0.2997751235961914,0.3015910983085632,0.3096485137939453,0.3012076020240783,0.3065535724163055,0.3042872548103332,0.3104783594608307,0.2997980415821075,0.3051296770572662,0.303458571434021,0.3088337182998657,0.3145398199558258,0.3032208085060119,0.310806930065155,0.3075874149799347,0.3101692199707031,0.310107946395874,0.3066047430038452,0.3109066784381866,0.3081336915493011,0.3084586262702942,0.3086149394512176,0.3085348606109619,0.3136637806892395,0.3110873103141784,0.31076380610466,0.3084572553634643,0.3133681714534759,0.3125792145729065,0.3124453127384186,0.3097185790538788,0.3106793165206909,0.3089564740657806,0.3111244142055511,0.3123694658279419,0.3144859969615936,0.3135123550891876,0.311982125043869,0.3142133951187134,0.3122903704643249,0.3147654831409454,0.3078767359256
744,0.314947634935379,0.3171303570270538,0.3129573762416839,0.3154936134815216,0.3158208429813385,0.3153132200241089,0.3141326904296875,0.3163397014141083,0.3166318237781524,0.3168410360813141,0.3198235332965851,0.3201336860656738,0.3212967813014984,0.3191385567188263,0.3178017139434814,0.3192791938781738,0.323061466217041,0.320336639881134,0.3165886104106903,0.3206393420696258,0.3167395293712616,0.3135207295417785,0.315539002418518,0.3191742599010467,0.321073055267334,0.3222262561321258,0.3193058371543884,0.3213480710983276,0.3198905289173126,0.3219239711761474,0.3211614489555359,0.318855881690979,0.3177095353603363,0.324197381734848,0.3208906352519989,0.3264936804771423,0.3245965242385864,0.3231639564037323,0.3221887946128845,0.3277338445186615,0.3227696120738983,0.3263820111751556,0.3258577883243561,0.3264622390270233,0.3222362995147705,0.3286814987659454,0.3235024213790893,0.32446950674057,0.3311836123466491,0.328130304813385,0.3271634578704834,0.3250012993812561,0.3309800624847412,0.3274554014205932,0.3273015916347503,0.3261759579181671,0.32697594165802,0.3303172886371612,0.3282814025878906,0.3289586305618286,0.3260826468467712,0.3258011937141418,0.3297208249568939,0.3254813551902771,0.3287739753723144,0.3287097811698913,0.3275279700756073,0.3293041586875915,0.3314100801944732,0.3287808299064636,0.3251930773258209,0.3288172781467438,0.3265027701854706,0.3275215625762939,0.3290774822235107,0.3261331617832184,0.3299777805805206,0.331955999135971,0.3305029273033142,0.3274719417095184,0.3235560953617096,0.3269940316677093,0.3323083519935608],"label":"RefinedWeb"},"big-run-fineweb-cross-dedup-fixed":{"x":[0.0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,29.360128000000003,31.45728,33.554432000000006,35.651584,37.748736,39.845888,41.94304,44.040192000000005,46.137344000000006,48.234496,50.331648,52.4288,54.525952000000004,56.6
23104000000005,58.720256000000006,60.817408,62.91456,65.011712,67.10886400000001,69.206016,71.303168,73.40032000000001,75.497472,77.59462400000001,79.691776,81.788928,83.88608,85.983232,88.08038400000001,90.177536,92.27468800000001,94.37184,96.468992,98.56614400000001,100.663296,102.76044800000001,104.8576,106.95475200000001,109.05190400000001,111.149056,113.24620800000001,115.34336,117.44051200000001,119.537664,121.634816,123.73196800000001,125.82912,127.92627200000001,130.023424,132.120576,134.21772800000002,136.31488000000002,138.412032,140.509184,142.606336,144.70348800000002,146.80064000000002,148.897792,150.994944,153.092096,155.18924800000002,157.28640000000001,159.383552,161.480704,163.577856,165.67500800000002,167.77216,169.869312,171.966464,174.06361600000002,176.16076800000002,178.25792,180.355072,182.452224,184.54937600000002,186.64652800000002,188.74368,190.840832,192.937984,195.03513600000002,197.13228800000002,199.22944,201.326592,203.423744,205.52089600000002,207.61804800000002,209.7152,211.812352,213.90950400000003,216.00665600000002,218.10380800000001,220.20096,222.298112,224.39526400000003,226.49241600000002,228.589568,230.68672,232.783872,234.88102400000002,236.97817600000002,239.075328,241.17248,243.269632,245.36678400000002,247.46393600000002,249.561088,251.65824,253.75539200000003,255.85254400000002,257.949696,260.046848,262.144,264.241152,266.338304,268.43545600000004,270.53260800000004,272.62976000000003,274.726912,276.824064,278.921216,281.018368,283.11552,285.212672,287.309824,289.40697600000004,291.50412800000004,293.60128000000003,295.698432,297.795584,299.892736,301.989888,304.08704,306.184192,308.28134400000005,310.37849600000004,312.47564800000004,314.57280000000003,316.669952,318.767104,320.864256,322.961408,325.05856,327.155712,329.25286400000005,331.35001600000004,333.44716800000003,335.54432,337.641472,339.738624,341.835776,343.932928,346.03008,348.12723200000005,350.22438400000004],"y":[0.2501466572284698,0.2510619163513183,0.262
1481418609619,0.2632303833961487,0.2720474302768707,0.2719806432723999,0.2726832032203674,0.2786827087402344,0.2823672890663147,0.276201844215393,0.2816944718360901,0.280361145734787,0.2819306254386902,0.2823295891284942,0.2892518043518066,0.2872919738292694,0.2859259247779846,0.2885263860225677,0.2862614393234253,0.2933129370212555,0.2930494546890259,0.2884900867938995,0.2942298054695129,0.2927677929401397,0.2954220175743103,0.2918704748153686,0.2943699061870575,0.2891678512096405,0.291848212480545,0.2942944765090942,0.2973679602146148,0.2953736186027527,0.2963412702083587,0.297100305557251,0.2963026762008667,0.2944463491439819,0.2971296310424804,0.293870210647583,0.2982682287693023,0.2978119254112243,0.2989997565746307,0.2993503510951996,0.298117071390152,0.2977498769760132,0.3004056811332702,0.3012634217739105,0.3001384139060974,0.3052266240119934,0.3038219809532165,0.3037647306919098,0.3009455502033233,0.3038812279701233,0.303263396024704,0.3025077581405639,0.3056069612503052,0.3024908602237701,0.3050909340381622,0.3001562356948852,0.303833544254303,0.3019777834415436,0.3036664128303528,0.3022894859313965,0.3042722940444946,0.3023003339767456,0.3069425821304321,0.307883083820343,0.3026910126209259,0.3054113090038299,0.3046148121356964,0.305342435836792,0.3048149049282074,0.3066973984241485,0.3055126965045929,0.3063409924507141,0.307701051235199,0.3075169324874878,0.3091190159320831,0.3098153173923492,0.31436288356781,0.3096509575843811,0.3022815883159637,0.3119745552539825,0.3083471357822418,0.3085280954837799,0.3082001209259033,0.3080264329910278,0.3116717934608459,0.3097788393497467,0.3117353916168213,0.3170038759708404,0.3099159002304077,0.3133728504180908,0.3161626160144806,0.3095119595527649,0.3135432302951813,0.3103009164333343,0.3126655519008636,0.3121814131736755,0.3123973608016968,0.3148256838321686,0.3144133985042572,0.3124284744262695,0.3102188408374786,0.3123636841773987,0.3115113973617553,0.3151636719703674,0.3148572146892547,0.315061867237091,0.312
7182424068451,0.3139308094978332,0.3134367167949676,0.3136025071144104,0.3172793388366699,0.3134761154651642,0.3109587132930755,0.3127998411655426,0.3161843717098236,0.3163313865661621,0.3145243525505066,0.3155156075954437,0.3127505779266357,0.3182451128959656,0.3162476718425751,0.3124897480010986,0.3128789663314819,0.3119811117649078,0.314126193523407,0.3136049509048462,0.3149912655353546,0.3146650791168213,0.3151968121528625,0.3179666996002197,0.3169245719909668,0.3202513754367828,0.3185319602489471,0.3202781081199646,0.3186031281948089,0.3166128396987915,0.3199457228183746,0.3194417059421539,0.3170624077320099,0.3184532523155212,0.3191981911659241,0.3191225528717041,0.3173209130764007,0.3195607960224151,0.3166368305683136,0.3188160359859466,0.3174867630004883,0.3184468746185303,0.3211863338947296,0.3184327483177185,0.3177861273288727,0.3180214762687683,0.3194973170757293,0.3212297558784485,0.3211282789707184,0.3200584352016449,0.3168685734272003,0.3211040198802948,0.3222841620445251,0.3196901082992553,0.3236229419708252,0.3204475045204162,0.3210069537162781,0.3191083669662475,0.31863734126091,0.3195922076702118],"label":"FineWeb full 
MinHash"},"big-run-sampled_full_filtered_no_dedup":{"x":[0.0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,28.311552000000002,29.360128000000003,31.45728,33.554432000000006,35.651584,37.748736,39.845888,41.94304,44.040192000000005,46.137344000000006,48.234496,50.331648,52.4288,54.525952000000004,56.623104000000005,58.720256000000006,60.817408,62.91456,65.011712,67.10886400000001,69.206016,71.303168,73.40032000000001,75.497472,77.59462400000001,79.691776,81.788928,83.88608,85.983232,88.08038400000001,90.177536,92.27468800000001,94.37184,96.468992,98.56614400000001,100.663296,102.76044800000001,104.8576,106.95475200000001,109.05190400000001,111.149056,113.24620800000001,115.34336,117.44051200000001,119.537664,121.634816,123.73196800000001,125.82912,127.92627200000001,130.023424,132.120576,134.21772800000002,136.31488000000002,138.412032,140.509184,142.606336,144.70348800000002,146.80064000000002,148.897792,150.994944,153.092096,155.18924800000002,157.28640000000001,159.383552,161.480704,163.577856,165.67500800000002,167.77216,169.869312,171.966464,174.06361600000002,176.16076800000002,178.25792,180.355072,182.452224,184.54937600000002,186.64652800000002,188.74368,190.840832,192.937984,195.03513600000002,197.13228800000002,199.22944,201.326592,203.423744,205.52089600000002,207.61804800000002,209.7152,211.812352,213.90950400000003,216.00665600000002,218.10380800000001,220.20096,222.298112,224.39526400000003,226.49241600000002,228.589568,230.68672,232.783872,234.88102400000002,236.97817600000002,239.075328,241.17248,243.269632,245.36678400000002,247.46393600000002,249.561088,251.65824,253.75539200000003,255.85254400000002,257.949696,260.046848,262.144,264.241152,266.338304,268.43545600000004,270.53260800000004,272.62976000000003,274.726912,276.824064,278.921216,281.018368,283.11552,285.212672,287.309824,289.40697600000004,291.50412
800000004,293.60128000000003,295.698432,297.795584,299.892736,301.989888,304.08704,306.184192,308.28134400000005,310.37849600000004,312.47564800000004,314.57280000000003,316.669952,318.767104,320.864256,322.961408,325.05856,327.155712,329.25286400000005,331.35001600000004,333.44716800000003,335.54432,337.641472,339.738624,341.835776,343.932928,346.03008,348.12723200000005,350.22438400000004],"y":[0.2501466572284698,0.2516599297523498,0.2610189318656921,0.2666046619415283,0.2667981088161468,0.2667821645736694,0.2708088159561157,0.2738403379917145,0.2726235687732696,0.2762763500213623,0.2768311202526092,0.2809228301048279,0.2836140990257263,0.2822815179824829,0.2831664383411407,0.2797218561172485,0.286342591047287,0.2855269610881805,0.2847287058830261,0.2888180613517761,0.286526083946228,0.2865165770053863,0.294582188129425,0.2925947606563568,0.2947863042354584,0.2892930805683136,0.2903610467910766,0.288201242685318,0.2873396277427673,0.2916238009929657,0.2908017039299011,0.2907920777797699,0.2952797412872314,0.2941452264785766,0.2921333611011505,0.2925891280174255,0.2968584895133972,0.2980035543441772,0.2964116632938385,0.2962304651737213,0.2950254380702972,0.2977516651153564,0.2944138348102569,0.3003402054309845,0.2976303696632385,0.3013098239898681,0.302829384803772,0.3018766045570373,0.305361807346344,0.2971298694610595,0.3014816343784332,0.3019805550575256,0.3037064969539642,0.2970167994499206,0.2995208501815796,0.2970106601715088,0.2990955114364624,0.3027818500995636,0.3048534691333771,0.2993872463703155,0.2986327707767486,0.3015393316745758,0.3003426790237427,0.3003274798393249,0.3017795085906982,0.3019182682037353,0.3015450537204742,0.3046211004257202,0.3031167984008789,0.3020436763763428,0.3011128306388855,0.3029948472976684,0.3045558631420135,0.301642894744873,0.3029441833496094,0.3035804331302643,0.3004390001296997,0.3021787703037262,0.306041270494461,0.3064048886299133,0.3087956011295318,0.3070018291473388,0.3065581619739532,0.3093871772289276,0.3060930073
261261,0.3033313155174255,0.3072777390480041,0.306413859128952,0.3104493916034698,0.3056999444961548,0.3077532052993774,0.309231549501419,0.3070645034313202,0.3117790520191192,0.3114112913608551,0.312661737203598,0.3181777000427246,0.3117201030254364,0.3099702894687652,0.3074746131896972,0.3064963519573211,0.3105958700180053,0.3111456036567688,0.3084964454174042,0.3087405860424042,0.3121673166751861,0.3121528625488281,0.3100416660308838,0.3142979145050049,0.3129935264587402,0.3112611472606659,0.3119436800479889,0.3154115974903106,0.3091593086719513,0.3103814721107483,0.3130497634410858,0.3133455514907837,0.3152708411216736,0.3137963414192199,0.3099324703216553,0.3164172768592834,0.3133907914161682,0.3128255009651184,0.3134104907512665,0.3106969892978668,0.3130004107952118,0.3131391704082489,0.3130116462707519,0.3143952488899231,0.3143975436687469,0.3143710494041443,0.3163396418094635,0.3166862726211548,0.3184126019477844,0.3178988993167877,0.317479133605957,0.3184944093227386,0.316694974899292,0.3176258206367492,0.3182629346847534,0.3200214207172394,0.3181648552417755,0.320680022239685,0.3178716897964477,0.3182425796985626,0.3182984292507171,0.3158398568630218,0.3152642548084259,0.3132680356502533,0.3178914785385132,0.3156660795211792,0.3161703050136566,0.3176451921463012,0.3173815906047821,0.3194171786308288,0.3193057179450989,0.3172560334205627,0.317656546831131,0.3155770003795624,0.3199106156826019,0.3170182108879089,0.3156754970550537,0.3180731236934662,0.3205638229846954,0.3175432682037353,0.3184471428394317,0.3192788958549499,0.3197042346000671,0.3177168369293213],"label":"FineWeb filtered only"}},"layout":{"xaxis":{"title":{"text":"Training tokens (billions)"}},"title":{"text":"Dedup across all dumps does not improve performance"}}}