rerender images
Browse filesThis view is limited to 50 files because it contains too many changes.
See raw diff
- assets/data/clustering/data.csv +0 -0
- assets/data/plots/edu-100k/arc_acc_norm.json +1 -1
- assets/data/plots/edu-100k/hellaswag_acc_norm.json +1 -1
- assets/data/plots/edu-100k/mmlu_acc_norm.json +1 -1
- assets/data/plots/edu-100k/openbookqa_acc_norm.json +1 -1
- assets/data/plots/edu-100k/piqa_acc_norm.json +1 -1
- assets/data/plots/edu-100k/siqa_acc_norm.json +1 -1
- assets/data/plots/edu-100k/winogrande_acc_norm.json +1 -1
- assets/images/Untitled 1.png +0 -0
- assets/images/Untitled 3.png +0 -0
- assets/images/Untitled 4.png +0 -0
- assets/images/Untitled 5.png +0 -0
- assets/images/Untitled 6.png +0 -0
- assets/images/Untitled.png +0 -0
- assets/images/c4_filters_hellaswag.png +0 -0
- assets/images/clusters.png +0 -0
- assets/images/cross_ind_unfiltered_comparison.png +0 -0
- assets/images/custom_filters.png +0 -0
- assets/images/dataset_ablations.png +0 -0
- assets/images/dedup_all_dumps_bad.png +0 -0
- assets/images/dedup_attempts.png +0 -0
- assets/images/dedup_impact_simulation.png +0 -0
- assets/images/duplicates_simul.png +0 -0
- assets/images/edu-100k.png +0 -0
- assets/images/edu-8k.png +0 -0
- assets/images/filtering_steps.png +0 -0
- assets/images/minhash_parameters_comparison.png +0 -0
- assets/images/minhash_params.png +0 -0
- assets/images/removed_data_cross_dedup.png +0 -0
- assets/images/score_by_dump.png +0 -0
- assets/images/stats.png +0 -0
- assets/images/wet_comparison.png +0 -0
- dist/assets/data/clustering/data.csv +0 -0
- dist/assets/data/clustering/info.csv +0 -106
- dist/assets/data/plots/all_dumps_bad/agg_score.json +0 -1
- dist/assets/data/plots/all_dumps_bad/arc_acc_norm.json +0 -1
- dist/assets/data/plots/all_dumps_bad/commonsense_qa_acc_norm.json +0 -1
- dist/assets/data/plots/all_dumps_bad/hellaswag_acc_norm.json +0 -1
- dist/assets/data/plots/all_dumps_bad/index.json +0 -1
- dist/assets/data/plots/all_dumps_bad/mmlu_acc_norm.json +0 -1
- dist/assets/data/plots/all_dumps_bad/openbookqa_acc_norm.json +0 -1
- dist/assets/data/plots/all_dumps_bad/piqa_acc_norm.json +0 -1
- dist/assets/data/plots/all_dumps_bad/siqa_acc_norm.json +0 -1
- dist/assets/data/plots/all_dumps_bad/winogrande_acc_norm.json +0 -1
- dist/assets/data/plots/all_filtering_steps/agg_score.json +0 -1
- dist/assets/data/plots/all_filtering_steps/arc_acc_norm.json +0 -1
- dist/assets/data/plots/all_filtering_steps/commonsense_qa_acc_norm.json +0 -1
- dist/assets/data/plots/all_filtering_steps/hellaswag_acc_norm.json +0 -1
- dist/assets/data/plots/all_filtering_steps/index.json +0 -1
- dist/assets/data/plots/all_filtering_steps/mmlu_acc_norm.json +0 -1
assets/data/clustering/data.csv
CHANGED
The diff for this file is too large to render.
See raw diff
|
|
assets/data/plots/edu-100k/arc_acc_norm.json
CHANGED
@@ -1 +1 @@
|
|
1 |
-
{"data": {"C4": {"x": ["C4"], "y": [0.
|
|
|
1 |
+
{"data": {"C4": {"x": ["C4"], "y": [0.4435000121593475], "label": "C4"}, "Dolma": {"x": ["Dolma"], "y": [0.44200000166893], "label": "Dolma"}, "FineWeb": {"x": ["FineWeb"], "y": [0.4600000083446502], "label": "FineWeb"}, "RedPajama2": {"x": ["RedPajama2"], "y": [0.4494999945163727], "label": "RedPajama2"}, "RefinedWeb": {"x": ["RefinedWeb"], "y": [0.4555000066757202], "label": "RefinedWeb"}, "SlimPajama": {"x": ["SlimPajama"], "y": [0.4605000019073486], "label": "SlimPajama"}, "The Pile": {"x": ["The Pile"], "y": [0.4375], "label": "The Pile"}, "FineWeb-Edu": {"x": ["FineWeb-Edu"], "y": [0.5734999775886536], "label": "FineWeb-Edu"}}, "layout": {"showlegend": false, "title": {"text": "Evaluation results at 350B tokens"}, "xaxis": {"title": {"text": "Dataset", "standoff": 30}, "tickangle": 30}, "yaxis": {"range": [0.225, 0.6431999731063843]}, "margin": {"b": 100}}}
|
assets/data/plots/edu-100k/hellaswag_acc_norm.json
CHANGED
@@ -1 +1 @@
|
|
1 |
-
{"data": {"C4": {"x": ["C4"], "y": [0.
|
|
|
1 |
+
{"data": {"C4": {"x": ["C4"], "y": [0.6389999985694885], "label": "C4"}, "Dolma": {"x": ["Dolma"], "y": [0.6159999966621399], "label": "Dolma"}, "FineWeb": {"x": ["FineWeb"], "y": [0.6269999742507935], "label": "FineWeb"}, "RedPajama2": {"x": ["RedPajama2"], "y": [0.5600000023841858], "label": "RedPajama2"}, "RefinedWeb": {"x": ["RefinedWeb"], "y": [0.6019999980926514], "label": "RefinedWeb"}, "SlimPajama": {"x": ["SlimPajama"], "y": [0.5839999914169312], "label": "SlimPajama"}, "The Pile": {"x": ["The Pile"], "y": [0.5569999814033508], "label": "The Pile"}, "FineWeb-Edu": {"x": ["FineWeb-Edu"], "y": [0.597000002861023], "label": "FineWeb-Edu"}}, "layout": {"showlegend": false, "title": {"text": "Evaluation results at 350B tokens"}, "xaxis": {"title": {"text": "Dataset", "standoff": 30}, "tickangle": 30}, "yaxis": {"range": [0.225, 0.7217999982833863]}, "margin": {"b": 100}}}
|
assets/data/plots/edu-100k/mmlu_acc_norm.json
CHANGED
@@ -1 +1 @@
|
|
1 |
-
{"data": {"C4": {"x": ["C4"], "y": [0.
|
|
|
1 |
+
{"data": {"C4": {"x": ["C4"], "y": [0.3162081837654114], "label": "C4"}, "Dolma": {"x": ["Dolma"], "y": [0.3209713697433471], "label": "Dolma"}, "FineWeb": {"x": ["FineWeb"], "y": [0.3296935856342315], "label": "FineWeb"}, "RedPajama2": {"x": ["RedPajama2"], "y": [0.3293801844120025], "label": "RedPajama2"}, "RefinedWeb": {"x": ["RefinedWeb"], "y": [0.3323083519935608], "label": "RefinedWeb"}, "SlimPajama": {"x": ["SlimPajama"], "y": [0.3337143063545227], "label": "SlimPajama"}, "The Pile": {"x": ["The Pile"], "y": [0.3308100700378418], "label": "The Pile"}, "FineWeb-Edu": {"x": ["FineWeb-Edu"], "y": [0.3744533956050873], "label": "FineWeb-Edu"}}, "layout": {"showlegend": false, "title": {"text": "Evaluation results at 350B tokens"}, "xaxis": {"title": {"text": "Dataset", "standoff": 30}, "tickangle": 30}, "yaxis": {"range": [0.225, 0.40434407472610473]}, "margin": {"b": 100}}}
|
assets/data/plots/edu-100k/openbookqa_acc_norm.json
CHANGED
@@ -1 +1 @@
|
|
1 |
-
{"data": {"C4": {"x": ["C4"], "y": [0.
|
|
|
1 |
+
{"data": {"C4": {"x": ["C4"], "y": [0.3720000088214874], "label": "C4"}, "Dolma": {"x": ["Dolma"], "y": [0.3799999952316284], "label": "Dolma"}, "FineWeb": {"x": ["FineWeb"], "y": [0.3959999978542328], "label": "FineWeb"}, "RedPajama2": {"x": ["RedPajama2"], "y": [0.3540000021457672], "label": "RedPajama2"}, "RefinedWeb": {"x": ["RefinedWeb"], "y": [0.356000006198883], "label": "RefinedWeb"}, "SlimPajama": {"x": ["SlimPajama"], "y": [0.3459999859333038], "label": "SlimPajama"}, "The Pile": {"x": ["The Pile"], "y": [0.356000006198883], "label": "The Pile"}, "FineWeb-Edu": {"x": ["FineWeb-Edu"], "y": [0.4180000126361847], "label": "FineWeb-Edu"}}, "layout": {"showlegend": false, "title": {"text": "Evaluation results at 350B tokens"}, "xaxis": {"title": {"text": "Dataset", "standoff": 30}, "tickangle": 30}, "yaxis": {"range": [0.225, 0.45660001516342164]}, "margin": {"b": 100}}}
|
assets/data/plots/edu-100k/piqa_acc_norm.json
CHANGED
@@ -1 +1 @@
|
|
1 |
-
{"data": {"C4": {"x": ["C4"], "y": [0.
|
|
|
1 |
+
{"data": {"C4": {"x": ["C4"], "y": [0.7710000276565552], "label": "C4"}, "Dolma": {"x": ["Dolma"], "y": [0.7689999938011169], "label": "Dolma"}, "FineWeb": {"x": ["FineWeb"], "y": [0.7609999775886536], "label": "FineWeb"}, "RedPajama2": {"x": ["RedPajama2"], "y": [0.7310000061988831], "label": "RedPajama2"}, "RefinedWeb": {"x": ["RefinedWeb"], "y": [0.7730000019073486], "label": "RefinedWeb"}, "SlimPajama": {"x": ["SlimPajama"], "y": [0.7570000290870667], "label": "SlimPajama"}, "The Pile": {"x": ["The Pile"], "y": [0.7200000286102295], "label": "The Pile"}, "FineWeb-Edu": {"x": ["FineWeb-Edu"], "y": [0.7689999938011169], "label": "FineWeb-Edu"}}, "layout": {"showlegend": false, "title": {"text": "Evaluation results at 350B tokens"}, "xaxis": {"title": {"text": "Dataset", "standoff": 30}, "tickangle": 30}, "yaxis": {"range": [0.45, 0.8376000022888184]}, "margin": {"b": 100}}}
|
assets/data/plots/edu-100k/siqa_acc_norm.json
CHANGED
@@ -1 +1 @@
|
|
1 |
-
{"data": {"C4": {"x": ["C4"], "y": [0.4009999930858612], "label": "C4"}, "Dolma": {"x": ["Dolma"], "y": [0.
|
|
|
1 |
+
{"data": {"C4": {"x": ["C4"], "y": [0.4009999930858612], "label": "C4"}, "Dolma": {"x": ["Dolma"], "y": [0.3989999890327453], "label": "Dolma"}, "FineWeb": {"x": ["FineWeb"], "y": [0.414000004529953], "label": "FineWeb"}, "RedPajama2": {"x": ["RedPajama2"], "y": [0.4059999883174896], "label": "RedPajama2"}, "RefinedWeb": {"x": ["RefinedWeb"], "y": [0.4099999964237213], "label": "RefinedWeb"}, "SlimPajama": {"x": ["SlimPajama"], "y": [0.402999997138977], "label": "SlimPajama"}, "The Pile": {"x": ["The Pile"], "y": [0.4020000100135803], "label": "The Pile"}, "FineWeb-Edu": {"x": ["FineWeb-Edu"], "y": [0.4040000140666961], "label": "FineWeb-Edu"}}, "layout": {"showlegend": false, "title": {"text": "Evaluation results at 350B tokens"}, "xaxis": {"title": {"text": "Dataset", "standoff": 30}, "tickangle": 30}, "yaxis": {"range": [0.29700000000000004, 0.4374000054359436]}, "margin": {"b": 100}}}
|
assets/data/plots/edu-100k/winogrande_acc_norm.json
CHANGED
@@ -1 +1 @@
|
|
1 |
-
{"data": {"C4": {"x": ["C4"], "y": [0.
|
|
|
1 |
+
{"data": {"C4": {"x": ["C4"], "y": [0.5609999895095825], "label": "C4"}, "Dolma": {"x": ["Dolma"], "y": [0.5550000071525574], "label": "Dolma"}, "FineWeb": {"x": ["FineWeb"], "y": [0.5640000104904175], "label": "FineWeb"}, "RedPajama2": {"x": ["RedPajama2"], "y": [0.5490000247955322], "label": "RedPajama2"}, "RefinedWeb": {"x": ["RefinedWeb"], "y": [0.5540000200271606], "label": "RefinedWeb"}, "SlimPajama": {"x": ["SlimPajama"], "y": [0.5479999780654907], "label": "SlimPajama"}, "The Pile": {"x": ["The Pile"], "y": [0.5590000152587891], "label": "The Pile"}, "FineWeb-Edu": {"x": ["FineWeb-Edu"], "y": [0.578000009059906], "label": "FineWeb-Edu"}}, "layout": {"showlegend": false, "title": {"text": "Evaluation results at 350B tokens"}, "xaxis": {"title": {"text": "Dataset", "standoff": 30}, "tickangle": 30}, "yaxis": {"range": [0.45, 0.6036000108718872]}, "margin": {"b": 100}}}
|
assets/images/Untitled 1.png
DELETED
Binary file (139 kB)
|
|
assets/images/Untitled 3.png
DELETED
Binary file (551 kB)
|
|
assets/images/Untitled 4.png
DELETED
Binary file (483 kB)
|
|
assets/images/Untitled 5.png
DELETED
Binary file (475 kB)
|
|
assets/images/Untitled 6.png
DELETED
Binary file (116 kB)
|
|
assets/images/Untitled.png
DELETED
Binary file (309 kB)
|
|
assets/images/c4_filters_hellaswag.png
CHANGED
assets/images/clusters.png
CHANGED
assets/images/cross_ind_unfiltered_comparison.png
CHANGED
assets/images/custom_filters.png
CHANGED
assets/images/dataset_ablations.png
CHANGED
assets/images/dedup_all_dumps_bad.png
CHANGED
assets/images/dedup_attempts.png
CHANGED
assets/images/dedup_impact_simulation.png
DELETED
Binary file (123 kB)
|
|
assets/images/duplicates_simul.png
ADDED
assets/images/edu-100k.png
CHANGED
assets/images/edu-8k.png
CHANGED
assets/images/filtering_steps.png
CHANGED
assets/images/minhash_parameters_comparison.png
DELETED
Binary file (35.3 kB)
|
|
assets/images/minhash_params.png
ADDED
assets/images/removed_data_cross_dedup.png
CHANGED
assets/images/score_by_dump.png
DELETED
Binary file (400 kB)
|
|
assets/images/stats.png
ADDED
assets/images/wet_comparison.png
CHANGED
dist/assets/data/clustering/data.csv
DELETED
The diff for this file is too large to render.
See raw diff
|
|
dist/assets/data/clustering/info.csv
DELETED
@@ -1,106 +0,0 @@
|
|
1 |
-
,cluster_id,cluster_summaries,cluster_position_x,cluster_position_y
|
2 |
-
0,-1,None,9.926462,4.7121987
|
3 |
-
1,0,Philosophical/Spiritual Introspection,10.312462,1.2666532
|
4 |
-
2,1,"Scholarships,",8.167274,4.8995786
|
5 |
-
3,2,Politics,8.81142,2.4859838
|
6 |
-
4,3,Theology,9.615214,0.3783942
|
7 |
-
5,4,Dating,4.985182,1.8439052
|
8 |
-
6,5,Accommodation,11.457769,5.080919
|
9 |
-
7,6,Football,6.6154537,-1.6859366
|
10 |
-
8,7,Film Festival,6.9734483,1.4548192
|
11 |
-
9,8,Culinary,13.426296,4.5412893
|
12 |
-
10,9,Music,6.0653744,0.7536916
|
13 |
-
11,10,Gambling,3.124241,3.2533677
|
14 |
-
12,11,Baseball,7.133596,-2.4256644
|
15 |
-
13,12,Technology,6.4929094,6.768577
|
16 |
-
14,13,Website Policies,4.873843,5.771508
|
17 |
-
15,14,Weddings,11.815845,3.7894728
|
18 |
-
16,15,Gaming,5.529167,2.9530518
|
19 |
-
17,16,Commodities/Services Provision,10.453564,5.8489122
|
20 |
-
18,17,Crafts,13.287651,6.4237967
|
21 |
-
19,18,Automobiles,9.9531145,8.840178
|
22 |
-
20,19,Watches,13.893139,9.859185
|
23 |
-
21,20,Dogs,12.595798,3.5351615
|
24 |
-
22,21,Photography,10.7942295,3.5504062
|
25 |
-
23,22,Legalities,8.942016,4.72733
|
26 |
-
24,23,Consumer Electronics,7.078649,8.338984
|
27 |
-
25,24,Insulation,10.520957,7.914946
|
28 |
-
26,25,Cannabis,14.317424,3.2114828
|
29 |
-
27,26,Footwear,15.052116,7.6956415
|
30 |
-
28,27,Real Estate,9.536316,6.103533
|
31 |
-
29,28,Relocation,10.205071,7.1883316
|
32 |
-
30,29,Sports betting,3.2779586,2.443366
|
33 |
-
31,30,Narratives,7.613535,1.8300554
|
34 |
-
32,31,Dating,4.788838,2.1900373
|
35 |
-
33,32,Apparel/Clothing,14.394226,7.3073387
|
36 |
-
34,33,User Authentication,5.265638,6.4014487
|
37 |
-
35,34,Academicwriting,6.9187264,3.4357684
|
38 |
-
36,35,Sports,7.4969172,-2.086585
|
39 |
-
37,36,Fashion/Lifestyle Products,13.821669,7.7150764
|
40 |
-
38,37,Diverse events,9.437052,2.2438836
|
41 |
-
39,38,Blockchain/Cryptocurrency,7.7586045,6.9439344
|
42 |
-
40,39,Online Businesses/Marketing,6.522259,5.219268
|
43 |
-
41,40,Healthcare,11.425277,2.3801014
|
44 |
-
42,41,Home Decor,12.878046,7.2632184
|
45 |
-
43,42,Biomedicine,12.789575,2.3376262
|
46 |
-
44,43,Jewelry,14.259997,8.653363
|
47 |
-
45,44,Addiction,11.561383,1.3774762
|
48 |
-
46,45,Products,11.711758,8.423251
|
49 |
-
47,46,Multi-purposefulness,11.080702,7.4574013
|
50 |
-
48,47,"Mass transit,",9.910158,5.4402313
|
51 |
-
49,48,Ethernet,6.9763823,7.7909245
|
52 |
-
50,49,Legal,9.516912,4.636553
|
53 |
-
51,50,E-commerce,13.263438,8.6548195
|
54 |
-
52,51,Audio,7.717162,8.903019
|
55 |
-
53,52,Infrastructure,10.52904,5.369669
|
56 |
-
54,53,Firearms,11.062812,9.268473
|
57 |
-
55,54,Freight/Logistics,9.551044,7.0336204
|
58 |
-
56,55,Products,12.073747,7.645973
|
59 |
-
57,56,Vaccinations,11.9387045,2.7824683
|
60 |
-
58,57,Artwork,11.019163,4.1677165
|
61 |
-
59,58,Viticulture,14.223523,5.0761614
|
62 |
-
60,59,WordPress,5.9597983,5.824579
|
63 |
-
61,60,Cosmetics/Dermatology,15.093273,3.4669027
|
64 |
-
62,61,Software,6.375921,6.4298844
|
65 |
-
63,62,Dentistry,14.76626,1.1620314
|
66 |
-
64,63,Pest Control,13.201735,3.6806118
|
67 |
-
65,64,SEO,5.720493,5.238112
|
68 |
-
66,65,Lottery,1.7142816,2.9782674
|
69 |
-
67,66,Narratives,8.460977,1.0804662
|
70 |
-
68,67,Waste Reduction & Recycling,10.634534,6.959523
|
71 |
-
69,68,Communication,6.438943,5.9467845
|
72 |
-
70,69,Orthopedics,13.005415,1.1908791
|
73 |
-
71,70,Home Decor & Furniture,12.732457,7.876862
|
74 |
-
72,71,Education,7.6568975,3.4944353
|
75 |
-
73,72,Sports,7.295141,-0.7343214
|
76 |
-
74,73,Social Media Advertising,6.133886,4.8547883
|
77 |
-
75,74,Privacy,4.756733,6.3598356
|
78 |
-
76,75,Website design,6.1168823,5.465095
|
79 |
-
77,76,Roofing,11.389448,8.080609
|
80 |
-
78,77,Nutrition/Supplements,13.631578,2.5334294
|
81 |
-
79,78,Haircare/Hairstyling,15.544645,4.54254
|
82 |
-
80,79,Cookies,4.341592,6.819268
|
83 |
-
81,80,International Trade,8.993828,6.4757586
|
84 |
-
82,81,Entrepreneurial Resources,9.435777,5.3340797
|
85 |
-
83,82,Cricket,6.5171986,-1.245905
|
86 |
-
84,83,Crafts,13.852216,7.049825
|
87 |
-
85,84,Floristry,13.407425,5.8741536
|
88 |
-
86,85,Genealogy,9.530803,1.6548243
|
89 |
-
87,86,Mental Health,11.074349,1.6069281
|
90 |
-
88,87,Volunteerism,10.145443,3.6734574
|
91 |
-
89,88,Lighting,11.385381,8.93693
|
92 |
-
90,89,Artificial Intelligence,6.5306387,6.2178063
|
93 |
-
91,90,Business,7.471462,6.4142885
|
94 |
-
92,91,E-commerce,13.638669,6.5098934
|
95 |
-
93,92,Urbanization/Over-tourism,10.221115,6.100654
|
96 |
-
94,93,Events,10.8449,3.9822264
|
97 |
-
95,94,Pharmaceuticals/Biotechnology,12.318266,2.4331784
|
98 |
-
96,95,Professional Wrestling,6.856304,-0.65598303
|
99 |
-
97,96,Various,9.3211975,3.4894605
|
100 |
-
98,97,Medicine,13.17882,2.1281319
|
101 |
-
99,98,Community Engagement,9.848856,3.5187004
|
102 |
-
100,99,Fitness,12.504849,0.9134393
|
103 |
-
101,100,Bathroom Design & Toilet Engineering,11.779076,7.2920136
|
104 |
-
102,101,Business Development,7.328447,5.659843
|
105 |
-
103,102,Sports,7.6370654,-1.0701839
|
106 |
-
104,103,Sexuality,13.817207,1.6510898
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
dist/assets/data/plots/all_dumps_bad/agg_score.json
DELETED
@@ -1 +0,0 @@
|
|
1 |
-
{"data":{"big-run-refinedweb":{"x":[0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,28.311552000000002,29.360128000000003,31.45728,33.554432000000006,35.651584,37.748736,39.845888,41.94304,44.040192000000005,46.137344000000006,48.234496,50.331648,52.4288,54.525952000000004,56.623104000000005,58.720256000000006,60.817408,62.91456,65.011712,67.10886400000001,69.206016,71.303168,73.40032000000001,75.497472,77.59462400000001,79.691776,81.788928,83.88608,85.983232,88.08038400000001,90.177536,92.27468800000001,94.37184,96.468992,98.56614400000001,100.663296,102.76044800000001,104.8576,106.95475200000001,109.05190400000001,111.149056,113.24620800000001,115.34336,117.44051200000001,119.537664,121.634816,123.73196800000001,125.82912,127.92627200000001,130.023424,132.120576,134.21772800000002,136.31488000000002,138.412032,140.509184,142.606336,144.70348800000002,146.80064000000002,148.897792,150.994944,153.092096,155.18924800000002,157.28640000000001,159.383552,161.480704,163.577856,165.67500800000002,167.77216,169.869312,171.966464,174.06361600000002,176.16076800000002,178.25792,180.355072,182.452224,184.54937600000002,186.64652800000002,188.74368,190.840832,192.937984,195.03513600000002,197.13228800000002,199.22944,201.326592,203.423744,205.52089600000002,207.61804800000002,209.7152,211.812352,213.90950400000003,216.00665600000002,218.10380800000001,220.20096,222.298112,224.39526400000003,226.49241600000002,228.589568,230.68672,232.783872,234.88102400000002,236.97817600000002,239.075328,241.17248,243.269632,245.36678400000002,247.46393600000002,249.561088,251.65824,253.75539200000003,255.85254400000002,257.949696,260.046848,262.144,264.241152,266.338304,268.43545600000004,270.53260800000004,272.62976000000003,274.726912,276.824064,278.921216,281.018368,283.11552,285.212672,287.309824,289.40697600000004,291.50412800000004,293.60128000000003,295.698432,297.795584,299.892736,301.989888,304.08704,306.184192,308.28134400000005,310.37849600000004,312.47564800000004,314.57280000000003,316.669952,318.767104,320.864256,322.961408,325.05856,327.155712,329.25286400000005,331.35001600000004,333.44716800000003,335.54432,337.641472,339.738624,341.835776,343.932928,346.03008,348.12723200000005,350.22438400000004],"y":[0.3308933284133672,0.3534814938902855,0.3764607086777687,0.38782499730587,0.3981050960719585,0.4028486795723438,0.4125883243978023,0.4117814563214779,0.414029736071825,0.4197172522544861,0.4211113378405571,0.4279881417751312,0.4280137903988361,0.4280424378812313,0.4291964024305343,0.4326301179826259,0.4371833503246307,0.4346669465303421,0.4336562640964985,0.4432648755609989,0.4401291646063328,0.4394684173166752,0.4476612061262131,0.4465444348752498,0.4472153298556804,0.4433343075215816,0.4510187618434429,0.4459567815065384,0.4460812956094742,0.4498684890568256,0.4529943652451038,0.4528274349868297,0.4551213420927524,0.4549156539142132,0.4564928151667118,0.4576693661510944,0.4557182416319847,0.4536240361630916,0.457439012825489,0.4570476822555065,0.4589823484420776,0.462024375796318,0.4540738053619861,0.4550252184271812,0.4576593860983848,0.4573238864541054,0.4575810581445694,0.4622134491801262,0.4592566937208175,0.4614734016358852,0.4637473002076149,0.4625372551381588,0.4613912180066108,0.4597448222339153,0.4594792164862156,0.4662549719214439,0.4634026065468788,0.4633508697152138,0.4635734222829342,0.4628961533308029,0.4670135043561458,0.4639505892992019,0.4631133340299129,0.4665167145431041,0.4672448337078094,0.4693268723785877,0.4630668573081493,0.4676454700529575,0.4646359197795391,0.4621579721570015,0.4692446552217006,0.4704835228621959,0.4663223996758461,0.4680556617677212,0.466339822858572,0.4682099223136902,0.4711195565760135,0.4722655527293682,0.4727961830794811,0.4676857478916645,0.4719390422105789,0.4713102728128433,0.4712141714990139,0.4721613004803657,0.4713456854224205,0.4682970903813839,0.4679934531450271,0.4685162976384163,0.4679946713149547,0.4681242071092129,0.4702276065945625,0.472664151340723,0.4730790853500366,0.4731674715876579,0.4718914777040481,0.4719801284372806,0.4761029370129108,0.4735167175531387,0.4730370938777923,0.4730173237621784,0.4735377207398414,0.4777223989367485,0.4796326830983162,0.4734170883893966,0.4739485755562782,0.4748299159109592,0.4765299335122108,0.4745025858283043,0.4754423759877682,0.4784592799842357,0.4761341325938701,0.4760282784700393,0.4769757278263569,0.47154351323843,0.4786738082766533,0.4804279990494251,0.4777076803147793,0.4798569902777672,0.4759011939167976,0.4784621745347976,0.479673832654953,0.4780617095530033,0.48076206818223,0.47995800152421,0.4790860973298549,0.4817167408764362,0.4811586998403072,0.482547752559185,0.4816697351634502,0.4809327870607376,0.4816545359790325,0.4804601892828941,0.4776877984404564,0.4813711903989315,0.4844604581594467,0.4819537848234176,0.4820829331874847,0.4778126627206802,0.482935007661581,0.48230691999197,0.4826001971960068,0.4823969900608063,0.4811219945549965,0.4789146520197391,0.484035175293684,0.4848698377609253,0.4855728335678577,0.4825376532971859,0.485215101391077,0.4824351668357849,0.4835342466831207,0.4822137206792831,0.4838785007596016,0.4837255179882049,0.4853012599050998,0.4857851006090641,0.4863366298377514,0.4856646582484245,0.4842503517866134,0.4838776960968971,0.4846346862614155,0.4837041422724724,0.4813097268342972,0.4873070046305656,0.4841253720223903,0.4837464913725853,0.483069509267807,0.4851242564618587,0.4861010462045669],"label":"RefinedWeb"},"big-run-sampled_full_filtered_no_dedup":{"x":[0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,28.311552000000002,29.360128000000003,31.45728,33.554432000000006,35.651584,37.748736,39.845888,41.94304,44.040192000000005,46.137344000000006,48.234496,50.331648,52.4288,54.525952000000004,56.623104000000005,58.720256000000006,60.817408,62.91456,65.011712,67.10886400000001,69.206016,71.303168,73.40032000000001,75.497472,77.59462400000001,79.691776,81.788928,83.88608,85.983232,88.08038400000001,90.177536,92.27468800000001,94.37184,96.468992,98.56614400000001,100.663296,102.76044800000001,104.8576,106.95475200000001,109.05190400000001,111.149056,113.24620800000001,115.34336,117.44051200000001,119.537664,121.634816,123.73196800000001,125.82912,127.92627200000001,130.023424,132.120576,134.21772800000002,136.31488000000002,138.412032,140.509184,142.606336,144.70348800000002,146.80064000000002,148.897792,150.994944,153.092096,155.18924800000002,157.28640000000001,159.383552,161.480704,163.577856,165.67500800000002,167.77216,169.869312,171.966464,174.06361600000002,176.16076800000002,178.25792,180.355072,182.452224,184.54937600000002,186.64652800000002,188.74368,190.840832,192.937984,195.03513600000002,197.13228800000002,199.22944,201.326592,203.423744,205.52089600000002,207.61804800000002,209.7152,211.812352,213.90950400000003,216.00665600000002,218.10380800000001,220.20096,222.298112,224.39526400000003,226.49241600000002,228.589568,230.68672,232.783872,234.88102400000002,236.97817600000002,239.075328,241.17248,243.269632,245.36678400000002,247.46393600000002,249.561088,251.65824,253.75539200000003,255.85254400000002,257.949696,260.046848,262.144,264.241152,266.338304,268.43545600000004,270.53260800000004,272.62976000000003,274.726912,276.824064,278.921216,281.018368,283.11552,285.212672,287.309824,289.40697600000004,291.50412800000004,293.60128000000003,295.698432,297.795584,299.892736,301.989888,304.08704,306.184192,308.28134400000005,310.37849600000004,312.47564800000004,314.57280000000003,316.669952,318.767104,320.864256,322.961408,325.05856,327.155712,329.25286400000005,331.35001600000004,333.44716800000003,335.54432,337.641472,339.738624,341.835776,343.932928,346.03008,348.12723200000005,350.22438400000004],"y":[0.3308933284133672,0.3605199865996837,0.3733148723840713,0.3882005847990513,0.3934122696518898,0.3947227671742439,0.4042885974049568,0.3974800482392311,0.4055779427289963,0.4133470430970192,0.4117913842201233,0.4113653488457203,0.4149517640471458,0.4187851920723915,0.4252083078026771,0.4206527359783649,0.4240428246557712,0.422003373503685,0.4280910938978195,0.4244147576391697,0.4316282644867897,0.4295645765960216,0.4310102686285972,0.4360743537545204,0.4313482865691185,0.4350991360843181,0.4378576353192329,0.4335876516997814,0.4347924515604973,0.4348904751241207,0.436600212007761,0.430036511272192,0.4350974671542644,0.4399556629359722,0.4371416717767715,0.4363861419260502,0.4376698136329651,0.4405004419386387,0.4373639523983001,0.4379038028419018,0.4371281825006008,0.4393439553678036,0.440426729619503,0.4401675276458263,0.4429537951946258,0.4449137263000011,0.4434786736965179,0.4450470842421055,0.4454202279448509,0.4394537284970283,0.442185215651989,0.4461225643754005,0.4427758157253265,0.4430646039545536,0.4476901069283485,0.4478763341903686,0.4493869319558143,0.4448477327823639,0.450044184923172,0.4498609118163585,0.4457665979862213,0.4506924152374267,0.449855338782072,0.448790930211544,0.4474099352955818,0.4546772800385952,0.4529431238770485,0.452015146613121,0.4502020999789238,0.4493804536759853,0.4523266032338142,0.4551868587732315,0.4501944817602634,0.4493303671479225,0.4526805207133293,0.4533850513398647,0.4518048763275146,0.4518973492085933,0.4531301632523536,0.4518006071448326,0.4553494565188885,0.4528752230107784,0.4536322727799415,0.4561733976006508,0.4549491256475448,0.4574789106845855,0.4577847123146057,0.4563642293214798,0.4578686729073524,0.4561499990522861,0.4537816494703293,0.4542164430022239,0.4559455662965774,0.4554723873734474,0.4575514122843742,0.4575202167034149,0.4592722058296203,0.4585275091230869,0.4580587856471538,0.456934317946434,0.4577495418488979,0.4540119916200638,0.4570806957781315,0.4608120545744896,0.4588425755500793,0.4578334167599678,0.4610816091299057,0.4598177038133144,0.461849745362997,0.4631866924464702,0.4601576402783394,0.4646804705262184,0.4632389545440674,0.4604574106633663,0.4602976888418197,0.4581312239170074,0.4654182009398937,0.4655338563024997,0.4616620391607284,0.461054053157568,0.4613021649420261,0.4658613465726375,0.4633531905710697,0.4613638147711754,0.4643996246159076,0.462500050663948,0.4650798961520195,0.4648764543235302,0.4639869071543216,0.4634246975183487,0.46585888043046,0.4639799632132053,0.4630857892334461,0.4644265696406364,0.4642998576164245,0.4686848931014538,0.4687492996454239,0.4650243632495403,0.4627032242715359,0.4665953740477562,0.4660026729106903,0.4664581045508384,0.4676475040614605,0.4657339677214622,0.4664678275585174,0.4673498086631298,0.4676674827933311,0.4680955372750759,0.4681585058569908,0.4659864418208599,0.4686457589268684,0.4661462865769863,0.4658931568264961,0.4674226939678192,0.46805215254426,0.4682257212698459,0.4689070098102093,0.4699570722877979,0.4655096270143986,0.4688013233244419,0.4707522802054882,0.4661469310522079,0.4688841328024864,0.4671329781413078,0.4662554152309894,0.4697433896362781,0.4698473587632179,0.4676505327224731,0.4696521013975143],"label":"FineWeb filtered only"},"big-run-fineweb-cross-dedup-fixed":{"x":[0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,29.360128000000003,31.45728,33.554432000000006,35.651584,37.748736,39.845888,41.94304,44.040192000000005,46.137344000000006,48.234496,50.331648,52.4288,54.525952000000004,56.623104000000005,58.720256000000006,60.817408,62.91456,65.011712,67.10886400000001,69.206016,71.303168,73.40032000000001,75.497472,77.59462400000001,79.691776,81.788928,83.88608,85.983232,88.08038400000001,90.177536,92.27468800000001,94.37184,96.468992,98.56614400000001,100.663296,102.76044800000001,104.8576,106.95475200000001,109.05190400000001,111.149056,113.24620800000001,115.34336,117.44051200000001,119.537664,121.634816,123.73196800000001,125.82912,127.92627200000001,130.023424,132.120576,134.21772800000002,136.31488000000002,138.412032,140.509184,142.606336,144.70348800000002,146.80064000000002,148.897792,150.994944,153.092096,155.18924800000002,157.28640000000001,159.383552,161.480704,163.577856,165.67500800000002,167.77216,169.869312,171.966464,174.06361600000002,176.16076800000002,178.25792,180.355072,182.452224,184.54937600000002,186.64652800000002,188.74368,190.840832,192.937984,195.03513600000002,197.13228800000002,199.22944,201.326592,203.423744,205.52089600000002,207.61804800000002,209.7152,211.812352,213.90950400000003,216.00665600000002,218.10380800000001,220.20096,222.298112,224.39526400000003,226.49241600000002,228.589568,230.68672,232.783872,234.88102400000002,236.97817600000002,239.075328,241.17248,243.269632,245.36678400000002,247.46393600000002,249.561088,251.65824,253.75539200000003,255.85254400000002,257.949696,260.046848,262.144,264.241152,266.338304,268.43545600000004,270.53260800000004,272.62976000000003,274.726912,276.824064,278.921216,281.018368,283.11552,285.212672,287.309824,289.40697600000004,291.50412800000004,293.60128000000003,295.698432,297.795584,299.892736,301.989888,304.08704,306.184192,308.28134400000005,310.37849600000004,312.47564800000004,314.57280000000003,316.669952,318.767104,320.864256,322.961408,325.05856,327.155712,329.25286400000005,331.35001600000004,333.44716800000003,335.54432,337.641472,339.738624,341.835776,343.932928,346.03008,348.12723200000005,350.22438400000004],"y":[0.3308933284133672,0.3551952373236418,0.3736435137689113,0.3814037963747978,0.3948809280991554,0.3996850810945034,0.4089604057371616,0.4100853353738785,0.4119834117591381,0.4168377220630646,0.4186493046581745,0.4169826358556747,0.4234288297593593,0.4229162000119686,0.4273439794778824,0.4290364980697632,0.4291782416403293,0.4296907968819141,0.4311576783657074,0.4326641112565994,0.430318683385849,0.430436260998249,0.4339037239551544,0.4363459683954716,0.4357402548193931,0.4342963136732578,0.4366712383925915,0.4363959729671478,0.436981026083231,0.4447868093848228,0.4411709941923618,0.4406092017889023,0.4424176625907421,0.4423875361680984,0.4422253370285034,0.4410557933151722,0.4447037056088447,0.4454837813973427,0.4435960277915001,0.4468514993786812,0.4479999616742134,0.4428562931716442,0.445764634758234,0.4456562362611294,0.4488007053732872,0.4475954286754131,0.4468922987580299,0.4548408314585686,0.4511027485132217,0.4530330970883369,0.4483681954443455,0.4531726539134979,0.45334542542696,0.4544384703040123,0.4530758671462536,0.4540613554418087,0.4510113634169101,0.4538320265710354,0.4518541917204857,0.4536847211420536,0.4532708041369915,0.4552236869931221,0.455034039914608,0.4562875479459762,0.4532428197562694,0.4574853852391243,0.4517738744616508,0.4579889141023159,0.4538268558681011,0.456730306148529,0.4526018649339676,0.4562746733427048,0.4560015797615051,0.4555426277220249,0.4561501257121563,0.4524396173655987,0.4557023830711841,0.4589769169688225,0.4581078588962555,0.4620813727378845,0.4586601965129375,0.4568093195557594,0.4569808952510357,0.4567535072565079,0.4575250148773193,0.4606908001005649,0.4603964723646641,0.4622848592698574,0.4594669193029403,0.4640629850327968,0.4604269936680794,0.4634841009974479,0.4644578285515308,0.4642514958977699,0.4666304066777229,0.4616626128554344,0.4588956907391548,0.4620226770639419,0.4628621749579906,0.4595407098531723,0.4635516740381717,0.46005355194211,0.4601523540914058,0.4644204638898372,0.4620639197528362,0.46614545956254,0.4636696502566337,0.4610077403485775,0.4640897810459137,0.4636163525283336,0.4630545899271965,0.466012816876173,0.4650349207222461,0.4613720141351223,0.4644323363900184,0.4647249802947044,0.4656480401754379,0.4651664271950722,0.4622530452907085,0.4655019529163837,0.4650313258171081,0.466718140989542,0.4661559611558914,0.4661237150430679,0.4664223715662956,0.4640601389110088,0.4642657749354839,0.4633881188929081,0.4629989042878151,0.4685831367969513,0.4675870984792709,0.467183344066143,0.4678030684590339,0.4660939238965511,0.4691914953291416,0.4670972637832165,0.468262892216444,0.4672016054391861,0.4676182121038437,0.4698677137494087,0.4658828042447567,0.4701816700398922,0.4684622809290886,0.466015312820673,0.4675401039421558,0.4693200923502445,0.4702670983970165,0.4679145030677318,0.4676233418285846,0.4674933589994907,0.4678357951343059,0.4669915996491909,0.4657857678830623,0.4666901864111423,0.4669371582567692,0.4672787226736545,0.4684535376727581,0.4685697965323925,0.4694835692644119,0.4683254994451999,0.4712230190634727,0.4683987610042095,0.4707653746008873,0.4663059376180172,0.4683133698999882,0.4686385430395603,0.4657671600580215,0.4692615270614624],"label":"FineWeb full MinHash"}},"layout":{"title":{"text":"Dedup across all dumps does not improve performance"}}}
|
|
|
|
dist/assets/data/plots/all_dumps_bad/arc_acc_norm.json
DELETED
@@ -1 +0,0 @@
|
|
1 |
-
{"data":{"big-run-refinedweb":{"x":[0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,28.311552000000002,29.360128000000003,31.45728,33.554432000000006,35.651584,37.748736,39.845888,41.94304,44.040192000000005,46.137344000000006,48.234496,50.331648,52.4288,54.525952000000004,56.623104000000005,58.720256000000006,60.817408,62.91456,65.011712,67.10886400000001,69.206016,71.303168,73.40032000000001,75.497472,77.59462400000001,79.691776,81.788928,83.88608,85.983232,88.08038400000001,90.177536,92.27468800000001,94.37184,96.468992,98.56614400000001,100.663296,102.76044800000001,104.8576,106.95475200000001,109.05190400000001,111.149056,113.24620800000001,115.34336,117.44051200000001,119.537664,121.634816,123.73196800000001,125.82912,127.92627200000001,130.023424,132.120576,134.21772800000002,136.31488000000002,138.412032,140.509184,142.606336,144.70348800000002,146.80064000000002,148.897792,150.994944,153.092096,155.18924800000002,157.28640000000001,159.383552,161.480704,163.577856,165.67500800000002,167.77216,169.869312,171.966464,174.06361600000002,176.16076800000002,178.25792,180.355072,182.452224,184.54937600000002,186.64652800000002,188.74368,190.840832,192.937984,195.03513600000002,197.13228800000002,199.22944,201.326592,203.423744,205.52089600000002,207.61804800000002,209.7152,211.812352,213.90950400000003,216.00665600000002,218.10380800000001,220.20096,222.298112,224.39526400000003,226.49241600000002,228.589568,230.68672,232.783872,234.88102400000002,236.97817600000002,239.075328,241.17248,243.269632,245.36678400000002,247.46393600000002,249.561088,251.65824,253.75539200000003,255.85254400000002,257.949696,260.046848,262.144,264.241152,266.338304,268.43545600000004,270.53260800000004,272.62976000000003,274.726912,276.824064,278.921216,281.018368,283.11552,285.212672,287.309824,289.40697600000004,291.50412800000004,293.60128000000003,295.698432,297.795584,299.892736,301.989888,304.08704,306.184192,308.28134400000005,310.37849600000004,312.47564800000004,314.57280000000003,316.669952,318.767104,320.864256,322.961408,325.05856,327.155712,329.25286400000005,331.35001600000004,333.44716800000003,335.54432,337.641472,339.738624,341.835776,343.932928,346.03008,348.12723200000005,350.22438400000004],"y":[0.2509999871253967,0.2899999916553497,0.31700000166893,0.3409999907016754,0.3425000011920929,0.3485000133514404,0.3555000126361847,0.3574999868869781,0.3585000038146972,0.363999992609024,0.3619999885559082,0.3675000071525574,0.3865000009536743,0.3810000121593475,0.3740000128746032,0.3810000121593475,0.3810000121593475,0.3860000073909759,0.3810000121593475,0.3894999921321869,0.3849999904632568,0.3855000138282776,0.3989999890327453,0.3980000019073486,0.3995000123977661,0.395000010728836,0.4084999859333038,0.4040000140666961,0.4004999995231628,0.3955000042915344,0.4135000109672546,0.4070000052452087,0.4104999899864197,0.4014999866485595,0.4099999964237213,0.4199999868869781,0.414000004529953,0.402999997138977,0.4214999973773956,0.4095000028610229,0.4059999883174896,0.4090000092983246,0.4074999988079071,0.4120000004768371,0.4154999852180481,0.4189999997615814,0.4149999916553497,0.429500013589859,0.4154999852180481,0.4214999973773956,0.4244999885559082,0.4205000102519989,0.4269999861717224,0.4214999973773956,0.4180000126361847,0.4415000081062317,0.4320000112056732,0.4350000023841858,0.4259999990463257,0.4300000071525574,0.4259999990463257,0.4189999997615814,0.4269999861717224,0.4199999868869781,0.426499992609024,0.4350000023841858,0.4289999902248382,0.4345000088214874,0.4259999990463257,0.426499992609024,0.4395000040531158,0.4395000040531158,0.4359999895095825,0.4280000030994415,0.4370000064373016,0.4329999983310699,0.4309999942779541,0.4490000009536743,0.4399999976158142,0.4339999854564667,0.4399999976158142,0.4345000088214874,0.429500013589859,0.4370000064373016,0.4379999935626983,0.4284999966621399,0.4309999942779541,0.4350000023841858,0.4399999976158142,0.4314999878406524,0.4300000071525574,0.4410000145435333,0.4345000088214874,0.4410000145435333,0.4345000088214874,0.4339999854564667,0.4460000097751617,0.4410000145435333,0.4469999969005584,0.4480000138282776,0.4435000121593475,0.4375,0.4519999921321869,0.4480000138282776,0.4429999887943268,0.4519999921321869,0.4435000121593475,0.4334999918937683,0.4460000097751617,0.4564999938011169,0.4469999969005584,0.453000009059906,0.4485000073909759,0.4410000145435333,0.4444999992847442,0.4485000073909759,0.457500010728836,0.4469999969005584,0.4535000026226043,0.4535000026226043,0.4485000073909759,0.4490000009536743,0.4505000114440918,0.4595000147819519,0.4544999897480011,0.453000009059906,0.4605000019073486,0.4620000123977661,0.457500010728836,0.453000009059906,0.4550000131130218,0.460999995470047,0.4449999928474426,0.4474999904632568,0.457500010728836,0.4584999978542328,0.4494999945163727,0.4474999904632568,0.4625000059604645,0.4639999866485595,0.4555000066757202,0.4469999969005584,0.4600000083446502,0.453000009059906,0.4629999995231628,0.4589999914169311,0.4614999890327453,0.4555000066757202,0.4560000002384186,0.4580000042915344,0.4584999978542328,0.4560000002384186,0.4605000019073486,0.4595000147819519,0.4639999866485595,0.4614999890327453,0.4564999938011169,0.4634999930858612,0.4625000059604645,0.4614999890327453,0.4679999947547912,0.4584999978542328,0.4595000147819519,0.4505000114440918,0.4544999897480011,0.4595000147819519,0.4620000123977661,0.4670000076293945,0.4555000066757202],"label":"RefinedWeb"},"big-run-sampled_full_filtered_no_dedup":{"x":[0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,28.311552000000002,29.360128000000003,31.45728,33.554432000000006,35.651584,37.748736,39.845888,41.94304,44.040192000000005,46.137344000000006,48.234496,50.331648,52.4288,54.525952000000004,56.623104000000005,58.720256000000006,60.817408,62.91456,65.011712,67.10886400000001,69.206016,71.303168,73.40032000000001,75.497472,77.59462400000001,79.691776,81.788928,83.88608,85.983232,88.08038400000001,90.177536,92.27468800000001,94.37184,96.468992,98.56614400000001,100.663296,102.76044800000001,104.8576,106.95475200000001,109.05190400000001,111.149056,113.24620800000001,115.34336,117.44051200000001,119.537664,121.634816,123.73196800000001,125.82912,127.92627200000001,130.023424,132.120576,134.21772800000002,136.31488000000002,138.412032,140.509184,142.606336,144.70348800000002,146.80064000000002,148.897792,150.994944,153.092096,155.18924800000002,157.28640000000001,159.383552,161.480704,163.577856,165.67500800000002,167.77216,169.869312,171.966464,174.06361600000002,176.16076800000002,178.25792,180.355072,182.452224,184.54937600000002,186.64652800000002,188.74368,190.840832,192.937984,195.03513600000002,197.13228800000002,199.22944,201.326592,203.423744,205.52089600000002,207.61804800000002,209.7152,211.812352,213.90950400000003,216.00665600000002,218.10380800000001,220.20096,222.298112,224.39526400000003,226.49241600000002,228.589568,230.68672,232.783872,234.88102400000002,236.97817600000002,239.075328,241.17248,243.269632,245.36678400000002,247.46393600000002,249.561088,251.65824,253.75539200000003,255.85254400000002,257.949696,260.046848,262.144,264.241152,266.338304,268.43545600000004,270.53260800000004,272.62976000000003,274.726912,276.824064,278.921216,281.018368,283.11552,285.212672,287.309824,289.40697600000004,291.50412800000004,293.60128000000003,295.698432,297.795584,299.892736,301.989888,304.08704,306.184192,308.28134400000005,310.37849600000004,312.47564800000004,314.57280000000003,316.669952,318.767104,320.864256,322.961408,325.05856,327.155712,329.25286400000005,331.35001600000004,333.44716800000003,335.54432,337.641472,339.738624,341.835776,343.932928,346.03008,348.12723200000005,350.22438400000004],"y":[0.2509999871253967,0.2894999980926513,0.3235000073909759,0.3389999866485595,0.3384999930858612,0.3459999859333038,0.359499990940094,0.3429999947547912,0.3619999885559082,0.3564999997615814,0.3625000119209289,0.363999992609024,0.3680000007152557,0.3680000007152557,0.3785000145435333,0.3684999942779541,0.375,0.3734999895095825,0.3849999904632568,0.3944999873638153,0.3865000009536743,0.395000010728836,0.3935000002384186,0.3980000019073486,0.3910000026226043,0.3885000050067901,0.3914999961853027,0.3815000057220459,0.395000010728836,0.3894999921321869,0.395000010728836,0.3935000002384186,0.4034999907016754,0.4004999995231628,0.3970000147819519,0.3975000083446502,0.3995000123977661,0.3980000019073486,0.4034999907016754,0.3959999978542328,0.3989999890327453,0.402999997138977,0.3880000114440918,0.3980000019073486,0.4040000140666961,0.3989999890327453,0.3970000147819519,0.3925000131130218,0.4120000004768371,0.3935000002384186,0.395000010728836,0.4070000052452087,0.3935000002384186,0.4034999907016754,0.4189999997615814,0.4129999876022339,0.4160000085830688,0.4149999916553497,0.418500006198883,0.4225000143051147,0.4174999892711639,0.4210000038146972,0.4045000076293945,0.4079999923706054,0.4124999940395355,0.4144999980926513,0.4169999957084656,0.4194999933242798,0.4154999852180481,0.4169999957084656,0.4225000143051147,0.4225000143051147,0.4230000078678131,0.4160000085830688,0.4325000047683716,0.4325000047683716,0.4199999868869781,0.4199999868869781,0.4189999997615814,0.4269999861717224,0.4259999990463257,0.4230000078678131,0.4144999980926513,0.4329999983310699,0.4275000095367431,0.4305000007152557,0.4289999902248382,0.4235000014305115,0.4235000014305115,0.4325000047683716,0.4244999885559082,0.4314999878406524,0.4194999933242798,0.4350000023841858,0.4269999861717224,0.4235000014305115,0.4300000071525574,0.4284999966621399,0.4255000054836273,0.4280000030994415,0.4345000088214874,0.4225000143051147,0.4334999918937683,0.4300000071525574,0.4350000023841858,0.429500013589859,0.4325000047683716,0.4384999871253967,0.4345000088214874,0.4354999959468841,0.4359999895095825,0.4354999959468841,0.4424999952316284,0.4424999952316284,0.4320000112056732,0.4280000030994415,0.4390000104904175,0.4480000138282776,0.4415000081062317,0.4384999871253967,0.4390000104904175,0.4494999945163727,0.4449999928474426,0.4384999871253967,0.4424999952316284,0.4359999895095825,0.445499986410141,0.4399999976158142,0.4375,0.4410000145435333,0.4384999871253967,0.4375,0.4329999983310699,0.4370000064373016,0.4354999959468841,0.4440000057220459,0.4384999871253967,0.4384999871253967,0.4390000104904175,0.4424999952316284,0.4379999935626983,0.4345000088214874,0.4354999959468841,0.4440000057220459,0.4395000040531158,0.4465000033378601,0.4404999911785126,0.4505000114440918,0.4480000138282776,0.4449999928474426,0.445499986410141,0.4410000145435333,0.4485000073909759,0.4460000097751617,0.4480000138282776,0.4465000033378601,0.4460000097751617,0.4460000097751617,0.4395000040531158,0.4474999904632568,0.4469999969005584,0.4404999911785126,0.4440000057220459,0.4435000121593475,0.4435000121593475,0.4514999985694885,0.4474999904632568,0.4474999904632568,0.445499986410141],"label":"FineWeb filtered only"},"big-run-fineweb-cross-dedup-fixed":{"x":[0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,29.360128000000003,31.45728,33.554432000000006,35.651584,37.748736,39.845888,41.94304,44.040192000000005,46.137344000000006,48.234496,50.331648,52.4288,54.525952000000004,56.623104000000005,58.720256000000006,60.817408,62.91456,65.011712,67.10886400000001,69.206016,71.303168,73.40032000000001,75.497472,77.59462400000001,79.691776,81.788928,83.88608,85.983232,88.08038400000001,90.177536,92.27468800000001,94.37184,96.468992,98.56614400000001,100.663296,102.76044800000001,104.8576,106.95475200000001,109.05190400000001,111.149056,113.24620800000001,115.34336,117.44051200000001,119.537664,121.634816,123.73196800000001,125.82912,127.92627200000001,130.023424,132.120576,134.21772800000002,136.31488000000002,138.412032,140.509184,142.606336,144.70348800000002,146.80064000000002,148.897792,150.994944,153.092096,155.18924800000002,157.28640000000001,159.383552,161.480704,163.577856,165.67500800000002,167.77216,169.869312,171.966464,174.06361600000002,176.16076800000002,178.25792,180.355072,182.452224,184.54937600000002,186.64652800000002,188.74368,190.840832,192.937984,195.03513600000002,197.13228800000002,199.22944,201.326592,203.423744,205.52089600000002,207.61804800000002,209.7152,211.812352,213.90950400000003,216.00665600000002,218.10380800000001,220.20096,222.298112,224.39526400000003,226.49241600000002,228.589568,230.68672,232.783872,234.88102400000002,236.97817600000002,239.075328,241.17248,243.269632,245.36678400000002,247.46393600000002,249.561088,251.65824,253.75539200000003,255.85254400000002,257.949696,260.046848,262.144,264.241152,266.338304,268.43545600000004,270.53260800000004,272.62976000000003,274.726912,276.824064,278.921216,281.018368,283.11552,285.212672,287.309824,289.40697600000004,291.50412800000004,293.60128000000003,295.698432,297.795584,299.892736,301.989888,304.08704,306.184192,308.28134400000005,310.37849600000004,312.47564800000004,314.57280000000003,316.669952,318.767104,320.864256,322.961408,325.05856,327.155712,329.25286400000005,331.35001600000004,333.44716800000003,335.54432,337.641472,339.738624,341.835776,343.932928,346.03008,348.12723200000005,350.22438400000004],"y":[0.2509999871253967,0.2904999852180481,0.3289999961853027,0.3379999995231628,0.3400000035762787,0.3535000085830688,0.3700000047683716,0.3619999885559082,0.3695000112056732,0.3625000119209289,0.3745000064373016,0.3804999887943268,0.3835000097751617,0.3810000121593475,0.3785000145435333,0.3799999952316284,0.3885000050067901,0.3919999897480011,0.3899999856948852,0.3939999938011169,0.4004999995231628,0.3889999985694885,0.4000000059604645,0.3930000066757202,0.4025000035762787,0.398499995470047,0.3939999938011169,0.3989999890327453,0.4020000100135803,0.4079999923706054,0.4129999876022339,0.4014999866485595,0.4129999876022339,0.4079999923706054,0.4115000069141388,0.4070000052452087,0.4095000028610229,0.4199999868869781,0.4165000021457672,0.4239999949932098,0.4129999876022339,0.4034999907016754,0.4050000011920929,0.4135000109672546,0.4189999997615814,0.418500006198883,0.4199999868869781,0.4365000128746032,0.4320000112056732,0.4255000054836273,0.4259999990463257,0.4244999885559082,0.4275000095367431,0.4259999990463257,0.4210000038146972,0.421999990940094,0.4099999964237213,0.4305000007152557,0.4239999949932098,0.4194999933242798,0.4205000102519989,0.4255000054836273,0.414000004529953,0.4210000038146972,0.4180000126361847,0.4429999887943268,0.429500013589859,0.4165000021457672,0.4239999949932098,0.4255000054836273,0.4180000126361847,0.4325000047683716,0.4305000007152557,0.4329999983310699,0.4325000047683716,0.4320000112056732,0.4375,0.4410000145435333,0.4395000040531158,0.4379999935626983,0.4280000030994415,0.4365000128746032,0.4205000102519989,0.426499992609024,0.4280000030994415,0.4354999959468841,0.4314999878406524,0.429500013589859,0.421999990940094,0.4345000088214874,0.429500013589859,0.4354999959468841,0.4314999878406524,0.4404999911785126,0.4384999871253967,0.4359999895095825,0.4345000088214874,0.4320000112056732,0.4345000088214874,0.4375,0.4410000145435333,0.4280000030994415,0.4320000112056732,0.44200000166893,0.4460000097751617,0.4390000104904175,0.4314999878406524,0.4339999854564667,0.4390000104904175,0.4460000097751617,0.4309999942779541,0.4444999992847442,0.44200000166893,0.4404999911785126,0.4395000040531158,0.4370000064373016,0.4519999921321869,0.4429999887943268,0.4395000040531158,0.4415000081062317,0.4384999871253967,0.4494999945163727,0.4469999969005584,0.4375,0.4395000040531158,0.4345000088214874,0.4390000104904175,0.4375,0.4309999942779541,0.4320000112056732,0.4415000081062317,0.4354999959468841,0.445499986410141,0.4404999911785126,0.4429999887943268,0.4395000040531158,0.4354999959468841,0.4429999887943268,0.4410000145435333,0.4494999945163727,0.4429999887943268,0.4460000097751617,0.445499986410141,0.4429999887943268,0.4429999887943268,0.4350000023841858,0.4474999904632568,0.4415000081062317,0.4424999952316284,0.4375,0.4444999992847442,0.4424999952316284,0.4354999959468841,0.445499986410141,0.4379999935626983,0.4449999928474426,0.4365000128746032,0.4474999904632568,0.4440000057220459,0.4465000033378601,0.445499986410141,0.4474999904632568,0.4494999945163727,0.4449999928474426,0.4444999992847442,0.44200000166893,0.4345000088214874,0.4404999911785126],"label":"FineWeb full MinHash"}},"layout":{"title":{"text":"Dedup across all dumps does not improve performance"}}}
|
|
|
|
dist/assets/data/plots/all_dumps_bad/commonsense_qa_acc_norm.json
DELETED
@@ -1 +0,0 @@
|
|
1 |
-
{"data":{"big-run-refinedweb":{"x":[0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,28.311552000000002,29.360128000000003,31.45728,33.554432000000006,35.651584,37.748736,39.845888,41.94304,44.040192000000005,46.137344000000006,48.234496,50.331648,52.4288,54.525952000000004,56.623104000000005,58.720256000000006,60.817408,62.91456,65.011712,67.10886400000001,69.206016,71.303168,73.40032000000001,75.497472,77.59462400000001,79.691776,81.788928,83.88608,85.983232,88.08038400000001,90.177536,92.27468800000001,94.37184,96.468992,98.56614400000001,100.663296,102.76044800000001,104.8576,106.95475200000001,109.05190400000001,111.149056,113.24620800000001,115.34336,117.44051200000001,119.537664,121.634816,123.73196800000001,125.82912,127.92627200000001,130.023424,132.120576,134.21772800000002,136.31488000000002,138.412032,140.509184,142.606336,144.70348800000002,146.80064000000002,148.897792,150.994944,153.092096,155.18924800000002,157.28640000000001,159.383552,161.480704,163.577856,165.67500800000002,167.77216,169.869312,171.966464,174.06361600000002,176.16076800000002,178.25792,180.355072,182.452224,184.54937600000002,186.64652800000002,188.74368,190.840832,192.937984,195.03513600000002,197.13228800000002,199.22944,201.326592,203.423744,205.52089600000002,207.61804800000002,209.7152,211.812352,213.90950400000003,216.00665600000002,218.10380800000001,220.20096,222.298112,224.39526400000003,226.49241600000002,228.589568,230.68672,232.783872,234.88102400000002,236.97817600000002,239.075328,241.17248,243.269632,245.36678400000002,247.46393600000002,249.561088,251.65824,253.75539200000003,255.85254400000002,257.949696,260.046848,262.144,264.241152,266.338304,268.43545600000004,270.53260800000004,272.62976000000003,274.726912,276.824064,278.921216,281.018368,283.11552,285.212672,287.309824,289.40697600000004,291.50412800000004,293.60128000000003,295.698432,297.795584,299.892736,301.989888,304.08704,306.184192,308.28134400000005,310.37849600000004,312.47564800000004,314.57280000000003,316.669952,318.767104,320.864256,322.961408,325.05856,327.155712,329.25286400000005,331.35001600000004,333.44716800000003,335.54432,337.641472,339.738624,341.835776,343.932928,346.03008,348.12723200000005,350.22438400000004],"y":[0.2329999953508377,0.2529999911785126,0.2800000011920929,0.2870000004768371,0.3179999887943268,0.3129999935626983,0.3210000097751617,0.3160000145435333,0.3210000097751617,0.31700000166893,0.3330000042915344,0.3389999866485595,0.3289999961853027,0.3429999947547912,0.3370000123977661,0.3379999995231628,0.3459999859333038,0.3490000069141388,0.3470000028610229,0.3600000143051147,0.3569999933242798,0.3449999988079071,0.3650000095367431,0.3499999940395355,0.3540000021457672,0.3569999933242798,0.3619999885559082,0.3619999885559082,0.3580000102519989,0.3740000128746032,0.3709999918937683,0.3720000088214874,0.3759999871253967,0.3720000088214874,0.3659999966621399,0.3790000081062317,0.3610000014305115,0.3650000095367431,0.3650000095367431,0.3720000088214874,0.3729999959468841,0.3790000081062317,0.3680000007152557,0.3659999966621399,0.3680000007152557,0.3619999885559082,0.3619999885559082,0.3729999959468841,0.3720000088214874,0.3650000095367431,0.3759999871253967,0.367000013589859,0.3650000095367431,0.3680000007152557,0.3580000102519989,0.3589999973773956,0.3700000047683716,0.3680000007152557,0.367000013589859,0.3709999918937683,0.3880000114440918,0.3810000121593475,0.375,0.4040000140666961,0.3860000073909759,0.3840000033378601,0.3779999911785126,0.3729999959468841,0.3720000088214874,0.3799999952316284,0.3799999952316284,0.3779999911785126,0.3689999878406524,0.3770000040531158,0.3740000128746032,0.3819999992847442,0.3899999856948852,0.3799999952316284,0.3919999897480011,0.3720000088214874,0.3770000040531158,0.3930000066757202,0.3849999904632568,0.3899999856948852,0.3740000128746032,0.3740000128746032,0.3799999952316284,0.3779999911785126,0.3880000114440918,0.3709999918937683,0.3810000121593475,0.3880000114440918,0.3980000019073486,0.3819999992847442,0.3849999904632568,0.3810000121593475,0.3819999992847442,0.3889999985694885,0.3840000033378601,0.3910000026226043,0.3899999856948852,0.3959999978542328,0.3880000114440918,0.3869999945163727,0.3779999911785126,0.3819999992847442,0.3919999897480011,0.3849999904632568,0.3860000073909759,0.3919999897480011,0.3819999992847442,0.3819999992847442,0.3889999985694885,0.3889999985694885,0.3860000073909759,0.3880000114440918,0.3889999985694885,0.3939999938011169,0.3899999856948852,0.3869999945163727,0.3910000026226043,0.3910000026226043,0.3910000026226043,0.3970000147819519,0.3970000147819519,0.3970000147819519,0.3970000147819519,0.3939999938011169,0.4000000059604645,0.3970000147819519,0.402999997138977,0.3959999978542328,0.3959999978542328,0.4000000059604645,0.4040000140666961,0.4020000100135803,0.3989999890327453,0.3919999897480011,0.3930000066757202,0.3930000066757202,0.3980000019073486,0.4000000059604645,0.395000010728836,0.3899999856948852,0.4059999883174896,0.4020000100135803,0.4020000100135803,0.4059999883174896,0.3970000147819519,0.4110000133514404,0.4050000011920929,0.4000000059604645,0.4090000092983246,0.3989999890327453,0.402999997138977,0.4009999930858612,0.3980000019073486,0.4090000092983246,0.4079999923706054,0.4079999923706054,0.4020000100135803,0.402999997138977,0.402999997138977,0.4059999883174896,0.4040000140666961,0.4059999883174896,0.3989999890327453,0.4070000052452087,0.4059999883174896],"label":"RefinedWeb"},"big-run-fineweb-cross-dedup-fixed":{"x":[0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,29.360128000000003,31.45728,33.554432000000006,35.651584,37.748736,39.845888,41.94304,44.040192000000005,46.137344000000006,48.234496,50.331648,52.4288,54.525952000000004,56.623104000000005,58.720256000000006,60.817408,62.91456,65.011712,67.10886400000001,69.206016,71.303168,73.40032000000001,75.497472,77.59462400000001,79.691776,81.788928,83.88608,85.983232,88.08038400000001,90.177536,92.27468800000001,94.37184,96.468992,98.56614400000001,100.663296,102.76044800000001,104.8576,106.95475200000001,109.05190400000001,111.149056,113.24620800000001,115.34336,117.44051200000001,119.537664,121.634816,123.73196800000001,125.82912,127.92627200000001,130.023424,132.120576,134.21772800000002,136.31488000000002,138.412032,140.509184,142.606336,144.70348800000002,146.80064000000002,148.897792,150.994944,153.092096,155.18924800000002,157.28640000000001,159.383552,161.480704,163.577856,165.67500800000002,167.77216,169.869312,171.966464,174.06361600000002,176.16076800000002,178.25792,180.355072,182.452224,184.54937600000002,186.64652800000002,188.74368,190.840832,192.937984,195.03513600000002,197.13228800000002,199.22944,201.326592,203.423744,205.52089600000002,207.61804800000002,209.7152,211.812352,213.90950400000003,216.00665600000002,218.10380800000001,220.20096,222.298112,224.39526400000003,226.49241600000002,228.589568,230.68672,232.783872,234.88102400000002,236.97817600000002,239.075328,241.17248,243.269632,245.36678400000002,247.46393600000002,249.561088,251.65824,253.75539200000003,255.85254400000002,257.949696,260.046848,262.144,264.241152,266.338304,268.43545600000004,270.53260800000004,272.62976000000003,274.726912,276.824064,278.921216,281.018368,283.11552,285.212672,287.309824,289.40697600000004,291.50412800000004,293.60128000000003,295.698432,297.795584,299.892736,301.989888,304.08704,306.184192,308.28134400000005,310.37849600000004,312.47564800000004,314.57280000000003,316.669952,318.767104,320.864256,322.961408,325.05856,327.155712,329.25286400000005,331.35001600000004,333.44716800000003,335.54432,337.641472,339.738624,341.835776,343.932928,346.03008,348.12723200000005,350.22438400000004],"y":[0.2329999953508377,0.2540000081062317,0.2870000004768371,0.2829999923706054,0.3210000097751617,0.3079999983310699,0.3230000138282776,0.3179999887943268,0.3160000145435333,0.3289999961853027,0.3199999928474426,0.324999988079071,0.3310000002384186,0.3260000050067901,0.335999995470047,0.335999995470047,0.3310000002384186,0.335999995470047,0.3339999914169311,0.3459999859333038,0.3330000042915344,0.3449999988079071,0.3429999947547912,0.3479999899864197,0.3420000076293945,0.3479999899864197,0.3459999859333038,0.3339999914169311,0.3350000083446502,0.3519999980926513,0.3440000116825104,0.3490000069141388,0.3379999995231628,0.3420000076293945,0.3610000014305115,0.3409999907016754,0.356000006198883,0.3630000054836273,0.3519999980926513,0.3510000109672546,0.3619999885559082,0.3569999933242798,0.3479999899864197,0.3529999852180481,0.3569999933242798,0.3529999852180481,0.3519999980926513,0.3549999892711639,0.356000006198883,0.3499999940395355,0.3479999899864197,0.3619999885559082,0.3459999859333038,0.3519999980926513,0.3529999852180481,0.3680000007152557,0.3519999980926513,0.3580000102519989,0.3549999892711639,0.3490000069141388,0.3499999940395355,0.3600000143051147,0.3709999918937683,0.3659999966621399,0.3569999933242798,0.3510000109672546,0.3600000143051147,0.367000013589859,0.3529999852180481,0.363999992609024,0.3630000054836273,0.3619999885559082,0.356000006198883,0.367000013589859,0.3600000143051147,0.3540000021457672,0.3589999973773956,0.3610000014305115,0.356000006198883,0.3680000007152557,0.3519999980926513,0.3549999892711639,0.3479999899864197,0.3549999892711639,0.3519999980926513,0.367000013589859,0.3600000143051147,0.3600000143051147,0.3680000007152557,0.356000006198883,0.3610000014305115,0.3689999878406524,0.367000013589859,0.3689999878406524,0.3720000088214874,0.3680000007152557,0.3569999933242798,0.3650000095367431,0.363999992609024,0.3610000014305115,0.3709999918937683,0.3569999933242798,0.3540000021457672,0.3619999885559082,0.3549999892711639,0.3650000095367431,0.3680000007152557,0.3589999973773956,0.356000006198883,0.3610000014305115,0.3619999885559082,0.3740000128746032,0.3700000047683716,0.3650000095367431,0.3819999992847442,0.3770000040531158,0.3810000121593475,0.3729999959468841,0.3680000007152557,0.3689999878406524,0.3740000128746032,0.3779999911785126,0.3720000088214874,0.3740000128746032,0.367000013589859,0.363999992609024,0.367000013589859,0.3689999878406524,0.3709999918937683,0.3709999918937683,0.375,0.3680000007152557,0.375,0.3630000054836273,0.3720000088214874,0.3819999992847442,0.3729999959468841,0.3689999878406524,0.363999992609024,0.3709999918937683,0.3659999966621399,0.3700000047683716,0.367000013589859,0.3709999918937683,0.3759999871253967,0.3759999871253967,0.3729999959468841,0.3729999959468841,0.3729999959468841,0.3779999911785126,0.375,0.3700000047683716,0.3659999966621399,0.3759999871253967,0.3779999911785126,0.3709999918937683,0.3840000033378601,0.3720000088214874,0.375,0.367000013589859,0.3770000040531158,0.3709999918937683,0.375,0.3709999918937683,0.3740000128746032,0.3740000128746032,0.375,0.3770000040531158],"label":"FineWeb full MinHash"},"big-run-sampled_full_filtered_no_dedup":{"x":[0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,28.311552000000002,29.360128000000003,31.45728,33.554432000000006,35.651584,37.748736,39.845888,41.94304,44.040192000000005,46.137344000000006,48.234496,50.331648,52.4288,54.525952000000004,56.623104000000005,58.720256000000006,60.817408,62.91456,65.011712,67.10886400000001,69.206016,71.303168,73.40032000000001,75.497472,77.59462400000001,79.691776,81.788928,83.88608,85.983232,88.08038400000001,90.177536,92.27468800000001,94.37184,96.468992,98.56614400000001,100.663296,102.76044800000001,104.8576,106.95475200000001,109.05190400000001,111.149056,113.24620800000001,115.34336,117.44051200000001,119.537664,121.634816,123.73196800000001,125.82912,127.92627200000001,130.023424,132.120576,134.21772800000002,136.31488000000002,138.412032,140.509184,142.606336,144.70348800000002,146.80064000000002,148.897792,150.994944,153.092096,155.18924800000002,157.28640000000001,159.383552,161.480704,163.577856,165.67500800000002,167.77216,169.869312,171.966464,174.06361600000002,176.16076800000002,178.25792,180.355072,182.452224,184.54937600000002,186.64652800000002,188.74368,190.840832,192.937984,195.03513600000002,197.13228800000002,199.22944,201.326592,203.423744,205.52089600000002,207.61804800000002,209.7152,211.812352,213.90950400000003,216.00665600000002,218.10380800000001,220.20096,222.298112,224.39526400000003,226.49241600000002,228.589568,230.68672,232.783872,234.88102400000002,236.97817600000002,239.075328,241.17248,243.269632,245.36678400000002,247.46393600000002,249.561088,251.65824,253.75539200000003,255.85254400000002,257.949696,260.046848,262.144,264.241152,266.338304,268.43545600000004,270.53260800000004,272.62976000000003,274.726912,276.824064,278.921216,281.018368,283.11552,285.212672,287.309824,289.40697600000004,291.50412800000004,293.60128000000003,295.698432,297.795584,299.892736,301.989888,304.08704,306.184192,308.28134400000005,310.37849600000004,312.47564800000004,314.57280000000003,316.669952,318.767104,320.864256,322.961408,325.05856,327.155712,329.25286400000005,331.35001600000004,333.44716800000003,335.54432,337.641472,339.738624,341.835776,343.932928,346.03008,348.12723200000005,350.22438400000004],"y":[0.2329999953508377,0.2599999904632568,0.277999997138977,0.2910000085830688,0.3070000112056732,0.3140000104904175,0.3019999861717224,0.3059999942779541,0.3210000097751617,0.3230000138282776,0.324999988079071,0.3149999976158142,0.3109999895095825,0.3339999914169311,0.3289999961853027,0.3319999873638153,0.3319999873638153,0.3300000131130218,0.3370000123977661,0.3219999969005584,0.3370000123977661,0.328000009059906,0.3339999914169311,0.3420000076293945,0.3400000035762787,0.3440000116825104,0.3510000109672546,0.3409999907016754,0.3449999988079071,0.3339999914169311,0.3540000021457672,0.3339999914169311,0.3470000028610229,0.3470000028610229,0.3440000116825104,0.3589999973773956,0.3569999933242798,0.3630000054836273,0.3549999892711639,0.3589999973773956,0.3449999988079071,0.3549999892711639,0.3449999988079071,0.3389999866485595,0.3499999940395355,0.3610000014305115,0.3619999885559082,0.3600000143051147,0.3519999980926513,0.3479999899864197,0.356000006198883,0.3519999980926513,0.3440000116825104,0.3490000069141388,0.3519999980926513,0.3470000028610229,0.3589999973773956,0.3449999988079071,0.3490000069141388,0.356000006198883,0.3619999885559082,0.3569999933242798,0.3659999966621399,0.3610000014305115,0.3549999892711639,0.3700000047683716,0.363999992609024,0.3600000143051147,0.3580000102519989,0.3549999892711639,0.3619999885559082,0.3689999878406524,0.3630000054836273,0.363999992609024,0.3700000047683716,0.367000013589859,0.3630000054836273,0.3630000054836273,0.3700000047683716,0.3589999973773956,0.3540000021457672,0.3540000021457672,0.3659999966621399,0.3619999885559082,0.3589999973773956,0.3650000095367431,0.3709999918937683,0.3680000007152557,0.3689999878406524,0.3650000095367431,0.3729999959468841,0.3619999885559082,0.3689999878406524,0.3569999933242798,0.3510000109672546,0.3680000007152557,0.363999992609024,0.3700000047683716,0.3659999966621399,0.3659999966621399,0.363999992609024,0.3619999885559082,0.3659999966621399,0.3680000007152557,0.3610000014305115,0.3720000088214874,0.3729999959468841,0.3810000121593475,0.3630000054836273,0.3689999878406524,0.3709999918937683,0.3759999871253967,0.382999986410141,0.3729999959468841,0.3720000088214874,0.3680000007152557,0.3659999966621399,0.3650000095367431,0.363999992609024,0.3589999973773956,0.356000006198883,0.3650000095367431,0.3659999966621399,0.367000013589859,0.3729999959468841,0.3720000088214874,0.375,0.3740000128746032,0.3700000047683716,0.3569999933242798,0.3759999871253967,0.3740000128746032,0.367000013589859,0.3770000040531158,0.3759999871253967,0.3709999918937683,0.3779999911785126,0.3709999918937683,0.3689999878406524,0.3799999952316284,0.3630000054836273,0.375,0.3700000047683716,0.3700000047683716,0.3729999959468841,0.3720000088214874,0.3790000081062317,0.375,0.3729999959468841,0.3770000040531158,0.3799999952316284,0.3779999911785126,0.3720000088214874,0.3799999952316284,0.3759999871253967,0.3799999952316284,0.3790000081062317,0.375,0.3740000128746032,0.3729999959468841,0.3840000033378601,0.3659999966621399,0.3759999871253967,0.3720000088214874,0.3720000088214874,0.3759999871253967,0.375,0.3650000095367431,0.3729999959468841],"label":"FineWeb filtered only"}},"layout":{"title":{"text":"Dedup across all dumps does not improve performance"}}}
|
|
|
|
dist/assets/data/plots/all_dumps_bad/hellaswag_acc_norm.json
DELETED
@@ -1 +0,0 @@
|
|
1 |
-
{"data":{"big-run-refinedweb":{"x":[0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,28.311552000000002,29.360128000000003,31.45728,33.554432000000006,35.651584,37.748736,39.845888,41.94304,44.040192000000005,46.137344000000006,48.234496,50.331648,52.4288,54.525952000000004,56.623104000000005,58.720256000000006,60.817408,62.91456,65.011712,67.10886400000001,69.206016,71.303168,73.40032000000001,75.497472,77.59462400000001,79.691776,81.788928,83.88608,85.983232,88.08038400000001,90.177536,92.27468800000001,94.37184,96.468992,98.56614400000001,100.663296,102.76044800000001,104.8576,106.95475200000001,109.05190400000001,111.149056,113.24620800000001,115.34336,117.44051200000001,119.537664,121.634816,123.73196800000001,125.82912,127.92627200000001,130.023424,132.120576,134.21772800000002,136.31488000000002,138.412032,140.509184,142.606336,144.70348800000002,146.80064000000002,148.897792,150.994944,153.092096,155.18924800000002,157.28640000000001,159.383552,161.480704,163.577856,165.67500800000002,167.77216,169.869312,171.966464,174.06361600000002,176.16076800000002,178.25792,180.355072,182.452224,184.54937600000002,186.64652800000002,188.74368,190.840832,192.937984,195.03513600000002,197.13228800000002,199.22944,201.326592,203.423744,205.52089600000002,207.61804800000002,209.7152,211.812352,213.90950400000003,216.00665600000002,218.10380800000001,220.20096,222.298112,224.39526400000003,226.49241600000002,228.589568,230.68672,232.783872,234.88102400000002,236.97817600000002,239.075328,241.17248,243.269632,245.36678400000002,247.46393600000002,249.561088,251.65824,253.75539200000003,255.85254400000002,257.949696,260.046848,262.144,264.241152,266.338304,268.43545600000004,270.53260800000004,272.62976000000003,274.726912,276.824064,278.921216,281.018368,283.11552,285.212672,287.309824,289.40697600000004,291.50412800000004,293.60128000000003,295.698432,297.795584,299.892736,301.989888,304.08704,306.184192,308.28134400000005,310.37849600000004,312.47564800000004,314.57280000000003,316.669952,318.767104,320.864256,322.961408,325.05856,327.155712,329.25286400000005,331.35001600000004,333.44716800000003,335.54432,337.641472,339.738624,341.835776,343.932928,346.03008,348.12723200000005,350.22438400000004],"y":[0.257999986410141,0.2759999930858612,0.328000009059906,0.3499999940395355,0.3889999985694885,0.3910000026226043,0.402999997138977,0.4210000038146972,0.4280000030994415,0.4359999895095825,0.4469999969005584,0.4440000057220459,0.4600000083446502,0.4690000116825104,0.4600000083446502,0.4679999947547912,0.4729999899864197,0.4760000109672546,0.4839999973773956,0.4939999878406524,0.488999992609024,0.4990000128746032,0.4979999959468841,0.4979999959468841,0.5009999871253967,0.5,0.5090000033378601,0.5070000290870667,0.5180000066757202,0.5199999809265137,0.5109999775886536,0.5130000114440918,0.5249999761581421,0.5149999856948853,0.5299999713897705,0.5339999794960022,0.5189999938011169,0.5289999842643738,0.5249999761581421,0.5320000052452087,0.5460000038146973,0.5419999957084656,0.5260000228881836,0.5289999842643738,0.546999990940094,0.5419999957084656,0.5419999957084656,0.5460000038146973,0.5419999957084656,0.5389999747276306,0.5440000295639038,0.5569999814033508,0.5450000166893005,0.5329999923706055,0.5580000281333923,0.5339999794960022,0.5540000200271606,0.5460000038146973,0.5479999780654907,0.5529999732971191,0.5540000200271606,0.5619999766349792,0.5490000247955322,0.5410000085830688,0.5490000247955322,0.5569999814033508,0.550000011920929,0.5479999780654907,0.5630000233650208,0.546999990940094,0.5559999942779541,0.5600000023841858,0.5509999990463257,0.5569999814033508,0.5569999814033508,0.5580000281333923,0.5619999766349792,0.5580000281333923,0.5669999718666077,0.5569999814033508,0.5709999799728394,0.5529999732971191,0.5649999976158142,0.5659999847412109,0.5659999847412109,0.5690000057220459,0.5600000023841858,0.5580000281333923,0.5540000200271606,0.5640000104904175,0.5680000185966492,0.5709999799728394,0.5649999976158142,0.5680000185966492,0.5730000138282776,0.5640000104904175,0.5799999833106995,0.5699999928474426,0.5669999718666077,0.5680000185966492,0.5770000219345093,0.5709999799728394,0.5759999752044678,0.5690000057220459,0.5789999961853027,0.5740000009536743,0.5709999799728394,0.5789999961853027,0.5709999799728394,0.5770000219345093,0.5770000219345093,0.5730000138282776,0.5809999704360962,0.5720000267028809,0.5849999785423279,0.5820000171661377,0.5799999833106995,0.5830000042915344,0.5759999752044678,0.5730000138282776,0.5799999833106995,0.5830000042915344,0.5860000252723694,0.5789999961853027,0.5789999961853027,0.5860000252723694,0.5979999899864197,0.5920000076293945,0.5820000171661377,0.5870000123977661,0.5889999866485596,0.5839999914169312,0.5849999785423279,0.5899999737739563,0.5920000076293945,0.593999981880188,0.597000002861023,0.5889999866485596,0.5889999866485596,0.5849999785423279,0.5899999737739563,0.5989999771118164,0.5899999737739563,0.5839999914169312,0.5910000205039978,0.5910000205039978,0.5929999947547913,0.5920000076293945,0.5929999947547913,0.5889999866485596,0.5899999737739563,0.593999981880188,0.5910000205039978,0.5960000157356262,0.5920000076293945,0.5889999866485596,0.593999981880188,0.5879999995231628,0.5960000157356262,0.5920000076293945,0.5960000157356262,0.5960000157356262,0.5920000076293945,0.6010000109672546,0.5920000076293945,0.5899999737739563,0.5889999866485596,0.5920000076293945,0.6019999980926514],"label":"RefinedWeb"},"big-run-sampled_full_filtered_no_dedup":{"x":[0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,28.311552000000002,29.360128000000003,31.45728,33.554432000000006,35.651584,37.748736,39.845888,41.94304,44.040192000000005,46.137344000000006,48.234496,50.331648,52.4288,54.525952000000004,56.623104000000005,58.720256000000006,60.817408,62.91456,65.011712,67.10886400000001,69.206016,71.303168,73.40032000000001,75.497472,77.59462400000001,79.691776,81.788928,83.88608,85.983232,88.08038400000001,90.177536,92.27468800000001,94.37184,96.468992,98.56614400000001,100.663296,102.76044800000001,104.8576,106.95475200000001,109.05190400000001,111.149056,113.24620800000001,115.34336,117.44051200000001,119.537664,121.634816,123.73196800000001,125.82912,127.92627200000001,130.023424,132.120576,134.21772800000002,136.31488000000002,138.412032,140.509184,142.606336,144.70348800000002,146.80064000000002,148.897792,150.994944,153.092096,155.18924800000002,157.28640000000001,159.383552,161.480704,163.577856,165.67500800000002,167.77216,169.869312,171.966464,174.06361600000002,176.16076800000002,178.25792,180.355072,182.452224,184.54937600000002,186.64652800000002,188.74368,190.840832,192.937984,195.03513600000002,197.13228800000002,199.22944,201.326592,203.423744,205.52089600000002,207.61804800000002,209.7152,211.812352,213.90950400000003,216.00665600000002,218.10380800000001,220.20096,222.298112,224.39526400000003,226.49241600000002,228.589568,230.68672,232.783872,234.88102400000002,236.97817600000002,239.075328,241.17248,243.269632,245.36678400000002,247.46393600000002,249.561088,251.65824,253.75539200000003,255.85254400000002,257.949696,260.046848,262.144,264.241152,266.338304,268.43545600000004,270.53260800000004,272.62976000000003,274.726912,276.824064,278.921216,281.018368,283.11552,285.212672,287.309824,289.40697600000004,291.50412800000004,293.60128000000003,295.698432,297.795584,299.892736,301.989888,304.08704,306.184192,308.28134400000005,310.37849600000004,312.47564800000004,314.57280000000003,316.669952,318.767104,320.864256,322.961408,325.05856,327.155712,329.25286400000005,331.35001600000004,333.44716800000003,335.54432,337.641472,339.738624,341.835776,343.932928,346.03008,348.12723200000005,350.22438400000004],"y":[0.257999986410141,0.2809999883174896,0.3230000138282776,0.3409999907016754,0.3600000143051147,0.3569999933242798,0.3889999985694885,0.395000010728836,0.4199999868869781,0.4180000126361847,0.421999990940094,0.4289999902248382,0.4350000023841858,0.4359999895095825,0.4469999969005584,0.4350000023841858,0.4480000138282776,0.4480000138282776,0.453000009059906,0.4550000131130218,0.4589999914169311,0.4639999866485595,0.4600000083446502,0.460999995470047,0.4589999914169311,0.481000006198883,0.4769999980926513,0.4709999859333038,0.4740000069141388,0.4679999947547912,0.4790000021457672,0.4729999899864197,0.4819999933242798,0.4850000143051147,0.4819999933242798,0.4819999933242798,0.4880000054836273,0.4869999885559082,0.4959999918937683,0.4850000143051147,0.4959999918937683,0.492000013589859,0.503000020980835,0.4930000007152557,0.5099999904632568,0.5040000081062317,0.5009999871253967,0.4970000088214874,0.4979999959468841,0.5059999823570251,0.5070000290870667,0.5040000081062317,0.5059999823570251,0.5049999952316284,0.5080000162124634,0.5049999952316284,0.5019999742507935,0.5120000243186951,0.5170000195503235,0.5170000195503235,0.5090000033378601,0.5239999890327454,0.527999997138977,0.5230000019073486,0.5210000276565552,0.5149999856948853,0.5189999938011169,0.5270000100135803,0.5149999856948853,0.5099999904632568,0.5299999713897705,0.5199999809265137,0.5230000019073486,0.5260000228881836,0.5249999761581421,0.5239999890327454,0.5329999923706055,0.5210000276565552,0.5260000228881836,0.5170000195503235,0.531000018119812,0.5289999842643738,0.531000018119812,0.5270000100135803,0.5299999713897705,0.5370000004768372,0.5379999876022339,0.5419999957084656,0.5329999923706055,0.5360000133514404,0.5299999713897705,0.5360000133514404,0.5270000100135803,0.5450000166893005,0.5410000085830688,0.546999990940094,0.5329999923706055,0.5329999923706055,0.5379999876022339,0.5299999713897705,0.5429999828338623,0.5360000133514404,0.5339999794960022,0.5419999957084656,0.5410000085830688,0.5370000004768372,0.5389999747276306,0.527999997138977,0.5400000214576721,0.5400000214576721,0.531000018119812,0.5440000295639038,0.5460000038146973,0.5479999780654907,0.5460000038146973,0.5410000085830688,0.5509999990463257,0.5479999780654907,0.5410000085830688,0.5389999747276306,0.550000011920929,0.5569999814033508,0.550000011920929,0.5490000247955322,0.5490000247955322,0.5569999814033508,0.5519999861717224,0.5479999780654907,0.5559999942779541,0.5550000071525574,0.5460000038146973,0.5540000200271606,0.5460000038146973,0.5460000038146973,0.5509999990463257,0.5460000038146973,0.5550000071525574,0.5479999780654907,0.5479999780654907,0.5540000200271606,0.5550000071525574,0.5529999732971191,0.5529999732971191,0.5509999990463257,0.5509999990463257,0.5419999957084656,0.546999990940094,0.5509999990463257,0.5559999942779541,0.5490000247955322,0.5509999990463257,0.5529999732971191,0.550000011920929,0.5540000200271606,0.5550000071525574,0.5580000281333923,0.550000011920929,0.5569999814033508,0.5490000247955322,0.5519999861717224,0.5519999861717224,0.5559999942779541,0.5569999814033508,0.5559999942779541,0.5550000071525574,0.5559999942779541,0.5490000247955322,0.5550000071525574,0.5600000023841858],"label":"FineWeb filtered only"},"big-run-fineweb-cross-dedup-fixed":{"x":[0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,29.360128000000003,31.45728,33.554432000000006,35.651584,37.748736,39.845888,41.94304,44.040192000000005,46.137344000000006,48.234496,50.331648,52.4288,54.525952000000004,56.623104000000005,58.720256000000006,60.817408,62.91456,65.011712,67.10886400000001,69.206016,71.303168,73.40032000000001,75.497472,77.59462400000001,79.691776,81.788928,83.88608,85.983232,88.08038400000001,90.177536,92.27468800000001,94.37184,96.468992,98.56614400000001,100.663296,102.76044800000001,104.8576,106.95475200000001,109.05190400000001,111.149056,113.24620800000001,115.34336,117.44051200000001,119.537664,121.634816,123.73196800000001,125.82912,127.92627200000001,130.023424,132.120576,134.21772800000002,136.31488000000002,138.412032,140.509184,142.606336,144.70348800000002,146.80064000000002,148.897792,150.994944,153.092096,155.18924800000002,157.28640000000001,159.383552,161.480704,163.577856,165.67500800000002,167.77216,169.869312,171.966464,174.06361600000002,176.16076800000002,178.25792,180.355072,182.452224,184.54937600000002,186.64652800000002,188.74368,190.840832,192.937984,195.03513600000002,197.13228800000002,199.22944,201.326592,203.423744,205.52089600000002,207.61804800000002,209.7152,211.812352,213.90950400000003,216.00665600000002,218.10380800000001,220.20096,222.298112,224.39526400000003,226.49241600000002,228.589568,230.68672,232.783872,234.88102400000002,236.97817600000002,239.075328,241.17248,243.269632,245.36678400000002,247.46393600000002,249.561088,251.65824,253.75539200000003,255.85254400000002,257.949696,260.046848,262.144,264.241152,266.338304,268.43545600000004,270.53260800000004,272.62976000000003,274.726912,276.824064,278.921216,281.018368,283.11552,285.212672,287.309824,289.40697600000004,291.50412800000004,293.60128000000003,295.698432,297.795584,299.892736,301.989888,304.08704,306.184192,308.28134400000005,310.37849600000004,312.47564800000004,314.57280000000003,316.669952,318.767104,320.864256,322.961408,325.05856,327.155712,329.25286400000005,331.35001600000004,333.44716800000003,335.54432,337.641472,339.738624,341.835776,343.932928,346.03008,348.12723200000005,350.22438400000004],"y":[0.257999986410141,0.3009999990463257,0.3149999976158142,0.3400000035762787,0.3610000014305115,0.3680000007152557,0.3799999952316284,0.4020000100135803,0.4180000126361847,0.4129999876022339,0.4259999990463257,0.4239999949932098,0.4440000057220459,0.44200000166893,0.4440000057220459,0.4580000042915344,0.4510000050067901,0.4560000002384186,0.4650000035762787,0.4569999873638153,0.460999995470047,0.4659999907016754,0.4679999947547912,0.4779999852180481,0.4740000069141388,0.4600000083446502,0.4860000014305115,0.4790000021457672,0.4880000054836273,0.4930000007152557,0.4860000014305115,0.4850000143051147,0.4900000095367431,0.4850000143051147,0.4900000095367431,0.4959999918937683,0.492000013589859,0.4850000143051147,0.4970000088214874,0.4900000095367431,0.4979999959468841,0.503000020980835,0.5040000081062317,0.4990000128746032,0.4979999959468841,0.5080000162124634,0.5019999742507935,0.4970000088214874,0.4939999878406524,0.5120000243186951,0.5070000290870667,0.503000020980835,0.5070000290870667,0.503000020980835,0.5109999775886536,0.5080000162124634,0.5009999871253967,0.5090000033378601,0.5,0.5149999856948853,0.5109999775886536,0.5099999904632568,0.5130000114440918,0.5080000162124634,0.5080000162124634,0.5109999775886536,0.5099999904632568,0.5239999890327454,0.5180000066757202,0.5130000114440918,0.5120000243186951,0.5180000066757202,0.515999972820282,0.5260000228881836,0.5199999809265137,0.5239999890327454,0.5220000147819519,0.527999997138977,0.5249999761581421,0.5270000100135803,0.5249999761581421,0.5189999938011169,0.5230000019073486,0.5249999761581421,0.5199999809265137,0.5230000019073486,0.5299999713897705,0.5350000262260437,0.5339999794960022,0.5329999923706055,0.5249999761581421,0.5299999713897705,0.5360000133514404,0.5329999923706055,0.5410000085830688,0.5249999761581421,0.5289999842643738,0.5360000133514404,0.5360000133514404,0.5370000004768372,0.5389999747276306,0.5289999842643738,0.5299999713897705,0.5410000085830688,0.5329999923706055,0.5419999957084656,0.5410000085830688,0.527999997138977,0.5370000004768372,0.5429999828338623,0.5419999957084656,0.5389999747276306,0.5320000052452087,0.5350000262260437,0.5419999957084656,0.5410000085830688,0.5339999794960022,0.5440000295639038,0.5329999923706055,0.5429999828338623,0.5460000038146973,0.5400000214576721,0.5429999828338623,0.5479999780654907,0.550000011920929,0.5490000247955322,0.5410000085830688,0.5450000166893005,0.5429999828338623,0.550000011920929,0.5529999732971191,0.5490000247955322,0.5450000166893005,0.5450000166893005,0.5519999861717224,0.5569999814033508,0.5460000038146973,0.546999990940094,0.5509999990463257,0.5509999990463257,0.5450000166893005,0.5440000295639038,0.5440000295639038,0.546999990940094,0.5479999780654907,0.546999990940094,0.5460000038146973,0.546999990940094,0.5479999780654907,0.5460000038146973,0.5460000038146973,0.5440000295639038,0.5410000085830688,0.5440000295639038,0.5389999747276306,0.5410000085830688,0.546999990940094,0.546999990940094,0.5479999780654907,0.546999990940094,0.550000011920929,0.546999990940094,0.5460000038146973,0.546999990940094,0.5479999780654907,0.5479999780654907,0.5519999861717224,0.550000011920929],"label":"FineWeb full MinHash"}},"layout":{"title":{"text":"Dedup across all dumps does not improve performance"}}}
|
|
|
|
dist/assets/data/plots/all_dumps_bad/index.json
DELETED
@@ -1 +0,0 @@
|
|
1 |
-
{"files":{"agg_score":{"file":"agg_score.json"},"commonsense_qa/acc_norm":{"file":"commonsense_qa_acc_norm.json"},"hellaswag/acc_norm":{"file":"hellaswag_acc_norm.json"},"openbookqa/acc_norm":{"file":"openbookqa_acc_norm.json"},"piqa/acc_norm":{"file":"piqa_acc_norm.json"},"siqa/acc_norm":{"file":"siqa_acc_norm.json"},"winogrande/acc_norm":{"file":"winogrande_acc_norm.json"},"arc/acc_norm":{"file":"arc_acc_norm.json"},"mmlu/acc_norm":{"file":"mmlu_acc_norm.json"}},"settings":{"defaultMetric":"agg_score","slider":{"min":0,"max":30,"default":5}}}
|
|
|
|
dist/assets/data/plots/all_dumps_bad/mmlu_acc_norm.json
DELETED
@@ -1 +0,0 @@
|
|
1 |
-
{"data":{"big-run-refinedweb":{"x":[0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,28.311552000000002,29.360128000000003,31.45728,33.554432000000006,35.651584,37.748736,39.845888,41.94304,44.040192000000005,46.137344000000006,48.234496,50.331648,52.4288,54.525952000000004,56.623104000000005,58.720256000000006,60.817408,62.91456,65.011712,67.10886400000001,69.206016,71.303168,73.40032000000001,75.497472,77.59462400000001,79.691776,81.788928,83.88608,85.983232,88.08038400000001,90.177536,92.27468800000001,94.37184,96.468992,98.56614400000001,100.663296,102.76044800000001,104.8576,106.95475200000001,109.05190400000001,111.149056,113.24620800000001,115.34336,117.44051200000001,119.537664,121.634816,123.73196800000001,125.82912,127.92627200000001,130.023424,132.120576,134.21772800000002,136.31488000000002,138.412032,140.509184,142.606336,144.70348800000002,146.80064000000002,148.897792,150.994944,153.092096,155.18924800000002,157.28640000000001,159.383552,161.480704,163.577856,165.67500800000002,167.77216,169.869312,171.966464,174.06361600000002,176.16076800000002,178.25792,180.355072,182.452224,184.54937600000002,186.64652800000002,188.74368,190.840832,192.937984,195.03513600000002,197.13228800000002,199.22944,201.326592,203.423744,205.52089600000002,207.61804800000002,209.7152,211.812352,213.90950400000003,216.00665600000002,218.10380800000001,220.20096,222.298112,224.39526400000003,226.49241600000002,228.589568,230.68672,232.783872,234.88102400000002,236.97817600000002,239.075328,241.17248,243.269632,245.36678400000002,247.46393600000002,249.561088,251.65824,253.75539200000003,255.85254400000002,257.949696,260.046848,262.144,264.241152,266.338304,268.43545600000004,270.53260800000004,272.62976000000003,274.726912,276.824064,278.921216,281.018368,283.11552,285.212672,287.309824,289.40697600000004,291.50412800000004,293.60128000000003,295.698432,297.795584,299.892736,301.989888,304.08704,306.184192,308.28134400000005,310.37849600000004,312.47564800000004,314.57280000000003,316.669952,318.767104,320.864256,322.961408,325.05856,327.155712,329.25286400000005,331.35001600000004,333.44716800000003,335.54432,337.641472,339.738624,341.835776,343.932928,346.03008,348.12723200000005,350.22438400000004],"y":[0.2501466572284698,0.2528519630432129,0.2616856694221496,0.2665999829769134,0.2683407664299011,0.2742894291877746,0.2762066125869751,0.2807516455650329,0.2767378389835357,0.2807380557060241,0.2788906991481781,0.2844051718711853,0.2856102883815765,0.2883394360542297,0.2875711619853973,0.2890409529209137,0.2894668281078338,0.2883355319499969,0.2872501015663147,0.291619062423706,0.2900333702564239,0.2962473034858703,0.2962896525859833,0.297355443239212,0.2932226359844208,0.2886744439601898,0.29665008187294,0.2976542115211487,0.2991503179073334,0.3004479110240936,0.3044549524784088,0.2976194322109222,0.3014707863330841,0.3048252463340759,0.3039425611495971,0.303354948759079,0.3027459383010864,0.2999922931194305,0.3050121665000915,0.2998814284801483,0.2978588044643402,0.3041949570178985,0.3010904192924499,0.3022017180919647,0.2997751235961914,0.3015910983085632,0.3096485137939453,0.3012076020240783,0.3065535724163055,0.3042872548103332,0.3104783594608307,0.2997980415821075,0.3051296770572662,0.303458571434021,0.3088337182998657,0.3145398199558258,0.3032208085060119,0.310806930065155,0.3075874149799347,0.3101692199707031,0.310107946395874,0.3066047430038452,0.3109066784381866,0.3081336915493011,0.3084586262702942,0.3086149394512176,0.3085348606109619,0.3136637806892395,0.3110873103141784,0.31076380610466,0.3084572553634643,0.3133681714534759,0.3125792145729065,0.3124453127384186,0.3097185790538788,0.3106793165206909,0.3089564740657806,0.3111244142055511,0.3123694658279419,0.3144859969615936,0.3135123550891876,0.311982125043869,0.3142133951187134,0.3122903704643249,0.3147654831409454,0.3078767359256744,0.314947634935379,0.3171303570270538,0.3129573762416839,0.3154936134815216,0.3158208429813385,0.3153132200241089,0.3141326904296875,0.3163397014141083,0.3166318237781524,0.3168410360813141,0.3198235332965851,0.3201336860656738,0.3212967813014984,0.3191385567188263,0.3178017139434814,0.3192791938781738,0.323061466217041,0.320336639881134,0.3165886104106903,0.3206393420696258,0.3167395293712616,0.3135207295417785,0.315539002418518,0.3191742599010467,0.321073055267334,0.3222262561321258,0.3193058371543884,0.3213480710983276,0.3198905289173126,0.3219239711761474,0.3211614489555359,0.318855881690979,0.3177095353603363,0.324197381734848,0.3208906352519989,0.3264936804771423,0.3245965242385864,0.3231639564037323,0.3221887946128845,0.3277338445186615,0.3227696120738983,0.3263820111751556,0.3258577883243561,0.3264622390270233,0.3222362995147705,0.3286814987659454,0.3235024213790893,0.32446950674057,0.3311836123466491,0.328130304813385,0.3271634578704834,0.3250012993812561,0.3309800624847412,0.3274554014205932,0.3273015916347503,0.3261759579181671,0.32697594165802,0.3303172886371612,0.3282814025878906,0.3289586305618286,0.3260826468467712,0.3258011937141418,0.3297208249568939,0.3254813551902771,0.3287739753723144,0.3287097811698913,0.3275279700756073,0.3293041586875915,0.3314100801944732,0.3287808299064636,0.3251930773258209,0.3288172781467438,0.3265027701854706,0.3275215625762939,0.3290774822235107,0.3261331617832184,0.3299777805805206,0.331955999135971,0.3305029273033142,0.3274719417095184,0.3235560953617096,0.3269940316677093,0.3323083519935608],"label":"RefinedWeb"},"big-run-fineweb-cross-dedup-fixed":{"x":[0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,29.360128000000003,31.45728,33.554432000000006,35.651584,37.748736,39.845888,41.94304,44.040192000000005,46.137344000000006,48.234496,50.331648,52.4288,54.525952000000004,56.623104000000005,58.720256000000006,60.817408,62.91456,65.011712,67.10886400000001,69.206016,71.303168,73.40032000000001,75.497472,77.59462400000001,79.691776,81.788928,83.88608,85.983232,88.08038400000001,90.177536,92.27468800000001,94.37184,96.468992,98.56614400000001,100.663296,102.76044800000001,104.8576,106.95475200000001,109.05190400000001,111.149056,113.24620800000001,115.34336,117.44051200000001,119.537664,121.634816,123.73196800000001,125.82912,127.92627200000001,130.023424,132.120576,134.21772800000002,136.31488000000002,138.412032,140.509184,142.606336,144.70348800000002,146.80064000000002,148.897792,150.994944,153.092096,155.18924800000002,157.28640000000001,159.383552,161.480704,163.577856,165.67500800000002,167.77216,169.869312,171.966464,174.06361600000002,176.16076800000002,178.25792,180.355072,182.452224,184.54937600000002,186.64652800000002,188.74368,190.840832,192.937984,195.03513600000002,197.13228800000002,199.22944,201.326592,203.423744,205.52089600000002,207.61804800000002,209.7152,211.812352,213.90950400000003,216.00665600000002,218.10380800000001,220.20096,222.298112,224.39526400000003,226.49241600000002,228.589568,230.68672,232.783872,234.88102400000002,236.97817600000002,239.075328,241.17248,243.269632,245.36678400000002,247.46393600000002,249.561088,251.65824,253.75539200000003,255.85254400000002,257.949696,260.046848,262.144,264.241152,266.338304,268.43545600000004,270.53260800000004,272.62976000000003,274.726912,276.824064,278.921216,281.018368,283.11552,285.212672,287.309824,289.40697600000004,291.50412800000004,293.60128000000003,295.698432,297.795584,299.892736,301.989888,304.08704,306.184192,308.28134400000005,310.37849600000004,312.47564800000004,314.57280000000003,316.669952,318.767104,320.864256,322.961408,325.05856,327.155712,329.25286400000005,331.35001600000004,333.44716800000003,335.54432,337.641472,339.738624,341.835776,343.932928,346.03008,348.12723200000005,350.22438400000004],"y":[0.2501466572284698,0.2510619163513183,0.2621481418609619,0.2632303833961487,0.2720474302768707,0.2719806432723999,0.2726832032203674,0.2786827087402344,0.2823672890663147,0.276201844215393,0.2816944718360901,0.280361145734787,0.2819306254386902,0.2823295891284942,0.2892518043518066,0.2872919738292694,0.2859259247779846,0.2885263860225677,0.2862614393234253,0.2933129370212555,0.2930494546890259,0.2884900867938995,0.2942298054695129,0.2927677929401397,0.2954220175743103,0.2918704748153686,0.2943699061870575,0.2891678512096405,0.291848212480545,0.2942944765090942,0.2973679602146148,0.2953736186027527,0.2963412702083587,0.297100305557251,0.2963026762008667,0.2944463491439819,0.2971296310424804,0.293870210647583,0.2982682287693023,0.2978119254112243,0.2989997565746307,0.2993503510951996,0.298117071390152,0.2977498769760132,0.3004056811332702,0.3012634217739105,0.3001384139060974,0.3052266240119934,0.3038219809532165,0.3037647306919098,0.3009455502033233,0.3038812279701233,0.303263396024704,0.3025077581405639,0.3056069612503052,0.3024908602237701,0.3050909340381622,0.3001562356948852,0.303833544254303,0.3019777834415436,0.3036664128303528,0.3022894859313965,0.3042722940444946,0.3023003339767456,0.3069425821304321,0.307883083820343,0.3026910126209259,0.3054113090038299,0.3046148121356964,0.305342435836792,0.3048149049282074,0.3066973984241485,0.3055126965045929,0.3063409924507141,0.307701051235199,0.3075169324874878,0.3091190159320831,0.3098153173923492,0.31436288356781,0.3096509575843811,0.3022815883159637,0.3119745552539825,0.3083471357822418,0.3085280954837799,0.3082001209259033,0.3080264329910278,0.3116717934608459,0.3097788393497467,0.3117353916168213,0.3170038759708404,0.3099159002304077,0.3133728504180908,0.3161626160144806,0.3095119595527649,0.3135432302951813,0.3103009164333343,0.3126655519008636,0.3121814131736755,0.3123973608016968,0.3148256838321686,0.3144133985042572,0.3124284744262695,0.3102188408374786,0.3123636841773987,0.3115113973617553,0.3151636719703674,0.3148572146892547,0.315061867237091,0.3127182424068451,0.3139308094978332,0.3134367167949676,0.3136025071144104,0.3172793388366699,0.3134761154651642,0.3109587132930755,0.3127998411655426,0.3161843717098236,0.3163313865661621,0.3145243525505066,0.3155156075954437,0.3127505779266357,0.3182451128959656,0.3162476718425751,0.3124897480010986,0.3128789663314819,0.3119811117649078,0.314126193523407,0.3136049509048462,0.3149912655353546,0.3146650791168213,0.3151968121528625,0.3179666996002197,0.3169245719909668,0.3202513754367828,0.3185319602489471,0.3202781081199646,0.3186031281948089,0.3166128396987915,0.3199457228183746,0.3194417059421539,0.3170624077320099,0.3184532523155212,0.3191981911659241,0.3191225528717041,0.3173209130764007,0.3195607960224151,0.3166368305683136,0.3188160359859466,0.3174867630004883,0.3184468746185303,0.3211863338947296,0.3184327483177185,0.3177861273288727,0.3180214762687683,0.3194973170757293,0.3212297558784485,0.3211282789707184,0.3200584352016449,0.3168685734272003,0.3211040198802948,0.3222841620445251,0.3196901082992553,0.3236229419708252,0.3204475045204162,0.3210069537162781,0.3191083669662475,0.31863734126091,0.3195922076702118],"label":"FineWeb full MinHash"},"big-run-sampled_full_filtered_no_dedup":{"x":[0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,28.311552000000002,29.360128000000003,31.45728,33.554432000000006,35.651584,37.748736,39.845888,41.94304,44.040192000000005,46.137344000000006,48.234496,50.331648,52.4288,54.525952000000004,56.623104000000005,58.720256000000006,60.817408,62.91456,65.011712,67.10886400000001,69.206016,71.303168,73.40032000000001,75.497472,77.59462400000001,79.691776,81.788928,83.88608,85.983232,88.08038400000001,90.177536,92.27468800000001,94.37184,96.468992,98.56614400000001,100.663296,102.76044800000001,104.8576,106.95475200000001,109.05190400000001,111.149056,113.24620800000001,115.34336,117.44051200000001,119.537664,121.634816,123.73196800000001,125.82912,127.92627200000001,130.023424,132.120576,134.21772800000002,136.31488000000002,138.412032,140.509184,142.606336,144.70348800000002,146.80064000000002,148.897792,150.994944,153.092096,155.18924800000002,157.28640000000001,159.383552,161.480704,163.577856,165.67500800000002,167.77216,169.869312,171.966464,174.06361600000002,176.16076800000002,178.25792,180.355072,182.452224,184.54937600000002,186.64652800000002,188.74368,190.840832,192.937984,195.03513600000002,197.13228800000002,199.22944,201.326592,203.423744,205.52089600000002,207.61804800000002,209.7152,211.812352,213.90950400000003,216.00665600000002,218.10380800000001,220.20096,222.298112,224.39526400000003,226.49241600000002,228.589568,230.68672,232.783872,234.88102400000002,236.97817600000002,239.075328,241.17248,243.269632,245.36678400000002,247.46393600000002,249.561088,251.65824,253.75539200000003,255.85254400000002,257.949696,260.046848,262.144,264.241152,266.338304,268.43545600000004,270.53260800000004,272.62976000000003,274.726912,276.824064,278.921216,281.018368,283.11552,285.212672,287.309824,289.40697600000004,291.50412800000004,293.60128000000003,295.698432,297.795584,299.892736,301.989888,304.08704,306.184192,308.28134400000005,310.37849600000004,312.47564800000004,314.57280000000003,316.669952,318.767104,320.864256,322.961408,325.05856,327.155712,329.25286400000005,331.35001600000004,333.44716800000003,335.54432,337.641472,339.738624,341.835776,343.932928,346.03008,348.12723200000005,350.22438400000004],"y":[0.2501466572284698,0.2516599297523498,0.2610189318656921,0.2666046619415283,0.2667981088161468,0.2667821645736694,0.2708088159561157,0.2738403379917145,0.2726235687732696,0.2762763500213623,0.2768311202526092,0.2809228301048279,0.2836140990257263,0.2822815179824829,0.2831664383411407,0.2797218561172485,0.286342591047287,0.2855269610881805,0.2847287058830261,0.2888180613517761,0.286526083946228,0.2865165770053863,0.294582188129425,0.2925947606563568,0.2947863042354584,0.2892930805683136,0.2903610467910766,0.288201242685318,0.2873396277427673,0.2916238009929657,0.2908017039299011,0.2907920777797699,0.2952797412872314,0.2941452264785766,0.2921333611011505,0.2925891280174255,0.2968584895133972,0.2980035543441772,0.2964116632938385,0.2962304651737213,0.2950254380702972,0.2977516651153564,0.2944138348102569,0.3003402054309845,0.2976303696632385,0.3013098239898681,0.302829384803772,0.3018766045570373,0.305361807346344,0.2971298694610595,0.3014816343784332,0.3019805550575256,0.3037064969539642,0.2970167994499206,0.2995208501815796,0.2970106601715088,0.2990955114364624,0.3027818500995636,0.3048534691333771,0.2993872463703155,0.2986327707767486,0.3015393316745758,0.3003426790237427,0.3003274798393249,0.3017795085906982,0.3019182682037353,0.3015450537204742,0.3046211004257202,0.3031167984008789,0.3020436763763428,0.3011128306388855,0.3029948472976684,0.3045558631420135,0.301642894744873,0.3029441833496094,0.3035804331302643,0.3004390001296997,0.3021787703037262,0.306041270494461,0.3064048886299133,0.3087956011295318,0.3070018291473388,0.3065581619739532,0.3093871772289276,0.3060930073261261,0.3033313155174255,0.3072777390480041,0.306413859128952,0.3104493916034698,0.3056999444961548,0.3077532052993774,0.309231549501419,0.3070645034313202,0.3117790520191192,0.3114112913608551,0.312661737203598,0.3181777000427246,0.3117201030254364,0.3099702894687652,0.3074746131896972,0.3064963519573211,0.3105958700180053,0.3111456036567688,0.3084964454174042,0.3087405860424042,0.3121673166751861,0.3121528625488281,0.3100416660308838,0.3142979145050049,0.3129935264587402,0.3112611472606659,0.3119436800479889,0.3154115974903106,0.3091593086719513,0.3103814721107483,0.3130497634410858,0.3133455514907837,0.3152708411216736,0.3137963414192199,0.3099324703216553,0.3164172768592834,0.3133907914161682,0.3128255009651184,0.3134104907512665,0.3106969892978668,0.3130004107952118,0.3131391704082489,0.3130116462707519,0.3143952488899231,0.3143975436687469,0.3143710494041443,0.3163396418094635,0.3166862726211548,0.3184126019477844,0.3178988993167877,0.317479133605957,0.3184944093227386,0.316694974899292,0.3176258206367492,0.3182629346847534,0.3200214207172394,0.3181648552417755,0.320680022239685,0.3178716897964477,0.3182425796985626,0.3182984292507171,0.3158398568630218,0.3152642548084259,0.3132680356502533,0.3178914785385132,0.3156660795211792,0.3161703050136566,0.3176451921463012,0.3173815906047821,0.3194171786308288,0.3193057179450989,0.3172560334205627,0.317656546831131,0.3155770003795624,0.3199106156826019,0.3170182108879089,0.3156754970550537,0.3180731236934662,0.3205638229846954,0.3175432682037353,0.3184471428394317,0.3192788958549499,0.3197042346000671,0.3177168369293213],"label":"FineWeb filtered only"}},"layout":{"title":{"text":"Dedup across all dumps does not improve performance"}}}
|
|
|
|
dist/assets/data/plots/all_dumps_bad/openbookqa_acc_norm.json
DELETED
@@ -1 +0,0 @@
|
|
1 |
-
{"data":{"big-run-refinedweb":{"x":[0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,28.311552000000002,29.360128000000003,31.45728,33.554432000000006,35.651584,37.748736,39.845888,41.94304,44.040192000000005,46.137344000000006,48.234496,50.331648,52.4288,54.525952000000004,56.623104000000005,58.720256000000006,60.817408,62.91456,65.011712,67.10886400000001,69.206016,71.303168,73.40032000000001,75.497472,77.59462400000001,79.691776,81.788928,83.88608,85.983232,88.08038400000001,90.177536,92.27468800000001,94.37184,96.468992,98.56614400000001,100.663296,102.76044800000001,104.8576,106.95475200000001,109.05190400000001,111.149056,113.24620800000001,115.34336,117.44051200000001,119.537664,121.634816,123.73196800000001,125.82912,127.92627200000001,130.023424,132.120576,134.21772800000002,136.31488000000002,138.412032,140.509184,142.606336,144.70348800000002,146.80064000000002,148.897792,150.994944,153.092096,155.18924800000002,157.28640000000001,159.383552,161.480704,163.577856,165.67500800000002,167.77216,169.869312,171.966464,174.06361600000002,176.16076800000002,178.25792,180.355072,182.452224,184.54937600000002,186.64652800000002,188.74368,190.840832,192.937984,195.03513600000002,197.13228800000002,199.22944,201.326592,203.423744,205.52089600000002,207.61804800000002,209.7152,211.812352,213.90950400000003,216.00665600000002,218.10380800000001,220.20096,222.298112,224.39526400000003,226.49241600000002,228.589568,230.68672,232.783872,234.88102400000002,236.97817600000002,239.075328,241.17248,243.269632,245.36678400000002,247.46393600000002,249.561088,251.65824,253.75539200000003,255.85254400000002,257.949696,260.046848,262.144,264.241152,266.338304,268.43545600000004,270.53260800000004,272.62976000000003,274.726912,276.824064,278.921216,281.018368,283.11552,285.212672,287.309824,289.40697600000004,291.50412800000004,293.60128000000003,295.698432,297.795584,299.892736,301.989888,304.08704,306.184192,308.28134400000005,310.37849600000004,312.47564800000004,314.57280000000003,316.669952,318.767104,320.864256,322.961408,325.05856,327.155712,329.25286400000005,331.35001600000004,333.44716800000003,335.54432,337.641472,339.738624,341.835776,343.932928,346.03008,348.12723200000005,350.22438400000004],"y":[0.2860000133514404,0.2560000121593475,0.2840000092983246,0.3059999942779541,0.3059999942779541,0.2980000078678131,0.3240000009536743,0.3100000023841858,0.3000000119209289,0.3160000145435333,0.3140000104904175,0.3260000050067901,0.3199999928474426,0.2980000078678131,0.3179999887943268,0.3179999887943268,0.3319999873638153,0.3019999861717224,0.2939999997615814,0.3319999873638153,0.3319999873638153,0.3219999969005584,0.3379999995231628,0.3379999995231628,0.3339999914169311,0.3240000009536743,0.3479999899864197,0.3300000131130218,0.3240000009536743,0.3300000131130218,0.3400000035762787,0.3459999859333038,0.3319999873638153,0.3379999995231628,0.356000006198883,0.3339999914169311,0.3459999859333038,0.3440000116825104,0.3519999980926513,0.3479999899864197,0.3339999914169311,0.3400000035762787,0.3479999899864197,0.3379999995231628,0.3479999899864197,0.3499999940395355,0.3400000035762787,0.3499999940395355,0.3420000076293945,0.3659999966621399,0.3400000035762787,0.3459999859333038,0.3499999940395355,0.356000006198883,0.3400000035762787,0.356000006198883,0.3339999914169311,0.3339999914169311,0.3479999899864197,0.3420000076293945,0.3580000102519989,0.3339999914169311,0.3440000116825104,0.3400000035762787,0.3499999940395355,0.3540000021457672,0.3479999899864197,0.3499999940395355,0.3420000076293945,0.3379999995231628,0.335999995470047,0.356000006198883,0.3459999859333038,0.3499999940395355,0.3400000035762787,0.3440000116825104,0.356000006198883,0.3519999980926513,0.3400000035762787,0.3440000116825104,0.356000006198883,0.3400000035762787,0.356000006198883,0.3600000143051147,0.3540000021457672,0.3479999899864197,0.3379999995231628,0.3440000116825104,0.3300000131130218,0.3400000035762787,0.3459999859333038,0.3339999914169311,0.3499999940395355,0.3600000143051147,0.3440000116825104,0.3499999940395355,0.356000006198883,0.3420000076293945,0.3479999899864197,0.3379999995231628,0.3379999995231628,0.3459999859333038,0.356000006198883,0.328000009059906,0.3459999859333038,0.3519999980926513,0.3499999940395355,0.3519999980926513,0.3420000076293945,0.3499999940395355,0.3420000076293945,0.3339999914169311,0.335999995470047,0.3379999995231628,0.3379999995231628,0.3540000021457672,0.356000006198883,0.356000006198883,0.335999995470047,0.363999992609024,0.363999992609024,0.3499999940395355,0.356000006198883,0.3519999980926513,0.3519999980926513,0.3540000021457672,0.3459999859333038,0.3479999899864197,0.3519999980926513,0.3519999980926513,0.3420000076293945,0.3440000116825104,0.3379999995231628,0.3519999980926513,0.356000006198883,0.3420000076293945,0.3580000102519989,0.3499999940395355,0.3619999885559082,0.3519999980926513,0.3600000143051147,0.3459999859333038,0.3519999980926513,0.3519999980926513,0.3499999940395355,0.3580000102519989,0.356000006198883,0.3580000102519989,0.3600000143051147,0.3440000116825104,0.3600000143051147,0.3440000116825104,0.3479999899864197,0.3479999899864197,0.3580000102519989,0.3600000143051147,0.3580000102519989,0.3540000021457672,0.3519999980926513,0.3459999859333038,0.3459999859333038,0.3540000021457672,0.335999995470047,0.3540000021457672,0.3540000021457672,0.3519999980926513,0.356000006198883,0.3499999940395355,0.356000006198883],"label":"RefinedWeb"},"big-run-sampled_full_filtered_no_dedup":{"x":[0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,28.311552000000002,29.360128000000003,31.45728,33.554432000000006,35.651584,37.748736,39.845888,41.94304,44.040192000000005,46.137344000000006,48.234496,50.331648,52.4288,54.525952000000004,56.623104000000005,58.720256000000006,60.817408,62.91456,65.011712,67.10886400000001,69.206016,71.303168,73.40032000000001,75.497472,77.59462400000001,79.691776,81.788928,83.88608,85.983232,88.08038400000001,90.177536,92.27468800000001,94.37184,96.468992,98.56614400000001,100.663296,102.76044800000001,104.8576,106.95475200000001,109.05190400000001,111.149056,113.24620800000001,115.34336,117.44051200000001,119.537664,121.634816,123.73196800000001,125.82912,127.92627200000001,130.023424,132.120576,134.21772800000002,136.31488000000002,138.412032,140.509184,142.606336,144.70348800000002,146.80064000000002,148.897792,150.994944,153.092096,155.18924800000002,157.28640000000001,159.383552,161.480704,163.577856,165.67500800000002,167.77216,169.869312,171.966464,174.06361600000002,176.16076800000002,178.25792,180.355072,182.452224,184.54937600000002,186.64652800000002,188.74368,190.840832,192.937984,195.03513600000002,197.13228800000002,199.22944,201.326592,203.423744,205.52089600000002,207.61804800000002,209.7152,211.812352,213.90950400000003,216.00665600000002,218.10380800000001,220.20096,222.298112,224.39526400000003,226.49241600000002,228.589568,230.68672,232.783872,234.88102400000002,236.97817600000002,239.075328,241.17248,243.269632,245.36678400000002,247.46393600000002,249.561088,251.65824,253.75539200000003,255.85254400000002,257.949696,260.046848,262.144,264.241152,266.338304,268.43545600000004,270.53260800000004,272.62976000000003,274.726912,276.824064,278.921216,281.018368,283.11552,285.212672,287.309824,289.40697600000004,291.50412800000004,293.60128000000003,295.698432,297.795584,299.892736,301.989888,304.08704,306.184192,308.28134400000005,310.37849600000004,312.47564800000004,314.57280000000003,316.669952,318.767104,320.864256,322.961408,325.05856,327.155712,329.25286400000005,331.35001600000004,333.44716800000003,335.54432,337.641472,339.738624,341.835776,343.932928,346.03008,348.12723200000005,350.22438400000004],"y":[0.2860000133514404,0.2560000121593475,0.2720000147819519,0.2980000078678131,0.2840000092983246,0.2879999876022339,0.3039999902248382,0.2860000133514404,0.2899999916553497,0.3019999861717224,0.2960000038146972,0.3039999902248382,0.3100000023841858,0.3160000145435333,0.3260000050067901,0.3160000145435333,0.3260000050067901,0.3179999887943268,0.3420000076293945,0.3219999969005584,0.328000009059906,0.3240000009536743,0.3300000131130218,0.328000009059906,0.3199999928474426,0.3379999995231628,0.3400000035762787,0.3240000009536743,0.3120000064373016,0.3319999873638153,0.3260000050067901,0.3120000064373016,0.3160000145435333,0.3140000104904175,0.3179999887943268,0.3160000145435333,0.3199999928474426,0.3240000009536743,0.3260000050067901,0.3179999887943268,0.3300000131130218,0.3179999887943268,0.328000009059906,0.3240000009536743,0.328000009059906,0.3260000050067901,0.3199999928474426,0.3400000035762787,0.3339999914169311,0.328000009059906,0.328000009059906,0.3339999914169311,0.328000009059906,0.328000009059906,0.335999995470047,0.3580000102519989,0.3499999940395355,0.3260000050067901,0.3499999940395355,0.3420000076293945,0.3160000145435333,0.3339999914169311,0.335999995470047,0.3400000035762787,0.3240000009536743,0.3319999873638153,0.3379999995231628,0.3400000035762787,0.3379999995231628,0.3319999873638153,0.3319999873638153,0.3440000116825104,0.3300000131130218,0.3219999969005584,0.3260000050067901,0.3219999969005584,0.3339999914169311,0.328000009059906,0.3300000131130218,0.3219999969005584,0.3379999995231628,0.3400000035762787,0.3319999873638153,0.328000009059906,0.3440000116825104,0.3339999914169311,0.328000009059906,0.3379999995231628,0.3499999940395355,0.3339999914169311,0.3300000131130218,0.328000009059906,0.335999995470047,0.3240000009536743,0.335999995470047,0.3240000009536743,0.3400000035762787,0.3400000035762787,0.3420000076293945,0.3319999873638153,0.3339999914169311,0.3300000131130218,0.3400000035762787,0.3459999859333038,0.3400000035762787,0.3379999995231628,0.3459999859333038,0.3379999995231628,0.3300000131130218,0.3519999980926513,0.3379999995231628,0.356000006198883,0.335999995470047,0.3420000076293945,0.3400000035762787,0.328000009059906,0.3540000021457672,0.3499999940395355,0.3479999899864197,0.3440000116825104,0.3519999980926513,0.356000006198883,0.3540000021457672,0.3440000116825104,0.3499999940395355,0.356000006198883,0.356000006198883,0.356000006198883,0.363999992609024,0.3600000143051147,0.356000006198883,0.3479999899864197,0.356000006198883,0.3459999859333038,0.3479999899864197,0.3619999885559082,0.363999992609024,0.3499999940395355,0.3379999995231628,0.3479999899864197,0.3499999940395355,0.356000006198883,0.3519999980926513,0.3540000021457672,0.3619999885559082,0.3580000102519989,0.3540000021457672,0.356000006198883,0.3479999899864197,0.3519999980926513,0.356000006198883,0.3499999940395355,0.3379999995231628,0.3479999899864197,0.3499999940395355,0.3440000116825104,0.3580000102519989,0.356000006198883,0.3499999940395355,0.3479999899864197,0.3580000102519989,0.3519999980926513,0.3540000021457672,0.3519999980926513,0.3540000021457672,0.356000006198883,0.363999992609024,0.356000006198883,0.356000006198883],"label":"FineWeb filtered only"},"big-run-fineweb-cross-dedup-fixed":{"x":[0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,29.360128000000003,31.45728,33.554432000000006,35.651584,37.748736,39.845888,41.94304,44.040192000000005,46.137344000000006,48.234496,50.331648,52.4288,54.525952000000004,56.623104000000005,58.720256000000006,60.817408,62.91456,65.011712,67.10886400000001,69.206016,71.303168,73.40032000000001,75.497472,77.59462400000001,79.691776,81.788928,83.88608,85.983232,88.08038400000001,90.177536,92.27468800000001,94.37184,96.468992,98.56614400000001,100.663296,102.76044800000001,104.8576,106.95475200000001,109.05190400000001,111.149056,113.24620800000001,115.34336,117.44051200000001,119.537664,121.634816,123.73196800000001,125.82912,127.92627200000001,130.023424,132.120576,134.21772800000002,136.31488000000002,138.412032,140.509184,142.606336,144.70348800000002,146.80064000000002,148.897792,150.994944,153.092096,155.18924800000002,157.28640000000001,159.383552,161.480704,163.577856,165.67500800000002,167.77216,169.869312,171.966464,174.06361600000002,176.16076800000002,178.25792,180.355072,182.452224,184.54937600000002,186.64652800000002,188.74368,190.840832,192.937984,195.03513600000002,197.13228800000002,199.22944,201.326592,203.423744,205.52089600000002,207.61804800000002,209.7152,211.812352,213.90950400000003,216.00665600000002,218.10380800000001,220.20096,222.298112,224.39526400000003,226.49241600000002,228.589568,230.68672,232.783872,234.88102400000002,236.97817600000002,239.075328,241.17248,243.269632,245.36678400000002,247.46393600000002,249.561088,251.65824,253.75539200000003,255.85254400000002,257.949696,260.046848,262.144,264.241152,266.338304,268.43545600000004,270.53260800000004,272.62976000000003,274.726912,276.824064,278.921216,281.018368,283.11552,285.212672,287.309824,289.40697600000004,291.50412800000004,293.60128000000003,295.698432,297.795584,299.892736,301.989888,304.08704,306.184192,308.28134400000005,310.37849600000004,312.47564800000004,314.57280000000003,316.669952,318.767104,320.864256,322.961408,325.05856,327.155712,329.25286400000005,331.35001600000004,333.44716800000003,335.54432,337.641472,339.738624,341.835776,343.932928,346.03008,348.12723200000005,350.22438400000004],"y":[0.2860000133514404,0.2460000067949295,0.2720000147819519,0.270000010728836,0.2939999997615814,0.2960000038146972,0.3240000009536743,0.3019999861717224,0.2879999876022339,0.3179999887943268,0.3059999942779541,0.2899999916553497,0.3100000023841858,0.3179999887943268,0.3219999969005584,0.3219999969005584,0.3300000131130218,0.3140000104904175,0.3240000009536743,0.3079999983310699,0.3260000050067901,0.3120000064373016,0.3160000145435333,0.3179999887943268,0.3260000050067901,0.3260000050067901,0.3240000009536743,0.3379999995231628,0.3219999969005584,0.3319999873638153,0.3379999995231628,0.3339999914169311,0.328000009059906,0.3319999873638153,0.3199999928474426,0.3000000119209289,0.3260000050067901,0.3240000009536743,0.328000009059906,0.3240000009536743,0.328000009059906,0.3260000050067901,0.3440000116825104,0.3199999928474426,0.3319999873638153,0.3219999969005584,0.335999995470047,0.3519999980926513,0.3379999995231628,0.328000009059906,0.3300000131130218,0.335999995470047,0.3479999899864197,0.3459999859333038,0.3479999899864197,0.3540000021457672,0.3479999899864197,0.3300000131130218,0.356000006198883,0.3479999899864197,0.356000006198883,0.335999995470047,0.335999995470047,0.3479999899864197,0.3339999914169311,0.3540000021457672,0.3300000131130218,0.3479999899864197,0.3499999940395355,0.3400000035762787,0.3459999859333038,0.3339999914169311,0.3479999899864197,0.335999995470047,0.3400000035762787,0.3179999887943268,0.335999995470047,0.328000009059906,0.328000009059906,0.3540000021457672,0.3479999899864197,0.3420000076293945,0.3580000102519989,0.3459999859333038,0.3420000076293945,0.3459999859333038,0.3440000116825104,0.3499999940395355,0.335999995470047,0.3540000021457672,0.356000006198883,0.3400000035762787,0.3600000143051147,0.3580000102519989,0.3519999980926513,0.3499999940395355,0.3540000021457672,0.3519999980926513,0.3499999940395355,0.3440000116825104,0.356000006198883,0.3479999899864197,0.3479999899864197,0.3440000116825104,0.3499999940395355,0.3440000116825104,0.3519999980926513,0.3440000116825104,0.356000006198883,0.3459999859333038,0.3580000102519989,0.356000006198883,0.3519999980926513,0.3420000076293945,0.3379999995231628,0.3479999899864197,0.3459999859333038,0.3499999940395355,0.3400000035762787,0.3440000116825104,0.3420000076293945,0.3420000076293945,0.3499999940395355,0.3459999859333038,0.3420000076293945,0.3459999859333038,0.3459999859333038,0.3479999899864197,0.3440000116825104,0.3720000088214874,0.3619999885559082,0.356000006198883,0.3519999980926513,0.3459999859333038,0.3440000116825104,0.3420000076293945,0.3580000102519989,0.3600000143051147,0.3519999980926513,0.3600000143051147,0.3440000116825104,0.3600000143051147,0.3619999885559082,0.3499999940395355,0.3499999940395355,0.363999992609024,0.3580000102519989,0.3499999940395355,0.3479999899864197,0.3479999899864197,0.3580000102519989,0.3540000021457672,0.3600000143051147,0.3420000076293945,0.3519999980926513,0.3440000116825104,0.3519999980926513,0.3540000021457672,0.356000006198883,0.3459999859333038,0.3499999940395355,0.3519999980926513,0.3580000102519989,0.3440000116825104,0.3499999940395355,0.3580000102519989,0.3479999899864197,0.3479999899864197],"label":"FineWeb full MinHash"}},"layout":{"title":{"text":"Dedup across all dumps does not improve performance"}}}
|
|
|
|
dist/assets/data/plots/all_dumps_bad/piqa_acc_norm.json
DELETED
@@ -1 +0,0 @@
|
|
1 |
-
{"data":{"big-run-refinedweb":{"x":[0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,28.311552000000002,29.360128000000003,31.45728,33.554432000000006,35.651584,37.748736,39.845888,41.94304,44.040192000000005,46.137344000000006,48.234496,50.331648,52.4288,54.525952000000004,56.623104000000005,58.720256000000006,60.817408,62.91456,65.011712,67.10886400000001,69.206016,71.303168,73.40032000000001,75.497472,77.59462400000001,79.691776,81.788928,83.88608,85.983232,88.08038400000001,90.177536,92.27468800000001,94.37184,96.468992,98.56614400000001,100.663296,102.76044800000001,104.8576,106.95475200000001,109.05190400000001,111.149056,113.24620800000001,115.34336,117.44051200000001,119.537664,121.634816,123.73196800000001,125.82912,127.92627200000001,130.023424,132.120576,134.21772800000002,136.31488000000002,138.412032,140.509184,142.606336,144.70348800000002,146.80064000000002,148.897792,150.994944,153.092096,155.18924800000002,157.28640000000001,159.383552,161.480704,163.577856,165.67500800000002,167.77216,169.869312,171.966464,174.06361600000002,176.16076800000002,178.25792,180.355072,182.452224,184.54937600000002,186.64652800000002,188.74368,190.840832,192.937984,195.03513600000002,197.13228800000002,199.22944,201.326592,203.423744,205.52089600000002,207.61804800000002,209.7152,211.812352,213.90950400000003,216.00665600000002,218.10380800000001,220.20096,222.298112,224.39526400000003,226.49241600000002,228.589568,230.68672,232.783872,234.88102400000002,236.97817600000002,239.075328,241.17248,243.269632,245.36678400000002,247.46393600000002,249.561088,251.65824,253.75539200000003,255.85254400000002,257.949696,260.046848,262.144,264.241152,266.338304,268.43545600000004,270.53260800000004,272.62976000000003,274.726912,276.824064,278.921216,281.018368,283.11552,285.212672,287.309824,289.40697600000004,291.50412800000004,293.60128000000003,295.698432,297.795584,299.892736,301.989888,304.08704,306.184192,308.28134400000005,310.37849600000004,312.47564800000004,314.57280000000003,316.669952,318.767104,320.864256,322.961408,325.05856,327.155712,329.25286400000005,331.35001600000004,333.44716800000003,335.54432,337.641472,339.738624,341.835776,343.932928,346.03008,348.12723200000005,350.22438400000004],"y":[0.5099999904632568,0.6019999980926514,0.652999997138977,0.6710000038146973,0.6740000247955322,0.6899999976158142,0.6919999718666077,0.6909999847412109,0.7070000171661377,0.7089999914169312,0.7129999995231628,0.7229999899864197,0.7120000123977661,0.7200000286102295,0.7300000190734863,0.7279999852180481,0.7369999885559082,0.7390000224113464,0.7350000143051147,0.7319999933242798,0.7279999852180481,0.7269999980926514,0.7459999918937683,0.7400000095367432,0.7390000224113464,0.7319999933242798,0.7390000224113464,0.7379999756813049,0.7390000224113464,0.7360000014305115,0.7440000176429749,0.7400000095367432,0.7360000014305115,0.7480000257492065,0.7360000014305115,0.7440000176429749,0.7459999918937683,0.7409999966621399,0.746999979019165,0.7440000176429749,0.7450000047683716,0.753000020980835,0.7390000224113464,0.7490000128746033,0.7419999837875366,0.7390000224113464,0.7559999823570251,0.7519999742507935,0.7549999952316284,0.7419999837875366,0.7490000128746033,0.7540000081062317,0.7480000257492065,0.7450000047683716,0.7429999709129333,0.7509999871253967,0.7549999952316284,0.7490000128746033,0.7490000128746033,0.7400000095367432,0.753000020980835,0.75,0.7509999871253967,0.7570000290870667,0.7590000033378601,0.7570000290870667,0.7329999804496765,0.7540000081062317,0.746999979019165,0.7409999966621399,0.7590000033378601,0.7509999871253967,0.7570000290870667,0.75,0.7540000081062317,0.7480000257492065,0.7580000162124634,0.7639999985694885,0.7630000114440918,0.7590000033378601,0.7549999952316284,0.7480000257492065,0.7509999871253967,0.7570000290870667,0.75,0.7540000081062317,0.7480000257492065,0.7549999952316284,0.7559999823570251,0.7580000162124634,0.7580000162124634,0.753000020980835,0.7490000128746033,0.7540000081062317,0.7639999985694885,0.7580000162124634,0.7519999742507935,0.7590000033378601,0.75,0.7570000290870667,0.7620000243186951,0.7710000276565552,0.7739999890327454,0.7620000243186951,0.7549999952316284,0.7599999904632568,0.765999972820282,0.7680000066757202,0.7639999985694885,0.7540000081062317,0.7649999856948853,0.7649999856948853,0.7609999775886536,0.7549999952316284,0.765999972820282,0.7639999985694885,0.7580000162124634,0.7710000276565552,0.7570000290870667,0.7630000114440918,0.7580000162124634,0.7599999904632568,0.7649999856948853,0.7670000195503235,0.7699999809265137,0.7710000276565552,0.7559999823570251,0.7609999775886536,0.7620000243186951,0.7620000243186951,0.7609999775886536,0.753000020980835,0.7570000290870667,0.7620000243186951,0.7609999775886536,0.7609999775886536,0.7559999823570251,0.7540000081062317,0.7570000290870667,0.7639999985694885,0.7590000033378601,0.7680000066757202,0.7680000066757202,0.765999972820282,0.765999972820282,0.7670000195503235,0.7739999890327454,0.7649999856948853,0.7749999761581421,0.7699999809265137,0.7639999985694885,0.7680000066757202,0.7630000114440918,0.7680000066757202,0.7699999809265137,0.7739999890327454,0.7749999761581421,0.765999972820282,0.7680000066757202,0.7710000276565552,0.7680000066757202,0.765999972820282,0.7689999938011169,0.7760000228881836,0.7710000276565552,0.7680000066757202,0.7649999856948853,0.7720000147819519,0.7730000019073486],"label":"RefinedWeb"},"big-run-fineweb-cross-dedup-fixed":{"x":[0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,29.360128000000003,31.45728,33.554432000000006,35.651584,37.748736,39.845888,41.94304,44.040192000000005,46.137344000000006,48.234496,50.331648,52.4288,54.525952000000004,56.623104000000005,58.720256000000006,60.817408,62.91456,65.011712,67.10886400000001,69.206016,71.303168,73.40032000000001,75.497472,77.59462400000001,79.691776,81.788928,83.88608,85.983232,88.08038400000001,90.177536,92.27468800000001,94.37184,96.468992,98.56614400000001,100.663296,102.76044800000001,104.8576,106.95475200000001,109.05190400000001,111.149056,113.24620800000001,115.34336,117.44051200000001,119.537664,121.634816,123.73196800000001,125.82912,127.92627200000001,130.023424,132.120576,134.21772800000002,136.31488000000002,138.412032,140.509184,142.606336,144.70348800000002,146.80064000000002,148.897792,150.994944,153.092096,155.18924800000002,157.28640000000001,159.383552,161.480704,163.577856,165.67500800000002,167.77216,169.869312,171.966464,174.06361600000002,176.16076800000002,178.25792,180.355072,182.452224,184.54937600000002,186.64652800000002,188.74368,190.840832,192.937984,195.03513600000002,197.13228800000002,199.22944,201.326592,203.423744,205.52089600000002,207.61804800000002,209.7152,211.812352,213.90950400000003,216.00665600000002,218.10380800000001,220.20096,222.298112,224.39526400000003,226.49241600000002,228.589568,230.68672,232.783872,234.88102400000002,236.97817600000002,239.075328,241.17248,243.269632,245.36678400000002,247.46393600000002,249.561088,251.65824,253.75539200000003,255.85254400000002,257.949696,260.046848,262.144,264.241152,266.338304,268.43545600000004,270.53260800000004,272.62976000000003,274.726912,276.824064,278.921216,281.018368,283.11552,285.212672,287.309824,289.40697600000004,291.50412800000004,293.60128000000003,295.698432,297.795584,299.892736,301.989888,304.08704,306.184192,308.28134400000005,310.37849600000004,312.47564800000004,314.57280000000003,316.669952,318.767104,320.864256,322.961408,325.05856,327.155712,329.25286400000005,331.35001600000004,333.44716800000003,335.54432,337.641472,339.738624,341.835776,343.932928,346.03008,348.12723200000005,350.22438400000004],"y":[0.5099999904632568,0.6169999837875366,0.6359999775886536,0.6769999861717224,0.6769999861717224,0.6970000267028809,0.6990000009536743,0.6970000267028809,0.6959999799728394,0.7049999833106995,0.7089999914169312,0.7179999947547913,0.7099999785423279,0.7160000205039978,0.7260000109672546,0.7229999899864197,0.7179999947547913,0.7210000157356262,0.7200000286102295,0.734000027179718,0.7089999914169312,0.7229999899864197,0.7239999771118164,0.7310000061988831,0.7300000190734863,0.7260000109672546,0.7250000238418579,0.7239999771118164,0.7289999723434448,0.7390000224113464,0.7229999899864197,0.7310000061988831,0.7350000143051147,0.7289999723434448,0.734000027179718,0.7289999723434448,0.7329999804496765,0.7300000190734863,0.7319999933242798,0.7440000176429749,0.746999979019165,0.7310000061988831,0.7329999804496765,0.7480000257492065,0.7429999709129333,0.7369999885559082,0.7269999980926514,0.7269999980926514,0.7379999756813049,0.75,0.7360000014305115,0.746999979019165,0.7409999966621399,0.7369999885559082,0.7459999918937683,0.7400000095367432,0.7409999966621399,0.746999979019165,0.7360000014305115,0.7459999918937683,0.7400000095367432,0.7429999709129333,0.7350000143051147,0.7390000224113464,0.7379999756813049,0.7480000257492065,0.7329999804496765,0.734000027179718,0.7390000224113464,0.7459999918937683,0.7360000014305115,0.7419999837875366,0.7429999709129333,0.7400000095367432,0.7379999756813049,0.7310000061988831,0.7360000014305115,0.7390000224113464,0.75,0.7369999885559082,0.7570000290870667,0.7409999966621399,0.7459999918937683,0.7350000143051147,0.7459999918937683,0.7509999871253967,0.7429999709129333,0.7419999837875366,0.7419999837875366,0.75,0.7440000176429749,0.7450000047683716,0.75,0.7409999966621399,0.7490000128746033,0.7409999966621399,0.7419999837875366,0.7429999709129333,0.7490000128746033,0.7419999837875366,0.7419999837875366,0.75,0.753000020980835,0.75,0.746999979019165,0.7519999742507935,0.746999979019165,0.7570000290870667,0.7549999952316284,0.75,0.7540000081062317,0.7480000257492065,0.7490000128746033,0.7419999837875366,0.7419999837875366,0.746999979019165,0.746999979019165,0.75,0.7519999742507935,0.7580000162124634,0.7549999952316284,0.7490000128746033,0.7480000257492065,0.7519999742507935,0.7590000033378601,0.7450000047683716,0.75,0.7440000176429749,0.7419999837875366,0.7519999742507935,0.7450000047683716,0.753000020980835,0.7450000047683716,0.7440000176429749,0.7559999823570251,0.7509999871253967,0.7540000081062317,0.7440000176429749,0.7509999871253967,0.753000020980835,0.7490000128746033,0.7570000290870667,0.7490000128746033,0.746999979019165,0.746999979019165,0.7509999871253967,0.7509999871253967,0.7519999742507935,0.7570000290870667,0.7540000081062317,0.7440000176429749,0.7480000257492065,0.7509999871253967,0.7509999871253967,0.7509999871253967,0.7549999952316284,0.75,0.7559999823570251,0.746999979019165,0.7609999775886536,0.7549999952316284,0.746999979019165,0.7490000128746033,0.753000020980835,0.753000020980835,0.7609999775886536,0.746999979019165,0.7580000162124634],"label":"FineWeb full MinHash"},"big-run-sampled_full_filtered_no_dedup":{"x":[0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,28.311552000000002,29.360128000000003,31.45728,33.554432000000006,35.651584,37.748736,39.845888,41.94304,44.040192000000005,46.137344000000006,48.234496,50.331648,52.4288,54.525952000000004,56.623104000000005,58.720256000000006,60.817408,62.91456,65.011712,67.10886400000001,69.206016,71.303168,73.40032000000001,75.497472,77.59462400000001,79.691776,81.788928,83.88608,85.983232,88.08038400000001,90.177536,92.27468800000001,94.37184,96.468992,98.56614400000001,100.663296,102.76044800000001,104.8576,106.95475200000001,109.05190400000001,111.149056,113.24620800000001,115.34336,117.44051200000001,119.537664,121.634816,123.73196800000001,125.82912,127.92627200000001,130.023424,132.120576,134.21772800000002,136.31488000000002,138.412032,140.509184,142.606336,144.70348800000002,146.80064000000002,148.897792,150.994944,153.092096,155.18924800000002,157.28640000000001,159.383552,161.480704,163.577856,165.67500800000002,167.77216,169.869312,171.966464,174.06361600000002,176.16076800000002,178.25792,180.355072,182.452224,184.54937600000002,186.64652800000002,188.74368,190.840832,192.937984,195.03513600000002,197.13228800000002,199.22944,201.326592,203.423744,205.52089600000002,207.61804800000002,209.7152,211.812352,213.90950400000003,216.00665600000002,218.10380800000001,220.20096,222.298112,224.39526400000003,226.49241600000002,228.589568,230.68672,232.783872,234.88102400000002,236.97817600000002,239.075328,241.17248,243.269632,245.36678400000002,247.46393600000002,249.561088,251.65824,253.75539200000003,255.85254400000002,257.949696,260.046848,262.144,264.241152,266.338304,268.43545600000004,270.53260800000004,272.62976000000003,274.726912,276.824064,278.921216,281.018368,283.11552,285.212672,287.309824,289.40697600000004,291.50412800000004,293.60128000000003,295.698432,297.795584,299.892736,301.989888,304.08704,306.184192,308.28134400000005,310.37849600000004,312.47564800000004,314.57280000000003,316.669952,318.767104,320.864256,322.961408,325.05856,327.155712,329.25286400000005,331.35001600000004,333.44716800000003,335.54432,337.641472,339.738624,341.835776,343.932928,346.03008,348.12723200000005,350.22438400000004],"y":[0.5099999904632568,0.621999979019165,0.6439999938011169,0.6700000166893005,0.6790000200271606,0.6869999766349792,0.6959999799728394,0.6790000200271606,0.6880000233650208,0.7049999833106995,0.699999988079071,0.6990000009536743,0.6940000057220459,0.7110000252723694,0.7089999914169312,0.7120000123977661,0.7070000171661377,0.7070000171661377,0.6990000009536743,0.7009999752044678,0.7160000205039978,0.7200000286102295,0.7149999737739563,0.7250000238418579,0.7210000157356262,0.722000002861023,0.7310000061988831,0.7289999723434448,0.7319999933242798,0.7250000238418579,0.722000002861023,0.7210000157356262,0.7170000076293945,0.7260000109672546,0.7250000238418579,0.7210000157356262,0.7200000286102295,0.7379999756813049,0.7239999771118164,0.7239999771118164,0.7080000042915344,0.7289999723434448,0.7289999723434448,0.7300000190734863,0.7329999804496765,0.7319999933242798,0.7350000143051147,0.7390000224113464,0.7350000143051147,0.7289999723434448,0.734000027179718,0.7329999804496765,0.7400000095367432,0.7409999966621399,0.7310000061988831,0.7350000143051147,0.7360000014305115,0.7360000014305115,0.7409999966621399,0.7319999933242798,0.7409999966621399,0.7400000095367432,0.7390000224113464,0.7329999804496765,0.7459999918937683,0.753000020980835,0.746999979019165,0.734000027179718,0.7369999885559082,0.7419999837875366,0.734000027179718,0.7419999837875366,0.7289999723434448,0.7350000143051147,0.7300000190734863,0.7519999742507935,0.7390000224113464,0.7400000095367432,0.7409999966621399,0.7429999709129333,0.7450000047683716,0.7329999804496765,0.7260000109672546,0.7570000290870667,0.7360000014305115,0.7519999742507935,0.7419999837875366,0.7379999756813049,0.7390000224113464,0.7490000128746033,0.734000027179718,0.7360000014305115,0.7390000224113464,0.7440000176429749,0.7450000047683716,0.7319999933242798,0.7429999709129333,0.7519999742507935,0.7540000081062317,0.7519999742507935,0.753000020980835,0.7480000257492065,0.7440000176429749,0.7459999918937683,0.7369999885559082,0.7419999837875366,0.7480000257492065,0.7419999837875366,0.765999972820282,0.746999979019165,0.7459999918937683,0.7570000290870667,0.7390000224113464,0.7409999966621399,0.7459999918937683,0.75,0.7570000290870667,0.753000020980835,0.7549999952316284,0.7519999742507935,0.7490000128746033,0.746999979019165,0.7459999918937683,0.7459999918937683,0.746999979019165,0.7409999966621399,0.7419999837875366,0.7459999918937683,0.7440000176429749,0.7459999918937683,0.7490000128746033,0.7450000047683716,0.7409999966621399,0.7419999837875366,0.7490000128746033,0.7590000033378601,0.7549999952316284,0.7549999952316284,0.746999979019165,0.753000020980835,0.7549999952316284,0.746999979019165,0.7580000162124634,0.7490000128746033,0.753000020980835,0.75,0.75,0.7540000081062317,0.7540000081062317,0.7490000128746033,0.7570000290870667,0.7570000290870667,0.7590000033378601,0.7559999823570251,0.7620000243186951,0.7590000033378601,0.7509999871253967,0.7639999985694885,0.7580000162124634,0.7599999904632568,0.7620000243186951,0.7590000033378601,0.7609999775886536,0.7559999823570251,0.75,0.7509999871253967,0.7549999952316284,0.7540000081062317,0.7540000081062317],"label":"FineWeb filtered only"}},"layout":{"title":{"text":"Dedup across all dumps does not improve performance"}}}
|
|
|
|
dist/assets/data/plots/all_dumps_bad/siqa_acc_norm.json
DELETED
@@ -1 +0,0 @@
|
|
1 |
-
{"data":{"big-run-refinedweb":{"x":[0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,28.311552000000002,29.360128000000003,31.45728,33.554432000000006,35.651584,37.748736,39.845888,41.94304,44.040192000000005,46.137344000000006,48.234496,50.331648,52.4288,54.525952000000004,56.623104000000005,58.720256000000006,60.817408,62.91456,65.011712,67.10886400000001,69.206016,71.303168,73.40032000000001,75.497472,77.59462400000001,79.691776,81.788928,83.88608,85.983232,88.08038400000001,90.177536,92.27468800000001,94.37184,96.468992,98.56614400000001,100.663296,102.76044800000001,104.8576,106.95475200000001,109.05190400000001,111.149056,113.24620800000001,115.34336,117.44051200000001,119.537664,121.634816,123.73196800000001,125.82912,127.92627200000001,130.023424,132.120576,134.21772800000002,136.31488000000002,138.412032,140.509184,142.606336,144.70348800000002,146.80064000000002,148.897792,150.994944,153.092096,155.18924800000002,157.28640000000001,159.383552,161.480704,163.577856,165.67500800000002,167.77216,169.869312,171.966464,174.06361600000002,176.16076800000002,178.25792,180.355072,182.452224,184.54937600000002,186.64652800000002,188.74368,190.840832,192.937984,195.03513600000002,197.13228800000002,199.22944,201.326592,203.423744,205.52089600000002,207.61804800000002,209.7152,211.812352,213.90950400000003,216.00665600000002,218.10380800000001,220.20096,222.298112,224.39526400000003,226.49241600000002,228.589568,230.68672,232.783872,234.88102400000002,236.97817600000002,239.075328,241.17248,243.269632,245.36678400000002,247.46393600000002,249.561088,251.65824,253.75539200000003,255.85254400000002,257.949696,260.046848,262.144,264.241152,266.338304,268.43545600000004,270.53260800000004,272.62976000000003,274.726912,276.824064,278.921216,281.018368,283.11552,285.212672,287.309824,289.40697600000004,291.50412800000004,293.60128000000003,295.698432,297.795584,299.892736,301.989888,304.08704,306.184192,308.28134400000005,310.37849600000004,312.47564800000004,314.57280000000003,316.669952,318.767104,320.864256,322.961408,325.05856,327.155712,329.25286400000005,331.35001600000004,333.44716800000003,335.54432,337.641472,339.738624,341.835776,343.932928,346.03008,348.12723200000005,350.22438400000004],"y":[0.3619999885559082,0.3980000019073486,0.3899999856948852,0.3860000073909759,0.3919999897480011,0.402999997138977,0.3959999978542328,0.3959999978542328,0.4070000052452087,0.4009999930858612,0.4079999923706054,0.4009999930858612,0.3910000026226043,0.3980000019073486,0.395000010728836,0.4129999876022339,0.4020000100135803,0.4090000092983246,0.4120000004768371,0.4129999876022339,0.4129999876022339,0.4099999964237213,0.4110000133514404,0.4110000133514404,0.4090000092983246,0.4000000059604645,0.4050000011920929,0.3939999938011169,0.3889999985694885,0.4050000011920929,0.4099999964237213,0.3980000019073486,0.4090000092983246,0.4079999923706054,0.4070000052452087,0.4040000140666961,0.4129999876022339,0.4090000092983246,0.4059999883174896,0.4090000092983246,0.4090000092983246,0.4149999916553497,0.4059999883174896,0.4000000059604645,0.4000000059604645,0.4070000052452087,0.402999997138977,0.4040000140666961,0.3989999890327453,0.4020000100135803,0.4160000085830688,0.4050000011920929,0.4110000133514404,0.4059999883174896,0.3989999890327453,0.4169999957084656,0.4040000140666961,0.4050000011920929,0.4149999916553497,0.4020000100135803,0.402999997138977,0.4129999876022339,0.4009999930858612,0.4059999883174896,0.4040000140666961,0.4099999964237213,0.414000004529953,0.4210000038146972,0.4110000133514404,0.4070000052452087,0.4099999964237213,0.4169999957084656,0.4070000052452087,0.4199999868869781,0.4079999923706054,0.4180000126361847,0.4110000133514404,0.4110000133514404,0.4189999997615814,0.414000004529953,0.4129999876022339,0.4180000126361847,0.4070000052452087,0.4059999883174896,0.4059999883174896,0.4129999876022339,0.4149999916553497,0.4099999964237213,0.4009999930858612,0.4020000100135803,0.4099999964237213,0.4169999957084656,0.4129999876022339,0.414000004529953,0.4099999964237213,0.4189999997615814,0.4210000038146972,0.4090000092983246,0.4079999923706054,0.4099999964237213,0.4099999964237213,0.4129999876022339,0.4099999964237213,0.4099999964237213,0.4110000133514404,0.4020000100135803,0.4079999923706054,0.4079999923706054,0.414000004529953,0.4129999876022339,0.4189999997615814,0.4129999876022339,0.4180000126361847,0.4050000011920929,0.4230000078678131,0.4180000126361847,0.4120000004768371,0.4149999916553497,0.4189999997615814,0.4110000133514404,0.4160000085830688,0.4059999883174896,0.4110000133514404,0.4110000133514404,0.4110000133514404,0.4040000140666961,0.4149999916553497,0.414000004529953,0.4160000085830688,0.414000004529953,0.4129999876022339,0.4120000004768371,0.4149999916553497,0.4169999957084656,0.4110000133514404,0.414000004529953,0.4160000085830688,0.4110000133514404,0.4120000004768371,0.4110000133514404,0.4149999916553497,0.4129999876022339,0.4110000133514404,0.4129999876022339,0.4099999964237213,0.4180000126361847,0.414000004529953,0.4040000140666961,0.4099999964237213,0.4099999964237213,0.4120000004768371,0.4149999916553497,0.4129999876022339,0.4079999923706054,0.4040000140666961,0.4129999876022339,0.4149999916553497,0.4120000004768371,0.402999997138977,0.4090000092983246,0.4110000133514404,0.4090000092983246,0.4070000052452087,0.4149999916553497,0.4070000052452087,0.4120000004768371,0.4059999883174896,0.4059999883174896,0.4099999964237213],"label":"RefinedWeb"},"big-run-fineweb-cross-dedup-fixed":{"x":[0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,29.360128000000003,31.45728,33.554432000000006,35.651584,37.748736,39.845888,41.94304,44.040192000000005,46.137344000000006,48.234496,50.331648,52.4288,54.525952000000004,56.623104000000005,58.720256000000006,60.817408,62.91456,65.011712,67.10886400000001,69.206016,71.303168,73.40032000000001,75.497472,77.59462400000001,79.691776,81.788928,83.88608,85.983232,88.08038400000001,90.177536,92.27468800000001,94.37184,96.468992,98.56614400000001,100.663296,102.76044800000001,104.8576,106.95475200000001,109.05190400000001,111.149056,113.24620800000001,115.34336,117.44051200000001,119.537664,121.634816,123.73196800000001,125.82912,127.92627200000001,130.023424,132.120576,134.21772800000002,136.31488000000002,138.412032,140.509184,142.606336,144.70348800000002,146.80064000000002,148.897792,150.994944,153.092096,155.18924800000002,157.28640000000001,159.383552,161.480704,163.577856,165.67500800000002,167.77216,169.869312,171.966464,174.06361600000002,176.16076800000002,178.25792,180.355072,182.452224,184.54937600000002,186.64652800000002,188.74368,190.840832,192.937984,195.03513600000002,197.13228800000002,199.22944,201.326592,203.423744,205.52089600000002,207.61804800000002,209.7152,211.812352,213.90950400000003,216.00665600000002,218.10380800000001,220.20096,222.298112,224.39526400000003,226.49241600000002,228.589568,230.68672,232.783872,234.88102400000002,236.97817600000002,239.075328,241.17248,243.269632,245.36678400000002,247.46393600000002,249.561088,251.65824,253.75539200000003,255.85254400000002,257.949696,260.046848,262.144,264.241152,266.338304,268.43545600000004,270.53260800000004,272.62976000000003,274.726912,276.824064,278.921216,281.018368,283.11552,285.212672,287.309824,289.40697600000004,291.50412800000004,293.60128000000003,295.698432,297.795584,299.892736,301.989888,304.08704,306.184192,308.28134400000005,310.37849600000004,312.47564800000004,314.57280000000003,316.669952,318.767104,320.864256,322.961408,325.05856,327.155712,329.25286400000005,331.35001600000004,333.44716800000003,335.54432,337.641472,339.738624,341.835776,343.932928,346.03008,348.12723200000005,350.22438400000004],"y":[0.3619999885559082,0.395000010728836,0.3919999897480011,0.3819999992847442,0.3840000033378601,0.3869999945163727,0.395000010728836,0.3959999978542328,0.4020000100135803,0.4009999930858612,0.4079999923706054,0.402999997138977,0.4000000059604645,0.3930000066757202,0.4050000011920929,0.4040000140666961,0.3959999978542328,0.4009999930858612,0.4059999883174896,0.3989999890327453,0.3970000147819519,0.4070000052452087,0.4079999923706054,0.4000000059604645,0.3959999978542328,0.3970000147819519,0.4009999930858612,0.3980000019073486,0.3959999978542328,0.3970000147819519,0.4000000059604645,0.3910000026226043,0.4110000133514404,0.4040000140666961,0.3919999897480011,0.4160000085830688,0.4120000004768371,0.4070000052452087,0.4000000059604645,0.4040000140666961,0.4120000004768371,0.3939999938011169,0.4020000100135803,0.4000000059604645,0.4090000092983246,0.4059999883174896,0.3980000019073486,0.4210000038146972,0.402999997138977,0.4149999916553497,0.4009999930858612,0.414000004529953,0.4129999876022339,0.4199999868869781,0.4090000092983246,0.3989999890327453,0.4040000140666961,0.402999997138977,0.402999997138977,0.4059999883174896,0.4050000011920929,0.4160000085830688,0.4169999957084656,0.4079999923706054,0.402999997138977,0.4020000100135803,0.3959999978542328,0.4169999957084656,0.3970000147819519,0.4099999964237213,0.402999997138977,0.4059999883174896,0.402999997138977,0.3939999938011169,0.3939999938011169,0.4020000100135803,0.3970000147819519,0.4120000004768371,0.4040000140666961,0.4040000140666961,0.4090000092983246,0.3980000019073486,0.4079999923706054,0.4070000052452087,0.4099999964237213,0.3989999890327453,0.4000000059604645,0.4070000052452087,0.3980000019073486,0.402999997138977,0.4090000092983246,0.4040000140666961,0.3889999985694885,0.4000000059604645,0.402999997138977,0.4050000011920929,0.395000010728836,0.4009999930858612,0.3989999890327453,0.3970000147819519,0.4009999930858612,0.3989999890327453,0.3970000147819519,0.4099999964237213,0.3989999890327453,0.4070000052452087,0.4009999930858612,0.3880000114440918,0.3959999978542328,0.3910000026226043,0.3930000066757202,0.3980000019073486,0.402999997138977,0.4009999930858612,0.4000000059604645,0.3919999897480011,0.3980000019073486,0.395000010728836,0.4020000100135803,0.3989999890327453,0.4020000100135803,0.4040000140666961,0.4070000052452087,0.4090000092983246,0.4079999923706054,0.4099999964237213,0.4040000140666961,0.3889999985694885,0.3989999890327453,0.4020000100135803,0.3989999890327453,0.3970000147819519,0.4009999930858612,0.4090000092983246,0.414000004529953,0.395000010728836,0.4009999930858612,0.4020000100135803,0.4009999930858612,0.3980000019073486,0.402999997138977,0.3980000019073486,0.402999997138977,0.395000010728836,0.4020000100135803,0.395000010728836,0.3989999890327453,0.3970000147819519,0.3980000019073486,0.3980000019073486,0.3970000147819519,0.3939999938011169,0.395000010728836,0.3989999890327453,0.3970000147819519,0.4020000100135803,0.3930000066757202,0.3989999890327453,0.4050000011920929,0.3930000066757202,0.4040000140666961,0.4000000059604645,0.4020000100135803,0.3880000114440918,0.395000010728836,0.3910000026226043,0.3980000019073486,0.4009999930858612],"label":"FineWeb full MinHash"},"big-run-sampled_full_filtered_no_dedup":{"x":[0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,28.311552000000002,29.360128000000003,31.45728,33.554432000000006,35.651584,37.748736,39.845888,41.94304,44.040192000000005,46.137344000000006,48.234496,50.331648,52.4288,54.525952000000004,56.623104000000005,58.720256000000006,60.817408,62.91456,65.011712,67.10886400000001,69.206016,71.303168,73.40032000000001,75.497472,77.59462400000001,79.691776,81.788928,83.88608,85.983232,88.08038400000001,90.177536,92.27468800000001,94.37184,96.468992,98.56614400000001,100.663296,102.76044800000001,104.8576,106.95475200000001,109.05190400000001,111.149056,113.24620800000001,115.34336,117.44051200000001,119.537664,121.634816,123.73196800000001,125.82912,127.92627200000001,130.023424,132.120576,134.21772800000002,136.31488000000002,138.412032,140.509184,142.606336,144.70348800000002,146.80064000000002,148.897792,150.994944,153.092096,155.18924800000002,157.28640000000001,159.383552,161.480704,163.577856,165.67500800000002,167.77216,169.869312,171.966464,174.06361600000002,176.16076800000002,178.25792,180.355072,182.452224,184.54937600000002,186.64652800000002,188.74368,190.840832,192.937984,195.03513600000002,197.13228800000002,199.22944,201.326592,203.423744,205.52089600000002,207.61804800000002,209.7152,211.812352,213.90950400000003,216.00665600000002,218.10380800000001,220.20096,222.298112,224.39526400000003,226.49241600000002,228.589568,230.68672,232.783872,234.88102400000002,236.97817600000002,239.075328,241.17248,243.269632,245.36678400000002,247.46393600000002,249.561088,251.65824,253.75539200000003,255.85254400000002,257.949696,260.046848,262.144,264.241152,266.338304,268.43545600000004,270.53260800000004,272.62976000000003,274.726912,276.824064,278.921216,281.018368,283.11552,285.212672,287.309824,289.40697600000004,291.50412800000004,293.60128000000003,295.698432,297.795584,299.892736,301.989888,304.08704,306.184192,308.28134400000005,310.37849600000004,312.47564800000004,314.57280000000003,316.669952,318.767104,320.864256,322.961408,325.05856,327.155712,329.25286400000005,331.35001600000004,333.44716800000003,335.54432,337.641472,339.738624,341.835776,343.932928,346.03008,348.12723200000005,350.22438400000004],"y":[0.3619999885559082,0.4000000059604645,0.395000010728836,0.3959999978542328,0.4020000100135803,0.4000000059604645,0.3959999978542328,0.3930000066757202,0.3899999856948852,0.402999997138977,0.4009999930858612,0.3930000066757202,0.4050000011920929,0.3939999938011169,0.4110000133514404,0.4000000059604645,0.3989999890327453,0.3959999978542328,0.4020000100135803,0.4000000059604645,0.3939999938011169,0.395000010728836,0.3919999897480011,0.3980000019073486,0.3910000026226043,0.3880000114440918,0.3959999978542328,0.3980000019073486,0.3989999890327453,0.402999997138977,0.3959999978542328,0.3980000019073486,0.395000010728836,0.4090000092983246,0.4090000092983246,0.3889999985694885,0.3959999978542328,0.3880000114440918,0.3840000033378601,0.3959999978542328,0.3880000114440918,0.3939999938011169,0.3970000147819519,0.3910000026226043,0.3939999938011169,0.4020000100135803,0.3980000019073486,0.3970000147819519,0.4009999930858612,0.3919999897480011,0.3899999856948852,0.3989999890327453,0.3860000073909759,0.3860000073909759,0.3970000147819519,0.3959999978542328,0.3939999938011169,0.3840000033378601,0.3869999945163727,0.402999997138977,0.4050000011920929,0.395000010728836,0.3880000114440918,0.3869999945163727,0.3939999938011169,0.402999997138977,0.3899999856948852,0.3910000026226043,0.3910000026226043,0.4009999930858612,0.3919999897480011,0.3970000147819519,0.3919999897480011,0.3930000066757202,0.3869999945163727,0.3880000114440918,0.3849999904632568,0.3930000066757202,0.395000010728836,0.3889999985694885,0.3959999978542328,0.3989999890327453,0.402999997138977,0.3939999938011169,0.4000000059604645,0.4000000059604645,0.4050000011920929,0.3989999890327453,0.3869999945163727,0.3910000026226043,0.3889999985694885,0.3889999985694885,0.4000000059604645,0.3910000026226043,0.3970000147819519,0.3989999890327453,0.3989999890327453,0.3959999978542328,0.3910000026226043,0.3880000114440918,0.3939999938011169,0.382999986410141,0.3849999904632568,0.3959999978542328,0.3989999890327453,0.3959999978542328,0.3880000114440918,0.3840000033378601,0.3980000019073486,0.4000000059604645,0.4000000059604645,0.4020000100135803,0.395000010728836,0.3910000026226043,0.3919999897480011,0.4040000140666961,0.3989999890327453,0.4020000100135803,0.3910000026226043,0.4009999930858612,0.3959999978542328,0.3939999938011169,0.3930000066757202,0.3910000026226043,0.3970000147819519,0.3880000114440918,0.3970000147819519,0.3959999978542328,0.3889999985694885,0.3970000147819519,0.4009999930858612,0.3970000147819519,0.3959999978542328,0.3959999978542328,0.3989999890327453,0.4040000140666961,0.3959999978542328,0.3980000019073486,0.3970000147819519,0.3970000147819519,0.3989999890327453,0.4020000100135803,0.3980000019073486,0.4000000059604645,0.4000000059604645,0.402999997138977,0.4090000092983246,0.3970000147819519,0.4020000100135803,0.3970000147819519,0.4009999930858612,0.3959999978542328,0.3970000147819519,0.3989999890327453,0.3939999938011169,0.3989999890327453,0.4000000059604645,0.4000000059604645,0.3989999890327453,0.4050000011920929,0.4059999883174896,0.4009999930858612,0.3989999890327453,0.3959999978542328,0.3939999938011169,0.3970000147819519,0.4009999930858612,0.3989999890327453,0.3939999938011169],"label":"FineWeb filtered only"}},"layout":{"title":{"text":"Dedup across all dumps does not improve performance"}}}
|
|
|
|
dist/assets/data/plots/all_dumps_bad/winogrande_acc_norm.json
DELETED
@@ -1 +0,0 @@
|
|
1 |
-
{"data":{"big-run-fineweb-cross-dedup-fixed":{"x":[0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,29.360128000000003,31.45728,33.554432000000006,35.651584,37.748736,39.845888,41.94304,44.040192000000005,46.137344000000006,48.234496,50.331648,52.4288,54.525952000000004,56.623104000000005,58.720256000000006,60.817408,62.91456,65.011712,67.10886400000001,69.206016,71.303168,73.40032000000001,75.497472,77.59462400000001,79.691776,81.788928,83.88608,85.983232,88.08038400000001,90.177536,92.27468800000001,94.37184,96.468992,98.56614400000001,100.663296,102.76044800000001,104.8576,106.95475200000001,109.05190400000001,111.149056,113.24620800000001,115.34336,117.44051200000001,119.537664,121.634816,123.73196800000001,125.82912,127.92627200000001,130.023424,132.120576,134.21772800000002,136.31488000000002,138.412032,140.509184,142.606336,144.70348800000002,146.80064000000002,148.897792,150.994944,153.092096,155.18924800000002,157.28640000000001,159.383552,161.480704,163.577856,165.67500800000002,167.77216,169.869312,171.966464,174.06361600000002,176.16076800000002,178.25792,180.355072,182.452224,184.54937600000002,186.64652800000002,188.74368,190.840832,192.937984,195.03513600000002,197.13228800000002,199.22944,201.326592,203.423744,205.52089600000002,207.61804800000002,209.7152,211.812352,213.90950400000003,216.00665600000002,218.10380800000001,220.20096,222.298112,224.39526400000003,226.49241600000002,228.589568,230.68672,232.783872,234.88102400000002,236.97817600000002,239.075328,241.17248,243.269632,245.36678400000002,247.46393600000002,249.561088,251.65824,253.75539200000003,255.85254400000002,257.949696,260.046848,262.144,264.241152,266.338304,268.43545600000004,270.53260800000004,272.62976000000003,274.726912,276.824064,278.921216,281.018368,283.11552,285.212672,287.309824,289.40697600000004,291.50412800000004,293.60128000000003,295.698432,297.795584,299.892736,301.989888,304.08704,306.184192,308.28134400000005,310.37849600000004,312.47564800000004,314.57280000000003,316.669952,318.767104,320.864256,322.961408,325.05856,327.155712,329.25286400000005,331.35001600000004,333.44716800000003,335.54432,337.641472,339.738624,341.835776,343.932928,346.03008,348.12723200000005,350.22438400000004],"y":[0.4970000088214874,0.4869999885559082,0.4959999918937683,0.4979999959468841,0.5099999904632568,0.515999972820282,0.5080000162124634,0.5249999761581421,0.5239999890327454,0.5299999713897705,0.5239999890327454,0.5149999856948853,0.5270000100135803,0.5249999761581421,0.5180000066757202,0.5220000147819519,0.5329999923706055,0.5289999842643738,0.5239999890327454,0.5299999713897705,0.5230000019073486,0.5130000114440918,0.5180000066757202,0.5299999713897705,0.5199999809265137,0.5270000100135803,0.5230000019073486,0.5299999713897705,0.5320000052452087,0.5429999828338623,0.527999997138977,0.5379999876022339,0.527999997138977,0.5419999957084656,0.5329999923706055,0.5450000166893005,0.5320000052452087,0.5410000085830688,0.5249999761581421,0.5400000214576721,0.5249999761581421,0.5289999842643738,0.5320000052452087,0.5339999794960022,0.5320000052452087,0.5350000262260437,0.5400000214576721,0.5450000166893005,0.5440000295639038,0.5400000214576721,0.5379999876022339,0.5350000262260437,0.5410000085830688,0.5490000247955322,0.531000018119812,0.5389999747276306,0.546999990940094,0.5529999732971191,0.5370000004768372,0.5440000295639038,0.5400000214576721,0.5490000247955322,0.550000011920929,0.5580000281333923,0.5609999895095825,0.5429999828338623,0.5529999732971191,0.5519999861717224,0.5450000166893005,0.550000011920929,0.5379999876022339,0.5490000247955322,0.5460000038146973,0.5419999957084656,0.5569999814033508,0.5509999990463257,0.5490000247955322,0.5529999732971191,0.5479999780654907,0.5590000152587891,0.5479999780654907,0.5509999990463257,0.5440000295639038,0.5509999990463257,0.5540000200271606,0.5559999942779541,0.5630000233650208,0.5649999976158142,0.5640000104904175,0.5649999976158142,0.5490000247955322,0.5709999799728394,0.5659999847412109,0.5630000233650208,0.5640000104904175,0.5580000281333923,0.546999990940094,0.5550000071525574,0.5580000281333923,0.5429999828338623,0.5440000295639038,0.5569999814033508,0.5569999814033508,0.5540000200271606,0.5550000071525574,0.5649999976158142,0.5540000200271606,0.5630000233650208,0.5609999895095825,0.5580000281333923,0.5509999990463257,0.5550000071525574,0.5550000071525574,0.5519999861717224,0.5609999895095825,0.5630000233650208,0.5509999990463257,0.550000011920929,0.5490000247955322,0.5540000200271606,0.550000011920929,0.5529999732971191,0.5460000038146973,0.550000011920929,0.5529999732971191,0.5519999861717224,0.5529999732971191,0.5609999895095825,0.5590000152587891,0.5550000071525574,0.550000011920929,0.5609999895095825,0.5619999766349792,0.5609999895095825,0.5540000200271606,0.550000011920929,0.5600000023841858,0.5559999942779541,0.5609999895095825,0.5569999814033508,0.5600000023841858,0.5680000185966492,0.5580000281333923,0.5559999942779541,0.5569999814033508,0.5669999718666077,0.5709999799728394,0.5640000104904175,0.5569999814033508,0.5600000023841858,0.5569999814033508,0.5649999976158142,0.5600000023841858,0.5580000281333923,0.5609999895095825,0.5590000152587891,0.5640000104904175,0.5529999732971191,0.5640000104904175,0.5649999976158142,0.5659999847412109,0.5630000233650208,0.5630000233650208,0.5619999766349792,0.5609999895095825,0.5559999942779541,0.5529999732971191,0.5600000023841858],"label":"FineWeb full MinHash"},"big-run-sampled_full_filtered_no_dedup":{"x":[0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,28.311552000000002,29.360128000000003,31.45728,33.554432000000006,35.651584,37.748736,39.845888,41.94304,44.040192000000005,46.137344000000006,48.234496,50.331648,52.4288,54.525952000000004,56.623104000000005,58.720256000000006,60.817408,62.91456,65.011712,67.10886400000001,69.206016,71.303168,73.40032000000001,75.497472,77.59462400000001,79.691776,81.788928,83.88608,85.983232,88.08038400000001,90.177536,92.27468800000001,94.37184,96.468992,98.56614400000001,100.663296,102.76044800000001,104.8576,106.95475200000001,109.05190400000001,111.149056,113.24620800000001,115.34336,117.44051200000001,119.537664,121.634816,123.73196800000001,125.82912,127.92627200000001,130.023424,132.120576,134.21772800000002,136.31488000000002,138.412032,140.509184,142.606336,144.70348800000002,146.80064000000002,148.897792,150.994944,153.092096,155.18924800000002,157.28640000000001,159.383552,161.480704,163.577856,165.67500800000002,167.77216,169.869312,171.966464,174.06361600000002,176.16076800000002,178.25792,180.355072,182.452224,184.54937600000002,186.64652800000002,188.74368,190.840832,192.937984,195.03513600000002,197.13228800000002,199.22944,201.326592,203.423744,205.52089600000002,207.61804800000002,209.7152,211.812352,213.90950400000003,216.00665600000002,218.10380800000001,220.20096,222.298112,224.39526400000003,226.49241600000002,228.589568,230.68672,232.783872,234.88102400000002,236.97817600000002,239.075328,241.17248,243.269632,245.36678400000002,247.46393600000002,249.561088,251.65824,253.75539200000003,255.85254400000002,257.949696,260.046848,262.144,264.241152,266.338304,268.43545600000004,270.53260800000004,272.62976000000003,274.726912,276.824064,278.921216,281.018368,283.11552,285.212672,287.309824,289.40697600000004,291.50412800000004,293.60128000000003,295.698432,297.795584,299.892736,301.989888,304.08704,306.184192,308.28134400000005,310.37849600000004,312.47564800000004,314.57280000000003,316.669952,318.767104,320.864256,322.961408,325.05856,327.155712,329.25286400000005,331.35001600000004,333.44716800000003,335.54432,337.641472,339.738624,341.835776,343.932928,346.03008,348.12723200000005,350.22438400000004],"y":[0.4970000088214874,0.5239999890327454,0.4900000095367431,0.5040000081062317,0.5099999904632568,0.4990000128746032,0.5170000195503235,0.5040000081062317,0.5009999871253967,0.5230000019073486,0.5109999775886536,0.5059999823570251,0.5130000114440918,0.5090000033378601,0.5180000066757202,0.5220000147819519,0.5189999938011169,0.5180000066757202,0.5220000147819519,0.5120000243186951,0.5460000038146973,0.5239999890327454,0.5289999842643738,0.5440000295639038,0.5339999794960022,0.5299999713897705,0.5260000228881836,0.5360000133514404,0.5339999794960022,0.5360000133514404,0.5299999713897705,0.5180000066757202,0.5249999761581421,0.5440000295639038,0.5299999713897705,0.5339999794960022,0.5239999890327454,0.527999997138977,0.5139999985694885,0.5289999842643738,0.5360000133514404,0.5260000228881836,0.5389999747276306,0.5460000038146973,0.5270000100135803,0.5339999794960022,0.5320000052452087,0.5329999923706055,0.5260000228881836,0.5220000147819519,0.5260000228881836,0.5379999876022339,0.5410000085830688,0.5350000262260437,0.5389999747276306,0.5320000052452087,0.5389999747276306,0.5379999876022339,0.5329999923706055,0.5270000100135803,0.5170000195503235,0.5329999923706055,0.5370000004768372,0.5379999876022339,0.5249999761581421,0.5479999780654907,0.546999990940094,0.5400000214576721,0.5440000295639038,0.5360000133514404,0.5450000166893005,0.5440000295639038,0.5370000004768372,0.5370000004768372,0.5479999780654907,0.5379999876022339,0.5400000214576721,0.5479999780654907,0.5379999876022339,0.5509999990463257,0.5440000295639038,0.5379999876022339,0.550000011920929,0.5389999747276306,0.5370000004768372,0.5379999876022339,0.5419999957084656,0.5360000133514404,0.5509999990463257,0.5360000133514404,0.5419999957084656,0.5419999957084656,0.550000011920929,0.5360000133514404,0.5519999861717224,0.5540000200271606,0.546999990940094,0.5370000004768372,0.5379999876022339,0.5519999861717224,0.5329999923706055,0.5400000214576721,0.5429999828338623,0.550000011920929,0.5490000247955322,0.5360000133514404,0.550000011920929,0.5569999814033508,0.5490000247955322,0.5490000247955322,0.5479999780654907,0.5350000262260437,0.5490000247955322,0.5370000004768372,0.5440000295639038,0.5329999923706055,0.5440000295639038,0.5429999828338623,0.5389999747276306,0.5450000166893005,0.5320000052452087,0.5450000166893005,0.5400000214576721,0.5419999957084656,0.5460000038146973,0.5370000004768372,0.5400000214576721,0.5460000038146973,0.5370000004768372,0.5370000004768372,0.5460000038146973,0.5400000214576721,0.5490000247955322,0.5529999732971191,0.5379999876022339,0.5460000038146973,0.5450000166893005,0.5429999828338623,0.5460000038146973,0.5400000214576721,0.5479999780654907,0.5460000038146973,0.5540000200271606,0.5400000214576721,0.5350000262260437,0.5490000247955322,0.5460000038146973,0.5460000038146973,0.5509999990463257,0.5410000085830688,0.5429999828338623,0.5379999876022339,0.5450000166893005,0.5389999747276306,0.5400000214576721,0.5400000214576721,0.550000011920929,0.5440000295639038,0.5389999747276306,0.5450000166893005,0.5400000214576721,0.5389999747276306,0.5419999957084656,0.5410000085830688,0.5440000295639038,0.5519999861717224,0.5479999780654907,0.5450000166893005,0.5569999814033508],"label":"FineWeb filtered only"},"big-run-refinedweb":{"x":[0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,28.311552000000002,29.360128000000003,31.45728,33.554432000000006,35.651584,37.748736,39.845888,41.94304,44.040192000000005,46.137344000000006,48.234496,50.331648,52.4288,54.525952000000004,56.623104000000005,58.720256000000006,60.817408,62.91456,65.011712,67.10886400000001,69.206016,71.303168,73.40032000000001,75.497472,77.59462400000001,79.691776,81.788928,83.88608,85.983232,88.08038400000001,90.177536,92.27468800000001,94.37184,96.468992,98.56614400000001,100.663296,102.76044800000001,104.8576,106.95475200000001,109.05190400000001,111.149056,113.24620800000001,115.34336,117.44051200000001,119.537664,121.634816,123.73196800000001,125.82912,127.92627200000001,130.023424,132.120576,134.21772800000002,136.31488000000002,138.412032,140.509184,142.606336,144.70348800000002,146.80064000000002,148.897792,150.994944,153.092096,155.18924800000002,157.28640000000001,159.383552,161.480704,163.577856,165.67500800000002,167.77216,169.869312,171.966464,174.06361600000002,176.16076800000002,178.25792,180.355072,182.452224,184.54937600000002,186.64652800000002,188.74368,190.840832,192.937984,195.03513600000002,197.13228800000002,199.22944,201.326592,203.423744,205.52089600000002,207.61804800000002,209.7152,211.812352,213.90950400000003,216.00665600000002,218.10380800000001,220.20096,222.298112,224.39526400000003,226.49241600000002,228.589568,230.68672,232.783872,234.88102400000002,236.97817600000002,239.075328,241.17248,243.269632,245.36678400000002,247.46393600000002,249.561088,251.65824,253.75539200000003,255.85254400000002,257.949696,260.046848,262.144,264.241152,266.338304,268.43545600000004,270.53260800000004,272.62976000000003,274.726912,276.824064,278.921216,281.018368,283.11552,285.212672,287.309824,289.40697600000004,291.50412800000004,293.60128000000003,295.698432,297.795584,299.892736,301.989888,304.08704,306.184192,308.28134400000005,310.37849600000004,312.47564800000004,314.57280000000003,316.669952,318.767104,320.864256,322.961408,325.05856,327.155712,329.25286400000005,331.35001600000004,333.44716800000003,335.54432,337.641472,339.738624,341.835776,343.932928,346.03008,348.12723200000005,350.22438400000004],"y":[0.4970000088214874,0.5,0.4979999959468841,0.4950000047683716,0.4950000047683716,0.5049999952316284,0.5329999923706055,0.5220000147819519,0.5139999985694885,0.5339999794960022,0.5130000114440918,0.5389999747276306,0.5400000214576721,0.5270000100135803,0.5320000052452087,0.5260000228881836,0.5370000004768372,0.527999997138977,0.5289999842643738,0.5339999794960022,0.5270000100135803,0.531000018119812,0.527999997138977,0.5400000214576721,0.5479999780654907,0.550000011920929,0.5400000214576721,0.5350000262260437,0.5410000085830688,0.5379999876022339,0.5299999713897705,0.5490000247955322,0.5509999990463257,0.5519999861717224,0.5429999828338623,0.5429999828338623,0.5440000295639038,0.5379999876022339,0.5379999876022339,0.5419999957084656,0.5609999895095825,0.5540000200271606,0.5370000004768372,0.5440000295639038,0.5410000085830688,0.5379999876022339,0.5329999923706055,0.5419999957084656,0.5419999957084656,0.5519999861717224,0.550000011920929,0.5509999990463257,0.5400000214576721,0.5450000166893005,0.5509999990463257,0.5569999814033508,0.5550000071525574,0.5590000152587891,0.5479999780654907,0.5550000071525574,0.5440000295639038,0.5460000038146973,0.546999990940094,0.5559999942779541,0.5550000071525574,0.5490000247955322,0.5440000295639038,0.546999990940094,0.5450000166893005,0.546999990940094,0.5649999976158142,0.5490000247955322,0.5519999861717224,0.550000011920929,0.5509999990463257,0.5519999861717224,0.5519999861717224,0.5529999732971191,0.5490000247955322,0.546999990940094,0.550000011920929,0.5720000267028809,0.5619999766349792,0.5490000247955322,0.5680000185966492,0.5519999861717224,0.5569999814033508,0.5509999990463257,0.5619999766349792,0.5630000233650208,0.5529999732971191,0.5619999766349792,0.5609999895095825,0.550000011920929,0.5479999780654907,0.5529999732971191,0.5519999861717224,0.5580000281333923,0.5590000152587891,0.5529999732971191,0.550000011920929,0.5680000185966492,0.5580000281333923,0.5630000233650208,0.5630000233650208,0.5559999942779541,0.5649999976158142,0.5569999814033508,0.5649999976158142,0.5659999847412109,0.5559999942779541,0.5659999847412109,0.5630000233650208,0.5509999990463257,0.5669999718666077,0.5669999718666077,0.5479999780654907,0.5540000200271606,0.5580000281333923,0.5519999861717224,0.5590000152587891,0.5590000152587891,0.5619999766349792,0.5509999990463257,0.546999990940094,0.5609999895095825,0.5540000200271606,0.5630000233650208,0.5580000281333923,0.5559999942779541,0.5680000185966492,0.5649999976158142,0.5619999766349792,0.5580000281333923,0.5630000233650208,0.5559999942779541,0.5540000200271606,0.5540000200271606,0.5569999814033508,0.5619999766349792,0.5559999942779541,0.5600000023841858,0.5460000038146973,0.5429999828338623,0.5580000281333923,0.5550000071525574,0.5580000281333923,0.5540000200271606,0.5609999895095825,0.5519999861717224,0.550000011920929,0.5519999861717224,0.5590000152587891,0.5619999766349792,0.5600000023841858,0.5590000152587891,0.5690000057220459,0.5640000104904175,0.5580000281333923,0.5559999942779541,0.5569999814033508,0.5569999814033508,0.5540000200271606,0.5640000104904175,0.5600000023841858,0.5550000071525574,0.5640000104904175,0.5600000023841858,0.5540000200271606],"label":"RefinedWeb"}},"layout":{"title":{"text":"Dedup across all dumps does not improve performance"}}}
|
|
|
|
dist/assets/data/plots/all_filtering_steps/agg_score.json
DELETED
@@ -1 +0,0 @@
|
|
1 |
-
{"data":{"big-run-fineweb-v1-all-dumps":{"x":[0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,29.360128000000003,31.45728,33.554432000000006,35.651584,37.748736,39.845888,41.94304,44.040192000000005,46.137344000000006,48.234496,50.331648,52.4288,54.525952000000004,56.623104000000005,58.720256000000006,60.817408,62.91456,65.011712,67.10886400000001,69.206016,71.303168,73.40032000000001,75.497472,77.59462400000001,79.691776,81.788928,83.88608,85.983232,88.08038400000001,90.177536,92.27468800000001,94.37184,96.468992,98.56614400000001,100.663296,102.76044800000001,104.8576,106.95475200000001,109.05190400000001,111.149056,113.24620800000001,115.34336,117.44051200000001,119.537664,121.634816,123.73196800000001,125.82912,127.92627200000001,130.023424,132.120576,134.21772800000002,136.31488000000002,138.412032,140.509184,142.606336,144.70348800000002,146.80064000000002,148.897792,150.994944,153.092096,155.18924800000002,157.28640000000001,159.383552,161.480704,163.577856,165.67500800000002,167.77216,169.869312,171.966464,174.06361600000002,176.16076800000002,178.25792,180.355072,182.452224,184.54937600000002,186.64652800000002,188.74368,190.840832,192.937984,195.03513600000002,197.13228800000002,199.22944,201.326592,203.423744,205.52089600000002,207.61804800000002,209.7152,211.812352,213.90950400000003,216.00665600000002,218.10380800000001,220.20096,222.298112,224.39526400000003,226.49241600000002,228.589568,230.68672,232.783872,234.88102400000002,236.97817600000002,239.075328,241.17248,243.269632,245.36678400000002,247.46393600000002,249.561088,251.65824,253.75539200000003,255.85254400000002,257.949696,260.046848,262.144,264.241152,266.338304,268.43545600000004,270.53260800000004,272.62976000000003,274.726912,276.824064,278.921216,281.018368,283.11552,285.212672,287.309824,289.40697600000004,291.50412800000004,293.60128000000003,295.698432,297.795584,299.892736,301.989888,304.08704,306.184192,308.28134400000005,310.37849600000004,312.47564800000004,314.57280000000003,316.669952,318.767104,320.864256,322.961408,325.05856,327.155712,329.25286400000005,331.35001600000004,333.44716800000003,335.54432,337.641472,339.738624,341.835776,343.932928,346.03008,348.12723200000005,350.22438400000004],"y":[0.3308933284133672,0.3552836012095213,0.3781493119895458,0.3866849727928638,0.4050675220787525,0.4032807648181915,0.4174600429832935,0.4206059761345386,0.427497424185276,0.4316632784903049,0.4385909177362919,0.4334069043397903,0.4360812865197658,0.4404293224215507,0.4385774843394756,0.4407080821692943,0.4467254020273685,0.4470436163246631,0.4486658610403538,0.4459679573774338,0.4454015754163265,0.4515932314097881,0.4482216536998749,0.4484201297163963,0.455057855695486,0.4526158757507801,0.453176885843277,0.450159091502428,0.4516039006412029,0.4549933448433876,0.4555377587676048,0.4575010798871517,0.4577344059944153,0.4540543705224991,0.4537974074482918,0.4611785635352134,0.4586966186761856,0.4594406597316265,0.4598931074142456,0.457538403570652,0.4591932781040668,0.4636382386088371,0.4582749158143997,0.4625946804881096,0.4633439630270004,0.4666871763765812,0.4649887941777706,0.4671247974038124,0.4665776938199997,0.4672530107200145,0.4666078947484493,0.4666155055165291,0.4727727174758911,0.467480719089508,0.4681386984884739,0.4651658721268177,0.4668439887464046,0.4671731516718864,0.4719251021742821,0.4699816256761551,0.4723306186497211,0.4686817973852157,0.468911949545145,0.4714248068630695,0.4724191203713417,0.4700912088155746,0.4685601107776165,0.4716645181179046,0.4724556542932987,0.4670086726546287,0.4703365340828895,0.4698334187269211,0.471625205129385,0.4688323326408863,0.4735309742391109,0.4729253277182579,0.4747676998376846,0.4723741039633751,0.4764323942363262,0.4737579710781574,0.4758132360875606,0.4755662642419338,0.4730159305036068,0.4787128046154976,0.4740134924650192,0.4785312972962856,0.4783577285706997,0.4752367511391639,0.474204134196043,0.4737414345145225,0.4780189953744411,0.477523285895586,0.4751617163419723,0.4776186011731624,0.4769949465990066,0.4790891669690609,0.479917362332344,0.4771673306822777,0.4825278185307979,0.4811677671968937,0.4787211790680885,0.4817796200513839,0.4819813556969166,0.4802381917834282,0.4810985140502453,0.481117732822895,0.4791575670242309,0.4798801243305206,0.4829155020415783,0.4822122864425182,0.4827562272548675,0.4839778505265713,0.4820474348962307,0.4858015961945057,0.4826803356409073,0.4831027314066887,0.4827458150684833,0.4819435514509678,0.4836879819631576,0.4835174195468426,0.4855972006917,0.4871680215001106,0.4840429238975048,0.4827739149332046,0.4881435632705688,0.4871019721031189,0.486987367272377,0.4836358055472374,0.4867987409234047,0.4869474284350872,0.4886575266718864,0.4855775311589241,0.4863000251352787,0.4841057248413563,0.488163661211729,0.4904011823236942,0.4870587214827537,0.4884037151932716,0.4873756393790245,0.4925794936716556,0.4874482750892639,0.4898910224437713,0.4893574342131614,0.4888269044458866,0.4887814335525036,0.4876748844981193,0.4853886738419533,0.4878034777939319,0.4911742769181728,0.4905468784272671,0.4896938055753708,0.4875142201781273,0.4900367334485054,0.4900274313986301,0.4905461706221103,0.4891181476414203,0.4881824217736721,0.4902780950069427,0.4895042479038238,0.4890727028250694,0.4897591508924961,0.4879062548279762,0.4897833876311779,0.4902243539690971,0.4884885586798191,0.4880276583135128,0.4927133433520794,0.4899616949260235],"label":"FineWeb: id mh + C4 + custom filters"},"big-run-sampled-fineweb-c4-filters":{"x":[0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,29.360128000000003,31.45728,33.554432000000006,35.651584,37.748736,39.845888,41.94304,44.040192000000005,46.137344000000006,48.234496,50.331648,52.4288,54.525952000000004,56.623104000000005,58.720256000000006,60.817408,62.91456,65.011712,67.10886400000001,69.206016,71.303168,73.40032000000001,75.497472,77.59462400000001,79.691776,81.788928,83.88608,85.983232,88.08038400000001,90.177536,92.27468800000001,94.37184,96.468992,98.56614400000001,100.663296,102.76044800000001,104.8576,106.95475200000001,109.05190400000001,111.149056,113.24620800000001,115.34336,117.44051200000001,119.537664,121.634816,123.73196800000001,125.82912,127.92627200000001,130.023424,132.120576,134.21772800000002,136.31488000000002,138.412032,140.509184,142.606336,144.70348800000002,146.80064000000002,148.897792,150.994944,153.092096,155.18924800000002,157.28640000000001,159.383552,161.480704,163.577856,165.67500800000002,167.77216,169.869312,171.966464,174.06361600000002,176.16076800000002,178.25792,180.355072,182.452224,184.54937600000002,186.64652800000002,188.74368,190.840832,192.937984,195.03513600000002,197.13228800000002,199.22944,201.326592,203.423744,205.52089600000002,207.61804800000002,209.7152,211.812352,213.90950400000003,216.00665600000002,218.10380800000001,220.20096,222.298112,224.39526400000003,226.49241600000002,228.589568,230.68672,232.783872,234.88102400000002,236.97817600000002,239.075328,241.17248,243.269632,245.36678400000002,247.46393600000002,249.561088,251.65824,253.75539200000003,255.85254400000002,257.949696,260.046848,262.144,264.241152,266.338304,268.43545600000004,270.53260800000004,272.62976000000003,274.726912,276.824064,278.921216,281.018368,283.11552,285.212672,287.309824,289.40697600000004,291.50412800000004,293.60128000000003,295.698432,297.795584,299.892736,301.989888,304.08704,306.184192,308.28134400000005,310.37849600000004,312.47564800000004,314.57280000000003,316.669952,318.767104,320.864256,322.961408,325.05856,327.155712,329.25286400000005,331.35001600000004,333.44716800000003,335.54432,337.641472,339.738624,341.835776,343.932928,346.03008,348.12723200000005,350.22438400000004],"y":[0.3308933284133672,0.3593025095760822,0.3753932043910026,0.3896549865603447,0.4011945575475693,0.4079862833023071,0.4100634902715683,0.4188448339700699,0.4182912856340408,0.4209799654781818,0.426167830824852,0.4270535074174404,0.4293412938714027,0.4376098960638046,0.4369498938322067,0.4447805918753147,0.4420784451067447,0.4401859976351261,0.4450364373624325,0.4467439614236355,0.4494622647762298,0.4474291987717151,0.4474774301052093,0.4496959559619427,0.4504862427711487,0.4483809620141983,0.4500409476459026,0.4506221041083336,0.4519891515374183,0.4511651210486889,0.4493776857852936,0.4546159133315086,0.4542211070656776,0.4540864638984203,0.4535767734050751,0.4580400213599205,0.451940905302763,0.4536588154733181,0.4593464843928814,0.4576366357505321,0.4563389606773853,0.4556163437664509,0.4611873291432857,0.4606512449681759,0.4602674432098865,0.4573654346168041,0.4579697586596012,0.4577618762850761,0.465243399143219,0.4626524560153484,0.4652697443962097,0.4616814218461513,0.4664025083184242,0.4648593515157699,0.4665380977094173,0.4670920372009277,0.4651120826601982,0.4648002386093139,0.4674604535102844,0.4694998189806938,0.4647957049310207,0.4655059054493904,0.4694474637508392,0.4685290567576885,0.4678448662161827,0.4666110426187515,0.466820664703846,0.4703560136258602,0.4655868485569954,0.4657375514507293,0.4673589915037155,0.4694744572043419,0.4697113968431949,0.4663790501654148,0.4678909480571747,0.4731503240764141,0.4703953340649605,0.4711540788412094,0.4689725339412689,0.4709760397672653,0.4721849896013737,0.4684626050293445,0.4728966951370239,0.4708623439073562,0.4755619578063488,0.4722185768187046,0.4752251170575619,0.4724387377500534,0.4767676629126072,0.4720797315239906,0.476152952760458,0.4784524105489254,0.472656887024641,0.4761070720851421,0.4791567139327526,0.4773554690182209,0.4749615713953972,0.4786102436482906,0.4776762872934341,0.4759960658848285,0.4783963784575462,0.4794723503291607,0.4783952049911022,0.4814380966126919,0.476895060390234,0.479157205671072,0.4783024378120899,0.4772652834653854,0.4805076755583286,0.4786335416138172,0.4829660281538963,0.4798073060810566,0.4846024662256241,0.4791539534926414,0.4836216196417808,0.482492484152317,0.4832956567406654,0.4811016321182251,0.480607770383358,0.4813096337020397,0.4819207563996315,0.482705220580101,0.4817859195172786,0.4817019775509834,0.4848218411207199,0.4850655570626259,0.4847046621143818,0.4811170361936092,0.4863272421061992,0.484540831297636,0.4826735481619835,0.4844910651445389,0.4825031049549579,0.4849743507802486,0.484294731169939,0.4857852198183536,0.4881704896688461,0.4850401543080807,0.4885894693434238,0.4855906665325165,0.4871751256287098,0.48358104377985,0.4859574064612388,0.4833582155406475,0.4867088869214058,0.4869902320206165,0.4876262210309505,0.4864178374409675,0.4864541031420231,0.4867057502269745,0.4884936697781086,0.4854058027267456,0.4880223199725151,0.4881350100040436,0.4871640801429748,0.4859121330082416,0.4894774369895458,0.4890438541769981,0.489189263433218,0.4893344156444073,0.4886334165930748,0.4900187514722347,0.4877792187035084,0.4887096807360649,0.4900767691433429,0.4877709597349167,0.48653694242239,0.4897000454366207],"label":"FineWeb: id mh + C4 filters"},"big-run-sampled_full_ind_minhash":{"x":[0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,29.360128000000003,31.45728,33.554432000000006,35.651584,37.748736,39.845888,41.94304,44.040192000000005,46.137344000000006,48.234496,50.331648,52.4288,54.525952000000004,56.623104000000005,58.720256000000006,60.817408,62.91456,65.011712,67.10886400000001,69.206016,71.303168,73.40032000000001,75.497472,77.59462400000001,79.691776,81.788928,83.88608,85.983232,88.08038400000001,90.177536,92.27468800000001,94.37184,96.468992,98.56614400000001,100.663296,102.76044800000001,104.8576,106.95475200000001,109.05190400000001,111.149056,113.24620800000001,115.34336,117.44051200000001,119.537664,121.634816,123.73196800000001,125.82912,127.92627200000001,130.023424,132.120576,134.21772800000002,136.31488000000002,138.412032,140.509184,142.606336,144.70348800000002,146.80064000000002,148.897792,150.994944,153.092096,155.18924800000002,157.28640000000001,159.383552,161.480704,163.577856,165.67500800000002,167.77216,169.869312,171.966464,174.06361600000002,176.16076800000002,178.25792,180.355072,182.452224,184.54937600000002,186.64652800000002,188.74368,190.840832,192.937984,195.03513600000002,197.13228800000002,199.22944,201.326592,203.423744,205.52089600000002,207.61804800000002,209.7152,211.812352,213.90950400000003,216.00665600000002,218.10380800000001,220.20096,222.298112,224.39526400000003,226.49241600000002,228.589568,230.68672,232.783872,234.88102400000002,236.97817600000002,239.075328,241.17248,243.269632,245.36678400000002,247.46393600000002,249.561088,251.65824,253.75539200000003,255.85254400000002,257.949696,260.046848,262.144,264.241152,266.338304,268.43545600000004,270.53260800000004,272.62976000000003,274.726912,276.824064,278.921216,281.018368,283.11552,285.212672,287.309824,289.40697600000004,291.50412800000004,293.60128000000003,295.698432,297.795584,299.892736,301.989888,304.08704,306.184192,308.28134400000005,310.37849600000004,312.47564800000004,314.57280000000003,316.669952,318.767104,320.864256,322.961408,325.05856,327.155712,329.25286400000005,331.35001600000004,333.44716800000003,335.54432,337.641472,339.738624,341.835776,343.932928,346.03008,348.12723200000005,350.22438400000004],"y":[0.3308933284133672,0.3608616776764393,0.3745453506708145,0.3862277194857597,0.3989979773759842,0.406296543776989,0.4094927236437797,0.4138859286904335,0.4177777022123337,0.4208802655339241,0.4254550077021122,0.4283009432256222,0.429458349943161,0.4330311268568039,0.4349483698606491,0.4348161295056343,0.438955657184124,0.4389265701174736,0.4393925778567791,0.4383306242525577,0.4436748661100864,0.4423373565077781,0.4460027255117893,0.4440812170505523,0.4476902261376381,0.4465879611670971,0.4497823156416416,0.4513350501656532,0.4518667235970497,0.45149727165699,0.4513994492590427,0.4521937072277069,0.4520382955670357,0.4530793912708759,0.4516105614602566,0.4530563354492187,0.4495660625398159,0.4520940892398357,0.4561133235692978,0.4522969461977482,0.4575686641037464,0.4589144177734852,0.4582882039248943,0.457970168441534,0.4554797261953354,0.4622044861316681,0.4596928395330906,0.4624353349208832,0.4619148448109627,0.461100060492754,0.458431463688612,0.4620467089116573,0.4562215581536293,0.4620163068175316,0.4631462283432483,0.4600549824535846,0.4620365314185619,0.458735141903162,0.461642112582922,0.461245734244585,0.4645131677389145,0.4629777930676937,0.4651660025119781,0.4653937108814716,0.4676259346306324,0.4667201824486255,0.4650012850761413,0.4676916748285293,0.4708514772355556,0.4673572592437267,0.4689626581966877,0.4678038358688354,0.4667215310037136,0.4646228328347206,0.4662510119378567,0.4674677737057209,0.4690804108977318,0.4634581170976162,0.4701276533305645,0.4676450751721859,0.4672758504748344,0.4674397967755794,0.4656238108873367,0.4690065123140812,0.4677213467657566,0.4678985886275768,0.4735414572060108,0.4705612398684025,0.4703374318778515,0.4704933613538742,0.4688010476529598,0.4699571952223778,0.4674785658717155,0.4701188169419765,0.4682065695524215,0.4729971997439861,0.4748715870082378,0.4745333231985569,0.4737020246684551,0.4747246317565441,0.4771635122597217,0.4740425907075405,0.475264236330986,0.4744705818593502,0.474684040993452,0.4721556939184665,0.475641455501318,0.476833701133728,0.4746401384472847,0.4742486327886581,0.4730467088520527,0.4773029200732708,0.4760043211281299,0.4770320989191532,0.4742161482572555,0.4780259765684604,0.4806670732796192,0.4784667380154133,0.4788618609309196,0.4762138128280639,0.4777246937155723,0.4796081893146038,0.4798486456274986,0.475479181855917,0.4779988899827003,0.4765858314931392,0.4772914499044418,0.47843898832798,0.4799034222960472,0.4803600236773491,0.4751846008002758,0.4777872562408447,0.4779460839927196,0.4787487275898456,0.4808406494557857,0.4810357913374901,0.4797308407723903,0.4800078608095646,0.4806460626423359,0.4810502976179123,0.4797912389039993,0.477332629263401,0.4818884879350662,0.482621606439352,0.4833096489310264,0.4821632876992225,0.4831674285233021,0.4830279909074306,0.4849893450736999,0.4845218025147915,0.4825541749596596,0.4833571836352348,0.4853803217411041,0.483093187212944,0.4850797094404697,0.485261783003807,0.4837660938501358,0.4835929833352566,0.4855643883347511,0.4832059442996979,0.484714712947607,0.4839249886572361,0.4829078912734985,0.4818423055112362,0.482727088034153,0.4824129492044449,0.4820138849318027,0.4865870922803879],"label":"FineWeb: independent MinHash (id mh)"},"big-run-sampled_full_filtered_no_dedup":{"x":[0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,29.360128000000003,31.45728,33.554432000000006,35.651584,37.748736,39.845888,41.94304,44.040192000000005,46.137344000000006,48.234496,50.331648,52.4288,54.525952000000004,56.623104000000005,58.720256000000006,60.817408,62.91456,65.011712,67.10886400000001,69.206016,71.303168,73.40032000000001,75.497472,77.59462400000001,79.691776,81.788928,83.88608,85.983232,88.08038400000001,90.177536,92.27468800000001,94.37184,96.468992,98.56614400000001,100.663296,102.76044800000001,104.8576,106.95475200000001,109.05190400000001,111.149056,113.24620800000001,115.34336,117.44051200000001,119.537664,121.634816,123.73196800000001,125.82912,127.92627200000001,130.023424,132.120576,134.21772800000002,136.31488000000002,138.412032,140.509184,142.606336,144.70348800000002,146.80064000000002,148.897792,150.994944,153.092096,155.18924800000002,157.28640000000001,159.383552,161.480704,163.577856,165.67500800000002,167.77216,169.869312,171.966464,174.06361600000002,176.16076800000002,178.25792,180.355072,182.452224,184.54937600000002,186.64652800000002,188.74368,190.840832,192.937984,195.03513600000002,197.13228800000002,199.22944,201.326592,203.423744,205.52089600000002,207.61804800000002,209.7152,211.812352,213.90950400000003,216.00665600000002,218.10380800000001,220.20096,222.298112,224.39526400000003,226.49241600000002,228.589568,230.68672,232.783872,234.88102400000002,236.97817600000002,239.075328,241.17248,243.269632,245.36678400000002,247.46393600000002,249.561088,251.65824,253.75539200000003,255.85254400000002,257.949696,260.046848,262.144,264.241152,266.338304,268.43545600000004,270.53260800000004,272.62976000000003,274.726912,276.824064,278.921216,281.018368,283.11552,285.212672,287.309824,289.40697600000004,291.50412800000004,293.60128000000003,295.698432,297.795584,299.892736,301.989888,304.08704,306.184192,308.28134400000005,310.37849600000004,312.47564800000004,314.57280000000003,316.669952,318.767104,320.864256,322.961408,325.05856,327.155712,329.25286400000005,331.35001600000004,333.44716800000003,335.54432,337.641472,339.738624,341.835776,343.932928,346.03008,348.12723200000005,350.22438400000004],"y":[0.3308933284133672,0.3605199865996837,0.3733148723840713,0.3882005847990513,0.3934122696518898,0.3947227671742439,0.4042885974049568,0.3974800482392311,0.4055779427289963,0.4133470430970192,0.4117913842201233,0.4113653488457203,0.4149517640471458,0.4187851920723915,0.4206527359783649,0.4240428246557712,0.422003373503685,0.4280910938978195,0.4244147576391697,0.4316282644867897,0.4295645765960216,0.4310102686285972,0.4360743537545204,0.4313482865691185,0.4350991360843181,0.4378576353192329,0.4335876516997814,0.4347924515604973,0.4348904751241207,0.436600212007761,0.430036511272192,0.4350974671542644,0.4399556629359722,0.4371416717767715,0.4363861419260502,0.4376698136329651,0.4405004419386387,0.4373639523983001,0.4379038028419018,0.4371281825006008,0.4393439553678036,0.440426729619503,0.4401675276458263,0.4429537951946258,0.4449137263000011,0.4434786736965179,0.4450470842421055,0.4454202279448509,0.4394537284970283,0.442185215651989,0.4461225643754005,0.4427758157253265,0.4430646039545536,0.4476901069283485,0.4478763341903686,0.4493869319558143,0.4448477327823639,0.450044184923172,0.4498609118163585,0.4457665979862213,0.4506924152374267,0.449855338782072,0.448790930211544,0.4474099352955818,0.4546772800385952,0.4529431238770485,0.452015146613121,0.4502020999789238,0.4493804536759853,0.4523266032338142,0.4551868587732315,0.4501944817602634,0.4493303671479225,0.4526805207133293,0.4533850513398647,0.4518048763275146,0.4518973492085933,0.4531301632523536,0.4518006071448326,0.4553494565188885,0.4528752230107784,0.4536322727799415,0.4561733976006508,0.4549491256475448,0.4574789106845855,0.4577847123146057,0.4563642293214798,0.4578686729073524,0.4561499990522861,0.4537816494703293,0.4542164430022239,0.4559455662965774,0.4554723873734474,0.4575514122843742,0.4575202167034149,0.4592722058296203,0.4585275091230869,0.4580587856471538,0.456934317946434,0.4577495418488979,0.4540119916200638,0.4570806957781315,0.4608120545744896,0.4588425755500793,0.4578334167599678,0.4610816091299057,0.4598177038133144,0.461849745362997,0.4631866924464702,0.4601576402783394,0.4646804705262184,0.4632389545440674,0.4604574106633663,0.4602976888418197,0.4581312239170074,0.4654182009398937,0.4655338563024997,0.4616620391607284,0.461054053157568,0.4613021649420261,0.4658613465726375,0.4633531905710697,0.4613638147711754,0.4643996246159076,0.462500050663948,0.4650798961520195,0.4648764543235302,0.4639869071543216,0.4634246975183487,0.46585888043046,0.4639799632132053,0.4630857892334461,0.4644265696406364,0.4642998576164245,0.4686848931014538,0.4687492996454239,0.4650243632495403,0.4627032242715359,0.4665953740477562,0.4660026729106903,0.4664581045508384,0.4676475040614605,0.4657339677214622,0.4664678275585174,0.4673498086631298,0.4676674827933311,0.4680955372750759,0.4681585058569908,0.4659864418208599,0.4686457589268684,0.4661462865769863,0.4658931568264961,0.4674226939678192,0.46805215254426,0.4682257212698459,0.4689070098102093,0.4699570722877979,0.4655096270143986,0.4688013233244419,0.4707522802054882,0.4661469310522079,0.4688841328024864,0.4671329781413078,0.4662554152309894,0.4697433896362781,0.4698473587632179,0.4676505327224731,0.4696521013975143],"label":"FineWeb: base filtering only"}},"layout":{"title":{"text":"The different FineWeb processing steps"}}}
|
|
|
|
dist/assets/data/plots/all_filtering_steps/arc_acc_norm.json
DELETED
@@ -1 +0,0 @@
|
|
1 |
-
{"data":{"big-run-sampled_full_ind_minhash":{"x":[0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,29.360128000000003,31.45728,33.554432000000006,35.651584,37.748736,39.845888,41.94304,44.040192000000005,46.137344000000006,48.234496,50.331648,52.4288,54.525952000000004,56.623104000000005,58.720256000000006,60.817408,62.91456,65.011712,67.10886400000001,69.206016,71.303168,73.40032000000001,75.497472,77.59462400000001,79.691776,81.788928,83.88608,85.983232,88.08038400000001,90.177536,92.27468800000001,94.37184,96.468992,98.56614400000001,100.663296,102.76044800000001,104.8576,106.95475200000001,109.05190400000001,111.149056,113.24620800000001,115.34336,117.44051200000001,119.537664,121.634816,123.73196800000001,125.82912,127.92627200000001,130.023424,132.120576,134.21772800000002,136.31488000000002,138.412032,140.509184,142.606336,144.70348800000002,146.80064000000002,148.897792,150.994944,153.092096,155.18924800000002,157.28640000000001,159.383552,161.480704,163.577856,165.67500800000002,167.77216,169.869312,171.966464,174.06361600000002,176.16076800000002,178.25792,180.355072,182.452224,184.54937600000002,186.64652800000002,188.74368,190.840832,192.937984,195.03513600000002,197.13228800000002,199.22944,201.326592,203.423744,205.52089600000002,207.61804800000002,209.7152,211.812352,213.90950400000003,216.00665600000002,218.10380800000001,220.20096,222.298112,224.39526400000003,226.49241600000002,228.589568,230.68672,232.783872,234.88102400000002,236.97817600000002,239.075328,241.17248,243.269632,245.36678400000002,247.46393600000002,249.561088,251.65824,253.75539200000003,255.85254400000002,257.949696,260.046848,262.144,264.241152,266.338304,268.43545600000004,270.53260800000004,272.62976000000003,274.726912,276.824064,278.921216,281.018368,283.11552,285.212672,287.309824,289.40697600000004,291.50412800000004,293.60128000000003,295.698432,297.795584,299.892736,301.989888,304.08704,306.184192,308.28134400000005,310.37849600000004,312.47564800000004,314.57280000000003,316.669952,318.767104,320.864256,322.961408,325.05856,327.155712,329.25286400000005,331.35001600000004,333.44716800000003,335.54432,337.641472,339.738624,341.835776,343.932928,346.03008,348.12723200000005,350.22438400000004],"y":[0.2509999871253967,0.2939999997615814,0.3174999952316284,0.3294999897480011,0.3510000109672546,0.3485000133514404,0.3634999990463257,0.3700000047683716,0.3524999916553497,0.375,0.3804999887943268,0.37950000166893,0.3824999928474426,0.3799999952316284,0.395000010728836,0.3844999969005584,0.3894999921321869,0.3855000138282776,0.3955000042915344,0.3995000123977661,0.4009999930858612,0.3939999938011169,0.3970000147819519,0.3955000042915344,0.3955000042915344,0.4079999923706054,0.3959999978542328,0.4090000092983246,0.4045000076293945,0.3930000066757202,0.4099999964237213,0.4054999947547912,0.4124999940395355,0.4160000085830688,0.4149999916553497,0.4070000052452087,0.4110000133514404,0.4144999980926513,0.4120000004768371,0.4050000011920929,0.4165000021457672,0.4180000126361847,0.4050000011920929,0.4120000004768371,0.4135000109672546,0.4320000112056732,0.4284999966621399,0.4269999861717224,0.414000004529953,0.4255000054836273,0.4165000021457672,0.4144999980926513,0.4079999923706054,0.4205000102519989,0.4180000126361847,0.4244999885559082,0.4235000014305115,0.4244999885559082,0.4300000071525574,0.4160000085830688,0.4205000102519989,0.4329999983310699,0.4280000030994415,0.4244999885559082,0.4375,0.4244999885559082,0.4365000128746032,0.4329999983310699,0.4424999952316284,0.4390000104904175,0.4449999928474426,0.445499986410141,0.4320000112056732,0.4365000128746032,0.4244999885559082,0.429500013589859,0.4395000040531158,0.4284999966621399,0.44200000166893,0.4370000064373016,0.4399999976158142,0.4334999918937683,0.4429999887943268,0.44200000166893,0.4334999918937683,0.4384999871253967,0.4365000128746032,0.4390000104904175,0.4354999959468841,0.44200000166893,0.4350000023841858,0.4390000104904175,0.4404999911785126,0.4410000145435333,0.4305000007152557,0.4490000009536743,0.4510000050067901,0.4605000019073486,0.4490000009536743,0.449999988079071,0.4595000147819519,0.4514999985694885,0.4490000009536743,0.4474999904632568,0.4444999992847442,0.4524999856948852,0.4465000033378601,0.4519999921321869,0.4550000131130218,0.4524999856948852,0.4429999887943268,0.4550000131130218,0.4510000050067901,0.4560000002384186,0.4465000033378601,0.4485000073909759,0.4524999856948852,0.4440000057220459,0.457500010728836,0.4544999897480011,0.4480000138282776,0.4584999978542328,0.4544999897480011,0.4569999873638153,0.4584999978542328,0.4444999992847442,0.4629999995231628,0.457500010728836,0.4555000066757202,0.4569999873638153,0.4474999904632568,0.4564999938011169,0.4595000147819519,0.4634999930858612,0.4555000066757202,0.453000009059906,0.457500010728836,0.4614999890327453,0.460999995470047,0.4539999961853027,0.4595000147819519,0.4629999995231628,0.4670000076293945,0.4580000042915344,0.4639999866485595,0.457500010728836,0.4595000147819519,0.4665000140666961,0.4584999978542328,0.4629999995231628,0.4595000147819519,0.4659999907016754,0.4645000100135803,0.4675000011920929,0.4690000116825104,0.4715000092983246,0.4634999930858612,0.4634999930858612,0.4639999866485595,0.465499997138977,0.4675000011920929,0.4670000076293945,0.4600000083446502,0.4595000147819519,0.4625000059604645,0.4600000083446502,0.4645000100135803,0.4715000092983246],"label":"FineWeb: independent MinHash (id mh)"},"big-run-sampled-fineweb-c4-filters":{"x":[0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,29.360128000000003,31.45728,33.554432000000006,35.651584,37.748736,39.845888,41.94304,44.040192000000005,46.137344000000006,48.234496,50.331648,52.4288,54.525952000000004,56.623104000000005,58.720256000000006,60.817408,62.91456,65.011712,67.10886400000001,69.206016,71.303168,73.40032000000001,75.497472,77.59462400000001,79.691776,81.788928,83.88608,85.983232,88.08038400000001,90.177536,92.27468800000001,94.37184,96.468992,98.56614400000001,100.663296,102.76044800000001,104.8576,106.95475200000001,109.05190400000001,111.149056,113.24620800000001,115.34336,117.44051200000001,119.537664,121.634816,123.73196800000001,125.82912,127.92627200000001,130.023424,132.120576,134.21772800000002,136.31488000000002,138.412032,140.509184,142.606336,144.70348800000002,146.80064000000002,148.897792,150.994944,153.092096,155.18924800000002,157.28640000000001,159.383552,161.480704,163.577856,165.67500800000002,167.77216,169.869312,171.966464,174.06361600000002,176.16076800000002,178.25792,180.355072,182.452224,184.54937600000002,186.64652800000002,188.74368,190.840832,192.937984,195.03513600000002,197.13228800000002,199.22944,201.326592,203.423744,205.52089600000002,207.61804800000002,209.7152,211.812352,213.90950400000003,216.00665600000002,218.10380800000001,220.20096,222.298112,224.39526400000003,226.49241600000002,228.589568,230.68672,232.783872,234.88102400000002,236.97817600000002,239.075328,241.17248,243.269632,245.36678400000002,247.46393600000002,249.561088,251.65824,253.75539200000003,255.85254400000002,257.949696,260.046848,262.144,264.241152,266.338304,268.43545600000004,270.53260800000004,272.62976000000003,274.726912,276.824064,278.921216,281.018368,283.11552,285.212672,287.309824,289.40697600000004,291.50412800000004,293.60128000000003,295.698432,297.795584,299.892736,301.989888,304.08704,306.184192,308.28134400000005,310.37849600000004,312.47564800000004,314.57280000000003,316.669952,318.767104,320.864256,322.961408,325.05856,327.155712,329.25286400000005,331.35001600000004,333.44716800000003,335.54432,337.641472,339.738624,341.835776,343.932928,346.03008,348.12723200000005,350.22438400000004],"y":[0.2509999871253967,0.2985000014305115,0.3269999921321869,0.340499997138977,0.3495000004768371,0.3535000085830688,0.3519999980926513,0.3625000119209289,0.3569999933242798,0.3659999966621399,0.3619999885559082,0.3759999871253967,0.3779999911785126,0.3919999897480011,0.3835000097751617,0.402999997138977,0.3899999856948852,0.3869999945163727,0.3885000050067901,0.3989999890327453,0.390500009059906,0.4054999947547912,0.398499995470047,0.3989999890327453,0.398499995470047,0.4014999866485595,0.398499995470047,0.4135000109672546,0.4045000076293945,0.4144999980926513,0.4079999923706054,0.4124999940395355,0.4169999957084656,0.4074999988079071,0.4205000102519989,0.4135000109672546,0.4160000085830688,0.4124999940395355,0.4225000143051147,0.4214999973773956,0.418500006198883,0.4115000069141388,0.4165000021457672,0.4199999868869781,0.418500006198883,0.414000004529953,0.4194999933242798,0.4095000028610229,0.4214999973773956,0.4149999916553497,0.426499992609024,0.4160000085830688,0.4169999957084656,0.4314999878406524,0.4404999911785126,0.4325000047683716,0.4305000007152557,0.4275000095367431,0.4250000119209289,0.4230000078678131,0.4214999973773956,0.4275000095367431,0.4354999959468841,0.4235000014305115,0.4244999885559082,0.4199999868869781,0.4235000014305115,0.4275000095367431,0.4205000102519989,0.4244999885559082,0.4230000078678131,0.4235000014305115,0.4280000030994415,0.4305000007152557,0.4305000007152557,0.4359999895095825,0.4345000088214874,0.4395000040531158,0.4280000030994415,0.4350000023841858,0.4365000128746032,0.4255000054836273,0.4339999854564667,0.4314999878406524,0.4329999983310699,0.4345000088214874,0.4395000040531158,0.4350000023841858,0.4535000026226043,0.4449999928474426,0.445499986410141,0.4404999911785126,0.4424999952316284,0.4505000114440918,0.4440000057220459,0.4519999921321869,0.4449999928474426,0.4474999904632568,0.4494999945163727,0.4494999945163727,0.445499986410141,0.4510000050067901,0.4524999856948852,0.4395000040531158,0.4444999992847442,0.4469999969005584,0.4460000097751617,0.4539999961853027,0.4494999945163727,0.4465000033378601,0.4544999897480011,0.4474999904632568,0.4550000131130218,0.4510000050067901,0.4555000066757202,0.4480000138282776,0.4589999914169311,0.4550000131130218,0.4510000050067901,0.4519999921321869,0.4514999985694885,0.4539999961853027,0.4535000026226043,0.4569999873638153,0.4620000123977661,0.4634999930858612,0.4555000066757202,0.4465000033378601,0.4550000131130218,0.4485000073909759,0.4435000121593475,0.4480000138282776,0.4555000066757202,0.4469999969005584,0.4535000026226043,0.4555000066757202,0.4519999921321869,0.4485000073909759,0.4639999866485595,0.4584999978542328,0.4490000009536743,0.4524999856948852,0.453000009059906,0.4535000026226043,0.460999995470047,0.4589999914169311,0.4544999897480011,0.4589999914169311,0.4569999873638153,0.4544999897480011,0.4625000059604645,0.4474999904632568,0.4510000050067901,0.4480000138282776,0.453000009059906,0.4460000097751617,0.460999995470047,0.4634999930858612,0.4679999947547912,0.4639999866485595,0.4720000028610229,0.4659999907016754,0.4650000035762787,0.4620000123977661,0.4659999907016754,0.465499997138977,0.4595000147819519,0.4620000123977661],"label":"FineWeb: id mh + C4 filters"},"big-run-fineweb-v1-all-dumps":{"x":[0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,29.360128000000003,31.45728,33.554432000000006,35.651584,37.748736,39.845888,41.94304,44.040192000000005,46.137344000000006,48.234496,50.331648,52.4288,54.525952000000004,56.623104000000005,58.720256000000006,60.817408,62.91456,65.011712,67.10886400000001,69.206016,71.303168,73.40032000000001,75.497472,77.59462400000001,79.691776,81.788928,83.88608,85.983232,88.08038400000001,90.177536,92.27468800000001,94.37184,96.468992,98.56614400000001,100.663296,102.76044800000001,104.8576,106.95475200000001,109.05190400000001,111.149056,113.24620800000001,115.34336,117.44051200000001,119.537664,121.634816,123.73196800000001,125.82912,127.92627200000001,130.023424,132.120576,134.21772800000002,136.31488000000002,138.412032,140.509184,142.606336,144.70348800000002,146.80064000000002,148.897792,150.994944,153.092096,155.18924800000002,157.28640000000001,159.383552,161.480704,163.577856,165.67500800000002,167.77216,169.869312,171.966464,174.06361600000002,176.16076800000002,178.25792,180.355072,182.452224,184.54937600000002,186.64652800000002,188.74368,190.840832,192.937984,195.03513600000002,197.13228800000002,199.22944,201.326592,203.423744,205.52089600000002,207.61804800000002,209.7152,211.812352,213.90950400000003,216.00665600000002,218.10380800000001,220.20096,222.298112,224.39526400000003,226.49241600000002,228.589568,230.68672,232.783872,234.88102400000002,236.97817600000002,239.075328,241.17248,243.269632,245.36678400000002,247.46393600000002,249.561088,251.65824,253.75539200000003,255.85254400000002,257.949696,260.046848,262.144,264.241152,266.338304,268.43545600000004,270.53260800000004,272.62976000000003,274.726912,276.824064,278.921216,281.018368,283.11552,285.212672,287.309824,289.40697600000004,291.50412800000004,293.60128000000003,295.698432,297.795584,299.892736,301.989888,304.08704,306.184192,308.28134400000005,310.37849600000004,312.47564800000004,314.57280000000003,316.669952,318.767104,320.864256,322.961408,325.05856,327.155712,329.25286400000005,331.35001600000004,333.44716800000003,335.54432,337.641472,339.738624,341.835776,343.932928,346.03008,348.12723200000005,350.22438400000004],"y":[0.2509999871253967,0.296999990940094,0.3219999969005584,0.3305000066757202,0.3555000126361847,0.351500004529953,0.3600000143051147,0.363999992609024,0.3680000007152557,0.3785000145435333,0.3765000104904175,0.382999986410141,0.3785000145435333,0.3835000097751617,0.3819999992847442,0.3935000002384186,0.387499988079071,0.3935000002384186,0.3959999978542328,0.3860000073909759,0.3935000002384186,0.3885000050067901,0.3810000121593475,0.3880000114440918,0.3964999914169311,0.4054999947547912,0.3935000002384186,0.3944999873638153,0.3989999890327453,0.3980000019073486,0.4050000011920929,0.4054999947547912,0.4009999930858612,0.4110000133514404,0.4054999947547912,0.4180000126361847,0.4110000133514404,0.4050000011920929,0.4079999923706054,0.4120000004768371,0.402999997138977,0.4205000102519989,0.4129999876022339,0.4120000004768371,0.4169999957084656,0.4269999861717224,0.4230000078678131,0.4225000143051147,0.4300000071525574,0.4180000126361847,0.4284999966621399,0.4165000021457672,0.4325000047683716,0.4235000014305115,0.4210000038146972,0.4239999949932098,0.4235000014305115,0.421999990940094,0.4280000030994415,0.4300000071525574,0.4275000095367431,0.4305000007152557,0.4244999885559082,0.4314999878406524,0.4325000047683716,0.4395000040531158,0.4325000047683716,0.4300000071525574,0.4399999976158142,0.4320000112056732,0.4370000064373016,0.4280000030994415,0.4309999942779541,0.4314999878406524,0.4370000064373016,0.4280000030994415,0.4325000047683716,0.4300000071525574,0.4334999918937683,0.4334999918937683,0.4379999935626983,0.4399999976158142,0.4350000023841858,0.4395000040531158,0.4375,0.4390000104904175,0.4365000128746032,0.4435000121593475,0.4365000128746032,0.445499986410141,0.4440000057220459,0.4460000097751617,0.4415000081062317,0.4415000081062317,0.4339999854564667,0.4429999887943268,0.4399999976158142,0.4359999895095825,0.4370000064373016,0.4469999969005584,0.4404999911785126,0.4435000121593475,0.445499986410141,0.4424999952316284,0.4480000138282776,0.4370000064373016,0.4444999992847442,0.4465000033378601,0.4309999942779541,0.4440000057220459,0.4469999969005584,0.4539999961853027,0.4440000057220459,0.4555000066757202,0.4519999921321869,0.4510000050067901,0.4519999921321869,0.4544999897480011,0.4494999945163727,0.4584999978542328,0.4580000042915344,0.4544999897480011,0.4514999985694885,0.4550000131130218,0.4560000002384186,0.4600000083446502,0.4589999914169311,0.4560000002384186,0.457500010728836,0.4679999947547912,0.4494999945163727,0.4505000114440918,0.4440000057220459,0.4539999961853027,0.4535000026226043,0.4514999985694885,0.457500010728836,0.4620000123977661,0.4564999938011169,0.4595000147819519,0.4564999938011169,0.4550000131130218,0.4539999961853027,0.4544999897480011,0.4569999873638153,0.457500010728836,0.4539999961853027,0.4595000147819519,0.4665000140666961,0.465499997138977,0.4625000059604645,0.4629999995231628,0.4580000042915344,0.4569999873638153,0.4620000123977661,0.457500010728836,0.4550000131130218,0.4645000100135803,0.4629999995231628,0.4584999978542328,0.465499997138977,0.460999995470047,0.4634999930858612,0.4605000019073486,0.4584999978542328,0.4550000131130218,0.4564999938011169,0.4600000083446502],"label":"FineWeb: id mh + C4 + custom filters"},"big-run-sampled_full_filtered_no_dedup":{"x":[0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,29.360128000000003,31.45728,33.554432000000006,35.651584,37.748736,39.845888,41.94304,44.040192000000005,46.137344000000006,48.234496,50.331648,52.4288,54.525952000000004,56.623104000000005,58.720256000000006,60.817408,62.91456,65.011712,67.10886400000001,69.206016,71.303168,73.40032000000001,75.497472,77.59462400000001,79.691776,81.788928,83.88608,85.983232,88.08038400000001,90.177536,92.27468800000001,94.37184,96.468992,98.56614400000001,100.663296,102.76044800000001,104.8576,106.95475200000001,109.05190400000001,111.149056,113.24620800000001,115.34336,117.44051200000001,119.537664,121.634816,123.73196800000001,125.82912,127.92627200000001,130.023424,132.120576,134.21772800000002,136.31488000000002,138.412032,140.509184,142.606336,144.70348800000002,146.80064000000002,148.897792,150.994944,153.092096,155.18924800000002,157.28640000000001,159.383552,161.480704,163.577856,165.67500800000002,167.77216,169.869312,171.966464,174.06361600000002,176.16076800000002,178.25792,180.355072,182.452224,184.54937600000002,186.64652800000002,188.74368,190.840832,192.937984,195.03513600000002,197.13228800000002,199.22944,201.326592,203.423744,205.52089600000002,207.61804800000002,209.7152,211.812352,213.90950400000003,216.00665600000002,218.10380800000001,220.20096,222.298112,224.39526400000003,226.49241600000002,228.589568,230.68672,232.783872,234.88102400000002,236.97817600000002,239.075328,241.17248,243.269632,245.36678400000002,247.46393600000002,249.561088,251.65824,253.75539200000003,255.85254400000002,257.949696,260.046848,262.144,264.241152,266.338304,268.43545600000004,270.53260800000004,272.62976000000003,274.726912,276.824064,278.921216,281.018368,283.11552,285.212672,287.309824,289.40697600000004,291.50412800000004,293.60128000000003,295.698432,297.795584,299.892736,301.989888,304.08704,306.184192,308.28134400000005,310.37849600000004,312.47564800000004,314.57280000000003,316.669952,318.767104,320.864256,322.961408,325.05856,327.155712,329.25286400000005,331.35001600000004,333.44716800000003,335.54432,337.641472,339.738624,341.835776,343.932928,346.03008,348.12723200000005,350.22438400000004],"y":[0.2509999871253967,0.2894999980926513,0.3235000073909759,0.3389999866485595,0.3384999930858612,0.3459999859333038,0.359499990940094,0.3429999947547912,0.3619999885559082,0.3564999997615814,0.3625000119209289,0.363999992609024,0.3680000007152557,0.3680000007152557,0.3684999942779541,0.375,0.3734999895095825,0.3849999904632568,0.3944999873638153,0.3865000009536743,0.395000010728836,0.3935000002384186,0.3980000019073486,0.3910000026226043,0.3885000050067901,0.3914999961853027,0.3815000057220459,0.395000010728836,0.3894999921321869,0.395000010728836,0.3935000002384186,0.4034999907016754,0.4004999995231628,0.3970000147819519,0.3975000083446502,0.3995000123977661,0.3980000019073486,0.4034999907016754,0.3959999978542328,0.3989999890327453,0.402999997138977,0.3880000114440918,0.3980000019073486,0.4040000140666961,0.3989999890327453,0.3970000147819519,0.3925000131130218,0.4120000004768371,0.3935000002384186,0.395000010728836,0.4070000052452087,0.3935000002384186,0.4034999907016754,0.4189999997615814,0.4129999876022339,0.4160000085830688,0.4149999916553497,0.418500006198883,0.4225000143051147,0.4174999892711639,0.4210000038146972,0.4045000076293945,0.4079999923706054,0.4124999940395355,0.4144999980926513,0.4169999957084656,0.4194999933242798,0.4154999852180481,0.4169999957084656,0.4225000143051147,0.4225000143051147,0.4230000078678131,0.4160000085830688,0.4325000047683716,0.4325000047683716,0.4199999868869781,0.4199999868869781,0.4189999997615814,0.4269999861717224,0.4259999990463257,0.4230000078678131,0.4144999980926513,0.4329999983310699,0.4275000095367431,0.4305000007152557,0.4289999902248382,0.4235000014305115,0.4235000014305115,0.4325000047683716,0.4244999885559082,0.4314999878406524,0.4194999933242798,0.4350000023841858,0.4269999861717224,0.4235000014305115,0.4300000071525574,0.4284999966621399,0.4255000054836273,0.4280000030994415,0.4345000088214874,0.4225000143051147,0.4334999918937683,0.4300000071525574,0.4350000023841858,0.429500013589859,0.4325000047683716,0.4384999871253967,0.4345000088214874,0.4354999959468841,0.4359999895095825,0.4354999959468841,0.4424999952316284,0.4424999952316284,0.4320000112056732,0.4280000030994415,0.4390000104904175,0.4480000138282776,0.4415000081062317,0.4384999871253967,0.4390000104904175,0.4494999945163727,0.4449999928474426,0.4384999871253967,0.4424999952316284,0.4359999895095825,0.445499986410141,0.4399999976158142,0.4375,0.4410000145435333,0.4384999871253967,0.4375,0.4329999983310699,0.4370000064373016,0.4354999959468841,0.4440000057220459,0.4384999871253967,0.4384999871253967,0.4390000104904175,0.4424999952316284,0.4379999935626983,0.4345000088214874,0.4354999959468841,0.4440000057220459,0.4395000040531158,0.4465000033378601,0.4404999911785126,0.4505000114440918,0.4480000138282776,0.4449999928474426,0.445499986410141,0.4410000145435333,0.4485000073909759,0.4460000097751617,0.4480000138282776,0.4465000033378601,0.4460000097751617,0.4460000097751617,0.4395000040531158,0.4474999904632568,0.4469999969005584,0.4404999911785126,0.4440000057220459,0.4435000121593475,0.4435000121593475,0.4514999985694885,0.4474999904632568,0.4474999904632568,0.445499986410141],"label":"FineWeb: base filtering only"}},"layout":{"title":{"text":"The different FineWeb processing steps"}}}
|
|
|
|
dist/assets/data/plots/all_filtering_steps/commonsense_qa_acc_norm.json
DELETED
@@ -1 +0,0 @@
|
|
1 |
-
{"data":{"big-run-sampled_full_ind_minhash":{"x":[0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,29.360128000000003,31.45728,33.554432000000006,35.651584,37.748736,39.845888,41.94304,44.040192000000005,46.137344000000006,48.234496,50.331648,52.4288,54.525952000000004,56.623104000000005,58.720256000000006,60.817408,62.91456,65.011712,67.10886400000001,69.206016,71.303168,73.40032000000001,75.497472,77.59462400000001,79.691776,81.788928,83.88608,85.983232,88.08038400000001,90.177536,92.27468800000001,94.37184,96.468992,98.56614400000001,100.663296,102.76044800000001,104.8576,106.95475200000001,109.05190400000001,111.149056,113.24620800000001,115.34336,117.44051200000001,119.537664,121.634816,123.73196800000001,125.82912,127.92627200000001,130.023424,132.120576,134.21772800000002,136.31488000000002,138.412032,140.509184,142.606336,144.70348800000002,146.80064000000002,148.897792,150.994944,153.092096,155.18924800000002,157.28640000000001,159.383552,161.480704,163.577856,165.67500800000002,167.77216,169.869312,171.966464,174.06361600000002,176.16076800000002,178.25792,180.355072,182.452224,184.54937600000002,186.64652800000002,188.74368,190.840832,192.937984,195.03513600000002,197.13228800000002,199.22944,201.326592,203.423744,205.52089600000002,207.61804800000002,209.7152,211.812352,213.90950400000003,216.00665600000002,218.10380800000001,220.20096,222.298112,224.39526400000003,226.49241600000002,228.589568,230.68672,232.783872,234.88102400000002,236.97817600000002,239.075328,241.17248,243.269632,245.36678400000002,247.46393600000002,249.561088,251.65824,253.75539200000003,255.85254400000002,257.949696,260.046848,262.144,264.241152,266.338304,268.43545600000004,270.53260800000004,272.62976000000003,274.726912,276.824064,278.921216,281.018368,283.11552,285.212672,287.309824,289.40697600000004,291.50412800000004,293.60128000000003,295.698432,297.795584,299.892736,301.989888,304.08704,306.184192,308.28134400000005,310.37849600000004,312.47564800000004,314.57280000000003,316.669952,318.767104,320.864256,322.961408,325.05856,327.155712,329.25286400000005,331.35001600000004,333.44716800000003,335.54432,337.641472,339.738624,341.835776,343.932928,346.03008,348.12723200000005,350.22438400000004],"y":[0.2329999953508377,0.2639999985694885,0.2790000140666961,0.296999990940094,0.3109999895095825,0.3240000009536743,0.3070000112056732,0.3210000097751617,0.31700000166893,0.3339999914169311,0.324999988079071,0.3260000050067901,0.3330000042915344,0.3409999907016754,0.3400000035762787,0.3529999852180481,0.3400000035762787,0.3490000069141388,0.3529999852180481,0.3499999940395355,0.3459999859333038,0.3370000123977661,0.356000006198883,0.3490000069141388,0.3429999947547912,0.3490000069141388,0.3610000014305115,0.3499999940395355,0.3569999933242798,0.3610000014305115,0.3619999885559082,0.3449999988079071,0.3409999907016754,0.3420000076293945,0.3449999988079071,0.3409999907016754,0.3379999995231628,0.3420000076293945,0.3569999933242798,0.3529999852180481,0.3610000014305115,0.363999992609024,0.3600000143051147,0.3540000021457672,0.3499999940395355,0.3689999878406524,0.367000013589859,0.3569999933242798,0.3610000014305115,0.3680000007152557,0.3630000054836273,0.3709999918937683,0.3540000021457672,0.3580000102519989,0.367000013589859,0.3529999852180481,0.356000006198883,0.3569999933242798,0.3610000014305115,0.3700000047683716,0.375,0.3709999918937683,0.3819999992847442,0.3709999918937683,0.3650000095367431,0.3709999918937683,0.3650000095367431,0.3709999918937683,0.3840000033378601,0.3740000128746032,0.375,0.356000006198883,0.3689999878406524,0.3700000047683716,0.3819999992847442,0.3799999952316284,0.3779999911785126,0.3729999959468841,0.3709999918937683,0.3759999871253967,0.3709999918937683,0.3759999871253967,0.3779999911785126,0.3779999911785126,0.3689999878406524,0.3840000033378601,0.3860000073909759,0.3849999904632568,0.3790000081062317,0.375,0.3849999904632568,0.3720000088214874,0.3770000040531158,0.3799999952316284,0.3810000121593475,0.382999986410141,0.3650000095367431,0.3740000128746032,0.382999986410141,0.3689999878406524,0.3759999871253967,0.3869999945163727,0.3889999985694885,0.3860000073909759,0.3819999992847442,0.3689999878406524,0.3860000073909759,0.3810000121593475,0.382999986410141,0.3819999992847442,0.3840000033378601,0.3889999985694885,0.3880000114440918,0.3849999904632568,0.3799999952316284,0.3910000026226043,0.3989999890327453,0.3880000114440918,0.3880000114440918,0.3840000033378601,0.3880000114440918,0.3860000073909759,0.3919999897480011,0.3880000114440918,0.3939999938011169,0.3869999945163727,0.3919999897480011,0.3910000026226043,0.382999986410141,0.3930000066757202,0.3840000033378601,0.3880000114440918,0.3840000033378601,0.3819999992847442,0.382999986410141,0.3880000114440918,0.3860000073909759,0.3860000073909759,0.3869999945163727,0.3860000073909759,0.3899999856948852,0.3819999992847442,0.3860000073909759,0.3889999985694885,0.3840000033378601,0.395000010728836,0.3899999856948852,0.3899999856948852,0.3910000026226043,0.3959999978542328,0.3959999978542328,0.3919999897480011,0.3980000019073486,0.3880000114440918,0.3930000066757202,0.4000000059604645,0.3919999897480011,0.3919999897480011,0.4040000140666961,0.3930000066757202,0.3970000147819519,0.3889999985694885,0.3959999978542328,0.3930000066757202,0.3939999938011169,0.3970000147819519,0.3910000026226043,0.4020000100135803],"label":"FineWeb: independent MinHash (id mh)"},"big-run-sampled-fineweb-c4-filters":{"x":[0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,29.360128000000003,31.45728,33.554432000000006,35.651584,37.748736,39.845888,41.94304,44.040192000000005,46.137344000000006,48.234496,50.331648,52.4288,54.525952000000004,56.623104000000005,58.720256000000006,60.817408,62.91456,65.011712,67.10886400000001,69.206016,71.303168,73.40032000000001,75.497472,77.59462400000001,79.691776,81.788928,83.88608,85.983232,88.08038400000001,90.177536,92.27468800000001,94.37184,96.468992,98.56614400000001,100.663296,102.76044800000001,104.8576,106.95475200000001,109.05190400000001,111.149056,113.24620800000001,115.34336,117.44051200000001,119.537664,121.634816,123.73196800000001,125.82912,127.92627200000001,130.023424,132.120576,134.21772800000002,136.31488000000002,138.412032,140.509184,142.606336,144.70348800000002,146.80064000000002,148.897792,150.994944,153.092096,155.18924800000002,157.28640000000001,159.383552,161.480704,163.577856,165.67500800000002,167.77216,169.869312,171.966464,174.06361600000002,176.16076800000002,178.25792,180.355072,182.452224,184.54937600000002,186.64652800000002,188.74368,190.840832,192.937984,195.03513600000002,197.13228800000002,199.22944,201.326592,203.423744,205.52089600000002,207.61804800000002,209.7152,211.812352,213.90950400000003,216.00665600000002,218.10380800000001,220.20096,222.298112,224.39526400000003,226.49241600000002,228.589568,230.68672,232.783872,234.88102400000002,236.97817600000002,239.075328,241.17248,243.269632,245.36678400000002,247.46393600000002,249.561088,251.65824,253.75539200000003,255.85254400000002,257.949696,260.046848,262.144,264.241152,266.338304,268.43545600000004,270.53260800000004,272.62976000000003,274.726912,276.824064,278.921216,281.018368,283.11552,285.212672,287.309824,289.40697600000004,291.50412800000004,293.60128000000003,295.698432,297.795584,299.892736,301.989888,304.08704,306.184192,308.28134400000005,310.37849600000004,312.47564800000004,314.57280000000003,316.669952,318.767104,320.864256,322.961408,325.05856,327.155712,329.25286400000005,331.35001600000004,333.44716800000003,335.54432,337.641472,339.738624,341.835776,343.932928,346.03008,348.12723200000005,350.22438400000004],"y":[0.2329999953508377,0.2630000114440918,0.2770000100135803,0.3050000071525574,0.3100000023841858,0.3149999976158142,0.3190000057220459,0.3350000083446502,0.3210000097751617,0.3310000002384186,0.3389999866485595,0.3289999961853027,0.3379999995231628,0.3420000076293945,0.3409999907016754,0.3510000109672546,0.3479999899864197,0.3440000116825104,0.3569999933242798,0.3529999852180481,0.3680000007152557,0.3549999892711639,0.3499999940395355,0.3589999973773956,0.3529999852180481,0.3459999859333038,0.3529999852180481,0.3630000054836273,0.3600000143051147,0.3490000069141388,0.3540000021457672,0.3600000143051147,0.356000006198883,0.3470000028610229,0.3470000028610229,0.3549999892711639,0.3440000116825104,0.3529999852180481,0.3630000054836273,0.3449999988079071,0.3479999899864197,0.3490000069141388,0.3519999980926513,0.367000013589859,0.356000006198883,0.356000006198883,0.3519999980926513,0.3580000102519989,0.3569999933242798,0.3659999966621399,0.3759999871253967,0.3689999878406524,0.3779999911785126,0.3549999892711639,0.3610000014305115,0.3650000095367431,0.3610000014305115,0.3580000102519989,0.3729999959468841,0.367000013589859,0.3689999878406524,0.3540000021457672,0.363999992609024,0.3700000047683716,0.3650000095367431,0.3529999852180481,0.3709999918937683,0.3740000128746032,0.3680000007152557,0.3689999878406524,0.3580000102519989,0.3650000095367431,0.3619999885559082,0.3619999885559082,0.3630000054836273,0.3610000014305115,0.3659999966621399,0.375,0.375,0.3700000047683716,0.3840000033378601,0.3779999911785126,0.382999986410141,0.367000013589859,0.3860000073909759,0.3770000040531158,0.3790000081062317,0.3880000114440918,0.3659999966621399,0.3630000054836273,0.3770000040531158,0.3779999911785126,0.3680000007152557,0.3779999911785126,0.375,0.3819999992847442,0.3720000088214874,0.3799999952316284,0.382999986410141,0.375,0.367000013589859,0.3869999945163727,0.3810000121593475,0.382999986410141,0.3709999918937683,0.3720000088214874,0.3689999878406524,0.367000013589859,0.3819999992847442,0.3720000088214874,0.3849999904632568,0.3709999918937683,0.3740000128746032,0.3709999918937683,0.3799999952316284,0.3799999952316284,0.3869999945163727,0.375,0.3680000007152557,0.3779999911785126,0.3799999952316284,0.3720000088214874,0.3799999952316284,0.3759999871253967,0.3819999992847442,0.3770000040531158,0.3810000121593475,0.3720000088214874,0.3860000073909759,0.3810000121593475,0.3790000081062317,0.3860000073909759,0.3759999871253967,0.3860000073909759,0.3810000121593475,0.3790000081062317,0.3799999952316284,0.3840000033378601,0.3810000121593475,0.3810000121593475,0.3849999904632568,0.3869999945163727,0.3819999992847442,0.3740000128746032,0.3779999911785126,0.3860000073909759,0.3889999985694885,0.3849999904632568,0.3889999985694885,0.3810000121593475,0.3849999904632568,0.3840000033378601,0.3860000073909759,0.3889999985694885,0.382999986410141,0.3849999904632568,0.3840000033378601,0.3880000114440918,0.3810000121593475,0.3849999904632568,0.3790000081062317,0.3799999952316284,0.3819999992847442,0.382999986410141,0.3790000081062317,0.3810000121593475,0.3779999911785126,0.3889999985694885],"label":"FineWeb: id mh + C4 filters"},"big-run-sampled_full_filtered_no_dedup":{"x":[0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,29.360128000000003,31.45728,33.554432000000006,35.651584,37.748736,39.845888,41.94304,44.040192000000005,46.137344000000006,48.234496,50.331648,52.4288,54.525952000000004,56.623104000000005,58.720256000000006,60.817408,62.91456,65.011712,67.10886400000001,69.206016,71.303168,73.40032000000001,75.497472,77.59462400000001,79.691776,81.788928,83.88608,85.983232,88.08038400000001,90.177536,92.27468800000001,94.37184,96.468992,98.56614400000001,100.663296,102.76044800000001,104.8576,106.95475200000001,109.05190400000001,111.149056,113.24620800000001,115.34336,117.44051200000001,119.537664,121.634816,123.73196800000001,125.82912,127.92627200000001,130.023424,132.120576,134.21772800000002,136.31488000000002,138.412032,140.509184,142.606336,144.70348800000002,146.80064000000002,148.897792,150.994944,153.092096,155.18924800000002,157.28640000000001,159.383552,161.480704,163.577856,165.67500800000002,167.77216,169.869312,171.966464,174.06361600000002,176.16076800000002,178.25792,180.355072,182.452224,184.54937600000002,186.64652800000002,188.74368,190.840832,192.937984,195.03513600000002,197.13228800000002,199.22944,201.326592,203.423744,205.52089600000002,207.61804800000002,209.7152,211.812352,213.90950400000003,216.00665600000002,218.10380800000001,220.20096,222.298112,224.39526400000003,226.49241600000002,228.589568,230.68672,232.783872,234.88102400000002,236.97817600000002,239.075328,241.17248,243.269632,245.36678400000002,247.46393600000002,249.561088,251.65824,253.75539200000003,255.85254400000002,257.949696,260.046848,262.144,264.241152,266.338304,268.43545600000004,270.53260800000004,272.62976000000003,274.726912,276.824064,278.921216,281.018368,283.11552,285.212672,287.309824,289.40697600000004,291.50412800000004,293.60128000000003,295.698432,297.795584,299.892736,301.989888,304.08704,306.184192,308.28134400000005,310.37849600000004,312.47564800000004,314.57280000000003,316.669952,318.767104,320.864256,322.961408,325.05856,327.155712,329.25286400000005,331.35001600000004,333.44716800000003,335.54432,337.641472,339.738624,341.835776,343.932928,346.03008,348.12723200000005,350.22438400000004],"y":[0.2329999953508377,0.2599999904632568,0.277999997138977,0.2910000085830688,0.3070000112056732,0.3140000104904175,0.3019999861717224,0.3059999942779541,0.3210000097751617,0.3230000138282776,0.324999988079071,0.3149999976158142,0.3109999895095825,0.3339999914169311,0.3319999873638153,0.3319999873638153,0.3300000131130218,0.3370000123977661,0.3219999969005584,0.3370000123977661,0.328000009059906,0.3339999914169311,0.3420000076293945,0.3400000035762787,0.3440000116825104,0.3510000109672546,0.3409999907016754,0.3449999988079071,0.3339999914169311,0.3540000021457672,0.3339999914169311,0.3470000028610229,0.3470000028610229,0.3440000116825104,0.3589999973773956,0.3569999933242798,0.3630000054836273,0.3549999892711639,0.3589999973773956,0.3449999988079071,0.3549999892711639,0.3449999988079071,0.3389999866485595,0.3499999940395355,0.3610000014305115,0.3619999885559082,0.3600000143051147,0.3519999980926513,0.3479999899864197,0.356000006198883,0.3519999980926513,0.3440000116825104,0.3490000069141388,0.3519999980926513,0.3470000028610229,0.3589999973773956,0.3449999988079071,0.3490000069141388,0.356000006198883,0.3619999885559082,0.3569999933242798,0.3659999966621399,0.3610000014305115,0.3549999892711639,0.3700000047683716,0.363999992609024,0.3600000143051147,0.3580000102519989,0.3549999892711639,0.3619999885559082,0.3689999878406524,0.3630000054836273,0.363999992609024,0.3700000047683716,0.367000013589859,0.3630000054836273,0.3630000054836273,0.3700000047683716,0.3589999973773956,0.3540000021457672,0.3540000021457672,0.3659999966621399,0.3619999885559082,0.3589999973773956,0.3650000095367431,0.3709999918937683,0.3680000007152557,0.3689999878406524,0.3650000095367431,0.3729999959468841,0.3619999885559082,0.3689999878406524,0.3569999933242798,0.3510000109672546,0.3680000007152557,0.363999992609024,0.3700000047683716,0.3659999966621399,0.3659999966621399,0.363999992609024,0.3619999885559082,0.3659999966621399,0.3680000007152557,0.3610000014305115,0.3720000088214874,0.3729999959468841,0.3810000121593475,0.3630000054836273,0.3689999878406524,0.3709999918937683,0.3759999871253967,0.382999986410141,0.3729999959468841,0.3720000088214874,0.3680000007152557,0.3659999966621399,0.3650000095367431,0.363999992609024,0.3589999973773956,0.356000006198883,0.3650000095367431,0.3659999966621399,0.367000013589859,0.3729999959468841,0.3720000088214874,0.375,0.3740000128746032,0.3700000047683716,0.3569999933242798,0.3759999871253967,0.3740000128746032,0.367000013589859,0.3770000040531158,0.3759999871253967,0.3709999918937683,0.3779999911785126,0.3709999918937683,0.3689999878406524,0.3799999952316284,0.3630000054836273,0.375,0.3700000047683716,0.3700000047683716,0.3729999959468841,0.3720000088214874,0.3790000081062317,0.375,0.3729999959468841,0.3770000040531158,0.3799999952316284,0.3779999911785126,0.3720000088214874,0.3799999952316284,0.3759999871253967,0.3799999952316284,0.3790000081062317,0.375,0.3740000128746032,0.3729999959468841,0.3840000033378601,0.3659999966621399,0.3759999871253967,0.3720000088214874,0.3720000088214874,0.3759999871253967,0.375,0.3650000095367431,0.3729999959468841],"label":"FineWeb: base filtering only"},"big-run-fineweb-v1-all-dumps":{"x":[0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,29.360128000000003,31.45728,33.554432000000006,35.651584,37.748736,39.845888,41.94304,44.040192000000005,46.137344000000006,48.234496,50.331648,52.4288,54.525952000000004,56.623104000000005,58.720256000000006,60.817408,62.91456,65.011712,67.10886400000001,69.206016,71.303168,73.40032000000001,75.497472,77.59462400000001,79.691776,81.788928,83.88608,85.983232,88.08038400000001,90.177536,92.27468800000001,94.37184,96.468992,98.56614400000001,100.663296,102.76044800000001,104.8576,106.95475200000001,109.05190400000001,111.149056,113.24620800000001,115.34336,117.44051200000001,119.537664,121.634816,123.73196800000001,125.82912,127.92627200000001,130.023424,132.120576,134.21772800000002,136.31488000000002,138.412032,140.509184,142.606336,144.70348800000002,146.80064000000002,148.897792,150.994944,153.092096,155.18924800000002,157.28640000000001,159.383552,161.480704,163.577856,165.67500800000002,167.77216,169.869312,171.966464,174.06361600000002,176.16076800000002,178.25792,180.355072,182.452224,184.54937600000002,186.64652800000002,188.74368,190.840832,192.937984,195.03513600000002,197.13228800000002,199.22944,201.326592,203.423744,205.52089600000002,207.61804800000002,209.7152,211.812352,213.90950400000003,216.00665600000002,218.10380800000001,220.20096,222.298112,224.39526400000003,226.49241600000002,228.589568,230.68672,232.783872,234.88102400000002,236.97817600000002,239.075328,241.17248,243.269632,245.36678400000002,247.46393600000002,249.561088,251.65824,253.75539200000003,255.85254400000002,257.949696,260.046848,262.144,264.241152,266.338304,268.43545600000004,270.53260800000004,272.62976000000003,274.726912,276.824064,278.921216,281.018368,283.11552,285.212672,287.309824,289.40697600000004,291.50412800000004,293.60128000000003,295.698432,297.795584,299.892736,301.989888,304.08704,306.184192,308.28134400000005,310.37849600000004,312.47564800000004,314.57280000000003,316.669952,318.767104,320.864256,322.961408,325.05856,327.155712,329.25286400000005,331.35001600000004,333.44716800000003,335.54432,337.641472,339.738624,341.835776,343.932928,346.03008,348.12723200000005,350.22438400000004],"y":[0.2329999953508377,0.2630000114440918,0.2879999876022339,0.296999990940094,0.2960000038146972,0.3039999902248382,0.3129999935626983,0.3149999976158142,0.3300000131130218,0.3300000131130218,0.3350000083446502,0.3379999995231628,0.3370000123977661,0.3330000042915344,0.3370000123977661,0.3389999866485595,0.3429999947547912,0.3659999966621399,0.3459999859333038,0.3479999899864197,0.3440000116825104,0.3470000028610229,0.3569999933242798,0.3510000109672546,0.3680000007152557,0.3529999852180481,0.3680000007152557,0.3549999892711639,0.3540000021457672,0.3529999852180481,0.3499999940395355,0.3569999933242798,0.3529999852180481,0.3499999940395355,0.3540000021457672,0.3659999966621399,0.3600000143051147,0.3680000007152557,0.3659999966621399,0.3600000143051147,0.3659999966621399,0.3540000021457672,0.3580000102519989,0.367000013589859,0.3549999892711639,0.3729999959468841,0.3580000102519989,0.3619999885559082,0.3659999966621399,0.3680000007152557,0.3650000095367431,0.3619999885559082,0.3759999871253967,0.3689999878406524,0.3689999878406524,0.3619999885559082,0.3630000054836273,0.3650000095367431,0.3799999952316284,0.3729999959468841,0.3740000128746032,0.367000013589859,0.3720000088214874,0.3600000143051147,0.3650000095367431,0.3729999959468841,0.3589999973773956,0.3799999952316284,0.3589999973773956,0.3799999952316284,0.3680000007152557,0.367000013589859,0.367000013589859,0.3700000047683716,0.3790000081062317,0.3729999959468841,0.3770000040531158,0.3709999918937683,0.3759999871253967,0.3759999871253967,0.3700000047683716,0.3720000088214874,0.3840000033378601,0.3770000040531158,0.3770000040531158,0.3790000081062317,0.3860000073909759,0.3759999871253967,0.3650000095367431,0.3700000047683716,0.3819999992847442,0.3819999992847442,0.3630000054836273,0.3689999878406524,0.3759999871253967,0.3759999871253967,0.3779999911785126,0.3740000128746032,0.3860000073909759,0.3619999885559082,0.3740000128746032,0.3799999952316284,0.3819999992847442,0.3740000128746032,0.3770000040531158,0.375,0.3810000121593475,0.3729999959468841,0.3880000114440918,0.3840000033378601,0.3840000033378601,0.3770000040531158,0.3740000128746032,0.382999986410141,0.3840000033378601,0.3770000040531158,0.3869999945163727,0.3729999959468841,0.3770000040531158,0.3759999871253967,0.3840000033378601,0.3880000114440918,0.3759999871253967,0.3740000128746032,0.3720000088214874,0.3790000081062317,0.3740000128746032,0.3630000054836273,0.3810000121593475,0.3720000088214874,0.3729999959468841,0.3720000088214874,0.3840000033378601,0.3759999871253967,0.3840000033378601,0.3790000081062317,0.3819999992847442,0.3689999878406524,0.3700000047683716,0.3790000081062317,0.3729999959468841,0.3799999952316284,0.3799999952316284,0.3740000128746032,0.3689999878406524,0.3810000121593475,0.3720000088214874,0.382999986410141,0.3819999992847442,0.3720000088214874,0.3799999952316284,0.3740000128746032,0.3729999959468841,0.3790000081062317,0.3720000088214874,0.3680000007152557,0.3779999911785126,0.3799999952316284,0.3729999959468841,0.3740000128746032,0.3729999959468841,0.3759999871253967,0.3790000081062317,0.3689999878406524,0.3680000007152557,0.3659999966621399,0.3729999959468841,0.3680000007152557],"label":"FineWeb: id mh + C4 + custom filters"}},"layout":{"title":{"text":"The different FineWeb processing steps"}}}
|
|
|
|
dist/assets/data/plots/all_filtering_steps/hellaswag_acc_norm.json
DELETED
@@ -1 +0,0 @@
|
|
1 |
-
{"data":{"big-run-fineweb-v1-all-dumps":{"x":[0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,29.360128000000003,31.45728,33.554432000000006,35.651584,37.748736,39.845888,41.94304,44.040192000000005,46.137344000000006,48.234496,50.331648,52.4288,54.525952000000004,56.623104000000005,58.720256000000006,60.817408,62.91456,65.011712,67.10886400000001,69.206016,71.303168,73.40032000000001,75.497472,77.59462400000001,79.691776,81.788928,83.88608,85.983232,88.08038400000001,90.177536,92.27468800000001,94.37184,96.468992,98.56614400000001,100.663296,102.76044800000001,104.8576,106.95475200000001,109.05190400000001,111.149056,113.24620800000001,115.34336,117.44051200000001,119.537664,121.634816,123.73196800000001,125.82912,127.92627200000001,130.023424,132.120576,134.21772800000002,136.31488000000002,138.412032,140.509184,142.606336,144.70348800000002,146.80064000000002,148.897792,150.994944,153.092096,155.18924800000002,157.28640000000001,159.383552,161.480704,163.577856,165.67500800000002,167.77216,169.869312,171.966464,174.06361600000002,176.16076800000002,178.25792,180.355072,182.452224,184.54937600000002,186.64652800000002,188.74368,190.840832,192.937984,195.03513600000002,197.13228800000002,199.22944,201.326592,203.423744,205.52089600000002,207.61804800000002,209.7152,211.812352,213.90950400000003,216.00665600000002,218.10380800000001,220.20096,222.298112,224.39526400000003,226.49241600000002,228.589568,230.68672,232.783872,234.88102400000002,236.97817600000002,239.075328,241.17248,243.269632,245.36678400000002,247.46393600000002,249.561088,251.65824,253.75539200000003,255.85254400000002,257.949696,260.046848,262.144,264.241152,266.338304,268.43545600000004,270.53260800000004,272.62976000000003,274.726912,276.824064,278.921216,281.018368,283.11552,285.212672,287.309824,289.40697600000004,291.50412800000004,293.60128000000003,295.698432,297.795584,299.892736,301.989888,304.08704,306.184192,308.28134400000005,310.37849600000004,312.47564800000004,314.57280000000003,316.669952,318.767104,320.864256,322.961408,325.05856,327.155712,329.25286400000005,331.35001600000004,333.44716800000003,335.54432,337.641472,339.738624,341.835776,343.932928,346.03008,348.12723200000005,350.22438400000004],"y":[0.257999986410141,0.2919999957084656,0.3310000002384186,0.3549999892711639,0.3939999938011169,0.4149999916553497,0.4329999983310699,0.4460000097751617,0.4589999914169311,0.4819999933242798,0.4769999980926513,0.4830000102519989,0.4909999966621399,0.5059999823570251,0.5059999823570251,0.503000020980835,0.5170000195503235,0.5049999952316284,0.5210000276565552,0.5130000114440918,0.5189999938011169,0.5360000133514404,0.5320000052452087,0.5460000038146973,0.5400000214576721,0.5379999876022339,0.531000018119812,0.5460000038146973,0.5509999990463257,0.5519999861717224,0.5559999942779541,0.5609999895095825,0.5559999942779541,0.5580000281333923,0.5450000166893005,0.5509999990463257,0.5590000152587891,0.5649999976158142,0.5619999766349792,0.5680000185966492,0.5669999718666077,0.5709999799728394,0.5569999814033508,0.5640000104904175,0.5690000057220459,0.5720000267028809,0.5759999752044678,0.5839999914169312,0.5699999928474426,0.5740000009536743,0.5830000042915344,0.5839999914169312,0.5799999833106995,0.5830000042915344,0.574999988079071,0.5910000205039978,0.5799999833106995,0.5879999995231628,0.6039999723434448,0.578000009059906,0.5849999785423279,0.5889999866485596,0.5849999785423279,0.6019999980926514,0.5929999947547913,0.5820000171661377,0.5860000252723694,0.5910000205039978,0.5849999785423279,0.5849999785423279,0.5839999914169312,0.5860000252723694,0.5979999899864197,0.5849999785423279,0.597000002861023,0.5960000157356262,0.6019999980926514,0.6060000061988831,0.5989999771118164,0.5889999866485596,0.5920000076293945,0.5960000157356262,0.5950000286102295,0.6060000061988831,0.5960000157356262,0.6000000238418579,0.6069999933242798,0.6039999723434448,0.6069999933242798,0.6010000109672546,0.6060000061988831,0.6129999756813049,0.5989999771118164,0.6200000047683716,0.5979999899864197,0.609000027179718,0.6029999852180481,0.609000027179718,0.6179999709129333,0.6150000095367432,0.6060000061988831,0.6069999933242798,0.6119999885559082,0.6190000176429749,0.6079999804496765,0.6150000095367432,0.6079999804496765,0.6190000176429749,0.6079999804496765,0.609000027179718,0.6079999804496765,0.6179999709129333,0.6140000224113464,0.6200000047683716,0.621999979019165,0.6129999756813049,0.6200000047683716,0.6129999756813049,0.6110000014305115,0.6069999933242798,0.609000027179718,0.6159999966621399,0.6169999837875366,0.6129999756813049,0.6169999837875366,0.6159999966621399,0.6200000047683716,0.6150000095367432,0.6240000128746033,0.6179999709129333,0.6179999709129333,0.6129999756813049,0.6179999709129333,0.6110000014305115,0.6190000176429749,0.6200000047683716,0.6150000095367432,0.6159999966621399,0.621999979019165,0.6209999918937683,0.6230000257492065,0.6200000047683716,0.6240000128746033,0.6159999966621399,0.6200000047683716,0.6159999966621399,0.6179999709129333,0.6119999885559082,0.6269999742507935,0.6230000257492065,0.6200000047683716,0.6240000128746033,0.6190000176429749,0.6169999837875366,0.6299999952316284,0.625,0.6179999709129333,0.6150000095367432,0.6259999871253967,0.621999979019165,0.625,0.6190000176429749,0.6259999871253967,0.6340000033378601,0.628000020980835,0.6290000081062317,0.628000020980835,0.6269999742507935],"label":"FineWeb: id mh + C4 + custom filters"},"big-run-sampled-fineweb-c4-filters":{"x":[0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,29.360128000000003,31.45728,33.554432000000006,35.651584,37.748736,39.845888,41.94304,44.040192000000005,46.137344000000006,48.234496,50.331648,52.4288,54.525952000000004,56.623104000000005,58.720256000000006,60.817408,62.91456,65.011712,67.10886400000001,69.206016,71.303168,73.40032000000001,75.497472,77.59462400000001,79.691776,81.788928,83.88608,85.983232,88.08038400000001,90.177536,92.27468800000001,94.37184,96.468992,98.56614400000001,100.663296,102.76044800000001,104.8576,106.95475200000001,109.05190400000001,111.149056,113.24620800000001,115.34336,117.44051200000001,119.537664,121.634816,123.73196800000001,125.82912,127.92627200000001,130.023424,132.120576,134.21772800000002,136.31488000000002,138.412032,140.509184,142.606336,144.70348800000002,146.80064000000002,148.897792,150.994944,153.092096,155.18924800000002,157.28640000000001,159.383552,161.480704,163.577856,165.67500800000002,167.77216,169.869312,171.966464,174.06361600000002,176.16076800000002,178.25792,180.355072,182.452224,184.54937600000002,186.64652800000002,188.74368,190.840832,192.937984,195.03513600000002,197.13228800000002,199.22944,201.326592,203.423744,205.52089600000002,207.61804800000002,209.7152,211.812352,213.90950400000003,216.00665600000002,218.10380800000001,220.20096,222.298112,224.39526400000003,226.49241600000002,228.589568,230.68672,232.783872,234.88102400000002,236.97817600000002,239.075328,241.17248,243.269632,245.36678400000002,247.46393600000002,249.561088,251.65824,253.75539200000003,255.85254400000002,257.949696,260.046848,262.144,264.241152,266.338304,268.43545600000004,270.53260800000004,272.62976000000003,274.726912,276.824064,278.921216,281.018368,283.11552,285.212672,287.309824,289.40697600000004,291.50412800000004,293.60128000000003,295.698432,297.795584,299.892736,301.989888,304.08704,306.184192,308.28134400000005,310.37849600000004,312.47564800000004,314.57280000000003,316.669952,318.767104,320.864256,322.961408,325.05856,327.155712,329.25286400000005,331.35001600000004,333.44716800000003,335.54432,337.641472,339.738624,341.835776,343.932928,346.03008,348.12723200000005,350.22438400000004],"y":[0.257999986410141,0.2849999964237213,0.3240000009536743,0.3580000102519989,0.3930000066757202,0.395000010728836,0.4309999942779541,0.44200000166893,0.4399999976158142,0.453000009059906,0.453000009059906,0.4650000035762787,0.4699999988079071,0.481000006198883,0.4839999973773956,0.4970000088214874,0.5059999823570251,0.4909999966621399,0.5120000243186951,0.5139999985694885,0.5170000195503235,0.5199999809265137,0.5170000195503235,0.5249999761581421,0.5220000147819519,0.5289999842643738,0.5350000262260437,0.531000018119812,0.5289999842643738,0.5339999794960022,0.527999997138977,0.5260000228881836,0.5429999828338623,0.5370000004768372,0.5329999923706055,0.5460000038146973,0.5429999828338623,0.5490000247955322,0.546999990940094,0.546999990940094,0.5490000247955322,0.5460000038146973,0.5559999942779541,0.5619999766349792,0.5569999814033508,0.5509999990463257,0.5550000071525574,0.5649999976158142,0.5690000057220459,0.5619999766349792,0.5529999732971191,0.5649999976158142,0.5730000138282776,0.5669999718666077,0.5740000009536743,0.5690000057220459,0.5699999928474426,0.574999988079071,0.5640000104904175,0.5789999961853027,0.5720000267028809,0.5640000104904175,0.574999988079071,0.5770000219345093,0.5740000009536743,0.5770000219345093,0.5740000009536743,0.5740000009536743,0.578000009059906,0.5759999752044678,0.5789999961853027,0.5799999833106995,0.578000009059906,0.5860000252723694,0.5809999704360962,0.5770000219345093,0.5849999785423279,0.5849999785423279,0.5799999833106995,0.578000009059906,0.5809999704360962,0.5870000123977661,0.5830000042915344,0.5720000267028809,0.5879999995231628,0.5830000042915344,0.5929999947547913,0.578000009059906,0.5889999866485596,0.5809999704360962,0.5789999961853027,0.593999981880188,0.5820000171661377,0.5910000205039978,0.5830000042915344,0.5879999995231628,0.5879999995231628,0.5889999866485596,0.5879999995231628,0.5899999737739563,0.5960000157356262,0.5899999737739563,0.5879999995231628,0.5870000123977661,0.5910000205039978,0.593999981880188,0.597000002861023,0.593999981880188,0.5979999899864197,0.593999981880188,0.5989999771118164,0.5929999947547913,0.597000002861023,0.6019999980926514,0.5989999771118164,0.6019999980926514,0.597000002861023,0.6000000238418579,0.6019999980926514,0.6039999723434448,0.597000002861023,0.6019999980926514,0.5950000286102295,0.6019999980926514,0.6079999804496765,0.6039999723434448,0.6100000143051147,0.6039999723434448,0.6029999852180481,0.6069999933242798,0.6060000061988831,0.6069999933242798,0.6000000238418579,0.6100000143051147,0.6100000143051147,0.6129999756813049,0.609000027179718,0.6010000109672546,0.6000000238418579,0.6110000014305115,0.609000027179718,0.6069999933242798,0.6119999885559082,0.6050000190734863,0.6110000014305115,0.6190000176429749,0.6169999837875366,0.6140000224113464,0.6100000143051147,0.6200000047683716,0.6200000047683716,0.6110000014305115,0.6150000095367432,0.6129999756813049,0.6079999804496765,0.6179999709129333,0.6200000047683716,0.6129999756813049,0.6190000176429749,0.6150000095367432,0.6240000128746033,0.6240000128746033,0.609000027179718,0.609000027179718,0.6159999966621399,0.6110000014305115,0.6110000014305115,0.6190000176429749],"label":"FineWeb: id mh + C4 filters"},"big-run-sampled_full_ind_minhash":{"x":[0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,29.360128000000003,31.45728,33.554432000000006,35.651584,37.748736,39.845888,41.94304,44.040192000000005,46.137344000000006,48.234496,50.331648,52.4288,54.525952000000004,56.623104000000005,58.720256000000006,60.817408,62.91456,65.011712,67.10886400000001,69.206016,71.303168,73.40032000000001,75.497472,77.59462400000001,79.691776,81.788928,83.88608,85.983232,88.08038400000001,90.177536,92.27468800000001,94.37184,96.468992,98.56614400000001,100.663296,102.76044800000001,104.8576,106.95475200000001,109.05190400000001,111.149056,113.24620800000001,115.34336,117.44051200000001,119.537664,121.634816,123.73196800000001,125.82912,127.92627200000001,130.023424,132.120576,134.21772800000002,136.31488000000002,138.412032,140.509184,142.606336,144.70348800000002,146.80064000000002,148.897792,150.994944,153.092096,155.18924800000002,157.28640000000001,159.383552,161.480704,163.577856,165.67500800000002,167.77216,169.869312,171.966464,174.06361600000002,176.16076800000002,178.25792,180.355072,182.452224,184.54937600000002,186.64652800000002,188.74368,190.840832,192.937984,195.03513600000002,197.13228800000002,199.22944,201.326592,203.423744,205.52089600000002,207.61804800000002,209.7152,211.812352,213.90950400000003,216.00665600000002,218.10380800000001,220.20096,222.298112,224.39526400000003,226.49241600000002,228.589568,230.68672,232.783872,234.88102400000002,236.97817600000002,239.075328,241.17248,243.269632,245.36678400000002,247.46393600000002,249.561088,251.65824,253.75539200000003,255.85254400000002,257.949696,260.046848,262.144,264.241152,266.338304,268.43545600000004,270.53260800000004,272.62976000000003,274.726912,276.824064,278.921216,281.018368,283.11552,285.212672,287.309824,289.40697600000004,291.50412800000004,293.60128000000003,295.698432,297.795584,299.892736,301.989888,304.08704,306.184192,308.28134400000005,310.37849600000004,312.47564800000004,314.57280000000003,316.669952,318.767104,320.864256,322.961408,325.05856,327.155712,329.25286400000005,331.35001600000004,333.44716800000003,335.54432,337.641472,339.738624,341.835776,343.932928,346.03008,348.12723200000005,350.22438400000004],"y":[0.257999986410141,0.3019999861717224,0.3059999942779541,0.335999995470047,0.3610000014305115,0.3819999992847442,0.4009999930858612,0.4020000100135803,0.4250000119209289,0.4309999942779541,0.4469999969005584,0.4519999921321869,0.453000009059906,0.4580000042915344,0.4749999940395355,0.4699999988079071,0.4799999892711639,0.4749999940395355,0.4769999980926513,0.481000006198883,0.4839999973773956,0.4959999918937683,0.5040000081062317,0.4970000088214874,0.4979999959468841,0.5070000290870667,0.5049999952316284,0.5109999775886536,0.515999972820282,0.5120000243186951,0.5120000243186951,0.515999972820282,0.5120000243186951,0.5249999761581421,0.5170000195503235,0.5199999809265137,0.5270000100135803,0.5170000195503235,0.5220000147819519,0.5260000228881836,0.5360000133514404,0.5339999794960022,0.5370000004768372,0.5339999794960022,0.5329999923706055,0.531000018119812,0.5329999923706055,0.5400000214576721,0.5429999828338623,0.5389999747276306,0.5419999957084656,0.5429999828338623,0.5360000133514404,0.5299999713897705,0.546999990940094,0.5360000133514404,0.5450000166893005,0.5440000295639038,0.5350000262260437,0.5339999794960022,0.5419999957084656,0.5450000166893005,0.5460000038146973,0.5370000004768372,0.5490000247955322,0.5440000295639038,0.550000011920929,0.5490000247955322,0.5450000166893005,0.5490000247955322,0.5559999942779541,0.5559999942779541,0.5410000085830688,0.5419999957084656,0.5529999732971191,0.5460000038146973,0.5540000200271606,0.5379999876022339,0.5509999990463257,0.5540000200271606,0.5419999957084656,0.546999990940094,0.5479999780654907,0.5460000038146973,0.5460000038146973,0.5519999861717224,0.5600000023841858,0.5540000200271606,0.5509999990463257,0.5609999895095825,0.5619999766349792,0.5590000152587891,0.5559999942779541,0.5580000281333923,0.5640000104904175,0.5649999976158142,0.5590000152587891,0.5550000071525574,0.5630000233650208,0.5630000233650208,0.5609999895095825,0.5559999942779541,0.5609999895095825,0.5630000233650208,0.5680000185966492,0.5630000233650208,0.5690000057220459,0.5609999895095825,0.5590000152587891,0.5640000104904175,0.5690000057220459,0.5640000104904175,0.5630000233650208,0.574999988079071,0.5630000233650208,0.5619999766349792,0.5690000057220459,0.5770000219345093,0.5690000057220459,0.5609999895095825,0.5649999976158142,0.5680000185966492,0.5590000152587891,0.5600000023841858,0.5619999766349792,0.5799999833106995,0.5619999766349792,0.5699999928474426,0.5709999799728394,0.5669999718666077,0.5680000185966492,0.5609999895095825,0.5649999976158142,0.5680000185966492,0.5730000138282776,0.5720000267028809,0.5709999799728394,0.5770000219345093,0.574999988079071,0.5730000138282776,0.5690000057220459,0.5740000009536743,0.578000009059906,0.574999988079071,0.5820000171661377,0.5730000138282776,0.5740000009536743,0.574999988079071,0.5770000219345093,0.5789999961853027,0.5759999752044678,0.5720000267028809,0.5770000219345093,0.5759999752044678,0.5789999961853027,0.5789999961853027,0.5730000138282776,0.5789999961853027,0.5759999752044678,0.5690000057220459,0.5849999785423279,0.5759999752044678,0.5699999928474426,0.5789999961853027,0.5820000171661377,0.5730000138282776,0.5730000138282776,0.5789999961853027],"label":"FineWeb: independent MinHash (id mh)"},"big-run-sampled_full_filtered_no_dedup":{"x":[0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,29.360128000000003,31.45728,33.554432000000006,35.651584,37.748736,39.845888,41.94304,44.040192000000005,46.137344000000006,48.234496,50.331648,52.4288,54.525952000000004,56.623104000000005,58.720256000000006,60.817408,62.91456,65.011712,67.10886400000001,69.206016,71.303168,73.40032000000001,75.497472,77.59462400000001,79.691776,81.788928,83.88608,85.983232,88.08038400000001,90.177536,92.27468800000001,94.37184,96.468992,98.56614400000001,100.663296,102.76044800000001,104.8576,106.95475200000001,109.05190400000001,111.149056,113.24620800000001,115.34336,117.44051200000001,119.537664,121.634816,123.73196800000001,125.82912,127.92627200000001,130.023424,132.120576,134.21772800000002,136.31488000000002,138.412032,140.509184,142.606336,144.70348800000002,146.80064000000002,148.897792,150.994944,153.092096,155.18924800000002,157.28640000000001,159.383552,161.480704,163.577856,165.67500800000002,167.77216,169.869312,171.966464,174.06361600000002,176.16076800000002,178.25792,180.355072,182.452224,184.54937600000002,186.64652800000002,188.74368,190.840832,192.937984,195.03513600000002,197.13228800000002,199.22944,201.326592,203.423744,205.52089600000002,207.61804800000002,209.7152,211.812352,213.90950400000003,216.00665600000002,218.10380800000001,220.20096,222.298112,224.39526400000003,226.49241600000002,228.589568,230.68672,232.783872,234.88102400000002,236.97817600000002,239.075328,241.17248,243.269632,245.36678400000002,247.46393600000002,249.561088,251.65824,253.75539200000003,255.85254400000002,257.949696,260.046848,262.144,264.241152,266.338304,268.43545600000004,270.53260800000004,272.62976000000003,274.726912,276.824064,278.921216,281.018368,283.11552,285.212672,287.309824,289.40697600000004,291.50412800000004,293.60128000000003,295.698432,297.795584,299.892736,301.989888,304.08704,306.184192,308.28134400000005,310.37849600000004,312.47564800000004,314.57280000000003,316.669952,318.767104,320.864256,322.961408,325.05856,327.155712,329.25286400000005,331.35001600000004,333.44716800000003,335.54432,337.641472,339.738624,341.835776,343.932928,346.03008,348.12723200000005,350.22438400000004],"y":[0.257999986410141,0.2809999883174896,0.3230000138282776,0.3409999907016754,0.3600000143051147,0.3569999933242798,0.3889999985694885,0.395000010728836,0.4199999868869781,0.4180000126361847,0.421999990940094,0.4289999902248382,0.4350000023841858,0.4359999895095825,0.4350000023841858,0.4480000138282776,0.4480000138282776,0.453000009059906,0.4550000131130218,0.4589999914169311,0.4639999866485595,0.4600000083446502,0.460999995470047,0.4589999914169311,0.481000006198883,0.4769999980926513,0.4709999859333038,0.4740000069141388,0.4679999947547912,0.4790000021457672,0.4729999899864197,0.4819999933242798,0.4850000143051147,0.4819999933242798,0.4819999933242798,0.4880000054836273,0.4869999885559082,0.4959999918937683,0.4850000143051147,0.4959999918937683,0.492000013589859,0.503000020980835,0.4930000007152557,0.5099999904632568,0.5040000081062317,0.5009999871253967,0.4970000088214874,0.4979999959468841,0.5059999823570251,0.5070000290870667,0.5040000081062317,0.5059999823570251,0.5049999952316284,0.5080000162124634,0.5049999952316284,0.5019999742507935,0.5120000243186951,0.5170000195503235,0.5170000195503235,0.5090000033378601,0.5239999890327454,0.527999997138977,0.5230000019073486,0.5210000276565552,0.5149999856948853,0.5189999938011169,0.5270000100135803,0.5149999856948853,0.5099999904632568,0.5299999713897705,0.5199999809265137,0.5230000019073486,0.5260000228881836,0.5249999761581421,0.5239999890327454,0.5329999923706055,0.5210000276565552,0.5260000228881836,0.5170000195503235,0.531000018119812,0.5289999842643738,0.531000018119812,0.5270000100135803,0.5299999713897705,0.5370000004768372,0.5379999876022339,0.5419999957084656,0.5329999923706055,0.5360000133514404,0.5299999713897705,0.5360000133514404,0.5270000100135803,0.5450000166893005,0.5410000085830688,0.546999990940094,0.5329999923706055,0.5329999923706055,0.5379999876022339,0.5299999713897705,0.5429999828338623,0.5360000133514404,0.5339999794960022,0.5419999957084656,0.5410000085830688,0.5370000004768372,0.5389999747276306,0.527999997138977,0.5400000214576721,0.5400000214576721,0.531000018119812,0.5440000295639038,0.5460000038146973,0.5479999780654907,0.5460000038146973,0.5410000085830688,0.5509999990463257,0.5479999780654907,0.5410000085830688,0.5389999747276306,0.550000011920929,0.5569999814033508,0.550000011920929,0.5490000247955322,0.5490000247955322,0.5569999814033508,0.5519999861717224,0.5479999780654907,0.5559999942779541,0.5550000071525574,0.5460000038146973,0.5540000200271606,0.5460000038146973,0.5460000038146973,0.5509999990463257,0.5460000038146973,0.5550000071525574,0.5479999780654907,0.5479999780654907,0.5540000200271606,0.5550000071525574,0.5529999732971191,0.5529999732971191,0.5509999990463257,0.5509999990463257,0.5419999957084656,0.546999990940094,0.5509999990463257,0.5559999942779541,0.5490000247955322,0.5509999990463257,0.5529999732971191,0.550000011920929,0.5540000200271606,0.5550000071525574,0.5580000281333923,0.550000011920929,0.5569999814033508,0.5490000247955322,0.5519999861717224,0.5519999861717224,0.5559999942779541,0.5569999814033508,0.5559999942779541,0.5550000071525574,0.5559999942779541,0.5490000247955322,0.5550000071525574,0.5600000023841858],"label":"FineWeb: base filtering only"}},"layout":{"title":{"text":"The different FineWeb processing steps"}}}
|
|
|
|
dist/assets/data/plots/all_filtering_steps/index.json
DELETED
@@ -1 +0,0 @@
|
|
1 |
-
{"files":{"agg_score":{"file":"agg_score.json"},"commonsense_qa/acc_norm":{"file":"commonsense_qa_acc_norm.json"},"hellaswag/acc_norm":{"file":"hellaswag_acc_norm.json"},"openbookqa/acc_norm":{"file":"openbookqa_acc_norm.json"},"piqa/acc_norm":{"file":"piqa_acc_norm.json"},"siqa/acc_norm":{"file":"siqa_acc_norm.json"},"winogrande/acc_norm":{"file":"winogrande_acc_norm.json"},"arc/acc_norm":{"file":"arc_acc_norm.json"},"mmlu/acc_norm":{"file":"mmlu_acc_norm.json"}},"settings":{"defaultMetric":"agg_score","slider":{"min":0,"max":30,"default":5}}}
|
|
|
|
dist/assets/data/plots/all_filtering_steps/mmlu_acc_norm.json
DELETED
@@ -1 +0,0 @@
|
|
1 |
-
{"data":{"big-run-sampled_full_ind_minhash":{"x":[0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,29.360128000000003,31.45728,33.554432000000006,35.651584,37.748736,39.845888,41.94304,44.040192000000005,46.137344000000006,48.234496,50.331648,52.4288,54.525952000000004,56.623104000000005,58.720256000000006,60.817408,62.91456,65.011712,67.10886400000001,69.206016,71.303168,73.40032000000001,75.497472,77.59462400000001,79.691776,81.788928,83.88608,85.983232,88.08038400000001,90.177536,92.27468800000001,94.37184,96.468992,98.56614400000001,100.663296,102.76044800000001,104.8576,106.95475200000001,109.05190400000001,111.149056,113.24620800000001,115.34336,117.44051200000001,119.537664,121.634816,123.73196800000001,125.82912,127.92627200000001,130.023424,132.120576,134.21772800000002,136.31488000000002,138.412032,140.509184,142.606336,144.70348800000002,146.80064000000002,148.897792,150.994944,153.092096,155.18924800000002,157.28640000000001,159.383552,161.480704,163.577856,165.67500800000002,167.77216,169.869312,171.966464,174.06361600000002,176.16076800000002,178.25792,180.355072,182.452224,184.54937600000002,186.64652800000002,188.74368,190.840832,192.937984,195.03513600000002,197.13228800000002,199.22944,201.326592,203.423744,205.52089600000002,207.61804800000002,209.7152,211.812352,213.90950400000003,216.00665600000002,218.10380800000001,220.20096,222.298112,224.39526400000003,226.49241600000002,228.589568,230.68672,232.783872,234.88102400000002,236.97817600000002,239.075328,241.17248,243.269632,245.36678400000002,247.46393600000002,249.561088,251.65824,253.75539200000003,255.85254400000002,257.949696,260.046848,262.144,264.241152,266.338304,268.43545600000004,270.53260800000004,272.62976000000003,274.726912,276.824064,278.921216,281.018368,283.11552,285.212672,287.309824,289.40697600000004,291.50412800000004,293.60128000000003,295.698432,297.795584,299.892736,301.989888,304.08704,306.184192,308.28134400000005,310.37849600000004,312.47564800000004,314.57280000000003,316.669952,318.767104,320.864256,322.961408,325.05856,327.155712,329.25286400000005,331.35001600000004,333.44716800000003,335.54432,337.641472,339.738624,341.835776,343.932928,346.03008,348.12723200000005,350.22438400000004],"y":[0.2501466572284698,0.2558934390544891,0.2618628144264221,0.2683217823505401,0.2699837982654571,0.2738722860813141,0.2744417488574981,0.2740873992443084,0.2807216048240661,0.2820421457290649,0.2891400754451751,0.2879075407981872,0.2881667613983154,0.2892490327358246,0.2935869693756103,0.2870290875434875,0.2911452651023865,0.2949125170707702,0.2916406095027923,0.2981449663639068,0.2953989207744598,0.2946988642215729,0.297021746635437,0.3001497685909271,0.3010218441486358,0.2977036237716675,0.2992585003376007,0.2986803948879242,0.2994338274002075,0.2989781498908996,0.3041955828666687,0.3030496537685394,0.303806334733963,0.3036351203918457,0.3058845102787018,0.300450712442398,0.3025284707546234,0.3072526752948761,0.3039065897464752,0.3073755502700805,0.3070493042469024,0.3083153367042541,0.3123056292533874,0.307761400938034,0.3053378164768219,0.3116358816623688,0.3080427348613739,0.308482676744461,0.307318776845932,0.3083004653453827,0.3089516758918762,0.3088736236095428,0.3077724277973175,0.3126304149627685,0.3101697862148285,0.3159398734569549,0.314792275428772,0.3103811144828796,0.3111368715763092,0.3129658997058868,0.311605304479599,0.3118223249912262,0.3133279979228973,0.3146496713161468,0.3195074200630188,0.3142614662647247,0.3125102519989013,0.3115333616733551,0.3183117806911468,0.3168580532073974,0.3187012672424316,0.3179306983947754,0.3157722651958465,0.3214826583862304,0.3145081698894501,0.3172421753406524,0.3151432573795318,0.3181649446487427,0.3180212080478668,0.3171605765819549,0.3212067782878876,0.3180184066295624,0.3209905624389648,0.319052129983902,0.3212707936763763,0.3196887373924255,0.3188316226005554,0.3164899051189422,0.3241994678974151,0.3179469406604767,0.3214083909988403,0.3206575512886047,0.3263285160064697,0.3219505250453949,0.3181525468826294,0.3219776451587677,0.3259726762771606,0.3197665512561798,0.3236161768436432,0.3177970349788666,0.3258080780506134,0.3208407461643219,0.3251138925552368,0.3242645859718323,0.3229723274707794,0.3227455914020538,0.3206316232681274,0.3256695866584778,0.3241210877895355,0.3224890530109405,0.3263737261295318,0.3214233517646789,0.3240345120429992,0.3222567737102508,0.3242291808128357,0.3257078528404236,0.3278365731239319,0.3277338743209839,0.3253948092460632,0.3232105076313019,0.3267974853515625,0.3263654410839081,0.3262891769409179,0.3238334357738495,0.3294911682605743,0.3261866867542267,0.3243315815925598,0.3250119090080261,0.326727420091629,0.3268802464008331,0.3269768059253692,0.3257980346679687,0.3280686736106872,0.3274897634983063,0.3282252252101898,0.3272863030433655,0.328346699476242,0.325562834739685,0.3301684856414795,0.3284023404121399,0.3268299400806427,0.3286610245704651,0.3291078805923462,0.324972927570343,0.3314772248268127,0.3278062343597412,0.326839417219162,0.3277239501476288,0.330414742231369,0.3271744549274444,0.3279334008693695,0.3288575112819671,0.3285425007343292,0.3282454907894134,0.3296376466751098,0.3305942714214325,0.3276287615299225,0.3292438983917236,0.329515129327774,0.3281475007534027,0.3282177448272705,0.3333999514579773,0.3302631080150604,0.330238401889801,0.3323166668415069,0.3313035368919372,0.32961106300354,0.3321967124938965],"label":"FineWeb: independent MinHash (id mh)"},"big-run-fineweb-v1-all-dumps":{"x":[0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,29.360128000000003,31.45728,33.554432000000006,35.651584,37.748736,39.845888,41.94304,44.040192000000005,46.137344000000006,48.234496,50.331648,52.4288,54.525952000000004,56.623104000000005,58.720256000000006,60.817408,62.91456,65.011712,67.10886400000001,69.206016,71.303168,73.40032000000001,75.497472,77.59462400000001,79.691776,81.788928,83.88608,85.983232,88.08038400000001,90.177536,92.27468800000001,94.37184,96.468992,98.56614400000001,100.663296,102.76044800000001,104.8576,106.95475200000001,109.05190400000001,111.149056,113.24620800000001,115.34336,117.44051200000001,119.537664,121.634816,123.73196800000001,125.82912,127.92627200000001,130.023424,132.120576,134.21772800000002,136.31488000000002,138.412032,140.509184,142.606336,144.70348800000002,146.80064000000002,148.897792,150.994944,153.092096,155.18924800000002,157.28640000000001,159.383552,161.480704,163.577856,165.67500800000002,167.77216,169.869312,171.966464,174.06361600000002,176.16076800000002,178.25792,180.355072,182.452224,184.54937600000002,186.64652800000002,188.74368,190.840832,192.937984,195.03513600000002,197.13228800000002,199.22944,201.326592,203.423744,205.52089600000002,207.61804800000002,209.7152,211.812352,213.90950400000003,216.00665600000002,218.10380800000001,220.20096,222.298112,224.39526400000003,226.49241600000002,228.589568,230.68672,232.783872,234.88102400000002,236.97817600000002,239.075328,241.17248,243.269632,245.36678400000002,247.46393600000002,249.561088,251.65824,253.75539200000003,255.85254400000002,257.949696,260.046848,262.144,264.241152,266.338304,268.43545600000004,270.53260800000004,272.62976000000003,274.726912,276.824064,278.921216,281.018368,283.11552,285.212672,287.309824,289.40697600000004,291.50412800000004,293.60128000000003,295.698432,297.795584,299.892736,301.989888,304.08704,306.184192,308.28134400000005,310.37849600000004,312.47564800000004,314.57280000000003,316.669952,318.767104,320.864256,322.961408,325.05856,327.155712,329.25286400000005,331.35001600000004,333.44716800000003,335.54432,337.641472,339.738624,341.835776,343.932928,346.03008,348.12723200000005,350.22438400000004],"y":[0.2501466572284698,0.2562687695026397,0.264194518327713,0.2659797668457031,0.2690401375293731,0.2707462012767792,0.2736803293228149,0.2808477580547333,0.2819793820381164,0.2818062305450439,0.2852273285388946,0.2852552533149719,0.293150246143341,0.2869345247745514,0.2926198840141296,0.2911646664142608,0.2883031964302063,0.2938489317893982,0.2923268675804138,0.2927436530590057,0.2957125902175903,0.2942458391189575,0.2957732379436493,0.2933609783649444,0.2939628064632416,0.2984270751476288,0.2989151179790497,0.3007727265357971,0.2968312501907348,0.2969468235969543,0.3013020753860473,0.3045085966587066,0.3018752634525299,0.3014349043369293,0.2988792657852173,0.3034284710884094,0.3015728890895843,0.3065252900123596,0.3021449446678161,0.3043071627616882,0.303546279668808,0.3056059181690216,0.2971993386745453,0.3057574033737182,0.3057517111301422,0.3124973773956299,0.3139103651046753,0.3144983947277069,0.3126215636730194,0.3140240907669067,0.3113631308078766,0.3124240636825561,0.3126817643642425,0.3123457431793213,0.3111095428466797,0.3113269805908203,0.3142518699169159,0.3163851797580719,0.3134008049964905,0.3138530254364013,0.3171449303627014,0.3119543790817261,0.3147956132888794,0.3138984441757202,0.3178529143333435,0.3162296414375305,0.315980851650238,0.3123161196708679,0.3166452944278717,0.3140694200992584,0.3176922798156738,0.3176673054695129,0.3150016367435455,0.3161586821079254,0.3222477436065674,0.3194025754928589,0.3176416158676147,0.3159928619861603,0.3169592320919037,0.3135637938976288,0.3155058920383453,0.3215300440788269,0.3201274275779724,0.3192023932933807,0.3156079053878784,0.3212503492832184,0.3163617849349975,0.3223940432071686,0.3191330432891845,0.3194314539432525,0.3221519589424133,0.3211863040924072,0.3197937309741974,0.3174488544464111,0.3159596025943756,0.3157133460044861,0.3193388879299164,0.3163386285305023,0.3202225565910339,0.3163421154022217,0.3212694227695465,0.3187369704246521,0.3203508555889129,0.3224054872989654,0.3207881152629852,0.3219418525695801,0.3197605609893799,0.3255409598350525,0.3253240585327148,0.319698303937912,0.3250498473644256,0.3228228390216827,0.3213794529438019,0.3219127357006073,0.3214426934719085,0.3238218128681183,0.3229665458202362,0.3220484256744385,0.3240038454532623,0.3246393501758575,0.3237775564193725,0.3258441984653473,0.322843462228775,0.3241913020610809,0.324148565530777,0.3238157927989959,0.3248989582061767,0.3280864655971527,0.3288898766040802,0.3265794515609741,0.3277602791786194,0.3231202363967895,0.3224002718925476,0.323845773935318,0.3278093039989471,0.3247094452381134,0.3289697468280792,0.3272296786308288,0.3275051414966583,0.3271359801292419,0.3280861675739288,0.3281281590461731,0.327859491109848,0.3281152546405792,0.3282515406608581,0.3258990049362182,0.3271094560623169,0.3259278535842895,0.3258941769599914,0.3278749883174896,0.3300504386425018,0.326113760471344,0.3242938220500946,0.3262194991111755,0.3263693153858185,0.3274452090263366,0.3254594206809997,0.3287247717380523,0.3250340223312378,0.3270816206932068,0.3275731801986694,0.3282500207424164,0.3257671594619751,0.3272948265075683,0.3274084031581878,0.3302212655544281,0.3322067260742187,0.3296935856342315],"label":"FineWeb: id mh + C4 + custom filters"},"big-run-sampled-fineweb-c4-filters":{"x":[0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,29.360128000000003,31.45728,33.554432000000006,35.651584,37.748736,39.845888,41.94304,44.040192000000005,46.137344000000006,48.234496,50.331648,52.4288,54.525952000000004,56.623104000000005,58.720256000000006,60.817408,62.91456,65.011712,67.10886400000001,69.206016,71.303168,73.40032000000001,75.497472,77.59462400000001,79.691776,81.788928,83.88608,85.983232,88.08038400000001,90.177536,92.27468800000001,94.37184,96.468992,98.56614400000001,100.663296,102.76044800000001,104.8576,106.95475200000001,109.05190400000001,111.149056,113.24620800000001,115.34336,117.44051200000001,119.537664,121.634816,123.73196800000001,125.82912,127.92627200000001,130.023424,132.120576,134.21772800000002,136.31488000000002,138.412032,140.509184,142.606336,144.70348800000002,146.80064000000002,148.897792,150.994944,153.092096,155.18924800000002,157.28640000000001,159.383552,161.480704,163.577856,165.67500800000002,167.77216,169.869312,171.966464,174.06361600000002,176.16076800000002,178.25792,180.355072,182.452224,184.54937600000002,186.64652800000002,188.74368,190.840832,192.937984,195.03513600000002,197.13228800000002,199.22944,201.326592,203.423744,205.52089600000002,207.61804800000002,209.7152,211.812352,213.90950400000003,216.00665600000002,218.10380800000001,220.20096,222.298112,224.39526400000003,226.49241600000002,228.589568,230.68672,232.783872,234.88102400000002,236.97817600000002,239.075328,241.17248,243.269632,245.36678400000002,247.46393600000002,249.561088,251.65824,253.75539200000003,255.85254400000002,257.949696,260.046848,262.144,264.241152,266.338304,268.43545600000004,270.53260800000004,272.62976000000003,274.726912,276.824064,278.921216,281.018368,283.11552,285.212672,287.309824,289.40697600000004,291.50412800000004,293.60128000000003,295.698432,297.795584,299.892736,301.989888,304.08704,306.184192,308.28134400000005,310.37849600000004,312.47564800000004,314.57280000000003,316.669952,318.767104,320.864256,322.961408,325.05856,327.155712,329.25286400000005,331.35001600000004,333.44716800000003,335.54432,337.641472,339.738624,341.835776,343.932928,346.03008,348.12723200000005,350.22438400000004],"y":[0.2501466572284698,0.251920074224472,0.2591456174850464,0.2687398791313171,0.269056499004364,0.2683902382850647,0.2725079655647278,0.2752586305141449,0.2753303050994873,0.2848396897315979,0.2833426892757416,0.2844280302524566,0.2847303748130798,0.294879138469696,0.2900991439819336,0.2932447791099548,0.2926276624202728,0.2924879789352417,0.2937914729118347,0.2919517457485199,0.2991980910301208,0.2929336428642273,0.3003193736076355,0.2955676615238189,0.2993899285793304,0.2975476682186126,0.2978275716304779,0.2994768321514129,0.2984132170677185,0.2998209595680237,0.3030214607715606,0.2984272837638855,0.2997688949108124,0.3041917085647583,0.3071142137050628,0.3038201630115509,0.3035272359848022,0.3047704994678497,0.3072718679904938,0.3085931539535522,0.3052116930484772,0.3084307312965393,0.3089986145496368,0.3102100193500519,0.3066395819187164,0.3109234273433685,0.3082580268383026,0.3055950105190277,0.3064471781253814,0.3052197098731994,0.3076579868793487,0.3114514350891113,0.3092200756072998,0.3083749115467071,0.3078047931194305,0.3102362751960754,0.3083966672420501,0.3149019181728363,0.3096835613250732,0.3129985630512237,0.3098655939102173,0.3105471730232239,0.3110797703266144,0.3097324073314667,0.3102588951587677,0.3108883202075958,0.3140653371810913,0.3143481016159057,0.3121947944164276,0.3064004778861999,0.3148718774318695,0.3152956068515777,0.3166911900043487,0.3115324079990387,0.311627596616745,0.3122025728225708,0.3186626732349396,0.3177326321601867,0.3107803463935852,0.3128083050251007,0.3109799027442932,0.3142008483409881,0.3121736049652099,0.3163987696170807,0.3134956955909729,0.3152486085891723,0.3163009285926819,0.3165099024772644,0.3186413049697876,0.315637856721878,0.3207236230373382,0.3161193430423736,0.3157550990581512,0.320356547832489,0.3172537386417389,0.318843811750412,0.3146925568580627,0.3193819522857666,0.3169102966785431,0.3154685497283935,0.3166710138320923,0.3187788426876068,0.3196616470813751,0.3190047442913055,0.3186605274677276,0.3162576556205749,0.3164195120334625,0.3191222250461578,0.3135613799095154,0.3175683617591858,0.3212282657623291,0.3279584646224975,0.3228197395801544,0.3242316544055938,0.3254729807376861,0.3239398598670959,0.3223652243614197,0.3198129832744598,0.3218621611595154,0.3264771103858948,0.323866069316864,0.32564178109169,0.32478728890419,0.3236158192157745,0.3245747685432434,0.3280244767665863,0.3271372020244598,0.3254362642765045,0.3266178965568542,0.3218266665935516,0.3268883228302002,0.321928471326828,0.324524849653244,0.3237947523593902,0.3238577842712402,0.3237817287445068,0.3233639299869537,0.325821191072464,0.3257157802581787,0.3272253274917602,0.3244009912014007,0.3231483995914459,0.3226592242717743,0.3233656585216522,0.3266710937023163,0.3259218335151672,0.3275097906589508,0.3273427188396454,0.3276328444480896,0.3251460194587707,0.3274493515491485,0.3227463960647583,0.3261785507202148,0.32408007979393,0.3253126442432403,0.3242971301078796,0.326819509267807,0.3268508613109588,0.3265140950679779,0.3266753256320953,0.3250673115253448,0.3271500170230865,0.3292337656021118,0.3286773562431335,0.3286141455173492,0.3296676576137543,0.3257955610752105,0.3266003727912903],"label":"FineWeb: id mh + C4 filters"},"big-run-sampled_full_filtered_no_dedup":{"x":[0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,29.360128000000003,31.45728,33.554432000000006,35.651584,37.748736,39.845888,41.94304,44.040192000000005,46.137344000000006,48.234496,50.331648,52.4288,54.525952000000004,56.623104000000005,58.720256000000006,60.817408,62.91456,65.011712,67.10886400000001,69.206016,71.303168,73.40032000000001,75.497472,77.59462400000001,79.691776,81.788928,83.88608,85.983232,88.08038400000001,90.177536,92.27468800000001,94.37184,96.468992,98.56614400000001,100.663296,102.76044800000001,104.8576,106.95475200000001,109.05190400000001,111.149056,113.24620800000001,115.34336,117.44051200000001,119.537664,121.634816,123.73196800000001,125.82912,127.92627200000001,130.023424,132.120576,134.21772800000002,136.31488000000002,138.412032,140.509184,142.606336,144.70348800000002,146.80064000000002,148.897792,150.994944,153.092096,155.18924800000002,157.28640000000001,159.383552,161.480704,163.577856,165.67500800000002,167.77216,169.869312,171.966464,174.06361600000002,176.16076800000002,178.25792,180.355072,182.452224,184.54937600000002,186.64652800000002,188.74368,190.840832,192.937984,195.03513600000002,197.13228800000002,199.22944,201.326592,203.423744,205.52089600000002,207.61804800000002,209.7152,211.812352,213.90950400000003,216.00665600000002,218.10380800000001,220.20096,222.298112,224.39526400000003,226.49241600000002,228.589568,230.68672,232.783872,234.88102400000002,236.97817600000002,239.075328,241.17248,243.269632,245.36678400000002,247.46393600000002,249.561088,251.65824,253.75539200000003,255.85254400000002,257.949696,260.046848,262.144,264.241152,266.338304,268.43545600000004,270.53260800000004,272.62976000000003,274.726912,276.824064,278.921216,281.018368,283.11552,285.212672,287.309824,289.40697600000004,291.50412800000004,293.60128000000003,295.698432,297.795584,299.892736,301.989888,304.08704,306.184192,308.28134400000005,310.37849600000004,312.47564800000004,314.57280000000003,316.669952,318.767104,320.864256,322.961408,325.05856,327.155712,329.25286400000005,331.35001600000004,333.44716800000003,335.54432,337.641472,339.738624,341.835776,343.932928,346.03008,348.12723200000005,350.22438400000004],"y":[0.2501466572284698,0.2516599297523498,0.2610189318656921,0.2666046619415283,0.2667981088161468,0.2667821645736694,0.2708088159561157,0.2738403379917145,0.2726235687732696,0.2762763500213623,0.2768311202526092,0.2809228301048279,0.2836140990257263,0.2822815179824829,0.2797218561172485,0.286342591047287,0.2855269610881805,0.2847287058830261,0.2888180613517761,0.286526083946228,0.2865165770053863,0.294582188129425,0.2925947606563568,0.2947863042354584,0.2892930805683136,0.2903610467910766,0.288201242685318,0.2873396277427673,0.2916238009929657,0.2908017039299011,0.2907920777797699,0.2952797412872314,0.2941452264785766,0.2921333611011505,0.2925891280174255,0.2968584895133972,0.2980035543441772,0.2964116632938385,0.2962304651737213,0.2950254380702972,0.2977516651153564,0.2944138348102569,0.3003402054309845,0.2976303696632385,0.3013098239898681,0.302829384803772,0.3018766045570373,0.305361807346344,0.2971298694610595,0.3014816343784332,0.3019805550575256,0.3037064969539642,0.2970167994499206,0.2995208501815796,0.2970106601715088,0.2990955114364624,0.3027818500995636,0.3048534691333771,0.2993872463703155,0.2986327707767486,0.3015393316745758,0.3003426790237427,0.3003274798393249,0.3017795085906982,0.3019182682037353,0.3015450537204742,0.3046211004257202,0.3031167984008789,0.3020436763763428,0.3011128306388855,0.3029948472976684,0.3045558631420135,0.301642894744873,0.3029441833496094,0.3035804331302643,0.3004390001296997,0.3021787703037262,0.306041270494461,0.3064048886299133,0.3087956011295318,0.3070018291473388,0.3065581619739532,0.3093871772289276,0.3060930073261261,0.3033313155174255,0.3072777390480041,0.306413859128952,0.3104493916034698,0.3056999444961548,0.3077532052993774,0.309231549501419,0.3070645034313202,0.3117790520191192,0.3114112913608551,0.312661737203598,0.3181777000427246,0.3117201030254364,0.3099702894687652,0.3074746131896972,0.3064963519573211,0.3105958700180053,0.3111456036567688,0.3084964454174042,0.3087405860424042,0.3121673166751861,0.3121528625488281,0.3100416660308838,0.3142979145050049,0.3129935264587402,0.3112611472606659,0.3119436800479889,0.3154115974903106,0.3091593086719513,0.3103814721107483,0.3130497634410858,0.3133455514907837,0.3152708411216736,0.3137963414192199,0.3099324703216553,0.3164172768592834,0.3133907914161682,0.3128255009651184,0.3134104907512665,0.3106969892978668,0.3130004107952118,0.3131391704082489,0.3130116462707519,0.3143952488899231,0.3143975436687469,0.3143710494041443,0.3163396418094635,0.3166862726211548,0.3184126019477844,0.3178988993167877,0.317479133605957,0.3184944093227386,0.316694974899292,0.3176258206367492,0.3182629346847534,0.3200214207172394,0.3181648552417755,0.320680022239685,0.3178716897964477,0.3182425796985626,0.3182984292507171,0.3158398568630218,0.3152642548084259,0.3132680356502533,0.3178914785385132,0.3156660795211792,0.3161703050136566,0.3176451921463012,0.3173815906047821,0.3194171786308288,0.3193057179450989,0.3172560334205627,0.317656546831131,0.3155770003795624,0.3199106156826019,0.3170182108879089,0.3156754970550537,0.3180731236934662,0.3205638229846954,0.3175432682037353,0.3184471428394317,0.3192788958549499,0.3197042346000671,0.3177168369293213],"label":"FineWeb: base filtering only"}},"layout":{"title":{"text":"The different FineWeb processing steps"}}}
|
|
|
|