victormiller committed on
Commit
8c7dda2
1 Parent(s): b16daa1

Update web.py

Browse files
Files changed (1) hide show
  1. web.py +1 -0
web.py CHANGED
@@ -388,6 +388,7 @@ def web_data():
388
  Li("Each section is complete with code and comparisons to Dolma,", D_cite(bibtex_key="soldaini2024dolma"),
389
  "DataTrove,", D_cite(bibtex_key="penedo2024datatrove"),
390
  "and/or RedPajama-V-2", D_cite(bibtex_key="redpajama-v2"), style = "margin-bottom: 5px"),
 
391
  ),
392
  P("To generate a high-quality dataset from large-scale webpages, we have investigated the processing steps used by the community and made our choices based on careful manual inspection. Below is a comprehensive list of datasets we reviewed the comparison of filters we have applied."),
393
  ),
 
388
  Li("Each section is complete with code and comparisons to Dolma,", D_cite(bibtex_key="soldaini2024dolma"),
389
  "DataTrove,", D_cite(bibtex_key="penedo2024datatrove"),
390
  "and/or RedPajama-V-2", D_cite(bibtex_key="redpajama-v2"), style = "margin-bottom: 5px"),
391
+ Li(B("Estimated Reading Time: 31 minutes"), style = "margin-bottom: 5px"),
392
  ),
393
  P("To generate a high-quality dataset from large-scale webpages, we have investigated the processing steps used by the community and made our choices based on careful manual inspection. Below is a comprehensive list of datasets we reviewed the comparison of filters we have applied."),
394
  ),