victormiller committed on
Commit
872ea67
1 Parent(s): feb2faf

Update web.py

Browse files
Files changed (1) hide show
  1. web.py +1 -1
web.py CHANGED
@@ -254,7 +254,7 @@ def web_data():
254
  Li("Local Deduplication", style = "margin-bottom: 5px"),
255
  Li("Each section is complete with code and comparisons to Dolma,", D_cite(bibtex_key="soldaini2024dolma"),
256
  "DataTrove,", D_cite(bibtex_key="penedo2024datatrove"),
257
- "and/or RedPajama-V-2" D_cite(bibtex_key="redpajama-v2"),, style = "margin-bottom: 5px"),
258
  ),
259
  P("To generate a high-quality dataset from large-scale webpages, we have investigated the processing steps used by the community and made our choices based on careful manual inspection. Below is a comprehensive list of datasets we reviewed the comparison of filters we have applied."),
260
  ),
 
254
  Li("Local Deduplication", style = "margin-bottom: 5px"),
255
  Li("Each section is complete with code and comparisons to Dolma,", D_cite(bibtex_key="soldaini2024dolma"),
256
  "DataTrove,", D_cite(bibtex_key="penedo2024datatrove"),
257
+ "and/or RedPajama-V-2" D_cite(bibtex_key="redpajama-v2"), style = "margin-bottom: 5px"),
258
  ),
259
  P("To generate a high-quality dataset from large-scale webpages, we have investigated the processing steps used by the community and made our choices based on careful manual inspection. Below is a comprehensive list of datasets we reviewed the comparison of filters we have applied."),
260
  ),