Spaces:
Running
Running
change background to lighter color
Browse files
change pink to lighter one
add margin-bottom
web.py
CHANGED
@@ -240,6 +240,7 @@ def web_data():
|
|
240 |
border: 1px solid #c3e6cb; /* Green border */
|
241 |
border-radius: 5px;
|
242 |
padding: 15px 15px 0px 15px;
|
|
|
243 |
""",
|
244 |
),
|
245 |
H3("TxT360 CommonCrawl Filtering vs Other Pretraining Datasets"),
|
@@ -301,6 +302,7 @@ def web_data():
|
|
301 |
padding: 15px;
|
302 |
# border: 1px solid #949494; /* Grey border */
|
303 |
border-radius: 12px;
|
|
|
304 |
""", #https://colors.muz.li/palette/d3d3d3/949494/d3d3d3/d3d3d3/949494
|
305 |
),
|
306 |
#DV2("data/sample_wet.json", "data/sample_warc.json", 3),
|
@@ -316,9 +318,10 @@ def web_data():
|
|
316 |
Summary("Non-English Documents"),
|
317 |
DV("data/sample_non_en.json", 3, "Sample documents that are classified as non-English"),
|
318 |
style="""
|
319 |
-
background-color: #
|
320 |
padding: 15px;
|
321 |
border-radius: 12px;
|
|
|
322 |
""",
|
323 |
),
|
324 |
|
@@ -331,6 +334,7 @@ def web_data():
|
|
331 |
background-color: #EAFFF1; /* Light green background */
|
332 |
padding: 15px;
|
333 |
border-radius: 12px;
|
|
|
334 |
""",
|
335 |
),
|
336 |
|
@@ -350,9 +354,10 @@ def web_data():
|
|
350 |
Summary("24 URL domains with more than 4k matches"),
|
351 |
DVS(urls_high_matches, "24 URL domains with more than 4k matches"),
|
352 |
style="""
|
353 |
-
background-color: #
|
354 |
padding: 15px;
|
355 |
border-radius: 12px;
|
|
|
356 |
""",
|
357 |
),
|
358 |
|
@@ -363,9 +368,10 @@ def web_data():
|
|
363 |
Summary("6 url domains that are removed from the blocklist"),
|
364 |
DVS(urls_false_positives, "6 url domains that are removed from the blocklist"),
|
365 |
style="""
|
366 |
-
background-color: #
|
367 |
padding: 15px;
|
368 |
border-radius: 12px;
|
|
|
369 |
""",
|
370 |
),
|
371 |
|
@@ -377,9 +383,10 @@ def web_data():
|
|
377 |
"Sample documents whose urls are blocked by the refined url blocklist",
|
378 |
),
|
379 |
style="""
|
380 |
-
background-color: #
|
381 |
padding: 15px;
|
382 |
border-radius: 12px;
|
|
|
383 |
""",
|
384 |
),
|
385 |
|
@@ -395,9 +402,10 @@ def web_data():
|
|
395 |
"curated url domains that are excluded from our dataset",
|
396 |
),
|
397 |
style="""
|
398 |
-
background-color: #
|
399 |
padding: 15px;
|
400 |
border-radius: 12px;
|
|
|
401 |
""",
|
402 |
),
|
403 |
|
@@ -408,6 +416,7 @@ def web_data():
|
|
408 |
background-color: #EAFFF1; /* Light green background */
|
409 |
padding: 15px;
|
410 |
border-radius: 12px;
|
|
|
411 |
""",
|
412 |
),
|
413 |
|
@@ -438,9 +447,10 @@ def web_data():
|
|
438 |
"Sample documents with lines that are removed by the rule of terminal punctuation",
|
439 |
),
|
440 |
style="""
|
441 |
-
background-color: #
|
442 |
padding: 15px;
|
443 |
border-radius: 12px;
|
|
|
444 |
""",
|
445 |
),
|
446 |
|
@@ -464,9 +474,10 @@ def web_data():
|
|
464 |
"Sample documents that are removed by original C4 javascript rule but are kept after our refinement",
|
465 |
),
|
466 |
style="""
|
467 |
-
background-color: #
|
468 |
padding: 15px;
|
469 |
border-radius: 12px;
|
|
|
470 |
""",
|
471 |
),
|
472 |
H3("2.2 Other Rules from RefinedWeb"),
|
@@ -487,9 +498,10 @@ def web_data():
|
|
487 |
"Sample documents with lines that are removed by the RefinedWeb rules",
|
488 |
),
|
489 |
style="""
|
490 |
-
background-color: #
|
491 |
padding: 15px;
|
492 |
border-radius: 12px;
|
|
|
493 |
""",
|
494 |
),
|
495 |
H3("2.3 Toxic Lines"),
|
@@ -507,9 +519,10 @@ def web_data():
|
|
507 |
"Sample documents with toxic lines",
|
508 |
),
|
509 |
style="""
|
510 |
-
background-color: #
|
511 |
padding: 15px;
|
512 |
border-radius: 12px;
|
|
|
513 |
""",
|
514 |
),
|
515 |
|
@@ -527,6 +540,7 @@ def web_data():
|
|
527 |
background-color: #EAFFF1; /* Light green background */
|
528 |
padding: 15px;
|
529 |
border-radius: 12px;
|
|
|
530 |
""",
|
531 |
),
|
532 |
P("""Similar to previous sections, we will present sample documents filtered out by the given quality signals.
|
@@ -570,6 +584,7 @@ def web_data():
|
|
570 |
background-color: #FFFAEA; /* Light yellow background */
|
571 |
padding: 15px;
|
572 |
border-radius: 12px;
|
|
|
573 |
""",
|
574 |
),
|
575 |
Details(
|
@@ -609,6 +624,7 @@ def web_data():
|
|
609 |
background-color: #FFFAEA; /* Light yellow background */
|
610 |
padding: 15px;
|
611 |
border-radius: 12px;
|
|
|
612 |
""",
|
613 |
),
|
614 |
P("""
|
@@ -655,6 +671,7 @@ def web_data():
|
|
655 |
background-color: #EAFFF1; /* Light green background */
|
656 |
padding: 15px;
|
657 |
border-radius: 12px;
|
|
|
658 |
""",
|
659 |
),
|
660 |
Details(
|
@@ -668,6 +685,7 @@ def web_data():
|
|
668 |
background-color: #EAFFF1; /* Light green background */
|
669 |
padding: 15px;
|
670 |
border-radius: 12px;
|
|
|
671 |
""",
|
672 |
),
|
673 |
H3("3.1.2 Fraction of Characters in the Most Common N-grams (n=2,3,4)"),
|
@@ -696,6 +714,7 @@ def web_data():
|
|
696 |
background-color: #FFFAEA; /* Light yellow background */
|
697 |
padding: 15px;
|
698 |
border-radius: 12px;
|
|
|
699 |
""",
|
700 |
),
|
701 |
Details(
|
@@ -739,6 +758,7 @@ def web_data():
|
|
739 |
background-color: #FFFAEA; /* Light yellow background */
|
740 |
padding: 15px;
|
741 |
border-radius: 12px;
|
|
|
742 |
""",
|
743 |
),
|
744 |
|
@@ -767,6 +787,7 @@ def web_data():
|
|
767 |
background-color: #FFFAEA; /* Light yellow background */
|
768 |
padding: 15px;
|
769 |
border-radius: 12px;
|
|
|
770 |
""",
|
771 |
),
|
772 |
P("""
|
@@ -799,6 +820,7 @@ def web_data():
|
|
799 |
background-color: #EAFFF1; /* Light green background */
|
800 |
padding: 15px;
|
801 |
border-radius: 12px;
|
|
|
802 |
""",
|
803 |
),
|
804 |
Details(
|
@@ -812,6 +834,7 @@ def web_data():
|
|
812 |
background-color: #EAFFF1; /* Light green background */
|
813 |
padding: 15px;
|
814 |
border-radius: 12px;
|
|
|
815 |
""",
|
816 |
),
|
817 |
H3("3.1.3 Fraction of Characters in Duplicated N-grams (n=5,...,10)"),
|
@@ -843,6 +866,7 @@ def web_data():
|
|
843 |
background-color: #FFFAEA; /* Light yellow background */
|
844 |
padding: 15px;
|
845 |
border-radius: 12px;
|
|
|
846 |
""",
|
847 |
),
|
848 |
Details(
|
@@ -901,6 +925,7 @@ def web_data():
|
|
901 |
background-color: #FFFAEA; /* Light yellow background */
|
902 |
padding: 15px;
|
903 |
border-radius: 12px;
|
|
|
904 |
""",
|
905 |
),
|
906 |
|
@@ -931,6 +956,7 @@ def web_data():
|
|
931 |
background-color: #FFFAEA; /* Light yellow background */
|
932 |
padding: 15px;
|
933 |
border-radius: 12px;
|
|
|
934 |
""",
|
935 |
),
|
936 |
P("""
|
@@ -989,6 +1015,7 @@ def web_data():
|
|
989 |
background-color: #EAFFF1; /* Light green background */
|
990 |
padding: 15px;
|
991 |
border-radius: 12px;
|
|
|
992 |
""",
|
993 |
),
|
994 |
Details(
|
@@ -1008,6 +1035,7 @@ def web_data():
|
|
1008 |
background-color: #EAFFF1; /* Light green background */
|
1009 |
padding: 15px;
|
1010 |
border-radius: 12px;
|
|
|
1011 |
""",
|
1012 |
),
|
1013 |
H5(
|
@@ -1024,6 +1052,7 @@ def web_data():
|
|
1024 |
background-color: #EAFFF1; /* Light green background */
|
1025 |
padding: 15px;
|
1026 |
border-radius: 12px;
|
|
|
1027 |
""",
|
1028 |
),
|
1029 |
H3("3.2 Line-wise Heuristics"),
|
@@ -1055,6 +1084,7 @@ def web_data():
|
|
1055 |
background-color: #FFFAEA; /* Light yellow background */
|
1056 |
padding: 15px;
|
1057 |
border-radius: 12px;
|
|
|
1058 |
""",
|
1059 |
),
|
1060 |
Details(
|
@@ -1104,6 +1134,7 @@ def web_data():
|
|
1104 |
background-color: #FFFAEA; /* Light yellow background */
|
1105 |
padding: 15px;
|
1106 |
border-radius: 12px;
|
|
|
1107 |
""",
|
1108 |
),
|
1109 |
|
@@ -1119,6 +1150,7 @@ def web_data():
|
|
1119 |
background-color: #EAFFF1; /* Light green background */
|
1120 |
padding: 15px;
|
1121 |
border-radius: 12px;
|
|
|
1122 |
""",
|
1123 |
),
|
1124 |
|
@@ -1184,6 +1216,7 @@ def web_data():
|
|
1184 |
background-color: #FFFAEA; /* Light yellow background */
|
1185 |
padding: 15px;
|
1186 |
border-radius: 12px;
|
|
|
1187 |
""",
|
1188 |
),
|
1189 |
|
@@ -1200,6 +1233,7 @@ def web_data():
|
|
1200 |
background-color: #FFFAEA; /* Light yellow background */
|
1201 |
padding: 15px;
|
1202 |
border-radius: 12px;
|
|
|
1203 |
""",
|
1204 |
),
|
1205 |
P("""
|
@@ -1249,6 +1283,7 @@ def web_data():
|
|
1249 |
background-color: #FFFAEA; /* Light yellow background */
|
1250 |
padding: 15px;
|
1251 |
border-radius: 12px;
|
|
|
1252 |
""",
|
1253 |
),
|
1254 |
P("""
|
@@ -1270,6 +1305,7 @@ def web_data():
|
|
1270 |
background-color: #EAFFF1; /* Light green background */
|
1271 |
padding: 15px;
|
1272 |
border-radius: 12px;
|
|
|
1273 |
""",
|
1274 |
),
|
1275 |
|
@@ -1291,6 +1327,7 @@ def web_data():
|
|
1291 |
background-color: #FFFAEA; /* Light yellow background */
|
1292 |
padding: 15px;
|
1293 |
border-radius: 12px;
|
|
|
1294 |
""",
|
1295 |
),
|
1296 |
Details(
|
@@ -1322,6 +1359,7 @@ def web_data():
|
|
1322 |
background-color: #FFFAEA; /* Light yellow background */
|
1323 |
padding: 15px;
|
1324 |
border-radius: 12px;
|
|
|
1325 |
""",
|
1326 |
),
|
1327 |
|
@@ -1337,6 +1375,7 @@ def web_data():
|
|
1337 |
background-color: #FFFAEA; /* Light yellow background */
|
1338 |
padding: 15px;
|
1339 |
border-radius: 12px;
|
|
|
1340 |
""",
|
1341 |
),
|
1342 |
Details(
|
@@ -1352,6 +1391,7 @@ def web_data():
|
|
1352 |
background-color: #EAFFF1; /* Light green background */
|
1353 |
padding: 15px;
|
1354 |
border-radius: 12px;
|
|
|
1355 |
""",
|
1356 |
),
|
1357 |
|
@@ -1367,6 +1407,7 @@ def web_data():
|
|
1367 |
background-color: #FFFAEA; /* Light yellow background */
|
1368 |
padding: 15px;
|
1369 |
border-radius: 12px;
|
|
|
1370 |
""",
|
1371 |
),
|
1372 |
Details(
|
@@ -1396,6 +1437,7 @@ def web_data():
|
|
1396 |
background-color: #FFFAEA; /* Light yellow background */
|
1397 |
padding: 15px;
|
1398 |
border-radius: 12px;
|
|
|
1399 |
""",
|
1400 |
),
|
1401 |
Details(
|
@@ -1412,6 +1454,7 @@ def web_data():
|
|
1412 |
background-color: #FFFAEA; /* Light yellow background */
|
1413 |
padding: 15px;
|
1414 |
border-radius: 12px;
|
|
|
1415 |
""",
|
1416 |
),
|
1417 |
P("""
|
@@ -1443,6 +1486,7 @@ def web_data():
|
|
1443 |
background-color: #EAFFF1; /* Light green background */
|
1444 |
padding: 15px;
|
1445 |
border-radius: 12px;
|
|
|
1446 |
""",
|
1447 |
),
|
1448 |
H3("3.4 Others"),
|
@@ -1455,9 +1499,10 @@ def web_data():
|
|
1455 |
Summary("Sample documents containing 'lorem ipsum'"),
|
1456 |
DV("data/lorem_ipsum.json", 0, "Sample documents containing 'lorem ipsum'"),
|
1457 |
style="""
|
1458 |
-
background-color: #
|
1459 |
padding: 15px;
|
1460 |
border-radius: 12px;
|
|
|
1461 |
""",
|
1462 |
),
|
1463 |
H2("4. Deduplication"),
|
|
|
240 |
border: 1px solid #c3e6cb; /* Green border */
|
241 |
border-radius: 5px;
|
242 |
padding: 15px 15px 0px 15px;
|
243 |
+
margin-bottom: 15px;
|
244 |
""",
|
245 |
),
|
246 |
H3("TxT360 CommonCrawl Filtering vs Other Pretraining Datasets"),
|
|
|
302 |
padding: 15px;
|
303 |
# border: 1px solid #949494; /* Grey border */
|
304 |
border-radius: 12px;
|
305 |
+
margin-bottom: 15px;
|
306 |
""", #https://colors.muz.li/palette/d3d3d3/949494/d3d3d3/d3d3d3/949494
|
307 |
),
|
308 |
#DV2("data/sample_wet.json", "data/sample_warc.json", 3),
|
|
|
318 |
Summary("Non-English Documents"),
|
319 |
DV("data/sample_non_en.json", 3, "Sample documents that are classified as non-English"),
|
320 |
style="""
|
321 |
+
background-color: #FAEAEA; /* Light pink background */
|
322 |
padding: 15px;
|
323 |
border-radius: 12px;
|
324 |
+
margin-bottom: 15px;
|
325 |
""",
|
326 |
),
|
327 |
|
|
|
334 |
background-color: #EAFFF1; /* Light green background */
|
335 |
padding: 15px;
|
336 |
border-radius: 12px;
|
337 |
+
margin-bottom: 15px;
|
338 |
""",
|
339 |
),
|
340 |
|
|
|
354 |
Summary("24 URL domains with more than 4k matches"),
|
355 |
DVS(urls_high_matches, "24 URL domains with more than 4k matches"),
|
356 |
style="""
|
357 |
+
background-color: #FAEAEA; /* Light pink background */
|
358 |
padding: 15px;
|
359 |
border-radius: 12px;
|
360 |
+
margin-bottom: 15px;
|
361 |
""",
|
362 |
),
|
363 |
|
|
|
368 |
Summary("6 url domains that are removed from the blocklist"),
|
369 |
DVS(urls_false_positives, "6 url domains that are removed from the blocklist"),
|
370 |
style="""
|
371 |
+
background-color: #FAEAEA; /* Light pink background */
|
372 |
padding: 15px;
|
373 |
border-radius: 12px;
|
374 |
+
margin-bottom: 15px;
|
375 |
""",
|
376 |
),
|
377 |
|
|
|
383 |
"Sample documents whose urls are blocked by the refined url blocklist",
|
384 |
),
|
385 |
style="""
|
386 |
+
background-color: #FAEAEA; /* Light pink background */
|
387 |
padding: 15px;
|
388 |
border-radius: 12px;
|
389 |
+
margin-bottom: 15px;
|
390 |
""",
|
391 |
),
|
392 |
|
|
|
402 |
"curated url domains that are excluded from our dataset",
|
403 |
),
|
404 |
style="""
|
405 |
+
background-color: #FAEAEA; /* Light pink background */
|
406 |
padding: 15px;
|
407 |
border-radius: 12px;
|
408 |
+
margin-bottom: 15px;
|
409 |
""",
|
410 |
),
|
411 |
|
|
|
416 |
background-color: #EAFFF1; /* Light green background */
|
417 |
padding: 15px;
|
418 |
border-radius: 12px;
|
419 |
+
margin-bottom: 15px;
|
420 |
""",
|
421 |
),
|
422 |
|
|
|
447 |
"Sample documents with lines that are removed by the rule of terminal punctuation",
|
448 |
),
|
449 |
style="""
|
450 |
+
background-color: #FAEAEA; /* Light pink background */
|
451 |
padding: 15px;
|
452 |
border-radius: 12px;
|
453 |
+
margin-bottom: 15px;
|
454 |
""",
|
455 |
),
|
456 |
|
|
|
474 |
"Sample documents that are removed by original C4 javascript rule but are kept after our refinement",
|
475 |
),
|
476 |
style="""
|
477 |
+
background-color: #FAEAEA; /* Light pink background */
|
478 |
padding: 15px;
|
479 |
border-radius: 12px;
|
480 |
+
margin-bottom: 15px;
|
481 |
""",
|
482 |
),
|
483 |
H3("2.2 Other Rules from RefinedWeb"),
|
|
|
498 |
"Sample documents with lines that are removed by the RefinedWeb rules",
|
499 |
),
|
500 |
style="""
|
501 |
+
background-color: #FAEAEA; /* Light pink background */
|
502 |
padding: 15px;
|
503 |
border-radius: 12px;
|
504 |
+
margin-bottom: 15px;
|
505 |
""",
|
506 |
),
|
507 |
H3("2.3 Toxic Lines"),
|
|
|
519 |
"Sample documents with toxic lines",
|
520 |
),
|
521 |
style="""
|
522 |
+
background-color: #FAEAEA; /* Light pink background */
|
523 |
padding: 15px;
|
524 |
border-radius: 12px;
|
525 |
+
margin-bottom: 15px;
|
526 |
""",
|
527 |
),
|
528 |
|
|
|
540 |
background-color: #EAFFF1; /* Light green background */
|
541 |
padding: 15px;
|
542 |
border-radius: 12px;
|
543 |
+
margin-bottom: 15px;
|
544 |
""",
|
545 |
),
|
546 |
P("""Similar to previous sections, we will present sample documents filtered out by the given quality signals.
|
|
|
584 |
background-color: #FFFAEA; /* Light yellow background */
|
585 |
padding: 15px;
|
586 |
border-radius: 12px;
|
587 |
+
margin-bottom: 15px;
|
588 |
""",
|
589 |
),
|
590 |
Details(
|
|
|
624 |
background-color: #FFFAEA; /* Light yellow background */
|
625 |
padding: 15px;
|
626 |
border-radius: 12px;
|
627 |
+
margin-bottom: 15px;
|
628 |
""",
|
629 |
),
|
630 |
P("""
|
|
|
671 |
background-color: #EAFFF1; /* Light green background */
|
672 |
padding: 15px;
|
673 |
border-radius: 12px;
|
674 |
+
margin-bottom: 15px;
|
675 |
""",
|
676 |
),
|
677 |
Details(
|
|
|
685 |
background-color: #EAFFF1; /* Light green background */
|
686 |
padding: 15px;
|
687 |
border-radius: 12px;
|
688 |
+
margin-bottom: 15px;
|
689 |
""",
|
690 |
),
|
691 |
H3("3.1.2 Fraction of Characters in the Most Common N-grams (n=2,3,4)"),
|
|
|
714 |
background-color: #FFFAEA; /* Light yellow background */
|
715 |
padding: 15px;
|
716 |
border-radius: 12px;
|
717 |
+
margin-bottom: 15px;
|
718 |
""",
|
719 |
),
|
720 |
Details(
|
|
|
758 |
background-color: #FFFAEA; /* Light yellow background */
|
759 |
padding: 15px;
|
760 |
border-radius: 12px;
|
761 |
+
margin-bottom: 15px;
|
762 |
""",
|
763 |
),
|
764 |
|
|
|
787 |
background-color: #FFFAEA; /* Light yellow background */
|
788 |
padding: 15px;
|
789 |
border-radius: 12px;
|
790 |
+
margin-bottom: 15px;
|
791 |
""",
|
792 |
),
|
793 |
P("""
|
|
|
820 |
background-color: #EAFFF1; /* Light green background */
|
821 |
padding: 15px;
|
822 |
border-radius: 12px;
|
823 |
+
margin-bottom: 15px;
|
824 |
""",
|
825 |
),
|
826 |
Details(
|
|
|
834 |
background-color: #EAFFF1; /* Light green background */
|
835 |
padding: 15px;
|
836 |
border-radius: 12px;
|
837 |
+
margin-bottom: 15px;
|
838 |
""",
|
839 |
),
|
840 |
H3("3.1.3 Fraction of Characters in Duplicated N-grams (n=5,...,10)"),
|
|
|
866 |
background-color: #FFFAEA; /* Light yellow background */
|
867 |
padding: 15px;
|
868 |
border-radius: 12px;
|
869 |
+
margin-bottom: 15px;
|
870 |
""",
|
871 |
),
|
872 |
Details(
|
|
|
925 |
background-color: #FFFAEA; /* Light yellow background */
|
926 |
padding: 15px;
|
927 |
border-radius: 12px;
|
928 |
+
margin-bottom: 15px;
|
929 |
""",
|
930 |
),
|
931 |
|
|
|
956 |
background-color: #FFFAEA; /* Light yellow background */
|
957 |
padding: 15px;
|
958 |
border-radius: 12px;
|
959 |
+
margin-bottom: 15px;
|
960 |
""",
|
961 |
),
|
962 |
P("""
|
|
|
1015 |
background-color: #EAFFF1; /* Light green background */
|
1016 |
padding: 15px;
|
1017 |
border-radius: 12px;
|
1018 |
+
margin-bottom: 15px;
|
1019 |
""",
|
1020 |
),
|
1021 |
Details(
|
|
|
1035 |
background-color: #EAFFF1; /* Light green background */
|
1036 |
padding: 15px;
|
1037 |
border-radius: 12px;
|
1038 |
+
margin-bottom: 15px;
|
1039 |
""",
|
1040 |
),
|
1041 |
H5(
|
|
|
1052 |
background-color: #EAFFF1; /* Light green background */
|
1053 |
padding: 15px;
|
1054 |
border-radius: 12px;
|
1055 |
+
margin-bottom: 15px;
|
1056 |
""",
|
1057 |
),
|
1058 |
H3("3.2 Line-wise Heuristics"),
|
|
|
1084 |
background-color: #FFFAEA; /* Light yellow background */
|
1085 |
padding: 15px;
|
1086 |
border-radius: 12px;
|
1087 |
+
margin-bottom: 15px;
|
1088 |
""",
|
1089 |
),
|
1090 |
Details(
|
|
|
1134 |
background-color: #FFFAEA; /* Light yellow background */
|
1135 |
padding: 15px;
|
1136 |
border-radius: 12px;
|
1137 |
+
margin-bottom: 15px;
|
1138 |
""",
|
1139 |
),
|
1140 |
|
|
|
1150 |
background-color: #EAFFF1; /* Light green background */
|
1151 |
padding: 15px;
|
1152 |
border-radius: 12px;
|
1153 |
+
margin-bottom: 15px;
|
1154 |
""",
|
1155 |
),
|
1156 |
|
|
|
1216 |
background-color: #FFFAEA; /* Light yellow background */
|
1217 |
padding: 15px;
|
1218 |
border-radius: 12px;
|
1219 |
+
margin-bottom: 15px;
|
1220 |
""",
|
1221 |
),
|
1222 |
|
|
|
1233 |
background-color: #FFFAEA; /* Light yellow background */
|
1234 |
padding: 15px;
|
1235 |
border-radius: 12px;
|
1236 |
+
margin-bottom: 15px;
|
1237 |
""",
|
1238 |
),
|
1239 |
P("""
|
|
|
1283 |
background-color: #FFFAEA; /* Light yellow background */
|
1284 |
padding: 15px;
|
1285 |
border-radius: 12px;
|
1286 |
+
margin-bottom: 15px;
|
1287 |
""",
|
1288 |
),
|
1289 |
P("""
|
|
|
1305 |
background-color: #EAFFF1; /* Light green background */
|
1306 |
padding: 15px;
|
1307 |
border-radius: 12px;
|
1308 |
+
margin-bottom: 15px;
|
1309 |
""",
|
1310 |
),
|
1311 |
|
|
|
1327 |
background-color: #FFFAEA; /* Light yellow background */
|
1328 |
padding: 15px;
|
1329 |
border-radius: 12px;
|
1330 |
+
margin-bottom: 15px;
|
1331 |
""",
|
1332 |
),
|
1333 |
Details(
|
|
|
1359 |
background-color: #FFFAEA; /* Light yellow background */
|
1360 |
padding: 15px;
|
1361 |
border-radius: 12px;
|
1362 |
+
margin-bottom: 15px;
|
1363 |
""",
|
1364 |
),
|
1365 |
|
|
|
1375 |
background-color: #FFFAEA; /* Light yellow background */
|
1376 |
padding: 15px;
|
1377 |
border-radius: 12px;
|
1378 |
+
margin-bottom: 15px;
|
1379 |
""",
|
1380 |
),
|
1381 |
Details(
|
|
|
1391 |
background-color: #EAFFF1; /* Light green background */
|
1392 |
padding: 15px;
|
1393 |
border-radius: 12px;
|
1394 |
+
margin-bottom: 15px;
|
1395 |
""",
|
1396 |
),
|
1397 |
|
|
|
1407 |
background-color: #FFFAEA; /* Light yellow background */
|
1408 |
padding: 15px;
|
1409 |
border-radius: 12px;
|
1410 |
+
margin-bottom: 15px;
|
1411 |
""",
|
1412 |
),
|
1413 |
Details(
|
|
|
1437 |
background-color: #FFFAEA; /* Light yellow background */
|
1438 |
padding: 15px;
|
1439 |
border-radius: 12px;
|
1440 |
+
margin-bottom: 15px;
|
1441 |
""",
|
1442 |
),
|
1443 |
Details(
|
|
|
1454 |
background-color: #FFFAEA; /* Light yellow background */
|
1455 |
padding: 15px;
|
1456 |
border-radius: 12px;
|
1457 |
+
margin-bottom: 15px;
|
1458 |
""",
|
1459 |
),
|
1460 |
P("""
|
|
|
1486 |
background-color: #EAFFF1; /* Light green background */
|
1487 |
padding: 15px;
|
1488 |
border-radius: 12px;
|
1489 |
+
margin-bottom: 15px;
|
1490 |
""",
|
1491 |
),
|
1492 |
H3("3.4 Others"),
|
|
|
1499 |
Summary("Sample documents containing 'lorem ipsum'"),
|
1500 |
DV("data/lorem_ipsum.json", 0, "Sample documents containing 'lorem ipsum'"),
|
1501 |
style="""
|
1502 |
+
background-color: #FAEAEA; /* Light pink background */
|
1503 |
padding: 15px;
|
1504 |
border-radius: 12px;
|
1505 |
+
margin-bottom: 15px;
|
1506 |
""",
|
1507 |
),
|
1508 |
H2("4. Deduplication"),
|