Token Classification
Transformers
Safetensors
bert
Inference Endpoints
Xmm commited on
Commit
98960a8
1 Parent(s): 2ebd5ea

Upload 11 files

Browse files
Files changed (7) hide show
  1. config.json +14 -14
  2. model.safetensors +1 -1
  3. optimizer.pt +1 -1
  4. rng_state.pth +1 -1
  5. scheduler.pt +1 -1
  6. trainer_state.json +533 -5
  7. training_args.bin +1 -1
config.json CHANGED
@@ -10,24 +10,24 @@
10
  "hidden_dropout_prob": 0.1,
11
  "hidden_size": 768,
12
  "id2label": {
13
- "0": "O",
14
- "1": "B-PER",
15
- "2": "I-PER",
16
- "3": "B-ORG",
17
- "4": "I-ORG",
18
- "5": "B-LOC",
19
- "6": "I-LOC"
20
  },
21
  "initializer_range": 0.02,
22
  "intermediate_size": 3072,
23
  "label2id": {
24
- "O": 0,
25
- "B-PER": 1,
26
- "I-PER": 2,
27
- "B-ORG": 3,
28
- "I-ORG": 4,
29
- "B-LOC": 5,
30
- "I-LOC": 6
31
  },
32
  "layer_norm_eps": 1e-12,
33
  "max_position_embeddings": 512,
 
10
  "hidden_dropout_prob": 0.1,
11
  "hidden_size": 768,
12
  "id2label": {
13
+ "0": "LABEL_0",
14
+ "1": "LABEL_1",
15
+ "2": "LABEL_2",
16
+ "3": "LABEL_3",
17
+ "4": "LABEL_4",
18
+ "5": "LABEL_5",
19
+ "6": "LABEL_6"
20
  },
21
  "initializer_range": 0.02,
22
  "intermediate_size": 3072,
23
  "label2id": {
24
+ "LABEL_0": 0,
25
+ "LABEL_1": 1,
26
+ "LABEL_2": 2,
27
+ "LABEL_3": 3,
28
+ "LABEL_4": 4,
29
+ "LABEL_5": 5,
30
+ "LABEL_6": 6
31
  },
32
  "layer_norm_eps": 1e-12,
33
  "max_position_embeddings": 512,
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a3464507543bcd5a63144ad9e7c40083a605e6ae28e76daa26b9df5a09404d6c
3
  size 709096284
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:13aae2a832fcc9a396a3d5b345e7240ec562d07e7044b6689f124e8871053e04
3
  size 709096284
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0c4762c65f673b48b6e30a37ae48167d77dc5e9c0f1ef957bfd9c54a2b27f232
3
  size 1418312250
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ffd8b3f7f6f848ed76978c04c531e580dfaa7e4f8acac2247bfa36c8d866ce66
3
  size 1418312250
rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e173b028a7b49b7598e1d0be461b9d1381eae9728de0257e5a2a3230ba0a5ad5
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:99ecc6ce06f99c747d7eccd8c8e5ba6a5a7d3824113e1e8d26da659a5de2e704
3
  size 14244
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7e9d47ac0c4a6583ddb94f81f2a71d6ba82f5603d2acd75906e08ceb02509c00
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c01948096ca09d6133b37c1d4832dd98f343fc0f51ab59a285b9840e309e1d5c
3
  size 1064
trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": null,
3
- "best_model_checkpoint": null,
4
- "epoch": 0.9518328315919957,
5
  "eval_steps": 1000,
6
- "global_step": 21500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -469,6 +469,534 @@
469
  "eval_samples_per_second": 76.309,
470
  "eval_steps_per_second": 0.15,
471
  "step": 21000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
472
  }
473
  ],
474
  "logging_steps": 1000,
@@ -476,7 +1004,7 @@
476
  "num_input_tokens_seen": 0,
477
  "num_train_epochs": 7,
478
  "save_steps": 500,
479
- "total_flos": 5913608888301312.0,
480
  "train_batch_size": 8,
481
  "trial_name": null,
482
  "trial_params": null
 
1
  {
2
+ "best_metric": 0.257554292678833,
3
+ "best_model_checkpoint": "./checkpoint/checkpoint-45000",
4
+ "epoch": 1.9922082521692934,
5
  "eval_steps": 1000,
6
+ "global_step": 45000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
469
  "eval_samples_per_second": 76.309,
470
  "eval_steps_per_second": 0.15,
471
  "step": 21000
472
+ },
473
+ {
474
+ "epoch": 0.97,
475
+ "grad_norm": 9.784720420837402,
476
+ "learning_rate": 1.7217232917604798e-05,
477
+ "loss": 0.2862,
478
+ "step": 22000
479
+ },
480
+ {
481
+ "epoch": 0.97,
482
+ "eval_LOC_f1": 0.8464706167336854,
483
+ "eval_ORG_f1": 0.7516536234342064,
484
+ "eval_PER_f1": 0.8579948502396776,
485
+ "eval_loss": 0.2908647060394287,
486
+ "eval_overall_accuracy": 0.9129029665003394,
487
+ "eval_overall_f1": 0.822899354947748,
488
+ "eval_overall_precision": 0.8222161241750046,
489
+ "eval_overall_recall": 0.823583722143129,
490
+ "eval_runtime": 998.6892,
491
+ "eval_samples_per_second": 65.786,
492
+ "eval_steps_per_second": 0.515,
493
+ "step": 22000
494
+ },
495
+ {
496
+ "epoch": 1.02,
497
+ "grad_norm": 10.310678482055664,
498
+ "learning_rate": 1.709074350476865e-05,
499
+ "loss": 0.2578,
500
+ "step": 23000
501
+ },
502
+ {
503
+ "epoch": 1.02,
504
+ "eval_LOC_f1": 0.841074796336982,
505
+ "eval_ORG_f1": 0.7516604978958576,
506
+ "eval_PER_f1": 0.8534542846463892,
507
+ "eval_loss": 0.31625884771347046,
508
+ "eval_overall_accuracy": 0.9131046730418835,
509
+ "eval_overall_f1": 0.8203140861475114,
510
+ "eval_overall_precision": 0.8292370287689482,
511
+ "eval_overall_recall": 0.811581128511622,
512
+ "eval_runtime": 992.6607,
513
+ "eval_samples_per_second": 66.186,
514
+ "eval_steps_per_second": 0.518,
515
+ "step": 23000
516
+ },
517
+ {
518
+ "epoch": 1.06,
519
+ "grad_norm": 12.428755760192871,
520
+ "learning_rate": 1.6964254091932504e-05,
521
+ "loss": 0.241,
522
+ "step": 24000
523
+ },
524
+ {
525
+ "epoch": 1.06,
526
+ "eval_LOC_f1": 0.8458245540438758,
527
+ "eval_ORG_f1": 0.7417449698379979,
528
+ "eval_PER_f1": 0.8449764637383674,
529
+ "eval_loss": 0.31306418776512146,
530
+ "eval_overall_accuracy": 0.910274082719034,
531
+ "eval_overall_f1": 0.8166761890407407,
532
+ "eval_overall_precision": 0.8123452830057111,
533
+ "eval_overall_recall": 0.8210535218414784,
534
+ "eval_runtime": 1000.0329,
535
+ "eval_samples_per_second": 65.698,
536
+ "eval_steps_per_second": 0.514,
537
+ "step": 24000
538
+ },
539
+ {
540
+ "epoch": 1.11,
541
+ "grad_norm": 7.654708385467529,
542
+ "learning_rate": 1.683776467909636e-05,
543
+ "loss": 0.2273,
544
+ "step": 25000
545
+ },
546
+ {
547
+ "epoch": 1.11,
548
+ "eval_LOC_f1": 0.8447393652873104,
549
+ "eval_ORG_f1": 0.755644780920822,
550
+ "eval_PER_f1": 0.8620841122450023,
551
+ "eval_loss": 0.29408279061317444,
552
+ "eval_overall_accuracy": 0.9140618487334913,
553
+ "eval_overall_f1": 0.8249119505874042,
554
+ "eval_overall_precision": 0.818763755528247,
555
+ "eval_overall_recall": 0.8311531793129696,
556
+ "eval_runtime": 941.7518,
557
+ "eval_samples_per_second": 69.764,
558
+ "eval_steps_per_second": 0.546,
559
+ "step": 25000
560
+ },
561
+ {
562
+ "epoch": 1.15,
563
+ "grad_norm": 0.1430547833442688,
564
+ "learning_rate": 1.6711275266260215e-05,
565
+ "loss": 0.2512,
566
+ "step": 26000
567
+ },
568
+ {
569
+ "epoch": 1.15,
570
+ "eval_LOC_f1": 0.8500541479262808,
571
+ "eval_ORG_f1": 0.7624169717495258,
572
+ "eval_PER_f1": 0.865798904312724,
573
+ "eval_loss": 0.2906274199485779,
574
+ "eval_overall_accuracy": 0.9169281656762454,
575
+ "eval_overall_f1": 0.8295221228947797,
576
+ "eval_overall_precision": 0.8304302989207211,
577
+ "eval_overall_recall": 0.8286159310996152,
578
+ "eval_runtime": 948.7716,
579
+ "eval_samples_per_second": 69.247,
580
+ "eval_steps_per_second": 0.542,
581
+ "step": 26000
582
+ },
583
+ {
584
+ "epoch": 1.2,
585
+ "grad_norm": 2.0707778930664062,
586
+ "learning_rate": 1.658478585342407e-05,
587
+ "loss": 0.2355,
588
+ "step": 27000
589
+ },
590
+ {
591
+ "epoch": 1.2,
592
+ "eval_LOC_f1": 0.8528026151331602,
593
+ "eval_ORG_f1": 0.7654581692212784,
594
+ "eval_PER_f1": 0.8630401626842907,
595
+ "eval_loss": 0.30151626467704773,
596
+ "eval_overall_accuracy": 0.9169624036869872,
597
+ "eval_overall_f1": 0.8306308694863459,
598
+ "eval_overall_precision": 0.8223685119434121,
599
+ "eval_overall_recall": 0.8390609362445908,
600
+ "eval_runtime": 985.7403,
601
+ "eval_samples_per_second": 66.65,
602
+ "eval_steps_per_second": 0.521,
603
+ "step": 27000
604
+ },
605
+ {
606
+ "epoch": 1.24,
607
+ "grad_norm": 7.580867290496826,
608
+ "learning_rate": 1.6458296440587925e-05,
609
+ "loss": 0.2395,
610
+ "step": 28000
611
+ },
612
+ {
613
+ "epoch": 1.24,
614
+ "eval_LOC_f1": 0.8430476991285736,
615
+ "eval_ORG_f1": 0.7574625494543183,
616
+ "eval_PER_f1": 0.8637680570098303,
617
+ "eval_loss": 0.3095172941684723,
618
+ "eval_overall_accuracy": 0.914303747722428,
619
+ "eval_overall_f1": 0.8256623551811597,
620
+ "eval_overall_precision": 0.8230774618293879,
621
+ "eval_overall_recall": 0.8282635355144271,
622
+ "eval_runtime": 999.1758,
623
+ "eval_samples_per_second": 65.754,
624
+ "eval_steps_per_second": 0.514,
625
+ "step": 28000
626
+ },
627
+ {
628
+ "epoch": 1.28,
629
+ "grad_norm": 0.15234589576721191,
630
+ "learning_rate": 1.633180702775178e-05,
631
+ "loss": 0.2322,
632
+ "step": 29000
633
+ },
634
+ {
635
+ "epoch": 1.28,
636
+ "eval_LOC_f1": 0.8442432749878247,
637
+ "eval_ORG_f1": 0.7601534050352662,
638
+ "eval_PER_f1": 0.8612064440586263,
639
+ "eval_loss": 0.2927219569683075,
640
+ "eval_overall_accuracy": 0.9170986114253731,
641
+ "eval_overall_f1": 0.8258450109270158,
642
+ "eval_overall_precision": 0.8182069728832319,
643
+ "eval_overall_recall": 0.8336269963209901,
644
+ "eval_runtime": 972.0367,
645
+ "eval_samples_per_second": 67.59,
646
+ "eval_steps_per_second": 0.529,
647
+ "step": 29000
648
+ },
649
+ {
650
+ "epoch": 1.33,
651
+ "grad_norm": 12.916611671447754,
652
+ "learning_rate": 1.6205317614915632e-05,
653
+ "loss": 0.2433,
654
+ "step": 30000
655
+ },
656
+ {
657
+ "epoch": 1.33,
658
+ "eval_LOC_f1": 0.8444784033582501,
659
+ "eval_ORG_f1": 0.7596891611525098,
660
+ "eval_PER_f1": 0.8642486197465865,
661
+ "eval_loss": 0.2888515293598175,
662
+ "eval_overall_accuracy": 0.91348501268295,
663
+ "eval_overall_f1": 0.8271813480146815,
664
+ "eval_overall_precision": 0.8160067477678877,
665
+ "eval_overall_recall": 0.8386662531891801,
666
+ "eval_runtime": 961.788,
667
+ "eval_samples_per_second": 68.31,
668
+ "eval_steps_per_second": 0.534,
669
+ "step": 30000
670
+ },
671
+ {
672
+ "epoch": 1.37,
673
+ "grad_norm": 0.4655471444129944,
674
+ "learning_rate": 1.607882820207949e-05,
675
+ "loss": 0.2358,
676
+ "step": 31000
677
+ },
678
+ {
679
+ "epoch": 1.37,
680
+ "eval_LOC_f1": 0.8478889967668045,
681
+ "eval_ORG_f1": 0.7620526592328799,
682
+ "eval_PER_f1": 0.8645434620748959,
683
+ "eval_loss": 0.29194697737693787,
684
+ "eval_overall_accuracy": 0.9150346548213073,
685
+ "eval_overall_f1": 0.8278141773862643,
686
+ "eval_overall_precision": 0.8183533624875177,
687
+ "eval_overall_recall": 0.8374962998463555,
688
+ "eval_runtime": 937.9473,
689
+ "eval_samples_per_second": 70.047,
690
+ "eval_steps_per_second": 0.548,
691
+ "step": 31000
692
+ },
693
+ {
694
+ "epoch": 1.42,
695
+ "grad_norm": 4.9739909172058105,
696
+ "learning_rate": 1.5952338789243342e-05,
697
+ "loss": 0.2258,
698
+ "step": 32000
699
+ },
700
+ {
701
+ "epoch": 1.42,
702
+ "eval_LOC_f1": 0.8552193079338859,
703
+ "eval_ORG_f1": 0.763369485049633,
704
+ "eval_PER_f1": 0.8649409287399143,
705
+ "eval_loss": 0.2976300120353699,
706
+ "eval_overall_accuracy": 0.9165649450405498,
707
+ "eval_overall_f1": 0.8319835859514232,
708
+ "eval_overall_precision": 0.8266665738956187,
709
+ "eval_overall_recall": 0.8373694374356878,
710
+ "eval_runtime": 961.624,
711
+ "eval_samples_per_second": 68.322,
712
+ "eval_steps_per_second": 0.535,
713
+ "step": 32000
714
+ },
715
+ {
716
+ "epoch": 1.46,
717
+ "grad_norm": 13.10745620727539,
718
+ "learning_rate": 1.5825849376407196e-05,
719
+ "loss": 0.2372,
720
+ "step": 33000
721
+ },
722
+ {
723
+ "epoch": 1.46,
724
+ "eval_LOC_f1": 0.8430370470368156,
725
+ "eval_ORG_f1": 0.7732836874254703,
726
+ "eval_PER_f1": 0.8708081138557172,
727
+ "eval_loss": 0.2847268581390381,
728
+ "eval_overall_accuracy": 0.91798879970466,
729
+ "eval_overall_f1": 0.8324051096948847,
730
+ "eval_overall_precision": 0.8326281644453847,
731
+ "eval_overall_recall": 0.8321821744217188,
732
+ "eval_runtime": 1011.399,
733
+ "eval_samples_per_second": 64.96,
734
+ "eval_steps_per_second": 0.508,
735
+ "step": 33000
736
+ },
737
+ {
738
+ "epoch": 1.51,
739
+ "grad_norm": 2.8712925910949707,
740
+ "learning_rate": 1.569935996357105e-05,
741
+ "loss": 0.2358,
742
+ "step": 34000
743
+ },
744
+ {
745
+ "epoch": 1.51,
746
+ "eval_LOC_f1": 0.8512200643745139,
747
+ "eval_ORG_f1": 0.7646914589293013,
748
+ "eval_PER_f1": 0.8725994049229104,
749
+ "eval_loss": 0.28364238142967224,
750
+ "eval_overall_accuracy": 0.9192392314013171,
751
+ "eval_overall_f1": 0.8323193111236589,
752
+ "eval_overall_precision": 0.8335312495582227,
753
+ "eval_overall_recall": 0.831110891842747,
754
+ "eval_runtime": 994.3994,
755
+ "eval_samples_per_second": 66.07,
756
+ "eval_steps_per_second": 0.517,
757
+ "step": 34000
758
+ },
759
+ {
760
+ "epoch": 1.55,
761
+ "grad_norm": 1.0189175605773926,
762
+ "learning_rate": 1.5572870550734906e-05,
763
+ "loss": 0.2314,
764
+ "step": 35000
765
+ },
766
+ {
767
+ "epoch": 1.55,
768
+ "eval_LOC_f1": 0.8507849855779469,
769
+ "eval_ORG_f1": 0.7650251812241418,
770
+ "eval_PER_f1": 0.8689379418372184,
771
+ "eval_loss": 0.285095751285553,
772
+ "eval_overall_accuracy": 0.9186356003858475,
773
+ "eval_overall_f1": 0.8310027409855706,
774
+ "eval_overall_precision": 0.8214160303503928,
775
+ "eval_overall_recall": 0.8408158662588275,
776
+ "eval_runtime": 935.5326,
777
+ "eval_samples_per_second": 70.227,
778
+ "eval_steps_per_second": 0.549,
779
+ "step": 35000
780
+ },
781
+ {
782
+ "epoch": 1.59,
783
+ "grad_norm": 3.663295269012451,
784
+ "learning_rate": 1.544638113789876e-05,
785
+ "loss": 0.234,
786
+ "step": 36000
787
+ },
788
+ {
789
+ "epoch": 1.59,
790
+ "eval_LOC_f1": 0.8537102746066029,
791
+ "eval_ORG_f1": 0.7690944951636334,
792
+ "eval_PER_f1": 0.8700584783258904,
793
+ "eval_loss": 0.28537243604660034,
794
+ "eval_overall_accuracy": 0.9194431708566052,
795
+ "eval_overall_f1": 0.8341569064491033,
796
+ "eval_overall_precision": 0.8295923488735222,
797
+ "eval_overall_recall": 0.8387719718647365,
798
+ "eval_runtime": 945.3448,
799
+ "eval_samples_per_second": 69.498,
800
+ "eval_steps_per_second": 0.544,
801
+ "step": 36000
802
+ },
803
+ {
804
+ "epoch": 1.64,
805
+ "grad_norm": 2.8524346351623535,
806
+ "learning_rate": 1.5319891725062616e-05,
807
+ "loss": 0.218,
808
+ "step": 37000
809
+ },
810
+ {
811
+ "epoch": 1.64,
812
+ "eval_LOC_f1": 0.8480498985496354,
813
+ "eval_ORG_f1": 0.7720144752714113,
814
+ "eval_PER_f1": 0.8681549995333838,
815
+ "eval_loss": 0.28636589646339417,
816
+ "eval_overall_accuracy": 0.9189809577115909,
817
+ "eval_overall_f1": 0.8333121549691502,
818
+ "eval_overall_precision": 0.8346791871137447,
819
+ "eval_overall_recall": 0.8319495933354947,
820
+ "eval_runtime": 971.8405,
821
+ "eval_samples_per_second": 67.604,
822
+ "eval_steps_per_second": 0.529,
823
+ "step": 37000
824
+ },
825
+ {
826
+ "epoch": 1.68,
827
+ "grad_norm": 4.014327049255371,
828
+ "learning_rate": 1.5193402312226468e-05,
829
+ "loss": 0.239,
830
+ "step": 38000
831
+ },
832
+ {
833
+ "epoch": 1.68,
834
+ "eval_LOC_f1": 0.856354181853526,
835
+ "eval_ORG_f1": 0.7607628933739787,
836
+ "eval_PER_f1": 0.8713080831892713,
837
+ "eval_loss": 0.2887238562107086,
838
+ "eval_overall_accuracy": 0.9170405556680282,
839
+ "eval_overall_f1": 0.8318997253753843,
840
+ "eval_overall_precision": 0.8238473394292507,
841
+ "eval_overall_recall": 0.8401110750884513,
842
+ "eval_runtime": 984.8035,
843
+ "eval_samples_per_second": 66.714,
844
+ "eval_steps_per_second": 0.522,
845
+ "step": 38000
846
+ },
847
+ {
848
+ "epoch": 1.73,
849
+ "grad_norm": 48.57558059692383,
850
+ "learning_rate": 1.5066912899390323e-05,
851
+ "loss": 0.2374,
852
+ "step": 39000
853
+ },
854
+ {
855
+ "epoch": 1.73,
856
+ "eval_LOC_f1": 0.8629891838741397,
857
+ "eval_ORG_f1": 0.7734306355559895,
858
+ "eval_PER_f1": 0.8760364502093424,
859
+ "eval_loss": 0.28808096051216125,
860
+ "eval_overall_accuracy": 0.921347101975682,
861
+ "eval_overall_f1": 0.8414042377584563,
862
+ "eval_overall_precision": 0.8495036133499992,
863
+ "eval_overall_recall": 0.8334578464400998,
864
+ "eval_runtime": 1002.3252,
865
+ "eval_samples_per_second": 65.548,
866
+ "eval_steps_per_second": 0.513,
867
+ "step": 39000
868
+ },
869
+ {
870
+ "epoch": 1.77,
871
+ "grad_norm": 18.295597076416016,
872
+ "learning_rate": 1.4940423486554176e-05,
873
+ "loss": 0.2327,
874
+ "step": 40000
875
+ },
876
+ {
877
+ "epoch": 1.77,
878
+ "eval_LOC_f1": 0.8568252173162995,
879
+ "eval_ORG_f1": 0.765257865392274,
880
+ "eval_PER_f1": 0.8650392170611746,
881
+ "eval_loss": 0.27475544810295105,
882
+ "eval_overall_accuracy": 0.918402633051887,
883
+ "eval_overall_f1": 0.8338476462277082,
884
+ "eval_overall_precision": 0.8316554025631678,
885
+ "eval_overall_recall": 0.8360514779470842,
886
+ "eval_runtime": 966.7516,
887
+ "eval_samples_per_second": 67.96,
888
+ "eval_steps_per_second": 0.532,
889
+ "step": 40000
890
+ },
891
+ {
892
+ "epoch": 1.82,
893
+ "grad_norm": 8.551658630371094,
894
+ "learning_rate": 1.481393407371803e-05,
895
+ "loss": 0.2266,
896
+ "step": 41000
897
+ },
898
+ {
899
+ "epoch": 1.82,
900
+ "eval_LOC_f1": 0.8626095376422174,
901
+ "eval_ORG_f1": 0.7735328126106142,
902
+ "eval_PER_f1": 0.8753750051370567,
903
+ "eval_loss": 0.27126726508140564,
904
+ "eval_overall_accuracy": 0.9233812863964941,
905
+ "eval_overall_f1": 0.8405072872606979,
906
+ "eval_overall_precision": 0.835852936051577,
907
+ "eval_overall_recall": 0.845213763161975,
908
+ "eval_runtime": 949.0619,
909
+ "eval_samples_per_second": 69.226,
910
+ "eval_steps_per_second": 0.542,
911
+ "step": 41000
912
+ },
913
+ {
914
+ "epoch": 1.86,
915
+ "grad_norm": 15.628961563110352,
916
+ "learning_rate": 1.4687444660881885e-05,
917
+ "loss": 0.2321,
918
+ "step": 42000
919
+ },
920
+ {
921
+ "epoch": 1.86,
922
+ "eval_LOC_f1": 0.8591221352011853,
923
+ "eval_ORG_f1": 0.7785703957198631,
924
+ "eval_PER_f1": 0.8756674763500703,
925
+ "eval_loss": 0.2676946520805359,
926
+ "eval_overall_accuracy": 0.9217490264496077,
927
+ "eval_overall_f1": 0.8412990217616726,
928
+ "eval_overall_precision": 0.8391781494337506,
929
+ "eval_overall_recall": 0.8434306415009233,
930
+ "eval_runtime": 933.8514,
931
+ "eval_samples_per_second": 70.354,
932
+ "eval_steps_per_second": 0.55,
933
+ "step": 42000
934
+ },
935
+ {
936
+ "epoch": 1.9,
937
+ "grad_norm": 3.160637140274048,
938
+ "learning_rate": 1.456095524804574e-05,
939
+ "loss": 0.2103,
940
+ "step": 43000
941
+ },
942
+ {
943
+ "epoch": 1.9,
944
+ "eval_LOC_f1": 0.8572030651340997,
945
+ "eval_ORG_f1": 0.7727868545480081,
946
+ "eval_PER_f1": 0.8757355631042593,
947
+ "eval_loss": 0.2812298536300659,
948
+ "eval_overall_accuracy": 0.9198480725488561,
949
+ "eval_overall_f1": 0.8381508761238307,
950
+ "eval_overall_precision": 0.8330536854344057,
951
+ "eval_overall_recall": 0.8433108270019594,
952
+ "eval_runtime": 959.7072,
953
+ "eval_samples_per_second": 68.458,
954
+ "eval_steps_per_second": 0.536,
955
+ "step": 43000
956
+ },
957
+ {
958
+ "epoch": 1.95,
959
+ "grad_norm": 19.63943862915039,
960
+ "learning_rate": 1.4434465835209595e-05,
961
+ "loss": 0.2299,
962
+ "step": 44000
963
+ },
964
+ {
965
+ "epoch": 1.95,
966
+ "eval_LOC_f1": 0.8625562915195548,
967
+ "eval_ORG_f1": 0.7738310591448732,
968
+ "eval_PER_f1": 0.8727216526711657,
969
+ "eval_loss": 0.2698276937007904,
970
+ "eval_overall_accuracy": 0.9231393874075574,
971
+ "eval_overall_f1": 0.8398996262454074,
972
+ "eval_overall_precision": 0.8411791735887738,
973
+ "eval_overall_recall": 0.8386239657189575,
974
+ "eval_runtime": 959.292,
975
+ "eval_samples_per_second": 68.488,
976
+ "eval_steps_per_second": 0.536,
977
+ "step": 44000
978
+ },
979
+ {
980
+ "epoch": 1.99,
981
+ "grad_norm": 5.957523345947266,
982
+ "learning_rate": 1.4307976422373449e-05,
983
+ "loss": 0.217,
984
+ "step": 45000
985
+ },
986
+ {
987
+ "epoch": 1.99,
988
+ "eval_LOC_f1": 0.8548825805462975,
989
+ "eval_ORG_f1": 0.7795103416137936,
990
+ "eval_PER_f1": 0.8763479817569583,
991
+ "eval_loss": 0.257554292678833,
992
+ "eval_overall_accuracy": 0.9223258625001488,
993
+ "eval_overall_f1": 0.8396542612618684,
994
+ "eval_overall_precision": 0.8451246242708534,
995
+ "eval_overall_recall": 0.834254260462625,
996
+ "eval_runtime": 986.5064,
997
+ "eval_samples_per_second": 66.599,
998
+ "eval_steps_per_second": 0.521,
999
+ "step": 45000
1000
  }
1001
  ],
1002
  "logging_steps": 1000,
 
1004
  "num_input_tokens_seen": 0,
1005
  "num_train_epochs": 7,
1006
  "save_steps": 500,
1007
+ "total_flos": 1.2387920637624576e+16,
1008
  "train_batch_size": 8,
1009
  "trial_name": null,
1010
  "trial_params": null
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c0f7a60270f3ee223660c001ddc4e29051fd77c829918010af9f92d6bd976d2e
3
  size 4856
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:044ffe2888cc55ab35cc5b7f4e9520b0a9a678ac5d8c8ae4f6fa63b6dfcbe6f5
3
  size 4856