Rodrigo1771 commited on
Commit
ae36b45
1 Parent(s): f4151db

End of training

Browse files
README.md CHANGED
@@ -2,9 +2,10 @@
2
  license: apache-2.0
3
  base_model: PlanTL-GOB-ES/bsc-bio-ehr-es
4
  tags:
 
5
  - generated_from_trainer
6
  datasets:
7
- - symptemist-ner
8
  metrics:
9
  - precision
10
  - recall
@@ -17,24 +18,24 @@ model-index:
17
  name: Token Classification
18
  type: token-classification
19
  dataset:
20
- name: symptemist-ner
21
- type: symptemist-ner
22
  config: SympTEMIST NER
23
  split: validation
24
  args: SympTEMIST NER
25
  metrics:
26
  - name: Precision
27
  type: precision
28
- value: 0.6594676042189854
29
  - name: Recall
30
  type: recall
31
  value: 0.7186644772851669
32
  - name: F1
33
  type: f1
34
- value: 0.6877946568884233
35
  - name: Accuracy
36
  type: accuracy
37
- value: 0.9487631941993647
38
  ---
39
 
40
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
@@ -42,13 +43,13 @@ should probably proofread and complete it, then remove this comment. -->
42
 
43
  # output
44
 
45
- This model is a fine-tuned version of [PlanTL-GOB-ES/bsc-bio-ehr-es](https://huggingface.co/PlanTL-GOB-ES/bsc-bio-ehr-es) on the symptemist-ner dataset.
46
  It achieves the following results on the evaluation set:
47
- - Loss: 0.2767
48
- - Precision: 0.6595
49
  - Recall: 0.7187
50
- - F1: 0.6878
51
- - Accuracy: 0.9488
52
 
53
  ## Model description
54
 
 
2
  license: apache-2.0
3
  base_model: PlanTL-GOB-ES/bsc-bio-ehr-es
4
  tags:
5
+ - token-classification
6
  - generated_from_trainer
7
  datasets:
8
+ - Rodrigo1771/symptemist-ner
9
  metrics:
10
  - precision
11
  - recall
 
18
  name: Token Classification
19
  type: token-classification
20
  dataset:
21
+ name: Rodrigo1771/symptemist-ner
22
+ type: Rodrigo1771/symptemist-ner
23
  config: SympTEMIST NER
24
  split: validation
25
  args: SympTEMIST NER
26
  metrics:
27
  - name: Precision
28
  type: precision
29
+ value: 0.6675139806812405
30
  - name: Recall
31
  type: recall
32
  value: 0.7186644772851669
33
  - name: F1
34
  type: f1
35
+ value: 0.6921454928835002
36
  - name: Accuracy
37
  type: accuracy
38
+ value: 0.9483461131252205
39
  ---
40
 
41
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
 
43
 
44
  # output
45
 
46
+ This model is a fine-tuned version of [PlanTL-GOB-ES/bsc-bio-ehr-es](https://huggingface.co/PlanTL-GOB-ES/bsc-bio-ehr-es) on the Rodrigo1771/symptemist-ner dataset.
47
  It achieves the following results on the evaluation set:
48
+ - Loss: 0.2747
49
+ - Precision: 0.6675
50
  - Recall: 0.7187
51
+ - F1: 0.6921
52
+ - Accuracy: 0.9483
53
 
54
  ## Model description
55
 
all_results.json CHANGED
@@ -1,26 +1,26 @@
1
  {
2
  "epoch": 10.0,
3
- "eval_accuracy": 0.9988184887042326,
4
- "eval_f1": 0.936867469879518,
5
- "eval_loss": 0.006724909413605928,
6
- "eval_precision": 0.9328214971209213,
7
- "eval_recall": 0.9409486931268151,
8
- "eval_runtime": 14.1827,
9
- "eval_samples": 6798,
10
- "eval_samples_per_second": 479.317,
11
- "eval_steps_per_second": 59.932,
12
- "predict_accuracy": 0.9981367644802958,
13
- "predict_f1": 0.8965517241379309,
14
- "predict_loss": 0.010722821578383446,
15
- "predict_precision": 0.8768736616702355,
16
- "predict_recall": 0.9171332586786114,
17
- "predict_runtime": 27.7085,
18
- "predict_samples_per_second": 527.095,
19
- "predict_steps_per_second": 65.9,
20
- "total_flos": 1.4262694978690116e+16,
21
- "train_loss": 0.0022696754537961062,
22
- "train_runtime": 1261.5031,
23
- "train_samples": 27198,
24
- "train_samples_per_second": 215.6,
25
- "train_steps_per_second": 3.369
26
  }
 
1
  {
2
  "epoch": 10.0,
3
+ "eval_accuracy": 0.9483461131252205,
4
+ "eval_f1": 0.6921454928835002,
5
+ "eval_loss": 0.27473828196525574,
6
+ "eval_precision": 0.6675139806812405,
7
+ "eval_recall": 0.7186644772851669,
8
+ "eval_runtime": 5.5231,
9
+ "eval_samples": 2519,
10
+ "eval_samples_per_second": 456.084,
11
+ "eval_steps_per_second": 57.033,
12
+ "predict_accuracy": 0.9465558078053287,
13
+ "predict_f1": 0.6901960784313725,
14
+ "predict_loss": 0.29509031772613525,
15
+ "predict_precision": 0.6731946144430845,
16
+ "predict_recall": 0.7080785323463148,
17
+ "predict_runtime": 8.8975,
18
+ "predict_samples_per_second": 454.847,
19
+ "predict_steps_per_second": 56.87,
20
+ "total_flos": 4433220248012460.0,
21
+ "train_loss": 0.05337127685546875,
22
+ "train_runtime": 453.0745,
23
+ "train_samples": 9597,
24
+ "train_samples_per_second": 211.819,
25
+ "train_steps_per_second": 3.311
26
  }
eval_results.json CHANGED
@@ -1,12 +1,12 @@
1
  {
2
  "epoch": 10.0,
3
- "eval_accuracy": 0.9988184887042326,
4
- "eval_f1": 0.936867469879518,
5
- "eval_loss": 0.006724909413605928,
6
- "eval_precision": 0.9328214971209213,
7
- "eval_recall": 0.9409486931268151,
8
- "eval_runtime": 14.1827,
9
- "eval_samples": 6798,
10
- "eval_samples_per_second": 479.317,
11
- "eval_steps_per_second": 59.932
12
  }
 
1
  {
2
  "epoch": 10.0,
3
+ "eval_accuracy": 0.9483461131252205,
4
+ "eval_f1": 0.6921454928835002,
5
+ "eval_loss": 0.27473828196525574,
6
+ "eval_precision": 0.6675139806812405,
7
+ "eval_recall": 0.7186644772851669,
8
+ "eval_runtime": 5.5231,
9
+ "eval_samples": 2519,
10
+ "eval_samples_per_second": 456.084,
11
+ "eval_steps_per_second": 57.033
12
  }
predict_results.json CHANGED
@@ -1,10 +1,10 @@
1
  {
2
- "predict_accuracy": 0.9981367644802958,
3
- "predict_f1": 0.8965517241379309,
4
- "predict_loss": 0.010722821578383446,
5
- "predict_precision": 0.8768736616702355,
6
- "predict_recall": 0.9171332586786114,
7
- "predict_runtime": 27.7085,
8
- "predict_samples_per_second": 527.095,
9
- "predict_steps_per_second": 65.9
10
  }
 
1
  {
2
+ "predict_accuracy": 0.9465558078053287,
3
+ "predict_f1": 0.6901960784313725,
4
+ "predict_loss": 0.29509031772613525,
5
+ "predict_precision": 0.6731946144430845,
6
+ "predict_recall": 0.7080785323463148,
7
+ "predict_runtime": 8.8975,
8
+ "predict_samples_per_second": 454.847,
9
+ "predict_steps_per_second": 56.87
10
  }
predictions.txt CHANGED
The diff for this file is too large to render. See raw diff
 
tb/events.out.tfevents.1725057364.6b97e535edda.51600.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:895e3f1ac3a8be4076f491ddd7b0d149722e94c01471a91b13d80e70d799cb09
3
+ size 560
train.log CHANGED
@@ -857,3 +857,51 @@ Training completed. Do not forget to share your model on huggingface.co/models =
857
  {'eval_loss': 0.27674129605293274, 'eval_precision': 0.6594676042189854, 'eval_recall': 0.7186644772851669, 'eval_f1': 0.6877946568884233, 'eval_accuracy': 0.9487631941993647, 'eval_runtime': 6.0833, 'eval_samples_per_second': 414.082, 'eval_steps_per_second': 51.781, 'epoch': 10.0}
858
  {'train_runtime': 453.0745, 'train_samples_per_second': 211.819, 'train_steps_per_second': 3.311, 'train_loss': 0.05337127685546875, 'epoch': 10.0}
859
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
860
  0%| | 0/315 [00:00<?, ?it/s]
861
  3%|▎ | 8/315 [00:00<00:03, 78.55it/s]
862
  5%|▌ | 16/315 [00:00<00:03, 76.48it/s]
863
  8%|▊ | 24/315 [00:00<00:03, 77.21it/s]
864
  10%|█ | 32/315 [00:00<00:03, 74.08it/s]
865
  13%|█▎ | 41/315 [00:00<00:03, 76.11it/s]
866
  16%|█▌ | 49/315 [00:00<00:03, 75.89it/s]
867
  18%|█▊ | 58/315 [00:00<00:03, 78.15it/s]
868
  21%|██ | 66/315 [00:00<00:03, 76.95it/s]
869
  24%|██▍ | 75/315 [00:00<00:03, 77.89it/s]
870
  26%|██▋ | 83/315 [00:01<00:03, 77.02it/s]
871
  29%|██▉ | 91/315 [00:01<00:02, 76.65it/s]
872
  31%|███▏ | 99/315 [00:01<00:02, 74.96it/s]
873
  34%|███▍ | 108/315 [00:01<00:02, 76.95it/s]
874
  37%|███▋ | 117/315 [00:01<00:02, 78.34it/s]
875
  40%|███▉ | 125/315 [00:01<00:02, 76.97it/s]
876
  42%|████▏ | 133/315 [00:01<00:02, 77.15it/s]
877
  45%|████▍ | 141/315 [00:01<00:02, 77.50it/s]
878
  47%|████▋ | 149/315 [00:01<00:02, 72.28it/s]
879
  50%|█████ | 158/315 [00:02<00:02, 74.89it/s]
880
  53%|█████▎ | 166/315 [00:02<00:01, 75.30it/s]
881
  55%|█████▌ | 174/315 [00:02<00:01, 76.29it/s]
882
  58%|█████▊ | 182/315 [00:02<00:01, 76.29it/s]
883
  60%|██████ | 190/315 [00:02<00:01, 77.30it/s]
884
  63%|██████▎ | 198/315 [00:02<00:01, 75.64it/s]
885
  65%|██████▌ | 206/315 [00:02<00:01, 75.73it/s]
886
  68%|██████▊ | 215/315 [00:02<00:01, 77.49it/s]
887
  71%|███████ | 224/315 [00:02<00:01, 79.65it/s]
888
  74%|███████▍ | 233/315 [00:03<00:01, 80.99it/s]
889
  77%|███████▋ | 242/315 [00:03<00:00, 79.71it/s]
890
  80%|███████▉ | 251/315 [00:03<00:00, 80.17it/s]
891
  83%|████████▎ | 260/315 [00:03<00:00, 80.00it/s]
892
  85%|████████▌ | 269/315 [00:03<00:00, 79.61it/s]
893
  88%|████████▊ | 278/315 [00:03<00:00, 80.43it/s]
894
  91%|█████████ | 287/315 [00:03<00:00, 79.16it/s]
895
  94%|█████████▎| 295/315 [00:03<00:00, 78.94it/s]
896
  96%|█████████▌| 303/315 [00:03<00:00, 76.90it/s]
897
  99%|█████████▊| 311/315 [00:04<00:00, 77.62it/s]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
898
  0%| | 0/506 [00:00<?, ?it/s]
899
  2%|▏ | 9/506 [00:00<00:06, 81.25it/s]
900
  4%|▎ | 18/506 [00:00<00:06, 80.18it/s]
901
  5%|▌ | 27/506 [00:00<00:05, 80.05it/s]
902
  7%|▋ | 36/506 [00:00<00:05, 80.13it/s]
903
  9%|▉ | 45/506 [00:00<00:05, 80.98it/s]
904
  11%|█ | 54/506 [00:00<00:05, 81.38it/s]
905
  12%|█▏ | 63/506 [00:00<00:05, 80.61it/s]
906
  14%|█▍ | 72/506 [00:00<00:05, 80.43it/s]
907
  16%|█▌ | 81/506 [00:01<00:05, 73.83it/s]
908
  18%|█▊ | 89/506 [00:01<00:05, 72.82it/s]
909
  19%|█▉ | 98/506 [00:01<00:05, 75.82it/s]
910
  21%|██ | 107/506 [00:01<00:05, 76.84it/s]
911
  23%|██▎ | 116/506 [00:01<00:05, 77.86it/s]
912
  25%|██▍ | 124/506 [00:01<00:05, 75.68it/s]
913
  26%|██▌ | 132/506 [00:01<00:05, 70.81it/s]
914
  28%|██▊ | 140/506 [00:01<00:05, 73.02it/s]
915
  29%|██▉ | 149/506 [00:01<00:04, 75.42it/s]
916
  31%|███ | 157/506 [00:02<00:04, 73.61it/s]
917
  33%|███▎ | 165/506 [00:02<00:04, 74.13it/s]
918
  34%|███▍ | 174/506 [00:02<00:04, 76.00it/s]
919
  36%|███▌ | 183/506 [00:02<00:04, 77.93it/s]
920
  38%|███▊ | 192/506 [00:02<00:03, 78.84it/s]
921
  40%|███▉ | 200/506 [00:02<00:03, 78.57it/s]
922
  41%|████ | 208/506 [00:02<00:03, 78.07it/s]
923
  43%|████▎ | 217/506 [00:02<00:03, 78.97it/s]
924
  44%|████▍ | 225/506 [00:02<00:03, 77.10it/s]
925
  46%|████▌ | 233/506 [00:03<00:03, 77.53it/s]
926
  48%|████▊ | 241/506 [00:03<00:03, 77.34it/s]
927
  49%|████▉ | 250/506 [00:03<00:03, 78.93it/s]
928
  51%|█████ | 259/506 [00:03<00:03, 79.67it/s]
929
  53%|█████▎ | 268/506 [00:03<00:02, 80.55it/s]
930
  55%|█████▍ | 277/506 [00:03<00:02, 80.74it/s]
931
  57%|█████▋ | 286/506 [00:03<00:02, 79.79it/s]
932
  58%|█████▊ | 295/506 [00:03<00:02, 80.40it/s]
933
  60%|██████ | 304/506 [00:03<00:02, 81.38it/s]
934
  62%|██████▏ | 313/506 [00:04<00:02, 81.61it/s]
935
  64%|██████▎ | 322/506 [00:04<00:02, 82.25it/s]
936
  65%|██████▌ | 331/506 [00:04<00:02, 83.19it/s]
937
  67%|██████▋ | 340/506 [00:04<00:01, 83.24it/s]
938
  69%|██████▉ | 349/506 [00:04<00:01, 83.80it/s]
939
  71%|███████ | 358/506 [00:04<00:01, 83.68it/s]
940
  73%|███████▎ | 367/506 [00:04<00:01, 81.45it/s]
941
  74%|███████▍ | 376/506 [00:04<00:01, 79.16it/s]
942
  76%|███████▌ | 384/506 [00:04<00:01, 77.68it/s]
943
  77%|███████▋ | 392/506 [00:05<00:01, 74.28it/s]
944
  79%|███████▉ | 400/506 [00:05<00:01, 73.39it/s]
945
  81%|████████ | 408/506 [00:05<00:01, 75.09it/s]
946
  82%|████████▏ | 416/506 [00:05<00:01, 75.31it/s]
947
  84%|████████▍ | 424/506 [00:05<00:01, 76.23it/s]
948
  86%|████████▌ | 433/506 [00:05<00:00, 78.69it/s]
949
  87%|████████▋ | 441/506 [00:05<00:00, 77.63it/s]
950
  89%|████████▊ | 449/506 [00:05<00:00, 78.02it/s]
951
  91%|█████████ | 458/506 [00:05<00:00, 79.39it/s]
952
  92%|█████████▏| 467/506 [00:05<00:00, 79.79it/s]
953
  94%|█████████▍| 475/506 [00:06<00:00, 77.02it/s]
954
  96%|█████████▌| 484/506 [00:06<00:00, 76.28it/s]
955
  97%|█████████▋| 493/506 [00:06<00:00, 77.37it/s]
956
  99%|█████████▉| 501/506 [00:06<00:00, 77.62it/s]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
857
  {'eval_loss': 0.27674129605293274, 'eval_precision': 0.6594676042189854, 'eval_recall': 0.7186644772851669, 'eval_f1': 0.6877946568884233, 'eval_accuracy': 0.9487631941993647, 'eval_runtime': 6.0833, 'eval_samples_per_second': 414.082, 'eval_steps_per_second': 51.781, 'epoch': 10.0}
858
  {'train_runtime': 453.0745, 'train_samples_per_second': 211.819, 'train_steps_per_second': 3.311, 'train_loss': 0.05337127685546875, 'epoch': 10.0}
859
 
860
+ ***** train metrics *****
861
+ epoch = 10.0
862
+ total_flos = 4128758GF
863
+ train_loss = 0.0534
864
+ train_runtime = 0:07:33.07
865
+ train_samples = 9597
866
+ train_samples_per_second = 211.819
867
+ train_steps_per_second = 3.311
868
+ 08/30/2024 22:35:59 - INFO - __main__ - *** Evaluate ***
869
+ [INFO|trainer.py:805] 2024-08-30 22:35:59,010 >> The following columns in the evaluation set don't have a corresponding argument in `RobertaForTokenClassification.forward` and have been ignored: tokens, id, ner_tags. If tokens, id, ner_tags are not expected by `RobertaForTokenClassification.forward`, you can safely ignore this message.
870
+ [INFO|trainer.py:3788] 2024-08-30 22:35:59,013 >>
871
+ ***** Running Evaluation *****
872
+ [INFO|trainer.py:3790] 2024-08-30 22:35:59,013 >> Num examples = 2519
873
+ [INFO|trainer.py:3793] 2024-08-30 22:35:59,013 >> Batch size = 8
874
+
875
  0%| | 0/315 [00:00<?, ?it/s]
876
  3%|▎ | 8/315 [00:00<00:03, 78.55it/s]
877
  5%|▌ | 16/315 [00:00<00:03, 76.48it/s]
878
  8%|▊ | 24/315 [00:00<00:03, 77.21it/s]
879
  10%|█ | 32/315 [00:00<00:03, 74.08it/s]
880
  13%|█▎ | 41/315 [00:00<00:03, 76.11it/s]
881
  16%|█▌ | 49/315 [00:00<00:03, 75.89it/s]
882
  18%|█▊ | 58/315 [00:00<00:03, 78.15it/s]
883
  21%|██ | 66/315 [00:00<00:03, 76.95it/s]
884
  24%|██▍ | 75/315 [00:00<00:03, 77.89it/s]
885
  26%|██▋ | 83/315 [00:01<00:03, 77.02it/s]
886
  29%|██▉ | 91/315 [00:01<00:02, 76.65it/s]
887
  31%|███▏ | 99/315 [00:01<00:02, 74.96it/s]
888
  34%|███▍ | 108/315 [00:01<00:02, 76.95it/s]
889
  37%|███▋ | 117/315 [00:01<00:02, 78.34it/s]
890
  40%|███▉ | 125/315 [00:01<00:02, 76.97it/s]
891
  42%|████▏ | 133/315 [00:01<00:02, 77.15it/s]
892
  45%|████▍ | 141/315 [00:01<00:02, 77.50it/s]
893
  47%|████▋ | 149/315 [00:01<00:02, 72.28it/s]
894
  50%|█████ | 158/315 [00:02<00:02, 74.89it/s]
895
  53%|█████▎ | 166/315 [00:02<00:01, 75.30it/s]
896
  55%|█████▌ | 174/315 [00:02<00:01, 76.29it/s]
897
  58%|█████▊ | 182/315 [00:02<00:01, 76.29it/s]
898
  60%|██████ | 190/315 [00:02<00:01, 77.30it/s]
899
  63%|██████▎ | 198/315 [00:02<00:01, 75.64it/s]
900
  65%|██████▌ | 206/315 [00:02<00:01, 75.73it/s]
901
  68%|██████▊ | 215/315 [00:02<00:01, 77.49it/s]
902
  71%|███████ | 224/315 [00:02<00:01, 79.65it/s]
903
  74%|███████▍ | 233/315 [00:03<00:01, 80.99it/s]
904
  77%|███████▋ | 242/315 [00:03<00:00, 79.71it/s]
905
  80%|███████▉ | 251/315 [00:03<00:00, 80.17it/s]
906
  83%|████████▎ | 260/315 [00:03<00:00, 80.00it/s]
907
  85%|████████▌ | 269/315 [00:03<00:00, 79.61it/s]
908
  88%|████████▊ | 278/315 [00:03<00:00, 80.43it/s]
909
  91%|█████████ | 287/315 [00:03<00:00, 79.16it/s]
910
  94%|█████████▎| 295/315 [00:03<00:00, 78.94it/s]
911
  96%|█████████▌| 303/315 [00:03<00:00, 76.90it/s]
912
  99%|█████████▊| 311/315 [00:04<00:00, 77.62it/s]
913
+ ***** eval metrics *****
914
+ epoch = 10.0
915
+ eval_accuracy = 0.9483
916
+ eval_f1 = 0.6921
917
+ eval_loss = 0.2747
918
+ eval_precision = 0.6675
919
+ eval_recall = 0.7187
920
+ eval_runtime = 0:00:05.52
921
+ eval_samples = 2519
922
+ eval_samples_per_second = 456.084
923
+ eval_steps_per_second = 57.033
924
+ 08/30/2024 22:36:04 - INFO - __main__ - *** Predict ***
925
+ [INFO|trainer.py:805] 2024-08-30 22:36:04,538 >> The following columns in the test set don't have a corresponding argument in `RobertaForTokenClassification.forward` and have been ignored: tokens, id, ner_tags. If tokens, id, ner_tags are not expected by `RobertaForTokenClassification.forward`, you can safely ignore this message.
926
+ [INFO|trainer.py:3788] 2024-08-30 22:36:04,541 >>
927
+ ***** Running Prediction *****
928
+ [INFO|trainer.py:3790] 2024-08-30 22:36:04,541 >> Num examples = 4047
929
+ [INFO|trainer.py:3793] 2024-08-30 22:36:04,541 >> Batch size = 8
930
+
931
  0%| | 0/506 [00:00<?, ?it/s]
932
  2%|▏ | 9/506 [00:00<00:06, 81.25it/s]
933
  4%|▎ | 18/506 [00:00<00:06, 80.18it/s]
934
  5%|▌ | 27/506 [00:00<00:05, 80.05it/s]
935
  7%|▋ | 36/506 [00:00<00:05, 80.13it/s]
936
  9%|▉ | 45/506 [00:00<00:05, 80.98it/s]
937
  11%|█ | 54/506 [00:00<00:05, 81.38it/s]
938
  12%|█▏ | 63/506 [00:00<00:05, 80.61it/s]
939
  14%|█▍ | 72/506 [00:00<00:05, 80.43it/s]
940
  16%|█▌ | 81/506 [00:01<00:05, 73.83it/s]
941
  18%|█▊ | 89/506 [00:01<00:05, 72.82it/s]
942
  19%|█▉ | 98/506 [00:01<00:05, 75.82it/s]
943
  21%|██ | 107/506 [00:01<00:05, 76.84it/s]
944
  23%|██▎ | 116/506 [00:01<00:05, 77.86it/s]
945
  25%|██▍ | 124/506 [00:01<00:05, 75.68it/s]
946
  26%|██▌ | 132/506 [00:01<00:05, 70.81it/s]
947
  28%|██▊ | 140/506 [00:01<00:05, 73.02it/s]
948
  29%|██▉ | 149/506 [00:01<00:04, 75.42it/s]
949
  31%|███ | 157/506 [00:02<00:04, 73.61it/s]
950
  33%|███▎ | 165/506 [00:02<00:04, 74.13it/s]
951
  34%|███▍ | 174/506 [00:02<00:04, 76.00it/s]
952
  36%|███▌ | 183/506 [00:02<00:04, 77.93it/s]
953
  38%|███▊ | 192/506 [00:02<00:03, 78.84it/s]
954
  40%|███▉ | 200/506 [00:02<00:03, 78.57it/s]
955
  41%|████ | 208/506 [00:02<00:03, 78.07it/s]
956
  43%|████▎ | 217/506 [00:02<00:03, 78.97it/s]
957
  44%|████▍ | 225/506 [00:02<00:03, 77.10it/s]
958
  46%|████▌ | 233/506 [00:03<00:03, 77.53it/s]
959
  48%|████▊ | 241/506 [00:03<00:03, 77.34it/s]
960
  49%|████▉ | 250/506 [00:03<00:03, 78.93it/s]
961
  51%|█████ | 259/506 [00:03<00:03, 79.67it/s]
962
  53%|█████▎ | 268/506 [00:03<00:02, 80.55it/s]
963
  55%|█████▍ | 277/506 [00:03<00:02, 80.74it/s]
964
  57%|█████▋ | 286/506 [00:03<00:02, 79.79it/s]
965
  58%|█████▊ | 295/506 [00:03<00:02, 80.40it/s]
966
  60%|██████ | 304/506 [00:03<00:02, 81.38it/s]
967
  62%|██████▏ | 313/506 [00:04<00:02, 81.61it/s]
968
  64%|██████▎ | 322/506 [00:04<00:02, 82.25it/s]
969
  65%|██████▌ | 331/506 [00:04<00:02, 83.19it/s]
970
  67%|██████▋ | 340/506 [00:04<00:01, 83.24it/s]
971
  69%|██████▉ | 349/506 [00:04<00:01, 83.80it/s]
972
  71%|███████ | 358/506 [00:04<00:01, 83.68it/s]
973
  73%|███████▎ | 367/506 [00:04<00:01, 81.45it/s]
974
  74%|███████▍ | 376/506 [00:04<00:01, 79.16it/s]
975
  76%|███████▌ | 384/506 [00:04<00:01, 77.68it/s]
976
  77%|███████▋ | 392/506 [00:05<00:01, 74.28it/s]
977
  79%|███████▉ | 400/506 [00:05<00:01, 73.39it/s]
978
  81%|████████ | 408/506 [00:05<00:01, 75.09it/s]
979
  82%|████████▏ | 416/506 [00:05<00:01, 75.31it/s]
980
  84%|████████▍ | 424/506 [00:05<00:01, 76.23it/s]
981
  86%|████████▌ | 433/506 [00:05<00:00, 78.69it/s]
982
  87%|████████▋ | 441/506 [00:05<00:00, 77.63it/s]
983
  89%|████████▊ | 449/506 [00:05<00:00, 78.02it/s]
984
  91%|█████████ | 458/506 [00:05<00:00, 79.39it/s]
985
  92%|█████████▏| 467/506 [00:05<00:00, 79.79it/s]
986
  94%|█████████▍| 475/506 [00:06<00:00, 77.02it/s]
987
  96%|█████████▌| 484/506 [00:06<00:00, 76.28it/s]
988
  97%|█████████▋| 493/506 [00:06<00:00, 77.37it/s]
989
  99%|█████████▉| 501/506 [00:06<00:00, 77.62it/s]
990
+ [INFO|trainer.py:3478] 2024-08-30 22:36:13,602 >> Saving model checkpoint to /content/dissertation/scripts/ner/output
991
+ [INFO|configuration_utils.py:472] 2024-08-30 22:36:13,603 >> Configuration saved in /content/dissertation/scripts/ner/output/config.json
992
+ [INFO|modeling_utils.py:2690] 2024-08-30 22:36:14,973 >> Model weights saved in /content/dissertation/scripts/ner/output/model.safetensors
993
+ [INFO|tokenization_utils_base.py:2574] 2024-08-30 22:36:14,974 >> tokenizer config file saved in /content/dissertation/scripts/ner/output/tokenizer_config.json
994
+ [INFO|tokenization_utils_base.py:2583] 2024-08-30 22:36:14,975 >> Special tokens file saved in /content/dissertation/scripts/ner/output/special_tokens_map.json
995
+ ***** predict metrics *****
996
+ predict_accuracy = 0.9466
997
+ predict_f1 = 0.6902
998
+ predict_loss = 0.2951
999
+ predict_precision = 0.6732
1000
+ predict_recall = 0.7081
1001
+ predict_runtime = 0:00:08.89
1002
+ predict_samples_per_second = 454.847
1003
+ predict_steps_per_second = 56.87
1004
+
train_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "epoch": 10.0,
3
- "total_flos": 1.4262694978690116e+16,
4
- "train_loss": 0.0022696754537961062,
5
- "train_runtime": 1261.5031,
6
- "train_samples": 27198,
7
- "train_samples_per_second": 215.6,
8
- "train_steps_per_second": 3.369
9
  }
 
1
  {
2
  "epoch": 10.0,
3
+ "total_flos": 4433220248012460.0,
4
+ "train_loss": 0.05337127685546875,
5
+ "train_runtime": 453.0745,
6
+ "train_samples": 9597,
7
+ "train_samples_per_second": 211.819,
8
+ "train_steps_per_second": 3.311
9
  }
trainer_state.json CHANGED
@@ -1,201 +1,166 @@
1
  {
2
- "best_metric": 0.936867469879518,
3
- "best_model_checkpoint": "/content/dissertation/scripts/ner/output/checkpoint-4250",
4
  "epoch": 10.0,
5
  "eval_steps": 500,
6
- "global_step": 4250,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
- "eval_accuracy": 0.9981303557517528,
14
- "eval_f1": 0.8939962476547841,
15
- "eval_loss": 0.00556989898905158,
16
- "eval_precision": 0.8671519563239308,
17
- "eval_recall": 0.9225556631171346,
18
- "eval_runtime": 14.25,
19
- "eval_samples_per_second": 477.051,
20
- "eval_steps_per_second": 59.649,
21
- "step": 425
22
- },
23
- {
24
- "epoch": 1.1764705882352942,
25
- "grad_norm": 0.1311497986316681,
26
- "learning_rate": 4.411764705882353e-05,
27
- "loss": 0.0104,
28
- "step": 500
29
  },
30
  {
31
  "epoch": 2.0,
32
- "eval_accuracy": 0.9985782913528953,
33
- "eval_f1": 0.9216722729456991,
34
- "eval_loss": 0.0041933078318834305,
35
- "eval_precision": 0.9150763358778626,
36
- "eval_recall": 0.9283639883833494,
37
- "eval_runtime": 14.1751,
38
- "eval_samples_per_second": 479.575,
39
- "eval_steps_per_second": 59.965,
40
- "step": 850
41
- },
42
- {
43
- "epoch": 2.3529411764705883,
44
- "grad_norm": 0.002636878052726388,
45
- "learning_rate": 3.8235294117647055e-05,
46
- "loss": 0.0034,
47
- "step": 1000
48
  },
49
  {
50
  "epoch": 3.0,
51
- "eval_accuracy": 0.9985133731498312,
52
- "eval_f1": 0.9155339805825242,
53
- "eval_loss": 0.004266700241714716,
54
- "eval_precision": 0.9182083739045764,
55
- "eval_recall": 0.9128751210067764,
56
- "eval_runtime": 14.285,
57
- "eval_samples_per_second": 475.883,
58
- "eval_steps_per_second": 59.503,
59
- "step": 1275
60
- },
61
- {
62
- "epoch": 3.5294117647058822,
63
- "grad_norm": 0.08669757843017578,
64
- "learning_rate": 3.235294117647059e-05,
65
- "loss": 0.0022,
66
- "step": 1500
67
  },
68
  {
69
  "epoch": 4.0,
70
- "eval_accuracy": 0.9985847831732018,
71
- "eval_f1": 0.9250367466927977,
72
- "eval_loss": 0.0043651387095451355,
73
- "eval_precision": 0.9365079365079365,
74
- "eval_recall": 0.9138431752178122,
75
- "eval_runtime": 14.5173,
76
- "eval_samples_per_second": 468.27,
77
- "eval_steps_per_second": 58.551,
78
- "step": 1700
79
- },
80
- {
81
- "epoch": 4.705882352941177,
82
- "grad_norm": 0.27693310379981995,
83
- "learning_rate": 2.647058823529412e-05,
84
- "loss": 0.0012,
85
- "step": 2000
86
  },
87
  {
88
  "epoch": 5.0,
89
- "eval_accuracy": 0.9985393404310569,
90
- "eval_f1": 0.919463087248322,
91
- "eval_loss": 0.006118799094110727,
92
- "eval_precision": 0.9107312440645774,
93
- "eval_recall": 0.9283639883833494,
94
- "eval_runtime": 14.2824,
95
- "eval_samples_per_second": 475.97,
96
- "eval_steps_per_second": 59.514,
97
- "step": 2125
98
- },
99
- {
100
- "epoch": 5.882352941176471,
101
- "grad_norm": 0.008007431402802467,
102
- "learning_rate": 2.058823529411765e-05,
103
- "loss": 0.0009,
104
- "step": 2500
105
  },
106
  {
107
  "epoch": 6.0,
108
- "eval_accuracy": 0.9986626850168787,
109
- "eval_f1": 0.9221213569039655,
110
- "eval_loss": 0.005954863503575325,
111
- "eval_precision": 0.910377358490566,
112
- "eval_recall": 0.9341723136495643,
113
- "eval_runtime": 14.2494,
114
- "eval_samples_per_second": 477.072,
115
- "eval_steps_per_second": 59.652,
116
- "step": 2550
 
 
 
 
 
 
 
117
  },
118
  {
119
  "epoch": 7.0,
120
- "eval_accuracy": 0.9986691768371851,
121
- "eval_f1": 0.9314148681055155,
122
- "eval_loss": 0.006543714087456465,
123
- "eval_precision": 0.9230038022813688,
124
- "eval_recall": 0.9399806389157793,
125
- "eval_runtime": 14.386,
126
- "eval_samples_per_second": 472.542,
127
- "eval_steps_per_second": 59.085,
128
- "step": 2975
129
- },
130
- {
131
- "epoch": 7.0588235294117645,
132
- "grad_norm": 0.0017305670771747828,
133
- "learning_rate": 1.4705882352941177e-05,
134
- "loss": 0.0005,
135
- "step": 3000
136
  },
137
  {
138
  "epoch": 8.0,
139
- "eval_accuracy": 0.9986886522981044,
140
- "eval_f1": 0.9280540801545147,
141
- "eval_loss": 0.005883762612938881,
142
- "eval_precision": 0.9258188824662813,
143
- "eval_recall": 0.9303000968054211,
144
- "eval_runtime": 14.3169,
145
- "eval_samples_per_second": 474.822,
146
- "eval_steps_per_second": 59.37,
147
- "step": 3400
148
- },
149
- {
150
- "epoch": 8.235294117647058,
151
- "grad_norm": 0.00020609228522516787,
152
- "learning_rate": 8.823529411764707e-06,
153
- "loss": 0.0004,
154
- "step": 3500
155
  },
156
  {
157
  "epoch": 9.0,
158
- "eval_accuracy": 0.9987276032199429,
159
- "eval_f1": 0.9317307692307693,
160
- "eval_loss": 0.00656876852735877,
161
- "eval_precision": 0.9255014326647565,
162
- "eval_recall": 0.9380445304937076,
163
- "eval_runtime": 14.5715,
164
- "eval_samples_per_second": 466.526,
165
- "eval_steps_per_second": 58.333,
166
- "step": 3825
167
  },
168
  {
169
- "epoch": 9.411764705882353,
170
- "grad_norm": 0.00026785818045027554,
171
- "learning_rate": 2.9411764705882355e-06,
172
- "loss": 0.0001,
173
- "step": 4000
174
  },
175
  {
176
  "epoch": 10.0,
177
- "eval_accuracy": 0.9988184887042326,
178
- "eval_f1": 0.936867469879518,
179
- "eval_loss": 0.006724909413605928,
180
- "eval_precision": 0.9328214971209213,
181
- "eval_recall": 0.9409486931268151,
182
- "eval_runtime": 14.3451,
183
- "eval_samples_per_second": 473.891,
184
- "eval_steps_per_second": 59.254,
185
- "step": 4250
186
  },
187
  {
188
  "epoch": 10.0,
189
- "step": 4250,
190
- "total_flos": 1.4262694978690116e+16,
191
- "train_loss": 0.0022696754537961062,
192
- "train_runtime": 1261.5031,
193
- "train_samples_per_second": 215.6,
194
- "train_steps_per_second": 3.369
195
  }
196
  ],
197
  "logging_steps": 500,
198
- "max_steps": 4250,
199
  "num_input_tokens_seen": 0,
200
  "num_train_epochs": 10,
201
  "save_steps": 500,
@@ -211,7 +176,7 @@
211
  "attributes": {}
212
  }
213
  },
214
- "total_flos": 1.4262694978690116e+16,
215
  "train_batch_size": 32,
216
  "trial_name": null,
217
  "trial_params": null
 
1
  {
2
+ "best_metric": 0.6921454928835002,
3
+ "best_model_checkpoint": "/content/dissertation/scripts/ner/output/checkpoint-1350",
4
  "epoch": 10.0,
5
  "eval_steps": 500,
6
+ "global_step": 1500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
+ "eval_accuracy": 0.9456350861432834,
14
+ "eval_f1": 0.567482432759874,
15
+ "eval_loss": 0.1503801792860031,
16
+ "eval_precision": 0.5091304347826087,
17
+ "eval_recall": 0.6409414340448824,
18
+ "eval_runtime": 5.4744,
19
+ "eval_samples_per_second": 460.141,
20
+ "eval_steps_per_second": 57.541,
21
+ "step": 150
 
 
 
 
 
 
 
22
  },
23
  {
24
  "epoch": 2.0,
25
+ "eval_accuracy": 0.9462125830151753,
26
+ "eval_f1": 0.639,
27
+ "eval_loss": 0.15472079813480377,
28
+ "eval_precision": 0.5881270133456051,
29
+ "eval_recall": 0.6995073891625616,
30
+ "eval_runtime": 5.4777,
31
+ "eval_samples_per_second": 459.865,
32
+ "eval_steps_per_second": 57.506,
33
+ "step": 300
 
 
 
 
 
 
 
34
  },
35
  {
36
  "epoch": 3.0,
37
+ "eval_accuracy": 0.9475600757162566,
38
+ "eval_f1": 0.6589207332816938,
39
+ "eval_loss": 0.16183686256408691,
40
+ "eval_precision": 0.6236559139784946,
41
+ "eval_recall": 0.6984126984126984,
42
+ "eval_runtime": 5.4114,
43
+ "eval_samples_per_second": 465.5,
44
+ "eval_steps_per_second": 58.211,
45
+ "step": 450
46
+ },
47
+ {
48
+ "epoch": 3.3333333333333335,
49
+ "grad_norm": 0.5628494620323181,
50
+ "learning_rate": 3.3333333333333335e-05,
51
+ "loss": 0.126,
52
+ "step": 500
53
  },
54
  {
55
  "epoch": 4.0,
56
+ "eval_accuracy": 0.9450575892713915,
57
+ "eval_f1": 0.6627936347562516,
58
+ "eval_loss": 0.1920311450958252,
59
+ "eval_precision": 0.6153846153846154,
60
+ "eval_recall": 0.7181171319102354,
61
+ "eval_runtime": 5.502,
62
+ "eval_samples_per_second": 457.833,
63
+ "eval_steps_per_second": 57.252,
64
+ "step": 600
 
 
 
 
 
 
 
65
  },
66
  {
67
  "epoch": 5.0,
68
+ "eval_accuracy": 0.9488273605184638,
69
+ "eval_f1": 0.678646934460888,
70
+ "eval_loss": 0.21016015112400055,
71
+ "eval_precision": 0.6561062851303014,
72
+ "eval_recall": 0.7027914614121511,
73
+ "eval_runtime": 5.3774,
74
+ "eval_samples_per_second": 468.442,
75
+ "eval_steps_per_second": 58.579,
76
+ "step": 750
 
 
 
 
 
 
 
77
  },
78
  {
79
  "epoch": 6.0,
80
+ "eval_accuracy": 0.9467259135679682,
81
+ "eval_f1": 0.6750065155069064,
82
+ "eval_loss": 0.24135558307170868,
83
+ "eval_precision": 0.6442786069651741,
84
+ "eval_recall": 0.7088122605363985,
85
+ "eval_runtime": 5.4273,
86
+ "eval_samples_per_second": 464.135,
87
+ "eval_steps_per_second": 58.04,
88
+ "step": 900
89
+ },
90
+ {
91
+ "epoch": 6.666666666666667,
92
+ "grad_norm": 0.5565813779830933,
93
+ "learning_rate": 1.6666666666666667e-05,
94
+ "loss": 0.0251,
95
+ "step": 1000
96
  },
97
  {
98
  "epoch": 7.0,
99
+ "eval_accuracy": 0.9491642336937341,
100
+ "eval_f1": 0.6816380449141347,
101
+ "eval_loss": 0.25002309679985046,
102
+ "eval_precision": 0.658835546475996,
103
+ "eval_recall": 0.7060755336617406,
104
+ "eval_runtime": 5.3969,
105
+ "eval_samples_per_second": 466.75,
106
+ "eval_steps_per_second": 58.367,
107
+ "step": 1050
 
 
 
 
 
 
 
108
  },
109
  {
110
  "epoch": 8.0,
111
+ "eval_accuracy": 0.947383618338734,
112
+ "eval_f1": 0.6846153846153845,
113
+ "eval_loss": 0.26423653960227966,
114
+ "eval_precision": 0.6439942112879884,
115
+ "eval_recall": 0.7307060755336617,
116
+ "eval_runtime": 5.4091,
117
+ "eval_samples_per_second": 465.698,
118
+ "eval_steps_per_second": 58.235,
119
+ "step": 1200
 
 
 
 
 
 
 
120
  },
121
  {
122
  "epoch": 9.0,
123
+ "eval_accuracy": 0.9483461131252205,
124
+ "eval_f1": 0.6921454928835002,
125
+ "eval_loss": 0.27473828196525574,
126
+ "eval_precision": 0.6675139806812405,
127
+ "eval_recall": 0.7186644772851669,
128
+ "eval_runtime": 5.4389,
129
+ "eval_samples_per_second": 463.146,
130
+ "eval_steps_per_second": 57.916,
131
+ "step": 1350
132
  },
133
  {
134
+ "epoch": 10.0,
135
+ "grad_norm": 0.17641158401966095,
136
+ "learning_rate": 0.0,
137
+ "loss": 0.0091,
138
+ "step": 1500
139
  },
140
  {
141
  "epoch": 10.0,
142
+ "eval_accuracy": 0.9487631941993647,
143
+ "eval_f1": 0.6877946568884233,
144
+ "eval_loss": 0.27674129605293274,
145
+ "eval_precision": 0.6594676042189854,
146
+ "eval_recall": 0.7186644772851669,
147
+ "eval_runtime": 6.0833,
148
+ "eval_samples_per_second": 414.082,
149
+ "eval_steps_per_second": 51.781,
150
+ "step": 1500
151
  },
152
  {
153
  "epoch": 10.0,
154
+ "step": 1500,
155
+ "total_flos": 4433220248012460.0,
156
+ "train_loss": 0.05337127685546875,
157
+ "train_runtime": 453.0745,
158
+ "train_samples_per_second": 211.819,
159
+ "train_steps_per_second": 3.311
160
  }
161
  ],
162
  "logging_steps": 500,
163
+ "max_steps": 1500,
164
  "num_input_tokens_seen": 0,
165
  "num_train_epochs": 10,
166
  "save_steps": 500,
 
176
  "attributes": {}
177
  }
178
  },
179
+ "total_flos": 4433220248012460.0,
180
  "train_batch_size": 32,
181
  "trial_name": null,
182
  "trial_params": null