Rodrigo1771
commited on
Commit
•
ae36b45
1
Parent(s):
f4151db
End of training
Browse files- README.md +12 -11
- all_results.json +23 -23
- eval_results.json +9 -9
- predict_results.json +8 -8
- predictions.txt +0 -0
- tb/events.out.tfevents.1725057364.6b97e535edda.51600.1 +3 -0
- train.log +48 -0
- train_results.json +6 -6
- trainer_state.json +120 -155
README.md
CHANGED
@@ -2,9 +2,10 @@
|
|
2 |
license: apache-2.0
|
3 |
base_model: PlanTL-GOB-ES/bsc-bio-ehr-es
|
4 |
tags:
|
|
|
5 |
- generated_from_trainer
|
6 |
datasets:
|
7 |
-
- symptemist-ner
|
8 |
metrics:
|
9 |
- precision
|
10 |
- recall
|
@@ -17,24 +18,24 @@ model-index:
|
|
17 |
name: Token Classification
|
18 |
type: token-classification
|
19 |
dataset:
|
20 |
-
name: symptemist-ner
|
21 |
-
type: symptemist-ner
|
22 |
config: SympTEMIST NER
|
23 |
split: validation
|
24 |
args: SympTEMIST NER
|
25 |
metrics:
|
26 |
- name: Precision
|
27 |
type: precision
|
28 |
-
value: 0.
|
29 |
- name: Recall
|
30 |
type: recall
|
31 |
value: 0.7186644772851669
|
32 |
- name: F1
|
33 |
type: f1
|
34 |
-
value: 0.
|
35 |
- name: Accuracy
|
36 |
type: accuracy
|
37 |
-
value: 0.
|
38 |
---
|
39 |
|
40 |
<!-- This model card has been generated automatically according to the information the Trainer had access to. You
|
@@ -42,13 +43,13 @@ should probably proofread and complete it, then remove this comment. -->
|
|
42 |
|
43 |
# output
|
44 |
|
45 |
-
This model is a fine-tuned version of [PlanTL-GOB-ES/bsc-bio-ehr-es](https://huggingface.co/PlanTL-GOB-ES/bsc-bio-ehr-es) on the symptemist-ner dataset.
|
46 |
It achieves the following results on the evaluation set:
|
47 |
-
- Loss: 0.
|
48 |
-
- Precision: 0.
|
49 |
- Recall: 0.7187
|
50 |
-
- F1: 0.
|
51 |
-
- Accuracy: 0.
|
52 |
|
53 |
## Model description
|
54 |
|
|
|
2 |
license: apache-2.0
|
3 |
base_model: PlanTL-GOB-ES/bsc-bio-ehr-es
|
4 |
tags:
|
5 |
+
- token-classification
|
6 |
- generated_from_trainer
|
7 |
datasets:
|
8 |
+
- Rodrigo1771/symptemist-ner
|
9 |
metrics:
|
10 |
- precision
|
11 |
- recall
|
|
|
18 |
name: Token Classification
|
19 |
type: token-classification
|
20 |
dataset:
|
21 |
+
name: Rodrigo1771/symptemist-ner
|
22 |
+
type: Rodrigo1771/symptemist-ner
|
23 |
config: SympTEMIST NER
|
24 |
split: validation
|
25 |
args: SympTEMIST NER
|
26 |
metrics:
|
27 |
- name: Precision
|
28 |
type: precision
|
29 |
+
value: 0.6675139806812405
|
30 |
- name: Recall
|
31 |
type: recall
|
32 |
value: 0.7186644772851669
|
33 |
- name: F1
|
34 |
type: f1
|
35 |
+
value: 0.6921454928835002
|
36 |
- name: Accuracy
|
37 |
type: accuracy
|
38 |
+
value: 0.9483461131252205
|
39 |
---
|
40 |
|
41 |
<!-- This model card has been generated automatically according to the information the Trainer had access to. You
|
|
|
43 |
|
44 |
# output
|
45 |
|
46 |
+
This model is a fine-tuned version of [PlanTL-GOB-ES/bsc-bio-ehr-es](https://huggingface.co/PlanTL-GOB-ES/bsc-bio-ehr-es) on the Rodrigo1771/symptemist-ner dataset.
|
47 |
It achieves the following results on the evaluation set:
|
48 |
+
- Loss: 0.2747
|
49 |
+
- Precision: 0.6675
|
50 |
- Recall: 0.7187
|
51 |
+
- F1: 0.6921
|
52 |
+
- Accuracy: 0.9483
|
53 |
|
54 |
## Model description
|
55 |
|
all_results.json
CHANGED
@@ -1,26 +1,26 @@
|
|
1 |
{
|
2 |
"epoch": 10.0,
|
3 |
-
"eval_accuracy": 0.
|
4 |
-
"eval_f1": 0.
|
5 |
-
"eval_loss": 0.
|
6 |
-
"eval_precision": 0.
|
7 |
-
"eval_recall": 0.
|
8 |
-
"eval_runtime":
|
9 |
-
"eval_samples":
|
10 |
-
"eval_samples_per_second":
|
11 |
-
"eval_steps_per_second":
|
12 |
-
"predict_accuracy": 0.
|
13 |
-
"predict_f1": 0.
|
14 |
-
"predict_loss": 0.
|
15 |
-
"predict_precision": 0.
|
16 |
-
"predict_recall": 0.
|
17 |
-
"predict_runtime":
|
18 |
-
"predict_samples_per_second":
|
19 |
-
"predict_steps_per_second":
|
20 |
-
"total_flos":
|
21 |
-
"train_loss": 0.
|
22 |
-
"train_runtime":
|
23 |
-
"train_samples":
|
24 |
-
"train_samples_per_second":
|
25 |
-
"train_steps_per_second": 3.
|
26 |
}
|
|
|
1 |
{
|
2 |
"epoch": 10.0,
|
3 |
+
"eval_accuracy": 0.9483461131252205,
|
4 |
+
"eval_f1": 0.6921454928835002,
|
5 |
+
"eval_loss": 0.27473828196525574,
|
6 |
+
"eval_precision": 0.6675139806812405,
|
7 |
+
"eval_recall": 0.7186644772851669,
|
8 |
+
"eval_runtime": 5.5231,
|
9 |
+
"eval_samples": 2519,
|
10 |
+
"eval_samples_per_second": 456.084,
|
11 |
+
"eval_steps_per_second": 57.033,
|
12 |
+
"predict_accuracy": 0.9465558078053287,
|
13 |
+
"predict_f1": 0.6901960784313725,
|
14 |
+
"predict_loss": 0.29509031772613525,
|
15 |
+
"predict_precision": 0.6731946144430845,
|
16 |
+
"predict_recall": 0.7080785323463148,
|
17 |
+
"predict_runtime": 8.8975,
|
18 |
+
"predict_samples_per_second": 454.847,
|
19 |
+
"predict_steps_per_second": 56.87,
|
20 |
+
"total_flos": 4433220248012460.0,
|
21 |
+
"train_loss": 0.05337127685546875,
|
22 |
+
"train_runtime": 453.0745,
|
23 |
+
"train_samples": 9597,
|
24 |
+
"train_samples_per_second": 211.819,
|
25 |
+
"train_steps_per_second": 3.311
|
26 |
}
|
eval_results.json
CHANGED
@@ -1,12 +1,12 @@
|
|
1 |
{
|
2 |
"epoch": 10.0,
|
3 |
-
"eval_accuracy": 0.
|
4 |
-
"eval_f1": 0.
|
5 |
-
"eval_loss": 0.
|
6 |
-
"eval_precision": 0.
|
7 |
-
"eval_recall": 0.
|
8 |
-
"eval_runtime":
|
9 |
-
"eval_samples":
|
10 |
-
"eval_samples_per_second":
|
11 |
-
"eval_steps_per_second":
|
12 |
}
|
|
|
1 |
{
|
2 |
"epoch": 10.0,
|
3 |
+
"eval_accuracy": 0.9483461131252205,
|
4 |
+
"eval_f1": 0.6921454928835002,
|
5 |
+
"eval_loss": 0.27473828196525574,
|
6 |
+
"eval_precision": 0.6675139806812405,
|
7 |
+
"eval_recall": 0.7186644772851669,
|
8 |
+
"eval_runtime": 5.5231,
|
9 |
+
"eval_samples": 2519,
|
10 |
+
"eval_samples_per_second": 456.084,
|
11 |
+
"eval_steps_per_second": 57.033
|
12 |
}
|
predict_results.json
CHANGED
@@ -1,10 +1,10 @@
|
|
1 |
{
|
2 |
-
"predict_accuracy": 0.
|
3 |
-
"predict_f1": 0.
|
4 |
-
"predict_loss": 0.
|
5 |
-
"predict_precision": 0.
|
6 |
-
"predict_recall": 0.
|
7 |
-
"predict_runtime":
|
8 |
-
"predict_samples_per_second":
|
9 |
-
"predict_steps_per_second":
|
10 |
}
|
|
|
1 |
{
|
2 |
+
"predict_accuracy": 0.9465558078053287,
|
3 |
+
"predict_f1": 0.6901960784313725,
|
4 |
+
"predict_loss": 0.29509031772613525,
|
5 |
+
"predict_precision": 0.6731946144430845,
|
6 |
+
"predict_recall": 0.7080785323463148,
|
7 |
+
"predict_runtime": 8.8975,
|
8 |
+
"predict_samples_per_second": 454.847,
|
9 |
+
"predict_steps_per_second": 56.87
|
10 |
}
|
predictions.txt
CHANGED
The diff for this file is too large to render.
See raw diff
|
|
tb/events.out.tfevents.1725057364.6b97e535edda.51600.1
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:895e3f1ac3a8be4076f491ddd7b0d149722e94c01471a91b13d80e70d799cb09
|
3 |
+
size 560
|
train.log
CHANGED
@@ -857,3 +857,51 @@ Training completed. Do not forget to share your model on huggingface.co/models =
|
|
857 |
{'eval_loss': 0.27674129605293274, 'eval_precision': 0.6594676042189854, 'eval_recall': 0.7186644772851669, 'eval_f1': 0.6877946568884233, 'eval_accuracy': 0.9487631941993647, 'eval_runtime': 6.0833, 'eval_samples_per_second': 414.082, 'eval_steps_per_second': 51.781, 'epoch': 10.0}
|
858 |
{'train_runtime': 453.0745, 'train_samples_per_second': 211.819, 'train_steps_per_second': 3.311, 'train_loss': 0.05337127685546875, 'epoch': 10.0}
|
859 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
860 |
0%| | 0/315 [00:00<?, ?it/s]
|
861 |
3%|▎ | 8/315 [00:00<00:03, 78.55it/s]
|
862 |
5%|▌ | 16/315 [00:00<00:03, 76.48it/s]
|
863 |
8%|▊ | 24/315 [00:00<00:03, 77.21it/s]
|
864 |
10%|█ | 32/315 [00:00<00:03, 74.08it/s]
|
865 |
13%|█▎ | 41/315 [00:00<00:03, 76.11it/s]
|
866 |
16%|█▌ | 49/315 [00:00<00:03, 75.89it/s]
|
867 |
18%|█▊ | 58/315 [00:00<00:03, 78.15it/s]
|
868 |
21%|██ | 66/315 [00:00<00:03, 76.95it/s]
|
869 |
24%|██▍ | 75/315 [00:00<00:03, 77.89it/s]
|
870 |
26%|██▋ | 83/315 [00:01<00:03, 77.02it/s]
|
871 |
29%|██▉ | 91/315 [00:01<00:02, 76.65it/s]
|
872 |
31%|███▏ | 99/315 [00:01<00:02, 74.96it/s]
|
873 |
34%|███▍ | 108/315 [00:01<00:02, 76.95it/s]
|
874 |
37%|███▋ | 117/315 [00:01<00:02, 78.34it/s]
|
875 |
40%|███▉ | 125/315 [00:01<00:02, 76.97it/s]
|
876 |
42%|████▏ | 133/315 [00:01<00:02, 77.15it/s]
|
877 |
45%|████▍ | 141/315 [00:01<00:02, 77.50it/s]
|
878 |
47%|████▋ | 149/315 [00:01<00:02, 72.28it/s]
|
879 |
50%|█████ | 158/315 [00:02<00:02, 74.89it/s]
|
880 |
53%|█████▎ | 166/315 [00:02<00:01, 75.30it/s]
|
881 |
55%|█████▌ | 174/315 [00:02<00:01, 76.29it/s]
|
882 |
58%|█████▊ | 182/315 [00:02<00:01, 76.29it/s]
|
883 |
60%|██████ | 190/315 [00:02<00:01, 77.30it/s]
|
884 |
63%|██████▎ | 198/315 [00:02<00:01, 75.64it/s]
|
885 |
65%|██████▌ | 206/315 [00:02<00:01, 75.73it/s]
|
886 |
68%|██████▊ | 215/315 [00:02<00:01, 77.49it/s]
|
887 |
71%|███████ | 224/315 [00:02<00:01, 79.65it/s]
|
888 |
74%|███████▍ | 233/315 [00:03<00:01, 80.99it/s]
|
889 |
77%|███████▋ | 242/315 [00:03<00:00, 79.71it/s]
|
890 |
80%|███████▉ | 251/315 [00:03<00:00, 80.17it/s]
|
891 |
83%|████████▎ | 260/315 [00:03<00:00, 80.00it/s]
|
892 |
85%|████████▌ | 269/315 [00:03<00:00, 79.61it/s]
|
893 |
88%|████████▊ | 278/315 [00:03<00:00, 80.43it/s]
|
894 |
91%|█████████ | 287/315 [00:03<00:00, 79.16it/s]
|
895 |
94%|█████████▎| 295/315 [00:03<00:00, 78.94it/s]
|
896 |
96%|█████████▌| 303/315 [00:03<00:00, 76.90it/s]
|
897 |
99%|█████████▊| 311/315 [00:04<00:00, 77.62it/s]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
898 |
0%| | 0/506 [00:00<?, ?it/s]
|
899 |
2%|▏ | 9/506 [00:00<00:06, 81.25it/s]
|
900 |
4%|▎ | 18/506 [00:00<00:06, 80.18it/s]
|
901 |
5%|▌ | 27/506 [00:00<00:05, 80.05it/s]
|
902 |
7%|▋ | 36/506 [00:00<00:05, 80.13it/s]
|
903 |
9%|▉ | 45/506 [00:00<00:05, 80.98it/s]
|
904 |
11%|█ | 54/506 [00:00<00:05, 81.38it/s]
|
905 |
12%|█▏ | 63/506 [00:00<00:05, 80.61it/s]
|
906 |
14%|█▍ | 72/506 [00:00<00:05, 80.43it/s]
|
907 |
16%|█▌ | 81/506 [00:01<00:05, 73.83it/s]
|
908 |
18%|█▊ | 89/506 [00:01<00:05, 72.82it/s]
|
909 |
19%|█▉ | 98/506 [00:01<00:05, 75.82it/s]
|
910 |
21%|██ | 107/506 [00:01<00:05, 76.84it/s]
|
911 |
23%|██▎ | 116/506 [00:01<00:05, 77.86it/s]
|
912 |
25%|██▍ | 124/506 [00:01<00:05, 75.68it/s]
|
913 |
26%|██▌ | 132/506 [00:01<00:05, 70.81it/s]
|
914 |
28%|██▊ | 140/506 [00:01<00:05, 73.02it/s]
|
915 |
29%|██▉ | 149/506 [00:01<00:04, 75.42it/s]
|
916 |
31%|███ | 157/506 [00:02<00:04, 73.61it/s]
|
917 |
33%|███▎ | 165/506 [00:02<00:04, 74.13it/s]
|
918 |
34%|███▍ | 174/506 [00:02<00:04, 76.00it/s]
|
919 |
36%|███▌ | 183/506 [00:02<00:04, 77.93it/s]
|
920 |
38%|███▊ | 192/506 [00:02<00:03, 78.84it/s]
|
921 |
40%|███▉ | 200/506 [00:02<00:03, 78.57it/s]
|
922 |
41%|████ | 208/506 [00:02<00:03, 78.07it/s]
|
923 |
43%|████▎ | 217/506 [00:02<00:03, 78.97it/s]
|
924 |
44%|████▍ | 225/506 [00:02<00:03, 77.10it/s]
|
925 |
46%|████▌ | 233/506 [00:03<00:03, 77.53it/s]
|
926 |
48%|████▊ | 241/506 [00:03<00:03, 77.34it/s]
|
927 |
49%|████▉ | 250/506 [00:03<00:03, 78.93it/s]
|
928 |
51%|█████ | 259/506 [00:03<00:03, 79.67it/s]
|
929 |
53%|█████▎ | 268/506 [00:03<00:02, 80.55it/s]
|
930 |
55%|█████▍ | 277/506 [00:03<00:02, 80.74it/s]
|
931 |
57%|█████▋ | 286/506 [00:03<00:02, 79.79it/s]
|
932 |
58%|█████▊ | 295/506 [00:03<00:02, 80.40it/s]
|
933 |
60%|██████ | 304/506 [00:03<00:02, 81.38it/s]
|
934 |
62%|██████▏ | 313/506 [00:04<00:02, 81.61it/s]
|
935 |
64%|██████▎ | 322/506 [00:04<00:02, 82.25it/s]
|
936 |
65%|██████▌ | 331/506 [00:04<00:02, 83.19it/s]
|
937 |
67%|██████▋ | 340/506 [00:04<00:01, 83.24it/s]
|
938 |
69%|██████▉ | 349/506 [00:04<00:01, 83.80it/s]
|
939 |
71%|███████ | 358/506 [00:04<00:01, 83.68it/s]
|
940 |
73%|███████▎ | 367/506 [00:04<00:01, 81.45it/s]
|
941 |
74%|███████▍ | 376/506 [00:04<00:01, 79.16it/s]
|
942 |
76%|███████▌ | 384/506 [00:04<00:01, 77.68it/s]
|
943 |
77%|███████▋ | 392/506 [00:05<00:01, 74.28it/s]
|
944 |
79%|███████▉ | 400/506 [00:05<00:01, 73.39it/s]
|
945 |
81%|████████ | 408/506 [00:05<00:01, 75.09it/s]
|
946 |
82%|████████▏ | 416/506 [00:05<00:01, 75.31it/s]
|
947 |
84%|████████▍ | 424/506 [00:05<00:01, 76.23it/s]
|
948 |
86%|████████▌ | 433/506 [00:05<00:00, 78.69it/s]
|
949 |
87%|████████▋ | 441/506 [00:05<00:00, 77.63it/s]
|
950 |
89%|████████▊ | 449/506 [00:05<00:00, 78.02it/s]
|
951 |
91%|█████████ | 458/506 [00:05<00:00, 79.39it/s]
|
952 |
92%|█████████▏| 467/506 [00:05<00:00, 79.79it/s]
|
953 |
94%|█████████▍| 475/506 [00:06<00:00, 77.02it/s]
|
954 |
96%|█████████▌| 484/506 [00:06<00:00, 76.28it/s]
|
955 |
97%|█████████▋| 493/506 [00:06<00:00, 77.37it/s]
|
956 |
99%|█████████▉| 501/506 [00:06<00:00, 77.62it/s]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
857 |
{'eval_loss': 0.27674129605293274, 'eval_precision': 0.6594676042189854, 'eval_recall': 0.7186644772851669, 'eval_f1': 0.6877946568884233, 'eval_accuracy': 0.9487631941993647, 'eval_runtime': 6.0833, 'eval_samples_per_second': 414.082, 'eval_steps_per_second': 51.781, 'epoch': 10.0}
|
858 |
{'train_runtime': 453.0745, 'train_samples_per_second': 211.819, 'train_steps_per_second': 3.311, 'train_loss': 0.05337127685546875, 'epoch': 10.0}
|
859 |
|
860 |
+
***** train metrics *****
|
861 |
+
epoch = 10.0
|
862 |
+
total_flos = 4128758GF
|
863 |
+
train_loss = 0.0534
|
864 |
+
train_runtime = 0:07:33.07
|
865 |
+
train_samples = 9597
|
866 |
+
train_samples_per_second = 211.819
|
867 |
+
train_steps_per_second = 3.311
|
868 |
+
08/30/2024 22:35:59 - INFO - __main__ - *** Evaluate ***
|
869 |
+
[INFO|trainer.py:805] 2024-08-30 22:35:59,010 >> The following columns in the evaluation set don't have a corresponding argument in `RobertaForTokenClassification.forward` and have been ignored: tokens, id, ner_tags. If tokens, id, ner_tags are not expected by `RobertaForTokenClassification.forward`, you can safely ignore this message.
|
870 |
+
[INFO|trainer.py:3788] 2024-08-30 22:35:59,013 >>
|
871 |
+
***** Running Evaluation *****
|
872 |
+
[INFO|trainer.py:3790] 2024-08-30 22:35:59,013 >> Num examples = 2519
|
873 |
+
[INFO|trainer.py:3793] 2024-08-30 22:35:59,013 >> Batch size = 8
|
874 |
+
|
875 |
0%| | 0/315 [00:00<?, ?it/s]
|
876 |
3%|▎ | 8/315 [00:00<00:03, 78.55it/s]
|
877 |
5%|▌ | 16/315 [00:00<00:03, 76.48it/s]
|
878 |
8%|▊ | 24/315 [00:00<00:03, 77.21it/s]
|
879 |
10%|█ | 32/315 [00:00<00:03, 74.08it/s]
|
880 |
13%|█▎ | 41/315 [00:00<00:03, 76.11it/s]
|
881 |
16%|█▌ | 49/315 [00:00<00:03, 75.89it/s]
|
882 |
18%|█▊ | 58/315 [00:00<00:03, 78.15it/s]
|
883 |
21%|██ | 66/315 [00:00<00:03, 76.95it/s]
|
884 |
24%|██▍ | 75/315 [00:00<00:03, 77.89it/s]
|
885 |
26%|██▋ | 83/315 [00:01<00:03, 77.02it/s]
|
886 |
29%|██▉ | 91/315 [00:01<00:02, 76.65it/s]
|
887 |
31%|███▏ | 99/315 [00:01<00:02, 74.96it/s]
|
888 |
34%|███▍ | 108/315 [00:01<00:02, 76.95it/s]
|
889 |
37%|███▋ | 117/315 [00:01<00:02, 78.34it/s]
|
890 |
40%|███▉ | 125/315 [00:01<00:02, 76.97it/s]
|
891 |
42%|████▏ | 133/315 [00:01<00:02, 77.15it/s]
|
892 |
45%|████▍ | 141/315 [00:01<00:02, 77.50it/s]
|
893 |
47%|████▋ | 149/315 [00:01<00:02, 72.28it/s]
|
894 |
50%|█████ | 158/315 [00:02<00:02, 74.89it/s]
|
895 |
53%|█████▎ | 166/315 [00:02<00:01, 75.30it/s]
|
896 |
55%|█████▌ | 174/315 [00:02<00:01, 76.29it/s]
|
897 |
58%|█████▊ | 182/315 [00:02<00:01, 76.29it/s]
|
898 |
60%|██████ | 190/315 [00:02<00:01, 77.30it/s]
|
899 |
63%|██████▎ | 198/315 [00:02<00:01, 75.64it/s]
|
900 |
65%|██████▌ | 206/315 [00:02<00:01, 75.73it/s]
|
901 |
68%|██████▊ | 215/315 [00:02<00:01, 77.49it/s]
|
902 |
71%|███████ | 224/315 [00:02<00:01, 79.65it/s]
|
903 |
74%|███████▍ | 233/315 [00:03<00:01, 80.99it/s]
|
904 |
77%|███████▋ | 242/315 [00:03<00:00, 79.71it/s]
|
905 |
80%|███████▉ | 251/315 [00:03<00:00, 80.17it/s]
|
906 |
83%|████████▎ | 260/315 [00:03<00:00, 80.00it/s]
|
907 |
85%|████████▌ | 269/315 [00:03<00:00, 79.61it/s]
|
908 |
88%|████████▊ | 278/315 [00:03<00:00, 80.43it/s]
|
909 |
91%|█████████ | 287/315 [00:03<00:00, 79.16it/s]
|
910 |
94%|█████████▎| 295/315 [00:03<00:00, 78.94it/s]
|
911 |
96%|█████████▌| 303/315 [00:03<00:00, 76.90it/s]
|
912 |
99%|█████████▊| 311/315 [00:04<00:00, 77.62it/s]
|
913 |
+
***** eval metrics *****
|
914 |
+
epoch = 10.0
|
915 |
+
eval_accuracy = 0.9483
|
916 |
+
eval_f1 = 0.6921
|
917 |
+
eval_loss = 0.2747
|
918 |
+
eval_precision = 0.6675
|
919 |
+
eval_recall = 0.7187
|
920 |
+
eval_runtime = 0:00:05.52
|
921 |
+
eval_samples = 2519
|
922 |
+
eval_samples_per_second = 456.084
|
923 |
+
eval_steps_per_second = 57.033
|
924 |
+
08/30/2024 22:36:04 - INFO - __main__ - *** Predict ***
|
925 |
+
[INFO|trainer.py:805] 2024-08-30 22:36:04,538 >> The following columns in the test set don't have a corresponding argument in `RobertaForTokenClassification.forward` and have been ignored: tokens, id, ner_tags. If tokens, id, ner_tags are not expected by `RobertaForTokenClassification.forward`, you can safely ignore this message.
|
926 |
+
[INFO|trainer.py:3788] 2024-08-30 22:36:04,541 >>
|
927 |
+
***** Running Prediction *****
|
928 |
+
[INFO|trainer.py:3790] 2024-08-30 22:36:04,541 >> Num examples = 4047
|
929 |
+
[INFO|trainer.py:3793] 2024-08-30 22:36:04,541 >> Batch size = 8
|
930 |
+
|
931 |
0%| | 0/506 [00:00<?, ?it/s]
|
932 |
2%|▏ | 9/506 [00:00<00:06, 81.25it/s]
|
933 |
4%|▎ | 18/506 [00:00<00:06, 80.18it/s]
|
934 |
5%|▌ | 27/506 [00:00<00:05, 80.05it/s]
|
935 |
7%|▋ | 36/506 [00:00<00:05, 80.13it/s]
|
936 |
9%|▉ | 45/506 [00:00<00:05, 80.98it/s]
|
937 |
11%|█ | 54/506 [00:00<00:05, 81.38it/s]
|
938 |
12%|█▏ | 63/506 [00:00<00:05, 80.61it/s]
|
939 |
14%|█▍ | 72/506 [00:00<00:05, 80.43it/s]
|
940 |
16%|█▌ | 81/506 [00:01<00:05, 73.83it/s]
|
941 |
18%|█▊ | 89/506 [00:01<00:05, 72.82it/s]
|
942 |
19%|█▉ | 98/506 [00:01<00:05, 75.82it/s]
|
943 |
21%|██ | 107/506 [00:01<00:05, 76.84it/s]
|
944 |
23%|██▎ | 116/506 [00:01<00:05, 77.86it/s]
|
945 |
25%|██▍ | 124/506 [00:01<00:05, 75.68it/s]
|
946 |
26%|██▌ | 132/506 [00:01<00:05, 70.81it/s]
|
947 |
28%|██▊ | 140/506 [00:01<00:05, 73.02it/s]
|
948 |
29%|██▉ | 149/506 [00:01<00:04, 75.42it/s]
|
949 |
31%|███ | 157/506 [00:02<00:04, 73.61it/s]
|
950 |
33%|███▎ | 165/506 [00:02<00:04, 74.13it/s]
|
951 |
34%|███▍ | 174/506 [00:02<00:04, 76.00it/s]
|
952 |
36%|███▌ | 183/506 [00:02<00:04, 77.93it/s]
|
953 |
38%|███▊ | 192/506 [00:02<00:03, 78.84it/s]
|
954 |
40%|███▉ | 200/506 [00:02<00:03, 78.57it/s]
|
955 |
41%|████ | 208/506 [00:02<00:03, 78.07it/s]
|
956 |
43%|████▎ | 217/506 [00:02<00:03, 78.97it/s]
|
957 |
44%|████▍ | 225/506 [00:02<00:03, 77.10it/s]
|
958 |
46%|████▌ | 233/506 [00:03<00:03, 77.53it/s]
|
959 |
48%|████▊ | 241/506 [00:03<00:03, 77.34it/s]
|
960 |
49%|████▉ | 250/506 [00:03<00:03, 78.93it/s]
|
961 |
51%|█████ | 259/506 [00:03<00:03, 79.67it/s]
|
962 |
53%|█████▎ | 268/506 [00:03<00:02, 80.55it/s]
|
963 |
55%|█████▍ | 277/506 [00:03<00:02, 80.74it/s]
|
964 |
57%|█████▋ | 286/506 [00:03<00:02, 79.79it/s]
|
965 |
58%|█████▊ | 295/506 [00:03<00:02, 80.40it/s]
|
966 |
60%|██████ | 304/506 [00:03<00:02, 81.38it/s]
|
967 |
62%|██████▏ | 313/506 [00:04<00:02, 81.61it/s]
|
968 |
64%|██████▎ | 322/506 [00:04<00:02, 82.25it/s]
|
969 |
65%|██████▌ | 331/506 [00:04<00:02, 83.19it/s]
|
970 |
67%|██████▋ | 340/506 [00:04<00:01, 83.24it/s]
|
971 |
69%|██████▉ | 349/506 [00:04<00:01, 83.80it/s]
|
972 |
71%|███████ | 358/506 [00:04<00:01, 83.68it/s]
|
973 |
73%|███████▎ | 367/506 [00:04<00:01, 81.45it/s]
|
974 |
74%|███████▍ | 376/506 [00:04<00:01, 79.16it/s]
|
975 |
76%|███████▌ | 384/506 [00:04<00:01, 77.68it/s]
|
976 |
77%|███████▋ | 392/506 [00:05<00:01, 74.28it/s]
|
977 |
79%|███████▉ | 400/506 [00:05<00:01, 73.39it/s]
|
978 |
81%|████████ | 408/506 [00:05<00:01, 75.09it/s]
|
979 |
82%|████████▏ | 416/506 [00:05<00:01, 75.31it/s]
|
980 |
84%|████████▍ | 424/506 [00:05<00:01, 76.23it/s]
|
981 |
86%|████████▌ | 433/506 [00:05<00:00, 78.69it/s]
|
982 |
87%|████████▋ | 441/506 [00:05<00:00, 77.63it/s]
|
983 |
89%|████████▊ | 449/506 [00:05<00:00, 78.02it/s]
|
984 |
91%|█████████ | 458/506 [00:05<00:00, 79.39it/s]
|
985 |
92%|█████████▏| 467/506 [00:05<00:00, 79.79it/s]
|
986 |
94%|█████████▍| 475/506 [00:06<00:00, 77.02it/s]
|
987 |
96%|█████████▌| 484/506 [00:06<00:00, 76.28it/s]
|
988 |
97%|█████████▋| 493/506 [00:06<00:00, 77.37it/s]
|
989 |
99%|█████████▉| 501/506 [00:06<00:00, 77.62it/s]
|
990 |
+
[INFO|trainer.py:3478] 2024-08-30 22:36:13,602 >> Saving model checkpoint to /content/dissertation/scripts/ner/output
|
991 |
+
[INFO|configuration_utils.py:472] 2024-08-30 22:36:13,603 >> Configuration saved in /content/dissertation/scripts/ner/output/config.json
|
992 |
+
[INFO|modeling_utils.py:2690] 2024-08-30 22:36:14,973 >> Model weights saved in /content/dissertation/scripts/ner/output/model.safetensors
|
993 |
+
[INFO|tokenization_utils_base.py:2574] 2024-08-30 22:36:14,974 >> tokenizer config file saved in /content/dissertation/scripts/ner/output/tokenizer_config.json
|
994 |
+
[INFO|tokenization_utils_base.py:2583] 2024-08-30 22:36:14,975 >> Special tokens file saved in /content/dissertation/scripts/ner/output/special_tokens_map.json
|
995 |
+
***** predict metrics *****
|
996 |
+
predict_accuracy = 0.9466
|
997 |
+
predict_f1 = 0.6902
|
998 |
+
predict_loss = 0.2951
|
999 |
+
predict_precision = 0.6732
|
1000 |
+
predict_recall = 0.7081
|
1001 |
+
predict_runtime = 0:00:08.89
|
1002 |
+
predict_samples_per_second = 454.847
|
1003 |
+
predict_steps_per_second = 56.87
|
1004 |
+
|
train_results.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"epoch": 10.0,
|
3 |
-
"total_flos":
|
4 |
-
"train_loss": 0.
|
5 |
-
"train_runtime":
|
6 |
-
"train_samples":
|
7 |
-
"train_samples_per_second":
|
8 |
-
"train_steps_per_second": 3.
|
9 |
}
|
|
|
1 |
{
|
2 |
"epoch": 10.0,
|
3 |
+
"total_flos": 4433220248012460.0,
|
4 |
+
"train_loss": 0.05337127685546875,
|
5 |
+
"train_runtime": 453.0745,
|
6 |
+
"train_samples": 9597,
|
7 |
+
"train_samples_per_second": 211.819,
|
8 |
+
"train_steps_per_second": 3.311
|
9 |
}
|
trainer_state.json
CHANGED
@@ -1,201 +1,166 @@
|
|
1 |
{
|
2 |
-
"best_metric": 0.
|
3 |
-
"best_model_checkpoint": "/content/dissertation/scripts/ner/output/checkpoint-
|
4 |
"epoch": 10.0,
|
5 |
"eval_steps": 500,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
10 |
"log_history": [
|
11 |
{
|
12 |
"epoch": 1.0,
|
13 |
-
"eval_accuracy": 0.
|
14 |
-
"eval_f1": 0.
|
15 |
-
"eval_loss": 0.
|
16 |
-
"eval_precision": 0.
|
17 |
-
"eval_recall": 0.
|
18 |
-
"eval_runtime":
|
19 |
-
"eval_samples_per_second":
|
20 |
-
"eval_steps_per_second":
|
21 |
-
"step":
|
22 |
-
},
|
23 |
-
{
|
24 |
-
"epoch": 1.1764705882352942,
|
25 |
-
"grad_norm": 0.1311497986316681,
|
26 |
-
"learning_rate": 4.411764705882353e-05,
|
27 |
-
"loss": 0.0104,
|
28 |
-
"step": 500
|
29 |
},
|
30 |
{
|
31 |
"epoch": 2.0,
|
32 |
-
"eval_accuracy": 0.
|
33 |
-
"eval_f1": 0.
|
34 |
-
"eval_loss": 0.
|
35 |
-
"eval_precision": 0.
|
36 |
-
"eval_recall": 0.
|
37 |
-
"eval_runtime":
|
38 |
-
"eval_samples_per_second":
|
39 |
-
"eval_steps_per_second":
|
40 |
-
"step":
|
41 |
-
},
|
42 |
-
{
|
43 |
-
"epoch": 2.3529411764705883,
|
44 |
-
"grad_norm": 0.002636878052726388,
|
45 |
-
"learning_rate": 3.8235294117647055e-05,
|
46 |
-
"loss": 0.0034,
|
47 |
-
"step": 1000
|
48 |
},
|
49 |
{
|
50 |
"epoch": 3.0,
|
51 |
-
"eval_accuracy": 0.
|
52 |
-
"eval_f1": 0.
|
53 |
-
"eval_loss": 0.
|
54 |
-
"eval_precision": 0.
|
55 |
-
"eval_recall": 0.
|
56 |
-
"eval_runtime":
|
57 |
-
"eval_samples_per_second":
|
58 |
-
"eval_steps_per_second":
|
59 |
-
"step":
|
60 |
-
},
|
61 |
-
{
|
62 |
-
"epoch": 3.
|
63 |
-
"grad_norm": 0.
|
64 |
-
"learning_rate": 3.
|
65 |
-
"loss": 0.
|
66 |
-
"step":
|
67 |
},
|
68 |
{
|
69 |
"epoch": 4.0,
|
70 |
-
"eval_accuracy": 0.
|
71 |
-
"eval_f1": 0.
|
72 |
-
"eval_loss": 0.
|
73 |
-
"eval_precision": 0.
|
74 |
-
"eval_recall": 0.
|
75 |
-
"eval_runtime":
|
76 |
-
"eval_samples_per_second":
|
77 |
-
"eval_steps_per_second":
|
78 |
-
"step":
|
79 |
-
},
|
80 |
-
{
|
81 |
-
"epoch": 4.705882352941177,
|
82 |
-
"grad_norm": 0.27693310379981995,
|
83 |
-
"learning_rate": 2.647058823529412e-05,
|
84 |
-
"loss": 0.0012,
|
85 |
-
"step": 2000
|
86 |
},
|
87 |
{
|
88 |
"epoch": 5.0,
|
89 |
-
"eval_accuracy": 0.
|
90 |
-
"eval_f1": 0.
|
91 |
-
"eval_loss": 0.
|
92 |
-
"eval_precision": 0.
|
93 |
-
"eval_recall": 0.
|
94 |
-
"eval_runtime":
|
95 |
-
"eval_samples_per_second":
|
96 |
-
"eval_steps_per_second":
|
97 |
-
"step":
|
98 |
-
},
|
99 |
-
{
|
100 |
-
"epoch": 5.882352941176471,
|
101 |
-
"grad_norm": 0.008007431402802467,
|
102 |
-
"learning_rate": 2.058823529411765e-05,
|
103 |
-
"loss": 0.0009,
|
104 |
-
"step": 2500
|
105 |
},
|
106 |
{
|
107 |
"epoch": 6.0,
|
108 |
-
"eval_accuracy": 0.
|
109 |
-
"eval_f1": 0.
|
110 |
-
"eval_loss": 0.
|
111 |
-
"eval_precision": 0.
|
112 |
-
"eval_recall": 0.
|
113 |
-
"eval_runtime":
|
114 |
-
"eval_samples_per_second":
|
115 |
-
"eval_steps_per_second":
|
116 |
-
"step":
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
117 |
},
|
118 |
{
|
119 |
"epoch": 7.0,
|
120 |
-
"eval_accuracy": 0.
|
121 |
-
"eval_f1": 0.
|
122 |
-
"eval_loss": 0.
|
123 |
-
"eval_precision": 0.
|
124 |
-
"eval_recall": 0.
|
125 |
-
"eval_runtime":
|
126 |
-
"eval_samples_per_second":
|
127 |
-
"eval_steps_per_second":
|
128 |
-
"step":
|
129 |
-
},
|
130 |
-
{
|
131 |
-
"epoch": 7.0588235294117645,
|
132 |
-
"grad_norm": 0.0017305670771747828,
|
133 |
-
"learning_rate": 1.4705882352941177e-05,
|
134 |
-
"loss": 0.0005,
|
135 |
-
"step": 3000
|
136 |
},
|
137 |
{
|
138 |
"epoch": 8.0,
|
139 |
-
"eval_accuracy": 0.
|
140 |
-
"eval_f1": 0.
|
141 |
-
"eval_loss": 0.
|
142 |
-
"eval_precision": 0.
|
143 |
-
"eval_recall": 0.
|
144 |
-
"eval_runtime":
|
145 |
-
"eval_samples_per_second":
|
146 |
-
"eval_steps_per_second":
|
147 |
-
"step":
|
148 |
-
},
|
149 |
-
{
|
150 |
-
"epoch": 8.235294117647058,
|
151 |
-
"grad_norm": 0.00020609228522516787,
|
152 |
-
"learning_rate": 8.823529411764707e-06,
|
153 |
-
"loss": 0.0004,
|
154 |
-
"step": 3500
|
155 |
},
|
156 |
{
|
157 |
"epoch": 9.0,
|
158 |
-
"eval_accuracy": 0.
|
159 |
-
"eval_f1": 0.
|
160 |
-
"eval_loss": 0.
|
161 |
-
"eval_precision": 0.
|
162 |
-
"eval_recall": 0.
|
163 |
-
"eval_runtime":
|
164 |
-
"eval_samples_per_second":
|
165 |
-
"eval_steps_per_second":
|
166 |
-
"step":
|
167 |
},
|
168 |
{
|
169 |
-
"epoch":
|
170 |
-
"grad_norm": 0.
|
171 |
-
"learning_rate":
|
172 |
-
"loss": 0.
|
173 |
-
"step":
|
174 |
},
|
175 |
{
|
176 |
"epoch": 10.0,
|
177 |
-
"eval_accuracy": 0.
|
178 |
-
"eval_f1": 0.
|
179 |
-
"eval_loss": 0.
|
180 |
-
"eval_precision": 0.
|
181 |
-
"eval_recall": 0.
|
182 |
-
"eval_runtime":
|
183 |
-
"eval_samples_per_second":
|
184 |
-
"eval_steps_per_second":
|
185 |
-
"step":
|
186 |
},
|
187 |
{
|
188 |
"epoch": 10.0,
|
189 |
-
"step":
|
190 |
-
"total_flos":
|
191 |
-
"train_loss": 0.
|
192 |
-
"train_runtime":
|
193 |
-
"train_samples_per_second":
|
194 |
-
"train_steps_per_second": 3.
|
195 |
}
|
196 |
],
|
197 |
"logging_steps": 500,
|
198 |
-
"max_steps":
|
199 |
"num_input_tokens_seen": 0,
|
200 |
"num_train_epochs": 10,
|
201 |
"save_steps": 500,
|
@@ -211,7 +176,7 @@
|
|
211 |
"attributes": {}
|
212 |
}
|
213 |
},
|
214 |
-
"total_flos":
|
215 |
"train_batch_size": 32,
|
216 |
"trial_name": null,
|
217 |
"trial_params": null
|
|
|
1 |
{
|
2 |
+
"best_metric": 0.6921454928835002,
|
3 |
+
"best_model_checkpoint": "/content/dissertation/scripts/ner/output/checkpoint-1350",
|
4 |
"epoch": 10.0,
|
5 |
"eval_steps": 500,
|
6 |
+
"global_step": 1500,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
10 |
"log_history": [
|
11 |
{
|
12 |
"epoch": 1.0,
|
13 |
+
"eval_accuracy": 0.9456350861432834,
|
14 |
+
"eval_f1": 0.567482432759874,
|
15 |
+
"eval_loss": 0.1503801792860031,
|
16 |
+
"eval_precision": 0.5091304347826087,
|
17 |
+
"eval_recall": 0.6409414340448824,
|
18 |
+
"eval_runtime": 5.4744,
|
19 |
+
"eval_samples_per_second": 460.141,
|
20 |
+
"eval_steps_per_second": 57.541,
|
21 |
+
"step": 150
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
22 |
},
|
23 |
{
|
24 |
"epoch": 2.0,
|
25 |
+
"eval_accuracy": 0.9462125830151753,
|
26 |
+
"eval_f1": 0.639,
|
27 |
+
"eval_loss": 0.15472079813480377,
|
28 |
+
"eval_precision": 0.5881270133456051,
|
29 |
+
"eval_recall": 0.6995073891625616,
|
30 |
+
"eval_runtime": 5.4777,
|
31 |
+
"eval_samples_per_second": 459.865,
|
32 |
+
"eval_steps_per_second": 57.506,
|
33 |
+
"step": 300
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
34 |
},
|
35 |
{
|
36 |
"epoch": 3.0,
|
37 |
+
"eval_accuracy": 0.9475600757162566,
|
38 |
+
"eval_f1": 0.6589207332816938,
|
39 |
+
"eval_loss": 0.16183686256408691,
|
40 |
+
"eval_precision": 0.6236559139784946,
|
41 |
+
"eval_recall": 0.6984126984126984,
|
42 |
+
"eval_runtime": 5.4114,
|
43 |
+
"eval_samples_per_second": 465.5,
|
44 |
+
"eval_steps_per_second": 58.211,
|
45 |
+
"step": 450
|
46 |
+
},
|
47 |
+
{
|
48 |
+
"epoch": 3.3333333333333335,
|
49 |
+
"grad_norm": 0.5628494620323181,
|
50 |
+
"learning_rate": 3.3333333333333335e-05,
|
51 |
+
"loss": 0.126,
|
52 |
+
"step": 500
|
53 |
},
|
54 |
{
|
55 |
"epoch": 4.0,
|
56 |
+
"eval_accuracy": 0.9450575892713915,
|
57 |
+
"eval_f1": 0.6627936347562516,
|
58 |
+
"eval_loss": 0.1920311450958252,
|
59 |
+
"eval_precision": 0.6153846153846154,
|
60 |
+
"eval_recall": 0.7181171319102354,
|
61 |
+
"eval_runtime": 5.502,
|
62 |
+
"eval_samples_per_second": 457.833,
|
63 |
+
"eval_steps_per_second": 57.252,
|
64 |
+
"step": 600
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
65 |
},
|
66 |
{
|
67 |
"epoch": 5.0,
|
68 |
+
"eval_accuracy": 0.9488273605184638,
|
69 |
+
"eval_f1": 0.678646934460888,
|
70 |
+
"eval_loss": 0.21016015112400055,
|
71 |
+
"eval_precision": 0.6561062851303014,
|
72 |
+
"eval_recall": 0.7027914614121511,
|
73 |
+
"eval_runtime": 5.3774,
|
74 |
+
"eval_samples_per_second": 468.442,
|
75 |
+
"eval_steps_per_second": 58.579,
|
76 |
+
"step": 750
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
77 |
},
|
78 |
{
|
79 |
"epoch": 6.0,
|
80 |
+
"eval_accuracy": 0.9467259135679682,
|
81 |
+
"eval_f1": 0.6750065155069064,
|
82 |
+
"eval_loss": 0.24135558307170868,
|
83 |
+
"eval_precision": 0.6442786069651741,
|
84 |
+
"eval_recall": 0.7088122605363985,
|
85 |
+
"eval_runtime": 5.4273,
|
86 |
+
"eval_samples_per_second": 464.135,
|
87 |
+
"eval_steps_per_second": 58.04,
|
88 |
+
"step": 900
|
89 |
+
},
|
90 |
+
{
|
91 |
+
"epoch": 6.666666666666667,
|
92 |
+
"grad_norm": 0.5565813779830933,
|
93 |
+
"learning_rate": 1.6666666666666667e-05,
|
94 |
+
"loss": 0.0251,
|
95 |
+
"step": 1000
|
96 |
},
|
97 |
{
|
98 |
"epoch": 7.0,
|
99 |
+
"eval_accuracy": 0.9491642336937341,
|
100 |
+
"eval_f1": 0.6816380449141347,
|
101 |
+
"eval_loss": 0.25002309679985046,
|
102 |
+
"eval_precision": 0.658835546475996,
|
103 |
+
"eval_recall": 0.7060755336617406,
|
104 |
+
"eval_runtime": 5.3969,
|
105 |
+
"eval_samples_per_second": 466.75,
|
106 |
+
"eval_steps_per_second": 58.367,
|
107 |
+
"step": 1050
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
108 |
},
|
109 |
{
|
110 |
"epoch": 8.0,
|
111 |
+
"eval_accuracy": 0.947383618338734,
|
112 |
+
"eval_f1": 0.6846153846153845,
|
113 |
+
"eval_loss": 0.26423653960227966,
|
114 |
+
"eval_precision": 0.6439942112879884,
|
115 |
+
"eval_recall": 0.7307060755336617,
|
116 |
+
"eval_runtime": 5.4091,
|
117 |
+
"eval_samples_per_second": 465.698,
|
118 |
+
"eval_steps_per_second": 58.235,
|
119 |
+
"step": 1200
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
120 |
},
|
121 |
{
|
122 |
"epoch": 9.0,
|
123 |
+
"eval_accuracy": 0.9483461131252205,
|
124 |
+
"eval_f1": 0.6921454928835002,
|
125 |
+
"eval_loss": 0.27473828196525574,
|
126 |
+
"eval_precision": 0.6675139806812405,
|
127 |
+
"eval_recall": 0.7186644772851669,
|
128 |
+
"eval_runtime": 5.4389,
|
129 |
+
"eval_samples_per_second": 463.146,
|
130 |
+
"eval_steps_per_second": 57.916,
|
131 |
+
"step": 1350
|
132 |
},
|
133 |
{
|
134 |
+
"epoch": 10.0,
|
135 |
+
"grad_norm": 0.17641158401966095,
|
136 |
+
"learning_rate": 0.0,
|
137 |
+
"loss": 0.0091,
|
138 |
+
"step": 1500
|
139 |
},
|
140 |
{
|
141 |
"epoch": 10.0,
|
142 |
+
"eval_accuracy": 0.9487631941993647,
|
143 |
+
"eval_f1": 0.6877946568884233,
|
144 |
+
"eval_loss": 0.27674129605293274,
|
145 |
+
"eval_precision": 0.6594676042189854,
|
146 |
+
"eval_recall": 0.7186644772851669,
|
147 |
+
"eval_runtime": 6.0833,
|
148 |
+
"eval_samples_per_second": 414.082,
|
149 |
+
"eval_steps_per_second": 51.781,
|
150 |
+
"step": 1500
|
151 |
},
|
152 |
{
|
153 |
"epoch": 10.0,
|
154 |
+
"step": 1500,
|
155 |
+
"total_flos": 4433220248012460.0,
|
156 |
+
"train_loss": 0.05337127685546875,
|
157 |
+
"train_runtime": 453.0745,
|
158 |
+
"train_samples_per_second": 211.819,
|
159 |
+
"train_steps_per_second": 3.311
|
160 |
}
|
161 |
],
|
162 |
"logging_steps": 500,
|
163 |
+
"max_steps": 1500,
|
164 |
"num_input_tokens_seen": 0,
|
165 |
"num_train_epochs": 10,
|
166 |
"save_steps": 500,
|
|
|
176 |
"attributes": {}
|
177 |
}
|
178 |
},
|
179 |
+
"total_flos": 4433220248012460.0,
|
180 |
"train_batch_size": 32,
|
181 |
"trial_name": null,
|
182 |
"trial_params": null
|