Rodrigo1771 committed on
Commit
f724b75
1 Parent(s): ba692f2

Training in progress, epoch 0

Browse files
README.md ADDED
@@ -0,0 +1,102 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: apache-2.0
3
+ base_model: PlanTL-GOB-ES/bsc-bio-ehr-es
4
+ tags:
5
+ - token-classification
6
+ - generated_from_trainer
7
+ datasets:
8
+ - Rodrigo1771/combined-train-distemist-dev-ner
9
+ metrics:
10
+ - precision
11
+ - recall
12
+ - f1
13
+ - accuracy
14
+ model-index:
15
+ - name: output
16
+ results:
17
+ - task:
18
+ name: Token Classification
19
+ type: token-classification
20
+ dataset:
21
+ name: Rodrigo1771/combined-train-distemist-dev-ner
22
+ type: Rodrigo1771/combined-train-distemist-dev-ner
23
+ config: CombinedTrainDisTEMISTDevNER
24
+ split: validation
25
+ args: CombinedTrainDisTEMISTDevNER
26
+ metrics:
27
+ - name: Precision
28
+ type: precision
29
+ value: 0.32197630636422075
30
+ - name: Recall
31
+ type: recall
32
+ value: 0.8203088441740758
33
+ - name: F1
34
+ type: f1
35
+ value: 0.4624414693662204
36
+ - name: Accuracy
37
+ type: accuracy
38
+ value: 0.8601754843670617
39
+ ---
40
+
41
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
42
+ should probably proofread and complete it, then remove this comment. -->
43
+
44
+ # output
45
+
46
+ This model is a fine-tuned version of [PlanTL-GOB-ES/bsc-bio-ehr-es](https://huggingface.co/PlanTL-GOB-ES/bsc-bio-ehr-es) on the Rodrigo1771/combined-train-distemist-dev-ner dataset.
47
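+ It achieves the following results on the evaluation set (these match the checkpoint at epoch 4.9988 / step 2127 in the training results table below, not the final epoch):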
48
+ - Loss: 0.6266
49
+ - Precision: 0.3220
50
+ - Recall: 0.8203
51
+ - F1: 0.4624
52
+ - Accuracy: 0.8602
53
+
54
+ ## Model description
55
+
56
+ More information needed
57
+
58
+ ## Intended uses & limitations
59
+
60
+ More information needed
61
+
62
+ ## Training and evaluation data
63
+
64
+ More information needed
65
+
66
+ ## Training procedure
67
+
68
+ ### Training hyperparameters
69
+
70
+ The following hyperparameters were used during training:
71
+ - learning_rate: 5e-05
72
+ - train_batch_size: 32
73
+ - eval_batch_size: 8
74
+ - seed: 42
75
+ - gradient_accumulation_steps: 2
76
+ - total_train_batch_size: 64
77
+ - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
78
+ - lr_scheduler_type: linear
79
+ - num_epochs: 10.0
80
+
81
+ ### Training results
82
+
83
+ | Training Loss | Epoch | Step | Validation Loss | Precision | Recall | F1 | Accuracy |
84
+ |:-------------:|:------:|:----:|:---------------:|:---------:|:------:|:------:|:--------:|
85
+ | No log | 0.9988 | 425 | 0.3834 | 0.2920 | 0.7819 | 0.4252 | 0.8517 |
86
+ | 0.3349 | 2.0 | 851 | 0.5730 | 0.2681 | 0.8070 | 0.4025 | 0.8221 |
87
+ | 0.1788 | 2.9988 | 1276 | 0.5796 | 0.2848 | 0.8009 | 0.4202 | 0.8338 |
88
+ | 0.1227 | 4.0 | 1702 | 0.6591 | 0.2996 | 0.8109 | 0.4376 | 0.8388 |
89
+ | 0.0856 | 4.9988 | 2127 | 0.6266 | 0.3220 | 0.8203 | 0.4624 | 0.8602 |
90
+ | 0.0597 | 6.0 | 2553 | 0.7859 | 0.3075 | 0.8112 | 0.4460 | 0.8476 |
91
+ | 0.0597 | 6.9988 | 2978 | 0.8297 | 0.3137 | 0.8166 | 0.4532 | 0.8508 |
92
+ | 0.0458 | 8.0 | 3404 | 0.8468 | 0.3135 | 0.8205 | 0.4536 | 0.8532 |
93
+ | 0.0343 | 8.9988 | 3829 | 0.9241 | 0.3085 | 0.8182 | 0.4481 | 0.8494 |
94
+ | 0.0292 | 9.9882 | 4250 | 0.9384 | 0.3100 | 0.8163 | 0.4494 | 0.8499 |
95
+
96
+
97
+ ### Framework versions
98
+
99
+ - Transformers 4.42.4
100
+ - PyTorch 2.4.0+cu121
101
+ - Datasets 2.21.0
102
+ - Tokenizers 0.19.1
all_results.json ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 9.988249118683902,
3
+ "eval_accuracy": 0.8601754843670617,
4
+ "eval_f1": 0.4624414693662204,
5
+ "eval_loss": 0.6266195774078369,
6
+ "eval_precision": 0.32197630636422075,
7
+ "eval_recall": 0.8203088441740758,
8
+ "eval_runtime": 14.995,
9
+ "eval_samples": 6810,
10
+ "eval_samples_per_second": 454.15,
11
+ "eval_steps_per_second": 56.819,
12
+ "predict_accuracy": 0.9510588123763138,
13
+ "predict_f1": 0.6555634301913535,
14
+ "predict_loss": 0.18893657624721527,
15
+ "predict_precision": 0.5454009433962265,
16
+ "predict_recall": 0.8214920071047958,
17
+ "predict_runtime": 30.3218,
18
+ "predict_samples_per_second": 481.964,
19
+ "predict_steps_per_second": 60.254,
20
+ "total_flos": 1.2649810588547778e+16,
21
+ "train_loss": 0.10639642311544979,
22
+ "train_runtime": 1205.6346,
23
+ "train_samples": 27229,
24
+ "train_samples_per_second": 225.848,
25
+ "train_steps_per_second": 3.525
26
+ }
config.json ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "PlanTL-GOB-ES/bsc-bio-ehr-es",
3
+ "architectures": [
4
+ "RobertaForTokenClassification"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "bos_token_id": 0,
8
+ "classifier_dropout": null,
9
+ "eos_token_id": 2,
10
+ "finetuning_task": "ner",
11
+ "gradient_checkpointing": false,
12
+ "hidden_act": "gelu",
13
+ "hidden_dropout_prob": 0.1,
14
+ "hidden_size": 768,
15
+ "id2label": {
16
+ "0": "O",
17
+ "1": "B-ENFERMEDAD",
18
+ "2": "I-ENFERMEDAD",
19
+ "3": "B-PROCEDIMIENTO",
20
+ "4": "I-PROCEDIMIENTO",
21
+ "5": "B-SINTOMA",
22
+ "6": "I-SINTOMA",
23
+ "7": "B-FARMACO",
24
+ "8": "I-FARMACO"
25
+ },
26
+ "initializer_range": 0.02,
27
+ "intermediate_size": 3072,
28
+ "label2id": {
29
+ "B-ENFERMEDAD": 1,
30
+ "B-FARMACO": 7,
31
+ "B-PROCEDIMIENTO": 3,
32
+ "B-SINTOMA": 5,
33
+ "I-ENFERMEDAD": 2,
34
+ "I-FARMACO": 8,
35
+ "I-PROCEDIMIENTO": 4,
36
+ "I-SINTOMA": 6,
37
+ "O": 0
38
+ },
39
+ "layer_norm_eps": 1e-05,
40
+ "max_position_embeddings": 514,
41
+ "model_type": "roberta",
42
+ "num_attention_heads": 12,
43
+ "num_hidden_layers": 12,
44
+ "pad_token_id": 1,
45
+ "position_embedding_type": "absolute",
46
+ "torch_dtype": "float32",
47
+ "transformers_version": "4.42.4",
48
+ "type_vocab_size": 1,
49
+ "use_cache": true,
50
+ "vocab_size": 50262
51
+ }
eval_results.json ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 9.988249118683902,
3
+ "eval_accuracy": 0.8601754843670617,
4
+ "eval_f1": 0.4624414693662204,
5
+ "eval_loss": 0.6266195774078369,
6
+ "eval_precision": 0.32197630636422075,
7
+ "eval_recall": 0.8203088441740758,
8
+ "eval_runtime": 14.995,
9
+ "eval_samples": 6810,
10
+ "eval_samples_per_second": 454.15,
11
+ "eval_steps_per_second": 56.819
12
+ }
merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:248b0b7048aa4d7483fba444efd0a93176b0ef8eb07838fef1140c669d738b7e
3
+ size 496262556
predict_results.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "predict_accuracy": 0.9510588123763138,
3
+ "predict_f1": 0.6555634301913535,
4
+ "predict_loss": 0.18893657624721527,
5
+ "predict_precision": 0.5454009433962265,
6
+ "predict_recall": 0.8214920071047958,
7
+ "predict_runtime": 30.3218,
8
+ "predict_samples_per_second": 481.964,
9
+ "predict_steps_per_second": 60.254
10
+ }
predictions.txt ADDED
The diff for this file is too large to render. See raw diff
 
special_tokens_map.json ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "<s>",
4
+ "lstrip": false,
5
+ "normalized": true,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "cls_token": {
10
+ "content": "<s>",
11
+ "lstrip": false,
12
+ "normalized": true,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "eos_token": {
17
+ "content": "</s>",
18
+ "lstrip": false,
19
+ "normalized": true,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ },
23
+ "mask_token": {
24
+ "content": "<mask>",
25
+ "lstrip": true,
26
+ "normalized": true,
27
+ "rstrip": false,
28
+ "single_word": false
29
+ },
30
+ "pad_token": {
31
+ "content": "<pad>",
32
+ "lstrip": false,
33
+ "normalized": true,
34
+ "rstrip": false,
35
+ "single_word": false
36
+ },
37
+ "sep_token": {
38
+ "content": "</s>",
39
+ "lstrip": false,
40
+ "normalized": true,
41
+ "rstrip": false,
42
+ "single_word": false
43
+ },
44
+ "unk_token": {
45
+ "content": "<unk>",
46
+ "lstrip": false,
47
+ "normalized": true,
48
+ "rstrip": false,
49
+ "single_word": false
50
+ }
51
+ }
tb/events.out.tfevents.1725045346.6b97e535edda.2908.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e576d6fc5f437e9cdba3770a03c7980ab62c145def88b46a7b1d4e68f13bfde9
3
+ size 6546
tb/events.out.tfevents.1725046129.6b97e535edda.6370.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c9e94268d839c5a3c21781840b052643e729f1bd38c4ed1942a53d09ddd89bfc
3
+ size 12153
tb/events.out.tfevents.1725047358.6b97e535edda.6370.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6babdcbcafd1e6dd843214457a56f608115a0ebceecc401a4e103c0ed26fb36d
3
+ size 560
tb/events.out.tfevents.1725047806.6b97e535edda.13440.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:98176e24292706b54fcd3566af82c1b540c139fe67c287bbedee399f56ea439a
3
+ size 5863
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_prefix_space": true,
3
+ "added_tokens_decoder": {
4
+ "0": {
5
+ "content": "<s>",
6
+ "lstrip": false,
7
+ "normalized": true,
8
+ "rstrip": false,
9
+ "single_word": false,
10
+ "special": true
11
+ },
12
+ "1": {
13
+ "content": "<pad>",
14
+ "lstrip": false,
15
+ "normalized": true,
16
+ "rstrip": false,
17
+ "single_word": false,
18
+ "special": true
19
+ },
20
+ "2": {
21
+ "content": "</s>",
22
+ "lstrip": false,
23
+ "normalized": true,
24
+ "rstrip": false,
25
+ "single_word": false,
26
+ "special": true
27
+ },
28
+ "3": {
29
+ "content": "<unk>",
30
+ "lstrip": false,
31
+ "normalized": true,
32
+ "rstrip": false,
33
+ "single_word": false,
34
+ "special": true
35
+ },
36
+ "50261": {
37
+ "content": "<mask>",
38
+ "lstrip": true,
39
+ "normalized": true,
40
+ "rstrip": false,
41
+ "single_word": false,
42
+ "special": true
43
+ }
44
+ },
45
+ "bos_token": "<s>",
46
+ "clean_up_tokenization_spaces": true,
47
+ "cls_token": "<s>",
48
+ "eos_token": "</s>",
49
+ "errors": "replace",
50
+ "mask_token": "<mask>",
51
+ "max_len": 512,
52
+ "model_max_length": 512,
53
+ "pad_token": "<pad>",
54
+ "sep_token": "</s>",
55
+ "tokenizer_class": "RobertaTokenizer",
56
+ "trim_offsets": true,
57
+ "unk_token": "<unk>"
58
+ }
train.log ADDED
@@ -0,0 +1,431 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0
  0%| | 0/4250 [00:00<?, ?it/s]
1
  0%| | 1/4250 [00:01<1:22:54, 1.17s/it]
2
  0%| | 2/4250 [00:01<42:01, 1.68it/s]
3
  0%| | 3/4250 [00:01<29:22, 2.41it/s]
4
  0%| | 4/4250 [00:01<23:09, 3.06it/s]
5
  0%| | 5/4250 [00:02<21:02, 3.36it/s]
6
  0%| | 6/4250 [00:02<18:48, 3.76it/s]
7
  0%| | 7/4250 [00:02<18:03, 3.92it/s]
8
  0%| | 8/4250 [00:02<17:04, 4.14it/s]
9
  0%| | 9/4250 [00:02<17:57, 3.94it/s]
10
  0%| | 10/4250 [00:03<17:19, 4.08it/s]
11
  0%| | 11/4250 [00:03<16:45, 4.22it/s]
12
  0%| | 12/4250 [00:03<15:21, 4.60it/s]
13
  0%| | 13/4250 [00:03<17:21, 4.07it/s]
14
  0%| | 14/4250 [00:04<16:55, 4.17it/s]
15
  0%| | 15/4250 [00:04<16:34, 4.26it/s]
16
  0%| | 16/4250 [00:04<15:42, 4.49it/s]
17
  0%| | 17/4250 [00:04<14:41, 4.80it/s]
18
  0%| | 18/4250 [00:04<14:23, 4.90it/s]
19
  0%| | 19/4250 [00:05<15:11, 4.64it/s]
20
  0%| | 20/4250 [00:05<14:17, 4.93it/s]
21
  0%| | 21/4250 [00:05<16:33, 4.26it/s]
22
  1%| | 22/4250 [00:05<15:53, 4.44it/s]
23
  1%| | 23/4250 [00:06<15:25, 4.57it/s]
24
  1%| | 24/4250 [00:06<15:15, 4.62it/s]
25
  1%| | 25/4250 [00:06<16:44, 4.21it/s]
26
  1%| | 26/4250 [00:06<15:11, 4.64it/s]
27
  1%| | 27/4250 [00:06<15:12, 4.63it/s]
28
  1%| | 28/4250 [00:07<13:58, 5.03it/s]
29
  1%| | 29/4250 [00:07<13:39, 5.15it/s]
30
  1%| | 30/4250 [00:07<16:45, 4.20it/s]
31
  1%| | 31/4250 [00:07<15:42, 4.48it/s]
32
  1%| | 32/4250 [00:08<16:49, 4.18it/s]
33
  1%| | 33/4250 [00:08<16:15, 4.32it/s]
34
  1%| | 34/4250 [00:08<16:30, 4.26it/s]
35
  1%| | 35/4250 [00:08<15:50, 4.44it/s]
36
  1%| | 36/4250 [00:08<16:12, 4.34it/s]
37
  1%| | 37/4250 [00:09<15:15, 4.60it/s]
38
  1%| | 38/4250 [00:09<15:04, 4.65it/s]
39
  1%| | 39/4250 [00:09<16:26, 4.27it/s]
40
  1%| | 40/4250 [00:09<15:17, 4.59it/s]
41
  1%| | 41/4250 [00:09<14:58, 4.68it/s]
42
  1%| | 42/4250 [00:10<15:05, 4.65it/s]
43
  1%| | 43/4250 [00:10<17:31, 4.00it/s]
44
  1%| | 44/4250 [00:11<27:49, 2.52it/s]
45
  1%| | 45/4250 [00:11<23:40, 2.96it/s]
46
  1%| | 46/4250 [00:11<22:36, 3.10it/s]
47
  1%| | 47/4250 [00:11<19:55, 3.51it/s]
48
  1%| | 48/4250 [00:12<18:43, 3.74it/s]
49
  1%| | 49/4250 [00:12<16:10, 4.33it/s]
50
  1%| | 50/4250 [00:12<16:42, 4.19it/s]
51
  1%| | 51/4250 [00:12<16:56, 4.13it/s]
52
  1%| | 52/4250 [00:13<16:31, 4.23it/s]
53
  1%| | 53/4250 [00:13<17:15, 4.05it/s]
54
  1%|▏ | 54/4250 [00:13<20:55, 3.34it/s]
55
  1%|▏ | 55/4250 [00:13<19:02, 3.67it/s]
56
  1%|▏ | 56/4250 [00:14<18:24, 3.80it/s]
57
  1%|▏ | 57/4250 [00:14<17:03, 4.10it/s]
58
  1%|▏ | 58/4250 [00:14<15:38, 4.47it/s]
59
  1%|▏ | 59/4250 [00:14<15:03, 4.64it/s]
60
  1%|▏ | 60/4250 [00:14<14:48, 4.72it/s]
61
  1%|▏ | 61/4250 [00:15<15:50, 4.41it/s]
62
  1%|▏ | 62/4250 [00:15<15:56, 4.38it/s]
63
  1%|▏ | 63/4250 [00:15<15:00, 4.65it/s]
64
  2%|▏ | 64/4250 [00:15<14:46, 4.72it/s]
65
  2%|▏ | 65/4250 [00:16<15:31, 4.49it/s]
66
  2%|▏ | 66/4250 [00:16<16:36, 4.20it/s]
67
  2%|▏ | 67/4250 [00:16<16:31, 4.22it/s]
68
  2%|▏ | 68/4250 [00:17<20:45, 3.36it/s]
69
  2%|▏ | 69/4250 [00:17<18:33, 3.75it/s]
70
  2%|▏ | 70/4250 [00:17<19:56, 3.49it/s]
71
  2%|▏ | 71/4250 [00:17<18:58, 3.67it/s]
72
  2%|▏ | 72/4250 [00:18<17:43, 3.93it/s]
73
  2%|▏ | 73/4250 [00:18<17:15, 4.03it/s]
74
  2%|▏ | 74/4250 [00:18<17:31, 3.97it/s]
75
  2%|▏ | 75/4250 [00:18<15:34, 4.47it/s]
76
  2%|▏ | 76/4250 [00:18<16:08, 4.31it/s]
77
  2%|▏ | 77/4250 [00:19<15:14, 4.56it/s]
78
  2%|▏ | 78/4250 [00:19<16:02, 4.33it/s]
79
  2%|▏ | 79/4250 [00:19<16:08, 4.31it/s]
80
  2%|▏ | 80/4250 [00:19<16:04, 4.32it/s]
81
  2%|▏ | 81/4250 [00:20<16:05, 4.32it/s]
82
  2%|▏ | 82/4250 [00:20<15:07, 4.59it/s]
83
  2%|▏ | 83/4250 [00:20<17:05, 4.06it/s]
84
  2%|▏ | 84/4250 [00:20<15:34, 4.46it/s]
85
  2%|▏ | 85/4250 [00:20<15:15, 4.55it/s]
86
  2%|▏ | 86/4250 [00:21<15:04, 4.60it/s]
87
  2%|▏ | 87/4250 [00:21<15:05, 4.60it/s]
88
  2%|▏ | 88/4250 [00:21<13:41, 5.07it/s]
89
  2%|▏ | 89/4250 [00:21<13:52, 5.00it/s]
90
  2%|▏ | 90/4250 [00:22<16:13, 4.27it/s]
91
  2%|▏ | 91/4250 [00:22<14:34, 4.76it/s]
92
  2%|▏ | 92/4250 [00:22<14:02, 4.94it/s]
93
  2%|▏ | 93/4250 [00:22<13:17, 5.21it/s]
94
  2%|▏ | 94/4250 [00:22<13:54, 4.98it/s]
95
  2%|▏ | 95/4250 [00:23<15:06, 4.59it/s]
96
  2%|▏ | 96/4250 [00:23<16:55, 4.09it/s]
97
  2%|▏ | 97/4250 [00:23<15:53, 4.36it/s]
98
  2%|▏ | 98/4250 [00:23<14:45, 4.69it/s]
99
  2%|▏ | 99/4250 [00:23<14:08, 4.89it/s]
100
  2%|▏ | 100/4250 [00:24<14:28, 4.78it/s]
101
  2%|▏ | 101/4250 [00:24<18:01, 3.84it/s]
102
  2%|▏ | 102/4250 [00:24<20:25, 3.38it/s]
103
  2%|▏ | 103/4250 [00:25<18:29, 3.74it/s]
104
  2%|▏ | 104/4250 [00:25<16:58, 4.07it/s]
105
  2%|▏ | 105/4250 [00:25<16:37, 4.15it/s]
106
  2%|▏ | 106/4250 [00:25<16:08, 4.28it/s]
107
  3%|▎ | 107/4250 [00:25<15:55, 4.34it/s]
108
  3%|▎ | 108/4250 [00:26<19:06, 3.61it/s]
109
  3%|▎ | 109/4250 [00:26<18:34, 3.72it/s]
110
  3%|▎ | 110/4250 [00:26<17:11, 4.01it/s]
111
  3%|▎ | 111/4250 [00:26<15:34, 4.43it/s]
112
  3%|▎ | 112/4250 [00:27<16:09, 4.27it/s]
113
  3%|▎ | 113/4250 [00:27<15:47, 4.37it/s]
114
  3%|▎ | 114/4250 [00:27<14:36, 4.72it/s]
115
  3%|▎ | 115/4250 [00:27<14:04, 4.89it/s]
116
  3%|▎ | 116/4250 [00:28<15:35, 4.42it/s]
117
  3%|▎ | 117/4250 [00:28<15:07, 4.55it/s]
118
  3%|▎ | 118/4250 [00:28<14:42, 4.68it/s]
119
  3%|▎ | 119/4250 [00:28<15:06, 4.56it/s]
120
  3%|▎ | 120/4250 [00:28<14:14, 4.83it/s]
121
  3%|▎ | 121/4250 [00:29<14:18, 4.81it/s]
122
  3%|▎ | 122/4250 [00:29<20:13, 3.40it/s]
123
  3%|▎ | 123/4250 [00:29<18:31, 3.71it/s]
124
  3%|▎ | 124/4250 [00:30<16:53, 4.07it/s]
125
  3%|▎ | 125/4250 [00:30<17:05, 4.02it/s]
126
  3%|▎ | 126/4250 [00:30<15:30, 4.43it/s]
127
  3%|▎ | 127/4250 [00:30<14:48, 4.64it/s]
128
  3%|▎ | 128/4250 [00:30<14:12, 4.83it/s]
129
  3%|▎ | 129/4250 [00:31<17:31, 3.92it/s]
130
  3%|▎ | 130/4250 [00:31<16:58, 4.04it/s]
131
  3%|▎ | 131/4250 [00:31<15:29, 4.43it/s]
132
  3%|▎ | 132/4250 [00:31<14:37, 4.69it/s]
133
  3%|▎ | 133/4250 [00:32<15:37, 4.39it/s]
134
  3%|▎ | 134/4250 [00:32<15:17, 4.48it/s]
135
  3%|▎ | 135/4250 [00:32<15:06, 4.54it/s]
136
  3%|▎ | 136/4250 [00:32<15:10, 4.52it/s]
137
  3%|▎ | 137/4250 [00:32<14:56, 4.59it/s]
138
  3%|▎ | 138/4250 [00:33<14:26, 4.74it/s]
139
  3%|▎ | 139/4250 [00:33<14:55, 4.59it/s]
140
  3%|▎ | 140/4250 [00:33<15:52, 4.31it/s]
141
  3%|▎ | 141/4250 [00:33<15:57, 4.29it/s]
142
  3%|▎ | 142/4250 [00:33<14:58, 4.57it/s]
143
  3%|▎ | 143/4250 [00:34<14:36, 4.68it/s]
144
  3%|▎ | 144/4250 [00:34<14:25, 4.74it/s]
145
  3%|▎ | 145/4250 [00:34<16:56, 4.04it/s]
146
  3%|▎ | 146/4250 [00:35<22:12, 3.08it/s]
147
  3%|▎ | 147/4250 [00:35<19:47, 3.45it/s]
148
  3%|▎ | 148/4250 [00:35<19:39, 3.48it/s]
149
  4%|▎ | 149/4250 [00:35<18:55, 3.61it/s]
150
  4%|▎ | 150/4250 [00:36<17:36, 3.88it/s]
151
  4%|▎ | 151/4250 [00:36<16:08, 4.23it/s]
152
  4%|▎ | 152/4250 [00:36<17:00, 4.02it/s]
153
  4%|▎ | 153/4250 [00:36<16:21, 4.18it/s]
154
  4%|▎ | 154/4250 [00:37<15:19, 4.46it/s]
155
  4%|▎ | 155/4250 [00:37<14:26, 4.72it/s]
156
  4%|▎ | 156/4250 [00:37<14:44, 4.63it/s]
157
  4%|▎ | 157/4250 [00:37<14:24, 4.73it/s]
158
  4%|▎ | 158/4250 [00:38<17:32, 3.89it/s]
159
  4%|▎ | 159/4250 [00:38<18:32, 3.68it/s]
160
  4%|▍ | 160/4250 [00:38<16:57, 4.02it/s]
161
  4%|▍ | 161/4250 [00:38<16:00, 4.26it/s]
162
  4%|▍ | 162/4250 [00:38<14:56, 4.56it/s]
163
  4%|▍ | 163/4250 [00:39<14:48, 4.60it/s]
164
  4%|▍ | 164/4250 [00:39<18:40, 3.65it/s]
165
  4%|▍ | 165/4250 [00:39<18:13, 3.73it/s]
166
  4%|▍ | 166/4250 [00:40<18:24, 3.70it/s]
167
  4%|▍ | 167/4250 [00:40<17:28, 3.89it/s]
168
  4%|▍ | 168/4250 [00:40<15:55, 4.27it/s]
169
  4%|▍ | 169/4250 [00:40<15:32, 4.38it/s]
170
  4%|▍ | 170/4250 [00:40<15:28, 4.40it/s]
171
  4%|▍ | 171/4250 [00:41<15:59, 4.25it/s]
172
  4%|▍ | 172/4250 [00:41<15:11, 4.47it/s]
173
  4%|▍ | 173/4250 [00:41<18:21, 3.70it/s]
174
  4%|▍ | 174/4250 [00:41<16:38, 4.08it/s]
175
  4%|▍ | 175/4250 [00:42<19:24, 3.50it/s]
176
  4%|▍ | 176/4250 [00:42<19:20, 3.51it/s]
177
  4%|▍ | 177/4250 [00:42<17:03, 3.98it/s]
178
  4%|▍ | 178/4250 [00:42<16:06, 4.21it/s]
179
  4%|▍ | 179/4250 [00:43<15:38, 4.34it/s]
180
  4%|▍ | 180/4250 [00:43<14:25, 4.70it/s]
181
  4%|▍ | 181/4250 [00:43<14:35, 4.65it/s]
182
  4%|▍ | 182/4250 [00:43<17:09, 3.95it/s]
183
  4%|▍ | 183/4250 [00:44<16:33, 4.09it/s]
184
  4%|▍ | 184/4250 [00:44<16:26, 4.12it/s]
185
  4%|▍ | 185/4250 [00:44<15:05, 4.49it/s]
186
  4%|▍ | 186/4250 [00:44<14:37, 4.63it/s]
187
  4%|▍ | 187/4250 [00:44<13:45, 4.92it/s]
188
  4%|▍ | 188/4250 [00:45<14:40, 4.61it/s]
189
  4%|▍ | 189/4250 [00:45<14:11, 4.77it/s]
190
  4%|▍ | 190/4250 [00:45<13:43, 4.93it/s]
191
  4%|▍ | 191/4250 [00:45<14:21, 4.71it/s]
192
  5%|▍ | 192/4250 [00:46<14:09, 4.78it/s]
193
  5%|▍ | 193/4250 [00:46<13:53, 4.87it/s]
194
  5%|▍ | 194/4250 [00:46<16:40, 4.06it/s]
195
  5%|▍ | 195/4250 [00:46<15:35, 4.33it/s]
196
  5%|▍ | 196/4250 [00:46<14:25, 4.69it/s]
197
  5%|▍ | 197/4250 [00:47<14:55, 4.53it/s]
198
  5%|▍ | 198/4250 [00:47<15:18, 4.41it/s]
199
  5%|▍ | 199/4250 [00:47<15:25, 4.38it/s]
200
  5%|▍ | 200/4250 [00:47<15:34, 4.33it/s]
201
  5%|▍ | 201/4250 [00:48<15:13, 4.43it/s]
202
  5%|▍ | 202/4250 [00:48<15:31, 4.35it/s]
203
  5%|▍ | 203/4250 [00:48<14:25, 4.67it/s]
204
  5%|▍ | 204/4250 [00:48<13:45, 4.90it/s]
205
  5%|▍ | 205/4250 [00:48<14:42, 4.58it/s]
206
  5%|▍ | 206/4250 [00:49<14:27, 4.66it/s]
207
  5%|▍ | 207/4250 [00:49<16:42, 4.03it/s]
208
  5%|▍ | 208/4250 [00:49<15:16, 4.41it/s]
209
  5%|▍ | 209/4250 [00:49<15:09, 4.44it/s]
210
  5%|▍ | 210/4250 [00:50<13:59, 4.81it/s]
211
  5%|▍ | 211/4250 [00:50<15:05, 4.46it/s]
212
  5%|▍ | 212/4250 [00:50<14:22, 4.68it/s]
213
  5%|▌ | 213/4250 [00:50<17:28, 3.85it/s]
214
  5%|▌ | 214/4250 [00:50<15:20, 4.38it/s]
215
  5%|▌ | 215/4250 [00:51<15:14, 4.41it/s]
216
  5%|▌ | 216/4250 [00:51<19:02, 3.53it/s]
217
  5%|▌ | 217/4250 [00:51<17:01, 3.95it/s]
218
  5%|▌ | 218/4250 [00:52<15:54, 4.22it/s]
219
  5%|▌ | 219/4250 [00:52<16:32, 4.06it/s]
220
  5%|▌ | 220/4250 [00:52<16:39, 4.03it/s]
221
  5%|▌ | 221/4250 [00:52<18:42, 3.59it/s]
222
  5%|▌ | 222/4250 [00:53<20:01, 3.35it/s]
223
  5%|▌ | 223/4250 [00:53<18:25, 3.64it/s]
224
  5%|▌ | 224/4250 [00:53<16:30, 4.06it/s]
225
  5%|▌ | 225/4250 [00:53<15:06, 4.44it/s]
226
  5%|▌ | 226/4250 [00:53<14:23, 4.66it/s]
227
  5%|▌ | 227/4250 [00:54<14:00, 4.79it/s]
228
  5%|▌ | 228/4250 [00:54<13:46, 4.87it/s]
229
  5%|▌ | 229/4250 [00:54<12:57, 5.17it/s]
230
  5%|▌ | 230/4250 [00:54<14:21, 4.66it/s]
231
  5%|▌ | 231/4250 [00:54<13:45, 4.87it/s]
232
  5%|▌ | 232/4250 [00:55<13:36, 4.92it/s]
233
  5%|▌ | 233/4250 [00:55<13:25, 4.99it/s]
234
  6%|▌ | 234/4250 [00:55<15:43, 4.26it/s]
235
  6%|▌ | 235/4250 [00:56<20:40, 3.24it/s]
236
  6%|▌ | 236/4250 [00:56<21:54, 3.05it/s]
237
  6%|▌ | 237/4250 [00:56<19:46, 3.38it/s]
238
  6%|▌ | 238/4250 [00:56<17:28, 3.83it/s]
239
  6%|▌ | 239/4250 [00:57<17:32, 3.81it/s]
240
  6%|▌ | 240/4250 [00:57<20:00, 3.34it/s]
241
  6%|▌ | 241/4250 [00:57<18:01, 3.71it/s]
242
  6%|▌ | 242/4250 [00:58<16:32, 4.04it/s]
243
  6%|▌ | 243/4250 [00:58<16:03, 4.16it/s]
244
  6%|▌ | 244/4250 [00:58<16:03, 4.16it/s]
245
  6%|▌ | 245/4250 [00:58<16:00, 4.17it/s]
246
  6%|▌ | 246/4250 [00:59<24:58, 2.67it/s]
247
  6%|▌ | 247/4250 [00:59<22:20, 2.99it/s]
248
  6%|▌ | 248/4250 [00:59<19:39, 3.39it/s]
249
  6%|▌ | 249/4250 [01:00<23:51, 2.79it/s]
250
  6%|▌ | 250/4250 [01:00<25:16, 2.64it/s]
251
  6%|▌ | 251/4250 [01:01<22:47, 2.92it/s]
252
  6%|▌ | 252/4250 [01:01<19:31, 3.41it/s]
253
  6%|▌ | 253/4250 [01:01<17:12, 3.87it/s]
254
  6%|▌ | 254/4250 [01:01<17:52, 3.73it/s]
255
  6%|▌ | 255/4250 [01:01<18:09, 3.67it/s]
256
  6%|▌ | 256/4250 [01:02<17:54, 3.72it/s]
257
  6%|▌ | 257/4250 [01:02<16:41, 3.99it/s]
258
  6%|▌ | 258/4250 [01:02<15:34, 4.27it/s]
259
  6%|▌ | 259/4250 [01:02<14:26, 4.61it/s]
260
  6%|▌ | 260/4250 [01:03<15:43, 4.23it/s]
261
  6%|▌ | 261/4250 [01:03<15:00, 4.43it/s]
262
  6%|▌ | 262/4250 [01:03<13:51, 4.79it/s]
263
  6%|▌ | 263/4250 [01:03<12:53, 5.15it/s]
264
  6%|▌ | 264/4250 [01:03<14:52, 4.47it/s]
265
  6%|▌ | 265/4250 [01:04<15:49, 4.20it/s]
266
  6%|▋ | 266/4250 [01:04<15:45, 4.21it/s]
267
  6%|▋ | 267/4250 [01:04<15:43, 4.22it/s]
268
  6%|▋ | 268/4250 [01:04<14:46, 4.49it/s]
269
  6%|▋ | 269/4250 [01:05<14:19, 4.63it/s]
270
  6%|▋ | 270/4250 [01:05<16:42, 3.97it/s]
271
  6%|▋ | 271/4250 [01:05<15:07, 4.39it/s]
272
  6%|▋ | 272/4250 [01:05<14:36, 4.54it/s]
273
  6%|▋ | 273/4250 [01:05<14:08, 4.69it/s]
274
  6%|▋ | 274/4250 [01:06<13:43, 4.83it/s]
275
  6%|▋ | 275/4250 [01:06<13:31, 4.90it/s]
276
  6%|▋ | 276/4250 [01:06<12:57, 5.11it/s]
277
  7%|▋ | 277/4250 [01:06<13:38, 4.85it/s]
278
  7%|▋ | 278/4250 [01:06<13:30, 4.90it/s]
279
  7%|▋ | 279/4250 [01:07<14:07, 4.68it/s]
280
  7%|▋ | 280/4250 [01:07<17:31, 3.77it/s]
281
  7%|▋ | 281/4250 [01:07<16:05, 4.11it/s]
282
  7%|▋ | 282/4250 [01:07<15:57, 4.14it/s]
283
  7%|▋ | 283/4250 [01:08<14:59, 4.41it/s]
284
  7%|▋ | 284/4250 [01:08<14:31, 4.55it/s]
285
  7%|▋ | 285/4250 [01:08<14:42, 4.49it/s]
286
  7%|▋ | 286/4250 [01:08<14:16, 4.63it/s]
287
  7%|▋ | 287/4250 [01:09<15:52, 4.16it/s]
288
  7%|▋ | 288/4250 [01:09<14:43, 4.48it/s]
289
  7%|▋ | 289/4250 [01:09<14:54, 4.43it/s]
290
  7%|▋ | 290/4250 [01:09<14:03, 4.69it/s]
291
  7%|▋ | 291/4250 [01:09<14:55, 4.42it/s]
292
  7%|▋ | 292/4250 [01:10<14:00, 4.71it/s]
293
  7%|▋ | 293/4250 [01:10<13:17, 4.96it/s]
294
  7%|▋ | 294/4250 [01:10<12:52, 5.12it/s]
295
  7%|▋ | 295/4250 [01:10<14:15, 4.63it/s]
296
  7%|▋ | 296/4250 [01:10<13:24, 4.92it/s]
297
  7%|▋ | 297/4250 [01:11<13:06, 5.02it/s]
298
  7%|▋ | 298/4250 [01:11<13:03, 5.05it/s]
299
  7%|▋ | 299/4250 [01:11<13:42, 4.80it/s]
300
  7%|▋ | 300/4250 [01:11<15:32, 4.24it/s]
301
  7%|▋ | 301/4250 [01:12<14:54, 4.41it/s]
302
  7%|▋ | 302/4250 [01:12<16:06, 4.08it/s]
303
  7%|▋ | 303/4250 [01:12<17:16, 3.81it/s]
304
  7%|▋ | 304/4250 [01:12<16:34, 3.97it/s]
305
  7%|▋ | 305/4250 [01:13<16:19, 4.03it/s]
306
  7%|▋ | 306/4250 [01:13<18:08, 3.62it/s]
307
  7%|▋ | 307/4250 [01:13<16:02, 4.09it/s]
308
  7%|▋ | 308/4250 [01:13<15:13, 4.31it/s]
309
  7%|▋ | 309/4250 [01:14<15:00, 4.38it/s]
310
  7%|▋ | 310/4250 [01:14<14:22, 4.57it/s]
311
  7%|▋ | 311/4250 [01:14<13:57, 4.70it/s]
312
  7%|▋ | 312/4250 [01:14<16:54, 3.88it/s]
313
  7%|▋ | 313/4250 [01:15<16:11, 4.05it/s]
314
  7%|▋ | 314/4250 [01:15<16:16, 4.03it/s]
315
  7%|▋ | 315/4250 [01:15<15:12, 4.31it/s]
316
  7%|▋ | 316/4250 [01:15<14:32, 4.51it/s]
317
  7%|▋ | 317/4250 [01:15<15:42, 4.17it/s]
318
  7%|▋ | 318/4250 [01:16<15:25, 4.25it/s]
319
  8%|▊ | 319/4250 [01:16<15:46, 4.15it/s]
320
  8%|▊ | 320/4250 [01:16<16:24, 3.99it/s]
321
  8%|▊ | 321/4250 [01:16<15:02, 4.35it/s]
322
  8%|▊ | 322/4250 [01:17<14:34, 4.49it/s]
323
  8%|▊ | 323/4250 [01:17<13:27, 4.87it/s]
324
  8%|▊ | 324/4250 [01:17<13:12, 4.95it/s]
325
  8%|▊ | 325/4250 [01:17<13:20, 4.90it/s]
326
  8%|▊ | 326/4250 [01:17<12:56, 5.06it/s]
327
  8%|▊ | 327/4250 [01:18<12:33, 5.21it/s]
328
  8%|▊ | 328/4250 [01:18<16:34, 3.94it/s]
329
  8%|▊ | 329/4250 [01:18<17:43, 3.69it/s]
330
  8%|▊ | 330/4250 [01:18<16:38, 3.92it/s]
331
  8%|▊ | 331/4250 [01:19<16:14, 4.02it/s]
332
  8%|▊ | 332/4250 [01:19<15:09, 4.31it/s]
333
  8%|▊ | 333/4250 [01:19<14:14, 4.59it/s]
334
  8%|▊ | 334/4250 [01:19<14:32, 4.49it/s]
335
  8%|▊ | 335/4250 [01:20<15:50, 4.12it/s]
336
  8%|▊ | 336/4250 [01:20<15:37, 4.17it/s]
337
  8%|▊ | 337/4250 [01:20<14:28, 4.50it/s]
338
  8%|▊ | 338/4250 [01:20<13:49, 4.72it/s]
339
  8%|▊ | 339/4250 [01:21<17:39, 3.69it/s]
340
  8%|▊ | 340/4250 [01:21<16:23, 3.98it/s]
341
  8%|▊ | 341/4250 [01:21<17:22, 3.75it/s]
342
  8%|▊ | 342/4250 [01:21<15:44, 4.14it/s]
343
  8%|▊ | 343/4250 [01:22<16:07, 4.04it/s]
344
  8%|▊ | 344/4250 [01:22<15:55, 4.09it/s]
345
  8%|▊ | 345/4250 [01:22<17:27, 3.73it/s]
346
  8%|▊ | 346/4250 [01:22<18:31, 3.51it/s]
347
  8%|▊ | 347/4250 [01:23<17:10, 3.79it/s]
348
  8%|▊ | 348/4250 [01:23<18:02, 3.61it/s]
349
  8%|▊ | 349/4250 [01:23<16:20, 3.98it/s]
350
  8%|▊ | 350/4250 [01:23<16:04, 4.04it/s]
351
  8%|▊ | 351/4250 [01:24<15:27, 4.20it/s]
352
  8%|▊ | 352/4250 [01:24<15:43, 4.13it/s]
353
  8%|▊ | 353/4250 [01:24<14:03, 4.62it/s]
354
  8%|▊ | 354/4250 [01:24<14:09, 4.58it/s]
355
  8%|▊ | 355/4250 [01:25<15:16, 4.25it/s]
356
  8%|▊ | 356/4250 [01:25<14:47, 4.39it/s]
357
  8%|▊ | 357/4250 [01:25<13:58, 4.64it/s]
358
  8%|▊ | 358/4250 [01:25<13:27, 4.82it/s]
359
  8%|▊ | 359/4250 [01:25<12:49, 5.06it/s]
360
  8%|▊ | 360/4250 [01:26<14:44, 4.40it/s]
361
  8%|▊ | 361/4250 [01:26<14:12, 4.56it/s]
362
  9%|▊ | 362/4250 [01:26<13:27, 4.81it/s]
363
  9%|▊ | 363/4250 [01:26<14:20, 4.52it/s]
364
  9%|▊ | 364/4250 [01:27<20:33, 3.15it/s]
365
  9%|▊ | 365/4250 [01:27<19:16, 3.36it/s]
366
  9%|▊ | 366/4250 [01:27<18:53, 3.43it/s]
367
  9%|▊ | 367/4250 [01:27<17:08, 3.77it/s]
368
  9%|▊ | 368/4250 [01:28<15:11, 4.26it/s]
369
  9%|▊ | 369/4250 [01:28<16:08, 4.01it/s]
370
  9%|▊ | 370/4250 [01:28<15:07, 4.27it/s]
371
  9%|▊ | 371/4250 [01:28<14:16, 4.53it/s]
372
  9%|▉ | 372/4250 [01:29<13:49, 4.67it/s]
373
  9%|▉ | 373/4250 [01:29<15:45, 4.10it/s]
374
  9%|▉ | 374/4250 [01:29<15:17, 4.22it/s]
375
  9%|▉ | 375/4250 [01:30<25:49, 2.50it/s]
376
  9%|▉ | 376/4250 [01:30<22:04, 2.92it/s]
377
  9%|▉ | 377/4250 [01:30<19:35, 3.30it/s]
378
  9%|▉ | 378/4250 [01:30<17:28, 3.69it/s]
379
  9%|▉ | 379/4250 [01:31<15:53, 4.06it/s]
380
  9%|▉ | 380/4250 [01:31<15:12, 4.24it/s]
381
  9%|▉ | 381/4250 [01:31<14:26, 4.47it/s]
382
  9%|▉ | 382/4250 [01:31<13:56, 4.62it/s]
383
  9%|▉ | 383/4250 [01:32<15:15, 4.22it/s]
384
  9%|▉ | 384/4250 [01:32<14:11, 4.54it/s]
385
  9%|▉ | 385/4250 [01:32<16:37, 3.87it/s]
386
  9%|▉ | 386/4250 [01:32<18:49, 3.42it/s]
387
  9%|▉ | 387/4250 [01:33<16:38, 3.87it/s]
388
  9%|▉ | 388/4250 [01:33<15:53, 4.05it/s]
389
  9%|▉ | 389/4250 [01:33<15:09, 4.24it/s]
390
  9%|▉ | 390/4250 [01:33<15:50, 4.06it/s]
391
  9%|▉ | 391/4250 [01:33<14:30, 4.44it/s]
392
  9%|▉ | 392/4250 [01:34<14:35, 4.41it/s]
393
  9%|▉ | 393/4250 [01:34<13:41, 4.70it/s]
394
  9%|▉ | 394/4250 [01:34<13:40, 4.70it/s]
395
  9%|▉ | 395/4250 [01:34<13:01, 4.93it/s]
396
  9%|▉ | 396/4250 [01:34<12:48, 5.02it/s]
397
  9%|▉ | 397/4250 [01:35<13:29, 4.76it/s]
398
  9%|▉ | 398/4250 [01:35<14:09, 4.53it/s]
399
  9%|▉ | 399/4250 [01:35<13:17, 4.83it/s]
400
  9%|▉ | 400/4250 [01:35<14:40, 4.37it/s]
401
  9%|▉ | 401/4250 [01:36<14:07, 4.54it/s]
402
  9%|▉ | 402/4250 [01:36<15:53, 4.04it/s]
403
  9%|▉ | 403/4250 [01:36<15:28, 4.14it/s]
404
  10%|▉ | 404/4250 [01:36<14:42, 4.36it/s]
405
  10%|▉ | 405/4250 [01:37<15:18, 4.18it/s]
406
  10%|▉ | 406/4250 [01:37<15:58, 4.01it/s]
407
  10%|▉ | 407/4250 [01:38<23:05, 2.77it/s]
408
  10%|▉ | 408/4250 [01:38<21:24, 2.99it/s]
409
  10%|▉ | 409/4250 [01:38<18:33, 3.45it/s]
410
  10%|▉ | 410/4250 [01:38<17:36, 3.63it/s]
411
  10%|▉ | 411/4250 [01:38<17:20, 3.69it/s]
412
  10%|▉ | 412/4250 [01:39<18:25, 3.47it/s]
413
  10%|▉ | 413/4250 [01:39<17:11, 3.72it/s]
414
  10%|▉ | 414/4250 [01:39<16:35, 3.86it/s]
415
  10%|▉ | 415/4250 [01:39<15:19, 4.17it/s]
416
  10%|▉ | 416/4250 [01:40<14:55, 4.28it/s]
417
  10%|▉ | 417/4250 [01:40<14:53, 4.29it/s]
418
  10%|▉ | 418/4250 [01:40<13:45, 4.64it/s]
419
  10%|▉ | 419/4250 [01:40<14:57, 4.27it/s]
420
  10%|▉ | 420/4250 [01:41<15:01, 4.25it/s]
421
  10%|▉ | 421/4250 [01:41<15:29, 4.12it/s]
422
  10%|▉ | 422/4250 [01:41<15:00, 4.25it/s]
423
  10%|▉ | 423/4250 [01:41<13:52, 4.60it/s]
424
  10%|▉ | 424/4250 [01:42<14:35, 4.37it/s]
425
  10%|█ | 425/4250 [01:42<14:28, 4.40it/s][INFO|trainer.py:805] 2024-08-30 19:58:29,112 >> The following columns in the evaluation set don't have a corresponding argument in `RobertaForTokenClassification.forward` and have been ignored: id, ner_tags, tokens. If id, ner_tags, tokens are not expected by `RobertaForTokenClassification.forward`, you can safely ignore this message.
 
 
 
 
 
 
426
  0%| | 0/852 [00:00<?, ?it/s]
 
427
  1%| | 10/852 [00:00<00:09, 89.20it/s]
 
428
  2%|▏ | 19/852 [00:00<00:10, 78.42it/s]
 
429
  3%|▎ | 27/852 [00:00<00:10, 78.13it/s]
 
430
  4%|▍ | 35/852 [00:00<00:10, 77.05it/s]
 
431
  5%|▌ | 43/852 [00:00<00:10, 77.06it/s]
 
432
  6%|▌ | 51/852 [00:00<00:10, 77.66it/s]
 
433
  7%|▋ | 60/852 [00:00<00:10, 78.68it/s]
 
434
  8%|▊ | 68/852 [00:00<00:10, 76.32it/s]
 
435
  9%|▉ | 76/852 [00:00<00:10, 76.81it/s]
 
436
  10%|▉ | 84/852 [00:01<00:10, 76.39it/s]
 
437
  11%|█ | 92/852 [00:01<00:09, 76.10it/s]
 
438
  12%|█▏ | 100/852 [00:01<00:09, 75.52it/s]
 
439
  13%|█▎ | 108/852 [00:01<00:09, 75.37it/s]
 
440
  14%|█▎ | 116/852 [00:01<00:09, 75.61it/s]
 
441
  15%|█▍ | 125/852 [00:01<00:09, 77.44it/s]
 
442
  16%|█▌ | 133/852 [00:01<00:09, 74.02it/s]
 
443
  17%|█▋ | 141/852 [00:01<00:09, 74.52it/s]
 
444
  17%|█▋ | 149/852 [00:01<00:09, 74.17it/s]
 
445
  19%|█▊ | 158/852 [00:02<00:09, 76.52it/s]
 
446
  19%|█▉ | 166/852 [00:02<00:08, 76.93it/s]
 
447
  20%|██ | 174/852 [00:02<00:08, 77.40it/s]
 
448
  21%|██▏ | 182/852 [00:02<00:08, 77.81it/s]
 
449
  22%|██▏ | 191/852 [00:02<00:08, 78.32it/s]
 
450
  23%|██▎ | 199/852 [00:02<00:08, 78.40it/s]
 
451
  24%|██▍ | 207/852 [00:02<00:08, 77.19it/s]
 
452
  25%|██▌ | 215/852 [00:02<00:08, 76.20it/s]
 
453
  26%|██▌ | 223/852 [00:02<00:08, 77.17it/s]
 
454
  27%|██▋ | 231/852 [00:03<00:07, 77.72it/s]
 
455
  28%|██▊ | 239/852 [00:03<00:07, 76.96it/s]
 
456
  29%|██▉ | 247/852 [00:03<00:08, 75.20it/s]
 
457
  30%|███ | 256/852 [00:03<00:07, 77.34it/s]
 
458
  31%|███ | 264/852 [00:03<00:07, 76.46it/s]
 
459
  32%|███▏ | 272/852 [00:03<00:07, 73.76it/s]
 
460
  33%|███▎ | 280/852 [00:03<00:07, 74.90it/s]
 
461
  34%|███▍ | 288/852 [00:03<00:07, 74.86it/s]
 
462
  35%|███▍ | 296/852 [00:03<00:07, 76.06it/s]
 
463
  36%|███▌ | 305/852 [00:03<00:07, 77.75it/s]
 
464
  37%|███▋ | 313/852 [00:04<00:07, 75.79it/s]
 
465
  38%|███▊ | 321/852 [00:04<00:06, 76.62it/s]
 
466
  39%|███▊ | 329/852 [00:04<00:06, 76.40it/s]
 
467
  40%|███▉ | 337/852 [00:04<00:06, 76.71it/s]
 
468
  41%|████ | 346/852 [00:04<00:06, 77.07it/s]
 
469
  42%|████▏ | 354/852 [00:04<00:06, 76.21it/s]
 
470
  42%|████▏ | 362/852 [00:04<00:06, 76.53it/s]
 
471
  43%|████▎ | 370/852 [00:04<00:06, 76.40it/s]
 
472
  44%|████▍ | 378/852 [00:04<00:06, 76.51it/s]
 
473
  45%|████▌ | 386/852 [00:05<00:06, 75.64it/s]
 
474
  46%|████▌ | 394/852 [00:05<00:05, 76.44it/s]
 
475
  47%|████▋ | 402/852 [00:05<00:05, 76.42it/s]
 
476
  48%|████▊ | 410/852 [00:05<00:06, 72.56it/s]
 
477
  49%|████▉ | 418/852 [00:05<00:05, 74.22it/s]
 
478
  50%|█████ | 426/852 [00:05<00:05, 73.15it/s]
 
479
  51%|█████ | 435/852 [00:05<00:05, 74.99it/s]
 
480
  52%|█████▏ | 443/852 [00:05<00:05, 76.20it/s]
 
481
  53%|█████▎ | 451/852 [00:05<00:05, 76.98it/s]
 
482
  54%|█████▍ | 459/852 [00:06<00:05, 77.54it/s]
 
483
  55%|█████▍ | 467/852 [00:06<00:05, 74.54it/s]
 
484
  56%|█████▌ | 475/852 [00:06<00:05, 71.46it/s]
 
485
  57%|█████▋ | 483/852 [00:06<00:05, 72.27it/s]
 
486
  58%|█████▊ | 491/852 [00:06<00:04, 72.68it/s]
 
487
  59%|█████▊ | 499/852 [00:06<00:04, 73.96it/s]
 
488
  60%|█████▉ | 507/852 [00:06<00:04, 73.71it/s]
 
489
  60%|██████ | 515/852 [00:06<00:04, 74.07it/s]
 
490
  61%|██████▏ | 523/852 [00:06<00:04, 73.58it/s]
 
491
  62%|██████▏ | 531/852 [00:06<00:04, 74.92it/s]
 
492
  63%|██████▎ | 540/852 [00:07<00:04, 76.68it/s]
 
493
  64%|██████▍ | 548/852 [00:07<00:03, 77.09it/s]
 
494
  65%|██████▌ | 556/852 [00:07<00:03, 74.59it/s]
 
495
  66%|██████▌ | 564/852 [00:07<00:03, 75.78it/s]
 
496
  67%|██████▋ | 572/852 [00:07<00:03, 76.36it/s]
 
497
  68%|██████▊ | 580/852 [00:07<00:03, 77.03it/s]
 
498
  69%|██████▉ | 588/852 [00:07<00:03, 76.20it/s]
 
499
  70%|███████ | 597/852 [00:07<00:03, 77.37it/s]
 
500
  71%|███████ | 605/852 [00:07<00:03, 77.54it/s]
 
501
  72%|███████▏ | 613/852 [00:08<00:03, 77.35it/s]
 
502
  73%|███████▎ | 621/852 [00:08<00:03, 76.88it/s]
 
503
  74%|███████▍ | 629/852 [00:08<00:02, 75.44it/s]
 
504
  75%|███████▍ | 637/852 [00:08<00:02, 76.45it/s]
 
505
  76%|███████▌ | 645/852 [00:08<00:02, 74.01it/s]
 
506
  77%|███████▋ | 654/852 [00:08<00:02, 76.16it/s]
 
507
  78%|███████▊ | 662/852 [00:08<00:02, 76.57it/s]
 
508
  79%|███████▊ | 670/852 [00:08<00:02, 77.05it/s]
 
509
  80%|███████▉ | 678/852 [00:08<00:02, 77.66it/s]
 
510
  81%|████████ | 686/852 [00:09<00:02, 78.20it/s]
 
511
  81%|████████▏ | 694/852 [00:09<00:02, 78.37it/s]
 
512
  83%|████████▎ | 703/852 [00:09<00:01, 79.27it/s]
 
513
  84%|████████▎ | 712/852 [00:09<00:01, 80.17it/s]
 
514
  85%|████████▍ | 721/852 [00:09<00:01, 79.07it/s]
 
515
  86%|████████▌ | 730/852 [00:09<00:01, 79.67it/s]
 
516
  87%|████████▋ | 738/852 [00:09<00:01, 79.74it/s]
 
517
  88%|████████▊ | 746/852 [00:09<00:01, 79.62it/s]
 
518
  88%|████████▊ | 754/852 [00:09<00:01, 79.63it/s]
 
519
  90%|████████▉ | 763/852 [00:09<00:01, 79.84it/s]
 
520
  90%|█████████ | 771/852 [00:10<00:01, 78.47it/s]
 
521
  91%|█████████▏| 779/852 [00:10<00:00, 77.58it/s]
 
522
  92%|█████████▏| 787/852 [00:10<00:00, 76.86it/s]
 
523
  93%|█████████▎| 796/852 [00:10<00:00, 77.54it/s]
 
524
  94%|█████████▍| 805/852 [00:10<00:00, 78.72it/s]
 
525
  95%|█████████▌| 813/852 [00:10<00:00, 77.73it/s]
 
526
  96%|█████████▋| 822/852 [00:10<00:00, 78.66it/s]
 
527
  98%|█████████▊| 831/852 [00:10<00:00, 79.83it/s]
 
528
  98%|█████████▊| 839/852 [00:10<00:00, 79.61it/s]
 
529
  99%|█████████▉| 847/852 [00:11<00:00, 77.55it/s]/usr/local/lib/python3.10/dist-packages/seqeval/metrics/v1.py:57: UndefinedMetricWarning: Recall and F-score are ill-defined and being set to 0.0 in labels with no true samples. Use `zero_division` parameter to control this behavior.
 
 
530
 
 
531
 
532
  10%|█ | 425/4250 [01:57<14:28, 4.40it/s]
 
 
533
  [INFO|trainer.py:3478] 2024-08-30 19:58:43,890 >> Saving model checkpoint to /content/dissertation/scripts/ner/output/checkpoint-425
 
 
 
 
 
 
 
534
  10%|█ | 426/4250 [02:01<6:11:12, 5.82s/it]
535
  10%|█ | 427/4250 [02:01<4:23:04, 4.13s/it]
536
  10%|█ | 428/4250 [02:01<3:08:07, 2.95s/it]
537
  10%|█ | 429/4250 [02:01<2:17:52, 2.17s/it]
538
  10%|█ | 430/4250 [02:01<1:39:43, 1.57s/it]
539
  10%|█ | 431/4250 [02:02<1:14:51, 1.18s/it]
540
  10%|█ | 432/4250 [02:02<56:05, 1.13it/s]
541
  10%|█ | 433/4250 [02:02<42:36, 1.49it/s]
542
  10%|█ | 434/4250 [02:02<35:09, 1.81it/s]
543
  10%|█ | 435/4250 [02:03<27:53, 2.28it/s]
544
  10%|█ | 436/4250 [02:03<23:52, 2.66it/s]
545
  10%|█ | 437/4250 [02:03<20:33, 3.09it/s]
546
  10%|█ | 438/4250 [02:03<18:19, 3.47it/s]
547
  10%|█ | 439/4250 [02:03<16:56, 3.75it/s]
548
  10%|█ | 440/4250 [02:04<15:28, 4.10it/s]
549
  10%|█ | 441/4250 [02:04<14:40, 4.33it/s]
550
  10%|█ | 442/4250 [02:04<14:39, 4.33it/s]
551
  10%|█ | 443/4250 [02:04<16:18, 3.89it/s]
552
  10%|█ | 444/4250 [02:05<15:50, 4.01it/s]
553
  10%|█ | 445/4250 [02:05<14:21, 4.42it/s]
554
  10%|█ | 446/4250 [02:05<15:17, 4.15it/s]
555
  11%|█ | 447/4250 [02:05<14:24, 4.40it/s]
556
  11%|█ | 448/4250 [02:05<13:17, 4.77it/s]
557
  11%|█ | 449/4250 [02:06<13:08, 4.82it/s]
558
  11%|█ | 450/4250 [02:06<15:05, 4.20it/s]
559
  11%|█ | 451/4250 [02:06<15:49, 4.00it/s]
560
  11%|█ | 452/4250 [02:06<14:51, 4.26it/s]
561
  11%|█ | 453/4250 [02:07<15:37, 4.05it/s]
562
  11%|█ | 454/4250 [02:07<15:38, 4.04it/s]
563
  11%|█ | 455/4250 [02:07<15:50, 3.99it/s]
564
  11%|█ | 456/4250 [02:07<14:25, 4.38it/s]
565
  11%|█ | 457/4250 [02:08<13:17, 4.76it/s]
566
  11%|█ | 458/4250 [02:08<14:22, 4.40it/s]
567
  11%|█ | 459/4250 [02:09<24:54, 2.54it/s]
568
  11%|█ | 460/4250 [02:09<25:12, 2.51it/s]
569
  11%|█ | 461/4250 [02:09<23:10, 2.73it/s]
570
  11%|█ | 462/4250 [02:09<20:15, 3.12it/s]
571
  11%|█ | 463/4250 [02:10<17:57, 3.52it/s]
572
  11%|█ | 464/4250 [02:10<16:49, 3.75it/s]
573
  11%|█ | 465/4250 [02:10<15:28, 4.08it/s]
574
  11%|█ | 466/4250 [02:10<14:54, 4.23it/s]
575
  11%|█ | 467/4250 [02:11<15:20, 4.11it/s]
576
  11%|█ | 468/4250 [02:11<15:20, 4.11it/s]
577
  11%|█ | 469/4250 [02:11<14:55, 4.22it/s]
578
  11%|█ | 470/4250 [02:11<14:12, 4.44it/s]
579
  11%|█ | 471/4250 [02:11<13:01, 4.84it/s]
580
  11%|█ | 472/4250 [02:12<14:33, 4.33it/s]
581
  11%|█ | 473/4250 [02:12<14:12, 4.43it/s]
582
  11%|█ | 474/4250 [02:12<15:29, 4.06it/s]
583
  11%|█ | 475/4250 [02:13<19:41, 3.20it/s]
584
  11%|█ | 476/4250 [02:13<19:02, 3.30it/s]
585
  11%|█ | 477/4250 [02:13<18:12, 3.45it/s]
586
  11%|█ | 478/4250 [02:14<18:30, 3.40it/s]
587
  11%|█▏ | 479/4250 [02:14<17:11, 3.66it/s]
588
  11%|█▏ | 480/4250 [02:14<15:16, 4.11it/s]
589
  11%|█▏ | 481/4250 [02:14<16:04, 3.91it/s]
590
  11%|█▏ | 482/4250 [02:15<17:51, 3.52it/s]
591
  11%|█▏ | 483/4250 [02:15<16:15, 3.86it/s]
592
  11%|█▏ | 484/4250 [02:15<14:37, 4.29it/s]
593
  11%|█▏ | 485/4250 [02:15<14:49, 4.23it/s]
594
  11%|█▏ | 486/4250 [02:15<15:41, 4.00it/s]
595
  11%|█▏ | 487/4250 [02:16<14:31, 4.32it/s]
596
  11%|█▏ | 488/4250 [02:16<14:11, 4.42it/s]
597
  12%|█▏ | 489/4250 [02:16<18:02, 3.48it/s]
598
  12%|█▏ | 490/4250 [02:17<16:37, 3.77it/s]
599
  12%|█▏ | 491/4250 [02:17<16:43, 3.74it/s]
600
  12%|█▏ | 492/4250 [02:17<15:55, 3.93it/s]
601
  12%|█▏ | 493/4250 [02:17<14:34, 4.30it/s]
602
  12%|█▏ | 494/4250 [02:17<15:06, 4.14it/s]
603
  12%|█▏ | 495/4250 [02:18<17:03, 3.67it/s]
604
  12%|█▏ | 496/4250 [02:18<16:05, 3.89it/s]
605
  12%|█▏ | 497/4250 [02:18<14:37, 4.28it/s]
606
  12%|█▏ | 498/4250 [02:18<13:21, 4.68it/s]
607
  12%|█▏ | 499/4250 [02:19<14:14, 4.39it/s]
608
  12%|█▏ | 500/4250 [02:19<13:55, 4.49it/s]
609
 
610
  12%|█▏ | 500/4250 [02:19<13:55, 4.49it/s]
611
  12%|█▏ | 501/4250 [02:19<16:34, 3.77it/s]
 
1
+ 2024-08-30 19:56:24.380746: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2
+ 2024-08-30 19:56:24.398707: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
3
+ 2024-08-30 19:56:24.420048: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
4
+ 2024-08-30 19:56:24.426474: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1452] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
5
+ 2024-08-30 19:56:24.441801: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
6
+ To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
7
+ 2024-08-30 19:56:25.730410: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT
8
+ /usr/local/lib/python3.10/dist-packages/transformers/training_args.py:1494: FutureWarning: `evaluation_strategy` is deprecated and will be removed in version 4.46 of 🤗 Transformers. Use `eval_strategy` instead
9
+ warnings.warn(
10
+ 08/30/2024 19:56:27 - WARNING - __main__ - Process rank: 0, device: cuda:0, n_gpu: 1distributed training: True, 16-bits training: False
11
+ 08/30/2024 19:56:27 - INFO - __main__ - Training/evaluation parameters TrainingArguments(
12
+ _n_gpu=1,
13
+ accelerator_config={'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None, 'use_configured_state': False},
14
+ adafactor=False,
15
+ adam_beta1=0.9,
16
+ adam_beta2=0.999,
17
+ adam_epsilon=1e-08,
18
+ auto_find_batch_size=False,
19
+ batch_eval_metrics=False,
20
+ bf16=False,
21
+ bf16_full_eval=False,
22
+ data_seed=None,
23
+ dataloader_drop_last=False,
24
+ dataloader_num_workers=0,
25
+ dataloader_persistent_workers=False,
26
+ dataloader_pin_memory=True,
27
+ dataloader_prefetch_factor=None,
28
+ ddp_backend=None,
29
+ ddp_broadcast_buffers=None,
30
+ ddp_bucket_cap_mb=None,
31
+ ddp_find_unused_parameters=None,
32
+ ddp_timeout=1800,
33
+ debug=[],
34
+ deepspeed=None,
35
+ disable_tqdm=False,
36
+ dispatch_batches=None,
37
+ do_eval=True,
38
+ do_predict=True,
39
+ do_train=True,
40
+ eval_accumulation_steps=None,
41
+ eval_delay=0,
42
+ eval_do_concat_batches=True,
43
+ eval_on_start=False,
44
+ eval_steps=None,
45
+ eval_strategy=epoch,
46
+ evaluation_strategy=epoch,
47
+ fp16=False,
48
+ fp16_backend=auto,
49
+ fp16_full_eval=False,
50
+ fp16_opt_level=O1,
51
+ fsdp=[],
52
+ fsdp_config={'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False},
53
+ fsdp_min_num_params=0,
54
+ fsdp_transformer_layer_cls_to_wrap=None,
55
+ full_determinism=False,
56
+ gradient_accumulation_steps=2,
57
+ gradient_checkpointing=False,
58
+ gradient_checkpointing_kwargs=None,
59
+ greater_is_better=True,
60
+ group_by_length=False,
61
+ half_precision_backend=auto,
62
+ hub_always_push=False,
63
+ hub_model_id=None,
64
+ hub_private_repo=False,
65
+ hub_strategy=every_save,
66
+ hub_token=<HUB_TOKEN>,
67
+ ignore_data_skip=False,
68
+ include_inputs_for_metrics=False,
69
+ include_num_input_tokens_seen=False,
70
+ include_tokens_per_second=False,
71
+ jit_mode_eval=False,
72
+ label_names=None,
73
+ label_smoothing_factor=0.0,
74
+ learning_rate=5e-05,
75
+ length_column_name=length,
76
+ load_best_model_at_end=True,
77
+ local_rank=0,
78
+ log_level=passive,
79
+ log_level_replica=warning,
80
+ log_on_each_node=True,
81
+ logging_dir=/content/dissertation/scripts/ner/output/tb,
82
+ logging_first_step=False,
83
+ logging_nan_inf_filter=True,
84
+ logging_steps=500,
85
+ logging_strategy=steps,
86
+ lr_scheduler_kwargs={},
87
+ lr_scheduler_type=linear,
88
+ max_grad_norm=1.0,
89
+ max_steps=-1,
90
+ metric_for_best_model=f1,
91
+ mp_parameters=,
92
+ neftune_noise_alpha=None,
93
+ no_cuda=False,
94
+ num_train_epochs=10.0,
95
+ optim=adamw_torch,
96
+ optim_args=None,
97
+ optim_target_modules=None,
98
+ output_dir=/content/dissertation/scripts/ner/output,
99
+ overwrite_output_dir=True,
100
+ past_index=-1,
101
+ per_device_eval_batch_size=8,
102
+ per_device_train_batch_size=32,
103
+ prediction_loss_only=False,
104
+ push_to_hub=True,
105
+ push_to_hub_model_id=None,
106
+ push_to_hub_organization=None,
107
+ push_to_hub_token=<PUSH_TO_HUB_TOKEN>,
108
+ ray_scope=last,
109
+ remove_unused_columns=True,
110
+ report_to=['tensorboard'],
111
+ restore_callback_states_from_checkpoint=False,
112
+ resume_from_checkpoint=None,
113
+ run_name=/content/dissertation/scripts/ner/output,
114
+ save_on_each_node=False,
115
+ save_only_model=False,
116
+ save_safetensors=True,
117
+ save_steps=500,
118
+ save_strategy=epoch,
119
+ save_total_limit=None,
120
+ seed=42,
121
+ skip_memory_metrics=True,
122
+ split_batches=None,
123
+ tf32=None,
124
+ torch_compile=False,
125
+ torch_compile_backend=None,
126
+ torch_compile_mode=None,
127
+ torchdynamo=None,
128
+ tpu_metrics_debug=False,
129
+ tpu_num_cores=None,
130
+ use_cpu=False,
131
+ use_ipex=False,
132
+ use_legacy_prediction_loop=False,
133
+ use_mps_device=False,
134
+ warmup_ratio=0.0,
135
+ warmup_steps=0,
136
+ weight_decay=0.0,
137
+ )
138
+
139
+
140
+
141
+
142
+
143
+
144
+
145
+ [INFO|configuration_utils.py:733] 2024-08-30 19:56:39,725 >> loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--PlanTL-GOB-ES--bsc-bio-ehr-es/snapshots/1e543adb2d21f19d85a89305eebdbd64ab656b99/config.json
146
+ [INFO|configuration_utils.py:800] 2024-08-30 19:56:39,729 >> Model config RobertaConfig {
147
+ "_name_or_path": "PlanTL-GOB-ES/bsc-bio-ehr-es",
148
+ "architectures": [
149
+ "RobertaForMaskedLM"
150
+ ],
151
+ "attention_probs_dropout_prob": 0.1,
152
+ "bos_token_id": 0,
153
+ "classifier_dropout": null,
154
+ "eos_token_id": 2,
155
+ "finetuning_task": "ner",
156
+ "gradient_checkpointing": false,
157
+ "hidden_act": "gelu",
158
+ "hidden_dropout_prob": 0.1,
159
+ "hidden_size": 768,
160
+ "id2label": {
161
+ "0": "O",
162
+ "1": "B-ENFERMEDAD",
163
+ "2": "I-ENFERMEDAD",
164
+ "3": "B-PROCEDIMIENTO",
165
+ "4": "I-PROCEDIMIENTO",
166
+ "5": "B-SINTOMA",
167
+ "6": "I-SINTOMA",
168
+ "7": "B-FARMACO",
169
+ "8": "I-FARMACO"
170
+ },
171
+ "initializer_range": 0.02,
172
+ "intermediate_size": 3072,
173
+ "label2id": {
174
+ "B-ENFERMEDAD": 1,
175
+ "B-FARMACO": 7,
176
+ "B-PROCEDIMIENTO": 3,
177
+ "B-SINTOMA": 5,
178
+ "I-ENFERMEDAD": 2,
179
+ "I-FARMACO": 8,
180
+ "I-PROCEDIMIENTO": 4,
181
+ "I-SINTOMA": 6,
182
+ "O": 0
183
+ },
184
+ "layer_norm_eps": 1e-05,
185
+ "max_position_embeddings": 514,
186
+ "model_type": "roberta",
187
+ "num_attention_heads": 12,
188
+ "num_hidden_layers": 12,
189
+ "pad_token_id": 1,
190
+ "position_embedding_type": "absolute",
191
+ "transformers_version": "4.42.4",
192
+ "type_vocab_size": 1,
193
+ "use_cache": true,
194
+ "vocab_size": 50262
195
+ }
196
+
197
+ [INFO|configuration_utils.py:733] 2024-08-30 19:56:39,829 >> loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--PlanTL-GOB-ES--bsc-bio-ehr-es/snapshots/1e543adb2d21f19d85a89305eebdbd64ab656b99/config.json
198
+ [INFO|configuration_utils.py:800] 2024-08-30 19:56:39,830 >> Model config RobertaConfig {
199
+ "_name_or_path": "PlanTL-GOB-ES/bsc-bio-ehr-es",
200
+ "architectures": [
201
+ "RobertaForMaskedLM"
202
+ ],
203
+ "attention_probs_dropout_prob": 0.1,
204
+ "bos_token_id": 0,
205
+ "classifier_dropout": null,
206
+ "eos_token_id": 2,
207
+ "gradient_checkpointing": false,
208
+ "hidden_act": "gelu",
209
+ "hidden_dropout_prob": 0.1,
210
+ "hidden_size": 768,
211
+ "initializer_range": 0.02,
212
+ "intermediate_size": 3072,
213
+ "layer_norm_eps": 1e-05,
214
+ "max_position_embeddings": 514,
215
+ "model_type": "roberta",
216
+ "num_attention_heads": 12,
217
+ "num_hidden_layers": 12,
218
+ "pad_token_id": 1,
219
+ "position_embedding_type": "absolute",
220
+ "transformers_version": "4.42.4",
221
+ "type_vocab_size": 1,
222
+ "use_cache": true,
223
+ "vocab_size": 50262
224
+ }
225
+
226
+ [INFO|tokenization_utils_base.py:2161] 2024-08-30 19:56:39,840 >> loading file vocab.json from cache at /root/.cache/huggingface/hub/models--PlanTL-GOB-ES--bsc-bio-ehr-es/snapshots/1e543adb2d21f19d85a89305eebdbd64ab656b99/vocab.json
227
+ [INFO|tokenization_utils_base.py:2161] 2024-08-30 19:56:39,840 >> loading file merges.txt from cache at /root/.cache/huggingface/hub/models--PlanTL-GOB-ES--bsc-bio-ehr-es/snapshots/1e543adb2d21f19d85a89305eebdbd64ab656b99/merges.txt
228
+ [INFO|tokenization_utils_base.py:2161] 2024-08-30 19:56:39,840 >> loading file tokenizer.json from cache at None
229
+ [INFO|tokenization_utils_base.py:2161] 2024-08-30 19:56:39,840 >> loading file added_tokens.json from cache at None
230
+ [INFO|tokenization_utils_base.py:2161] 2024-08-30 19:56:39,840 >> loading file special_tokens_map.json from cache at /root/.cache/huggingface/hub/models--PlanTL-GOB-ES--bsc-bio-ehr-es/snapshots/1e543adb2d21f19d85a89305eebdbd64ab656b99/special_tokens_map.json
231
+ [INFO|tokenization_utils_base.py:2161] 2024-08-30 19:56:39,840 >> loading file tokenizer_config.json from cache at /root/.cache/huggingface/hub/models--PlanTL-GOB-ES--bsc-bio-ehr-es/snapshots/1e543adb2d21f19d85a89305eebdbd64ab656b99/tokenizer_config.json
232
+ [INFO|configuration_utils.py:733] 2024-08-30 19:56:39,841 >> loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--PlanTL-GOB-ES--bsc-bio-ehr-es/snapshots/1e543adb2d21f19d85a89305eebdbd64ab656b99/config.json
233
+ [INFO|configuration_utils.py:800] 2024-08-30 19:56:39,842 >> Model config RobertaConfig {
234
+ "_name_or_path": "PlanTL-GOB-ES/bsc-bio-ehr-es",
235
+ "architectures": [
236
+ "RobertaForMaskedLM"
237
+ ],
238
+ "attention_probs_dropout_prob": 0.1,
239
+ "bos_token_id": 0,
240
+ "classifier_dropout": null,
241
+ "eos_token_id": 2,
242
+ "gradient_checkpointing": false,
243
+ "hidden_act": "gelu",
244
+ "hidden_dropout_prob": 0.1,
245
+ "hidden_size": 768,
246
+ "initializer_range": 0.02,
247
+ "intermediate_size": 3072,
248
+ "layer_norm_eps": 1e-05,
249
+ "max_position_embeddings": 514,
250
+ "model_type": "roberta",
251
+ "num_attention_heads": 12,
252
+ "num_hidden_layers": 12,
253
+ "pad_token_id": 1,
254
+ "position_embedding_type": "absolute",
255
+ "transformers_version": "4.42.4",
256
+ "type_vocab_size": 1,
257
+ "use_cache": true,
258
+ "vocab_size": 50262
259
+ }
260
+
261
+ [INFO|configuration_utils.py:733] 2024-08-30 19:56:39,926 >> loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--PlanTL-GOB-ES--bsc-bio-ehr-es/snapshots/1e543adb2d21f19d85a89305eebdbd64ab656b99/config.json
262
+ [INFO|configuration_utils.py:800] 2024-08-30 19:56:39,927 >> Model config RobertaConfig {
263
+ "_name_or_path": "PlanTL-GOB-ES/bsc-bio-ehr-es",
264
+ "architectures": [
265
+ "RobertaForMaskedLM"
266
+ ],
267
+ "attention_probs_dropout_prob": 0.1,
268
+ "bos_token_id": 0,
269
+ "classifier_dropout": null,
270
+ "eos_token_id": 2,
271
+ "gradient_checkpointing": false,
272
+ "hidden_act": "gelu",
273
+ "hidden_dropout_prob": 0.1,
274
+ "hidden_size": 768,
275
+ "initializer_range": 0.02,
276
+ "intermediate_size": 3072,
277
+ "layer_norm_eps": 1e-05,
278
+ "max_position_embeddings": 514,
279
+ "model_type": "roberta",
280
+ "num_attention_heads": 12,
281
+ "num_hidden_layers": 12,
282
+ "pad_token_id": 1,
283
+ "position_embedding_type": "absolute",
284
+ "transformers_version": "4.42.4",
285
+ "type_vocab_size": 1,
286
+ "use_cache": true,
287
+ "vocab_size": 50262
288
+ }
289
+
290
+ [INFO|modeling_utils.py:3556] 2024-08-30 19:56:40,114 >> loading weights file pytorch_model.bin from cache at /root/.cache/huggingface/hub/models--PlanTL-GOB-ES--bsc-bio-ehr-es/snapshots/1e543adb2d21f19d85a89305eebdbd64ab656b99/pytorch_model.bin
291
+ [INFO|modeling_utils.py:4354] 2024-08-30 19:56:40,253 >> Some weights of the model checkpoint at PlanTL-GOB-ES/bsc-bio-ehr-es were not used when initializing RobertaForTokenClassification: ['lm_head.bias', 'lm_head.decoder.bias', 'lm_head.decoder.weight', 'lm_head.dense.bias', 'lm_head.dense.weight', 'lm_head.layer_norm.bias', 'lm_head.layer_norm.weight']
292
+ - This IS expected if you are initializing RobertaForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
293
+ - This IS NOT expected if you are initializing RobertaForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
294
+ [WARNING|modeling_utils.py:4366] 2024-08-30 19:56:40,253 >> Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at PlanTL-GOB-ES/bsc-bio-ehr-es and are newly initialized: ['classifier.bias', 'classifier.weight']
295
+ You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
296
+
297
+
298
+
299
+ /content/dissertation/scripts/ner/run_ner_train.py:397: FutureWarning: load_metric is deprecated and will be removed in the next major version of datasets. Use 'evaluate.load' instead, from the new library 🤗 Evaluate: https://huggingface.co/docs/evaluate
300
+ metric = load_metric("seqeval", trust_remote_code=True)
301
+ [INFO|trainer.py:805] 2024-08-30 19:56:46,202 >> The following columns in the training set don't have a corresponding argument in `RobertaForTokenClassification.forward` and have been ignored: id, ner_tags, tokens. If id, ner_tags, tokens are not expected by `RobertaForTokenClassification.forward`, you can safely ignore this message.
302
+ [INFO|trainer.py:2128] 2024-08-30 19:56:46,780 >> ***** Running training *****
303
+ [INFO|trainer.py:2129] 2024-08-30 19:56:46,781 >> Num examples = 27,229
304
+ [INFO|trainer.py:2130] 2024-08-30 19:56:46,781 >> Num Epochs = 10
305
+ [INFO|trainer.py:2131] 2024-08-30 19:56:46,781 >> Instantaneous batch size per device = 32
306
+ [INFO|trainer.py:2134] 2024-08-30 19:56:46,781 >> Total train batch size (w. parallel, distributed & accumulation) = 64
307
+ [INFO|trainer.py:2135] 2024-08-30 19:56:46,781 >> Gradient Accumulation steps = 2
308
+ [INFO|trainer.py:2136] 2024-08-30 19:56:46,781 >> Total optimization steps = 4,250
309
+ [INFO|trainer.py:2137] 2024-08-30 19:56:46,781 >> Number of trainable parameters = 124,059,657
310
+
311
  0%| | 0/4250 [00:00<?, ?it/s]
312
  0%| | 1/4250 [00:01<1:22:54, 1.17s/it]
313
  0%| | 2/4250 [00:01<42:01, 1.68it/s]
314
  0%| | 3/4250 [00:01<29:22, 2.41it/s]
315
  0%| | 4/4250 [00:01<23:09, 3.06it/s]
316
  0%| | 5/4250 [00:02<21:02, 3.36it/s]
317
  0%| | 6/4250 [00:02<18:48, 3.76it/s]
318
  0%| | 7/4250 [00:02<18:03, 3.92it/s]
319
  0%| | 8/4250 [00:02<17:04, 4.14it/s]
320
  0%| | 9/4250 [00:02<17:57, 3.94it/s]
321
  0%| | 10/4250 [00:03<17:19, 4.08it/s]
322
  0%| | 11/4250 [00:03<16:45, 4.22it/s]
323
  0%| | 12/4250 [00:03<15:21, 4.60it/s]
324
  0%| | 13/4250 [00:03<17:21, 4.07it/s]
325
  0%| | 14/4250 [00:04<16:55, 4.17it/s]
326
  0%| | 15/4250 [00:04<16:34, 4.26it/s]
327
  0%| | 16/4250 [00:04<15:42, 4.49it/s]
328
  0%| | 17/4250 [00:04<14:41, 4.80it/s]
329
  0%| | 18/4250 [00:04<14:23, 4.90it/s]
330
  0%| | 19/4250 [00:05<15:11, 4.64it/s]
331
  0%| | 20/4250 [00:05<14:17, 4.93it/s]
332
  0%| | 21/4250 [00:05<16:33, 4.26it/s]
333
  1%| | 22/4250 [00:05<15:53, 4.44it/s]
334
  1%| | 23/4250 [00:06<15:25, 4.57it/s]
335
  1%| | 24/4250 [00:06<15:15, 4.62it/s]
336
  1%| | 25/4250 [00:06<16:44, 4.21it/s]
337
  1%| | 26/4250 [00:06<15:11, 4.64it/s]
338
  1%| | 27/4250 [00:06<15:12, 4.63it/s]
339
  1%| | 28/4250 [00:07<13:58, 5.03it/s]
340
  1%| | 29/4250 [00:07<13:39, 5.15it/s]
341
  1%| | 30/4250 [00:07<16:45, 4.20it/s]
342
  1%| | 31/4250 [00:07<15:42, 4.48it/s]
343
  1%| | 32/4250 [00:08<16:49, 4.18it/s]
344
  1%| | 33/4250 [00:08<16:15, 4.32it/s]
345
  1%| | 34/4250 [00:08<16:30, 4.26it/s]
346
  1%| | 35/4250 [00:08<15:50, 4.44it/s]
347
  1%| | 36/4250 [00:08<16:12, 4.34it/s]
348
  1%| | 37/4250 [00:09<15:15, 4.60it/s]
349
  1%| | 38/4250 [00:09<15:04, 4.65it/s]
350
  1%| | 39/4250 [00:09<16:26, 4.27it/s]
351
  1%| | 40/4250 [00:09<15:17, 4.59it/s]
352
  1%| | 41/4250 [00:09<14:58, 4.68it/s]
353
  1%| | 42/4250 [00:10<15:05, 4.65it/s]
354
  1%| | 43/4250 [00:10<17:31, 4.00it/s]
355
  1%| | 44/4250 [00:11<27:49, 2.52it/s]
356
  1%| | 45/4250 [00:11<23:40, 2.96it/s]
357
  1%| | 46/4250 [00:11<22:36, 3.10it/s]
358
  1%| | 47/4250 [00:11<19:55, 3.51it/s]
359
  1%| | 48/4250 [00:12<18:43, 3.74it/s]
360
  1%| | 49/4250 [00:12<16:10, 4.33it/s]
361
  1%| | 50/4250 [00:12<16:42, 4.19it/s]
362
  1%| | 51/4250 [00:12<16:56, 4.13it/s]
363
  1%| | 52/4250 [00:13<16:31, 4.23it/s]
364
  1%| | 53/4250 [00:13<17:15, 4.05it/s]
365
  1%|▏ | 54/4250 [00:13<20:55, 3.34it/s]
366
  1%|▏ | 55/4250 [00:13<19:02, 3.67it/s]
367
  1%|▏ | 56/4250 [00:14<18:24, 3.80it/s]
368
  1%|▏ | 57/4250 [00:14<17:03, 4.10it/s]
369
  1%|▏ | 58/4250 [00:14<15:38, 4.47it/s]
370
  1%|▏ | 59/4250 [00:14<15:03, 4.64it/s]
371
  1%|▏ | 60/4250 [00:14<14:48, 4.72it/s]
372
  1%|▏ | 61/4250 [00:15<15:50, 4.41it/s]
373
  1%|▏ | 62/4250 [00:15<15:56, 4.38it/s]
374
  1%|▏ | 63/4250 [00:15<15:00, 4.65it/s]
375
  2%|▏ | 64/4250 [00:15<14:46, 4.72it/s]
376
  2%|▏ | 65/4250 [00:16<15:31, 4.49it/s]
377
  2%|▏ | 66/4250 [00:16<16:36, 4.20it/s]
378
  2%|▏ | 67/4250 [00:16<16:31, 4.22it/s]
379
  2%|▏ | 68/4250 [00:17<20:45, 3.36it/s]
380
  2%|▏ | 69/4250 [00:17<18:33, 3.75it/s]
381
  2%|▏ | 70/4250 [00:17<19:56, 3.49it/s]
382
  2%|▏ | 71/4250 [00:17<18:58, 3.67it/s]
383
  2%|▏ | 72/4250 [00:18<17:43, 3.93it/s]
384
  2%|▏ | 73/4250 [00:18<17:15, 4.03it/s]
385
  2%|▏ | 74/4250 [00:18<17:31, 3.97it/s]
386
  2%|▏ | 75/4250 [00:18<15:34, 4.47it/s]
387
  2%|▏ | 76/4250 [00:18<16:08, 4.31it/s]
388
  2%|▏ | 77/4250 [00:19<15:14, 4.56it/s]
389
  2%|▏ | 78/4250 [00:19<16:02, 4.33it/s]
390
  2%|▏ | 79/4250 [00:19<16:08, 4.31it/s]
391
  2%|▏ | 80/4250 [00:19<16:04, 4.32it/s]
392
  2%|▏ | 81/4250 [00:20<16:05, 4.32it/s]
393
  2%|▏ | 82/4250 [00:20<15:07, 4.59it/s]
394
  2%|▏ | 83/4250 [00:20<17:05, 4.06it/s]
395
  2%|▏ | 84/4250 [00:20<15:34, 4.46it/s]
396
  2%|▏ | 85/4250 [00:20<15:15, 4.55it/s]
397
  2%|▏ | 86/4250 [00:21<15:04, 4.60it/s]
398
  2%|▏ | 87/4250 [00:21<15:05, 4.60it/s]
399
  2%|▏ | 88/4250 [00:21<13:41, 5.07it/s]
400
  2%|▏ | 89/4250 [00:21<13:52, 5.00it/s]
401
  2%|▏ | 90/4250 [00:22<16:13, 4.27it/s]
402
  2%|▏ | 91/4250 [00:22<14:34, 4.76it/s]
403
  2%|▏ | 92/4250 [00:22<14:02, 4.94it/s]
404
  2%|▏ | 93/4250 [00:22<13:17, 5.21it/s]
405
  2%|▏ | 94/4250 [00:22<13:54, 4.98it/s]
406
  2%|▏ | 95/4250 [00:23<15:06, 4.59it/s]
407
  2%|▏ | 96/4250 [00:23<16:55, 4.09it/s]
408
  2%|▏ | 97/4250 [00:23<15:53, 4.36it/s]
409
  2%|▏ | 98/4250 [00:23<14:45, 4.69it/s]
410
  2%|▏ | 99/4250 [00:23<14:08, 4.89it/s]
411
  2%|▏ | 100/4250 [00:24<14:28, 4.78it/s]
412
  2%|▏ | 101/4250 [00:24<18:01, 3.84it/s]
413
  2%|▏ | 102/4250 [00:24<20:25, 3.38it/s]
414
  2%|▏ | 103/4250 [00:25<18:29, 3.74it/s]
415
  2%|▏ | 104/4250 [00:25<16:58, 4.07it/s]
416
  2%|▏ | 105/4250 [00:25<16:37, 4.15it/s]
417
  2%|▏ | 106/4250 [00:25<16:08, 4.28it/s]
418
  3%|▎ | 107/4250 [00:25<15:55, 4.34it/s]
419
  3%|▎ | 108/4250 [00:26<19:06, 3.61it/s]
420
  3%|▎ | 109/4250 [00:26<18:34, 3.72it/s]
421
  3%|▎ | 110/4250 [00:26<17:11, 4.01it/s]
422
  3%|▎ | 111/4250 [00:26<15:34, 4.43it/s]
423
  3%|▎ | 112/4250 [00:27<16:09, 4.27it/s]
424
  3%|▎ | 113/4250 [00:27<15:47, 4.37it/s]
425
  3%|▎ | 114/4250 [00:27<14:36, 4.72it/s]
426
  3%|▎ | 115/4250 [00:27<14:04, 4.89it/s]
427
  3%|▎ | 116/4250 [00:28<15:35, 4.42it/s]
428
  3%|▎ | 117/4250 [00:28<15:07, 4.55it/s]
429
  3%|▎ | 118/4250 [00:28<14:42, 4.68it/s]
430
  3%|▎ | 119/4250 [00:28<15:06, 4.56it/s]
431
  3%|▎ | 120/4250 [00:28<14:14, 4.83it/s]
432
  3%|▎ | 121/4250 [00:29<14:18, 4.81it/s]
433
  3%|▎ | 122/4250 [00:29<20:13, 3.40it/s]
434
  3%|▎ | 123/4250 [00:29<18:31, 3.71it/s]
435
  3%|▎ | 124/4250 [00:30<16:53, 4.07it/s]
436
  3%|▎ | 125/4250 [00:30<17:05, 4.02it/s]
437
  3%|▎ | 126/4250 [00:30<15:30, 4.43it/s]
438
  3%|▎ | 127/4250 [00:30<14:48, 4.64it/s]
439
  3%|▎ | 128/4250 [00:30<14:12, 4.83it/s]
440
  3%|▎ | 129/4250 [00:31<17:31, 3.92it/s]
441
  3%|▎ | 130/4250 [00:31<16:58, 4.04it/s]
442
  3%|▎ | 131/4250 [00:31<15:29, 4.43it/s]
443
  3%|▎ | 132/4250 [00:31<14:37, 4.69it/s]
444
  3%|▎ | 133/4250 [00:32<15:37, 4.39it/s]
445
  3%|▎ | 134/4250 [00:32<15:17, 4.48it/s]
446
  3%|▎ | 135/4250 [00:32<15:06, 4.54it/s]
447
  3%|▎ | 136/4250 [00:32<15:10, 4.52it/s]
448
  3%|▎ | 137/4250 [00:32<14:56, 4.59it/s]
449
  3%|▎ | 138/4250 [00:33<14:26, 4.74it/s]
450
  3%|▎ | 139/4250 [00:33<14:55, 4.59it/s]
451
  3%|▎ | 140/4250 [00:33<15:52, 4.31it/s]
452
  3%|▎ | 141/4250 [00:33<15:57, 4.29it/s]
453
  3%|▎ | 142/4250 [00:33<14:58, 4.57it/s]
454
  3%|▎ | 143/4250 [00:34<14:36, 4.68it/s]
455
  3%|▎ | 144/4250 [00:34<14:25, 4.74it/s]
456
  3%|▎ | 145/4250 [00:34<16:56, 4.04it/s]
457
  3%|▎ | 146/4250 [00:35<22:12, 3.08it/s]
458
  3%|▎ | 147/4250 [00:35<19:47, 3.45it/s]
459
  3%|▎ | 148/4250 [00:35<19:39, 3.48it/s]
460
  4%|▎ | 149/4250 [00:35<18:55, 3.61it/s]
461
  4%|▎ | 150/4250 [00:36<17:36, 3.88it/s]
462
  4%|▎ | 151/4250 [00:36<16:08, 4.23it/s]
463
  4%|▎ | 152/4250 [00:36<17:00, 4.02it/s]
464
  4%|▎ | 153/4250 [00:36<16:21, 4.18it/s]
465
  4%|▎ | 154/4250 [00:37<15:19, 4.46it/s]
466
  4%|▎ | 155/4250 [00:37<14:26, 4.72it/s]
467
  4%|▎ | 156/4250 [00:37<14:44, 4.63it/s]
468
  4%|▎ | 157/4250 [00:37<14:24, 4.73it/s]
469
  4%|▎ | 158/4250 [00:38<17:32, 3.89it/s]
470
  4%|▎ | 159/4250 [00:38<18:32, 3.68it/s]
471
  4%|▍ | 160/4250 [00:38<16:57, 4.02it/s]
472
  4%|▍ | 161/4250 [00:38<16:00, 4.26it/s]
473
  4%|▍ | 162/4250 [00:38<14:56, 4.56it/s]
474
  4%|▍ | 163/4250 [00:39<14:48, 4.60it/s]
475
  4%|▍ | 164/4250 [00:39<18:40, 3.65it/s]
476
  4%|▍ | 165/4250 [00:39<18:13, 3.73it/s]
477
  4%|▍ | 166/4250 [00:40<18:24, 3.70it/s]
478
  4%|▍ | 167/4250 [00:40<17:28, 3.89it/s]
479
  4%|▍ | 168/4250 [00:40<15:55, 4.27it/s]
480
  4%|▍ | 169/4250 [00:40<15:32, 4.38it/s]
481
  4%|▍ | 170/4250 [00:40<15:28, 4.40it/s]
482
  4%|▍ | 171/4250 [00:41<15:59, 4.25it/s]
483
  4%|▍ | 172/4250 [00:41<15:11, 4.47it/s]
484
  4%|▍ | 173/4250 [00:41<18:21, 3.70it/s]
485
  4%|▍ | 174/4250 [00:41<16:38, 4.08it/s]
486
  4%|▍ | 175/4250 [00:42<19:24, 3.50it/s]
487
  4%|▍ | 176/4250 [00:42<19:20, 3.51it/s]
488
  4%|▍ | 177/4250 [00:42<17:03, 3.98it/s]
489
  4%|▍ | 178/4250 [00:42<16:06, 4.21it/s]
490
  4%|▍ | 179/4250 [00:43<15:38, 4.34it/s]
491
  4%|▍ | 180/4250 [00:43<14:25, 4.70it/s]
492
  4%|▍ | 181/4250 [00:43<14:35, 4.65it/s]
493
  4%|▍ | 182/4250 [00:43<17:09, 3.95it/s]
494
  4%|▍ | 183/4250 [00:44<16:33, 4.09it/s]
495
  4%|▍ | 184/4250 [00:44<16:26, 4.12it/s]
496
  4%|▍ | 185/4250 [00:44<15:05, 4.49it/s]
497
  4%|▍ | 186/4250 [00:44<14:37, 4.63it/s]
498
  4%|▍ | 187/4250 [00:44<13:45, 4.92it/s]
499
  4%|▍ | 188/4250 [00:45<14:40, 4.61it/s]
500
  4%|▍ | 189/4250 [00:45<14:11, 4.77it/s]
501
  4%|▍ | 190/4250 [00:45<13:43, 4.93it/s]
502
  4%|▍ | 191/4250 [00:45<14:21, 4.71it/s]
503
  5%|▍ | 192/4250 [00:46<14:09, 4.78it/s]
504
  5%|▍ | 193/4250 [00:46<13:53, 4.87it/s]
505
  5%|▍ | 194/4250 [00:46<16:40, 4.06it/s]
506
  5%|▍ | 195/4250 [00:46<15:35, 4.33it/s]
507
  5%|▍ | 196/4250 [00:46<14:25, 4.69it/s]
508
  5%|▍ | 197/4250 [00:47<14:55, 4.53it/s]
509
  5%|▍ | 198/4250 [00:47<15:18, 4.41it/s]
510
  5%|▍ | 199/4250 [00:47<15:25, 4.38it/s]
511
  5%|▍ | 200/4250 [00:47<15:34, 4.33it/s]
512
  5%|▍ | 201/4250 [00:48<15:13, 4.43it/s]
513
  5%|▍ | 202/4250 [00:48<15:31, 4.35it/s]
514
  5%|▍ | 203/4250 [00:48<14:25, 4.67it/s]
515
  5%|▍ | 204/4250 [00:48<13:45, 4.90it/s]
516
  5%|▍ | 205/4250 [00:48<14:42, 4.58it/s]
517
  5%|▍ | 206/4250 [00:49<14:27, 4.66it/s]
518
  5%|▍ | 207/4250 [00:49<16:42, 4.03it/s]
519
  5%|▍ | 208/4250 [00:49<15:16, 4.41it/s]
520
  5%|▍ | 209/4250 [00:49<15:09, 4.44it/s]
521
  5%|▍ | 210/4250 [00:50<13:59, 4.81it/s]
522
  5%|▍ | 211/4250 [00:50<15:05, 4.46it/s]
523
  5%|▍ | 212/4250 [00:50<14:22, 4.68it/s]
524
  5%|▌ | 213/4250 [00:50<17:28, 3.85it/s]
525
  5%|▌ | 214/4250 [00:50<15:20, 4.38it/s]
526
  5%|▌ | 215/4250 [00:51<15:14, 4.41it/s]
527
  5%|▌ | 216/4250 [00:51<19:02, 3.53it/s]
528
  5%|▌ | 217/4250 [00:51<17:01, 3.95it/s]
529
  5%|▌ | 218/4250 [00:52<15:54, 4.22it/s]
530
  5%|▌ | 219/4250 [00:52<16:32, 4.06it/s]
531
  5%|▌ | 220/4250 [00:52<16:39, 4.03it/s]
532
  5%|▌ | 221/4250 [00:52<18:42, 3.59it/s]
533
  5%|▌ | 222/4250 [00:53<20:01, 3.35it/s]
534
  5%|▌ | 223/4250 [00:53<18:25, 3.64it/s]
535
  5%|▌ | 224/4250 [00:53<16:30, 4.06it/s]
536
  5%|▌ | 225/4250 [00:53<15:06, 4.44it/s]
537
  5%|▌ | 226/4250 [00:53<14:23, 4.66it/s]
538
  5%|▌ | 227/4250 [00:54<14:00, 4.79it/s]
539
  5%|▌ | 228/4250 [00:54<13:46, 4.87it/s]
540
  5%|▌ | 229/4250 [00:54<12:57, 5.17it/s]
541
  5%|▌ | 230/4250 [00:54<14:21, 4.66it/s]
542
  5%|▌ | 231/4250 [00:54<13:45, 4.87it/s]
543
  5%|▌ | 232/4250 [00:55<13:36, 4.92it/s]
544
  5%|▌ | 233/4250 [00:55<13:25, 4.99it/s]
545
  6%|▌ | 234/4250 [00:55<15:43, 4.26it/s]
546
  6%|▌ | 235/4250 [00:56<20:40, 3.24it/s]
547
  6%|▌ | 236/4250 [00:56<21:54, 3.05it/s]
548
  6%|▌ | 237/4250 [00:56<19:46, 3.38it/s]
549
  6%|▌ | 238/4250 [00:56<17:28, 3.83it/s]
550
  6%|▌ | 239/4250 [00:57<17:32, 3.81it/s]
551
  6%|▌ | 240/4250 [00:57<20:00, 3.34it/s]
552
  6%|▌ | 241/4250 [00:57<18:01, 3.71it/s]
553
  6%|▌ | 242/4250 [00:58<16:32, 4.04it/s]
554
  6%|▌ | 243/4250 [00:58<16:03, 4.16it/s]
555
  6%|▌ | 244/4250 [00:58<16:03, 4.16it/s]
556
  6%|▌ | 245/4250 [00:58<16:00, 4.17it/s]
557
  6%|▌ | 246/4250 [00:59<24:58, 2.67it/s]
558
  6%|▌ | 247/4250 [00:59<22:20, 2.99it/s]
559
  6%|▌ | 248/4250 [00:59<19:39, 3.39it/s]
560
  6%|▌ | 249/4250 [01:00<23:51, 2.79it/s]
561
  6%|▌ | 250/4250 [01:00<25:16, 2.64it/s]
562
  6%|▌ | 251/4250 [01:01<22:47, 2.92it/s]
563
  6%|▌ | 252/4250 [01:01<19:31, 3.41it/s]
564
  6%|▌ | 253/4250 [01:01<17:12, 3.87it/s]
565
  6%|▌ | 254/4250 [01:01<17:52, 3.73it/s]
566
  6%|▌ | 255/4250 [01:01<18:09, 3.67it/s]
567
  6%|▌ | 256/4250 [01:02<17:54, 3.72it/s]
568
  6%|▌ | 257/4250 [01:02<16:41, 3.99it/s]
569
  6%|▌ | 258/4250 [01:02<15:34, 4.27it/s]
570
  6%|▌ | 259/4250 [01:02<14:26, 4.61it/s]
571
  6%|▌ | 260/4250 [01:03<15:43, 4.23it/s]
572
  6%|▌ | 261/4250 [01:03<15:00, 4.43it/s]
573
  6%|▌ | 262/4250 [01:03<13:51, 4.79it/s]
574
  6%|▌ | 263/4250 [01:03<12:53, 5.15it/s]
575
  6%|▌ | 264/4250 [01:03<14:52, 4.47it/s]
576
  6%|▌ | 265/4250 [01:04<15:49, 4.20it/s]
577
  6%|▋ | 266/4250 [01:04<15:45, 4.21it/s]
578
  6%|▋ | 267/4250 [01:04<15:43, 4.22it/s]
579
  6%|▋ | 268/4250 [01:04<14:46, 4.49it/s]
580
  6%|▋ | 269/4250 [01:05<14:19, 4.63it/s]
581
  6%|▋ | 270/4250 [01:05<16:42, 3.97it/s]
582
  6%|▋ | 271/4250 [01:05<15:07, 4.39it/s]
583
  6%|▋ | 272/4250 [01:05<14:36, 4.54it/s]
584
  6%|▋ | 273/4250 [01:05<14:08, 4.69it/s]
585
  6%|▋ | 274/4250 [01:06<13:43, 4.83it/s]
586
  6%|▋ | 275/4250 [01:06<13:31, 4.90it/s]
587
  6%|▋ | 276/4250 [01:06<12:57, 5.11it/s]
588
  7%|▋ | 277/4250 [01:06<13:38, 4.85it/s]
589
  7%|▋ | 278/4250 [01:06<13:30, 4.90it/s]
590
  7%|▋ | 279/4250 [01:07<14:07, 4.68it/s]
591
  7%|▋ | 280/4250 [01:07<17:31, 3.77it/s]
592
  7%|▋ | 281/4250 [01:07<16:05, 4.11it/s]
593
  7%|▋ | 282/4250 [01:07<15:57, 4.14it/s]
594
  7%|▋ | 283/4250 [01:08<14:59, 4.41it/s]
595
  7%|▋ | 284/4250 [01:08<14:31, 4.55it/s]
596
  7%|▋ | 285/4250 [01:08<14:42, 4.49it/s]
597
  7%|▋ | 286/4250 [01:08<14:16, 4.63it/s]
598
  7%|▋ | 287/4250 [01:09<15:52, 4.16it/s]
599
  7%|▋ | 288/4250 [01:09<14:43, 4.48it/s]
600
  7%|▋ | 289/4250 [01:09<14:54, 4.43it/s]
601
  7%|▋ | 290/4250 [01:09<14:03, 4.69it/s]
602
  7%|▋ | 291/4250 [01:09<14:55, 4.42it/s]
603
  7%|▋ | 292/4250 [01:10<14:00, 4.71it/s]
604
  7%|▋ | 293/4250 [01:10<13:17, 4.96it/s]
605
  7%|▋ | 294/4250 [01:10<12:52, 5.12it/s]
606
  7%|▋ | 295/4250 [01:10<14:15, 4.63it/s]
607
  7%|▋ | 296/4250 [01:10<13:24, 4.92it/s]
608
  7%|▋ | 297/4250 [01:11<13:06, 5.02it/s]
609
  7%|▋ | 298/4250 [01:11<13:03, 5.05it/s]
610
  7%|▋ | 299/4250 [01:11<13:42, 4.80it/s]
611
  7%|▋ | 300/4250 [01:11<15:32, 4.24it/s]
612
  7%|▋ | 301/4250 [01:12<14:54, 4.41it/s]
613
  7%|▋ | 302/4250 [01:12<16:06, 4.08it/s]
614
  7%|▋ | 303/4250 [01:12<17:16, 3.81it/s]
615
  7%|▋ | 304/4250 [01:12<16:34, 3.97it/s]
616
  7%|▋ | 305/4250 [01:13<16:19, 4.03it/s]
617
  7%|▋ | 306/4250 [01:13<18:08, 3.62it/s]
618
  7%|▋ | 307/4250 [01:13<16:02, 4.09it/s]
619
  7%|▋ | 308/4250 [01:13<15:13, 4.31it/s]
620
  7%|▋ | 309/4250 [01:14<15:00, 4.38it/s]
621
  7%|▋ | 310/4250 [01:14<14:22, 4.57it/s]
622
  7%|▋ | 311/4250 [01:14<13:57, 4.70it/s]
623
  7%|▋ | 312/4250 [01:14<16:54, 3.88it/s]
624
  7%|▋ | 313/4250 [01:15<16:11, 4.05it/s]
625
  7%|▋ | 314/4250 [01:15<16:16, 4.03it/s]
626
  7%|▋ | 315/4250 [01:15<15:12, 4.31it/s]
627
  7%|▋ | 316/4250 [01:15<14:32, 4.51it/s]
628
  7%|▋ | 317/4250 [01:15<15:42, 4.17it/s]
629
  7%|▋ | 318/4250 [01:16<15:25, 4.25it/s]
630
  8%|▊ | 319/4250 [01:16<15:46, 4.15it/s]
631
  8%|▊ | 320/4250 [01:16<16:24, 3.99it/s]
632
  8%|▊ | 321/4250 [01:16<15:02, 4.35it/s]
633
  8%|▊ | 322/4250 [01:17<14:34, 4.49it/s]
634
  8%|▊ | 323/4250 [01:17<13:27, 4.87it/s]
635
  8%|▊ | 324/4250 [01:17<13:12, 4.95it/s]
636
  8%|▊ | 325/4250 [01:17<13:20, 4.90it/s]
637
  8%|▊ | 326/4250 [01:17<12:56, 5.06it/s]
638
  8%|▊ | 327/4250 [01:18<12:33, 5.21it/s]
639
  8%|▊ | 328/4250 [01:18<16:34, 3.94it/s]
640
  8%|▊ | 329/4250 [01:18<17:43, 3.69it/s]
641
  8%|▊ | 330/4250 [01:18<16:38, 3.92it/s]
642
  8%|▊ | 331/4250 [01:19<16:14, 4.02it/s]
643
  8%|▊ | 332/4250 [01:19<15:09, 4.31it/s]
644
  8%|▊ | 333/4250 [01:19<14:14, 4.59it/s]
645
  8%|▊ | 334/4250 [01:19<14:32, 4.49it/s]
646
  8%|▊ | 335/4250 [01:20<15:50, 4.12it/s]
647
  8%|▊ | 336/4250 [01:20<15:37, 4.17it/s]
648
  8%|▊ | 337/4250 [01:20<14:28, 4.50it/s]
649
  8%|▊ | 338/4250 [01:20<13:49, 4.72it/s]
650
  8%|▊ | 339/4250 [01:21<17:39, 3.69it/s]
651
  8%|▊ | 340/4250 [01:21<16:23, 3.98it/s]
652
  8%|▊ | 341/4250 [01:21<17:22, 3.75it/s]
653
  8%|▊ | 342/4250 [01:21<15:44, 4.14it/s]
654
  8%|▊ | 343/4250 [01:22<16:07, 4.04it/s]
655
  8%|▊ | 344/4250 [01:22<15:55, 4.09it/s]
656
  8%|▊ | 345/4250 [01:22<17:27, 3.73it/s]
657
  8%|▊ | 346/4250 [01:22<18:31, 3.51it/s]
658
  8%|▊ | 347/4250 [01:23<17:10, 3.79it/s]
659
  8%|▊ | 348/4250 [01:23<18:02, 3.61it/s]
660
  8%|▊ | 349/4250 [01:23<16:20, 3.98it/s]
661
  8%|▊ | 350/4250 [01:23<16:04, 4.04it/s]
662
  8%|▊ | 351/4250 [01:24<15:27, 4.20it/s]
663
  8%|▊ | 352/4250 [01:24<15:43, 4.13it/s]
664
  8%|▊ | 353/4250 [01:24<14:03, 4.62it/s]
665
  8%|▊ | 354/4250 [01:24<14:09, 4.58it/s]
666
  8%|▊ | 355/4250 [01:25<15:16, 4.25it/s]
667
  8%|▊ | 356/4250 [01:25<14:47, 4.39it/s]
668
  8%|▊ | 357/4250 [01:25<13:58, 4.64it/s]
669
  8%|▊ | 358/4250 [01:25<13:27, 4.82it/s]
670
  8%|▊ | 359/4250 [01:25<12:49, 5.06it/s]
671
  8%|▊ | 360/4250 [01:26<14:44, 4.40it/s]
672
  8%|▊ | 361/4250 [01:26<14:12, 4.56it/s]
673
  9%|▊ | 362/4250 [01:26<13:27, 4.81it/s]
674
  9%|▊ | 363/4250 [01:26<14:20, 4.52it/s]
675
  9%|▊ | 364/4250 [01:27<20:33, 3.15it/s]
676
  9%|▊ | 365/4250 [01:27<19:16, 3.36it/s]
677
  9%|▊ | 366/4250 [01:27<18:53, 3.43it/s]
678
  9%|▊ | 367/4250 [01:27<17:08, 3.77it/s]
679
  9%|▊ | 368/4250 [01:28<15:11, 4.26it/s]
680
  9%|▊ | 369/4250 [01:28<16:08, 4.01it/s]
681
  9%|▊ | 370/4250 [01:28<15:07, 4.27it/s]
682
  9%|▊ | 371/4250 [01:28<14:16, 4.53it/s]
683
  9%|▉ | 372/4250 [01:29<13:49, 4.67it/s]
684
  9%|▉ | 373/4250 [01:29<15:45, 4.10it/s]
685
  9%|▉ | 374/4250 [01:29<15:17, 4.22it/s]
686
  9%|▉ | 375/4250 [01:30<25:49, 2.50it/s]
687
  9%|▉ | 376/4250 [01:30<22:04, 2.92it/s]
688
  9%|▉ | 377/4250 [01:30<19:35, 3.30it/s]
689
  9%|▉ | 378/4250 [01:30<17:28, 3.69it/s]
690
  9%|▉ | 379/4250 [01:31<15:53, 4.06it/s]
691
  9%|▉ | 380/4250 [01:31<15:12, 4.24it/s]
692
  9%|▉ | 381/4250 [01:31<14:26, 4.47it/s]
693
  9%|▉ | 382/4250 [01:31<13:56, 4.62it/s]
694
  9%|▉ | 383/4250 [01:32<15:15, 4.22it/s]
695
  9%|▉ | 384/4250 [01:32<14:11, 4.54it/s]
696
  9%|▉ | 385/4250 [01:32<16:37, 3.87it/s]
697
  9%|▉ | 386/4250 [01:32<18:49, 3.42it/s]
698
  9%|▉ | 387/4250 [01:33<16:38, 3.87it/s]
699
  9%|▉ | 388/4250 [01:33<15:53, 4.05it/s]
700
  9%|▉ | 389/4250 [01:33<15:09, 4.24it/s]
701
  9%|▉ | 390/4250 [01:33<15:50, 4.06it/s]
702
  9%|▉ | 391/4250 [01:33<14:30, 4.44it/s]
703
  9%|▉ | 392/4250 [01:34<14:35, 4.41it/s]
704
  9%|▉ | 393/4250 [01:34<13:41, 4.70it/s]
705
  9%|▉ | 394/4250 [01:34<13:40, 4.70it/s]
706
  9%|▉ | 395/4250 [01:34<13:01, 4.93it/s]
707
  9%|▉ | 396/4250 [01:34<12:48, 5.02it/s]
708
  9%|▉ | 397/4250 [01:35<13:29, 4.76it/s]
709
  9%|▉ | 398/4250 [01:35<14:09, 4.53it/s]
710
  9%|▉ | 399/4250 [01:35<13:17, 4.83it/s]
711
  9%|▉ | 400/4250 [01:35<14:40, 4.37it/s]
712
  9%|▉ | 401/4250 [01:36<14:07, 4.54it/s]
713
  9%|▉ | 402/4250 [01:36<15:53, 4.04it/s]
714
  9%|▉ | 403/4250 [01:36<15:28, 4.14it/s]
715
  10%|▉ | 404/4250 [01:36<14:42, 4.36it/s]
716
  10%|▉ | 405/4250 [01:37<15:18, 4.18it/s]
717
  10%|▉ | 406/4250 [01:37<15:58, 4.01it/s]
718
  10%|▉ | 407/4250 [01:38<23:05, 2.77it/s]
719
  10%|▉ | 408/4250 [01:38<21:24, 2.99it/s]
720
  10%|▉ | 409/4250 [01:38<18:33, 3.45it/s]
721
  10%|▉ | 410/4250 [01:38<17:36, 3.63it/s]
722
  10%|▉ | 411/4250 [01:38<17:20, 3.69it/s]
723
  10%|▉ | 412/4250 [01:39<18:25, 3.47it/s]
724
  10%|▉ | 413/4250 [01:39<17:11, 3.72it/s]
725
  10%|▉ | 414/4250 [01:39<16:35, 3.86it/s]
726
  10%|▉ | 415/4250 [01:39<15:19, 4.17it/s]
727
  10%|▉ | 416/4250 [01:40<14:55, 4.28it/s]
728
  10%|▉ | 417/4250 [01:40<14:53, 4.29it/s]
729
  10%|▉ | 418/4250 [01:40<13:45, 4.64it/s]
730
  10%|▉ | 419/4250 [01:40<14:57, 4.27it/s]
731
  10%|▉ | 420/4250 [01:41<15:01, 4.25it/s]
732
  10%|▉ | 421/4250 [01:41<15:29, 4.12it/s]
733
  10%|▉ | 422/4250 [01:41<15:00, 4.25it/s]
734
  10%|▉ | 423/4250 [01:41<13:52, 4.60it/s]
735
  10%|▉ | 424/4250 [01:42<14:35, 4.37it/s]
736
  10%|█ | 425/4250 [01:42<14:28, 4.40it/s][INFO|trainer.py:805] 2024-08-30 19:58:29,112 >> The following columns in the evaluation set don't have a corresponding argument in `RobertaForTokenClassification.forward` and have been ignored: id, ner_tags, tokens. If id, ner_tags, tokens are not expected by `RobertaForTokenClassification.forward`, you can safely ignore this message.
737
+ [INFO|trainer.py:3788] 2024-08-30 19:58:29,114 >>
738
+ ***** Running Evaluation *****
739
+ [INFO|trainer.py:3790] 2024-08-30 19:58:29,114 >> Num examples = 6810
740
+ [INFO|trainer.py:3793] 2024-08-30 19:58:29,114 >> Batch size = 8
741
+
742
+
743
  0%| | 0/852 [00:00<?, ?it/s]
744
+
745
  1%| | 10/852 [00:00<00:09, 89.20it/s]
746
+
747
  2%|▏ | 19/852 [00:00<00:10, 78.42it/s]
748
+
749
  3%|▎ | 27/852 [00:00<00:10, 78.13it/s]
750
+
751
  4%|▍ | 35/852 [00:00<00:10, 77.05it/s]
752
+
753
  5%|▌ | 43/852 [00:00<00:10, 77.06it/s]
754
+
755
  6%|▌ | 51/852 [00:00<00:10, 77.66it/s]
756
+
757
  7%|▋ | 60/852 [00:00<00:10, 78.68it/s]
758
+
759
  8%|▊ | 68/852 [00:00<00:10, 76.32it/s]
760
+
761
  9%|▉ | 76/852 [00:00<00:10, 76.81it/s]
762
+
763
  10%|▉ | 84/852 [00:01<00:10, 76.39it/s]
764
+
765
  11%|█ | 92/852 [00:01<00:09, 76.10it/s]
766
+
767
  12%|█▏ | 100/852 [00:01<00:09, 75.52it/s]
768
+
769
  13%|█▎ | 108/852 [00:01<00:09, 75.37it/s]
770
+
771
  14%|█▎ | 116/852 [00:01<00:09, 75.61it/s]
772
+
773
  15%|█▍ | 125/852 [00:01<00:09, 77.44it/s]
774
+
775
  16%|█▌ | 133/852 [00:01<00:09, 74.02it/s]
776
+
777
  17%|█▋ | 141/852 [00:01<00:09, 74.52it/s]
778
+
779
  17%|█▋ | 149/852 [00:01<00:09, 74.17it/s]
780
+
781
  19%|█▊ | 158/852 [00:02<00:09, 76.52it/s]
782
+
783
  19%|█▉ | 166/852 [00:02<00:08, 76.93it/s]
784
+
785
  20%|██ | 174/852 [00:02<00:08, 77.40it/s]
786
+
787
  21%|██▏ | 182/852 [00:02<00:08, 77.81it/s]
788
+
789
  22%|██▏ | 191/852 [00:02<00:08, 78.32it/s]
790
+
791
  23%|██▎ | 199/852 [00:02<00:08, 78.40it/s]
792
+
793
  24%|██▍ | 207/852 [00:02<00:08, 77.19it/s]
794
+
795
  25%|██▌ | 215/852 [00:02<00:08, 76.20it/s]
796
+
797
  26%|██▌ | 223/852 [00:02<00:08, 77.17it/s]
798
+
799
  27%|██▋ | 231/852 [00:03<00:07, 77.72it/s]
800
+
801
  28%|██▊ | 239/852 [00:03<00:07, 76.96it/s]
802
+
803
  29%|██▉ | 247/852 [00:03<00:08, 75.20it/s]
804
+
805
  30%|███ | 256/852 [00:03<00:07, 77.34it/s]
806
+
807
  31%|███ | 264/852 [00:03<00:07, 76.46it/s]
808
+
809
  32%|███▏ | 272/852 [00:03<00:07, 73.76it/s]
810
+
811
  33%|███▎ | 280/852 [00:03<00:07, 74.90it/s]
812
+
813
  34%|███▍ | 288/852 [00:03<00:07, 74.86it/s]
814
+
815
  35%|███▍ | 296/852 [00:03<00:07, 76.06it/s]
816
+
817
  36%|███▌ | 305/852 [00:03<00:07, 77.75it/s]
818
+
819
  37%|███▋ | 313/852 [00:04<00:07, 75.79it/s]
820
+
821
  38%|███▊ | 321/852 [00:04<00:06, 76.62it/s]
822
+
823
  39%|███▊ | 329/852 [00:04<00:06, 76.40it/s]
824
+
825
  40%|███▉ | 337/852 [00:04<00:06, 76.71it/s]
826
+
827
  41%|████ | 346/852 [00:04<00:06, 77.07it/s]
828
+
829
  42%|████▏ | 354/852 [00:04<00:06, 76.21it/s]
830
+
831
  42%|████▏ | 362/852 [00:04<00:06, 76.53it/s]
832
+
833
  43%|████▎ | 370/852 [00:04<00:06, 76.40it/s]
834
+
835
  44%|████▍ | 378/852 [00:04<00:06, 76.51it/s]
836
+
837
  45%|████▌ | 386/852 [00:05<00:06, 75.64it/s]
838
+
839
  46%|████▌ | 394/852 [00:05<00:05, 76.44it/s]
840
+
841
  47%|████▋ | 402/852 [00:05<00:05, 76.42it/s]
842
+
843
  48%|████▊ | 410/852 [00:05<00:06, 72.56it/s]
844
+
845
  49%|████▉ | 418/852 [00:05<00:05, 74.22it/s]
846
+
847
  50%|█████ | 426/852 [00:05<00:05, 73.15it/s]
848
+
849
  51%|█████ | 435/852 [00:05<00:05, 74.99it/s]
850
+
851
  52%|█████▏ | 443/852 [00:05<00:05, 76.20it/s]
852
+
853
  53%|█████▎ | 451/852 [00:05<00:05, 76.98it/s]
854
+
855
  54%|█████▍ | 459/852 [00:06<00:05, 77.54it/s]
856
+
857
  55%|█████▍ | 467/852 [00:06<00:05, 74.54it/s]
858
+
859
  56%|█████▌ | 475/852 [00:06<00:05, 71.46it/s]
860
+
861
  57%|█████▋ | 483/852 [00:06<00:05, 72.27it/s]
862
+
863
  58%|█████▊ | 491/852 [00:06<00:04, 72.68it/s]
864
+
865
  59%|█████▊ | 499/852 [00:06<00:04, 73.96it/s]
866
+
867
  60%|█████▉ | 507/852 [00:06<00:04, 73.71it/s]
868
+
869
  60%|██████ | 515/852 [00:06<00:04, 74.07it/s]
870
+
871
  61%|██████▏ | 523/852 [00:06<00:04, 73.58it/s]
872
+
873
  62%|██████▏ | 531/852 [00:06<00:04, 74.92it/s]
874
+
875
  63%|██████▎ | 540/852 [00:07<00:04, 76.68it/s]
876
+
877
  64%|██████▍ | 548/852 [00:07<00:03, 77.09it/s]
878
+
879
  65%|██████▌ | 556/852 [00:07<00:03, 74.59it/s]
880
+
881
  66%|██████▌ | 564/852 [00:07<00:03, 75.78it/s]
882
+
883
  67%|██████▋ | 572/852 [00:07<00:03, 76.36it/s]
884
+
885
  68%|██████▊ | 580/852 [00:07<00:03, 77.03it/s]
886
+
887
  69%|██████▉ | 588/852 [00:07<00:03, 76.20it/s]
888
+
889
  70%|███████ | 597/852 [00:07<00:03, 77.37it/s]
890
+
891
  71%|███████ | 605/852 [00:07<00:03, 77.54it/s]
892
+
893
  72%|███████▏ | 613/852 [00:08<00:03, 77.35it/s]
894
+
895
  73%|███████▎ | 621/852 [00:08<00:03, 76.88it/s]
896
+
897
  74%|███████▍ | 629/852 [00:08<00:02, 75.44it/s]
898
+
899
  75%|███████▍ | 637/852 [00:08<00:02, 76.45it/s]
900
+
901
  76%|███████▌ | 645/852 [00:08<00:02, 74.01it/s]
902
+
903
  77%|███████▋ | 654/852 [00:08<00:02, 76.16it/s]
904
+
905
  78%|███████▊ | 662/852 [00:08<00:02, 76.57it/s]
906
+
907
  79%|███████▊ | 670/852 [00:08<00:02, 77.05it/s]
908
+
909
  80%|███████▉ | 678/852 [00:08<00:02, 77.66it/s]
910
+
911
  81%|████████ | 686/852 [00:09<00:02, 78.20it/s]
912
+
913
  81%|████████▏ | 694/852 [00:09<00:02, 78.37it/s]
914
+
915
  83%|████████▎ | 703/852 [00:09<00:01, 79.27it/s]
916
+
917
  84%|████████▎ | 712/852 [00:09<00:01, 80.17it/s]
918
+
919
  85%|████████▍ | 721/852 [00:09<00:01, 79.07it/s]
920
+
921
  86%|████████▌ | 730/852 [00:09<00:01, 79.67it/s]
922
+
923
  87%|████████▋ | 738/852 [00:09<00:01, 79.74it/s]
924
+
925
  88%|████████▊ | 746/852 [00:09<00:01, 79.62it/s]
926
+
927
  88%|████████▊ | 754/852 [00:09<00:01, 79.63it/s]
928
+
929
  90%|████████▉ | 763/852 [00:09<00:01, 79.84it/s]
930
+
931
  90%|█████████ | 771/852 [00:10<00:01, 78.47it/s]
932
+
933
  91%|█████████▏| 779/852 [00:10<00:00, 77.58it/s]
934
+
935
  92%|█████████▏| 787/852 [00:10<00:00, 76.86it/s]
936
+
937
  93%|█████████▎| 796/852 [00:10<00:00, 77.54it/s]
938
+
939
  94%|█████████▍| 805/852 [00:10<00:00, 78.72it/s]
940
+
941
  95%|█████████▌| 813/852 [00:10<00:00, 77.73it/s]
942
+
943
  96%|█████████▋| 822/852 [00:10<00:00, 78.66it/s]
944
+
945
  98%|█████████▊| 831/852 [00:10<00:00, 79.83it/s]
946
+
947
  98%|█████████▊| 839/852 [00:10<00:00, 79.61it/s]
948
+
949
  99%|█████████▉| 847/852 [00:11<00:00, 77.55it/s]/usr/local/lib/python3.10/dist-packages/seqeval/metrics/v1.py:57: UndefinedMetricWarning: Recall and F-score are ill-defined and being set to 0.0 in labels with no true samples. Use `zero_division` parameter to control this behavior.
950
+ _warn_prf(average, modifier, msg_start, len(result))
951
+
952
 
953
+
954
 
955
  10%|█ | 425/4250 [01:57<14:28, 4.40it/s]
956
+
957
+
958
  [INFO|trainer.py:3478] 2024-08-30 19:58:43,890 >> Saving model checkpoint to /content/dissertation/scripts/ner/output/checkpoint-425
959
+ [INFO|configuration_utils.py:472] 2024-08-30 19:58:43,892 >> Configuration saved in /content/dissertation/scripts/ner/output/checkpoint-425/config.json
960
+ [INFO|modeling_utils.py:2690] 2024-08-30 19:58:45,270 >> Model weights saved in /content/dissertation/scripts/ner/output/checkpoint-425/model.safetensors
961
+ [INFO|tokenization_utils_base.py:2574] 2024-08-30 19:58:45,271 >> tokenizer config file saved in /content/dissertation/scripts/ner/output/checkpoint-425/tokenizer_config.json
962
+ [INFO|tokenization_utils_base.py:2583] 2024-08-30 19:58:45,271 >> Special tokens file saved in /content/dissertation/scripts/ner/output/checkpoint-425/special_tokens_map.json
963
+ [INFO|tokenization_utils_base.py:2574] 2024-08-30 19:58:47,606 >> tokenizer config file saved in /content/dissertation/scripts/ner/output/tokenizer_config.json
964
+ [INFO|tokenization_utils_base.py:2583] 2024-08-30 19:58:47,607 >> Special tokens file saved in /content/dissertation/scripts/ner/output/special_tokens_map.json
965
+
966
  10%|█ | 426/4250 [02:01<6:11:12, 5.82s/it]
967
  10%|█ | 427/4250 [02:01<4:23:04, 4.13s/it]
968
  10%|█ | 428/4250 [02:01<3:08:07, 2.95s/it]
969
  10%|█ | 429/4250 [02:01<2:17:52, 2.17s/it]
970
  10%|█ | 430/4250 [02:01<1:39:43, 1.57s/it]
971
  10%|█ | 431/4250 [02:02<1:14:51, 1.18s/it]
972
  10%|█ | 432/4250 [02:02<56:05, 1.13it/s]
973
  10%|█ | 433/4250 [02:02<42:36, 1.49it/s]
974
  10%|█ | 434/4250 [02:02<35:09, 1.81it/s]
975
  10%|█ | 435/4250 [02:03<27:53, 2.28it/s]
976
  10%|█ | 436/4250 [02:03<23:52, 2.66it/s]
977
  10%|█ | 437/4250 [02:03<20:33, 3.09it/s]
978
  10%|█ | 438/4250 [02:03<18:19, 3.47it/s]
979
  10%|█ | 439/4250 [02:03<16:56, 3.75it/s]
980
  10%|█ | 440/4250 [02:04<15:28, 4.10it/s]
981
  10%|█ | 441/4250 [02:04<14:40, 4.33it/s]
982
  10%|█ | 442/4250 [02:04<14:39, 4.33it/s]
983
  10%|█ | 443/4250 [02:04<16:18, 3.89it/s]
984
  10%|█ | 444/4250 [02:05<15:50, 4.01it/s]
985
  10%|█ | 445/4250 [02:05<14:21, 4.42it/s]
986
  10%|█ | 446/4250 [02:05<15:17, 4.15it/s]
987
  11%|█ | 447/4250 [02:05<14:24, 4.40it/s]
988
  11%|█ | 448/4250 [02:05<13:17, 4.77it/s]
989
  11%|█ | 449/4250 [02:06<13:08, 4.82it/s]
990
  11%|█ | 450/4250 [02:06<15:05, 4.20it/s]
991
  11%|█ | 451/4250 [02:06<15:49, 4.00it/s]
992
  11%|█ | 452/4250 [02:06<14:51, 4.26it/s]
993
  11%|█ | 453/4250 [02:07<15:37, 4.05it/s]
994
  11%|█ | 454/4250 [02:07<15:38, 4.04it/s]
995
  11%|█ | 455/4250 [02:07<15:50, 3.99it/s]
996
  11%|█ | 456/4250 [02:07<14:25, 4.38it/s]
997
  11%|█ | 457/4250 [02:08<13:17, 4.76it/s]
998
  11%|█ | 458/4250 [02:08<14:22, 4.40it/s]
999
  11%|█ | 459/4250 [02:09<24:54, 2.54it/s]
1000
  11%|█ | 460/4250 [02:09<25:12, 2.51it/s]
1001
  11%|█ | 461/4250 [02:09<23:10, 2.73it/s]
1002
  11%|█ | 462/4250 [02:09<20:15, 3.12it/s]
1003
  11%|█ | 463/4250 [02:10<17:57, 3.52it/s]
1004
  11%|█ | 464/4250 [02:10<16:49, 3.75it/s]
1005
  11%|█ | 465/4250 [02:10<15:28, 4.08it/s]
1006
  11%|█ | 466/4250 [02:10<14:54, 4.23it/s]
1007
  11%|█ | 467/4250 [02:11<15:20, 4.11it/s]
1008
  11%|█ | 468/4250 [02:11<15:20, 4.11it/s]
1009
  11%|█ | 469/4250 [02:11<14:55, 4.22it/s]
1010
  11%|█ | 470/4250 [02:11<14:12, 4.44it/s]
1011
  11%|█ | 471/4250 [02:11<13:01, 4.84it/s]
1012
  11%|█ | 472/4250 [02:12<14:33, 4.33it/s]
1013
  11%|█ | 473/4250 [02:12<14:12, 4.43it/s]
1014
  11%|█ | 474/4250 [02:12<15:29, 4.06it/s]
1015
  11%|█ | 475/4250 [02:13<19:41, 3.20it/s]
1016
  11%|█ | 476/4250 [02:13<19:02, 3.30it/s]
1017
  11%|█ | 477/4250 [02:13<18:12, 3.45it/s]
1018
  11%|█ | 478/4250 [02:14<18:30, 3.40it/s]
1019
  11%|█▏ | 479/4250 [02:14<17:11, 3.66it/s]
1020
  11%|█▏ | 480/4250 [02:14<15:16, 4.11it/s]
1021
  11%|█▏ | 481/4250 [02:14<16:04, 3.91it/s]
1022
  11%|█▏ | 482/4250 [02:15<17:51, 3.52it/s]
1023
  11%|█▏ | 483/4250 [02:15<16:15, 3.86it/s]
1024
  11%|█▏ | 484/4250 [02:15<14:37, 4.29it/s]
1025
  11%|█▏ | 485/4250 [02:15<14:49, 4.23it/s]
1026
  11%|█▏ | 486/4250 [02:15<15:41, 4.00it/s]
1027
  11%|█▏ | 487/4250 [02:16<14:31, 4.32it/s]
1028
  11%|█▏ | 488/4250 [02:16<14:11, 4.42it/s]
1029
  12%|█▏ | 489/4250 [02:16<18:02, 3.48it/s]
1030
  12%|█▏ | 490/4250 [02:17<16:37, 3.77it/s]
1031
  12%|█▏ | 491/4250 [02:17<16:43, 3.74it/s]
1032
  12%|█▏ | 492/4250 [02:17<15:55, 3.93it/s]
1033
  12%|█▏ | 493/4250 [02:17<14:34, 4.30it/s]
1034
  12%|█▏ | 494/4250 [02:17<15:06, 4.14it/s]
1035
  12%|█▏ | 495/4250 [02:18<17:03, 3.67it/s]
1036
  12%|█▏ | 496/4250 [02:18<16:05, 3.89it/s]
1037
  12%|█▏ | 497/4250 [02:18<14:37, 4.28it/s]
1038
  12%|█▏ | 498/4250 [02:18<13:21, 4.68it/s]
1039
  12%|█▏ | 499/4250 [02:19<14:14, 4.39it/s]
1040
  12%|█▏ | 500/4250 [02:19<13:55, 4.49it/s]
1041
 
1042
  12%|█▏ | 500/4250 [02:19<13:55, 4.49it/s]
1043
  12%|█▏ | 501/4250 [02:19<16:34, 3.77it/s]
train_results.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 9.988249118683902,
3
+ "total_flos": 1.2649810588547778e+16,
4
+ "train_loss": 0.10639642311544979,
5
+ "train_runtime": 1205.6346,
6
+ "train_samples": 27229,
7
+ "train_samples_per_second": 225.848,
8
+ "train_steps_per_second": 3.525
9
+ }
trainer_state.json ADDED
@@ -0,0 +1,218 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.4624414693662204,
3
+ "best_model_checkpoint": "/content/dissertation/scripts/ner/output/checkpoint-2127",
4
+ "epoch": 9.988249118683902,
5
+ "eval_steps": 500,
6
+ "global_step": 4250,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.9988249118683902,
13
+ "eval_accuracy": 0.8517239485355707,
14
+ "eval_f1": 0.4251637936518033,
15
+ "eval_loss": 0.3833567500114441,
16
+ "eval_precision": 0.29195422381409974,
17
+ "eval_recall": 0.7819372952737482,
18
+ "eval_runtime": 14.7317,
19
+ "eval_samples_per_second": 462.27,
20
+ "eval_steps_per_second": 57.835,
21
+ "step": 425
22
+ },
23
+ {
24
+ "epoch": 1.1750881316098707,
25
+ "grad_norm": 1.4406206607818604,
26
+ "learning_rate": 4.411764705882353e-05,
27
+ "loss": 0.3349,
28
+ "step": 500
29
+ },
30
+ {
31
+ "epoch": 2.0,
32
+ "eval_accuracy": 0.822140140332569,
33
+ "eval_f1": 0.40249737425603915,
34
+ "eval_loss": 0.5730458498001099,
35
+ "eval_precision": 0.2681125621890547,
36
+ "eval_recall": 0.8069723912026205,
37
+ "eval_runtime": 14.9036,
38
+ "eval_samples_per_second": 456.937,
39
+ "eval_steps_per_second": 57.167,
40
+ "step": 851
41
+ },
42
+ {
43
+ "epoch": 2.3501762632197414,
44
+ "grad_norm": 2.988006591796875,
45
+ "learning_rate": 3.8235294117647055e-05,
46
+ "loss": 0.1788,
47
+ "step": 1000
48
+ },
49
+ {
50
+ "epoch": 2.99882491186839,
51
+ "eval_accuracy": 0.8338185013799827,
52
+ "eval_f1": 0.42018044559013074,
53
+ "eval_loss": 0.5796028971672058,
54
+ "eval_precision": 0.2847990681421083,
55
+ "eval_recall": 0.8008890968647637,
56
+ "eval_runtime": 14.7541,
57
+ "eval_samples_per_second": 461.567,
58
+ "eval_steps_per_second": 57.747,
59
+ "step": 1276
60
+ },
61
+ {
62
+ "epoch": 3.525264394829612,
63
+ "grad_norm": 1.1645787954330444,
64
+ "learning_rate": 3.235294117647059e-05,
65
+ "loss": 0.1227,
66
+ "step": 1500
67
+ },
68
+ {
69
+ "epoch": 4.0,
70
+ "eval_accuracy": 0.8388097820863142,
71
+ "eval_f1": 0.43759863644971914,
72
+ "eval_loss": 0.6590859889984131,
73
+ "eval_precision": 0.2996455433560993,
74
+ "eval_recall": 0.8109499298081423,
75
+ "eval_runtime": 14.8634,
76
+ "eval_samples_per_second": 458.172,
77
+ "eval_steps_per_second": 57.322,
78
+ "step": 1702
79
+ },
80
+ {
81
+ "epoch": 4.700352526439483,
82
+ "grad_norm": 1.1214195489883423,
83
+ "learning_rate": 2.647058823529412e-05,
84
+ "loss": 0.0856,
85
+ "step": 2000
86
+ },
87
+ {
88
+ "epoch": 4.9988249118683905,
89
+ "eval_accuracy": 0.8601754843670617,
90
+ "eval_f1": 0.4624414693662204,
91
+ "eval_loss": 0.6266195774078369,
92
+ "eval_precision": 0.32197630636422075,
93
+ "eval_recall": 0.8203088441740758,
94
+ "eval_runtime": 14.7766,
95
+ "eval_samples_per_second": 460.863,
96
+ "eval_steps_per_second": 57.659,
97
+ "step": 2127
98
+ },
99
+ {
100
+ "epoch": 5.875440658049354,
101
+ "grad_norm": 1.1390776634216309,
102
+ "learning_rate": 2.058823529411765e-05,
103
+ "loss": 0.0597,
104
+ "step": 2500
105
+ },
106
+ {
107
+ "epoch": 6.0,
108
+ "eval_accuracy": 0.8475977316105291,
109
+ "eval_f1": 0.44600244420145363,
110
+ "eval_loss": 0.7858611941337585,
111
+ "eval_precision": 0.3075490109110263,
112
+ "eval_recall": 0.8111839026672906,
113
+ "eval_runtime": 14.8163,
114
+ "eval_samples_per_second": 459.628,
115
+ "eval_steps_per_second": 57.504,
116
+ "step": 2553
117
+ },
118
+ {
119
+ "epoch": 6.9988249118683905,
120
+ "eval_accuracy": 0.8507970944841886,
121
+ "eval_f1": 0.45324675324675323,
122
+ "eval_loss": 0.829707145690918,
123
+ "eval_precision": 0.3136796692432141,
124
+ "eval_recall": 0.8165652784277024,
125
+ "eval_runtime": 14.8867,
126
+ "eval_samples_per_second": 457.456,
127
+ "eval_steps_per_second": 57.232,
128
+ "step": 2978
129
+ },
130
+ {
131
+ "epoch": 7.050528789659224,
132
+ "grad_norm": 0.5195357799530029,
133
+ "learning_rate": 1.4705882352941177e-05,
134
+ "loss": 0.0458,
135
+ "step": 3000
136
+ },
137
+ {
138
+ "epoch": 8.0,
139
+ "eval_accuracy": 0.8532206461889135,
140
+ "eval_f1": 0.453628249902988,
141
+ "eval_loss": 0.8468152284622192,
142
+ "eval_precision": 0.3134608509116911,
143
+ "eval_recall": 0.8205428170332242,
144
+ "eval_runtime": 15.05,
145
+ "eval_samples_per_second": 452.493,
146
+ "eval_steps_per_second": 56.611,
147
+ "step": 3404
148
+ },
149
+ {
150
+ "epoch": 8.225616921269095,
151
+ "grad_norm": 0.8723571300506592,
152
+ "learning_rate": 8.823529411764707e-06,
153
+ "loss": 0.0343,
154
+ "step": 3500
155
+ },
156
+ {
157
+ "epoch": 8.99882491186839,
158
+ "eval_accuracy": 0.8493827838576352,
159
+ "eval_f1": 0.44807482862451153,
160
+ "eval_loss": 0.9241161346435547,
161
+ "eval_precision": 0.30851345390383766,
162
+ "eval_recall": 0.8182030884417407,
163
+ "eval_runtime": 14.662,
164
+ "eval_samples_per_second": 464.465,
165
+ "eval_steps_per_second": 58.109,
166
+ "step": 3829
167
+ },
168
+ {
169
+ "epoch": 9.400705052878966,
170
+ "grad_norm": 0.6075822710990906,
171
+ "learning_rate": 2.9411764705882355e-06,
172
+ "loss": 0.0292,
173
+ "step": 4000
174
+ },
175
+ {
176
+ "epoch": 9.988249118683902,
177
+ "eval_accuracy": 0.8498565092616749,
178
+ "eval_f1": 0.44938176197836166,
179
+ "eval_loss": 0.938378632068634,
180
+ "eval_precision": 0.31002310289674784,
181
+ "eval_recall": 0.816331305568554,
182
+ "eval_runtime": 15.4697,
183
+ "eval_samples_per_second": 440.215,
184
+ "eval_steps_per_second": 55.075,
185
+ "step": 4250
186
+ },
187
+ {
188
+ "epoch": 9.988249118683902,
189
+ "step": 4250,
190
+ "total_flos": 1.2649810588547778e+16,
191
+ "train_loss": 0.10639642311544979,
192
+ "train_runtime": 1205.6346,
193
+ "train_samples_per_second": 225.848,
194
+ "train_steps_per_second": 3.525
195
+ }
196
+ ],
197
+ "logging_steps": 500,
198
+ "max_steps": 4250,
199
+ "num_input_tokens_seen": 0,
200
+ "num_train_epochs": 10,
201
+ "save_steps": 500,
202
+ "stateful_callbacks": {
203
+ "TrainerControl": {
204
+ "args": {
205
+ "should_epoch_stop": false,
206
+ "should_evaluate": false,
207
+ "should_log": false,
208
+ "should_save": true,
209
+ "should_training_stop": true
210
+ },
211
+ "attributes": {}
212
+ }
213
+ },
214
+ "total_flos": 1.2649810588547778e+16,
215
+ "train_batch_size": 32,
216
+ "trial_name": null,
217
+ "trial_params": null
218
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3a7fb15010252c8cbc6a884d820aedc076e55d4b9641d17f55264378c0fcf155
3
+ size 5176
vocab.json ADDED
The diff for this file is too large to render. See raw diff