pranaydeeps committed on
Commit
1f78c04
1 Parent(s): 4835db8

Upload folder using huggingface_hub

.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ tokenizer.json filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,108 @@
1
+ ---
2
+ license: mit
3
+ tags:
4
+ - generated_from_trainer
5
+ metrics:
6
+ - precision
7
+ - recall
8
+ - f1
9
+ - accuracy
10
+ model-index:
11
+ - name: pos_final_xlm_de
12
+ results: []
13
+ ---
14
+
15
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
16
+ should probably proofread and complete it, then remove this comment. -->
17
+
18
+ # pos_final_xlm_de
19
+
20
+ This model is a fine-tuned version of [xlm-roberta-base](https://huggingface.co/xlm-roberta-base) on an unspecified German part-of-speech tagging dataset (a minimal usage sketch follows the evaluation metrics below).
21
+ It achieves the following results on the evaluation set:
22
+ - Loss: 0.0580
23
+ - Precision: 0.9895
24
+ - Recall: 0.9894
25
+ - F1: 0.9894
26
+ - Accuracy: 0.9901
27
+
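A minimal usage sketch, assuming the checkpoint is published under a repo id such as `pranaydeeps/pos_final_xlm_de` (an assumption, not stated in this card) or available as a local path:

```python
from transformers import pipeline

# Hypothetical repo id -- replace with the real repository name or a local
# path to this checkpoint. aggregation_strategy="simple" merges sub-word
# pieces back into whole words before reporting a tag.
tagger = pipeline(
    "token-classification",
    model="pranaydeeps/pos_final_xlm_de",
    aggregation_strategy="simple",
)

print(tagger("Der schnelle braune Fuchs springt über den faulen Hund."))
# Each entry carries 'word', 'entity_group' (an STTS tag such as ART, ADJA,
# NN or VVFIN) and a confidence 'score'.
```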
28
+ ## Model description
29
+
30
+ More information needed
31
+
32
+ ## Intended uses & limitations
33
+
34
+ More information needed
35
+
36
+ ## Training and evaluation data
37
+
38
+ More information needed
39
+
40
+ ## Training procedure
41
+
42
+ ### Training hyperparameters
43
+
44
+ The following hyperparameters were used during training (a hedged `TrainingArguments` sketch follows this list):
45
+ - learning_rate: 5e-05
46
+ - train_batch_size: 256
47
+ - eval_batch_size: 256
48
+ - seed: 42
49
+ - gradient_accumulation_steps: 4
50
+ - total_train_batch_size: 1024
51
+ - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
52
+ - lr_scheduler_type: linear
53
+ - lr_scheduler_warmup_steps: 500
54
+ - num_epochs: 40.0
55
+ - mixed_precision_training: Native AMP
56
+
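A sketch of how the hyperparameters above map onto `transformers.TrainingArguments`; the original training script is not part of this commit, `output_dir` is taken from `best_model_checkpoint` in `trainer_state.json`, and the 1024 total batch size assumes a single device:

```python
from transformers import TrainingArguments

training_args = TrainingArguments(
    output_dir="models/pos_final_xlm_de",  # matches best_model_checkpoint in trainer_state.json
    learning_rate=5e-5,
    per_device_train_batch_size=256,
    per_device_eval_batch_size=256,
    gradient_accumulation_steps=4,   # 256 * 4 = 1024 effective batch (single device)
    num_train_epochs=40.0,
    lr_scheduler_type="linear",
    warmup_steps=500,
    seed=42,
    fp16=True,                       # "Native AMP" mixed precision
)
```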
57
+ ### Training results
58
+
59
+ | Training Loss | Epoch | Step | Validation Loss | Precision | Recall | F1 | Accuracy |
60
+ |:-------------:|:-----:|:----:|:---------------:|:---------:|:------:|:------:|:--------:|
61
+ | No log | 0.99 | 128 | 0.3828 | 0.9159 | 0.9106 | 0.9133 | 0.9196 |
62
+ | No log | 1.99 | 256 | 0.0659 | 0.9810 | 0.9812 | 0.9811 | 0.9824 |
63
+ | No log | 2.99 | 384 | 0.0447 | 0.9857 | 0.9857 | 0.9857 | 0.9865 |
64
+ | 0.7525 | 3.99 | 512 | 0.0388 | 0.9870 | 0.9871 | 0.9871 | 0.9878 |
65
+ | 0.7525 | 4.99 | 640 | 0.0373 | 0.9871 | 0.9875 | 0.9873 | 0.9881 |
66
+ | 0.7525 | 5.99 | 768 | 0.0354 | 0.9880 | 0.9882 | 0.9881 | 0.9889 |
67
+ | 0.7525 | 6.99 | 896 | 0.0350 | 0.9883 | 0.9885 | 0.9884 | 0.9891 |
68
+ | 0.0318 | 7.99 | 1024 | 0.0354 | 0.9884 | 0.9886 | 0.9885 | 0.9891 |
69
+ | 0.0318 | 8.99 | 1152 | 0.0356 | 0.9888 | 0.9888 | 0.9888 | 0.9894 |
70
+ | 0.0318 | 9.99 | 1280 | 0.0367 | 0.9888 | 0.9889 | 0.9888 | 0.9895 |
71
+ | 0.0318 | 10.99 | 1408 | 0.0370 | 0.9887 | 0.9888 | 0.9887 | 0.9894 |
72
+ | 0.0205 | 11.99 | 1536 | 0.0370 | 0.9889 | 0.9891 | 0.9890 | 0.9896 |
73
+ | 0.0205 | 12.99 | 1664 | 0.0388 | 0.9888 | 0.9889 | 0.9888 | 0.9895 |
74
+ | 0.0205 | 13.99 | 1792 | 0.0397 | 0.9890 | 0.9891 | 0.9890 | 0.9897 |
75
+ | 0.0205 | 14.99 | 1920 | 0.0403 | 0.9891 | 0.9891 | 0.9891 | 0.9897 |
76
+ | 0.0146 | 15.99 | 2048 | 0.0413 | 0.9891 | 0.9891 | 0.9891 | 0.9897 |
77
+ | 0.0146 | 16.99 | 2176 | 0.0423 | 0.9891 | 0.9891 | 0.9891 | 0.9898 |
78
+ | 0.0146 | 17.99 | 2304 | 0.0429 | 0.9891 | 0.9891 | 0.9891 | 0.9897 |
79
+ | 0.0146 | 18.99 | 2432 | 0.0443 | 0.9893 | 0.9894 | 0.9893 | 0.9899 |
80
+ | 0.0103 | 19.99 | 2560 | 0.0457 | 0.9890 | 0.9889 | 0.9890 | 0.9896 |
81
+ | 0.0103 | 20.99 | 2688 | 0.0455 | 0.9891 | 0.9892 | 0.9891 | 0.9898 |
82
+ | 0.0103 | 21.99 | 2816 | 0.0468 | 0.9891 | 0.9892 | 0.9891 | 0.9898 |
83
+ | 0.0103 | 22.99 | 2944 | 0.0491 | 0.9891 | 0.9892 | 0.9892 | 0.9898 |
84
+ | 0.0073 | 23.99 | 3072 | 0.0495 | 0.9894 | 0.9894 | 0.9894 | 0.9900 |
85
+ | 0.0073 | 24.99 | 3200 | 0.0503 | 0.9892 | 0.9892 | 0.9892 | 0.9898 |
86
+ | 0.0073 | 25.99 | 3328 | 0.0519 | 0.9892 | 0.9892 | 0.9892 | 0.9898 |
87
+ | 0.0073 | 26.99 | 3456 | 0.0522 | 0.9892 | 0.9893 | 0.9892 | 0.9899 |
88
+ | 0.0052 | 27.99 | 3584 | 0.0526 | 0.9892 | 0.9892 | 0.9892 | 0.9899 |
89
+ | 0.0052 | 28.99 | 3712 | 0.0535 | 0.9892 | 0.9892 | 0.9892 | 0.9899 |
90
+ | 0.0052 | 29.99 | 3840 | 0.0544 | 0.9894 | 0.9894 | 0.9894 | 0.9900 |
91
+ | 0.0052 | 30.99 | 3968 | 0.0548 | 0.9893 | 0.9894 | 0.9894 | 0.9900 |
92
+ | 0.0038 | 31.99 | 4096 | 0.0563 | 0.9892 | 0.9892 | 0.9892 | 0.9899 |
93
+ | 0.0038 | 32.99 | 4224 | 0.0562 | 0.9894 | 0.9894 | 0.9894 | 0.9900 |
94
+ | 0.0038 | 33.99 | 4352 | 0.0577 | 0.9891 | 0.9892 | 0.9892 | 0.9898 |
95
+ | 0.0038 | 34.99 | 4480 | 0.0580 | 0.9895 | 0.9894 | 0.9894 | 0.9901 |
96
+ | 0.003 | 35.99 | 4608 | 0.0581 | 0.9893 | 0.9894 | 0.9894 | 0.9900 |
97
+ | 0.003 | 36.99 | 4736 | 0.0585 | 0.9893 | 0.9893 | 0.9893 | 0.9899 |
98
+ | 0.003 | 37.99 | 4864 | 0.0586 | 0.9893 | 0.9894 | 0.9893 | 0.9900 |
99
+ | 0.003 | 38.99 | 4992 | 0.0588 | 0.9893 | 0.9894 | 0.9894 | 0.9900 |
100
+ | 0.0024 | 39.99 | 5120 | 0.0589 | 0.9894 | 0.9894 | 0.9894 | 0.9900 |
101
+
102
+
103
+ ### Framework versions
104
+
105
+ - Transformers 4.25.1
106
+ - Pytorch 1.12.0
107
+ - Datasets 2.18.0
108
+ - Tokenizers 0.13.2
all_results.json ADDED
@@ -0,0 +1,17 @@
1
+ {
2
+ "epoch": 39.99,
3
+ "eval_accuracy": 0.9900658187453014,
4
+ "eval_f1": 0.9894462659525121,
5
+ "eval_loss": 0.05798300728201866,
6
+ "eval_precision": 0.989465880076756,
7
+ "eval_recall": 0.9894266526058723,
8
+ "eval_runtime": 18.9966,
9
+ "eval_samples": 437,
10
+ "eval_samples_per_second": 771.139,
11
+ "eval_steps_per_second": 3.053,
12
+ "train_loss": 0.08320926361484453,
13
+ "train_runtime": 4249.1875,
14
+ "train_samples": 131833,
15
+ "train_samples_per_second": 1241.018,
16
+ "train_steps_per_second": 1.205
17
+ }
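The aggregated metrics above can be read back with plain `json`; a small sketch, noting that the Trainer's `train_samples_per_second` appears to count samples across all 40 epochs (131833 × ~40 / 4249.19 s ≈ 1241):

```python
import json

with open("all_results.json") as f:
    results = json.load(f)

print(f"eval F1: {results['eval_f1']:.4f}  accuracy: {results['eval_accuracy']:.4f}")
# Rough throughput cross-check against train_samples_per_second (~1241):
print(results["train_samples"] * results["epoch"] / results["train_runtime"])
```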
config.json ADDED
@@ -0,0 +1,139 @@
1
+ {
2
+ "_name_or_path": "xlm-roberta-base",
3
+ "architectures": [
4
+ "XLMRobertaForTokenClassification"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "bos_token_id": 0,
8
+ "classifier_dropout": null,
9
+ "eos_token_id": 2,
10
+ "finetuning_task": "pos",
11
+ "hidden_act": "gelu",
12
+ "hidden_dropout_prob": 0.1,
13
+ "hidden_size": 768,
14
+ "id2label": {
15
+ "0": "ART",
16
+ "1": "PWAV",
17
+ "2": "PIAT",
18
+ "3": "ADV",
19
+ "4": "KON",
20
+ "5": "VAPP",
21
+ "6": "ITJ",
22
+ "7": "$,",
23
+ "8": "PPOSAT",
24
+ "9": "VAINF",
25
+ "10": "PRELAT",
26
+ "11": "FM",
27
+ "12": "VVPP",
28
+ "13": "PWS",
29
+ "14": "VVIZU",
30
+ "15": "ADJD",
31
+ "16": "APZR",
32
+ "17": "NN",
33
+ "18": "TRUNC",
34
+ "19": "PTKA",
35
+ "20": "PROAV",
36
+ "21": "CARD",
37
+ "22": "PDS",
38
+ "23": "VMINF",
39
+ "24": "PRELS",
40
+ "25": "VVIMP",
41
+ "26": "PPOSS",
42
+ "27": "PDAT",
43
+ "28": "KOKOM",
44
+ "29": "PTKANT",
45
+ "30": "APPRART",
46
+ "31": "KOUI",
47
+ "32": "PIS",
48
+ "33": "PPER",
49
+ "34": "VVINF",
50
+ "35": "APPR",
51
+ "36": "KOUS",
52
+ "37": "PTKNEG",
53
+ "38": "PRF",
54
+ "39": "PWAT",
55
+ "40": "APPO",
56
+ "41": "$.",
57
+ "42": "$(",
58
+ "43": "PTKVZ",
59
+ "44": "VMFIN",
60
+ "45": "VMPP",
61
+ "46": "XY",
62
+ "47": "VAIMP",
63
+ "48": "ADJA",
64
+ "49": "VVFIN",
65
+ "50": "NE",
66
+ "51": "VAFIN",
67
+ "52": "PTKZU"
68
+ },
69
+ "initializer_range": 0.02,
70
+ "intermediate_size": 3072,
71
+ "label2id": {
72
+ "$(": 42,
73
+ "$,": 7,
74
+ "$.": 41,
75
+ "ADJA": 48,
76
+ "ADJD": 15,
77
+ "ADV": 3,
78
+ "APPO": 40,
79
+ "APPR": 35,
80
+ "APPRART": 30,
81
+ "APZR": 16,
82
+ "ART": 0,
83
+ "CARD": 21,
84
+ "FM": 11,
85
+ "ITJ": 6,
86
+ "KOKOM": 28,
87
+ "KON": 4,
88
+ "KOUI": 31,
89
+ "KOUS": 36,
90
+ "NE": 50,
91
+ "NN": 17,
92
+ "PDAT": 27,
93
+ "PDS": 22,
94
+ "PIAT": 2,
95
+ "PIS": 32,
96
+ "PPER": 33,
97
+ "PPOSAT": 8,
98
+ "PPOSS": 26,
99
+ "PRELAT": 10,
100
+ "PRELS": 24,
101
+ "PRF": 38,
102
+ "PROAV": 20,
103
+ "PTKA": 19,
104
+ "PTKANT": 29,
105
+ "PTKNEG": 37,
106
+ "PTKVZ": 43,
107
+ "PTKZU": 52,
108
+ "PWAT": 39,
109
+ "PWAV": 1,
110
+ "PWS": 13,
111
+ "TRUNC": 18,
112
+ "VAFIN": 51,
113
+ "VAIMP": 47,
114
+ "VAINF": 9,
115
+ "VAPP": 5,
116
+ "VMFIN": 44,
117
+ "VMINF": 23,
118
+ "VMPP": 45,
119
+ "VVFIN": 49,
120
+ "VVIMP": 25,
121
+ "VVINF": 34,
122
+ "VVIZU": 14,
123
+ "VVPP": 12,
124
+ "XY": 46
125
+ },
126
+ "layer_norm_eps": 1e-05,
127
+ "max_position_embeddings": 514,
128
+ "model_type": "xlm-roberta",
129
+ "num_attention_heads": 12,
130
+ "num_hidden_layers": 12,
131
+ "output_past": true,
132
+ "pad_token_id": 1,
133
+ "position_embedding_type": "absolute",
134
+ "torch_dtype": "float32",
135
+ "transformers_version": "4.25.1",
136
+ "type_vocab_size": 1,
137
+ "use_cache": true,
138
+ "vocab_size": 250002
139
+ }
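The `id2label` table above maps the 53 classifier outputs to STTS part-of-speech tags. A sketch of decoding raw logits with it; the local path is a placeholder for wherever this checkpoint is stored:

```python
import torch
from transformers import AutoModelForTokenClassification, AutoTokenizer

ckpt = "./pos_final_xlm_de"  # placeholder path to this checkpoint
model = AutoModelForTokenClassification.from_pretrained(ckpt)
tokenizer = AutoTokenizer.from_pretrained(ckpt)

inputs = tokenizer("Die Katze schläft.", return_tensors="pt")
with torch.no_grad():
    logits = model(**inputs).logits          # shape: (1, seq_len, 53)

pred_ids = logits.argmax(dim=-1)[0].tolist()
tags = [model.config.id2label[i] for i in pred_ids]
print(list(zip(tokenizer.convert_ids_to_tokens(inputs["input_ids"][0].tolist()), tags)))
```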
eval_results.json ADDED
@@ -0,0 +1,12 @@
1
+ {
2
+ "epoch": 39.99,
3
+ "eval_accuracy": 0.9900658187453014,
4
+ "eval_f1": 0.9894462659525121,
5
+ "eval_loss": 0.05798300728201866,
6
+ "eval_precision": 0.989465880076756,
7
+ "eval_recall": 0.9894266526058723,
8
+ "eval_runtime": 18.9966,
9
+ "eval_samples": 437,
10
+ "eval_samples_per_second": 771.139,
11
+ "eval_steps_per_second": 3.053
12
+ }
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3a350051c0c5aa4a2de0ad5428149141a07bea1a180f07e9ed68a1598404b51b
3
+ size 1110046321
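The three lines above are a Git LFS pointer; the ~1.1 GB weight file itself lives in LFS storage. A sketch of materializing the full files with `huggingface_hub` (the repo id is an assumption):

```python
from huggingface_hub import snapshot_download

# snapshot_download resolves LFS pointers and returns a local directory
# containing pytorch_model.bin, the tokenizer files, config.json, etc.
local_dir = snapshot_download(repo_id="pranaydeeps/pos_final_xlm_de")  # assumed repo id
print(local_dir)
```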
sentencepiece.bpe.model ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cfc8146abe2a0488e9e2a0c56de7952f7c11ab059eca145a0a727afce0db2865
3
+ size 5069051
special_tokens_map.json ADDED
@@ -0,0 +1,15 @@
1
+ {
2
+ "bos_token": "<s>",
3
+ "cls_token": "<s>",
4
+ "eos_token": "</s>",
5
+ "mask_token": {
6
+ "content": "<mask>",
7
+ "lstrip": true,
8
+ "normalized": false,
9
+ "rstrip": false,
10
+ "single_word": false
11
+ },
12
+ "pad_token": "<pad>",
13
+ "sep_token": "</s>",
14
+ "unk_token": "<unk>"
15
+ }
tokenizer.json ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f2c509a525eb51aebb33fb59c24ee923c1d4c1db23c3ae81fe05ccf354084f7b
3
+ size 17082758
tokenizer_config.json ADDED
@@ -0,0 +1,21 @@
1
+ {
2
+ "bos_token": "<s>",
3
+ "cls_token": "<s>",
4
+ "eos_token": "</s>",
5
+ "mask_token": {
6
+ "__type": "AddedToken",
7
+ "content": "<mask>",
8
+ "lstrip": true,
9
+ "normalized": true,
10
+ "rstrip": false,
11
+ "single_word": false
12
+ },
13
+ "model_max_length": 512,
14
+ "name_or_path": "xlm-roberta-base",
15
+ "pad_token": "<pad>",
16
+ "sep_token": "</s>",
17
+ "special_tokens_map_file": null,
18
+ "token": null,
19
+ "tokenizer_class": "XLMRobertaTokenizer",
20
+ "unk_token": "<unk>"
21
+ }
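XLM-R's SentencePiece vocabulary splits words into sub-word pieces, so POS predictions are usually aligned back to whole words via `word_ids()`, which needs the fast tokenizer backed by `tokenizer.json`. A small sketch under that assumption (the path is a placeholder):

```python
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("./pos_final_xlm_de")  # placeholder path

words = ["Die", "Katze", "schläft", "."]
enc = tokenizer(words, is_split_into_words=True)
print(tokenizer.convert_ids_to_tokens(enc["input_ids"]))
print(enc.word_ids())  # None marks <s>/</s>; repeated indices mark pieces of one word
```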
train_results.json ADDED
@@ -0,0 +1,8 @@
1
+ {
2
+ "epoch": 39.99,
3
+ "train_loss": 0.08320926361484453,
4
+ "train_runtime": 4249.1875,
5
+ "train_samples": 131833,
6
+ "train_samples_per_second": 1241.018,
7
+ "train_steps_per_second": 1.205
8
+ }
trainer_state.json ADDED
@@ -0,0 +1,565 @@
1
+ {
2
+ "best_metric": 0.9894462659525121,
3
+ "best_model_checkpoint": "models/pos_final_xlm_de/checkpoint-4480",
4
+ "epoch": 39.994174757281556,
5
+ "global_step": 5120,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 0.99,
12
+ "eval_accuracy": 0.9195868301139938,
13
+ "eval_f1": 0.9132772848631359,
14
+ "eval_loss": 0.3827908933162689,
15
+ "eval_precision": 0.9159339498123846,
16
+ "eval_recall": 0.9106359866475313,
17
+ "eval_runtime": 21.0552,
18
+ "eval_samples_per_second": 695.744,
19
+ "eval_steps_per_second": 2.755,
20
+ "step": 128
21
+ },
22
+ {
23
+ "epoch": 1.99,
24
+ "eval_accuracy": 0.982432992221421,
25
+ "eval_f1": 0.9811226512328548,
26
+ "eval_loss": 0.06585212051868439,
27
+ "eval_precision": 0.9810370934113413,
28
+ "eval_recall": 0.9812082239789405,
29
+ "eval_runtime": 20.8303,
30
+ "eval_samples_per_second": 703.256,
31
+ "eval_steps_per_second": 2.784,
32
+ "step": 256
33
+ },
34
+ {
35
+ "epoch": 2.99,
36
+ "eval_accuracy": 0.9865217324598413,
37
+ "eval_f1": 0.9856564607376338,
38
+ "eval_loss": 0.04468955472111702,
39
+ "eval_precision": 0.9856525531240089,
40
+ "eval_recall": 0.9856603683822421,
41
+ "eval_runtime": 22.0421,
42
+ "eval_samples_per_second": 664.592,
43
+ "eval_steps_per_second": 2.631,
44
+ "step": 384
45
+ },
46
+ {
47
+ "epoch": 3.9,
48
+ "learning_rate": 4.9800000000000004e-05,
49
+ "loss": 0.7525,
50
+ "step": 500
51
+ },
52
+ {
53
+ "epoch": 3.99,
54
+ "eval_accuracy": 0.9877989843354454,
55
+ "eval_f1": 0.9870567994418368,
56
+ "eval_loss": 0.038840390741825104,
57
+ "eval_precision": 0.986986371960646,
58
+ "eval_recall": 0.9871272369746034,
59
+ "eval_runtime": 19.2442,
60
+ "eval_samples_per_second": 761.216,
61
+ "eval_steps_per_second": 3.014,
62
+ "step": 512
63
+ },
64
+ {
65
+ "epoch": 4.99,
66
+ "eval_accuracy": 0.9880904892680158,
67
+ "eval_f1": 0.987278301765262,
68
+ "eval_loss": 0.03728002309799194,
69
+ "eval_precision": 0.9870924532264904,
70
+ "eval_recall": 0.9874642202998755,
71
+ "eval_runtime": 19.7895,
72
+ "eval_samples_per_second": 740.24,
73
+ "eval_steps_per_second": 2.931,
74
+ "step": 640
75
+ },
76
+ {
77
+ "epoch": 5.99,
78
+ "eval_accuracy": 0.9888691142852759,
79
+ "eval_f1": 0.9881274230760082,
80
+ "eval_loss": 0.03543518856167793,
81
+ "eval_precision": 0.9880334223857051,
82
+ "eval_recall": 0.9882214416543106,
83
+ "eval_runtime": 20.2566,
84
+ "eval_samples_per_second": 723.172,
85
+ "eval_steps_per_second": 2.863,
86
+ "step": 768
87
+ },
88
+ {
89
+ "epoch": 6.99,
90
+ "eval_accuracy": 0.9891414412617561,
91
+ "eval_f1": 0.9883990811231861,
92
+ "eval_loss": 0.03501536697149277,
93
+ "eval_precision": 0.988295262217043,
94
+ "eval_recall": 0.9885029218436556,
95
+ "eval_runtime": 20.0491,
96
+ "eval_samples_per_second": 730.658,
97
+ "eval_steps_per_second": 2.893,
98
+ "step": 896
99
+ },
100
+ {
101
+ "epoch": 7.81,
102
+ "learning_rate": 4.462121212121213e-05,
103
+ "loss": 0.0318,
104
+ "step": 1000
105
+ },
106
+ {
107
+ "epoch": 7.99,
108
+ "eval_accuracy": 0.9890839073934856,
109
+ "eval_f1": 0.9884860173195352,
110
+ "eval_loss": 0.03539884835481644,
111
+ "eval_precision": 0.9884056936962649,
112
+ "eval_recall": 0.988566353999001,
113
+ "eval_runtime": 19.1691,
114
+ "eval_samples_per_second": 764.201,
115
+ "eval_steps_per_second": 3.026,
116
+ "step": 1024
117
+ },
118
+ {
119
+ "epoch": 8.99,
120
+ "eval_accuracy": 0.989383083508492,
121
+ "eval_f1": 0.9887981223821485,
122
+ "eval_loss": 0.03559485822916031,
123
+ "eval_precision": 0.9888118431120679,
124
+ "eval_recall": 0.9887844020330006,
125
+ "eval_runtime": 19.05,
126
+ "eval_samples_per_second": 768.977,
127
+ "eval_steps_per_second": 3.045,
128
+ "step": 1152
129
+ },
130
+ {
131
+ "epoch": 9.99,
132
+ "eval_accuracy": 0.9894904800625969,
133
+ "eval_f1": 0.9888288531140862,
134
+ "eval_loss": 0.036680448800325394,
135
+ "eval_precision": 0.9887543801233569,
136
+ "eval_recall": 0.9889033373242732,
137
+ "eval_runtime": 20.8166,
138
+ "eval_samples_per_second": 703.718,
139
+ "eval_steps_per_second": 2.786,
140
+ "step": 1280
141
+ },
142
+ {
143
+ "epoch": 10.99,
144
+ "eval_accuracy": 0.9893677411436199,
145
+ "eval_f1": 0.9887373190775782,
146
+ "eval_loss": 0.03701608628034592,
147
+ "eval_precision": 0.9886942043922937,
148
+ "eval_recall": 0.9887804375232915,
149
+ "eval_runtime": 19.5262,
150
+ "eval_samples_per_second": 750.222,
151
+ "eval_steps_per_second": 2.97,
152
+ "step": 1408
153
+ },
154
+ {
155
+ "epoch": 11.71,
156
+ "learning_rate": 3.9209956709956716e-05,
157
+ "loss": 0.0205,
158
+ "step": 1500
159
+ },
160
+ {
161
+ "epoch": 11.99,
162
+ "eval_accuracy": 0.9896170545727918,
163
+ "eval_f1": 0.9890034963648904,
164
+ "eval_loss": 0.03703853860497475,
165
+ "eval_precision": 0.9889094123336214,
166
+ "eval_recall": 0.9890975983000182,
167
+ "eval_runtime": 18.6632,
168
+ "eval_samples_per_second": 784.912,
169
+ "eval_steps_per_second": 3.108,
170
+ "step": 1536
171
+ },
172
+ {
173
+ "epoch": 12.99,
174
+ "eval_accuracy": 0.9894828088801608,
175
+ "eval_f1": 0.9888483647175421,
176
+ "eval_loss": 0.03879130259156227,
177
+ "eval_precision": 0.9888013256059176,
178
+ "eval_recall": 0.9888954083048549,
179
+ "eval_runtime": 18.564,
180
+ "eval_samples_per_second": 789.109,
181
+ "eval_steps_per_second": 3.124,
182
+ "step": 1664
183
+ },
184
+ {
185
+ "epoch": 13.99,
186
+ "eval_accuracy": 0.9896592460761902,
187
+ "eval_f1": 0.989038521766364,
188
+ "eval_loss": 0.03970788046717644,
189
+ "eval_precision": 0.9890032348090828,
190
+ "eval_recall": 0.9890738112417637,
191
+ "eval_runtime": 19.0522,
192
+ "eval_samples_per_second": 768.889,
193
+ "eval_steps_per_second": 3.044,
194
+ "step": 1792
195
+ },
196
+ {
197
+ "epoch": 14.99,
198
+ "eval_accuracy": 0.9896784240322803,
199
+ "eval_f1": 0.989059827026727,
200
+ "eval_loss": 0.040303945541381836,
201
+ "eval_precision": 0.989069630064266,
202
+ "eval_recall": 0.9890500241835092,
203
+ "eval_runtime": 18.8982,
204
+ "eval_samples_per_second": 775.155,
205
+ "eval_steps_per_second": 3.069,
206
+ "step": 1920
207
+ },
208
+ {
209
+ "epoch": 15.62,
210
+ "learning_rate": 3.3798701298701305e-05,
211
+ "loss": 0.0146,
212
+ "step": 2000
213
+ },
214
+ {
215
+ "epoch": 15.99,
216
+ "eval_accuracy": 0.9897052731708066,
217
+ "eval_f1": 0.9891171188315472,
218
+ "eval_loss": 0.04127529263496399,
219
+ "eval_precision": 0.9891445699061152,
220
+ "eval_recall": 0.9890896692806,
221
+ "eval_runtime": 18.6421,
222
+ "eval_samples_per_second": 785.8,
223
+ "eval_steps_per_second": 3.111,
224
+ "step": 2048
225
+ },
226
+ {
227
+ "epoch": 16.99,
228
+ "eval_accuracy": 0.989758971447859,
229
+ "eval_f1": 0.9890990835617984,
230
+ "eval_loss": 0.04227915033698082,
231
+ "eval_precision": 0.9891441825426719,
232
+ "eval_recall": 0.9890539886932183,
233
+ "eval_runtime": 18.5222,
234
+ "eval_samples_per_second": 790.89,
235
+ "eval_steps_per_second": 3.131,
236
+ "step": 2176
237
+ },
238
+ {
239
+ "epoch": 17.99,
240
+ "eval_accuracy": 0.9897436290829869,
241
+ "eval_f1": 0.9891194678073816,
242
+ "eval_loss": 0.04291819408535957,
243
+ "eval_precision": 0.9891135858167388,
244
+ "eval_recall": 0.9891253498679818,
245
+ "eval_runtime": 19.3021,
246
+ "eval_samples_per_second": 758.935,
247
+ "eval_steps_per_second": 3.005,
248
+ "step": 2304
249
+ },
250
+ {
251
+ "epoch": 18.99,
252
+ "eval_accuracy": 0.9899277374614524,
253
+ "eval_f1": 0.9893141089045129,
254
+ "eval_loss": 0.04433906078338623,
255
+ "eval_precision": 0.9892729298062706,
256
+ "eval_recall": 0.9893552914311087,
257
+ "eval_runtime": 20.0023,
258
+ "eval_samples_per_second": 732.367,
259
+ "eval_steps_per_second": 2.9,
260
+ "step": 2432
261
+ },
262
+ {
263
+ "epoch": 19.53,
264
+ "learning_rate": 2.838744588744589e-05,
265
+ "loss": 0.0103,
266
+ "step": 2500
267
+ },
268
+ {
269
+ "epoch": 19.99,
270
+ "eval_accuracy": 0.9895978766167017,
271
+ "eval_f1": 0.9889502105288197,
272
+ "eval_loss": 0.04566018655896187,
273
+ "eval_precision": 0.9890129497315686,
274
+ "eval_recall": 0.9888874792854367,
275
+ "eval_runtime": 18.9384,
276
+ "eval_samples_per_second": 773.508,
277
+ "eval_steps_per_second": 3.063,
278
+ "step": 2560
279
+ },
280
+ {
281
+ "epoch": 20.99,
282
+ "eval_accuracy": 0.9898049985424754,
283
+ "eval_f1": 0.9891496101074732,
284
+ "eval_loss": 0.04549423232674599,
285
+ "eval_precision": 0.9891064774439071,
286
+ "eval_recall": 0.9891927465330362,
287
+ "eval_runtime": 18.8835,
288
+ "eval_samples_per_second": 775.757,
289
+ "eval_steps_per_second": 3.071,
290
+ "step": 2688
291
+ },
292
+ {
293
+ "epoch": 21.99,
294
+ "eval_accuracy": 0.9897704782215131,
295
+ "eval_f1": 0.9891315336173181,
296
+ "eval_loss": 0.04684610292315483,
297
+ "eval_precision": 0.989109966739214,
298
+ "eval_recall": 0.9891531014359454,
299
+ "eval_runtime": 18.9876,
300
+ "eval_samples_per_second": 771.504,
301
+ "eval_steps_per_second": 3.055,
302
+ "step": 2816
303
+ },
304
+ {
305
+ "epoch": 22.99,
306
+ "eval_accuracy": 0.9898471900458736,
307
+ "eval_f1": 0.9891676858093711,
308
+ "eval_loss": 0.049145638942718506,
309
+ "eval_precision": 0.9891029884528939,
310
+ "eval_recall": 0.9892323916301271,
311
+ "eval_runtime": 18.5499,
312
+ "eval_samples_per_second": 789.708,
313
+ "eval_steps_per_second": 3.127,
314
+ "step": 2944
315
+ },
316
+ {
317
+ "epoch": 23.43,
318
+ "learning_rate": 2.2976190476190476e-05,
319
+ "loss": 0.0073,
320
+ "step": 3000
321
+ },
322
+ {
323
+ "epoch": 23.99,
324
+ "eval_accuracy": 0.9899622577824145,
325
+ "eval_f1": 0.9894013665041952,
326
+ "eval_loss": 0.04954079911112785,
327
+ "eval_precision": 0.9893562619667725,
328
+ "eval_recall": 0.9894464751544176,
329
+ "eval_runtime": 19.048,
330
+ "eval_samples_per_second": 769.056,
331
+ "eval_steps_per_second": 3.045,
332
+ "step": 3072
333
+ },
334
+ {
335
+ "epoch": 24.99,
336
+ "eval_accuracy": 0.9897858205863852,
337
+ "eval_f1": 0.9891747989478087,
338
+ "eval_loss": 0.05031678453087807,
339
+ "eval_precision": 0.9891846031248885,
340
+ "eval_recall": 0.9891649949650727,
341
+ "eval_runtime": 18.6731,
342
+ "eval_samples_per_second": 784.498,
343
+ "eval_steps_per_second": 3.106,
344
+ "step": 3200
345
+ },
346
+ {
347
+ "epoch": 25.99,
348
+ "eval_accuracy": 0.9898126697249114,
349
+ "eval_f1": 0.9892364414843007,
350
+ "eval_loss": 0.05185426026582718,
351
+ "eval_precision": 0.9892285979337303,
352
+ "eval_recall": 0.9892442851592543,
353
+ "eval_runtime": 19.572,
354
+ "eval_samples_per_second": 748.468,
355
+ "eval_steps_per_second": 2.963,
356
+ "step": 3328
357
+ },
358
+ {
359
+ "epoch": 26.99,
360
+ "eval_accuracy": 0.9898548612283097,
361
+ "eval_f1": 0.9892308576661506,
362
+ "eval_loss": 0.05215698853135109,
363
+ "eval_precision": 0.9891896820331485,
364
+ "eval_recall": 0.989272036727218,
365
+ "eval_runtime": 19.399,
366
+ "eval_samples_per_second": 755.14,
367
+ "eval_steps_per_second": 2.99,
368
+ "step": 3456
369
+ },
370
+ {
371
+ "epoch": 27.34,
372
+ "learning_rate": 1.7564935064935065e-05,
373
+ "loss": 0.0052,
374
+ "step": 3500
375
+ },
376
+ {
377
+ "epoch": 27.99,
378
+ "eval_accuracy": 0.9898702035931819,
379
+ "eval_f1": 0.9892028504316283,
380
+ "eval_loss": 0.05260332301259041,
381
+ "eval_precision": 0.9891852033919135,
382
+ "eval_recall": 0.9892204981009999,
383
+ "eval_runtime": 19.6509,
384
+ "eval_samples_per_second": 745.461,
385
+ "eval_steps_per_second": 2.952,
386
+ "step": 3584
387
+ },
388
+ {
389
+ "epoch": 28.99,
390
+ "eval_accuracy": 0.9898663680019638,
391
+ "eval_f1": 0.9891989716326818,
392
+ "eval_loss": 0.05352339521050453,
393
+ "eval_precision": 0.9891734819683569,
394
+ "eval_recall": 0.989224462610709,
395
+ "eval_runtime": 19.4431,
396
+ "eval_samples_per_second": 753.429,
397
+ "eval_steps_per_second": 2.983,
398
+ "step": 3712
399
+ },
400
+ {
401
+ "epoch": 29.99,
402
+ "eval_accuracy": 0.9900121204682489,
403
+ "eval_f1": 0.9893592137553174,
404
+ "eval_loss": 0.054358094930648804,
405
+ "eval_precision": 0.9893631361106265,
406
+ "eval_recall": 0.9893552914311087,
407
+ "eval_runtime": 18.4688,
408
+ "eval_samples_per_second": 793.177,
409
+ "eval_steps_per_second": 3.14,
410
+ "step": 3840
411
+ },
412
+ {
413
+ "epoch": 30.99,
414
+ "eval_accuracy": 0.9899660933736326,
415
+ "eval_f1": 0.9893714865647028,
416
+ "eval_loss": 0.05478381738066673,
417
+ "eval_precision": 0.989340109572098,
418
+ "eval_recall": 0.9894028655476177,
419
+ "eval_runtime": 18.761,
420
+ "eval_samples_per_second": 780.821,
421
+ "eval_steps_per_second": 3.092,
422
+ "step": 3968
423
+ },
424
+ {
425
+ "epoch": 31.25,
426
+ "learning_rate": 1.2153679653679655e-05,
427
+ "loss": 0.0038,
428
+ "step": 4000
429
+ },
430
+ {
431
+ "epoch": 31.99,
432
+ "eval_accuracy": 0.9898702035931819,
433
+ "eval_f1": 0.989232477006026,
434
+ "eval_loss": 0.05625994876027107,
435
+ "eval_precision": 0.9892246334868896,
436
+ "eval_recall": 0.9892403206495453,
437
+ "eval_runtime": 19.3617,
438
+ "eval_samples_per_second": 756.597,
439
+ "eval_steps_per_second": 2.996,
440
+ "step": 4096
441
+ },
442
+ {
443
+ "epoch": 32.99,
444
+ "eval_accuracy": 0.9900466407892112,
445
+ "eval_f1": 0.9894185977362381,
446
+ "eval_loss": 0.05615779384970665,
447
+ "eval_precision": 0.9894303656950744,
448
+ "eval_recall": 0.9894068300573268,
449
+ "eval_runtime": 18.6932,
450
+ "eval_samples_per_second": 783.655,
451
+ "eval_steps_per_second": 3.103,
452
+ "step": 4224
453
+ },
454
+ {
455
+ "epoch": 33.99,
456
+ "eval_accuracy": 0.9898318476810015,
457
+ "eval_f1": 0.989171306638546,
458
+ "eval_loss": 0.057734012603759766,
459
+ "eval_precision": 0.9891379754613387,
460
+ "eval_recall": 0.9892046400621635,
461
+ "eval_runtime": 18.8622,
462
+ "eval_samples_per_second": 776.632,
463
+ "eval_steps_per_second": 3.075,
464
+ "step": 4352
465
+ },
466
+ {
467
+ "epoch": 34.99,
468
+ "eval_accuracy": 0.9900658187453014,
469
+ "eval_f1": 0.9894462659525121,
470
+ "eval_loss": 0.05798300728201866,
471
+ "eval_precision": 0.989465880076756,
472
+ "eval_recall": 0.9894266526058723,
473
+ "eval_runtime": 19.4979,
474
+ "eval_samples_per_second": 751.311,
475
+ "eval_steps_per_second": 2.975,
476
+ "step": 4480
477
+ },
478
+ {
479
+ "epoch": 35.16,
480
+ "learning_rate": 6.742424242424243e-06,
481
+ "loss": 0.003,
482
+ "step": 4500
483
+ },
484
+ {
485
+ "epoch": 35.99,
486
+ "eval_accuracy": 0.9899776001472868,
487
+ "eval_f1": 0.9893636842960725,
488
+ "eval_loss": 0.05809687077999115,
489
+ "eval_precision": 0.9893205422976294,
490
+ "eval_recall": 0.9894068300573268,
491
+ "eval_runtime": 19.0851,
492
+ "eval_samples_per_second": 767.563,
493
+ "eval_steps_per_second": 3.039,
494
+ "step": 4608
495
+ },
496
+ {
497
+ "epoch": 36.99,
498
+ "eval_accuracy": 0.989889381549272,
499
+ "eval_f1": 0.9892901530063094,
500
+ "eval_loss": 0.058496102690696716,
501
+ "eval_precision": 0.9892646609924242,
502
+ "eval_recall": 0.9893156463340179,
503
+ "eval_runtime": 19.0831,
504
+ "eval_samples_per_second": 767.642,
505
+ "eval_steps_per_second": 3.039,
506
+ "step": 4736
507
+ },
508
+ {
509
+ "epoch": 37.99,
510
+ "eval_accuracy": 0.9899699289648506,
511
+ "eval_f1": 0.9893475735699306,
512
+ "eval_loss": 0.05856472626328468,
513
+ "eval_precision": 0.9893279629570898,
514
+ "eval_recall": 0.989367184960236,
515
+ "eval_runtime": 19.3383,
516
+ "eval_samples_per_second": 757.512,
517
+ "eval_steps_per_second": 2.999,
518
+ "step": 4864
519
+ },
520
+ {
521
+ "epoch": 38.99,
522
+ "eval_accuracy": 0.9899737645560687,
523
+ "eval_f1": 0.9893515802159814,
524
+ "eval_loss": 0.05881791561841965,
525
+ "eval_precision": 0.9893280475718533,
526
+ "eval_recall": 0.9893751139796542,
527
+ "eval_runtime": 19.2023,
528
+ "eval_samples_per_second": 762.879,
529
+ "eval_steps_per_second": 3.02,
530
+ "step": 4992
531
+ },
532
+ {
533
+ "epoch": 39.06,
534
+ "learning_rate": 1.3311688311688312e-06,
535
+ "loss": 0.0024,
536
+ "step": 5000
537
+ },
538
+ {
539
+ "epoch": 39.99,
540
+ "eval_accuracy": 0.9899891069209408,
541
+ "eval_f1": 0.9893752824668374,
542
+ "eval_loss": 0.058905407786369324,
543
+ "eval_precision": 0.9893595934127796,
544
+ "eval_recall": 0.9893909720184905,
545
+ "eval_runtime": 18.6772,
546
+ "eval_samples_per_second": 784.326,
547
+ "eval_steps_per_second": 3.105,
548
+ "step": 5120
549
+ },
550
+ {
551
+ "epoch": 39.99,
552
+ "step": 5120,
553
+ "total_flos": 3.2246769193641984e+17,
554
+ "train_loss": 0.08320926361484453,
555
+ "train_runtime": 4249.1875,
556
+ "train_samples_per_second": 1241.018,
557
+ "train_steps_per_second": 1.205
558
+ }
559
+ ],
560
+ "max_steps": 5120,
561
+ "num_train_epochs": 40,
562
+ "total_flos": 3.2246769193641984e+17,
563
+ "trial_name": null,
564
+ "trial_params": null
565
+ }
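`log_history` above records the per-epoch evaluation metrics plus the training loss every 500 steps; `best_model_checkpoint` points at the checkpoint from epoch 34.99 (step 4480, F1 ≈ 0.9894). A sketch of recovering the best evaluation entry from this file:

```python
import json

with open("trainer_state.json") as f:
    state = json.load(f)

evals = [e for e in state["log_history"] if "eval_f1" in e]
best = max(evals, key=lambda e: e["eval_f1"])
print(state["best_model_checkpoint"])                 # models/pos_final_xlm_de/checkpoint-4480
print(best["epoch"], best["step"], best["eval_f1"])   # 34.99 4480 0.98944...
```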
training_args.bin ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bf69f2cdb60730064aaa3d1b29fee1e13e162d0afe04f46a7ba422163b930418
3
+ size 3439
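`training_args.bin` is the pickled `TrainingArguments` object saved by the Trainer; inspecting it requires a compatible `transformers` install (the card lists 4.25.1), since unpickling reconstructs that class. A sketch:

```python
import torch

# Unpickles the saved TrainingArguments; needs transformers importable and a
# torch.load that allows full pickles (the default in the PyTorch 1.12 listed here).
args = torch.load("training_args.bin")
print(args.learning_rate, args.num_train_epochs, args.gradient_accumulation_steps)
```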