pranaydeeps committed on
Commit
70df694
·
verified ·
1 Parent(s): acf1cca

Upload folder using huggingface_hub

Browse files
README.md ADDED
@@ -0,0 +1,108 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: mit
3
+ tags:
4
+ - generated_from_trainer
5
+ metrics:
6
+ - precision
7
+ - recall
8
+ - f1
9
+ - accuracy
10
+ model-index:
11
+ - name: pos_final_mono_de
12
+ results: []
13
+ ---
14
+
15
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
16
+ should probably proofread and complete it, then remove this comment. -->
17
+
18
+ # pos_final_mono_de
19
+
20
+ This model is a fine-tuned version of [dbmdz/bert-base-german-cased](https://huggingface.co/dbmdz/bert-base-german-cased) for part-of-speech tagging (token classification). The training dataset was not recorded by the Trainer.
21
+ It achieves the following results on the final evaluation set (437 samples, as reported in `eval_results.json`):
22
+ - Loss: 0.1567
23
+ - Precision: 0.9771
24
+ - Recall: 0.9791
25
+ - F1: 0.9781
26
+ - Accuracy: 0.9810
27
+
28
+ ## Model description
29
+
30
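+ A BERT token-classification model (`BertForTokenClassification`) for German part-of-speech tagging, initialized from `dbmdz/bert-base-german-cased`. It predicts one of 54 tags per token (for example `NN`, `NE`, `ADJA`, `VVFIN`, `$.`); the full label map is in `config.json`. More information needed.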
31
+
32
+ ## Intended uses & limitations
33
+
34
+ More information needed
35
+
36
+ ## Training and evaluation data
37
+
38
+ More information needed
39
+
40
+ ## Training procedure
41
+
42
+ ### Training hyperparameters
43
+
44
+ The following hyperparameters were used during training:
45
+ - learning_rate: 5e-05
46
+ - train_batch_size: 256
47
+ - eval_batch_size: 256
48
+ - seed: 42
49
+ - gradient_accumulation_steps: 4
50
+ - total_train_batch_size: 1024
51
+ - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
52
+ - lr_scheduler_type: linear
53
+ - lr_scheduler_warmup_steps: 500
54
+ - num_epochs: 40.0
55
+ - mixed_precision_training: Native AMP
56
+
57
+ ### Training results
58
+
59
+ | Training Loss | Epoch | Step | Validation Loss | Precision | Recall | F1 | Accuracy |
60
+ |:-------------:|:-----:|:----:|:---------------:|:---------:|:------:|:------:|:--------:|
61
+ | No log | 0.99 | 128 | 0.2357 | 0.9443 | 0.9413 | 0.9428 | 0.9475 |
62
+ | No log | 1.99 | 256 | 0.0513 | 0.9843 | 0.9842 | 0.9842 | 0.9853 |
63
+ | No log | 2.99 | 384 | 0.0406 | 0.9868 | 0.9866 | 0.9867 | 0.9875 |
64
+ | 0.6822 | 3.99 | 512 | 0.0365 | 0.9877 | 0.9877 | 0.9877 | 0.9885 |
65
+ | 0.6822 | 4.99 | 640 | 0.0352 | 0.9881 | 0.9882 | 0.9882 | 0.9890 |
66
+ | 0.6822 | 5.99 | 768 | 0.0345 | 0.9887 | 0.9887 | 0.9887 | 0.9895 |
67
+ | 0.6822 | 6.99 | 896 | 0.0353 | 0.9888 | 0.9888 | 0.9888 | 0.9896 |
68
+ | 0.024 | 7.99 | 1024 | 0.0371 | 0.9886 | 0.9888 | 0.9887 | 0.9895 |
69
+ | 0.024 | 8.99 | 1152 | 0.0387 | 0.9888 | 0.9888 | 0.9888 | 0.9896 |
70
+ | 0.024 | 9.99 | 1280 | 0.0402 | 0.9890 | 0.9889 | 0.9890 | 0.9898 |
71
+ | 0.024 | 10.99 | 1408 | 0.0429 | 0.9889 | 0.9890 | 0.9889 | 0.9897 |
72
+ | 0.0128 | 11.99 | 1536 | 0.0454 | 0.9889 | 0.9889 | 0.9889 | 0.9896 |
73
+ | 0.0128 | 12.99 | 1664 | 0.0461 | 0.9889 | 0.9889 | 0.9889 | 0.9897 |
74
+ | 0.0128 | 13.99 | 1792 | 0.0477 | 0.9892 | 0.9891 | 0.9891 | 0.9899 |
75
+ | 0.0128 | 14.99 | 1920 | 0.0507 | 0.9890 | 0.9891 | 0.9890 | 0.9898 |
76
+ | 0.0069 | 15.99 | 2048 | 0.0514 | 0.9893 | 0.9893 | 0.9893 | 0.9901 |
77
+ | 0.0069 | 16.99 | 2176 | 0.0530 | 0.9892 | 0.9892 | 0.9892 | 0.9899 |
78
+ | 0.0069 | 17.99 | 2304 | 0.0552 | 0.9890 | 0.9891 | 0.9891 | 0.9898 |
79
+ | 0.0069 | 18.99 | 2432 | 0.0567 | 0.9891 | 0.9892 | 0.9892 | 0.9898 |
80
+ | 0.0037 | 19.99 | 2560 | 0.0577 | 0.9892 | 0.9893 | 0.9892 | 0.9900 |
81
+ | 0.0037 | 20.99 | 2688 | 0.0592 | 0.9892 | 0.9893 | 0.9893 | 0.9899 |
82
+ | 0.0037 | 21.99 | 2816 | 0.0606 | 0.9893 | 0.9893 | 0.9893 | 0.9900 |
83
+ | 0.0037 | 22.99 | 2944 | 0.0628 | 0.9893 | 0.9893 | 0.9893 | 0.9900 |
84
+ | 0.0023 | 23.99 | 3072 | 0.0629 | 0.9892 | 0.9891 | 0.9891 | 0.9899 |
85
+ | 0.0023 | 24.99 | 3200 | 0.0625 | 0.9892 | 0.9893 | 0.9893 | 0.9900 |
86
+ | 0.0023 | 25.99 | 3328 | 0.0636 | 0.9893 | 0.9893 | 0.9893 | 0.9900 |
87
+ | 0.0023 | 26.99 | 3456 | 0.0650 | 0.9894 | 0.9894 | 0.9894 | 0.9901 |
88
+ | 0.0017 | 27.99 | 3584 | 0.0644 | 0.9894 | 0.9894 | 0.9894 | 0.9901 |
89
+ | 0.0017 | 28.99 | 3712 | 0.0656 | 0.9895 | 0.9895 | 0.9895 | 0.9901 |
90
+ | 0.0017 | 29.99 | 3840 | 0.0668 | 0.9895 | 0.9895 | 0.9895 | 0.9902 |
91
+ | 0.0017 | 30.99 | 3968 | 0.0666 | 0.9895 | 0.9894 | 0.9894 | 0.9901 |
92
+ | 0.0011 | 31.99 | 4096 | 0.0678 | 0.9894 | 0.9894 | 0.9894 | 0.9900 |
93
+ | 0.0011 | 32.99 | 4224 | 0.0685 | 0.9896 | 0.9896 | 0.9896 | 0.9902 |
94
+ | 0.0011 | 33.99 | 4352 | 0.0692 | 0.9894 | 0.9894 | 0.9894 | 0.9901 |
95
+ | 0.0011 | 34.99 | 4480 | 0.0698 | 0.9895 | 0.9895 | 0.9895 | 0.9902 |
96
+ | 0.0009 | 35.99 | 4608 | 0.0698 | 0.9894 | 0.9894 | 0.9894 | 0.9901 |
97
+ | 0.0009 | 36.99 | 4736 | 0.0695 | 0.9895 | 0.9895 | 0.9895 | 0.9902 |
98
+ | 0.0009 | 37.99 | 4864 | 0.0696 | 0.9894 | 0.9895 | 0.9894 | 0.9902 |
99
+ | 0.0009 | 38.99 | 4992 | 0.0699 | 0.9895 | 0.9895 | 0.9895 | 0.9902 |
100
+ | 0.0007 | 39.99 | 5120 | 0.0697 | 0.9894 | 0.9894 | 0.9894 | 0.9901 |
101
+
102
+
103
+ ### Framework versions
104
+
105
+ - Transformers 4.25.1
106
+ - Pytorch 1.12.0
107
+ - Datasets 2.18.0
108
+ - Tokenizers 0.13.2
all_results.json ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 39.99,
3
+ "eval_accuracy": 0.9810301218670959,
4
+ "eval_f1": 0.9781145801758109,
5
+ "eval_loss": 0.1567157655954361,
6
+ "eval_precision": 0.9771075581395349,
7
+ "eval_recall": 0.9791236800582594,
8
+ "eval_runtime": 1.068,
9
+ "eval_samples": 437,
10
+ "eval_samples_per_second": 409.172,
11
+ "eval_steps_per_second": 1.873,
12
+ "train_loss": 0.07192220802244265,
13
+ "train_runtime": 4057.7347,
14
+ "train_samples": 131833,
15
+ "train_samples_per_second": 1299.572,
16
+ "train_steps_per_second": 1.262
17
+ }
config.json ADDED
@@ -0,0 +1,139 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "dbmdz/bert-base-german-cased",
3
+ "architectures": [
4
+ "BertForTokenClassification"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "classifier_dropout": null,
8
+ "finetuning_task": "pos",
9
+ "hidden_act": "gelu",
10
+ "hidden_dropout_prob": 0.1,
11
+ "hidden_size": 768,
12
+ "id2label": {
13
+ "0": "ADV",
14
+ "1": "VMINF",
15
+ "2": "APPO",
16
+ "3": "CARD",
17
+ "4": "PTKVZ",
18
+ "5": "PDAT",
19
+ "6": "PTKZU",
20
+ "7": "PRELS",
21
+ "8": "ITJ",
22
+ "9": "APPR",
23
+ "10": "PIAT",
24
+ "11": "NN",
25
+ "12": "PWS",
26
+ "13": "VVINF",
27
+ "14": "APPRART",
28
+ "15": "VAPP",
29
+ "16": "APZR",
30
+ "17": "KOKOM",
31
+ "18": "$,",
32
+ "19": "PDS",
33
+ "20": "VAIMP",
34
+ "21": "PTKANT",
35
+ "22": "PRF",
36
+ "23": "PWAV",
37
+ "24": "KON",
38
+ "25": "VVPP",
39
+ "26": "PPOSS",
40
+ "27": "VVFIN",
41
+ "28": "PTKNEG",
42
+ "29": "ART",
43
+ "30": "VMFIN",
44
+ "31": "FW",
45
+ "32": "PPER",
46
+ "33": "$",
47
+ "34": "VAINF",
48
+ "35": "PTKA",
49
+ "36": "$.",
50
+ "37": "ADJA",
51
+ "38": "XY",
52
+ "39": "KOUS",
53
+ "40": "PPOSAT",
54
+ "41": "VAFIN",
55
+ "42": "FM",
56
+ "43": "PIS",
57
+ "44": "VVIZU",
58
+ "45": "ADJD",
59
+ "46": "KOUI",
60
+ "47": "PROAV",
61
+ "48": "PRELAT",
62
+ "49": "VMPP",
63
+ "50": "VVIMP",
64
+ "51": "PWAT",
65
+ "52": "TRUNC",
66
+ "53": "NE"
67
+ },
68
+ "initializer_range": 0.02,
69
+ "intermediate_size": 3072,
70
+ "label2id": {
71
+ "$": 33,
72
+ "$,": 18,
73
+ "$.": 36,
74
+ "ADJA": 37,
75
+ "ADJD": 45,
76
+ "ADV": 0,
77
+ "APPO": 2,
78
+ "APPR": 9,
79
+ "APPRART": 14,
80
+ "APZR": 16,
81
+ "ART": 29,
82
+ "CARD": 3,
83
+ "FM": 42,
84
+ "FW": 31,
85
+ "ITJ": 8,
86
+ "KOKOM": 17,
87
+ "KON": 24,
88
+ "KOUI": 46,
89
+ "KOUS": 39,
90
+ "NE": 53,
91
+ "NN": 11,
92
+ "PDAT": 5,
93
+ "PDS": 19,
94
+ "PIAT": 10,
95
+ "PIS": 43,
96
+ "PPER": 32,
97
+ "PPOSAT": 40,
98
+ "PPOSS": 26,
99
+ "PRELAT": 48,
100
+ "PRELS": 7,
101
+ "PRF": 22,
102
+ "PROAV": 47,
103
+ "PTKA": 35,
104
+ "PTKANT": 21,
105
+ "PTKNEG": 28,
106
+ "PTKVZ": 4,
107
+ "PTKZU": 6,
108
+ "PWAT": 51,
109
+ "PWAV": 23,
110
+ "PWS": 12,
111
+ "TRUNC": 52,
112
+ "VAFIN": 41,
113
+ "VAIMP": 20,
114
+ "VAINF": 34,
115
+ "VAPP": 15,
116
+ "VMFIN": 30,
117
+ "VMINF": 1,
118
+ "VMPP": 49,
119
+ "VVFIN": 27,
120
+ "VVIMP": 50,
121
+ "VVINF": 13,
122
+ "VVIZU": 44,
123
+ "VVPP": 25,
124
+ "XY": 38
125
+ },
126
+ "layer_norm_eps": 1e-12,
127
+ "max_position_embeddings": 512,
128
+ "model_type": "bert",
129
+ "num_attention_heads": 12,
130
+ "num_hidden_layers": 12,
131
+ "output_past": true,
132
+ "pad_token_id": 0,
133
+ "position_embedding_type": "absolute",
134
+ "torch_dtype": "float32",
135
+ "transformers_version": "4.25.1",
136
+ "type_vocab_size": 2,
137
+ "use_cache": true,
138
+ "vocab_size": 31102
139
+ }
eval_results.json ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 39.99,
3
+ "eval_accuracy": 0.9810301218670959,
4
+ "eval_f1": 0.9781145801758109,
5
+ "eval_loss": 0.1567157655954361,
6
+ "eval_precision": 0.9771075581395349,
7
+ "eval_recall": 0.9791236800582594,
8
+ "eval_runtime": 1.068,
9
+ "eval_samples": 437,
10
+ "eval_samples_per_second": 409.172,
11
+ "eval_steps_per_second": 1.873
12
+ }
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6c56e41223a9c0bf47efb09c096f4589a5af4ea4a1e4549dea5919e47dce0c69
3
+ size 437584881
special_tokens_map.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "cls_token": "[CLS]",
3
+ "mask_token": "[MASK]",
4
+ "pad_token": "[PAD]",
5
+ "sep_token": "[SEP]",
6
+ "unk_token": "[UNK]"
7
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cls_token": "[CLS]",
3
+ "do_basic_tokenize": true,
4
+ "do_lower_case": false,
5
+ "mask_token": "[MASK]",
6
+ "max_len": 512,
7
+ "model_max_length": 512,
8
+ "name_or_path": "dbmdz/bert-base-german-cased",
9
+ "never_split": null,
10
+ "pad_token": "[PAD]",
11
+ "sep_token": "[SEP]",
12
+ "special_tokens_map_file": null,
13
+ "strip_accents": null,
14
+ "token": null,
15
+ "tokenize_chinese_chars": true,
16
+ "tokenizer_class": "BertTokenizer",
17
+ "unk_token": "[UNK]"
18
+ }
train_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 39.99,
3
+ "train_loss": 0.07192220802244265,
4
+ "train_runtime": 4057.7347,
5
+ "train_samples": 131833,
6
+ "train_samples_per_second": 1299.572,
7
+ "train_steps_per_second": 1.262
8
+ }
trainer_state.json ADDED
@@ -0,0 +1,565 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.9895929814239887,
3
+ "best_model_checkpoint": "models/pos_final_mono_de/checkpoint-4224",
4
+ "epoch": 39.994174757281556,
5
+ "global_step": 5120,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 0.99,
12
+ "eval_accuracy": 0.9475099341812547,
13
+ "eval_f1": 0.9428053278974075,
14
+ "eval_loss": 0.235727921128273,
15
+ "eval_precision": 0.9442734211134948,
16
+ "eval_recall": 0.941341792581462,
17
+ "eval_runtime": 18.99,
18
+ "eval_samples_per_second": 771.406,
19
+ "eval_steps_per_second": 3.054,
20
+ "step": 128
21
+ },
22
+ {
23
+ "epoch": 1.99,
24
+ "eval_accuracy": 0.9852943432700717,
25
+ "eval_f1": 0.9842332493182053,
26
+ "eval_loss": 0.05128009244799614,
27
+ "eval_precision": 0.9842997713944935,
28
+ "eval_recall": 0.9841667362328519,
29
+ "eval_runtime": 19.2922,
30
+ "eval_samples_per_second": 759.323,
31
+ "eval_steps_per_second": 3.006,
32
+ "step": 256
33
+ },
34
+ {
35
+ "epoch": 2.99,
36
+ "eval_accuracy": 0.9875228217677473,
37
+ "eval_f1": 0.9867158568898448,
38
+ "eval_loss": 0.04063262417912483,
39
+ "eval_precision": 0.9867884320258268,
40
+ "eval_recall": 0.9866432924284164,
41
+ "eval_runtime": 19.7655,
42
+ "eval_samples_per_second": 741.14,
43
+ "eval_steps_per_second": 2.934,
44
+ "step": 384
45
+ },
46
+ {
47
+ "epoch": 3.9,
48
+ "learning_rate": 5e-05,
49
+ "loss": 0.6822,
50
+ "step": 500
51
+ },
52
+ {
53
+ "epoch": 3.99,
54
+ "eval_accuracy": 0.9884893907546909,
55
+ "eval_f1": 0.9876851402812782,
56
+ "eval_loss": 0.036450713872909546,
57
+ "eval_precision": 0.9876576580157648,
58
+ "eval_recall": 0.9877126240762605,
59
+ "eval_runtime": 18.3474,
60
+ "eval_samples_per_second": 798.424,
61
+ "eval_steps_per_second": 3.161,
62
+ "step": 512
63
+ },
64
+ {
65
+ "epoch": 4.99,
66
+ "eval_accuracy": 0.9889918532042529,
67
+ "eval_f1": 0.9881764176274528,
68
+ "eval_loss": 0.03515882417559624,
69
+ "eval_precision": 0.9881194651573207,
70
+ "eval_recall": 0.9882333766631287,
71
+ "eval_runtime": 19.0555,
72
+ "eval_samples_per_second": 768.756,
73
+ "eval_steps_per_second": 3.044,
74
+ "step": 640
75
+ },
76
+ {
77
+ "epoch": 5.99,
78
+ "eval_accuracy": 0.9894521241504165,
79
+ "eval_f1": 0.9887225068869429,
80
+ "eval_loss": 0.0344870425760746,
81
+ "eval_precision": 0.9887067858661908,
82
+ "eval_recall": 0.9887382284076499,
83
+ "eval_runtime": 18.7765,
84
+ "eval_samples_per_second": 780.176,
85
+ "eval_steps_per_second": 3.089,
86
+ "step": 768
87
+ },
88
+ {
89
+ "epoch": 6.99,
90
+ "eval_accuracy": 0.9896017122079197,
91
+ "eval_f1": 0.9887940595397575,
92
+ "eval_loss": 0.03525426983833313,
93
+ "eval_precision": 0.9887783373812941,
94
+ "eval_recall": 0.9888097821982119,
95
+ "eval_runtime": 18.226,
96
+ "eval_samples_per_second": 803.744,
97
+ "eval_steps_per_second": 3.182,
98
+ "step": 896
99
+ },
100
+ {
101
+ "epoch": 7.81,
102
+ "learning_rate": 4.458874458874459e-05,
103
+ "loss": 0.024,
104
+ "step": 1000
105
+ },
106
+ {
107
+ "epoch": 7.99,
108
+ "eval_accuracy": 0.9894866444713788,
109
+ "eval_f1": 0.9887030802192603,
110
+ "eval_loss": 0.037094976752996445,
111
+ "eval_precision": 0.9886480621017779,
112
+ "eval_recall": 0.9887581044605838,
113
+ "eval_runtime": 18.998,
114
+ "eval_samples_per_second": 771.08,
115
+ "eval_steps_per_second": 3.053,
116
+ "step": 1024
117
+ },
118
+ {
119
+ "epoch": 8.99,
120
+ "eval_accuracy": 0.9895556851133034,
121
+ "eval_f1": 0.988807816838561,
122
+ "eval_loss": 0.03866518661379814,
123
+ "eval_precision": 0.9888058514867228,
124
+ "eval_recall": 0.9888097821982119,
125
+ "eval_runtime": 19.63,
126
+ "eval_samples_per_second": 746.255,
127
+ "eval_steps_per_second": 2.955,
128
+ "step": 1152
129
+ },
130
+ {
131
+ "epoch": 9.99,
132
+ "eval_accuracy": 0.9897513002654229,
133
+ "eval_f1": 0.9889666056460926,
134
+ "eval_loss": 0.04022372514009476,
135
+ "eval_precision": 0.9889842973563904,
136
+ "eval_recall": 0.9889489145687492,
137
+ "eval_runtime": 17.8198,
138
+ "eval_samples_per_second": 822.064,
139
+ "eval_steps_per_second": 3.255,
140
+ "step": 1280
141
+ },
142
+ {
143
+ "epoch": 10.99,
144
+ "eval_accuracy": 0.9897282867181147,
145
+ "eval_f1": 0.9889296106084937,
146
+ "eval_loss": 0.04293292760848999,
147
+ "eval_precision": 0.9888785102450464,
148
+ "eval_recall": 0.9889807162534435,
149
+ "eval_runtime": 18.5105,
150
+ "eval_samples_per_second": 791.389,
151
+ "eval_steps_per_second": 3.133,
152
+ "step": 1408
153
+ },
154
+ {
155
+ "epoch": 11.71,
156
+ "learning_rate": 3.917748917748918e-05,
157
+ "loss": 0.0128,
158
+ "step": 1500
159
+ },
160
+ {
161
+ "epoch": 11.99,
162
+ "eval_accuracy": 0.989628561346446,
163
+ "eval_f1": 0.9888770954828604,
164
+ "eval_loss": 0.045427996665239334,
165
+ "eval_precision": 0.9889006825762183,
166
+ "eval_recall": 0.9888535095146666,
167
+ "eval_runtime": 18.3742,
168
+ "eval_samples_per_second": 797.257,
169
+ "eval_steps_per_second": 3.157,
170
+ "step": 1536
171
+ },
172
+ {
173
+ "epoch": 12.99,
174
+ "eval_accuracy": 0.9896899308059345,
175
+ "eval_f1": 0.9889251953792704,
176
+ "eval_loss": 0.04608777165412903,
177
+ "eval_precision": 0.9889134021028363,
178
+ "eval_recall": 0.9889369889369889,
179
+ "eval_runtime": 18.3253,
180
+ "eval_samples_per_second": 799.387,
181
+ "eval_steps_per_second": 3.165,
182
+ "step": 1664
183
+ },
184
+ {
185
+ "epoch": 13.99,
186
+ "eval_accuracy": 0.989889381549272,
187
+ "eval_f1": 0.9891329626839416,
188
+ "eval_loss": 0.04769197106361389,
189
+ "eval_precision": 0.9892057156034064,
190
+ "eval_recall": 0.9890602204651792,
191
+ "eval_runtime": 18.8558,
192
+ "eval_samples_per_second": 776.896,
193
+ "eval_steps_per_second": 3.076,
194
+ "step": 1792
195
+ },
196
+ {
197
+ "epoch": 14.99,
198
+ "eval_accuracy": 0.9897743138127311,
199
+ "eval_f1": 0.9890247489724366,
200
+ "eval_loss": 0.0506986528635025,
201
+ "eval_precision": 0.9889972294324113,
202
+ "eval_recall": 0.9890522700440055,
203
+ "eval_runtime": 17.9307,
204
+ "eval_samples_per_second": 816.978,
205
+ "eval_steps_per_second": 3.235,
206
+ "step": 1920
207
+ },
208
+ {
209
+ "epoch": 15.62,
210
+ "learning_rate": 3.376623376623377e-05,
211
+ "loss": 0.0069,
212
+ "step": 2000
213
+ },
214
+ {
215
+ "epoch": 15.99,
216
+ "eval_accuracy": 0.9900581475628654,
217
+ "eval_f1": 0.9893163454944793,
218
+ "eval_loss": 0.05137912556529045,
219
+ "eval_precision": 0.9893419096308429,
220
+ "eval_recall": 0.9892907826792124,
221
+ "eval_runtime": 18.9861,
222
+ "eval_samples_per_second": 771.564,
223
+ "eval_steps_per_second": 3.055,
224
+ "step": 2048
225
+ },
226
+ {
227
+ "epoch": 16.99,
228
+ "eval_accuracy": 0.989889381549272,
229
+ "eval_f1": 0.989197257872486,
230
+ "eval_loss": 0.053016748279333115,
231
+ "eval_precision": 0.9892070887364145,
232
+ "eval_recall": 0.9891874272039561,
233
+ "eval_runtime": 18.1987,
234
+ "eval_samples_per_second": 804.946,
235
+ "eval_steps_per_second": 3.187,
236
+ "step": 2176
237
+ },
238
+ {
239
+ "epoch": 17.99,
240
+ "eval_accuracy": 0.9898203409073475,
241
+ "eval_f1": 0.9890543664272952,
242
+ "eval_loss": 0.05524001270532608,
243
+ "eval_precision": 0.9890445373741871,
244
+ "eval_recall": 0.989064195675766,
245
+ "eval_runtime": 18.53,
246
+ "eval_samples_per_second": 790.554,
247
+ "eval_steps_per_second": 3.13,
248
+ "step": 2304
249
+ },
250
+ {
251
+ "epoch": 18.99,
252
+ "eval_accuracy": 0.9898395188634376,
253
+ "eval_f1": 0.9891659296212747,
254
+ "eval_loss": 0.0566512756049633,
255
+ "eval_precision": 0.9891325086653735,
256
+ "eval_recall": 0.9891993528357165,
257
+ "eval_runtime": 19.0959,
258
+ "eval_samples_per_second": 767.129,
259
+ "eval_steps_per_second": 3.037,
260
+ "step": 2432
261
+ },
262
+ {
263
+ "epoch": 19.53,
264
+ "learning_rate": 2.8354978354978357e-05,
265
+ "loss": 0.0037,
266
+ "step": 2500
267
+ },
268
+ {
269
+ "epoch": 19.99,
270
+ "eval_accuracy": 0.9899507510087605,
271
+ "eval_f1": 0.989249406222982,
272
+ "eval_loss": 0.057712409645318985,
273
+ "eval_precision": 0.9892159824466563,
274
+ "eval_recall": 0.9892828322580389,
275
+ "eval_runtime": 18.1495,
276
+ "eval_samples_per_second": 807.13,
277
+ "eval_steps_per_second": 3.196,
278
+ "step": 2560
279
+ },
280
+ {
281
+ "epoch": 20.99,
282
+ "eval_accuracy": 0.989897052731708,
283
+ "eval_f1": 0.9892537230374182,
284
+ "eval_loss": 0.05920035019516945,
285
+ "eval_precision": 0.9891888454322872,
286
+ "eval_recall": 0.9893186091533199,
287
+ "eval_runtime": 18.5483,
288
+ "eval_samples_per_second": 789.775,
289
+ "eval_steps_per_second": 3.127,
290
+ "step": 2688
291
+ },
292
+ {
293
+ "epoch": 21.99,
294
+ "eval_accuracy": 0.9899584221911966,
295
+ "eval_f1": 0.9892630842496084,
296
+ "eval_loss": 0.06059529632329941,
297
+ "eval_precision": 0.9892512869437322,
298
+ "eval_recall": 0.9892748818368653,
299
+ "eval_runtime": 18.2219,
300
+ "eval_samples_per_second": 803.923,
301
+ "eval_steps_per_second": 3.183,
302
+ "step": 2816
303
+ },
304
+ {
305
+ "epoch": 22.99,
306
+ "eval_accuracy": 0.9899699289648506,
307
+ "eval_f1": 0.9892710345759693,
308
+ "eval_loss": 0.06275586783885956,
309
+ "eval_precision": 0.9892592371752827,
310
+ "eval_recall": 0.9892828322580389,
311
+ "eval_runtime": 18.6724,
312
+ "eval_samples_per_second": 784.529,
313
+ "eval_steps_per_second": 3.106,
314
+ "step": 2944
315
+ },
316
+ {
317
+ "epoch": 23.43,
318
+ "learning_rate": 2.2943722943722946e-05,
319
+ "loss": 0.0023,
320
+ "step": 3000
321
+ },
322
+ {
323
+ "epoch": 23.99,
324
+ "eval_accuracy": 0.9899162306877982,
325
+ "eval_f1": 0.9891494254701287,
326
+ "eval_loss": 0.06293565034866333,
327
+ "eval_precision": 0.9891710528408098,
328
+ "eval_recall": 0.9891277990451545,
329
+ "eval_runtime": 18.198,
330
+ "eval_samples_per_second": 804.98,
331
+ "eval_steps_per_second": 3.187,
332
+ "step": 3072
333
+ },
334
+ {
335
+ "epoch": 24.99,
336
+ "eval_accuracy": 0.9899776001472868,
337
+ "eval_f1": 0.9892692816043408,
338
+ "eval_loss": 0.06246413290500641,
339
+ "eval_precision": 0.9892358571564855,
340
+ "eval_recall": 0.9893027083109728,
341
+ "eval_runtime": 18.2292,
342
+ "eval_samples_per_second": 803.601,
343
+ "eval_steps_per_second": 3.182,
344
+ "step": 3200
345
+ },
346
+ {
347
+ "epoch": 25.99,
348
+ "eval_accuracy": 0.990008284877031,
349
+ "eval_f1": 0.9893007845031315,
350
+ "eval_loss": 0.06362640857696533,
351
+ "eval_precision": 0.9892948855550521,
352
+ "eval_recall": 0.9893066835215596,
353
+ "eval_runtime": 19.3067,
354
+ "eval_samples_per_second": 758.751,
355
+ "eval_steps_per_second": 3.004,
356
+ "step": 3328
357
+ },
358
+ {
359
+ "epoch": 26.99,
360
+ "eval_accuracy": 0.9900926678838277,
361
+ "eval_f1": 0.9893981976538494,
362
+ "eval_loss": 0.0649913027882576,
363
+ "eval_precision": 0.9893903316465458,
364
+ "eval_recall": 0.9894060637862291,
365
+ "eval_runtime": 18.4146,
366
+ "eval_samples_per_second": 795.511,
367
+ "eval_steps_per_second": 3.15,
368
+ "step": 3456
369
+ },
370
+ {
371
+ "epoch": 27.34,
372
+ "learning_rate": 1.7532467532467535e-05,
373
+ "loss": 0.0017,
374
+ "step": 3500
375
+ },
376
+ {
377
+ "epoch": 27.99,
378
+ "eval_accuracy": 0.9901003390662637,
379
+ "eval_f1": 0.989384347826087,
380
+ "eval_loss": 0.0644073411822319,
381
+ "eval_precision": 0.9893705826701542,
382
+ "eval_recall": 0.9893981133650556,
383
+ "eval_runtime": 18.6787,
384
+ "eval_samples_per_second": 784.263,
385
+ "eval_steps_per_second": 3.105,
386
+ "step": 3584
387
+ },
388
+ {
389
+ "epoch": 28.99,
390
+ "eval_accuracy": 0.9901425305696621,
391
+ "eval_f1": 0.9894557748763214,
392
+ "eval_loss": 0.06558605283498764,
393
+ "eval_precision": 0.9894538082366036,
394
+ "eval_recall": 0.9894577415238572,
395
+ "eval_runtime": 18.1086,
396
+ "eval_samples_per_second": 808.954,
397
+ "eval_steps_per_second": 3.203,
398
+ "step": 3712
399
+ },
400
+ {
401
+ "epoch": 29.99,
402
+ "eval_accuracy": 0.9901502017520981,
403
+ "eval_f1": 0.9894956104173334,
404
+ "eval_loss": 0.0667632669210434,
405
+ "eval_precision": 0.989485776979218,
406
+ "eval_recall": 0.9895054440508986,
407
+ "eval_runtime": 18.5261,
408
+ "eval_samples_per_second": 790.723,
409
+ "eval_steps_per_second": 3.131,
410
+ "step": 3840
411
+ },
412
+ {
413
+ "epoch": 30.99,
414
+ "eval_accuracy": 0.9901003390662637,
415
+ "eval_f1": 0.9894474469341146,
416
+ "eval_loss": 0.06663960218429565,
417
+ "eval_precision": 0.9894808819203155,
418
+ "eval_recall": 0.9894140142074026,
419
+ "eval_runtime": 18.0695,
420
+ "eval_samples_per_second": 810.702,
421
+ "eval_steps_per_second": 3.21,
422
+ "step": 3968
423
+ },
424
+ {
425
+ "epoch": 31.25,
426
+ "learning_rate": 1.2121212121212122e-05,
427
+ "loss": 0.0011,
428
+ "step": 4000
429
+ },
430
+ {
431
+ "epoch": 31.99,
432
+ "eval_accuracy": 0.9900466407892112,
433
+ "eval_f1": 0.9893740508996081,
434
+ "eval_loss": 0.06780469417572021,
435
+ "eval_precision": 0.9893937165323654,
436
+ "eval_recall": 0.9893543860486009,
437
+ "eval_runtime": 18.1642,
438
+ "eval_samples_per_second": 806.478,
439
+ "eval_steps_per_second": 3.193,
440
+ "step": 4096
441
+ },
442
+ {
443
+ "epoch": 32.99,
444
+ "eval_accuracy": 0.9902230779852407,
445
+ "eval_f1": 0.9895929814239887,
446
+ "eval_loss": 0.06849976629018784,
447
+ "eval_precision": 0.9895851138680967,
448
+ "eval_recall": 0.9896008491049814,
449
+ "eval_runtime": 18.9151,
450
+ "eval_samples_per_second": 774.46,
451
+ "eval_steps_per_second": 3.066,
452
+ "step": 4224
453
+ },
454
+ {
455
+ "epoch": 33.99,
456
+ "eval_accuracy": 0.99014636616088,
457
+ "eval_f1": 0.9894398320867711,
458
+ "eval_loss": 0.06920044124126434,
459
+ "eval_precision": 0.9894417987104366,
460
+ "eval_recall": 0.9894378654709233,
461
+ "eval_runtime": 18.3423,
462
+ "eval_samples_per_second": 798.645,
463
+ "eval_steps_per_second": 3.162,
464
+ "step": 4352
465
+ },
466
+ {
467
+ "epoch": 34.99,
468
+ "eval_accuracy": 0.9902000644379325,
469
+ "eval_f1": 0.9895391709648887,
470
+ "eval_loss": 0.06976373493671417,
471
+ "eval_precision": 0.9895450714751387,
472
+ "eval_recall": 0.9895332705250061,
473
+ "eval_runtime": 18.8004,
474
+ "eval_samples_per_second": 779.185,
475
+ "eval_steps_per_second": 3.085,
476
+ "step": 4480
477
+ },
478
+ {
479
+ "epoch": 35.16,
480
+ "learning_rate": 6.709956709956711e-06,
481
+ "loss": 0.0009,
482
+ "step": 4500
483
+ },
484
+ {
485
+ "epoch": 35.99,
486
+ "eval_accuracy": 0.9900658187453014,
487
+ "eval_f1": 0.9893825501754999,
488
+ "eval_loss": 0.06981877237558365,
489
+ "eval_precision": 0.9893510881446884,
490
+ "eval_recall": 0.9894140142074026,
491
+ "eval_runtime": 18.1896,
492
+ "eval_samples_per_second": 805.351,
493
+ "eval_steps_per_second": 3.189,
494
+ "step": 4608
495
+ },
496
+ {
497
+ "epoch": 36.99,
498
+ "eval_accuracy": 0.9902039000291505,
499
+ "eval_f1": 0.9894797097330076,
500
+ "eval_loss": 0.0695314109325409,
501
+ "eval_precision": 0.9894698764529106,
502
+ "eval_recall": 0.9894895432085514,
503
+ "eval_runtime": 18.7061,
504
+ "eval_samples_per_second": 783.113,
505
+ "eval_steps_per_second": 3.101,
506
+ "step": 4736
507
+ },
508
+ {
509
+ "epoch": 37.99,
510
+ "eval_accuracy": 0.9901732152994063,
511
+ "eval_f1": 0.9894400419774727,
512
+ "eval_loss": 0.06961216777563095,
513
+ "eval_precision": 0.9894223430643007,
514
+ "eval_recall": 0.9894577415238572,
515
+ "eval_runtime": 18.6705,
516
+ "eval_samples_per_second": 784.607,
517
+ "eval_steps_per_second": 3.107,
518
+ "step": 4864
519
+ },
520
+ {
521
+ "epoch": 38.99,
522
+ "eval_accuracy": 0.9901962288467144,
523
+ "eval_f1": 0.9894779103694458,
524
+ "eval_loss": 0.06985215842723846,
525
+ "eval_precision": 0.9894503782202383,
526
+ "eval_recall": 0.9895054440508986,
527
+ "eval_runtime": 18.2919,
528
+ "eval_samples_per_second": 800.846,
529
+ "eval_steps_per_second": 3.171,
530
+ "step": 4992
531
+ },
532
+ {
533
+ "epoch": 39.06,
534
+ "learning_rate": 1.2987012987012988e-06,
535
+ "loss": 0.0007,
536
+ "step": 5000
537
+ },
538
+ {
539
+ "epoch": 39.99,
540
+ "eval_accuracy": 0.990138694978444,
541
+ "eval_f1": 0.9894261920378432,
542
+ "eval_loss": 0.06969785690307617,
543
+ "eval_precision": 0.9894025940986839,
544
+ "eval_recall": 0.9894497911026837,
545
+ "eval_runtime": 18.675,
546
+ "eval_samples_per_second": 784.419,
547
+ "eval_steps_per_second": 3.106,
548
+ "step": 5120
549
+ },
550
+ {
551
+ "epoch": 39.99,
552
+ "step": 5120,
553
+ "total_flos": 2.72643266432467e+17,
554
+ "train_loss": 0.07192220802244265,
555
+ "train_runtime": 4057.7347,
556
+ "train_samples_per_second": 1299.572,
557
+ "train_steps_per_second": 1.262
558
+ }
559
+ ],
560
+ "max_steps": 5120,
561
+ "num_train_epochs": 40,
562
+ "total_flos": 2.72643266432467e+17,
563
+ "trial_name": null,
564
+ "trial_params": null
565
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:83ef6dcd651fd93358c9bef18839cad5184cd6bdf5d92b85da1278d4445f323b
3
+ size 3439
vocab.txt ADDED
The diff for this file is too large to render. See raw diff