HueyNemud
commited on
Commit
•
bdd207b
1
Parent(s):
4d92e52
model data
Browse files- README +34 -0
- config.json +51 -0
- optimizer.pt +3 -0
- pytorch_model.bin +3 -0
- rng_state.pth +3 -0
- scheduler.pt +3 -0
- sentencepiece.bpe.model +3 -0
- special_tokens_map.json +1 -0
- tokenizer.json +0 -0
- tokenizer_config.json +1 -0
- trainer_state.json +280 -0
- training_args.bin +3 -0
README
ADDED
@@ -0,0 +1,34 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
***** Running training *****
|
2 |
+
Num examples = 6004
|
3 |
+
Num Epochs = 14
|
4 |
+
Instantaneous batch size per device = 16
|
5 |
+
Total train batch size (w. parallel, distributed & accumulation) = 16
|
6 |
+
Gradient Accumulation steps = 1
|
7 |
+
Total optimization steps = 5000
|
8 |
+
[2500/5000 12:15 < 12:15, 3.40 it/s, Epoch 6/14]
|
9 |
+
Step Training Loss Validation Loss Precision Recall F1 Accuracy
|
10 |
+
100 No log 0.247325 0.912333 0.925744 0.918990 0.960895
|
11 |
+
200 No log 0.171694 0.930514 0.928760 0.929636 0.963143
|
12 |
+
300 No log 0.132045 0.935375 0.943837 0.939587 0.970515
|
13 |
+
400 No log 0.142074 0.936490 0.939314 0.937900 0.968141
|
14 |
+
500 0.245500 0.105783 0.949794 0.955522 0.952649 0.975887
|
15 |
+
600 0.245500 0.107380 0.948120 0.950622 0.949369 0.973138
|
16 |
+
700 0.245500 0.111011 0.951504 0.954014 0.952757 0.972889
|
17 |
+
800 0.245500 0.093002 0.947999 0.955145 0.951558 0.975387
|
18 |
+
900 0.245500 0.100926 0.956193 0.954391 0.955291 0.976262
|
19 |
+
1000 0.086800 0.090775 0.955263 0.957784 0.956522 0.976637
|
20 |
+
1100 0.086800 0.099250 0.953829 0.957784 0.955802 0.976137
|
21 |
+
1200 0.086800 0.088502 0.952327 0.956276 0.954298 0.976762
|
22 |
+
1300 0.086800 0.094135 0.957078 0.958161 0.957619 0.977011
|
23 |
+
1400 0.086800 0.099687 0.957768 0.957407 0.957587 0.975887
|
24 |
+
1500 0.056000 0.108563 0.958930 0.959291 0.959111 0.974888
|
25 |
+
1600 0.056000 0.101031 0.957784 0.957784 0.957784 0.976262
|
26 |
+
1700 0.056000 0.099654 0.960135 0.962307 0.961220 0.978386
|
27 |
+
1800 0.056000 0.106387 0.954118 0.956276 0.955196 0.975512
|
28 |
+
1900 0.056000 0.096317 0.953846 0.958161 0.955998 0.975762
|
29 |
+
2000 0.040000 0.094224 0.959444 0.963061 0.961249 0.977761
|
30 |
+
2100 0.040000 0.110398 0.956669 0.957030 0.956849 0.975262
|
31 |
+
2200 0.040000 0.096151 0.955706 0.959668 0.957683 0.977386
|
32 |
+
2300 0.040000 0.108148 0.945149 0.954768 0.949934 0.974513
|
33 |
+
2400 0.040000 0.109966 0.950991 0.958161 0.954563 0.976637
|
34 |
+
2500 0.030900 0.117515 0.947921 0.953637 0.950770 0.973888
|
config.json
ADDED
@@ -0,0 +1,51 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_name_or_path": "Jean-Baptiste/camembert-ner",
|
3 |
+
"architectures": [
|
4 |
+
"CamembertForTokenClassification"
|
5 |
+
],
|
6 |
+
"attention_probs_dropout_prob": 0.1,
|
7 |
+
"bos_token_id": 5,
|
8 |
+
"classifier_dropout": null,
|
9 |
+
"eos_token_id": 6,
|
10 |
+
"gradient_checkpointing": false,
|
11 |
+
"hidden_act": "gelu",
|
12 |
+
"hidden_dropout_prob": 0.1,
|
13 |
+
"hidden_size": 768,
|
14 |
+
"id2label": {
|
15 |
+
"0": "O",
|
16 |
+
"1": "I-LOC",
|
17 |
+
"2": "I-PER",
|
18 |
+
"3": "I-MISC",
|
19 |
+
"4": "I-ORG",
|
20 |
+
"5": "I-CARDINAL",
|
21 |
+
"6": "I-ACT",
|
22 |
+
"7": "I-TITRE",
|
23 |
+
"8": "I-FT"
|
24 |
+
},
|
25 |
+
"initializer_range": 0.02,
|
26 |
+
"intermediate_size": 3072,
|
27 |
+
"label2id": {
|
28 |
+
"I-ACT": 6,
|
29 |
+
"I-CARDINAL": 5,
|
30 |
+
"I-FT": 8,
|
31 |
+
"I-LOC": 1,
|
32 |
+
"I-MISC": 3,
|
33 |
+
"I-ORG": 4,
|
34 |
+
"I-PER": 2,
|
35 |
+
"I-TITRE": 7,
|
36 |
+
"O": 0
|
37 |
+
},
|
38 |
+
"layer_norm_eps": 1e-05,
|
39 |
+
"max_position_embeddings": 514,
|
40 |
+
"model_type": "camembert",
|
41 |
+
"num_attention_heads": 12,
|
42 |
+
"num_hidden_layers": 12,
|
43 |
+
"output_past": true,
|
44 |
+
"pad_token_id": 1,
|
45 |
+
"position_embedding_type": "absolute",
|
46 |
+
"torch_dtype": "float32",
|
47 |
+
"transformers_version": "4.15.0",
|
48 |
+
"type_vocab_size": 1,
|
49 |
+
"use_cache": true,
|
50 |
+
"vocab_size": 32005
|
51 |
+
}
|
optimizer.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a79e5d7804ef407c5b2fbffe41db1698f53c051a01ef68ea9f8224e2a2d623fd
|
3 |
+
size 880421605
|
pytorch_model.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:392027a1e97297c1e36b64a814594211ff3e68f9b65f51c417cf8305841dbacb
|
3 |
+
size 440237809
|
rng_state.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a74c47781022724c1a2706ba33421f7e3935186d7b6f4f51050f89cce7c7ae1e
|
3 |
+
size 14503
|
scheduler.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:51041b89734cb38f61e5930792b2813f44d4562e4b27ac1f3179a1197589a078
|
3 |
+
size 623
|
sentencepiece.bpe.model
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:988bc5a00281c6d210a5d34bd143d0363741a432fefe741bf71e61b1869d4314
|
3 |
+
size 810912
|
special_tokens_map.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"bos_token": "<s>", "eos_token": "</s>", "unk_token": "<unk>", "sep_token": "</s>", "pad_token": "<pad>", "cls_token": "<s>", "mask_token": "<mask>", "additional_special_tokens": ["<s>NOTUSED", "</s>NOTUSED"]}
|
tokenizer.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
tokenizer_config.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"bos_token": "<s>", "eos_token": "</s>", "sep_token": "</s>", "cls_token": "<s>", "unk_token": "<unk>", "pad_token": "<pad>", "mask_token": {"content": "<mask>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "additional_special_tokens": ["<s>NOTUSED", "</s>NOTUSED"], "model_max_length": 512, "name_or_path": "Jean-Baptiste/camembert-ner", "special_tokens_map_file": "/root/.cache/huggingface/transformers/fe0e213c44079a9ee091098f81fff2941484006e9ba3001a9bf1ee9f87537599.cb3ec3a6c1200d181228d8825ae9767572abca54efa1bbb37fd83d721b2ef323", "sp_model_kwargs": {}, "tokenizer_class": "CamembertTokenizer"}
|
trainer_state.json
ADDED
@@ -0,0 +1,280 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"best_metric": 0.9612490594431903,
|
3 |
+
"best_model_checkpoint": "/content/drive/MyDrive/SODUCO/article_das_2022/44-camembert_finetuned_pero/checkpoint-2000",
|
4 |
+
"epoch": 5.319148936170213,
|
5 |
+
"global_step": 2000,
|
6 |
+
"is_hyper_param_search": false,
|
7 |
+
"is_local_process_zero": true,
|
8 |
+
"is_world_process_zero": true,
|
9 |
+
"log_history": [
|
10 |
+
{
|
11 |
+
"epoch": 0.27,
|
12 |
+
"eval_accuracy": 0.9608945527236382,
|
13 |
+
"eval_f1": 0.9189897100093545,
|
14 |
+
"eval_loss": 0.24732524156570435,
|
15 |
+
"eval_precision": 0.912332838038633,
|
16 |
+
"eval_recall": 0.9257444402563136,
|
17 |
+
"eval_runtime": 2.7834,
|
18 |
+
"eval_samples_per_second": 239.997,
|
19 |
+
"eval_steps_per_second": 15.09,
|
20 |
+
"step": 100
|
21 |
+
},
|
22 |
+
{
|
23 |
+
"epoch": 0.53,
|
24 |
+
"eval_accuracy": 0.9631434282858571,
|
25 |
+
"eval_f1": 0.9296359177513677,
|
26 |
+
"eval_loss": 0.17169421911239624,
|
27 |
+
"eval_precision": 0.9305135951661632,
|
28 |
+
"eval_recall": 0.9287598944591029,
|
29 |
+
"eval_runtime": 2.6347,
|
30 |
+
"eval_samples_per_second": 253.541,
|
31 |
+
"eval_steps_per_second": 15.941,
|
32 |
+
"step": 200
|
33 |
+
},
|
34 |
+
{
|
35 |
+
"epoch": 0.8,
|
36 |
+
"eval_accuracy": 0.9705147426286856,
|
37 |
+
"eval_f1": 0.9395872420262664,
|
38 |
+
"eval_loss": 0.13204523921012878,
|
39 |
+
"eval_precision": 0.9353754202465446,
|
40 |
+
"eval_recall": 0.9438371654730494,
|
41 |
+
"eval_runtime": 2.6,
|
42 |
+
"eval_samples_per_second": 256.926,
|
43 |
+
"eval_steps_per_second": 16.154,
|
44 |
+
"step": 300
|
45 |
+
},
|
46 |
+
{
|
47 |
+
"epoch": 1.06,
|
48 |
+
"eval_accuracy": 0.9681409295352323,
|
49 |
+
"eval_f1": 0.9378998870907039,
|
50 |
+
"eval_loss": 0.1420740932226181,
|
51 |
+
"eval_precision": 0.936490041337843,
|
52 |
+
"eval_recall": 0.9393139841688655,
|
53 |
+
"eval_runtime": 2.565,
|
54 |
+
"eval_samples_per_second": 260.431,
|
55 |
+
"eval_steps_per_second": 16.374,
|
56 |
+
"step": 400
|
57 |
+
},
|
58 |
+
{
|
59 |
+
"epoch": 1.33,
|
60 |
+
"learning_rate": 9e-05,
|
61 |
+
"loss": 0.2455,
|
62 |
+
"step": 500
|
63 |
+
},
|
64 |
+
{
|
65 |
+
"epoch": 1.33,
|
66 |
+
"eval_accuracy": 0.9758870564717641,
|
67 |
+
"eval_f1": 0.9526493799323562,
|
68 |
+
"eval_loss": 0.1057828888297081,
|
69 |
+
"eval_precision": 0.9497939303109779,
|
70 |
+
"eval_recall": 0.9555220505088579,
|
71 |
+
"eval_runtime": 2.6085,
|
72 |
+
"eval_samples_per_second": 256.084,
|
73 |
+
"eval_steps_per_second": 16.101,
|
74 |
+
"step": 500
|
75 |
+
},
|
76 |
+
{
|
77 |
+
"epoch": 1.6,
|
78 |
+
"eval_accuracy": 0.9731384307846077,
|
79 |
+
"eval_f1": 0.9493694711086017,
|
80 |
+
"eval_loss": 0.10737968236207962,
|
81 |
+
"eval_precision": 0.9481203007518797,
|
82 |
+
"eval_recall": 0.9506219374293253,
|
83 |
+
"eval_runtime": 2.5863,
|
84 |
+
"eval_samples_per_second": 258.283,
|
85 |
+
"eval_steps_per_second": 16.239,
|
86 |
+
"step": 600
|
87 |
+
},
|
88 |
+
{
|
89 |
+
"epoch": 1.86,
|
90 |
+
"eval_accuracy": 0.972888555722139,
|
91 |
+
"eval_f1": 0.9527573875399962,
|
92 |
+
"eval_loss": 0.11101129651069641,
|
93 |
+
"eval_precision": 0.9515037593984963,
|
94 |
+
"eval_recall": 0.9540143234074633,
|
95 |
+
"eval_runtime": 2.5978,
|
96 |
+
"eval_samples_per_second": 257.138,
|
97 |
+
"eval_steps_per_second": 16.167,
|
98 |
+
"step": 700
|
99 |
+
},
|
100 |
+
{
|
101 |
+
"epoch": 2.13,
|
102 |
+
"eval_accuracy": 0.9753873063468266,
|
103 |
+
"eval_f1": 0.9515583927900865,
|
104 |
+
"eval_loss": 0.09300221502780914,
|
105 |
+
"eval_precision": 0.9479985035540591,
|
106 |
+
"eval_recall": 0.9551451187335093,
|
107 |
+
"eval_runtime": 2.6079,
|
108 |
+
"eval_samples_per_second": 256.141,
|
109 |
+
"eval_steps_per_second": 16.105,
|
110 |
+
"step": 800
|
111 |
+
},
|
112 |
+
{
|
113 |
+
"epoch": 2.39,
|
114 |
+
"eval_accuracy": 0.9762618690654673,
|
115 |
+
"eval_f1": 0.9552914544425579,
|
116 |
+
"eval_loss": 0.10092608630657196,
|
117 |
+
"eval_precision": 0.9561933534743202,
|
118 |
+
"eval_recall": 0.9543912551828119,
|
119 |
+
"eval_runtime": 2.604,
|
120 |
+
"eval_samples_per_second": 256.532,
|
121 |
+
"eval_steps_per_second": 16.129,
|
122 |
+
"step": 900
|
123 |
+
},
|
124 |
+
{
|
125 |
+
"epoch": 2.66,
|
126 |
+
"learning_rate": 8e-05,
|
127 |
+
"loss": 0.0868,
|
128 |
+
"step": 1000
|
129 |
+
},
|
130 |
+
{
|
131 |
+
"epoch": 2.66,
|
132 |
+
"eval_accuracy": 0.9766366816591704,
|
133 |
+
"eval_f1": 0.9565217391304348,
|
134 |
+
"eval_loss": 0.09077496081590652,
|
135 |
+
"eval_precision": 0.9552631578947368,
|
136 |
+
"eval_recall": 0.9577836411609498,
|
137 |
+
"eval_runtime": 2.6166,
|
138 |
+
"eval_samples_per_second": 255.294,
|
139 |
+
"eval_steps_per_second": 16.051,
|
140 |
+
"step": 1000
|
141 |
+
},
|
142 |
+
{
|
143 |
+
"epoch": 2.93,
|
144 |
+
"eval_accuracy": 0.9761369315342329,
|
145 |
+
"eval_f1": 0.9558021440662028,
|
146 |
+
"eval_loss": 0.09924957156181335,
|
147 |
+
"eval_precision": 0.9538288288288288,
|
148 |
+
"eval_recall": 0.9577836411609498,
|
149 |
+
"eval_runtime": 2.6844,
|
150 |
+
"eval_samples_per_second": 248.843,
|
151 |
+
"eval_steps_per_second": 15.646,
|
152 |
+
"step": 1100
|
153 |
+
},
|
154 |
+
{
|
155 |
+
"epoch": 3.19,
|
156 |
+
"eval_accuracy": 0.9767616191904048,
|
157 |
+
"eval_f1": 0.9542975362046267,
|
158 |
+
"eval_loss": 0.08850151300430298,
|
159 |
+
"eval_precision": 0.9523273273273273,
|
160 |
+
"eval_recall": 0.9562759140595553,
|
161 |
+
"eval_runtime": 2.8038,
|
162 |
+
"eval_samples_per_second": 238.25,
|
163 |
+
"eval_steps_per_second": 14.98,
|
164 |
+
"step": 1200
|
165 |
+
},
|
166 |
+
{
|
167 |
+
"epoch": 3.46,
|
168 |
+
"eval_accuracy": 0.9770114942528736,
|
169 |
+
"eval_f1": 0.9576191373139952,
|
170 |
+
"eval_loss": 0.09413458406925201,
|
171 |
+
"eval_precision": 0.9570783132530121,
|
172 |
+
"eval_recall": 0.9581605729362985,
|
173 |
+
"eval_runtime": 2.631,
|
174 |
+
"eval_samples_per_second": 253.897,
|
175 |
+
"eval_steps_per_second": 15.964,
|
176 |
+
"step": 1300
|
177 |
+
},
|
178 |
+
{
|
179 |
+
"epoch": 3.72,
|
180 |
+
"eval_accuracy": 0.9758870564717641,
|
181 |
+
"eval_f1": 0.9575871819038644,
|
182 |
+
"eval_loss": 0.09968729317188263,
|
183 |
+
"eval_precision": 0.9577677224736049,
|
184 |
+
"eval_recall": 0.9574067093856012,
|
185 |
+
"eval_runtime": 2.6616,
|
186 |
+
"eval_samples_per_second": 250.976,
|
187 |
+
"eval_steps_per_second": 15.78,
|
188 |
+
"step": 1400
|
189 |
+
},
|
190 |
+
{
|
191 |
+
"epoch": 3.99,
|
192 |
+
"learning_rate": 7e-05,
|
193 |
+
"loss": 0.056,
|
194 |
+
"step": 1500
|
195 |
+
},
|
196 |
+
{
|
197 |
+
"epoch": 3.99,
|
198 |
+
"eval_accuracy": 0.974887556221889,
|
199 |
+
"eval_f1": 0.9591106086301111,
|
200 |
+
"eval_loss": 0.10856281220912933,
|
201 |
+
"eval_precision": 0.9589299171062547,
|
202 |
+
"eval_recall": 0.9592913682623445,
|
203 |
+
"eval_runtime": 2.616,
|
204 |
+
"eval_samples_per_second": 255.356,
|
205 |
+
"eval_steps_per_second": 16.055,
|
206 |
+
"step": 1500
|
207 |
+
},
|
208 |
+
{
|
209 |
+
"epoch": 4.26,
|
210 |
+
"eval_accuracy": 0.9762618690654673,
|
211 |
+
"eval_f1": 0.9577836411609498,
|
212 |
+
"eval_loss": 0.10103093087673187,
|
213 |
+
"eval_precision": 0.9577836411609498,
|
214 |
+
"eval_recall": 0.9577836411609498,
|
215 |
+
"eval_runtime": 2.6677,
|
216 |
+
"eval_samples_per_second": 250.406,
|
217 |
+
"eval_steps_per_second": 15.744,
|
218 |
+
"step": 1600
|
219 |
+
},
|
220 |
+
{
|
221 |
+
"epoch": 4.52,
|
222 |
+
"eval_accuracy": 0.9783858070964517,
|
223 |
+
"eval_f1": 0.9612198795180723,
|
224 |
+
"eval_loss": 0.09965386986732483,
|
225 |
+
"eval_precision": 0.9601353892440767,
|
226 |
+
"eval_recall": 0.9623068224651338,
|
227 |
+
"eval_runtime": 2.9477,
|
228 |
+
"eval_samples_per_second": 226.62,
|
229 |
+
"eval_steps_per_second": 14.249,
|
230 |
+
"step": 1700
|
231 |
+
},
|
232 |
+
{
|
233 |
+
"epoch": 4.79,
|
234 |
+
"eval_accuracy": 0.975512243878061,
|
235 |
+
"eval_f1": 0.9551957831325302,
|
236 |
+
"eval_loss": 0.10638727992773056,
|
237 |
+
"eval_precision": 0.9541180895073336,
|
238 |
+
"eval_recall": 0.9562759140595553,
|
239 |
+
"eval_runtime": 2.6219,
|
240 |
+
"eval_samples_per_second": 254.779,
|
241 |
+
"eval_steps_per_second": 16.019,
|
242 |
+
"step": 1800
|
243 |
+
},
|
244 |
+
{
|
245 |
+
"epoch": 5.05,
|
246 |
+
"eval_accuracy": 0.9757621189405298,
|
247 |
+
"eval_f1": 0.9559984956750659,
|
248 |
+
"eval_loss": 0.09631699323654175,
|
249 |
+
"eval_precision": 0.9538461538461539,
|
250 |
+
"eval_recall": 0.9581605729362985,
|
251 |
+
"eval_runtime": 2.6401,
|
252 |
+
"eval_samples_per_second": 253.021,
|
253 |
+
"eval_steps_per_second": 15.908,
|
254 |
+
"step": 1900
|
255 |
+
},
|
256 |
+
{
|
257 |
+
"epoch": 5.32,
|
258 |
+
"learning_rate": 6e-05,
|
259 |
+
"loss": 0.04,
|
260 |
+
"step": 2000
|
261 |
+
},
|
262 |
+
{
|
263 |
+
"epoch": 5.32,
|
264 |
+
"eval_accuracy": 0.9777611194402799,
|
265 |
+
"eval_f1": 0.9612490594431903,
|
266 |
+
"eval_loss": 0.09422445297241211,
|
267 |
+
"eval_precision": 0.9594442358242583,
|
268 |
+
"eval_recall": 0.9630606860158312,
|
269 |
+
"eval_runtime": 2.6383,
|
270 |
+
"eval_samples_per_second": 253.191,
|
271 |
+
"eval_steps_per_second": 15.919,
|
272 |
+
"step": 2000
|
273 |
+
}
|
274 |
+
],
|
275 |
+
"max_steps": 5000,
|
276 |
+
"num_train_epochs": 14,
|
277 |
+
"total_flos": 790348180152456.0,
|
278 |
+
"trial_name": null,
|
279 |
+
"trial_params": null
|
280 |
+
}
|
training_args.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:414647f811ebf9ca8f02bde659b5ecabd916d671a6e16632fd0c18225c2e2901
|
3 |
+
size 3055
|