José Antonio García Díaz
committed on
Commit
·
308b9d4
1
Parent(s):
816f243
update model with adjusted corpus
Browse files
- config.json +6 -6
- hyperparameters.csv +11 -0
- model.safetensors +1 -1
- optimizer.pt +1 -1
- scheduler.pt +1 -1
- trainer_state.json +22 -147
- training_args.bin +1 -1
- training_resume.json +1 -1
config.json
CHANGED
@@ -12,16 +12,16 @@
|
|
12 |
"hidden_dropout_prob": 0.0,
|
13 |
"hidden_size": 768,
|
14 |
"id2label": {
|
15 |
-
"0": "
|
16 |
-
"1": "
|
17 |
-
"2": "
|
18 |
},
|
19 |
"initializer_range": 0.02,
|
20 |
"intermediate_size": 3072,
|
21 |
"label2id": {
|
22 |
-
"
|
23 |
-
"
|
24 |
-
"
|
25 |
},
|
26 |
"layer_norm_eps": 1e-05,
|
27 |
"max_position_embeddings": 514,
|
|
|
12 |
"hidden_dropout_prob": 0.0,
|
13 |
"hidden_size": 768,
|
14 |
"id2label": {
|
15 |
+
"0": "LABEL_0",
|
16 |
+
"1": "LABEL_1",
|
17 |
+
"2": "LABEL_2"
|
18 |
},
|
19 |
"initializer_range": 0.02,
|
20 |
"intermediate_size": 3072,
|
21 |
"label2id": {
|
22 |
+
"LABEL_0": 0,
|
23 |
+
"LABEL_1": 1,
|
24 |
+
"LABEL_2": 2
|
25 |
},
|
26 |
"layer_norm_eps": 1e-05,
|
27 |
"max_position_embeddings": 514,
|
hyperparameters.csv
ADDED
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
objective,best,learning_rate,num_train_epochs,per_device_train_batch_size,warmup_steps,weight_decay,time_this_iter_s
|
2 |
+
0.8308016875112519,False,2.1765787781353998e-05,1,16,0,0.2779928681001568,70.68486714363098
|
3 |
+
0.8208579588481721,False,1.0571835122759025e-05,4,16,0,0.08349868710232541,69.73215866088867
|
4 |
+
0.832166006303993,False,1.2795649540620917e-05,1,8,0,0.2704258566938061,76.97464203834534
|
5 |
+
0.8394192385480443,True,4.65591694361634e-05,1,16,250,0.2103754328402818,70.83545136451721
|
6 |
+
0.8366512897395006,False,2.6370464742949582e-05,1,8,250,0.26429846288311887,77.25557279586792
|
7 |
+
0.8110507698879218,False,4.64238812312384e-05,4,8,500,0.13639810320763826,76.0791552066803
|
8 |
+
0.8303948363746132,False,3.9043689561606625e-05,1,16,250,0.06075214917397778,71.24034976959229
|
9 |
+
0.8015180450557305,False,2.2502123920264867e-05,3,8,250,0.21316207286076047,76.04998445510864
|
10 |
+
0.793748592229527,False,2.7807755379517034e-05,1,8,1000,0.16241717486314383,77.22514963150024
|
11 |
+
0.8033119941220733,False,4.858723883045379e-05,3,16,250,0.1574600638437009,69.87509036064148
|
model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 498606684
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7b114d27450122a79631f3e70e5f993549e165d051a7c80ce2645b0a526bf6df
|
3 |
size 498606684
|
optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 997333242
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:af3ecf6d848a47a23e0f58ce7e993b4ba7c9fd33c56a201776ce768cd6e0055e
|
3 |
size 997333242
|
scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4f753249e6e05a99e4c20b9f6975ad02a63ca0f1e4cdb54441b4628482980e8c
|
3 |
size 1064
|
trainer_state.json
CHANGED
@@ -1,159 +1,34 @@
|
|
1 |
{
|
2 |
-
"best_metric":
|
3 |
-
"best_model_checkpoint":
|
4 |
-
"epoch":
|
5 |
"eval_steps": 500,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": true,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
10 |
"log_history": [
|
11 |
{
|
12 |
-
"epoch": 0.
|
13 |
-
"grad_norm": 1.
|
14 |
-
"learning_rate":
|
15 |
-
"loss": 1.
|
16 |
"step": 1
|
17 |
},
|
18 |
-
{
|
19 |
-
"epoch": 0.26737967914438504,
|
20 |
-
"grad_norm": 11.680069923400879,
|
21 |
-
"learning_rate": 1.584171029515808e-05,
|
22 |
-
"loss": 0.7897,
|
23 |
-
"step": 1000
|
24 |
-
},
|
25 |
-
{
|
26 |
-
"epoch": 0.5347593582887701,
|
27 |
-
"grad_norm": 11.042302131652832,
|
28 |
-
"learning_rate": 1.4706917294072254e-05,
|
29 |
-
"loss": 0.6084,
|
30 |
-
"step": 2000
|
31 |
-
},
|
32 |
-
{
|
33 |
-
"epoch": 0.8021390374331551,
|
34 |
-
"grad_norm": 17.39794921875,
|
35 |
-
"learning_rate": 1.3572124292986432e-05,
|
36 |
-
"loss": 0.5471,
|
37 |
-
"step": 3000
|
38 |
-
},
|
39 |
{
|
40 |
"epoch": 1.0,
|
41 |
-
"eval_f1": 0.
|
42 |
-
"eval_loss": 0.
|
43 |
-
"eval_runtime":
|
44 |
-
"eval_samples_per_second":
|
45 |
-
"eval_steps_per_second":
|
46 |
-
"step":
|
47 |
-
},
|
48 |
-
{
|
49 |
-
"epoch": 1.0695187165775402,
|
50 |
-
"grad_norm": 2.134364604949951,
|
51 |
-
"learning_rate": 1.243733129190061e-05,
|
52 |
-
"loss": 0.4751,
|
53 |
-
"step": 4000
|
54 |
-
},
|
55 |
-
{
|
56 |
-
"epoch": 1.3368983957219251,
|
57 |
-
"grad_norm": 41.253849029541016,
|
58 |
-
"learning_rate": 1.1302538290814788e-05,
|
59 |
-
"loss": 0.3362,
|
60 |
-
"step": 5000
|
61 |
-
},
|
62 |
-
{
|
63 |
-
"epoch": 1.6042780748663101,
|
64 |
-
"grad_norm": 17.477731704711914,
|
65 |
-
"learning_rate": 1.0167745289728967e-05,
|
66 |
-
"loss": 0.343,
|
67 |
-
"step": 6000
|
68 |
-
},
|
69 |
-
{
|
70 |
-
"epoch": 1.8716577540106951,
|
71 |
-
"grad_norm": 31.10905647277832,
|
72 |
-
"learning_rate": 9.032952288643143e-06,
|
73 |
-
"loss": 0.3246,
|
74 |
-
"step": 7000
|
75 |
-
},
|
76 |
-
{
|
77 |
-
"epoch": 2.0,
|
78 |
-
"eval_f1": 0.807926137512124,
|
79 |
-
"eval_loss": 0.6871171593666077,
|
80 |
-
"eval_runtime": 29.3563,
|
81 |
-
"eval_samples_per_second": 339.757,
|
82 |
-
"eval_steps_per_second": 42.478,
|
83 |
-
"step": 7480
|
84 |
-
},
|
85 |
-
{
|
86 |
-
"epoch": 2.1390374331550803,
|
87 |
-
"grad_norm": 38.711708068847656,
|
88 |
-
"learning_rate": 7.89815928755732e-06,
|
89 |
-
"loss": 0.2229,
|
90 |
-
"step": 8000
|
91 |
-
},
|
92 |
-
{
|
93 |
-
"epoch": 2.406417112299465,
|
94 |
-
"grad_norm": 22.373706817626953,
|
95 |
-
"learning_rate": 6.7633662864715e-06,
|
96 |
-
"loss": 0.1367,
|
97 |
-
"step": 9000
|
98 |
-
},
|
99 |
-
{
|
100 |
-
"epoch": 2.6737967914438503,
|
101 |
-
"grad_norm": 53.95636749267578,
|
102 |
-
"learning_rate": 5.628573285385677e-06,
|
103 |
-
"loss": 0.1458,
|
104 |
-
"step": 10000
|
105 |
-
},
|
106 |
-
{
|
107 |
-
"epoch": 2.9411764705882355,
|
108 |
-
"grad_norm": 1.9054352045059204,
|
109 |
-
"learning_rate": 4.493780284299856e-06,
|
110 |
-
"loss": 0.1314,
|
111 |
-
"step": 11000
|
112 |
-
},
|
113 |
-
{
|
114 |
-
"epoch": 3.0,
|
115 |
-
"eval_f1": 0.8155244394452302,
|
116 |
-
"eval_loss": 0.9170186519622803,
|
117 |
-
"eval_runtime": 29.3912,
|
118 |
-
"eval_samples_per_second": 339.353,
|
119 |
-
"eval_steps_per_second": 42.428,
|
120 |
-
"step": 11220
|
121 |
-
},
|
122 |
-
{
|
123 |
-
"epoch": 3.2085561497326203,
|
124 |
-
"grad_norm": 0.004946542903780937,
|
125 |
-
"learning_rate": 3.358987283214033e-06,
|
126 |
-
"loss": 0.0585,
|
127 |
-
"step": 12000
|
128 |
-
},
|
129 |
-
{
|
130 |
-
"epoch": 3.4759358288770055,
|
131 |
-
"grad_norm": 0.1009206771850586,
|
132 |
-
"learning_rate": 2.224194282128211e-06,
|
133 |
-
"loss": 0.037,
|
134 |
-
"step": 13000
|
135 |
-
},
|
136 |
-
{
|
137 |
-
"epoch": 3.7433155080213902,
|
138 |
-
"grad_norm": 0.021053463220596313,
|
139 |
-
"learning_rate": 1.0894012810423893e-06,
|
140 |
-
"loss": 0.0432,
|
141 |
-
"step": 14000
|
142 |
-
},
|
143 |
-
{
|
144 |
-
"epoch": 4.0,
|
145 |
-
"eval_f1": 0.8205950902835818,
|
146 |
-
"eval_loss": 1.032849669456482,
|
147 |
-
"eval_runtime": 29.3592,
|
148 |
-
"eval_samples_per_second": 339.723,
|
149 |
-
"eval_steps_per_second": 42.474,
|
150 |
-
"step": 14960
|
151 |
}
|
152 |
],
|
153 |
"logging_steps": 1000,
|
154 |
-
"max_steps":
|
155 |
"num_input_tokens_seen": 0,
|
156 |
-
"num_train_epochs":
|
157 |
"save_steps": 500,
|
158 |
"stateful_callbacks": {
|
159 |
"TrainerControl": {
|
@@ -167,14 +42,14 @@
|
|
167 |
"attributes": {}
|
168 |
}
|
169 |
},
|
170 |
-
"total_flos":
|
171 |
-
"train_batch_size":
|
172 |
"trial_name": null,
|
173 |
"trial_params": {
|
174 |
-
"learning_rate":
|
175 |
-
"num_train_epochs":
|
176 |
-
"per_device_train_batch_size":
|
177 |
"warmup_steps": 250,
|
178 |
-
"weight_decay": 0.
|
179 |
}
|
180 |
}
|
|
|
1 |
{
|
2 |
+
"best_metric": null,
|
3 |
+
"best_model_checkpoint": null,
|
4 |
+
"epoch": 1.0,
|
5 |
"eval_steps": 500,
|
6 |
+
"global_step": 414,
|
7 |
"is_hyper_param_search": true,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
10 |
"log_history": [
|
11 |
{
|
12 |
+
"epoch": 0.0024154589371980675,
|
13 |
+
"grad_norm": 1.6261011362075806,
|
14 |
+
"learning_rate": 1.862366777446536e-07,
|
15 |
+
"loss": 1.1,
|
16 |
"step": 1
|
17 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
18 |
{
|
19 |
"epoch": 1.0,
|
20 |
+
"eval_f1": 0.8394192385480443,
|
21 |
+
"eval_loss": 0.38631680607795715,
|
22 |
+
"eval_runtime": 2.1374,
|
23 |
+
"eval_samples_per_second": 1033.029,
|
24 |
+
"eval_steps_per_second": 129.129,
|
25 |
+
"step": 414
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
26 |
}
|
27 |
],
|
28 |
"logging_steps": 1000,
|
29 |
+
"max_steps": 414,
|
30 |
"num_input_tokens_seen": 0,
|
31 |
+
"num_train_epochs": 1,
|
32 |
"save_steps": 500,
|
33 |
"stateful_callbacks": {
|
34 |
"TrainerControl": {
|
|
|
42 |
"attributes": {}
|
43 |
}
|
44 |
},
|
45 |
+
"total_flos": 4209814683648.0,
|
46 |
+
"train_batch_size": 16,
|
47 |
"trial_name": null,
|
48 |
"trial_params": {
|
49 |
+
"learning_rate": 4.65591694361634e-05,
|
50 |
+
"num_train_epochs": 1,
|
51 |
+
"per_device_train_batch_size": 16,
|
52 |
"warmup_steps": 250,
|
53 |
+
"weight_decay": 0.2103754328402818
|
54 |
}
|
55 |
}
|
training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 5240
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:83b594183cbe9f09ddaf478eae629dcdb3d51b9bcb4757de1f514b20d09ee833
|
3 |
size 5240
|
training_resume.json
CHANGED
@@ -19,7 +19,7 @@
|
|
19 |
"resources_per_trial": {
|
20 |
"gpu": 1
|
21 |
},
|
22 |
-
"run_id": "
|
23 |
"task": "",
|
24 |
"task_type": "classification",
|
25 |
"tokenizer_field": "tweet",
|
|
|
19 |
"resources_per_trial": {
|
20 |
"gpu": 1
|
21 |
},
|
22 |
+
"run_id": "24fa3a3c",
|
23 |
"task": "",
|
24 |
"task_type": "classification",
|
25 |
"tokenizer_field": "tweet",
|