José Antonio García Díaz
committed on
Commit
·
6050e39
1
Parent(s):
cad4207
another datasets
Browse files
- config.json +6 -6
- hyperparameters.csv +10 -10
- model.safetensors +1 -1
- optimizer.pt +1 -1
- scheduler.pt +1 -1
- trainer_state.json +43 -20
- training_args.bin +1 -1
- training_resume.json +1 -1
config.json
CHANGED
@@ -12,16 +12,16 @@
|
|
12 |
"hidden_dropout_prob": 0.0,
|
13 |
"hidden_size": 768,
|
14 |
"id2label": {
|
15 |
-
"0": "
|
16 |
-
"1": "
|
17 |
-
"2": "
|
18 |
},
|
19 |
"initializer_range": 0.02,
|
20 |
"intermediate_size": 3072,
|
21 |
"label2id": {
|
22 |
-
"
|
23 |
-
"
|
24 |
-
"
|
25 |
},
|
26 |
"layer_norm_eps": 1e-05,
|
27 |
"max_position_embeddings": 514,
|
|
|
12 |
"hidden_dropout_prob": 0.0,
|
13 |
"hidden_size": 768,
|
14 |
"id2label": {
|
15 |
+
"0": "LABEL_0",
|
16 |
+
"1": "LABEL_1",
|
17 |
+
"2": "LABEL_2"
|
18 |
},
|
19 |
"initializer_range": 0.02,
|
20 |
"intermediate_size": 3072,
|
21 |
"label2id": {
|
22 |
+
"LABEL_0": 0,
|
23 |
+
"LABEL_1": 1,
|
24 |
+
"LABEL_2": 2
|
25 |
},
|
26 |
"layer_norm_eps": 1e-05,
|
27 |
"max_position_embeddings": 514,
|
hyperparameters.csv
CHANGED
@@ -1,11 +1,11 @@
|
|
1 |
objective,best,learning_rate,num_train_epochs,per_device_train_batch_size,warmup_steps,weight_decay,time_this_iter_s
|
2 |
-
0.
|
3 |
-
0.
|
4 |
-
0.
|
5 |
-
0.
|
6 |
-
0.
|
7 |
-
0.
|
8 |
-
0.
|
9 |
-
0.
|
10 |
-
0.
|
11 |
-
0.
|
|
|
1 |
objective,best,learning_rate,num_train_epochs,per_device_train_batch_size,warmup_steps,weight_decay,time_this_iter_s
|
2 |
+
0.585795322825296,True,2.7790954197781407e-05,2,16,250,0.27440436845088456,188.93630051612854
|
3 |
+
0.5403308930283185,False,3.7814091027622886e-05,2,8,1000,0.1538840941218964,217.0473895072937
|
4 |
+
0.5444770730498769,False,3.32179557296645e-05,4,16,500,0.16179634310824192,189.567147731781
|
5 |
+
0.5588174791467849,False,1.1504988654860045e-05,5,16,500,0.01734062039773394,190.21717524528503
|
6 |
+
0.580070364008772,False,4.8487400003981596e-05,1,8,0,0.06409376822222429,219.16021513938904
|
7 |
+
0.5418848883243373,False,4.1163569633169964e-05,2,8,500,0.11476947426883025,218.05247330665588
|
8 |
+
0.5614360549687367,False,3.577023497296174e-05,2,16,1000,0.019970396453250393,189.73834896087646
|
9 |
+
0.5822306986164425,False,4.176691733490237e-05,1,8,1000,0.0762936568728031,219.25422358512878
|
10 |
+
0.5583033816322266,False,3.563806920310468e-05,5,16,1000,0.07122837923660795,190.06790375709534
|
11 |
+
0.5410159442467878,False,4.216937363471069e-05,2,8,0,0.17991049930425,218.05449318885803
|
model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 498606684
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7d08b27797a18fc2d245b7f491cb1b24152316a2902b22e3ead2e1e71826de47
|
3 |
size 498606684
|
optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 997333242
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6efd6ffd5083cff2e4d806c6dcaa720d7cc9b207f982274fbe5a31aadfbea628
|
3 |
size 997333242
|
scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a9c11846eae1a717de3a335649e31add72e7cd98f8bfe0d0551bb18428458ab2
|
3 |
size 1064
|
trainer_state.json
CHANGED
@@ -1,34 +1,57 @@
|
|
1 |
{
|
2 |
-
"best_metric":
|
3 |
-
"best_model_checkpoint":
|
4 |
-
"epoch":
|
5 |
"eval_steps": 500,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": true,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
10 |
"log_history": [
|
11 |
{
|
12 |
-
"epoch": 0.
|
13 |
-
"grad_norm":
|
14 |
-
"learning_rate": 1.
|
15 |
-
"loss": 1.
|
16 |
"step": 1
|
17 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
18 |
{
|
19 |
"epoch": 1.0,
|
20 |
-
"eval_f1": 0.
|
21 |
-
"eval_loss": 0.
|
22 |
-
"eval_runtime":
|
23 |
-
"eval_samples_per_second":
|
24 |
-
"eval_steps_per_second":
|
25 |
-
"step":
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
26 |
}
|
27 |
],
|
28 |
"logging_steps": 1000,
|
29 |
-
"max_steps":
|
30 |
"num_input_tokens_seen": 0,
|
31 |
-
"num_train_epochs":
|
32 |
"save_steps": 500,
|
33 |
"stateful_callbacks": {
|
34 |
"TrainerControl": {
|
@@ -42,14 +65,14 @@
|
|
42 |
"attributes": {}
|
43 |
}
|
44 |
},
|
45 |
-
"total_flos":
|
46 |
"train_batch_size": 16,
|
47 |
"trial_name": null,
|
48 |
"trial_params": {
|
49 |
-
"learning_rate":
|
50 |
-
"num_train_epochs":
|
51 |
"per_device_train_batch_size": 16,
|
52 |
"warmup_steps": 250,
|
53 |
-
"weight_decay": 0.
|
54 |
}
|
55 |
}
|
|
|
1 |
{
|
2 |
+
"best_metric": 0.8141798973083496,
|
3 |
+
"best_model_checkpoint": "./results/run-7a00201a/checkpoint-1275",
|
4 |
+
"epoch": 2.0,
|
5 |
"eval_steps": 500,
|
6 |
+
"global_step": 2550,
|
7 |
"is_hyper_param_search": true,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
10 |
"log_history": [
|
11 |
{
|
12 |
+
"epoch": 0.000784313725490196,
|
13 |
+
"grad_norm": 0.6707581281661987,
|
14 |
+
"learning_rate": 1.1116381679112563e-07,
|
15 |
+
"loss": 1.0997,
|
16 |
"step": 1
|
17 |
},
|
18 |
+
{
|
19 |
+
"epoch": 0.7843137254901961,
|
20 |
+
"grad_norm": 2.603963613510132,
|
21 |
+
"learning_rate": 1.8728686524591818e-05,
|
22 |
+
"loss": 0.8879,
|
23 |
+
"step": 1000
|
24 |
+
},
|
25 |
{
|
26 |
"epoch": 1.0,
|
27 |
+
"eval_f1": 0.5636810375356157,
|
28 |
+
"eval_loss": 0.8141798973083496,
|
29 |
+
"eval_runtime": 19.9525,
|
30 |
+
"eval_samples_per_second": 340.71,
|
31 |
+
"eval_steps_per_second": 42.601,
|
32 |
+
"step": 1275
|
33 |
+
},
|
34 |
+
{
|
35 |
+
"epoch": 1.5686274509803921,
|
36 |
+
"grad_norm": 5.432121276855469,
|
37 |
+
"learning_rate": 6.645662960339033e-06,
|
38 |
+
"loss": 0.7432,
|
39 |
+
"step": 2000
|
40 |
+
},
|
41 |
+
{
|
42 |
+
"epoch": 2.0,
|
43 |
+
"eval_f1": 0.585795322825296,
|
44 |
+
"eval_loss": 0.8491290807723999,
|
45 |
+
"eval_runtime": 19.9821,
|
46 |
+
"eval_samples_per_second": 340.205,
|
47 |
+
"eval_steps_per_second": 42.538,
|
48 |
+
"step": 2550
|
49 |
}
|
50 |
],
|
51 |
"logging_steps": 1000,
|
52 |
+
"max_steps": 2550,
|
53 |
"num_input_tokens_seen": 0,
|
54 |
+
"num_train_epochs": 2,
|
55 |
"save_steps": 500,
|
56 |
"stateful_callbacks": {
|
57 |
"TrainerControl": {
|
|
|
65 |
"attributes": {}
|
66 |
}
|
67 |
},
|
68 |
+
"total_flos": 6987202920860400.0,
|
69 |
"train_batch_size": 16,
|
70 |
"trial_name": null,
|
71 |
"trial_params": {
|
72 |
+
"learning_rate": 2.7790954197781407e-05,
|
73 |
+
"num_train_epochs": 2,
|
74 |
"per_device_train_batch_size": 16,
|
75 |
"warmup_steps": 250,
|
76 |
+
"weight_decay": 0.27440436845088456
|
77 |
}
|
78 |
}
|
training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 5240
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9d9c68b7f2c1a165772c07fdda547a991f041674086b03e586624ae202c5e1b2
|
3 |
size 5240
|
training_resume.json
CHANGED
@@ -19,7 +19,7 @@
|
|
19 |
"resources_per_trial": {
|
20 |
"gpu": 1
|
21 |
},
|
22 |
-
"run_id": "
|
23 |
"task": "",
|
24 |
"task_type": "classification",
|
25 |
"tokenizer_field": "tweet",
|
|
|
19 |
"resources_per_trial": {
|
20 |
"gpu": 1
|
21 |
},
|
22 |
+
"run_id": "7a00201a",
|
23 |
"task": "",
|
24 |
"task_type": "classification",
|
25 |
"tokenizer_field": "tweet",
|