MHGanainy committed
Commit 22677a5
1 Parent(s): 766adef

End of training

Files changed (3)
  1. all_results.json +5 -5
  2. train_results.json +5 -5
  3. trainer_state.json +117 -26
all_results.json CHANGED
@@ -1,9 +1,9 @@
 {
   "epoch": 2.0,
-  "total_flos": 2.898875076784947e+16,
-  "train_loss": 0.1999838718148165,
-  "train_runtime": 196.4851,
+  "total_flos": 2.896768241664e+16,
+  "train_loss": 0.08159883026469232,
+  "train_runtime": 766.6169,
   "train_samples": 55000,
-  "train_samples_per_second": 559.839,
-  "train_steps_per_second": 2.188
+  "train_samples_per_second": 143.488,
+  "train_steps_per_second": 8.969
 }
train_results.json CHANGED
@@ -1,9 +1,9 @@
 {
   "epoch": 2.0,
-  "total_flos": 2.898875076784947e+16,
-  "train_loss": 0.1999838718148165,
-  "train_runtime": 196.4851,
+  "total_flos": 2.896768241664e+16,
+  "train_loss": 0.08159883026469232,
+  "train_runtime": 766.6169,
   "train_samples": 55000,
-  "train_samples_per_second": 559.839,
-  "train_steps_per_second": 2.188
+  "train_samples_per_second": 143.488,
+  "train_steps_per_second": 8.969
 }
trainer_state.json CHANGED
@@ -1,45 +1,136 @@
 {
-  "best_metric": 0.0,
-  "best_model_checkpoint": "logs/eurlex/roberta-base/seed_1/checkpoint-215",
+  "best_metric": 0.716598119152416,
+  "best_model_checkpoint": "logs/eurlex/roberta-base/seed_1/checkpoint-6876",
   "epoch": 2.0,
   "eval_steps": 500,
-  "global_step": 430,
+  "global_step": 6876,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
   "log_history": [
+    {
+      "epoch": 0.14543339150668994,
+      "grad_norm": 10488.72265625,
+      "learning_rate": 2.7818499127399653e-05,
+      "loss": 0.1782,
+      "step": 500
+    },
+    {
+      "epoch": 0.29086678301337987,
+      "grad_norm": 8601.787109375,
+      "learning_rate": 2.56369982547993e-05,
+      "loss": 0.109,
+      "step": 1000
+    },
+    {
+      "epoch": 0.4363001745200698,
+      "grad_norm": 13134.970703125,
+      "learning_rate": 2.3455497382198953e-05,
+      "loss": 0.0926,
+      "step": 1500
+    },
+    {
+      "epoch": 0.5817335660267597,
+      "grad_norm": 16072.458984375,
+      "learning_rate": 2.1273996509598605e-05,
+      "loss": 0.0851,
+      "step": 2000
+    },
+    {
+      "epoch": 0.7271669575334497,
+      "grad_norm": 12878.12109375,
+      "learning_rate": 1.9092495636998257e-05,
+      "loss": 0.078,
+      "step": 2500
+    },
+    {
+      "epoch": 0.8726003490401396,
+      "grad_norm": 12653.638671875,
+      "learning_rate": 1.6910994764397905e-05,
+      "loss": 0.0758,
+      "step": 3000
+    },
     {
       "epoch": 1.0,
-      "eval_loss": 0.17694051563739777,
-      "eval_macro-f1": 0.0,
-      "eval_micro-f1": 0.0,
-      "eval_runtime": 3.944,
-      "eval_samples_per_second": 1267.741,
-      "eval_steps_per_second": 5.071,
-      "step": 215
+      "eval_loss": 0.08141529560089111,
+      "eval_macro-f1": 0.296432741440559,
+      "eval_micro-f1": 0.6862200624783061,
+      "eval_runtime": 20.7429,
+      "eval_samples_per_second": 241.047,
+      "eval_steps_per_second": 15.09,
+      "step": 3438
+    },
+    {
+      "epoch": 1.0180337405468296,
+      "grad_norm": 14271.0712890625,
+      "learning_rate": 1.4729493891797557e-05,
+      "loss": 0.0715,
+      "step": 3500
+    },
+    {
+      "epoch": 1.1634671320535195,
+      "grad_norm": 15767.826171875,
+      "learning_rate": 1.2547993019197209e-05,
+      "loss": 0.0679,
+      "step": 4000
+    },
+    {
+      "epoch": 1.3089005235602094,
+      "grad_norm": 13860.2705078125,
+      "learning_rate": 1.0366492146596857e-05,
+      "loss": 0.0652,
+      "step": 4500
+    },
+    {
+      "epoch": 1.4543339150668992,
+      "grad_norm": 11340.1640625,
+      "learning_rate": 8.18499127399651e-06,
+      "loss": 0.0651,
+      "step": 5000
+    },
+    {
+      "epoch": 1.5997673065735893,
+      "grad_norm": 14212.9599609375,
+      "learning_rate": 6.003490401396161e-06,
+      "loss": 0.0633,
+      "step": 5500
+    },
+    {
+      "epoch": 1.7452006980802792,
+      "grad_norm": 16454.734375,
+      "learning_rate": 3.821989528795812e-06,
+      "loss": 0.0624,
+      "step": 6000
+    },
+    {
+      "epoch": 1.8906340895869693,
+      "grad_norm": 11580.310546875,
+      "learning_rate": 1.6404886561954625e-06,
+      "loss": 0.0618,
+      "step": 6500
     },
     {
       "epoch": 2.0,
-      "eval_loss": 0.17320719361305237,
-      "eval_macro-f1": 0.0,
-      "eval_micro-f1": 0.0,
-      "eval_runtime": 3.964,
-      "eval_samples_per_second": 1261.34,
-      "eval_steps_per_second": 5.045,
-      "step": 430
+      "eval_loss": 0.07441535592079163,
+      "eval_macro-f1": 0.35506427791546535,
+      "eval_micro-f1": 0.716598119152416,
+      "eval_runtime": 19.4037,
+      "eval_samples_per_second": 257.682,
+      "eval_steps_per_second": 16.131,
+      "step": 6876
     },
     {
       "epoch": 2.0,
-      "step": 430,
-      "total_flos": 2.898875076784947e+16,
-      "train_loss": 0.1999838718148165,
-      "train_runtime": 196.4851,
-      "train_samples_per_second": 559.839,
-      "train_steps_per_second": 2.188
+      "step": 6876,
+      "total_flos": 2.896768241664e+16,
+      "train_loss": 0.08159883026469232,
+      "train_runtime": 766.6169,
+      "train_samples_per_second": 143.488,
+      "train_steps_per_second": 8.969
     }
   ],
   "logging_steps": 500,
-  "max_steps": 430,
+  "max_steps": 6876,
   "num_input_tokens_seen": 0,
   "num_train_epochs": 2,
   "save_steps": 500,
@@ -64,8 +155,8 @@
       "attributes": {}
     }
   },
-  "total_flos": 2.898875076784947e+16,
-  "train_batch_size": 128,
+  "total_flos": 2.896768241664e+16,
+  "train_batch_size": 16,
   "trial_name": null,
   "trial_params": null
 }
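
The committed files follow the standard Hugging Face Trainer output layout, so the updated metrics can be read back with plain JSON parsing. A minimal sketch, assuming the files are checked out into the current directory (paths and the printed fields are illustrative, not part of this commit):

```python
import json

# Headline numbers from the training summary.
with open("train_results.json") as f:
    train_results = json.load(f)
print(f"train_loss: {train_results['train_loss']:.4f} "
      f"({train_results['train_runtime']:.1f}s, "
      f"{train_results['train_samples_per_second']:.1f} samples/s)")

# Per-step history lives in trainer_state.json; log_history mixes
# training-loss entries (keyed by "loss") with per-epoch evaluation
# entries (keyed by "eval_micro-f1" / "eval_macro-f1").
with open("trainer_state.json") as f:
    state = json.load(f)

for entry in state["log_history"]:
    if "eval_micro-f1" in entry:
        print(f"epoch {entry['epoch']:.0f}, step {entry['step']}: "
              f"micro-F1 {entry['eval_micro-f1']:.4f}, "
              f"macro-F1 {entry['eval_macro-f1']:.4f}")

print("best_metric:", state["best_metric"])
print("best checkpoint:", state["best_model_checkpoint"])
```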