Atnafu committed on
Commit
11dce01
1 Parent(s): 329d91a

End of training

Browse files
all_results.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 5.0,
3
+ "eval_samples": 12458,
4
+ "train_loss": 0.764641843215564,
5
+ "train_runtime": 18256.242,
6
+ "train_samples": 130319,
7
+ "train_samples_per_second": 35.692,
8
+ "train_steps_per_second": 1.116
9
+ }
eval_results.json ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ {
2
+ "epoch": 5.0,
3
+ "eval_samples": 12458
4
+ }
runs/Mar14_07-54-59_1ca1daddd6ef/events.out.tfevents.1678799269.1ca1daddd6ef.83661.2 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2f9a39aee9391d899556e3e6cd57c81d7cee12162898fb238ef85a60ebc69fdf
3
+ size 91
train_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 5.0,
3
+ "train_loss": 0.764641843215564,
4
+ "train_runtime": 18256.242,
5
+ "train_samples": 130319,
6
+ "train_samples_per_second": 35.692,
7
+ "train_steps_per_second": 1.116
8
+ }
trainer_state.json ADDED
@@ -0,0 +1,225 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 5.0,
5
+ "global_step": 20365,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 0.25,
12
+ "learning_rate": 2.8526884360422293e-05,
13
+ "loss": 3.7774,
14
+ "step": 1000
15
+ },
16
+ {
17
+ "epoch": 0.25,
18
+ "step": 1000
19
+ },
20
+ {
21
+ "epoch": 0.49,
22
+ "learning_rate": 2.705376872084459e-05,
23
+ "loss": 1.1215,
24
+ "step": 2000
25
+ },
26
+ {
27
+ "epoch": 0.49,
28
+ "step": 2000
29
+ },
30
+ {
31
+ "epoch": 0.74,
32
+ "learning_rate": 2.558065308126688e-05,
33
+ "loss": 0.891,
34
+ "step": 3000
35
+ },
36
+ {
37
+ "epoch": 0.74,
38
+ "step": 3000
39
+ },
40
+ {
41
+ "epoch": 0.98,
42
+ "learning_rate": 2.4107537441689173e-05,
43
+ "loss": 0.7978,
44
+ "step": 4000
45
+ },
46
+ {
47
+ "epoch": 0.98,
48
+ "step": 4000
49
+ },
50
+ {
51
+ "epoch": 1.23,
52
+ "learning_rate": 2.263442180211147e-05,
53
+ "loss": 0.7133,
54
+ "step": 5000
55
+ },
56
+ {
57
+ "epoch": 1.23,
58
+ "step": 5000
59
+ },
60
+ {
61
+ "epoch": 1.47,
62
+ "learning_rate": 2.116130616253376e-05,
63
+ "loss": 0.6749,
64
+ "step": 6000
65
+ },
66
+ {
67
+ "epoch": 1.47,
68
+ "step": 6000
69
+ },
70
+ {
71
+ "epoch": 1.72,
72
+ "learning_rate": 1.9688190522956053e-05,
73
+ "loss": 0.6433,
74
+ "step": 7000
75
+ },
76
+ {
77
+ "epoch": 1.72,
78
+ "step": 7000
79
+ },
80
+ {
81
+ "epoch": 1.96,
82
+ "learning_rate": 1.8215074883378345e-05,
83
+ "loss": 0.6125,
84
+ "step": 8000
85
+ },
86
+ {
87
+ "epoch": 1.96,
88
+ "step": 8000
89
+ },
90
+ {
91
+ "epoch": 2.21,
92
+ "learning_rate": 1.674195924380064e-05,
93
+ "loss": 0.5748,
94
+ "step": 9000
95
+ },
96
+ {
97
+ "epoch": 2.21,
98
+ "step": 9000
99
+ },
100
+ {
101
+ "epoch": 2.46,
102
+ "learning_rate": 1.5268843604222933e-05,
103
+ "loss": 0.5683,
104
+ "step": 10000
105
+ },
106
+ {
107
+ "epoch": 2.46,
108
+ "step": 10000
109
+ },
110
+ {
111
+ "epoch": 2.7,
112
+ "learning_rate": 1.3795727964645225e-05,
113
+ "loss": 0.5413,
114
+ "step": 11000
115
+ },
116
+ {
117
+ "epoch": 2.7,
118
+ "step": 11000
119
+ },
120
+ {
121
+ "epoch": 2.95,
122
+ "learning_rate": 1.2322612325067517e-05,
123
+ "loss": 0.5412,
124
+ "step": 12000
125
+ },
126
+ {
127
+ "epoch": 2.95,
128
+ "step": 12000
129
+ },
130
+ {
131
+ "epoch": 3.19,
132
+ "learning_rate": 1.0849496685489811e-05,
133
+ "loss": 0.5168,
134
+ "step": 13000
135
+ },
136
+ {
137
+ "epoch": 3.19,
138
+ "step": 13000
139
+ },
140
+ {
141
+ "epoch": 3.44,
142
+ "learning_rate": 9.376381045912103e-06,
143
+ "loss": 0.5038,
144
+ "step": 14000
145
+ },
146
+ {
147
+ "epoch": 3.44,
148
+ "step": 14000
149
+ },
150
+ {
151
+ "epoch": 3.68,
152
+ "learning_rate": 7.903265406334397e-06,
153
+ "loss": 0.5042,
154
+ "step": 15000
155
+ },
156
+ {
157
+ "epoch": 3.68,
158
+ "step": 15000
159
+ },
160
+ {
161
+ "epoch": 3.93,
162
+ "learning_rate": 6.43014976675669e-06,
163
+ "loss": 0.4929,
164
+ "step": 16000
165
+ },
166
+ {
167
+ "epoch": 3.93,
168
+ "step": 16000
169
+ },
170
+ {
171
+ "epoch": 4.17,
172
+ "learning_rate": 4.957034127178984e-06,
173
+ "loss": 0.4919,
174
+ "step": 17000
175
+ },
176
+ {
177
+ "epoch": 4.17,
178
+ "step": 17000
179
+ },
180
+ {
181
+ "epoch": 4.42,
182
+ "learning_rate": 3.483918487601277e-06,
183
+ "loss": 0.4765,
184
+ "step": 18000
185
+ },
186
+ {
187
+ "epoch": 4.42,
188
+ "step": 18000
189
+ },
190
+ {
191
+ "epoch": 4.66,
192
+ "learning_rate": 2.01080284802357e-06,
193
+ "loss": 0.4772,
194
+ "step": 19000
195
+ },
196
+ {
197
+ "epoch": 4.66,
198
+ "step": 19000
199
+ },
200
+ {
201
+ "epoch": 4.91,
202
+ "learning_rate": 5.37687208445863e-07,
203
+ "loss": 0.4729,
204
+ "step": 20000
205
+ },
206
+ {
207
+ "epoch": 4.91,
208
+ "step": 20000
209
+ },
210
+ {
211
+ "epoch": 5.0,
212
+ "step": 20365,
213
+ "total_flos": 5.859693481446605e+17,
214
+ "train_loss": 0.764641843215564,
215
+ "train_runtime": 18256.242,
216
+ "train_samples_per_second": 35.692,
217
+ "train_steps_per_second": 1.116
218
+ }
219
+ ],
220
+ "max_steps": 20365,
221
+ "num_train_epochs": 5,
222
+ "total_flos": 5.859693481446605e+17,
223
+ "trial_name": null,
224
+ "trial_params": null
225
+ }