tferhan commited on
Commit
d22d8f3
1 Parent(s): f0f8fd2

Upload log_history.json

Browse files
Files changed (1) hide show
  1. log_history.json +199 -0
log_history.json ADDED
@@ -0,0 +1,199 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "loss": 0.6629,
4
+ "grad_norm": 1.981173038482666,
5
+ "learning_rate": 1.8575498575498575e-05,
6
+ "epoch": 0.7122507122507122,
7
+ "step": 500
8
+ },
9
+ {
10
+ "eval_loss": 0.3868238925933838,
11
+ "eval_accuracy": 0.8307254623044097,
12
+ "eval_runtime": 3.5243,
13
+ "eval_samples_per_second": 199.474,
14
+ "eval_steps_per_second": 49.939,
15
+ "epoch": 1.0,
16
+ "step": 702
17
+ },
18
+ {
19
+ "loss": 0.4271,
20
+ "grad_norm": 0.7268210053443909,
21
+ "learning_rate": 1.7150997150997152e-05,
22
+ "epoch": 1.4245014245014245,
23
+ "step": 1000
24
+ },
25
+ {
26
+ "eval_loss": 0.12105754762887955,
27
+ "eval_accuracy": 0.9544807965860598,
28
+ "eval_runtime": 4.3029,
29
+ "eval_samples_per_second": 163.38,
30
+ "eval_steps_per_second": 40.903,
31
+ "epoch": 2.0,
32
+ "step": 1404
33
+ },
34
+ {
35
+ "loss": 0.1895,
36
+ "grad_norm": 0.8351957201957703,
37
+ "learning_rate": 1.5726495726495726e-05,
38
+ "epoch": 2.1367521367521367,
39
+ "step": 1500
40
+ },
41
+ {
42
+ "loss": 0.1534,
43
+ "grad_norm": 0.24414250254631042,
44
+ "learning_rate": 1.4301994301994305e-05,
45
+ "epoch": 2.849002849002849,
46
+ "step": 2000
47
+ },
48
+ {
49
+ "eval_loss": 0.09802740812301636,
50
+ "eval_accuracy": 0.9701280227596017,
51
+ "eval_runtime": 3.5319,
52
+ "eval_samples_per_second": 199.043,
53
+ "eval_steps_per_second": 49.832,
54
+ "epoch": 3.0,
55
+ "step": 2106
56
+ },
57
+ {
58
+ "loss": 0.1275,
59
+ "grad_norm": 0.11179369688034058,
60
+ "learning_rate": 1.2877492877492879e-05,
61
+ "epoch": 3.561253561253561,
62
+ "step": 2500
63
+ },
64
+ {
65
+ "eval_loss": 0.093312107026577,
66
+ "eval_accuracy": 0.9772403982930299,
67
+ "eval_runtime": 3.7294,
68
+ "eval_samples_per_second": 188.501,
69
+ "eval_steps_per_second": 47.192,
70
+ "epoch": 4.0,
71
+ "step": 2808
72
+ },
73
+ {
74
+ "loss": 0.1302,
75
+ "grad_norm": 0.037690628319978714,
76
+ "learning_rate": 1.1452991452991454e-05,
77
+ "epoch": 4.273504273504273,
78
+ "step": 3000
79
+ },
80
+ {
81
+ "loss": 0.1332,
82
+ "grad_norm": 0.06977783888578415,
83
+ "learning_rate": 1.002849002849003e-05,
84
+ "epoch": 4.985754985754986,
85
+ "step": 3500
86
+ },
87
+ {
88
+ "eval_loss": 0.09014463424682617,
89
+ "eval_accuracy": 0.9786628733997155,
90
+ "eval_runtime": 3.6975,
91
+ "eval_samples_per_second": 190.126,
92
+ "eval_steps_per_second": 47.599,
93
+ "epoch": 5.0,
94
+ "step": 3510
95
+ },
96
+ {
97
+ "loss": 0.1051,
98
+ "grad_norm": 0.07191012799739838,
99
+ "learning_rate": 8.603988603988605e-06,
100
+ "epoch": 5.698005698005698,
101
+ "step": 4000
102
+ },
103
+ {
104
+ "eval_loss": 0.08116021007299423,
105
+ "eval_accuracy": 0.9800853485064012,
106
+ "eval_runtime": 3.558,
107
+ "eval_samples_per_second": 197.582,
108
+ "eval_steps_per_second": 49.466,
109
+ "epoch": 6.0,
110
+ "step": 4212
111
+ },
112
+ {
113
+ "loss": 0.1026,
114
+ "grad_norm": 9.66889476776123,
115
+ "learning_rate": 7.17948717948718e-06,
116
+ "epoch": 6.410256410256411,
117
+ "step": 4500
118
+ },
119
+ {
120
+ "eval_loss": 0.07601884752511978,
121
+ "eval_accuracy": 0.9800853485064012,
122
+ "eval_runtime": 7.4677,
123
+ "eval_samples_per_second": 94.138,
124
+ "eval_steps_per_second": 23.568,
125
+ "epoch": 7.0,
126
+ "step": 4914
127
+ },
128
+ {
129
+ "loss": 0.092,
130
+ "grad_norm": 0.02262728288769722,
131
+ "learning_rate": 5.7549857549857555e-06,
132
+ "epoch": 7.122507122507122,
133
+ "step": 5000
134
+ },
135
+ {
136
+ "loss": 0.0864,
137
+ "grad_norm": 0.1124846562743187,
138
+ "learning_rate": 4.330484330484331e-06,
139
+ "epoch": 7.834757834757835,
140
+ "step": 5500
141
+ },
142
+ {
143
+ "eval_loss": 0.07228563725948334,
144
+ "eval_accuracy": 0.9800853485064012,
145
+ "eval_runtime": 4.228,
146
+ "eval_samples_per_second": 166.273,
147
+ "eval_steps_per_second": 41.627,
148
+ "epoch": 8.0,
149
+ "step": 5616
150
+ },
151
+ {
152
+ "loss": 0.1273,
153
+ "grad_norm": 0.0037623795215040445,
154
+ "learning_rate": 2.9059829059829063e-06,
155
+ "epoch": 8.547008547008547,
156
+ "step": 6000
157
+ },
158
+ {
159
+ "eval_loss": 0.0683717355132103,
160
+ "eval_accuracy": 0.9815078236130867,
161
+ "eval_runtime": 3.5474,
162
+ "eval_samples_per_second": 198.175,
163
+ "eval_steps_per_second": 49.614,
164
+ "epoch": 9.0,
165
+ "step": 6318
166
+ },
167
+ {
168
+ "loss": 0.0724,
169
+ "grad_norm": 0.004779215436428785,
170
+ "learning_rate": 1.4814814814814815e-06,
171
+ "epoch": 9.25925925925926,
172
+ "step": 6500
173
+ },
174
+ {
175
+ "loss": 0.1116,
176
+ "grad_norm": 2.0302906036376953,
177
+ "learning_rate": 5.6980056980056986e-08,
178
+ "epoch": 9.971509971509972,
179
+ "step": 7000
180
+ },
181
+ {
182
+ "eval_loss": 0.06746786087751389,
183
+ "eval_accuracy": 0.9815078236130867,
184
+ "eval_runtime": 7.6005,
185
+ "eval_samples_per_second": 92.494,
186
+ "eval_steps_per_second": 23.156,
187
+ "epoch": 10.0,
188
+ "step": 7020
189
+ },
190
+ {
191
+ "train_runtime": 449.8629,
192
+ "train_samples_per_second": 62.419,
193
+ "train_steps_per_second": 15.605,
194
+ "total_flos": 520823902075200.0,
195
+ "train_loss": 0.17984749846308998,
196
+ "epoch": 10.0,
197
+ "step": 7020
198
+ }
199
+ ]