jpodivin commited on
Commit
ce6e5dc
1 Parent(s): 5acc563

End of training

Browse files
Files changed (5) hide show
  1. README.md +14 -2
  2. all_results.json +14 -14
  3. eval_results.json +10 -10
  4. train_results.json +5 -5
  5. trainer_state.json +596 -194
README.md CHANGED
@@ -3,11 +3,23 @@ license: apache-2.0
3
  base_model: google-t5/t5-small
4
  tags:
5
  - generated_from_trainer
 
 
6
  metrics:
7
  - rouge
8
  model-index:
9
  - name: pep_summarization
10
- results: []
 
 
 
 
 
 
 
 
 
 
11
  ---
12
 
13
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
@@ -15,7 +27,7 @@ should probably proofread and complete it, then remove this comment. -->
15
 
16
  # pep_summarization
17
 
18
- This model is a fine-tuned version of [google-t5/t5-small](https://huggingface.co/google-t5/t5-small) on an unknown dataset.
19
  It achieves the following results on the evaluation set:
20
  - Loss: 0.0459
21
  - Rouge1: 87.1522
 
3
  base_model: google-t5/t5-small
4
  tags:
5
  - generated_from_trainer
6
+ datasets:
7
+ - fedora-copr/pep-sum
8
  metrics:
9
  - rouge
10
  model-index:
11
  - name: pep_summarization
12
+ results:
13
+ - task:
14
+ name: Summarization
15
+ type: summarization
16
+ dataset:
17
+ name: fedora-copr/pep-sum
18
+ type: fedora-copr/pep-sum
19
+ metrics:
20
+ - name: Rouge1
21
+ type: rouge
22
+ value: 87.1522
23
  ---
24
 
25
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
 
27
 
28
  # pep_summarization
29
 
30
+ This model is a fine-tuned version of [google-t5/t5-small](https://huggingface.co/google-t5/t5-small) on the fedora-copr/pep-sum dataset.
31
  It achieves the following results on the evaluation set:
32
  - Loss: 0.0459
33
  - Rouge1: 87.1522
all_results.json CHANGED
@@ -1,18 +1,18 @@
1
  {
2
- "epoch": 20.0,
3
- "eval_gen_len": 67.08695652173913,
4
- "eval_loss": 0.07390377670526505,
5
- "eval_rouge1": 83.9415,
6
- "eval_rouge2": 83.3937,
7
- "eval_rougeL": 84.0648,
8
- "eval_rougeLsum": 84.0055,
9
- "eval_runtime": 3.0684,
10
  "eval_samples": 69,
11
- "eval_samples_per_second": 22.487,
12
- "eval_steps_per_second": 2.933,
13
- "train_loss": 0.1759471893310547,
14
- "train_runtime": 142.9885,
15
  "train_samples": 276,
16
- "train_samples_per_second": 38.604,
17
- "train_steps_per_second": 4.895
18
  }
 
1
  {
2
+ "epoch": 50.0,
3
+ "eval_gen_len": 68.34782608695652,
4
+ "eval_loss": 0.04586370289325714,
5
+ "eval_rouge1": 87.1522,
6
+ "eval_rouge2": 86.6031,
7
+ "eval_rougeL": 87.1206,
8
+ "eval_rougeLsum": 87.0701,
9
+ "eval_runtime": 3.3714,
10
  "eval_samples": 69,
11
+ "eval_samples_per_second": 20.467,
12
+ "eval_steps_per_second": 2.67,
13
+ "train_loss": 0.08541564777919225,
14
+ "train_runtime": 358.6661,
15
  "train_samples": 276,
16
+ "train_samples_per_second": 38.476,
17
+ "train_steps_per_second": 4.879
18
  }
eval_results.json CHANGED
@@ -1,13 +1,13 @@
1
  {
2
- "epoch": 20.0,
3
- "eval_gen_len": 67.08695652173913,
4
- "eval_loss": 0.07390377670526505,
5
- "eval_rouge1": 83.9415,
6
- "eval_rouge2": 83.3937,
7
- "eval_rougeL": 84.0648,
8
- "eval_rougeLsum": 84.0055,
9
- "eval_runtime": 3.0684,
10
  "eval_samples": 69,
11
- "eval_samples_per_second": 22.487,
12
- "eval_steps_per_second": 2.933
13
  }
 
1
  {
2
+ "epoch": 50.0,
3
+ "eval_gen_len": 68.34782608695652,
4
+ "eval_loss": 0.04586370289325714,
5
+ "eval_rouge1": 87.1522,
6
+ "eval_rouge2": 86.6031,
7
+ "eval_rougeL": 87.1206,
8
+ "eval_rougeLsum": 87.0701,
9
+ "eval_runtime": 3.3714,
10
  "eval_samples": 69,
11
+ "eval_samples_per_second": 20.467,
12
+ "eval_steps_per_second": 2.67
13
  }
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 20.0,
3
- "train_loss": 0.1759471893310547,
4
- "train_runtime": 142.9885,
5
  "train_samples": 276,
6
- "train_samples_per_second": 38.604,
7
- "train_steps_per_second": 4.895
8
  }
 
1
  {
2
+ "epoch": 50.0,
3
+ "train_loss": 0.08541564777919225,
4
+ "train_runtime": 358.6661,
5
  "train_samples": 276,
6
+ "train_samples_per_second": 38.476,
7
+ "train_steps_per_second": 4.879
8
  }
trainer_state.json CHANGED
@@ -1,295 +1,697 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 20.0,
5
  "eval_steps": 500,
6
- "global_step": 700,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
- "eval_gen_len": 81.30434782608695,
14
- "eval_loss": 0.4793303906917572,
15
- "eval_rouge1": 53.6231,
16
- "eval_rouge2": 44.4209,
17
- "eval_rougeL": 49.0707,
18
- "eval_rougeLsum": 51.5197,
19
- "eval_runtime": 3.1743,
20
- "eval_samples_per_second": 21.737,
21
- "eval_steps_per_second": 2.835,
22
  "step": 35
23
  },
24
  {
25
  "epoch": 2.0,
26
- "eval_gen_len": 66.68115942028986,
27
- "eval_loss": 0.2904551923274994,
28
- "eval_rouge1": 62.9386,
29
- "eval_rouge2": 57.5356,
30
- "eval_rougeL": 60.385,
31
- "eval_rougeLsum": 61.6177,
32
- "eval_runtime": 2.9628,
33
- "eval_samples_per_second": 23.289,
34
- "eval_steps_per_second": 3.038,
35
  "step": 70
36
  },
37
  {
38
  "epoch": 3.0,
39
- "eval_gen_len": 62.028985507246375,
40
- "eval_loss": 0.22017963230609894,
41
- "eval_rouge1": 64.16,
42
- "eval_rouge2": 60.1179,
43
- "eval_rougeL": 62.5159,
44
- "eval_rougeLsum": 63.4504,
45
- "eval_runtime": 2.9458,
46
- "eval_samples_per_second": 23.423,
47
- "eval_steps_per_second": 3.055,
48
  "step": 105
49
  },
50
  {
51
  "epoch": 4.0,
52
- "eval_gen_len": 63.79710144927536,
53
- "eval_loss": 0.18573065102100372,
54
- "eval_rouge1": 68.0771,
55
- "eval_rouge2": 64.1938,
56
- "eval_rougeL": 66.2172,
57
- "eval_rougeLsum": 67.4379,
58
- "eval_runtime": 2.8038,
59
- "eval_samples_per_second": 24.609,
60
- "eval_steps_per_second": 3.21,
61
  "step": 140
62
  },
63
  {
64
  "epoch": 5.0,
65
- "eval_gen_len": 60.36231884057971,
66
- "eval_loss": 0.16388797760009766,
67
- "eval_rouge1": 72.5114,
68
- "eval_rouge2": 69.7863,
69
- "eval_rougeL": 71.3965,
70
- "eval_rougeLsum": 71.7778,
71
- "eval_runtime": 2.8796,
72
- "eval_samples_per_second": 23.961,
73
- "eval_steps_per_second": 3.125,
74
  "step": 175
75
  },
76
  {
77
  "epoch": 6.0,
78
- "eval_gen_len": 64.05797101449275,
79
- "eval_loss": 0.14642906188964844,
80
- "eval_rouge1": 75.0133,
81
- "eval_rouge2": 73.3835,
82
- "eval_rougeL": 74.5891,
83
- "eval_rougeLsum": 74.5624,
84
- "eval_runtime": 3.1388,
85
- "eval_samples_per_second": 21.983,
86
- "eval_steps_per_second": 2.867,
87
  "step": 210
88
  },
89
  {
90
  "epoch": 7.0,
91
- "eval_gen_len": 64.85507246376811,
92
- "eval_loss": 0.13372762501239777,
93
- "eval_rouge1": 76.8715,
94
- "eval_rouge2": 75.6755,
95
- "eval_rougeL": 76.8114,
96
- "eval_rougeLsum": 76.6515,
97
- "eval_runtime": 3.0949,
98
- "eval_samples_per_second": 22.295,
99
- "eval_steps_per_second": 2.908,
100
  "step": 245
101
  },
102
  {
103
  "epoch": 8.0,
104
- "eval_gen_len": 66.3768115942029,
105
- "eval_loss": 0.1221652403473854,
106
- "eval_rouge1": 79.3923,
107
- "eval_rouge2": 78.5756,
108
- "eval_rougeL": 79.2249,
109
- "eval_rougeLsum": 79.3579,
110
- "eval_runtime": 3.0252,
111
- "eval_samples_per_second": 22.808,
112
- "eval_steps_per_second": 2.975,
113
  "step": 280
114
  },
115
  {
116
  "epoch": 9.0,
117
- "eval_gen_len": 65.53623188405797,
118
- "eval_loss": 0.11279460042715073,
119
- "eval_rouge1": 81.6449,
120
- "eval_rouge2": 81.0708,
121
- "eval_rougeL": 81.7166,
122
- "eval_rougeLsum": 81.4895,
123
- "eval_runtime": 2.8183,
124
- "eval_samples_per_second": 24.482,
125
- "eval_steps_per_second": 3.193,
126
  "step": 315
127
  },
128
  {
129
  "epoch": 10.0,
130
- "eval_gen_len": 63.88405797101449,
131
- "eval_loss": 0.10504022985696793,
132
- "eval_rouge1": 83.4877,
133
- "eval_rouge2": 83.0142,
134
- "eval_rougeL": 83.6498,
135
- "eval_rougeLsum": 83.5732,
136
- "eval_runtime": 3.1321,
137
- "eval_samples_per_second": 22.03,
138
- "eval_steps_per_second": 2.874,
139
  "step": 350
140
  },
141
  {
142
  "epoch": 11.0,
143
- "eval_gen_len": 65.92753623188406,
144
- "eval_loss": 0.09819629788398743,
145
- "eval_rouge1": 83.1207,
146
- "eval_rouge2": 82.583,
147
- "eval_rougeL": 83.2399,
148
- "eval_rougeLsum": 83.09,
149
- "eval_runtime": 3.0914,
150
- "eval_samples_per_second": 22.32,
151
- "eval_steps_per_second": 2.911,
152
  "step": 385
153
  },
154
  {
155
  "epoch": 12.0,
156
- "eval_gen_len": 64.43478260869566,
157
- "eval_loss": 0.09112720936536789,
158
- "eval_rouge1": 83.9059,
159
- "eval_rouge2": 83.3589,
160
- "eval_rougeL": 84.0599,
161
- "eval_rougeLsum": 83.9385,
162
- "eval_runtime": 2.9617,
163
- "eval_samples_per_second": 23.298,
164
- "eval_steps_per_second": 3.039,
165
  "step": 420
166
  },
167
  {
168
  "epoch": 13.0,
169
- "eval_gen_len": 66.82608695652173,
170
- "eval_loss": 0.08676959574222565,
171
- "eval_rouge1": 83.9578,
172
- "eval_rouge2": 83.425,
173
- "eval_rougeL": 84.1007,
174
- "eval_rougeLsum": 84.0404,
175
- "eval_runtime": 3.0066,
176
- "eval_samples_per_second": 22.949,
177
- "eval_steps_per_second": 2.993,
178
  "step": 455
179
  },
180
  {
181
  "epoch": 14.0,
182
- "eval_gen_len": 66.82608695652173,
183
- "eval_loss": 0.08292272686958313,
184
- "eval_rouge1": 83.9578,
185
- "eval_rouge2": 83.425,
186
- "eval_rougeL": 84.1007,
187
- "eval_rougeLsum": 84.0404,
188
- "eval_runtime": 3.2002,
189
- "eval_samples_per_second": 21.561,
190
- "eval_steps_per_second": 2.812,
191
  "step": 490
192
  },
193
  {
194
  "epoch": 14.29,
195
- "learning_rate": 5.7142857142857145e-06,
196
- "loss": 0.2115,
197
  "step": 500
198
  },
199
  {
200
  "epoch": 15.0,
201
- "eval_gen_len": 66.84057971014492,
202
- "eval_loss": 0.0795898288488388,
203
- "eval_rouge1": 83.9594,
204
- "eval_rouge2": 83.4287,
205
- "eval_rougeL": 84.1054,
206
- "eval_rougeLsum": 84.0427,
207
- "eval_runtime": 3.1795,
208
- "eval_samples_per_second": 21.701,
209
- "eval_steps_per_second": 2.831,
210
  "step": 525
211
  },
212
  {
213
  "epoch": 16.0,
214
- "eval_gen_len": 67.1159420289855,
215
- "eval_loss": 0.0777181014418602,
216
- "eval_rouge1": 83.7657,
217
- "eval_rouge2": 83.2066,
218
- "eval_rougeL": 83.9053,
219
- "eval_rougeLsum": 83.8554,
220
- "eval_runtime": 3.1843,
221
- "eval_samples_per_second": 21.669,
222
- "eval_steps_per_second": 2.826,
223
  "step": 560
224
  },
225
  {
226
  "epoch": 17.0,
227
- "eval_gen_len": 67.08695652173913,
228
- "eval_loss": 0.07602674514055252,
229
- "eval_rouge1": 83.9415,
230
- "eval_rouge2": 83.3937,
231
- "eval_rougeL": 84.0648,
232
- "eval_rougeLsum": 84.0055,
233
- "eval_runtime": 3.2183,
234
- "eval_samples_per_second": 21.44,
235
- "eval_steps_per_second": 2.797,
236
  "step": 595
237
  },
238
  {
239
  "epoch": 18.0,
240
- "eval_gen_len": 67.08695652173913,
241
- "eval_loss": 0.07477501034736633,
242
- "eval_rouge1": 83.9415,
243
- "eval_rouge2": 83.3937,
244
- "eval_rougeL": 84.0648,
245
- "eval_rougeLsum": 84.0055,
246
- "eval_runtime": 3.2689,
247
- "eval_samples_per_second": 21.108,
248
- "eval_steps_per_second": 2.753,
249
  "step": 630
250
  },
251
  {
252
  "epoch": 19.0,
253
- "eval_gen_len": 67.08695652173913,
254
- "eval_loss": 0.07416118681430817,
255
- "eval_rouge1": 83.9415,
256
- "eval_rouge2": 83.3937,
257
- "eval_rougeL": 84.0648,
258
- "eval_rougeLsum": 84.0055,
259
- "eval_runtime": 3.1956,
260
- "eval_samples_per_second": 21.592,
261
- "eval_steps_per_second": 2.816,
262
  "step": 665
263
  },
264
  {
265
  "epoch": 20.0,
266
- "eval_gen_len": 67.08695652173913,
267
- "eval_loss": 0.07390377670526505,
268
- "eval_rouge1": 83.9415,
269
- "eval_rouge2": 83.3937,
270
- "eval_rougeL": 84.0648,
271
- "eval_rougeLsum": 84.0055,
272
- "eval_runtime": 3.2574,
273
- "eval_samples_per_second": 21.183,
274
- "eval_steps_per_second": 2.763,
275
  "step": 700
276
  },
277
  {
278
- "epoch": 20.0,
279
- "step": 700,
280
- "total_flos": 1494173488250880.0,
281
- "train_loss": 0.1759471893310547,
282
- "train_runtime": 142.9885,
283
- "train_samples_per_second": 38.604,
284
- "train_steps_per_second": 4.895
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
285
  }
286
  ],
287
  "logging_steps": 500,
288
- "max_steps": 700,
289
  "num_input_tokens_seen": 0,
290
- "num_train_epochs": 20,
291
  "save_steps": 500,
292
- "total_flos": 1494173488250880.0,
293
  "train_batch_size": 8,
294
  "trial_name": null,
295
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 50.0,
5
  "eval_steps": 500,
6
+ "global_step": 1750,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
+ "eval_gen_len": 81.21739130434783,
14
+ "eval_loss": 0.4761563539505005,
15
+ "eval_rouge1": 53.8677,
16
+ "eval_rouge2": 44.8002,
17
+ "eval_rougeL": 49.2937,
18
+ "eval_rougeLsum": 51.8392,
19
+ "eval_runtime": 3.1021,
20
+ "eval_samples_per_second": 22.243,
21
+ "eval_steps_per_second": 2.901,
22
  "step": 35
23
  },
24
  {
25
  "epoch": 2.0,
26
+ "eval_gen_len": 66.27536231884058,
27
+ "eval_loss": 0.28426334261894226,
28
+ "eval_rouge1": 63.6062,
29
+ "eval_rouge2": 58.4113,
30
+ "eval_rougeL": 61.1896,
31
+ "eval_rougeLsum": 62.3844,
32
+ "eval_runtime": 2.9392,
33
+ "eval_samples_per_second": 23.476,
34
+ "eval_steps_per_second": 3.062,
35
  "step": 70
36
  },
37
  {
38
  "epoch": 3.0,
39
+ "eval_gen_len": 61.20289855072464,
40
+ "eval_loss": 0.21173293888568878,
41
+ "eval_rouge1": 64.8696,
42
+ "eval_rouge2": 60.8866,
43
+ "eval_rougeL": 63.1615,
44
+ "eval_rougeLsum": 64.2496,
45
+ "eval_runtime": 2.8249,
46
+ "eval_samples_per_second": 24.425,
47
+ "eval_steps_per_second": 3.186,
48
  "step": 105
49
  },
50
  {
51
  "epoch": 4.0,
52
+ "eval_gen_len": 62.82608695652174,
53
+ "eval_loss": 0.17675279080867767,
54
+ "eval_rouge1": 70.8455,
55
+ "eval_rouge2": 67.8574,
56
+ "eval_rougeL": 69.4736,
57
+ "eval_rougeLsum": 70.0874,
58
+ "eval_runtime": 2.7232,
59
+ "eval_samples_per_second": 25.338,
60
+ "eval_steps_per_second": 3.305,
61
  "step": 140
62
  },
63
  {
64
  "epoch": 5.0,
65
+ "eval_gen_len": 61.46376811594203,
66
+ "eval_loss": 0.15457534790039062,
67
+ "eval_rouge1": 74.0309,
68
+ "eval_rouge2": 71.9404,
69
+ "eval_rougeL": 73.3747,
70
+ "eval_rougeLsum": 73.5531,
71
+ "eval_runtime": 2.77,
72
+ "eval_samples_per_second": 24.91,
73
+ "eval_steps_per_second": 3.249,
74
  "step": 175
75
  },
76
  {
77
  "epoch": 6.0,
78
+ "eval_gen_len": 63.0,
79
+ "eval_loss": 0.13463033735752106,
80
+ "eval_rouge1": 77.1153,
81
+ "eval_rouge2": 75.713,
82
+ "eval_rougeL": 77.0396,
83
+ "eval_rougeLsum": 76.815,
84
+ "eval_runtime": 2.9058,
85
+ "eval_samples_per_second": 23.745,
86
+ "eval_steps_per_second": 3.097,
87
  "step": 210
88
  },
89
  {
90
  "epoch": 7.0,
91
+ "eval_gen_len": 65.55072463768116,
92
+ "eval_loss": 0.12134861946105957,
93
+ "eval_rouge1": 80.2369,
94
+ "eval_rouge2": 79.6485,
95
+ "eval_rougeL": 80.4178,
96
+ "eval_rougeLsum": 80.1986,
97
+ "eval_runtime": 2.934,
98
+ "eval_samples_per_second": 23.517,
99
+ "eval_steps_per_second": 3.067,
100
  "step": 245
101
  },
102
  {
103
  "epoch": 8.0,
104
+ "eval_gen_len": 65.23188405797102,
105
+ "eval_loss": 0.10570676624774933,
106
+ "eval_rouge1": 82.3014,
107
+ "eval_rouge2": 81.6093,
108
+ "eval_rougeL": 82.3372,
109
+ "eval_rougeLsum": 82.1797,
110
+ "eval_runtime": 2.7932,
111
+ "eval_samples_per_second": 24.703,
112
+ "eval_steps_per_second": 3.222,
113
  "step": 280
114
  },
115
  {
116
  "epoch": 9.0,
117
+ "eval_gen_len": 66.08695652173913,
118
+ "eval_loss": 0.09365525096654892,
119
+ "eval_rouge1": 83.5682,
120
+ "eval_rouge2": 82.9738,
121
+ "eval_rougeL": 83.6388,
122
+ "eval_rougeLsum": 83.5486,
123
+ "eval_runtime": 2.9365,
124
+ "eval_samples_per_second": 23.497,
125
+ "eval_steps_per_second": 3.065,
126
  "step": 315
127
  },
128
  {
129
  "epoch": 10.0,
130
+ "eval_gen_len": 66.08695652173913,
131
+ "eval_loss": 0.08428314328193665,
132
+ "eval_rouge1": 83.5735,
133
+ "eval_rouge2": 82.9779,
134
+ "eval_rougeL": 83.6425,
135
+ "eval_rougeLsum": 83.5533,
136
+ "eval_runtime": 2.9255,
137
+ "eval_samples_per_second": 23.586,
138
+ "eval_steps_per_second": 3.076,
139
  "step": 350
140
  },
141
  {
142
  "epoch": 11.0,
143
+ "eval_gen_len": 67.69565217391305,
144
+ "eval_loss": 0.07632659375667572,
145
+ "eval_rouge1": 83.5502,
146
+ "eval_rouge2": 83.0151,
147
+ "eval_rougeL": 83.7004,
148
+ "eval_rougeLsum": 83.6188,
149
+ "eval_runtime": 3.0282,
150
+ "eval_samples_per_second": 22.786,
151
+ "eval_steps_per_second": 2.972,
152
  "step": 385
153
  },
154
  {
155
  "epoch": 12.0,
156
+ "eval_gen_len": 65.6086956521739,
157
+ "eval_loss": 0.06880246102809906,
158
+ "eval_rouge1": 83.8612,
159
+ "eval_rouge2": 83.4112,
160
+ "eval_rougeL": 84.0555,
161
+ "eval_rougeLsum": 83.8541,
162
+ "eval_runtime": 2.9936,
163
+ "eval_samples_per_second": 23.049,
164
+ "eval_steps_per_second": 3.006,
165
  "step": 420
166
  },
167
  {
168
  "epoch": 13.0,
169
+ "eval_gen_len": 68.4927536231884,
170
+ "eval_loss": 0.0652654618024826,
171
+ "eval_rouge1": 83.2559,
172
+ "eval_rouge2": 82.7398,
173
+ "eval_rougeL": 83.437,
174
+ "eval_rougeLsum": 83.3363,
175
+ "eval_runtime": 3.0469,
176
+ "eval_samples_per_second": 22.646,
177
+ "eval_steps_per_second": 2.954,
178
  "step": 455
179
  },
180
  {
181
  "epoch": 14.0,
182
+ "eval_gen_len": 67.76811594202898,
183
+ "eval_loss": 0.06127766892313957,
184
+ "eval_rouge1": 84.2407,
185
+ "eval_rouge2": 83.738,
186
+ "eval_rougeL": 84.3104,
187
+ "eval_rougeLsum": 84.3012,
188
+ "eval_runtime": 3.1021,
189
+ "eval_samples_per_second": 22.243,
190
+ "eval_steps_per_second": 2.901,
191
  "step": 490
192
  },
193
  {
194
  "epoch": 14.29,
195
+ "learning_rate": 1.4285714285714287e-05,
196
+ "loss": 0.1963,
197
  "step": 500
198
  },
199
  {
200
  "epoch": 15.0,
201
+ "eval_gen_len": 68.55072463768116,
202
+ "eval_loss": 0.05842842161655426,
203
+ "eval_rouge1": 83.8361,
204
+ "eval_rouge2": 83.3141,
205
+ "eval_rougeL": 83.8976,
206
+ "eval_rougeLsum": 83.8751,
207
+ "eval_runtime": 3.2109,
208
+ "eval_samples_per_second": 21.489,
209
+ "eval_steps_per_second": 2.803,
210
  "step": 525
211
  },
212
  {
213
  "epoch": 16.0,
214
+ "eval_gen_len": 71.4927536231884,
215
+ "eval_loss": 0.05631176754832268,
216
+ "eval_rouge1": 83.5952,
217
+ "eval_rouge2": 83.1416,
218
+ "eval_rougeL": 83.5644,
219
+ "eval_rougeLsum": 83.5389,
220
+ "eval_runtime": 3.2752,
221
+ "eval_samples_per_second": 21.067,
222
+ "eval_steps_per_second": 2.748,
223
  "step": 560
224
  },
225
  {
226
  "epoch": 17.0,
227
+ "eval_gen_len": 69.47826086956522,
228
+ "eval_loss": 0.05389421060681343,
229
+ "eval_rouge1": 84.6048,
230
+ "eval_rouge2": 84.1681,
231
+ "eval_rougeL": 84.7325,
232
+ "eval_rougeLsum": 84.567,
233
+ "eval_runtime": 3.2569,
234
+ "eval_samples_per_second": 21.186,
235
+ "eval_steps_per_second": 2.763,
236
  "step": 595
237
  },
238
  {
239
  "epoch": 18.0,
240
+ "eval_gen_len": 68.72463768115942,
241
+ "eval_loss": 0.052007660269737244,
242
+ "eval_rouge1": 84.9204,
243
+ "eval_rouge2": 84.4493,
244
+ "eval_rougeL": 85.0357,
245
+ "eval_rougeLsum": 84.9063,
246
+ "eval_runtime": 3.1538,
247
+ "eval_samples_per_second": 21.879,
248
+ "eval_steps_per_second": 2.854,
249
  "step": 630
250
  },
251
  {
252
  "epoch": 19.0,
253
+ "eval_gen_len": 69.79710144927536,
254
+ "eval_loss": 0.051409389823675156,
255
+ "eval_rouge1": 84.3924,
256
+ "eval_rouge2": 83.9735,
257
+ "eval_rougeL": 84.4126,
258
+ "eval_rougeLsum": 84.3779,
259
+ "eval_runtime": 3.2185,
260
+ "eval_samples_per_second": 21.438,
261
+ "eval_steps_per_second": 2.796,
262
  "step": 665
263
  },
264
  {
265
  "epoch": 20.0,
266
+ "eval_gen_len": 67.23188405797102,
267
+ "eval_loss": 0.050410542637109756,
268
+ "eval_rouge1": 86.1503,
269
+ "eval_rouge2": 85.6151,
270
+ "eval_rougeL": 86.2393,
271
+ "eval_rougeLsum": 86.0495,
272
+ "eval_runtime": 3.144,
273
+ "eval_samples_per_second": 21.947,
274
+ "eval_steps_per_second": 2.863,
275
  "step": 700
276
  },
277
  {
278
+ "epoch": 21.0,
279
+ "eval_gen_len": 66.52173913043478,
280
+ "eval_loss": 0.04962162673473358,
281
+ "eval_rouge1": 86.4875,
282
+ "eval_rouge2": 85.9614,
283
+ "eval_rougeL": 86.5042,
284
+ "eval_rougeLsum": 86.3616,
285
+ "eval_runtime": 3.1455,
286
+ "eval_samples_per_second": 21.936,
287
+ "eval_steps_per_second": 2.861,
288
+ "step": 735
289
+ },
290
+ {
291
+ "epoch": 22.0,
292
+ "eval_gen_len": 67.94202898550725,
293
+ "eval_loss": 0.04964025691151619,
294
+ "eval_rouge1": 85.6339,
295
+ "eval_rouge2": 85.1604,
296
+ "eval_rougeL": 85.6862,
297
+ "eval_rougeLsum": 85.5112,
298
+ "eval_runtime": 3.1386,
299
+ "eval_samples_per_second": 21.984,
300
+ "eval_steps_per_second": 2.868,
301
+ "step": 770
302
+ },
303
+ {
304
+ "epoch": 23.0,
305
+ "eval_gen_len": 65.52173913043478,
306
+ "eval_loss": 0.04901711642742157,
307
+ "eval_rouge1": 87.0368,
308
+ "eval_rouge2": 86.5415,
309
+ "eval_rougeL": 87.1099,
310
+ "eval_rougeLsum": 86.9317,
311
+ "eval_runtime": 3.1705,
312
+ "eval_samples_per_second": 21.763,
313
+ "eval_steps_per_second": 2.839,
314
+ "step": 805
315
+ },
316
+ {
317
+ "epoch": 24.0,
318
+ "eval_gen_len": 67.94202898550725,
319
+ "eval_loss": 0.04925404489040375,
320
+ "eval_rouge1": 85.6323,
321
+ "eval_rouge2": 85.1577,
322
+ "eval_rougeL": 85.6823,
323
+ "eval_rougeLsum": 85.5102,
324
+ "eval_runtime": 3.1481,
325
+ "eval_samples_per_second": 21.918,
326
+ "eval_steps_per_second": 2.859,
327
+ "step": 840
328
+ },
329
+ {
330
+ "epoch": 25.0,
331
+ "eval_gen_len": 69.04347826086956,
332
+ "eval_loss": 0.049253568053245544,
333
+ "eval_rouge1": 86.3078,
334
+ "eval_rouge2": 85.7832,
335
+ "eval_rougeL": 86.3026,
336
+ "eval_rougeLsum": 86.1442,
337
+ "eval_runtime": 3.2721,
338
+ "eval_samples_per_second": 21.087,
339
+ "eval_steps_per_second": 2.751,
340
+ "step": 875
341
+ },
342
+ {
343
+ "epoch": 26.0,
344
+ "eval_gen_len": 70.28985507246377,
345
+ "eval_loss": 0.04901302605867386,
346
+ "eval_rouge1": 85.877,
347
+ "eval_rouge2": 85.3534,
348
+ "eval_rougeL": 85.9035,
349
+ "eval_rougeLsum": 85.8208,
350
+ "eval_runtime": 3.3048,
351
+ "eval_samples_per_second": 20.879,
352
+ "eval_steps_per_second": 2.723,
353
+ "step": 910
354
+ },
355
+ {
356
+ "epoch": 27.0,
357
+ "eval_gen_len": 69.2463768115942,
358
+ "eval_loss": 0.04779437184333801,
359
+ "eval_rouge1": 86.6353,
360
+ "eval_rouge2": 86.054,
361
+ "eval_rougeL": 86.5856,
362
+ "eval_rougeLsum": 86.5515,
363
+ "eval_runtime": 3.295,
364
+ "eval_samples_per_second": 20.941,
365
+ "eval_steps_per_second": 2.731,
366
+ "step": 945
367
+ },
368
+ {
369
+ "epoch": 28.0,
370
+ "eval_gen_len": 68.56521739130434,
371
+ "eval_loss": 0.047761447727680206,
372
+ "eval_rouge1": 87.0975,
373
+ "eval_rouge2": 86.5716,
374
+ "eval_rougeL": 87.1452,
375
+ "eval_rougeLsum": 87.0713,
376
+ "eval_runtime": 3.2938,
377
+ "eval_samples_per_second": 20.948,
378
+ "eval_steps_per_second": 2.732,
379
+ "step": 980
380
+ },
381
+ {
382
+ "epoch": 28.57,
383
+ "learning_rate": 8.571428571428571e-06,
384
+ "loss": 0.0499,
385
+ "step": 1000
386
+ },
387
+ {
388
+ "epoch": 29.0,
389
+ "eval_gen_len": 67.0,
390
+ "eval_loss": 0.04680383577942848,
391
+ "eval_rouge1": 87.9989,
392
+ "eval_rouge2": 87.509,
393
+ "eval_rougeL": 88.0597,
394
+ "eval_rougeLsum": 87.9458,
395
+ "eval_runtime": 3.282,
396
+ "eval_samples_per_second": 21.024,
397
+ "eval_steps_per_second": 2.742,
398
+ "step": 1015
399
+ },
400
+ {
401
+ "epoch": 30.0,
402
+ "eval_gen_len": 69.1304347826087,
403
+ "eval_loss": 0.046802520751953125,
404
+ "eval_rouge1": 86.6642,
405
+ "eval_rouge2": 86.1007,
406
+ "eval_rougeL": 86.6429,
407
+ "eval_rougeLsum": 86.582,
408
+ "eval_runtime": 3.306,
409
+ "eval_samples_per_second": 20.871,
410
+ "eval_steps_per_second": 2.722,
411
+ "step": 1050
412
+ },
413
+ {
414
+ "epoch": 31.0,
415
+ "eval_gen_len": 69.30434782608695,
416
+ "eval_loss": 0.04671892151236534,
417
+ "eval_rouge1": 86.3475,
418
+ "eval_rouge2": 85.7566,
419
+ "eval_rougeL": 86.257,
420
+ "eval_rougeLsum": 86.2544,
421
+ "eval_runtime": 3.2952,
422
+ "eval_samples_per_second": 20.94,
423
+ "eval_steps_per_second": 2.731,
424
+ "step": 1085
425
+ },
426
+ {
427
+ "epoch": 32.0,
428
+ "eval_gen_len": 69.14492753623189,
429
+ "eval_loss": 0.046326328068971634,
430
+ "eval_rouge1": 86.6794,
431
+ "eval_rouge2": 86.1044,
432
+ "eval_rougeL": 86.6438,
433
+ "eval_rougeLsum": 86.5856,
434
+ "eval_runtime": 3.3092,
435
+ "eval_samples_per_second": 20.851,
436
+ "eval_steps_per_second": 2.72,
437
+ "step": 1120
438
+ },
439
+ {
440
+ "epoch": 33.0,
441
+ "eval_gen_len": 69.14492753623189,
442
+ "eval_loss": 0.0462319478392601,
443
+ "eval_rouge1": 86.6794,
444
+ "eval_rouge2": 86.1044,
445
+ "eval_rougeL": 86.6438,
446
+ "eval_rougeLsum": 86.5856,
447
+ "eval_runtime": 3.2895,
448
+ "eval_samples_per_second": 20.976,
449
+ "eval_steps_per_second": 2.736,
450
+ "step": 1155
451
+ },
452
+ {
453
+ "epoch": 34.0,
454
+ "eval_gen_len": 68.46376811594203,
455
+ "eval_loss": 0.04606299102306366,
456
+ "eval_rouge1": 87.1151,
457
+ "eval_rouge2": 86.605,
458
+ "eval_rougeL": 87.1857,
459
+ "eval_rougeLsum": 87.1151,
460
+ "eval_runtime": 3.2939,
461
+ "eval_samples_per_second": 20.948,
462
+ "eval_steps_per_second": 2.732,
463
+ "step": 1190
464
+ },
465
+ {
466
+ "epoch": 35.0,
467
+ "eval_gen_len": 67.01449275362319,
468
+ "eval_loss": 0.045941609889268875,
469
+ "eval_rouge1": 88.0068,
470
+ "eval_rouge2": 87.5135,
471
+ "eval_rougeL": 88.0611,
472
+ "eval_rougeLsum": 87.9535,
473
+ "eval_runtime": 3.2649,
474
+ "eval_samples_per_second": 21.134,
475
+ "eval_steps_per_second": 2.757,
476
+ "step": 1225
477
+ },
478
+ {
479
+ "epoch": 36.0,
480
+ "eval_gen_len": 67.57971014492753,
481
+ "eval_loss": 0.045919787138700485,
482
+ "eval_rouge1": 87.6823,
483
+ "eval_rouge2": 87.1982,
484
+ "eval_rougeL": 87.7207,
485
+ "eval_rougeLsum": 87.6541,
486
+ "eval_runtime": 3.2871,
487
+ "eval_samples_per_second": 20.991,
488
+ "eval_steps_per_second": 2.738,
489
+ "step": 1260
490
+ },
491
+ {
492
+ "epoch": 37.0,
493
+ "eval_gen_len": 69.14492753623189,
494
+ "eval_loss": 0.04610535874962807,
495
+ "eval_rouge1": 86.6794,
496
+ "eval_rouge2": 86.1044,
497
+ "eval_rougeL": 86.6438,
498
+ "eval_rougeLsum": 86.5856,
499
+ "eval_runtime": 3.2941,
500
+ "eval_samples_per_second": 20.946,
501
+ "eval_steps_per_second": 2.732,
502
+ "step": 1295
503
+ },
504
+ {
505
+ "epoch": 38.0,
506
+ "eval_gen_len": 69.14492753623189,
507
+ "eval_loss": 0.046049315482378006,
508
+ "eval_rouge1": 86.6794,
509
+ "eval_rouge2": 86.1044,
510
+ "eval_rougeL": 86.6438,
511
+ "eval_rougeLsum": 86.5856,
512
+ "eval_runtime": 3.2849,
513
+ "eval_samples_per_second": 21.005,
514
+ "eval_steps_per_second": 2.74,
515
+ "step": 1330
516
+ },
517
+ {
518
+ "epoch": 39.0,
519
+ "eval_gen_len": 69.04347826086956,
520
+ "eval_loss": 0.04580928757786751,
521
+ "eval_rouge1": 86.701,
522
+ "eval_rouge2": 86.1532,
523
+ "eval_rougeL": 86.6831,
524
+ "eval_rougeLsum": 86.6226,
525
+ "eval_runtime": 3.2714,
526
+ "eval_samples_per_second": 21.092,
527
+ "eval_steps_per_second": 2.751,
528
+ "step": 1365
529
+ },
530
+ {
531
+ "epoch": 40.0,
532
+ "eval_gen_len": 69.04347826086956,
533
+ "eval_loss": 0.04582460597157478,
534
+ "eval_rouge1": 86.701,
535
+ "eval_rouge2": 86.1532,
536
+ "eval_rougeL": 86.6831,
537
+ "eval_rougeLsum": 86.6226,
538
+ "eval_runtime": 3.2721,
539
+ "eval_samples_per_second": 21.087,
540
+ "eval_steps_per_second": 2.751,
541
+ "step": 1400
542
+ },
543
+ {
544
+ "epoch": 41.0,
545
+ "eval_gen_len": 67.47826086956522,
546
+ "eval_loss": 0.0457453578710556,
547
+ "eval_rouge1": 87.6998,
548
+ "eval_rouge2": 87.2471,
549
+ "eval_rougeL": 87.7695,
550
+ "eval_rougeLsum": 87.6934,
551
+ "eval_runtime": 3.2688,
552
+ "eval_samples_per_second": 21.109,
553
+ "eval_steps_per_second": 2.753,
554
+ "step": 1435
555
+ },
556
+ {
557
+ "epoch": 42.0,
558
+ "eval_gen_len": 69.14492753623189,
559
+ "eval_loss": 0.04603540897369385,
560
+ "eval_rouge1": 86.6794,
561
+ "eval_rouge2": 86.1044,
562
+ "eval_rougeL": 86.6438,
563
+ "eval_rougeLsum": 86.5856,
564
+ "eval_runtime": 3.3002,
565
+ "eval_samples_per_second": 20.908,
566
+ "eval_steps_per_second": 2.727,
567
+ "step": 1470
568
+ },
569
+ {
570
+ "epoch": 42.86,
571
+ "learning_rate": 2.8571428571428573e-06,
572
+ "loss": 0.0362,
573
+ "step": 1500
574
+ },
575
+ {
576
+ "epoch": 43.0,
577
+ "eval_gen_len": 67.65217391304348,
578
+ "eval_loss": 0.045989979058504105,
579
+ "eval_rouge1": 87.5977,
580
+ "eval_rouge2": 87.1424,
581
+ "eval_rougeL": 87.6777,
582
+ "eval_rougeLsum": 87.5976,
583
+ "eval_runtime": 3.2827,
584
+ "eval_samples_per_second": 21.02,
585
+ "eval_steps_per_second": 2.742,
586
+ "step": 1505
587
+ },
588
+ {
589
+ "epoch": 44.0,
590
+ "eval_gen_len": 67.76811594202898,
591
+ "eval_loss": 0.04582388699054718,
592
+ "eval_rouge1": 87.571,
593
+ "eval_rouge2": 87.1237,
594
+ "eval_rougeL": 87.6515,
595
+ "eval_rougeLsum": 87.5747,
596
+ "eval_runtime": 3.3039,
597
+ "eval_samples_per_second": 20.884,
598
+ "eval_steps_per_second": 2.724,
599
+ "step": 1540
600
+ },
601
+ {
602
+ "epoch": 45.0,
603
+ "eval_gen_len": 67.10144927536231,
604
+ "eval_loss": 0.045716848224401474,
605
+ "eval_rouge1": 87.9476,
606
+ "eval_rouge2": 87.4526,
607
+ "eval_rougeL": 88.0306,
608
+ "eval_rougeLsum": 87.9122,
609
+ "eval_runtime": 3.2864,
610
+ "eval_samples_per_second": 20.996,
611
+ "eval_steps_per_second": 2.739,
612
+ "step": 1575
613
+ },
614
+ {
615
+ "epoch": 46.0,
616
+ "eval_gen_len": 67.66666666666667,
617
+ "eval_loss": 0.04581255465745926,
618
+ "eval_rouge1": 87.6074,
619
+ "eval_rouge2": 87.1494,
620
+ "eval_rougeL": 87.6861,
621
+ "eval_rougeLsum": 87.6103,
622
+ "eval_runtime": 3.297,
623
+ "eval_samples_per_second": 20.928,
624
+ "eval_steps_per_second": 2.73,
625
+ "step": 1610
626
+ },
627
+ {
628
+ "epoch": 47.0,
629
+ "eval_gen_len": 67.66666666666667,
630
+ "eval_loss": 0.045844241976737976,
631
+ "eval_rouge1": 87.6074,
632
+ "eval_rouge2": 87.1494,
633
+ "eval_rougeL": 87.6861,
634
+ "eval_rougeLsum": 87.6103,
635
+ "eval_runtime": 3.2878,
636
+ "eval_samples_per_second": 20.987,
637
+ "eval_steps_per_second": 2.737,
638
+ "step": 1645
639
+ },
640
+ {
641
+ "epoch": 48.0,
642
+ "eval_gen_len": 67.66666666666667,
643
+ "eval_loss": 0.045833244919776917,
644
+ "eval_rouge1": 87.6074,
645
+ "eval_rouge2": 87.1494,
646
+ "eval_rougeL": 87.6861,
647
+ "eval_rougeLsum": 87.6103,
648
+ "eval_runtime": 3.2729,
649
+ "eval_samples_per_second": 21.082,
650
+ "eval_steps_per_second": 2.75,
651
+ "step": 1680
652
+ },
653
+ {
654
+ "epoch": 49.0,
655
+ "eval_gen_len": 68.34782608695652,
656
+ "eval_loss": 0.045867208391427994,
657
+ "eval_rouge1": 87.1522,
658
+ "eval_rouge2": 86.6031,
659
+ "eval_rougeL": 87.1206,
660
+ "eval_rougeLsum": 87.0701,
661
+ "eval_runtime": 3.2833,
662
+ "eval_samples_per_second": 21.016,
663
+ "eval_steps_per_second": 2.741,
664
+ "step": 1715
665
+ },
666
+ {
667
+ "epoch": 50.0,
668
+ "eval_gen_len": 68.34782608695652,
669
+ "eval_loss": 0.04586370289325714,
670
+ "eval_rouge1": 87.1522,
671
+ "eval_rouge2": 86.6031,
672
+ "eval_rougeL": 87.1206,
673
+ "eval_rougeLsum": 87.0701,
674
+ "eval_runtime": 3.2773,
675
+ "eval_samples_per_second": 21.054,
676
+ "eval_steps_per_second": 2.746,
677
+ "step": 1750
678
+ },
679
+ {
680
+ "epoch": 50.0,
681
+ "step": 1750,
682
+ "total_flos": 3735433720627200.0,
683
+ "train_loss": 0.08541564777919225,
684
+ "train_runtime": 358.6661,
685
+ "train_samples_per_second": 38.476,
686
+ "train_steps_per_second": 4.879
687
  }
688
  ],
689
  "logging_steps": 500,
690
+ "max_steps": 1750,
691
  "num_input_tokens_seen": 0,
692
+ "num_train_epochs": 50,
693
  "save_steps": 500,
694
+ "total_flos": 3735433720627200.0,
695
  "train_batch_size": 8,
696
  "trial_name": null,
697
  "trial_params": null