alxxtexxr committed on
Commit 3bf9c64
1 Parent(s): 1aeafa8

Upload folder using huggingface_hub

checkpoint-108/adapter_config.json ADDED
@@ -0,0 +1,19 @@
+{
+  "base_model_name_or_path": "codellama/CodeLlama-7b-hf",
+  "bias": "none",
+  "enable_lora": null,
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "lora_alpha": 16,
+  "lora_dropout": 0.05,
+  "merge_weights": false,
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 8,
+  "target_modules": [
+    "q_proj",
+    "v_proj"
+  ],
+  "task_type": "CAUSAL_LM"
+}
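The config above describes a rank-8 LoRA adapter (alpha 16, dropout 0.05) on the q_proj and v_proj attention projections of codellama/CodeLlama-7b-hf. A minimal sketch of attaching such a checkpoint with the PEFT library, assuming the checkpoint folder has been downloaded locally (the local path is illustrative):

# Sketch: load the base model and attach the LoRA adapter from checkpoint-108/.
# Assumes `transformers` and `peft` are installed and the folder exists locally.
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

base = AutoModelForCausalLM.from_pretrained("codellama/CodeLlama-7b-hf")
tokenizer = AutoTokenizer.from_pretrained("codellama/CodeLlama-7b-hf")

# adapter_config.json and adapter_model.bin are read from this directory.
model = PeftModel.from_pretrained(base, "./checkpoint-108")
model.eval()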
checkpoint-108/adapter_model.bin ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2f9050ee62fd25a9062f57f81a90e27e01c501d7ccc6f10786f762e66fe32520
+size 16822989
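As with the other binaries in this commit, only the Git LFS pointer is checked in: the three lines above record the LFS spec version, the SHA-256 object id, and the size in bytes (about 16 MB of adapter weights). A sketch of fetching the real file through huggingface_hub; the repo id is a placeholder, since it is not visible in this commit view:

# Sketch: download the adapter weights behind the LFS pointer.
# REPO_ID is hypothetical; substitute the repository this commit belongs to.
from huggingface_hub import hf_hub_download

REPO_ID = "user/repo"  # placeholder
path = hf_hub_download(repo_id=REPO_ID, filename="checkpoint-108/adapter_model.bin")
print(path)  # local cache path of the ~16 MB adapter_model.bin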
checkpoint-108/optimizer.pt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d5e51cdc64e56f277b8e62ef82b5c783f1ac01825ad8d9e8906eb52bcfb0f3b1
+size 33661637
checkpoint-108/rng_state.pth ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c2d31d965312af96ad728bd0641d6d3f8b83ba520beadb760402d53868041d7d
+size 14575
checkpoint-108/scheduler.pt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d2050e9766ba8af43a09060f4537765109e5253e38bb78cac58004e495ea42c4
+size 627
checkpoint-108/trainer_state.json ADDED
@@ -0,0 +1,297 @@
+{
+  "best_metric": 0.14893724024295807,
+  "best_model_checkpoint": "./lora-out/checkpoint-108",
+  "epoch": 7.646017699115045,
+  "eval_steps": 4,
+  "global_step": 108,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.28,
+      "eval_loss": 0.3006412982940674,
+      "eval_runtime": 54.7333,
+      "eval_samples_per_second": 0.914,
+      "eval_steps_per_second": 0.238,
+      "step": 4
+    },
+    {
+      "epoch": 0.57,
+      "eval_loss": 0.300335168838501,
+      "eval_runtime": 54.8113,
+      "eval_samples_per_second": 0.912,
+      "eval_steps_per_second": 0.237,
+      "step": 8
+    },
+    {
+      "epoch": 0.71,
+      "learning_rate": 1e-05,
+      "loss": 0.3024,
+      "step": 10
+    },
+    {
+      "epoch": 0.85,
+      "eval_loss": 0.2993900179862976,
+      "eval_runtime": 54.782,
+      "eval_samples_per_second": 0.913,
+      "eval_steps_per_second": 0.237,
+      "step": 12
+    },
+    {
+      "epoch": 1.13,
+      "eval_loss": 0.29816487431526184,
+      "eval_runtime": 54.8049,
+      "eval_samples_per_second": 0.912,
+      "eval_steps_per_second": 0.237,
+      "step": 16
+    },
+    {
+      "epoch": 1.42,
+      "learning_rate": 2e-05,
+      "loss": 0.3035,
+      "step": 20
+    },
+    {
+      "epoch": 1.42,
+      "eval_loss": 0.29595255851745605,
+      "eval_runtime": 54.7885,
+      "eval_samples_per_second": 0.913,
+      "eval_steps_per_second": 0.237,
+      "step": 20
+    },
+    {
+      "epoch": 1.7,
+      "eval_loss": 0.2939557135105133,
+      "eval_runtime": 54.7999,
+      "eval_samples_per_second": 0.912,
+      "eval_steps_per_second": 0.237,
+      "step": 24
+    },
+    {
+      "epoch": 1.98,
+      "eval_loss": 0.29013773798942566,
+      "eval_runtime": 54.7805,
+      "eval_samples_per_second": 0.913,
+      "eval_steps_per_second": 0.237,
+      "step": 28
+    },
+    {
+      "epoch": 2.12,
+      "learning_rate": 3e-05,
+      "loss": 0.2959,
+      "step": 30
+    },
+    {
+      "epoch": 2.27,
+      "eval_loss": 0.28251081705093384,
+      "eval_runtime": 54.7706,
+      "eval_samples_per_second": 0.913,
+      "eval_steps_per_second": 0.237,
+      "step": 32
+    },
+    {
+      "epoch": 2.55,
+      "eval_loss": 0.2771329879760742,
+      "eval_runtime": 54.7818,
+      "eval_samples_per_second": 0.913,
+      "eval_steps_per_second": 0.237,
+      "step": 36
+    },
+    {
+      "epoch": 2.83,
+      "learning_rate": 4e-05,
+      "loss": 0.284,
+      "step": 40
+    },
+    {
+      "epoch": 2.83,
+      "eval_loss": 0.27146488428115845,
+      "eval_runtime": 54.803,
+      "eval_samples_per_second": 0.912,
+      "eval_steps_per_second": 0.237,
+      "step": 40
+    },
+    {
+      "epoch": 3.12,
+      "eval_loss": 0.26464152336120605,
+      "eval_runtime": 54.8467,
+      "eval_samples_per_second": 0.912,
+      "eval_steps_per_second": 0.237,
+      "step": 44
+    },
+    {
+      "epoch": 3.4,
+      "eval_loss": 0.25653430819511414,
+      "eval_runtime": 54.8327,
+      "eval_samples_per_second": 0.912,
+      "eval_steps_per_second": 0.237,
+      "step": 48
+    },
+    {
+      "epoch": 3.54,
+      "learning_rate": 5e-05,
+      "loss": 0.263,
+      "step": 50
+    },
+    {
+      "epoch": 3.68,
+      "eval_loss": 0.24627122282981873,
+      "eval_runtime": 54.813,
+      "eval_samples_per_second": 0.912,
+      "eval_steps_per_second": 0.237,
+      "step": 52
+    },
+    {
+      "epoch": 3.96,
+      "eval_loss": 0.23474617302417755,
+      "eval_runtime": 54.7901,
+      "eval_samples_per_second": 0.913,
+      "eval_steps_per_second": 0.237,
+      "step": 56
+    },
+    {
+      "epoch": 4.25,
+      "learning_rate": 6e-05,
+      "loss": 0.241,
+      "step": 60
+    },
+    {
+      "epoch": 4.25,
+      "eval_loss": 0.2220366895198822,
+      "eval_runtime": 54.7983,
+      "eval_samples_per_second": 0.912,
+      "eval_steps_per_second": 0.237,
+      "step": 60
+    },
+    {
+      "epoch": 4.53,
+      "eval_loss": 0.20926769077777863,
+      "eval_runtime": 54.7403,
+      "eval_samples_per_second": 0.913,
+      "eval_steps_per_second": 0.237,
+      "step": 64
+    },
+    {
+      "epoch": 4.81,
+      "eval_loss": 0.19629451632499695,
+      "eval_runtime": 54.7525,
+      "eval_samples_per_second": 0.913,
+      "eval_steps_per_second": 0.237,
+      "step": 68
+    },
+    {
+      "epoch": 4.96,
+      "learning_rate": 7e-05,
+      "loss": 0.2101,
+      "step": 70
+    },
+    {
+      "epoch": 5.1,
+      "eval_loss": 0.18524658679962158,
+      "eval_runtime": 54.7303,
+      "eval_samples_per_second": 0.914,
+      "eval_steps_per_second": 0.238,
+      "step": 72
+    },
+    {
+      "epoch": 5.38,
+      "eval_loss": 0.17731742560863495,
+      "eval_runtime": 54.7552,
+      "eval_samples_per_second": 0.913,
+      "eval_steps_per_second": 0.237,
+      "step": 76
+    },
+    {
+      "epoch": 5.66,
+      "learning_rate": 8e-05,
+      "loss": 0.1788,
+      "step": 80
+    },
+    {
+      "epoch": 5.66,
+      "eval_loss": 0.16993452608585358,
+      "eval_runtime": 54.729,
+      "eval_samples_per_second": 0.914,
+      "eval_steps_per_second": 0.238,
+      "step": 80
+    },
+    {
+      "epoch": 5.95,
+      "eval_loss": 0.164781853556633,
+      "eval_runtime": 54.741,
+      "eval_samples_per_second": 0.913,
+      "eval_steps_per_second": 0.237,
+      "step": 84
+    },
+    {
+      "epoch": 6.23,
+      "eval_loss": 0.16103117167949677,
+      "eval_runtime": 54.7837,
+      "eval_samples_per_second": 0.913,
+      "eval_steps_per_second": 0.237,
+      "step": 88
+    },
+    {
+      "epoch": 6.37,
+      "learning_rate": 9e-05,
+      "loss": 0.1615,
+      "step": 90
+    },
+    {
+      "epoch": 6.51,
+      "eval_loss": 0.15781742334365845,
+      "eval_runtime": 54.7138,
+      "eval_samples_per_second": 0.914,
+      "eval_steps_per_second": 0.238,
+      "step": 92
+    },
+    {
+      "epoch": 6.8,
+      "eval_loss": 0.15516981482505798,
+      "eval_runtime": 54.7516,
+      "eval_samples_per_second": 0.913,
+      "eval_steps_per_second": 0.237,
+      "step": 96
+    },
+    {
+      "epoch": 7.08,
+      "learning_rate": 0.0001,
+      "loss": 0.1533,
+      "step": 100
+    },
+    {
+      "epoch": 7.08,
+      "eval_loss": 0.15261690318584442,
+      "eval_runtime": 54.6891,
+      "eval_samples_per_second": 0.914,
+      "eval_steps_per_second": 0.238,
+      "step": 100
+    },
+    {
+      "epoch": 7.36,
+      "eval_loss": 0.15066812932491302,
+      "eval_runtime": 54.6884,
+      "eval_samples_per_second": 0.914,
+      "eval_steps_per_second": 0.238,
+      "step": 104
+    },
+    {
+      "epoch": 7.65,
+      "eval_loss": 0.14893724024295807,
+      "eval_runtime": 54.6275,
+      "eval_samples_per_second": 0.915,
+      "eval_steps_per_second": 0.238,
+      "step": 108
+    }
+  ],
+  "logging_steps": 10,
+  "max_steps": 210,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 15,
+  "save_steps": 12,
+  "total_flos": 3.553655402645422e+17,
+  "train_batch_size": 4,
+  "trial_name": null,
+  "trial_params": null
+}
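trainer_state.json records the full evaluation history: eval_loss falls steadily from about 0.30 at step 4 to 0.1489 at step 108, which is why this checkpoint is marked as best_model_checkpoint. A small sketch of reading that history back from a local copy of the file:

# Sketch: inspect the evaluation history stored in trainer_state.json.
import json

with open("checkpoint-108/trainer_state.json") as f:
    state = json.load(f)

print("best_metric:", state["best_metric"])
print("best checkpoint:", state["best_model_checkpoint"])

# Print only the evaluation entries (training-loss entries carry no eval_loss).
for entry in state["log_history"]:
    if "eval_loss" in entry:
        print(f"step {entry['step']:>3}  eval_loss {entry['eval_loss']:.4f}")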
checkpoint-108/training_args.bin ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:85310c54a4f279d40e8badbc8f6f7406b57e15fc0c79500de525827feedf5072
+size 4219
checkpoint-120/adapter_config.json ADDED
@@ -0,0 +1,19 @@
+{
+  "base_model_name_or_path": "codellama/CodeLlama-7b-hf",
+  "bias": "none",
+  "enable_lora": null,
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "lora_alpha": 16,
+  "lora_dropout": 0.05,
+  "merge_weights": false,
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 8,
+  "target_modules": [
+    "q_proj",
+    "v_proj"
+  ],
+  "task_type": "CAUSAL_LM"
+}
checkpoint-120/adapter_model.bin ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ab2a1e562884c0574f07ef80b399f6f2536bc226604adc506df3776c8477bde0
+size 16822989
checkpoint-120/optimizer.pt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c688a73e9177c4c85cb2171a6fcd0043885669c2e8f6aee958e16e92e9e706d7
+size 33661637
checkpoint-120/rng_state.pth ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5eb8ebeef2d7062f8918cac9d79f55abf7a0736d6e439d85b087b02f5b32707d
+size 14575
checkpoint-120/scheduler.pt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:faeed8541dd712b0b41ff7783005b44f5bb694e062c68740a92308cb1f7e61f5
+size 627
checkpoint-120/trainer_state.json ADDED
@@ -0,0 +1,333 @@
+{
+  "best_metric": 0.14450186491012573,
+  "best_model_checkpoint": "./lora-out/checkpoint-120",
+  "epoch": 8.495575221238939,
+  "eval_steps": 4,
+  "global_step": 120,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.28,
+      "eval_loss": 0.3006412982940674,
+      "eval_runtime": 54.7333,
+      "eval_samples_per_second": 0.914,
+      "eval_steps_per_second": 0.238,
+      "step": 4
+    },
+    {
+      "epoch": 0.57,
+      "eval_loss": 0.300335168838501,
+      "eval_runtime": 54.8113,
+      "eval_samples_per_second": 0.912,
+      "eval_steps_per_second": 0.237,
+      "step": 8
+    },
+    {
+      "epoch": 0.71,
+      "learning_rate": 1e-05,
+      "loss": 0.3024,
+      "step": 10
+    },
+    {
+      "epoch": 0.85,
+      "eval_loss": 0.2993900179862976,
+      "eval_runtime": 54.782,
+      "eval_samples_per_second": 0.913,
+      "eval_steps_per_second": 0.237,
+      "step": 12
+    },
+    {
+      "epoch": 1.13,
+      "eval_loss": 0.29816487431526184,
+      "eval_runtime": 54.8049,
+      "eval_samples_per_second": 0.912,
+      "eval_steps_per_second": 0.237,
+      "step": 16
+    },
+    {
+      "epoch": 1.42,
+      "learning_rate": 2e-05,
+      "loss": 0.3035,
+      "step": 20
+    },
+    {
+      "epoch": 1.42,
+      "eval_loss": 0.29595255851745605,
+      "eval_runtime": 54.7885,
+      "eval_samples_per_second": 0.913,
+      "eval_steps_per_second": 0.237,
+      "step": 20
+    },
+    {
+      "epoch": 1.7,
+      "eval_loss": 0.2939557135105133,
+      "eval_runtime": 54.7999,
+      "eval_samples_per_second": 0.912,
+      "eval_steps_per_second": 0.237,
+      "step": 24
+    },
+    {
+      "epoch": 1.98,
+      "eval_loss": 0.29013773798942566,
+      "eval_runtime": 54.7805,
+      "eval_samples_per_second": 0.913,
+      "eval_steps_per_second": 0.237,
+      "step": 28
+    },
+    {
+      "epoch": 2.12,
+      "learning_rate": 3e-05,
+      "loss": 0.2959,
+      "step": 30
+    },
+    {
+      "epoch": 2.27,
+      "eval_loss": 0.28251081705093384,
+      "eval_runtime": 54.7706,
+      "eval_samples_per_second": 0.913,
+      "eval_steps_per_second": 0.237,
+      "step": 32
+    },
+    {
+      "epoch": 2.55,
+      "eval_loss": 0.2771329879760742,
+      "eval_runtime": 54.7818,
+      "eval_samples_per_second": 0.913,
+      "eval_steps_per_second": 0.237,
+      "step": 36
+    },
+    {
+      "epoch": 2.83,
+      "learning_rate": 4e-05,
+      "loss": 0.284,
+      "step": 40
+    },
+    {
+      "epoch": 2.83,
+      "eval_loss": 0.27146488428115845,
+      "eval_runtime": 54.803,
+      "eval_samples_per_second": 0.912,
+      "eval_steps_per_second": 0.237,
+      "step": 40
+    },
+    {
+      "epoch": 3.12,
+      "eval_loss": 0.26464152336120605,
+      "eval_runtime": 54.8467,
+      "eval_samples_per_second": 0.912,
+      "eval_steps_per_second": 0.237,
+      "step": 44
+    },
+    {
+      "epoch": 3.4,
+      "eval_loss": 0.25653430819511414,
+      "eval_runtime": 54.8327,
+      "eval_samples_per_second": 0.912,
+      "eval_steps_per_second": 0.237,
+      "step": 48
+    },
+    {
+      "epoch": 3.54,
+      "learning_rate": 5e-05,
+      "loss": 0.263,
+      "step": 50
+    },
+    {
+      "epoch": 3.68,
+      "eval_loss": 0.24627122282981873,
+      "eval_runtime": 54.813,
+      "eval_samples_per_second": 0.912,
+      "eval_steps_per_second": 0.237,
+      "step": 52
+    },
+    {
+      "epoch": 3.96,
+      "eval_loss": 0.23474617302417755,
+      "eval_runtime": 54.7901,
+      "eval_samples_per_second": 0.913,
+      "eval_steps_per_second": 0.237,
+      "step": 56
+    },
+    {
+      "epoch": 4.25,
+      "learning_rate": 6e-05,
+      "loss": 0.241,
+      "step": 60
+    },
+    {
+      "epoch": 4.25,
+      "eval_loss": 0.2220366895198822,
+      "eval_runtime": 54.7983,
+      "eval_samples_per_second": 0.912,
+      "eval_steps_per_second": 0.237,
+      "step": 60
+    },
+    {
+      "epoch": 4.53,
+      "eval_loss": 0.20926769077777863,
+      "eval_runtime": 54.7403,
+      "eval_samples_per_second": 0.913,
+      "eval_steps_per_second": 0.237,
+      "step": 64
+    },
+    {
+      "epoch": 4.81,
+      "eval_loss": 0.19629451632499695,
+      "eval_runtime": 54.7525,
+      "eval_samples_per_second": 0.913,
+      "eval_steps_per_second": 0.237,
+      "step": 68
+    },
+    {
+      "epoch": 4.96,
+      "learning_rate": 7e-05,
+      "loss": 0.2101,
+      "step": 70
+    },
+    {
+      "epoch": 5.1,
+      "eval_loss": 0.18524658679962158,
+      "eval_runtime": 54.7303,
+      "eval_samples_per_second": 0.914,
+      "eval_steps_per_second": 0.238,
+      "step": 72
+    },
+    {
+      "epoch": 5.38,
+      "eval_loss": 0.17731742560863495,
+      "eval_runtime": 54.7552,
+      "eval_samples_per_second": 0.913,
+      "eval_steps_per_second": 0.237,
+      "step": 76
+    },
+    {
+      "epoch": 5.66,
+      "learning_rate": 8e-05,
+      "loss": 0.1788,
+      "step": 80
+    },
+    {
+      "epoch": 5.66,
+      "eval_loss": 0.16993452608585358,
+      "eval_runtime": 54.729,
+      "eval_samples_per_second": 0.914,
+      "eval_steps_per_second": 0.238,
+      "step": 80
+    },
+    {
+      "epoch": 5.95,
+      "eval_loss": 0.164781853556633,
+      "eval_runtime": 54.741,
+      "eval_samples_per_second": 0.913,
+      "eval_steps_per_second": 0.237,
+      "step": 84
+    },
+    {
+      "epoch": 6.23,
+      "eval_loss": 0.16103117167949677,
+      "eval_runtime": 54.7837,
+      "eval_samples_per_second": 0.913,
+      "eval_steps_per_second": 0.237,
+      "step": 88
+    },
+    {
+      "epoch": 6.37,
+      "learning_rate": 9e-05,
+      "loss": 0.1615,
+      "step": 90
+    },
+    {
+      "epoch": 6.51,
+      "eval_loss": 0.15781742334365845,
+      "eval_runtime": 54.7138,
+      "eval_samples_per_second": 0.914,
+      "eval_steps_per_second": 0.238,
+      "step": 92
+    },
+    {
+      "epoch": 6.8,
+      "eval_loss": 0.15516981482505798,
+      "eval_runtime": 54.7516,
+      "eval_samples_per_second": 0.913,
+      "eval_steps_per_second": 0.237,
+      "step": 96
+    },
+    {
+      "epoch": 7.08,
+      "learning_rate": 0.0001,
+      "loss": 0.1533,
+      "step": 100
+    },
+    {
+      "epoch": 7.08,
+      "eval_loss": 0.15261690318584442,
+      "eval_runtime": 54.6891,
+      "eval_samples_per_second": 0.914,
+      "eval_steps_per_second": 0.238,
+      "step": 100
+    },
+    {
+      "epoch": 7.36,
+      "eval_loss": 0.15066812932491302,
+      "eval_runtime": 54.6884,
+      "eval_samples_per_second": 0.914,
+      "eval_steps_per_second": 0.238,
+      "step": 104
+    },
+    {
+      "epoch": 7.65,
+      "eval_loss": 0.14893724024295807,
+      "eval_runtime": 54.6275,
+      "eval_samples_per_second": 0.915,
+      "eval_steps_per_second": 0.238,
+      "step": 108
+    },
+    {
+      "epoch": 7.79,
+      "learning_rate": 9.090909090909092e-05,
+      "loss": 0.1463,
+      "step": 110
+    },
+    {
+      "epoch": 7.93,
+      "eval_loss": 0.14742153882980347,
+      "eval_runtime": 54.6174,
+      "eval_samples_per_second": 0.915,
+      "eval_steps_per_second": 0.238,
+      "step": 112
+    },
+    {
+      "epoch": 8.21,
+      "eval_loss": 0.14575307071208954,
+      "eval_runtime": 54.6366,
+      "eval_samples_per_second": 0.915,
+      "eval_steps_per_second": 0.238,
+      "step": 116
+    },
+    {
+      "epoch": 8.5,
+      "learning_rate": 8.181818181818183e-05,
+      "loss": 0.1399,
+      "step": 120
+    },
+    {
+      "epoch": 8.5,
+      "eval_loss": 0.14450186491012573,
+      "eval_runtime": 54.6303,
+      "eval_samples_per_second": 0.915,
+      "eval_steps_per_second": 0.238,
+      "step": 120
+    }
+  ],
+  "logging_steps": 10,
+  "max_steps": 210,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 15,
+  "save_steps": 12,
+  "total_flos": 3.9566860550445466e+17,
+  "train_batch_size": 4,
+  "trial_name": null,
+  "trial_params": null
+}
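Each checkpoint folder in this commit carries its own best_metric (0.1552 at step 96, 0.1489 at step 108, 0.1445 at step 120), so eval loss is still improving at step 120 of the planned 210. A sketch for picking the checkpoint with the lowest recorded best_metric from a local copy of these folders:

# Sketch: choose the checkpoint with the lowest recorded best_metric.
# Assumes the checkpoint-*/ folders from this commit exist locally.
import glob
import json

best = None
for path in glob.glob("checkpoint-*/trainer_state.json"):
    with open(path) as f:
        metric = json.load(f)["best_metric"]
    if best is None or metric < best[1]:
        best = (path, metric)

print(best)  # e.g. ('checkpoint-120/trainer_state.json', 0.14450186491012573)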
checkpoint-120/training_args.bin ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:85310c54a4f279d40e8badbc8f6f7406b57e15fc0c79500de525827feedf5072
+size 4219
checkpoint-96/adapter_config.json ADDED
@@ -0,0 +1,19 @@
+{
+  "base_model_name_or_path": "codellama/CodeLlama-7b-hf",
+  "bias": "none",
+  "enable_lora": null,
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "lora_alpha": 16,
+  "lora_dropout": 0.05,
+  "merge_weights": false,
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 8,
+  "target_modules": [
+    "q_proj",
+    "v_proj"
+  ],
+  "task_type": "CAUSAL_LM"
+}
checkpoint-96/adapter_model.bin ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0525137a8c5c2e3b8a67336c3faa14bb35cd8a82fe0e48fb5bbb1cc63517a683
+size 16822989
checkpoint-96/optimizer.pt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1b880941c0dde00e00c16127caaf28a86198b08be138268bce8178197dcf9531
+size 33661637
checkpoint-96/rng_state.pth ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:989cc2f27d5146cf2ca96268118fce87275257fa4db48a24d9c45c7a01b6b870
+size 14575
checkpoint-96/scheduler.pt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:40465654f063e78a5631104698736f11b5438cb73c746c832c97195415dfdf51
+size 627
checkpoint-96/trainer_state.json ADDED
@@ -0,0 +1,267 @@
+{
+  "best_metric": 0.15516981482505798,
+  "best_model_checkpoint": "./lora-out/checkpoint-96",
+  "epoch": 6.79646017699115,
+  "eval_steps": 4,
+  "global_step": 96,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.28,
+      "eval_loss": 0.3006412982940674,
+      "eval_runtime": 54.7333,
+      "eval_samples_per_second": 0.914,
+      "eval_steps_per_second": 0.238,
+      "step": 4
+    },
+    {
+      "epoch": 0.57,
+      "eval_loss": 0.300335168838501,
+      "eval_runtime": 54.8113,
+      "eval_samples_per_second": 0.912,
+      "eval_steps_per_second": 0.237,
+      "step": 8
+    },
+    {
+      "epoch": 0.71,
+      "learning_rate": 1e-05,
+      "loss": 0.3024,
+      "step": 10
+    },
+    {
+      "epoch": 0.85,
+      "eval_loss": 0.2993900179862976,
+      "eval_runtime": 54.782,
+      "eval_samples_per_second": 0.913,
+      "eval_steps_per_second": 0.237,
+      "step": 12
+    },
+    {
+      "epoch": 1.13,
+      "eval_loss": 0.29816487431526184,
+      "eval_runtime": 54.8049,
+      "eval_samples_per_second": 0.912,
+      "eval_steps_per_second": 0.237,
+      "step": 16
+    },
+    {
+      "epoch": 1.42,
+      "learning_rate": 2e-05,
+      "loss": 0.3035,
+      "step": 20
+    },
+    {
+      "epoch": 1.42,
+      "eval_loss": 0.29595255851745605,
+      "eval_runtime": 54.7885,
+      "eval_samples_per_second": 0.913,
+      "eval_steps_per_second": 0.237,
+      "step": 20
+    },
+    {
+      "epoch": 1.7,
+      "eval_loss": 0.2939557135105133,
+      "eval_runtime": 54.7999,
+      "eval_samples_per_second": 0.912,
+      "eval_steps_per_second": 0.237,
+      "step": 24
+    },
+    {
+      "epoch": 1.98,
+      "eval_loss": 0.29013773798942566,
+      "eval_runtime": 54.7805,
+      "eval_samples_per_second": 0.913,
+      "eval_steps_per_second": 0.237,
+      "step": 28
+    },
+    {
+      "epoch": 2.12,
+      "learning_rate": 3e-05,
+      "loss": 0.2959,
+      "step": 30
+    },
+    {
+      "epoch": 2.27,
+      "eval_loss": 0.28251081705093384,
+      "eval_runtime": 54.7706,
+      "eval_samples_per_second": 0.913,
+      "eval_steps_per_second": 0.237,
+      "step": 32
+    },
+    {
+      "epoch": 2.55,
+      "eval_loss": 0.2771329879760742,
+      "eval_runtime": 54.7818,
+      "eval_samples_per_second": 0.913,
+      "eval_steps_per_second": 0.237,
+      "step": 36
+    },
+    {
+      "epoch": 2.83,
+      "learning_rate": 4e-05,
+      "loss": 0.284,
+      "step": 40
+    },
+    {
+      "epoch": 2.83,
+      "eval_loss": 0.27146488428115845,
+      "eval_runtime": 54.803,
+      "eval_samples_per_second": 0.912,
+      "eval_steps_per_second": 0.237,
+      "step": 40
+    },
+    {
+      "epoch": 3.12,
+      "eval_loss": 0.26464152336120605,
+      "eval_runtime": 54.8467,
+      "eval_samples_per_second": 0.912,
+      "eval_steps_per_second": 0.237,
+      "step": 44
+    },
+    {
+      "epoch": 3.4,
+      "eval_loss": 0.25653430819511414,
+      "eval_runtime": 54.8327,
+      "eval_samples_per_second": 0.912,
+      "eval_steps_per_second": 0.237,
+      "step": 48
+    },
+    {
+      "epoch": 3.54,
+      "learning_rate": 5e-05,
+      "loss": 0.263,
+      "step": 50
+    },
+    {
+      "epoch": 3.68,
+      "eval_loss": 0.24627122282981873,
+      "eval_runtime": 54.813,
+      "eval_samples_per_second": 0.912,
+      "eval_steps_per_second": 0.237,
+      "step": 52
+    },
+    {
+      "epoch": 3.96,
+      "eval_loss": 0.23474617302417755,
+      "eval_runtime": 54.7901,
+      "eval_samples_per_second": 0.913,
+      "eval_steps_per_second": 0.237,
+      "step": 56
+    },
+    {
+      "epoch": 4.25,
+      "learning_rate": 6e-05,
+      "loss": 0.241,
+      "step": 60
+    },
+    {
+      "epoch": 4.25,
+      "eval_loss": 0.2220366895198822,
+      "eval_runtime": 54.7983,
+      "eval_samples_per_second": 0.912,
+      "eval_steps_per_second": 0.237,
+      "step": 60
+    },
+    {
+      "epoch": 4.53,
+      "eval_loss": 0.20926769077777863,
+      "eval_runtime": 54.7403,
+      "eval_samples_per_second": 0.913,
+      "eval_steps_per_second": 0.237,
+      "step": 64
+    },
+    {
+      "epoch": 4.81,
+      "eval_loss": 0.19629451632499695,
+      "eval_runtime": 54.7525,
+      "eval_samples_per_second": 0.913,
+      "eval_steps_per_second": 0.237,
+      "step": 68
+    },
+    {
+      "epoch": 4.96,
+      "learning_rate": 7e-05,
+      "loss": 0.2101,
+      "step": 70
+    },
+    {
+      "epoch": 5.1,
+      "eval_loss": 0.18524658679962158,
+      "eval_runtime": 54.7303,
+      "eval_samples_per_second": 0.914,
+      "eval_steps_per_second": 0.238,
+      "step": 72
+    },
+    {
+      "epoch": 5.38,
+      "eval_loss": 0.17731742560863495,
+      "eval_runtime": 54.7552,
+      "eval_samples_per_second": 0.913,
+      "eval_steps_per_second": 0.237,
+      "step": 76
+    },
+    {
+      "epoch": 5.66,
+      "learning_rate": 8e-05,
+      "loss": 0.1788,
+      "step": 80
+    },
+    {
+      "epoch": 5.66,
+      "eval_loss": 0.16993452608585358,
+      "eval_runtime": 54.729,
+      "eval_samples_per_second": 0.914,
+      "eval_steps_per_second": 0.238,
+      "step": 80
+    },
+    {
+      "epoch": 5.95,
+      "eval_loss": 0.164781853556633,
+      "eval_runtime": 54.741,
+      "eval_samples_per_second": 0.913,
+      "eval_steps_per_second": 0.237,
+      "step": 84
+    },
+    {
+      "epoch": 6.23,
+      "eval_loss": 0.16103117167949677,
+      "eval_runtime": 54.7837,
+      "eval_samples_per_second": 0.913,
+      "eval_steps_per_second": 0.237,
+      "step": 88
+    },
+    {
+      "epoch": 6.37,
+      "learning_rate": 9e-05,
+      "loss": 0.1615,
+      "step": 90
+    },
+    {
+      "epoch": 6.51,
+      "eval_loss": 0.15781742334365845,
+      "eval_runtime": 54.7138,
+      "eval_samples_per_second": 0.914,
+      "eval_steps_per_second": 0.238,
+      "step": 92
+    },
+    {
+      "epoch": 6.8,
+      "eval_loss": 0.15516981482505798,
+      "eval_runtime": 54.7516,
+      "eval_samples_per_second": 0.913,
+      "eval_steps_per_second": 0.237,
+      "step": 96
+    }
+  ],
+  "logging_steps": 10,
+  "max_steps": 210,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 15,
+  "save_steps": 12,
+  "total_flos": 3.166594152769782e+17,
+  "train_batch_size": 4,
+  "trial_name": null,
+  "trial_params": null
+}
checkpoint-96/training_args.bin ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:85310c54a4f279d40e8badbc8f6f7406b57e15fc0c79500de525827feedf5072
+size 4219