youndukn committed on
Commit
148b15a
·
verified ·
1 Parent(s): 375d0a3

Upload folder using huggingface_hub

Browse files
adapter_config.json CHANGED
@@ -22,10 +22,7 @@
22
  "v_proj",
23
  "k_proj",
24
  "o_proj",
25
- "up_proj",
26
- "q_proj",
27
- "gate_proj",
28
- "down_proj"
29
  ],
30
  "task_type": "CAUSAL_LM"
31
  }
 
22
  "v_proj",
23
  "k_proj",
24
  "o_proj",
25
+ "q_proj"
 
 
 
26
  ],
27
  "task_type": "CAUSAL_LM"
28
  }
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ae7caec5c4fee863bf7714e37b40940fc7b9a2ccc6b15c0c30d3921c100527e0
3
- size 83946192
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fecbabb257d4b758223de98c6bb3df3630a2ef8350900d11d691c68ddc87dfff
3
+ size 27297544
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d739e54f808f363bd08093c5f2a5f61697f9f45e81c0cf8e124386e8db92dfec
3
- size 168150290
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:07a718a3f933b148012bfd6ae847854cce775de7021cf1ccfbf9c28ebeeeb0e4
3
+ size 54741498
rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:36dcfcfc803739dc7e39932af6ce6f1f2ce2c025976e7f00f389005f2c0584c1
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a398f9b8128cc403794327e2cb46084defdbfce858e6984a5b910adfab5b773f
3
  size 14244
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8ca6f8824afe881b980664c8bcb10698cb1c06b90190b1056d469e315eaee37a
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a5449d9d082a25061677758fd48dfced59c627b0e0309a0f36524214a3219778
3
  size 1064
trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.28735632183908044,
5
  "eval_steps": 500,
6
- "global_step": 200,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -17,242 +17,122 @@
17
  {
18
  "epoch": 0.01,
19
  "learning_rate": 1.4285714285714285e-05,
20
- "loss": 2.3645,
21
  "step": 5
22
  },
23
  {
24
  "epoch": 0.01,
25
  "learning_rate": 2.857142857142857e-05,
26
- "loss": 2.1283,
27
  "step": 10
28
  },
29
  {
30
  "epoch": 0.02,
31
  "learning_rate": 4.2857142857142856e-05,
32
- "loss": 1.4859,
33
  "step": 15
34
  },
35
  {
36
  "epoch": 0.03,
37
  "learning_rate": 5.714285714285714e-05,
38
- "loss": 1.2983,
39
  "step": 20
40
  },
41
  {
42
  "epoch": 0.04,
43
  "learning_rate": 7.142857142857143e-05,
44
- "loss": 1.2019,
45
  "step": 25
46
  },
47
  {
48
  "epoch": 0.04,
49
  "learning_rate": 8.571428571428571e-05,
50
- "loss": 1.1346,
51
  "step": 30
52
  },
53
  {
54
  "epoch": 0.05,
55
  "learning_rate": 0.0001,
56
- "loss": 1.0858,
57
  "step": 35
58
  },
59
  {
60
  "epoch": 0.06,
61
  "learning_rate": 0.00011428571428571428,
62
- "loss": 1.0524,
63
  "step": 40
64
  },
65
  {
66
  "epoch": 0.06,
67
  "learning_rate": 0.00012857142857142858,
68
- "loss": 1.0344,
69
  "step": 45
70
  },
71
  {
72
  "epoch": 0.07,
73
  "learning_rate": 0.00014285714285714287,
74
- "loss": 1.0069,
75
  "step": 50
76
  },
77
  {
78
  "epoch": 0.08,
79
  "learning_rate": 0.00015714285714285716,
80
- "loss": 1.0073,
81
  "step": 55
82
  },
83
  {
84
  "epoch": 0.09,
85
  "learning_rate": 0.00017142857142857143,
86
- "loss": 0.9816,
87
  "step": 60
88
  },
89
  {
90
  "epoch": 0.09,
91
  "learning_rate": 0.00018571428571428572,
92
- "loss": 0.9896,
93
  "step": 65
94
  },
95
  {
96
  "epoch": 0.1,
97
  "learning_rate": 0.0002,
98
- "loss": 0.9846,
99
  "step": 70
100
  },
101
  {
102
  "epoch": 0.11,
103
  "learning_rate": 0.0001999685197404432,
104
- "loss": 0.9712,
105
  "step": 75
106
  },
107
  {
108
  "epoch": 0.11,
109
  "learning_rate": 0.00019987409878190752,
110
- "loss": 0.9792,
111
  "step": 80
112
  },
113
  {
114
  "epoch": 0.12,
115
  "learning_rate": 0.00019971679657231872,
116
- "loss": 0.9515,
117
  "step": 85
118
  },
119
  {
120
  "epoch": 0.13,
121
  "learning_rate": 0.00019949671214996445,
122
- "loss": 0.9689,
123
  "step": 90
124
  },
125
  {
126
  "epoch": 0.14,
127
  "learning_rate": 0.00019921398408113955,
128
- "loss": 0.9604,
129
  "step": 95
130
  },
131
  {
132
  "epoch": 0.14,
133
  "learning_rate": 0.00019886879037290384,
134
- "loss": 0.9637,
135
  "step": 100
136
- },
137
- {
138
- "epoch": 0.15,
139
- "learning_rate": 0.00019846134836100796,
140
- "loss": 0.9787,
141
- "step": 105
142
- },
143
- {
144
- "epoch": 0.16,
145
- "learning_rate": 0.00019799191457305768,
146
- "loss": 0.9616,
147
- "step": 110
148
- },
149
- {
150
- "epoch": 0.17,
151
- "learning_rate": 0.0001974607845670028,
152
- "loss": 0.9312,
153
- "step": 115
154
- },
155
- {
156
- "epoch": 0.17,
157
- "learning_rate": 0.0001968682927450523,
158
- "loss": 0.945,
159
- "step": 120
160
- },
161
- {
162
- "epoch": 0.18,
163
- "learning_rate": 0.00019621481214313297,
164
- "loss": 0.9363,
165
- "step": 125
166
- },
167
- {
168
- "epoch": 0.19,
169
- "learning_rate": 0.00019550075419602408,
170
- "loss": 0.9322,
171
- "step": 130
172
- },
173
- {
174
- "epoch": 0.19,
175
- "learning_rate": 0.00019472656847831595,
176
- "loss": 0.939,
177
- "step": 135
178
- },
179
- {
180
- "epoch": 0.2,
181
- "learning_rate": 0.0001938927424213553,
182
- "loss": 0.9282,
183
- "step": 140
184
- },
185
- {
186
- "epoch": 0.21,
187
- "learning_rate": 0.00019299980100635612,
188
- "loss": 0.955,
189
- "step": 145
190
- },
191
- {
192
- "epoch": 0.22,
193
- "learning_rate": 0.00019204830643386868,
194
- "loss": 0.9281,
195
- "step": 150
196
- },
197
- {
198
- "epoch": 0.22,
199
- "learning_rate": 0.00019103885776981515,
200
- "loss": 0.9308,
201
- "step": 155
202
- },
203
- {
204
- "epoch": 0.23,
205
- "learning_rate": 0.00018997209056831462,
206
- "loss": 0.9409,
207
- "step": 160
208
- },
209
- {
210
- "epoch": 0.24,
211
- "learning_rate": 0.00018884867647153483,
212
- "loss": 0.9366,
213
- "step": 165
214
- },
215
- {
216
- "epoch": 0.24,
217
- "learning_rate": 0.000187669322786823,
218
- "loss": 0.9203,
219
- "step": 170
220
- },
221
- {
222
- "epoch": 0.25,
223
- "learning_rate": 0.00018643477204138113,
224
- "loss": 0.9254,
225
- "step": 175
226
- },
227
- {
228
- "epoch": 0.26,
229
- "learning_rate": 0.0001851458015147673,
230
- "loss": 0.9481,
231
- "step": 180
232
- },
233
- {
234
- "epoch": 0.27,
235
- "learning_rate": 0.0001838032227495163,
236
- "loss": 0.9369,
237
- "step": 185
238
- },
239
- {
240
- "epoch": 0.27,
241
- "learning_rate": 0.00018240788104018822,
242
- "loss": 0.9435,
243
- "step": 190
244
- },
245
- {
246
- "epoch": 0.28,
247
- "learning_rate": 0.0001809606549011667,
248
- "loss": 0.9409,
249
- "step": 195
250
- },
251
- {
252
- "epoch": 0.29,
253
- "learning_rate": 0.00017946245551354157,
254
- "loss": 0.9396,
255
- "step": 200
256
  }
257
  ],
258
  "logging_steps": 5,
@@ -260,7 +140,7 @@
260
  "num_input_tokens_seen": 0,
261
  "num_train_epochs": 1,
262
  "save_steps": 100,
263
- "total_flos": 1.406259049707602e+17,
264
  "train_batch_size": 4,
265
  "trial_name": null,
266
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.14367816091954022,
5
  "eval_steps": 500,
6
+ "global_step": 100,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
17
  {
18
  "epoch": 0.01,
19
  "learning_rate": 1.4285714285714285e-05,
20
+ "loss": 2.3695,
21
  "step": 5
22
  },
23
  {
24
  "epoch": 0.01,
25
  "learning_rate": 2.857142857142857e-05,
26
+ "loss": 2.2916,
27
  "step": 10
28
  },
29
  {
30
  "epoch": 0.02,
31
  "learning_rate": 4.2857142857142856e-05,
32
+ "loss": 1.7865,
33
  "step": 15
34
  },
35
  {
36
  "epoch": 0.03,
37
  "learning_rate": 5.714285714285714e-05,
38
+ "loss": 1.4162,
39
  "step": 20
40
  },
41
  {
42
  "epoch": 0.04,
43
  "learning_rate": 7.142857142857143e-05,
44
+ "loss": 1.3012,
45
  "step": 25
46
  },
47
  {
48
  "epoch": 0.04,
49
  "learning_rate": 8.571428571428571e-05,
50
+ "loss": 1.2177,
51
  "step": 30
52
  },
53
  {
54
  "epoch": 0.05,
55
  "learning_rate": 0.0001,
56
+ "loss": 1.1726,
57
  "step": 35
58
  },
59
  {
60
  "epoch": 0.06,
61
  "learning_rate": 0.00011428571428571428,
62
+ "loss": 1.1367,
63
  "step": 40
64
  },
65
  {
66
  "epoch": 0.06,
67
  "learning_rate": 0.00012857142857142858,
68
+ "loss": 1.105,
69
  "step": 45
70
  },
71
  {
72
  "epoch": 0.07,
73
  "learning_rate": 0.00014285714285714287,
74
+ "loss": 1.0671,
75
  "step": 50
76
  },
77
  {
78
  "epoch": 0.08,
79
  "learning_rate": 0.00015714285714285716,
80
+ "loss": 1.0583,
81
  "step": 55
82
  },
83
  {
84
  "epoch": 0.09,
85
  "learning_rate": 0.00017142857142857143,
86
+ "loss": 1.0278,
87
  "step": 60
88
  },
89
  {
90
  "epoch": 0.09,
91
  "learning_rate": 0.00018571428571428572,
92
+ "loss": 1.0297,
93
  "step": 65
94
  },
95
  {
96
  "epoch": 0.1,
97
  "learning_rate": 0.0002,
98
+ "loss": 1.0224,
99
  "step": 70
100
  },
101
  {
102
  "epoch": 0.11,
103
  "learning_rate": 0.0001999685197404432,
104
+ "loss": 1.0077,
105
  "step": 75
106
  },
107
  {
108
  "epoch": 0.11,
109
  "learning_rate": 0.00019987409878190752,
110
+ "loss": 1.0135,
111
  "step": 80
112
  },
113
  {
114
  "epoch": 0.12,
115
  "learning_rate": 0.00019971679657231872,
116
+ "loss": 0.9836,
117
  "step": 85
118
  },
119
  {
120
  "epoch": 0.13,
121
  "learning_rate": 0.00019949671214996445,
122
+ "loss": 1.0012,
123
  "step": 90
124
  },
125
  {
126
  "epoch": 0.14,
127
  "learning_rate": 0.00019921398408113955,
128
+ "loss": 0.9927,
129
  "step": 95
130
  },
131
  {
132
  "epoch": 0.14,
133
  "learning_rate": 0.00019886879037290384,
134
+ "loss": 0.9966,
135
  "step": 100
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
136
  }
137
  ],
138
  "logging_steps": 5,
 
140
  "num_input_tokens_seen": 0,
141
  "num_train_epochs": 1,
142
  "save_steps": 100,
143
+ "total_flos": 7.003463853749043e+16,
144
  "train_batch_size": 4,
145
  "trial_name": null,
146
  "trial_params": null
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:35bfe367412d6cce2d4651bb47f4aa1514a8740353adbe218792b83227be32d2
3
  size 4792
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9d247e6edadd8eec25fafda911325a153e7ec9bfb084946dbb826f80e3be6ebd
3
  size 4792