EvgeniyZh committed on
Commit
809aecb
1 Parent(s): efe17ed

Model save

Browse files
adapter_config.json CHANGED
@@ -17,9 +17,9 @@
17
  "revision": null,
18
  "target_modules": [
19
  "k_proj",
20
- "v_proj",
21
  "q_proj",
22
- "o_proj"
 
23
  ],
24
  "task_type": "CAUSAL_LM"
25
  }
 
17
  "revision": null,
18
  "target_modules": [
19
  "k_proj",
 
20
  "q_proj",
21
+ "o_proj",
22
+ "v_proj"
23
  ],
24
  "task_type": "CAUSAL_LM"
25
  }
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b55781a0314f695d8e8230b4bb6fa7c6f34fea0b06e4d3c140c535cd51b3cbba
3
  size 109086672
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:48b34a428074e5b7cc32b3fd5343936b35b0a5c869b9ecd3c09f7000dcb50b11
3
  size 109086672
all_results.json CHANGED
@@ -1,13 +1,13 @@
1
  {
2
  "epoch": 0.67,
3
- "eval_loss": 1.0666674375534058,
4
- "eval_runtime": 2391.2283,
5
  "eval_samples": 23110,
6
- "eval_samples_per_second": 9.664,
7
- "eval_steps_per_second": 0.151,
8
- "train_loss": 1.1599184581462074,
9
- "train_runtime": 72731.1653,
10
  "train_samples": 207865,
11
- "train_samples_per_second": 2.858,
12
  "train_steps_per_second": 0.006
13
  }
 
1
  {
2
  "epoch": 0.67,
3
+ "eval_loss": 1.0666700601577759,
4
+ "eval_runtime": 2384.3246,
5
  "eval_samples": 23110,
6
+ "eval_samples_per_second": 9.692,
7
+ "eval_steps_per_second": 0.152,
8
+ "train_loss": 1.1599246359923308,
9
+ "train_runtime": 72626.7254,
10
  "train_samples": 207865,
11
+ "train_samples_per_second": 2.862,
12
  "train_steps_per_second": 0.006
13
  }
eval_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 0.67,
3
- "eval_loss": 1.0666674375534058,
4
- "eval_runtime": 2391.2283,
5
  "eval_samples": 23110,
6
- "eval_samples_per_second": 9.664,
7
- "eval_steps_per_second": 0.151
8
  }
 
1
  {
2
  "epoch": 0.67,
3
+ "eval_loss": 1.0666700601577759,
4
+ "eval_runtime": 2384.3246,
5
  "eval_samples": 23110,
6
+ "eval_samples_per_second": 9.692,
7
+ "eval_steps_per_second": 0.152
8
  }
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 0.67,
3
- "train_loss": 1.1599184581462074,
4
- "train_runtime": 72731.1653,
5
  "train_samples": 207865,
6
- "train_samples_per_second": 2.858,
7
  "train_steps_per_second": 0.006
8
  }
 
1
  {
2
  "epoch": 0.67,
3
+ "train_loss": 1.1599246359923308,
4
+ "train_runtime": 72626.7254,
5
  "train_samples": 207865,
6
+ "train_samples_per_second": 2.862,
7
  "train_steps_per_second": 0.006
8
  }
trainer_state.json CHANGED
@@ -77,7 +77,7 @@
77
  {
78
  "epoch": 0.14,
79
  "learning_rate": 1.910797282022027e-05,
80
- "loss": 1.2333,
81
  "step": 55
82
  },
83
  {
@@ -149,7 +149,7 @@
149
  {
150
  "epoch": 0.28,
151
  "learning_rate": 1.629520819706912e-05,
152
- "loss": 1.1069,
153
  "step": 115
154
  },
155
  {
@@ -179,7 +179,7 @@
179
  {
180
  "epoch": 0.34,
181
  "learning_rate": 1.4684084406997903e-05,
182
- "loss": 1.0909,
183
  "step": 140
184
  },
185
  {
@@ -275,7 +275,7 @@
275
  {
276
  "epoch": 0.54,
277
  "learning_rate": 8.688345254588579e-06,
278
- "loss": 1.0685,
279
  "step": 220
280
  },
281
  {
@@ -340,9 +340,9 @@
340
  },
341
  {
342
  "epoch": 0.67,
343
- "eval_loss": 1.0667219161987305,
344
- "eval_runtime": 2389.1593,
345
- "eval_samples_per_second": 9.673,
346
  "eval_steps_per_second": 0.152,
347
  "step": 272
348
  },
@@ -350,9 +350,9 @@
350
  "epoch": 0.67,
351
  "step": 272,
352
  "total_flos": 4.932776606721638e+16,
353
- "train_loss": 1.1599184581462074,
354
- "train_runtime": 72731.1653,
355
- "train_samples_per_second": 2.858,
356
  "train_steps_per_second": 0.006
357
  }
358
  ],
 
77
  {
78
  "epoch": 0.14,
79
  "learning_rate": 1.910797282022027e-05,
80
+ "loss": 1.2334,
81
  "step": 55
82
  },
83
  {
 
149
  {
150
  "epoch": 0.28,
151
  "learning_rate": 1.629520819706912e-05,
152
+ "loss": 1.107,
153
  "step": 115
154
  },
155
  {
 
179
  {
180
  "epoch": 0.34,
181
  "learning_rate": 1.4684084406997903e-05,
182
+ "loss": 1.091,
183
  "step": 140
184
  },
185
  {
 
275
  {
276
  "epoch": 0.54,
277
  "learning_rate": 8.688345254588579e-06,
278
+ "loss": 1.0686,
279
  "step": 220
280
  },
281
  {
 
340
  },
341
  {
342
  "epoch": 0.67,
343
+ "eval_loss": 1.066724419593811,
344
+ "eval_runtime": 2385.329,
345
+ "eval_samples_per_second": 9.688,
346
  "eval_steps_per_second": 0.152,
347
  "step": 272
348
  },
 
350
  "epoch": 0.67,
351
  "step": 272,
352
  "total_flos": 4.932776606721638e+16,
353
+ "train_loss": 1.1599246359923308,
354
+ "train_runtime": 72626.7254,
355
+ "train_samples_per_second": 2.862,
356
  "train_steps_per_second": 0.006
357
  }
358
  ],
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5a7bafc436c3a41a9a2a47904c2a1fb60e8d46cd8f97932f14a89887397ae60d
3
  size 5624
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9a116181e8e43014f8554b0cc2a81c8131193600bb4d374a77cd396fcfdc6416
3
  size 5624