EvgeniyZh committed on
Commit
efe17ed
1 Parent(s): a008024

Model save

Browse files
adapter_config.json CHANGED
@@ -16,10 +16,10 @@
16
  "rank_pattern": {},
17
  "revision": null,
18
  "target_modules": [
19
- "q_proj",
20
  "v_proj",
21
- "o_proj",
22
- "k_proj"
23
  ],
24
  "task_type": "CAUSAL_LM"
25
  }
 
16
  "rank_pattern": {},
17
  "revision": null,
18
  "target_modules": [
19
+ "k_proj",
20
  "v_proj",
21
+ "q_proj",
22
+ "o_proj"
23
  ],
24
  "task_type": "CAUSAL_LM"
25
  }
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:38c4cc3db797cde9c3e4214f38f663a87bbca856e02bb4d2a95bb006aca3741f
3
  size 109086672
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b55781a0314f695d8e8230b4bb6fa7c6f34fea0b06e4d3c140c535cd51b3cbba
3
  size 109086672
all_results.json CHANGED
@@ -1,13 +1,13 @@
1
  {
2
  "epoch": 0.67,
3
- "eval_loss": 1.0666559934616089,
4
- "eval_runtime": 2404.6287,
5
  "eval_samples": 23110,
6
- "eval_samples_per_second": 9.611,
7
  "eval_steps_per_second": 0.151,
8
- "train_loss": 1.1599171240540112,
9
- "train_runtime": 73159.9281,
10
  "train_samples": 207865,
11
- "train_samples_per_second": 2.841,
12
  "train_steps_per_second": 0.006
13
  }
 
1
  {
2
  "epoch": 0.67,
3
+ "eval_loss": 1.0666674375534058,
4
+ "eval_runtime": 2391.2283,
5
  "eval_samples": 23110,
6
+ "eval_samples_per_second": 9.664,
7
  "eval_steps_per_second": 0.151,
8
+ "train_loss": 1.1599184581462074,
9
+ "train_runtime": 72731.1653,
10
  "train_samples": 207865,
11
+ "train_samples_per_second": 2.858,
12
  "train_steps_per_second": 0.006
13
  }
config.json ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "mistralai/Mistral-7B-v0.1",
3
+ "architectures": [
4
+ "MistralForCausalLM"
5
+ ],
6
+ "bos_token_id": 1,
7
+ "eos_token_id": 2,
8
+ "hidden_act": "silu",
9
+ "hidden_size": 4096,
10
+ "initializer_range": 0.02,
11
+ "intermediate_size": 14336,
12
+ "max_position_embeddings": 32768,
13
+ "model_type": "mistral",
14
+ "num_attention_heads": 32,
15
+ "num_hidden_layers": 32,
16
+ "num_key_value_heads": 8,
17
+ "rms_norm_eps": 1e-05,
18
+ "rope_theta": 10000.0,
19
+ "sliding_window": 4096,
20
+ "tie_word_embeddings": false,
21
+ "torch_dtype": "bfloat16",
22
+ "transformers_version": "4.35.0",
23
+ "use_cache": true,
24
+ "vocab_size": 32000
25
+ }
eval_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 0.67,
3
- "eval_loss": 1.0666559934616089,
4
- "eval_runtime": 2404.6287,
5
  "eval_samples": 23110,
6
- "eval_samples_per_second": 9.611,
7
  "eval_steps_per_second": 0.151
8
  }
 
1
  {
2
  "epoch": 0.67,
3
+ "eval_loss": 1.0666674375534058,
4
+ "eval_runtime": 2391.2283,
5
  "eval_samples": 23110,
6
+ "eval_samples_per_second": 9.664,
7
  "eval_steps_per_second": 0.151
8
  }
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 0.67,
3
- "train_loss": 1.1599171240540112,
4
- "train_runtime": 73159.9281,
5
  "train_samples": 207865,
6
- "train_samples_per_second": 2.841,
7
  "train_steps_per_second": 0.006
8
  }
 
1
  {
2
  "epoch": 0.67,
3
+ "train_loss": 1.1599184581462074,
4
+ "train_runtime": 72731.1653,
5
  "train_samples": 207865,
6
+ "train_samples_per_second": 2.858,
7
  "train_steps_per_second": 0.006
8
  }
trainer_state.json CHANGED
@@ -65,19 +65,19 @@
65
  {
66
  "epoch": 0.11,
67
  "learning_rate": 1.939986331449053e-05,
68
- "loss": 1.2736,
69
  "step": 45
70
  },
71
  {
72
  "epoch": 0.12,
73
  "learning_rate": 1.926084840336821e-05,
74
- "loss": 1.2586,
75
  "step": 50
76
  },
77
  {
78
  "epoch": 0.14,
79
  "learning_rate": 1.910797282022027e-05,
80
- "loss": 1.2334,
81
  "step": 55
82
  },
83
  {
@@ -251,7 +251,7 @@
251
  {
252
  "epoch": 0.49,
253
  "learning_rate": 1.0232116539815558e-05,
254
- "loss": 1.0671,
255
  "step": 200
256
  },
257
  {
@@ -269,13 +269,13 @@
269
  {
270
  "epoch": 0.53,
271
  "learning_rate": 9.072784204417995e-06,
272
- "loss": 1.0795,
273
  "step": 215
274
  },
275
  {
276
  "epoch": 0.54,
277
  "learning_rate": 8.688345254588579e-06,
278
- "loss": 1.0686,
279
  "step": 220
280
  },
281
  {
@@ -340,19 +340,19 @@
340
  },
341
  {
342
  "epoch": 0.67,
343
- "eval_loss": 1.066710114479065,
344
- "eval_runtime": 2407.1881,
345
- "eval_samples_per_second": 9.6,
346
- "eval_steps_per_second": 0.15,
347
  "step": 272
348
  },
349
  {
350
  "epoch": 0.67,
351
  "step": 272,
352
  "total_flos": 4.932776606721638e+16,
353
- "train_loss": 1.1599171240540112,
354
- "train_runtime": 73159.9281,
355
- "train_samples_per_second": 2.841,
356
  "train_steps_per_second": 0.006
357
  }
358
  ],
 
65
  {
66
  "epoch": 0.11,
67
  "learning_rate": 1.939986331449053e-05,
68
+ "loss": 1.2735,
69
  "step": 45
70
  },
71
  {
72
  "epoch": 0.12,
73
  "learning_rate": 1.926084840336821e-05,
74
+ "loss": 1.2587,
75
  "step": 50
76
  },
77
  {
78
  "epoch": 0.14,
79
  "learning_rate": 1.910797282022027e-05,
80
+ "loss": 1.2333,
81
  "step": 55
82
  },
83
  {
 
251
  {
252
  "epoch": 0.49,
253
  "learning_rate": 1.0232116539815558e-05,
254
+ "loss": 1.0672,
255
  "step": 200
256
  },
257
  {
 
269
  {
270
  "epoch": 0.53,
271
  "learning_rate": 9.072784204417995e-06,
272
+ "loss": 1.0796,
273
  "step": 215
274
  },
275
  {
276
  "epoch": 0.54,
277
  "learning_rate": 8.688345254588579e-06,
278
+ "loss": 1.0685,
279
  "step": 220
280
  },
281
  {
 
340
  },
341
  {
342
  "epoch": 0.67,
343
+ "eval_loss": 1.0667219161987305,
344
+ "eval_runtime": 2389.1593,
345
+ "eval_samples_per_second": 9.673,
346
+ "eval_steps_per_second": 0.152,
347
  "step": 272
348
  },
349
  {
350
  "epoch": 0.67,
351
  "step": 272,
352
  "total_flos": 4.932776606721638e+16,
353
+ "train_loss": 1.1599184581462074,
354
+ "train_runtime": 72731.1653,
355
+ "train_samples_per_second": 2.858,
356
  "train_steps_per_second": 0.006
357
  }
358
  ],
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:894ab50c9898c2dee04f953d44234befab17dc2499229944290efadb69ed352d
3
  size 5624
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5a7bafc436c3a41a9a2a47904c2a1fb60e8d46cd8f97932f14a89887397ae60d
3
  size 5624