Serega6678 committed
Commit 66e1cfc
Parent: e779ba2

Model save

README.md CHANGED
@@ -2,11 +2,10 @@
 license: apache-2.0
 library_name: peft
 tags:
-- alignment-handbook
 - generated_from_trainer
 base_model: mistralai/Mistral-7B-v0.1
 datasets:
-- HuggingFaceH4/ultrachat_200k
+- generator
 model-index:
 - name: Test_with_new_script
   results: []
@@ -17,9 +16,9 @@ should probably proofread and complete it, then remove this comment. -->
 
 # Test_with_new_script
 
-This model is a fine-tuned version of [mistralai/Mistral-7B-v0.1](https://huggingface.co/mistralai/Mistral-7B-v0.1) on the HuggingFaceH4/ultrachat_200k dataset.
+This model is a fine-tuned version of [mistralai/Mistral-7B-v0.1](https://huggingface.co/mistralai/Mistral-7B-v0.1) on the generator dataset.
 It achieves the following results on the evaluation set:
-- Loss: 1.0914
+- Loss: 1.0905
 
 ## Model description
 
@@ -39,12 +38,11 @@ More information needed
 
 The following hyperparameters were used during training:
 - learning_rate: 0.0002
-- train_batch_size: 16
+- train_batch_size: 32
 - eval_batch_size: 8
 - seed: 42
 - distributed_type: multi-GPU
 - num_devices: 2
-- gradient_accumulation_steps: 2
 - total_train_batch_size: 64
 - total_eval_batch_size: 16
 - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
@@ -56,7 +54,7 @@ The following hyperparameters were used during training:
 
 | Training Loss | Epoch | Step | Validation Loss |
 |:-------------:|:-----:|:----:|:---------------:|
-| 1.1117        | 1.0   | 18   | 1.0914          |
+| 1.11          | 1.0   | 18   | 1.0905          |
 
 
 ### Framework versions
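
> Note on the hyperparameter hunk above: the commit doubles the per-device batch size (16 → 32) and drops `gradient_accumulation_steps: 2`, so the effective batch stays at 64 under the usual 🤗 Trainer convention (per-device batch × num_devices × accumulation steps). A minimal sketch of that arithmetic (the helper name is illustrative, not part of this repo):

```python
# Effective train batch size under the usual HF Trainer convention.
# (Illustrative helper; not part of this repository.)
def total_train_batch_size(per_device: int, num_devices: int, grad_accum: int = 1) -> int:
    return per_device * num_devices * grad_accum

assert total_train_batch_size(16, 2, grad_accum=2) == 64  # before this commit
assert total_train_batch_size(32, 2, grad_accum=1) == 64  # after this commit
```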
adapter_config.json CHANGED
@@ -19,13 +19,13 @@
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
-    "v_proj",
-    "up_proj",
-    "down_proj",
+    "k_proj",
     "o_proj",
+    "down_proj",
+    "up_proj",
+    "v_proj",
     "gate_proj",
-    "q_proj",
-    "k_proj"
+    "q_proj"
   ],
   "task_type": "CAUSAL_LM"
 }
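
> The `target_modules` hunk is a pure reordering: both sides list the same seven modules, so the set of layers that receive LoRA adapters is unchanged (PEFT matches module names regardless of list order). A quick check, plus a hedged `LoraConfig` sketch; the `r` and `lora_alpha` values are assumptions, since this hunk does not show them:

```python
from peft import LoraConfig

old = {"v_proj", "up_proj", "down_proj", "o_proj", "gate_proj", "q_proj", "k_proj"}
new = {"k_proj", "o_proj", "down_proj", "up_proj", "v_proj", "gate_proj", "q_proj"}
assert old == new  # same modules; only the serialization order changed

# Minimal sketch of an equivalent config (r/lora_alpha are assumed values).
config = LoraConfig(
    r=16,                        # assumption: rank is not shown in this diff
    lora_alpha=32,               # assumption: not shown in this diff
    target_modules=sorted(new),
    task_type="CAUSAL_LM",
)
```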
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7a556de130555737da15606adb66dd0e66194f78ed23944c51c1073777131b36
+oid sha256:14d37b719ded8577e0c19e669ca2cebb01db619d1866b04e1652272e8172b6b1
 size 83946192
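
> This file (like training_args.bin below) is stored via Git LFS, so the diff covers the three-line pointer, not the weights themselves: an unchanged size with a new sha256 oid means the adapter was retrained, not resized. A small sketch of how such a pointer could be checked against a downloaded blob (illustrative helper, not part of the repo):

```python
import hashlib

def verify_lfs_blob(pointer_text: str, blob_path: str) -> None:
    """Check a local file against the oid/size recorded in a Git LFS pointer."""
    fields = dict(line.split(" ", 1) for line in pointer_text.strip().splitlines())
    expected_oid = fields["oid"].removeprefix("sha256:")
    data = open(blob_path, "rb").read()
    assert len(data) == int(fields["size"]), "size mismatch"
    assert hashlib.sha256(data).hexdigest() == expected_oid, "hash mismatch"
```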
all_results.json CHANGED
@@ -1,13 +1,13 @@
 {
     "epoch": 1.0,
-    "eval_loss": 1.0914130210876465,
-    "eval_runtime": 15.6172,
+    "eval_loss": 1.0904877185821533,
+    "eval_runtime": 15.5995,
     "eval_samples": 124,
-    "eval_samples_per_second": 7.94,
-    "eval_steps_per_second": 0.512,
-    "train_loss": 1.161470651626587,
-    "train_runtime": 443.1723,
+    "eval_samples_per_second": 7.949,
+    "eval_steps_per_second": 0.513,
+    "train_loss": 1.168492575486501,
+    "train_runtime": 457.8696,
     "train_samples": 1143,
-    "train_samples_per_second": 2.579,
-    "train_steps_per_second": 0.041
+    "train_samples_per_second": 2.496,
+    "train_steps_per_second": 0.039
 }
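
> The updated throughput fields are internally consistent: samples (or optimizer steps) divided by the measured runtime reproduce the reported rates, with eval taking 8 steps at the total eval batch size of 16 over 124 samples. A quick check:

```python
# Throughput fields reproduce as count / runtime (values from the new JSON).
train_samples, train_runtime, train_steps = 1143, 457.8696, 18
eval_samples, eval_runtime, eval_steps = 124, 15.5995, 8  # 8 = ceil(124 / 16)

assert round(train_samples / train_runtime, 3) == 2.496
assert round(train_steps / train_runtime, 3) == 0.039
assert round(eval_samples / eval_runtime, 3) == 7.949
assert round(eval_steps / eval_runtime, 3) == 0.513
```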
eval_results.json CHANGED
@@ -1,8 +1,8 @@
 {
     "epoch": 1.0,
-    "eval_loss": 1.0914130210876465,
-    "eval_runtime": 15.6172,
+    "eval_loss": 1.0904877185821533,
+    "eval_runtime": 15.5995,
     "eval_samples": 124,
-    "eval_samples_per_second": 7.94,
-    "eval_steps_per_second": 0.512
+    "eval_samples_per_second": 7.949,
+    "eval_steps_per_second": 0.513
 }
train_results.json CHANGED
@@ -1,8 +1,8 @@
 {
     "epoch": 1.0,
-    "train_loss": 1.161470651626587,
-    "train_runtime": 443.1723,
+    "train_loss": 1.168492575486501,
+    "train_runtime": 457.8696,
     "train_samples": 1143,
-    "train_samples_per_second": 2.579,
-    "train_steps_per_second": 0.041
+    "train_samples_per_second": 2.496,
+    "train_steps_per_second": 0.039
 }
trainer_state.json CHANGED
@@ -11,43 +11,43 @@
     {
       "epoch": 0.06,
       "learning_rate": 0.0001,
-      "loss": 1.2833,
+      "loss": 1.3159,
       "step": 1
     },
     {
       "epoch": 0.28,
       "learning_rate": 0.00018314696123025454,
-      "loss": 1.2373,
+      "loss": 1.2515,
       "step": 5
     },
     {
       "epoch": 0.56,
       "learning_rate": 0.0001,
-      "loss": 1.168,
+      "loss": 1.1683,
       "step": 10
     },
     {
       "epoch": 0.83,
       "learning_rate": 1.6853038769745467e-05,
-      "loss": 1.1117,
+      "loss": 1.11,
       "step": 15
     },
     {
       "epoch": 1.0,
-      "eval_loss": 1.0914130210876465,
-      "eval_runtime": 15.7258,
-      "eval_samples_per_second": 7.885,
-      "eval_steps_per_second": 0.509,
+      "eval_loss": 1.0904877185821533,
+      "eval_runtime": 15.6992,
+      "eval_samples_per_second": 7.899,
+      "eval_steps_per_second": 0.51,
       "step": 18
     },
     {
       "epoch": 1.0,
       "step": 18,
       "total_flos": 1.0560439065981747e+17,
-      "train_loss": 1.161470651626587,
-      "train_runtime": 443.1723,
-      "train_samples_per_second": 2.579,
-      "train_steps_per_second": 0.041
+      "train_loss": 1.168492575486501,
+      "train_runtime": 457.8696,
+      "train_samples_per_second": 2.496,
+      "train_steps_per_second": 0.039
     }
   ],
   "logging_steps": 5,
@@ -56,7 +56,7 @@
   "num_train_epochs": 1,
   "save_steps": 100,
   "total_flos": 1.0560439065981747e+17,
-  "train_batch_size": 16,
+  "train_batch_size": 32,
   "trial_name": null,
   "trial_params": null
 }
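
> The logged learning rates are identical across the two runs and are consistent with a cosine schedule with linear warmup: peak 2e-4, 2 warmup steps, 18 total steps. That is an inference from the logged values, not something this diff states; the reconstruction below reproduces all four logged rates:

```python
import math

# Assumed reconstruction: cosine decay with linear warmup, inferred from the logs.
def lr_at(step: int, peak: float = 2e-4, warmup: int = 2, total: int = 18) -> float:
    if step < warmup:
        return peak * step / warmup
    progress = (step - warmup) / (total - warmup)
    return peak * 0.5 * (1.0 + math.cos(math.pi * progress))

assert math.isclose(lr_at(1), 0.0001)
assert math.isclose(lr_at(5), 0.00018314696123025454)
assert math.isclose(lr_at(10), 0.0001)
assert math.isclose(lr_at(15), 1.6853038769745467e-05)
```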
training_args.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4f31dabe80950b12d6098c0f5d3b662eee5c22504bfac428a3670697aac3bdf5
+oid sha256:61a0e14ac8baf459cacbf69a626c50bdbd4c30f0415167c5ee51730735ba152c
 size 4856