yakazimir committed on
Commit
cab8770
1 Parent(s): 6c93541

Model save

Browse files
README.md CHANGED
@@ -1,17 +1,11 @@
1
  ---
2
  library_name: transformers
3
- license: apache-2.0
4
- base_model: mistralai/Mistral-7B-Instruct-v0.2
5
  tags:
6
- - alignment-handbook
7
  - trl
8
  - simpo
9
  - generated_from_trainer
10
- - trl
11
- - simpo
12
- - generated_from_trainer
13
- datasets:
14
- - princeton-nlp/mistral-instruct-ultrafeedback
15
  model-index:
16
  - name: simpo-exps
17
  results: []
@@ -22,17 +16,17 @@ should probably proofread and complete it, then remove this comment. -->
22
 
23
  # simpo-exps
24
 
25
- This model is a fine-tuned version of [mistralai/Mistral-7B-Instruct-v0.2](https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2) on the princeton-nlp/mistral-instruct-ultrafeedback dataset.
26
  It achieves the following results on the evaluation set:
27
- - Loss: 0.7905
28
- - Rewards/chosen: -1.7087
29
- - Rewards/rejected: -1.8663
30
- - Rewards/accuracies: 0.6051
31
- - Rewards/margins: 0.1576
32
- - Logps/rejected: -0.7465
33
- - Logps/chosen: -0.6835
34
- - Logits/rejected: -2.7543
35
- - Logits/chosen: -2.7563
36
 
37
  ## Model description
38
 
@@ -51,7 +45,7 @@ More information needed
51
  ### Training hyperparameters
52
 
53
  The following hyperparameters were used during training:
54
- - learning_rate: 5e-07
55
  - train_batch_size: 2
56
  - eval_batch_size: 4
57
  - seed: 42
@@ -69,7 +63,7 @@ The following hyperparameters were used during training:
69
 
70
  | Training Loss | Epoch | Step | Validation Loss | Rewards/chosen | Rewards/rejected | Rewards/accuracies | Rewards/margins | Logps/rejected | Logps/chosen | Logits/rejected | Logits/chosen |
71
  |:-------------:|:------:|:----:|:---------------:|:--------------:|:----------------:|:------------------:|:---------------:|:--------------:|:------------:|:---------------:|:-------------:|
72
- | 0.7494 | 0.8573 | 400 | 0.7909 | -1.6908 | -1.8447 | 0.6024 | 0.1538 | -0.7379 | -0.6763 | -2.7573 | -2.7593 |
73
 
74
 
75
  ### Framework versions
 
1
  ---
2
  library_name: transformers
3
+ license: llama3
4
+ base_model: meta-llama/Meta-Llama-3-8B-Instruct
5
  tags:
 
6
  - trl
7
  - simpo
8
  - generated_from_trainer
 
 
 
 
 
9
  model-index:
10
  - name: simpo-exps
11
  results: []
 
16
 
17
  # simpo-exps
18
 
19
+ This model is a fine-tuned version of [meta-llama/Meta-Llama-3-8B-Instruct](https://huggingface.co/meta-llama/Meta-Llama-3-8B-Instruct) on an unknown dataset.
20
  It achieves the following results on the evaluation set:
21
+ - Loss: 1.1795
22
+ - Rewards/chosen: -17.3452
23
+ - Rewards/rejected: -23.3517
24
+ - Rewards/accuracies: 0.8435
25
+ - Rewards/margins: 6.0065
26
+ - Logps/rejected: -2.3352
27
+ - Logps/chosen: -1.7345
28
+ - Logits/rejected: -1.4942
29
+ - Logits/chosen: -1.4832
30
 
31
  ## Model description
32
 
 
45
  ### Training hyperparameters
46
 
47
  The following hyperparameters were used during training:
48
+ - learning_rate: 1e-06
49
  - train_batch_size: 2
50
  - eval_batch_size: 4
51
  - seed: 42
 
63
 
64
  | Training Loss | Epoch | Step | Validation Loss | Rewards/chosen | Rewards/rejected | Rewards/accuracies | Rewards/margins | Logps/rejected | Logps/chosen | Logits/rejected | Logits/chosen |
65
  |:-------------:|:------:|:----:|:---------------:|:--------------:|:----------------:|:------------------:|:---------------:|:--------------:|:------------:|:---------------:|:-------------:|
66
+ | 1.1882 | 0.8550 | 400 | 1.1795 | -17.3452 | -23.3517 | 0.8435 | 6.0065 | -2.3352 | -1.7345 | -1.4942 | -1.4832 |
67
 
68
 
69
  ### Framework versions
all_results.json CHANGED
@@ -1,22 +1,9 @@
1
  {
2
- "epoch": 0.9987943737441393,
3
- "eval_logits/chosen": -2.756258010864258,
4
- "eval_logits/rejected": -2.754289388656616,
5
- "eval_logps/chosen": -0.6834676861763,
6
- "eval_logps/rejected": -0.7465164661407471,
7
- "eval_loss": 0.7904670238494873,
8
- "eval_rewards/accuracies": 0.605053186416626,
9
- "eval_rewards/chosen": -1.7086691856384277,
10
- "eval_rewards/margins": 0.15762200951576233,
11
- "eval_rewards/rejected": -1.8662911653518677,
12
- "eval_runtime": 102.313,
13
- "eval_samples": 2994,
14
- "eval_samples_per_second": 29.263,
15
- "eval_steps_per_second": 1.837,
16
  "total_flos": 0.0,
17
- "train_loss": 0.7914171927476645,
18
- "train_runtime": 8402.7408,
19
- "train_samples": 59720,
20
- "train_samples_per_second": 7.107,
21
- "train_steps_per_second": 0.055
22
  }
 
1
  {
2
+ "epoch": 0.9982631930527722,
 
 
 
 
 
 
 
 
 
 
 
 
 
3
  "total_flos": 0.0,
4
+ "train_loss": 1.7882541670789045,
5
+ "train_runtime": 8154.3088,
6
+ "train_samples": 59876,
7
+ "train_samples_per_second": 7.343,
8
+ "train_steps_per_second": 0.057
9
  }
generation_config.json CHANGED
@@ -1,6 +1,12 @@
1
  {
2
- "_from_model_config": true,
3
- "bos_token_id": 1,
4
- "eos_token_id": 2,
 
 
 
 
 
 
5
  "transformers_version": "4.44.2"
6
  }
 
1
  {
2
+ "bos_token_id": 128000,
3
+ "do_sample": true,
4
+ "eos_token_id": [
5
+ 128001,
6
+ 128009
7
+ ],
8
+ "max_length": 4096,
9
+ "temperature": 0.6,
10
+ "top_p": 0.9,
11
  "transformers_version": "4.44.2"
12
  }
train_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "epoch": 0.9987943737441393,
3
  "total_flos": 0.0,
4
- "train_loss": 0.7914171927476645,
5
- "train_runtime": 8402.7408,
6
- "train_samples": 59720,
7
- "train_samples_per_second": 7.107,
8
- "train_steps_per_second": 0.055
9
  }
 
1
  {
2
+ "epoch": 0.9982631930527722,
3
  "total_flos": 0.0,
4
+ "train_loss": 1.7882541670789045,
5
+ "train_runtime": 8154.3088,
6
+ "train_samples": 59876,
7
+ "train_samples_per_second": 7.343,
8
+ "train_steps_per_second": 0.057
9
  }
trainer_state.json CHANGED
The diff for this file is too large to render. See raw diff