devamanyu committed on
Commit
8be5ed5
1 Parent(s): db9f83b

Model save

Browse files
README.md CHANGED
@@ -4,7 +4,6 @@ library_name: transformers
4
  model_name: zephyr-7b-dpo-full
5
  tags:
6
  - generated_from_trainer
7
- - alignment-handbook
8
  - trl
9
  - dpo
10
  licence: license
@@ -28,7 +27,7 @@ print(output["generated_text"])
28
 
29
  ## Training procedure
30
 
31
- [<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="150" height="24"/>](https://wandb.ai/devamanyu/huggingface/runs/adglv44w)
32
 
33
  This model was trained with DPO, a method introduced in [Direct Preference Optimization: Your Language Model is Secretly a Reward Model](https://huggingface.co/papers/2305.18290).
34
 
 
4
  model_name: zephyr-7b-dpo-full
5
  tags:
6
  - generated_from_trainer
 
7
  - trl
8
  - dpo
9
  licence: license
 
27
 
28
  ## Training procedure
29
 
30
+ [<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="150" height="24"/>](https://wandb.ai/devamanyu/huggingface/runs/2spwkmbc)
31
 
32
  This model was trained with DPO, a method introduced in [Direct Preference Optimization: Your Language Model is Secretly a Reward Model](https://huggingface.co/papers/2305.18290).
33
 
all_results.json CHANGED
@@ -15,8 +15,8 @@
15
  "eval_steps_per_second": 0.8,
16
  "total_flos": 0.0,
17
  "train_loss": 0.0,
18
- "train_runtime": 1.2352,
19
  "train_samples": 100,
20
- "train_samples_per_second": 8095.94,
21
- "train_steps_per_second": 80.959
22
  }
 
15
  "eval_steps_per_second": 0.8,
16
  "total_flos": 0.0,
17
  "train_loss": 0.0,
18
+ "train_runtime": 1.1955,
19
  "train_samples": 100,
20
+ "train_samples_per_second": 8364.905,
21
+ "train_steps_per_second": 83.649
22
  }
config.json CHANGED
@@ -22,6 +22,6 @@
22
  "tie_word_embeddings": false,
23
  "torch_dtype": "bfloat16",
24
  "transformers_version": "4.46.1",
25
- "use_cache": true,
26
  "vocab_size": 32000
27
  }
 
22
  "tie_word_embeddings": false,
23
  "torch_dtype": "bfloat16",
24
  "transformers_version": "4.46.1",
25
+ "use_cache": false,
26
  "vocab_size": 32000
27
  }
runs/Nov22_20-16-58_dvhaz-sleeper-cmh-gpu/events.out.tfevents.1732306662.dvhaz-sleeper-cmh-gpu.10458.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b7c0fa702483a32cd212fb4eff6fb1ae60473c6b9a617c9c5703661321108a17
3
+ size 6760
train_results.json CHANGED
@@ -2,8 +2,8 @@
2
  "epoch": 1.0,
3
  "total_flos": 0.0,
4
  "train_loss": 0.0,
5
- "train_runtime": 1.2352,
6
  "train_samples": 100,
7
- "train_samples_per_second": 8095.94,
8
- "train_steps_per_second": 80.959
9
  }
 
2
  "epoch": 1.0,
3
  "total_flos": 0.0,
4
  "train_loss": 0.0,
5
+ "train_runtime": 1.1955,
6
  "train_samples": 100,
7
+ "train_samples_per_second": 8364.905,
8
+ "train_steps_per_second": 83.649
9
  }
trainer_state.json CHANGED
@@ -797,9 +797,9 @@
797
  "step": 478,
798
  "total_flos": 0.0,
799
  "train_loss": 0.0,
800
- "train_runtime": 1.2352,
801
- "train_samples_per_second": 8095.94,
802
- "train_steps_per_second": 80.959
803
  }
804
  ],
805
  "logging_steps": 10,
 
797
  "step": 478,
798
  "total_flos": 0.0,
799
  "train_loss": 0.0,
800
+ "train_runtime": 1.1955,
801
+ "train_samples_per_second": 8364.905,
802
+ "train_steps_per_second": 83.649
803
  }
804
  ],
805
  "logging_steps": 10,
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9a8158ba2af669bd22aca4ac8a83903e60cd43f83e0a28dd5cb7139946725296
3
  size 7736
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5493cf08969e29ad06ed8c2bb6e5927f0f327e31cccd994af7be298bb5852953
3
  size 7736