AlekseyKorshuk commited on
Commit
668b228
1 Parent(s): be61806

huggingartists

Browse files
README.md CHANGED
@@ -45,15 +45,15 @@ from datasets import load_dataset
45
  dataset = load_dataset("huggingartists/nirvana")
46
  ```
47
 
48
- [Explore the data](https://wandb.ai/huggingartists/huggingartists/runs/2so60o7u/artifacts), which is tracked with [W&B artifacts](https://docs.wandb.com/artifacts) at every step of the pipeline.
49
 
50
  ## Training procedure
51
 
52
  The model is based on a pre-trained [GPT-2](https://huggingface.co/gpt2) which is fine-tuned on Nirvana's lyrics.
53
 
54
- Hyperparameters and metrics are recorded in the [W&B training run](https://wandb.ai/huggingartists/huggingartists/runs/l6zav1kr) for full transparency and reproducibility.
55
 
56
- At the end of training, [the final model](https://wandb.ai/huggingartists/huggingartists/runs/l6zav1kr/artifacts) is logged and versioned.
57
 
58
  ## How to use
59
 
 
45
  dataset = load_dataset("huggingartists/nirvana")
46
  ```
47
 
48
+ [Explore the data](https://wandb.ai/huggingartists/huggingartists/runs/plmwg217/artifacts), which is tracked with [W&B artifacts](https://docs.wandb.com/artifacts) at every step of the pipeline.
49
 
50
  ## Training procedure
51
 
52
  The model is based on a pre-trained [GPT-2](https://huggingface.co/gpt2) which is fine-tuned on Nirvana's lyrics.
53
 
54
+ Hyperparameters and metrics are recorded in the [W&B training run](https://wandb.ai/huggingartists/huggingartists/runs/2aaxwzd4) for full transparency and reproducibility.
55
 
56
+ At the end of training, [the final model](https://wandb.ai/huggingartists/huggingartists/runs/2aaxwzd4/artifacts) is logged and versioned.
57
 
58
  ## How to use
59
 
evaluation.txt CHANGED
@@ -1 +1 @@
1
- {"eval_loss": 2.128573179244995, "eval_runtime": 1.2287, "eval_samples_per_second": 43.137, "eval_steps_per_second": 5.697, "epoch": 2.0}
 
1
+ {"eval_loss": 1.7358663082122803, "eval_runtime": 2.0349, "eval_samples_per_second": 20.64, "eval_steps_per_second": 2.949, "epoch": 3.0}
flax_model.msgpack CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1740b9403c2980e1c12ea39ca34dd90e719cb426af2e560c4662fb685a4adc5c
3
  size 497764120
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:38b8911734a29e325912427efb96ad710eeebdec940f173c4c1cdf4bd6295893
3
  size 497764120
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:99c7d18d91a68d3aac0f231a3453ae4f8448552fe1cceca43dfeddf734b115cf
3
  size 995603825
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8ea53252af8c636e7d87ccfe4ffa643cb49756436eab6186e4214ea668b415a6
3
  size 995603825
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:190fc0c852d0ebcb0d529ca1ef8faf27bb0e63a90b3312da7edff4f56cd84c70
3
  size 510403817
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:75c2f6598fea7212344467f26fdd0991f32656d8a34cf5910e5c27adb467fa3d
3
  size 510403817
rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1d8690557d45fc8d72f6eb673bd42b9a6c47364d62cbc2a5af224d1d6fa8d651
3
  size 14567
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:379e3990b5cff526d3b72fba15374693b30ed8e1821d3514ef232ad47d9ec9f7
3
  size 14567
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:95317d63b9a0922ed3de67fdcb9f6bbf4898f9cd35c41f60f3577d5b1e618ec0
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5704cdbfa3a8ad0fca4ee30e2feb20b3d392393975fb15db2691fbf243dcbce0
3
  size 623
trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "best_metric": 2.128573179244995,
3
- "best_model_checkpoint": "output/nirvana/checkpoint-72",
4
- "epoch": 2.0,
5
- "global_step": 72,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -114,11 +114,75 @@
114
  "eval_samples_per_second": 44.396,
115
  "eval_steps_per_second": 5.864,
116
  "step": 72
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
117
  }
118
  ],
119
- "max_steps": 72,
120
- "num_train_epochs": 2,
121
- "total_flos": 73161768960000.0,
122
  "trial_name": null,
123
  "trial_params": null
124
  }
 
1
  {
2
+ "best_metric": 1.7358663082122803,
3
+ "best_model_checkpoint": "output/nirvana/checkpoint-111",
4
+ "epoch": 3.0,
5
+ "global_step": 111,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
114
  "eval_samples_per_second": 44.396,
115
  "eval_steps_per_second": 5.864,
116
  "step": 72
117
+ },
118
+ {
119
+ "epoch": 2.0,
120
+ "eval_loss": 1.8826501369476318,
121
+ "eval_runtime": 1.8444,
122
+ "eval_samples_per_second": 22.772,
123
+ "eval_steps_per_second": 3.253,
124
+ "step": 74
125
+ },
126
+ {
127
+ "epoch": 2.03,
128
+ "learning_rate": 0.0001369528677140173,
129
+ "loss": 2.0939,
130
+ "step": 75
131
+ },
132
+ {
133
+ "epoch": 2.16,
134
+ "learning_rate": 0.00012848876816285752,
135
+ "loss": 2.1654,
136
+ "step": 80
137
+ },
138
+ {
139
+ "epoch": 2.3,
140
+ "learning_rate": 0.00010939183589447406,
141
+ "loss": 2.0452,
142
+ "step": 85
143
+ },
144
+ {
145
+ "epoch": 2.43,
146
+ "learning_rate": 8.305259792170682e-05,
147
+ "loss": 1.7357,
148
+ "step": 90
149
+ },
150
+ {
151
+ "epoch": 2.57,
152
+ "learning_rate": 5.414740207829316e-05,
153
+ "loss": 2.0788,
154
+ "step": 95
155
+ },
156
+ {
157
+ "epoch": 2.7,
158
+ "learning_rate": 2.7808164105525978e-05,
159
+ "loss": 1.928,
160
+ "step": 100
161
+ },
162
+ {
163
+ "epoch": 2.84,
164
+ "learning_rate": 8.711231837142545e-06,
165
+ "loss": 1.9376,
166
+ "step": 105
167
+ },
168
+ {
169
+ "epoch": 2.97,
170
+ "learning_rate": 2.4713228598268823e-07,
171
+ "loss": 1.9652,
172
+ "step": 110
173
+ },
174
+ {
175
+ "epoch": 3.0,
176
+ "eval_loss": 1.7358663082122803,
177
+ "eval_runtime": 1.8478,
178
+ "eval_samples_per_second": 22.729,
179
+ "eval_steps_per_second": 3.247,
180
+ "step": 111
181
  }
182
  ],
183
+ "max_steps": 111,
184
+ "num_train_epochs": 3,
185
+ "total_flos": 112878157824000.0,
186
  "trial_name": null,
187
  "trial_params": null
188
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5ccf6b2194f29afd6fd696a5ff25947b9d920926b5a6ac01723ed2e5798c9d44
3
  size 3055
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d4d2cede8ad0eb56c1a914509bcb224f91e3e03e5504f88d252a2fa457cfb418
3
  size 3055