AlekseyKorshuk
committed on
Commit
•
d800da3
1
Parent(s):
47f6cea
huggingartists
Browse files
- README.md +3 -3
- evaluation.txt +1 -1
- flax_model.msgpack +1 -1
- optimizer.pt +2 -2
- pytorch_model.bin +2 -2
- rng_state.pth +2 -2
- scheduler.pt +1 -1
- trainer_state.json +32 -6
- training_args.bin +1 -1
README.md
CHANGED
@@ -45,15 +45,15 @@ from datasets import load_dataset
|
|
45 |
dataset = load_dataset("huggingartists/eminem")
|
46 |
```
|
47 |
|
48 |
-
[Explore the data](https://wandb.ai/huggingartists/huggingartists/runs/
|
49 |
|
50 |
## Training procedure
|
51 |
|
52 |
The model is based on a pre-trained [GPT-2](https://huggingface.co/gpt2) which is fine-tuned on Eminem's lyrics.
|
53 |
|
54 |
-
Hyperparameters and metrics are recorded in the [W&B training run](https://wandb.ai/huggingartists/huggingartists/runs/
|
55 |
|
56 |
-
At the end of training, [the final model](https://wandb.ai/huggingartists/huggingartists/runs/
|
57 |
|
58 |
## How to use
|
59 |
|
|
|
45 |
dataset = load_dataset("huggingartists/eminem")
|
46 |
```
|
47 |
|
48 |
+
[Explore the data](https://wandb.ai/huggingartists/huggingartists/runs/ofa47ov0/artifacts), which is tracked with [W&B artifacts](https://docs.wandb.com/artifacts) at every step of the pipeline.
|
49 |
|
50 |
## Training procedure
|
51 |
|
52 |
The model is based on a pre-trained [GPT-2](https://huggingface.co/gpt2) which is fine-tuned on Eminem's lyrics.
|
53 |
|
54 |
+
Hyperparameters and metrics are recorded in the [W&B training run](https://wandb.ai/huggingartists/huggingartists/runs/11x8tlht) for full transparency and reproducibility.
|
55 |
|
56 |
+
At the end of training, [the final model](https://wandb.ai/huggingartists/huggingartists/runs/11x8tlht/artifacts) is logged and versioned.
|
57 |
|
58 |
## How to use
|
59 |
|
evaluation.txt
CHANGED
@@ -1 +1 @@
|
|
1 |
-
{"eval_loss": 0.
|
|
|
1 |
+
{"eval_loss": 0.31595703959465027, "eval_runtime": 694.9722, "eval_samples_per_second": 0.892, "eval_steps_per_second": 0.112, "epoch": 3.0}
|
flax_model.msgpack
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 497764120
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:051a2aa5612a00aefd0743508426504ceff0614f418bc3ffd2fae00bc7e2e01a
|
3 |
size 497764120
|
optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:593f24adf357165ae7c067bfdec9e2e06abf005b482f2df7913d65b234eb4c23
|
3 |
+
size 995599857
|
pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d4f3731cebd57313c0396ec7ec7278cbf2f4370924b703a6aa6ddad82d193a89
|
3 |
+
size 510401385
|
rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9a26923031d40478a72fa6bcd5409ab432e85028c059b7b08da05ecaa1c93850
|
3 |
+
size 13547
|
scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 623
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2587493ffef7a83ac744fa257014e43dd335fe886fa10856cabfbe630d057f3b
|
3 |
size 623
|
trainer_state.json
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
{
|
2 |
-
"best_metric": 0.
|
3 |
-
"best_model_checkpoint": "output/eminem/checkpoint-
|
4 |
"epoch": 2.0,
|
5 |
-
"global_step":
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
@@ -1102,11 +1102,37 @@
|
|
1102 |
"eval_samples_per_second": 76.147,
|
1103 |
"eval_steps_per_second": 9.59,
|
1104 |
"step": 904
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1105 |
}
|
1106 |
],
|
1107 |
-
"max_steps":
|
1108 |
-
"num_train_epochs":
|
1109 |
-
"total_flos":
|
1110 |
"trial_name": null,
|
1111 |
"trial_params": null
|
1112 |
}
|
|
|
1 |
{
|
2 |
+
"best_metric": 0.31595703959465027,
|
3 |
+
"best_model_checkpoint": "output/eminem/checkpoint-916",
|
4 |
"epoch": 2.0,
|
5 |
+
"global_step": 916,
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
|
|
1102 |
"eval_samples_per_second": 76.147,
|
1103 |
"eval_steps_per_second": 9.59,
|
1104 |
"step": 904
|
1105 |
+
},
|
1106 |
+
{
|
1107 |
+
"epoch": 1.98,
|
1108 |
+
"learning_rate": 4.956064306819408e-05,
|
1109 |
+
"loss": 0.7479,
|
1110 |
+
"step": 905
|
1111 |
+
},
|
1112 |
+
{
|
1113 |
+
"epoch": 1.99,
|
1114 |
+
"learning_rate": 4.7311949670299134e-05,
|
1115 |
+
"loss": 0.7304,
|
1116 |
+
"step": 910
|
1117 |
+
},
|
1118 |
+
{
|
1119 |
+
"epoch": 2.0,
|
1120 |
+
"learning_rate": 4.508829441538651e-05,
|
1121 |
+
"loss": 0.666,
|
1122 |
+
"step": 915
|
1123 |
+
},
|
1124 |
+
{
|
1125 |
+
"epoch": 2.0,
|
1126 |
+
"eval_loss": 0.31595703959465027,
|
1127 |
+
"eval_runtime": 690.3622,
|
1128 |
+
"eval_samples_per_second": 0.898,
|
1129 |
+
"eval_steps_per_second": 0.113,
|
1130 |
+
"step": 916
|
1131 |
}
|
1132 |
],
|
1133 |
+
"max_steps": 1374,
|
1134 |
+
"num_train_epochs": 3,
|
1135 |
+
"total_flos": 955022376960000.0,
|
1136 |
"trial_name": null,
|
1137 |
"trial_params": null
|
1138 |
}
|
training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 3055
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:98ff7520005bd1b7cd3b14407c7fae7975d1a94564c7ae5c3bb82fb25346e68c
|
3 |
size 3055
|