Upload folder using huggingface_hub
Browse files- pytorch_model-00001-of-00003.bin +1 -1
- pytorch_model-00002-of-00003.bin +1 -1
- pytorch_model-00003-of-00003.bin +1 -1
- rng_state.pth +1 -1
- scheduler.pt +1 -1
- trainer_state.json +71 -3
pytorch_model-00001-of-00003.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 9949048046
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:667777c452190f8efb5e339659cf4fa836a754a548e0733c79257adbd40e4a11
|
3 |
size 9949048046
|
pytorch_model-00002-of-00003.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 9904474400
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:cc05ba86ee883984ddcd71eb936a37e3d0b8c83ba9eff9d9f8a9a921dc0dacf9
|
3 |
size 9904474400
|
pytorch_model-00003-of-00003.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 6179210249
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:75fcd7c91eeaf5a795d2fcc071e019dfcfdce82b861e42071e758e82882d68b6
|
3 |
size 6179210249
|
rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14511
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:dae77d1d86eb7275f2a9fab8fdd0f3d2e9b085c5393ceeb24294803290f3941e
|
3 |
size 14511
|
scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 627
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3415aff70181afd51efc048fb7c4348442ee37b0317e7e93f002f0a59e0e3ea3
|
3 |
size 627
|
trainer_state.json
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 0.
|
5 |
-
"global_step":
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
@@ -2782,11 +2782,79 @@
|
|
2782 |
"eval_samples_per_second": 2.969,
|
2783 |
"eval_steps_per_second": 0.742,
|
2784 |
"step": 8000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2785 |
}
|
2786 |
],
|
2787 |
"max_steps": 10000,
|
2788 |
"num_train_epochs": 1,
|
2789 |
-
"total_flos": 2.
|
2790 |
"trial_name": null,
|
2791 |
"trial_params": null
|
2792 |
}
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 0.005352650945642216,
|
5 |
+
"global_step": 8500,
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
|
|
2782 |
"eval_samples_per_second": 2.969,
|
2783 |
"eval_steps_per_second": 0.742,
|
2784 |
"step": 8000
|
2785 |
+
},
|
2786 |
+
{
|
2787 |
+
"epoch": 0.01,
|
2788 |
+
"learning_rate": 0.0002,
|
2789 |
+
"loss": 1.9936,
|
2790 |
+
"step": 8050
|
2791 |
+
},
|
2792 |
+
{
|
2793 |
+
"epoch": 0.01,
|
2794 |
+
"learning_rate": 0.0002,
|
2795 |
+
"loss": 1.9682,
|
2796 |
+
"step": 8100
|
2797 |
+
},
|
2798 |
+
{
|
2799 |
+
"epoch": 0.01,
|
2800 |
+
"learning_rate": 0.0002,
|
2801 |
+
"loss": 2.0048,
|
2802 |
+
"step": 8150
|
2803 |
+
},
|
2804 |
+
{
|
2805 |
+
"epoch": 0.01,
|
2806 |
+
"learning_rate": 0.0002,
|
2807 |
+
"loss": 1.9783,
|
2808 |
+
"step": 8200
|
2809 |
+
},
|
2810 |
+
{
|
2811 |
+
"epoch": 0.01,
|
2812 |
+
"learning_rate": 0.0002,
|
2813 |
+
"loss": 1.9703,
|
2814 |
+
"step": 8250
|
2815 |
+
},
|
2816 |
+
{
|
2817 |
+
"epoch": 0.01,
|
2818 |
+
"learning_rate": 0.0002,
|
2819 |
+
"loss": 2.0042,
|
2820 |
+
"step": 8300
|
2821 |
+
},
|
2822 |
+
{
|
2823 |
+
"epoch": 0.01,
|
2824 |
+
"learning_rate": 0.0002,
|
2825 |
+
"loss": 1.9848,
|
2826 |
+
"step": 8350
|
2827 |
+
},
|
2828 |
+
{
|
2829 |
+
"epoch": 0.01,
|
2830 |
+
"learning_rate": 0.0002,
|
2831 |
+
"loss": 1.9808,
|
2832 |
+
"step": 8400
|
2833 |
+
},
|
2834 |
+
{
|
2835 |
+
"epoch": 0.01,
|
2836 |
+
"learning_rate": 0.0002,
|
2837 |
+
"loss": 1.9871,
|
2838 |
+
"step": 8450
|
2839 |
+
},
|
2840 |
+
{
|
2841 |
+
"epoch": 0.01,
|
2842 |
+
"learning_rate": 0.0002,
|
2843 |
+
"loss": 1.9733,
|
2844 |
+
"step": 8500
|
2845 |
+
},
|
2846 |
+
{
|
2847 |
+
"epoch": 0.01,
|
2848 |
+
"eval_loss": 2.0437986850738525,
|
2849 |
+
"eval_runtime": 34093.5093,
|
2850 |
+
"eval_samples_per_second": 2.97,
|
2851 |
+
"eval_steps_per_second": 0.743,
|
2852 |
+
"step": 8500
|
2853 |
}
|
2854 |
],
|
2855 |
"max_steps": 10000,
|
2856 |
"num_train_epochs": 1,
|
2857 |
+
"total_flos": 2.679889113792e+19,
|
2858 |
"trial_name": null,
|
2859 |
"trial_params": null
|
2860 |
}
|