Training in progress, step 2000

Files changed:
- last-checkpoint/optimizer.pt (+1 -1)
- last-checkpoint/pytorch_model.bin (+1 -1)
- last-checkpoint/rng_state.pth (+1 -1)
- last-checkpoint/scheduler.pt (+1 -1)
- last-checkpoint/trainer_state.json (+1218 -5)
- pytorch_model.bin (+1 -1)

last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:d6f0b117cb3b5ca9f3104796b19da86da2f7d1e2c40756714f09621415d49b13
 size 4736616809
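
Note: all of the .pt/.bin files in this commit are stored through Git LFS, so each diff above and below only touches the three-line pointer file (spec version, sha256 oid, byte size). The old oids were elided by the rendered page, and the sizes are unchanged, so each checkpoint blob was overwritten in place with new contents of identical size. As a minimal sketch (a hypothetical helper, not part of this repository), a downloaded blob can be checked against such a pointer like this:

import hashlib
from pathlib import Path

def parse_lfs_pointer(pointer_text: str) -> dict:
    """Parse a git-lfs pointer file into its key/value fields."""
    fields = {}
    for line in pointer_text.strip().splitlines():
        key, _, value = line.partition(" ")
        fields[key] = value
    return fields

def verify_blob(blob_path: str, pointer_text: str, chunk_size: int = 1 << 20) -> bool:
    """Check a downloaded blob against the oid/size recorded in its pointer."""
    fields = parse_lfs_pointer(pointer_text)
    expected_oid = fields["oid"].split(":", 1)[1]  # strip the "sha256:" prefix
    expected_size = int(fields["size"])
    path = Path(blob_path)
    if path.stat().st_size != expected_size:
        return False
    digest = hashlib.sha256()
    with path.open("rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            digest.update(chunk)
    return digest.hexdigest() == expected_oid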

last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:cdb640acc1ad0b52fbe5849e2fe9ec8023752b0a552442e328c1f4ad282eab9d
 size 2368281769

last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:7f471bea3861187dceb51828819a95fc512e9746b6a62f37798a37a6d6a9d142
 size 14575

last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:b35301bff95034222da17e187f0935a7b363754032e82a831e804e9485f6ec34
 size 627

last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
 {
-  "best_metric": 2.
-  "best_model_checkpoint": "output/checkpoint-
-  "epoch": 1.
-  "global_step":
+  "best_metric": 2.0949487686157227,
+  "best_model_checkpoint": "output/checkpoint-2000",
+  "epoch": 1.38650390625,
+  "global_step": 2000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -10923,11 +10923,1224 @@
       "eval_samples_per_second": 0.055,
       "eval_steps_per_second": 0.055,
       "step": 1800
+    },
+    { "epoch": 1.29, "learning_rate": 8.844444444444445e-05, "loss": 2.0691, "step": 1801 },
+    { "epoch": 1.29, "learning_rate": 8.800000000000001e-05, "loss": 2.0835, "step": 1802 },
+    { "epoch": 1.29, "learning_rate": 8.755555555555556e-05, "loss": 2.0686, "step": 1803 },
+    { "epoch": 1.29, "learning_rate": 8.711111111111112e-05, "loss": 2.2013, "step": 1804 },
+    { "epoch": 1.29, "learning_rate": 8.666666666666667e-05, "loss": 2.0697, "step": 1805 },
+    { "epoch": 1.29, "learning_rate": 8.622222222222222e-05, "loss": 2.15, "step": 1806 },
+    { "epoch": 1.29, "learning_rate": 8.577777777777777e-05, "loss": 2.079, "step": 1807 },
+    { "epoch": 1.29, "learning_rate": 8.533333333333334e-05, "loss": 2.1418, "step": 1808 },
+    { "epoch": 1.29, "learning_rate": 8.488888888888889e-05, "loss": 2.1518, "step": 1809 },
+    { "epoch": 1.29, "learning_rate": 8.444444444444444e-05, "loss": 2.12, "step": 1810 },
+    { "epoch": 1.29, "learning_rate": 8.4e-05, "loss": 2.1908, "step": 1811 },
+    { "epoch": 1.29, "learning_rate": 8.355555555555556e-05, "loss": 2.1359, "step": 1812 },
+    { "epoch": 1.29, "learning_rate": 8.311111111111111e-05, "loss": 2.1208, "step": 1813 },
+    { "epoch": 1.29, "learning_rate": 8.266666666666667e-05, "loss": 2.1014, "step": 1814 },
+    { "epoch": 1.29, "learning_rate": 8.222222222222222e-05, "loss": 2.1451, "step": 1815 },
+    { "epoch": 1.29, "learning_rate": 8.177777777777778e-05, "loss": 2.0757, "step": 1816 },
+    { "epoch": 1.3, "learning_rate": 8.133333333333334e-05, "loss": 2.0587, "step": 1817 },
+    { "epoch": 1.3, "learning_rate": 8.088888888888889e-05, "loss": 2.1008, "step": 1818 },
+    { "epoch": 1.3, "learning_rate": 8.044444444444444e-05, "loss": 2.0753, "step": 1819 },
+    { "epoch": 1.3, "learning_rate": 8e-05, "loss": 2.0912, "step": 1820 },
+    { "epoch": 1.3, "learning_rate": 7.955555555555556e-05, "loss": 2.0648, "step": 1821 },
+    { "epoch": 1.3, "learning_rate": 7.911111111111111e-05, "loss": 2.0948, "step": 1822 },
+    { "epoch": 1.3, "learning_rate": 7.866666666666666e-05, "loss": 2.1252, "step": 1823 },
+    { "epoch": 1.3, "learning_rate": 7.822222222222223e-05, "loss": 2.1548, "step": 1824 },
+    { "epoch": 1.3, "learning_rate": 7.777777777777778e-05, "loss": 2.1061, "step": 1825 },
+    { "epoch": 1.3, "learning_rate": 7.733333333333333e-05, "loss": 2.2017, "step": 1826 },
+    { "epoch": 1.3, "learning_rate": 7.688888888888889e-05, "loss": 2.1743, "step": 1827 },
+    { "epoch": 1.3, "learning_rate": 7.644444444444445e-05, "loss": 2.1126, "step": 1828 },
+    { "epoch": 1.3, "learning_rate": 7.6e-05, "loss": 2.0536, "step": 1829 },
+    { "epoch": 1.3, "learning_rate": 7.555555555555556e-05, "loss": 2.0527, "step": 1830 },
+    { "epoch": 1.3, "learning_rate": 7.511111111111111e-05, "loss": 2.0798, "step": 1831 },
+    { "epoch": 1.3, "learning_rate": 7.466666666666667e-05, "loss": 2.1436, "step": 1832 },
+    { "epoch": 1.3, "learning_rate": 7.422222222222223e-05, "loss": 2.1289, "step": 1833 },
+    { "epoch": 1.3, "learning_rate": 7.377777777777778e-05, "loss": 2.1487, "step": 1834 },
+    { "epoch": 1.3, "learning_rate": 7.333333333333333e-05, "loss": 2.0779, "step": 1835 },
+    { "epoch": 1.3, "learning_rate": 7.28888888888889e-05, "loss": 2.1045, "step": 1836 },
+    { "epoch": 1.31, "learning_rate": 7.244444444444445e-05, "loss": 2.0659, "step": 1837 },
+    { "epoch": 1.31, "learning_rate": 7.2e-05, "loss": 2.0704, "step": 1838 },
+    { "epoch": 1.31, "learning_rate": 7.155555555555555e-05, "loss": 2.0901, "step": 1839 },
+    { "epoch": 1.31, "learning_rate": 7.111111111111112e-05, "loss": 2.104, "step": 1840 },
+    { "epoch": 1.31, "learning_rate": 7.066666666666667e-05, "loss": 2.1784, "step": 1841 },
+    { "epoch": 1.31, "learning_rate": 7.022222222222222e-05, "loss": 2.0993, "step": 1842 },
+    { "epoch": 1.31, "learning_rate": 6.977777777777779e-05, "loss": 2.1538, "step": 1843 },
+    { "epoch": 1.31, "learning_rate": 6.933333333333334e-05, "loss": 2.0939, "step": 1844 },
+    { "epoch": 1.31, "learning_rate": 6.88888888888889e-05, "loss": 2.2134, "step": 1845 },
+    { "epoch": 1.31, "learning_rate": 6.844444444444445e-05, "loss": 2.0872, "step": 1846 },
+    { "epoch": 1.31, "learning_rate": 6.800000000000001e-05, "loss": 2.0762, "step": 1847 },
+    { "epoch": 1.31, "learning_rate": 6.755555555555557e-05, "loss": 2.1751, "step": 1848 },
+    { "epoch": 1.31, "learning_rate": 6.711111111111112e-05, "loss": 2.1107, "step": 1849 },
+    { "epoch": 1.31, "learning_rate": 6.666666666666667e-05, "loss": 2.1072, "step": 1850 },
+    { "epoch": 1.31, "learning_rate": 6.622222222222224e-05, "loss": 2.1557, "step": 1851 },
+    { "epoch": 1.31, "learning_rate": 6.577777777777779e-05, "loss": 2.1582, "step": 1852 },
+    { "epoch": 1.31, "learning_rate": 6.533333333333334e-05, "loss": 2.1279, "step": 1853 },
+    { "epoch": 1.31, "learning_rate": 6.488888888888889e-05, "loss": 2.1797, "step": 1854 },
+    { "epoch": 1.31, "learning_rate": 6.444444444444446e-05, "loss": 2.1951, "step": 1855 },
+    { "epoch": 1.31, "learning_rate": 6.400000000000001e-05, "loss": 2.018, "step": 1856 },
+    { "epoch": 1.32, "learning_rate": 6.355555555555556e-05, "loss": 2.1234, "step": 1857 },
+    { "epoch": 1.32, "learning_rate": 6.311111111111112e-05, "loss": 2.1022, "step": 1858 },
+    { "epoch": 1.32, "learning_rate": 6.266666666666667e-05, "loss": 2.0655, "step": 1859 },
+    { "epoch": 1.32, "learning_rate": 6.222222222222222e-05, "loss": 2.138, "step": 1860 },
+    { "epoch": 1.32, "learning_rate": 6.177777777777779e-05, "loss": 2.1443, "step": 1861 },
+    { "epoch": 1.32, "learning_rate": 6.133333333333334e-05, "loss": 2.1101, "step": 1862 },
+    { "epoch": 1.32, "learning_rate": 6.08888888888889e-05, "loss": 2.0674, "step": 1863 },
+    { "epoch": 1.32, "learning_rate": 6.044444444444445e-05, "loss": 2.1124, "step": 1864 },
+    { "epoch": 1.32, "learning_rate": 6e-05, "loss": 2.1006, "step": 1865 },
+    { "epoch": 1.32, "learning_rate": 5.9555555555555554e-05, "loss": 2.1313, "step": 1866 },
+    { "epoch": 1.32, "learning_rate": 5.911111111111112e-05, "loss": 2.0989, "step": 1867 },
+    { "epoch": 1.32, "learning_rate": 5.866666666666667e-05, "loss": 2.0372, "step": 1868 },
+    { "epoch": 1.32, "learning_rate": 5.8222222222222224e-05, "loss": 2.1849, "step": 1869 },
+    { "epoch": 1.32, "learning_rate": 5.7777777777777776e-05, "loss": 2.1394, "step": 1870 },
+    { "epoch": 1.32, "learning_rate": 5.7333333333333336e-05, "loss": 2.1078, "step": 1871 },
+    { "epoch": 1.32, "learning_rate": 5.6888888888888895e-05, "loss": 2.2069, "step": 1872 },
+    { "epoch": 1.32, "learning_rate": 5.644444444444445e-05, "loss": 2.0549, "step": 1873 },
+    { "epoch": 1.32, "learning_rate": 5.6000000000000006e-05, "loss": 2.0907, "step": 1874 },
+    { "epoch": 1.32, "learning_rate": 5.555555555555556e-05, "loss": 2.1449, "step": 1875 },
+    { "epoch": 1.32, "learning_rate": 5.511111111111111e-05, "loss": 2.1387, "step": 1876 },
+    { "epoch": 1.33, "learning_rate": 5.466666666666666e-05, "loss": 2.0901, "step": 1877 },
+    { "epoch": 1.33, "learning_rate": 5.422222222222223e-05, "loss": 2.0245, "step": 1878 },
+    { "epoch": 1.33, "learning_rate": 5.377777777777778e-05, "loss": 2.1397, "step": 1879 },
+    { "epoch": 1.33, "learning_rate": 5.333333333333333e-05, "loss": 2.1229, "step": 1880 },
+    { "epoch": 1.33, "learning_rate": 5.2888888888888885e-05, "loss": 2.1114, "step": 1881 },
+    { "epoch": 1.33, "learning_rate": 5.244444444444445e-05, "loss": 2.1436, "step": 1882 },
+    { "epoch": 1.33, "learning_rate": 5.2000000000000004e-05, "loss": 2.0848, "step": 1883 },
+    { "epoch": 1.33, "learning_rate": 5.1555555555555556e-05, "loss": 2.1456, "step": 1884 },
+    { "epoch": 1.33, "learning_rate": 5.111111111111111e-05, "loss": 2.0572, "step": 1885 },
+    { "epoch": 1.33, "learning_rate": 5.0666666666666674e-05, "loss": 2.1019, "step": 1886 },
+    { "epoch": 1.33, "learning_rate": 5.0222222222222226e-05, "loss": 2.0934, "step": 1887 },
+    { "epoch": 1.33, "learning_rate": 4.977777777777778e-05, "loss": 2.1123, "step": 1888 },
+    { "epoch": 1.33, "learning_rate": 4.933333333333334e-05, "loss": 2.1475, "step": 1889 },
+    { "epoch": 1.33, "learning_rate": 4.888888888888889e-05, "loss": 2.14, "step": 1890 },
+    { "epoch": 1.33, "learning_rate": 4.844444444444445e-05, "loss": 2.1103, "step": 1891 },
+    { "epoch": 1.33, "learning_rate": 4.8e-05, "loss": 2.0636, "step": 1892 },
+    { "epoch": 1.33, "learning_rate": 4.755555555555556e-05, "loss": 2.0378, "step": 1893 },
+    { "epoch": 1.33, "learning_rate": 4.711111111111111e-05, "loss": 2.1109, "step": 1894 },
+    { "epoch": 1.33, "learning_rate": 4.666666666666667e-05, "loss": 2.0696, "step": 1895 },
+    { "epoch": 1.33, "learning_rate": 4.6222222222222224e-05, "loss": 2.117, "step": 1896 },
+    { "epoch": 1.34, "learning_rate": 4.577777777777778e-05, "loss": 2.0447, "step": 1897 },
+    { "epoch": 1.34, "learning_rate": 4.5333333333333335e-05, "loss": 2.1653, "step": 1898 },
+    { "epoch": 1.34, "learning_rate": 4.4888888888888894e-05, "loss": 2.0236, "step": 1899 },
+    { "epoch": 1.34, "learning_rate": 4.4444444444444447e-05, "loss": 2.1244, "step": 1900 },
+    { "epoch": 1.34, "learning_rate": 4.4000000000000006e-05, "loss": 2.1211, "step": 1901 },
+    { "epoch": 1.34, "learning_rate": 4.355555555555556e-05, "loss": 2.2072, "step": 1902 },
+    { "epoch": 1.34, "learning_rate": 4.311111111111111e-05, "loss": 2.1041, "step": 1903 },
+    { "epoch": 1.34, "learning_rate": 4.266666666666667e-05, "loss": 2.071, "step": 1904 },
+    { "epoch": 1.34, "learning_rate": 4.222222222222222e-05, "loss": 2.0229, "step": 1905 },
+    { "epoch": 1.34, "learning_rate": 4.177777777777778e-05, "loss": 2.0974, "step": 1906 },
+    { "epoch": 1.34, "learning_rate": 4.133333333333333e-05, "loss": 2.1007, "step": 1907 },
+    { "epoch": 1.34, "learning_rate": 4.088888888888889e-05, "loss": 2.1028, "step": 1908 },
+    { "epoch": 1.34, "learning_rate": 4.0444444444444444e-05, "loss": 2.097, "step": 1909 },
+    { "epoch": 1.34, "learning_rate": 4e-05, "loss": 2.1031, "step": 1910 },
+    { "epoch": 1.34, "learning_rate": 3.9555555555555556e-05, "loss": 2.0905, "step": 1911 },
+    { "epoch": 1.34, "learning_rate": 3.9111111111111115e-05, "loss": 2.1131, "step": 1912 },
+    { "epoch": 1.34, "learning_rate": 3.866666666666667e-05, "loss": 2.1349, "step": 1913 },
+    { "epoch": 1.34, "learning_rate": 3.8222222222222226e-05, "loss": 2.1394, "step": 1914 },
+    { "epoch": 1.34, "learning_rate": 3.777777777777778e-05, "loss": 2.0631, "step": 1915 },
+    { "epoch": 1.34, "learning_rate": 3.733333333333334e-05, "loss": 2.1042, "step": 1916 },
+    { "epoch": 1.35, "learning_rate": 3.688888888888889e-05, "loss": 2.1382, "step": 1917 },
+    { "epoch": 1.35, "learning_rate": 3.644444444444445e-05, "loss": 2.1726, "step": 1918 },
+    { "epoch": 1.35, "learning_rate": 3.6e-05, "loss": 2.1671, "step": 1919 },
+    { "epoch": 1.35, "learning_rate": 3.555555555555556e-05, "loss": 2.0649, "step": 1920 },
+    { "epoch": 1.35, "learning_rate": 3.511111111111111e-05, "loss": 2.0917, "step": 1921 },
+    { "epoch": 1.35, "learning_rate": 3.466666666666667e-05, "loss": 2.0466, "step": 1922 },
+    { "epoch": 1.35, "learning_rate": 3.4222222222222224e-05, "loss": 2.0903, "step": 1923 },
+    { "epoch": 1.35, "learning_rate": 3.377777777777778e-05, "loss": 2.0701, "step": 1924 },
+    { "epoch": 1.35, "learning_rate": 3.3333333333333335e-05, "loss": 2.0987, "step": 1925 },
+    { "epoch": 1.35, "learning_rate": 3.2888888888888894e-05, "loss": 2.1408, "step": 1926 },
+    { "epoch": 1.35, "learning_rate": 3.2444444444444446e-05, "loss": 2.104, "step": 1927 },
+    { "epoch": 1.35, "learning_rate": 3.2000000000000005e-05, "loss": 2.088, "step": 1928 },
+    { "epoch": 1.35, "learning_rate": 3.155555555555556e-05, "loss": 2.0903, "step": 1929 },
+    { "epoch": 1.35, "learning_rate": 3.111111111111111e-05, "loss": 2.0852, "step": 1930 },
+    { "epoch": 1.35, "learning_rate": 3.066666666666667e-05, "loss": 2.0749, "step": 1931 },
+    { "epoch": 1.35, "learning_rate": 3.0222222222222225e-05, "loss": 2.0794, "step": 1932 },
+    { "epoch": 1.35, "learning_rate": 2.9777777777777777e-05, "loss": 2.0552, "step": 1933 },
+    { "epoch": 1.35, "learning_rate": 2.9333333333333336e-05, "loss": 2.0896, "step": 1934 },
+    { "epoch": 1.35, "learning_rate": 2.8888888888888888e-05, "loss": 2.1586, "step": 1935 },
+    { "epoch": 1.35, "learning_rate": 2.8444444444444447e-05, "loss": 2.1477, "step": 1936 },
+    { "epoch": 1.36, "learning_rate": 2.8000000000000003e-05, "loss": 2.0847, "step": 1937 },
+    { "epoch": 1.36, "learning_rate": 2.7555555555555555e-05, "loss": 2.0892, "step": 1938 },
+    { "epoch": 1.36, "learning_rate": 2.7111111111111114e-05, "loss": 2.1891, "step": 1939 },
+    { "epoch": 1.36, "learning_rate": 2.6666666666666667e-05, "loss": 2.0856, "step": 1940 },
+    { "epoch": 1.36, "learning_rate": 2.6222222222222226e-05, "loss": 2.1166, "step": 1941 },
+    { "epoch": 1.36, "learning_rate": 2.5777777777777778e-05, "loss": 2.15, "step": 1942 },
+    { "epoch": 1.36, "learning_rate": 2.5333333333333337e-05, "loss": 2.0967, "step": 1943 },
+    { "epoch": 1.36, "learning_rate": 2.488888888888889e-05, "loss": 2.1111, "step": 1944 },
+    { "epoch": 1.36, "learning_rate": 2.4444444444444445e-05, "loss": 2.097, "step": 1945 },
+    { "epoch": 1.36, "learning_rate": 2.4e-05, "loss": 2.1431, "step": 1946 },
+    { "epoch": 1.36, "learning_rate": 2.3555555555555556e-05, "loss": 2.1265, "step": 1947 },
+    { "epoch": 1.36, "learning_rate": 2.3111111111111112e-05, "loss": 2.2027, "step": 1948 },
+    { "epoch": 1.36, "learning_rate": 2.2666666666666668e-05, "loss": 2.0808, "step": 1949 },
+    { "epoch": 1.36, "learning_rate": 2.2222222222222223e-05, "loss": 2.0719, "step": 1950 },
+    { "epoch": 1.36, "learning_rate": 2.177777777777778e-05, "loss": 2.0903, "step": 1951 },
+    { "epoch": 1.36, "learning_rate": 2.1333333333333335e-05, "loss": 2.0455, "step": 1952 },
+    { "epoch": 1.36, "learning_rate": 2.088888888888889e-05, "loss": 2.1111, "step": 1953 },
+    { "epoch": 1.36, "learning_rate": 2.0444444444444446e-05, "loss": 2.1216, "step": 1954 },
+    { "epoch": 1.36, "learning_rate": 2e-05, "loss": 2.1088, "step": 1955 },
+    { "epoch": 1.36, "learning_rate": 1.9555555555555557e-05, "loss": 2.0789, "step": 1956 },
+    { "epoch": 1.37, "learning_rate": 1.9111111111111113e-05, "loss": 2.0954, "step": 1957 },
+    { "epoch": 1.37, "learning_rate": 1.866666666666667e-05, "loss": 2.0936, "step": 1958 },
+    { "epoch": 1.37, "learning_rate": 1.8222222222222224e-05, "loss": 2.1256, "step": 1959 },
+    { "epoch": 1.37, "learning_rate": 1.777777777777778e-05, "loss": 2.1112, "step": 1960 },
+    { "epoch": 1.37, "learning_rate": 1.7333333333333336e-05, "loss": 2.0945, "step": 1961 },
+    { "epoch": 1.37, "learning_rate": 1.688888888888889e-05, "loss": 2.1225, "step": 1962 },
+    { "epoch": 1.37, "learning_rate": 1.6444444444444447e-05, "loss": 2.0004, "step": 1963 },
+    { "epoch": 1.37, "learning_rate": 1.6000000000000003e-05, "loss": 2.1143, "step": 1964 },
+    { "epoch": 1.37, "learning_rate": 1.5555555555555555e-05, "loss": 2.0987, "step": 1965 },
+    { "epoch": 1.37, "learning_rate": 1.5111111111111112e-05, "loss": 2.0606, "step": 1966 },
+    { "epoch": 1.37, "learning_rate": 1.4666666666666668e-05, "loss": 2.0714, "step": 1967 },
+    { "epoch": 1.37, "learning_rate": 1.4222222222222224e-05, "loss": 2.0926, "step": 1968 },
+    { "epoch": 1.37, "learning_rate": 1.3777777777777778e-05, "loss": 2.139, "step": 1969 },
+    { "epoch": 1.37, "learning_rate": 1.3333333333333333e-05, "loss": 2.1304, "step": 1970 },
+    { "epoch": 1.37, "learning_rate": 1.2888888888888889e-05, "loss": 2.1202, "step": 1971 },
+    { "epoch": 1.37, "learning_rate": 1.2444444444444445e-05, "loss": 2.1232, "step": 1972 },
+    { "epoch": 1.37, "learning_rate": 1.2e-05, "loss": 2.1089, "step": 1973 },
+    { "epoch": 1.37, "learning_rate": 1.1555555555555556e-05, "loss": 2.1162, "step": 1974 },
+    { "epoch": 1.37, "learning_rate": 1.1111111111111112e-05, "loss": 2.0465, "step": 1975 },
+    { "epoch": 1.37, "learning_rate": 1.0666666666666667e-05, "loss": 2.1557, "step": 1976 },
+    { "epoch": 1.38, "learning_rate": 1.0222222222222223e-05, "loss": 2.1571, "step": 1977 },
+    { "epoch": 1.38, "learning_rate": 9.777777777777779e-06, "loss": 2.0609, "step": 1978 },
+    { "epoch": 1.38, "learning_rate": 9.333333333333334e-06, "loss": 2.1276, "step": 1979 },
+    { "epoch": 1.38, "learning_rate": 8.88888888888889e-06, "loss": 2.0715, "step": 1980 },
+    { "epoch": 1.38, "learning_rate": 8.444444444444446e-06, "loss": 2.0817, "step": 1981 },
+    { "epoch": 1.38, "learning_rate": 8.000000000000001e-06, "loss": 2.1797, "step": 1982 },
+    { "epoch": 1.38, "learning_rate": 7.555555555555556e-06, "loss": 2.1437, "step": 1983 },
+    { "epoch": 1.38, "learning_rate": 7.111111111111112e-06, "loss": 2.1116, "step": 1984 },
+    { "epoch": 1.38, "learning_rate": 6.666666666666667e-06, "loss": 2.0851, "step": 1985 },
+    { "epoch": 1.38, "learning_rate": 6.222222222222222e-06, "loss": 2.1341, "step": 1986 },
+    { "epoch": 1.38, "learning_rate": 5.777777777777778e-06, "loss": 2.1147, "step": 1987 },
+    { "epoch": 1.38, "learning_rate": 5.333333333333334e-06, "loss": 2.1872, "step": 1988 },
+    { "epoch": 1.38, "learning_rate": 4.888888888888889e-06, "loss": 2.1453, "step": 1989 },
+    { "epoch": 1.38, "learning_rate": 4.444444444444445e-06, "loss": 2.1466, "step": 1990 },
+    { "epoch": 1.38, "learning_rate": 4.000000000000001e-06, "loss": 2.213, "step": 1991 },
+    { "epoch": 1.38, "learning_rate": 3.555555555555556e-06, "loss": 2.0331, "step": 1992 },
+    { "epoch": 1.38, "learning_rate": 3.111111111111111e-06, "loss": 2.0973, "step": 1993 },
+    { "epoch": 1.38, "learning_rate": 2.666666666666667e-06, "loss": 2.0288, "step": 1994 },
+    { "epoch": 1.38, "learning_rate": 2.2222222222222225e-06, "loss": 2.1309, "step": 1995 },
+    { "epoch": 1.38, "learning_rate": 1.777777777777778e-06, "loss": 2.1932, "step": 1996 },
+    { "epoch": 1.39, "learning_rate": 1.3333333333333334e-06, "loss": 2.037, "step": 1997 },
+    { "epoch": 1.39, "learning_rate": 8.88888888888889e-07, "loss": 2.1351, "step": 1998 },
+    { "epoch": 1.39, "learning_rate": 4.444444444444445e-07, "loss": 2.1637, "step": 1999 },
+    { "epoch": 1.39, "learning_rate": 0.0, "loss": 2.1263, "step": 2000 },
+    {
+      "epoch": 1.39,
+      "eval_gen_len": 1023.0,
+      "eval_loss": 2.0949487686157227,
+      "eval_rouge1": 14.0237,
+      "eval_rouge2": 4.8219,
+      "eval_rougeL": 8.8145,
+      "eval_rougeLsum": 8.793,
+      "eval_runtime": 9159.2647,
+      "eval_samples_per_second": 0.055,
+      "eval_steps_per_second": 0.055,
+      "step": 2000
     }
   ],
   "max_steps": 2000,
   "num_train_epochs": 9223372036854775807,
-  "total_flos": 1.
+  "total_flos": 1.608210613960658e+18,
   "trial_name": null,
   "trial_params": null
 }
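
Two details of this log are worth noting. "num_train_epochs": 9223372036854775807 is 2**63 - 1, the sentinel the Hugging Face Trainer records when training length is set by max_steps rather than by an epoch count. And the logged learning rate falls by a constant ~4.444e-07 per step, reaching exactly 0.0 at step 2000, which is consistent with a linear decay over max_steps = 2000. A small sketch reproducing the logged values under that reading (the peak rate of ~8.889e-4 is an inference that assumes the decay spans all 2000 steps with no warmup; it is not recorded anywhere in this commit):

# Linear decay to zero over max_steps, inferred from the logged rates.
MAX_STEPS = 2000
PEAK_LR = 8.888888888888889e-04  # assumed peak; not stored in trainer_state.json

def lr_at(step: int) -> float:
    """Learning rate at a given optimizer step under the inferred schedule."""
    return PEAK_LR * (MAX_STEPS - step) / MAX_STEPS

# Matches the log: 8.844444444444445e-05 at step 1801, 0.0 at step 2000.
assert abs(lr_at(1801) - 8.844444444444445e-05) < 1e-12
assert abs(lr_at(1999) - 4.444444444444445e-07) < 1e-12
assert lr_at(2000) == 0.0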

pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:cdb640acc1ad0b52fbe5849e2fe9ec8023752b0a552442e328c1f4ad282eab9d
 size 2368281769
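
Since trainer_state.json carries the full log_history shown above, the checkpoint can be inspected directly once the repository is cloned; the ROUGE scores and eval_gen_len of 1023.0 in the eval block suggest a long-form generation (e.g. summarization) evaluation. A short sketch (the path assumes a local checkout, and the perplexity line is an interpretation of eval_loss as mean cross-entropy in nats, not a value stored in the file):

import json
import math

# Path assumes the repository is checked out locally.
with open("last-checkpoint/trainer_state.json") as f:
    state = json.load(f)

train_logs = [e for e in state["log_history"] if "loss" in e]
eval_logs = [e for e in state["log_history"] if "eval_loss" in e]

print("train steps logged:", len(train_logs))
print("last train loss:", train_logs[-1]["loss"])  # 2.1263 at step 2000
print("best eval loss:", state["best_metric"])     # 2.0949487686157227
# Treating eval_loss as mean cross-entropy in nats gives a perplexity of
# roughly exp(2.0949) ~= 8.1 -- an interpretation, not a stored field.
print("implied perplexity:", math.exp(eval_logs[-1]["eval_loss"]))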