Training in progress, step 2072, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 232169792
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3b3348347c5794cd8071f0cef1cc9e1495550a9f8017f064f8d08001da935de6
|
3 |
size 232169792
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 117446154
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6294f772f9da965b0dc3c23ee7713331071f97aea88c9f208bf5e6c9239b1585
|
3 |
size 117446154
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:35f511fc6ceb095d52923c4d373851bf545d0c675a13d2bfda0c9c62cf0ead84
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 2.
|
5 |
"eval_steps": 500,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -37231,6 +37231,78 @@
|
|
37231 |
"rewards/margins": 0.19329190254211426,
|
37232 |
"rewards/rejected": -0.28255730867385864,
|
37233 |
"step": 2068
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
37234 |
}
|
37235 |
],
|
37236 |
"logging_steps": 1,
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 2.998078878969375,
|
5 |
"eval_steps": 500,
|
6 |
+
"global_step": 2072,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
37231 |
"rewards/margins": 0.19329190254211426,
|
37232 |
"rewards/rejected": -0.28255730867385864,
|
37233 |
"step": 2068
|
37234 |
+
},
|
37235 |
+
{
|
37236 |
+
"epoch": 2.9937394055825517,
|
37237 |
+
"grad_norm": 0.5331955552101135,
|
37238 |
+
"learning_rate": 4.5933528375341485e-10,
|
37239 |
+
"log_odds_chosen": 2.6847963333129883,
|
37240 |
+
"log_odds_ratio": -0.4565506875514984,
|
37241 |
+
"logits/chosen": -1.810483694076538,
|
37242 |
+
"logits/rejected": -1.6029425859451294,
|
37243 |
+
"logps/chosen": -0.806846022605896,
|
37244 |
+
"logps/rejected": -2.9770960807800293,
|
37245 |
+
"loss": 1.06,
|
37246 |
+
"nll_loss": 1.0143355131149292,
|
37247 |
+
"rewards/accuracies": 0.65625,
|
37248 |
+
"rewards/chosen": -0.0806846022605896,
|
37249 |
+
"rewards/margins": 0.2170250117778778,
|
37250 |
+
"rewards/rejected": -0.2977096438407898,
|
37251 |
+
"step": 2069
|
37252 |
+
},
|
37253 |
+
{
|
37254 |
+
"epoch": 2.995185896711493,
|
37255 |
+
"grad_norm": 0.5308060050010681,
|
37256 |
+
"learning_rate": 2.58376443265651e-10,
|
37257 |
+
"log_odds_chosen": 2.103809118270874,
|
37258 |
+
"log_odds_ratio": -0.5330842733383179,
|
37259 |
+
"logits/chosen": -1.7134472131729126,
|
37260 |
+
"logits/rejected": -1.536543846130371,
|
37261 |
+
"logps/chosen": -0.7203590869903564,
|
37262 |
+
"logps/rejected": -2.3935389518737793,
|
37263 |
+
"loss": 1.015,
|
37264 |
+
"nll_loss": 0.9616963863372803,
|
37265 |
+
"rewards/accuracies": 0.65625,
|
37266 |
+
"rewards/chosen": -0.07203590869903564,
|
37267 |
+
"rewards/margins": 0.16731798648834229,
|
37268 |
+
"rewards/rejected": -0.23935389518737793,
|
37269 |
+
"step": 2070
|
37270 |
+
},
|
37271 |
+
{
|
37272 |
+
"epoch": 2.996632387840434,
|
37273 |
+
"grad_norm": 1.1535701751708984,
|
37274 |
+
"learning_rate": 1.1483408467460876e-10,
|
37275 |
+
"log_odds_chosen": 2.2093892097473145,
|
37276 |
+
"log_odds_ratio": -0.5061578154563904,
|
37277 |
+
"logits/chosen": -1.8256726264953613,
|
37278 |
+
"logits/rejected": -1.6192009449005127,
|
37279 |
+
"logps/chosen": -0.8687729239463806,
|
37280 |
+
"logps/rejected": -2.67683482170105,
|
37281 |
+
"loss": 1.1116,
|
37282 |
+
"nll_loss": 1.0610023736953735,
|
37283 |
+
"rewards/accuracies": 0.65625,
|
37284 |
+
"rewards/chosen": -0.08687728643417358,
|
37285 |
+
"rewards/margins": 0.18080618977546692,
|
37286 |
+
"rewards/rejected": -0.2676834762096405,
|
37287 |
+
"step": 2071
|
37288 |
+
},
|
37289 |
+
{
|
37290 |
+
"epoch": 2.998078878969375,
|
37291 |
+
"grad_norm": 0.5611602663993835,
|
37292 |
+
"learning_rate": 2.8708537652688548e-11,
|
37293 |
+
"log_odds_chosen": 1.6254621744155884,
|
37294 |
+
"log_odds_ratio": -0.5093374252319336,
|
37295 |
+
"logits/chosen": -1.76937997341156,
|
37296 |
+
"logits/rejected": -1.670184850692749,
|
37297 |
+
"logps/chosen": -0.8887354135513306,
|
37298 |
+
"logps/rejected": -2.101914882659912,
|
37299 |
+
"loss": 1.1095,
|
37300 |
+
"nll_loss": 1.0585343837738037,
|
37301 |
+
"rewards/accuracies": 0.734375,
|
37302 |
+
"rewards/chosen": -0.08887353539466858,
|
37303 |
+
"rewards/margins": 0.12131794542074203,
|
37304 |
+
"rewards/rejected": -0.2101914882659912,
|
37305 |
+
"step": 2072
|
37306 |
}
|
37307 |
],
|
37308 |
"logging_steps": 1,
|