Training in progress, step 1600
Browse files- adapter_model.safetensors +1 -1
- trainer_log.jsonl +22 -0
adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 18516456
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f11513d52bcbabbebdae5c22382754c386d07651a25539e1cdcc0f4b4982a0f7
|
3 |
size 18516456
|
trainer_log.jsonl
CHANGED
@@ -332,3 +332,25 @@
|
|
332 |
{"current_steps": 1495, "total_steps": 3400, "loss": 0.5973, "lr": 6.391686172908506e-05, "epoch": 0.7700231779551893, "percentage": 43.97, "elapsed_time": "5:37:12", "remaining_time": "7:09:41", "throughput": 864.25, "total_tokens": 17485936}
|
333 |
{"current_steps": 1500, "total_steps": 3400, "loss": 0.6021, "lr": 6.368314950360415e-05, "epoch": 0.7725985063095545, "percentage": 44.12, "elapsed_time": "5:38:15", "remaining_time": "7:08:27", "throughput": 864.44, "total_tokens": 17544440}
|
334 |
{"current_steps": 1500, "total_steps": 3400, "eval_loss": 0.632923424243927, "epoch": 0.7725985063095545, "percentage": 44.12, "elapsed_time": "5:38:31", "remaining_time": "7:08:48", "throughput": 863.76, "total_tokens": 17544440}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
332 |
{"current_steps": 1495, "total_steps": 3400, "loss": 0.5973, "lr": 6.391686172908506e-05, "epoch": 0.7700231779551893, "percentage": 43.97, "elapsed_time": "5:37:12", "remaining_time": "7:09:41", "throughput": 864.25, "total_tokens": 17485936}
|
333 |
{"current_steps": 1500, "total_steps": 3400, "loss": 0.6021, "lr": 6.368314950360415e-05, "epoch": 0.7725985063095545, "percentage": 44.12, "elapsed_time": "5:38:15", "remaining_time": "7:08:27", "throughput": 864.44, "total_tokens": 17544440}
|
334 |
{"current_steps": 1500, "total_steps": 3400, "eval_loss": 0.632923424243927, "epoch": 0.7725985063095545, "percentage": 44.12, "elapsed_time": "5:38:31", "remaining_time": "7:08:48", "throughput": 863.76, "total_tokens": 17544440}
|
335 |
+
{"current_steps": 1505, "total_steps": 3400, "loss": 0.5779, "lr": 6.344911366961934e-05, "epoch": 0.7751738346639196, "percentage": 44.26, "elapsed_time": "5:39:39", "remaining_time": "7:07:40", "throughput": 863.76, "total_tokens": 17602952}
|
336 |
+
{"current_steps": 1510, "total_steps": 3400, "loss": 0.6707, "lr": 6.321475976211266e-05, "epoch": 0.7777491630182848, "percentage": 44.41, "elapsed_time": "5:40:42", "remaining_time": "7:06:27", "throughput": 863.95, "total_tokens": 17661440}
|
337 |
+
{"current_steps": 1515, "total_steps": 3400, "loss": 0.6326, "lr": 6.298009332358856e-05, "epoch": 0.78032449137265, "percentage": 44.56, "elapsed_time": "5:41:45", "remaining_time": "7:05:13", "throughput": 864.15, "total_tokens": 17719928}
|
338 |
+
{"current_steps": 1520, "total_steps": 3400, "loss": 0.6472, "lr": 6.274511990394294e-05, "epoch": 0.7828998197270152, "percentage": 44.71, "elapsed_time": "5:42:48", "remaining_time": "7:04:00", "throughput": 864.34, "total_tokens": 17778424}
|
339 |
+
{"current_steps": 1525, "total_steps": 3400, "loss": 0.6215, "lr": 6.250984506033183e-05, "epoch": 0.7854751480813804, "percentage": 44.85, "elapsed_time": "5:43:51", "remaining_time": "7:02:46", "throughput": 864.55, "total_tokens": 17836936}
|
340 |
+
{"current_steps": 1530, "total_steps": 3400, "loss": 0.6102, "lr": 6.227427435703997e-05, "epoch": 0.7880504764357456, "percentage": 45.0, "elapsed_time": "5:44:54", "remaining_time": "7:01:33", "throughput": 864.73, "total_tokens": 17895392}
|
341 |
+
{"current_steps": 1535, "total_steps": 3400, "loss": 0.6161, "lr": 6.203841336534924e-05, "epoch": 0.7906258047901107, "percentage": 45.15, "elapsed_time": "5:45:57", "remaining_time": "7:00:20", "throughput": 864.92, "total_tokens": 17953872}
|
342 |
+
{"current_steps": 1540, "total_steps": 3400, "loss": 0.6103, "lr": 6.180226766340688e-05, "epoch": 0.7932011331444759, "percentage": 45.29, "elapsed_time": "5:47:00", "remaining_time": "6:59:07", "throughput": 865.11, "total_tokens": 18012320}
|
343 |
+
{"current_steps": 1545, "total_steps": 3400, "loss": 0.5791, "lr": 6.156584283609359e-05, "epoch": 0.7957764614988411, "percentage": 45.44, "elapsed_time": "5:48:03", "remaining_time": "6:57:54", "throughput": 865.3, "total_tokens": 18070792}
|
344 |
+
{"current_steps": 1550, "total_steps": 3400, "loss": 0.667, "lr": 6.132914447489137e-05, "epoch": 0.7983517898532063, "percentage": 45.59, "elapsed_time": "5:49:06", "remaining_time": "6:56:40", "throughput": 865.5, "total_tokens": 18129304}
|
345 |
+
{"current_steps": 1550, "total_steps": 3400, "eval_loss": 0.6617516279220581, "epoch": 0.7983517898532063, "percentage": 45.59, "elapsed_time": "5:49:22", "remaining_time": "6:57:00", "throughput": 864.83, "total_tokens": 18129304}
|
346 |
+
{"current_steps": 1555, "total_steps": 3400, "loss": 0.5681, "lr": 6.109217817775139e-05, "epoch": 0.8009271182075715, "percentage": 45.74, "elapsed_time": "5:50:30", "remaining_time": "6:55:52", "throughput": 864.83, "total_tokens": 18187728}
|
347 |
+
{"current_steps": 1560, "total_steps": 3400, "loss": 0.6292, "lr": 6.085494954896156e-05, "epoch": 0.8035024465619367, "percentage": 45.88, "elapsed_time": "5:51:33", "remaining_time": "6:54:39", "throughput": 865.0, "total_tokens": 18246192}
|
348 |
+
{"current_steps": 1565, "total_steps": 3400, "loss": 0.6512, "lr": 6.061746419901388e-05, "epoch": 0.8060777749163018, "percentage": 46.03, "elapsed_time": "5:52:37", "remaining_time": "6:53:27", "throughput": 865.16, "total_tokens": 18304632}
|
349 |
+
{"current_steps": 1570, "total_steps": 3400, "loss": 0.5476, "lr": 6.0379727744471936e-05, "epoch": 0.808653103270667, "percentage": 46.18, "elapsed_time": "5:53:40", "remaining_time": "6:52:15", "throughput": 865.34, "total_tokens": 18363136}
|
350 |
+
{"current_steps": 1575, "total_steps": 3400, "loss": 0.5632, "lr": 6.014174580783794e-05, "epoch": 0.8112284316250322, "percentage": 46.32, "elapsed_time": "5:54:44", "remaining_time": "6:51:02", "throughput": 865.51, "total_tokens": 18421592}
|
351 |
+
{"current_steps": 1580, "total_steps": 3400, "loss": 0.6225, "lr": 5.990352401741981e-05, "epoch": 0.8138037599793974, "percentage": 46.47, "elapsed_time": "5:55:47", "remaining_time": "6:49:49", "throughput": 865.69, "total_tokens": 18480104}
|
352 |
+
{"current_steps": 1585, "total_steps": 3400, "loss": 0.5801, "lr": 5.9665068007197976e-05, "epoch": 0.8163790883337626, "percentage": 46.62, "elapsed_time": "5:56:50", "remaining_time": "6:48:37", "throughput": 865.85, "total_tokens": 18538600}
|
353 |
+
{"current_steps": 1590, "total_steps": 3400, "loss": 0.6364, "lr": 5.94263834166923e-05, "epoch": 0.8189544166881277, "percentage": 46.76, "elapsed_time": "5:57:53", "remaining_time": "6:47:25", "throughput": 866.03, "total_tokens": 18597104}
|
354 |
+
{"current_steps": 1595, "total_steps": 3400, "loss": 0.6088, "lr": 5.918747589082853e-05, "epoch": 0.8215297450424929, "percentage": 46.91, "elapsed_time": "5:58:57", "remaining_time": "6:46:12", "throughput": 866.21, "total_tokens": 18655584}
|
355 |
+
{"current_steps": 1600, "total_steps": 3400, "loss": 0.6564, "lr": 5.8948351079804875e-05, "epoch": 0.8241050733968581, "percentage": 47.06, "elapsed_time": "6:00:00", "remaining_time": "6:45:00", "throughput": 866.39, "total_tokens": 18714072}
|
356 |
+
{"current_steps": 1600, "total_steps": 3400, "eval_loss": 0.6319106221199036, "epoch": 0.8241050733968581, "percentage": 47.06, "elapsed_time": "6:00:16", "remaining_time": "6:45:18", "throughput": 865.75, "total_tokens": 18714072}
|