Training in progress, step 305, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 97307544
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5f85ee4007de30cce47e53502885ad7a6303b7507013ef2ac0c367abd0d2db62
|
3 |
size 97307544
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 49846644
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:98ccbba4e80f30bfb9eb8ba273a60067269331eebe7952e1fad15758917114fc
|
3 |
size 49846644
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14244
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:87f85da328c2d350b694213a4eb2a046df41e793247213e45a460c5007fc25d8
|
3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c9ae4e393408096742838e5dcf898dde9c63683742cac0999c60bedcf0debf4c
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 0.
|
5 |
"eval_steps": 386,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -2115,6 +2115,41 @@
|
|
2115 |
"learning_rate": 9.894140129553981e-05,
|
2116 |
"loss": 0.8469,
|
2117 |
"step": 300
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2118 |
}
|
2119 |
],
|
2120 |
"logging_steps": 1,
|
@@ -2134,7 +2169,7 @@
|
|
2134 |
"attributes": {}
|
2135 |
}
|
2136 |
},
|
2137 |
-
"total_flos": 3.
|
2138 |
"train_batch_size": 4,
|
2139 |
"trial_name": null,
|
2140 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 0.39469427369783244,
|
5 |
"eval_steps": 386,
|
6 |
+
"global_step": 305,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
2115 |
"learning_rate": 9.894140129553981e-05,
|
2116 |
"loss": 0.8469,
|
2117 |
"step": 300
|
2118 |
+
},
|
2119 |
+
{
|
2120 |
+
"epoch": 0.38951795535425426,
|
2121 |
+
"grad_norm": 0.8275448083877563,
|
2122 |
+
"learning_rate": 9.893294587985843e-05,
|
2123 |
+
"loss": 0.8295,
|
2124 |
+
"step": 301
|
2125 |
+
},
|
2126 |
+
{
|
2127 |
+
"epoch": 0.3908120349401488,
|
2128 |
+
"grad_norm": 0.9197372198104858,
|
2129 |
+
"learning_rate": 9.892445719430493e-05,
|
2130 |
+
"loss": 0.9363,
|
2131 |
+
"step": 302
|
2132 |
+
},
|
2133 |
+
{
|
2134 |
+
"epoch": 0.3921061145260433,
|
2135 |
+
"grad_norm": 0.7137726545333862,
|
2136 |
+
"learning_rate": 9.891593524465083e-05,
|
2137 |
+
"loss": 0.8207,
|
2138 |
+
"step": 303
|
2139 |
+
},
|
2140 |
+
{
|
2141 |
+
"epoch": 0.3934001941119379,
|
2142 |
+
"grad_norm": 0.9908462762832642,
|
2143 |
+
"learning_rate": 9.890738003669029e-05,
|
2144 |
+
"loss": 0.9287,
|
2145 |
+
"step": 304
|
2146 |
+
},
|
2147 |
+
{
|
2148 |
+
"epoch": 0.39469427369783244,
|
2149 |
+
"grad_norm": 1.0070343017578125,
|
2150 |
+
"learning_rate": 9.889879157624002e-05,
|
2151 |
+
"loss": 1.2092,
|
2152 |
+
"step": 305
|
2153 |
}
|
2154 |
],
|
2155 |
"logging_steps": 1,
|
|
|
2169 |
"attributes": {}
|
2170 |
}
|
2171 |
},
|
2172 |
+
"total_flos": 3.4097083736653824e+17,
|
2173 |
"train_batch_size": 4,
|
2174 |
"trial_name": null,
|
2175 |
"trial_params": null
|