Training in progress, step 285, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 97307544
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:bff5ea59540fe9486298d16b1c5e07e830cb7454df8e49fb5701941042f38970
|
3 |
size 97307544
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 49846644
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:cf8e705be49471bb0c8b7ab4e2654c27c1bbbac7d427de970d6341d4d6a51632
|
3 |
size 49846644
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14244
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2bd08400b897a4c94f407fc6f069e4949e235fcc43163cb517cd39dcd6f04847
|
3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:00305a50c12f8c5ecbafd69a728a0a7449bc703aaaf50988de0f32f2f4bdb6e8
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 0.
|
5 |
"eval_steps": 386,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -1975,6 +1975,41 @@
|
|
1975 |
"learning_rate": 9.910351424831546e-05,
|
1976 |
"loss": 0.9349,
|
1977 |
"step": 280
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1978 |
}
|
1979 |
],
|
1980 |
"logging_steps": 1,
|
@@ -1994,7 +2029,7 @@
|
|
1994 |
"attributes": {}
|
1995 |
}
|
1996 |
},
|
1997 |
-
"total_flos": 3.
|
1998 |
"train_batch_size": 4,
|
1999 |
"trial_name": null,
|
2000 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 0.36881268197994177,
|
5 |
"eval_steps": 386,
|
6 |
+
"global_step": 285,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
1975 |
"learning_rate": 9.910351424831546e-05,
|
1976 |
"loss": 0.9349,
|
1977 |
"step": 280
|
1978 |
+
},
|
1979 |
+
{
|
1980 |
+
"epoch": 0.36363636363636365,
|
1981 |
+
"grad_norm": 0.8846459984779358,
|
1982 |
+
"learning_rate": 9.909572540723202e-05,
|
1983 |
+
"loss": 0.7986,
|
1984 |
+
"step": 281
|
1985 |
+
},
|
1986 |
+
{
|
1987 |
+
"epoch": 0.36493044322225815,
|
1988 |
+
"grad_norm": 0.8271780610084534,
|
1989 |
+
"learning_rate": 9.908790318560146e-05,
|
1990 |
+
"loss": 0.8179,
|
1991 |
+
"step": 282
|
1992 |
+
},
|
1993 |
+
{
|
1994 |
+
"epoch": 0.3662245228081527,
|
1995 |
+
"grad_norm": 0.8694506883621216,
|
1996 |
+
"learning_rate": 9.908004758874216e-05,
|
1997 |
+
"loss": 0.8453,
|
1998 |
+
"step": 283
|
1999 |
+
},
|
2000 |
+
{
|
2001 |
+
"epoch": 0.3675186023940472,
|
2002 |
+
"grad_norm": 0.8625207543373108,
|
2003 |
+
"learning_rate": 9.90721586219952e-05,
|
2004 |
+
"loss": 0.7825,
|
2005 |
+
"step": 284
|
2006 |
+
},
|
2007 |
+
{
|
2008 |
+
"epoch": 0.36881268197994177,
|
2009 |
+
"grad_norm": 0.8270084261894226,
|
2010 |
+
"learning_rate": 9.906423629072434e-05,
|
2011 |
+
"loss": 1.0889,
|
2012 |
+
"step": 285
|
2013 |
}
|
2014 |
],
|
2015 |
"logging_steps": 1,
|
|
|
2029 |
"attributes": {}
|
2030 |
}
|
2031 |
},
|
2032 |
+
"total_flos": 3.186120939326669e+17,
|
2033 |
"train_batch_size": 4,
|
2034 |
"trial_name": null,
|
2035 |
"trial_params": null
|