Training in progress, step 2862
Browse files
run-16/checkpoint-2862/tokenizer.json
CHANGED
@@ -1,11 +1,6 @@
|
|
1 |
{
|
2 |
"version": "1.0",
|
3 |
-
"truncation":
|
4 |
-
"direction": "Right",
|
5 |
-
"max_length": 512,
|
6 |
-
"strategy": "LongestFirst",
|
7 |
-
"stride": 0
|
8 |
-
},
|
9 |
"padding": null,
|
10 |
"added_tokens": [
|
11 |
{
|
|
|
1 |
{
|
2 |
"version": "1.0",
|
3 |
+
"truncation": null,
|
|
|
|
|
|
|
|
|
|
|
4 |
"padding": null,
|
5 |
"added_tokens": [
|
6 |
{
|
run-16/checkpoint-2862/trainer_state.json
CHANGED
@@ -19,9 +19,9 @@
|
|
19 |
"epoch": 1.0,
|
20 |
"eval_accuracy": 0.6696774193548387,
|
21 |
"eval_loss": 0.40919938683509827,
|
22 |
-
"eval_runtime": 1.
|
23 |
-
"eval_samples_per_second":
|
24 |
-
"eval_steps_per_second":
|
25 |
"step": 318
|
26 |
},
|
27 |
{
|
@@ -35,9 +35,9 @@
|
|
35 |
"epoch": 2.0,
|
36 |
"eval_accuracy": 0.844516129032258,
|
37 |
"eval_loss": 0.13953416049480438,
|
38 |
-
"eval_runtime": 1.
|
39 |
-
"eval_samples_per_second":
|
40 |
-
"eval_steps_per_second":
|
41 |
"step": 636
|
42 |
},
|
43 |
{
|
@@ -51,9 +51,9 @@
|
|
51 |
"epoch": 3.0,
|
52 |
"eval_accuracy": 0.8964516129032258,
|
53 |
"eval_loss": 0.0722731500864029,
|
54 |
-
"eval_runtime": 1.
|
55 |
-
"eval_samples_per_second":
|
56 |
-
"eval_steps_per_second":
|
57 |
"step": 954
|
58 |
},
|
59 |
{
|
@@ -67,9 +67,9 @@
|
|
67 |
"epoch": 4.0,
|
68 |
"eval_accuracy": 0.9180645161290323,
|
69 |
"eval_loss": 0.050948865711688995,
|
70 |
-
"eval_runtime": 1.
|
71 |
-
"eval_samples_per_second":
|
72 |
-
"eval_steps_per_second": 40.
|
73 |
"step": 1272
|
74 |
},
|
75 |
{
|
@@ -83,9 +83,9 @@
|
|
83 |
"epoch": 5.0,
|
84 |
"eval_accuracy": 0.9232258064516129,
|
85 |
"eval_loss": 0.042863838374614716,
|
86 |
-
"eval_runtime": 1.
|
87 |
-
"eval_samples_per_second":
|
88 |
-
"eval_steps_per_second":
|
89 |
"step": 1590
|
90 |
},
|
91 |
{
|
@@ -99,9 +99,9 @@
|
|
99 |
"epoch": 6.0,
|
100 |
"eval_accuracy": 0.9261290322580645,
|
101 |
"eval_loss": 0.03817891329526901,
|
102 |
-
"eval_runtime": 1.
|
103 |
-
"eval_samples_per_second":
|
104 |
-
"eval_steps_per_second":
|
105 |
"step": 1908
|
106 |
},
|
107 |
{
|
@@ -115,9 +115,9 @@
|
|
115 |
"epoch": 7.0,
|
116 |
"eval_accuracy": 0.9303225806451613,
|
117 |
"eval_loss": 0.035540465265512466,
|
118 |
-
"eval_runtime": 1.
|
119 |
-
"eval_samples_per_second":
|
120 |
-
"eval_steps_per_second":
|
121 |
"step": 2226
|
122 |
},
|
123 |
{
|
@@ -131,9 +131,9 @@
|
|
131 |
"epoch": 8.0,
|
132 |
"eval_accuracy": 0.932258064516129,
|
133 |
"eval_loss": 0.034126147627830505,
|
134 |
-
"eval_runtime": 1.
|
135 |
-
"eval_samples_per_second":
|
136 |
-
"eval_steps_per_second":
|
137 |
"step": 2544
|
138 |
},
|
139 |
{
|
@@ -161,11 +161,11 @@
|
|
161 |
"attributes": {}
|
162 |
}
|
163 |
},
|
164 |
-
"total_flos":
|
165 |
"train_batch_size": 48,
|
166 |
"trial_name": null,
|
167 |
"trial_params": {
|
168 |
-
"alpha": 0.
|
169 |
"num_train_epochs": 9,
|
170 |
"temperature": 2
|
171 |
}
|
|
|
19 |
"epoch": 1.0,
|
20 |
"eval_accuracy": 0.6696774193548387,
|
21 |
"eval_loss": 0.40919938683509827,
|
22 |
+
"eval_runtime": 1.562,
|
23 |
+
"eval_samples_per_second": 1984.622,
|
24 |
+
"eval_steps_per_second": 41.613,
|
25 |
"step": 318
|
26 |
},
|
27 |
{
|
|
|
35 |
"epoch": 2.0,
|
36 |
"eval_accuracy": 0.844516129032258,
|
37 |
"eval_loss": 0.13953416049480438,
|
38 |
+
"eval_runtime": 1.3987,
|
39 |
+
"eval_samples_per_second": 2216.309,
|
40 |
+
"eval_steps_per_second": 46.471,
|
41 |
"step": 636
|
42 |
},
|
43 |
{
|
|
|
51 |
"epoch": 3.0,
|
52 |
"eval_accuracy": 0.8964516129032258,
|
53 |
"eval_loss": 0.0722731500864029,
|
54 |
+
"eval_runtime": 1.4106,
|
55 |
+
"eval_samples_per_second": 2197.617,
|
56 |
+
"eval_steps_per_second": 46.079,
|
57 |
"step": 954
|
58 |
},
|
59 |
{
|
|
|
67 |
"epoch": 4.0,
|
68 |
"eval_accuracy": 0.9180645161290323,
|
69 |
"eval_loss": 0.050948865711688995,
|
70 |
+
"eval_runtime": 1.604,
|
71 |
+
"eval_samples_per_second": 1932.713,
|
72 |
+
"eval_steps_per_second": 40.525,
|
73 |
"step": 1272
|
74 |
},
|
75 |
{
|
|
|
83 |
"epoch": 5.0,
|
84 |
"eval_accuracy": 0.9232258064516129,
|
85 |
"eval_loss": 0.042863838374614716,
|
86 |
+
"eval_runtime": 1.6025,
|
87 |
+
"eval_samples_per_second": 1934.471,
|
88 |
+
"eval_steps_per_second": 40.561,
|
89 |
"step": 1590
|
90 |
},
|
91 |
{
|
|
|
99 |
"epoch": 6.0,
|
100 |
"eval_accuracy": 0.9261290322580645,
|
101 |
"eval_loss": 0.03817891329526901,
|
102 |
+
"eval_runtime": 1.6062,
|
103 |
+
"eval_samples_per_second": 1930.001,
|
104 |
+
"eval_steps_per_second": 40.468,
|
105 |
"step": 1908
|
106 |
},
|
107 |
{
|
|
|
115 |
"epoch": 7.0,
|
116 |
"eval_accuracy": 0.9303225806451613,
|
117 |
"eval_loss": 0.035540465265512466,
|
118 |
+
"eval_runtime": 1.4224,
|
119 |
+
"eval_samples_per_second": 2179.412,
|
120 |
+
"eval_steps_per_second": 45.697,
|
121 |
"step": 2226
|
122 |
},
|
123 |
{
|
|
|
131 |
"epoch": 8.0,
|
132 |
"eval_accuracy": 0.932258064516129,
|
133 |
"eval_loss": 0.034126147627830505,
|
134 |
+
"eval_runtime": 1.422,
|
135 |
+
"eval_samples_per_second": 2180.024,
|
136 |
+
"eval_steps_per_second": 45.71,
|
137 |
"step": 2544
|
138 |
},
|
139 |
{
|
|
|
161 |
"attributes": {}
|
162 |
}
|
163 |
},
|
164 |
+
"total_flos": 742838622388212.0,
|
165 |
"train_batch_size": 48,
|
166 |
"trial_name": null,
|
167 |
"trial_params": {
|
168 |
+
"alpha": 0.30033087334188296,
|
169 |
"num_train_epochs": 9,
|
170 |
"temperature": 2
|
171 |
}
|
run-16/checkpoint-2862/training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 5368
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5c86f3d5ef41ce4b0c9fef4ceb006e512b2745b2eac6815d060edaed99ccb914
|
3 |
size 5368
|
runs/Dec21_12-52-13_P920/events.out.tfevents.1734762203.P920.912596.17
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4fa933ecfb408cd33395883e029415cbf11cb8445429c4738b511bd3c4c615e5
|
3 |
+
size 17738
|