CMLL commited on
Commit
30c199d
·
verified ·
1 Parent(s): 4a9e478

Upload folder using huggingface_hub

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. README.md +3 -3
  2. adapter_model.safetensors +1 -1
  3. all_results.json +4 -4
  4. checkpoint-100/adapter_model.safetensors +1 -1
  5. checkpoint-100/optimizer.pt +1 -1
  6. checkpoint-100/scheduler.pt +1 -1
  7. checkpoint-100/trainer_state.json +40 -40
  8. checkpoint-100/training_args.bin +1 -1
  9. checkpoint-1000/adapter_model.safetensors +1 -1
  10. checkpoint-1000/optimizer.pt +1 -1
  11. checkpoint-1000/rng_state.pth +1 -1
  12. checkpoint-1000/scheduler.pt +1 -1
  13. checkpoint-1000/trainer_state.json +400 -400
  14. checkpoint-1000/training_args.bin +1 -1
  15. checkpoint-1100/adapter_model.safetensors +1 -1
  16. checkpoint-1100/optimizer.pt +1 -1
  17. checkpoint-1100/rng_state.pth +1 -1
  18. checkpoint-1100/scheduler.pt +1 -1
  19. checkpoint-1100/trainer_state.json +440 -440
  20. checkpoint-1100/training_args.bin +1 -1
  21. checkpoint-1200/adapter_model.safetensors +1 -1
  22. checkpoint-1200/optimizer.pt +1 -1
  23. checkpoint-1200/rng_state.pth +1 -1
  24. checkpoint-1200/scheduler.pt +1 -1
  25. checkpoint-1200/trainer_state.json +480 -480
  26. checkpoint-1200/training_args.bin +1 -1
  27. checkpoint-1300/adapter_model.safetensors +1 -1
  28. checkpoint-1300/optimizer.pt +1 -1
  29. checkpoint-1300/rng_state.pth +1 -1
  30. checkpoint-1300/scheduler.pt +1 -1
  31. checkpoint-1300/trainer_state.json +520 -520
  32. checkpoint-1300/training_args.bin +1 -1
  33. checkpoint-1400/adapter_model.safetensors +1 -1
  34. checkpoint-1400/optimizer.pt +1 -1
  35. checkpoint-1400/rng_state.pth +1 -1
  36. checkpoint-1400/scheduler.pt +1 -1
  37. checkpoint-1400/trainer_state.json +560 -560
  38. checkpoint-1400/training_args.bin +1 -1
  39. checkpoint-1500/adapter_model.safetensors +1 -1
  40. checkpoint-1500/optimizer.pt +1 -1
  41. checkpoint-1500/rng_state.pth +1 -1
  42. checkpoint-1500/scheduler.pt +1 -1
  43. checkpoint-1500/trainer_state.json +600 -600
  44. checkpoint-1500/training_args.bin +1 -1
  45. checkpoint-1600/adapter_model.safetensors +1 -1
  46. checkpoint-1600/optimizer.pt +1 -1
  47. checkpoint-1600/rng_state.pth +1 -1
  48. checkpoint-1600/scheduler.pt +1 -1
  49. checkpoint-1600/trainer_state.json +640 -640
  50. checkpoint-1600/training_args.bin +1 -1
README.md CHANGED
@@ -7,14 +7,14 @@ tags:
7
  - generated_from_trainer
8
  base_model: Qwen/Qwen1.5-0.5B-Chat
9
  model-index:
10
- - name: train_2024-02-22-01-50-49
11
  results: []
12
  ---
13
 
14
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
15
  should probably proofread and complete it, then remove this comment. -->
16
 
17
- # train_2024-02-22-01-50-49
18
 
19
  This model is a fine-tuned version of [Qwen/Qwen1.5-0.5B-Chat](https://huggingface.co/Qwen/Qwen1.5-0.5B-Chat) on the TCM dataset.
20
 
@@ -35,7 +35,7 @@ More information needed
35
  ### Training hyperparameters
36
 
37
  The following hyperparameters were used during training:
38
- - learning_rate: 2e-05
39
  - train_batch_size: 4
40
  - eval_batch_size: 8
41
  - seed: 42
 
7
  - generated_from_trainer
8
  base_model: Qwen/Qwen1.5-0.5B-Chat
9
  model-index:
10
+ - name: train_2024-02-22-16
11
  results: []
12
  ---
13
 
14
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
15
  should probably proofread and complete it, then remove this comment. -->
16
 
17
+ # train_2024-02-22-16
18
 
19
  This model is a fine-tuned version of [Qwen/Qwen1.5-0.5B-Chat](https://huggingface.co/Qwen/Qwen1.5-0.5B-Chat) on the TCM dataset.
20
 
 
35
  ### Training hyperparameters
36
 
37
  The following hyperparameters were used during training:
38
+ - learning_rate: 5e-05
39
  - train_batch_size: 4
40
  - eval_batch_size: 8
41
  - seed: 42
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8a72061ac045d65f070c70835e3ef8fbeee8b539105c37d935cb148d8a1c243c
3
  size 3158328
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4b54d33d6a2dcc2d74fa883dc6ad486fa65f1646a4586664cdaddb91b171a149
3
  size 3158328
all_results.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
  "epoch": 3.0,
3
- "train_loss": 2.635484080746734,
4
- "train_runtime": 8118.4972,
5
- "train_samples_per_second": 14.744,
6
- "train_steps_per_second": 0.922
7
  }
 
1
  {
2
  "epoch": 3.0,
3
+ "train_loss": 2.352705135602933,
4
+ "train_runtime": 8221.6909,
5
+ "train_samples_per_second": 14.559,
6
+ "train_steps_per_second": 0.91
7
  }
checkpoint-100/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:47cb2f49992f5a7aaef8bdc14b1dfc4ab17a01fc867360b66a8f38be93fcc27f
3
  size 3158328
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4e6451fe7eb7e7465c00a064da2078e81ed4fc291ef9364056891d1845385673
3
  size 3158328
checkpoint-100/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0254f7f39d0c7383561d56725c864a1d53a3a3c738e786cd4b9acdc8dc921347
3
  size 6372346
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7dd493eb016f0614f29b4db9029bd6040bb0956e8f6031ed6ec7a433bd0f8900
3
  size 6372346
checkpoint-100/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:af875236f293f3366cb3d9c3c7f77550b6d835517e2e3242699c4c71898cef49
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0a40310012af07efbd7aed7d72e21741500df9d564c341c2396d7011df171596
3
  size 1064
checkpoint-100/trainer_state.json CHANGED
@@ -10,122 +10,122 @@
10
  "log_history": [
11
  {
12
  "epoch": 0.0,
13
- "learning_rate": 1.999997796189788e-05,
14
- "loss": 2.7465,
15
  "step": 5
16
  },
17
  {
18
  "epoch": 0.0,
19
- "learning_rate": 1.9999911847688657e-05,
20
- "loss": 2.6877,
21
  "step": 10
22
  },
23
  {
24
  "epoch": 0.01,
25
- "learning_rate": 1.999980165766374e-05,
26
- "loss": 2.6848,
27
  "step": 15
28
  },
29
  {
30
  "epoch": 0.01,
31
- "learning_rate": 1.9999647392308798e-05,
32
- "loss": 2.7677,
33
  "step": 20
34
  },
35
  {
36
  "epoch": 0.01,
37
- "learning_rate": 1.9999449052303777e-05,
38
- "loss": 2.7784,
39
  "step": 25
40
  },
41
  {
42
  "epoch": 0.01,
43
- "learning_rate": 1.9999206638522888e-05,
44
- "loss": 2.8161,
45
  "step": 30
46
  },
47
  {
48
  "epoch": 0.01,
49
- "learning_rate": 1.9998920152034595e-05,
50
- "loss": 2.6752,
51
  "step": 35
52
  },
53
  {
54
  "epoch": 0.02,
55
- "learning_rate": 1.9998589594101623e-05,
56
- "loss": 2.6403,
57
  "step": 40
58
  },
59
  {
60
  "epoch": 0.02,
61
- "learning_rate": 1.9998214966180948e-05,
62
- "loss": 2.6433,
63
  "step": 45
64
  },
65
  {
66
  "epoch": 0.02,
67
- "learning_rate": 1.999779626992378e-05,
68
- "loss": 2.5787,
69
  "step": 50
70
  },
71
  {
72
  "epoch": 0.02,
73
- "learning_rate": 1.9997333507175583e-05,
74
- "loss": 2.6812,
75
  "step": 55
76
  },
77
  {
78
  "epoch": 0.02,
79
- "learning_rate": 1.9996826679976033e-05,
80
- "loss": 2.7287,
81
  "step": 60
82
  },
83
  {
84
  "epoch": 0.03,
85
- "learning_rate": 1.9996275790559037e-05,
86
- "loss": 2.6465,
87
  "step": 65
88
  },
89
  {
90
  "epoch": 0.03,
91
- "learning_rate": 1.99956808413527e-05,
92
- "loss": 2.5268,
93
  "step": 70
94
  },
95
  {
96
  "epoch": 0.03,
97
- "learning_rate": 1.999504183497934e-05,
98
- "loss": 2.6893,
99
  "step": 75
100
  },
101
  {
102
  "epoch": 0.03,
103
- "learning_rate": 1.9994358774255444e-05,
104
- "loss": 2.6274,
105
  "step": 80
106
  },
107
  {
108
  "epoch": 0.03,
109
- "learning_rate": 1.9993631662191696e-05,
110
- "loss": 2.6232,
111
  "step": 85
112
  },
113
  {
114
  "epoch": 0.04,
115
- "learning_rate": 1.9992860501992924e-05,
116
- "loss": 2.7188,
117
  "step": 90
118
  },
119
  {
120
  "epoch": 0.04,
121
- "learning_rate": 1.9992045297058108e-05,
122
- "loss": 2.5388,
123
  "step": 95
124
  },
125
  {
126
  "epoch": 0.04,
127
- "learning_rate": 1.9991186050980366e-05,
128
- "loss": 2.6793,
129
  "step": 100
130
  }
131
  ],
 
10
  "log_history": [
11
  {
12
  "epoch": 0.0,
13
+ "learning_rate": 4.99999449047447e-05,
14
+ "loss": 2.7413,
15
  "step": 5
16
  },
17
  {
18
  "epoch": 0.0,
19
+ "learning_rate": 4.9999779619221645e-05,
20
+ "loss": 2.6684,
21
  "step": 10
22
  },
23
  {
24
  "epoch": 0.01,
25
+ "learning_rate": 4.999950414415935e-05,
26
+ "loss": 2.6511,
27
  "step": 15
28
  },
29
  {
30
  "epoch": 0.01,
31
+ "learning_rate": 4.999911848077199e-05,
32
+ "loss": 2.72,
33
  "step": 20
34
  },
35
  {
36
  "epoch": 0.01,
37
+ "learning_rate": 4.999862263075944e-05,
38
+ "loss": 2.7222,
39
  "step": 25
40
  },
41
  {
42
  "epoch": 0.01,
43
+ "learning_rate": 4.999801659630722e-05,
44
+ "loss": 2.7479,
45
  "step": 30
46
  },
47
  {
48
  "epoch": 0.01,
49
+ "learning_rate": 4.999730038008649e-05,
50
+ "loss": 2.6063,
51
  "step": 35
52
  },
53
  {
54
  "epoch": 0.02,
55
+ "learning_rate": 4.9996473985254055e-05,
56
+ "loss": 2.576,
57
  "step": 40
58
  },
59
  {
60
  "epoch": 0.02,
61
+ "learning_rate": 4.999553741545237e-05,
62
+ "loss": 2.579,
63
  "step": 45
64
  },
65
  {
66
  "epoch": 0.02,
67
+ "learning_rate": 4.999449067480945e-05,
68
+ "loss": 2.5114,
69
  "step": 50
70
  },
71
  {
72
  "epoch": 0.02,
73
+ "learning_rate": 4.9993333767938954e-05,
74
+ "loss": 2.6174,
75
  "step": 55
76
  },
77
  {
78
  "epoch": 0.02,
79
+ "learning_rate": 4.9992066699940085e-05,
80
+ "loss": 2.6581,
81
  "step": 60
82
  },
83
  {
84
  "epoch": 0.03,
85
+ "learning_rate": 4.9990689476397586e-05,
86
+ "loss": 2.586,
87
  "step": 65
88
  },
89
  {
90
  "epoch": 0.03,
91
+ "learning_rate": 4.998920210338175e-05,
92
+ "loss": 2.4642,
93
  "step": 70
94
  },
95
  {
96
  "epoch": 0.03,
97
+ "learning_rate": 4.9987604587448343e-05,
98
+ "loss": 2.6262,
99
  "step": 75
100
  },
101
  {
102
  "epoch": 0.03,
103
+ "learning_rate": 4.998589693563861e-05,
104
+ "loss": 2.5678,
105
  "step": 80
106
  },
107
  {
108
  "epoch": 0.03,
109
+ "learning_rate": 4.998407915547924e-05,
110
+ "loss": 2.5597,
111
  "step": 85
112
  },
113
  {
114
  "epoch": 0.04,
115
+ "learning_rate": 4.9982151254982304e-05,
116
+ "loss": 2.6578,
117
  "step": 90
118
  },
119
  {
120
  "epoch": 0.04,
121
+ "learning_rate": 4.998011324264527e-05,
122
+ "loss": 2.4825,
123
  "step": 95
124
  },
125
  {
126
  "epoch": 0.04,
127
+ "learning_rate": 4.997796512745091e-05,
128
+ "loss": 2.6197,
129
  "step": 100
130
  }
131
  ],
checkpoint-100/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c481a87016b183d9406558b8812ee05f00d60a3c721055833e3cdda34cf9bb26
3
  size 4920
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1b42f69ee6ce9030b2875b2859f3499c1e1c8f11e9ffa417d0dbdab2b51173da
3
  size 4920
checkpoint-1000/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:daa3ace4db8788cb931ef75e75be426e406a0c3385c5ed0daee45e45870e81fc
3
  size 3158328
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f5b675d3e7ec1a779b8012f9dcbd83c98ffcb47cd6819b92275a68e32b94920f
3
  size 3158328
checkpoint-1000/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e77b0f72e8de4935175f876165c3e5b30af28872e5cf0f93f3175b50a7142f51
3
  size 6372346
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:23a39341f0721091475bf06f2e4d54a467f4fe2a2f69c6fce2afb2d8ec68196f
3
  size 6372346
checkpoint-1000/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e2c0360ce941ee730c6f2a46af569dcd5ffb08bf45fafc080b3769edd586dde9
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7acf51402f91f1239e4ad985d4685f0b3be92dd192090a0bd41e3db3711b157f
3
  size 14244
checkpoint-1000/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cbb15b708822e2f21bbe2b8b32e42ae6f47a448e81da506799b4867a45b68e6a
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:86de478e9bf42a74f4dc8332822a04a6a8582e87a1e4dba535e120377810c392
3
  size 1064
checkpoint-1000/trainer_state.json CHANGED
@@ -10,1202 +10,1202 @@
10
  "log_history": [
11
  {
12
  "epoch": 0.0,
13
- "learning_rate": 1.999997796189788e-05,
14
- "loss": 2.7465,
15
  "step": 5
16
  },
17
  {
18
  "epoch": 0.0,
19
- "learning_rate": 1.9999911847688657e-05,
20
- "loss": 2.6877,
21
  "step": 10
22
  },
23
  {
24
  "epoch": 0.01,
25
- "learning_rate": 1.999980165766374e-05,
26
- "loss": 2.6848,
27
  "step": 15
28
  },
29
  {
30
  "epoch": 0.01,
31
- "learning_rate": 1.9999647392308798e-05,
32
- "loss": 2.7677,
33
  "step": 20
34
  },
35
  {
36
  "epoch": 0.01,
37
- "learning_rate": 1.9999449052303777e-05,
38
- "loss": 2.7784,
39
  "step": 25
40
  },
41
  {
42
  "epoch": 0.01,
43
- "learning_rate": 1.9999206638522888e-05,
44
- "loss": 2.8161,
45
  "step": 30
46
  },
47
  {
48
  "epoch": 0.01,
49
- "learning_rate": 1.9998920152034595e-05,
50
- "loss": 2.6752,
51
  "step": 35
52
  },
53
  {
54
  "epoch": 0.02,
55
- "learning_rate": 1.9998589594101623e-05,
56
- "loss": 2.6403,
57
  "step": 40
58
  },
59
  {
60
  "epoch": 0.02,
61
- "learning_rate": 1.9998214966180948e-05,
62
- "loss": 2.6433,
63
  "step": 45
64
  },
65
  {
66
  "epoch": 0.02,
67
- "learning_rate": 1.999779626992378e-05,
68
- "loss": 2.5787,
69
  "step": 50
70
  },
71
  {
72
  "epoch": 0.02,
73
- "learning_rate": 1.9997333507175583e-05,
74
- "loss": 2.6812,
75
  "step": 55
76
  },
77
  {
78
  "epoch": 0.02,
79
- "learning_rate": 1.9996826679976033e-05,
80
- "loss": 2.7287,
81
  "step": 60
82
  },
83
  {
84
  "epoch": 0.03,
85
- "learning_rate": 1.9996275790559037e-05,
86
- "loss": 2.6465,
87
  "step": 65
88
  },
89
  {
90
  "epoch": 0.03,
91
- "learning_rate": 1.99956808413527e-05,
92
- "loss": 2.5268,
93
  "step": 70
94
  },
95
  {
96
  "epoch": 0.03,
97
- "learning_rate": 1.999504183497934e-05,
98
- "loss": 2.6893,
99
  "step": 75
100
  },
101
  {
102
  "epoch": 0.03,
103
- "learning_rate": 1.9994358774255444e-05,
104
- "loss": 2.6274,
105
  "step": 80
106
  },
107
  {
108
  "epoch": 0.03,
109
- "learning_rate": 1.9993631662191696e-05,
110
- "loss": 2.6232,
111
  "step": 85
112
  },
113
  {
114
  "epoch": 0.04,
115
- "learning_rate": 1.9992860501992924e-05,
116
- "loss": 2.7188,
117
  "step": 90
118
  },
119
  {
120
  "epoch": 0.04,
121
- "learning_rate": 1.9992045297058108e-05,
122
- "loss": 2.5388,
123
  "step": 95
124
  },
125
  {
126
  "epoch": 0.04,
127
- "learning_rate": 1.9991186050980366e-05,
128
- "loss": 2.6793,
129
  "step": 100
130
  },
131
  {
132
  "epoch": 0.04,
133
- "learning_rate": 1.9990282767546926e-05,
134
- "loss": 2.5523,
135
  "step": 105
136
  },
137
  {
138
  "epoch": 0.04,
139
- "learning_rate": 1.998933545073912e-05,
140
- "loss": 2.5763,
141
  "step": 110
142
  },
143
  {
144
  "epoch": 0.05,
145
- "learning_rate": 1.998834410473236e-05,
146
- "loss": 2.6447,
147
  "step": 115
148
  },
149
  {
150
  "epoch": 0.05,
151
- "learning_rate": 1.998730873389612e-05,
152
- "loss": 2.5579,
153
  "step": 120
154
  },
155
  {
156
  "epoch": 0.05,
157
- "learning_rate": 1.998622934279393e-05,
158
- "loss": 2.5884,
159
  "step": 125
160
  },
161
  {
162
  "epoch": 0.05,
163
- "learning_rate": 1.9985105936183327e-05,
164
- "loss": 2.5051,
165
  "step": 130
166
  },
167
  {
168
  "epoch": 0.05,
169
- "learning_rate": 1.9983938519015868e-05,
170
- "loss": 2.6014,
171
  "step": 135
172
  },
173
  {
174
  "epoch": 0.06,
175
- "learning_rate": 1.998272709643708e-05,
176
- "loss": 2.5878,
177
  "step": 140
178
  },
179
  {
180
  "epoch": 0.06,
181
- "learning_rate": 1.998147167378645e-05,
182
- "loss": 2.6642,
183
  "step": 145
184
  },
185
  {
186
  "epoch": 0.06,
187
- "learning_rate": 1.998017225659742e-05,
188
- "loss": 2.5077,
189
  "step": 150
190
  },
191
  {
192
  "epoch": 0.06,
193
- "learning_rate": 1.9978828850597312e-05,
194
- "loss": 2.5921,
195
  "step": 155
196
  },
197
  {
198
  "epoch": 0.06,
199
- "learning_rate": 1.9977441461707358e-05,
200
- "loss": 2.5577,
201
  "step": 160
202
  },
203
  {
204
  "epoch": 0.07,
205
- "learning_rate": 1.9976010096042634e-05,
206
- "loss": 2.524,
207
  "step": 165
208
  },
209
  {
210
  "epoch": 0.07,
211
- "learning_rate": 1.9974534759912068e-05,
212
- "loss": 2.5708,
213
  "step": 170
214
  },
215
  {
216
  "epoch": 0.07,
217
- "learning_rate": 1.997301545981837e-05,
218
- "loss": 2.5578,
219
  "step": 175
220
  },
221
  {
222
  "epoch": 0.07,
223
- "learning_rate": 1.9971452202458048e-05,
224
- "loss": 2.6874,
225
  "step": 180
226
  },
227
  {
228
  "epoch": 0.07,
229
- "learning_rate": 1.9969844994721338e-05,
230
- "loss": 2.535,
231
  "step": 185
232
  },
233
  {
234
  "epoch": 0.08,
235
- "learning_rate": 1.996819384369221e-05,
236
- "loss": 2.5816,
237
  "step": 190
238
  },
239
  {
240
  "epoch": 0.08,
241
- "learning_rate": 1.9966498756648305e-05,
242
- "loss": 2.6225,
243
  "step": 195
244
  },
245
  {
246
  "epoch": 0.08,
247
- "learning_rate": 1.9964759741060926e-05,
248
- "loss": 2.5387,
249
  "step": 200
250
  },
251
  {
252
  "epoch": 0.08,
253
- "learning_rate": 1.9962976804594993e-05,
254
- "loss": 2.524,
255
  "step": 205
256
  },
257
  {
258
  "epoch": 0.08,
259
- "learning_rate": 1.996114995510901e-05,
260
- "loss": 2.488,
261
  "step": 210
262
  },
263
  {
264
  "epoch": 0.09,
265
- "learning_rate": 1.9959279200655044e-05,
266
- "loss": 2.5824,
267
  "step": 215
268
  },
269
  {
270
  "epoch": 0.09,
271
- "learning_rate": 1.9957364549478663e-05,
272
- "loss": 2.5828,
273
  "step": 220
274
  },
275
  {
276
  "epoch": 0.09,
277
- "learning_rate": 1.9955406010018928e-05,
278
- "loss": 2.5137,
279
  "step": 225
280
  },
281
  {
282
  "epoch": 0.09,
283
- "learning_rate": 1.9953403590908334e-05,
284
- "loss": 2.5539,
285
  "step": 230
286
  },
287
  {
288
  "epoch": 0.09,
289
- "learning_rate": 1.995135730097278e-05,
290
- "loss": 2.6099,
291
  "step": 235
292
  },
293
  {
294
  "epoch": 0.1,
295
- "learning_rate": 1.994926714923155e-05,
296
- "loss": 2.5309,
297
  "step": 240
298
  },
299
  {
300
  "epoch": 0.1,
301
- "learning_rate": 1.9947133144897225e-05,
302
- "loss": 2.5152,
303
  "step": 245
304
  },
305
  {
306
  "epoch": 0.1,
307
- "learning_rate": 1.9944955297375693e-05,
308
- "loss": 2.4738,
309
  "step": 250
310
  },
311
  {
312
  "epoch": 0.1,
313
- "learning_rate": 1.9942733616266076e-05,
314
- "loss": 2.5173,
315
  "step": 255
316
  },
317
  {
318
  "epoch": 0.1,
319
- "learning_rate": 1.99404681113607e-05,
320
- "loss": 2.578,
321
  "step": 260
322
  },
323
  {
324
  "epoch": 0.11,
325
- "learning_rate": 1.993815879264506e-05,
326
- "loss": 2.5089,
327
  "step": 265
328
  },
329
  {
330
  "epoch": 0.11,
331
- "learning_rate": 1.9935805670297744e-05,
332
- "loss": 2.5872,
333
  "step": 270
334
  },
335
  {
336
  "epoch": 0.11,
337
- "learning_rate": 1.993340875469043e-05,
338
- "loss": 2.5882,
339
  "step": 275
340
  },
341
  {
342
  "epoch": 0.11,
343
- "learning_rate": 1.993096805638781e-05,
344
- "loss": 2.5789,
345
  "step": 280
346
  },
347
  {
348
  "epoch": 0.11,
349
- "learning_rate": 1.9928483586147553e-05,
350
- "loss": 2.488,
351
  "step": 285
352
  },
353
  {
354
  "epoch": 0.12,
355
- "learning_rate": 1.9925955354920265e-05,
356
- "loss": 2.617,
357
  "step": 290
358
  },
359
  {
360
  "epoch": 0.12,
361
- "learning_rate": 1.992338337384943e-05,
362
- "loss": 2.5752,
363
  "step": 295
364
  },
365
  {
366
  "epoch": 0.12,
367
- "learning_rate": 1.992076765427136e-05,
368
- "loss": 2.5529,
369
  "step": 300
370
  },
371
  {
372
  "epoch": 0.12,
373
- "learning_rate": 1.9918108207715156e-05,
374
- "loss": 2.4942,
375
  "step": 305
376
  },
377
  {
378
  "epoch": 0.12,
379
- "learning_rate": 1.991540504590265e-05,
380
- "loss": 2.5456,
381
  "step": 310
382
  },
383
  {
384
  "epoch": 0.13,
385
- "learning_rate": 1.991265818074835e-05,
386
- "loss": 2.5957,
387
  "step": 315
388
  },
389
  {
390
  "epoch": 0.13,
391
- "learning_rate": 1.99098676243594e-05,
392
- "loss": 2.5699,
393
  "step": 320
394
  },
395
  {
396
  "epoch": 0.13,
397
- "learning_rate": 1.9907033389035512e-05,
398
- "loss": 2.5544,
399
  "step": 325
400
  },
401
  {
402
  "epoch": 0.13,
403
- "learning_rate": 1.9904155487268912e-05,
404
- "loss": 2.538,
405
  "step": 330
406
  },
407
  {
408
  "epoch": 0.13,
409
- "learning_rate": 1.990123393174431e-05,
410
- "loss": 2.6055,
411
  "step": 335
412
  },
413
  {
414
  "epoch": 0.14,
415
- "learning_rate": 1.9898268735338807e-05,
416
- "loss": 2.5846,
417
  "step": 340
418
  },
419
  {
420
  "epoch": 0.14,
421
- "learning_rate": 1.9895259911121866e-05,
422
- "loss": 2.5405,
423
  "step": 345
424
  },
425
  {
426
  "epoch": 0.14,
427
- "learning_rate": 1.9892207472355243e-05,
428
- "loss": 2.5162,
429
  "step": 350
430
  },
431
  {
432
  "epoch": 0.14,
433
- "learning_rate": 1.988911143249292e-05,
434
- "loss": 2.5484,
435
  "step": 355
436
  },
437
  {
438
  "epoch": 0.14,
439
- "learning_rate": 1.9885971805181083e-05,
440
- "loss": 2.671,
441
  "step": 360
442
  },
443
  {
444
  "epoch": 0.15,
445
- "learning_rate": 1.9882788604258e-05,
446
- "loss": 2.5696,
447
  "step": 365
448
  },
449
  {
450
  "epoch": 0.15,
451
- "learning_rate": 1.987956184375402e-05,
452
- "loss": 2.5021,
453
  "step": 370
454
  },
455
  {
456
  "epoch": 0.15,
457
- "learning_rate": 1.9876291537891482e-05,
458
- "loss": 2.5644,
459
  "step": 375
460
  },
461
  {
462
  "epoch": 0.15,
463
- "learning_rate": 1.9872977701084645e-05,
464
- "loss": 2.5386,
465
  "step": 380
466
  },
467
  {
468
  "epoch": 0.15,
469
- "learning_rate": 1.9869620347939652e-05,
470
- "loss": 2.554,
471
  "step": 385
472
  },
473
  {
474
  "epoch": 0.16,
475
- "learning_rate": 1.9866219493254433e-05,
476
- "loss": 2.4798,
477
  "step": 390
478
  },
479
  {
480
  "epoch": 0.16,
481
- "learning_rate": 1.986277515201867e-05,
482
- "loss": 2.538,
483
  "step": 395
484
  },
485
  {
486
  "epoch": 0.16,
487
- "learning_rate": 1.9859287339413714e-05,
488
- "loss": 2.5041,
489
  "step": 400
490
  },
491
  {
492
  "epoch": 0.16,
493
- "learning_rate": 1.9855756070812514e-05,
494
- "loss": 2.7067,
495
  "step": 405
496
  },
497
  {
498
  "epoch": 0.16,
499
- "learning_rate": 1.9852181361779563e-05,
500
- "loss": 2.5054,
501
  "step": 410
502
  },
503
  {
504
  "epoch": 0.17,
505
- "learning_rate": 1.984856322807082e-05,
506
- "loss": 2.4815,
507
  "step": 415
508
  },
509
  {
510
  "epoch": 0.17,
511
- "learning_rate": 1.9844901685633648e-05,
512
- "loss": 2.5885,
513
  "step": 420
514
  },
515
  {
516
  "epoch": 0.17,
517
- "learning_rate": 1.9841196750606735e-05,
518
- "loss": 2.521,
519
  "step": 425
520
  },
521
  {
522
  "epoch": 0.17,
523
- "learning_rate": 1.9837448439320027e-05,
524
- "loss": 2.4937,
525
  "step": 430
526
  },
527
  {
528
  "epoch": 0.17,
529
- "learning_rate": 1.983365676829466e-05,
530
- "loss": 2.5877,
531
  "step": 435
532
  },
533
  {
534
  "epoch": 0.18,
535
- "learning_rate": 1.9829821754242885e-05,
536
- "loss": 2.5761,
537
  "step": 440
538
  },
539
  {
540
  "epoch": 0.18,
541
- "learning_rate": 1.9825943414067974e-05,
542
- "loss": 2.4917,
543
  "step": 445
544
  },
545
  {
546
  "epoch": 0.18,
547
- "learning_rate": 1.9822021764864194e-05,
548
- "loss": 2.5434,
549
  "step": 450
550
  },
551
  {
552
  "epoch": 0.18,
553
- "learning_rate": 1.9818056823916675e-05,
554
- "loss": 2.5906,
555
  "step": 455
556
  },
557
  {
558
  "epoch": 0.18,
559
- "learning_rate": 1.9814048608701374e-05,
560
- "loss": 2.5508,
561
  "step": 460
562
  },
563
  {
564
  "epoch": 0.19,
565
- "learning_rate": 1.980999713688499e-05,
566
- "loss": 2.6005,
567
  "step": 465
568
  },
569
  {
570
  "epoch": 0.19,
571
- "learning_rate": 1.980590242632486e-05,
572
- "loss": 2.523,
573
  "step": 470
574
  },
575
  {
576
  "epoch": 0.19,
577
- "learning_rate": 1.9801764495068923e-05,
578
- "loss": 2.5453,
579
  "step": 475
580
  },
581
  {
582
  "epoch": 0.19,
583
- "learning_rate": 1.979758336135561e-05,
584
- "loss": 2.6426,
585
  "step": 480
586
  },
587
  {
588
  "epoch": 0.19,
589
- "learning_rate": 1.9793359043613768e-05,
590
- "loss": 2.5454,
591
  "step": 485
592
  },
593
  {
594
  "epoch": 0.2,
595
- "learning_rate": 1.9789091560462587e-05,
596
- "loss": 2.6071,
597
  "step": 490
598
  },
599
  {
600
  "epoch": 0.2,
601
- "learning_rate": 1.9784780930711514e-05,
602
- "loss": 2.5913,
603
  "step": 495
604
  },
605
  {
606
  "epoch": 0.2,
607
- "learning_rate": 1.9780427173360165e-05,
608
- "loss": 2.5082,
609
  "step": 500
610
  },
611
  {
612
  "epoch": 0.2,
613
- "learning_rate": 1.977603030759825e-05,
614
- "loss": 2.565,
615
  "step": 505
616
  },
617
  {
618
  "epoch": 0.2,
619
- "learning_rate": 1.977159035280549e-05,
620
- "loss": 2.6365,
621
  "step": 510
622
  },
623
  {
624
  "epoch": 0.21,
625
- "learning_rate": 1.9767107328551515e-05,
626
- "loss": 2.6872,
627
  "step": 515
628
  },
629
  {
630
  "epoch": 0.21,
631
- "learning_rate": 1.9762581254595797e-05,
632
- "loss": 2.6222,
633
  "step": 520
634
  },
635
  {
636
  "epoch": 0.21,
637
- "learning_rate": 1.975801215088755e-05,
638
- "loss": 2.5633,
639
  "step": 525
640
  },
641
  {
642
  "epoch": 0.21,
643
- "learning_rate": 1.9753400037565653e-05,
644
- "loss": 2.6579,
645
  "step": 530
646
  },
647
  {
648
  "epoch": 0.21,
649
- "learning_rate": 1.9748744934958548e-05,
650
- "loss": 2.5572,
651
  "step": 535
652
  },
653
  {
654
  "epoch": 0.22,
655
- "learning_rate": 1.974404686358416e-05,
656
- "loss": 2.5845,
657
  "step": 540
658
  },
659
  {
660
  "epoch": 0.22,
661
- "learning_rate": 1.97393058441498e-05,
662
- "loss": 2.6484,
663
  "step": 545
664
  },
665
  {
666
  "epoch": 0.22,
667
- "learning_rate": 1.973452189755209e-05,
668
- "loss": 2.5473,
669
  "step": 550
670
  },
671
  {
672
  "epoch": 0.22,
673
- "learning_rate": 1.9729695044876847e-05,
674
- "loss": 2.5995,
675
  "step": 555
676
  },
677
  {
678
  "epoch": 0.22,
679
- "learning_rate": 1.9724825307399003e-05,
680
- "loss": 2.5473,
681
  "step": 560
682
  },
683
  {
684
  "epoch": 0.23,
685
- "learning_rate": 1.971991270658252e-05,
686
- "loss": 2.5798,
687
  "step": 565
688
  },
689
  {
690
  "epoch": 0.23,
691
- "learning_rate": 1.971495726408027e-05,
692
- "loss": 2.655,
693
  "step": 570
694
  },
695
  {
696
  "epoch": 0.23,
697
- "learning_rate": 1.970995900173397e-05,
698
- "loss": 2.6904,
699
  "step": 575
700
  },
701
  {
702
  "epoch": 0.23,
703
- "learning_rate": 1.9704917941574053e-05,
704
- "loss": 2.6973,
705
  "step": 580
706
  },
707
  {
708
  "epoch": 0.23,
709
- "learning_rate": 1.969983410581961e-05,
710
- "loss": 2.6518,
711
  "step": 585
712
  },
713
  {
714
  "epoch": 0.24,
715
- "learning_rate": 1.969470751687825e-05,
716
- "loss": 2.5849,
717
  "step": 590
718
  },
719
  {
720
  "epoch": 0.24,
721
- "learning_rate": 1.9689538197346035e-05,
722
- "loss": 2.4715,
723
  "step": 595
724
  },
725
  {
726
  "epoch": 0.24,
727
- "learning_rate": 1.9684326170007365e-05,
728
- "loss": 2.7246,
729
  "step": 600
730
  },
731
  {
732
  "epoch": 0.24,
733
- "learning_rate": 1.9679071457834874e-05,
734
- "loss": 2.5482,
735
  "step": 605
736
  },
737
  {
738
  "epoch": 0.24,
739
- "learning_rate": 1.967377408398934e-05,
740
- "loss": 2.6084,
741
  "step": 610
742
  },
743
  {
744
  "epoch": 0.25,
745
- "learning_rate": 1.966843407181958e-05,
746
- "loss": 2.5144,
747
  "step": 615
748
  },
749
  {
750
  "epoch": 0.25,
751
- "learning_rate": 1.9663051444862335e-05,
752
- "loss": 2.7663,
753
  "step": 620
754
  },
755
  {
756
  "epoch": 0.25,
757
- "learning_rate": 1.9657626226842187e-05,
758
- "loss": 2.5697,
759
  "step": 625
760
  },
761
  {
762
  "epoch": 0.25,
763
- "learning_rate": 1.9652158441671435e-05,
764
- "loss": 2.6379,
765
  "step": 630
766
  },
767
  {
768
  "epoch": 0.25,
769
- "learning_rate": 1.964664811345e-05,
770
- "loss": 2.6784,
771
  "step": 635
772
  },
773
  {
774
  "epoch": 0.26,
775
- "learning_rate": 1.964109526646532e-05,
776
- "loss": 2.6936,
777
  "step": 640
778
  },
779
  {
780
  "epoch": 0.26,
781
- "learning_rate": 1.963549992519223e-05,
782
- "loss": 2.5672,
783
  "step": 645
784
  },
785
  {
786
  "epoch": 0.26,
787
- "learning_rate": 1.962986211429288e-05,
788
- "loss": 2.62,
789
  "step": 650
790
  },
791
  {
792
  "epoch": 0.26,
793
- "learning_rate": 1.9624181858616593e-05,
794
- "loss": 2.7293,
795
  "step": 655
796
  },
797
  {
798
  "epoch": 0.26,
799
- "learning_rate": 1.9618459183199782e-05,
800
- "loss": 2.6636,
801
  "step": 660
802
  },
803
  {
804
  "epoch": 0.27,
805
- "learning_rate": 1.961269411326583e-05,
806
- "loss": 2.5106,
807
  "step": 665
808
  },
809
  {
810
  "epoch": 0.27,
811
- "learning_rate": 1.9606886674224977e-05,
812
- "loss": 2.6878,
813
  "step": 670
814
  },
815
  {
816
  "epoch": 0.27,
817
- "learning_rate": 1.960103689167421e-05,
818
- "loss": 2.693,
819
  "step": 675
820
  },
821
  {
822
  "epoch": 0.27,
823
- "learning_rate": 1.9595144791397142e-05,
824
- "loss": 2.6562,
825
  "step": 680
826
  },
827
  {
828
  "epoch": 0.27,
829
- "learning_rate": 1.9589210399363925e-05,
830
- "loss": 2.6269,
831
  "step": 685
832
  },
833
  {
834
  "epoch": 0.28,
835
- "learning_rate": 1.95832337417311e-05,
836
- "loss": 2.6459,
837
  "step": 690
838
  },
839
  {
840
  "epoch": 0.28,
841
- "learning_rate": 1.9577214844841515e-05,
842
- "loss": 2.5765,
843
  "step": 695
844
  },
845
  {
846
  "epoch": 0.28,
847
- "learning_rate": 1.957115373522417e-05,
848
- "loss": 2.6113,
849
  "step": 700
850
  },
851
  {
852
  "epoch": 0.28,
853
- "learning_rate": 1.956505043959414e-05,
854
- "loss": 2.6641,
855
  "step": 705
856
  },
857
  {
858
  "epoch": 0.28,
859
- "learning_rate": 1.955890498485244e-05,
860
- "loss": 2.6493,
861
  "step": 710
862
  },
863
  {
864
  "epoch": 0.29,
865
- "learning_rate": 1.9552717398085898e-05,
866
- "loss": 2.6135,
867
  "step": 715
868
  },
869
  {
870
  "epoch": 0.29,
871
- "learning_rate": 1.954648770656705e-05,
872
- "loss": 2.6182,
873
  "step": 720
874
  },
875
  {
876
  "epoch": 0.29,
877
- "learning_rate": 1.954021593775401e-05,
878
- "loss": 2.6487,
879
  "step": 725
880
  },
881
  {
882
  "epoch": 0.29,
883
- "learning_rate": 1.9533902119290352e-05,
884
- "loss": 2.5927,
885
  "step": 730
886
  },
887
  {
888
  "epoch": 0.29,
889
- "learning_rate": 1.952754627900499e-05,
890
- "loss": 2.5825,
891
  "step": 735
892
  },
893
  {
894
  "epoch": 0.3,
895
- "learning_rate": 1.9521148444912065e-05,
896
- "loss": 2.7193,
897
  "step": 740
898
  },
899
  {
900
  "epoch": 0.3,
901
- "learning_rate": 1.9514708645210793e-05,
902
- "loss": 2.5802,
903
  "step": 745
904
  },
905
  {
906
  "epoch": 0.3,
907
- "learning_rate": 1.9508226908285368e-05,
908
- "loss": 2.6628,
909
  "step": 750
910
  },
911
  {
912
  "epoch": 0.3,
913
- "learning_rate": 1.950170326270483e-05,
914
- "loss": 2.5847,
915
  "step": 755
916
  },
917
  {
918
  "epoch": 0.3,
919
- "learning_rate": 1.9495137737222925e-05,
920
- "loss": 2.6594,
921
  "step": 760
922
  },
923
  {
924
  "epoch": 0.31,
925
- "learning_rate": 1.9488530360778007e-05,
926
- "loss": 2.6096,
927
  "step": 765
928
  },
929
  {
930
  "epoch": 0.31,
931
- "learning_rate": 1.948188116249287e-05,
932
- "loss": 2.6378,
933
  "step": 770
934
  },
935
  {
936
  "epoch": 0.31,
937
- "learning_rate": 1.9475190171674675e-05,
938
- "loss": 2.5984,
939
  "step": 775
940
  },
941
  {
942
  "epoch": 0.31,
943
- "learning_rate": 1.9468457417814753e-05,
944
- "loss": 2.6437,
945
  "step": 780
946
  },
947
  {
948
  "epoch": 0.31,
949
- "learning_rate": 1.9461682930588534e-05,
950
- "loss": 2.6522,
951
  "step": 785
952
  },
953
  {
954
  "epoch": 0.32,
955
- "learning_rate": 1.9454866739855384e-05,
956
- "loss": 2.6242,
957
  "step": 790
958
  },
959
  {
960
  "epoch": 0.32,
961
- "learning_rate": 1.944800887565849e-05,
962
- "loss": 2.5961,
963
  "step": 795
964
  },
965
  {
966
  "epoch": 0.32,
967
- "learning_rate": 1.9441109368224704e-05,
968
- "loss": 2.6365,
969
  "step": 800
970
  },
971
  {
972
  "epoch": 0.32,
973
- "learning_rate": 1.9434168247964447e-05,
974
- "loss": 2.5674,
975
  "step": 805
976
  },
977
  {
978
  "epoch": 0.32,
979
- "learning_rate": 1.9427185545471537e-05,
980
- "loss": 2.6369,
981
  "step": 810
982
  },
983
  {
984
  "epoch": 0.33,
985
- "learning_rate": 1.9420161291523076e-05,
986
- "loss": 2.5763,
987
  "step": 815
988
  },
989
  {
990
  "epoch": 0.33,
991
- "learning_rate": 1.941309551707931e-05,
992
- "loss": 2.5651,
993
  "step": 820
994
  },
995
  {
996
  "epoch": 0.33,
997
- "learning_rate": 1.9405988253283492e-05,
998
- "loss": 2.6223,
999
  "step": 825
1000
  },
1001
  {
1002
  "epoch": 0.33,
1003
- "learning_rate": 1.939883953146174e-05,
1004
- "loss": 2.6616,
1005
  "step": 830
1006
  },
1007
  {
1008
  "epoch": 0.33,
1009
- "learning_rate": 1.939164938312291e-05,
1010
- "loss": 2.6496,
1011
  "step": 835
1012
  },
1013
  {
1014
  "epoch": 0.34,
1015
- "learning_rate": 1.9384417839958443e-05,
1016
- "loss": 2.7161,
1017
  "step": 840
1018
  },
1019
  {
1020
  "epoch": 0.34,
1021
- "learning_rate": 1.937714493384224e-05,
1022
- "loss": 2.6047,
1023
  "step": 845
1024
  },
1025
  {
1026
  "epoch": 0.34,
1027
- "learning_rate": 1.936983069683051e-05,
1028
- "loss": 2.5978,
1029
  "step": 850
1030
  },
1031
  {
1032
  "epoch": 0.34,
1033
- "learning_rate": 1.936247516116163e-05,
1034
- "loss": 2.6331,
1035
  "step": 855
1036
  },
1037
  {
1038
  "epoch": 0.34,
1039
- "learning_rate": 1.935507835925601e-05,
1040
- "loss": 2.5679,
1041
  "step": 860
1042
  },
1043
  {
1044
  "epoch": 0.35,
1045
- "learning_rate": 1.934764032371595e-05,
1046
- "loss": 2.6647,
1047
  "step": 865
1048
  },
1049
  {
1050
  "epoch": 0.35,
1051
- "learning_rate": 1.9340161087325483e-05,
1052
- "loss": 2.6142,
1053
  "step": 870
1054
  },
1055
  {
1056
  "epoch": 0.35,
1057
- "learning_rate": 1.9332640683050243e-05,
1058
- "loss": 2.6181,
1059
  "step": 875
1060
  },
1061
  {
1062
  "epoch": 0.35,
1063
- "learning_rate": 1.932507914403732e-05,
1064
- "loss": 2.6105,
1065
  "step": 880
1066
  },
1067
  {
1068
  "epoch": 0.35,
1069
- "learning_rate": 1.9317476503615108e-05,
1070
- "loss": 2.6415,
1071
  "step": 885
1072
  },
1073
  {
1074
  "epoch": 0.36,
1075
- "learning_rate": 1.9309832795293156e-05,
1076
- "loss": 2.6736,
1077
  "step": 890
1078
  },
1079
  {
1080
  "epoch": 0.36,
1081
- "learning_rate": 1.930214805276204e-05,
1082
- "loss": 2.6507,
1083
  "step": 895
1084
  },
1085
  {
1086
  "epoch": 0.36,
1087
- "learning_rate": 1.9294422309893177e-05,
1088
- "loss": 2.6771,
1089
  "step": 900
1090
  },
1091
  {
1092
  "epoch": 0.36,
1093
- "learning_rate": 1.9286655600738707e-05,
1094
- "loss": 2.6763,
1095
  "step": 905
1096
  },
1097
  {
1098
  "epoch": 0.36,
1099
- "learning_rate": 1.9278847959531348e-05,
1100
- "loss": 2.7135,
1101
  "step": 910
1102
  },
1103
  {
1104
  "epoch": 0.37,
1105
- "learning_rate": 1.927099942068421e-05,
1106
- "loss": 2.6166,
1107
  "step": 915
1108
  },
1109
  {
1110
  "epoch": 0.37,
1111
- "learning_rate": 1.9263110018790673e-05,
1112
- "loss": 2.6238,
1113
  "step": 920
1114
  },
1115
  {
1116
  "epoch": 0.37,
1117
- "learning_rate": 1.9255179788624233e-05,
1118
- "loss": 2.6424,
1119
  "step": 925
1120
  },
1121
  {
1122
  "epoch": 0.37,
1123
- "learning_rate": 1.9247208765138325e-05,
1124
- "loss": 2.6023,
1125
  "step": 930
1126
  },
1127
  {
1128
  "epoch": 0.37,
1129
- "learning_rate": 1.9239196983466204e-05,
1130
- "loss": 2.58,
1131
  "step": 935
1132
  },
1133
  {
1134
  "epoch": 0.38,
1135
- "learning_rate": 1.9231144478920756e-05,
1136
- "loss": 2.6173,
1137
  "step": 940
1138
  },
1139
  {
1140
  "epoch": 0.38,
1141
- "learning_rate": 1.9223051286994368e-05,
1142
- "loss": 2.628,
1143
  "step": 945
1144
  },
1145
  {
1146
  "epoch": 0.38,
1147
- "learning_rate": 1.9214917443358753e-05,
1148
- "loss": 2.6868,
1149
  "step": 950
1150
  },
1151
  {
1152
  "epoch": 0.38,
1153
- "learning_rate": 1.9206742983864813e-05,
1154
- "loss": 2.6342,
1155
  "step": 955
1156
  },
1157
  {
1158
  "epoch": 0.38,
1159
- "learning_rate": 1.9198527944542462e-05,
1160
- "loss": 2.5934,
1161
  "step": 960
1162
  },
1163
  {
1164
  "epoch": 0.39,
1165
- "learning_rate": 1.919027236160047e-05,
1166
- "loss": 2.6354,
1167
  "step": 965
1168
  },
1169
  {
1170
  "epoch": 0.39,
1171
- "learning_rate": 1.9181976271426315e-05,
1172
- "loss": 2.5955,
1173
  "step": 970
1174
  },
1175
  {
1176
  "epoch": 0.39,
1177
- "learning_rate": 1.9173639710586015e-05,
1178
- "loss": 2.6134,
1179
  "step": 975
1180
  },
1181
  {
1182
  "epoch": 0.39,
1183
- "learning_rate": 1.9165262715823966e-05,
1184
- "loss": 2.7,
1185
  "step": 980
1186
  },
1187
  {
1188
  "epoch": 0.39,
1189
- "learning_rate": 1.915684532406278e-05,
1190
- "loss": 2.7197,
1191
  "step": 985
1192
  },
1193
  {
1194
  "epoch": 0.4,
1195
- "learning_rate": 1.9148387572403123e-05,
1196
- "loss": 2.4881,
1197
  "step": 990
1198
  },
1199
  {
1200
  "epoch": 0.4,
1201
- "learning_rate": 1.913988949812356e-05,
1202
- "loss": 2.6999,
1203
  "step": 995
1204
  },
1205
  {
1206
  "epoch": 0.4,
1207
- "learning_rate": 1.9131351138680368e-05,
1208
- "loss": 2.5981,
1209
  "step": 1000
1210
  }
1211
  ],
 
10
  "log_history": [
11
  {
12
  "epoch": 0.0,
13
+ "learning_rate": 4.99999449047447e-05,
14
+ "loss": 2.7413,
15
  "step": 5
16
  },
17
  {
18
  "epoch": 0.0,
19
+ "learning_rate": 4.9999779619221645e-05,
20
+ "loss": 2.6684,
21
  "step": 10
22
  },
23
  {
24
  "epoch": 0.01,
25
+ "learning_rate": 4.999950414415935e-05,
26
+ "loss": 2.6511,
27
  "step": 15
28
  },
29
  {
30
  "epoch": 0.01,
31
+ "learning_rate": 4.999911848077199e-05,
32
+ "loss": 2.72,
33
  "step": 20
34
  },
35
  {
36
  "epoch": 0.01,
37
+ "learning_rate": 4.999862263075944e-05,
38
+ "loss": 2.7222,
39
  "step": 25
40
  },
41
  {
42
  "epoch": 0.01,
43
+ "learning_rate": 4.999801659630722e-05,
44
+ "loss": 2.7479,
45
  "step": 30
46
  },
47
  {
48
  "epoch": 0.01,
49
+ "learning_rate": 4.999730038008649e-05,
50
+ "loss": 2.6063,
51
  "step": 35
52
  },
53
  {
54
  "epoch": 0.02,
55
+ "learning_rate": 4.9996473985254055e-05,
56
+ "loss": 2.576,
57
  "step": 40
58
  },
59
  {
60
  "epoch": 0.02,
61
+ "learning_rate": 4.999553741545237e-05,
62
+ "loss": 2.579,
63
  "step": 45
64
  },
65
  {
66
  "epoch": 0.02,
67
+ "learning_rate": 4.999449067480945e-05,
68
+ "loss": 2.5114,
69
  "step": 50
70
  },
71
  {
72
  "epoch": 0.02,
73
+ "learning_rate": 4.9993333767938954e-05,
74
+ "loss": 2.6174,
75
  "step": 55
76
  },
77
  {
78
  "epoch": 0.02,
79
+ "learning_rate": 4.9992066699940085e-05,
80
+ "loss": 2.6581,
81
  "step": 60
82
  },
83
  {
84
  "epoch": 0.03,
85
+ "learning_rate": 4.9990689476397586e-05,
86
+ "loss": 2.586,
87
  "step": 65
88
  },
89
  {
90
  "epoch": 0.03,
91
+ "learning_rate": 4.998920210338175e-05,
92
+ "loss": 2.4642,
93
  "step": 70
94
  },
95
  {
96
  "epoch": 0.03,
97
+ "learning_rate": 4.9987604587448343e-05,
98
+ "loss": 2.6262,
99
  "step": 75
100
  },
101
  {
102
  "epoch": 0.03,
103
+ "learning_rate": 4.998589693563861e-05,
104
+ "loss": 2.5678,
105
  "step": 80
106
  },
107
  {
108
  "epoch": 0.03,
109
+ "learning_rate": 4.998407915547924e-05,
110
+ "loss": 2.5597,
111
  "step": 85
112
  },
113
  {
114
  "epoch": 0.04,
115
+ "learning_rate": 4.9982151254982304e-05,
116
+ "loss": 2.6578,
117
  "step": 90
118
  },
119
  {
120
  "epoch": 0.04,
121
+ "learning_rate": 4.998011324264527e-05,
122
+ "loss": 2.4825,
123
  "step": 95
124
  },
125
  {
126
  "epoch": 0.04,
127
+ "learning_rate": 4.997796512745091e-05,
128
+ "loss": 2.6197,
129
  "step": 100
130
  },
131
  {
132
  "epoch": 0.04,
133
+ "learning_rate": 4.997570691886732e-05,
134
+ "loss": 2.4921,
135
  "step": 105
136
  },
137
  {
138
  "epoch": 0.04,
139
+ "learning_rate": 4.99733386268478e-05,
140
+ "loss": 2.5126,
141
  "step": 110
142
  },
143
  {
144
  "epoch": 0.05,
145
+ "learning_rate": 4.99708602618309e-05,
146
+ "loss": 2.584,
147
  "step": 115
148
  },
149
  {
150
  "epoch": 0.05,
151
+ "learning_rate": 4.9968271834740305e-05,
152
+ "loss": 2.4991,
153
  "step": 120
154
  },
155
  {
156
  "epoch": 0.05,
157
+ "learning_rate": 4.996557335698482e-05,
158
+ "loss": 2.5242,
159
  "step": 125
160
  },
161
  {
162
  "epoch": 0.05,
163
+ "learning_rate": 4.996276484045832e-05,
164
+ "loss": 2.4506,
165
  "step": 130
166
  },
167
  {
168
  "epoch": 0.05,
169
+ "learning_rate": 4.995984629753967e-05,
170
+ "loss": 2.5479,
171
  "step": 135
172
  },
173
  {
174
  "epoch": 0.06,
175
+ "learning_rate": 4.9956817741092696e-05,
176
+ "loss": 2.5316,
177
  "step": 140
178
  },
179
  {
180
  "epoch": 0.06,
181
+ "learning_rate": 4.995367918446613e-05,
182
+ "loss": 2.6053,
183
  "step": 145
184
  },
185
  {
186
  "epoch": 0.06,
187
+ "learning_rate": 4.995043064149354e-05,
188
+ "loss": 2.4533,
189
  "step": 150
190
  },
191
  {
192
  "epoch": 0.06,
193
+ "learning_rate": 4.9947072126493276e-05,
194
+ "loss": 2.5279,
195
  "step": 155
196
  },
197
  {
198
  "epoch": 0.06,
199
+ "learning_rate": 4.994360365426839e-05,
200
+ "loss": 2.4994,
201
  "step": 160
202
  },
203
  {
204
  "epoch": 0.07,
205
+ "learning_rate": 4.994002524010659e-05,
206
+ "loss": 2.4675,
207
  "step": 165
208
  },
209
  {
210
  "epoch": 0.07,
211
+ "learning_rate": 4.9936336899780166e-05,
212
+ "loss": 2.5233,
213
  "step": 170
214
  },
215
  {
216
  "epoch": 0.07,
217
+ "learning_rate": 4.993253864954592e-05,
218
+ "loss": 2.4974,
219
  "step": 175
220
  },
221
  {
222
  "epoch": 0.07,
223
+ "learning_rate": 4.992863050614511e-05,
224
+ "loss": 2.6295,
225
  "step": 180
226
  },
227
  {
228
  "epoch": 0.07,
229
+ "learning_rate": 4.9924612486803346e-05,
230
+ "loss": 2.4744,
231
  "step": 185
232
  },
233
  {
234
  "epoch": 0.08,
235
+ "learning_rate": 4.992048460923052e-05,
236
+ "loss": 2.5248,
237
  "step": 190
238
  },
239
  {
240
  "epoch": 0.08,
241
+ "learning_rate": 4.991624689162076e-05,
242
+ "loss": 2.5727,
243
  "step": 195
244
  },
245
  {
246
  "epoch": 0.08,
247
+ "learning_rate": 4.991189935265231e-05,
248
+ "loss": 2.4845,
249
  "step": 200
250
  },
251
  {
252
  "epoch": 0.08,
253
+ "learning_rate": 4.990744201148748e-05,
254
+ "loss": 2.4544,
255
  "step": 205
256
  },
257
  {
258
  "epoch": 0.08,
259
+ "learning_rate": 4.990287488777253e-05,
260
+ "loss": 2.4383,
261
  "step": 210
262
  },
263
  {
264
  "epoch": 0.09,
265
+ "learning_rate": 4.989819800163761e-05,
266
+ "loss": 2.5263,
267
  "step": 215
268
  },
269
  {
270
  "epoch": 0.09,
271
+ "learning_rate": 4.989341137369666e-05,
272
+ "loss": 2.5219,
273
  "step": 220
274
  },
275
  {
276
  "epoch": 0.09,
277
+ "learning_rate": 4.9888515025047316e-05,
278
+ "loss": 2.4512,
279
  "step": 225
280
  },
281
  {
282
  "epoch": 0.09,
283
+ "learning_rate": 4.988350897727083e-05,
284
+ "loss": 2.491,
285
  "step": 230
286
  },
287
  {
288
  "epoch": 0.09,
289
+ "learning_rate": 4.9878393252431953e-05,
290
+ "loss": 2.5404,
291
  "step": 235
292
  },
293
  {
294
  "epoch": 0.1,
295
+ "learning_rate": 4.987316787307888e-05,
296
+ "loss": 2.4777,
297
  "step": 240
298
  },
299
  {
300
  "epoch": 0.1,
301
+ "learning_rate": 4.9867832862243055e-05,
302
+ "loss": 2.4498,
303
  "step": 245
304
  },
305
  {
306
  "epoch": 0.1,
307
+ "learning_rate": 4.986238824343923e-05,
308
+ "loss": 2.4174,
309
  "step": 250
310
  },
311
  {
312
  "epoch": 0.1,
313
+ "learning_rate": 4.985683404066519e-05,
314
+ "loss": 2.4589,
315
  "step": 255
316
  },
317
  {
318
  "epoch": 0.1,
319
+ "learning_rate": 4.985117027840175e-05,
320
+ "loss": 2.5029,
321
  "step": 260
322
  },
323
  {
324
  "epoch": 0.11,
325
+ "learning_rate": 4.984539698161264e-05,
326
+ "loss": 2.4376,
327
  "step": 265
328
  },
329
  {
330
  "epoch": 0.11,
331
+ "learning_rate": 4.983951417574436e-05,
332
+ "loss": 2.526,
333
  "step": 270
334
  },
335
  {
336
  "epoch": 0.11,
337
+ "learning_rate": 4.983352188672608e-05,
338
+ "loss": 2.5214,
339
  "step": 275
340
  },
341
  {
342
  "epoch": 0.11,
343
+ "learning_rate": 4.982742014096952e-05,
344
+ "loss": 2.5188,
345
  "step": 280
346
  },
347
  {
348
  "epoch": 0.11,
349
+ "learning_rate": 4.982120896536888e-05,
350
+ "loss": 2.4175,
351
  "step": 285
352
  },
353
  {
354
  "epoch": 0.12,
355
+ "learning_rate": 4.981488838730066e-05,
356
+ "loss": 2.5346,
357
  "step": 290
358
  },
359
  {
360
  "epoch": 0.12,
361
+ "learning_rate": 4.980845843462357e-05,
362
+ "loss": 2.496,
363
  "step": 295
364
  },
365
  {
366
  "epoch": 0.12,
367
+ "learning_rate": 4.98019191356784e-05,
368
+ "loss": 2.477,
369
  "step": 300
370
  },
371
  {
372
  "epoch": 0.12,
373
+ "learning_rate": 4.9795270519287886e-05,
374
+ "loss": 2.4205,
375
  "step": 305
376
  },
377
  {
378
  "epoch": 0.12,
379
+ "learning_rate": 4.9788512614756624e-05,
380
+ "loss": 2.4621,
381
  "step": 310
382
  },
383
  {
384
  "epoch": 0.13,
385
+ "learning_rate": 4.9781645451870875e-05,
386
+ "loss": 2.5098,
387
  "step": 315
388
  },
389
  {
390
  "epoch": 0.13,
391
+ "learning_rate": 4.9774669060898496e-05,
392
+ "loss": 2.4847,
393
  "step": 320
394
  },
395
  {
396
  "epoch": 0.13,
397
+ "learning_rate": 4.976758347258877e-05,
398
+ "loss": 2.4873,
399
  "step": 325
400
  },
401
  {
402
  "epoch": 0.13,
403
+ "learning_rate": 4.976038871817228e-05,
404
+ "loss": 2.4462,
405
  "step": 330
406
  },
407
  {
408
  "epoch": 0.13,
409
+ "learning_rate": 4.9753084829360776e-05,
410
+ "loss": 2.4994,
411
  "step": 335
412
  },
413
  {
414
  "epoch": 0.14,
415
+ "learning_rate": 4.974567183834702e-05,
416
+ "loss": 2.4994,
417
  "step": 340
418
  },
419
  {
420
  "epoch": 0.14,
421
+ "learning_rate": 4.9738149777804665e-05,
422
+ "loss": 2.4584,
423
  "step": 345
424
  },
425
  {
426
  "epoch": 0.14,
427
+ "learning_rate": 4.97305186808881e-05,
428
+ "loss": 2.4294,
429
  "step": 350
430
  },
431
  {
432
  "epoch": 0.14,
433
+ "learning_rate": 4.9722778581232305e-05,
434
+ "loss": 2.4499,
435
  "step": 355
436
  },
437
  {
438
  "epoch": 0.14,
439
+ "learning_rate": 4.9714929512952704e-05,
440
+ "loss": 2.5851,
441
  "step": 360
442
  },
443
  {
444
  "epoch": 0.15,
445
+ "learning_rate": 4.9706971510645e-05,
446
+ "loss": 2.4746,
447
  "step": 365
448
  },
449
  {
450
  "epoch": 0.15,
451
+ "learning_rate": 4.969890460938505e-05,
452
+ "loss": 2.4184,
453
  "step": 370
454
  },
455
  {
456
  "epoch": 0.15,
457
+ "learning_rate": 4.9690728844728704e-05,
458
+ "loss": 2.4716,
459
  "step": 375
460
  },
461
  {
462
  "epoch": 0.15,
463
+ "learning_rate": 4.968244425271161e-05,
464
+ "loss": 2.4477,
465
  "step": 380
466
  },
467
  {
468
  "epoch": 0.15,
469
+ "learning_rate": 4.9674050869849124e-05,
470
+ "loss": 2.4563,
471
  "step": 385
472
  },
473
  {
474
  "epoch": 0.16,
475
+ "learning_rate": 4.966554873313608e-05,
476
+ "loss": 2.3708,
477
  "step": 390
478
  },
479
  {
480
  "epoch": 0.16,
481
+ "learning_rate": 4.9656937880046676e-05,
482
+ "loss": 2.4351,
483
  "step": 395
484
  },
485
  {
486
  "epoch": 0.16,
487
+ "learning_rate": 4.9648218348534284e-05,
488
+ "loss": 2.3933,
489
  "step": 400
490
  },
491
  {
492
  "epoch": 0.16,
493
+ "learning_rate": 4.963939017703128e-05,
494
+ "loss": 2.5871,
495
  "step": 405
496
  },
497
  {
498
  "epoch": 0.16,
499
+ "learning_rate": 4.9630453404448905e-05,
500
+ "loss": 2.3965,
501
  "step": 410
502
  },
503
  {
504
  "epoch": 0.17,
505
+ "learning_rate": 4.962140807017705e-05,
506
+ "loss": 2.372,
507
  "step": 415
508
  },
509
  {
510
  "epoch": 0.17,
511
+ "learning_rate": 4.961225421408412e-05,
512
+ "loss": 2.4847,
513
  "step": 420
514
  },
515
  {
516
  "epoch": 0.17,
517
+ "learning_rate": 4.960299187651684e-05,
518
+ "loss": 2.4046,
519
  "step": 425
520
  },
521
  {
522
  "epoch": 0.17,
523
+ "learning_rate": 4.959362109830007e-05,
524
+ "loss": 2.3912,
525
  "step": 430
526
  },
527
  {
528
  "epoch": 0.17,
529
+ "learning_rate": 4.9584141920736656e-05,
530
+ "loss": 2.4732,
531
  "step": 435
532
  },
533
  {
534
  "epoch": 0.18,
535
+ "learning_rate": 4.957455438560721e-05,
536
+ "loss": 2.4854,
537
  "step": 440
538
  },
539
  {
540
  "epoch": 0.18,
541
+ "learning_rate": 4.956485853516993e-05,
542
+ "loss": 2.3736,
543
  "step": 445
544
  },
545
  {
546
  "epoch": 0.18,
547
+ "learning_rate": 4.9555054412160476e-05,
548
+ "loss": 2.4222,
549
  "step": 450
550
  },
551
  {
552
  "epoch": 0.18,
553
+ "learning_rate": 4.9545142059791686e-05,
554
+ "loss": 2.4498,
555
  "step": 455
556
  },
557
  {
558
  "epoch": 0.18,
559
+ "learning_rate": 4.9535121521753434e-05,
560
+ "loss": 2.4172,
561
  "step": 460
562
  },
563
  {
564
  "epoch": 0.19,
565
+ "learning_rate": 4.952499284221247e-05,
566
+ "loss": 2.4742,
567
  "step": 465
568
  },
569
  {
570
  "epoch": 0.19,
571
+ "learning_rate": 4.951475606581215e-05,
572
+ "loss": 2.4008,
573
  "step": 470
574
  },
575
  {
576
  "epoch": 0.19,
577
+ "learning_rate": 4.950441123767231e-05,
578
+ "loss": 2.4278,
579
  "step": 475
580
  },
581
  {
582
  "epoch": 0.19,
583
+ "learning_rate": 4.949395840338903e-05,
584
+ "loss": 2.5041,
585
  "step": 480
586
  },
587
  {
588
  "epoch": 0.19,
589
+ "learning_rate": 4.948339760903442e-05,
590
+ "loss": 2.4088,
591
  "step": 485
592
  },
593
  {
594
  "epoch": 0.2,
595
+ "learning_rate": 4.947272890115647e-05,
596
+ "loss": 2.466,
597
  "step": 490
598
  },
599
  {
600
  "epoch": 0.2,
601
+ "learning_rate": 4.946195232677878e-05,
602
+ "loss": 2.4404,
603
  "step": 495
604
  },
605
  {
606
  "epoch": 0.2,
607
+ "learning_rate": 4.9451067933400406e-05,
608
+ "loss": 2.3658,
609
  "step": 500
610
  },
611
  {
612
  "epoch": 0.2,
613
+ "learning_rate": 4.9440075768995625e-05,
614
+ "loss": 2.4247,
615
  "step": 505
616
  },
617
  {
618
  "epoch": 0.2,
619
+ "learning_rate": 4.942897588201372e-05,
620
+ "loss": 2.5082,
621
  "step": 510
622
  },
623
  {
624
  "epoch": 0.21,
625
+ "learning_rate": 4.941776832137879e-05,
626
+ "loss": 2.5545,
627
  "step": 515
628
  },
629
  {
630
  "epoch": 0.21,
631
+ "learning_rate": 4.940645313648949e-05,
632
+ "loss": 2.4665,
633
  "step": 520
634
  },
635
  {
636
  "epoch": 0.21,
637
+ "learning_rate": 4.939503037721888e-05,
638
+ "loss": 2.4195,
639
  "step": 525
640
  },
641
  {
642
  "epoch": 0.21,
643
+ "learning_rate": 4.938350009391413e-05,
644
+ "loss": 2.4908,
645
  "step": 530
646
  },
647
  {
648
  "epoch": 0.21,
649
+ "learning_rate": 4.937186233739637e-05,
650
+ "loss": 2.4093,
651
  "step": 535
652
  },
653
  {
654
  "epoch": 0.22,
655
+ "learning_rate": 4.93601171589604e-05,
656
+ "loss": 2.44,
657
  "step": 540
658
  },
659
  {
660
  "epoch": 0.22,
661
+ "learning_rate": 4.9348264610374494e-05,
662
+ "loss": 2.4989,
663
  "step": 545
664
  },
665
  {
666
  "epoch": 0.22,
667
+ "learning_rate": 4.9336304743880226e-05,
668
+ "loss": 2.3786,
669
  "step": 550
670
  },
671
  {
672
  "epoch": 0.22,
673
+ "learning_rate": 4.932423761219211e-05,
674
+ "loss": 2.4305,
675
  "step": 555
676
  },
677
  {
678
  "epoch": 0.22,
679
+ "learning_rate": 4.931206326849751e-05,
680
+ "loss": 2.4012,
681
  "step": 560
682
  },
683
  {
684
  "epoch": 0.23,
685
+ "learning_rate": 4.92997817664563e-05,
686
+ "loss": 2.4263,
687
  "step": 565
688
  },
689
  {
690
  "epoch": 0.23,
691
+ "learning_rate": 4.9287393160200676e-05,
692
+ "loss": 2.4749,
693
  "step": 570
694
  },
695
  {
696
  "epoch": 0.23,
697
+ "learning_rate": 4.927489750433492e-05,
698
+ "loss": 2.5083,
699
  "step": 575
700
  },
701
  {
702
  "epoch": 0.23,
703
+ "learning_rate": 4.926229485393513e-05,
704
+ "loss": 2.5228,
705
  "step": 580
706
  },
707
  {
708
  "epoch": 0.23,
709
+ "learning_rate": 4.924958526454902e-05,
710
+ "loss": 2.4623,
711
  "step": 585
712
  },
713
  {
714
  "epoch": 0.24,
715
+ "learning_rate": 4.923676879219562e-05,
716
+ "loss": 2.4199,
717
  "step": 590
718
  },
719
  {
720
  "epoch": 0.24,
721
+ "learning_rate": 4.9223845493365085e-05,
722
+ "loss": 2.2906,
723
  "step": 595
724
  },
725
  {
726
  "epoch": 0.24,
727
+ "learning_rate": 4.9210815425018406e-05,
728
+ "loss": 2.5539,
729
  "step": 600
730
  },
731
  {
732
  "epoch": 0.24,
733
+ "learning_rate": 4.919767864458718e-05,
734
+ "loss": 2.3456,
735
  "step": 605
736
  },
737
  {
738
  "epoch": 0.24,
739
+ "learning_rate": 4.9184435209973354e-05,
740
+ "loss": 2.4328,
741
  "step": 610
742
  },
743
  {
744
  "epoch": 0.25,
745
+ "learning_rate": 4.917108517954895e-05,
746
+ "loss": 2.3412,
747
  "step": 615
748
  },
749
  {
750
  "epoch": 0.25,
751
+ "learning_rate": 4.9157628612155836e-05,
752
+ "loss": 2.604,
753
  "step": 620
754
  },
755
  {
756
  "epoch": 0.25,
757
+ "learning_rate": 4.9144065567105465e-05,
758
+ "loss": 2.4007,
759
  "step": 625
760
  },
761
  {
762
  "epoch": 0.25,
763
+ "learning_rate": 4.913039610417859e-05,
764
+ "loss": 2.437,
765
  "step": 630
766
  },
767
  {
768
  "epoch": 0.25,
769
+ "learning_rate": 4.9116620283624996e-05,
770
+ "loss": 2.5004,
771
  "step": 635
772
  },
773
  {
774
  "epoch": 0.26,
775
+ "learning_rate": 4.91027381661633e-05,
776
+ "loss": 2.5045,
777
  "step": 640
778
  },
779
  {
780
  "epoch": 0.26,
781
+ "learning_rate": 4.908874981298057e-05,
782
+ "loss": 2.3794,
783
  "step": 645
784
  },
785
  {
786
  "epoch": 0.26,
787
+ "learning_rate": 4.9074655285732196e-05,
788
+ "loss": 2.4621,
789
  "step": 650
790
  },
791
  {
792
  "epoch": 0.26,
793
+ "learning_rate": 4.906045464654148e-05,
794
+ "loss": 2.549,
795
  "step": 655
796
  },
797
  {
798
  "epoch": 0.26,
799
+ "learning_rate": 4.9046147957999454e-05,
800
+ "loss": 2.4567,
801
  "step": 660
802
  },
803
  {
804
  "epoch": 0.27,
805
+ "learning_rate": 4.9031735283164574e-05,
806
+ "loss": 2.3243,
807
  "step": 665
808
  },
809
  {
810
  "epoch": 0.27,
811
+ "learning_rate": 4.901721668556244e-05,
812
+ "loss": 2.4758,
813
  "step": 670
814
  },
815
  {
816
  "epoch": 0.27,
817
+ "learning_rate": 4.9002592229185515e-05,
818
+ "loss": 2.5083,
819
  "step": 675
820
  },
821
  {
822
  "epoch": 0.27,
823
+ "learning_rate": 4.898786197849285e-05,
824
+ "loss": 2.4428,
825
  "step": 680
826
  },
827
  {
828
  "epoch": 0.27,
829
+ "learning_rate": 4.897302599840981e-05,
830
+ "loss": 2.4403,
831
  "step": 685
832
  },
833
  {
834
  "epoch": 0.28,
835
+ "learning_rate": 4.895808435432776e-05,
836
+ "loss": 2.4401,
837
  "step": 690
838
  },
839
  {
840
  "epoch": 0.28,
841
+ "learning_rate": 4.8943037112103784e-05,
842
+ "loss": 2.3789,
843
  "step": 695
844
  },
845
  {
846
  "epoch": 0.28,
847
+ "learning_rate": 4.892788433806042e-05,
848
+ "loss": 2.3945,
849
  "step": 700
850
  },
851
  {
852
  "epoch": 0.28,
853
+ "learning_rate": 4.8912626098985355e-05,
854
+ "loss": 2.4758,
855
  "step": 705
856
  },
857
  {
858
  "epoch": 0.28,
859
+ "learning_rate": 4.88972624621311e-05,
860
+ "loss": 2.4646,
861
  "step": 710
862
  },
863
  {
864
  "epoch": 0.29,
865
+ "learning_rate": 4.888179349521475e-05,
866
+ "loss": 2.4225,
867
  "step": 715
868
  },
869
  {
870
  "epoch": 0.29,
871
+ "learning_rate": 4.886621926641762e-05,
872
+ "loss": 2.4074,
873
  "step": 720
874
  },
875
  {
876
  "epoch": 0.29,
877
+ "learning_rate": 4.8850539844385017e-05,
878
+ "loss": 2.4438,
879
  "step": 725
880
  },
881
  {
882
  "epoch": 0.29,
883
+ "learning_rate": 4.883475529822587e-05,
884
+ "loss": 2.3666,
885
  "step": 730
886
  },
887
  {
888
  "epoch": 0.29,
889
+ "learning_rate": 4.881886569751248e-05,
890
+ "loss": 2.3782,
891
  "step": 735
892
  },
893
  {
894
  "epoch": 0.3,
895
+ "learning_rate": 4.880287111228016e-05,
896
+ "loss": 2.4916,
897
  "step": 740
898
  },
899
  {
900
  "epoch": 0.3,
901
+ "learning_rate": 4.878677161302698e-05,
902
+ "loss": 2.3639,
903
  "step": 745
904
  },
905
  {
906
  "epoch": 0.3,
907
+ "learning_rate": 4.877056727071342e-05,
908
+ "loss": 2.4333,
909
  "step": 750
910
  },
911
  {
912
  "epoch": 0.3,
913
+ "learning_rate": 4.8754258156762075e-05,
914
+ "loss": 2.3907,
915
  "step": 755
916
  },
917
  {
918
  "epoch": 0.3,
919
+ "learning_rate": 4.8737844343057315e-05,
920
+ "loss": 2.433,
921
  "step": 760
922
  },
923
  {
924
  "epoch": 0.31,
925
+ "learning_rate": 4.8721325901945015e-05,
926
+ "loss": 2.3849,
927
  "step": 765
928
  },
929
  {
930
  "epoch": 0.31,
931
+ "learning_rate": 4.870470290623218e-05,
932
+ "loss": 2.419,
933
  "step": 770
934
  },
935
  {
936
  "epoch": 0.31,
937
+ "learning_rate": 4.8687975429186685e-05,
938
+ "loss": 2.3694,
939
  "step": 775
940
  },
941
  {
942
  "epoch": 0.31,
943
+ "learning_rate": 4.867114354453688e-05,
944
+ "loss": 2.4335,
945
  "step": 780
946
  },
947
  {
948
  "epoch": 0.31,
949
+ "learning_rate": 4.865420732647133e-05,
950
+ "loss": 2.4168,
951
  "step": 785
952
  },
953
  {
954
  "epoch": 0.32,
955
+ "learning_rate": 4.863716684963846e-05,
956
+ "loss": 2.419,
957
  "step": 790
958
  },
959
  {
960
  "epoch": 0.32,
961
+ "learning_rate": 4.862002218914622e-05,
962
+ "loss": 2.335,
963
  "step": 795
964
  },
965
  {
966
  "epoch": 0.32,
967
+ "learning_rate": 4.860277342056176e-05,
968
+ "loss": 2.4059,
969
  "step": 800
970
  },
971
  {
972
  "epoch": 0.32,
973
+ "learning_rate": 4.858542061991112e-05,
974
+ "loss": 2.3423,
975
  "step": 805
976
  },
977
  {
978
  "epoch": 0.32,
979
+ "learning_rate": 4.856796386367884e-05,
980
+ "loss": 2.4135,
981
  "step": 810
982
  },
983
  {
984
  "epoch": 0.33,
985
+ "learning_rate": 4.855040322880769e-05,
986
+ "loss": 2.3652,
987
  "step": 815
988
  },
989
  {
990
  "epoch": 0.33,
991
+ "learning_rate": 4.853273879269827e-05,
992
+ "loss": 2.3511,
993
  "step": 820
994
  },
995
  {
996
  "epoch": 0.33,
997
+ "learning_rate": 4.8514970633208726e-05,
998
+ "loss": 2.413,
999
  "step": 825
1000
  },
1001
  {
1002
  "epoch": 0.33,
1003
+ "learning_rate": 4.849709882865435e-05,
1004
+ "loss": 2.4445,
1005
  "step": 830
1006
  },
1007
  {
1008
  "epoch": 0.33,
1009
+ "learning_rate": 4.847912345780727e-05,
1010
+ "loss": 2.4221,
1011
  "step": 835
1012
  },
1013
  {
1014
  "epoch": 0.34,
1015
+ "learning_rate": 4.846104459989611e-05,
1016
+ "loss": 2.4992,
1017
  "step": 840
1018
  },
1019
  {
1020
  "epoch": 0.34,
1021
+ "learning_rate": 4.84428623346056e-05,
1022
+ "loss": 2.3923,
1023
  "step": 845
1024
  },
1025
  {
1026
  "epoch": 0.34,
1027
+ "learning_rate": 4.842457674207627e-05,
1028
+ "loss": 2.3666,
1029
  "step": 850
1030
  },
1031
  {
1032
  "epoch": 0.34,
1033
+ "learning_rate": 4.8406187902904076e-05,
1034
+ "loss": 2.4329,
1035
  "step": 855
1036
  },
1037
  {
1038
  "epoch": 0.34,
1039
+ "learning_rate": 4.8387695898140026e-05,
1040
+ "loss": 2.3429,
1041
  "step": 860
1042
  },
1043
  {
1044
  "epoch": 0.35,
1045
+ "learning_rate": 4.836910080928987e-05,
1046
+ "loss": 2.4428,
1047
  "step": 865
1048
  },
1049
  {
1050
  "epoch": 0.35,
1051
+ "learning_rate": 4.83504027183137e-05,
1052
+ "loss": 2.3842,
1053
  "step": 870
1054
  },
1055
  {
1056
  "epoch": 0.35,
1057
+ "learning_rate": 4.833160170762561e-05,
1058
+ "loss": 2.4064,
1059
  "step": 875
1060
  },
1061
  {
1062
  "epoch": 0.35,
1063
+ "learning_rate": 4.8312697860093295e-05,
1064
+ "loss": 2.3705,
1065
  "step": 880
1066
  },
1067
  {
1068
  "epoch": 0.35,
1069
+ "learning_rate": 4.829369125903776e-05,
1070
+ "loss": 2.4151,
1071
  "step": 885
1072
  },
1073
  {
1074
  "epoch": 0.36,
1075
+ "learning_rate": 4.8274581988232894e-05,
1076
+ "loss": 2.4219,
1077
  "step": 890
1078
  },
1079
  {
1080
  "epoch": 0.36,
1081
+ "learning_rate": 4.825537013190509e-05,
1082
+ "loss": 2.4042,
1083
  "step": 895
1084
  },
1085
  {
1086
  "epoch": 0.36,
1087
+ "learning_rate": 4.823605577473293e-05,
1088
+ "loss": 2.4509,
1089
  "step": 900
1090
  },
1091
  {
1092
  "epoch": 0.36,
1093
+ "learning_rate": 4.8216639001846764e-05,
1094
+ "loss": 2.4255,
1095
  "step": 905
1096
  },
1097
  {
1098
  "epoch": 0.36,
1099
+ "learning_rate": 4.8197119898828367e-05,
1100
+ "loss": 2.4928,
1101
  "step": 910
1102
  },
1103
  {
1104
  "epoch": 0.37,
1105
+ "learning_rate": 4.817749855171052e-05,
1106
+ "loss": 2.4044,
1107
  "step": 915
1108
  },
1109
  {
1110
  "epoch": 0.37,
1111
+ "learning_rate": 4.8157775046976684e-05,
1112
+ "loss": 2.4085,
1113
  "step": 920
1114
  },
1115
  {
1116
  "epoch": 0.37,
1117
+ "learning_rate": 4.813794947156058e-05,
1118
+ "loss": 2.3941,
1119
  "step": 925
1120
  },
1121
  {
1122
  "epoch": 0.37,
1123
+ "learning_rate": 4.8118021912845815e-05,
1124
+ "loss": 2.3493,
1125
  "step": 930
1126
  },
1127
  {
1128
  "epoch": 0.37,
1129
+ "learning_rate": 4.8097992458665506e-05,
1130
+ "loss": 2.3367,
1131
  "step": 935
1132
  },
1133
  {
1134
  "epoch": 0.38,
1135
+ "learning_rate": 4.807786119730189e-05,
1136
+ "loss": 2.3865,
1137
  "step": 940
1138
  },
1139
  {
1140
  "epoch": 0.38,
1141
+ "learning_rate": 4.8057628217485916e-05,
1142
+ "loss": 2.3885,
1143
  "step": 945
1144
  },
1145
  {
1146
  "epoch": 0.38,
1147
+ "learning_rate": 4.803729360839688e-05,
1148
+ "loss": 2.4296,
1149
  "step": 950
1150
  },
1151
  {
1152
  "epoch": 0.38,
1153
+ "learning_rate": 4.801685745966203e-05,
1154
+ "loss": 2.3856,
1155
  "step": 955
1156
  },
1157
  {
1158
  "epoch": 0.38,
1159
+ "learning_rate": 4.799631986135615e-05,
1160
+ "loss": 2.3631,
1161
  "step": 960
1162
  },
1163
  {
1164
  "epoch": 0.39,
1165
+ "learning_rate": 4.797568090400117e-05,
1166
+ "loss": 2.3882,
1167
  "step": 965
1168
  },
1169
  {
1170
  "epoch": 0.39,
1171
+ "learning_rate": 4.7954940678565785e-05,
1172
+ "loss": 2.3396,
1173
  "step": 970
1174
  },
1175
  {
1176
  "epoch": 0.39,
1177
+ "learning_rate": 4.793409927646504e-05,
1178
+ "loss": 2.3548,
1179
  "step": 975
1180
  },
1181
  {
1182
  "epoch": 0.39,
1183
+ "learning_rate": 4.791315678955991e-05,
1184
+ "loss": 2.4576,
1185
  "step": 980
1186
  },
1187
  {
1188
  "epoch": 0.39,
1189
+ "learning_rate": 4.789211331015695e-05,
1190
+ "loss": 2.4523,
1191
  "step": 985
1192
  },
1193
  {
1194
  "epoch": 0.4,
1195
+ "learning_rate": 4.787096893100781e-05,
1196
+ "loss": 2.2542,
1197
  "step": 990
1198
  },
1199
  {
1200
  "epoch": 0.4,
1201
+ "learning_rate": 4.784972374530889e-05,
1202
+ "loss": 2.4385,
1203
  "step": 995
1204
  },
1205
  {
1206
  "epoch": 0.4,
1207
+ "learning_rate": 4.7828377846700925e-05,
1208
+ "loss": 2.3266,
1209
  "step": 1000
1210
  }
1211
  ],
checkpoint-1000/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c481a87016b183d9406558b8812ee05f00d60a3c721055833e3cdda34cf9bb26
3
  size 4920
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1b42f69ee6ce9030b2875b2859f3499c1e1c8f11e9ffa417d0dbdab2b51173da
3
  size 4920
checkpoint-1100/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b9ea80b3c97be6fe2b7210aa1e1c9f7cc71203f1eeebf3252a7c8e277f61aa40
3
  size 3158328
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:380adf12899fef7050a2ba91b3f607c84b70b74f1ea5c365fa138ddbf06871d5
3
  size 3158328
checkpoint-1100/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a906b82ea9f377db0e3fddcdbeb72d0ad945f37b639288eea867affedbdc8d3c
3
  size 6372346
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:242354a64274a52135c1593c87a294c5835f1f5a78a8c2cb227d975aea333783
3
  size 6372346
checkpoint-1100/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1f8a19ec786e929d2ec5cf00a109504e163e58f6a85cd2e06e12f302d94820d4
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:22d1ae34df3e97c22f7d165f5c3bdae7c4ce87538df1878f01c5c9ca8aaeb93f
3
  size 14244
checkpoint-1100/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5d48b5a7719cce1a4a890f8541be4f95f09fe9a3bb8b54924212960109bda920
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7ed4aba8c56cb948b1cee8202d1e3738b069f61cd4cf7a50cd38396f92d62ba4
3
  size 1064
checkpoint-1100/trainer_state.json CHANGED
@@ -10,1322 +10,1322 @@
10
  "log_history": [
11
  {
12
  "epoch": 0.0,
13
- "learning_rate": 1.999997796189788e-05,
14
- "loss": 2.7465,
15
  "step": 5
16
  },
17
  {
18
  "epoch": 0.0,
19
- "learning_rate": 1.9999911847688657e-05,
20
- "loss": 2.6877,
21
  "step": 10
22
  },
23
  {
24
  "epoch": 0.01,
25
- "learning_rate": 1.999980165766374e-05,
26
- "loss": 2.6848,
27
  "step": 15
28
  },
29
  {
30
  "epoch": 0.01,
31
- "learning_rate": 1.9999647392308798e-05,
32
- "loss": 2.7677,
33
  "step": 20
34
  },
35
  {
36
  "epoch": 0.01,
37
- "learning_rate": 1.9999449052303777e-05,
38
- "loss": 2.7784,
39
  "step": 25
40
  },
41
  {
42
  "epoch": 0.01,
43
- "learning_rate": 1.9999206638522888e-05,
44
- "loss": 2.8161,
45
  "step": 30
46
  },
47
  {
48
  "epoch": 0.01,
49
- "learning_rate": 1.9998920152034595e-05,
50
- "loss": 2.6752,
51
  "step": 35
52
  },
53
  {
54
  "epoch": 0.02,
55
- "learning_rate": 1.9998589594101623e-05,
56
- "loss": 2.6403,
57
  "step": 40
58
  },
59
  {
60
  "epoch": 0.02,
61
- "learning_rate": 1.9998214966180948e-05,
62
- "loss": 2.6433,
63
  "step": 45
64
  },
65
  {
66
  "epoch": 0.02,
67
- "learning_rate": 1.999779626992378e-05,
68
- "loss": 2.5787,
69
  "step": 50
70
  },
71
  {
72
  "epoch": 0.02,
73
- "learning_rate": 1.9997333507175583e-05,
74
- "loss": 2.6812,
75
  "step": 55
76
  },
77
  {
78
  "epoch": 0.02,
79
- "learning_rate": 1.9996826679976033e-05,
80
- "loss": 2.7287,
81
  "step": 60
82
  },
83
  {
84
  "epoch": 0.03,
85
- "learning_rate": 1.9996275790559037e-05,
86
- "loss": 2.6465,
87
  "step": 65
88
  },
89
  {
90
  "epoch": 0.03,
91
- "learning_rate": 1.99956808413527e-05,
92
- "loss": 2.5268,
93
  "step": 70
94
  },
95
  {
96
  "epoch": 0.03,
97
- "learning_rate": 1.999504183497934e-05,
98
- "loss": 2.6893,
99
  "step": 75
100
  },
101
  {
102
  "epoch": 0.03,
103
- "learning_rate": 1.9994358774255444e-05,
104
- "loss": 2.6274,
105
  "step": 80
106
  },
107
  {
108
  "epoch": 0.03,
109
- "learning_rate": 1.9993631662191696e-05,
110
- "loss": 2.6232,
111
  "step": 85
112
  },
113
  {
114
  "epoch": 0.04,
115
- "learning_rate": 1.9992860501992924e-05,
116
- "loss": 2.7188,
117
  "step": 90
118
  },
119
  {
120
  "epoch": 0.04,
121
- "learning_rate": 1.9992045297058108e-05,
122
- "loss": 2.5388,
123
  "step": 95
124
  },
125
  {
126
  "epoch": 0.04,
127
- "learning_rate": 1.9991186050980366e-05,
128
- "loss": 2.6793,
129
  "step": 100
130
  },
131
  {
132
  "epoch": 0.04,
133
- "learning_rate": 1.9990282767546926e-05,
134
- "loss": 2.5523,
135
  "step": 105
136
  },
137
  {
138
  "epoch": 0.04,
139
- "learning_rate": 1.998933545073912e-05,
140
- "loss": 2.5763,
141
  "step": 110
142
  },
143
  {
144
  "epoch": 0.05,
145
- "learning_rate": 1.998834410473236e-05,
146
- "loss": 2.6447,
147
  "step": 115
148
  },
149
  {
150
  "epoch": 0.05,
151
- "learning_rate": 1.998730873389612e-05,
152
- "loss": 2.5579,
153
  "step": 120
154
  },
155
  {
156
  "epoch": 0.05,
157
- "learning_rate": 1.998622934279393e-05,
158
- "loss": 2.5884,
159
  "step": 125
160
  },
161
  {
162
  "epoch": 0.05,
163
- "learning_rate": 1.9985105936183327e-05,
164
- "loss": 2.5051,
165
  "step": 130
166
  },
167
  {
168
  "epoch": 0.05,
169
- "learning_rate": 1.9983938519015868e-05,
170
- "loss": 2.6014,
171
  "step": 135
172
  },
173
  {
174
  "epoch": 0.06,
175
- "learning_rate": 1.998272709643708e-05,
176
- "loss": 2.5878,
177
  "step": 140
178
  },
179
  {
180
  "epoch": 0.06,
181
- "learning_rate": 1.998147167378645e-05,
182
- "loss": 2.6642,
183
  "step": 145
184
  },
185
  {
186
  "epoch": 0.06,
187
- "learning_rate": 1.998017225659742e-05,
188
- "loss": 2.5077,
189
  "step": 150
190
  },
191
  {
192
  "epoch": 0.06,
193
- "learning_rate": 1.9978828850597312e-05,
194
- "loss": 2.5921,
195
  "step": 155
196
  },
197
  {
198
  "epoch": 0.06,
199
- "learning_rate": 1.9977441461707358e-05,
200
- "loss": 2.5577,
201
  "step": 160
202
  },
203
  {
204
  "epoch": 0.07,
205
- "learning_rate": 1.9976010096042634e-05,
206
- "loss": 2.524,
207
  "step": 165
208
  },
209
  {
210
  "epoch": 0.07,
211
- "learning_rate": 1.9974534759912068e-05,
212
- "loss": 2.5708,
213
  "step": 170
214
  },
215
  {
216
  "epoch": 0.07,
217
- "learning_rate": 1.997301545981837e-05,
218
- "loss": 2.5578,
219
  "step": 175
220
  },
221
  {
222
  "epoch": 0.07,
223
- "learning_rate": 1.9971452202458048e-05,
224
- "loss": 2.6874,
225
  "step": 180
226
  },
227
  {
228
  "epoch": 0.07,
229
- "learning_rate": 1.9969844994721338e-05,
230
- "loss": 2.535,
231
  "step": 185
232
  },
233
  {
234
  "epoch": 0.08,
235
- "learning_rate": 1.996819384369221e-05,
236
- "loss": 2.5816,
237
  "step": 190
238
  },
239
  {
240
  "epoch": 0.08,
241
- "learning_rate": 1.9966498756648305e-05,
242
- "loss": 2.6225,
243
  "step": 195
244
  },
245
  {
246
  "epoch": 0.08,
247
- "learning_rate": 1.9964759741060926e-05,
248
- "loss": 2.5387,
249
  "step": 200
250
  },
251
  {
252
  "epoch": 0.08,
253
- "learning_rate": 1.9962976804594993e-05,
254
- "loss": 2.524,
255
  "step": 205
256
  },
257
  {
258
  "epoch": 0.08,
259
- "learning_rate": 1.996114995510901e-05,
260
- "loss": 2.488,
261
  "step": 210
262
  },
263
  {
264
  "epoch": 0.09,
265
- "learning_rate": 1.9959279200655044e-05,
266
- "loss": 2.5824,
267
  "step": 215
268
  },
269
  {
270
  "epoch": 0.09,
271
- "learning_rate": 1.9957364549478663e-05,
272
- "loss": 2.5828,
273
  "step": 220
274
  },
275
  {
276
  "epoch": 0.09,
277
- "learning_rate": 1.9955406010018928e-05,
278
- "loss": 2.5137,
279
  "step": 225
280
  },
281
  {
282
  "epoch": 0.09,
283
- "learning_rate": 1.9953403590908334e-05,
284
- "loss": 2.5539,
285
  "step": 230
286
  },
287
  {
288
  "epoch": 0.09,
289
- "learning_rate": 1.995135730097278e-05,
290
- "loss": 2.6099,
291
  "step": 235
292
  },
293
  {
294
  "epoch": 0.1,
295
- "learning_rate": 1.994926714923155e-05,
296
- "loss": 2.5309,
297
  "step": 240
298
  },
299
  {
300
  "epoch": 0.1,
301
- "learning_rate": 1.9947133144897225e-05,
302
- "loss": 2.5152,
303
  "step": 245
304
  },
305
  {
306
  "epoch": 0.1,
307
- "learning_rate": 1.9944955297375693e-05,
308
- "loss": 2.4738,
309
  "step": 250
310
  },
311
  {
312
  "epoch": 0.1,
313
- "learning_rate": 1.9942733616266076e-05,
314
- "loss": 2.5173,
315
  "step": 255
316
  },
317
  {
318
  "epoch": 0.1,
319
- "learning_rate": 1.99404681113607e-05,
320
- "loss": 2.578,
321
  "step": 260
322
  },
323
  {
324
  "epoch": 0.11,
325
- "learning_rate": 1.993815879264506e-05,
326
- "loss": 2.5089,
327
  "step": 265
328
  },
329
  {
330
  "epoch": 0.11,
331
- "learning_rate": 1.9935805670297744e-05,
332
- "loss": 2.5872,
333
  "step": 270
334
  },
335
  {
336
  "epoch": 0.11,
337
- "learning_rate": 1.993340875469043e-05,
338
- "loss": 2.5882,
339
  "step": 275
340
  },
341
  {
342
  "epoch": 0.11,
343
- "learning_rate": 1.993096805638781e-05,
344
- "loss": 2.5789,
345
  "step": 280
346
  },
347
  {
348
  "epoch": 0.11,
349
- "learning_rate": 1.9928483586147553e-05,
350
- "loss": 2.488,
351
  "step": 285
352
  },
353
  {
354
  "epoch": 0.12,
355
- "learning_rate": 1.9925955354920265e-05,
356
- "loss": 2.617,
357
  "step": 290
358
  },
359
  {
360
  "epoch": 0.12,
361
- "learning_rate": 1.992338337384943e-05,
362
- "loss": 2.5752,
363
  "step": 295
364
  },
365
  {
366
  "epoch": 0.12,
367
- "learning_rate": 1.992076765427136e-05,
368
- "loss": 2.5529,
369
  "step": 300
370
  },
371
  {
372
  "epoch": 0.12,
373
- "learning_rate": 1.9918108207715156e-05,
374
- "loss": 2.4942,
375
  "step": 305
376
  },
377
  {
378
  "epoch": 0.12,
379
- "learning_rate": 1.991540504590265e-05,
380
- "loss": 2.5456,
381
  "step": 310
382
  },
383
  {
384
  "epoch": 0.13,
385
- "learning_rate": 1.991265818074835e-05,
386
- "loss": 2.5957,
387
  "step": 315
388
  },
389
  {
390
  "epoch": 0.13,
391
- "learning_rate": 1.99098676243594e-05,
392
- "loss": 2.5699,
393
  "step": 320
394
  },
395
  {
396
  "epoch": 0.13,
397
- "learning_rate": 1.9907033389035512e-05,
398
- "loss": 2.5544,
399
  "step": 325
400
  },
401
  {
402
  "epoch": 0.13,
403
- "learning_rate": 1.9904155487268912e-05,
404
- "loss": 2.538,
405
  "step": 330
406
  },
407
  {
408
  "epoch": 0.13,
409
- "learning_rate": 1.990123393174431e-05,
410
- "loss": 2.6055,
411
  "step": 335
412
  },
413
  {
414
  "epoch": 0.14,
415
- "learning_rate": 1.9898268735338807e-05,
416
- "loss": 2.5846,
417
  "step": 340
418
  },
419
  {
420
  "epoch": 0.14,
421
- "learning_rate": 1.9895259911121866e-05,
422
- "loss": 2.5405,
423
  "step": 345
424
  },
425
  {
426
  "epoch": 0.14,
427
- "learning_rate": 1.9892207472355243e-05,
428
- "loss": 2.5162,
429
  "step": 350
430
  },
431
  {
432
  "epoch": 0.14,
433
- "learning_rate": 1.988911143249292e-05,
434
- "loss": 2.5484,
435
  "step": 355
436
  },
437
  {
438
  "epoch": 0.14,
439
- "learning_rate": 1.9885971805181083e-05,
440
- "loss": 2.671,
441
  "step": 360
442
  },
443
  {
444
  "epoch": 0.15,
445
- "learning_rate": 1.9882788604258e-05,
446
- "loss": 2.5696,
447
  "step": 365
448
  },
449
  {
450
  "epoch": 0.15,
451
- "learning_rate": 1.987956184375402e-05,
452
- "loss": 2.5021,
453
  "step": 370
454
  },
455
  {
456
  "epoch": 0.15,
457
- "learning_rate": 1.9876291537891482e-05,
458
- "loss": 2.5644,
459
  "step": 375
460
  },
461
  {
462
  "epoch": 0.15,
463
- "learning_rate": 1.9872977701084645e-05,
464
- "loss": 2.5386,
465
  "step": 380
466
  },
467
  {
468
  "epoch": 0.15,
469
- "learning_rate": 1.9869620347939652e-05,
470
- "loss": 2.554,
471
  "step": 385
472
  },
473
  {
474
  "epoch": 0.16,
475
- "learning_rate": 1.9866219493254433e-05,
476
- "loss": 2.4798,
477
  "step": 390
478
  },
479
  {
480
  "epoch": 0.16,
481
- "learning_rate": 1.986277515201867e-05,
482
- "loss": 2.538,
483
  "step": 395
484
  },
485
  {
486
  "epoch": 0.16,
487
- "learning_rate": 1.9859287339413714e-05,
488
- "loss": 2.5041,
489
  "step": 400
490
  },
491
  {
492
  "epoch": 0.16,
493
- "learning_rate": 1.9855756070812514e-05,
494
- "loss": 2.7067,
495
  "step": 405
496
  },
497
  {
498
  "epoch": 0.16,
499
- "learning_rate": 1.9852181361779563e-05,
500
- "loss": 2.5054,
501
  "step": 410
502
  },
503
  {
504
  "epoch": 0.17,
505
- "learning_rate": 1.984856322807082e-05,
506
- "loss": 2.4815,
507
  "step": 415
508
  },
509
  {
510
  "epoch": 0.17,
511
- "learning_rate": 1.9844901685633648e-05,
512
- "loss": 2.5885,
513
  "step": 420
514
  },
515
  {
516
  "epoch": 0.17,
517
- "learning_rate": 1.9841196750606735e-05,
518
- "loss": 2.521,
519
  "step": 425
520
  },
521
  {
522
  "epoch": 0.17,
523
- "learning_rate": 1.9837448439320027e-05,
524
- "loss": 2.4937,
525
  "step": 430
526
  },
527
  {
528
  "epoch": 0.17,
529
- "learning_rate": 1.983365676829466e-05,
530
- "loss": 2.5877,
531
  "step": 435
532
  },
533
  {
534
  "epoch": 0.18,
535
- "learning_rate": 1.9829821754242885e-05,
536
- "loss": 2.5761,
537
  "step": 440
538
  },
539
  {
540
  "epoch": 0.18,
541
- "learning_rate": 1.9825943414067974e-05,
542
- "loss": 2.4917,
543
  "step": 445
544
  },
545
  {
546
  "epoch": 0.18,
547
- "learning_rate": 1.9822021764864194e-05,
548
- "loss": 2.5434,
549
  "step": 450
550
  },
551
  {
552
  "epoch": 0.18,
553
- "learning_rate": 1.9818056823916675e-05,
554
- "loss": 2.5906,
555
  "step": 455
556
  },
557
  {
558
  "epoch": 0.18,
559
- "learning_rate": 1.9814048608701374e-05,
560
- "loss": 2.5508,
561
  "step": 460
562
  },
563
  {
564
  "epoch": 0.19,
565
- "learning_rate": 1.980999713688499e-05,
566
- "loss": 2.6005,
567
  "step": 465
568
  },
569
  {
570
  "epoch": 0.19,
571
- "learning_rate": 1.980590242632486e-05,
572
- "loss": 2.523,
573
  "step": 470
574
  },
575
  {
576
  "epoch": 0.19,
577
- "learning_rate": 1.9801764495068923e-05,
578
- "loss": 2.5453,
579
  "step": 475
580
  },
581
  {
582
  "epoch": 0.19,
583
- "learning_rate": 1.979758336135561e-05,
584
- "loss": 2.6426,
585
  "step": 480
586
  },
587
  {
588
  "epoch": 0.19,
589
- "learning_rate": 1.9793359043613768e-05,
590
- "loss": 2.5454,
591
  "step": 485
592
  },
593
  {
594
  "epoch": 0.2,
595
- "learning_rate": 1.9789091560462587e-05,
596
- "loss": 2.6071,
597
  "step": 490
598
  },
599
  {
600
  "epoch": 0.2,
601
- "learning_rate": 1.9784780930711514e-05,
602
- "loss": 2.5913,
603
  "step": 495
604
  },
605
  {
606
  "epoch": 0.2,
607
- "learning_rate": 1.9780427173360165e-05,
608
- "loss": 2.5082,
609
  "step": 500
610
  },
611
  {
612
  "epoch": 0.2,
613
- "learning_rate": 1.977603030759825e-05,
614
- "loss": 2.565,
615
  "step": 505
616
  },
617
  {
618
  "epoch": 0.2,
619
- "learning_rate": 1.977159035280549e-05,
620
- "loss": 2.6365,
621
  "step": 510
622
  },
623
  {
624
  "epoch": 0.21,
625
- "learning_rate": 1.9767107328551515e-05,
626
- "loss": 2.6872,
627
  "step": 515
628
  },
629
  {
630
  "epoch": 0.21,
631
- "learning_rate": 1.9762581254595797e-05,
632
- "loss": 2.6222,
633
  "step": 520
634
  },
635
  {
636
  "epoch": 0.21,
637
- "learning_rate": 1.975801215088755e-05,
638
- "loss": 2.5633,
639
  "step": 525
640
  },
641
  {
642
  "epoch": 0.21,
643
- "learning_rate": 1.9753400037565653e-05,
644
- "loss": 2.6579,
645
  "step": 530
646
  },
647
  {
648
  "epoch": 0.21,
649
- "learning_rate": 1.9748744934958548e-05,
650
- "loss": 2.5572,
651
  "step": 535
652
  },
653
  {
654
  "epoch": 0.22,
655
- "learning_rate": 1.974404686358416e-05,
656
- "loss": 2.5845,
657
  "step": 540
658
  },
659
  {
660
  "epoch": 0.22,
661
- "learning_rate": 1.97393058441498e-05,
662
- "loss": 2.6484,
663
  "step": 545
664
  },
665
  {
666
  "epoch": 0.22,
667
- "learning_rate": 1.973452189755209e-05,
668
- "loss": 2.5473,
669
  "step": 550
670
  },
671
  {
672
  "epoch": 0.22,
673
- "learning_rate": 1.9729695044876847e-05,
674
- "loss": 2.5995,
675
  "step": 555
676
  },
677
  {
678
  "epoch": 0.22,
679
- "learning_rate": 1.9724825307399003e-05,
680
- "loss": 2.5473,
681
  "step": 560
682
  },
683
  {
684
  "epoch": 0.23,
685
- "learning_rate": 1.971991270658252e-05,
686
- "loss": 2.5798,
687
  "step": 565
688
  },
689
  {
690
  "epoch": 0.23,
691
- "learning_rate": 1.971495726408027e-05,
692
- "loss": 2.655,
693
  "step": 570
694
  },
695
  {
696
  "epoch": 0.23,
697
- "learning_rate": 1.970995900173397e-05,
698
- "loss": 2.6904,
699
  "step": 575
700
  },
701
  {
702
  "epoch": 0.23,
703
- "learning_rate": 1.9704917941574053e-05,
704
- "loss": 2.6973,
705
  "step": 580
706
  },
707
  {
708
  "epoch": 0.23,
709
- "learning_rate": 1.969983410581961e-05,
710
- "loss": 2.6518,
711
  "step": 585
712
  },
713
  {
714
  "epoch": 0.24,
715
- "learning_rate": 1.969470751687825e-05,
716
- "loss": 2.5849,
717
  "step": 590
718
  },
719
  {
720
  "epoch": 0.24,
721
- "learning_rate": 1.9689538197346035e-05,
722
- "loss": 2.4715,
723
  "step": 595
724
  },
725
  {
726
  "epoch": 0.24,
727
- "learning_rate": 1.9684326170007365e-05,
728
- "loss": 2.7246,
729
  "step": 600
730
  },
731
  {
732
  "epoch": 0.24,
733
- "learning_rate": 1.9679071457834874e-05,
734
- "loss": 2.5482,
735
  "step": 605
736
  },
737
  {
738
  "epoch": 0.24,
739
- "learning_rate": 1.967377408398934e-05,
740
- "loss": 2.6084,
741
  "step": 610
742
  },
743
  {
744
  "epoch": 0.25,
745
- "learning_rate": 1.966843407181958e-05,
746
- "loss": 2.5144,
747
  "step": 615
748
  },
749
  {
750
  "epoch": 0.25,
751
- "learning_rate": 1.9663051444862335e-05,
752
- "loss": 2.7663,
753
  "step": 620
754
  },
755
  {
756
  "epoch": 0.25,
757
- "learning_rate": 1.9657626226842187e-05,
758
- "loss": 2.5697,
759
  "step": 625
760
  },
761
  {
762
  "epoch": 0.25,
763
- "learning_rate": 1.9652158441671435e-05,
764
- "loss": 2.6379,
765
  "step": 630
766
  },
767
  {
768
  "epoch": 0.25,
769
- "learning_rate": 1.964664811345e-05,
770
- "loss": 2.6784,
771
  "step": 635
772
  },
773
  {
774
  "epoch": 0.26,
775
- "learning_rate": 1.964109526646532e-05,
776
- "loss": 2.6936,
777
  "step": 640
778
  },
779
  {
780
  "epoch": 0.26,
781
- "learning_rate": 1.963549992519223e-05,
782
- "loss": 2.5672,
783
  "step": 645
784
  },
785
  {
786
  "epoch": 0.26,
787
- "learning_rate": 1.962986211429288e-05,
788
- "loss": 2.62,
789
  "step": 650
790
  },
791
  {
792
  "epoch": 0.26,
793
- "learning_rate": 1.9624181858616593e-05,
794
- "loss": 2.7293,
795
  "step": 655
796
  },
797
  {
798
  "epoch": 0.26,
799
- "learning_rate": 1.9618459183199782e-05,
800
- "loss": 2.6636,
801
  "step": 660
802
  },
803
  {
804
  "epoch": 0.27,
805
- "learning_rate": 1.961269411326583e-05,
806
- "loss": 2.5106,
807
  "step": 665
808
  },
809
  {
810
  "epoch": 0.27,
811
- "learning_rate": 1.9606886674224977e-05,
812
- "loss": 2.6878,
813
  "step": 670
814
  },
815
  {
816
  "epoch": 0.27,
817
- "learning_rate": 1.960103689167421e-05,
818
- "loss": 2.693,
819
  "step": 675
820
  },
821
  {
822
  "epoch": 0.27,
823
- "learning_rate": 1.9595144791397142e-05,
824
- "loss": 2.6562,
825
  "step": 680
826
  },
827
  {
828
  "epoch": 0.27,
829
- "learning_rate": 1.9589210399363925e-05,
830
- "loss": 2.6269,
831
  "step": 685
832
  },
833
  {
834
  "epoch": 0.28,
835
- "learning_rate": 1.95832337417311e-05,
836
- "loss": 2.6459,
837
  "step": 690
838
  },
839
  {
840
  "epoch": 0.28,
841
- "learning_rate": 1.9577214844841515e-05,
842
- "loss": 2.5765,
843
  "step": 695
844
  },
845
  {
846
  "epoch": 0.28,
847
- "learning_rate": 1.957115373522417e-05,
848
- "loss": 2.6113,
849
  "step": 700
850
  },
851
  {
852
  "epoch": 0.28,
853
- "learning_rate": 1.956505043959414e-05,
854
- "loss": 2.6641,
855
  "step": 705
856
  },
857
  {
858
  "epoch": 0.28,
859
- "learning_rate": 1.955890498485244e-05,
860
- "loss": 2.6493,
861
  "step": 710
862
  },
863
  {
864
  "epoch": 0.29,
865
- "learning_rate": 1.9552717398085898e-05,
866
- "loss": 2.6135,
867
  "step": 715
868
  },
869
  {
870
  "epoch": 0.29,
871
- "learning_rate": 1.954648770656705e-05,
872
- "loss": 2.6182,
873
  "step": 720
874
  },
875
  {
876
  "epoch": 0.29,
877
- "learning_rate": 1.954021593775401e-05,
878
- "loss": 2.6487,
879
  "step": 725
880
  },
881
  {
882
  "epoch": 0.29,
883
- "learning_rate": 1.9533902119290352e-05,
884
- "loss": 2.5927,
885
  "step": 730
886
  },
887
  {
888
  "epoch": 0.29,
889
- "learning_rate": 1.952754627900499e-05,
890
- "loss": 2.5825,
891
  "step": 735
892
  },
893
  {
894
  "epoch": 0.3,
895
- "learning_rate": 1.9521148444912065e-05,
896
- "loss": 2.7193,
897
  "step": 740
898
  },
899
  {
900
  "epoch": 0.3,
901
- "learning_rate": 1.9514708645210793e-05,
902
- "loss": 2.5802,
903
  "step": 745
904
  },
905
  {
906
  "epoch": 0.3,
907
- "learning_rate": 1.9508226908285368e-05,
908
- "loss": 2.6628,
909
  "step": 750
910
  },
911
  {
912
  "epoch": 0.3,
913
- "learning_rate": 1.950170326270483e-05,
914
- "loss": 2.5847,
915
  "step": 755
916
  },
917
  {
918
  "epoch": 0.3,
919
- "learning_rate": 1.9495137737222925e-05,
920
- "loss": 2.6594,
921
  "step": 760
922
  },
923
  {
924
  "epoch": 0.31,
925
- "learning_rate": 1.9488530360778007e-05,
926
- "loss": 2.6096,
927
  "step": 765
928
  },
929
  {
930
  "epoch": 0.31,
931
- "learning_rate": 1.948188116249287e-05,
932
- "loss": 2.6378,
933
  "step": 770
934
  },
935
  {
936
  "epoch": 0.31,
937
- "learning_rate": 1.9475190171674675e-05,
938
- "loss": 2.5984,
939
  "step": 775
940
  },
941
  {
942
  "epoch": 0.31,
943
- "learning_rate": 1.9468457417814753e-05,
944
- "loss": 2.6437,
945
  "step": 780
946
  },
947
  {
948
  "epoch": 0.31,
949
- "learning_rate": 1.9461682930588534e-05,
950
- "loss": 2.6522,
951
  "step": 785
952
  },
953
  {
954
  "epoch": 0.32,
955
- "learning_rate": 1.9454866739855384e-05,
956
- "loss": 2.6242,
957
  "step": 790
958
  },
959
  {
960
  "epoch": 0.32,
961
- "learning_rate": 1.944800887565849e-05,
962
- "loss": 2.5961,
963
  "step": 795
964
  },
965
  {
966
  "epoch": 0.32,
967
- "learning_rate": 1.9441109368224704e-05,
968
- "loss": 2.6365,
969
  "step": 800
970
  },
971
  {
972
  "epoch": 0.32,
973
- "learning_rate": 1.9434168247964447e-05,
974
- "loss": 2.5674,
975
  "step": 805
976
  },
977
  {
978
  "epoch": 0.32,
979
- "learning_rate": 1.9427185545471537e-05,
980
- "loss": 2.6369,
981
  "step": 810
982
  },
983
  {
984
  "epoch": 0.33,
985
- "learning_rate": 1.9420161291523076e-05,
986
- "loss": 2.5763,
987
  "step": 815
988
  },
989
  {
990
  "epoch": 0.33,
991
- "learning_rate": 1.941309551707931e-05,
992
- "loss": 2.5651,
993
  "step": 820
994
  },
995
  {
996
  "epoch": 0.33,
997
- "learning_rate": 1.9405988253283492e-05,
998
- "loss": 2.6223,
999
  "step": 825
1000
  },
1001
  {
1002
  "epoch": 0.33,
1003
- "learning_rate": 1.939883953146174e-05,
1004
- "loss": 2.6616,
1005
  "step": 830
1006
  },
1007
  {
1008
  "epoch": 0.33,
1009
- "learning_rate": 1.939164938312291e-05,
1010
- "loss": 2.6496,
1011
  "step": 835
1012
  },
1013
  {
1014
  "epoch": 0.34,
1015
- "learning_rate": 1.9384417839958443e-05,
1016
- "loss": 2.7161,
1017
  "step": 840
1018
  },
1019
  {
1020
  "epoch": 0.34,
1021
- "learning_rate": 1.937714493384224e-05,
1022
- "loss": 2.6047,
1023
  "step": 845
1024
  },
1025
  {
1026
  "epoch": 0.34,
1027
- "learning_rate": 1.936983069683051e-05,
1028
- "loss": 2.5978,
1029
  "step": 850
1030
  },
1031
  {
1032
  "epoch": 0.34,
1033
- "learning_rate": 1.936247516116163e-05,
1034
- "loss": 2.6331,
1035
  "step": 855
1036
  },
1037
  {
1038
  "epoch": 0.34,
1039
- "learning_rate": 1.935507835925601e-05,
1040
- "loss": 2.5679,
1041
  "step": 860
1042
  },
1043
  {
1044
  "epoch": 0.35,
1045
- "learning_rate": 1.934764032371595e-05,
1046
- "loss": 2.6647,
1047
  "step": 865
1048
  },
1049
  {
1050
  "epoch": 0.35,
1051
- "learning_rate": 1.9340161087325483e-05,
1052
- "loss": 2.6142,
1053
  "step": 870
1054
  },
1055
  {
1056
  "epoch": 0.35,
1057
- "learning_rate": 1.9332640683050243e-05,
1058
- "loss": 2.6181,
1059
  "step": 875
1060
  },
1061
  {
1062
  "epoch": 0.35,
1063
- "learning_rate": 1.932507914403732e-05,
1064
- "loss": 2.6105,
1065
  "step": 880
1066
  },
1067
  {
1068
  "epoch": 0.35,
1069
- "learning_rate": 1.9317476503615108e-05,
1070
- "loss": 2.6415,
1071
  "step": 885
1072
  },
1073
  {
1074
  "epoch": 0.36,
1075
- "learning_rate": 1.9309832795293156e-05,
1076
- "loss": 2.6736,
1077
  "step": 890
1078
  },
1079
  {
1080
  "epoch": 0.36,
1081
- "learning_rate": 1.930214805276204e-05,
1082
- "loss": 2.6507,
1083
  "step": 895
1084
  },
1085
  {
1086
  "epoch": 0.36,
1087
- "learning_rate": 1.9294422309893177e-05,
1088
- "loss": 2.6771,
1089
  "step": 900
1090
  },
1091
  {
1092
  "epoch": 0.36,
1093
- "learning_rate": 1.9286655600738707e-05,
1094
- "loss": 2.6763,
1095
  "step": 905
1096
  },
1097
  {
1098
  "epoch": 0.36,
1099
- "learning_rate": 1.9278847959531348e-05,
1100
- "loss": 2.7135,
1101
  "step": 910
1102
  },
1103
  {
1104
  "epoch": 0.37,
1105
- "learning_rate": 1.927099942068421e-05,
1106
- "loss": 2.6166,
1107
  "step": 915
1108
  },
1109
  {
1110
  "epoch": 0.37,
1111
- "learning_rate": 1.9263110018790673e-05,
1112
- "loss": 2.6238,
1113
  "step": 920
1114
  },
1115
  {
1116
  "epoch": 0.37,
1117
- "learning_rate": 1.9255179788624233e-05,
1118
- "loss": 2.6424,
1119
  "step": 925
1120
  },
1121
  {
1122
  "epoch": 0.37,
1123
- "learning_rate": 1.9247208765138325e-05,
1124
- "loss": 2.6023,
1125
  "step": 930
1126
  },
1127
  {
1128
  "epoch": 0.37,
1129
- "learning_rate": 1.9239196983466204e-05,
1130
- "loss": 2.58,
1131
  "step": 935
1132
  },
1133
  {
1134
  "epoch": 0.38,
1135
- "learning_rate": 1.9231144478920756e-05,
1136
- "loss": 2.6173,
1137
  "step": 940
1138
  },
1139
  {
1140
  "epoch": 0.38,
1141
- "learning_rate": 1.9223051286994368e-05,
1142
- "loss": 2.628,
1143
  "step": 945
1144
  },
1145
  {
1146
  "epoch": 0.38,
1147
- "learning_rate": 1.9214917443358753e-05,
1148
- "loss": 2.6868,
1149
  "step": 950
1150
  },
1151
  {
1152
  "epoch": 0.38,
1153
- "learning_rate": 1.9206742983864813e-05,
1154
- "loss": 2.6342,
1155
  "step": 955
1156
  },
1157
  {
1158
  "epoch": 0.38,
1159
- "learning_rate": 1.9198527944542462e-05,
1160
- "loss": 2.5934,
1161
  "step": 960
1162
  },
1163
  {
1164
  "epoch": 0.39,
1165
- "learning_rate": 1.919027236160047e-05,
1166
- "loss": 2.6354,
1167
  "step": 965
1168
  },
1169
  {
1170
  "epoch": 0.39,
1171
- "learning_rate": 1.9181976271426315e-05,
1172
- "loss": 2.5955,
1173
  "step": 970
1174
  },
1175
  {
1176
  "epoch": 0.39,
1177
- "learning_rate": 1.9173639710586015e-05,
1178
- "loss": 2.6134,
1179
  "step": 975
1180
  },
1181
  {
1182
  "epoch": 0.39,
1183
- "learning_rate": 1.9165262715823966e-05,
1184
- "loss": 2.7,
1185
  "step": 980
1186
  },
1187
  {
1188
  "epoch": 0.39,
1189
- "learning_rate": 1.915684532406278e-05,
1190
- "loss": 2.7197,
1191
  "step": 985
1192
  },
1193
  {
1194
  "epoch": 0.4,
1195
- "learning_rate": 1.9148387572403123e-05,
1196
- "loss": 2.4881,
1197
  "step": 990
1198
  },
1199
  {
1200
  "epoch": 0.4,
1201
- "learning_rate": 1.913988949812356e-05,
1202
- "loss": 2.6999,
1203
  "step": 995
1204
  },
1205
  {
1206
  "epoch": 0.4,
1207
- "learning_rate": 1.9131351138680368e-05,
1208
- "loss": 2.5981,
1209
  "step": 1000
1210
  },
1211
  {
1212
  "epoch": 0.4,
1213
- "learning_rate": 1.9122772531707405e-05,
1214
- "loss": 2.5515,
1215
  "step": 1005
1216
  },
1217
  {
1218
  "epoch": 0.4,
1219
- "learning_rate": 1.9114153715015905e-05,
1220
- "loss": 2.623,
1221
  "step": 1010
1222
  },
1223
  {
1224
  "epoch": 0.41,
1225
- "learning_rate": 1.9105494726594344e-05,
1226
- "loss": 2.6006,
1227
  "step": 1015
1228
  },
1229
  {
1230
  "epoch": 0.41,
1231
- "learning_rate": 1.9096795604608258e-05,
1232
- "loss": 2.5929,
1233
  "step": 1020
1234
  },
1235
  {
1236
  "epoch": 0.41,
1237
- "learning_rate": 1.9088056387400074e-05,
1238
- "loss": 2.5396,
1239
  "step": 1025
1240
  },
1241
  {
1242
  "epoch": 0.41,
1243
- "learning_rate": 1.907927711348894e-05,
1244
- "loss": 2.6428,
1245
  "step": 1030
1246
  },
1247
  {
1248
  "epoch": 0.41,
1249
- "learning_rate": 1.9070457821570566e-05,
1250
- "loss": 2.6296,
1251
  "step": 1035
1252
  },
1253
  {
1254
  "epoch": 0.42,
1255
- "learning_rate": 1.9061598550517048e-05,
1256
- "loss": 2.534,
1257
  "step": 1040
1258
  },
1259
  {
1260
  "epoch": 0.42,
1261
- "learning_rate": 1.9052699339376685e-05,
1262
- "loss": 2.6049,
1263
  "step": 1045
1264
  },
1265
  {
1266
  "epoch": 0.42,
1267
- "learning_rate": 1.9043760227373817e-05,
1268
- "loss": 2.5776,
1269
  "step": 1050
1270
  },
1271
  {
1272
  "epoch": 0.42,
1273
- "learning_rate": 1.9034781253908664e-05,
1274
- "loss": 2.7483,
1275
  "step": 1055
1276
  },
1277
  {
1278
  "epoch": 0.43,
1279
- "learning_rate": 1.902576245855713e-05,
1280
- "loss": 2.6973,
1281
  "step": 1060
1282
  },
1283
  {
1284
  "epoch": 0.43,
1285
- "learning_rate": 1.9016703881070646e-05,
1286
- "loss": 2.645,
1287
  "step": 1065
1288
  },
1289
  {
1290
  "epoch": 0.43,
1291
- "learning_rate": 1.900760556137598e-05,
1292
- "loss": 2.5775,
1293
  "step": 1070
1294
  },
1295
  {
1296
  "epoch": 0.43,
1297
- "learning_rate": 1.899846753957507e-05,
1298
- "loss": 2.6872,
1299
  "step": 1075
1300
  },
1301
  {
1302
  "epoch": 0.43,
1303
- "learning_rate": 1.8989289855944846e-05,
1304
- "loss": 2.5929,
1305
  "step": 1080
1306
  },
1307
  {
1308
  "epoch": 0.44,
1309
- "learning_rate": 1.8980072550937058e-05,
1310
- "loss": 2.5668,
1311
  "step": 1085
1312
  },
1313
  {
1314
  "epoch": 0.44,
1315
- "learning_rate": 1.8970815665178086e-05,
1316
- "loss": 2.6873,
1317
  "step": 1090
1318
  },
1319
  {
1320
  "epoch": 0.44,
1321
- "learning_rate": 1.896151923946877e-05,
1322
- "loss": 2.5679,
1323
  "step": 1095
1324
  },
1325
  {
1326
  "epoch": 0.44,
1327
- "learning_rate": 1.8952183314784224e-05,
1328
- "loss": 2.6734,
1329
  "step": 1100
1330
  }
1331
  ],
 
10
  "log_history": [
11
  {
12
  "epoch": 0.0,
13
+ "learning_rate": 4.99999449047447e-05,
14
+ "loss": 2.7413,
15
  "step": 5
16
  },
17
  {
18
  "epoch": 0.0,
19
+ "learning_rate": 4.9999779619221645e-05,
20
+ "loss": 2.6684,
21
  "step": 10
22
  },
23
  {
24
  "epoch": 0.01,
25
+ "learning_rate": 4.999950414415935e-05,
26
+ "loss": 2.6511,
27
  "step": 15
28
  },
29
  {
30
  "epoch": 0.01,
31
+ "learning_rate": 4.999911848077199e-05,
32
+ "loss": 2.72,
33
  "step": 20
34
  },
35
  {
36
  "epoch": 0.01,
37
+ "learning_rate": 4.999862263075944e-05,
38
+ "loss": 2.7222,
39
  "step": 25
40
  },
41
  {
42
  "epoch": 0.01,
43
+ "learning_rate": 4.999801659630722e-05,
44
+ "loss": 2.7479,
45
  "step": 30
46
  },
47
  {
48
  "epoch": 0.01,
49
+ "learning_rate": 4.999730038008649e-05,
50
+ "loss": 2.6063,
51
  "step": 35
52
  },
53
  {
54
  "epoch": 0.02,
55
+ "learning_rate": 4.9996473985254055e-05,
56
+ "loss": 2.576,
57
  "step": 40
58
  },
59
  {
60
  "epoch": 0.02,
61
+ "learning_rate": 4.999553741545237e-05,
62
+ "loss": 2.579,
63
  "step": 45
64
  },
65
  {
66
  "epoch": 0.02,
67
+ "learning_rate": 4.999449067480945e-05,
68
+ "loss": 2.5114,
69
  "step": 50
70
  },
71
  {
72
  "epoch": 0.02,
73
+ "learning_rate": 4.9993333767938954e-05,
74
+ "loss": 2.6174,
75
  "step": 55
76
  },
77
  {
78
  "epoch": 0.02,
79
+ "learning_rate": 4.9992066699940085e-05,
80
+ "loss": 2.6581,
81
  "step": 60
82
  },
83
  {
84
  "epoch": 0.03,
85
+ "learning_rate": 4.9990689476397586e-05,
86
+ "loss": 2.586,
87
  "step": 65
88
  },
89
  {
90
  "epoch": 0.03,
91
+ "learning_rate": 4.998920210338175e-05,
92
+ "loss": 2.4642,
93
  "step": 70
94
  },
95
  {
96
  "epoch": 0.03,
97
+ "learning_rate": 4.9987604587448343e-05,
98
+ "loss": 2.6262,
99
  "step": 75
100
  },
101
  {
102
  "epoch": 0.03,
103
+ "learning_rate": 4.998589693563861e-05,
104
+ "loss": 2.5678,
105
  "step": 80
106
  },
107
  {
108
  "epoch": 0.03,
109
+ "learning_rate": 4.998407915547924e-05,
110
+ "loss": 2.5597,
111
  "step": 85
112
  },
113
  {
114
  "epoch": 0.04,
115
+ "learning_rate": 4.9982151254982304e-05,
116
+ "loss": 2.6578,
117
  "step": 90
118
  },
119
  {
120
  "epoch": 0.04,
121
+ "learning_rate": 4.998011324264527e-05,
122
+ "loss": 2.4825,
123
  "step": 95
124
  },
125
  {
126
  "epoch": 0.04,
127
+ "learning_rate": 4.997796512745091e-05,
128
+ "loss": 2.6197,
129
  "step": 100
130
  },
131
  {
132
  "epoch": 0.04,
133
+ "learning_rate": 4.997570691886732e-05,
134
+ "loss": 2.4921,
135
  "step": 105
136
  },
137
  {
138
  "epoch": 0.04,
139
+ "learning_rate": 4.99733386268478e-05,
140
+ "loss": 2.5126,
141
  "step": 110
142
  },
143
  {
144
  "epoch": 0.05,
145
+ "learning_rate": 4.99708602618309e-05,
146
+ "loss": 2.584,
147
  "step": 115
148
  },
149
  {
150
  "epoch": 0.05,
151
+ "learning_rate": 4.9968271834740305e-05,
152
+ "loss": 2.4991,
153
  "step": 120
154
  },
155
  {
156
  "epoch": 0.05,
157
+ "learning_rate": 4.996557335698482e-05,
158
+ "loss": 2.5242,
159
  "step": 125
160
  },
161
  {
162
  "epoch": 0.05,
163
+ "learning_rate": 4.996276484045832e-05,
164
+ "loss": 2.4506,
165
  "step": 130
166
  },
167
  {
168
  "epoch": 0.05,
169
+ "learning_rate": 4.995984629753967e-05,
170
+ "loss": 2.5479,
171
  "step": 135
172
  },
173
  {
174
  "epoch": 0.06,
175
+ "learning_rate": 4.9956817741092696e-05,
176
+ "loss": 2.5316,
177
  "step": 140
178
  },
179
  {
180
  "epoch": 0.06,
181
+ "learning_rate": 4.995367918446613e-05,
182
+ "loss": 2.6053,
183
  "step": 145
184
  },
185
  {
186
  "epoch": 0.06,
187
+ "learning_rate": 4.995043064149354e-05,
188
+ "loss": 2.4533,
189
  "step": 150
190
  },
191
  {
192
  "epoch": 0.06,
193
+ "learning_rate": 4.9947072126493276e-05,
194
+ "loss": 2.5279,
195
  "step": 155
196
  },
197
  {
198
  "epoch": 0.06,
199
+ "learning_rate": 4.994360365426839e-05,
200
+ "loss": 2.4994,
201
  "step": 160
202
  },
203
  {
204
  "epoch": 0.07,
205
+ "learning_rate": 4.994002524010659e-05,
206
+ "loss": 2.4675,
207
  "step": 165
208
  },
209
  {
210
  "epoch": 0.07,
211
+ "learning_rate": 4.9936336899780166e-05,
212
+ "loss": 2.5233,
213
  "step": 170
214
  },
215
  {
216
  "epoch": 0.07,
217
+ "learning_rate": 4.993253864954592e-05,
218
+ "loss": 2.4974,
219
  "step": 175
220
  },
221
  {
222
  "epoch": 0.07,
223
+ "learning_rate": 4.992863050614511e-05,
224
+ "loss": 2.6295,
225
  "step": 180
226
  },
227
  {
228
  "epoch": 0.07,
229
+ "learning_rate": 4.9924612486803346e-05,
230
+ "loss": 2.4744,
231
  "step": 185
232
  },
233
  {
234
  "epoch": 0.08,
235
+ "learning_rate": 4.992048460923052e-05,
236
+ "loss": 2.5248,
237
  "step": 190
238
  },
239
  {
240
  "epoch": 0.08,
241
+ "learning_rate": 4.991624689162076e-05,
242
+ "loss": 2.5727,
243
  "step": 195
244
  },
245
  {
246
  "epoch": 0.08,
247
+ "learning_rate": 4.991189935265231e-05,
248
+ "loss": 2.4845,
249
  "step": 200
250
  },
251
  {
252
  "epoch": 0.08,
253
+ "learning_rate": 4.990744201148748e-05,
254
+ "loss": 2.4544,
255
  "step": 205
256
  },
257
  {
258
  "epoch": 0.08,
259
+ "learning_rate": 4.990287488777253e-05,
260
+ "loss": 2.4383,
261
  "step": 210
262
  },
263
  {
264
  "epoch": 0.09,
265
+ "learning_rate": 4.989819800163761e-05,
266
+ "loss": 2.5263,
267
  "step": 215
268
  },
269
  {
270
  "epoch": 0.09,
271
+ "learning_rate": 4.989341137369666e-05,
272
+ "loss": 2.5219,
273
  "step": 220
274
  },
275
  {
276
  "epoch": 0.09,
277
+ "learning_rate": 4.9888515025047316e-05,
278
+ "loss": 2.4512,
279
  "step": 225
280
  },
281
  {
282
  "epoch": 0.09,
283
+ "learning_rate": 4.988350897727083e-05,
284
+ "loss": 2.491,
285
  "step": 230
286
  },
287
  {
288
  "epoch": 0.09,
289
+ "learning_rate": 4.9878393252431953e-05,
290
+ "loss": 2.5404,
291
  "step": 235
292
  },
293
  {
294
  "epoch": 0.1,
295
+ "learning_rate": 4.987316787307888e-05,
296
+ "loss": 2.4777,
297
  "step": 240
298
  },
299
  {
300
  "epoch": 0.1,
301
+ "learning_rate": 4.9867832862243055e-05,
302
+ "loss": 2.4498,
303
  "step": 245
304
  },
305
  {
306
  "epoch": 0.1,
307
+ "learning_rate": 4.986238824343923e-05,
308
+ "loss": 2.4174,
309
  "step": 250
310
  },
311
  {
312
  "epoch": 0.1,
313
+ "learning_rate": 4.985683404066519e-05,
314
+ "loss": 2.4589,
315
  "step": 255
316
  },
317
  {
318
  "epoch": 0.1,
319
+ "learning_rate": 4.985117027840175e-05,
320
+ "loss": 2.5029,
321
  "step": 260
322
  },
323
  {
324
  "epoch": 0.11,
325
+ "learning_rate": 4.984539698161264e-05,
326
+ "loss": 2.4376,
327
  "step": 265
328
  },
329
  {
330
  "epoch": 0.11,
331
+ "learning_rate": 4.983951417574436e-05,
332
+ "loss": 2.526,
333
  "step": 270
334
  },
335
  {
336
  "epoch": 0.11,
337
+ "learning_rate": 4.983352188672608e-05,
338
+ "loss": 2.5214,
339
  "step": 275
340
  },
341
  {
342
  "epoch": 0.11,
343
+ "learning_rate": 4.982742014096952e-05,
344
+ "loss": 2.5188,
345
  "step": 280
346
  },
347
  {
348
  "epoch": 0.11,
349
+ "learning_rate": 4.982120896536888e-05,
350
+ "loss": 2.4175,
351
  "step": 285
352
  },
353
  {
354
  "epoch": 0.12,
355
+ "learning_rate": 4.981488838730066e-05,
356
+ "loss": 2.5346,
357
  "step": 290
358
  },
359
  {
360
  "epoch": 0.12,
361
+ "learning_rate": 4.980845843462357e-05,
362
+ "loss": 2.496,
363
  "step": 295
364
  },
365
  {
366
  "epoch": 0.12,
367
+ "learning_rate": 4.98019191356784e-05,
368
+ "loss": 2.477,
369
  "step": 300
370
  },
371
  {
372
  "epoch": 0.12,
373
+ "learning_rate": 4.9795270519287886e-05,
374
+ "loss": 2.4205,
375
  "step": 305
376
  },
377
  {
378
  "epoch": 0.12,
379
+ "learning_rate": 4.9788512614756624e-05,
380
+ "loss": 2.4621,
381
  "step": 310
382
  },
383
  {
384
  "epoch": 0.13,
385
+ "learning_rate": 4.9781645451870875e-05,
386
+ "loss": 2.5098,
387
  "step": 315
388
  },
389
  {
390
  "epoch": 0.13,
391
+ "learning_rate": 4.9774669060898496e-05,
392
+ "loss": 2.4847,
393
  "step": 320
394
  },
395
  {
396
  "epoch": 0.13,
397
+ "learning_rate": 4.976758347258877e-05,
398
+ "loss": 2.4873,
399
  "step": 325
400
  },
401
  {
402
  "epoch": 0.13,
403
+ "learning_rate": 4.976038871817228e-05,
404
+ "loss": 2.4462,
405
  "step": 330
406
  },
407
  {
408
  "epoch": 0.13,
409
+ "learning_rate": 4.9753084829360776e-05,
410
+ "loss": 2.4994,
411
  "step": 335
412
  },
413
  {
414
  "epoch": 0.14,
415
+ "learning_rate": 4.974567183834702e-05,
416
+ "loss": 2.4994,
417
  "step": 340
418
  },
419
  {
420
  "epoch": 0.14,
421
+ "learning_rate": 4.9738149777804665e-05,
422
+ "loss": 2.4584,
423
  "step": 345
424
  },
425
  {
426
  "epoch": 0.14,
427
+ "learning_rate": 4.97305186808881e-05,
428
+ "loss": 2.4294,
429
  "step": 350
430
  },
431
  {
432
  "epoch": 0.14,
433
+ "learning_rate": 4.9722778581232305e-05,
434
+ "loss": 2.4499,
435
  "step": 355
436
  },
437
  {
438
  "epoch": 0.14,
439
+ "learning_rate": 4.9714929512952704e-05,
440
+ "loss": 2.5851,
441
  "step": 360
442
  },
443
  {
444
  "epoch": 0.15,
445
+ "learning_rate": 4.9706971510645e-05,
446
+ "loss": 2.4746,
447
  "step": 365
448
  },
449
  {
450
  "epoch": 0.15,
451
+ "learning_rate": 4.969890460938505e-05,
452
+ "loss": 2.4184,
453
  "step": 370
454
  },
455
  {
456
  "epoch": 0.15,
457
+ "learning_rate": 4.9690728844728704e-05,
458
+ "loss": 2.4716,
459
  "step": 375
460
  },
461
  {
462
  "epoch": 0.15,
463
+ "learning_rate": 4.968244425271161e-05,
464
+ "loss": 2.4477,
465
  "step": 380
466
  },
467
  {
468
  "epoch": 0.15,
469
+ "learning_rate": 4.9674050869849124e-05,
470
+ "loss": 2.4563,
471
  "step": 385
472
  },
473
  {
474
  "epoch": 0.16,
475
+ "learning_rate": 4.966554873313608e-05,
476
+ "loss": 2.3708,
477
  "step": 390
478
  },
479
  {
480
  "epoch": 0.16,
481
+ "learning_rate": 4.9656937880046676e-05,
482
+ "loss": 2.4351,
483
  "step": 395
484
  },
485
  {
486
  "epoch": 0.16,
487
+ "learning_rate": 4.9648218348534284e-05,
488
+ "loss": 2.3933,
489
  "step": 400
490
  },
491
  {
492
  "epoch": 0.16,
493
+ "learning_rate": 4.963939017703128e-05,
494
+ "loss": 2.5871,
495
  "step": 405
496
  },
497
  {
498
  "epoch": 0.16,
499
+ "learning_rate": 4.9630453404448905e-05,
500
+ "loss": 2.3965,
501
  "step": 410
502
  },
503
  {
504
  "epoch": 0.17,
505
+ "learning_rate": 4.962140807017705e-05,
506
+ "loss": 2.372,
507
  "step": 415
508
  },
509
  {
510
  "epoch": 0.17,
511
+ "learning_rate": 4.961225421408412e-05,
512
+ "loss": 2.4847,
513
  "step": 420
514
  },
515
  {
516
  "epoch": 0.17,
517
+ "learning_rate": 4.960299187651684e-05,
518
+ "loss": 2.4046,
519
  "step": 425
520
  },
521
  {
522
  "epoch": 0.17,
523
+ "learning_rate": 4.959362109830007e-05,
524
+ "loss": 2.3912,
525
  "step": 430
526
  },
527
  {
528
  "epoch": 0.17,
529
+ "learning_rate": 4.9584141920736656e-05,
530
+ "loss": 2.4732,
531
  "step": 435
532
  },
533
  {
534
  "epoch": 0.18,
535
+ "learning_rate": 4.957455438560721e-05,
536
+ "loss": 2.4854,
537
  "step": 440
538
  },
539
  {
540
  "epoch": 0.18,
541
+ "learning_rate": 4.956485853516993e-05,
542
+ "loss": 2.3736,
543
  "step": 445
544
  },
545
  {
546
  "epoch": 0.18,
547
+ "learning_rate": 4.9555054412160476e-05,
548
+ "loss": 2.4222,
549
  "step": 450
550
  },
551
  {
552
  "epoch": 0.18,
553
+ "learning_rate": 4.9545142059791686e-05,
554
+ "loss": 2.4498,
555
  "step": 455
556
  },
557
  {
558
  "epoch": 0.18,
559
+ "learning_rate": 4.9535121521753434e-05,
560
+ "loss": 2.4172,
561
  "step": 460
562
  },
563
  {
564
  "epoch": 0.19,
565
+ "learning_rate": 4.952499284221247e-05,
566
+ "loss": 2.4742,
567
  "step": 465
568
  },
569
  {
570
  "epoch": 0.19,
571
+ "learning_rate": 4.951475606581215e-05,
572
+ "loss": 2.4008,
573
  "step": 470
574
  },
575
  {
576
  "epoch": 0.19,
577
+ "learning_rate": 4.950441123767231e-05,
578
+ "loss": 2.4278,
579
  "step": 475
580
  },
581
  {
582
  "epoch": 0.19,
583
+ "learning_rate": 4.949395840338903e-05,
584
+ "loss": 2.5041,
585
  "step": 480
586
  },
587
  {
588
  "epoch": 0.19,
589
+ "learning_rate": 4.948339760903442e-05,
590
+ "loss": 2.4088,
591
  "step": 485
592
  },
593
  {
594
  "epoch": 0.2,
595
+ "learning_rate": 4.947272890115647e-05,
596
+ "loss": 2.466,
597
  "step": 490
598
  },
599
  {
600
  "epoch": 0.2,
601
+ "learning_rate": 4.946195232677878e-05,
602
+ "loss": 2.4404,
603
  "step": 495
604
  },
605
  {
606
  "epoch": 0.2,
607
+ "learning_rate": 4.9451067933400406e-05,
608
+ "loss": 2.3658,
609
  "step": 500
610
  },
611
  {
612
  "epoch": 0.2,
613
+ "learning_rate": 4.9440075768995625e-05,
614
+ "loss": 2.4247,
615
  "step": 505
616
  },
617
  {
618
  "epoch": 0.2,
619
+ "learning_rate": 4.942897588201372e-05,
620
+ "loss": 2.5082,
621
  "step": 510
622
  },
623
  {
624
  "epoch": 0.21,
625
+ "learning_rate": 4.941776832137879e-05,
626
+ "loss": 2.5545,
627
  "step": 515
628
  },
629
  {
630
  "epoch": 0.21,
631
+ "learning_rate": 4.940645313648949e-05,
632
+ "loss": 2.4665,
633
  "step": 520
634
  },
635
  {
636
  "epoch": 0.21,
637
+ "learning_rate": 4.939503037721888e-05,
638
+ "loss": 2.4195,
639
  "step": 525
640
  },
641
  {
642
  "epoch": 0.21,
643
+ "learning_rate": 4.938350009391413e-05,
644
+ "loss": 2.4908,
645
  "step": 530
646
  },
647
  {
648
  "epoch": 0.21,
649
+ "learning_rate": 4.937186233739637e-05,
650
+ "loss": 2.4093,
651
  "step": 535
652
  },
653
  {
654
  "epoch": 0.22,
655
+ "learning_rate": 4.93601171589604e-05,
656
+ "loss": 2.44,
657
  "step": 540
658
  },
659
  {
660
  "epoch": 0.22,
661
+ "learning_rate": 4.9348264610374494e-05,
662
+ "loss": 2.4989,
663
  "step": 545
664
  },
665
  {
666
  "epoch": 0.22,
667
+ "learning_rate": 4.9336304743880226e-05,
668
+ "loss": 2.3786,
669
  "step": 550
670
  },
671
  {
672
  "epoch": 0.22,
673
+ "learning_rate": 4.932423761219211e-05,
674
+ "loss": 2.4305,
675
  "step": 555
676
  },
677
  {
678
  "epoch": 0.22,
679
+ "learning_rate": 4.931206326849751e-05,
680
+ "loss": 2.4012,
681
  "step": 560
682
  },
683
  {
684
  "epoch": 0.23,
685
+ "learning_rate": 4.92997817664563e-05,
686
+ "loss": 2.4263,
687
  "step": 565
688
  },
689
  {
690
  "epoch": 0.23,
691
+ "learning_rate": 4.9287393160200676e-05,
692
+ "loss": 2.4749,
693
  "step": 570
694
  },
695
  {
696
  "epoch": 0.23,
697
+ "learning_rate": 4.927489750433492e-05,
698
+ "loss": 2.5083,
699
  "step": 575
700
  },
701
  {
702
  "epoch": 0.23,
703
+ "learning_rate": 4.926229485393513e-05,
704
+ "loss": 2.5228,
705
  "step": 580
706
  },
707
  {
708
  "epoch": 0.23,
709
+ "learning_rate": 4.924958526454902e-05,
710
+ "loss": 2.4623,
711
  "step": 585
712
  },
713
  {
714
  "epoch": 0.24,
715
+ "learning_rate": 4.923676879219562e-05,
716
+ "loss": 2.4199,
717
  "step": 590
718
  },
719
  {
720
  "epoch": 0.24,
721
+ "learning_rate": 4.9223845493365085e-05,
722
+ "loss": 2.2906,
723
  "step": 595
724
  },
725
  {
726
  "epoch": 0.24,
727
+ "learning_rate": 4.9210815425018406e-05,
728
+ "loss": 2.5539,
729
  "step": 600
730
  },
731
  {
732
  "epoch": 0.24,
733
+ "learning_rate": 4.919767864458718e-05,
734
+ "loss": 2.3456,
735
  "step": 605
736
  },
737
  {
738
  "epoch": 0.24,
739
+ "learning_rate": 4.9184435209973354e-05,
740
+ "loss": 2.4328,
741
  "step": 610
742
  },
743
  {
744
  "epoch": 0.25,
745
+ "learning_rate": 4.917108517954895e-05,
746
+ "loss": 2.3412,
747
  "step": 615
748
  },
749
  {
750
  "epoch": 0.25,
751
+ "learning_rate": 4.9157628612155836e-05,
752
+ "loss": 2.604,
753
  "step": 620
754
  },
755
  {
756
  "epoch": 0.25,
757
+ "learning_rate": 4.9144065567105465e-05,
758
+ "loss": 2.4007,
759
  "step": 625
760
  },
761
  {
762
  "epoch": 0.25,
763
+ "learning_rate": 4.913039610417859e-05,
764
+ "loss": 2.437,
765
  "step": 630
766
  },
767
  {
768
  "epoch": 0.25,
769
+ "learning_rate": 4.9116620283624996e-05,
770
+ "loss": 2.5004,
771
  "step": 635
772
  },
773
  {
774
  "epoch": 0.26,
775
+ "learning_rate": 4.91027381661633e-05,
776
+ "loss": 2.5045,
777
  "step": 640
778
  },
779
  {
780
  "epoch": 0.26,
781
+ "learning_rate": 4.908874981298057e-05,
782
+ "loss": 2.3794,
783
  "step": 645
784
  },
785
  {
786
  "epoch": 0.26,
787
+ "learning_rate": 4.9074655285732196e-05,
788
+ "loss": 2.4621,
789
  "step": 650
790
  },
791
  {
792
  "epoch": 0.26,
793
+ "learning_rate": 4.906045464654148e-05,
794
+ "loss": 2.549,
795
  "step": 655
796
  },
797
  {
798
  "epoch": 0.26,
799
+ "learning_rate": 4.9046147957999454e-05,
800
+ "loss": 2.4567,
801
  "step": 660
802
  },
803
  {
804
  "epoch": 0.27,
805
+ "learning_rate": 4.9031735283164574e-05,
806
+ "loss": 2.3243,
807
  "step": 665
808
  },
809
  {
810
  "epoch": 0.27,
811
+ "learning_rate": 4.901721668556244e-05,
812
+ "loss": 2.4758,
813
  "step": 670
814
  },
815
  {
816
  "epoch": 0.27,
817
+ "learning_rate": 4.9002592229185515e-05,
818
+ "loss": 2.5083,
819
  "step": 675
820
  },
821
  {
822
  "epoch": 0.27,
823
+ "learning_rate": 4.898786197849285e-05,
824
+ "loss": 2.4428,
825
  "step": 680
826
  },
827
  {
828
  "epoch": 0.27,
829
+ "learning_rate": 4.897302599840981e-05,
830
+ "loss": 2.4403,
831
  "step": 685
832
  },
833
  {
834
  "epoch": 0.28,
835
+ "learning_rate": 4.895808435432776e-05,
836
+ "loss": 2.4401,
837
  "step": 690
838
  },
839
  {
840
  "epoch": 0.28,
841
+ "learning_rate": 4.8943037112103784e-05,
842
+ "loss": 2.3789,
843
  "step": 695
844
  },
845
  {
846
  "epoch": 0.28,
847
+ "learning_rate": 4.892788433806042e-05,
848
+ "loss": 2.3945,
849
  "step": 700
850
  },
851
  {
852
  "epoch": 0.28,
853
+ "learning_rate": 4.8912626098985355e-05,
854
+ "loss": 2.4758,
855
  "step": 705
856
  },
857
  {
858
  "epoch": 0.28,
859
+ "learning_rate": 4.88972624621311e-05,
860
+ "loss": 2.4646,
861
  "step": 710
862
  },
863
  {
864
  "epoch": 0.29,
865
+ "learning_rate": 4.888179349521475e-05,
866
+ "loss": 2.4225,
867
  "step": 715
868
  },
869
  {
870
  "epoch": 0.29,
871
+ "learning_rate": 4.886621926641762e-05,
872
+ "loss": 2.4074,
873
  "step": 720
874
  },
875
  {
876
  "epoch": 0.29,
877
+ "learning_rate": 4.8850539844385017e-05,
878
+ "loss": 2.4438,
879
  "step": 725
880
  },
881
  {
882
  "epoch": 0.29,
883
+ "learning_rate": 4.883475529822587e-05,
884
+ "loss": 2.3666,
885
  "step": 730
886
  },
887
  {
888
  "epoch": 0.29,
889
+ "learning_rate": 4.881886569751248e-05,
890
+ "loss": 2.3782,
891
  "step": 735
892
  },
893
  {
894
  "epoch": 0.3,
895
+ "learning_rate": 4.880287111228016e-05,
896
+ "loss": 2.4916,
897
  "step": 740
898
  },
899
  {
900
  "epoch": 0.3,
901
+ "learning_rate": 4.878677161302698e-05,
902
+ "loss": 2.3639,
903
  "step": 745
904
  },
905
  {
906
  "epoch": 0.3,
907
+ "learning_rate": 4.877056727071342e-05,
908
+ "loss": 2.4333,
909
  "step": 750
910
  },
911
  {
912
  "epoch": 0.3,
913
+ "learning_rate": 4.8754258156762075e-05,
914
+ "loss": 2.3907,
915
  "step": 755
916
  },
917
  {
918
  "epoch": 0.3,
919
+ "learning_rate": 4.8737844343057315e-05,
920
+ "loss": 2.433,
921
  "step": 760
922
  },
923
  {
924
  "epoch": 0.31,
925
+ "learning_rate": 4.8721325901945015e-05,
926
+ "loss": 2.3849,
927
  "step": 765
928
  },
929
  {
930
  "epoch": 0.31,
931
+ "learning_rate": 4.870470290623218e-05,
932
+ "loss": 2.419,
933
  "step": 770
934
  },
935
  {
936
  "epoch": 0.31,
937
+ "learning_rate": 4.8687975429186685e-05,
938
+ "loss": 2.3694,
939
  "step": 775
940
  },
941
  {
942
  "epoch": 0.31,
943
+ "learning_rate": 4.867114354453688e-05,
944
+ "loss": 2.4335,
945
  "step": 780
946
  },
947
  {
948
  "epoch": 0.31,
949
+ "learning_rate": 4.865420732647133e-05,
950
+ "loss": 2.4168,
951
  "step": 785
952
  },
953
  {
954
  "epoch": 0.32,
955
+ "learning_rate": 4.863716684963846e-05,
956
+ "loss": 2.419,
957
  "step": 790
958
  },
959
  {
960
  "epoch": 0.32,
961
+ "learning_rate": 4.862002218914622e-05,
962
+ "loss": 2.335,
963
  "step": 795
964
  },
965
  {
966
  "epoch": 0.32,
967
+ "learning_rate": 4.860277342056176e-05,
968
+ "loss": 2.4059,
969
  "step": 800
970
  },
971
  {
972
  "epoch": 0.32,
973
+ "learning_rate": 4.858542061991112e-05,
974
+ "loss": 2.3423,
975
  "step": 805
976
  },
977
  {
978
  "epoch": 0.32,
979
+ "learning_rate": 4.856796386367884e-05,
980
+ "loss": 2.4135,
981
  "step": 810
982
  },
983
  {
984
  "epoch": 0.33,
985
+ "learning_rate": 4.855040322880769e-05,
986
+ "loss": 2.3652,
987
  "step": 815
988
  },
989
  {
990
  "epoch": 0.33,
991
+ "learning_rate": 4.853273879269827e-05,
992
+ "loss": 2.3511,
993
  "step": 820
994
  },
995
  {
996
  "epoch": 0.33,
997
+ "learning_rate": 4.8514970633208726e-05,
998
+ "loss": 2.413,
999
  "step": 825
1000
  },
1001
  {
1002
  "epoch": 0.33,
1003
+ "learning_rate": 4.849709882865435e-05,
1004
+ "loss": 2.4445,
1005
  "step": 830
1006
  },
1007
  {
1008
  "epoch": 0.33,
1009
+ "learning_rate": 4.847912345780727e-05,
1010
+ "loss": 2.4221,
1011
  "step": 835
1012
  },
1013
  {
1014
  "epoch": 0.34,
1015
+ "learning_rate": 4.846104459989611e-05,
1016
+ "loss": 2.4992,
1017
  "step": 840
1018
  },
1019
  {
1020
  "epoch": 0.34,
1021
+ "learning_rate": 4.84428623346056e-05,
1022
+ "loss": 2.3923,
1023
  "step": 845
1024
  },
1025
  {
1026
  "epoch": 0.34,
1027
+ "learning_rate": 4.842457674207627e-05,
1028
+ "loss": 2.3666,
1029
  "step": 850
1030
  },
1031
  {
1032
  "epoch": 0.34,
1033
+ "learning_rate": 4.8406187902904076e-05,
1034
+ "loss": 2.4329,
1035
  "step": 855
1036
  },
1037
  {
1038
  "epoch": 0.34,
1039
+ "learning_rate": 4.8387695898140026e-05,
1040
+ "loss": 2.3429,
1041
  "step": 860
1042
  },
1043
  {
1044
  "epoch": 0.35,
1045
+ "learning_rate": 4.836910080928987e-05,
1046
+ "loss": 2.4428,
1047
  "step": 865
1048
  },
1049
  {
1050
  "epoch": 0.35,
1051
+ "learning_rate": 4.83504027183137e-05,
1052
+ "loss": 2.3842,
1053
  "step": 870
1054
  },
1055
  {
1056
  "epoch": 0.35,
1057
+ "learning_rate": 4.833160170762561e-05,
1058
+ "loss": 2.4064,
1059
  "step": 875
1060
  },
1061
  {
1062
  "epoch": 0.35,
1063
+ "learning_rate": 4.8312697860093295e-05,
1064
+ "loss": 2.3705,
1065
  "step": 880
1066
  },
1067
  {
1068
  "epoch": 0.35,
1069
+ "learning_rate": 4.829369125903776e-05,
1070
+ "loss": 2.4151,
1071
  "step": 885
1072
  },
1073
  {
1074
  "epoch": 0.36,
1075
+ "learning_rate": 4.8274581988232894e-05,
1076
+ "loss": 2.4219,
1077
  "step": 890
1078
  },
1079
  {
1080
  "epoch": 0.36,
1081
+ "learning_rate": 4.825537013190509e-05,
1082
+ "loss": 2.4042,
1083
  "step": 895
1084
  },
1085
  {
1086
  "epoch": 0.36,
1087
+ "learning_rate": 4.823605577473293e-05,
1088
+ "loss": 2.4509,
1089
  "step": 900
1090
  },
1091
  {
1092
  "epoch": 0.36,
1093
+ "learning_rate": 4.8216639001846764e-05,
1094
+ "loss": 2.4255,
1095
  "step": 905
1096
  },
1097
  {
1098
  "epoch": 0.36,
1099
+ "learning_rate": 4.8197119898828367e-05,
1100
+ "loss": 2.4928,
1101
  "step": 910
1102
  },
1103
  {
1104
  "epoch": 0.37,
1105
+ "learning_rate": 4.817749855171052e-05,
1106
+ "loss": 2.4044,
1107
  "step": 915
1108
  },
1109
  {
1110
  "epoch": 0.37,
1111
+ "learning_rate": 4.8157775046976684e-05,
1112
+ "loss": 2.4085,
1113
  "step": 920
1114
  },
1115
  {
1116
  "epoch": 0.37,
1117
+ "learning_rate": 4.813794947156058e-05,
1118
+ "loss": 2.3941,
1119
  "step": 925
1120
  },
1121
  {
1122
  "epoch": 0.37,
1123
+ "learning_rate": 4.8118021912845815e-05,
1124
+ "loss": 2.3493,
1125
  "step": 930
1126
  },
1127
  {
1128
  "epoch": 0.37,
1129
+ "learning_rate": 4.8097992458665506e-05,
1130
+ "loss": 2.3367,
1131
  "step": 935
1132
  },
1133
  {
1134
  "epoch": 0.38,
1135
+ "learning_rate": 4.807786119730189e-05,
1136
+ "loss": 2.3865,
1137
  "step": 940
1138
  },
1139
  {
1140
  "epoch": 0.38,
1141
+ "learning_rate": 4.8057628217485916e-05,
1142
+ "loss": 2.3885,
1143
  "step": 945
1144
  },
1145
  {
1146
  "epoch": 0.38,
1147
+ "learning_rate": 4.803729360839688e-05,
1148
+ "loss": 2.4296,
1149
  "step": 950
1150
  },
1151
  {
1152
  "epoch": 0.38,
1153
+ "learning_rate": 4.801685745966203e-05,
1154
+ "loss": 2.3856,
1155
  "step": 955
1156
  },
1157
  {
1158
  "epoch": 0.38,
1159
+ "learning_rate": 4.799631986135615e-05,
1160
+ "loss": 2.3631,
1161
  "step": 960
1162
  },
1163
  {
1164
  "epoch": 0.39,
1165
+ "learning_rate": 4.797568090400117e-05,
1166
+ "loss": 2.3882,
1167
  "step": 965
1168
  },
1169
  {
1170
  "epoch": 0.39,
1171
+ "learning_rate": 4.7954940678565785e-05,
1172
+ "loss": 2.3396,
1173
  "step": 970
1174
  },
1175
  {
1176
  "epoch": 0.39,
1177
+ "learning_rate": 4.793409927646504e-05,
1178
+ "loss": 2.3548,
1179
  "step": 975
1180
  },
1181
  {
1182
  "epoch": 0.39,
1183
+ "learning_rate": 4.791315678955991e-05,
1184
+ "loss": 2.4576,
1185
  "step": 980
1186
  },
1187
  {
1188
  "epoch": 0.39,
1189
+ "learning_rate": 4.789211331015695e-05,
1190
+ "loss": 2.4523,
1191
  "step": 985
1192
  },
1193
  {
1194
  "epoch": 0.4,
1195
+ "learning_rate": 4.787096893100781e-05,
1196
+ "loss": 2.2542,
1197
  "step": 990
1198
  },
1199
  {
1200
  "epoch": 0.4,
1201
+ "learning_rate": 4.784972374530889e-05,
1202
+ "loss": 2.4385,
1203
  "step": 995
1204
  },
1205
  {
1206
  "epoch": 0.4,
1207
+ "learning_rate": 4.7828377846700925e-05,
1208
+ "loss": 2.3266,
1209
  "step": 1000
1210
  },
1211
  {
1212
  "epoch": 0.4,
1213
+ "learning_rate": 4.780693132926851e-05,
1214
+ "loss": 2.3301,
1215
  "step": 1005
1216
  },
1217
  {
1218
  "epoch": 0.4,
1219
+ "learning_rate": 4.7785384287539755e-05,
1220
+ "loss": 2.3554,
1221
  "step": 1010
1222
  },
1223
  {
1224
  "epoch": 0.41,
1225
+ "learning_rate": 4.776373681648586e-05,
1226
+ "loss": 2.3838,
1227
  "step": 1015
1228
  },
1229
  {
1230
  "epoch": 0.41,
1231
+ "learning_rate": 4.7741989011520645e-05,
1232
+ "loss": 2.3361,
1233
  "step": 1020
1234
  },
1235
  {
1236
  "epoch": 0.41,
1237
+ "learning_rate": 4.772014096850018e-05,
1238
+ "loss": 2.2864,
1239
  "step": 1025
1240
  },
1241
  {
1242
  "epoch": 0.41,
1243
+ "learning_rate": 4.769819278372235e-05,
1244
+ "loss": 2.3955,
1245
  "step": 1030
1246
  },
1247
  {
1248
  "epoch": 0.41,
1249
+ "learning_rate": 4.7676144553926414e-05,
1250
+ "loss": 2.3802,
1251
  "step": 1035
1252
  },
1253
  {
1254
  "epoch": 0.42,
1255
+ "learning_rate": 4.765399637629262e-05,
1256
+ "loss": 2.2856,
1257
  "step": 1040
1258
  },
1259
  {
1260
  "epoch": 0.42,
1261
+ "learning_rate": 4.7631748348441705e-05,
1262
+ "loss": 2.3698,
1263
  "step": 1045
1264
  },
1265
  {
1266
  "epoch": 0.42,
1267
+ "learning_rate": 4.760940056843454e-05,
1268
+ "loss": 2.3305,
1269
  "step": 1050
1270
  },
1271
  {
1272
  "epoch": 0.42,
1273
+ "learning_rate": 4.758695313477166e-05,
1274
+ "loss": 2.4944,
1275
  "step": 1055
1276
  },
1277
  {
1278
  "epoch": 0.43,
1279
+ "learning_rate": 4.756440614639283e-05,
1280
+ "loss": 2.4622,
1281
  "step": 1060
1282
  },
1283
  {
1284
  "epoch": 0.43,
1285
+ "learning_rate": 4.754175970267661e-05,
1286
+ "loss": 2.3995,
1287
  "step": 1065
1288
  },
1289
  {
1290
  "epoch": 0.43,
1291
+ "learning_rate": 4.751901390343995e-05,
1292
+ "loss": 2.343,
1293
  "step": 1070
1294
  },
1295
  {
1296
  "epoch": 0.43,
1297
+ "learning_rate": 4.7496168848937674e-05,
1298
+ "loss": 2.4388,
1299
  "step": 1075
1300
  },
1301
  {
1302
  "epoch": 0.43,
1303
+ "learning_rate": 4.7473224639862116e-05,
1304
+ "loss": 2.3694,
1305
  "step": 1080
1306
  },
1307
  {
1308
  "epoch": 0.44,
1309
+ "learning_rate": 4.745018137734264e-05,
1310
+ "loss": 2.3485,
1311
  "step": 1085
1312
  },
1313
  {
1314
  "epoch": 0.44,
1315
+ "learning_rate": 4.742703916294521e-05,
1316
+ "loss": 2.46,
1317
  "step": 1090
1318
  },
1319
  {
1320
  "epoch": 0.44,
1321
+ "learning_rate": 4.740379809867193e-05,
1322
+ "loss": 2.3138,
1323
  "step": 1095
1324
  },
1325
  {
1326
  "epoch": 0.44,
1327
+ "learning_rate": 4.738045828696056e-05,
1328
+ "loss": 2.4117,
1329
  "step": 1100
1330
  }
1331
  ],
checkpoint-1100/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c481a87016b183d9406558b8812ee05f00d60a3c721055833e3cdda34cf9bb26
3
  size 4920
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1b42f69ee6ce9030b2875b2859f3499c1e1c8f11e9ffa417d0dbdab2b51173da
3
  size 4920
checkpoint-1200/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f8ac54222bde53351cbf4c771875eb4a60e28dff8500e39019ea2105dc4783dc
3
  size 3158328
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0332c2562ed9fd315df607f9304988caeb0fa43213d87d9a87e83bfc94e53ac7
3
  size 3158328
checkpoint-1200/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:182e06d4a3ae15651a75020a84259190d31e5a818c7c791ba3d5143e661ba2ea
3
  size 6372346
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9edf2c541867a371c332c2ff96cccd771292bec32a4bc66b9f1c33c00099717e
3
  size 6372346
checkpoint-1200/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:96adc02837ce2589b7211f05190c6a97aca71cb72548698f685e857208add8e5
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7c677634149ef320e920e116b867562582b411b61727df957e67337d952334ec
3
  size 14244
checkpoint-1200/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:92dde75fe28e3945eb17f02e6eef4f242b0cf7ef7d81ba92bd9ee050143d6239
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fa90cff08f89d08c6b0758daf4bc69ba723dd76e5adc578ff218ec4b0face27c
3
  size 1064
checkpoint-1200/trainer_state.json CHANGED
@@ -10,1442 +10,1442 @@
10
  "log_history": [
11
  {
12
  "epoch": 0.0,
13
- "learning_rate": 1.999997796189788e-05,
14
- "loss": 2.7465,
15
  "step": 5
16
  },
17
  {
18
  "epoch": 0.0,
19
- "learning_rate": 1.9999911847688657e-05,
20
- "loss": 2.6877,
21
  "step": 10
22
  },
23
  {
24
  "epoch": 0.01,
25
- "learning_rate": 1.999980165766374e-05,
26
- "loss": 2.6848,
27
  "step": 15
28
  },
29
  {
30
  "epoch": 0.01,
31
- "learning_rate": 1.9999647392308798e-05,
32
- "loss": 2.7677,
33
  "step": 20
34
  },
35
  {
36
  "epoch": 0.01,
37
- "learning_rate": 1.9999449052303777e-05,
38
- "loss": 2.7784,
39
  "step": 25
40
  },
41
  {
42
  "epoch": 0.01,
43
- "learning_rate": 1.9999206638522888e-05,
44
- "loss": 2.8161,
45
  "step": 30
46
  },
47
  {
48
  "epoch": 0.01,
49
- "learning_rate": 1.9998920152034595e-05,
50
- "loss": 2.6752,
51
  "step": 35
52
  },
53
  {
54
  "epoch": 0.02,
55
- "learning_rate": 1.9998589594101623e-05,
56
- "loss": 2.6403,
57
  "step": 40
58
  },
59
  {
60
  "epoch": 0.02,
61
- "learning_rate": 1.9998214966180948e-05,
62
- "loss": 2.6433,
63
  "step": 45
64
  },
65
  {
66
  "epoch": 0.02,
67
- "learning_rate": 1.999779626992378e-05,
68
- "loss": 2.5787,
69
  "step": 50
70
  },
71
  {
72
  "epoch": 0.02,
73
- "learning_rate": 1.9997333507175583e-05,
74
- "loss": 2.6812,
75
  "step": 55
76
  },
77
  {
78
  "epoch": 0.02,
79
- "learning_rate": 1.9996826679976033e-05,
80
- "loss": 2.7287,
81
  "step": 60
82
  },
83
  {
84
  "epoch": 0.03,
85
- "learning_rate": 1.9996275790559037e-05,
86
- "loss": 2.6465,
87
  "step": 65
88
  },
89
  {
90
  "epoch": 0.03,
91
- "learning_rate": 1.99956808413527e-05,
92
- "loss": 2.5268,
93
  "step": 70
94
  },
95
  {
96
  "epoch": 0.03,
97
- "learning_rate": 1.999504183497934e-05,
98
- "loss": 2.6893,
99
  "step": 75
100
  },
101
  {
102
  "epoch": 0.03,
103
- "learning_rate": 1.9994358774255444e-05,
104
- "loss": 2.6274,
105
  "step": 80
106
  },
107
  {
108
  "epoch": 0.03,
109
- "learning_rate": 1.9993631662191696e-05,
110
- "loss": 2.6232,
111
  "step": 85
112
  },
113
  {
114
  "epoch": 0.04,
115
- "learning_rate": 1.9992860501992924e-05,
116
- "loss": 2.7188,
117
  "step": 90
118
  },
119
  {
120
  "epoch": 0.04,
121
- "learning_rate": 1.9992045297058108e-05,
122
- "loss": 2.5388,
123
  "step": 95
124
  },
125
  {
126
  "epoch": 0.04,
127
- "learning_rate": 1.9991186050980366e-05,
128
- "loss": 2.6793,
129
  "step": 100
130
  },
131
  {
132
  "epoch": 0.04,
133
- "learning_rate": 1.9990282767546926e-05,
134
- "loss": 2.5523,
135
  "step": 105
136
  },
137
  {
138
  "epoch": 0.04,
139
- "learning_rate": 1.998933545073912e-05,
140
- "loss": 2.5763,
141
  "step": 110
142
  },
143
  {
144
  "epoch": 0.05,
145
- "learning_rate": 1.998834410473236e-05,
146
- "loss": 2.6447,
147
  "step": 115
148
  },
149
  {
150
  "epoch": 0.05,
151
- "learning_rate": 1.998730873389612e-05,
152
- "loss": 2.5579,
153
  "step": 120
154
  },
155
  {
156
  "epoch": 0.05,
157
- "learning_rate": 1.998622934279393e-05,
158
- "loss": 2.5884,
159
  "step": 125
160
  },
161
  {
162
  "epoch": 0.05,
163
- "learning_rate": 1.9985105936183327e-05,
164
- "loss": 2.5051,
165
  "step": 130
166
  },
167
  {
168
  "epoch": 0.05,
169
- "learning_rate": 1.9983938519015868e-05,
170
- "loss": 2.6014,
171
  "step": 135
172
  },
173
  {
174
  "epoch": 0.06,
175
- "learning_rate": 1.998272709643708e-05,
176
- "loss": 2.5878,
177
  "step": 140
178
  },
179
  {
180
  "epoch": 0.06,
181
- "learning_rate": 1.998147167378645e-05,
182
- "loss": 2.6642,
183
  "step": 145
184
  },
185
  {
186
  "epoch": 0.06,
187
- "learning_rate": 1.998017225659742e-05,
188
- "loss": 2.5077,
189
  "step": 150
190
  },
191
  {
192
  "epoch": 0.06,
193
- "learning_rate": 1.9978828850597312e-05,
194
- "loss": 2.5921,
195
  "step": 155
196
  },
197
  {
198
  "epoch": 0.06,
199
- "learning_rate": 1.9977441461707358e-05,
200
- "loss": 2.5577,
201
  "step": 160
202
  },
203
  {
204
  "epoch": 0.07,
205
- "learning_rate": 1.9976010096042634e-05,
206
- "loss": 2.524,
207
  "step": 165
208
  },
209
  {
210
  "epoch": 0.07,
211
- "learning_rate": 1.9974534759912068e-05,
212
- "loss": 2.5708,
213
  "step": 170
214
  },
215
  {
216
  "epoch": 0.07,
217
- "learning_rate": 1.997301545981837e-05,
218
- "loss": 2.5578,
219
  "step": 175
220
  },
221
  {
222
  "epoch": 0.07,
223
- "learning_rate": 1.9971452202458048e-05,
224
- "loss": 2.6874,
225
  "step": 180
226
  },
227
  {
228
  "epoch": 0.07,
229
- "learning_rate": 1.9969844994721338e-05,
230
- "loss": 2.535,
231
  "step": 185
232
  },
233
  {
234
  "epoch": 0.08,
235
- "learning_rate": 1.996819384369221e-05,
236
- "loss": 2.5816,
237
  "step": 190
238
  },
239
  {
240
  "epoch": 0.08,
241
- "learning_rate": 1.9966498756648305e-05,
242
- "loss": 2.6225,
243
  "step": 195
244
  },
245
  {
246
  "epoch": 0.08,
247
- "learning_rate": 1.9964759741060926e-05,
248
- "loss": 2.5387,
249
  "step": 200
250
  },
251
  {
252
  "epoch": 0.08,
253
- "learning_rate": 1.9962976804594993e-05,
254
- "loss": 2.524,
255
  "step": 205
256
  },
257
  {
258
  "epoch": 0.08,
259
- "learning_rate": 1.996114995510901e-05,
260
- "loss": 2.488,
261
  "step": 210
262
  },
263
  {
264
  "epoch": 0.09,
265
- "learning_rate": 1.9959279200655044e-05,
266
- "loss": 2.5824,
267
  "step": 215
268
  },
269
  {
270
  "epoch": 0.09,
271
- "learning_rate": 1.9957364549478663e-05,
272
- "loss": 2.5828,
273
  "step": 220
274
  },
275
  {
276
  "epoch": 0.09,
277
- "learning_rate": 1.9955406010018928e-05,
278
- "loss": 2.5137,
279
  "step": 225
280
  },
281
  {
282
  "epoch": 0.09,
283
- "learning_rate": 1.9953403590908334e-05,
284
- "loss": 2.5539,
285
  "step": 230
286
  },
287
  {
288
  "epoch": 0.09,
289
- "learning_rate": 1.995135730097278e-05,
290
- "loss": 2.6099,
291
  "step": 235
292
  },
293
  {
294
  "epoch": 0.1,
295
- "learning_rate": 1.994926714923155e-05,
296
- "loss": 2.5309,
297
  "step": 240
298
  },
299
  {
300
  "epoch": 0.1,
301
- "learning_rate": 1.9947133144897225e-05,
302
- "loss": 2.5152,
303
  "step": 245
304
  },
305
  {
306
  "epoch": 0.1,
307
- "learning_rate": 1.9944955297375693e-05,
308
- "loss": 2.4738,
309
  "step": 250
310
  },
311
  {
312
  "epoch": 0.1,
313
- "learning_rate": 1.9942733616266076e-05,
314
- "loss": 2.5173,
315
  "step": 255
316
  },
317
  {
318
  "epoch": 0.1,
319
- "learning_rate": 1.99404681113607e-05,
320
- "loss": 2.578,
321
  "step": 260
322
  },
323
  {
324
  "epoch": 0.11,
325
- "learning_rate": 1.993815879264506e-05,
326
- "loss": 2.5089,
327
  "step": 265
328
  },
329
  {
330
  "epoch": 0.11,
331
- "learning_rate": 1.9935805670297744e-05,
332
- "loss": 2.5872,
333
  "step": 270
334
  },
335
  {
336
  "epoch": 0.11,
337
- "learning_rate": 1.993340875469043e-05,
338
- "loss": 2.5882,
339
  "step": 275
340
  },
341
  {
342
  "epoch": 0.11,
343
- "learning_rate": 1.993096805638781e-05,
344
- "loss": 2.5789,
345
  "step": 280
346
  },
347
  {
348
  "epoch": 0.11,
349
- "learning_rate": 1.9928483586147553e-05,
350
- "loss": 2.488,
351
  "step": 285
352
  },
353
  {
354
  "epoch": 0.12,
355
- "learning_rate": 1.9925955354920265e-05,
356
- "loss": 2.617,
357
  "step": 290
358
  },
359
  {
360
  "epoch": 0.12,
361
- "learning_rate": 1.992338337384943e-05,
362
- "loss": 2.5752,
363
  "step": 295
364
  },
365
  {
366
  "epoch": 0.12,
367
- "learning_rate": 1.992076765427136e-05,
368
- "loss": 2.5529,
369
  "step": 300
370
  },
371
  {
372
  "epoch": 0.12,
373
- "learning_rate": 1.9918108207715156e-05,
374
- "loss": 2.4942,
375
  "step": 305
376
  },
377
  {
378
  "epoch": 0.12,
379
- "learning_rate": 1.991540504590265e-05,
380
- "loss": 2.5456,
381
  "step": 310
382
  },
383
  {
384
  "epoch": 0.13,
385
- "learning_rate": 1.991265818074835e-05,
386
- "loss": 2.5957,
387
  "step": 315
388
  },
389
  {
390
  "epoch": 0.13,
391
- "learning_rate": 1.99098676243594e-05,
392
- "loss": 2.5699,
393
  "step": 320
394
  },
395
  {
396
  "epoch": 0.13,
397
- "learning_rate": 1.9907033389035512e-05,
398
- "loss": 2.5544,
399
  "step": 325
400
  },
401
  {
402
  "epoch": 0.13,
403
- "learning_rate": 1.9904155487268912e-05,
404
- "loss": 2.538,
405
  "step": 330
406
  },
407
  {
408
  "epoch": 0.13,
409
- "learning_rate": 1.990123393174431e-05,
410
- "loss": 2.6055,
411
  "step": 335
412
  },
413
  {
414
  "epoch": 0.14,
415
- "learning_rate": 1.9898268735338807e-05,
416
- "loss": 2.5846,
417
  "step": 340
418
  },
419
  {
420
  "epoch": 0.14,
421
- "learning_rate": 1.9895259911121866e-05,
422
- "loss": 2.5405,
423
  "step": 345
424
  },
425
  {
426
  "epoch": 0.14,
427
- "learning_rate": 1.9892207472355243e-05,
428
- "loss": 2.5162,
429
  "step": 350
430
  },
431
  {
432
  "epoch": 0.14,
433
- "learning_rate": 1.988911143249292e-05,
434
- "loss": 2.5484,
435
  "step": 355
436
  },
437
  {
438
  "epoch": 0.14,
439
- "learning_rate": 1.9885971805181083e-05,
440
- "loss": 2.671,
441
  "step": 360
442
  },
443
  {
444
  "epoch": 0.15,
445
- "learning_rate": 1.9882788604258e-05,
446
- "loss": 2.5696,
447
  "step": 365
448
  },
449
  {
450
  "epoch": 0.15,
451
- "learning_rate": 1.987956184375402e-05,
452
- "loss": 2.5021,
453
  "step": 370
454
  },
455
  {
456
  "epoch": 0.15,
457
- "learning_rate": 1.9876291537891482e-05,
458
- "loss": 2.5644,
459
  "step": 375
460
  },
461
  {
462
  "epoch": 0.15,
463
- "learning_rate": 1.9872977701084645e-05,
464
- "loss": 2.5386,
465
  "step": 380
466
  },
467
  {
468
  "epoch": 0.15,
469
- "learning_rate": 1.9869620347939652e-05,
470
- "loss": 2.554,
471
  "step": 385
472
  },
473
  {
474
  "epoch": 0.16,
475
- "learning_rate": 1.9866219493254433e-05,
476
- "loss": 2.4798,
477
  "step": 390
478
  },
479
  {
480
  "epoch": 0.16,
481
- "learning_rate": 1.986277515201867e-05,
482
- "loss": 2.538,
483
  "step": 395
484
  },
485
  {
486
  "epoch": 0.16,
487
- "learning_rate": 1.9859287339413714e-05,
488
- "loss": 2.5041,
489
  "step": 400
490
  },
491
  {
492
  "epoch": 0.16,
493
- "learning_rate": 1.9855756070812514e-05,
494
- "loss": 2.7067,
495
  "step": 405
496
  },
497
  {
498
  "epoch": 0.16,
499
- "learning_rate": 1.9852181361779563e-05,
500
- "loss": 2.5054,
501
  "step": 410
502
  },
503
  {
504
  "epoch": 0.17,
505
- "learning_rate": 1.984856322807082e-05,
506
- "loss": 2.4815,
507
  "step": 415
508
  },
509
  {
510
  "epoch": 0.17,
511
- "learning_rate": 1.9844901685633648e-05,
512
- "loss": 2.5885,
513
  "step": 420
514
  },
515
  {
516
  "epoch": 0.17,
517
- "learning_rate": 1.9841196750606735e-05,
518
- "loss": 2.521,
519
  "step": 425
520
  },
521
  {
522
  "epoch": 0.17,
523
- "learning_rate": 1.9837448439320027e-05,
524
- "loss": 2.4937,
525
  "step": 430
526
  },
527
  {
528
  "epoch": 0.17,
529
- "learning_rate": 1.983365676829466e-05,
530
- "loss": 2.5877,
531
  "step": 435
532
  },
533
  {
534
  "epoch": 0.18,
535
- "learning_rate": 1.9829821754242885e-05,
536
- "loss": 2.5761,
537
  "step": 440
538
  },
539
  {
540
  "epoch": 0.18,
541
- "learning_rate": 1.9825943414067974e-05,
542
- "loss": 2.4917,
543
  "step": 445
544
  },
545
  {
546
  "epoch": 0.18,
547
- "learning_rate": 1.9822021764864194e-05,
548
- "loss": 2.5434,
549
  "step": 450
550
  },
551
  {
552
  "epoch": 0.18,
553
- "learning_rate": 1.9818056823916675e-05,
554
- "loss": 2.5906,
555
  "step": 455
556
  },
557
  {
558
  "epoch": 0.18,
559
- "learning_rate": 1.9814048608701374e-05,
560
- "loss": 2.5508,
561
  "step": 460
562
  },
563
  {
564
  "epoch": 0.19,
565
- "learning_rate": 1.980999713688499e-05,
566
- "loss": 2.6005,
567
  "step": 465
568
  },
569
  {
570
  "epoch": 0.19,
571
- "learning_rate": 1.980590242632486e-05,
572
- "loss": 2.523,
573
  "step": 470
574
  },
575
  {
576
  "epoch": 0.19,
577
- "learning_rate": 1.9801764495068923e-05,
578
- "loss": 2.5453,
579
  "step": 475
580
  },
581
  {
582
  "epoch": 0.19,
583
- "learning_rate": 1.979758336135561e-05,
584
- "loss": 2.6426,
585
  "step": 480
586
  },
587
  {
588
  "epoch": 0.19,
589
- "learning_rate": 1.9793359043613768e-05,
590
- "loss": 2.5454,
591
  "step": 485
592
  },
593
  {
594
  "epoch": 0.2,
595
- "learning_rate": 1.9789091560462587e-05,
596
- "loss": 2.6071,
597
  "step": 490
598
  },
599
  {
600
  "epoch": 0.2,
601
- "learning_rate": 1.9784780930711514e-05,
602
- "loss": 2.5913,
603
  "step": 495
604
  },
605
  {
606
  "epoch": 0.2,
607
- "learning_rate": 1.9780427173360165e-05,
608
- "loss": 2.5082,
609
  "step": 500
610
  },
611
  {
612
  "epoch": 0.2,
613
- "learning_rate": 1.977603030759825e-05,
614
- "loss": 2.565,
615
  "step": 505
616
  },
617
  {
618
  "epoch": 0.2,
619
- "learning_rate": 1.977159035280549e-05,
620
- "loss": 2.6365,
621
  "step": 510
622
  },
623
  {
624
  "epoch": 0.21,
625
- "learning_rate": 1.9767107328551515e-05,
626
- "loss": 2.6872,
627
  "step": 515
628
  },
629
  {
630
  "epoch": 0.21,
631
- "learning_rate": 1.9762581254595797e-05,
632
- "loss": 2.6222,
633
  "step": 520
634
  },
635
  {
636
  "epoch": 0.21,
637
- "learning_rate": 1.975801215088755e-05,
638
- "loss": 2.5633,
639
  "step": 525
640
  },
641
  {
642
  "epoch": 0.21,
643
- "learning_rate": 1.9753400037565653e-05,
644
- "loss": 2.6579,
645
  "step": 530
646
  },
647
  {
648
  "epoch": 0.21,
649
- "learning_rate": 1.9748744934958548e-05,
650
- "loss": 2.5572,
651
  "step": 535
652
  },
653
  {
654
  "epoch": 0.22,
655
- "learning_rate": 1.974404686358416e-05,
656
- "loss": 2.5845,
657
  "step": 540
658
  },
659
  {
660
  "epoch": 0.22,
661
- "learning_rate": 1.97393058441498e-05,
662
- "loss": 2.6484,
663
  "step": 545
664
  },
665
  {
666
  "epoch": 0.22,
667
- "learning_rate": 1.973452189755209e-05,
668
- "loss": 2.5473,
669
  "step": 550
670
  },
671
  {
672
  "epoch": 0.22,
673
- "learning_rate": 1.9729695044876847e-05,
674
- "loss": 2.5995,
675
  "step": 555
676
  },
677
  {
678
  "epoch": 0.22,
679
- "learning_rate": 1.9724825307399003e-05,
680
- "loss": 2.5473,
681
  "step": 560
682
  },
683
  {
684
  "epoch": 0.23,
685
- "learning_rate": 1.971991270658252e-05,
686
- "loss": 2.5798,
687
  "step": 565
688
  },
689
  {
690
  "epoch": 0.23,
691
- "learning_rate": 1.971495726408027e-05,
692
- "loss": 2.655,
693
  "step": 570
694
  },
695
  {
696
  "epoch": 0.23,
697
- "learning_rate": 1.970995900173397e-05,
698
- "loss": 2.6904,
699
  "step": 575
700
  },
701
  {
702
  "epoch": 0.23,
703
- "learning_rate": 1.9704917941574053e-05,
704
- "loss": 2.6973,
705
  "step": 580
706
  },
707
  {
708
  "epoch": 0.23,
709
- "learning_rate": 1.969983410581961e-05,
710
- "loss": 2.6518,
711
  "step": 585
712
  },
713
  {
714
  "epoch": 0.24,
715
- "learning_rate": 1.969470751687825e-05,
716
- "loss": 2.5849,
717
  "step": 590
718
  },
719
  {
720
  "epoch": 0.24,
721
- "learning_rate": 1.9689538197346035e-05,
722
- "loss": 2.4715,
723
  "step": 595
724
  },
725
  {
726
  "epoch": 0.24,
727
- "learning_rate": 1.9684326170007365e-05,
728
- "loss": 2.7246,
729
  "step": 600
730
  },
731
  {
732
  "epoch": 0.24,
733
- "learning_rate": 1.9679071457834874e-05,
734
- "loss": 2.5482,
735
  "step": 605
736
  },
737
  {
738
  "epoch": 0.24,
739
- "learning_rate": 1.967377408398934e-05,
740
- "loss": 2.6084,
741
  "step": 610
742
  },
743
  {
744
  "epoch": 0.25,
745
- "learning_rate": 1.966843407181958e-05,
746
- "loss": 2.5144,
747
  "step": 615
748
  },
749
  {
750
  "epoch": 0.25,
751
- "learning_rate": 1.9663051444862335e-05,
752
- "loss": 2.7663,
753
  "step": 620
754
  },
755
  {
756
  "epoch": 0.25,
757
- "learning_rate": 1.9657626226842187e-05,
758
- "loss": 2.5697,
759
  "step": 625
760
  },
761
  {
762
  "epoch": 0.25,
763
- "learning_rate": 1.9652158441671435e-05,
764
- "loss": 2.6379,
765
  "step": 630
766
  },
767
  {
768
  "epoch": 0.25,
769
- "learning_rate": 1.964664811345e-05,
770
- "loss": 2.6784,
771
  "step": 635
772
  },
773
  {
774
  "epoch": 0.26,
775
- "learning_rate": 1.964109526646532e-05,
776
- "loss": 2.6936,
777
  "step": 640
778
  },
779
  {
780
  "epoch": 0.26,
781
- "learning_rate": 1.963549992519223e-05,
782
- "loss": 2.5672,
783
  "step": 645
784
  },
785
  {
786
  "epoch": 0.26,
787
- "learning_rate": 1.962986211429288e-05,
788
- "loss": 2.62,
789
  "step": 650
790
  },
791
  {
792
  "epoch": 0.26,
793
- "learning_rate": 1.9624181858616593e-05,
794
- "loss": 2.7293,
795
  "step": 655
796
  },
797
  {
798
  "epoch": 0.26,
799
- "learning_rate": 1.9618459183199782e-05,
800
- "loss": 2.6636,
801
  "step": 660
802
  },
803
  {
804
  "epoch": 0.27,
805
- "learning_rate": 1.961269411326583e-05,
806
- "loss": 2.5106,
807
  "step": 665
808
  },
809
  {
810
  "epoch": 0.27,
811
- "learning_rate": 1.9606886674224977e-05,
812
- "loss": 2.6878,
813
  "step": 670
814
  },
815
  {
816
  "epoch": 0.27,
817
- "learning_rate": 1.960103689167421e-05,
818
- "loss": 2.693,
819
  "step": 675
820
  },
821
  {
822
  "epoch": 0.27,
823
- "learning_rate": 1.9595144791397142e-05,
824
- "loss": 2.6562,
825
  "step": 680
826
  },
827
  {
828
  "epoch": 0.27,
829
- "learning_rate": 1.9589210399363925e-05,
830
- "loss": 2.6269,
831
  "step": 685
832
  },
833
  {
834
  "epoch": 0.28,
835
- "learning_rate": 1.95832337417311e-05,
836
- "loss": 2.6459,
837
  "step": 690
838
  },
839
  {
840
  "epoch": 0.28,
841
- "learning_rate": 1.9577214844841515e-05,
842
- "loss": 2.5765,
843
  "step": 695
844
  },
845
  {
846
  "epoch": 0.28,
847
- "learning_rate": 1.957115373522417e-05,
848
- "loss": 2.6113,
849
  "step": 700
850
  },
851
  {
852
  "epoch": 0.28,
853
- "learning_rate": 1.956505043959414e-05,
854
- "loss": 2.6641,
855
  "step": 705
856
  },
857
  {
858
  "epoch": 0.28,
859
- "learning_rate": 1.955890498485244e-05,
860
- "loss": 2.6493,
861
  "step": 710
862
  },
863
  {
864
  "epoch": 0.29,
865
- "learning_rate": 1.9552717398085898e-05,
866
- "loss": 2.6135,
867
  "step": 715
868
  },
869
  {
870
  "epoch": 0.29,
871
- "learning_rate": 1.954648770656705e-05,
872
- "loss": 2.6182,
873
  "step": 720
874
  },
875
  {
876
  "epoch": 0.29,
877
- "learning_rate": 1.954021593775401e-05,
878
- "loss": 2.6487,
879
  "step": 725
880
  },
881
  {
882
  "epoch": 0.29,
883
- "learning_rate": 1.9533902119290352e-05,
884
- "loss": 2.5927,
885
  "step": 730
886
  },
887
  {
888
  "epoch": 0.29,
889
- "learning_rate": 1.952754627900499e-05,
890
- "loss": 2.5825,
891
  "step": 735
892
  },
893
  {
894
  "epoch": 0.3,
895
- "learning_rate": 1.9521148444912065e-05,
896
- "loss": 2.7193,
897
  "step": 740
898
  },
899
  {
900
  "epoch": 0.3,
901
- "learning_rate": 1.9514708645210793e-05,
902
- "loss": 2.5802,
903
  "step": 745
904
  },
905
  {
906
  "epoch": 0.3,
907
- "learning_rate": 1.9508226908285368e-05,
908
- "loss": 2.6628,
909
  "step": 750
910
  },
911
  {
912
  "epoch": 0.3,
913
- "learning_rate": 1.950170326270483e-05,
914
- "loss": 2.5847,
915
  "step": 755
916
  },
917
  {
918
  "epoch": 0.3,
919
- "learning_rate": 1.9495137737222925e-05,
920
- "loss": 2.6594,
921
  "step": 760
922
  },
923
  {
924
  "epoch": 0.31,
925
- "learning_rate": 1.9488530360778007e-05,
926
- "loss": 2.6096,
927
  "step": 765
928
  },
929
  {
930
  "epoch": 0.31,
931
- "learning_rate": 1.948188116249287e-05,
932
- "loss": 2.6378,
933
  "step": 770
934
  },
935
  {
936
  "epoch": 0.31,
937
- "learning_rate": 1.9475190171674675e-05,
938
- "loss": 2.5984,
939
  "step": 775
940
  },
941
  {
942
  "epoch": 0.31,
943
- "learning_rate": 1.9468457417814753e-05,
944
- "loss": 2.6437,
945
  "step": 780
946
  },
947
  {
948
  "epoch": 0.31,
949
- "learning_rate": 1.9461682930588534e-05,
950
- "loss": 2.6522,
951
  "step": 785
952
  },
953
  {
954
  "epoch": 0.32,
955
- "learning_rate": 1.9454866739855384e-05,
956
- "loss": 2.6242,
957
  "step": 790
958
  },
959
  {
960
  "epoch": 0.32,
961
- "learning_rate": 1.944800887565849e-05,
962
- "loss": 2.5961,
963
  "step": 795
964
  },
965
  {
966
  "epoch": 0.32,
967
- "learning_rate": 1.9441109368224704e-05,
968
- "loss": 2.6365,
969
  "step": 800
970
  },
971
  {
972
  "epoch": 0.32,
973
- "learning_rate": 1.9434168247964447e-05,
974
- "loss": 2.5674,
975
  "step": 805
976
  },
977
  {
978
  "epoch": 0.32,
979
- "learning_rate": 1.9427185545471537e-05,
980
- "loss": 2.6369,
981
  "step": 810
982
  },
983
  {
984
  "epoch": 0.33,
985
- "learning_rate": 1.9420161291523076e-05,
986
- "loss": 2.5763,
987
  "step": 815
988
  },
989
  {
990
  "epoch": 0.33,
991
- "learning_rate": 1.941309551707931e-05,
992
- "loss": 2.5651,
993
  "step": 820
994
  },
995
  {
996
  "epoch": 0.33,
997
- "learning_rate": 1.9405988253283492e-05,
998
- "loss": 2.6223,
999
  "step": 825
1000
  },
1001
  {
1002
  "epoch": 0.33,
1003
- "learning_rate": 1.939883953146174e-05,
1004
- "loss": 2.6616,
1005
  "step": 830
1006
  },
1007
  {
1008
  "epoch": 0.33,
1009
- "learning_rate": 1.939164938312291e-05,
1010
- "loss": 2.6496,
1011
  "step": 835
1012
  },
1013
  {
1014
  "epoch": 0.34,
1015
- "learning_rate": 1.9384417839958443e-05,
1016
- "loss": 2.7161,
1017
  "step": 840
1018
  },
1019
  {
1020
  "epoch": 0.34,
1021
- "learning_rate": 1.937714493384224e-05,
1022
- "loss": 2.6047,
1023
  "step": 845
1024
  },
1025
  {
1026
  "epoch": 0.34,
1027
- "learning_rate": 1.936983069683051e-05,
1028
- "loss": 2.5978,
1029
  "step": 850
1030
  },
1031
  {
1032
  "epoch": 0.34,
1033
- "learning_rate": 1.936247516116163e-05,
1034
- "loss": 2.6331,
1035
  "step": 855
1036
  },
1037
  {
1038
  "epoch": 0.34,
1039
- "learning_rate": 1.935507835925601e-05,
1040
- "loss": 2.5679,
1041
  "step": 860
1042
  },
1043
  {
1044
  "epoch": 0.35,
1045
- "learning_rate": 1.934764032371595e-05,
1046
- "loss": 2.6647,
1047
  "step": 865
1048
  },
1049
  {
1050
  "epoch": 0.35,
1051
- "learning_rate": 1.9340161087325483e-05,
1052
- "loss": 2.6142,
1053
  "step": 870
1054
  },
1055
  {
1056
  "epoch": 0.35,
1057
- "learning_rate": 1.9332640683050243e-05,
1058
- "loss": 2.6181,
1059
  "step": 875
1060
  },
1061
  {
1062
  "epoch": 0.35,
1063
- "learning_rate": 1.932507914403732e-05,
1064
- "loss": 2.6105,
1065
  "step": 880
1066
  },
1067
  {
1068
  "epoch": 0.35,
1069
- "learning_rate": 1.9317476503615108e-05,
1070
- "loss": 2.6415,
1071
  "step": 885
1072
  },
1073
  {
1074
  "epoch": 0.36,
1075
- "learning_rate": 1.9309832795293156e-05,
1076
- "loss": 2.6736,
1077
  "step": 890
1078
  },
1079
  {
1080
  "epoch": 0.36,
1081
- "learning_rate": 1.930214805276204e-05,
1082
- "loss": 2.6507,
1083
  "step": 895
1084
  },
1085
  {
1086
  "epoch": 0.36,
1087
- "learning_rate": 1.9294422309893177e-05,
1088
- "loss": 2.6771,
1089
  "step": 900
1090
  },
1091
  {
1092
  "epoch": 0.36,
1093
- "learning_rate": 1.9286655600738707e-05,
1094
- "loss": 2.6763,
1095
  "step": 905
1096
  },
1097
  {
1098
  "epoch": 0.36,
1099
- "learning_rate": 1.9278847959531348e-05,
1100
- "loss": 2.7135,
1101
  "step": 910
1102
  },
1103
  {
1104
  "epoch": 0.37,
1105
- "learning_rate": 1.927099942068421e-05,
1106
- "loss": 2.6166,
1107
  "step": 915
1108
  },
1109
  {
1110
  "epoch": 0.37,
1111
- "learning_rate": 1.9263110018790673e-05,
1112
- "loss": 2.6238,
1113
  "step": 920
1114
  },
1115
  {
1116
  "epoch": 0.37,
1117
- "learning_rate": 1.9255179788624233e-05,
1118
- "loss": 2.6424,
1119
  "step": 925
1120
  },
1121
  {
1122
  "epoch": 0.37,
1123
- "learning_rate": 1.9247208765138325e-05,
1124
- "loss": 2.6023,
1125
  "step": 930
1126
  },
1127
  {
1128
  "epoch": 0.37,
1129
- "learning_rate": 1.9239196983466204e-05,
1130
- "loss": 2.58,
1131
  "step": 935
1132
  },
1133
  {
1134
  "epoch": 0.38,
1135
- "learning_rate": 1.9231144478920756e-05,
1136
- "loss": 2.6173,
1137
  "step": 940
1138
  },
1139
  {
1140
  "epoch": 0.38,
1141
- "learning_rate": 1.9223051286994368e-05,
1142
- "loss": 2.628,
1143
  "step": 945
1144
  },
1145
  {
1146
  "epoch": 0.38,
1147
- "learning_rate": 1.9214917443358753e-05,
1148
- "loss": 2.6868,
1149
  "step": 950
1150
  },
1151
  {
1152
  "epoch": 0.38,
1153
- "learning_rate": 1.9206742983864813e-05,
1154
- "loss": 2.6342,
1155
  "step": 955
1156
  },
1157
  {
1158
  "epoch": 0.38,
1159
- "learning_rate": 1.9198527944542462e-05,
1160
- "loss": 2.5934,
1161
  "step": 960
1162
  },
1163
  {
1164
  "epoch": 0.39,
1165
- "learning_rate": 1.919027236160047e-05,
1166
- "loss": 2.6354,
1167
  "step": 965
1168
  },
1169
  {
1170
  "epoch": 0.39,
1171
- "learning_rate": 1.9181976271426315e-05,
1172
- "loss": 2.5955,
1173
  "step": 970
1174
  },
1175
  {
1176
  "epoch": 0.39,
1177
- "learning_rate": 1.9173639710586015e-05,
1178
- "loss": 2.6134,
1179
  "step": 975
1180
  },
1181
  {
1182
  "epoch": 0.39,
1183
- "learning_rate": 1.9165262715823966e-05,
1184
- "loss": 2.7,
1185
  "step": 980
1186
  },
1187
  {
1188
  "epoch": 0.39,
1189
- "learning_rate": 1.915684532406278e-05,
1190
- "loss": 2.7197,
1191
  "step": 985
1192
  },
1193
  {
1194
  "epoch": 0.4,
1195
- "learning_rate": 1.9148387572403123e-05,
1196
- "loss": 2.4881,
1197
  "step": 990
1198
  },
1199
  {
1200
  "epoch": 0.4,
1201
- "learning_rate": 1.913988949812356e-05,
1202
- "loss": 2.6999,
1203
  "step": 995
1204
  },
1205
  {
1206
  "epoch": 0.4,
1207
- "learning_rate": 1.9131351138680368e-05,
1208
- "loss": 2.5981,
1209
  "step": 1000
1210
  },
1211
  {
1212
  "epoch": 0.4,
1213
- "learning_rate": 1.9122772531707405e-05,
1214
- "loss": 2.5515,
1215
  "step": 1005
1216
  },
1217
  {
1218
  "epoch": 0.4,
1219
- "learning_rate": 1.9114153715015905e-05,
1220
- "loss": 2.623,
1221
  "step": 1010
1222
  },
1223
  {
1224
  "epoch": 0.41,
1225
- "learning_rate": 1.9105494726594344e-05,
1226
- "loss": 2.6006,
1227
  "step": 1015
1228
  },
1229
  {
1230
  "epoch": 0.41,
1231
- "learning_rate": 1.9096795604608258e-05,
1232
- "loss": 2.5929,
1233
  "step": 1020
1234
  },
1235
  {
1236
  "epoch": 0.41,
1237
- "learning_rate": 1.9088056387400074e-05,
1238
- "loss": 2.5396,
1239
  "step": 1025
1240
  },
1241
  {
1242
  "epoch": 0.41,
1243
- "learning_rate": 1.907927711348894e-05,
1244
- "loss": 2.6428,
1245
  "step": 1030
1246
  },
1247
  {
1248
  "epoch": 0.41,
1249
- "learning_rate": 1.9070457821570566e-05,
1250
- "loss": 2.6296,
1251
  "step": 1035
1252
  },
1253
  {
1254
  "epoch": 0.42,
1255
- "learning_rate": 1.9061598550517048e-05,
1256
- "loss": 2.534,
1257
  "step": 1040
1258
  },
1259
  {
1260
  "epoch": 0.42,
1261
- "learning_rate": 1.9052699339376685e-05,
1262
- "loss": 2.6049,
1263
  "step": 1045
1264
  },
1265
  {
1266
  "epoch": 0.42,
1267
- "learning_rate": 1.9043760227373817e-05,
1268
- "loss": 2.5776,
1269
  "step": 1050
1270
  },
1271
  {
1272
  "epoch": 0.42,
1273
- "learning_rate": 1.9034781253908664e-05,
1274
- "loss": 2.7483,
1275
  "step": 1055
1276
  },
1277
  {
1278
  "epoch": 0.43,
1279
- "learning_rate": 1.902576245855713e-05,
1280
- "loss": 2.6973,
1281
  "step": 1060
1282
  },
1283
  {
1284
  "epoch": 0.43,
1285
- "learning_rate": 1.9016703881070646e-05,
1286
- "loss": 2.645,
1287
  "step": 1065
1288
  },
1289
  {
1290
  "epoch": 0.43,
1291
- "learning_rate": 1.900760556137598e-05,
1292
- "loss": 2.5775,
1293
  "step": 1070
1294
  },
1295
  {
1296
  "epoch": 0.43,
1297
- "learning_rate": 1.899846753957507e-05,
1298
- "loss": 2.6872,
1299
  "step": 1075
1300
  },
1301
  {
1302
  "epoch": 0.43,
1303
- "learning_rate": 1.8989289855944846e-05,
1304
- "loss": 2.5929,
1305
  "step": 1080
1306
  },
1307
  {
1308
  "epoch": 0.44,
1309
- "learning_rate": 1.8980072550937058e-05,
1310
- "loss": 2.5668,
1311
  "step": 1085
1312
  },
1313
  {
1314
  "epoch": 0.44,
1315
- "learning_rate": 1.8970815665178086e-05,
1316
- "loss": 2.6873,
1317
  "step": 1090
1318
  },
1319
  {
1320
  "epoch": 0.44,
1321
- "learning_rate": 1.896151923946877e-05,
1322
- "loss": 2.5679,
1323
  "step": 1095
1324
  },
1325
  {
1326
  "epoch": 0.44,
1327
- "learning_rate": 1.8952183314784224e-05,
1328
- "loss": 2.6734,
1329
  "step": 1100
1330
  },
1331
  {
1332
  "epoch": 0.44,
1333
- "learning_rate": 1.8942807932273664e-05,
1334
- "loss": 2.6083,
1335
  "step": 1105
1336
  },
1337
  {
1338
  "epoch": 0.45,
1339
- "learning_rate": 1.893339313326021e-05,
1340
- "loss": 2.6189,
1341
  "step": 1110
1342
  },
1343
  {
1344
  "epoch": 0.45,
1345
- "learning_rate": 1.892393895924073e-05,
1346
- "loss": 2.5117,
1347
  "step": 1115
1348
  },
1349
  {
1350
  "epoch": 0.45,
1351
- "learning_rate": 1.8914445451885633e-05,
1352
- "loss": 2.6646,
1353
  "step": 1120
1354
  },
1355
  {
1356
  "epoch": 0.45,
1357
- "learning_rate": 1.890491265303869e-05,
1358
- "loss": 2.7551,
1359
  "step": 1125
1360
  },
1361
  {
1362
  "epoch": 0.45,
1363
- "learning_rate": 1.8895340604716867e-05,
1364
- "loss": 2.7003,
1365
  "step": 1130
1366
  },
1367
  {
1368
  "epoch": 0.46,
1369
- "learning_rate": 1.888572934911012e-05,
1370
- "loss": 2.6109,
1371
  "step": 1135
1372
  },
1373
  {
1374
  "epoch": 0.46,
1375
- "learning_rate": 1.8876078928581207e-05,
1376
- "loss": 2.6227,
1377
  "step": 1140
1378
  },
1379
  {
1380
  "epoch": 0.46,
1381
- "learning_rate": 1.8866389385665525e-05,
1382
- "loss": 2.5943,
1383
  "step": 1145
1384
  },
1385
  {
1386
  "epoch": 0.46,
1387
- "learning_rate": 1.8856660763070904e-05,
1388
- "loss": 2.7203,
1389
  "step": 1150
1390
  },
1391
  {
1392
  "epoch": 0.46,
1393
- "learning_rate": 1.8846893103677414e-05,
1394
- "loss": 2.561,
1395
  "step": 1155
1396
  },
1397
  {
1398
  "epoch": 0.47,
1399
- "learning_rate": 1.8837086450537195e-05,
1400
- "loss": 2.6959,
1401
  "step": 1160
1402
  },
1403
  {
1404
  "epoch": 0.47,
1405
- "learning_rate": 1.8827240846874245e-05,
1406
- "loss": 2.7047,
1407
  "step": 1165
1408
  },
1409
  {
1410
  "epoch": 0.47,
1411
- "learning_rate": 1.8817356336084252e-05,
1412
- "loss": 2.5853,
1413
  "step": 1170
1414
  },
1415
  {
1416
  "epoch": 0.47,
1417
- "learning_rate": 1.8807432961734388e-05,
1418
- "loss": 2.6165,
1419
  "step": 1175
1420
  },
1421
  {
1422
  "epoch": 0.47,
1423
- "learning_rate": 1.8797470767563122e-05,
1424
- "loss": 2.756,
1425
  "step": 1180
1426
  },
1427
  {
1428
  "epoch": 0.48,
1429
- "learning_rate": 1.878746979748002e-05,
1430
- "loss": 2.667,
1431
  "step": 1185
1432
  },
1433
  {
1434
  "epoch": 0.48,
1435
- "learning_rate": 1.8777430095565563e-05,
1436
- "loss": 2.7607,
1437
  "step": 1190
1438
  },
1439
  {
1440
  "epoch": 0.48,
1441
- "learning_rate": 1.876735170607095e-05,
1442
- "loss": 2.6816,
1443
  "step": 1195
1444
  },
1445
  {
1446
  "epoch": 0.48,
1447
- "learning_rate": 1.8757234673417892e-05,
1448
- "loss": 2.688,
1449
  "step": 1200
1450
  }
1451
  ],
 
10
  "log_history": [
11
  {
12
  "epoch": 0.0,
13
+ "learning_rate": 4.99999449047447e-05,
14
+ "loss": 2.7413,
15
  "step": 5
16
  },
17
  {
18
  "epoch": 0.0,
19
+ "learning_rate": 4.9999779619221645e-05,
20
+ "loss": 2.6684,
21
  "step": 10
22
  },
23
  {
24
  "epoch": 0.01,
25
+ "learning_rate": 4.999950414415935e-05,
26
+ "loss": 2.6511,
27
  "step": 15
28
  },
29
  {
30
  "epoch": 0.01,
31
+ "learning_rate": 4.999911848077199e-05,
32
+ "loss": 2.72,
33
  "step": 20
34
  },
35
  {
36
  "epoch": 0.01,
37
+ "learning_rate": 4.999862263075944e-05,
38
+ "loss": 2.7222,
39
  "step": 25
40
  },
41
  {
42
  "epoch": 0.01,
43
+ "learning_rate": 4.999801659630722e-05,
44
+ "loss": 2.7479,
45
  "step": 30
46
  },
47
  {
48
  "epoch": 0.01,
49
+ "learning_rate": 4.999730038008649e-05,
50
+ "loss": 2.6063,
51
  "step": 35
52
  },
53
  {
54
  "epoch": 0.02,
55
+ "learning_rate": 4.9996473985254055e-05,
56
+ "loss": 2.576,
57
  "step": 40
58
  },
59
  {
60
  "epoch": 0.02,
61
+ "learning_rate": 4.999553741545237e-05,
62
+ "loss": 2.579,
63
  "step": 45
64
  },
65
  {
66
  "epoch": 0.02,
67
+ "learning_rate": 4.999449067480945e-05,
68
+ "loss": 2.5114,
69
  "step": 50
70
  },
71
  {
72
  "epoch": 0.02,
73
+ "learning_rate": 4.9993333767938954e-05,
74
+ "loss": 2.6174,
75
  "step": 55
76
  },
77
  {
78
  "epoch": 0.02,
79
+ "learning_rate": 4.9992066699940085e-05,
80
+ "loss": 2.6581,
81
  "step": 60
82
  },
83
  {
84
  "epoch": 0.03,
85
+ "learning_rate": 4.9990689476397586e-05,
86
+ "loss": 2.586,
87
  "step": 65
88
  },
89
  {
90
  "epoch": 0.03,
91
+ "learning_rate": 4.998920210338175e-05,
92
+ "loss": 2.4642,
93
  "step": 70
94
  },
95
  {
96
  "epoch": 0.03,
97
+ "learning_rate": 4.9987604587448343e-05,
98
+ "loss": 2.6262,
99
  "step": 75
100
  },
101
  {
102
  "epoch": 0.03,
103
+ "learning_rate": 4.998589693563861e-05,
104
+ "loss": 2.5678,
105
  "step": 80
106
  },
107
  {
108
  "epoch": 0.03,
109
+ "learning_rate": 4.998407915547924e-05,
110
+ "loss": 2.5597,
111
  "step": 85
112
  },
113
  {
114
  "epoch": 0.04,
115
+ "learning_rate": 4.9982151254982304e-05,
116
+ "loss": 2.6578,
117
  "step": 90
118
  },
119
  {
120
  "epoch": 0.04,
121
+ "learning_rate": 4.998011324264527e-05,
122
+ "loss": 2.4825,
123
  "step": 95
124
  },
125
  {
126
  "epoch": 0.04,
127
+ "learning_rate": 4.997796512745091e-05,
128
+ "loss": 2.6197,
129
  "step": 100
130
  },
131
  {
132
  "epoch": 0.04,
133
+ "learning_rate": 4.997570691886732e-05,
134
+ "loss": 2.4921,
135
  "step": 105
136
  },
137
  {
138
  "epoch": 0.04,
139
+ "learning_rate": 4.99733386268478e-05,
140
+ "loss": 2.5126,
141
  "step": 110
142
  },
143
  {
144
  "epoch": 0.05,
145
+ "learning_rate": 4.99708602618309e-05,
146
+ "loss": 2.584,
147
  "step": 115
148
  },
149
  {
150
  "epoch": 0.05,
151
+ "learning_rate": 4.9968271834740305e-05,
152
+ "loss": 2.4991,
153
  "step": 120
154
  },
155
  {
156
  "epoch": 0.05,
157
+ "learning_rate": 4.996557335698482e-05,
158
+ "loss": 2.5242,
159
  "step": 125
160
  },
161
  {
162
  "epoch": 0.05,
163
+ "learning_rate": 4.996276484045832e-05,
164
+ "loss": 2.4506,
165
  "step": 130
166
  },
167
  {
168
  "epoch": 0.05,
169
+ "learning_rate": 4.995984629753967e-05,
170
+ "loss": 2.5479,
171
  "step": 135
172
  },
173
  {
174
  "epoch": 0.06,
175
+ "learning_rate": 4.9956817741092696e-05,
176
+ "loss": 2.5316,
177
  "step": 140
178
  },
179
  {
180
  "epoch": 0.06,
181
+ "learning_rate": 4.995367918446613e-05,
182
+ "loss": 2.6053,
183
  "step": 145
184
  },
185
  {
186
  "epoch": 0.06,
187
+ "learning_rate": 4.995043064149354e-05,
188
+ "loss": 2.4533,
189
  "step": 150
190
  },
191
  {
192
  "epoch": 0.06,
193
+ "learning_rate": 4.9947072126493276e-05,
194
+ "loss": 2.5279,
195
  "step": 155
196
  },
197
  {
198
  "epoch": 0.06,
199
+ "learning_rate": 4.994360365426839e-05,
200
+ "loss": 2.4994,
201
  "step": 160
202
  },
203
  {
204
  "epoch": 0.07,
205
+ "learning_rate": 4.994002524010659e-05,
206
+ "loss": 2.4675,
207
  "step": 165
208
  },
209
  {
210
  "epoch": 0.07,
211
+ "learning_rate": 4.9936336899780166e-05,
212
+ "loss": 2.5233,
213
  "step": 170
214
  },
215
  {
216
  "epoch": 0.07,
217
+ "learning_rate": 4.993253864954592e-05,
218
+ "loss": 2.4974,
219
  "step": 175
220
  },
221
  {
222
  "epoch": 0.07,
223
+ "learning_rate": 4.992863050614511e-05,
224
+ "loss": 2.6295,
225
  "step": 180
226
  },
227
  {
228
  "epoch": 0.07,
229
+ "learning_rate": 4.9924612486803346e-05,
230
+ "loss": 2.4744,
231
  "step": 185
232
  },
233
  {
234
  "epoch": 0.08,
235
+ "learning_rate": 4.992048460923052e-05,
236
+ "loss": 2.5248,
237
  "step": 190
238
  },
239
  {
240
  "epoch": 0.08,
241
+ "learning_rate": 4.991624689162076e-05,
242
+ "loss": 2.5727,
243
  "step": 195
244
  },
245
  {
246
  "epoch": 0.08,
247
+ "learning_rate": 4.991189935265231e-05,
248
+ "loss": 2.4845,
249
  "step": 200
250
  },
251
  {
252
  "epoch": 0.08,
253
+ "learning_rate": 4.990744201148748e-05,
254
+ "loss": 2.4544,
255
  "step": 205
256
  },
257
  {
258
  "epoch": 0.08,
259
+ "learning_rate": 4.990287488777253e-05,
260
+ "loss": 2.4383,
261
  "step": 210
262
  },
263
  {
264
  "epoch": 0.09,
265
+ "learning_rate": 4.989819800163761e-05,
266
+ "loss": 2.5263,
267
  "step": 215
268
  },
269
  {
270
  "epoch": 0.09,
271
+ "learning_rate": 4.989341137369666e-05,
272
+ "loss": 2.5219,
273
  "step": 220
274
  },
275
  {
276
  "epoch": 0.09,
277
+ "learning_rate": 4.9888515025047316e-05,
278
+ "loss": 2.4512,
279
  "step": 225
280
  },
281
  {
282
  "epoch": 0.09,
283
+ "learning_rate": 4.988350897727083e-05,
284
+ "loss": 2.491,
285
  "step": 230
286
  },
287
  {
288
  "epoch": 0.09,
289
+ "learning_rate": 4.9878393252431953e-05,
290
+ "loss": 2.5404,
291
  "step": 235
292
  },
293
  {
294
  "epoch": 0.1,
295
+ "learning_rate": 4.987316787307888e-05,
296
+ "loss": 2.4777,
297
  "step": 240
298
  },
299
  {
300
  "epoch": 0.1,
301
+ "learning_rate": 4.9867832862243055e-05,
302
+ "loss": 2.4498,
303
  "step": 245
304
  },
305
  {
306
  "epoch": 0.1,
307
+ "learning_rate": 4.986238824343923e-05,
308
+ "loss": 2.4174,
309
  "step": 250
310
  },
311
  {
312
  "epoch": 0.1,
313
+ "learning_rate": 4.985683404066519e-05,
314
+ "loss": 2.4589,
315
  "step": 255
316
  },
317
  {
318
  "epoch": 0.1,
319
+ "learning_rate": 4.985117027840175e-05,
320
+ "loss": 2.5029,
321
  "step": 260
322
  },
323
  {
324
  "epoch": 0.11,
325
+ "learning_rate": 4.984539698161264e-05,
326
+ "loss": 2.4376,
327
  "step": 265
328
  },
329
  {
330
  "epoch": 0.11,
331
+ "learning_rate": 4.983951417574436e-05,
332
+ "loss": 2.526,
333
  "step": 270
334
  },
335
  {
336
  "epoch": 0.11,
337
+ "learning_rate": 4.983352188672608e-05,
338
+ "loss": 2.5214,
339
  "step": 275
340
  },
341
  {
342
  "epoch": 0.11,
343
+ "learning_rate": 4.982742014096952e-05,
344
+ "loss": 2.5188,
345
  "step": 280
346
  },
347
  {
348
  "epoch": 0.11,
349
+ "learning_rate": 4.982120896536888e-05,
350
+ "loss": 2.4175,
351
  "step": 285
352
  },
353
  {
354
  "epoch": 0.12,
355
+ "learning_rate": 4.981488838730066e-05,
356
+ "loss": 2.5346,
357
  "step": 290
358
  },
359
  {
360
  "epoch": 0.12,
361
+ "learning_rate": 4.980845843462357e-05,
362
+ "loss": 2.496,
363
  "step": 295
364
  },
365
  {
366
  "epoch": 0.12,
367
+ "learning_rate": 4.98019191356784e-05,
368
+ "loss": 2.477,
369
  "step": 300
370
  },
371
  {
372
  "epoch": 0.12,
373
+ "learning_rate": 4.9795270519287886e-05,
374
+ "loss": 2.4205,
375
  "step": 305
376
  },
377
  {
378
  "epoch": 0.12,
379
+ "learning_rate": 4.9788512614756624e-05,
380
+ "loss": 2.4621,
381
  "step": 310
382
  },
383
  {
384
  "epoch": 0.13,
385
+ "learning_rate": 4.9781645451870875e-05,
386
+ "loss": 2.5098,
387
  "step": 315
388
  },
389
  {
390
  "epoch": 0.13,
391
+ "learning_rate": 4.9774669060898496e-05,
392
+ "loss": 2.4847,
393
  "step": 320
394
  },
395
  {
396
  "epoch": 0.13,
397
+ "learning_rate": 4.976758347258877e-05,
398
+ "loss": 2.4873,
399
  "step": 325
400
  },
401
  {
402
  "epoch": 0.13,
403
+ "learning_rate": 4.976038871817228e-05,
404
+ "loss": 2.4462,
405
  "step": 330
406
  },
407
  {
408
  "epoch": 0.13,
409
+ "learning_rate": 4.9753084829360776e-05,
410
+ "loss": 2.4994,
411
  "step": 335
412
  },
413
  {
414
  "epoch": 0.14,
415
+ "learning_rate": 4.974567183834702e-05,
416
+ "loss": 2.4994,
417
  "step": 340
418
  },
419
  {
420
  "epoch": 0.14,
421
+ "learning_rate": 4.9738149777804665e-05,
422
+ "loss": 2.4584,
423
  "step": 345
424
  },
425
  {
426
  "epoch": 0.14,
427
+ "learning_rate": 4.97305186808881e-05,
428
+ "loss": 2.4294,
429
  "step": 350
430
  },
431
  {
432
  "epoch": 0.14,
433
+ "learning_rate": 4.9722778581232305e-05,
434
+ "loss": 2.4499,
435
  "step": 355
436
  },
437
  {
438
  "epoch": 0.14,
439
+ "learning_rate": 4.9714929512952704e-05,
440
+ "loss": 2.5851,
441
  "step": 360
442
  },
443
  {
444
  "epoch": 0.15,
445
+ "learning_rate": 4.9706971510645e-05,
446
+ "loss": 2.4746,
447
  "step": 365
448
  },
449
  {
450
  "epoch": 0.15,
451
+ "learning_rate": 4.969890460938505e-05,
452
+ "loss": 2.4184,
453
  "step": 370
454
  },
455
  {
456
  "epoch": 0.15,
457
+ "learning_rate": 4.9690728844728704e-05,
458
+ "loss": 2.4716,
459
  "step": 375
460
  },
461
  {
462
  "epoch": 0.15,
463
+ "learning_rate": 4.968244425271161e-05,
464
+ "loss": 2.4477,
465
  "step": 380
466
  },
467
  {
468
  "epoch": 0.15,
469
+ "learning_rate": 4.9674050869849124e-05,
470
+ "loss": 2.4563,
471
  "step": 385
472
  },
473
  {
474
  "epoch": 0.16,
475
+ "learning_rate": 4.966554873313608e-05,
476
+ "loss": 2.3708,
477
  "step": 390
478
  },
479
  {
480
  "epoch": 0.16,
481
+ "learning_rate": 4.9656937880046676e-05,
482
+ "loss": 2.4351,
483
  "step": 395
484
  },
485
  {
486
  "epoch": 0.16,
487
+ "learning_rate": 4.9648218348534284e-05,
488
+ "loss": 2.3933,
489
  "step": 400
490
  },
491
  {
492
  "epoch": 0.16,
493
+ "learning_rate": 4.963939017703128e-05,
494
+ "loss": 2.5871,
495
  "step": 405
496
  },
497
  {
498
  "epoch": 0.16,
499
+ "learning_rate": 4.9630453404448905e-05,
500
+ "loss": 2.3965,
501
  "step": 410
502
  },
503
  {
504
  "epoch": 0.17,
505
+ "learning_rate": 4.962140807017705e-05,
506
+ "loss": 2.372,
507
  "step": 415
508
  },
509
  {
510
  "epoch": 0.17,
511
+ "learning_rate": 4.961225421408412e-05,
512
+ "loss": 2.4847,
513
  "step": 420
514
  },
515
  {
516
  "epoch": 0.17,
517
+ "learning_rate": 4.960299187651684e-05,
518
+ "loss": 2.4046,
519
  "step": 425
520
  },
521
  {
522
  "epoch": 0.17,
523
+ "learning_rate": 4.959362109830007e-05,
524
+ "loss": 2.3912,
525
  "step": 430
526
  },
527
  {
528
  "epoch": 0.17,
529
+ "learning_rate": 4.9584141920736656e-05,
530
+ "loss": 2.4732,
531
  "step": 435
532
  },
533
  {
534
  "epoch": 0.18,
535
+ "learning_rate": 4.957455438560721e-05,
536
+ "loss": 2.4854,
537
  "step": 440
538
  },
539
  {
540
  "epoch": 0.18,
541
+ "learning_rate": 4.956485853516993e-05,
542
+ "loss": 2.3736,
543
  "step": 445
544
  },
545
  {
546
  "epoch": 0.18,
547
+ "learning_rate": 4.9555054412160476e-05,
548
+ "loss": 2.4222,
549
  "step": 450
550
  },
551
  {
552
  "epoch": 0.18,
553
+ "learning_rate": 4.9545142059791686e-05,
554
+ "loss": 2.4498,
555
  "step": 455
556
  },
557
  {
558
  "epoch": 0.18,
559
+ "learning_rate": 4.9535121521753434e-05,
560
+ "loss": 2.4172,
561
  "step": 460
562
  },
563
  {
564
  "epoch": 0.19,
565
+ "learning_rate": 4.952499284221247e-05,
566
+ "loss": 2.4742,
567
  "step": 465
568
  },
569
  {
570
  "epoch": 0.19,
571
+ "learning_rate": 4.951475606581215e-05,
572
+ "loss": 2.4008,
573
  "step": 470
574
  },
575
  {
576
  "epoch": 0.19,
577
+ "learning_rate": 4.950441123767231e-05,
578
+ "loss": 2.4278,
579
  "step": 475
580
  },
581
  {
582
  "epoch": 0.19,
583
+ "learning_rate": 4.949395840338903e-05,
584
+ "loss": 2.5041,
585
  "step": 480
586
  },
587
  {
588
  "epoch": 0.19,
589
+ "learning_rate": 4.948339760903442e-05,
590
+ "loss": 2.4088,
591
  "step": 485
592
  },
593
  {
594
  "epoch": 0.2,
595
+ "learning_rate": 4.947272890115647e-05,
596
+ "loss": 2.466,
597
  "step": 490
598
  },
599
  {
600
  "epoch": 0.2,
601
+ "learning_rate": 4.946195232677878e-05,
602
+ "loss": 2.4404,
603
  "step": 495
604
  },
605
  {
606
  "epoch": 0.2,
607
+ "learning_rate": 4.9451067933400406e-05,
608
+ "loss": 2.3658,
609
  "step": 500
610
  },
611
  {
612
  "epoch": 0.2,
613
+ "learning_rate": 4.9440075768995625e-05,
614
+ "loss": 2.4247,
615
  "step": 505
616
  },
617
  {
618
  "epoch": 0.2,
619
+ "learning_rate": 4.942897588201372e-05,
620
+ "loss": 2.5082,
621
  "step": 510
622
  },
623
  {
624
  "epoch": 0.21,
625
+ "learning_rate": 4.941776832137879e-05,
626
+ "loss": 2.5545,
627
  "step": 515
628
  },
629
  {
630
  "epoch": 0.21,
631
+ "learning_rate": 4.940645313648949e-05,
632
+ "loss": 2.4665,
633
  "step": 520
634
  },
635
  {
636
  "epoch": 0.21,
637
+ "learning_rate": 4.939503037721888e-05,
638
+ "loss": 2.4195,
639
  "step": 525
640
  },
641
  {
642
  "epoch": 0.21,
643
+ "learning_rate": 4.938350009391413e-05,
644
+ "loss": 2.4908,
645
  "step": 530
646
  },
647
  {
648
  "epoch": 0.21,
649
+ "learning_rate": 4.937186233739637e-05,
650
+ "loss": 2.4093,
651
  "step": 535
652
  },
653
  {
654
  "epoch": 0.22,
655
+ "learning_rate": 4.93601171589604e-05,
656
+ "loss": 2.44,
657
  "step": 540
658
  },
659
  {
660
  "epoch": 0.22,
661
+ "learning_rate": 4.9348264610374494e-05,
662
+ "loss": 2.4989,
663
  "step": 545
664
  },
665
  {
666
  "epoch": 0.22,
667
+ "learning_rate": 4.9336304743880226e-05,
668
+ "loss": 2.3786,
669
  "step": 550
670
  },
671
  {
672
  "epoch": 0.22,
673
+ "learning_rate": 4.932423761219211e-05,
674
+ "loss": 2.4305,
675
  "step": 555
676
  },
677
  {
678
  "epoch": 0.22,
679
+ "learning_rate": 4.931206326849751e-05,
680
+ "loss": 2.4012,
681
  "step": 560
682
  },
683
  {
684
  "epoch": 0.23,
685
+ "learning_rate": 4.92997817664563e-05,
686
+ "loss": 2.4263,
687
  "step": 565
688
  },
689
  {
690
  "epoch": 0.23,
691
+ "learning_rate": 4.9287393160200676e-05,
692
+ "loss": 2.4749,
693
  "step": 570
694
  },
695
  {
696
  "epoch": 0.23,
697
+ "learning_rate": 4.927489750433492e-05,
698
+ "loss": 2.5083,
699
  "step": 575
700
  },
701
  {
702
  "epoch": 0.23,
703
+ "learning_rate": 4.926229485393513e-05,
704
+ "loss": 2.5228,
705
  "step": 580
706
  },
707
  {
708
  "epoch": 0.23,
709
+ "learning_rate": 4.924958526454902e-05,
710
+ "loss": 2.4623,
711
  "step": 585
712
  },
713
  {
714
  "epoch": 0.24,
715
+ "learning_rate": 4.923676879219562e-05,
716
+ "loss": 2.4199,
717
  "step": 590
718
  },
719
  {
720
  "epoch": 0.24,
721
+ "learning_rate": 4.9223845493365085e-05,
722
+ "loss": 2.2906,
723
  "step": 595
724
  },
725
  {
726
  "epoch": 0.24,
727
+ "learning_rate": 4.9210815425018406e-05,
728
+ "loss": 2.5539,
729
  "step": 600
730
  },
731
  {
732
  "epoch": 0.24,
733
+ "learning_rate": 4.919767864458718e-05,
734
+ "loss": 2.3456,
735
  "step": 605
736
  },
737
  {
738
  "epoch": 0.24,
739
+ "learning_rate": 4.9184435209973354e-05,
740
+ "loss": 2.4328,
741
  "step": 610
742
  },
743
  {
744
  "epoch": 0.25,
745
+ "learning_rate": 4.917108517954895e-05,
746
+ "loss": 2.3412,
747
  "step": 615
748
  },
749
  {
750
  "epoch": 0.25,
751
+ "learning_rate": 4.9157628612155836e-05,
752
+ "loss": 2.604,
753
  "step": 620
754
  },
755
  {
756
  "epoch": 0.25,
757
+ "learning_rate": 4.9144065567105465e-05,
758
+ "loss": 2.4007,
759
  "step": 625
760
  },
761
  {
762
  "epoch": 0.25,
763
+ "learning_rate": 4.913039610417859e-05,
764
+ "loss": 2.437,
765
  "step": 630
766
  },
767
  {
768
  "epoch": 0.25,
769
+ "learning_rate": 4.9116620283624996e-05,
770
+ "loss": 2.5004,
771
  "step": 635
772
  },
773
  {
774
  "epoch": 0.26,
775
+ "learning_rate": 4.91027381661633e-05,
776
+ "loss": 2.5045,
777
  "step": 640
778
  },
779
  {
780
  "epoch": 0.26,
781
+ "learning_rate": 4.908874981298057e-05,
782
+ "loss": 2.3794,
783
  "step": 645
784
  },
785
  {
786
  "epoch": 0.26,
787
+ "learning_rate": 4.9074655285732196e-05,
788
+ "loss": 2.4621,
789
  "step": 650
790
  },
791
  {
792
  "epoch": 0.26,
793
+ "learning_rate": 4.906045464654148e-05,
794
+ "loss": 2.549,
795
  "step": 655
796
  },
797
  {
798
  "epoch": 0.26,
799
+ "learning_rate": 4.9046147957999454e-05,
800
+ "loss": 2.4567,
801
  "step": 660
802
  },
803
  {
804
  "epoch": 0.27,
805
+ "learning_rate": 4.9031735283164574e-05,
806
+ "loss": 2.3243,
807
  "step": 665
808
  },
809
  {
810
  "epoch": 0.27,
811
+ "learning_rate": 4.901721668556244e-05,
812
+ "loss": 2.4758,
813
  "step": 670
814
  },
815
  {
816
  "epoch": 0.27,
817
+ "learning_rate": 4.9002592229185515e-05,
818
+ "loss": 2.5083,
819
  "step": 675
820
  },
821
  {
822
  "epoch": 0.27,
823
+ "learning_rate": 4.898786197849285e-05,
824
+ "loss": 2.4428,
825
  "step": 680
826
  },
827
  {
828
  "epoch": 0.27,
829
+ "learning_rate": 4.897302599840981e-05,
830
+ "loss": 2.4403,
831
  "step": 685
832
  },
833
  {
834
  "epoch": 0.28,
835
+ "learning_rate": 4.895808435432776e-05,
836
+ "loss": 2.4401,
837
  "step": 690
838
  },
839
  {
840
  "epoch": 0.28,
841
+ "learning_rate": 4.8943037112103784e-05,
842
+ "loss": 2.3789,
843
  "step": 695
844
  },
845
  {
846
  "epoch": 0.28,
847
+ "learning_rate": 4.892788433806042e-05,
848
+ "loss": 2.3945,
849
  "step": 700
850
  },
851
  {
852
  "epoch": 0.28,
853
+ "learning_rate": 4.8912626098985355e-05,
854
+ "loss": 2.4758,
855
  "step": 705
856
  },
857
  {
858
  "epoch": 0.28,
859
+ "learning_rate": 4.88972624621311e-05,
860
+ "loss": 2.4646,
861
  "step": 710
862
  },
863
  {
864
  "epoch": 0.29,
865
+ "learning_rate": 4.888179349521475e-05,
866
+ "loss": 2.4225,
867
  "step": 715
868
  },
869
  {
870
  "epoch": 0.29,
871
+ "learning_rate": 4.886621926641762e-05,
872
+ "loss": 2.4074,
873
  "step": 720
874
  },
875
  {
876
  "epoch": 0.29,
877
+ "learning_rate": 4.8850539844385017e-05,
878
+ "loss": 2.4438,
879
  "step": 725
880
  },
881
  {
882
  "epoch": 0.29,
883
+ "learning_rate": 4.883475529822587e-05,
884
+ "loss": 2.3666,
885
  "step": 730
886
  },
887
  {
888
  "epoch": 0.29,
889
+ "learning_rate": 4.881886569751248e-05,
890
+ "loss": 2.3782,
891
  "step": 735
892
  },
893
  {
894
  "epoch": 0.3,
895
+ "learning_rate": 4.880287111228016e-05,
896
+ "loss": 2.4916,
897
  "step": 740
898
  },
899
  {
900
  "epoch": 0.3,
901
+ "learning_rate": 4.878677161302698e-05,
902
+ "loss": 2.3639,
903
  "step": 745
904
  },
905
  {
906
  "epoch": 0.3,
907
+ "learning_rate": 4.877056727071342e-05,
908
+ "loss": 2.4333,
909
  "step": 750
910
  },
911
  {
912
  "epoch": 0.3,
913
+ "learning_rate": 4.8754258156762075e-05,
914
+ "loss": 2.3907,
915
  "step": 755
916
  },
917
  {
918
  "epoch": 0.3,
919
+ "learning_rate": 4.8737844343057315e-05,
920
+ "loss": 2.433,
921
  "step": 760
922
  },
923
  {
924
  "epoch": 0.31,
925
+ "learning_rate": 4.8721325901945015e-05,
926
+ "loss": 2.3849,
927
  "step": 765
928
  },
929
  {
930
  "epoch": 0.31,
931
+ "learning_rate": 4.870470290623218e-05,
932
+ "loss": 2.419,
933
  "step": 770
934
  },
935
  {
936
  "epoch": 0.31,
937
+ "learning_rate": 4.8687975429186685e-05,
938
+ "loss": 2.3694,
939
  "step": 775
940
  },
941
  {
942
  "epoch": 0.31,
943
+ "learning_rate": 4.867114354453688e-05,
944
+ "loss": 2.4335,
945
  "step": 780
946
  },
947
  {
948
  "epoch": 0.31,
949
+ "learning_rate": 4.865420732647133e-05,
950
+ "loss": 2.4168,
951
  "step": 785
952
  },
953
  {
954
  "epoch": 0.32,
955
+ "learning_rate": 4.863716684963846e-05,
956
+ "loss": 2.419,
957
  "step": 790
958
  },
959
  {
960
  "epoch": 0.32,
961
+ "learning_rate": 4.862002218914622e-05,
962
+ "loss": 2.335,
963
  "step": 795
964
  },
965
  {
966
  "epoch": 0.32,
967
+ "learning_rate": 4.860277342056176e-05,
968
+ "loss": 2.4059,
969
  "step": 800
970
  },
971
  {
972
  "epoch": 0.32,
973
+ "learning_rate": 4.858542061991112e-05,
974
+ "loss": 2.3423,
975
  "step": 805
976
  },
977
  {
978
  "epoch": 0.32,
979
+ "learning_rate": 4.856796386367884e-05,
980
+ "loss": 2.4135,
981
  "step": 810
982
  },
983
  {
984
  "epoch": 0.33,
985
+ "learning_rate": 4.855040322880769e-05,
986
+ "loss": 2.3652,
987
  "step": 815
988
  },
989
  {
990
  "epoch": 0.33,
991
+ "learning_rate": 4.853273879269827e-05,
992
+ "loss": 2.3511,
993
  "step": 820
994
  },
995
  {
996
  "epoch": 0.33,
997
+ "learning_rate": 4.8514970633208726e-05,
998
+ "loss": 2.413,
999
  "step": 825
1000
  },
1001
  {
1002
  "epoch": 0.33,
1003
+ "learning_rate": 4.849709882865435e-05,
1004
+ "loss": 2.4445,
1005
  "step": 830
1006
  },
1007
  {
1008
  "epoch": 0.33,
1009
+ "learning_rate": 4.847912345780727e-05,
1010
+ "loss": 2.4221,
1011
  "step": 835
1012
  },
1013
  {
1014
  "epoch": 0.34,
1015
+ "learning_rate": 4.846104459989611e-05,
1016
+ "loss": 2.4992,
1017
  "step": 840
1018
  },
1019
  {
1020
  "epoch": 0.34,
1021
+ "learning_rate": 4.84428623346056e-05,
1022
+ "loss": 2.3923,
1023
  "step": 845
1024
  },
1025
  {
1026
  "epoch": 0.34,
1027
+ "learning_rate": 4.842457674207627e-05,
1028
+ "loss": 2.3666,
1029
  "step": 850
1030
  },
1031
  {
1032
  "epoch": 0.34,
1033
+ "learning_rate": 4.8406187902904076e-05,
1034
+ "loss": 2.4329,
1035
  "step": 855
1036
  },
1037
  {
1038
  "epoch": 0.34,
1039
+ "learning_rate": 4.8387695898140026e-05,
1040
+ "loss": 2.3429,
1041
  "step": 860
1042
  },
1043
  {
1044
  "epoch": 0.35,
1045
+ "learning_rate": 4.836910080928987e-05,
1046
+ "loss": 2.4428,
1047
  "step": 865
1048
  },
1049
  {
1050
  "epoch": 0.35,
1051
+ "learning_rate": 4.83504027183137e-05,
1052
+ "loss": 2.3842,
1053
  "step": 870
1054
  },
1055
  {
1056
  "epoch": 0.35,
1057
+ "learning_rate": 4.833160170762561e-05,
1058
+ "loss": 2.4064,
1059
  "step": 875
1060
  },
1061
  {
1062
  "epoch": 0.35,
1063
+ "learning_rate": 4.8312697860093295e-05,
1064
+ "loss": 2.3705,
1065
  "step": 880
1066
  },
1067
  {
1068
  "epoch": 0.35,
1069
+ "learning_rate": 4.829369125903776e-05,
1070
+ "loss": 2.4151,
1071
  "step": 885
1072
  },
1073
  {
1074
  "epoch": 0.36,
1075
+ "learning_rate": 4.8274581988232894e-05,
1076
+ "loss": 2.4219,
1077
  "step": 890
1078
  },
1079
  {
1080
  "epoch": 0.36,
1081
+ "learning_rate": 4.825537013190509e-05,
1082
+ "loss": 2.4042,
1083
  "step": 895
1084
  },
1085
  {
1086
  "epoch": 0.36,
1087
+ "learning_rate": 4.823605577473293e-05,
1088
+ "loss": 2.4509,
1089
  "step": 900
1090
  },
1091
  {
1092
  "epoch": 0.36,
1093
+ "learning_rate": 4.8216639001846764e-05,
1094
+ "loss": 2.4255,
1095
  "step": 905
1096
  },
1097
  {
1098
  "epoch": 0.36,
1099
+ "learning_rate": 4.8197119898828367e-05,
1100
+ "loss": 2.4928,
1101
  "step": 910
1102
  },
1103
  {
1104
  "epoch": 0.37,
1105
+ "learning_rate": 4.817749855171052e-05,
1106
+ "loss": 2.4044,
1107
  "step": 915
1108
  },
1109
  {
1110
  "epoch": 0.37,
1111
+ "learning_rate": 4.8157775046976684e-05,
1112
+ "loss": 2.4085,
1113
  "step": 920
1114
  },
1115
  {
1116
  "epoch": 0.37,
1117
+ "learning_rate": 4.813794947156058e-05,
1118
+ "loss": 2.3941,
1119
  "step": 925
1120
  },
1121
  {
1122
  "epoch": 0.37,
1123
+ "learning_rate": 4.8118021912845815e-05,
1124
+ "loss": 2.3493,
1125
  "step": 930
1126
  },
1127
  {
1128
  "epoch": 0.37,
1129
+ "learning_rate": 4.8097992458665506e-05,
1130
+ "loss": 2.3367,
1131
  "step": 935
1132
  },
1133
  {
1134
  "epoch": 0.38,
1135
+ "learning_rate": 4.807786119730189e-05,
1136
+ "loss": 2.3865,
1137
  "step": 940
1138
  },
1139
  {
1140
  "epoch": 0.38,
1141
+ "learning_rate": 4.8057628217485916e-05,
1142
+ "loss": 2.3885,
1143
  "step": 945
1144
  },
1145
  {
1146
  "epoch": 0.38,
1147
+ "learning_rate": 4.803729360839688e-05,
1148
+ "loss": 2.4296,
1149
  "step": 950
1150
  },
1151
  {
1152
  "epoch": 0.38,
1153
+ "learning_rate": 4.801685745966203e-05,
1154
+ "loss": 2.3856,
1155
  "step": 955
1156
  },
1157
  {
1158
  "epoch": 0.38,
1159
+ "learning_rate": 4.799631986135615e-05,
1160
+ "loss": 2.3631,
1161
  "step": 960
1162
  },
1163
  {
1164
  "epoch": 0.39,
1165
+ "learning_rate": 4.797568090400117e-05,
1166
+ "loss": 2.3882,
1167
  "step": 965
1168
  },
1169
  {
1170
  "epoch": 0.39,
1171
+ "learning_rate": 4.7954940678565785e-05,
1172
+ "loss": 2.3396,
1173
  "step": 970
1174
  },
1175
  {
1176
  "epoch": 0.39,
1177
+ "learning_rate": 4.793409927646504e-05,
1178
+ "loss": 2.3548,
1179
  "step": 975
1180
  },
1181
  {
1182
  "epoch": 0.39,
1183
+ "learning_rate": 4.791315678955991e-05,
1184
+ "loss": 2.4576,
1185
  "step": 980
1186
  },
1187
  {
1188
  "epoch": 0.39,
1189
+ "learning_rate": 4.789211331015695e-05,
1190
+ "loss": 2.4523,
1191
  "step": 985
1192
  },
1193
  {
1194
  "epoch": 0.4,
1195
+ "learning_rate": 4.787096893100781e-05,
1196
+ "loss": 2.2542,
1197
  "step": 990
1198
  },
1199
  {
1200
  "epoch": 0.4,
1201
+ "learning_rate": 4.784972374530889e-05,
1202
+ "loss": 2.4385,
1203
  "step": 995
1204
  },
1205
  {
1206
  "epoch": 0.4,
1207
+ "learning_rate": 4.7828377846700925e-05,
1208
+ "loss": 2.3266,
1209
  "step": 1000
1210
  },
1211
  {
1212
  "epoch": 0.4,
1213
+ "learning_rate": 4.780693132926851e-05,
1214
+ "loss": 2.3301,
1215
  "step": 1005
1216
  },
1217
  {
1218
  "epoch": 0.4,
1219
+ "learning_rate": 4.7785384287539755e-05,
1220
+ "loss": 2.3554,
1221
  "step": 1010
1222
  },
1223
  {
1224
  "epoch": 0.41,
1225
+ "learning_rate": 4.776373681648586e-05,
1226
+ "loss": 2.3838,
1227
  "step": 1015
1228
  },
1229
  {
1230
  "epoch": 0.41,
1231
+ "learning_rate": 4.7741989011520645e-05,
1232
+ "loss": 2.3361,
1233
  "step": 1020
1234
  },
1235
  {
1236
  "epoch": 0.41,
1237
+ "learning_rate": 4.772014096850018e-05,
1238
+ "loss": 2.2864,
1239
  "step": 1025
1240
  },
1241
  {
1242
  "epoch": 0.41,
1243
+ "learning_rate": 4.769819278372235e-05,
1244
+ "loss": 2.3955,
1245
  "step": 1030
1246
  },
1247
  {
1248
  "epoch": 0.41,
1249
+ "learning_rate": 4.7676144553926414e-05,
1250
+ "loss": 2.3802,
1251
  "step": 1035
1252
  },
1253
  {
1254
  "epoch": 0.42,
1255
+ "learning_rate": 4.765399637629262e-05,
1256
+ "loss": 2.2856,
1257
  "step": 1040
1258
  },
1259
  {
1260
  "epoch": 0.42,
1261
+ "learning_rate": 4.7631748348441705e-05,
1262
+ "loss": 2.3698,
1263
  "step": 1045
1264
  },
1265
  {
1266
  "epoch": 0.42,
1267
+ "learning_rate": 4.760940056843454e-05,
1268
+ "loss": 2.3305,
1269
  "step": 1050
1270
  },
1271
  {
1272
  "epoch": 0.42,
1273
+ "learning_rate": 4.758695313477166e-05,
1274
+ "loss": 2.4944,
1275
  "step": 1055
1276
  },
1277
  {
1278
  "epoch": 0.43,
1279
+ "learning_rate": 4.756440614639283e-05,
1280
+ "loss": 2.4622,
1281
  "step": 1060
1282
  },
1283
  {
1284
  "epoch": 0.43,
1285
+ "learning_rate": 4.754175970267661e-05,
1286
+ "loss": 2.3995,
1287
  "step": 1065
1288
  },
1289
  {
1290
  "epoch": 0.43,
1291
+ "learning_rate": 4.751901390343995e-05,
1292
+ "loss": 2.343,
1293
  "step": 1070
1294
  },
1295
  {
1296
  "epoch": 0.43,
1297
+ "learning_rate": 4.7496168848937674e-05,
1298
+ "loss": 2.4388,
1299
  "step": 1075
1300
  },
1301
  {
1302
  "epoch": 0.43,
1303
+ "learning_rate": 4.7473224639862116e-05,
1304
+ "loss": 2.3694,
1305
  "step": 1080
1306
  },
1307
  {
1308
  "epoch": 0.44,
1309
+ "learning_rate": 4.745018137734264e-05,
1310
+ "loss": 2.3485,
1311
  "step": 1085
1312
  },
1313
  {
1314
  "epoch": 0.44,
1315
+ "learning_rate": 4.742703916294521e-05,
1316
+ "loss": 2.46,
1317
  "step": 1090
1318
  },
1319
  {
1320
  "epoch": 0.44,
1321
+ "learning_rate": 4.740379809867193e-05,
1322
+ "loss": 2.3138,
1323
  "step": 1095
1324
  },
1325
  {
1326
  "epoch": 0.44,
1327
+ "learning_rate": 4.738045828696056e-05,
1328
+ "loss": 2.4117,
1329
  "step": 1100
1330
  },
1331
  {
1332
  "epoch": 0.44,
1333
+ "learning_rate": 4.735701983068416e-05,
1334
+ "loss": 2.3506,
1335
  "step": 1105
1336
  },
1337
  {
1338
  "epoch": 0.45,
1339
+ "learning_rate": 4.7333482833150525e-05,
1340
+ "loss": 2.3711,
1341
  "step": 1110
1342
  },
1343
  {
1344
  "epoch": 0.45,
1345
+ "learning_rate": 4.730984739810183e-05,
1346
+ "loss": 2.2639,
1347
  "step": 1115
1348
  },
1349
  {
1350
  "epoch": 0.45,
1351
+ "learning_rate": 4.728611362971408e-05,
1352
+ "loss": 2.3749,
1353
  "step": 1120
1354
  },
1355
  {
1356
  "epoch": 0.45,
1357
+ "learning_rate": 4.726228163259673e-05,
1358
+ "loss": 2.4556,
1359
  "step": 1125
1360
  },
1361
  {
1362
  "epoch": 0.45,
1363
+ "learning_rate": 4.7238351511792165e-05,
1364
+ "loss": 2.4233,
1365
  "step": 1130
1366
  },
1367
  {
1368
  "epoch": 0.46,
1369
+ "learning_rate": 4.721432337277529e-05,
1370
+ "loss": 2.3544,
1371
  "step": 1135
1372
  },
1373
  {
1374
  "epoch": 0.46,
1375
+ "learning_rate": 4.7190197321453014e-05,
1376
+ "loss": 2.3654,
1377
  "step": 1140
1378
  },
1379
  {
1380
  "epoch": 0.46,
1381
+ "learning_rate": 4.716597346416382e-05,
1382
+ "loss": 2.3356,
1383
  "step": 1145
1384
  },
1385
  {
1386
  "epoch": 0.46,
1387
+ "learning_rate": 4.7141651907677256e-05,
1388
+ "loss": 2.4522,
1389
  "step": 1150
1390
  },
1391
  {
1392
  "epoch": 0.46,
1393
+ "learning_rate": 4.7117232759193534e-05,
1394
+ "loss": 2.3104,
1395
  "step": 1155
1396
  },
1397
  {
1398
  "epoch": 0.47,
1399
+ "learning_rate": 4.709271612634298e-05,
1400
+ "loss": 2.4492,
1401
  "step": 1160
1402
  },
1403
  {
1404
  "epoch": 0.47,
1405
+ "learning_rate": 4.706810211718561e-05,
1406
+ "loss": 2.4437,
1407
  "step": 1165
1408
  },
1409
  {
1410
  "epoch": 0.47,
1411
+ "learning_rate": 4.7043390840210636e-05,
1412
+ "loss": 2.3099,
1413
  "step": 1170
1414
  },
1415
  {
1416
  "epoch": 0.47,
1417
+ "learning_rate": 4.701858240433597e-05,
1418
+ "loss": 2.351,
1419
  "step": 1175
1420
  },
1421
  {
1422
  "epoch": 0.47,
1423
+ "learning_rate": 4.6993676918907804e-05,
1424
+ "loss": 2.4756,
1425
  "step": 1180
1426
  },
1427
  {
1428
  "epoch": 0.48,
1429
+ "learning_rate": 4.696867449370005e-05,
1430
+ "loss": 2.418,
1431
  "step": 1185
1432
  },
1433
  {
1434
  "epoch": 0.48,
1435
+ "learning_rate": 4.6943575238913904e-05,
1436
+ "loss": 2.5026,
1437
  "step": 1190
1438
  },
1439
  {
1440
  "epoch": 0.48,
1441
+ "learning_rate": 4.691837926517737e-05,
1442
+ "loss": 2.4096,
1443
  "step": 1195
1444
  },
1445
  {
1446
  "epoch": 0.48,
1447
+ "learning_rate": 4.689308668354473e-05,
1448
+ "loss": 2.4101,
1449
  "step": 1200
1450
  }
1451
  ],
checkpoint-1200/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c481a87016b183d9406558b8812ee05f00d60a3c721055833e3cdda34cf9bb26
3
  size 4920
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1b42f69ee6ce9030b2875b2859f3499c1e1c8f11e9ffa417d0dbdab2b51173da
3
  size 4920
checkpoint-1300/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:93a743276446513df34090043098faa6f2f54319ce812ffbbbc7177e808b4acd
3
  size 3158328
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:85d44d71942097cd68479a1b530cc3db7a8499c52f2dd5c95445e4bdb7ca0407
3
  size 3158328
checkpoint-1300/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:70e7e038d205d8c80caa36a067fc3041cb39465a739e25d3e69c04fe67bac194
3
  size 6372346
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:44a97830c602709fef51164112c64ad71692c8d62e24b60f9dcda8e8261e35a7
3
  size 6372346
checkpoint-1300/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:526966a5580ef6ae271e1283ca1d8eb49043b3517fb3fcbb8b62182c75c6cb50
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a767883b2aece252f63c5fedc1f692c331a88492c8d948c62f706948632a9121
3
  size 14244
checkpoint-1300/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c335008bdba91f2c973d08b5b0ffe05abd80eb22f7ffe115b89e882de54cae68
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:70617719e24dbddbd2b2937f8a6cdb77a5ab4eba865b80796c824be2b5d8307d
3
  size 1064
checkpoint-1300/trainer_state.json CHANGED
@@ -10,1562 +10,1562 @@
10
  "log_history": [
11
  {
12
  "epoch": 0.0,
13
- "learning_rate": 1.999997796189788e-05,
14
- "loss": 2.7465,
15
  "step": 5
16
  },
17
  {
18
  "epoch": 0.0,
19
- "learning_rate": 1.9999911847688657e-05,
20
- "loss": 2.6877,
21
  "step": 10
22
  },
23
  {
24
  "epoch": 0.01,
25
- "learning_rate": 1.999980165766374e-05,
26
- "loss": 2.6848,
27
  "step": 15
28
  },
29
  {
30
  "epoch": 0.01,
31
- "learning_rate": 1.9999647392308798e-05,
32
- "loss": 2.7677,
33
  "step": 20
34
  },
35
  {
36
  "epoch": 0.01,
37
- "learning_rate": 1.9999449052303777e-05,
38
- "loss": 2.7784,
39
  "step": 25
40
  },
41
  {
42
  "epoch": 0.01,
43
- "learning_rate": 1.9999206638522888e-05,
44
- "loss": 2.8161,
45
  "step": 30
46
  },
47
  {
48
  "epoch": 0.01,
49
- "learning_rate": 1.9998920152034595e-05,
50
- "loss": 2.6752,
51
  "step": 35
52
  },
53
  {
54
  "epoch": 0.02,
55
- "learning_rate": 1.9998589594101623e-05,
56
- "loss": 2.6403,
57
  "step": 40
58
  },
59
  {
60
  "epoch": 0.02,
61
- "learning_rate": 1.9998214966180948e-05,
62
- "loss": 2.6433,
63
  "step": 45
64
  },
65
  {
66
  "epoch": 0.02,
67
- "learning_rate": 1.999779626992378e-05,
68
- "loss": 2.5787,
69
  "step": 50
70
  },
71
  {
72
  "epoch": 0.02,
73
- "learning_rate": 1.9997333507175583e-05,
74
- "loss": 2.6812,
75
  "step": 55
76
  },
77
  {
78
  "epoch": 0.02,
79
- "learning_rate": 1.9996826679976033e-05,
80
- "loss": 2.7287,
81
  "step": 60
82
  },
83
  {
84
  "epoch": 0.03,
85
- "learning_rate": 1.9996275790559037e-05,
86
- "loss": 2.6465,
87
  "step": 65
88
  },
89
  {
90
  "epoch": 0.03,
91
- "learning_rate": 1.99956808413527e-05,
92
- "loss": 2.5268,
93
  "step": 70
94
  },
95
  {
96
  "epoch": 0.03,
97
- "learning_rate": 1.999504183497934e-05,
98
- "loss": 2.6893,
99
  "step": 75
100
  },
101
  {
102
  "epoch": 0.03,
103
- "learning_rate": 1.9994358774255444e-05,
104
- "loss": 2.6274,
105
  "step": 80
106
  },
107
  {
108
  "epoch": 0.03,
109
- "learning_rate": 1.9993631662191696e-05,
110
- "loss": 2.6232,
111
  "step": 85
112
  },
113
  {
114
  "epoch": 0.04,
115
- "learning_rate": 1.9992860501992924e-05,
116
- "loss": 2.7188,
117
  "step": 90
118
  },
119
  {
120
  "epoch": 0.04,
121
- "learning_rate": 1.9992045297058108e-05,
122
- "loss": 2.5388,
123
  "step": 95
124
  },
125
  {
126
  "epoch": 0.04,
127
- "learning_rate": 1.9991186050980366e-05,
128
- "loss": 2.6793,
129
  "step": 100
130
  },
131
  {
132
  "epoch": 0.04,
133
- "learning_rate": 1.9990282767546926e-05,
134
- "loss": 2.5523,
135
  "step": 105
136
  },
137
  {
138
  "epoch": 0.04,
139
- "learning_rate": 1.998933545073912e-05,
140
- "loss": 2.5763,
141
  "step": 110
142
  },
143
  {
144
  "epoch": 0.05,
145
- "learning_rate": 1.998834410473236e-05,
146
- "loss": 2.6447,
147
  "step": 115
148
  },
149
  {
150
  "epoch": 0.05,
151
- "learning_rate": 1.998730873389612e-05,
152
- "loss": 2.5579,
153
  "step": 120
154
  },
155
  {
156
  "epoch": 0.05,
157
- "learning_rate": 1.998622934279393e-05,
158
- "loss": 2.5884,
159
  "step": 125
160
  },
161
  {
162
  "epoch": 0.05,
163
- "learning_rate": 1.9985105936183327e-05,
164
- "loss": 2.5051,
165
  "step": 130
166
  },
167
  {
168
  "epoch": 0.05,
169
- "learning_rate": 1.9983938519015868e-05,
170
- "loss": 2.6014,
171
  "step": 135
172
  },
173
  {
174
  "epoch": 0.06,
175
- "learning_rate": 1.998272709643708e-05,
176
- "loss": 2.5878,
177
  "step": 140
178
  },
179
  {
180
  "epoch": 0.06,
181
- "learning_rate": 1.998147167378645e-05,
182
- "loss": 2.6642,
183
  "step": 145
184
  },
185
  {
186
  "epoch": 0.06,
187
- "learning_rate": 1.998017225659742e-05,
188
- "loss": 2.5077,
189
  "step": 150
190
  },
191
  {
192
  "epoch": 0.06,
193
- "learning_rate": 1.9978828850597312e-05,
194
- "loss": 2.5921,
195
  "step": 155
196
  },
197
  {
198
  "epoch": 0.06,
199
- "learning_rate": 1.9977441461707358e-05,
200
- "loss": 2.5577,
201
  "step": 160
202
  },
203
  {
204
  "epoch": 0.07,
205
- "learning_rate": 1.9976010096042634e-05,
206
- "loss": 2.524,
207
  "step": 165
208
  },
209
  {
210
  "epoch": 0.07,
211
- "learning_rate": 1.9974534759912068e-05,
212
- "loss": 2.5708,
213
  "step": 170
214
  },
215
  {
216
  "epoch": 0.07,
217
- "learning_rate": 1.997301545981837e-05,
218
- "loss": 2.5578,
219
  "step": 175
220
  },
221
  {
222
  "epoch": 0.07,
223
- "learning_rate": 1.9971452202458048e-05,
224
- "loss": 2.6874,
225
  "step": 180
226
  },
227
  {
228
  "epoch": 0.07,
229
- "learning_rate": 1.9969844994721338e-05,
230
- "loss": 2.535,
231
  "step": 185
232
  },
233
  {
234
  "epoch": 0.08,
235
- "learning_rate": 1.996819384369221e-05,
236
- "loss": 2.5816,
237
  "step": 190
238
  },
239
  {
240
  "epoch": 0.08,
241
- "learning_rate": 1.9966498756648305e-05,
242
- "loss": 2.6225,
243
  "step": 195
244
  },
245
  {
246
  "epoch": 0.08,
247
- "learning_rate": 1.9964759741060926e-05,
248
- "loss": 2.5387,
249
  "step": 200
250
  },
251
  {
252
  "epoch": 0.08,
253
- "learning_rate": 1.9962976804594993e-05,
254
- "loss": 2.524,
255
  "step": 205
256
  },
257
  {
258
  "epoch": 0.08,
259
- "learning_rate": 1.996114995510901e-05,
260
- "loss": 2.488,
261
  "step": 210
262
  },
263
  {
264
  "epoch": 0.09,
265
- "learning_rate": 1.9959279200655044e-05,
266
- "loss": 2.5824,
267
  "step": 215
268
  },
269
  {
270
  "epoch": 0.09,
271
- "learning_rate": 1.9957364549478663e-05,
272
- "loss": 2.5828,
273
  "step": 220
274
  },
275
  {
276
  "epoch": 0.09,
277
- "learning_rate": 1.9955406010018928e-05,
278
- "loss": 2.5137,
279
  "step": 225
280
  },
281
  {
282
  "epoch": 0.09,
283
- "learning_rate": 1.9953403590908334e-05,
284
- "loss": 2.5539,
285
  "step": 230
286
  },
287
  {
288
  "epoch": 0.09,
289
- "learning_rate": 1.995135730097278e-05,
290
- "loss": 2.6099,
291
  "step": 235
292
  },
293
  {
294
  "epoch": 0.1,
295
- "learning_rate": 1.994926714923155e-05,
296
- "loss": 2.5309,
297
  "step": 240
298
  },
299
  {
300
  "epoch": 0.1,
301
- "learning_rate": 1.9947133144897225e-05,
302
- "loss": 2.5152,
303
  "step": 245
304
  },
305
  {
306
  "epoch": 0.1,
307
- "learning_rate": 1.9944955297375693e-05,
308
- "loss": 2.4738,
309
  "step": 250
310
  },
311
  {
312
  "epoch": 0.1,
313
- "learning_rate": 1.9942733616266076e-05,
314
- "loss": 2.5173,
315
  "step": 255
316
  },
317
  {
318
  "epoch": 0.1,
319
- "learning_rate": 1.99404681113607e-05,
320
- "loss": 2.578,
321
  "step": 260
322
  },
323
  {
324
  "epoch": 0.11,
325
- "learning_rate": 1.993815879264506e-05,
326
- "loss": 2.5089,
327
  "step": 265
328
  },
329
  {
330
  "epoch": 0.11,
331
- "learning_rate": 1.9935805670297744e-05,
332
- "loss": 2.5872,
333
  "step": 270
334
  },
335
  {
336
  "epoch": 0.11,
337
- "learning_rate": 1.993340875469043e-05,
338
- "loss": 2.5882,
339
  "step": 275
340
  },
341
  {
342
  "epoch": 0.11,
343
- "learning_rate": 1.993096805638781e-05,
344
- "loss": 2.5789,
345
  "step": 280
346
  },
347
  {
348
  "epoch": 0.11,
349
- "learning_rate": 1.9928483586147553e-05,
350
- "loss": 2.488,
351
  "step": 285
352
  },
353
  {
354
  "epoch": 0.12,
355
- "learning_rate": 1.9925955354920265e-05,
356
- "loss": 2.617,
357
  "step": 290
358
  },
359
  {
360
  "epoch": 0.12,
361
- "learning_rate": 1.992338337384943e-05,
362
- "loss": 2.5752,
363
  "step": 295
364
  },
365
  {
366
  "epoch": 0.12,
367
- "learning_rate": 1.992076765427136e-05,
368
- "loss": 2.5529,
369
  "step": 300
370
  },
371
  {
372
  "epoch": 0.12,
373
- "learning_rate": 1.9918108207715156e-05,
374
- "loss": 2.4942,
375
  "step": 305
376
  },
377
  {
378
  "epoch": 0.12,
379
- "learning_rate": 1.991540504590265e-05,
380
- "loss": 2.5456,
381
  "step": 310
382
  },
383
  {
384
  "epoch": 0.13,
385
- "learning_rate": 1.991265818074835e-05,
386
- "loss": 2.5957,
387
  "step": 315
388
  },
389
  {
390
  "epoch": 0.13,
391
- "learning_rate": 1.99098676243594e-05,
392
- "loss": 2.5699,
393
  "step": 320
394
  },
395
  {
396
  "epoch": 0.13,
397
- "learning_rate": 1.9907033389035512e-05,
398
- "loss": 2.5544,
399
  "step": 325
400
  },
401
  {
402
  "epoch": 0.13,
403
- "learning_rate": 1.9904155487268912e-05,
404
- "loss": 2.538,
405
  "step": 330
406
  },
407
  {
408
  "epoch": 0.13,
409
- "learning_rate": 1.990123393174431e-05,
410
- "loss": 2.6055,
411
  "step": 335
412
  },
413
  {
414
  "epoch": 0.14,
415
- "learning_rate": 1.9898268735338807e-05,
416
- "loss": 2.5846,
417
  "step": 340
418
  },
419
  {
420
  "epoch": 0.14,
421
- "learning_rate": 1.9895259911121866e-05,
422
- "loss": 2.5405,
423
  "step": 345
424
  },
425
  {
426
  "epoch": 0.14,
427
- "learning_rate": 1.9892207472355243e-05,
428
- "loss": 2.5162,
429
  "step": 350
430
  },
431
  {
432
  "epoch": 0.14,
433
- "learning_rate": 1.988911143249292e-05,
434
- "loss": 2.5484,
435
  "step": 355
436
  },
437
  {
438
  "epoch": 0.14,
439
- "learning_rate": 1.9885971805181083e-05,
440
- "loss": 2.671,
441
  "step": 360
442
  },
443
  {
444
  "epoch": 0.15,
445
- "learning_rate": 1.9882788604258e-05,
446
- "loss": 2.5696,
447
  "step": 365
448
  },
449
  {
450
  "epoch": 0.15,
451
- "learning_rate": 1.987956184375402e-05,
452
- "loss": 2.5021,
453
  "step": 370
454
  },
455
  {
456
  "epoch": 0.15,
457
- "learning_rate": 1.9876291537891482e-05,
458
- "loss": 2.5644,
459
  "step": 375
460
  },
461
  {
462
  "epoch": 0.15,
463
- "learning_rate": 1.9872977701084645e-05,
464
- "loss": 2.5386,
465
  "step": 380
466
  },
467
  {
468
  "epoch": 0.15,
469
- "learning_rate": 1.9869620347939652e-05,
470
- "loss": 2.554,
471
  "step": 385
472
  },
473
  {
474
  "epoch": 0.16,
475
- "learning_rate": 1.9866219493254433e-05,
476
- "loss": 2.4798,
477
  "step": 390
478
  },
479
  {
480
  "epoch": 0.16,
481
- "learning_rate": 1.986277515201867e-05,
482
- "loss": 2.538,
483
  "step": 395
484
  },
485
  {
486
  "epoch": 0.16,
487
- "learning_rate": 1.9859287339413714e-05,
488
- "loss": 2.5041,
489
  "step": 400
490
  },
491
  {
492
  "epoch": 0.16,
493
- "learning_rate": 1.9855756070812514e-05,
494
- "loss": 2.7067,
495
  "step": 405
496
  },
497
  {
498
  "epoch": 0.16,
499
- "learning_rate": 1.9852181361779563e-05,
500
- "loss": 2.5054,
501
  "step": 410
502
  },
503
  {
504
  "epoch": 0.17,
505
- "learning_rate": 1.984856322807082e-05,
506
- "loss": 2.4815,
507
  "step": 415
508
  },
509
  {
510
  "epoch": 0.17,
511
- "learning_rate": 1.9844901685633648e-05,
512
- "loss": 2.5885,
513
  "step": 420
514
  },
515
  {
516
  "epoch": 0.17,
517
- "learning_rate": 1.9841196750606735e-05,
518
- "loss": 2.521,
519
  "step": 425
520
  },
521
  {
522
  "epoch": 0.17,
523
- "learning_rate": 1.9837448439320027e-05,
524
- "loss": 2.4937,
525
  "step": 430
526
  },
527
  {
528
  "epoch": 0.17,
529
- "learning_rate": 1.983365676829466e-05,
530
- "loss": 2.5877,
531
  "step": 435
532
  },
533
  {
534
  "epoch": 0.18,
535
- "learning_rate": 1.9829821754242885e-05,
536
- "loss": 2.5761,
537
  "step": 440
538
  },
539
  {
540
  "epoch": 0.18,
541
- "learning_rate": 1.9825943414067974e-05,
542
- "loss": 2.4917,
543
  "step": 445
544
  },
545
  {
546
  "epoch": 0.18,
547
- "learning_rate": 1.9822021764864194e-05,
548
- "loss": 2.5434,
549
  "step": 450
550
  },
551
  {
552
  "epoch": 0.18,
553
- "learning_rate": 1.9818056823916675e-05,
554
- "loss": 2.5906,
555
  "step": 455
556
  },
557
  {
558
  "epoch": 0.18,
559
- "learning_rate": 1.9814048608701374e-05,
560
- "loss": 2.5508,
561
  "step": 460
562
  },
563
  {
564
  "epoch": 0.19,
565
- "learning_rate": 1.980999713688499e-05,
566
- "loss": 2.6005,
567
  "step": 465
568
  },
569
  {
570
  "epoch": 0.19,
571
- "learning_rate": 1.980590242632486e-05,
572
- "loss": 2.523,
573
  "step": 470
574
  },
575
  {
576
  "epoch": 0.19,
577
- "learning_rate": 1.9801764495068923e-05,
578
- "loss": 2.5453,
579
  "step": 475
580
  },
581
  {
582
  "epoch": 0.19,
583
- "learning_rate": 1.979758336135561e-05,
584
- "loss": 2.6426,
585
  "step": 480
586
  },
587
  {
588
  "epoch": 0.19,
589
- "learning_rate": 1.9793359043613768e-05,
590
- "loss": 2.5454,
591
  "step": 485
592
  },
593
  {
594
  "epoch": 0.2,
595
- "learning_rate": 1.9789091560462587e-05,
596
- "loss": 2.6071,
597
  "step": 490
598
  },
599
  {
600
  "epoch": 0.2,
601
- "learning_rate": 1.9784780930711514e-05,
602
- "loss": 2.5913,
603
  "step": 495
604
  },
605
  {
606
  "epoch": 0.2,
607
- "learning_rate": 1.9780427173360165e-05,
608
- "loss": 2.5082,
609
  "step": 500
610
  },
611
  {
612
  "epoch": 0.2,
613
- "learning_rate": 1.977603030759825e-05,
614
- "loss": 2.565,
615
  "step": 505
616
  },
617
  {
618
  "epoch": 0.2,
619
- "learning_rate": 1.977159035280549e-05,
620
- "loss": 2.6365,
621
  "step": 510
622
  },
623
  {
624
  "epoch": 0.21,
625
- "learning_rate": 1.9767107328551515e-05,
626
- "loss": 2.6872,
627
  "step": 515
628
  },
629
  {
630
  "epoch": 0.21,
631
- "learning_rate": 1.9762581254595797e-05,
632
- "loss": 2.6222,
633
  "step": 520
634
  },
635
  {
636
  "epoch": 0.21,
637
- "learning_rate": 1.975801215088755e-05,
638
- "loss": 2.5633,
639
  "step": 525
640
  },
641
  {
642
  "epoch": 0.21,
643
- "learning_rate": 1.9753400037565653e-05,
644
- "loss": 2.6579,
645
  "step": 530
646
  },
647
  {
648
  "epoch": 0.21,
649
- "learning_rate": 1.9748744934958548e-05,
650
- "loss": 2.5572,
651
  "step": 535
652
  },
653
  {
654
  "epoch": 0.22,
655
- "learning_rate": 1.974404686358416e-05,
656
- "loss": 2.5845,
657
  "step": 540
658
  },
659
  {
660
  "epoch": 0.22,
661
- "learning_rate": 1.97393058441498e-05,
662
- "loss": 2.6484,
663
  "step": 545
664
  },
665
  {
666
  "epoch": 0.22,
667
- "learning_rate": 1.973452189755209e-05,
668
- "loss": 2.5473,
669
  "step": 550
670
  },
671
  {
672
  "epoch": 0.22,
673
- "learning_rate": 1.9729695044876847e-05,
674
- "loss": 2.5995,
675
  "step": 555
676
  },
677
  {
678
  "epoch": 0.22,
679
- "learning_rate": 1.9724825307399003e-05,
680
- "loss": 2.5473,
681
  "step": 560
682
  },
683
  {
684
  "epoch": 0.23,
685
- "learning_rate": 1.971991270658252e-05,
686
- "loss": 2.5798,
687
  "step": 565
688
  },
689
  {
690
  "epoch": 0.23,
691
- "learning_rate": 1.971495726408027e-05,
692
- "loss": 2.655,
693
  "step": 570
694
  },
695
  {
696
  "epoch": 0.23,
697
- "learning_rate": 1.970995900173397e-05,
698
- "loss": 2.6904,
699
  "step": 575
700
  },
701
  {
702
  "epoch": 0.23,
703
- "learning_rate": 1.9704917941574053e-05,
704
- "loss": 2.6973,
705
  "step": 580
706
  },
707
  {
708
  "epoch": 0.23,
709
- "learning_rate": 1.969983410581961e-05,
710
- "loss": 2.6518,
711
  "step": 585
712
  },
713
  {
714
  "epoch": 0.24,
715
- "learning_rate": 1.969470751687825e-05,
716
- "loss": 2.5849,
717
  "step": 590
718
  },
719
  {
720
  "epoch": 0.24,
721
- "learning_rate": 1.9689538197346035e-05,
722
- "loss": 2.4715,
723
  "step": 595
724
  },
725
  {
726
  "epoch": 0.24,
727
- "learning_rate": 1.9684326170007365e-05,
728
- "loss": 2.7246,
729
  "step": 600
730
  },
731
  {
732
  "epoch": 0.24,
733
- "learning_rate": 1.9679071457834874e-05,
734
- "loss": 2.5482,
735
  "step": 605
736
  },
737
  {
738
  "epoch": 0.24,
739
- "learning_rate": 1.967377408398934e-05,
740
- "loss": 2.6084,
741
  "step": 610
742
  },
743
  {
744
  "epoch": 0.25,
745
- "learning_rate": 1.966843407181958e-05,
746
- "loss": 2.5144,
747
  "step": 615
748
  },
749
  {
750
  "epoch": 0.25,
751
- "learning_rate": 1.9663051444862335e-05,
752
- "loss": 2.7663,
753
  "step": 620
754
  },
755
  {
756
  "epoch": 0.25,
757
- "learning_rate": 1.9657626226842187e-05,
758
- "loss": 2.5697,
759
  "step": 625
760
  },
761
  {
762
  "epoch": 0.25,
763
- "learning_rate": 1.9652158441671435e-05,
764
- "loss": 2.6379,
765
  "step": 630
766
  },
767
  {
768
  "epoch": 0.25,
769
- "learning_rate": 1.964664811345e-05,
770
- "loss": 2.6784,
771
  "step": 635
772
  },
773
  {
774
  "epoch": 0.26,
775
- "learning_rate": 1.964109526646532e-05,
776
- "loss": 2.6936,
777
  "step": 640
778
  },
779
  {
780
  "epoch": 0.26,
781
- "learning_rate": 1.963549992519223e-05,
782
- "loss": 2.5672,
783
  "step": 645
784
  },
785
  {
786
  "epoch": 0.26,
787
- "learning_rate": 1.962986211429288e-05,
788
- "loss": 2.62,
789
  "step": 650
790
  },
791
  {
792
  "epoch": 0.26,
793
- "learning_rate": 1.9624181858616593e-05,
794
- "loss": 2.7293,
795
  "step": 655
796
  },
797
  {
798
  "epoch": 0.26,
799
- "learning_rate": 1.9618459183199782e-05,
800
- "loss": 2.6636,
801
  "step": 660
802
  },
803
  {
804
  "epoch": 0.27,
805
- "learning_rate": 1.961269411326583e-05,
806
- "loss": 2.5106,
807
  "step": 665
808
  },
809
  {
810
  "epoch": 0.27,
811
- "learning_rate": 1.9606886674224977e-05,
812
- "loss": 2.6878,
813
  "step": 670
814
  },
815
  {
816
  "epoch": 0.27,
817
- "learning_rate": 1.960103689167421e-05,
818
- "loss": 2.693,
819
  "step": 675
820
  },
821
  {
822
  "epoch": 0.27,
823
- "learning_rate": 1.9595144791397142e-05,
824
- "loss": 2.6562,
825
  "step": 680
826
  },
827
  {
828
  "epoch": 0.27,
829
- "learning_rate": 1.9589210399363925e-05,
830
- "loss": 2.6269,
831
  "step": 685
832
  },
833
  {
834
  "epoch": 0.28,
835
- "learning_rate": 1.95832337417311e-05,
836
- "loss": 2.6459,
837
  "step": 690
838
  },
839
  {
840
  "epoch": 0.28,
841
- "learning_rate": 1.9577214844841515e-05,
842
- "loss": 2.5765,
843
  "step": 695
844
  },
845
  {
846
  "epoch": 0.28,
847
- "learning_rate": 1.957115373522417e-05,
848
- "loss": 2.6113,
849
  "step": 700
850
  },
851
  {
852
  "epoch": 0.28,
853
- "learning_rate": 1.956505043959414e-05,
854
- "loss": 2.6641,
855
  "step": 705
856
  },
857
  {
858
  "epoch": 0.28,
859
- "learning_rate": 1.955890498485244e-05,
860
- "loss": 2.6493,
861
  "step": 710
862
  },
863
  {
864
  "epoch": 0.29,
865
- "learning_rate": 1.9552717398085898e-05,
866
- "loss": 2.6135,
867
  "step": 715
868
  },
869
  {
870
  "epoch": 0.29,
871
- "learning_rate": 1.954648770656705e-05,
872
- "loss": 2.6182,
873
  "step": 720
874
  },
875
  {
876
  "epoch": 0.29,
877
- "learning_rate": 1.954021593775401e-05,
878
- "loss": 2.6487,
879
  "step": 725
880
  },
881
  {
882
  "epoch": 0.29,
883
- "learning_rate": 1.9533902119290352e-05,
884
- "loss": 2.5927,
885
  "step": 730
886
  },
887
  {
888
  "epoch": 0.29,
889
- "learning_rate": 1.952754627900499e-05,
890
- "loss": 2.5825,
891
  "step": 735
892
  },
893
  {
894
  "epoch": 0.3,
895
- "learning_rate": 1.9521148444912065e-05,
896
- "loss": 2.7193,
897
  "step": 740
898
  },
899
  {
900
  "epoch": 0.3,
901
- "learning_rate": 1.9514708645210793e-05,
902
- "loss": 2.5802,
903
  "step": 745
904
  },
905
  {
906
  "epoch": 0.3,
907
- "learning_rate": 1.9508226908285368e-05,
908
- "loss": 2.6628,
909
  "step": 750
910
  },
911
  {
912
  "epoch": 0.3,
913
- "learning_rate": 1.950170326270483e-05,
914
- "loss": 2.5847,
915
  "step": 755
916
  },
917
  {
918
  "epoch": 0.3,
919
- "learning_rate": 1.9495137737222925e-05,
920
- "loss": 2.6594,
921
  "step": 760
922
  },
923
  {
924
  "epoch": 0.31,
925
- "learning_rate": 1.9488530360778007e-05,
926
- "loss": 2.6096,
927
  "step": 765
928
  },
929
  {
930
  "epoch": 0.31,
931
- "learning_rate": 1.948188116249287e-05,
932
- "loss": 2.6378,
933
  "step": 770
934
  },
935
  {
936
  "epoch": 0.31,
937
- "learning_rate": 1.9475190171674675e-05,
938
- "loss": 2.5984,
939
  "step": 775
940
  },
941
  {
942
  "epoch": 0.31,
943
- "learning_rate": 1.9468457417814753e-05,
944
- "loss": 2.6437,
945
  "step": 780
946
  },
947
  {
948
  "epoch": 0.31,
949
- "learning_rate": 1.9461682930588534e-05,
950
- "loss": 2.6522,
951
  "step": 785
952
  },
953
  {
954
  "epoch": 0.32,
955
- "learning_rate": 1.9454866739855384e-05,
956
- "loss": 2.6242,
957
  "step": 790
958
  },
959
  {
960
  "epoch": 0.32,
961
- "learning_rate": 1.944800887565849e-05,
962
- "loss": 2.5961,
963
  "step": 795
964
  },
965
  {
966
  "epoch": 0.32,
967
- "learning_rate": 1.9441109368224704e-05,
968
- "loss": 2.6365,
969
  "step": 800
970
  },
971
  {
972
  "epoch": 0.32,
973
- "learning_rate": 1.9434168247964447e-05,
974
- "loss": 2.5674,
975
  "step": 805
976
  },
977
  {
978
  "epoch": 0.32,
979
- "learning_rate": 1.9427185545471537e-05,
980
- "loss": 2.6369,
981
  "step": 810
982
  },
983
  {
984
  "epoch": 0.33,
985
- "learning_rate": 1.9420161291523076e-05,
986
- "loss": 2.5763,
987
  "step": 815
988
  },
989
  {
990
  "epoch": 0.33,
991
- "learning_rate": 1.941309551707931e-05,
992
- "loss": 2.5651,
993
  "step": 820
994
  },
995
  {
996
  "epoch": 0.33,
997
- "learning_rate": 1.9405988253283492e-05,
998
- "loss": 2.6223,
999
  "step": 825
1000
  },
1001
  {
1002
  "epoch": 0.33,
1003
- "learning_rate": 1.939883953146174e-05,
1004
- "loss": 2.6616,
1005
  "step": 830
1006
  },
1007
  {
1008
  "epoch": 0.33,
1009
- "learning_rate": 1.939164938312291e-05,
1010
- "loss": 2.6496,
1011
  "step": 835
1012
  },
1013
  {
1014
  "epoch": 0.34,
1015
- "learning_rate": 1.9384417839958443e-05,
1016
- "loss": 2.7161,
1017
  "step": 840
1018
  },
1019
  {
1020
  "epoch": 0.34,
1021
- "learning_rate": 1.937714493384224e-05,
1022
- "loss": 2.6047,
1023
  "step": 845
1024
  },
1025
  {
1026
  "epoch": 0.34,
1027
- "learning_rate": 1.936983069683051e-05,
1028
- "loss": 2.5978,
1029
  "step": 850
1030
  },
1031
  {
1032
  "epoch": 0.34,
1033
- "learning_rate": 1.936247516116163e-05,
1034
- "loss": 2.6331,
1035
  "step": 855
1036
  },
1037
  {
1038
  "epoch": 0.34,
1039
- "learning_rate": 1.935507835925601e-05,
1040
- "loss": 2.5679,
1041
  "step": 860
1042
  },
1043
  {
1044
  "epoch": 0.35,
1045
- "learning_rate": 1.934764032371595e-05,
1046
- "loss": 2.6647,
1047
  "step": 865
1048
  },
1049
  {
1050
  "epoch": 0.35,
1051
- "learning_rate": 1.9340161087325483e-05,
1052
- "loss": 2.6142,
1053
  "step": 870
1054
  },
1055
  {
1056
  "epoch": 0.35,
1057
- "learning_rate": 1.9332640683050243e-05,
1058
- "loss": 2.6181,
1059
  "step": 875
1060
  },
1061
  {
1062
  "epoch": 0.35,
1063
- "learning_rate": 1.932507914403732e-05,
1064
- "loss": 2.6105,
1065
  "step": 880
1066
  },
1067
  {
1068
  "epoch": 0.35,
1069
- "learning_rate": 1.9317476503615108e-05,
1070
- "loss": 2.6415,
1071
  "step": 885
1072
  },
1073
  {
1074
  "epoch": 0.36,
1075
- "learning_rate": 1.9309832795293156e-05,
1076
- "loss": 2.6736,
1077
  "step": 890
1078
  },
1079
  {
1080
  "epoch": 0.36,
1081
- "learning_rate": 1.930214805276204e-05,
1082
- "loss": 2.6507,
1083
  "step": 895
1084
  },
1085
  {
1086
  "epoch": 0.36,
1087
- "learning_rate": 1.9294422309893177e-05,
1088
- "loss": 2.6771,
1089
  "step": 900
1090
  },
1091
  {
1092
  "epoch": 0.36,
1093
- "learning_rate": 1.9286655600738707e-05,
1094
- "loss": 2.6763,
1095
  "step": 905
1096
  },
1097
  {
1098
  "epoch": 0.36,
1099
- "learning_rate": 1.9278847959531348e-05,
1100
- "loss": 2.7135,
1101
  "step": 910
1102
  },
1103
  {
1104
  "epoch": 0.37,
1105
- "learning_rate": 1.927099942068421e-05,
1106
- "loss": 2.6166,
1107
  "step": 915
1108
  },
1109
  {
1110
  "epoch": 0.37,
1111
- "learning_rate": 1.9263110018790673e-05,
1112
- "loss": 2.6238,
1113
  "step": 920
1114
  },
1115
  {
1116
  "epoch": 0.37,
1117
- "learning_rate": 1.9255179788624233e-05,
1118
- "loss": 2.6424,
1119
  "step": 925
1120
  },
1121
  {
1122
  "epoch": 0.37,
1123
- "learning_rate": 1.9247208765138325e-05,
1124
- "loss": 2.6023,
1125
  "step": 930
1126
  },
1127
  {
1128
  "epoch": 0.37,
1129
- "learning_rate": 1.9239196983466204e-05,
1130
- "loss": 2.58,
1131
  "step": 935
1132
  },
1133
  {
1134
  "epoch": 0.38,
1135
- "learning_rate": 1.9231144478920756e-05,
1136
- "loss": 2.6173,
1137
  "step": 940
1138
  },
1139
  {
1140
  "epoch": 0.38,
1141
- "learning_rate": 1.9223051286994368e-05,
1142
- "loss": 2.628,
1143
  "step": 945
1144
  },
1145
  {
1146
  "epoch": 0.38,
1147
- "learning_rate": 1.9214917443358753e-05,
1148
- "loss": 2.6868,
1149
  "step": 950
1150
  },
1151
  {
1152
  "epoch": 0.38,
1153
- "learning_rate": 1.9206742983864813e-05,
1154
- "loss": 2.6342,
1155
  "step": 955
1156
  },
1157
  {
1158
  "epoch": 0.38,
1159
- "learning_rate": 1.9198527944542462e-05,
1160
- "loss": 2.5934,
1161
  "step": 960
1162
  },
1163
  {
1164
  "epoch": 0.39,
1165
- "learning_rate": 1.919027236160047e-05,
1166
- "loss": 2.6354,
1167
  "step": 965
1168
  },
1169
  {
1170
  "epoch": 0.39,
1171
- "learning_rate": 1.9181976271426315e-05,
1172
- "loss": 2.5955,
1173
  "step": 970
1174
  },
1175
  {
1176
  "epoch": 0.39,
1177
- "learning_rate": 1.9173639710586015e-05,
1178
- "loss": 2.6134,
1179
  "step": 975
1180
  },
1181
  {
1182
  "epoch": 0.39,
1183
- "learning_rate": 1.9165262715823966e-05,
1184
- "loss": 2.7,
1185
  "step": 980
1186
  },
1187
  {
1188
  "epoch": 0.39,
1189
- "learning_rate": 1.915684532406278e-05,
1190
- "loss": 2.7197,
1191
  "step": 985
1192
  },
1193
  {
1194
  "epoch": 0.4,
1195
- "learning_rate": 1.9148387572403123e-05,
1196
- "loss": 2.4881,
1197
  "step": 990
1198
  },
1199
  {
1200
  "epoch": 0.4,
1201
- "learning_rate": 1.913988949812356e-05,
1202
- "loss": 2.6999,
1203
  "step": 995
1204
  },
1205
  {
1206
  "epoch": 0.4,
1207
- "learning_rate": 1.9131351138680368e-05,
1208
- "loss": 2.5981,
1209
  "step": 1000
1210
  },
1211
  {
1212
  "epoch": 0.4,
1213
- "learning_rate": 1.9122772531707405e-05,
1214
- "loss": 2.5515,
1215
  "step": 1005
1216
  },
1217
  {
1218
  "epoch": 0.4,
1219
- "learning_rate": 1.9114153715015905e-05,
1220
- "loss": 2.623,
1221
  "step": 1010
1222
  },
1223
  {
1224
  "epoch": 0.41,
1225
- "learning_rate": 1.9105494726594344e-05,
1226
- "loss": 2.6006,
1227
  "step": 1015
1228
  },
1229
  {
1230
  "epoch": 0.41,
1231
- "learning_rate": 1.9096795604608258e-05,
1232
- "loss": 2.5929,
1233
  "step": 1020
1234
  },
1235
  {
1236
  "epoch": 0.41,
1237
- "learning_rate": 1.9088056387400074e-05,
1238
- "loss": 2.5396,
1239
  "step": 1025
1240
  },
1241
  {
1242
  "epoch": 0.41,
1243
- "learning_rate": 1.907927711348894e-05,
1244
- "loss": 2.6428,
1245
  "step": 1030
1246
  },
1247
  {
1248
  "epoch": 0.41,
1249
- "learning_rate": 1.9070457821570566e-05,
1250
- "loss": 2.6296,
1251
  "step": 1035
1252
  },
1253
  {
1254
  "epoch": 0.42,
1255
- "learning_rate": 1.9061598550517048e-05,
1256
- "loss": 2.534,
1257
  "step": 1040
1258
  },
1259
  {
1260
  "epoch": 0.42,
1261
- "learning_rate": 1.9052699339376685e-05,
1262
- "loss": 2.6049,
1263
  "step": 1045
1264
  },
1265
  {
1266
  "epoch": 0.42,
1267
- "learning_rate": 1.9043760227373817e-05,
1268
- "loss": 2.5776,
1269
  "step": 1050
1270
  },
1271
  {
1272
  "epoch": 0.42,
1273
- "learning_rate": 1.9034781253908664e-05,
1274
- "loss": 2.7483,
1275
  "step": 1055
1276
  },
1277
  {
1278
  "epoch": 0.43,
1279
- "learning_rate": 1.902576245855713e-05,
1280
- "loss": 2.6973,
1281
  "step": 1060
1282
  },
1283
  {
1284
  "epoch": 0.43,
1285
- "learning_rate": 1.9016703881070646e-05,
1286
- "loss": 2.645,
1287
  "step": 1065
1288
  },
1289
  {
1290
  "epoch": 0.43,
1291
- "learning_rate": 1.900760556137598e-05,
1292
- "loss": 2.5775,
1293
  "step": 1070
1294
  },
1295
  {
1296
  "epoch": 0.43,
1297
- "learning_rate": 1.899846753957507e-05,
1298
- "loss": 2.6872,
1299
  "step": 1075
1300
  },
1301
  {
1302
  "epoch": 0.43,
1303
- "learning_rate": 1.8989289855944846e-05,
1304
- "loss": 2.5929,
1305
  "step": 1080
1306
  },
1307
  {
1308
  "epoch": 0.44,
1309
- "learning_rate": 1.8980072550937058e-05,
1310
- "loss": 2.5668,
1311
  "step": 1085
1312
  },
1313
  {
1314
  "epoch": 0.44,
1315
- "learning_rate": 1.8970815665178086e-05,
1316
- "loss": 2.6873,
1317
  "step": 1090
1318
  },
1319
  {
1320
  "epoch": 0.44,
1321
- "learning_rate": 1.896151923946877e-05,
1322
- "loss": 2.5679,
1323
  "step": 1095
1324
  },
1325
  {
1326
  "epoch": 0.44,
1327
- "learning_rate": 1.8952183314784224e-05,
1328
- "loss": 2.6734,
1329
  "step": 1100
1330
  },
1331
  {
1332
  "epoch": 0.44,
1333
- "learning_rate": 1.8942807932273664e-05,
1334
- "loss": 2.6083,
1335
  "step": 1105
1336
  },
1337
  {
1338
  "epoch": 0.45,
1339
- "learning_rate": 1.893339313326021e-05,
1340
- "loss": 2.6189,
1341
  "step": 1110
1342
  },
1343
  {
1344
  "epoch": 0.45,
1345
- "learning_rate": 1.892393895924073e-05,
1346
- "loss": 2.5117,
1347
  "step": 1115
1348
  },
1349
  {
1350
  "epoch": 0.45,
1351
- "learning_rate": 1.8914445451885633e-05,
1352
- "loss": 2.6646,
1353
  "step": 1120
1354
  },
1355
  {
1356
  "epoch": 0.45,
1357
- "learning_rate": 1.890491265303869e-05,
1358
- "loss": 2.7551,
1359
  "step": 1125
1360
  },
1361
  {
1362
  "epoch": 0.45,
1363
- "learning_rate": 1.8895340604716867e-05,
1364
- "loss": 2.7003,
1365
  "step": 1130
1366
  },
1367
  {
1368
  "epoch": 0.46,
1369
- "learning_rate": 1.888572934911012e-05,
1370
- "loss": 2.6109,
1371
  "step": 1135
1372
  },
1373
  {
1374
  "epoch": 0.46,
1375
- "learning_rate": 1.8876078928581207e-05,
1376
- "loss": 2.6227,
1377
  "step": 1140
1378
  },
1379
  {
1380
  "epoch": 0.46,
1381
- "learning_rate": 1.8866389385665525e-05,
1382
- "loss": 2.5943,
1383
  "step": 1145
1384
  },
1385
  {
1386
  "epoch": 0.46,
1387
- "learning_rate": 1.8856660763070904e-05,
1388
- "loss": 2.7203,
1389
  "step": 1150
1390
  },
1391
  {
1392
  "epoch": 0.46,
1393
- "learning_rate": 1.8846893103677414e-05,
1394
- "loss": 2.561,
1395
  "step": 1155
1396
  },
1397
  {
1398
  "epoch": 0.47,
1399
- "learning_rate": 1.8837086450537195e-05,
1400
- "loss": 2.6959,
1401
  "step": 1160
1402
  },
1403
  {
1404
  "epoch": 0.47,
1405
- "learning_rate": 1.8827240846874245e-05,
1406
- "loss": 2.7047,
1407
  "step": 1165
1408
  },
1409
  {
1410
  "epoch": 0.47,
1411
- "learning_rate": 1.8817356336084252e-05,
1412
- "loss": 2.5853,
1413
  "step": 1170
1414
  },
1415
  {
1416
  "epoch": 0.47,
1417
- "learning_rate": 1.8807432961734388e-05,
1418
- "loss": 2.6165,
1419
  "step": 1175
1420
  },
1421
  {
1422
  "epoch": 0.47,
1423
- "learning_rate": 1.8797470767563122e-05,
1424
- "loss": 2.756,
1425
  "step": 1180
1426
  },
1427
  {
1428
  "epoch": 0.48,
1429
- "learning_rate": 1.878746979748002e-05,
1430
- "loss": 2.667,
1431
  "step": 1185
1432
  },
1433
  {
1434
  "epoch": 0.48,
1435
- "learning_rate": 1.8777430095565563e-05,
1436
- "loss": 2.7607,
1437
  "step": 1190
1438
  },
1439
  {
1440
  "epoch": 0.48,
1441
- "learning_rate": 1.876735170607095e-05,
1442
- "loss": 2.6816,
1443
  "step": 1195
1444
  },
1445
  {
1446
  "epoch": 0.48,
1447
- "learning_rate": 1.8757234673417892e-05,
1448
- "loss": 2.688,
1449
  "step": 1200
1450
  },
1451
  {
1452
  "epoch": 0.48,
1453
- "learning_rate": 1.874707904219843e-05,
1454
- "loss": 2.7286,
1455
  "step": 1205
1456
  },
1457
  {
1458
  "epoch": 0.49,
1459
- "learning_rate": 1.8736884857174733e-05,
1460
- "loss": 2.5721,
1461
  "step": 1210
1462
  },
1463
  {
1464
  "epoch": 0.49,
1465
- "learning_rate": 1.87266521632789e-05,
1466
- "loss": 2.6588,
1467
  "step": 1215
1468
  },
1469
  {
1470
  "epoch": 0.49,
1471
- "learning_rate": 1.8716381005612756e-05,
1472
- "loss": 2.625,
1473
  "step": 1220
1474
  },
1475
  {
1476
  "epoch": 0.49,
1477
- "learning_rate": 1.870607142944767e-05,
1478
- "loss": 2.5962,
1479
  "step": 1225
1480
  },
1481
  {
1482
  "epoch": 0.49,
1483
- "learning_rate": 1.869572348022434e-05,
1484
- "loss": 2.7405,
1485
  "step": 1230
1486
  },
1487
  {
1488
  "epoch": 0.5,
1489
- "learning_rate": 1.8685337203552602e-05,
1490
- "loss": 2.5352,
1491
  "step": 1235
1492
  },
1493
  {
1494
  "epoch": 0.5,
1495
- "learning_rate": 1.8674912645211212e-05,
1496
- "loss": 2.5935,
1497
  "step": 1240
1498
  },
1499
  {
1500
  "epoch": 0.5,
1501
- "learning_rate": 1.8664449851147675e-05,
1502
- "loss": 2.72,
1503
  "step": 1245
1504
  },
1505
  {
1506
  "epoch": 0.5,
1507
- "learning_rate": 1.8653948867478008e-05,
1508
- "loss": 2.6364,
1509
  "step": 1250
1510
  },
1511
  {
1512
  "epoch": 0.5,
1513
- "learning_rate": 1.864340974048657e-05,
1514
- "loss": 2.6966,
1515
  "step": 1255
1516
  },
1517
  {
1518
  "epoch": 0.51,
1519
- "learning_rate": 1.8632832516625825e-05,
1520
- "loss": 2.6181,
1521
  "step": 1260
1522
  },
1523
  {
1524
  "epoch": 0.51,
1525
- "learning_rate": 1.8622217242516164e-05,
1526
- "loss": 2.6416,
1527
  "step": 1265
1528
  },
1529
  {
1530
  "epoch": 0.51,
1531
- "learning_rate": 1.8611563964945685e-05,
1532
- "loss": 2.5406,
1533
  "step": 1270
1534
  },
1535
  {
1536
  "epoch": 0.51,
1537
- "learning_rate": 1.8600872730869995e-05,
1538
- "loss": 2.6398,
1539
  "step": 1275
1540
  },
1541
  {
1542
  "epoch": 0.51,
1543
- "learning_rate": 1.859014358741199e-05,
1544
- "loss": 2.5742,
1545
  "step": 1280
1546
  },
1547
  {
1548
  "epoch": 0.52,
1549
- "learning_rate": 1.8579376581861665e-05,
1550
- "loss": 2.6472,
1551
  "step": 1285
1552
  },
1553
  {
1554
  "epoch": 0.52,
1555
- "learning_rate": 1.8568571761675893e-05,
1556
- "loss": 2.6341,
1557
  "step": 1290
1558
  },
1559
  {
1560
  "epoch": 0.52,
1561
- "learning_rate": 1.8557729174478222e-05,
1562
- "loss": 2.7972,
1563
  "step": 1295
1564
  },
1565
  {
1566
  "epoch": 0.52,
1567
- "learning_rate": 1.854684886805866e-05,
1568
- "loss": 2.6391,
1569
  "step": 1300
1570
  }
1571
  ],
 
10
  "log_history": [
11
  {
12
  "epoch": 0.0,
13
+ "learning_rate": 4.99999449047447e-05,
14
+ "loss": 2.7413,
15
  "step": 5
16
  },
17
  {
18
  "epoch": 0.0,
19
+ "learning_rate": 4.9999779619221645e-05,
20
+ "loss": 2.6684,
21
  "step": 10
22
  },
23
  {
24
  "epoch": 0.01,
25
+ "learning_rate": 4.999950414415935e-05,
26
+ "loss": 2.6511,
27
  "step": 15
28
  },
29
  {
30
  "epoch": 0.01,
31
+ "learning_rate": 4.999911848077199e-05,
32
+ "loss": 2.72,
33
  "step": 20
34
  },
35
  {
36
  "epoch": 0.01,
37
+ "learning_rate": 4.999862263075944e-05,
38
+ "loss": 2.7222,
39
  "step": 25
40
  },
41
  {
42
  "epoch": 0.01,
43
+ "learning_rate": 4.999801659630722e-05,
44
+ "loss": 2.7479,
45
  "step": 30
46
  },
47
  {
48
  "epoch": 0.01,
49
+ "learning_rate": 4.999730038008649e-05,
50
+ "loss": 2.6063,
51
  "step": 35
52
  },
53
  {
54
  "epoch": 0.02,
55
+ "learning_rate": 4.9996473985254055e-05,
56
+ "loss": 2.576,
57
  "step": 40
58
  },
59
  {
60
  "epoch": 0.02,
61
+ "learning_rate": 4.999553741545237e-05,
62
+ "loss": 2.579,
63
  "step": 45
64
  },
65
  {
66
  "epoch": 0.02,
67
+ "learning_rate": 4.999449067480945e-05,
68
+ "loss": 2.5114,
69
  "step": 50
70
  },
71
  {
72
  "epoch": 0.02,
73
+ "learning_rate": 4.9993333767938954e-05,
74
+ "loss": 2.6174,
75
  "step": 55
76
  },
77
  {
78
  "epoch": 0.02,
79
+ "learning_rate": 4.9992066699940085e-05,
80
+ "loss": 2.6581,
81
  "step": 60
82
  },
83
  {
84
  "epoch": 0.03,
85
+ "learning_rate": 4.9990689476397586e-05,
86
+ "loss": 2.586,
87
  "step": 65
88
  },
89
  {
90
  "epoch": 0.03,
91
+ "learning_rate": 4.998920210338175e-05,
92
+ "loss": 2.4642,
93
  "step": 70
94
  },
95
  {
96
  "epoch": 0.03,
97
+ "learning_rate": 4.9987604587448343e-05,
98
+ "loss": 2.6262,
99
  "step": 75
100
  },
101
  {
102
  "epoch": 0.03,
103
+ "learning_rate": 4.998589693563861e-05,
104
+ "loss": 2.5678,
105
  "step": 80
106
  },
107
  {
108
  "epoch": 0.03,
109
+ "learning_rate": 4.998407915547924e-05,
110
+ "loss": 2.5597,
111
  "step": 85
112
  },
113
  {
114
  "epoch": 0.04,
115
+ "learning_rate": 4.9982151254982304e-05,
116
+ "loss": 2.6578,
117
  "step": 90
118
  },
119
  {
120
  "epoch": 0.04,
121
+ "learning_rate": 4.998011324264527e-05,
122
+ "loss": 2.4825,
123
  "step": 95
124
  },
125
  {
126
  "epoch": 0.04,
127
+ "learning_rate": 4.997796512745091e-05,
128
+ "loss": 2.6197,
129
  "step": 100
130
  },
131
  {
132
  "epoch": 0.04,
133
+ "learning_rate": 4.997570691886732e-05,
134
+ "loss": 2.4921,
135
  "step": 105
136
  },
137
  {
138
  "epoch": 0.04,
139
+ "learning_rate": 4.99733386268478e-05,
140
+ "loss": 2.5126,
141
  "step": 110
142
  },
143
  {
144
  "epoch": 0.05,
145
+ "learning_rate": 4.99708602618309e-05,
146
+ "loss": 2.584,
147
  "step": 115
148
  },
149
  {
150
  "epoch": 0.05,
151
+ "learning_rate": 4.9968271834740305e-05,
152
+ "loss": 2.4991,
153
  "step": 120
154
  },
155
  {
156
  "epoch": 0.05,
157
+ "learning_rate": 4.996557335698482e-05,
158
+ "loss": 2.5242,
159
  "step": 125
160
  },
161
  {
162
  "epoch": 0.05,
163
+ "learning_rate": 4.996276484045832e-05,
164
+ "loss": 2.4506,
165
  "step": 130
166
  },
167
  {
168
  "epoch": 0.05,
169
+ "learning_rate": 4.995984629753967e-05,
170
+ "loss": 2.5479,
171
  "step": 135
172
  },
173
  {
174
  "epoch": 0.06,
175
+ "learning_rate": 4.9956817741092696e-05,
176
+ "loss": 2.5316,
177
  "step": 140
178
  },
179
  {
180
  "epoch": 0.06,
181
+ "learning_rate": 4.995367918446613e-05,
182
+ "loss": 2.6053,
183
  "step": 145
184
  },
185
  {
186
  "epoch": 0.06,
187
+ "learning_rate": 4.995043064149354e-05,
188
+ "loss": 2.4533,
189
  "step": 150
190
  },
191
  {
192
  "epoch": 0.06,
193
+ "learning_rate": 4.9947072126493276e-05,
194
+ "loss": 2.5279,
195
  "step": 155
196
  },
197
  {
198
  "epoch": 0.06,
199
+ "learning_rate": 4.994360365426839e-05,
200
+ "loss": 2.4994,
201
  "step": 160
202
  },
203
  {
204
  "epoch": 0.07,
205
+ "learning_rate": 4.994002524010659e-05,
206
+ "loss": 2.4675,
207
  "step": 165
208
  },
209
  {
210
  "epoch": 0.07,
211
+ "learning_rate": 4.9936336899780166e-05,
212
+ "loss": 2.5233,
213
  "step": 170
214
  },
215
  {
216
  "epoch": 0.07,
217
+ "learning_rate": 4.993253864954592e-05,
218
+ "loss": 2.4974,
219
  "step": 175
220
  },
221
  {
222
  "epoch": 0.07,
223
+ "learning_rate": 4.992863050614511e-05,
224
+ "loss": 2.6295,
225
  "step": 180
226
  },
227
  {
228
  "epoch": 0.07,
229
+ "learning_rate": 4.9924612486803346e-05,
230
+ "loss": 2.4744,
231
  "step": 185
232
  },
233
  {
234
  "epoch": 0.08,
235
+ "learning_rate": 4.992048460923052e-05,
236
+ "loss": 2.5248,
237
  "step": 190
238
  },
239
  {
240
  "epoch": 0.08,
241
+ "learning_rate": 4.991624689162076e-05,
242
+ "loss": 2.5727,
243
  "step": 195
244
  },
245
  {
246
  "epoch": 0.08,
247
+ "learning_rate": 4.991189935265231e-05,
248
+ "loss": 2.4845,
249
  "step": 200
250
  },
251
  {
252
  "epoch": 0.08,
253
+ "learning_rate": 4.990744201148748e-05,
254
+ "loss": 2.4544,
255
  "step": 205
256
  },
257
  {
258
  "epoch": 0.08,
259
+ "learning_rate": 4.990287488777253e-05,
260
+ "loss": 2.4383,
261
  "step": 210
262
  },
263
  {
264
  "epoch": 0.09,
265
+ "learning_rate": 4.989819800163761e-05,
266
+ "loss": 2.5263,
267
  "step": 215
268
  },
269
  {
270
  "epoch": 0.09,
271
+ "learning_rate": 4.989341137369666e-05,
272
+ "loss": 2.5219,
273
  "step": 220
274
  },
275
  {
276
  "epoch": 0.09,
277
+ "learning_rate": 4.9888515025047316e-05,
278
+ "loss": 2.4512,
279
  "step": 225
280
  },
281
  {
282
  "epoch": 0.09,
283
+ "learning_rate": 4.988350897727083e-05,
284
+ "loss": 2.491,
285
  "step": 230
286
  },
287
  {
288
  "epoch": 0.09,
289
+ "learning_rate": 4.9878393252431953e-05,
290
+ "loss": 2.5404,
291
  "step": 235
292
  },
293
  {
294
  "epoch": 0.1,
295
+ "learning_rate": 4.987316787307888e-05,
296
+ "loss": 2.4777,
297
  "step": 240
298
  },
299
  {
300
  "epoch": 0.1,
301
+ "learning_rate": 4.9867832862243055e-05,
302
+ "loss": 2.4498,
303
  "step": 245
304
  },
305
  {
306
  "epoch": 0.1,
307
+ "learning_rate": 4.986238824343923e-05,
308
+ "loss": 2.4174,
309
  "step": 250
310
  },
311
  {
312
  "epoch": 0.1,
313
+ "learning_rate": 4.985683404066519e-05,
314
+ "loss": 2.4589,
315
  "step": 255
316
  },
317
  {
318
  "epoch": 0.1,
319
+ "learning_rate": 4.985117027840175e-05,
320
+ "loss": 2.5029,
321
  "step": 260
322
  },
323
  {
324
  "epoch": 0.11,
325
+ "learning_rate": 4.984539698161264e-05,
326
+ "loss": 2.4376,
327
  "step": 265
328
  },
329
  {
330
  "epoch": 0.11,
331
+ "learning_rate": 4.983951417574436e-05,
332
+ "loss": 2.526,
333
  "step": 270
334
  },
335
  {
336
  "epoch": 0.11,
337
+ "learning_rate": 4.983352188672608e-05,
338
+ "loss": 2.5214,
339
  "step": 275
340
  },
341
  {
342
  "epoch": 0.11,
343
+ "learning_rate": 4.982742014096952e-05,
344
+ "loss": 2.5188,
345
  "step": 280
346
  },
347
  {
348
  "epoch": 0.11,
349
+ "learning_rate": 4.982120896536888e-05,
350
+ "loss": 2.4175,
351
  "step": 285
352
  },
353
  {
354
  "epoch": 0.12,
355
+ "learning_rate": 4.981488838730066e-05,
356
+ "loss": 2.5346,
357
  "step": 290
358
  },
359
  {
360
  "epoch": 0.12,
361
+ "learning_rate": 4.980845843462357e-05,
362
+ "loss": 2.496,
363
  "step": 295
364
  },
365
  {
366
  "epoch": 0.12,
367
+ "learning_rate": 4.98019191356784e-05,
368
+ "loss": 2.477,
369
  "step": 300
370
  },
371
  {
372
  "epoch": 0.12,
373
+ "learning_rate": 4.9795270519287886e-05,
374
+ "loss": 2.4205,
375
  "step": 305
376
  },
377
  {
378
  "epoch": 0.12,
379
+ "learning_rate": 4.9788512614756624e-05,
380
+ "loss": 2.4621,
381
  "step": 310
382
  },
383
  {
384
  "epoch": 0.13,
385
+ "learning_rate": 4.9781645451870875e-05,
386
+ "loss": 2.5098,
387
  "step": 315
388
  },
389
  {
390
  "epoch": 0.13,
391
+ "learning_rate": 4.9774669060898496e-05,
392
+ "loss": 2.4847,
393
  "step": 320
394
  },
395
  {
396
  "epoch": 0.13,
397
+ "learning_rate": 4.976758347258877e-05,
398
+ "loss": 2.4873,
399
  "step": 325
400
  },
401
  {
402
  "epoch": 0.13,
403
+ "learning_rate": 4.976038871817228e-05,
404
+ "loss": 2.4462,
405
  "step": 330
406
  },
407
  {
408
  "epoch": 0.13,
409
+ "learning_rate": 4.9753084829360776e-05,
410
+ "loss": 2.4994,
411
  "step": 335
412
  },
413
  {
414
  "epoch": 0.14,
415
+ "learning_rate": 4.974567183834702e-05,
416
+ "loss": 2.4994,
417
  "step": 340
418
  },
419
  {
420
  "epoch": 0.14,
421
+ "learning_rate": 4.9738149777804665e-05,
422
+ "loss": 2.4584,
423
  "step": 345
424
  },
425
  {
426
  "epoch": 0.14,
427
+ "learning_rate": 4.97305186808881e-05,
428
+ "loss": 2.4294,
429
  "step": 350
430
  },
431
  {
432
  "epoch": 0.14,
433
+ "learning_rate": 4.9722778581232305e-05,
434
+ "loss": 2.4499,
435
  "step": 355
436
  },
437
  {
438
  "epoch": 0.14,
439
+ "learning_rate": 4.9714929512952704e-05,
440
+ "loss": 2.5851,
441
  "step": 360
442
  },
443
  {
444
  "epoch": 0.15,
445
+ "learning_rate": 4.9706971510645e-05,
446
+ "loss": 2.4746,
447
  "step": 365
448
  },
449
  {
450
  "epoch": 0.15,
451
+ "learning_rate": 4.969890460938505e-05,
452
+ "loss": 2.4184,
453
  "step": 370
454
  },
455
  {
456
  "epoch": 0.15,
457
+ "learning_rate": 4.9690728844728704e-05,
458
+ "loss": 2.4716,
459
  "step": 375
460
  },
461
  {
462
  "epoch": 0.15,
463
+ "learning_rate": 4.968244425271161e-05,
464
+ "loss": 2.4477,
465
  "step": 380
466
  },
467
  {
468
  "epoch": 0.15,
469
+ "learning_rate": 4.9674050869849124e-05,
470
+ "loss": 2.4563,
471
  "step": 385
472
  },
473
  {
474
  "epoch": 0.16,
475
+ "learning_rate": 4.966554873313608e-05,
476
+ "loss": 2.3708,
477
  "step": 390
478
  },
479
  {
480
  "epoch": 0.16,
481
+ "learning_rate": 4.9656937880046676e-05,
482
+ "loss": 2.4351,
483
  "step": 395
484
  },
485
  {
486
  "epoch": 0.16,
487
+ "learning_rate": 4.9648218348534284e-05,
488
+ "loss": 2.3933,
489
  "step": 400
490
  },
491
  {
492
  "epoch": 0.16,
493
+ "learning_rate": 4.963939017703128e-05,
494
+ "loss": 2.5871,
495
  "step": 405
496
  },
497
  {
498
  "epoch": 0.16,
499
+ "learning_rate": 4.9630453404448905e-05,
500
+ "loss": 2.3965,
501
  "step": 410
502
  },
503
  {
504
  "epoch": 0.17,
505
+ "learning_rate": 4.962140807017705e-05,
506
+ "loss": 2.372,
507
  "step": 415
508
  },
509
  {
510
  "epoch": 0.17,
511
+ "learning_rate": 4.961225421408412e-05,
512
+ "loss": 2.4847,
513
  "step": 420
514
  },
515
  {
516
  "epoch": 0.17,
517
+ "learning_rate": 4.960299187651684e-05,
518
+ "loss": 2.4046,
519
  "step": 425
520
  },
521
  {
522
  "epoch": 0.17,
523
+ "learning_rate": 4.959362109830007e-05,
524
+ "loss": 2.3912,
525
  "step": 430
526
  },
527
  {
528
  "epoch": 0.17,
529
+ "learning_rate": 4.9584141920736656e-05,
530
+ "loss": 2.4732,
531
  "step": 435
532
  },
533
  {
534
  "epoch": 0.18,
535
+ "learning_rate": 4.957455438560721e-05,
536
+ "loss": 2.4854,
537
  "step": 440
538
  },
539
  {
540
  "epoch": 0.18,
541
+ "learning_rate": 4.956485853516993e-05,
542
+ "loss": 2.3736,
543
  "step": 445
544
  },
545
  {
546
  "epoch": 0.18,
547
+ "learning_rate": 4.9555054412160476e-05,
548
+ "loss": 2.4222,
549
  "step": 450
550
  },
551
  {
552
  "epoch": 0.18,
553
+ "learning_rate": 4.9545142059791686e-05,
554
+ "loss": 2.4498,
555
  "step": 455
556
  },
557
  {
558
  "epoch": 0.18,
559
+ "learning_rate": 4.9535121521753434e-05,
560
+ "loss": 2.4172,
561
  "step": 460
562
  },
563
  {
564
  "epoch": 0.19,
565
+ "learning_rate": 4.952499284221247e-05,
566
+ "loss": 2.4742,
567
  "step": 465
568
  },
569
  {
570
  "epoch": 0.19,
571
+ "learning_rate": 4.951475606581215e-05,
572
+ "loss": 2.4008,
573
  "step": 470
574
  },
575
  {
576
  "epoch": 0.19,
577
+ "learning_rate": 4.950441123767231e-05,
578
+ "loss": 2.4278,
579
  "step": 475
580
  },
581
  {
582
  "epoch": 0.19,
583
+ "learning_rate": 4.949395840338903e-05,
584
+ "loss": 2.5041,
585
  "step": 480
586
  },
587
  {
588
  "epoch": 0.19,
589
+ "learning_rate": 4.948339760903442e-05,
590
+ "loss": 2.4088,
591
  "step": 485
592
  },
593
  {
594
  "epoch": 0.2,
595
+ "learning_rate": 4.947272890115647e-05,
596
+ "loss": 2.466,
597
  "step": 490
598
  },
599
  {
600
  "epoch": 0.2,
601
+ "learning_rate": 4.946195232677878e-05,
602
+ "loss": 2.4404,
603
  "step": 495
604
  },
605
  {
606
  "epoch": 0.2,
607
+ "learning_rate": 4.9451067933400406e-05,
608
+ "loss": 2.3658,
609
  "step": 500
610
  },
611
  {
612
  "epoch": 0.2,
613
+ "learning_rate": 4.9440075768995625e-05,
614
+ "loss": 2.4247,
615
  "step": 505
616
  },
617
  {
618
  "epoch": 0.2,
619
+ "learning_rate": 4.942897588201372e-05,
620
+ "loss": 2.5082,
621
  "step": 510
622
  },
623
  {
624
  "epoch": 0.21,
625
+ "learning_rate": 4.941776832137879e-05,
626
+ "loss": 2.5545,
627
  "step": 515
628
  },
629
  {
630
  "epoch": 0.21,
631
+ "learning_rate": 4.940645313648949e-05,
632
+ "loss": 2.4665,
633
  "step": 520
634
  },
635
  {
636
  "epoch": 0.21,
637
+ "learning_rate": 4.939503037721888e-05,
638
+ "loss": 2.4195,
639
  "step": 525
640
  },
641
  {
642
  "epoch": 0.21,
643
+ "learning_rate": 4.938350009391413e-05,
644
+ "loss": 2.4908,
645
  "step": 530
646
  },
647
  {
648
  "epoch": 0.21,
649
+ "learning_rate": 4.937186233739637e-05,
650
+ "loss": 2.4093,
651
  "step": 535
652
  },
653
  {
654
  "epoch": 0.22,
655
+ "learning_rate": 4.93601171589604e-05,
656
+ "loss": 2.44,
657
  "step": 540
658
  },
659
  {
660
  "epoch": 0.22,
661
+ "learning_rate": 4.9348264610374494e-05,
662
+ "loss": 2.4989,
663
  "step": 545
664
  },
665
  {
666
  "epoch": 0.22,
667
+ "learning_rate": 4.9336304743880226e-05,
668
+ "loss": 2.3786,
669
  "step": 550
670
  },
671
  {
672
  "epoch": 0.22,
673
+ "learning_rate": 4.932423761219211e-05,
674
+ "loss": 2.4305,
675
  "step": 555
676
  },
677
  {
678
  "epoch": 0.22,
679
+ "learning_rate": 4.931206326849751e-05,
680
+ "loss": 2.4012,
681
  "step": 560
682
  },
683
  {
684
  "epoch": 0.23,
685
+ "learning_rate": 4.92997817664563e-05,
686
+ "loss": 2.4263,
687
  "step": 565
688
  },
689
  {
690
  "epoch": 0.23,
691
+ "learning_rate": 4.9287393160200676e-05,
692
+ "loss": 2.4749,
693
  "step": 570
694
  },
695
  {
696
  "epoch": 0.23,
697
+ "learning_rate": 4.927489750433492e-05,
698
+ "loss": 2.5083,
699
  "step": 575
700
  },
701
  {
702
  "epoch": 0.23,
703
+ "learning_rate": 4.926229485393513e-05,
704
+ "loss": 2.5228,
705
  "step": 580
706
  },
707
  {
708
  "epoch": 0.23,
709
+ "learning_rate": 4.924958526454902e-05,
710
+ "loss": 2.4623,
711
  "step": 585
712
  },
713
  {
714
  "epoch": 0.24,
715
+ "learning_rate": 4.923676879219562e-05,
716
+ "loss": 2.4199,
717
  "step": 590
718
  },
719
  {
720
  "epoch": 0.24,
721
+ "learning_rate": 4.9223845493365085e-05,
722
+ "loss": 2.2906,
723
  "step": 595
724
  },
725
  {
726
  "epoch": 0.24,
727
+ "learning_rate": 4.9210815425018406e-05,
728
+ "loss": 2.5539,
729
  "step": 600
730
  },
731
  {
732
  "epoch": 0.24,
733
+ "learning_rate": 4.919767864458718e-05,
734
+ "loss": 2.3456,
735
  "step": 605
736
  },
737
  {
738
  "epoch": 0.24,
739
+ "learning_rate": 4.9184435209973354e-05,
740
+ "loss": 2.4328,
741
  "step": 610
742
  },
743
  {
744
  "epoch": 0.25,
745
+ "learning_rate": 4.917108517954895e-05,
746
+ "loss": 2.3412,
747
  "step": 615
748
  },
749
  {
750
  "epoch": 0.25,
751
+ "learning_rate": 4.9157628612155836e-05,
752
+ "loss": 2.604,
753
  "step": 620
754
  },
755
  {
756
  "epoch": 0.25,
757
+ "learning_rate": 4.9144065567105465e-05,
758
+ "loss": 2.4007,
759
  "step": 625
760
  },
761
  {
762
  "epoch": 0.25,
763
+ "learning_rate": 4.913039610417859e-05,
764
+ "loss": 2.437,
765
  "step": 630
766
  },
767
  {
768
  "epoch": 0.25,
769
+ "learning_rate": 4.9116620283624996e-05,
770
+ "loss": 2.5004,
771
  "step": 635
772
  },
773
  {
774
  "epoch": 0.26,
775
+ "learning_rate": 4.91027381661633e-05,
776
+ "loss": 2.5045,
777
  "step": 640
778
  },
779
  {
780
  "epoch": 0.26,
781
+ "learning_rate": 4.908874981298057e-05,
782
+ "loss": 2.3794,
783
  "step": 645
784
  },
785
  {
786
  "epoch": 0.26,
787
+ "learning_rate": 4.9074655285732196e-05,
788
+ "loss": 2.4621,
789
  "step": 650
790
  },
791
  {
792
  "epoch": 0.26,
793
+ "learning_rate": 4.906045464654148e-05,
794
+ "loss": 2.549,
795
  "step": 655
796
  },
797
  {
798
  "epoch": 0.26,
799
+ "learning_rate": 4.9046147957999454e-05,
800
+ "loss": 2.4567,
801
  "step": 660
802
  },
803
  {
804
  "epoch": 0.27,
805
+ "learning_rate": 4.9031735283164574e-05,
806
+ "loss": 2.3243,
807
  "step": 665
808
  },
809
  {
810
  "epoch": 0.27,
811
+ "learning_rate": 4.901721668556244e-05,
812
+ "loss": 2.4758,
813
  "step": 670
814
  },
815
  {
816
  "epoch": 0.27,
817
+ "learning_rate": 4.9002592229185515e-05,
818
+ "loss": 2.5083,
819
  "step": 675
820
  },
821
  {
822
  "epoch": 0.27,
823
+ "learning_rate": 4.898786197849285e-05,
824
+ "loss": 2.4428,
825
  "step": 680
826
  },
827
  {
828
  "epoch": 0.27,
829
+ "learning_rate": 4.897302599840981e-05,
830
+ "loss": 2.4403,
831
  "step": 685
832
  },
833
  {
834
  "epoch": 0.28,
835
+ "learning_rate": 4.895808435432776e-05,
836
+ "loss": 2.4401,
837
  "step": 690
838
  },
839
  {
840
  "epoch": 0.28,
841
+ "learning_rate": 4.8943037112103784e-05,
842
+ "loss": 2.3789,
843
  "step": 695
844
  },
845
  {
846
  "epoch": 0.28,
847
+ "learning_rate": 4.892788433806042e-05,
848
+ "loss": 2.3945,
849
  "step": 700
850
  },
851
  {
852
  "epoch": 0.28,
853
+ "learning_rate": 4.8912626098985355e-05,
854
+ "loss": 2.4758,
855
  "step": 705
856
  },
857
  {
858
  "epoch": 0.28,
859
+ "learning_rate": 4.88972624621311e-05,
860
+ "loss": 2.4646,
861
  "step": 710
862
  },
863
  {
864
  "epoch": 0.29,
865
+ "learning_rate": 4.888179349521475e-05,
866
+ "loss": 2.4225,
867
  "step": 715
868
  },
869
  {
870
  "epoch": 0.29,
871
+ "learning_rate": 4.886621926641762e-05,
872
+ "loss": 2.4074,
873
  "step": 720
874
  },
875
  {
876
  "epoch": 0.29,
877
+ "learning_rate": 4.8850539844385017e-05,
878
+ "loss": 2.4438,
879
  "step": 725
880
  },
881
  {
882
  "epoch": 0.29,
883
+ "learning_rate": 4.883475529822587e-05,
884
+ "loss": 2.3666,
885
  "step": 730
886
  },
887
  {
888
  "epoch": 0.29,
889
+ "learning_rate": 4.881886569751248e-05,
890
+ "loss": 2.3782,
891
  "step": 735
892
  },
893
  {
894
  "epoch": 0.3,
895
+ "learning_rate": 4.880287111228016e-05,
896
+ "loss": 2.4916,
897
  "step": 740
898
  },
899
  {
900
  "epoch": 0.3,
901
+ "learning_rate": 4.878677161302698e-05,
902
+ "loss": 2.3639,
903
  "step": 745
904
  },
905
  {
906
  "epoch": 0.3,
907
+ "learning_rate": 4.877056727071342e-05,
908
+ "loss": 2.4333,
909
  "step": 750
910
  },
911
  {
912
  "epoch": 0.3,
913
+ "learning_rate": 4.8754258156762075e-05,
914
+ "loss": 2.3907,
915
  "step": 755
916
  },
917
  {
918
  "epoch": 0.3,
919
+ "learning_rate": 4.8737844343057315e-05,
920
+ "loss": 2.433,
921
  "step": 760
922
  },
923
  {
924
  "epoch": 0.31,
925
+ "learning_rate": 4.8721325901945015e-05,
926
+ "loss": 2.3849,
927
  "step": 765
928
  },
929
  {
930
  "epoch": 0.31,
931
+ "learning_rate": 4.870470290623218e-05,
932
+ "loss": 2.419,
933
  "step": 770
934
  },
935
  {
936
  "epoch": 0.31,
937
+ "learning_rate": 4.8687975429186685e-05,
938
+ "loss": 2.3694,
939
  "step": 775
940
  },
941
  {
942
  "epoch": 0.31,
943
+ "learning_rate": 4.867114354453688e-05,
944
+ "loss": 2.4335,
945
  "step": 780
946
  },
947
  {
948
  "epoch": 0.31,
949
+ "learning_rate": 4.865420732647133e-05,
950
+ "loss": 2.4168,
951
  "step": 785
952
  },
953
  {
954
  "epoch": 0.32,
955
+ "learning_rate": 4.863716684963846e-05,
956
+ "loss": 2.419,
957
  "step": 790
958
  },
959
  {
960
  "epoch": 0.32,
961
+ "learning_rate": 4.862002218914622e-05,
962
+ "loss": 2.335,
963
  "step": 795
964
  },
965
  {
966
  "epoch": 0.32,
967
+ "learning_rate": 4.860277342056176e-05,
968
+ "loss": 2.4059,
969
  "step": 800
970
  },
971
  {
972
  "epoch": 0.32,
973
+ "learning_rate": 4.858542061991112e-05,
974
+ "loss": 2.3423,
975
  "step": 805
976
  },
977
  {
978
  "epoch": 0.32,
979
+ "learning_rate": 4.856796386367884e-05,
980
+ "loss": 2.4135,
981
  "step": 810
982
  },
983
  {
984
  "epoch": 0.33,
985
+ "learning_rate": 4.855040322880769e-05,
986
+ "loss": 2.3652,
987
  "step": 815
988
  },
989
  {
990
  "epoch": 0.33,
991
+ "learning_rate": 4.853273879269827e-05,
992
+ "loss": 2.3511,
993
  "step": 820
994
  },
995
  {
996
  "epoch": 0.33,
997
+ "learning_rate": 4.8514970633208726e-05,
998
+ "loss": 2.413,
999
  "step": 825
1000
  },
1001
  {
1002
  "epoch": 0.33,
1003
+ "learning_rate": 4.849709882865435e-05,
1004
+ "loss": 2.4445,
1005
  "step": 830
1006
  },
1007
  {
1008
  "epoch": 0.33,
1009
+ "learning_rate": 4.847912345780727e-05,
1010
+ "loss": 2.4221,
1011
  "step": 835
1012
  },
1013
  {
1014
  "epoch": 0.34,
1015
+ "learning_rate": 4.846104459989611e-05,
1016
+ "loss": 2.4992,
1017
  "step": 840
1018
  },
1019
  {
1020
  "epoch": 0.34,
1021
+ "learning_rate": 4.84428623346056e-05,
1022
+ "loss": 2.3923,
1023
  "step": 845
1024
  },
1025
  {
1026
  "epoch": 0.34,
1027
+ "learning_rate": 4.842457674207627e-05,
1028
+ "loss": 2.3666,
1029
  "step": 850
1030
  },
1031
  {
1032
  "epoch": 0.34,
1033
+ "learning_rate": 4.8406187902904076e-05,
1034
+ "loss": 2.4329,
1035
  "step": 855
1036
  },
1037
  {
1038
  "epoch": 0.34,
1039
+ "learning_rate": 4.8387695898140026e-05,
1040
+ "loss": 2.3429,
1041
  "step": 860
1042
  },
1043
  {
1044
  "epoch": 0.35,
1045
+ "learning_rate": 4.836910080928987e-05,
1046
+ "loss": 2.4428,
1047
  "step": 865
1048
  },
1049
  {
1050
  "epoch": 0.35,
1051
+ "learning_rate": 4.83504027183137e-05,
1052
+ "loss": 2.3842,
1053
  "step": 870
1054
  },
1055
  {
1056
  "epoch": 0.35,
1057
+ "learning_rate": 4.833160170762561e-05,
1058
+ "loss": 2.4064,
1059
  "step": 875
1060
  },
1061
  {
1062
  "epoch": 0.35,
1063
+ "learning_rate": 4.8312697860093295e-05,
1064
+ "loss": 2.3705,
1065
  "step": 880
1066
  },
1067
  {
1068
  "epoch": 0.35,
1069
+ "learning_rate": 4.829369125903776e-05,
1070
+ "loss": 2.4151,
1071
  "step": 885
1072
  },
1073
  {
1074
  "epoch": 0.36,
1075
+ "learning_rate": 4.8274581988232894e-05,
1076
+ "loss": 2.4219,
1077
  "step": 890
1078
  },
1079
  {
1080
  "epoch": 0.36,
1081
+ "learning_rate": 4.825537013190509e-05,
1082
+ "loss": 2.4042,
1083
  "step": 895
1084
  },
1085
  {
1086
  "epoch": 0.36,
1087
+ "learning_rate": 4.823605577473293e-05,
1088
+ "loss": 2.4509,
1089
  "step": 900
1090
  },
1091
  {
1092
  "epoch": 0.36,
1093
+ "learning_rate": 4.8216639001846764e-05,
1094
+ "loss": 2.4255,
1095
  "step": 905
1096
  },
1097
  {
1098
  "epoch": 0.36,
1099
+ "learning_rate": 4.8197119898828367e-05,
1100
+ "loss": 2.4928,
1101
  "step": 910
1102
  },
1103
  {
1104
  "epoch": 0.37,
1105
+ "learning_rate": 4.817749855171052e-05,
1106
+ "loss": 2.4044,
1107
  "step": 915
1108
  },
1109
  {
1110
  "epoch": 0.37,
1111
+ "learning_rate": 4.8157775046976684e-05,
1112
+ "loss": 2.4085,
1113
  "step": 920
1114
  },
1115
  {
1116
  "epoch": 0.37,
1117
+ "learning_rate": 4.813794947156058e-05,
1118
+ "loss": 2.3941,
1119
  "step": 925
1120
  },
1121
  {
1122
  "epoch": 0.37,
1123
+ "learning_rate": 4.8118021912845815e-05,
1124
+ "loss": 2.3493,
1125
  "step": 930
1126
  },
1127
  {
1128
  "epoch": 0.37,
1129
+ "learning_rate": 4.8097992458665506e-05,
1130
+ "loss": 2.3367,
1131
  "step": 935
1132
  },
1133
  {
1134
  "epoch": 0.38,
1135
+ "learning_rate": 4.807786119730189e-05,
1136
+ "loss": 2.3865,
1137
  "step": 940
1138
  },
1139
  {
1140
  "epoch": 0.38,
1141
+ "learning_rate": 4.8057628217485916e-05,
1142
+ "loss": 2.3885,
1143
  "step": 945
1144
  },
1145
  {
1146
  "epoch": 0.38,
1147
+ "learning_rate": 4.803729360839688e-05,
1148
+ "loss": 2.4296,
1149
  "step": 950
1150
  },
1151
  {
1152
  "epoch": 0.38,
1153
+ "learning_rate": 4.801685745966203e-05,
1154
+ "loss": 2.3856,
1155
  "step": 955
1156
  },
1157
  {
1158
  "epoch": 0.38,
1159
+ "learning_rate": 4.799631986135615e-05,
1160
+ "loss": 2.3631,
1161
  "step": 960
1162
  },
1163
  {
1164
  "epoch": 0.39,
1165
+ "learning_rate": 4.797568090400117e-05,
1166
+ "loss": 2.3882,
1167
  "step": 965
1168
  },
1169
  {
1170
  "epoch": 0.39,
1171
+ "learning_rate": 4.7954940678565785e-05,
1172
+ "loss": 2.3396,
1173
  "step": 970
1174
  },
1175
  {
1176
  "epoch": 0.39,
1177
+ "learning_rate": 4.793409927646504e-05,
1178
+ "loss": 2.3548,
1179
  "step": 975
1180
  },
1181
  {
1182
  "epoch": 0.39,
1183
+ "learning_rate": 4.791315678955991e-05,
1184
+ "loss": 2.4576,
1185
  "step": 980
1186
  },
1187
  {
1188
  "epoch": 0.39,
1189
+ "learning_rate": 4.789211331015695e-05,
1190
+ "loss": 2.4523,
1191
  "step": 985
1192
  },
1193
  {
1194
  "epoch": 0.4,
1195
+ "learning_rate": 4.787096893100781e-05,
1196
+ "loss": 2.2542,
1197
  "step": 990
1198
  },
1199
  {
1200
  "epoch": 0.4,
1201
+ "learning_rate": 4.784972374530889e-05,
1202
+ "loss": 2.4385,
1203
  "step": 995
1204
  },
1205
  {
1206
  "epoch": 0.4,
1207
+ "learning_rate": 4.7828377846700925e-05,
1208
+ "loss": 2.3266,
1209
  "step": 1000
1210
  },
1211
  {
1212
  "epoch": 0.4,
1213
+ "learning_rate": 4.780693132926851e-05,
1214
+ "loss": 2.3301,
1215
  "step": 1005
1216
  },
1217
  {
1218
  "epoch": 0.4,
1219
+ "learning_rate": 4.7785384287539755e-05,
1220
+ "loss": 2.3554,
1221
  "step": 1010
1222
  },
1223
  {
1224
  "epoch": 0.41,
1225
+ "learning_rate": 4.776373681648586e-05,
1226
+ "loss": 2.3838,
1227
  "step": 1015
1228
  },
1229
  {
1230
  "epoch": 0.41,
1231
+ "learning_rate": 4.7741989011520645e-05,
1232
+ "loss": 2.3361,
1233
  "step": 1020
1234
  },
1235
  {
1236
  "epoch": 0.41,
1237
+ "learning_rate": 4.772014096850018e-05,
1238
+ "loss": 2.2864,
1239
  "step": 1025
1240
  },
1241
  {
1242
  "epoch": 0.41,
1243
+ "learning_rate": 4.769819278372235e-05,
1244
+ "loss": 2.3955,
1245
  "step": 1030
1246
  },
1247
  {
1248
  "epoch": 0.41,
1249
+ "learning_rate": 4.7676144553926414e-05,
1250
+ "loss": 2.3802,
1251
  "step": 1035
1252
  },
1253
  {
1254
  "epoch": 0.42,
1255
+ "learning_rate": 4.765399637629262e-05,
1256
+ "loss": 2.2856,
1257
  "step": 1040
1258
  },
1259
  {
1260
  "epoch": 0.42,
1261
+ "learning_rate": 4.7631748348441705e-05,
1262
+ "loss": 2.3698,
1263
  "step": 1045
1264
  },
1265
  {
1266
  "epoch": 0.42,
1267
+ "learning_rate": 4.760940056843454e-05,
1268
+ "loss": 2.3305,
1269
  "step": 1050
1270
  },
1271
  {
1272
  "epoch": 0.42,
1273
+ "learning_rate": 4.758695313477166e-05,
1274
+ "loss": 2.4944,
1275
  "step": 1055
1276
  },
1277
  {
1278
  "epoch": 0.43,
1279
+ "learning_rate": 4.756440614639283e-05,
1280
+ "loss": 2.4622,
1281
  "step": 1060
1282
  },
1283
  {
1284
  "epoch": 0.43,
1285
+ "learning_rate": 4.754175970267661e-05,
1286
+ "loss": 2.3995,
1287
  "step": 1065
1288
  },
1289
  {
1290
  "epoch": 0.43,
1291
+ "learning_rate": 4.751901390343995e-05,
1292
+ "loss": 2.343,
1293
  "step": 1070
1294
  },
1295
  {
1296
  "epoch": 0.43,
1297
+ "learning_rate": 4.7496168848937674e-05,
1298
+ "loss": 2.4388,
1299
  "step": 1075
1300
  },
1301
  {
1302
  "epoch": 0.43,
1303
+ "learning_rate": 4.7473224639862116e-05,
1304
+ "loss": 2.3694,
1305
  "step": 1080
1306
  },
1307
  {
1308
  "epoch": 0.44,
1309
+ "learning_rate": 4.745018137734264e-05,
1310
+ "loss": 2.3485,
1311
  "step": 1085
1312
  },
1313
  {
1314
  "epoch": 0.44,
1315
+ "learning_rate": 4.742703916294521e-05,
1316
+ "loss": 2.46,
1317
  "step": 1090
1318
  },
1319
  {
1320
  "epoch": 0.44,
1321
+ "learning_rate": 4.740379809867193e-05,
1322
+ "loss": 2.3138,
1323
  "step": 1095
1324
  },
1325
  {
1326
  "epoch": 0.44,
1327
+ "learning_rate": 4.738045828696056e-05,
1328
+ "loss": 2.4117,
1329
  "step": 1100
1330
  },
1331
  {
1332
  "epoch": 0.44,
1333
+ "learning_rate": 4.735701983068416e-05,
1334
+ "loss": 2.3506,
1335
  "step": 1105
1336
  },
1337
  {
1338
  "epoch": 0.45,
1339
+ "learning_rate": 4.7333482833150525e-05,
1340
+ "loss": 2.3711,
1341
  "step": 1110
1342
  },
1343
  {
1344
  "epoch": 0.45,
1345
+ "learning_rate": 4.730984739810183e-05,
1346
+ "loss": 2.2639,
1347
  "step": 1115
1348
  },
1349
  {
1350
  "epoch": 0.45,
1351
+ "learning_rate": 4.728611362971408e-05,
1352
+ "loss": 2.3749,
1353
  "step": 1120
1354
  },
1355
  {
1356
  "epoch": 0.45,
1357
+ "learning_rate": 4.726228163259673e-05,
1358
+ "loss": 2.4556,
1359
  "step": 1125
1360
  },
1361
  {
1362
  "epoch": 0.45,
1363
+ "learning_rate": 4.7238351511792165e-05,
1364
+ "loss": 2.4233,
1365
  "step": 1130
1366
  },
1367
  {
1368
  "epoch": 0.46,
1369
+ "learning_rate": 4.721432337277529e-05,
1370
+ "loss": 2.3544,
1371
  "step": 1135
1372
  },
1373
  {
1374
  "epoch": 0.46,
1375
+ "learning_rate": 4.7190197321453014e-05,
1376
+ "loss": 2.3654,
1377
  "step": 1140
1378
  },
1379
  {
1380
  "epoch": 0.46,
1381
+ "learning_rate": 4.716597346416382e-05,
1382
+ "loss": 2.3356,
1383
  "step": 1145
1384
  },
1385
  {
1386
  "epoch": 0.46,
1387
+ "learning_rate": 4.7141651907677256e-05,
1388
+ "loss": 2.4522,
1389
  "step": 1150
1390
  },
1391
  {
1392
  "epoch": 0.46,
1393
+ "learning_rate": 4.7117232759193534e-05,
1394
+ "loss": 2.3104,
1395
  "step": 1155
1396
  },
1397
  {
1398
  "epoch": 0.47,
1399
+ "learning_rate": 4.709271612634298e-05,
1400
+ "loss": 2.4492,
1401
  "step": 1160
1402
  },
1403
  {
1404
  "epoch": 0.47,
1405
+ "learning_rate": 4.706810211718561e-05,
1406
+ "loss": 2.4437,
1407
  "step": 1165
1408
  },
1409
  {
1410
  "epoch": 0.47,
1411
+ "learning_rate": 4.7043390840210636e-05,
1412
+ "loss": 2.3099,
1413
  "step": 1170
1414
  },
1415
  {
1416
  "epoch": 0.47,
1417
+ "learning_rate": 4.701858240433597e-05,
1418
+ "loss": 2.351,
1419
  "step": 1175
1420
  },
1421
  {
1422
  "epoch": 0.47,
1423
+ "learning_rate": 4.6993676918907804e-05,
1424
+ "loss": 2.4756,
1425
  "step": 1180
1426
  },
1427
  {
1428
  "epoch": 0.48,
1429
+ "learning_rate": 4.696867449370005e-05,
1430
+ "loss": 2.418,
1431
  "step": 1185
1432
  },
1433
  {
1434
  "epoch": 0.48,
1435
+ "learning_rate": 4.6943575238913904e-05,
1436
+ "loss": 2.5026,
1437
  "step": 1190
1438
  },
1439
  {
1440
  "epoch": 0.48,
1441
+ "learning_rate": 4.691837926517737e-05,
1442
+ "loss": 2.4096,
1443
  "step": 1195
1444
  },
1445
  {
1446
  "epoch": 0.48,
1447
+ "learning_rate": 4.689308668354473e-05,
1448
+ "loss": 2.4101,
1449
  "step": 1200
1450
  },
1451
  {
1452
  "epoch": 0.48,
1453
+ "learning_rate": 4.686769760549607e-05,
1454
+ "loss": 2.4533,
1455
  "step": 1205
1456
  },
1457
  {
1458
  "epoch": 0.49,
1459
+ "learning_rate": 4.684221214293683e-05,
1460
+ "loss": 2.2988,
1461
  "step": 1210
1462
  },
1463
  {
1464
  "epoch": 0.49,
1465
+ "learning_rate": 4.681663040819724e-05,
1466
+ "loss": 2.3937,
1467
  "step": 1215
1468
  },
1469
  {
1470
  "epoch": 0.49,
1471
+ "learning_rate": 4.679095251403189e-05,
1472
+ "loss": 2.3954,
1473
  "step": 1220
1474
  },
1475
  {
1476
  "epoch": 0.49,
1477
+ "learning_rate": 4.676517857361917e-05,
1478
+ "loss": 2.3275,
1479
  "step": 1225
1480
  },
1481
  {
1482
  "epoch": 0.49,
1483
+ "learning_rate": 4.673930870056085e-05,
1484
+ "loss": 2.4748,
1485
  "step": 1230
1486
  },
1487
  {
1488
  "epoch": 0.5,
1489
+ "learning_rate": 4.67133430088815e-05,
1490
+ "loss": 2.2496,
1491
  "step": 1235
1492
  },
1493
  {
1494
  "epoch": 0.5,
1495
+ "learning_rate": 4.668728161302803e-05,
1496
+ "loss": 2.3226,
1497
  "step": 1240
1498
  },
1499
  {
1500
  "epoch": 0.5,
1501
+ "learning_rate": 4.666112462786919e-05,
1502
+ "loss": 2.4655,
1503
  "step": 1245
1504
  },
1505
  {
1506
  "epoch": 0.5,
1507
+ "learning_rate": 4.663487216869502e-05,
1508
+ "loss": 2.3751,
1509
  "step": 1250
1510
  },
1511
  {
1512
  "epoch": 0.5,
1513
+ "learning_rate": 4.660852435121642e-05,
1514
+ "loss": 2.4136,
1515
  "step": 1255
1516
  },
1517
  {
1518
  "epoch": 0.51,
1519
+ "learning_rate": 4.658208129156456e-05,
1520
+ "loss": 2.3612,
1521
  "step": 1260
1522
  },
1523
  {
1524
  "epoch": 0.51,
1525
+ "learning_rate": 4.655554310629041e-05,
1526
+ "loss": 2.3675,
1527
  "step": 1265
1528
  },
1529
  {
1530
  "epoch": 0.51,
1531
+ "learning_rate": 4.652890991236421e-05,
1532
+ "loss": 2.2831,
1533
  "step": 1270
1534
  },
1535
  {
1536
  "epoch": 0.51,
1537
+ "learning_rate": 4.650218182717498e-05,
1538
+ "loss": 2.3958,
1539
  "step": 1275
1540
  },
1541
  {
1542
  "epoch": 0.51,
1543
+ "learning_rate": 4.647535896852997e-05,
1544
+ "loss": 2.3236,
1545
  "step": 1280
1546
  },
1547
  {
1548
  "epoch": 0.52,
1549
+ "learning_rate": 4.6448441454654156e-05,
1550
+ "loss": 2.3904,
1551
  "step": 1285
1552
  },
1553
  {
1554
  "epoch": 0.52,
1555
+ "learning_rate": 4.642142940418973e-05,
1556
+ "loss": 2.3565,
1557
  "step": 1290
1558
  },
1559
  {
1560
  "epoch": 0.52,
1561
+ "learning_rate": 4.6394322936195556e-05,
1562
+ "loss": 2.5229,
1563
  "step": 1295
1564
  },
1565
  {
1566
  "epoch": 0.52,
1567
+ "learning_rate": 4.636712217014665e-05,
1568
+ "loss": 2.369,
1569
  "step": 1300
1570
  }
1571
  ],
checkpoint-1300/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c481a87016b183d9406558b8812ee05f00d60a3c721055833e3cdda34cf9bb26
3
  size 4920
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1b42f69ee6ce9030b2875b2859f3499c1e1c8f11e9ffa417d0dbdab2b51173da
3
  size 4920
checkpoint-1400/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:37882960dcad94da0c6b1d8dc9dd907d8d5bc48c2e987901c79f1d0e63d42bdb
3
  size 3158328
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e3d03acd30d4bacce2ddecdeb6c3c2cdfc82d59d0c97c3fc48cb6b5b188cd455
3
  size 3158328
checkpoint-1400/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8e039be911da4bc016c38ad1e357800790d3259379ca00b95d66f36d4eb6166e
3
  size 6372346
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6d1c2f80161b79704ec8f989e4a4f9b02767cb52f7663045994cbf2417464e36
3
  size 6372346
checkpoint-1400/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ec026babad8c601a68f1f2a7a01b3bed836d482c187da27afb5aedb8f94c8593
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e40a0e8502a3482a7fac2f1801b87bd2deab21cb9d9b5edac73820f2a0ebb67d
3
  size 14244
checkpoint-1400/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3eaf19d7cfd6b9902d2f5980def9424ccd253fe97bc5a609d14df9265d98a05f
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6098660294e311de5e1ea8e1f18d230597281b4ddf237ff1b146b71321a982f3
3
  size 1064
checkpoint-1400/trainer_state.json CHANGED
@@ -10,1682 +10,1682 @@
10
  "log_history": [
11
  {
12
  "epoch": 0.0,
13
- "learning_rate": 1.999997796189788e-05,
14
- "loss": 2.7465,
15
  "step": 5
16
  },
17
  {
18
  "epoch": 0.0,
19
- "learning_rate": 1.9999911847688657e-05,
20
- "loss": 2.6877,
21
  "step": 10
22
  },
23
  {
24
  "epoch": 0.01,
25
- "learning_rate": 1.999980165766374e-05,
26
- "loss": 2.6848,
27
  "step": 15
28
  },
29
  {
30
  "epoch": 0.01,
31
- "learning_rate": 1.9999647392308798e-05,
32
- "loss": 2.7677,
33
  "step": 20
34
  },
35
  {
36
  "epoch": 0.01,
37
- "learning_rate": 1.9999449052303777e-05,
38
- "loss": 2.7784,
39
  "step": 25
40
  },
41
  {
42
  "epoch": 0.01,
43
- "learning_rate": 1.9999206638522888e-05,
44
- "loss": 2.8161,
45
  "step": 30
46
  },
47
  {
48
  "epoch": 0.01,
49
- "learning_rate": 1.9998920152034595e-05,
50
- "loss": 2.6752,
51
  "step": 35
52
  },
53
  {
54
  "epoch": 0.02,
55
- "learning_rate": 1.9998589594101623e-05,
56
- "loss": 2.6403,
57
  "step": 40
58
  },
59
  {
60
  "epoch": 0.02,
61
- "learning_rate": 1.9998214966180948e-05,
62
- "loss": 2.6433,
63
  "step": 45
64
  },
65
  {
66
  "epoch": 0.02,
67
- "learning_rate": 1.999779626992378e-05,
68
- "loss": 2.5787,
69
  "step": 50
70
  },
71
  {
72
  "epoch": 0.02,
73
- "learning_rate": 1.9997333507175583e-05,
74
- "loss": 2.6812,
75
  "step": 55
76
  },
77
  {
78
  "epoch": 0.02,
79
- "learning_rate": 1.9996826679976033e-05,
80
- "loss": 2.7287,
81
  "step": 60
82
  },
83
  {
84
  "epoch": 0.03,
85
- "learning_rate": 1.9996275790559037e-05,
86
- "loss": 2.6465,
87
  "step": 65
88
  },
89
  {
90
  "epoch": 0.03,
91
- "learning_rate": 1.99956808413527e-05,
92
- "loss": 2.5268,
93
  "step": 70
94
  },
95
  {
96
  "epoch": 0.03,
97
- "learning_rate": 1.999504183497934e-05,
98
- "loss": 2.6893,
99
  "step": 75
100
  },
101
  {
102
  "epoch": 0.03,
103
- "learning_rate": 1.9994358774255444e-05,
104
- "loss": 2.6274,
105
  "step": 80
106
  },
107
  {
108
  "epoch": 0.03,
109
- "learning_rate": 1.9993631662191696e-05,
110
- "loss": 2.6232,
111
  "step": 85
112
  },
113
  {
114
  "epoch": 0.04,
115
- "learning_rate": 1.9992860501992924e-05,
116
- "loss": 2.7188,
117
  "step": 90
118
  },
119
  {
120
  "epoch": 0.04,
121
- "learning_rate": 1.9992045297058108e-05,
122
- "loss": 2.5388,
123
  "step": 95
124
  },
125
  {
126
  "epoch": 0.04,
127
- "learning_rate": 1.9991186050980366e-05,
128
- "loss": 2.6793,
129
  "step": 100
130
  },
131
  {
132
  "epoch": 0.04,
133
- "learning_rate": 1.9990282767546926e-05,
134
- "loss": 2.5523,
135
  "step": 105
136
  },
137
  {
138
  "epoch": 0.04,
139
- "learning_rate": 1.998933545073912e-05,
140
- "loss": 2.5763,
141
  "step": 110
142
  },
143
  {
144
  "epoch": 0.05,
145
- "learning_rate": 1.998834410473236e-05,
146
- "loss": 2.6447,
147
  "step": 115
148
  },
149
  {
150
  "epoch": 0.05,
151
- "learning_rate": 1.998730873389612e-05,
152
- "loss": 2.5579,
153
  "step": 120
154
  },
155
  {
156
  "epoch": 0.05,
157
- "learning_rate": 1.998622934279393e-05,
158
- "loss": 2.5884,
159
  "step": 125
160
  },
161
  {
162
  "epoch": 0.05,
163
- "learning_rate": 1.9985105936183327e-05,
164
- "loss": 2.5051,
165
  "step": 130
166
  },
167
  {
168
  "epoch": 0.05,
169
- "learning_rate": 1.9983938519015868e-05,
170
- "loss": 2.6014,
171
  "step": 135
172
  },
173
  {
174
  "epoch": 0.06,
175
- "learning_rate": 1.998272709643708e-05,
176
- "loss": 2.5878,
177
  "step": 140
178
  },
179
  {
180
  "epoch": 0.06,
181
- "learning_rate": 1.998147167378645e-05,
182
- "loss": 2.6642,
183
  "step": 145
184
  },
185
  {
186
  "epoch": 0.06,
187
- "learning_rate": 1.998017225659742e-05,
188
- "loss": 2.5077,
189
  "step": 150
190
  },
191
  {
192
  "epoch": 0.06,
193
- "learning_rate": 1.9978828850597312e-05,
194
- "loss": 2.5921,
195
  "step": 155
196
  },
197
  {
198
  "epoch": 0.06,
199
- "learning_rate": 1.9977441461707358e-05,
200
- "loss": 2.5577,
201
  "step": 160
202
  },
203
  {
204
  "epoch": 0.07,
205
- "learning_rate": 1.9976010096042634e-05,
206
- "loss": 2.524,
207
  "step": 165
208
  },
209
  {
210
  "epoch": 0.07,
211
- "learning_rate": 1.9974534759912068e-05,
212
- "loss": 2.5708,
213
  "step": 170
214
  },
215
  {
216
  "epoch": 0.07,
217
- "learning_rate": 1.997301545981837e-05,
218
- "loss": 2.5578,
219
  "step": 175
220
  },
221
  {
222
  "epoch": 0.07,
223
- "learning_rate": 1.9971452202458048e-05,
224
- "loss": 2.6874,
225
  "step": 180
226
  },
227
  {
228
  "epoch": 0.07,
229
- "learning_rate": 1.9969844994721338e-05,
230
- "loss": 2.535,
231
  "step": 185
232
  },
233
  {
234
  "epoch": 0.08,
235
- "learning_rate": 1.996819384369221e-05,
236
- "loss": 2.5816,
237
  "step": 190
238
  },
239
  {
240
  "epoch": 0.08,
241
- "learning_rate": 1.9966498756648305e-05,
242
- "loss": 2.6225,
243
  "step": 195
244
  },
245
  {
246
  "epoch": 0.08,
247
- "learning_rate": 1.9964759741060926e-05,
248
- "loss": 2.5387,
249
  "step": 200
250
  },
251
  {
252
  "epoch": 0.08,
253
- "learning_rate": 1.9962976804594993e-05,
254
- "loss": 2.524,
255
  "step": 205
256
  },
257
  {
258
  "epoch": 0.08,
259
- "learning_rate": 1.996114995510901e-05,
260
- "loss": 2.488,
261
  "step": 210
262
  },
263
  {
264
  "epoch": 0.09,
265
- "learning_rate": 1.9959279200655044e-05,
266
- "loss": 2.5824,
267
  "step": 215
268
  },
269
  {
270
  "epoch": 0.09,
271
- "learning_rate": 1.9957364549478663e-05,
272
- "loss": 2.5828,
273
  "step": 220
274
  },
275
  {
276
  "epoch": 0.09,
277
- "learning_rate": 1.9955406010018928e-05,
278
- "loss": 2.5137,
279
  "step": 225
280
  },
281
  {
282
  "epoch": 0.09,
283
- "learning_rate": 1.9953403590908334e-05,
284
- "loss": 2.5539,
285
  "step": 230
286
  },
287
  {
288
  "epoch": 0.09,
289
- "learning_rate": 1.995135730097278e-05,
290
- "loss": 2.6099,
291
  "step": 235
292
  },
293
  {
294
  "epoch": 0.1,
295
- "learning_rate": 1.994926714923155e-05,
296
- "loss": 2.5309,
297
  "step": 240
298
  },
299
  {
300
  "epoch": 0.1,
301
- "learning_rate": 1.9947133144897225e-05,
302
- "loss": 2.5152,
303
  "step": 245
304
  },
305
  {
306
  "epoch": 0.1,
307
- "learning_rate": 1.9944955297375693e-05,
308
- "loss": 2.4738,
309
  "step": 250
310
  },
311
  {
312
  "epoch": 0.1,
313
- "learning_rate": 1.9942733616266076e-05,
314
- "loss": 2.5173,
315
  "step": 255
316
  },
317
  {
318
  "epoch": 0.1,
319
- "learning_rate": 1.99404681113607e-05,
320
- "loss": 2.578,
321
  "step": 260
322
  },
323
  {
324
  "epoch": 0.11,
325
- "learning_rate": 1.993815879264506e-05,
326
- "loss": 2.5089,
327
  "step": 265
328
  },
329
  {
330
  "epoch": 0.11,
331
- "learning_rate": 1.9935805670297744e-05,
332
- "loss": 2.5872,
333
  "step": 270
334
  },
335
  {
336
  "epoch": 0.11,
337
- "learning_rate": 1.993340875469043e-05,
338
- "loss": 2.5882,
339
  "step": 275
340
  },
341
  {
342
  "epoch": 0.11,
343
- "learning_rate": 1.993096805638781e-05,
344
- "loss": 2.5789,
345
  "step": 280
346
  },
347
  {
348
  "epoch": 0.11,
349
- "learning_rate": 1.9928483586147553e-05,
350
- "loss": 2.488,
351
  "step": 285
352
  },
353
  {
354
  "epoch": 0.12,
355
- "learning_rate": 1.9925955354920265e-05,
356
- "loss": 2.617,
357
  "step": 290
358
  },
359
  {
360
  "epoch": 0.12,
361
- "learning_rate": 1.992338337384943e-05,
362
- "loss": 2.5752,
363
  "step": 295
364
  },
365
  {
366
  "epoch": 0.12,
367
- "learning_rate": 1.992076765427136e-05,
368
- "loss": 2.5529,
369
  "step": 300
370
  },
371
  {
372
  "epoch": 0.12,
373
- "learning_rate": 1.9918108207715156e-05,
374
- "loss": 2.4942,
375
  "step": 305
376
  },
377
  {
378
  "epoch": 0.12,
379
- "learning_rate": 1.991540504590265e-05,
380
- "loss": 2.5456,
381
  "step": 310
382
  },
383
  {
384
  "epoch": 0.13,
385
- "learning_rate": 1.991265818074835e-05,
386
- "loss": 2.5957,
387
  "step": 315
388
  },
389
  {
390
  "epoch": 0.13,
391
- "learning_rate": 1.99098676243594e-05,
392
- "loss": 2.5699,
393
  "step": 320
394
  },
395
  {
396
  "epoch": 0.13,
397
- "learning_rate": 1.9907033389035512e-05,
398
- "loss": 2.5544,
399
  "step": 325
400
  },
401
  {
402
  "epoch": 0.13,
403
- "learning_rate": 1.9904155487268912e-05,
404
- "loss": 2.538,
405
  "step": 330
406
  },
407
  {
408
  "epoch": 0.13,
409
- "learning_rate": 1.990123393174431e-05,
410
- "loss": 2.6055,
411
  "step": 335
412
  },
413
  {
414
  "epoch": 0.14,
415
- "learning_rate": 1.9898268735338807e-05,
416
- "loss": 2.5846,
417
  "step": 340
418
  },
419
  {
420
  "epoch": 0.14,
421
- "learning_rate": 1.9895259911121866e-05,
422
- "loss": 2.5405,
423
  "step": 345
424
  },
425
  {
426
  "epoch": 0.14,
427
- "learning_rate": 1.9892207472355243e-05,
428
- "loss": 2.5162,
429
  "step": 350
430
  },
431
  {
432
  "epoch": 0.14,
433
- "learning_rate": 1.988911143249292e-05,
434
- "loss": 2.5484,
435
  "step": 355
436
  },
437
  {
438
  "epoch": 0.14,
439
- "learning_rate": 1.9885971805181083e-05,
440
- "loss": 2.671,
441
  "step": 360
442
  },
443
  {
444
  "epoch": 0.15,
445
- "learning_rate": 1.9882788604258e-05,
446
- "loss": 2.5696,
447
  "step": 365
448
  },
449
  {
450
  "epoch": 0.15,
451
- "learning_rate": 1.987956184375402e-05,
452
- "loss": 2.5021,
453
  "step": 370
454
  },
455
  {
456
  "epoch": 0.15,
457
- "learning_rate": 1.9876291537891482e-05,
458
- "loss": 2.5644,
459
  "step": 375
460
  },
461
  {
462
  "epoch": 0.15,
463
- "learning_rate": 1.9872977701084645e-05,
464
- "loss": 2.5386,
465
  "step": 380
466
  },
467
  {
468
  "epoch": 0.15,
469
- "learning_rate": 1.9869620347939652e-05,
470
- "loss": 2.554,
471
  "step": 385
472
  },
473
  {
474
  "epoch": 0.16,
475
- "learning_rate": 1.9866219493254433e-05,
476
- "loss": 2.4798,
477
  "step": 390
478
  },
479
  {
480
  "epoch": 0.16,
481
- "learning_rate": 1.986277515201867e-05,
482
- "loss": 2.538,
483
  "step": 395
484
  },
485
  {
486
  "epoch": 0.16,
487
- "learning_rate": 1.9859287339413714e-05,
488
- "loss": 2.5041,
489
  "step": 400
490
  },
491
  {
492
  "epoch": 0.16,
493
- "learning_rate": 1.9855756070812514e-05,
494
- "loss": 2.7067,
495
  "step": 405
496
  },
497
  {
498
  "epoch": 0.16,
499
- "learning_rate": 1.9852181361779563e-05,
500
- "loss": 2.5054,
501
  "step": 410
502
  },
503
  {
504
  "epoch": 0.17,
505
- "learning_rate": 1.984856322807082e-05,
506
- "loss": 2.4815,
507
  "step": 415
508
  },
509
  {
510
  "epoch": 0.17,
511
- "learning_rate": 1.9844901685633648e-05,
512
- "loss": 2.5885,
513
  "step": 420
514
  },
515
  {
516
  "epoch": 0.17,
517
- "learning_rate": 1.9841196750606735e-05,
518
- "loss": 2.521,
519
  "step": 425
520
  },
521
  {
522
  "epoch": 0.17,
523
- "learning_rate": 1.9837448439320027e-05,
524
- "loss": 2.4937,
525
  "step": 430
526
  },
527
  {
528
  "epoch": 0.17,
529
- "learning_rate": 1.983365676829466e-05,
530
- "loss": 2.5877,
531
  "step": 435
532
  },
533
  {
534
  "epoch": 0.18,
535
- "learning_rate": 1.9829821754242885e-05,
536
- "loss": 2.5761,
537
  "step": 440
538
  },
539
  {
540
  "epoch": 0.18,
541
- "learning_rate": 1.9825943414067974e-05,
542
- "loss": 2.4917,
543
  "step": 445
544
  },
545
  {
546
  "epoch": 0.18,
547
- "learning_rate": 1.9822021764864194e-05,
548
- "loss": 2.5434,
549
  "step": 450
550
  },
551
  {
552
  "epoch": 0.18,
553
- "learning_rate": 1.9818056823916675e-05,
554
- "loss": 2.5906,
555
  "step": 455
556
  },
557
  {
558
  "epoch": 0.18,
559
- "learning_rate": 1.9814048608701374e-05,
560
- "loss": 2.5508,
561
  "step": 460
562
  },
563
  {
564
  "epoch": 0.19,
565
- "learning_rate": 1.980999713688499e-05,
566
- "loss": 2.6005,
567
  "step": 465
568
  },
569
  {
570
  "epoch": 0.19,
571
- "learning_rate": 1.980590242632486e-05,
572
- "loss": 2.523,
573
  "step": 470
574
  },
575
  {
576
  "epoch": 0.19,
577
- "learning_rate": 1.9801764495068923e-05,
578
- "loss": 2.5453,
579
  "step": 475
580
  },
581
  {
582
  "epoch": 0.19,
583
- "learning_rate": 1.979758336135561e-05,
584
- "loss": 2.6426,
585
  "step": 480
586
  },
587
  {
588
  "epoch": 0.19,
589
- "learning_rate": 1.9793359043613768e-05,
590
- "loss": 2.5454,
591
  "step": 485
592
  },
593
  {
594
  "epoch": 0.2,
595
- "learning_rate": 1.9789091560462587e-05,
596
- "loss": 2.6071,
597
  "step": 490
598
  },
599
  {
600
  "epoch": 0.2,
601
- "learning_rate": 1.9784780930711514e-05,
602
- "loss": 2.5913,
603
  "step": 495
604
  },
605
  {
606
  "epoch": 0.2,
607
- "learning_rate": 1.9780427173360165e-05,
608
- "loss": 2.5082,
609
  "step": 500
610
  },
611
  {
612
  "epoch": 0.2,
613
- "learning_rate": 1.977603030759825e-05,
614
- "loss": 2.565,
615
  "step": 505
616
  },
617
  {
618
  "epoch": 0.2,
619
- "learning_rate": 1.977159035280549e-05,
620
- "loss": 2.6365,
621
  "step": 510
622
  },
623
  {
624
  "epoch": 0.21,
625
- "learning_rate": 1.9767107328551515e-05,
626
- "loss": 2.6872,
627
  "step": 515
628
  },
629
  {
630
  "epoch": 0.21,
631
- "learning_rate": 1.9762581254595797e-05,
632
- "loss": 2.6222,
633
  "step": 520
634
  },
635
  {
636
  "epoch": 0.21,
637
- "learning_rate": 1.975801215088755e-05,
638
- "loss": 2.5633,
639
  "step": 525
640
  },
641
  {
642
  "epoch": 0.21,
643
- "learning_rate": 1.9753400037565653e-05,
644
- "loss": 2.6579,
645
  "step": 530
646
  },
647
  {
648
  "epoch": 0.21,
649
- "learning_rate": 1.9748744934958548e-05,
650
- "loss": 2.5572,
651
  "step": 535
652
  },
653
  {
654
  "epoch": 0.22,
655
- "learning_rate": 1.974404686358416e-05,
656
- "loss": 2.5845,
657
  "step": 540
658
  },
659
  {
660
  "epoch": 0.22,
661
- "learning_rate": 1.97393058441498e-05,
662
- "loss": 2.6484,
663
  "step": 545
664
  },
665
  {
666
  "epoch": 0.22,
667
- "learning_rate": 1.973452189755209e-05,
668
- "loss": 2.5473,
669
  "step": 550
670
  },
671
  {
672
  "epoch": 0.22,
673
- "learning_rate": 1.9729695044876847e-05,
674
- "loss": 2.5995,
675
  "step": 555
676
  },
677
  {
678
  "epoch": 0.22,
679
- "learning_rate": 1.9724825307399003e-05,
680
- "loss": 2.5473,
681
  "step": 560
682
  },
683
  {
684
  "epoch": 0.23,
685
- "learning_rate": 1.971991270658252e-05,
686
- "loss": 2.5798,
687
  "step": 565
688
  },
689
  {
690
  "epoch": 0.23,
691
- "learning_rate": 1.971495726408027e-05,
692
- "loss": 2.655,
693
  "step": 570
694
  },
695
  {
696
  "epoch": 0.23,
697
- "learning_rate": 1.970995900173397e-05,
698
- "loss": 2.6904,
699
  "step": 575
700
  },
701
  {
702
  "epoch": 0.23,
703
- "learning_rate": 1.9704917941574053e-05,
704
- "loss": 2.6973,
705
  "step": 580
706
  },
707
  {
708
  "epoch": 0.23,
709
- "learning_rate": 1.969983410581961e-05,
710
- "loss": 2.6518,
711
  "step": 585
712
  },
713
  {
714
  "epoch": 0.24,
715
- "learning_rate": 1.969470751687825e-05,
716
- "loss": 2.5849,
717
  "step": 590
718
  },
719
  {
720
  "epoch": 0.24,
721
- "learning_rate": 1.9689538197346035e-05,
722
- "loss": 2.4715,
723
  "step": 595
724
  },
725
  {
726
  "epoch": 0.24,
727
- "learning_rate": 1.9684326170007365e-05,
728
- "loss": 2.7246,
729
  "step": 600
730
  },
731
  {
732
  "epoch": 0.24,
733
- "learning_rate": 1.9679071457834874e-05,
734
- "loss": 2.5482,
735
  "step": 605
736
  },
737
  {
738
  "epoch": 0.24,
739
- "learning_rate": 1.967377408398934e-05,
740
- "loss": 2.6084,
741
  "step": 610
742
  },
743
  {
744
  "epoch": 0.25,
745
- "learning_rate": 1.966843407181958e-05,
746
- "loss": 2.5144,
747
  "step": 615
748
  },
749
  {
750
  "epoch": 0.25,
751
- "learning_rate": 1.9663051444862335e-05,
752
- "loss": 2.7663,
753
  "step": 620
754
  },
755
  {
756
  "epoch": 0.25,
757
- "learning_rate": 1.9657626226842187e-05,
758
- "loss": 2.5697,
759
  "step": 625
760
  },
761
  {
762
  "epoch": 0.25,
763
- "learning_rate": 1.9652158441671435e-05,
764
- "loss": 2.6379,
765
  "step": 630
766
  },
767
  {
768
  "epoch": 0.25,
769
- "learning_rate": 1.964664811345e-05,
770
- "loss": 2.6784,
771
  "step": 635
772
  },
773
  {
774
  "epoch": 0.26,
775
- "learning_rate": 1.964109526646532e-05,
776
- "loss": 2.6936,
777
  "step": 640
778
  },
779
  {
780
  "epoch": 0.26,
781
- "learning_rate": 1.963549992519223e-05,
782
- "loss": 2.5672,
783
  "step": 645
784
  },
785
  {
786
  "epoch": 0.26,
787
- "learning_rate": 1.962986211429288e-05,
788
- "loss": 2.62,
789
  "step": 650
790
  },
791
  {
792
  "epoch": 0.26,
793
- "learning_rate": 1.9624181858616593e-05,
794
- "loss": 2.7293,
795
  "step": 655
796
  },
797
  {
798
  "epoch": 0.26,
799
- "learning_rate": 1.9618459183199782e-05,
800
- "loss": 2.6636,
801
  "step": 660
802
  },
803
  {
804
  "epoch": 0.27,
805
- "learning_rate": 1.961269411326583e-05,
806
- "loss": 2.5106,
807
  "step": 665
808
  },
809
  {
810
  "epoch": 0.27,
811
- "learning_rate": 1.9606886674224977e-05,
812
- "loss": 2.6878,
813
  "step": 670
814
  },
815
  {
816
  "epoch": 0.27,
817
- "learning_rate": 1.960103689167421e-05,
818
- "loss": 2.693,
819
  "step": 675
820
  },
821
  {
822
  "epoch": 0.27,
823
- "learning_rate": 1.9595144791397142e-05,
824
- "loss": 2.6562,
825
  "step": 680
826
  },
827
  {
828
  "epoch": 0.27,
829
- "learning_rate": 1.9589210399363925e-05,
830
- "loss": 2.6269,
831
  "step": 685
832
  },
833
  {
834
  "epoch": 0.28,
835
- "learning_rate": 1.95832337417311e-05,
836
- "loss": 2.6459,
837
  "step": 690
838
  },
839
  {
840
  "epoch": 0.28,
841
- "learning_rate": 1.9577214844841515e-05,
842
- "loss": 2.5765,
843
  "step": 695
844
  },
845
  {
846
  "epoch": 0.28,
847
- "learning_rate": 1.957115373522417e-05,
848
- "loss": 2.6113,
849
  "step": 700
850
  },
851
  {
852
  "epoch": 0.28,
853
- "learning_rate": 1.956505043959414e-05,
854
- "loss": 2.6641,
855
  "step": 705
856
  },
857
  {
858
  "epoch": 0.28,
859
- "learning_rate": 1.955890498485244e-05,
860
- "loss": 2.6493,
861
  "step": 710
862
  },
863
  {
864
  "epoch": 0.29,
865
- "learning_rate": 1.9552717398085898e-05,
866
- "loss": 2.6135,
867
  "step": 715
868
  },
869
  {
870
  "epoch": 0.29,
871
- "learning_rate": 1.954648770656705e-05,
872
- "loss": 2.6182,
873
  "step": 720
874
  },
875
  {
876
  "epoch": 0.29,
877
- "learning_rate": 1.954021593775401e-05,
878
- "loss": 2.6487,
879
  "step": 725
880
  },
881
  {
882
  "epoch": 0.29,
883
- "learning_rate": 1.9533902119290352e-05,
884
- "loss": 2.5927,
885
  "step": 730
886
  },
887
  {
888
  "epoch": 0.29,
889
- "learning_rate": 1.952754627900499e-05,
890
- "loss": 2.5825,
891
  "step": 735
892
  },
893
  {
894
  "epoch": 0.3,
895
- "learning_rate": 1.9521148444912065e-05,
896
- "loss": 2.7193,
897
  "step": 740
898
  },
899
  {
900
  "epoch": 0.3,
901
- "learning_rate": 1.9514708645210793e-05,
902
- "loss": 2.5802,
903
  "step": 745
904
  },
905
  {
906
  "epoch": 0.3,
907
- "learning_rate": 1.9508226908285368e-05,
908
- "loss": 2.6628,
909
  "step": 750
910
  },
911
  {
912
  "epoch": 0.3,
913
- "learning_rate": 1.950170326270483e-05,
914
- "loss": 2.5847,
915
  "step": 755
916
  },
917
  {
918
  "epoch": 0.3,
919
- "learning_rate": 1.9495137737222925e-05,
920
- "loss": 2.6594,
921
  "step": 760
922
  },
923
  {
924
  "epoch": 0.31,
925
- "learning_rate": 1.9488530360778007e-05,
926
- "loss": 2.6096,
927
  "step": 765
928
  },
929
  {
930
  "epoch": 0.31,
931
- "learning_rate": 1.948188116249287e-05,
932
- "loss": 2.6378,
933
  "step": 770
934
  },
935
  {
936
  "epoch": 0.31,
937
- "learning_rate": 1.9475190171674675e-05,
938
- "loss": 2.5984,
939
  "step": 775
940
  },
941
  {
942
  "epoch": 0.31,
943
- "learning_rate": 1.9468457417814753e-05,
944
- "loss": 2.6437,
945
  "step": 780
946
  },
947
  {
948
  "epoch": 0.31,
949
- "learning_rate": 1.9461682930588534e-05,
950
- "loss": 2.6522,
951
  "step": 785
952
  },
953
  {
954
  "epoch": 0.32,
955
- "learning_rate": 1.9454866739855384e-05,
956
- "loss": 2.6242,
957
  "step": 790
958
  },
959
  {
960
  "epoch": 0.32,
961
- "learning_rate": 1.944800887565849e-05,
962
- "loss": 2.5961,
963
  "step": 795
964
  },
965
  {
966
  "epoch": 0.32,
967
- "learning_rate": 1.9441109368224704e-05,
968
- "loss": 2.6365,
969
  "step": 800
970
  },
971
  {
972
  "epoch": 0.32,
973
- "learning_rate": 1.9434168247964447e-05,
974
- "loss": 2.5674,
975
  "step": 805
976
  },
977
  {
978
  "epoch": 0.32,
979
- "learning_rate": 1.9427185545471537e-05,
980
- "loss": 2.6369,
981
  "step": 810
982
  },
983
  {
984
  "epoch": 0.33,
985
- "learning_rate": 1.9420161291523076e-05,
986
- "loss": 2.5763,
987
  "step": 815
988
  },
989
  {
990
  "epoch": 0.33,
991
- "learning_rate": 1.941309551707931e-05,
992
- "loss": 2.5651,
993
  "step": 820
994
  },
995
  {
996
  "epoch": 0.33,
997
- "learning_rate": 1.9405988253283492e-05,
998
- "loss": 2.6223,
999
  "step": 825
1000
  },
1001
  {
1002
  "epoch": 0.33,
1003
- "learning_rate": 1.939883953146174e-05,
1004
- "loss": 2.6616,
1005
  "step": 830
1006
  },
1007
  {
1008
  "epoch": 0.33,
1009
- "learning_rate": 1.939164938312291e-05,
1010
- "loss": 2.6496,
1011
  "step": 835
1012
  },
1013
  {
1014
  "epoch": 0.34,
1015
- "learning_rate": 1.9384417839958443e-05,
1016
- "loss": 2.7161,
1017
  "step": 840
1018
  },
1019
  {
1020
  "epoch": 0.34,
1021
- "learning_rate": 1.937714493384224e-05,
1022
- "loss": 2.6047,
1023
  "step": 845
1024
  },
1025
  {
1026
  "epoch": 0.34,
1027
- "learning_rate": 1.936983069683051e-05,
1028
- "loss": 2.5978,
1029
  "step": 850
1030
  },
1031
  {
1032
  "epoch": 0.34,
1033
- "learning_rate": 1.936247516116163e-05,
1034
- "loss": 2.6331,
1035
  "step": 855
1036
  },
1037
  {
1038
  "epoch": 0.34,
1039
- "learning_rate": 1.935507835925601e-05,
1040
- "loss": 2.5679,
1041
  "step": 860
1042
  },
1043
  {
1044
  "epoch": 0.35,
1045
- "learning_rate": 1.934764032371595e-05,
1046
- "loss": 2.6647,
1047
  "step": 865
1048
  },
1049
  {
1050
  "epoch": 0.35,
1051
- "learning_rate": 1.9340161087325483e-05,
1052
- "loss": 2.6142,
1053
  "step": 870
1054
  },
1055
  {
1056
  "epoch": 0.35,
1057
- "learning_rate": 1.9332640683050243e-05,
1058
- "loss": 2.6181,
1059
  "step": 875
1060
  },
1061
  {
1062
  "epoch": 0.35,
1063
- "learning_rate": 1.932507914403732e-05,
1064
- "loss": 2.6105,
1065
  "step": 880
1066
  },
1067
  {
1068
  "epoch": 0.35,
1069
- "learning_rate": 1.9317476503615108e-05,
1070
- "loss": 2.6415,
1071
  "step": 885
1072
  },
1073
  {
1074
  "epoch": 0.36,
1075
- "learning_rate": 1.9309832795293156e-05,
1076
- "loss": 2.6736,
1077
  "step": 890
1078
  },
1079
  {
1080
  "epoch": 0.36,
1081
- "learning_rate": 1.930214805276204e-05,
1082
- "loss": 2.6507,
1083
  "step": 895
1084
  },
1085
  {
1086
  "epoch": 0.36,
1087
- "learning_rate": 1.9294422309893177e-05,
1088
- "loss": 2.6771,
1089
  "step": 900
1090
  },
1091
  {
1092
  "epoch": 0.36,
1093
- "learning_rate": 1.9286655600738707e-05,
1094
- "loss": 2.6763,
1095
  "step": 905
1096
  },
1097
  {
1098
  "epoch": 0.36,
1099
- "learning_rate": 1.9278847959531348e-05,
1100
- "loss": 2.7135,
1101
  "step": 910
1102
  },
1103
  {
1104
  "epoch": 0.37,
1105
- "learning_rate": 1.927099942068421e-05,
1106
- "loss": 2.6166,
1107
  "step": 915
1108
  },
1109
  {
1110
  "epoch": 0.37,
1111
- "learning_rate": 1.9263110018790673e-05,
1112
- "loss": 2.6238,
1113
  "step": 920
1114
  },
1115
  {
1116
  "epoch": 0.37,
1117
- "learning_rate": 1.9255179788624233e-05,
1118
- "loss": 2.6424,
1119
  "step": 925
1120
  },
1121
  {
1122
  "epoch": 0.37,
1123
- "learning_rate": 1.9247208765138325e-05,
1124
- "loss": 2.6023,
1125
  "step": 930
1126
  },
1127
  {
1128
  "epoch": 0.37,
1129
- "learning_rate": 1.9239196983466204e-05,
1130
- "loss": 2.58,
1131
  "step": 935
1132
  },
1133
  {
1134
  "epoch": 0.38,
1135
- "learning_rate": 1.9231144478920756e-05,
1136
- "loss": 2.6173,
1137
  "step": 940
1138
  },
1139
  {
1140
  "epoch": 0.38,
1141
- "learning_rate": 1.9223051286994368e-05,
1142
- "loss": 2.628,
1143
  "step": 945
1144
  },
1145
  {
1146
  "epoch": 0.38,
1147
- "learning_rate": 1.9214917443358753e-05,
1148
- "loss": 2.6868,
1149
  "step": 950
1150
  },
1151
  {
1152
  "epoch": 0.38,
1153
- "learning_rate": 1.9206742983864813e-05,
1154
- "loss": 2.6342,
1155
  "step": 955
1156
  },
1157
  {
1158
  "epoch": 0.38,
1159
- "learning_rate": 1.9198527944542462e-05,
1160
- "loss": 2.5934,
1161
  "step": 960
1162
  },
1163
  {
1164
  "epoch": 0.39,
1165
- "learning_rate": 1.919027236160047e-05,
1166
- "loss": 2.6354,
1167
  "step": 965
1168
  },
1169
  {
1170
  "epoch": 0.39,
1171
- "learning_rate": 1.9181976271426315e-05,
1172
- "loss": 2.5955,
1173
  "step": 970
1174
  },
1175
  {
1176
  "epoch": 0.39,
1177
- "learning_rate": 1.9173639710586015e-05,
1178
- "loss": 2.6134,
1179
  "step": 975
1180
  },
1181
  {
1182
  "epoch": 0.39,
1183
- "learning_rate": 1.9165262715823966e-05,
1184
- "loss": 2.7,
1185
  "step": 980
1186
  },
1187
  {
1188
  "epoch": 0.39,
1189
- "learning_rate": 1.915684532406278e-05,
1190
- "loss": 2.7197,
1191
  "step": 985
1192
  },
1193
  {
1194
  "epoch": 0.4,
1195
- "learning_rate": 1.9148387572403123e-05,
1196
- "loss": 2.4881,
1197
  "step": 990
1198
  },
1199
  {
1200
  "epoch": 0.4,
1201
- "learning_rate": 1.913988949812356e-05,
1202
- "loss": 2.6999,
1203
  "step": 995
1204
  },
1205
  {
1206
  "epoch": 0.4,
1207
- "learning_rate": 1.9131351138680368e-05,
1208
- "loss": 2.5981,
1209
  "step": 1000
1210
  },
1211
  {
1212
  "epoch": 0.4,
1213
- "learning_rate": 1.9122772531707405e-05,
1214
- "loss": 2.5515,
1215
  "step": 1005
1216
  },
1217
  {
1218
  "epoch": 0.4,
1219
- "learning_rate": 1.9114153715015905e-05,
1220
- "loss": 2.623,
1221
  "step": 1010
1222
  },
1223
  {
1224
  "epoch": 0.41,
1225
- "learning_rate": 1.9105494726594344e-05,
1226
- "loss": 2.6006,
1227
  "step": 1015
1228
  },
1229
  {
1230
  "epoch": 0.41,
1231
- "learning_rate": 1.9096795604608258e-05,
1232
- "loss": 2.5929,
1233
  "step": 1020
1234
  },
1235
  {
1236
  "epoch": 0.41,
1237
- "learning_rate": 1.9088056387400074e-05,
1238
- "loss": 2.5396,
1239
  "step": 1025
1240
  },
1241
  {
1242
  "epoch": 0.41,
1243
- "learning_rate": 1.907927711348894e-05,
1244
- "loss": 2.6428,
1245
  "step": 1030
1246
  },
1247
  {
1248
  "epoch": 0.41,
1249
- "learning_rate": 1.9070457821570566e-05,
1250
- "loss": 2.6296,
1251
  "step": 1035
1252
  },
1253
  {
1254
  "epoch": 0.42,
1255
- "learning_rate": 1.9061598550517048e-05,
1256
- "loss": 2.534,
1257
  "step": 1040
1258
  },
1259
  {
1260
  "epoch": 0.42,
1261
- "learning_rate": 1.9052699339376685e-05,
1262
- "loss": 2.6049,
1263
  "step": 1045
1264
  },
1265
  {
1266
  "epoch": 0.42,
1267
- "learning_rate": 1.9043760227373817e-05,
1268
- "loss": 2.5776,
1269
  "step": 1050
1270
  },
1271
  {
1272
  "epoch": 0.42,
1273
- "learning_rate": 1.9034781253908664e-05,
1274
- "loss": 2.7483,
1275
  "step": 1055
1276
  },
1277
  {
1278
  "epoch": 0.43,
1279
- "learning_rate": 1.902576245855713e-05,
1280
- "loss": 2.6973,
1281
  "step": 1060
1282
  },
1283
  {
1284
  "epoch": 0.43,
1285
- "learning_rate": 1.9016703881070646e-05,
1286
- "loss": 2.645,
1287
  "step": 1065
1288
  },
1289
  {
1290
  "epoch": 0.43,
1291
- "learning_rate": 1.900760556137598e-05,
1292
- "loss": 2.5775,
1293
  "step": 1070
1294
  },
1295
  {
1296
  "epoch": 0.43,
1297
- "learning_rate": 1.899846753957507e-05,
1298
- "loss": 2.6872,
1299
  "step": 1075
1300
  },
1301
  {
1302
  "epoch": 0.43,
1303
- "learning_rate": 1.8989289855944846e-05,
1304
- "loss": 2.5929,
1305
  "step": 1080
1306
  },
1307
  {
1308
  "epoch": 0.44,
1309
- "learning_rate": 1.8980072550937058e-05,
1310
- "loss": 2.5668,
1311
  "step": 1085
1312
  },
1313
  {
1314
  "epoch": 0.44,
1315
- "learning_rate": 1.8970815665178086e-05,
1316
- "loss": 2.6873,
1317
  "step": 1090
1318
  },
1319
  {
1320
  "epoch": 0.44,
1321
- "learning_rate": 1.896151923946877e-05,
1322
- "loss": 2.5679,
1323
  "step": 1095
1324
  },
1325
  {
1326
  "epoch": 0.44,
1327
- "learning_rate": 1.8952183314784224e-05,
1328
- "loss": 2.6734,
1329
  "step": 1100
1330
  },
1331
  {
1332
  "epoch": 0.44,
1333
- "learning_rate": 1.8942807932273664e-05,
1334
- "loss": 2.6083,
1335
  "step": 1105
1336
  },
1337
  {
1338
  "epoch": 0.45,
1339
- "learning_rate": 1.893339313326021e-05,
1340
- "loss": 2.6189,
1341
  "step": 1110
1342
  },
1343
  {
1344
  "epoch": 0.45,
1345
- "learning_rate": 1.892393895924073e-05,
1346
- "loss": 2.5117,
1347
  "step": 1115
1348
  },
1349
  {
1350
  "epoch": 0.45,
1351
- "learning_rate": 1.8914445451885633e-05,
1352
- "loss": 2.6646,
1353
  "step": 1120
1354
  },
1355
  {
1356
  "epoch": 0.45,
1357
- "learning_rate": 1.890491265303869e-05,
1358
- "loss": 2.7551,
1359
  "step": 1125
1360
  },
1361
  {
1362
  "epoch": 0.45,
1363
- "learning_rate": 1.8895340604716867e-05,
1364
- "loss": 2.7003,
1365
  "step": 1130
1366
  },
1367
  {
1368
  "epoch": 0.46,
1369
- "learning_rate": 1.888572934911012e-05,
1370
- "loss": 2.6109,
1371
  "step": 1135
1372
  },
1373
  {
1374
  "epoch": 0.46,
1375
- "learning_rate": 1.8876078928581207e-05,
1376
- "loss": 2.6227,
1377
  "step": 1140
1378
  },
1379
  {
1380
  "epoch": 0.46,
1381
- "learning_rate": 1.8866389385665525e-05,
1382
- "loss": 2.5943,
1383
  "step": 1145
1384
  },
1385
  {
1386
  "epoch": 0.46,
1387
- "learning_rate": 1.8856660763070904e-05,
1388
- "loss": 2.7203,
1389
  "step": 1150
1390
  },
1391
  {
1392
  "epoch": 0.46,
1393
- "learning_rate": 1.8846893103677414e-05,
1394
- "loss": 2.561,
1395
  "step": 1155
1396
  },
1397
  {
1398
  "epoch": 0.47,
1399
- "learning_rate": 1.8837086450537195e-05,
1400
- "loss": 2.6959,
1401
  "step": 1160
1402
  },
1403
  {
1404
  "epoch": 0.47,
1405
- "learning_rate": 1.8827240846874245e-05,
1406
- "loss": 2.7047,
1407
  "step": 1165
1408
  },
1409
  {
1410
  "epoch": 0.47,
1411
- "learning_rate": 1.8817356336084252e-05,
1412
- "loss": 2.5853,
1413
  "step": 1170
1414
  },
1415
  {
1416
  "epoch": 0.47,
1417
- "learning_rate": 1.8807432961734388e-05,
1418
- "loss": 2.6165,
1419
  "step": 1175
1420
  },
1421
  {
1422
  "epoch": 0.47,
1423
- "learning_rate": 1.8797470767563122e-05,
1424
- "loss": 2.756,
1425
  "step": 1180
1426
  },
1427
  {
1428
  "epoch": 0.48,
1429
- "learning_rate": 1.878746979748002e-05,
1430
- "loss": 2.667,
1431
  "step": 1185
1432
  },
1433
  {
1434
  "epoch": 0.48,
1435
- "learning_rate": 1.8777430095565563e-05,
1436
- "loss": 2.7607,
1437
  "step": 1190
1438
  },
1439
  {
1440
  "epoch": 0.48,
1441
- "learning_rate": 1.876735170607095e-05,
1442
- "loss": 2.6816,
1443
  "step": 1195
1444
  },
1445
  {
1446
  "epoch": 0.48,
1447
- "learning_rate": 1.8757234673417892e-05,
1448
- "loss": 2.688,
1449
  "step": 1200
1450
  },
1451
  {
1452
  "epoch": 0.48,
1453
- "learning_rate": 1.874707904219843e-05,
1454
- "loss": 2.7286,
1455
  "step": 1205
1456
  },
1457
  {
1458
  "epoch": 0.49,
1459
- "learning_rate": 1.8736884857174733e-05,
1460
- "loss": 2.5721,
1461
  "step": 1210
1462
  },
1463
  {
1464
  "epoch": 0.49,
1465
- "learning_rate": 1.87266521632789e-05,
1466
- "loss": 2.6588,
1467
  "step": 1215
1468
  },
1469
  {
1470
  "epoch": 0.49,
1471
- "learning_rate": 1.8716381005612756e-05,
1472
- "loss": 2.625,
1473
  "step": 1220
1474
  },
1475
  {
1476
  "epoch": 0.49,
1477
- "learning_rate": 1.870607142944767e-05,
1478
- "loss": 2.5962,
1479
  "step": 1225
1480
  },
1481
  {
1482
  "epoch": 0.49,
1483
- "learning_rate": 1.869572348022434e-05,
1484
- "loss": 2.7405,
1485
  "step": 1230
1486
  },
1487
  {
1488
  "epoch": 0.5,
1489
- "learning_rate": 1.8685337203552602e-05,
1490
- "loss": 2.5352,
1491
  "step": 1235
1492
  },
1493
  {
1494
  "epoch": 0.5,
1495
- "learning_rate": 1.8674912645211212e-05,
1496
- "loss": 2.5935,
1497
  "step": 1240
1498
  },
1499
  {
1500
  "epoch": 0.5,
1501
- "learning_rate": 1.8664449851147675e-05,
1502
- "loss": 2.72,
1503
  "step": 1245
1504
  },
1505
  {
1506
  "epoch": 0.5,
1507
- "learning_rate": 1.8653948867478008e-05,
1508
- "loss": 2.6364,
1509
  "step": 1250
1510
  },
1511
  {
1512
  "epoch": 0.5,
1513
- "learning_rate": 1.864340974048657e-05,
1514
- "loss": 2.6966,
1515
  "step": 1255
1516
  },
1517
  {
1518
  "epoch": 0.51,
1519
- "learning_rate": 1.8632832516625825e-05,
1520
- "loss": 2.6181,
1521
  "step": 1260
1522
  },
1523
  {
1524
  "epoch": 0.51,
1525
- "learning_rate": 1.8622217242516164e-05,
1526
- "loss": 2.6416,
1527
  "step": 1265
1528
  },
1529
  {
1530
  "epoch": 0.51,
1531
- "learning_rate": 1.8611563964945685e-05,
1532
- "loss": 2.5406,
1533
  "step": 1270
1534
  },
1535
  {
1536
  "epoch": 0.51,
1537
- "learning_rate": 1.8600872730869995e-05,
1538
- "loss": 2.6398,
1539
  "step": 1275
1540
  },
1541
  {
1542
  "epoch": 0.51,
1543
- "learning_rate": 1.859014358741199e-05,
1544
- "loss": 2.5742,
1545
  "step": 1280
1546
  },
1547
  {
1548
  "epoch": 0.52,
1549
- "learning_rate": 1.8579376581861665e-05,
1550
- "loss": 2.6472,
1551
  "step": 1285
1552
  },
1553
  {
1554
  "epoch": 0.52,
1555
- "learning_rate": 1.8568571761675893e-05,
1556
- "loss": 2.6341,
1557
  "step": 1290
1558
  },
1559
  {
1560
  "epoch": 0.52,
1561
- "learning_rate": 1.8557729174478222e-05,
1562
- "loss": 2.7972,
1563
  "step": 1295
1564
  },
1565
  {
1566
  "epoch": 0.52,
1567
- "learning_rate": 1.854684886805866e-05,
1568
- "loss": 2.6391,
1569
  "step": 1300
1570
  },
1571
  {
1572
  "epoch": 0.52,
1573
- "learning_rate": 1.8535930890373467e-05,
1574
- "loss": 2.6008,
1575
  "step": 1305
1576
  },
1577
  {
1578
  "epoch": 0.53,
1579
- "learning_rate": 1.8524975289544943e-05,
1580
- "loss": 2.6225,
1581
  "step": 1310
1582
  },
1583
  {
1584
  "epoch": 0.53,
1585
- "learning_rate": 1.851398211386122e-05,
1586
- "loss": 2.6241,
1587
  "step": 1315
1588
  },
1589
  {
1590
  "epoch": 0.53,
1591
- "learning_rate": 1.8502951411776043e-05,
1592
- "loss": 2.7438,
1593
  "step": 1320
1594
  },
1595
  {
1596
  "epoch": 0.53,
1597
- "learning_rate": 1.849188323190856e-05,
1598
- "loss": 2.6974,
1599
  "step": 1325
1600
  },
1601
  {
1602
  "epoch": 0.53,
1603
- "learning_rate": 1.848077762304311e-05,
1604
- "loss": 2.6354,
1605
  "step": 1330
1606
  },
1607
  {
1608
  "epoch": 0.54,
1609
- "learning_rate": 1.8469634634128993e-05,
1610
- "loss": 2.7041,
1611
  "step": 1335
1612
  },
1613
  {
1614
  "epoch": 0.54,
1615
- "learning_rate": 1.8458454314280283e-05,
1616
- "loss": 2.7031,
1617
  "step": 1340
1618
  },
1619
  {
1620
  "epoch": 0.54,
1621
- "learning_rate": 1.8447236712775583e-05,
1622
- "loss": 2.5,
1623
  "step": 1345
1624
  },
1625
  {
1626
  "epoch": 0.54,
1627
- "learning_rate": 1.843598187905782e-05,
1628
- "loss": 2.627,
1629
  "step": 1350
1630
  },
1631
  {
1632
  "epoch": 0.54,
1633
- "learning_rate": 1.842468986273403e-05,
1634
- "loss": 2.6997,
1635
  "step": 1355
1636
  },
1637
  {
1638
  "epoch": 0.55,
1639
- "learning_rate": 1.841336071357514e-05,
1640
- "loss": 2.7178,
1641
  "step": 1360
1642
  },
1643
  {
1644
  "epoch": 0.55,
1645
- "learning_rate": 1.840199448151573e-05,
1646
- "loss": 2.6428,
1647
  "step": 1365
1648
  },
1649
  {
1650
  "epoch": 0.55,
1651
- "learning_rate": 1.8390591216653845e-05,
1652
- "loss": 2.6359,
1653
  "step": 1370
1654
  },
1655
  {
1656
  "epoch": 0.55,
1657
- "learning_rate": 1.8379150969250747e-05,
1658
- "loss": 2.6191,
1659
  "step": 1375
1660
  },
1661
  {
1662
  "epoch": 0.55,
1663
- "learning_rate": 1.83676737897307e-05,
1664
- "loss": 2.6765,
1665
  "step": 1380
1666
  },
1667
  {
1668
  "epoch": 0.56,
1669
- "learning_rate": 1.8356159728680754e-05,
1670
- "loss": 2.6555,
1671
  "step": 1385
1672
  },
1673
  {
1674
  "epoch": 0.56,
1675
- "learning_rate": 1.8344608836850526e-05,
1676
- "loss": 2.6545,
1677
  "step": 1390
1678
  },
1679
  {
1680
  "epoch": 0.56,
1681
- "learning_rate": 1.833302116515196e-05,
1682
- "loss": 2.6659,
1683
  "step": 1395
1684
  },
1685
  {
1686
  "epoch": 0.56,
1687
- "learning_rate": 1.832139676465911e-05,
1688
- "loss": 2.6792,
1689
  "step": 1400
1690
  }
1691
  ],
 
10
  "log_history": [
11
  {
12
  "epoch": 0.0,
13
+ "learning_rate": 4.99999449047447e-05,
14
+ "loss": 2.7413,
15
  "step": 5
16
  },
17
  {
18
  "epoch": 0.0,
19
+ "learning_rate": 4.9999779619221645e-05,
20
+ "loss": 2.6684,
21
  "step": 10
22
  },
23
  {
24
  "epoch": 0.01,
25
+ "learning_rate": 4.999950414415935e-05,
26
+ "loss": 2.6511,
27
  "step": 15
28
  },
29
  {
30
  "epoch": 0.01,
31
+ "learning_rate": 4.999911848077199e-05,
32
+ "loss": 2.72,
33
  "step": 20
34
  },
35
  {
36
  "epoch": 0.01,
37
+ "learning_rate": 4.999862263075944e-05,
38
+ "loss": 2.7222,
39
  "step": 25
40
  },
41
  {
42
  "epoch": 0.01,
43
+ "learning_rate": 4.999801659630722e-05,
44
+ "loss": 2.7479,
45
  "step": 30
46
  },
47
  {
48
  "epoch": 0.01,
49
+ "learning_rate": 4.999730038008649e-05,
50
+ "loss": 2.6063,
51
  "step": 35
52
  },
53
  {
54
  "epoch": 0.02,
55
+ "learning_rate": 4.9996473985254055e-05,
56
+ "loss": 2.576,
57
  "step": 40
58
  },
59
  {
60
  "epoch": 0.02,
61
+ "learning_rate": 4.999553741545237e-05,
62
+ "loss": 2.579,
63
  "step": 45
64
  },
65
  {
66
  "epoch": 0.02,
67
+ "learning_rate": 4.999449067480945e-05,
68
+ "loss": 2.5114,
69
  "step": 50
70
  },
71
  {
72
  "epoch": 0.02,
73
+ "learning_rate": 4.9993333767938954e-05,
74
+ "loss": 2.6174,
75
  "step": 55
76
  },
77
  {
78
  "epoch": 0.02,
79
+ "learning_rate": 4.9992066699940085e-05,
80
+ "loss": 2.6581,
81
  "step": 60
82
  },
83
  {
84
  "epoch": 0.03,
85
+ "learning_rate": 4.9990689476397586e-05,
86
+ "loss": 2.586,
87
  "step": 65
88
  },
89
  {
90
  "epoch": 0.03,
91
+ "learning_rate": 4.998920210338175e-05,
92
+ "loss": 2.4642,
93
  "step": 70
94
  },
95
  {
96
  "epoch": 0.03,
97
+ "learning_rate": 4.9987604587448343e-05,
98
+ "loss": 2.6262,
99
  "step": 75
100
  },
101
  {
102
  "epoch": 0.03,
103
+ "learning_rate": 4.998589693563861e-05,
104
+ "loss": 2.5678,
105
  "step": 80
106
  },
107
  {
108
  "epoch": 0.03,
109
+ "learning_rate": 4.998407915547924e-05,
110
+ "loss": 2.5597,
111
  "step": 85
112
  },
113
  {
114
  "epoch": 0.04,
115
+ "learning_rate": 4.9982151254982304e-05,
116
+ "loss": 2.6578,
117
  "step": 90
118
  },
119
  {
120
  "epoch": 0.04,
121
+ "learning_rate": 4.998011324264527e-05,
122
+ "loss": 2.4825,
123
  "step": 95
124
  },
125
  {
126
  "epoch": 0.04,
127
+ "learning_rate": 4.997796512745091e-05,
128
+ "loss": 2.6197,
129
  "step": 100
130
  },
131
  {
132
  "epoch": 0.04,
133
+ "learning_rate": 4.997570691886732e-05,
134
+ "loss": 2.4921,
135
  "step": 105
136
  },
137
  {
138
  "epoch": 0.04,
139
+ "learning_rate": 4.99733386268478e-05,
140
+ "loss": 2.5126,
141
  "step": 110
142
  },
143
  {
144
  "epoch": 0.05,
145
+ "learning_rate": 4.99708602618309e-05,
146
+ "loss": 2.584,
147
  "step": 115
148
  },
149
  {
150
  "epoch": 0.05,
151
+ "learning_rate": 4.9968271834740305e-05,
152
+ "loss": 2.4991,
153
  "step": 120
154
  },
155
  {
156
  "epoch": 0.05,
157
+ "learning_rate": 4.996557335698482e-05,
158
+ "loss": 2.5242,
159
  "step": 125
160
  },
161
  {
162
  "epoch": 0.05,
163
+ "learning_rate": 4.996276484045832e-05,
164
+ "loss": 2.4506,
165
  "step": 130
166
  },
167
  {
168
  "epoch": 0.05,
169
+ "learning_rate": 4.995984629753967e-05,
170
+ "loss": 2.5479,
171
  "step": 135
172
  },
173
  {
174
  "epoch": 0.06,
175
+ "learning_rate": 4.9956817741092696e-05,
176
+ "loss": 2.5316,
177
  "step": 140
178
  },
179
  {
180
  "epoch": 0.06,
181
+ "learning_rate": 4.995367918446613e-05,
182
+ "loss": 2.6053,
183
  "step": 145
184
  },
185
  {
186
  "epoch": 0.06,
187
+ "learning_rate": 4.995043064149354e-05,
188
+ "loss": 2.4533,
189
  "step": 150
190
  },
191
  {
192
  "epoch": 0.06,
193
+ "learning_rate": 4.9947072126493276e-05,
194
+ "loss": 2.5279,
195
  "step": 155
196
  },
197
  {
198
  "epoch": 0.06,
199
+ "learning_rate": 4.994360365426839e-05,
200
+ "loss": 2.4994,
201
  "step": 160
202
  },
203
  {
204
  "epoch": 0.07,
205
+ "learning_rate": 4.994002524010659e-05,
206
+ "loss": 2.4675,
207
  "step": 165
208
  },
209
  {
210
  "epoch": 0.07,
211
+ "learning_rate": 4.9936336899780166e-05,
212
+ "loss": 2.5233,
213
  "step": 170
214
  },
215
  {
216
  "epoch": 0.07,
217
+ "learning_rate": 4.993253864954592e-05,
218
+ "loss": 2.4974,
219
  "step": 175
220
  },
221
  {
222
  "epoch": 0.07,
223
+ "learning_rate": 4.992863050614511e-05,
224
+ "loss": 2.6295,
225
  "step": 180
226
  },
227
  {
228
  "epoch": 0.07,
229
+ "learning_rate": 4.9924612486803346e-05,
230
+ "loss": 2.4744,
231
  "step": 185
232
  },
233
  {
234
  "epoch": 0.08,
235
+ "learning_rate": 4.992048460923052e-05,
236
+ "loss": 2.5248,
237
  "step": 190
238
  },
239
  {
240
  "epoch": 0.08,
241
+ "learning_rate": 4.991624689162076e-05,
242
+ "loss": 2.5727,
243
  "step": 195
244
  },
245
  {
246
  "epoch": 0.08,
247
+ "learning_rate": 4.991189935265231e-05,
248
+ "loss": 2.4845,
249
  "step": 200
250
  },
251
  {
252
  "epoch": 0.08,
253
+ "learning_rate": 4.990744201148748e-05,
254
+ "loss": 2.4544,
255
  "step": 205
256
  },
257
  {
258
  "epoch": 0.08,
259
+ "learning_rate": 4.990287488777253e-05,
260
+ "loss": 2.4383,
261
  "step": 210
262
  },
263
  {
264
  "epoch": 0.09,
265
+ "learning_rate": 4.989819800163761e-05,
266
+ "loss": 2.5263,
267
  "step": 215
268
  },
269
  {
270
  "epoch": 0.09,
271
+ "learning_rate": 4.989341137369666e-05,
272
+ "loss": 2.5219,
273
  "step": 220
274
  },
275
  {
276
  "epoch": 0.09,
277
+ "learning_rate": 4.9888515025047316e-05,
278
+ "loss": 2.4512,
279
  "step": 225
280
  },
281
  {
282
  "epoch": 0.09,
283
+ "learning_rate": 4.988350897727083e-05,
284
+ "loss": 2.491,
285
  "step": 230
286
  },
287
  {
288
  "epoch": 0.09,
289
+ "learning_rate": 4.9878393252431953e-05,
290
+ "loss": 2.5404,
291
  "step": 235
292
  },
293
  {
294
  "epoch": 0.1,
295
+ "learning_rate": 4.987316787307888e-05,
296
+ "loss": 2.4777,
297
  "step": 240
298
  },
299
  {
300
  "epoch": 0.1,
301
+ "learning_rate": 4.9867832862243055e-05,
302
+ "loss": 2.4498,
303
  "step": 245
304
  },
305
  {
306
  "epoch": 0.1,
307
+ "learning_rate": 4.986238824343923e-05,
308
+ "loss": 2.4174,
309
  "step": 250
310
  },
311
  {
312
  "epoch": 0.1,
313
+ "learning_rate": 4.985683404066519e-05,
314
+ "loss": 2.4589,
315
  "step": 255
316
  },
317
  {
318
  "epoch": 0.1,
319
+ "learning_rate": 4.985117027840175e-05,
320
+ "loss": 2.5029,
321
  "step": 260
322
  },
323
  {
324
  "epoch": 0.11,
325
+ "learning_rate": 4.984539698161264e-05,
326
+ "loss": 2.4376,
327
  "step": 265
328
  },
329
  {
330
  "epoch": 0.11,
331
+ "learning_rate": 4.983951417574436e-05,
332
+ "loss": 2.526,
333
  "step": 270
334
  },
335
  {
336
  "epoch": 0.11,
337
+ "learning_rate": 4.983352188672608e-05,
338
+ "loss": 2.5214,
339
  "step": 275
340
  },
341
  {
342
  "epoch": 0.11,
343
+ "learning_rate": 4.982742014096952e-05,
344
+ "loss": 2.5188,
345
  "step": 280
346
  },
347
  {
348
  "epoch": 0.11,
349
+ "learning_rate": 4.982120896536888e-05,
350
+ "loss": 2.4175,
351
  "step": 285
352
  },
353
  {
354
  "epoch": 0.12,
355
+ "learning_rate": 4.981488838730066e-05,
356
+ "loss": 2.5346,
357
  "step": 290
358
  },
359
  {
360
  "epoch": 0.12,
361
+ "learning_rate": 4.980845843462357e-05,
362
+ "loss": 2.496,
363
  "step": 295
364
  },
365
  {
366
  "epoch": 0.12,
367
+ "learning_rate": 4.98019191356784e-05,
368
+ "loss": 2.477,
369
  "step": 300
370
  },
371
  {
372
  "epoch": 0.12,
373
+ "learning_rate": 4.9795270519287886e-05,
374
+ "loss": 2.4205,
375
  "step": 305
376
  },
377
  {
378
  "epoch": 0.12,
379
+ "learning_rate": 4.9788512614756624e-05,
380
+ "loss": 2.4621,
381
  "step": 310
382
  },
383
  {
384
  "epoch": 0.13,
385
+ "learning_rate": 4.9781645451870875e-05,
386
+ "loss": 2.5098,
387
  "step": 315
388
  },
389
  {
390
  "epoch": 0.13,
391
+ "learning_rate": 4.9774669060898496e-05,
392
+ "loss": 2.4847,
393
  "step": 320
394
  },
395
  {
396
  "epoch": 0.13,
397
+ "learning_rate": 4.976758347258877e-05,
398
+ "loss": 2.4873,
399
  "step": 325
400
  },
401
  {
402
  "epoch": 0.13,
403
+ "learning_rate": 4.976038871817228e-05,
404
+ "loss": 2.4462,
405
  "step": 330
406
  },
407
  {
408
  "epoch": 0.13,
409
+ "learning_rate": 4.9753084829360776e-05,
410
+ "loss": 2.4994,
411
  "step": 335
412
  },
413
  {
414
  "epoch": 0.14,
415
+ "learning_rate": 4.974567183834702e-05,
416
+ "loss": 2.4994,
417
  "step": 340
418
  },
419
  {
420
  "epoch": 0.14,
421
+ "learning_rate": 4.9738149777804665e-05,
422
+ "loss": 2.4584,
423
  "step": 345
424
  },
425
  {
426
  "epoch": 0.14,
427
+ "learning_rate": 4.97305186808881e-05,
428
+ "loss": 2.4294,
429
  "step": 350
430
  },
431
  {
432
  "epoch": 0.14,
433
+ "learning_rate": 4.9722778581232305e-05,
434
+ "loss": 2.4499,
435
  "step": 355
436
  },
437
  {
438
  "epoch": 0.14,
439
+ "learning_rate": 4.9714929512952704e-05,
440
+ "loss": 2.5851,
441
  "step": 360
442
  },
443
  {
444
  "epoch": 0.15,
445
+ "learning_rate": 4.9706971510645e-05,
446
+ "loss": 2.4746,
447
  "step": 365
448
  },
449
  {
450
  "epoch": 0.15,
451
+ "learning_rate": 4.969890460938505e-05,
452
+ "loss": 2.4184,
453
  "step": 370
454
  },
455
  {
456
  "epoch": 0.15,
457
+ "learning_rate": 4.9690728844728704e-05,
458
+ "loss": 2.4716,
459
  "step": 375
460
  },
461
  {
462
  "epoch": 0.15,
463
+ "learning_rate": 4.968244425271161e-05,
464
+ "loss": 2.4477,
465
  "step": 380
466
  },
467
  {
468
  "epoch": 0.15,
469
+ "learning_rate": 4.9674050869849124e-05,
470
+ "loss": 2.4563,
471
  "step": 385
472
  },
473
  {
474
  "epoch": 0.16,
475
+ "learning_rate": 4.966554873313608e-05,
476
+ "loss": 2.3708,
477
  "step": 390
478
  },
479
  {
480
  "epoch": 0.16,
481
+ "learning_rate": 4.9656937880046676e-05,
482
+ "loss": 2.4351,
483
  "step": 395
484
  },
485
  {
486
  "epoch": 0.16,
487
+ "learning_rate": 4.9648218348534284e-05,
488
+ "loss": 2.3933,
489
  "step": 400
490
  },
491
  {
492
  "epoch": 0.16,
493
+ "learning_rate": 4.963939017703128e-05,
494
+ "loss": 2.5871,
495
  "step": 405
496
  },
497
  {
498
  "epoch": 0.16,
499
+ "learning_rate": 4.9630453404448905e-05,
500
+ "loss": 2.3965,
501
  "step": 410
502
  },
503
  {
504
  "epoch": 0.17,
505
+ "learning_rate": 4.962140807017705e-05,
506
+ "loss": 2.372,
507
  "step": 415
508
  },
509
  {
510
  "epoch": 0.17,
511
+ "learning_rate": 4.961225421408412e-05,
512
+ "loss": 2.4847,
513
  "step": 420
514
  },
515
  {
516
  "epoch": 0.17,
517
+ "learning_rate": 4.960299187651684e-05,
518
+ "loss": 2.4046,
519
  "step": 425
520
  },
521
  {
522
  "epoch": 0.17,
523
+ "learning_rate": 4.959362109830007e-05,
524
+ "loss": 2.3912,
525
  "step": 430
526
  },
527
  {
528
  "epoch": 0.17,
529
+ "learning_rate": 4.9584141920736656e-05,
530
+ "loss": 2.4732,
531
  "step": 435
532
  },
533
  {
534
  "epoch": 0.18,
535
+ "learning_rate": 4.957455438560721e-05,
536
+ "loss": 2.4854,
537
  "step": 440
538
  },
539
  {
540
  "epoch": 0.18,
541
+ "learning_rate": 4.956485853516993e-05,
542
+ "loss": 2.3736,
543
  "step": 445
544
  },
545
  {
546
  "epoch": 0.18,
547
+ "learning_rate": 4.9555054412160476e-05,
548
+ "loss": 2.4222,
549
  "step": 450
550
  },
551
  {
552
  "epoch": 0.18,
553
+ "learning_rate": 4.9545142059791686e-05,
554
+ "loss": 2.4498,
555
  "step": 455
556
  },
557
  {
558
  "epoch": 0.18,
559
+ "learning_rate": 4.9535121521753434e-05,
560
+ "loss": 2.4172,
561
  "step": 460
562
  },
563
  {
564
  "epoch": 0.19,
565
+ "learning_rate": 4.952499284221247e-05,
566
+ "loss": 2.4742,
567
  "step": 465
568
  },
569
  {
570
  "epoch": 0.19,
571
+ "learning_rate": 4.951475606581215e-05,
572
+ "loss": 2.4008,
573
  "step": 470
574
  },
575
  {
576
  "epoch": 0.19,
577
+ "learning_rate": 4.950441123767231e-05,
578
+ "loss": 2.4278,
579
  "step": 475
580
  },
581
  {
582
  "epoch": 0.19,
583
+ "learning_rate": 4.949395840338903e-05,
584
+ "loss": 2.5041,
585
  "step": 480
586
  },
587
  {
588
  "epoch": 0.19,
589
+ "learning_rate": 4.948339760903442e-05,
590
+ "loss": 2.4088,
591
  "step": 485
592
  },
593
  {
594
  "epoch": 0.2,
595
+ "learning_rate": 4.947272890115647e-05,
596
+ "loss": 2.466,
597
  "step": 490
598
  },
599
  {
600
  "epoch": 0.2,
601
+ "learning_rate": 4.946195232677878e-05,
602
+ "loss": 2.4404,
603
  "step": 495
604
  },
605
  {
606
  "epoch": 0.2,
607
+ "learning_rate": 4.9451067933400406e-05,
608
+ "loss": 2.3658,
609
  "step": 500
610
  },
611
  {
612
  "epoch": 0.2,
613
+ "learning_rate": 4.9440075768995625e-05,
614
+ "loss": 2.4247,
615
  "step": 505
616
  },
617
  {
618
  "epoch": 0.2,
619
+ "learning_rate": 4.942897588201372e-05,
620
+ "loss": 2.5082,
621
  "step": 510
622
  },
623
  {
624
  "epoch": 0.21,
625
+ "learning_rate": 4.941776832137879e-05,
626
+ "loss": 2.5545,
627
  "step": 515
628
  },
629
  {
630
  "epoch": 0.21,
631
+ "learning_rate": 4.940645313648949e-05,
632
+ "loss": 2.4665,
633
  "step": 520
634
  },
635
  {
636
  "epoch": 0.21,
637
+ "learning_rate": 4.939503037721888e-05,
638
+ "loss": 2.4195,
639
  "step": 525
640
  },
641
  {
642
  "epoch": 0.21,
643
+ "learning_rate": 4.938350009391413e-05,
644
+ "loss": 2.4908,
645
  "step": 530
646
  },
647
  {
648
  "epoch": 0.21,
649
+ "learning_rate": 4.937186233739637e-05,
650
+ "loss": 2.4093,
651
  "step": 535
652
  },
653
  {
654
  "epoch": 0.22,
655
+ "learning_rate": 4.93601171589604e-05,
656
+ "loss": 2.44,
657
  "step": 540
658
  },
659
  {
660
  "epoch": 0.22,
661
+ "learning_rate": 4.9348264610374494e-05,
662
+ "loss": 2.4989,
663
  "step": 545
664
  },
665
  {
666
  "epoch": 0.22,
667
+ "learning_rate": 4.9336304743880226e-05,
668
+ "loss": 2.3786,
669
  "step": 550
670
  },
671
  {
672
  "epoch": 0.22,
673
+ "learning_rate": 4.932423761219211e-05,
674
+ "loss": 2.4305,
675
  "step": 555
676
  },
677
  {
678
  "epoch": 0.22,
679
+ "learning_rate": 4.931206326849751e-05,
680
+ "loss": 2.4012,
681
  "step": 560
682
  },
683
  {
684
  "epoch": 0.23,
685
+ "learning_rate": 4.92997817664563e-05,
686
+ "loss": 2.4263,
687
  "step": 565
688
  },
689
  {
690
  "epoch": 0.23,
691
+ "learning_rate": 4.9287393160200676e-05,
692
+ "loss": 2.4749,
693
  "step": 570
694
  },
695
  {
696
  "epoch": 0.23,
697
+ "learning_rate": 4.927489750433492e-05,
698
+ "loss": 2.5083,
699
  "step": 575
700
  },
701
  {
702
  "epoch": 0.23,
703
+ "learning_rate": 4.926229485393513e-05,
704
+ "loss": 2.5228,
705
  "step": 580
706
  },
707
  {
708
  "epoch": 0.23,
709
+ "learning_rate": 4.924958526454902e-05,
710
+ "loss": 2.4623,
711
  "step": 585
712
  },
713
  {
714
  "epoch": 0.24,
715
+ "learning_rate": 4.923676879219562e-05,
716
+ "loss": 2.4199,
717
  "step": 590
718
  },
719
  {
720
  "epoch": 0.24,
721
+ "learning_rate": 4.9223845493365085e-05,
722
+ "loss": 2.2906,
723
  "step": 595
724
  },
725
  {
726
  "epoch": 0.24,
727
+ "learning_rate": 4.9210815425018406e-05,
728
+ "loss": 2.5539,
729
  "step": 600
730
  },
731
  {
732
  "epoch": 0.24,
733
+ "learning_rate": 4.919767864458718e-05,
734
+ "loss": 2.3456,
735
  "step": 605
736
  },
737
  {
738
  "epoch": 0.24,
739
+ "learning_rate": 4.9184435209973354e-05,
740
+ "loss": 2.4328,
741
  "step": 610
742
  },
743
  {
744
  "epoch": 0.25,
745
+ "learning_rate": 4.917108517954895e-05,
746
+ "loss": 2.3412,
747
  "step": 615
748
  },
749
  {
750
  "epoch": 0.25,
751
+ "learning_rate": 4.9157628612155836e-05,
752
+ "loss": 2.604,
753
  "step": 620
754
  },
755
  {
756
  "epoch": 0.25,
757
+ "learning_rate": 4.9144065567105465e-05,
758
+ "loss": 2.4007,
759
  "step": 625
760
  },
761
  {
762
  "epoch": 0.25,
763
+ "learning_rate": 4.913039610417859e-05,
764
+ "loss": 2.437,
765
  "step": 630
766
  },
767
  {
768
  "epoch": 0.25,
769
+ "learning_rate": 4.9116620283624996e-05,
770
+ "loss": 2.5004,
771
  "step": 635
772
  },
773
  {
774
  "epoch": 0.26,
775
+ "learning_rate": 4.91027381661633e-05,
776
+ "loss": 2.5045,
777
  "step": 640
778
  },
779
  {
780
  "epoch": 0.26,
781
+ "learning_rate": 4.908874981298057e-05,
782
+ "loss": 2.3794,
783
  "step": 645
784
  },
785
  {
786
  "epoch": 0.26,
787
+ "learning_rate": 4.9074655285732196e-05,
788
+ "loss": 2.4621,
789
  "step": 650
790
  },
791
  {
792
  "epoch": 0.26,
793
+ "learning_rate": 4.906045464654148e-05,
794
+ "loss": 2.549,
795
  "step": 655
796
  },
797
  {
798
  "epoch": 0.26,
799
+ "learning_rate": 4.9046147957999454e-05,
800
+ "loss": 2.4567,
801
  "step": 660
802
  },
803
  {
804
  "epoch": 0.27,
805
+ "learning_rate": 4.9031735283164574e-05,
806
+ "loss": 2.3243,
807
  "step": 665
808
  },
809
  {
810
  "epoch": 0.27,
811
+ "learning_rate": 4.901721668556244e-05,
812
+ "loss": 2.4758,
813
  "step": 670
814
  },
815
  {
816
  "epoch": 0.27,
817
+ "learning_rate": 4.9002592229185515e-05,
818
+ "loss": 2.5083,
819
  "step": 675
820
  },
821
  {
822
  "epoch": 0.27,
823
+ "learning_rate": 4.898786197849285e-05,
824
+ "loss": 2.4428,
825
  "step": 680
826
  },
827
  {
828
  "epoch": 0.27,
829
+ "learning_rate": 4.897302599840981e-05,
830
+ "loss": 2.4403,
831
  "step": 685
832
  },
833
  {
834
  "epoch": 0.28,
835
+ "learning_rate": 4.895808435432776e-05,
836
+ "loss": 2.4401,
837
  "step": 690
838
  },
839
  {
840
  "epoch": 0.28,
841
+ "learning_rate": 4.8943037112103784e-05,
842
+ "loss": 2.3789,
843
  "step": 695
844
  },
845
  {
846
  "epoch": 0.28,
847
+ "learning_rate": 4.892788433806042e-05,
848
+ "loss": 2.3945,
849
  "step": 700
850
  },
851
  {
852
  "epoch": 0.28,
853
+ "learning_rate": 4.8912626098985355e-05,
854
+ "loss": 2.4758,
855
  "step": 705
856
  },
857
  {
858
  "epoch": 0.28,
859
+ "learning_rate": 4.88972624621311e-05,
860
+ "loss": 2.4646,
861
  "step": 710
862
  },
863
  {
864
  "epoch": 0.29,
865
+ "learning_rate": 4.888179349521475e-05,
866
+ "loss": 2.4225,
867
  "step": 715
868
  },
869
  {
870
  "epoch": 0.29,
871
+ "learning_rate": 4.886621926641762e-05,
872
+ "loss": 2.4074,
873
  "step": 720
874
  },
875
  {
876
  "epoch": 0.29,
877
+ "learning_rate": 4.8850539844385017e-05,
878
+ "loss": 2.4438,
879
  "step": 725
880
  },
881
  {
882
  "epoch": 0.29,
883
+ "learning_rate": 4.883475529822587e-05,
884
+ "loss": 2.3666,
885
  "step": 730
886
  },
887
  {
888
  "epoch": 0.29,
889
+ "learning_rate": 4.881886569751248e-05,
890
+ "loss": 2.3782,
891
  "step": 735
892
  },
893
  {
894
  "epoch": 0.3,
895
+ "learning_rate": 4.880287111228016e-05,
896
+ "loss": 2.4916,
897
  "step": 740
898
  },
899
  {
900
  "epoch": 0.3,
901
+ "learning_rate": 4.878677161302698e-05,
902
+ "loss": 2.3639,
903
  "step": 745
904
  },
905
  {
906
  "epoch": 0.3,
907
+ "learning_rate": 4.877056727071342e-05,
908
+ "loss": 2.4333,
909
  "step": 750
910
  },
911
  {
912
  "epoch": 0.3,
913
+ "learning_rate": 4.8754258156762075e-05,
914
+ "loss": 2.3907,
915
  "step": 755
916
  },
917
  {
918
  "epoch": 0.3,
919
+ "learning_rate": 4.8737844343057315e-05,
920
+ "loss": 2.433,
921
  "step": 760
922
  },
923
  {
924
  "epoch": 0.31,
925
+ "learning_rate": 4.8721325901945015e-05,
926
+ "loss": 2.3849,
927
  "step": 765
928
  },
929
  {
930
  "epoch": 0.31,
931
+ "learning_rate": 4.870470290623218e-05,
932
+ "loss": 2.419,
933
  "step": 770
934
  },
935
  {
936
  "epoch": 0.31,
937
+ "learning_rate": 4.8687975429186685e-05,
938
+ "loss": 2.3694,
939
  "step": 775
940
  },
941
  {
942
  "epoch": 0.31,
943
+ "learning_rate": 4.867114354453688e-05,
944
+ "loss": 2.4335,
945
  "step": 780
946
  },
947
  {
948
  "epoch": 0.31,
949
+ "learning_rate": 4.865420732647133e-05,
950
+ "loss": 2.4168,
951
  "step": 785
952
  },
953
  {
954
  "epoch": 0.32,
955
+ "learning_rate": 4.863716684963846e-05,
956
+ "loss": 2.419,
957
  "step": 790
958
  },
959
  {
960
  "epoch": 0.32,
961
+ "learning_rate": 4.862002218914622e-05,
962
+ "loss": 2.335,
963
  "step": 795
964
  },
965
  {
966
  "epoch": 0.32,
967
+ "learning_rate": 4.860277342056176e-05,
968
+ "loss": 2.4059,
969
  "step": 800
970
  },
971
  {
972
  "epoch": 0.32,
973
+ "learning_rate": 4.858542061991112e-05,
974
+ "loss": 2.3423,
975
  "step": 805
976
  },
977
  {
978
  "epoch": 0.32,
979
+ "learning_rate": 4.856796386367884e-05,
980
+ "loss": 2.4135,
981
  "step": 810
982
  },
983
  {
984
  "epoch": 0.33,
985
+ "learning_rate": 4.855040322880769e-05,
986
+ "loss": 2.3652,
987
  "step": 815
988
  },
989
  {
990
  "epoch": 0.33,
991
+ "learning_rate": 4.853273879269827e-05,
992
+ "loss": 2.3511,
993
  "step": 820
994
  },
995
  {
996
  "epoch": 0.33,
997
+ "learning_rate": 4.8514970633208726e-05,
998
+ "loss": 2.413,
999
  "step": 825
1000
  },
1001
  {
1002
  "epoch": 0.33,
1003
+ "learning_rate": 4.849709882865435e-05,
1004
+ "loss": 2.4445,
1005
  "step": 830
1006
  },
1007
  {
1008
  "epoch": 0.33,
1009
+ "learning_rate": 4.847912345780727e-05,
1010
+ "loss": 2.4221,
1011
  "step": 835
1012
  },
1013
  {
1014
  "epoch": 0.34,
1015
+ "learning_rate": 4.846104459989611e-05,
1016
+ "loss": 2.4992,
1017
  "step": 840
1018
  },
1019
  {
1020
  "epoch": 0.34,
1021
+ "learning_rate": 4.84428623346056e-05,
1022
+ "loss": 2.3923,
1023
  "step": 845
1024
  },
1025
  {
1026
  "epoch": 0.34,
1027
+ "learning_rate": 4.842457674207627e-05,
1028
+ "loss": 2.3666,
1029
  "step": 850
1030
  },
1031
  {
1032
  "epoch": 0.34,
1033
+ "learning_rate": 4.8406187902904076e-05,
1034
+ "loss": 2.4329,
1035
  "step": 855
1036
  },
1037
  {
1038
  "epoch": 0.34,
1039
+ "learning_rate": 4.8387695898140026e-05,
1040
+ "loss": 2.3429,
1041
  "step": 860
1042
  },
1043
  {
1044
  "epoch": 0.35,
1045
+ "learning_rate": 4.836910080928987e-05,
1046
+ "loss": 2.4428,
1047
  "step": 865
1048
  },
1049
  {
1050
  "epoch": 0.35,
1051
+ "learning_rate": 4.83504027183137e-05,
1052
+ "loss": 2.3842,
1053
  "step": 870
1054
  },
1055
  {
1056
  "epoch": 0.35,
1057
+ "learning_rate": 4.833160170762561e-05,
1058
+ "loss": 2.4064,
1059
  "step": 875
1060
  },
1061
  {
1062
  "epoch": 0.35,
1063
+ "learning_rate": 4.8312697860093295e-05,
1064
+ "loss": 2.3705,
1065
  "step": 880
1066
  },
1067
  {
1068
  "epoch": 0.35,
1069
+ "learning_rate": 4.829369125903776e-05,
1070
+ "loss": 2.4151,
1071
  "step": 885
1072
  },
1073
  {
1074
  "epoch": 0.36,
1075
+ "learning_rate": 4.8274581988232894e-05,
1076
+ "loss": 2.4219,
1077
  "step": 890
1078
  },
1079
  {
1080
  "epoch": 0.36,
1081
+ "learning_rate": 4.825537013190509e-05,
1082
+ "loss": 2.4042,
1083
  "step": 895
1084
  },
1085
  {
1086
  "epoch": 0.36,
1087
+ "learning_rate": 4.823605577473293e-05,
1088
+ "loss": 2.4509,
1089
  "step": 900
1090
  },
1091
  {
1092
  "epoch": 0.36,
1093
+ "learning_rate": 4.8216639001846764e-05,
1094
+ "loss": 2.4255,
1095
  "step": 905
1096
  },
1097
  {
1098
  "epoch": 0.36,
1099
+ "learning_rate": 4.8197119898828367e-05,
1100
+ "loss": 2.4928,
1101
  "step": 910
1102
  },
1103
  {
1104
  "epoch": 0.37,
1105
+ "learning_rate": 4.817749855171052e-05,
1106
+ "loss": 2.4044,
1107
  "step": 915
1108
  },
1109
  {
1110
  "epoch": 0.37,
1111
+ "learning_rate": 4.8157775046976684e-05,
1112
+ "loss": 2.4085,
1113
  "step": 920
1114
  },
1115
  {
1116
  "epoch": 0.37,
1117
+ "learning_rate": 4.813794947156058e-05,
1118
+ "loss": 2.3941,
1119
  "step": 925
1120
  },
1121
  {
1122
  "epoch": 0.37,
1123
+ "learning_rate": 4.8118021912845815e-05,
1124
+ "loss": 2.3493,
1125
  "step": 930
1126
  },
1127
  {
1128
  "epoch": 0.37,
1129
+ "learning_rate": 4.8097992458665506e-05,
1130
+ "loss": 2.3367,
1131
  "step": 935
1132
  },
1133
  {
1134
  "epoch": 0.38,
1135
+ "learning_rate": 4.807786119730189e-05,
1136
+ "loss": 2.3865,
1137
  "step": 940
1138
  },
1139
  {
1140
  "epoch": 0.38,
1141
+ "learning_rate": 4.8057628217485916e-05,
1142
+ "loss": 2.3885,
1143
  "step": 945
1144
  },
1145
  {
1146
  "epoch": 0.38,
1147
+ "learning_rate": 4.803729360839688e-05,
1148
+ "loss": 2.4296,
1149
  "step": 950
1150
  },
1151
  {
1152
  "epoch": 0.38,
1153
+ "learning_rate": 4.801685745966203e-05,
1154
+ "loss": 2.3856,
1155
  "step": 955
1156
  },
1157
  {
1158
  "epoch": 0.38,
1159
+ "learning_rate": 4.799631986135615e-05,
1160
+ "loss": 2.3631,
1161
  "step": 960
1162
  },
1163
  {
1164
  "epoch": 0.39,
1165
+ "learning_rate": 4.797568090400117e-05,
1166
+ "loss": 2.3882,
1167
  "step": 965
1168
  },
1169
  {
1170
  "epoch": 0.39,
1171
+ "learning_rate": 4.7954940678565785e-05,
1172
+ "loss": 2.3396,
1173
  "step": 970
1174
  },
1175
  {
1176
  "epoch": 0.39,
1177
+ "learning_rate": 4.793409927646504e-05,
1178
+ "loss": 2.3548,
1179
  "step": 975
1180
  },
1181
  {
1182
  "epoch": 0.39,
1183
+ "learning_rate": 4.791315678955991e-05,
1184
+ "loss": 2.4576,
1185
  "step": 980
1186
  },
1187
  {
1188
  "epoch": 0.39,
1189
+ "learning_rate": 4.789211331015695e-05,
1190
+ "loss": 2.4523,
1191
  "step": 985
1192
  },
1193
  {
1194
  "epoch": 0.4,
1195
+ "learning_rate": 4.787096893100781e-05,
1196
+ "loss": 2.2542,
1197
  "step": 990
1198
  },
1199
  {
1200
  "epoch": 0.4,
1201
+ "learning_rate": 4.784972374530889e-05,
1202
+ "loss": 2.4385,
1203
  "step": 995
1204
  },
1205
  {
1206
  "epoch": 0.4,
1207
+ "learning_rate": 4.7828377846700925e-05,
1208
+ "loss": 2.3266,
1209
  "step": 1000
1210
  },
1211
  {
1212
  "epoch": 0.4,
1213
+ "learning_rate": 4.780693132926851e-05,
1214
+ "loss": 2.3301,
1215
  "step": 1005
1216
  },
1217
  {
1218
  "epoch": 0.4,
1219
+ "learning_rate": 4.7785384287539755e-05,
1220
+ "loss": 2.3554,
1221
  "step": 1010
1222
  },
1223
  {
1224
  "epoch": 0.41,
1225
+ "learning_rate": 4.776373681648586e-05,
1226
+ "loss": 2.3838,
1227
  "step": 1015
1228
  },
1229
  {
1230
  "epoch": 0.41,
1231
+ "learning_rate": 4.7741989011520645e-05,
1232
+ "loss": 2.3361,
1233
  "step": 1020
1234
  },
1235
  {
1236
  "epoch": 0.41,
1237
+ "learning_rate": 4.772014096850018e-05,
1238
+ "loss": 2.2864,
1239
  "step": 1025
1240
  },
1241
  {
1242
  "epoch": 0.41,
1243
+ "learning_rate": 4.769819278372235e-05,
1244
+ "loss": 2.3955,
1245
  "step": 1030
1246
  },
1247
  {
1248
  "epoch": 0.41,
1249
+ "learning_rate": 4.7676144553926414e-05,
1250
+ "loss": 2.3802,
1251
  "step": 1035
1252
  },
1253
  {
1254
  "epoch": 0.42,
1255
+ "learning_rate": 4.765399637629262e-05,
1256
+ "loss": 2.2856,
1257
  "step": 1040
1258
  },
1259
  {
1260
  "epoch": 0.42,
1261
+ "learning_rate": 4.7631748348441705e-05,
1262
+ "loss": 2.3698,
1263
  "step": 1045
1264
  },
1265
  {
1266
  "epoch": 0.42,
1267
+ "learning_rate": 4.760940056843454e-05,
1268
+ "loss": 2.3305,
1269
  "step": 1050
1270
  },
1271
  {
1272
  "epoch": 0.42,
1273
+ "learning_rate": 4.758695313477166e-05,
1274
+ "loss": 2.4944,
1275
  "step": 1055
1276
  },
1277
  {
1278
  "epoch": 0.43,
1279
+ "learning_rate": 4.756440614639283e-05,
1280
+ "loss": 2.4622,
1281
  "step": 1060
1282
  },
1283
  {
1284
  "epoch": 0.43,
1285
+ "learning_rate": 4.754175970267661e-05,
1286
+ "loss": 2.3995,
1287
  "step": 1065
1288
  },
1289
  {
1290
  "epoch": 0.43,
1291
+ "learning_rate": 4.751901390343995e-05,
1292
+ "loss": 2.343,
1293
  "step": 1070
1294
  },
1295
  {
1296
  "epoch": 0.43,
1297
+ "learning_rate": 4.7496168848937674e-05,
1298
+ "loss": 2.4388,
1299
  "step": 1075
1300
  },
1301
  {
1302
  "epoch": 0.43,
1303
+ "learning_rate": 4.7473224639862116e-05,
1304
+ "loss": 2.3694,
1305
  "step": 1080
1306
  },
1307
  {
1308
  "epoch": 0.44,
1309
+ "learning_rate": 4.745018137734264e-05,
1310
+ "loss": 2.3485,
1311
  "step": 1085
1312
  },
1313
  {
1314
  "epoch": 0.44,
1315
+ "learning_rate": 4.742703916294521e-05,
1316
+ "loss": 2.46,
1317
  "step": 1090
1318
  },
1319
  {
1320
  "epoch": 0.44,
1321
+ "learning_rate": 4.740379809867193e-05,
1322
+ "loss": 2.3138,
1323
  "step": 1095
1324
  },
1325
  {
1326
  "epoch": 0.44,
1327
+ "learning_rate": 4.738045828696056e-05,
1328
+ "loss": 2.4117,
1329
  "step": 1100
1330
  },
1331
  {
1332
  "epoch": 0.44,
1333
+ "learning_rate": 4.735701983068416e-05,
1334
+ "loss": 2.3506,
1335
  "step": 1105
1336
  },
1337
  {
1338
  "epoch": 0.45,
1339
+ "learning_rate": 4.7333482833150525e-05,
1340
+ "loss": 2.3711,
1341
  "step": 1110
1342
  },
1343
  {
1344
  "epoch": 0.45,
1345
+ "learning_rate": 4.730984739810183e-05,
1346
+ "loss": 2.2639,
1347
  "step": 1115
1348
  },
1349
  {
1350
  "epoch": 0.45,
1351
+ "learning_rate": 4.728611362971408e-05,
1352
+ "loss": 2.3749,
1353
  "step": 1120
1354
  },
1355
  {
1356
  "epoch": 0.45,
1357
+ "learning_rate": 4.726228163259673e-05,
1358
+ "loss": 2.4556,
1359
  "step": 1125
1360
  },
1361
  {
1362
  "epoch": 0.45,
1363
+ "learning_rate": 4.7238351511792165e-05,
1364
+ "loss": 2.4233,
1365
  "step": 1130
1366
  },
1367
  {
1368
  "epoch": 0.46,
1369
+ "learning_rate": 4.721432337277529e-05,
1370
+ "loss": 2.3544,
1371
  "step": 1135
1372
  },
1373
  {
1374
  "epoch": 0.46,
1375
+ "learning_rate": 4.7190197321453014e-05,
1376
+ "loss": 2.3654,
1377
  "step": 1140
1378
  },
1379
  {
1380
  "epoch": 0.46,
1381
+ "learning_rate": 4.716597346416382e-05,
1382
+ "loss": 2.3356,
1383
  "step": 1145
1384
  },
1385
  {
1386
  "epoch": 0.46,
1387
+ "learning_rate": 4.7141651907677256e-05,
1388
+ "loss": 2.4522,
1389
  "step": 1150
1390
  },
1391
  {
1392
  "epoch": 0.46,
1393
+ "learning_rate": 4.7117232759193534e-05,
1394
+ "loss": 2.3104,
1395
  "step": 1155
1396
  },
1397
  {
1398
  "epoch": 0.47,
1399
+ "learning_rate": 4.709271612634298e-05,
1400
+ "loss": 2.4492,
1401
  "step": 1160
1402
  },
1403
  {
1404
  "epoch": 0.47,
1405
+ "learning_rate": 4.706810211718561e-05,
1406
+ "loss": 2.4437,
1407
  "step": 1165
1408
  },
1409
  {
1410
  "epoch": 0.47,
1411
+ "learning_rate": 4.7043390840210636e-05,
1412
+ "loss": 2.3099,
1413
  "step": 1170
1414
  },
1415
  {
1416
  "epoch": 0.47,
1417
+ "learning_rate": 4.701858240433597e-05,
1418
+ "loss": 2.351,
1419
  "step": 1175
1420
  },
1421
  {
1422
  "epoch": 0.47,
1423
+ "learning_rate": 4.6993676918907804e-05,
1424
+ "loss": 2.4756,
1425
  "step": 1180
1426
  },
1427
  {
1428
  "epoch": 0.48,
1429
+ "learning_rate": 4.696867449370005e-05,
1430
+ "loss": 2.418,
1431
  "step": 1185
1432
  },
1433
  {
1434
  "epoch": 0.48,
1435
+ "learning_rate": 4.6943575238913904e-05,
1436
+ "loss": 2.5026,
1437
  "step": 1190
1438
  },
1439
  {
1440
  "epoch": 0.48,
1441
+ "learning_rate": 4.691837926517737e-05,
1442
+ "loss": 2.4096,
1443
  "step": 1195
1444
  },
1445
  {
1446
  "epoch": 0.48,
1447
+ "learning_rate": 4.689308668354473e-05,
1448
+ "loss": 2.4101,
1449
  "step": 1200
1450
  },
1451
  {
1452
  "epoch": 0.48,
1453
+ "learning_rate": 4.686769760549607e-05,
1454
+ "loss": 2.4533,
1455
  "step": 1205
1456
  },
1457
  {
1458
  "epoch": 0.49,
1459
+ "learning_rate": 4.684221214293683e-05,
1460
+ "loss": 2.2988,
1461
  "step": 1210
1462
  },
1463
  {
1464
  "epoch": 0.49,
1465
+ "learning_rate": 4.681663040819724e-05,
1466
+ "loss": 2.3937,
1467
  "step": 1215
1468
  },
1469
  {
1470
  "epoch": 0.49,
1471
+ "learning_rate": 4.679095251403189e-05,
1472
+ "loss": 2.3954,
1473
  "step": 1220
1474
  },
1475
  {
1476
  "epoch": 0.49,
1477
+ "learning_rate": 4.676517857361917e-05,
1478
+ "loss": 2.3275,
1479
  "step": 1225
1480
  },
1481
  {
1482
  "epoch": 0.49,
1483
+ "learning_rate": 4.673930870056085e-05,
1484
+ "loss": 2.4748,
1485
  "step": 1230
1486
  },
1487
  {
1488
  "epoch": 0.5,
1489
+ "learning_rate": 4.67133430088815e-05,
1490
+ "loss": 2.2496,
1491
  "step": 1235
1492
  },
1493
  {
1494
  "epoch": 0.5,
1495
+ "learning_rate": 4.668728161302803e-05,
1496
+ "loss": 2.3226,
1497
  "step": 1240
1498
  },
1499
  {
1500
  "epoch": 0.5,
1501
+ "learning_rate": 4.666112462786919e-05,
1502
+ "loss": 2.4655,
1503
  "step": 1245
1504
  },
1505
  {
1506
  "epoch": 0.5,
1507
+ "learning_rate": 4.663487216869502e-05,
1508
+ "loss": 2.3751,
1509
  "step": 1250
1510
  },
1511
  {
1512
  "epoch": 0.5,
1513
+ "learning_rate": 4.660852435121642e-05,
1514
+ "loss": 2.4136,
1515
  "step": 1255
1516
  },
1517
  {
1518
  "epoch": 0.51,
1519
+ "learning_rate": 4.658208129156456e-05,
1520
+ "loss": 2.3612,
1521
  "step": 1260
1522
  },
1523
  {
1524
  "epoch": 0.51,
1525
+ "learning_rate": 4.655554310629041e-05,
1526
+ "loss": 2.3675,
1527
  "step": 1265
1528
  },
1529
  {
1530
  "epoch": 0.51,
1531
+ "learning_rate": 4.652890991236421e-05,
1532
+ "loss": 2.2831,
1533
  "step": 1270
1534
  },
1535
  {
1536
  "epoch": 0.51,
1537
+ "learning_rate": 4.650218182717498e-05,
1538
+ "loss": 2.3958,
1539
  "step": 1275
1540
  },
1541
  {
1542
  "epoch": 0.51,
1543
+ "learning_rate": 4.647535896852997e-05,
1544
+ "loss": 2.3236,
1545
  "step": 1280
1546
  },
1547
  {
1548
  "epoch": 0.52,
1549
+ "learning_rate": 4.6448441454654156e-05,
1550
+ "loss": 2.3904,
1551
  "step": 1285
1552
  },
1553
  {
1554
  "epoch": 0.52,
1555
+ "learning_rate": 4.642142940418973e-05,
1556
+ "loss": 2.3565,
1557
  "step": 1290
1558
  },
1559
  {
1560
  "epoch": 0.52,
1561
+ "learning_rate": 4.6394322936195556e-05,
1562
+ "loss": 2.5229,
1563
  "step": 1295
1564
  },
1565
  {
1566
  "epoch": 0.52,
1567
+ "learning_rate": 4.636712217014665e-05,
1568
+ "loss": 2.369,
1569
  "step": 1300
1570
  },
1571
  {
1572
  "epoch": 0.52,
1573
+ "learning_rate": 4.6339827225933665e-05,
1574
+ "loss": 2.3444,
1575
  "step": 1305
1576
  },
1577
  {
1578
  "epoch": 0.53,
1579
+ "learning_rate": 4.6312438223862356e-05,
1580
+ "loss": 2.3387,
1581
  "step": 1310
1582
  },
1583
  {
1584
  "epoch": 0.53,
1585
+ "learning_rate": 4.628495528465305e-05,
1586
+ "loss": 2.3589,
1587
  "step": 1315
1588
  },
1589
  {
1590
  "epoch": 0.53,
1591
+ "learning_rate": 4.625737852944011e-05,
1592
+ "loss": 2.4779,
1593
  "step": 1320
1594
  },
1595
  {
1596
  "epoch": 0.53,
1597
+ "learning_rate": 4.62297080797714e-05,
1598
+ "loss": 2.4398,
1599
  "step": 1325
1600
  },
1601
  {
1602
  "epoch": 0.53,
1603
+ "learning_rate": 4.620194405760777e-05,
1604
+ "loss": 2.383,
1605
  "step": 1330
1606
  },
1607
  {
1608
  "epoch": 0.54,
1609
+ "learning_rate": 4.6174086585322485e-05,
1610
+ "loss": 2.424,
1611
  "step": 1335
1612
  },
1613
  {
1614
  "epoch": 0.54,
1615
+ "learning_rate": 4.6146135785700705e-05,
1616
+ "loss": 2.4289,
1617
  "step": 1340
1618
  },
1619
  {
1620
  "epoch": 0.54,
1621
+ "learning_rate": 4.611809178193896e-05,
1622
+ "loss": 2.225,
1623
  "step": 1345
1624
  },
1625
  {
1626
  "epoch": 0.54,
1627
+ "learning_rate": 4.608995469764455e-05,
1628
+ "loss": 2.3643,
1629
  "step": 1350
1630
  },
1631
  {
1632
  "epoch": 0.54,
1633
+ "learning_rate": 4.606172465683508e-05,
1634
+ "loss": 2.4278,
1635
  "step": 1355
1636
  },
1637
  {
1638
  "epoch": 0.55,
1639
+ "learning_rate": 4.6033401783937844e-05,
1640
+ "loss": 2.4464,
1641
  "step": 1360
1642
  },
1643
  {
1644
  "epoch": 0.55,
1645
+ "learning_rate": 4.600498620378932e-05,
1646
+ "loss": 2.3639,
1647
  "step": 1365
1648
  },
1649
  {
1650
  "epoch": 0.55,
1651
+ "learning_rate": 4.597647804163461e-05,
1652
+ "loss": 2.3595,
1653
  "step": 1370
1654
  },
1655
  {
1656
  "epoch": 0.55,
1657
+ "learning_rate": 4.5947877423126864e-05,
1658
+ "loss": 2.3569,
1659
  "step": 1375
1660
  },
1661
  {
1662
  "epoch": 0.55,
1663
+ "learning_rate": 4.591918447432675e-05,
1664
+ "loss": 2.4167,
1665
  "step": 1380
1666
  },
1667
  {
1668
  "epoch": 0.56,
1669
+ "learning_rate": 4.589039932170188e-05,
1670
+ "loss": 2.3959,
1671
  "step": 1385
1672
  },
1673
  {
1674
  "epoch": 0.56,
1675
+ "learning_rate": 4.5861522092126313e-05,
1676
+ "loss": 2.3629,
1677
  "step": 1390
1678
  },
1679
  {
1680
  "epoch": 0.56,
1681
+ "learning_rate": 4.5832552912879894e-05,
1682
+ "loss": 2.3978,
1683
  "step": 1395
1684
  },
1685
  {
1686
  "epoch": 0.56,
1687
+ "learning_rate": 4.580349191164777e-05,
1688
+ "loss": 2.4372,
1689
  "step": 1400
1690
  }
1691
  ],
checkpoint-1400/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c481a87016b183d9406558b8812ee05f00d60a3c721055833e3cdda34cf9bb26
3
  size 4920
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1b42f69ee6ce9030b2875b2859f3499c1e1c8f11e9ffa417d0dbdab2b51173da
3
  size 4920
checkpoint-1500/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:617998886ebced7ffbbc34dba08474899d00a5ff7b0ad1043c34f0198861b73e
3
  size 3158328
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:86001ec26d75f984491a257969213702b9e50f38474b61cbcac24938a9fbebe3
3
  size 3158328
checkpoint-1500/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f219e2be373d53a0e38f48e30a0cd5e563d0125f92d587e044557f98201cc2fa
3
  size 6372346
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e4b8775f6ab7c39f5b05b600f771c2f96b56f166148062a520e7c0eacaee1ab1
3
  size 6372346
checkpoint-1500/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:429b00f4aa51484e6c19c98196707dffc361874e6f4a07a1891c9c2eed9e7c0c
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:373f174594d1004d1fbec6db05cd5f49739496b5c13d43df817b9d01262e548c
3
  size 14244
checkpoint-1500/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:13a828a303cdd41d427d5947577be57d26c65d74ce9f1145f61c910ce959418e
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b19f3dc3dfe8d0ae4948bc2c6039073cc193e981c6055a9a923f13cfafd0710c
3
  size 1064
checkpoint-1500/trainer_state.json CHANGED
@@ -10,1802 +10,1802 @@
10
  "log_history": [
11
  {
12
  "epoch": 0.0,
13
- "learning_rate": 1.999997796189788e-05,
14
- "loss": 2.7465,
15
  "step": 5
16
  },
17
  {
18
  "epoch": 0.0,
19
- "learning_rate": 1.9999911847688657e-05,
20
- "loss": 2.6877,
21
  "step": 10
22
  },
23
  {
24
  "epoch": 0.01,
25
- "learning_rate": 1.999980165766374e-05,
26
- "loss": 2.6848,
27
  "step": 15
28
  },
29
  {
30
  "epoch": 0.01,
31
- "learning_rate": 1.9999647392308798e-05,
32
- "loss": 2.7677,
33
  "step": 20
34
  },
35
  {
36
  "epoch": 0.01,
37
- "learning_rate": 1.9999449052303777e-05,
38
- "loss": 2.7784,
39
  "step": 25
40
  },
41
  {
42
  "epoch": 0.01,
43
- "learning_rate": 1.9999206638522888e-05,
44
- "loss": 2.8161,
45
  "step": 30
46
  },
47
  {
48
  "epoch": 0.01,
49
- "learning_rate": 1.9998920152034595e-05,
50
- "loss": 2.6752,
51
  "step": 35
52
  },
53
  {
54
  "epoch": 0.02,
55
- "learning_rate": 1.9998589594101623e-05,
56
- "loss": 2.6403,
57
  "step": 40
58
  },
59
  {
60
  "epoch": 0.02,
61
- "learning_rate": 1.9998214966180948e-05,
62
- "loss": 2.6433,
63
  "step": 45
64
  },
65
  {
66
  "epoch": 0.02,
67
- "learning_rate": 1.999779626992378e-05,
68
- "loss": 2.5787,
69
  "step": 50
70
  },
71
  {
72
  "epoch": 0.02,
73
- "learning_rate": 1.9997333507175583e-05,
74
- "loss": 2.6812,
75
  "step": 55
76
  },
77
  {
78
  "epoch": 0.02,
79
- "learning_rate": 1.9996826679976033e-05,
80
- "loss": 2.7287,
81
  "step": 60
82
  },
83
  {
84
  "epoch": 0.03,
85
- "learning_rate": 1.9996275790559037e-05,
86
- "loss": 2.6465,
87
  "step": 65
88
  },
89
  {
90
  "epoch": 0.03,
91
- "learning_rate": 1.99956808413527e-05,
92
- "loss": 2.5268,
93
  "step": 70
94
  },
95
  {
96
  "epoch": 0.03,
97
- "learning_rate": 1.999504183497934e-05,
98
- "loss": 2.6893,
99
  "step": 75
100
  },
101
  {
102
  "epoch": 0.03,
103
- "learning_rate": 1.9994358774255444e-05,
104
- "loss": 2.6274,
105
  "step": 80
106
  },
107
  {
108
  "epoch": 0.03,
109
- "learning_rate": 1.9993631662191696e-05,
110
- "loss": 2.6232,
111
  "step": 85
112
  },
113
  {
114
  "epoch": 0.04,
115
- "learning_rate": 1.9992860501992924e-05,
116
- "loss": 2.7188,
117
  "step": 90
118
  },
119
  {
120
  "epoch": 0.04,
121
- "learning_rate": 1.9992045297058108e-05,
122
- "loss": 2.5388,
123
  "step": 95
124
  },
125
  {
126
  "epoch": 0.04,
127
- "learning_rate": 1.9991186050980366e-05,
128
- "loss": 2.6793,
129
  "step": 100
130
  },
131
  {
132
  "epoch": 0.04,
133
- "learning_rate": 1.9990282767546926e-05,
134
- "loss": 2.5523,
135
  "step": 105
136
  },
137
  {
138
  "epoch": 0.04,
139
- "learning_rate": 1.998933545073912e-05,
140
- "loss": 2.5763,
141
  "step": 110
142
  },
143
  {
144
  "epoch": 0.05,
145
- "learning_rate": 1.998834410473236e-05,
146
- "loss": 2.6447,
147
  "step": 115
148
  },
149
  {
150
  "epoch": 0.05,
151
- "learning_rate": 1.998730873389612e-05,
152
- "loss": 2.5579,
153
  "step": 120
154
  },
155
  {
156
  "epoch": 0.05,
157
- "learning_rate": 1.998622934279393e-05,
158
- "loss": 2.5884,
159
  "step": 125
160
  },
161
  {
162
  "epoch": 0.05,
163
- "learning_rate": 1.9985105936183327e-05,
164
- "loss": 2.5051,
165
  "step": 130
166
  },
167
  {
168
  "epoch": 0.05,
169
- "learning_rate": 1.9983938519015868e-05,
170
- "loss": 2.6014,
171
  "step": 135
172
  },
173
  {
174
  "epoch": 0.06,
175
- "learning_rate": 1.998272709643708e-05,
176
- "loss": 2.5878,
177
  "step": 140
178
  },
179
  {
180
  "epoch": 0.06,
181
- "learning_rate": 1.998147167378645e-05,
182
- "loss": 2.6642,
183
  "step": 145
184
  },
185
  {
186
  "epoch": 0.06,
187
- "learning_rate": 1.998017225659742e-05,
188
- "loss": 2.5077,
189
  "step": 150
190
  },
191
  {
192
  "epoch": 0.06,
193
- "learning_rate": 1.9978828850597312e-05,
194
- "loss": 2.5921,
195
  "step": 155
196
  },
197
  {
198
  "epoch": 0.06,
199
- "learning_rate": 1.9977441461707358e-05,
200
- "loss": 2.5577,
201
  "step": 160
202
  },
203
  {
204
  "epoch": 0.07,
205
- "learning_rate": 1.9976010096042634e-05,
206
- "loss": 2.524,
207
  "step": 165
208
  },
209
  {
210
  "epoch": 0.07,
211
- "learning_rate": 1.9974534759912068e-05,
212
- "loss": 2.5708,
213
  "step": 170
214
  },
215
  {
216
  "epoch": 0.07,
217
- "learning_rate": 1.997301545981837e-05,
218
- "loss": 2.5578,
219
  "step": 175
220
  },
221
  {
222
  "epoch": 0.07,
223
- "learning_rate": 1.9971452202458048e-05,
224
- "loss": 2.6874,
225
  "step": 180
226
  },
227
  {
228
  "epoch": 0.07,
229
- "learning_rate": 1.9969844994721338e-05,
230
- "loss": 2.535,
231
  "step": 185
232
  },
233
  {
234
  "epoch": 0.08,
235
- "learning_rate": 1.996819384369221e-05,
236
- "loss": 2.5816,
237
  "step": 190
238
  },
239
  {
240
  "epoch": 0.08,
241
- "learning_rate": 1.9966498756648305e-05,
242
- "loss": 2.6225,
243
  "step": 195
244
  },
245
  {
246
  "epoch": 0.08,
247
- "learning_rate": 1.9964759741060926e-05,
248
- "loss": 2.5387,
249
  "step": 200
250
  },
251
  {
252
  "epoch": 0.08,
253
- "learning_rate": 1.9962976804594993e-05,
254
- "loss": 2.524,
255
  "step": 205
256
  },
257
  {
258
  "epoch": 0.08,
259
- "learning_rate": 1.996114995510901e-05,
260
- "loss": 2.488,
261
  "step": 210
262
  },
263
  {
264
  "epoch": 0.09,
265
- "learning_rate": 1.9959279200655044e-05,
266
- "loss": 2.5824,
267
  "step": 215
268
  },
269
  {
270
  "epoch": 0.09,
271
- "learning_rate": 1.9957364549478663e-05,
272
- "loss": 2.5828,
273
  "step": 220
274
  },
275
  {
276
  "epoch": 0.09,
277
- "learning_rate": 1.9955406010018928e-05,
278
- "loss": 2.5137,
279
  "step": 225
280
  },
281
  {
282
  "epoch": 0.09,
283
- "learning_rate": 1.9953403590908334e-05,
284
- "loss": 2.5539,
285
  "step": 230
286
  },
287
  {
288
  "epoch": 0.09,
289
- "learning_rate": 1.995135730097278e-05,
290
- "loss": 2.6099,
291
  "step": 235
292
  },
293
  {
294
  "epoch": 0.1,
295
- "learning_rate": 1.994926714923155e-05,
296
- "loss": 2.5309,
297
  "step": 240
298
  },
299
  {
300
  "epoch": 0.1,
301
- "learning_rate": 1.9947133144897225e-05,
302
- "loss": 2.5152,
303
  "step": 245
304
  },
305
  {
306
  "epoch": 0.1,
307
- "learning_rate": 1.9944955297375693e-05,
308
- "loss": 2.4738,
309
  "step": 250
310
  },
311
  {
312
  "epoch": 0.1,
313
- "learning_rate": 1.9942733616266076e-05,
314
- "loss": 2.5173,
315
  "step": 255
316
  },
317
  {
318
  "epoch": 0.1,
319
- "learning_rate": 1.99404681113607e-05,
320
- "loss": 2.578,
321
  "step": 260
322
  },
323
  {
324
  "epoch": 0.11,
325
- "learning_rate": 1.993815879264506e-05,
326
- "loss": 2.5089,
327
  "step": 265
328
  },
329
  {
330
  "epoch": 0.11,
331
- "learning_rate": 1.9935805670297744e-05,
332
- "loss": 2.5872,
333
  "step": 270
334
  },
335
  {
336
  "epoch": 0.11,
337
- "learning_rate": 1.993340875469043e-05,
338
- "loss": 2.5882,
339
  "step": 275
340
  },
341
  {
342
  "epoch": 0.11,
343
- "learning_rate": 1.993096805638781e-05,
344
- "loss": 2.5789,
345
  "step": 280
346
  },
347
  {
348
  "epoch": 0.11,
349
- "learning_rate": 1.9928483586147553e-05,
350
- "loss": 2.488,
351
  "step": 285
352
  },
353
  {
354
  "epoch": 0.12,
355
- "learning_rate": 1.9925955354920265e-05,
356
- "loss": 2.617,
357
  "step": 290
358
  },
359
  {
360
  "epoch": 0.12,
361
- "learning_rate": 1.992338337384943e-05,
362
- "loss": 2.5752,
363
  "step": 295
364
  },
365
  {
366
  "epoch": 0.12,
367
- "learning_rate": 1.992076765427136e-05,
368
- "loss": 2.5529,
369
  "step": 300
370
  },
371
  {
372
  "epoch": 0.12,
373
- "learning_rate": 1.9918108207715156e-05,
374
- "loss": 2.4942,
375
  "step": 305
376
  },
377
  {
378
  "epoch": 0.12,
379
- "learning_rate": 1.991540504590265e-05,
380
- "loss": 2.5456,
381
  "step": 310
382
  },
383
  {
384
  "epoch": 0.13,
385
- "learning_rate": 1.991265818074835e-05,
386
- "loss": 2.5957,
387
  "step": 315
388
  },
389
  {
390
  "epoch": 0.13,
391
- "learning_rate": 1.99098676243594e-05,
392
- "loss": 2.5699,
393
  "step": 320
394
  },
395
  {
396
  "epoch": 0.13,
397
- "learning_rate": 1.9907033389035512e-05,
398
- "loss": 2.5544,
399
  "step": 325
400
  },
401
  {
402
  "epoch": 0.13,
403
- "learning_rate": 1.9904155487268912e-05,
404
- "loss": 2.538,
405
  "step": 330
406
  },
407
  {
408
  "epoch": 0.13,
409
- "learning_rate": 1.990123393174431e-05,
410
- "loss": 2.6055,
411
  "step": 335
412
  },
413
  {
414
  "epoch": 0.14,
415
- "learning_rate": 1.9898268735338807e-05,
416
- "loss": 2.5846,
417
  "step": 340
418
  },
419
  {
420
  "epoch": 0.14,
421
- "learning_rate": 1.9895259911121866e-05,
422
- "loss": 2.5405,
423
  "step": 345
424
  },
425
  {
426
  "epoch": 0.14,
427
- "learning_rate": 1.9892207472355243e-05,
428
- "loss": 2.5162,
429
  "step": 350
430
  },
431
  {
432
  "epoch": 0.14,
433
- "learning_rate": 1.988911143249292e-05,
434
- "loss": 2.5484,
435
  "step": 355
436
  },
437
  {
438
  "epoch": 0.14,
439
- "learning_rate": 1.9885971805181083e-05,
440
- "loss": 2.671,
441
  "step": 360
442
  },
443
  {
444
  "epoch": 0.15,
445
- "learning_rate": 1.9882788604258e-05,
446
- "loss": 2.5696,
447
  "step": 365
448
  },
449
  {
450
  "epoch": 0.15,
451
- "learning_rate": 1.987956184375402e-05,
452
- "loss": 2.5021,
453
  "step": 370
454
  },
455
  {
456
  "epoch": 0.15,
457
- "learning_rate": 1.9876291537891482e-05,
458
- "loss": 2.5644,
459
  "step": 375
460
  },
461
  {
462
  "epoch": 0.15,
463
- "learning_rate": 1.9872977701084645e-05,
464
- "loss": 2.5386,
465
  "step": 380
466
  },
467
  {
468
  "epoch": 0.15,
469
- "learning_rate": 1.9869620347939652e-05,
470
- "loss": 2.554,
471
  "step": 385
472
  },
473
  {
474
  "epoch": 0.16,
475
- "learning_rate": 1.9866219493254433e-05,
476
- "loss": 2.4798,
477
  "step": 390
478
  },
479
  {
480
  "epoch": 0.16,
481
- "learning_rate": 1.986277515201867e-05,
482
- "loss": 2.538,
483
  "step": 395
484
  },
485
  {
486
  "epoch": 0.16,
487
- "learning_rate": 1.9859287339413714e-05,
488
- "loss": 2.5041,
489
  "step": 400
490
  },
491
  {
492
  "epoch": 0.16,
493
- "learning_rate": 1.9855756070812514e-05,
494
- "loss": 2.7067,
495
  "step": 405
496
  },
497
  {
498
  "epoch": 0.16,
499
- "learning_rate": 1.9852181361779563e-05,
500
- "loss": 2.5054,
501
  "step": 410
502
  },
503
  {
504
  "epoch": 0.17,
505
- "learning_rate": 1.984856322807082e-05,
506
- "loss": 2.4815,
507
  "step": 415
508
  },
509
  {
510
  "epoch": 0.17,
511
- "learning_rate": 1.9844901685633648e-05,
512
- "loss": 2.5885,
513
  "step": 420
514
  },
515
  {
516
  "epoch": 0.17,
517
- "learning_rate": 1.9841196750606735e-05,
518
- "loss": 2.521,
519
  "step": 425
520
  },
521
  {
522
  "epoch": 0.17,
523
- "learning_rate": 1.9837448439320027e-05,
524
- "loss": 2.4937,
525
  "step": 430
526
  },
527
  {
528
  "epoch": 0.17,
529
- "learning_rate": 1.983365676829466e-05,
530
- "loss": 2.5877,
531
  "step": 435
532
  },
533
  {
534
  "epoch": 0.18,
535
- "learning_rate": 1.9829821754242885e-05,
536
- "loss": 2.5761,
537
  "step": 440
538
  },
539
  {
540
  "epoch": 0.18,
541
- "learning_rate": 1.9825943414067974e-05,
542
- "loss": 2.4917,
543
  "step": 445
544
  },
545
  {
546
  "epoch": 0.18,
547
- "learning_rate": 1.9822021764864194e-05,
548
- "loss": 2.5434,
549
  "step": 450
550
  },
551
  {
552
  "epoch": 0.18,
553
- "learning_rate": 1.9818056823916675e-05,
554
- "loss": 2.5906,
555
  "step": 455
556
  },
557
  {
558
  "epoch": 0.18,
559
- "learning_rate": 1.9814048608701374e-05,
560
- "loss": 2.5508,
561
  "step": 460
562
  },
563
  {
564
  "epoch": 0.19,
565
- "learning_rate": 1.980999713688499e-05,
566
- "loss": 2.6005,
567
  "step": 465
568
  },
569
  {
570
  "epoch": 0.19,
571
- "learning_rate": 1.980590242632486e-05,
572
- "loss": 2.523,
573
  "step": 470
574
  },
575
  {
576
  "epoch": 0.19,
577
- "learning_rate": 1.9801764495068923e-05,
578
- "loss": 2.5453,
579
  "step": 475
580
  },
581
  {
582
  "epoch": 0.19,
583
- "learning_rate": 1.979758336135561e-05,
584
- "loss": 2.6426,
585
  "step": 480
586
  },
587
  {
588
  "epoch": 0.19,
589
- "learning_rate": 1.9793359043613768e-05,
590
- "loss": 2.5454,
591
  "step": 485
592
  },
593
  {
594
  "epoch": 0.2,
595
- "learning_rate": 1.9789091560462587e-05,
596
- "loss": 2.6071,
597
  "step": 490
598
  },
599
  {
600
  "epoch": 0.2,
601
- "learning_rate": 1.9784780930711514e-05,
602
- "loss": 2.5913,
603
  "step": 495
604
  },
605
  {
606
  "epoch": 0.2,
607
- "learning_rate": 1.9780427173360165e-05,
608
- "loss": 2.5082,
609
  "step": 500
610
  },
611
  {
612
  "epoch": 0.2,
613
- "learning_rate": 1.977603030759825e-05,
614
- "loss": 2.565,
615
  "step": 505
616
  },
617
  {
618
  "epoch": 0.2,
619
- "learning_rate": 1.977159035280549e-05,
620
- "loss": 2.6365,
621
  "step": 510
622
  },
623
  {
624
  "epoch": 0.21,
625
- "learning_rate": 1.9767107328551515e-05,
626
- "loss": 2.6872,
627
  "step": 515
628
  },
629
  {
630
  "epoch": 0.21,
631
- "learning_rate": 1.9762581254595797e-05,
632
- "loss": 2.6222,
633
  "step": 520
634
  },
635
  {
636
  "epoch": 0.21,
637
- "learning_rate": 1.975801215088755e-05,
638
- "loss": 2.5633,
639
  "step": 525
640
  },
641
  {
642
  "epoch": 0.21,
643
- "learning_rate": 1.9753400037565653e-05,
644
- "loss": 2.6579,
645
  "step": 530
646
  },
647
  {
648
  "epoch": 0.21,
649
- "learning_rate": 1.9748744934958548e-05,
650
- "loss": 2.5572,
651
  "step": 535
652
  },
653
  {
654
  "epoch": 0.22,
655
- "learning_rate": 1.974404686358416e-05,
656
- "loss": 2.5845,
657
  "step": 540
658
  },
659
  {
660
  "epoch": 0.22,
661
- "learning_rate": 1.97393058441498e-05,
662
- "loss": 2.6484,
663
  "step": 545
664
  },
665
  {
666
  "epoch": 0.22,
667
- "learning_rate": 1.973452189755209e-05,
668
- "loss": 2.5473,
669
  "step": 550
670
  },
671
  {
672
  "epoch": 0.22,
673
- "learning_rate": 1.9729695044876847e-05,
674
- "loss": 2.5995,
675
  "step": 555
676
  },
677
  {
678
  "epoch": 0.22,
679
- "learning_rate": 1.9724825307399003e-05,
680
- "loss": 2.5473,
681
  "step": 560
682
  },
683
  {
684
  "epoch": 0.23,
685
- "learning_rate": 1.971991270658252e-05,
686
- "loss": 2.5798,
687
  "step": 565
688
  },
689
  {
690
  "epoch": 0.23,
691
- "learning_rate": 1.971495726408027e-05,
692
- "loss": 2.655,
693
  "step": 570
694
  },
695
  {
696
  "epoch": 0.23,
697
- "learning_rate": 1.970995900173397e-05,
698
- "loss": 2.6904,
699
  "step": 575
700
  },
701
  {
702
  "epoch": 0.23,
703
- "learning_rate": 1.9704917941574053e-05,
704
- "loss": 2.6973,
705
  "step": 580
706
  },
707
  {
708
  "epoch": 0.23,
709
- "learning_rate": 1.969983410581961e-05,
710
- "loss": 2.6518,
711
  "step": 585
712
  },
713
  {
714
  "epoch": 0.24,
715
- "learning_rate": 1.969470751687825e-05,
716
- "loss": 2.5849,
717
  "step": 590
718
  },
719
  {
720
  "epoch": 0.24,
721
- "learning_rate": 1.9689538197346035e-05,
722
- "loss": 2.4715,
723
  "step": 595
724
  },
725
  {
726
  "epoch": 0.24,
727
- "learning_rate": 1.9684326170007365e-05,
728
- "loss": 2.7246,
729
  "step": 600
730
  },
731
  {
732
  "epoch": 0.24,
733
- "learning_rate": 1.9679071457834874e-05,
734
- "loss": 2.5482,
735
  "step": 605
736
  },
737
  {
738
  "epoch": 0.24,
739
- "learning_rate": 1.967377408398934e-05,
740
- "loss": 2.6084,
741
  "step": 610
742
  },
743
  {
744
  "epoch": 0.25,
745
- "learning_rate": 1.966843407181958e-05,
746
- "loss": 2.5144,
747
  "step": 615
748
  },
749
  {
750
  "epoch": 0.25,
751
- "learning_rate": 1.9663051444862335e-05,
752
- "loss": 2.7663,
753
  "step": 620
754
  },
755
  {
756
  "epoch": 0.25,
757
- "learning_rate": 1.9657626226842187e-05,
758
- "loss": 2.5697,
759
  "step": 625
760
  },
761
  {
762
  "epoch": 0.25,
763
- "learning_rate": 1.9652158441671435e-05,
764
- "loss": 2.6379,
765
  "step": 630
766
  },
767
  {
768
  "epoch": 0.25,
769
- "learning_rate": 1.964664811345e-05,
770
- "loss": 2.6784,
771
  "step": 635
772
  },
773
  {
774
  "epoch": 0.26,
775
- "learning_rate": 1.964109526646532e-05,
776
- "loss": 2.6936,
777
  "step": 640
778
  },
779
  {
780
  "epoch": 0.26,
781
- "learning_rate": 1.963549992519223e-05,
782
- "loss": 2.5672,
783
  "step": 645
784
  },
785
  {
786
  "epoch": 0.26,
787
- "learning_rate": 1.962986211429288e-05,
788
- "loss": 2.62,
789
  "step": 650
790
  },
791
  {
792
  "epoch": 0.26,
793
- "learning_rate": 1.9624181858616593e-05,
794
- "loss": 2.7293,
795
  "step": 655
796
  },
797
  {
798
  "epoch": 0.26,
799
- "learning_rate": 1.9618459183199782e-05,
800
- "loss": 2.6636,
801
  "step": 660
802
  },
803
  {
804
  "epoch": 0.27,
805
- "learning_rate": 1.961269411326583e-05,
806
- "loss": 2.5106,
807
  "step": 665
808
  },
809
  {
810
  "epoch": 0.27,
811
- "learning_rate": 1.9606886674224977e-05,
812
- "loss": 2.6878,
813
  "step": 670
814
  },
815
  {
816
  "epoch": 0.27,
817
- "learning_rate": 1.960103689167421e-05,
818
- "loss": 2.693,
819
  "step": 675
820
  },
821
  {
822
  "epoch": 0.27,
823
- "learning_rate": 1.9595144791397142e-05,
824
- "loss": 2.6562,
825
  "step": 680
826
  },
827
  {
828
  "epoch": 0.27,
829
- "learning_rate": 1.9589210399363925e-05,
830
- "loss": 2.6269,
831
  "step": 685
832
  },
833
  {
834
  "epoch": 0.28,
835
- "learning_rate": 1.95832337417311e-05,
836
- "loss": 2.6459,
837
  "step": 690
838
  },
839
  {
840
  "epoch": 0.28,
841
- "learning_rate": 1.9577214844841515e-05,
842
- "loss": 2.5765,
843
  "step": 695
844
  },
845
  {
846
  "epoch": 0.28,
847
- "learning_rate": 1.957115373522417e-05,
848
- "loss": 2.6113,
849
  "step": 700
850
  },
851
  {
852
  "epoch": 0.28,
853
- "learning_rate": 1.956505043959414e-05,
854
- "loss": 2.6641,
855
  "step": 705
856
  },
857
  {
858
  "epoch": 0.28,
859
- "learning_rate": 1.955890498485244e-05,
860
- "loss": 2.6493,
861
  "step": 710
862
  },
863
  {
864
  "epoch": 0.29,
865
- "learning_rate": 1.9552717398085898e-05,
866
- "loss": 2.6135,
867
  "step": 715
868
  },
869
  {
870
  "epoch": 0.29,
871
- "learning_rate": 1.954648770656705e-05,
872
- "loss": 2.6182,
873
  "step": 720
874
  },
875
  {
876
  "epoch": 0.29,
877
- "learning_rate": 1.954021593775401e-05,
878
- "loss": 2.6487,
879
  "step": 725
880
  },
881
  {
882
  "epoch": 0.29,
883
- "learning_rate": 1.9533902119290352e-05,
884
- "loss": 2.5927,
885
  "step": 730
886
  },
887
  {
888
  "epoch": 0.29,
889
- "learning_rate": 1.952754627900499e-05,
890
- "loss": 2.5825,
891
  "step": 735
892
  },
893
  {
894
  "epoch": 0.3,
895
- "learning_rate": 1.9521148444912065e-05,
896
- "loss": 2.7193,
897
  "step": 740
898
  },
899
  {
900
  "epoch": 0.3,
901
- "learning_rate": 1.9514708645210793e-05,
902
- "loss": 2.5802,
903
  "step": 745
904
  },
905
  {
906
  "epoch": 0.3,
907
- "learning_rate": 1.9508226908285368e-05,
908
- "loss": 2.6628,
909
  "step": 750
910
  },
911
  {
912
  "epoch": 0.3,
913
- "learning_rate": 1.950170326270483e-05,
914
- "loss": 2.5847,
915
  "step": 755
916
  },
917
  {
918
  "epoch": 0.3,
919
- "learning_rate": 1.9495137737222925e-05,
920
- "loss": 2.6594,
921
  "step": 760
922
  },
923
  {
924
  "epoch": 0.31,
925
- "learning_rate": 1.9488530360778007e-05,
926
- "loss": 2.6096,
927
  "step": 765
928
  },
929
  {
930
  "epoch": 0.31,
931
- "learning_rate": 1.948188116249287e-05,
932
- "loss": 2.6378,
933
  "step": 770
934
  },
935
  {
936
  "epoch": 0.31,
937
- "learning_rate": 1.9475190171674675e-05,
938
- "loss": 2.5984,
939
  "step": 775
940
  },
941
  {
942
  "epoch": 0.31,
943
- "learning_rate": 1.9468457417814753e-05,
944
- "loss": 2.6437,
945
  "step": 780
946
  },
947
  {
948
  "epoch": 0.31,
949
- "learning_rate": 1.9461682930588534e-05,
950
- "loss": 2.6522,
951
  "step": 785
952
  },
953
  {
954
  "epoch": 0.32,
955
- "learning_rate": 1.9454866739855384e-05,
956
- "loss": 2.6242,
957
  "step": 790
958
  },
959
  {
960
  "epoch": 0.32,
961
- "learning_rate": 1.944800887565849e-05,
962
- "loss": 2.5961,
963
  "step": 795
964
  },
965
  {
966
  "epoch": 0.32,
967
- "learning_rate": 1.9441109368224704e-05,
968
- "loss": 2.6365,
969
  "step": 800
970
  },
971
  {
972
  "epoch": 0.32,
973
- "learning_rate": 1.9434168247964447e-05,
974
- "loss": 2.5674,
975
  "step": 805
976
  },
977
  {
978
  "epoch": 0.32,
979
- "learning_rate": 1.9427185545471537e-05,
980
- "loss": 2.6369,
981
  "step": 810
982
  },
983
  {
984
  "epoch": 0.33,
985
- "learning_rate": 1.9420161291523076e-05,
986
- "loss": 2.5763,
987
  "step": 815
988
  },
989
  {
990
  "epoch": 0.33,
991
- "learning_rate": 1.941309551707931e-05,
992
- "loss": 2.5651,
993
  "step": 820
994
  },
995
  {
996
  "epoch": 0.33,
997
- "learning_rate": 1.9405988253283492e-05,
998
- "loss": 2.6223,
999
  "step": 825
1000
  },
1001
  {
1002
  "epoch": 0.33,
1003
- "learning_rate": 1.939883953146174e-05,
1004
- "loss": 2.6616,
1005
  "step": 830
1006
  },
1007
  {
1008
  "epoch": 0.33,
1009
- "learning_rate": 1.939164938312291e-05,
1010
- "loss": 2.6496,
1011
  "step": 835
1012
  },
1013
  {
1014
  "epoch": 0.34,
1015
- "learning_rate": 1.9384417839958443e-05,
1016
- "loss": 2.7161,
1017
  "step": 840
1018
  },
1019
  {
1020
  "epoch": 0.34,
1021
- "learning_rate": 1.937714493384224e-05,
1022
- "loss": 2.6047,
1023
  "step": 845
1024
  },
1025
  {
1026
  "epoch": 0.34,
1027
- "learning_rate": 1.936983069683051e-05,
1028
- "loss": 2.5978,
1029
  "step": 850
1030
  },
1031
  {
1032
  "epoch": 0.34,
1033
- "learning_rate": 1.936247516116163e-05,
1034
- "loss": 2.6331,
1035
  "step": 855
1036
  },
1037
  {
1038
  "epoch": 0.34,
1039
- "learning_rate": 1.935507835925601e-05,
1040
- "loss": 2.5679,
1041
  "step": 860
1042
  },
1043
  {
1044
  "epoch": 0.35,
1045
- "learning_rate": 1.934764032371595e-05,
1046
- "loss": 2.6647,
1047
  "step": 865
1048
  },
1049
  {
1050
  "epoch": 0.35,
1051
- "learning_rate": 1.9340161087325483e-05,
1052
- "loss": 2.6142,
1053
  "step": 870
1054
  },
1055
  {
1056
  "epoch": 0.35,
1057
- "learning_rate": 1.9332640683050243e-05,
1058
- "loss": 2.6181,
1059
  "step": 875
1060
  },
1061
  {
1062
  "epoch": 0.35,
1063
- "learning_rate": 1.932507914403732e-05,
1064
- "loss": 2.6105,
1065
  "step": 880
1066
  },
1067
  {
1068
  "epoch": 0.35,
1069
- "learning_rate": 1.9317476503615108e-05,
1070
- "loss": 2.6415,
1071
  "step": 885
1072
  },
1073
  {
1074
  "epoch": 0.36,
1075
- "learning_rate": 1.9309832795293156e-05,
1076
- "loss": 2.6736,
1077
  "step": 890
1078
  },
1079
  {
1080
  "epoch": 0.36,
1081
- "learning_rate": 1.930214805276204e-05,
1082
- "loss": 2.6507,
1083
  "step": 895
1084
  },
1085
  {
1086
  "epoch": 0.36,
1087
- "learning_rate": 1.9294422309893177e-05,
1088
- "loss": 2.6771,
1089
  "step": 900
1090
  },
1091
  {
1092
  "epoch": 0.36,
1093
- "learning_rate": 1.9286655600738707e-05,
1094
- "loss": 2.6763,
1095
  "step": 905
1096
  },
1097
  {
1098
  "epoch": 0.36,
1099
- "learning_rate": 1.9278847959531348e-05,
1100
- "loss": 2.7135,
1101
  "step": 910
1102
  },
1103
  {
1104
  "epoch": 0.37,
1105
- "learning_rate": 1.927099942068421e-05,
1106
- "loss": 2.6166,
1107
  "step": 915
1108
  },
1109
  {
1110
  "epoch": 0.37,
1111
- "learning_rate": 1.9263110018790673e-05,
1112
- "loss": 2.6238,
1113
  "step": 920
1114
  },
1115
  {
1116
  "epoch": 0.37,
1117
- "learning_rate": 1.9255179788624233e-05,
1118
- "loss": 2.6424,
1119
  "step": 925
1120
  },
1121
  {
1122
  "epoch": 0.37,
1123
- "learning_rate": 1.9247208765138325e-05,
1124
- "loss": 2.6023,
1125
  "step": 930
1126
  },
1127
  {
1128
  "epoch": 0.37,
1129
- "learning_rate": 1.9239196983466204e-05,
1130
- "loss": 2.58,
1131
  "step": 935
1132
  },
1133
  {
1134
  "epoch": 0.38,
1135
- "learning_rate": 1.9231144478920756e-05,
1136
- "loss": 2.6173,
1137
  "step": 940
1138
  },
1139
  {
1140
  "epoch": 0.38,
1141
- "learning_rate": 1.9223051286994368e-05,
1142
- "loss": 2.628,
1143
  "step": 945
1144
  },
1145
  {
1146
  "epoch": 0.38,
1147
- "learning_rate": 1.9214917443358753e-05,
1148
- "loss": 2.6868,
1149
  "step": 950
1150
  },
1151
  {
1152
  "epoch": 0.38,
1153
- "learning_rate": 1.9206742983864813e-05,
1154
- "loss": 2.6342,
1155
  "step": 955
1156
  },
1157
  {
1158
  "epoch": 0.38,
1159
- "learning_rate": 1.9198527944542462e-05,
1160
- "loss": 2.5934,
1161
  "step": 960
1162
  },
1163
  {
1164
  "epoch": 0.39,
1165
- "learning_rate": 1.919027236160047e-05,
1166
- "loss": 2.6354,
1167
  "step": 965
1168
  },
1169
  {
1170
  "epoch": 0.39,
1171
- "learning_rate": 1.9181976271426315e-05,
1172
- "loss": 2.5955,
1173
  "step": 970
1174
  },
1175
  {
1176
  "epoch": 0.39,
1177
- "learning_rate": 1.9173639710586015e-05,
1178
- "loss": 2.6134,
1179
  "step": 975
1180
  },
1181
  {
1182
  "epoch": 0.39,
1183
- "learning_rate": 1.9165262715823966e-05,
1184
- "loss": 2.7,
1185
  "step": 980
1186
  },
1187
  {
1188
  "epoch": 0.39,
1189
- "learning_rate": 1.915684532406278e-05,
1190
- "loss": 2.7197,
1191
  "step": 985
1192
  },
1193
  {
1194
  "epoch": 0.4,
1195
- "learning_rate": 1.9148387572403123e-05,
1196
- "loss": 2.4881,
1197
  "step": 990
1198
  },
1199
  {
1200
  "epoch": 0.4,
1201
- "learning_rate": 1.913988949812356e-05,
1202
- "loss": 2.6999,
1203
  "step": 995
1204
  },
1205
  {
1206
  "epoch": 0.4,
1207
- "learning_rate": 1.9131351138680368e-05,
1208
- "loss": 2.5981,
1209
  "step": 1000
1210
  },
1211
  {
1212
  "epoch": 0.4,
1213
- "learning_rate": 1.9122772531707405e-05,
1214
- "loss": 2.5515,
1215
  "step": 1005
1216
  },
1217
  {
1218
  "epoch": 0.4,
1219
- "learning_rate": 1.9114153715015905e-05,
1220
- "loss": 2.623,
1221
  "step": 1010
1222
  },
1223
  {
1224
  "epoch": 0.41,
1225
- "learning_rate": 1.9105494726594344e-05,
1226
- "loss": 2.6006,
1227
  "step": 1015
1228
  },
1229
  {
1230
  "epoch": 0.41,
1231
- "learning_rate": 1.9096795604608258e-05,
1232
- "loss": 2.5929,
1233
  "step": 1020
1234
  },
1235
  {
1236
  "epoch": 0.41,
1237
- "learning_rate": 1.9088056387400074e-05,
1238
- "loss": 2.5396,
1239
  "step": 1025
1240
  },
1241
  {
1242
  "epoch": 0.41,
1243
- "learning_rate": 1.907927711348894e-05,
1244
- "loss": 2.6428,
1245
  "step": 1030
1246
  },
1247
  {
1248
  "epoch": 0.41,
1249
- "learning_rate": 1.9070457821570566e-05,
1250
- "loss": 2.6296,
1251
  "step": 1035
1252
  },
1253
  {
1254
  "epoch": 0.42,
1255
- "learning_rate": 1.9061598550517048e-05,
1256
- "loss": 2.534,
1257
  "step": 1040
1258
  },
1259
  {
1260
  "epoch": 0.42,
1261
- "learning_rate": 1.9052699339376685e-05,
1262
- "loss": 2.6049,
1263
  "step": 1045
1264
  },
1265
  {
1266
  "epoch": 0.42,
1267
- "learning_rate": 1.9043760227373817e-05,
1268
- "loss": 2.5776,
1269
  "step": 1050
1270
  },
1271
  {
1272
  "epoch": 0.42,
1273
- "learning_rate": 1.9034781253908664e-05,
1274
- "loss": 2.7483,
1275
  "step": 1055
1276
  },
1277
  {
1278
  "epoch": 0.43,
1279
- "learning_rate": 1.902576245855713e-05,
1280
- "loss": 2.6973,
1281
  "step": 1060
1282
  },
1283
  {
1284
  "epoch": 0.43,
1285
- "learning_rate": 1.9016703881070646e-05,
1286
- "loss": 2.645,
1287
  "step": 1065
1288
  },
1289
  {
1290
  "epoch": 0.43,
1291
- "learning_rate": 1.900760556137598e-05,
1292
- "loss": 2.5775,
1293
  "step": 1070
1294
  },
1295
  {
1296
  "epoch": 0.43,
1297
- "learning_rate": 1.899846753957507e-05,
1298
- "loss": 2.6872,
1299
  "step": 1075
1300
  },
1301
  {
1302
  "epoch": 0.43,
1303
- "learning_rate": 1.8989289855944846e-05,
1304
- "loss": 2.5929,
1305
  "step": 1080
1306
  },
1307
  {
1308
  "epoch": 0.44,
1309
- "learning_rate": 1.8980072550937058e-05,
1310
- "loss": 2.5668,
1311
  "step": 1085
1312
  },
1313
  {
1314
  "epoch": 0.44,
1315
- "learning_rate": 1.8970815665178086e-05,
1316
- "loss": 2.6873,
1317
  "step": 1090
1318
  },
1319
  {
1320
  "epoch": 0.44,
1321
- "learning_rate": 1.896151923946877e-05,
1322
- "loss": 2.5679,
1323
  "step": 1095
1324
  },
1325
  {
1326
  "epoch": 0.44,
1327
- "learning_rate": 1.8952183314784224e-05,
1328
- "loss": 2.6734,
1329
  "step": 1100
1330
  },
1331
  {
1332
  "epoch": 0.44,
1333
- "learning_rate": 1.8942807932273664e-05,
1334
- "loss": 2.6083,
1335
  "step": 1105
1336
  },
1337
  {
1338
  "epoch": 0.45,
1339
- "learning_rate": 1.893339313326021e-05,
1340
- "loss": 2.6189,
1341
  "step": 1110
1342
  },
1343
  {
1344
  "epoch": 0.45,
1345
- "learning_rate": 1.892393895924073e-05,
1346
- "loss": 2.5117,
1347
  "step": 1115
1348
  },
1349
  {
1350
  "epoch": 0.45,
1351
- "learning_rate": 1.8914445451885633e-05,
1352
- "loss": 2.6646,
1353
  "step": 1120
1354
  },
1355
  {
1356
  "epoch": 0.45,
1357
- "learning_rate": 1.890491265303869e-05,
1358
- "loss": 2.7551,
1359
  "step": 1125
1360
  },
1361
  {
1362
  "epoch": 0.45,
1363
- "learning_rate": 1.8895340604716867e-05,
1364
- "loss": 2.7003,
1365
  "step": 1130
1366
  },
1367
  {
1368
  "epoch": 0.46,
1369
- "learning_rate": 1.888572934911012e-05,
1370
- "loss": 2.6109,
1371
  "step": 1135
1372
  },
1373
  {
1374
  "epoch": 0.46,
1375
- "learning_rate": 1.8876078928581207e-05,
1376
- "loss": 2.6227,
1377
  "step": 1140
1378
  },
1379
  {
1380
  "epoch": 0.46,
1381
- "learning_rate": 1.8866389385665525e-05,
1382
- "loss": 2.5943,
1383
  "step": 1145
1384
  },
1385
  {
1386
  "epoch": 0.46,
1387
- "learning_rate": 1.8856660763070904e-05,
1388
- "loss": 2.7203,
1389
  "step": 1150
1390
  },
1391
  {
1392
  "epoch": 0.46,
1393
- "learning_rate": 1.8846893103677414e-05,
1394
- "loss": 2.561,
1395
  "step": 1155
1396
  },
1397
  {
1398
  "epoch": 0.47,
1399
- "learning_rate": 1.8837086450537195e-05,
1400
- "loss": 2.6959,
1401
  "step": 1160
1402
  },
1403
  {
1404
  "epoch": 0.47,
1405
- "learning_rate": 1.8827240846874245e-05,
1406
- "loss": 2.7047,
1407
  "step": 1165
1408
  },
1409
  {
1410
  "epoch": 0.47,
1411
- "learning_rate": 1.8817356336084252e-05,
1412
- "loss": 2.5853,
1413
  "step": 1170
1414
  },
1415
  {
1416
  "epoch": 0.47,
1417
- "learning_rate": 1.8807432961734388e-05,
1418
- "loss": 2.6165,
1419
  "step": 1175
1420
  },
1421
  {
1422
  "epoch": 0.47,
1423
- "learning_rate": 1.8797470767563122e-05,
1424
- "loss": 2.756,
1425
  "step": 1180
1426
  },
1427
  {
1428
  "epoch": 0.48,
1429
- "learning_rate": 1.878746979748002e-05,
1430
- "loss": 2.667,
1431
  "step": 1185
1432
  },
1433
  {
1434
  "epoch": 0.48,
1435
- "learning_rate": 1.8777430095565563e-05,
1436
- "loss": 2.7607,
1437
  "step": 1190
1438
  },
1439
  {
1440
  "epoch": 0.48,
1441
- "learning_rate": 1.876735170607095e-05,
1442
- "loss": 2.6816,
1443
  "step": 1195
1444
  },
1445
  {
1446
  "epoch": 0.48,
1447
- "learning_rate": 1.8757234673417892e-05,
1448
- "loss": 2.688,
1449
  "step": 1200
1450
  },
1451
  {
1452
  "epoch": 0.48,
1453
- "learning_rate": 1.874707904219843e-05,
1454
- "loss": 2.7286,
1455
  "step": 1205
1456
  },
1457
  {
1458
  "epoch": 0.49,
1459
- "learning_rate": 1.8736884857174733e-05,
1460
- "loss": 2.5721,
1461
  "step": 1210
1462
  },
1463
  {
1464
  "epoch": 0.49,
1465
- "learning_rate": 1.87266521632789e-05,
1466
- "loss": 2.6588,
1467
  "step": 1215
1468
  },
1469
  {
1470
  "epoch": 0.49,
1471
- "learning_rate": 1.8716381005612756e-05,
1472
- "loss": 2.625,
1473
  "step": 1220
1474
  },
1475
  {
1476
  "epoch": 0.49,
1477
- "learning_rate": 1.870607142944767e-05,
1478
- "loss": 2.5962,
1479
  "step": 1225
1480
  },
1481
  {
1482
  "epoch": 0.49,
1483
- "learning_rate": 1.869572348022434e-05,
1484
- "loss": 2.7405,
1485
  "step": 1230
1486
  },
1487
  {
1488
  "epoch": 0.5,
1489
- "learning_rate": 1.8685337203552602e-05,
1490
- "loss": 2.5352,
1491
  "step": 1235
1492
  },
1493
  {
1494
  "epoch": 0.5,
1495
- "learning_rate": 1.8674912645211212e-05,
1496
- "loss": 2.5935,
1497
  "step": 1240
1498
  },
1499
  {
1500
  "epoch": 0.5,
1501
- "learning_rate": 1.8664449851147675e-05,
1502
- "loss": 2.72,
1503
  "step": 1245
1504
  },
1505
  {
1506
  "epoch": 0.5,
1507
- "learning_rate": 1.8653948867478008e-05,
1508
- "loss": 2.6364,
1509
  "step": 1250
1510
  },
1511
  {
1512
  "epoch": 0.5,
1513
- "learning_rate": 1.864340974048657e-05,
1514
- "loss": 2.6966,
1515
  "step": 1255
1516
  },
1517
  {
1518
  "epoch": 0.51,
1519
- "learning_rate": 1.8632832516625825e-05,
1520
- "loss": 2.6181,
1521
  "step": 1260
1522
  },
1523
  {
1524
  "epoch": 0.51,
1525
- "learning_rate": 1.8622217242516164e-05,
1526
- "loss": 2.6416,
1527
  "step": 1265
1528
  },
1529
  {
1530
  "epoch": 0.51,
1531
- "learning_rate": 1.8611563964945685e-05,
1532
- "loss": 2.5406,
1533
  "step": 1270
1534
  },
1535
  {
1536
  "epoch": 0.51,
1537
- "learning_rate": 1.8600872730869995e-05,
1538
- "loss": 2.6398,
1539
  "step": 1275
1540
  },
1541
  {
1542
  "epoch": 0.51,
1543
- "learning_rate": 1.859014358741199e-05,
1544
- "loss": 2.5742,
1545
  "step": 1280
1546
  },
1547
  {
1548
  "epoch": 0.52,
1549
- "learning_rate": 1.8579376581861665e-05,
1550
- "loss": 2.6472,
1551
  "step": 1285
1552
  },
1553
  {
1554
  "epoch": 0.52,
1555
- "learning_rate": 1.8568571761675893e-05,
1556
- "loss": 2.6341,
1557
  "step": 1290
1558
  },
1559
  {
1560
  "epoch": 0.52,
1561
- "learning_rate": 1.8557729174478222e-05,
1562
- "loss": 2.7972,
1563
  "step": 1295
1564
  },
1565
  {
1566
  "epoch": 0.52,
1567
- "learning_rate": 1.854684886805866e-05,
1568
- "loss": 2.6391,
1569
  "step": 1300
1570
  },
1571
  {
1572
  "epoch": 0.52,
1573
- "learning_rate": 1.8535930890373467e-05,
1574
- "loss": 2.6008,
1575
  "step": 1305
1576
  },
1577
  {
1578
  "epoch": 0.53,
1579
- "learning_rate": 1.8524975289544943e-05,
1580
- "loss": 2.6225,
1581
  "step": 1310
1582
  },
1583
  {
1584
  "epoch": 0.53,
1585
- "learning_rate": 1.851398211386122e-05,
1586
- "loss": 2.6241,
1587
  "step": 1315
1588
  },
1589
  {
1590
  "epoch": 0.53,
1591
- "learning_rate": 1.8502951411776043e-05,
1592
- "loss": 2.7438,
1593
  "step": 1320
1594
  },
1595
  {
1596
  "epoch": 0.53,
1597
- "learning_rate": 1.849188323190856e-05,
1598
- "loss": 2.6974,
1599
  "step": 1325
1600
  },
1601
  {
1602
  "epoch": 0.53,
1603
- "learning_rate": 1.848077762304311e-05,
1604
- "loss": 2.6354,
1605
  "step": 1330
1606
  },
1607
  {
1608
  "epoch": 0.54,
1609
- "learning_rate": 1.8469634634128993e-05,
1610
- "loss": 2.7041,
1611
  "step": 1335
1612
  },
1613
  {
1614
  "epoch": 0.54,
1615
- "learning_rate": 1.8458454314280283e-05,
1616
- "loss": 2.7031,
1617
  "step": 1340
1618
  },
1619
  {
1620
  "epoch": 0.54,
1621
- "learning_rate": 1.8447236712775583e-05,
1622
- "loss": 2.5,
1623
  "step": 1345
1624
  },
1625
  {
1626
  "epoch": 0.54,
1627
- "learning_rate": 1.843598187905782e-05,
1628
- "loss": 2.627,
1629
  "step": 1350
1630
  },
1631
  {
1632
  "epoch": 0.54,
1633
- "learning_rate": 1.842468986273403e-05,
1634
- "loss": 2.6997,
1635
  "step": 1355
1636
  },
1637
  {
1638
  "epoch": 0.55,
1639
- "learning_rate": 1.841336071357514e-05,
1640
- "loss": 2.7178,
1641
  "step": 1360
1642
  },
1643
  {
1644
  "epoch": 0.55,
1645
- "learning_rate": 1.840199448151573e-05,
1646
- "loss": 2.6428,
1647
  "step": 1365
1648
  },
1649
  {
1650
  "epoch": 0.55,
1651
- "learning_rate": 1.8390591216653845e-05,
1652
- "loss": 2.6359,
1653
  "step": 1370
1654
  },
1655
  {
1656
  "epoch": 0.55,
1657
- "learning_rate": 1.8379150969250747e-05,
1658
- "loss": 2.6191,
1659
  "step": 1375
1660
  },
1661
  {
1662
  "epoch": 0.55,
1663
- "learning_rate": 1.83676737897307e-05,
1664
- "loss": 2.6765,
1665
  "step": 1380
1666
  },
1667
  {
1668
  "epoch": 0.56,
1669
- "learning_rate": 1.8356159728680754e-05,
1670
- "loss": 2.6555,
1671
  "step": 1385
1672
  },
1673
  {
1674
  "epoch": 0.56,
1675
- "learning_rate": 1.8344608836850526e-05,
1676
- "loss": 2.6545,
1677
  "step": 1390
1678
  },
1679
  {
1680
  "epoch": 0.56,
1681
- "learning_rate": 1.833302116515196e-05,
1682
- "loss": 2.6659,
1683
  "step": 1395
1684
  },
1685
  {
1686
  "epoch": 0.56,
1687
- "learning_rate": 1.832139676465911e-05,
1688
- "loss": 2.6792,
1689
  "step": 1400
1690
  },
1691
  {
1692
  "epoch": 0.56,
1693
- "learning_rate": 1.8309735686607927e-05,
1694
- "loss": 2.7196,
1695
  "step": 1405
1696
  },
1697
  {
1698
  "epoch": 0.57,
1699
- "learning_rate": 1.8298037982396013e-05,
1700
- "loss": 2.6484,
1701
  "step": 1410
1702
  },
1703
  {
1704
  "epoch": 0.57,
1705
- "learning_rate": 1.828630370358241e-05,
1706
- "loss": 2.6177,
1707
  "step": 1415
1708
  },
1709
  {
1710
  "epoch": 0.57,
1711
- "learning_rate": 1.8274532901887363e-05,
1712
- "loss": 2.6296,
1713
  "step": 1420
1714
  },
1715
  {
1716
  "epoch": 0.57,
1717
- "learning_rate": 1.8262725629192105e-05,
1718
- "loss": 2.679,
1719
  "step": 1425
1720
  },
1721
  {
1722
  "epoch": 0.57,
1723
- "learning_rate": 1.8250881937538604e-05,
1724
- "loss": 2.5841,
1725
  "step": 1430
1726
  },
1727
  {
1728
  "epoch": 0.58,
1729
- "learning_rate": 1.8239001879129355e-05,
1730
- "loss": 2.7088,
1731
  "step": 1435
1732
  },
1733
  {
1734
  "epoch": 0.58,
1735
- "learning_rate": 1.8227085506327157e-05,
1736
- "loss": 2.6291,
1737
  "step": 1440
1738
  },
1739
  {
1740
  "epoch": 0.58,
1741
- "learning_rate": 1.821513287165485e-05,
1742
- "loss": 2.5989,
1743
  "step": 1445
1744
  },
1745
  {
1746
  "epoch": 0.58,
1747
- "learning_rate": 1.820314402779511e-05,
1748
- "loss": 2.6521,
1749
  "step": 1450
1750
  },
1751
  {
1752
  "epoch": 0.58,
1753
- "learning_rate": 1.819111902759022e-05,
1754
- "loss": 2.6189,
1755
  "step": 1455
1756
  },
1757
  {
1758
  "epoch": 0.59,
1759
- "learning_rate": 1.8179057924041804e-05,
1760
- "loss": 2.6539,
1761
  "step": 1460
1762
  },
1763
  {
1764
  "epoch": 0.59,
1765
- "learning_rate": 1.8166960770310632e-05,
1766
- "loss": 2.7381,
1767
  "step": 1465
1768
  },
1769
  {
1770
  "epoch": 0.59,
1771
- "learning_rate": 1.815482761971637e-05,
1772
- "loss": 2.6731,
1773
  "step": 1470
1774
  },
1775
  {
1776
  "epoch": 0.59,
1777
- "learning_rate": 1.8142658525737338e-05,
1778
- "loss": 2.6377,
1779
  "step": 1475
1780
  },
1781
  {
1782
  "epoch": 0.59,
1783
- "learning_rate": 1.8130453542010284e-05,
1784
- "loss": 2.5693,
1785
  "step": 1480
1786
  },
1787
  {
1788
  "epoch": 0.6,
1789
- "learning_rate": 1.8118212722330143e-05,
1790
- "loss": 2.6144,
1791
  "step": 1485
1792
  },
1793
  {
1794
  "epoch": 0.6,
1795
- "learning_rate": 1.8105936120649795e-05,
1796
- "loss": 2.5852,
1797
  "step": 1490
1798
  },
1799
  {
1800
  "epoch": 0.6,
1801
- "learning_rate": 1.8093623791079853e-05,
1802
- "loss": 2.6397,
1803
  "step": 1495
1804
  },
1805
  {
1806
  "epoch": 0.6,
1807
- "learning_rate": 1.8081275787888385e-05,
1808
- "loss": 2.7686,
1809
  "step": 1500
1810
  }
1811
  ],
 
10
  "log_history": [
11
  {
12
  "epoch": 0.0,
13
+ "learning_rate": 4.99999449047447e-05,
14
+ "loss": 2.7413,
15
  "step": 5
16
  },
17
  {
18
  "epoch": 0.0,
19
+ "learning_rate": 4.9999779619221645e-05,
20
+ "loss": 2.6684,
21
  "step": 10
22
  },
23
  {
24
  "epoch": 0.01,
25
+ "learning_rate": 4.999950414415935e-05,
26
+ "loss": 2.6511,
27
  "step": 15
28
  },
29
  {
30
  "epoch": 0.01,
31
+ "learning_rate": 4.999911848077199e-05,
32
+ "loss": 2.72,
33
  "step": 20
34
  },
35
  {
36
  "epoch": 0.01,
37
+ "learning_rate": 4.999862263075944e-05,
38
+ "loss": 2.7222,
39
  "step": 25
40
  },
41
  {
42
  "epoch": 0.01,
43
+ "learning_rate": 4.999801659630722e-05,
44
+ "loss": 2.7479,
45
  "step": 30
46
  },
47
  {
48
  "epoch": 0.01,
49
+ "learning_rate": 4.999730038008649e-05,
50
+ "loss": 2.6063,
51
  "step": 35
52
  },
53
  {
54
  "epoch": 0.02,
55
+ "learning_rate": 4.9996473985254055e-05,
56
+ "loss": 2.576,
57
  "step": 40
58
  },
59
  {
60
  "epoch": 0.02,
61
+ "learning_rate": 4.999553741545237e-05,
62
+ "loss": 2.579,
63
  "step": 45
64
  },
65
  {
66
  "epoch": 0.02,
67
+ "learning_rate": 4.999449067480945e-05,
68
+ "loss": 2.5114,
69
  "step": 50
70
  },
71
  {
72
  "epoch": 0.02,
73
+ "learning_rate": 4.9993333767938954e-05,
74
+ "loss": 2.6174,
75
  "step": 55
76
  },
77
  {
78
  "epoch": 0.02,
79
+ "learning_rate": 4.9992066699940085e-05,
80
+ "loss": 2.6581,
81
  "step": 60
82
  },
83
  {
84
  "epoch": 0.03,
85
+ "learning_rate": 4.9990689476397586e-05,
86
+ "loss": 2.586,
87
  "step": 65
88
  },
89
  {
90
  "epoch": 0.03,
91
+ "learning_rate": 4.998920210338175e-05,
92
+ "loss": 2.4642,
93
  "step": 70
94
  },
95
  {
96
  "epoch": 0.03,
97
+ "learning_rate": 4.9987604587448343e-05,
98
+ "loss": 2.6262,
99
  "step": 75
100
  },
101
  {
102
  "epoch": 0.03,
103
+ "learning_rate": 4.998589693563861e-05,
104
+ "loss": 2.5678,
105
  "step": 80
106
  },
107
  {
108
  "epoch": 0.03,
109
+ "learning_rate": 4.998407915547924e-05,
110
+ "loss": 2.5597,
111
  "step": 85
112
  },
113
  {
114
  "epoch": 0.04,
115
+ "learning_rate": 4.9982151254982304e-05,
116
+ "loss": 2.6578,
117
  "step": 90
118
  },
119
  {
120
  "epoch": 0.04,
121
+ "learning_rate": 4.998011324264527e-05,
122
+ "loss": 2.4825,
123
  "step": 95
124
  },
125
  {
126
  "epoch": 0.04,
127
+ "learning_rate": 4.997796512745091e-05,
128
+ "loss": 2.6197,
129
  "step": 100
130
  },
131
  {
132
  "epoch": 0.04,
133
+ "learning_rate": 4.997570691886732e-05,
134
+ "loss": 2.4921,
135
  "step": 105
136
  },
137
  {
138
  "epoch": 0.04,
139
+ "learning_rate": 4.99733386268478e-05,
140
+ "loss": 2.5126,
141
  "step": 110
142
  },
143
  {
144
  "epoch": 0.05,
145
+ "learning_rate": 4.99708602618309e-05,
146
+ "loss": 2.584,
147
  "step": 115
148
  },
149
  {
150
  "epoch": 0.05,
151
+ "learning_rate": 4.9968271834740305e-05,
152
+ "loss": 2.4991,
153
  "step": 120
154
  },
155
  {
156
  "epoch": 0.05,
157
+ "learning_rate": 4.996557335698482e-05,
158
+ "loss": 2.5242,
159
  "step": 125
160
  },
161
  {
162
  "epoch": 0.05,
163
+ "learning_rate": 4.996276484045832e-05,
164
+ "loss": 2.4506,
165
  "step": 130
166
  },
167
  {
168
  "epoch": 0.05,
169
+ "learning_rate": 4.995984629753967e-05,
170
+ "loss": 2.5479,
171
  "step": 135
172
  },
173
  {
174
  "epoch": 0.06,
175
+ "learning_rate": 4.9956817741092696e-05,
176
+ "loss": 2.5316,
177
  "step": 140
178
  },
179
  {
180
  "epoch": 0.06,
181
+ "learning_rate": 4.995367918446613e-05,
182
+ "loss": 2.6053,
183
  "step": 145
184
  },
185
  {
186
  "epoch": 0.06,
187
+ "learning_rate": 4.995043064149354e-05,
188
+ "loss": 2.4533,
189
  "step": 150
190
  },
191
  {
192
  "epoch": 0.06,
193
+ "learning_rate": 4.9947072126493276e-05,
194
+ "loss": 2.5279,
195
  "step": 155
196
  },
197
  {
198
  "epoch": 0.06,
199
+ "learning_rate": 4.994360365426839e-05,
200
+ "loss": 2.4994,
201
  "step": 160
202
  },
203
  {
204
  "epoch": 0.07,
205
+ "learning_rate": 4.994002524010659e-05,
206
+ "loss": 2.4675,
207
  "step": 165
208
  },
209
  {
210
  "epoch": 0.07,
211
+ "learning_rate": 4.9936336899780166e-05,
212
+ "loss": 2.5233,
213
  "step": 170
214
  },
215
  {
216
  "epoch": 0.07,
217
+ "learning_rate": 4.993253864954592e-05,
218
+ "loss": 2.4974,
219
  "step": 175
220
  },
221
  {
222
  "epoch": 0.07,
223
+ "learning_rate": 4.992863050614511e-05,
224
+ "loss": 2.6295,
225
  "step": 180
226
  },
227
  {
228
  "epoch": 0.07,
229
+ "learning_rate": 4.9924612486803346e-05,
230
+ "loss": 2.4744,
231
  "step": 185
232
  },
233
  {
234
  "epoch": 0.08,
235
+ "learning_rate": 4.992048460923052e-05,
236
+ "loss": 2.5248,
237
  "step": 190
238
  },
239
  {
240
  "epoch": 0.08,
241
+ "learning_rate": 4.991624689162076e-05,
242
+ "loss": 2.5727,
243
  "step": 195
244
  },
245
  {
246
  "epoch": 0.08,
247
+ "learning_rate": 4.991189935265231e-05,
248
+ "loss": 2.4845,
249
  "step": 200
250
  },
251
  {
252
  "epoch": 0.08,
253
+ "learning_rate": 4.990744201148748e-05,
254
+ "loss": 2.4544,
255
  "step": 205
256
  },
257
  {
258
  "epoch": 0.08,
259
+ "learning_rate": 4.990287488777253e-05,
260
+ "loss": 2.4383,
261
  "step": 210
262
  },
263
  {
264
  "epoch": 0.09,
265
+ "learning_rate": 4.989819800163761e-05,
266
+ "loss": 2.5263,
267
  "step": 215
268
  },
269
  {
270
  "epoch": 0.09,
271
+ "learning_rate": 4.989341137369666e-05,
272
+ "loss": 2.5219,
273
  "step": 220
274
  },
275
  {
276
  "epoch": 0.09,
277
+ "learning_rate": 4.9888515025047316e-05,
278
+ "loss": 2.4512,
279
  "step": 225
280
  },
281
  {
282
  "epoch": 0.09,
283
+ "learning_rate": 4.988350897727083e-05,
284
+ "loss": 2.491,
285
  "step": 230
286
  },
287
  {
288
  "epoch": 0.09,
289
+ "learning_rate": 4.9878393252431953e-05,
290
+ "loss": 2.5404,
291
  "step": 235
292
  },
293
  {
294
  "epoch": 0.1,
295
+ "learning_rate": 4.987316787307888e-05,
296
+ "loss": 2.4777,
297
  "step": 240
298
  },
299
  {
300
  "epoch": 0.1,
301
+ "learning_rate": 4.9867832862243055e-05,
302
+ "loss": 2.4498,
303
  "step": 245
304
  },
305
  {
306
  "epoch": 0.1,
307
+ "learning_rate": 4.986238824343923e-05,
308
+ "loss": 2.4174,
309
  "step": 250
310
  },
311
  {
312
  "epoch": 0.1,
313
+ "learning_rate": 4.985683404066519e-05,
314
+ "loss": 2.4589,
315
  "step": 255
316
  },
317
  {
318
  "epoch": 0.1,
319
+ "learning_rate": 4.985117027840175e-05,
320
+ "loss": 2.5029,
321
  "step": 260
322
  },
323
  {
324
  "epoch": 0.11,
325
+ "learning_rate": 4.984539698161264e-05,
326
+ "loss": 2.4376,
327
  "step": 265
328
  },
329
  {
330
  "epoch": 0.11,
331
+ "learning_rate": 4.983951417574436e-05,
332
+ "loss": 2.526,
333
  "step": 270
334
  },
335
  {
336
  "epoch": 0.11,
337
+ "learning_rate": 4.983352188672608e-05,
338
+ "loss": 2.5214,
339
  "step": 275
340
  },
341
  {
342
  "epoch": 0.11,
343
+ "learning_rate": 4.982742014096952e-05,
344
+ "loss": 2.5188,
345
  "step": 280
346
  },
347
  {
348
  "epoch": 0.11,
349
+ "learning_rate": 4.982120896536888e-05,
350
+ "loss": 2.4175,
351
  "step": 285
352
  },
353
  {
354
  "epoch": 0.12,
355
+ "learning_rate": 4.981488838730066e-05,
356
+ "loss": 2.5346,
357
  "step": 290
358
  },
359
  {
360
  "epoch": 0.12,
361
+ "learning_rate": 4.980845843462357e-05,
362
+ "loss": 2.496,
363
  "step": 295
364
  },
365
  {
366
  "epoch": 0.12,
367
+ "learning_rate": 4.98019191356784e-05,
368
+ "loss": 2.477,
369
  "step": 300
370
  },
371
  {
372
  "epoch": 0.12,
373
+ "learning_rate": 4.9795270519287886e-05,
374
+ "loss": 2.4205,
375
  "step": 305
376
  },
377
  {
378
  "epoch": 0.12,
379
+ "learning_rate": 4.9788512614756624e-05,
380
+ "loss": 2.4621,
381
  "step": 310
382
  },
383
  {
384
  "epoch": 0.13,
385
+ "learning_rate": 4.9781645451870875e-05,
386
+ "loss": 2.5098,
387
  "step": 315
388
  },
389
  {
390
  "epoch": 0.13,
391
+ "learning_rate": 4.9774669060898496e-05,
392
+ "loss": 2.4847,
393
  "step": 320
394
  },
395
  {
396
  "epoch": 0.13,
397
+ "learning_rate": 4.976758347258877e-05,
398
+ "loss": 2.4873,
399
  "step": 325
400
  },
401
  {
402
  "epoch": 0.13,
403
+ "learning_rate": 4.976038871817228e-05,
404
+ "loss": 2.4462,
405
  "step": 330
406
  },
407
  {
408
  "epoch": 0.13,
409
+ "learning_rate": 4.9753084829360776e-05,
410
+ "loss": 2.4994,
411
  "step": 335
412
  },
413
  {
414
  "epoch": 0.14,
415
+ "learning_rate": 4.974567183834702e-05,
416
+ "loss": 2.4994,
417
  "step": 340
418
  },
419
  {
420
  "epoch": 0.14,
421
+ "learning_rate": 4.9738149777804665e-05,
422
+ "loss": 2.4584,
423
  "step": 345
424
  },
425
  {
426
  "epoch": 0.14,
427
+ "learning_rate": 4.97305186808881e-05,
428
+ "loss": 2.4294,
429
  "step": 350
430
  },
431
  {
432
  "epoch": 0.14,
433
+ "learning_rate": 4.9722778581232305e-05,
434
+ "loss": 2.4499,
435
  "step": 355
436
  },
437
  {
438
  "epoch": 0.14,
439
+ "learning_rate": 4.9714929512952704e-05,
440
+ "loss": 2.5851,
441
  "step": 360
442
  },
443
  {
444
  "epoch": 0.15,
445
+ "learning_rate": 4.9706971510645e-05,
446
+ "loss": 2.4746,
447
  "step": 365
448
  },
449
  {
450
  "epoch": 0.15,
451
+ "learning_rate": 4.969890460938505e-05,
452
+ "loss": 2.4184,
453
  "step": 370
454
  },
455
  {
456
  "epoch": 0.15,
457
+ "learning_rate": 4.9690728844728704e-05,
458
+ "loss": 2.4716,
459
  "step": 375
460
  },
461
  {
462
  "epoch": 0.15,
463
+ "learning_rate": 4.968244425271161e-05,
464
+ "loss": 2.4477,
465
  "step": 380
466
  },
467
  {
468
  "epoch": 0.15,
469
+ "learning_rate": 4.9674050869849124e-05,
470
+ "loss": 2.4563,
471
  "step": 385
472
  },
473
  {
474
  "epoch": 0.16,
475
+ "learning_rate": 4.966554873313608e-05,
476
+ "loss": 2.3708,
477
  "step": 390
478
  },
479
  {
480
  "epoch": 0.16,
481
+ "learning_rate": 4.9656937880046676e-05,
482
+ "loss": 2.4351,
483
  "step": 395
484
  },
485
  {
486
  "epoch": 0.16,
487
+ "learning_rate": 4.9648218348534284e-05,
488
+ "loss": 2.3933,
489
  "step": 400
490
  },
491
  {
492
  "epoch": 0.16,
493
+ "learning_rate": 4.963939017703128e-05,
494
+ "loss": 2.5871,
495
  "step": 405
496
  },
497
  {
498
  "epoch": 0.16,
499
+ "learning_rate": 4.9630453404448905e-05,
500
+ "loss": 2.3965,
501
  "step": 410
502
  },
503
  {
504
  "epoch": 0.17,
505
+ "learning_rate": 4.962140807017705e-05,
506
+ "loss": 2.372,
507
  "step": 415
508
  },
509
  {
510
  "epoch": 0.17,
511
+ "learning_rate": 4.961225421408412e-05,
512
+ "loss": 2.4847,
513
  "step": 420
514
  },
515
  {
516
  "epoch": 0.17,
517
+ "learning_rate": 4.960299187651684e-05,
518
+ "loss": 2.4046,
519
  "step": 425
520
  },
521
  {
522
  "epoch": 0.17,
523
+ "learning_rate": 4.959362109830007e-05,
524
+ "loss": 2.3912,
525
  "step": 430
526
  },
527
  {
528
  "epoch": 0.17,
529
+ "learning_rate": 4.9584141920736656e-05,
530
+ "loss": 2.4732,
531
  "step": 435
532
  },
533
  {
534
  "epoch": 0.18,
535
+ "learning_rate": 4.957455438560721e-05,
536
+ "loss": 2.4854,
537
  "step": 440
538
  },
539
  {
540
  "epoch": 0.18,
541
+ "learning_rate": 4.956485853516993e-05,
542
+ "loss": 2.3736,
543
  "step": 445
544
  },
545
  {
546
  "epoch": 0.18,
547
+ "learning_rate": 4.9555054412160476e-05,
548
+ "loss": 2.4222,
549
  "step": 450
550
  },
551
  {
552
  "epoch": 0.18,
553
+ "learning_rate": 4.9545142059791686e-05,
554
+ "loss": 2.4498,
555
  "step": 455
556
  },
557
  {
558
  "epoch": 0.18,
559
+ "learning_rate": 4.9535121521753434e-05,
560
+ "loss": 2.4172,
561
  "step": 460
562
  },
563
  {
564
  "epoch": 0.19,
565
+ "learning_rate": 4.952499284221247e-05,
566
+ "loss": 2.4742,
567
  "step": 465
568
  },
569
  {
570
  "epoch": 0.19,
571
+ "learning_rate": 4.951475606581215e-05,
572
+ "loss": 2.4008,
573
  "step": 470
574
  },
575
  {
576
  "epoch": 0.19,
577
+ "learning_rate": 4.950441123767231e-05,
578
+ "loss": 2.4278,
579
  "step": 475
580
  },
581
  {
582
  "epoch": 0.19,
583
+ "learning_rate": 4.949395840338903e-05,
584
+ "loss": 2.5041,
585
  "step": 480
586
  },
587
  {
588
  "epoch": 0.19,
589
+ "learning_rate": 4.948339760903442e-05,
590
+ "loss": 2.4088,
591
  "step": 485
592
  },
593
  {
594
  "epoch": 0.2,
595
+ "learning_rate": 4.947272890115647e-05,
596
+ "loss": 2.466,
597
  "step": 490
598
  },
599
  {
600
  "epoch": 0.2,
601
+ "learning_rate": 4.946195232677878e-05,
602
+ "loss": 2.4404,
603
  "step": 495
604
  },
605
  {
606
  "epoch": 0.2,
607
+ "learning_rate": 4.9451067933400406e-05,
608
+ "loss": 2.3658,
609
  "step": 500
610
  },
611
  {
612
  "epoch": 0.2,
613
+ "learning_rate": 4.9440075768995625e-05,
614
+ "loss": 2.4247,
615
  "step": 505
616
  },
617
  {
618
  "epoch": 0.2,
619
+ "learning_rate": 4.942897588201372e-05,
620
+ "loss": 2.5082,
621
  "step": 510
622
  },
623
  {
624
  "epoch": 0.21,
625
+ "learning_rate": 4.941776832137879e-05,
626
+ "loss": 2.5545,
627
  "step": 515
628
  },
629
  {
630
  "epoch": 0.21,
631
+ "learning_rate": 4.940645313648949e-05,
632
+ "loss": 2.4665,
633
  "step": 520
634
  },
635
  {
636
  "epoch": 0.21,
637
+ "learning_rate": 4.939503037721888e-05,
638
+ "loss": 2.4195,
639
  "step": 525
640
  },
641
  {
642
  "epoch": 0.21,
643
+ "learning_rate": 4.938350009391413e-05,
644
+ "loss": 2.4908,
645
  "step": 530
646
  },
647
  {
648
  "epoch": 0.21,
649
+ "learning_rate": 4.937186233739637e-05,
650
+ "loss": 2.4093,
651
  "step": 535
652
  },
653
  {
654
  "epoch": 0.22,
655
+ "learning_rate": 4.93601171589604e-05,
656
+ "loss": 2.44,
657
  "step": 540
658
  },
659
  {
660
  "epoch": 0.22,
661
+ "learning_rate": 4.9348264610374494e-05,
662
+ "loss": 2.4989,
663
  "step": 545
664
  },
665
  {
666
  "epoch": 0.22,
667
+ "learning_rate": 4.9336304743880226e-05,
668
+ "loss": 2.3786,
669
  "step": 550
670
  },
671
  {
672
  "epoch": 0.22,
673
+ "learning_rate": 4.932423761219211e-05,
674
+ "loss": 2.4305,
675
  "step": 555
676
  },
677
  {
678
  "epoch": 0.22,
679
+ "learning_rate": 4.931206326849751e-05,
680
+ "loss": 2.4012,
681
  "step": 560
682
  },
683
  {
684
  "epoch": 0.23,
685
+ "learning_rate": 4.92997817664563e-05,
686
+ "loss": 2.4263,
687
  "step": 565
688
  },
689
  {
690
  "epoch": 0.23,
691
+ "learning_rate": 4.9287393160200676e-05,
692
+ "loss": 2.4749,
693
  "step": 570
694
  },
695
  {
696
  "epoch": 0.23,
697
+ "learning_rate": 4.927489750433492e-05,
698
+ "loss": 2.5083,
699
  "step": 575
700
  },
701
  {
702
  "epoch": 0.23,
703
+ "learning_rate": 4.926229485393513e-05,
704
+ "loss": 2.5228,
705
  "step": 580
706
  },
707
  {
708
  "epoch": 0.23,
709
+ "learning_rate": 4.924958526454902e-05,
710
+ "loss": 2.4623,
711
  "step": 585
712
  },
713
  {
714
  "epoch": 0.24,
715
+ "learning_rate": 4.923676879219562e-05,
716
+ "loss": 2.4199,
717
  "step": 590
718
  },
719
  {
720
  "epoch": 0.24,
721
+ "learning_rate": 4.9223845493365085e-05,
722
+ "loss": 2.2906,
723
  "step": 595
724
  },
725
  {
726
  "epoch": 0.24,
727
+ "learning_rate": 4.9210815425018406e-05,
728
+ "loss": 2.5539,
729
  "step": 600
730
  },
731
  {
732
  "epoch": 0.24,
733
+ "learning_rate": 4.919767864458718e-05,
734
+ "loss": 2.3456,
735
  "step": 605
736
  },
737
  {
738
  "epoch": 0.24,
739
+ "learning_rate": 4.9184435209973354e-05,
740
+ "loss": 2.4328,
741
  "step": 610
742
  },
743
  {
744
  "epoch": 0.25,
745
+ "learning_rate": 4.917108517954895e-05,
746
+ "loss": 2.3412,
747
  "step": 615
748
  },
749
  {
750
  "epoch": 0.25,
751
+ "learning_rate": 4.9157628612155836e-05,
752
+ "loss": 2.604,
753
  "step": 620
754
  },
755
  {
756
  "epoch": 0.25,
757
+ "learning_rate": 4.9144065567105465e-05,
758
+ "loss": 2.4007,
759
  "step": 625
760
  },
761
  {
762
  "epoch": 0.25,
763
+ "learning_rate": 4.913039610417859e-05,
764
+ "loss": 2.437,
765
  "step": 630
766
  },
767
  {
768
  "epoch": 0.25,
769
+ "learning_rate": 4.9116620283624996e-05,
770
+ "loss": 2.5004,
771
  "step": 635
772
  },
773
  {
774
  "epoch": 0.26,
775
+ "learning_rate": 4.91027381661633e-05,
776
+ "loss": 2.5045,
777
  "step": 640
778
  },
779
  {
780
  "epoch": 0.26,
781
+ "learning_rate": 4.908874981298057e-05,
782
+ "loss": 2.3794,
783
  "step": 645
784
  },
785
  {
786
  "epoch": 0.26,
787
+ "learning_rate": 4.9074655285732196e-05,
788
+ "loss": 2.4621,
789
  "step": 650
790
  },
791
  {
792
  "epoch": 0.26,
793
+ "learning_rate": 4.906045464654148e-05,
794
+ "loss": 2.549,
795
  "step": 655
796
  },
797
  {
798
  "epoch": 0.26,
799
+ "learning_rate": 4.9046147957999454e-05,
800
+ "loss": 2.4567,
801
  "step": 660
802
  },
803
  {
804
  "epoch": 0.27,
805
+ "learning_rate": 4.9031735283164574e-05,
806
+ "loss": 2.3243,
807
  "step": 665
808
  },
809
  {
810
  "epoch": 0.27,
811
+ "learning_rate": 4.901721668556244e-05,
812
+ "loss": 2.4758,
813
  "step": 670
814
  },
815
  {
816
  "epoch": 0.27,
817
+ "learning_rate": 4.9002592229185515e-05,
818
+ "loss": 2.5083,
819
  "step": 675
820
  },
821
  {
822
  "epoch": 0.27,
823
+ "learning_rate": 4.898786197849285e-05,
824
+ "loss": 2.4428,
825
  "step": 680
826
  },
827
  {
828
  "epoch": 0.27,
829
+ "learning_rate": 4.897302599840981e-05,
830
+ "loss": 2.4403,
831
  "step": 685
832
  },
833
  {
834
  "epoch": 0.28,
835
+ "learning_rate": 4.895808435432776e-05,
836
+ "loss": 2.4401,
837
  "step": 690
838
  },
839
  {
840
  "epoch": 0.28,
841
+ "learning_rate": 4.8943037112103784e-05,
842
+ "loss": 2.3789,
843
  "step": 695
844
  },
845
  {
846
  "epoch": 0.28,
847
+ "learning_rate": 4.892788433806042e-05,
848
+ "loss": 2.3945,
849
  "step": 700
850
  },
851
  {
852
  "epoch": 0.28,
853
+ "learning_rate": 4.8912626098985355e-05,
854
+ "loss": 2.4758,
855
  "step": 705
856
  },
857
  {
858
  "epoch": 0.28,
859
+ "learning_rate": 4.88972624621311e-05,
860
+ "loss": 2.4646,
861
  "step": 710
862
  },
863
  {
864
  "epoch": 0.29,
865
+ "learning_rate": 4.888179349521475e-05,
866
+ "loss": 2.4225,
867
  "step": 715
868
  },
869
  {
870
  "epoch": 0.29,
871
+ "learning_rate": 4.886621926641762e-05,
872
+ "loss": 2.4074,
873
  "step": 720
874
  },
875
  {
876
  "epoch": 0.29,
877
+ "learning_rate": 4.8850539844385017e-05,
878
+ "loss": 2.4438,
879
  "step": 725
880
  },
881
  {
882
  "epoch": 0.29,
883
+ "learning_rate": 4.883475529822587e-05,
884
+ "loss": 2.3666,
885
  "step": 730
886
  },
887
  {
888
  "epoch": 0.29,
889
+ "learning_rate": 4.881886569751248e-05,
890
+ "loss": 2.3782,
891
  "step": 735
892
  },
893
  {
894
  "epoch": 0.3,
895
+ "learning_rate": 4.880287111228016e-05,
896
+ "loss": 2.4916,
897
  "step": 740
898
  },
899
  {
900
  "epoch": 0.3,
901
+ "learning_rate": 4.878677161302698e-05,
902
+ "loss": 2.3639,
903
  "step": 745
904
  },
905
  {
906
  "epoch": 0.3,
907
+ "learning_rate": 4.877056727071342e-05,
908
+ "loss": 2.4333,
909
  "step": 750
910
  },
911
  {
912
  "epoch": 0.3,
913
+ "learning_rate": 4.8754258156762075e-05,
914
+ "loss": 2.3907,
915
  "step": 755
916
  },
917
  {
918
  "epoch": 0.3,
919
+ "learning_rate": 4.8737844343057315e-05,
920
+ "loss": 2.433,
921
  "step": 760
922
  },
923
  {
924
  "epoch": 0.31,
925
+ "learning_rate": 4.8721325901945015e-05,
926
+ "loss": 2.3849,
927
  "step": 765
928
  },
929
  {
930
  "epoch": 0.31,
931
+ "learning_rate": 4.870470290623218e-05,
932
+ "loss": 2.419,
933
  "step": 770
934
  },
935
  {
936
  "epoch": 0.31,
937
+ "learning_rate": 4.8687975429186685e-05,
938
+ "loss": 2.3694,
939
  "step": 775
940
  },
941
  {
942
  "epoch": 0.31,
943
+ "learning_rate": 4.867114354453688e-05,
944
+ "loss": 2.4335,
945
  "step": 780
946
  },
947
  {
948
  "epoch": 0.31,
949
+ "learning_rate": 4.865420732647133e-05,
950
+ "loss": 2.4168,
951
  "step": 785
952
  },
953
  {
954
  "epoch": 0.32,
955
+ "learning_rate": 4.863716684963846e-05,
956
+ "loss": 2.419,
957
  "step": 790
958
  },
959
  {
960
  "epoch": 0.32,
961
+ "learning_rate": 4.862002218914622e-05,
962
+ "loss": 2.335,
963
  "step": 795
964
  },
965
  {
966
  "epoch": 0.32,
967
+ "learning_rate": 4.860277342056176e-05,
968
+ "loss": 2.4059,
969
  "step": 800
970
  },
971
  {
972
  "epoch": 0.32,
973
+ "learning_rate": 4.858542061991112e-05,
974
+ "loss": 2.3423,
975
  "step": 805
976
  },
977
  {
978
  "epoch": 0.32,
979
+ "learning_rate": 4.856796386367884e-05,
980
+ "loss": 2.4135,
981
  "step": 810
982
  },
983
  {
984
  "epoch": 0.33,
985
+ "learning_rate": 4.855040322880769e-05,
986
+ "loss": 2.3652,
987
  "step": 815
988
  },
989
  {
990
  "epoch": 0.33,
991
+ "learning_rate": 4.853273879269827e-05,
992
+ "loss": 2.3511,
993
  "step": 820
994
  },
995
  {
996
  "epoch": 0.33,
997
+ "learning_rate": 4.8514970633208726e-05,
998
+ "loss": 2.413,
999
  "step": 825
1000
  },
1001
  {
1002
  "epoch": 0.33,
1003
+ "learning_rate": 4.849709882865435e-05,
1004
+ "loss": 2.4445,
1005
  "step": 830
1006
  },
1007
  {
1008
  "epoch": 0.33,
1009
+ "learning_rate": 4.847912345780727e-05,
1010
+ "loss": 2.4221,
1011
  "step": 835
1012
  },
1013
  {
1014
  "epoch": 0.34,
1015
+ "learning_rate": 4.846104459989611e-05,
1016
+ "loss": 2.4992,
1017
  "step": 840
1018
  },
1019
  {
1020
  "epoch": 0.34,
1021
+ "learning_rate": 4.84428623346056e-05,
1022
+ "loss": 2.3923,
1023
  "step": 845
1024
  },
1025
  {
1026
  "epoch": 0.34,
1027
+ "learning_rate": 4.842457674207627e-05,
1028
+ "loss": 2.3666,
1029
  "step": 850
1030
  },
1031
  {
1032
  "epoch": 0.34,
1033
+ "learning_rate": 4.8406187902904076e-05,
1034
+ "loss": 2.4329,
1035
  "step": 855
1036
  },
1037
  {
1038
  "epoch": 0.34,
1039
+ "learning_rate": 4.8387695898140026e-05,
1040
+ "loss": 2.3429,
1041
  "step": 860
1042
  },
1043
  {
1044
  "epoch": 0.35,
1045
+ "learning_rate": 4.836910080928987e-05,
1046
+ "loss": 2.4428,
1047
  "step": 865
1048
  },
1049
  {
1050
  "epoch": 0.35,
1051
+ "learning_rate": 4.83504027183137e-05,
1052
+ "loss": 2.3842,
1053
  "step": 870
1054
  },
1055
  {
1056
  "epoch": 0.35,
1057
+ "learning_rate": 4.833160170762561e-05,
1058
+ "loss": 2.4064,
1059
  "step": 875
1060
  },
1061
  {
1062
  "epoch": 0.35,
1063
+ "learning_rate": 4.8312697860093295e-05,
1064
+ "loss": 2.3705,
1065
  "step": 880
1066
  },
1067
  {
1068
  "epoch": 0.35,
1069
+ "learning_rate": 4.829369125903776e-05,
1070
+ "loss": 2.4151,
1071
  "step": 885
1072
  },
1073
  {
1074
  "epoch": 0.36,
1075
+ "learning_rate": 4.8274581988232894e-05,
1076
+ "loss": 2.4219,
1077
  "step": 890
1078
  },
1079
  {
1080
  "epoch": 0.36,
1081
+ "learning_rate": 4.825537013190509e-05,
1082
+ "loss": 2.4042,
1083
  "step": 895
1084
  },
1085
  {
1086
  "epoch": 0.36,
1087
+ "learning_rate": 4.823605577473293e-05,
1088
+ "loss": 2.4509,
1089
  "step": 900
1090
  },
1091
  {
1092
  "epoch": 0.36,
1093
+ "learning_rate": 4.8216639001846764e-05,
1094
+ "loss": 2.4255,
1095
  "step": 905
1096
  },
1097
  {
1098
  "epoch": 0.36,
1099
+ "learning_rate": 4.8197119898828367e-05,
1100
+ "loss": 2.4928,
1101
  "step": 910
1102
  },
1103
  {
1104
  "epoch": 0.37,
1105
+ "learning_rate": 4.817749855171052e-05,
1106
+ "loss": 2.4044,
1107
  "step": 915
1108
  },
1109
  {
1110
  "epoch": 0.37,
1111
+ "learning_rate": 4.8157775046976684e-05,
1112
+ "loss": 2.4085,
1113
  "step": 920
1114
  },
1115
  {
1116
  "epoch": 0.37,
1117
+ "learning_rate": 4.813794947156058e-05,
1118
+ "loss": 2.3941,
1119
  "step": 925
1120
  },
1121
  {
1122
  "epoch": 0.37,
1123
+ "learning_rate": 4.8118021912845815e-05,
1124
+ "loss": 2.3493,
1125
  "step": 930
1126
  },
1127
  {
1128
  "epoch": 0.37,
1129
+ "learning_rate": 4.8097992458665506e-05,
1130
+ "loss": 2.3367,
1131
  "step": 935
1132
  },
1133
  {
1134
  "epoch": 0.38,
1135
+ "learning_rate": 4.807786119730189e-05,
1136
+ "loss": 2.3865,
1137
  "step": 940
1138
  },
1139
  {
1140
  "epoch": 0.38,
1141
+ "learning_rate": 4.8057628217485916e-05,
1142
+ "loss": 2.3885,
1143
  "step": 945
1144
  },
1145
  {
1146
  "epoch": 0.38,
1147
+ "learning_rate": 4.803729360839688e-05,
1148
+ "loss": 2.4296,
1149
  "step": 950
1150
  },
1151
  {
1152
  "epoch": 0.38,
1153
+ "learning_rate": 4.801685745966203e-05,
1154
+ "loss": 2.3856,
1155
  "step": 955
1156
  },
1157
  {
1158
  "epoch": 0.38,
1159
+ "learning_rate": 4.799631986135615e-05,
1160
+ "loss": 2.3631,
1161
  "step": 960
1162
  },
1163
  {
1164
  "epoch": 0.39,
1165
+ "learning_rate": 4.797568090400117e-05,
1166
+ "loss": 2.3882,
1167
  "step": 965
1168
  },
1169
  {
1170
  "epoch": 0.39,
1171
+ "learning_rate": 4.7954940678565785e-05,
1172
+ "loss": 2.3396,
1173
  "step": 970
1174
  },
1175
  {
1176
  "epoch": 0.39,
1177
+ "learning_rate": 4.793409927646504e-05,
1178
+ "loss": 2.3548,
1179
  "step": 975
1180
  },
1181
  {
1182
  "epoch": 0.39,
1183
+ "learning_rate": 4.791315678955991e-05,
1184
+ "loss": 2.4576,
1185
  "step": 980
1186
  },
1187
  {
1188
  "epoch": 0.39,
1189
+ "learning_rate": 4.789211331015695e-05,
1190
+ "loss": 2.4523,
1191
  "step": 985
1192
  },
1193
  {
1194
  "epoch": 0.4,
1195
+ "learning_rate": 4.787096893100781e-05,
1196
+ "loss": 2.2542,
1197
  "step": 990
1198
  },
1199
  {
1200
  "epoch": 0.4,
1201
+ "learning_rate": 4.784972374530889e-05,
1202
+ "loss": 2.4385,
1203
  "step": 995
1204
  },
1205
  {
1206
  "epoch": 0.4,
1207
+ "learning_rate": 4.7828377846700925e-05,
1208
+ "loss": 2.3266,
1209
  "step": 1000
1210
  },
1211
  {
1212
  "epoch": 0.4,
1213
+ "learning_rate": 4.780693132926851e-05,
1214
+ "loss": 2.3301,
1215
  "step": 1005
1216
  },
1217
  {
1218
  "epoch": 0.4,
1219
+ "learning_rate": 4.7785384287539755e-05,
1220
+ "loss": 2.3554,
1221
  "step": 1010
1222
  },
1223
  {
1224
  "epoch": 0.41,
1225
+ "learning_rate": 4.776373681648586e-05,
1226
+ "loss": 2.3838,
1227
  "step": 1015
1228
  },
1229
  {
1230
  "epoch": 0.41,
1231
+ "learning_rate": 4.7741989011520645e-05,
1232
+ "loss": 2.3361,
1233
  "step": 1020
1234
  },
1235
  {
1236
  "epoch": 0.41,
1237
+ "learning_rate": 4.772014096850018e-05,
1238
+ "loss": 2.2864,
1239
  "step": 1025
1240
  },
1241
  {
1242
  "epoch": 0.41,
1243
+ "learning_rate": 4.769819278372235e-05,
1244
+ "loss": 2.3955,
1245
  "step": 1030
1246
  },
1247
  {
1248
  "epoch": 0.41,
1249
+ "learning_rate": 4.7676144553926414e-05,
1250
+ "loss": 2.3802,
1251
  "step": 1035
1252
  },
1253
  {
1254
  "epoch": 0.42,
1255
+ "learning_rate": 4.765399637629262e-05,
1256
+ "loss": 2.2856,
1257
  "step": 1040
1258
  },
1259
  {
1260
  "epoch": 0.42,
1261
+ "learning_rate": 4.7631748348441705e-05,
1262
+ "loss": 2.3698,
1263
  "step": 1045
1264
  },
1265
  {
1266
  "epoch": 0.42,
1267
+ "learning_rate": 4.760940056843454e-05,
1268
+ "loss": 2.3305,
1269
  "step": 1050
1270
  },
1271
  {
1272
  "epoch": 0.42,
1273
+ "learning_rate": 4.758695313477166e-05,
1274
+ "loss": 2.4944,
1275
  "step": 1055
1276
  },
1277
  {
1278
  "epoch": 0.43,
1279
+ "learning_rate": 4.756440614639283e-05,
1280
+ "loss": 2.4622,
1281
  "step": 1060
1282
  },
1283
  {
1284
  "epoch": 0.43,
1285
+ "learning_rate": 4.754175970267661e-05,
1286
+ "loss": 2.3995,
1287
  "step": 1065
1288
  },
1289
  {
1290
  "epoch": 0.43,
1291
+ "learning_rate": 4.751901390343995e-05,
1292
+ "loss": 2.343,
1293
  "step": 1070
1294
  },
1295
  {
1296
  "epoch": 0.43,
1297
+ "learning_rate": 4.7496168848937674e-05,
1298
+ "loss": 2.4388,
1299
  "step": 1075
1300
  },
1301
  {
1302
  "epoch": 0.43,
1303
+ "learning_rate": 4.7473224639862116e-05,
1304
+ "loss": 2.3694,
1305
  "step": 1080
1306
  },
1307
  {
1308
  "epoch": 0.44,
1309
+ "learning_rate": 4.745018137734264e-05,
1310
+ "loss": 2.3485,
1311
  "step": 1085
1312
  },
1313
  {
1314
  "epoch": 0.44,
1315
+ "learning_rate": 4.742703916294521e-05,
1316
+ "loss": 2.46,
1317
  "step": 1090
1318
  },
1319
  {
1320
  "epoch": 0.44,
1321
+ "learning_rate": 4.740379809867193e-05,
1322
+ "loss": 2.3138,
1323
  "step": 1095
1324
  },
1325
  {
1326
  "epoch": 0.44,
1327
+ "learning_rate": 4.738045828696056e-05,
1328
+ "loss": 2.4117,
1329
  "step": 1100
1330
  },
1331
  {
1332
  "epoch": 0.44,
1333
+ "learning_rate": 4.735701983068416e-05,
1334
+ "loss": 2.3506,
1335
  "step": 1105
1336
  },
1337
  {
1338
  "epoch": 0.45,
1339
+ "learning_rate": 4.7333482833150525e-05,
1340
+ "loss": 2.3711,
1341
  "step": 1110
1342
  },
1343
  {
1344
  "epoch": 0.45,
1345
+ "learning_rate": 4.730984739810183e-05,
1346
+ "loss": 2.2639,
1347
  "step": 1115
1348
  },
1349
  {
1350
  "epoch": 0.45,
1351
+ "learning_rate": 4.728611362971408e-05,
1352
+ "loss": 2.3749,
1353
  "step": 1120
1354
  },
1355
  {
1356
  "epoch": 0.45,
1357
+ "learning_rate": 4.726228163259673e-05,
1358
+ "loss": 2.4556,
1359
  "step": 1125
1360
  },
1361
  {
1362
  "epoch": 0.45,
1363
+ "learning_rate": 4.7238351511792165e-05,
1364
+ "loss": 2.4233,
1365
  "step": 1130
1366
  },
1367
  {
1368
  "epoch": 0.46,
1369
+ "learning_rate": 4.721432337277529e-05,
1370
+ "loss": 2.3544,
1371
  "step": 1135
1372
  },
1373
  {
1374
  "epoch": 0.46,
1375
+ "learning_rate": 4.7190197321453014e-05,
1376
+ "loss": 2.3654,
1377
  "step": 1140
1378
  },
1379
  {
1380
  "epoch": 0.46,
1381
+ "learning_rate": 4.716597346416382e-05,
1382
+ "loss": 2.3356,
1383
  "step": 1145
1384
  },
1385
  {
1386
  "epoch": 0.46,
1387
+ "learning_rate": 4.7141651907677256e-05,
1388
+ "loss": 2.4522,
1389
  "step": 1150
1390
  },
1391
  {
1392
  "epoch": 0.46,
1393
+ "learning_rate": 4.7117232759193534e-05,
1394
+ "loss": 2.3104,
1395
  "step": 1155
1396
  },
1397
  {
1398
  "epoch": 0.47,
1399
+ "learning_rate": 4.709271612634298e-05,
1400
+ "loss": 2.4492,
1401
  "step": 1160
1402
  },
1403
  {
1404
  "epoch": 0.47,
1405
+ "learning_rate": 4.706810211718561e-05,
1406
+ "loss": 2.4437,
1407
  "step": 1165
1408
  },
1409
  {
1410
  "epoch": 0.47,
1411
+ "learning_rate": 4.7043390840210636e-05,
1412
+ "loss": 2.3099,
1413
  "step": 1170
1414
  },
1415
  {
1416
  "epoch": 0.47,
1417
+ "learning_rate": 4.701858240433597e-05,
1418
+ "loss": 2.351,
1419
  "step": 1175
1420
  },
1421
  {
1422
  "epoch": 0.47,
1423
+ "learning_rate": 4.6993676918907804e-05,
1424
+ "loss": 2.4756,
1425
  "step": 1180
1426
  },
1427
  {
1428
  "epoch": 0.48,
1429
+ "learning_rate": 4.696867449370005e-05,
1430
+ "loss": 2.418,
1431
  "step": 1185
1432
  },
1433
  {
1434
  "epoch": 0.48,
1435
+ "learning_rate": 4.6943575238913904e-05,
1436
+ "loss": 2.5026,
1437
  "step": 1190
1438
  },
1439
  {
1440
  "epoch": 0.48,
1441
+ "learning_rate": 4.691837926517737e-05,
1442
+ "loss": 2.4096,
1443
  "step": 1195
1444
  },
1445
  {
1446
  "epoch": 0.48,
1447
+ "learning_rate": 4.689308668354473e-05,
1448
+ "loss": 2.4101,
1449
  "step": 1200
1450
  },
1451
  {
1452
  "epoch": 0.48,
1453
+ "learning_rate": 4.686769760549607e-05,
1454
+ "loss": 2.4533,
1455
  "step": 1205
1456
  },
1457
  {
1458
  "epoch": 0.49,
1459
+ "learning_rate": 4.684221214293683e-05,
1460
+ "loss": 2.2988,
1461
  "step": 1210
1462
  },
1463
  {
1464
  "epoch": 0.49,
1465
+ "learning_rate": 4.681663040819724e-05,
1466
+ "loss": 2.3937,
1467
  "step": 1215
1468
  },
1469
  {
1470
  "epoch": 0.49,
1471
+ "learning_rate": 4.679095251403189e-05,
1472
+ "loss": 2.3954,
1473
  "step": 1220
1474
  },
1475
  {
1476
  "epoch": 0.49,
1477
+ "learning_rate": 4.676517857361917e-05,
1478
+ "loss": 2.3275,
1479
  "step": 1225
1480
  },
1481
  {
1482
  "epoch": 0.49,
1483
+ "learning_rate": 4.673930870056085e-05,
1484
+ "loss": 2.4748,
1485
  "step": 1230
1486
  },
1487
  {
1488
  "epoch": 0.5,
1489
+ "learning_rate": 4.67133430088815e-05,
1490
+ "loss": 2.2496,
1491
  "step": 1235
1492
  },
1493
  {
1494
  "epoch": 0.5,
1495
+ "learning_rate": 4.668728161302803e-05,
1496
+ "loss": 2.3226,
1497
  "step": 1240
1498
  },
1499
  {
1500
  "epoch": 0.5,
1501
+ "learning_rate": 4.666112462786919e-05,
1502
+ "loss": 2.4655,
1503
  "step": 1245
1504
  },
1505
  {
1506
  "epoch": 0.5,
1507
+ "learning_rate": 4.663487216869502e-05,
1508
+ "loss": 2.3751,
1509
  "step": 1250
1510
  },
1511
  {
1512
  "epoch": 0.5,
1513
+ "learning_rate": 4.660852435121642e-05,
1514
+ "loss": 2.4136,
1515
  "step": 1255
1516
  },
1517
  {
1518
  "epoch": 0.51,
1519
+ "learning_rate": 4.658208129156456e-05,
1520
+ "loss": 2.3612,
1521
  "step": 1260
1522
  },
1523
  {
1524
  "epoch": 0.51,
1525
+ "learning_rate": 4.655554310629041e-05,
1526
+ "loss": 2.3675,
1527
  "step": 1265
1528
  },
1529
  {
1530
  "epoch": 0.51,
1531
+ "learning_rate": 4.652890991236421e-05,
1532
+ "loss": 2.2831,
1533
  "step": 1270
1534
  },
1535
  {
1536
  "epoch": 0.51,
1537
+ "learning_rate": 4.650218182717498e-05,
1538
+ "loss": 2.3958,
1539
  "step": 1275
1540
  },
1541
  {
1542
  "epoch": 0.51,
1543
+ "learning_rate": 4.647535896852997e-05,
1544
+ "loss": 2.3236,
1545
  "step": 1280
1546
  },
1547
  {
1548
  "epoch": 0.52,
1549
+ "learning_rate": 4.6448441454654156e-05,
1550
+ "loss": 2.3904,
1551
  "step": 1285
1552
  },
1553
  {
1554
  "epoch": 0.52,
1555
+ "learning_rate": 4.642142940418973e-05,
1556
+ "loss": 2.3565,
1557
  "step": 1290
1558
  },
1559
  {
1560
  "epoch": 0.52,
1561
+ "learning_rate": 4.6394322936195556e-05,
1562
+ "loss": 2.5229,
1563
  "step": 1295
1564
  },
1565
  {
1566
  "epoch": 0.52,
1567
+ "learning_rate": 4.636712217014665e-05,
1568
+ "loss": 2.369,
1569
  "step": 1300
1570
  },
1571
  {
1572
  "epoch": 0.52,
1573
+ "learning_rate": 4.6339827225933665e-05,
1574
+ "loss": 2.3444,
1575
  "step": 1305
1576
  },
1577
  {
1578
  "epoch": 0.53,
1579
+ "learning_rate": 4.6312438223862356e-05,
1580
+ "loss": 2.3387,
1581
  "step": 1310
1582
  },
1583
  {
1584
  "epoch": 0.53,
1585
+ "learning_rate": 4.628495528465305e-05,
1586
+ "loss": 2.3589,
1587
  "step": 1315
1588
  },
1589
  {
1590
  "epoch": 0.53,
1591
+ "learning_rate": 4.625737852944011e-05,
1592
+ "loss": 2.4779,
1593
  "step": 1320
1594
  },
1595
  {
1596
  "epoch": 0.53,
1597
+ "learning_rate": 4.62297080797714e-05,
1598
+ "loss": 2.4398,
1599
  "step": 1325
1600
  },
1601
  {
1602
  "epoch": 0.53,
1603
+ "learning_rate": 4.620194405760777e-05,
1604
+ "loss": 2.383,
1605
  "step": 1330
1606
  },
1607
  {
1608
  "epoch": 0.54,
1609
+ "learning_rate": 4.6174086585322485e-05,
1610
+ "loss": 2.424,
1611
  "step": 1335
1612
  },
1613
  {
1614
  "epoch": 0.54,
1615
+ "learning_rate": 4.6146135785700705e-05,
1616
+ "loss": 2.4289,
1617
  "step": 1340
1618
  },
1619
  {
1620
  "epoch": 0.54,
1621
+ "learning_rate": 4.611809178193896e-05,
1622
+ "loss": 2.225,
1623
  "step": 1345
1624
  },
1625
  {
1626
  "epoch": 0.54,
1627
+ "learning_rate": 4.608995469764455e-05,
1628
+ "loss": 2.3643,
1629
  "step": 1350
1630
  },
1631
  {
1632
  "epoch": 0.54,
1633
+ "learning_rate": 4.606172465683508e-05,
1634
+ "loss": 2.4278,
1635
  "step": 1355
1636
  },
1637
  {
1638
  "epoch": 0.55,
1639
+ "learning_rate": 4.6033401783937844e-05,
1640
+ "loss": 2.4464,
1641
  "step": 1360
1642
  },
1643
  {
1644
  "epoch": 0.55,
1645
+ "learning_rate": 4.600498620378932e-05,
1646
+ "loss": 2.3639,
1647
  "step": 1365
1648
  },
1649
  {
1650
  "epoch": 0.55,
1651
+ "learning_rate": 4.597647804163461e-05,
1652
+ "loss": 2.3595,
1653
  "step": 1370
1654
  },
1655
  {
1656
  "epoch": 0.55,
1657
+ "learning_rate": 4.5947877423126864e-05,
1658
+ "loss": 2.3569,
1659
  "step": 1375
1660
  },
1661
  {
1662
  "epoch": 0.55,
1663
+ "learning_rate": 4.591918447432675e-05,
1664
+ "loss": 2.4167,
1665
  "step": 1380
1666
  },
1667
  {
1668
  "epoch": 0.56,
1669
+ "learning_rate": 4.589039932170188e-05,
1670
+ "loss": 2.3959,
1671
  "step": 1385
1672
  },
1673
  {
1674
  "epoch": 0.56,
1675
+ "learning_rate": 4.5861522092126313e-05,
1676
+ "loss": 2.3629,
1677
  "step": 1390
1678
  },
1679
  {
1680
  "epoch": 0.56,
1681
+ "learning_rate": 4.5832552912879894e-05,
1682
+ "loss": 2.3978,
1683
  "step": 1395
1684
  },
1685
  {
1686
  "epoch": 0.56,
1687
+ "learning_rate": 4.580349191164777e-05,
1688
+ "loss": 2.4372,
1689
  "step": 1400
1690
  },
1691
  {
1692
  "epoch": 0.56,
1693
+ "learning_rate": 4.5774339216519815e-05,
1694
+ "loss": 2.4422,
1695
  "step": 1405
1696
  },
1697
  {
1698
  "epoch": 0.57,
1699
+ "learning_rate": 4.574509495599003e-05,
1700
+ "loss": 2.3786,
1701
  "step": 1410
1702
  },
1703
  {
1704
  "epoch": 0.57,
1705
+ "learning_rate": 4.5715759258956025e-05,
1706
+ "loss": 2.3467,
1707
  "step": 1415
1708
  },
1709
  {
1710
  "epoch": 0.57,
1711
+ "learning_rate": 4.568633225471841e-05,
1712
+ "loss": 2.371,
1713
  "step": 1420
1714
  },
1715
  {
1716
  "epoch": 0.57,
1717
+ "learning_rate": 4.565681407298026e-05,
1718
+ "loss": 2.3776,
1719
  "step": 1425
1720
  },
1721
  {
1722
  "epoch": 0.57,
1723
+ "learning_rate": 4.5627204843846504e-05,
1724
+ "loss": 2.3036,
1725
  "step": 1430
1726
  },
1727
  {
1728
  "epoch": 0.58,
1729
+ "learning_rate": 4.559750469782339e-05,
1730
+ "loss": 2.446,
1731
  "step": 1435
1732
  },
1733
  {
1734
  "epoch": 0.58,
1735
+ "learning_rate": 4.5567713765817886e-05,
1736
+ "loss": 2.3521,
1737
  "step": 1440
1738
  },
1739
  {
1740
  "epoch": 0.58,
1741
+ "learning_rate": 4.553783217913712e-05,
1742
+ "loss": 2.3322,
1743
  "step": 1445
1744
  },
1745
  {
1746
  "epoch": 0.58,
1747
+ "learning_rate": 4.550786006948777e-05,
1748
+ "loss": 2.3966,
1749
  "step": 1450
1750
  },
1751
  {
1752
  "epoch": 0.58,
1753
+ "learning_rate": 4.547779756897554e-05,
1754
+ "loss": 2.35,
1755
  "step": 1455
1756
  },
1757
  {
1758
  "epoch": 0.59,
1759
+ "learning_rate": 4.544764481010451e-05,
1760
+ "loss": 2.3739,
1761
  "step": 1460
1762
  },
1763
  {
1764
  "epoch": 0.59,
1765
+ "learning_rate": 4.541740192577658e-05,
1766
+ "loss": 2.4817,
1767
  "step": 1465
1768
  },
1769
  {
1770
  "epoch": 0.59,
1771
+ "learning_rate": 4.538706904929092e-05,
1772
+ "loss": 2.3857,
1773
  "step": 1470
1774
  },
1775
  {
1776
  "epoch": 0.59,
1777
+ "learning_rate": 4.5356646314343344e-05,
1778
+ "loss": 2.3453,
1779
  "step": 1475
1780
  },
1781
  {
1782
  "epoch": 0.59,
1783
+ "learning_rate": 4.532613385502571e-05,
1784
+ "loss": 2.2916,
1785
  "step": 1480
1786
  },
1787
  {
1788
  "epoch": 0.6,
1789
+ "learning_rate": 4.5295531805825355e-05,
1790
+ "loss": 2.338,
1791
  "step": 1485
1792
  },
1793
  {
1794
  "epoch": 0.6,
1795
+ "learning_rate": 4.526484030162449e-05,
1796
+ "loss": 2.2829,
1797
  "step": 1490
1798
  },
1799
  {
1800
  "epoch": 0.6,
1801
+ "learning_rate": 4.5234059477699635e-05,
1802
+ "loss": 2.3761,
1803
  "step": 1495
1804
  },
1805
  {
1806
  "epoch": 0.6,
1807
+ "learning_rate": 4.520318946972097e-05,
1808
+ "loss": 2.4692,
1809
  "step": 1500
1810
  }
1811
  ],
checkpoint-1500/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c481a87016b183d9406558b8812ee05f00d60a3c721055833e3cdda34cf9bb26
3
  size 4920
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1b42f69ee6ce9030b2875b2859f3499c1e1c8f11e9ffa417d0dbdab2b51173da
3
  size 4920
checkpoint-1600/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:99e8fd83c86ab9ea9056abc7fd393faa01250ff29662b30136e59621a7f041a6
3
  size 3158328
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1d0ac243bf67b6ae16d79399be73255f419e1674feb57d20aa193793e7771a5a
3
  size 3158328
checkpoint-1600/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:57886534876bbfa9a90c1a2e62d045be3e08edfbe736a2729340799d4c19406d
3
  size 6372346
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8669f917c9eb73cae7351eb4a405fc91f7f246c15694a2ded11c92905c52ac68
3
  size 6372346
checkpoint-1600/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:50356d01ef5dc28d14fe30d59cfe0c96edd33f24822273614965624893740377
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:182b3da41b72ff317b5282dd4ff3615d2f9a33f31a069a4cbc972818fb1f2662
3
  size 14244
checkpoint-1600/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3d435c1c5bf756c0f0096057dfbe2cd975deb192aaf27f6333f0272ca803ed8c
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:395d1bd7b4643052e35a1d652b41ae2f9f32d85ee9bcd25c98ed9bdd3d8bed70
3
  size 1064
checkpoint-1600/trainer_state.json CHANGED
@@ -10,1922 +10,1922 @@
10
  "log_history": [
11
  {
12
  "epoch": 0.0,
13
- "learning_rate": 1.999997796189788e-05,
14
- "loss": 2.7465,
15
  "step": 5
16
  },
17
  {
18
  "epoch": 0.0,
19
- "learning_rate": 1.9999911847688657e-05,
20
- "loss": 2.6877,
21
  "step": 10
22
  },
23
  {
24
  "epoch": 0.01,
25
- "learning_rate": 1.999980165766374e-05,
26
- "loss": 2.6848,
27
  "step": 15
28
  },
29
  {
30
  "epoch": 0.01,
31
- "learning_rate": 1.9999647392308798e-05,
32
- "loss": 2.7677,
33
  "step": 20
34
  },
35
  {
36
  "epoch": 0.01,
37
- "learning_rate": 1.9999449052303777e-05,
38
- "loss": 2.7784,
39
  "step": 25
40
  },
41
  {
42
  "epoch": 0.01,
43
- "learning_rate": 1.9999206638522888e-05,
44
- "loss": 2.8161,
45
  "step": 30
46
  },
47
  {
48
  "epoch": 0.01,
49
- "learning_rate": 1.9998920152034595e-05,
50
- "loss": 2.6752,
51
  "step": 35
52
  },
53
  {
54
  "epoch": 0.02,
55
- "learning_rate": 1.9998589594101623e-05,
56
- "loss": 2.6403,
57
  "step": 40
58
  },
59
  {
60
  "epoch": 0.02,
61
- "learning_rate": 1.9998214966180948e-05,
62
- "loss": 2.6433,
63
  "step": 45
64
  },
65
  {
66
  "epoch": 0.02,
67
- "learning_rate": 1.999779626992378e-05,
68
- "loss": 2.5787,
69
  "step": 50
70
  },
71
  {
72
  "epoch": 0.02,
73
- "learning_rate": 1.9997333507175583e-05,
74
- "loss": 2.6812,
75
  "step": 55
76
  },
77
  {
78
  "epoch": 0.02,
79
- "learning_rate": 1.9996826679976033e-05,
80
- "loss": 2.7287,
81
  "step": 60
82
  },
83
  {
84
  "epoch": 0.03,
85
- "learning_rate": 1.9996275790559037e-05,
86
- "loss": 2.6465,
87
  "step": 65
88
  },
89
  {
90
  "epoch": 0.03,
91
- "learning_rate": 1.99956808413527e-05,
92
- "loss": 2.5268,
93
  "step": 70
94
  },
95
  {
96
  "epoch": 0.03,
97
- "learning_rate": 1.999504183497934e-05,
98
- "loss": 2.6893,
99
  "step": 75
100
  },
101
  {
102
  "epoch": 0.03,
103
- "learning_rate": 1.9994358774255444e-05,
104
- "loss": 2.6274,
105
  "step": 80
106
  },
107
  {
108
  "epoch": 0.03,
109
- "learning_rate": 1.9993631662191696e-05,
110
- "loss": 2.6232,
111
  "step": 85
112
  },
113
  {
114
  "epoch": 0.04,
115
- "learning_rate": 1.9992860501992924e-05,
116
- "loss": 2.7188,
117
  "step": 90
118
  },
119
  {
120
  "epoch": 0.04,
121
- "learning_rate": 1.9992045297058108e-05,
122
- "loss": 2.5388,
123
  "step": 95
124
  },
125
  {
126
  "epoch": 0.04,
127
- "learning_rate": 1.9991186050980366e-05,
128
- "loss": 2.6793,
129
  "step": 100
130
  },
131
  {
132
  "epoch": 0.04,
133
- "learning_rate": 1.9990282767546926e-05,
134
- "loss": 2.5523,
135
  "step": 105
136
  },
137
  {
138
  "epoch": 0.04,
139
- "learning_rate": 1.998933545073912e-05,
140
- "loss": 2.5763,
141
  "step": 110
142
  },
143
  {
144
  "epoch": 0.05,
145
- "learning_rate": 1.998834410473236e-05,
146
- "loss": 2.6447,
147
  "step": 115
148
  },
149
  {
150
  "epoch": 0.05,
151
- "learning_rate": 1.998730873389612e-05,
152
- "loss": 2.5579,
153
  "step": 120
154
  },
155
  {
156
  "epoch": 0.05,
157
- "learning_rate": 1.998622934279393e-05,
158
- "loss": 2.5884,
159
  "step": 125
160
  },
161
  {
162
  "epoch": 0.05,
163
- "learning_rate": 1.9985105936183327e-05,
164
- "loss": 2.5051,
165
  "step": 130
166
  },
167
  {
168
  "epoch": 0.05,
169
- "learning_rate": 1.9983938519015868e-05,
170
- "loss": 2.6014,
171
  "step": 135
172
  },
173
  {
174
  "epoch": 0.06,
175
- "learning_rate": 1.998272709643708e-05,
176
- "loss": 2.5878,
177
  "step": 140
178
  },
179
  {
180
  "epoch": 0.06,
181
- "learning_rate": 1.998147167378645e-05,
182
- "loss": 2.6642,
183
  "step": 145
184
  },
185
  {
186
  "epoch": 0.06,
187
- "learning_rate": 1.998017225659742e-05,
188
- "loss": 2.5077,
189
  "step": 150
190
  },
191
  {
192
  "epoch": 0.06,
193
- "learning_rate": 1.9978828850597312e-05,
194
- "loss": 2.5921,
195
  "step": 155
196
  },
197
  {
198
  "epoch": 0.06,
199
- "learning_rate": 1.9977441461707358e-05,
200
- "loss": 2.5577,
201
  "step": 160
202
  },
203
  {
204
  "epoch": 0.07,
205
- "learning_rate": 1.9976010096042634e-05,
206
- "loss": 2.524,
207
  "step": 165
208
  },
209
  {
210
  "epoch": 0.07,
211
- "learning_rate": 1.9974534759912068e-05,
212
- "loss": 2.5708,
213
  "step": 170
214
  },
215
  {
216
  "epoch": 0.07,
217
- "learning_rate": 1.997301545981837e-05,
218
- "loss": 2.5578,
219
  "step": 175
220
  },
221
  {
222
  "epoch": 0.07,
223
- "learning_rate": 1.9971452202458048e-05,
224
- "loss": 2.6874,
225
  "step": 180
226
  },
227
  {
228
  "epoch": 0.07,
229
- "learning_rate": 1.9969844994721338e-05,
230
- "loss": 2.535,
231
  "step": 185
232
  },
233
  {
234
  "epoch": 0.08,
235
- "learning_rate": 1.996819384369221e-05,
236
- "loss": 2.5816,
237
  "step": 190
238
  },
239
  {
240
  "epoch": 0.08,
241
- "learning_rate": 1.9966498756648305e-05,
242
- "loss": 2.6225,
243
  "step": 195
244
  },
245
  {
246
  "epoch": 0.08,
247
- "learning_rate": 1.9964759741060926e-05,
248
- "loss": 2.5387,
249
  "step": 200
250
  },
251
  {
252
  "epoch": 0.08,
253
- "learning_rate": 1.9962976804594993e-05,
254
- "loss": 2.524,
255
  "step": 205
256
  },
257
  {
258
  "epoch": 0.08,
259
- "learning_rate": 1.996114995510901e-05,
260
- "loss": 2.488,
261
  "step": 210
262
  },
263
  {
264
  "epoch": 0.09,
265
- "learning_rate": 1.9959279200655044e-05,
266
- "loss": 2.5824,
267
  "step": 215
268
  },
269
  {
270
  "epoch": 0.09,
271
- "learning_rate": 1.9957364549478663e-05,
272
- "loss": 2.5828,
273
  "step": 220
274
  },
275
  {
276
  "epoch": 0.09,
277
- "learning_rate": 1.9955406010018928e-05,
278
- "loss": 2.5137,
279
  "step": 225
280
  },
281
  {
282
  "epoch": 0.09,
283
- "learning_rate": 1.9953403590908334e-05,
284
- "loss": 2.5539,
285
  "step": 230
286
  },
287
  {
288
  "epoch": 0.09,
289
- "learning_rate": 1.995135730097278e-05,
290
- "loss": 2.6099,
291
  "step": 235
292
  },
293
  {
294
  "epoch": 0.1,
295
- "learning_rate": 1.994926714923155e-05,
296
- "loss": 2.5309,
297
  "step": 240
298
  },
299
  {
300
  "epoch": 0.1,
301
- "learning_rate": 1.9947133144897225e-05,
302
- "loss": 2.5152,
303
  "step": 245
304
  },
305
  {
306
  "epoch": 0.1,
307
- "learning_rate": 1.9944955297375693e-05,
308
- "loss": 2.4738,
309
  "step": 250
310
  },
311
  {
312
  "epoch": 0.1,
313
- "learning_rate": 1.9942733616266076e-05,
314
- "loss": 2.5173,
315
  "step": 255
316
  },
317
  {
318
  "epoch": 0.1,
319
- "learning_rate": 1.99404681113607e-05,
320
- "loss": 2.578,
321
  "step": 260
322
  },
323
  {
324
  "epoch": 0.11,
325
- "learning_rate": 1.993815879264506e-05,
326
- "loss": 2.5089,
327
  "step": 265
328
  },
329
  {
330
  "epoch": 0.11,
331
- "learning_rate": 1.9935805670297744e-05,
332
- "loss": 2.5872,
333
  "step": 270
334
  },
335
  {
336
  "epoch": 0.11,
337
- "learning_rate": 1.993340875469043e-05,
338
- "loss": 2.5882,
339
  "step": 275
340
  },
341
  {
342
  "epoch": 0.11,
343
- "learning_rate": 1.993096805638781e-05,
344
- "loss": 2.5789,
345
  "step": 280
346
  },
347
  {
348
  "epoch": 0.11,
349
- "learning_rate": 1.9928483586147553e-05,
350
- "loss": 2.488,
351
  "step": 285
352
  },
353
  {
354
  "epoch": 0.12,
355
- "learning_rate": 1.9925955354920265e-05,
356
- "loss": 2.617,
357
  "step": 290
358
  },
359
  {
360
  "epoch": 0.12,
361
- "learning_rate": 1.992338337384943e-05,
362
- "loss": 2.5752,
363
  "step": 295
364
  },
365
  {
366
  "epoch": 0.12,
367
- "learning_rate": 1.992076765427136e-05,
368
- "loss": 2.5529,
369
  "step": 300
370
  },
371
  {
372
  "epoch": 0.12,
373
- "learning_rate": 1.9918108207715156e-05,
374
- "loss": 2.4942,
375
  "step": 305
376
  },
377
  {
378
  "epoch": 0.12,
379
- "learning_rate": 1.991540504590265e-05,
380
- "loss": 2.5456,
381
  "step": 310
382
  },
383
  {
384
  "epoch": 0.13,
385
- "learning_rate": 1.991265818074835e-05,
386
- "loss": 2.5957,
387
  "step": 315
388
  },
389
  {
390
  "epoch": 0.13,
391
- "learning_rate": 1.99098676243594e-05,
392
- "loss": 2.5699,
393
  "step": 320
394
  },
395
  {
396
  "epoch": 0.13,
397
- "learning_rate": 1.9907033389035512e-05,
398
- "loss": 2.5544,
399
  "step": 325
400
  },
401
  {
402
  "epoch": 0.13,
403
- "learning_rate": 1.9904155487268912e-05,
404
- "loss": 2.538,
405
  "step": 330
406
  },
407
  {
408
  "epoch": 0.13,
409
- "learning_rate": 1.990123393174431e-05,
410
- "loss": 2.6055,
411
  "step": 335
412
  },
413
  {
414
  "epoch": 0.14,
415
- "learning_rate": 1.9898268735338807e-05,
416
- "loss": 2.5846,
417
  "step": 340
418
  },
419
  {
420
  "epoch": 0.14,
421
- "learning_rate": 1.9895259911121866e-05,
422
- "loss": 2.5405,
423
  "step": 345
424
  },
425
  {
426
  "epoch": 0.14,
427
- "learning_rate": 1.9892207472355243e-05,
428
- "loss": 2.5162,
429
  "step": 350
430
  },
431
  {
432
  "epoch": 0.14,
433
- "learning_rate": 1.988911143249292e-05,
434
- "loss": 2.5484,
435
  "step": 355
436
  },
437
  {
438
  "epoch": 0.14,
439
- "learning_rate": 1.9885971805181083e-05,
440
- "loss": 2.671,
441
  "step": 360
442
  },
443
  {
444
  "epoch": 0.15,
445
- "learning_rate": 1.9882788604258e-05,
446
- "loss": 2.5696,
447
  "step": 365
448
  },
449
  {
450
  "epoch": 0.15,
451
- "learning_rate": 1.987956184375402e-05,
452
- "loss": 2.5021,
453
  "step": 370
454
  },
455
  {
456
  "epoch": 0.15,
457
- "learning_rate": 1.9876291537891482e-05,
458
- "loss": 2.5644,
459
  "step": 375
460
  },
461
  {
462
  "epoch": 0.15,
463
- "learning_rate": 1.9872977701084645e-05,
464
- "loss": 2.5386,
465
  "step": 380
466
  },
467
  {
468
  "epoch": 0.15,
469
- "learning_rate": 1.9869620347939652e-05,
470
- "loss": 2.554,
471
  "step": 385
472
  },
473
  {
474
  "epoch": 0.16,
475
- "learning_rate": 1.9866219493254433e-05,
476
- "loss": 2.4798,
477
  "step": 390
478
  },
479
  {
480
  "epoch": 0.16,
481
- "learning_rate": 1.986277515201867e-05,
482
- "loss": 2.538,
483
  "step": 395
484
  },
485
  {
486
  "epoch": 0.16,
487
- "learning_rate": 1.9859287339413714e-05,
488
- "loss": 2.5041,
489
  "step": 400
490
  },
491
  {
492
  "epoch": 0.16,
493
- "learning_rate": 1.9855756070812514e-05,
494
- "loss": 2.7067,
495
  "step": 405
496
  },
497
  {
498
  "epoch": 0.16,
499
- "learning_rate": 1.9852181361779563e-05,
500
- "loss": 2.5054,
501
  "step": 410
502
  },
503
  {
504
  "epoch": 0.17,
505
- "learning_rate": 1.984856322807082e-05,
506
- "loss": 2.4815,
507
  "step": 415
508
  },
509
  {
510
  "epoch": 0.17,
511
- "learning_rate": 1.9844901685633648e-05,
512
- "loss": 2.5885,
513
  "step": 420
514
  },
515
  {
516
  "epoch": 0.17,
517
- "learning_rate": 1.9841196750606735e-05,
518
- "loss": 2.521,
519
  "step": 425
520
  },
521
  {
522
  "epoch": 0.17,
523
- "learning_rate": 1.9837448439320027e-05,
524
- "loss": 2.4937,
525
  "step": 430
526
  },
527
  {
528
  "epoch": 0.17,
529
- "learning_rate": 1.983365676829466e-05,
530
- "loss": 2.5877,
531
  "step": 435
532
  },
533
  {
534
  "epoch": 0.18,
535
- "learning_rate": 1.9829821754242885e-05,
536
- "loss": 2.5761,
537
  "step": 440
538
  },
539
  {
540
  "epoch": 0.18,
541
- "learning_rate": 1.9825943414067974e-05,
542
- "loss": 2.4917,
543
  "step": 445
544
  },
545
  {
546
  "epoch": 0.18,
547
- "learning_rate": 1.9822021764864194e-05,
548
- "loss": 2.5434,
549
  "step": 450
550
  },
551
  {
552
  "epoch": 0.18,
553
- "learning_rate": 1.9818056823916675e-05,
554
- "loss": 2.5906,
555
  "step": 455
556
  },
557
  {
558
  "epoch": 0.18,
559
- "learning_rate": 1.9814048608701374e-05,
560
- "loss": 2.5508,
561
  "step": 460
562
  },
563
  {
564
  "epoch": 0.19,
565
- "learning_rate": 1.980999713688499e-05,
566
- "loss": 2.6005,
567
  "step": 465
568
  },
569
  {
570
  "epoch": 0.19,
571
- "learning_rate": 1.980590242632486e-05,
572
- "loss": 2.523,
573
  "step": 470
574
  },
575
  {
576
  "epoch": 0.19,
577
- "learning_rate": 1.9801764495068923e-05,
578
- "loss": 2.5453,
579
  "step": 475
580
  },
581
  {
582
  "epoch": 0.19,
583
- "learning_rate": 1.979758336135561e-05,
584
- "loss": 2.6426,
585
  "step": 480
586
  },
587
  {
588
  "epoch": 0.19,
589
- "learning_rate": 1.9793359043613768e-05,
590
- "loss": 2.5454,
591
  "step": 485
592
  },
593
  {
594
  "epoch": 0.2,
595
- "learning_rate": 1.9789091560462587e-05,
596
- "loss": 2.6071,
597
  "step": 490
598
  },
599
  {
600
  "epoch": 0.2,
601
- "learning_rate": 1.9784780930711514e-05,
602
- "loss": 2.5913,
603
  "step": 495
604
  },
605
  {
606
  "epoch": 0.2,
607
- "learning_rate": 1.9780427173360165e-05,
608
- "loss": 2.5082,
609
  "step": 500
610
  },
611
  {
612
  "epoch": 0.2,
613
- "learning_rate": 1.977603030759825e-05,
614
- "loss": 2.565,
615
  "step": 505
616
  },
617
  {
618
  "epoch": 0.2,
619
- "learning_rate": 1.977159035280549e-05,
620
- "loss": 2.6365,
621
  "step": 510
622
  },
623
  {
624
  "epoch": 0.21,
625
- "learning_rate": 1.9767107328551515e-05,
626
- "loss": 2.6872,
627
  "step": 515
628
  },
629
  {
630
  "epoch": 0.21,
631
- "learning_rate": 1.9762581254595797e-05,
632
- "loss": 2.6222,
633
  "step": 520
634
  },
635
  {
636
  "epoch": 0.21,
637
- "learning_rate": 1.975801215088755e-05,
638
- "loss": 2.5633,
639
  "step": 525
640
  },
641
  {
642
  "epoch": 0.21,
643
- "learning_rate": 1.9753400037565653e-05,
644
- "loss": 2.6579,
645
  "step": 530
646
  },
647
  {
648
  "epoch": 0.21,
649
- "learning_rate": 1.9748744934958548e-05,
650
- "loss": 2.5572,
651
  "step": 535
652
  },
653
  {
654
  "epoch": 0.22,
655
- "learning_rate": 1.974404686358416e-05,
656
- "loss": 2.5845,
657
  "step": 540
658
  },
659
  {
660
  "epoch": 0.22,
661
- "learning_rate": 1.97393058441498e-05,
662
- "loss": 2.6484,
663
  "step": 545
664
  },
665
  {
666
  "epoch": 0.22,
667
- "learning_rate": 1.973452189755209e-05,
668
- "loss": 2.5473,
669
  "step": 550
670
  },
671
  {
672
  "epoch": 0.22,
673
- "learning_rate": 1.9729695044876847e-05,
674
- "loss": 2.5995,
675
  "step": 555
676
  },
677
  {
678
  "epoch": 0.22,
679
- "learning_rate": 1.9724825307399003e-05,
680
- "loss": 2.5473,
681
  "step": 560
682
  },
683
  {
684
  "epoch": 0.23,
685
- "learning_rate": 1.971991270658252e-05,
686
- "loss": 2.5798,
687
  "step": 565
688
  },
689
  {
690
  "epoch": 0.23,
691
- "learning_rate": 1.971495726408027e-05,
692
- "loss": 2.655,
693
  "step": 570
694
  },
695
  {
696
  "epoch": 0.23,
697
- "learning_rate": 1.970995900173397e-05,
698
- "loss": 2.6904,
699
  "step": 575
700
  },
701
  {
702
  "epoch": 0.23,
703
- "learning_rate": 1.9704917941574053e-05,
704
- "loss": 2.6973,
705
  "step": 580
706
  },
707
  {
708
  "epoch": 0.23,
709
- "learning_rate": 1.969983410581961e-05,
710
- "loss": 2.6518,
711
  "step": 585
712
  },
713
  {
714
  "epoch": 0.24,
715
- "learning_rate": 1.969470751687825e-05,
716
- "loss": 2.5849,
717
  "step": 590
718
  },
719
  {
720
  "epoch": 0.24,
721
- "learning_rate": 1.9689538197346035e-05,
722
- "loss": 2.4715,
723
  "step": 595
724
  },
725
  {
726
  "epoch": 0.24,
727
- "learning_rate": 1.9684326170007365e-05,
728
- "loss": 2.7246,
729
  "step": 600
730
  },
731
  {
732
  "epoch": 0.24,
733
- "learning_rate": 1.9679071457834874e-05,
734
- "loss": 2.5482,
735
  "step": 605
736
  },
737
  {
738
  "epoch": 0.24,
739
- "learning_rate": 1.967377408398934e-05,
740
- "loss": 2.6084,
741
  "step": 610
742
  },
743
  {
744
  "epoch": 0.25,
745
- "learning_rate": 1.966843407181958e-05,
746
- "loss": 2.5144,
747
  "step": 615
748
  },
749
  {
750
  "epoch": 0.25,
751
- "learning_rate": 1.9663051444862335e-05,
752
- "loss": 2.7663,
753
  "step": 620
754
  },
755
  {
756
  "epoch": 0.25,
757
- "learning_rate": 1.9657626226842187e-05,
758
- "loss": 2.5697,
759
  "step": 625
760
  },
761
  {
762
  "epoch": 0.25,
763
- "learning_rate": 1.9652158441671435e-05,
764
- "loss": 2.6379,
765
  "step": 630
766
  },
767
  {
768
  "epoch": 0.25,
769
- "learning_rate": 1.964664811345e-05,
770
- "loss": 2.6784,
771
  "step": 635
772
  },
773
  {
774
  "epoch": 0.26,
775
- "learning_rate": 1.964109526646532e-05,
776
- "loss": 2.6936,
777
  "step": 640
778
  },
779
  {
780
  "epoch": 0.26,
781
- "learning_rate": 1.963549992519223e-05,
782
- "loss": 2.5672,
783
  "step": 645
784
  },
785
  {
786
  "epoch": 0.26,
787
- "learning_rate": 1.962986211429288e-05,
788
- "loss": 2.62,
789
  "step": 650
790
  },
791
  {
792
  "epoch": 0.26,
793
- "learning_rate": 1.9624181858616593e-05,
794
- "loss": 2.7293,
795
  "step": 655
796
  },
797
  {
798
  "epoch": 0.26,
799
- "learning_rate": 1.9618459183199782e-05,
800
- "loss": 2.6636,
801
  "step": 660
802
  },
803
  {
804
  "epoch": 0.27,
805
- "learning_rate": 1.961269411326583e-05,
806
- "loss": 2.5106,
807
  "step": 665
808
  },
809
  {
810
  "epoch": 0.27,
811
- "learning_rate": 1.9606886674224977e-05,
812
- "loss": 2.6878,
813
  "step": 670
814
  },
815
  {
816
  "epoch": 0.27,
817
- "learning_rate": 1.960103689167421e-05,
818
- "loss": 2.693,
819
  "step": 675
820
  },
821
  {
822
  "epoch": 0.27,
823
- "learning_rate": 1.9595144791397142e-05,
824
- "loss": 2.6562,
825
  "step": 680
826
  },
827
  {
828
  "epoch": 0.27,
829
- "learning_rate": 1.9589210399363925e-05,
830
- "loss": 2.6269,
831
  "step": 685
832
  },
833
  {
834
  "epoch": 0.28,
835
- "learning_rate": 1.95832337417311e-05,
836
- "loss": 2.6459,
837
  "step": 690
838
  },
839
  {
840
  "epoch": 0.28,
841
- "learning_rate": 1.9577214844841515e-05,
842
- "loss": 2.5765,
843
  "step": 695
844
  },
845
  {
846
  "epoch": 0.28,
847
- "learning_rate": 1.957115373522417e-05,
848
- "loss": 2.6113,
849
  "step": 700
850
  },
851
  {
852
  "epoch": 0.28,
853
- "learning_rate": 1.956505043959414e-05,
854
- "loss": 2.6641,
855
  "step": 705
856
  },
857
  {
858
  "epoch": 0.28,
859
- "learning_rate": 1.955890498485244e-05,
860
- "loss": 2.6493,
861
  "step": 710
862
  },
863
  {
864
  "epoch": 0.29,
865
- "learning_rate": 1.9552717398085898e-05,
866
- "loss": 2.6135,
867
  "step": 715
868
  },
869
  {
870
  "epoch": 0.29,
871
- "learning_rate": 1.954648770656705e-05,
872
- "loss": 2.6182,
873
  "step": 720
874
  },
875
  {
876
  "epoch": 0.29,
877
- "learning_rate": 1.954021593775401e-05,
878
- "loss": 2.6487,
879
  "step": 725
880
  },
881
  {
882
  "epoch": 0.29,
883
- "learning_rate": 1.9533902119290352e-05,
884
- "loss": 2.5927,
885
  "step": 730
886
  },
887
  {
888
  "epoch": 0.29,
889
- "learning_rate": 1.952754627900499e-05,
890
- "loss": 2.5825,
891
  "step": 735
892
  },
893
  {
894
  "epoch": 0.3,
895
- "learning_rate": 1.9521148444912065e-05,
896
- "loss": 2.7193,
897
  "step": 740
898
  },
899
  {
900
  "epoch": 0.3,
901
- "learning_rate": 1.9514708645210793e-05,
902
- "loss": 2.5802,
903
  "step": 745
904
  },
905
  {
906
  "epoch": 0.3,
907
- "learning_rate": 1.9508226908285368e-05,
908
- "loss": 2.6628,
909
  "step": 750
910
  },
911
  {
912
  "epoch": 0.3,
913
- "learning_rate": 1.950170326270483e-05,
914
- "loss": 2.5847,
915
  "step": 755
916
  },
917
  {
918
  "epoch": 0.3,
919
- "learning_rate": 1.9495137737222925e-05,
920
- "loss": 2.6594,
921
  "step": 760
922
  },
923
  {
924
  "epoch": 0.31,
925
- "learning_rate": 1.9488530360778007e-05,
926
- "loss": 2.6096,
927
  "step": 765
928
  },
929
  {
930
  "epoch": 0.31,
931
- "learning_rate": 1.948188116249287e-05,
932
- "loss": 2.6378,
933
  "step": 770
934
  },
935
  {
936
  "epoch": 0.31,
937
- "learning_rate": 1.9475190171674675e-05,
938
- "loss": 2.5984,
939
  "step": 775
940
  },
941
  {
942
  "epoch": 0.31,
943
- "learning_rate": 1.9468457417814753e-05,
944
- "loss": 2.6437,
945
  "step": 780
946
  },
947
  {
948
  "epoch": 0.31,
949
- "learning_rate": 1.9461682930588534e-05,
950
- "loss": 2.6522,
951
  "step": 785
952
  },
953
  {
954
  "epoch": 0.32,
955
- "learning_rate": 1.9454866739855384e-05,
956
- "loss": 2.6242,
957
  "step": 790
958
  },
959
  {
960
  "epoch": 0.32,
961
- "learning_rate": 1.944800887565849e-05,
962
- "loss": 2.5961,
963
  "step": 795
964
  },
965
  {
966
  "epoch": 0.32,
967
- "learning_rate": 1.9441109368224704e-05,
968
- "loss": 2.6365,
969
  "step": 800
970
  },
971
  {
972
  "epoch": 0.32,
973
- "learning_rate": 1.9434168247964447e-05,
974
- "loss": 2.5674,
975
  "step": 805
976
  },
977
  {
978
  "epoch": 0.32,
979
- "learning_rate": 1.9427185545471537e-05,
980
- "loss": 2.6369,
981
  "step": 810
982
  },
983
  {
984
  "epoch": 0.33,
985
- "learning_rate": 1.9420161291523076e-05,
986
- "loss": 2.5763,
987
  "step": 815
988
  },
989
  {
990
  "epoch": 0.33,
991
- "learning_rate": 1.941309551707931e-05,
992
- "loss": 2.5651,
993
  "step": 820
994
  },
995
  {
996
  "epoch": 0.33,
997
- "learning_rate": 1.9405988253283492e-05,
998
- "loss": 2.6223,
999
  "step": 825
1000
  },
1001
  {
1002
  "epoch": 0.33,
1003
- "learning_rate": 1.939883953146174e-05,
1004
- "loss": 2.6616,
1005
  "step": 830
1006
  },
1007
  {
1008
  "epoch": 0.33,
1009
- "learning_rate": 1.939164938312291e-05,
1010
- "loss": 2.6496,
1011
  "step": 835
1012
  },
1013
  {
1014
  "epoch": 0.34,
1015
- "learning_rate": 1.9384417839958443e-05,
1016
- "loss": 2.7161,
1017
  "step": 840
1018
  },
1019
  {
1020
  "epoch": 0.34,
1021
- "learning_rate": 1.937714493384224e-05,
1022
- "loss": 2.6047,
1023
  "step": 845
1024
  },
1025
  {
1026
  "epoch": 0.34,
1027
- "learning_rate": 1.936983069683051e-05,
1028
- "loss": 2.5978,
1029
  "step": 850
1030
  },
1031
  {
1032
  "epoch": 0.34,
1033
- "learning_rate": 1.936247516116163e-05,
1034
- "loss": 2.6331,
1035
  "step": 855
1036
  },
1037
  {
1038
  "epoch": 0.34,
1039
- "learning_rate": 1.935507835925601e-05,
1040
- "loss": 2.5679,
1041
  "step": 860
1042
  },
1043
  {
1044
  "epoch": 0.35,
1045
- "learning_rate": 1.934764032371595e-05,
1046
- "loss": 2.6647,
1047
  "step": 865
1048
  },
1049
  {
1050
  "epoch": 0.35,
1051
- "learning_rate": 1.9340161087325483e-05,
1052
- "loss": 2.6142,
1053
  "step": 870
1054
  },
1055
  {
1056
  "epoch": 0.35,
1057
- "learning_rate": 1.9332640683050243e-05,
1058
- "loss": 2.6181,
1059
  "step": 875
1060
  },
1061
  {
1062
  "epoch": 0.35,
1063
- "learning_rate": 1.932507914403732e-05,
1064
- "loss": 2.6105,
1065
  "step": 880
1066
  },
1067
  {
1068
  "epoch": 0.35,
1069
- "learning_rate": 1.9317476503615108e-05,
1070
- "loss": 2.6415,
1071
  "step": 885
1072
  },
1073
  {
1074
  "epoch": 0.36,
1075
- "learning_rate": 1.9309832795293156e-05,
1076
- "loss": 2.6736,
1077
  "step": 890
1078
  },
1079
  {
1080
  "epoch": 0.36,
1081
- "learning_rate": 1.930214805276204e-05,
1082
- "loss": 2.6507,
1083
  "step": 895
1084
  },
1085
  {
1086
  "epoch": 0.36,
1087
- "learning_rate": 1.9294422309893177e-05,
1088
- "loss": 2.6771,
1089
  "step": 900
1090
  },
1091
  {
1092
  "epoch": 0.36,
1093
- "learning_rate": 1.9286655600738707e-05,
1094
- "loss": 2.6763,
1095
  "step": 905
1096
  },
1097
  {
1098
  "epoch": 0.36,
1099
- "learning_rate": 1.9278847959531348e-05,
1100
- "loss": 2.7135,
1101
  "step": 910
1102
  },
1103
  {
1104
  "epoch": 0.37,
1105
- "learning_rate": 1.927099942068421e-05,
1106
- "loss": 2.6166,
1107
  "step": 915
1108
  },
1109
  {
1110
  "epoch": 0.37,
1111
- "learning_rate": 1.9263110018790673e-05,
1112
- "loss": 2.6238,
1113
  "step": 920
1114
  },
1115
  {
1116
  "epoch": 0.37,
1117
- "learning_rate": 1.9255179788624233e-05,
1118
- "loss": 2.6424,
1119
  "step": 925
1120
  },
1121
  {
1122
  "epoch": 0.37,
1123
- "learning_rate": 1.9247208765138325e-05,
1124
- "loss": 2.6023,
1125
  "step": 930
1126
  },
1127
  {
1128
  "epoch": 0.37,
1129
- "learning_rate": 1.9239196983466204e-05,
1130
- "loss": 2.58,
1131
  "step": 935
1132
  },
1133
  {
1134
  "epoch": 0.38,
1135
- "learning_rate": 1.9231144478920756e-05,
1136
- "loss": 2.6173,
1137
  "step": 940
1138
  },
1139
  {
1140
  "epoch": 0.38,
1141
- "learning_rate": 1.9223051286994368e-05,
1142
- "loss": 2.628,
1143
  "step": 945
1144
  },
1145
  {
1146
  "epoch": 0.38,
1147
- "learning_rate": 1.9214917443358753e-05,
1148
- "loss": 2.6868,
1149
  "step": 950
1150
  },
1151
  {
1152
  "epoch": 0.38,
1153
- "learning_rate": 1.9206742983864813e-05,
1154
- "loss": 2.6342,
1155
  "step": 955
1156
  },
1157
  {
1158
  "epoch": 0.38,
1159
- "learning_rate": 1.9198527944542462e-05,
1160
- "loss": 2.5934,
1161
  "step": 960
1162
  },
1163
  {
1164
  "epoch": 0.39,
1165
- "learning_rate": 1.919027236160047e-05,
1166
- "loss": 2.6354,
1167
  "step": 965
1168
  },
1169
  {
1170
  "epoch": 0.39,
1171
- "learning_rate": 1.9181976271426315e-05,
1172
- "loss": 2.5955,
1173
  "step": 970
1174
  },
1175
  {
1176
  "epoch": 0.39,
1177
- "learning_rate": 1.9173639710586015e-05,
1178
- "loss": 2.6134,
1179
  "step": 975
1180
  },
1181
  {
1182
  "epoch": 0.39,
1183
- "learning_rate": 1.9165262715823966e-05,
1184
- "loss": 2.7,
1185
  "step": 980
1186
  },
1187
  {
1188
  "epoch": 0.39,
1189
- "learning_rate": 1.915684532406278e-05,
1190
- "loss": 2.7197,
1191
  "step": 985
1192
  },
1193
  {
1194
  "epoch": 0.4,
1195
- "learning_rate": 1.9148387572403123e-05,
1196
- "loss": 2.4881,
1197
  "step": 990
1198
  },
1199
  {
1200
  "epoch": 0.4,
1201
- "learning_rate": 1.913988949812356e-05,
1202
- "loss": 2.6999,
1203
  "step": 995
1204
  },
1205
  {
1206
  "epoch": 0.4,
1207
- "learning_rate": 1.9131351138680368e-05,
1208
- "loss": 2.5981,
1209
  "step": 1000
1210
  },
1211
  {
1212
  "epoch": 0.4,
1213
- "learning_rate": 1.9122772531707405e-05,
1214
- "loss": 2.5515,
1215
  "step": 1005
1216
  },
1217
  {
1218
  "epoch": 0.4,
1219
- "learning_rate": 1.9114153715015905e-05,
1220
- "loss": 2.623,
1221
  "step": 1010
1222
  },
1223
  {
1224
  "epoch": 0.41,
1225
- "learning_rate": 1.9105494726594344e-05,
1226
- "loss": 2.6006,
1227
  "step": 1015
1228
  },
1229
  {
1230
  "epoch": 0.41,
1231
- "learning_rate": 1.9096795604608258e-05,
1232
- "loss": 2.5929,
1233
  "step": 1020
1234
  },
1235
  {
1236
  "epoch": 0.41,
1237
- "learning_rate": 1.9088056387400074e-05,
1238
- "loss": 2.5396,
1239
  "step": 1025
1240
  },
1241
  {
1242
  "epoch": 0.41,
1243
- "learning_rate": 1.907927711348894e-05,
1244
- "loss": 2.6428,
1245
  "step": 1030
1246
  },
1247
  {
1248
  "epoch": 0.41,
1249
- "learning_rate": 1.9070457821570566e-05,
1250
- "loss": 2.6296,
1251
  "step": 1035
1252
  },
1253
  {
1254
  "epoch": 0.42,
1255
- "learning_rate": 1.9061598550517048e-05,
1256
- "loss": 2.534,
1257
  "step": 1040
1258
  },
1259
  {
1260
  "epoch": 0.42,
1261
- "learning_rate": 1.9052699339376685e-05,
1262
- "loss": 2.6049,
1263
  "step": 1045
1264
  },
1265
  {
1266
  "epoch": 0.42,
1267
- "learning_rate": 1.9043760227373817e-05,
1268
- "loss": 2.5776,
1269
  "step": 1050
1270
  },
1271
  {
1272
  "epoch": 0.42,
1273
- "learning_rate": 1.9034781253908664e-05,
1274
- "loss": 2.7483,
1275
  "step": 1055
1276
  },
1277
  {
1278
  "epoch": 0.43,
1279
- "learning_rate": 1.902576245855713e-05,
1280
- "loss": 2.6973,
1281
  "step": 1060
1282
  },
1283
  {
1284
  "epoch": 0.43,
1285
- "learning_rate": 1.9016703881070646e-05,
1286
- "loss": 2.645,
1287
  "step": 1065
1288
  },
1289
  {
1290
  "epoch": 0.43,
1291
- "learning_rate": 1.900760556137598e-05,
1292
- "loss": 2.5775,
1293
  "step": 1070
1294
  },
1295
  {
1296
  "epoch": 0.43,
1297
- "learning_rate": 1.899846753957507e-05,
1298
- "loss": 2.6872,
1299
  "step": 1075
1300
  },
1301
  {
1302
  "epoch": 0.43,
1303
- "learning_rate": 1.8989289855944846e-05,
1304
- "loss": 2.5929,
1305
  "step": 1080
1306
  },
1307
  {
1308
  "epoch": 0.44,
1309
- "learning_rate": 1.8980072550937058e-05,
1310
- "loss": 2.5668,
1311
  "step": 1085
1312
  },
1313
  {
1314
  "epoch": 0.44,
1315
- "learning_rate": 1.8970815665178086e-05,
1316
- "loss": 2.6873,
1317
  "step": 1090
1318
  },
1319
  {
1320
  "epoch": 0.44,
1321
- "learning_rate": 1.896151923946877e-05,
1322
- "loss": 2.5679,
1323
  "step": 1095
1324
  },
1325
  {
1326
  "epoch": 0.44,
1327
- "learning_rate": 1.8952183314784224e-05,
1328
- "loss": 2.6734,
1329
  "step": 1100
1330
  },
1331
  {
1332
  "epoch": 0.44,
1333
- "learning_rate": 1.8942807932273664e-05,
1334
- "loss": 2.6083,
1335
  "step": 1105
1336
  },
1337
  {
1338
  "epoch": 0.45,
1339
- "learning_rate": 1.893339313326021e-05,
1340
- "loss": 2.6189,
1341
  "step": 1110
1342
  },
1343
  {
1344
  "epoch": 0.45,
1345
- "learning_rate": 1.892393895924073e-05,
1346
- "loss": 2.5117,
1347
  "step": 1115
1348
  },
1349
  {
1350
  "epoch": 0.45,
1351
- "learning_rate": 1.8914445451885633e-05,
1352
- "loss": 2.6646,
1353
  "step": 1120
1354
  },
1355
  {
1356
  "epoch": 0.45,
1357
- "learning_rate": 1.890491265303869e-05,
1358
- "loss": 2.7551,
1359
  "step": 1125
1360
  },
1361
  {
1362
  "epoch": 0.45,
1363
- "learning_rate": 1.8895340604716867e-05,
1364
- "loss": 2.7003,
1365
  "step": 1130
1366
  },
1367
  {
1368
  "epoch": 0.46,
1369
- "learning_rate": 1.888572934911012e-05,
1370
- "loss": 2.6109,
1371
  "step": 1135
1372
  },
1373
  {
1374
  "epoch": 0.46,
1375
- "learning_rate": 1.8876078928581207e-05,
1376
- "loss": 2.6227,
1377
  "step": 1140
1378
  },
1379
  {
1380
  "epoch": 0.46,
1381
- "learning_rate": 1.8866389385665525e-05,
1382
- "loss": 2.5943,
1383
  "step": 1145
1384
  },
1385
  {
1386
  "epoch": 0.46,
1387
- "learning_rate": 1.8856660763070904e-05,
1388
- "loss": 2.7203,
1389
  "step": 1150
1390
  },
1391
  {
1392
  "epoch": 0.46,
1393
- "learning_rate": 1.8846893103677414e-05,
1394
- "loss": 2.561,
1395
  "step": 1155
1396
  },
1397
  {
1398
  "epoch": 0.47,
1399
- "learning_rate": 1.8837086450537195e-05,
1400
- "loss": 2.6959,
1401
  "step": 1160
1402
  },
1403
  {
1404
  "epoch": 0.47,
1405
- "learning_rate": 1.8827240846874245e-05,
1406
- "loss": 2.7047,
1407
  "step": 1165
1408
  },
1409
  {
1410
  "epoch": 0.47,
1411
- "learning_rate": 1.8817356336084252e-05,
1412
- "loss": 2.5853,
1413
  "step": 1170
1414
  },
1415
  {
1416
  "epoch": 0.47,
1417
- "learning_rate": 1.8807432961734388e-05,
1418
- "loss": 2.6165,
1419
  "step": 1175
1420
  },
1421
  {
1422
  "epoch": 0.47,
1423
- "learning_rate": 1.8797470767563122e-05,
1424
- "loss": 2.756,
1425
  "step": 1180
1426
  },
1427
  {
1428
  "epoch": 0.48,
1429
- "learning_rate": 1.878746979748002e-05,
1430
- "loss": 2.667,
1431
  "step": 1185
1432
  },
1433
  {
1434
  "epoch": 0.48,
1435
- "learning_rate": 1.8777430095565563e-05,
1436
- "loss": 2.7607,
1437
  "step": 1190
1438
  },
1439
  {
1440
  "epoch": 0.48,
1441
- "learning_rate": 1.876735170607095e-05,
1442
- "loss": 2.6816,
1443
  "step": 1195
1444
  },
1445
  {
1446
  "epoch": 0.48,
1447
- "learning_rate": 1.8757234673417892e-05,
1448
- "loss": 2.688,
1449
  "step": 1200
1450
  },
1451
  {
1452
  "epoch": 0.48,
1453
- "learning_rate": 1.874707904219843e-05,
1454
- "loss": 2.7286,
1455
  "step": 1205
1456
  },
1457
  {
1458
  "epoch": 0.49,
1459
- "learning_rate": 1.8736884857174733e-05,
1460
- "loss": 2.5721,
1461
  "step": 1210
1462
  },
1463
  {
1464
  "epoch": 0.49,
1465
- "learning_rate": 1.87266521632789e-05,
1466
- "loss": 2.6588,
1467
  "step": 1215
1468
  },
1469
  {
1470
  "epoch": 0.49,
1471
- "learning_rate": 1.8716381005612756e-05,
1472
- "loss": 2.625,
1473
  "step": 1220
1474
  },
1475
  {
1476
  "epoch": 0.49,
1477
- "learning_rate": 1.870607142944767e-05,
1478
- "loss": 2.5962,
1479
  "step": 1225
1480
  },
1481
  {
1482
  "epoch": 0.49,
1483
- "learning_rate": 1.869572348022434e-05,
1484
- "loss": 2.7405,
1485
  "step": 1230
1486
  },
1487
  {
1488
  "epoch": 0.5,
1489
- "learning_rate": 1.8685337203552602e-05,
1490
- "loss": 2.5352,
1491
  "step": 1235
1492
  },
1493
  {
1494
  "epoch": 0.5,
1495
- "learning_rate": 1.8674912645211212e-05,
1496
- "loss": 2.5935,
1497
  "step": 1240
1498
  },
1499
  {
1500
  "epoch": 0.5,
1501
- "learning_rate": 1.8664449851147675e-05,
1502
- "loss": 2.72,
1503
  "step": 1245
1504
  },
1505
  {
1506
  "epoch": 0.5,
1507
- "learning_rate": 1.8653948867478008e-05,
1508
- "loss": 2.6364,
1509
  "step": 1250
1510
  },
1511
  {
1512
  "epoch": 0.5,
1513
- "learning_rate": 1.864340974048657e-05,
1514
- "loss": 2.6966,
1515
  "step": 1255
1516
  },
1517
  {
1518
  "epoch": 0.51,
1519
- "learning_rate": 1.8632832516625825e-05,
1520
- "loss": 2.6181,
1521
  "step": 1260
1522
  },
1523
  {
1524
  "epoch": 0.51,
1525
- "learning_rate": 1.8622217242516164e-05,
1526
- "loss": 2.6416,
1527
  "step": 1265
1528
  },
1529
  {
1530
  "epoch": 0.51,
1531
- "learning_rate": 1.8611563964945685e-05,
1532
- "loss": 2.5406,
1533
  "step": 1270
1534
  },
1535
  {
1536
  "epoch": 0.51,
1537
- "learning_rate": 1.8600872730869995e-05,
1538
- "loss": 2.6398,
1539
  "step": 1275
1540
  },
1541
  {
1542
  "epoch": 0.51,
1543
- "learning_rate": 1.859014358741199e-05,
1544
- "loss": 2.5742,
1545
  "step": 1280
1546
  },
1547
  {
1548
  "epoch": 0.52,
1549
- "learning_rate": 1.8579376581861665e-05,
1550
- "loss": 2.6472,
1551
  "step": 1285
1552
  },
1553
  {
1554
  "epoch": 0.52,
1555
- "learning_rate": 1.8568571761675893e-05,
1556
- "loss": 2.6341,
1557
  "step": 1290
1558
  },
1559
  {
1560
  "epoch": 0.52,
1561
- "learning_rate": 1.8557729174478222e-05,
1562
- "loss": 2.7972,
1563
  "step": 1295
1564
  },
1565
  {
1566
  "epoch": 0.52,
1567
- "learning_rate": 1.854684886805866e-05,
1568
- "loss": 2.6391,
1569
  "step": 1300
1570
  },
1571
  {
1572
  "epoch": 0.52,
1573
- "learning_rate": 1.8535930890373467e-05,
1574
- "loss": 2.6008,
1575
  "step": 1305
1576
  },
1577
  {
1578
  "epoch": 0.53,
1579
- "learning_rate": 1.8524975289544943e-05,
1580
- "loss": 2.6225,
1581
  "step": 1310
1582
  },
1583
  {
1584
  "epoch": 0.53,
1585
- "learning_rate": 1.851398211386122e-05,
1586
- "loss": 2.6241,
1587
  "step": 1315
1588
  },
1589
  {
1590
  "epoch": 0.53,
1591
- "learning_rate": 1.8502951411776043e-05,
1592
- "loss": 2.7438,
1593
  "step": 1320
1594
  },
1595
  {
1596
  "epoch": 0.53,
1597
- "learning_rate": 1.849188323190856e-05,
1598
- "loss": 2.6974,
1599
  "step": 1325
1600
  },
1601
  {
1602
  "epoch": 0.53,
1603
- "learning_rate": 1.848077762304311e-05,
1604
- "loss": 2.6354,
1605
  "step": 1330
1606
  },
1607
  {
1608
  "epoch": 0.54,
1609
- "learning_rate": 1.8469634634128993e-05,
1610
- "loss": 2.7041,
1611
  "step": 1335
1612
  },
1613
  {
1614
  "epoch": 0.54,
1615
- "learning_rate": 1.8458454314280283e-05,
1616
- "loss": 2.7031,
1617
  "step": 1340
1618
  },
1619
  {
1620
  "epoch": 0.54,
1621
- "learning_rate": 1.8447236712775583e-05,
1622
- "loss": 2.5,
1623
  "step": 1345
1624
  },
1625
  {
1626
  "epoch": 0.54,
1627
- "learning_rate": 1.843598187905782e-05,
1628
- "loss": 2.627,
1629
  "step": 1350
1630
  },
1631
  {
1632
  "epoch": 0.54,
1633
- "learning_rate": 1.842468986273403e-05,
1634
- "loss": 2.6997,
1635
  "step": 1355
1636
  },
1637
  {
1638
  "epoch": 0.55,
1639
- "learning_rate": 1.841336071357514e-05,
1640
- "loss": 2.7178,
1641
  "step": 1360
1642
  },
1643
  {
1644
  "epoch": 0.55,
1645
- "learning_rate": 1.840199448151573e-05,
1646
- "loss": 2.6428,
1647
  "step": 1365
1648
  },
1649
  {
1650
  "epoch": 0.55,
1651
- "learning_rate": 1.8390591216653845e-05,
1652
- "loss": 2.6359,
1653
  "step": 1370
1654
  },
1655
  {
1656
  "epoch": 0.55,
1657
- "learning_rate": 1.8379150969250747e-05,
1658
- "loss": 2.6191,
1659
  "step": 1375
1660
  },
1661
  {
1662
  "epoch": 0.55,
1663
- "learning_rate": 1.83676737897307e-05,
1664
- "loss": 2.6765,
1665
  "step": 1380
1666
  },
1667
  {
1668
  "epoch": 0.56,
1669
- "learning_rate": 1.8356159728680754e-05,
1670
- "loss": 2.6555,
1671
  "step": 1385
1672
  },
1673
  {
1674
  "epoch": 0.56,
1675
- "learning_rate": 1.8344608836850526e-05,
1676
- "loss": 2.6545,
1677
  "step": 1390
1678
  },
1679
  {
1680
  "epoch": 0.56,
1681
- "learning_rate": 1.833302116515196e-05,
1682
- "loss": 2.6659,
1683
  "step": 1395
1684
  },
1685
  {
1686
  "epoch": 0.56,
1687
- "learning_rate": 1.832139676465911e-05,
1688
- "loss": 2.6792,
1689
  "step": 1400
1690
  },
1691
  {
1692
  "epoch": 0.56,
1693
- "learning_rate": 1.8309735686607927e-05,
1694
- "loss": 2.7196,
1695
  "step": 1405
1696
  },
1697
  {
1698
  "epoch": 0.57,
1699
- "learning_rate": 1.8298037982396013e-05,
1700
- "loss": 2.6484,
1701
  "step": 1410
1702
  },
1703
  {
1704
  "epoch": 0.57,
1705
- "learning_rate": 1.828630370358241e-05,
1706
- "loss": 2.6177,
1707
  "step": 1415
1708
  },
1709
  {
1710
  "epoch": 0.57,
1711
- "learning_rate": 1.8274532901887363e-05,
1712
- "loss": 2.6296,
1713
  "step": 1420
1714
  },
1715
  {
1716
  "epoch": 0.57,
1717
- "learning_rate": 1.8262725629192105e-05,
1718
- "loss": 2.679,
1719
  "step": 1425
1720
  },
1721
  {
1722
  "epoch": 0.57,
1723
- "learning_rate": 1.8250881937538604e-05,
1724
- "loss": 2.5841,
1725
  "step": 1430
1726
  },
1727
  {
1728
  "epoch": 0.58,
1729
- "learning_rate": 1.8239001879129355e-05,
1730
- "loss": 2.7088,
1731
  "step": 1435
1732
  },
1733
  {
1734
  "epoch": 0.58,
1735
- "learning_rate": 1.8227085506327157e-05,
1736
- "loss": 2.6291,
1737
  "step": 1440
1738
  },
1739
  {
1740
  "epoch": 0.58,
1741
- "learning_rate": 1.821513287165485e-05,
1742
- "loss": 2.5989,
1743
  "step": 1445
1744
  },
1745
  {
1746
  "epoch": 0.58,
1747
- "learning_rate": 1.820314402779511e-05,
1748
- "loss": 2.6521,
1749
  "step": 1450
1750
  },
1751
  {
1752
  "epoch": 0.58,
1753
- "learning_rate": 1.819111902759022e-05,
1754
- "loss": 2.6189,
1755
  "step": 1455
1756
  },
1757
  {
1758
  "epoch": 0.59,
1759
- "learning_rate": 1.8179057924041804e-05,
1760
- "loss": 2.6539,
1761
  "step": 1460
1762
  },
1763
  {
1764
  "epoch": 0.59,
1765
- "learning_rate": 1.8166960770310632e-05,
1766
- "loss": 2.7381,
1767
  "step": 1465
1768
  },
1769
  {
1770
  "epoch": 0.59,
1771
- "learning_rate": 1.815482761971637e-05,
1772
- "loss": 2.6731,
1773
  "step": 1470
1774
  },
1775
  {
1776
  "epoch": 0.59,
1777
- "learning_rate": 1.8142658525737338e-05,
1778
- "loss": 2.6377,
1779
  "step": 1475
1780
  },
1781
  {
1782
  "epoch": 0.59,
1783
- "learning_rate": 1.8130453542010284e-05,
1784
- "loss": 2.5693,
1785
  "step": 1480
1786
  },
1787
  {
1788
  "epoch": 0.6,
1789
- "learning_rate": 1.8118212722330143e-05,
1790
- "loss": 2.6144,
1791
  "step": 1485
1792
  },
1793
  {
1794
  "epoch": 0.6,
1795
- "learning_rate": 1.8105936120649795e-05,
1796
- "loss": 2.5852,
1797
  "step": 1490
1798
  },
1799
  {
1800
  "epoch": 0.6,
1801
- "learning_rate": 1.8093623791079853e-05,
1802
- "loss": 2.6397,
1803
  "step": 1495
1804
  },
1805
  {
1806
  "epoch": 0.6,
1807
- "learning_rate": 1.8081275787888385e-05,
1808
- "loss": 2.7686,
1809
  "step": 1500
1810
  },
1811
  {
1812
  "epoch": 0.6,
1813
- "learning_rate": 1.8068892165500704e-05,
1814
- "loss": 2.6627,
1815
  "step": 1505
1816
  },
1817
  {
1818
  "epoch": 0.61,
1819
- "learning_rate": 1.8056472978499114e-05,
1820
- "loss": 2.6635,
1821
  "step": 1510
1822
  },
1823
  {
1824
  "epoch": 0.61,
1825
- "learning_rate": 1.804401828162268e-05,
1826
- "loss": 2.6679,
1827
  "step": 1515
1828
  },
1829
  {
1830
  "epoch": 0.61,
1831
- "learning_rate": 1.803152812976698e-05,
1832
- "loss": 2.6539,
1833
  "step": 1520
1834
  },
1835
  {
1836
  "epoch": 0.61,
1837
- "learning_rate": 1.801900257798386e-05,
1838
- "loss": 2.7683,
1839
  "step": 1525
1840
  },
1841
  {
1842
  "epoch": 0.61,
1843
- "learning_rate": 1.8006441681481194e-05,
1844
- "loss": 2.7179,
1845
  "step": 1530
1846
  },
1847
  {
1848
  "epoch": 0.62,
1849
- "learning_rate": 1.7993845495622654e-05,
1850
- "loss": 2.6437,
1851
  "step": 1535
1852
  },
1853
  {
1854
  "epoch": 0.62,
1855
- "learning_rate": 1.798121407592744e-05,
1856
- "loss": 2.5448,
1857
  "step": 1540
1858
  },
1859
  {
1860
  "epoch": 0.62,
1861
- "learning_rate": 1.796854747807006e-05,
1862
- "loss": 2.6761,
1863
  "step": 1545
1864
  },
1865
  {
1866
  "epoch": 0.62,
1867
- "learning_rate": 1.795584575788006e-05,
1868
- "loss": 2.6266,
1869
  "step": 1550
1870
  },
1871
  {
1872
  "epoch": 0.62,
1873
- "learning_rate": 1.7943108971341815e-05,
1874
- "loss": 2.6264,
1875
  "step": 1555
1876
  },
1877
  {
1878
  "epoch": 0.63,
1879
- "learning_rate": 1.7930337174594234e-05,
1880
- "loss": 2.6469,
1881
  "step": 1560
1882
  },
1883
  {
1884
  "epoch": 0.63,
1885
- "learning_rate": 1.791753042393056e-05,
1886
- "loss": 2.8521,
1887
  "step": 1565
1888
  },
1889
  {
1890
  "epoch": 0.63,
1891
- "learning_rate": 1.7904688775798075e-05,
1892
- "loss": 2.7343,
1893
  "step": 1570
1894
  },
1895
  {
1896
  "epoch": 0.63,
1897
- "learning_rate": 1.7891812286797902e-05,
1898
- "loss": 2.7565,
1899
  "step": 1575
1900
  },
1901
  {
1902
  "epoch": 0.63,
1903
- "learning_rate": 1.787890101368471e-05,
1904
- "loss": 2.701,
1905
  "step": 1580
1906
  },
1907
  {
1908
  "epoch": 0.64,
1909
- "learning_rate": 1.7865955013366493e-05,
1910
- "loss": 2.6739,
1911
  "step": 1585
1912
  },
1913
  {
1914
  "epoch": 0.64,
1915
- "learning_rate": 1.7852974342904303e-05,
1916
- "loss": 2.6679,
1917
  "step": 1590
1918
  },
1919
  {
1920
  "epoch": 0.64,
1921
- "learning_rate": 1.7839959059512016e-05,
1922
- "loss": 2.6126,
1923
  "step": 1595
1924
  },
1925
  {
1926
  "epoch": 0.64,
1927
- "learning_rate": 1.782690922055605e-05,
1928
- "loss": 2.6504,
1929
  "step": 1600
1930
  }
1931
  ],
 
10
  "log_history": [
11
  {
12
  "epoch": 0.0,
13
+ "learning_rate": 4.99999449047447e-05,
14
+ "loss": 2.7413,
15
  "step": 5
16
  },
17
  {
18
  "epoch": 0.0,
19
+ "learning_rate": 4.9999779619221645e-05,
20
+ "loss": 2.6684,
21
  "step": 10
22
  },
23
  {
24
  "epoch": 0.01,
25
+ "learning_rate": 4.999950414415935e-05,
26
+ "loss": 2.6511,
27
  "step": 15
28
  },
29
  {
30
  "epoch": 0.01,
31
+ "learning_rate": 4.999911848077199e-05,
32
+ "loss": 2.72,
33
  "step": 20
34
  },
35
  {
36
  "epoch": 0.01,
37
+ "learning_rate": 4.999862263075944e-05,
38
+ "loss": 2.7222,
39
  "step": 25
40
  },
41
  {
42
  "epoch": 0.01,
43
+ "learning_rate": 4.999801659630722e-05,
44
+ "loss": 2.7479,
45
  "step": 30
46
  },
47
  {
48
  "epoch": 0.01,
49
+ "learning_rate": 4.999730038008649e-05,
50
+ "loss": 2.6063,
51
  "step": 35
52
  },
53
  {
54
  "epoch": 0.02,
55
+ "learning_rate": 4.9996473985254055e-05,
56
+ "loss": 2.576,
57
  "step": 40
58
  },
59
  {
60
  "epoch": 0.02,
61
+ "learning_rate": 4.999553741545237e-05,
62
+ "loss": 2.579,
63
  "step": 45
64
  },
65
  {
66
  "epoch": 0.02,
67
+ "learning_rate": 4.999449067480945e-05,
68
+ "loss": 2.5114,
69
  "step": 50
70
  },
71
  {
72
  "epoch": 0.02,
73
+ "learning_rate": 4.9993333767938954e-05,
74
+ "loss": 2.6174,
75
  "step": 55
76
  },
77
  {
78
  "epoch": 0.02,
79
+ "learning_rate": 4.9992066699940085e-05,
80
+ "loss": 2.6581,
81
  "step": 60
82
  },
83
  {
84
  "epoch": 0.03,
85
+ "learning_rate": 4.9990689476397586e-05,
86
+ "loss": 2.586,
87
  "step": 65
88
  },
89
  {
90
  "epoch": 0.03,
91
+ "learning_rate": 4.998920210338175e-05,
92
+ "loss": 2.4642,
93
  "step": 70
94
  },
95
  {
96
  "epoch": 0.03,
97
+ "learning_rate": 4.9987604587448343e-05,
98
+ "loss": 2.6262,
99
  "step": 75
100
  },
101
  {
102
  "epoch": 0.03,
103
+ "learning_rate": 4.998589693563861e-05,
104
+ "loss": 2.5678,
105
  "step": 80
106
  },
107
  {
108
  "epoch": 0.03,
109
+ "learning_rate": 4.998407915547924e-05,
110
+ "loss": 2.5597,
111
  "step": 85
112
  },
113
  {
114
  "epoch": 0.04,
115
+ "learning_rate": 4.9982151254982304e-05,
116
+ "loss": 2.6578,
117
  "step": 90
118
  },
119
  {
120
  "epoch": 0.04,
121
+ "learning_rate": 4.998011324264527e-05,
122
+ "loss": 2.4825,
123
  "step": 95
124
  },
125
  {
126
  "epoch": 0.04,
127
+ "learning_rate": 4.997796512745091e-05,
128
+ "loss": 2.6197,
129
  "step": 100
130
  },
131
  {
132
  "epoch": 0.04,
133
+ "learning_rate": 4.997570691886732e-05,
134
+ "loss": 2.4921,
135
  "step": 105
136
  },
137
  {
138
  "epoch": 0.04,
139
+ "learning_rate": 4.99733386268478e-05,
140
+ "loss": 2.5126,
141
  "step": 110
142
  },
143
  {
144
  "epoch": 0.05,
145
+ "learning_rate": 4.99708602618309e-05,
146
+ "loss": 2.584,
147
  "step": 115
148
  },
149
  {
150
  "epoch": 0.05,
151
+ "learning_rate": 4.9968271834740305e-05,
152
+ "loss": 2.4991,
153
  "step": 120
154
  },
155
  {
156
  "epoch": 0.05,
157
+ "learning_rate": 4.996557335698482e-05,
158
+ "loss": 2.5242,
159
  "step": 125
160
  },
161
  {
162
  "epoch": 0.05,
163
+ "learning_rate": 4.996276484045832e-05,
164
+ "loss": 2.4506,
165
  "step": 130
166
  },
167
  {
168
  "epoch": 0.05,
169
+ "learning_rate": 4.995984629753967e-05,
170
+ "loss": 2.5479,
171
  "step": 135
172
  },
173
  {
174
  "epoch": 0.06,
175
+ "learning_rate": 4.9956817741092696e-05,
176
+ "loss": 2.5316,
177
  "step": 140
178
  },
179
  {
180
  "epoch": 0.06,
181
+ "learning_rate": 4.995367918446613e-05,
182
+ "loss": 2.6053,
183
  "step": 145
184
  },
185
  {
186
  "epoch": 0.06,
187
+ "learning_rate": 4.995043064149354e-05,
188
+ "loss": 2.4533,
189
  "step": 150
190
  },
191
  {
192
  "epoch": 0.06,
193
+ "learning_rate": 4.9947072126493276e-05,
194
+ "loss": 2.5279,
195
  "step": 155
196
  },
197
  {
198
  "epoch": 0.06,
199
+ "learning_rate": 4.994360365426839e-05,
200
+ "loss": 2.4994,
201
  "step": 160
202
  },
203
  {
204
  "epoch": 0.07,
205
+ "learning_rate": 4.994002524010659e-05,
206
+ "loss": 2.4675,
207
  "step": 165
208
  },
209
  {
210
  "epoch": 0.07,
211
+ "learning_rate": 4.9936336899780166e-05,
212
+ "loss": 2.5233,
213
  "step": 170
214
  },
215
  {
216
  "epoch": 0.07,
217
+ "learning_rate": 4.993253864954592e-05,
218
+ "loss": 2.4974,
219
  "step": 175
220
  },
221
  {
222
  "epoch": 0.07,
223
+ "learning_rate": 4.992863050614511e-05,
224
+ "loss": 2.6295,
225
  "step": 180
226
  },
227
  {
228
  "epoch": 0.07,
229
+ "learning_rate": 4.9924612486803346e-05,
230
+ "loss": 2.4744,
231
  "step": 185
232
  },
233
  {
234
  "epoch": 0.08,
235
+ "learning_rate": 4.992048460923052e-05,
236
+ "loss": 2.5248,
237
  "step": 190
238
  },
239
  {
240
  "epoch": 0.08,
241
+ "learning_rate": 4.991624689162076e-05,
242
+ "loss": 2.5727,
243
  "step": 195
244
  },
245
  {
246
  "epoch": 0.08,
247
+ "learning_rate": 4.991189935265231e-05,
248
+ "loss": 2.4845,
249
  "step": 200
250
  },
251
  {
252
  "epoch": 0.08,
253
+ "learning_rate": 4.990744201148748e-05,
254
+ "loss": 2.4544,
255
  "step": 205
256
  },
257
  {
258
  "epoch": 0.08,
259
+ "learning_rate": 4.990287488777253e-05,
260
+ "loss": 2.4383,
261
  "step": 210
262
  },
263
  {
264
  "epoch": 0.09,
265
+ "learning_rate": 4.989819800163761e-05,
266
+ "loss": 2.5263,
267
  "step": 215
268
  },
269
  {
270
  "epoch": 0.09,
271
+ "learning_rate": 4.989341137369666e-05,
272
+ "loss": 2.5219,
273
  "step": 220
274
  },
275
  {
276
  "epoch": 0.09,
277
+ "learning_rate": 4.9888515025047316e-05,
278
+ "loss": 2.4512,
279
  "step": 225
280
  },
281
  {
282
  "epoch": 0.09,
283
+ "learning_rate": 4.988350897727083e-05,
284
+ "loss": 2.491,
285
  "step": 230
286
  },
287
  {
288
  "epoch": 0.09,
289
+ "learning_rate": 4.9878393252431953e-05,
290
+ "loss": 2.5404,
291
  "step": 235
292
  },
293
  {
294
  "epoch": 0.1,
295
+ "learning_rate": 4.987316787307888e-05,
296
+ "loss": 2.4777,
297
  "step": 240
298
  },
299
  {
300
  "epoch": 0.1,
301
+ "learning_rate": 4.9867832862243055e-05,
302
+ "loss": 2.4498,
303
  "step": 245
304
  },
305
  {
306
  "epoch": 0.1,
307
+ "learning_rate": 4.986238824343923e-05,
308
+ "loss": 2.4174,
309
  "step": 250
310
  },
311
  {
312
  "epoch": 0.1,
313
+ "learning_rate": 4.985683404066519e-05,
314
+ "loss": 2.4589,
315
  "step": 255
316
  },
317
  {
318
  "epoch": 0.1,
319
+ "learning_rate": 4.985117027840175e-05,
320
+ "loss": 2.5029,
321
  "step": 260
322
  },
323
  {
324
  "epoch": 0.11,
325
+ "learning_rate": 4.984539698161264e-05,
326
+ "loss": 2.4376,
327
  "step": 265
328
  },
329
  {
330
  "epoch": 0.11,
331
+ "learning_rate": 4.983951417574436e-05,
332
+ "loss": 2.526,
333
  "step": 270
334
  },
335
  {
336
  "epoch": 0.11,
337
+ "learning_rate": 4.983352188672608e-05,
338
+ "loss": 2.5214,
339
  "step": 275
340
  },
341
  {
342
  "epoch": 0.11,
343
+ "learning_rate": 4.982742014096952e-05,
344
+ "loss": 2.5188,
345
  "step": 280
346
  },
347
  {
348
  "epoch": 0.11,
349
+ "learning_rate": 4.982120896536888e-05,
350
+ "loss": 2.4175,
351
  "step": 285
352
  },
353
  {
354
  "epoch": 0.12,
355
+ "learning_rate": 4.981488838730066e-05,
356
+ "loss": 2.5346,
357
  "step": 290
358
  },
359
  {
360
  "epoch": 0.12,
361
+ "learning_rate": 4.980845843462357e-05,
362
+ "loss": 2.496,
363
  "step": 295
364
  },
365
  {
366
  "epoch": 0.12,
367
+ "learning_rate": 4.98019191356784e-05,
368
+ "loss": 2.477,
369
  "step": 300
370
  },
371
  {
372
  "epoch": 0.12,
373
+ "learning_rate": 4.9795270519287886e-05,
374
+ "loss": 2.4205,
375
  "step": 305
376
  },
377
  {
378
  "epoch": 0.12,
379
+ "learning_rate": 4.9788512614756624e-05,
380
+ "loss": 2.4621,
381
  "step": 310
382
  },
383
  {
384
  "epoch": 0.13,
385
+ "learning_rate": 4.9781645451870875e-05,
386
+ "loss": 2.5098,
387
  "step": 315
388
  },
389
  {
390
  "epoch": 0.13,
391
+ "learning_rate": 4.9774669060898496e-05,
392
+ "loss": 2.4847,
393
  "step": 320
394
  },
395
  {
396
  "epoch": 0.13,
397
+ "learning_rate": 4.976758347258877e-05,
398
+ "loss": 2.4873,
399
  "step": 325
400
  },
401
  {
402
  "epoch": 0.13,
403
+ "learning_rate": 4.976038871817228e-05,
404
+ "loss": 2.4462,
405
  "step": 330
406
  },
407
  {
408
  "epoch": 0.13,
409
+ "learning_rate": 4.9753084829360776e-05,
410
+ "loss": 2.4994,
411
  "step": 335
412
  },
413
  {
414
  "epoch": 0.14,
415
+ "learning_rate": 4.974567183834702e-05,
416
+ "loss": 2.4994,
417
  "step": 340
418
  },
419
  {
420
  "epoch": 0.14,
421
+ "learning_rate": 4.9738149777804665e-05,
422
+ "loss": 2.4584,
423
  "step": 345
424
  },
425
  {
426
  "epoch": 0.14,
427
+ "learning_rate": 4.97305186808881e-05,
428
+ "loss": 2.4294,
429
  "step": 350
430
  },
431
  {
432
  "epoch": 0.14,
433
+ "learning_rate": 4.9722778581232305e-05,
434
+ "loss": 2.4499,
435
  "step": 355
436
  },
437
  {
438
  "epoch": 0.14,
439
+ "learning_rate": 4.9714929512952704e-05,
440
+ "loss": 2.5851,
441
  "step": 360
442
  },
443
  {
444
  "epoch": 0.15,
445
+ "learning_rate": 4.9706971510645e-05,
446
+ "loss": 2.4746,
447
  "step": 365
448
  },
449
  {
450
  "epoch": 0.15,
451
+ "learning_rate": 4.969890460938505e-05,
452
+ "loss": 2.4184,
453
  "step": 370
454
  },
455
  {
456
  "epoch": 0.15,
457
+ "learning_rate": 4.9690728844728704e-05,
458
+ "loss": 2.4716,
459
  "step": 375
460
  },
461
  {
462
  "epoch": 0.15,
463
+ "learning_rate": 4.968244425271161e-05,
464
+ "loss": 2.4477,
465
  "step": 380
466
  },
467
  {
468
  "epoch": 0.15,
469
+ "learning_rate": 4.9674050869849124e-05,
470
+ "loss": 2.4563,
471
  "step": 385
472
  },
473
  {
474
  "epoch": 0.16,
475
+ "learning_rate": 4.966554873313608e-05,
476
+ "loss": 2.3708,
477
  "step": 390
478
  },
479
  {
480
  "epoch": 0.16,
481
+ "learning_rate": 4.9656937880046676e-05,
482
+ "loss": 2.4351,
483
  "step": 395
484
  },
485
  {
486
  "epoch": 0.16,
487
+ "learning_rate": 4.9648218348534284e-05,
488
+ "loss": 2.3933,
489
  "step": 400
490
  },
491
  {
492
  "epoch": 0.16,
493
+ "learning_rate": 4.963939017703128e-05,
494
+ "loss": 2.5871,
495
  "step": 405
496
  },
497
  {
498
  "epoch": 0.16,
499
+ "learning_rate": 4.9630453404448905e-05,
500
+ "loss": 2.3965,
501
  "step": 410
502
  },
503
  {
504
  "epoch": 0.17,
505
+ "learning_rate": 4.962140807017705e-05,
506
+ "loss": 2.372,
507
  "step": 415
508
  },
509
  {
510
  "epoch": 0.17,
511
+ "learning_rate": 4.961225421408412e-05,
512
+ "loss": 2.4847,
513
  "step": 420
514
  },
515
  {
516
  "epoch": 0.17,
517
+ "learning_rate": 4.960299187651684e-05,
518
+ "loss": 2.4046,
519
  "step": 425
520
  },
521
  {
522
  "epoch": 0.17,
523
+ "learning_rate": 4.959362109830007e-05,
524
+ "loss": 2.3912,
525
  "step": 430
526
  },
527
  {
528
  "epoch": 0.17,
529
+ "learning_rate": 4.9584141920736656e-05,
530
+ "loss": 2.4732,
531
  "step": 435
532
  },
533
  {
534
  "epoch": 0.18,
535
+ "learning_rate": 4.957455438560721e-05,
536
+ "loss": 2.4854,
537
  "step": 440
538
  },
539
  {
540
  "epoch": 0.18,
541
+ "learning_rate": 4.956485853516993e-05,
542
+ "loss": 2.3736,
543
  "step": 445
544
  },
545
  {
546
  "epoch": 0.18,
547
+ "learning_rate": 4.9555054412160476e-05,
548
+ "loss": 2.4222,
549
  "step": 450
550
  },
551
  {
552
  "epoch": 0.18,
553
+ "learning_rate": 4.9545142059791686e-05,
554
+ "loss": 2.4498,
555
  "step": 455
556
  },
557
  {
558
  "epoch": 0.18,
559
+ "learning_rate": 4.9535121521753434e-05,
560
+ "loss": 2.4172,
561
  "step": 460
562
  },
563
  {
564
  "epoch": 0.19,
565
+ "learning_rate": 4.952499284221247e-05,
566
+ "loss": 2.4742,
567
  "step": 465
568
  },
569
  {
570
  "epoch": 0.19,
571
+ "learning_rate": 4.951475606581215e-05,
572
+ "loss": 2.4008,
573
  "step": 470
574
  },
575
  {
576
  "epoch": 0.19,
577
+ "learning_rate": 4.950441123767231e-05,
578
+ "loss": 2.4278,
579
  "step": 475
580
  },
581
  {
582
  "epoch": 0.19,
583
+ "learning_rate": 4.949395840338903e-05,
584
+ "loss": 2.5041,
585
  "step": 480
586
  },
587
  {
588
  "epoch": 0.19,
589
+ "learning_rate": 4.948339760903442e-05,
590
+ "loss": 2.4088,
591
  "step": 485
592
  },
593
  {
594
  "epoch": 0.2,
595
+ "learning_rate": 4.947272890115647e-05,
596
+ "loss": 2.466,
597
  "step": 490
598
  },
599
  {
600
  "epoch": 0.2,
601
+ "learning_rate": 4.946195232677878e-05,
602
+ "loss": 2.4404,
603
  "step": 495
604
  },
605
  {
606
  "epoch": 0.2,
607
+ "learning_rate": 4.9451067933400406e-05,
608
+ "loss": 2.3658,
609
  "step": 500
610
  },
611
  {
612
  "epoch": 0.2,
613
+ "learning_rate": 4.9440075768995625e-05,
614
+ "loss": 2.4247,
615
  "step": 505
616
  },
617
  {
618
  "epoch": 0.2,
619
+ "learning_rate": 4.942897588201372e-05,
620
+ "loss": 2.5082,
621
  "step": 510
622
  },
623
  {
624
  "epoch": 0.21,
625
+ "learning_rate": 4.941776832137879e-05,
626
+ "loss": 2.5545,
627
  "step": 515
628
  },
629
  {
630
  "epoch": 0.21,
631
+ "learning_rate": 4.940645313648949e-05,
632
+ "loss": 2.4665,
633
  "step": 520
634
  },
635
  {
636
  "epoch": 0.21,
637
+ "learning_rate": 4.939503037721888e-05,
638
+ "loss": 2.4195,
639
  "step": 525
640
  },
641
  {
642
  "epoch": 0.21,
643
+ "learning_rate": 4.938350009391413e-05,
644
+ "loss": 2.4908,
645
  "step": 530
646
  },
647
  {
648
  "epoch": 0.21,
649
+ "learning_rate": 4.937186233739637e-05,
650
+ "loss": 2.4093,
651
  "step": 535
652
  },
653
  {
654
  "epoch": 0.22,
655
+ "learning_rate": 4.93601171589604e-05,
656
+ "loss": 2.44,
657
  "step": 540
658
  },
659
  {
660
  "epoch": 0.22,
661
+ "learning_rate": 4.9348264610374494e-05,
662
+ "loss": 2.4989,
663
  "step": 545
664
  },
665
  {
666
  "epoch": 0.22,
667
+ "learning_rate": 4.9336304743880226e-05,
668
+ "loss": 2.3786,
669
  "step": 550
670
  },
671
  {
672
  "epoch": 0.22,
673
+ "learning_rate": 4.932423761219211e-05,
674
+ "loss": 2.4305,
675
  "step": 555
676
  },
677
  {
678
  "epoch": 0.22,
679
+ "learning_rate": 4.931206326849751e-05,
680
+ "loss": 2.4012,
681
  "step": 560
682
  },
683
  {
684
  "epoch": 0.23,
685
+ "learning_rate": 4.92997817664563e-05,
686
+ "loss": 2.4263,
687
  "step": 565
688
  },
689
  {
690
  "epoch": 0.23,
691
+ "learning_rate": 4.9287393160200676e-05,
692
+ "loss": 2.4749,
693
  "step": 570
694
  },
695
  {
696
  "epoch": 0.23,
697
+ "learning_rate": 4.927489750433492e-05,
698
+ "loss": 2.5083,
699
  "step": 575
700
  },
701
  {
702
  "epoch": 0.23,
703
+ "learning_rate": 4.926229485393513e-05,
704
+ "loss": 2.5228,
705
  "step": 580
706
  },
707
  {
708
  "epoch": 0.23,
709
+ "learning_rate": 4.924958526454902e-05,
710
+ "loss": 2.4623,
711
  "step": 585
712
  },
713
  {
714
  "epoch": 0.24,
715
+ "learning_rate": 4.923676879219562e-05,
716
+ "loss": 2.4199,
717
  "step": 590
718
  },
719
  {
720
  "epoch": 0.24,
721
+ "learning_rate": 4.9223845493365085e-05,
722
+ "loss": 2.2906,
723
  "step": 595
724
  },
725
  {
726
  "epoch": 0.24,
727
+ "learning_rate": 4.9210815425018406e-05,
728
+ "loss": 2.5539,
729
  "step": 600
730
  },
731
  {
732
  "epoch": 0.24,
733
+ "learning_rate": 4.919767864458718e-05,
734
+ "loss": 2.3456,
735
  "step": 605
736
  },
737
  {
738
  "epoch": 0.24,
739
+ "learning_rate": 4.9184435209973354e-05,
740
+ "loss": 2.4328,
741
  "step": 610
742
  },
743
  {
744
  "epoch": 0.25,
745
+ "learning_rate": 4.917108517954895e-05,
746
+ "loss": 2.3412,
747
  "step": 615
748
  },
749
  {
750
  "epoch": 0.25,
751
+ "learning_rate": 4.9157628612155836e-05,
752
+ "loss": 2.604,
753
  "step": 620
754
  },
755
  {
756
  "epoch": 0.25,
757
+ "learning_rate": 4.9144065567105465e-05,
758
+ "loss": 2.4007,
759
  "step": 625
760
  },
761
  {
762
  "epoch": 0.25,
763
+ "learning_rate": 4.913039610417859e-05,
764
+ "loss": 2.437,
765
  "step": 630
766
  },
767
  {
768
  "epoch": 0.25,
769
+ "learning_rate": 4.9116620283624996e-05,
770
+ "loss": 2.5004,
771
  "step": 635
772
  },
773
  {
774
  "epoch": 0.26,
775
+ "learning_rate": 4.91027381661633e-05,
776
+ "loss": 2.5045,
777
  "step": 640
778
  },
779
  {
780
  "epoch": 0.26,
781
+ "learning_rate": 4.908874981298057e-05,
782
+ "loss": 2.3794,
783
  "step": 645
784
  },
785
  {
786
  "epoch": 0.26,
787
+ "learning_rate": 4.9074655285732196e-05,
788
+ "loss": 2.4621,
789
  "step": 650
790
  },
791
  {
792
  "epoch": 0.26,
793
+ "learning_rate": 4.906045464654148e-05,
794
+ "loss": 2.549,
795
  "step": 655
796
  },
797
  {
798
  "epoch": 0.26,
799
+ "learning_rate": 4.9046147957999454e-05,
800
+ "loss": 2.4567,
801
  "step": 660
802
  },
803
  {
804
  "epoch": 0.27,
805
+ "learning_rate": 4.9031735283164574e-05,
806
+ "loss": 2.3243,
807
  "step": 665
808
  },
809
  {
810
  "epoch": 0.27,
811
+ "learning_rate": 4.901721668556244e-05,
812
+ "loss": 2.4758,
813
  "step": 670
814
  },
815
  {
816
  "epoch": 0.27,
817
+ "learning_rate": 4.9002592229185515e-05,
818
+ "loss": 2.5083,
819
  "step": 675
820
  },
821
  {
822
  "epoch": 0.27,
823
+ "learning_rate": 4.898786197849285e-05,
824
+ "loss": 2.4428,
825
  "step": 680
826
  },
827
  {
828
  "epoch": 0.27,
829
+ "learning_rate": 4.897302599840981e-05,
830
+ "loss": 2.4403,
831
  "step": 685
832
  },
833
  {
834
  "epoch": 0.28,
835
+ "learning_rate": 4.895808435432776e-05,
836
+ "loss": 2.4401,
837
  "step": 690
838
  },
839
  {
840
  "epoch": 0.28,
841
+ "learning_rate": 4.8943037112103784e-05,
842
+ "loss": 2.3789,
843
  "step": 695
844
  },
845
  {
846
  "epoch": 0.28,
847
+ "learning_rate": 4.892788433806042e-05,
848
+ "loss": 2.3945,
849
  "step": 700
850
  },
851
  {
852
  "epoch": 0.28,
853
+ "learning_rate": 4.8912626098985355e-05,
854
+ "loss": 2.4758,
855
  "step": 705
856
  },
857
  {
858
  "epoch": 0.28,
859
+ "learning_rate": 4.88972624621311e-05,
860
+ "loss": 2.4646,
861
  "step": 710
862
  },
863
  {
864
  "epoch": 0.29,
865
+ "learning_rate": 4.888179349521475e-05,
866
+ "loss": 2.4225,
867
  "step": 715
868
  },
869
  {
870
  "epoch": 0.29,
871
+ "learning_rate": 4.886621926641762e-05,
872
+ "loss": 2.4074,
873
  "step": 720
874
  },
875
  {
876
  "epoch": 0.29,
877
+ "learning_rate": 4.8850539844385017e-05,
878
+ "loss": 2.4438,
879
  "step": 725
880
  },
881
  {
882
  "epoch": 0.29,
883
+ "learning_rate": 4.883475529822587e-05,
884
+ "loss": 2.3666,
885
  "step": 730
886
  },
887
  {
888
  "epoch": 0.29,
889
+ "learning_rate": 4.881886569751248e-05,
890
+ "loss": 2.3782,
891
  "step": 735
892
  },
893
  {
894
  "epoch": 0.3,
895
+ "learning_rate": 4.880287111228016e-05,
896
+ "loss": 2.4916,
897
  "step": 740
898
  },
899
  {
900
  "epoch": 0.3,
901
+ "learning_rate": 4.878677161302698e-05,
902
+ "loss": 2.3639,
903
  "step": 745
904
  },
905
  {
906
  "epoch": 0.3,
907
+ "learning_rate": 4.877056727071342e-05,
908
+ "loss": 2.4333,
909
  "step": 750
910
  },
911
  {
912
  "epoch": 0.3,
913
+ "learning_rate": 4.8754258156762075e-05,
914
+ "loss": 2.3907,
915
  "step": 755
916
  },
917
  {
918
  "epoch": 0.3,
919
+ "learning_rate": 4.8737844343057315e-05,
920
+ "loss": 2.433,
921
  "step": 760
922
  },
923
  {
924
  "epoch": 0.31,
925
+ "learning_rate": 4.8721325901945015e-05,
926
+ "loss": 2.3849,
927
  "step": 765
928
  },
929
  {
930
  "epoch": 0.31,
931
+ "learning_rate": 4.870470290623218e-05,
932
+ "loss": 2.419,
933
  "step": 770
934
  },
935
  {
936
  "epoch": 0.31,
937
+ "learning_rate": 4.8687975429186685e-05,
938
+ "loss": 2.3694,
939
  "step": 775
940
  },
941
  {
942
  "epoch": 0.31,
943
+ "learning_rate": 4.867114354453688e-05,
944
+ "loss": 2.4335,
945
  "step": 780
946
  },
947
  {
948
  "epoch": 0.31,
949
+ "learning_rate": 4.865420732647133e-05,
950
+ "loss": 2.4168,
951
  "step": 785
952
  },
953
  {
954
  "epoch": 0.32,
955
+ "learning_rate": 4.863716684963846e-05,
956
+ "loss": 2.419,
957
  "step": 790
958
  },
959
  {
960
  "epoch": 0.32,
961
+ "learning_rate": 4.862002218914622e-05,
962
+ "loss": 2.335,
963
  "step": 795
964
  },
965
  {
966
  "epoch": 0.32,
967
+ "learning_rate": 4.860277342056176e-05,
968
+ "loss": 2.4059,
969
  "step": 800
970
  },
971
  {
972
  "epoch": 0.32,
973
+ "learning_rate": 4.858542061991112e-05,
974
+ "loss": 2.3423,
975
  "step": 805
976
  },
977
  {
978
  "epoch": 0.32,
979
+ "learning_rate": 4.856796386367884e-05,
980
+ "loss": 2.4135,
981
  "step": 810
982
  },
983
  {
984
  "epoch": 0.33,
985
+ "learning_rate": 4.855040322880769e-05,
986
+ "loss": 2.3652,
987
  "step": 815
988
  },
989
  {
990
  "epoch": 0.33,
991
+ "learning_rate": 4.853273879269827e-05,
992
+ "loss": 2.3511,
993
  "step": 820
994
  },
995
  {
996
  "epoch": 0.33,
997
+ "learning_rate": 4.8514970633208726e-05,
998
+ "loss": 2.413,
999
  "step": 825
1000
  },
1001
  {
1002
  "epoch": 0.33,
1003
+ "learning_rate": 4.849709882865435e-05,
1004
+ "loss": 2.4445,
1005
  "step": 830
1006
  },
1007
  {
1008
  "epoch": 0.33,
1009
+ "learning_rate": 4.847912345780727e-05,
1010
+ "loss": 2.4221,
1011
  "step": 835
1012
  },
1013
  {
1014
  "epoch": 0.34,
1015
+ "learning_rate": 4.846104459989611e-05,
1016
+ "loss": 2.4992,
1017
  "step": 840
1018
  },
1019
  {
1020
  "epoch": 0.34,
1021
+ "learning_rate": 4.84428623346056e-05,
1022
+ "loss": 2.3923,
1023
  "step": 845
1024
  },
1025
  {
1026
  "epoch": 0.34,
1027
+ "learning_rate": 4.842457674207627e-05,
1028
+ "loss": 2.3666,
1029
  "step": 850
1030
  },
1031
  {
1032
  "epoch": 0.34,
1033
+ "learning_rate": 4.8406187902904076e-05,
1034
+ "loss": 2.4329,
1035
  "step": 855
1036
  },
1037
  {
1038
  "epoch": 0.34,
1039
+ "learning_rate": 4.8387695898140026e-05,
1040
+ "loss": 2.3429,
1041
  "step": 860
1042
  },
1043
  {
1044
  "epoch": 0.35,
1045
+ "learning_rate": 4.836910080928987e-05,
1046
+ "loss": 2.4428,
1047
  "step": 865
1048
  },
1049
  {
1050
  "epoch": 0.35,
1051
+ "learning_rate": 4.83504027183137e-05,
1052
+ "loss": 2.3842,
1053
  "step": 870
1054
  },
1055
  {
1056
  "epoch": 0.35,
1057
+ "learning_rate": 4.833160170762561e-05,
1058
+ "loss": 2.4064,
1059
  "step": 875
1060
  },
1061
  {
1062
  "epoch": 0.35,
1063
+ "learning_rate": 4.8312697860093295e-05,
1064
+ "loss": 2.3705,
1065
  "step": 880
1066
  },
1067
  {
1068
  "epoch": 0.35,
1069
+ "learning_rate": 4.829369125903776e-05,
1070
+ "loss": 2.4151,
1071
  "step": 885
1072
  },
1073
  {
1074
  "epoch": 0.36,
1075
+ "learning_rate": 4.8274581988232894e-05,
1076
+ "loss": 2.4219,
1077
  "step": 890
1078
  },
1079
  {
1080
  "epoch": 0.36,
1081
+ "learning_rate": 4.825537013190509e-05,
1082
+ "loss": 2.4042,
1083
  "step": 895
1084
  },
1085
  {
1086
  "epoch": 0.36,
1087
+ "learning_rate": 4.823605577473293e-05,
1088
+ "loss": 2.4509,
1089
  "step": 900
1090
  },
1091
  {
1092
  "epoch": 0.36,
1093
+ "learning_rate": 4.8216639001846764e-05,
1094
+ "loss": 2.4255,
1095
  "step": 905
1096
  },
1097
  {
1098
  "epoch": 0.36,
1099
+ "learning_rate": 4.8197119898828367e-05,
1100
+ "loss": 2.4928,
1101
  "step": 910
1102
  },
1103
  {
1104
  "epoch": 0.37,
1105
+ "learning_rate": 4.817749855171052e-05,
1106
+ "loss": 2.4044,
1107
  "step": 915
1108
  },
1109
  {
1110
  "epoch": 0.37,
1111
+ "learning_rate": 4.8157775046976684e-05,
1112
+ "loss": 2.4085,
1113
  "step": 920
1114
  },
1115
  {
1116
  "epoch": 0.37,
1117
+ "learning_rate": 4.813794947156058e-05,
1118
+ "loss": 2.3941,
1119
  "step": 925
1120
  },
1121
  {
1122
  "epoch": 0.37,
1123
+ "learning_rate": 4.8118021912845815e-05,
1124
+ "loss": 2.3493,
1125
  "step": 930
1126
  },
1127
  {
1128
  "epoch": 0.37,
1129
+ "learning_rate": 4.8097992458665506e-05,
1130
+ "loss": 2.3367,
1131
  "step": 935
1132
  },
1133
  {
1134
  "epoch": 0.38,
1135
+ "learning_rate": 4.807786119730189e-05,
1136
+ "loss": 2.3865,
1137
  "step": 940
1138
  },
1139
  {
1140
  "epoch": 0.38,
1141
+ "learning_rate": 4.8057628217485916e-05,
1142
+ "loss": 2.3885,
1143
  "step": 945
1144
  },
1145
  {
1146
  "epoch": 0.38,
1147
+ "learning_rate": 4.803729360839688e-05,
1148
+ "loss": 2.4296,
1149
  "step": 950
1150
  },
1151
  {
1152
  "epoch": 0.38,
1153
+ "learning_rate": 4.801685745966203e-05,
1154
+ "loss": 2.3856,
1155
  "step": 955
1156
  },
1157
  {
1158
  "epoch": 0.38,
1159
+ "learning_rate": 4.799631986135615e-05,
1160
+ "loss": 2.3631,
1161
  "step": 960
1162
  },
1163
  {
1164
  "epoch": 0.39,
1165
+ "learning_rate": 4.797568090400117e-05,
1166
+ "loss": 2.3882,
1167
  "step": 965
1168
  },
1169
  {
1170
  "epoch": 0.39,
1171
+ "learning_rate": 4.7954940678565785e-05,
1172
+ "loss": 2.3396,
1173
  "step": 970
1174
  },
1175
  {
1176
  "epoch": 0.39,
1177
+ "learning_rate": 4.793409927646504e-05,
1178
+ "loss": 2.3548,
1179
  "step": 975
1180
  },
1181
  {
1182
  "epoch": 0.39,
1183
+ "learning_rate": 4.791315678955991e-05,
1184
+ "loss": 2.4576,
1185
  "step": 980
1186
  },
1187
  {
1188
  "epoch": 0.39,
1189
+ "learning_rate": 4.789211331015695e-05,
1190
+ "loss": 2.4523,
1191
  "step": 985
1192
  },
1193
  {
1194
  "epoch": 0.4,
1195
+ "learning_rate": 4.787096893100781e-05,
1196
+ "loss": 2.2542,
1197
  "step": 990
1198
  },
1199
  {
1200
  "epoch": 0.4,
1201
+ "learning_rate": 4.784972374530889e-05,
1202
+ "loss": 2.4385,
1203
  "step": 995
1204
  },
1205
  {
1206
  "epoch": 0.4,
1207
+ "learning_rate": 4.7828377846700925e-05,
1208
+ "loss": 2.3266,
1209
  "step": 1000
1210
  },
1211
  {
1212
  "epoch": 0.4,
1213
+ "learning_rate": 4.780693132926851e-05,
1214
+ "loss": 2.3301,
1215
  "step": 1005
1216
  },
1217
  {
1218
  "epoch": 0.4,
1219
+ "learning_rate": 4.7785384287539755e-05,
1220
+ "loss": 2.3554,
1221
  "step": 1010
1222
  },
1223
  {
1224
  "epoch": 0.41,
1225
+ "learning_rate": 4.776373681648586e-05,
1226
+ "loss": 2.3838,
1227
  "step": 1015
1228
  },
1229
  {
1230
  "epoch": 0.41,
1231
+ "learning_rate": 4.7741989011520645e-05,
1232
+ "loss": 2.3361,
1233
  "step": 1020
1234
  },
1235
  {
1236
  "epoch": 0.41,
1237
+ "learning_rate": 4.772014096850018e-05,
1238
+ "loss": 2.2864,
1239
  "step": 1025
1240
  },
1241
  {
1242
  "epoch": 0.41,
1243
+ "learning_rate": 4.769819278372235e-05,
1244
+ "loss": 2.3955,
1245
  "step": 1030
1246
  },
1247
  {
1248
  "epoch": 0.41,
1249
+ "learning_rate": 4.7676144553926414e-05,
1250
+ "loss": 2.3802,
1251
  "step": 1035
1252
  },
1253
  {
1254
  "epoch": 0.42,
1255
+ "learning_rate": 4.765399637629262e-05,
1256
+ "loss": 2.2856,
1257
  "step": 1040
1258
  },
1259
  {
1260
  "epoch": 0.42,
1261
+ "learning_rate": 4.7631748348441705e-05,
1262
+ "loss": 2.3698,
1263
  "step": 1045
1264
  },
1265
  {
1266
  "epoch": 0.42,
1267
+ "learning_rate": 4.760940056843454e-05,
1268
+ "loss": 2.3305,
1269
  "step": 1050
1270
  },
1271
  {
1272
  "epoch": 0.42,
1273
+ "learning_rate": 4.758695313477166e-05,
1274
+ "loss": 2.4944,
1275
  "step": 1055
1276
  },
1277
  {
1278
  "epoch": 0.43,
1279
+ "learning_rate": 4.756440614639283e-05,
1280
+ "loss": 2.4622,
1281
  "step": 1060
1282
  },
1283
  {
1284
  "epoch": 0.43,
1285
+ "learning_rate": 4.754175970267661e-05,
1286
+ "loss": 2.3995,
1287
  "step": 1065
1288
  },
1289
  {
1290
  "epoch": 0.43,
1291
+ "learning_rate": 4.751901390343995e-05,
1292
+ "loss": 2.343,
1293
  "step": 1070
1294
  },
1295
  {
1296
  "epoch": 0.43,
1297
+ "learning_rate": 4.7496168848937674e-05,
1298
+ "loss": 2.4388,
1299
  "step": 1075
1300
  },
1301
  {
1302
  "epoch": 0.43,
1303
+ "learning_rate": 4.7473224639862116e-05,
1304
+ "loss": 2.3694,
1305
  "step": 1080
1306
  },
1307
  {
1308
  "epoch": 0.44,
1309
+ "learning_rate": 4.745018137734264e-05,
1310
+ "loss": 2.3485,
1311
  "step": 1085
1312
  },
1313
  {
1314
  "epoch": 0.44,
1315
+ "learning_rate": 4.742703916294521e-05,
1316
+ "loss": 2.46,
1317
  "step": 1090
1318
  },
1319
  {
1320
  "epoch": 0.44,
1321
+ "learning_rate": 4.740379809867193e-05,
1322
+ "loss": 2.3138,
1323
  "step": 1095
1324
  },
1325
  {
1326
  "epoch": 0.44,
1327
+ "learning_rate": 4.738045828696056e-05,
1328
+ "loss": 2.4117,
1329
  "step": 1100
1330
  },
1331
  {
1332
  "epoch": 0.44,
1333
+ "learning_rate": 4.735701983068416e-05,
1334
+ "loss": 2.3506,
1335
  "step": 1105
1336
  },
1337
  {
1338
  "epoch": 0.45,
1339
+ "learning_rate": 4.7333482833150525e-05,
1340
+ "loss": 2.3711,
1341
  "step": 1110
1342
  },
1343
  {
1344
  "epoch": 0.45,
1345
+ "learning_rate": 4.730984739810183e-05,
1346
+ "loss": 2.2639,
1347
  "step": 1115
1348
  },
1349
  {
1350
  "epoch": 0.45,
1351
+ "learning_rate": 4.728611362971408e-05,
1352
+ "loss": 2.3749,
1353
  "step": 1120
1354
  },
1355
  {
1356
  "epoch": 0.45,
1357
+ "learning_rate": 4.726228163259673e-05,
1358
+ "loss": 2.4556,
1359
  "step": 1125
1360
  },
1361
  {
1362
  "epoch": 0.45,
1363
+ "learning_rate": 4.7238351511792165e-05,
1364
+ "loss": 2.4233,
1365
  "step": 1130
1366
  },
1367
  {
1368
  "epoch": 0.46,
1369
+ "learning_rate": 4.721432337277529e-05,
1370
+ "loss": 2.3544,
1371
  "step": 1135
1372
  },
1373
  {
1374
  "epoch": 0.46,
1375
+ "learning_rate": 4.7190197321453014e-05,
1376
+ "loss": 2.3654,
1377
  "step": 1140
1378
  },
1379
  {
1380
  "epoch": 0.46,
1381
+ "learning_rate": 4.716597346416382e-05,
1382
+ "loss": 2.3356,
1383
  "step": 1145
1384
  },
1385
  {
1386
  "epoch": 0.46,
1387
+ "learning_rate": 4.7141651907677256e-05,
1388
+ "loss": 2.4522,
1389
  "step": 1150
1390
  },
1391
  {
1392
  "epoch": 0.46,
1393
+ "learning_rate": 4.7117232759193534e-05,
1394
+ "loss": 2.3104,
1395
  "step": 1155
1396
  },
1397
  {
1398
  "epoch": 0.47,
1399
+ "learning_rate": 4.709271612634298e-05,
1400
+ "loss": 2.4492,
1401
  "step": 1160
1402
  },
1403
  {
1404
  "epoch": 0.47,
1405
+ "learning_rate": 4.706810211718561e-05,
1406
+ "loss": 2.4437,
1407
  "step": 1165
1408
  },
1409
  {
1410
  "epoch": 0.47,
1411
+ "learning_rate": 4.7043390840210636e-05,
1412
+ "loss": 2.3099,
1413
  "step": 1170
1414
  },
1415
  {
1416
  "epoch": 0.47,
1417
+ "learning_rate": 4.701858240433597e-05,
1418
+ "loss": 2.351,
1419
  "step": 1175
1420
  },
1421
  {
1422
  "epoch": 0.47,
1423
+ "learning_rate": 4.6993676918907804e-05,
1424
+ "loss": 2.4756,
1425
  "step": 1180
1426
  },
1427
  {
1428
  "epoch": 0.48,
1429
+ "learning_rate": 4.696867449370005e-05,
1430
+ "loss": 2.418,
1431
  "step": 1185
1432
  },
1433
  {
1434
  "epoch": 0.48,
1435
+ "learning_rate": 4.6943575238913904e-05,
1436
+ "loss": 2.5026,
1437
  "step": 1190
1438
  },
1439
  {
1440
  "epoch": 0.48,
1441
+ "learning_rate": 4.691837926517737e-05,
1442
+ "loss": 2.4096,
1443
  "step": 1195
1444
  },
1445
  {
1446
  "epoch": 0.48,
1447
+ "learning_rate": 4.689308668354473e-05,
1448
+ "loss": 2.4101,
1449
  "step": 1200
1450
  },
1451
  {
1452
  "epoch": 0.48,
1453
+ "learning_rate": 4.686769760549607e-05,
1454
+ "loss": 2.4533,
1455
  "step": 1205
1456
  },
1457
  {
1458
  "epoch": 0.49,
1459
+ "learning_rate": 4.684221214293683e-05,
1460
+ "loss": 2.2988,
1461
  "step": 1210
1462
  },
1463
  {
1464
  "epoch": 0.49,
1465
+ "learning_rate": 4.681663040819724e-05,
1466
+ "loss": 2.3937,
1467
  "step": 1215
1468
  },
1469
  {
1470
  "epoch": 0.49,
1471
+ "learning_rate": 4.679095251403189e-05,
1472
+ "loss": 2.3954,
1473
  "step": 1220
1474
  },
1475
  {
1476
  "epoch": 0.49,
1477
+ "learning_rate": 4.676517857361917e-05,
1478
+ "loss": 2.3275,
1479
  "step": 1225
1480
  },
1481
  {
1482
  "epoch": 0.49,
1483
+ "learning_rate": 4.673930870056085e-05,
1484
+ "loss": 2.4748,
1485
  "step": 1230
1486
  },
1487
  {
1488
  "epoch": 0.5,
1489
+ "learning_rate": 4.67133430088815e-05,
1490
+ "loss": 2.2496,
1491
  "step": 1235
1492
  },
1493
  {
1494
  "epoch": 0.5,
1495
+ "learning_rate": 4.668728161302803e-05,
1496
+ "loss": 2.3226,
1497
  "step": 1240
1498
  },
1499
  {
1500
  "epoch": 0.5,
1501
+ "learning_rate": 4.666112462786919e-05,
1502
+ "loss": 2.4655,
1503
  "step": 1245
1504
  },
1505
  {
1506
  "epoch": 0.5,
1507
+ "learning_rate": 4.663487216869502e-05,
1508
+ "loss": 2.3751,
1509
  "step": 1250
1510
  },
1511
  {
1512
  "epoch": 0.5,
1513
+ "learning_rate": 4.660852435121642e-05,
1514
+ "loss": 2.4136,
1515
  "step": 1255
1516
  },
1517
  {
1518
  "epoch": 0.51,
1519
+ "learning_rate": 4.658208129156456e-05,
1520
+ "loss": 2.3612,
1521
  "step": 1260
1522
  },
1523
  {
1524
  "epoch": 0.51,
1525
+ "learning_rate": 4.655554310629041e-05,
1526
+ "loss": 2.3675,
1527
  "step": 1265
1528
  },
1529
  {
1530
  "epoch": 0.51,
1531
+ "learning_rate": 4.652890991236421e-05,
1532
+ "loss": 2.2831,
1533
  "step": 1270
1534
  },
1535
  {
1536
  "epoch": 0.51,
1537
+ "learning_rate": 4.650218182717498e-05,
1538
+ "loss": 2.3958,
1539
  "step": 1275
1540
  },
1541
  {
1542
  "epoch": 0.51,
1543
+ "learning_rate": 4.647535896852997e-05,
1544
+ "loss": 2.3236,
1545
  "step": 1280
1546
  },
1547
  {
1548
  "epoch": 0.52,
1549
+ "learning_rate": 4.6448441454654156e-05,
1550
+ "loss": 2.3904,
1551
  "step": 1285
1552
  },
1553
  {
1554
  "epoch": 0.52,
1555
+ "learning_rate": 4.642142940418973e-05,
1556
+ "loss": 2.3565,
1557
  "step": 1290
1558
  },
1559
  {
1560
  "epoch": 0.52,
1561
+ "learning_rate": 4.6394322936195556e-05,
1562
+ "loss": 2.5229,
1563
  "step": 1295
1564
  },
1565
  {
1566
  "epoch": 0.52,
1567
+ "learning_rate": 4.636712217014665e-05,
1568
+ "loss": 2.369,
1569
  "step": 1300
1570
  },
1571
  {
1572
  "epoch": 0.52,
1573
+ "learning_rate": 4.6339827225933665e-05,
1574
+ "loss": 2.3444,
1575
  "step": 1305
1576
  },
1577
  {
1578
  "epoch": 0.53,
1579
+ "learning_rate": 4.6312438223862356e-05,
1580
+ "loss": 2.3387,
1581
  "step": 1310
1582
  },
1583
  {
1584
  "epoch": 0.53,
1585
+ "learning_rate": 4.628495528465305e-05,
1586
+ "loss": 2.3589,
1587
  "step": 1315
1588
  },
1589
  {
1590
  "epoch": 0.53,
1591
+ "learning_rate": 4.625737852944011e-05,
1592
+ "loss": 2.4779,
1593
  "step": 1320
1594
  },
1595
  {
1596
  "epoch": 0.53,
1597
+ "learning_rate": 4.62297080797714e-05,
1598
+ "loss": 2.4398,
1599
  "step": 1325
1600
  },
1601
  {
1602
  "epoch": 0.53,
1603
+ "learning_rate": 4.620194405760777e-05,
1604
+ "loss": 2.383,
1605
  "step": 1330
1606
  },
1607
  {
1608
  "epoch": 0.54,
1609
+ "learning_rate": 4.6174086585322485e-05,
1610
+ "loss": 2.424,
1611
  "step": 1335
1612
  },
1613
  {
1614
  "epoch": 0.54,
1615
+ "learning_rate": 4.6146135785700705e-05,
1616
+ "loss": 2.4289,
1617
  "step": 1340
1618
  },
1619
  {
1620
  "epoch": 0.54,
1621
+ "learning_rate": 4.611809178193896e-05,
1622
+ "loss": 2.225,
1623
  "step": 1345
1624
  },
1625
  {
1626
  "epoch": 0.54,
1627
+ "learning_rate": 4.608995469764455e-05,
1628
+ "loss": 2.3643,
1629
  "step": 1350
1630
  },
1631
  {
1632
  "epoch": 0.54,
1633
+ "learning_rate": 4.606172465683508e-05,
1634
+ "loss": 2.4278,
1635
  "step": 1355
1636
  },
1637
  {
1638
  "epoch": 0.55,
1639
+ "learning_rate": 4.6033401783937844e-05,
1640
+ "loss": 2.4464,
1641
  "step": 1360
1642
  },
1643
  {
1644
  "epoch": 0.55,
1645
+ "learning_rate": 4.600498620378932e-05,
1646
+ "loss": 2.3639,
1647
  "step": 1365
1648
  },
1649
  {
1650
  "epoch": 0.55,
1651
+ "learning_rate": 4.597647804163461e-05,
1652
+ "loss": 2.3595,
1653
  "step": 1370
1654
  },
1655
  {
1656
  "epoch": 0.55,
1657
+ "learning_rate": 4.5947877423126864e-05,
1658
+ "loss": 2.3569,
1659
  "step": 1375
1660
  },
1661
  {
1662
  "epoch": 0.55,
1663
+ "learning_rate": 4.591918447432675e-05,
1664
+ "loss": 2.4167,
1665
  "step": 1380
1666
  },
1667
  {
1668
  "epoch": 0.56,
1669
+ "learning_rate": 4.589039932170188e-05,
1670
+ "loss": 2.3959,
1671
  "step": 1385
1672
  },
1673
  {
1674
  "epoch": 0.56,
1675
+ "learning_rate": 4.5861522092126313e-05,
1676
+ "loss": 2.3629,
1677
  "step": 1390
1678
  },
1679
  {
1680
  "epoch": 0.56,
1681
+ "learning_rate": 4.5832552912879894e-05,
1682
+ "loss": 2.3978,
1683
  "step": 1395
1684
  },
1685
  {
1686
  "epoch": 0.56,
1687
+ "learning_rate": 4.580349191164777e-05,
1688
+ "loss": 2.4372,
1689
  "step": 1400
1690
  },
1691
  {
1692
  "epoch": 0.56,
1693
+ "learning_rate": 4.5774339216519815e-05,
1694
+ "loss": 2.4422,
1695
  "step": 1405
1696
  },
1697
  {
1698
  "epoch": 0.57,
1699
+ "learning_rate": 4.574509495599003e-05,
1700
+ "loss": 2.3786,
1701
  "step": 1410
1702
  },
1703
  {
1704
  "epoch": 0.57,
1705
+ "learning_rate": 4.5715759258956025e-05,
1706
+ "loss": 2.3467,
1707
  "step": 1415
1708
  },
1709
  {
1710
  "epoch": 0.57,
1711
+ "learning_rate": 4.568633225471841e-05,
1712
+ "loss": 2.371,
1713
  "step": 1420
1714
  },
1715
  {
1716
  "epoch": 0.57,
1717
+ "learning_rate": 4.565681407298026e-05,
1718
+ "loss": 2.3776,
1719
  "step": 1425
1720
  },
1721
  {
1722
  "epoch": 0.57,
1723
+ "learning_rate": 4.5627204843846504e-05,
1724
+ "loss": 2.3036,
1725
  "step": 1430
1726
  },
1727
  {
1728
  "epoch": 0.58,
1729
+ "learning_rate": 4.559750469782339e-05,
1730
+ "loss": 2.446,
1731
  "step": 1435
1732
  },
1733
  {
1734
  "epoch": 0.58,
1735
+ "learning_rate": 4.5567713765817886e-05,
1736
+ "loss": 2.3521,
1737
  "step": 1440
1738
  },
1739
  {
1740
  "epoch": 0.58,
1741
+ "learning_rate": 4.553783217913712e-05,
1742
+ "loss": 2.3322,
1743
  "step": 1445
1744
  },
1745
  {
1746
  "epoch": 0.58,
1747
+ "learning_rate": 4.550786006948777e-05,
1748
+ "loss": 2.3966,
1749
  "step": 1450
1750
  },
1751
  {
1752
  "epoch": 0.58,
1753
+ "learning_rate": 4.547779756897554e-05,
1754
+ "loss": 2.35,
1755
  "step": 1455
1756
  },
1757
  {
1758
  "epoch": 0.59,
1759
+ "learning_rate": 4.544764481010451e-05,
1760
+ "loss": 2.3739,
1761
  "step": 1460
1762
  },
1763
  {
1764
  "epoch": 0.59,
1765
+ "learning_rate": 4.541740192577658e-05,
1766
+ "loss": 2.4817,
1767
  "step": 1465
1768
  },
1769
  {
1770
  "epoch": 0.59,
1771
+ "learning_rate": 4.538706904929092e-05,
1772
+ "loss": 2.3857,
1773
  "step": 1470
1774
  },
1775
  {
1776
  "epoch": 0.59,
1777
+ "learning_rate": 4.5356646314343344e-05,
1778
+ "loss": 2.3453,
1779
  "step": 1475
1780
  },
1781
  {
1782
  "epoch": 0.59,
1783
+ "learning_rate": 4.532613385502571e-05,
1784
+ "loss": 2.2916,
1785
  "step": 1480
1786
  },
1787
  {
1788
  "epoch": 0.6,
1789
+ "learning_rate": 4.5295531805825355e-05,
1790
+ "loss": 2.338,
1791
  "step": 1485
1792
  },
1793
  {
1794
  "epoch": 0.6,
1795
+ "learning_rate": 4.526484030162449e-05,
1796
+ "loss": 2.2829,
1797
  "step": 1490
1798
  },
1799
  {
1800
  "epoch": 0.6,
1801
+ "learning_rate": 4.5234059477699635e-05,
1802
+ "loss": 2.3761,
1803
  "step": 1495
1804
  },
1805
  {
1806
  "epoch": 0.6,
1807
+ "learning_rate": 4.520318946972097e-05,
1808
+ "loss": 2.4692,
1809
  "step": 1500
1810
  },
1811
  {
1812
  "epoch": 0.6,
1813
+ "learning_rate": 4.517223041375176e-05,
1814
+ "loss": 2.3689,
1815
  "step": 1505
1816
  },
1817
  {
1818
  "epoch": 0.61,
1819
+ "learning_rate": 4.514118244624779e-05,
1820
+ "loss": 2.4082,
1821
  "step": 1510
1822
  },
1823
  {
1824
  "epoch": 0.61,
1825
+ "learning_rate": 4.51100457040567e-05,
1826
+ "loss": 2.4152,
1827
  "step": 1515
1828
  },
1829
  {
1830
  "epoch": 0.61,
1831
+ "learning_rate": 4.507882032441745e-05,
1832
+ "loss": 2.389,
1833
  "step": 1520
1834
  },
1835
  {
1836
  "epoch": 0.61,
1837
+ "learning_rate": 4.504750644495965e-05,
1838
+ "loss": 2.4914,
1839
  "step": 1525
1840
  },
1841
  {
1842
  "epoch": 0.61,
1843
+ "learning_rate": 4.501610420370298e-05,
1844
+ "loss": 2.4066,
1845
  "step": 1530
1846
  },
1847
  {
1848
  "epoch": 0.62,
1849
+ "learning_rate": 4.4984613739056635e-05,
1850
+ "loss": 2.3664,
1851
  "step": 1535
1852
  },
1853
  {
1854
  "epoch": 0.62,
1855
+ "learning_rate": 4.4953035189818597e-05,
1856
+ "loss": 2.2608,
1857
  "step": 1540
1858
  },
1859
  {
1860
  "epoch": 0.62,
1861
+ "learning_rate": 4.4921368695175145e-05,
1862
+ "loss": 2.3886,
1863
  "step": 1545
1864
  },
1865
  {
1866
  "epoch": 0.62,
1867
+ "learning_rate": 4.488961439470015e-05,
1868
+ "loss": 2.3459,
1869
  "step": 1550
1870
  },
1871
  {
1872
  "epoch": 0.62,
1873
+ "learning_rate": 4.4857772428354536e-05,
1874
+ "loss": 2.3653,
1875
  "step": 1555
1876
  },
1877
  {
1878
  "epoch": 0.63,
1879
+ "learning_rate": 4.4825842936485585e-05,
1880
+ "loss": 2.3518,
1881
  "step": 1560
1882
  },
1883
  {
1884
  "epoch": 0.63,
1885
+ "learning_rate": 4.4793826059826394e-05,
1886
+ "loss": 2.5581,
1887
  "step": 1565
1888
  },
1889
  {
1890
  "epoch": 0.63,
1891
+ "learning_rate": 4.4761721939495185e-05,
1892
+ "loss": 2.4307,
1893
  "step": 1570
1894
  },
1895
  {
1896
  "epoch": 0.63,
1897
+ "learning_rate": 4.472953071699475e-05,
1898
+ "loss": 2.4616,
1899
  "step": 1575
1900
  },
1901
  {
1902
  "epoch": 0.63,
1903
+ "learning_rate": 4.469725253421177e-05,
1904
+ "loss": 2.4261,
1905
  "step": 1580
1906
  },
1907
  {
1908
  "epoch": 0.64,
1909
+ "learning_rate": 4.466488753341623e-05,
1910
+ "loss": 2.404,
1911
  "step": 1585
1912
  },
1913
  {
1914
  "epoch": 0.64,
1915
+ "learning_rate": 4.4632435857260754e-05,
1916
+ "loss": 2.374,
1917
  "step": 1590
1918
  },
1919
  {
1920
  "epoch": 0.64,
1921
+ "learning_rate": 4.459989764878004e-05,
1922
+ "loss": 2.3282,
1923
  "step": 1595
1924
  },
1925
  {
1926
  "epoch": 0.64,
1927
+ "learning_rate": 4.456727305139013e-05,
1928
+ "loss": 2.3896,
1929
  "step": 1600
1930
  }
1931
  ],
checkpoint-1600/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c481a87016b183d9406558b8812ee05f00d60a3c721055833e3cdda34cf9bb26
3
  size 4920
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1b42f69ee6ce9030b2875b2859f3499c1e1c8f11e9ffa417d0dbdab2b51173da
3
  size 4920