|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 7.65345170671973, |
|
"eval_steps": 1000, |
|
"global_step": 200000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.019133629266799325, |
|
"grad_norm": 3.534731864929199, |
|
"learning_rate": 0.0002982, |
|
"loss": 2.0385, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.03826725853359865, |
|
"grad_norm": 3.997960090637207, |
|
"learning_rate": 0.00029925413533834583, |
|
"loss": 1.5221, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.03826725853359865, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 316.3189, |
|
"eval_samples_per_second": 22.199, |
|
"eval_steps_per_second": 2.776, |
|
"eval_wer": 0.9139914321980114, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.05740088780039798, |
|
"grad_norm": 2.7378287315368652, |
|
"learning_rate": 0.0002985022556390977, |
|
"loss": 1.4633, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.0765345170671973, |
|
"grad_norm": 3.522592782974243, |
|
"learning_rate": 0.0002977503759398496, |
|
"loss": 1.4078, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.0765345170671973, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 316.2822, |
|
"eval_samples_per_second": 22.202, |
|
"eval_steps_per_second": 2.776, |
|
"eval_wer": 0.9065607150412524, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.09566814633399663, |
|
"grad_norm": 8.136958122253418, |
|
"learning_rate": 0.0002969984962406015, |
|
"loss": 1.3589, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.11480177560079596, |
|
"grad_norm": 2.9784021377563477, |
|
"learning_rate": 0.00029624661654135335, |
|
"loss": 1.336, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.11480177560079596, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 315.7333, |
|
"eval_samples_per_second": 22.24, |
|
"eval_steps_per_second": 2.781, |
|
"eval_wer": 0.8949386503067485, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.13393540486759528, |
|
"grad_norm": 3.5435404777526855, |
|
"learning_rate": 0.0002954947368421052, |
|
"loss": 1.3258, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.1530690341343946, |
|
"grad_norm": 2.8903892040252686, |
|
"learning_rate": 0.0002947443609022556, |
|
"loss": 1.2887, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.1530690341343946, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 316.2047, |
|
"eval_samples_per_second": 22.207, |
|
"eval_steps_per_second": 2.777, |
|
"eval_wer": 0.8745901205838799, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.17220266340119394, |
|
"grad_norm": 2.647843360900879, |
|
"learning_rate": 0.000293993984962406, |
|
"loss": 1.2713, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.19133629266799326, |
|
"grad_norm": 3.8844950199127197, |
|
"learning_rate": 0.00029324210526315787, |
|
"loss": 1.26, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.19133629266799326, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 315.2217, |
|
"eval_samples_per_second": 22.276, |
|
"eval_steps_per_second": 2.785, |
|
"eval_wer": 0.8671197376771737, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.21046992193479258, |
|
"grad_norm": 3.8498334884643555, |
|
"learning_rate": 0.00029249022556390974, |
|
"loss": 1.231, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.22960355120159193, |
|
"grad_norm": 2.3221595287323, |
|
"learning_rate": 0.0002917383458646616, |
|
"loss": 1.2188, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.22960355120159193, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 315.8377, |
|
"eval_samples_per_second": 22.233, |
|
"eval_steps_per_second": 2.78, |
|
"eval_wer": 0.8699756716733658, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.24873718046839124, |
|
"grad_norm": 2.9522998332977295, |
|
"learning_rate": 0.00029098646616541353, |
|
"loss": 1.1974, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 0.26787080973519056, |
|
"grad_norm": 5.0502214431762695, |
|
"learning_rate": 0.0002902345864661654, |
|
"loss": 1.1992, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.26787080973519056, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 315.6699, |
|
"eval_samples_per_second": 22.245, |
|
"eval_steps_per_second": 2.781, |
|
"eval_wer": 0.8537920456949439, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.2870044390019899, |
|
"grad_norm": 3.480316400527954, |
|
"learning_rate": 0.00028948270676691727, |
|
"loss": 1.1786, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 0.3061380682687892, |
|
"grad_norm": 3.4583587646484375, |
|
"learning_rate": 0.00028873082706766913, |
|
"loss": 1.1773, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.3061380682687892, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 315.6868, |
|
"eval_samples_per_second": 22.244, |
|
"eval_steps_per_second": 2.781, |
|
"eval_wer": 0.8329278612227629, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.32527169753558854, |
|
"grad_norm": 3.2121310234069824, |
|
"learning_rate": 0.00028797894736842106, |
|
"loss": 1.1475, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 0.3444053268023879, |
|
"grad_norm": 6.1687846183776855, |
|
"learning_rate": 0.0002872285714285714, |
|
"loss": 1.1602, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.3444053268023879, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 315.9185, |
|
"eval_samples_per_second": 22.227, |
|
"eval_steps_per_second": 2.779, |
|
"eval_wer": 0.8200497144066003, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.3635389560691872, |
|
"grad_norm": 2.9368505477905273, |
|
"learning_rate": 0.0002864766917293233, |
|
"loss": 1.1233, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 0.3826725853359865, |
|
"grad_norm": 2.0912511348724365, |
|
"learning_rate": 0.00028572481203007513, |
|
"loss": 1.1128, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.3826725853359865, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 316.2709, |
|
"eval_samples_per_second": 22.202, |
|
"eval_steps_per_second": 2.776, |
|
"eval_wer": 0.8059287074254284, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.40180621460278587, |
|
"grad_norm": 2.822795867919922, |
|
"learning_rate": 0.00028497293233082705, |
|
"loss": 1.096, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 0.42093984386958516, |
|
"grad_norm": 2.4663002490997314, |
|
"learning_rate": 0.0002842210526315789, |
|
"loss": 1.0893, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 0.42093984386958516, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 317.2266, |
|
"eval_samples_per_second": 22.136, |
|
"eval_steps_per_second": 2.768, |
|
"eval_wer": 0.827678760313095, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 0.4400734731363845, |
|
"grad_norm": 1.9610426425933838, |
|
"learning_rate": 0.0002834691729323308, |
|
"loss": 1.0702, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 0.45920710240318385, |
|
"grad_norm": 3.893796682357788, |
|
"learning_rate": 0.00028271729323308266, |
|
"loss": 1.0809, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 0.45920710240318385, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 316.2707, |
|
"eval_samples_per_second": 22.202, |
|
"eval_steps_per_second": 2.776, |
|
"eval_wer": 0.8028480008462027, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 0.47834073166998314, |
|
"grad_norm": 3.08317494392395, |
|
"learning_rate": 0.0002819669172932331, |
|
"loss": 1.0616, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 0.4974743609367825, |
|
"grad_norm": 1.9941602945327759, |
|
"learning_rate": 0.0002812150375939849, |
|
"loss": 1.0426, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 0.4974743609367825, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 314.9265, |
|
"eval_samples_per_second": 22.297, |
|
"eval_steps_per_second": 2.788, |
|
"eval_wer": 0.8004416120160779, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 0.5166079902035818, |
|
"grad_norm": 2.075686454772949, |
|
"learning_rate": 0.0002804646616541353, |
|
"loss": 1.0316, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 0.5357416194703811, |
|
"grad_norm": 1.7053288221359253, |
|
"learning_rate": 0.0002797127819548872, |
|
"loss": 1.0202, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 0.5357416194703811, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 315.6762, |
|
"eval_samples_per_second": 22.244, |
|
"eval_steps_per_second": 2.781, |
|
"eval_wer": 0.7824598053733869, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 0.5548752487371804, |
|
"grad_norm": 2.8614988327026367, |
|
"learning_rate": 0.00027896240601503757, |
|
"loss": 1.0398, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 0.5740088780039798, |
|
"grad_norm": 2.7677505016326904, |
|
"learning_rate": 0.0002782105263157895, |
|
"loss": 1.0005, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 0.5740088780039798, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 316.6237, |
|
"eval_samples_per_second": 22.178, |
|
"eval_steps_per_second": 2.773, |
|
"eval_wer": 0.7785725618785699, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 0.5931425072707791, |
|
"grad_norm": 1.8297598361968994, |
|
"learning_rate": 0.0002774586466165413, |
|
"loss": 1.0069, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 0.6122761365375784, |
|
"grad_norm": 3.259901285171509, |
|
"learning_rate": 0.0002767067669172932, |
|
"loss": 0.9987, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 0.6122761365375784, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 317.076, |
|
"eval_samples_per_second": 22.146, |
|
"eval_steps_per_second": 2.769, |
|
"eval_wer": 0.752102284747197, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 0.6314097658043778, |
|
"grad_norm": 2.1204450130462646, |
|
"learning_rate": 0.0002759548872180451, |
|
"loss": 0.9862, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 0.6505433950711771, |
|
"grad_norm": 4.308948993682861, |
|
"learning_rate": 0.00027520300751879696, |
|
"loss": 0.9705, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 0.6505433950711771, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 316.4489, |
|
"eval_samples_per_second": 22.19, |
|
"eval_steps_per_second": 2.775, |
|
"eval_wer": 0.7592817854876243, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 0.6696770243379764, |
|
"grad_norm": 3.809417247772217, |
|
"learning_rate": 0.00027445112781954883, |
|
"loss": 0.9836, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 0.6888106536047758, |
|
"grad_norm": 8.3826904296875, |
|
"learning_rate": 0.00027369924812030075, |
|
"loss": 0.9884, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 0.6888106536047758, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 317.2775, |
|
"eval_samples_per_second": 22.132, |
|
"eval_steps_per_second": 2.767, |
|
"eval_wer": 0.7380606092659192, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 0.7079442828715751, |
|
"grad_norm": 2.119809627532959, |
|
"learning_rate": 0.0002729473684210526, |
|
"loss": 0.9554, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 0.7270779121383744, |
|
"grad_norm": 2.714183807373047, |
|
"learning_rate": 0.0002721954887218045, |
|
"loss": 0.9618, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 0.7270779121383744, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 316.8028, |
|
"eval_samples_per_second": 22.165, |
|
"eval_steps_per_second": 2.771, |
|
"eval_wer": 0.7364739792680347, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 0.7462115414051738, |
|
"grad_norm": 4.593650817871094, |
|
"learning_rate": 0.00027144360902255635, |
|
"loss": 0.9389, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 0.765345170671973, |
|
"grad_norm": 1.7419074773788452, |
|
"learning_rate": 0.0002706917293233083, |
|
"loss": 0.9374, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 0.765345170671973, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 316.3558, |
|
"eval_samples_per_second": 22.197, |
|
"eval_steps_per_second": 2.775, |
|
"eval_wer": 0.7415644171779141, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 0.7844787999387723, |
|
"grad_norm": NaN, |
|
"learning_rate": 0.0002699413533834586, |
|
"loss": 0.9241, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 0.8036124292055717, |
|
"grad_norm": 2.104843854904175, |
|
"learning_rate": 0.0002691894736842105, |
|
"loss": 0.9175, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 0.8036124292055717, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 315.3422, |
|
"eval_samples_per_second": 22.268, |
|
"eval_steps_per_second": 2.784, |
|
"eval_wer": 0.7299291305267611, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 0.822746058472371, |
|
"grad_norm": 3.1283345222473145, |
|
"learning_rate": 0.0002684390977443609, |
|
"loss": 0.9193, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 0.8418796877391703, |
|
"grad_norm": 3.1702754497528076, |
|
"learning_rate": 0.00026768721804511274, |
|
"loss": 0.9247, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 0.8418796877391703, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 316.0504, |
|
"eval_samples_per_second": 22.218, |
|
"eval_steps_per_second": 2.778, |
|
"eval_wer": 0.7260154431986461, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 0.8610133170059697, |
|
"grad_norm": 4.537879943847656, |
|
"learning_rate": 0.00026693533834586466, |
|
"loss": 0.9154, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 0.880146946272769, |
|
"grad_norm": 3.9466328620910645, |
|
"learning_rate": 0.00026618345864661653, |
|
"loss": 0.9001, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 0.880146946272769, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 315.881, |
|
"eval_samples_per_second": 22.23, |
|
"eval_steps_per_second": 2.78, |
|
"eval_wer": 0.72349005711868, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 0.8992805755395683, |
|
"grad_norm": 5.256113052368164, |
|
"learning_rate": 0.0002654315789473684, |
|
"loss": 0.9027, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 0.9184142048063677, |
|
"grad_norm": 2.1492791175842285, |
|
"learning_rate": 0.00026467969924812027, |
|
"loss": 0.8836, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 0.9184142048063677, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 315.4361, |
|
"eval_samples_per_second": 22.261, |
|
"eval_steps_per_second": 2.783, |
|
"eval_wer": 0.7085757351385656, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 0.937547834073167, |
|
"grad_norm": 3.541306734085083, |
|
"learning_rate": 0.0002639278195488722, |
|
"loss": 0.8782, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 0.9566814633399663, |
|
"grad_norm": 1.7392828464508057, |
|
"learning_rate": 0.000263175939849624, |
|
"loss": 0.8789, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 0.9566814633399663, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 316.1779, |
|
"eval_samples_per_second": 22.209, |
|
"eval_steps_per_second": 2.777, |
|
"eval_wer": 0.7144859318806854, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 0.9758150926067657, |
|
"grad_norm": 2.769277572631836, |
|
"learning_rate": 0.0002624240601503759, |
|
"loss": 0.8776, |
|
"step": 25500 |
|
}, |
|
{ |
|
"epoch": 0.994948721873565, |
|
"grad_norm": 3.246126413345337, |
|
"learning_rate": 0.0002616721804511278, |
|
"loss": 0.8734, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 0.994948721873565, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 315.5548, |
|
"eval_samples_per_second": 22.253, |
|
"eval_steps_per_second": 2.782, |
|
"eval_wer": 0.7195102602073197, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 1.0140823511403643, |
|
"grad_norm": 1.4256001710891724, |
|
"learning_rate": 0.00026092030075187966, |
|
"loss": 0.8471, |
|
"step": 26500 |
|
}, |
|
{ |
|
"epoch": 1.0332159804071637, |
|
"grad_norm": 1.2950644493103027, |
|
"learning_rate": 0.00026016992481203005, |
|
"loss": 0.8398, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 1.0332159804071637, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 314.7022, |
|
"eval_samples_per_second": 22.313, |
|
"eval_steps_per_second": 2.79, |
|
"eval_wer": 0.6837978633382695, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 1.0523496096739628, |
|
"grad_norm": 0.8874345421791077, |
|
"learning_rate": 0.0002594180451127819, |
|
"loss": 0.8403, |
|
"step": 27500 |
|
}, |
|
{ |
|
"epoch": 1.0714832389407623, |
|
"grad_norm": 1.146148920059204, |
|
"learning_rate": 0.0002586661654135338, |
|
"loss": 0.8268, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 1.0714832389407623, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 313.7884, |
|
"eval_samples_per_second": 22.378, |
|
"eval_steps_per_second": 2.798, |
|
"eval_wer": 0.6793288555108948, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 1.0906168682075617, |
|
"grad_norm": 0.687147319316864, |
|
"learning_rate": 0.0002579142857142857, |
|
"loss": 0.8228, |
|
"step": 28500 |
|
}, |
|
{ |
|
"epoch": 1.1097504974743608, |
|
"grad_norm": 0.614025890827179, |
|
"learning_rate": 0.00025716390977443605, |
|
"loss": 0.8196, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 1.1097504974743608, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 315.3179, |
|
"eval_samples_per_second": 22.27, |
|
"eval_steps_per_second": 2.784, |
|
"eval_wer": 0.66387243494817, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 1.1288841267411602, |
|
"grad_norm": 0.7900418043136597, |
|
"learning_rate": 0.0002564120300751879, |
|
"loss": 0.8262, |
|
"step": 29500 |
|
}, |
|
{ |
|
"epoch": 1.1480177560079596, |
|
"grad_norm": 0.9414839148521423, |
|
"learning_rate": 0.00025566015037593984, |
|
"loss": 0.8124, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 1.1480177560079596, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 314.1972, |
|
"eval_samples_per_second": 22.349, |
|
"eval_steps_per_second": 2.794, |
|
"eval_wer": 0.6615453776179395, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 1.1671513852747588, |
|
"grad_norm": 0.9619298577308655, |
|
"learning_rate": 0.0002549082706766917, |
|
"loss": 0.8052, |
|
"step": 30500 |
|
}, |
|
{ |
|
"epoch": 1.1862850145415582, |
|
"grad_norm": 1.3762531280517578, |
|
"learning_rate": 0.0002541563909774436, |
|
"loss": 0.7935, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 1.1862850145415582, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 315.3254, |
|
"eval_samples_per_second": 22.269, |
|
"eval_steps_per_second": 2.784, |
|
"eval_wer": 0.6607520626189972, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 1.2054186438083576, |
|
"grad_norm": 1.0670289993286133, |
|
"learning_rate": 0.00025340451127819544, |
|
"loss": 0.799, |
|
"step": 31500 |
|
}, |
|
{ |
|
"epoch": 1.2245522730751568, |
|
"grad_norm": 1.5455262660980225, |
|
"learning_rate": 0.00025265263157894736, |
|
"loss": 0.817, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 1.2245522730751568, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 314.3401, |
|
"eval_samples_per_second": 22.339, |
|
"eval_steps_per_second": 2.793, |
|
"eval_wer": 0.6709726041887032, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 1.2436859023419562, |
|
"grad_norm": 1.185735821723938, |
|
"learning_rate": 0.00025190075187969923, |
|
"loss": 0.7835, |
|
"step": 32500 |
|
}, |
|
{ |
|
"epoch": 1.2628195316087556, |
|
"grad_norm": 0.8058122396469116, |
|
"learning_rate": 0.0002511503759398496, |
|
"loss": 0.7975, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 1.2628195316087556, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 315.7071, |
|
"eval_samples_per_second": 22.242, |
|
"eval_steps_per_second": 2.781, |
|
"eval_wer": 0.66950497144066, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 1.2819531608755548, |
|
"grad_norm": 0.6225046515464783, |
|
"learning_rate": 0.0002503984962406015, |
|
"loss": 0.8023, |
|
"step": 33500 |
|
}, |
|
{ |
|
"epoch": 1.3010867901423542, |
|
"grad_norm": 0.8266538381576538, |
|
"learning_rate": 0.00024964661654135336, |
|
"loss": 0.7746, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 1.3010867901423542, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 316.3157, |
|
"eval_samples_per_second": 22.199, |
|
"eval_steps_per_second": 2.776, |
|
"eval_wer": 0.6674423524434102, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 1.3202204194091536, |
|
"grad_norm": 0.8520437479019165, |
|
"learning_rate": 0.00024889624060150375, |
|
"loss": 0.7828, |
|
"step": 34500 |
|
}, |
|
{ |
|
"epoch": 1.3393540486759528, |
|
"grad_norm": 0.9565845131874084, |
|
"learning_rate": 0.0002481443609022556, |
|
"loss": 0.8013, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 1.3393540486759528, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 315.1504, |
|
"eval_samples_per_second": 22.281, |
|
"eval_steps_per_second": 2.786, |
|
"eval_wer": 0.6586762217050983, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 1.3584876779427522, |
|
"grad_norm": 1.2239874601364136, |
|
"learning_rate": 0.0002473924812030075, |
|
"loss": 0.7765, |
|
"step": 35500 |
|
}, |
|
{ |
|
"epoch": 1.3776213072095516, |
|
"grad_norm": 0.6564140319824219, |
|
"learning_rate": 0.00024664060150375935, |
|
"loss": 0.7703, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 1.3776213072095516, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 316.0508, |
|
"eval_samples_per_second": 22.218, |
|
"eval_steps_per_second": 2.778, |
|
"eval_wer": 0.6388301248148932, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 1.3967549364763507, |
|
"grad_norm": 0.7060734629631042, |
|
"learning_rate": 0.0002458887218045113, |
|
"loss": 0.7685, |
|
"step": 36500 |
|
}, |
|
{ |
|
"epoch": 1.4158885657431501, |
|
"grad_norm": 1.3065494298934937, |
|
"learning_rate": 0.00024513684210526314, |
|
"loss": 0.7581, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 1.4158885657431501, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 315.8618, |
|
"eval_samples_per_second": 22.231, |
|
"eval_steps_per_second": 2.78, |
|
"eval_wer": 0.6461021789718637, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 1.4350221950099495, |
|
"grad_norm": 1.282378911972046, |
|
"learning_rate": 0.00024438646616541354, |
|
"loss": 0.7693, |
|
"step": 37500 |
|
}, |
|
{ |
|
"epoch": 1.4541558242767487, |
|
"grad_norm": 1.1911076307296753, |
|
"learning_rate": 0.00024363458646616538, |
|
"loss": 0.7468, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 1.4541558242767487, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 316.6111, |
|
"eval_samples_per_second": 22.179, |
|
"eval_steps_per_second": 2.773, |
|
"eval_wer": 0.6333959170721388, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 1.4732894535435481, |
|
"grad_norm": 0.7600271105766296, |
|
"learning_rate": 0.00024288270676691727, |
|
"loss": 0.7582, |
|
"step": 38500 |
|
}, |
|
{ |
|
"epoch": 1.4924230828103475, |
|
"grad_norm": 0.6814852356910706, |
|
"learning_rate": 0.00024213082706766914, |
|
"loss": 0.7534, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 1.4924230828103475, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 316.0864, |
|
"eval_samples_per_second": 22.215, |
|
"eval_steps_per_second": 2.778, |
|
"eval_wer": 0.6300639940765813, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 1.5115567120771467, |
|
"grad_norm": 0.7314792275428772, |
|
"learning_rate": 0.00024137894736842104, |
|
"loss": 0.7336, |
|
"step": 39500 |
|
}, |
|
{ |
|
"epoch": 1.530690341343946, |
|
"grad_norm": 1.1860034465789795, |
|
"learning_rate": 0.0002406270676691729, |
|
"loss": 0.752, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 1.530690341343946, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 318.462, |
|
"eval_samples_per_second": 22.05, |
|
"eval_steps_per_second": 2.757, |
|
"eval_wer": 0.6221969536704041, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 1.5498239706107455, |
|
"grad_norm": 1.3469467163085938, |
|
"learning_rate": 0.0002398766917293233, |
|
"loss": 0.7508, |
|
"step": 40500 |
|
}, |
|
{ |
|
"epoch": 1.5689575998775447, |
|
"grad_norm": 0.7789831757545471, |
|
"learning_rate": 0.00023912481203007516, |
|
"loss": 0.736, |
|
"step": 41000 |
|
}, |
|
{ |
|
"epoch": 1.5689575998775447, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 316.8112, |
|
"eval_samples_per_second": 22.165, |
|
"eval_steps_per_second": 2.771, |
|
"eval_wer": 0.6203326634228897, |
|
"step": 41000 |
|
}, |
|
{ |
|
"epoch": 1.588091229144344, |
|
"grad_norm": 1.3181277513504028, |
|
"learning_rate": 0.00023837443609022553, |
|
"loss": 0.752, |
|
"step": 41500 |
|
}, |
|
{ |
|
"epoch": 1.6072248584111435, |
|
"grad_norm": 0.6017114520072937, |
|
"learning_rate": 0.00023762255639097742, |
|
"loss": 0.7188, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 1.6072248584111435, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 315.1913, |
|
"eval_samples_per_second": 22.279, |
|
"eval_steps_per_second": 2.786, |
|
"eval_wer": 0.620795430505606, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 1.6263584876779427, |
|
"grad_norm": 0.7502321600914001, |
|
"learning_rate": 0.0002368706766917293, |
|
"loss": 0.7138, |
|
"step": 42500 |
|
}, |
|
{ |
|
"epoch": 1.645492116944742, |
|
"grad_norm": 0.6769944429397583, |
|
"learning_rate": 0.00023611879699248119, |
|
"loss": 0.7308, |
|
"step": 43000 |
|
}, |
|
{ |
|
"epoch": 1.645492116944742, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 316.0035, |
|
"eval_samples_per_second": 22.221, |
|
"eval_steps_per_second": 2.778, |
|
"eval_wer": 0.6056695578591073, |
|
"step": 43000 |
|
}, |
|
{ |
|
"epoch": 1.6646257462115415, |
|
"grad_norm": 1.050374984741211, |
|
"learning_rate": 0.00023536691729323305, |
|
"loss": 0.7145, |
|
"step": 43500 |
|
}, |
|
{ |
|
"epoch": 1.6837593754783406, |
|
"grad_norm": 1.2912209033966064, |
|
"learning_rate": 0.00023461503759398495, |
|
"loss": 0.7179, |
|
"step": 44000 |
|
}, |
|
{ |
|
"epoch": 1.6837593754783406, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 315.6998, |
|
"eval_samples_per_second": 22.243, |
|
"eval_steps_per_second": 2.781, |
|
"eval_wer": 0.6291649037444468, |
|
"step": 44000 |
|
}, |
|
{ |
|
"epoch": 1.70289300474514, |
|
"grad_norm": 0.8525875210762024, |
|
"learning_rate": 0.00023386315789473682, |
|
"loss": 0.7237, |
|
"step": 44500 |
|
}, |
|
{ |
|
"epoch": 1.7220266340119394, |
|
"grad_norm": 1.1356332302093506, |
|
"learning_rate": 0.0002331112781954887, |
|
"loss": 0.7341, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 1.7220266340119394, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 316.323, |
|
"eval_samples_per_second": 22.199, |
|
"eval_steps_per_second": 2.776, |
|
"eval_wer": 0.6034218320287709, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 1.7411602632787386, |
|
"grad_norm": 0.6900098323822021, |
|
"learning_rate": 0.00023235939849624058, |
|
"loss": 0.7166, |
|
"step": 45500 |
|
}, |
|
{ |
|
"epoch": 1.760293892545538, |
|
"grad_norm": 0.7305801510810852, |
|
"learning_rate": 0.00023160902255639097, |
|
"loss": 0.7061, |
|
"step": 46000 |
|
}, |
|
{ |
|
"epoch": 1.760293892545538, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 315.9517, |
|
"eval_samples_per_second": 22.225, |
|
"eval_steps_per_second": 2.779, |
|
"eval_wer": 0.6136555955151258, |
|
"step": 46000 |
|
}, |
|
{ |
|
"epoch": 1.7794275218123374, |
|
"grad_norm": 1.960204839706421, |
|
"learning_rate": 0.00023085714285714284, |
|
"loss": 0.688, |
|
"step": 46500 |
|
}, |
|
{ |
|
"epoch": 1.7985611510791366, |
|
"grad_norm": 1.001535415649414, |
|
"learning_rate": 0.00023010526315789473, |
|
"loss": 0.7081, |
|
"step": 47000 |
|
}, |
|
{ |
|
"epoch": 1.7985611510791366, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 316.7057, |
|
"eval_samples_per_second": 22.172, |
|
"eval_steps_per_second": 2.772, |
|
"eval_wer": 0.6123334038502222, |
|
"step": 47000 |
|
}, |
|
{ |
|
"epoch": 1.817694780345936, |
|
"grad_norm": 0.8360883593559265, |
|
"learning_rate": 0.0002293533834586466, |
|
"loss": 0.704, |
|
"step": 47500 |
|
}, |
|
{ |
|
"epoch": 1.8368284096127354, |
|
"grad_norm": 1.349821925163269, |
|
"learning_rate": 0.0002286015037593985, |
|
"loss": 0.6957, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 1.8368284096127354, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 316.3207, |
|
"eval_samples_per_second": 22.199, |
|
"eval_steps_per_second": 2.776, |
|
"eval_wer": 0.6053522318595304, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 1.8559620388795346, |
|
"grad_norm": 0.9751301407814026, |
|
"learning_rate": 0.00022785112781954886, |
|
"loss": 0.6974, |
|
"step": 48500 |
|
}, |
|
{ |
|
"epoch": 1.875095668146334, |
|
"grad_norm": 0.7246661186218262, |
|
"learning_rate": 0.00022710075187969923, |
|
"loss": 0.7052, |
|
"step": 49000 |
|
}, |
|
{ |
|
"epoch": 1.875095668146334, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 315.447, |
|
"eval_samples_per_second": 22.26, |
|
"eval_steps_per_second": 2.783, |
|
"eval_wer": 0.616498307594669, |
|
"step": 49000 |
|
}, |
|
{ |
|
"epoch": 1.8942292974131334, |
|
"grad_norm": 0.716491162776947, |
|
"learning_rate": 0.00022634887218045112, |
|
"loss": 0.6898, |
|
"step": 49500 |
|
}, |
|
{ |
|
"epoch": 1.9133629266799326, |
|
"grad_norm": 0.7438942193984985, |
|
"learning_rate": 0.000225596992481203, |
|
"loss": 0.6833, |
|
"step": 50000 |
|
}, |
|
{ |
|
"epoch": 1.9133629266799326, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 316.1363, |
|
"eval_samples_per_second": 22.212, |
|
"eval_steps_per_second": 2.777, |
|
"eval_wer": 0.5887455045483393, |
|
"step": 50000 |
|
}, |
|
{ |
|
"epoch": 1.932496555946732, |
|
"grad_norm": 0.8066436648368835, |
|
"learning_rate": 0.00022484511278195488, |
|
"loss": 0.6755, |
|
"step": 50500 |
|
}, |
|
{ |
|
"epoch": 1.9516301852135314, |
|
"grad_norm": 0.6446587443351746, |
|
"learning_rate": 0.00022409323308270672, |
|
"loss": 0.6995, |
|
"step": 51000 |
|
}, |
|
{ |
|
"epoch": 1.9516301852135314, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 315.7016, |
|
"eval_samples_per_second": 22.243, |
|
"eval_steps_per_second": 2.781, |
|
"eval_wer": 0.5871456526338058, |
|
"step": 51000 |
|
}, |
|
{ |
|
"epoch": 1.9707638144803306, |
|
"grad_norm": 0.8823833465576172, |
|
"learning_rate": 0.00022334135338345862, |
|
"loss": 0.6909, |
|
"step": 51500 |
|
}, |
|
{ |
|
"epoch": 1.98989744374713, |
|
"grad_norm": 0.8129588961601257, |
|
"learning_rate": 0.0002225894736842105, |
|
"loss": 0.6703, |
|
"step": 52000 |
|
}, |
|
{ |
|
"epoch": 1.98989744374713, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 316.397, |
|
"eval_samples_per_second": 22.194, |
|
"eval_steps_per_second": 2.775, |
|
"eval_wer": 0.5954357943727523, |
|
"step": 52000 |
|
}, |
|
{ |
|
"epoch": 2.0090310730139294, |
|
"grad_norm": 0.9364180564880371, |
|
"learning_rate": 0.0002218390977443609, |
|
"loss": 0.6462, |
|
"step": 52500 |
|
}, |
|
{ |
|
"epoch": 2.0281647022807285, |
|
"grad_norm": 0.5429893732070923, |
|
"learning_rate": 0.00022108721804511275, |
|
"loss": 0.6265, |
|
"step": 53000 |
|
}, |
|
{ |
|
"epoch": 2.0281647022807285, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 315.6741, |
|
"eval_samples_per_second": 22.244, |
|
"eval_steps_per_second": 2.781, |
|
"eval_wer": 0.5791728368944362, |
|
"step": 53000 |
|
}, |
|
{ |
|
"epoch": 2.0472983315475277, |
|
"grad_norm": 0.7964287996292114, |
|
"learning_rate": 0.00022033533834586464, |
|
"loss": 0.6283, |
|
"step": 53500 |
|
}, |
|
{ |
|
"epoch": 2.0664319608143273, |
|
"grad_norm": 0.928997278213501, |
|
"learning_rate": 0.0002195834586466165, |
|
"loss": 0.633, |
|
"step": 54000 |
|
}, |
|
{ |
|
"epoch": 2.0664319608143273, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 316.8143, |
|
"eval_samples_per_second": 22.164, |
|
"eval_steps_per_second": 2.771, |
|
"eval_wer": 0.5696001692405331, |
|
"step": 54000 |
|
}, |
|
{ |
|
"epoch": 2.0855655900811265, |
|
"grad_norm": 0.9358041882514954, |
|
"learning_rate": 0.0002188315789473684, |
|
"loss": 0.6608, |
|
"step": 54500 |
|
}, |
|
{ |
|
"epoch": 2.1046992193479257, |
|
"grad_norm": 0.49673086404800415, |
|
"learning_rate": 0.00021807969924812027, |
|
"loss": 0.6399, |
|
"step": 55000 |
|
}, |
|
{ |
|
"epoch": 2.1046992193479257, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 316.4793, |
|
"eval_samples_per_second": 22.188, |
|
"eval_steps_per_second": 2.774, |
|
"eval_wer": 0.5717685635709753, |
|
"step": 55000 |
|
}, |
|
{ |
|
"epoch": 2.1238328486147253, |
|
"grad_norm": 0.824380099773407, |
|
"learning_rate": 0.00021732932330827064, |
|
"loss": 0.6317, |
|
"step": 55500 |
|
}, |
|
{ |
|
"epoch": 2.1429664778815245, |
|
"grad_norm": 0.784487783908844, |
|
"learning_rate": 0.00021657744360902253, |
|
"loss": 0.6165, |
|
"step": 56000 |
|
}, |
|
{ |
|
"epoch": 2.1429664778815245, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 314.9557, |
|
"eval_samples_per_second": 22.295, |
|
"eval_steps_per_second": 2.788, |
|
"eval_wer": 0.5836550666384599, |
|
"step": 56000 |
|
}, |
|
{ |
|
"epoch": 2.1621001071483237, |
|
"grad_norm": 1.1931605339050293, |
|
"learning_rate": 0.0002158270676691729, |
|
"loss": 0.6268, |
|
"step": 56500 |
|
}, |
|
{ |
|
"epoch": 2.1812337364151233, |
|
"grad_norm": 1.4376397132873535, |
|
"learning_rate": 0.0002150751879699248, |
|
"loss": 0.6148, |
|
"step": 57000 |
|
}, |
|
{ |
|
"epoch": 2.1812337364151233, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 314.0058, |
|
"eval_samples_per_second": 22.363, |
|
"eval_steps_per_second": 2.796, |
|
"eval_wer": 0.5597233975037021, |
|
"step": 57000 |
|
}, |
|
{ |
|
"epoch": 2.2003673656819225, |
|
"grad_norm": 1.6363264322280884, |
|
"learning_rate": 0.00021432330827067666, |
|
"loss": 0.6377, |
|
"step": 57500 |
|
}, |
|
{ |
|
"epoch": 2.2195009949487217, |
|
"grad_norm": 4.29092264175415, |
|
"learning_rate": 0.00021357142857142855, |
|
"loss": 0.6228, |
|
"step": 58000 |
|
}, |
|
{ |
|
"epoch": 2.2195009949487217, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 314.1699, |
|
"eval_samples_per_second": 22.351, |
|
"eval_steps_per_second": 2.795, |
|
"eval_wer": 0.5706843664057542, |
|
"step": 58000 |
|
}, |
|
{ |
|
"epoch": 2.2386346242155213, |
|
"grad_norm": 1.1523572206497192, |
|
"learning_rate": 0.00021281954887218042, |
|
"loss": 0.6243, |
|
"step": 58500 |
|
}, |
|
{ |
|
"epoch": 2.2577682534823205, |
|
"grad_norm": 2.0131170749664307, |
|
"learning_rate": 0.00021206766917293232, |
|
"loss": 0.6302, |
|
"step": 59000 |
|
}, |
|
{ |
|
"epoch": 2.2577682534823205, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 315.1147, |
|
"eval_samples_per_second": 22.284, |
|
"eval_steps_per_second": 2.786, |
|
"eval_wer": 0.5717685635709753, |
|
"step": 59000 |
|
}, |
|
{ |
|
"epoch": 2.2769018827491196, |
|
"grad_norm": 1.358688235282898, |
|
"learning_rate": 0.00021131578947368419, |
|
"loss": 0.612, |
|
"step": 59500 |
|
}, |
|
{ |
|
"epoch": 2.2960355120159193, |
|
"grad_norm": 0.5921105742454529, |
|
"learning_rate": 0.00021056390977443608, |
|
"loss": 0.6035, |
|
"step": 60000 |
|
}, |
|
{ |
|
"epoch": 2.2960355120159193, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 314.6822, |
|
"eval_samples_per_second": 22.315, |
|
"eval_steps_per_second": 2.79, |
|
"eval_wer": 0.5638883012481489, |
|
"step": 60000 |
|
}, |
|
{ |
|
"epoch": 2.3151691412827184, |
|
"grad_norm": 0.600351095199585, |
|
"learning_rate": 0.00020981203007518795, |
|
"loss": 0.6157, |
|
"step": 60500 |
|
}, |
|
{ |
|
"epoch": 2.3343027705495176, |
|
"grad_norm": 1.6874371767044067, |
|
"learning_rate": 0.00020906015037593984, |
|
"loss": 0.602, |
|
"step": 61000 |
|
}, |
|
{ |
|
"epoch": 2.3343027705495176, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 316.103, |
|
"eval_samples_per_second": 22.214, |
|
"eval_steps_per_second": 2.778, |
|
"eval_wer": 0.5633197588322403, |
|
"step": 61000 |
|
}, |
|
{ |
|
"epoch": 2.3534363998163172, |
|
"grad_norm": 0.5364500284194946, |
|
"learning_rate": 0.0002083082706766917, |
|
"loss": 0.6057, |
|
"step": 61500 |
|
}, |
|
{ |
|
"epoch": 2.3725700290831164, |
|
"grad_norm": 1.9607787132263184, |
|
"learning_rate": 0.0002075563909774436, |
|
"loss": 0.6023, |
|
"step": 62000 |
|
}, |
|
{ |
|
"epoch": 2.3725700290831164, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 314.4546, |
|
"eval_samples_per_second": 22.331, |
|
"eval_steps_per_second": 2.792, |
|
"eval_wer": 0.5581103236725196, |
|
"step": 62000 |
|
}, |
|
{ |
|
"epoch": 2.3917036583499156, |
|
"grad_norm": 1.1173665523529053, |
|
"learning_rate": 0.00020680451127819547, |
|
"loss": 0.6097, |
|
"step": 62500 |
|
}, |
|
{ |
|
"epoch": 2.4108372876167152, |
|
"grad_norm": 1.7585097551345825, |
|
"learning_rate": 0.00020605563909774434, |
|
"loss": 0.5924, |
|
"step": 63000 |
|
}, |
|
{ |
|
"epoch": 2.4108372876167152, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 314.7721, |
|
"eval_samples_per_second": 22.308, |
|
"eval_steps_per_second": 2.789, |
|
"eval_wer": 0.5511820393484239, |
|
"step": 63000 |
|
}, |
|
{ |
|
"epoch": 2.4299709168835144, |
|
"grad_norm": 0.9437362551689148, |
|
"learning_rate": 0.00020530375939849623, |
|
"loss": 0.5968, |
|
"step": 63500 |
|
}, |
|
{ |
|
"epoch": 2.4491045461503136, |
|
"grad_norm": 0.6077060103416443, |
|
"learning_rate": 0.0002045518796992481, |
|
"loss": 0.5969, |
|
"step": 64000 |
|
}, |
|
{ |
|
"epoch": 2.4491045461503136, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 314.4786, |
|
"eval_samples_per_second": 22.329, |
|
"eval_steps_per_second": 2.792, |
|
"eval_wer": 0.5489739792680347, |
|
"step": 64000 |
|
}, |
|
{ |
|
"epoch": 2.468238175417113, |
|
"grad_norm": 1.9816350936889648, |
|
"learning_rate": 0.0002038, |
|
"loss": 0.5978, |
|
"step": 64500 |
|
}, |
|
{ |
|
"epoch": 2.4873718046839124, |
|
"grad_norm": 1.865081787109375, |
|
"learning_rate": 0.00020304812030075186, |
|
"loss": 0.6029, |
|
"step": 65000 |
|
}, |
|
{ |
|
"epoch": 2.4873718046839124, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 314.4282, |
|
"eval_samples_per_second": 22.333, |
|
"eval_steps_per_second": 2.792, |
|
"eval_wer": 0.5444124180241168, |
|
"step": 65000 |
|
}, |
|
{ |
|
"epoch": 2.5065054339507116, |
|
"grad_norm": 0.9471901059150696, |
|
"learning_rate": 0.00020229624060150376, |
|
"loss": 0.6068, |
|
"step": 65500 |
|
}, |
|
{ |
|
"epoch": 2.525639063217511, |
|
"grad_norm": 1.5803519487380981, |
|
"learning_rate": 0.0002015443609022556, |
|
"loss": 0.6046, |
|
"step": 66000 |
|
}, |
|
{ |
|
"epoch": 2.525639063217511, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 316.3481, |
|
"eval_samples_per_second": 22.197, |
|
"eval_steps_per_second": 2.775, |
|
"eval_wer": 0.5460519356885974, |
|
"step": 66000 |
|
}, |
|
{ |
|
"epoch": 2.5447726924843104, |
|
"grad_norm": 0.7890714406967163, |
|
"learning_rate": 0.0002007924812030075, |
|
"loss": 0.5865, |
|
"step": 66500 |
|
}, |
|
{ |
|
"epoch": 2.5639063217511096, |
|
"grad_norm": 0.7467088103294373, |
|
"learning_rate": 0.00020004060150375936, |
|
"loss": 0.6095, |
|
"step": 67000 |
|
}, |
|
{ |
|
"epoch": 2.5639063217511096, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 316.5673, |
|
"eval_samples_per_second": 22.182, |
|
"eval_steps_per_second": 2.774, |
|
"eval_wer": 0.5476253437698329, |
|
"step": 67000 |
|
}, |
|
{ |
|
"epoch": 2.583039951017909, |
|
"grad_norm": 0.7762987613677979, |
|
"learning_rate": 0.00019928872180451126, |
|
"loss": 0.5869, |
|
"step": 67500 |
|
}, |
|
{ |
|
"epoch": 2.6021735802847084, |
|
"grad_norm": 0.5018890500068665, |
|
"learning_rate": 0.00019853684210526312, |
|
"loss": 0.598, |
|
"step": 68000 |
|
}, |
|
{ |
|
"epoch": 2.6021735802847084, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 316.3997, |
|
"eval_samples_per_second": 22.193, |
|
"eval_steps_per_second": 2.775, |
|
"eval_wer": 0.532129257457161, |
|
"step": 68000 |
|
}, |
|
{ |
|
"epoch": 2.6213072095515075, |
|
"grad_norm": 1.0056216716766357, |
|
"learning_rate": 0.00019778496240601502, |
|
"loss": 0.5821, |
|
"step": 68500 |
|
}, |
|
{ |
|
"epoch": 2.640440838818307, |
|
"grad_norm": 1.3957178592681885, |
|
"learning_rate": 0.00019703308270676689, |
|
"loss": 0.5812, |
|
"step": 69000 |
|
}, |
|
{ |
|
"epoch": 2.640440838818307, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 315.6807, |
|
"eval_samples_per_second": 22.244, |
|
"eval_steps_per_second": 2.781, |
|
"eval_wer": 0.5357388407023482, |
|
"step": 69000 |
|
}, |
|
{ |
|
"epoch": 2.6595744680851063, |
|
"grad_norm": 0.6841593384742737, |
|
"learning_rate": 0.00019628120300751878, |
|
"loss": 0.5802, |
|
"step": 69500 |
|
}, |
|
{ |
|
"epoch": 2.6787080973519055, |
|
"grad_norm": 0.8700592517852783, |
|
"learning_rate": 0.00019552932330827065, |
|
"loss": 0.5957, |
|
"step": 70000 |
|
}, |
|
{ |
|
"epoch": 2.6787080973519055, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 316.4344, |
|
"eval_samples_per_second": 22.191, |
|
"eval_steps_per_second": 2.775, |
|
"eval_wer": 0.5368230378675692, |
|
"step": 70000 |
|
}, |
|
{ |
|
"epoch": 2.697841726618705, |
|
"grad_norm": 1.0066908597946167, |
|
"learning_rate": 0.00019477894736842104, |
|
"loss": 0.5817, |
|
"step": 70500 |
|
}, |
|
{ |
|
"epoch": 2.7169753558855043, |
|
"grad_norm": 0.8253029584884644, |
|
"learning_rate": 0.0001940270676691729, |
|
"loss": 0.5909, |
|
"step": 71000 |
|
}, |
|
{ |
|
"epoch": 2.7169753558855043, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 318.9129, |
|
"eval_samples_per_second": 22.019, |
|
"eval_steps_per_second": 2.753, |
|
"eval_wer": 0.5239448910514068, |
|
"step": 71000 |
|
}, |
|
{ |
|
"epoch": 2.7361089851523035, |
|
"grad_norm": 0.9649154543876648, |
|
"learning_rate": 0.0001932751879699248, |
|
"loss": 0.5877, |
|
"step": 71500 |
|
}, |
|
{ |
|
"epoch": 2.755242614419103, |
|
"grad_norm": 1.6835025548934937, |
|
"learning_rate": 0.00019252330827067667, |
|
"loss": 0.5953, |
|
"step": 72000 |
|
}, |
|
{ |
|
"epoch": 2.755242614419103, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 316.62, |
|
"eval_samples_per_second": 22.178, |
|
"eval_steps_per_second": 2.773, |
|
"eval_wer": 0.5421911360270785, |
|
"step": 72000 |
|
}, |
|
{ |
|
"epoch": 2.7743762436859023, |
|
"grad_norm": 1.4002177715301514, |
|
"learning_rate": 0.00019177443609022553, |
|
"loss": 0.5811, |
|
"step": 72500 |
|
}, |
|
{ |
|
"epoch": 2.7935098729527015, |
|
"grad_norm": 1.1865595579147339, |
|
"learning_rate": 0.00019102255639097743, |
|
"loss": 0.5702, |
|
"step": 73000 |
|
}, |
|
{ |
|
"epoch": 2.7935098729527015, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 315.9208, |
|
"eval_samples_per_second": 22.227, |
|
"eval_steps_per_second": 2.779, |
|
"eval_wer": 0.5225698117199069, |
|
"step": 73000 |
|
}, |
|
{ |
|
"epoch": 2.812643502219501, |
|
"grad_norm": 0.8575685620307922, |
|
"learning_rate": 0.0001902706766917293, |
|
"loss": 0.5806, |
|
"step": 73500 |
|
}, |
|
{ |
|
"epoch": 2.8317771314863003, |
|
"grad_norm": 0.9127354025840759, |
|
"learning_rate": 0.0001895187969924812, |
|
"loss": 0.5755, |
|
"step": 74000 |
|
}, |
|
{ |
|
"epoch": 2.8317771314863003, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 315.601, |
|
"eval_samples_per_second": 22.25, |
|
"eval_steps_per_second": 2.782, |
|
"eval_wer": 0.5319441506240745, |
|
"step": 74000 |
|
}, |
|
{ |
|
"epoch": 2.8509107607530995, |
|
"grad_norm": 1.893068552017212, |
|
"learning_rate": 0.00018876691729323306, |
|
"loss": 0.574, |
|
"step": 74500 |
|
}, |
|
{ |
|
"epoch": 2.870044390019899, |
|
"grad_norm": 0.7603012323379517, |
|
"learning_rate": 0.00018801654135338345, |
|
"loss": 0.5659, |
|
"step": 75000 |
|
}, |
|
{ |
|
"epoch": 2.870044390019899, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 315.7507, |
|
"eval_samples_per_second": 22.239, |
|
"eval_steps_per_second": 2.781, |
|
"eval_wer": 0.5286783372117622, |
|
"step": 75000 |
|
}, |
|
{ |
|
"epoch": 2.8891780192866983, |
|
"grad_norm": 1.636072039604187, |
|
"learning_rate": 0.00018726466165413532, |
|
"loss": 0.5645, |
|
"step": 75500 |
|
}, |
|
{ |
|
"epoch": 2.9083116485534974, |
|
"grad_norm": 0.8112033605575562, |
|
"learning_rate": 0.0001865127819548872, |
|
"loss": 0.5581, |
|
"step": 76000 |
|
}, |
|
{ |
|
"epoch": 2.9083116485534974, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 315.6028, |
|
"eval_samples_per_second": 22.249, |
|
"eval_steps_per_second": 2.782, |
|
"eval_wer": 0.5277924687962767, |
|
"step": 76000 |
|
}, |
|
{ |
|
"epoch": 2.927445277820297, |
|
"grad_norm": 0.8813944458961487, |
|
"learning_rate": 0.00018576090225563908, |
|
"loss": 0.573, |
|
"step": 76500 |
|
}, |
|
{ |
|
"epoch": 2.9465789070870962, |
|
"grad_norm": 0.7851129174232483, |
|
"learning_rate": 0.00018501052631578945, |
|
"loss": 0.5786, |
|
"step": 77000 |
|
}, |
|
{ |
|
"epoch": 2.9465789070870962, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 316.8095, |
|
"eval_samples_per_second": 22.165, |
|
"eval_steps_per_second": 2.771, |
|
"eval_wer": 0.5194626613073832, |
|
"step": 77000 |
|
}, |
|
{ |
|
"epoch": 2.9657125363538954, |
|
"grad_norm": 0.9913876056671143, |
|
"learning_rate": 0.00018425864661654134, |
|
"loss": 0.5783, |
|
"step": 77500 |
|
}, |
|
{ |
|
"epoch": 2.984846165620695, |
|
"grad_norm": 0.9374109506607056, |
|
"learning_rate": 0.0001835067669172932, |
|
"loss": 0.5485, |
|
"step": 78000 |
|
}, |
|
{ |
|
"epoch": 2.984846165620695, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 316.6351, |
|
"eval_samples_per_second": 22.177, |
|
"eval_steps_per_second": 2.773, |
|
"eval_wer": 0.5255976306325365, |
|
"step": 78000 |
|
}, |
|
{ |
|
"epoch": 3.0039797948874942, |
|
"grad_norm": 0.9412303566932678, |
|
"learning_rate": 0.0001827548872180451, |
|
"loss": 0.543, |
|
"step": 78500 |
|
}, |
|
{ |
|
"epoch": 3.0231134241542934, |
|
"grad_norm": 0.9871559739112854, |
|
"learning_rate": 0.00018200300751879697, |
|
"loss": 0.5113, |
|
"step": 79000 |
|
}, |
|
{ |
|
"epoch": 3.0231134241542934, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 316.7492, |
|
"eval_samples_per_second": 22.169, |
|
"eval_steps_per_second": 2.772, |
|
"eval_wer": 0.5220277131372963, |
|
"step": 79000 |
|
}, |
|
{ |
|
"epoch": 3.042247053421093, |
|
"grad_norm": 1.3727389574050903, |
|
"learning_rate": 0.00018125112781954887, |
|
"loss": 0.5075, |
|
"step": 79500 |
|
}, |
|
{ |
|
"epoch": 3.061380682687892, |
|
"grad_norm": 1.3193981647491455, |
|
"learning_rate": 0.00018049924812030073, |
|
"loss": 0.4973, |
|
"step": 80000 |
|
}, |
|
{ |
|
"epoch": 3.061380682687892, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 315.9407, |
|
"eval_samples_per_second": 22.226, |
|
"eval_steps_per_second": 2.779, |
|
"eval_wer": 0.5146102178971864, |
|
"step": 80000 |
|
}, |
|
{ |
|
"epoch": 3.0805143119546914, |
|
"grad_norm": 1.4254885911941528, |
|
"learning_rate": 0.00017974887218045113, |
|
"loss": 0.5063, |
|
"step": 80500 |
|
}, |
|
{ |
|
"epoch": 3.099647941221491, |
|
"grad_norm": 1.1925376653671265, |
|
"learning_rate": 0.000178996992481203, |
|
"loss": 0.5085, |
|
"step": 81000 |
|
}, |
|
{ |
|
"epoch": 3.099647941221491, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 315.3194, |
|
"eval_samples_per_second": 22.269, |
|
"eval_steps_per_second": 2.784, |
|
"eval_wer": 0.5240903321345463, |
|
"step": 81000 |
|
}, |
|
{ |
|
"epoch": 3.11878157048829, |
|
"grad_norm": 0.8363128304481506, |
|
"learning_rate": 0.00017824661654135339, |
|
"loss": 0.5263, |
|
"step": 81500 |
|
}, |
|
{ |
|
"epoch": 3.1379151997550894, |
|
"grad_norm": 0.8608238697052002, |
|
"learning_rate": 0.00017749473684210525, |
|
"loss": 0.5111, |
|
"step": 82000 |
|
}, |
|
{ |
|
"epoch": 3.1379151997550894, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 316.6205, |
|
"eval_samples_per_second": 22.178, |
|
"eval_steps_per_second": 2.773, |
|
"eval_wer": 0.5104585360693886, |
|
"step": 82000 |
|
}, |
|
{ |
|
"epoch": 3.157048829021889, |
|
"grad_norm": 0.7947099208831787, |
|
"learning_rate": 0.00017674285714285715, |
|
"loss": 0.496, |
|
"step": 82500 |
|
}, |
|
{ |
|
"epoch": 3.176182458288688, |
|
"grad_norm": 0.629405677318573, |
|
"learning_rate": 0.00017599097744360902, |
|
"loss": 0.5047, |
|
"step": 83000 |
|
}, |
|
{ |
|
"epoch": 3.176182458288688, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 315.567, |
|
"eval_samples_per_second": 22.252, |
|
"eval_steps_per_second": 2.782, |
|
"eval_wer": 0.5117675058176433, |
|
"step": 83000 |
|
}, |
|
{ |
|
"epoch": 3.1953160875554873, |
|
"grad_norm": 0.6956018209457397, |
|
"learning_rate": 0.0001752390977443609, |
|
"loss": 0.519, |
|
"step": 83500 |
|
}, |
|
{ |
|
"epoch": 3.214449716822287, |
|
"grad_norm": 0.4928194284439087, |
|
"learning_rate": 0.00017448721804511275, |
|
"loss": 0.4994, |
|
"step": 84000 |
|
}, |
|
{ |
|
"epoch": 3.214449716822287, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 316.5623, |
|
"eval_samples_per_second": 22.182, |
|
"eval_steps_per_second": 2.774, |
|
"eval_wer": 0.49931246033425003, |
|
"step": 84000 |
|
}, |
|
{ |
|
"epoch": 3.233583346089086, |
|
"grad_norm": 0.9308450222015381, |
|
"learning_rate": 0.00017373533834586465, |
|
"loss": 0.5108, |
|
"step": 84500 |
|
}, |
|
{ |
|
"epoch": 3.2527169753558853, |
|
"grad_norm": 1.1794687509536743, |
|
"learning_rate": 0.00017298345864661652, |
|
"loss": 0.5077, |
|
"step": 85000 |
|
}, |
|
{ |
|
"epoch": 3.2527169753558853, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 316.1672, |
|
"eval_samples_per_second": 22.21, |
|
"eval_steps_per_second": 2.777, |
|
"eval_wer": 0.5099825470700232, |
|
"step": 85000 |
|
}, |
|
{ |
|
"epoch": 3.271850604622685, |
|
"grad_norm": 0.6575067639350891, |
|
"learning_rate": 0.0001722315789473684, |
|
"loss": 0.5123, |
|
"step": 85500 |
|
}, |
|
{ |
|
"epoch": 3.290984233889484, |
|
"grad_norm": 0.60300612449646, |
|
"learning_rate": 0.00017148120300751877, |
|
"loss": 0.5035, |
|
"step": 86000 |
|
}, |
|
{ |
|
"epoch": 3.290984233889484, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 316.8684, |
|
"eval_samples_per_second": 22.161, |
|
"eval_steps_per_second": 2.771, |
|
"eval_wer": 0.49292627459276495, |
|
"step": 86000 |
|
}, |
|
{ |
|
"epoch": 3.3101178631562833, |
|
"grad_norm": 1.378197193145752, |
|
"learning_rate": 0.00017072932330827064, |
|
"loss": 0.5036, |
|
"step": 86500 |
|
}, |
|
{ |
|
"epoch": 3.329251492423083, |
|
"grad_norm": 0.6758792996406555, |
|
"learning_rate": 0.00016997894736842103, |
|
"loss": 0.5045, |
|
"step": 87000 |
|
}, |
|
{ |
|
"epoch": 3.329251492423083, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 315.6861, |
|
"eval_samples_per_second": 22.244, |
|
"eval_steps_per_second": 2.781, |
|
"eval_wer": 0.5026311614131584, |
|
"step": 87000 |
|
}, |
|
{ |
|
"epoch": 3.348385121689882, |
|
"grad_norm": 0.708972692489624, |
|
"learning_rate": 0.0001692270676691729, |
|
"loss": 0.5057, |
|
"step": 87500 |
|
}, |
|
{ |
|
"epoch": 3.3675187509566813, |
|
"grad_norm": 1.0322130918502808, |
|
"learning_rate": 0.0001684751879699248, |
|
"loss": 0.4951, |
|
"step": 88000 |
|
}, |
|
{ |
|
"epoch": 3.3675187509566813, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 316.1978, |
|
"eval_samples_per_second": 22.208, |
|
"eval_steps_per_second": 2.777, |
|
"eval_wer": 0.49707795642056274, |
|
"step": 88000 |
|
}, |
|
{ |
|
"epoch": 3.386652380223481, |
|
"grad_norm": 0.5131962299346924, |
|
"learning_rate": 0.00016772330827067667, |
|
"loss": 0.4925, |
|
"step": 88500 |
|
}, |
|
{ |
|
"epoch": 3.40578600949028, |
|
"grad_norm": 0.7158399820327759, |
|
"learning_rate": 0.00016697142857142856, |
|
"loss": 0.4915, |
|
"step": 89000 |
|
}, |
|
{ |
|
"epoch": 3.40578600949028, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 319.2292, |
|
"eval_samples_per_second": 21.997, |
|
"eval_steps_per_second": 2.75, |
|
"eval_wer": 0.4984133700021155, |
|
"step": 89000 |
|
}, |
|
{ |
|
"epoch": 3.4249196387570793, |
|
"grad_norm": 1.1134260892868042, |
|
"learning_rate": 0.00016621954887218043, |
|
"loss": 0.5056, |
|
"step": 89500 |
|
}, |
|
{ |
|
"epoch": 3.444053268023879, |
|
"grad_norm": 0.7996990084648132, |
|
"learning_rate": 0.00016546766917293232, |
|
"loss": 0.4875, |
|
"step": 90000 |
|
}, |
|
{ |
|
"epoch": 3.444053268023879, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 316.288, |
|
"eval_samples_per_second": 22.201, |
|
"eval_steps_per_second": 2.776, |
|
"eval_wer": 0.49683996192088004, |
|
"step": 90000 |
|
}, |
|
{ |
|
"epoch": 3.463186897290678, |
|
"grad_norm": 1.4016754627227783, |
|
"learning_rate": 0.0001647157894736842, |
|
"loss": 0.5019, |
|
"step": 90500 |
|
}, |
|
{ |
|
"epoch": 3.4823205265574773, |
|
"grad_norm": 0.6514917016029358, |
|
"learning_rate": 0.00016396541353383458, |
|
"loss": 0.4964, |
|
"step": 91000 |
|
}, |
|
{ |
|
"epoch": 3.4823205265574773, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 316.9586, |
|
"eval_samples_per_second": 22.154, |
|
"eval_steps_per_second": 2.77, |
|
"eval_wer": 0.49888935900148085, |
|
"step": 91000 |
|
}, |
|
{ |
|
"epoch": 3.501454155824277, |
|
"grad_norm": 0.3953873813152313, |
|
"learning_rate": 0.00016321353383458645, |
|
"loss": 0.4878, |
|
"step": 91500 |
|
}, |
|
{ |
|
"epoch": 3.520587785091076, |
|
"grad_norm": 0.6485087871551514, |
|
"learning_rate": 0.00016246165413533832, |
|
"loss": 0.4767, |
|
"step": 92000 |
|
}, |
|
{ |
|
"epoch": 3.520587785091076, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 317.0491, |
|
"eval_samples_per_second": 22.148, |
|
"eval_steps_per_second": 2.769, |
|
"eval_wer": 0.4921594034271208, |
|
"step": 92000 |
|
}, |
|
{ |
|
"epoch": 3.5397214143578752, |
|
"grad_norm": 0.649442732334137, |
|
"learning_rate": 0.00016170977443609021, |
|
"loss": 0.491, |
|
"step": 92500 |
|
}, |
|
{ |
|
"epoch": 3.558855043624675, |
|
"grad_norm": 0.7617647051811218, |
|
"learning_rate": 0.00016095939849624058, |
|
"loss": 0.4765, |
|
"step": 93000 |
|
}, |
|
{ |
|
"epoch": 3.558855043624675, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 317.7633, |
|
"eval_samples_per_second": 22.098, |
|
"eval_steps_per_second": 2.763, |
|
"eval_wer": 0.48691030251745293, |
|
"step": 93000 |
|
}, |
|
{ |
|
"epoch": 3.577988672891474, |
|
"grad_norm": 0.5942517518997192, |
|
"learning_rate": 0.00016020751879699247, |
|
"loss": 0.4805, |
|
"step": 93500 |
|
}, |
|
{ |
|
"epoch": 3.597122302158273, |
|
"grad_norm": 0.9230866432189941, |
|
"learning_rate": 0.00015945563909774434, |
|
"loss": 0.4967, |
|
"step": 94000 |
|
}, |
|
{ |
|
"epoch": 3.597122302158273, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 317.3012, |
|
"eval_samples_per_second": 22.13, |
|
"eval_steps_per_second": 2.767, |
|
"eval_wer": 0.49814893166913476, |
|
"step": 94000 |
|
}, |
|
{ |
|
"epoch": 3.616255931425073, |
|
"grad_norm": 0.601637065410614, |
|
"learning_rate": 0.00015870375939849624, |
|
"loss": 0.5055, |
|
"step": 94500 |
|
}, |
|
{ |
|
"epoch": 3.635389560691872, |
|
"grad_norm": 0.6463965773582458, |
|
"learning_rate": 0.0001579518796992481, |
|
"loss": 0.4941, |
|
"step": 95000 |
|
}, |
|
{ |
|
"epoch": 3.635389560691872, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 318.5729, |
|
"eval_samples_per_second": 22.042, |
|
"eval_steps_per_second": 2.756, |
|
"eval_wer": 0.49616564417177916, |
|
"step": 95000 |
|
}, |
|
{ |
|
"epoch": 3.654523189958671, |
|
"grad_norm": 0.8088521957397461, |
|
"learning_rate": 0.0001572, |
|
"loss": 0.4851, |
|
"step": 95500 |
|
}, |
|
{ |
|
"epoch": 3.673656819225471, |
|
"grad_norm": 1.0758212804794312, |
|
"learning_rate": 0.00015644812030075187, |
|
"loss": 0.4808, |
|
"step": 96000 |
|
}, |
|
{ |
|
"epoch": 3.673656819225471, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 317.3887, |
|
"eval_samples_per_second": 22.124, |
|
"eval_steps_per_second": 2.766, |
|
"eval_wer": 0.4856277766024963, |
|
"step": 96000 |
|
}, |
|
{ |
|
"epoch": 3.69279044849227, |
|
"grad_norm": 2.3342583179473877, |
|
"learning_rate": 0.00015569624060150376, |
|
"loss": 0.4682, |
|
"step": 96500 |
|
}, |
|
{ |
|
"epoch": 3.711924077759069, |
|
"grad_norm": 0.9281033873558044, |
|
"learning_rate": 0.0001549443609022556, |
|
"loss": 0.4838, |
|
"step": 97000 |
|
}, |
|
{ |
|
"epoch": 3.711924077759069, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 317.8326, |
|
"eval_samples_per_second": 22.093, |
|
"eval_steps_per_second": 2.762, |
|
"eval_wer": 0.47491802411677597, |
|
"step": 97000 |
|
}, |
|
{ |
|
"epoch": 3.731057707025869, |
|
"grad_norm": 0.6656193733215332, |
|
"learning_rate": 0.00015419248120300753, |
|
"loss": 0.4893, |
|
"step": 97500 |
|
}, |
|
{ |
|
"epoch": 3.750191336292668, |
|
"grad_norm": 0.8286083936691284, |
|
"learning_rate": 0.0001534421052631579, |
|
"loss": 0.4644, |
|
"step": 98000 |
|
}, |
|
{ |
|
"epoch": 3.750191336292668, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 317.0552, |
|
"eval_samples_per_second": 22.148, |
|
"eval_steps_per_second": 2.769, |
|
"eval_wer": 0.4738073831182568, |
|
"step": 98000 |
|
}, |
|
{ |
|
"epoch": 3.769324965559467, |
|
"grad_norm": 0.7517048716545105, |
|
"learning_rate": 0.00015269022556390978, |
|
"loss": 0.4828, |
|
"step": 98500 |
|
}, |
|
{ |
|
"epoch": 3.788458594826267, |
|
"grad_norm": 1.126383662223816, |
|
"learning_rate": 0.00015193834586466163, |
|
"loss": 0.4818, |
|
"step": 99000 |
|
}, |
|
{ |
|
"epoch": 3.788458594826267, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 317.5174, |
|
"eval_samples_per_second": 22.115, |
|
"eval_steps_per_second": 2.765, |
|
"eval_wer": 0.47370160778506454, |
|
"step": 99000 |
|
}, |
|
{ |
|
"epoch": 3.807592224093066, |
|
"grad_norm": 1.257995843887329, |
|
"learning_rate": 0.00015118646616541352, |
|
"loss": 0.485, |
|
"step": 99500 |
|
}, |
|
{ |
|
"epoch": 3.826725853359865, |
|
"grad_norm": 1.0233116149902344, |
|
"learning_rate": 0.0001504345864661654, |
|
"loss": 0.4741, |
|
"step": 100000 |
|
}, |
|
{ |
|
"epoch": 3.826725853359865, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 317.1035, |
|
"eval_samples_per_second": 22.144, |
|
"eval_steps_per_second": 2.769, |
|
"eval_wer": 0.48123810027501585, |
|
"step": 100000 |
|
}, |
|
{ |
|
"epoch": 3.8458594826266648, |
|
"grad_norm": 1.8550606966018677, |
|
"learning_rate": 0.00014968270676691728, |
|
"loss": 0.4763, |
|
"step": 100500 |
|
}, |
|
{ |
|
"epoch": 3.864993111893464, |
|
"grad_norm": 0.8199677467346191, |
|
"learning_rate": 0.00014893082706766915, |
|
"loss": 0.4734, |
|
"step": 101000 |
|
}, |
|
{ |
|
"epoch": 3.864993111893464, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 317.4217, |
|
"eval_samples_per_second": 22.122, |
|
"eval_steps_per_second": 2.766, |
|
"eval_wer": 0.4772186376137085, |
|
"step": 101000 |
|
}, |
|
{ |
|
"epoch": 3.884126741160263, |
|
"grad_norm": 0.5380846261978149, |
|
"learning_rate": 0.00014817894736842105, |
|
"loss": 0.4602, |
|
"step": 101500 |
|
}, |
|
{ |
|
"epoch": 3.9032603704270628, |
|
"grad_norm": 1.026496410369873, |
|
"learning_rate": 0.00014742706766917291, |
|
"loss": 0.4733, |
|
"step": 102000 |
|
}, |
|
{ |
|
"epoch": 3.9032603704270628, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 317.6547, |
|
"eval_samples_per_second": 22.106, |
|
"eval_steps_per_second": 2.764, |
|
"eval_wer": 0.47355616670192513, |
|
"step": 102000 |
|
}, |
|
{ |
|
"epoch": 3.922393999693862, |
|
"grad_norm": 1.4769624471664429, |
|
"learning_rate": 0.0001466766917293233, |
|
"loss": 0.477, |
|
"step": 102500 |
|
}, |
|
{ |
|
"epoch": 3.941527628960661, |
|
"grad_norm": 1.2881931066513062, |
|
"learning_rate": 0.00014592481203007517, |
|
"loss": 0.4937, |
|
"step": 103000 |
|
}, |
|
{ |
|
"epoch": 3.941527628960661, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 317.1763, |
|
"eval_samples_per_second": 22.139, |
|
"eval_steps_per_second": 2.768, |
|
"eval_wer": 0.4694573725407235, |
|
"step": 103000 |
|
}, |
|
{ |
|
"epoch": 3.9606612582274607, |
|
"grad_norm": 0.6641072630882263, |
|
"learning_rate": 0.00014517443609022554, |
|
"loss": 0.4728, |
|
"step": 103500 |
|
}, |
|
{ |
|
"epoch": 3.97979488749426, |
|
"grad_norm": 0.7772675156593323, |
|
"learning_rate": 0.00014442255639097743, |
|
"loss": 0.4864, |
|
"step": 104000 |
|
}, |
|
{ |
|
"epoch": 3.97979488749426, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 316.8284, |
|
"eval_samples_per_second": 22.163, |
|
"eval_steps_per_second": 2.771, |
|
"eval_wer": 0.47485191453353076, |
|
"step": 104000 |
|
}, |
|
{ |
|
"epoch": 3.998928516761059, |
|
"grad_norm": 0.4977366626262665, |
|
"learning_rate": 0.0001436706766917293, |
|
"loss": 0.4671, |
|
"step": 104500 |
|
}, |
|
{ |
|
"epoch": 4.018062146027859, |
|
"grad_norm": 1.0952422618865967, |
|
"learning_rate": 0.00014291879699248117, |
|
"loss": 0.4126, |
|
"step": 105000 |
|
}, |
|
{ |
|
"epoch": 4.018062146027859, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 318.0988, |
|
"eval_samples_per_second": 22.075, |
|
"eval_steps_per_second": 2.76, |
|
"eval_wer": 0.4635207319653057, |
|
"step": 105000 |
|
}, |
|
{ |
|
"epoch": 4.0371957752946575, |
|
"grad_norm": 0.694837212562561, |
|
"learning_rate": 0.00014216691729323306, |
|
"loss": 0.4202, |
|
"step": 105500 |
|
}, |
|
{ |
|
"epoch": 4.056329404561457, |
|
"grad_norm": 0.7097035646438599, |
|
"learning_rate": 0.00014141503759398493, |
|
"loss": 0.4228, |
|
"step": 106000 |
|
}, |
|
{ |
|
"epoch": 4.056329404561457, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 315.7201, |
|
"eval_samples_per_second": 22.241, |
|
"eval_steps_per_second": 2.781, |
|
"eval_wer": 0.47007880262322826, |
|
"step": 106000 |
|
}, |
|
{ |
|
"epoch": 4.075463033828257, |
|
"grad_norm": 1.1219637393951416, |
|
"learning_rate": 0.00014066315789473683, |
|
"loss": 0.4349, |
|
"step": 106500 |
|
}, |
|
{ |
|
"epoch": 4.094596663095055, |
|
"grad_norm": 0.5968381762504578, |
|
"learning_rate": 0.0001399127819548872, |
|
"loss": 0.4098, |
|
"step": 107000 |
|
}, |
|
{ |
|
"epoch": 4.094596663095055, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 316.2686, |
|
"eval_samples_per_second": 22.203, |
|
"eval_steps_per_second": 2.776, |
|
"eval_wer": 0.45890628305479164, |
|
"step": 107000 |
|
}, |
|
{ |
|
"epoch": 4.113730292361855, |
|
"grad_norm": 0.5920900106430054, |
|
"learning_rate": 0.0001391609022556391, |
|
"loss": 0.4182, |
|
"step": 107500 |
|
}, |
|
{ |
|
"epoch": 4.132863921628655, |
|
"grad_norm": 0.45823031663894653, |
|
"learning_rate": 0.00013840902255639095, |
|
"loss": 0.4193, |
|
"step": 108000 |
|
}, |
|
{ |
|
"epoch": 4.132863921628655, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 316.9696, |
|
"eval_samples_per_second": 22.154, |
|
"eval_steps_per_second": 2.77, |
|
"eval_wer": 0.46152422255130104, |
|
"step": 108000 |
|
}, |
|
{ |
|
"epoch": 4.151997550895453, |
|
"grad_norm": 0.6653383374214172, |
|
"learning_rate": 0.00013765714285714285, |
|
"loss": 0.4087, |
|
"step": 108500 |
|
}, |
|
{ |
|
"epoch": 4.171131180162253, |
|
"grad_norm": 0.5999200940132141, |
|
"learning_rate": 0.00013690526315789472, |
|
"loss": 0.4083, |
|
"step": 109000 |
|
}, |
|
{ |
|
"epoch": 4.171131180162253, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 318.122, |
|
"eval_samples_per_second": 22.073, |
|
"eval_steps_per_second": 2.76, |
|
"eval_wer": 0.46397027713137295, |
|
"step": 109000 |
|
}, |
|
{ |
|
"epoch": 4.190264809429053, |
|
"grad_norm": 0.5787246823310852, |
|
"learning_rate": 0.0001361533834586466, |
|
"loss": 0.4075, |
|
"step": 109500 |
|
}, |
|
{ |
|
"epoch": 4.209398438695851, |
|
"grad_norm": 2.8409461975097656, |
|
"learning_rate": 0.00013540150375939848, |
|
"loss": 0.406, |
|
"step": 110000 |
|
}, |
|
{ |
|
"epoch": 4.209398438695851, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 316.8914, |
|
"eval_samples_per_second": 22.159, |
|
"eval_steps_per_second": 2.771, |
|
"eval_wer": 0.46135233763486355, |
|
"step": 110000 |
|
}, |
|
{ |
|
"epoch": 4.228532067962651, |
|
"grad_norm": 0.8015612959861755, |
|
"learning_rate": 0.00013464962406015038, |
|
"loss": 0.4167, |
|
"step": 110500 |
|
}, |
|
{ |
|
"epoch": 4.247665697229451, |
|
"grad_norm": 3.532646417617798, |
|
"learning_rate": 0.00013389774436090224, |
|
"loss": 0.4125, |
|
"step": 111000 |
|
}, |
|
{ |
|
"epoch": 4.247665697229451, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 316.906, |
|
"eval_samples_per_second": 22.158, |
|
"eval_steps_per_second": 2.771, |
|
"eval_wer": 0.4608366828855511, |
|
"step": 111000 |
|
}, |
|
{ |
|
"epoch": 4.266799326496249, |
|
"grad_norm": 0.7524324059486389, |
|
"learning_rate": 0.00013314736842105264, |
|
"loss": 0.4205, |
|
"step": 111500 |
|
}, |
|
{ |
|
"epoch": 4.285932955763049, |
|
"grad_norm": 2.5353856086730957, |
|
"learning_rate": 0.0001323954887218045, |
|
"loss": 0.4104, |
|
"step": 112000 |
|
}, |
|
{ |
|
"epoch": 4.285932955763049, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 318.4796, |
|
"eval_samples_per_second": 22.049, |
|
"eval_steps_per_second": 2.757, |
|
"eval_wer": 0.44868574148508567, |
|
"step": 112000 |
|
}, |
|
{ |
|
"epoch": 4.305066585029849, |
|
"grad_norm": 0.9442459940910339, |
|
"learning_rate": 0.00013164360902255637, |
|
"loss": 0.4114, |
|
"step": 112500 |
|
}, |
|
{ |
|
"epoch": 4.324200214296647, |
|
"grad_norm": 1.0531048774719238, |
|
"learning_rate": 0.00013089172932330827, |
|
"loss": 0.3988, |
|
"step": 113000 |
|
}, |
|
{ |
|
"epoch": 4.324200214296647, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 317.0914, |
|
"eval_samples_per_second": 22.145, |
|
"eval_steps_per_second": 2.769, |
|
"eval_wer": 0.45987148297017133, |
|
"step": 113000 |
|
}, |
|
{ |
|
"epoch": 4.343333843563447, |
|
"grad_norm": 0.7277682423591614, |
|
"learning_rate": 0.00013013984962406013, |
|
"loss": 0.4164, |
|
"step": 113500 |
|
}, |
|
{ |
|
"epoch": 4.362467472830247, |
|
"grad_norm": 0.48210740089416504, |
|
"learning_rate": 0.00012938796992481203, |
|
"loss": 0.4034, |
|
"step": 114000 |
|
}, |
|
{ |
|
"epoch": 4.362467472830247, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 318.084, |
|
"eval_samples_per_second": 22.076, |
|
"eval_steps_per_second": 2.76, |
|
"eval_wer": 0.45389517664480644, |
|
"step": 114000 |
|
}, |
|
{ |
|
"epoch": 4.381601102097045, |
|
"grad_norm": 1.0025782585144043, |
|
"learning_rate": 0.0001286375939849624, |
|
"loss": 0.423, |
|
"step": 114500 |
|
}, |
|
{ |
|
"epoch": 4.400734731363845, |
|
"grad_norm": 0.5586313009262085, |
|
"learning_rate": 0.00012788571428571426, |
|
"loss": 0.4023, |
|
"step": 115000 |
|
}, |
|
{ |
|
"epoch": 4.400734731363845, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 318.3491, |
|
"eval_samples_per_second": 22.058, |
|
"eval_steps_per_second": 2.758, |
|
"eval_wer": 0.4479585360693886, |
|
"step": 115000 |
|
}, |
|
{ |
|
"epoch": 4.419868360630645, |
|
"grad_norm": 0.6510444283485413, |
|
"learning_rate": 0.00012713383458646616, |
|
"loss": 0.4041, |
|
"step": 115500 |
|
}, |
|
{ |
|
"epoch": 4.439001989897443, |
|
"grad_norm": 0.6380518674850464, |
|
"learning_rate": 0.00012638195488721802, |
|
"loss": 0.4026, |
|
"step": 116000 |
|
}, |
|
{ |
|
"epoch": 4.439001989897443, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 318.4508, |
|
"eval_samples_per_second": 22.051, |
|
"eval_steps_per_second": 2.757, |
|
"eval_wer": 0.45242754389676326, |
|
"step": 116000 |
|
}, |
|
{ |
|
"epoch": 4.458135619164243, |
|
"grad_norm": 0.7297781109809875, |
|
"learning_rate": 0.00012563157894736842, |
|
"loss": 0.4124, |
|
"step": 116500 |
|
}, |
|
{ |
|
"epoch": 4.477269248431043, |
|
"grad_norm": 0.9323301911354065, |
|
"learning_rate": 0.00012487969924812028, |
|
"loss": 0.4182, |
|
"step": 117000 |
|
}, |
|
{ |
|
"epoch": 4.477269248431043, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 316.9248, |
|
"eval_samples_per_second": 22.157, |
|
"eval_steps_per_second": 2.77, |
|
"eval_wer": 0.44729744023693674, |
|
"step": 117000 |
|
}, |
|
{ |
|
"epoch": 4.496402877697841, |
|
"grad_norm": 0.7702882289886475, |
|
"learning_rate": 0.00012412781954887218, |
|
"loss": 0.4137, |
|
"step": 117500 |
|
}, |
|
{ |
|
"epoch": 4.515536506964641, |
|
"grad_norm": 0.45166295766830444, |
|
"learning_rate": 0.00012337593984962405, |
|
"loss": 0.4046, |
|
"step": 118000 |
|
}, |
|
{ |
|
"epoch": 4.515536506964641, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 313.8786, |
|
"eval_samples_per_second": 22.372, |
|
"eval_steps_per_second": 2.797, |
|
"eval_wer": 0.44563147873915804, |
|
"step": 118000 |
|
}, |
|
{ |
|
"epoch": 4.5346701362314406, |
|
"grad_norm": 0.8825483322143555, |
|
"learning_rate": 0.00012262406015037594, |
|
"loss": 0.4001, |
|
"step": 118500 |
|
}, |
|
{ |
|
"epoch": 4.553803765498239, |
|
"grad_norm": 0.5982456207275391, |
|
"learning_rate": 0.00012187218045112781, |
|
"loss": 0.4126, |
|
"step": 119000 |
|
}, |
|
{ |
|
"epoch": 4.553803765498239, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 313.2679, |
|
"eval_samples_per_second": 22.415, |
|
"eval_steps_per_second": 2.803, |
|
"eval_wer": 0.4406071504125238, |
|
"step": 119000 |
|
}, |
|
{ |
|
"epoch": 4.572937394765039, |
|
"grad_norm": 0.49651646614074707, |
|
"learning_rate": 0.00012112030075187969, |
|
"loss": 0.3899, |
|
"step": 119500 |
|
}, |
|
{ |
|
"epoch": 4.5920710240318385, |
|
"grad_norm": 0.6609870195388794, |
|
"learning_rate": 0.00012036842105263157, |
|
"loss": 0.4144, |
|
"step": 120000 |
|
}, |
|
{ |
|
"epoch": 4.5920710240318385, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 313.9675, |
|
"eval_samples_per_second": 22.365, |
|
"eval_steps_per_second": 2.796, |
|
"eval_wer": 0.44486460757351387, |
|
"step": 120000 |
|
}, |
|
{ |
|
"epoch": 4.611204653298637, |
|
"grad_norm": 0.8339817523956299, |
|
"learning_rate": 0.00011961804511278195, |
|
"loss": 0.4067, |
|
"step": 120500 |
|
}, |
|
{ |
|
"epoch": 4.630338282565437, |
|
"grad_norm": 0.683476984500885, |
|
"learning_rate": 0.00011886766917293232, |
|
"loss": 0.4074, |
|
"step": 121000 |
|
}, |
|
{ |
|
"epoch": 4.630338282565437, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 314.2537, |
|
"eval_samples_per_second": 22.345, |
|
"eval_steps_per_second": 2.794, |
|
"eval_wer": 0.44750899090332136, |
|
"step": 121000 |
|
}, |
|
{ |
|
"epoch": 4.6494719118322365, |
|
"grad_norm": 0.6157557964324951, |
|
"learning_rate": 0.0001181172932330827, |
|
"loss": 0.3947, |
|
"step": 121500 |
|
}, |
|
{ |
|
"epoch": 4.668605541099035, |
|
"grad_norm": 0.7728904485702515, |
|
"learning_rate": 0.00011736541353383457, |
|
"loss": 0.3922, |
|
"step": 122000 |
|
}, |
|
{ |
|
"epoch": 4.668605541099035, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 312.5029, |
|
"eval_samples_per_second": 22.47, |
|
"eval_steps_per_second": 2.81, |
|
"eval_wer": 0.4387693039983076, |
|
"step": 122000 |
|
}, |
|
{ |
|
"epoch": 4.687739170365835, |
|
"grad_norm": 0.6580181121826172, |
|
"learning_rate": 0.00011661353383458646, |
|
"loss": 0.3969, |
|
"step": 122500 |
|
}, |
|
{ |
|
"epoch": 4.7068727996326345, |
|
"grad_norm": 3.9013619422912598, |
|
"learning_rate": 0.00011586165413533834, |
|
"loss": 0.3866, |
|
"step": 123000 |
|
}, |
|
{ |
|
"epoch": 4.7068727996326345, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 312.7382, |
|
"eval_samples_per_second": 22.453, |
|
"eval_steps_per_second": 2.807, |
|
"eval_wer": 0.44741643748677806, |
|
"step": 123000 |
|
}, |
|
{ |
|
"epoch": 4.726006428899433, |
|
"grad_norm": 0.7487606406211853, |
|
"learning_rate": 0.00011510977443609022, |
|
"loss": 0.4057, |
|
"step": 123500 |
|
}, |
|
{ |
|
"epoch": 4.745140058166233, |
|
"grad_norm": 0.9207865595817566, |
|
"learning_rate": 0.0001143578947368421, |
|
"loss": 0.3873, |
|
"step": 124000 |
|
}, |
|
{ |
|
"epoch": 4.745140058166233, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 312.9132, |
|
"eval_samples_per_second": 22.441, |
|
"eval_steps_per_second": 2.806, |
|
"eval_wer": 0.4344986249206685, |
|
"step": 124000 |
|
}, |
|
{ |
|
"epoch": 4.7642736874330325, |
|
"grad_norm": 0.6548067331314087, |
|
"learning_rate": 0.00011360601503759398, |
|
"loss": 0.4058, |
|
"step": 124500 |
|
}, |
|
{ |
|
"epoch": 4.783407316699831, |
|
"grad_norm": 0.97373366355896, |
|
"learning_rate": 0.00011285413533834586, |
|
"loss": 0.3917, |
|
"step": 125000 |
|
}, |
|
{ |
|
"epoch": 4.783407316699831, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 313.7438, |
|
"eval_samples_per_second": 22.381, |
|
"eval_steps_per_second": 2.798, |
|
"eval_wer": 0.4338243071715676, |
|
"step": 125000 |
|
}, |
|
{ |
|
"epoch": 4.802540945966631, |
|
"grad_norm": 0.7585910558700562, |
|
"learning_rate": 0.00011210375939849623, |
|
"loss": 0.4095, |
|
"step": 125500 |
|
}, |
|
{ |
|
"epoch": 4.8216745752334305, |
|
"grad_norm": 1.3345550298690796, |
|
"learning_rate": 0.0001113533834586466, |
|
"loss": 0.3864, |
|
"step": 126000 |
|
}, |
|
{ |
|
"epoch": 4.8216745752334305, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 312.643, |
|
"eval_samples_per_second": 22.46, |
|
"eval_steps_per_second": 2.808, |
|
"eval_wer": 0.4350803892532262, |
|
"step": 126000 |
|
}, |
|
{ |
|
"epoch": 4.840808204500229, |
|
"grad_norm": 0.6124061346054077, |
|
"learning_rate": 0.00011060150375939849, |
|
"loss": 0.3909, |
|
"step": 126500 |
|
}, |
|
{ |
|
"epoch": 4.859941833767029, |
|
"grad_norm": 0.7190678119659424, |
|
"learning_rate": 0.00010984962406015037, |
|
"loss": 0.3826, |
|
"step": 127000 |
|
}, |
|
{ |
|
"epoch": 4.859941833767029, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 312.9649, |
|
"eval_samples_per_second": 22.437, |
|
"eval_steps_per_second": 2.805, |
|
"eval_wer": 0.430783266342289, |
|
"step": 127000 |
|
}, |
|
{ |
|
"epoch": 4.879075463033828, |
|
"grad_norm": 0.9486848711967468, |
|
"learning_rate": 0.00010909774436090225, |
|
"loss": 0.3974, |
|
"step": 127500 |
|
}, |
|
{ |
|
"epoch": 4.898209092300627, |
|
"grad_norm": 3.170478343963623, |
|
"learning_rate": 0.00010834736842105263, |
|
"loss": 0.391, |
|
"step": 128000 |
|
}, |
|
{ |
|
"epoch": 4.898209092300627, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 313.5327, |
|
"eval_samples_per_second": 22.396, |
|
"eval_steps_per_second": 2.8, |
|
"eval_wer": 0.431497249841337, |
|
"step": 128000 |
|
}, |
|
{ |
|
"epoch": 4.917342721567427, |
|
"grad_norm": 0.462166428565979, |
|
"learning_rate": 0.000107596992481203, |
|
"loss": 0.3817, |
|
"step": 128500 |
|
}, |
|
{ |
|
"epoch": 4.936476350834226, |
|
"grad_norm": 1.1014796495437622, |
|
"learning_rate": 0.00010684511278195487, |
|
"loss": 0.394, |
|
"step": 129000 |
|
}, |
|
{ |
|
"epoch": 4.936476350834226, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 313.0162, |
|
"eval_samples_per_second": 22.433, |
|
"eval_steps_per_second": 2.805, |
|
"eval_wer": 0.42787444467950075, |
|
"step": 129000 |
|
}, |
|
{ |
|
"epoch": 4.955609980101025, |
|
"grad_norm": 0.624528706073761, |
|
"learning_rate": 0.00010609323308270676, |
|
"loss": 0.3821, |
|
"step": 129500 |
|
}, |
|
{ |
|
"epoch": 4.974743609367825, |
|
"grad_norm": 0.5960122346878052, |
|
"learning_rate": 0.00010534135338345864, |
|
"loss": 0.3957, |
|
"step": 130000 |
|
}, |
|
{ |
|
"epoch": 4.974743609367825, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 313.106, |
|
"eval_samples_per_second": 22.427, |
|
"eval_steps_per_second": 2.804, |
|
"eval_wer": 0.4235112121853184, |
|
"step": 130000 |
|
}, |
|
{ |
|
"epoch": 4.993877238634624, |
|
"grad_norm": 0.7740840911865234, |
|
"learning_rate": 0.0001045894736842105, |
|
"loss": 0.4012, |
|
"step": 130500 |
|
}, |
|
{ |
|
"epoch": 5.013010867901423, |
|
"grad_norm": 0.39604371786117554, |
|
"learning_rate": 0.00010383759398496239, |
|
"loss": 0.3515, |
|
"step": 131000 |
|
}, |
|
{ |
|
"epoch": 5.013010867901423, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 312.7575, |
|
"eval_samples_per_second": 22.452, |
|
"eval_steps_per_second": 2.807, |
|
"eval_wer": 0.42159403427120795, |
|
"step": 131000 |
|
}, |
|
{ |
|
"epoch": 5.032144497168223, |
|
"grad_norm": 0.582255482673645, |
|
"learning_rate": 0.00010308571428571427, |
|
"loss": 0.332, |
|
"step": 131500 |
|
}, |
|
{ |
|
"epoch": 5.051278126435022, |
|
"grad_norm": 1.186954379081726, |
|
"learning_rate": 0.00010233383458646615, |
|
"loss": 0.3389, |
|
"step": 132000 |
|
}, |
|
{ |
|
"epoch": 5.051278126435022, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 312.5025, |
|
"eval_samples_per_second": 22.47, |
|
"eval_steps_per_second": 2.81, |
|
"eval_wer": 0.42548127776602496, |
|
"step": 132000 |
|
}, |
|
{ |
|
"epoch": 5.070411755701821, |
|
"grad_norm": 0.7782790064811707, |
|
"learning_rate": 0.00010158195488721803, |
|
"loss": 0.3352, |
|
"step": 132500 |
|
}, |
|
{ |
|
"epoch": 5.089545384968621, |
|
"grad_norm": 0.5987495183944702, |
|
"learning_rate": 0.00010083157894736841, |
|
"loss": 0.333, |
|
"step": 133000 |
|
}, |
|
{ |
|
"epoch": 5.089545384968621, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 514.1295, |
|
"eval_samples_per_second": 13.658, |
|
"eval_steps_per_second": 1.708, |
|
"eval_wer": 0.42526972709964034, |
|
"step": 133000 |
|
}, |
|
{ |
|
"epoch": 5.10867901423542, |
|
"grad_norm": 0.714470624923706, |
|
"learning_rate": 0.0001000812030075188, |
|
"loss": 0.3334, |
|
"step": 133500 |
|
}, |
|
{ |
|
"epoch": 5.127812643502219, |
|
"grad_norm": 0.554375171661377, |
|
"learning_rate": 9.932932330827067e-05, |
|
"loss": 0.3313, |
|
"step": 134000 |
|
}, |
|
{ |
|
"epoch": 5.127812643502219, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 313.7907, |
|
"eval_samples_per_second": 22.378, |
|
"eval_steps_per_second": 2.798, |
|
"eval_wer": 0.4178390099428813, |
|
"step": 134000 |
|
}, |
|
{ |
|
"epoch": 5.146946272769019, |
|
"grad_norm": 0.8462457060813904, |
|
"learning_rate": 9.857744360902255e-05, |
|
"loss": 0.3391, |
|
"step": 134500 |
|
}, |
|
{ |
|
"epoch": 5.166079902035818, |
|
"grad_norm": 0.5063708424568176, |
|
"learning_rate": 9.782556390977443e-05, |
|
"loss": 0.3351, |
|
"step": 135000 |
|
}, |
|
{ |
|
"epoch": 5.166079902035818, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 314.6864, |
|
"eval_samples_per_second": 22.314, |
|
"eval_steps_per_second": 2.79, |
|
"eval_wer": 0.422321239686905, |
|
"step": 135000 |
|
}, |
|
{ |
|
"epoch": 5.185213531302617, |
|
"grad_norm": 0.5310961008071899, |
|
"learning_rate": 9.707368421052631e-05, |
|
"loss": 0.3435, |
|
"step": 135500 |
|
}, |
|
{ |
|
"epoch": 5.204347160569417, |
|
"grad_norm": 0.7004749774932861, |
|
"learning_rate": 9.63218045112782e-05, |
|
"loss": 0.3262, |
|
"step": 136000 |
|
}, |
|
{ |
|
"epoch": 5.204347160569417, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 314.217, |
|
"eval_samples_per_second": 22.348, |
|
"eval_steps_per_second": 2.794, |
|
"eval_wer": 0.416318489528242, |
|
"step": 136000 |
|
}, |
|
{ |
|
"epoch": 5.223480789836216, |
|
"grad_norm": 0.5730725526809692, |
|
"learning_rate": 9.556992481203008e-05, |
|
"loss": 0.333, |
|
"step": 136500 |
|
}, |
|
{ |
|
"epoch": 5.242614419103015, |
|
"grad_norm": 1.6563917398452759, |
|
"learning_rate": 9.481804511278196e-05, |
|
"loss": 0.3333, |
|
"step": 137000 |
|
}, |
|
{ |
|
"epoch": 5.242614419103015, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 315.3953, |
|
"eval_samples_per_second": 22.264, |
|
"eval_steps_per_second": 2.784, |
|
"eval_wer": 0.42156759043790987, |
|
"step": 137000 |
|
}, |
|
{ |
|
"epoch": 5.261748048369815, |
|
"grad_norm": 0.5844420790672302, |
|
"learning_rate": 9.406616541353384e-05, |
|
"loss": 0.3369, |
|
"step": 137500 |
|
}, |
|
{ |
|
"epoch": 5.280881677636614, |
|
"grad_norm": 0.6321229934692383, |
|
"learning_rate": 9.331428571428571e-05, |
|
"loss": 0.3229, |
|
"step": 138000 |
|
}, |
|
{ |
|
"epoch": 5.280881677636614, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 315.3276, |
|
"eval_samples_per_second": 22.269, |
|
"eval_steps_per_second": 2.784, |
|
"eval_wer": 0.41326422678231434, |
|
"step": 138000 |
|
}, |
|
{ |
|
"epoch": 5.300015306903413, |
|
"grad_norm": 0.6538165807723999, |
|
"learning_rate": 9.256240601503759e-05, |
|
"loss": 0.3382, |
|
"step": 138500 |
|
}, |
|
{ |
|
"epoch": 5.319148936170213, |
|
"grad_norm": 0.7837240099906921, |
|
"learning_rate": 9.181052631578947e-05, |
|
"loss": 0.3345, |
|
"step": 139000 |
|
}, |
|
{ |
|
"epoch": 5.319148936170213, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 316.2249, |
|
"eval_samples_per_second": 22.206, |
|
"eval_steps_per_second": 2.777, |
|
"eval_wer": 0.4136079966151893, |
|
"step": 139000 |
|
}, |
|
{ |
|
"epoch": 5.338282565437012, |
|
"grad_norm": 0.8858345150947571, |
|
"learning_rate": 9.106015037593984e-05, |
|
"loss": 0.3293, |
|
"step": 139500 |
|
}, |
|
{ |
|
"epoch": 5.357416194703811, |
|
"grad_norm": 1.3112056255340576, |
|
"learning_rate": 9.030827067669172e-05, |
|
"loss": 0.3365, |
|
"step": 140000 |
|
}, |
|
{ |
|
"epoch": 5.357416194703811, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 316.1322, |
|
"eval_samples_per_second": 22.212, |
|
"eval_steps_per_second": 2.777, |
|
"eval_wer": 0.4193330865242226, |
|
"step": 140000 |
|
}, |
|
{ |
|
"epoch": 5.376549823970611, |
|
"grad_norm": 0.720756471157074, |
|
"learning_rate": 8.95563909774436e-05, |
|
"loss": 0.3318, |
|
"step": 140500 |
|
}, |
|
{ |
|
"epoch": 5.39568345323741, |
|
"grad_norm": 0.46977701783180237, |
|
"learning_rate": 8.880451127819548e-05, |
|
"loss": 0.3165, |
|
"step": 141000 |
|
}, |
|
{ |
|
"epoch": 5.39568345323741, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 314.295, |
|
"eval_samples_per_second": 22.342, |
|
"eval_steps_per_second": 2.794, |
|
"eval_wer": 0.4112148297017136, |
|
"step": 141000 |
|
}, |
|
{ |
|
"epoch": 5.414817082504209, |
|
"grad_norm": 0.7260543704032898, |
|
"learning_rate": 8.805263157894736e-05, |
|
"loss": 0.337, |
|
"step": 141500 |
|
}, |
|
{ |
|
"epoch": 5.433950711771009, |
|
"grad_norm": 1.3891347646713257, |
|
"learning_rate": 8.730075187969924e-05, |
|
"loss": 0.3224, |
|
"step": 142000 |
|
}, |
|
{ |
|
"epoch": 5.433950711771009, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 315.0255, |
|
"eval_samples_per_second": 22.29, |
|
"eval_steps_per_second": 2.787, |
|
"eval_wer": 0.40745980537338694, |
|
"step": 142000 |
|
}, |
|
{ |
|
"epoch": 5.453084341037808, |
|
"grad_norm": 0.4979989528656006, |
|
"learning_rate": 8.654887218045112e-05, |
|
"loss": 0.3341, |
|
"step": 142500 |
|
}, |
|
{ |
|
"epoch": 5.472217970304607, |
|
"grad_norm": 0.9820772409439087, |
|
"learning_rate": 8.579699248120299e-05, |
|
"loss": 0.335, |
|
"step": 143000 |
|
}, |
|
{ |
|
"epoch": 5.472217970304607, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 314.1397, |
|
"eval_samples_per_second": 22.353, |
|
"eval_steps_per_second": 2.795, |
|
"eval_wer": 0.4112941612016078, |
|
"step": 143000 |
|
}, |
|
{ |
|
"epoch": 5.491351599571407, |
|
"grad_norm": 1.6596304178237915, |
|
"learning_rate": 8.504511278195487e-05, |
|
"loss": 0.3243, |
|
"step": 143500 |
|
}, |
|
{ |
|
"epoch": 5.510485228838206, |
|
"grad_norm": 0.687848687171936, |
|
"learning_rate": 8.429323308270675e-05, |
|
"loss": 0.3377, |
|
"step": 144000 |
|
}, |
|
{ |
|
"epoch": 5.510485228838206, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 315.8491, |
|
"eval_samples_per_second": 22.232, |
|
"eval_steps_per_second": 2.78, |
|
"eval_wer": 0.41762745927649675, |
|
"step": 144000 |
|
}, |
|
{ |
|
"epoch": 5.529618858105005, |
|
"grad_norm": 1.0538053512573242, |
|
"learning_rate": 8.354135338345864e-05, |
|
"loss": 0.3195, |
|
"step": 144500 |
|
}, |
|
{ |
|
"epoch": 5.548752487371805, |
|
"grad_norm": 0.6369953751564026, |
|
"learning_rate": 8.278947368421052e-05, |
|
"loss": 0.3411, |
|
"step": 145000 |
|
}, |
|
{ |
|
"epoch": 5.548752487371805, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 316.0558, |
|
"eval_samples_per_second": 22.218, |
|
"eval_steps_per_second": 2.778, |
|
"eval_wer": 0.4091125449545166, |
|
"step": 145000 |
|
}, |
|
{ |
|
"epoch": 5.567886116638604, |
|
"grad_norm": 1.193766474723816, |
|
"learning_rate": 8.20390977443609e-05, |
|
"loss": 0.3385, |
|
"step": 145500 |
|
}, |
|
{ |
|
"epoch": 5.587019745905403, |
|
"grad_norm": 0.5476765632629395, |
|
"learning_rate": 8.128721804511278e-05, |
|
"loss": 0.3247, |
|
"step": 146000 |
|
}, |
|
{ |
|
"epoch": 5.587019745905403, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 315.7439, |
|
"eval_samples_per_second": 22.24, |
|
"eval_steps_per_second": 2.781, |
|
"eval_wer": 0.4096414216204781, |
|
"step": 146000 |
|
}, |
|
{ |
|
"epoch": 5.606153375172203, |
|
"grad_norm": 0.9929884076118469, |
|
"learning_rate": 8.053533834586466e-05, |
|
"loss": 0.3305, |
|
"step": 146500 |
|
}, |
|
{ |
|
"epoch": 5.625287004439002, |
|
"grad_norm": 0.3942908048629761, |
|
"learning_rate": 7.978345864661654e-05, |
|
"loss": 0.3304, |
|
"step": 147000 |
|
}, |
|
{ |
|
"epoch": 5.625287004439002, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 314.4131, |
|
"eval_samples_per_second": 22.334, |
|
"eval_steps_per_second": 2.793, |
|
"eval_wer": 0.40843822720541567, |
|
"step": 147000 |
|
}, |
|
{ |
|
"epoch": 5.644420633705801, |
|
"grad_norm": 0.7728341221809387, |
|
"learning_rate": 7.903157894736842e-05, |
|
"loss": 0.3301, |
|
"step": 147500 |
|
}, |
|
{ |
|
"epoch": 5.663554262972601, |
|
"grad_norm": 1.4606784582138062, |
|
"learning_rate": 7.82796992481203e-05, |
|
"loss": 0.3267, |
|
"step": 148000 |
|
}, |
|
{ |
|
"epoch": 5.663554262972601, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 314.4256, |
|
"eval_samples_per_second": 22.333, |
|
"eval_steps_per_second": 2.792, |
|
"eval_wer": 0.40423365771102177, |
|
"step": 148000 |
|
}, |
|
{ |
|
"epoch": 5.6826878922394, |
|
"grad_norm": 0.6086077690124512, |
|
"learning_rate": 7.752781954887217e-05, |
|
"loss": 0.3195, |
|
"step": 148500 |
|
}, |
|
{ |
|
"epoch": 5.701821521506199, |
|
"grad_norm": 0.5943909883499146, |
|
"learning_rate": 7.677593984962405e-05, |
|
"loss": 0.3193, |
|
"step": 149000 |
|
}, |
|
{ |
|
"epoch": 5.701821521506199, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 314.9231, |
|
"eval_samples_per_second": 22.298, |
|
"eval_steps_per_second": 2.788, |
|
"eval_wer": 0.4026470277131373, |
|
"step": 149000 |
|
}, |
|
{ |
|
"epoch": 5.7209551507729985, |
|
"grad_norm": 0.5374177098274231, |
|
"learning_rate": 7.602556390977442e-05, |
|
"loss": 0.3374, |
|
"step": 149500 |
|
}, |
|
{ |
|
"epoch": 5.740088780039798, |
|
"grad_norm": 0.675542950630188, |
|
"learning_rate": 7.52736842105263e-05, |
|
"loss": 0.3155, |
|
"step": 150000 |
|
}, |
|
{ |
|
"epoch": 5.740088780039798, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 316.593, |
|
"eval_samples_per_second": 22.18, |
|
"eval_steps_per_second": 2.773, |
|
"eval_wer": 0.40481542204357945, |
|
"step": 150000 |
|
}, |
|
{ |
|
"epoch": 5.759222409306597, |
|
"grad_norm": 1.1648385524749756, |
|
"learning_rate": 7.45218045112782e-05, |
|
"loss": 0.3247, |
|
"step": 150500 |
|
}, |
|
{ |
|
"epoch": 5.7783560385733965, |
|
"grad_norm": 0.6115811467170715, |
|
"learning_rate": 7.376992481203008e-05, |
|
"loss": 0.3238, |
|
"step": 151000 |
|
}, |
|
{ |
|
"epoch": 5.7783560385733965, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 318.3837, |
|
"eval_samples_per_second": 22.055, |
|
"eval_steps_per_second": 2.758, |
|
"eval_wer": 0.40333456737888723, |
|
"step": 151000 |
|
}, |
|
{ |
|
"epoch": 5.797489667840196, |
|
"grad_norm": 1.0145585536956787, |
|
"learning_rate": 7.301804511278196e-05, |
|
"loss": 0.3175, |
|
"step": 151500 |
|
}, |
|
{ |
|
"epoch": 5.816623297106995, |
|
"grad_norm": 0.7065938115119934, |
|
"learning_rate": 7.226766917293232e-05, |
|
"loss": 0.3172, |
|
"step": 152000 |
|
}, |
|
{ |
|
"epoch": 5.816623297106995, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 318.6878, |
|
"eval_samples_per_second": 22.034, |
|
"eval_steps_per_second": 2.755, |
|
"eval_wer": 0.40486830971017557, |
|
"step": 152000 |
|
}, |
|
{ |
|
"epoch": 5.8357569263737945, |
|
"grad_norm": 0.7309425473213196, |
|
"learning_rate": 7.15172932330827e-05, |
|
"loss": 0.3229, |
|
"step": 152500 |
|
}, |
|
{ |
|
"epoch": 5.854890555640594, |
|
"grad_norm": 1.0197374820709229, |
|
"learning_rate": 7.076541353383458e-05, |
|
"loss": 0.3148, |
|
"step": 153000 |
|
}, |
|
{ |
|
"epoch": 5.854890555640594, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 323.1191, |
|
"eval_samples_per_second": 21.732, |
|
"eval_steps_per_second": 2.717, |
|
"eval_wer": 0.3989052253014597, |
|
"step": 153000 |
|
}, |
|
{ |
|
"epoch": 5.874024184907393, |
|
"grad_norm": 0.7352388501167297, |
|
"learning_rate": 7.001503759398496e-05, |
|
"loss": 0.313, |
|
"step": 153500 |
|
}, |
|
{ |
|
"epoch": 5.8931578141741925, |
|
"grad_norm": 0.6405870914459229, |
|
"learning_rate": 6.926315789473684e-05, |
|
"loss": 0.3217, |
|
"step": 154000 |
|
}, |
|
{ |
|
"epoch": 5.8931578141741925, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 319.2909, |
|
"eval_samples_per_second": 21.992, |
|
"eval_steps_per_second": 2.75, |
|
"eval_wer": 0.39782102813623865, |
|
"step": 154000 |
|
}, |
|
{ |
|
"epoch": 5.912291443440992, |
|
"grad_norm": 0.711800754070282, |
|
"learning_rate": 6.851127819548872e-05, |
|
"loss": 0.3206, |
|
"step": 154500 |
|
}, |
|
{ |
|
"epoch": 5.931425072707791, |
|
"grad_norm": 0.41337111592292786, |
|
"learning_rate": 6.77593984962406e-05, |
|
"loss": 0.3145, |
|
"step": 155000 |
|
}, |
|
{ |
|
"epoch": 5.931425072707791, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 318.549, |
|
"eval_samples_per_second": 22.044, |
|
"eval_steps_per_second": 2.756, |
|
"eval_wer": 0.39298180664269095, |
|
"step": 155000 |
|
}, |
|
{ |
|
"epoch": 5.9505587019745905, |
|
"grad_norm": 0.3790297210216522, |
|
"learning_rate": 6.700751879699248e-05, |
|
"loss": 0.3185, |
|
"step": 155500 |
|
}, |
|
{ |
|
"epoch": 5.96969233124139, |
|
"grad_norm": 0.7092337012290955, |
|
"learning_rate": 6.625563909774435e-05, |
|
"loss": 0.3178, |
|
"step": 156000 |
|
}, |
|
{ |
|
"epoch": 5.96969233124139, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 318.815, |
|
"eval_samples_per_second": 22.025, |
|
"eval_steps_per_second": 2.754, |
|
"eval_wer": 0.39946054580071927, |
|
"step": 156000 |
|
}, |
|
{ |
|
"epoch": 5.988825960508189, |
|
"grad_norm": 1.5943549871444702, |
|
"learning_rate": 6.550375939849623e-05, |
|
"loss": 0.3154, |
|
"step": 156500 |
|
}, |
|
{ |
|
"epoch": 6.0079595897749885, |
|
"grad_norm": 1.1709485054016113, |
|
"learning_rate": 6.475187969924812e-05, |
|
"loss": 0.2895, |
|
"step": 157000 |
|
}, |
|
{ |
|
"epoch": 6.0079595897749885, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 318.8329, |
|
"eval_samples_per_second": 22.024, |
|
"eval_steps_per_second": 2.754, |
|
"eval_wer": 0.39980431563359425, |
|
"step": 157000 |
|
}, |
|
{ |
|
"epoch": 6.027093219041788, |
|
"grad_norm": 0.6642709374427795, |
|
"learning_rate": 6.4e-05, |
|
"loss": 0.2714, |
|
"step": 157500 |
|
}, |
|
{ |
|
"epoch": 6.046226848308587, |
|
"grad_norm": 0.7695789337158203, |
|
"learning_rate": 6.324812030075188e-05, |
|
"loss": 0.269, |
|
"step": 158000 |
|
}, |
|
{ |
|
"epoch": 6.046226848308587, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 318.0108, |
|
"eval_samples_per_second": 22.081, |
|
"eval_steps_per_second": 2.761, |
|
"eval_wer": 0.3926248148931669, |
|
"step": 158000 |
|
}, |
|
{ |
|
"epoch": 6.065360477575386, |
|
"grad_norm": 0.6437819600105286, |
|
"learning_rate": 6.249624060150375e-05, |
|
"loss": 0.2651, |
|
"step": 158500 |
|
}, |
|
{ |
|
"epoch": 6.084494106842186, |
|
"grad_norm": 0.9133914113044739, |
|
"learning_rate": 6.174436090225563e-05, |
|
"loss": 0.2757, |
|
"step": 159000 |
|
}, |
|
{ |
|
"epoch": 6.084494106842186, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 317.9915, |
|
"eval_samples_per_second": 22.082, |
|
"eval_steps_per_second": 2.761, |
|
"eval_wer": 0.39225460122699385, |
|
"step": 159000 |
|
}, |
|
{ |
|
"epoch": 6.103627736108985, |
|
"grad_norm": 0.8651337623596191, |
|
"learning_rate": 6.099398496240601e-05, |
|
"loss": 0.2763, |
|
"step": 159500 |
|
}, |
|
{ |
|
"epoch": 6.122761365375784, |
|
"grad_norm": 0.6800199151039124, |
|
"learning_rate": 6.0243609022556384e-05, |
|
"loss": 0.2573, |
|
"step": 160000 |
|
}, |
|
{ |
|
"epoch": 6.122761365375784, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 318.7524, |
|
"eval_samples_per_second": 22.03, |
|
"eval_steps_per_second": 2.754, |
|
"eval_wer": 0.3906283054791623, |
|
"step": 160000 |
|
}, |
|
{ |
|
"epoch": 6.141894994642584, |
|
"grad_norm": 0.7264246940612793, |
|
"learning_rate": 5.9491729323308265e-05, |
|
"loss": 0.2632, |
|
"step": 160500 |
|
}, |
|
{ |
|
"epoch": 6.161028623909383, |
|
"grad_norm": 0.2874845862388611, |
|
"learning_rate": 5.873984962406015e-05, |
|
"loss": 0.2666, |
|
"step": 161000 |
|
}, |
|
{ |
|
"epoch": 6.161028623909383, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 318.0588, |
|
"eval_samples_per_second": 22.078, |
|
"eval_steps_per_second": 2.76, |
|
"eval_wer": 0.38831447006558073, |
|
"step": 161000 |
|
}, |
|
{ |
|
"epoch": 6.180162253176182, |
|
"grad_norm": 0.36712953448295593, |
|
"learning_rate": 5.798796992481202e-05, |
|
"loss": 0.2719, |
|
"step": 161500 |
|
}, |
|
{ |
|
"epoch": 6.199295882442982, |
|
"grad_norm": 1.4700485467910767, |
|
"learning_rate": 5.72360902255639e-05, |
|
"loss": 0.2691, |
|
"step": 162000 |
|
}, |
|
{ |
|
"epoch": 6.199295882442982, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 318.6549, |
|
"eval_samples_per_second": 22.036, |
|
"eval_steps_per_second": 2.755, |
|
"eval_wer": 0.391950497144066, |
|
"step": 162000 |
|
}, |
|
{ |
|
"epoch": 6.218429511709781, |
|
"grad_norm": 0.664314866065979, |
|
"learning_rate": 5.6484210526315785e-05, |
|
"loss": 0.2734, |
|
"step": 162500 |
|
}, |
|
{ |
|
"epoch": 6.23756314097658, |
|
"grad_norm": 0.7786546945571899, |
|
"learning_rate": 5.5732330827067666e-05, |
|
"loss": 0.2699, |
|
"step": 163000 |
|
}, |
|
{ |
|
"epoch": 6.23756314097658, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 323.3374, |
|
"eval_samples_per_second": 21.717, |
|
"eval_steps_per_second": 2.715, |
|
"eval_wer": 0.39616828855510894, |
|
"step": 163000 |
|
}, |
|
{ |
|
"epoch": 6.25669677024338, |
|
"grad_norm": 1.2886419296264648, |
|
"learning_rate": 5.498045112781954e-05, |
|
"loss": 0.2702, |
|
"step": 163500 |
|
}, |
|
{ |
|
"epoch": 6.275830399510179, |
|
"grad_norm": 0.6407492160797119, |
|
"learning_rate": 5.422857142857142e-05, |
|
"loss": 0.259, |
|
"step": 164000 |
|
}, |
|
{ |
|
"epoch": 6.275830399510179, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 318.7872, |
|
"eval_samples_per_second": 22.027, |
|
"eval_steps_per_second": 2.754, |
|
"eval_wer": 0.39016553839644597, |
|
"step": 164000 |
|
}, |
|
{ |
|
"epoch": 6.294964028776978, |
|
"grad_norm": 0.36012986302375793, |
|
"learning_rate": 5.3476691729323304e-05, |
|
"loss": 0.2591, |
|
"step": 164500 |
|
}, |
|
{ |
|
"epoch": 6.314097658043778, |
|
"grad_norm": 0.5582063794136047, |
|
"learning_rate": 5.2724812030075185e-05, |
|
"loss": 0.2707, |
|
"step": 165000 |
|
}, |
|
{ |
|
"epoch": 6.314097658043778, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 317.8204, |
|
"eval_samples_per_second": 22.094, |
|
"eval_steps_per_second": 2.763, |
|
"eval_wer": 0.38777237148297017, |
|
"step": 165000 |
|
}, |
|
{ |
|
"epoch": 6.333231287310577, |
|
"grad_norm": 0.5500897765159607, |
|
"learning_rate": 5.197293233082706e-05, |
|
"loss": 0.2574, |
|
"step": 165500 |
|
}, |
|
{ |
|
"epoch": 6.352364916577376, |
|
"grad_norm": 0.5922083854675293, |
|
"learning_rate": 5.122105263157894e-05, |
|
"loss": 0.265, |
|
"step": 166000 |
|
}, |
|
{ |
|
"epoch": 6.352364916577376, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 316.5005, |
|
"eval_samples_per_second": 22.186, |
|
"eval_steps_per_second": 2.774, |
|
"eval_wer": 0.38557753331922995, |
|
"step": 166000 |
|
}, |
|
{ |
|
"epoch": 6.371498545844176, |
|
"grad_norm": 0.5268240571022034, |
|
"learning_rate": 5.046917293233082e-05, |
|
"loss": 0.2693, |
|
"step": 166500 |
|
}, |
|
{ |
|
"epoch": 6.390632175110975, |
|
"grad_norm": 2.8765857219696045, |
|
"learning_rate": 4.9717293233082705e-05, |
|
"loss": 0.2657, |
|
"step": 167000 |
|
}, |
|
{ |
|
"epoch": 6.390632175110975, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 317.5854, |
|
"eval_samples_per_second": 22.111, |
|
"eval_steps_per_second": 2.765, |
|
"eval_wer": 0.38506187856991747, |
|
"step": 167000 |
|
}, |
|
{ |
|
"epoch": 6.409765804377774, |
|
"grad_norm": 0.6294525265693665, |
|
"learning_rate": 4.896541353383458e-05, |
|
"loss": 0.2634, |
|
"step": 167500 |
|
}, |
|
{ |
|
"epoch": 6.428899433644574, |
|
"grad_norm": 1.270578384399414, |
|
"learning_rate": 4.821503759398496e-05, |
|
"loss": 0.2625, |
|
"step": 168000 |
|
}, |
|
{ |
|
"epoch": 6.428899433644574, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 317.2046, |
|
"eval_samples_per_second": 22.137, |
|
"eval_steps_per_second": 2.768, |
|
"eval_wer": 0.3841495663211339, |
|
"step": 168000 |
|
}, |
|
{ |
|
"epoch": 6.448033062911373, |
|
"grad_norm": 1.4967974424362183, |
|
"learning_rate": 4.746315789473684e-05, |
|
"loss": 0.2679, |
|
"step": 168500 |
|
}, |
|
{ |
|
"epoch": 6.467166692178172, |
|
"grad_norm": 0.8087161779403687, |
|
"learning_rate": 4.671278195488721e-05, |
|
"loss": 0.2615, |
|
"step": 169000 |
|
}, |
|
{ |
|
"epoch": 6.467166692178172, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 316.9216, |
|
"eval_samples_per_second": 22.157, |
|
"eval_steps_per_second": 2.77, |
|
"eval_wer": 0.3831975883224032, |
|
"step": 169000 |
|
}, |
|
{ |
|
"epoch": 6.486300321444972, |
|
"grad_norm": 0.5716475248336792, |
|
"learning_rate": 4.596090225563909e-05, |
|
"loss": 0.2507, |
|
"step": 169500 |
|
}, |
|
{ |
|
"epoch": 6.505433950711771, |
|
"grad_norm": 0.7699230909347534, |
|
"learning_rate": 4.520902255639097e-05, |
|
"loss": 0.2629, |
|
"step": 170000 |
|
}, |
|
{ |
|
"epoch": 6.505433950711771, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 317.644, |
|
"eval_samples_per_second": 22.107, |
|
"eval_steps_per_second": 2.764, |
|
"eval_wer": 0.38342236090543685, |
|
"step": 170000 |
|
}, |
|
{ |
|
"epoch": 6.52456757997857, |
|
"grad_norm": 0.8144583702087402, |
|
"learning_rate": 4.445714285714285e-05, |
|
"loss": 0.2646, |
|
"step": 170500 |
|
}, |
|
{ |
|
"epoch": 6.54370120924537, |
|
"grad_norm": 0.9461275339126587, |
|
"learning_rate": 4.370526315789473e-05, |
|
"loss": 0.276, |
|
"step": 171000 |
|
}, |
|
{ |
|
"epoch": 6.54370120924537, |
|
"eval_loss": null, |
|
"eval_runtime": 316.3907, |
|
"eval_samples_per_second": 22.194, |
|
"eval_steps_per_second": 2.775, |
|
"eval_wer": 0.38307859107256187, |
|
"step": 171000 |
|
}, |
|
{ |
|
"epoch": 6.562834838512169, |
|
"grad_norm": 0.5300208926200867, |
|
"learning_rate": 4.295338345864661e-05, |
|
"loss": 0.2657, |
|
"step": 171500 |
|
}, |
|
{ |
|
"epoch": 6.581968467778968, |
|
"grad_norm": 0.611358106136322, |
|
"learning_rate": 4.220150375939849e-05, |
|
"loss": 0.2623, |
|
"step": 172000 |
|
}, |
|
{ |
|
"epoch": 6.581968467778968, |
|
"eval_loss": null, |
|
"eval_runtime": 316.1573, |
|
"eval_samples_per_second": 22.21, |
|
"eval_steps_per_second": 2.777, |
|
"eval_wer": 0.38129363232494184, |
|
"step": 172000 |
|
}, |
|
{ |
|
"epoch": 6.601102097045768, |
|
"grad_norm": 0.8705514073371887, |
|
"learning_rate": 4.144962406015037e-05, |
|
"loss": 0.2614, |
|
"step": 172500 |
|
}, |
|
{ |
|
"epoch": 6.620235726312567, |
|
"grad_norm": 0.4813309609889984, |
|
"learning_rate": 4.069924812030075e-05, |
|
"loss": 0.27, |
|
"step": 173000 |
|
}, |
|
{ |
|
"epoch": 6.620235726312567, |
|
"eval_loss": null, |
|
"eval_runtime": 316.2461, |
|
"eval_samples_per_second": 22.204, |
|
"eval_steps_per_second": 2.776, |
|
"eval_wer": 0.3814787391580283, |
|
"step": 173000 |
|
}, |
|
{ |
|
"epoch": 6.639369355579366, |
|
"grad_norm": 2.4199442863464355, |
|
"learning_rate": 3.994736842105263e-05, |
|
"loss": 0.261, |
|
"step": 173500 |
|
}, |
|
{ |
|
"epoch": 6.658502984846166, |
|
"grad_norm": 0.6200481653213501, |
|
"learning_rate": 3.919548872180451e-05, |
|
"loss": 0.2712, |
|
"step": 174000 |
|
}, |
|
{ |
|
"epoch": 6.658502984846166, |
|
"eval_loss": null, |
|
"eval_runtime": 317.1791, |
|
"eval_samples_per_second": 22.139, |
|
"eval_steps_per_second": 2.768, |
|
"eval_wer": 0.3812143008250476, |
|
"step": 174000 |
|
}, |
|
{ |
|
"epoch": 6.677636614112965, |
|
"grad_norm": 0.2569734454154968, |
|
"learning_rate": 3.844360902255639e-05, |
|
"loss": 0.2614, |
|
"step": 174500 |
|
}, |
|
{ |
|
"epoch": 6.696770243379764, |
|
"grad_norm": 0.44579431414604187, |
|
"learning_rate": 3.769172932330827e-05, |
|
"loss": 0.263, |
|
"step": 175000 |
|
}, |
|
{ |
|
"epoch": 6.696770243379764, |
|
"eval_loss": null, |
|
"eval_runtime": 316.5662, |
|
"eval_samples_per_second": 22.182, |
|
"eval_steps_per_second": 2.774, |
|
"eval_wer": 0.38161095832451875, |
|
"step": 175000 |
|
}, |
|
{ |
|
"epoch": 6.715903872646564, |
|
"grad_norm": 0.38563570380210876, |
|
"learning_rate": 3.6939849624060146e-05, |
|
"loss": 0.2488, |
|
"step": 175500 |
|
}, |
|
{ |
|
"epoch": 6.735037501913363, |
|
"grad_norm": 0.5862724781036377, |
|
"learning_rate": 3.6189473684210524e-05, |
|
"loss": 0.2616, |
|
"step": 176000 |
|
}, |
|
{ |
|
"epoch": 6.735037501913363, |
|
"eval_loss": null, |
|
"eval_runtime": 316.2674, |
|
"eval_samples_per_second": 22.203, |
|
"eval_steps_per_second": 2.776, |
|
"eval_wer": 0.379574783160567, |
|
"step": 176000 |
|
}, |
|
{ |
|
"epoch": 6.754171131180162, |
|
"grad_norm": 0.5157662034034729, |
|
"learning_rate": 3.5437593984962405e-05, |
|
"loss": 0.2525, |
|
"step": 176500 |
|
}, |
|
{ |
|
"epoch": 6.773304760446962, |
|
"grad_norm": 0.807600200176239, |
|
"learning_rate": 3.4687218045112776e-05, |
|
"loss": 0.253, |
|
"step": 177000 |
|
}, |
|
{ |
|
"epoch": 6.773304760446962, |
|
"eval_loss": null, |
|
"eval_runtime": 316.2322, |
|
"eval_samples_per_second": 22.205, |
|
"eval_steps_per_second": 2.776, |
|
"eval_wer": 0.3794161201607785, |
|
"step": 177000 |
|
}, |
|
{ |
|
"epoch": 6.792438389713761, |
|
"grad_norm": 0.4601055383682251, |
|
"learning_rate": 3.393533834586466e-05, |
|
"loss": 0.2513, |
|
"step": 177500 |
|
}, |
|
{ |
|
"epoch": 6.81157201898056, |
|
"grad_norm": 0.4807584881782532, |
|
"learning_rate": 3.3184962406015036e-05, |
|
"loss": 0.2572, |
|
"step": 178000 |
|
}, |
|
{ |
|
"epoch": 6.81157201898056, |
|
"eval_loss": null, |
|
"eval_runtime": 314.7771, |
|
"eval_samples_per_second": 22.308, |
|
"eval_steps_per_second": 2.789, |
|
"eval_wer": 0.38285381848952826, |
|
"step": 178000 |
|
}, |
|
{ |
|
"epoch": 6.83070564824736, |
|
"grad_norm": 2.668820381164551, |
|
"learning_rate": 3.243308270676692e-05, |
|
"loss": 0.2522, |
|
"step": 178500 |
|
}, |
|
{ |
|
"epoch": 6.8498392775141586, |
|
"grad_norm": 0.5270944833755493, |
|
"learning_rate": 3.168270676691729e-05, |
|
"loss": 0.2517, |
|
"step": 179000 |
|
}, |
|
{ |
|
"epoch": 6.8498392775141586, |
|
"eval_loss": null, |
|
"eval_runtime": 314.726, |
|
"eval_samples_per_second": 22.311, |
|
"eval_steps_per_second": 2.79, |
|
"eval_wer": 0.37728739158028346, |
|
"step": 179000 |
|
}, |
|
{ |
|
"epoch": 6.868972906780958, |
|
"grad_norm": 0.5133803486824036, |
|
"learning_rate": 3.093082706766917e-05, |
|
"loss": 0.2479, |
|
"step": 179500 |
|
}, |
|
{ |
|
"epoch": 6.888106536047758, |
|
"grad_norm": 0.6608215570449829, |
|
"learning_rate": 3.0178947368421048e-05, |
|
"loss": 0.2471, |
|
"step": 180000 |
|
}, |
|
{ |
|
"epoch": 6.888106536047758, |
|
"eval_loss": null, |
|
"eval_runtime": 315.1185, |
|
"eval_samples_per_second": 22.284, |
|
"eval_steps_per_second": 2.786, |
|
"eval_wer": 0.37833192299555746, |
|
"step": 180000 |
|
}, |
|
{ |
|
"epoch": 6.9072401653145565, |
|
"grad_norm": 0.9923522472381592, |
|
"learning_rate": 2.942706766917293e-05, |
|
"loss": 0.2533, |
|
"step": 180500 |
|
}, |
|
{ |
|
"epoch": 6.926373794581356, |
|
"grad_norm": 0.6495700478553772, |
|
"learning_rate": 2.8675187969924808e-05, |
|
"loss": 0.2441, |
|
"step": 181000 |
|
}, |
|
{ |
|
"epoch": 6.926373794581356, |
|
"eval_loss": null, |
|
"eval_runtime": 315.6279, |
|
"eval_samples_per_second": 22.248, |
|
"eval_steps_per_second": 2.782, |
|
"eval_wer": 0.37630896974825473, |
|
"step": 181000 |
|
}, |
|
{ |
|
"epoch": 6.945507423848156, |
|
"grad_norm": 0.5995193123817444, |
|
"learning_rate": 2.792330827067669e-05, |
|
"loss": 0.2509, |
|
"step": 181500 |
|
}, |
|
{ |
|
"epoch": 6.9646410531149545, |
|
"grad_norm": 0.6942078471183777, |
|
"learning_rate": 2.7172932330827067e-05, |
|
"loss": 0.245, |
|
"step": 182000 |
|
}, |
|
{ |
|
"epoch": 6.9646410531149545, |
|
"eval_loss": null, |
|
"eval_runtime": 316.6284, |
|
"eval_samples_per_second": 22.177, |
|
"eval_steps_per_second": 2.773, |
|
"eval_wer": 0.3749338904167548, |
|
"step": 182000 |
|
}, |
|
{ |
|
"epoch": 6.983774682381754, |
|
"grad_norm": 0.7296892404556274, |
|
"learning_rate": 2.6421052631578945e-05, |
|
"loss": 0.2513, |
|
"step": 182500 |
|
}, |
|
{ |
|
"epoch": 7.002908311648554, |
|
"grad_norm": 0.9255119562149048, |
|
"learning_rate": 2.5669172932330827e-05, |
|
"loss": 0.235, |
|
"step": 183000 |
|
}, |
|
{ |
|
"epoch": 7.002908311648554, |
|
"eval_loss": null, |
|
"eval_runtime": 315.5684, |
|
"eval_samples_per_second": 22.252, |
|
"eval_steps_per_second": 2.782, |
|
"eval_wer": 0.3724085043367887, |
|
"step": 183000 |
|
}, |
|
{ |
|
"epoch": 7.0220419409153525, |
|
"grad_norm": 0.6719674468040466, |
|
"learning_rate": 2.4917293233082705e-05, |
|
"loss": 0.2154, |
|
"step": 183500 |
|
}, |
|
{ |
|
"epoch": 7.041175570182152, |
|
"grad_norm": 0.5619477033615112, |
|
"learning_rate": 2.4165413533834586e-05, |
|
"loss": 0.2281, |
|
"step": 184000 |
|
}, |
|
{ |
|
"epoch": 7.041175570182152, |
|
"eval_loss": null, |
|
"eval_runtime": 316.4942, |
|
"eval_samples_per_second": 22.187, |
|
"eval_steps_per_second": 2.774, |
|
"eval_wer": 0.37427279458430296, |
|
"step": 184000 |
|
}, |
|
{ |
|
"epoch": 7.060309199448952, |
|
"grad_norm": 0.7847068309783936, |
|
"learning_rate": 2.3413533834586465e-05, |
|
"loss": 0.2184, |
|
"step": 184500 |
|
}, |
|
{ |
|
"epoch": 7.0794428287157505, |
|
"grad_norm": 0.7864698171615601, |
|
"learning_rate": 2.2661654135338346e-05, |
|
"loss": 0.2155, |
|
"step": 185000 |
|
}, |
|
{ |
|
"epoch": 7.0794428287157505, |
|
"eval_loss": null, |
|
"eval_runtime": 315.7964, |
|
"eval_samples_per_second": 22.236, |
|
"eval_steps_per_second": 2.78, |
|
"eval_wer": 0.3742463507510049, |
|
"step": 185000 |
|
}, |
|
{ |
|
"epoch": 7.09857645798255, |
|
"grad_norm": 0.6666255593299866, |
|
"learning_rate": 2.1909774436090224e-05, |
|
"loss": 0.2106, |
|
"step": 185500 |
|
}, |
|
{ |
|
"epoch": 7.11771008724935, |
|
"grad_norm": 0.5075043439865112, |
|
"learning_rate": 2.1157894736842106e-05, |
|
"loss": 0.2177, |
|
"step": 186000 |
|
}, |
|
{ |
|
"epoch": 7.11771008724935, |
|
"eval_loss": null, |
|
"eval_runtime": 316.0968, |
|
"eval_samples_per_second": 22.215, |
|
"eval_steps_per_second": 2.778, |
|
"eval_wer": 0.37367780833509623, |
|
"step": 186000 |
|
}, |
|
{ |
|
"epoch": 7.1368437165161485, |
|
"grad_norm": 0.7393398284912109, |
|
"learning_rate": 2.0406015037593984e-05, |
|
"loss": 0.2208, |
|
"step": 186500 |
|
}, |
|
{ |
|
"epoch": 7.155977345782948, |
|
"grad_norm": 0.3300219476222992, |
|
"learning_rate": 1.9654135338345865e-05, |
|
"loss": 0.2107, |
|
"step": 187000 |
|
}, |
|
{ |
|
"epoch": 7.155977345782948, |
|
"eval_loss": null, |
|
"eval_runtime": 315.1907, |
|
"eval_samples_per_second": 22.279, |
|
"eval_steps_per_second": 2.786, |
|
"eval_wer": 0.37078220858895705, |
|
"step": 187000 |
|
}, |
|
{ |
|
"epoch": 7.175110975049748, |
|
"grad_norm": 3.2175910472869873, |
|
"learning_rate": 1.8902255639097743e-05, |
|
"loss": 0.2032, |
|
"step": 187500 |
|
}, |
|
{ |
|
"epoch": 7.194244604316546, |
|
"grad_norm": 0.28260278701782227, |
|
"learning_rate": 1.8151879699248118e-05, |
|
"loss": 0.2129, |
|
"step": 188000 |
|
}, |
|
{ |
|
"epoch": 7.194244604316546, |
|
"eval_loss": null, |
|
"eval_runtime": 315.4927, |
|
"eval_samples_per_second": 22.257, |
|
"eval_steps_per_second": 2.783, |
|
"eval_wer": 0.37161518933784643, |
|
"step": 188000 |
|
}, |
|
{ |
|
"epoch": 7.213378233583346, |
|
"grad_norm": 1.4389430284500122, |
|
"learning_rate": 1.74e-05, |
|
"loss": 0.221, |
|
"step": 188500 |
|
}, |
|
{ |
|
"epoch": 7.232511862850146, |
|
"grad_norm": 0.5784205198287964, |
|
"learning_rate": 1.6648120300751878e-05, |
|
"loss": 0.2173, |
|
"step": 189000 |
|
}, |
|
{ |
|
"epoch": 7.232511862850146, |
|
"eval_loss": null, |
|
"eval_runtime": 317.5523, |
|
"eval_samples_per_second": 22.113, |
|
"eval_steps_per_second": 2.765, |
|
"eval_wer": 0.36953934842394753, |
|
"step": 189000 |
|
}, |
|
{ |
|
"epoch": 7.251645492116944, |
|
"grad_norm": 0.5264465808868408, |
|
"learning_rate": 1.589624060150376e-05, |
|
"loss": 0.2102, |
|
"step": 189500 |
|
}, |
|
{ |
|
"epoch": 7.270779121383744, |
|
"grad_norm": 1.0469930171966553, |
|
"learning_rate": 1.5144360902255639e-05, |
|
"loss": 0.2145, |
|
"step": 190000 |
|
}, |
|
{ |
|
"epoch": 7.270779121383744, |
|
"eval_loss": null, |
|
"eval_runtime": 316.5149, |
|
"eval_samples_per_second": 22.185, |
|
"eval_steps_per_second": 2.774, |
|
"eval_wer": 0.37215728792045694, |
|
"step": 190000 |
|
}, |
|
{ |
|
"epoch": 7.289912750650544, |
|
"grad_norm": 0.43167009949684143, |
|
"learning_rate": 1.4392481203007517e-05, |
|
"loss": 0.2217, |
|
"step": 190500 |
|
}, |
|
{ |
|
"epoch": 7.309046379917342, |
|
"grad_norm": 1.083001732826233, |
|
"learning_rate": 1.3640601503759397e-05, |
|
"loss": 0.2116, |
|
"step": 191000 |
|
}, |
|
{ |
|
"epoch": 7.309046379917342, |
|
"eval_loss": null, |
|
"eval_runtime": 315.4082, |
|
"eval_samples_per_second": 22.263, |
|
"eval_steps_per_second": 2.784, |
|
"eval_wer": 0.37024011000634655, |
|
"step": 191000 |
|
}, |
|
{ |
|
"epoch": 7.328180009184142, |
|
"grad_norm": 0.5683468580245972, |
|
"learning_rate": 1.2888721804511277e-05, |
|
"loss": 0.2085, |
|
"step": 191500 |
|
}, |
|
{ |
|
"epoch": 7.347313638450942, |
|
"grad_norm": 0.45209017395973206, |
|
"learning_rate": 1.2136842105263156e-05, |
|
"loss": 0.212, |
|
"step": 192000 |
|
}, |
|
{ |
|
"epoch": 7.347313638450942, |
|
"eval_loss": null, |
|
"eval_runtime": 316.0151, |
|
"eval_samples_per_second": 22.22, |
|
"eval_steps_per_second": 2.778, |
|
"eval_wer": 0.37038555108948595, |
|
"step": 192000 |
|
}, |
|
{ |
|
"epoch": 7.36644726771774, |
|
"grad_norm": 1.5557799339294434, |
|
"learning_rate": 1.1384962406015036e-05, |
|
"loss": 0.2091, |
|
"step": 192500 |
|
}, |
|
{ |
|
"epoch": 7.38558089698454, |
|
"grad_norm": 0.456394761800766, |
|
"learning_rate": 1.0633082706766916e-05, |
|
"loss": 0.2116, |
|
"step": 193000 |
|
}, |
|
{ |
|
"epoch": 7.38558089698454, |
|
"eval_loss": null, |
|
"eval_runtime": 316.9212, |
|
"eval_samples_per_second": 22.157, |
|
"eval_steps_per_second": 2.77, |
|
"eval_wer": 0.37012111275650517, |
|
"step": 193000 |
|
}, |
|
{ |
|
"epoch": 7.40471452625134, |
|
"grad_norm": 0.34570273756980896, |
|
"learning_rate": 9.881203007518796e-06, |
|
"loss": 0.2034, |
|
"step": 193500 |
|
}, |
|
{ |
|
"epoch": 7.423848155518138, |
|
"grad_norm": 0.6514278054237366, |
|
"learning_rate": 9.129323308270676e-06, |
|
"loss": 0.2124, |
|
"step": 194000 |
|
}, |
|
{ |
|
"epoch": 7.423848155518138, |
|
"eval_loss": null, |
|
"eval_runtime": 315.4723, |
|
"eval_samples_per_second": 22.259, |
|
"eval_steps_per_second": 2.783, |
|
"eval_wer": 0.36865348000846204, |
|
"step": 194000 |
|
}, |
|
{ |
|
"epoch": 7.442981784784938, |
|
"grad_norm": 0.3534170091152191, |
|
"learning_rate": 8.378947368421052e-06, |
|
"loss": 0.2146, |
|
"step": 194500 |
|
}, |
|
{ |
|
"epoch": 7.462115414051738, |
|
"grad_norm": 0.6505366563796997, |
|
"learning_rate": 7.627067669172932e-06, |
|
"loss": 0.2078, |
|
"step": 195000 |
|
}, |
|
{ |
|
"epoch": 7.462115414051738, |
|
"eval_loss": null, |
|
"eval_runtime": 316.1812, |
|
"eval_samples_per_second": 22.209, |
|
"eval_steps_per_second": 2.777, |
|
"eval_wer": 0.3681113814258515, |
|
"step": 195000 |
|
}, |
|
{ |
|
"epoch": 7.481249043318536, |
|
"grad_norm": 0.5068254470825195, |
|
"learning_rate": 6.8751879699248115e-06, |
|
"loss": 0.2097, |
|
"step": 195500 |
|
}, |
|
{ |
|
"epoch": 7.500382672585336, |
|
"grad_norm": 0.32878100872039795, |
|
"learning_rate": 6.124812030075188e-06, |
|
"loss": 0.2158, |
|
"step": 196000 |
|
}, |
|
{ |
|
"epoch": 7.500382672585336, |
|
"eval_loss": null, |
|
"eval_runtime": 316.2612, |
|
"eval_samples_per_second": 22.203, |
|
"eval_steps_per_second": 2.776, |
|
"eval_wer": 0.3682171567590438, |
|
"step": 196000 |
|
}, |
|
{ |
|
"epoch": 7.519516301852136, |
|
"grad_norm": 0.9998613595962524, |
|
"learning_rate": 5.3729323308270675e-06, |
|
"loss": 0.2031, |
|
"step": 196500 |
|
}, |
|
{ |
|
"epoch": 7.538649931118934, |
|
"grad_norm": 0.6963976621627808, |
|
"learning_rate": 4.622556390977443e-06, |
|
"loss": 0.2157, |
|
"step": 197000 |
|
}, |
|
{ |
|
"epoch": 7.538649931118934, |
|
"eval_loss": null, |
|
"eval_runtime": 316.927, |
|
"eval_samples_per_second": 22.157, |
|
"eval_steps_per_second": 2.77, |
|
"eval_wer": 0.36727840067696216, |
|
"step": 197000 |
|
}, |
|
{ |
|
"epoch": 7.557783560385734, |
|
"grad_norm": 0.6300442218780518, |
|
"learning_rate": 3.870676691729323e-06, |
|
"loss": 0.2082, |
|
"step": 197500 |
|
}, |
|
{ |
|
"epoch": 7.576917189652534, |
|
"grad_norm": 0.3542906939983368, |
|
"learning_rate": 3.118796992481203e-06, |
|
"loss": 0.2045, |
|
"step": 198000 |
|
}, |
|
{ |
|
"epoch": 7.576917189652534, |
|
"eval_loss": null, |
|
"eval_runtime": 317.109, |
|
"eval_samples_per_second": 22.144, |
|
"eval_steps_per_second": 2.769, |
|
"eval_wer": 0.3666834144277554, |
|
"step": 198000 |
|
}, |
|
{ |
|
"epoch": 7.596050818919332, |
|
"grad_norm": 0.9721285700798035, |
|
"learning_rate": 2.366917293233083e-06, |
|
"loss": 0.209, |
|
"step": 198500 |
|
}, |
|
{ |
|
"epoch": 7.615184448186132, |
|
"grad_norm": 0.8516126275062561, |
|
"learning_rate": 1.6165413533834587e-06, |
|
"loss": 0.2188, |
|
"step": 199000 |
|
}, |
|
{ |
|
"epoch": 7.615184448186132, |
|
"eval_loss": null, |
|
"eval_runtime": 316.4647, |
|
"eval_samples_per_second": 22.189, |
|
"eval_steps_per_second": 2.774, |
|
"eval_wer": 0.36752961709329385, |
|
"step": 199000 |
|
}, |
|
{ |
|
"epoch": 7.634318077452932, |
|
"grad_norm": 0.7036492824554443, |
|
"learning_rate": 8.646616541353383e-07, |
|
"loss": 0.2072, |
|
"step": 199500 |
|
}, |
|
{ |
|
"epoch": 7.65345170671973, |
|
"grad_norm": 0.5840544104576111, |
|
"learning_rate": 1.1278195488721805e-07, |
|
"loss": 0.2041, |
|
"step": 200000 |
|
}, |
|
{ |
|
"epoch": 7.65345170671973, |
|
"eval_loss": null, |
|
"eval_runtime": 315.8635, |
|
"eval_samples_per_second": 22.231, |
|
"eval_steps_per_second": 2.78, |
|
"eval_wer": 0.3670800719272266, |
|
"step": 200000 |
|
}, |
|
{ |
|
"epoch": 7.65345170671973, |
|
"step": 200000, |
|
"total_flos": 1.6865103601997185e+21, |
|
"train_loss": 0.5320780529403687, |
|
"train_runtime": 333857.2134, |
|
"train_samples_per_second": 4.792, |
|
"train_steps_per_second": 0.599 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 200000, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 8, |
|
"save_steps": 4000, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.6865103601997185e+21, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|