emilios committed on
Commit
e5f1061
·
1 Parent(s): 6a213a7

End of training

Browse files
all_results.json CHANGED
@@ -1,12 +1,12 @@
1
  {
2
- "epoch": 528.76,
3
  "eval_loss": 0.56689453125,
4
- "eval_runtime": 156.403,
5
- "eval_samples_per_second": 1.739,
6
- "eval_steps_per_second": 0.109,
7
  "eval_wer": 9.899702823179792,
8
- "train_loss": 0.00025936167769961886,
9
- "train_runtime": 28092.3517,
10
- "train_samples_per_second": 10.252,
11
- "train_steps_per_second": 0.32
12
  }
 
1
  {
2
+ "epoch": 587.82,
3
  "eval_loss": 0.56689453125,
4
+ "eval_runtime": 154.2304,
5
+ "eval_samples_per_second": 1.764,
6
+ "eval_steps_per_second": 0.11,
7
  "eval_wer": 9.899702823179792,
8
+ "train_loss": 1.4810419082641601e-05,
9
+ "train_runtime": 3866.6425,
10
+ "train_samples_per_second": 82.759,
11
+ "train_steps_per_second": 2.586
12
  }
eval_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 528.76,
3
  "eval_loss": 0.56689453125,
4
- "eval_runtime": 156.403,
5
- "eval_samples_per_second": 1.739,
6
- "eval_steps_per_second": 0.109,
7
  "eval_wer": 9.899702823179792
8
  }
 
1
  {
2
+ "epoch": 587.82,
3
  "eval_loss": 0.56689453125,
4
+ "eval_runtime": 154.2304,
5
+ "eval_samples_per_second": 1.764,
6
+ "eval_steps_per_second": 0.11,
7
  "eval_wer": 9.899702823179792
8
  }
runs/Dec23_07-12-50_129-146-176-120/events.out.tfevents.1671783758.129-146-176-120.858176.2 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:00d6b23a4face7948649a31b01be2108827f32cf3dc24574e56cf90543af30e9
3
+ size 358
train_results.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
- "epoch": 528.76,
3
- "train_loss": 0.00025936167769961886,
4
- "train_runtime": 28092.3517,
5
- "train_samples_per_second": 10.252,
6
- "train_steps_per_second": 0.32
7
  }
 
1
  {
2
+ "epoch": 587.82,
3
+ "train_loss": 1.4810419082641601e-05,
4
+ "train_runtime": 3866.6425,
5
+ "train_samples_per_second": 82.759,
6
+ "train_steps_per_second": 2.586
7
  }
trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": 9.899702823179792,
3
  "best_model_checkpoint": "./checkpoint-8000",
4
- "epoch": 528.7647058823529,
5
- "global_step": 9000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -2249,18 +2249,267 @@
2249
  "step": 9000
2250
  },
2251
  {
2252
- "epoch": 528.76,
2253
- "step": 9000,
2254
- "total_flos": 2.7722894647818505e+20,
2255
- "train_loss": 0.00025936167769961886,
2256
- "train_runtime": 28092.3517,
2257
- "train_samples_per_second": 10.252,
2258
- "train_steps_per_second": 0.32
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2259
  }
2260
  ],
2261
- "max_steps": 9000,
2262
- "num_train_epochs": 530,
2263
- "total_flos": 2.7722894647818505e+20,
2264
  "trial_name": null,
2265
  "trial_params": null
2266
  }
 
1
  {
2
  "best_metric": 9.899702823179792,
3
  "best_model_checkpoint": "./checkpoint-8000",
4
+ "epoch": 587.8235294117648,
5
+ "global_step": 10000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
2249
  "step": 9000
2250
  },
2251
  {
2252
+ "epoch": 530.47,
2253
+ "learning_rate": 3.145263157894737e-07,
2254
+ "loss": 0.0001,
2255
+ "step": 9025
2256
+ },
2257
+ {
2258
+ "epoch": 531.94,
2259
+ "learning_rate": 3.0663157894736844e-07,
2260
+ "loss": 0.0002,
2261
+ "step": 9050
2262
+ },
2263
+ {
2264
+ "epoch": 533.41,
2265
+ "learning_rate": 2.987368421052632e-07,
2266
+ "loss": 0.0002,
2267
+ "step": 9075
2268
+ },
2269
+ {
2270
+ "epoch": 534.88,
2271
+ "learning_rate": 2.908421052631579e-07,
2272
+ "loss": 0.0001,
2273
+ "step": 9100
2274
+ },
2275
+ {
2276
+ "epoch": 536.35,
2277
+ "learning_rate": 2.829473684210526e-07,
2278
+ "loss": 0.0001,
2279
+ "step": 9125
2280
+ },
2281
+ {
2282
+ "epoch": 537.82,
2283
+ "learning_rate": 2.7505263157894737e-07,
2284
+ "loss": 0.0003,
2285
+ "step": 9150
2286
+ },
2287
+ {
2288
+ "epoch": 539.29,
2289
+ "learning_rate": 2.671578947368421e-07,
2290
+ "loss": 0.0002,
2291
+ "step": 9175
2292
+ },
2293
+ {
2294
+ "epoch": 540.76,
2295
+ "learning_rate": 2.5926315789473686e-07,
2296
+ "loss": 0.0002,
2297
+ "step": 9200
2298
+ },
2299
+ {
2300
+ "epoch": 542.24,
2301
+ "learning_rate": 2.513684210526316e-07,
2302
+ "loss": 0.0001,
2303
+ "step": 9225
2304
+ },
2305
+ {
2306
+ "epoch": 543.71,
2307
+ "learning_rate": 2.4347368421052635e-07,
2308
+ "loss": 0.0002,
2309
+ "step": 9250
2310
+ },
2311
+ {
2312
+ "epoch": 545.18,
2313
+ "learning_rate": 2.3557894736842104e-07,
2314
+ "loss": 0.0001,
2315
+ "step": 9275
2316
+ },
2317
+ {
2318
+ "epoch": 546.65,
2319
+ "learning_rate": 2.276842105263158e-07,
2320
+ "loss": 0.0002,
2321
+ "step": 9300
2322
+ },
2323
+ {
2324
+ "epoch": 548.12,
2325
+ "learning_rate": 2.197894736842105e-07,
2326
+ "loss": 0.0002,
2327
+ "step": 9325
2328
+ },
2329
+ {
2330
+ "epoch": 549.59,
2331
+ "learning_rate": 2.1189473684210525e-07,
2332
+ "loss": 0.0001,
2333
+ "step": 9350
2334
+ },
2335
+ {
2336
+ "epoch": 551.06,
2337
+ "learning_rate": 2.0400000000000003e-07,
2338
+ "loss": 0.0001,
2339
+ "step": 9375
2340
+ },
2341
+ {
2342
+ "epoch": 552.53,
2343
+ "learning_rate": 1.9610526315789472e-07,
2344
+ "loss": 0.0001,
2345
+ "step": 9400
2346
+ },
2347
+ {
2348
+ "epoch": 554.0,
2349
+ "learning_rate": 1.882105263157895e-07,
2350
+ "loss": 0.0002,
2351
+ "step": 9425
2352
+ },
2353
+ {
2354
+ "epoch": 555.47,
2355
+ "learning_rate": 1.803157894736842e-07,
2356
+ "loss": 0.0002,
2357
+ "step": 9450
2358
+ },
2359
+ {
2360
+ "epoch": 556.94,
2361
+ "learning_rate": 1.7242105263157896e-07,
2362
+ "loss": 0.0001,
2363
+ "step": 9475
2364
+ },
2365
+ {
2366
+ "epoch": 558.41,
2367
+ "learning_rate": 1.6452631578947368e-07,
2368
+ "loss": 0.0002,
2369
+ "step": 9500
2370
+ },
2371
+ {
2372
+ "epoch": 559.88,
2373
+ "learning_rate": 1.5663157894736842e-07,
2374
+ "loss": 0.0001,
2375
+ "step": 9525
2376
+ },
2377
+ {
2378
+ "epoch": 561.35,
2379
+ "learning_rate": 1.4873684210526317e-07,
2380
+ "loss": 0.0001,
2381
+ "step": 9550
2382
+ },
2383
+ {
2384
+ "epoch": 562.82,
2385
+ "learning_rate": 1.408421052631579e-07,
2386
+ "loss": 0.0001,
2387
+ "step": 9575
2388
+ },
2389
+ {
2390
+ "epoch": 564.29,
2391
+ "learning_rate": 1.3294736842105263e-07,
2392
+ "loss": 0.0002,
2393
+ "step": 9600
2394
+ },
2395
+ {
2396
+ "epoch": 565.76,
2397
+ "learning_rate": 1.2505263157894738e-07,
2398
+ "loss": 0.0001,
2399
+ "step": 9625
2400
+ },
2401
+ {
2402
+ "epoch": 567.24,
2403
+ "learning_rate": 1.1715789473684211e-07,
2404
+ "loss": 0.0002,
2405
+ "step": 9650
2406
+ },
2407
+ {
2408
+ "epoch": 568.71,
2409
+ "learning_rate": 1.0926315789473684e-07,
2410
+ "loss": 0.0001,
2411
+ "step": 9675
2412
+ },
2413
+ {
2414
+ "epoch": 570.18,
2415
+ "learning_rate": 1.0136842105263159e-07,
2416
+ "loss": 0.0001,
2417
+ "step": 9700
2418
+ },
2419
+ {
2420
+ "epoch": 571.65,
2421
+ "learning_rate": 9.347368421052632e-08,
2422
+ "loss": 0.0001,
2423
+ "step": 9725
2424
+ },
2425
+ {
2426
+ "epoch": 573.12,
2427
+ "learning_rate": 8.557894736842105e-08,
2428
+ "loss": 0.0001,
2429
+ "step": 9750
2430
+ },
2431
+ {
2432
+ "epoch": 574.59,
2433
+ "learning_rate": 7.768421052631578e-08,
2434
+ "loss": 0.0002,
2435
+ "step": 9775
2436
+ },
2437
+ {
2438
+ "epoch": 576.06,
2439
+ "learning_rate": 6.978947368421053e-08,
2440
+ "loss": 0.0001,
2441
+ "step": 9800
2442
+ },
2443
+ {
2444
+ "epoch": 577.53,
2445
+ "learning_rate": 6.189473684210526e-08,
2446
+ "loss": 0.0002,
2447
+ "step": 9825
2448
+ },
2449
+ {
2450
+ "epoch": 579.0,
2451
+ "learning_rate": 5.4e-08,
2452
+ "loss": 0.0001,
2453
+ "step": 9850
2454
+ },
2455
+ {
2456
+ "epoch": 580.47,
2457
+ "learning_rate": 4.610526315789473e-08,
2458
+ "loss": 0.0001,
2459
+ "step": 9875
2460
+ },
2461
+ {
2462
+ "epoch": 581.94,
2463
+ "learning_rate": 3.821052631578947e-08,
2464
+ "loss": 0.0002,
2465
+ "step": 9900
2466
+ },
2467
+ {
2468
+ "epoch": 583.41,
2469
+ "learning_rate": 3.031578947368421e-08,
2470
+ "loss": 0.0001,
2471
+ "step": 9925
2472
+ },
2473
+ {
2474
+ "epoch": 584.88,
2475
+ "learning_rate": 2.2421052631578946e-08,
2476
+ "loss": 0.0002,
2477
+ "step": 9950
2478
+ },
2479
+ {
2480
+ "epoch": 586.35,
2481
+ "learning_rate": 1.4526315789473685e-08,
2482
+ "loss": 0.0001,
2483
+ "step": 9975
2484
+ },
2485
+ {
2486
+ "epoch": 587.82,
2487
+ "learning_rate": 6.631578947368421e-09,
2488
+ "loss": 0.0001,
2489
+ "step": 10000
2490
+ },
2491
+ {
2492
+ "epoch": 587.82,
2493
+ "eval_loss": 0.5673828125,
2494
+ "eval_runtime": 153.8197,
2495
+ "eval_samples_per_second": 1.768,
2496
+ "eval_steps_per_second": 0.111,
2497
+ "eval_wer": 9.964710252600298,
2498
+ "step": 10000
2499
+ },
2500
+ {
2501
+ "epoch": 587.82,
2502
+ "step": 10000,
2503
+ "total_flos": 3.080532552348504e+20,
2504
+ "train_loss": 1.4810419082641601e-05,
2505
+ "train_runtime": 3866.6425,
2506
+ "train_samples_per_second": 82.759,
2507
+ "train_steps_per_second": 2.586
2508
  }
2509
  ],
2510
+ "max_steps": 10000,
2511
+ "num_train_epochs": 589,
2512
+ "total_flos": 3.080532552348504e+20,
2513
  "trial_name": null,
2514
  "trial_params": null
2515
  }