jdannem6 commited on
Commit
6e9c399
·
verified ·
1 Parent(s): 0fe8e14

Uploaded checkpoint-20000

Browse files
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4dabf74ae279de7b02efca8e86a8be7d6678c1fde1ca611cfcb013f2db9f286e
3
  size 119975656
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1094cefddb8a4c25c681c6cde66e2e7b24fd394103df2badf5c69d6900ada43b
3
  size 119975656
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bc93622196a281534191fa7817fe5473a0ed384e0fb7c72748dc1413da4ed838
3
  size 60477396
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:645d5b94ee5359b4733aca4181803ae6254706a9713eb85a854d8057e3a67182
3
  size 60477396
rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:21391ce535d5dea001dfdd91c5b0f4da5fd63a663039abb0d77f3eadaeb8b4e4
3
- size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4e33dda9942df9cbad9cd46793f638f52f82780e545c7592c3d1cbe682087eb0
3
+ size 14180
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d75cd0c4e544f7391f9754fd838738017fc0e36a7e8de482816ca502f9dc5c07
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:29c7a79b53a589de48d3b7a21df9c0d024be4dea79f68869f72fdc01ae3b212a
3
  size 1064
trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.4375,
5
  "eval_steps": 2500,
6
- "global_step": 17500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -1288,6 +1288,189 @@
1288
  "eval_samples_per_second": 9.594,
1289
  "eval_steps_per_second": 9.594,
1290
  "step": 17500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1291
  }
1292
  ],
1293
  "logging_steps": 100,
@@ -1295,7 +1478,7 @@
1295
  "num_input_tokens_seen": 0,
1296
  "num_train_epochs": 1,
1297
  "save_steps": 2500,
1298
- "total_flos": 2.8178720489472e+17,
1299
  "train_batch_size": 1,
1300
  "trial_name": null,
1301
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.5,
5
  "eval_steps": 2500,
6
+ "global_step": 20000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
1288
  "eval_samples_per_second": 9.594,
1289
  "eval_steps_per_second": 9.594,
1290
  "step": 17500
1291
+ },
1292
+ {
1293
+ "epoch": 0.44,
1294
+ "grad_norm": 7.914300918579102,
1295
+ "learning_rate": 2.461538461538462e-06,
1296
+ "loss": 0.7651,
1297
+ "step": 17600
1298
+ },
1299
+ {
1300
+ "epoch": 0.44,
1301
+ "grad_norm": 4.809656620025635,
1302
+ "learning_rate": 2.358974358974359e-06,
1303
+ "loss": 0.7631,
1304
+ "step": 17700
1305
+ },
1306
+ {
1307
+ "epoch": 0.45,
1308
+ "grad_norm": 6.220585823059082,
1309
+ "learning_rate": 2.2564102564102566e-06,
1310
+ "loss": 0.7925,
1311
+ "step": 17800
1312
+ },
1313
+ {
1314
+ "epoch": 0.45,
1315
+ "grad_norm": 3.666391611099243,
1316
+ "learning_rate": 2.153846153846154e-06,
1317
+ "loss": 0.7857,
1318
+ "step": 17900
1319
+ },
1320
+ {
1321
+ "epoch": 0.45,
1322
+ "grad_norm": 5.744978427886963,
1323
+ "learning_rate": 2.0512820512820513e-06,
1324
+ "loss": 0.8025,
1325
+ "step": 18000
1326
+ },
1327
+ {
1328
+ "epoch": 0.45,
1329
+ "grad_norm": 5.490359783172607,
1330
+ "learning_rate": 1.948717948717949e-06,
1331
+ "loss": 0.8005,
1332
+ "step": 18100
1333
+ },
1334
+ {
1335
+ "epoch": 0.46,
1336
+ "grad_norm": 3.3625869750976562,
1337
+ "learning_rate": 1.8461538461538465e-06,
1338
+ "loss": 0.7753,
1339
+ "step": 18200
1340
+ },
1341
+ {
1342
+ "epoch": 0.46,
1343
+ "grad_norm": 13.186784744262695,
1344
+ "learning_rate": 1.7435897435897436e-06,
1345
+ "loss": 0.7705,
1346
+ "step": 18300
1347
+ },
1348
+ {
1349
+ "epoch": 0.46,
1350
+ "grad_norm": 2.9938299655914307,
1351
+ "learning_rate": 1.6410256410256412e-06,
1352
+ "loss": 0.7838,
1353
+ "step": 18400
1354
+ },
1355
+ {
1356
+ "epoch": 0.46,
1357
+ "grad_norm": 3.876194477081299,
1358
+ "learning_rate": 1.5384615384615387e-06,
1359
+ "loss": 0.7963,
1360
+ "step": 18500
1361
+ },
1362
+ {
1363
+ "epoch": 0.47,
1364
+ "grad_norm": 8.027066230773926,
1365
+ "learning_rate": 1.4358974358974359e-06,
1366
+ "loss": 0.7841,
1367
+ "step": 18600
1368
+ },
1369
+ {
1370
+ "epoch": 0.47,
1371
+ "grad_norm": 6.673095226287842,
1372
+ "learning_rate": 1.3333333333333334e-06,
1373
+ "loss": 0.7676,
1374
+ "step": 18700
1375
+ },
1376
+ {
1377
+ "epoch": 0.47,
1378
+ "grad_norm": 6.047390460968018,
1379
+ "learning_rate": 1.230769230769231e-06,
1380
+ "loss": 0.7792,
1381
+ "step": 18800
1382
+ },
1383
+ {
1384
+ "epoch": 0.47,
1385
+ "grad_norm": 3.341261625289917,
1386
+ "learning_rate": 1.1282051282051283e-06,
1387
+ "loss": 0.7712,
1388
+ "step": 18900
1389
+ },
1390
+ {
1391
+ "epoch": 0.47,
1392
+ "grad_norm": 9.690947532653809,
1393
+ "learning_rate": 1.0256410256410257e-06,
1394
+ "loss": 0.768,
1395
+ "step": 19000
1396
+ },
1397
+ {
1398
+ "epoch": 0.48,
1399
+ "grad_norm": 2.3877036571502686,
1400
+ "learning_rate": 9.230769230769232e-07,
1401
+ "loss": 0.786,
1402
+ "step": 19100
1403
+ },
1404
+ {
1405
+ "epoch": 0.48,
1406
+ "grad_norm": 5.060111045837402,
1407
+ "learning_rate": 8.205128205128206e-07,
1408
+ "loss": 0.7492,
1409
+ "step": 19200
1410
+ },
1411
+ {
1412
+ "epoch": 0.48,
1413
+ "grad_norm": 4.0241570472717285,
1414
+ "learning_rate": 7.179487179487179e-07,
1415
+ "loss": 0.7638,
1416
+ "step": 19300
1417
+ },
1418
+ {
1419
+ "epoch": 0.48,
1420
+ "grad_norm": 6.047507286071777,
1421
+ "learning_rate": 6.153846153846155e-07,
1422
+ "loss": 0.7702,
1423
+ "step": 19400
1424
+ },
1425
+ {
1426
+ "epoch": 0.49,
1427
+ "grad_norm": 4.642309665679932,
1428
+ "learning_rate": 5.128205128205128e-07,
1429
+ "loss": 0.7541,
1430
+ "step": 19500
1431
+ },
1432
+ {
1433
+ "epoch": 0.49,
1434
+ "grad_norm": 10.096720695495605,
1435
+ "learning_rate": 4.102564102564103e-07,
1436
+ "loss": 0.7686,
1437
+ "step": 19600
1438
+ },
1439
+ {
1440
+ "epoch": 0.49,
1441
+ "grad_norm": 11.970602035522461,
1442
+ "learning_rate": 3.0769230769230774e-07,
1443
+ "loss": 0.7619,
1444
+ "step": 19700
1445
+ },
1446
+ {
1447
+ "epoch": 0.49,
1448
+ "grad_norm": 6.973097801208496,
1449
+ "learning_rate": 2.0512820512820514e-07,
1450
+ "loss": 0.7798,
1451
+ "step": 19800
1452
+ },
1453
+ {
1454
+ "epoch": 0.5,
1455
+ "grad_norm": 4.512222766876221,
1456
+ "learning_rate": 1.0256410256410257e-07,
1457
+ "loss": 0.7444,
1458
+ "step": 19900
1459
+ },
1460
+ {
1461
+ "epoch": 0.5,
1462
+ "grad_norm": 3.21940541267395,
1463
+ "learning_rate": 0.0,
1464
+ "loss": 0.7902,
1465
+ "step": 20000
1466
+ },
1467
+ {
1468
+ "epoch": 0.5,
1469
+ "eval_loss": 0.7663924694061279,
1470
+ "eval_runtime": 104.2963,
1471
+ "eval_samples_per_second": 9.588,
1472
+ "eval_steps_per_second": 9.588,
1473
+ "step": 20000
1474
  }
1475
  ],
1476
  "logging_steps": 100,
 
1478
  "num_input_tokens_seen": 0,
1479
  "num_train_epochs": 1,
1480
  "save_steps": 2500,
1481
+ "total_flos": 3.2204251987968e+17,
1482
  "train_batch_size": 1,
1483
  "trial_name": null,
1484
  "trial_params": null