ben81828 commited on
Commit
cdbce0f
·
verified ·
1 Parent(s): 307133a

Training in progress, step 300, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3a83521c4411b458bb33d399cce26d96c4156800b9fbac089cf4b485c06b1beb
3
  size 18516456
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b99b8b6736c9551b7572fb855984029765a2e7449d447f3321e71447af523fc8
3
  size 18516456
last-checkpoint/global_step300/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cb2fe97b72b18e1b06d790cd0048ed44a05a1e5a39d4ef283b3835be894019fc
3
+ size 27700976
last-checkpoint/global_step300/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:33551976b42dd9fe9ff7141cf66ca30bcc7ede611d7f697fa57a7b782a8dd060
3
+ size 27700976
last-checkpoint/global_step300/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:02d240351e1d36e8d3995c129c11871b7fe39972e02236eef7f1b586ccda5015
3
+ size 27700976
last-checkpoint/global_step300/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f9fe797e05e77dff6b139e49ef3269ae6a923d455e5b82b1a9324bbdb7b9dd0f
3
+ size 27700976
last-checkpoint/global_step300/zero_pp_rank_0_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:466d2bd70556de01e77927d489b8b824cae46eb687bd72a33613389ace256129
3
+ size 411571
last-checkpoint/global_step300/zero_pp_rank_1_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a4462a91000f87f6f68f470477a3b4b4797055c31343e143a43b66f698e7b122
3
+ size 411507
last-checkpoint/global_step300/zero_pp_rank_2_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b93f71e7375d5c7fea0e0456caa855df4c11e4823976f294fe192bdb5f785651
3
+ size 411507
last-checkpoint/global_step300/zero_pp_rank_3_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:14f0fae20515bd14a4275cbf85eb4db5b258c692fc299f9a68e861c8cf095e39
3
+ size 411507
last-checkpoint/latest CHANGED
@@ -1 +1 @@
1
- global_step250
 
1
+ global_step300
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:418a5f105ae834c3075024076916b2a9475918fe034c12d0dd5b6d91f1aba467
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ef002048764051a71fb00f8f978e9ec32b780dc850bdb059af362cc56494234b
3
  size 15024
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6e07ace389d24bc1307b74f42a1e7b8f0117b0db853e2df64ff3f15cb92916a2
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:37194a6d48612e1a46a2d5d317ead97c70d9fc4569b0118fcd5f84c3dc9daa5a
3
  size 15024
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:da6a990f346d7014dffb28fa2bc7d3b890bd3c53712503fce3656da48d3d6e50
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:17c179483659a784aa1ace2427daff48c556a6bcc3c330e6f3274e4dc95e4b49
3
  size 15024
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e95f356ca38179b05993f55daece0223e96fa10b9a1b9ea2102a739211333f63
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b56857c9b117629f35af2c3d64f522d33a9d8aa94faa81ec6956380a895118c4
3
  size 15024
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4d7e2043e09ec2b328d02f0638a57759f62a1b72350c1a7738b78953d31c6142
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:75979a52de119ed6c9cd6063ded2b7d6cb6c4870db4a05a6cc6a997d1fa9b653
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 0.8956434726715088,
3
- "best_model_checkpoint": "saves/CADICA_qwenvl_stenosis_classily_scale4_frozenVision/lora/sft/checkpoint-250",
4
- "epoch": 0.12876641771825909,
5
  "eval_steps": 50,
6
- "global_step": 250,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -452,11 +452,100 @@
452
  "eval_steps_per_second": 0.858,
453
  "num_input_tokens_seen": 2924016,
454
  "step": 250
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
455
  }
456
  ],
457
  "logging_steps": 5,
458
  "max_steps": 3400,
459
- "num_input_tokens_seen": 2924016,
460
  "num_train_epochs": 2,
461
  "save_steps": 50,
462
  "stateful_callbacks": {
@@ -471,7 +560,7 @@
471
  "attributes": {}
472
  }
473
  },
474
- "total_flos": 164152867291136.0,
475
  "train_batch_size": 1,
476
  "trial_name": null,
477
  "trial_params": null
 
1
  {
2
+ "best_metric": 0.8908902406692505,
3
+ "best_model_checkpoint": "saves/CADICA_qwenvl_stenosis_classily_scale4_frozenVision/lora/sft/checkpoint-300",
4
+ "epoch": 0.1545197012619109,
5
  "eval_steps": 50,
6
+ "global_step": 300,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
452
  "eval_steps_per_second": 0.858,
453
  "num_input_tokens_seen": 2924016,
454
  "step": 250
455
+ },
456
+ {
457
+ "epoch": 0.13134174607262425,
458
+ "grad_norm": 0.3762164361984238,
459
+ "learning_rate": 9.98292246503335e-05,
460
+ "loss": 0.8987,
461
+ "num_input_tokens_seen": 2982520,
462
+ "step": 255
463
+ },
464
+ {
465
+ "epoch": 0.13391707442698944,
466
+ "grad_norm": 0.6447043002410199,
467
+ "learning_rate": 9.980855572238714e-05,
468
+ "loss": 0.9036,
469
+ "num_input_tokens_seen": 3041008,
470
+ "step": 260
471
+ },
472
+ {
473
+ "epoch": 0.13649240278135463,
474
+ "grad_norm": 0.5308092769971742,
475
+ "learning_rate": 9.978670881475172e-05,
476
+ "loss": 0.8961,
477
+ "num_input_tokens_seen": 3099464,
478
+ "step": 265
479
+ },
480
+ {
481
+ "epoch": 0.1390677311357198,
482
+ "grad_norm": 0.508333330469703,
483
+ "learning_rate": 9.976368444410985e-05,
484
+ "loss": 0.9012,
485
+ "num_input_tokens_seen": 3157944,
486
+ "step": 270
487
+ },
488
+ {
489
+ "epoch": 0.141643059490085,
490
+ "grad_norm": 0.6801788563719119,
491
+ "learning_rate": 9.973948315499126e-05,
492
+ "loss": 0.8985,
493
+ "num_input_tokens_seen": 3216448,
494
+ "step": 275
495
+ },
496
+ {
497
+ "epoch": 0.14421838784445018,
498
+ "grad_norm": 0.6933074703933572,
499
+ "learning_rate": 9.971410551976002e-05,
500
+ "loss": 0.9114,
501
+ "num_input_tokens_seen": 3274928,
502
+ "step": 280
503
+ },
504
+ {
505
+ "epoch": 0.14679371619881534,
506
+ "grad_norm": 0.21208820897494882,
507
+ "learning_rate": 9.968755213860094e-05,
508
+ "loss": 0.8886,
509
+ "num_input_tokens_seen": 3333408,
510
+ "step": 285
511
+ },
512
+ {
513
+ "epoch": 0.14936904455318054,
514
+ "grad_norm": 0.5791422669000065,
515
+ "learning_rate": 9.96598236395054e-05,
516
+ "loss": 0.8929,
517
+ "num_input_tokens_seen": 3391896,
518
+ "step": 290
519
+ },
520
+ {
521
+ "epoch": 0.1519443729075457,
522
+ "grad_norm": 0.3460368893191152,
523
+ "learning_rate": 9.96309206782565e-05,
524
+ "loss": 0.9091,
525
+ "num_input_tokens_seen": 3450392,
526
+ "step": 295
527
+ },
528
+ {
529
+ "epoch": 0.1545197012619109,
530
+ "grad_norm": 0.22425222135997747,
531
+ "learning_rate": 9.960084393841355e-05,
532
+ "loss": 0.8893,
533
+ "num_input_tokens_seen": 3508888,
534
+ "step": 300
535
+ },
536
+ {
537
+ "epoch": 0.1545197012619109,
538
+ "eval_loss": 0.8908902406692505,
539
+ "eval_runtime": 16.9521,
540
+ "eval_samples_per_second": 3.539,
541
+ "eval_steps_per_second": 0.885,
542
+ "num_input_tokens_seen": 3508888,
543
+ "step": 300
544
  }
545
  ],
546
  "logging_steps": 5,
547
  "max_steps": 3400,
548
+ "num_input_tokens_seen": 3508888,
549
  "num_train_epochs": 2,
550
  "save_steps": 50,
551
  "stateful_callbacks": {
 
560
  "attributes": {}
561
  }
562
  },
563
+ "total_flos": 196997011734528.0,
564
  "train_batch_size": 1,
565
  "trial_name": null,
566
  "trial_params": null