ben81828 committed
Commit 635d025 · verified · 1 Parent(s): 8f80ad9

Training in progress, step 300, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1b185d60d1a95b109d35cc8c5048fbf846bc58f2e1aee39250a914046d9a6821
+oid sha256:b81846ac02e033886bed1edec96d0da70d2a41c811163f600e741ff029d3c0d0
 size 29034840
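Each *.safetensors / *.pt / *.pth entry in this diff is stored as a Git LFS pointer (the three-line version / oid sha256 / size stanza shown above), not as the binary payload itself. Below is a minimal sketch of parsing such a pointer and checking a locally resolved blob against it; the file paths and helper names are illustrative, not part of this repo:

```python
import hashlib
from pathlib import Path

def parse_lfs_pointer(pointer_path: str) -> dict:
    """Parse a git-lfs spec v1 pointer file into its key/value fields."""
    fields = {}
    for line in Path(pointer_path).read_text().splitlines():
        key, _, value = line.partition(" ")
        fields[key] = value
    # e.g. {"version": "https://git-lfs.github.com/spec/v1",
    #       "oid": "sha256:b81846ac...", "size": "29034840"}
    return fields

def verify_blob(pointer_path: str, blob_path: str) -> bool:
    """Check that a downloaded blob matches the oid and size recorded in the pointer."""
    fields = parse_lfs_pointer(pointer_path)
    data = Path(blob_path).read_bytes()
    size_ok = len(data) == int(fields["size"])
    oid_ok = "sha256:" + hashlib.sha256(data).hexdigest() == fields["oid"]
    return size_ok and oid_ok

# Hypothetical usage with the adapter weights from this checkpoint:
# verify_blob("last-checkpoint/adapter_model.safetensors",  # pointer as stored in git
#             "/tmp/adapter_model.safetensors")             # resolved LFS blob
```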
last-checkpoint/global_step300/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9a1ea87d9a460095f8a6dfc71f41e3187bdd83627478f1fada09b55e61203a75
+size 43429616
last-checkpoint/global_step300/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0bd0be5be8bcff8fad1ee892c578865dc476ce296b7d9997971a3bf192bcfdc7
+size 43429616
last-checkpoint/global_step300/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7fc3f4921583b0c695b5ce4035e669191dc439f047edb565d1da43103b3ee63b
+size 43429616
last-checkpoint/global_step300/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:75f958c4b057bf39849b81a20c3e2860e0937295d26c9a3f9a625b944a3dc343
+size 43429616
last-checkpoint/global_step300/zero_pp_rank_0_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a4de5206222f5f6bc48ade0dd830e1dc6401703feaf7328bb17787793a784ed8
+size 637299
last-checkpoint/global_step300/zero_pp_rank_1_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c9c15f3de1191eba8ee412eb11cf43d8297d842d97f58ff3416540e6e60132b4
+size 637171
last-checkpoint/global_step300/zero_pp_rank_2_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b3363de2e039012fbc15be9a390355ad5f78df5709c5a29467c717bbc3d0644c
+size 637171
last-checkpoint/global_step300/zero_pp_rank_3_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:78419129614e77d4f1d73fe2039db0059edfa88fe000e6d37b57f43d85197257
+size 637171
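The global_step300 directory follows DeepSpeed ZeRO's checkpoint layout: one bf16 optimizer-state shard and one model-state file per data-parallel rank (ranks 0-3 here). If full fp32 weights were ever needed outside DeepSpeed, something like the sketch below would reassemble them, assuming the zero_to_fp32 helpers bundled with DeepSpeed (the exact module path and signature vary by version):

```python
# Sketch only: relies on DeepSpeed's bundled zero_to_fp32 utilities; import path and
# keyword arguments depend on the installed DeepSpeed version.
from deepspeed.utils.zero_to_fp32 import get_fp32_state_dict_from_zero_checkpoint

# "last-checkpoint" is the directory shown in this commit; "global_step300" is the tag
# recorded in last-checkpoint/latest after this update.
state_dict = get_fp32_state_dict_from_zero_checkpoint("last-checkpoint", tag="global_step300")

# state_dict maps parameter names to full (unsharded) fp32 tensors and could be saved
# with torch.save or loaded back into the model for inference.
print(len(state_dict), "parameters reassembled")
```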
last-checkpoint/latest CHANGED
@@ -1 +1 @@
-global_step250
+global_step300
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:418a5f105ae834c3075024076916b2a9475918fe034c12d0dd5b6d91f1aba467
+oid sha256:ef002048764051a71fb00f8f978e9ec32b780dc850bdb059af362cc56494234b
 size 15024
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6e07ace389d24bc1307b74f42a1e7b8f0117b0db853e2df64ff3f15cb92916a2
+oid sha256:37194a6d48612e1a46a2d5d317ead97c70d9fc4569b0118fcd5f84c3dc9daa5a
 size 15024
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:da6a990f346d7014dffb28fa2bc7d3b890bd3c53712503fce3656da48d3d6e50
+oid sha256:17c179483659a784aa1ace2427daff48c556a6bcc3c330e6f3274e4dc95e4b49
 size 15024
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e95f356ca38179b05993f55daece0223e96fa10b9a1b9ea2102a739211333f63
+oid sha256:b56857c9b117629f35af2c3d64f522d33a9d8aa94faa81ec6956380a895118c4
 size 15024
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:95727ac30e88a4b409e48fc76a5fc825943310a473f6f375c508be1f196dccaa
+oid sha256:ad5c396c3dfcdbd96b175132537b2bcb7ae04407121e3f88285feb248284457a
 size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": 0.810763955116272,
   "best_model_checkpoint": "saves/CADICA_qwenvl_direction_then_DetectAndClassify_scale6/lora/sft/checkpoint-250",
-  "epoch": 0.07385524372230429,
+  "epoch": 0.08862629246676514,
   "eval_steps": 50,
-  "global_step": 250,
+  "global_step": 300,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -452,11 +452,100 @@
       "eval_steps_per_second": 0.783,
       "num_input_tokens_seen": 2587272,
       "step": 250
+    },
+    {
+      "epoch": 0.07533234859675036,
+      "grad_norm": 2.353270620583961,
+      "learning_rate": 7.522123893805309e-05,
+      "loss": 0.7851,
+      "num_input_tokens_seen": 2638632,
+      "step": 255
+    },
+    {
+      "epoch": 0.07680945347119646,
+      "grad_norm": 3.0664271713541873,
+      "learning_rate": 7.669616519174043e-05,
+      "loss": 0.7211,
+      "num_input_tokens_seen": 2691016,
+      "step": 260
+    },
+    {
+      "epoch": 0.07828655834564253,
+      "grad_norm": 5.498034008223314,
+      "learning_rate": 7.817109144542774e-05,
+      "loss": 0.8082,
+      "num_input_tokens_seen": 2742912,
+      "step": 265
+    },
+    {
+      "epoch": 0.07976366322008863,
+      "grad_norm": 14.573947499657416,
+      "learning_rate": 7.964601769911504e-05,
+      "loss": 0.7485,
+      "num_input_tokens_seen": 2795264,
+      "step": 270
+    },
+    {
+      "epoch": 0.08124076809453472,
+      "grad_norm": 3.007807281619928,
+      "learning_rate": 8.112094395280237e-05,
+      "loss": 0.7454,
+      "num_input_tokens_seen": 2846344,
+      "step": 275
+    },
+    {
+      "epoch": 0.0827178729689808,
+      "grad_norm": 6.015750773450144,
+      "learning_rate": 8.259587020648968e-05,
+      "loss": 0.7258,
+      "num_input_tokens_seen": 2898304,
+      "step": 280
+    },
+    {
+      "epoch": 0.08419497784342689,
+      "grad_norm": 2.390238834834483,
+      "learning_rate": 8.4070796460177e-05,
+      "loss": 0.7863,
+      "num_input_tokens_seen": 2951368,
+      "step": 285
+    },
+    {
+      "epoch": 0.08567208271787297,
+      "grad_norm": 3.0216023427899357,
+      "learning_rate": 8.554572271386431e-05,
+      "loss": 0.7105,
+      "num_input_tokens_seen": 3003288,
+      "step": 290
+    },
+    {
+      "epoch": 0.08714918759231906,
+      "grad_norm": 3.255437171887138,
+      "learning_rate": 8.702064896755162e-05,
+      "loss": 0.6885,
+      "num_input_tokens_seen": 3054808,
+      "step": 295
+    },
+    {
+      "epoch": 0.08862629246676514,
+      "grad_norm": 3.506440325367033,
+      "learning_rate": 8.849557522123895e-05,
+      "loss": 0.78,
+      "num_input_tokens_seen": 3107200,
+      "step": 300
+    },
+    {
+      "epoch": 0.08862629246676514,
+      "eval_loss": 0.8194220662117004,
+      "eval_runtime": 19.1748,
+      "eval_samples_per_second": 3.129,
+      "eval_steps_per_second": 0.782,
+      "num_input_tokens_seen": 3107200,
+      "step": 300
     }
   ],
   "logging_steps": 5,
   "max_steps": 6770,
-  "num_input_tokens_seen": 2587272,
+  "num_input_tokens_seen": 3107200,
   "num_train_epochs": 2,
   "save_steps": 50,
   "stateful_callbacks": {
@@ -471,7 +560,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 170656960086016.0,
+  "total_flos": 204907697143808.0,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null
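The trainer_state.json change only appends ten training log entries (steps 255-300) plus the step-300 eval entry and bumps the running counters (num_input_tokens_seen, total_flos); best_metric still points at checkpoint-250. A small sketch of reading those fields back out of the updated file, with key names taken from the diff and the path assumed to be the checkpoint directory as committed:

```python
import json

# Path assumed from this commit; adjust to wherever the checkpoint lives locally.
with open("last-checkpoint/trainer_state.json") as f:
    state = json.load(f)

print("global_step:", state["global_step"])     # 300 after this commit
print("epoch:", state["epoch"])                 # ~0.0886
print("best eval loss:", state["best_metric"])  # 0.8108, still from checkpoint-250

# log_history mixes training entries (loss / grad_norm / learning_rate) and
# evaluation entries (eval_loss / eval_runtime), so filter by key.
last_train = next(e for e in reversed(state["log_history"]) if "loss" in e)
last_eval = next(e for e in reversed(state["log_history"]) if "eval_loss" in e)
print("last train loss:", last_train["loss"], "at step", last_train["step"])
print("last eval loss:", last_eval["eval_loss"], "at step", last_eval["step"])
```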