ben81828 commited on
Commit
39a67ff
·
verified ·
1 Parent(s): 4b77d73

Training in progress, step 1550, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c22365f9f1922257bce2b624ba131f902a872d3af6a64b3fc1e949516f00f5ac
3
  size 18516456
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c433a8aa8f1650b5f131001712aa61d13d2d36f41df4d65d21c83dee89eb91ab
3
  size 18516456
last-checkpoint/global_step1550/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:16d7ebe27708083904b852fe115e64b12162b20902da7f2c1e2fe91f23111d09
3
+ size 27700976
last-checkpoint/global_step1550/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:449164cc8d966cef473ae8dc64f85ae163e445e03aedefec776b0319085375c8
3
+ size 27700976
last-checkpoint/global_step1550/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0e7b5f20438ddfdc7004ca2513ac5b8f1ceea90a1d8d8671f94704c13491a889
3
+ size 27700976
last-checkpoint/global_step1550/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:108bcd1a0645dbb2138024332876d5815aafed8c65411a604f8264c49ee86b07
3
+ size 27700976
last-checkpoint/global_step1550/zero_pp_rank_0_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:be709632e5d1fcdaf9cec70f99c018e4356be3683a1cb28eed1e4adab5f5fe93
3
+ size 411571
last-checkpoint/global_step1550/zero_pp_rank_1_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eddc0ec4f9016093abb6753d09cb365044055ceab60982d7e1460257cdcf0db0
3
+ size 411507
last-checkpoint/global_step1550/zero_pp_rank_2_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fa8e218616a923b3c2449e19337a97f40cf4b60e78fa26c554369524a0b898da
3
+ size 411507
last-checkpoint/global_step1550/zero_pp_rank_3_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e02b898ca30292290d26463d8070037ac164ebcd3bc9b49bf8c96bab757faa52
3
+ size 411507
last-checkpoint/latest CHANGED
@@ -1 +1 @@
1
- global_step1450
 
1
+ global_step1550
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:dd4f3298d54e9509917373bcd359e11c92a8e0aa77b2cc0825602efd186ad77e
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7f1e48a120d69830576f7b582aa6cc46f0ca41d30015a7a674eaec3dcdfc0f09
3
  size 15024
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5fdab02a791039ff46df83a272c972ab0f366fcea91338790dc016dbbbf6de80
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4dbabb9273d3983e52a4a981b5f60f8c2e19da375765d05bb9f2caad284b9652
3
  size 15024
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e2808762a5f80587f05c1e2e8de26b5aef1010331cea366649319d593dbb0e66
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:554ac925bb9c9ea292b7a41caac1cf75285511cf8aa440f37090891ee457a178
3
  size 15024
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c052112cbe3f325fd4543a02558091b80c9c8abcd8db8822be9a8306eab6f2b7
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5be5e00123fc0a321e41599b50e07be02f4c165504c601192e5c73f5f5437c30
3
  size 15024
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:22e337e693b8d4ecad5d49a11b7f0649adec1748625a9390ba4ddabc0e901287
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8eb069683e7f84aa36296476346fc663361d9b05ad7b09b71f22f44afdb0ea48
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 0.6643603444099426,
3
- "best_model_checkpoint": "saves/CADICA_qwenvl_stenosis_classily_scale4_frozenVision/lora/sft/checkpoint-1400",
4
- "epoch": 0.7468452227659027,
5
  "eval_steps": 50,
6
- "global_step": 1450,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -2588,11 +2588,189 @@
2588
  "eval_steps_per_second": 0.935,
2589
  "num_input_tokens_seen": 16959632,
2590
  "step": 1450
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2591
  }
2592
  ],
2593
  "logging_steps": 5,
2594
  "max_steps": 3400,
2595
- "num_input_tokens_seen": 16959632,
2596
  "num_train_epochs": 2,
2597
  "save_steps": 50,
2598
  "stateful_callbacks": {
@@ -2607,7 +2785,7 @@
2607
  "attributes": {}
2608
  }
2609
  },
2610
- "total_flos": 952313688883200.0,
2611
  "train_batch_size": 1,
2612
  "trial_name": null,
2613
  "trial_params": null
 
1
  {
2
+ "best_metric": 0.632923424243927,
3
+ "best_model_checkpoint": "saves/CADICA_qwenvl_stenosis_classily_scale4_frozenVision/lora/sft/checkpoint-1500",
4
+ "epoch": 0.7983517898532063,
5
  "eval_steps": 50,
6
+ "global_step": 1550,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
2588
  "eval_steps_per_second": 0.935,
2589
  "num_input_tokens_seen": 16959632,
2590
  "step": 1450
2591
+ },
2592
+ {
2593
+ "epoch": 0.7494205511202678,
2594
+ "grad_norm": 1.9571252974715032,
2595
+ "learning_rate": 6.57742480148798e-05,
2596
+ "loss": 0.6533,
2597
+ "num_input_tokens_seen": 17018072,
2598
+ "step": 1455
2599
+ },
2600
+ {
2601
+ "epoch": 0.751995879474633,
2602
+ "grad_norm": 3.2075825448529542,
2603
+ "learning_rate": 6.554332297097031e-05,
2604
+ "loss": 0.7114,
2605
+ "num_input_tokens_seen": 17076560,
2606
+ "step": 1460
2607
+ },
2608
+ {
2609
+ "epoch": 0.7545712078289982,
2610
+ "grad_norm": 2.0030816579741266,
2611
+ "learning_rate": 6.53120303251801e-05,
2612
+ "loss": 0.6568,
2613
+ "num_input_tokens_seen": 17135016,
2614
+ "step": 1465
2615
+ },
2616
+ {
2617
+ "epoch": 0.7571465361833634,
2618
+ "grad_norm": 2.65056436638165,
2619
+ "learning_rate": 6.508037554761432e-05,
2620
+ "loss": 0.7016,
2621
+ "num_input_tokens_seen": 17193496,
2622
+ "step": 1470
2623
+ },
2624
+ {
2625
+ "epoch": 0.7597218645377286,
2626
+ "grad_norm": 1.9541651871708403,
2627
+ "learning_rate": 6.484836411694267e-05,
2628
+ "loss": 0.6612,
2629
+ "num_input_tokens_seen": 17251944,
2630
+ "step": 1475
2631
+ },
2632
+ {
2633
+ "epoch": 0.7622971928920937,
2634
+ "grad_norm": 3.0540242692558577,
2635
+ "learning_rate": 6.461600152026965e-05,
2636
+ "loss": 0.6115,
2637
+ "num_input_tokens_seen": 17310456,
2638
+ "step": 1480
2639
+ },
2640
+ {
2641
+ "epoch": 0.7648725212464589,
2642
+ "grad_norm": 2.796196437541352,
2643
+ "learning_rate": 6.438329325300499e-05,
2644
+ "loss": 0.6458,
2645
+ "num_input_tokens_seen": 17368968,
2646
+ "step": 1485
2647
+ },
2648
+ {
2649
+ "epoch": 0.7674478496008241,
2650
+ "grad_norm": 3.1979427976381207,
2651
+ "learning_rate": 6.415024481873352e-05,
2652
+ "loss": 0.6434,
2653
+ "num_input_tokens_seen": 17427424,
2654
+ "step": 1490
2655
+ },
2656
+ {
2657
+ "epoch": 0.7700231779551893,
2658
+ "grad_norm": 3.8375601078700203,
2659
+ "learning_rate": 6.391686172908506e-05,
2660
+ "loss": 0.5973,
2661
+ "num_input_tokens_seen": 17485936,
2662
+ "step": 1495
2663
+ },
2664
+ {
2665
+ "epoch": 0.7725985063095545,
2666
+ "grad_norm": 2.405705749864128,
2667
+ "learning_rate": 6.368314950360415e-05,
2668
+ "loss": 0.6021,
2669
+ "num_input_tokens_seen": 17544440,
2670
+ "step": 1500
2671
+ },
2672
+ {
2673
+ "epoch": 0.7725985063095545,
2674
+ "eval_loss": 0.632923424243927,
2675
+ "eval_runtime": 16.1038,
2676
+ "eval_samples_per_second": 3.726,
2677
+ "eval_steps_per_second": 0.931,
2678
+ "num_input_tokens_seen": 17544440,
2679
+ "step": 1500
2680
+ },
2681
+ {
2682
+ "epoch": 0.7751738346639196,
2683
+ "grad_norm": 2.8519087211521734,
2684
+ "learning_rate": 6.344911366961934e-05,
2685
+ "loss": 0.5779,
2686
+ "num_input_tokens_seen": 17602952,
2687
+ "step": 1505
2688
+ },
2689
+ {
2690
+ "epoch": 0.7777491630182848,
2691
+ "grad_norm": 2.861290579940173,
2692
+ "learning_rate": 6.321475976211266e-05,
2693
+ "loss": 0.6707,
2694
+ "num_input_tokens_seen": 17661440,
2695
+ "step": 1510
2696
+ },
2697
+ {
2698
+ "epoch": 0.78032449137265,
2699
+ "grad_norm": 3.541365161144121,
2700
+ "learning_rate": 6.298009332358856e-05,
2701
+ "loss": 0.6326,
2702
+ "num_input_tokens_seen": 17719928,
2703
+ "step": 1515
2704
+ },
2705
+ {
2706
+ "epoch": 0.7828998197270152,
2707
+ "grad_norm": 2.969962641272996,
2708
+ "learning_rate": 6.274511990394294e-05,
2709
+ "loss": 0.6472,
2710
+ "num_input_tokens_seen": 17778424,
2711
+ "step": 1520
2712
+ },
2713
+ {
2714
+ "epoch": 0.7854751480813804,
2715
+ "grad_norm": 2.762063548864621,
2716
+ "learning_rate": 6.250984506033183e-05,
2717
+ "loss": 0.6215,
2718
+ "num_input_tokens_seen": 17836936,
2719
+ "step": 1525
2720
+ },
2721
+ {
2722
+ "epoch": 0.7880504764357456,
2723
+ "grad_norm": 3.2198855545004097,
2724
+ "learning_rate": 6.227427435703997e-05,
2725
+ "loss": 0.6102,
2726
+ "num_input_tokens_seen": 17895392,
2727
+ "step": 1530
2728
+ },
2729
+ {
2730
+ "epoch": 0.7906258047901107,
2731
+ "grad_norm": 3.846544371420393,
2732
+ "learning_rate": 6.203841336534924e-05,
2733
+ "loss": 0.6161,
2734
+ "num_input_tokens_seen": 17953872,
2735
+ "step": 1535
2736
+ },
2737
+ {
2738
+ "epoch": 0.7932011331444759,
2739
+ "grad_norm": 3.811248686105134,
2740
+ "learning_rate": 6.180226766340688e-05,
2741
+ "loss": 0.6103,
2742
+ "num_input_tokens_seen": 18012320,
2743
+ "step": 1540
2744
+ },
2745
+ {
2746
+ "epoch": 0.7957764614988411,
2747
+ "grad_norm": 2.9539705466919703,
2748
+ "learning_rate": 6.156584283609359e-05,
2749
+ "loss": 0.5791,
2750
+ "num_input_tokens_seen": 18070792,
2751
+ "step": 1545
2752
+ },
2753
+ {
2754
+ "epoch": 0.7983517898532063,
2755
+ "grad_norm": 3.0546686267383283,
2756
+ "learning_rate": 6.132914447489137e-05,
2757
+ "loss": 0.667,
2758
+ "num_input_tokens_seen": 18129304,
2759
+ "step": 1550
2760
+ },
2761
+ {
2762
+ "epoch": 0.7983517898532063,
2763
+ "eval_loss": 0.6617516279220581,
2764
+ "eval_runtime": 16.0333,
2765
+ "eval_samples_per_second": 3.742,
2766
+ "eval_steps_per_second": 0.936,
2767
+ "num_input_tokens_seen": 18129304,
2768
+ "step": 1550
2769
  }
2770
  ],
2771
  "logging_steps": 5,
2772
  "max_steps": 3400,
2773
+ "num_input_tokens_seen": 18129304,
2774
  "num_train_epochs": 2,
2775
  "save_steps": 50,
2776
  "stateful_callbacks": {
 
2785
  "attributes": {}
2786
  }
2787
  },
2788
+ "total_flos": 1017997776650240.0,
2789
  "train_batch_size": 1,
2790
  "trial_name": null,
2791
  "trial_params": null