MohamedAhmedAE commited on
Commit
6db0c9b
·
verified ·
1 Parent(s): 66345c0

Training in progress, step 343000, checkpoint

Browse files
last-checkpoint/adapter_config.json CHANGED
@@ -23,13 +23,13 @@
23
  "rank_pattern": {},
24
  "revision": null,
25
  "target_modules": [
26
- "up_proj",
27
- "gate_proj",
28
- "k_proj",
29
- "v_proj",
30
  "q_proj",
 
 
 
 
31
  "o_proj",
32
- "down_proj"
33
  ],
34
  "task_type": "CAUSAL_LM",
35
  "use_dora": false,
 
23
  "rank_pattern": {},
24
  "revision": null,
25
  "target_modules": [
 
 
 
 
26
  "q_proj",
27
+ "down_proj",
28
+ "v_proj",
29
+ "k_proj",
30
+ "gate_proj",
31
  "o_proj",
32
+ "up_proj"
33
  ],
34
  "task_type": "CAUSAL_LM",
35
  "use_dora": false,
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:77ccc7e48ccdbe805a596485ddd3dbadcee2e22d9ba053f7df72c11bef42dd8e
3
  size 1342238560
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:558f0558dbe2ed2fed185bbe33a32e697578eb37a71364f4ae39a77ac585d1c8
3
  size 1342238560
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a674a1bf3923257c0203aec537f3903553312ad604dd2b86b9a2d5cd0ddb714f
3
  size 683268498
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:781887d172314801ab8802842158c08145ef998a6a80b07686139a50d9285ded
3
  size 683268498
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a14ac9e461892314d6ba767ae6fbf3be389395cb4fe125c43f81c17b334c00ce
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e85f0257d01a91ff4050d39219e8dd384bbb4cfdc5b2e0fb4fabf6b2fe3b33e2
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0bf5cb7d0fbd840eda73ead5e2ccb0627aa96feb1ef96682b5cfe40a387534d6
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:840aa8e3a2615e43038d3be582aa3892a5d4ec1157dbf18b35d8a9ff2904fee4
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.2339602643111143,
5
  "eval_steps": 500,
6
- "global_step": 336400,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -11781,6 +11781,237 @@
11781
  "learning_rate": 1.9532589419723944e-05,
11782
  "loss": 1.7161,
11783
  "step": 336400
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11784
  }
11785
  ],
11786
  "logging_steps": 200,
@@ -11800,7 +12031,7 @@
11800
  "attributes": {}
11801
  }
11802
  },
11803
- "total_flos": 4.4789352142658273e+18,
11804
  "train_batch_size": 1,
11805
  "trial_name": null,
11806
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.23855044785586269,
5
  "eval_steps": 500,
6
+ "global_step": 343000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
11781
  "learning_rate": 1.9532589419723944e-05,
11782
  "loss": 1.7161,
11783
  "step": 336400
11784
+ },
11785
+ {
11786
+ "epoch": 0.23409936078216728,
11787
+ "grad_norm": 5.75113582611084,
11788
+ "learning_rate": 1.9532039753658822e-05,
11789
+ "loss": 1.6752,
11790
+ "step": 336600
11791
+ },
11792
+ {
11793
+ "epoch": 0.23423845725322026,
11794
+ "grad_norm": 3.8082878589630127,
11795
+ "learning_rate": 1.9531489775761617e-05,
11796
+ "loss": 1.6679,
11797
+ "step": 336800
11798
+ },
11799
+ {
11800
+ "epoch": 0.23437755372427324,
11801
+ "grad_norm": 4.37647819519043,
11802
+ "learning_rate": 1.953093948605858e-05,
11803
+ "loss": 1.6643,
11804
+ "step": 337000
11805
+ },
11806
+ {
11807
+ "epoch": 0.23451665019532622,
11808
+ "grad_norm": 5.018675327301025,
11809
+ "learning_rate": 1.953038888457599e-05,
11810
+ "loss": 1.6606,
11811
+ "step": 337200
11812
+ },
11813
+ {
11814
+ "epoch": 0.2346557466663792,
11815
+ "grad_norm": 5.047998905181885,
11816
+ "learning_rate": 1.952983797134013e-05,
11817
+ "loss": 1.6508,
11818
+ "step": 337400
11819
+ },
11820
+ {
11821
+ "epoch": 0.23479484313743218,
11822
+ "grad_norm": 7.279408931732178,
11823
+ "learning_rate": 1.95292867463773e-05,
11824
+ "loss": 1.6547,
11825
+ "step": 337600
11826
+ },
11827
+ {
11828
+ "epoch": 0.23493393960848516,
11829
+ "grad_norm": 6.7975382804870605,
11830
+ "learning_rate": 1.9528735209713808e-05,
11831
+ "loss": 1.6461,
11832
+ "step": 337800
11833
+ },
11834
+ {
11835
+ "epoch": 0.23507303607953814,
11836
+ "grad_norm": 7.198062896728516,
11837
+ "learning_rate": 1.9528183361375986e-05,
11838
+ "loss": 1.6954,
11839
+ "step": 338000
11840
+ },
11841
+ {
11842
+ "epoch": 0.23521213255059112,
11843
+ "grad_norm": 4.493501663208008,
11844
+ "learning_rate": 1.9527631201390185e-05,
11845
+ "loss": 1.6956,
11846
+ "step": 338200
11847
+ },
11848
+ {
11849
+ "epoch": 0.2353512290216441,
11850
+ "grad_norm": 4.0898118019104,
11851
+ "learning_rate": 1.952707872978276e-05,
11852
+ "loss": 1.6233,
11853
+ "step": 338400
11854
+ },
11855
+ {
11856
+ "epoch": 0.23549032549269708,
11857
+ "grad_norm": 3.5022025108337402,
11858
+ "learning_rate": 1.952652594658009e-05,
11859
+ "loss": 1.6675,
11860
+ "step": 338600
11861
+ },
11862
+ {
11863
+ "epoch": 0.23562942196375006,
11864
+ "grad_norm": 3.9198243618011475,
11865
+ "learning_rate": 1.9525972851808555e-05,
11866
+ "loss": 1.6433,
11867
+ "step": 338800
11868
+ },
11869
+ {
11870
+ "epoch": 0.23576851843480304,
11871
+ "grad_norm": 4.736083507537842,
11872
+ "learning_rate": 1.9525419445494563e-05,
11873
+ "loss": 1.6486,
11874
+ "step": 339000
11875
+ },
11876
+ {
11877
+ "epoch": 0.23590761490585604,
11878
+ "grad_norm": 3.913604259490967,
11879
+ "learning_rate": 1.952486572766454e-05,
11880
+ "loss": 1.5873,
11881
+ "step": 339200
11882
+ },
11883
+ {
11884
+ "epoch": 0.23604671137690902,
11885
+ "grad_norm": 4.593210220336914,
11886
+ "learning_rate": 1.9524311698344908e-05,
11887
+ "loss": 1.696,
11888
+ "step": 339400
11889
+ },
11890
+ {
11891
+ "epoch": 0.236185807847962,
11892
+ "grad_norm": 12.825864791870117,
11893
+ "learning_rate": 1.9523757357562124e-05,
11894
+ "loss": 1.6756,
11895
+ "step": 339600
11896
+ },
11897
+ {
11898
+ "epoch": 0.23632490431901498,
11899
+ "grad_norm": 3.4124608039855957,
11900
+ "learning_rate": 1.9523202705342653e-05,
11901
+ "loss": 1.6614,
11902
+ "step": 339800
11903
+ },
11904
+ {
11905
+ "epoch": 0.23646400079006796,
11906
+ "grad_norm": 3.605181932449341,
11907
+ "learning_rate": 1.9522647741712966e-05,
11908
+ "loss": 1.6916,
11909
+ "step": 340000
11910
+ },
11911
+ {
11912
+ "epoch": 0.23660309726112094,
11913
+ "grad_norm": 5.278689384460449,
11914
+ "learning_rate": 1.952209246669956e-05,
11915
+ "loss": 1.6617,
11916
+ "step": 340200
11917
+ },
11918
+ {
11919
+ "epoch": 0.23674219373217392,
11920
+ "grad_norm": 5.578737258911133,
11921
+ "learning_rate": 1.9521536880328943e-05,
11922
+ "loss": 1.7077,
11923
+ "step": 340400
11924
+ },
11925
+ {
11926
+ "epoch": 0.2368812902032269,
11927
+ "grad_norm": 4.157208442687988,
11928
+ "learning_rate": 1.9520980982627642e-05,
11929
+ "loss": 1.6824,
11930
+ "step": 340600
11931
+ },
11932
+ {
11933
+ "epoch": 0.23702038667427988,
11934
+ "grad_norm": 3.1329407691955566,
11935
+ "learning_rate": 1.9520424773622193e-05,
11936
+ "loss": 1.6559,
11937
+ "step": 340800
11938
+ },
11939
+ {
11940
+ "epoch": 0.23715948314533286,
11941
+ "grad_norm": 4.475450038909912,
11942
+ "learning_rate": 1.951986825333914e-05,
11943
+ "loss": 1.7017,
11944
+ "step": 341000
11945
+ },
11946
+ {
11947
+ "epoch": 0.23729857961638584,
11948
+ "grad_norm": 4.912330627441406,
11949
+ "learning_rate": 1.9519311421805062e-05,
11950
+ "loss": 1.6263,
11951
+ "step": 341200
11952
+ },
11953
+ {
11954
+ "epoch": 0.23743767608743882,
11955
+ "grad_norm": 6.892397403717041,
11956
+ "learning_rate": 1.951875427904654e-05,
11957
+ "loss": 1.7071,
11958
+ "step": 341400
11959
+ },
11960
+ {
11961
+ "epoch": 0.2375767725584918,
11962
+ "grad_norm": 4.659296989440918,
11963
+ "learning_rate": 1.9518196825090167e-05,
11964
+ "loss": 1.6526,
11965
+ "step": 341600
11966
+ },
11967
+ {
11968
+ "epoch": 0.23771586902954478,
11969
+ "grad_norm": 7.2321977615356445,
11970
+ "learning_rate": 1.9517639059962558e-05,
11971
+ "loss": 1.619,
11972
+ "step": 341800
11973
+ },
11974
+ {
11975
+ "epoch": 0.23785496550059776,
11976
+ "grad_norm": 4.7723283767700195,
11977
+ "learning_rate": 1.951708098369033e-05,
11978
+ "loss": 1.6601,
11979
+ "step": 342000
11980
+ },
11981
+ {
11982
+ "epoch": 0.23799406197165074,
11983
+ "grad_norm": 4.46943473815918,
11984
+ "learning_rate": 1.951652259630014e-05,
11985
+ "loss": 1.6552,
11986
+ "step": 342200
11987
+ },
11988
+ {
11989
+ "epoch": 0.23813315844270372,
11990
+ "grad_norm": 3.9207563400268555,
11991
+ "learning_rate": 1.951596389781864e-05,
11992
+ "loss": 1.6588,
11993
+ "step": 342400
11994
+ },
11995
+ {
11996
+ "epoch": 0.2382722549137567,
11997
+ "grad_norm": 4.317783355712891,
11998
+ "learning_rate": 1.95154048882725e-05,
11999
+ "loss": 1.6362,
12000
+ "step": 342600
12001
+ },
12002
+ {
12003
+ "epoch": 0.2384113513848097,
12004
+ "grad_norm": 4.8455939292907715,
12005
+ "learning_rate": 1.9514845567688408e-05,
12006
+ "loss": 1.6518,
12007
+ "step": 342800
12008
+ },
12009
+ {
12010
+ "epoch": 0.23855044785586269,
12011
+ "grad_norm": 7.664321422576904,
12012
+ "learning_rate": 1.9514285936093064e-05,
12013
+ "loss": 1.6889,
12014
+ "step": 343000
12015
  }
12016
  ],
12017
  "logging_steps": 200,
 
12031
  "attributes": {}
12032
  }
12033
  },
12034
+ "total_flos": 4.567214300600918e+18,
12035
  "train_batch_size": 1,
12036
  "trial_name": null,
12037
  "trial_params": null
last-checkpoint/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e6420e27a1743978dbb7f5afac4bb71396b01a4362c274caf9f98fc91a6bd501
3
  size 6840
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2f194f0afbf00cd135f18b6f6e0dc2d489f2d84487accfafc9254221384d4d16
3
  size 6840