MohamedAhmedAE committed (verified)
Commit 66345c0 · 1 Parent(s): 7e75d05

Training in progress, step 343000

adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7e75eac46e08e30439a8f1d1cf16b8bc94879ce5e8b6daabed28b0eb7d25334a
+oid sha256:558f0558dbe2ed2fed185bbe33a32e697578eb37a71364f4ae39a77ac585d1c8
 size 1342238560
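The large binary files in this commit are tracked with Git LFS, so each diff above and below only touches a pointer file: a `version` line, the `oid sha256:` of the stored object, and its `size` in bytes. A minimal sketch (the local file path is an assumption, not something this commit specifies) for checking that a downloaded object matches the recorded oid:

```python
# Minimal sketch: recompute the SHA-256 of a downloaded LFS object and compare it
# with the oid recorded in the pointer file. The path below is a placeholder.
import hashlib

def sha256_of(path: str, chunk_size: int = 1 << 20) -> str:
    """Hash the file in 1 MiB chunks so multi-GB checkpoints never sit in memory."""
    h = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            h.update(chunk)
    return h.hexdigest()

# For the new pointer above, this should print
# 558f0558dbe2ed2fed185bbe33a32e697578eb37a71364f4ae39a77ac585d1c8
print(sha256_of("adapter_model.safetensors"))
```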
last-checkpoint/adapter_config.json CHANGED
@@ -23,13 +23,13 @@
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
-    "q_proj",
-    "down_proj",
-    "v_proj",
-    "k_proj",
+    "up_proj",
     "gate_proj",
+    "k_proj",
+    "v_proj",
+    "q_proj",
     "o_proj",
-    "up_proj"
+    "down_proj"
   ],
   "task_type": "CAUSAL_LM",
   "use_dora": false,
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7e75eac46e08e30439a8f1d1cf16b8bc94879ce5e8b6daabed28b0eb7d25334a
+oid sha256:77ccc7e48ccdbe805a596485ddd3dbadcee2e22d9ba053f7df72c11bef42dd8e
 size 1342238560
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:98a57ae5d41478c0cac2023cbd05e78ba9a9d963c6f2c28cd16143dbd1074838
+oid sha256:a674a1bf3923257c0203aec537f3903553312ad604dd2b86b9a2d5cd0ddb714f
 size 683268498
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:15944d6ef476b38df570ff9d64bfc643ed8d2f56e7dd50a3fb10bb256d3f67b4
+oid sha256:a14ac9e461892314d6ba767ae6fbf3be389395cb4fe125c43f81c17b334c00ce
 size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:90e6f05351a0e7875d766d1c3a2bd92efaab5a08a7f356642a5d985941346bd0
+oid sha256:0bf5cb7d0fbd840eda73ead5e2ccb0627aa96feb1ef96682b5cfe40a387534d6
 size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.2384113513848097,
+  "epoch": 0.2339602643111143,
   "eval_steps": 500,
-  "global_step": 342800,
+  "global_step": 336400,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -11781,230 +11781,6 @@
       "learning_rate": 1.9532589419723944e-05,
       "loss": 1.7161,
       "step": 336400
-    },
-    {
-      "epoch": 0.23409936078216728,
-      "grad_norm": 5.75113582611084,
-      "learning_rate": 1.9532039753658822e-05,
-      "loss": 1.6752,
-      "step": 336600
-    },
-    {
-      "epoch": 0.23423845725322026,
-      "grad_norm": 3.8082878589630127,
-      "learning_rate": 1.9531489775761617e-05,
-      "loss": 1.6679,
-      "step": 336800
-    },
-    {
-      "epoch": 0.23437755372427324,
-      "grad_norm": 4.37647819519043,
-      "learning_rate": 1.953093948605858e-05,
-      "loss": 1.6643,
-      "step": 337000
-    },
-    {
-      "epoch": 0.23451665019532622,
-      "grad_norm": 5.018675327301025,
-      "learning_rate": 1.953038888457599e-05,
-      "loss": 1.6606,
-      "step": 337200
-    },
-    {
-      "epoch": 0.2346557466663792,
-      "grad_norm": 5.047998905181885,
-      "learning_rate": 1.952983797134013e-05,
-      "loss": 1.6508,
-      "step": 337400
-    },
-    {
-      "epoch": 0.23479484313743218,
-      "grad_norm": 7.279408931732178,
-      "learning_rate": 1.95292867463773e-05,
-      "loss": 1.6547,
-      "step": 337600
-    },
-    {
-      "epoch": 0.23493393960848516,
-      "grad_norm": 6.7975382804870605,
-      "learning_rate": 1.9528735209713808e-05,
-      "loss": 1.6461,
-      "step": 337800
-    },
-    {
-      "epoch": 0.23507303607953814,
-      "grad_norm": 7.198062896728516,
-      "learning_rate": 1.9528183361375986e-05,
-      "loss": 1.6954,
-      "step": 338000
-    },
-    {
-      "epoch": 0.23521213255059112,
-      "grad_norm": 4.493501663208008,
-      "learning_rate": 1.9527631201390185e-05,
-      "loss": 1.6956,
-      "step": 338200
-    },
-    {
-      "epoch": 0.2353512290216441,
-      "grad_norm": 4.0898118019104,
-      "learning_rate": 1.952707872978276e-05,
-      "loss": 1.6233,
-      "step": 338400
-    },
-    {
-      "epoch": 0.23549032549269708,
-      "grad_norm": 3.5022025108337402,
-      "learning_rate": 1.952652594658009e-05,
-      "loss": 1.6675,
-      "step": 338600
-    },
-    {
-      "epoch": 0.23562942196375006,
-      "grad_norm": 3.9198243618011475,
-      "learning_rate": 1.9525972851808555e-05,
-      "loss": 1.6433,
-      "step": 338800
-    },
-    {
-      "epoch": 0.23576851843480304,
-      "grad_norm": 4.736083507537842,
-      "learning_rate": 1.9525419445494563e-05,
-      "loss": 1.6486,
-      "step": 339000
-    },
-    {
-      "epoch": 0.23590761490585604,
-      "grad_norm": 3.913604259490967,
-      "learning_rate": 1.952486572766454e-05,
-      "loss": 1.5873,
-      "step": 339200
-    },
-    {
-      "epoch": 0.23604671137690902,
-      "grad_norm": 4.593210220336914,
-      "learning_rate": 1.9524311698344908e-05,
-      "loss": 1.696,
-      "step": 339400
-    },
-    {
-      "epoch": 0.236185807847962,
-      "grad_norm": 12.825864791870117,
-      "learning_rate": 1.9523757357562124e-05,
-      "loss": 1.6756,
-      "step": 339600
-    },
-    {
-      "epoch": 0.23632490431901498,
-      "grad_norm": 3.4124608039855957,
-      "learning_rate": 1.9523202705342653e-05,
-      "loss": 1.6614,
-      "step": 339800
-    },
-    {
-      "epoch": 0.23646400079006796,
-      "grad_norm": 3.605181932449341,
-      "learning_rate": 1.9522647741712966e-05,
-      "loss": 1.6916,
-      "step": 340000
-    },
-    {
-      "epoch": 0.23660309726112094,
-      "grad_norm": 5.278689384460449,
-      "learning_rate": 1.952209246669956e-05,
-      "loss": 1.6617,
-      "step": 340200
-    },
-    {
-      "epoch": 0.23674219373217392,
-      "grad_norm": 5.578737258911133,
-      "learning_rate": 1.9521536880328943e-05,
-      "loss": 1.7077,
-      "step": 340400
-    },
-    {
-      "epoch": 0.2368812902032269,
-      "grad_norm": 4.157208442687988,
-      "learning_rate": 1.9520980982627642e-05,
-      "loss": 1.6824,
-      "step": 340600
-    },
-    {
-      "epoch": 0.23702038667427988,
-      "grad_norm": 3.1329407691955566,
-      "learning_rate": 1.9520424773622193e-05,
-      "loss": 1.6559,
-      "step": 340800
-    },
-    {
-      "epoch": 0.23715948314533286,
-      "grad_norm": 4.475450038909912,
-      "learning_rate": 1.951986825333914e-05,
-      "loss": 1.7017,
-      "step": 341000
-    },
-    {
-      "epoch": 0.23729857961638584,
-      "grad_norm": 4.912330627441406,
-      "learning_rate": 1.9519311421805062e-05,
-      "loss": 1.6263,
-      "step": 341200
-    },
-    {
-      "epoch": 0.23743767608743882,
-      "grad_norm": 6.892397403717041,
-      "learning_rate": 1.951875427904654e-05,
-      "loss": 1.7071,
-      "step": 341400
-    },
-    {
-      "epoch": 0.2375767725584918,
-      "grad_norm": 4.659296989440918,
-      "learning_rate": 1.9518196825090167e-05,
-      "loss": 1.6526,
-      "step": 341600
-    },
-    {
-      "epoch": 0.23771586902954478,
-      "grad_norm": 7.2321977615356445,
-      "learning_rate": 1.9517639059962558e-05,
-      "loss": 1.619,
-      "step": 341800
-    },
-    {
-      "epoch": 0.23785496550059776,
-      "grad_norm": 4.7723283767700195,
-      "learning_rate": 1.951708098369033e-05,
-      "loss": 1.6601,
-      "step": 342000
-    },
-    {
-      "epoch": 0.23799406197165074,
-      "grad_norm": 4.46943473815918,
-      "learning_rate": 1.951652259630014e-05,
-      "loss": 1.6552,
-      "step": 342200
-    },
-    {
-      "epoch": 0.23813315844270372,
-      "grad_norm": 3.9207563400268555,
-      "learning_rate": 1.951596389781864e-05,
-      "loss": 1.6588,
-      "step": 342400
-    },
-    {
-      "epoch": 0.2382722549137567,
-      "grad_norm": 4.317783355712891,
-      "learning_rate": 1.95154048882725e-05,
-      "loss": 1.6362,
-      "step": 342600
-    },
-    {
-      "epoch": 0.2384113513848097,
-      "grad_norm": 4.8455939292907715,
-      "learning_rate": 1.9514845567688408e-05,
-      "loss": 1.6518,
-      "step": 342800
     }
   ],
   "logging_steps": 200,
@@ -12024,7 +11800,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 4.5645148898507244e+18,
+  "total_flos": 4.4789352142658273e+18,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null
last-checkpoint/training_args.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2f194f0afbf00cd135f18b6f6e0dc2d489f2d84487accfafc9254221384d4d16
+oid sha256:e6420e27a1743978dbb7f5afac4bb71396b01a4362c274caf9f98fc91a6bd501
 size 6840