Training in progress, step 343000
- adapter_model.safetensors +1 -1
- last-checkpoint/adapter_config.json +5 -5
- last-checkpoint/adapter_model.safetensors +1 -1
- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/rng_state.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +3 -227
- last-checkpoint/training_args.bin +1 -1
adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:558f0558dbe2ed2fed185bbe33a32e697578eb37a71364f4ae39a77ac585d1c8
 size 1342238560
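Each of the binary artifacts in this commit is stored as a Git LFS pointer: the repository tracks only the spec version line, the sha256 object ID, and the byte size, while the payload itself lives in LFS storage. Below is a minimal sketch for checking that a locally downloaded adapter_model.safetensors matches the updated pointer above; the local path is an assumption, the hash and size are taken from the diff.

```python
import hashlib
from pathlib import Path

# Values taken from the updated LFS pointer in this commit.
EXPECTED_SHA256 = "558f0558dbe2ed2fed185bbe33a32e697578eb37a71364f4ae39a77ac585d1c8"
EXPECTED_SIZE = 1_342_238_560

def verify_lfs_object(path: Path) -> bool:
    """Check that a downloaded file matches the size and sha256 of its LFS pointer."""
    if path.stat().st_size != EXPECTED_SIZE:
        return False
    digest = hashlib.sha256()
    with path.open("rb") as f:
        # Stream in 1 MiB chunks so the 1.3 GB adapter is not loaded into memory at once.
        for chunk in iter(lambda: f.read(1 << 20), b""):
            digest.update(chunk)
    return digest.hexdigest() == EXPECTED_SHA256

# Hypothetical local path; adjust to wherever the adapter was downloaded.
print(verify_lfs_object(Path("adapter_model.safetensors")))
```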
last-checkpoint/adapter_config.json
CHANGED
@@ -23,13 +23,13 @@
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
-    "
-    "down_proj",
-    "v_proj",
-    "k_proj",
+    "up_proj",
     "gate_proj",
+    "k_proj",
+    "v_proj",
+    "q_proj",
     "o_proj",
-    "
+    "down_proj"
   ],
   "task_type": "CAUSAL_LM",
   "use_dora": false,
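The hunk above rewrites the LoRA target_modules list so that all seven Llama-style projection matrices (q_proj, k_proj, v_proj, o_proj, gate_proj, up_proj, down_proj) are adapted. A minimal sketch of an equivalent configuration built with the peft library follows; the rank and alpha values are placeholders, since they are not shown in this hunk.

```python
from peft import LoraConfig

# target_modules mirrors the updated list in last-checkpoint/adapter_config.json;
# r and lora_alpha are assumed values not visible in this diff.
lora_config = LoraConfig(
    r=16,
    lora_alpha=32,
    target_modules=[
        "up_proj",
        "gate_proj",
        "k_proj",
        "v_proj",
        "q_proj",
        "o_proj",
        "down_proj",
    ],
    task_type="CAUSAL_LM",
    use_dora=False,
)
```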
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:77ccc7e48ccdbe805a596485ddd3dbadcee2e22d9ba053f7df72c11bef42dd8e
 size 1342238560
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:a674a1bf3923257c0203aec537f3903553312ad604dd2b86b9a2d5cd0ddb714f
 size 683268498
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:a14ac9e461892314d6ba767ae6fbf3be389395cb4fe125c43f81c17b334c00ce
 size 14244
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:0bf5cb7d0fbd840eda73ead5e2ccb0627aa96feb1ef96682b5cfe40a387534d6
 size 1064
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.
+  "epoch": 0.2339602643111143,
   "eval_steps": 500,
-  "global_step":
+  "global_step": 336400,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -11781,230 +11781,6 @@
       "learning_rate": 1.9532589419723944e-05,
       "loss": 1.7161,
       "step": 336400
-    },
-    {
-      "epoch": 0.23409936078216728,
-      "grad_norm": 5.75113582611084,
-      "learning_rate": 1.9532039753658822e-05,
-      "loss": 1.6752,
-      "step": 336600
-    },
-    {
-      "epoch": 0.23423845725322026,
-      "grad_norm": 3.8082878589630127,
-      "learning_rate": 1.9531489775761617e-05,
-      "loss": 1.6679,
-      "step": 336800
-    },
-    {
-      "epoch": 0.23437755372427324,
-      "grad_norm": 4.37647819519043,
-      "learning_rate": 1.953093948605858e-05,
-      "loss": 1.6643,
-      "step": 337000
-    },
-    {
-      "epoch": 0.23451665019532622,
-      "grad_norm": 5.018675327301025,
-      "learning_rate": 1.953038888457599e-05,
-      "loss": 1.6606,
-      "step": 337200
-    },
-    {
-      "epoch": 0.2346557466663792,
-      "grad_norm": 5.047998905181885,
-      "learning_rate": 1.952983797134013e-05,
-      "loss": 1.6508,
-      "step": 337400
-    },
-    {
-      "epoch": 0.23479484313743218,
-      "grad_norm": 7.279408931732178,
-      "learning_rate": 1.95292867463773e-05,
-      "loss": 1.6547,
-      "step": 337600
-    },
-    {
-      "epoch": 0.23493393960848516,
-      "grad_norm": 6.7975382804870605,
-      "learning_rate": 1.9528735209713808e-05,
-      "loss": 1.6461,
-      "step": 337800
-    },
-    {
-      "epoch": 0.23507303607953814,
-      "grad_norm": 7.198062896728516,
-      "learning_rate": 1.9528183361375986e-05,
-      "loss": 1.6954,
-      "step": 338000
-    },
-    {
-      "epoch": 0.23521213255059112,
-      "grad_norm": 4.493501663208008,
-      "learning_rate": 1.9527631201390185e-05,
-      "loss": 1.6956,
-      "step": 338200
-    },
-    {
-      "epoch": 0.2353512290216441,
-      "grad_norm": 4.0898118019104,
-      "learning_rate": 1.952707872978276e-05,
-      "loss": 1.6233,
-      "step": 338400
-    },
-    {
-      "epoch": 0.23549032549269708,
-      "grad_norm": 3.5022025108337402,
-      "learning_rate": 1.952652594658009e-05,
-      "loss": 1.6675,
-      "step": 338600
-    },
-    {
-      "epoch": 0.23562942196375006,
-      "grad_norm": 3.9198243618011475,
-      "learning_rate": 1.9525972851808555e-05,
-      "loss": 1.6433,
-      "step": 338800
-    },
-    {
-      "epoch": 0.23576851843480304,
-      "grad_norm": 4.736083507537842,
-      "learning_rate": 1.9525419445494563e-05,
-      "loss": 1.6486,
-      "step": 339000
-    },
-    {
-      "epoch": 0.23590761490585604,
-      "grad_norm": 3.913604259490967,
-      "learning_rate": 1.952486572766454e-05,
-      "loss": 1.5873,
-      "step": 339200
-    },
-    {
-      "epoch": 0.23604671137690902,
-      "grad_norm": 4.593210220336914,
-      "learning_rate": 1.9524311698344908e-05,
-      "loss": 1.696,
-      "step": 339400
-    },
-    {
-      "epoch": 0.236185807847962,
-      "grad_norm": 12.825864791870117,
-      "learning_rate": 1.9523757357562124e-05,
-      "loss": 1.6756,
-      "step": 339600
-    },
-    {
-      "epoch": 0.23632490431901498,
-      "grad_norm": 3.4124608039855957,
-      "learning_rate": 1.9523202705342653e-05,
-      "loss": 1.6614,
-      "step": 339800
-    },
-    {
-      "epoch": 0.23646400079006796,
-      "grad_norm": 3.605181932449341,
-      "learning_rate": 1.9522647741712966e-05,
-      "loss": 1.6916,
-      "step": 340000
-    },
-    {
-      "epoch": 0.23660309726112094,
-      "grad_norm": 5.278689384460449,
-      "learning_rate": 1.952209246669956e-05,
-      "loss": 1.6617,
-      "step": 340200
-    },
-    {
-      "epoch": 0.23674219373217392,
-      "grad_norm": 5.578737258911133,
-      "learning_rate": 1.9521536880328943e-05,
-      "loss": 1.7077,
-      "step": 340400
-    },
-    {
-      "epoch": 0.2368812902032269,
-      "grad_norm": 4.157208442687988,
-      "learning_rate": 1.9520980982627642e-05,
-      "loss": 1.6824,
-      "step": 340600
-    },
-    {
-      "epoch": 0.23702038667427988,
-      "grad_norm": 3.1329407691955566,
-      "learning_rate": 1.9520424773622193e-05,
-      "loss": 1.6559,
-      "step": 340800
-    },
-    {
-      "epoch": 0.23715948314533286,
-      "grad_norm": 4.475450038909912,
-      "learning_rate": 1.951986825333914e-05,
-      "loss": 1.7017,
-      "step": 341000
-    },
-    {
-      "epoch": 0.23729857961638584,
-      "grad_norm": 4.912330627441406,
-      "learning_rate": 1.9519311421805062e-05,
-      "loss": 1.6263,
-      "step": 341200
-    },
-    {
-      "epoch": 0.23743767608743882,
-      "grad_norm": 6.892397403717041,
-      "learning_rate": 1.951875427904654e-05,
-      "loss": 1.7071,
-      "step": 341400
-    },
-    {
-      "epoch": 0.2375767725584918,
-      "grad_norm": 4.659296989440918,
-      "learning_rate": 1.9518196825090167e-05,
-      "loss": 1.6526,
-      "step": 341600
-    },
-    {
-      "epoch": 0.23771586902954478,
-      "grad_norm": 7.2321977615356445,
-      "learning_rate": 1.9517639059962558e-05,
-      "loss": 1.619,
-      "step": 341800
-    },
-    {
-      "epoch": 0.23785496550059776,
-      "grad_norm": 4.7723283767700195,
-      "learning_rate": 1.951708098369033e-05,
-      "loss": 1.6601,
-      "step": 342000
-    },
-    {
-      "epoch": 0.23799406197165074,
-      "grad_norm": 4.46943473815918,
-      "learning_rate": 1.951652259630014e-05,
-      "loss": 1.6552,
-      "step": 342200
-    },
-    {
-      "epoch": 0.23813315844270372,
-      "grad_norm": 3.9207563400268555,
-      "learning_rate": 1.951596389781864e-05,
-      "loss": 1.6588,
-      "step": 342400
-    },
-    {
-      "epoch": 0.2382722549137567,
-      "grad_norm": 4.317783355712891,
-      "learning_rate": 1.95154048882725e-05,
-      "loss": 1.6362,
-      "step": 342600
-    },
-    {
-      "epoch": 0.2384113513848097,
-      "grad_norm": 4.8455939292907715,
-      "learning_rate": 1.9514845567688408e-05,
-      "loss": 1.6518,
-      "step": 342800
     }
   ],
   "logging_steps": 200,
@@ -12024,7 +11800,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 4.
+  "total_flos": 4.4789352142658273e+18,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null
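trainer_state.json holds the running log history written by the transformers Trainer: one entry per logging interval (logging_steps = 200 here), each recording epoch, grad_norm, learning_rate, loss and step. A small sketch for inspecting the most recent entry of the checkpoint in this commit; the path is an assumption based on the last-checkpoint/ directory shown above.

```python
import json
from pathlib import Path

# Hypothetical local path; in this repo the file sits under last-checkpoint/.
state = json.loads(Path("last-checkpoint/trainer_state.json").read_text())

print("global step :", state["global_step"])
print("epoch       :", state["epoch"])

# log_history holds one record per logging interval.
last = state["log_history"][-1]
print("last logged loss:", last["loss"], "at step", last["step"])
```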
last-checkpoint/training_args.bin
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:e6420e27a1743978dbb7f5afac4bb71396b01a4362c274caf9f98fc91a6bd501
 size 6840