Muhammad Khalifa
committed on
Commit
·
eca3a85
1
Parent(s):
bacaabd
add 100-shot finetuning
Browse files
This view is limited to 50 files because it contains too many changes.
See raw diff
- low-shot-task-specific-100-ex/coin_flip/best_model/adapter_config.json +21 -0
- low-shot-task-specific-100-ex/coin_flip/best_model/adapter_model.bin +3 -0
- low-shot-task-specific-100-ex/coin_flip/best_model/optimizer.pt +3 -0
- low-shot-task-specific-100-ex/coin_flip/best_model/rng_state.pth +3 -0
- low-shot-task-specific-100-ex/coin_flip/best_model/scheduler.pt +3 -0
- low-shot-task-specific-100-ex/coin_flip/best_model/trainer_state.json +95 -0
- low-shot-task-specific-100-ex/coin_flip/best_model/training_args.bin +3 -0
- low-shot-task-specific-100-ex/cola/best_model/adapter_config.json +21 -0
- low-shot-task-specific-100-ex/cola/best_model/adapter_model.bin +3 -0
- low-shot-task-specific-100-ex/cola/best_model/optimizer.pt +3 -0
- low-shot-task-specific-100-ex/cola/best_model/rng_state.pth +3 -0
- low-shot-task-specific-100-ex/cola/best_model/scheduler.pt +3 -0
- low-shot-task-specific-100-ex/cola/best_model/trainer_state.json +95 -0
- low-shot-task-specific-100-ex/cola/best_model/training_args.bin +3 -0
- low-shot-task-specific-100-ex/commonsense_qa/best_model/adapter_config.json +21 -0
- low-shot-task-specific-100-ex/commonsense_qa/best_model/adapter_model.bin +3 -0
- low-shot-task-specific-100-ex/commonsense_qa/best_model/optimizer.pt +3 -0
- low-shot-task-specific-100-ex/commonsense_qa/best_model/rng_state.pth +3 -0
- low-shot-task-specific-100-ex/commonsense_qa/best_model/scheduler.pt +3 -0
- low-shot-task-specific-100-ex/commonsense_qa/best_model/trainer_state.json +95 -0
- low-shot-task-specific-100-ex/commonsense_qa/best_model/training_args.bin +3 -0
- low-shot-task-specific-100-ex/emotion/best_model/adapter_config.json +21 -0
- low-shot-task-specific-100-ex/emotion/best_model/adapter_model.bin +3 -0
- low-shot-task-specific-100-ex/emotion/best_model/optimizer.pt +3 -0
- low-shot-task-specific-100-ex/emotion/best_model/rng_state.pth +3 -0
- low-shot-task-specific-100-ex/emotion/best_model/scheduler.pt +3 -0
- low-shot-task-specific-100-ex/emotion/best_model/trainer_state.json +95 -0
- low-shot-task-specific-100-ex/emotion/best_model/training_args.bin +3 -0
- low-shot-task-specific-100-ex/social_i_qa/best_model/adapter_config.json +21 -0
- low-shot-task-specific-100-ex/social_i_qa/best_model/adapter_model.bin +3 -0
- low-shot-task-specific-100-ex/social_i_qa/best_model/optimizer.pt +3 -0
- low-shot-task-specific-100-ex/social_i_qa/best_model/rng_state.pth +3 -0
- low-shot-task-specific-100-ex/social_i_qa/best_model/scheduler.pt +3 -0
- low-shot-task-specific-100-ex/social_i_qa/best_model/trainer_state.json +95 -0
- low-shot-task-specific-100-ex/social_i_qa/best_model/training_args.bin +3 -0
- low-shot-task-specific-100-ex/sst/best_model/adapter_config.json +21 -0
- low-shot-task-specific-100-ex/sst/best_model/adapter_model.bin +3 -0
- low-shot-task-specific-100-ex/sst/best_model/optimizer.pt +3 -0
- low-shot-task-specific-100-ex/sst/best_model/rng_state.pth +3 -0
- low-shot-task-specific-100-ex/sst/best_model/scheduler.pt +3 -0
- low-shot-task-specific-100-ex/sst/best_model/trainer_state.json +95 -0
- low-shot-task-specific-100-ex/sst/best_model/training_args.bin +3 -0
- low-shot-task-specific-100-ex/sum/best_model/adapter_config.json +21 -0
- low-shot-task-specific-100-ex/sum/best_model/adapter_model.bin +3 -0
- low-shot-task-specific-100-ex/sum/best_model/optimizer.pt +3 -0
- low-shot-task-specific-100-ex/sum/best_model/rng_state.pth +3 -0
- low-shot-task-specific-100-ex/sum/best_model/scheduler.pt +3 -0
- low-shot-task-specific-100-ex/sum/best_model/trainer_state.json +95 -0
- low-shot-task-specific-100-ex/sum/best_model/training_args.bin +3 -0
- low-shot-task-specific-100-ex/svamp/best_model/adapter_config.json +21 -0
low-shot-task-specific-100-ex/coin_flip/best_model/adapter_config.json
ADDED
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"base_model_name_or_path": "meta-llama/Llama-2-13b-hf",
|
3 |
+
"bias": "none",
|
4 |
+
"enable_lora": null,
|
5 |
+
"fan_in_fan_out": false,
|
6 |
+
"inference_mode": true,
|
7 |
+
"init_lora_weights": true,
|
8 |
+
"lora_alpha": 16,
|
9 |
+
"lora_dropout": 0.05,
|
10 |
+
"merge_weights": false,
|
11 |
+
"modules_to_save": null,
|
12 |
+
"peft_type": "LORA",
|
13 |
+
"r": 16,
|
14 |
+
"target_modules": [
|
15 |
+
"q_proj",
|
16 |
+
"k_proj",
|
17 |
+
"v_proj",
|
18 |
+
"o_proj"
|
19 |
+
],
|
20 |
+
"task_type": "CAUSAL_LM"
|
21 |
+
}
|
low-shot-task-specific-100-ex/coin_flip/best_model/adapter_model.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:cd47cb9e4d59e5592dc86a4825506634b4b81810e2c8cce8d4ee00b654660681
|
3 |
+
size 104973389
|
low-shot-task-specific-100-ex/coin_flip/best_model/optimizer.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:89cb77b938fcbec8ef8fc937366b88a4eb9b3dda7246bcdf5841e8d1ff389eb7
|
3 |
+
size 209984517
|
low-shot-task-specific-100-ex/coin_flip/best_model/rng_state.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b683f78ced98226c01d86c04f25e6a3295aa86e50560601c917694a914d68aad
|
3 |
+
size 14575
|
low-shot-task-specific-100-ex/coin_flip/best_model/scheduler.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:eb69dbf6f829f506b60594816da07bc6940e3d9adce52ab976bf73b294ac5127
|
3 |
+
size 627
|
low-shot-task-specific-100-ex/coin_flip/best_model/trainer_state.json
ADDED
@@ -0,0 +1,95 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"best_metric": 0.7364377379417419,
|
3 |
+
"best_model_checkpoint": "checkpoints/instrucode/low-shot-task-specific-100-ex/coin_flip/checkpoint-20",
|
4 |
+
"epoch": 8.0,
|
5 |
+
"eval_steps": 500,
|
6 |
+
"global_step": 20,
|
7 |
+
"is_hyper_param_search": false,
|
8 |
+
"is_local_process_zero": true,
|
9 |
+
"is_world_process_zero": true,
|
10 |
+
"log_history": [
|
11 |
+
{
|
12 |
+
"epoch": 0.8,
|
13 |
+
"eval_loss": 4.770002365112305,
|
14 |
+
"eval_runtime": 0.7512,
|
15 |
+
"eval_samples_per_second": 26.626,
|
16 |
+
"eval_steps_per_second": 3.994,
|
17 |
+
"step": 2
|
18 |
+
},
|
19 |
+
{
|
20 |
+
"epoch": 2.0,
|
21 |
+
"eval_loss": 4.635828971862793,
|
22 |
+
"eval_runtime": 0.7546,
|
23 |
+
"eval_samples_per_second": 26.505,
|
24 |
+
"eval_steps_per_second": 3.976,
|
25 |
+
"step": 5
|
26 |
+
},
|
27 |
+
{
|
28 |
+
"epoch": 2.8,
|
29 |
+
"eval_loss": 4.354025363922119,
|
30 |
+
"eval_runtime": 0.7548,
|
31 |
+
"eval_samples_per_second": 26.499,
|
32 |
+
"eval_steps_per_second": 3.975,
|
33 |
+
"step": 7
|
34 |
+
},
|
35 |
+
{
|
36 |
+
"epoch": 4.0,
|
37 |
+
"learning_rate": 5.9999999999999995e-05,
|
38 |
+
"loss": 4.5475,
|
39 |
+
"step": 10
|
40 |
+
},
|
41 |
+
{
|
42 |
+
"epoch": 4.0,
|
43 |
+
"eval_loss": 3.8307082653045654,
|
44 |
+
"eval_runtime": 0.7548,
|
45 |
+
"eval_samples_per_second": 26.498,
|
46 |
+
"eval_steps_per_second": 3.975,
|
47 |
+
"step": 10
|
48 |
+
},
|
49 |
+
{
|
50 |
+
"epoch": 4.8,
|
51 |
+
"eval_loss": 3.6256070137023926,
|
52 |
+
"eval_runtime": 0.7542,
|
53 |
+
"eval_samples_per_second": 26.519,
|
54 |
+
"eval_steps_per_second": 3.978,
|
55 |
+
"step": 12
|
56 |
+
},
|
57 |
+
{
|
58 |
+
"epoch": 6.0,
|
59 |
+
"eval_loss": 2.8008601665496826,
|
60 |
+
"eval_runtime": 0.7532,
|
61 |
+
"eval_samples_per_second": 26.552,
|
62 |
+
"eval_steps_per_second": 3.983,
|
63 |
+
"step": 15
|
64 |
+
},
|
65 |
+
{
|
66 |
+
"epoch": 6.8,
|
67 |
+
"eval_loss": 1.8803138732910156,
|
68 |
+
"eval_runtime": 0.7625,
|
69 |
+
"eval_samples_per_second": 26.229,
|
70 |
+
"eval_steps_per_second": 3.934,
|
71 |
+
"step": 17
|
72 |
+
},
|
73 |
+
{
|
74 |
+
"epoch": 8.0,
|
75 |
+
"learning_rate": 0.00011399999999999999,
|
76 |
+
"loss": 2.7123,
|
77 |
+
"step": 20
|
78 |
+
},
|
79 |
+
{
|
80 |
+
"epoch": 8.0,
|
81 |
+
"eval_loss": 0.7364377379417419,
|
82 |
+
"eval_runtime": 0.7535,
|
83 |
+
"eval_samples_per_second": 26.542,
|
84 |
+
"eval_steps_per_second": 3.981,
|
85 |
+
"step": 20
|
86 |
+
}
|
87 |
+
],
|
88 |
+
"logging_steps": 10,
|
89 |
+
"max_steps": 20,
|
90 |
+
"num_train_epochs": 10,
|
91 |
+
"save_steps": 500,
|
92 |
+
"total_flos": 2393497935544320.0,
|
93 |
+
"trial_name": null,
|
94 |
+
"trial_params": null
|
95 |
+
}
|
low-shot-task-specific-100-ex/coin_flip/best_model/training_args.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:cfd2f888c4bb95dd2b9338dcfeb755b0ed743f36bb3535194839914be97ca407
|
3 |
+
size 4091
|
low-shot-task-specific-100-ex/cola/best_model/adapter_config.json
ADDED
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"base_model_name_or_path": "meta-llama/Llama-2-13b-hf",
|
3 |
+
"bias": "none",
|
4 |
+
"enable_lora": null,
|
5 |
+
"fan_in_fan_out": false,
|
6 |
+
"inference_mode": true,
|
7 |
+
"init_lora_weights": true,
|
8 |
+
"lora_alpha": 16,
|
9 |
+
"lora_dropout": 0.05,
|
10 |
+
"merge_weights": false,
|
11 |
+
"modules_to_save": null,
|
12 |
+
"peft_type": "LORA",
|
13 |
+
"r": 16,
|
14 |
+
"target_modules": [
|
15 |
+
"q_proj",
|
16 |
+
"k_proj",
|
17 |
+
"v_proj",
|
18 |
+
"o_proj"
|
19 |
+
],
|
20 |
+
"task_type": "CAUSAL_LM"
|
21 |
+
}
|
low-shot-task-specific-100-ex/cola/best_model/adapter_model.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1223c9b19186bca2d1f9a40d1afb7b9d667bb02a7b7fdf5a29875a3ba0ad2c8f
|
3 |
+
size 104973389
|
low-shot-task-specific-100-ex/cola/best_model/optimizer.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:52d58812c7b440cb5008d9896abfb5be121a3021dd1ec3e407cf5ece524078cb
|
3 |
+
size 209984517
|
low-shot-task-specific-100-ex/cola/best_model/rng_state.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0cfb4b1b5009a9b015311afd9778b0ef0a4a8bc389c04629a1d2e68a84aeea44
|
3 |
+
size 14575
|
low-shot-task-specific-100-ex/cola/best_model/scheduler.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9b63e6b896793d947452e2a5278b1333dd1dcc71d94b37f92a47c6766156ba1e
|
3 |
+
size 627
|
low-shot-task-specific-100-ex/cola/best_model/trainer_state.json
ADDED
@@ -0,0 +1,95 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"best_metric": 3.2643978595733643,
|
3 |
+
"best_model_checkpoint": "checkpoints/instrucode/low-shot-task-specific-100-ex/cola/checkpoint-20",
|
4 |
+
"epoch": 8.0,
|
5 |
+
"eval_steps": 500,
|
6 |
+
"global_step": 20,
|
7 |
+
"is_hyper_param_search": false,
|
8 |
+
"is_local_process_zero": true,
|
9 |
+
"is_world_process_zero": true,
|
10 |
+
"log_history": [
|
11 |
+
{
|
12 |
+
"epoch": 0.8,
|
13 |
+
"eval_loss": 7.496130466461182,
|
14 |
+
"eval_runtime": 0.651,
|
15 |
+
"eval_samples_per_second": 30.721,
|
16 |
+
"eval_steps_per_second": 4.608,
|
17 |
+
"step": 2
|
18 |
+
},
|
19 |
+
{
|
20 |
+
"epoch": 2.0,
|
21 |
+
"eval_loss": 7.389729976654053,
|
22 |
+
"eval_runtime": 0.6506,
|
23 |
+
"eval_samples_per_second": 30.74,
|
24 |
+
"eval_steps_per_second": 4.611,
|
25 |
+
"step": 5
|
26 |
+
},
|
27 |
+
{
|
28 |
+
"epoch": 2.8,
|
29 |
+
"eval_loss": 7.235182285308838,
|
30 |
+
"eval_runtime": 0.6495,
|
31 |
+
"eval_samples_per_second": 30.792,
|
32 |
+
"eval_steps_per_second": 4.619,
|
33 |
+
"step": 7
|
34 |
+
},
|
35 |
+
{
|
36 |
+
"epoch": 4.0,
|
37 |
+
"learning_rate": 5.9999999999999995e-05,
|
38 |
+
"loss": 7.3487,
|
39 |
+
"step": 10
|
40 |
+
},
|
41 |
+
{
|
42 |
+
"epoch": 4.0,
|
43 |
+
"eval_loss": 6.905265808105469,
|
44 |
+
"eval_runtime": 0.6482,
|
45 |
+
"eval_samples_per_second": 30.855,
|
46 |
+
"eval_steps_per_second": 4.628,
|
47 |
+
"step": 10
|
48 |
+
},
|
49 |
+
{
|
50 |
+
"epoch": 4.8,
|
51 |
+
"eval_loss": 6.568731784820557,
|
52 |
+
"eval_runtime": 0.6499,
|
53 |
+
"eval_samples_per_second": 30.774,
|
54 |
+
"eval_steps_per_second": 4.616,
|
55 |
+
"step": 12
|
56 |
+
},
|
57 |
+
{
|
58 |
+
"epoch": 6.0,
|
59 |
+
"eval_loss": 5.829730987548828,
|
60 |
+
"eval_runtime": 0.6495,
|
61 |
+
"eval_samples_per_second": 30.791,
|
62 |
+
"eval_steps_per_second": 4.619,
|
63 |
+
"step": 15
|
64 |
+
},
|
65 |
+
{
|
66 |
+
"epoch": 6.8,
|
67 |
+
"eval_loss": 5.088259220123291,
|
68 |
+
"eval_runtime": 0.6485,
|
69 |
+
"eval_samples_per_second": 30.84,
|
70 |
+
"eval_steps_per_second": 4.626,
|
71 |
+
"step": 17
|
72 |
+
},
|
73 |
+
{
|
74 |
+
"epoch": 8.0,
|
75 |
+
"learning_rate": 0.00011999999999999999,
|
76 |
+
"loss": 5.6641,
|
77 |
+
"step": 20
|
78 |
+
},
|
79 |
+
{
|
80 |
+
"epoch": 8.0,
|
81 |
+
"eval_loss": 3.2643978595733643,
|
82 |
+
"eval_runtime": 0.6478,
|
83 |
+
"eval_samples_per_second": 30.872,
|
84 |
+
"eval_steps_per_second": 4.631,
|
85 |
+
"step": 20
|
86 |
+
}
|
87 |
+
],
|
88 |
+
"logging_steps": 10,
|
89 |
+
"max_steps": 20,
|
90 |
+
"num_train_epochs": 10,
|
91 |
+
"save_steps": 500,
|
92 |
+
"total_flos": 862945020149760.0,
|
93 |
+
"trial_name": null,
|
94 |
+
"trial_params": null
|
95 |
+
}
|
low-shot-task-specific-100-ex/cola/best_model/training_args.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7584dccd05bece2a9ec3f622ce5aa37500e33036a484978bd2e232bdb92a42bd
|
3 |
+
size 4091
|
low-shot-task-specific-100-ex/commonsense_qa/best_model/adapter_config.json
ADDED
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"base_model_name_or_path": "meta-llama/Llama-2-13b-hf",
|
3 |
+
"bias": "none",
|
4 |
+
"enable_lora": null,
|
5 |
+
"fan_in_fan_out": false,
|
6 |
+
"inference_mode": true,
|
7 |
+
"init_lora_weights": true,
|
8 |
+
"lora_alpha": 16,
|
9 |
+
"lora_dropout": 0.05,
|
10 |
+
"merge_weights": false,
|
11 |
+
"modules_to_save": null,
|
12 |
+
"peft_type": "LORA",
|
13 |
+
"r": 16,
|
14 |
+
"target_modules": [
|
15 |
+
"q_proj",
|
16 |
+
"k_proj",
|
17 |
+
"v_proj",
|
18 |
+
"o_proj"
|
19 |
+
],
|
20 |
+
"task_type": "CAUSAL_LM"
|
21 |
+
}
|
low-shot-task-specific-100-ex/commonsense_qa/best_model/adapter_model.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f63d75b63e7b225c36c047b776e506742a0eb06a2cb34d6ac32cbc8faea89420
|
3 |
+
size 104973389
|
low-shot-task-specific-100-ex/commonsense_qa/best_model/optimizer.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1fdd8a85dbed2d83977bbce5185352392812b7736e12294c276e5e9046d46672
|
3 |
+
size 209984517
|
low-shot-task-specific-100-ex/commonsense_qa/best_model/rng_state.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6e7d44b7a4cdc1eb16e6709c586b407ac6e8b61b77e11a4182c69dce6b3efbf4
|
3 |
+
size 14575
|
low-shot-task-specific-100-ex/commonsense_qa/best_model/scheduler.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9b63e6b896793d947452e2a5278b1333dd1dcc71d94b37f92a47c6766156ba1e
|
3 |
+
size 627
|
low-shot-task-specific-100-ex/commonsense_qa/best_model/trainer_state.json
ADDED
@@ -0,0 +1,95 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"best_metric": 1.0019607543945312,
|
3 |
+
"best_model_checkpoint": "checkpoints/instrucode/low-shot-task-specific-100-ex/commonsense_qa/checkpoint-20",
|
4 |
+
"epoch": 8.0,
|
5 |
+
"eval_steps": 500,
|
6 |
+
"global_step": 20,
|
7 |
+
"is_hyper_param_search": false,
|
8 |
+
"is_local_process_zero": true,
|
9 |
+
"is_world_process_zero": true,
|
10 |
+
"log_history": [
|
11 |
+
{
|
12 |
+
"epoch": 0.8,
|
13 |
+
"eval_loss": 5.86135196685791,
|
14 |
+
"eval_runtime": 0.8193,
|
15 |
+
"eval_samples_per_second": 24.41,
|
16 |
+
"eval_steps_per_second": 3.662,
|
17 |
+
"step": 2
|
18 |
+
},
|
19 |
+
{
|
20 |
+
"epoch": 2.0,
|
21 |
+
"eval_loss": 5.789961814880371,
|
22 |
+
"eval_runtime": 0.8276,
|
23 |
+
"eval_samples_per_second": 24.167,
|
24 |
+
"eval_steps_per_second": 3.625,
|
25 |
+
"step": 5
|
26 |
+
},
|
27 |
+
{
|
28 |
+
"epoch": 2.8,
|
29 |
+
"eval_loss": 5.628936767578125,
|
30 |
+
"eval_runtime": 0.8224,
|
31 |
+
"eval_samples_per_second": 24.32,
|
32 |
+
"eval_steps_per_second": 3.648,
|
33 |
+
"step": 7
|
34 |
+
},
|
35 |
+
{
|
36 |
+
"epoch": 4.0,
|
37 |
+
"learning_rate": 5.9999999999999995e-05,
|
38 |
+
"loss": 5.5941,
|
39 |
+
"step": 10
|
40 |
+
},
|
41 |
+
{
|
42 |
+
"epoch": 4.0,
|
43 |
+
"eval_loss": 4.905825614929199,
|
44 |
+
"eval_runtime": 0.8285,
|
45 |
+
"eval_samples_per_second": 24.14,
|
46 |
+
"eval_steps_per_second": 3.621,
|
47 |
+
"step": 10
|
48 |
+
},
|
49 |
+
{
|
50 |
+
"epoch": 4.8,
|
51 |
+
"eval_loss": 4.051631927490234,
|
52 |
+
"eval_runtime": 0.8299,
|
53 |
+
"eval_samples_per_second": 24.099,
|
54 |
+
"eval_steps_per_second": 3.615,
|
55 |
+
"step": 12
|
56 |
+
},
|
57 |
+
{
|
58 |
+
"epoch": 6.0,
|
59 |
+
"eval_loss": 3.0537314414978027,
|
60 |
+
"eval_runtime": 0.8237,
|
61 |
+
"eval_samples_per_second": 24.28,
|
62 |
+
"eval_steps_per_second": 3.642,
|
63 |
+
"step": 15
|
64 |
+
},
|
65 |
+
{
|
66 |
+
"epoch": 6.8,
|
67 |
+
"eval_loss": 2.2741684913635254,
|
68 |
+
"eval_runtime": 0.8216,
|
69 |
+
"eval_samples_per_second": 24.344,
|
70 |
+
"eval_steps_per_second": 3.652,
|
71 |
+
"step": 17
|
72 |
+
},
|
73 |
+
{
|
74 |
+
"epoch": 8.0,
|
75 |
+
"learning_rate": 0.00011999999999999999,
|
76 |
+
"loss": 3.1561,
|
77 |
+
"step": 20
|
78 |
+
},
|
79 |
+
{
|
80 |
+
"epoch": 8.0,
|
81 |
+
"eval_loss": 1.0019607543945312,
|
82 |
+
"eval_runtime": 0.8235,
|
83 |
+
"eval_samples_per_second": 24.287,
|
84 |
+
"eval_steps_per_second": 3.643,
|
85 |
+
"step": 20
|
86 |
+
}
|
87 |
+
],
|
88 |
+
"logging_steps": 10,
|
89 |
+
"max_steps": 20,
|
90 |
+
"num_train_epochs": 10,
|
91 |
+
"save_steps": 500,
|
92 |
+
"total_flos": 3078414183628800.0,
|
93 |
+
"trial_name": null,
|
94 |
+
"trial_params": null
|
95 |
+
}
|
low-shot-task-specific-100-ex/commonsense_qa/best_model/training_args.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:bcaa3c829539bb3746b5f7e0dafb15dbfccde57e3d5650c68eb42e3fbbacfa3e
|
3 |
+
size 4091
|
low-shot-task-specific-100-ex/emotion/best_model/adapter_config.json
ADDED
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"base_model_name_or_path": "meta-llama/Llama-2-13b-hf",
|
3 |
+
"bias": "none",
|
4 |
+
"enable_lora": null,
|
5 |
+
"fan_in_fan_out": false,
|
6 |
+
"inference_mode": true,
|
7 |
+
"init_lora_weights": true,
|
8 |
+
"lora_alpha": 16,
|
9 |
+
"lora_dropout": 0.05,
|
10 |
+
"merge_weights": false,
|
11 |
+
"modules_to_save": null,
|
12 |
+
"peft_type": "LORA",
|
13 |
+
"r": 16,
|
14 |
+
"target_modules": [
|
15 |
+
"q_proj",
|
16 |
+
"k_proj",
|
17 |
+
"v_proj",
|
18 |
+
"o_proj"
|
19 |
+
],
|
20 |
+
"task_type": "CAUSAL_LM"
|
21 |
+
}
|
low-shot-task-specific-100-ex/emotion/best_model/adapter_model.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:23178e724f805ef456b15b82726bde7476bb7f85943ce21b94a72364d19f8459
|
3 |
+
size 104973389
|
low-shot-task-specific-100-ex/emotion/best_model/optimizer.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:73979bacc3db61859edeb2672ed3234900273717c07987eb84d60c9ebb4e30cc
|
3 |
+
size 209984517
|
low-shot-task-specific-100-ex/emotion/best_model/rng_state.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a608d60cb0a08df4efb2f7fa2cd8590824d7f4a718f101d397116dc4b5272878
|
3 |
+
size 14575
|
low-shot-task-specific-100-ex/emotion/best_model/scheduler.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9b63e6b896793d947452e2a5278b1333dd1dcc71d94b37f92a47c6766156ba1e
|
3 |
+
size 627
|
low-shot-task-specific-100-ex/emotion/best_model/trainer_state.json
ADDED
@@ -0,0 +1,95 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"best_metric": 3.7403335571289062,
|
3 |
+
"best_model_checkpoint": "checkpoints/instrucode/low-shot-task-specific-100-ex/emotion/checkpoint-20",
|
4 |
+
"epoch": 8.0,
|
5 |
+
"eval_steps": 500,
|
6 |
+
"global_step": 20,
|
7 |
+
"is_hyper_param_search": false,
|
8 |
+
"is_local_process_zero": true,
|
9 |
+
"is_world_process_zero": true,
|
10 |
+
"log_history": [
|
11 |
+
{
|
12 |
+
"epoch": 0.8,
|
13 |
+
"eval_loss": 6.759609222412109,
|
14 |
+
"eval_runtime": 0.7242,
|
15 |
+
"eval_samples_per_second": 27.617,
|
16 |
+
"eval_steps_per_second": 4.143,
|
17 |
+
"step": 2
|
18 |
+
},
|
19 |
+
{
|
20 |
+
"epoch": 2.0,
|
21 |
+
"eval_loss": 6.701653957366943,
|
22 |
+
"eval_runtime": 0.7245,
|
23 |
+
"eval_samples_per_second": 27.606,
|
24 |
+
"eval_steps_per_second": 4.141,
|
25 |
+
"step": 5
|
26 |
+
},
|
27 |
+
{
|
28 |
+
"epoch": 2.8,
|
29 |
+
"eval_loss": 6.61182165145874,
|
30 |
+
"eval_runtime": 0.7269,
|
31 |
+
"eval_samples_per_second": 27.515,
|
32 |
+
"eval_steps_per_second": 4.127,
|
33 |
+
"step": 7
|
34 |
+
},
|
35 |
+
{
|
36 |
+
"epoch": 4.0,
|
37 |
+
"learning_rate": 5.9999999999999995e-05,
|
38 |
+
"loss": 6.5973,
|
39 |
+
"step": 10
|
40 |
+
},
|
41 |
+
{
|
42 |
+
"epoch": 4.0,
|
43 |
+
"eval_loss": 6.34386682510376,
|
44 |
+
"eval_runtime": 0.7251,
|
45 |
+
"eval_samples_per_second": 27.583,
|
46 |
+
"eval_steps_per_second": 4.137,
|
47 |
+
"step": 10
|
48 |
+
},
|
49 |
+
{
|
50 |
+
"epoch": 4.8,
|
51 |
+
"eval_loss": 5.928864479064941,
|
52 |
+
"eval_runtime": 0.7256,
|
53 |
+
"eval_samples_per_second": 27.565,
|
54 |
+
"eval_steps_per_second": 4.135,
|
55 |
+
"step": 12
|
56 |
+
},
|
57 |
+
{
|
58 |
+
"epoch": 6.0,
|
59 |
+
"eval_loss": 5.030377388000488,
|
60 |
+
"eval_runtime": 0.7248,
|
61 |
+
"eval_samples_per_second": 27.593,
|
62 |
+
"eval_steps_per_second": 4.139,
|
63 |
+
"step": 15
|
64 |
+
},
|
65 |
+
{
|
66 |
+
"epoch": 6.8,
|
67 |
+
"eval_loss": 4.649694442749023,
|
68 |
+
"eval_runtime": 0.7259,
|
69 |
+
"eval_samples_per_second": 27.554,
|
70 |
+
"eval_steps_per_second": 4.133,
|
71 |
+
"step": 17
|
72 |
+
},
|
73 |
+
{
|
74 |
+
"epoch": 8.0,
|
75 |
+
"learning_rate": 0.00011999999999999999,
|
76 |
+
"loss": 5.1116,
|
77 |
+
"step": 20
|
78 |
+
},
|
79 |
+
{
|
80 |
+
"epoch": 8.0,
|
81 |
+
"eval_loss": 3.7403335571289062,
|
82 |
+
"eval_runtime": 0.7246,
|
83 |
+
"eval_samples_per_second": 27.602,
|
84 |
+
"eval_steps_per_second": 4.14,
|
85 |
+
"step": 20
|
86 |
+
}
|
87 |
+
],
|
88 |
+
"logging_steps": 10,
|
89 |
+
"max_steps": 20,
|
90 |
+
"num_train_epochs": 10,
|
91 |
+
"save_steps": 500,
|
92 |
+
"total_flos": 2252558490992640.0,
|
93 |
+
"trial_name": null,
|
94 |
+
"trial_params": null
|
95 |
+
}
|
low-shot-task-specific-100-ex/emotion/best_model/training_args.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:cec69ccd4746da9921e4026850479fcbb626150ed87fa80dc42db47d3486b211
|
3 |
+
size 4091
|
low-shot-task-specific-100-ex/social_i_qa/best_model/adapter_config.json
ADDED
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"base_model_name_or_path": "meta-llama/Llama-2-13b-hf",
|
3 |
+
"bias": "none",
|
4 |
+
"enable_lora": null,
|
5 |
+
"fan_in_fan_out": false,
|
6 |
+
"inference_mode": true,
|
7 |
+
"init_lora_weights": true,
|
8 |
+
"lora_alpha": 16,
|
9 |
+
"lora_dropout": 0.05,
|
10 |
+
"merge_weights": false,
|
11 |
+
"modules_to_save": null,
|
12 |
+
"peft_type": "LORA",
|
13 |
+
"r": 16,
|
14 |
+
"target_modules": [
|
15 |
+
"q_proj",
|
16 |
+
"k_proj",
|
17 |
+
"v_proj",
|
18 |
+
"o_proj"
|
19 |
+
],
|
20 |
+
"task_type": "CAUSAL_LM"
|
21 |
+
}
|
low-shot-task-specific-100-ex/social_i_qa/best_model/adapter_model.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ef60f33031060f9266bfe61d170eb7c269e012245e37652a45d7c3131c8d3add
|
3 |
+
size 104973389
|
low-shot-task-specific-100-ex/social_i_qa/best_model/optimizer.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:abbeb1912756da0877b31d08f01c2fd522bbb55dc819144a235dd9eae39560ec
|
3 |
+
size 209984517
|
low-shot-task-specific-100-ex/social_i_qa/best_model/rng_state.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1ab0262a43d8b8116e7c1b8d394e85687822d3581f13149fbc20dd3f065e85a6
|
3 |
+
size 14575
|
low-shot-task-specific-100-ex/social_i_qa/best_model/scheduler.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9b63e6b896793d947452e2a5278b1333dd1dcc71d94b37f92a47c6766156ba1e
|
3 |
+
size 627
|
low-shot-task-specific-100-ex/social_i_qa/best_model/trainer_state.json
ADDED
@@ -0,0 +1,95 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"best_metric": 0.821982741355896,
|
3 |
+
"best_model_checkpoint": "checkpoints/instrucode/low-shot-task-specific-100-ex/social_i_qa/checkpoint-20",
|
4 |
+
"epoch": 8.0,
|
5 |
+
"eval_steps": 500,
|
6 |
+
"global_step": 20,
|
7 |
+
"is_hyper_param_search": false,
|
8 |
+
"is_local_process_zero": true,
|
9 |
+
"is_world_process_zero": true,
|
10 |
+
"log_history": [
|
11 |
+
{
|
12 |
+
"epoch": 0.8,
|
13 |
+
"eval_loss": 6.487459659576416,
|
14 |
+
"eval_runtime": 0.8925,
|
15 |
+
"eval_samples_per_second": 22.408,
|
16 |
+
"eval_steps_per_second": 3.361,
|
17 |
+
"step": 2
|
18 |
+
},
|
19 |
+
{
|
20 |
+
"epoch": 2.0,
|
21 |
+
"eval_loss": 6.3755621910095215,
|
22 |
+
"eval_runtime": 0.897,
|
23 |
+
"eval_samples_per_second": 22.297,
|
24 |
+
"eval_steps_per_second": 3.345,
|
25 |
+
"step": 5
|
26 |
+
},
|
27 |
+
{
|
28 |
+
"epoch": 2.8,
|
29 |
+
"eval_loss": 6.16649866104126,
|
30 |
+
"eval_runtime": 0.8963,
|
31 |
+
"eval_samples_per_second": 22.314,
|
32 |
+
"eval_steps_per_second": 3.347,
|
33 |
+
"step": 7
|
34 |
+
},
|
35 |
+
{
|
36 |
+
"epoch": 4.0,
|
37 |
+
"learning_rate": 5.9999999999999995e-05,
|
38 |
+
"loss": 6.1511,
|
39 |
+
"step": 10
|
40 |
+
},
|
41 |
+
{
|
42 |
+
"epoch": 4.0,
|
43 |
+
"eval_loss": 5.644223690032959,
|
44 |
+
"eval_runtime": 0.8985,
|
45 |
+
"eval_samples_per_second": 22.259,
|
46 |
+
"eval_steps_per_second": 3.339,
|
47 |
+
"step": 10
|
48 |
+
},
|
49 |
+
{
|
50 |
+
"epoch": 4.8,
|
51 |
+
"eval_loss": 4.884535312652588,
|
52 |
+
"eval_runtime": 0.9023,
|
53 |
+
"eval_samples_per_second": 22.166,
|
54 |
+
"eval_steps_per_second": 3.325,
|
55 |
+
"step": 12
|
56 |
+
},
|
57 |
+
{
|
58 |
+
"epoch": 6.0,
|
59 |
+
"eval_loss": 3.0946044921875,
|
60 |
+
"eval_runtime": 0.8991,
|
61 |
+
"eval_samples_per_second": 22.245,
|
62 |
+
"eval_steps_per_second": 3.337,
|
63 |
+
"step": 15
|
64 |
+
},
|
65 |
+
{
|
66 |
+
"epoch": 6.8,
|
67 |
+
"eval_loss": 2.1555423736572266,
|
68 |
+
"eval_runtime": 0.9036,
|
69 |
+
"eval_samples_per_second": 22.133,
|
70 |
+
"eval_steps_per_second": 3.32,
|
71 |
+
"step": 17
|
72 |
+
},
|
73 |
+
{
|
74 |
+
"epoch": 8.0,
|
75 |
+
"learning_rate": 0.00011999999999999999,
|
76 |
+
"loss": 3.3011,
|
77 |
+
"step": 20
|
78 |
+
},
|
79 |
+
{
|
80 |
+
"epoch": 8.0,
|
81 |
+
"eval_loss": 0.821982741355896,
|
82 |
+
"eval_runtime": 0.8939,
|
83 |
+
"eval_samples_per_second": 22.373,
|
84 |
+
"eval_steps_per_second": 3.356,
|
85 |
+
"step": 20
|
86 |
+
}
|
87 |
+
],
|
88 |
+
"logging_steps": 10,
|
89 |
+
"max_steps": 20,
|
90 |
+
"num_train_epochs": 10,
|
91 |
+
"save_steps": 500,
|
92 |
+
"total_flos": 3338039476224000.0,
|
93 |
+
"trial_name": null,
|
94 |
+
"trial_params": null
|
95 |
+
}
|
low-shot-task-specific-100-ex/social_i_qa/best_model/training_args.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2b54b052f5cac8f07391027a3b0c53c8bffff1028fc30595e86e27a07c6b887e
|
3 |
+
size 4091
|
low-shot-task-specific-100-ex/sst/best_model/adapter_config.json
ADDED
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"base_model_name_or_path": "meta-llama/Llama-2-13b-hf",
|
3 |
+
"bias": "none",
|
4 |
+
"enable_lora": null,
|
5 |
+
"fan_in_fan_out": false,
|
6 |
+
"inference_mode": true,
|
7 |
+
"init_lora_weights": true,
|
8 |
+
"lora_alpha": 16,
|
9 |
+
"lora_dropout": 0.05,
|
10 |
+
"merge_weights": false,
|
11 |
+
"modules_to_save": null,
|
12 |
+
"peft_type": "LORA",
|
13 |
+
"r": 16,
|
14 |
+
"target_modules": [
|
15 |
+
"q_proj",
|
16 |
+
"k_proj",
|
17 |
+
"v_proj",
|
18 |
+
"o_proj"
|
19 |
+
],
|
20 |
+
"task_type": "CAUSAL_LM"
|
21 |
+
}
|
low-shot-task-specific-100-ex/sst/best_model/adapter_model.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f0ab51ab7c25129d35e69c6a5c9f19421507d7aea82814b2fbf3fbb9e27e0120
|
3 |
+
size 104973389
|
low-shot-task-specific-100-ex/sst/best_model/optimizer.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:75b2aab283007757d7da6dabd8a4d1a3e6a88f4f3b3c863b51c6af953abd76f6
|
3 |
+
size 209984517
|
low-shot-task-specific-100-ex/sst/best_model/rng_state.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:bd5c668f408d63f497259a79124b33e953210b9fb047bdfd418e598fe5018820
|
3 |
+
size 14575
|
low-shot-task-specific-100-ex/sst/best_model/scheduler.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:daa948d8422c0ff5c84b04b3a278a49a8c2106994063cdc84b33b076944943d4
|
3 |
+
size 627
|
low-shot-task-specific-100-ex/sst/best_model/trainer_state.json
ADDED
@@ -0,0 +1,95 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"best_metric": 4.71249532699585,
|
3 |
+
"best_model_checkpoint": "checkpoints/instrucode/low-shot-task-specific-100-ex/sst/checkpoint-20",
|
4 |
+
"epoch": 8.0,
|
5 |
+
"eval_steps": 500,
|
6 |
+
"global_step": 20,
|
7 |
+
"is_hyper_param_search": false,
|
8 |
+
"is_local_process_zero": true,
|
9 |
+
"is_world_process_zero": true,
|
10 |
+
"log_history": [
|
11 |
+
{
|
12 |
+
"epoch": 0.8,
|
13 |
+
"eval_loss": 7.7774858474731445,
|
14 |
+
"eval_runtime": 0.6674,
|
15 |
+
"eval_samples_per_second": 29.965,
|
16 |
+
"eval_steps_per_second": 4.495,
|
17 |
+
"step": 2
|
18 |
+
},
|
19 |
+
{
|
20 |
+
"epoch": 2.0,
|
21 |
+
"eval_loss": 7.758080959320068,
|
22 |
+
"eval_runtime": 0.6648,
|
23 |
+
"eval_samples_per_second": 30.083,
|
24 |
+
"eval_steps_per_second": 4.512,
|
25 |
+
"step": 5
|
26 |
+
},
|
27 |
+
{
|
28 |
+
"epoch": 2.8,
|
29 |
+
"eval_loss": 7.7222137451171875,
|
30 |
+
"eval_runtime": 0.6677,
|
31 |
+
"eval_samples_per_second": 29.954,
|
32 |
+
"eval_steps_per_second": 4.493,
|
33 |
+
"step": 7
|
34 |
+
},
|
35 |
+
{
|
36 |
+
"epoch": 4.0,
|
37 |
+
"learning_rate": 4.2e-05,
|
38 |
+
"loss": 7.716,
|
39 |
+
"step": 10
|
40 |
+
},
|
41 |
+
{
|
42 |
+
"epoch": 4.0,
|
43 |
+
"eval_loss": 7.386101722717285,
|
44 |
+
"eval_runtime": 0.6691,
|
45 |
+
"eval_samples_per_second": 29.892,
|
46 |
+
"eval_steps_per_second": 4.484,
|
47 |
+
"step": 10
|
48 |
+
},
|
49 |
+
{
|
50 |
+
"epoch": 4.8,
|
51 |
+
"eval_loss": 7.176695346832275,
|
52 |
+
"eval_runtime": 0.6671,
|
53 |
+
"eval_samples_per_second": 29.982,
|
54 |
+
"eval_steps_per_second": 4.497,
|
55 |
+
"step": 12
|
56 |
+
},
|
57 |
+
{
|
58 |
+
"epoch": 6.0,
|
59 |
+
"eval_loss": 6.573421478271484,
|
60 |
+
"eval_runtime": 0.6678,
|
61 |
+
"eval_samples_per_second": 29.951,
|
62 |
+
"eval_steps_per_second": 4.493,
|
63 |
+
"step": 15
|
64 |
+
},
|
65 |
+
{
|
66 |
+
"epoch": 6.8,
|
67 |
+
"eval_loss": 6.066993236541748,
|
68 |
+
"eval_runtime": 0.6693,
|
69 |
+
"eval_samples_per_second": 29.881,
|
70 |
+
"eval_steps_per_second": 4.482,
|
71 |
+
"step": 17
|
72 |
+
},
|
73 |
+
{
|
74 |
+
"epoch": 8.0,
|
75 |
+
"learning_rate": 0.000102,
|
76 |
+
"loss": 6.5057,
|
77 |
+
"step": 20
|
78 |
+
},
|
79 |
+
{
|
80 |
+
"epoch": 8.0,
|
81 |
+
"eval_loss": 4.71249532699585,
|
82 |
+
"eval_runtime": 0.6679,
|
83 |
+
"eval_samples_per_second": 29.944,
|
84 |
+
"eval_steps_per_second": 4.492,
|
85 |
+
"step": 20
|
86 |
+
}
|
87 |
+
],
|
88 |
+
"logging_steps": 10,
|
89 |
+
"max_steps": 20,
|
90 |
+
"num_train_epochs": 10,
|
91 |
+
"save_steps": 500,
|
92 |
+
"total_flos": 1760506745978880.0,
|
93 |
+
"trial_name": null,
|
94 |
+
"trial_params": null
|
95 |
+
}
|
low-shot-task-specific-100-ex/sst/best_model/training_args.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:82db56540ff893c3d36fa725b2ccaa3a282af73561fa81b1cd27f7673a28d02f
|
3 |
+
size 4091
|
low-shot-task-specific-100-ex/sum/best_model/adapter_config.json
ADDED
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"base_model_name_or_path": "meta-llama/Llama-2-13b-hf",
|
3 |
+
"bias": "none",
|
4 |
+
"enable_lora": null,
|
5 |
+
"fan_in_fan_out": false,
|
6 |
+
"inference_mode": true,
|
7 |
+
"init_lora_weights": true,
|
8 |
+
"lora_alpha": 16,
|
9 |
+
"lora_dropout": 0.05,
|
10 |
+
"merge_weights": false,
|
11 |
+
"modules_to_save": null,
|
12 |
+
"peft_type": "LORA",
|
13 |
+
"r": 16,
|
14 |
+
"target_modules": [
|
15 |
+
"q_proj",
|
16 |
+
"k_proj",
|
17 |
+
"v_proj",
|
18 |
+
"o_proj"
|
19 |
+
],
|
20 |
+
"task_type": "CAUSAL_LM"
|
21 |
+
}
|
low-shot-task-specific-100-ex/sum/best_model/adapter_model.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a7cc16d00544954ce77b8cae15ccb1f3d592abfd2e8a630f9f4afd1fecf5a7be
|
3 |
+
size 104973389
|
low-shot-task-specific-100-ex/sum/best_model/optimizer.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a11eba00d1ce3e5f0d03227932d4e26433f2f6b6a7c8dc1f7f157da65ae61a16
|
3 |
+
size 209984517
|
low-shot-task-specific-100-ex/sum/best_model/rng_state.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0cfb4b1b5009a9b015311afd9778b0ef0a4a8bc389c04629a1d2e68a84aeea44
|
3 |
+
size 14575
|
low-shot-task-specific-100-ex/sum/best_model/scheduler.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9b63e6b896793d947452e2a5278b1333dd1dcc71d94b37f92a47c6766156ba1e
|
3 |
+
size 627
|
low-shot-task-specific-100-ex/sum/best_model/trainer_state.json
ADDED
@@ -0,0 +1,95 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"best_metric": 1.7415841817855835,
|
3 |
+
"best_model_checkpoint": "checkpoints/instrucode/low-shot-task-specific-100-ex/sum/checkpoint-20",
|
4 |
+
"epoch": 8.0,
|
5 |
+
"eval_steps": 500,
|
6 |
+
"global_step": 20,
|
7 |
+
"is_hyper_param_search": false,
|
8 |
+
"is_local_process_zero": true,
|
9 |
+
"is_world_process_zero": true,
|
10 |
+
"log_history": [
|
11 |
+
{
|
12 |
+
"epoch": 0.8,
|
13 |
+
"eval_loss": 3.6972098350524902,
|
14 |
+
"eval_runtime": 0.6541,
|
15 |
+
"eval_samples_per_second": 30.578,
|
16 |
+
"eval_steps_per_second": 4.587,
|
17 |
+
"step": 2
|
18 |
+
},
|
19 |
+
{
|
20 |
+
"epoch": 2.0,
|
21 |
+
"eval_loss": 3.5827407836914062,
|
22 |
+
"eval_runtime": 0.6516,
|
23 |
+
"eval_samples_per_second": 30.695,
|
24 |
+
"eval_steps_per_second": 4.604,
|
25 |
+
"step": 5
|
26 |
+
},
|
27 |
+
{
|
28 |
+
"epoch": 2.8,
|
29 |
+
"eval_loss": 3.435373306274414,
|
30 |
+
"eval_runtime": 0.6534,
|
31 |
+
"eval_samples_per_second": 30.611,
|
32 |
+
"eval_steps_per_second": 4.592,
|
33 |
+
"step": 7
|
34 |
+
},
|
35 |
+
{
|
36 |
+
"epoch": 4.0,
|
37 |
+
"learning_rate": 5.9999999999999995e-05,
|
38 |
+
"loss": 3.5457,
|
39 |
+
"step": 10
|
40 |
+
},
|
41 |
+
{
|
42 |
+
"epoch": 4.0,
|
43 |
+
"eval_loss": 3.144989013671875,
|
44 |
+
"eval_runtime": 0.6516,
|
45 |
+
"eval_samples_per_second": 30.692,
|
46 |
+
"eval_steps_per_second": 4.604,
|
47 |
+
"step": 10
|
48 |
+
},
|
49 |
+
{
|
50 |
+
"epoch": 4.8,
|
51 |
+
"eval_loss": 2.9391090869903564,
|
52 |
+
"eval_runtime": 0.6511,
|
53 |
+
"eval_samples_per_second": 30.718,
|
54 |
+
"eval_steps_per_second": 4.608,
|
55 |
+
"step": 12
|
56 |
+
},
|
57 |
+
{
|
58 |
+
"epoch": 6.0,
|
59 |
+
"eval_loss": 2.312290668487549,
|
60 |
+
"eval_runtime": 0.6505,
|
61 |
+
"eval_samples_per_second": 30.745,
|
62 |
+
"eval_steps_per_second": 4.612,
|
63 |
+
"step": 15
|
64 |
+
},
|
65 |
+
{
|
66 |
+
"epoch": 6.8,
|
67 |
+
"eval_loss": 1.938306450843811,
|
68 |
+
"eval_runtime": 0.6508,
|
69 |
+
"eval_samples_per_second": 30.732,
|
70 |
+
"eval_steps_per_second": 4.61,
|
71 |
+
"step": 17
|
72 |
+
},
|
73 |
+
{
|
74 |
+
"epoch": 8.0,
|
75 |
+
"learning_rate": 0.00011999999999999999,
|
76 |
+
"loss": 2.4305,
|
77 |
+
"step": 20
|
78 |
+
},
|
79 |
+
{
|
80 |
+
"epoch": 8.0,
|
81 |
+
"eval_loss": 1.7415841817855835,
|
82 |
+
"eval_runtime": 0.6535,
|
83 |
+
"eval_samples_per_second": 30.603,
|
84 |
+
"eval_steps_per_second": 4.59,
|
85 |
+
"step": 20
|
86 |
+
}
|
87 |
+
],
|
88 |
+
"logging_steps": 10,
|
89 |
+
"max_steps": 20,
|
90 |
+
"num_train_epochs": 10,
|
91 |
+
"save_steps": 500,
|
92 |
+
"total_flos": 1186858480435200.0,
|
93 |
+
"trial_name": null,
|
94 |
+
"trial_params": null
|
95 |
+
}
|
low-shot-task-specific-100-ex/sum/best_model/training_args.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c9b167b4b835edf4fa5e13863bdad1f68e733f002739a766fe1eb9a9eb6f48df
|
3 |
+
size 4091
|
low-shot-task-specific-100-ex/svamp/best_model/adapter_config.json
ADDED
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"base_model_name_or_path": "meta-llama/Llama-2-13b-hf",
|
3 |
+
"bias": "none",
|
4 |
+
"enable_lora": null,
|
5 |
+
"fan_in_fan_out": false,
|
6 |
+
"inference_mode": true,
|
7 |
+
"init_lora_weights": true,
|
8 |
+
"lora_alpha": 16,
|
9 |
+
"lora_dropout": 0.05,
|
10 |
+
"merge_weights": false,
|
11 |
+
"modules_to_save": null,
|
12 |
+
"peft_type": "LORA",
|
13 |
+
"r": 16,
|
14 |
+
"target_modules": [
|
15 |
+
"q_proj",
|
16 |
+
"k_proj",
|
17 |
+
"v_proj",
|
18 |
+
"o_proj"
|
19 |
+
],
|
20 |
+
"task_type": "CAUSAL_LM"
|
21 |
+
}
|