chujiezheng committed
Commit 28e3cae
1 Parent(s): b86cef2

Upload folder using huggingface_hub

all_results.json ADDED
@@ -0,0 +1,8 @@
+ {
+   "epoch": 1.98,
+   "train_loss": 0.5396593997353002,
+   "train_runtime": 4845.5764,
+   "train_samples": 12227,
+   "train_samples_per_second": 5.047,
+   "train_steps_per_second": 0.039
+ }
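
These aggregate metrics (duplicated in train_results.json below) let you recover the effective global batch size from the ratio of samples/sec to steps/sec. A minimal sketch; the local file path is an assumption:

```python
import json

# Minimal sketch: derive the effective global batch size from the
# logged throughput. Assumes all_results.json is in the working dir.
with open("all_results.json") as f:
    results = json.load(f)

# samples consumed per optimizer step = samples/sec divided by steps/sec
effective_batch = (results["train_samples_per_second"]
                   / results["train_steps_per_second"])
print(f"effective batch size ~ {effective_batch:.0f}")  # ~129, i.e. ~128

# Cross-check from the other fields: 12227 samples x 1.98 epochs
# ~ 24,209 examples over 190 steps ~ 127 samples per step.
```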
config.json ADDED
@@ -0,0 +1,26 @@
+ {
+   "_name_or_path": "alignment-handbook/zephyr-7b-sft-full",
+   "architectures": [
+     "MistralForCausalLM"
+   ],
+   "attention_dropout": 0.0,
+   "bos_token_id": 1,
+   "eos_token_id": 2,
+   "hidden_act": "silu",
+   "hidden_size": 4096,
+   "initializer_range": 0.02,
+   "intermediate_size": 14336,
+   "max_position_embeddings": 32768,
+   "model_type": "mistral",
+   "num_attention_heads": 32,
+   "num_hidden_layers": 32,
+   "num_key_value_heads": 8,
+   "rms_norm_eps": 1e-05,
+   "rope_theta": 10000.0,
+   "sliding_window": 4096,
+   "tie_word_embeddings": false,
+   "torch_dtype": "bfloat16",
+   "transformers_version": "4.39.3",
+   "use_cache": true,
+   "vocab_size": 32000
+ }
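
The config describes a standard Mistral-7B-shaped decoder (32 layers, grouped-query attention with 8 KV heads, 32k max positions) fine-tuned from alignment-handbook/zephyr-7b-sft-full, so it loads with stock transformers. A loading sketch; the repo id is a placeholder, not taken from this page:

```python
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

repo_id = "<user>/<repo>"  # placeholder: substitute this repository's id

# dtype matches the checkpoint's declared "torch_dtype": "bfloat16"; the
# three shards below are stitched together via model.safetensors.index.json.
# device_map="auto" additionally requires the accelerate package.
model = AutoModelForCausalLM.from_pretrained(
    repo_id, torch_dtype=torch.bfloat16, device_map="auto"
)
tokenizer = AutoTokenizer.from_pretrained(repo_id)
```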
generation_config.json ADDED
@@ -0,0 +1,7 @@
+ {
+   "_from_model_config": true,
+   "bos_token_id": 1,
+   "do_sample": true,
+   "eos_token_id": 2,
+   "transformers_version": "4.39.3"
+ }
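
With do_sample set to true here, generate() samples by default rather than decoding greedily. Continuing the loading sketch above:

```python
# Continuing the sketch: generation defaults (do_sample=True, bos/eos ids)
# come from generation_config.json unless overridden per call.
inputs = tokenizer("The capital of France is", return_tensors="pt").to(model.device)
out = model.generate(**inputs, max_new_tokens=32)
print(tokenizer.decode(out[0], skip_special_tokens=True))
```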
model-00001-of-00003.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:f51ab60d10197a15c568efaacbae3e806b215bdc8416011a55fb52aafcaedbe3
+ size 4943162336
model-00002-of-00003.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:0b349fc954c3b8bddf53bca3c9f930454bbe70cecac01106bc1fcd8a6639e440
+ size 4999819336
model-00003-of-00003.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:50001b2eb43dec9c03f629e54ad969bf121dfb81be4f10b32c078f8d5ab326e9
+ size 4540516344
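
These three entries are Git LFS pointer files, not the weights themselves: each records the LFS spec version, a sha256 oid of the real blob, and its byte size (about 4.9 GB, 5.0 GB, and 4.5 GB here). A sketch for verifying a downloaded shard against its pointer, using the values copied from the first shard above:

```python
import hashlib

# Sketch: check a downloaded shard against the oid/size in its LFS pointer.
expected_oid = "f51ab60d10197a15c568efaacbae3e806b215bdc8416011a55fb52aafcaedbe3"
expected_size = 4943162336

h, size = hashlib.sha256(), 0
with open("model-00001-of-00003.safetensors", "rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):  # 1 MiB at a time
        h.update(chunk)
        size += len(chunk)

assert size == expected_size, f"size mismatch: {size} != {expected_size}"
assert h.hexdigest() == expected_oid, "sha256 mismatch"
print("shard verified")
```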
model.safetensors.index.json ADDED
@@ -0,0 +1,298 @@
+ {
+   "metadata": {
+     "total_size": 14483464192
+   },
+   "weight_map": {
+     "lm_head.weight": "model-00003-of-00003.safetensors",
+     "model.embed_tokens.weight": "model-00001-of-00003.safetensors",
+     "model.layers.0.input_layernorm.weight": "model-00001-of-00003.safetensors",
+     "model.layers.0.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.0.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.0.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.0.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
+     "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.1.input_layernorm.weight": "model-00001-of-00003.safetensors",
+     "model.layers.1.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.1.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.1.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.1.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
+     "model.layers.1.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.1.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.1.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.1.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.10.input_layernorm.weight": "model-00002-of-00003.safetensors",
+     "model.layers.10.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.10.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.10.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.10.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
+     "model.layers.10.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.10.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.10.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.10.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.11.input_layernorm.weight": "model-00002-of-00003.safetensors",
+     "model.layers.11.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.11.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.11.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.11.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
+     "model.layers.11.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.11.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.11.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.11.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.12.input_layernorm.weight": "model-00002-of-00003.safetensors",
+     "model.layers.12.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.12.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.12.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.12.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
+     "model.layers.12.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.12.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.12.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.12.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.13.input_layernorm.weight": "model-00002-of-00003.safetensors",
+     "model.layers.13.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.13.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.13.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.13.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
+     "model.layers.13.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.13.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.13.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.13.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.14.input_layernorm.weight": "model-00002-of-00003.safetensors",
+     "model.layers.14.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.14.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.14.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.14.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
+     "model.layers.14.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.14.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.14.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.14.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.15.input_layernorm.weight": "model-00002-of-00003.safetensors",
+     "model.layers.15.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.15.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.15.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.15.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
+     "model.layers.15.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.15.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.15.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.15.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.16.input_layernorm.weight": "model-00002-of-00003.safetensors",
+     "model.layers.16.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.16.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.16.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.16.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
+     "model.layers.16.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.16.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.16.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.16.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.17.input_layernorm.weight": "model-00002-of-00003.safetensors",
+     "model.layers.17.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.17.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.17.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.17.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
+     "model.layers.17.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.17.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.17.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.17.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.18.input_layernorm.weight": "model-00002-of-00003.safetensors",
+     "model.layers.18.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.18.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.18.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.18.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
+     "model.layers.18.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.18.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.18.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.18.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.19.input_layernorm.weight": "model-00002-of-00003.safetensors",
+     "model.layers.19.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.19.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.19.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.19.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
+     "model.layers.19.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.19.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.19.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.19.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.2.input_layernorm.weight": "model-00001-of-00003.safetensors",
+     "model.layers.2.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.2.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.2.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.2.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
+     "model.layers.2.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.2.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.2.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.2.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.20.input_layernorm.weight": "model-00002-of-00003.safetensors",
+     "model.layers.20.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.20.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.20.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.20.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
+     "model.layers.20.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.20.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.20.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.20.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.21.input_layernorm.weight": "model-00002-of-00003.safetensors",
+     "model.layers.21.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.21.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.21.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.21.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
+     "model.layers.21.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.21.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.21.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.21.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.22.input_layernorm.weight": "model-00003-of-00003.safetensors",
+     "model.layers.22.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.22.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.22.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.22.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
+     "model.layers.22.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.22.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.22.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.22.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.23.input_layernorm.weight": "model-00003-of-00003.safetensors",
+     "model.layers.23.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.23.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.23.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.23.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
+     "model.layers.23.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.23.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.23.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.23.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.24.input_layernorm.weight": "model-00003-of-00003.safetensors",
+     "model.layers.24.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.24.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.24.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.24.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
+     "model.layers.24.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.24.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.24.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.24.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.25.input_layernorm.weight": "model-00003-of-00003.safetensors",
+     "model.layers.25.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.25.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.25.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.25.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
+     "model.layers.25.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.25.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.25.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.25.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.26.input_layernorm.weight": "model-00003-of-00003.safetensors",
+     "model.layers.26.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.26.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.26.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.26.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
+     "model.layers.26.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.26.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.26.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.26.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.27.input_layernorm.weight": "model-00003-of-00003.safetensors",
+     "model.layers.27.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.27.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.27.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.27.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
+     "model.layers.27.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.27.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.27.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.27.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.28.input_layernorm.weight": "model-00003-of-00003.safetensors",
+     "model.layers.28.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.28.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.28.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.28.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
+     "model.layers.28.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.28.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.28.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.28.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.29.input_layernorm.weight": "model-00003-of-00003.safetensors",
+     "model.layers.29.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.29.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.29.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.29.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
+     "model.layers.29.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.29.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.29.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.29.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.3.input_layernorm.weight": "model-00001-of-00003.safetensors",
+     "model.layers.3.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.3.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.3.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.3.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
+     "model.layers.3.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.3.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.3.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.3.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.30.input_layernorm.weight": "model-00003-of-00003.safetensors",
+     "model.layers.30.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.30.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.30.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.30.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
+     "model.layers.30.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.30.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.30.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.30.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.31.input_layernorm.weight": "model-00003-of-00003.safetensors",
+     "model.layers.31.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.31.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.31.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.31.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
+     "model.layers.31.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.31.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.31.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.31.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.4.input_layernorm.weight": "model-00001-of-00003.safetensors",
+     "model.layers.4.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.4.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.4.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.4.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
+     "model.layers.4.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.4.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.4.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.4.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.5.input_layernorm.weight": "model-00001-of-00003.safetensors",
+     "model.layers.5.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.5.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.5.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.5.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
+     "model.layers.5.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.5.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.5.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.5.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.6.input_layernorm.weight": "model-00001-of-00003.safetensors",
+     "model.layers.6.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.6.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.6.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.6.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
+     "model.layers.6.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.6.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.6.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.6.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.7.input_layernorm.weight": "model-00001-of-00003.safetensors",
+     "model.layers.7.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.7.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.7.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.7.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
+     "model.layers.7.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.7.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.7.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.7.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.8.input_layernorm.weight": "model-00001-of-00003.safetensors",
+     "model.layers.8.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.8.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.8.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.8.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
+     "model.layers.8.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.8.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.8.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.8.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.9.input_layernorm.weight": "model-00001-of-00003.safetensors",
+     "model.layers.9.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.9.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.9.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.9.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
+     "model.layers.9.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.9.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.9.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.9.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
+     "model.norm.weight": "model-00003-of-00003.safetensors"
+   }
+ }
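
The index maps each of the 291 tensors to one of the three shards; total_size is 14,483,464,192 bytes, i.e. about 7.24B parameters at 2 bytes each in bfloat16, as expected for a Mistral-7B. A sketch for inspecting the shard layout (assumes the file is local):

```python
import json
from collections import Counter

with open("model.safetensors.index.json") as f:
    index = json.load(f)

# Count how many tensors land in each shard file
per_shard = Counter(index["weight_map"].values())
for shard, n in sorted(per_shard.items()):
    print(shard, n)

total = index["metadata"]["total_size"]
print(f"{total} bytes ~= {total / 2:.3e} bf16 params")  # ~7.24e9
```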
runs/Apr16_21-34-04_pluslab01-a100.cs.ucla.edu/events.out.tfevents.1713328513.pluslab01-a100.cs.ucla.edu ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:43222db7c2ed7f40af93ba20ab5497518681f50f54fe6f7ce52fe71cf21ab13e
+ size 31620
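
This is the TensorBoard event log for the run (another LFS pointer). Once pulled locally, the tensorboard package can read it back; the scalar tag name below is an assumption based on the Trainer's usual naming, not read from the file:

```python
from tensorboard.backend.event_processing.event_accumulator import EventAccumulator

# Point the accumulator at the run directory containing the tfevents file.
ea = EventAccumulator("runs/Apr16_21-34-04_pluslab01-a100.cs.ucla.edu")
ea.Reload()

print(ea.Tags()["scalars"])          # list the scalar tags actually present
for ev in ea.Scalars("train/loss"):  # tag name assumed; verify via Tags()
    print(ev.step, ev.value)
```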
special_tokens_map.json ADDED
@@ -0,0 +1,30 @@
+ {
+   "bos_token": {
+     "content": "<s>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "eos_token": {
+     "content": "</s>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "pad_token": {
+     "content": "</s>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "unk_token": {
+     "content": "<unk>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   }
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,43 @@
+ {
+   "add_bos_token": true,
+   "add_eos_token": false,
+   "added_tokens_decoder": {
+     "0": {
+       "content": "<unk>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "1": {
+       "content": "<s>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "2": {
+       "content": "</s>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     }
+   },
+   "additional_special_tokens": [],
+   "bos_token": "<s>",
+   "chat_template": "{% for message in messages %}\n{% if message['role'] == 'user' %}\n{{ '<|user|>\n' + message['content'] + eos_token }}\n{% elif message['role'] == 'system' %}\n{{ '<|system|>\n' + message['content'] + eos_token }}\n{% elif message['role'] == 'assistant' %}\n{{ '<|assistant|>\n' + message['content'] + eos_token }}\n{% endif %}\n{% if loop.last and add_generation_prompt %}\n{{ '<|assistant|>' }}\n{% endif %}\n{% endfor %}",
+   "clean_up_tokenization_spaces": false,
+   "eos_token": "</s>",
+   "legacy": true,
+   "model_max_length": 2048,
+   "pad_token": "</s>",
+   "sp_model_kwargs": {},
+   "spaces_between_special_tokens": false,
+   "tokenizer_class": "LlamaTokenizer",
+   "unk_token": "<unk>",
+   "use_default_system_prompt": false
+ }
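
Two details worth flagging: pad_token is "</s>", the same token as EOS (matching special_tokens_map.json above), which is the usual choice when the base model ships no dedicated pad token; and chat_template is the zephyr-style format with <|system|>, <|user|>, and <|assistant|> headers, each turn terminated by EOS. A sketch of applying it; repo_id is the same placeholder as in the earlier loading sketch:

```python
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained(repo_id)  # placeholder repo id

messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "Summarize DPO in one sentence."},
]
prompt = tokenizer.apply_chat_template(
    messages, tokenize=False, add_generation_prompt=True
)
print(prompt)
# Expected shape (roughly):
# <|system|>
# You are a helpful assistant.</s>
# <|user|>
# Summarize DPO in one sentence.</s>
# <|assistant|>
```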
train_results.json ADDED
@@ -0,0 +1,8 @@
+ {
+   "epoch": 1.98,
+   "train_loss": 0.5396593997353002,
+   "train_runtime": 4845.5764,
+   "train_samples": 12227,
+   "train_samples_per_second": 5.047,
+   "train_steps_per_second": 0.039
+ }
trainer_state.json ADDED
@@ -0,0 +1,615 @@
+ {
+   "best_metric": null,
+   "best_model_checkpoint": null,
+   "epoch": 1.9843342036553526,
+   "eval_steps": 500,
+   "global_step": 190,
+   "is_hyper_param_search": false,
+   "is_local_process_zero": true,
+   "is_world_process_zero": true,
+   "log_history": [
+     {
+       "epoch": 0.01,
+       "grad_norm": 6.530893553837012,
+       "learning_rate": 2.6315789473684208e-08,
+       "logits/chosen": -2.851747512817383,
+       "logits/rejected": -2.833996534347534,
+       "logps/chosen": -165.70089721679688,
+       "logps/rejected": -198.857666015625,
+       "loss": 0.6931,
+       "rewards/accuracies": 0.0,
+       "rewards/chosen": 0.0,
+       "rewards/margins": 0.0,
+       "rewards/rejected": 0.0,
+       "step": 1
+     },
+     {
+       "epoch": 0.05,
+       "grad_norm": 5.940017766790383,
+       "learning_rate": 1.3157894736842104e-07,
+       "logits/chosen": -2.7694077491760254,
+       "logits/rejected": -2.772179365158081,
+       "logps/chosen": -171.38229370117188,
+       "logps/rejected": -172.59909057617188,
+       "loss": 0.6932,
+       "rewards/accuracies": 0.3125,
+       "rewards/chosen": -0.00019297577091492712,
+       "rewards/margins": -0.0003426831099204719,
+       "rewards/rejected": 0.00014970726624596864,
+       "step": 5
+     },
+     {
+       "epoch": 0.1,
+       "grad_norm": 7.22860361762137,
+       "learning_rate": 2.631578947368421e-07,
+       "logits/chosen": -2.784412384033203,
+       "logits/rejected": -2.7932095527648926,
+       "logps/chosen": -190.19732666015625,
+       "logps/rejected": -194.6861572265625,
+       "loss": 0.6929,
+       "rewards/accuracies": 0.550000011920929,
+       "rewards/chosen": 0.0017464166739955544,
+       "rewards/margins": 0.0005834165494889021,
+       "rewards/rejected": 0.0011630002409219742,
+       "step": 10
+     },
+     {
+       "epoch": 0.16,
+       "grad_norm": 7.203100204300102,
+       "learning_rate": 3.9473684210526315e-07,
+       "logits/chosen": -2.8401434421539307,
+       "logits/rejected": -2.8505945205688477,
+       "logps/chosen": -198.759521484375,
+       "logps/rejected": -188.010986328125,
+       "loss": 0.6919,
+       "rewards/accuracies": 0.550000011920929,
+       "rewards/chosen": 0.012036855332553387,
+       "rewards/margins": 0.0030940533615648746,
+       "rewards/rejected": 0.0089428024366498,
+       "step": 15
+     },
+     {
+       "epoch": 0.21,
+       "grad_norm": 6.864101673229198,
+       "learning_rate": 4.999578104083306e-07,
+       "logits/chosen": -2.8547134399414062,
+       "logits/rejected": -2.864978075027466,
+       "logps/chosen": -165.3108673095703,
+       "logps/rejected": -177.08412170410156,
+       "loss": 0.6882,
+       "rewards/accuracies": 0.612500011920929,
+       "rewards/chosen": 0.034319791942834854,
+       "rewards/margins": 0.008450874127447605,
+       "rewards/rejected": 0.025868916884064674,
+       "step": 20
+     },
+     {
+       "epoch": 0.26,
+       "grad_norm": 6.3583895035816855,
+       "learning_rate": 4.984826693294873e-07,
+       "logits/chosen": -2.8253941535949707,
+       "logits/rejected": -2.8230338096618652,
+       "logps/chosen": -125.37552642822266,
+       "logps/rejected": -145.67837524414062,
+       "loss": 0.6822,
+       "rewards/accuracies": 0.668749988079071,
+       "rewards/chosen": 0.06096485257148743,
+       "rewards/margins": 0.02324753999710083,
+       "rewards/rejected": 0.0377173088490963,
+       "step": 25
+     },
+     {
+       "epoch": 0.31,
+       "grad_norm": 6.95430506418983,
+       "learning_rate": 4.949122667718934e-07,
+       "logits/chosen": -2.797011613845825,
+       "logits/rejected": -2.7968087196350098,
+       "logps/chosen": -156.46078491210938,
+       "logps/rejected": -166.90145874023438,
+       "loss": 0.6741,
+       "rewards/accuracies": 0.668749988079071,
+       "rewards/chosen": 0.030187183991074562,
+       "rewards/margins": 0.04637282341718674,
+       "rewards/rejected": -0.016185639426112175,
+       "step": 30
+     },
+     {
+       "epoch": 0.37,
+       "grad_norm": 7.0189636592719085,
+       "learning_rate": 4.892767091689785e-07,
+       "logits/chosen": -2.849172592163086,
+       "logits/rejected": -2.8440544605255127,
+       "logps/chosen": -193.01596069335938,
+       "logps/rejected": -207.4866180419922,
+       "loss": 0.664,
+       "rewards/accuracies": 0.574999988079071,
+       "rewards/chosen": -0.01744670793414116,
+       "rewards/margins": 0.05499963089823723,
+       "rewards/rejected": -0.07244633138179779,
+       "step": 35
+     },
+     {
+       "epoch": 0.42,
+       "grad_norm": 8.51056311217898,
+       "learning_rate": 4.816235168037004e-07,
+       "logits/chosen": -2.7640957832336426,
+       "logits/rejected": -2.776198387145996,
+       "logps/chosen": -137.4984130859375,
+       "logps/rejected": -163.426513671875,
+       "loss": 0.6517,
+       "rewards/accuracies": 0.65625,
+       "rewards/chosen": -0.046394020318984985,
+       "rewards/margins": 0.10987555980682373,
+       "rewards/rejected": -0.15626958012580872,
+       "step": 40
+     },
+     {
+       "epoch": 0.47,
+       "grad_norm": 11.159926429466159,
+       "learning_rate": 4.720172231068844e-07,
+       "logits/chosen": -2.723865032196045,
+       "logits/rejected": -2.7389719486236572,
+       "logps/chosen": -207.92044067382812,
+       "logps/rejected": -213.3718719482422,
+       "loss": 0.6536,
+       "rewards/accuracies": 0.675000011920929,
+       "rewards/chosen": -0.19272716343402863,
+       "rewards/margins": 0.12425712496042252,
+       "rewards/rejected": -0.31698426604270935,
+       "step": 45
+     },
+     {
+       "epoch": 0.52,
+       "grad_norm": 12.393423854122126,
+       "learning_rate": 4.605388304968914e-07,
+       "logits/chosen": -2.718982696533203,
+       "logits/rejected": -2.7207980155944824,
+       "logps/chosen": -178.42510986328125,
+       "logps/rejected": -211.27377319335938,
+       "loss": 0.6282,
+       "rewards/accuracies": 0.668749988079071,
+       "rewards/chosen": -0.040986787527799606,
+       "rewards/margins": 0.17269372940063477,
+       "rewards/rejected": -0.21368053555488586,
+       "step": 50
+     },
+     {
+       "epoch": 0.57,
+       "grad_norm": 21.049387798378756,
+       "learning_rate": 4.472851273490984e-07,
+       "logits/chosen": -2.5929465293884277,
+       "logits/rejected": -2.6008853912353516,
+       "logps/chosen": -160.97874450683594,
+       "logps/rejected": -186.36863708496094,
+       "loss": 0.6258,
+       "rewards/accuracies": 0.625,
+       "rewards/chosen": -0.12865906953811646,
+       "rewards/margins": 0.20724160969257355,
+       "rewards/rejected": -0.3359006643295288,
+       "step": 55
+     },
+     {
+       "epoch": 0.63,
+       "grad_norm": 12.520960096291708,
+       "learning_rate": 4.323678718546552e-07,
+       "logits/chosen": -2.648369312286377,
+       "logits/rejected": -2.645002603530884,
+       "logps/chosen": -190.52731323242188,
+       "logps/rejected": -241.7861328125,
+       "loss": 0.6096,
+       "rewards/accuracies": 0.675000011920929,
+       "rewards/chosen": -0.18569248914718628,
+       "rewards/margins": 0.27379462122917175,
+       "rewards/rejected": -0.45948711037635803,
+       "step": 60
+     },
+     {
+       "epoch": 0.68,
+       "grad_norm": 18.22398171524467,
+       "learning_rate": 4.159128496504053e-07,
+       "logits/chosen": -2.635317325592041,
+       "logits/rejected": -2.6489968299865723,
+       "logps/chosen": -185.21597290039062,
+       "logps/rejected": -226.77963256835938,
+       "loss": 0.5957,
+       "rewards/accuracies": 0.7250000238418579,
+       "rewards/chosen": -0.23138824105262756,
+       "rewards/margins": 0.33824095129966736,
+       "rewards/rejected": -0.5696292519569397,
+       "step": 65
+     },
+     {
+       "epoch": 0.73,
+       "grad_norm": 12.486094279730978,
+       "learning_rate": 3.9805881316624503e-07,
+       "logits/chosen": -2.5691702365875244,
+       "logits/rejected": -2.5781631469726562,
+       "logps/chosen": -209.5417938232422,
+       "logps/rejected": -230.74874877929688,
+       "loss": 0.597,
+       "rewards/accuracies": 0.675000011920929,
+       "rewards/chosen": -0.3240502178668976,
+       "rewards/margins": 0.24576255679130554,
+       "rewards/rejected": -0.5698127150535583,
+       "step": 70
+     },
+     {
+       "epoch": 0.78,
+       "grad_norm": 16.18752353365872,
+       "learning_rate": 3.78956311633581e-07,
+       "logits/chosen": -2.5225658416748047,
+       "logits/rejected": -2.519923448562622,
+       "logps/chosen": -189.95606994628906,
+       "logps/rejected": -244.0750732421875,
+       "loss": 0.583,
+       "rewards/accuracies": 0.71875,
+       "rewards/chosen": -0.18096503615379333,
+       "rewards/margins": 0.4236725866794586,
+       "rewards/rejected": -0.604637622833252,
+       "step": 75
+     },
+     {
+       "epoch": 0.84,
+       "grad_norm": 16.73585640066424,
+       "learning_rate": 3.587664216205183e-07,
+       "logits/chosen": -2.4873414039611816,
+       "logits/rejected": -2.488546133041382,
+       "logps/chosen": -186.70196533203125,
+       "logps/rejected": -229.10800170898438,
+       "loss": 0.5896,
+       "rewards/accuracies": 0.7250000238418579,
+       "rewards/chosen": -0.1871272623538971,
+       "rewards/margins": 0.35064736008644104,
+       "rewards/rejected": -0.5377745628356934,
+       "step": 80
+     },
+     {
+       "epoch": 0.89,
+       "grad_norm": 14.48535258453445,
+       "learning_rate": 3.376593887981886e-07,
+       "logits/chosen": -2.5321877002716064,
+       "logits/rejected": -2.5364866256713867,
+       "logps/chosen": -201.63038635253906,
+       "logps/rejected": -250.297607421875,
+       "loss": 0.5943,
+       "rewards/accuracies": 0.643750011920929,
+       "rewards/chosen": -0.4144672751426697,
+       "rewards/margins": 0.3995351493358612,
+       "rewards/rejected": -0.8140023946762085,
+       "step": 85
+     },
+     {
+       "epoch": 0.94,
+       "grad_norm": 24.006493530801364,
+       "learning_rate": 3.1581319239114976e-07,
+       "logits/chosen": -2.46376633644104,
+       "logits/rejected": -2.4762072563171387,
+       "logps/chosen": -235.89602661132812,
+       "logps/rejected": -286.06207275390625,
+       "loss": 0.5905,
+       "rewards/accuracies": 0.668749988079071,
+       "rewards/chosen": -0.6695653200149536,
+       "rewards/margins": 0.43523311614990234,
+       "rewards/rejected": -1.1047985553741455,
+       "step": 90
+     },
+     {
+       "epoch": 0.99,
+       "grad_norm": 13.899105116058017,
+       "learning_rate": 2.934120444167326e-07,
+       "logits/chosen": -2.5002081394195557,
+       "logits/rejected": -2.515636920928955,
+       "logps/chosen": -240.7231903076172,
+       "logps/rejected": -282.92974853515625,
+       "loss": 0.579,
+       "rewards/accuracies": 0.6937500238418579,
+       "rewards/chosen": -0.5511455535888672,
+       "rewards/margins": 0.450370728969574,
+       "rewards/rejected": -1.001516342163086,
+       "step": 95
+     },
+     {
+       "epoch": 1.04,
+       "grad_norm": 12.195720253264156,
+       "learning_rate": 2.706448363680831e-07,
+       "logits/chosen": -2.492196559906006,
+       "logits/rejected": -2.4908487796783447,
+       "logps/chosen": -217.4730224609375,
+       "logps/rejected": -285.6746826171875,
+       "loss": 0.4963,
+       "rewards/accuracies": 0.7749999761581421,
+       "rewards/chosen": -0.3912954330444336,
+       "rewards/margins": 0.6321467161178589,
+       "rewards/rejected": -1.023442268371582,
+       "step": 100
+     },
+     {
+       "epoch": 1.1,
+       "grad_norm": 13.625308588645998,
+       "learning_rate": 2.477035464388184e-07,
+       "logits/chosen": -2.3943684101104736,
+       "logits/rejected": -2.3905301094055176,
+       "logps/chosen": -160.0459442138672,
+       "logps/rejected": -262.6797790527344,
+       "loss": 0.4563,
+       "rewards/accuracies": 0.8374999761581421,
+       "rewards/chosen": -0.28104764223098755,
+       "rewards/margins": 0.8251093029975891,
+       "rewards/rejected": -1.1061569452285767,
+       "step": 105
+     },
+     {
+       "epoch": 1.15,
+       "grad_norm": 16.514733311854357,
+       "learning_rate": 2.2478162071993296e-07,
+       "logits/chosen": -2.367004156112671,
+       "logits/rejected": -2.3704023361206055,
+       "logps/chosen": -212.1750946044922,
+       "logps/rejected": -292.59295654296875,
+       "loss": 0.462,
+       "rewards/accuracies": 0.875,
+       "rewards/chosen": -0.39816007018089294,
+       "rewards/margins": 0.7662478089332581,
+       "rewards/rejected": -1.1644079685211182,
+       "step": 110
+     },
+     {
+       "epoch": 1.2,
+       "grad_norm": 15.624289502053674,
+       "learning_rate": 2.0207234201906545e-07,
+       "logits/chosen": -2.3321692943573,
+       "logits/rejected": -2.3237671852111816,
+       "logps/chosen": -199.48365783691406,
+       "logps/rejected": -294.98834228515625,
+       "loss": 0.4647,
+       "rewards/accuracies": 0.8062499761581421,
+       "rewards/chosen": -0.5382081866264343,
+       "rewards/margins": 0.7745588421821594,
+       "rewards/rejected": -1.3127670288085938,
+       "step": 115
+     },
+     {
+       "epoch": 1.25,
+       "grad_norm": 16.693818527228895,
+       "learning_rate": 1.7976720005660767e-07,
+       "logits/chosen": -2.3294899463653564,
+       "logits/rejected": -2.3380885124206543,
+       "logps/chosen": -236.26138305664062,
+       "logps/rejected": -317.8197326660156,
+       "loss": 0.4734,
+       "rewards/accuracies": 0.793749988079071,
+       "rewards/chosen": -0.5919082164764404,
+       "rewards/margins": 0.8398138284683228,
+       "rewards/rejected": -1.4317219257354736,
+       "step": 120
+     },
+     {
+       "epoch": 1.31,
+       "grad_norm": 18.248981197195587,
+       "learning_rate": 1.5805427678152674e-07,
+       "logits/chosen": -2.3658409118652344,
+       "logits/rejected": -2.3801846504211426,
+       "logps/chosen": -252.34451293945312,
+       "logps/rejected": -347.273193359375,
+       "loss": 0.434,
+       "rewards/accuracies": 0.84375,
+       "rewards/chosen": -0.6862586736679077,
+       "rewards/margins": 0.8617037534713745,
+       "rewards/rejected": -1.5479624271392822,
+       "step": 125
+     },
+     {
+       "epoch": 1.36,
+       "grad_norm": 18.599023816787692,
+       "learning_rate": 1.371166604222777e-07,
+       "logits/chosen": -2.3468356132507324,
+       "logits/rejected": -2.3559298515319824,
+       "logps/chosen": -222.77267456054688,
+       "logps/rejected": -324.4059143066406,
+       "loss": 0.4462,
+       "rewards/accuracies": 0.831250011920929,
+       "rewards/chosen": -0.609306275844574,
+       "rewards/margins": 0.9141504168510437,
+       "rewards/rejected": -1.5234566926956177,
+       "step": 130
+     },
+     {
+       "epoch": 1.41,
+       "grad_norm": 18.279705611333885,
+       "learning_rate": 1.1713090164588606e-07,
+       "logits/chosen": -2.3474090099334717,
+       "logits/rejected": -2.3513846397399902,
+       "logps/chosen": -237.4060516357422,
+       "logps/rejected": -349.4905700683594,
+       "loss": 0.4436,
+       "rewards/accuracies": 0.8374999761581421,
+       "rewards/chosen": -0.6407004594802856,
+       "rewards/margins": 0.92046058177948,
+       "rewards/rejected": -1.5611611604690552,
+       "step": 135
+     },
+     {
+       "epoch": 1.46,
+       "grad_norm": 21.064741442339784,
+       "learning_rate": 9.826552484321085e-08,
+       "logits/chosen": -2.2716422080993652,
+       "logits/rejected": -2.270905017852783,
+       "logps/chosen": -254.72885131835938,
+       "logps/rejected": -353.58477783203125,
+       "loss": 0.4319,
+       "rewards/accuracies": 0.8500000238418579,
+       "rewards/chosen": -0.7879461050033569,
+       "rewards/margins": 1.0126745700836182,
+       "rewards/rejected": -1.800620675086975,
+       "step": 140
+     },
+     {
+       "epoch": 1.51,
+       "grad_norm": 18.966914430346105,
+       "learning_rate": 8.067960709356478e-08,
+       "logits/chosen": -2.1971030235290527,
+       "logits/rejected": -2.2067959308624268,
+       "logps/chosen": -210.01760864257812,
+       "logps/rejected": -323.2452697753906,
+       "loss": 0.422,
+       "rewards/accuracies": 0.800000011920929,
+       "rewards/chosen": -0.6746965646743774,
+       "rewards/margins": 1.0784823894500732,
+       "rewards/rejected": -1.7531789541244507,
+       "step": 145
+     },
+     {
+       "epoch": 1.57,
+       "grad_norm": 19.251567468071876,
+       "learning_rate": 6.452143679117964e-08,
+       "logits/chosen": -2.269256114959717,
+       "logits/rejected": -2.2733891010284424,
+       "logps/chosen": -269.5815124511719,
+       "logps/rejected": -360.28729248046875,
+       "loss": 0.4286,
+       "rewards/accuracies": 0.793749988079071,
+       "rewards/chosen": -0.8714927434921265,
+       "rewards/margins": 0.9906436204910278,
+       "rewards/rejected": -1.8621364831924438,
+       "step": 150
+     },
+     {
+       "epoch": 1.62,
+       "grad_norm": 20.881384471810865,
+       "learning_rate": 4.992726324427901e-08,
+       "logits/chosen": -2.2466838359832764,
+       "logits/rejected": -2.2517459392547607,
+       "logps/chosen": -229.53665161132812,
+       "logps/rejected": -345.5362243652344,
+       "loss": 0.4455,
+       "rewards/accuracies": 0.78125,
+       "rewards/chosen": -0.835593581199646,
+       "rewards/margins": 0.9400846362113953,
+       "rewards/rejected": -1.775678277015686,
+       "step": 155
+     },
+     {
+       "epoch": 1.67,
+       "grad_norm": 21.71541477038859,
+       "learning_rate": 3.702014779041826e-08,
+       "logits/chosen": -2.186835765838623,
+       "logits/rejected": -2.1860439777374268,
+       "logps/chosen": -272.96112060546875,
+       "logps/rejected": -380.77685546875,
+       "loss": 0.4318,
+       "rewards/accuracies": 0.800000011920929,
+       "rewards/chosen": -1.01149320602417,
+       "rewards/margins": 1.0301023721694946,
+       "rewards/rejected": -2.041595697402954,
+       "step": 160
+     },
+     {
+       "epoch": 1.72,
+       "grad_norm": 23.87035641942232,
+       "learning_rate": 2.5908926115744994e-08,
+       "logits/chosen": -2.3079283237457275,
+       "logits/rejected": -2.320064067840576,
+       "logps/chosen": -277.4764099121094,
+       "logps/rejected": -361.50311279296875,
+       "loss": 0.4383,
+       "rewards/accuracies": 0.7562500238418579,
+       "rewards/chosen": -1.0418604612350464,
+       "rewards/margins": 0.8757144808769226,
+       "rewards/rejected": -1.9175748825073242,
+       "step": 165
+     },
+     {
+       "epoch": 1.78,
+       "grad_norm": 21.532935828831377,
+       "learning_rate": 1.6687290528135722e-08,
+       "logits/chosen": -2.2234318256378174,
+       "logits/rejected": -2.235042095184326,
+       "logps/chosen": -243.2294158935547,
+       "logps/rejected": -327.7035217285156,
+       "loss": 0.433,
+       "rewards/accuracies": 0.78125,
+       "rewards/chosen": -0.8599830865859985,
+       "rewards/margins": 0.809087872505188,
+       "rewards/rejected": -1.669070839881897,
+       "step": 170
+     },
+     {
+       "epoch": 1.83,
+       "grad_norm": 22.322246598167897,
+       "learning_rate": 9.432999922687396e-09,
+       "logits/chosen": -2.2345292568206787,
+       "logits/rejected": -2.2446045875549316,
+       "logps/chosen": -282.7356872558594,
+       "logps/rejected": -373.1545104980469,
+       "loss": 0.4213,
+       "rewards/accuracies": 0.84375,
+       "rewards/chosen": -0.9868669509887695,
+       "rewards/margins": 1.0404006242752075,
+       "rewards/rejected": -2.0272676944732666,
+       "step": 175
+     },
+     {
+       "epoch": 1.88,
+       "grad_norm": 19.226686156526466,
+       "learning_rate": 4.207224101311246e-09,
+       "logits/chosen": -2.2762608528137207,
+       "logits/rejected": -2.290148973464966,
+       "logps/chosen": -289.68035888671875,
+       "logps/rejected": -380.23883056640625,
+       "loss": 0.4364,
+       "rewards/accuracies": 0.8374999761581421,
+       "rewards/chosen": -1.0276674032211304,
+       "rewards/margins": 0.9330458641052246,
+       "rewards/rejected": -1.9607131481170654,
+       "step": 180
+     },
+     {
+       "epoch": 1.93,
+       "grad_norm": 21.532509699275593,
+       "learning_rate": 1.0540279752731252e-09,
+       "logits/chosen": -2.232375383377075,
+       "logits/rejected": -2.242682456970215,
+       "logps/chosen": -278.67156982421875,
+       "logps/rejected": -388.5971374511719,
+       "loss": 0.4303,
+       "rewards/accuracies": 0.78125,
+       "rewards/chosen": -1.0453999042510986,
+       "rewards/margins": 0.9826061129570007,
+       "rewards/rejected": -2.028006076812744,
+       "step": 185
+     },
+     {
+       "epoch": 1.98,
+       "grad_norm": 20.268430701124647,
+       "learning_rate": 0.0,
+       "logits/chosen": -2.247197151184082,
+       "logits/rejected": -2.25414776802063,
+       "logps/chosen": -272.12359619140625,
+       "logps/rejected": -399.1447448730469,
+       "loss": 0.4268,
+       "rewards/accuracies": 0.8062499761581421,
+       "rewards/chosen": -1.060980200767517,
+       "rewards/margins": 1.1344108581542969,
+       "rewards/rejected": -2.1953911781311035,
+       "step": 190
+     },
+     {
+       "epoch": 1.98,
+       "step": 190,
+       "total_flos": 0.0,
+       "train_loss": 0.5396593997353002,
+       "train_runtime": 4845.5764,
+       "train_samples_per_second": 5.047,
+       "train_steps_per_second": 0.039
+     }
+   ],
+   "logging_steps": 5,
+   "max_steps": 190,
+   "num_input_tokens_seen": 0,
+   "num_train_epochs": 2,
+   "save_steps": 500,
+   "total_flos": 0.0,
+   "train_batch_size": 8,
+   "trial_name": null,
+   "trial_params": null
+ }
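
The log_history entries carry DPO-style metrics (rewards/chosen, rewards/rejected, rewards/margins, reward accuracies) every 5 steps: over the run the loss falls from 0.693 (ln 2, the DPO loss at zero margin) to about 0.43 while the reward margin grows from ~0 to ~1.13 and accuracy climbs past 0.8, the expected trajectory for a converging preference-optimization run. A sketch for pulling that curve out of the file:

```python
import json

with open("trainer_state.json") as f:
    state = json.load(f)

# The final summary entry lacks per-step reward metrics, so filter by key.
points = [
    (e["step"], e["loss"], e["rewards/margins"])
    for e in state["log_history"]
    if "rewards/margins" in e
]
for step, loss, margin in points:
    print(f"step {step:>3}  loss {loss:.4f}  margin {margin:+.3f}")
```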
training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:969f12a28e99661b30eeabdf8d0e2243dc15c2d54a328645c228f717d9d40f35
+ size 6200
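
training_args.bin is the pickled TrainingArguments object, so reading it back needs transformers importable in the environment; on recent PyTorch, torch.load must also be told to allow full unpickling. A sketch (only load pickles from sources you trust):

```python
import torch

# weights_only=False permits unpickling arbitrary objects (trusted file only).
args = torch.load("training_args.bin", weights_only=False)
print(args.learning_rate, args.per_device_train_batch_size, args.num_train_epochs)
```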