duyduong9htv committed on
Commit
bbc2414
·
1 Parent(s): e9409a0

Training in progress, step 500

Browse files
.gitignore ADDED
@@ -0,0 +1 @@
 
 
1
+ checkpoint-*/
config.json ADDED
@@ -0,0 +1,184 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_commit_hash": null,
3
+ "architectures": [
4
+ "EncoderDecoderModel"
5
+ ],
6
+ "decoder": {
7
+ "_name_or_path": "bert-base-uncased",
8
+ "add_cross_attention": true,
9
+ "architectures": [
10
+ "BertForMaskedLM"
11
+ ],
12
+ "attention_probs_dropout_prob": 0.1,
13
+ "bad_words_ids": null,
14
+ "begin_suppress_tokens": null,
15
+ "bos_token_id": null,
16
+ "chunk_size_feed_forward": 0,
17
+ "classifier_dropout": null,
18
+ "cross_attention_hidden_size": null,
19
+ "decoder_start_token_id": null,
20
+ "diversity_penalty": 0.0,
21
+ "do_sample": false,
22
+ "early_stopping": false,
23
+ "encoder_no_repeat_ngram_size": 0,
24
+ "eos_token_id": null,
25
+ "exponential_decay_length_penalty": null,
26
+ "finetuning_task": null,
27
+ "forced_bos_token_id": null,
28
+ "forced_eos_token_id": null,
29
+ "gradient_checkpointing": false,
30
+ "hidden_act": "gelu",
31
+ "hidden_dropout_prob": 0.1,
32
+ "hidden_size": 768,
33
+ "id2label": {
34
+ "0": "LABEL_0",
35
+ "1": "LABEL_1"
36
+ },
37
+ "initializer_range": 0.02,
38
+ "intermediate_size": 3072,
39
+ "is_decoder": true,
40
+ "is_encoder_decoder": false,
41
+ "label2id": {
42
+ "LABEL_0": 0,
43
+ "LABEL_1": 1
44
+ },
45
+ "layer_norm_eps": 1e-12,
46
+ "length_penalty": 1.0,
47
+ "max_length": 20,
48
+ "max_position_embeddings": 512,
49
+ "min_length": 0,
50
+ "model_type": "bert",
51
+ "no_repeat_ngram_size": 0,
52
+ "num_attention_heads": 12,
53
+ "num_beam_groups": 1,
54
+ "num_beams": 1,
55
+ "num_hidden_layers": 12,
56
+ "num_return_sequences": 1,
57
+ "output_attentions": false,
58
+ "output_hidden_states": false,
59
+ "output_scores": false,
60
+ "pad_token_id": 0,
61
+ "position_embedding_type": "absolute",
62
+ "prefix": null,
63
+ "problem_type": null,
64
+ "pruned_heads": {},
65
+ "remove_invalid_values": false,
66
+ "repetition_penalty": 1.0,
67
+ "return_dict": true,
68
+ "return_dict_in_generate": false,
69
+ "sep_token_id": null,
70
+ "suppress_tokens": null,
71
+ "task_specific_params": null,
72
+ "temperature": 1.0,
73
+ "tf_legacy_loss": false,
74
+ "tie_encoder_decoder": false,
75
+ "tie_word_embeddings": true,
76
+ "tokenizer_class": null,
77
+ "top_k": 50,
78
+ "top_p": 1.0,
79
+ "torch_dtype": null,
80
+ "torchscript": false,
81
+ "transformers_version": "4.26.1",
82
+ "type_vocab_size": 2,
83
+ "typical_p": 1.0,
84
+ "use_bfloat16": false,
85
+ "use_cache": true,
86
+ "vocab_size": 30522
87
+ },
88
+ "decoder_start_token_id": 101,
89
+ "early_stopping": true,
90
+ "encoder": {
91
+ "_name_or_path": "bert-base-uncased",
92
+ "add_cross_attention": false,
93
+ "architectures": [
94
+ "BertForMaskedLM"
95
+ ],
96
+ "attention_probs_dropout_prob": 0.1,
97
+ "bad_words_ids": null,
98
+ "begin_suppress_tokens": null,
99
+ "bos_token_id": null,
100
+ "chunk_size_feed_forward": 0,
101
+ "classifier_dropout": null,
102
+ "cross_attention_hidden_size": null,
103
+ "decoder_start_token_id": null,
104
+ "diversity_penalty": 0.0,
105
+ "do_sample": false,
106
+ "early_stopping": false,
107
+ "encoder_no_repeat_ngram_size": 0,
108
+ "eos_token_id": null,
109
+ "exponential_decay_length_penalty": null,
110
+ "finetuning_task": null,
111
+ "forced_bos_token_id": null,
112
+ "forced_eos_token_id": null,
113
+ "gradient_checkpointing": false,
114
+ "hidden_act": "gelu",
115
+ "hidden_dropout_prob": 0.1,
116
+ "hidden_size": 768,
117
+ "id2label": {
118
+ "0": "LABEL_0",
119
+ "1": "LABEL_1"
120
+ },
121
+ "initializer_range": 0.02,
122
+ "intermediate_size": 3072,
123
+ "is_decoder": false,
124
+ "is_encoder_decoder": false,
125
+ "label2id": {
126
+ "LABEL_0": 0,
127
+ "LABEL_1": 1
128
+ },
129
+ "layer_norm_eps": 1e-12,
130
+ "length_penalty": 1.0,
131
+ "max_length": 20,
132
+ "max_position_embeddings": 512,
133
+ "min_length": 0,
134
+ "model_type": "bert",
135
+ "no_repeat_ngram_size": 0,
136
+ "num_attention_heads": 12,
137
+ "num_beam_groups": 1,
138
+ "num_beams": 1,
139
+ "num_hidden_layers": 12,
140
+ "num_return_sequences": 1,
141
+ "output_attentions": false,
142
+ "output_hidden_states": false,
143
+ "output_scores": false,
144
+ "pad_token_id": 0,
145
+ "position_embedding_type": "absolute",
146
+ "prefix": null,
147
+ "problem_type": null,
148
+ "pruned_heads": {},
149
+ "remove_invalid_values": false,
150
+ "repetition_penalty": 1.0,
151
+ "return_dict": true,
152
+ "return_dict_in_generate": false,
153
+ "sep_token_id": null,
154
+ "suppress_tokens": null,
155
+ "task_specific_params": null,
156
+ "temperature": 1.0,
157
+ "tf_legacy_loss": false,
158
+ "tie_encoder_decoder": false,
159
+ "tie_word_embeddings": true,
160
+ "tokenizer_class": null,
161
+ "top_k": 50,
162
+ "top_p": 1.0,
163
+ "torch_dtype": null,
164
+ "torchscript": false,
165
+ "transformers_version": "4.26.1",
166
+ "type_vocab_size": 2,
167
+ "typical_p": 1.0,
168
+ "use_bfloat16": false,
169
+ "use_cache": true,
170
+ "vocab_size": 30522
171
+ },
172
+ "eos_token_id": 102,
173
+ "is_encoder_decoder": true,
174
+ "length_penalty": 2.0,
175
+ "max_length": 128,
176
+ "min_length": 56,
177
+ "model_type": "encoder-decoder",
178
+ "no_repeat_ngram_size": 3,
179
+ "num_beams": 4,
180
+ "pad_token_id": 0,
181
+ "torch_dtype": "float32",
182
+ "transformers_version": null,
183
+ "vocab_size": 30522
184
+ }
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a8399abb99bf2be1de125a4695450f09d3429460d349b5c1bac5d8fd6814c3a3
3
+ size 989644857
runs/Mar28_15-40-43_e726d06b80ed/1680018048.9185486/events.out.tfevents.1680018048.e726d06b80ed.1124.3 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bf316817c327abf861aa0e3221df3e6e1332165ae6ac6c71f6da8418197f72b6
3
+ size 5869
runs/Mar28_15-40-43_e726d06b80ed/events.out.tfevents.1680018048.e726d06b80ed.1124.2 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5cdef985c8d2bd47d0acb6b050404c4fc6149f1659354a69735441a0aa04a1b4
3
+ size 8137
special_tokens_map.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": "[CLS]",
3
+ "cls_token": "[CLS]",
4
+ "eos_token": "[SEP]",
5
+ "mask_token": "[MASK]",
6
+ "pad_token": "[PAD]",
7
+ "sep_token": "[SEP]",
8
+ "unk_token": "[UNK]"
9
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cls_token": "[CLS]",
3
+ "do_lower_case": true,
4
+ "mask_token": "[MASK]",
5
+ "model_max_length": 512,
6
+ "name_or_path": "bert-base-uncased",
7
+ "pad_token": "[PAD]",
8
+ "sep_token": "[SEP]",
9
+ "special_tokens_map_file": null,
10
+ "strip_accents": null,
11
+ "tokenize_chinese_chars": true,
12
+ "tokenizer_class": "BertTokenizer",
13
+ "unk_token": "[UNK]"
14
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:68920b34c1e49ac4d127a507049288c7cdc2da1d7f595e9a15a90f0a3f2c7aa7
3
+ size 3579
vocab.txt ADDED
The diff for this file is too large to render. See raw diff
 
wandb/run-20230328_153334-3xrv5t9j/files/config.yaml ADDED
@@ -0,0 +1,722 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ wandb_version: 1
2
+
3
+ _commit_hash:
4
+ desc: null
5
+ value: null
6
+ _name_or_path:
7
+ desc: null
8
+ value: ''
9
+ _wandb:
10
+ desc: null
11
+ value:
12
+ cli_version: 0.13.10
13
+ framework: huggingface
14
+ huggingface_version: 4.26.1
15
+ is_jupyter_run: true
16
+ is_kaggle_kernel: true
17
+ m:
18
+ - 1: train/global_step
19
+ 6:
20
+ - 3
21
+ python_version: 3.7.12
22
+ start_time: 1680017614.402597
23
+ t:
24
+ 1:
25
+ - 1
26
+ - 2
27
+ - 3
28
+ - 5
29
+ - 11
30
+ - 12
31
+ - 49
32
+ - 51
33
+ - 53
34
+ - 55
35
+ - 71
36
+ 2:
37
+ - 1
38
+ - 2
39
+ - 3
40
+ - 5
41
+ - 11
42
+ - 12
43
+ - 49
44
+ - 51
45
+ - 53
46
+ - 55
47
+ - 71
48
+ 3:
49
+ - 7
50
+ - 23
51
+ 4: 3.7.12
52
+ 5: 0.13.10
53
+ 6: 4.26.1
54
+ 8:
55
+ - 1
56
+ - 2
57
+ - 5
58
+ adafactor:
59
+ desc: null
60
+ value: false
61
+ adam_beta1:
62
+ desc: null
63
+ value: 0.9
64
+ adam_beta2:
65
+ desc: null
66
+ value: 0.999
67
+ adam_epsilon:
68
+ desc: null
69
+ value: 1.0e-08
70
+ add_cross_attention:
71
+ desc: null
72
+ value: false
73
+ architectures:
74
+ desc: null
75
+ value: null
76
+ auto_find_batch_size:
77
+ desc: null
78
+ value: false
79
+ bad_words_ids:
80
+ desc: null
81
+ value: null
82
+ begin_suppress_tokens:
83
+ desc: null
84
+ value: null
85
+ bf16:
86
+ desc: null
87
+ value: false
88
+ bf16_full_eval:
89
+ desc: null
90
+ value: false
91
+ bos_token_id:
92
+ desc: null
93
+ value: null
94
+ chunk_size_feed_forward:
95
+ desc: null
96
+ value: 0
97
+ cross_attention_hidden_size:
98
+ desc: null
99
+ value: null
100
+ data_seed:
101
+ desc: null
102
+ value: None
103
+ dataloader_drop_last:
104
+ desc: null
105
+ value: false
106
+ dataloader_num_workers:
107
+ desc: null
108
+ value: 0
109
+ dataloader_pin_memory:
110
+ desc: null
111
+ value: true
112
+ ddp_bucket_cap_mb:
113
+ desc: null
114
+ value: None
115
+ ddp_find_unused_parameters:
116
+ desc: null
117
+ value: None
118
+ ddp_timeout:
119
+ desc: null
120
+ value: 1800
121
+ debug:
122
+ desc: null
123
+ value: '[]'
124
+ decoder:
125
+ desc: null
126
+ value:
127
+ _name_or_path: bert-base-uncased
128
+ add_cross_attention: true
129
+ architectures:
130
+ - BertForMaskedLM
131
+ attention_probs_dropout_prob: 0.1
132
+ bad_words_ids: null
133
+ begin_suppress_tokens: null
134
+ bos_token_id: null
135
+ chunk_size_feed_forward: 0
136
+ classifier_dropout: null
137
+ cross_attention_hidden_size: null
138
+ decoder_start_token_id: null
139
+ diversity_penalty: 0.0
140
+ do_sample: false
141
+ early_stopping: false
142
+ encoder_no_repeat_ngram_size: 0
143
+ eos_token_id: null
144
+ exponential_decay_length_penalty: null
145
+ finetuning_task: null
146
+ forced_bos_token_id: null
147
+ forced_eos_token_id: null
148
+ gradient_checkpointing: false
149
+ hidden_act: gelu
150
+ hidden_dropout_prob: 0.1
151
+ hidden_size: 768
152
+ id2label:
153
+ '0': LABEL_0
154
+ '1': LABEL_1
155
+ initializer_range: 0.02
156
+ intermediate_size: 3072
157
+ is_decoder: true
158
+ is_encoder_decoder: false
159
+ label2id:
160
+ LABEL_0: 0
161
+ LABEL_1: 1
162
+ layer_norm_eps: 1.0e-12
163
+ length_penalty: 1.0
164
+ max_length: 20
165
+ max_position_embeddings: 512
166
+ min_length: 0
167
+ model_type: bert
168
+ no_repeat_ngram_size: 0
169
+ num_attention_heads: 12
170
+ num_beam_groups: 1
171
+ num_beams: 1
172
+ num_hidden_layers: 12
173
+ num_return_sequences: 1
174
+ output_attentions: false
175
+ output_hidden_states: false
176
+ output_scores: false
177
+ pad_token_id: 0
178
+ position_embedding_type: absolute
179
+ prefix: null
180
+ problem_type: null
181
+ pruned_heads: {}
182
+ remove_invalid_values: false
183
+ repetition_penalty: 1.0
184
+ return_dict: true
185
+ return_dict_in_generate: false
186
+ sep_token_id: null
187
+ suppress_tokens: null
188
+ task_specific_params: null
189
+ temperature: 1.0
190
+ tf_legacy_loss: false
191
+ tie_encoder_decoder: false
192
+ tie_word_embeddings: true
193
+ tokenizer_class: null
194
+ top_k: 50
195
+ top_p: 1.0
196
+ torch_dtype: null
197
+ torchscript: false
198
+ transformers_version: 4.26.1
199
+ type_vocab_size: 2
200
+ typical_p: 1.0
201
+ use_bfloat16: false
202
+ use_cache: true
203
+ vocab_size: 30522
204
+ decoder_start_token_id:
205
+ desc: null
206
+ value: 101
207
+ deepspeed:
208
+ desc: null
209
+ value: None
210
+ disable_tqdm:
211
+ desc: null
212
+ value: false
213
+ diversity_penalty:
214
+ desc: null
215
+ value: 0.0
216
+ do_eval:
217
+ desc: null
218
+ value: true
219
+ do_predict:
220
+ desc: null
221
+ value: false
222
+ do_sample:
223
+ desc: null
224
+ value: false
225
+ do_train:
226
+ desc: null
227
+ value: false
228
+ early_stopping:
229
+ desc: null
230
+ value: true
231
+ encoder:
232
+ desc: null
233
+ value:
234
+ _name_or_path: bert-base-uncased
235
+ add_cross_attention: false
236
+ architectures:
237
+ - BertForMaskedLM
238
+ attention_probs_dropout_prob: 0.1
239
+ bad_words_ids: null
240
+ begin_suppress_tokens: null
241
+ bos_token_id: null
242
+ chunk_size_feed_forward: 0
243
+ classifier_dropout: null
244
+ cross_attention_hidden_size: null
245
+ decoder_start_token_id: null
246
+ diversity_penalty: 0.0
247
+ do_sample: false
248
+ early_stopping: false
249
+ encoder_no_repeat_ngram_size: 0
250
+ eos_token_id: null
251
+ exponential_decay_length_penalty: null
252
+ finetuning_task: null
253
+ forced_bos_token_id: null
254
+ forced_eos_token_id: null
255
+ gradient_checkpointing: false
256
+ hidden_act: gelu
257
+ hidden_dropout_prob: 0.1
258
+ hidden_size: 768
259
+ id2label:
260
+ '0': LABEL_0
261
+ '1': LABEL_1
262
+ initializer_range: 0.02
263
+ intermediate_size: 3072
264
+ is_decoder: false
265
+ is_encoder_decoder: false
266
+ label2id:
267
+ LABEL_0: 0
268
+ LABEL_1: 1
269
+ layer_norm_eps: 1.0e-12
270
+ length_penalty: 1.0
271
+ max_length: 20
272
+ max_position_embeddings: 512
273
+ min_length: 0
274
+ model_type: bert
275
+ no_repeat_ngram_size: 0
276
+ num_attention_heads: 12
277
+ num_beam_groups: 1
278
+ num_beams: 1
279
+ num_hidden_layers: 12
280
+ num_return_sequences: 1
281
+ output_attentions: false
282
+ output_hidden_states: false
283
+ output_scores: false
284
+ pad_token_id: 0
285
+ position_embedding_type: absolute
286
+ prefix: null
287
+ problem_type: null
288
+ pruned_heads: {}
289
+ remove_invalid_values: false
290
+ repetition_penalty: 1.0
291
+ return_dict: true
292
+ return_dict_in_generate: false
293
+ sep_token_id: null
294
+ suppress_tokens: null
295
+ task_specific_params: null
296
+ temperature: 1.0
297
+ tf_legacy_loss: false
298
+ tie_encoder_decoder: false
299
+ tie_word_embeddings: true
300
+ tokenizer_class: null
301
+ top_k: 50
302
+ top_p: 1.0
303
+ torch_dtype: null
304
+ torchscript: false
305
+ transformers_version: 4.26.1
306
+ type_vocab_size: 2
307
+ typical_p: 1.0
308
+ use_bfloat16: false
309
+ use_cache: true
310
+ vocab_size: 30522
311
+ encoder_no_repeat_ngram_size:
312
+ desc: null
313
+ value: 0
314
+ eos_token_id:
315
+ desc: null
316
+ value: 102
317
+ eval_accumulation_steps:
318
+ desc: null
319
+ value: None
320
+ eval_batch_size:
321
+ desc: null
322
+ value: 16
323
+ eval_delay:
324
+ desc: null
325
+ value: 0
326
+ eval_steps:
327
+ desc: null
328
+ value: 2000
329
+ evaluation_strategy:
330
+ desc: null
331
+ value: steps
332
+ exponential_decay_length_penalty:
333
+ desc: null
334
+ value: null
335
+ finetuning_task:
336
+ desc: null
337
+ value: null
338
+ forced_bos_token_id:
339
+ desc: null
340
+ value: null
341
+ forced_eos_token_id:
342
+ desc: null
343
+ value: null
344
+ fp16:
345
+ desc: null
346
+ value: true
347
+ fp16_backend:
348
+ desc: null
349
+ value: auto
350
+ fp16_full_eval:
351
+ desc: null
352
+ value: false
353
+ fp16_opt_level:
354
+ desc: null
355
+ value: O1
356
+ fsdp:
357
+ desc: null
358
+ value: '[]'
359
+ fsdp_min_num_params:
360
+ desc: null
361
+ value: 0
362
+ fsdp_transformer_layer_cls_to_wrap:
363
+ desc: null
364
+ value: None
365
+ full_determinism:
366
+ desc: null
367
+ value: false
368
+ generation_max_length:
369
+ desc: null
370
+ value: None
371
+ generation_num_beams:
372
+ desc: null
373
+ value: None
374
+ gradient_accumulation_steps:
375
+ desc: null
376
+ value: 1
377
+ gradient_checkpointing:
378
+ desc: null
379
+ value: false
380
+ greater_is_better:
381
+ desc: null
382
+ value: None
383
+ group_by_length:
384
+ desc: null
385
+ value: false
386
+ half_precision_backend:
387
+ desc: null
388
+ value: cuda_amp
389
+ hub_model_id:
390
+ desc: null
391
+ value: None
392
+ hub_private_repo:
393
+ desc: null
394
+ value: false
395
+ hub_strategy:
396
+ desc: null
397
+ value: every_save
398
+ hub_token:
399
+ desc: null
400
+ value: <HUB_TOKEN>
401
+ id2label:
402
+ desc: null
403
+ value:
404
+ '0': LABEL_0
405
+ '1': LABEL_1
406
+ ignore_data_skip:
407
+ desc: null
408
+ value: false
409
+ include_inputs_for_metrics:
410
+ desc: null
411
+ value: false
412
+ is_decoder:
413
+ desc: null
414
+ value: false
415
+ is_encoder_decoder:
416
+ desc: null
417
+ value: true
418
+ jit_mode_eval:
419
+ desc: null
420
+ value: false
421
+ label2id:
422
+ desc: null
423
+ value:
424
+ LABEL_0: 0
425
+ LABEL_1: 1
426
+ label_names:
427
+ desc: null
428
+ value: None
429
+ label_smoothing_factor:
430
+ desc: null
431
+ value: 0.0
432
+ learning_rate:
433
+ desc: null
434
+ value: 5.0e-05
435
+ length_column_name:
436
+ desc: null
437
+ value: length
438
+ length_penalty:
439
+ desc: null
440
+ value: 2.0
441
+ load_best_model_at_end:
442
+ desc: null
443
+ value: false
444
+ local_rank:
445
+ desc: null
446
+ value: -1
447
+ log_level:
448
+ desc: null
449
+ value: passive
450
+ log_level_replica:
451
+ desc: null
452
+ value: passive
453
+ log_on_each_node:
454
+ desc: null
455
+ value: true
456
+ logging_dir:
457
+ desc: null
458
+ value: ./runs/Mar28_15-40-43_e726d06b80ed
459
+ logging_first_step:
460
+ desc: null
461
+ value: false
462
+ logging_nan_inf_filter:
463
+ desc: null
464
+ value: true
465
+ logging_steps:
466
+ desc: null
467
+ value: 1000
468
+ logging_strategy:
469
+ desc: null
470
+ value: steps
471
+ lr_scheduler_type:
472
+ desc: null
473
+ value: linear
474
+ max_grad_norm:
475
+ desc: null
476
+ value: 1.0
477
+ max_length:
478
+ desc: null
479
+ value: 128
480
+ max_steps:
481
+ desc: null
482
+ value: -1
483
+ metric_for_best_model:
484
+ desc: null
485
+ value: None
486
+ min_length:
487
+ desc: null
488
+ value: 56
489
+ model_type:
490
+ desc: null
491
+ value: encoder-decoder
492
+ mp_parameters:
493
+ desc: null
494
+ value: ''
495
+ no_cuda:
496
+ desc: null
497
+ value: false
498
+ no_repeat_ngram_size:
499
+ desc: null
500
+ value: 3
501
+ num_beam_groups:
502
+ desc: null
503
+ value: 1
504
+ num_beams:
505
+ desc: null
506
+ value: 4
507
+ num_return_sequences:
508
+ desc: null
509
+ value: 1
510
+ num_train_epochs:
511
+ desc: null
512
+ value: 3.0
513
+ optim:
514
+ desc: null
515
+ value: adamw_hf
516
+ optim_args:
517
+ desc: null
518
+ value: None
519
+ output_attentions:
520
+ desc: null
521
+ value: false
522
+ output_dir:
523
+ desc: null
524
+ value: ./
525
+ output_hidden_states:
526
+ desc: null
527
+ value: false
528
+ output_scores:
529
+ desc: null
530
+ value: false
531
+ overwrite_output_dir:
532
+ desc: null
533
+ value: true
534
+ pad_token_id:
535
+ desc: null
536
+ value: 0
537
+ past_index:
538
+ desc: null
539
+ value: -1
540
+ per_device_eval_batch_size:
541
+ desc: null
542
+ value: 8
543
+ per_device_train_batch_size:
544
+ desc: null
545
+ value: 8
546
+ per_gpu_eval_batch_size:
547
+ desc: null
548
+ value: None
549
+ per_gpu_train_batch_size:
550
+ desc: null
551
+ value: None
552
+ predict_with_generate:
553
+ desc: null
554
+ value: true
555
+ prediction_loss_only:
556
+ desc: null
557
+ value: false
558
+ prefix:
559
+ desc: null
560
+ value: null
561
+ problem_type:
562
+ desc: null
563
+ value: null
564
+ pruned_heads:
565
+ desc: null
566
+ value: {}
567
+ push_to_hub:
568
+ desc: null
569
+ value: true
570
+ push_to_hub_model_id:
571
+ desc: null
572
+ value: None
573
+ push_to_hub_organization:
574
+ desc: null
575
+ value: None
576
+ push_to_hub_token:
577
+ desc: null
578
+ value: <PUSH_TO_HUB_TOKEN>
579
+ ray_scope:
580
+ desc: null
581
+ value: last
582
+ remove_invalid_values:
583
+ desc: null
584
+ value: false
585
+ remove_unused_columns:
586
+ desc: null
587
+ value: true
588
+ repetition_penalty:
589
+ desc: null
590
+ value: 1.0
591
+ report_to:
592
+ desc: null
593
+ value: '[''tensorboard'', ''wandb'']'
594
+ resume_from_checkpoint:
595
+ desc: null
596
+ value: None
597
+ return_dict:
598
+ desc: null
599
+ value: true
600
+ return_dict_in_generate:
601
+ desc: null
602
+ value: false
603
+ run_name:
604
+ desc: null
605
+ value: ./
606
+ save_on_each_node:
607
+ desc: null
608
+ value: false
609
+ save_steps:
610
+ desc: null
611
+ value: 500
612
+ save_strategy:
613
+ desc: null
614
+ value: steps
615
+ save_total_limit:
616
+ desc: null
617
+ value: 3
618
+ seed:
619
+ desc: null
620
+ value: 42
621
+ sep_token_id:
622
+ desc: null
623
+ value: null
624
+ sharded_ddp:
625
+ desc: null
626
+ value: '[]'
627
+ skip_memory_metrics:
628
+ desc: null
629
+ value: true
630
+ sortish_sampler:
631
+ desc: null
632
+ value: false
633
+ suppress_tokens:
634
+ desc: null
635
+ value: null
636
+ task_specific_params:
637
+ desc: null
638
+ value: null
639
+ temperature:
640
+ desc: null
641
+ value: 1.0
642
+ tf32:
643
+ desc: null
644
+ value: None
645
+ tf_legacy_loss:
646
+ desc: null
647
+ value: false
648
+ tie_encoder_decoder:
649
+ desc: null
650
+ value: false
651
+ tie_word_embeddings:
652
+ desc: null
653
+ value: true
654
+ tokenizer_class:
655
+ desc: null
656
+ value: null
657
+ top_k:
658
+ desc: null
659
+ value: 50
660
+ top_p:
661
+ desc: null
662
+ value: 1.0
663
+ torch_compile:
664
+ desc: null
665
+ value: false
666
+ torch_compile_backend:
667
+ desc: null
668
+ value: None
669
+ torch_compile_mode:
670
+ desc: null
671
+ value: None
672
+ torch_dtype:
673
+ desc: null
674
+ value: null
675
+ torchdynamo:
676
+ desc: null
677
+ value: None
678
+ torchscript:
679
+ desc: null
680
+ value: false
681
+ tpu_metrics_debug:
682
+ desc: null
683
+ value: false
684
+ tpu_num_cores:
685
+ desc: null
686
+ value: None
687
+ train_batch_size:
688
+ desc: null
689
+ value: 16
690
+ transformers_version:
691
+ desc: null
692
+ value: null
693
+ typical_p:
694
+ desc: null
695
+ value: 1.0
696
+ use_bfloat16:
697
+ desc: null
698
+ value: false
699
+ use_ipex:
700
+ desc: null
701
+ value: false
702
+ use_legacy_prediction_loop:
703
+ desc: null
704
+ value: false
705
+ use_mps_device:
706
+ desc: null
707
+ value: false
708
+ vocab_size:
709
+ desc: null
710
+ value: 30522
711
+ warmup_ratio:
712
+ desc: null
713
+ value: 0.0
714
+ warmup_steps:
715
+ desc: null
716
+ value: 1000
717
+ weight_decay:
718
+ desc: null
719
+ value: 0.0
720
+ xpu_backend:
721
+ desc: null
722
+ value: None