beamaia committed on
Commit
1488cf4
1 Parent(s): 40a29aa

Update metadata with huggingface_hub

Browse files
Files changed (1) hide show
  1. README.md +98 -0
README.md CHANGED
@@ -196,6 +196,104 @@ Training Procedure:
196
  Training processing: 'dataset = dataset.shuffle(seed=55)
197
 
198
  dataset = dataset[''train''].train_test_split(test_size=0.1)'
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
199
  ---
200
  ## Training procedure
201
 
 
196
  Training processing: 'dataset = dataset.shuffle(seed=55)
197
 
198
  dataset = dataset[''train''].train_test_split(test_size=0.1)'
199
+ training_regime:
200
+ output_dir: ./Zeroshot/01-12-23-NousResearch-Nous-Hermes-Llama2-13b_multilang-dataset-3.0.3-portuguese-2_epochs-10_batch_2/checkpoints/
201
+ overwrite_output_dir: false
202
+ do_train: false
203
+ do_eval: true
204
+ do_predict: false
205
+ evaluation_strategy: epoch
206
+ prediction_loss_only: false
207
+ per_device_train_batch_size: 2
208
+ per_device_eval_batch_size: 8
209
+ gradient_accumulation_steps: 2
210
+ eval_accumulation_steps: 1
211
+ eval_delay: 0
212
+ learning_rate: 0.0004
213
+ weight_decay: 0.01
214
+ adam_beta1: 0.9
215
+ adam_beta2: 0.999
216
+ adam_epsilon: 1.0e-08
217
+ max_grad_norm: 0.3
218
+ num_train_epochs: 10
219
+ max_steps: -1
220
+ lr_scheduler_type: cosine
221
+ warmup_ratio: 0.1
222
+ warmup_steps: 0
223
+ log_level: passive
224
+ log_level_replica: warning
225
+ log_on_each_node: true
226
+ logging_dir: ./Zeroshot/01-12-23-NousResearch-Nous-Hermes-Llama2-13b_multilang-dataset-3.0.3-portuguese-2_epochs-10_batch_2/checkpoints/runs/Dec01_21-53-07_fd10189bb234
227
+ logging_strategy: steps
228
+ logging_first_step: false
229
+ logging_steps: 500
230
+ logging_nan_inf_filter: true
231
+ save_strategy: epoch
232
+ save_steps: 500
233
+ save_total_limit: 5
234
+ save_safetensors: true
235
+ save_on_each_node: false
236
+ no_cuda: false
237
+ use_mps_device: false
238
+ seed: 42
239
+ jit_mode_eval: false
240
+ use_ipex: false
241
+ bf16: false
242
+ fp16: true
243
+ fp16_opt_level: O1
244
+ half_precision_backend: auto
245
+ bf16_full_eval: false
246
+ fp16_full_eval: false
247
+ local_rank: 0
248
+ tpu_metrics_debug: false
249
+ debug: []
250
+ dataloader_drop_last: false
251
+ dataloader_num_workers: 0
252
+ past_index: -1
253
+ run_name: ./Zeroshot/01-12-23-NousResearch-Nous-Hermes-Llama2-13b_multilang-dataset-3.0.3-portuguese-2_epochs-10_batch_2/checkpoints/
254
+ disable_tqdm: false
255
+ remove_unused_columns: true
256
+ load_best_model_at_end: true
257
+ metric_for_best_model: eval_loss
258
+ greater_is_better: false
259
+ ignore_data_skip: false
260
+ sharded_ddp: []
261
+ fsdp: []
262
+ fsdp_min_num_params: 0
263
+ fsdp_config:
264
+ fsdp_min_num_params: 0
265
+ xla: false
266
+ xla_fsdp_grad_ckpt: false
267
+ label_smoothing_factor: 0.0
268
+ optim: adamw_torch
269
+ adafactor: false
270
+ group_by_length: false
271
+ length_column_name: length
272
+ report_to:
273
+ - tensorboard
274
+ dataloader_pin_memory: true
275
+ skip_memory_metrics: true
276
+ use_legacy_prediction_loop: false
277
+ push_to_hub: true
278
+ hub_model_id: Weni/ZeroShot-2.2.1-Llama2-13b-Multilanguage-3.0.3
279
+ hub_strategy: all_checkpoints
280
+ hub_token: <HUB_TOKEN>
281
+ hub_private_repo: false
282
+ gradient_checkpointing: true
283
+ include_inputs_for_metrics: false
284
+ fp16_backend: auto
285
+ push_to_hub_token: <PUSH_TO_HUB_TOKEN>
286
+ mp_parameters: ''
287
+ auto_find_batch_size: false
288
+ full_determinism: false
289
+ ray_scope: last
290
+ ddp_timeout: 1800
291
+ torch_compile: false
292
+ training_data:
293
+ name: Weni/zeroshot-3.0.3
294
+ 'preprocessing ': 'dataset = dataset.shuffle(seed=55)
295
+
296
+ dataset = dataset[''train''].train_test_split(test_size=0.1)'
297
  ---
298
  ## Training procedure
299