beamaia committed
Commit a285e74
1 Parent(s): d6436b2

Update metadata with huggingface_hub

Files changed (1)
  1. README.md +35 -93
README.md CHANGED
@@ -197,99 +197,41 @@ Training Procedure:
  Training processing: 'dataset = dataset.shuffle(seed=55)

  dataset = dataset[''train''].train_test_split(test_size=0.1)'
- training_regime:
- output_dir: ./Zeroshot/01-12-23-NousResearch-Nous-Hermes-Llama2-13b_multilang-dataset-3.0.3-portuguese-2_epochs-10_batch_2/checkpoints/
- overwrite_output_dir: false
- do_train: false
- do_eval: true
- do_predict: false
- evaluation_strategy: epoch
- prediction_loss_only: false
- per_device_train_batch_size: 2
- per_device_eval_batch_size: 8
- gradient_accumulation_steps: 2
- eval_accumulation_steps: 1
- eval_delay: 0
- learning_rate: 0.0004
- weight_decay: 0.01
- adam_beta1: 0.9
- adam_beta2: 0.999
- adam_epsilon: 1.0e-08
- max_grad_norm: 0.3
- num_train_epochs: 10
- max_steps: -1
- lr_scheduler_type: cosine
- warmup_ratio: 0.1
- warmup_steps: 0
- log_level: passive
- log_level_replica: warning
- log_on_each_node: true
- logging_dir: ./Zeroshot/01-12-23-NousResearch-Nous-Hermes-Llama2-13b_multilang-dataset-3.0.3-portuguese-2_epochs-10_batch_2/checkpoints/runs/Dec01_21-53-07_fd10189bb234
- logging_strategy: steps
- logging_first_step: false
- logging_steps: 500
- logging_nan_inf_filter: true
- save_strategy: epoch
- save_steps: 500
- save_total_limit: 5
- save_safetensors: true
- save_on_each_node: false
- no_cuda: false
- use_mps_device: false
- seed: 42
- jit_mode_eval: false
- use_ipex: false
- bf16: false
- fp16: true
- fp16_opt_level: O1
- half_precision_backend: auto
- bf16_full_eval: false
- fp16_full_eval: false
- local_rank: 0
- tpu_metrics_debug: false
- debug: []
- dataloader_drop_last: false
- dataloader_num_workers: 0
- past_index: -1
- run_name: ./Zeroshot/01-12-23-NousResearch-Nous-Hermes-Llama2-13b_multilang-dataset-3.0.3-portuguese-2_epochs-10_batch_2/checkpoints/
- disable_tqdm: false
- remove_unused_columns: true
- load_best_model_at_end: true
- metric_for_best_model: eval_loss
- greater_is_better: false
- ignore_data_skip: false
- sharded_ddp: []
- fsdp: []
- fsdp_min_num_params: 0
- fsdp_config:
-   fsdp_min_num_params: 0
-   xla: false
-   xla_fsdp_grad_ckpt: false
- label_smoothing_factor: 0.0
- optim: adamw_torch
- adafactor: false
- group_by_length: false
- length_column_name: length
- report_to:
- - tensorboard
- dataloader_pin_memory: true
- skip_memory_metrics: true
- use_legacy_prediction_loop: false
- push_to_hub: true
- hub_model_id: Weni/ZeroShot-2.2.1-Llama2-13b-Multilanguage-3.0.3
- hub_strategy: all_checkpoints
- hub_token: <HUB_TOKEN>
- hub_private_repo: false
- gradient_checkpointing: true
- include_inputs_for_metrics: false
- fp16_backend: auto
- push_to_hub_token: <PUSH_TO_HUB_TOKEN>
- mp_parameters: ''
- auto_find_batch_size: false
- full_determinism: false
- ray_scope: last
- ddp_timeout: 1800
- torch_compile: false
+ training_regime: "### Training Hyperparameters- output_dir: ./Zeroshot/01-12-23-NousResearch-Nous-Hermes-Llama2-13b_multilang-dataset-3.0.3-portuguese-2_epochs-10_batch_2/checkpoints/\n\
+ - overwrite_output_dir: False\n- do_train: False\n- do_eval: True\n- do_predict:\
+ \ False\n- evaluation_strategy: epoch\n- prediction_loss_only: False\n- per_device_train_batch_size:\
+ \ 2\n- per_device_eval_batch_size: 8\n- per_gpu_train_batch_size: None\n- per_gpu_eval_batch_size:\
+ \ None\n- gradient_accumulation_steps: 2\n- eval_accumulation_steps: 1\n- eval_delay:\
+ \ 0\n- learning_rate: 0.0004\n- weight_decay: 0.01\n- adam_beta1: 0.9\n- adam_beta2:\
+ \ 0.999\n- adam_epsilon: 1e-08\n- max_grad_norm: 0.3\n- num_train_epochs: 10\n-\
+ \ max_steps: -1\n- lr_scheduler_type: cosine\n- warmup_ratio: 0.1\n- warmup_steps:\
+ \ 0\n- log_level: passive\n- log_level_replica: warning\n- log_on_each_node: True\n\
+ - logging_dir: ./Zeroshot/01-12-23-NousResearch-Nous-Hermes-Llama2-13b_multilang-dataset-3.0.3-portuguese-2_epochs-10_batch_2/checkpoints/runs/Dec01_21-53-07_fd10189bb234\n\
+ - logging_strategy: steps\n- logging_first_step: False\n- logging_steps: 500\n-\
+ \ logging_nan_inf_filter: True\n- save_strategy: epoch\n- save_steps: 500\n- save_total_limit:\
+ \ 5\n- save_safetensors: True\n- save_on_each_node: False\n- no_cuda: False\n- use_mps_device:\
+ \ False\n- seed: 42\n- data_seed: None\n- jit_mode_eval: False\n- use_ipex: False\n\
+ - bf16: False\n- fp16: True\n- fp16_opt_level: O1\n- half_precision_backend: auto\n\
+ - bf16_full_eval: False\n- fp16_full_eval: False\n- tf32: None\n- local_rank: 0\n\
+ - ddp_backend: None\n- tpu_num_cores: None\n- tpu_metrics_debug: False\n- debug:\
+ \ []\n- dataloader_drop_last: False\n- eval_steps: None\n- dataloader_num_workers:\
+ \ 0\n- past_index: -1\n- run_name: ./Zeroshot/01-12-23-NousResearch-Nous-Hermes-Llama2-13b_multilang-dataset-3.0.3-portuguese-2_epochs-10_batch_2/checkpoints/\n\
+ - disable_tqdm: False\n- remove_unused_columns: True\n- label_names: None\n- load_best_model_at_end:\
+ \ True\n- metric_for_best_model: eval_loss\n- greater_is_better: False\n- ignore_data_skip:\
+ \ False\n- sharded_ddp: []\n- fsdp: []\n- fsdp_min_num_params: 0\n- fsdp_config:\
+ \ {'fsdp_min_num_params': 0, 'xla': False, 'xla_fsdp_grad_ckpt': False}\n- fsdp_transformer_layer_cls_to_wrap:\
+ \ None\n- deepspeed: None\n- label_smoothing_factor: 0.0\n- optim: adamw_torch\n\
+ - optim_args: None\n- adafactor: False\n- group_by_length: False\n- length_column_name:\
+ \ length\n- report_to: ['tensorboard']\n- ddp_find_unused_parameters: None\n- ddp_bucket_cap_mb:\
+ \ None\n- ddp_broadcast_buffers: None\n- dataloader_pin_memory: True\n- skip_memory_metrics:\
+ \ True\n- use_legacy_prediction_loop: False\n- push_to_hub: True\n- resume_from_checkpoint:\
+ \ None\n- hub_model_id: Weni/ZeroShot-2.2.1-Llama2-13b-Multilanguage-3.0.3\n- hub_strategy:\
+ \ all_checkpoints\n- hub_token: <HUB_TOKEN>\n- hub_private_repo: False\n- gradient_checkpointing:\
+ \ True\n- include_inputs_for_metrics: False\n- fp16_backend: auto\n- push_to_hub_model_id:\
+ \ None\n- push_to_hub_organization: None\n- push_to_hub_token: <PUSH_TO_HUB_TOKEN>\n\
+ - mp_parameters: \n- auto_find_batch_size: False\n- full_determinism: False\n- torchdynamo:\
+ \ None\n- ray_scope: last\n- ddp_timeout: 1800\n- torch_compile: False\n- torch_compile_backend:\
+ \ None\n- torch_compile_mode: None\n- xpu_backend: None"
  training_data:
  name: Weni/zeroshot-3.0.3
  'preprocessing ': 'dataset = dataset.shuffle(seed=55)
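
For reference, the sketch below shows how the metadata recorded in this diff maps back onto the `datasets` and `transformers` APIs it was generated from. It is a reconstruction from the card, not the authors' training script: the `load_dataset` call is an assumption (the card only names the dataset `Weni/zeroshot-3.0.3`), only the non-default hyperparameters are spelled out, and arguments omitted here keep their `transformers` defaults.

```python
from datasets import load_dataset
from transformers import TrainingArguments

# Preprocessing as recorded verbatim in the card's training_data entry;
# loading the named dataset is an assumption, not shown in the card.
dataset = load_dataset("Weni/zeroshot-3.0.3")
dataset = dataset.shuffle(seed=55)
dataset = dataset["train"].train_test_split(test_size=0.1)

# Key values copied from the training_regime entry above.
training_args = TrainingArguments(
    output_dir="./Zeroshot/01-12-23-NousResearch-Nous-Hermes-Llama2-13b_multilang-dataset-3.0.3-portuguese-2_epochs-10_batch_2/checkpoints/",
    do_eval=True,
    evaluation_strategy="epoch",
    per_device_train_batch_size=2,
    per_device_eval_batch_size=8,
    gradient_accumulation_steps=2,
    eval_accumulation_steps=1,
    learning_rate=4e-4,
    weight_decay=0.01,
    max_grad_norm=0.3,
    num_train_epochs=10,
    lr_scheduler_type="cosine",
    warmup_ratio=0.1,
    save_strategy="epoch",
    save_total_limit=5,
    seed=42,
    fp16=True,
    load_best_model_at_end=True,
    metric_for_best_model="eval_loss",
    greater_is_better=False,
    optim="adamw_torch",
    gradient_checkpointing=True,
    report_to=["tensorboard"],
    push_to_hub=True,
    hub_model_id="Weni/ZeroShot-2.2.1-Llama2-13b-Multilanguage-3.0.3",
    hub_strategy="all_checkpoints",
)
```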