Update metadata with huggingface_hub
Browse files
README.md
CHANGED
@@ -1,4 +1,5 @@
|
|
1 |
---
|
|
|
2 |
library_name: peft
|
3 |
pipeline_tag: zero-shot-classification
|
4 |
training_arguments:
|
@@ -96,6 +97,105 @@ training_arguments:
|
|
96 |
torch_compile: false
|
97 |
dataset:
|
98 |
name: Weni/zeroshot-3.0.3
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
99 |
---
|
100 |
## Training procedure
|
101 |
|
|
|
1 |
---
|
2 |
+
language: pt
|
3 |
library_name: peft
|
4 |
pipeline_tag: zero-shot-classification
|
5 |
training_arguments:
|
|
|
97 |
torch_compile: false
|
98 |
dataset:
|
99 |
name: Weni/zeroshot-3.0.3
|
100 |
+
Training Procedure:
|
101 |
+
Training Hyperparameters:
|
102 |
+
output_dir: ./Zeroshot/01-12-23-NousResearch-Nous-Hermes-Llama2-13b_multilang-dataset-3.0.3-portuguese-2_epochs-10_batch_2/checkpoints/
|
103 |
+
overwrite_output_dir: false
|
104 |
+
do_train: false
|
105 |
+
do_eval: true
|
106 |
+
do_predict: false
|
107 |
+
evaluation_strategy: epoch
|
108 |
+
prediction_loss_only: false
|
109 |
+
per_device_train_batch_size: 2
|
110 |
+
per_device_eval_batch_size: 8
|
111 |
+
gradient_accumulation_steps: 2
|
112 |
+
eval_accumulation_steps: 1
|
113 |
+
eval_delay: 0
|
114 |
+
learning_rate: 0.0004
|
115 |
+
weight_decay: 0.01
|
116 |
+
adam_beta1: 0.9
|
117 |
+
adam_beta2: 0.999
|
118 |
+
adam_epsilon: 1.0e-08
|
119 |
+
max_grad_norm: 0.3
|
120 |
+
num_train_epochs: 10
|
121 |
+
max_steps: -1
|
122 |
+
lr_scheduler_type: cosine
|
123 |
+
warmup_ratio: 0.1
|
124 |
+
warmup_steps: 0
|
125 |
+
log_level: passive
|
126 |
+
log_level_replica: warning
|
127 |
+
log_on_each_node: true
|
128 |
+
logging_dir: ./Zeroshot/01-12-23-NousResearch-Nous-Hermes-Llama2-13b_multilang-dataset-3.0.3-portuguese-2_epochs-10_batch_2/checkpoints/runs/Dec01_21-53-07_fd10189bb234
|
129 |
+
logging_strategy: steps
|
130 |
+
logging_first_step: false
|
131 |
+
logging_steps: 500
|
132 |
+
logging_nan_inf_filter: true
|
133 |
+
save_strategy: epoch
|
134 |
+
save_steps: 500
|
135 |
+
save_total_limit: 5
|
136 |
+
save_safetensors: true
|
137 |
+
save_on_each_node: false
|
138 |
+
no_cuda: false
|
139 |
+
use_mps_device: false
|
140 |
+
seed: 42
|
141 |
+
jit_mode_eval: false
|
142 |
+
use_ipex: false
|
143 |
+
bf16: false
|
144 |
+
fp16: true
|
145 |
+
fp16_opt_level: O1
|
146 |
+
half_precision_backend: auto
|
147 |
+
bf16_full_eval: false
|
148 |
+
fp16_full_eval: false
|
149 |
+
local_rank: 0
|
150 |
+
tpu_metrics_debug: false
|
151 |
+
debug: []
|
152 |
+
dataloader_drop_last: false
|
153 |
+
dataloader_num_workers: 0
|
154 |
+
past_index: -1
|
155 |
+
run_name: ./Zeroshot/01-12-23-NousResearch-Nous-Hermes-Llama2-13b_multilang-dataset-3.0.3-portuguese-2_epochs-10_batch_2/checkpoints/
|
156 |
+
disable_tqdm: false
|
157 |
+
remove_unused_columns: true
|
158 |
+
load_best_model_at_end: true
|
159 |
+
metric_for_best_model: eval_loss
|
160 |
+
greater_is_better: false
|
161 |
+
ignore_data_skip: false
|
162 |
+
sharded_ddp: []
|
163 |
+
fsdp: []
|
164 |
+
fsdp_min_num_params: 0
|
165 |
+
fsdp_config:
|
166 |
+
fsdp_min_num_params: 0
|
167 |
+
xla: false
|
168 |
+
xla_fsdp_grad_ckpt: false
|
169 |
+
label_smoothing_factor: 0.0
|
170 |
+
optim: adamw_torch
|
171 |
+
adafactor: false
|
172 |
+
group_by_length: false
|
173 |
+
length_column_name: length
|
174 |
+
report_to:
|
175 |
+
- tensorboard
|
176 |
+
dataloader_pin_memory: true
|
177 |
+
skip_memory_metrics: true
|
178 |
+
use_legacy_prediction_loop: false
|
179 |
+
push_to_hub: true
|
180 |
+
hub_model_id: Weni/ZeroShot-2.2.1-Llama2-13b-Multilanguage-3.0.3
|
181 |
+
hub_strategy: all_checkpoints
|
182 |
+
hub_token: <HUB_TOKEN>
|
183 |
+
hub_private_repo: false
|
184 |
+
gradient_checkpointing: true
|
185 |
+
include_inputs_for_metrics: false
|
186 |
+
fp16_backend: auto
|
187 |
+
push_to_hub_token: <PUSH_TO_HUB_TOKEN>
|
188 |
+
mp_parameters: ''
|
189 |
+
auto_find_batch_size: false
|
190 |
+
full_determinism: false
|
191 |
+
ray_scope: last
|
192 |
+
ddp_timeout: 1800
|
193 |
+
torch_compile: false
|
194 |
+
Training data:
|
195 |
+
name: Weni/zeroshot-3.0.3
|
196 |
+
Training processing: 'dataset = dataset.shuffle(seed=55)
|
197 |
+
|
198 |
+
dataset = dataset[''train''].train_test_split(test_size=0.1)'
|
199 |
---
|
200 |
## Training procedure
|
201 |
|