Upload README.md with huggingface_hub
Browse files
README.md
CHANGED
@@ -28,7 +28,7 @@ training_regime: 'accelerator_config="{''split_batches'': False, ''dispatch_batc
|
|
28 |
gradient_accumulation_steps=1, gradient_checkpointing="(False,)", gradient_checkpointing_kwargs="None",
|
29 |
greater_is_better=false, group_by_length=true, half_precision_backend="auto", ignore_data_skip=false,
|
30 |
include_inputs_for_metrics=false, jit_mode_eval=false, label_names="None", label_smoothing_factor=0.0,
|
31 |
-
learning_rate=
|
32 |
lr_scheduler_kwargs="{}", lr_scheduler_type="linear", max_grad_norm=1.0, max_steps=-1,
|
33 |
metric_for_best_model="loss", mp_parameters="", neftune_noise_alpha="None", no_cuda=false,
|
34 |
num_train_epochs=3, optim="adamw_torch", optim_args="None", optim_target_modules="None",
|
@@ -38,22 +38,22 @@ training_regime: 'accelerator_config="{''split_batches'': False, ''dispatch_batc
|
|
38 |
seed=42, skip_memory_metrics=true, split_batches="None", tf32="None", torch_compile=false,
|
39 |
torch_compile_backend="None", torch_compile_mode="None", torchdynamo="None", tpu_num_cores="None",
|
40 |
train_batch_size=8, use_cpu=false, use_ipex=false, use_legacy_prediction_loop=false,
|
41 |
-
use_mps_device=false, warmup_ratio=0.
|
42 |
results: " precision recall f1-score support\n \n \
|
43 |
-
\ age 0.
|
44 |
-
\ 0.
|
45 |
-
\ 80\n general 0.
|
46 |
-
\ 0.
|
47 |
-
\ 0.
|
48 |
-
\ sexuality 0.
|
49 |
-
\ 0.
|
50 |
-
\ 0.
|
51 |
-
\ samples avg 0.
|
52 |
-
compute_infrastructure: '- Linux 5.
|
53 |
|
54 |
-
- MemTotal:
|
55 |
|
56 |
-
-
|
57 |
|
58 |
- GPU_0: NVIDIA L40S'
|
59 |
software: python 3.10.12, accelerate 0.32.1, aiohttp 3.9.5, aiosignal 1.3.1, anyio
|
@@ -134,47 +134,47 @@ model-index:
|
|
134 |
type: mix_human-eval_synthetic
|
135 |
metrics:
|
136 |
- type: loss
|
137 |
-
value: 0.
|
138 |
- type: accuracy
|
139 |
-
value: 0.
|
140 |
- type: f1_micro
|
141 |
-
value: 0.
|
142 |
- type: precision_micro
|
143 |
-
value: 0.
|
144 |
- type: recall_micro
|
145 |
-
value: 0.
|
146 |
- type: roc_auc_micro
|
147 |
-
value: 0.
|
148 |
- type: f1_macro
|
149 |
-
value: 0.
|
150 |
- type: precision_macro
|
151 |
-
value: 0.
|
152 |
- type: recall_macro
|
153 |
-
value: 0.
|
154 |
- type: roc_auc_macro
|
155 |
-
value: 0.
|
156 |
- type: f1_samples
|
157 |
-
value: 0.
|
158 |
- type: precision_samples
|
159 |
-
value: 0.
|
160 |
- type: recall_samples
|
161 |
-
value: 0.
|
162 |
- type: roc_auc_samples
|
163 |
-
value: 0.
|
164 |
- type: f1_weighted
|
165 |
-
value: 0.
|
166 |
- type: precision_weighted
|
167 |
-
value: 0.
|
168 |
- type: recall_weighted
|
169 |
-
value: 0.
|
170 |
- type: roc_auc_weighted
|
171 |
-
value: 0.
|
172 |
- type: runtime
|
173 |
-
value: 9.
|
174 |
- type: samples_per_second
|
175 |
-
value:
|
176 |
- type: steps_per_second
|
177 |
-
value: 7.
|
178 |
- type: epoch
|
179 |
value: 3.0
|
180 |
---
|
@@ -286,7 +286,7 @@ Use the code below to get started with the model.
|
|
286 |
|
287 |
#### Training Hyperparameters
|
288 |
|
289 |
-
- **Training regime:** accelerator_config="{'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}", adafactor=false, adam_beta1=0.9, adam_beta2=0.999, adam_epsilon=1e-08, auto_find_batch_size=false, batch_eval_metrics=false, bf16=false, bf16_full_eval=false, data_seed="None", dataloader_drop_last=false, dataloader_num_workers=0, dataloader_persistent_workers=false, dataloader_pin_memory=true, dataloader_prefetch_factor="None", ddp_backend="None", ddp_broadcast_buffers="None", ddp_bucket_cap_mb="None", ddp_find_unused_parameters="None", ddp_timeout=1800, deepspeed="None", disable_tqdm=false, dispatch_batches="None", do_eval=true, do_predict=false, do_train=false, eval_accumulation_steps="None", eval_batch_size=8, eval_delay=0, eval_do_concat_batches=true, eval_on_start=false, eval_steps="None", eval_strategy="epoch", evaluation_strategy="None", fp16=false, fp16_backend="auto", fp16_full_eval=false, fp16_opt_level="O1", fsdp="[]", fsdp_config="{'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}", fsdp_min_num_params=0, fsdp_transformer_layer_cls_to_wrap="None", full_determinism=false, gradient_accumulation_steps=1, gradient_checkpointing="(False,)", gradient_checkpointing_kwargs="None", greater_is_better=false, group_by_length=true, half_precision_backend="auto", ignore_data_skip=false, include_inputs_for_metrics=false, jit_mode_eval=false, label_names="None", label_smoothing_factor=0.0, learning_rate=
|
290 |
|
291 |
#### Speeds, Sizes, Times [optional]
|
292 |
|
@@ -322,19 +322,19 @@ Use the code below to get started with the model.
|
|
322 |
|
323 |
precision recall f1-score support
|
324 |
|
325 |
-
age 0.
|
326 |
-
disability 0.
|
327 |
-
feminine 0.
|
328 |
-
general 0.
|
329 |
-
masculine 0.
|
330 |
-
neutral 0.
|
331 |
-
racial 0.
|
332 |
-
sexuality 0.
|
333 |
|
334 |
-
micro avg 0.
|
335 |
-
macro avg 0.
|
336 |
-
weighted avg 0.
|
337 |
-
samples avg 0.
|
338 |
|
339 |
|
340 |
#### Summary
|
@@ -367,9 +367,9 @@ Carbon emissions can be estimated using the [Machine Learning Impact calculator]
|
|
367 |
|
368 |
### Compute Infrastructure
|
369 |
|
370 |
-
- Linux 5.
|
371 |
-
- MemTotal:
|
372 |
-
-
|
373 |
- GPU_0: NVIDIA L40S
|
374 |
|
375 |
#### Hardware
|
|
|
28 |
gradient_accumulation_steps=1, gradient_checkpointing="(False,)", gradient_checkpointing_kwargs="None",
|
29 |
greater_is_better=false, group_by_length=true, half_precision_backend="auto", ignore_data_skip=false,
|
30 |
include_inputs_for_metrics=false, jit_mode_eval=false, label_names="None", label_smoothing_factor=0.0,
|
31 |
+
learning_rate=3e-05, length_column_name="length", load_best_model_at_end=true, local_rank=0,
|
32 |
lr_scheduler_kwargs="{}", lr_scheduler_type="linear", max_grad_norm=1.0, max_steps=-1,
|
33 |
metric_for_best_model="loss", mp_parameters="", neftune_noise_alpha="None", no_cuda=false,
|
34 |
num_train_epochs=3, optim="adamw_torch", optim_args="None", optim_target_modules="None",
|
|
|
38 |
seed=42, skip_memory_metrics=true, split_batches="None", tf32="None", torch_compile=false,
|
39 |
torch_compile_backend="None", torch_compile_mode="None", torchdynamo="None", tpu_num_cores="None",
|
40 |
train_batch_size=8, use_cpu=false, use_ipex=false, use_legacy_prediction_loop=false,
|
41 |
+
use_mps_device=false, warmup_ratio=0.1, warmup_steps=0, weight_decay=0.001'
|
42 |
results: " precision recall f1-score support\n \n \
|
43 |
+
\ age 0.82 0.57 0.68 80\n disability 0.87\
|
44 |
+
\ 0.42 0.57 80\n feminine 0.94 0.93 0.93\
|
45 |
+
\ 80\n general 0.61 0.14 0.22 80\n masculine\
|
46 |
+
\ 0.69 0.65 0.67 80\n neutral 0.29 0.72\
|
47 |
+
\ 0.41 80\n racial 0.93 0.71 0.81 80\n\
|
48 |
+
\ sexuality 0.86 0.81 0.83 80\n \n micro avg\
|
49 |
+
\ 0.66 0.62 0.64 640\n macro avg 0.75 0.62\
|
50 |
+
\ 0.64 640\n weighted avg 0.75 0.62 0.64 640\n\
|
51 |
+
\ samples avg 0.63 0.65 0.64 640\n "
|
52 |
+
compute_infrastructure: '- Linux 6.5.0-28-generic x86_64
|
53 |
|
54 |
+
- MemTotal: 527988292 kB
|
55 |
|
56 |
+
- 64 X Intel(R) Xeon(R) Silver 4314 CPU @ 2.40GHz
|
57 |
|
58 |
- GPU_0: NVIDIA L40S'
|
59 |
software: python 3.10.12, accelerate 0.32.1, aiohttp 3.9.5, aiosignal 1.3.1, anyio
|
|
|
134 |
type: mix_human-eval_synthetic
|
135 |
metrics:
|
136 |
- type: loss
|
137 |
+
value: 0.2932254374027252
|
138 |
- type: accuracy
|
139 |
+
value: 0.5856164383561644
|
140 |
- type: f1_micro
|
141 |
+
value: 0.6382636655948553
|
142 |
- type: precision_micro
|
143 |
+
value: 0.6572847682119205
|
144 |
- type: recall_micro
|
145 |
+
value: 0.6203125
|
146 |
- type: roc_auc_micro
|
147 |
+
value: 0.7844866071428571
|
148 |
- type: f1_macro
|
149 |
+
value: 0.6412879991871913
|
150 |
- type: precision_macro
|
151 |
+
value: 0.7517582669787894
|
152 |
- type: recall_macro
|
153 |
+
value: 0.6203125
|
154 |
- type: roc_auc_macro
|
155 |
+
value: 0.7844866071428571
|
156 |
- type: f1_samples
|
157 |
+
value: 0.6374714611872145
|
158 |
- type: precision_samples
|
159 |
+
value: 0.6342751141552511
|
160 |
- type: recall_samples
|
161 |
+
value: 0.653681506849315
|
162 |
- type: roc_auc_samples
|
163 |
+
value: 0.8007868558382258
|
164 |
- type: f1_weighted
|
165 |
+
value: 0.6412879991871913
|
166 |
- type: precision_weighted
|
167 |
+
value: 0.7517582669787893
|
168 |
- type: recall_weighted
|
169 |
+
value: 0.6203125
|
170 |
- type: roc_auc_weighted
|
171 |
+
value: 0.7844866071428572
|
172 |
- type: runtime
|
173 |
+
value: 9.6046
|
174 |
- type: samples_per_second
|
175 |
+
value: 60.804
|
176 |
- type: steps_per_second
|
177 |
+
value: 7.601
|
178 |
- type: epoch
|
179 |
value: 3.0
|
180 |
---
|
|
|
286 |
|
287 |
#### Training Hyperparameters
|
288 |
|
289 |
+
- **Training regime:** accelerator_config="{'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}", adafactor=false, adam_beta1=0.9, adam_beta2=0.999, adam_epsilon=1e-08, auto_find_batch_size=false, batch_eval_metrics=false, bf16=false, bf16_full_eval=false, data_seed="None", dataloader_drop_last=false, dataloader_num_workers=0, dataloader_persistent_workers=false, dataloader_pin_memory=true, dataloader_prefetch_factor="None", ddp_backend="None", ddp_broadcast_buffers="None", ddp_bucket_cap_mb="None", ddp_find_unused_parameters="None", ddp_timeout=1800, deepspeed="None", disable_tqdm=false, dispatch_batches="None", do_eval=true, do_predict=false, do_train=false, eval_accumulation_steps="None", eval_batch_size=8, eval_delay=0, eval_do_concat_batches=true, eval_on_start=false, eval_steps="None", eval_strategy="epoch", evaluation_strategy="None", fp16=false, fp16_backend="auto", fp16_full_eval=false, fp16_opt_level="O1", fsdp="[]", fsdp_config="{'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}", fsdp_min_num_params=0, fsdp_transformer_layer_cls_to_wrap="None", full_determinism=false, gradient_accumulation_steps=1, gradient_checkpointing="(False,)", gradient_checkpointing_kwargs="None", greater_is_better=false, group_by_length=true, half_precision_backend="auto", ignore_data_skip=false, include_inputs_for_metrics=false, jit_mode_eval=false, label_names="None", label_smoothing_factor=0.0, learning_rate=3e-05, length_column_name="length", load_best_model_at_end=true, local_rank=0, lr_scheduler_kwargs="{}", lr_scheduler_type="linear", max_grad_norm=1.0, max_steps=-1, metric_for_best_model="loss", mp_parameters="", neftune_noise_alpha="None", no_cuda=false, num_train_epochs=3, optim="adamw_torch", optim_args="None", optim_target_modules="None", past_index=-1, per_device_eval_batch_size=8, per_device_train_batch_size=8, per_gpu_eval_batch_size="None", per_gpu_train_batch_size="None", prediction_loss_only=false, ray_scope="last", remove_unused_columns=true, report_to="[]", restore_callback_states_from_checkpoint=false, resume_from_checkpoint="None", seed=42, skip_memory_metrics=true, split_batches="None", tf32="None", torch_compile=false, torch_compile_backend="None", torch_compile_mode="None", torchdynamo="None", tpu_num_cores="None", train_batch_size=8, use_cpu=false, use_ipex=false, use_legacy_prediction_loop=false, use_mps_device=false, warmup_ratio=0.1, warmup_steps=0, weight_decay=0.001 <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
|
290 |
|
291 |
#### Speeds, Sizes, Times [optional]
|
292 |
|
|
|
322 |
|
323 |
precision recall f1-score support
|
324 |
|
325 |
+
age 0.82 0.57 0.68 80
|
326 |
+
disability 0.87 0.42 0.57 80
|
327 |
+
feminine 0.94 0.93 0.93 80
|
328 |
+
general 0.61 0.14 0.22 80
|
329 |
+
masculine 0.69 0.65 0.67 80
|
330 |
+
neutral 0.29 0.72 0.41 80
|
331 |
+
racial 0.93 0.71 0.81 80
|
332 |
+
sexuality 0.86 0.81 0.83 80
|
333 |
|
334 |
+
micro avg 0.66 0.62 0.64 640
|
335 |
+
macro avg 0.75 0.62 0.64 640
|
336 |
+
weighted avg 0.75 0.62 0.64 640
|
337 |
+
samples avg 0.63 0.65 0.64 640
|
338 |
|
339 |
|
340 |
#### Summary
|
|
|
367 |
|
368 |
### Compute Infrastructure
|
369 |
|
370 |
+
- Linux 6.5.0-28-generic x86_64
|
371 |
+
- MemTotal: 527988292 kB
|
372 |
+
- 64 X Intel(R) Xeon(R) Silver 4314 CPU @ 2.40GHz
|
373 |
- GPU_0: NVIDIA L40S
|
374 |
|
375 |
#### Hardware
|