2024-mcm-everitt-ryan
/

roberta-large-job-bias-seq-cls

@@ -28,7 +28,7 @@ training_regime: 'accelerator_config="{''split_batches'': False, ''dispatch_batc
   gradient_accumulation_steps=1, gradient_checkpointing="(False,)", gradient_checkpointing_kwargs="None",
   greater_is_better=false, group_by_length=true, half_precision_backend="auto", ignore_data_skip=false,
   include_inputs_for_metrics=false, jit_mode_eval=false, label_names="None", label_smoothing_factor=0.0,
-  learning_rate=1e-05, length_column_name="length", load_best_model_at_end=true, local_rank=0,
   lr_scheduler_kwargs="{}", lr_scheduler_type="linear", max_grad_norm=1.0, max_steps=-1,
   metric_for_best_model="loss", mp_parameters="", neftune_noise_alpha="None", no_cuda=false,
   num_train_epochs=3, optim="adamw_torch", optim_args="None", optim_target_modules="None",
@@ -38,22 +38,22 @@ training_regime: 'accelerator_config="{''split_batches'': False, ''dispatch_batc
   seed=42, skip_memory_metrics=true, split_batches="None", tf32="None", torch_compile=false,
   torch_compile_backend="None", torch_compile_mode="None", torchdynamo="None", tpu_num_cores="None",
   train_batch_size=8, use_cpu=false, use_ipex=false, use_legacy_prediction_loop=false,
-  use_mps_device=false, warmup_ratio=0.0, warmup_steps=0, weight_decay=0.001'
 results: "                  precision    recall  f1-score   support\n    \n      \
-  \       age       0.95      0.26      0.41        80\n      disability       0.86\
-  \      0.38      0.52        80\n        feminine       0.99      0.89      0.93\
-  \        80\n         general       0.50      0.05      0.09        80\n       masculine\
-  \       0.79      0.56      0.66        80\n         neutral       0.25      0.90\
-  \      0.39        80\n          racial       0.98      0.61      0.75        80\n\
-  \       sexuality       0.98      0.65      0.78        80\n    \n       micro avg\
-  \       0.59      0.54      0.56       640\n       macro avg       0.79      0.54\
-  \      0.57       640\n    weighted avg       0.79      0.54      0.57       640\n\
-  \     samples avg       0.57      0.58      0.57       640\n    "
-compute_infrastructure: '- Linux 5.15.0-78-generic x86_64
-  - MemTotal:       1056619068 kB
-  - 256 X AMD EPYC 7702 64-Core Processor
   - GPU_0: NVIDIA L40S'
 software: python 3.10.12, accelerate 0.32.1, aiohttp 3.9.5, aiosignal 1.3.1, anyio
@@ -134,47 +134,47 @@ model-index:
       type: mix_human-eval_synthetic
     metrics:
     - type: loss
-      value: 0.3373583257198334
     - type: accuracy
-      value: 0.547945205479452
     - type: f1_micro
-      value: 0.562551103843009
     - type: precision_micro
-      value: 0.5900514579759862
     - type: recall_micro
-      value: 0.5375
     - type: roc_auc_micro
-      value: 0.7391121031746033
     - type: f1_macro
-      value: 0.5681001780157972
     - type: precision_macro
-      value: 0.7875191792787375
     - type: recall_macro
-      value: 0.5375
     - type: roc_auc_macro
-      value: 0.7391121031746033
     - type: f1_samples
-      value: 0.5730593607305935
     - type: precision_samples
-      value: 0.5693493150684932
     - type: recall_samples
-      value: 0.5830479452054794
     - type: roc_auc_samples
-      value: 0.7613095238095239
     - type: f1_weighted
-      value: 0.5681001780157973
     - type: precision_weighted
-      value: 0.7875191792787375
     - type: recall_weighted
-      value: 0.5375
     - type: roc_auc_weighted
-      value: 0.7391121031746032
     - type: runtime
-      value: 9.8129
     - type: samples_per_second
-      value: 59.514
     - type: steps_per_second
-      value: 7.439
     - type: epoch
       value: 3.0
 ---
@@ -286,7 +286,7 @@ Use the code below to get started with the model.
 #### Training Hyperparameters
-- **Training regime:** accelerator_config="{'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}", adafactor=false, adam_beta1=0.9, adam_beta2=0.999, adam_epsilon=1e-08, auto_find_batch_size=false, batch_eval_metrics=false, bf16=false, bf16_full_eval=false, data_seed="None", dataloader_drop_last=false, dataloader_num_workers=0, dataloader_persistent_workers=false, dataloader_pin_memory=true, dataloader_prefetch_factor="None", ddp_backend="None", ddp_broadcast_buffers="None", ddp_bucket_cap_mb="None", ddp_find_unused_parameters="None", ddp_timeout=1800, deepspeed="None", disable_tqdm=false, dispatch_batches="None", do_eval=true, do_predict=false, do_train=false, eval_accumulation_steps="None", eval_batch_size=8, eval_delay=0, eval_do_concat_batches=true, eval_on_start=false, eval_steps="None", eval_strategy="epoch", evaluation_strategy="None", fp16=false, fp16_backend="auto", fp16_full_eval=false, fp16_opt_level="O1", fsdp="[]", fsdp_config="{'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}", fsdp_min_num_params=0, fsdp_transformer_layer_cls_to_wrap="None", full_determinism=false, gradient_accumulation_steps=1, gradient_checkpointing="(False,)", gradient_checkpointing_kwargs="None", greater_is_better=false, group_by_length=true, half_precision_backend="auto", ignore_data_skip=false, include_inputs_for_metrics=false, jit_mode_eval=false, label_names="None", label_smoothing_factor=0.0, learning_rate=1e-05, length_column_name="length", load_best_model_at_end=true, local_rank=0, lr_scheduler_kwargs="{}", lr_scheduler_type="linear", max_grad_norm=1.0, max_steps=-1, metric_for_best_model="loss", mp_parameters="", neftune_noise_alpha="None", no_cuda=false, num_train_epochs=3, optim="adamw_torch", optim_args="None", optim_target_modules="None", past_index=-1, per_device_eval_batch_size=8, per_device_train_batch_size=8, per_gpu_eval_batch_size="None", per_gpu_train_batch_size="None", prediction_loss_only=false, ray_scope="last", remove_unused_columns=true, report_to="[]", restore_callback_states_from_checkpoint=false, resume_from_checkpoint="None", seed=42, skip_memory_metrics=true, split_batches="None", tf32="None", torch_compile=false, torch_compile_backend="None", torch_compile_mode="None", torchdynamo="None", tpu_num_cores="None", train_batch_size=8, use_cpu=false, use_ipex=false, use_legacy_prediction_loop=false, use_mps_device=false, warmup_ratio=0.0, warmup_steps=0, weight_decay=0.001 <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
 #### Speeds, Sizes, Times [optional]
@@ -322,19 +322,19 @@ Use the code below to get started with the model.
                   precision    recall  f1-score   support
-             age       0.95      0.26      0.41        80
-      disability       0.86      0.38      0.52        80
-        feminine       0.99      0.89      0.93        80
-         general       0.50      0.05      0.09        80
-       masculine       0.79      0.56      0.66        80
-         neutral       0.25      0.90      0.39        80
-          racial       0.98      0.61      0.75        80
-       sexuality       0.98      0.65      0.78        80
-       micro avg       0.59      0.54      0.56       640
-       macro avg       0.79      0.54      0.57       640
-    weighted avg       0.79      0.54      0.57       640
-     samples avg       0.57      0.58      0.57       640
 #### Summary
@@ -367,9 +367,9 @@ Carbon emissions can be estimated using the [Machine Learning Impact calculator]
 ### Compute Infrastructure
-- Linux 5.15.0-78-generic x86_64
-- MemTotal:       1056619068 kB
-- 256 X AMD EPYC 7702 64-Core Processor
 - GPU_0: NVIDIA L40S
 #### Hardware

   gradient_accumulation_steps=1, gradient_checkpointing="(False,)", gradient_checkpointing_kwargs="None",
   greater_is_better=false, group_by_length=true, half_precision_backend="auto", ignore_data_skip=false,
   include_inputs_for_metrics=false, jit_mode_eval=false, label_names="None", label_smoothing_factor=0.0,
+  learning_rate=3e-05, length_column_name="length", load_best_model_at_end=true, local_rank=0,
   lr_scheduler_kwargs="{}", lr_scheduler_type="linear", max_grad_norm=1.0, max_steps=-1,
   metric_for_best_model="loss", mp_parameters="", neftune_noise_alpha="None", no_cuda=false,
   num_train_epochs=3, optim="adamw_torch", optim_args="None", optim_target_modules="None",
   seed=42, skip_memory_metrics=true, split_batches="None", tf32="None", torch_compile=false,
   torch_compile_backend="None", torch_compile_mode="None", torchdynamo="None", tpu_num_cores="None",
   train_batch_size=8, use_cpu=false, use_ipex=false, use_legacy_prediction_loop=false,
+  use_mps_device=false, warmup_ratio=0.1, warmup_steps=0, weight_decay=0.001'
 results: "                  precision    recall  f1-score   support\n    \n      \
+  \       age       0.82      0.57      0.68        80\n      disability       0.87\
+  \      0.42      0.57        80\n        feminine       0.94      0.93      0.93\
+  \        80\n         general       0.61      0.14      0.22        80\n       masculine\
+  \       0.69      0.65      0.67        80\n         neutral       0.29      0.72\
+  \      0.41        80\n          racial       0.93      0.71      0.81        80\n\
+  \       sexuality       0.86      0.81      0.83        80\n    \n       micro avg\
+  \       0.66      0.62      0.64       640\n       macro avg       0.75      0.62\
+  \      0.64       640\n    weighted avg       0.75      0.62      0.64       640\n\
+  \     samples avg       0.63      0.65      0.64       640\n    "
+compute_infrastructure: '- Linux 6.5.0-28-generic x86_64
+  - MemTotal:       527988292 kB
+  - 64 X Intel(R) Xeon(R) Silver 4314 CPU @ 2.40GHz
   - GPU_0: NVIDIA L40S'
 software: python 3.10.12, accelerate 0.32.1, aiohttp 3.9.5, aiosignal 1.3.1, anyio
       type: mix_human-eval_synthetic
     metrics:
     - type: loss
+      value: 0.2932254374027252
     - type: accuracy
+      value: 0.5856164383561644
     - type: f1_micro
+      value: 0.6382636655948553
     - type: precision_micro
+      value: 0.6572847682119205
     - type: recall_micro
+      value: 0.6203125
     - type: roc_auc_micro
+      value: 0.7844866071428571
     - type: f1_macro
+      value: 0.6412879991871913
     - type: precision_macro
+      value: 0.7517582669787894
     - type: recall_macro
+      value: 0.6203125
     - type: roc_auc_macro
+      value: 0.7844866071428571
     - type: f1_samples
+      value: 0.6374714611872145
     - type: precision_samples
+      value: 0.6342751141552511
     - type: recall_samples
+      value: 0.653681506849315
     - type: roc_auc_samples
+      value: 0.8007868558382258
     - type: f1_weighted
+      value: 0.6412879991871913
     - type: precision_weighted
+      value: 0.7517582669787893
     - type: recall_weighted
+      value: 0.6203125
     - type: roc_auc_weighted
+      value: 0.7844866071428572
     - type: runtime
+      value: 9.6046
     - type: samples_per_second
+      value: 60.804
     - type: steps_per_second
+      value: 7.601
     - type: epoch
       value: 3.0
 ---
 #### Training Hyperparameters
+- **Training regime:** accelerator_config="{'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}", adafactor=false, adam_beta1=0.9, adam_beta2=0.999, adam_epsilon=1e-08, auto_find_batch_size=false, batch_eval_metrics=false, bf16=false, bf16_full_eval=false, data_seed="None", dataloader_drop_last=false, dataloader_num_workers=0, dataloader_persistent_workers=false, dataloader_pin_memory=true, dataloader_prefetch_factor="None", ddp_backend="None", ddp_broadcast_buffers="None", ddp_bucket_cap_mb="None", ddp_find_unused_parameters="None", ddp_timeout=1800, deepspeed="None", disable_tqdm=false, dispatch_batches="None", do_eval=true, do_predict=false, do_train=false, eval_accumulation_steps="None", eval_batch_size=8, eval_delay=0, eval_do_concat_batches=true, eval_on_start=false, eval_steps="None", eval_strategy="epoch", evaluation_strategy="None", fp16=false, fp16_backend="auto", fp16_full_eval=false, fp16_opt_level="O1", fsdp="[]", fsdp_config="{'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}", fsdp_min_num_params=0, fsdp_transformer_layer_cls_to_wrap="None", full_determinism=false, gradient_accumulation_steps=1, gradient_checkpointing="(False,)", gradient_checkpointing_kwargs="None", greater_is_better=false, group_by_length=true, half_precision_backend="auto", ignore_data_skip=false, include_inputs_for_metrics=false, jit_mode_eval=false, label_names="None", label_smoothing_factor=0.0, learning_rate=3e-05, length_column_name="length", load_best_model_at_end=true, local_rank=0, lr_scheduler_kwargs="{}", lr_scheduler_type="linear", max_grad_norm=1.0, max_steps=-1, metric_for_best_model="loss", mp_parameters="", neftune_noise_alpha="None", no_cuda=false, num_train_epochs=3, optim="adamw_torch", optim_args="None", optim_target_modules="None", past_index=-1, per_device_eval_batch_size=8, per_device_train_batch_size=8, per_gpu_eval_batch_size="None", per_gpu_train_batch_size="None", prediction_loss_only=false, ray_scope="last", remove_unused_columns=true, report_to="[]", restore_callback_states_from_checkpoint=false, resume_from_checkpoint="None", seed=42, skip_memory_metrics=true, split_batches="None", tf32="None", torch_compile=false, torch_compile_backend="None", torch_compile_mode="None", torchdynamo="None", tpu_num_cores="None", train_batch_size=8, use_cpu=false, use_ipex=false, use_legacy_prediction_loop=false, use_mps_device=false, warmup_ratio=0.1, warmup_steps=0, weight_decay=0.001 <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
 #### Speeds, Sizes, Times [optional]
                   precision    recall  f1-score   support
+             age       0.82      0.57      0.68        80
+      disability       0.87      0.42      0.57        80
+        feminine       0.94      0.93      0.93        80
+         general       0.61      0.14      0.22        80
+       masculine       0.69      0.65      0.67        80
+         neutral       0.29      0.72      0.41        80
+          racial       0.93      0.71      0.81        80
+       sexuality       0.86      0.81      0.83        80
+       micro avg       0.66      0.62      0.64       640
+       macro avg       0.75      0.62      0.64       640
+    weighted avg       0.75      0.62      0.64       640
+     samples avg       0.63      0.65      0.64       640
 #### Summary
 ### Compute Infrastructure
+- Linux 6.5.0-28-generic x86_64
+- MemTotal:       527988292 kB
+- 64 X Intel(R) Xeon(R) Silver 4314 CPU @ 2.40GHz
 - GPU_0: NVIDIA L40S
 #### Hardware