2024-mcm-everitt-ryan
/

Phi-3-mini-4k-instruct-job-bias-qlora-seq-cls

@@ -11,7 +11,7 @@ developers: Tristan Everitt and Paul Ryan
 model_card_authors: See developers
 model_card_contact: See developers
 repo: https://gitlab.computing.dcu.ie/everitt2/2024-mcm-everitt-ryan
-training_regime: 'PEFT: None, accelerator_config="{''split_batches'': False, ''dispatch_batches'':
   None, ''even_batches'': True, ''use_seedable_sampler'': True, ''non_blocking'':
   False, ''gradient_accumulation_kwargs'': None}", adafactor=false, adam_beta1=0.9,
   adam_beta2=0.999, adam_epsilon=1e-08, auto_find_batch_size=false, batch_eval_metrics=false,
@@ -40,15 +40,15 @@ training_regime: 'PEFT: None, accelerator_config="{''split_batches'': False, ''d
   train_batch_size=8, use_cpu=false, use_ipex=false, use_legacy_prediction_loop=false,
   use_mps_device=false, warmup_ratio=0.0, warmup_steps=0, weight_decay=0.001'
 results: "                  precision    recall  f1-score   support\n    \n      \
-  \       age       0.93      0.33      0.48        80\n      disability       1.00\
-  \      0.42      0.60        80\n        feminine       0.99      0.85      0.91\
-  \        80\n         general       0.88      0.46      0.61        80\n       masculine\
-  \       0.91      0.49      0.63        80\n         neutral       0.31      0.95\
-  \      0.47        80\n          racial       0.98      0.75      0.85        80\n\
-  \       sexuality       0.97      0.74      0.84        80\n    \n       micro avg\
-  \       0.69      0.62      0.65       640\n       macro avg       0.87      0.62\
-  \      0.67       640\n    weighted avg       0.87      0.62      0.67       640\n\
-  \     samples avg       0.66      0.68      0.67       640\n    "
 compute_infrastructure: '- Linux 6.5.0-28-generic x86_64
   - MemTotal:       527988292 kB
@@ -134,47 +134,47 @@ model-index:
       type: mix_human-eval_synthetic
     metrics:
     - type: loss
-      value: 0.3106254041194916
     - type: accuracy
-      value: 0.636986301369863
     - type: f1_micro
-      value: 0.6530278232405892
     - type: precision_micro
-      value: 0.6855670103092784
     - type: recall_micro
-      value: 0.6234375
     - type: roc_auc_micro
-      value: 0.7890252976190476
     - type: f1_macro
-      value: 0.6735633963496355
     - type: precision_macro
-      value: 0.8705378602567351
     - type: recall_macro
-      value: 0.6234375
     - type: roc_auc_macro
-      value: 0.7890252976190477
     - type: f1_samples
-      value: 0.6667808219178082
     - type: precision_samples
-      value: 0.6618150684931506
     - type: recall_samples
-      value: 0.6793664383561644
     - type: roc_auc_samples
-      value: 0.8162977005870843
     - type: f1_weighted
-      value: 0.6735633963496355
     - type: precision_weighted
-      value: 0.8705378602567351
     - type: recall_weighted
-      value: 0.6234375
     - type: roc_auc_weighted
-      value: 0.7890252976190476
     - type: runtime
-      value: 109.5632
     - type: samples_per_second
-      value: 5.33
     - type: steps_per_second
-      value: 0.666
     - type: epoch
       value: 3.0
 ---
@@ -286,7 +286,7 @@ Use the code below to get started with the model.
 #### Training Hyperparameters
-- **Training regime:** PEFT: None, accelerator_config="{'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}", adafactor=false, adam_beta1=0.9, adam_beta2=0.999, adam_epsilon=1e-08, auto_find_batch_size=false, batch_eval_metrics=false, bf16=false, bf16_full_eval=false, data_seed="None", dataloader_drop_last=false, dataloader_num_workers=0, dataloader_persistent_workers=false, dataloader_pin_memory=true, dataloader_prefetch_factor="None", ddp_backend="None", ddp_broadcast_buffers="None", ddp_bucket_cap_mb="None", ddp_find_unused_parameters="None", ddp_timeout=1800, deepspeed="None", disable_tqdm=false, dispatch_batches="None", do_eval=true, do_predict=false, do_train=false, eval_accumulation_steps="None", eval_batch_size=8, eval_delay=0, eval_do_concat_batches=true, eval_on_start=false, eval_steps="None", eval_strategy="epoch", evaluation_strategy="None", fp16=false, fp16_backend="auto", fp16_full_eval=false, fp16_opt_level="O1", fsdp="[]", fsdp_config="{'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}", fsdp_min_num_params=0, fsdp_transformer_layer_cls_to_wrap="None", full_determinism=false, gradient_accumulation_steps=1, gradient_checkpointing="(False,)", gradient_checkpointing_kwargs="None", greater_is_better=false, group_by_length=true, half_precision_backend="auto", ignore_data_skip=false, include_inputs_for_metrics=false, jit_mode_eval=false, label_names="None", label_smoothing_factor=0.0, learning_rate=0.0001, length_column_name="length", load_best_model_at_end=true, local_rank=0, lr_scheduler_kwargs="{}", lr_scheduler_type="linear", max_grad_norm=1.0, max_steps=-1, metric_for_best_model="loss", mp_parameters="", neftune_noise_alpha="None", no_cuda=false, num_train_epochs=3, optim="adamw_torch", optim_args="None", optim_target_modules="None", past_index=-1, per_device_eval_batch_size=8, per_device_train_batch_size=8, per_gpu_eval_batch_size="None", per_gpu_train_batch_size="None", prediction_loss_only=false, ray_scope="last", remove_unused_columns=true, report_to="[]", restore_callback_states_from_checkpoint=false, resume_from_checkpoint="None", seed=42, skip_memory_metrics=true, split_batches="None", tf32="None", torch_compile=false, torch_compile_backend="None", torch_compile_mode="None", torchdynamo="None", tpu_num_cores="None", train_batch_size=8, use_cpu=false, use_ipex=false, use_legacy_prediction_loop=false, use_mps_device=false, warmup_ratio=0.0, warmup_steps=0, weight_decay=0.001 <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
 #### Speeds, Sizes, Times [optional]
@@ -322,19 +322,19 @@ Use the code below to get started with the model.
                   precision    recall  f1-score   support
-             age       0.93      0.33      0.48        80
-      disability       1.00      0.42      0.60        80
-        feminine       0.99      0.85      0.91        80
-         general       0.88      0.46      0.61        80
-       masculine       0.91      0.49      0.63        80
-         neutral       0.31      0.95      0.47        80
-          racial       0.98      0.75      0.85        80
-       sexuality       0.97      0.74      0.84        80
-       micro avg       0.69      0.62      0.65       640
-       macro avg       0.87      0.62      0.67       640
-    weighted avg       0.87      0.62      0.67       640
-     samples avg       0.66      0.68      0.67       640
 #### Summary

 model_card_authors: See developers
 model_card_contact: See developers
 repo: https://gitlab.computing.dcu.ie/everitt2/2024-mcm-everitt-ryan
+training_regime: 'accelerator_config="{''split_batches'': False, ''dispatch_batches'':
   None, ''even_batches'': True, ''use_seedable_sampler'': True, ''non_blocking'':
   False, ''gradient_accumulation_kwargs'': None}", adafactor=false, adam_beta1=0.9,
   adam_beta2=0.999, adam_epsilon=1e-08, auto_find_batch_size=false, batch_eval_metrics=false,
   train_batch_size=8, use_cpu=false, use_ipex=false, use_legacy_prediction_loop=false,
   use_mps_device=false, warmup_ratio=0.0, warmup_steps=0, weight_decay=0.001'
 results: "                  precision    recall  f1-score   support\n    \n      \
+  \       age       0.89      0.40      0.55        80\n      disability       0.97\
+  \      0.44      0.60        80\n        feminine       0.99      0.89      0.93\
+  \        80\n         general       0.65      0.51      0.57        80\n       masculine\
+  \       0.95      0.45      0.61        80\n         neutral       0.30      0.90\
+  \      0.44        80\n          racial       0.93      0.79      0.85        80\n\
+  \       sexuality       0.95      0.75      0.84        80\n    \n       micro avg\
+  \       0.66      0.64      0.65       640\n       macro avg       0.83      0.64\
+  \      0.68       640\n    weighted avg       0.83      0.64      0.68       640\n\
+  \     samples avg       0.66      0.69      0.67       640\n    "
 compute_infrastructure: '- Linux 6.5.0-28-generic x86_64
   - MemTotal:       527988292 kB
       type: mix_human-eval_synthetic
     metrics:
     - type: loss
+      value: 0.3098137676715851
     - type: accuracy
+      value: 0.6078767123287672
     - type: f1_micro
+      value: 0.6507936507936508
     - type: precision_micro
+      value: 0.6612903225806451
     - type: recall_micro
+      value: 0.640625
     - type: roc_auc_micro
+      value: 0.7942708333333334
     - type: f1_macro
+      value: 0.6759919550021907
     - type: precision_macro
+      value: 0.8274147252372333
     - type: recall_macro
+      value: 0.640625
     - type: roc_auc_macro
+      value: 0.7942708333333334
     - type: f1_samples
+      value: 0.6690639269406393
     - type: precision_samples
+      value: 0.661244292237443
     - type: recall_samples
+      value: 0.6936358447488585
     - type: roc_auc_samples
+      value: 0.8201993639921722
     - type: f1_weighted
+      value: 0.6759919550021907
     - type: precision_weighted
+      value: 0.8274147252372334
     - type: recall_weighted
+      value: 0.640625
     - type: roc_auc_weighted
+      value: 0.7942708333333334
     - type: runtime
+      value: 109.3748
     - type: samples_per_second
+      value: 5.339
     - type: steps_per_second
+      value: 0.667
     - type: epoch
       value: 3.0
 ---
 #### Training Hyperparameters
+- **Training regime:** accelerator_config="{'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}", adafactor=false, adam_beta1=0.9, adam_beta2=0.999, adam_epsilon=1e-08, auto_find_batch_size=false, batch_eval_metrics=false, bf16=false, bf16_full_eval=false, data_seed="None", dataloader_drop_last=false, dataloader_num_workers=0, dataloader_persistent_workers=false, dataloader_pin_memory=true, dataloader_prefetch_factor="None", ddp_backend="None", ddp_broadcast_buffers="None", ddp_bucket_cap_mb="None", ddp_find_unused_parameters="None", ddp_timeout=1800, deepspeed="None", disable_tqdm=false, dispatch_batches="None", do_eval=true, do_predict=false, do_train=false, eval_accumulation_steps="None", eval_batch_size=8, eval_delay=0, eval_do_concat_batches=true, eval_on_start=false, eval_steps="None", eval_strategy="epoch", evaluation_strategy="None", fp16=false, fp16_backend="auto", fp16_full_eval=false, fp16_opt_level="O1", fsdp="[]", fsdp_config="{'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}", fsdp_min_num_params=0, fsdp_transformer_layer_cls_to_wrap="None", full_determinism=false, gradient_accumulation_steps=1, gradient_checkpointing="(False,)", gradient_checkpointing_kwargs="None", greater_is_better=false, group_by_length=true, half_precision_backend="auto", ignore_data_skip=false, include_inputs_for_metrics=false, jit_mode_eval=false, label_names="None", label_smoothing_factor=0.0, learning_rate=0.0001, length_column_name="length", load_best_model_at_end=true, local_rank=0, lr_scheduler_kwargs="{}", lr_scheduler_type="linear", max_grad_norm=1.0, max_steps=-1, metric_for_best_model="loss", mp_parameters="", neftune_noise_alpha="None", no_cuda=false, num_train_epochs=3, optim="adamw_torch", optim_args="None", optim_target_modules="None", past_index=-1, per_device_eval_batch_size=8, per_device_train_batch_size=8, per_gpu_eval_batch_size="None", per_gpu_train_batch_size="None", prediction_loss_only=false, ray_scope="last", remove_unused_columns=true, report_to="[]", restore_callback_states_from_checkpoint=false, resume_from_checkpoint="None", seed=42, skip_memory_metrics=true, split_batches="None", tf32="None", torch_compile=false, torch_compile_backend="None", torch_compile_mode="None", torchdynamo="None", tpu_num_cores="None", train_batch_size=8, use_cpu=false, use_ipex=false, use_legacy_prediction_loop=false, use_mps_device=false, warmup_ratio=0.0, warmup_steps=0, weight_decay=0.001 <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
 #### Speeds, Sizes, Times [optional]
                   precision    recall  f1-score   support
+             age       0.89      0.40      0.55        80
+      disability       0.97      0.44      0.60        80
+        feminine       0.99      0.89      0.93        80
+         general       0.65      0.51      0.57        80
+       masculine       0.95      0.45      0.61        80
+         neutral       0.30      0.90      0.44        80
+          racial       0.93      0.79      0.85        80
+       sexuality       0.95      0.75      0.84        80
+       micro avg       0.66      0.64      0.65       640
+       macro avg       0.83      0.64      0.68       640
+    weighted avg       0.83      0.64      0.68       640
+     samples avg       0.66      0.69      0.67       640
 #### Summary