jjonas313 committed on Sep 5, 2024

Commit

9793ca2

verified ·

1 Parent(s): 240f785

Upload folder using huggingface_hub

Browse files

Files changed (19) hide show

README.md +26 -0
added_tokens.json +3 -0
checkpoint-120/config.json +44 -0
checkpoint-120/model.safetensors +3 -0
checkpoint-120/optimizer.pt +3 -0
checkpoint-120/rng_state.pth +3 -0
checkpoint-120/scheduler.pt +3 -0
checkpoint-120/trainer_state.json +195 -0
checkpoint-120/training_args.bin +3 -0
config.json +44 -0
model.safetensors +3 -0
runs/Sep05_14-14-01_r-jjonas313-autotrain-ecb-uncertainy-gf8d5yky-51253-9tti6/events.out.tfevents.1725545642.r-jjonas313-autotrain-ecb-uncertainy-gf8d5yky-51253-9tti6.169.0 +2 -2
runs/Sep05_14-14-01_r-jjonas313-autotrain-ecb-uncertainy-gf8d5yky-51253-9tti6/events.out.tfevents.1725545812.r-jjonas313-autotrain-ecb-uncertainy-gf8d5yky-51253-9tti6.169.1 +3 -0
special_tokens_map.json +15 -0
spm.model +3 -0
tokenizer.json +0 -0
tokenizer_config.json +58 -0
training_args.bin +3 -0
training_params.json +30 -0

README.md ADDED Viewed

	@@ -0,0 +1,26 @@

+---
+tags:
+- autotrain
+- text-classification
+base_model: microsoft/deberta-v3-large
+widget:
+- text: "I love AutoTrain"
+---
+# Model Trained Using AutoTrain
+- Problem type: Text Classification
+## Validation Metrics
+loss: 0.5364909768104553
+
+f1: 0.7044534412955465
+
+precision: 0.7073170731707317
+
+recall: 0.7016129032258065
+
+auc: 0.8095903592375367
+
+accuracy: 0.7566666666666667

added_tokens.json ADDED Viewed

	@@ -0,0 +1,3 @@

+{
+  "[MASK]": 128000
+}

checkpoint-120/config.json ADDED Viewed

	@@ -0,0 +1,44 @@

+{
+  "_name_or_path": "microsoft/deberta-v3-large",
+  "_num_labels": 2,
+  "architectures": [
+    "DebertaV2ForSequenceClassification"
+  ],
+  "attention_probs_dropout_prob": 0.1,
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.1,
+  "hidden_size": 1024,
+  "id2label": {
+    "0": "certain",
+    "1": "uncertain"
+  },
+  "initializer_range": 0.02,
+  "intermediate_size": 4096,
+  "label2id": {
+    "certain": 0,
+    "uncertain": 1
+  },
+  "layer_norm_eps": 1e-07,
+  "max_position_embeddings": 512,
+  "max_relative_positions": -1,
+  "model_type": "deberta-v2",
+  "norm_rel_ebd": "layer_norm",
+  "num_attention_heads": 16,
+  "num_hidden_layers": 24,
+  "pad_token_id": 0,
+  "pooler_dropout": 0,
+  "pooler_hidden_act": "gelu",
+  "pooler_hidden_size": 1024,
+  "pos_att_type": [
+    "p2c",
+    "c2p"
+  ],
+  "position_biased_input": false,
+  "position_buckets": 256,
+  "relative_attention": true,
+  "share_att_key": true,
+  "torch_dtype": "float32",
+  "transformers_version": "4.44.2",
+  "type_vocab_size": 0,
+  "vocab_size": 128100
+}

checkpoint-120/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:5d6303709191a5725046de8c79bedf0fed5687be97f609eb37bcf165a4713a51
+size 1740304440

checkpoint-120/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:2f289bcf2de6f0a8547f264643ec5998bbb2095b03d2370f3666b4b09babe9c4
+size 3480840240

checkpoint-120/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f52d4d5e479edc60dd7f89c9af255d99789b9dd6d38c3e1ff659e6b4cc77514b
+size 14244

checkpoint-120/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:1c42520eeab738bd99a587d19c9396e41628ef6a96d5bedd679d68111554a650
+size 1064

checkpoint-120/trainer_state.json ADDED Viewed

	@@ -0,0 +1,195 @@

+{
+  "best_metric": 0.5364909768104553,
+  "best_model_checkpoint": "autotrain-ecb-uncertainty-deberta-v3-large/checkpoint-120",
+  "epoch": 1.0,
+  "eval_steps": 500,
+  "global_step": 120,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.05,
+      "grad_norm": 4.925415992736816,
+      "learning_rate": 1.6666666666666667e-06,
+      "loss": 0.7077,
+      "step": 6
+    },
+    {
+      "epoch": 0.1,
+      "grad_norm": 4.121665954589844,
+      "learning_rate": 3.3333333333333333e-06,
+      "loss": 0.6967,
+      "step": 12
+    },
+    {
+      "epoch": 0.15,
+      "grad_norm": 3.579362630844116,
+      "learning_rate": 5e-06,
+      "loss": 0.6732,
+      "step": 18
+    },
+    {
+      "epoch": 0.2,
+      "grad_norm": 3.409770965576172,
+      "learning_rate": 6.666666666666667e-06,
+      "loss": 0.6812,
+      "step": 24
+    },
+    {
+      "epoch": 0.25,
+      "grad_norm": 3.262946128845215,
+      "learning_rate": 8.333333333333334e-06,
+      "loss": 0.7021,
+      "step": 30
+    },
+    {
+      "epoch": 0.3,
+      "grad_norm": 1.882023572921753,
+      "learning_rate": 1e-05,
+      "loss": 0.6861,
+      "step": 36
+    },
+    {
+      "epoch": 0.35,
+      "grad_norm": 2.0393922328948975,
+      "learning_rate": 9.814814814814815e-06,
+      "loss": 0.6689,
+      "step": 42
+    },
+    {
+      "epoch": 0.4,
+      "grad_norm": 2.3638412952423096,
+      "learning_rate": 9.62962962962963e-06,
+      "loss": 0.6781,
+      "step": 48
+    },
+    {
+      "epoch": 0.45,
+      "grad_norm": 1.785644769668579,
+      "learning_rate": 9.444444444444445e-06,
+      "loss": 0.6817,
+      "step": 54
+    },
+    {
+      "epoch": 0.5,
+      "grad_norm": 5.820407390594482,
+      "learning_rate": 9.25925925925926e-06,
+      "loss": 0.63,
+      "step": 60
+    },
+    {
+      "epoch": 0.55,
+      "grad_norm": 2.0294086933135986,
+      "learning_rate": 9.074074074074075e-06,
+      "loss": 0.629,
+      "step": 66
+    },
+    {
+      "epoch": 0.6,
+      "grad_norm": 7.212940692901611,
+      "learning_rate": 8.888888888888888e-06,
+      "loss": 0.7502,
+      "step": 72
+    },
+    {
+      "epoch": 0.65,
+      "grad_norm": 5.35235071182251,
+      "learning_rate": 8.703703703703705e-06,
+      "loss": 0.6315,
+      "step": 78
+    },
+    {
+      "epoch": 0.7,
+      "grad_norm": 3.978393077850342,
+      "learning_rate": 8.518518518518519e-06,
+      "loss": 0.6373,
+      "step": 84
+    },
+    {
+      "epoch": 0.75,
+      "grad_norm": 6.702513694763184,
+      "learning_rate": 8.333333333333334e-06,
+      "loss": 0.573,
+      "step": 90
+    },
+    {
+      "epoch": 0.8,
+      "grad_norm": 10.61103630065918,
+      "learning_rate": 8.148148148148148e-06,
+      "loss": 0.6378,
+      "step": 96
+    },
+    {
+      "epoch": 0.85,
+      "grad_norm": 8.387734413146973,
+      "learning_rate": 7.962962962962963e-06,
+      "loss": 0.4785,
+      "step": 102
+    },
+    {
+      "epoch": 0.9,
+      "grad_norm": 10.877920150756836,
+      "learning_rate": 7.808641975308642e-06,
+      "loss": 0.5362,
+      "step": 108
+    },
+    {
+      "epoch": 0.95,
+      "grad_norm": 8.344444274902344,
+      "learning_rate": 7.623456790123458e-06,
+      "loss": 0.6228,
+      "step": 114
+    },
+    {
+      "epoch": 1.0,
+      "grad_norm": 4.364614009857178,
+      "learning_rate": 7.438271604938272e-06,
+      "loss": 0.5578,
+      "step": 120
+    },
+    {
+      "epoch": 1.0,
+      "eval_accuracy": 0.7566666666666667,
+      "eval_auc": 0.8095903592375367,
+      "eval_f1": 0.7044534412955465,
+      "eval_loss": 0.5364909768104553,
+      "eval_precision": 0.7073170731707317,
+      "eval_recall": 0.7016129032258065,
+      "eval_runtime": 2.8168,
+      "eval_samples_per_second": 106.505,
+      "eval_steps_per_second": 5.325,
+      "step": 120
+    }
+  ],
+  "logging_steps": 6,
+  "max_steps": 360,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 3,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "EarlyStoppingCallback": {
+      "args": {
+        "early_stopping_patience": 5,
+        "early_stopping_threshold": 0.01
+      },
+      "attributes": {
+        "early_stopping_patience_counter": 0
+      }
+    },
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 559162592870400.0,
+  "train_batch_size": 10,
+  "trial_name": null,
+  "trial_params": null
+}

checkpoint-120/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:602edec742b2aea7e508ea1239a7d0138f0e123ec4e2c5fb6e58b63f189e883a
+size 5304

config.json ADDED Viewed

	@@ -0,0 +1,44 @@

+{
+  "_name_or_path": "microsoft/deberta-v3-large",
+  "_num_labels": 2,
+  "architectures": [
+    "DebertaV2ForSequenceClassification"
+  ],
+  "attention_probs_dropout_prob": 0.1,
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.1,
+  "hidden_size": 1024,
+  "id2label": {
+    "0": "certain",
+    "1": "uncertain"
+  },
+  "initializer_range": 0.02,
+  "intermediate_size": 4096,
+  "label2id": {
+    "certain": 0,
+    "uncertain": 1
+  },
+  "layer_norm_eps": 1e-07,
+  "max_position_embeddings": 512,
+  "max_relative_positions": -1,
+  "model_type": "deberta-v2",
+  "norm_rel_ebd": "layer_norm",
+  "num_attention_heads": 16,
+  "num_hidden_layers": 24,
+  "pad_token_id": 0,
+  "pooler_dropout": 0,
+  "pooler_hidden_act": "gelu",
+  "pooler_hidden_size": 1024,
+  "pos_att_type": [
+    "p2c",
+    "c2p"
+  ],
+  "position_biased_input": false,
+  "position_buckets": 256,
+  "relative_attention": true,
+  "share_att_key": true,
+  "torch_dtype": "float32",
+  "transformers_version": "4.44.2",
+  "type_vocab_size": 0,
+  "vocab_size": 128100
+}

model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:5d6303709191a5725046de8c79bedf0fed5687be97f609eb37bcf165a4713a51
+size 1740304440

runs/Sep05_14-14-01_r-jjonas313-autotrain-ecb-uncertainy-gf8d5yky-51253-9tti6/events.out.tfevents.1725545642.r-jjonas313-autotrain-ecb-uncertainy-gf8d5yky-51253-9tti6.169.0 CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1f6c8de8c5b287df759527dd8df2490325d87199a050bf898f754ccf398419b0
-size 5306

 version https://git-lfs.github.com/spec/v1
+oid sha256:baf891ca2a8d35b9ce010ae318f8f95cbbb25301ea57fee464cd2c399597a6a9
+size 19783

runs/Sep05_14-14-01_r-jjonas313-autotrain-ecb-uncertainy-gf8d5yky-51253-9tti6/events.out.tfevents.1725545812.r-jjonas313-autotrain-ecb-uncertainy-gf8d5yky-51253-9tti6.169.1 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ccc5339d039cffd0c8552d67887efd29352ee78d1c6424a3285e54b08fa6e4b8
+size 607

special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,15 @@

+{
+  "bos_token": "[CLS]",
+  "cls_token": "[CLS]",
+  "eos_token": "[SEP]",
+  "mask_token": "[MASK]",
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "unk_token": {
+    "content": "[UNK]",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  }
+}

spm.model ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c679fbf93643d19aab7ee10c0b99e460bdbc02fedf34b92b05af343b4af586fd
+size 2464616

tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,58 @@

+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "[PAD]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "1": {
+      "content": "[CLS]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "2": {
+      "content": "[SEP]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "3": {
+      "content": "[UNK]",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128000": {
+      "content": "[MASK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "bos_token": "[CLS]",
+  "clean_up_tokenization_spaces": true,
+  "cls_token": "[CLS]",
+  "do_lower_case": false,
+  "eos_token": "[SEP]",
+  "mask_token": "[MASK]",
+  "model_max_length": 1000000000000000019884624838656,
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "sp_model_kwargs": {},
+  "split_by_punct": false,
+  "tokenizer_class": "DebertaV2Tokenizer",
+  "unk_token": "[UNK]",
+  "vocab_type": "spm"
+}

training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:602edec742b2aea7e508ea1239a7d0138f0e123ec4e2c5fb6e58b63f189e883a
+size 5304

training_params.json ADDED Viewed

	@@ -0,0 +1,30 @@

+{
+    "data_path": "autotrain-ecb-uncertainty-deberta-v3-large/autotrain-data",
+    "model": "microsoft/deberta-v3-large",
+    "lr": 1e-05,
+    "epochs": 3,
+    "max_seq_length": 256,
+    "batch_size": 10,
+    "warmup_ratio": 0.1,
+    "gradient_accumulation": 1,
+    "optimizer": "adamw_torch",
+    "scheduler": "linear",
+    "weight_decay": 0.0,
+    "max_grad_norm": 1.0,
+    "seed": 333,
+    "train_split": "train",
+    "valid_split": "validation",
+    "text_column": "autotrain_text",
+    "target_column": "autotrain_label",
+    "logging_steps": -1,
+    "project_name": "autotrain-ecb-uncertainty-deberta-v3-large",
+    "auto_find_batch_size": false,
+    "mixed_precision": "fp16",
+    "save_total_limit": 1,
+    "push_to_hub": true,
+    "eval_strategy": "epoch",
+    "username": "jjonas313",
+    "log": "tensorboard",
+    "early_stopping_patience": 5,
+    "early_stopping_threshold": 0.01
+}