Model save

Browse files

Files changed (5) hide show

README.md +65 -28
repo_card.md +31 -0
tokenizer.json +3 -5
trainer_state.json +6 -6
training_args.bin +1 -1

README.md CHANGED Viewed

@@ -1,31 +1,68 @@
 ---
-{}
 ---
-    # distilbert-finetuned-uncased Model
-    This model is fine-tuned on the SQuAD v2 dataset for the task of question answering.
-    ## Training Procedure
-    - Number of Epochs: 2
-    - Learning Rate: 2e-05
-    - Batch Size: 128 (per device)
-    - Evaluation Strategy: Every 100 steps
-    - Save Strategy: Every 100 steps
-    - FP16 Training: Yes
-    ## Evaluation Results
-    - Exact Match: 23.347090036216628
-    - F1 Score: 26.869992349988973
-    - Total: 11873
-    - Has Answer Exact: 38.630229419703106
-    - Has Answer F1: 45.686136837283904
-    - Has Answer Total: 5928
-    - No Answer Exact: 8.107653490328007
-    - No Answer F1: 8.107653490328007
-    - No Answer Total: 5945
-    - Best Exact: 50.11370336056599
-    - Best Exact Threshold: 0.0
-    - Best F1: 50.11370336056599
-    - Best F1 Threshold: 0.0

 ---
+tags:
+- generated_from_trainer
+datasets:
+- squad_v2
+model-index:
+- name: distilbert-finetuned-uncased-squad_v2
+  results: []
 ---
+<!-- This model card has been generated automatically according to the information the Trainer had access to. You
+should probably proofread and complete it, then remove this comment. -->
+# distilbert-finetuned-uncased-squad_v2
+This model was fine-tuned on the squad_v2 dataset for the task of question answering.
+It achieves the following results on the evaluation set:
+- Loss: 1.3930
+## Model description
+More information needed
+## Intended uses & limitations
+More information needed
+## Training and evaluation data
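+The model was trained and evaluated on the SQuAD v2 (`squad_v2`) dataset. The evaluation set contains 11,873 questions: 5,928 with an answer and 5,945 without.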
+## Training procedure
+### Training hyperparameters
+The following hyperparameters were used during training:
+- learning_rate: 2e-05
+- train_batch_size: 128
+- eval_batch_size: 128
+- seed: 42
+- gradient_accumulation_steps: 4
+- total_train_batch_size: 512
+- optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
+- lr_scheduler_type: linear
+- num_epochs: 4
+### Training results
+| Training Loss | Epoch | Step | Validation Loss |
+|:-------------:|:-----:|:----:|:---------------:|
+| 3.6437        | 0.39  | 100  | 2.1780          |
+| 2.1596        | 0.78  | 200  | 1.6557          |
+| 1.8138        | 1.18  | 300  | 1.5683          |
+| 1.6987        | 1.57  | 400  | 1.5076          |
+| 1.6586        | 1.96  | 500  | 1.5350          |
+| 1.5957        | 1.18  | 600  | 1.4431          |
+| 1.5825        | 1.37  | 700  | 1.4955          |
+| 1.5523        | 1.57  | 800  | 1.4444          |
+| 1.5346        | 1.76  | 900  | 1.3930          |
+| 1.5098        | 1.96  | 1000 | 1.4285          |
+### Framework versions
+- Transformers 4.34.1
+- Pytorch 2.1.0+cu118
+- Datasets 2.14.5
+- Tokenizers 0.14.1

repo_card.md ADDED Viewed

	@@ -0,0 +1,31 @@

+---
+{}
+---
+# distilbert-finetuned-uncased Model
+This model is fine-tuned on the SQuAD v2 dataset for the task of question answering.
+## Training Procedure
+- Number of Epochs: 2
+- Learning Rate: 2e-05
+- Batch Size: 128 (per device)
+- Evaluation Strategy: Every 100 steps
+- Save Strategy: Every 100 steps
+- FP16 Training: Yes
+## Evaluation Results
+- Exact Match: 23.347090036216628
+- F1 Score: 26.869992349988973
+- Total: 11873
+- Has Answer Exact: 38.630229419703106
+- Has Answer F1: 45.686136837283904
+- Has Answer Total: 5928
+- No Answer Exact: 8.107653490328007
+- No Answer F1: 8.107653490328007
+- No Answer Total: 5945
+- Best Exact: 50.11370336056599
+- Best Exact Threshold: 0.0
+- Best F1: 50.11370336056599
+- Best F1 Threshold: 0.0

tokenizer.json CHANGED Viewed

@@ -3,13 +3,11 @@
   "truncation": {
     "direction": "Right",
     "max_length": 512,
-    "strategy": "LongestFirst",
-    "stride": 0
   },
   "padding": {
-    "strategy": {
-      "Fixed": 512
-    },
     "direction": "Right",
     "pad_to_multiple_of": null,
     "pad_id": 0,

   "truncation": {
     "direction": "Right",
     "max_length": 512,
+    "strategy": "OnlySecond",
+    "stride": 128
   },
   "padding": {
+    "strategy": "BatchLongest",
     "direction": "Right",
     "pad_to_multiple_of": null,
     "pad_id": 0,

trainer_state.json CHANGED Viewed

@@ -153,16 +153,16 @@
       "step": 1000,
       "total_flos": 5.015589595888435e+16,
       "train_loss": 0.0,
-      "train_runtime": 0.354,
-      "train_samples_per_second": 737393.791,
-      "train_steps_per_second": 1440.851
     },
     {
       "epoch": 1.96,
       "eval_loss": 1.3930128812789917,
-      "eval_runtime": 8.2561,
-      "eval_samples_per_second": 1449.724,
-      "eval_steps_per_second": 11.386,
       "step": 1000
     }
   ],

       "step": 1000,
       "total_flos": 5.015589595888435e+16,
       "train_loss": 0.0,
+      "train_runtime": 0.3572,
+      "train_samples_per_second": 730670.816,
+      "train_steps_per_second": 1427.715
     },
     {
       "epoch": 1.96,
       "eval_loss": 1.3930128812789917,
+      "eval_runtime": 8.2864,
+      "eval_samples_per_second": 1444.423,
+      "eval_steps_per_second": 11.344,
       "step": 1000
     }
   ],

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7b43115521096779b46ae6f7b82e2bd13f806b3fd3b5fbd681a6c2ae5c41ef56
 size 4664

 version https://git-lfs.github.com/spec/v1
+oid sha256:4c2218707d8f17a87a80bd2f04a5dd940a8048c67f7e922aee33e6506357a060
 size 4664