Commit db61bf0
1 Parent(s): 6cddd10

Training in progress, step 3200, checkpoint

Changed files:
- last-checkpoint/README.md +0 -12
- last-checkpoint/adapter_model.safetensors +1 -1
- last-checkpoint/optimizer.pt +2 -2
- last-checkpoint/rng_state.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/special_tokens_map.json +7 -1
- last-checkpoint/tokenizer_config.json +5 -1
- last-checkpoint/trainer_state.json +10 -66
- last-checkpoint/training_args.bin +1 -1
last-checkpoint/README.md CHANGED
@@ -201,18 +201,6 @@ Carbon emissions can be estimated using the [Machine Learning Impact calculator]
 ## Training procedure
 
 
-The following `bitsandbytes` quantization config was used during training:
-- quant_method: bitsandbytes
-- load_in_8bit: False
-- load_in_4bit: True
-- llm_int8_threshold: 6.0
-- llm_int8_skip_modules: None
-- llm_int8_enable_fp32_cpu_offload: False
-- llm_int8_has_fp16_weight: False
-- bnb_4bit_quant_type: nf4
-- bnb_4bit_use_double_quant: True
-- bnb_4bit_compute_dtype: float16
-
 ### Framework versions
 
 
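The block removed from the README above is the bitsandbytes quantization config that was reported for training. A minimal sketch of the same settings expressed as a transformers.BitsAndBytesConfig, purely to make the removed values concrete; nothing here is taken from the repo beyond the listed values:

import torch
from transformers import BitsAndBytesConfig

# Mirrors the config listed in the removed README section:
# 4-bit NF4 quantization with double quantization and float16 compute.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    load_in_8bit=False,
    llm_int8_threshold=6.0,
    llm_int8_skip_modules=None,
    llm_int8_enable_fp32_cpu_offload=False,
    llm_int8_has_fp16_weight=False,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.float16,
)
# This object would be passed as quantization_config= to
# AutoModelForCausalLM.from_pretrained(...) for the (unnamed) base model.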
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:f7d3db52e4d0054936ff52bf1ad37ac5cbca2e26a2f3c0b3c23351936d9dfd6b
 size 75507072
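adapter_model.safetensors and the other binary files in this commit are stored as Git LFS pointers; each pointer records the LFS spec version, the SHA-256 of the actual payload, and its size in bytes. A minimal sketch of verifying a downloaded payload against the new pointer values above; the helper name is ours and the path assumes the checkpoint has been fetched locally:

import hashlib
import os

def verify_lfs_payload(path, expected_oid, expected_size):
    # Compare a local file against the oid/size recorded in its LFS pointer.
    digest = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):
            digest.update(chunk)
    return digest.hexdigest() == expected_oid and os.path.getsize(path) == expected_size

# Values taken from the adapter_model.safetensors pointer above.
ok = verify_lfs_payload(
    "last-checkpoint/adapter_model.safetensors",
    "f7d3db52e4d0054936ff52bf1ad37ac5cbca2e26a2f3c0b3c23351936d9dfd6b",
    75507072,
)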
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:bbee7f9a425faba7846fad117ec9afb00a6786a4b4af07085358abd08bc4bb53
+size 151032837
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:eefe083b6454775aee01bb69e64bad53187f7b97719dea614c013fe397ac511b
 size 14575
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:50b57a34df83b700e2c13775ff734b4569b74ce7e20da3479db76577bb4e906e
 size 627
last-checkpoint/special_tokens_map.json CHANGED
@@ -12,6 +12,12 @@
     ">>SUFFIX<<",
     ">>MIDDLE<<"
   ],
-  "eos_token":
+  "eos_token": {
+    "content": "<|endoftext|>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
   "pad_token": "<|endoftext|>"
 }
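The change above promotes eos_token from a bare string to a token object with explicit lstrip/rstrip/normalized/single_word flags. A minimal sketch of the equivalent object on the tokenizers side, assuming one were rebuilding this mapping by hand; the variable names are illustrative:

from tokenizers import AddedToken

# Equivalent of the new eos_token entry in special_tokens_map.json:
# the literal "<|endoftext|>" with no stripping, normalization,
# or single-word matching applied.
eos_token = AddedToken(
    "<|endoftext|>",
    single_word=False,
    lstrip=False,
    rstrip=False,
    normalized=False,
)
# tokenizer.add_special_tokens({"eos_token": eos_token})  # how transformers would register it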
last-checkpoint/tokenizer_config.json CHANGED
@@ -113,11 +113,15 @@
   ],
   "clean_up_tokenization_spaces": true,
   "eos_token": "<|endoftext|>",
+  "max_length": 512,
   "model_input_names": [
     "input_ids",
     "attention_mask"
   ],
   "model_max_length": 2048,
   "pad_token": "<|endoftext|>",
-  "
+  "stride": 0,
+  "tokenizer_class": "PreTrainedTokenizerFast",
+  "truncation_side": "right",
+  "truncation_strategy": "longest_first"
 }
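The new keys in tokenizer_config.json above (max_length 512, stride 0, truncation_side "right", truncation_strategy "longest_first") appear to capture the truncation defaults used when tokenizing the training data. A minimal sketch, assuming the checkpoint directory is available locally, of requesting the same behaviour explicitly at call time; the input text is illustrative:

from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("last-checkpoint")  # loads the config shown above
tok.truncation_side = "right"

# Same truncation behaviour requested explicitly per call.
enc = tok(
    "example input text",
    max_length=512,
    stride=0,
    truncation="longest_first",
)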
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
-  "best_metric": 1.
-  "best_model_checkpoint": "./outputs/checkpoint-
-  "epoch": 2.
+  "best_metric": 1.1691069602966309,
+  "best_model_checkpoint": "./outputs/checkpoint-3100",
+  "epoch": 2.33224043715847,
   "eval_steps": 100,
-  "global_step":
+  "global_step": 3200,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -445,79 +445,23 @@
     {
       "epoch": 2.33,
       "learning_rate": 0.0002,
-      "loss": 1.
+      "loss": 1.1534,
       "step": 3200
     },
     {
       "epoch": 2.33,
-      "eval_loss": 1.
-      "eval_runtime":
-      "eval_samples_per_second":
-      "eval_steps_per_second":
+      "eval_loss": 1.1825001239776611,
+      "eval_runtime": 339.9447,
+      "eval_samples_per_second": 18.456,
+      "eval_steps_per_second": 2.309,
       "step": 3200
-    },
-    {
-      "epoch": 2.4,
-      "learning_rate": 0.0002,
-      "loss": 1.127,
-      "step": 3300
-    },
-    {
-      "epoch": 2.4,
-      "eval_loss": 1.1483901739120483,
-      "eval_runtime": 425.2555,
-      "eval_samples_per_second": 14.753,
-      "eval_steps_per_second": 1.846,
-      "step": 3300
-    },
-    {
-      "epoch": 2.48,
-      "learning_rate": 0.0002,
-      "loss": 1.1119,
-      "step": 3400
-    },
-    {
-      "epoch": 2.48,
-      "eval_loss": 1.139683723449707,
-      "eval_runtime": 424.1155,
-      "eval_samples_per_second": 14.793,
-      "eval_steps_per_second": 1.851,
-      "step": 3400
-    },
-    {
-      "epoch": 2.55,
-      "learning_rate": 0.0002,
-      "loss": 1.1122,
-      "step": 3500
-    },
-    {
-      "epoch": 2.55,
-      "eval_loss": 1.130289077758789,
-      "eval_runtime": 417.8399,
-      "eval_samples_per_second": 15.015,
-      "eval_steps_per_second": 1.879,
-      "step": 3500
-    },
-    {
-      "epoch": 2.62,
-      "learning_rate": 0.0002,
-      "loss": 1.1058,
-      "step": 3600
-    },
-    {
-      "epoch": 2.62,
-      "eval_loss": 1.120961308479309,
-      "eval_runtime": 438.2292,
-      "eval_samples_per_second": 14.317,
-      "eval_steps_per_second": 1.791,
-      "step": 3600
     }
   ],
   "logging_steps": 100,
   "max_steps": 4116,
   "num_train_epochs": 3,
   "save_steps": 100,
-  "total_flos": 1.
+  "total_flos": 1.0375834790343045e+18,
   "trial_name": null,
   "trial_params": null
 }
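trainer_state.json is the Trainer's bookkeeping file; the records shown in this diff live in its log_history list next to best_metric, global_step, and the scheduling fields. A minimal sketch of reading the updated state back, assuming the checkpoint directory has been downloaded locally:

import json

with open("last-checkpoint/trainer_state.json") as f:
    state = json.load(f)

print("global step:", state["global_step"])        # 3200 in this commit
print("best eval metric:", state["best_metric"])   # 1.1691... at checkpoint-3100

# log_history interleaves training records ("loss") and eval records ("eval_loss").
train_losses = [(e["step"], e["loss"]) for e in state["log_history"] if "loss" in e]
eval_losses = [(e["step"], e["eval_loss"]) for e in state["log_history"] if "eval_loss" in e]
print("last train loss:", train_losses[-1])  # (3200, 1.1534)
print("last eval loss:", eval_losses[-1])    # (3200, 1.1825...)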
last-checkpoint/training_args.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:fe5603840bf23a51f167eb469a32e263b0b26363061da6a1848375d241e4e917
 size 4219
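training_args.bin is not a text file like the others here; it is the pickled TrainingArguments object that the transformers Trainer stores alongside each checkpoint. A minimal sketch of inspecting it, assuming the checkpoint directory and a compatible transformers install are available locally; weights_only=False may be required on newer PyTorch releases since this is an arbitrary pickled object:

import torch

# TrainingArguments is a pickled dataclass, not a tensor file, so transformers
# must be importable for unpickling to succeed.
args = torch.load("last-checkpoint/training_args.bin", weights_only=False)
print(type(args).__name__)  # e.g. TrainingArguments
print(args.learning_rate, args.num_train_epochs, args.save_steps)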