Training in progress, step 25, checkpoint

Files changed (7) hide show

checkpoint-25/adapter_config.json CHANGED Viewed

@@ -20,8 +20,8 @@
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
-    "k_proj",
-    "q_proj"
   ],
   "task_type": "CAUSAL_LM",
   "use_dora": false,

   "rank_pattern": {},
   "revision": null,
   "target_modules": [
+    "q_proj",
+    "k_proj"
   ],
   "task_type": "CAUSAL_LM",
   "use_dora": false,

checkpoint-25/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e1563bb2f99164590d2dbdc515e408979c62a8093aeff08436576e9c26413bc8
 size 26235864

 version https://git-lfs.github.com/spec/v1
+oid sha256:ebe4050447deec581bd3ff93dfbe198cc18566d7b2d38d43238acf2738c2abdd
 size 26235864

checkpoint-25/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:cbe492804c82bf09b809e818917727b1e18838b0bc10272e3390e307247a6458
 size 52523386

 version https://git-lfs.github.com/spec/v1
+oid sha256:f4cb47b2b9565fe2265c4b343bef6711b4cb97a25458bf2ab7f0af03098f83fb
 size 52523386

checkpoint-25/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2fa8d5f3e9c6b5abca54e31be902d76e695522f08d66d4ead12cfcc409589fb2
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:abd83e428a64cc7191cf368e64a77290525a3cadd3b210219a78a857e2fdb8ae
 size 14244

checkpoint-25/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:641e38a7f663447384b1de7ed580f2bead07e3637186dcf8d53646516defeff0
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:30b5ec54b43ed8952919b704047aa882c4dd3048daa7c693f2bdcae16f7aa402
 size 1064

checkpoint-25/trainer_state.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
-  "best_metric": 6.239687919616699,
   "best_model_checkpoint": "./Qwen1-5-4B-Chat-hindi-sft/checkpoint-25",
-  "epoch": 0.0036072433446360293,
   "eval_steps": 25,
   "global_step": 25,
   "is_hyper_param_search": false,
@@ -10,26 +10,26 @@
   "log_history": [
     {
       "epoch": 0.0,
-      "grad_norm": 4.076544761657715,
-      "learning_rate": 4.807692307692308e-06,
-      "loss": 6.0722,
       "step": 25
     },
     {
       "epoch": 0.0,
-      "eval_loss": 6.239687919616699,
-      "eval_runtime": 258.5805,
-      "eval_samples_per_second": 1.087,
-      "eval_steps_per_second": 1.087,
       "step": 25
     }
   ],
   "logging_steps": 25,
-  "max_steps": 34650,
   "num_input_tokens_seen": 0,
-  "num_train_epochs": 5,
   "save_steps": 25,
-  "total_flos": 626456460764160.0,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 5.88370418548584,
   "best_model_checkpoint": "./Qwen1-5-4B-Chat-hindi-sft/checkpoint-25",
+  "epoch": 0.003574747980267391,
   "eval_steps": 25,
   "global_step": 25,
   "is_hyper_param_search": false,
   "log_history": [
     {
       "epoch": 0.0,
+      "grad_norm": 5.597507476806641,
+      "learning_rate": 7.936507936507936e-06,
+      "loss": 6.5107,
       "step": 25
     },
     {
       "epoch": 0.0,
+      "eval_loss": 5.88370418548584,
+      "eval_runtime": 27.753,
+      "eval_samples_per_second": 1.045,
+      "eval_steps_per_second": 1.045,
       "step": 25
     }
   ],
   "logging_steps": 25,
+  "max_steps": 20979,
   "num_input_tokens_seen": 0,
+  "num_train_epochs": 3,
   "save_steps": 25,
+  "total_flos": 500129031075840.0,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null

checkpoint-25/training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:81d460fd33adef439c648b974d0d6b51823bfbb80f3fee5d7110d1f149b86cd6
 size 5048

 version https://git-lfs.github.com/spec/v1
+oid sha256:e9388d54f20b310f46b4eefba4bae25cec3658ca4bd837cd5c0be8d9c2b1f2f7
 size 5048