Upload folder using huggingface_hub
- config.json +1 -2
- model.safetensors +1 -1
- optimizer.pt +1 -1
- rng_state.pth +1 -1
- scheduler.pt +1 -1
- trainer_state.json +3 -213
config.json
CHANGED
@@ -15,8 +15,7 @@
   },
   "auto_map": {
     "AutoConfig": "ultravox_config.UltravoxConfig",
-    "AutoModel": "ultravox_model.UltravoxModel"
-    "AutoProcessor": "ultravox_processing.UltravoxProcessor"
+    "AutoModel": "ultravox_model.UltravoxModel"
   },
   "hidden_size": 4096,
   "ignore_index": -100,
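This change drops the "AutoProcessor" entry from auto_map and keeps "AutoConfig" and "AutoModel" pointing at the repo-local ultravox_config.UltravoxConfig and ultravox_model.UltravoxModel classes. A minimal sketch of how such auto_map entries are typically consumed, assuming a hypothetical repo id and that the custom ultravox_config.py / ultravox_model.py modules ship alongside config.json (trust_remote_code is required for repo-hosted classes):

    from transformers import AutoConfig, AutoModel

    # Hypothetical repo id; any checkpoint carrying this config.json behaves the same way.
    repo_id = "your-org/ultravox-checkpoint"

    # auto_map tells transformers which repo-local class backs each Auto* entry point,
    # so loading must opt into executing the repo's custom code.
    config = AutoConfig.from_pretrained(repo_id, trust_remote_code=True)  # -> UltravoxConfig
    model = AutoModel.from_pretrained(repo_id, trust_remote_code=True)    # -> UltravoxModel

    print(type(config).__name__, type(model).__name__)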
model.safetensors
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:7287a044c945ad4f1903cef5887e534c70cddb3852b1a2bd80a230e61d00b22f
 size 98594264
optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:2b3fde1382a141c8b9075d0038d9b4ecfa526a15bca7e0b376ced5c550dce6db
 size 197192018
rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:50406e2683fae845a67d2522407cfd71c13ce88867a1aac0dc9d26a8b3a5f840
 size 14244
scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:ae7d1df7fbe282a2a1f6e0263b266c20c101d0f96ff5d77217fca16dab8166f9
 size 1064
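Each binary file above is stored as a Git LFS pointer: a three-line text file carrying the spec version, the sha256 of the real payload, and its size in bytes. A small sketch, assuming the actual payloads have been downloaded next to their pointers, of checking a file against the oid recorded in this commit (file name and hash taken from the model.safetensors pointer above):

    import hashlib
    from pathlib import Path

    def sha256_of(path: str, chunk_size: int = 1 << 20) -> str:
        """Stream the file through sha256 so large checkpoints need not fit in memory."""
        digest = hashlib.sha256()
        with Path(path).open("rb") as fh:
            for chunk in iter(lambda: fh.read(chunk_size), b""):
                digest.update(chunk)
        return digest.hexdigest()

    # Expected oid copied from the pointer diff above.
    expected = "7287a044c945ad4f1903cef5887e534c70cddb3852b1a2bd80a230e61d00b22f"
    assert sha256_of("model.safetensors") == expected, "file does not match the LFS pointer"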
trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch":
+  "epoch": 3.5842293906810037,
   "eval_steps": 1000,
-  "global_step":
+  "global_step": 6000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -434,216 +434,6 @@
       "learning_rate": 0.0016026346363792565,
       "loss": 0.1546,
       "step": 6000
-    },
-    {
-      "epoch": 3.6439665471923535,
-      "grad_norm": 0.1328125,
-      "learning_rate": 0.0015877852522924731,
-      "loss": 0.1481,
-      "step": 6100
-    },
-    {
-      "epoch": 3.7037037037037037,
-      "grad_norm": 0.07373046875,
-      "learning_rate": 0.0015727351400805052,
-      "loss": 0.1503,
-      "step": 6200
-    },
-    {
-      "epoch": 3.763440860215054,
-      "grad_norm": 0.07421875,
-      "learning_rate": 0.0015574894393428856,
-      "loss": 0.1453,
-      "step": 6300
-    },
-    {
-      "epoch": 3.823178016726404,
-      "grad_norm": 0.059326171875,
-      "learning_rate": 0.0015420533564724495,
-      "loss": 0.1446,
-      "step": 6400
-    },
-    {
-      "epoch": 3.882915173237754,
-      "grad_norm": 0.0810546875,
-      "learning_rate": 0.0015264321628773558,
-      "loss": 0.1437,
-      "step": 6500
-    },
-    {
-      "epoch": 3.942652329749104,
-      "grad_norm": 0.07080078125,
-      "learning_rate": 0.001510631193180907,
-      "loss": 0.1412,
-      "step": 6600
-    },
-    {
-      "epoch": 4.002389486260454,
-      "grad_norm": 0.103515625,
-      "learning_rate": 0.001494655843399779,
-      "loss": 0.136,
-      "step": 6700
-    },
-    {
-      "epoch": 4.062126642771804,
-      "grad_norm": 0.076171875,
-      "learning_rate": 0.0014785115691012866,
-      "loss": 0.1356,
-      "step": 6800
-    },
-    {
-      "epoch": 4.121863799283154,
-      "grad_norm": 0.0927734375,
-      "learning_rate": 0.0014622038835403132,
-      "loss": 0.139,
-      "step": 6900
-    },
-    {
-      "epoch": 4.181600955794504,
-      "grad_norm": 0.091796875,
-      "learning_rate": 0.0014457383557765385,
-      "loss": 0.1374,
-      "step": 7000
-    },
-    {
-      "epoch": 4.241338112305854,
-      "grad_norm": 0.11962890625,
-      "learning_rate": 0.001429120608772609,
-      "loss": 0.136,
-      "step": 7100
-    },
-    {
-      "epoch": 4.301075268817204,
-      "grad_norm": 0.0693359375,
-      "learning_rate": 0.0014123563174739035,
-      "loss": 0.132,
-      "step": 7200
-    },
-    {
-      "epoch": 4.360812425328555,
-      "grad_norm": 0.0849609375,
-      "learning_rate": 0.0013954512068705424,
-      "loss": 0.1286,
-      "step": 7300
-    },
-    {
-      "epoch": 4.4205495818399045,
-      "grad_norm": 0.10009765625,
-      "learning_rate": 0.0013784110500423103,
-      "loss": 0.1252,
-      "step": 7400
-    },
-    {
-      "epoch": 4.480286738351254,
-      "grad_norm": 0.10302734375,
-      "learning_rate": 0.0013612416661871532,
-      "loss": 0.1261,
-      "step": 7500
-    },
-    {
-      "epoch": 4.540023894862604,
-      "grad_norm": 0.080078125,
-      "learning_rate": 0.0013439489186339282,
-      "loss": 0.1262,
-      "step": 7600
-    },
-    {
-      "epoch": 4.599761051373955,
-      "grad_norm": 0.0673828125,
-      "learning_rate": 0.0013265387128400831,
-      "loss": 0.1199,
-      "step": 7700
-    },
-    {
-      "epoch": 4.659498207885305,
-      "grad_norm": 0.06640625,
-      "learning_rate": 0.0013090169943749475,
-      "loss": 0.1201,
-      "step": 7800
-    },
-    {
-      "epoch": 4.7192353643966545,
-      "grad_norm": 0.08935546875,
-      "learning_rate": 0.0012913897468893247,
-      "loss": 0.1149,
-      "step": 7900
-    },
-    {
-      "epoch": 4.778972520908005,
-      "grad_norm": 0.0869140625,
-      "learning_rate": 0.0012736629900720832,
-      "loss": 0.1149,
-      "step": 8000
-    },
-    {
-      "epoch": 4.838709677419355,
-      "grad_norm": 0.05712890625,
-      "learning_rate": 0.0012558427775944357,
-      "loss": 0.1155,
-      "step": 8100
-    },
-    {
-      "epoch": 4.898446833930705,
-      "grad_norm": 0.08837890625,
-      "learning_rate": 0.0012379351950426187,
-      "loss": 0.113,
-      "step": 8200
-    },
-    {
-      "epoch": 4.958183990442055,
-      "grad_norm": 0.08154296875,
-      "learning_rate": 0.0012199463578396689,
-      "loss": 0.1101,
-      "step": 8300
-    },
-    {
-      "epoch": 5.017921146953405,
-      "grad_norm": 0.1005859375,
-      "learning_rate": 0.0012018824091570102,
-      "loss": 0.1094,
-      "step": 8400
-    },
-    {
-      "epoch": 5.077658303464755,
-      "grad_norm": 0.10107421875,
-      "learning_rate": 0.0011837495178165704,
-      "loss": 0.1089,
-      "step": 8500
-    },
-    {
-      "epoch": 5.137395459976105,
-      "grad_norm": 0.0625,
-      "learning_rate": 0.00116555387618413,
-      "loss": 0.1088,
-      "step": 8600
-    },
-    {
-      "epoch": 5.197132616487456,
-      "grad_norm": 0.0712890625,
-      "learning_rate": 0.0011473016980546376,
-      "loss": 0.1091,
-      "step": 8700
-    },
-    {
-      "epoch": 5.256869772998805,
-      "grad_norm": 0.07421875,
-      "learning_rate": 0.0011289992165302034,
-      "loss": 0.1075,
-      "step": 8800
-    },
-    {
-      "epoch": 5.316606929510155,
-      "grad_norm": 0.09326171875,
-      "learning_rate": 0.001110652681891501,
-      "loss": 0.105,
-      "step": 8900
-    },
-    {
-      "epoch": 5.376344086021505,
-      "grad_norm": 0.07568359375,
-      "learning_rate": 0.001092268359463302,
-      "loss": 0.1031,
-      "step": 9000
     }
   ],
   "logging_steps": 100,
@@ -663,7 +453,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 1.
+  "total_flos": 1.1559951680731546e+17,
   "train_batch_size": 6,
   "trial_name": null,
   "trial_params": null
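The trainer_state.json change rolls the recorded state back from roughly step 9000 to step 6000: the header now reports epoch 3.584… with global_step 6000, and every log_history entry after step 6000 is removed. A quick sketch, assuming the file sits in the current checkpoint directory, of confirming that the final logged entry agrees with the new header:

    import json

    with open("trainer_state.json") as fh:
        state = json.load(fh)

    last = state["log_history"][-1]
    # After this commit the header and the final log entry should both report step 6000.
    print(state["epoch"], state["global_step"])  # 3.5842293906810037 6000
    print(last["step"], last["loss"])            # 6000 0.1546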