googlefan committed on
Commit
d71fdcb
·
verified ·
1 Parent(s): 7971a86

Upload folder using huggingface_hub

Browse files
Files changed (6) hide show
  1. config.json +1 -2
  2. model.safetensors +1 -1
  3. optimizer.pt +1 -1
  4. rng_state.pth +1 -1
  5. scheduler.pt +1 -1
  6. trainer_state.json +3 -213
config.json CHANGED
@@ -15,8 +15,7 @@
15
  },
16
  "auto_map": {
17
  "AutoConfig": "ultravox_config.UltravoxConfig",
18
- "AutoModel": "ultravox_model.UltravoxModel",
19
- "AutoProcessor": "ultravox_processing.UltravoxProcessor"
20
  },
21
  "hidden_size": 4096,
22
  "ignore_index": -100,
 
15
  },
16
  "auto_map": {
17
  "AutoConfig": "ultravox_config.UltravoxConfig",
18
+ "AutoModel": "ultravox_model.UltravoxModel"
 
19
  },
20
  "hidden_size": 4096,
21
  "ignore_index": -100,
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0ce82a72c3765cab8c5232b02ba1a152c79c9250d6fba412dc2c4cd373055fbf
3
  size 98594264
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7287a044c945ad4f1903cef5887e534c70cddb3852b1a2bd80a230e61d00b22f
3
  size 98594264
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:66d76750444a14e1344e32dcf439136877c4f2f7b595f7cb0e375502c4488ada
3
  size 197192018
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2b3fde1382a141c8b9075d0038d9b4ecfa526a15bca7e0b376ced5c550dce6db
3
  size 197192018
rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:dced71fa402f23b7189468799ae0c433df0a210052db92d3afa695f8c8d907fb
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:50406e2683fae845a67d2522407cfd71c13ce88867a1aac0dc9d26a8b3a5f840
3
  size 14244
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9fbedce42792dc31946de3a39b22f6cdb8750f11aec852fa8384aadcb3bf9152
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ae7d1df7fbe282a2a1f6e0263b266c20c101d0f96ff5d77217fca16dab8166f9
3
  size 1064
trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 5.376344086021505,
5
  "eval_steps": 1000,
6
- "global_step": 9000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -434,216 +434,6 @@
434
  "learning_rate": 0.0016026346363792565,
435
  "loss": 0.1546,
436
  "step": 6000
437
- },
438
- {
439
- "epoch": 3.6439665471923535,
440
- "grad_norm": 0.1328125,
441
- "learning_rate": 0.0015877852522924731,
442
- "loss": 0.1481,
443
- "step": 6100
444
- },
445
- {
446
- "epoch": 3.7037037037037037,
447
- "grad_norm": 0.07373046875,
448
- "learning_rate": 0.0015727351400805052,
449
- "loss": 0.1503,
450
- "step": 6200
451
- },
452
- {
453
- "epoch": 3.763440860215054,
454
- "grad_norm": 0.07421875,
455
- "learning_rate": 0.0015574894393428856,
456
- "loss": 0.1453,
457
- "step": 6300
458
- },
459
- {
460
- "epoch": 3.823178016726404,
461
- "grad_norm": 0.059326171875,
462
- "learning_rate": 0.0015420533564724495,
463
- "loss": 0.1446,
464
- "step": 6400
465
- },
466
- {
467
- "epoch": 3.882915173237754,
468
- "grad_norm": 0.0810546875,
469
- "learning_rate": 0.0015264321628773558,
470
- "loss": 0.1437,
471
- "step": 6500
472
- },
473
- {
474
- "epoch": 3.942652329749104,
475
- "grad_norm": 0.07080078125,
476
- "learning_rate": 0.001510631193180907,
477
- "loss": 0.1412,
478
- "step": 6600
479
- },
480
- {
481
- "epoch": 4.002389486260454,
482
- "grad_norm": 0.103515625,
483
- "learning_rate": 0.001494655843399779,
484
- "loss": 0.136,
485
- "step": 6700
486
- },
487
- {
488
- "epoch": 4.062126642771804,
489
- "grad_norm": 0.076171875,
490
- "learning_rate": 0.0014785115691012866,
491
- "loss": 0.1356,
492
- "step": 6800
493
- },
494
- {
495
- "epoch": 4.121863799283154,
496
- "grad_norm": 0.0927734375,
497
- "learning_rate": 0.0014622038835403132,
498
- "loss": 0.139,
499
- "step": 6900
500
- },
501
- {
502
- "epoch": 4.181600955794504,
503
- "grad_norm": 0.091796875,
504
- "learning_rate": 0.0014457383557765385,
505
- "loss": 0.1374,
506
- "step": 7000
507
- },
508
- {
509
- "epoch": 4.241338112305854,
510
- "grad_norm": 0.11962890625,
511
- "learning_rate": 0.001429120608772609,
512
- "loss": 0.136,
513
- "step": 7100
514
- },
515
- {
516
- "epoch": 4.301075268817204,
517
- "grad_norm": 0.0693359375,
518
- "learning_rate": 0.0014123563174739035,
519
- "loss": 0.132,
520
- "step": 7200
521
- },
522
- {
523
- "epoch": 4.360812425328555,
524
- "grad_norm": 0.0849609375,
525
- "learning_rate": 0.0013954512068705424,
526
- "loss": 0.1286,
527
- "step": 7300
528
- },
529
- {
530
- "epoch": 4.4205495818399045,
531
- "grad_norm": 0.10009765625,
532
- "learning_rate": 0.0013784110500423103,
533
- "loss": 0.1252,
534
- "step": 7400
535
- },
536
- {
537
- "epoch": 4.480286738351254,
538
- "grad_norm": 0.10302734375,
539
- "learning_rate": 0.0013612416661871532,
540
- "loss": 0.1261,
541
- "step": 7500
542
- },
543
- {
544
- "epoch": 4.540023894862604,
545
- "grad_norm": 0.080078125,
546
- "learning_rate": 0.0013439489186339282,
547
- "loss": 0.1262,
548
- "step": 7600
549
- },
550
- {
551
- "epoch": 4.599761051373955,
552
- "grad_norm": 0.0673828125,
553
- "learning_rate": 0.0013265387128400831,
554
- "loss": 0.1199,
555
- "step": 7700
556
- },
557
- {
558
- "epoch": 4.659498207885305,
559
- "grad_norm": 0.06640625,
560
- "learning_rate": 0.0013090169943749475,
561
- "loss": 0.1201,
562
- "step": 7800
563
- },
564
- {
565
- "epoch": 4.7192353643966545,
566
- "grad_norm": 0.08935546875,
567
- "learning_rate": 0.0012913897468893247,
568
- "loss": 0.1149,
569
- "step": 7900
570
- },
571
- {
572
- "epoch": 4.778972520908005,
573
- "grad_norm": 0.0869140625,
574
- "learning_rate": 0.0012736629900720832,
575
- "loss": 0.1149,
576
- "step": 8000
577
- },
578
- {
579
- "epoch": 4.838709677419355,
580
- "grad_norm": 0.05712890625,
581
- "learning_rate": 0.0012558427775944357,
582
- "loss": 0.1155,
583
- "step": 8100
584
- },
585
- {
586
- "epoch": 4.898446833930705,
587
- "grad_norm": 0.08837890625,
588
- "learning_rate": 0.0012379351950426187,
589
- "loss": 0.113,
590
- "step": 8200
591
- },
592
- {
593
- "epoch": 4.958183990442055,
594
- "grad_norm": 0.08154296875,
595
- "learning_rate": 0.0012199463578396689,
596
- "loss": 0.1101,
597
- "step": 8300
598
- },
599
- {
600
- "epoch": 5.017921146953405,
601
- "grad_norm": 0.1005859375,
602
- "learning_rate": 0.0012018824091570102,
603
- "loss": 0.1094,
604
- "step": 8400
605
- },
606
- {
607
- "epoch": 5.077658303464755,
608
- "grad_norm": 0.10107421875,
609
- "learning_rate": 0.0011837495178165704,
610
- "loss": 0.1089,
611
- "step": 8500
612
- },
613
- {
614
- "epoch": 5.137395459976105,
615
- "grad_norm": 0.0625,
616
- "learning_rate": 0.00116555387618413,
617
- "loss": 0.1088,
618
- "step": 8600
619
- },
620
- {
621
- "epoch": 5.197132616487456,
622
- "grad_norm": 0.0712890625,
623
- "learning_rate": 0.0011473016980546376,
624
- "loss": 0.1091,
625
- "step": 8700
626
- },
627
- {
628
- "epoch": 5.256869772998805,
629
- "grad_norm": 0.07421875,
630
- "learning_rate": 0.0011289992165302034,
631
- "loss": 0.1075,
632
- "step": 8800
633
- },
634
- {
635
- "epoch": 5.316606929510155,
636
- "grad_norm": 0.09326171875,
637
- "learning_rate": 0.001110652681891501,
638
- "loss": 0.105,
639
- "step": 8900
640
- },
641
- {
642
- "epoch": 5.376344086021505,
643
- "grad_norm": 0.07568359375,
644
- "learning_rate": 0.001092268359463302,
645
- "loss": 0.1031,
646
- "step": 9000
647
  }
648
  ],
649
  "logging_steps": 100,
@@ -663,7 +453,7 @@
663
  "attributes": {}
664
  }
665
  },
666
- "total_flos": 1.734784904880046e+17,
667
  "train_batch_size": 6,
668
  "trial_name": null,
669
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 3.5842293906810037,
5
  "eval_steps": 1000,
6
+ "global_step": 6000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
434
  "learning_rate": 0.0016026346363792565,
435
  "loss": 0.1546,
436
  "step": 6000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
437
  }
438
  ],
439
  "logging_steps": 100,
 
453
  "attributes": {}
454
  }
455
  },
456
+ "total_flos": 1.1559951680731546e+17,
457
  "train_batch_size": 6,
458
  "trial_name": null,
459
  "trial_params": null