Training in progress, step 2850, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1370666272
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:789f226e44ff6175f0650db489f0554e7f69dc5b63c5b19f6f8f90422e097bc3
|
3 |
size 1370666272
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 697294462
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:53c7ca855a09c6703804528921ba002a4454692bef620396449f5abdd6380228
|
3 |
size 697294462
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:afaac9fbe4271faaba5196ab94e52163e6bf1b95bd8386498fc1f2c58b28a4a4
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 0.
|
5 |
"eval_steps": 500,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -19607,6 +19607,356 @@
|
|
19607 |
"learning_rate": 0.00019179977330980487,
|
19608 |
"loss": 0.8965,
|
19609 |
"step": 2800
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
19610 |
}
|
19611 |
],
|
19612 |
"logging_steps": 1,
|
@@ -19626,7 +19976,7 @@
|
|
19626 |
"attributes": {}
|
19627 |
}
|
19628 |
},
|
19629 |
-
"total_flos": 1.
|
19630 |
"train_batch_size": 32,
|
19631 |
"trial_name": null,
|
19632 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 0.6614831147731229,
|
5 |
"eval_steps": 500,
|
6 |
+
"global_step": 2850,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
19607 |
"learning_rate": 0.00019179977330980487,
|
19608 |
"loss": 0.8965,
|
19609 |
"step": 2800
|
19610 |
+
},
|
19611 |
+
{
|
19612 |
+
"epoch": 0.6501102471857956,
|
19613 |
+
"grad_norm": 0.4750087857246399,
|
19614 |
+
"learning_rate": 0.00019179398816212382,
|
19615 |
+
"loss": 0.9313,
|
19616 |
+
"step": 2801
|
19617 |
+
},
|
19618 |
+
{
|
19619 |
+
"epoch": 0.6503423465243124,
|
19620 |
+
"grad_norm": 0.4137982130050659,
|
19621 |
+
"learning_rate": 0.00019178820106180094,
|
19622 |
+
"loss": 0.8269,
|
19623 |
+
"step": 2802
|
19624 |
+
},
|
19625 |
+
{
|
19626 |
+
"epoch": 0.6505744458628293,
|
19627 |
+
"grad_norm": 0.48815712332725525,
|
19628 |
+
"learning_rate": 0.00019178241200895935,
|
19629 |
+
"loss": 0.8957,
|
19630 |
+
"step": 2803
|
19631 |
+
},
|
19632 |
+
{
|
19633 |
+
"epoch": 0.6508065452013462,
|
19634 |
+
"grad_norm": 0.5212056636810303,
|
19635 |
+
"learning_rate": 0.0001917766210037222,
|
19636 |
+
"loss": 0.826,
|
19637 |
+
"step": 2804
|
19638 |
+
},
|
19639 |
+
{
|
19640 |
+
"epoch": 0.651038644539863,
|
19641 |
+
"grad_norm": 0.46167051792144775,
|
19642 |
+
"learning_rate": 0.0001917708280462126,
|
19643 |
+
"loss": 0.9014,
|
19644 |
+
"step": 2805
|
19645 |
+
},
|
19646 |
+
{
|
19647 |
+
"epoch": 0.6512707438783799,
|
19648 |
+
"grad_norm": 0.47623032331466675,
|
19649 |
+
"learning_rate": 0.00019176503313655393,
|
19650 |
+
"loss": 0.8882,
|
19651 |
+
"step": 2806
|
19652 |
+
},
|
19653 |
+
{
|
19654 |
+
"epoch": 0.6515028432168968,
|
19655 |
+
"grad_norm": 0.46180862188339233,
|
19656 |
+
"learning_rate": 0.00019175923627486936,
|
19657 |
+
"loss": 0.9117,
|
19658 |
+
"step": 2807
|
19659 |
+
},
|
19660 |
+
{
|
19661 |
+
"epoch": 0.6517349425554138,
|
19662 |
+
"grad_norm": 0.4569379687309265,
|
19663 |
+
"learning_rate": 0.0001917534374612822,
|
19664 |
+
"loss": 0.8399,
|
19665 |
+
"step": 2808
|
19666 |
+
},
|
19667 |
+
{
|
19668 |
+
"epoch": 0.6519670418939306,
|
19669 |
+
"grad_norm": 0.42162245512008667,
|
19670 |
+
"learning_rate": 0.00019174763669591583,
|
19671 |
+
"loss": 0.8652,
|
19672 |
+
"step": 2809
|
19673 |
+
},
|
19674 |
+
{
|
19675 |
+
"epoch": 0.6521991412324475,
|
19676 |
+
"grad_norm": 0.4374902546405792,
|
19677 |
+
"learning_rate": 0.0001917418339788936,
|
19678 |
+
"loss": 0.895,
|
19679 |
+
"step": 2810
|
19680 |
+
},
|
19681 |
+
{
|
19682 |
+
"epoch": 0.6524312405709644,
|
19683 |
+
"grad_norm": 0.4497464895248413,
|
19684 |
+
"learning_rate": 0.000191736029310339,
|
19685 |
+
"loss": 0.8953,
|
19686 |
+
"step": 2811
|
19687 |
+
},
|
19688 |
+
{
|
19689 |
+
"epoch": 0.6526633399094812,
|
19690 |
+
"grad_norm": 0.4323320686817169,
|
19691 |
+
"learning_rate": 0.00019173022269037548,
|
19692 |
+
"loss": 0.8703,
|
19693 |
+
"step": 2812
|
19694 |
+
},
|
19695 |
+
{
|
19696 |
+
"epoch": 0.6528954392479981,
|
19697 |
+
"grad_norm": 0.45908528566360474,
|
19698 |
+
"learning_rate": 0.00019172441411912657,
|
19699 |
+
"loss": 0.8765,
|
19700 |
+
"step": 2813
|
19701 |
+
},
|
19702 |
+
{
|
19703 |
+
"epoch": 0.6531275385865151,
|
19704 |
+
"grad_norm": 0.41703182458877563,
|
19705 |
+
"learning_rate": 0.00019171860359671583,
|
19706 |
+
"loss": 0.8681,
|
19707 |
+
"step": 2814
|
19708 |
+
},
|
19709 |
+
{
|
19710 |
+
"epoch": 0.6533596379250319,
|
19711 |
+
"grad_norm": 0.45060259103775024,
|
19712 |
+
"learning_rate": 0.00019171279112326683,
|
19713 |
+
"loss": 0.8919,
|
19714 |
+
"step": 2815
|
19715 |
+
},
|
19716 |
+
{
|
19717 |
+
"epoch": 0.6535917372635488,
|
19718 |
+
"grad_norm": 0.4701296389102936,
|
19719 |
+
"learning_rate": 0.00019170697669890324,
|
19720 |
+
"loss": 0.8749,
|
19721 |
+
"step": 2816
|
19722 |
+
},
|
19723 |
+
{
|
19724 |
+
"epoch": 0.6538238366020657,
|
19725 |
+
"grad_norm": 0.4668188691139221,
|
19726 |
+
"learning_rate": 0.00019170116032374876,
|
19727 |
+
"loss": 0.8601,
|
19728 |
+
"step": 2817
|
19729 |
+
},
|
19730 |
+
{
|
19731 |
+
"epoch": 0.6540559359405825,
|
19732 |
+
"grad_norm": 0.42963141202926636,
|
19733 |
+
"learning_rate": 0.0001916953419979271,
|
19734 |
+
"loss": 0.884,
|
19735 |
+
"step": 2818
|
19736 |
+
},
|
19737 |
+
{
|
19738 |
+
"epoch": 0.6542880352790994,
|
19739 |
+
"grad_norm": 0.5206764340400696,
|
19740 |
+
"learning_rate": 0.00019168952172156202,
|
19741 |
+
"loss": 0.8831,
|
19742 |
+
"step": 2819
|
19743 |
+
},
|
19744 |
+
{
|
19745 |
+
"epoch": 0.6545201346176164,
|
19746 |
+
"grad_norm": 0.4822680652141571,
|
19747 |
+
"learning_rate": 0.0001916836994947773,
|
19748 |
+
"loss": 0.8141,
|
19749 |
+
"step": 2820
|
19750 |
+
},
|
19751 |
+
{
|
19752 |
+
"epoch": 0.6547522339561332,
|
19753 |
+
"grad_norm": 0.44132062792778015,
|
19754 |
+
"learning_rate": 0.00019167787531769684,
|
19755 |
+
"loss": 0.8837,
|
19756 |
+
"step": 2821
|
19757 |
+
},
|
19758 |
+
{
|
19759 |
+
"epoch": 0.6549843332946501,
|
19760 |
+
"grad_norm": 0.47267404198646545,
|
19761 |
+
"learning_rate": 0.00019167204919044451,
|
19762 |
+
"loss": 0.9059,
|
19763 |
+
"step": 2822
|
19764 |
+
},
|
19765 |
+
{
|
19766 |
+
"epoch": 0.655216432633167,
|
19767 |
+
"grad_norm": 0.4189220070838928,
|
19768 |
+
"learning_rate": 0.00019166622111314426,
|
19769 |
+
"loss": 0.8696,
|
19770 |
+
"step": 2823
|
19771 |
+
},
|
19772 |
+
{
|
19773 |
+
"epoch": 0.6554485319716838,
|
19774 |
+
"grad_norm": 0.41616180539131165,
|
19775 |
+
"learning_rate": 0.0001916603910859201,
|
19776 |
+
"loss": 0.8296,
|
19777 |
+
"step": 2824
|
19778 |
+
},
|
19779 |
+
{
|
19780 |
+
"epoch": 0.6556806313102007,
|
19781 |
+
"grad_norm": 0.4162457287311554,
|
19782 |
+
"learning_rate": 0.00019165455910889593,
|
19783 |
+
"loss": 0.8204,
|
19784 |
+
"step": 2825
|
19785 |
+
},
|
19786 |
+
{
|
19787 |
+
"epoch": 0.6559127306487177,
|
19788 |
+
"grad_norm": 0.4778987467288971,
|
19789 |
+
"learning_rate": 0.0001916487251821959,
|
19790 |
+
"loss": 0.8528,
|
19791 |
+
"step": 2826
|
19792 |
+
},
|
19793 |
+
{
|
19794 |
+
"epoch": 0.6561448299872346,
|
19795 |
+
"grad_norm": 0.4973873198032379,
|
19796 |
+
"learning_rate": 0.0001916428893059441,
|
19797 |
+
"loss": 0.8403,
|
19798 |
+
"step": 2827
|
19799 |
+
},
|
19800 |
+
{
|
19801 |
+
"epoch": 0.6563769293257514,
|
19802 |
+
"grad_norm": 0.4930678904056549,
|
19803 |
+
"learning_rate": 0.00019163705148026464,
|
19804 |
+
"loss": 0.8223,
|
19805 |
+
"step": 2828
|
19806 |
+
},
|
19807 |
+
{
|
19808 |
+
"epoch": 0.6566090286642683,
|
19809 |
+
"grad_norm": 0.44355422258377075,
|
19810 |
+
"learning_rate": 0.00019163121170528175,
|
19811 |
+
"loss": 0.8361,
|
19812 |
+
"step": 2829
|
19813 |
+
},
|
19814 |
+
{
|
19815 |
+
"epoch": 0.6568411280027852,
|
19816 |
+
"grad_norm": 0.45476454496383667,
|
19817 |
+
"learning_rate": 0.0001916253699811196,
|
19818 |
+
"loss": 0.8712,
|
19819 |
+
"step": 2830
|
19820 |
+
},
|
19821 |
+
{
|
19822 |
+
"epoch": 0.657073227341302,
|
19823 |
+
"grad_norm": 0.4533182382583618,
|
19824 |
+
"learning_rate": 0.00019161952630790248,
|
19825 |
+
"loss": 0.8984,
|
19826 |
+
"step": 2831
|
19827 |
+
},
|
19828 |
+
{
|
19829 |
+
"epoch": 0.657305326679819,
|
19830 |
+
"grad_norm": 0.4435712695121765,
|
19831 |
+
"learning_rate": 0.0001916136806857547,
|
19832 |
+
"loss": 0.8294,
|
19833 |
+
"step": 2832
|
19834 |
+
},
|
19835 |
+
{
|
19836 |
+
"epoch": 0.6575374260183359,
|
19837 |
+
"grad_norm": 0.5167298316955566,
|
19838 |
+
"learning_rate": 0.00019160783311480061,
|
19839 |
+
"loss": 0.9074,
|
19840 |
+
"step": 2833
|
19841 |
+
},
|
19842 |
+
{
|
19843 |
+
"epoch": 0.6577695253568527,
|
19844 |
+
"grad_norm": 0.48255985975265503,
|
19845 |
+
"learning_rate": 0.00019160198359516456,
|
19846 |
+
"loss": 0.8771,
|
19847 |
+
"step": 2834
|
19848 |
+
},
|
19849 |
+
{
|
19850 |
+
"epoch": 0.6580016246953696,
|
19851 |
+
"grad_norm": 0.49954113364219666,
|
19852 |
+
"learning_rate": 0.00019159613212697108,
|
19853 |
+
"loss": 0.837,
|
19854 |
+
"step": 2835
|
19855 |
+
},
|
19856 |
+
{
|
19857 |
+
"epoch": 0.6582337240338865,
|
19858 |
+
"grad_norm": 0.45875173807144165,
|
19859 |
+
"learning_rate": 0.00019159027871034452,
|
19860 |
+
"loss": 0.9007,
|
19861 |
+
"step": 2836
|
19862 |
+
},
|
19863 |
+
{
|
19864 |
+
"epoch": 0.6584658233724033,
|
19865 |
+
"grad_norm": 0.4180905818939209,
|
19866 |
+
"learning_rate": 0.00019158442334540947,
|
19867 |
+
"loss": 0.9139,
|
19868 |
+
"step": 2837
|
19869 |
+
},
|
19870 |
+
{
|
19871 |
+
"epoch": 0.6586979227109203,
|
19872 |
+
"grad_norm": 0.492866188287735,
|
19873 |
+
"learning_rate": 0.00019157856603229048,
|
19874 |
+
"loss": 0.8481,
|
19875 |
+
"step": 2838
|
19876 |
+
},
|
19877 |
+
{
|
19878 |
+
"epoch": 0.6589300220494372,
|
19879 |
+
"grad_norm": 0.45765408873558044,
|
19880 |
+
"learning_rate": 0.0001915727067711121,
|
19881 |
+
"loss": 0.8913,
|
19882 |
+
"step": 2839
|
19883 |
+
},
|
19884 |
+
{
|
19885 |
+
"epoch": 0.659162121387954,
|
19886 |
+
"grad_norm": 0.4523009657859802,
|
19887 |
+
"learning_rate": 0.00019156684556199903,
|
19888 |
+
"loss": 0.8815,
|
19889 |
+
"step": 2840
|
19890 |
+
},
|
19891 |
+
{
|
19892 |
+
"epoch": 0.6593942207264709,
|
19893 |
+
"grad_norm": 0.463329941034317,
|
19894 |
+
"learning_rate": 0.00019156098240507592,
|
19895 |
+
"loss": 0.8844,
|
19896 |
+
"step": 2841
|
19897 |
+
},
|
19898 |
+
{
|
19899 |
+
"epoch": 0.6596263200649878,
|
19900 |
+
"grad_norm": 0.4301539957523346,
|
19901 |
+
"learning_rate": 0.00019155511730046748,
|
19902 |
+
"loss": 0.8209,
|
19903 |
+
"step": 2842
|
19904 |
+
},
|
19905 |
+
{
|
19906 |
+
"epoch": 0.6598584194035046,
|
19907 |
+
"grad_norm": 0.4687608480453491,
|
19908 |
+
"learning_rate": 0.0001915492502482985,
|
19909 |
+
"loss": 0.8791,
|
19910 |
+
"step": 2843
|
19911 |
+
},
|
19912 |
+
{
|
19913 |
+
"epoch": 0.6600905187420216,
|
19914 |
+
"grad_norm": 0.46065258979797363,
|
19915 |
+
"learning_rate": 0.00019154338124869377,
|
19916 |
+
"loss": 0.8791,
|
19917 |
+
"step": 2844
|
19918 |
+
},
|
19919 |
+
{
|
19920 |
+
"epoch": 0.6603226180805385,
|
19921 |
+
"grad_norm": 0.4436477720737457,
|
19922 |
+
"learning_rate": 0.0001915375103017781,
|
19923 |
+
"loss": 0.879,
|
19924 |
+
"step": 2845
|
19925 |
+
},
|
19926 |
+
{
|
19927 |
+
"epoch": 0.6605547174190554,
|
19928 |
+
"grad_norm": 0.4415607750415802,
|
19929 |
+
"learning_rate": 0.0001915316374076764,
|
19930 |
+
"loss": 0.8601,
|
19931 |
+
"step": 2846
|
19932 |
+
},
|
19933 |
+
{
|
19934 |
+
"epoch": 0.6607868167575722,
|
19935 |
+
"grad_norm": 0.46711909770965576,
|
19936 |
+
"learning_rate": 0.00019152576256651366,
|
19937 |
+
"loss": 0.8796,
|
19938 |
+
"step": 2847
|
19939 |
+
},
|
19940 |
+
{
|
19941 |
+
"epoch": 0.6610189160960891,
|
19942 |
+
"grad_norm": 0.4268472194671631,
|
19943 |
+
"learning_rate": 0.0001915198857784148,
|
19944 |
+
"loss": 0.8689,
|
19945 |
+
"step": 2848
|
19946 |
+
},
|
19947 |
+
{
|
19948 |
+
"epoch": 0.661251015434606,
|
19949 |
+
"grad_norm": 0.3973580002784729,
|
19950 |
+
"learning_rate": 0.0001915140070435048,
|
19951 |
+
"loss": 0.8466,
|
19952 |
+
"step": 2849
|
19953 |
+
},
|
19954 |
+
{
|
19955 |
+
"epoch": 0.6614831147731229,
|
19956 |
+
"grad_norm": 0.4282270669937134,
|
19957 |
+
"learning_rate": 0.00019150812636190874,
|
19958 |
+
"loss": 0.8451,
|
19959 |
+
"step": 2850
|
19960 |
}
|
19961 |
],
|
19962 |
"logging_steps": 1,
|
|
|
19976 |
"attributes": {}
|
19977 |
}
|
19978 |
},
|
19979 |
+
"total_flos": 1.2650486414966784e+18,
|
19980 |
"train_batch_size": 32,
|
19981 |
"trial_name": null,
|
19982 |
"trial_params": null
|