Model save
Browse files
- README.md +148 -0
- adapter_model.safetensors +1 -1
- all_results.json +21 -0
- eval_results.json +16 -0
- runs/Feb08_09-39-21_7dec04cc21c9/events.out.tfevents.1707385209.7dec04cc21c9.25454.0 +2 -2
- runs/Feb08_09-39-21_7dec04cc21c9/events.out.tfevents.1707549958.7dec04cc21c9.25454.1 +3 -0
- train_results.json +8 -0
- trainer_state.json +0 -0
README.md
ADDED
@@ -0,0 +1,148 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
license: apache-2.0
|
3 |
+
library_name: peft
|
4 |
+
tags:
|
5 |
+
- trl
|
6 |
+
- dpo
|
7 |
+
- generated_from_trainer
|
8 |
+
base_model: mistralai/Mistral-7B-v0.1
|
9 |
+
model-index:
|
10 |
+
- name: zephyr-7b-dpo-qlora-no-sft
|
11 |
+
results: []
|
12 |
+
---
|
13 |
+
|
14 |
+
<!-- This model card has been generated automatically according to the information the Trainer had access to. You
|
15 |
+
should probably proofread and complete it, then remove this comment. -->
|
16 |
+
|
17 |
+
# zephyr-7b-dpo-qlora-no-sft
|
18 |
+
|
19 |
+
This model is a fine-tuned version of [mistralai/Mistral-7B-v0.1](https://huggingface.co/mistralai/Mistral-7B-v0.1) on an unspecified dataset (no dataset name was recorded when this card was generated).
|
20 |
+
It achieves the following results on the evaluation set:
|
21 |
+
- Loss: 0.5486
|
22 |
+
- Rewards/chosen: -1.4557
|
23 |
+
- Rewards/rejected: -2.2032
|
24 |
+
- Rewards/accuracies: 0.7090
|
25 |
+
- Rewards/margins: 0.7475
|
26 |
+
- Logps/rejected: -484.1859
|
27 |
+
- Logps/chosen: -430.8606
|
28 |
+
- Logits/rejected: 0.8536
|
29 |
+
- Logits/chosen: 0.8124
|
30 |
+
|
31 |
+
## Model description
|
32 |
+
|
33 |
+
More information needed
|
34 |
+
|
35 |
+
## Intended uses & limitations
|
36 |
+
|
37 |
+
More information needed
|
38 |
+
|
39 |
+
## Training and evaluation data
|
40 |
+
|
41 |
+
More information needed
|
42 |
+
|
43 |
+
## Training procedure
|
44 |
+
|
45 |
+
### Training hyperparameters
|
46 |
+
|
47 |
+
The following hyperparameters were used during training:
|
48 |
+
- learning_rate: 5e-06
|
49 |
+
- train_batch_size: 1
|
50 |
+
- eval_batch_size: 2
|
51 |
+
- seed: 42
|
52 |
+
- distributed_type: multi-GPU
|
53 |
+
- gradient_accumulation_steps: 8
|
54 |
+
- total_train_batch_size: 8
|
55 |
+
- optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
|
56 |
+
- lr_scheduler_type: cosine
|
57 |
+
- lr_scheduler_warmup_ratio: 0.1
|
58 |
+
- num_epochs: 1
|
59 |
+
|
60 |
+
### Training results
|
61 |
+
|
62 |
+
| Training Loss | Epoch | Step | Validation Loss | Rewards/chosen | Rewards/rejected | Rewards/accuracies | Rewards/margins | Logps/rejected | Logps/chosen | Logits/rejected | Logits/chosen |
|
63 |
+
|:-------------:|:-----:|:----:|:---------------:|:--------------:|:----------------:|:------------------:|:---------------:|:--------------:|:------------:|:---------------:|:-------------:|
|
64 |
+
| 0.6934 | 0.01 | 100 | 0.6930 | 0.0021 | 0.0018 | 0.5120 | 0.0003 | -263.6873 | -285.0847 | -2.5761 | -2.6081 |
|
65 |
+
| 0.6921 | 0.03 | 200 | 0.6923 | 0.0064 | 0.0047 | 0.5820 | 0.0017 | -263.3970 | -284.6488 | -2.5766 | -2.6089 |
|
66 |
+
| 0.6913 | 0.04 | 300 | 0.6910 | 0.0127 | 0.0083 | 0.6195 | 0.0044 | -263.0383 | -284.0253 | -2.5774 | -2.6105 |
|
67 |
+
| 0.6888 | 0.05 | 400 | 0.6894 | 0.0235 | 0.0157 | 0.6210 | 0.0077 | -262.2991 | -282.9474 | -2.5778 | -2.6114 |
|
68 |
+
| 0.6881 | 0.07 | 500 | 0.6866 | 0.0322 | 0.0186 | 0.6220 | 0.0136 | -262.0058 | -282.0685 | -2.5648 | -2.6011 |
|
69 |
+
| 0.6848 | 0.08 | 600 | 0.6829 | 0.0391 | 0.0173 | 0.6230 | 0.0218 | -262.1442 | -281.3836 | -2.5621 | -2.6006 |
|
70 |
+
| 0.6706 | 0.09 | 700 | 0.6776 | 0.0515 | 0.0169 | 0.6135 | 0.0346 | -262.1758 | -280.1425 | -2.5437 | -2.5861 |
|
71 |
+
| 0.6544 | 0.1 | 800 | 0.6650 | -0.0843 | -0.1603 | 0.6065 | 0.0760 | -279.8956 | -293.7216 | -2.5208 | -2.5676 |
|
72 |
+
| 0.668 | 0.12 | 900 | 0.6552 | -0.1689 | -0.2798 | 0.6170 | 0.1109 | -291.8528 | -302.1819 | -2.5180 | -2.5613 |
|
73 |
+
| 0.6285 | 0.13 | 1000 | 0.6457 | -0.5287 | -0.7121 | 0.6290 | 0.1834 | -335.0806 | -338.1635 | -2.4563 | -2.4939 |
|
74 |
+
| 0.6741 | 0.14 | 1100 | 0.6396 | -0.7030 | -0.9481 | 0.6305 | 0.2452 | -358.6847 | -355.5893 | -2.2815 | -2.3227 |
|
75 |
+
| 0.605 | 0.16 | 1200 | 0.6279 | -0.7077 | -0.9713 | 0.6375 | 0.2636 | -360.9963 | -356.0601 | -2.2198 | -2.2608 |
|
76 |
+
| 0.5844 | 0.17 | 1300 | 0.6228 | -0.8502 | -1.1414 | 0.6410 | 0.2912 | -378.0121 | -370.3147 | -2.0337 | -2.0743 |
|
77 |
+
| 0.6085 | 0.18 | 1400 | 0.6157 | -0.6163 | -0.8963 | 0.6565 | 0.2799 | -353.4970 | -346.9268 | -1.9276 | -1.9742 |
|
78 |
+
| 0.5887 | 0.2 | 1500 | 0.6093 | -1.0534 | -1.4085 | 0.6585 | 0.3551 | -404.7234 | -390.6338 | -1.5130 | -1.5476 |
|
79 |
+
| 0.5585 | 0.21 | 1600 | 0.6020 | -0.8558 | -1.2372 | 0.6645 | 0.3814 | -387.5893 | -370.8767 | -1.4216 | -1.4652 |
|
80 |
+
| 0.5417 | 0.22 | 1700 | 0.5937 | -0.7787 | -1.1648 | 0.6640 | 0.3860 | -380.3489 | -363.1672 | -1.3190 | -1.3614 |
|
81 |
+
| 0.5691 | 0.24 | 1800 | 0.5964 | -1.0690 | -1.5628 | 0.6705 | 0.4938 | -420.1472 | -392.1945 | -0.7433 | -0.7891 |
|
82 |
+
| 0.5869 | 0.25 | 1900 | 0.5931 | -1.4234 | -1.8618 | 0.6700 | 0.4384 | -450.0478 | -427.6318 | -0.5757 | -0.5963 |
|
83 |
+
| 0.6732 | 0.26 | 2000 | 0.5928 | -0.7320 | -1.1323 | 0.6765 | 0.4002 | -377.0961 | -358.4945 | -0.8928 | -0.9596 |
|
84 |
+
| 0.5453 | 0.27 | 2100 | 0.5812 | -1.2215 | -1.6723 | 0.6770 | 0.4508 | -431.1005 | -407.4461 | -0.3057 | -0.3325 |
|
85 |
+
| 0.5521 | 0.29 | 2200 | 0.5773 | -0.9855 | -1.4907 | 0.6775 | 0.5052 | -412.9417 | -383.8439 | -0.0835 | -0.1059 |
|
86 |
+
| 0.5352 | 0.3 | 2300 | 0.5821 | -1.0780 | -1.5279 | 0.6885 | 0.4500 | -416.6599 | -393.0880 | -0.2117 | -0.2432 |
|
87 |
+
| 0.4291 | 0.31 | 2400 | 0.5800 | -1.3780 | -1.9871 | 0.6785 | 0.6091 | -462.5805 | -423.0901 | 0.1802 | 0.1741 |
|
88 |
+
| 0.5324 | 0.33 | 2500 | 0.5709 | -1.0291 | -1.5875 | 0.6765 | 0.5584 | -422.6171 | -388.1980 | 0.0904 | 0.0751 |
|
89 |
+
| 0.5659 | 0.34 | 2600 | 0.5640 | -1.2533 | -1.8232 | 0.6985 | 0.5699 | -446.1898 | -410.6243 | 0.3281 | 0.3241 |
|
90 |
+
| 0.5041 | 0.35 | 2700 | 0.5737 | -1.7469 | -2.3921 | 0.6865 | 0.6452 | -503.0828 | -459.9810 | 0.5911 | 0.5924 |
|
91 |
+
| 0.5754 | 0.37 | 2800 | 0.5716 | -1.6382 | -2.2298 | 0.6885 | 0.5915 | -486.8488 | -449.1171 | 0.6424 | 0.6612 |
|
92 |
+
| 0.6073 | 0.38 | 2900 | 0.5731 | -1.5512 | -2.2130 | 0.6815 | 0.6618 | -485.1724 | -440.4115 | 0.7017 | 0.6979 |
|
93 |
+
| 0.6283 | 0.39 | 3000 | 0.5645 | -1.3105 | -1.9937 | 0.6860 | 0.6832 | -463.2372 | -416.3378 | 0.6221 | 0.5951 |
|
94 |
+
| 0.5199 | 0.41 | 3100 | 0.5585 | -1.1618 | -1.7386 | 0.6940 | 0.5768 | -437.7283 | -401.4741 | 0.4404 | 0.4092 |
|
95 |
+
| 0.5658 | 0.42 | 3200 | 0.5603 | -1.1916 | -1.7704 | 0.6960 | 0.5788 | -440.9099 | -404.4548 | 0.3075 | 0.2535 |
|
96 |
+
| 0.6214 | 0.43 | 3300 | 0.5605 | -1.3366 | -1.9673 | 0.6925 | 0.6307 | -460.5986 | -418.9480 | 0.6742 | 0.6564 |
|
97 |
+
| 0.581 | 0.44 | 3400 | 0.5563 | -1.1359 | -1.7683 | 0.6985 | 0.6324 | -440.7018 | -398.8812 | 0.5839 | 0.5449 |
|
98 |
+
| 0.5422 | 0.46 | 3500 | 0.5590 | -1.0364 | -1.6150 | 0.6915 | 0.5786 | -425.3734 | -388.9318 | 0.5735 | 0.5330 |
|
99 |
+
| 0.5626 | 0.47 | 3600 | 0.5602 | -1.1120 | -1.7501 | 0.6910 | 0.6381 | -438.8792 | -396.4902 | 0.7862 | 0.7520 |
|
100 |
+
| 0.627 | 0.48 | 3700 | 0.5579 | -1.2845 | -1.9488 | 0.6935 | 0.6644 | -458.7537 | -413.7391 | 0.8809 | 0.8576 |
|
101 |
+
| 0.5522 | 0.5 | 3800 | 0.5562 | -1.3810 | -2.0706 | 0.6975 | 0.6896 | -470.9312 | -423.3916 | 0.9118 | 0.8745 |
|
102 |
+
| 0.5734 | 0.51 | 3900 | 0.5557 | -1.3964 | -2.0908 | 0.6970 | 0.6943 | -472.9462 | -424.9361 | 0.7969 | 0.7417 |
|
103 |
+
| 0.612 | 0.52 | 4000 | 0.5548 | -1.6249 | -2.3232 | 0.7075 | 0.6982 | -496.1850 | -447.7854 | 0.8941 | 0.8718 |
|
104 |
+
| 0.5357 | 0.54 | 4100 | 0.5587 | -1.1962 | -1.8866 | 0.6995 | 0.6904 | -452.5338 | -404.9135 | 0.5836 | 0.5102 |
|
105 |
+
| 0.5648 | 0.55 | 4200 | 0.5570 | -1.3147 | -2.0461 | 0.6940 | 0.7314 | -468.4804 | -416.7626 | 0.7063 | 0.6440 |
|
106 |
+
| 0.5237 | 0.56 | 4300 | 0.5515 | -1.5027 | -2.2087 | 0.7030 | 0.7060 | -484.7385 | -435.5629 | 0.8569 | 0.8282 |
|
107 |
+
| 0.5979 | 0.58 | 4400 | 0.5594 | -1.6981 | -2.4801 | 0.7040 | 0.7820 | -511.8796 | -455.1061 | 0.9415 | 0.9060 |
|
108 |
+
| 0.4859 | 0.59 | 4500 | 0.5530 | -1.5910 | -2.3517 | 0.7080 | 0.7607 | -499.0415 | -444.3948 | 0.9399 | 0.9057 |
|
109 |
+
| 0.5484 | 0.6 | 4600 | 0.5525 | -1.5159 | -2.2439 | 0.7055 | 0.7280 | -488.2595 | -436.8822 | 0.8711 | 0.8268 |
|
110 |
+
| 0.6135 | 0.62 | 4700 | 0.5504 | -1.3255 | -2.0246 | 0.7065 | 0.6990 | -466.3248 | -417.8462 | 0.7736 | 0.7222 |
|
111 |
+
| 0.5714 | 0.63 | 4800 | 0.5501 | -1.4736 | -2.1670 | 0.7070 | 0.6934 | -480.5717 | -432.6558 | 0.8649 | 0.8370 |
|
112 |
+
| 0.517 | 0.64 | 4900 | 0.5531 | -1.6509 | -2.4069 | 0.7090 | 0.7560 | -504.5561 | -450.3797 | 0.9735 | 0.9524 |
|
113 |
+
| 0.4862 | 0.65 | 5000 | 0.5524 | -1.5409 | -2.2932 | 0.7080 | 0.7523 | -493.1930 | -439.3873 | 0.9138 | 0.8849 |
|
114 |
+
| 0.6176 | 0.67 | 5100 | 0.5519 | -1.4759 | -2.2276 | 0.7020 | 0.7516 | -486.6266 | -432.8859 | 0.8785 | 0.8443 |
|
115 |
+
| 0.5514 | 0.68 | 5200 | 0.5500 | -1.4083 | -2.1357 | 0.7025 | 0.7274 | -477.4418 | -426.1200 | 0.8299 | 0.7894 |
|
116 |
+
| 0.5166 | 0.69 | 5300 | 0.5508 | -1.4154 | -2.1510 | 0.7040 | 0.7356 | -478.9723 | -426.8324 | 0.8441 | 0.8065 |
|
117 |
+
| 0.4918 | 0.71 | 5400 | 0.5496 | -1.4093 | -2.1290 | 0.7090 | 0.7197 | -476.7667 | -426.2183 | 0.8313 | 0.7905 |
|
118 |
+
| 0.596 | 0.72 | 5500 | 0.5489 | -1.4890 | -2.2221 | 0.7075 | 0.7332 | -486.0821 | -434.1885 | 0.8632 | 0.8239 |
|
119 |
+
| 0.6034 | 0.73 | 5600 | 0.5489 | -1.4048 | -2.1338 | 0.7065 | 0.7290 | -477.2522 | -425.7730 | 0.8041 | 0.7561 |
|
120 |
+
| 0.4793 | 0.75 | 5700 | 0.5495 | -1.5017 | -2.2541 | 0.7080 | 0.7524 | -489.2809 | -435.4676 | 0.8918 | 0.8545 |
|
121 |
+
| 0.5164 | 0.76 | 5800 | 0.5497 | -1.5548 | -2.3215 | 0.7085 | 0.7667 | -496.0150 | -440.7685 | 0.9221 | 0.8885 |
|
122 |
+
| 0.6164 | 0.77 | 5900 | 0.5491 | -1.5335 | -2.2884 | 0.7080 | 0.7549 | -492.7101 | -438.6432 | 0.8987 | 0.8645 |
|
123 |
+
| 0.5347 | 0.79 | 6000 | 0.5487 | -1.5028 | -2.2487 | 0.7105 | 0.7459 | -488.7427 | -435.5721 | 0.8766 | 0.8397 |
|
124 |
+
| 0.56 | 0.8 | 6100 | 0.5491 | -1.4855 | -2.2337 | 0.7105 | 0.7482 | -487.2426 | -433.8429 | 0.8643 | 0.8248 |
|
125 |
+
| 0.587 | 0.81 | 6200 | 0.5491 | -1.4638 | -2.2111 | 0.7095 | 0.7473 | -484.9788 | -431.6711 | 0.8489 | 0.8072 |
|
126 |
+
| 0.4927 | 0.82 | 6300 | 0.5490 | -1.4591 | -2.2082 | 0.7090 | 0.7491 | -484.6881 | -431.2039 | 0.8531 | 0.8118 |
|
127 |
+
| 0.6102 | 0.84 | 6400 | 0.5486 | -1.4462 | -2.1928 | 0.7105 | 0.7466 | -483.1518 | -429.9117 | 0.8474 | 0.8055 |
|
128 |
+
| 0.4988 | 0.85 | 6500 | 0.5485 | -1.4482 | -2.1938 | 0.7095 | 0.7456 | -483.2466 | -430.1142 | 0.8464 | 0.8046 |
|
129 |
+
| 0.5544 | 0.86 | 6600 | 0.5486 | -1.4491 | -2.1949 | 0.7115 | 0.7458 | -483.3600 | -430.1988 | 0.8487 | 0.8068 |
|
130 |
+
| 0.5828 | 0.88 | 6700 | 0.5486 | -1.4518 | -2.1981 | 0.7100 | 0.7463 | -483.6802 | -430.4771 | 0.8512 | 0.8097 |
|
131 |
+
| 0.5711 | 0.89 | 6800 | 0.5485 | -1.4557 | -2.2030 | 0.7095 | 0.7473 | -484.1660 | -430.8610 | 0.8538 | 0.8124 |
|
132 |
+
| 0.5621 | 0.9 | 6900 | 0.5484 | -1.4557 | -2.2035 | 0.7125 | 0.7478 | -484.2229 | -430.8625 | 0.8535 | 0.8119 |
|
133 |
+
| 0.5093 | 0.92 | 7000 | 0.5485 | -1.4555 | -2.2030 | 0.7095 | 0.7475 | -484.1658 | -430.8411 | 0.8539 | 0.8128 |
|
134 |
+
| 0.4665 | 0.93 | 7100 | 0.5485 | -1.4561 | -2.2038 | 0.7100 | 0.7477 | -484.2509 | -430.9035 | 0.8539 | 0.8128 |
|
135 |
+
| 0.6276 | 0.94 | 7200 | 0.5486 | -1.4556 | -2.2033 | 0.7110 | 0.7476 | -484.1955 | -430.8554 | 0.8539 | 0.8130 |
|
136 |
+
| 0.457 | 0.96 | 7300 | 0.5486 | -1.4547 | -2.2022 | 0.7110 | 0.7475 | -484.0942 | -430.7640 | 0.8540 | 0.8129 |
|
137 |
+
| 0.5436 | 0.97 | 7400 | 0.5486 | -1.4557 | -2.2035 | 0.7130 | 0.7478 | -484.2209 | -430.8634 | 0.8541 | 0.8130 |
|
138 |
+
| 0.4801 | 0.98 | 7500 | 0.5486 | -1.4555 | -2.2033 | 0.7125 | 0.7478 | -484.1994 | -430.8404 | 0.8538 | 0.8125 |
|
139 |
+
| 0.5922 | 0.99 | 7600 | 0.5486 | -1.4555 | -2.2032 | 0.7100 | 0.7477 | -484.1860 | -430.8414 | 0.8537 | 0.8124 |
|
140 |
+
|
141 |
+
|
142 |
+
### Framework versions
|
143 |
+
|
144 |
+
- PEFT 0.7.1
|
145 |
+
- Transformers 4.36.2
|
146 |
+
- PyTorch 2.1.2
|
147 |
+
- Datasets 2.14.6
|
148 |
+
- Tokenizers 0.15.0
|
adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 83946192
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:80b5f6441687821f93d5ba22ca2484f8cc929fbcc2393ffea853f0205add4b81
|
3 |
size 83946192
|
all_results.json
ADDED
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"epoch": 1.0,
|
3 |
+
"eval_logits/chosen": 0.8123594522476196,
|
4 |
+
"eval_logits/rejected": 0.8536277413368225,
|
5 |
+
"eval_logps/chosen": -430.860595703125,
|
6 |
+
"eval_logps/rejected": -484.1859436035156,
|
7 |
+
"eval_loss": 0.5485682487487793,
|
8 |
+
"eval_rewards/accuracies": 0.7089999914169312,
|
9 |
+
"eval_rewards/chosen": -1.4556782245635986,
|
10 |
+
"eval_rewards/margins": 0.7474843859672546,
|
11 |
+
"eval_rewards/rejected": -2.203162670135498,
|
12 |
+
"eval_runtime": 1172.6703,
|
13 |
+
"eval_samples": 2000,
|
14 |
+
"eval_samples_per_second": 1.706,
|
15 |
+
"eval_steps_per_second": 0.853,
|
16 |
+
"train_loss": 0.5756704107174588,
|
17 |
+
"train_runtime": 163576.2451,
|
18 |
+
"train_samples": 61135,
|
19 |
+
"train_samples_per_second": 0.374,
|
20 |
+
"train_steps_per_second": 0.047
|
21 |
+
}
|
eval_results.json
ADDED
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"epoch": 1.0,
|
3 |
+
"eval_logits/chosen": 0.8123594522476196,
|
4 |
+
"eval_logits/rejected": 0.8536277413368225,
|
5 |
+
"eval_logps/chosen": -430.860595703125,
|
6 |
+
"eval_logps/rejected": -484.1859436035156,
|
7 |
+
"eval_loss": 0.5485682487487793,
|
8 |
+
"eval_rewards/accuracies": 0.7089999914169312,
|
9 |
+
"eval_rewards/chosen": -1.4556782245635986,
|
10 |
+
"eval_rewards/margins": 0.7474843859672546,
|
11 |
+
"eval_rewards/rejected": -2.203162670135498,
|
12 |
+
"eval_runtime": 1172.6703,
|
13 |
+
"eval_samples": 2000,
|
14 |
+
"eval_samples_per_second": 1.706,
|
15 |
+
"eval_steps_per_second": 0.853
|
16 |
+
}
|
runs/Feb08_09-39-21_7dec04cc21c9/events.out.tfevents.1707385209.7dec04cc21c9.25454.0
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:44bc68a1f3e30c69816f872c6f6187bc7fa704767af023a90812583278854015
|
3 |
+
size 546412
|
runs/Feb08_09-39-21_7dec04cc21c9/events.out.tfevents.1707549958.7dec04cc21c9.25454.1
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9956dfe56c7897fd3da5a93efdd288471fd8b4d089034d71a6217a77e2736119
|
3 |
+
size 828
|
train_results.json
ADDED
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"epoch": 1.0,
|
3 |
+
"train_loss": 0.5756704107174588,
|
4 |
+
"train_runtime": 163576.2451,
|
5 |
+
"train_samples": 61135,
|
6 |
+
"train_samples_per_second": 0.374,
|
7 |
+
"train_steps_per_second": 0.047
|
8 |
+
}
|
trainer_state.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|