---
library_name: transformers
license: apache-2.0
base_model: Qwen/Qwen2.5-0.5B-Instruct
tags:
- generated_from_trainer
datasets:
- PJMixers-Dev/allura-org_gryphe-sonnet-3.5-charcards-names-added-qwq-all-aphrodite
- PJMixers-Dev/anthracite-org_c2_logs_32k_llama3_qwen2_v1.3-qwq-all-aphrodite
- PJMixers-Dev/grimulkan_aicg-logs-augmented-system-qwq-all-aphrodite
- PJMixers-Dev/grimulkan_jannie-log-augmented-system-qwq-all-aphrodite
- PJMixers-Dev/grimulkan_PIPPA-augmented-dedup-system-qwq-all-aphrodite
- PJMixers-Dev/lemonilia_LimaRP-Only-NonSus-Simple-CustomShareGPT-qwq-all-aphrodite
- PJMixers-Dev/MinervaAI_Aesir-Preview-Anon-qwq-all-aphrodite
- PJMixers-Dev/NyxKrage_chub-logs-sharegpt-longest-CustomShareGPT-qwq-all-aphrodite
model-index:
- name: Outputs/Qwen2.5-QwQ-RP-Draft-v0.1-0.5B-LoRA-run10
  results: []
---

<!-- This model card has been generated automatically according to the information the Trainer had access to. You
should probably proofread and complete it, then remove this comment. -->

[<img src="https://raw.githubusercontent.com/axolotl-ai-cloud/axolotl/main/image/axolotl-badge-web.png" alt="Built with Axolotl" width="200" height="32"/>](https://github.com/axolotl-ai-cloud/axolotl)
<details><summary>See axolotl config</summary>

axolotl version: `0.6.0`
```yaml
# Weights and Biases logging config
wandb_project: Qwen2.5-QwQ-RP-Draft-0.5B
wandb_entity:
wandb_watch:
wandb_name: Qwen2.5-QwQ-RP-Draft-v0.1-0.5B-LoRA-run10
wandb_log_model:

# Model checkpointing config
output_dir: ./Outputs/Qwen2.5-QwQ-RP-Draft-v0.1-0.5B-LoRA-run10
resume_from_checkpoint:
save_steps: 10
save_safetensors: true
save_total_limit: 3
save_only_model: false

# Model architecture config
base_model: Qwen/Qwen2.5-0.5B-Instruct
model_type: AutoModelForCausalLM
tokenizer_type: AutoTokenizer

# Mixed precision training config
bf16: true
fp16: false
tf32: false

# Model loading config
load_in_8bit: false
load_in_4bit: false
strict: false

# Sequence config
sequence_len: 8192
min_sample_len: 256
sample_packing: true
eval_sample_packing: true
pad_to_sequence_len: true
train_on_inputs: false
group_by_length: false

# LoRA adapter config
adapter: lora
lora_model_dir:
lora_r: 128
lora_alpha: 128
lora_dropout: 0.125
peft_layers_to_transform:
peft_use_dora:
peft_use_rslora:
peft_layer_replication:
lora_target_modules:
  - gate_proj
  - down_proj
  - up_proj
  - q_proj
  - v_proj
  - k_proj
  - o_proj
lora_modules_to_save:

# Fix uninitialized tokens (such as <|start_header_id|> on the base L3 models)
fix_untrained_tokens:

# Dataset config
# RP: https://github.com/xzuyn/axolotl/blob/prompt_formats/src/axolotl/prompt_strategies/customchatml-regex-last-only.py
datasets:
  - path: PJMixers-Dev/allura-org_gryphe-sonnet-3.5-charcards-names-added-qwq-all-aphrodite
    split: train[128:]  # Everything except the first 128
    type: customchatml-regex-last-only
  - path: PJMixers-Dev/anthracite-org_c2_logs_32k_llama3_qwen2_v1.3-qwq-all-aphrodite
    split: train[128:]  # Everything except the first 128
    type: customchatml-regex-last-only
  - path: PJMixers-Dev/grimulkan_aicg-logs-augmented-system-qwq-all-aphrodite
    split: train[128:]  # Everything except the first 128
    type: customchatml-regex-last-only
  - path: PJMixers-Dev/grimulkan_jannie-log-augmented-system-qwq-all-aphrodite
    split: train[128:]  # Everything except the first 128
    type: customchatml-regex-last-only
  - path: PJMixers-Dev/grimulkan_PIPPA-augmented-dedup-system-qwq-all-aphrodite
    split: train[128:]  # Everything except the first 128
    type: customchatml-regex-last-only
  - path: PJMixers-Dev/lemonilia_LimaRP-Only-NonSus-Simple-CustomShareGPT-qwq-all-aphrodite
    split: train[128:]  # Everything except the first 128
    type: customchatml-regex-last-only
  - path: PJMixers-Dev/MinervaAI_Aesir-Preview-Anon-qwq-all-aphrodite
    split: train[128:]  # Everything except the first 128
    type: customchatml-regex-last-only
  - path: PJMixers-Dev/NyxKrage_chub-logs-sharegpt-longest-CustomShareGPT-qwq-all-aphrodite
    split: train[128:]  # Everything except the first 128
    type: customchatml-regex-last-only
test_datasets:
  - path: PJMixers-Dev/allura-org_gryphe-sonnet-3.5-charcards-names-added-qwq-all-aphrodite
    split: train[:128]  # Only the first 128
    type: customchatml-regex-last-only
  - path: PJMixers-Dev/anthracite-org_c2_logs_32k_llama3_qwen2_v1.3-qwq-all-aphrodite
    split: train[:128]  # Only the first 128
    type: customchatml-regex-last-only
  - path: PJMixers-Dev/grimulkan_aicg-logs-augmented-system-qwq-all-aphrodite
    split: train[:128]  # Only the first 128
    type: customchatml-regex-last-only
  - path: PJMixers-Dev/grimulkan_jannie-log-augmented-system-qwq-all-aphrodite
    split: train[:128]  # Only the first 128
    type: customchatml-regex-last-only
  - path: PJMixers-Dev/grimulkan_PIPPA-augmented-dedup-system-qwq-all-aphrodite
    split: train[:128]  # Only the first 128
    type: customchatml-regex-last-only
  - path: PJMixers-Dev/lemonilia_LimaRP-Only-NonSus-Simple-CustomShareGPT-qwq-all-aphrodite
    split: train[:128]  # Only the first 128
    type: customchatml-regex-last-only
  - path: PJMixers-Dev/MinervaAI_Aesir-Preview-Anon-qwq-all-aphrodite
    split: train[:128]  # Only the first 128
    type: customchatml-regex-last-only
  - path: PJMixers-Dev/NyxKrage_chub-logs-sharegpt-longest-CustomShareGPT-qwq-all-aphrodite
    split: train[:128]  # Only the first 128
    type: customchatml-regex-last-only
val_set_size: 0
eval_strategy: steps
eval_steps: 10
dataset_prepared_path: ./00-Tokenized-Datasets/Qwen2.5-QwQ-Draft-0.5B-customchatml-regex-newer
shuffle_merged_datasets: true
dataset_processes:

# Training hyperparameters
num_epochs: 1
gradient_accumulation_steps: 1
micro_batch_size: 16
eval_batch_size: 16
warmup_steps: 0
optimizer: came_pytorch
optim_args:
optim_target_modules:
lr_scheduler: rex
learning_rate: 1e-5
cosine_min_lr_ratio:
loraplus_lr_ratio:
loraplus_lr_embedding:
weight_decay: 0.1
max_grad_norm: 1
logging_steps: 1

# Model optimization
gradient_checkpointing: unsloth
sdp_attention: true
plugins:
  - axolotl.integrations.liger.LigerPlugin
  - axolotl.integrations.cut_cross_entropy.CutCrossEntropyPlugin
cut_cross_entropy: true
liger_rope: true
liger_rms_norm: true
liger_layer_norm: true
liger_glu_activation: true
liger_cross_entropy: false
liger_fused_linear_cross_entropy: false
lora_mlp_kernel: false
lora_qkv_kernel: false
lora_o_kernel: false

# DeepSpeed
deepspeed:

# Garbage Collection
gc_steps: 1

# Debug config
debug: true
seed: 42

# Token config
special_tokens:
  eos_token: "<|endoftext|>"
  pad_token: "<|endoftext|>"
tokens:
```

</details><br>

# Outputs/Qwen2.5-QwQ-RP-Draft-v0.1-0.5B-LoRA-run10

This model is a fine-tuned version of [Qwen/Qwen2.5-0.5B-Instruct](https://huggingface.co/Qwen/Qwen2.5-0.5B-Instruct) on the following datasets:

- PJMixers-Dev/allura-org_gryphe-sonnet-3.5-charcards-names-added-qwq-all-aphrodite
- PJMixers-Dev/anthracite-org_c2_logs_32k_llama3_qwen2_v1.3-qwq-all-aphrodite
- PJMixers-Dev/grimulkan_aicg-logs-augmented-system-qwq-all-aphrodite
- PJMixers-Dev/grimulkan_jannie-log-augmented-system-qwq-all-aphrodite
- PJMixers-Dev/grimulkan_PIPPA-augmented-dedup-system-qwq-all-aphrodite
- PJMixers-Dev/lemonilia_LimaRP-Only-NonSus-Simple-CustomShareGPT-qwq-all-aphrodite
- PJMixers-Dev/MinervaAI_Aesir-Preview-Anon-qwq-all-aphrodite
- PJMixers-Dev/NyxKrage_chub-logs-sharegpt-longest-CustomShareGPT-qwq-all-aphrodite

It achieves the following results on the evaluation set:
- Loss: 1.9716
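
Since this checkpoint is a LoRA adapter rather than a merged model, it is applied on top of the base model at load time. A minimal loading sketch with Transformers and PEFT follows; the adapter repo id is a placeholder, so substitute the actual Hub id:

```python
# Minimal loading sketch. The adapter id below is a placeholder;
# replace it with the actual Hub repo id for this adapter.
import torch
from peft import PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer

base = AutoModelForCausalLM.from_pretrained(
    "Qwen/Qwen2.5-0.5B-Instruct",
    torch_dtype=torch.bfloat16,  # training ran in bf16 (see config above)
)
tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2.5-0.5B-Instruct")

# Apply the LoRA adapter, then optionally fold it into the base weights.
model = PeftModel.from_pretrained(base, "PJMixers-Dev/Qwen2.5-QwQ-RP-Draft-v0.1-0.5B-LoRA")
model = model.merge_and_unload()
```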

## Model description

More information needed

## Intended uses & limitations

More information needed
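
The "Draft" in the model name suggests the model is intended as a small draft model for speculative decoding alongside a larger QwQ-based target; that reading is an assumption, but under it, a sketch using Transformers' assisted generation (continuing from the loading sketch above; the target repo id is also an assumption, not taken from this card):

```python
# Hypothetical use as a draft model for speculative (assisted) decoding.
# `model` and `tokenizer` come from the loading sketch above; the target
# repo id "Qwen/QwQ-32B-Preview" is an assumption, not from this card.
target = AutoModelForCausalLM.from_pretrained(
    "Qwen/QwQ-32B-Preview", torch_dtype=torch.bfloat16, device_map="auto"
)

inputs = tokenizer("Write a short scene introduction.", return_tensors="pt").to(target.device)
output = target.generate(
    **inputs,
    assistant_model=model,  # the 0.5B model drafts tokens; the target verifies them
    max_new_tokens=128,
)
print(tokenizer.decode(output[0], skip_special_tokens=True))
```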

## Training and evaluation data

The model was trained on the eight PJMixers-Dev roleplay datasets listed in the metadata above. For each dataset, the first 128 rows are held out as the evaluation split (`train[:128]`) and the remainder is used for training (`train[128:]`), as set in the axolotl config.
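
The split slices in the config follow the Hugging Face `datasets` syntax; a minimal sketch of the partitioning for one of the datasets:

```python
# Sketch of the train/eval partitioning used in the config above:
# the first 128 rows of each dataset are held out for evaluation.
from datasets import load_dataset

repo = "PJMixers-Dev/MinervaAI_Aesir-Preview-Anon-qwq-all-aphrodite"

eval_rows = load_dataset(repo, split="train[:128]")   # only the first 128 rows
train_rows = load_dataset(repo, split="train[128:]")  # everything except the first 128

assert len(eval_rows) == 128
```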

## Training procedure

### Training hyperparameters

The following hyperparameters were used during training:
- learning_rate: 1e-05
- train_batch_size: 16
- eval_batch_size: 16
- seed: 42
- optimizer: OptimizerNames.ADAMW_HF with betas=(0.9,0.999), epsilon=1e-08, and no additional optimizer arguments
- lr_scheduler_type: cosine
- num_epochs: 1.0

### Training results

| Training Loss | Epoch | Step | Validation Loss |
|:-------------:|:------:|:----:|:---------------:|
| 3.4865 | 0.0010 | 1 | 3.2134 |
| 2.481 | 0.0102 | 10 | 2.5552 |
| 2.2964 | 0.0205 | 20 | 2.4036 |
| 2.3048 | 0.0307 | 30 | 2.3367 |
| 2.2577 | 0.0409 | 40 | 2.2934 |
| 2.2298 | 0.0512 | 50 | 2.2601 |
| 2.1367 | 0.0614 | 60 | 2.2385 |
| 2.1512 | 0.0716 | 70 | 2.2166 |
| 2.1703 | 0.0819 | 80 | 2.2022 |
| 2.1263 | 0.0921 | 90 | 2.1883 |
| 2.2121 | 0.1024 | 100 | 2.1750 |
| 2.1741 | 0.1126 | 110 | 2.1633 |
| 2.1621 | 0.1228 | 120 | 2.1547 |
| 2.0664 | 0.1331 | 130 | 2.1456 |
| 2.1005 | 0.1433 | 140 | 2.1374 |
| 2.0822 | 0.1535 | 150 | 2.1315 |
| 2.0856 | 0.1638 | 160 | 2.1252 |
| 2.1386 | 0.1740 | 170 | 2.1182 |
| 2.0756 | 0.1842 | 180 | 2.1134 |
| 2.0492 | 0.1945 | 190 | 2.1066 |
| 1.9882 | 0.2047 | 200 | 2.1024 |
| 2.036 | 0.2149 | 210 | 2.0970 |
| 2.1313 | 0.2252 | 220 | 2.0940 |
| 2.0356 | 0.2354 | 230 | 2.0897 |
| 2.0278 | 0.2456 | 240 | 2.0869 |
| 2.0754 | 0.2559 | 250 | 2.0825 |
| 2.0582 | 0.2661 | 260 | 2.0784 |
| 2.0588 | 0.2764 | 270 | 2.0758 |
| 1.9757 | 0.2866 | 280 | 2.0723 |
| 2.0619 | 0.2968 | 290 | 2.0700 |
| 1.956 | 0.3071 | 300 | 2.0684 |
| 2.065 | 0.3173 | 310 | 2.0642 |
| 1.982 | 0.3275 | 320 | 2.0604 |
| 2.0424 | 0.3378 | 330 | 2.0577 |
| 2.0635 | 0.3480 | 340 | 2.0553 |
| 1.9895 | 0.3582 | 350 | 2.0518 |
| 2.0296 | 0.3685 | 360 | 2.0496 |
| 2.0231 | 0.3787 | 370 | 2.0472 |
| 1.9422 | 0.3889 | 380 | 2.0459 |
| 2.0214 | 0.3992 | 390 | 2.0427 |
| 2.0107 | 0.4094 | 400 | 2.0401 |
| 2.0307 | 0.4197 | 410 | 2.0371 |
| 1.9874 | 0.4299 | 420 | 2.0356 |
| 2.0249 | 0.4401 | 430 | 2.0331 |
| 2.0947 | 0.4504 | 440 | 2.0314 |
| 1.9644 | 0.4606 | 450 | 2.0291 |
| 2.0633 | 0.4708 | 460 | 2.0271 |
| 2.0438 | 0.4811 | 470 | 2.0255 |
| 2.0227 | 0.4913 | 480 | 2.0239 |
| 2.0023 | 0.5015 | 490 | 2.0208 |
| 2.0231 | 0.5118 | 500 | 2.0193 |
| 1.9659 | 0.5220 | 510 | 2.0179 |
| 1.9382 | 0.5322 | 520 | 2.0171 |
| 1.9959 | 0.5425 | 530 | 2.0157 |
| 1.9835 | 0.5527 | 540 | 2.0139 |
| 1.942 | 0.5629 | 550 | 2.0124 |
| 2.0036 | 0.5732 | 560 | 2.0109 |
| 2.023 | 0.5834 | 570 | 2.0100 |
| 1.9686 | 0.5937 | 580 | 2.0078 |
| 1.9867 | 0.6039 | 590 | 2.0070 |
| 1.9662 | 0.6141 | 600 | 2.0060 |
| 1.968 | 0.6244 | 610 | 2.0045 |
| 1.9435 | 0.6346 | 620 | 2.0035 |
| 1.9245 | 0.6448 | 630 | 2.0024 |
| 1.9573 | 0.6551 | 640 | 2.0007 |
| 1.9466 | 0.6653 | 650 | 1.9994 |
| 2.0202 | 0.6755 | 660 | 1.9976 |
| 1.891 | 0.6858 | 670 | 1.9965 |
| 2.0134 | 0.6960 | 680 | 1.9980 |
| 1.9276 | 0.7062 | 690 | 1.9958 |
| 1.9266 | 0.7165 | 700 | 1.9949 |
| 1.8661 | 0.7267 | 710 | 1.9932 |
| 1.9446 | 0.7369 | 720 | 1.9923 |
| 1.8605 | 0.7472 | 730 | 1.9908 |
| 1.9426 | 0.7574 | 740 | 1.9906 |
| 1.9806 | 0.7677 | 750 | 1.9893 |
| 1.9268 | 0.7779 | 760 | 1.9880 |
| 1.987 | 0.7881 | 770 | 1.9870 |
| 1.9182 | 0.7984 | 780 | 1.9866 |
| 2.0103 | 0.8086 | 790 | 1.9853 |
| 1.9153 | 0.8188 | 800 | 1.9839 |
| 2.0043 | 0.8291 | 810 | 1.9830 |
| 1.9791 | 0.8393 | 820 | 1.9819 |
| 1.912 | 0.8495 | 830 | 1.9811 |
| 1.9288 | 0.8598 | 840 | 1.9808 |
| 1.9613 | 0.8700 | 850 | 1.9796 |
| 1.9767 | 0.8802 | 860 | 1.9783 |
| 1.9097 | 0.8905 | 870 | 1.9783 |
| 1.9727 | 0.9007 | 880 | 1.9773 |
| 1.9432 | 0.9110 | 890 | 1.9763 |
| 1.9109 | 0.9212 | 900 | 1.9754 |
| 1.9184 | 0.9314 | 910 | 1.9749 |
| 1.9179 | 0.9417 | 920 | 1.9744 |
| 1.9812 | 0.9519 | 930 | 1.9735 |
| 1.9695 | 0.9621 | 940 | 1.9727 |
| 1.9474 | 0.9724 | 950 | 1.9727 |
| 1.8376 | 0.9826 | 960 | 1.9721 |
| 1.8961 | 0.9928 | 970 | 1.9716 |

### Framework versions

- PEFT 0.14.0
- Transformers 4.50.0.dev0
- Pytorch 2.7.0.dev20250224+rocm6.3
- Datasets 3.3.1
- Tokenizers 0.21.0