Model save
Browse files- README.md +5 -8
- all_results.json +5 -5
- config.json +1 -1
- model-00001-of-00004.safetensors +1 -1
- model-00002-of-00004.safetensors +1 -1
- model-00003-of-00004.safetensors +1 -1
- model-00004-of-00004.safetensors +1 -1
- runs/Jun14_18-07-43_action-graph-trainer/events.out.tfevents.1718388492.action-graph-trainer.695665.0 +3 -0
- train_results.json +5 -5
- trainer_state.json +25 -25
- training_args.bin +1 -1
README.md
CHANGED
@@ -2,15 +2,12 @@
|
|
2 |
license: apache-2.0
|
3 |
base_model: Qwen/Qwen2-7B
|
4 |
tags:
|
5 |
-
- alignment-handbook
|
6 |
-
- trl
|
7 |
-
- sft
|
8 |
-
- generated_from_trainer
|
9 |
- trl
|
10 |
- sft
|
|
|
11 |
- generated_from_trainer
|
12 |
datasets:
|
13 |
-
-
|
14 |
model-index:
|
15 |
- name: zephyr-qwen2-7b-sft
|
16 |
results: []
|
@@ -21,9 +18,9 @@ should probably proofread and complete it, then remove this comment. -->
|
|
21 |
|
22 |
# zephyr-qwen2-7b-sft
|
23 |
|
24 |
-
This model is a fine-tuned version of [Qwen/Qwen2-7B](https://huggingface.co/Qwen/Qwen2-7B) on the
|
25 |
It achieves the following results on the evaluation set:
|
26 |
-
- Loss: 1.
|
27 |
|
28 |
## Model description
|
29 |
|
@@ -60,7 +57,7 @@ The following hyperparameters were used during training:
|
|
60 |
|
61 |
| Training Loss | Epoch | Step | Validation Loss |
|
62 |
|:-------------:|:-----:|:----:|:---------------:|
|
63 |
-
| 1.
|
64 |
|
65 |
|
66 |
### Framework versions
|
|
|
2 |
license: apache-2.0
|
3 |
base_model: Qwen/Qwen2-7B
|
4 |
tags:
|
|
|
|
|
|
|
|
|
5 |
- trl
|
6 |
- sft
|
7 |
+
- alignment-handbook
|
8 |
- generated_from_trainer
|
9 |
datasets:
|
10 |
+
- generator
|
11 |
model-index:
|
12 |
- name: zephyr-qwen2-7b-sft
|
13 |
results: []
|
|
|
18 |
|
19 |
# zephyr-qwen2-7b-sft
|
20 |
|
21 |
+
This model is a fine-tuned version of [Qwen/Qwen2-7B](https://huggingface.co/Qwen/Qwen2-7B) on the generator dataset.
|
22 |
It achieves the following results on the evaluation set:
|
23 |
+
- Loss: 1.0645
|
24 |
|
25 |
## Model description
|
26 |
|
|
|
57 |
|
58 |
| Training Loss | Epoch | Step | Validation Loss |
|
59 |
|:-------------:|:-----:|:----:|:---------------:|
|
60 |
+
| 1.0627 | 1.0 | 956 | 1.0645 |
|
61 |
|
62 |
|
63 |
### Framework versions
|
all_results.json
CHANGED
@@ -5,10 +5,10 @@
|
|
5 |
"eval_samples": 23109,
|
6 |
"eval_samples_per_second": 47.288,
|
7 |
"eval_steps_per_second": 0.74,
|
8 |
-
"total_flos":
|
9 |
-
"train_loss":
|
10 |
-
"train_runtime":
|
11 |
"train_samples": 207864,
|
12 |
-
"train_samples_per_second":
|
13 |
-
"train_steps_per_second":
|
14 |
}
|
|
|
5 |
"eval_samples": 23109,
|
6 |
"eval_samples_per_second": 47.288,
|
7 |
"eval_steps_per_second": 0.74,
|
8 |
+
"total_flos": 500662995517440.0,
|
9 |
+
"train_loss": 0.06220405869902926,
|
10 |
+
"train_runtime": 877.8841,
|
11 |
"train_samples": 207864,
|
12 |
+
"train_samples_per_second": 139.358,
|
13 |
+
"train_steps_per_second": 1.089
|
14 |
}
|
config.json
CHANGED
@@ -22,7 +22,7 @@
|
|
22 |
"tie_word_embeddings": false,
|
23 |
"torch_dtype": "bfloat16",
|
24 |
"transformers_version": "4.40.2",
|
25 |
-
"use_cache":
|
26 |
"use_sliding_window": false,
|
27 |
"vocab_size": 152064
|
28 |
}
|
|
|
22 |
"tie_word_embeddings": false,
|
23 |
"torch_dtype": "bfloat16",
|
24 |
"transformers_version": "4.40.2",
|
25 |
+
"use_cache": false,
|
26 |
"use_sliding_window": false,
|
27 |
"vocab_size": 152064
|
28 |
}
|
model-00001-of-00004.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4877660776
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5346f7673f73f551aaaa605516577660e1eeedcc29154ded68a8a39e1bf72c4c
|
3 |
size 4877660776
|
model-00002-of-00004.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4932751008
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b42de9be5847138b7967316f3dad2efd4db71d2dc2042256575769e7883a189a
|
3 |
size 4932751008
|
model-00003-of-00004.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4330865200
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:90e5f201a3835937500f20e67d4c791a7124c5a060229fc1f92b17ebf3fda4b2
|
3 |
size 4330865200
|
model-00004-of-00004.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1089994880
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:634e5128c71a098d1f1cad9837cc743ae34a83706e15c9cb9df0ac5d7fc76820
|
3 |
size 1089994880
|
runs/Jun14_18-07-43_action-graph-trainer/events.out.tfevents.1718388492.action-graph-trainer.695665.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c86dcc545bc64c7e85477584418e0651f4252ace1b3894df3f4e8f5359dddf47
|
3 |
+
size 7807
|
train_results.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"epoch": 1.0,
|
3 |
-
"total_flos":
|
4 |
-
"train_loss":
|
5 |
-
"train_runtime":
|
6 |
"train_samples": 207864,
|
7 |
-
"train_samples_per_second":
|
8 |
-
"train_steps_per_second":
|
9 |
}
|
|
|
1 |
{
|
2 |
"epoch": 1.0,
|
3 |
+
"total_flos": 500662995517440.0,
|
4 |
+
"train_loss": 0.06220405869902926,
|
5 |
+
"train_runtime": 877.8841,
|
6 |
"train_samples": 207864,
|
7 |
+
"train_samples_per_second": 139.358,
|
8 |
+
"train_steps_per_second": 1.089
|
9 |
}
|
trainer_state.json
CHANGED
@@ -1277,97 +1277,97 @@
|
|
1277 |
},
|
1278 |
{
|
1279 |
"epoch": 0.946652719665272,
|
1280 |
-
"grad_norm": 0.
|
1281 |
"learning_rate": 1.730440504639408e-07,
|
1282 |
"loss": 1.058,
|
1283 |
"step": 905
|
1284 |
},
|
1285 |
{
|
1286 |
"epoch": 0.9518828451882845,
|
1287 |
-
"grad_norm": 0.
|
1288 |
"learning_rate": 1.408530770781813e-07,
|
1289 |
"loss": 1.0526,
|
1290 |
"step": 910
|
1291 |
},
|
1292 |
{
|
1293 |
"epoch": 0.9571129707112971,
|
1294 |
-
"grad_norm": 0.
|
1295 |
"learning_rate": 1.1195115097079268e-07,
|
1296 |
-
"loss": 1.
|
1297 |
"step": 915
|
1298 |
},
|
1299 |
{
|
1300 |
"epoch": 0.9623430962343096,
|
1301 |
-
"grad_norm": 0.
|
1302 |
"learning_rate": 8.634791392946429e-08,
|
1303 |
"loss": 1.0676,
|
1304 |
"step": 920
|
1305 |
},
|
1306 |
{
|
1307 |
"epoch": 0.9675732217573222,
|
1308 |
-
"grad_norm": 0.
|
1309 |
"learning_rate": 6.405190728721033e-08,
|
1310 |
"loss": 1.0455,
|
1311 |
"step": 925
|
1312 |
},
|
1313 |
{
|
1314 |
"epoch": 0.9728033472803347,
|
1315 |
-
"grad_norm": 0.
|
1316 |
"learning_rate": 4.5070569072952485e-08,
|
1317 |
-
"loss": 1.
|
1318 |
"step": 930
|
1319 |
},
|
1320 |
{
|
1321 |
"epoch": 0.9780334728033473,
|
1322 |
-
"grad_norm": 0.
|
1323 |
"learning_rate": 2.9410231530168087e-08,
|
1324 |
-
"loss": 1.
|
1325 |
"step": 935
|
1326 |
},
|
1327 |
{
|
1328 |
"epoch": 0.9832635983263598,
|
1329 |
-
"grad_norm": 0.
|
1330 |
"learning_rate": 1.7076119004429958e-08,
|
1331 |
"loss": 1.0763,
|
1332 |
"step": 940
|
1333 |
},
|
1334 |
{
|
1335 |
"epoch": 0.9884937238493724,
|
1336 |
-
"grad_norm": 0.
|
1337 |
"learning_rate": 8.072346200544979e-09,
|
1338 |
"loss": 1.0672,
|
1339 |
"step": 945
|
1340 |
},
|
1341 |
{
|
1342 |
"epoch": 0.9937238493723849,
|
1343 |
-
"grad_norm": 0.
|
1344 |
"learning_rate": 2.401916809872118e-09,
|
1345 |
"loss": 1.0629,
|
1346 |
"step": 950
|
1347 |
},
|
1348 |
{
|
1349 |
"epoch": 0.9989539748953975,
|
1350 |
-
"grad_norm": 0.
|
1351 |
"learning_rate": 6.672250828620996e-11,
|
1352 |
-
"loss": 1.
|
1353 |
"step": 955
|
1354 |
},
|
1355 |
{
|
1356 |
"epoch": 1.0,
|
1357 |
-
"eval_loss": 1.
|
1358 |
-
"eval_runtime":
|
1359 |
-
"eval_samples_per_second": 47.
|
1360 |
-
"eval_steps_per_second": 0.
|
1361 |
"step": 956
|
1362 |
},
|
1363 |
{
|
1364 |
"epoch": 1.0,
|
1365 |
"step": 956,
|
1366 |
-
"total_flos":
|
1367 |
-
"train_loss":
|
1368 |
-
"train_runtime":
|
1369 |
-
"train_samples_per_second":
|
1370 |
-
"train_steps_per_second":
|
1371 |
}
|
1372 |
],
|
1373 |
"logging_steps": 5,
|
@@ -1375,7 +1375,7 @@
|
|
1375 |
"num_input_tokens_seen": 0,
|
1376 |
"num_train_epochs": 1,
|
1377 |
"save_steps": 100,
|
1378 |
-
"total_flos":
|
1379 |
"train_batch_size": 8,
|
1380 |
"trial_name": null,
|
1381 |
"trial_params": null
|
|
|
1277 |
},
|
1278 |
{
|
1279 |
"epoch": 0.946652719665272,
|
1280 |
+
"grad_norm": 0.4832426848529184,
|
1281 |
"learning_rate": 1.730440504639408e-07,
|
1282 |
"loss": 1.058,
|
1283 |
"step": 905
|
1284 |
},
|
1285 |
{
|
1286 |
"epoch": 0.9518828451882845,
|
1287 |
+
"grad_norm": 0.48799198233407015,
|
1288 |
"learning_rate": 1.408530770781813e-07,
|
1289 |
"loss": 1.0526,
|
1290 |
"step": 910
|
1291 |
},
|
1292 |
{
|
1293 |
"epoch": 0.9571129707112971,
|
1294 |
+
"grad_norm": 0.4969562812336113,
|
1295 |
"learning_rate": 1.1195115097079268e-07,
|
1296 |
+
"loss": 1.0684,
|
1297 |
"step": 915
|
1298 |
},
|
1299 |
{
|
1300 |
"epoch": 0.9623430962343096,
|
1301 |
+
"grad_norm": 0.4892866844912397,
|
1302 |
"learning_rate": 8.634791392946429e-08,
|
1303 |
"loss": 1.0676,
|
1304 |
"step": 920
|
1305 |
},
|
1306 |
{
|
1307 |
"epoch": 0.9675732217573222,
|
1308 |
+
"grad_norm": 0.49055262840153824,
|
1309 |
"learning_rate": 6.405190728721033e-08,
|
1310 |
"loss": 1.0455,
|
1311 |
"step": 925
|
1312 |
},
|
1313 |
{
|
1314 |
"epoch": 0.9728033472803347,
|
1315 |
+
"grad_norm": 0.47689555635255854,
|
1316 |
"learning_rate": 4.5070569072952485e-08,
|
1317 |
+
"loss": 1.0609,
|
1318 |
"step": 930
|
1319 |
},
|
1320 |
{
|
1321 |
"epoch": 0.9780334728033473,
|
1322 |
+
"grad_norm": 0.4955325802322405,
|
1323 |
"learning_rate": 2.9410231530168087e-08,
|
1324 |
+
"loss": 1.0498,
|
1325 |
"step": 935
|
1326 |
},
|
1327 |
{
|
1328 |
"epoch": 0.9832635983263598,
|
1329 |
+
"grad_norm": 0.47274550178714503,
|
1330 |
"learning_rate": 1.7076119004429958e-08,
|
1331 |
"loss": 1.0763,
|
1332 |
"step": 940
|
1333 |
},
|
1334 |
{
|
1335 |
"epoch": 0.9884937238493724,
|
1336 |
+
"grad_norm": 0.49350697124044746,
|
1337 |
"learning_rate": 8.072346200544979e-09,
|
1338 |
"loss": 1.0672,
|
1339 |
"step": 945
|
1340 |
},
|
1341 |
{
|
1342 |
"epoch": 0.9937238493723849,
|
1343 |
+
"grad_norm": 0.4887853656062252,
|
1344 |
"learning_rate": 2.401916809872118e-09,
|
1345 |
"loss": 1.0629,
|
1346 |
"step": 950
|
1347 |
},
|
1348 |
{
|
1349 |
"epoch": 0.9989539748953975,
|
1350 |
+
"grad_norm": 0.4764956168422736,
|
1351 |
"learning_rate": 6.672250828620996e-11,
|
1352 |
+
"loss": 1.0627,
|
1353 |
"step": 955
|
1354 |
},
|
1355 |
{
|
1356 |
"epoch": 1.0,
|
1357 |
+
"eval_loss": 1.0645456314086914,
|
1358 |
+
"eval_runtime": 285.3875,
|
1359 |
+
"eval_samples_per_second": 47.441,
|
1360 |
+
"eval_steps_per_second": 0.743,
|
1361 |
"step": 956
|
1362 |
},
|
1363 |
{
|
1364 |
"epoch": 1.0,
|
1365 |
"step": 956,
|
1366 |
+
"total_flos": 500662995517440.0,
|
1367 |
+
"train_loss": 0.06220405869902926,
|
1368 |
+
"train_runtime": 877.8841,
|
1369 |
+
"train_samples_per_second": 139.358,
|
1370 |
+
"train_steps_per_second": 1.089
|
1371 |
}
|
1372 |
],
|
1373 |
"logging_steps": 5,
|
|
|
1375 |
"num_input_tokens_seen": 0,
|
1376 |
"num_train_epochs": 1,
|
1377 |
"save_steps": 100,
|
1378 |
+
"total_flos": 500662995517440.0,
|
1379 |
"train_batch_size": 8,
|
1380 |
"trial_name": null,
|
1381 |
"trial_params": null
|
training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 6200
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8a961b2dc9433696ebda3812a203e258f2b437c5b7c7dd434f0409fcfebe52fe
|
3 |
size 6200
|