Skyler215 committed on
Commit
fe947f5
·
verified ·
1 Parent(s): 4ac5dbc

Skyler215/Finetune2

Browse files
README.md CHANGED
@@ -16,11 +16,11 @@ should probably proofread and complete it, then remove this comment. -->
16
 
17
  This model is a fine-tuned version of [](https://huggingface.co/) on an unknown dataset.
18
  It achieves the following results on the evaluation set:
19
- - Loss: 3.1590
20
- - Rouge1: 0.3875
21
- - Rouge2: 0.1212
22
- - Rougel: 0.3156
23
- - Rougelsum: 0.3166
24
 
25
  ## Model description
26
 
@@ -46,23 +46,28 @@ The following hyperparameters were used during training:
46
  - optimizer: Use adamw_torch with betas=(0.9,0.999) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments
47
  - lr_scheduler_type: linear
48
  - lr_scheduler_warmup_steps: 1024
49
- - num_epochs: 10
50
  - mixed_precision_training: Native AMP
51
 
52
  ### Training results
53
 
54
- | Training Loss | Epoch | Step | Validation Loss | Rouge1 | Rouge2 | Rougel | Rougelsum |
55
- |:-------------:|:-----:|:----:|:---------------:|:------:|:------:|:------:|:---------:|
56
- | No log | 1.0 | 13 | 4.3037 | 0.3448 | 0.0437 | 0.2181 | 0.2186 |
57
- | No log | 2.0 | 26 | 4.2507 | 0.3448 | 0.0437 | 0.2181 | 0.2186 |
58
- | No log | 3.0 | 39 | 4.1702 | 0.3448 | 0.0437 | 0.2181 | 0.2186 |
59
- | No log | 4.0 | 52 | 4.0673 | 0.3448 | 0.0437 | 0.2181 | 0.2186 |
60
- | No log | 5.0 | 65 | 3.9448 | 0.3643 | 0.0496 | 0.2480 | 0.2481 |
61
- | No log | 6.0 | 78 | 3.8053 | 0.3653 | 0.0499 | 0.2464 | 0.2466 |
62
- | No log | 7.0 | 91 | 3.6485 | 0.3653 | 0.0499 | 0.2464 | 0.2466 |
63
- | No log | 8.0 | 104 | 3.4774 | 0.4061 | 0.0678 | 0.2583 | 0.2586 |
64
- | No log | 9.0 | 117 | 3.3057 | 0.3700 | 0.0443 | 0.2441 | 0.2448 |
65
- | No log | 10.0 | 130 | 3.1590 | 0.3875 | 0.1212 | 0.3156 | 0.3166 |
 
 
 
 
 
66
 
67
 
68
  ### Framework versions
 
16
 
17
  This model is a fine-tuned version of [](https://huggingface.co/) on an unknown dataset.
18
  It achieves the following results on the evaluation set:
19
+ - Loss: 2.0461
20
+ - Rouge1: 0.4850
21
+ - Rouge2: 0.2566
22
+ - Rougel: 0.3589
23
+ - Rougelsum: 0.3595
24
 
25
  ## Model description
26
 
 
46
  - optimizer: Use adamw_torch with betas=(0.9,0.999) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments
47
  - lr_scheduler_type: linear
48
  - lr_scheduler_warmup_steps: 1024
49
+ - num_epochs: 15
50
  - mixed_precision_training: Native AMP
51
 
52
  ### Training results
53
 
54
+ | Training Loss | Epoch | Step | Validation Loss | Rouge1 | Rouge2 | Rougel | Rougelsum |
55
+ |:-------------:|:-----:|:-----:|:---------------:|:------:|:------:|:------:|:---------:|
56
+ | 2.4042 | 1.0 | 1828 | 1.7451 | 0.4622 | 0.1906 | 0.3370 | 0.3422 |
57
+ | 1.5875 | 2.0 | 3656 | 1.5933 | 0.4599 | 0.2060 | 0.3451 | 0.3472 |
58
+ | 1.3882 | 3.0 | 5484 | 1.5322 | 0.4606 | 0.2082 | 0.3422 | 0.3442 |
59
+ | 1.2415 | 4.0 | 7312 | 1.5130 | 0.4687 | 0.2208 | 0.3458 | 0.3476 |
60
+ | 1.1113 | 5.0 | 9140 | 1.5186 | 0.4630 | 0.2146 | 0.3398 | 0.3402 |
61
+ | 0.9671 | 6.0 | 10968 | 1.5683 | 0.4720 | 0.2290 | 0.3517 | 0.3520 |
62
+ | 0.8528 | 7.0 | 12796 | 1.6352 | 0.4704 | 0.2281 | 0.3491 | 0.3496 |
63
+ | 0.7555 | 8.0 | 14624 | 1.7122 | 0.4725 | 0.2305 | 0.3477 | 0.3481 |
64
+ | 0.6567 | 9.0 | 16452 | 1.7814 | 0.4763 | 0.2389 | 0.3537 | 0.3543 |
65
+ | 0.5612 | 10.0 | 18280 | 1.8528 | 0.4777 | 0.2410 | 0.3515 | 0.3516 |
66
+ | 0.4953 | 11.0 | 20108 | 1.9072 | 0.4799 | 0.2487 | 0.3562 | 0.3565 |
67
+ | 0.4445 | 12.0 | 21936 | 1.9503 | 0.4829 | 0.2514 | 0.3571 | 0.3574 |
68
+ | 0.3976 | 13.0 | 23764 | 1.9928 | 0.4834 | 0.2543 | 0.3569 | 0.3573 |
69
+ | 0.3643 | 14.0 | 25592 | 2.0249 | 0.4820 | 0.2520 | 0.3575 | 0.3581 |
70
+ | 0.3263 | 15.0 | 27420 | 2.0461 | 0.4850 | 0.2566 | 0.3589 | 0.3595 |
71
 
72
 
73
  ### Framework versions
config.json CHANGED
@@ -185,21 +185,6 @@
185
  "no_repeat_ngram_size": null,
186
  "num_beams": null,
187
  "pad_token_id": 50259,
188
- "quantization_config": {
189
- "_load_in_4bit": true,
190
- "_load_in_8bit": false,
191
- "bnb_4bit_compute_dtype": "float16",
192
- "bnb_4bit_quant_storage": "uint8",
193
- "bnb_4bit_quant_type": "nf4",
194
- "bnb_4bit_use_double_quant": true,
195
- "llm_int8_enable_fp32_cpu_offload": false,
196
- "llm_int8_has_fp16_weight": false,
197
- "llm_int8_skip_modules": null,
198
- "llm_int8_threshold": 6.0,
199
- "load_in_4bit": true,
200
- "load_in_8bit": false,
201
- "quant_method": "bitsandbytes"
202
- },
203
  "tie_encoder_decoder": true,
204
  "tie_word_embeddings": false,
205
  "torch_dtype": "float32",
 
185
  "no_repeat_ngram_size": null,
186
  "num_beams": null,
187
  "pad_token_id": 50259,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
188
  "tie_encoder_decoder": true,
189
  "tie_word_embeddings": false,
190
  "torch_dtype": "float32",
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f08b62e0db86d85d9385c5e51409d7af7f172ba1d5b0776b62014e22f2998f66
3
  size 956847808
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6ac4967b9e428e1d0cd5d8131a809c357f99733b0d702f90d22ee83a7015a9d8
3
  size 956847808
runs/Nov25_14-02-34_b1ac11fd3836/events.out.tfevents.1732543354.b1ac11fd3836.814.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:665beba30094b6e409a40941c559c94c60b3b09afda932cb2dad3aa9f92a9c73
3
+ size 9832
runs/Nov25_14-20-26_b1ac11fd3836/events.out.tfevents.1732544426.b1ac11fd3836.814.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d063b03dd9043c8bfcb803416b90652aa5fa75bf8dc952a64ad115d5a99e1bbb
3
+ size 22684
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3250168532d7a398ffe371b69ed5814ef5808e6d1166e9f0afc312e49f36e8e7
3
  size 5432
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:63dabb376ec66b07ca97ed2e5a31a3f89c482d325417e5a1636bbca97377c0d6
3
  size 5432