End of training

Browse files

Files changed (5) hide show

README.md +114 -0
config.json +7 -7
generation_config.json +7 -0
model.safetensors +2 -2
training_args.bin +1 -1

README.md ADDED Viewed

	@@ -0,0 +1,114 @@

+---
+library_name: transformers
+license: apache-2.0
+base_model: google/long-t5-tglobal-base
+tags:
+- generated_from_trainer
+metrics:
+- rouge
+model-index:
+- name: long_t5_6
+  results: []
+---
+<!-- This model card has been generated automatically according to the information the Trainer had access to. You
+should probably proofread and complete it, then remove this comment. -->
+# long_t5_6
+This model is a fine-tuned version of [google/long-t5-tglobal-base](https://huggingface.co/google/long-t5-tglobal-base) on an unknown dataset.
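+It achieves the following results on the evaluation set at the final checkpoint (epoch 50, step 12500). Note that validation loss was lowest at epoch 7 (1.5066) and all ROUGE scores peaked at epoch 18, so these final numbers are not the best observed during training: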
+- Loss: 2.0450
+- Rouge1: 0.5157
+- Rouge2: 0.3356
+- Rougel: 0.4671
+- Rougelsum: 0.4673
+- Gen Len: 31.344
+## Model description
+More information needed
+## Intended uses & limitations
+More information needed
+## Training and evaluation data
+More information needed
+## Training procedure
+### Training hyperparameters
+The following hyperparameters were used during training:
+- learning_rate: 0.0001
+- train_batch_size: 32
+- eval_batch_size: 16
+- seed: 42
+- optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
+- lr_scheduler_type: linear
+- num_epochs: 50
+### Training results
+| Training Loss | Epoch | Step  | Validation Loss | Rouge1 | Rouge2 | Rougel | Rougelsum | Gen Len |
+|:-------------:|:-----:|:-----:|:---------------:|:------:|:------:|:------:|:---------:|:-------:|
+| No log        | 1.0   | 250   | 1.6173          | 0.4644 | 0.29   | 0.4269 | 0.4268    | 25.406  |
+| 2.1255        | 2.0   | 500   | 1.5596          | 0.4748 | 0.2986 | 0.4353 | 0.4354    | 26.834  |
+| 2.1255        | 3.0   | 750   | 1.5241          | 0.4819 | 0.3074 | 0.4424 | 0.4423    | 25.6985 |
+| 1.7318        | 4.0   | 1000  | 1.5178          | 0.4925 | 0.3161 | 0.4521 | 0.4521    | 26.513  |
+| 1.7318        | 5.0   | 1250  | 1.5178          | 0.4975 | 0.3184 | 0.4555 | 0.4555    | 27.042  |
+| 1.5463        | 6.0   | 1500  | 1.5168          | 0.5014 | 0.3255 | 0.4614 | 0.4618    | 25.815  |
+| 1.5463        | 7.0   | 1750  | 1.5066          | 0.5054 | 0.3306 | 0.4653 | 0.4654    | 25.8755 |
+| 1.4053        | 8.0   | 2000  | 1.5184          | 0.508  | 0.3311 | 0.4673 | 0.4673    | 26.246  |
+| 1.4053        | 9.0   | 2250  | 1.5372          | 0.5095 | 0.3331 | 0.4669 | 0.4667    | 27.511  |
+| 1.289         | 10.0  | 2500  | 1.5446          | 0.5078 | 0.3328 | 0.4662 | 0.4664    | 27.14   |
+| 1.289         | 11.0  | 2750  | 1.5500          | 0.5111 | 0.3329 | 0.4687 | 0.4687    | 27.444  |
+| 1.191         | 12.0  | 3000  | 1.5660          | 0.5141 | 0.3345 | 0.4704 | 0.4703    | 27.397  |
+| 1.191         | 13.0  | 3250  | 1.5731          | 0.5168 | 0.3389 | 0.4735 | 0.4736    | 27.4535 |
+| 1.107         | 14.0  | 3500  | 1.5926          | 0.5158 | 0.3357 | 0.4709 | 0.4708    | 28.82   |
+| 1.107         | 15.0  | 3750  | 1.6107          | 0.5158 | 0.3406 | 0.473  | 0.4734    | 28.3135 |
+| 1.036         | 16.0  | 4000  | 1.6205          | 0.5187 | 0.3411 | 0.4742 | 0.4744    | 28.9715 |
+| 1.036         | 17.0  | 4250  | 1.6467          | 0.5142 | 0.3378 | 0.4701 | 0.4702    | 28.81   |
+| 0.9655        | 18.0  | 4500  | 1.6670          | 0.5192 | 0.3426 | 0.4748 | 0.4751    | 28.266  |
+| 0.9655        | 19.0  | 4750  | 1.6715          | 0.5154 | 0.3373 | 0.4695 | 0.4694    | 29.8395 |
+| 0.9055        | 20.0  | 5000  | 1.6824          | 0.5156 | 0.3388 | 0.4715 | 0.4721    | 28.653  |
+| 0.9055        | 21.0  | 5250  | 1.7156          | 0.5164 | 0.3384 | 0.4708 | 0.4712    | 30.2485 |
+| 0.8519        | 22.0  | 5500  | 1.7239          | 0.5164 | 0.3404 | 0.4733 | 0.4735    | 28.5295 |
+| 0.8519        | 23.0  | 5750  | 1.7292          | 0.5169 | 0.3374 | 0.4716 | 0.4718    | 29.1895 |
+| 0.8069        | 24.0  | 6000  | 1.7591          | 0.5168 | 0.3369 | 0.4703 | 0.4707    | 29.9035 |
+| 0.8069        | 25.0  | 6250  | 1.7733          | 0.5146 | 0.3355 | 0.4689 | 0.4692    | 29.533  |
+| 0.764         | 26.0  | 6500  | 1.7963          | 0.5172 | 0.3388 | 0.4716 | 0.4721    | 30.0075 |
+| 0.764         | 27.0  | 6750  | 1.8136          | 0.5173 | 0.3385 | 0.471  | 0.4714    | 29.672  |
+| 0.7256        | 28.0  | 7000  | 1.8317          | 0.5153 | 0.3361 | 0.4698 | 0.4702    | 30.5335 |
+| 0.7256        | 29.0  | 7250  | 1.8478          | 0.5136 | 0.336  | 0.4686 | 0.469     | 30.654  |
+| 0.6901        | 30.0  | 7500  | 1.8709          | 0.5169 | 0.338  | 0.472  | 0.4724    | 29.7215 |
+| 0.6901        | 31.0  | 7750  | 1.8733          | 0.5153 | 0.3364 | 0.4694 | 0.4698    | 30.3385 |
+| 0.6617        | 32.0  | 8000  | 1.8882          | 0.5137 | 0.3369 | 0.4692 | 0.4692    | 29.8545 |
+| 0.6617        | 33.0  | 8250  | 1.9176          | 0.5144 | 0.3354 | 0.4689 | 0.4692    | 30.489  |
+| 0.6331        | 34.0  | 8500  | 1.9219          | 0.517  | 0.3391 | 0.472  | 0.4723    | 30.3225 |
+| 0.6331        | 35.0  | 8750  | 1.9272          | 0.5146 | 0.3367 | 0.469  | 0.4695    | 30.647  |
+| 0.6106        | 36.0  | 9000  | 1.9468          | 0.512  | 0.3329 | 0.4658 | 0.466     | 31.4695 |
+| 0.6106        | 37.0  | 9250  | 1.9650          | 0.5143 | 0.3345 | 0.4682 | 0.4685    | 31.2565 |
+| 0.5914        | 38.0  | 9500  | 1.9666          | 0.5163 | 0.3367 | 0.4705 | 0.4708    | 30.9375 |
+| 0.5914        | 39.0  | 9750  | 1.9788          | 0.5134 | 0.3351 | 0.468  | 0.4683    | 30.297  |
+| 0.5722        | 40.0  | 10000 | 1.9985          | 0.5118 | 0.3331 | 0.4659 | 0.4662    | 31.1015 |
+| 0.5722        | 41.0  | 10250 | 2.0013          | 0.5137 | 0.3341 | 0.4671 | 0.4676    | 30.8835 |
+| 0.5571        | 42.0  | 10500 | 2.0087          | 0.513  | 0.333  | 0.4666 | 0.467     | 31.094  |
+| 0.5571        | 43.0  | 10750 | 2.0196          | 0.5155 | 0.3361 | 0.4682 | 0.4684    | 31.0515 |
+| 0.5466        | 44.0  | 11000 | 2.0221          | 0.5143 | 0.3349 | 0.4674 | 0.4678    | 31.1495 |
+| 0.5466        | 45.0  | 11250 | 2.0275          | 0.5146 | 0.3353 | 0.4672 | 0.4676    | 31.1845 |
+| 0.5355        | 46.0  | 11500 | 2.0311          | 0.5134 | 0.3344 | 0.4662 | 0.4665    | 30.9715 |
+| 0.5355        | 47.0  | 11750 | 2.0410          | 0.5141 | 0.3345 | 0.4657 | 0.466     | 31.6285 |
+| 0.5305        | 48.0  | 12000 | 2.0415          | 0.5154 | 0.3359 | 0.467  | 0.4672    | 31.3345 |
+| 0.5305        | 49.0  | 12250 | 2.0424          | 0.5157 | 0.3358 | 0.4677 | 0.4678    | 31.033  |
+| 0.5256        | 50.0  | 12500 | 2.0450          | 0.5157 | 0.3356 | 0.4671 | 0.4673    | 31.344  |
+### Framework versions
+- Transformers 4.45.2
+- Pytorch 2.2.1
+- Datasets 3.0.1
+- Tokenizers 0.20.1

config.json CHANGED Viewed

@@ -1,15 +1,15 @@
 {
-  "_name_or_path": "google/long-t5-local-large",
   "architectures": [
     "LongT5ForConditionalGeneration"
   ],
-  "d_ff": 2816,
   "d_kv": 64,
-  "d_model": 1024,
   "decoder_start_token_id": 0,
   "dense_act_fn": "gelu_new",
   "dropout_rate": 0.1,
-  "encoder_attention_type": "local",
   "eos_token_id": 1,
   "feed_forward_proj": "gated-gelu",
   "global_block_size": 16,
@@ -20,9 +20,9 @@
   "local_radius": 127,
   "model_type": "longt5",
   "n_positions": 4096,
-  "num_decoder_layers": 24,
-  "num_heads": 16,
-  "num_layers": 24,
   "output_past": true,
   "pad_token_id": 0,
   "relative_attention_max_distance": 128,

 {
+  "_name_or_path": "google/long-t5-tglobal-base",
   "architectures": [
     "LongT5ForConditionalGeneration"
   ],
+  "d_ff": 2048,
   "d_kv": 64,
+  "d_model": 768,
   "decoder_start_token_id": 0,
   "dense_act_fn": "gelu_new",
   "dropout_rate": 0.1,
+  "encoder_attention_type": "transient-global",
   "eos_token_id": 1,
   "feed_forward_proj": "gated-gelu",
   "global_block_size": 16,
   "local_radius": 127,
   "model_type": "longt5",
   "n_positions": 4096,
+  "num_decoder_layers": 12,
+  "num_heads": 12,
+  "num_layers": 12,
   "output_past": true,
   "pad_token_id": 0,
   "relative_attention_max_distance": 128,

generation_config.json ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+  "decoder_start_token_id": 0,
+  "eos_token_id": 1,
+  "max_length": 100,
+  "pad_token_id": 0,
+  "transformers_version": "4.45.2"
+}

model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e7dfe0aa03099eea22a031a32eb6e2ad15e0efb2679223416c0aff3abf2ff872
-size 3395862080

 version https://git-lfs.github.com/spec/v1
+oid sha256:302e5855752a1ce6a85763d7f4b6f84ba5fe236bd665d06b3534cca41cdc9b51
+size 1187780840

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:786bf7f2fa9edc8414910e4ba196bb8326121a18b6a0ed1cbdd12a3be011535a
 size 5304

 version https://git-lfs.github.com/spec/v1
+oid sha256:c11f2e1748f1d759e767970eebfff4b69cc5570b85fa894611a56112d87c612c
 size 5304