Training

Browse files

Files changed (9) hide show

README.md +18 -29
config.json +1 -1
generation_config.json +1 -1
model.safetensors +3 -0
runs/Jan23_06-13-55_1fca61cf9468/events.out.tfevents.1705990445.1fca61cf9468.26.0 +3 -0
special_tokens_map.json +21 -3
tokenizer.json +6 -3
tokenizer_config.json +27 -1
training_args.bin +2 -2

README.md CHANGED Viewed

@@ -2,26 +2,29 @@
 license: apache-2.0
 base_model: IlyaGusev/rut5_base_sum_gazeta
 tags:
-- summarization_4
 - generated_from_trainer
-metrics:
-- rouge
 model-index:
 - name: rut5_base_sum_gazeta-finetuned_week_gpt
   results: []
 ---
-# rut5_base_sum_gazeta-finetuned_week_gpt
-This model is a fine-tuned version of [IlyaGusev/rut5_base_sum_gazeta](https://huggingface.co/IlyaGusev/rut5_base_sum_gazeta) on the Natet/gpt_week_yandex dataset.
-This model is suitable for summarizing Habr articles.
 It achieves the following results on the evaluation set:
-- Loss: 1.2643
-- Rouge1: 38.9266
-- Rouge2: 18.0587
-- Rougel: 38.1447
-- Rougelsum: 38.1337
 ## Model description
@@ -41,30 +44,16 @@ More information needed
 The following hyperparameters were used during training:
 - learning_rate: 5.6e-05
-- train_batch_size: 8
-- eval_batch_size: 8
 - seed: 42
 - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
 - lr_scheduler_type: linear
 - num_epochs: 8
-### Training results
-| Training Loss | Epoch | Step | Validation Loss | Rouge1  | Rouge2  | Rougel  | Rougelsum |
-|:-------------:|:-----:|:----:|:---------------:|:-------:|:-------:|:-------:|:---------:|
-| 1.7691        | 1.0   | 1110 | 1.4005          | 37.7689 | 17.7394 | 36.8468 | 36.8842   |
-| 1.4892        | 2.0   | 2220 | 1.3477          | 35.9349 | 16.8403 | 35.1786 | 35.2055   |
-| 1.3579        | 3.0   | 3330 | 1.3079          | 37.7579 | 17.6421 | 36.8439 | 36.8182   |
-| 1.2708        | 4.0   | 4440 | 1.2675          | 37.867  | 17.3909 | 36.9706 | 36.987    |
-| 1.2006        | 5.0   | 5550 | 1.2703          | 38.8218 | 17.9772 | 38.001  | 37.9811   |
-| 1.1519        | 6.0   | 6660 | 1.2703          | 38.0351 | 17.5386 | 37.209  | 37.1815   |
-| 1.1132        | 7.0   | 7770 | 1.2593          | 38.4673 | 17.8343 | 37.529  | 37.5268   |
-| 1.0932        | 8.0   | 8880 | 1.2643          | 38.9266 | 18.0587 | 38.1447 | 38.1337   |
 ### Framework versions
-- Transformers 4.33.0
 - Pytorch 2.0.0
 - Datasets 2.1.0
-- Tokenizers 0.13.3

 license: apache-2.0
 base_model: IlyaGusev/rut5_base_sum_gazeta
 tags:
+- summarization
 - generated_from_trainer
 model-index:
 - name: rut5_base_sum_gazeta-finetuned_week_gpt
   results: []
 ---
+<!-- This model card has been generated automatically according to the information the Trainer had access to. You
+should probably proofread and complete it, then remove this comment. -->
+# rut5_base_sum_gazeta-finetuned_week_gpt
+This model is a fine-tuned version of [IlyaGusev/rut5_base_sum_gazeta](https://huggingface.co/IlyaGusev/rut5_base_sum_gazeta) on an unspecified dataset.
 It achieves the following results on the evaluation set:
+- eval_loss: 2.3331
+- eval_rouge1: 0.0
+- eval_rouge2: 0.0
+- eval_rougeL: 0.0
+- eval_rougeLsum: 0.0
+- eval_runtime: 7.8395
+- eval_samples_per_second: 0.383
+- eval_steps_per_second: 0.128
+- step: 0
 ## Model description
 The following hyperparameters were used during training:
 - learning_rate: 5.6e-05
+- train_batch_size: 16
+- eval_batch_size: 16
 - seed: 42
 - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
 - lr_scheduler_type: linear
 - num_epochs: 8
 ### Framework versions
+- Transformers 4.36.2
 - Pytorch 2.0.0
 - Datasets 2.1.0
+- Tokenizers 0.15.0

config.json CHANGED Viewed

@@ -30,7 +30,7 @@
   "tie_word_embeddings": false,
   "tokenizer_class": "T5Tokenizer",
   "torch_dtype": "float32",
-  "transformers_version": "4.33.0",
   "use_cache": true,
   "vocab_size": 30000
 }

   "tie_word_embeddings": false,
   "tokenizer_class": "T5Tokenizer",
   "torch_dtype": "float32",
+  "transformers_version": "4.36.2",
   "use_cache": true,
   "vocab_size": 30000
 }

generation_config.json CHANGED Viewed

@@ -5,5 +5,5 @@
   "max_length": 200,
   "num_beams": 5,
   "pad_token_id": 0,
-  "transformers_version": "4.33.0"
 }

   "max_length": 200,
   "num_beams": 5,
   "pad_token_id": 0,
+  "transformers_version": "4.36.2"
 }

model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e4ea4a6196701370c6d96dd9e35b5e2e625db48c4ab487302dd89d2e8bc8a633
+size 977270632

runs/Jan23_06-13-55_1fca61cf9468/events.out.tfevents.1705990445.1fca61cf9468.26.0 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:72496ec47665df49742d009dda91596f82034e7a7bb0d4bfe3a5e86d0d4d520a
+size 488

special_tokens_map.json CHANGED Viewed

@@ -1,5 +1,23 @@
 {
-  "eos_token": "</s>",
-  "pad_token": "<pad>",
-  "unk_token": "<unk>"
 }

 {
+  "eos_token": {
+    "content": "</s>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": {
+    "content": "<pad>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "unk_token": {
+    "content": "<unk>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
 }

tokenizer.json CHANGED Viewed

@@ -55,7 +55,8 @@
   "pre_tokenizer": {
     "type": "Metaspace",
     "replacement": "▁",
-    "add_prefix_space": true
   },
   "post_processor": {
     "type": "TemplateProcessing",
@@ -114,7 +115,8 @@
   "decoder": {
     "type": "Metaspace",
     "replacement": "▁",
-    "add_prefix_space": true
   },
   "model": {
     "type": "Unigram",
@@ -120120,6 +120122,7 @@
         "▁<extra_id_0>",
         0.0
       ]
-    ]
   }
 }

   "pre_tokenizer": {
     "type": "Metaspace",
     "replacement": "▁",
+    "add_prefix_space": true,
+    "prepend_scheme": "always"
   },
   "post_processor": {
     "type": "TemplateProcessing",
   "decoder": {
     "type": "Metaspace",
     "replacement": "▁",
+    "add_prefix_space": true,
+    "prepend_scheme": "always"
   },
   "model": {
     "type": "Unigram",
         "▁<extra_id_0>",
         0.0
       ]
+    ],
+    "byte_fallback": false
   }
 }

tokenizer_config.json CHANGED Viewed

@@ -1,5 +1,31 @@
 {
-  "additional_special_tokens": null,
   "clean_up_tokenization_spaces": true,
   "eos_token": "</s>",
   "extra_ids": 0,

 {
+  "added_tokens_decoder": {
+    "0": {
+      "content": "<pad>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "1": {
+      "content": "</s>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "2": {
+      "content": "<unk>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "additional_special_tokens": [],
   "clean_up_tokenization_spaces": true,
   "eos_token": "</s>",
   "extra_ids": 0,

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b0589f8cdf02c4eaa97d8242bd3f50d9a5cdd49cdfbaf8652e423a63645bb55e
-size 4219

 version https://git-lfs.github.com/spec/v1
+oid sha256:f7b33970fa0a97389aeebcbdbfe0733430564e97ffcb91f910e4e1e81abd2df2
+size 4475