End of training

Files changed (7) hide show

README.md CHANGED Viewed

@@ -1,6 +1,6 @@
 ---
-license: mit
-base_model: gpt2
 tags:
 - generated_from_trainer
 model-index:
@@ -13,9 +13,9 @@ should probably proofread and complete it, then remove this comment. -->
 # my_patent_abstract_causual_language-model
-This model is a fine-tuned version of [gpt2](https://huggingface.co/gpt2) on the None dataset.
 It achieves the following results on the evaluation set:
-- Loss: 3.2714
 ## Model description
@@ -46,9 +46,9 @@ The following hyperparameters were used during training:
 | Training Loss | Epoch | Step | Validation Loss |
 |:-------------:|:-----:|:----:|:---------------:|
-| No log        | 1.0   | 3    | 3.5388          |
-| No log        | 2.0   | 6    | 3.3409          |
-| No log        | 3.0   | 9    | 3.2714          |
 ### Framework versions

 ---
+license: apache-2.0
+base_model: distilbert/distilgpt2
 tags:
 - generated_from_trainer
 model-index:
 # my_patent_abstract_causual_language-model
+This model is a fine-tuned version of [distilbert/distilgpt2](https://huggingface.co/distilbert/distilgpt2) on the None dataset.
 It achieves the following results on the evaluation set:
+- Loss: 3.4992
 ## Model description
 | Training Loss | Epoch | Step | Validation Loss |
 |:-------------:|:-----:|:----:|:---------------:|
+| No log        | 1.0   | 1    | 3.5980          |
+| No log        | 2.0   | 2    | 3.5339          |
+| No log        | 3.0   | 3    | 3.4992          |
 ### Framework versions

config.json CHANGED Viewed

@@ -1,23 +1,28 @@
 {
-  "_name_or_path": "gpt2",
   "activation_function": "gelu_new",
   "architectures": [
     "GPT2LMHeadModel"
   ],
   "attn_pdrop": 0.1,
   "bos_token_id": 50256,
-  "do_sample": true,
   "embd_pdrop": 0.1,
   "eos_token_id": 50256,
   "initializer_range": 0.02,
   "layer_norm_epsilon": 1e-05,
-  "max_length": 50,
   "model_type": "gpt2",
   "n_ctx": 1024,
   "n_embd": 768,
   "n_head": 12,
   "n_inner": null,
-  "n_layer": 12,
   "n_positions": 1024,
   "reorder_and_upcast_attn": false,
   "resid_pdrop": 0.1,

 {
+  "_name_or_path": "distilbert/distilgpt2",
+  "_num_labels": 1,
   "activation_function": "gelu_new",
   "architectures": [
     "GPT2LMHeadModel"
   ],
   "attn_pdrop": 0.1,
   "bos_token_id": 50256,
   "embd_pdrop": 0.1,
   "eos_token_id": 50256,
+  "id2label": {
+    "0": "LABEL_0"
+  },
   "initializer_range": 0.02,
+  "label2id": {
+    "LABEL_0": 0
+  },
   "layer_norm_epsilon": 1e-05,
   "model_type": "gpt2",
   "n_ctx": 1024,
   "n_embd": 768,
   "n_head": 12,
   "n_inner": null,
+  "n_layer": 6,
   "n_positions": 1024,
   "reorder_and_upcast_attn": false,
   "resid_pdrop": 0.1,

generation_config.json CHANGED Viewed

@@ -1,9 +1,6 @@
 {
   "_from_model_config": true,
   "bos_token_id": 50256,
-  "do_sample": true,
   "eos_token_id": 50256,
-  "max_length": 50,
-  "pad_token_id": 50256,
   "transformers_version": "4.41.2"
 }

 {
   "_from_model_config": true,
   "bos_token_id": 50256,
   "eos_token_id": 50256,
   "transformers_version": "4.41.2"
 }

model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:49c4672d24e79732c642c6803783920d9b18e29aa70b71ced578c5be7f4b0f66
-size 497774208

 version https://git-lfs.github.com/spec/v1
+oid sha256:3361cb0bce7d52dfa34eca10be122e285246c6b7be20d0821d482803d0036bde
+size 327657928

runs/Jun15_12-13-39_8423c480c5d4/events.out.tfevents.1718453620.8423c480c5d4.3030.8 ADDED Viewed

+version https://git-lfs.github.com/spec/v1
+oid sha256:aa404acccc4b84a63ac007643788a18d8cb408fa94da469f0be9113407c1d055
+size 6376

runs/Jun15_12-13-39_8423c480c5d4/events.out.tfevents.1718453630.8423c480c5d4.3030.9 ADDED Viewed

+version https://git-lfs.github.com/spec/v1
+oid sha256:1e717383363a3880e7f4f8a8562dfc6ba86138df7ecc0197ce2f2d1059e768f1
+size 354

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:766b3c4cd493c10475b8fddb906959a4abee22819d32a7ec7188882a59e595f2
 size 5176

 version https://git-lfs.github.com/spec/v1
+oid sha256:4d2e20781b0328cebaeb6c41b73f74d08bf25c0fb57540d0cb45324e3e9d4437
 size 5176