kaizerBox committed on
Commit ec348d3 · Parent: ee639dc

Time is 194.93288735946018 minutes, Memory is 9.5GB

README.md CHANGED
@@ -15,7 +15,7 @@ should probably proofread and complete it, then remove this comment. -->
 
 This model is a fine-tuned version of [](https://huggingface.co/) on the xsum dataset.
 It achieves the following results on the evaluation set:
-- Loss: 4.1524
+- Loss: 4.0256
 
 ## Model description
 
@@ -50,9 +50,9 @@ The following hyperparameters were used during training:
 
 | Training Loss | Epoch | Step  | Validation Loss |
 |:-------------:|:-----:|:-----:|:---------------:|
-| 4.9693        | 1.0   | 5762  | 4.3526          |
-| 4.4387        | 2.0   | 11525 | 4.1929          |
-| 4.3335        | 3.0   | 17286 | 4.1524          |
+| 4.7214        | 1.0   | 5762  | 4.2254          |
+| 4.1228        | 2.0   | 11525 | 4.0669          |
+| 4.0011        | 3.0   | 17286 | 4.0256          |
 
 
 ### Framework versions
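For context, a minimal sketch of loading a checkpoint like this for XSum-style summarization with the `transformers` library. The README leaves the base-model link blank, so `./gpt2-xsum` below is a hypothetical local path, not an identifier taken from this repo:

```python
# Sketch: load the fine-tuned GPT-2 checkpoint and generate a summary.
# "./gpt2-xsum" is a hypothetical path; substitute the actual repo id or
# local directory where this commit's files were downloaded.
from transformers import AutoModelForCausalLM, AutoTokenizer

model_dir = "./gpt2-xsum"  # hypothetical, not from the commit
tokenizer = AutoTokenizer.from_pretrained(model_dir)
model = AutoModelForCausalLM.from_pretrained(model_dir)

article = "The full text of a news article goes here."
# n_positions is 1024 (see config.json below); leave room for new tokens.
inputs = tokenizer(article, return_tensors="pt", truncation=True, max_length=960)
outputs = model.generate(
    **inputs,
    max_new_tokens=64,
    do_sample=False,
    pad_token_id=tokenizer.eos_token_id,  # GPT-2 has no dedicated pad token
)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
```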
config.json CHANGED
@@ -3,20 +3,20 @@
   "architectures": [
     "GPT2LMHeadModel"
   ],
-  "attn_pdrop": 0.1,
+  "attn_pdrop": 0.0,
   "bos_token_id": 50256,
-  "embd_pdrop": 0.1,
+  "embd_pdrop": 0.0,
   "eos_token_id": 50256,
   "initializer_range": 0.02,
   "layer_norm_epsilon": 1e-05,
   "model_type": "gpt2",
   "n_embd": 128,
   "n_head": 2,
-  "n_inner": 216,
+  "n_inner": 128,
   "n_layer": 3,
   "n_positions": 1024,
   "reorder_and_upcast_attn": false,
-  "resid_pdrop": 0.1,
+  "resid_pdrop": 0.0,
   "scale_attn_by_inverse_layer_idx": false,
   "scale_attn_weights": true,
   "summary_activation": null,
model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ffc2e22af735d871cabae0e6f33c79c063b01fccff67046866a93141a9da6741
-size 27728168
+oid sha256:e54bcc781ea24987eb720ec863042cd076ba2bcb8b84faaea8a5d35d40deabf2
+size 27456776
runs/Nov28_15-03-43_940adb247ee4/events.out.tfevents.1701183823.940adb247ee4.323.0 ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9fa8b58e2c3d5408de47f0c098f4f7055fc7e07ce33ec1db74babf41dd30ffbd
+size 5922
training_args.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:acf7defc4a7030a5b3580a4dc0a1031e5672ad75f73f1f43c06ed5d993f77c80
+oid sha256:b9596d478b85074461b1bd9e3792f4de79e2758b9a5be432bc062f673ddf0c22
 size 4600
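All the binary files in this commit are Git LFS pointers: each stores only the blob's sha256 and byte size, so a download can be checked locally. A short sketch, assuming the real blob sits next to the script:

```python
# Sketch: verify a downloaded blob against its Git LFS pointer.
# Pointer text is copied verbatim from the training_args.bin diff above.
import hashlib
from pathlib import Path

def parse_lfs_pointer(text: str) -> dict:
    # Pointer lines look like "oid sha256:<hex>" and "size <bytes>".
    fields = dict(line.split(" ", 1) for line in text.strip().splitlines())
    return {"oid": fields["oid"].split(":", 1)[1], "size": int(fields["size"])}

def verify(blob_path: str, pointer_text: str) -> bool:
    expected = parse_lfs_pointer(pointer_text)
    data = Path(blob_path).read_bytes()
    return (len(data) == expected["size"]
            and hashlib.sha256(data).hexdigest() == expected["oid"])

pointer = """version https://git-lfs.github.com/spec/v1
oid sha256:b9596d478b85074461b1bd9e3792f4de79e2758b9a5be432bc062f673ddf0c22
size 4600"""
print(verify("training_args.bin", pointer))  # True if the download is intact
```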