lapp0 committed
Commit f4cf8f4
1 Parent(s): f1742b1

Training in progress, step 61875

Files changed (36)
  1. benchmarks.shelve.bak +0 -0
  2. benchmarks.shelve.dat +0 -0
  3. benchmarks.shelve.dir +0 -0
  4. config.json +39 -0
  5. logs/attn_layer_mapper=all, attn_loss_fn=cos, attn_projector=linear/events.out.tfevents.1724523661.e3f806ea38c9 +3 -0
  6. logs/attn_layer_mapper=all, attn_loss_fn=cos, attn_projector=linear/events.out.tfevents.1724524283.e3f806ea38c9 +3 -0
  7. logs/attn_layer_mapper=all, attn_loss_fn=raw_mse, attn_projector=linear/events.out.tfevents.1724522510.e3f806ea38c9 +3 -0
  8. logs/attn_layer_mapper=all, attn_loss_fn=raw_mse, attn_projector=linear/events.out.tfevents.1724523463.e3f806ea38c9 +3 -0
  9. logs/attn_layer_mapper=all, attn_loss_fn=raw_mse, attn_projector=linear/events.out.tfevents.1724523824.e3f806ea38c9 +3 -0
  10. logs/attn_layer_mapper=all, attn_loss_fn=raw_mse, attn_projector=linear/events.out.tfevents.1724524084.e3f806ea38c9 +3 -0
  11. logs/attn_layer_mapper=all, attn_loss_fn=raw_mse, attn_projector=linear/events.out.tfevents.1724524694.e3f806ea38c9 +3 -0
  12. logs/attn_layer_mapper=all, attn_loss_fn=raw_mse, attn_projector=linear/events.out.tfevents.1724524956.e3f806ea38c9 +3 -0
  13. logs/attn_layer_mapper=last, attn_loss_fn=cos, attn_projector=linear/events.out.tfevents.1724524382.e3f806ea38c9 +3 -0
  14. logs/attn_layer_mapper=last, attn_loss_fn=raw_mse, attn_projector=linear/events.out.tfevents.1724523563.e3f806ea38c9 +3 -0
  15. logs/attn_layer_mapper=last, attn_loss_fn=raw_mse, attn_projector=linear/events.out.tfevents.1724523923.e3f806ea38c9 +3 -0
  16. logs/attn_layer_mapper=last, attn_loss_fn=raw_mse, attn_projector=linear/events.out.tfevents.1724524182.e3f806ea38c9 +3 -0
  17. logs/attn_layer_mapper=last, attn_loss_fn=raw_mse, attn_projector=linear/events.out.tfevents.1724524793.e3f806ea38c9 +3 -0
  18. logs/attn_layer_mapper=last_k_2, attn_loss_fn=cos, attn_projector=linear/events.out.tfevents.1724523711.e3f806ea38c9 +3 -0
  19. logs/attn_layer_mapper=last_k_2, attn_loss_fn=cos, attn_projector=linear/events.out.tfevents.1724524333.e3f806ea38c9 +3 -0
  20. logs/attn_layer_mapper=last_k_2, attn_loss_fn=raw_mse, attn_projector=linear/events.out.tfevents.1724523513.e3f806ea38c9 +3 -0
  21. logs/attn_layer_mapper=last_k_2, attn_loss_fn=raw_mse, attn_projector=linear/events.out.tfevents.1724523874.e3f806ea38c9 +3 -0
  22. logs/attn_layer_mapper=last_k_2, attn_loss_fn=raw_mse, attn_projector=linear/events.out.tfevents.1724524133.e3f806ea38c9 +3 -0
  23. logs/attn_layer_mapper=last_k_2, attn_loss_fn=raw_mse, attn_projector=linear/events.out.tfevents.1724524744.e3f806ea38c9 +3 -0
  24. logs/attn_layer_mapper=last_k_2, attn_loss_fn=raw_mse, attn_projector=linear/events.out.tfevents.1724531592.e3f806ea38c9 +3 -0
  25. logs/attn_layer_mapper=layer-2, attn_loss_fn=cos, attn_projector=linear/events.out.tfevents.1724524431.e3f806ea38c9 +3 -0
  26. logs/attn_layer_mapper=layer-2, attn_loss_fn=raw_mse, attn_projector=linear/events.out.tfevents.1724523613.e3f806ea38c9 +3 -0
  27. logs/attn_layer_mapper=layer-2, attn_loss_fn=raw_mse, attn_projector=linear/events.out.tfevents.1724523982.e3f806ea38c9 +3 -0
  28. logs/attn_layer_mapper=layer-2, attn_loss_fn=raw_mse, attn_projector=linear/events.out.tfevents.1724524232.e3f806ea38c9 +3 -0
  29. logs/attn_layer_mapper=layer-2, attn_loss_fn=raw_mse, attn_projector=linear/events.out.tfevents.1724524842.e3f806ea38c9 +3 -0
  30. merges.txt +0 -0
  31. model.safetensors +3 -0
  32. special_tokens_map.json +6 -0
  33. tokenizer.json +0 -0
  34. tokenizer_config.json +20 -0
  35. training_args.bin +3 -0
  36. vocab.json +0 -0
benchmarks.shelve.bak ADDED
File without changes
benchmarks.shelve.dat ADDED
File without changes
benchmarks.shelve.dir ADDED
File without changes
config.json ADDED
@@ -0,0 +1,39 @@
+ {
+   "_name_or_path": "gpt2",
+   "activation_function": "gelu_new",
+   "architectures": [
+     "GPT2LMHeadModel"
+   ],
+   "attn_pdrop": 0.1,
+   "bos_token_id": 50256,
+   "embd_pdrop": 0.1,
+   "eos_token_id": 50256,
+   "initializer_range": 0.02,
+   "layer_norm_epsilon": 1e-05,
+   "model_type": "gpt2",
+   "n_ctx": 1024,
+   "n_embd": 768,
+   "n_head": 12,
+   "n_inner": null,
+   "n_layer": 12,
+   "n_positions": 1024,
+   "reorder_and_upcast_attn": false,
+   "resid_pdrop": 0.1,
+   "scale_attn_by_inverse_layer_idx": false,
+   "scale_attn_weights": true,
+   "summary_activation": null,
+   "summary_first_dropout": 0.1,
+   "summary_proj_to_labels": true,
+   "summary_type": "cls_index",
+   "summary_use_proj": true,
+   "task_specific_params": {
+     "text-generation": {
+       "do_sample": true,
+       "max_length": 50
+     }
+   },
+   "torch_dtype": "bfloat16",
+   "transformers_version": "4.44.1",
+   "use_cache": true,
+   "vocab_size": 50257
+ }
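With config.json, model.safetensors, and the tokenizer files in place, this checkpoint should load through the standard transformers API. A minimal sketch, assuming the repo has been cloned locally (the "./checkpoint" path is a placeholder, not the actual repo id):

import torch
from transformers import AutoConfig, AutoModelForCausalLM

checkpoint_dir = "./checkpoint"  # hypothetical local clone of this repo

# config.json above declares a 12-layer, 768-dim GPT-2 stored in bfloat16
config = AutoConfig.from_pretrained(checkpoint_dir)
model = AutoModelForCausalLM.from_pretrained(checkpoint_dir, torch_dtype=torch.bfloat16)

print(config.n_layer, config.n_embd, config.torch_dtype)  # 12, 768, bfloat16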
logs/attn_layer_mapper=all, attn_loss_fn=cos, attn_projector=linear/events.out.tfevents.1724523661.e3f806ea38c9 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:577957b37ba54e360acf46062bc17809facd866a910252c9b8bc2bb40afda439
+ size 5558
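Each of these three-line stubs is a Git LFS pointer rather than the file itself: the repo tracks only the pointer-spec version, the SHA-256 object id, and the byte size, while the real TensorBoard event file lives in LFS storage. A minimal sketch of reading such a pointer (the example filename is illustrative):

def parse_lfs_pointer(path):
    """Parse a Git LFS pointer file into a {version, oid, size} dict."""
    fields = {}
    with open(path) as f:
        for line in f:
            key, _, value = line.strip().partition(" ")
            fields[key] = value
    fields["size"] = int(fields["size"])
    return fields

# e.g. parse_lfs_pointer("events.out.tfevents.1724523661.e3f806ea38c9")
# -> {"version": "https://git-lfs.github.com/spec/v1",
#     "oid": "sha256:577957b3...", "size": 5558}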
logs/attn_layer_mapper=all, attn_loss_fn=cos, attn_projector=linear/events.out.tfevents.1724524283.e3f806ea38c9 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:ac4ad0345c302d748f83758efa2a88603d03ef3dd7f40c2450e93e8637d185a1
+ size 5558
logs/attn_layer_mapper=all, attn_loss_fn=raw_mse, attn_projector=linear/events.out.tfevents.1724522510.e3f806ea38c9 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:77b25551da8952eb9a18fbb47288cbacb98f92f410617812604aecfca45ce56a
+ size 5566
logs/attn_layer_mapper=all, attn_loss_fn=raw_mse, attn_projector=linear/events.out.tfevents.1724523463.e3f806ea38c9 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:548f2acdba18142803a364d454f8beb451c2598b7e69a02e20db82a26ae06ab1
+ size 5566
logs/attn_layer_mapper=all, attn_loss_fn=raw_mse, attn_projector=linear/events.out.tfevents.1724523824.e3f806ea38c9 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:0ec061abfd76b50529d767b36d9afc0c87ff9c6a28df21296ff29784a6d62805
+ size 5566
logs/attn_layer_mapper=all, attn_loss_fn=raw_mse, attn_projector=linear/events.out.tfevents.1724524084.e3f806ea38c9 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:f5c12de8f459f93d9a8484d78b3c5075e88ba9404d1307f1f2dd9484b43483c0
+ size 5566
logs/attn_layer_mapper=all, attn_loss_fn=raw_mse, attn_projector=linear/events.out.tfevents.1724524694.e3f806ea38c9 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:f732b9dee7a8b5edd257cc063aed97680bcb4a755da519fe8757f3a1b51dc21d
+ size 5566
logs/attn_layer_mapper=all, attn_loss_fn=raw_mse, attn_projector=linear/events.out.tfevents.1724524956.e3f806ea38c9 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:d180c5a2f2ada5e6286cf35996d62290b9823e1dfb6e4f66d6a0d4916a3cfdfa
+ size 29625288
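Most of the event files in this commit are ~5.5 KB stubs from runs that logged only a few steps; this ~29 MB file holds the full scalar history of the long run. Once pulled out of LFS (e.g. with git lfs pull), it can be inspected with TensorBoard's event reader; a minimal sketch (the run directory is copied from this commit, the tag indexing is generic):

from tensorboard.backend.event_processing.event_accumulator import EventAccumulator

# Point at the run directory that contains the events.out.tfevents.* file
run_dir = "logs/attn_layer_mapper=all, attn_loss_fn=raw_mse, attn_projector=linear"
acc = EventAccumulator(run_dir)
acc.Reload()  # actually parses the event file

scalar_tags = acc.Tags()["scalars"]
print(scalar_tags)  # whatever scalar tags the trainer logged
for event in acc.Scalars(scalar_tags[0]):
    print(event.step, event.value)  # (step, value) pairs for the first tag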
logs/attn_layer_mapper=last, attn_loss_fn=cos, attn_projector=linear/events.out.tfevents.1724524382.e3f806ea38c9 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:0d4a889bb1c0d888ed9985115d54e8c36e8f2d1971ae33de6f1dd8a159404c40
+ size 5560
logs/attn_layer_mapper=last, attn_loss_fn=raw_mse, attn_projector=linear/events.out.tfevents.1724523563.e3f806ea38c9 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:ae0bbffacfd69fbbd3c0d644bb795074e3986162a2e770a7c57d92f21ddd6d53
+ size 5568
logs/attn_layer_mapper=last, attn_loss_fn=raw_mse, attn_projector=linear/events.out.tfevents.1724523923.e3f806ea38c9 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:eea059fd0eedaabdd623cb335f68407ee64a1b2c3bdc1318c1774e50a0e00ad6
+ size 5568
logs/attn_layer_mapper=last, attn_loss_fn=raw_mse, attn_projector=linear/events.out.tfevents.1724524182.e3f806ea38c9 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:325a1c84c196630995b6570e781194683ab06ddb79fca1c4607de4ae7ef4dae4
+ size 5568
logs/attn_layer_mapper=last, attn_loss_fn=raw_mse, attn_projector=linear/events.out.tfevents.1724524793.e3f806ea38c9 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:fda0dbb3a9e5852a4e194f580d6d484cc35ea6e1244c52a198999e59f8672d5a
+ size 5568
logs/attn_layer_mapper=last_k_2, attn_loss_fn=cos, attn_projector=linear/events.out.tfevents.1724523711.e3f806ea38c9 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:c3d63977c854e15443ea58b621ff070c55b98a4e60d9bd12b1de565e42dadf53
+ size 5568
logs/attn_layer_mapper=last_k_2, attn_loss_fn=cos, attn_projector=linear/events.out.tfevents.1724524333.e3f806ea38c9 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:d559755c0c20982c22f2d5a163a4e62f2f2d8602b90c0604f5531bec5d41cd8e
+ size 5568
logs/attn_layer_mapper=last_k_2, attn_loss_fn=raw_mse, attn_projector=linear/events.out.tfevents.1724523513.e3f806ea38c9 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:c582375ca7920dd4d88a1c9b9b629a849ecfaa52af57399904c6e1e5a7a80cda
+ size 5576
logs/attn_layer_mapper=last_k_2, attn_loss_fn=raw_mse, attn_projector=linear/events.out.tfevents.1724523874.e3f806ea38c9 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:17d283c55297b7269344beac70983a9162ceb36537a7b1247568a19bc5f8b4d0
+ size 5576
logs/attn_layer_mapper=last_k_2, attn_loss_fn=raw_mse, attn_projector=linear/events.out.tfevents.1724524133.e3f806ea38c9 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:df324b7eec7478095f73c19424a7a67e1e47f07bb9a4efd2e7b328929c9a26f9
+ size 5576
logs/attn_layer_mapper=last_k_2, attn_loss_fn=raw_mse, attn_projector=linear/events.out.tfevents.1724524744.e3f806ea38c9 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:41e9eec68a3dd610c34a297c4d2f49d09f1d5392f0f5c8a4fb28f9b91c113b27
+ size 5576
logs/attn_layer_mapper=last_k_2, attn_loss_fn=raw_mse, attn_projector=linear/events.out.tfevents.1724531592.e3f806ea38c9 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:79878130fd48bddf8fca5df905effe213a05ee4c029b1ca7764cc41569024544
+ size 29625298
logs/attn_layer_mapper=layer-2, attn_loss_fn=cos, attn_projector=linear/events.out.tfevents.1724524431.e3f806ea38c9 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:eb30a32d9c344a26697a4469aec6f93929ab80ecd548d85f208edcc5dc952cb8
+ size 5566
logs/attn_layer_mapper=layer-2, attn_loss_fn=raw_mse, attn_projector=linear/events.out.tfevents.1724523613.e3f806ea38c9 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:a1c44e294d72eecf84afbac478d19f206d2543cd5647ce14d75c4e24a335b569
+ size 5574
logs/attn_layer_mapper=layer-2, attn_loss_fn=raw_mse, attn_projector=linear/events.out.tfevents.1724523982.e3f806ea38c9 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:d7325cf3ad32ce767d1629ad09f470cb2561ace132be67f734c92e7eb7f867a2
+ size 5377
logs/attn_layer_mapper=layer-2, attn_loss_fn=raw_mse, attn_projector=linear/events.out.tfevents.1724524232.e3f806ea38c9 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:846f766c3117e34d12513c3c2dbd3b489244a2c3f073d54153da7ad2e6b05c4f
+ size 5574
logs/attn_layer_mapper=layer-2, attn_loss_fn=raw_mse, attn_projector=linear/events.out.tfevents.1724524842.e3f806ea38c9 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:355529149c2c153e8bed4dab7839ffb44fc92ea08ef5bd322c6b6b2424a0a8a4
+ size 5377
merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:8fc7114b0258073c1910f5a664d2b0331fad70ba71908a4bd2a369b18b9c665f
+ size 248894656
special_tokens_map.json ADDED
@@ -0,0 +1,6 @@
+ {
+   "bos_token": "<|endoftext|>",
+   "eos_token": "<|endoftext|>",
+   "pad_token": "<|endoftext|>",
+   "unk_token": "<|endoftext|>"
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,20 @@
+ {
+   "add_prefix_space": false,
+   "added_tokens_decoder": {
+     "50256": {
+       "content": "<|endoftext|>",
+       "lstrip": false,
+       "normalized": true,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     }
+   },
+   "bos_token": "<|endoftext|>",
+   "clean_up_tokenization_spaces": true,
+   "eos_token": "<|endoftext|>",
+   "model_max_length": 1024,
+   "pad_token": "<|endoftext|>",
+   "tokenizer_class": "GPT2Tokenizer",
+   "unk_token": "<|endoftext|>"
+ }
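special_tokens_map.json above points every special-token slot (bos, eos, pad, unk) at <|endoftext|>, and tokenizer_config.json declares a stock GPT2Tokenizer with model_max_length 1024. A minimal loading sketch (the local path is a placeholder):

from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("./checkpoint")  # hypothetical local clone

print(tokenizer("Hello world")["input_ids"])        # GPT-2 BPE token ids
print(tokenizer.eos_token_id)                       # 50256 -> <|endoftext|>
print(tokenizer.pad_token == tokenizer.eos_token)   # True, per special_tokens_map.json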
training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:04a9f747b2b38e7a5b6965c3502c5eb4f5c466e974724b9ebc48fc9e0ca8afe4
+ size 5432
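training_args.bin is the pickled TrainingArguments object that the transformers Trainer writes next to each checkpoint. A minimal sketch of recovering the run's hyperparameters (weights_only=False is required on recent PyTorch because the file is a full Python pickle; only unpickle files you trust, and a compatible transformers version must be importable):

import torch

args = torch.load("training_args.bin", weights_only=False)
print(args.learning_rate, args.per_device_train_batch_size, args.max_steps)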
vocab.json ADDED
The diff for this file is too large to render. See raw diff