jsanchez1981 committed on
Commit
ee9b26c
·
1 Parent(s): 0139fd8

Version of the model as reported in the MODELS'24 paper about ModelMate

Browse files
added_tokens.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "</s>": 32769,
3
+ "<EOL>": 32772,
4
+ "<URIPRE>": 32771,
5
+ "<s>": 32768,
6
+ "<unk>": 32770
7
+ }
config.json ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "codeparrot/codeparrot-small-multi",
3
+ "activation_function": "gelu_fast",
4
+ "architectures": [
5
+ "GPT2LMHeadModel"
6
+ ],
7
+ "attn_pdrop": 0.1,
8
+ "bos_token_id": 50256,
9
+ "embd_pdrop": 0.1,
10
+ "eos_token_id": 50256,
11
+ "initializer_range": 0.02,
12
+ "layer_norm_epsilon": 1e-05,
13
+ "model_type": "gpt2",
14
+ "n_embd": 768,
15
+ "n_head": 12,
16
+ "n_inner": 3072,
17
+ "n_layer": 12,
18
+ "n_positions": 1024,
19
+ "reorder_and_upcast_attn": false,
20
+ "resid_pdrop": 0.1,
21
+ "scale_attn_by_inverse_layer_idx": false,
22
+ "scale_attn_weights": true,
23
+ "summary_activation": null,
24
+ "summary_first_dropout": 0.1,
25
+ "summary_proj_to_labels": true,
26
+ "summary_type": "cls_index",
27
+ "summary_use_proj": true,
28
+ "tokenizer_class": "GPT2TokenizerFast",
29
+ "torch_dtype": "float32",
30
+ "transformers_version": "4.25.1",
31
+ "use_cache": true,
32
+ "vocab_size": 32773
33
+ }
merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fc8de36b9b0fcd3d2f83e75ac9194d59297fce97c03e18d98a9e52cd379dc33e
3
+ size 888183493
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b473f848a2fbdf5b2679eea9852b35957a246f0feb84186f9bf0f7b04f5034a6
3
+ size 456687165
rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5bbc51f310b948dc0795f50be6140cdd17b8b5f45d66dad7c8d92f90f5eed816
3
+ size 15597
scaler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:27ec07a12731ae6f9765d05fe7c8495505f1d0f90b4cc6255a0853fec3970808
3
+ size 557
scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e0ebdb05c79a4eb141f2d1c33ec8d5353ac5a8a41d72da20623877e35572bc70
3
+ size 627
special_tokens_map.json ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "<s>",
4
+ "lstrip": false,
5
+ "normalized": true,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "eos_token": {
10
+ "content": "</s>",
11
+ "lstrip": false,
12
+ "normalized": true,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": {
17
+ "content": "</s>",
18
+ "lstrip": false,
19
+ "normalized": true,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ },
23
+ "unk_token": {
24
+ "content": "<unk>",
25
+ "lstrip": false,
26
+ "normalized": true,
27
+ "rstrip": false,
28
+ "single_word": false
29
+ }
30
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_bos_token": false,
3
+ "add_prefix_space": false,
4
+ "bos_token": {
5
+ "__type": "AddedToken",
6
+ "content": "<s>",
7
+ "lstrip": false,
8
+ "normalized": true,
9
+ "rstrip": false,
10
+ "single_word": false
11
+ },
12
+ "eos_token": {
13
+ "__type": "AddedToken",
14
+ "content": "</s>",
15
+ "lstrip": false,
16
+ "normalized": true,
17
+ "rstrip": false,
18
+ "single_word": false
19
+ },
20
+ "errors": "replace",
21
+ "model_max_length": 1000000000000000019884624838656,
22
+ "name_or_path": "codeparrot/codeparrot-small-multi",
23
+ "pad_token": {
24
+ "__type": "AddedToken",
25
+ "content": "</s>",
26
+ "lstrip": false,
27
+ "normalized": true,
28
+ "rstrip": false,
29
+ "single_word": false
30
+ },
31
+ "special_tokens_map_file": null,
32
+ "tokenizer_class": "GPT2Tokenizer",
33
+ "unk_token": {
34
+ "__type": "AddedToken",
35
+ "content": "<unk>",
36
+ "lstrip": false,
37
+ "normalized": true,
38
+ "rstrip": false,
39
+ "single_word": false
40
+ }
41
+ }
trainer_state.json ADDED
@@ -0,0 +1,86 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.8726145625114441,
3
+ "best_model_checkpoint": "runs/codeparrot/codeparrot-small-multi-ecore_line-512/checkpoint-800",
4
+ "epoch": 4.998829496683574,
5
+ "global_step": 800,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 1.0,
12
+ "learning_rate": 4.5e-05,
13
+ "loss": 1.0255,
14
+ "step": 160
15
+ },
16
+ {
17
+ "epoch": 1.0,
18
+ "eval_loss": 0.9181002378463745,
19
+ "eval_runtime": 18.9044,
20
+ "eval_samples_per_second": 78.923,
21
+ "eval_steps_per_second": 19.731,
22
+ "step": 160
23
+ },
24
+ {
25
+ "epoch": 2.0,
26
+ "learning_rate": 4e-05,
27
+ "loss": 0.8865,
28
+ "step": 320
29
+ },
30
+ {
31
+ "epoch": 2.0,
32
+ "eval_loss": 0.8927440643310547,
33
+ "eval_runtime": 18.8288,
34
+ "eval_samples_per_second": 79.24,
35
+ "eval_steps_per_second": 19.81,
36
+ "step": 320
37
+ },
38
+ {
39
+ "epoch": 3.0,
40
+ "learning_rate": 3.5e-05,
41
+ "loss": 0.8313,
42
+ "step": 480
43
+ },
44
+ {
45
+ "epoch": 3.0,
46
+ "eval_loss": 0.8809273838996887,
47
+ "eval_runtime": 18.7583,
48
+ "eval_samples_per_second": 79.538,
49
+ "eval_steps_per_second": 19.885,
50
+ "step": 480
51
+ },
52
+ {
53
+ "epoch": 4.0,
54
+ "learning_rate": 3e-05,
55
+ "loss": 0.7884,
56
+ "step": 640
57
+ },
58
+ {
59
+ "epoch": 4.0,
60
+ "eval_loss": 0.8755712509155273,
61
+ "eval_runtime": 19.3412,
62
+ "eval_samples_per_second": 77.141,
63
+ "eval_steps_per_second": 19.285,
64
+ "step": 640
65
+ },
66
+ {
67
+ "epoch": 5.0,
68
+ "learning_rate": 2.5e-05,
69
+ "loss": 0.7539,
70
+ "step": 800
71
+ },
72
+ {
73
+ "epoch": 5.0,
74
+ "eval_loss": 0.8726145625114441,
75
+ "eval_runtime": 18.9056,
76
+ "eval_samples_per_second": 78.918,
77
+ "eval_steps_per_second": 19.73,
78
+ "step": 800
79
+ }
80
+ ],
81
+ "max_steps": 1600,
82
+ "num_train_epochs": 10,
83
+ "total_flos": 1.339121664e+16,
84
+ "trial_name": null,
85
+ "trial_params": null
86
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d5b97c2b82d758fd696ead9148467e62f2996197912ae145ee3f160b119fd8e9
3
+ size 3451
vocab.json ADDED
The diff for this file is too large to render. See raw diff