quintic committed
Commit 12344ba
1 Parent(s): 286d750
added_tokens.json ADDED
@@ -0,0 +1,40 @@
+ {
+ "\t\t": 50294,
+ "\t\t\t": 50293,
+ "\t\t\t\t": 50292,
+ "\t\t\t\t\t": 50291,
+ "\t\t\t\t\t\t": 50290,
+ "\t\t\t\t\t\t\t": 50289,
+ "\t\t\t\t\t\t\t\t": 50288,
+ "\t\t\t\t\t\t\t\t\t": 50287,
+ "  ": 50286,
+ "   ": 50285,
+ "    ": 50284,
+ "     ": 50283,
+ "      ": 50282,
+ "       ": 50281,
+ "        ": 50280,
+ "         ": 50279,
+ "          ": 50278,
+ "           ": 50277,
+ "            ": 50276,
+ "             ": 50275,
+ "              ": 50274,
+ "               ": 50273,
+ "                ": 50272,
+ "                 ": 50271,
+ "                  ": 50270,
+ "                   ": 50269,
+ "                    ": 50268,
+ "                     ": 50267,
+ "                      ": 50266,
+ "                       ": 50265,
+ "                        ": 50264,
+ "                         ": 50263,
+ "                          ": 50262,
+ "                           ": 50261,
+ "                            ": 50260,
+ "                             ": 50259,
+ "                              ": 50258,
+ "                               ": 50257
+ }
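These 38 entries extend the GPT-2 base vocabulary (ids 50257-50294) with run-length whitespace tokens, runs of 2 to 9 tabs and 2 to 31 spaces, so that indentation compresses into a single token, as in the upstream CodeGen tokenizers. A minimal sketch of checking the mapping after loading (the repo id is a placeholder; this commit does not name the repository):

```python
# Sketch: verify the added whitespace tokens resolve to the ids above.
# "your-username/your-repo" is a placeholder, not named in this commit.
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("your-username/your-repo")

assert tok.convert_tokens_to_ids("\t\t") == 50294    # 2-tab token
assert tok.convert_tokens_to_ids("\t" * 9) == 50287  # 9-tab token
assert tok.convert_tokens_to_ids(" " * 2) == 50286   # 2-space token

# get_added_vocab() returns exactly the token -> id map from added_tokens.json
print(len(tok.get_added_vocab()))  # expected: 38
```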
config.json CHANGED
@@ -10,18 +10,18 @@
  "initializer_range": 0.02,
  "layer_norm_epsilon": 1e-05,
  "model_type": "gptj",
- "n_embd": 4096,
+ "n_embd": 1024,
  "n_head": 16,
  "n_inner": null,
- "n_layer": 33,
+ "n_layer": 20,
  "n_positions": 2048,
  "resid_pdrop": 0.0,
- "rotary_dim": 64,
+ "rotary_dim": 32,
  "scale_attn_weights": true,
  "tie_word_embeddings": false,
  "tokenizer_class": "CodeGenTokenizer",
  "torch_dtype": "float32",
- "transformers_version": "4.26.0",
+ "transformers_version": "4.25.1",
  "use_cache": true,
- "vocab_size": 50295
+ "vocab_size": 51200
  }
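The edit shrinks the architecture to CodeGen-350M proportions (hidden size 4096 to 1024, 33 to 20 layers, rotary_dim 64 to 32) and pads the vocabulary to 51200, the size the stock CodeGen checkpoints use, even though the tokenizer only defines 50295 tokens. A sketch of the post-commit config, assuming the standard transformers GPTJConfig class and copying the values from the diff:

```python
# Sketch: reconstruct the post-commit config in code (values from the diff).
from transformers import GPTJConfig

config = GPTJConfig(
    n_embd=1024,       # was 4096
    n_head=16,
    n_inner=None,
    n_layer=20,        # was 33
    n_positions=2048,
    resid_pdrop=0.0,
    rotary_dim=32,     # was 64
    tie_word_embeddings=False,
    vocab_size=51200,  # was 50295; padded beyond the 50295 defined tokens
)
```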
special_tokens_map.json ADDED
@@ -0,0 +1,5 @@
+ {
+ "bos_token": "<|endoftext|>",
+ "eos_token": "<|endoftext|>",
+ "unk_token": "<|endoftext|>"
+ }
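As in the upstream GPT-2 and CodeGen tokenizers, a single <|endoftext|> token fills all three special-token roles; there is no dedicated BOS, EOS, or UNK. A quick check after loading (placeholder repo id again):

```python
# Sketch: bos/eos/unk all map to the same <|endoftext|> token.
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("your-username/your-repo")
assert tok.bos_token == tok.eos_token == tok.unk_token == "<|endoftext|>"
print(tok.eos_token_id)  # 50256 in the GPT-2 base vocabulary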
tokenizer_config.json ADDED
@@ -0,0 +1,34 @@
+ {
+ "add_bos_token": false,
+ "add_prefix_space": false,
+ "bos_token": {
+ "__type": "AddedToken",
+ "content": "<|endoftext|>",
+ "lstrip": false,
+ "normalized": true,
+ "rstrip": false,
+ "single_word": false
+ },
+ "eos_token": {
+ "__type": "AddedToken",
+ "content": "<|endoftext|>",
+ "lstrip": false,
+ "normalized": true,
+ "rstrip": false,
+ "single_word": false
+ },
+ "errors": "replace",
+ "model_max_length": 2048,
+ "name_or_path": "Salesforce/codegen-350M-mono",
+ "pad_token": null,
+ "special_tokens_map_file": null,
+ "tokenizer_class": "CodeGenTokenizer",
+ "unk_token": {
+ "__type": "AddedToken",
+ "content": "<|endoftext|>",
+ "lstrip": false,
+ "normalized": true,
+ "rstrip": false,
+ "single_word": false
+ }
+ }
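The AddedToken wrappers spell out how <|endoftext|> is matched: "normalized": true with "lstrip"/"rstrip": false means the token is recognized literally, without absorbing surrounding whitespace. Note also "pad_token": null; a common downstream pattern for batched inference is to reuse EOS for padding (an assumption about usage, not something this commit configures):

```python
# Sketch: typical padding workaround for batched inference with this tokenizer.
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("your-username/your-repo")
if tok.pad_token is None:
    tok.pad_token = tok.eos_token  # reuse <|endoftext|> for padding

batch = tok(["def add(a, b):", "print('hi')"], padding=True, return_tensors="pt")
print(batch["input_ids"].shape)
```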
vocab.json ADDED
The diff for this file is too large to render. See raw diff