jtatman committed
Commit 0871a98
1 Parent(s): ec3a531

Upload GPTNeoXForCausalLM

config.json CHANGED
@@ -1,5 +1,5 @@
 {
-  "_name_or_path": "./pythia-delphi-suboptimal",
+  "_name_or_path": "jtatman/pythia-delphi-suboptimal",
   "architectures": [
     "GPTNeoXForCausalLM"
   ],
@@ -27,7 +27,7 @@
   "rotary_pct": 0.5,
   "tie_word_embeddings": false,
   "torch_dtype": "float32",
-  "transformers_version": "4.38.2",
+  "transformers_version": "4.42.3",
   "use_cache": true,
   "use_parallel_residual": true,
   "vocab_size": 50281
generation_config.json CHANGED
@@ -3,5 +3,5 @@
   "bos_token_id": 0,
   "eos_token_id": 0,
   "pad_token_id": 0,
-  "transformers_version": "4.38.2"
+  "transformers_version": "4.42.3"
 }
model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f772897121d3a5f0c9be023dee58f6e036851129d9a60734d152361af3cf176c
-size 4076812760
+oid sha256:c10fabe7cff8e9f31da5ef1b73d8c999007f3561bfc2c933af333402a75d4b08
+size 4064201824
model.safetensors.index.json CHANGED
@@ -1,6 +1,6 @@
 {
   "metadata": {
-    "total_size": 5724416000
+    "total_size": 5711806464
   },
   "weight_map": {
     "embed_out.weight": "model-00002-of-00002.safetensors",
@@ -30,18 +30,6 @@
     "gpt_neox.layers.1.mlp.dense_h_to_4h.bias": "model-00001-of-00002.safetensors",
     "gpt_neox.layers.1.mlp.dense_h_to_4h.weight": "model-00001-of-00002.safetensors",
     "gpt_neox.layers.1.post_attention_layernorm.bias": "model-00001-of-00002.safetensors",
-    "gpt_neox.layers.1.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
-    "gpt_neox.layers.2.attention.dense.bias": "model-00001-of-00002.safetensors",
-    "gpt_neox.layers.2.attention.dense.weight": "model-00001-of-00002.safetensors",
-    "gpt_neox.layers.2.attention.query_key_value.bias": "model-00001-of-00002.safetensors",
-    "gpt_neox.layers.2.attention.query_key_value.weight": "model-00001-of-00002.safetensors",
-    "gpt_neox.layers.2.input_layernorm.bias": "model-00001-of-00002.safetensors",
-    "gpt_neox.layers.2.input_layernorm.weight": "model-00001-of-00002.safetensors",
-    "gpt_neox.layers.2.mlp.dense_4h_to_h.bias": "model-00001-of-00002.safetensors",
-    "gpt_neox.layers.2.mlp.dense_4h_to_h.weight": "model-00001-of-00002.safetensors",
-    "gpt_neox.layers.2.mlp.dense_h_to_4h.bias": "model-00001-of-00002.safetensors",
-    "gpt_neox.layers.2.mlp.dense_h_to_4h.weight": "model-00001-of-00002.safetensors",
-    "gpt_neox.layers.2.post_attention_layernorm.bias": "model-00001-of-00002.safetensors",
-    "gpt_neox.layers.2.post_attention_layernorm.weight": "model-00001-of-00002.safetensors"
+    "gpt_neox.layers.1.post_attention_layernorm.weight": "model-00001-of-00002.safetensors"
   }
 }
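
For quick verification after this upload, a minimal loading sketch in Python; it assumes the files in this commit are served from the jtatman/pythia-delphi-suboptimal repository (the value the updated "_name_or_path" now points at) and that transformers 4.42.3 or newer is installed:

from transformers import AutoTokenizer, GPTNeoXForCausalLM

# Assumed repo id, mirroring the updated "_name_or_path" in config.json.
repo_id = "jtatman/pythia-delphi-suboptimal"

tokenizer = AutoTokenizer.from_pretrained(repo_id)
model = GPTNeoXForCausalLM.from_pretrained(repo_id)

# Smoke test: generate a few tokens to confirm the resharded weights load.
inputs = tokenizer("Hello, world", return_tensors="pt")
outputs = model.generate(**inputs, max_new_tokens=16)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))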