danielhanchen committed on
Commit
ad2e4fd
·
verified ·
1 Parent(s): 2c82f46

Upload LlamaForCausalLM

Browse files
README.md CHANGED
@@ -1,6 +1,6 @@
1
  ---
2
- license: llama3.1
3
  base_model: meta-llama/Meta-Llama-3.1-70B-Instruct
 
4
  pipeline_tag: text-generation
5
  tags:
6
  - unsloth
 
1
  ---
 
2
  base_model: meta-llama/Meta-Llama-3.1-70B-Instruct
3
+ license: llama3.1
4
  pipeline_tag: text-generation
5
  tags:
6
  - unsloth
config.json CHANGED
@@ -1,21 +1,17 @@
1
  {
2
- "_name_or_path": "mattshumer/Reflection-70B",
3
  "architectures": [
4
  "LlamaForCausalLM"
5
  ],
6
  "attention_bias": false,
7
  "attention_dropout": 0.0,
8
  "bos_token_id": 128000,
9
- "eos_token_id": [
10
- 128001,
11
- 128008,
12
- 128009
13
- ],
14
  "hidden_act": "silu",
15
  "hidden_size": 8192,
16
  "initializer_range": 0.02,
17
  "intermediate_size": 28672,
18
- "max_position_embeddings": 131072,
19
  "mlp_bias": false,
20
  "model_type": "llama",
21
  "num_attention_heads": 64,
@@ -38,17 +34,11 @@
38
  "quant_method": "bitsandbytes"
39
  },
40
  "rms_norm_eps": 1e-05,
41
- "rope_scaling": {
42
- "factor": 8.0,
43
- "high_freq_factor": 4.0,
44
- "low_freq_factor": 1.0,
45
- "original_max_position_embeddings": 8192,
46
- "rope_type": "llama3"
47
- },
48
  "rope_theta": 500000.0,
49
  "tie_word_embeddings": false,
50
  "torch_dtype": "bfloat16",
51
  "transformers_version": "4.44.2",
52
  "use_cache": true,
53
- "vocab_size": 128256
54
  }
 
1
  {
2
+ "_name_or_path": "mattshumer/Reflection-Llama-3.1-70B",
3
  "architectures": [
4
  "LlamaForCausalLM"
5
  ],
6
  "attention_bias": false,
7
  "attention_dropout": 0.0,
8
  "bos_token_id": 128000,
9
+ "eos_token_id": 128009,
 
 
 
 
10
  "hidden_act": "silu",
11
  "hidden_size": 8192,
12
  "initializer_range": 0.02,
13
  "intermediate_size": 28672,
14
+ "max_position_embeddings": 8192,
15
  "mlp_bias": false,
16
  "model_type": "llama",
17
  "num_attention_heads": 64,
 
34
  "quant_method": "bitsandbytes"
35
  },
36
  "rms_norm_eps": 1e-05,
37
+ "rope_scaling": null,
 
 
 
 
 
 
38
  "rope_theta": 500000.0,
39
  "tie_word_embeddings": false,
40
  "torch_dtype": "bfloat16",
41
  "transformers_version": "4.44.2",
42
  "use_cache": true,
43
+ "vocab_size": 128262
44
  }
generation_config.json CHANGED
@@ -3,10 +3,10 @@
3
  "do_sample": true,
4
  "eos_token_id": [
5
  128001,
6
- 128008,
7
  128009
8
  ],
9
- "temperature": 0.7,
10
- "top_p": 0.95,
 
11
  "transformers_version": "4.44.2"
12
  }
 
3
  "do_sample": true,
4
  "eos_token_id": [
5
  128001,
 
6
  128009
7
  ],
8
+ "max_length": 4096,
9
+ "temperature": 0.6,
10
+ "top_p": 0.9,
11
  "transformers_version": "4.44.2"
12
  }
model-00001-of-00008.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:611cd9876bc7cb6d1c2540c19e0b24beeabd4fb1197128b9b748c54536cfa948
3
- size 4949082290
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5f25b44e5e89a38d84a1df21a4cc48250c405b1bb06ca263914f03d5d12d718c
3
+ size 4949180592
model-00002-of-00008.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e3294eeef8b7d554affa793791a6ee7c38415d2b4af8a5daac3556735cd700c5
3
- size 4977054939
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:38ee9630f9113b81603505a416b3bde0cb0faa45f0271a17135c75a8c11d24cd
3
+ size 4977054937
model-00003-of-00008.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ccb6c4b58606e925ab8a37a728052f7ed4cb309dd22fccd868ec89b044786e91
3
- size 4977088096
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b1f201406c98cb5ddb6556c7590972a2eafa1dd0a5a9b12388f2415aa83316ec
3
+ size 4977088089
model-00004-of-00008.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:52b100d91e34ff274403042bd32ac1c283d5fdc9c8b39cde41c88d12f9bc9667
3
- size 4933786844
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:571409702b8e5805a3e29eba02a81e6c48c927a5661d280574f9a8b23a3e5e60
3
+ size 4933786851
model-00005-of-00008.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:66780364e586394132739ed509a43531c0c49576a7b6b680f4534ea9930a4798
3
  size 4977055135
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e24219198f4136b092ed88f637f91c5777ece2a488833bb9cf3727b494e016a4
3
  size 4977055135
model-00006-of-00008.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4c49a7e32e03072f7cbe3cf644ed3acf95311ba26a5fbc70d0b0501a0bb72997
3
- size 4977055096
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ecaa5c823f92772e338093a6fc8356a3ee7c05d3fcfe5770d6b01879ad4d7a23
3
+ size 4977055104
model-00007-of-00008.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b65f113992c31139678fc1e21cf27ff9ff02a33d947995d36cbba9eb6810da24
3
- size 4977088086
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aeaabf66442e56253508eefce1a29eaee70cee84fb67616a3ac8dc78b6163da0
3
+ size 4977088092
model-00008-of-00008.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8b2dc6991759a2e1718dac761b133dd8374f73443645dc0bdd06ea2932324f8c
3
- size 4750027590
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a9a19adb72fba3822d66ac50c358914299fc86ca07cd9e9c983f87e3216df84e
3
+ size 4750125895
model.safetensors.index.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
  "metadata": {
3
- "total_size": 39517816284
4
  },
5
  "weight_map": {
6
  "lm_head.weight": "model-00008-of-00008.safetensors",
 
1
  {
2
  "metadata": {
3
+ "total_size": 39518012903
4
  },
5
  "weight_map": {
6
  "lm_head.weight": "model-00008-of-00008.safetensors",