ashrafulparan committed
Commit 3588264 · verified · Parent(s): f3e660a

Trained with Unsloth


Upload model trained with Unsloth 2x faster
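For anyone pulling this checkpoint, a minimal loading sketch follows. The repo id is a placeholder (this commit page does not name the full repo path), and it assumes `transformers`, `bitsandbytes`, and a CUDA GPU are available, since config.json ships a bitsandbytes 4-bit `quantization_config`.

```python
from transformers import AutoModelForCausalLM, AutoTokenizer

repo_id = "your-username/your-unsloth-llama-3.1-8b"  # placeholder repo id, not confirmed by this page

tokenizer = AutoTokenizer.from_pretrained(repo_id)
model = AutoModelForCausalLM.from_pretrained(
    repo_id,
    device_map="auto",  # the 4-bit bitsandbytes quantization_config in config.json is applied automatically
)
```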

config.json CHANGED
@@ -1,22 +1,27 @@
 {
-  "_name_or_path": "unsloth/llama-3-8b-Instruct-bnb-4bit",
+  "_name_or_path": "unsloth/meta-llama-3.1-8b-instruct-bnb-4bit",
   "architectures": [
     "LlamaForCausalLM"
   ],
   "attention_bias": false,
   "attention_dropout": 0.0,
   "bos_token_id": 128000,
-  "eos_token_id": 128009,
+  "eos_token_id": [
+    128001,
+    128008,
+    128009
+  ],
   "hidden_act": "silu",
   "hidden_size": 4096,
   "initializer_range": 0.02,
   "intermediate_size": 14336,
-  "max_position_embeddings": 8192,
+  "max_position_embeddings": 131072,
   "mlp_bias": false,
   "model_type": "llama",
   "num_attention_heads": 32,
   "num_hidden_layers": 32,
   "num_key_value_heads": 8,
+  "pad_token_id": 128004,
   "pretraining_tp": 1,
   "quantization_config": {
     "bnb_4bit_compute_dtype": "float16",
@@ -31,11 +36,17 @@
     "quant_method": "bitsandbytes"
   },
   "rms_norm_eps": 1e-05,
-  "rope_scaling": null,
+  "rope_scaling": {
+    "factor": 8.0,
+    "high_freq_factor": 4.0,
+    "low_freq_factor": 1.0,
+    "original_max_position_embeddings": 8192,
+    "rope_type": "llama3"
+  },
   "rope_theta": 500000.0,
   "tie_word_embeddings": false,
   "torch_dtype": "float16",
-  "transformers_version": "4.42.3",
+  "transformers_version": "4.43.2",
   "unsloth_version": "2024.7",
   "use_cache": true,
   "vocab_size": 128256
generation_config.json CHANGED
@@ -1,9 +1,12 @@
 {
-  "_from_model_config": true,
   "bos_token_id": 128000,
+  "do_sample": true,
   "eos_token_id": [
     128001,
+    128008,
     128009
   ],
-  "transformers_version": "4.42.3"
+  "temperature": 0.6,
+  "top_p": 0.9,
+  "transformers_version": "4.43.2"
 }
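The new generation defaults (`do_sample`, `temperature`, `top_p`) are picked up automatically by `model.generate()` when no sampling arguments are passed. A sketch of the equivalent explicit call, reusing the `model` and `tokenizer` from the loading sketch above (the prompt is illustrative):

```python
inputs = tokenizer("Write a haiku about long context windows.", return_tensors="pt").to(model.device)

outputs = model.generate(
    **inputs,
    do_sample=True,    # new default from generation_config.json
    temperature=0.6,   # new default
    top_p=0.9,         # new default
    max_new_tokens=64,
)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
```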
model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f1c26d95c9ffec355b57e82e4f84367a0901a22391b4c3f4f992ded993bfd32b
-size 4652072848
+oid sha256:02e85ce045feaad9b26fc82cb0b86da35162d17cd4e157eb6ead449ae84a3b03
+size 4652072864
model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8005050c5e27c8232d9ce04f03631ce67bcc362c80fc57079bda23b498695c20
+oid sha256:3e9a36ad418b0cd9b253f65771552d7dc05cf1abf1170b94de4b4d546aac255d
 size 1050673280
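The two weight shards are Git LFS objects, so only the pointer files change here. A sketch for verifying locally downloaded shards against the new pointers by recomputing their sha256 (paths assume the repo was cloned with the actual weight files, not just the pointers):

```python
import hashlib

expected = {
    "model-00001-of-00002.safetensors": "02e85ce045feaad9b26fc82cb0b86da35162d17cd4e157eb6ead449ae84a3b03",
    "model-00002-of-00002.safetensors": "3e9a36ad418b0cd9b253f65771552d7dc05cf1abf1170b94de4b4d546aac255d",
}

for path, oid in expected.items():
    digest = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):  # hash in 1 MiB chunks
            digest.update(chunk)
    print(path, digest.hexdigest() == oid)  # expect True for both shards
```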
model.safetensors.index.json CHANGED
@@ -1,6 +1,6 @@
 {
   "metadata": {
-    "total_size": 5702577728
+    "total_size": 5702577744
   },
   "weight_map": {
     "lm_head.weight": "model-00002-of-00002.safetensors",