mpasila committed
Commit 54ebf45
1 Parent(s): 8a45128

Added weights

README.md ADDED
@@ -0,0 +1,49 @@
+ ---
+ datasets:
+ - teknium/OpenHermes-2.5
+ ---
+ This is a 4 bpw EXL2 quantization of [feeltheAGI/yi-super-9B](https://huggingface.co/feeltheAGI/yi-super-9B), made using the default calibration dataset.
+
+ # Original Model card:
+
+ ![1702046172090179.jpg](https://cdn-uploads.huggingface.co/production/uploads/65d1f383351255ba48a4f831/EdV6mhHGCv5w2BIC58vCm.jpeg)
+
+ YI-9B-Super
+
+ YI-9B-Super is a YI-9B model that has been further fine-tuned on the OpenHermes-2.5 dataset.
+
+
+ Results on some benchmarks:
+
+ | Tasks |Version| Filter |n-shot| Metric | Value | |Stderr|
+ |---------------------------------------|-------|----------------|------|-----------|------:|---|-----:|
+ |truthfulqa |N/A |none | 0|rouge1_max |47.1011|± |0.8016|
+ |hellaswag | 1|none |None |acc | 0.5758|± |0.0049|
+ | | |none |None |acc_norm | 0.7639|± |0.0042|
+ |gsm8k_cot | 3|strict-match |8 |exact_match| 0.5262|± |0.0138|
+ | | |flexible-extract|8 |exact_match| 0.6027|± |0.0135|
+ |gsm8k | 3|strict-match |5 |exact_match| 0.6073|± |0.0135|
+ | | |flexible-extract|5 |exact_match| 0.6126|± |0.0134|
+
+
+
+ | Groups |Version|Filter|n-shot| Metric | Value | |Stderr|
+ |------------------|-------|------|------|-----------|------:|---|-----:|
+ |truthfulqa |N/A |none | 0|rouge1_max |47.1011|± |0.8016|
+ | | |none | 0|bleu_max |21.9476|± |0.7162|
+ | | |none | 0|rouge2_acc | 0.3293|± |0.0165|
+ | | |none | 0|bleu_acc | 0.3635|± |0.0168|
+ | | |none | 0|rouge1_acc | 0.3892|± |0.0171|
+ | | |none | 0|rougeL_acc | 0.3782|± |0.0170|
+ | | |none | 0|bleu_diff |-2.3953|± |0.6292|
+ | | |none | 0|rouge2_diff|-4.6929|± |0.9130|
+ | | |none | 0|rougeL_diff|-4.2677|± |0.8034|
+ | | |none | 0|acc | 0.4040|± |0.0113|
+ | | |none | 0|rouge1_diff|-3.8975|± |0.7966|
+ | | |none | 0|rougeL_max |43.7954|± |0.8145|
+ | | |none | 0|rouge2_max |32.3573|± |0.9094|
+ |mmlu |N/A |none | 0|acc | 0.6726|± |0.0037|
+ | - humanities |N/A |none |None |acc | 0.6043|± |0.0067|
+ | - other |N/A |none |None |acc | 0.7306|± |0.0077|
+ | - social_sciences|N/A |none |None |acc | 0.7741|± |0.0074|
+ | - stem |N/A |none |None |acc | 0.6181|± |0.0083|
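EXL2 quants like this one are loaded with the exllamav2 runtime rather than plain `transformers`. Below is a minimal inference sketch, assuming the files from this commit have been downloaded to a local directory (the path is a placeholder) and following the example API shipped with exllamav2; exact class names can shift between library versions.

```python
from exllamav2 import ExLlamaV2, ExLlamaV2Config, ExLlamaV2Cache, ExLlamaV2Tokenizer
from exllamav2.generator import ExLlamaV2BaseGenerator, ExLlamaV2Sampler

# Placeholder path: a local download of this repo
# (config.json, output.safetensors, tokenizer files).
model_dir = "./yi-super-9B-exl2-4bpw"

config = ExLlamaV2Config()
config.model_dir = model_dir
config.prepare()

model = ExLlamaV2(config)
cache = ExLlamaV2Cache(model, lazy=True)
model.load_autosplit(cache)  # spread layers across available GPU memory

tokenizer = ExLlamaV2Tokenizer(config)
generator = ExLlamaV2BaseGenerator(model, cache, tokenizer)

settings = ExLlamaV2Sampler.Settings()
settings.temperature = 0.7
settings.top_p = 0.9

print(generator.generate_simple(
    "Explain grouped-query attention in one paragraph.",
    settings,
    num_tokens=200,
))
```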
added_tokens.json ADDED
@@ -0,0 +1,4 @@
+ {
+   "</s>": 64001,
+   "<s>": 64000
+ }
config.json ADDED
@@ -0,0 +1,29 @@
+ {
+   "_name_or_path": "01-ai/Yi-9B",
+   "architectures": [
+     "LlamaForCausalLM"
+   ],
+   "attention_bias": false,
+   "attention_dropout": 0.0,
+   "bos_token_id": 64000,
+   "eos_token_id": 64001,
+   "hidden_act": "silu",
+   "hidden_size": 4096,
+   "initializer_range": 0.02,
+   "intermediate_size": 11008,
+   "max_position_embeddings": 8192,
+   "model_type": "llama",
+   "num_attention_heads": 32,
+   "num_hidden_layers": 48,
+   "num_key_value_heads": 4,
+   "pad_token_id": 0,
+   "pretraining_tp": 1,
+   "rms_norm_eps": 1e-06,
+   "rope_scaling": null,
+   "rope_theta": 10000,
+   "tie_word_embeddings": false,
+   "torch_dtype": "bfloat16",
+   "transformers_version": "4.38.2",
+   "use_cache": false,
+   "vocab_size": 64002
+ }
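As a sanity check, the shapes in config.json are enough to estimate the parameter count of the unquantized base model: standard Llama-style blocks with grouped-query attention (32 query heads sharing 4 key/value heads) and untied input/output embeddings. A small sketch of that arithmetic, using only values from the config above:

```python
# Figures taken directly from config.json.
hidden, inter, layers = 4096, 11008, 48
heads, kv_heads, vocab = 32, 4, 64002
head_dim = hidden // heads  # 128

attn = 2 * hidden * hidden + 2 * hidden * (kv_heads * head_dim)  # q/o projections + smaller k/v (GQA)
mlp = 3 * hidden * inter                                         # gate, up, down projections
embed = 2 * vocab * hidden                                       # input + output embeddings (tie_word_embeddings: false)

total = layers * (attn + mlp) + embed
print(f"~{total / 1e9:.2f}B parameters")  # ≈ 8.83B, consistent with the "9B" in the model name
```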
job_new.json ADDED
The diff for this file is too large to render. See raw diff
 
measurement.json ADDED
The diff for this file is too large to render. See raw diff
 
output.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:5cff2ce34da0f3acfb45839535691a72d77ad784278e882724eeb14d709ef9fb
+ size 4885584736
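The weights file is stored through Git LFS, so the commit only records this pointer: spec version, SHA-256 oid, and size in bytes (about 4.9 GB). A small sketch, assuming the real file has already been pulled locally, for checking a download against the pointer:

```python
import hashlib

# Expected digest is the `oid sha256:` value from the pointer above.
EXPECTED = "5cff2ce34da0f3acfb45839535691a72d77ad784278e882724eeb14d709ef9fb"

sha = hashlib.sha256()
with open("output.safetensors", "rb") as f:           # assumed local path after `git lfs pull`
    for chunk in iter(lambda: f.read(1 << 20), b""):  # hash in 1 MiB chunks
        sha.update(chunk)

assert sha.hexdigest() == EXPECTED, "weights do not match the LFS pointer"
print("OK:", sha.hexdigest())
```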
special_tokens_map.json ADDED
@@ -0,0 +1,30 @@
+ {
+   "bos_token": {
+     "content": "<s>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "eos_token": {
+     "content": "</s>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "pad_token": {
+     "content": "<unk>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "unk_token": {
+     "content": "<unk>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   }
+ }
tokenizer.model ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:386c49cf943d71aa110361135338c50e38beeff0a66593480421f37b319e1a39
+ size 1033105
tokenizer_config.json ADDED
@@ -0,0 +1,59 @@
+ {
+   "add_bos_token": false,
+   "add_eos_token": false,
+   "add_prefix_space": true,
+   "added_tokens_decoder": {
+     "0": {
+       "content": "<unk>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "1": {
+       "content": "<|startoftext|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "2": {
+       "content": "<|endoftext|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "64000": {
+       "content": "<s>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "64001": {
+       "content": "</s>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     }
+   },
+   "bos_token": "<s>",
+   "clean_up_tokenization_spaces": false,
+   "eos_token": "</s>",
+   "legacy": true,
+   "model_max_length": 4096,
+   "pad_token": "<unk>",
+   "sp_model_kwargs": {},
+   "spaces_between_special_tokens": false,
+   "tokenizer_class": "LlamaTokenizer",
+   "unk_token": "<unk>",
+   "use_default_system_prompt": false,
+   "use_fast": true
+ }
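The tokenizer files describe a SentencePiece LlamaTokenizer whose `<s>`/`</s>` markers sit at the end of the vocabulary (ids 64000 and 64001, matching added_tokens.json and config.json), with `add_bos_token` and `add_eos_token` both disabled. A quick sketch, assuming a local copy of this repo (the path is a placeholder), that confirms the special-token wiring:

```python
from transformers import AutoTokenizer

# Placeholder path to a local download of this repository.
tok = AutoTokenizer.from_pretrained("./yi-super-9B-exl2-4bpw")

print(tok.bos_token, tok.bos_token_id)  # "<s>", 64000
print(tok.eos_token, tok.eos_token_id)  # "</s>", 64001
print(tok.pad_token, tok.unk_token)     # both "<unk>" per the maps above

# add_bos_token / add_eos_token are false, so plain encoding should add no special tokens.
print(tok("hello world").input_ids)
```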