Added weights

Browse files

Files changed (9) hide show

README.md +49 -0
added_tokens.json +4 -0
config.json +29 -0
job_new.json +0 -0
measurement.json +0 -0
output.safetensors +3 -0
special_tokens_map.json +30 -0
tokenizer.model +3 -0
tokenizer_config.json +59 -0

README.md ADDED Viewed

	@@ -0,0 +1,49 @@

+---
+datasets:
+- teknium/OpenHermes-2.5
+---
+This is a 4bpw EXL2 quantization of [feeltheAGI/yi-super-9B](https://huggingface.co/feeltheAGI/yi-super-9B), produced using the default calibration dataset.
+# Original Model card:
+![1702046172090179.jpg](https://cdn-uploads.huggingface.co/production/uploads/65d1f383351255ba48a4f831/EdV6mhHGCv5w2BIC58vCm.jpeg)
+YI-9B-Super
+YI-9B-Super is a Yi-9B model that has been further fine-tuned on the OpenHermes-2.5 dataset.
+Results on some benchmarks:
+|                 Tasks                 |Version|     Filter     |n-shot|  Metric   | Value |   |Stderr|
+|---------------------------------------|-------|----------------|------|-----------|------:|---|-----:|
+|truthfulqa                             |N/A    |none            |     0|rouge1_max |47.1011|±  |0.8016|
+|hellaswag                              |      1|none            |None  |acc        | 0.5758|±  |0.0049|
+|                                       |       |none            |None  |acc_norm   | 0.7639|±  |0.0042|
+|gsm8k_cot                              |      3|strict-match    |8     |exact_match| 0.5262|±  |0.0138|
+|                                       |       |flexible-extract|8     |exact_match| 0.6027|±  |0.0135|
+|gsm8k                                  |      3|strict-match    |5     |exact_match| 0.6073|±  |0.0135|
+|                                       |       |flexible-extract|5     |exact_match| 0.6126|±  |0.0134|
+
+|      Groups      |Version|Filter|n-shot|  Metric   | Value |   |Stderr|
+|------------------|-------|------|------|-----------|------:|---|-----:|
+|truthfulqa        |N/A    |none  |     0|rouge1_max |47.1011|±  |0.8016|
+|                  |       |none  |     0|bleu_max   |21.9476|±  |0.7162|
+|                  |       |none  |     0|rouge2_acc | 0.3293|±  |0.0165|
+|                  |       |none  |     0|bleu_acc   | 0.3635|±  |0.0168|
+|                  |       |none  |     0|rouge1_acc | 0.3892|±  |0.0171|
+|                  |       |none  |     0|rougeL_acc | 0.3782|±  |0.0170|
+|                  |       |none  |     0|bleu_diff  |-2.3953|±  |0.6292|
+|                  |       |none  |     0|rouge2_diff|-4.6929|±  |0.9130|
+|                  |       |none  |     0|rougeL_diff|-4.2677|±  |0.8034|
+|                  |       |none  |     0|acc        | 0.4040|±  |0.0113|
+|                  |       |none  |     0|rouge1_diff|-3.8975|±  |0.7966|
+|                  |       |none  |     0|rougeL_max |43.7954|±  |0.8145|
+|                  |       |none  |     0|rouge2_max |32.3573|±  |0.9094|
+|mmlu              |N/A    |none  |     0|acc        | 0.6726|±  |0.0037|
+| - humanities     |N/A    |none  |None  |acc        | 0.6043|±  |0.0067|
+| - other          |N/A    |none  |None  |acc        | 0.7306|±  |0.0077|
+| - social_sciences|N/A    |none  |None  |acc        | 0.7741|±  |0.0074|
+| - stem           |N/A    |none  |None  |acc        | 0.6181|±  |0.0083|

added_tokens.json ADDED Viewed

	@@ -0,0 +1,4 @@

+{
+  "</s>": 64001,
+  "<s>": 64000
+}

config.json ADDED Viewed

	@@ -0,0 +1,29 @@

+{
+  "_name_or_path": "01-ai/Yi-9B",
+  "architectures": [
+    "LlamaForCausalLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "bos_token_id": 64000,
+  "eos_token_id": 64001,
+  "hidden_act": "silu",
+  "hidden_size": 4096,
+  "initializer_range": 0.02,
+  "intermediate_size": 11008,
+  "max_position_embeddings": 8192,
+  "model_type": "llama",
+  "num_attention_heads": 32,
+  "num_hidden_layers": 48,
+  "num_key_value_heads": 4,
+  "pad_token_id": 0,
+  "pretraining_tp": 1,
+  "rms_norm_eps": 1e-06,
+  "rope_scaling": null,
+  "rope_theta": 10000,
+  "tie_word_embeddings": false,
+  "torch_dtype": "bfloat16",
+  "transformers_version": "4.38.2",
+  "use_cache": false,
+  "vocab_size": 64002
+}

job_new.json ADDED Viewed

The diff for this file is too large to render. See raw diff

measurement.json ADDED Viewed

The diff for this file is too large to render. See raw diff

output.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:5cff2ce34da0f3acfb45839535691a72d77ad784278e882724eeb14d709ef9fb
+size 4885584736

special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,30 @@

+{
+  "bos_token": {
+    "content": "<s>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "eos_token": {
+    "content": "</s>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": {
+    "content": "<unk>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "unk_token": {
+    "content": "<unk>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
+}

tokenizer.model ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:386c49cf943d71aa110361135338c50e38beeff0a66593480421f37b319e1a39
+size 1033105

tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,59 @@

+{
+  "add_bos_token": false,
+  "add_eos_token": false,
+  "add_prefix_space": true,
+  "added_tokens_decoder": {
+    "0": {
+      "content": "<unk>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "1": {
+      "content": "<|startoftext|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "2": {
+      "content": "<|endoftext|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "64000": {
+      "content": "<s>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "64001": {
+      "content": "</s>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "bos_token": "<s>",
+  "clean_up_tokenization_spaces": false,
+  "eos_token": "</s>",
+  "legacy": true,
+  "model_max_length": 4096,
+  "pad_token": "<unk>",
+  "sp_model_kwargs": {},
+  "spaces_between_special_tokens": false,
+  "tokenizer_class": "LlamaTokenizer",
+  "unk_token": "<unk>",
+  "use_default_system_prompt": false,
+  "use_fast": true
+}