FluffyKaeloky committed on
Commit f0b1d82
1 Parent(s): 7d8e229

Upload 17 files

README.md ADDED
@@ -0,0 +1,26 @@
+ ---
+ base_model:
+ - jukofyork/Dark-Miqu-70B
+ - sophosympatheia/Midnight-Miqu-70B-v1.5
+ - jukofyork/Dawn-Miqu-70B
+ library_name: transformers
+ tags:
+ - mergekit
+ - merge
+ license: other
+ ---
+ Twilight Miqu is a story-writing model composed of sophosympatheia/Midnight-Miqu-70B-v1.5, jukofyork/Dawn-Miqu-70B, and jukofyork/Dark-Miqu-70B.
+
+ It is an experiment to see whether large models are more coherent on story-writing tasks.
+ Twilight = Midnight + Dawn + Dark
+
+ Please see the Midnight-Miqu-70B-v1.5 model card for details and usage instructions:
+ https://huggingface.co/sophosympatheia/Midnight-Miqu-70B-v1.5
+
+ This model is based on Miqu, so it supports a 32K context.
+
+ All Miqu-derived models, including this merge, are suitable for personal use only. Mistral has been cool about it so far, but be aware that by downloading this merge you assume whatever legal risk is inherent in acquiring and using a model based on leaked weights. This merge comes with no warranties or guarantees of any kind, but you probably already knew that.
+
+ This is a merge of pre-trained language models created using [mergekit](https://github.com/cg123/mergekit).
+
+ A big thank you to Mistral, sophosympatheia and jukofyork for the original models!
config.json ADDED
@@ -0,0 +1,41 @@
+ {
+   "_name_or_path": "softwareweaver/Twilight-Miqu-146B",
+   "architectures": [
+     "LlamaForCausalLM"
+   ],
+   "attention_bias": false,
+   "attention_dropout": 0.0,
+   "bos_token_id": 1,
+   "eos_token_id": 2,
+   "hidden_act": "silu",
+   "hidden_size": 8192,
+   "initializer_range": 0.02,
+   "intermediate_size": 28672,
+   "max_position_embeddings": 32764,
+   "mlp_bias": false,
+   "model_type": "llama",
+   "num_attention_heads": 64,
+   "num_hidden_layers": 170,
+   "num_key_value_heads": 8,
+   "pad_token_id": 0,
+   "pretraining_tp": 1,
+   "rms_norm_eps": 1e-05,
+   "rope_scaling": null,
+   "rope_theta": 1000000,
+   "tie_word_embeddings": false,
+   "torch_dtype": "float16",
+   "transformers_version": "4.41.1",
+   "use_cache": true,
+   "vocab_size": 32000,
+   "quantization_config": {
+     "quant_method": "exl2",
+     "version": "0.1.1",
+     "bits": 3.75,
+     "head_bits": 6,
+     "calibration": {
+       "rows": 100,
+       "length": 2048,
+       "dataset": "(default)"
+     }
+   }
+ }
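
The `quantization_config` block marks this upload as an EXL2 quantization (3.75 bits per weight, 6-bit head), so it is loaded with exllamav2 rather than plain transformers. Below is a minimal loading sketch, assuming the exllamav2 0.1.x API indicated by the `version` field; the model directory is a placeholder for wherever the shards were downloaded.

```python
# Minimal sketch, assuming exllamav2 ~0.1.x; the model_dir path is a placeholder.
from exllamav2 import ExLlamaV2, ExLlamaV2Cache, ExLlamaV2Config, ExLlamaV2Tokenizer
from exllamav2.generator import ExLlamaV2BaseGenerator, ExLlamaV2Sampler

config = ExLlamaV2Config()
config.model_dir = "/path/to/Twilight-Miqu-146B-exl2"  # placeholder
config.prepare()

model = ExLlamaV2(config)
cache = ExLlamaV2Cache(model, lazy=True)  # allocated as layers load
model.load_autosplit(cache)               # split the 170 layers across available GPUs

tokenizer = ExLlamaV2Tokenizer(config)
generator = ExLlamaV2BaseGenerator(model, cache, tokenizer)

settings = ExLlamaV2Sampler.Settings()
settings.temperature = 0.8

print(generator.generate_simple("Write the opening line of a gothic tale:", settings, 128))
```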
mergekit_config.yml ADDED
@@ -0,0 +1,88 @@
+ const_tag: &MODEL1 sophosympatheia/Midnight-Miqu-70B-v1.5
+ const_tag: &MODEL3 jukofyork/Dawn-Miqu-70B
+ const_tag: &MODEL2 jukofyork/Dark-Miqu-70B
+
+ const_tag: &QK_ATTENUATION_FACTOR 0.8408964153 # sqrt(sqrt(1/2))
+ const_tag: &MLP_DOWN_SCALE_FACTOR 0.7071067812 # sqrt(1/2)
+
+ scale-filter-env: &scale_filter_env
+   parameters:
+     scale:
+       - filter: q_proj
+         value: *QK_ATTENUATION_FACTOR
+       - filter: k_proj
+         value: *QK_ATTENUATION_FACTOR
+       - filter: down_proj
+         value: *MLP_DOWN_SCALE_FACTOR
+       - value: 1.0
+
+ slices:
+ - sources:
+   - model: *MODEL1
+     layer_range: [0, 10]
+ - sources:
+   - model: *MODEL1
+     layer_range: [10, 20]
+   <<: *scale_filter_env
+ - sources:
+   - model: *MODEL2
+     layer_range: [10, 20]
+   <<: *scale_filter_env
+ - sources:
+   - model: *MODEL3
+     layer_range: [10, 20]
+   <<: *scale_filter_env
+ - sources:
+   - model: *MODEL3
+     layer_range: [20, 30]
+   <<: *scale_filter_env
+ - sources:
+   - model: *MODEL2
+     layer_range: [20, 30]
+   <<: *scale_filter_env
+ - sources:
+   - model: *MODEL1
+     layer_range: [30, 40]
+   <<: *scale_filter_env
+ - sources:
+   - model: *MODEL2
+     layer_range: [30, 40]
+   <<: *scale_filter_env
+ - sources:
+   - model: *MODEL3
+     layer_range: [40, 50]
+   <<: *scale_filter_env
+ - sources:
+   - model: *MODEL2
+     layer_range: [40, 50]
+   <<: *scale_filter_env
+ - sources:
+   - model: *MODEL1
+     layer_range: [50, 60]
+   <<: *scale_filter_env
+ - sources:
+   - model: *MODEL2
+     layer_range: [50, 60]
+   <<: *scale_filter_env
+ - sources:
+   - model: *MODEL3
+     layer_range: [50, 60]
+   <<: *scale_filter_env
+ - sources:
+   - model: *MODEL1
+     layer_range: [60, 70]
+   <<: *scale_filter_env
+ - sources:
+   - model: *MODEL2
+     layer_range: [60, 70]
+   <<: *scale_filter_env
+ - sources:
+   - model: *MODEL3
+     layer_range: [60, 70]
+   <<: *scale_filter_env
+ - sources:
+   - model: *MODEL1
+     layer_range: [70, 80]
+
+ merge_method: passthrough
+ dtype: float16
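
The two scale constants are linked: attention logits are bilinear in q_proj and k_proj, so scaling both by (1/2)^(1/4) attenuates the logits by sqrt(1/2), the same factor applied directly to down_proj, presumably to damp the contribution of blocks that appear more than once in the interleaved stack. The 17 ten-layer slices also account for the 170 hidden layers declared in config.json. A small sanity check of that arithmetic (no assumptions beyond the numbers above):

```python
import math

# The q/k factor is the fourth root of 1/2; since attention logits multiply a
# q_proj output with a k_proj output, scaling both attenuates the logits by
# its square, i.e. sqrt(1/2) -- the same factor applied directly to down_proj.
qk = 0.8408964153    # QK_ATTENUATION_FACTOR from the config
down = 0.7071067812  # MLP_DOWN_SCALE_FACTOR from the config
assert math.isclose(qk, 0.5 ** 0.25, rel_tol=1e-9)
assert math.isclose(qk ** 2, down, rel_tol=1e-9)

# 17 slices of 10 layers each reproduce the 170 hidden layers in config.json.
ranges = ([(0, 10)] + [(10, 20)] * 3 + [(20, 30)] * 2 + [(30, 40)] * 2
          + [(40, 50)] * 2 + [(50, 60)] * 3 + [(60, 70)] * 3 + [(70, 80)])
assert len(ranges) == 17 and sum(b - a for a, b in ranges) == 170
```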
model.safetensors.index.json ADDED
The diff for this file is too large to render. See raw diff
 
output-00001-of-00009.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:d90f8af1becfbebb7c95a8c33139538e4bec9d102d468f39347fe3e303fb12b8
+ size 8587267724
output-00002-of-00009.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:451f5f2985b252730c8e1de611ee83f519287814d5f13e045fbb2a8d6abd4937
+ size 8580363048
output-00003-of-00009.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:c0b58db45c4310a51e89c1b36737413fb025d1857895e490da96c68d3fde0f51
+ size 8574498044
output-00004-of-00009.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:c08a55b8bbef71e88a857321ec62cfa85648f1235fdb36bd244a2341ef32a30a
+ size 8478075972
output-00005-of-00009.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:76e715679491eac55a8033925398b505e68524e5e01679d8e13103dce209250d
+ size 8582377760
output-00006-of-00009.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:1ade61e7994208da9b551161fb9d09726a2dca18ef9aab1846f3c398ad1c6e7a
+ size 8549690360
output-00007-of-00009.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:6d7c728209163e670f056547830f52f2e11c63db38c0e49085f637d113a221d5
+ size 8520705388
output-00008-of-00009.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:99f5a627b4fad8159026dc081735b9d371256d85a82d5c059a3efca4afa004ff
+ size 8572805704
output-00009-of-00009.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:4fd4111ea9deac2576a45332f9d3f4172862f4b7fc6c627fad567ba47e9c9b1f
+ size 473836674
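
Each shard above is a Git LFS pointer (spec version, sha256 oid, byte size). Summing the nine `size` fields gives a quick estimate of the total download:

```python
# Shard sizes copied verbatim from the LFS pointers above.
shard_sizes = [
    8587267724, 8580363048, 8574498044, 8478075972, 8582377760,
    8549690360, 8520705388, 8572805704, 473836674,
]
total = sum(shard_sizes)  # 68_919_620_674
print(f"{total} bytes = {total / 1e9:.1f} GB = {total / 2**30:.1f} GiB")
# -> 68919620674 bytes = 68.9 GB = 64.2 GiB
```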
special_tokens_map.json ADDED
@@ -0,0 +1,30 @@
+ {
+   "bos_token": {
+     "content": "<s>",
+     "lstrip": false,
+     "normalized": true,
+     "rstrip": false,
+     "single_word": false
+   },
+   "eos_token": {
+     "content": "</s>",
+     "lstrip": false,
+     "normalized": true,
+     "rstrip": false,
+     "single_word": false
+   },
+   "pad_token": {
+     "content": "<unk>",
+     "lstrip": false,
+     "normalized": true,
+     "rstrip": false,
+     "single_word": false
+   },
+   "unk_token": {
+     "content": "<unk>",
+     "lstrip": false,
+     "normalized": true,
+     "rstrip": false,
+     "single_word": false
+   }
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer.model ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:9e556afd44213b6bd1be2b850ebbbd98f5481437a8021afaf58ee7fb1818d347
+ size 499723
tokenizer_config.json ADDED
@@ -0,0 +1,42 @@
+ {
+   "add_bos_token": true,
+   "add_eos_token": false,
+   "add_prefix_space": true,
+   "added_tokens_decoder": {
+     "0": {
+       "content": "<unk>",
+       "lstrip": false,
+       "normalized": true,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "1": {
+       "content": "<s>",
+       "lstrip": false,
+       "normalized": true,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "2": {
+       "content": "</s>",
+       "lstrip": false,
+       "normalized": true,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     }
+   },
+   "bos_token": "<s>",
+   "clean_up_tokenization_spaces": false,
+   "eos_token": "</s>",
+   "legacy": false,
+   "model_max_length": 1000000000000000019884624838656,
+   "pad_token": "<unk>",
+   "sp_model_kwargs": {},
+   "spaces_between_special_tokens": false,
+   "tokenizer_class": "LlamaTokenizer",
+   "unk_token": "<unk>",
+   "use_default_system_prompt": false
+ }
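
tokenizer_config.json declares a standard LlamaTokenizer that prepends BOS (`add_bos_token: true`) and does not append EOS. A minimal check with transformers, assuming the repo files are already downloaded locally (the path is a placeholder):

```python
# Minimal sketch: verify the special-token setup declared above.
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("/path/to/Twilight-Miqu-146B-exl2")  # placeholder

assert tok.bos_token == "<s>" and tok.eos_token == "</s>"
assert tok.pad_token == "<unk>" and tok.unk_token == "<unk>"

# add_bos_token=true / add_eos_token=false: encoding prepends <s> only.
ids = tok("Twilight = Midnight + Dawn + Dark").input_ids
assert ids[0] == tok.bos_token_id
assert ids[-1] != tok.eos_token_id
```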