MrRobotoAI committed on
Commit
adff2e4
1 Parent(s): cebc97c

Upload folder using huggingface_hub

Browse files
README.md CHANGED
@@ -1,8 +1,8 @@
1
  ---
2
  base_model:
3
- - MrRobotoAI/MrRoboto-ProLong-8b-v2g
4
- - MrRobotoAI/MrRoboto-ProLong-8b-v1n
5
- - MrRobotoAI/MrRoboto-ProLong-8b-v2e
6
  library_name: transformers
7
  tags:
8
  - mergekit
@@ -16,50 +16,50 @@ This is a merge of pre-trained language models created using [mergekit](https://
16
  ## Merge Details
17
  ### Merge Method
18
 
19
- This model was merged using the [TIES](https://arxiv.org/abs/2306.01708) merge method using [MrRobotoAI/MrRoboto-ProLong-8b-v1n](https://huggingface.co/MrRobotoAI/MrRoboto-ProLong-8b-v1n) as a base.
20
 
21
  ### Models Merged
22
 
23
  The following models were included in the merge:
24
- * [MrRobotoAI/MrRoboto-ProLong-8b-v2g](https://huggingface.co/MrRobotoAI/MrRoboto-ProLong-8b-v2g)
25
- * [MrRobotoAI/MrRoboto-ProLong-8b-v2e](https://huggingface.co/MrRobotoAI/MrRoboto-ProLong-8b-v2e)
26
 
27
  ### Configuration
28
 
29
  The following YAML configuration was used to produce this model:
30
 
31
  ```yaml
32
- merge_method: ties
33
  models:
34
- - model: MrRobotoAI/MrRoboto-ProLong-8b-v2e
35
  parameters:
36
  weight:
37
  - filter: v_proj
38
- value: [0.15, 0.15, 0.25, 0.5, 0.5, 0.5, 0.5, 0.5, 0.25, 0.15, 0.15]
39
  - filter: o_proj
40
- value: [0.15, 0.15, 0.25, 0.5, 0.5, 0.5, 0.5, 0.5, 0.25, 0.15, 0.15]
41
  - filter: up_proj
42
- value: [0.15, 0.15, 0.25, 0.5, 0.5, 0.5, 0.5, 0.5, 0.25, 0.15, 0.15]
43
  - filter: gate_proj
44
- value: [0.15, 0.15, 0.25, 0.5, 0.5, 0.5, 0.5, 0.5, 0.25, 0.15, 0.15]
45
  - filter: down_proj
46
- value: [0.15, 0.15, 0.25, 0.5, 0.5, 0.5, 0.5, 0.5, 0.25, 0.15, 0.15]
47
  - value: 1
48
- - model: MrRobotoAI/MrRoboto-ProLong-8b-v2g
49
  parameters:
50
  weight:
51
  - filter: v_proj
52
- value: [0.85, 0.85, 0.75, 0.5, 0.5, 0.5, 0.5, 0.5, 0.75, 0.85, 0.85]
53
  - filter: o_proj
54
- value: [0.85, 0.85, 0.75, 0.5, 0.5, 0.5, 0.5, 0.5, 0.75, 0.85, 0.85]
55
  - filter: up_proj
56
- value: [0.85, 0.85, 0.75, 0.5, 0.5, 0.5, 0.5, 0.5, 0.75, 0.85, 0.85]
57
  - filter: gate_proj
58
- value: [0.85, 0.85, 0.75, 0.5, 0.5, 0.5, 0.5, 0.5, 0.75, 0.85, 0.85]
59
  - filter: down_proj
60
- value: [0.85, 0.85, 0.75, 0.5, 0.5, 0.5, 0.5, 0.5, 0.75, 0.85, 0.85]
61
  - value: 0
62
- base_model: MrRobotoAI/MrRoboto-ProLong-8b-v1n
63
  tokenizer_source: base
64
  dtype: bfloat16
65
  ```
 
1
  ---
2
  base_model:
3
+ - MrRobotoAI/MrRoboto-ProLong-8b-v2k
4
+ - MrRobotoAI/Thor-v1.4-8b-DARK-FICTION
5
+ - MrRobotoAI/MrRoboto-ProLong-8b-v2h
6
  library_name: transformers
7
  tags:
8
  - mergekit
 
16
  ## Merge Details
17
  ### Merge Method
18
 
19
+ This model was merged with the linear [DARE](https://arxiv.org/abs/2311.03099) merge method, using [MrRobotoAI/MrRoboto-ProLong-8b-v2k](https://huggingface.co/MrRobotoAI/MrRoboto-ProLong-8b-v2k) as the base.
20
 
21
  ### Models Merged
22
 
23
  The following models were included in the merge:
24
+ * [MrRobotoAI/Thor-v1.4-8b-DARK-FICTION](https://huggingface.co/MrRobotoAI/Thor-v1.4-8b-DARK-FICTION)
25
+ * [MrRobotoAI/MrRoboto-ProLong-8b-v2h](https://huggingface.co/MrRobotoAI/MrRoboto-ProLong-8b-v2h)
26
 
27
  ### Configuration
28
 
29
  The following YAML configuration was used to produce this model:
30
 
31
  ```yaml
32
+ merge_method: dare_linear
33
  models:
34
+ - model: MrRobotoAI/Thor-v1.4-8b-DARK-FICTION
35
  parameters:
36
  weight:
37
  - filter: v_proj
38
+ value: [0.25, 0.25, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.25, 0.25]
39
  - filter: o_proj
40
+ value: [0.25, 0.25, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.25, 0.25]
41
  - filter: up_proj
42
+ value: [0.25, 0.25, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.25, 0.25]
43
  - filter: gate_proj
44
+ value: [0.25, 0.25, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.25, 0.25]
45
  - filter: down_proj
46
+ value: [0.25, 0.25, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.25, 0.25]
47
  - value: 1
48
+ - model: MrRobotoAI/MrRoboto-ProLong-8b-v2h
49
  parameters:
50
  weight:
51
  - filter: v_proj
52
+ value: [0.75, 0.75, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.75, 0.75]
53
  - filter: o_proj
54
+ value: [0.75, 0.75, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.75, 0.75]
55
  - filter: up_proj
56
+ value: [0.75, 0.75, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.75, 0.75]
57
  - filter: gate_proj
58
+ value: [0.75, 0.75, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.75, 0.75]
59
  - filter: down_proj
60
+ value: [0.75, 0.75, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.75, 0.75]
61
  - value: 0
62
+ base_model: MrRobotoAI/MrRoboto-ProLong-8b-v2k
63
  tokenizer_source: base
64
  dtype: bfloat16
65
  ```
config.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "_name_or_path": "MrRobotoAI/MrRoboto-ProLong-8b-v1n",
3
  "architectures": [
4
  "LlamaForCausalLM"
5
  ],
 
1
  {
2
+ "_name_or_path": "MrRobotoAI/MrRoboto-ProLong-8b-v2k",
3
  "architectures": [
4
  "LlamaForCausalLM"
5
  ],
mergekit_config.yml CHANGED
@@ -1,33 +1,33 @@
1
- merge_method: ties
2
  models:
3
- - model: MrRobotoAI/MrRoboto-ProLong-8b-v2e
4
  parameters:
5
  weight:
6
  - filter: v_proj
7
- value: [0.15, 0.15, 0.25, 0.5, 0.5, 0.5, 0.5, 0.5, 0.25, 0.15, 0.15]
8
  - filter: o_proj
9
- value: [0.15, 0.15, 0.25, 0.5, 0.5, 0.5, 0.5, 0.5, 0.25, 0.15, 0.15]
10
  - filter: up_proj
11
- value: [0.15, 0.15, 0.25, 0.5, 0.5, 0.5, 0.5, 0.5, 0.25, 0.15, 0.15]
12
  - filter: gate_proj
13
- value: [0.15, 0.15, 0.25, 0.5, 0.5, 0.5, 0.5, 0.5, 0.25, 0.15, 0.15]
14
  - filter: down_proj
15
- value: [0.15, 0.15, 0.25, 0.5, 0.5, 0.5, 0.5, 0.5, 0.25, 0.15, 0.15]
16
  - value: 1
17
- - model: MrRobotoAI/MrRoboto-ProLong-8b-v2g
18
  parameters:
19
  weight:
20
  - filter: v_proj
21
- value: [0.85, 0.85, 0.75, 0.5, 0.5, 0.5, 0.5, 0.5, 0.75, 0.85, 0.85]
22
  - filter: o_proj
23
- value: [0.85, 0.85, 0.75, 0.5, 0.5, 0.5, 0.5, 0.5, 0.75, 0.85, 0.85]
24
  - filter: up_proj
25
- value: [0.85, 0.85, 0.75, 0.5, 0.5, 0.5, 0.5, 0.5, 0.75, 0.85, 0.85]
26
  - filter: gate_proj
27
- value: [0.85, 0.85, 0.75, 0.5, 0.5, 0.5, 0.5, 0.5, 0.75, 0.85, 0.85]
28
  - filter: down_proj
29
- value: [0.85, 0.85, 0.75, 0.5, 0.5, 0.5, 0.5, 0.5, 0.75, 0.85, 0.85]
30
  - value: 0
31
- base_model: MrRobotoAI/MrRoboto-ProLong-8b-v1n
32
  tokenizer_source: base
33
  dtype: bfloat16
 
1
+ merge_method: dare_linear
2
  models:
3
+ - model: MrRobotoAI/Thor-v1.4-8b-DARK-FICTION
4
  parameters:
5
  weight:
6
  - filter: v_proj
7
+ value: [0.25, 0.25, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.25, 0.25]
8
  - filter: o_proj
9
+ value: [0.25, 0.25, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.25, 0.25]
10
  - filter: up_proj
11
+ value: [0.25, 0.25, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.25, 0.25]
12
  - filter: gate_proj
13
+ value: [0.25, 0.25, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.25, 0.25]
14
  - filter: down_proj
15
+ value: [0.25, 0.25, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.25, 0.25]
16
  - value: 1
17
+ - model: MrRobotoAI/MrRoboto-ProLong-8b-v2h
18
  parameters:
19
  weight:
20
  - filter: v_proj
21
+ value: [0.75, 0.75, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.75, 0.75]
22
  - filter: o_proj
23
+ value: [0.75, 0.75, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.75, 0.75]
24
  - filter: up_proj
25
+ value: [0.75, 0.75, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.75, 0.75]
26
  - filter: gate_proj
27
+ value: [0.75, 0.75, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.75, 0.75]
28
  - filter: down_proj
29
+ value: [0.75, 0.75, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.75, 0.75]
30
  - value: 0
31
+ base_model: MrRobotoAI/MrRoboto-ProLong-8b-v2k
32
  tokenizer_source: base
33
  dtype: bfloat16
model-00001-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:809be62669fd137bee689f27d4bd76263c6b9b108071b049dd91f19adcd9573d
3
  size 4953586384
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:01369721bb73c919970a271c9b300f6a9dc80151a79d357bb809170234e769c5
3
  size 4953586384
model-00002-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:68fbd16248bb797b6116978c6c1932d3ca70fd970d0553060961f4924e5ee7a8
3
  size 4999819336
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ee61ec15b9cc941be0896e4c392f98186da9d4e124af310f9fd97678e027c55c
3
  size 4999819336
model-00003-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cea27cdf0b846a1e00e0ce476b43aab211d216232d949251b9f905f4d1fa352a
3
  size 4915916144
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0f2a5192d07a8be66835093a322f66d04fc9a6a0288acfc98cc77de880c35888
3
  size 4915916144
model-00004-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:64b2772fd10f7173971faf5a5450d57fb6970b75d4b66ca187d67cd691888087
3
  size 1191234472
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1a8ff4c645398e0f65ede3055ad7e0c5263eed63bf5b416fac7095a8e56fddc8
3
  size 1191234472