Upload folder using huggingface_hub
Browse files
README.md
CHANGED
@@ -4,19 +4,19 @@ tags:
|
|
4 |
- merge
|
5 |
- mergekit
|
6 |
- lazymergekit
|
7 |
-
-
|
8 |
---
|
9 |
|
10 |
# Breeze-13B-32k-Base-v1_0
|
11 |
|
12 |
Breeze-13B-32k-Base-v1_0 is a merge of the following models using [mergekit](https://github.com/cg123/mergekit):
|
13 |
-
* [
|
14 |
-
* [
|
15 |
-
* [
|
16 |
-
* [
|
17 |
-
* [
|
18 |
-
* [
|
19 |
-
* [
|
20 |
|
21 |
## 🧩 Configuration
|
22 |
|
@@ -26,53 +26,53 @@ merge_method: linear
|
|
26 |
slices:
|
27 |
- sources:
|
28 |
- layer_range: [0, 8]
|
29 |
-
model:
|
30 |
- layer_range: [0, 8]
|
31 |
-
model: meta-llama/Meta-Llama-3-8B
|
32 |
parameters:
|
33 |
weight: 0
|
34 |
- sources:
|
35 |
- layer_range: [4, 12]
|
36 |
-
model:
|
37 |
- layer_range: [4, 12]
|
38 |
-
model: meta-llama/Meta-Llama-3-8B
|
39 |
parameters:
|
40 |
weight: 0
|
41 |
- sources:
|
42 |
- layer_range: [8, 16]
|
43 |
-
model:
|
44 |
- layer_range: [8, 16]
|
45 |
-
model: meta-llama/Meta-Llama-3-8B
|
46 |
parameters:
|
47 |
weight: 0
|
48 |
- sources:
|
49 |
- layer_range: [12, 20]
|
50 |
-
model:
|
51 |
- layer_range: [12, 20]
|
52 |
-
model: meta-llama/Meta-Llama-3-8B
|
53 |
parameters:
|
54 |
weight: 0
|
55 |
- sources:
|
56 |
- layer_range: [16, 24]
|
57 |
-
model:
|
58 |
- layer_range: [16, 24]
|
59 |
-
model: meta-llama/Meta-Llama-3-8B
|
60 |
parameters:
|
61 |
weight: 0
|
62 |
- sources:
|
63 |
- layer_range: [20, 28]
|
64 |
-
model:
|
65 |
- layer_range: [20, 28]
|
66 |
-
model: meta-llama/Meta-Llama-3-8B
|
67 |
parameters:
|
68 |
weight: 0
|
69 |
- sources:
|
70 |
- layer_range: [24, 32]
|
71 |
-
model:
|
72 |
- layer_range: [24, 32]
|
73 |
-
model: meta-llama/Meta-Llama-3-8B
|
74 |
parameters:
|
75 |
weight: 0
|
76 |
-
tokenizer_source: union
|
77 |
|
78 |
```
|
|
|
4 |
- merge
|
5 |
- mergekit
|
6 |
- lazymergekit
|
7 |
+
- MediaTek-Research/Breeze-7B-32k-Base-v1_0
|
8 |
---
|
9 |
|
10 |
# Breeze-13B-32k-Base-v1_0
|
11 |
|
12 |
Breeze-13B-32k-Base-v1_0 is a merge of the following models using [mergekit](https://github.com/cg123/mergekit):
|
13 |
+
* [MediaTek-Research/Breeze-7B-32k-Base-v1_0](https://huggingface.co/MediaTek-Research/Breeze-7B-32k-Base-v1_0)
|
14 |
+
* [MediaTek-Research/Breeze-7B-32k-Base-v1_0](https://huggingface.co/MediaTek-Research/Breeze-7B-32k-Base-v1_0)
|
15 |
+
* [MediaTek-Research/Breeze-7B-32k-Base-v1_0](https://huggingface.co/MediaTek-Research/Breeze-7B-32k-Base-v1_0)
|
16 |
+
* [MediaTek-Research/Breeze-7B-32k-Base-v1_0](https://huggingface.co/MediaTek-Research/Breeze-7B-32k-Base-v1_0)
|
17 |
+
* [MediaTek-Research/Breeze-7B-32k-Base-v1_0](https://huggingface.co/MediaTek-Research/Breeze-7B-32k-Base-v1_0)
|
18 |
+
* [MediaTek-Research/Breeze-7B-32k-Base-v1_0](https://huggingface.co/MediaTek-Research/Breeze-7B-32k-Base-v1_0)
|
19 |
+
* [MediaTek-Research/Breeze-7B-32k-Base-v1_0](https://huggingface.co/MediaTek-Research/Breeze-7B-32k-Base-v1_0)
|
20 |
|
21 |
## 🧩 Configuration
|
22 |
|
|
|
26 |
slices:
|
27 |
- sources:
|
28 |
- layer_range: [0, 8]
|
29 |
+
model: MediaTek-Research/Breeze-7B-32k-Base-v1_0
|
30 |
- layer_range: [0, 8]
|
31 |
+
model: meta-llama/Meta-Llama-3-8B
|
32 |
parameters:
|
33 |
weight: 0
|
34 |
- sources:
|
35 |
- layer_range: [4, 12]
|
36 |
+
model: MediaTek-Research/Breeze-7B-32k-Base-v1_0
|
37 |
- layer_range: [4, 12]
|
38 |
+
model: meta-llama/Meta-Llama-3-8B
|
39 |
parameters:
|
40 |
weight: 0
|
41 |
- sources:
|
42 |
- layer_range: [8, 16]
|
43 |
+
model: MediaTek-Research/Breeze-7B-32k-Base-v1_0
|
44 |
- layer_range: [8, 16]
|
45 |
+
model: meta-llama/Meta-Llama-3-8B
|
46 |
parameters:
|
47 |
weight: 0
|
48 |
- sources:
|
49 |
- layer_range: [12, 20]
|
50 |
+
model: MediaTek-Research/Breeze-7B-32k-Base-v1_0
|
51 |
- layer_range: [12, 20]
|
52 |
+
model: meta-llama/Meta-Llama-3-8B
|
53 |
parameters:
|
54 |
weight: 0
|
55 |
- sources:
|
56 |
- layer_range: [16, 24]
|
57 |
+
model: MediaTek-Research/Breeze-7B-32k-Base-v1_0
|
58 |
- layer_range: [16, 24]
|
59 |
+
model: meta-llama/Meta-Llama-3-8B
|
60 |
parameters:
|
61 |
weight: 0
|
62 |
- sources:
|
63 |
- layer_range: [20, 28]
|
64 |
+
model: MediaTek-Research/Breeze-7B-32k-Base-v1_0
|
65 |
- layer_range: [20, 28]
|
66 |
+
model: meta-llama/Meta-Llama-3-8B
|
67 |
parameters:
|
68 |
weight: 0
|
69 |
- sources:
|
70 |
- layer_range: [24, 32]
|
71 |
+
model: MediaTek-Research/Breeze-7B-32k-Base-v1_0
|
72 |
- layer_range: [24, 32]
|
73 |
+
model: meta-llama/Meta-Llama-3-8B
|
74 |
parameters:
|
75 |
weight: 0
|
76 |
+
tokenizer_source: union
|
77 |
|
78 |
```
|