---
# Layer-stacking merge config (mergekit-style schema) built entirely from
# meta-llama/Llama-3.1-8B-Instruct.
#
# Layout of the output stack (55 layers in total):
#   * source layers 0-9 are copied once, unchanged;
#   * source layer 10 is emitted twice: the original plus a trailing
#     duplicate ("Dupe B"); its leading duplicate ("Dupe A") is skipped;
#   * source layers 11-21 are each emitted three times:
#     Dupe A, the original, then Dupe B;
#   * source layers 22-31 are copied once, unchanged.
#
# Every duplicate carries a `scale` list that sets tensors matching `o_proj`
# and `down_proj` to 0.0 and leaves everything else at 1.0.
# NOTE(review): presumably this makes each duplicate add nothing to the
# residual stream at first, so the stacked model starts out equivalent to the
# base model -- confirm against the merge tool's `scale` semantics.
# NOTE(review): `parameters` is nested inside the source entry (sibling of
# `layer_range` and `model`), as implied by the original token order --
# confirm against the consumer's schema.
merge_method: passthrough
dtype: bfloat16
slices:
  # Original first 10 layers (L0-L9)
  - sources:
      - layer_range: [0, 10]
        model: meta-llama/Llama-3.1-8B-Instruct

  # Dupe A of L10 is skipped.
  # Original L10
  - sources:
      - layer_range: [10, 11]
        model: meta-llama/Llama-3.1-8B-Instruct
  # Dupe B of L10
  - sources:
      - layer_range: [10, 11]
        model: meta-llama/Llama-3.1-8B-Instruct
        parameters:
          scale:
            - filter: o_proj
              value: 0.0
            - filter: down_proj
              value: 0.0
            - value: 1.0

  # Dupe A of L11
  - sources:
      - layer_range: [11, 12]
        model: meta-llama/Llama-3.1-8B-Instruct
        parameters:
          scale:
            - filter: o_proj
              value: 0.0
            - filter: down_proj
              value: 0.0
            - value: 1.0
  # Original L11
  - sources:
      - layer_range: [11, 12]
        model: meta-llama/Llama-3.1-8B-Instruct
  # Dupe B of L11
  - sources:
      - layer_range: [11, 12]
        model: meta-llama/Llama-3.1-8B-Instruct
        parameters:
          scale:
            - filter: o_proj
              value: 0.0
            - filter: down_proj
              value: 0.0
            - value: 1.0

  # Dupe A of L12
  - sources:
      - layer_range: [12, 13]
        model: meta-llama/Llama-3.1-8B-Instruct
        parameters:
          scale:
            - filter: o_proj
              value: 0.0
            - filter: down_proj
              value: 0.0
            - value: 1.0
  # Original L12
  - sources:
      - layer_range: [12, 13]
        model: meta-llama/Llama-3.1-8B-Instruct
  # Dupe B of L12
  - sources:
      - layer_range: [12, 13]
        model: meta-llama/Llama-3.1-8B-Instruct
        parameters:
          scale:
            - filter: o_proj
              value: 0.0
            - filter: down_proj
              value: 0.0
            - value: 1.0

  # Dupe A of L13
  - sources:
      - layer_range: [13, 14]
        model: meta-llama/Llama-3.1-8B-Instruct
        parameters:
          scale:
            - filter: o_proj
              value: 0.0
            - filter: down_proj
              value: 0.0
            - value: 1.0
  # Original L13
  - sources:
      - layer_range: [13, 14]
        model: meta-llama/Llama-3.1-8B-Instruct
  # Dupe B of L13
  - sources:
      - layer_range: [13, 14]
        model: meta-llama/Llama-3.1-8B-Instruct
        parameters:
          scale:
            - filter: o_proj
              value: 0.0
            - filter: down_proj
              value: 0.0
            - value: 1.0

  # Dupe A of L14
  - sources:
      - layer_range: [14, 15]
        model: meta-llama/Llama-3.1-8B-Instruct
        parameters:
          scale:
            - filter: o_proj
              value: 0.0
            - filter: down_proj
              value: 0.0
            - value: 1.0
  # Original L14
  - sources:
      - layer_range: [14, 15]
        model: meta-llama/Llama-3.1-8B-Instruct
  # Dupe B of L14
  - sources:
      - layer_range: [14, 15]
        model: meta-llama/Llama-3.1-8B-Instruct
        parameters:
          scale:
            - filter: o_proj
              value: 0.0
            - filter: down_proj
              value: 0.0
            - value: 1.0

  # Dupe A of L15
  - sources:
      - layer_range: [15, 16]
        model: meta-llama/Llama-3.1-8B-Instruct
        parameters:
          scale:
            - filter: o_proj
              value: 0.0
            - filter: down_proj
              value: 0.0
            - value: 1.0
  # Original L15
  - sources:
      - layer_range: [15, 16]
        model: meta-llama/Llama-3.1-8B-Instruct
  # Dupe B of L15
  - sources:
      - layer_range: [15, 16]
        model: meta-llama/Llama-3.1-8B-Instruct
        parameters:
          scale:
            - filter: o_proj
              value: 0.0
            - filter: down_proj
              value: 0.0
            - value: 1.0

  # Dupe A of L16
  - sources:
      - layer_range: [16, 17]
        model: meta-llama/Llama-3.1-8B-Instruct
        parameters:
          scale:
            - filter: o_proj
              value: 0.0
            - filter: down_proj
              value: 0.0
            - value: 1.0
  # Original L16
  - sources:
      - layer_range: [16, 17]
        model: meta-llama/Llama-3.1-8B-Instruct
  # Dupe B of L16
  - sources:
      - layer_range: [16, 17]
        model: meta-llama/Llama-3.1-8B-Instruct
        parameters:
          scale:
            - filter: o_proj
              value: 0.0
            - filter: down_proj
              value: 0.0
            - value: 1.0

  # Dupe A of L17
  - sources:
      - layer_range: [17, 18]
        model: meta-llama/Llama-3.1-8B-Instruct
        parameters:
          scale:
            - filter: o_proj
              value: 0.0
            - filter: down_proj
              value: 0.0
            - value: 1.0
  # Original L17
  - sources:
      - layer_range: [17, 18]
        model: meta-llama/Llama-3.1-8B-Instruct
  # Dupe B of L17
  - sources:
      - layer_range: [17, 18]
        model: meta-llama/Llama-3.1-8B-Instruct
        parameters:
          scale:
            - filter: o_proj
              value: 0.0
            - filter: down_proj
              value: 0.0
            - value: 1.0

  # Dupe A of L18
  - sources:
      - layer_range: [18, 19]
        model: meta-llama/Llama-3.1-8B-Instruct
        parameters:
          scale:
            - filter: o_proj
              value: 0.0
            - filter: down_proj
              value: 0.0
            - value: 1.0
  # Original L18
  - sources:
      - layer_range: [18, 19]
        model: meta-llama/Llama-3.1-8B-Instruct
  # Dupe B of L18
  - sources:
      - layer_range: [18, 19]
        model: meta-llama/Llama-3.1-8B-Instruct
        parameters:
          scale:
            - filter: o_proj
              value: 0.0
            - filter: down_proj
              value: 0.0
            - value: 1.0

  # Dupe A of L19
  - sources:
      - layer_range: [19, 20]
        model: meta-llama/Llama-3.1-8B-Instruct
        parameters:
          scale:
            - filter: o_proj
              value: 0.0
            - filter: down_proj
              value: 0.0
            - value: 1.0
  # Original L19
  - sources:
      - layer_range: [19, 20]
        model: meta-llama/Llama-3.1-8B-Instruct
  # Dupe B of L19
  - sources:
      - layer_range: [19, 20]
        model: meta-llama/Llama-3.1-8B-Instruct
        parameters:
          scale:
            - filter: o_proj
              value: 0.0
            - filter: down_proj
              value: 0.0
            - value: 1.0

  # Dupe A of L20
  - sources:
      - layer_range: [20, 21]
        model: meta-llama/Llama-3.1-8B-Instruct
        parameters:
          scale:
            - filter: o_proj
              value: 0.0
            - filter: down_proj
              value: 0.0
            - value: 1.0
  # Original L20
  - sources:
      - layer_range: [20, 21]
        model: meta-llama/Llama-3.1-8B-Instruct
  # Dupe B of L20
  - sources:
      - layer_range: [20, 21]
        model: meta-llama/Llama-3.1-8B-Instruct
        parameters:
          scale:
            - filter: o_proj
              value: 0.0
            - filter: down_proj
              value: 0.0
            - value: 1.0

  # Dupe A of L21
  - sources:
      - layer_range: [21, 22]
        model: meta-llama/Llama-3.1-8B-Instruct
        parameters:
          scale:
            - filter: o_proj
              value: 0.0
            - filter: down_proj
              value: 0.0
            - value: 1.0
  # Original L21
  - sources:
      - layer_range: [21, 22]
        model: meta-llama/Llama-3.1-8B-Instruct
  # Dupe B of L21
  - sources:
      - layer_range: [21, 22]
        model: meta-llama/Llama-3.1-8B-Instruct
        parameters:
          scale:
            - filter: o_proj
              value: 0.0
            - filter: down_proj
              value: 0.0
            - value: 1.0

  # Original last 10 layers (L22-L31)
  - sources:
      - layer_range: [22, 32]
        model: meta-llama/Llama-3.1-8B-Instruct