---
base_model:
- meta-llama/Meta-Llama-3-70B-Instruct
license: llama3
language:
- en
pipeline_tag: text-generation
tags:
- merge
- frankenmerge
- 96b
---
# BigWeave v32 96b

The BigWeave models aim to experimentally identify merge settings for increasing model performance. The version number merely tracks various attempts and is not a quality indicator. Only results demonstrating good performance are retained and shared.

# Prompting Format
llamav3

# Merge process
This is a self-merge of meta-llama/Meta-Llama-3-70B-Instruct. Middle layers are duplicated and various matrices are scaled according to the template by jukofyork as shown here: https://github.com/arcee-ai/mergekit/issues/198#issuecomment-2079950009

Merge configuration:
```yaml
const_tag: &MODEL meta-llama/Meta-Llama-3-70B-Instruct

const_tag: &RESIDUAL_SCALE_FACTOR 0.5
const_tag: &QK_ATTENUATION_FACTOR 0.7071067812
const_tag: &OUT_FACTOR 0.9

scale-filter-env: &scale_filter_env
  parameters:
    scale:
      - filter: o_proj
        value: *RESIDUAL_SCALE_FACTOR
      - filter: down_proj
        value: *RESIDUAL_SCALE_FACTOR
      - filter: q_proj
        value: *QK_ATTENUATION_FACTOR
      - filter: k_proj
        value: *QK_ATTENUATION_FACTOR
      - filter: v_proj
        value: *OUT_FACTOR
      - filter: up_proj
        value: *OUT_FACTOR
      - value: 1.0

slices:
  - sources:
    - model: *MODEL
      layer_range: [0, 25]
  - sources:
    - model: *MODEL
      layer_range: [25, 26]
      <<: *scale_filter_env
  - sources:
    - model: *MODEL
      layer_range: [25, 26]
      <<: *scale_filter_env
  - sources:
    - model: *MODEL
      layer_range: [26, 27]
      <<: *scale_filter_env
  - sources:
    - model: *MODEL
      layer_range: [26, 27]
      <<: *scale_filter_env
  - sources:
    - model: *MODEL
      layer_range: [27, 28]
      <<: *scale_filter_env
  - sources:
    - model: *MODEL
      layer_range: [27, 28]
      <<: *scale_filter_env
  - sources:
    - model: *MODEL
      layer_range: [28, 29]
      <<: *scale_filter_env
  - sources:
    - model: *MODEL
      layer_range: [28, 29]
      <<: *scale_filter_env
  - sources:
    - model: *MODEL
      layer_range: [29, 30]
      <<: *scale_filter_env
  - sources:
    - model: *MODEL
      layer_range: [29, 30]
      <<: *scale_filter_env
  - sources:
    - model: *MODEL
      layer_range: [30, 31]
      <<: *scale_filter_env
  - sources:
    - model: *MODEL
      layer_range: [30, 31]
      <<: *scale_filter_env
  - sources:
    - model: *MODEL
      layer_range: [31, 32]
      <<: *scale_filter_env
  - sources:
    - model: *MODEL
      layer_range: [31, 32]
      <<: *scale_filter_env
  - sources:
    - model: *MODEL
      layer_range: [32, 33]
      <<: *scale_filter_env
  - sources:
    - model: *MODEL
      layer_range: [32, 33]
      <<: *scale_filter_env
  - sources:
    - model: *MODEL
      layer_range: [33, 34]
      <<: *scale_filter_env
  - sources:
    - model: *MODEL
      layer_range: [33, 34]
      <<: *scale_filter_env
  - sources:
    - model: *MODEL
      layer_range: [34, 35]
      <<: *scale_filter_env
  - sources:
    - model: *MODEL
      layer_range: [34, 35]
      <<: *scale_filter_env
  - sources:
    - model: *MODEL
      layer_range: [35, 36]
      <<: *scale_filter_env
  - sources:
    - model: *MODEL
      layer_range: [35, 36]
      <<: *scale_filter_env
  - sources:
    - model: *MODEL
      layer_range: [36, 37]
      <<: *scale_filter_env
  - sources:
    - model: *MODEL
      layer_range: [36, 37]
      <<: *scale_filter_env
  - sources:
    - model: *MODEL
      layer_range: [37, 38]
      <<: *scale_filter_env
  - sources:
    - model: *MODEL
      layer_range: [37, 38]
      <<: *scale_filter_env
  - sources:
    - model: *MODEL
      layer_range: [38, 39]
      <<: *scale_filter_env
  - sources:
    - model: *MODEL
      layer_range: [38, 39]
      <<: *scale_filter_env
  - sources:
    - model: *MODEL
      layer_range: [39, 40]
      <<: *scale_filter_env
  - sources:
    - model: *MODEL
      layer_range: [39, 40]
      <<: *scale_filter_env
  - sources:
    - model: *MODEL
      layer_range: [40, 41]
      <<: *scale_filter_env
  - sources:
    - model: *MODEL
      layer_range: [40, 41]
      <<: *scale_filter_env
  - sources:
    - model: *MODEL
      layer_range: [41, 42]
      <<: *scale_filter_env
  - sources:
    - model: *MODEL
      layer_range: [41, 42]
      <<: *scale_filter_env
  - sources:
    - model: *MODEL
      layer_range: [42, 43]
      <<: *scale_filter_env
  - sources:
    - model: *MODEL
      layer_range: [42, 43]
      <<: *scale_filter_env
  - sources:
    - model: *MODEL
      layer_range: [43, 44]
      <<: *scale_filter_env
  - sources:
    - model: *MODEL
      layer_range: [43, 44]
      <<: *scale_filter_env
  - sources:
    - model: *MODEL
      layer_range: [44, 45]
      <<: *scale_filter_env
  - sources:
    - model: *MODEL
      layer_range: [44, 45]
      <<: *scale_filter_env
  - sources:
    - model: *MODEL
      layer_range: [45, 46]
      <<: *scale_filter_env
  - sources:
    - model: *MODEL
      layer_range: [45, 46]
      <<: *scale_filter_env
  - sources:
    - model: *MODEL
      layer_range: [46, 47]
      <<: *scale_filter_env
  - sources:
    - model: *MODEL
      layer_range: [46, 47]
      <<: *scale_filter_env
  - sources:
    - model: *MODEL
      layer_range: [47, 48]
      <<: *scale_filter_env
  - sources:
    - model: *MODEL
      layer_range: [47, 48]
      <<: *scale_filter_env
  - sources:
    - model: *MODEL
      layer_range: [48, 49]
      <<: *scale_filter_env
  - sources:
    - model: *MODEL
      layer_range: [48, 49]
      <<: *scale_filter_env
  - sources:
    - model: *MODEL
      layer_range: [49, 50]
      <<: *scale_filter_env
  - sources:
    - model: *MODEL
      layer_range: [49, 50]
      <<: *scale_filter_env
  - sources:
    - model: *MODEL
      layer_range: [50, 51]
      <<: *scale_filter_env
  - sources:
    - model: *MODEL
      layer_range: [50, 51]
      <<: *scale_filter_env
  - sources:
    - model: *MODEL
      layer_range: [51, 52]
      <<: *scale_filter_env
  - sources:
    - model: *MODEL
      layer_range: [51, 52]
      <<: *scale_filter_env
  - sources:
    - model: *MODEL
      layer_range: [52, 53]
      <<: *scale_filter_env
  - sources:
    - model: *MODEL
      layer_range: [52, 53]
      <<: *scale_filter_env
  - sources:
    - model: *MODEL
      layer_range: [53, 54]
      <<: *scale_filter_env
  - sources:
    - model: *MODEL
      layer_range: [53, 54]
      <<: *scale_filter_env
  - sources:
    - model: *MODEL
      layer_range: [54, 55]
      <<: *scale_filter_env
  - sources:
    - model: *MODEL
      layer_range: [54, 55]
      <<: *scale_filter_env
  - sources:
    - model: *MODEL
      layer_range: [55, 80]

merge_method: passthrough
dtype: float16
```