---
base_model:
- MathGenie/MathCoder2-Llama-3-8B
- meta-llama/Llama-3.1-8B
library_name: transformers
tags:
- mergekit
- merge
---

# mathcoder_8b_0.3_0.8_generated_ties_0.5_sparsity_15_samples-merged

This is a merge of pre-trained language models created using [mergekit](https://github.com/cg123/mergekit).

## Merge Details

### Merge Method

This model was merged using the [TIES](https://arxiv.org/abs/2306.01708) merge method, with [meta-llama/Llama-3.1-8B](https://huggingface.co/meta-llama/Llama-3.1-8B) as the base.

### Models Merged

The following models were included in the merge:

* [MathGenie/MathCoder2-Llama-3-8B](https://huggingface.co/MathGenie/MathCoder2-Llama-3-8B)

### Configuration

The following YAML configuration was used to produce this model:

```yaml
base_model: meta-llama/Llama-3.1-8B
dtype: bfloat16
merge_method: ties
parameters:
  int8_mask: 1.0
  normalize: 1.0
slices:
- sources:
  - layer_range: [0, 32]
    model: meta-llama/Llama-3.1-8B
  - layer_range: [0, 32]
    model: MathGenie/MathCoder2-Llama-3-8B
    parameters:
      density:
      - value: 1.0
      weight:
      - filter: model.layers.0.self_attn.q_proj
        value: 0.34567118839365213
      - filter: model.layers.1.self_attn.q_proj
        value: 0.3194050175456974
      - filter: model.layers.2.self_attn.q_proj
        value: 0.35258471691195725
      - filter: model.layers.3.self_attn.q_proj
        value: 0.3345938301995496
      - filter: model.layers.4.self_attn.q_proj
        value: 0.30193788299376734
      - filter: model.layers.5.self_attn.q_proj
        value: 0.3173885717278584
      - filter: model.layers.6.self_attn.q_proj
        value: 0.3099251034410517
      - filter: model.layers.7.self_attn.q_proj
        value: 0.32521866652699943
      - filter: model.layers.8.self_attn.q_proj
        value: 0.3688734091028125
      - filter: model.layers.9.self_attn.q_proj
        value: 0.3703137275441261
      - filter: model.layers.10.self_attn.q_proj
        value: 0.31995495731419893
      - filter: model.layers.11.self_attn.q_proj
        value: 0.3247211019745456
      - filter: model.layers.12.self_attn.q_proj
        value: 0.31401037029277745
      - filter: model.layers.13.self_attn.q_proj
        value: 0.3123343633792489
      - filter: model.layers.14.self_attn.q_proj
        value: 0.30020950086419107
      - filter: model.layers.15.self_attn.q_proj
        value: 0.3220499659561096
      - filter: model.layers.16.self_attn.q_proj
        value: 0.3007856282407165
      - filter: model.layers.17.self_attn.q_proj
        value: 0.3021473838579584
      - filter: model.layers.18.self_attn.q_proj
        value: 0.3078562824071649
      - filter: model.layers.19.self_attn.q_proj
        value: 0.3117844236107474
      - filter: model.layers.20.self_attn.q_proj
        value: 0.3057088985492065
      - filter: model.layers.21.self_attn.q_proj
        value: 0.3555962918347038
      - filter: model.layers.22.self_attn.q_proj
        value: 0.3584245535012832
      - filter: model.layers.23.self_attn.q_proj
        value: 0.4073430052898968
      - filter: model.layers.24.self_attn.q_proj
        value: 0.451652438066307
      - filter: model.layers.25.self_attn.q_proj
        value: 0.47265489970146124
      - filter: model.layers.26.self_attn.q_proj
        value: 0.5172262085581103
      - filter: model.layers.27.self_attn.q_proj
        value: 0.5207091604252867
      - filter: model.layers.28.self_attn.q_proj
        value: 0.5678730424763002
      - filter: model.layers.29.self_attn.q_proj
        value: 0.5689467344052794
      - filter: model.layers.30.self_attn.q_proj
        value: 0.7031844131357041
      - filter: model.layers.31.self_attn.q_proj
        value: 0.5159692033729639
      - filter: model.layers.0.self_attn.k_proj
        value: 0.32548054260723824
      - filter: model.layers.1.self_attn.k_proj
        value: 0.32346409678939925
      - filter: model.layers.2.self_attn.k_proj
        value: 0.3201120829623422
      - filter: model.layers.3.self_attn.k_proj
        value: 0.30683496569423346
      - filter: model.layers.4.self_attn.k_proj
        value: 0.31762426020007334
      - filter: model.layers.5.self_attn.k_proj
        value: 0.30327345100298536
      - filter: model.layers.6.self_attn.k_proj
        value: 0.3263185460640025
      - filter: model.layers.7.self_attn.k_proj
        value: 0.3229665322369455
      - filter: model.layers.8.self_attn.k_proj
        value: 0.3647619546430629
      - filter: model.layers.9.self_attn.k_proj
        value: 0.3679830304300005
      - filter: model.layers.10.self_attn.k_proj
        value: 0.34794951029172994
      - filter: model.layers.11.self_attn.k_proj
        value: 0.3474519457392762
      - filter: model.layers.12.self_attn.k_proj
        value: 0.3480018855077777
      - filter: model.layers.13.self_attn.k_proj
        value: 0.38031739380924945
      - filter: model.layers.14.self_attn.k_proj
        value: 0.35365840884093647
      - filter: model.layers.15.self_attn.k_proj
        value: 0.35551772901063217
      - filter: model.layers.16.self_attn.k_proj
        value: 0.3715707327292725
      - filter: model.layers.17.self_attn.k_proj
        value: 0.3642382024825852
      - filter: model.layers.18.self_attn.k_proj
        value: 0.3526109045199811
      - filter: model.layers.19.self_attn.k_proj
        value: 0.3439428062640758
      - filter: model.layers.20.self_attn.k_proj
        value: 0.3556224794427277
      - filter: model.layers.21.self_attn.k_proj
        value: 0.390713874194731
      - filter: model.layers.22.self_attn.k_proj
        value: 0.38280521657151834
      - filter: model.layers.23.self_attn.k_proj
        value: 0.3970774629445346
      - filter: model.layers.24.self_attn.k_proj
        value: 0.4266170847954748
      - filter: model.layers.25.self_attn.k_proj
        value: 0.4180275493636411
      - filter: model.layers.26.self_attn.k_proj
        value: 0.4680196930812339
      - filter: model.layers.27.self_attn.k_proj
        value: 0.4603205363222123
      - filter: model.layers.28.self_attn.k_proj
        value: 0.49577855758655004
      - filter: model.layers.29.self_attn.k_proj
        value: 0.49716650081181585
      - filter: model.layers.30.self_attn.k_proj
        value: 0.610715969203373
      - filter: model.layers.31.self_attn.k_proj
        value: 0.46008484784999737
      - filter: model.layers.0.self_attn.v_proj
        value: 0.3039805164196302
      - filter: model.layers.1.self_attn.v_proj
        value: 0.31008222908919497
      - filter: model.layers.2.self_attn.v_proj
        value: 0.3261614204158592
      - filter: model.layers.3.self_attn.v_proj
        value: 0.34339286649557427
      - filter: model.layers.4.self_attn.v_proj
        value: 0.33574608495260044
      - filter: model.layers.5.self_attn.v_proj
        value: 0.3314251296286597
      - filter: model.layers.6.self_attn.v_proj
        value: 0.3357722725606243
      - filter: model.layers.7.self_attn.v_proj
        value: 0.3345938301995496
      - filter: model.layers.8.self_attn.v_proj
        value: 0.33404389043104804
      - filter: model.layers.9.self_attn.v_proj
        value: 0.36114806473576705
      - filter: model.layers.10.self_attn.v_proj
        value: 0.3461163777300581
      - filter: model.layers.11.self_attn.v_proj
        value: 0.34881370135651807
      - filter: model.layers.12.self_attn.v_proj
        value: 0.35465353794584403
      - filter: model.layers.13.self_attn.v_proj
        value: 0.3599958099827162
      - filter: model.layers.14.self_attn.v_proj
        value: 0.36486670507515845
      - filter: model.layers.15.self_attn.v_proj
        value: 0.3886450531608443
      - filter: model.layers.16.self_attn.v_proj
        value: 0.3802650185932017
      - filter: model.layers.17.self_attn.v_proj
        value: 0.3948253286544807
      - filter: model.layers.18.self_attn.v_proj
        value: 0.4028911119258367
      - filter: model.layers.19.self_attn.v_proj
        value: 0.41336615513538993
      - filter: model.layers.20.self_attn.v_proj
        value: 0.4098832032682135
      - filter: model.layers.21.self_attn.v_proj
        value: 0.39906772115434974
      - filter: model.layers.22.self_attn.v_proj
        value: 0.4269051484837375
      - filter: model.layers.23.self_attn.v_proj
        value: 0.4284502173571466
      - filter: model.layers.24.self_attn.v_proj
        value: 0.43452574241868747
      - filter: model.layers.25.self_attn.v_proj
        value: 0.4286073430052899
      - filter: model.layers.26.self_attn.v_proj
        value: 0.421196249934531
      - filter: model.layers.27.self_attn.v_proj
        value: 0.43709212800502806
      - filter: model.layers.28.self_attn.v_proj
        value: 0.4468339181899125
      - filter: model.layers.29.self_attn.v_proj
        value: 0.4439009060912376
      - filter: model.layers.30.self_attn.v_proj
        value: 0.4961451840988844
      - filter: model.layers.31.self_attn.v_proj
        value: 0.36374063793013145
      - filter: model.layers.0.self_attn.o_proj
        value: 0.301335568009218
      - filter: model.layers.1.self_attn.o_proj
        value: 0.30623265070968414
      - filter: model.layers.2.self_attn.o_proj
        value: 0.3042685801078929
      - filter: model.layers.3.self_attn.o_proj
        value: 0.3
      - filter: model.layers.4.self_attn.o_proj
        value: 0.32671136018436076
      - filter: model.layers.5.self_attn.o_proj
        value: 0.3351961451840989
      - filter: model.layers.6.self_attn.o_proj
        value: 0.30518514638872885
      - filter: model.layers.7.self_attn.o_proj
        value: 0.3552820405384172
      - filter: model.layers.8.self_attn.o_proj
        value: 0.358241240245116
      - filter: model.layers.9.self_attn.o_proj
        value: 0.35758655004451895
      - filter: model.layers.10.self_attn.o_proj
        value: 0.3841669721887603
      - filter: model.layers.11.self_attn.o_proj
        value: 0.3434714293196459
      - filter: model.layers.12.self_attn.o_proj
        value: 0.38160058660241974
      - filter: model.layers.13.self_attn.o_proj
        value: 0.3708374797046038
      - filter: model.layers.14.self_attn.o_proj
        value: 0.4145707851044885
      - filter: model.layers.15.self_attn.o_proj
        value: 0.39864871942596763
      - filter: model.layers.16.self_attn.o_proj
        value: 0.41519928769706177
      - filter: model.layers.17.self_attn.o_proj
        value: 0.31864557691300477
      - filter: model.layers.18.self_attn.o_proj
        value: 0.3101084166972189
      - filter: model.layers.19.self_attn.o_proj
        value: 0.3196145184098884
      - filter: model.layers.20.self_attn.o_proj
        value: 0.4667365002880637
      - filter: model.layers.21.self_attn.o_proj
        value: 0.3742418687477086
      - filter: model.layers.22.self_attn.o_proj
        value: 0.3403812915728277
      - filter: model.layers.23.self_attn.o_proj
        value: 0.3232284083171843
      - filter: model.layers.24.self_attn.o_proj
        value: 0.3742418687477086
      - filter: model.layers.25.self_attn.o_proj
        value: 0.6244644634159116
      - filter: model.layers.26.self_attn.o_proj
        value: 0.5438066307023517
      - filter: model.layers.27.self_attn.o_proj
        value: 0.542366312261038
      - filter: model.layers.28.self_attn.o_proj
        value: 0.4544021369088147
      - filter: model.layers.29.self_attn.o_proj
        value: 0.4355732467396428
      - filter: model.layers.30.self_attn.o_proj
        value: 0.4489027392237993
      - filter: model.layers.31.self_attn.o_proj
        value: 0.3717016707693919
      - filter: model.layers.0.mlp.gate_proj
        value: 0.321735714659823
      - filter: model.layers.1.mlp.gate_proj
        value: 0.32713036191274286
      - filter: model.layers.2.mlp.gate_proj
        value: 0.3402765411407322
      - filter: model.layers.3.mlp.gate_proj
        value: 0.34344524171162205
      - filter: model.layers.4.mlp.gate_proj
        value: 0.34572356360969986
      - filter: model.layers.5.mlp.gate_proj
        value: 0.34881370135651807
      - filter: model.layers.6.mlp.gate_proj
        value: 0.3491017650447808
      - filter: model.layers.7.mlp.gate_proj
        value: 0.34637825381029697
      - filter: model.layers.8.mlp.gate_proj
        value: 0.351301524118787
      - filter: model.layers.9.mlp.gate_proj
        value: 0.35083014717435707
      - filter: model.layers.10.mlp.gate_proj
        value: 0.35062064631016604
      - filter: model.layers.11.mlp.gate_proj
        value: 0.35083014717435707
      - filter: model.layers.12.mlp.gate_proj
        value: 0.35724611114020843
      - filter: model.layers.13.mlp.gate_proj
        value: 0.36078143822343267
      - filter: model.layers.14.mlp.gate_proj
        value: 0.3717278583774158
      - filter: model.layers.15.mlp.gate_proj
        value: 0.3819410255067302
      - filter: model.layers.16.mlp.gate_proj
        value: 0.3969203372963913
      - filter: model.layers.17.mlp.gate_proj
        value: 0.4064526266170848
      - filter: model.layers.18.mlp.gate_proj
        value: 0.4028387367097889
      - filter: model.layers.19.mlp.gate_proj
        value: 0.40865238569109097
      - filter: model.layers.20.mlp.gate_proj
        value: 0.4113758969255748
      - filter: model.layers.21.mlp.gate_proj
        value: 0.4152778505211334
      - filter: model.layers.22.mlp.gate_proj
        value: 0.41237102603048237
      - filter: model.layers.23.mlp.gate_proj
        value: 0.4021316712931441
      - filter: model.layers.24.mlp.gate_proj
        value: 0.3956895197192688
      - filter: model.layers.25.mlp.gate_proj
        value: 0.407290630073849
      - filter: model.layers.26.mlp.gate_proj
        value: 0.41842036348399936
      - filter: model.layers.27.mlp.gate_proj
        value: 0.43950138794322524
      - filter: model.layers.28.mlp.gate_proj
        value: 0.4232912585764416
      - filter: model.layers.29.mlp.gate_proj
        value: 0.43353061331377996
      - filter: model.layers.30.mlp.gate_proj
        value: 0.4429843398104017
      - filter: model.layers.31.mlp.gate_proj
        value: 0.39241606871628343
      - filter: model.layers.0.mlp.up_proj
        value: 0.31846226365683755
      - filter: model.layers.1.mlp.up_proj
        value: 0.3282826166657937
      - filter: model.layers.2.mlp.up_proj
        value: 0.34148117110983084
      - filter: model.layers.3.mlp.up_proj
        value: 0.34344524171162205
      - filter: model.layers.4.mlp.up_proj
        value: 0.3439951814801236
      - filter: model.layers.5.mlp.up_proj
        value: 0.3482899491960404
      - filter: model.layers.6.mlp.up_proj
        value: 0.34933745351699574
      - filter: model.layers.7.mlp.up_proj
        value: 0.34700675640287015
      - filter: model.layers.8.mlp.up_proj
        value: 0.35025401979783166
      - filter: model.layers.9.mlp.up_proj
        value: 0.34996595610956893
      - filter: model.layers.10.mlp.up_proj
        value: 0.3525585293039334
      - filter: model.layers.11.mlp.up_proj
        value: 0.3536060336248887
      - filter: model.layers.12.mlp.up_proj
        value: 0.358686429581522
      - filter: model.layers.13.mlp.up_proj
        value: 0.3634525742418687
      - filter: model.layers.14.mlp.up_proj
        value: 0.3718849840255591
      - filter: model.layers.15.mlp.up_proj
        value: 0.38311946786780493
      - filter: model.layers.16.mlp.up_proj
        value: 0.3967108364322003
      - filter: model.layers.17.mlp.up_proj
        value: 0.4041481171109831
      - filter: model.layers.18.mlp.up_proj
        value: 0.4013460430524276
      - filter: model.layers.19.mlp.up_proj
        value: 0.40493374535169957
      - filter: model.layers.20.mlp.up_proj
        value: 0.408861886555282
      - filter: model.layers.21.mlp.up_proj
        value: 0.41176871104593304
      - filter: model.layers.22.mlp.up_proj
        value: 0.4101974545645001
      - filter: model.layers.23.mlp.up_proj
        value: 0.4023411721573351
      - filter: model.layers.24.mlp.up_proj
        value: 0.39652752317603307
      - filter: model.layers.25.mlp.up_proj
        value: 0.40742156811396846
      - filter: model.layers.26.mlp.up_proj
        value: 0.4191274289006442
      - filter: model.layers.27.mlp.up_proj
        value: 0.4394490127271775
      - filter: model.layers.28.mlp.up_proj
        value: 0.42931440842193475
      - filter: model.layers.29.mlp.up_proj
        value: 0.44222489917770913
      - filter: model.layers.30.mlp.up_proj
        value: 0.4493741161682292
      - filter: model.layers.31.mlp.up_proj
        value: 0.3794532027444613
      - filter: model.layers.0.mlp.down_proj
        value: 0.3215785890116797
      - filter: model.layers.1.mlp.down_proj
        value: 0.34407374430419524
      - filter: model.layers.2.mlp.down_proj
        value: 0.3128319279317027
      - filter: model.layers.3.mlp.down_proj
        value: 0.32330697114125595
      - filter: model.layers.4.mlp.down_proj
        value: 0.3263185460640025
      - filter: model.layers.5.mlp.down_proj
        value: 0.3317393809249463
      - filter: model.layers.6.mlp.down_proj
        value: 0.3353008956161944
      - filter: model.layers.7.mlp.down_proj
        value: 0.3378934688105588
      - filter: model.layers.8.mlp.down_proj
        value: 0.3421358613104279
      - filter: model.layers.9.mlp.down_proj
        value: 0.3419263604462368
      - filter: model.layers.10.mlp.down_proj
        value: 0.3389933483475619
      - filter: model.layers.11.mlp.down_proj
        value: 0.33818153249882155
      - filter: model.layers.12.mlp.down_proj
        value: 0.3378934688105588
      - filter: model.layers.13.mlp.down_proj
        value: 0.35009689414968836
      - filter: model.layers.14.mlp.down_proj
        value: 0.3351437699680511
      - filter: model.layers.15.mlp.down_proj
        value: 0.3680354056460483
      - filter: model.layers.16.mlp.down_proj
        value: 0.3725920494422039
      - filter: model.layers.17.mlp.down_proj
        value: 0.36947572408736185
      - filter: model.layers.18.mlp.down_proj
        value: 0.3621693814486985
      - filter: model.layers.19.mlp.down_proj
        value: 0.3649452678992301
      - filter: model.layers.20.mlp.down_proj
        value: 0.36468339181899123
      - filter: model.layers.21.mlp.down_proj
        value: 0.3756036243649505
      - filter: model.layers.22.mlp.down_proj
        value: 0.35449641229770074
      - filter: model.layers.23.mlp.down_proj
        value: 0.34637825381029697
      - filter: model.layers.24.mlp.down_proj
        value: 0.3522442780076468
      - filter: model.layers.25.mlp.down_proj
        value: 0.3727491750903472
      - filter: model.layers.26.mlp.down_proj
        value: 0.4206986853820772
      - filter: model.layers.27.mlp.down_proj
        value: 0.4077620070182789
      - filter: model.layers.28.mlp.down_proj
        value: 0.42363169748075213
      - filter: model.layers.29.mlp.down_proj
        value: 0.534091028125491
      - filter: model.layers.30.mlp.down_proj
        value: 0.6001361755617243
      - filter: model.layers.31.mlp.down_proj
        value: 0.8
      - value: 1.0
tokenizer_source: union
```