vidhirambhia's picture
Upload folder using huggingface_hub
93c6978 verified
metadata
# Model-card metadata for the Hugging Face Hub.
# Models involved in this merge (the merge configuration further down names
# meta-llama/Llama-3.1-8B as the base).
base_model:
  - MathGenie/MathCoder2-Llama-3-8B
  - meta-llama/Llama-3.1-8B
# Library declared for loading the merged model.
library_name: transformers
# Tags marking this repository as a mergekit merge.
tags:
  - mergekit
  - merge

mathcoder_8b_0.3_0.8_generated_ties_0.5_sparsity_15_samples-merged

This is a merge of pre-trained language models created using mergekit.

Merge Details

Merge Method

This model was merged with the TIES merge method, using meta-llama/Llama-3.1-8B as the base model.

Models Merged

The following models were included in the merge:

- MathGenie/MathCoder2-Llama-3-8B

Configuration

The following YAML configuration was used to produce this model:

# Top-level merge settings: the merge algorithm, the base model it is
# applied against, and the dtype setting for the merge.
merge_method: ties
base_model: meta-llama/Llama-3.1-8B
dtype: bfloat16
# Global merge parameters (apply to the whole merge, not to a single model).
# Both options are on/off switches, so they are written as canonical YAML
# booleans rather than the float 1.0, which only worked by being truthy.
parameters:
  int8_mask: true
  normalize: true
# A single slice with two sources, both using layer_range [0, 32]; the
# per-layer filters further down address layers 0 through 31.
slices:
- sources:
  # Base model: contributes no per-model parameters of its own.
  - layer_range: [0, 32]
    model: meta-llama/Llama-3.1-8B
  # Model merged onto the base, with per-tensor parameters.
  - layer_range: [0, 32]
    model: MathGenie/MathCoder2-Llama-3-8B
    parameters:
      # One unconditional density value of 1.0 for every tensor.
      # NOTE(review): the model name mentions "0.5_sparsity" while the density
      # here is 1.0 -- confirm this is the intended setting.
      density:
      - value: 1.0
      # Per-tensor merge weights selected by `filter` (tensor-name match).
      # Every filtered value in this list lies between 0.3 and 0.8.
      weight:
      # Weights for the self_attn.q_proj tensors, layers 0-31.
      - filter: model.layers.0.self_attn.q_proj
        value: 0.34567118839365213
      - filter: model.layers.1.self_attn.q_proj
        value: 0.3194050175456974
      - filter: model.layers.2.self_attn.q_proj
        value: 0.35258471691195725
      - filter: model.layers.3.self_attn.q_proj
        value: 0.3345938301995496
      - filter: model.layers.4.self_attn.q_proj
        value: 0.30193788299376734
      - filter: model.layers.5.self_attn.q_proj
        value: 0.3173885717278584
      - filter: model.layers.6.self_attn.q_proj
        value: 0.3099251034410517
      - filter: model.layers.7.self_attn.q_proj
        value: 0.32521866652699943
      - filter: model.layers.8.self_attn.q_proj
        value: 0.3688734091028125
      - filter: model.layers.9.self_attn.q_proj
        value: 0.3703137275441261
      - filter: model.layers.10.self_attn.q_proj
        value: 0.31995495731419893
      - filter: model.layers.11.self_attn.q_proj
        value: 0.3247211019745456
      - filter: model.layers.12.self_attn.q_proj
        value: 0.31401037029277745
      - filter: model.layers.13.self_attn.q_proj
        value: 0.3123343633792489
      - filter: model.layers.14.self_attn.q_proj
        value: 0.30020950086419107
      - filter: model.layers.15.self_attn.q_proj
        value: 0.3220499659561096
      - filter: model.layers.16.self_attn.q_proj
        value: 0.3007856282407165
      - filter: model.layers.17.self_attn.q_proj
        value: 0.3021473838579584
      - filter: model.layers.18.self_attn.q_proj
        value: 0.3078562824071649
      - filter: model.layers.19.self_attn.q_proj
        value: 0.3117844236107474
      - filter: model.layers.20.self_attn.q_proj
        value: 0.3057088985492065
      - filter: model.layers.21.self_attn.q_proj
        value: 0.3555962918347038
      - filter: model.layers.22.self_attn.q_proj
        value: 0.3584245535012832
      - filter: model.layers.23.self_attn.q_proj
        value: 0.4073430052898968
      - filter: model.layers.24.self_attn.q_proj
        value: 0.451652438066307
      - filter: model.layers.25.self_attn.q_proj
        value: 0.47265489970146124
      - filter: model.layers.26.self_attn.q_proj
        value: 0.5172262085581103
      - filter: model.layers.27.self_attn.q_proj
        value: 0.5207091604252867
      - filter: model.layers.28.self_attn.q_proj
        value: 0.5678730424763002
      - filter: model.layers.29.self_attn.q_proj
        value: 0.5689467344052794
      - filter: model.layers.30.self_attn.q_proj
        value: 0.7031844131357041
      - filter: model.layers.31.self_attn.q_proj
        value: 0.5159692033729639
      # Weights for the self_attn.k_proj tensors, layers 0-31.
      - filter: model.layers.0.self_attn.k_proj
        value: 0.32548054260723824
      - filter: model.layers.1.self_attn.k_proj
        value: 0.32346409678939925
      - filter: model.layers.2.self_attn.k_proj
        value: 0.3201120829623422
      - filter: model.layers.3.self_attn.k_proj
        value: 0.30683496569423346
      - filter: model.layers.4.self_attn.k_proj
        value: 0.31762426020007334
      - filter: model.layers.5.self_attn.k_proj
        value: 0.30327345100298536
      - filter: model.layers.6.self_attn.k_proj
        value: 0.3263185460640025
      - filter: model.layers.7.self_attn.k_proj
        value: 0.3229665322369455
      - filter: model.layers.8.self_attn.k_proj
        value: 0.3647619546430629
      - filter: model.layers.9.self_attn.k_proj
        value: 0.3679830304300005
      - filter: model.layers.10.self_attn.k_proj
        value: 0.34794951029172994
      - filter: model.layers.11.self_attn.k_proj
        value: 0.3474519457392762
      - filter: model.layers.12.self_attn.k_proj
        value: 0.3480018855077777
      - filter: model.layers.13.self_attn.k_proj
        value: 0.38031739380924945
      - filter: model.layers.14.self_attn.k_proj
        value: 0.35365840884093647
      - filter: model.layers.15.self_attn.k_proj
        value: 0.35551772901063217
      - filter: model.layers.16.self_attn.k_proj
        value: 0.3715707327292725
      - filter: model.layers.17.self_attn.k_proj
        value: 0.3642382024825852
      - filter: model.layers.18.self_attn.k_proj
        value: 0.3526109045199811
      - filter: model.layers.19.self_attn.k_proj
        value: 0.3439428062640758
      - filter: model.layers.20.self_attn.k_proj
        value: 0.3556224794427277
      - filter: model.layers.21.self_attn.k_proj
        value: 0.390713874194731
      - filter: model.layers.22.self_attn.k_proj
        value: 0.38280521657151834
      - filter: model.layers.23.self_attn.k_proj
        value: 0.3970774629445346
      - filter: model.layers.24.self_attn.k_proj
        value: 0.4266170847954748
      - filter: model.layers.25.self_attn.k_proj
        value: 0.4180275493636411
      - filter: model.layers.26.self_attn.k_proj
        value: 0.4680196930812339
      - filter: model.layers.27.self_attn.k_proj
        value: 0.4603205363222123
      - filter: model.layers.28.self_attn.k_proj
        value: 0.49577855758655004
      - filter: model.layers.29.self_attn.k_proj
        value: 0.49716650081181585
      - filter: model.layers.30.self_attn.k_proj
        value: 0.610715969203373
      - filter: model.layers.31.self_attn.k_proj
        value: 0.46008484784999737
      # Weights for the self_attn.v_proj tensors, layers 0-31.
      - filter: model.layers.0.self_attn.v_proj
        value: 0.3039805164196302
      - filter: model.layers.1.self_attn.v_proj
        value: 0.31008222908919497
      - filter: model.layers.2.self_attn.v_proj
        value: 0.3261614204158592
      - filter: model.layers.3.self_attn.v_proj
        value: 0.34339286649557427
      - filter: model.layers.4.self_attn.v_proj
        value: 0.33574608495260044
      - filter: model.layers.5.self_attn.v_proj
        value: 0.3314251296286597
      - filter: model.layers.6.self_attn.v_proj
        value: 0.3357722725606243
      - filter: model.layers.7.self_attn.v_proj
        value: 0.3345938301995496
      - filter: model.layers.8.self_attn.v_proj
        value: 0.33404389043104804
      - filter: model.layers.9.self_attn.v_proj
        value: 0.36114806473576705
      - filter: model.layers.10.self_attn.v_proj
        value: 0.3461163777300581
      - filter: model.layers.11.self_attn.v_proj
        value: 0.34881370135651807
      - filter: model.layers.12.self_attn.v_proj
        value: 0.35465353794584403
      - filter: model.layers.13.self_attn.v_proj
        value: 0.3599958099827162
      - filter: model.layers.14.self_attn.v_proj
        value: 0.36486670507515845
      - filter: model.layers.15.self_attn.v_proj
        value: 0.3886450531608443
      - filter: model.layers.16.self_attn.v_proj
        value: 0.3802650185932017
      - filter: model.layers.17.self_attn.v_proj
        value: 0.3948253286544807
      - filter: model.layers.18.self_attn.v_proj
        value: 0.4028911119258367
      - filter: model.layers.19.self_attn.v_proj
        value: 0.41336615513538993
      - filter: model.layers.20.self_attn.v_proj
        value: 0.4098832032682135
      - filter: model.layers.21.self_attn.v_proj
        value: 0.39906772115434974
      - filter: model.layers.22.self_attn.v_proj
        value: 0.4269051484837375
      - filter: model.layers.23.self_attn.v_proj
        value: 0.4284502173571466
      - filter: model.layers.24.self_attn.v_proj
        value: 0.43452574241868747
      - filter: model.layers.25.self_attn.v_proj
        value: 0.4286073430052899
      - filter: model.layers.26.self_attn.v_proj
        value: 0.421196249934531
      - filter: model.layers.27.self_attn.v_proj
        value: 0.43709212800502806
      - filter: model.layers.28.self_attn.v_proj
        value: 0.4468339181899125
      - filter: model.layers.29.self_attn.v_proj
        value: 0.4439009060912376
      - filter: model.layers.30.self_attn.v_proj
        value: 0.4961451840988844
      - filter: model.layers.31.self_attn.v_proj
        value: 0.36374063793013145
      # Weights for the self_attn.o_proj tensors, layers 0-31.
      # Layer 3 carries the smallest weight in the whole list (0.3).
      - filter: model.layers.0.self_attn.o_proj
        value: 0.301335568009218
      - filter: model.layers.1.self_attn.o_proj
        value: 0.30623265070968414
      - filter: model.layers.2.self_attn.o_proj
        value: 0.3042685801078929
      - filter: model.layers.3.self_attn.o_proj
        value: 0.3
      - filter: model.layers.4.self_attn.o_proj
        value: 0.32671136018436076
      - filter: model.layers.5.self_attn.o_proj
        value: 0.3351961451840989
      - filter: model.layers.6.self_attn.o_proj
        value: 0.30518514638872885
      - filter: model.layers.7.self_attn.o_proj
        value: 0.3552820405384172
      - filter: model.layers.8.self_attn.o_proj
        value: 0.358241240245116
      - filter: model.layers.9.self_attn.o_proj
        value: 0.35758655004451895
      - filter: model.layers.10.self_attn.o_proj
        value: 0.3841669721887603
      - filter: model.layers.11.self_attn.o_proj
        value: 0.3434714293196459
      - filter: model.layers.12.self_attn.o_proj
        value: 0.38160058660241974
      - filter: model.layers.13.self_attn.o_proj
        value: 0.3708374797046038
      - filter: model.layers.14.self_attn.o_proj
        value: 0.4145707851044885
      - filter: model.layers.15.self_attn.o_proj
        value: 0.39864871942596763
      - filter: model.layers.16.self_attn.o_proj
        value: 0.41519928769706177
      - filter: model.layers.17.self_attn.o_proj
        value: 0.31864557691300477
      - filter: model.layers.18.self_attn.o_proj
        value: 0.3101084166972189
      - filter: model.layers.19.self_attn.o_proj
        value: 0.3196145184098884
      - filter: model.layers.20.self_attn.o_proj
        value: 0.4667365002880637
      - filter: model.layers.21.self_attn.o_proj
        value: 0.3742418687477086
      - filter: model.layers.22.self_attn.o_proj
        value: 0.3403812915728277
      - filter: model.layers.23.self_attn.o_proj
        value: 0.3232284083171843
      - filter: model.layers.24.self_attn.o_proj
        value: 0.3742418687477086
      - filter: model.layers.25.self_attn.o_proj
        value: 0.6244644634159116
      - filter: model.layers.26.self_attn.o_proj
        value: 0.5438066307023517
      - filter: model.layers.27.self_attn.o_proj
        value: 0.542366312261038
      - filter: model.layers.28.self_attn.o_proj
        value: 0.4544021369088147
      - filter: model.layers.29.self_attn.o_proj
        value: 0.4355732467396428
      - filter: model.layers.30.self_attn.o_proj
        value: 0.4489027392237993
      - filter: model.layers.31.self_attn.o_proj
        value: 0.3717016707693919
      # Weights for the mlp.gate_proj tensors, layers 0-31.
      - filter: model.layers.0.mlp.gate_proj
        value: 0.321735714659823
      - filter: model.layers.1.mlp.gate_proj
        value: 0.32713036191274286
      - filter: model.layers.2.mlp.gate_proj
        value: 0.3402765411407322
      - filter: model.layers.3.mlp.gate_proj
        value: 0.34344524171162205
      - filter: model.layers.4.mlp.gate_proj
        value: 0.34572356360969986
      - filter: model.layers.5.mlp.gate_proj
        value: 0.34881370135651807
      - filter: model.layers.6.mlp.gate_proj
        value: 0.3491017650447808
      - filter: model.layers.7.mlp.gate_proj
        value: 0.34637825381029697
      - filter: model.layers.8.mlp.gate_proj
        value: 0.351301524118787
      - filter: model.layers.9.mlp.gate_proj
        value: 0.35083014717435707
      - filter: model.layers.10.mlp.gate_proj
        value: 0.35062064631016604
      - filter: model.layers.11.mlp.gate_proj
        value: 0.35083014717435707
      - filter: model.layers.12.mlp.gate_proj
        value: 0.35724611114020843
      - filter: model.layers.13.mlp.gate_proj
        value: 0.36078143822343267
      - filter: model.layers.14.mlp.gate_proj
        value: 0.3717278583774158
      - filter: model.layers.15.mlp.gate_proj
        value: 0.3819410255067302
      - filter: model.layers.16.mlp.gate_proj
        value: 0.3969203372963913
      - filter: model.layers.17.mlp.gate_proj
        value: 0.4064526266170848
      - filter: model.layers.18.mlp.gate_proj
        value: 0.4028387367097889
      - filter: model.layers.19.mlp.gate_proj
        value: 0.40865238569109097
      - filter: model.layers.20.mlp.gate_proj
        value: 0.4113758969255748
      - filter: model.layers.21.mlp.gate_proj
        value: 0.4152778505211334
      - filter: model.layers.22.mlp.gate_proj
        value: 0.41237102603048237
      - filter: model.layers.23.mlp.gate_proj
        value: 0.4021316712931441
      - filter: model.layers.24.mlp.gate_proj
        value: 0.3956895197192688
      - filter: model.layers.25.mlp.gate_proj
        value: 0.407290630073849
      - filter: model.layers.26.mlp.gate_proj
        value: 0.41842036348399936
      - filter: model.layers.27.mlp.gate_proj
        value: 0.43950138794322524
      - filter: model.layers.28.mlp.gate_proj
        value: 0.4232912585764416
      - filter: model.layers.29.mlp.gate_proj
        value: 0.43353061331377996
      - filter: model.layers.30.mlp.gate_proj
        value: 0.4429843398104017
      - filter: model.layers.31.mlp.gate_proj
        value: 0.39241606871628343
      # Weights for the mlp.up_proj tensors, layers 0-31.
      - filter: model.layers.0.mlp.up_proj
        value: 0.31846226365683755
      - filter: model.layers.1.mlp.up_proj
        value: 0.3282826166657937
      - filter: model.layers.2.mlp.up_proj
        value: 0.34148117110983084
      - filter: model.layers.3.mlp.up_proj
        value: 0.34344524171162205
      - filter: model.layers.4.mlp.up_proj
        value: 0.3439951814801236
      - filter: model.layers.5.mlp.up_proj
        value: 0.3482899491960404
      - filter: model.layers.6.mlp.up_proj
        value: 0.34933745351699574
      - filter: model.layers.7.mlp.up_proj
        value: 0.34700675640287015
      - filter: model.layers.8.mlp.up_proj
        value: 0.35025401979783166
      - filter: model.layers.9.mlp.up_proj
        value: 0.34996595610956893
      - filter: model.layers.10.mlp.up_proj
        value: 0.3525585293039334
      - filter: model.layers.11.mlp.up_proj
        value: 0.3536060336248887
      - filter: model.layers.12.mlp.up_proj
        value: 0.358686429581522
      - filter: model.layers.13.mlp.up_proj
        value: 0.3634525742418687
      - filter: model.layers.14.mlp.up_proj
        value: 0.3718849840255591
      - filter: model.layers.15.mlp.up_proj
        value: 0.38311946786780493
      - filter: model.layers.16.mlp.up_proj
        value: 0.3967108364322003
      - filter: model.layers.17.mlp.up_proj
        value: 0.4041481171109831
      - filter: model.layers.18.mlp.up_proj
        value: 0.4013460430524276
      - filter: model.layers.19.mlp.up_proj
        value: 0.40493374535169957
      - filter: model.layers.20.mlp.up_proj
        value: 0.408861886555282
      - filter: model.layers.21.mlp.up_proj
        value: 0.41176871104593304
      - filter: model.layers.22.mlp.up_proj
        value: 0.4101974545645001
      - filter: model.layers.23.mlp.up_proj
        value: 0.4023411721573351
      - filter: model.layers.24.mlp.up_proj
        value: 0.39652752317603307
      - filter: model.layers.25.mlp.up_proj
        value: 0.40742156811396846
      - filter: model.layers.26.mlp.up_proj
        value: 0.4191274289006442
      - filter: model.layers.27.mlp.up_proj
        value: 0.4394490127271775
      - filter: model.layers.28.mlp.up_proj
        value: 0.42931440842193475
      - filter: model.layers.29.mlp.up_proj
        value: 0.44222489917770913
      - filter: model.layers.30.mlp.up_proj
        value: 0.4493741161682292
      - filter: model.layers.31.mlp.up_proj
        value: 0.3794532027444613
      # Weights for the mlp.down_proj tensors, layers 0-31.
      # Layer 31 carries the largest weight in the whole list (0.8).
      - filter: model.layers.0.mlp.down_proj
        value: 0.3215785890116797
      - filter: model.layers.1.mlp.down_proj
        value: 0.34407374430419524
      - filter: model.layers.2.mlp.down_proj
        value: 0.3128319279317027
      - filter: model.layers.3.mlp.down_proj
        value: 0.32330697114125595
      - filter: model.layers.4.mlp.down_proj
        value: 0.3263185460640025
      - filter: model.layers.5.mlp.down_proj
        value: 0.3317393809249463
      - filter: model.layers.6.mlp.down_proj
        value: 0.3353008956161944
      - filter: model.layers.7.mlp.down_proj
        value: 0.3378934688105588
      - filter: model.layers.8.mlp.down_proj
        value: 0.3421358613104279
      - filter: model.layers.9.mlp.down_proj
        value: 0.3419263604462368
      - filter: model.layers.10.mlp.down_proj
        value: 0.3389933483475619
      - filter: model.layers.11.mlp.down_proj
        value: 0.33818153249882155
      - filter: model.layers.12.mlp.down_proj
        value: 0.3378934688105588
      - filter: model.layers.13.mlp.down_proj
        value: 0.35009689414968836
      - filter: model.layers.14.mlp.down_proj
        value: 0.3351437699680511
      - filter: model.layers.15.mlp.down_proj
        value: 0.3680354056460483
      - filter: model.layers.16.mlp.down_proj
        value: 0.3725920494422039
      - filter: model.layers.17.mlp.down_proj
        value: 0.36947572408736185
      - filter: model.layers.18.mlp.down_proj
        value: 0.3621693814486985
      - filter: model.layers.19.mlp.down_proj
        value: 0.3649452678992301
      - filter: model.layers.20.mlp.down_proj
        value: 0.36468339181899123
      - filter: model.layers.21.mlp.down_proj
        value: 0.3756036243649505
      - filter: model.layers.22.mlp.down_proj
        value: 0.35449641229770074
      - filter: model.layers.23.mlp.down_proj
        value: 0.34637825381029697
      - filter: model.layers.24.mlp.down_proj
        value: 0.3522442780076468
      - filter: model.layers.25.mlp.down_proj
        value: 0.3727491750903472
      - filter: model.layers.26.mlp.down_proj
        value: 0.4206986853820772
      - filter: model.layers.27.mlp.down_proj
        value: 0.4077620070182789
      - filter: model.layers.28.mlp.down_proj
        value: 0.42363169748075213
      - filter: model.layers.29.mlp.down_proj
        value: 0.534091028125491
      - filter: model.layers.30.mlp.down_proj
        value: 0.6001361755617243
      - filter: model.layers.31.mlp.down_proj
        value: 0.8
      # Final entry has no `filter`; presumably the fallback weight for tensors
      # that none of the filters above match -- confirm against mergekit docs.
      - value: 1.0
# Tokenizer setting for the merged model.
# NOTE(review): `union` presumably builds the vocabulary from the tokenizers of
# all models in the merge -- confirm against mergekit's tokenizer_source docs.
tokenizer_source: union