---
base_model:
- meta-llama/Meta-Llama-3-8B-Instruct
- meta-llama/Meta-Llama-3-8B
library_name: transformers
tags:
- mergekit
- merge
license: other
license_name: llama3
license_link: LICENSE
extra_gated_prompt: >-
  ### META LLAMA 3 COMMUNITY LICENSE AGREEMENT
---

# Llama-3-DARE-v1-8B

This is a merge of pre-trained language models created using [mergekit](https://github.com/cg123/mergekit).

## Merge Details
### Merge Method

This model was merged using the [DARE](https://arxiv.org/abs/2311.03099) [TIES](https://arxiv.org/abs/2306.01708) merge method, with [meta-llama/Meta-Llama-3-8B](https://huggingface.co/meta-llama/Meta-Llama-3-8B) as the base model.

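Roughly speaking, DARE randomly drops a fraction of each fine-tuned model's delta (its "task vector") and rescales the surviving entries so the expected update is preserved; TIES then resolves sign conflicts and combines the sparse deltas. The per-tensor `density` values in the configuration below set the keep probability and the `weight` values set each model's contribution. A minimal sketch of the DARE drop-and-rescale step (illustrative only, not mergekit's internals):

```python
import torch

def dare_drop_and_rescale(finetuned: torch.Tensor,
                          base: torch.Tensor,
                          density: float) -> torch.Tensor:
    """Keep each delta entry with probability `density`, rescale survivors by 1/density."""
    delta = finetuned - base                 # task vector of the fine-tuned model
    mask = torch.rand_like(delta) < density  # ~density of entries survive
    return base + mask * delta / density     # rescaling keeps the expected delta unchanged
```
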
### Models Merged

The following models were included in the merge:
* [meta-llama/Meta-Llama-3-8B-Instruct](https://huggingface.co/meta-llama/Meta-Llama-3-8B-Instruct)

### Configuration

The following YAML configuration was used to produce this model:

```yaml
base_model:
  model:
    path: meta-llama/Meta-Llama-3-8B
dtype: bfloat16
merge_method: dare_ties
parameters:
  int8_mask: 1.0
  normalize: 0.0
slices:
- sources:
  - layer_range: [0, 32]
    model:
      model:
        path: meta-llama/Meta-Llama-3-8B-Instruct
    parameters:
      density:
      - filter: embed_token
        value: 0.07928115257656705
      - filter: model.norm
        value: 0.8644655087483669
      - filter: lm_head
        value: 0.8644655087483669
      - filter: layers.0.
        value: 0.7023215975496551
      - filter: layers.1.
        value: 0.5930151025883963
      - filter: layers.2.
        value: 0.9235600142754441
      - filter: layers.3.
        value: 0.30002273183016587
      - filter: layers.4.
        value: 0.8718725802605614
      - filter: layers.5.
        value: 0.8431819361408258
      - filter: layers.6.
        value: 0.26586459452250655
      - filter: layers.7.
        value: 0.20565052604836487
      - filter: layers.8.
        value: 0.7865609337773642
      - filter: layers.9.
        value: 0.8707795208055977
      - filter: layers.10.
        value: 0.19128908483148235
      - filter: layers.11.
        value: 0.925076317937204
      - filter: layers.12.
        value: 0.3764120891817022
      - filter: layers.13.
        value: 0.1537759874314265
      - filter: layers.14.
        value: 0.8827103791905803
      - filter: layers.15.
        value: 0.12994574083319627
      - filter: layers.16.
        value: 0.9421382084146839
      - filter: layers.17.
        value: 0.19784189207350633
      - filter: layers.18.
        value: 0.9174803701757963
      - filter: layers.19.
        value: 0.018511680579063827
      - filter: layers.20.
        value: 0.6089701838828931
      - filter: layers.21.
        value: 0.10667940525089978
      - filter: layers.22.
        value: 0.7569387463687963
      - filter: layers.23.
        value: 0.8617573261932969
      - filter: layers.24.
        value: 0.8851543429501336
      - filter: layers.25.
        value: 0.788944964233173
      - filter: layers.26.
        value: 0.022117233408225547
      - filter: layers.27.
        value: 0.9534983183659907
      - filter: layers.28.
        value: 0.792604065605754
      - filter: layers.29.
        value: 0.861671120755631
      - filter: layers.30.
        value: 0.2948434371778307
      - filter: layers.31.
        value: 0.18891036670059388
      weight:
      - filter: embed_token
        value: 0.244303585022914
      - filter: model.norm.
        value: 0.7921741064842713
      - filter: lm_head
        value: 0.7921741064842713
      - filter: layers.0.
        value: 0.7781818034133037
      - filter: layers.1.
        value: 0.8847835098465024
      - filter: layers.2.
        value: 0.6397880690508684
      - filter: layers.3.
        value: 0.7937466750856308
      - filter: layers.4.
        value: 0.8172928460203015
      - filter: layers.5.
        value: 0.8653872213596285
      - filter: layers.6.
        value: 0.9693001060347038
      - filter: layers.7.
        value: 0.8929433929262818
      - filter: layers.8.
        value: 0.8148535632999063
      - filter: layers.9.
        value: 0.9540888550674099
      - filter: layers.10.
        value: 0.9588139020445848
      - filter: layers.11.
        value: 0.9210433265234956
      - filter: layers.12.
        value: 0.7406799222922607
      - filter: layers.13.
        value: 0.6955899334998573
      - filter: layers.14.
        value: 0.15982240434953007
      - filter: layers.15.
        value: 0.911878214829457
      - filter: layers.16.
        value: 0.03946607654759535
      - filter: layers.17.
        value: 0.8322149510147696
      - filter: layers.18.
        value: 0.3733540416200388
      - filter: layers.19.
        value: 0.6932851946473836
      - filter: layers.20.
        value: 0.8918513138412437
      - filter: layers.21.
        value: 0.08682354503629197
      - filter: layers.22.
        value: 0.017627513785178843
      - filter: layers.23.
        value: 0.5246168031857519
      - filter: layers.24.
        value: 0.9561735656673052
      - filter: layers.25.
        value: 0.9036966249098499
      - filter: layers.26.
        value: 0.3270824190389672
      - filter: layers.27.
        value: 0.8585505386205572
      - filter: layers.28.
        value: 0.768055374051272
      - filter: layers.29.
        value: 0.8343611910799547
      - filter: layers.30.
        value: 0.9794887267657514
      - filter: layers.31.
        value: 0.9143801295721963
  - layer_range: [0, 32]
    model:
      model:
        path: meta-llama/Meta-Llama-3-8B
    parameters:
      density:
      - filter: embed_token
        value: 0.8288618306009948
      - filter: model.norm
        value: 0.23292234562975161
      - filter: lm_head
        value: 0.23292234562975161
      - filter: layers.0.
        value: 0.372454056194375
      - filter: layers.1.
        value: 0.14253218919354635
      - filter: layers.2.
        value: 0.3078424546105084
      - filter: layers.3.
        value: 0.25930581838885686
      - filter: layers.4.
        value: 0.978321706776098
      - filter: layers.5.
        value: 0.05609446797559111
      - filter: layers.6.
        value: 0.24314602951396613
      - filter: layers.7.
        value: 0.911885464400789
      - filter: layers.8.
        value: 0.7414851452758723
      - filter: layers.9.
        value: 0.18093642039158353
      - filter: layers.10.
        value: 0.9084501788509814
      - filter: layers.11.
        value: 0.2484987778735853
      - filter: layers.12.
        value: 0.02864842329393736
      - filter: layers.13.
        value: 0.035996105784139906
      - filter: layers.14.
        value: 0.022814159286914156
      - filter: layers.15.
        value: 0.8982289455883636
      - filter: layers.16.
        value: 0.6749279016774873
      - filter: layers.17.
        value: 0.2231666521672218
      - filter: layers.18.
        value: 0.08042893992871515
      - filter: layers.19.
        value: 0.9245191619395147
      - filter: layers.20.
        value: 0.10245727381170405
      - filter: layers.21.
        value: 0.6351565505035355
      - filter: layers.22.
        value: 0.13631302757594085
      - filter: layers.23.
        value: 0.02093704907904486
      - filter: layers.24.
        value: 0.046387358073609594
      - filter: layers.25.
        value: 0.39078092530283026
      - filter: layers.26.
        value: 0.6422945171240722
      - filter: layers.27.
        value: 0.9356630912083387
      - filter: layers.28.
        value: 0.20795911007902343
      - filter: layers.29.
        value: 0.10607547240686495
      - filter: layers.30.
        value: 0.0142610579064266
      - filter: layers.31.
        value: 0.9041406411956623
      weight:
      - filter: embed_token
        value: 0.11199251330860797
      - filter: model.norm.
        value: 0.14400396011333005
      - filter: lm_head
        value: 0.14400396011333005
      - filter: layers.0.
        value: 0.04471455188049395
      - filter: layers.1.
        value: 0.9523895147273159
      - filter: layers.2.
        value: 0.8434787249757318
      - filter: layers.3.
        value: 0.8016501716390354
      - filter: layers.4.
        value: 0.19555927362126765
      - filter: layers.5.
        value: 0.11657419158719552
      - filter: layers.6.
        value: 0.08984925597011956
      - filter: layers.7.
        value: 0.2629227995683227
      - filter: layers.8.
        value: 0.9596221742529512
      - filter: layers.9.
        value: 0.754944626119186
      - filter: layers.10.
        value: 0.6975471541191236
      - filter: layers.11.
        value: 0.7057432892778266
      - filter: layers.12.
        value: 0.8718457125948287
      - filter: layers.13.
        value: 0.08609329339067016
      - filter: layers.14.
        value: 0.062238388979337526
      - filter: layers.15.
        value: 0.06582279783729371
      - filter: layers.16.
        value: 0.16911181845635778
      - filter: layers.17.
        value: 0.2734099561761782
      - filter: layers.18.
        value: 0.21849327205885294
      - filter: layers.19.
        value: 0.128433065340601
      - filter: layers.20.
        value: 0.6468521489194113
      - filter: layers.21.
        value: 0.16152478595976968
      - filter: layers.22.
        value: 0.18505624743492918
      - filter: layers.23.
        value: 0.5230576418780342
      - filter: layers.24.
        value: 0.6970657976288235
      - filter: layers.25.
        value: 0.9119218240518024
      - filter: layers.26.
        value: 0.250155176705362
      - filter: layers.27.
        value: 0.9157060215586246
      - filter: layers.28.
        value: 0.9021879089954816
      - filter: layers.29.
        value: 0.1541974948279516
      - filter: layers.30.
        value: 0.8388637604851198
      - filter: layers.31.
        value: 0.06372200149189318
```
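
The merge can be reproduced by saving this configuration and running it through mergekit (e.g. via its `mergekit-yaml` entry point), and the resulting checkpoint loads like any other Llama 3 model. A minimal usage sketch with `transformers`; the local path below is a placeholder for wherever the merged weights live (or the Hub repo id):

```python
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# Placeholder: point this at the mergekit output directory or the Hub repo id.
model_path = "./Llama-3-DARE-v1-8B"

tokenizer = AutoTokenizer.from_pretrained(model_path)
model = AutoModelForCausalLM.from_pretrained(
    model_path,
    torch_dtype=torch.bfloat16,  # matches the dtype used for the merge
    device_map="auto",           # requires accelerate
)

prompt = "Briefly explain what a model merge is."
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
outputs = model.generate(**inputs, max_new_tokens=128)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
```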
|