# Multi-stage merge recipe (one YAML document per stage, separated by `---`).
# Each stage declares a `name`; later stages refer to earlier results by that
# name in `base_model` / `models`. Final output: the last document's `name`.
#
# NOTE(review): the arithmetic summaries below assume the usual task-arithmetic
# definition (result = base + sum(weight_i * (model_i - base))); with weights
# that sum to zero the base terms cancel. Confirm against the merge tool's docs.

# Stage 1: starting from Llama-3.3-70B-Instruct, apply -1.0 x Llama-3.1-70B and
# +1.0 x Llama-3.1-Swallow-70B-v0.1, i.e. add the (Swallow - Llama-3.1 base) delta.
merge_method: task_arithmetic
base_model: meta-llama/Llama-3.3-70B-Instruct
models:
  - model: meta-llama/Llama-3.1-70B
    parameters:
      weight: -1.0
  - model: tokyotech-llm/Llama-3.1-Swallow-70B-v0.1
    parameters:
      weight: 1.0
dtype: bfloat16
name: Llama-3.3-SuperSwallow-70B-Instruct-v0.1-preset-llama33
---
# Stage 2: on top of the stage-1 result, apply -1.0 x Llama-3.1-70B-Instruct and
# +1.0 x Llama-3.1-Nemotron-70B-Instruct-HF (adds the Nemotron - Instruct delta).
merge_method: task_arithmetic
base_model: Llama-3.3-SuperSwallow-70B-Instruct-v0.1-preset-llama33
models:
  - model: meta-llama/Llama-3.1-70B-Instruct
    parameters:
      weight: -1.0
  - model: nvidia/Llama-3.1-Nemotron-70B-Instruct-HF
    parameters:
      weight: 1.0
dtype: bfloat16
name: Llama-3.3-SuperSwallow-70B-Instruct-v0.1-preset-nemollama33
---
# Stage 3 (branch "tulu"): on top of the stage-2 result, apply
# -0.8 x Llama-3.1-70B and +0.8 x Llama-3.1-Tulu-3-70B.
merge_method: task_arithmetic
base_model: Llama-3.3-SuperSwallow-70B-Instruct-v0.1-preset-nemollama33
models:
  - model: meta-llama/Llama-3.1-70B
    parameters:
      weight: -0.8
  - model: allenai/Llama-3.1-Tulu-3-70B
    parameters:
      weight: 0.8
dtype: bfloat16
name: Llama-3.3-SuperSwallow-70B-Instruct-v0.1-preset-tulu
---
# Stage 4 (branch "swallow"): also on top of the stage-2 result, apply
# -0.8 x Llama-3.1-Swallow-70B-v0.1 and +0.8 x Llama-3.1-Swallow-70B-Instruct-v0.1.
merge_method: task_arithmetic
base_model: Llama-3.3-SuperSwallow-70B-Instruct-v0.1-preset-nemollama33
models:
  - model: tokyotech-llm/Llama-3.1-Swallow-70B-v0.1
    parameters:
      weight: -0.8
  - model: tokyotech-llm/Llama-3.1-Swallow-70B-Instruct-v0.1
    parameters:
      weight: 0.8
dtype: bfloat16
name: Llama-3.3-SuperSwallow-70B-Instruct-v0.1-preset-swallow
---
# Stage 5 (final): combine the "tulu" and "swallow" branches with the
# model_stock method, using the stage-2 result as the shared base model.
merge_method: model_stock
base_model: Llama-3.3-SuperSwallow-70B-Instruct-v0.1-preset-nemollama33
models:
  - model: Llama-3.3-SuperSwallow-70B-Instruct-v0.1-preset-tulu
  - model: Llama-3.3-SuperSwallow-70B-Instruct-v0.1-preset-swallow
dtype: bfloat16
name: Llama-3.3-SuperSwallow-70B-Instruct-v0.1
|