File size: 478 Bytes
7d37d64 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 |
# Mixture-of-experts merge recipe. The key layout matches the mergekit
# `mergekit-moe` config schema -- confirm against the consuming tool.
base_model: Qwen/QwQ-32B
# NOTE(review): with mergekit, `random` initialises the router gates randomly
# instead of deriving them from prompts -- confirm for the version in use.
gate_mode: random
architecture: qwen
dtype: bfloat16

# Eight routed experts, every one taken from the same checkpoint as base_model.
experts:
  - source_model: Qwen/QwQ-32B
  - source_model: Qwen/QwQ-32B
  - source_model: Qwen/QwQ-32B
  - source_model: Qwen/QwQ-32B
  - source_model: Qwen/QwQ-32B
  - source_model: Qwen/QwQ-32B
  - source_model: Qwen/QwQ-32B
  - source_model: Qwen/QwQ-32B

# A single shared expert, again the same checkpoint.
shared_experts:
  - source_model: Qwen/QwQ-32B
    # residual_scale is a property of this shared expert, so it is nested in
    # the list item; at column 0 it would be read as an unrelated top-level key.
    # Downweight output from shared expert to prevent overcooking the model.
    residual_scale: 0.1
|