DavidAU's picture
Upload folder using huggingface_hub
328a773 verified
|
raw
history blame
10.9 kB
metadata
base_model: []
library_name: transformers
tags:
  - mergekit
  - merge

Utopia-20B-exp40-3-bf16

This is a merge of pre-trained language models created using mergekit.

Merge Details

Merge Method

This model was merged using the passthrough merge method.

Models Merged

The following models were included in the merge:

  • E:/Utopia-20B-V1

Configuration

The following YAML configuration was used to produce this model:

# Six splits plus "end game"
# "D" (the unfiltered scale value) starts at +.1 versus the down_proj/o_proj value (.11 vs .01).
# 40 plus.

# D/G Block Settings.
# split 1: .11 to .61 	[.04 G]	[4 layers]
# split 2: .15 to .41 	[.08 G]	[4 layers]
# split 3: .19 to .35 	[.23 G]	[12 layers]
# split 4: .11 to .61 	[.244 G]	[4 layers]
# split 5: .15 to .41 	[.248 G]	[4 layers]
# split 6: .19 to .3  	[.256 G]	[7 layers]
# final [D/G]: .33,.44,.55,.66,.77/.88	[5 layers]

# Layer slices for the merge, in the order they are listed.
# (With the passthrough method these are presumably stacked in this order —
# confirm against the mergekit documentation.)
slices:
 # Base slice: layer_range [0, 61] of the source model, with no scale
 # parameters. The single-layer slices elsewhere in this config use
 # [61, 62], so the range end appears to be exclusive.
 - sources:
   - model: E:/Utopia-20B-V1
     layer_range: [0, 61]

# conc layers
# split 1

 # Split 1: four slices, each layer_range [61, 62] of the same model.
 # o_proj/down_proj scale steps 0.01 -> 0.04; the unfiltered scale runs
 # 0.11, 0.12, 0.13 and then surges to 0.61 on the fourth slice.
 - sources:
   - model: E:/Utopia-20B-V1
     layer_range: [61, 62]
     parameters:
       scale:
         - {filter: o_proj, value: 0.01}
         - {filter: down_proj, value: 0.01}
         - {value: 0.11}
 - sources:
   - model: E:/Utopia-20B-V1
     layer_range: [61, 62]
     parameters:
       scale:
         - {filter: o_proj, value: 0.02}
         - {filter: down_proj, value: 0.02}
         - {value: 0.12}
 - sources:
   - model: E:/Utopia-20B-V1
     layer_range: [61, 62]
     parameters:
       scale:
         - {filter: o_proj, value: 0.03}
         - {filter: down_proj, value: 0.03}
         - {value: 0.13}
 - sources:
   - model: E:/Utopia-20B-V1
     layer_range: [61, 62]
     parameters:
       scale:
         - {filter: o_proj, value: 0.04}
         - {filter: down_proj, value: 0.04}
         - {value: 0.61}

# split 2, SURGE D THEN D drop .46, continues @ D .15 (from .13)

 # Split 2: four slices, each layer_range [61, 62] of the same model.
 # o_proj/down_proj scale steps 0.05 -> 0.08; the unfiltered scale runs
 # 0.15, 0.16, 0.17 and then surges to 0.41 on the fourth slice.
 - sources:
   - model: E:/Utopia-20B-V1
     layer_range: [61, 62]
     parameters:
       scale:
         - {filter: o_proj, value: 0.05}
         - {filter: down_proj, value: 0.05}
         - {value: 0.15}
 - sources:
   - model: E:/Utopia-20B-V1
     layer_range: [61, 62]
     parameters:
       scale:
         - {filter: o_proj, value: 0.06}
         - {filter: down_proj, value: 0.06}
         - {value: 0.16}
 - sources:
   - model: E:/Utopia-20B-V1
     layer_range: [61, 62]
     parameters:
       scale:
         - {filter: o_proj, value: 0.07}
         - {filter: down_proj, value: 0.07}
         - {value: 0.17}
 - sources:
   - model: E:/Utopia-20B-V1
     layer_range: [61, 62]
     parameters:
       scale:
         - {filter: o_proj, value: 0.08}
         - {filter: down_proj, value: 0.08}
         - {value: 0.41}

# split 3, SURGE D to .41, D drop .22 ... follows .17 previous

 # Split 3: twelve slices, each layer_range [61, 62] of the same model.
 # The unfiltered scale rises 0.19 -> 0.35 (steps of 0.01, then 0.02, then
 # 0.01 again); o_proj/down_proj scale rises 0.09 -> 0.23.
 # NOTE(review): the o_proj/down_proj scale jumps from 0.16 to 0.20 between
 # the 8th and 9th slice (0.17-0.19 are skipped) — confirm this is intended.
 - sources:
   - model: E:/Utopia-20B-V1
     layer_range: [61, 62]
     parameters:
       scale:
         - {filter: o_proj, value: 0.09}
         - {filter: down_proj, value: 0.09}
         - {value: 0.19}
 - sources:
   - model: E:/Utopia-20B-V1
     layer_range: [61, 62]
     parameters:
       scale:
         - {filter: o_proj, value: 0.10}
         - {filter: down_proj, value: 0.10}
         - {value: 0.20}
 - sources:
   - model: E:/Utopia-20B-V1
     layer_range: [61, 62]
     parameters:
       scale:
         - {filter: o_proj, value: 0.11}
         - {filter: down_proj, value: 0.11}
         - {value: 0.22}
 - sources:
   - model: E:/Utopia-20B-V1
     layer_range: [61, 62]
     parameters:
       scale:
         - {filter: o_proj, value: 0.12}
         - {filter: down_proj, value: 0.12}
         - {value: 0.24}
 - sources:
   - model: E:/Utopia-20B-V1
     layer_range: [61, 62]
     parameters:
       scale:
         - {filter: o_proj, value: 0.13}
         - {filter: down_proj, value: 0.13}
         - {value: 0.26}
 - sources:
   - model: E:/Utopia-20B-V1
     layer_range: [61, 62]
     parameters:
       scale:
         - {filter: o_proj, value: 0.14}
         - {filter: down_proj, value: 0.14}
         - {value: 0.28}
 - sources:
   - model: E:/Utopia-20B-V1
     layer_range: [61, 62]
     parameters:
       scale:
         - {filter: o_proj, value: 0.15}
         - {filter: down_proj, value: 0.15}
         - {value: 0.30}
 - sources:
   - model: E:/Utopia-20B-V1
     layer_range: [61, 62]
     parameters:
       scale:
         - {filter: o_proj, value: 0.16}
         - {filter: down_proj, value: 0.16}
         - {value: 0.31}
 - sources:
   - model: E:/Utopia-20B-V1
     layer_range: [61, 62]
     parameters:
       scale:
         - {filter: o_proj, value: 0.20}
         - {filter: down_proj, value: 0.20}
         - {value: 0.32}
 - sources:
   - model: E:/Utopia-20B-V1
     layer_range: [61, 62]
     parameters:
       scale:
         - {filter: o_proj, value: 0.21}
         - {filter: down_proj, value: 0.21}
         - {value: 0.33}
 - sources:
   - model: E:/Utopia-20B-V1
     layer_range: [61, 62]
     parameters:
       scale:
         - {filter: o_proj, value: 0.22}
         - {filter: down_proj, value: 0.22}
         - {value: 0.34}
 - sources:
   - model: E:/Utopia-20B-V1
     layer_range: [61, 62]
     parameters:
       scale:
         - {filter: o_proj, value: 0.23}
         - {filter: down_proj, value: 0.23}
         - {value: 0.35}

# split 4 , NO SURGE D, "D" down drop of .24 ; reverts to .11 (the very first "D" setting )

 # Split 4: four slices, each layer_range [61, 62] of the same model.
 # The unfiltered scale restarts at 0.11, runs 0.12, 0.13 and then surges to
 # 0.61; o_proj/down_proj scale creeps from 0.24 to 0.244.
 # NOTE(review): on the third slice o_proj is 0.242 but down_proj is 0.243,
 # the only slice in this split where the two differ — confirm intended.
 - sources:
   - model: E:/Utopia-20B-V1
     layer_range: [61, 62]
     parameters:
       scale:
         - {filter: o_proj, value: 0.24}
         - {filter: down_proj, value: 0.24}
         - {value: 0.11}
 - sources:
   - model: E:/Utopia-20B-V1
     layer_range: [61, 62]
     parameters:
       scale:
         - {filter: o_proj, value: 0.241}
         - {filter: down_proj, value: 0.241}
         - {value: 0.12}
 - sources:
   - model: E:/Utopia-20B-V1
     layer_range: [61, 62]
     parameters:
       scale:
         - {filter: o_proj, value: 0.242}
         - {filter: down_proj, value: 0.243}
         - {value: 0.13}
 - sources:
   - model: E:/Utopia-20B-V1
     layer_range: [61, 62]
     parameters:
       scale:
         - {filter: o_proj, value: 0.244}
         - {filter: down_proj, value: 0.244}
         - {value: 0.61}

# split 5, D Surge to .61, drop to .15 (following .13)

 # Split 5: four slices, each layer_range [61, 62] of the same model.
 # The unfiltered scale runs 0.15, 0.16, 0.17 and then surges to 0.41;
 # o_proj/down_proj scale steps 0.245 -> 0.248.
 - sources:
   - model: E:/Utopia-20B-V1
     layer_range: [61, 62]
     parameters:
       scale:
         - {filter: o_proj, value: 0.245}
         - {filter: down_proj, value: 0.245}
         - {value: 0.15}
 - sources:
   - model: E:/Utopia-20B-V1
     layer_range: [61, 62]
     parameters:
       scale:
         - {filter: o_proj, value: 0.246}
         - {filter: down_proj, value: 0.246}
         - {value: 0.16}
 - sources:
   - model: E:/Utopia-20B-V1
     layer_range: [61, 62]
     parameters:
       scale:
         - {filter: o_proj, value: 0.247}
         - {filter: down_proj, value: 0.247}
         - {value: 0.17}
 - sources:
   - model: E:/Utopia-20B-V1
     layer_range: [61, 62]
     parameters:
       scale:
         - {filter: o_proj, value: 0.248}
         - {filter: down_proj, value: 0.248}
         - {value: 0.41}

# split 6, D surge to .41 , then follows .17 

 # Split 6: seven slices, each layer_range [61, 62] of the same model.
 # The unfiltered scale rises 0.19 -> 0.30; o_proj/down_proj scale rises
 # 0.249 -> 0.256.
 # NOTE(review): on the fifth slice o_proj is 0.253 but down_proj is 0.254,
 # the only slice in this split where the two differ — confirm intended.
 - sources:
   - model: E:/Utopia-20B-V1
     layer_range: [61, 62]
     parameters:
       scale:
         - {filter: o_proj, value: 0.249}
         - {filter: down_proj, value: 0.249}
         - {value: 0.19}
 - sources:
   - model: E:/Utopia-20B-V1
     layer_range: [61, 62]
     parameters:
       scale:
         - {filter: o_proj, value: 0.250}
         - {filter: down_proj, value: 0.250}
         - {value: 0.20}
 - sources:
   - model: E:/Utopia-20B-V1
     layer_range: [61, 62]
     parameters:
       scale:
         - {filter: o_proj, value: 0.251}
         - {filter: down_proj, value: 0.251}
         - {value: 0.22}
 - sources:
   - model: E:/Utopia-20B-V1
     layer_range: [61, 62]
     parameters:
       scale:
         - {filter: o_proj, value: 0.252}
         - {filter: down_proj, value: 0.252}
         - {value: 0.24}
 - sources:
   - model: E:/Utopia-20B-V1
     layer_range: [61, 62]
     parameters:
       scale:
         - {filter: o_proj, value: 0.253}
         - {filter: down_proj, value: 0.254}
         - {value: 0.26}
 - sources:
   - model: E:/Utopia-20B-V1
     layer_range: [61, 62]
     parameters:
       scale:
         - {filter: o_proj, value: 0.255}
         - {filter: down_proj, value: 0.255}
         - {value: 0.28}
 - sources:
   - model: E:/Utopia-20B-V1
     layer_range: [61, 62]
     parameters:
       scale:
         - {filter: o_proj, value: 0.256}
         - {filter: down_proj, value: 0.256}
         - {value: 0.30}

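# end game: o_proj and down_proj jump to .3333 and rise by about .1111 per
# layer, ending at .7777 (the unfiltered value on the last layer is .8888)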

 # End game: five slices, each layer_range [61, 62] of the same model.
 # For the first four, o_proj, down_proj and the unfiltered scale share one
 # value (0.333... up to 0.666...). On the last slice o_proj/down_proj use
 # 0.777... while the unfiltered scale is 0.888...
 - sources:
   - model: E:/Utopia-20B-V1
     layer_range: [61, 62]
     parameters:
       scale:
         - {filter: o_proj, value: 0.3333333333333}
         - {filter: down_proj, value: 0.3333333333333}
         - {value: 0.3333333333333}
 - sources:
   - model: E:/Utopia-20B-V1
     layer_range: [61, 62]
     parameters:
       scale:
         - {filter: o_proj, value: 0.4444444444444}
         - {filter: down_proj, value: 0.4444444444444}
         - {value: 0.4444444444444}
 - sources:
   - model: E:/Utopia-20B-V1
     layer_range: [61, 62]
     parameters:
       scale:
         - {filter: o_proj, value: 0.5555555555555}
         - {filter: down_proj, value: 0.5555555555555}
         - {value: 0.5555555555555}
 - sources:
   - model: E:/Utopia-20B-V1
     layer_range: [61, 62]
     parameters:
       scale:
         - {filter: o_proj, value: 0.6666666666666}
         - {filter: down_proj, value: 0.6666666666666}
         - {value: 0.6666666666666}
 - sources:
   - model: E:/Utopia-20B-V1
     layer_range: [61, 62]
     parameters:
       scale:
         - {filter: o_proj, value: 0.777777777777}
         - {filter: down_proj, value: 0.777777777777}
         - {value: 0.888888888888}
# Merge method applied to the slices; this card states the passthrough
# method was used.
merge_method: passthrough
# NOTE(review): the model name on this card ends in "bf16" but the merge
# dtype here is float16 — confirm which one is intended.
dtype: float16