---
# Configuration for a model merge run: device selection, model locations,
# merge method/ratios, acceptance strategy, and the phrases used for scoring.

# Either "cpu" or "cuda"
# NOTE: Cuda requires enough VRAM to load 3 FP16 models (~45 GB for Mistral)
# NOTE 2: The (much slower) CPU mode still requires Cuda capability, but only
# enough VRAM to load a model once. (~15 GB for Mistral)
device: "cpu"
random_seed: 42  # Random seed to use

directories:
  model_path1: "../jondurbin_bagel-dpo-34b-v0.2"  # Path to the base model. Must be a local copy.
  # model_directory: "../2xNous-Capybara-34B/"  # Directory of models to scan, IGNORED if models_to_merge has entries in it
  output_directory: "./mm-output"  # Output directory of the merged model

# A list of models to use as merge candidates - HF syntax, so can be either local directories or repos.
# Overrides model_directory if used
models_to_merge:
  - "../NousResearch_Nous-Capybara-34B"
  - "../NousResearch_Nous-Hermes-2-Yi-34B"
  - "../SUSTech_SUS-Chat-34B"

# Merge ratios used for testing each layer's potential for improvement - Huge impact on total running time
merge_ratios: [0.2, 0.4, 0.6, 0.8]

# Choose from the following methods. Defaults to "lerp".
# "lerp" - Linear interpolation
# "slerp" - Spherical linear interpolation
# "slice" - Highly experimental. The tensor weights shift from one model to another. [Model 1 > 10% blend > Model 2]
# "cyclic" - Highly experimental. Ignores merge ratios as these are predefined. [Model 1 > 10% blend > 10% Model 2 > 10% blend > Model 1]
merge_method: "slerp"

# If set to true, the lm_head and embed_token tensors (located outside the layers) will also be optimized
# Models that have a different vocab size from model1 will skip this phase automatically as it tends to cause model stability issues
merge_headers: true

# Strategies:
# "cumulative" - Default strategy. If there's a chance of reducing the combined probability, accept the merge.
# "all_phrases" - Only accept the merge if all phrases show an improvement. (Warning: This rarely happens)
# "quantitive" - Ignores probabilities completely. Only looks at how many phrases show an improvement, as defined by the threshold below.
strategy: "cumulative"

# Threshold is currently only used by the "quantitive" strategy. If 0.6, at least 60% of the number of phrases must show an improvement.
strategy_threshold: 0.6

# Whether or not to automatically balance the weights so all phrases are of equal importance to the "cumulative" strategy.
# The weight value of phrases is ignored if set to true.
auto_weights: false

# Phrase = What to measure, weight = multiplication factor, contexts = preceding contexts
# Trailing spaces inside the context strings are part of the value - keep them.
bad_phrases:
  - phrase: "anticipation"
    weight: 12
    contexts: ["Her body quivers with ", "The atmosphere is thick with "]
  - phrase: "unwavering"
    weight: 12
    contexts: ["Filled with an "]
  - phrase: "determination"
    weight: 12
    contexts: ["Her eyes were filled with ", "Her stubbornness only fuels my "]
  - phrase: "whisper"
    weight: 12
    contexts: ["Her voice barely above a "]
  - phrase: "spine"
    weight: 12
    contexts: ["shivers down her "]
  - phrase: "sends shivers"
    weight: 12
    contexts: ["The thrill of the act "]
  - phrase: "ministrations"
    weight: 12
    contexts: ["She moans and twitches at your "]
  - phrase: "legs"
    weight: 12
    contexts: ["wraps her "]
  - phrase: "imposing figure"
    weight: 12
    contexts: ["He had an "]
  - phrase: "shared challenges"
    weight: 12
    contexts: ["Their bond strengthened through "]
  - phrase: "bond"
    weight: 12
    contexts: ["forged a ", "an unspoken "]
  - phrase: "enhance our experience"
    weight: 12
    contexts: ["I'm excited to see how "]
  - phrase: "sense of vulnerability"
    weight: 12
    contexts: ["create a "]
  - phrase: "dimensions of intimacy"
    weight: 12
    contexts: ["explore new "]
  - phrase: "deepening our connection"
    weight: 12
    contexts: ["while "]
  - phrase: "shared experiences"
    weight: 12
    contexts: ["through "]
  - phrase: "societal expectations"
    weight: 12
    contexts: ["that transcend "]
  - phrase: "conventional boundaries"
    weight: 12
    contexts: ["that defy ", "and defy "]
  # NOTE(review): this context string was split across a line break in the
  # previous revision; "an environment " assumes a single space at the join - confirm.
  - phrase: "open communication"
    weight: 12
    contexts: ["an environment "]
  - phrase: "emotional vulnerability"
    weight: 12
    contexts: ["an environment "]
  - phrase: "heightens our connection"
    weight: 12
    contexts: ["touch and the anticipation "]
  - phrase: "sensations you're creating"
    weight: 12
    contexts: ["I'm enjoying "]
  - phrase: "is truly arousing"
    weight: 12
    contexts: ["attention to detail ", "way you explore my body "]
  - phrase: "challenge presented"
    weight: 12
    contexts: ["my resolve unwavering despite "]
  - phrase: "humble vessel"
    weight: 12
    contexts: ["surrendering to the exquisite torment "]
  # NOTE(review): "bond" is also listed earlier with different contexts -
  # confirm that two separate entries (rather than one merged entry) are intended.
  - phrase: "bond"
    weight: 12
    contexts: ["cherishing the unique ", "special "]
  - phrase: "grows stronger with each passing day"
    weight: 12
    contexts: ["bond "]
  - phrase: "that cannot be broken by time or circumstance"
    weight: 12
    contexts: ["bond "]
  - phrase: "becomes unbreakable, eternal"
    weight: 12
    contexts: ["bond "]
  - phrase: "grew stronger with each passing"
    weight: 12
    contexts: ["bond "]

# Note - Example of a complex phrase
good_phrases:
  - phrase: "The apple is in the bedroom"
    weight: 1
    contexts:
      - "Question: If I'm in the living room and pick up the apple, go to the bedroom and drop the apple, then walk to the kitchen, where is the apple? Explain your reasoning. Answer: "