Spaces:
Running
on
Zero
Running
on
Zero
File size: 3,560 Bytes
d711508 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 |
# Copyright 2023-present the HuggingFace Inc. team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from dataclasses import dataclass, field
from typing import Optional
from peft.tuners.lora import LoraConfig
from peft.utils import PeftType
@dataclass
class AdaLoraConfig(LoraConfig):
"""
This is the configuration class to store the configuration of a [`~peft.AdaLora`].
Args:
target_r (`int`): The target average rank of incremental matrix.
init_r (`int`): The initial rank for each incremental matrix.
tinit (`int`): The steps of initial fine-tuning warmup.
tfinal (`int`): The step of final fine-tuning.
deltaT (`int`): The time internval between two budget allocations.
beta1 (`float`): The hyperparameter of EMA for sensitivity smoothing.
beta2 (`float`): The hyperparameter of EMA for undertainty quantification.
orth_reg_weight (`float`): The coefficient of orthogonal regularization.
total_step (`int`): The total training steps that should be specified before training.
rank_pattern (`list`): The allocated rank for each weight matrix by RankAllocator.
"""
target_r: int = field(default=8, metadata={"help": "Target Lora matrix dimension."})
init_r: int = field(default=12, metadata={"help": "Initial Lora matrix dimension."})
tinit: int = field(default=0, metadata={"help": "The steps of initial warmup."})
tfinal: int = field(default=0, metadata={"help": "The steps of final warmup."})
deltaT: int = field(default=1, metadata={"help": "Step interval of rank allocation."})
beta1: float = field(default=0.85, metadata={"help": "Hyperparameter of EMA."})
beta2: float = field(default=0.85, metadata={"help": "Hyperparameter of EMA."})
orth_reg_weight: float = field(default=0.5, metadata={"help": "The orthogonal regularization coefficient."})
total_step: Optional[int] = field(default=None, metadata={"help": "The total training steps."})
rank_pattern: Optional[dict] = field(default=None, metadata={"help": "The saved rank pattern."})
def __post_init__(self):
self.peft_type = PeftType.ADALORA
if self.use_dora:
raise ValueError(f"{self.peft_type} does not support DoRA.")
if self.loftq_config:
raise ValueError(f"{self.peft_type} does not support LOFTQ.")
self.target_modules = (
set(self.target_modules) if isinstance(self.target_modules, list) else self.target_modules
)
# if target_modules is a regex expression, then layers_to_transform should be None
if isinstance(self.target_modules, str) and self.layers_to_transform is not None:
raise ValueError("`layers_to_transform` cannot be used when `target_modules` is a str.")
# if target_modules is a regex expression, then layers_pattern should be None
if isinstance(self.target_modules, str) and self.layers_pattern is not None:
raise ValueError("`layers_pattern` cannot be used when `target_modules` is a str.")
|