import warnings
from dataclasses import dataclass, field
from typing import Optional

from peft.tuners.lora import LoraConfig
from peft.utils import PeftType


@dataclass
class AdaLoraConfig(LoraConfig):
    """
    This is the configuration class to store the configuration of a [`~peft.AdaLoraModel`].

    Args:
        target_r (`int`): The target average rank of the incremental matrices.
        init_r (`int`): The initial rank for each incremental matrix.
        tinit (`int`): The number of steps of initial fine-tuning warmup.
        tfinal (`int`): The number of steps of final fine-tuning.
        deltaT (`int`): The time interval between two budget allocations.
        beta1 (`float`): The hyperparameter of EMA for sensitivity smoothing.
        beta2 (`float`): The hyperparameter of EMA for uncertainty quantification.
        orth_reg_weight (`float`): The coefficient of orthogonal regularization.
        total_step (`int`): The total number of training steps, which should be specified before training.
        rank_pattern (`dict`): The rank allocated to each weight matrix by the RankAllocator.
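
    Example (a minimal usage sketch; the base model checkpoint, target modules, and schedule values below are
    illustrative choices, not prescribed defaults):

        ```py
        >>> from transformers import AutoModelForCausalLM
        >>> from peft import AdaLoraConfig, get_peft_model

        >>> base_model = AutoModelForCausalLM.from_pretrained("facebook/opt-125m")
        >>> config = AdaLoraConfig(
        ...     init_r=12,
        ...     target_r=8,
        ...     tinit=200,
        ...     tfinal=500,
        ...     deltaT=10,
        ...     total_step=10000,  # should match the planned number of training steps
        ...     target_modules=["q_proj", "v_proj"],
        ... )
        >>> model = get_peft_model(base_model, config)
        ```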
    """

    target_r: int = field(default=8, metadata={"help": "Target Lora matrix dimension."})
    init_r: int = field(default=12, metadata={"help": "Initial Lora matrix dimension."})
    tinit: int = field(default=0, metadata={"help": "The steps of initial warmup."})
    tfinal: int = field(default=0, metadata={"help": "The steps of final warmup."})
    deltaT: int = field(default=1, metadata={"help": "Step interval of rank allocation."})
    beta1: float = field(default=0.85, metadata={"help": "Hyperparameter of EMA."})
    beta2: float = field(default=0.85, metadata={"help": "Hyperparameter of EMA."})
    orth_reg_weight: float = field(default=0.5, metadata={"help": "The orthogonal regularization coefficient."})
    total_step: Optional[int] = field(default=None, metadata={"help": "The total training steps."})
    rank_pattern: Optional[dict] = field(default=None, metadata={"help": "The saved rank pattern."})

    def __post_init__(self):
        super().__post_init__()
        self.peft_type = PeftType.ADALORA

        # AdaLoRA does not support the DoRA and LoftQ extensions of LoRA.
        if self.use_dora:
            raise ValueError(f"{self.peft_type} does not support DoRA.")

        if self.loftq_config:
            raise ValueError(f"{self.peft_type} does not support LOFTQ.")

        # Convert list-valued module specifications to sets; string (regex) values are left unchanged.
        self.target_modules = (
            set(self.target_modules) if isinstance(self.target_modules, list) else self.target_modules
        )
        self.exclude_modules = (
            set(self.exclude_modules) if isinstance(self.exclude_modules, list) else self.exclude_modules
        )

        # A string-valued `target_modules` is interpreted as a regex, so index-based layer selection does not apply.
        if isinstance(self.target_modules, str) and self.layers_to_transform is not None:
            raise ValueError("`layers_to_transform` cannot be used when `target_modules` is a str.")

        if self.layers_pattern and not self.layers_to_transform:
            raise ValueError("When `layers_pattern` is specified, `layers_to_transform` must also be specified.")

        # `r` is inherited from LoraConfig but not used by AdaLoRA (use `init_r` instead); warn only when the
        # user changed `r` from its default of 8.
        if self.r != 8:
            warnings.warn(
                "Note that `r` is not used in AdaLora and will be ignored. "
                "If you intended to set the initial rank, use `init_r` instead."
            )