lhzstar
new commits
abca9bf
raw
history blame contribute delete
No virus
3.79 kB
from .model import Unlimiformer
from .random_training_unlimiformer import RandomTrainingUnlimiformer
from dataclasses import dataclass, field
from typing import List, Optional
@dataclass
class UnlimiformerArguments:
    """
    Options that control whether and how Unlimiformer (KNN attention) is applied.

    Every field has a default, so the class can be instantiated with no
    arguments. Where a field carries ``metadata["help"]``, that string is its
    user-facing description.
    """

    test_unlimiformer: Optional[bool] = field(
        default=True,
        metadata={
            "help": "whether to use KNN."
        },
    )
    unlimiformer_verbose: Optional[bool] = field(
        default=False,
        metadata={
            "help": "whether to print KNN intermediate predictions (mostly for debugging)."
        },
    )
    # The help text previously referred to a non-existent `knn_layer_begin`
    # option and had an unbalanced bracket; it now names the real fields.
    layer_begin: Optional[int] = field(
        default=0,
        metadata={"help": "The layer to begin applying KNN to. KNN will be applied to layers[layer_begin:layer_end]. "
                          "By default, it will be applied to all layers: [0:None]"},
    )
    layer_end: Optional[int] = field(
        default=None,
        metadata={"help": "The layer to end applying KNN to. KNN will be applied to layers[layer_begin:layer_end]. "
                          "By default, it will be applied to all layers: [0:None]"},
    )
    unlimiformer_chunk_overlap: Optional[float] = field(
        default=0.5,
        metadata={"help": "The fraction of overlap between input chunks"},
    )
    unlimiformer_chunk_size: Optional[int] = field(
        default=None,
        metadata={"help": "The size of each input chunk"},
    )
    unlimiformer_head_num: Optional[int] = field(
        default=None,
        metadata={"help": "The head to apply KNN to (if None, apply to all heads)"},
    )
    unlimiformer_exclude: Optional[bool] = field(
        default=False,
        metadata={
            "help": "If True, prioritize the inputs that are **not** in the standard attention window."
        },
    )
    # Selects RandomTrainingUnlimiformer instead of Unlimiformer when the
    # model is converted in `training_addin`.
    random_unlimiformer_training: Optional[bool] = field(
        default=False,
    )
    # The flags below have no help text. `training_addin` forwards each of
    # them unchanged as a keyword argument of the same name to `convert_model`;
    # their exact semantics are defined there, not in this file.
    unlimiformer_training: Optional[bool] = field(
        default=False,
    )
    use_datastore: Optional[bool] = field(default=False)
    flat_index: Optional[bool] = field(default=False)
    test_datastore: Optional[bool] = field(default=False)
    reconstruct_embeddings: Optional[bool] = field(default=False)
    gpu_datastore: Optional[bool] = field(default=True)
    gpu_index: Optional[bool] = field(default=True)
# include these lines in your code somewhere before model training
def training_addin(unlimiformer_args=None, model=None, tokenizer=None):
    """Convert ``model`` with Unlimiformer as requested by ``unlimiformer_args``.

    Args:
        unlimiformer_args: Object exposing the ``UnlimiformerArguments``
            attributes read below. If omitted, the module-level name
            ``unlimiformer_args`` is used, as the original snippet did.
        model: The model handed to ``convert_model``. If omitted, the
            module-level name ``model`` is used when it exists.
        tokenizer: Forwarded as the ``tokenizer`` keyword argument. If omitted,
            the module-level name ``tokenizer`` is used when it exists.

    Returns:
        The value returned by ``convert_model`` when
        ``unlimiformer_args.test_unlimiformer`` is truthy, otherwise ``model``
        unchanged.

    Raises:
        NameError: If ``unlimiformer_args`` is neither passed nor defined at
            module level.
    """
    # The original body assigned to `model` inside the function, which made it
    # a local name and caused UnboundLocalError on the first read. Taking the
    # inputs as parameters (with a fallback to the module globals the snippet
    # used to rely on) removes that trap and lets the result be returned.
    module_scope = globals()
    if unlimiformer_args is None:
        if "unlimiformer_args" not in module_scope:
            raise NameError("name 'unlimiformer_args' is not defined")
        unlimiformer_args = module_scope["unlimiformer_args"]
    if model is None:
        model = module_scope.get("model")
    if tokenizer is None:
        tokenizer = module_scope.get("tokenizer")

    if not unlimiformer_args.test_unlimiformer:
        return model

    unlimiformer_kwargs = {
        'layer_begin': unlimiformer_args.layer_begin,
        'layer_end': unlimiformer_args.layer_end,
        'unlimiformer_head_num': unlimiformer_args.unlimiformer_head_num,
        'exclude_attention': unlimiformer_args.unlimiformer_exclude,
        'chunk_overlap': unlimiformer_args.unlimiformer_chunk_overlap,
        'model_encoder_max_len': unlimiformer_args.unlimiformer_chunk_size,
        'verbose': unlimiformer_args.unlimiformer_verbose,
        'tokenizer': tokenizer,
        'unlimiformer_training': unlimiformer_args.unlimiformer_training,
        'use_datastore': unlimiformer_args.use_datastore,
        'flat_index': unlimiformer_args.flat_index,
        'test_datastore': unlimiformer_args.test_datastore,
        'reconstruct_embeddings': unlimiformer_args.reconstruct_embeddings,
        'gpu_datastore': unlimiformer_args.gpu_datastore,
        'gpu_index': unlimiformer_args.gpu_index,
    }

    # Only the selected converter class is looked up, as in the original
    # if/else, so the unused one need not be resolvable.
    if unlimiformer_args.random_unlimiformer_training:
        return RandomTrainingUnlimiformer.convert_model(model, **unlimiformer_kwargs)
    return Unlimiformer.convert_model(model, **unlimiformer_kwargs)