|
import dataclasses as dc

import numpy as np
|
|
|
|
|
@dc.dataclass
class CtrlArguments:
    """Options describing the training data, problem formulation and tokenization.

    Every field carries a ``help`` string in its dataclass metadata.
    NOTE(review): the metadata layout suggests the class is consumed by an
    argument parser that reads ``metadata["help"]`` — confirm against the caller.
    """

    # Location(s) of the training data (see the help text for the expected form).
    train_data: str = dc.field(
        default="data/training_cunique_with_distractors.json",
        metadata={"help": "A CSV list of training data files"},
    )

    # Name of the problem definition used for training.
    formulation: str = dc.field(
        default="areg_ltr",
        metadata={"help": "Type of problem definition: autoregressive (areg) or u-PMLM (upmlm) or mixed (if predict_questions is set)"},
    )

    # Policy applied when a context is longer than the allowed length.
    context_strategy: str = dc.field(
        default="take_first",
        metadata={"help": "How to deal with contexts greater than a specified length"},
    )

    # Path to the serialized, already-trained tokenizer.
    tokenizer_file: str = dc.field(
        default="tokenizer.json",
        metadata={"help": "A JSON file (in the format provided by HuggingFace's tokenizers library) with a trained tokenizer"},
    )

    # Upper bound on the sequence length.
    sequence_length: int = dc.field(
        default=256,
        metadata={"help": "The max sequence length"},
    )

    # Whether the control code is repeated for every sliding window.
    force_prepend_control: bool = dc.field(
        default=False,
        metadata={"help": "If the control code should be prepended for all sliding windows. Otherwise, it is only prepended at the start of the sequence"},
    )
|
|
|
|
|
class GradientPrinter:
    """Callable that prints a gradient together with its norm and mean.

    NOTE(review): the ``__call__(grad)`` signature and the ``.cpu().numpy()``
    conversion suggest this is meant to be registered as a tensor gradient
    hook (e.g. ``torch.Tensor.register_hook``) — confirm against callers.
    """

    def __init__(self, name):
        """Store the label shown in the header of each printed report.

        Args:
            name: Identifier of the tensor/parameter whose gradient is printed.
        """
        self.name = name

    def __call__(self, grad):
        """Print ``grad``, its norm and its mean to standard output.

        Args:
            grad: Tensor-like object supporting ``.detach().cpu().numpy()``.

        Returns:
            None. The gradient passed in is not modified.
        """
        # Detach first: converting a tensor that still requires grad to NumPy
        # raises a RuntimeError (e.g. for hooks fired under create_graph=True).
        np_grad = grad.detach().cpu().numpy()
        print(f"======== GRAD FOR {self.name} ========")
        print(f"\tGRAD {grad}")
        print(f"\tGRAD NORM {np.linalg.norm(np_grad)}")
        print(f"\tGRAD MEAN {np.mean(np_grad)}")
        print()