mt5-small-3task-prepend-tquad2 / experiment_config.yaml
fcakyon
initial commit
82ba154
# Experiment configuration for a run based on google/mt5-small
# (see model_name_or_path). Flat mapping; keys are in alphabetical order.
# NOTE(review): the key set looks like a serialized Hugging Face
# TrainingArguments object plus project-specific options — confirm against
# the code that loads this file.
_n_gpu: 1
# Optimizer selection flag; the adam_* values below are also present.
# NOTE(review): presumably the adam_* values are unused when adafactor is
# true — verify against the trainer.
adafactor: true
adam_beta1: 0.9
adam_beta2: 0.999
adam_epsilon: 1.0e-08
cache_dir: null
dataloader_drop_last: false
dataloader_num_workers: 0
dataloader_pin_memory: true
ddp_find_unused_parameters: null
debug: []
deepspeed: null
disable_tqdm: false
# Run mode: training and evaluation enabled, prediction disabled.
do_eval: true
do_predict: false
do_train: true
eval_accumulation_steps: 1
# Evaluation datasets; tquad2-valid also appears in valid_dataset_list.
eval_dataset_list:
- tquad2-valid
- xquad.tr
eval_steps: 300
# NOTE(review): the !!python/object/apply tag constructs a Python object at
# load time, so safe loaders (e.g. yaml.safe_load) will reject this file.
# Only load it from a trusted source.
# The anchor &id001 defined here is reused by logging_strategy and
# save_strategy, so all three share the same `steps` value.
evaluation_strategy: &id001 !!python/object/apply:transformers.trainer_utils.IntervalStrategy
- steps
fp16: false
fp16_backend: auto
fp16_full_eval: false
fp16_opt_level: O1
freeze_embeddings: false
# NOTE(review): with per_device_train_batch_size 64 and _n_gpu 1 this
# presumably gives an effective batch of 256 — confirm.
gradient_accumulation_steps: 4
greater_is_better: null
group_by_length: false
ignore_data_skip: false
label_names: null
label_smoothing_factor: 0
learning_rate: 0.001
length_column_name: length
load_best_model_at_end: false
local_rank: -1
log_level: -1
log_level_replica: -1
log_on_each_node: true
# Sub-directory of output_dir. The last path component appears to embed a
# timestamp and host name from the originating run — machine-specific.
logging_dir: runs/mt5-small/3task/adafactor-1e3-15ep-prepend-tquad2train/runs/Sep04_12-32-14_palamut2.yonetim
logging_first_step: false
logging_steps: 500
# Alias of the value anchored at evaluation_strategy (steps).
logging_strategy: *id001
# Same Python-specific tag mechanism as evaluation_strategy; not loadable
# with a safe loader.
lr_scheduler_type: !!python/object/apply:transformers.trainer_utils.SchedulerType
- linear
max_grad_norm: 1.0
max_source_length: 512
# -1 here while num_train_epochs is 15.
# NOTE(review): presumably -1 means "no step cap, use epochs" — confirm.
max_steps: -1
max_target_length: 64
metric_for_best_model: null
model_name_or_path: google/mt5-small
model_type: mt5
mp_parameters: ''
mt5_qg_format: prepend
# Three tasks, consistent with the "3task" component of output_dir.
mt5_task_list:
- qa
- qg
- ans_ext
# Neptune settings; token and run are null, only the project is set.
neptune_api_token: null
neptune_project: obss-ml/nqg-test
neptune_run: null
no_cuda: false
num_train_epochs: 15
output_dir: runs/mt5-small/3task/adafactor-1e3-15ep-prepend-tquad2train
overwrite_output_dir: false
past_index: -1
per_device_eval_batch_size: 64
per_device_train_batch_size: 64
per_gpu_eval_batch_size: null
per_gpu_train_batch_size: null
prediction_loss_only: false
prepare_data: true
# Hub upload is disabled; the model id equals the last component of
# output_dir.
push_to_hub: false
push_to_hub_model_id: adafactor-1e3-15ep-prepend-tquad2train
push_to_hub_organization: null
push_to_hub_token: null
remove_unused_columns: false
# Experiment-tracking backends listed for this run.
report_to:
- wandb
- neptune
resume_from_checkpoint: null
run_name: turque-mt5small-adafactor-1e3-15ep-tquad2train
save_on_each_node: false
# save_steps (500) differs from eval_steps (300).
save_steps: 500
# Alias of the value anchored at evaluation_strategy (steps).
save_strategy: *id001
save_total_limit: 1
seed: 42
sharded_ddp: []
skip_memory_metrics: true
tokenizer_path: tokenizers/mt5-small
tpu_metrics_debug: false
tpu_num_cores: null
train_dataset_list:
- tquad2-train
train_file_path: data/train_data.pt
use_legacy_prediction_loop: false
valid_dataset_list:
- tquad2-valid
valid_file_path: data/valid_data.pt
# Weights & Biases settings; no run id is pinned.
wandb_id: null
wandb_project: turkish-qa-qg
# No warmup configured: both ratio and step count are zero.
warmup_ratio: 0.0
warmup_steps: 0
weight_decay: 0.0