backbone_config_path: /notebooks/models/exp222
best_model_path: /notebooks/models/exp222/models/fold_0_2023_best.pth
checkpoint_path: /notebooks/models/exp222/chkp/fold_0_2023_chkp.pth
config_path: /notebooks/models/exp222/config.yaml
criterion:
  criterion_type: BCEWithLogitsLoss
  mcrmse_loss:
    weights:
      - 0.5
      - 0.5
  mse_loss:
    reduction: mean
  rmse_loss:
    eps: 1.0e-09
    reduction: mean
  smooth_l1_loss:
    beta: 0.1
    reduction: mean
data_dir: /notebooks/data
dataset:
  bucket_batch_sampler:
    bucket_size: 400
    noise_factor: 0.2
  folds: true
  labels:
    - generated
  max_length: 256
  sampler_type: StratifiedBatchSampler
  train_batch_size: 48
  train_sources:
    - daigt
    - persuade
    - persuade_gpt
    - persuade_humanized_1
    - persuade_gpt_patially_rewritten
    - persuade_gpt_patially_rewritten_05
    - persuade_humanized_easy_1
    - daigt_gpt_patially_rewritten
    - llama-mistral-partially-r
    - moth
    - books
    - neural-chat-7b
    - nbroad
  valid_batch_size: 48
  valid_sources:
    - none
debug: false
exp_name: exp222_seed2023
external_dir: /notebooks/data/external
fold: 0
interim_dir: /notebooks/data/interim
log_path: /notebooks/models/exp222/logs/fold-0.log
logger:
  job_type: training
  project: DAIGT-AIE
  train_print_frequency: 100
  use_wandb: true
  valid_print_frequency: 100
model:
  architecture_type: CustomModel
  attention_dropout: 0.1
  backbone_type: microsoft/deberta-v3-large
  dropout: 0.05
  freeze_embeddings: false
  freeze_n_layers: 0
  gem_pooling:
    eps: 1.0e-06
    p: 3
  gradient_checkpointing: false
  load_embeddings: true
  load_head: true
  load_n_layers: 24
  load_parts: false
  pooling_type: MeanPooling
  reinitialize_n_layers: 0
  state_from_model: None
models_dir: /notebooks/models
optimizer:
  beta1: 0.9
  beta2: 0.999
  decoder_lr: 2.0e-05
  embeddings_lr: 2.0e-05
  encoder_lr: 2.0e-05
  eps: 1.0e-06
  group_lr_multiplier: 1
  n_groups: 1
  weight_decay: 0.01
processed_dir: /notebooks/data/processed
raw_dir: /notebooks/data/raw
run_dir: /notebooks/models/exp222
run_id: exp222_seed2023_fold0
run_name: exp222_seed2023_fold0
scheduler:
  cosine_schedule_with_warmup:
    n_cycles: 0.5
    n_warmup_steps: 0
  type: cosine_schedule_with_warmup
seed: 2023
tokenizer: null
tokenizer_path: /notebooks/models/exp222/tokenizer
training:
  apex: true
  epochs: 4
  evaluate_n_times_per_epoch: 1
  gradient_accumulation_steps: 1
  max_grad_norm: 1000
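
For context, the sketch below shows one way a config like this can be consumed: load the YAML, then build the optimizer and LR scheduler from the `optimizer` and `scheduler` sections. It is a minimal sketch assuming PyYAML, PyTorch, and Hugging Face `transformers`; the stand-in model and the `steps_per_epoch` value are placeholders, not taken from the original codebase.

```python
import yaml
import torch
from transformers import get_cosine_schedule_with_warmup

# Load the experiment config (path taken from config_path above).
with open("/notebooks/models/exp222/config.yaml") as f:
    cfg = yaml.safe_load(f)

# Stand-in module; the real model wraps cfg["model"]["backbone_type"]
# (microsoft/deberta-v3-large) in the project's CustomModel.
model = torch.nn.Linear(8, 1)

# AdamW built from the `optimizer` section. The config keeps separate
# embeddings/encoder/decoder learning rates; here they are all 2.0e-05,
# so a single parameter group suffices for the sketch.
opt_cfg = cfg["optimizer"]
optimizer = torch.optim.AdamW(
    model.parameters(),
    lr=opt_cfg["encoder_lr"],
    betas=(opt_cfg["beta1"], opt_cfg["beta2"]),
    eps=opt_cfg["eps"],
    weight_decay=opt_cfg["weight_decay"],
)

# Cosine schedule with warmup from the `scheduler` section.
# steps_per_epoch is a placeholder; in practice it would be
# len(train_loader) // gradient_accumulation_steps.
steps_per_epoch = 1000
num_training_steps = cfg["training"]["epochs"] * steps_per_epoch
sched_cfg = cfg["scheduler"]["cosine_schedule_with_warmup"]
scheduler = get_cosine_schedule_with_warmup(
    optimizer,
    num_warmup_steps=sched_cfg["n_warmup_steps"],
    num_training_steps=num_training_steps,
    num_cycles=sched_cfg["n_cycles"],
)
```

With `n_warmup_steps: 0` and `n_cycles: 0.5`, the learning rate starts at its peak of 2.0e-05 and decays along a single half-cosine to zero over the 4 training epochs.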