# AlekseyKorshuk's picture
# Upload folder using huggingface_hub
# f59a1a3 verified
# Absolute paths to this run's artifacts; all four live under
# /notebooks/models/exp222.
backbone_config_path: /notebooks/models/exp222
best_model_path: /notebooks/models/exp222/models/fold_0_2023_best.pth
checkpoint_path: /notebooks/models/exp222/chkp/fold_0_2023_chkp.pth
config_path: /notebooks/models/exp222/config.yaml
# Loss configuration.
# criterion_type presumably names the loss in use; the *_loss mappings hold
# per-loss options (verify against the training code).
# Each loss carries its own `reduction` / `eps`, so these options must stay
# nested: flattened to the top level they collide as duplicate keys.
criterion:
  criterion_type: BCEWithLogitsLoss
  mcrmse_loss:
    weights:
      - 0.5
      - 0.5
  mse_loss:
    reduction: mean
  rmse_loss:
    eps: 1.0e-09
    reduction: mean
  smooth_l1_loss:
    beta: 0.1
    reduction: mean
# Root data directory; external_dir, interim_dir, processed_dir and raw_dir
# in this file are all subdirectories of it.
data_dir: /notebooks/data
# Dataset, batching and data-source selection.
dataset:
  # NOTE(review): bucket sampler options are present although sampler_type
  # below is StratifiedBatchSampler — presumably unused for this run; confirm.
  bucket_batch_sampler:
    bucket_size: 400
    noise_factor: 0.2
  folds: true
  labels:
    - generated
  max_length: 256
  sampler_type: StratifiedBatchSampler
  train_batch_size: 48
  # Source identifiers are matched as literal strings. The "patially"
  # spelling is kept as-is; NOTE(review): only rename together with the data.
  train_sources:
    - daigt
    - persuade
    - persuade_gpt
    - persuade_humanized_1
    - persuade_gpt_patially_rewritten
    - persuade_gpt_patially_rewritten_05
    - persuade_humanized_easy_1
    - daigt_gpt_patially_rewritten
    - llama-mistral-partially-r
    - moth
    - books
    - neural-chat-7b
    - nbroad
  valid_batch_size: 48
  # `none` is a plain string here, not YAML null.
  valid_sources:
    - none
# Run identity and per-fold settings: fold 0 of experiment exp222_seed2023,
# with debug disabled.
debug: false
exp_name: exp222_seed2023
external_dir: /notebooks/data/external
fold: 0
interim_dir: /notebooks/data/interim
log_path: /notebooks/models/exp222/logs/fold-0.log
# Logging configuration: Weights & Biases is enabled (use_wandb: true).
# NOTE(review): the print frequencies are presumably counted in steps —
# confirm against the training loop.
logger:
  job_type: training
  project: DAIGT-AIE
  train_print_frequency: 100
  use_wandb: true
  valid_print_frequency: 100
# Model architecture and weight-loading options.
model:
  architecture_type: CustomModel
  attention_dropout: 0.1
  backbone_type: microsoft/deberta-v3-large
  dropout: 0.05
  freeze_embeddings: false
  freeze_n_layers: 0
  # NOTE(review): GeM pooling options are present although pooling_type
  # below is MeanPooling — presumably unused for this run; confirm.
  gem_pooling:
    eps: 1.0e-06
    p: 3
  gradient_checkpointing: false
  load_embeddings: true
  load_head: true
  load_n_layers: 24
  load_parts: false
  pooling_type: MeanPooling
  reinitialize_n_layers: 0
  # Quoted on purpose: this value is the string "None", not YAML null
  # (compare `tokenizer: null` elsewhere in this file).
  # NOTE(review): confirm the loader really compares against the string.
  state_from_model: 'None'
# Parent directory of run_dir (/notebooks/models/exp222).
models_dir: /notebooks/models
# Optimizer hyperparameters. The decoder, embeddings and encoder learning
# rates are all set to the same value (2.0e-05) in this run.
# `eps` must stay nested here: other sections define their own `eps`.
optimizer:
  beta1: 0.9
  beta2: 0.999
  decoder_lr: 2.0e-05
  embeddings_lr: 2.0e-05
  encoder_lr: 2.0e-05
  eps: 1.0e-06
  group_lr_multiplier: 1
  n_groups: 1
  weight_decay: 0.01
# Remaining data directories plus the run's directory and identifiers.
# run_id and run_name carry the same value in this config.
processed_dir: /notebooks/data/processed
raw_dir: /notebooks/data/raw
run_dir: /notebooks/models/exp222
run_id: exp222_seed2023_fold0
run_name: exp222_seed2023_fold0
# Learning-rate schedule. `type` names the schedule; the mapping with the
# same name holds its options (no warmup steps, half a cosine cycle).
# NOTE(review): `type` is placed at scheduler level, alongside the options
# mapping it names — confirm against the config loader.
scheduler:
  cosine_schedule_with_warmup:
    n_cycles: 0.5
    n_warmup_steps: 0
  type: cosine_schedule_with_warmup
# Random seed (matches the "seed2023" suffix in exp_name / run_id).
# tokenizer is explicitly null here; tokenizer_path points at the run's
# tokenizer directory. NOTE(review): presumably the tokenizer object is
# filled in at runtime — confirm.
seed: 2023
tokenizer: null
tokenizer_path: /notebooks/models/exp222/tokenizer
# Training-loop settings.
# NOTE(review): `apex` presumably toggles mixed-precision training — confirm
# against the trainer.
training:
  apex: true
  epochs: 4
  evaluate_n_times_per_epoch: 1
  gradient_accumulation_steps: 1
  max_grad_norm: 1000