File size: 1,788 Bytes
bb86129 249d795 bb86129 249d795 bb86129 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 |
# lightning.pytorch==2.3.3
# Seed value passed to the CLI's `seed_everything` option.
seed_everything: 0
# Trainer options: bf16 mixed precision, with training capped at 50,000 steps.
trainer:
  precision: bf16-mixed
  max_steps: 50000
# Data module: `class_path` names the class to instantiate and `init_args`
# holds its constructor arguments.
data:
  class_path: lightning_ir.LightningIRDataModule
  init_args:
    num_workers: 1
    train_batch_size: 64
    shuffle_train: true
    # Training data is read from a run file, with scores used as targets.
    train_dataset:
      class_path: lightning_ir.RunDataset
      init_args:
        run_path_or_id: msmarco-passage/train/rank-distillm/set-encoder
        # NOTE(review): presumably `sample_size` documents are drawn from the
        # top `depth` entries per query -- confirm against the RunDataset docs.
        depth: 100
        sample_size: 8
        sampling_strategy: log_random
        targets: score
        normalize_targets: false
# Model: a bi-encoder module initialised from `bert-base-uncased` with a
# `ColConfig` model configuration and three loss functions.
model:
  class_path: lightning_ir.BiEncoderModule
  init_args:
    model_name_or_path: bert-base-uncased
    config:
      class_path: lightning_ir.ColConfig
      init_args:
        similarity_function: dot
        # Query-side settings.
        query_expansion: true
        attend_to_query_expanded_tokens: true
        query_mask_scoring_tokens: null
        doc_mask_scoring_tokens: punctuation
        query_aggregation_function: mean
        normalize: false
        add_marker_tokens: false
        # 128-dimensional embeddings produced by a linear projection.
        embedding_dim: 128
        projection: linear
        query_pooling_strategy: null
        # Document-side settings.
        doc_expansion: false
        attend_to_doc_expanded_tokens: false
        doc_pooling_strategy: null
        sparsification: null
        # NOTE(review): presumably maximum lengths in tokens -- confirm.
        query_length: 32
        doc_length: 256
    # Each list item is one loss; only the last one takes constructor
    # arguments.
    loss_functions:
      - class_path: lightning_ir.SupervisedMarginMSE
      - class_path: lightning_ir.KLDivergence
      - class_path: lightning_ir.InBatchCrossEntropy
        init_args:
          pos_sampling_technique: first
          neg_sampling_technique: first
          max_num_neg_samples: 8
# Optimizer: AdamW with a learning rate of 2e-5.
optimizer:
  class_path: torch.optim.AdamW
  init_args:
    lr: 2.0e-05
# Learning-rate schedule: linear scheduler with a 5,000-step linear warmup.
lr_scheduler:
  class_path: lightning_ir.LinearLRSchedulerWithLinearWarmup
  init_args:
    num_warmup_steps: 5000
    # NOTE(review): presumably the final LR as a fraction of the initial
    # value -- confirm against the scheduler's documentation.
    final_value: 0.02
    num_delay_steps: 0
|