# File size: 1,470 Bytes
# 2f044c1
# Values shared across the dataset entries in this file. The train/val/test
# datasets read the three size settings through
# `${data.shared_params.<key>}` interpolations, so changing a number here
# changes it for every split at once.
# NOTE(review): the `data.` prefix in those interpolations assumes this file
# is composed under a top-level `data` key -- confirm against the root config.
shared_params:
  # Not referenced by any interpolation visible in this file; left null here.
  # Presumably supplied by an override or read by another config -- confirm.
  passages_path: null
  # Referenced as `max_passage_length` by every dataset entry.
  max_passage_length: 64
  # Referenced as `passage_batch_size` by every dataset entry.
  passage_batch_size: 64
  # Referenced as `question_batch_size` by every dataset entry.
  question_batch_size: 64

# Data module definition: one dataset description per split plus a per-split
# worker count. Each `_target_` holds a dotted Python path; presumably these
# nodes are built through Hydra's instantiate mechanism -- confirm against
# the code that consumes this config.
datamodule:
  _target_: relik.retriever.lightning_modules.pl_data_modules.GoldenRetrieverPLDataModule
  datasets:
    # Training split. Note the shape: `train` is a single mapping, while
    # `val` and `test` below are one-element lists.
    train:
      _target_: relik.retriever.data.datasets.InBatchNegativesDataset
      name: "train"
      # Left null here; presumably supplied by an override -- confirm.
      path: null
      # Interpolated from the `model` config node.
      tokenizer: ${model.language_model}
      # The three size settings are interpolated from `data.shared_params`.
      max_passage_length: ${data.shared_params.max_passage_length}
      question_batch_size: ${data.shared_params.question_batch_size}
      passage_batch_size: ${data.shared_params.passage_batch_size}
      # Train-only options; val/test entries do not set them.
      # NOTE(review): with the strategy null, the portion below is presumably
      # unused -- confirm in the dataset class.
      subsample_strategy: null
      subsample_portion: 0.1
      # Lowercase `true` is the canonical YAML boolean spelling.
      shuffle: true

    # Validation split: a list holding one dataset entry.
    val:
      - _target_: relik.retriever.data.datasets.InBatchNegativesDataset
        name: "val"
        # Left null here; presumably supplied by an override -- confirm.
        path: null
        tokenizer: ${model.language_model}
        max_passage_length: ${data.shared_params.max_passage_length}
        question_batch_size: ${data.shared_params.question_batch_size}
        passage_batch_size: ${data.shared_params.passage_batch_size}

    # Test split: a list holding one dataset entry, same fields as `val`.
    test:
      - _target_: relik.retriever.data.datasets.InBatchNegativesDataset
        name: "test"
        # Left null here; presumably supplied by an override -- confirm.
        path: null
        tokenizer: ${model.language_model}
        max_passage_length: ${data.shared_params.max_passage_length}
        question_batch_size: ${data.shared_params.question_batch_size}
        passage_batch_size: ${data.shared_params.passage_batch_size}

  # Worker count per split; all three are set to 0 here.
  num_workers:
    train: 0
    val: 0
    test: 0