# NetsPresso_QA / pyserini/2cr/msmarco-v1-passage.yaml
# Uploaded by geonmin-kim using huggingface_hub (revision d6585f5).
# Reproduction conditions: each entry pairs a retrieval command with the
# scores listed for it on every topic set.
conditions:
# BM25 + Rocchio feedback on the doc2query-T5 docvectors index. No --k1/--b
# is passed; the label reports k1=2.18, b=0.86 (presumably selected by the
# searcher for this index -- TODO confirm).
- name: bm25-rocchio-d2q-t5-tuned
  display: BM25+Rocchio w/ doc2query-T5 (k1=2.18, b=0.86)
  display-html: BM25+Rocchio w/ doc2query-T5 (<i>k<sub><small>1</small></sub></i>=2.18, <i>b</i>=0.86)
  command: python -m pyserini.search.lucene --threads 16 --batch-size 128 --index msmarco-v1-passage-d2q-t5-docvectors --topics $topics --output $output --bm25 --rocchio
  topics:
  - topic_key: msmarco-passage-dev-subset
    eval_key: msmarco-passage-dev-subset
    scores:
    - MRR@10: 0.2395
      R@1K: 0.9535
  - topic_key: dl19-passage
    eval_key: dl19-passage
    scores:
    - MAP: 0.4339
      nDCG@10: 0.6559
      R@1K: 0.8465
  # topic_key and eval_key intentionally differ here (dl20 vs dl20-passage).
  - topic_key: dl20
    eval_key: dl20-passage
    scores:
    - MAP: 0.4376
      nDCG@10: 0.6224
      R@1K: 0.8641
# BM25 + Rocchio feedback on the doc2query-T5 docvectors index with explicit
# --k1 0.9 --b 0.4.
- name: bm25-rocchio-d2q-t5-default
  display: BM25+Rocchio w/ doc2query-T5 (k1=0.9, b=0.4)
  display-html: BM25+Rocchio w/ doc2query-T5 (<i>k<sub><small>1</small></sub></i>=0.9, <i>b</i>=0.4)
  command: python -m pyserini.search.lucene --threads 16 --batch-size 128 --index msmarco-v1-passage-d2q-t5-docvectors --topics $topics --output $output --bm25 --rocchio --k1 0.9 --b 0.4
  topics:
  - topic_key: msmarco-passage-dev-subset
    eval_key: msmarco-passage-dev-subset
    scores:
    - MRR@10: 0.2158
      R@1K: 0.9467
  - topic_key: dl19-passage
    eval_key: dl19-passage
    scores:
    - MAP: 0.4469
      nDCG@10: 0.6538
      R@1K: 0.8855
  - topic_key: dl20
    eval_key: dl20-passage
    scores:
    - MAP: 0.4246
      nDCG@10: 0.6102
      R@1K: 0.8675
# BM25 + Rocchio feedback on the msmarco-v1-passage-full index with explicit
# --k1 0.9 --b 0.4.
- name: bm25-rocchio-default
  display: BM25+Rocchio (k1=0.9, b=0.4)
  display-html: BM25+Rocchio (<i>k<sub><small>1</small></sub></i>=0.9, <i>b</i>=0.4)
  command: python -m pyserini.search.lucene --threads 16 --batch-size 128 --index msmarco-v1-passage-full --topics $topics --output $output --bm25 --k1 0.9 --b 0.4 --rocchio
  topics:
  - topic_key: msmarco-passage-dev-subset
    eval_key: msmarco-passage-dev-subset
    scores:
    - MRR@10: 0.1595
      R@1K: 0.8620
  - topic_key: dl19-passage
    eval_key: dl19-passage
    scores:
    - MAP: 0.3474
      nDCG@10: 0.5275
      R@1K: 0.8007
  - topic_key: dl20
    eval_key: dl20-passage
    scores:
    - MAP: 0.3115
      nDCG@10: 0.4910
      R@1K: 0.8156
# BM25 + Rocchio feedback on the msmarco-v1-passage-full index. No --k1/--b
# is passed; the label reports k1=0.82, b=0.68 (presumably selected by the
# searcher for this index -- TODO confirm).
- name: bm25-rocchio-tuned
  display: BM25+Rocchio (k1=0.82, b=0.68)
  display-html: BM25+Rocchio (<i>k<sub><small>1</small></sub></i>=0.82, <i>b</i>=0.68)
  command: python -m pyserini.search.lucene --threads 16 --batch-size 128 --index msmarco-v1-passage-full --topics $topics --output $output --bm25 --rocchio
  topics:
  - topic_key: msmarco-passage-dev-subset
    eval_key: msmarco-passage-dev-subset
    scores:
    - MRR@10: 0.1684
      R@1K: 0.8726
  - topic_key: dl19-passage
    eval_key: dl19-passage
    scores:
    - MAP: 0.3396
      nDCG@10: 0.5275
      R@1K: 0.7948
  - topic_key: dl20
    eval_key: dl20-passage
    scores:
    - MAP: 0.3120
      nDCG@10: 0.4908
      R@1K: 0.8327
# DistilBERT KD TASB via Faiss; queries are encoded at search time with the
# model given to --encoder.
- name: distilbert-kd-tasb-otf
  display: "DistilBERT KD TASB: otf"
  display-html: "DistilBERT KD TASB: on-the-fly query inference"
  display-row: "[4]"
  command: python -m pyserini.search.faiss --threads 16 --batch-size 512 --index msmarco-passage-distilbert-dot-tas_b-b256-bf --topics $topics --encoder sebastian-hofstaetter/distilbert-dot-tas_b-b256-msmarco --output $output
  topics:
  - topic_key: msmarco-passage-dev-subset
    eval_key: msmarco-passage-dev-subset
    scores:
    - MRR@10: 0.3444
      R@1K: 0.9771
  - topic_key: dl19-passage
    eval_key: dl19-passage
    scores:
    - MAP: 0.4590
      nDCG@10: 0.7210
      R@1K: 0.8406
  - topic_key: dl20
    eval_key: dl20-passage
    scores:
    - MAP: 0.4698
      nDCG@10: 0.6854
      R@1K: 0.8727
# DistilBERT KD TASB via Faiss using --encoded-queries; the query-set name is
# built from the distilbert_tas_b- prefix plus $topics.
- name: distilbert-kd-tasb
  display: "DistilBERT KD TASB: pre-encoded"
  display-html: "DistilBERT KD TASB: pre-encoded queries"
  display-row: "[4]"
  command: python -m pyserini.search.faiss --threads 16 --batch-size 512 --index msmarco-passage-distilbert-dot-tas_b-b256-bf --topics $topics --encoded-queries distilbert_tas_b-$topics --output $output
  topics:
  - topic_key: msmarco-passage-dev-subset
    eval_key: msmarco-passage-dev-subset
    scores:
    - MRR@10: 0.3444
      R@1K: 0.9771
  - topic_key: dl19-passage
    eval_key: dl19-passage
    scores:
    - MAP: 0.4590
      nDCG@10: 0.7210
      R@1K: 0.8406
  - topic_key: dl20
    eval_key: dl20-passage
    scores:
    - MAP: 0.4698
      nDCG@10: 0.6854
      R@1K: 0.8727
# DistilBERT KD (margin_mse-T2 index) via Faiss; queries are encoded at
# search time with the model given to --encoder.
- name: distilbert-kd-otf
  display: "DistilBERT KD: otf"
  display-html: "DistilBERT KD: on-the-fly query inference"
  display-row: "[3]"
  command: python -m pyserini.search.faiss --threads 16 --batch-size 512 --index msmarco-passage-distilbert-dot-margin_mse-T2-bf --topics $topics --encoder sebastian-hofstaetter/distilbert-dot-margin_mse-T2-msmarco --output $output
  topics:
  - topic_key: msmarco-passage-dev-subset
    eval_key: msmarco-passage-dev-subset
    scores:
    - MRR@10: 0.3251
      R@1K: 0.9553
  - topic_key: dl19-passage
    eval_key: dl19-passage
    scores:
    - MAP: 0.4053
      nDCG@10: 0.6994
      R@1K: 0.7653
  - topic_key: dl20
    eval_key: dl20-passage
    scores:
    - MAP: 0.4159
      nDCG@10: 0.6447
      R@1K: 0.7953
# DistilBERT KD (margin_mse-T2 index) via Faiss using --encoded-queries; the
# query-set name is built from the distilbert_kd- prefix plus $topics.
- name: distilbert-kd
  display: "DistilBERT KD: pre-encoded"
  display-html: "DistilBERT KD: pre-encoded queries"
  display-row: "[3]"
  command: python -m pyserini.search.faiss --threads 16 --batch-size 512 --index msmarco-passage-distilbert-dot-margin_mse-T2-bf --topics $topics --encoded-queries distilbert_kd-$topics --output $output
  topics:
  - topic_key: msmarco-passage-dev-subset
    eval_key: msmarco-passage-dev-subset
    scores:
    - MRR@10: 0.3251
      R@1K: 0.9553
  - topic_key: dl19-passage
    eval_key: dl19-passage
    scores:
    - MAP: 0.4053
      nDCG@10: 0.6994
      R@1K: 0.7653
  - topic_key: dl20
    eval_key: dl20-passage
    scores:
    - MAP: 0.4159
      nDCG@10: 0.6447
      R@1K: 0.7953
# ANCE via Faiss; queries are encoded at search time with the model given to
# --encoder.
- name: ance-otf
  display: "ANCE: otf"
  display-html: "ANCE: on-the-fly query inference"
  display-row: "[2]"
  command: python -m pyserini.search.faiss --threads 16 --batch-size 512 --index msmarco-passage-ance-bf --topics $topics --encoder castorini/ance-msmarco-passage --output $output
  topics:
  - topic_key: msmarco-passage-dev-subset
    eval_key: msmarco-passage-dev-subset
    scores:
    - MRR@10: 0.3302
      R@1K: 0.9587
  - topic_key: dl19-passage
    eval_key: dl19-passage
    scores:
    - MAP: 0.3710
      nDCG@10: 0.6452
      R@1K: 0.7554
  - topic_key: dl20
    eval_key: dl20-passage
    scores:
    - MAP: 0.4076
      nDCG@10: 0.6458
      R@1K: 0.7764
# ANCE via Faiss using --encoded-queries; the query-set name is built from
# the ance- prefix plus $topics.
- name: ance
  display: "ANCE: pre-encoded"
  display-html: "ANCE: pre-encoded queries"
  display-row: "[2]"
  command: python -m pyserini.search.faiss --threads 16 --batch-size 512 --index msmarco-passage-ance-bf --topics $topics --encoded-queries ance-$topics --output $output
  topics:
  - topic_key: msmarco-passage-dev-subset
    eval_key: msmarco-passage-dev-subset
    scores:
    - MRR@10: 0.3302
      # NOTE(review): the on-the-fly ANCE entry lists 0.9587 for this metric;
      # confirm the 0.0003 gap is expected rather than a typo.
      R@1K: 0.9584
  - topic_key: dl19-passage
    eval_key: dl19-passage
    scores:
    - MAP: 0.3710
      nDCG@10: 0.6452
      R@1K: 0.7554
  - topic_key: dl20
    eval_key: dl20-passage
    scores:
    - MAP: 0.4076
      nDCG@10: 0.6458
      R@1K: 0.7764
# BM25 on the msmarco-v1-passage-slim index. No --k1/--b is passed; the label
# reports k1=0.82, b=0.68 (presumably selected by the searcher for this
# index -- TODO confirm).
# The command carries the same --threads/--batch-size flags and flag order as
# the other Lucene conditions in this file.
- name: bm25-tuned
  display: BM25 (k1=0.82, b=0.68)
  display-html: BM25 (<i>k<sub><small>1</small></sub></i>=0.82, <i>b</i>=0.68)
  command: python -m pyserini.search.lucene --threads 16 --batch-size 128 --index msmarco-v1-passage-slim --topics $topics --output $output --bm25
  topics:
  - topic_key: msmarco-passage-dev-subset
    eval_key: msmarco-passage-dev-subset
    scores:
    - MRR@10: 0.1875
      R@1K: 0.8573
  - topic_key: dl19-passage
    eval_key: dl19-passage
    scores:
    - MAP: 0.2903
      nDCG@10: 0.4973
      R@1K: 0.7450
  - topic_key: dl20
    eval_key: dl20-passage
    scores:
    - MAP: 0.2876
      nDCG@10: 0.4876
      R@1K: 0.8031
# BM25 + RM3 feedback on the msmarco-v1-passage-full index. No --k1/--b is
# passed; the label reports k1=0.82, b=0.68 (presumably selected by the
# searcher for this index -- TODO confirm).
- name: bm25-rm3-tuned
  display: BM25+RM3 (k1=0.82, b=0.68)
  display-html: BM25+RM3 (<i>k<sub><small>1</small></sub></i>=0.82, <i>b</i>=0.68)
  command: python -m pyserini.search.lucene --threads 16 --batch-size 128 --index msmarco-v1-passage-full --topics $topics --output $output --bm25 --rm3
  topics:
  - topic_key: msmarco-passage-dev-subset
    eval_key: msmarco-passage-dev-subset
    scores:
    - MRR@10: 0.1646
      R@1K: 0.8704
  - topic_key: dl19-passage
    eval_key: dl19-passage
    scores:
    - MAP: 0.3339
      nDCG@10: 0.5147
      R@1K: 0.7950
  - topic_key: dl20
    eval_key: dl20-passage
    scores:
    - MAP: 0.3017
      nDCG@10: 0.4924
      R@1K: 0.8292
# BM25 on the msmarco-v1-passage-slim index with explicit --k1 0.9 --b 0.4.
# display-row is an HTML fragment whose tooltip cites Ma et al. (SIGIR 2021),
# row (1a).
- name: bm25-default
  display: BM25 (k1=0.9, b=0.4)
  display-html: BM25 (<i>k<sub><small>1</small></sub></i>=0.9, <i>b</i>=0.4)
  display-row: "[<a href=\"#\" data-mdb-toggle=\"tooltip\" title=\"Ma et al. (SIGIR 2021) Document Expansions and Learned Sparse Lexical Representations for MS MARCO V1 and V2.\">1</a>] &mdash; (1a)"
  command: python -m pyserini.search.lucene --threads 16 --batch-size 128 --index msmarco-v1-passage-slim --topics $topics --output $output --bm25 --k1 0.9 --b 0.4
  topics:
  - topic_key: msmarco-passage-dev-subset
    eval_key: msmarco-passage-dev-subset
    scores:
    - MRR@10: 0.1840
      R@1K: 0.8526
  - topic_key: dl19-passage
    eval_key: dl19-passage
    scores:
    - MAP: 0.3013
      nDCG@10: 0.5058
      R@1K: 0.7501
  - topic_key: dl20
    eval_key: dl20-passage
    scores:
    - MAP: 0.2856
      nDCG@10: 0.4796
      R@1K: 0.7863
# BM25 + RM3 feedback on the msmarco-v1-passage-full index with explicit
# --k1 0.9 --b 0.4. display-row is an HTML fragment whose tooltip cites
# Ma et al. (SIGIR 2021), row (1b).
- name: bm25-rm3-default
  display: BM25+RM3 (k1=0.9, b=0.4)
  display-html: BM25+RM3 (<i>k<sub><small>1</small></sub></i>=0.9, <i>b</i>=0.4)
  display-row: "[<a href=\"#\" data-mdb-toggle=\"tooltip\" title=\"Ma et al. (SIGIR 2021) Document Expansions and Learned Sparse Lexical Representations for MS MARCO V1 and V2.\">1</a>] &mdash; (1b)"
  command: python -m pyserini.search.lucene --threads 16 --batch-size 128 --index msmarco-v1-passage-full --topics $topics --output $output --bm25 --k1 0.9 --b 0.4 --rm3
  topics:
  - topic_key: msmarco-passage-dev-subset
    eval_key: msmarco-passage-dev-subset
    scores:
    - MRR@10: 0.1566
      R@1K: 0.8606
  - topic_key: dl19-passage
    eval_key: dl19-passage
    scores:
    - MAP: 0.3416
      nDCG@10: 0.5216
      R@1K: 0.8136
  - topic_key: dl20
    eval_key: dl20-passage
    scores:
    - MAP: 0.3006
      nDCG@10: 0.4896
      R@1K: 0.8236
# BM25 on the msmarco-v1-passage-d2q-t5 index. No --k1/--b is passed; the
# label reports k1=2.18, b=0.86 (presumably selected by the searcher for this
# index -- TODO confirm).
- name: bm25-d2q-t5-tuned
  display: BM25 w/ doc2query-T5 (k1=2.18, b=0.86)
  display-html: BM25 w/ doc2query-T5 (<i>k<sub><small>1</small></sub></i>=2.18, <i>b</i>=0.86)
  command: python -m pyserini.search.lucene --threads 16 --batch-size 128 --index msmarco-v1-passage-d2q-t5 --topics $topics --output $output --bm25
  topics:
  - topic_key: msmarco-passage-dev-subset
    eval_key: msmarco-passage-dev-subset
    scores:
    - MRR@10: 0.2816
      R@1K: 0.9506
  - topic_key: dl19-passage
    eval_key: dl19-passage
    scores:
    - MAP: 0.4046
      nDCG@10: 0.6336
      R@1K: 0.8134
  - topic_key: dl20
    eval_key: dl20-passage
    scores:
    - MAP: 0.4171
      nDCG@10: 0.6265
      R@1K: 0.8393
# BM25 on the msmarco-v1-passage-d2q-t5 index with explicit --k1 0.9 --b 0.4.
# display-row is an HTML fragment whose tooltip cites Ma et al. (SIGIR 2021),
# row (2a).
- name: bm25-d2q-t5-default
  display: BM25 w/ doc2query-T5 (k1=0.9, b=0.4)
  display-html: BM25 w/ doc2query-T5 (<i>k<sub><small>1</small></sub></i>=0.9, <i>b</i>=0.4)
  display-row: "[<a href=\"#\" data-mdb-toggle=\"tooltip\" title=\"Ma et al. (SIGIR 2021) Document Expansions and Learned Sparse Lexical Representations for MS MARCO V1 and V2.\">1</a>] &mdash; (2a)"
  command: python -m pyserini.search.lucene --threads 16 --batch-size 128 --index msmarco-v1-passage-d2q-t5 --topics $topics --output $output --bm25 --k1 0.9 --b 0.4
  topics:
  - topic_key: msmarco-passage-dev-subset
    eval_key: msmarco-passage-dev-subset
    scores:
    - MRR@10: 0.2723
      R@1K: 0.9470
  - topic_key: dl19-passage
    eval_key: dl19-passage
    scores:
    - MAP: 0.4034
      nDCG@10: 0.6417
      R@1K: 0.8310
  - topic_key: dl20
    eval_key: dl20-passage
    scores:
    - MAP: 0.4074
      nDCG@10: 0.6187
      R@1K: 0.8452
# BM25 + RM3 feedback on the doc2query-T5 docvectors index. No --k1/--b is
# passed; the label reports k1=2.18, b=0.86 (presumably selected by the
# searcher for this index -- TODO confirm).
- name: bm25-rm3-d2q-t5-tuned
  display: BM25+RM3 w/ doc2query-T5 (k1=2.18, b=0.86)
  display-html: BM25+RM3 w/ doc2query-T5 (<i>k<sub><small>1</small></sub></i>=2.18, <i>b</i>=0.86)
  command: python -m pyserini.search.lucene --threads 16 --batch-size 128 --index msmarco-v1-passage-d2q-t5-docvectors --topics $topics --output $output --bm25 --rm3
  topics:
  - topic_key: msmarco-passage-dev-subset
    eval_key: msmarco-passage-dev-subset
    scores:
    - MRR@10: 0.2382
      R@1K: 0.9528
  - topic_key: dl19-passage
    eval_key: dl19-passage
    scores:
    - MAP: 0.4377
      nDCG@10: 0.6537
      R@1K: 0.8443
  - topic_key: dl20
    eval_key: dl20-passage
    scores:
    - MAP: 0.4348
      nDCG@10: 0.6235
      R@1K: 0.8605
# BM25 + RM3 feedback on the doc2query-T5 docvectors index with explicit
# --k1 0.9 --b 0.4. display-row is an HTML fragment whose tooltip cites
# Ma et al. (SIGIR 2021), row (2b).
- name: bm25-rm3-d2q-t5-default
  display: BM25+RM3 w/ doc2query-T5 (k1=0.9, b=0.4)
  display-html: BM25+RM3 w/ doc2query-T5 (<i>k<sub><small>1</small></sub></i>=0.9, <i>b</i>=0.4)
  display-row: "[<a href=\"#\" data-mdb-toggle=\"tooltip\" title=\"Ma et al. (SIGIR 2021) Document Expansions and Learned Sparse Lexical Representations for MS MARCO V1 and V2.\">1</a>] &mdash; (2b)"
  command: python -m pyserini.search.lucene --threads 16 --batch-size 128 --index msmarco-v1-passage-d2q-t5-docvectors --topics $topics --output $output --bm25 --rm3 --k1 0.9 --b 0.4
  topics:
  - topic_key: msmarco-passage-dev-subset
    eval_key: msmarco-passage-dev-subset
    scores:
    - MRR@10: 0.2139
      R@1K: 0.9460
  - topic_key: dl19-passage
    eval_key: dl19-passage
    scores:
    - MAP: 0.4483
      nDCG@10: 0.6586
      R@1K: 0.8863
  - topic_key: dl20
    eval_key: dl20-passage
    scores:
    - MAP: 0.4286
      nDCG@10: 0.6131
      R@1K: 0.8700
# uniCOIL (with doc2query-T5) impact search through the Lucene searcher;
# queries are encoded at search time with the model given to --encoder.
- name: unicoil-otf
  display: "uniCOIL (w/ doc2query-T5): otf"
  display-html: "uniCOIL (w/ doc2query-T5): on-the-fly query inference"
  command: python -m pyserini.search.lucene --threads 16 --batch-size 128 --index msmarco-v1-passage-unicoil --topics $topics --encoder castorini/unicoil-msmarco-passage --output $output --hits 1000 --impact
  topics:
  - topic_key: msmarco-passage-dev-subset
    eval_key: msmarco-passage-dev-subset
    scores:
    - MRR@10: 0.3509
      R@1K: 0.9581
  - topic_key: dl19-passage
    eval_key: dl19-passage
    scores:
    - MAP: 0.4617
      nDCG@10: 0.7027
      R@1K: 0.8291
  - topic_key: dl20
    eval_key: dl20-passage
    scores:
    - MAP: 0.4429
      nDCG@10: 0.6745
      R@1K: 0.8433
# uniCOIL (with doc2query-T5) impact search with no --encoder; the topic keys
# carry a -unicoil suffix while the eval keys stay unsuffixed.
# display-row is an HTML fragment whose tooltip cites Ma et al. (SIGIR 2021),
# row (3b).
- name: unicoil
  display: "uniCOIL (w/ doc2query-T5): pre-encoded"
  display-html: "uniCOIL (w/ doc2query-T5): pre-encoded queries"
  display-row: "[<a href=\"#\" data-mdb-toggle=\"tooltip\" title=\"Ma et al. (SIGIR 2021) Document Expansions and Learned Sparse Lexical Representations for MS MARCO V1 and V2.\">1</a>] &mdash; (3b)"
  command: python -m pyserini.search.lucene --threads 16 --batch-size 128 --index msmarco-v1-passage-unicoil --topics $topics --output $output --hits 1000 --impact
  topics:
  - topic_key: msmarco-passage-dev-subset-unicoil
    eval_key: msmarco-passage-dev-subset
    scores:
    - MRR@10: 0.3516
      R@1K: 0.9582
  - topic_key: dl19-passage-unicoil
    eval_key: dl19-passage
    scores:
    - MAP: 0.4612
      nDCG@10: 0.7024
      R@1K: 0.8292
  - topic_key: dl20-unicoil
    eval_key: dl20-passage
    scores:
    - MAP: 0.4430
      nDCG@10: 0.6745
      R@1K: 0.8430
# uniCOIL (noexp) impact search through the Lucene searcher; queries are
# encoded at search time with the model given to --encoder.
- name: unicoil-noexp-otf
  display: "uniCOIL (noexp): otf"
  display-html: "uniCOIL (noexp): on-the-fly query inference"
  command: python -m pyserini.search.lucene --threads 16 --batch-size 128 --index msmarco-v1-passage-unicoil-noexp --topics $topics --encoder castorini/unicoil-noexp-msmarco-passage --output $output --hits 1000 --impact
  topics:
  - topic_key: msmarco-passage-dev-subset
    eval_key: msmarco-passage-dev-subset
    scores:
    - MRR@10: 0.3153
      R@1K: 0.9239
  - topic_key: dl19-passage
    eval_key: dl19-passage
    scores:
    - MAP: 0.4033
      nDCG@10: 0.6434
      R@1K: 0.7752
  - topic_key: dl20
    eval_key: dl20-passage
    scores:
    - MAP: 0.4022
      nDCG@10: 0.6524
      R@1K: 0.7861
# uniCOIL (noexp) impact search with no --encoder; the topic keys carry a
# -unicoil-noexp suffix while the eval keys stay unsuffixed.
# display-row is an HTML fragment whose tooltip cites Ma et al. (SIGIR 2021),
# row (3a).
- name: unicoil-noexp
  display: "uniCOIL (noexp): pre-encoded"
  display-html: "uniCOIL (noexp): pre-encoded queries"
  display-row: "[<a href=\"#\" data-mdb-toggle=\"tooltip\" title=\"Ma et al. (SIGIR 2021) Document Expansions and Learned Sparse Lexical Representations for MS MARCO V1 and V2.\">1</a>] &mdash; (3a)"
  command: python -m pyserini.search.lucene --threads 16 --batch-size 128 --index msmarco-v1-passage-unicoil-noexp --topics $topics --output $output --hits 1000 --impact
  topics:
  - topic_key: msmarco-passage-dev-subset-unicoil-noexp
    eval_key: msmarco-passage-dev-subset
    scores:
    - MRR@10: 0.3153
      R@1K: 0.9239
  - topic_key: dl19-passage-unicoil-noexp
    eval_key: dl19-passage
    scores:
    - MAP: 0.4033
      nDCG@10: 0.6433
      R@1K: 0.7752
  - topic_key: dl20-unicoil-noexp
    eval_key: dl20-passage
    scores:
    - MAP: 0.4021
      nDCG@10: 0.6523
      R@1K: 0.7861
# TCT_ColBERT-V2-HN+ via Faiss; queries are encoded at search time with the
# model given to --encoder.
- name: tct_colbert-v2-hnp-otf
  display: "TCT_ColBERT-V2-HN+: otf"
  display-html: "TCT_ColBERT-V2-HN+: on-the-fly query inference"
  display-row: "[5]"
  command: python -m pyserini.search.faiss --threads 16 --batch-size 512 --index msmarco-passage-tct_colbert-v2-hnp-bf --topics $topics --encoder castorini/tct_colbert-v2-hnp-msmarco --output $output
  topics:
  - topic_key: msmarco-passage-dev-subset
    eval_key: msmarco-passage-dev-subset
    scores:
    - MRR@10: 0.3584
      R@1K: 0.9695
  - topic_key: dl19-passage
    eval_key: dl19-passage
    scores:
    - MAP: 0.4469
      nDCG@10: 0.7204
      R@1K: 0.8261
  - topic_key: dl20
    eval_key: dl20-passage
    scores:
    - MAP: 0.4754
      nDCG@10: 0.6882
      R@1K: 0.8429
# TCT_ColBERT-V2-HN+ via Faiss using --encoded-queries; the query-set name is
# built from the tct_colbert-v2-hnp- prefix plus $topics.
- name: tct_colbert-v2-hnp
  display: "TCT_ColBERT-V2-HN+: pre-encoded"
  display-html: "TCT_ColBERT-V2-HN+: pre-encoded queries"
  display-row: "[5]"
  command: python -m pyserini.search.faiss --threads 16 --batch-size 512 --index msmarco-passage-tct_colbert-v2-hnp-bf --topics $topics --encoded-queries tct_colbert-v2-hnp-$topics --output $output
  topics:
  - topic_key: msmarco-passage-dev-subset
    eval_key: msmarco-passage-dev-subset
    scores:
    - MRR@10: 0.3584
      R@1K: 0.9695
  - topic_key: dl19-passage
    eval_key: dl19-passage
    scores:
    - MAP: 0.4469
      nDCG@10: 0.7204
      R@1K: 0.8261
  - topic_key: dl20
    eval_key: dl20-passage
    scores:
    - MAP: 0.4754
      nDCG@10: 0.6882
      R@1K: 0.8429