# List of retrieval conditions. Each entry carries a name, display labels,
# a search command template containing the $topics and $output placeholders,
# and a list of topic sets with the scores recorded for each.
conditions:
# BM25 over the msmarco-v1-doc-slim index.
# NOTE(review): the command passes no --k1/--b although the label says
# k1=4.46, b=0.82 - presumably the searcher picks tuned values for this
# index; confirm.
- name: bm25-doc-tuned
  display: BM25 doc (k1=4.46, b=0.82)
  display-html: BM25 doc (k1=4.46, b=0.82)
  command: python -m pyserini.search.lucene --threads 16 --batch-size 128 --index msmarco-v1-doc-slim --topics $topics --output $output --bm25
  topics:
  - topic_key: msmarco-doc-dev
    eval_key: msmarco-doc-dev
    scores:
    - MRR@10: 0.2767
      R@1K: 0.9357
  - topic_key: dl19-doc
    eval_key: dl19-doc
    scores:
    - MAP: 0.2336
      nDCG@10: 0.5233
      R@1K: 0.6757
  - topic_key: dl20
    eval_key: dl20-doc
    scores:
    - MAP: 0.3581
      nDCG@10: 0.5061
      R@1K: 0.7776
# BM25 over the msmarco-v1-doc-slim index with explicit --k1 0.9 --b 0.4.
- name: bm25-doc-default
  display: BM25 doc (k1=0.9, b=0.4)
  display-html: BM25 doc (k1=0.9, b=0.4)
  display-row: "[1] — (1a)"
  command: python -m pyserini.search.lucene --threads 16 --batch-size 128 --index msmarco-v1-doc-slim --topics $topics --output $output --bm25 --k1 0.9 --b 0.4
  topics:
  - topic_key: msmarco-doc-dev
    eval_key: msmarco-doc-dev
    scores:
    - MRR@10: 0.2299
      R@1K: 0.8856
  - topic_key: dl19-doc
    eval_key: dl19-doc
    scores:
    - MAP: 0.2434
      nDCG@10: 0.5176
      R@1K: 0.6966
  - topic_key: dl20
    eval_key: dl20-doc
    scores:
    - MAP: 0.3793
      nDCG@10: 0.5286
      R@1K: 0.8085
# BM25 over the msmarco-v1-doc-segmented-slim index, run with --max-passage
# (--hits 10000, --max-passage-hits 1000).
# NOTE(review): the command passes no --k1/--b although the label says
# k1=2.16, b=0.61 - presumably the searcher picks tuned values for this
# index; confirm.
- name: bm25-doc-segmented-tuned
  display: BM25 doc segmented (k1=2.16, b=0.61)
  display-html: BM25 doc segmented (k1=2.16, b=0.61)
  command: python -m pyserini.search.lucene --threads 16 --batch-size 128 --index msmarco-v1-doc-segmented-slim --topics $topics --output $output --bm25 --hits 10000 --max-passage-hits 1000 --max-passage
  topics:
  - topic_key: msmarco-doc-dev
    eval_key: msmarco-doc-dev
    scores:
    - MRR@10: 0.2756
      R@1K: 0.9311
  - topic_key: dl19-doc
    eval_key: dl19-doc
    scores:
    - MAP: 0.2398
      nDCG@10: 0.5389
      R@1K: 0.6565
  - topic_key: dl20
    eval_key: dl20-doc
    scores:
    - MAP: 0.3458
      nDCG@10: 0.5213
      R@1K: 0.7725
# BM25 over the msmarco-v1-doc-segmented-slim index with explicit
# --k1 0.9 --b 0.4, run with --max-passage
# (--hits 10000, --max-passage-hits 1000).
- name: bm25-doc-segmented-default
  display: BM25 doc segmented (k1=0.9, b=0.4)
  display-html: BM25 doc segmented (k1=0.9, b=0.4)
  display-row: "[1] — (1b)"
  command: python -m pyserini.search.lucene --threads 16 --batch-size 128 --index msmarco-v1-doc-segmented-slim --topics $topics --output $output --bm25 --k1 0.9 --b 0.4 --hits 10000 --max-passage-hits 1000 --max-passage
  topics:
  - topic_key: msmarco-doc-dev
    eval_key: msmarco-doc-dev
    scores:
    - MRR@10: 0.2684
      R@1K: 0.9178
  - topic_key: dl19-doc
    eval_key: dl19-doc
    scores:
    - MAP: 0.2449
      nDCG@10: 0.5302
      R@1K: 0.6871
  - topic_key: dl20
    eval_key: dl20-doc
    scores:
    - MAP: 0.3586
      nDCG@10: 0.5281
      R@1K: 0.7755
# BM25 with --rm3 over the msmarco-v1-doc-full index.
# NOTE(review): the command passes no --k1/--b although the label says
# k1=4.46, b=0.82 - presumably the searcher picks tuned values for this
# index; confirm.
- name: bm25-rm3-doc-tuned
  display: BM25+RM3 doc (k1=4.46, b=0.82)
  display-html: BM25+RM3 doc (k1=4.46, b=0.82)
  command: python -m pyserini.search.lucene --threads 16 --batch-size 128 --index msmarco-v1-doc-full --topics $topics --output $output --bm25 --rm3
  topics:
  - topic_key: msmarco-doc-dev
    eval_key: msmarco-doc-dev
    scores:
    - MRR@10: 0.2227
      R@1K: 0.9303
  - topic_key: dl19-doc
    eval_key: dl19-doc
    scores:
    - MAP: 0.2638
      nDCG@10: 0.5526
      R@1K: 0.7188
  - topic_key: dl20
    eval_key: dl20-doc
    scores:
    - MAP: 0.3610
      nDCG@10: 0.5195
      R@1K: 0.8180
# BM25 with --rm3 over the msmarco-v1-doc-full index, explicit
# --k1 0.9 --b 0.4.
- name: bm25-rm3-doc-default
  display: BM25+RM3 doc (k1=0.9, b=0.4)
  display-html: BM25+RM3 doc (k1=0.9, b=0.4)
  display-row: "[1] — (1c)"
  command: python -m pyserini.search.lucene --threads 16 --batch-size 128 --index msmarco-v1-doc-full --topics $topics --output $output --bm25 --rm3 --k1 0.9 --b 0.4
  topics:
  - topic_key: msmarco-doc-dev
    eval_key: msmarco-doc-dev
    scores:
    - MRR@10: 0.1618
      R@1K: 0.8783
  - topic_key: dl19-doc
    eval_key: dl19-doc
    scores:
    - MAP: 0.2773
      nDCG@10: 0.5174
      R@1K: 0.7507
  - topic_key: dl20
    eval_key: dl20-doc
    scores:
    - MAP: 0.4015
      nDCG@10: 0.5254
      R@1K: 0.8259
# BM25 with --rm3 over the msmarco-v1-doc-segmented-full index, run with
# --max-passage (--hits 10000, --max-passage-hits 1000).
# NOTE(review): the command passes no --k1/--b although the label says
# k1=2.16, b=0.61 - presumably the searcher picks tuned values for this
# index; confirm.
- name: bm25-rm3-doc-segmented-tuned
  display: BM25+RM3 doc segmented (k1=2.16, b=0.61)
  display-html: BM25+RM3 doc segmented (k1=2.16, b=0.61)
  command: python -m pyserini.search.lucene --threads 16 --batch-size 128 --index msmarco-v1-doc-segmented-full --topics $topics --output $output --bm25 --rm3 --hits 10000 --max-passage-hits 1000 --max-passage
  topics:
  - topic_key: msmarco-doc-dev
    eval_key: msmarco-doc-dev
    scores:
    - MRR@10: 0.2448
      R@1K: 0.9359
  - topic_key: dl19-doc
    eval_key: dl19-doc
    scores:
    - MAP: 0.2655
      nDCG@10: 0.5392
      R@1K: 0.7037
  - topic_key: dl20
    eval_key: dl20-doc
    scores:
    - MAP: 0.3471
      nDCG@10: 0.5030
      R@1K: 0.8056
# BM25 with --rm3 over the msmarco-v1-doc-segmented-full index, explicit
# --k1 0.9 --b 0.4, run with --max-passage
# (--hits 10000, --max-passage-hits 1000).
- name: bm25-rm3-doc-segmented-default
  display: BM25+RM3 doc segmented (k1=0.9, b=0.4)
  display-html: BM25+RM3 doc segmented (k1=0.9, b=0.4)
  display-row: "[1] — (1d)"
  command: python -m pyserini.search.lucene --threads 16 --batch-size 128 --index msmarco-v1-doc-segmented-full --topics $topics --output $output --bm25 --rm3 --k1 0.9 --b 0.4 --hits 10000 --max-passage-hits 1000 --max-passage
  topics:
  - topic_key: msmarco-doc-dev
    eval_key: msmarco-doc-dev
    scores:
    - MRR@10: 0.2413
      R@1K: 0.9351
  - topic_key: dl19-doc
    eval_key: dl19-doc
    scores:
    - MAP: 0.2892
      nDCG@10: 0.5684
      R@1K: 0.7368
  - topic_key: dl20
    eval_key: dl20-doc
    scores:
    - MAP: 0.3792
      nDCG@10: 0.5202
      R@1K: 0.8023
# BM25 with --rocchio over the msmarco-v1-doc-full index.
# NOTE(review): the command passes no --k1/--b although the label says
# k1=4.46, b=0.82 - presumably the searcher picks tuned values for this
# index; confirm.
- name: bm25-rocchio-doc-tuned
  display: BM25+Rocchio doc (k1=4.46, b=0.82)
  display-html: BM25+Rocchio doc (k1=4.46, b=0.82)
  command: python -m pyserini.search.lucene --threads 16 --batch-size 128 --index msmarco-v1-doc-full --topics $topics --output $output --bm25 --rocchio
  topics:
  - topic_key: msmarco-doc-dev
    eval_key: msmarco-doc-dev
    scores:
    - MRR@10: 0.2242
      R@1K: 0.9314
  - topic_key: dl19-doc
    eval_key: dl19-doc
    scores:
    - MAP: 0.2657
      nDCG@10: 0.5584
      R@1K: 0.7299
  - topic_key: dl20
    eval_key: dl20-doc
    scores:
    - MAP: 0.3628
      nDCG@10: 0.5199
      R@1K: 0.8217
# BM25 with --rocchio over the msmarco-v1-doc-full index, explicit
# --k1 0.9 --b 0.4. No display-row is set for this condition.
- name: bm25-rocchio-doc-default
  display: BM25+Rocchio doc (k1=0.9, b=0.4)
  display-html: BM25+Rocchio doc (k1=0.9, b=0.4)
  command: python -m pyserini.search.lucene --threads 16 --batch-size 128 --index msmarco-v1-doc-full --topics $topics --output $output --bm25 --rocchio --k1 0.9 --b 0.4
  topics:
  - topic_key: msmarco-doc-dev
    eval_key: msmarco-doc-dev
    scores:
    - MRR@10: 0.1624
      R@1K: 0.8789
  - topic_key: dl19-doc
    eval_key: dl19-doc
    scores:
    - MAP: 0.2811
      nDCG@10: 0.5256
      R@1K: 0.7546
  - topic_key: dl20
    eval_key: dl20-doc
    scores:
    - MAP: 0.4089
      nDCG@10: 0.5192
      R@1K: 0.8273
# BM25 with --rocchio over the msmarco-v1-doc-segmented-full index, run with
# --max-passage (--hits 10000, --max-passage-hits 1000).
# NOTE(review): the command passes no --k1/--b although the label says
# k1=2.16, b=0.61 - presumably the searcher picks tuned values for this
# index; confirm.
- name: bm25-rocchio-doc-segmented-tuned
  display: BM25+Rocchio doc segmented (k1=2.16, b=0.61)
  display-html: BM25+Rocchio doc segmented (k1=2.16, b=0.61)
  command: python -m pyserini.search.lucene --threads 16 --batch-size 128 --index msmarco-v1-doc-segmented-full --topics $topics --output $output --bm25 --rocchio --hits 10000 --max-passage-hits 1000 --max-passage
  topics:
  - topic_key: msmarco-doc-dev
    eval_key: msmarco-doc-dev
    scores:
    - MRR@10: 0.2475
      R@1K: 0.9395
  - topic_key: dl19-doc
    eval_key: dl19-doc
    scores:
    - MAP: 0.2672
      nDCG@10: 0.5421
      R@1K: 0.7115
  - topic_key: dl20
    eval_key: dl20-doc
    scores:
    - MAP: 0.3521
      nDCG@10: 0.4997
      R@1K: 0.8042
# BM25 with --rocchio over the msmarco-v1-doc-segmented-full index, explicit
# --k1 0.9 --b 0.4, run with --max-passage
# (--hits 10000, --max-passage-hits 1000). No display-row is set.
- name: bm25-rocchio-doc-segmented-default
  display: BM25+Rocchio doc segmented (k1=0.9, b=0.4)
  display-html: BM25+Rocchio doc segmented (k1=0.9, b=0.4)
  command: python -m pyserini.search.lucene --threads 16 --batch-size 128 --index msmarco-v1-doc-segmented-full --topics $topics --output $output --bm25 --rocchio --k1 0.9 --b 0.4 --hits 10000 --max-passage-hits 1000 --max-passage
  topics:
  - topic_key: msmarco-doc-dev
    eval_key: msmarco-doc-dev
    scores:
    - MRR@10: 0.2447
      R@1K: 0.9351
  - topic_key: dl19-doc
    eval_key: dl19-doc
    scores:
    - MAP: 0.2889
      nDCG@10: 0.5570
      R@1K: 0.7423
  - topic_key: dl20
    eval_key: dl20-doc
    scores:
    - MAP: 0.3830
      nDCG@10: 0.5226
      R@1K: 0.8102
# BM25 over the msmarco-v1-doc-d2q-t5 index.
# NOTE(review): the command passes no --k1/--b although the label says
# k1=4.68, b=0.87 - presumably the searcher picks tuned values for this
# index; confirm.
- name: bm25-d2q-t5-doc-tuned
  display: BM25 w/ doc2query-T5 doc (k1=4.68, b=0.87)
  display-html: BM25 w/ doc2query-T5 doc (k1=4.68, b=0.87)
  command: python -m pyserini.search.lucene --threads 16 --batch-size 128 --index msmarco-v1-doc-d2q-t5 --topics $topics --output $output --bm25
  topics:
  - topic_key: msmarco-doc-dev
    eval_key: msmarco-doc-dev
    scores:
    - MRR@10: 0.3269
      R@1K: 0.9553
  - topic_key: dl19-doc
    eval_key: dl19-doc
    scores:
    - MAP: 0.2620
      nDCG@10: 0.5972
      R@1K: 0.6867
  - topic_key: dl20
    eval_key: dl20-doc
    scores:
    - MAP: 0.4099
      nDCG@10: 0.5852
      R@1K: 0.8105
# BM25 over the msmarco-v1-doc-d2q-t5 index with explicit --k1 0.9 --b 0.4.
- name: bm25-d2q-t5-doc-default
  display: BM25 w/ doc2query-T5 doc (k1=0.9, b=0.4)
  display-html: BM25 w/ doc2query-T5 doc (k1=0.9, b=0.4)
  display-row: "[1] — (2a)"
  command: python -m pyserini.search.lucene --threads 16 --batch-size 128 --index msmarco-v1-doc-d2q-t5 --topics $topics --output $output --bm25 --k1 0.9 --b 0.4
  topics:
  - topic_key: msmarco-doc-dev
    eval_key: msmarco-doc-dev
    scores:
    - MRR@10: 0.2880
      R@1K: 0.9259
  - topic_key: dl19-doc
    eval_key: dl19-doc
    scores:
    - MAP: 0.2700
      nDCG@10: 0.5968
      R@1K: 0.7190
  - topic_key: dl20
    eval_key: dl20-doc
    scores:
    - MAP: 0.4230
      nDCG@10: 0.5885
      R@1K: 0.8403
# BM25 over the msmarco-v1-doc-segmented-d2q-t5 index, run with --max-passage
# (--hits 10000, --max-passage-hits 1000).
# NOTE(review): the command passes no --k1/--b although the label says
# k1=2.56, b=0.59 - presumably the searcher picks tuned values for this
# index; confirm.
- name: bm25-d2q-t5-doc-segmented-tuned
  display: BM25 w/ doc2query-T5 doc segmented (k1=2.56, b=0.59)
  display-html: BM25 w/ doc2query-T5 doc segmented (k1=2.56, b=0.59)
  command: python -m pyserini.search.lucene --threads 16 --batch-size 128 --index msmarco-v1-doc-segmented-d2q-t5 --topics $topics --output $output --bm25 --hits 10000 --max-passage-hits 1000 --max-passage
  topics:
  - topic_key: msmarco-doc-dev
    eval_key: msmarco-doc-dev
    scores:
    - MRR@10: 0.3209
      R@1K: 0.9530
  - topic_key: dl19-doc
    eval_key: dl19-doc
    scores:
    - MAP: 0.2658
      nDCG@10: 0.6273
      R@1K: 0.6707
  - topic_key: dl20
    eval_key: dl20-doc
    scores:
    - MAP: 0.4047
      nDCG@10: 0.5943
      R@1K: 0.7968
# BM25 over the msmarco-v1-doc-segmented-d2q-t5 index with explicit
# --k1 0.9 --b 0.4, run with --max-passage
# (--hits 10000, --max-passage-hits 1000).
- name: bm25-d2q-t5-doc-segmented-default
  display: BM25 w/ doc2query-T5 doc segmented (k1=0.9, b=0.4)
  display-html: BM25 w/ doc2query-T5 doc segmented (k1=0.9, b=0.4)
  display-row: "[1] — (2b)"
  command: python -m pyserini.search.lucene --threads 16 --batch-size 128 --index msmarco-v1-doc-segmented-d2q-t5 --topics $topics --output $output --bm25 --k1 0.9 --b 0.4 --hits 10000 --max-passage-hits 1000 --max-passage
  topics:
  - topic_key: msmarco-doc-dev
    eval_key: msmarco-doc-dev
    scores:
    - MRR@10: 0.3179
      R@1K: 0.9490
  - topic_key: dl19-doc
    eval_key: dl19-doc
    scores:
    - MAP: 0.2798
      nDCG@10: 0.6119
      R@1K: 0.7165
  - topic_key: dl20
    eval_key: dl20-doc
    scores:
    - MAP: 0.4150
      nDCG@10: 0.5957
      R@1K: 0.8046
# BM25 with --rm3 over the msmarco-v1-doc-d2q-t5-docvectors index.
# NOTE(review): the command passes no --k1/--b although the label says
# k1=4.68, b=0.87 - presumably the searcher picks tuned values for this
# index; confirm.
- name: bm25-rm3-d2q-t5-doc-tuned
  display: BM25+RM3 w/ doc2query-T5 doc (k1=4.68, b=0.87)
  display-html: BM25+RM3 w/ doc2query-T5 doc (k1=4.68, b=0.87)
  command: python -m pyserini.search.lucene --threads 16 --batch-size 128 --index msmarco-v1-doc-d2q-t5-docvectors --topics $topics --output $output --bm25 --rm3
  topics:
  - topic_key: msmarco-doc-dev
    eval_key: msmarco-doc-dev
    scores:
    - MRR@10: 0.2623
      R@1K: 0.9522
  - topic_key: dl19-doc
    eval_key: dl19-doc
    scores:
    - MAP: 0.2813
      nDCG@10: 0.6091
      R@1K: 0.7184
  - topic_key: dl20
    eval_key: dl20-doc
    scores:
    - MAP: 0.4100
      nDCG@10: 0.5745
      R@1K: 0.8238
# BM25 with --rm3 over the msmarco-v1-doc-d2q-t5-docvectors index, explicit
# --k1 0.9 --b 0.4.
- name: bm25-rm3-d2q-t5-doc-default
  display: BM25+RM3 w/ doc2query-T5 doc (k1=0.9, b=0.4)
  display-html: BM25+RM3 w/ doc2query-T5 doc (k1=0.9, b=0.4)
  display-row: "[1] — (2c)"
  command: python -m pyserini.search.lucene --threads 16 --batch-size 128 --index msmarco-v1-doc-d2q-t5-docvectors --topics $topics --output $output --bm25 --rm3 --k1 0.9 --b 0.4
  topics:
  - topic_key: msmarco-doc-dev
    eval_key: msmarco-doc-dev
    scores:
    - MRR@10: 0.1834
      R@1K: 0.9126
  - topic_key: dl19-doc
    eval_key: dl19-doc
    scores:
    - MAP: 0.3045
      nDCG@10: 0.5904
      R@1K: 0.7737
  - topic_key: dl20
    eval_key: dl20-doc
    scores:
    - MAP: 0.4230
      nDCG@10: 0.5427
      R@1K: 0.8631
# BM25 with --rm3 over the msmarco-v1-doc-segmented-d2q-t5-docvectors index,
# run with --max-passage (--hits 10000, --max-passage-hits 1000).
# NOTE(review): the command passes no --k1/--b although the label says
# k1=2.56, b=0.59 - presumably the searcher picks tuned values for this
# index; confirm.
- name: bm25-rm3-d2q-t5-doc-segmented-tuned
  display: BM25+RM3 w/ doc2query-T5 doc segmented (k1=2.56, b=0.59)
  display-html: BM25+RM3 w/ doc2query-T5 doc segmented (k1=2.56, b=0.59)
  command: python -m pyserini.search.lucene --threads 16 --batch-size 128 --index msmarco-v1-doc-segmented-d2q-t5-docvectors --topics $topics --output $output --bm25 --rm3 --hits 10000 --max-passage-hits 1000 --max-passage
  topics:
  - topic_key: msmarco-doc-dev
    eval_key: msmarco-doc-dev
    scores:
    - MRR@10: 0.2973
      R@1K: 0.9563
  - topic_key: dl19-doc
    eval_key: dl19-doc
    scores:
    - MAP: 0.2892
      nDCG@10: 0.6247
      R@1K: 0.7069
  - topic_key: dl20
    eval_key: dl20-doc
    scores:
    - MAP: 0.4016
      nDCG@10: 0.5711
      R@1K: 0.8156
# BM25 with --rm3 over the msmarco-v1-doc-segmented-d2q-t5-docvectors index,
# explicit --k1 0.9 --b 0.4, run with --max-passage
# (--hits 10000, --max-passage-hits 1000).
- name: bm25-rm3-d2q-t5-doc-segmented-default
  display: BM25+RM3 w/ doc2query-T5 doc segmented (k1=0.9, b=0.4)
  display-html: BM25+RM3 w/ doc2query-T5 doc segmented (k1=0.9, b=0.4)
  display-row: "[1] — (2d)"
  command: python -m pyserini.search.lucene --threads 16 --batch-size 128 --index msmarco-v1-doc-segmented-d2q-t5-docvectors --topics $topics --output $output --bm25 --rm3 --k1 0.9 --b 0.4 --hits 10000 --max-passage-hits 1000 --max-passage
  topics:
  - topic_key: msmarco-doc-dev
    eval_key: msmarco-doc-dev
    scores:
    - MRR@10: 0.2803
      R@1K: 0.9551
  - topic_key: dl19-doc
    eval_key: dl19-doc
    scores:
    - MAP: 0.3030
      nDCG@10: 0.6290
      R@1K: 0.7483
  - topic_key: dl20
    eval_key: dl20-doc
    scores:
    - MAP: 0.4271
      nDCG@10: 0.5851
      R@1K: 0.8266
# uniCOIL (noexp) over the msmarco-v1-doc-segmented-unicoil-noexp index with
# --impact; queries are encoded at search time via
# --encoder castorini/unicoil-noexp-msmarco-passage. Run with --max-passage
# (--hits 10000, --max-passage-hits 1000).
- name: unicoil-noexp-otf
  display: "uniCOIL (noexp): otf"
  display-html: "uniCOIL (noexp): on-the-fly query inference"
  command: python -m pyserini.search.lucene --threads 16 --batch-size 128 --index msmarco-v1-doc-segmented-unicoil-noexp --topics $topics --encoder castorini/unicoil-noexp-msmarco-passage --output $output --impact --hits 10000 --max-passage-hits 1000 --max-passage
  topics:
  - topic_key: msmarco-doc-dev
    eval_key: msmarco-doc-dev
    scores:
    - MRR@10: 0.3410
      R@1K: 0.9420
  - topic_key: dl19-doc
    eval_key: dl19-doc
    scores:
    - MAP: 0.2661
      nDCG@10: 0.6347
      R@1K: 0.6385
  - topic_key: dl20
    eval_key: dl20-doc
    scores:
    - MAP: 0.3698
      nDCG@10: 0.5906
      R@1K: 0.7621
# uniCOIL (noexp) over the msmarco-v1-doc-segmented-unicoil-noexp index with
# --impact and no --encoder; the topic keys carry a -unicoil-noexp suffix
# while the eval keys stay the plain ones. Run with --max-passage
# (--hits 10000, --max-passage-hits 1000).
- name: unicoil-noexp
  display: "uniCOIL (noexp): pre-encoded"
  display-html: "uniCOIL (noexp): pre-encoded queries"
  display-row: "[1] — (3a)"
  command: python -m pyserini.search.lucene --threads 16 --batch-size 128 --index msmarco-v1-doc-segmented-unicoil-noexp --topics $topics --output $output --impact --hits 10000 --max-passage-hits 1000 --max-passage
  topics:
  - topic_key: msmarco-doc-dev-unicoil-noexp
    eval_key: msmarco-doc-dev
    scores:
    - MRR@10: 0.3409
      R@1K: 0.9420
  - topic_key: dl19-doc-unicoil-noexp
    eval_key: dl19-doc
    scores:
    - MAP: 0.2665
      nDCG@10: 0.6349
      R@1K: 0.6391
  - topic_key: dl20-unicoil-noexp
    eval_key: dl20-doc
    scores:
    - MAP: 0.3698
      nDCG@10: 0.5893
      R@1K: 0.7623
# uniCOIL (w/ doc2query-T5) over the msmarco-v1-doc-segmented-unicoil index
# with --impact; queries are encoded at search time via
# --encoder castorini/unicoil-msmarco-passage. Run with --max-passage
# (--hits 10000, --max-passage-hits 1000).
- name: unicoil-otf
  display: "uniCOIL (w/ doc2query-T5): otf"
  display-html: "uniCOIL (w/ doc2query-T5): on-the-fly query inference"
  command: python -m pyserini.search.lucene --threads 16 --batch-size 128 --index msmarco-v1-doc-segmented-unicoil --topics $topics --encoder castorini/unicoil-msmarco-passage --output $output --impact --hits 10000 --max-passage-hits 1000 --max-passage
  topics:
  - topic_key: msmarco-doc-dev
    eval_key: msmarco-doc-dev
    scores:
    - MRR@10: 0.3532
      R@1K: 0.9546
  - topic_key: dl19-doc
    eval_key: dl19-doc
    scores:
    - MAP: 0.2789
      nDCG@10: 0.6396
      R@1K: 0.6654
  - topic_key: dl20
    eval_key: dl20-doc
    scores:
    - MAP: 0.3881
      nDCG@10: 0.6030
      R@1K: 0.7866
# uniCOIL (w/ doc2query-T5) over the msmarco-v1-doc-segmented-unicoil index
# with --impact and no --encoder; the topic keys carry a -unicoil suffix
# while the eval keys stay the plain ones. Run with --max-passage
# (--hits 10000, --max-passage-hits 1000).
- name: unicoil
  display: "uniCOIL (w/ doc2query-T5): pre-encoded"
  display-html: "uniCOIL (w/ doc2query-T5): pre-encoded queries"
  display-row: "[1] — (3b)"
  command: python -m pyserini.search.lucene --threads 16 --batch-size 128 --index msmarco-v1-doc-segmented-unicoil --topics $topics --output $output --impact --hits 10000 --max-passage-hits 1000 --max-passage
  topics:
  - topic_key: msmarco-doc-dev-unicoil
    eval_key: msmarco-doc-dev
    scores:
    - MRR@10: 0.3531
      R@1K: 0.9546
  - topic_key: dl19-doc-unicoil
    eval_key: dl19-doc
    scores:
    - MAP: 0.2789
      nDCG@10: 0.6396
      R@1K: 0.6652
  - topic_key: dl20-unicoil
    eval_key: dl20-doc
    scores:
    - MAP: 0.3882
      nDCG@10: 0.6033
      R@1K: 0.7869