Spaces:
Runtime error
Runtime error
# | |
# Pyserini: Reproducible IR research with sparse and dense representations | |
# | |
# Licensed under the Apache License, Version 2.0 (the "License"); | |
# you may not use this file except in compliance with the License. | |
# You may obtain a copy of the License at | |
# | |
# http://www.apache.org/licenses/LICENSE-2.0 | |
# | |
# Unless required by applicable law or agreed to in writing, software | |
# distributed under the License is distributed on an "AS IS" BASIS, | |
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
# See the License for the specific language governing permissions and | |
# limitations under the License. | |
# | |
import os | |
import shutil | |
import unittest | |
from random import randint | |
from integrations.lucenesearcher_score_checker import LuceneSearcherScoreChecker | |
from integrations.utils import run_command, parse_score | |
class TestSearchIntegration(unittest.TestCase): | |
def setUp(self): | |
# The current directory depends on if you're running inside an IDE or from command line. | |
curdir = os.getcwd() | |
if curdir.endswith('clprf'): | |
self.pyserini_root = '../..' | |
self.anserini_root = '../../../anserini' | |
else: | |
self.pyserini_root = '.' | |
self.anserini_root = '../anserini' | |
self.tmp = f'{self.pyserini_root}/integrations/tmp{randint(0, 10000)}' | |
if os.path.exists(self.tmp): | |
shutil.rmtree(self.tmp) | |
else: | |
os.mkdir(self.tmp) | |
self.pyserini_search_cmd = 'python -m pyserini.search.lucene' | |
self.pyserini_fusion_cmd = 'python -m pyserini.fusion' | |
self.pyserini_eval_cmd = 'python -m pyserini.eval.trec_eval' | |
self.core17_index_path = os.path.join(self.anserini_root, 'indexes/lucene-index.nyt') | |
self.core17_qrels_path = os.path.join(self.pyserini_root, 'tools/topics-and-qrels/qrels.core17.txt') | |
self.core18_index_path = os.path.join(self.anserini_root, 'indexes/lucene-index.wapo.v2') | |
self.core18_qrels_path = os.path.join(self.pyserini_root, 'tools/topics-and-qrels/qrels.core18.txt') | |
self.robust04_index_path = os.path.join(self.anserini_root, 'indexes/lucene-index.disk45') | |
self.robust04_qrels_path = os.path.join(self.pyserini_root, 'tools/topics-and-qrels/qrels.robust04.txt') | |
self.robust05_index_path = os.path.join(self.anserini_root, 'indexes/lucene-index.robust05') | |
self.robust05_qrels_path = os.path.join(self.pyserini_root, 'tools/topics-and-qrels/qrels.robust05.txt') | |
self.core17_checker = LuceneSearcherScoreChecker( | |
index=self.core17_index_path, | |
topics=os.path.join(self.pyserini_root, 'tools/topics-and-qrels/topics.core17.txt'), | |
pyserini_topics='core17', | |
qrels=self.core17_qrels_path, | |
eval=f'{self.pyserini_eval_cmd} -m map -m P.30') | |
self.core18_checker = LuceneSearcherScoreChecker( | |
index=self.core18_index_path, | |
topics=os.path.join(self.pyserini_root, 'tools/topics-and-qrels/topics.core18.txt'), | |
pyserini_topics='core18', | |
qrels=os.path.join(self.pyserini_root, 'tools/topics-and-qrels/qrels.core18.txt'), | |
eval=f'{self.pyserini_eval_cmd} -m map -m P.30') | |
self.robust04_checker = LuceneSearcherScoreChecker( | |
index=self.robust04_index_path, | |
topics=os.path.join(self.pyserini_root, 'tools/topics-and-qrels/topics.robust04.txt'), | |
pyserini_topics='robust04', | |
qrels=os.path.join(self.pyserini_root, 'tools/topics-and-qrels/qrels.robust04.txt'), | |
eval=f'{self.pyserini_eval_cmd} -m map -m P.30') | |
self.robust05_checker = LuceneSearcherScoreChecker( | |
index=self.robust05_index_path, | |
topics=os.path.join(self.pyserini_root, 'tools/topics-and-qrels/topics.robust05.txt'), | |
pyserini_topics='robust05', | |
qrels=os.path.join(self.pyserini_root, 'tools/topics-and-qrels/qrels.robust05.txt'), | |
eval=f'{self.pyserini_eval_cmd} -m map -m P.30') | |
def test_cross_validation(self): | |
pyserini_topics = 'core17' | |
os.mkdir(f'{self.tmp}/core17') | |
for alpha in [x / 10.0 for x in range(0, 11)]: | |
run_file_cmd = f'{self.pyserini_search_cmd} --index {self.core17_index_path} \ | |
--topics {pyserini_topics} --output {self.tmp}/core17/core17_lr_A{alpha}_bm25.txt \ | |
--prcl lr --prcl.vectorizer TfidfVectorizer --prcl.alpha {alpha}' | |
status = os.system(run_file_cmd) | |
self.assertEqual(status, 0) | |
os.system(f'python {self.pyserini_root}/scripts/classifier_prf/cross_validate.py \ | |
--anserini {self.anserini_root} --run_file {self.tmp} --pyserini {self.pyserini_root} \ | |
--collection core17 --output {self.tmp}/core17_lr.txt --classifier lr ') | |
cmd = f'{self.pyserini_eval_cmd} -m map -m P.30 \ | |
{self.anserini_root}/src/main/resources/topics-and-qrels/qrels.core17.txt \ | |
{self.tmp}/core17_lr.txt' | |
status = os.system(cmd) | |
stdout, stderr = run_command(cmd) | |
score = parse_score(stdout, 'map') | |
self.assertEqual(status, 0) | |
self.assertEqual(stderr, '') | |
self.assertAlmostEqual(score, 0.2462, delta=0.0001) | |
def test_core17(self): | |
self.assertTrue(self.core17_checker.run('core17_bm25', '--bm25', 0.2087)) | |
def test_core17_rm3(self): | |
self.assertTrue(self.core17_checker.run('core17_bm25', '--bm25 --rm3', 0.2798)) | |
def test_core17_lr(self): | |
pyserini_topics = 'core17' | |
run_file_cmd = f'{self.pyserini_search_cmd} --index {self.core17_index_path} \ | |
--topics {pyserini_topics} --output {self.tmp}/core17_lr.txt \ | |
--prcl lr --prcl.vectorizer TfidfVectorizer --prcl.alpha 0.7' | |
status = os.system(run_file_cmd) | |
self.assertEqual(status, 0) | |
score_cmd = f'{self.pyserini_eval_cmd} -m map -m P.30 \ | |
{self.anserini_root}/src/main/resources/topics-and-qrels/qrels.core17.txt \ | |
{self.tmp}/core17_lr.txt' | |
status = os.system(score_cmd) | |
stdout, stderr = run_command(score_cmd) | |
score = parse_score(stdout, 'map') | |
self.assertEqual(status, 0) | |
self.assertEqual(stderr, '') | |
self.assertAlmostEqual(score, 0.2473, delta=0.0001) | |
def test_core17_lr_rm3(self): | |
pyserini_topics = 'core17' | |
run_file_cmd = f'{self.pyserini_search_cmd} --index {self.core17_index_path} \ | |
--topics {pyserini_topics} --output {self.tmp}/core17_lr_rm3.txt \ | |
--prcl lr --prcl.vectorizer TfidfVectorizer --prcl.alpha 0.4 --rm3' | |
status = os.system(run_file_cmd) | |
self.assertEqual(status, 0) | |
score_cmd = f'{self.pyserini_eval_cmd} -m map -m P.30 \ | |
{self.anserini_root}/src/main/resources/topics-and-qrels/qrels.core17.txt \ | |
{self.tmp}/core17_lr_rm3.txt' | |
status = os.system(score_cmd) | |
stdout, stderr = run_command(score_cmd) | |
score = parse_score(stdout, 'map') | |
self.assertEqual(status, 0) | |
self.assertEqual(stderr, '') | |
self.assertAlmostEqual(score, 0.2926, delta=0.0001) | |
def test_core17_svm(self): | |
pyserini_topics = 'core17' | |
run_file_cmd = f'{self.pyserini_search_cmd} --index {self.core17_index_path} \ | |
--topics {pyserini_topics} --output {self.tmp}/core17_svm.txt \ | |
--prcl svm --prcl.vectorizer TfidfVectorizer --prcl.alpha 0.7' | |
status = os.system(run_file_cmd) | |
self.assertEqual(status, 0) | |
score_cmd = f'{self.pyserini_eval_cmd} -m map -m P.30 \ | |
{self.anserini_root}/src/main/resources/topics-and-qrels/qrels.core17.txt \ | |
{self.tmp}/core17_svm.txt' | |
status = os.system(score_cmd) | |
stdout, stderr = run_command(score_cmd) | |
score = parse_score(stdout, 'map') | |
self.assertEqual(status, 0) | |
self.assertEqual(stderr, '') | |
self.assertAlmostEqual(score, 0.2385, delta=0.0001) | |
def test_core17_svm_rm3(self): | |
pyserini_topics = 'core17' | |
run_file_cmd = f'{self.pyserini_search_cmd} --index {self.core17_index_path} \ | |
--topics {pyserini_topics} --output {self.tmp}/core17_svm_rm3.txt \ | |
--prcl svm --prcl.vectorizer TfidfVectorizer --prcl.alpha 0.4 --rm3' | |
status = os.system(run_file_cmd) | |
self.assertEqual(status, 0) | |
score_cmd = f'{self.pyserini_eval_cmd} -m map -m P.30 \ | |
{self.anserini_root}/src/main/resources/topics-and-qrels/qrels.core17.txt \ | |
{self.tmp}/core17_svm_rm3.txt' | |
status = os.system(score_cmd) | |
stdout, stderr = run_command(score_cmd) | |
score = parse_score(stdout, 'map') | |
self.assertEqual(status, 0) | |
self.assertEqual(stderr, '') | |
self.assertAlmostEqual(score, 0.2956, delta=0.0001) | |
def test_core17_avg(self): | |
pyserini_topics = 'core17' | |
run_file_cmd = f'{self.pyserini_search_cmd} --index {self.core17_index_path} \ | |
--topics {pyserini_topics} --output {self.tmp}/core17_avg.txt \ | |
--prcl lr svm --prcl.vectorizer TfidfVectorizer --prcl.alpha 0.6' | |
status = os.system(run_file_cmd) | |
self.assertEqual(status, 0) | |
score_cmd = f'{self.pyserini_eval_cmd} -m map -m P.30 \ | |
{self.anserini_root}/src/main/resources/topics-and-qrels/qrels.core17.txt \ | |
{self.tmp}/core17_avg.txt' | |
status = os.system(score_cmd) | |
stdout, stderr = run_command(score_cmd) | |
score = parse_score(stdout, 'map') | |
self.assertEqual(status, 0) | |
self.assertEqual(stderr, '') | |
self.assertAlmostEqual(score, 0.2442, delta=0.0001) | |
def test_core17_avg_rm3(self): | |
pyserini_topics = 'core17' | |
run_file_cmd = f'{self.pyserini_search_cmd} --index {self.core17_index_path} \ | |
--topics {pyserini_topics} --output {self.tmp}/core17_avg_rm3.txt \ | |
--prcl lr svm --prcl.vectorizer TfidfVectorizer --prcl.alpha 0.5 --rm3' | |
status = os.system(run_file_cmd) | |
self.assertEqual(status, 0) | |
score_cmd = f'{self.pyserini_eval_cmd} -m map -m P.30 \ | |
{self.anserini_root}/src/main/resources/topics-and-qrels/qrels.core17.txt \ | |
{self.tmp}/core17_avg_rm3.txt' | |
status = os.system(score_cmd) | |
stdout, stderr = run_command(score_cmd) | |
score = parse_score(stdout, 'map') | |
self.assertEqual(status, 0) | |
self.assertEqual(stderr, '') | |
self.assertAlmostEqual(score, 0.2950, delta=0.0001) | |
def test_core17_rrf(self): | |
pyserini_topics = 'core17' | |
lr_cmd = f'{self.pyserini_search_cmd} --index {self.core17_index_path} \ | |
--topics {pyserini_topics} --output {self.tmp}/core17_lr.txt \ | |
--prcl lr --prcl.vectorizer TfidfVectorizer --prcl.alpha 0.7' | |
status = os.system(lr_cmd) | |
self.assertEqual(status, 0) | |
svm_cmd = f'{self.pyserini_search_cmd} --index {self.core17_index_path} \ | |
--topics {pyserini_topics} --output {self.tmp}/core17_svm.txt \ | |
--prcl svm --prcl.vectorizer TfidfVectorizer --prcl.alpha 0.7' | |
status = os.system(svm_cmd) | |
self.assertEqual(status, 0) | |
rrf_cmd = f'{self.pyserini_fusion_cmd} \ | |
--runs {self.tmp}/core17_lr.txt {self.tmp}/core17_svm.txt \ | |
--output {self.tmp}/core17_rrf.txt --resort' | |
status = os.system(rrf_cmd) | |
self.assertEqual(status, 0) | |
score_cmd = f'{self.pyserini_eval_cmd} -m map -m P.30 \ | |
{self.anserini_root}/src/main/resources/topics-and-qrels/qrels.core17.txt \ | |
{self.tmp}/core17_rrf.txt' | |
status = os.system(score_cmd) | |
stdout, stderr = run_command(score_cmd) | |
score = parse_score(stdout, 'map') | |
self.assertEqual(status, 0) | |
self.assertEqual(stderr, '') | |
self.assertAlmostEqual(score, 0.2446, delta=0.0001) | |
def test_core17_rrf_rm3(self): | |
pyserini_topics = 'core17' | |
lr_cmd = f'{self.pyserini_search_cmd} --index {self.core17_index_path} \ | |
--topics {pyserini_topics} --output {self.tmp}/core17_lr_rm3.txt \ | |
--prcl lr --prcl.vectorizer TfidfVectorizer --prcl.alpha 0.4 --rm3' | |
status = os.system(lr_cmd) | |
self.assertEqual(status, 0) | |
svm_cmd = f'{self.pyserini_search_cmd} --index {self.core17_index_path} \ | |
--topics {pyserini_topics} --output {self.tmp}/core17_svm_rm3.txt \ | |
--prcl svm --prcl.vectorizer TfidfVectorizer --prcl.alpha 0.4 --rm3' | |
status = os.system(svm_cmd) | |
self.assertEqual(status, 0) | |
rrf_cmd = f'{self.pyserini_fusion_cmd} \ | |
--runs {self.tmp}/core17_lr_rm3.txt {self.tmp}/core17_svm_rm3.txt \ | |
--output {self.tmp}/core17_rrf_rm3.txt --resort' | |
status = os.system(rrf_cmd) | |
self.assertEqual(status, 0) | |
score_cmd = f'{self.pyserini_eval_cmd} -m map -m P.30 \ | |
{self.anserini_root}/src/main/resources/topics-and-qrels/qrels.core17.txt \ | |
{self.tmp}/core17_rrf_rm3.txt' | |
status = os.system(score_cmd) | |
stdout, stderr = run_command(score_cmd) | |
score = parse_score(stdout, 'map') | |
self.assertEqual(status, 0) | |
self.assertEqual(stderr, '') | |
self.assertAlmostEqual(score, 0.2957, delta=0.0001) | |
def test_core18(self): | |
self.assertTrue(self.core18_checker.run('core18_bm25', '--bm25', 0.2496)) | |
def test_core18_rm3(self): | |
self.assertTrue(self.core18_checker.run('core18_bm25', '--bm25 --rm3', 0.3129)) | |
def test_core18_lr(self): | |
pyserini_topics = 'core18' | |
run_file_cmd = f'{self.pyserini_search_cmd} --index {self.core18_index_path} \ | |
--topics {pyserini_topics} --output {self.tmp}/core18_lr.txt \ | |
--prcl lr --prcl.vectorizer TfidfVectorizer --prcl.alpha 0.6' | |
status = os.system(run_file_cmd) | |
self.assertEqual(status, 0) | |
score_cmd = f'{self.pyserini_eval_cmd} -m map -m P.30 \ | |
{self.anserini_root}/src/main/resources/topics-and-qrels/qrels.core18.txt \ | |
{self.tmp}/core18_lr.txt' | |
status = os.system(score_cmd) | |
stdout, stderr = run_command(score_cmd) | |
score = parse_score(stdout, 'map') | |
self.assertEqual(status, 0) | |
self.assertEqual(stderr, '') | |
self.assertAlmostEqual(score, 0.2839, delta=0.0001) | |
def test_core18_lr_rm3(self): | |
pyserini_topics = 'core18' | |
run_file_cmd = f'{self.pyserini_search_cmd} --index {self.core18_index_path} \ | |
--topics {pyserini_topics} --output {self.tmp}/core18_lr_rm3.txt \ | |
--prcl lr --prcl.vectorizer TfidfVectorizer --prcl.alpha 0.5 --rm3' | |
status = os.system(run_file_cmd) | |
self.assertEqual(status, 0) | |
score_cmd = f'{self.pyserini_eval_cmd} -m map -m P.30 \ | |
{self.anserini_root}/src/main/resources/topics-and-qrels/qrels.core18.txt \ | |
{self.tmp}/core18_lr_rm3.txt' | |
status = os.system(score_cmd) | |
stdout, stderr = run_command(score_cmd) | |
score = parse_score(stdout, 'map') | |
self.assertEqual(status, 0) | |
self.assertEqual(stderr, '') | |
self.assertAlmostEqual(score, 0.3216, delta=0.0001) | |
def test_core18_svm(self): | |
pyserini_topics = 'core18' | |
run_file_cmd = f'{self.pyserini_search_cmd} --index {self.core18_index_path} \ | |
--topics {pyserini_topics} --output {self.tmp}/core18_svm.txt \ | |
--prcl svm --prcl.vectorizer TfidfVectorizer --prcl.alpha 0.6' | |
status = os.system(run_file_cmd) | |
self.assertEqual(status, 0) | |
score_cmd = f'{self.pyserini_eval_cmd} -m map -m P.30 \ | |
{self.anserini_root}/src/main/resources/topics-and-qrels/qrels.core18.txt \ | |
{self.tmp}/core18_svm.txt' | |
status = os.system(score_cmd) | |
stdout, stderr = run_command(score_cmd) | |
score = parse_score(stdout, 'map') | |
self.assertEqual(status, 0) | |
self.assertEqual(stderr, '') | |
self.assertAlmostEqual(score, 0.2841, delta=0.0001) | |
def test_core18_svm_rm3(self): | |
pyserini_topics = 'core18' | |
run_file_cmd = f'{self.pyserini_search_cmd} --index {self.core18_index_path} \ | |
--topics {pyserini_topics} --output {self.tmp}/core18_svm_rm3.txt \ | |
--prcl svm --prcl.vectorizer TfidfVectorizer --prcl.alpha 0.5 --rm3' | |
status = os.system(run_file_cmd) | |
self.assertEqual(status, 0) | |
score_cmd = f'{self.pyserini_eval_cmd} -m map -m P.30 \ | |
{self.anserini_root}/src/main/resources/topics-and-qrels/qrels.core18.txt \ | |
{self.tmp}/core18_svm_rm3.txt' | |
status = os.system(score_cmd) | |
stdout, stderr = run_command(score_cmd) | |
score = parse_score(stdout, 'map') | |
self.assertEqual(status, 0) | |
self.assertEqual(stderr, '') | |
self.assertAlmostEqual(score, 0.3200, delta=0.0001) | |
def test_core18_avg(self): | |
pyserini_topics = 'core18' | |
run_file_cmd = f'{self.pyserini_search_cmd} --index {self.core18_index_path} \ | |
--topics {pyserini_topics} --output {self.tmp}/core18_avg.txt \ | |
--prcl lr svm --prcl.vectorizer TfidfVectorizer --prcl.alpha 0.4' | |
status = os.system(run_file_cmd) | |
self.assertEqual(status, 0) | |
score_cmd = f'{self.pyserini_eval_cmd} -m map -m P.30 \ | |
{self.anserini_root}/src/main/resources/topics-and-qrels/qrels.core18.txt \ | |
{self.tmp}/core18_avg.txt' | |
status = os.system(score_cmd) | |
stdout, stderr = run_command(score_cmd) | |
score = parse_score(stdout, 'map') | |
self.assertEqual(status, 0) | |
self.assertEqual(stderr, '') | |
self.assertAlmostEqual(score, 0.2860, delta=0.0001) | |
def test_core18_avg_rm3(self): | |
pyserini_topics = 'core18' | |
run_file_cmd = f'{self.pyserini_search_cmd} --index {self.core18_index_path} \ | |
--topics {pyserini_topics} --output {self.tmp}/core18_avg_rm3.txt \ | |
--prcl lr svm --prcl.vectorizer TfidfVectorizer --prcl.alpha 0.4 --rm3' | |
status = os.system(run_file_cmd) | |
self.assertEqual(status, 0) | |
score_cmd = f'{self.pyserini_eval_cmd} -m map -m P.30 \ | |
{self.anserini_root}/src/main/resources/topics-and-qrels/qrels.core18.txt \ | |
{self.tmp}/core18_avg_rm3.txt' | |
status = os.system(score_cmd) | |
stdout, stderr = run_command(score_cmd) | |
score = parse_score(stdout, 'map') | |
self.assertEqual(status, 0) | |
self.assertEqual(stderr, '') | |
self.assertAlmostEqual(score, 0.3215, delta=0.0001) | |
def test_core18_rrf(self): | |
pyserini_topics = 'core18' | |
lr_cmd = f'{self.pyserini_search_cmd} --index {self.core18_index_path} \ | |
--topics {pyserini_topics} --output {self.tmp}/core18_lr.txt \ | |
--prcl lr --prcl.vectorizer TfidfVectorizer --prcl.alpha 0.6' | |
status = os.system(lr_cmd) | |
self.assertEqual(status, 0) | |
svm_cmd = f'{self.pyserini_search_cmd} --index {self.core18_index_path} \ | |
--topics {pyserini_topics} --output {self.tmp}/core18_svm.txt \ | |
--prcl svm --prcl.vectorizer TfidfVectorizer --prcl.alpha 0.6' | |
status = os.system(svm_cmd) | |
self.assertEqual(status, 0) | |
rrf_cmd = f'{self.pyserini_fusion_cmd} \ | |
--runs {self.tmp}/core18_lr.txt {self.tmp}/core18_svm.txt \ | |
--output {self.tmp}/core18_rrf.txt --resort' | |
status = os.system(rrf_cmd) | |
self.assertEqual(status, 0) | |
score_cmd = f'{self.pyserini_eval_cmd} -m map -m P.30 \ | |
{self.anserini_root}/src/main/resources/topics-and-qrels/qrels.core18.txt \ | |
{self.tmp}/core18_rrf.txt' | |
status = os.system(score_cmd) | |
stdout, stderr = run_command(score_cmd) | |
score = parse_score(stdout, 'map') | |
self.assertEqual(status, 0) | |
self.assertEqual(stderr, '') | |
self.assertAlmostEqual(score, 0.2881, delta=0.0001) | |
def test_core18_rrf_rm3(self): | |
pyserini_topics = 'core18' | |
lr_cmd = f'{self.pyserini_search_cmd} --index {self.core18_index_path} \ | |
--topics {pyserini_topics} --output {self.tmp}/core18_lr_rm3.txt \ | |
--prcl lr --prcl.vectorizer TfidfVectorizer --prcl.alpha 0.5 --rm3' | |
status = os.system(lr_cmd) | |
self.assertEqual(status, 0) | |
svm_cmd = f'{self.pyserini_search_cmd} --index {self.core18_index_path} \ | |
--topics {pyserini_topics} --output {self.tmp}/core18_svm_rm3.txt \ | |
--prcl svm --prcl.vectorizer TfidfVectorizer --prcl.alpha 0.5 --rm3' | |
status = os.system(svm_cmd) | |
self.assertEqual(status, 0) | |
rrf_cmd = f'{self.pyserini_fusion_cmd} \ | |
--runs {self.tmp}/core18_lr_rm3.txt {self.tmp}/core18_svm_rm3.txt \ | |
--output {self.tmp}/core18_rrf_rm3.txt --resort' | |
status = os.system(rrf_cmd) | |
self.assertEqual(status, 0) | |
score_cmd = f'{self.pyserini_eval_cmd} -m map -m P.30 \ | |
{self.anserini_root}/src/main/resources/topics-and-qrels/qrels.core18.txt \ | |
{self.tmp}/core18_rrf_rm3.txt' | |
status = os.system(score_cmd) | |
stdout, stderr = run_command(score_cmd) | |
score = parse_score(stdout, 'map') | |
self.assertEqual(status, 0) | |
self.assertEqual(stderr, '') | |
self.assertAlmostEqual(score, 0.3205, delta=0.0001) | |
def test_robust04(self): | |
self.assertTrue(self.robust04_checker.run('robust04_bm25', '--bm25', 0.2531)) | |
def test_robust04_rm3(self): | |
self.assertTrue(self.robust04_checker.run('robust04_bm25_rm3', '--bm25 --rm3', 0.2908)) | |
def test_robust04_lr(self): | |
pyserini_topics = 'robust04' | |
run_file_cmd = f'{self.pyserini_search_cmd} --index {self.robust04_index_path} \ | |
--topics {pyserini_topics} --output {self.tmp}/robust04_lr.txt \ | |
--prcl lr --prcl.vectorizer TfidfVectorizer --prcl.alpha 0.5' | |
status = os.system(run_file_cmd) | |
self.assertEqual(status, 0) | |
score_cmd = f'{self.pyserini_eval_cmd} -m map -m P.30 \ | |
{self.anserini_root}/src/main/resources/topics-and-qrels/qrels.robust04.txt \ | |
{self.tmp}/robust04_lr.txt' | |
status = os.system(score_cmd) | |
stdout, stderr = run_command(score_cmd) | |
score = parse_score(stdout, 'map') | |
self.assertEqual(status, 0) | |
self.assertEqual(stderr, '') | |
self.assertAlmostEqual(score, 0.2747, delta=0.0001) | |
def test_robust04_lr_rm3(self): | |
pyserini_topics = 'robust04' | |
run_file_cmd = f'{self.pyserini_search_cmd} --index {self.robust04_index_path} \ | |
--topics {pyserini_topics} --output {self.tmp}/robust04_lr_rm3.txt \ | |
--prcl lr --prcl.vectorizer TfidfVectorizer --prcl.alpha 0.3 --rm3' | |
status = os.system(run_file_cmd) | |
self.assertEqual(status, 0) | |
score_cmd = f'{self.pyserini_eval_cmd} -m map -m P.30 \ | |
{self.anserini_root}/src/main/resources/topics-and-qrels/qrels.robust04.txt \ | |
{self.tmp}/robust04_lr_rm3.txt' | |
status = os.system(score_cmd) | |
stdout, stderr = run_command(score_cmd) | |
score = parse_score(stdout, 'map') | |
self.assertEqual(status, 0) | |
self.assertEqual(stderr, '') | |
self.assertAlmostEqual(score, 0.2969, delta=0.0001) | |
def test_robust04_svm(self): | |
pyserini_topics = 'robust04' | |
run_file_cmd = f'{self.pyserini_search_cmd} --index {self.robust04_index_path} \ | |
--topics {pyserini_topics} --output {self.tmp}/robust04_svm.txt \ | |
--prcl svm --prcl.vectorizer TfidfVectorizer --prcl.alpha 0.5' | |
status = os.system(run_file_cmd) | |
self.assertEqual(status, 0) | |
score_cmd = f'{self.pyserini_eval_cmd} -m map -m P.30 \ | |
{self.anserini_root}/src/main/resources/topics-and-qrels/qrels.robust04.txt \ | |
{self.tmp}/robust04_svm.txt' | |
status = os.system(score_cmd) | |
stdout, stderr = run_command(score_cmd) | |
score = parse_score(stdout, 'map') | |
self.assertEqual(status, 0) | |
self.assertEqual(stderr, '') | |
self.assertAlmostEqual(score, 0.2726, delta=0.0001) | |
def test_robust04_svm_rm3(self): | |
pyserini_topics = 'robust04' | |
run_file_cmd = f'{self.pyserini_search_cmd} --index {self.robust04_index_path} \ | |
--topics {pyserini_topics} --output {self.tmp}/robust04_svm_rm3.txt \ | |
--prcl svm --prcl.vectorizer TfidfVectorizer --prcl.alpha 0.3 --rm3' | |
status = os.system(run_file_cmd) | |
self.assertEqual(status, 0) | |
score_cmd = f'{self.pyserini_eval_cmd} -m map -m P.30 \ | |
{self.anserini_root}/src/main/resources/topics-and-qrels/qrels.robust04.txt \ | |
{self.tmp}/robust04_svm_rm3.txt' | |
status = os.system(score_cmd) | |
stdout, stderr = run_command(score_cmd) | |
score = parse_score(stdout, 'map') | |
self.assertEqual(status, 0) | |
self.assertEqual(stderr, '') | |
self.assertAlmostEqual(score, 0.2972, delta=0.0001) | |
def test_robust04_avg(self): | |
pyserini_topics = 'robust04' | |
run_file_cmd = f'{self.pyserini_search_cmd} --index {self.robust04_index_path} \ | |
--topics {pyserini_topics} --output {self.tmp}/robust04_avg.txt \ | |
--prcl lr svm --prcl.vectorizer TfidfVectorizer --prcl.alpha 0.5' | |
status = os.system(run_file_cmd) | |
self.assertEqual(status, 0) | |
score_cmd = f'{self.pyserini_eval_cmd} -m map -m P.30 \ | |
{self.anserini_root}/src/main/resources/topics-and-qrels/qrels.robust04.txt \ | |
{self.tmp}/robust04_avg.txt' | |
status = os.system(score_cmd) | |
stdout, stderr = run_command(score_cmd) | |
score = parse_score(stdout, 'map') | |
self.assertEqual(status, 0) | |
self.assertEqual(stderr, '') | |
self.assertAlmostEqual(score, 0.276, delta=0.0001) | |
def test_robust04_avg_rm3(self): | |
pyserini_topics = 'robust04' | |
run_file_cmd = f'{self.pyserini_search_cmd} --index {self.robust04_index_path} \ | |
--topics {pyserini_topics} --output {self.tmp}/robust04_avg_rm3.txt \ | |
--prcl lr svm --prcl.vectorizer TfidfVectorizer --prcl.alpha 0.3 --rm3' | |
status = os.system(run_file_cmd) | |
self.assertEqual(status, 0) | |
score_cmd = f'{self.pyserini_eval_cmd} -m map -m P.30 \ | |
{self.anserini_root}/src/main/resources/topics-and-qrels/qrels.robust04.txt \ | |
{self.tmp}/robust04_avg_rm3.txt' | |
status = os.system(score_cmd) | |
stdout, stderr = run_command(score_cmd) | |
score = parse_score(stdout, 'map') | |
self.assertEqual(status, 0) | |
self.assertEqual(stderr, '') | |
self.assertAlmostEqual(score, 0.2980, delta=0.0001) | |
def test_robust04_rrf(self): | |
pyserini_topics = 'robust04' | |
lr_cmd = f'{self.pyserini_search_cmd} --index {self.robust04_index_path} \ | |
--topics {pyserini_topics} --output {self.tmp}/robust04_lr.txt \ | |
--prcl lr --prcl.vectorizer TfidfVectorizer --prcl.alpha 0.5' | |
status = os.system(lr_cmd) | |
self.assertEqual(status, 0) | |
svm_cmd = f'{self.pyserini_search_cmd} --index {self.robust04_index_path} \ | |
--topics {pyserini_topics} --output {self.tmp}/robust04_svm.txt \ | |
--prcl svm --prcl.vectorizer TfidfVectorizer --prcl.alpha 0.5' | |
status = os.system(svm_cmd) | |
self.assertEqual(status, 0) | |
rrf_cmd = f'{self.pyserini_fusion_cmd} \ | |
--runs {self.tmp}/robust04_lr.txt {self.tmp}/robust04_svm.txt \ | |
--output {self.tmp}/robust04_rrf.txt --resort' | |
status = os.system(rrf_cmd) | |
self.assertEqual(status, 0) | |
score_cmd = f'{self.pyserini_eval_cmd} -m map -m P.30 \ | |
{self.anserini_root}/src/main/resources/topics-and-qrels/qrels.robust04.txt \ | |
{self.tmp}/robust04_rrf.txt' | |
status = os.system(score_cmd) | |
stdout, stderr = run_command(score_cmd) | |
score = parse_score(stdout, 'map') | |
self.assertEqual(status, 0) | |
self.assertEqual(stderr, '') | |
self.assertAlmostEqual(score, 0.275, delta=0.0001) | |
def test_robust04_rrf_rm3(self): | |
pyserini_topics = 'robust04' | |
lr_cmd = f'{self.pyserini_search_cmd} --index {self.robust04_index_path} \ | |
--topics {pyserini_topics} --output {self.tmp}/robust04_lr_rm3.txt \ | |
--prcl lr --prcl.vectorizer TfidfVectorizer --prcl.alpha 0.3 --rm3' | |
status = os.system(lr_cmd) | |
self.assertEqual(status, 0) | |
svm_cmd = f'{self.pyserini_search_cmd} --index {self.robust04_index_path} \ | |
--topics {pyserini_topics} --output {self.tmp}/robust04_svm_rm3.txt \ | |
--prcl svm --prcl.vectorizer TfidfVectorizer --prcl.alpha 0.3 --rm3' | |
status = os.system(svm_cmd) | |
self.assertEqual(status, 0) | |
rrf_cmd = f'{self.pyserini_fusion_cmd} \ | |
--runs {self.tmp}/robust04_lr_rm3.txt {self.tmp}/robust04_svm_rm3.txt \ | |
--output {self.tmp}/robust04_rrf_rm3.txt --resort' | |
status = os.system(rrf_cmd) | |
self.assertEqual(status, 0) | |
score_cmd = f'{self.pyserini_eval_cmd} -m map -m P.30 \ | |
{self.anserini_root}/src/main/resources/topics-and-qrels/qrels.robust04.txt \ | |
{self.tmp}/robust04_rrf_rm3.txt' | |
status = os.system(score_cmd) | |
stdout, stderr = run_command(score_cmd) | |
score = parse_score(stdout, 'map') | |
self.assertEqual(status, 0) | |
self.assertEqual(stderr, '') | |
self.assertAlmostEqual(score, 0.2977, delta=0.0001) | |
def test_robust05(self): | |
self.assertTrue(self.robust05_checker.run('robust05_bm25', '--bm25', 0.2032)) | |
def test_robust05_rm3(self): | |
self.assertTrue(self.robust05_checker.run('robust05_bm25_rm3', '--bm25 --rm3', 0.2624)) | |
def test_robust05_lr(self): | |
pyserini_topics = 'robust05' | |
run_file_cmd = f'{self.pyserini_search_cmd} --index {self.robust05_index_path} \ | |
--topics {pyserini_topics} --output {self.tmp}/robust05_lr.txt \ | |
--prcl lr --prcl.vectorizer TfidfVectorizer --prcl.alpha 0.8' | |
status = os.system(run_file_cmd) | |
self.assertEqual(status, 0) | |
score_cmd = f'{self.pyserini_eval_cmd} -m map -m P.30 \ | |
{self.anserini_root}/src/main/resources/topics-and-qrels/qrels.robust05.txt \ | |
{self.tmp}/robust05_lr.txt' | |
status = os.system(score_cmd) | |
stdout, stderr = run_command(score_cmd) | |
score = parse_score(stdout, 'map') | |
self.assertEqual(status, 0) | |
self.assertEqual(stderr, '') | |
self.assertAlmostEqual(score, 0.2476, delta=0.0001) | |
def test_robust05_lr_rm3(self): | |
pyserini_topics = 'robust05' | |
run_file_cmd = f'{self.pyserini_search_cmd} --index {self.robust05_index_path} \ | |
--topics {pyserini_topics} --output {self.tmp}/robust05_lr_rm3.txt \ | |
--prcl lr --prcl.vectorizer TfidfVectorizer --prcl.alpha 0.6 --rm3' | |
status = os.system(run_file_cmd) | |
self.assertEqual(status, 0) | |
score_cmd = f'{self.pyserini_eval_cmd} -m map -m P.30 \ | |
{self.anserini_root}/src/main/resources/topics-and-qrels/qrels.robust05.txt \ | |
{self.tmp}/robust05_lr_rm3.txt' | |
status = os.system(score_cmd) | |
stdout, stderr = run_command(score_cmd) | |
score = parse_score(stdout, 'map') | |
self.assertEqual(status, 0) | |
self.assertEqual(stderr, '') | |
self.assertAlmostEqual(score, 0.2872, delta=0.0001) | |
def test_robust05_svm(self): | |
pyserini_topics = 'robust05' | |
run_file_cmd = f'{self.pyserini_search_cmd} --index {self.robust05_index_path} \ | |
--topics {pyserini_topics} --output {self.tmp}/robust05_svm.txt \ | |
--prcl svm --prcl.vectorizer TfidfVectorizer --prcl.alpha 0.8' | |
status = os.system(run_file_cmd) | |
self.assertEqual(status, 0) | |
score_cmd = f'{self.pyserini_eval_cmd} -m map -m P.30 \ | |
{self.anserini_root}/src/main/resources/topics-and-qrels/qrels.robust05.txt \ | |
{self.tmp}/robust05_svm.txt' | |
status = os.system(score_cmd) | |
stdout, stderr = run_command(score_cmd) | |
score = parse_score(stdout, 'map') | |
self.assertEqual(status, 0) | |
self.assertEqual(stderr, '') | |
self.assertAlmostEqual(score, 0.2486, delta=0.0001) | |
def test_robust05_svm_rm3(self): | |
pyserini_topics = 'robust05' | |
run_file_cmd = f'{self.pyserini_search_cmd} --index {self.robust05_index_path} \ | |
--topics {pyserini_topics} --output {self.tmp}/robust05_svm_rm3.txt \ | |
--prcl svm --prcl.vectorizer TfidfVectorizer --prcl.alpha 0.6 --rm3' | |
status = os.system(run_file_cmd) | |
self.assertEqual(status, 0) | |
score_cmd = f'{self.pyserini_eval_cmd} -m map -m P.30 \ | |
{self.anserini_root}/src/main/resources/topics-and-qrels/qrels.robust05.txt \ | |
{self.tmp}/robust05_svm_rm3.txt' | |
status = os.system(score_cmd) | |
stdout, stderr = run_command(score_cmd) | |
score = parse_score(stdout, 'map') | |
self.assertEqual(status, 0) | |
self.assertEqual(stderr, '') | |
self.assertAlmostEqual(score, 0.2871, delta=0.0001) | |
def test_robust05_avg(self): | |
pyserini_topics = 'robust05' | |
run_file_cmd = f'{self.pyserini_search_cmd} --index {self.robust05_index_path} \ | |
--topics {pyserini_topics} --output {self.tmp}/robust05_avg.txt \ | |
--prcl lr svm --prcl.vectorizer TfidfVectorizer --prcl.alpha 0.8' | |
status = os.system(run_file_cmd) | |
self.assertEqual(status, 0) | |
score_cmd = f'{self.pyserini_eval_cmd} -m map -m P.30 \ | |
{self.anserini_root}/src/main/resources/topics-and-qrels/qrels.robust05.txt \ | |
{self.tmp}/robust05_avg.txt' | |
status = os.system(score_cmd) | |
stdout, stderr = run_command(score_cmd) | |
score = parse_score(stdout, 'map') | |
self.assertEqual(status, 0) | |
self.assertEqual(stderr, '') | |
self.assertAlmostEqual(score, 0.2485, delta=0.0001) | |
def test_robust05_avg_rm3(self): | |
pyserini_topics = 'robust05' | |
run_file_cmd = f'{self.pyserini_search_cmd} --index {self.robust05_index_path} \ | |
--topics {pyserini_topics} --output {self.tmp}/robust05_avg_rm3.txt \ | |
--prcl lr svm --prcl.vectorizer TfidfVectorizer --prcl.alpha 0.6 --rm3' | |
status = os.system(run_file_cmd) | |
self.assertEqual(status, 0) | |
score_cmd = f'{self.pyserini_eval_cmd} -m map -m P.30 \ | |
{self.anserini_root}/src/main/resources/topics-and-qrels/qrels.robust05.txt \ | |
{self.tmp}/robust05_avg_rm3.txt' | |
status = os.system(score_cmd) | |
stdout, stderr = run_command(score_cmd) | |
score = parse_score(stdout, 'map') | |
self.assertEqual(status, 0) | |
self.assertEqual(stderr, '') | |
self.assertAlmostEqual(score, 0.2880, delta=0.0001) | |
def test_robust05_rrf(self): | |
pyserini_topics = 'robust05' | |
lr_cmd = f'{self.pyserini_search_cmd} --index {self.robust05_index_path} \ | |
--topics {pyserini_topics} --output {self.tmp}/robust05_lr.txt \ | |
--prcl lr --prcl.vectorizer TfidfVectorizer --prcl.alpha 0.5' | |
status = os.system(lr_cmd) | |
self.assertEqual(status, 0) | |
svm_cmd = f'{self.pyserini_search_cmd} --index {self.robust05_index_path} \ | |
--topics {pyserini_topics} --output {self.tmp}/robust05_svm.txt \ | |
--prcl svm --prcl.vectorizer TfidfVectorizer --prcl.alpha 0.5' | |
status = os.system(svm_cmd) | |
self.assertEqual(status, 0) | |
rrf_cmd = f'{self.pyserini_fusion_cmd} \ | |
--runs {self.tmp}/robust05_lr.txt {self.tmp}/robust05_svm.txt \ | |
--output {self.tmp}/robust05_rrf.txt --resort' | |
status = os.system(rrf_cmd) | |
self.assertEqual(status, 0) | |
score_cmd = f'{self.pyserini_eval_cmd} -m map -m P.30 \ | |
{self.anserini_root}/src/main/resources/topics-and-qrels/qrels.robust05.txt \ | |
{self.tmp}/robust05_rrf.txt' | |
status = os.system(score_cmd) | |
stdout, stderr = run_command(score_cmd) | |
score = parse_score(stdout, 'map') | |
self.assertEqual(status, 0) | |
self.assertEqual(stderr, '') | |
self.assertAlmostEqual(score, 0.2401, delta=0.0001) | |
def test_robust05_rrf_rm3(self): | |
pyserini_topics = 'robust05' | |
lr_cmd = f'{self.pyserini_search_cmd} --index {self.robust05_index_path} \ | |
--topics {pyserini_topics} --output {self.tmp}/robust05_lr_rm3.txt \ | |
--prcl lr --prcl.vectorizer TfidfVectorizer --prcl.alpha 0.3 --rm3' | |
status = os.system(lr_cmd) | |
self.assertEqual(status, 0) | |
svm_cmd = f'{self.pyserini_search_cmd} --index {self.robust05_index_path} \ | |
--topics {pyserini_topics} --output {self.tmp}/robust05_svm_rm3.txt \ | |
--prcl svm --prcl.vectorizer TfidfVectorizer --prcl.alpha 0.3 --rm3' | |
status = os.system(svm_cmd) | |
self.assertEqual(status, 0) | |
rrf_cmd = f'{self.pyserini_fusion_cmd} \ | |
--runs {self.tmp}/robust05_lr_rm3.txt {self.tmp}/robust05_svm_rm3.txt \ | |
--output {self.tmp}/robust05_rrf_rm3.txt --resort' | |
status = os.system(rrf_cmd) | |
self.assertEqual(status, 0) | |
score_cmd = f'{self.pyserini_eval_cmd} -m map -m P.30 \ | |
{self.anserini_root}/src/main/resources/topics-and-qrels/qrels.robust05.txt \ | |
{self.tmp}/robust05_rrf_rm3.txt' | |
status = os.system(score_cmd) | |
stdout, stderr = run_command(score_cmd) | |
score = parse_score(stdout, 'map') | |
self.assertEqual(status, 0) | |
self.assertEqual(stderr, '') | |
self.assertAlmostEqual(score, 0.2808, delta=0.0001) | |
def tearDown(self): | |
shutil.rmtree(f'{self.tmp}') | |
if __name__ == '__main__': | |
unittest.main() | |