|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import os |
|
import re |
|
import subprocess |
|
import sys |
|
import platform |
|
import pandas as pd |
|
import tempfile |
|
|
|
from pyserini.search import get_qrels_file |
|
from pyserini.util import download_evaluation_script |
|
|
|
# Build the java command prefix around the trec_eval jar; the helper is
# expected to return the local path of that jar.
script_path = download_evaluation_script('trec_eval')
cmd_prefix = ['java', '-jar', script_path]

# NOTE: this is an alias of sys.argv, not a copy, so the removals below
# mutate sys.argv as well.
args = sys.argv

# Options handled by this wrapper itself. They are stripped from ``args`` so
# that they are not forwarded to the trec_eval jar.
judged_docs_only = ''
judged_result = []
cutoffs = []

# '-remove-unjudged': remember the flag (as its own, truthy, text) and drop
# its first occurrence from the argument list.
if '-remove-unjudged' in args:
    args.remove('-remove-unjudged')
    judged_docs_only = '-remove-unjudged'

# 'judged.k1,k2,...': the first argument with this prefix carries a
# comma-separated list of integer cutoffs.
judged_pos = next(
    (pos for pos, arg in enumerate(args) if arg.startswith('judged.')),
    None,
)
if judged_pos is not None:
    judged_option = args.pop(judged_pos)
    cutoffs = [int(k) for k in judged_option[len('judged.'):].split(',')]
    # Also discard the argument that immediately preceded the option
    # (presumably the '-m' measure switch -- it is removed unconditionally).
    args.pop(judged_pos - 1)
|
|
|
# Path of the temporary run file created by this script; stays '' when no
# temporary file was needed. It is removed in the ``finally`` clause below.
temp_file = ''

try:
    if len(args) > 1:
        # The last two arguments are treated as the qrels and the run file,
        # in that order.
        if not os.path.exists(args[-2]):
            # Not a path on disk: hand it to pyserini's get_qrels_file
            # (presumably resolves a known qrels name to a local file).
            args[-2] = get_qrels_file(args[-2])

        if os.path.exists(args[-1]):
            # A run whose first line lacks the 'Q0' column is taken to be in
            # msmarco format (query_id, doc_id, rank) and converted to TREC.
            with open(args[-1]) as f:
                first_line = f.readline()
            if 'Q0' not in first_line:
                # Close the handle right away; only the path is needed.
                with tempfile.NamedTemporaryFile(delete=False) as handle:
                    temp_file = handle.name
                print('msmarco run detected. Converting to trec...')
                # sep=r'\s+' is the documented replacement for the
                # deprecated delim_whitespace=True.
                run = pd.read_csv(args[-1], sep=r'\s+', header=None,
                                  names=['query_id', 'doc_id', 'rank'])
                # Synthesize a score that decreases with rank.
                run['score'] = 1 / run['rank']
                run.insert(1, 'Q0', 'Q0')
                run['name'] = 'TEMPRUN'
                run.to_csv(temp_file, sep='\t', header=None, index=None)
                args[-1] = temp_file

        run = pd.read_csv(args[-1], sep=r'\s+', header=None)
        qrels = pd.read_csv(args[-2], sep=r'\s+', header=None)

        # Columns 0 (query id) and 2 (doc id) are the merge keys below. Cast
        # both to str in both frames so pd.merge never sees mismatched key
        # dtypes (e.g. int64 in one file vs. object in the other).
        for frame in (run, qrels):
            frame[0] = frame[0].astype(str)
            frame[2] = frame[2].astype(str)

        if judged_docs_only:
            # Keep only the run rows whose (query, doc) pair occurs in the
            # qrels, and point trec_eval at the filtered copy.
            if not temp_file:
                with tempfile.NamedTemporaryFile(delete=False) as handle:
                    temp_file = handle.name
            judged_indexes = pd.merge(run[[0, 2]].reset_index(),
                                      qrels[[0, 2]], on=[0, 2])['index']
            run = run.loc[judged_indexes]
            run.to_csv(temp_file, sep='\t', header=None, index=None)
            args[-1] = temp_file

        # judged@k: fraction of the first k rows per query (in file order)
        # whose (query, doc) pair occurs in the qrels.
        for cutoff in cutoffs:
            run_cutoff = run.groupby(0).head(cutoff)
            judged_hits = len(pd.merge(run_cutoff[[0, 2]], qrels[[0, 2]],
                                       on=[0, 2]))
            # An empty run (e.g. every hit was unjudged and filtered out)
            # is reported as 0 instead of raising ZeroDivisionError.
            judged = judged_hits / len(run_cutoff) if len(run_cutoff) else 0.0
            metric_name = f'judged_{cutoff}'
            judged_result.append(f'{metric_name:22}\tall\t{judged:.4f}')

        cmd = cmd_prefix + args[1:]
    else:
        # No arguments: run the jar bare.
        cmd = cmd_prefix

    print(f'Running command: {cmd}')
    # The command is only run through the shell on Windows.
    shell = platform.system() == "Windows"
    process = subprocess.Popen(cmd,
                               stdout=subprocess.PIPE,
                               stderr=subprocess.PIPE,
                               shell=shell)
    stdout, stderr = process.communicate()
    if stderr:
        print(stderr.decode("utf-8"))

    print('Results:')
    print(stdout.decode("utf-8").rstrip())

    # Append the judged@k lines computed above after trec_eval's own output.
    for judged in judged_result:
        print(judged)
finally:
    # Remove the temporary run file even when something above failed.
    if temp_file:
        os.remove(temp_file)
|
|