pair2rank / app.py
dustalov's picture
Add title, description, article
bd0b4a3 verified
raw
history blame
4.86 kB
# Copyright 2023 Dmitry Ustalov
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
__author__ = 'Dmitry Ustalov'
__license__ = 'Apache 2.0'
import typing
import gradio as gr
import numpy as np
import numpy.typing as npt
import pandas as pd
# https://gist.github.com/dustalov/41678b70c40ba5a55430fa5e77b121d9#file-newman-py
def aggregate(wins: npt.NDArray[np.int64], ties: npt.NDArray[np.int64], tolerance: float = 10e-6, limit: int = 20) \
-> npt.ArrayLike:
assert wins.shape == ties.shape, 'wins and ties shapes are different'
pi, v = np.random.rand(wins.shape[0]), np.random.rand()
converged, iterations = False, 0
while not converged:
iterations += 1
v_numerator = np.sum(
ties * (pi[:, None] + pi) / (pi[:, None] + pi + 2 * v * np.sqrt(pi[:, None] * pi))
) / 2
v_denominator = np.sum(
wins * 2 * np.sqrt(pi[:, None] * pi) / (pi[:, None] + pi + 2 * v * np.sqrt(pi[:, None] * pi))
)
v = v_numerator / v_denominator
pi_old = pi.copy()
pi_numerator = np.sum(
(wins + ties / 2) * (pi + v * np.sqrt(pi[:, np.newaxis] * pi)) /
(pi[:, np.newaxis] + pi + 2 * np.sqrt(pi[:, np.newaxis] * pi)),
axis=1
)
pi_denominator = np.sum(
(wins + ties / 2) * (1 + v * np.sqrt(pi[:, np.newaxis] * pi)) /
(pi[:, np.newaxis] + pi + 2 * np.sqrt(pi[:, np.newaxis] * pi)),
axis=0
)
pi = pi_numerator / pi_denominator
converged = bool(np.all(np.abs(pi / (pi + 1) - pi_old / (pi_old + 1)) < tolerance)) or (iterations >= limit)
return pi
def handler(file: typing.IO[bytes]) -> pd.DataFrame:
try:
df = pd.read_csv(file.name, dtype=str)
except ValueError as e:
raise gr.Error(f'Parsing error: {e}')
if not pd.Series(['left', 'right', 'winner']).isin(df.columns).all():
raise gr.Error('Columns must exist: left, right, winner')
if not df['winner'].isin(pd.Series(['left', 'right', 'tie'])).all():
raise gr.Error('Allowed winner values: left, right, tie')
df = df[['left', 'right', 'winner']]
df.dropna(axis='rows', inplace=True)
index = pd.Index(np.unique(df[['left', 'right']].values), name='item')
df_wins = pd.pivot_table(df[df['winner'].isin(['left', 'right'])],
index='left', columns='right', values='winner',
aggfunc='count', fill_value=0)
df_wins = df_wins.reindex(labels=index, columns=index, fill_value=0)
df_ties = pd.pivot_table(df[df['winner'] == 'tie'],
index='left', columns='right', values='winner', aggfunc='count',
fill_value=0)
df_ties = df_ties.reindex(labels=index, columns=index, fill_value=0)
wins = df_wins.to_numpy(dtype=np.int64)
ties = df_ties.to_numpy(dtype=np.int64)
ties += ties.T
scores = aggregate(wins, ties)
df_result = pd.DataFrame(data={'score': scores}, index=index)
df_result['rank'] = df_result['score'].rank(ascending=False).astype(int)
df_result.sort_values(by=['rank', 'score'], ascending=[True, False], inplace=True)
df_result.reset_index(inplace=True)
return df_result
iface = gr.Interface(
fn=handler,
inputs=gr.File(
value='example.csv',
file_types=['.tsv', '.csv']
),
outputs=gr.Dataframe(
headers=['item', 'score', 'rank']
),
title='Turn Your Side-by-Side Comparisons into Ranking!',
description='''
This easy-to-use tool transforms pairwise comparisons (aka side-by-side) to a meaningful ranking of items.
As an input, it expects a comma-separated (CSV) file containing the following columns:
- `left`: the first compared item
- `right`: the second compared item
- `winner`: the label indicating the winning item
Possible values for `winner` are `left`, `right`, or `tie`.
The provided example might be a good starting point of the format.
As the output, this tool provides a table with items, their estimated scores, and ranks.
''',
article='''
This tool implements the tie-aware ranking aggregation algorithm as described in
[Efficient Computation of Rankings from Pairwise Comparisons](https://www.jmlr.org/papers/v24/22-1086.html).
''',
allow_flagging='never'
)
iface.launch()