File size: 4,016 Bytes
58629f0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c7e44c9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
# Copyright 2023 Dmitry Ustalov
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

__author__ = 'Dmitry Ustalov'
__license__ = 'Apache 2.0'

import typing

import gradio as gr
import numpy as np
import numpy.typing as npt
import pandas as pd


# https://gist.github.com/dustalov/41678b70c40ba5a55430fa5e77b121d9#file-newman-py
def aggregate(wins: npt.NDArray[np.int64], ties: npt.NDArray[np.int64], tolerance: float = 10e-6, limit: int = 20) \
        -> npt.ArrayLike:
    assert wins.shape == ties.shape, 'wins and ties shapes are different'

    pi, v = np.random.rand(wins.shape[0]), np.random.rand()

    converged, iterations = False, 0

    while not converged:
        iterations += 1

        v_numerator = np.sum(
            ties * (pi[:, None] + pi) / (pi[:, None] + pi + 2 * v * np.sqrt(pi[:, None] * pi))
        ) / 2

        v_denominator = np.sum(
            wins * 2 * np.sqrt(pi[:, None] * pi) / (pi[:, None] + pi + 2 * v * np.sqrt(pi[:, None] * pi))
        )

        v = v_numerator / v_denominator

        pi_old = pi.copy()

        pi_numerator = np.sum(
            (wins + ties / 2) * (pi + v * np.sqrt(pi[:, np.newaxis] * pi)) /
            (pi[:, np.newaxis] + pi + 2 * np.sqrt(pi[:, np.newaxis] * pi)),
            axis=1
        )

        pi_denominator = np.sum(
            (wins + ties / 2) * (1 + v * np.sqrt(pi[:, np.newaxis] * pi)) /
            (pi[:, np.newaxis] + pi + 2 * np.sqrt(pi[:, np.newaxis] * pi)),
            axis=0
        )

        pi = pi_numerator / pi_denominator

        converged = bool(np.all(np.abs(pi / (pi + 1) - pi_old / (pi_old + 1)) < tolerance)) or (iterations >= limit)

    return pi


def handler(file: typing.IO[bytes]) -> pd.DataFrame:
    try:
        df = pd.read_csv(file.name, dtype=str)
    except ValueError as e:
        raise gr.Error(f'Parsing error: {e}')

    if not pd.Series(['left', 'right', 'winner']).isin(df.columns).all():
        raise gr.Error('Columns must exist: left, right, winner')

    if not df['winner'].isin(pd.Series(['left', 'right', 'tie'])).all():
        raise gr.Error('Allowed winner values: left, right, tie')

    df = df[['left', 'right', 'winner']]

    df.dropna(axis='rows', inplace=True)

    index = pd.Index(np.unique(df[['left', 'right']].values), name='item')

    df_wins = pd.pivot_table(df[df['winner'].isin(['left', 'right'])],
                             index='left', columns='right', values='winner',
                             aggfunc='count', fill_value=0)
    df_wins = df_wins.reindex(labels=index, columns=index, fill_value=0)

    df_ties = pd.pivot_table(df[df['winner'] == 'tie'],
                             index='left', columns='right', values='winner', aggfunc='count',
                             fill_value=0)
    df_ties = df_ties.reindex(labels=index, columns=index, fill_value=0)

    wins = df_wins.to_numpy(dtype=np.int64)
    ties = df_ties.to_numpy(dtype=np.int64)
    ties += ties.T

    scores = aggregate(wins, ties)

    df_result = pd.DataFrame(data={'score': scores}, index=index)
    df_result['rank'] = df_result['score'].rank(ascending=False).astype(int)
    df_result.sort_values(by=['rank', 'score'], ascending=[True, False], inplace=True)
    df_result.reset_index(inplace=True)

    return df_result


iface = gr.Interface(
    fn=handler,
    inputs=gr.File(
        value='example.csv',
        file_types=['.tsv', '.csv']
    ),
    outputs=gr.Dataframe(
        headers=['item', 'score', 'rank']
    ),
    allow_flagging='never'
)

iface.launch()