Update app.py
Browse files
app.py
CHANGED
@@ -15,7 +15,7 @@
|
|
15 |
__author__ = 'Dmitry Ustalov'
|
16 |
__license__ = 'Apache 2.0'
|
17 |
|
18 |
-
from typing import IO, Tuple
|
19 |
|
20 |
import gradio as gr
|
21 |
import numpy as np
|
@@ -32,11 +32,44 @@ def visualize(df_pairwise: pd.DataFrame) -> Figure:
|
|
32 |
return fig
|
33 |
|
34 |
|
35 |
-
# https://gist.github.com/dustalov/41678b70c40ba5a55430fa5e77b121d9#file-
|
36 |
-
def
|
37 |
-
|
38 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
39 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
40 |
rng = np.random.default_rng(seed)
|
41 |
|
42 |
pi, v = rng.random(wins.shape[0]), rng.random()
|
@@ -82,10 +115,19 @@ def aggregate(wins: npt.NDArray[np.int64], ties: npt.NDArray[np.int64],
|
|
82 |
return pi
|
83 |
|
84 |
|
85 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
86 |
if file is None:
|
87 |
raise gr.Error('File must be uploaded')
|
88 |
|
|
|
|
|
|
|
89 |
try:
|
90 |
df = pd.read_csv(file.name, dtype=str)
|
91 |
except ValueError as e:
|
@@ -117,7 +159,9 @@ def handler(file: IO[bytes], seed: int) -> Tuple[pd.DataFrame, Figure]:
|
|
117 |
ties = df_ties.to_numpy(dtype=np.int64)
|
118 |
ties += ties.T
|
119 |
|
120 |
-
|
|
|
|
|
121 |
|
122 |
df_result = pd.DataFrame(data={'score': scores}, index=index)
|
123 |
df_result['rank'] = df_result['score'].rank(na_option='bottom', ascending=False).astype(int)
|
@@ -142,6 +186,11 @@ def main() -> None:
|
|
142 |
file_types=['.tsv', '.csv'],
|
143 |
label='Comparisons'
|
144 |
),
|
|
|
|
|
|
|
|
|
|
|
145 |
gr.Number(
|
146 |
label='Seed',
|
147 |
precision=0
|
@@ -157,7 +206,8 @@ def main() -> None:
|
|
157 |
)
|
158 |
],
|
159 |
examples=[
|
160 |
-
['food.csv', 0]
|
|
|
161 |
],
|
162 |
title='Pair2Rank: Turn Your Side-by-Side Comparisons into Ranking!',
|
163 |
description='''
|
|
|
15 |
__author__ = 'Dmitry Ustalov'
|
16 |
__license__ = 'Apache 2.0'
|
17 |
|
18 |
+
from typing import IO, Tuple, List, cast
|
19 |
|
20 |
import gradio as gr
|
21 |
import numpy as np
|
|
|
32 |
return fig
|
33 |
|
34 |
|
35 |
+
# https://gist.github.com/dustalov/41678b70c40ba5a55430fa5e77b121d9#file-bradley_terry-py
|
36 |
+
def bradley_terry(wins: npt.NDArray[np.int64], ties: npt.NDArray[np.int64],
                  seed: int = 0, tolerance: float = 10e-6, limit: int = 20) -> npt.NDArray[np.float64]:
    """Estimate Bradley-Terry scores from pairwise comparison counts.

    Args:
        wins: Square matrix; ``wins[i, j]`` is added to the score mass of
            item ``i`` against item ``j``.
        ties: Square matrix of the same shape; every tie is counted as half
            a win for each side.
        seed: Unused by this algorithm; accepted so that all ranking
            functions can be called with the same keyword arguments.
        tolerance: Iteration stops once the Euclidean norm of the change
            between two successive score vectors drops below this value.
        limit: Maximum number of iterations to perform.

    Returns:
        One score per item. Scores are normalized to sum to one whenever at
        least one comparison carries weight. Items that take part in no
        comparison receive a score of zero instead of turning the whole
        result into NaN; if there are no comparisons at all, every score
        is zero.
    """
    # Ties are split evenly between both participants.
    M = wins + .5 * ties

    # T[i, j] is the total comparison weight between items i and j.
    T = M.T + M
    active = T > 0

    # Total (tie-adjusted) wins of each item.
    w = M.sum(axis=1)

    Z = np.zeros_like(M, dtype=float)

    p = np.ones(M.shape[0])
    p_new = p.copy()

    converged, iterations = False, 0

    while not converged:
        iterations += 1

        P = np.broadcast_to(p, M.shape)

        # Only pairs that were actually compared contribute; the rest stay zero.
        Z[active] = T[active] / (P[active] + P.T[active])

        denominator = Z.sum(axis=0)

        # An item without any comparison has a zero denominator; dividing
        # would yield NaN and poison the normalization below, so such items
        # are pinned to zero instead.
        p_new[:] = 0.
        np.divide(w, denominator, out=p_new, where=denominator > 0)

        # Skip the normalization when there is nothing to normalize.
        total = p_new.sum()
        if total > 0:
            p_new /= total

        converged = bool(np.linalg.norm(p_new - p) < tolerance) or (iterations >= limit)

        p[:] = p_new

    return p
|
68 |
+
|
69 |
+
|
70 |
+
# https://gist.github.com/dustalov/41678b70c40ba5a55430fa5e77b121d9#file-newman-py
|
71 |
+
def newman(wins: npt.NDArray[np.int64], ties: npt.NDArray[np.int64],
|
72 |
+
seed: int = 0, tolerance: float = 10e-6, limit: int = 20) -> npt.NDArray[np.float64]:
|
73 |
rng = np.random.default_rng(seed)
|
74 |
|
75 |
pi, v = rng.random(wins.shape[0]), rng.random()
|
|
|
115 |
return pi
|
116 |
|
117 |
|
118 |
+
# Registry of the available ranking algorithms, keyed by the display name
# that handler() receives as `algorithm` and that the dropdown offers as choices.
# Every entry is called as fn(wins, ties, seed=seed).
ALGORITHMS = {
|
119 |
+
'Bradley-Terry (1952)': bradley_terry,
|
120 |
+
'Newman (2023)': newman,
|
121 |
+
}
|
122 |
+
|
123 |
+
|
124 |
+
def handler(file: IO[bytes], algorithm: str, seed: int) -> Tuple[pd.DataFrame, Figure]:
|
125 |
if file is None:
|
126 |
raise gr.Error('File must be uploaded')
|
127 |
|
128 |
+
if algorithm not in ALGORITHMS:
|
129 |
+
raise gr.Error(f'Unknown algorithm: {algorithm}')
|
130 |
+
|
131 |
try:
|
132 |
df = pd.read_csv(file.name, dtype=str)
|
133 |
except ValueError as e:
|
|
|
159 |
ties = df_ties.to_numpy(dtype=np.int64)
|
160 |
ties += ties.T
|
161 |
|
162 |
+
assert wins.shape == ties.shape, 'wins and ties shapes are different'
|
163 |
+
|
164 |
+
scores = ALGORITHMS[algorithm](wins, ties, seed=seed)
|
165 |
|
166 |
df_result = pd.DataFrame(data={'score': scores}, index=index)
|
167 |
df_result['rank'] = df_result['score'].rank(na_option='bottom', ascending=False).astype(int)
|
|
|
186 |
file_types=['.tsv', '.csv'],
|
187 |
label='Comparisons'
|
188 |
),
|
189 |
+
gr.Dropdown(
|
190 |
+
choices=cast(List[str], ALGORITHMS),
|
191 |
+
value='Bradley-Terry (1952)',
|
192 |
+
label='Algorithm'
|
193 |
+
),
|
194 |
gr.Number(
|
195 |
label='Seed',
|
196 |
precision=0
|
|
|
206 |
)
|
207 |
],
|
208 |
examples=[
|
209 |
+
['food.csv', 'Bradley-Terry (1952)', 0],
|
210 |
+
['food.csv', 'Newman (2023)', 0]
|
211 |
],
|
212 |
title='Pair2Rank: Turn Your Side-by-Side Comparisons into Ranking!',
|
213 |
description='''
|