Use Evalica
Browse files- README.md +8 -1
- app.py +53 -152
- requirements.txt +1 -1
README.md
CHANGED
@@ -13,4 +13,11 @@ license: apache-2.0
|
|
13 |
|
14 |
# Pair2Rank
|
15 |
|
16 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
13 |
|
14 |
# Pair2Rank
|
15 |
|
16 |
+
This Space uses the [Evalica](https://github.com/dustalov/evalica) library for pairwise comparisons, exposing the following methods:
|
17 |
+
|
18 |
+
- Counting
|
19 |
+
- [Bradley-Terry (1952)](https://doi.org/10.2307/2334029)
|
20 |
+
- [Elo (1960)](https://web.archive.org/web/20080926015601/http://www.uschess.org/about/about.php)
|
21 |
+
- [Eigenvector (1987)](https://doi.org/10.1086/228631)
|
22 |
+
- [PageRank (1998)](https://doi.org/10.1016/S0169-7552(98)00110-X)
|
23 |
+
- [Newman (2023)](https://arxiv.org/abs/2207.00076)
|
app.py
CHANGED
@@ -17,19 +17,18 @@
|
|
17 |
__author__ = 'Dmitry Ustalov'
|
18 |
__license__ = 'Apache 2.0'
|
19 |
|
20 |
-
from collections.abc import Callable
|
21 |
-
from functools import partial
|
22 |
from typing import BinaryIO, cast
|
23 |
|
|
|
24 |
import gradio as gr
|
25 |
import networkx as nx
|
26 |
import numpy as np
|
27 |
-
import numpy.typing as npt
|
28 |
import pandas as pd
|
29 |
import plotly.express as px
|
|
|
30 |
from plotly.graph_objects import Figure
|
31 |
|
32 |
-
TOLERANCE, LIMIT = 1e-
|
33 |
|
34 |
|
35 |
def visualize(df_pairwise: pd.DataFrame) -> Figure:
|
@@ -39,134 +38,56 @@ def visualize(df_pairwise: pd.DataFrame) -> Figure:
|
|
39 |
return fig
|
40 |
|
41 |
|
42 |
-
|
43 |
-
|
44 |
-
|
45 |
|
46 |
-
T = M.T + M
|
47 |
-
active = T > 0
|
48 |
|
49 |
-
|
|
|
|
|
50 |
|
51 |
-
Z = np.zeros_like(M, dtype=float)
|
52 |
|
53 |
-
|
54 |
-
|
|
|
55 |
|
56 |
-
converged, iterations = False, 0
|
57 |
|
58 |
-
|
59 |
-
|
|
|
60 |
|
61 |
-
P = np.broadcast_to(p, M.shape)
|
62 |
|
63 |
-
|
|
|
|
|
64 |
|
65 |
-
p_new[:] = w
|
66 |
-
p_new /= Z.sum(axis=0)
|
67 |
-
p_new /= p_new.sum()
|
68 |
|
69 |
-
|
70 |
-
|
71 |
-
|
72 |
-
|
73 |
-
return p
|
74 |
-
|
75 |
-
|
76 |
-
def centrality(algorithm: Callable[[nx.DiGraph], dict[int, float]],
|
77 |
-
wins: npt.NDArray[np.int64], ties: npt.NDArray[np.int64]) -> npt.NDArray[np.float64]:
|
78 |
-
A = wins + .5 * ties
|
79 |
-
|
80 |
-
G = nx.from_numpy_array(A, create_using=nx.DiGraph)
|
81 |
-
|
82 |
-
scores: dict[int, float] = algorithm(G)
|
83 |
-
|
84 |
-
p = np.array([scores[i] for i in range(len(G))])
|
85 |
-
|
86 |
-
return p
|
87 |
-
|
88 |
-
|
89 |
-
def counting(wins: npt.NDArray[np.int64], ties: npt.NDArray[np.int64]) -> npt.NDArray[np.float64]:
|
90 |
-
M = wins + .5 * ties
|
91 |
-
|
92 |
-
return cast(npt.NDArray[np.float64], M.sum(axis=1))
|
93 |
-
|
94 |
-
|
95 |
-
def eigen(wins: npt.NDArray[np.int64], ties: npt.NDArray[np.int64]) -> npt.NDArray[np.float64]:
|
96 |
-
algorithm = partial(nx.algorithms.eigenvector_centrality_numpy, max_iter=LIMIT, tol=TOLERANCE, weight='weight')
|
97 |
-
|
98 |
-
return centrality(algorithm, wins, ties)
|
99 |
-
|
100 |
-
|
101 |
-
def pagerank(wins: npt.NDArray[np.int64], ties: npt.NDArray[np.int64]) -> npt.NDArray[np.float64]:
|
102 |
-
algorithm = partial(nx.algorithms.pagerank, max_iter=LIMIT, tol=TOLERANCE, weight='weight')
|
103 |
-
|
104 |
-
return centrality(algorithm, wins, ties)
|
105 |
-
|
106 |
-
|
107 |
-
# https://gist.github.com/dustalov/41678b70c40ba5a55430fa5e77b121d9#file-newman-py
|
108 |
-
def newman(wins: npt.NDArray[np.int64], ties: npt.NDArray[np.int64]) -> npt.NDArray[np.float64]:
|
109 |
-
pi, v = np.ones(wins.shape[0]), .5
|
110 |
-
|
111 |
-
converged, iterations = False, 0
|
112 |
-
|
113 |
-
while not converged:
|
114 |
-
iterations += 1
|
115 |
-
|
116 |
-
v_numerator = np.sum(
|
117 |
-
ties * (pi[:, np.newaxis] + pi) /
|
118 |
-
(pi[:, np.newaxis] + pi + 2 * v * np.sqrt(pi[:, np.newaxis] * pi))
|
119 |
-
) / 2
|
120 |
-
|
121 |
-
v_denominator = np.sum(
|
122 |
-
wins * 2 * np.sqrt(pi[:, np.newaxis] * pi) /
|
123 |
-
(pi[:, np.newaxis] + pi + 2 * v * np.sqrt(pi[:, np.newaxis] * pi))
|
124 |
-
)
|
125 |
-
|
126 |
-
v = v_numerator / v_denominator
|
127 |
-
v = np.nan_to_num(v, nan=TOLERANCE)
|
128 |
-
|
129 |
-
pi_old = pi.copy()
|
130 |
-
|
131 |
-
pi_numerator = np.sum(
|
132 |
-
(wins + ties / 2) * (pi + v * np.sqrt(pi[:, np.newaxis] * pi)) /
|
133 |
-
(pi[:, np.newaxis] + pi + 2 * v * np.sqrt(pi[:, np.newaxis] * pi)),
|
134 |
-
axis=1
|
135 |
-
)
|
136 |
-
|
137 |
-
pi_denominator = np.sum(
|
138 |
-
(wins + ties / 2) * (1 + v * np.sqrt(pi[:, np.newaxis] * pi)) /
|
139 |
-
(pi[:, np.newaxis] + pi + 2 * v * np.sqrt(pi[:, np.newaxis] * pi)),
|
140 |
-
axis=0
|
141 |
-
)
|
142 |
-
|
143 |
-
pi = pi_numerator / pi_denominator
|
144 |
-
pi = np.nan_to_num(pi, nan=TOLERANCE)
|
145 |
-
|
146 |
-
converged = np.allclose(pi / (pi + 1), pi_old / (pi_old + 1),
|
147 |
-
rtol=TOLERANCE, atol=TOLERANCE) or (iterations >= LIMIT)
|
148 |
-
|
149 |
-
return pi
|
150 |
|
151 |
|
152 |
ALGORITHMS = {
|
153 |
'Counting': counting,
|
154 |
'Bradley-Terry (1952)': bradley_terry,
|
155 |
-
'
|
|
|
156 |
'PageRank (1998)': pagerank,
|
157 |
'Newman (2023)': newman,
|
158 |
}
|
159 |
|
160 |
|
161 |
-
def largest_strongly_connected_component(
|
162 |
-
G = nx.from_pandas_edgelist(
|
163 |
-
H = nx.from_pandas_edgelist(
|
|
|
164 |
F = nx.compose(G, H)
|
165 |
largest = max(nx.strongly_connected_components(F), key=len)
|
166 |
return cast(set[str], largest)
|
167 |
|
168 |
|
169 |
-
def handler(file: BinaryIO, algorithm: str, filtered: bool, truncated: bool
|
170 |
if file is None:
|
171 |
raise gr.Error('File must be uploaded')
|
172 |
|
@@ -174,56 +95,37 @@ def handler(file: BinaryIO, algorithm: str, filtered: bool, truncated: bool, see
|
|
174 |
raise gr.Error(f'Unknown algorithm: {algorithm}')
|
175 |
|
176 |
try:
|
177 |
-
|
178 |
except ValueError as e:
|
179 |
raise gr.Error(f'Parsing error: {e}')
|
180 |
|
181 |
-
if not pd.Series(['left', 'right', 'winner']).isin(
|
182 |
raise gr.Error('Columns must exist: left, right, winner')
|
183 |
|
184 |
-
if not
|
185 |
raise gr.Error('Allowed winner values: left, right, tie')
|
186 |
|
187 |
-
|
188 |
-
|
189 |
-
df.dropna(axis=0, inplace=True)
|
190 |
|
191 |
-
|
192 |
-
df.loc[df['winner'] == 'right', 'winner'] = 'left'
|
193 |
|
194 |
if filtered:
|
195 |
-
largest = largest_strongly_connected_component(
|
196 |
-
|
197 |
-
df.drop(df[~(df['left'].isin(largest) & df['right'].isin(largest))].index, inplace=True)
|
198 |
-
|
199 |
-
index = pd.Index(largest, name='item')
|
200 |
-
else:
|
201 |
-
index = pd.Index(np.unique(df[['left', 'right']].values), name='item')
|
202 |
-
|
203 |
-
df_wins = pd.pivot_table(df[df['winner'] != 'tie'],
|
204 |
-
index='left', columns='right', values='winner',
|
205 |
-
aggfunc='count', fill_value=0)
|
206 |
-
df_wins = df_wins.reindex(labels=index, columns=index, fill_value=0, copy=False)
|
207 |
|
208 |
-
|
209 |
-
index='left', columns='right', values='winner',
|
210 |
-
aggfunc='count', fill_value=0)
|
211 |
-
df_ties = df_ties.reindex(labels=index, columns=index, fill_value=0, copy=False)
|
212 |
|
213 |
-
|
214 |
-
|
215 |
-
ties += ties.T
|
216 |
|
217 |
-
|
218 |
-
|
219 |
-
scores = ALGORITHMS[algorithm](wins, ties)
|
220 |
|
221 |
df_result = pd.DataFrame(data={'score': scores}, index=index)
|
222 |
|
223 |
df_result['pairs'] = pd.Series(0, dtype=int, index=index).add(
|
224 |
-
|
225 |
).add(
|
226 |
-
|
227 |
).astype(int)
|
228 |
|
229 |
df_result['rank'] = df_result['score'].rank(na_option='bottom', ascending=False).astype(int)
|
@@ -236,9 +138,9 @@ def handler(file: BinaryIO, algorithm: str, filtered: bool, truncated: bool, see
|
|
236 |
df_result = pd.concat((df_result.head(5), df_result.tail(5)), copy=False)
|
237 |
df_result = df_result[~df_result.index.duplicated(keep='last')]
|
238 |
|
239 |
-
|
240 |
-
|
241 |
-
df_pairwise =
|
242 |
|
243 |
fig = visualize(df_pairwise)
|
244 |
|
@@ -272,10 +174,6 @@ def main() -> None:
|
|
272 |
info='Perform the entire computation but output only five head and five tail items, '
|
273 |
'avoiding overlap.'
|
274 |
),
|
275 |
-
gr.Number(
|
276 |
-
label='Seed',
|
277 |
-
precision=0
|
278 |
-
)
|
279 |
],
|
280 |
outputs=[
|
281 |
gr.Dataframe(
|
@@ -287,12 +185,13 @@ def main() -> None:
|
|
287 |
)
|
288 |
],
|
289 |
examples=[
|
290 |
-
['food.csv', 'Counting', False, False
|
291 |
-
['food.csv', 'Bradley-Terry (1952)', False, False
|
292 |
-
['food.csv', 'Eigenvector (
|
293 |
-
['food.csv', 'PageRank (1998)', False, False
|
294 |
-
['food.csv', 'Newman (2023)', False, False
|
295 |
-
['llmfao.csv', 'Bradley-Terry (1952)', False, True,
|
|
|
296 |
],
|
297 |
title='Pair2Rank: Turn Your Side-by-Side Comparisons into Ranking!',
|
298 |
description='''
|
@@ -309,6 +208,8 @@ Possible values for `winner` are `left`, `right`, or `tie`. The provided example
|
|
309 |
As the output, this tool provides a table with items, their estimated scores, and ranks.
|
310 |
'''.strip(),
|
311 |
article='''
|
|
|
|
|
312 |
Read more about Pair2Rank at <https://evalovernite.substack.com/p/llmfao-human-ranking>.
|
313 |
'''.strip(),
|
314 |
allow_flagging='never'
|
|
|
17 |
__author__ = 'Dmitry Ustalov'
|
18 |
__license__ = 'Apache 2.0'
|
19 |
|
|
|
|
|
20 |
from typing import BinaryIO, cast
|
21 |
|
22 |
+
import evalica
|
23 |
import gradio as gr
|
24 |
import networkx as nx
|
25 |
import numpy as np
|
|
|
26 |
import pandas as pd
|
27 |
import plotly.express as px
|
28 |
+
from evalica import Winner
|
29 |
from plotly.graph_objects import Figure
|
30 |
|
31 |
+
# Shared solver settings: passed as `tolerance=` and `limit=` to the evalica
# calls in this module that accept them.
TOLERANCE, LIMIT = 1e-6, 100
|
32 |
|
33 |
|
34 |
def visualize(df_pairwise: pd.DataFrame) -> Figure:
|
|
|
38 |
return fig
|
39 |
|
40 |
|
41 |
+
def counting(xs: list[str], ys: list[str], ws: list[Winner]) -> tuple["pd.Series[float]", "pd.Index[str]"]:
    """Score the compared items with ``evalica.counting``.

    Args:
        xs: Left-hand items, one per comparison.
        ys: Right-hand items, one per comparison.
        ws: Outcome of each comparison.

    Returns:
        The ``scores`` and ``index`` attributes of the Evalica result, as a pair.
    """
    result = evalica.counting(xs, ys, ws)
    return result.scores, result.index
|
44 |
|
|
|
|
|
45 |
|
46 |
+
def bradley_terry(xs: list[str], ys: list[str], ws: list[Winner]) -> tuple["pd.Series[float]", "pd.Index[str]"]:
    """Score the compared items with ``evalica.bradley_terry``.

    The module-level ``TOLERANCE`` and ``LIMIT`` are forwarded as the
    ``tolerance`` and ``limit`` arguments.

    Args:
        xs: Left-hand items, one per comparison.
        ys: Right-hand items, one per comparison.
        ws: Outcome of each comparison.

    Returns:
        The ``scores`` and ``index`` attributes of the Evalica result, as a pair.
    """
    result = evalica.bradley_terry(xs, ys, ws, tolerance=TOLERANCE, limit=LIMIT)
    return result.scores, result.index
|
49 |
|
|
|
50 |
|
51 |
+
def elo(xs: list[str], ys: list[str], ws: list[Winner]) -> tuple["pd.Series[float]", "pd.Index[str]"]:
    """Score the compared items with ``evalica.elo`` using its default settings.

    Args:
        xs: Left-hand items, one per comparison.
        ys: Right-hand items, one per comparison.
        ws: Outcome of each comparison.

    Returns:
        The ``scores`` and ``index`` attributes of the Evalica result, as a pair.
    """
    result = evalica.elo(xs, ys, ws)
    return result.scores, result.index
|
54 |
|
|
|
55 |
|
56 |
+
def eigen(xs: list[str], ys: list[str], ws: list[Winner]) -> tuple["pd.Series[float]", "pd.Index[str]"]:
    """Score the compared items with ``evalica.eigen``.

    The module-level ``TOLERANCE`` and ``LIMIT`` are forwarded as the
    ``tolerance`` and ``limit`` arguments.

    Args:
        xs: Left-hand items, one per comparison.
        ys: Right-hand items, one per comparison.
        ws: Outcome of each comparison.

    Returns:
        The ``scores`` and ``index`` attributes of the Evalica result, as a pair.
    """
    result = evalica.eigen(xs, ys, ws, tolerance=TOLERANCE, limit=LIMIT)
    return result.scores, result.index
|
59 |
|
|
|
60 |
|
61 |
+
def pagerank(xs: list[str], ys: list[str], ws: list[Winner]) -> tuple["pd.Series[float]", "pd.Index[str]"]:
    """Score the compared items with ``evalica.pagerank``.

    The module-level ``TOLERANCE`` and ``LIMIT`` are forwarded as the
    ``tolerance`` and ``limit`` arguments.

    Args:
        xs: Left-hand items, one per comparison.
        ys: Right-hand items, one per comparison.
        ws: Outcome of each comparison.

    Returns:
        The ``scores`` and ``index`` attributes of the Evalica result, as a pair.
    """
    result = evalica.pagerank(xs, ys, ws, tolerance=TOLERANCE, limit=LIMIT)
    return result.scores, result.index
|
64 |
|
|
|
|
|
|
|
65 |
|
66 |
+
def newman(xs: list[str], ys: list[str], ws: list[Winner]) -> tuple["pd.Series[float]", "pd.Index[str]"]:
    """Score the compared items with ``evalica.newman``.

    The module-level ``TOLERANCE`` and ``LIMIT`` are forwarded as the
    ``tolerance`` and ``limit`` arguments.

    Args:
        xs: Left-hand items, one per comparison.
        ys: Right-hand items, one per comparison.
        ws: Outcome of each comparison.

    Returns:
        The ``scores`` and ``index`` attributes of the Evalica result, as a pair.
    """
    result = evalica.newman(xs, ys, ws, tolerance=TOLERANCE, limit=LIMIT)
    return result.scores, result.index
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
69 |
|
70 |
|
71 |
# Dispatch table: the handler looks up the selected algorithm name here and
# calls the matching wrapper as ALGORITHMS[name](xs, ys, ws).
ALGORITHMS = {
    'Counting': counting,
    'Bradley-Terry (1952)': bradley_terry,
    'Elo (1960)': elo,
    'Eigenvector (1987)': eigen,
    'PageRank (1998)': pagerank,
    'Newman (2023)': newman,
}
|
79 |
|
80 |
|
81 |
+
def largest_strongly_connected_component(df_pairs: pd.DataFrame) -> set[str]:
    """Return the items in the largest strongly connected component of the comparison graph.

    Each comparison becomes a directed edge from the winner to the loser, and a
    tie contributes edges in both directions.

    Args:
        df_pairs: Comparisons with ``left``, ``right`` and ``winner`` columns,
            where ``winner`` is ``'left'``, ``'right'`` or ``'tie'``.

    Returns:
        The set of item names in the largest component, or an empty set when
        ``df_pairs`` has no rows.
    """
    right_won = df_pairs['winner'] == 'right'

    # Orient every edge as winner -> loser. The rows are no longer rewritten so
    # that the winner is always on the left, so a 'right' outcome must have its
    # endpoints swapped here; otherwise the graph would only reflect the order
    # in which the items were presented.
    df_edges = pd.DataFrame({
        'source': df_pairs['left'].mask(right_won, df_pairs['right']),
        'target': df_pairs['right'].mask(right_won, df_pairs['left']),
    })

    # Ties already have a left -> right edge above; add the reverse direction.
    df_ties = df_pairs[df_pairs['winner'] == 'tie']

    G = nx.from_pandas_edgelist(df_edges, source='source', target='target', create_using=nx.DiGraph)
    H = nx.from_pandas_edgelist(df_ties, source='right', target='left', create_using=nx.DiGraph)
    F = nx.compose(G, H)

    # default= avoids a bare "max() arg is an empty sequence" on an empty graph.
    largest = max(nx.strongly_connected_components(F), key=len, default=set())
    return cast(set[str], largest)
|
88 |
|
89 |
|
90 |
+
def handler(file: BinaryIO, algorithm: str, filtered: bool, truncated: bool) -> tuple[pd.DataFrame, Figure]:
|
91 |
if file is None:
|
92 |
raise gr.Error('File must be uploaded')
|
93 |
|
|
|
95 |
raise gr.Error(f'Unknown algorithm: {algorithm}')
|
96 |
|
97 |
try:
|
98 |
+
df_pairs = pd.read_csv(file.name, dtype=str)
|
99 |
except ValueError as e:
|
100 |
raise gr.Error(f'Parsing error: {e}')
|
101 |
|
102 |
+
if not pd.Series(['left', 'right', 'winner']).isin(df_pairs.columns).all():
|
103 |
raise gr.Error('Columns must exist: left, right, winner')
|
104 |
|
105 |
+
if not df_pairs['winner'].isin(pd.Series(['left', 'right', 'tie'])).all():
|
106 |
raise gr.Error('Allowed winner values: left, right, tie')
|
107 |
|
108 |
+
df_pairs = df_pairs[['left', 'right', 'winner']]
|
|
|
|
|
109 |
|
110 |
+
df_pairs.dropna(axis=0, inplace=True)
|
|
|
111 |
|
112 |
if filtered:
|
113 |
+
largest = largest_strongly_connected_component(df_pairs)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
114 |
|
115 |
+
df_pairs.drop(df_pairs[~(df_pairs['left'].isin(largest) & df_pairs['right'].isin(largest))].index, inplace=True)
|
|
|
|
|
|
|
116 |
|
117 |
+
xs, ys = df_pairs["left"], df_pairs["right"]
|
118 |
+
ws = df_pairs["winner"].map({"left": Winner.X, "right": Winner.Y, "tie": Winner.Draw})
|
|
|
119 |
|
120 |
+
scores, index = ALGORITHMS[algorithm](xs, ys, ws)
|
121 |
+
index.name = 'item'
|
|
|
122 |
|
123 |
df_result = pd.DataFrame(data={'score': scores}, index=index)
|
124 |
|
125 |
df_result['pairs'] = pd.Series(0, dtype=int, index=index).add(
|
126 |
+
df_pairs.groupby('left')['left'].count(), fill_value=0
|
127 |
).add(
|
128 |
+
df_pairs.groupby('right')['right'].count(), fill_value=0
|
129 |
).astype(int)
|
130 |
|
131 |
df_result['rank'] = df_result['score'].rank(na_option='bottom', ascending=False).astype(int)
|
|
|
138 |
df_result = pd.concat((df_result.head(5), df_result.tail(5)), copy=False)
|
139 |
df_result = df_result[~df_result.index.duplicated(keep='last')]
|
140 |
|
141 |
+
pairwise = evalica.pairwise_scores(df_result['score'].to_numpy())
|
142 |
+
|
143 |
+
df_pairwise = pd.DataFrame(data=pairwise, index=df_result['item'], columns=df_result['item'])
|
144 |
|
145 |
fig = visualize(df_pairwise)
|
146 |
|
|
|
174 |
info='Perform the entire computation but output only five head and five tail items, '
|
175 |
'avoiding overlap.'
|
176 |
),
|
|
|
|
|
|
|
|
|
177 |
],
|
178 |
outputs=[
|
179 |
gr.Dataframe(
|
|
|
185 |
)
|
186 |
],
|
187 |
examples=[
|
188 |
+
['food.csv', 'Counting', False, False],
|
189 |
+
['food.csv', 'Bradley-Terry (1952)', False, False],
|
190 |
+
['food.csv', 'Eigenvector (1987)', False, False],
|
191 |
+
['food.csv', 'PageRank (1998)', False, False],
|
192 |
+
['food.csv', 'Newman (2023)', False, False],
|
193 |
+
['llmfao.csv', 'Bradley-Terry (1952)', False, True],
|
194 |
+
['llmfao.csv', 'Elo (1960)', False, True],
|
195 |
],
|
196 |
title='Pair2Rank: Turn Your Side-by-Side Comparisons into Ranking!',
|
197 |
description='''
|
|
|
208 |
As the output, this tool provides a table with items, their estimated scores, and ranks.
|
209 |
'''.strip(),
|
210 |
article='''
|
211 |
+
Pair2Rank uses the [Evalica](https://pypi.org/p/evalica) library for computing the scores: <https://github.com/dustalov/evalica>.
|
212 |
+
|
213 |
Read more about Pair2Rank at <https://evalovernite.substack.com/p/llmfao-human-ranking>.
|
214 |
'''.strip(),
|
215 |
allow_flagging='never'
|
requirements.txt
CHANGED
@@ -1,3 +1,3 @@
|
|
|
|
1 |
networkx
|
2 |
plotly
|
3 |
-
scipy
|
|
|
1 |
+
evalica
|
2 |
networkx
|
3 |
plotly
|
|