dustalov committed on
Commit e94b477
1 Parent(s): db3062e

Use Evalica

Files changed (3)
  1. README.md +8 -1
  2. app.py +53 -152
  3. requirements.txt +1 -1
README.md CHANGED
@@ -13,4 +13,11 @@ license: apache-2.0
 
  # Pair2Rank
 
- It's all about [ranking](https://arxiv.org/abs/2207.00076).
+ This Space uses the [Evalica](https://github.com/dustalov/evalica) library for pairwise comparisons, exposing the following methods:
+
+ - Counting
+ - [Bradley-Terry (1952)](https://doi.org/10.2307/2334029)
+ - [Elo (1960)](https://web.archive.org/web/20080926015601/http://www.uschess.org/about/about.php)
+ - [Eigenvector (1987)](https://doi.org/10.1086/228631)
+ - [PageRank (1998)](https://doi.org/10.1016/S0169-7552(98)00110-X)
+ - [Newman (2023)](https://arxiv.org/abs/2207.00076)
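For orientation, a minimal sketch of what calling one of these methods through Evalica looks like, mirroring the signatures used in app.py below. The toy items and the `sort_values` call are illustrative assumptions, and the API may differ in later Evalica releases.

```python
import evalica
from evalica import Winner

# Hypothetical comparisons: each triple is (left item, right item, outcome).
xs = ['pizza', 'sushi', 'pizza']
ys = ['sushi', 'tacos', 'tacos']
ws = [Winner.X, Winner.Draw, Winner.Y]  # left won, tie, right won

# Same call and keyword arguments as the bradley_terry wrapper in app.py.
result = evalica.bradley_terry(xs, ys, ws, tolerance=1e-6, limit=100)

# result.scores is a pandas Series indexed by item (per the type hints in app.py).
print(result.scores.sort_values(ascending=False))
```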
app.py CHANGED
@@ -17,19 +17,18 @@
  __author__ = 'Dmitry Ustalov'
  __license__ = 'Apache 2.0'
 
- from collections.abc import Callable
- from functools import partial
  from typing import BinaryIO, cast
 
+ import evalica
  import gradio as gr
  import networkx as nx
  import numpy as np
- import numpy.typing as npt
  import pandas as pd
  import plotly.express as px
+ from evalica import Winner
  from plotly.graph_objects import Figure
 
- TOLERANCE, LIMIT = 1e-16, 1000
+ TOLERANCE, LIMIT = 1e-6, 100
 
 
  def visualize(df_pairwise: pd.DataFrame) -> Figure:
@@ -39,134 +38,56 @@ def visualize(df_pairwise: pd.DataFrame) -> Figure:
      return fig
 
 
- # https://gist.github.com/dustalov/41678b70c40ba5a55430fa5e77b121d9#file-bradley_terry-py
- def bradley_terry(wins: npt.NDArray[np.int64], ties: npt.NDArray[np.int64]) -> npt.NDArray[np.float64]:
-     M = wins + .5 * ties
-
-     T = M.T + M
-     active = T > 0
-
-     w = M.sum(axis=1)
-
-     Z = np.zeros_like(M, dtype=float)
-
-     p = np.ones(M.shape[0])
-     p_new = p.copy()
-
-     converged, iterations = False, 0
-
-     while not converged:
-         iterations += 1
-
-         P = np.broadcast_to(p, M.shape)
-
-         Z[active] = T[active] / (P[active] + P.T[active])
-
-         p_new[:] = w
-         p_new /= Z.sum(axis=0)
-         p_new /= p_new.sum()
-
-         converged = bool(np.linalg.norm(p_new - p) < TOLERANCE) or (iterations >= LIMIT)
-
-         p[:] = p_new
-
-     return p
-
-
- def centrality(algorithm: Callable[[nx.DiGraph], dict[int, float]],
-                wins: npt.NDArray[np.int64], ties: npt.NDArray[np.int64]) -> npt.NDArray[np.float64]:
-     A = wins + .5 * ties
-
-     G = nx.from_numpy_array(A, create_using=nx.DiGraph)
-
-     scores: dict[int, float] = algorithm(G)
-
-     p = np.array([scores[i] for i in range(len(G))])
-
-     return p
-
-
- def counting(wins: npt.NDArray[np.int64], ties: npt.NDArray[np.int64]) -> npt.NDArray[np.float64]:
-     M = wins + .5 * ties
-
-     return cast(npt.NDArray[np.float64], M.sum(axis=1))
-
-
- def eigen(wins: npt.NDArray[np.int64], ties: npt.NDArray[np.int64]) -> npt.NDArray[np.float64]:
-     algorithm = partial(nx.algorithms.eigenvector_centrality_numpy, max_iter=LIMIT, tol=TOLERANCE, weight='weight')
-
-     return centrality(algorithm, wins, ties)
-
-
- def pagerank(wins: npt.NDArray[np.int64], ties: npt.NDArray[np.int64]) -> npt.NDArray[np.float64]:
-     algorithm = partial(nx.algorithms.pagerank, max_iter=LIMIT, tol=TOLERANCE, weight='weight')
-
-     return centrality(algorithm, wins, ties)
-
-
- # https://gist.github.com/dustalov/41678b70c40ba5a55430fa5e77b121d9#file-newman-py
- def newman(wins: npt.NDArray[np.int64], ties: npt.NDArray[np.int64]) -> npt.NDArray[np.float64]:
-     pi, v = np.ones(wins.shape[0]), .5
-
-     converged, iterations = False, 0
-
-     while not converged:
-         iterations += 1
-
-         v_numerator = np.sum(
-             ties * (pi[:, np.newaxis] + pi) /
-             (pi[:, np.newaxis] + pi + 2 * v * np.sqrt(pi[:, np.newaxis] * pi))
-         ) / 2
-
-         v_denominator = np.sum(
-             wins * 2 * np.sqrt(pi[:, np.newaxis] * pi) /
-             (pi[:, np.newaxis] + pi + 2 * v * np.sqrt(pi[:, np.newaxis] * pi))
-         )
-
-         v = v_numerator / v_denominator
-         v = np.nan_to_num(v, nan=TOLERANCE)
-
-         pi_old = pi.copy()
-
-         pi_numerator = np.sum(
-             (wins + ties / 2) * (pi + v * np.sqrt(pi[:, np.newaxis] * pi)) /
-             (pi[:, np.newaxis] + pi + 2 * v * np.sqrt(pi[:, np.newaxis] * pi)),
-             axis=1
-         )
-
-         pi_denominator = np.sum(
-             (wins + ties / 2) * (1 + v * np.sqrt(pi[:, np.newaxis] * pi)) /
-             (pi[:, np.newaxis] + pi + 2 * v * np.sqrt(pi[:, np.newaxis] * pi)),
-             axis=0
-         )
-
-         pi = pi_numerator / pi_denominator
-         pi = np.nan_to_num(pi, nan=TOLERANCE)
-
-         converged = np.allclose(pi / (pi + 1), pi_old / (pi_old + 1),
-                                 rtol=TOLERANCE, atol=TOLERANCE) or (iterations >= LIMIT)
-
-     return pi
+ def counting(xs: list[str], ys: list[str], ws: list[Winner]) -> tuple["pd.Series[str]", "pd.Index[str]"]:
+     result = evalica.counting(xs, ys, ws)
+     return result.scores, result.index
+
+
+ def bradley_terry(xs: list[str], ys: list[str], ws: list[Winner]) -> tuple["pd.Series[str]", "pd.Index[str]"]:
+     result = evalica.bradley_terry(xs, ys, ws, tolerance=TOLERANCE, limit=LIMIT)
+     return result.scores, result.index
+
+
+ def elo(xs: list[str], ys: list[str], ws: list[Winner]) -> tuple["pd.Series[str]", "pd.Index[str]"]:
+     result = evalica.elo(xs, ys, ws)
+     return result.scores, result.index
+
+
+ def eigen(xs: list[str], ys: list[str], ws: list[Winner]) -> tuple["pd.Series[str]", "pd.Index[str]"]:
+     result = evalica.eigen(xs, ys, ws, tolerance=TOLERANCE, limit=LIMIT)
+     return result.scores, result.index
+
+
+ def pagerank(xs: list[str], ys: list[str], ws: list[Winner]) -> tuple["pd.Series[str]", "pd.Index[str]"]:
+     result = evalica.pagerank(xs, ys, ws, tolerance=TOLERANCE, limit=LIMIT)
+     return result.scores, result.index
+
+
+ def newman(xs: list[str], ys: list[str], ws: list[Winner]) -> tuple["pd.Series[str]", "pd.Index[str]"]:
+     result = evalica.newman(xs, ys, ws, tolerance=TOLERANCE, limit=LIMIT)
+     return result.scores, result.index
 
 
  ALGORITHMS = {
      'Counting': counting,
      'Bradley-Terry (1952)': bradley_terry,
-     'Eigenvector (1986)': eigen,
+     'Elo (1960)': elo,
+     'Eigenvector (1987)': eigen,
      'PageRank (1998)': pagerank,
      'Newman (2023)': newman,
  }
 
 
- def largest_strongly_connected_component(df: pd.DataFrame) -> set[str]:
-     G = nx.from_pandas_edgelist(df, source='left', target='right', create_using=nx.DiGraph)
-     H = nx.from_pandas_edgelist(df[df['winner'] == 'tie'], source='right', target='left', create_using=nx.DiGraph)
+ def largest_strongly_connected_component(df_pairs: pd.DataFrame) -> set[str]:
+     G = nx.from_pandas_edgelist(df_pairs, source='left', target='right', create_using=nx.DiGraph)
+     H = nx.from_pandas_edgelist(df_pairs[df_pairs['winner'] == 'tie'], source='right', target='left',
+                                 create_using=nx.DiGraph)
      F = nx.compose(G, H)
      largest = max(nx.strongly_connected_components(F), key=len)
      return cast(set[str], largest)
 
 
- def handler(file: BinaryIO, algorithm: str, filtered: bool, truncated: bool, seed: int) -> tuple[pd.DataFrame, Figure]:
+ def handler(file: BinaryIO, algorithm: str, filtered: bool, truncated: bool) -> tuple[pd.DataFrame, Figure]:
      if file is None:
          raise gr.Error('File must be uploaded')
 
@@ -174,56 +95,37 @@ def handler(file: BinaryIO, algorithm: str, filtered: bool, truncated: bool, see
          raise gr.Error(f'Unknown algorithm: {algorithm}')
 
      try:
-         df = pd.read_csv(file.name, dtype=str)
+         df_pairs = pd.read_csv(file.name, dtype=str)
      except ValueError as e:
          raise gr.Error(f'Parsing error: {e}')
 
-     if not pd.Series(['left', 'right', 'winner']).isin(df.columns).all():
+     if not pd.Series(['left', 'right', 'winner']).isin(df_pairs.columns).all():
          raise gr.Error('Columns must exist: left, right, winner')
 
-     if not df['winner'].isin(pd.Series(['left', 'right', 'tie'])).all():
+     if not df_pairs['winner'].isin(pd.Series(['left', 'right', 'tie'])).all():
          raise gr.Error('Allowed winner values: left, right, tie')
 
-     df = df[['left', 'right', 'winner']]
-
-     df.dropna(axis=0, inplace=True)
+     df_pairs = df_pairs[['left', 'right', 'winner']]
 
-     df.loc[df['winner'] == 'right', ['left', 'right']] = df.loc[df['winner'] == 'right', ['right', 'left']].values
-     df.loc[df['winner'] == 'right', 'winner'] = 'left'
+     df_pairs.dropna(axis=0, inplace=True)
 
      if filtered:
-         largest = largest_strongly_connected_component(df)
-
-         df.drop(df[~(df['left'].isin(largest) & df['right'].isin(largest))].index, inplace=True)
-
-         index = pd.Index(largest, name='item')
-     else:
-         index = pd.Index(np.unique(df[['left', 'right']].values), name='item')
-
-     df_wins = pd.pivot_table(df[df['winner'] != 'tie'],
-                              index='left', columns='right', values='winner',
-                              aggfunc='count', fill_value=0)
-     df_wins = df_wins.reindex(labels=index, columns=index, fill_value=0, copy=False)
+         largest = largest_strongly_connected_component(df_pairs)
 
-     df_ties = pd.pivot_table(df[df['winner'] == 'tie'],
-                              index='left', columns='right', values='winner',
-                              aggfunc='count', fill_value=0)
-     df_ties = df_ties.reindex(labels=index, columns=index, fill_value=0, copy=False)
+         df_pairs.drop(df_pairs[~(df_pairs['left'].isin(largest) & df_pairs['right'].isin(largest))].index, inplace=True)
 
-     wins = df_wins.to_numpy(dtype=int)
-     ties = df_ties.to_numpy(dtype=int)
-     ties += ties.T
+     xs, ys = df_pairs["left"], df_pairs["right"]
+     ws = df_pairs["winner"].map({"left": Winner.X, "right": Winner.Y, "tie": Winner.Draw})
 
-     assert wins.shape == ties.shape, 'wins and ties shapes are different'
-
-     scores = ALGORITHMS[algorithm](wins, ties)
+     scores, index = ALGORITHMS[algorithm](xs, ys, ws)
+     index.name = 'item'
 
      df_result = pd.DataFrame(data={'score': scores}, index=index)
 
      df_result['pairs'] = pd.Series(0, dtype=int, index=index).add(
-         df.groupby('left')['left'].count(), fill_value=0
+         df_pairs.groupby('left')['left'].count(), fill_value=0
      ).add(
-         df.groupby('right')['right'].count(), fill_value=0
+         df_pairs.groupby('right')['right'].count(), fill_value=0
      ).astype(int)
 
      df_result['rank'] = df_result['score'].rank(na_option='bottom', ascending=False).astype(int)
@@ -236,9 +138,9 @@ def handler(file: BinaryIO, algorithm: str, filtered: bool, truncated: bool, see
          df_result = pd.concat((df_result.head(5), df_result.tail(5)), copy=False)
          df_result = df_result[~df_result.index.duplicated(keep='last')]
 
-     df_pairwise = pd.DataFrame(data=scores[:, np.newaxis] / (scores + scores[:, np.newaxis]),
-                                index=index, columns=index)
-     df_pairwise = df_pairwise.reindex(labels=df_result['item'], columns=df_result['item'], copy=False)
+     pairwise = evalica.pairwise_scores(df_result['score'].to_numpy())
+
+     df_pairwise = pd.DataFrame(data=pairwise, index=df_result['item'], columns=df_result['item'])
 
      fig = visualize(df_pairwise)
 
@@ -272,10 +174,6 @@ def main() -> None:
                  info='Perform the entire computation but output only five head and five tail items, '
                       'avoiding overlap.'
              ),
-             gr.Number(
-                 label='Seed',
-                 precision=0
-             )
          ],
          outputs=[
              gr.Dataframe(
@@ -287,12 +185,13 @@ def main() -> None:
              )
          ],
          examples=[
-             ['food.csv', 'Counting', False, False, 0],
-             ['food.csv', 'Bradley-Terry (1952)', False, False, 0],
-             ['food.csv', 'Eigenvector (1986)', False, False, 0],
-             ['food.csv', 'PageRank (1998)', False, False, 0],
-             ['food.csv', 'Newman (2023)', False, False, 0],
-             ['llmfao.csv', 'Bradley-Terry (1952)', False, True, 0]
+             ['food.csv', 'Counting', False, False],
+             ['food.csv', 'Bradley-Terry (1952)', False, False],
+             ['food.csv', 'Eigenvector (1987)', False, False],
+             ['food.csv', 'PageRank (1998)', False, False],
+             ['food.csv', 'Newman (2023)', False, False],
+             ['llmfao.csv', 'Bradley-Terry (1952)', False, True],
+             ['llmfao.csv', 'Elo (1960)', False, True],
          ],
          title='Pair2Rank: Turn Your Side-by-Side Comparisons into Ranking!',
          description='''
@@ -309,6 +208,8 @@ Possible values for `winner` are `left`, `right`, or `tie`. The provided example
  As the output, this tool provides a table with items, their estimated scores, and ranks.
  '''.strip(),
          article='''
+ Pair2Rank uses the [Evalica](https://pypi.org/p/evalica) library for computing the scores: <https://github.com/dustalov/evalica>.
+
  Read more about Pair2Rank at <https://evalovernite.substack.com/p/llmfao-human-ranking>.
  '''.strip(),
          allow_flagging='never'
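Taken together, the refactored handler reduces to three Evalica steps: map the `winner` column to `Winner` values, run the selected algorithm, and build the pairwise score matrix passed to visualize(). A condensed sketch under the same assumptions, with a hypothetical toy DataFrame standing in for the uploaded CSV:

```python
import evalica
import pandas as pd
from evalica import Winner

# Hypothetical frame with the columns the app expects: left, right, winner.
df_pairs = pd.DataFrame({
    'left':   ['a', 'b', 'a'],
    'right':  ['b', 'c', 'c'],
    'winner': ['left', 'tie', 'right'],
})

# Same mapping as in handler(): left -> Winner.X, right -> Winner.Y, tie -> Winner.Draw.
ws = df_pairs['winner'].map({'left': Winner.X, 'right': Winner.Y, 'tie': Winner.Draw})

# Any of the wrappers above works the same way; Newman is shown here.
result = evalica.newman(df_pairs['left'], df_pairs['right'], ws, tolerance=1e-6, limit=100)

# Pairwise score matrix, analogous to what the new code feeds to visualize().
pairwise = evalica.pairwise_scores(result.scores.to_numpy())
df_pairwise = pd.DataFrame(pairwise, index=result.index, columns=result.index)
print(df_pairwise)
```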
requirements.txt CHANGED
@@ -1,3 +1,3 @@
+ evalica
  networkx
  plotly
- scipy
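The dependency swap (scipy out, evalica in) can be sanity-checked locally. A hypothetical smoke test, assuming the requirements have been installed with `pip install -r requirements.txt`:

```python
# Hypothetical check that the modules app.py imports still resolve
# after replacing scipy with evalica in requirements.txt.
import importlib

for module in ('evalica', 'gradio', 'networkx', 'numpy', 'pandas', 'plotly'):
    importlib.import_module(module)

print('all imports resolved')
```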