dustalov committed on
Commit
9977893
·
verified ·
1 Parent(s): 123578e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +58 -8
app.py CHANGED
@@ -15,7 +15,7 @@
15
  __author__ = 'Dmitry Ustalov'
16
  __license__ = 'Apache 2.0'
17
 
18
- from typing import IO, Tuple
19
 
20
  import gradio as gr
21
  import numpy as np
@@ -32,11 +32,44 @@ def visualize(df_pairwise: pd.DataFrame) -> Figure:
32
  return fig
33
 
34
 
35
- # https://gist.github.com/dustalov/41678b70c40ba5a55430fa5e77b121d9#file-newman-py
36
- def aggregate(wins: npt.NDArray[np.int64], ties: npt.NDArray[np.int64],
37
- seed: int = 0, tolerance: float = 10e-6, limit: int = 20) -> npt.NDArray[np.float64]:
38
- assert wins.shape == ties.shape, 'wins and ties shapes are different'
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
39
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
40
  rng = np.random.default_rng(seed)
41
 
42
  pi, v = rng.random(wins.shape[0]), rng.random()
@@ -82,10 +115,19 @@ def aggregate(wins: npt.NDArray[np.int64], ties: npt.NDArray[np.int64],
82
  return pi
83
 
84
 
85
- def handler(file: IO[bytes], seed: int) -> Tuple[pd.DataFrame, Figure]:
 
 
 
 
 
 
86
  if file is None:
87
  raise gr.Error('File must be uploaded')
88
 
 
 
 
89
  try:
90
  df = pd.read_csv(file.name, dtype=str)
91
  except ValueError as e:
@@ -117,7 +159,9 @@ def handler(file: IO[bytes], seed: int) -> Tuple[pd.DataFrame, Figure]:
117
  ties = df_ties.to_numpy(dtype=np.int64)
118
  ties += ties.T
119
 
120
- scores = aggregate(wins, ties, seed=seed)
 
 
121
 
122
  df_result = pd.DataFrame(data={'score': scores}, index=index)
123
  df_result['rank'] = df_result['score'].rank(na_option='bottom', ascending=False).astype(int)
@@ -142,6 +186,11 @@ def main() -> None:
142
  file_types=['.tsv', '.csv'],
143
  label='Comparisons'
144
  ),
 
 
 
 
 
145
  gr.Number(
146
  label='Seed',
147
  precision=0
@@ -157,7 +206,8 @@ def main() -> None:
157
  )
158
  ],
159
  examples=[
160
- ['food.csv', 0]
 
161
  ],
162
  title='Pair2Rank: Turn Your Side-by-Side Comparisons into Ranking!',
163
  description='''
 
15
  __author__ = 'Dmitry Ustalov'
16
  __license__ = 'Apache 2.0'
17
 
18
+ from typing import IO, Tuple, List, cast
19
 
20
  import gradio as gr
21
  import numpy as np
 
32
  return fig
33
 
34
 
35
# https://gist.github.com/dustalov/41678b70c40ba5a55430fa5e77b121d9#file-bradley_terry-py
def bradley_terry(wins: npt.NDArray[np.int64], ties: npt.NDArray[np.int64],
                  seed: int = 0, tolerance: float = 10e-6, limit: int = 20) -> npt.NDArray[np.float64]:
    """Estimate Bradley-Terry scores from pairwise comparison counts.

    Every tie is counted as half a win for each side. Scores start at one
    for every item, are refined iteratively and are renormalised to sum to
    one after each step. Iteration stops once the Euclidean norm of the
    change between two consecutive steps drops below ``tolerance`` or once
    ``limit`` iterations have been performed, whichever happens first.

    Items that take part in no comparison at all receive a score of zero
    instead of poisoning every score with NaN through a 0/0 division.

    Args:
        wins: Square matrix of win counts; row sums are used as each
            item's total number of wins, so ``wins[i, j]`` is presumably
            the number of times item ``i`` beat item ``j``.
        ties: Matrix of tie counts with the same shape as ``wins``
            (presumably symmetric; the caller adds its transpose).
        seed: Unused here; accepted so that the function can be called
            with the same arguments as ``newman``.
        tolerance: Convergence threshold on the norm of the score change
            (the default ``10e-6`` equals ``1e-5``).
        limit: Maximum number of iterations; at least one iteration is
            always performed.

    Returns:
        One-dimensional array with one score per item. The scores sum to
        one unless there are no comparisons at all, in which case they are
        all zero.
    """
    # A tie contributes half a win to both participants.
    M = wins + .5 * ties

    # T[i, j] is the total (tie-weighted) number of comparisons between i and j.
    T = M.T + M
    active = T > 0

    w = M.sum(axis=1)

    Z = np.zeros_like(M, dtype=float)

    p = np.ones(M.shape[0])
    p_new = p.copy()

    converged, iterations = False, 0

    while not converged:
        iterations += 1

        # P[i, j] == p[j] and P.T[i, j] == p[i]; broadcast_to creates a view, not a copy.
        P = np.broadcast_to(p, M.shape)

        # Only compared pairs are updated; every other entry of Z stays zero.
        Z[active] = T[active] / (P[active] + P.T[active])

        denominator = Z.sum(axis=0)

        # Items without comparisons have a zero denominator (and zero wins);
        # leave their score at zero instead of computing 0/0.
        p_new[:] = 0.
        np.divide(w, denominator, out=p_new, where=denominator > 0)

        # With no comparisons at all the total is zero; skip the normalisation.
        total = p_new.sum()
        if total > 0:
            p_new /= total

        converged = bool(np.linalg.norm(p_new - p) < tolerance) or (iterations >= limit)

        p[:] = p_new

    return p
68
+
69
+
70
+ # https://gist.github.com/dustalov/41678b70c40ba5a55430fa5e77b121d9#file-newman-py
71
+ def newman(wins: npt.NDArray[np.int64], ties: npt.NDArray[np.int64],
72
+ seed: int = 0, tolerance: float = 10e-6, limit: int = 20) -> npt.NDArray[np.float64]:
73
  rng = np.random.default_rng(seed)
74
 
75
  pi, v = rng.random(wins.shape[0]), rng.random()
 
115
  return pi
116
 
117
 
118
+ ALGORITHMS = {
119
+ 'Bradley-Terry (1952)': bradley_terry,
120
+ 'Newman (2023)': newman,
121
+ }
122
+
123
+
124
+ def handler(file: IO[bytes], algorithm: str, seed: int) -> Tuple[pd.DataFrame, Figure]:
125
  if file is None:
126
  raise gr.Error('File must be uploaded')
127
 
128
+ if algorithm not in ALGORITHMS:
129
+ raise gr.Error(f'Unknown algorithm: {algorithm}')
130
+
131
  try:
132
  df = pd.read_csv(file.name, dtype=str)
133
  except ValueError as e:
 
159
  ties = df_ties.to_numpy(dtype=np.int64)
160
  ties += ties.T
161
 
162
+ assert wins.shape == ties.shape, 'wins and ties shapes are different'
163
+
164
+ scores = ALGORITHMS[algorithm](wins, ties, seed=seed)
165
 
166
  df_result = pd.DataFrame(data={'score': scores}, index=index)
167
  df_result['rank'] = df_result['score'].rank(na_option='bottom', ascending=False).astype(int)
 
186
  file_types=['.tsv', '.csv'],
187
  label='Comparisons'
188
  ),
189
+ gr.Dropdown(
190
+ choices=cast(List[str], ALGORITHMS),
191
+ value='Bradley-Terry (1952)',
192
+ label='Algorithm'
193
+ ),
194
  gr.Number(
195
  label='Seed',
196
  precision=0
 
206
  )
207
  ],
208
  examples=[
209
+ ['food.csv', 'Bradley-Terry (1952)', 0],
210
+ ['food.csv', 'Newman (2023)', 0]
211
  ],
212
  title='Pair2Rank: Turn Your Side-by-Side Comparisons into Ranking!',
213
  description='''