dustalov committed on
Commit
ed19e77
1 Parent(s): 94a0793

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +40 -12
app.py CHANGED
@@ -15,7 +15,7 @@
15
  __author__ = 'Dmitry Ustalov'
16
  __license__ = 'Apache 2.0'
17
 
18
- from typing import IO, Tuple, List, cast, Dict
19
 
20
  import gradio as gr
21
  import networkx as nx
@@ -74,11 +74,11 @@ def pagerank(wins: npt.NDArray[np.int64], ties: npt.NDArray[np.int64],
74
 
75
  G = nx.from_numpy_array(A, create_using=nx.DiGraph)
76
 
77
- pagerank: Dict[int, float] = nx.algorithms.pagerank(G, max_iter=limit, tol=tolerance)
78
 
79
- scores = np.array([pagerank[i] for i in range(len(G))])
80
 
81
- return scores
82
 
83
 
84
  # https://gist.github.com/dustalov/41678b70c40ba5a55430fa5e77b121d9#file-newman-py
@@ -136,7 +136,15 @@ ALGORITHMS = {
136
  }
137
 
138
 
139
- def handler(file: IO[bytes], algorithm: str, seed: int) -> Tuple[pd.DataFrame, Figure]:
 
 
 
 
 
 
 
 
140
  if file is None:
141
  raise gr.Error('File must be uploaded')
142
 
@@ -158,7 +166,14 @@ def handler(file: IO[bytes], algorithm: str, seed: int) -> Tuple[pd.DataFrame, F
158
 
159
  df.dropna(axis='rows', inplace=True)
160
 
161
- index = pd.Index(np.unique(df[['left', 'right']].values), name='item')
 
 
 
 
 
 
 
162
 
163
  df_wins = pd.pivot_table(df[df['winner'].isin(['left', 'right'])],
164
  index='left', columns='right', values='winner',
@@ -179,8 +194,15 @@ def handler(file: IO[bytes], algorithm: str, seed: int) -> Tuple[pd.DataFrame, F
179
  scores = ALGORITHMS[algorithm](wins, ties, seed=seed)
180
 
181
  df_result = pd.DataFrame(data={'score': scores}, index=index)
182
- df_result['pairs'] = df.groupby('left')['left'].count() + df.groupby('right')['right'].count()
 
 
 
 
 
 
183
  df_result['rank'] = df_result['score'].rank(na_option='bottom', ascending=False).astype(int)
 
184
  df_result.fillna(np.NINF, inplace=True)
185
  df_result.sort_values(by=['rank', 'score'], ascending=[True, False], inplace=True)
186
  df_result.reset_index(inplace=True)
@@ -207,6 +229,13 @@ def main() -> None:
207
  value='Bradley-Terry (1952)',
208
  label='Algorithm'
209
  ),
 
 
 
 
 
 
 
210
  gr.Number(
211
  label='Seed',
212
  precision=0
@@ -222,9 +251,9 @@ def main() -> None:
222
  )
223
  ],
224
  examples=[
225
- ['food.csv', 'Bradley-Terry (1952)', 0],
226
- ['food.csv', 'PageRank (1998)', 0],
227
- ['food.csv', 'Newman (2023)', 0]
228
  ],
229
  title='Pair2Rank: Turn Your Side-by-Side Comparisons into Ranking!',
230
  description='''
@@ -236,8 +265,7 @@ As an input, it expects a comma-separated (CSV) file with a header containing th
236
  - `right`: the second compared item
237
  - `winner`: the label indicating the winning item
238
 
239
- Possible values for `winner` are `left`, `right`, or `tie`.
240
- The provided example might be a good starting point.
241
 
242
  As the output, this tool provides a table with items, their estimated scores, and ranks.
243
  ''',
 
15
  __author__ = 'Dmitry Ustalov'
16
  __license__ = 'Apache 2.0'
17
 
18
+ from typing import IO, Tuple, List, cast, Dict, Set
19
 
20
  import gradio as gr
21
  import networkx as nx
 
74
 
75
  G = nx.from_numpy_array(A, create_using=nx.DiGraph)
76
 
77
+ scores: Dict[int, float] = nx.algorithms.pagerank(G, max_iter=limit, tol=tolerance)
78
 
79
+ p = np.array([scores[i] for i in range(len(G))])
80
 
81
+ return p
82
 
83
 
84
  # https://gist.github.com/dustalov/41678b70c40ba5a55430fa5e77b121d9#file-newman-py
 
136
  }
137
 
138
 
139
+ def largest_strongly_connected_component(df: pd.DataFrame) -> Set[str]:
140
+ G = nx.from_pandas_edgelist(df, source='left', target='right', create_using=nx.DiGraph)
141
+ H = nx.from_pandas_edgelist(df[df['winner'] == 'tie'], source='right', target='left', create_using=nx.DiGraph)
142
+ F = nx.compose(G, H)
143
+ largest = max(nx.strongly_connected_components(F), key=len)
144
+ return cast(Set[str], largest)
145
+
146
+
147
+ def handler(file: IO[bytes], algorithm: str, filtered: bool, seed: int) -> Tuple[pd.DataFrame, Figure]:
148
  if file is None:
149
  raise gr.Error('File must be uploaded')
150
 
 
166
 
167
  df.dropna(axis='rows', inplace=True)
168
 
169
+ if filtered:
170
+ largest = largest_strongly_connected_component(df)
171
+
172
+ df.drop(df[~(df['left'].isin(largest) & df['right'].isin(largest))].index, inplace=True)
173
+
174
+ index = pd.Index(largest, name='item')
175
+ else:
176
+ index = pd.Index(np.unique(df[['left', 'right']].values), name='item')
177
 
178
  df_wins = pd.pivot_table(df[df['winner'].isin(['left', 'right'])],
179
  index='left', columns='right', values='winner',
 
194
  scores = ALGORITHMS[algorithm](wins, ties, seed=seed)
195
 
196
  df_result = pd.DataFrame(data={'score': scores}, index=index)
197
+
198
+ df_result['pairs'] = pd.Series(0, dtype=int, index=index).add(
199
+ df.groupby('left')['left'].count(), fill_value=0
200
+ ).add(
201
+ df.groupby('right')['right'].count(), fill_value=0
202
+ ).astype(int)
203
+
204
  df_result['rank'] = df_result['score'].rank(na_option='bottom', ascending=False).astype(int)
205
+
206
  df_result.fillna(np.NINF, inplace=True)
207
  df_result.sort_values(by=['rank', 'score'], ascending=[True, False], inplace=True)
208
  df_result.reset_index(inplace=True)
 
229
  value='Bradley-Terry (1952)',
230
  label='Algorithm'
231
  ),
232
+ gr.Checkbox(
233
+ value=False,
234
+ label='Largest SCC',
235
+ info='Bradley-Terry and Newman algorithms require the comparison graph to be strongly-connected. '
236
+ 'This option keeps only the largest strongly-connected component (SCC) of the input graph. '
237
+ 'Some items might be missing as a result of this filtering.'
238
+ ),
239
  gr.Number(
240
  label='Seed',
241
  precision=0
 
251
  )
252
  ],
253
  examples=[
254
+ ['food.csv', 'Bradley-Terry (1952)', False],
255
+ ['food.csv', 'PageRank (1998)', False],
256
+ ['food.csv', 'Newman (2023)', False]
257
  ],
258
  title='Pair2Rank: Turn Your Side-by-Side Comparisons into Ranking!',
259
  description='''
 
265
  - `right`: the second compared item
266
  - `winner`: the label indicating the winning item
267
 
268
+ Possible values for `winner` are `left`, `right`, or `tie`. The provided examples might be a good starting point.
 
269
 
270
  As the output, this tool provides a table with items, their estimated scores, and ranks.
271
  ''',