dustalov committed on
Commit
123ce74
1 Parent(s): 1a4fad6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +30 -7
app.py CHANGED
@@ -15,6 +15,7 @@
15
  __author__ = 'Dmitry Ustalov'
16
  __license__ = 'Apache 2.0'
17
 
 
18
  from typing import IO, Tuple, List, cast, Dict, Set, Callable
19
 
20
  import gradio as gr
@@ -68,28 +69,38 @@ def bradley_terry(wins: npt.NDArray[np.int64], ties: npt.NDArray[np.int64],
68
  return p
69
 
70
 
71
- def centrality(algorithm: Callable[..., Dict[int, float]],
72
- wins: npt.NDArray[np.int64], ties: npt.NDArray[np.int64],
73
- tolerance: float = 10e-6, limit: int = 100) -> npt.NDArray[np.float64]:
74
  A = wins + .5 * ties
75
 
76
  G = nx.from_numpy_array(A, create_using=nx.DiGraph)
77
 
78
- scores: Dict[int, float] = algorithm(G, max_iter=limit, tol=tolerance)
79
 
80
  p = np.array([scores[i] for i in range(len(G))])
81
 
82
  return p
83
 
84
 
 
 
 
 
 
 
 
85
  def eigen(wins: npt.NDArray[np.int64], ties: npt.NDArray[np.int64],
86
  seed: int = 0, tolerance: float = 10e-6, limit: int = 100) -> npt.NDArray[np.float64]:
87
- return centrality(nx.algorithms.eigenvector_centrality_numpy, wins, ties, tolerance, limit)
 
 
88
 
89
 
90
  def pagerank(wins: npt.NDArray[np.int64], ties: npt.NDArray[np.int64],
91
  seed: int = 0, tolerance: float = 10e-6, limit: int = 100) -> npt.NDArray[np.float64]:
92
- return centrality(nx.algorithms.pagerank, wins, ties, tolerance, limit)
 
 
93
 
94
 
95
  # https://gist.github.com/dustalov/41678b70c40ba5a55430fa5e77b121d9#file-newman-py
@@ -141,6 +152,7 @@ def newman(wins: npt.NDArray[np.int64], ties: npt.NDArray[np.int64],
141
 
142
 
143
  ALGORITHMS = {
 
144
  'Bradley-Terry (1952)': bradley_terry,
145
  'Eigenvector (1986)': eigen,
146
  'PageRank (1998)': pagerank,
@@ -156,7 +168,7 @@ def largest_strongly_connected_component(df: pd.DataFrame) -> Set[str]:
156
  return cast(Set[str], largest)
157
 
158
 
159
- def handler(file: IO[bytes], algorithm: str, filtered: bool, seed: int) -> Tuple[pd.DataFrame, Figure]:
160
  if file is None:
161
  raise gr.Error('File must be uploaded')
162
 
@@ -219,6 +231,10 @@ def handler(file: IO[bytes], algorithm: str, filtered: bool, seed: int) -> Tuple
219
  df_result.sort_values(by=['rank', 'score'], ascending=[True, False], inplace=True)
220
  df_result.reset_index(inplace=True)
221
 
 
 
 
 
222
  df_pairwise = pd.DataFrame(data=scores[:, np.newaxis] / (scores + scores[:, np.newaxis]),
223
  index=index, columns=index)
224
  df_pairwise = df_pairwise.reindex(labels=df_result['item'], columns=df_result['item'], copy=False)
@@ -249,6 +265,12 @@ def main() -> None:
249
  'This option keeps only the largest strongly-connected component (SCC) of the input graph. '
250
  'Some items might be missing as a result of this filtering.'
251
  ),
 
 
 
 
 
 
252
  gr.Number(
253
  label='Seed',
254
  precision=0
@@ -264,6 +286,7 @@ def main() -> None:
264
  )
265
  ],
266
  examples=[
 
267
  ['food.csv', 'Bradley-Terry (1952)', False],
268
  ['food.csv', 'Eigenvector (1986)', False],
269
  ['food.csv', 'PageRank (1998)', False],
 
15
  __author__ = 'Dmitry Ustalov'
16
  __license__ = 'Apache 2.0'
17
 
18
+ from functools import partial
19
  from typing import IO, Tuple, List, cast, Dict, Set, Callable
20
 
21
  import gradio as gr
 
69
  return p
70
 
71
 
72
def centrality(algorithm: Callable[[nx.DiGraph], Dict[int, float]],
               wins: npt.NDArray[np.int64], ties: npt.NDArray[np.int64]) -> npt.NDArray[np.float64]:
    """Score items by running a graph centrality algorithm on the comparison graph.

    The adjacency matrix is ``wins + .5 * ties``, i.e. every tie contributes
    half as much as a win. It is turned into a directed graph and handed to
    ``algorithm``; any algorithm-specific options (iteration limit, tolerance,
    ...) must already be bound to the callable by the caller.

    :param algorithm: callable that takes the directed graph and returns a
        mapping from integer node index to score.
    :param wins: square matrix of win counts.
    :param ties: square matrix of tie counts, same shape as ``wins``.
    :return: one score per item, ordered by node index.
    """
    A = wins + .5 * ties

    G = nx.from_numpy_array(A, create_using=nx.DiGraph)

    scores: Dict[int, float] = algorithm(G)

    # Scores are looked up by integer node index 0..len(G)-1 so that the
    # output order follows the row/column order of the input matrices.
    p = np.array([scores[i] for i in range(len(G))])

    return p
83
 
84
 
85
def counting(wins: npt.NDArray[np.int64], ties: npt.NDArray[np.int64],
             seed: int = 0, tolerance: float = 10e-6, limit: int = 100) -> npt.NDArray[np.float64]:
    """Score items by simple counting over the comparison matrices.

    The score vector is the column-wise sum of ``wins + .5 * ties``, so every
    tie contributes half as much as a win.

    :param wins: square matrix of win counts.
    :param ties: square matrix of tie counts, same shape as ``wins``.
    :param seed: unused; accepted so the signature matches the other
        ranking algorithms.
    :param tolerance: unused; accepted for signature compatibility.
    :param limit: unused; accepted for signature compatibility.
    :return: one score per column of the input matrices.
    """
    M = wins + .5 * ties

    return M.sum(axis=0)
90
+
91
+
92
  def eigen(wins: npt.NDArray[np.int64], ties: npt.NDArray[np.int64],
93
  seed: int = 0, tolerance: float = 10e-6, limit: int = 100) -> npt.NDArray[np.float64]:
94
+ algorithm = partial(nx.algorithms.eigenvector_centrality_numpy, max_iter=limit, tol=tolerance)
95
+
96
+ return centrality(algorithm, wins, ties)
97
 
98
 
99
  def pagerank(wins: npt.NDArray[np.int64], ties: npt.NDArray[np.int64],
100
  seed: int = 0, tolerance: float = 10e-6, limit: int = 100) -> npt.NDArray[np.float64]:
101
+ algorithm = partial(nx.algorithms.pagerank, max_iter=limit, tol=tolerance)
102
+
103
+ return centrality(algorithm, wins, ties)
104
 
105
 
106
  # https://gist.github.com/dustalov/41678b70c40ba5a55430fa5e77b121d9#file-newman-py
 
152
 
153
 
154
  ALGORITHMS = {
155
+ 'Counting': counting,
156
  'Bradley-Terry (1952)': bradley_terry,
157
  'Eigenvector (1986)': eigen,
158
  'PageRank (1998)': pagerank,
 
168
  return cast(Set[str], largest)
169
 
170
 
171
+ def handler(file: IO[bytes], algorithm: str, filtered: bool, truncated: bool, seed: int) -> Tuple[pd.DataFrame, Figure]:
172
  if file is None:
173
  raise gr.Error('File must be uploaded')
174
 
 
231
  df_result.sort_values(by=['rank', 'score'], ascending=[True, False], inplace=True)
232
  df_result.reset_index(inplace=True)
233
 
234
+ if truncated:
235
+ df_result = pd.concat((df_result.head(5), df_result.tail(5)), copy=False)
236
+ df_result = df_result[~df_result.index.duplicated(keep='last')]
237
+
238
  df_pairwise = pd.DataFrame(data=scores[:, np.newaxis] / (scores + scores[:, np.newaxis]),
239
  index=index, columns=index)
240
  df_pairwise = df_pairwise.reindex(labels=df_result['item'], columns=df_result['item'], copy=False)
 
265
  'This option keeps only the largest strongly-connected component (SCC) of the input graph. '
266
  'Some items might be missing as a result of this filtering.'
267
  ),
268
+ gr.Checkbox(
269
+ value=False,
270
+ label='Truncate Output',
271
+ info='Perform the entire computation but output only five head and five tail items, '
272
+ 'avoiding overlap.'
273
+ ),
274
  gr.Number(
275
  label='Seed',
276
  precision=0
 
286
  )
287
  ],
288
  examples=[
289
+ ['food.csv', 'Counting', False],
290
  ['food.csv', 'Bradley-Terry (1952)', False],
291
  ['food.csv', 'Eigenvector (1986)', False],
292
  ['food.csv', 'PageRank (1998)', False],