Spaces:

dustalov
/

pair2rank

Running

App Files Files Community

dustalov committed on Sep 2, 2023

Commit

ed19e77

•

1 Parent(s): 94a0793

Update app.py

Browse files

Files changed (1) hide show

app.py +40 -12

app.py CHANGED Viewed

@@ -15,7 +15,7 @@
 __author__ = 'Dmitry Ustalov'
 __license__ = 'Apache 2.0'
-from typing import IO, Tuple, List, cast, Dict
 import gradio as gr
 import networkx as nx
@@ -74,11 +74,11 @@ def pagerank(wins: npt.NDArray[np.int64], ties: npt.NDArray[np.int64],
     G = nx.from_numpy_array(A, create_using=nx.DiGraph)
-    pagerank: Dict[int, float] = nx.algorithms.pagerank(G, max_iter=limit, tol=tolerance)
-    scores = np.array([pagerank[i] for i in range(len(G))])
-    return scores
 # https://gist.github.com/dustalov/41678b70c40ba5a55430fa5e77b121d9#file-newman-py
@@ -136,7 +136,15 @@ ALGORITHMS = {
 }
-def handler(file: IO[bytes], algorithm: str, seed: int) -> Tuple[pd.DataFrame, Figure]:
     if file is None:
         raise gr.Error('File must be uploaded')
@@ -158,7 +166,14 @@ def handler(file: IO[bytes], algorithm: str, seed: int) -> Tuple[pd.DataFrame, F
     df.dropna(axis='rows', inplace=True)
-    index = pd.Index(np.unique(df[['left', 'right']].values), name='item')
     df_wins = pd.pivot_table(df[df['winner'].isin(['left', 'right'])],
                              index='left', columns='right', values='winner',
@@ -179,8 +194,15 @@ def handler(file: IO[bytes], algorithm: str, seed: int) -> Tuple[pd.DataFrame, F
     scores = ALGORITHMS[algorithm](wins, ties, seed=seed)
     df_result = pd.DataFrame(data={'score': scores}, index=index)
-    df_result['pairs'] = df.groupby('left')['left'].count() + df.groupby('right')['right'].count()
     df_result['rank'] = df_result['score'].rank(na_option='bottom', ascending=False).astype(int)
     df_result.fillna(np.NINF, inplace=True)
     df_result.sort_values(by=['rank', 'score'], ascending=[True, False], inplace=True)
     df_result.reset_index(inplace=True)
@@ -207,6 +229,13 @@ def main() -> None:
                 value='Bradley-Terry (1952)',
                 label='Algorithm'
             ),
             gr.Number(
                 label='Seed',
                 precision=0
@@ -222,9 +251,9 @@ def main() -> None:
             )
         ],
         examples=[
-            ['food.csv', 'Bradley-Terry (1952)', 0],
-            ['food.csv', 'PageRank (1998)', 0],
-            ['food.csv', 'Newman (2023)', 0]
         ],
         title='Pair2Rank: Turn Your Side-by-Side Comparisons into Ranking!',
         description='''
@@ -236,8 +265,7 @@ As an input, it expects a comma-separated (CSV) file with a header containing th
 - `right`: the second compared item
 - `winner`: the label indicating the winning item
-Possible values for `winner` are `left`, `right`, or `tie`.
-The provided example might be a good starting point.
 As the output, this tool provides a table with items, their estimated scores, and ranks.
         ''',

 __author__ = 'Dmitry Ustalov'
 __license__ = 'Apache 2.0'
+from typing import IO, Tuple, List, cast, Dict, Set
 import gradio as gr
 import networkx as nx
     G = nx.from_numpy_array(A, create_using=nx.DiGraph)
+    scores: Dict[int, float] = nx.algorithms.pagerank(G, max_iter=limit, tol=tolerance)
+    p = np.array([scores[i] for i in range(len(G))])
+    return p
 # https://gist.github.com/dustalov/41678b70c40ba5a55430fa5e77b121d9#file-newman-py
 }
+def largest_strongly_connected_component(df: pd.DataFrame) -> Set[str]:
+    G = nx.from_pandas_edgelist(df, source='left', target='right', create_using=nx.DiGraph)
+    H = nx.from_pandas_edgelist(df[df['winner'] == 'tie'], source='right', target='left', create_using=nx.DiGraph)
+    F = nx.compose(G, H)
+    largest = max(nx.strongly_connected_components(F), key=len)
+    return cast(Set[str], largest)
+def handler(file: IO[bytes], algorithm: str, filtered: bool, seed: int) -> Tuple[pd.DataFrame, Figure]:
     if file is None:
         raise gr.Error('File must be uploaded')
     df.dropna(axis='rows', inplace=True)
+    if filtered:
+        largest = largest_strongly_connected_component(df)
+        df.drop(df[~(df['left'].isin(largest) & df['right'].isin(largest))].index, inplace=True)
+        index = pd.Index(largest, name='item')
+    else:
+        index = pd.Index(np.unique(df[['left', 'right']].values), name='item')
     df_wins = pd.pivot_table(df[df['winner'].isin(['left', 'right'])],
                              index='left', columns='right', values='winner',
     scores = ALGORITHMS[algorithm](wins, ties, seed=seed)
     df_result = pd.DataFrame(data={'score': scores}, index=index)
+    df_result['pairs'] = pd.Series(0, dtype=int, index=index).add(
+        df.groupby('left')['left'].count(), fill_value=0
+    ).add(
+        df.groupby('right')['right'].count(), fill_value=0
+    ).astype(int)
     df_result['rank'] = df_result['score'].rank(na_option='bottom', ascending=False).astype(int)
     df_result.fillna(np.NINF, inplace=True)
     df_result.sort_values(by=['rank', 'score'], ascending=[True, False], inplace=True)
     df_result.reset_index(inplace=True)
                 value='Bradley-Terry (1952)',
                 label='Algorithm'
             ),
+            gr.Checkbox(
+                value=False,
+                label='Largest SCC',
+                info='Bradley-Terry and Newman algorithms require the comparison graph to be strongly-connected. '
+                     'This option keeps only the largest strongly-connected component (SCC) of the input graph. '
+                     'Some items might be missing as a result of this filtering.'
+            ),
             gr.Number(
                 label='Seed',
                 precision=0
             )
         ],
         examples=[
+            ['food.csv', 'Bradley-Terry (1952)', False],
+            ['food.csv', 'PageRank (1998)', False],
+            ['food.csv', 'Newman (2023)', False]
         ],
         title='Pair2Rank: Turn Your Side-by-Side Comparisons into Ranking!',
         description='''
 - `right`: the second compared item
 - `winner`: the label indicating the winning item
+Possible values for `winner` are `left`, `right`, or `tie`. The provided examples might be a good starting point.
 As the output, this tool provides a table with items, their estimated scores, and ranks.
         ''',