Spaces:

dustalov
/

pair2rank

Running

App Files Community

dustalov committed on Jul 9

Commit

cc521be

•

1 Parent(s): dad8cba

Use Ruff

Browse files

Files changed (2) hide show

app.py +71 -63
ruff.toml +14 -0

app.py CHANGED Viewed

@@ -14,8 +14,8 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
-__author__ = 'Dmitry Ustalov'
-__license__ = 'Apache 2.0'
 from typing import BinaryIO, cast
@@ -32,9 +32,12 @@ TOLERANCE, LIMIT = 1e-6, 100
 def visualize(df_pairwise: pd.DataFrame) -> Figure:
-    fig = px.imshow(df_pairwise, color_continuous_scale='RdBu', text_auto='.2f')
-    fig.update_layout(xaxis_title='Loser', yaxis_title='Winner', xaxis_side='top')
-    fig.update_traces(hovertemplate='Winner: %{y}<br>Loser: %{x}<br>Fraction of Wins: %{z}<extra></extra>')
     return fig
@@ -69,78 +72,83 @@ def newman(xs: list[str], ys: list[str], ws: list[Winner]) -> tuple["pd.Series[s
 ALGORITHMS = {
-    'Counting': counting,
-    'Bradley-Terry (1952)': bradley_terry,
-    'Elo (1960)': elo,
-    'Eigenvector (1987)': eigen,
-    'PageRank (1998)': pagerank,
-    'Newman (2023)': newman,
 }
 def largest_strongly_connected_component(df_pairs: pd.DataFrame) -> set[str]:
-    G = nx.from_pandas_edgelist(df_pairs, source='left', target='right', create_using=nx.DiGraph)
-    H = nx.from_pandas_edgelist(df_pairs[df_pairs['winner'] == 'tie'], source='right', target='left',
                                 create_using=nx.DiGraph)
     F = nx.compose(G, H)
     largest = max(nx.strongly_connected_components(F), key=len)
     return cast(set[str], largest)
-def handler(file: BinaryIO, algorithm: str, filtered: bool, truncated: bool) -> tuple[pd.DataFrame, Figure]:
     if file is None:
-        raise gr.Error('File must be uploaded')
     if algorithm not in ALGORITHMS:
-        raise gr.Error(f'Unknown algorithm: {algorithm}')
     try:
         df_pairs = pd.read_csv(file.name, dtype=str)
     except ValueError as e:
-        raise gr.Error(f'Parsing error: {e}')
-    if not pd.Series(['left', 'right', 'winner']).isin(df_pairs.columns).all():
-        raise gr.Error('Columns must exist: left, right, winner')
-    if not df_pairs['winner'].isin(pd.Series(['left', 'right', 'tie'])).all():
-        raise gr.Error('Allowed winner values: left, right, tie')
-    df_pairs = df_pairs[['left', 'right', 'winner']]
     df_pairs.dropna(axis=0, inplace=True)
     if filtered:
         largest = largest_strongly_connected_component(df_pairs)
-        df_pairs.drop(df_pairs[~(df_pairs['left'].isin(largest) & df_pairs['right'].isin(largest))].index, inplace=True)
     xs, ys = df_pairs["left"], df_pairs["right"]
     ws = df_pairs["winner"].map({"left": Winner.X, "right": Winner.Y, "tie": Winner.Draw})
     scores, index = ALGORITHMS[algorithm](xs, ys, ws)
-    index.name = 'item'
-    df_result = pd.DataFrame(data={'score': scores}, index=index)
-    df_result['pairs'] = pd.Series(0, dtype=int, index=index).add(
-        df_pairs.groupby('left')['left'].count(), fill_value=0
     ).add(
-        df_pairs.groupby('right')['right'].count(), fill_value=0
     ).astype(int)
-    df_result['rank'] = df_result['score'].rank(na_option='bottom', ascending=False).astype(int)
     df_result.fillna(-np.inf, inplace=True)
-    df_result.sort_values(by=['rank', 'score'], ascending=[True, False], inplace=True)
     df_result.reset_index(inplace=True)
     if truncated:
         df_result = pd.concat((df_result.head(5), df_result.tail(5)), copy=False)
-        df_result = df_result[~df_result.index.duplicated(keep='last')]
-    pairwise = evalica.pairwise_scores(df_result['score'].to_numpy())
-    df_pairwise = pd.DataFrame(data=pairwise, index=df_result['item'], columns=df_result['item'])
     fig = visualize(df_pairwise)
@@ -152,49 +160,49 @@ def main() -> None:
         fn=handler,
         inputs=[
             gr.File(
-                file_types=['.tsv', '.csv'],
-                label='Comparisons'
             ),
             gr.Dropdown(
                 choices=cast(list[str], ALGORITHMS),
-                value='Bradley-Terry (1952)',
-                label='Algorithm'
             ),
             gr.Checkbox(
                 value=False,
-                label='Largest SCC',
-                info='Bradley-Terry, Eigenvector, and Newman algorithms require the comparison graph '
-                     'to be strongly-connected. '
-                     'This option keeps only the largest strongly-connected component (SCC) of the input graph. '
-                     'Some items might be missing as a result of this filtering.'
             ),
             gr.Checkbox(
                 value=False,
-                label='Truncate Output',
-                info='Perform the entire computation but output only five head and five tail items, '
-                     'avoiding overlap.'
             ),
         ],
         outputs=[
             gr.Dataframe(
-                headers=['item', 'score', 'pairs', 'rank'],
-                label='Ranking'
             ),
             gr.Plot(
-                label='Pairwise Chances of Winning the Comparison'
-            )
         ],
         examples=[
-            ['food.csv', 'Counting', False, False],
-            ['food.csv', 'Bradley-Terry (1952)', False, False],
-            ['food.csv', 'Eigenvector (1987)', False, False],
-            ['food.csv', 'PageRank (1998)', False, False],
-            ['food.csv', 'Newman (2023)', False, False],
-            ['llmfao.csv', 'Bradley-Terry (1952)', False, True],
-            ['llmfao.csv', 'Elo (1960)', False, True],
         ],
-        title='Pair2Rank: Turn Your Side-by-Side Comparisons into Ranking!',
-        description='''
 This easy-to-use tool transforms pairwise comparisons (aka side-by-side) to a meaningful ranking of items.
 As an input, it expects a comma-separated (CSV) file with a header containing the following columns:
@@ -206,17 +214,17 @@ As an input, it expects a comma-separated (CSV) file with a header containing th
 Possible values for `winner` are `left`, `right`, or `tie`. The provided examples might be a good starting point.
 As the output, this tool provides a table with items, their estimated scores, and ranks.
-        '''.strip(),
-        article='''
 Pair2Rank uses the [Evalica](https://pypi.org/p/evalica) library for computing the scores: <https://github.com/dustalov/evalica>.
 Read more about Pair2Rank at <https://evalovernite.substack.com/p/llmfao-human-ranking>.
-        '''.strip(),
-        allow_flagging='never'
     )
     iface.launch()
-if __name__ == '__main__':
     main()

 # See the License for the specific language governing permissions and
 # limitations under the License.
+__author__ = "Dmitry Ustalov"
+__license__ = "Apache 2.0"
 from typing import BinaryIO, cast
 def visualize(df_pairwise: pd.DataFrame) -> Figure:
+    fig = px.imshow(df_pairwise, color_continuous_scale="RdBu", text_auto=".2f")
+    fig.update_layout(xaxis_title="Loser", yaxis_title="Winner", xaxis_side="top")
+    fig.update_traces(hovertemplate="Winner: %{y}<br>Loser: %{x}<br>Fraction of Wins: %{z}<extra></extra>")
     return fig
 ALGORITHMS = {
+    "Counting": counting,
+    "Bradley-Terry (1952)": bradley_terry,
+    "Elo (1960)": elo,
+    "Eigenvector (1987)": eigen,
+    "PageRank (1998)": pagerank,
+    "Newman (2023)": newman,
 }
 def largest_strongly_connected_component(df_pairs: pd.DataFrame) -> set[str]:
+    G = nx.from_pandas_edgelist(df_pairs, source="left", target="right", create_using=nx.DiGraph)
+    H = nx.from_pandas_edgelist(df_pairs[df_pairs["winner"] == "tie"], source="right", target="left",
                                 create_using=nx.DiGraph)
     F = nx.compose(G, H)
     largest = max(nx.strongly_connected_components(F), key=len)
     return cast(set[str], largest)
+def handler(
+        file: BinaryIO,
+        algorithm: str,
+        filtered: bool,
+        truncated: bool,
+) -> tuple[pd.DataFrame, Figure]:
     if file is None:
+        raise gr.Error("File must be uploaded")
     if algorithm not in ALGORITHMS:
+        raise gr.Error(f"Unknown algorithm: {algorithm}")
     try:
         df_pairs = pd.read_csv(file.name, dtype=str)
     except ValueError as e:
+        raise gr.Error(f"Parsing error: {e}") from e
+    if not pd.Series(["left", "right", "winner"]).isin(df_pairs.columns).all():
+        raise gr.Error("Columns must exist: left, right, winner")
+    if not df_pairs["winner"].isin(pd.Series(["left", "right", "tie"])).all():
+        raise gr.Error("Allowed winner values: left, right, tie")
+    df_pairs = df_pairs[["left", "right", "winner"]]
     df_pairs.dropna(axis=0, inplace=True)
     if filtered:
         largest = largest_strongly_connected_component(df_pairs)
+        df_pairs.drop(df_pairs[~(df_pairs["left"].isin(largest) & df_pairs["right"].isin(largest))].index, inplace=True)
     xs, ys = df_pairs["left"], df_pairs["right"]
     ws = df_pairs["winner"].map({"left": Winner.X, "right": Winner.Y, "tie": Winner.Draw})
     scores, index = ALGORITHMS[algorithm](xs, ys, ws)
+    index.name = "item"
+    df_result = pd.DataFrame(data={"score": scores}, index=index)
+    df_result["pairs"] = pd.Series(0, dtype=int, index=index).add(
+        df_pairs.groupby("left")["left"].count(), fill_value=0,
     ).add(
+        df_pairs.groupby("right")["right"].count(), fill_value=0,
     ).astype(int)
+    df_result["rank"] = df_result["score"].rank(na_option="bottom", ascending=False).astype(int)
     df_result.fillna(-np.inf, inplace=True)
+    df_result.sort_values(by=["rank", "score"], ascending=[True, False], inplace=True)
     df_result.reset_index(inplace=True)
     if truncated:
         df_result = pd.concat((df_result.head(5), df_result.tail(5)), copy=False)
+        df_result = df_result[~df_result.index.duplicated(keep="last")]
+    pairwise = evalica.pairwise_scores(df_result["score"].to_numpy())
+    df_pairwise = pd.DataFrame(data=pairwise, index=df_result["item"], columns=df_result["item"])
     fig = visualize(df_pairwise)
         fn=handler,
         inputs=[
             gr.File(
+                file_types=[".tsv", ".csv"],
+                label="Comparisons",
             ),
             gr.Dropdown(
                 choices=cast(list[str], ALGORITHMS),
+                value="Bradley-Terry (1952)",
+                label="Algorithm",
             ),
             gr.Checkbox(
                 value=False,
+                label="Largest SCC",
+                info="Bradley-Terry, Eigenvector, and Newman algorithms require the comparison graph "
+                     "to be strongly-connected. "
+                     "This option keeps only the largest strongly-connected component (SCC) of the input graph. "
+                     "Some items might be missing as a result of this filtering.",
             ),
             gr.Checkbox(
                 value=False,
+                label="Truncate Output",
+                info="Perform the entire computation but output only five head and five tail items, "
+                     "avoiding overlap.",
             ),
         ],
         outputs=[
             gr.Dataframe(
+                headers=["item", "score", "pairs", "rank"],
+                label="Ranking",
             ),
             gr.Plot(
+                label="Pairwise Chances of Winning the Comparison",
+            ),
         ],
         examples=[
+            ["food.csv", "Counting", False, False],
+            ["food.csv", "Bradley-Terry (1952)", False, False],
+            ["food.csv", "Eigenvector (1987)", False, False],
+            ["food.csv", "PageRank (1998)", False, False],
+            ["food.csv", "Newman (2023)", False, False],
+            ["llmfao.csv", "Bradley-Terry (1952)", False, True],
+            ["llmfao.csv", "Elo (1960)", False, True],
         ],
+        title="Pair2Rank: Turn Your Side-by-Side Comparisons into Ranking!",
+        description="""
 This easy-to-use tool transforms pairwise comparisons (aka side-by-side) to a meaningful ranking of items.
 As an input, it expects a comma-separated (CSV) file with a header containing the following columns:
 Possible values for `winner` are `left`, `right`, or `tie`. The provided examples might be a good starting point.
 As the output, this tool provides a table with items, their estimated scores, and ranks.
+        """.strip(),
+        article="""
 Pair2Rank uses the [Evalica](https://pypi.org/p/evalica) library for computing the scores: <https://github.com/dustalov/evalica>.
 Read more about Pair2Rank at <https://evalovernite.substack.com/p/llmfao-human-ranking>.
+        """.strip(),
+        allow_flagging="never",
     )
     iface.launch()
+if __name__ == "__main__":
     main()

ruff.toml ADDED Viewed

	@@ -0,0 +1,14 @@

+line-length = 120
+target-version = "py311"
+[lint]
+select = ["ALL"]
+ignore = [
+    "D",      # pydocstyle
+    "EM101",  # raw-string-in-exception
+    "EM102",  # f-string-in-exception
+    "FBT001", # boolean-type-hint-positional-argument
+    "N806",   # non-lowercase-variable-in-function
+    "PD002",  # pandas-use-of-inplace-argument
+    "TRY003", # raise-vanilla-args
+]