dustalov committed on
Commit
cc521be
1 Parent(s): dad8cba
Files changed (2) hide show
  1. app.py +71 -63
  2. ruff.toml +14 -0
app.py CHANGED
@@ -14,8 +14,8 @@
14
  # See the License for the specific language governing permissions and
15
  # limitations under the License.
16
 
17
- __author__ = 'Dmitry Ustalov'
18
- __license__ = 'Apache 2.0'
19
 
20
  from typing import BinaryIO, cast
21
 
@@ -32,9 +32,12 @@ TOLERANCE, LIMIT = 1e-6, 100
32
 
33
 
34
  def visualize(df_pairwise: pd.DataFrame) -> Figure:
35
- fig = px.imshow(df_pairwise, color_continuous_scale='RdBu', text_auto='.2f')
36
- fig.update_layout(xaxis_title='Loser', yaxis_title='Winner', xaxis_side='top')
37
- fig.update_traces(hovertemplate='Winner: %{y}<br>Loser: %{x}<br>Fraction of Wins: %{z}<extra></extra>')
 
 
 
38
  return fig
39
 
40
 
@@ -69,78 +72,83 @@ def newman(xs: list[str], ys: list[str], ws: list[Winner]) -> tuple["pd.Series[s
69
 
70
 
71
  ALGORITHMS = {
72
- 'Counting': counting,
73
- 'Bradley-Terry (1952)': bradley_terry,
74
- 'Elo (1960)': elo,
75
- 'Eigenvector (1987)': eigen,
76
- 'PageRank (1998)': pagerank,
77
- 'Newman (2023)': newman,
78
  }
79
 
80
 
81
  def largest_strongly_connected_component(df_pairs: pd.DataFrame) -> set[str]:
82
- G = nx.from_pandas_edgelist(df_pairs, source='left', target='right', create_using=nx.DiGraph)
83
- H = nx.from_pandas_edgelist(df_pairs[df_pairs['winner'] == 'tie'], source='right', target='left',
84
  create_using=nx.DiGraph)
85
  F = nx.compose(G, H)
86
  largest = max(nx.strongly_connected_components(F), key=len)
87
  return cast(set[str], largest)
88
 
89
 
90
- def handler(file: BinaryIO, algorithm: str, filtered: bool, truncated: bool) -> tuple[pd.DataFrame, Figure]:
 
 
 
 
 
91
  if file is None:
92
- raise gr.Error('File must be uploaded')
93
 
94
  if algorithm not in ALGORITHMS:
95
- raise gr.Error(f'Unknown algorithm: {algorithm}')
96
 
97
  try:
98
  df_pairs = pd.read_csv(file.name, dtype=str)
99
  except ValueError as e:
100
- raise gr.Error(f'Parsing error: {e}')
101
 
102
- if not pd.Series(['left', 'right', 'winner']).isin(df_pairs.columns).all():
103
- raise gr.Error('Columns must exist: left, right, winner')
104
 
105
- if not df_pairs['winner'].isin(pd.Series(['left', 'right', 'tie'])).all():
106
- raise gr.Error('Allowed winner values: left, right, tie')
107
 
108
- df_pairs = df_pairs[['left', 'right', 'winner']]
109
 
110
  df_pairs.dropna(axis=0, inplace=True)
111
 
112
  if filtered:
113
  largest = largest_strongly_connected_component(df_pairs)
114
 
115
- df_pairs.drop(df_pairs[~(df_pairs['left'].isin(largest) & df_pairs['right'].isin(largest))].index, inplace=True)
116
 
117
  xs, ys = df_pairs["left"], df_pairs["right"]
118
  ws = df_pairs["winner"].map({"left": Winner.X, "right": Winner.Y, "tie": Winner.Draw})
119
 
120
  scores, index = ALGORITHMS[algorithm](xs, ys, ws)
121
- index.name = 'item'
122
 
123
- df_result = pd.DataFrame(data={'score': scores}, index=index)
124
 
125
- df_result['pairs'] = pd.Series(0, dtype=int, index=index).add(
126
- df_pairs.groupby('left')['left'].count(), fill_value=0
127
  ).add(
128
- df_pairs.groupby('right')['right'].count(), fill_value=0
129
  ).astype(int)
130
 
131
- df_result['rank'] = df_result['score'].rank(na_option='bottom', ascending=False).astype(int)
132
 
133
  df_result.fillna(-np.inf, inplace=True)
134
- df_result.sort_values(by=['rank', 'score'], ascending=[True, False], inplace=True)
135
  df_result.reset_index(inplace=True)
136
 
137
  if truncated:
138
  df_result = pd.concat((df_result.head(5), df_result.tail(5)), copy=False)
139
- df_result = df_result[~df_result.index.duplicated(keep='last')]
140
 
141
- pairwise = evalica.pairwise_scores(df_result['score'].to_numpy())
142
 
143
- df_pairwise = pd.DataFrame(data=pairwise, index=df_result['item'], columns=df_result['item'])
144
 
145
  fig = visualize(df_pairwise)
146
 
@@ -152,49 +160,49 @@ def main() -> None:
152
  fn=handler,
153
  inputs=[
154
  gr.File(
155
- file_types=['.tsv', '.csv'],
156
- label='Comparisons'
157
  ),
158
  gr.Dropdown(
159
  choices=cast(list[str], ALGORITHMS),
160
- value='Bradley-Terry (1952)',
161
- label='Algorithm'
162
  ),
163
  gr.Checkbox(
164
  value=False,
165
- label='Largest SCC',
166
- info='Bradley-Terry, Eigenvector, and Newman algorithms require the comparison graph '
167
- 'to be strongly-connected. '
168
- 'This option keeps only the largest strongly-connected component (SCC) of the input graph. '
169
- 'Some items might be missing as a result of this filtering.'
170
  ),
171
  gr.Checkbox(
172
  value=False,
173
- label='Truncate Output',
174
- info='Perform the entire computation but output only five head and five tail items, '
175
- 'avoiding overlap.'
176
  ),
177
  ],
178
  outputs=[
179
  gr.Dataframe(
180
- headers=['item', 'score', 'pairs', 'rank'],
181
- label='Ranking'
182
  ),
183
  gr.Plot(
184
- label='Pairwise Chances of Winning the Comparison'
185
- )
186
  ],
187
  examples=[
188
- ['food.csv', 'Counting', False, False],
189
- ['food.csv', 'Bradley-Terry (1952)', False, False],
190
- ['food.csv', 'Eigenvector (1987)', False, False],
191
- ['food.csv', 'PageRank (1998)', False, False],
192
- ['food.csv', 'Newman (2023)', False, False],
193
- ['llmfao.csv', 'Bradley-Terry (1952)', False, True],
194
- ['llmfao.csv', 'Elo (1960)', False, True],
195
  ],
196
- title='Pair2Rank: Turn Your Side-by-Side Comparisons into Ranking!',
197
- description='''
198
  This easy-to-use tool transforms pairwise comparisons (aka side-by-side) to a meaningful ranking of items.
199
 
200
  As an input, it expects a comma-separated (CSV) file with a header containing the following columns:
@@ -206,17 +214,17 @@ As an input, it expects a comma-separated (CSV) file with a header containing th
206
  Possible values for `winner` are `left`, `right`, or `tie`. The provided examples might be a good starting point.
207
 
208
  As the output, this tool provides a table with items, their estimated scores, and ranks.
209
- '''.strip(),
210
- article='''
211
  Pair2Rank uses the [Evalica](https://pypi.org/p/evalica) library for computing the scores: <https://github.com/dustalov/evalica>.
212
 
213
  Read more about Pair2Rank at <https://evalovernite.substack.com/p/llmfao-human-ranking>.
214
- '''.strip(),
215
- allow_flagging='never'
216
  )
217
 
218
  iface.launch()
219
 
220
 
221
- if __name__ == '__main__':
222
  main()
 
14
  # See the License for the specific language governing permissions and
15
  # limitations under the License.
16
 
17
+ __author__ = "Dmitry Ustalov"
18
+ __license__ = "Apache 2.0"
19
 
20
  from typing import BinaryIO, cast
21
 
 
32
 
33
 
34
  def visualize(df_pairwise: pd.DataFrame) -> Figure:
35
+ fig = px.imshow(df_pairwise, color_continuous_scale="RdBu", text_auto=".2f")
36
+
37
+ fig.update_layout(xaxis_title="Loser", yaxis_title="Winner", xaxis_side="top")
38
+
39
+ fig.update_traces(hovertemplate="Winner: %{y}<br>Loser: %{x}<br>Fraction of Wins: %{z}<extra></extra>")
40
+
41
  return fig
42
 
43
 
 
72
 
73
 
74
  ALGORITHMS = {
75
+ "Counting": counting,
76
+ "Bradley-Terry (1952)": bradley_terry,
77
+ "Elo (1960)": elo,
78
+ "Eigenvector (1987)": eigen,
79
+ "PageRank (1998)": pagerank,
80
+ "Newman (2023)": newman,
81
  }
82
 
83
 
84
  def largest_strongly_connected_component(df_pairs: pd.DataFrame) -> set[str]:
85
+ G = nx.from_pandas_edgelist(df_pairs, source="left", target="right", create_using=nx.DiGraph)
86
+ H = nx.from_pandas_edgelist(df_pairs[df_pairs["winner"] == "tie"], source="right", target="left",
87
  create_using=nx.DiGraph)
88
  F = nx.compose(G, H)
89
  largest = max(nx.strongly_connected_components(F), key=len)
90
  return cast(set[str], largest)
91
 
92
 
93
+ def handler(
94
+ file: BinaryIO,
95
+ algorithm: str,
96
+ filtered: bool,
97
+ truncated: bool,
98
+ ) -> tuple[pd.DataFrame, Figure]:
99
  if file is None:
100
+ raise gr.Error("File must be uploaded")
101
 
102
  if algorithm not in ALGORITHMS:
103
+ raise gr.Error(f"Unknown algorithm: {algorithm}")
104
 
105
  try:
106
  df_pairs = pd.read_csv(file.name, dtype=str)
107
  except ValueError as e:
108
+ raise gr.Error(f"Parsing error: {e}") from e
109
 
110
+ if not pd.Series(["left", "right", "winner"]).isin(df_pairs.columns).all():
111
+ raise gr.Error("Columns must exist: left, right, winner")
112
 
113
+ if not df_pairs["winner"].isin(pd.Series(["left", "right", "tie"])).all():
114
+ raise gr.Error("Allowed winner values: left, right, tie")
115
 
116
+ df_pairs = df_pairs[["left", "right", "winner"]]
117
 
118
  df_pairs.dropna(axis=0, inplace=True)
119
 
120
  if filtered:
121
  largest = largest_strongly_connected_component(df_pairs)
122
 
123
+ df_pairs.drop(df_pairs[~(df_pairs["left"].isin(largest) & df_pairs["right"].isin(largest))].index, inplace=True)
124
 
125
  xs, ys = df_pairs["left"], df_pairs["right"]
126
  ws = df_pairs["winner"].map({"left": Winner.X, "right": Winner.Y, "tie": Winner.Draw})
127
 
128
  scores, index = ALGORITHMS[algorithm](xs, ys, ws)
129
+ index.name = "item"
130
 
131
+ df_result = pd.DataFrame(data={"score": scores}, index=index)
132
 
133
+ df_result["pairs"] = pd.Series(0, dtype=int, index=index).add(
134
+ df_pairs.groupby("left")["left"].count(), fill_value=0,
135
  ).add(
136
+ df_pairs.groupby("right")["right"].count(), fill_value=0,
137
  ).astype(int)
138
 
139
+ df_result["rank"] = df_result["score"].rank(na_option="bottom", ascending=False).astype(int)
140
 
141
  df_result.fillna(-np.inf, inplace=True)
142
+ df_result.sort_values(by=["rank", "score"], ascending=[True, False], inplace=True)
143
  df_result.reset_index(inplace=True)
144
 
145
  if truncated:
146
  df_result = pd.concat((df_result.head(5), df_result.tail(5)), copy=False)
147
+ df_result = df_result[~df_result.index.duplicated(keep="last")]
148
 
149
+ pairwise = evalica.pairwise_scores(df_result["score"].to_numpy())
150
 
151
+ df_pairwise = pd.DataFrame(data=pairwise, index=df_result["item"], columns=df_result["item"])
152
 
153
  fig = visualize(df_pairwise)
154
 
 
160
  fn=handler,
161
  inputs=[
162
  gr.File(
163
+ file_types=[".tsv", ".csv"],
164
+ label="Comparisons",
165
  ),
166
  gr.Dropdown(
167
  choices=cast(list[str], ALGORITHMS),
168
+ value="Bradley-Terry (1952)",
169
+ label="Algorithm",
170
  ),
171
  gr.Checkbox(
172
  value=False,
173
+ label="Largest SCC",
174
+ info="Bradley-Terry, Eigenvector, and Newman algorithms require the comparison graph "
175
+ "to be strongly-connected. "
176
+ "This option keeps only the largest strongly-connected component (SCC) of the input graph. "
177
+ "Some items might be missing as a result of this filtering.",
178
  ),
179
  gr.Checkbox(
180
  value=False,
181
+ label="Truncate Output",
182
+ info="Perform the entire computation but output only five head and five tail items, "
183
+ "avoiding overlap.",
184
  ),
185
  ],
186
  outputs=[
187
  gr.Dataframe(
188
+ headers=["item", "score", "pairs", "rank"],
189
+ label="Ranking",
190
  ),
191
  gr.Plot(
192
+ label="Pairwise Chances of Winning the Comparison",
193
+ ),
194
  ],
195
  examples=[
196
+ ["food.csv", "Counting", False, False],
197
+ ["food.csv", "Bradley-Terry (1952)", False, False],
198
+ ["food.csv", "Eigenvector (1987)", False, False],
199
+ ["food.csv", "PageRank (1998)", False, False],
200
+ ["food.csv", "Newman (2023)", False, False],
201
+ ["llmfao.csv", "Bradley-Terry (1952)", False, True],
202
+ ["llmfao.csv", "Elo (1960)", False, True],
203
  ],
204
+ title="Pair2Rank: Turn Your Side-by-Side Comparisons into Ranking!",
205
+ description="""
206
  This easy-to-use tool transforms pairwise comparisons (aka side-by-side) to a meaningful ranking of items.
207
 
208
  As an input, it expects a comma-separated (CSV) file with a header containing the following columns:
 
214
  Possible values for `winner` are `left`, `right`, or `tie`. The provided examples might be a good starting point.
215
 
216
  As the output, this tool provides a table with items, their estimated scores, and ranks.
217
+ """.strip(),
218
+ article="""
219
  Pair2Rank uses the [Evalica](https://pypi.org/p/evalica) library for computing the scores: <https://github.com/dustalov/evalica>.
220
 
221
  Read more about Pair2Rank at <https://evalovernite.substack.com/p/llmfao-human-ranking>.
222
+ """.strip(),
223
+ allow_flagging="never",
224
  )
225
 
226
  iface.launch()
227
 
228
 
229
+ if __name__ == "__main__":
230
  main()
ruff.toml ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ line-length = 120
2
+ target-version = "py311"
3
+
4
+ [lint]
5
+ select = ["ALL"]
6
+ ignore = [
7
+ "D", # pydocstyle
8
+ "EM101", # raw-string-in-exception
9
+ "EM102", # f-string-in-exception
10
+ "FBT001", # boolean-type-hint-positional-argument
11
+ "N806", # non-lowercase-variable-in-function
12
+ "PD002", # pandas-use-of-inplace-argument
13
+ "TRY003", # raise-vanilla-args
14
+ ]