Use Ruff
Browse files
app.py
CHANGED
@@ -14,8 +14,8 @@
|
|
14 |
# See the License for the specific language governing permissions and
|
15 |
# limitations under the License.
|
16 |
|
17 |
-
__author__ =
|
18 |
-
__license__ =
|
19 |
|
20 |
from typing import BinaryIO, cast
|
21 |
|
@@ -32,9 +32,12 @@ TOLERANCE, LIMIT = 1e-6, 100
|
|
32 |
|
33 |
|
34 |
def visualize(df_pairwise: pd.DataFrame) -> Figure:
|
35 |
-
fig = px.imshow(df_pairwise, color_continuous_scale=
|
36 |
-
|
37 |
-
fig.
|
|
|
|
|
|
|
38 |
return fig
|
39 |
|
40 |
|
@@ -69,78 +72,83 @@ def newman(xs: list[str], ys: list[str], ws: list[Winner]) -> tuple["pd.Series[s
|
|
69 |
|
70 |
|
71 |
ALGORITHMS = {
|
72 |
-
|
73 |
-
|
74 |
-
|
75 |
-
|
76 |
-
|
77 |
-
|
78 |
}
|
79 |
|
80 |
|
81 |
def largest_strongly_connected_component(df_pairs: pd.DataFrame) -> set[str]:
|
82 |
-
G = nx.from_pandas_edgelist(df_pairs, source=
|
83 |
-
H = nx.from_pandas_edgelist(df_pairs[df_pairs[
|
84 |
create_using=nx.DiGraph)
|
85 |
F = nx.compose(G, H)
|
86 |
largest = max(nx.strongly_connected_components(F), key=len)
|
87 |
return cast(set[str], largest)
|
88 |
|
89 |
|
90 |
-
def handler(
|
|
|
|
|
|
|
|
|
|
|
91 |
if file is None:
|
92 |
-
raise gr.Error(
|
93 |
|
94 |
if algorithm not in ALGORITHMS:
|
95 |
-
raise gr.Error(f
|
96 |
|
97 |
try:
|
98 |
df_pairs = pd.read_csv(file.name, dtype=str)
|
99 |
except ValueError as e:
|
100 |
-
raise gr.Error(f
|
101 |
|
102 |
-
if not pd.Series([
|
103 |
-
raise gr.Error(
|
104 |
|
105 |
-
if not df_pairs[
|
106 |
-
raise gr.Error(
|
107 |
|
108 |
-
df_pairs = df_pairs[[
|
109 |
|
110 |
df_pairs.dropna(axis=0, inplace=True)
|
111 |
|
112 |
if filtered:
|
113 |
largest = largest_strongly_connected_component(df_pairs)
|
114 |
|
115 |
-
df_pairs.drop(df_pairs[~(df_pairs[
|
116 |
|
117 |
xs, ys = df_pairs["left"], df_pairs["right"]
|
118 |
ws = df_pairs["winner"].map({"left": Winner.X, "right": Winner.Y, "tie": Winner.Draw})
|
119 |
|
120 |
scores, index = ALGORITHMS[algorithm](xs, ys, ws)
|
121 |
-
index.name =
|
122 |
|
123 |
-
df_result = pd.DataFrame(data={
|
124 |
|
125 |
-
df_result[
|
126 |
-
df_pairs.groupby(
|
127 |
).add(
|
128 |
-
df_pairs.groupby(
|
129 |
).astype(int)
|
130 |
|
131 |
-
df_result[
|
132 |
|
133 |
df_result.fillna(-np.inf, inplace=True)
|
134 |
-
df_result.sort_values(by=[
|
135 |
df_result.reset_index(inplace=True)
|
136 |
|
137 |
if truncated:
|
138 |
df_result = pd.concat((df_result.head(5), df_result.tail(5)), copy=False)
|
139 |
-
df_result = df_result[~df_result.index.duplicated(keep=
|
140 |
|
141 |
-
pairwise = evalica.pairwise_scores(df_result[
|
142 |
|
143 |
-
df_pairwise = pd.DataFrame(data=pairwise, index=df_result[
|
144 |
|
145 |
fig = visualize(df_pairwise)
|
146 |
|
@@ -152,49 +160,49 @@ def main() -> None:
|
|
152 |
fn=handler,
|
153 |
inputs=[
|
154 |
gr.File(
|
155 |
-
file_types=[
|
156 |
-
label=
|
157 |
),
|
158 |
gr.Dropdown(
|
159 |
choices=cast(list[str], ALGORITHMS),
|
160 |
-
value=
|
161 |
-
label=
|
162 |
),
|
163 |
gr.Checkbox(
|
164 |
value=False,
|
165 |
-
label=
|
166 |
-
info=
|
167 |
-
|
168 |
-
|
169 |
-
|
170 |
),
|
171 |
gr.Checkbox(
|
172 |
value=False,
|
173 |
-
label=
|
174 |
-
info=
|
175 |
-
|
176 |
),
|
177 |
],
|
178 |
outputs=[
|
179 |
gr.Dataframe(
|
180 |
-
headers=[
|
181 |
-
label=
|
182 |
),
|
183 |
gr.Plot(
|
184 |
-
label=
|
185 |
-
)
|
186 |
],
|
187 |
examples=[
|
188 |
-
[
|
189 |
-
[
|
190 |
-
[
|
191 |
-
[
|
192 |
-
[
|
193 |
-
[
|
194 |
-
[
|
195 |
],
|
196 |
-
title=
|
197 |
-
description=
|
198 |
This easy-to-use tool transforms pairwise comparisons (aka side-by-side) to a meaningful ranking of items.
|
199 |
|
200 |
As an input, it expects a comma-separated (CSV) file with a header containing the following columns:
|
@@ -206,17 +214,17 @@ As an input, it expects a comma-separated (CSV) file with a header containing th
|
|
206 |
Possible values for `winner` are `left`, `right`, or `tie`. The provided examples might be a good starting point.
|
207 |
|
208 |
As the output, this tool provides a table with items, their estimated scores, and ranks.
|
209 |
-
|
210 |
-
article=
|
211 |
Pair2Rank uses the [Evalica](https://pypi.org/p/evalica) library for computing the scores: <https://github.com/dustalov/evalica>.
|
212 |
|
213 |
Read more about Pair2Rank at <https://evalovernite.substack.com/p/llmfao-human-ranking>.
|
214 |
-
|
215 |
-
allow_flagging=
|
216 |
)
|
217 |
|
218 |
iface.launch()
|
219 |
|
220 |
|
221 |
-
if __name__ ==
|
222 |
main()
|
|
|
14 |
# See the License for the specific language governing permissions and
|
15 |
# limitations under the License.
|
16 |
|
17 |
+
__author__ = "Dmitry Ustalov"
|
18 |
+
__license__ = "Apache 2.0"
|
19 |
|
20 |
from typing import BinaryIO, cast
|
21 |
|
|
|
32 |
|
33 |
|
34 |
def visualize(df_pairwise: pd.DataFrame) -> Figure:
    """Build a heatmap figure from the pairwise score matrix.

    The x-axis is titled "Loser" and shown on top, the y-axis is titled "Winner",
    and each cell is annotated with its value formatted to two decimals.
    """
    hover_text = "Winner: %{y}<br>Loser: %{x}<br>Fraction of Wins: %{z}<extra></extra>"
    axis_options = {"xaxis_title": "Loser", "yaxis_title": "Winner", "xaxis_side": "top"}

    heatmap = px.imshow(df_pairwise, color_continuous_scale="RdBu", text_auto=".2f")
    heatmap.update_layout(**axis_options)
    heatmap.update_traces(hovertemplate=hover_text)

    return heatmap
|
42 |
|
43 |
|
|
|
72 |
|
73 |
|
74 |
# Registry of the available ranking algorithms, keyed by the label shown to the user.
# The handler looks the selected label up here and calls the function as
# ``fn(xs, ys, ws)``, unpacking its result into ``(scores, index)``.
# The keys also populate the algorithm dropdown, so insertion order is the display order.
ALGORITHMS = {
    "Counting": counting,
    "Bradley-Terry (1952)": bradley_terry,
    "Elo (1960)": elo,
    "Eigenvector (1987)": eigen,
    "PageRank (1998)": pagerank,
    "Newman (2023)": newman,
}
|
82 |
|
83 |
|
84 |
def largest_strongly_connected_component(df_pairs: pd.DataFrame) -> set[str]:
    """Return the items in the largest strongly connected component of the comparison graph.

    Every row contributes a directed edge from its ``left`` item to its ``right`` item.
    Rows whose ``winner`` is ``"tie"`` additionally contribute the reverse edge, so a tie
    connects both items in both directions.

    Args:
        df_pairs: Comparisons with ``left``, ``right``, and ``winner`` columns.

    Returns:
        The node set of the largest strongly connected component, or an empty set
        when the graph has no nodes (e.g. ``df_pairs`` has no rows).
    """
    # NOTE(review): non-tie edges follow the left -> right column order regardless of
    # which side won -- confirm this is the intended notion of connectivity.
    G = nx.from_pandas_edgelist(df_pairs, source="left", target="right", create_using=nx.DiGraph)

    ties = df_pairs[df_pairs["winner"] == "tie"]
    H = nx.from_pandas_edgelist(ties, source="right", target="left", create_using=nx.DiGraph)

    F = nx.compose(G, H)

    # A graph without nodes has no components; without a default, max() would raise
    # ValueError on the empty iterator instead of reporting "nothing is connected".
    largest = max(nx.strongly_connected_components(F), key=len, default=set())
    return cast(set[str], largest)
|
91 |
|
92 |
|
93 |
+
def handler(
|
94 |
+
file: BinaryIO,
|
95 |
+
algorithm: str,
|
96 |
+
filtered: bool,
|
97 |
+
truncated: bool,
|
98 |
+
) -> tuple[pd.DataFrame, Figure]:
|
99 |
if file is None:
|
100 |
+
raise gr.Error("File must be uploaded")
|
101 |
|
102 |
if algorithm not in ALGORITHMS:
|
103 |
+
raise gr.Error(f"Unknown algorithm: {algorithm}")
|
104 |
|
105 |
try:
|
106 |
df_pairs = pd.read_csv(file.name, dtype=str)
|
107 |
except ValueError as e:
|
108 |
+
raise gr.Error(f"Parsing error: {e}") from e
|
109 |
|
110 |
+
if not pd.Series(["left", "right", "winner"]).isin(df_pairs.columns).all():
|
111 |
+
raise gr.Error("Columns must exist: left, right, winner")
|
112 |
|
113 |
+
if not df_pairs["winner"].isin(pd.Series(["left", "right", "tie"])).all():
|
114 |
+
raise gr.Error("Allowed winner values: left, right, tie")
|
115 |
|
116 |
+
df_pairs = df_pairs[["left", "right", "winner"]]
|
117 |
|
118 |
df_pairs.dropna(axis=0, inplace=True)
|
119 |
|
120 |
if filtered:
|
121 |
largest = largest_strongly_connected_component(df_pairs)
|
122 |
|
123 |
+
df_pairs.drop(df_pairs[~(df_pairs["left"].isin(largest) & df_pairs["right"].isin(largest))].index, inplace=True)
|
124 |
|
125 |
xs, ys = df_pairs["left"], df_pairs["right"]
|
126 |
ws = df_pairs["winner"].map({"left": Winner.X, "right": Winner.Y, "tie": Winner.Draw})
|
127 |
|
128 |
scores, index = ALGORITHMS[algorithm](xs, ys, ws)
|
129 |
+
index.name = "item"
|
130 |
|
131 |
+
df_result = pd.DataFrame(data={"score": scores}, index=index)
|
132 |
|
133 |
+
df_result["pairs"] = pd.Series(0, dtype=int, index=index).add(
|
134 |
+
df_pairs.groupby("left")["left"].count(), fill_value=0,
|
135 |
).add(
|
136 |
+
df_pairs.groupby("right")["right"].count(), fill_value=0,
|
137 |
).astype(int)
|
138 |
|
139 |
+
df_result["rank"] = df_result["score"].rank(na_option="bottom", ascending=False).astype(int)
|
140 |
|
141 |
df_result.fillna(-np.inf, inplace=True)
|
142 |
+
df_result.sort_values(by=["rank", "score"], ascending=[True, False], inplace=True)
|
143 |
df_result.reset_index(inplace=True)
|
144 |
|
145 |
if truncated:
|
146 |
df_result = pd.concat((df_result.head(5), df_result.tail(5)), copy=False)
|
147 |
+
df_result = df_result[~df_result.index.duplicated(keep="last")]
|
148 |
|
149 |
+
pairwise = evalica.pairwise_scores(df_result["score"].to_numpy())
|
150 |
|
151 |
+
df_pairwise = pd.DataFrame(data=pairwise, index=df_result["item"], columns=df_result["item"])
|
152 |
|
153 |
fig = visualize(df_pairwise)
|
154 |
|
|
|
160 |
fn=handler,
|
161 |
inputs=[
|
162 |
gr.File(
|
163 |
+
file_types=[".tsv", ".csv"],
|
164 |
+
label="Comparisons",
|
165 |
),
|
166 |
gr.Dropdown(
|
167 |
choices=cast(list[str], ALGORITHMS),
|
168 |
+
value="Bradley-Terry (1952)",
|
169 |
+
label="Algorithm",
|
170 |
),
|
171 |
gr.Checkbox(
|
172 |
value=False,
|
173 |
+
label="Largest SCC",
|
174 |
+
info="Bradley-Terry, Eigenvector, and Newman algorithms require the comparison graph "
|
175 |
+
"to be strongly-connected. "
|
176 |
+
"This option keeps only the largest strongly-connected component (SCC) of the input graph. "
|
177 |
+
"Some items might be missing as a result of this filtering.",
|
178 |
),
|
179 |
gr.Checkbox(
|
180 |
value=False,
|
181 |
+
label="Truncate Output",
|
182 |
+
info="Perform the entire computation but output only five head and five tail items, "
|
183 |
+
"avoiding overlap.",
|
184 |
),
|
185 |
],
|
186 |
outputs=[
|
187 |
gr.Dataframe(
|
188 |
+
headers=["item", "score", "pairs", "rank"],
|
189 |
+
label="Ranking",
|
190 |
),
|
191 |
gr.Plot(
|
192 |
+
label="Pairwise Chances of Winning the Comparison",
|
193 |
+
),
|
194 |
],
|
195 |
examples=[
|
196 |
+
["food.csv", "Counting", False, False],
|
197 |
+
["food.csv", "Bradley-Terry (1952)", False, False],
|
198 |
+
["food.csv", "Eigenvector (1987)", False, False],
|
199 |
+
["food.csv", "PageRank (1998)", False, False],
|
200 |
+
["food.csv", "Newman (2023)", False, False],
|
201 |
+
["llmfao.csv", "Bradley-Terry (1952)", False, True],
|
202 |
+
["llmfao.csv", "Elo (1960)", False, True],
|
203 |
],
|
204 |
+
title="Pair2Rank: Turn Your Side-by-Side Comparisons into Ranking!",
|
205 |
+
description="""
|
206 |
This easy-to-use tool transforms pairwise comparisons (aka side-by-side) to a meaningful ranking of items.
|
207 |
|
208 |
As an input, it expects a comma-separated (CSV) file with a header containing the following columns:
|
|
|
214 |
Possible values for `winner` are `left`, `right`, or `tie`. The provided examples might be a good starting point.
|
215 |
|
216 |
As the output, this tool provides a table with items, their estimated scores, and ranks.
|
217 |
+
""".strip(),
|
218 |
+
article="""
|
219 |
Pair2Rank uses the [Evalica](https://pypi.org/p/evalica) library for computing the scores: <https://github.com/dustalov/evalica>.
|
220 |
|
221 |
Read more about Pair2Rank at <https://evalovernite.substack.com/p/llmfao-human-ranking>.
|
222 |
+
""".strip(),
|
223 |
+
allow_flagging="never",
|
224 |
)
|
225 |
|
226 |
iface.launch()
|
227 |
|
228 |
|
229 |
+
if __name__ == "__main__":
|
230 |
main()
|
ruff.toml
ADDED
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
line-length = 120
|
2 |
+
target-version = "py311"
|
3 |
+
|
4 |
+
[lint]
|
5 |
+
select = ["ALL"]
|
6 |
+
ignore = [
|
7 |
+
"D", # pydocstyle
|
8 |
+
"EM101", # raw-string-in-exception
|
9 |
+
"EM102", # f-string-in-exception
|
10 |
+
"FBT001", # boolean-type-hint-positional-argument
|
11 |
+
"N806", # non-lowercase-variable-in-function
|
12 |
+
"PD002", # pandas-use-of-inplace-argument
|
13 |
+
"TRY003", # raise-vanilla-args
|
14 |
+
]
|