import pandas as pd
import gradio as gr
from typing import List

# Default plot dimensions, in pixels, shared by every bar plot in this module.
HEIGHT = 600
WIDTH = 1000


def set_error(row: pd.Series) -> bool:
    """Return the error flag for a single row.

    If ``row.error`` already equals ``True`` or ``False`` it is returned
    unchanged. For any other value (for example ``None`` or ``NaN``) the row
    counts as an error exactly when ``row.prompt_response`` is falsy.
    """
    if row.error not in [True, False]:
        return not row.prompt_response
    return row.error


def _empty_wins_frame() -> pd.DataFrame:
    """Return an empty frame with the columns produced by get_tool_winning_rate."""
    return pd.DataFrame(
        {
            'tool': pd.Series(dtype='object'),
            'request_month_year_week': pd.Series(dtype='object'),
            '0': pd.Series(dtype='int64'),
            '1': pd.Series(dtype='int64'),
            'win_perc': pd.Series(dtype='float64'),
            'total_request': pd.Series(dtype='int64'),
        }
    )


def get_tool_winning_rate(tools_df: pd.DataFrame, inc_tools: List[str]) -> pd.DataFrame:
    """Compute per-tool, per-week win/loss counts and the winning percentage.

    Rows are kept only when their ``tool`` is in ``inc_tools``, they are not
    flagged as errors by ``set_error``, and both ``currentAnswer`` (after
    normalising ``'yes'``/``'no'`` capitalisation) and ``vote`` are ``'Yes'``
    or ``'No'``. A row is a win when ``currentAnswer`` equals ``vote``.

    Args:
        tools_df: Frame with at least the columns ``tool``, ``error``,
            ``prompt_response``, ``currentAnswer``, ``vote`` and
            ``request_month_year_week``.
        inc_tools: Names of the tools to include.

    Returns:
        One row per (tool, week) with string-named columns ``tool``,
        ``request_month_year_week``, ``"0"`` (losses), ``"1"`` (wins),
        ``win_perc`` (0-100) and ``total_request``. The frame is empty (with
        the same columns) when no row survives the filters.
    """
    selected = tools_df[tools_df['tool'].isin(inc_tools)].copy()
    if selected.empty:
        # A row-wise apply on an empty frame returns a DataFrame rather than
        # a Series and cannot be assigned to a single column.
        return _empty_wins_frame()

    selected['error'] = selected.apply(set_error, axis=1)

    # Copy so the normalisation below writes to an independent frame instead
    # of a slice of `selected` (avoids SettingWithCopyWarning).
    valid = selected[selected['error'] != True].copy()  # noqa: E712
    valid['currentAnswer'] = valid['currentAnswer'].replace({'no': 'No', 'yes': 'Yes'})
    valid = valid[
        valid['currentAnswer'].isin(['Yes', 'No']) & valid['vote'].isin(['Yes', 'No'])
    ]
    if valid.empty:
        return _empty_wins_frame()

    valid = valid.assign(win=(valid['currentAnswer'] == valid['vote']).astype(int))

    # unstack() only creates columns for outcomes that actually occur, so
    # force both the loss (0) and win (1) columns to exist.
    wins = (
        valid.groupby(['tool', 'request_month_year_week', 'win'])
        .size()
        .unstack(fill_value=0)
        .reindex(columns=[0, 1], fill_value=0)
    )
    wins['win_perc'] = (wins[1] / (wins[0] + wins[1])) * 100
    wins.reset_index(inplace=True)
    wins['total_request'] = wins[0] + wins[1]
    wins.columns = wins.columns.astype(str)

    # Convert request_month_year_week to string and explicitly set type for Altair
    wins['request_month_year_week'] = wins['request_month_year_week'].astype(str)
    return wins


def get_overall_winning_rate(wins_df: pd.DataFrame) -> pd.DataFrame:
    """Aggregate winning-rate rows into one row per week.

    Args:
        wins_df: Frame with the columns ``request_month_year_week``, ``"0"``,
            ``"1"``, ``win_perc`` and ``total_request``.

    Returns:
        One row per ``request_month_year_week`` with ``losses`` (sum of
        ``"0"``), ``wins`` (sum of ``"1"``), ``win_perc`` and
        ``total_request`` (sum).
    """
    # NOTE: win_perc is the plain mean of the input rows' percentages, not
    # wins / total_request, so every input row carries equal weight.
    overall_wins = wins_df.groupby('request_month_year_week').agg({
        "0": 'sum',
        "1": 'sum',
        "win_perc": 'mean',
        "total_request": 'sum',
    }).rename(columns={"0": 'losses', "1": 'wins'}).reset_index()
    return overall_wins


def plot_tool_winnings_overall(wins_df: pd.DataFrame, winning_selector: str = "win_perc") -> gr.BarPlot:
    """Build a bar plot of ``winning_selector`` per week.

    Args:
        wins_df: Frame containing ``request_month_year_week`` and the column
            named by ``winning_selector``.
        winning_selector: Column plotted on the y axis; also used as the
            y-axis title.

    Returns:
        A ``gr.BarPlot`` sized ``WIDTH`` x ``HEIGHT``.
    """
    return gr.BarPlot(
        title="Winning Rate",
        x_title="Date",
        y_title=winning_selector,
        show_label=True,
        interactive=True,
        show_actions_button=True,
        tooltip=["request_month_year_week", winning_selector],
        value=wins_df,
        x="request_month_year_week",
        y=winning_selector,
        height=HEIGHT,
        width=WIDTH,
    )


def plot_tool_winnings_by_tool(wins_df: pd.DataFrame, tool: str) -> gr.BarPlot:
    """Build a bar plot of the weekly winning percentage for a single tool.

    Args:
        wins_df: Frame containing ``tool``, ``request_month_year_week`` and
            ``win_perc``.
        tool: Only rows whose ``tool`` column equals this value are plotted.

    Returns:
        A ``gr.BarPlot`` sized ``WIDTH`` x ``HEIGHT``.
    """
    return gr.BarPlot(
        title="Winning Rate",
        x_title="Week",
        y_title="Winning Rate",
        x="request_month_year_week",
        y="win_perc",
        value=wins_df[wins_df['tool'] == tool],
        show_label=True,
        interactive=True,
        show_actions_button=True,
        tooltip=["request_month_year_week", "win_perc"],
        height=HEIGHT,
        width=WIDTH,
    )