# None1145's picture
# Update app.py
# b28fe12 verified
import pandas as pd
import gradio as gr
def _describe_row_diff(diff, limit):
    """Return the per-row comparison label for one error-rate difference.

    ``diff`` is the 1.5 value minus the 1.4 value, so a negative number
    means 1.5 produced the lower (better) error rate. Differences whose
    magnitude exceeds ``limit`` are reported as ignored.
    """
    if abs(diff) > limit:
        return "1.4 is the same as 1.5 (Ignored due to large diff)"
    if diff < 0:
        return f"1.5 is stronger than 1.4 ({diff:.8f})"
    if diff > 0:
        return f"1.4 is stronger than 1.5 ({-diff:.8f})"
    # Reached for an exact tie, and also for NaN (every comparison is False).
    return "1.4 is the same as 1.5 (0)"


def _describe_average_diff(avg):
    """Return the summary label for an averaged error-rate difference."""
    if avg < 0:
        return f"1.5 is stronger ({avg:.8f})"
    if avg > 0:
        return f"1.4 is stronger ({-avg:.8f})"
    # Reached for an exact tie, and for NaN (mean of an empty selection).
    return "1.4 is the same as 1.5 (0)"


def compare_csv_files(selected_languages, model_size):
    """Compare the 1.5 and 1.4 result CSVs and render the outcome as HTML.

    Reads ``result_1.5_{model_size}.csv`` and ``result_1.4_{model_size}.csv``
    from the current working directory, joins them on ``SourceText`` and
    ``Language``, and reports the per-row and averaged differences of the
    ``WordErrorRate`` and ``CharacterErrorRate`` columns.

    Args:
        selected_languages: Values of the ``Language`` column to keep. A
            falsy value (``None`` or an empty list) keeps every language.
        model_size: Suffix used to build the two CSV file names.

    Returns:
        An HTML string: a summary of the averaged differences followed by
        a table with one row per merged sample.
    """
    # Differences with a larger magnitude are treated as outliers: labelled
    # as ignored in the table and left out of both averages.
    max_num = 10

    # Construct file names dynamically based on model size
    newer = pd.read_csv(f"result_1.5_{model_size}.csv")
    older = pd.read_csv(f"result_1.4_{model_size}.csv")

    # Inner join: only samples present in both result files are compared.
    merged_df = pd.merge(
        newer, older, on=["SourceText", "Language"], suffixes=("_1.5", "_1.4")
    )

    if selected_languages:
        merged_df = merged_df[merged_df["Language"].isin(selected_languages)]

    averages = {}
    for metric in ("WordErrorRate", "CharacterErrorRate"):
        diff = merged_df[f"{metric}_1.5"] - merged_df[f"{metric}_1.4"]
        merged_df[f"{metric}_Diff"] = diff
        merged_df[f"{metric}_Comparison"] = diff.apply(
            lambda value: _describe_row_diff(value, max_num)
        )
        # Both metrics use the same outlier threshold, so every row that the
        # table shows as a real comparison also contributes to the average.
        averages[metric] = diff.loc[diff.abs() <= max_num].mean()

    # Each summary line is derived from its own metric's average.
    overall_summary = (
        "\n<h3>Overall Comparison:</h3>\n"
        "<p>Average WordErrorRate Difference (excluding large diffs): "
        f"{_describe_average_diff(averages['WordErrorRate'])}</p>\n"
        "<p>Average CharacterErrorRate Difference (excluding large diffs): "
        f"{_describe_average_diff(averages['CharacterErrorRate'])}</p>\n"
    )

    table_columns = [
        "Language",
        "SourceText",
        "WordErrorRate_1.5", "WordErrorRate_1.4", "WordErrorRate_Comparison",
        "CharacterErrorRate_1.5", "CharacterErrorRate_1.4", "CharacterErrorRate_Comparison",
    ]
    return overall_summary + merged_df[table_columns].to_html(escape=False, index=False)
# Populate the language choices from the Base result files, which are the
# data set selected by default when the app starts.
df1 = pd.read_csv("result_1.5_Base.csv")
df2 = pd.read_csv("result_1.4_Base.csv")
languages = sorted(set(df1["Language"]) | set(df2["Language"]))

# Input widgets, in the positional order compare_csv_files expects.
language_selector = gr.CheckboxGroup(
    choices=languages,
    label="Select Languages to Compare",
)
model_size_selector = gr.Dropdown(
    choices=["Base", "Medium"],
    label="Select Whisper Model Size",
    value="Base",
)

demo = gr.Interface(
    fn=compare_csv_files,
    inputs=[language_selector, model_size_selector],
    outputs="html",
    title="Fish Speech Benchmark",
    description="Select specific languages and model sizes (Base or Medium) to compare the results of WordErrorRate and CharacterErrorRate.",
)
demo.launch()