"""Streamlit page showing the best/worst queries per area and area-level means.

Two sidebar checkboxes toggle the two sections of the page:

* per-query tables of the best and worst results, sortable by any column;
* the same data averaged per ``area``.
"""

import pandas as pd
import streamlit as st

# Intro text for the per-query section. The heading sits on its own line so
# that Markdown renders it as a heading rather than as part of the paragraph.
_WORST_BEST_INTRO = (
    "## Worst/best queries\n"
    "The following are top 10 worst/best queries per area by number of hits.\n"
    "There are at least 10 documents per query in the test set, "
    "so number of hits/10 is the accuracy.\n"
)


def _mean_by_area(results_df):
    """Return the per-area mean of the numeric columns of *results_df*.

    ``numeric_only=True`` keeps the aggregation working when the frame also
    holds text columns: recent pandas versions raise on them instead of
    silently dropping them.
    """
    return results_df.groupby("area").mean(numeric_only=True).reset_index()


def _show_sorted_table(title, results_df, sort_key):
    """Render *title* followed by *results_df* sorted descending by *sort_key*."""
    st.markdown(title)
    st.table(results_df.sort_values(sort_key, ascending=False))


best_results_df = pd.read_csv("assets/best_tasks_with_hits.csv")
worst_results_df = pd.read_csv("assets/worst_tasks_with_hits.csv")

show_worst_best_statistics = st.sidebar.checkbox(
    label="show worst/best statistics grouped by area"
)
show_area_aggregated_results = st.sidebar.checkbox(
    label="show results aggregated by area"
)

if show_worst_best_statistics:
    st.markdown(_WORST_BEST_INTRO)
    # An explicit key keeps this widget distinct from the other "sort by"
    # selectbox below when both sections are shown at once.
    sort_key = st.selectbox(
        "sort by", list(best_results_df.columns), key="query_sort_key"
    )
    _show_sorted_table("## Queries with best results", best_results_df, sort_key)
    _show_sorted_table("## Queries with worst results", worst_results_df, sort_key)

if show_area_aggregated_results:
    st.markdown("## Area aggregated results")
    best_results_agg = _mean_by_area(best_results_df)
    worst_results_agg = _mean_by_area(worst_results_df)
    area_sort_key = st.selectbox(
        "sort by", list(best_results_agg.columns), key="area_sort_key"
    )
    _show_sorted_table("Best results", best_results_agg, area_sort_key)
    _show_sorted_table("Worst results", worst_results_agg, area_sort_key)