"""Streamlit dashboard that renders an SLM benchmark leaderboard.

The leaderboard is downloaded as a markdown table, parsed into a pandas
DataFrame and displayed as a table, per-metric bar charts and a radar chart.
"""

import re
from io import StringIO

import pandas as pd
import plotly.express as px
import plotly.graph_objs as go
import requests
import streamlit as st

# Markdown inline link: [text](url).
_MODEL_LINK_RE = re.compile(r'\[(.*?)\]\((.*?)\)')
# A cell of the header/body separator row of a markdown table, e.g. "---" or ":--:".
_SEPARATOR_CELL_RE = re.compile(r':?-+:?')
# Upper bound (seconds) for the benchmark download so the app cannot hang forever.
_REQUEST_TIMEOUT_SECONDS = 15

_NOUS_MD_URL = "https://raw.githubusercontent.com/amazon-science/aws-research-science/refs/heads/main/SLMleaderboard/nous.md"
_STANDARD_MD_URL = "https://raw.githubusercontent.com/amazon-science/aws-research-science/refs/heads/main/SLMleaderboard/standard.md"


def _extract_link_url(cell):
    """Return the URL of the first markdown link in *cell*, or None."""
    if not isinstance(cell, str):
        # Missing cells are parsed as NaN (a float); they carry no link.
        return None
    match = _MODEL_LINK_RE.search(cell)
    return match.group(2) if match else None


def _replace_links_with_text(cell):
    """Replace every markdown link in *cell* by its link text."""
    if not isinstance(cell, str):
        return cell
    return _MODEL_LINK_RE.sub(r'\1', cell)


def convert_markdown_table_to_dataframe(md_content):
    """
    Converts a markdown table to a Pandas DataFrame, handling special
    characters, links, and extracting Hugging Face URLs.

    Lines that contain no pipe character are ignored, so text surrounding the
    table does not disturb parsing. Cell values are whitespace-stripped and
    kept as strings; numeric conversion is left to the caller.

    Args:
        md_content: Markdown text containing one pipe-delimited table with a
            ``Model`` column.

    Returns:
        A DataFrame with the table's columns plus a ``URL`` column holding the
        link target of each ``Model`` cell (None when the cell has no link).
        ``Model`` itself holds the plain link text.

    Raises:
        ValueError: If no table row is found in ``md_content``.
        KeyError: If the table has no ``Model`` column.
    """
    table_lines = []
    for raw_line in md_content.splitlines():
        line = raw_line.strip()
        if '|' not in line:
            continue
        # Drop the optional outer pipes so they do not create empty columns.
        if line.startswith('|'):
            line = line[1:]
        if line.endswith('|'):
            line = line[:-1]
        table_lines.append('|'.join(cell.strip() for cell in line.split('|')))

    if not table_lines:
        raise ValueError("No markdown table found in the provided content.")

    # dtype=str keeps every cell textual regardless of whether a separator
    # row is present; the raw string avoids the invalid "\|" escape sequence.
    df = pd.read_csv(StringIO("\n".join(table_lines)), sep=r"\|", engine='python', dtype=str)
    df.columns = df.columns.str.strip()  # Clean column names

    # Remove the "---|---" separator row, but only when it really is one, so
    # a table without it does not lose its first data row.
    if not df.empty and all(
        isinstance(cell, str) and _SEPARATOR_CELL_RE.fullmatch(cell)
        for cell in df.iloc[0]
    ):
        df = df.drop(df.index[0], axis=0)

    if 'Model' not in df.columns:
        raise KeyError("The markdown table has no 'Model' column.")

    # Extract Model names and URLs
    df['URL'] = df['Model'].apply(_extract_link_url)
    df['Model'] = df['Model'].apply(_replace_links_with_text)
    return df


def create_bar_chart(df, metric, color_map, key_suffix):
    """
    Creates and displays a horizontal bar chart for a given metric.

    Args:
        df: Leaderboard DataFrame with a ``Model`` column.
        metric: Name of the numeric column to plot.
        color_map: Plotly colorscale name used to colour the bars.
        key_suffix: Value appended to the Streamlit element key to keep it
            unique across charts.
    """
    st.write(f"### {metric} Scores")
    if metric not in df.columns:
        st.write(f"No data available for {metric}.")
        return

    sorted_df = df[['Model', metric]].dropna().sort_values(by=metric, ascending=True)
    if sorted_df.empty:
        # Every value was missing; an empty figure would only be confusing.
        st.write(f"No data available for {metric}.")
        return

    fig = go.Figure(go.Bar(
        x=sorted_df[metric],
        y=sorted_df['Model'],
        orientation='h',
        marker=dict(color=sorted_df[metric], colorscale=color_map),
    ))
    fig.update_layout(margin=dict(l=20, r=20, t=20, b=20))
    st.plotly_chart(fig, use_container_width=True, key=f"bar_chart_{metric}_{key_suffix}")


def create_radar_chart(df, metric_columns, top_n=10):
    """
    Create a radar chart for the top models by "Average" score.

    Args:
        df: Leaderboard DataFrame with ``Model``, ``Average`` and the columns
            named in ``metric_columns``.
        metric_columns: Names of the numeric columns used as radar axes.
        top_n: Number of best-scoring models to draw (default 10).
    """
    st.write(f"### Radar Chart (Top {top_n} Models by Average Score)")
    if 'Average' not in df.columns:
        st.write("Average column not found.")
        return

    top_df = df.nlargest(top_n, 'Average')
    if top_df.empty:
        st.write("No models available for the radar chart.")
        return

    metric_columns = list(metric_columns)
    radar_data = top_df[['Model'] + metric_columns].set_index('Model')

    fig = go.Figure()
    for model_name, row in radar_data.iterrows():
        fig.add_trace(go.Scatterpolar(
            r=row.values,
            theta=metric_columns,
            fill='toself',
            name=model_name,
        ))

    radial_axis = dict(visible=True)
    peak = radar_data.max().max()
    if pd.notna(peak) and peak > 0:
        # Dynamic range with 20% headroom; otherwise let Plotly autorange
        # instead of passing an invalid [0, nan] range.
        radial_axis['range'] = [0, 1.2 * peak]

    fig.update_layout(polar=dict(radialaxis=radial_axis), showlegend=True)
    st.plotly_chart(fig, use_container_width=True, key="radar_chart")


def main():
    """Render the leaderboard page: inputs, table, bar charts and radar chart."""
    st.set_page_config(page_title="SLM Leaderboard", layout="wide")
    st.title("🏆 SLM Leaderboard")
    st.markdown(
        "We record Nous and Standard benchmark results for various SLMs. "
        "Please submit a PR to this "
        "[repo](https://github.com/amazon-science/aws-research-science/tree/main/SLMleaderboard) "
        "to include your model! \n"
        "Heavily Inspired by "
        "[YALB](https://huggingface.co/spaces/mlabonne/Yet_Another_LLM_Leaderboard) "
    )

    # URL to your markdown file
    md_url = st.text_input(
        "This is the default location of the benchmarks and can be changed",
        _NOUS_MD_URL,
    )

    st.markdown(
        "Copy the following links into the textbox above and refresh dashboard:\n"
        f"- [Nous benchmark results]({_NOUS_MD_URL})\n"
        f"- [Standard LLM benchmarks]({_STANDARD_MD_URL})\n"
    )

    if not md_url:
        st.error("Please provide a valid URL to a markdown file containing the leaderboard table.")
        return

    # Download separately from parsing so a network failure is reported as such.
    try:
        response = requests.get(md_url, timeout=_REQUEST_TIMEOUT_SECONDS)
        response.raise_for_status()
    except requests.RequestException as e:
        st.error(f"Failed to download the markdown file: {e}")
        return

    # Top-level UI boundary: show any processing failure instead of a traceback.
    try:
        df = convert_markdown_table_to_dataframe(response.text)

        # Automatically detect metrics (all columns except 'Model' and 'URL')
        metric_columns = [col for col in df.columns if col not in ['Model', 'URL']]

        # Convert metric columns to numeric, handling errors gracefully
        for col in metric_columns:
            df[col] = pd.to_numeric(df[col], errors='coerce')

        # Calculate "Average" score as a new column (exclude 'Model' and 'URL')
        df['Average'] = df[metric_columns].mean(axis=1, skipna=True)
        if 'Average' not in metric_columns:
            metric_columns.append('Average')

        # Dropdown to select color map
        color_map = st.selectbox(
            "Select Color Map for Bar Charts",
            options=['Inferno', 'Viridis', 'Cividis'],
        )

        # Sortable leaderboard table
        st.dataframe(
            df[['Model'] + metric_columns + ['URL']],
            use_container_width=True,
            hide_index=True,
        )

        # Bar charts for each metric; "Average" is skipped here because it
        # gets its own chart below and must not be rendered twice.
        for i, metric in enumerate(metric_columns):
            if metric == 'Average':
                continue
            create_bar_chart(df, metric, color_map, key_suffix=i)

        # Bar chart for the "Average" score
        create_bar_chart(df, 'Average', color_map, key_suffix="average")

        # Radar chart for the top 10 models by "Average" score
        create_radar_chart(df, metric_columns)

    except Exception as e:
        st.error(f"An error occurred while processing the markdown table: {e}")


if __name__ == "__main__":
    main()