import gradio as gr
import pandas as pd
import plotly.express as px
from dataclasses import dataclass, field
from typing import List, Dict, Tuple, Union
import json
import os
from collections import OrderedDict
import re
@dataclass
class ScorecardCategory:
    """Container for one scorecard category: its name, questions and scores.

    NOTE(review): the only construction site visible in this file is the
    commented-out ``load_scorecard_templates`` helper, so the class looks
    unused here — confirm before relying on it.
    """

    # Category title.
    name: str
    # One dict per question; values are either a string or a list of strings.
    questions: List[Dict[str, Union[str, List[str]]]]
    # Integer scores keyed by string; each instance gets its own empty dict.
    scores: Dict[str, int] = field(default_factory=dict)
def extract_category_number(category_name: str) -> Union[int, float]:
    """Return the leading number of a category name for use as a sort key.

    Args:
        category_name: A category title such as ``"3. Disparate Performance"``.

    Returns:
        The leading run of digits as an ``int``. Names without a numeric
        prefix yield ``float('inf')`` so they sort after every numbered
        category.
    """
    match = re.match(r'^(\d+)\.?\s*.*$', category_name)
    if match is None:
        # Un-numbered names compare greater than any real category number.
        return float('inf')
    return int(match.group(1))
def sort_categories(categories):
    """Return a new list of category names ordered by their numeric prefix.

    The input iterable is left untouched; ordering is delegated to
    ``extract_category_number``.
    """
    ordered = list(categories)
    ordered.sort(key=extract_category_number)
    return ordered
# def load_scorecard_templates(directory):
# templates = []
# for filename in os.listdir(directory):
# if filename.endswith('.json'):
# with open(os.path.join(directory, filename), 'r') as file:
# data = json.load(file)
# templates.append(ScorecardCategory(
# name=data['name'],
# questions=data['questions']
# ))
# return templates
# Builds the per-category summary fragment that update_detailed_scorecard
# places at the top of each category card.
#
# category_data is read as a mapping of section name -> dict with a 'status'
# key ('Yes' / 'N/A' are the values tested here) and an optional 'questions'
# mapping of question text -> truthy/falsy answer.
#
# NOTE(review): every `html = "` / `html += "` literal below is closed on a
# later line, which a single-quoted Python string cannot do, so this function
# does not parse as shown. Presumably the HTML markup inside these literals was
# stripped — TODO restore them from version control before editing the logic.
def create_category_summary(category_data):
"""Create a summary section for a category"""
# Calculate statistics
total_sections = len(category_data)
completed_sections = sum(1 for section in category_data.values() if section['status'] == 'Yes')
# NOTE(review): na_sections is not read again in the visible code.
na_sections = sum(1 for section in category_data.values() if section['status'] == 'N/A')
# Calculate completion rates
total_questions = 0
completed_questions = 0
# NOTE(review): evaluation_types is not read again in the visible code.
evaluation_types = set()
has_human_eval = False
has_quantitative = False
has_documentation = False
# Sections marked N/A are excluded from both the question totals and the
# keyword scan below.
for section in category_data.values():
if section['status'] != 'N/A':
questions = section.get('questions', {})
total_questions += len(questions)
completed_questions += sum(1 for q in questions.values() if q)
# Check for evaluation types
# Detection is a case-insensitive substring match on the question text.
for question in questions.keys():
if 'human' in question.lower():
has_human_eval = True
if any(term in question.lower() for term in ['quantitative', 'metric', 'benchmark']):
has_quantitative = True
if 'documentation' in question.lower():
has_documentation = True
# Falls back to 0 when there are no applicable questions (avoids dividing by zero).
completion_rate = (completed_questions / total_questions * 100) if total_questions > 0 else 0
# Create summary HTML
html = "
"
html += "
📊 Section Summary
"
# Completion metrics
html += "
"
html += "
📈 Completion Metrics
"
html += f"
Overall Completion Rate: {completion_rate:.1f}%
"
html += f"
Sections Completed: {completed_sections}/{total_sections}
"
html += "
"
# Evaluation Coverage
# NOTE(review): the three f-strings below contain no placeholders as shown, so
# has_human_eval / has_quantitative / has_documentation are never visibly
# used — presumably they fed get_coverage_class() in the lost markup; confirm.
html += "
"
html += "
🎯 Evaluation Coverage
"
html += "
"
html += f"
👥 Human Evaluation
"
html += f"
📊 Quantitative Analysis
"
html += f"
📝 Documentation
"
html += "
"
html += "
"
# Status Breakdown
html += "
"
html += "
📋 Status Breakdown
"
html += create_status_pills(category_data)
html += "
"
html += "
"
return html
# Builds the model-wide summary fragment shown above the category cards.
#
# model_data['scores'] is read as category name -> section name -> dict with a
# 'status' key and an optional 'questions' mapping (question text -> truthy or
# falsy answer). Only categories present in selected_categories are counted.
#
# NOTE(review): the `html += "` literals below are closed on a later line,
# which a single-quoted Python string cannot do, so this function does not
# parse as shown. Presumably the HTML markup was stripped from them — TODO
# restore from version control before changing the logic.
def create_overall_summary(model_data, selected_categories):
"""Create a comprehensive summary of all categories"""
scores = model_data['scores']
# Initialize counters
total_sections = 0
completed_sections = 0
na_sections = 0
total_questions = 0
completed_questions = 0
# Track evaluation types across all categories
# Each counter is incremented once per question whose text contains the keyword.
evaluation_types = {
'human': 0,
'quantitative': 0,
'documentation': 0,
'monitoring': 0,
'transparency': 0
}
# Calculate completion rates for categories
category_completion = {}
# Process all categories
for category, category_data in scores.items():
if category not in selected_categories:
continue # Skip unselected categories
category_questions = 0
category_completed = 0
category_na = 0
total_sections_in_category = len(category_data)
na_sections_in_category = sum(1 for section in category_data.values() if section['status'] == 'N/A')
for section in category_data.values():
total_sections += 1
if section['status'] == 'Yes':
completed_sections += 1
elif section['status'] == 'N/A':
na_sections += 1
category_na += 1
# Questions of N/A sections count toward neither the totals nor the keyword scan.
if section['status'] != 'N/A':
questions = section.get('questions', {})
section_total = len(questions)
section_completed = sum(1 for q in questions.values() if q)
total_questions += section_total
completed_questions += section_completed
category_questions += section_total
category_completed += section_completed
# Check for evaluation types
for question in questions.keys():
if 'human' in question.lower():
evaluation_types['human'] += 1
if any(term in question.lower() for term in ['quantitative', 'metric', 'benchmark']):
evaluation_types['quantitative'] += 1
if 'documentation' in question.lower():
evaluation_types['documentation'] += 1
if 'monitoring' in question.lower():
evaluation_types['monitoring'] += 1
if 'transparency' in question.lower():
evaluation_types['transparency'] += 1
# Store category information
# A category counts as N/A only when every one of its sections is N/A
# (an empty category therefore also counts as N/A: 0 == 0).
is_na = na_sections_in_category == total_sections_in_category
completion_rate = (category_completed / category_questions * 100) if category_questions > 0 and not is_na else 0
category_completion[category] = {
'completion_rate': completion_rate,
'is_na': is_na
}
# Create summary HTML
html = ""
html += "
📊 Overall Model Evaluation Summary
"
# Key metrics section
html += "
"
# Overall completion metrics
html += "
"
html += "
📈 Overall Completion
"
# Rebinds completion_rate to the rate across all selected categories.
completion_rate = (completed_questions / total_questions * 100) if total_questions > 0 else 0
html += f"
Overall Completion Rate: {completion_rate:.1f}%
"
html += f"
Sections Completed: {completed_sections}/{total_sections}
"
html += f"
Questions Completed: {completed_questions}/{total_questions}
"
html += "
"
# Evaluation coverage
html += "
"
html += "
🎯 Evaluation Types Coverage
"
html += "
"
for eval_type, count in evaluation_types.items():
icon = {
'human': '👥',
'quantitative': '📊',
'documentation': '📝',
'monitoring': '📡',
'transparency': '🔍'
}.get(eval_type, '❓')
# NOTE(review): has_coverage is not referenced by the visible f-string below;
# presumably it selected a CSS class in the lost markup — confirm.
has_coverage = count > 0
html += f"
{icon} {eval_type.title()}
"
html += "
"
html += "
"
html += "
" # End summary-grid
# Category breakdown
html += "
"
html += "
📋 Category Completion Breakdown
"
html += "
"
# Sort and filter categories
sorted_categories = [cat for cat in sort_categories(scores.keys()) if cat in selected_categories]
for category in sorted_categories:
info = category_completion[category]
# Drop the "N. " numeric prefix for display when one is present.
category_name = category.split('. ', 1)[1] if '. ' in category else category
# Determine display text and style
if info['is_na']:
completion_text = "N/A"
bar_width = "0"
style_class = "na"
else:
completion_text = f"{info['completion_rate']:.1f}%"
bar_width = f"{info['completion_rate']}"
style_class = "active"
# NOTE(review): the f-string below is empty as shown, so category_name,
# completion_text, bar_width and style_class are never visibly used.
html += f"""
"""
html += "
"
html += "
" # End overall-summary-card
return html
def get_coverage_class(has_feature):
    """Map a truthy/falsy coverage flag to its CSS class name."""
    if has_feature:
        return 'covered'
    return 'not-covered'
# Counts how many sections of a category have each status and renders one
# entry per status.
#
# NOTE(review): the `html += f"` / `html += "` literals below are closed on a
# later line, which a single-quoted Python string cannot do; presumably the
# HTML markup was stripped from them — TODO restore from version control.
def create_status_pills(category_data):
"""Create status pill indicators"""
status_counts = {'Yes': 0, 'No': 0, 'N/A': 0}
for section in category_data.values():
# Any status other than 'Yes', 'No' or 'N/A' raises KeyError here.
status_counts[section['status']] += 1
html = ""
for status, count in status_counts.items():
html += f"
{status}: {count}
"
html += "
"
return html
def get_modality_icon(modality):
    """Look up the emoji that represents a modality name.

    Unknown modalities fall back to a generic sparkle icon.
    """
    fallback_icon = "💫"
    icon_by_modality = dict([
        ("Text-to-Text", "📝"),    # memo
        ("Text-to-Image", "🎨"),   # artist palette
        ("Image-to-Text", "🔍"),   # magnifying glass
        ("Image-to-Image", "🖼️"),  # framed picture
        ("Audio", "🎵"),           # musical note
        ("Video", "🎬"),           # clapper board
        ("Multimodal", "🔄"),      # cycle arrows
    ])
    if modality in icon_by_modality:
        return icon_by_modality[modality]
    return fallback_icon
def create_metadata_card(metadata):
    """Return the HTML card for a model's metadata.

    As written, the card is always the empty string and ``metadata`` is not
    read.
    """
    card_markup = ""
    return card_markup
def load_models_from_json(directory):
    """Load every ``*.json`` model file in ``directory``.

    Args:
        directory: Path of the folder holding one JSON document per model.

    Returns:
        An ``OrderedDict`` mapping each model's ``metadata.Name`` to its parsed
        JSON document, ordered case-insensitively by name.

    Raises:
        KeyError: If a document has no ``metadata`` / ``Name`` entry.
        json.JSONDecodeError: If a file is not valid JSON.
    """
    loaded = {}
    # os.listdir() order is arbitrary; sorting makes it deterministic which
    # file wins when two documents declare the same model name.
    for filename in sorted(os.listdir(directory)):
        if not filename.endswith('.json'):
            continue
        # Decode explicitly as UTF-8 so non-ASCII content loads the same way
        # regardless of the platform's locale encoding.
        with open(os.path.join(directory, filename), 'r', encoding='utf-8') as file:
            model_data = json.load(file)
        loaded[model_data['metadata']['Name']] = model_data
    return OrderedDict(sorted(loaded.items(), key=lambda item: item[0].lower()))
# Load every model document once at import time; the functions below read this
# module-level mapping (model name -> parsed JSON document).
# Template loading is currently disabled:
# scorecard_template = load_scorecard_templates('scorecard_templates')
models = load_models_from_json('model_data')
# Renders a list of source dicts; each is read for 'type' (used as the icon),
# 'detail' and 'name' (defaults to the detail). Details starting with "http"
# take the first branch, presumably to be rendered as links.
#
# NOTE(review): no call to this function is visible in this file;
# update_detailed_scorecard repeats the same loop inline.
# NOTE(review): the `html += f"` / `html += "` literals below are closed on a
# later line, which a single-quoted Python string cannot do; presumably the
# HTML markup was stripped from them — TODO restore from version control.
def create_source_html(sources):
# An empty or missing source list produces no markup at all.
if not sources:
return ""
html = ""
for source in sources:
icon = source.get("type", "")
detail = source.get("detail", "")
name = source.get("name", detail)
html += f"
{icon} "
if detail.startswith("http"):
html += f"
{name}"
else:
html += name
html += "
"
html += "
"
return html
def create_leaderboard(selected_categories):
    """Build the leaderboard table ranking every loaded model.

    For each entry of the module-level ``models`` mapping, the completion rate
    is the share of truthy answers among the questions of all sections whose
    status is not ``'N/A'``. The overall rate always spans every category of
    the model; ``selected_categories`` only controls which per-category
    columns are included.

    Args:
        selected_categories: Full category names to show as columns. Names
            that are not in the built-in display map are ignored.

    Returns:
        A ``pandas.DataFrame`` with the columns ``Rank``, ``AI System``,
        ``Type``, ``Overall Completion Rate`` and one column per selected
        known category. Rows are sorted by overall rate, highest first, and
        all rates are formatted as strings such as ``"87.5%"``. When no models
        are loaded, an empty frame with the same columns is returned.
    """
    # Full category name (as used in the model files) -> column header.
    category_map = {
        '1. Bias, Stereotypes, and Representational Harms Evaluation': '⚖️ Bias and Fairness',
        '2. Cultural Values and Sensitive Content Evaluation': '🌍 Cultural Values',
        '3. Disparate Performance Evaluation': '📊 Disparate Performance',
        '4. Environmental Costs and Carbon Emissions Evaluation': '🌱 Environmental Impact',
        '5. Privacy and Data Protection Evaluation': '🔒 Privacy',
        '6. Financial Costs Evaluation': '💰 Financial Costs',
        '7. Data and Content Moderation Labor Evaluation': '👥 Labor Practices'
    }
    selected = list(selected_categories or [])
    # Column order follows category_map, not the order of the selection.
    category_columns = [
        (full_name, display_name)
        for full_name, display_name in category_map.items()
        if full_name in selected
    ]
    display_columns = [display_name for _, display_name in category_columns]

    rows = []
    for model, data in models.items():
        total_score = 0
        total_questions = 0
        score_by_category = {}

        for category_name, category in data['scores'].items():
            category_score = 0
            category_total = 0
            for section in category.values():
                # N/A sections contribute neither answers nor questions.
                if section['status'] != 'N/A':
                    questions = section.get('questions', {})
                    category_score += sum(1 for q in questions.values() if q)
                    category_total += len(questions)
            if category_total > 0:
                score_by_category[category_name] = (category_score / category_total) * 100
                total_score += category_score
                total_questions += category_total

        score_percentage = (total_score / total_questions * 100) if total_questions > 0 else 0

        model_entry = {
            'AI System': model,
            'Type': data['metadata'].get('Type', 'Unknown'),
            'Overall Completion Rate': score_percentage
        }
        # Categories without applicable questions are reported as 0.
        for full_name, display_name in category_columns:
            model_entry[display_name] = score_by_category.get(full_name, 0)
        rows.append(model_entry)

    if not rows:
        # Without rows the frame would have no columns and sorting would fail.
        return pd.DataFrame(
            columns=['Rank', 'AI System', 'Type', 'Overall Completion Rate'] + display_columns
        )

    df = pd.DataFrame(rows)
    # A stable sort keeps tied models in their loading order.
    df = df.sort_values('Overall Completion Rate', ascending=False, kind='stable')
    df.insert(0, 'Rank', range(1, len(df) + 1))

    # Format as percentages only after sorting, so sorting stays numeric.
    for col in ['Overall Completion Rate'] + display_columns:
        df[col] = df[col].apply(lambda x: f"{x:.1f}%")
    return df
# The category list offered to the UI is taken from the first loaded model.
# The leaderboard column and table themselves are created inside the
# ``gr.Blocks`` layout further down. Building a second copy here, outside any
# Blocks context, produced components that were never rendered and computed
# the leaderboard an extra time at import, so that copy was removed.
first_model = next(iter(models.values()))
category_choices = list(first_model['scores'].keys())
def create_category_chart(selected_models, selected_categories):
    """Plot per-category completion percentages for the chosen models.

    Returns a titled, empty bar figure when no model is selected or when no
    selected category exists in any selected model; otherwise a bar chart with
    one bar segment per (model, category) pair, coloured by category.
    """
    if not selected_models:
        return px.bar(title='Please select at least one model for comparison')

    # Categories are shown in numeric-prefix order.
    ordered_categories = sort_categories(selected_categories)

    rows = []
    for model_name in selected_models:
        for category_name in ordered_categories:
            if category_name not in models[model_name]['scores']:
                continue

            answered = 0
            asked = 0
            for section in models[model_name]['scores'][category_name].values():
                # N/A sections are left out of the percentage entirely.
                if section['status'] == 'N/A':
                    continue
                answers = section.get('questions', {})
                answered += sum(1 for flag in answers.values() if flag)
                asked += len(answers)

            if asked > 0:
                percentage = answered / asked * 100
            else:
                percentage = 0
            rows.append({
                'Model': model_name,
                'Category': category_name,
                'Score Percentage': percentage
            })

    frame = pd.DataFrame(rows)
    if frame.empty:
        return px.bar(title='No data available for the selected models and categories')

    return px.bar(frame, x='Model', y='Score Percentage', color='Category',
                  title='AI Model Scores by Category',
                  labels={'Score Percentage': 'Score Percentage'},
                  category_orders={"Category": ordered_categories})
# Produces the three component updates for the "Detailed Scorecard" view, in
# this order: header (metadata card + overall summary), all category cards,
# and the total-score line. With no model selected it returns a placeholder
# message and hides the other two components.
#
# NOTE(review): most `card_content += "` / `f"` literals and the
# `total_score_md = "` literals below are closed on a later line, which a
# single-quoted Python string cannot do, so this function does not parse as
# shown. Presumably the HTML markup was stripped from them — TODO restore from
# version control before changing the logic.
def update_detailed_scorecard(model, selected_categories):
if not model:
return [
gr.update(value="Please select a model to view details.", visible=True),
gr.update(visible=False),
gr.update(visible=False)
]
selected_categories = sort_categories(selected_categories)
metadata_html = create_metadata_card(models[model]['metadata'])
overall_summary_html = create_overall_summary(models[model], selected_categories)
# Combine metadata and overall summary
combined_header = metadata_html + overall_summary_html
# Question-level tallies across all selected categories.
total_yes = 0
total_no = 0
total_na = 0
has_non_na = False
# Create category cards
all_cards_content = ""
for category_name in selected_categories:
if category_name in models[model]['scores']:
category_data = models[model]['scores'][category_name]
card_content = f"
{category_name}
"
# Add category-specific summary at the top of each card
card_content += create_category_summary(category_data)
# Sort sections within each category
# The key parses the leading number of each section name as a float; a name
# without a numeric prefix makes re.match return None and raises AttributeError.
sorted_sections = sorted(category_data.items(),
key=lambda x: float(re.match(r'^(\d+\.?\d*)', x[0]).group(1)))
category_yes = 0
category_no = 0
category_na = 0
for section, details in sorted_sections:
status = details['status']
if status != 'N/A':
has_non_na = True
sources = details.get('sources', [])
questions = details.get('questions', {})
# NOTE(review): section_class, status_class and status_icon are not referenced
# by the visible literals below; presumably they were used in the lost markup.
section_class = "section-na" if status == "N/A" else "section-active"
status_class = status.lower()
status_icon = "●" if status == "Yes" else "○" if status == "N/A" else "×"
card_content += f"
"
card_content += f""
if sources:
card_content += "
"
for source in sources:
icon = source.get("type", "")
detail = source.get("detail", "")
name = source.get("name", detail)
card_content += f"
{icon} "
if detail.startswith("http"):
card_content += f"
{name}"
else:
card_content += name
card_content += "
"
card_content += "
"
if questions:
yes_count = sum(1 for v in questions.values() if v)
total_count = len(questions)
card_content += "
"
if status == "N/A":
card_content += f"View {total_count} N/A items
"
else:
card_content += f"View details ({yes_count}/{total_count} completed)
"
card_content += ""
# Every question of an N/A section is tallied as N/A regardless of its answer.
for question, is_checked in questions.items():
if status == "N/A":
style_class = "na"
icon = "○"
category_na += 1
total_na += 1
else:
if is_checked:
style_class = "checked"
icon = "✓"
category_yes += 1
total_yes += 1
else:
style_class = "unchecked"
icon = "✗"
category_no += 1
total_no += 1
card_content += f"
{icon} {question}
"
card_content += "
"
card_content += "
"
# The category score ignores N/A questions: yes / (yes + no).
if category_yes + category_no > 0:
category_score = category_yes / (category_yes + category_no) * 100
card_content += f"
Completion Score Breakdown: {category_score:.2f}% Yes: {category_yes}, No: {category_no}, N/A: {category_na}
"
elif category_na > 0:
card_content += f"
Completion Score Breakdown: N/A (All {category_na} items not applicable)
"
card_content += "
"
all_cards_content += card_content
all_cards_content += "
"
# Create total score
if not has_non_na:
total_score_md = "No applicable scores (all items N/A)
"
elif total_yes + total_no > 0:
total_score = total_yes / (total_yes + total_no) * 100
total_score_md = f"Total Score: {total_score:.2f}% (Yes: {total_yes}, No: {total_no}, N/A: {total_na})
"
else:
total_score_md = "No applicable scores (all items N/A)
"
return [
gr.update(value=combined_header, visible=True),
gr.update(value=all_cards_content, visible=True),
gr.update(value=total_score_md, visible=True)
]
# Stylesheet passed to gr.Blocks(css=...). Backslashes that must reach the
# browser (the escaped slash in the "n/a" class selectors) are written doubled,
# because this is a regular, non-raw string literal.
css = """
.container {
display: flex;
flex-wrap: wrap;
justify-content: space-between;
}
.container.svelte-1hfxrpf.svelte-1hfxrpf {
height: 0%;
}
.card {
width: calc(50% - 20px);
border: 1px solid #e0e0e0;
border-radius: 10px;
padding: 20px;
margin-bottom: 20px;
background-color: #ffffff;
box-shadow: 0 4px 6px rgba(0,0,0,0.1);
transition: all 0.3s ease;
}
.card:hover {
box-shadow: 0 6px 8px rgba(0,0,0,0.15);
transform: translateY(-5px);
}
.card-title {
font-size: 1.4em;
font-weight: bold;
margin-bottom: 15px;
color: #333;
border-bottom: 2px solid #e0e0e0;
padding-bottom: 10px;
}
.sources-list {
margin: 10px 0;
}
.source-item {
margin: 5px 0;
padding: 5px;
background-color: #f8f9fa;
border-radius: 4px;
}
.question-item {
margin: 5px 0;
padding: 8px;
border-radius: 4px;
}
.question-item.checked {
background-color: #e6ffe6;
}
.question-item.unchecked {
background-color: #ffe6e6;
}
.category-score, .total-score {
background-color: #f0f8ff;
border: 1px solid #b0d4ff;
border-radius: 5px;
padding: 10px;
margin-top: 15px;
font-weight: bold;
text-align: center;
}
.total-score {
font-size: 1.2em;
background-color: #e6f3ff;
border-color: #80bdff;
}
.leaderboard-card {
width: 100%;
max-width: 800px;
margin: 0 auto;
}
.leaderboard-table {
width: 100%;
border-collapse: collapse;
}
.leaderboard-table th, .leaderboard-table td {
padding: 10px;
text-align: left;
border-bottom: 1px solid #e0e0e0;
}
.leaderboard-table th {
background-color: #f2f2f2;
font-weight: bold;
}
.section {
margin-bottom: 20px;
padding: 15px;
border-radius: 5px;
background-color: #f8f9fa;
}
@media (max-width: 768px) {
.card {
width: 100%;
}
}
.dark {
background-color: #1a1a1a;
color: #e0e0e0;
.card {
background-color: #2a2a2a;
border-color: #444;
}
.card-title {
color: #fff;
border-bottom-color: #444;
}
.source-item {
background-color: #2a2a2a;
}
.question-item.checked {
background-color: #1a3a1a;
}
.question-item.unchecked {
background-color: #3a1a1a;
}
.section {
background-color: #2a2a2a;
}
.category-score, .total-score {
background-color: #2c3e50;
border-color: #34495e;
}
.leaderboard-table th {
background-color: #2c3e50;
}
}
.section-na {
opacity: 0.6;
}
.question-item.na {
background-color: #f0f0f0;
color: #666;
}
.dark .question-item.na {
background-color: #2d2d2d;
color: #999;
}
.section-header {
display: flex;
justify-content: space-between;
align-items: center;
margin-bottom: 10px;
}
.status-badge {
font-size: 0.9em;
padding: 4px 8px;
border-radius: 12px;
font-weight: 500;
}
.status-badge.yes {
background-color: #e6ffe6;
color: #006600;
}
.status-badge.no {
background-color: #ffe6e6;
color: #990000;
}
.status-badge.n\\/a {
background-color: #f0f0f0;
color: #666666;
}
.question-accordion {
margin-top: 10px;
}
.question-accordion summary {
cursor: pointer;
padding: 8px;
background-color: #f8f9fa;
border-radius: 4px;
margin-bottom: 10px;
font-weight: 500;
}
.question-accordion summary:hover {
background-color: #e9ecef;
}
.dark .status-badge.yes {
background-color: #1a3a1a;
color: #90EE90;
}
.dark .status-badge.no {
background-color: #3a1a1a;
color: #FFB6B6;
}
.dark .status-badge.n\\/a {
background-color: #2d2d2d;
color: #999999;
}
.dark .question-accordion summary {
background-color: #2a2a2a;
}
.dark .question-accordion summary:hover {
background-color: #333333;
}
.metadata-card {
margin-bottom: 30px;
width: 100% !important;
}
.metadata-content {
display: flex;
flex-direction: column;
gap: 12px;
}
.metadata-row {
display: flex;
align-items: flex-start;
gap: 10px;
line-height: 1.5;
}
.metadata-label {
font-weight: 600;
min-width: 100px;
color: #555;
}
.metadata-value {
color: #333;
}
.metadata-link {
color: #007bff;
text-decoration: none;
}
.metadata-link:hover {
text-decoration: underline;
}
.modality-container {
display: flex;
flex-wrap: wrap;
gap: 8px;
}
.modality-badge {
display: inline-flex;
align-items: center;
gap: 4px;
padding: 4px 10px;
background-color: #f0f7ff;
border: 1px solid #cce3ff;
border-radius: 15px;
font-size: 0.9em;
color: #0066cc;
}
.dark .metadata-label {
color: #aaa;
}
.dark .metadata-value {
color: #ddd;
}
.dark .metadata-link {
color: #66b3ff;
}
.dark .modality-badge {
background-color: #1a2733;
border-color: #2c3e50;
color: #99ccff;
}
.summary-card {
background-color: #f8f9fa;
border: 1px solid #e0e0e0;
border-radius: 8px;
padding: 16px;
margin-bottom: 20px;
}
.summary-title {
font-size: 1.2em;
font-weight: bold;
margin-bottom: 12px;
color: #333;
}
.summary-section {
margin-bottom: 16px;
}
.summary-subtitle {
font-size: 1em;
font-weight: 600;
color: #555;
margin-bottom: 8px;
}
.metric-row {
display: flex;
justify-content: space-between;
align-items: center;
margin-bottom: 4px;
}
.metric-label {
color: #666;
}
.metric-value {
font-weight: 600;
color: #333;
}
.coverage-grid {
display: grid;
grid-template-columns: repeat(auto-fit, minmax(150px, 1fr));
gap: 8px;
margin-top: 8px;
}
.coverage-item {
padding: 8px;
border-radius: 6px;
text-align: center;
font-size: 0.9em;
}
.coverage-item.covered {
background-color: #e6ffe6;
color: #006600;
border: 1px solid #b3ffb3;
}
.coverage-item.not-covered {
background-color: #f5f5f5;
color: #666;
border: 1px solid #ddd;
}
.status-pills {
display: flex;
gap: 8px;
flex-wrap: wrap;
}
.status-pill {
padding: 4px 12px;
border-radius: 16px;
font-size: 0.9em;
font-weight: 500;
}
.status-pill.yes {
background-color: #e6ffe6;
color: #006600;
border: 1px solid #b3ffb3;
}
.status-pill.no {
background-color: #ffe6e6;
color: #990000;
border: 1px solid #ffb3b3;
}
.status-pill.n\\/a {
background-color: #f5f5f5;
color: #666;
border: 1px solid #ddd;
}
.dark .summary-card {
background-color: #2a2a2a;
border-color: #444;
}
.dark .summary-title,
.dark .summary-subtitle {
color: #e0e0e0;
}
.dark .metric-label {
color: #999;
}
.dark .metric-value {
color: #fff;
}
.dark .coverage-item.covered {
background-color: #1a3a1a;
color: #90EE90;
border-color: #2d5a2d;
}
.dark .coverage-item.not-covered {
background-color: #333;
color: #999;
border-color: #444;
}
.dark .status-pill.yes {
background-color: #1a3a1a;
color: #90EE90;
border-color: #2d5a2d;
}
.dark .status-pill.no {
background-color: #3a1a1a;
color: #FFB6B6;
border-color: #5a2d2d;
}
.dark .status-pill.n\\/a {
background-color: #333;
color: #999;
border-color: #444;
}
.overall-summary-card {
width: 100% !important;
margin-bottom: 30px;
}
.summary-grid {
display: grid;
grid-template-columns: repeat(auto-fit, minmax(300px, 1fr));
gap: 20px;
margin-bottom: 20px;
}
.category-completion-grid {
display: grid;
grid-template-columns: repeat(auto-fit, minmax(250px, 1fr));
gap: 16px;
margin-top: 12px;
}
.category-completion-item {
display: flex;
flex-direction: column;
background-color: #f8f9fa;
border-radius: 8px;
padding: 12px;
min-height: 86px; /* Set consistent height */
}
.category-name {
flex: 1;
font-size: 0.9em;
font-weight: 500;
color: #555;
margin-bottom: 8px;
line-height: 1.3;
}
.completion-bar-container {
height: 24px;
background-color: #eee;
border-radius: 12px;
position: relative;
overflow: hidden;
margin-top: auto; /* Push to bottom */
}
.completion-bar {
height: 100%;
background-color: #4CAF50;
transition: width 0.3s ease;
}
.completion-text {
position: absolute;
right: 8px;
top: 50%;
transform: translateY(-50%);
font-size: 0.8em;
font-weight: 600;
color: #333;
}
/* Dark mode adjustments */
.dark .category-completion-item {
background-color: #2a2a2a;
}
.dark .category-name {
color: #ccc;
}
.dark .completion-bar-container {
background-color: #333;
}
.dark .completion-bar {
background-color: #2e7d32;
}
.dark .completion-text {
color: #fff;
}
.completion-bar-container.na {
background-color: #f0f0f0;
}
.completion-bar-container.na .completion-bar {
background-color: #999;
width: 0 !important;
}
.dark .completion-bar-container.na {
background-color: #2d2d2d;
}
.dark .completion-bar-container.na .completion-bar {
background-color: #666;
}
.leaderboard-filters {
margin-bottom: 20px;
padding: 15px;
background-color: #f8f9fa;
border-radius: 8px;
}
.dark .leaderboard-filters {
background-color: #2a2a2a;
}
.filter-group {
margin-bottom: 10px;
}
.filter-label {
font-weight: 600;
margin-bottom: 5px;
display: block;
}
.score-column {
background-color: #f0f7ff;
}
.dark .score-column {
background-color: #1a2733;
}
.metric-header {
font-size: 0.9em;
color: #666;
text-align: center;
}
.dark .metric-header {
color: #aaa;
}
.table-container {
overflow-x: auto;
}
.leaderboard-table td {
white-space: nowrap;
}
.score-cell {
text-align: right;
padding-right: 15px !important;
}
.model-cell {
max-width: 300px;
overflow: hidden;
text-overflow: ellipsis;
white-space: nowrap;
}
.leaderboard-table {
width: 100%;
border-collapse: collapse;
}
.leaderboard-table th,
.leaderboard-table td {
padding: 10px;
text-align: left;
border: 1px solid #e0e0e0;
}
.dark .leaderboard-table th,
.dark .leaderboard-table td {
border-color: #444;
}
.leaderboard-table th {
background-color: #f2f2f2;
font-weight: bold;
}
.dark .leaderboard-table th {
background-color: #2c3e50;
}
.leaderboard-table tr:hover {
background-color: #f5f5f5;
}
.dark .leaderboard-table tr:hover {
background-color: #2d2d2d;
}
"""
# The category list offered by the filter is taken from the first loaded model.
first_model = next(iter(models.values()))
category_choices = list(first_model['scores'].keys())

with gr.Blocks(css=css) as demo:
    gr.Markdown("# AI Model Social Impact Scorecard Dashboard")

    with gr.Row():
        tab_selection = gr.Radio(["Leaderboard", "Category Analysis", "Detailed Scorecard"],
                                 label="Select Tab", value="Leaderboard")

    with gr.Row():
        # Single-model picker, shown only on the "Detailed Scorecard" tab.
        model_chooser = gr.Dropdown(choices=[""] + list(models.keys()),
                                    label="Select Model for Details",
                                    value="",
                                    interactive=True, visible=False)
        # Multi-model picker, shown only on the "Category Analysis" tab.
        model_multi_chooser = gr.Dropdown(choices=list(models.keys()),
                                          label="Select Models for Comparison",
                                          value=[],
                                          multiselect=True,
                                          interactive=True,
                                          visible=False,
                                          info="Select one or more models")

    # Category filter now visible for all tabs
    category_filter = gr.CheckboxGroup(choices=category_choices,
                                       label="Filter Categories",
                                       value=category_choices)

    def init_leaderboard():
        """Return the leaderboard for the filter's initial selection."""
        return create_leaderboard(category_filter.value)

    with gr.Column(visible=True) as leaderboard_tab:
        leaderboard_output = gr.DataFrame(
            value=init_leaderboard(),  # Initialize with all categories selected
            interactive=False,
            wrap=True
        )

    with gr.Column(visible=False) as category_analysis_tab:
        category_chart = gr.Plot()

    with gr.Column(visible=False) as detailed_scorecard_tab:
        model_metadata = gr.HTML()
        all_category_cards = gr.HTML()
        total_score = gr.Markdown()

    def update_dashboard(tab, selected_models, selected_model, selected_categories):
        """Recompute the active tab and toggle which widgets are visible.

        Always returns exactly ten updates, in the order of
        ``dashboard_outputs`` below: the three tab containers, the two model
        pickers, the leaderboard table, the category chart and the three
        scorecard components. Only the content of the active tab is
        recomputed; the other components receive a no-op update. An
        unrecognised ``tab`` value hides everything.
        """
        show_leaderboard = tab == "Leaderboard"
        show_chart = tab == "Category Analysis"
        show_scorecard = tab == "Detailed Scorecard"

        leaderboard_update = gr.update()
        chart_update = gr.update()
        scorecard_updates = [gr.update(), gr.update(), gr.update()]

        if show_leaderboard:
            leaderboard_update = gr.update(value=create_leaderboard(selected_categories))
        elif show_chart:
            chart_update = gr.update(
                value=create_category_chart(selected_models or [], selected_categories))
        elif show_scorecard:
            # update_detailed_scorecard returns the "please select a model"
            # placeholder itself when no model is chosen.
            scorecard_updates = update_detailed_scorecard(selected_model, selected_categories)

        return [
            gr.update(visible=show_leaderboard),   # leaderboard_tab
            gr.update(visible=show_chart),         # category_analysis_tab
            gr.update(visible=show_scorecard),     # detailed_scorecard_tab
            gr.update(visible=show_scorecard),     # model_chooser
            gr.update(visible=show_chart),         # model_multi_chooser
            leaderboard_update,
            chart_update,
            *scorecard_updates,
        ]

    # Every trigger shares one input list and one output list so the number of
    # returned updates always matches the number of output components. The
    # category filter is not an output: it is visible on every tab, and writing
    # to it from its own change handler is unnecessary.
    dashboard_inputs = [tab_selection, model_multi_chooser, model_chooser, category_filter]
    dashboard_outputs = [leaderboard_tab, category_analysis_tab, detailed_scorecard_tab,
                         model_chooser, model_multi_chooser,
                         leaderboard_output, category_chart, model_metadata,
                         all_category_cards, total_score]

    for trigger in (tab_selection, model_chooser, model_multi_chooser, category_filter):
        trigger.change(
            fn=update_dashboard,
            inputs=dashboard_inputs,
            outputs=dashboard_outputs
        )
# Start the Gradio server only when this file is executed as a script, not
# when it is imported as a module.
if __name__ == "__main__":
    demo.launch()