Alignment-Lab-AI committed on
Commit
8efb597
•
1 Parent(s): d741f98

Update app.py

Files changed (1)
  1. app.py +74 -9
app.py CHANGED
@@ -6,6 +6,7 @@ import gradio as gr
 import datasets
 from huggingface_hub import snapshot_download
 from gradio_leaderboard import Leaderboard, ColumnFilter, SelectColumns
+import plotly.graph_objects as go
 
 from src.display.about import (
     CITATION_BUTTON_LABEL,
@@ -31,12 +32,11 @@ from src.envs import (
     HF_HOME,
 )
 from src.populate import get_evaluation_queue_df, get_leaderboard_df
-from src.tools.plots import create_metric_plot_obj, create_plot_df, create_scores_df
+from src.tools.plots import create_plot_df, create_scores_df
 
 # Configure logging
 logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
 
-
 # Convert the environment variable "LEADERBOARD_FULL_INIT" to a boolean value, defaulting to True if the variable is not set.
 # This controls whether a full initialization should be performed.
 DO_FULL_INIT = os.getenv("LEADERBOARD_FULL_INIT", "True") == "True"
@@ -53,7 +53,6 @@ def time_diff_wrapper(func):
 
     return wrapper
 
-
 @time_diff_wrapper
 def download_dataset(repo_id, local_dir, repo_type="dataset", max_attempts=3, backoff_factor=1.5):
     """Download dataset with exponential backoff retries."""
@@ -119,18 +118,85 @@ def init_space():
 
     return leaderboard_df, eval_queue_dfs
 
-
-# Calls the init_space function with the `full_init` parameter determined by the `do_full_init` variable.
-# This initializes various DataFrames used throughout the application, with the level of initialization detail controlled by the `do_full_init` flag.
+# Initialize the space
 leaderboard_df, eval_queue_dfs = init_space()
 finished_eval_queue_df, running_eval_queue_df, pending_eval_queue_df = eval_queue_dfs
 
-
 # Data processing for plots now only on demand in the respective Gradio tab
 def load_and_create_plots():
     plot_df = create_plot_df(create_scores_df(leaderboard_df))
     return plot_df
 
+def create_metric_plot_obj(df, metrics, title="Metrics Over Time"):
+    """Create plot with Open-Orca models highlighted in purple"""
+    fig = go.Figure()
+
+    # Add traces for each metric
+    for metric in metrics:
+        # Get the model names for this metric
+        model_names = df[f"{metric}_model"].tolist()
+
+        # Create masks for Open-Orca and non-Open-Orca models
+        is_open_orca = ["Open-Orca" in str(model) for model in model_names]
+
+        # Add trace for non-Open-Orca models
+        fig.add_trace(
+            go.Scatter(
+                x=df[df.index[~is_open_orca]],
+                y=df[metric][~is_open_orca],
+                name=metric,
+                mode='lines+markers',
+                line=dict(width=2),
+                marker=dict(size=8),
+                hovertemplate=(
+                    "Date: %{x}<br>"
+                    "Score: %{y:.2f}<br>"
+                    "Model: %{text}<br>"
+                ),
+                text=[model_names[i] for i, flag in enumerate(is_open_orca) if not flag]
+            )
+        )
+
+        # Add trace for Open-Orca models with purple color and larger markers
+        if any(is_open_orca):
+            fig.add_trace(
+                go.Scatter(
+                    x=df[df.index[is_open_orca]],
+                    y=df[metric][is_open_orca],
+                    name=f"{metric} (Open-Orca)",
+                    mode='lines+markers',
+                    line=dict(color='purple', width=3),
+                    marker=dict(
+                        color='purple',
+                        size=12,
+                        symbol='star'
+                    ),
+                    hovertemplate=(
+                        "Date: %{x}<br>"
+                        "Score: %{y:.2f}<br>"
+                        "Model: %{text}<br>"
+                    ),
+                    text=[model_names[i] for i, flag in enumerate(is_open_orca) if flag]
+                )
+            )
+
+    # Update layout
+    fig.update_layout(
+        title=title,
+        xaxis_title="Date",
+        yaxis_title="Score",
+        hovermode='x unified',
+        showlegend=True,
+        legend=dict(
+            yanchor="top",
+            y=0.99,
+            xanchor="left",
+            x=0.01
+        )
+    )
+
+    return fig
+
 def init_leaderboard(dataframe):
     return Leaderboard(
         value = dataframe,
@@ -210,6 +276,5 @@ with demo:
     )
 
     demo.load(fn=get_latest_data_leaderboard, inputs=[leaderboard], outputs=[leaderboard])
-
 
-    demo.queue(default_concurrency_limit=40).launch()
+demo.queue(default_concurrency_limit=40).launch()
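
Note on the new create_metric_plot_obj: as committed, is_open_orca is a plain Python list, so ~is_open_orca raises a TypeError, and df[df.index[mask]] selects by column label rather than by row. A minimal corrected sketch of the same idea, assuming df is a pandas DataFrame indexed by date with a score column named after each metric and a matching "<metric>_model" column (the .loc-based masking below is this sketch's assumption, not code from the commit):

import pandas as pd
import plotly.graph_objects as go

def create_metric_plot_obj(df: pd.DataFrame, metrics, title="Metrics Over Time") -> go.Figure:
    """Plot each metric over time and highlight Open-Orca models in purple."""
    fig = go.Figure()
    for metric in metrics:
        model_names = df[f"{metric}_model"].astype(str)
        # Boolean numpy mask so that ~mask is well defined
        # (inverting a plain Python list raises TypeError).
        is_open_orca = model_names.str.contains("Open-Orca").to_numpy()

        traces = [
            (~is_open_orca, metric,
             dict(line=dict(width=2), marker=dict(size=8))),
            (is_open_orca, f"{metric} (Open-Orca)",
             dict(line=dict(color="purple", width=3),
                  marker=dict(color="purple", size=12, symbol="star"))),
        ]
        for mask, label, style in traces:
            if not mask.any():
                continue
            sub = df.loc[mask]  # row selection, not column selection
            fig.add_trace(
                go.Scatter(
                    x=sub.index,  # assumes the frame is indexed by date
                    y=sub[metric],
                    name=label,
                    mode="lines+markers",
                    hovertemplate="Date: %{x}<br>Score: %{y:.2f}<br>Model: %{text}<br>",
                    text=model_names[mask],
                    **style,
                )
            )

    fig.update_layout(
        title=title,
        xaxis_title="Date",
        yaxis_title="Score",
        hovermode="x unified",
        showlegend=True,
        legend=dict(yanchor="top", y=0.99, xanchor="left", x=0.01),
    )
    return fig

Presumably load_and_create_plots() hands its plot_df to this helper from the plots tab, taking over from the create_metric_plot_obj import that this commit drops from src.tools.plots.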
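
Only the signature of download_dataset appears in this diff; its body is untouched by the commit. For orientation, an exponential-backoff retry loop around snapshot_download consistent with max_attempts=3 and backoff_factor=1.5 could look roughly like the sketch below (an illustration, not the space's actual implementation; the time_diff_wrapper decorator is omitted):

import logging
import time
from huggingface_hub import snapshot_download

def download_dataset(repo_id, local_dir, repo_type="dataset", max_attempts=3, backoff_factor=1.5):
    """Download a repo snapshot, retrying with exponential backoff."""
    for attempt in range(max_attempts):
        try:
            snapshot_download(repo_id=repo_id, local_dir=local_dir, repo_type=repo_type)
            return
        except Exception as err:  # broad catch for the sketch; narrow as needed
            if attempt == max_attempts - 1:
                raise
            wait = backoff_factor ** attempt
            logging.warning(
                "Download of %s failed (attempt %d/%d): %s; retrying in %.2fs",
                repo_id, attempt + 1, max_attempts, err, wait,
            )
            time.sleep(wait)

With backoff_factor=1.5, the waits between the three attempts would be 1.0 s and then 1.5 s before the final error is re-raised.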