This view is limited to 50 files because it contains too many changes. See the raw diff here.
Files changed (50)
  1. .env.example +0 -3
  2. .gitattributes +35 -0
  3. .gitignore +10 -43
  4. .pre-commit-config.yaml +53 -0
  5. Dockerfile +0 -62
  6. Makefile +13 -0
  7. README.md +13 -75
  8. app.py +448 -0
  9. backend/Dockerfile.dev +0 -25
  10. backend/README.md +0 -352
  11. backend/__init__.py +0 -0
  12. backend/app/api/__init__.py +0 -5
  13. backend/app/api/dependencies.py +0 -34
  14. backend/app/api/endpoints/leaderboard.py +0 -49
  15. backend/app/api/endpoints/models.py +0 -103
  16. backend/app/api/endpoints/votes.py +0 -105
  17. backend/app/api/router.py +0 -9
  18. backend/app/asgi.py +0 -106
  19. backend/app/config/__init__.py +0 -6
  20. backend/app/config/base.py +0 -38
  21. backend/app/config/hf_config.py +0 -30
  22. backend/app/config/logging_config.py +0 -38
  23. backend/app/core/cache.py +0 -109
  24. backend/app/core/fastapi_cache.py +0 -48
  25. backend/app/core/formatting.py +0 -104
  26. backend/app/main.py +0 -18
  27. backend/app/services/__init__.py +0 -3
  28. backend/app/services/hf_service.py +0 -50
  29. backend/app/services/leaderboard.py +0 -208
  30. backend/app/services/models.py +0 -587
  31. backend/app/services/rate_limiter.py +0 -72
  32. backend/app/services/votes.py +0 -390
  33. backend/app/utils/__init__.py +0 -3
  34. backend/app/utils/logging.py +0 -3
  35. backend/app/utils/model_validation.py +0 -266
  36. backend/pyproject.toml +0 -31
  37. backend/utils/analyze_prod_datasets.py +0 -170
  38. backend/utils/analyze_prod_models.py +0 -106
  39. backend/utils/fix_wrong_model_size.py +0 -110
  40. backend/utils/last_activity.py +0 -164
  41. backend/utils/sync_datasets_locally.py +0 -130
  42. backend/uv.lock +0 -0
  43. docker-compose.yml +0 -33
  44. frontend/Dockerfile.dev +0 -15
  45. frontend/README.md +0 -80
  46. frontend/package.json +0 -55
  47. frontend/public/index.html +0 -96
  48. frontend/public/logo256.png +0 -0
  49. frontend/public/logo32.png +0 -0
  50. frontend/public/og-image.jpg +0 -0
.env.example DELETED
@@ -1,3 +0,0 @@
- ENVIRONMENT=development
- HF_TOKEN=xxx
- HF_HOME=.cache
.gitattributes ADDED
@@ -0,0 +1,35 @@
+ *.7z filter=lfs diff=lfs merge=lfs -text
+ *.arrow filter=lfs diff=lfs merge=lfs -text
+ *.bin filter=lfs diff=lfs merge=lfs -text
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
+ *.ftz filter=lfs diff=lfs merge=lfs -text
+ *.gz filter=lfs diff=lfs merge=lfs -text
+ *.h5 filter=lfs diff=lfs merge=lfs -text
+ *.joblib filter=lfs diff=lfs merge=lfs -text
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
+ *.model filter=lfs diff=lfs merge=lfs -text
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
+ *.npy filter=lfs diff=lfs merge=lfs -text
+ *.npz filter=lfs diff=lfs merge=lfs -text
+ *.onnx filter=lfs diff=lfs merge=lfs -text
+ *.ot filter=lfs diff=lfs merge=lfs -text
+ *.parquet filter=lfs diff=lfs merge=lfs -text
+ *.pb filter=lfs diff=lfs merge=lfs -text
+ *.pickle filter=lfs diff=lfs merge=lfs -text
+ *.pkl filter=lfs diff=lfs merge=lfs -text
+ *.pt filter=lfs diff=lfs merge=lfs -text
+ *.pth filter=lfs diff=lfs merge=lfs -text
+ *.rar filter=lfs diff=lfs merge=lfs -text
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
+ *.tflite filter=lfs diff=lfs merge=lfs -text
+ *.tgz filter=lfs diff=lfs merge=lfs -text
+ *.wasm filter=lfs diff=lfs merge=lfs -text
+ *.xz filter=lfs diff=lfs merge=lfs -text
+ *.zip filter=lfs diff=lfs merge=lfs -text
+ *.zst filter=lfs diff=lfs merge=lfs -text
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
+ scale-hf-logo.png filter=lfs diff=lfs merge=lfs -text
.gitignore CHANGED
@@ -1,45 +1,12 @@
- # See https://help.github.com/articles/ignoring-files/ for more about ignoring files.
-
- __pycache__
- .cache/
-
- # dependencies
-
- frontend/node_modules
- /.pnp
- .pnp.js
-
- # testing
-
- /coverage
-
- # production
-
- /build
-
- # misc
-
- .DS_Store
- .env.local
- .env.development.local
- .env.test.local
- .env.production.local
-
- npm-debug.log*
- yarn-debug.log*
- yarn-error.log*
-
- src/dataframe.json
-
- yarn.lock
- package-lock.json
-
- /public
-
- .claudesync/
-
- # Environment variables
+ venv/
+ __pycache__/
  .env
- .env.*
- !.env.example
+ .ipynb_checkpoints
+ *ipynb
+ .vscode/
+
+ eval-queue/
+ eval-results/
+ dynamic-info/

+ src/assets/model_counts.html
.pre-commit-config.yaml ADDED
@@ -0,0 +1,53 @@
+ # Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ #     http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
+ default_language_version:
+   python: python3
+
+ ci:
+   autofix_prs: true
+   autoupdate_commit_msg: '[pre-commit.ci] pre-commit suggestions'
+   autoupdate_schedule: quarterly
+
+ repos:
+   - repo: https://github.com/pre-commit/pre-commit-hooks
+     rev: v4.3.0
+     hooks:
+       - id: check-yaml
+       - id: check-case-conflict
+       - id: detect-private-key
+       - id: check-added-large-files
+         args: ['--maxkb=1000']
+       - id: requirements-txt-fixer
+       - id: end-of-file-fixer
+       - id: trailing-whitespace
+
+   - repo: https://github.com/PyCQA/isort
+     rev: 5.12.0
+     hooks:
+       - id: isort
+         name: Format imports
+
+   - repo: https://github.com/psf/black
+     rev: 22.12.0
+     hooks:
+       - id: black
+         name: Format code
+         additional_dependencies: ['click==8.0.2']
+
+   - repo: https://github.com/charliermarsh/ruff-pre-commit
+     # Ruff version.
+     rev: 'v0.0.267'
+     hooks:
+       - id: ruff
Dockerfile DELETED
@@ -1,62 +0,0 @@
- # Build frontend
- FROM node:18 as frontend-build
- WORKDIR /app
- COPY frontend/package*.json ./
- RUN npm install
- COPY frontend/ ./
-
- RUN npm run build
-
- # Build backend
- FROM python:3.12-slim
- WORKDIR /app
-
- # Create non-root user
- RUN useradd -m -u 1000 user
-
- # Install poetry
- RUN pip install poetry
-
- # Create and configure cache directory
- RUN mkdir -p /app/.cache && \
-     chown -R user:user /app
-
- # Copy and install backend dependencies
- COPY backend/pyproject.toml backend/poetry.lock* ./
- RUN poetry config virtualenvs.create false \
-     && poetry install --no-interaction --no-ansi --no-root --only main
-
- # Copy backend code
- COPY backend/ .
-
- # Install Node.js and npm
- RUN apt-get update && apt-get install -y \
-     curl \
-     netcat-openbsd \
-     && curl -fsSL https://deb.nodesource.com/setup_18.x | bash - \
-     && apt-get install -y nodejs \
-     && rm -rf /var/lib/apt/lists/*
-
- # Copy frontend server and build
- COPY --from=frontend-build /app/build ./frontend/build
- COPY --from=frontend-build /app/package*.json ./frontend/
- COPY --from=frontend-build /app/server.js ./frontend/
-
- # Install frontend production dependencies
- WORKDIR /app/frontend
- RUN npm install --production
- WORKDIR /app
-
- # Environment variables
- ENV HF_HOME=/app/.cache \
-     HF_DATASETS_CACHE=/app/.cache \
-     INTERNAL_API_PORT=7861 \
-     PORT=7860 \
-     NODE_ENV=production
-
- # Note: HF_TOKEN should be provided at runtime, not build time
- USER user
- EXPOSE 7860
-
- # Start both servers with wait-for
- CMD ["sh", "-c", "uvicorn app.asgi:app --host 0.0.0.0 --port 7861 & while ! nc -z localhost 7861; do sleep 1; done && cd frontend && npm run serve"]
Makefile ADDED
@@ -0,0 +1,13 @@
+ .PHONY: style format
+
+
+ style:
+ 	python -m black --line-length 119 .
+ 	python -m isort .
+ 	ruff check --fix .
+
+
+ quality:
+ 	python -m black --check --line-length 119 .
+ 	python -m isort --check-only .
+ 	ruff check .
README.md CHANGED
@@ -1,85 +1,23 @@
  ---
  title: Open LLM Leaderboard
  emoji: 🏆
- colorFrom: blue
- colorTo: red
- sdk: docker
- hf_oauth: true
+ colorFrom: green
+ colorTo: indigo
+ sdk: gradio
+ sdk_version: 4.9.0
+ app_file: app.py
  pinned: true
  license: apache-2.0
- duplicated_from: open-llm-leaderboard/open_llm_leaderboard
+ duplicated_from: HuggingFaceH4/open_llm_leaderboard
+ fullWidth: true
+ space_ci:
+   private: true
+   secrets:
+     - HF_TOKEN
+     - H4_TOKEN
  tags:
    - leaderboard
  short_description: Track, rank and evaluate open LLMs and chatbots
  ---

- # Open LLM Leaderboard
-
- Modern React interface for comparing Large Language Models (LLMs) in an open and reproducible way.
-
- ## Features
-
- - 📊 Interactive table with advanced sorting and filtering
- - 🔍 Semantic model search
- - 📌 Pin models for comparison
- - 📱 Responsive and modern interface
- - 🎨 Dark/Light mode
- - ⚡️ Optimized performance with virtualization
-
- ## Architecture
-
- The project is split into two main parts:
-
- ### Frontend (React)
-
- ```
- frontend/
- ├── src/
- │   ├── components/   # Reusable UI components
- │   ├── pages/        # Application pages
- │   ├── hooks/        # Custom React hooks
- │   ├── context/      # React contexts
- │   └── constants/    # Constants and configurations
- ├── public/           # Static assets
- └── server.js         # Express server for production
- ```
-
- ### Backend (FastAPI)
-
- ```
- backend/
- ├── app/
- │   ├── api/            # API router and endpoints
- │   │   └── endpoints/  # Specific API endpoints
- │   ├── core/           # Core functionality
- │   ├── config/         # Configuration
- │   └── services/       # Business logic services
- │       ├── leaderboard.py
- │       ├── models.py
- │       ├── votes.py
- │       └── hf_service.py
- └── utils/              # Utility functions
- ```
-
- ## Technologies
-
- ### Frontend
-
- - React
- - Material-UI
- - TanStack Table & Virtual
- - Express.js
-
- ### Backend
-
- - FastAPI
- - Hugging Face API
- - Docker
-
- ## Development
-
- The application is containerized using Docker and can be run using:
-
- ```bash
- docker-compose up
- ```
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,448 @@
+ import gradio as gr
+ import pandas as pd
+ from apscheduler.schedulers.background import BackgroundScheduler
+ from huggingface_hub import snapshot_download
+ from gradio_space_ci import enable_space_ci
+
+ from src.display.about import (
+     CITATION_BUTTON_LABEL,
+     CITATION_BUTTON_TEXT,
+     EVALUATION_QUEUE_TEXT,
+     INTRODUCTION_TEXT,
+     LLM_BENCHMARKS_TEXT,
+     FAQ_TEXT,
+     TITLE,
+ )
+ from src.display.css_html_js import custom_css
+ from src.display.utils import (
+     BENCHMARK_COLS,
+     COLS,
+     EVAL_COLS,
+     EVAL_TYPES,
+     NUMERIC_INTERVALS,
+     TYPES,
+     AutoEvalColumn,
+     ModelType,
+     fields,
+     WeightType,
+     Precision
+ )
+ from src.envs import API, EVAL_REQUESTS_PATH, DYNAMIC_INFO_REPO, DYNAMIC_INFO_FILE_PATH, DYNAMIC_INFO_PATH, EVAL_RESULTS_PATH, H4_TOKEN, IS_PUBLIC, QUEUE_REPO, REPO_ID, RESULTS_REPO
+ from src.populate import get_evaluation_queue_df, get_leaderboard_df
+ from src.submission.submit import add_new_eval
+ from src.scripts.update_all_request_files import update_dynamic_files
+ from src.tools.collections import update_collections
+ from src.tools.plots import (
+     create_metric_plot_obj,
+     create_plot_df,
+     create_scores_df,
+ )
+
+ # Start ephemeral Spaces on PRs (see config in README.md)
+ #enable_space_ci()
+
+ def restart_space():
+     API.restart_space(repo_id=REPO_ID, token=H4_TOKEN)
+
+
+ def init_space(full_init: bool = True):
+     if full_init:
+         try:
+             print(EVAL_REQUESTS_PATH)
+             snapshot_download(
+                 repo_id=QUEUE_REPO, local_dir=EVAL_REQUESTS_PATH, repo_type="dataset", tqdm_class=None, etag_timeout=30
+             )
+         except Exception:
+             restart_space()
+         try:
+             print(DYNAMIC_INFO_PATH)
+             snapshot_download(
+                 repo_id=DYNAMIC_INFO_REPO, local_dir=DYNAMIC_INFO_PATH, repo_type="dataset", tqdm_class=None, etag_timeout=30
+             )
+         except Exception:
+             restart_space()
+         try:
+             print(EVAL_RESULTS_PATH)
+             snapshot_download(
+                 repo_id=RESULTS_REPO, local_dir=EVAL_RESULTS_PATH, repo_type="dataset", tqdm_class=None, etag_timeout=30
+             )
+         except Exception:
+             restart_space()
+
+
+     raw_data, original_df = get_leaderboard_df(
+         results_path=EVAL_RESULTS_PATH,
+         requests_path=EVAL_REQUESTS_PATH,
+         dynamic_path=DYNAMIC_INFO_FILE_PATH,
+         cols=COLS,
+         benchmark_cols=BENCHMARK_COLS
+     )
+     update_collections(original_df.copy())
+     leaderboard_df = original_df.copy()
+
+     plot_df = create_plot_df(create_scores_df(raw_data))
+
+     (
+         finished_eval_queue_df,
+         running_eval_queue_df,
+         pending_eval_queue_df,
+     ) = get_evaluation_queue_df(EVAL_REQUESTS_PATH, EVAL_COLS)
+
+     return leaderboard_df, original_df, plot_df, finished_eval_queue_df, running_eval_queue_df, pending_eval_queue_df
+
+ leaderboard_df, original_df, plot_df, finished_eval_queue_df, running_eval_queue_df, pending_eval_queue_df = init_space()
+
+
+ # Searching and filtering
+ def update_table(
+     hidden_df: pd.DataFrame,
+     columns: list,
+     type_query: list,
+     precision_query: str,
+     size_query: list,
+     hide_models: list,
+     query: str,
+ ):
+     filtered_df = filter_models(df=hidden_df, type_query=type_query, size_query=size_query, precision_query=precision_query, hide_models=hide_models)
+     filtered_df = filter_queries(query, filtered_df)
+     df = select_columns(filtered_df, columns)
+     return df
+
+
+ def load_query(request: gr.Request):  # triggered only once at startup => read query parameter if it exists
+     query = request.query_params.get("query") or ""
+     return query, query  # return one for the "search_bar", one for a hidden component that triggers a reload only if value has changed
+
+
+ def search_table(df: pd.DataFrame, query: str) -> pd.DataFrame:
+     return df[(df[AutoEvalColumn.dummy.name].str.contains(query, case=False))]
+
+
+ def select_columns(df: pd.DataFrame, columns: list) -> pd.DataFrame:
+     always_here_cols = [c.name for c in fields(AutoEvalColumn) if c.never_hidden]
+     dummy_col = [AutoEvalColumn.dummy.name]
+     #AutoEvalColumn.model_type_symbol.name,
+     #AutoEvalColumn.model.name,
+     # We use COLS to maintain sorting
+     filtered_df = df[
+         always_here_cols + [c for c in COLS if c in df.columns and c in columns] + dummy_col
+     ]
+     return filtered_df
+
+
+ def filter_queries(query: str, filtered_df: pd.DataFrame):
+     """Added by Abishek"""
+     final_df = []
+     if query != "":
+         queries = [q.strip() for q in query.split(";")]
+         for _q in queries:
+             _q = _q.strip()
+             if _q != "":
+                 temp_filtered_df = search_table(filtered_df, _q)
+                 if len(temp_filtered_df) > 0:
+                     final_df.append(temp_filtered_df)
+         if len(final_df) > 0:
+             filtered_df = pd.concat(final_df)
+             filtered_df = filtered_df.drop_duplicates(
+                 subset=[AutoEvalColumn.model.name, AutoEvalColumn.precision.name, AutoEvalColumn.revision.name]
+             )
+
+     return filtered_df
+
+
+ def filter_models(
+     df: pd.DataFrame, type_query: list, size_query: list, precision_query: list, hide_models: list
+ ) -> pd.DataFrame:
+     # Show all models
+     if "Private or deleted" in hide_models:
+         filtered_df = df[df[AutoEvalColumn.still_on_hub.name] == True]
+     else:
+         filtered_df = df
+
+     if "Contains a merge/moerge" in hide_models:
+         filtered_df = filtered_df[filtered_df[AutoEvalColumn.merged.name] == False]
+
+     if "MoE" in hide_models:
+         filtered_df = filtered_df[filtered_df[AutoEvalColumn.moe.name] == False]
+
+     if "Flagged" in hide_models:
+         filtered_df = filtered_df[filtered_df[AutoEvalColumn.flagged.name] == False]
+
+     type_emoji = [t[0] for t in type_query]
+     filtered_df = filtered_df.loc[df[AutoEvalColumn.model_type_symbol.name].isin(type_emoji)]
+     filtered_df = filtered_df.loc[df[AutoEvalColumn.precision.name].isin(precision_query + ["None"])]
+
+     numeric_interval = pd.IntervalIndex(sorted([NUMERIC_INTERVALS[s] for s in size_query]))
+     params_column = pd.to_numeric(df[AutoEvalColumn.params.name], errors="coerce")
+     mask = params_column.apply(lambda x: any(numeric_interval.contains(x)))
+     filtered_df = filtered_df.loc[mask]
+
+     return filtered_df
+
+ leaderboard_df = filter_models(
+     df=leaderboard_df,
+     type_query=[t.to_str(" : ") for t in ModelType],
+     size_query=list(NUMERIC_INTERVALS.keys()),
+     precision_query=[i.value.name for i in Precision],
+     hide_models=["Private or deleted", "Contains a merge/moerge", "Flagged"], # Deleted, merges, flagged, MoEs
+ )
+
+ demo = gr.Blocks(css=custom_css)
+ with demo:
+     gr.HTML(TITLE)
+     gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
+
+     with gr.Tabs(elem_classes="tab-buttons") as tabs:
+         with gr.TabItem("🏅 LLM Benchmark", elem_id="llm-benchmark-tab-table", id=0):
+             with gr.Row():
+                 with gr.Column():
+                     with gr.Row():
+                         search_bar = gr.Textbox(
+                             placeholder=" 🔍 Search for your model (separate multiple queries with `;`) and press ENTER...",
+                             show_label=False,
+                             elem_id="search-bar",
+                         )
+                     with gr.Row():
+                         shown_columns = gr.CheckboxGroup(
+                             choices=[
+                                 c.name
+                                 for c in fields(AutoEvalColumn)
+                                 if not c.hidden and not c.never_hidden and not c.dummy
+                             ],
+                             value=[
+                                 c.name
+                                 for c in fields(AutoEvalColumn)
+                                 if c.displayed_by_default and not c.hidden and not c.never_hidden
+                             ],
+                             label="Select columns to show",
+                             elem_id="column-select",
+                             interactive=True,
+                         )
+                     with gr.Row():
+                         hide_models = gr.CheckboxGroup(
+                             label="Hide models",
+                             choices = ["Private or deleted", "Contains a merge/moerge", "Flagged", "MoE"],
+                             value=["Private or deleted", "Contains a merge/moerge", "Flagged"],
+                             interactive=True
+                         )
+                 with gr.Column(min_width=320):
+                     #with gr.Box(elem_id="box-filter"):
+                     filter_columns_type = gr.CheckboxGroup(
+                         label="Model types",
+                         choices=[t.to_str() for t in ModelType],
+                         value=[t.to_str() for t in ModelType],
+                         interactive=True,
+                         elem_id="filter-columns-type",
+                     )
+                     filter_columns_precision = gr.CheckboxGroup(
+                         label="Precision",
+                         choices=[i.value.name for i in Precision],
+                         value=[i.value.name for i in Precision],
+                         interactive=True,
+                         elem_id="filter-columns-precision",
+                     )
+                     filter_columns_size = gr.CheckboxGroup(
+                         label="Model sizes (in billions of parameters)",
+                         choices=list(NUMERIC_INTERVALS.keys()),
+                         value=list(NUMERIC_INTERVALS.keys()),
+                         interactive=True,
+                         elem_id="filter-columns-size",
+                     )
+
+             leaderboard_table = gr.components.Dataframe(
+                 value=leaderboard_df[
+                     [c.name for c in fields(AutoEvalColumn) if c.never_hidden]
+                     + shown_columns.value
+                     + [AutoEvalColumn.dummy.name]
+                 ],
+                 headers=[c.name for c in fields(AutoEvalColumn) if c.never_hidden] + shown_columns.value,
+                 datatype=TYPES,
+                 elem_id="leaderboard-table",
+                 interactive=False,
+                 visible=True,
+                 #column_widths=["2%", "33%"]
+             )
+
+             # Dummy leaderboard for handling the case when the user uses backspace key
+             hidden_leaderboard_table_for_search = gr.components.Dataframe(
+                 value=original_df[COLS],
+                 headers=COLS,
+                 datatype=TYPES,
+                 visible=False,
+             )
+             search_bar.submit(
+                 update_table,
+                 [
+                     hidden_leaderboard_table_for_search,
+                     shown_columns,
+                     filter_columns_type,
+                     filter_columns_precision,
+                     filter_columns_size,
+                     hide_models,
+                     search_bar,
+                 ],
+                 leaderboard_table,
+             )
+
+             # Define a hidden component that will trigger a reload only if a query parameter has been set
+             hidden_search_bar = gr.Textbox(value="", visible=False)
+             hidden_search_bar.change(
+                 update_table,
+                 [
+                     hidden_leaderboard_table_for_search,
+                     shown_columns,
+                     filter_columns_type,
+                     filter_columns_precision,
+                     filter_columns_size,
+                     hide_models,
+                     search_bar,
+                 ],
+                 leaderboard_table,
+             )
+             # Check query parameter once at startup and update search bar + hidden component
+             demo.load(load_query, inputs=[], outputs=[search_bar, hidden_search_bar])
+
+             for selector in [shown_columns, filter_columns_type, filter_columns_precision, filter_columns_size, hide_models]:
+                 selector.change(
+                     update_table,
+                     [
+                         hidden_leaderboard_table_for_search,
+                         shown_columns,
+                         filter_columns_type,
+                         filter_columns_precision,
+                         filter_columns_size,
+                         hide_models,
+                         search_bar,
+                     ],
+                     leaderboard_table,
+                     queue=True,
+                 )
+
+         with gr.TabItem("📈 Metrics through time", elem_id="llm-benchmark-tab-table", id=2):
+             with gr.Row():
+                 with gr.Column():
+                     chart = create_metric_plot_obj(
+                         plot_df,
+                         [AutoEvalColumn.average.name],
+                         title="Average of Top Scores and Human Baseline Over Time (from last update)",
+                     )
+                     gr.Plot(value=chart, min_width=500)
+                 with gr.Column():
+                     chart = create_metric_plot_obj(
+                         plot_df,
+                         BENCHMARK_COLS,
+                         title="Top Scores and Human Baseline Over Time (from last update)",
+                     )
+                     gr.Plot(value=chart, min_width=500)
+         with gr.TabItem("📝 About", elem_id="llm-benchmark-tab-table", id=3):
+             gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
+
+         with gr.TabItem("❗FAQ", elem_id="llm-benchmark-tab-table", id=4):
+             gr.Markdown(FAQ_TEXT, elem_classes="markdown-text")
+
+         with gr.TabItem("🚀 Submit ", elem_id="llm-benchmark-tab-table", id=5):
+             with gr.Column():
+                 with gr.Row():
+                     gr.Markdown(EVALUATION_QUEUE_TEXT, elem_classes="markdown-text")
+
+                 with gr.Row():
+                     gr.Markdown("# ✉️✨ Submit your model here!", elem_classes="markdown-text")
+
+                 with gr.Row():
+                     with gr.Column():
+                         model_name_textbox = gr.Textbox(label="Model name")
+                         revision_name_textbox = gr.Textbox(label="Revision commit", placeholder="main")
+                         private = gr.Checkbox(False, label="Private", visible=not IS_PUBLIC)
+                         model_type = gr.Dropdown(
+                             choices=[t.to_str(" : ") for t in ModelType if t != ModelType.Unknown],
+                             label="Model type",
+                             multiselect=False,
+                             value=ModelType.FT.to_str(" : "),
+                             interactive=True,
+                         )
+
+                     with gr.Column():
+                         precision = gr.Dropdown(
+                             choices=[i.value.name for i in Precision if i != Precision.Unknown],
+                             label="Precision",
+                             multiselect=False,
+                             value="float16",
+                             interactive=True,
+                         )
+                         weight_type = gr.Dropdown(
+                             choices=[i.value.name for i in WeightType],
+                             label="Weights type",
+                             multiselect=False,
+                             value="Original",
+                             interactive=True,
+                         )
+                         base_model_name_textbox = gr.Textbox(label="Base model (for delta or adapter weights)")
+
+             with gr.Column():
+                 with gr.Accordion(
+                     f"✅ Finished Evaluations ({len(finished_eval_queue_df)})",
+                     open=False,
+                 ):
+                     with gr.Row():
+                         finished_eval_table = gr.components.Dataframe(
+                             value=finished_eval_queue_df,
+                             headers=EVAL_COLS,
+                             datatype=EVAL_TYPES,
+                             row_count=5,
+                         )
+                 with gr.Accordion(
+                     f"🔄 Running Evaluation Queue ({len(running_eval_queue_df)})",
+                     open=False,
+                 ):
+                     with gr.Row():
+                         running_eval_table = gr.components.Dataframe(
+                             value=running_eval_queue_df,
+                             headers=EVAL_COLS,
+                             datatype=EVAL_TYPES,
+                             row_count=5,
+                         )
+
+                 with gr.Accordion(
+                     f"⏳ Pending Evaluation Queue ({len(pending_eval_queue_df)})",
+                     open=False,
+                 ):
+                     with gr.Row():
+                         pending_eval_table = gr.components.Dataframe(
+                             value=pending_eval_queue_df,
+                             headers=EVAL_COLS,
+                             datatype=EVAL_TYPES,
+                             row_count=5,
+                         )
+
+             submit_button = gr.Button("Submit Eval")
+             submission_result = gr.Markdown()
+             submit_button.click(
+                 add_new_eval,
+                 [
+                     model_name_textbox,
+                     base_model_name_textbox,
+                     revision_name_textbox,
+                     precision,
+                     private,
+                     weight_type,
+                     model_type,
+                 ],
+                 submission_result,
+             )
+
+     with gr.Row():
+         with gr.Accordion("📙 Citation", open=False):
+             citation_button = gr.Textbox(
+                 value=CITATION_BUTTON_TEXT,
+                 label=CITATION_BUTTON_LABEL,
+                 lines=20,
+                 elem_id="citation-button",
+                 show_copy_button=True,
+             )
+
+ scheduler = BackgroundScheduler()
+ scheduler.add_job(restart_space, "interval", hours=3) # restarted every 3h
+ scheduler.add_job(update_dynamic_files, "interval", hours=2) # launched every 2 hour
+ scheduler.start()
+
+ demo.queue(default_concurrency_limit=40).launch()
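
In `filter_queries` above, the search box accepts several sub-queries separated by `;`: each is matched case-insensitively against the hidden `dummy` column, the per-query hits are OR-ed together, and duplicates are dropped. A minimal sketch of those semantics, assuming a toy DataFrame with a plain `model` column rather than the leaderboard's real `AutoEvalColumn` fields:

```python
import pandas as pd

def search(df: pd.DataFrame, query: str, col: str = "model") -> pd.DataFrame:
    """Mimic filter_queries: split on ';', OR the sub-queries, drop duplicates."""
    parts = [q.strip() for q in query.split(";") if q.strip()]
    if not parts:
        return df
    hits = [df[df[col].str.contains(q, case=False)] for q in parts]
    hits = [h for h in hits if len(h) > 0]
    # Like the original, fall back to the unfiltered frame when nothing matched
    return pd.concat(hits).drop_duplicates() if hits else df

toy = pd.DataFrame({"model": ["org/llama-7b", "org/mistral-7b", "other/gpt2"]})
print(search(toy, "llama; mistral"))  # both 7b rows
print(search(toy, "nonexistent"))     # whole frame, not an empty table
```

Note the fallback: when no sub-query matches anything, the original frame is returned unchanged, which is why an unmatched search leaves the table full rather than empty.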
backend/Dockerfile.dev DELETED
@@ -1,25 +0,0 @@
- FROM python:3.12-slim
-
- WORKDIR /app
-
- # Install required system dependencies
- RUN apt-get update && apt-get install -y \
-     build-essential \
-     && rm -rf /var/lib/apt/lists/*
-
- # Install poetry
- RUN pip install poetry
-
- # Copy Poetry configuration files
- COPY pyproject.toml poetry.lock* ./
-
- # Install dependencies
- RUN poetry config virtualenvs.create false && \
-     poetry install --no-interaction --no-ansi --no-root
-
- # Environment variables configuration for logs
- ENV PYTHONUNBUFFERED=1
- ENV LOG_LEVEL=INFO
-
- # In dev, mount volume directly
- CMD ["uvicorn", "app.asgi:app", "--host", "0.0.0.0", "--port", "7860", "--reload", "--log-level", "warning", "--no-access-log"]
backend/README.md DELETED
@@ -1,352 +0,0 @@
- # Backend - Open LLM Leaderboard 🏆
-
- FastAPI backend for the Open LLM Leaderboard. This service is part of a larger architecture that includes a React frontend. For complete project installation, see the [main README](../README.md).
-
- ## ✨ Features
-
- - 📊 REST API for LLM models leaderboard management
- - 🗳️ Voting and ranking system
- - 🔄 HuggingFace Hub integration
- - 🚀 Caching and performance optimizations
-
- ## 🏗 Architecture
-
- ```mermaid
- flowchart TD
-     Client(["**Frontend**<br><br>React Application"]) --> API["**API Server**<br><br>FastAPI REST Endpoints"]
-
-     subgraph Backend
-         API --> Core["**Core Layer**<br><br>• Middleware<br>• Cache<br>• Rate Limiting"]
-         Core --> Services["**Services Layer**<br><br>• Business Logic<br>• Data Processing"]
-
-         subgraph Services Layer
-             Services --> Models["**Model Service**<br><br>• Model Submission<br>• Evaluation Pipeline"]
-             Services --> Votes["**Vote Service**<br><br>• Vote Management<br>• Data Synchronization"]
-             Services --> Board["**Leaderboard Service**<br><br>• Rankings<br>• Performance Metrics"]
-         end
-
-         Models --> Cache["**Cache Layer**<br><br>• In-Memory Store<br>• Auto Invalidation"]
-         Votes --> Cache
-         Board --> Cache
-
-         Models --> HF["**HuggingFace Hub**<br><br>• Models Repository<br>• Datasets Access"]
-         Votes --> HF
-         Board --> HF
-     end
-
-     style Client fill:#f9f,stroke:#333,stroke-width:2px
-     style Models fill:#bbf,stroke:#333,stroke-width:2px
-     style Votes fill:#bbf,stroke:#333,stroke-width:2px
-     style Board fill:#bbf,stroke:#333,stroke-width:2px
-     style HF fill:#bfb,stroke:#333,stroke-width:2px
- ```
-
- ## 🛠️ HuggingFace Datasets
-
- The application uses several datasets on the HuggingFace Hub:
-
- ### 1. Requests Dataset (`{HF_ORGANIZATION}/requests`)
-
- - **Operations**:
-   - 📤 `POST /api/models/submit`: Adds a JSON file for each new model submission
-   - 📥 `GET /api/models/status`: Reads files to get models status
- - **Format**: One JSON file per model with submission details
- - **Updates**: On each new model submission
-
- ### 2. Votes Dataset (`{HF_ORGANIZATION}/votes`)
-
- - **Operations**:
-   - 📤 `POST /api/votes/{model_id}`: Adds a new vote
-   - 📥 `GET /api/votes/model/{provider}/{model}`: Reads model votes
-   - 📥 `GET /api/votes/user/{user_id}`: Reads user votes
- - **Format**: JSONL with one vote per line
- - **Sync**: Bidirectional between local cache and Hub
-
- ### 3. Contents Dataset (`{HF_ORGANIZATION}/contents`)
-
- - **Operations**:
-   - 📥 `GET /api/leaderboard`: Reads raw data
-   - 📥 `GET /api/leaderboard/formatted`: Reads and formats data
- - **Format**: Main dataset containing all scores and metrics
- - **Updates**: Automatic after model evaluations
-
- ### 4. Official Providers Dataset (`{HF_ORGANIZATION}/official-providers`)
-
- - **Operations**:
-   - 📥 Read-only access for highlighted models
- - **Format**: List of models selected by maintainers
- - **Updates**: Manual by maintainers
-
- ## 🛠 Local Development
-
- ### Prerequisites
-
- - Python 3.9+
- - [Poetry](https://python-poetry.org/docs/#installation)
-
- ### Standalone Installation (without Docker)
-
- ```bash
- # Install dependencies
- poetry install
-
- # Setup configuration
- cp .env.example .env
-
- # Start development server
- poetry run uvicorn app.asgi:app --host 0.0.0.0 --port 7860 --reload
- ```
-
- Server will be available at http://localhost:7860
-
- ## ⚙️ Configuration
-
- | Variable     | Description                          | Default     |
- | ------------ | ------------------------------------ | ----------- |
- | ENVIRONMENT  | Environment (development/production) | development |
- | HF_TOKEN     | HuggingFace authentication token     | -           |
- | PORT         | Server port                          | 7860        |
- | LOG_LEVEL    | Logging level (INFO/DEBUG/WARNING)   | INFO        |
- | CORS_ORIGINS | Allowed CORS origins                 | ["*"]       |
- | CACHE_TTL    | Cache Time To Live in seconds        | 300         |
-
- ## 🔧 Middleware
-
- The backend uses several middleware layers for optimal performance and security:
-
- - **CORS Middleware**: Handles Cross-Origin Resource Sharing
- - **GZIP Middleware**: Compresses responses > 500 bytes
- - **Rate Limiting**: Prevents API abuse
- - **Caching**: In-memory caching with automatic invalidation
-
- ## 📝 Logging
-
- The application uses a structured logging system with:
-
- - Formatted console output
- - Different log levels per component
- - Request/Response logging
- - Performance metrics
- - Error tracking
-
- ## 📁 File Structure
-
- ```
- backend/
- ├── app/                  # Source code
- │   ├── api/              # Routes and endpoints
- │   │   └── endpoints/    # Endpoint handlers
- │   ├── core/             # Configurations
- │   ├── services/         # Business logic
- │   └── utils/            # Utilities
- └── tests/                # Tests
- ```
-
- ## 📚 API
-
- Swagger documentation available at http://localhost:7860/docs
-
- ### Main Endpoints & Data Structures
-
- #### Leaderboard
-
- - `GET /api/leaderboard/formatted` - Formatted data with computed fields and metadata
-
-   ```typescript
-   Response {
-     models: [{
-       id: string,  // eval_name
-       model: {
-         name: string,  // fullname
-         sha: string,  // Model sha
-         precision: string,  // e.g. "fp16", "int8"
-         type: string,  // e.g. "fined-tuned-on-domain-specific-dataset"
-         weight_type: string,
-         architecture: string,
-         average_score: number,
-         has_chat_template: boolean
-       },
-       evaluations: {
-         ifeval: {
-           name: "IFEval",
-           value: number,  // Raw score
-           normalized_score: number
-         },
-         bbh: {
-           name: "BBH",
-           value: number,
-           normalized_score: number
-         },
-         math: {
-           name: "MATH Level 5",
-           value: number,
-           normalized_score: number
-         },
-         gpqa: {
-           name: "GPQA",
-           value: number,
-           normalized_score: number
-         },
-         musr: {
-           name: "MUSR",
-           value: number,
-           normalized_score: number
-         },
-         mmlu_pro: {
-           name: "MMLU-PRO",
-           value: number,
-           normalized_score: number
-         }
-       },
-       features: {
-         is_not_available_on_hub: boolean,
-         is_merged: boolean,
-         is_moe: boolean,
-         is_flagged: boolean,
-         is_official_provider: boolean
-       },
-       metadata: {
-         upload_date: string,
-         submission_date: string,
-         generation: string,
-         base_model: string,
-         hub_license: string,
-         hub_hearts: number,
-         params_billions: number,
-         co2_cost: number  // CO₂ cost in kg
-       }
-     }]
-   }
-   ```
-
- - `GET /api/leaderboard` - Raw data from the HuggingFace dataset
-   ```typescript
-   Response {
-     models: [{
-       eval_name: string,
-       Precision: string,
-       Type: string,
-       "Weight type": string,
-       Architecture: string,
-       Model: string,
-       fullname: string,
-       "Model sha": string,
-       "Average ⬆️": number,
-       "Hub License": string,
-       "Hub ❤️": number,
-       "#Params (B)": number,
-       "Available on the hub": boolean,
-       Merged: boolean,
-       MoE: boolean,
-       Flagged: boolean,
-       "Chat Template": boolean,
-       "CO₂ cost (kg)": number,
-       "IFEval Raw": number,
-       IFEval: number,
-       "BBH Raw": number,
-       BBH: number,
-       "MATH Lvl 5 Raw": number,
-       "MATH Lvl 5": number,
-       "GPQA Raw": number,
-       GPQA: number,
-       "MUSR Raw": number,
-       MUSR: number,
-       "MMLU-PRO Raw": number,
-       "MMLU-PRO": number,
-       "Maintainer's Highlight": boolean,
-       "Upload To Hub Date": string,
-       "Submission Date": string,
-       Generation: string,
-       "Base Model": string
-     }]
-   }
-   ```
-
- #### Models
-
- - `GET /api/models/status` - Get all models grouped by status
-   ```typescript
-   Response {
-     pending: [{
-       name: string,
-       submitter: string,
-       revision: string,
-       wait_time: string,
-       submission_time: string,
-       status: "PENDING" | "EVALUATING" | "FINISHED",
-       precision: string
-     }],
-     evaluating: Array<Model>,
-     finished: Array<Model>
-   }
-   ```
- - `GET /api/models/pending` - Get pending models only
- - `POST /api/models/submit` - Submit model
-
-   ```typescript
-   Request {
-     user_id: string,
-     model_id: string,
-     base_model?: string,
-     precision?: string,
-     model_type: string
-   }
-
-   Response {
-     status: string,
-     message: string
-   }
-   ```
-
- - `GET /api/models/{model_id}/status` - Get model status
-
- #### Votes
-
- - `POST /api/votes/{model_id}` - Vote
-
-   ```typescript
-   Request {
-     vote_type: "up" | "down",
-     user_id: string  // HuggingFace username
-   }
-
-   Response {
-     success: boolean,
-     message: string
-   }
-   ```
-
- - `GET /api/votes/model/{provider}/{model}` - Get model votes
-   ```typescript
-   Response {
-     total_votes: number,
-     up_votes: number,
-     down_votes: number
-   }
-   ```
- - `GET /api/votes/user/{user_id}` - Get user votes
-   ```typescript
-   Response Array<{
-     model_id: string,
-     vote_type: string,
-     timestamp: string
-   }>
-   ```
-
- ## 🔒 Authentication
-
- The backend uses HuggingFace token-based authentication for secure API access. Make sure to:
-
- 1. Set your HF_TOKEN in the .env file
- 2. Include the token in API requests via Bearer authentication
- 3. Keep your token secure and never commit it to version control
-
- ## 🚀 Performance
-
- The backend implements several optimizations:
-
- - In-memory caching with configurable TTL (Time To Live)
- - Batch processing for model evaluations
- - Rate limiting for API endpoints
- - Efficient database queries with proper indexing
- - Automatic cache invalidation for votes
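The vote endpoint implemented in `backend/app/api/endpoints/votes.py` (further down in this diff) takes `vote_type` and `user_id` as query parameters rather than a JSON body. A hedged client-side sketch using `requests`; the base URL and the model and user identifiers are illustrative assumptions, not values from this PR:

```python
import requests  # third-party: pip install requests

BASE = "http://localhost:7860/api"  # assumed local dev server, per the README above

# Cast an upvote; vote_type and user_id go in the query string.
r = requests.post(
    f"{BASE}/votes/some-org/some-model",  # hypothetical model id
    params={"vote_type": "up", "user_id": "some-hf-username"},
)
r.raise_for_status()

# Read the aggregated votes back for the same model.
votes = requests.get(f"{BASE}/votes/model/some-org/some-model").json()
print(votes["total_votes"], votes["up_votes"], votes["down_votes"])
```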
backend/__init__.py DELETED
File without changes
backend/app/api/__init__.py DELETED
@@ -1,5 +0,0 @@
- """
- API package initialization
- """
-
- __all__ = ["endpoints"]
backend/app/api/dependencies.py DELETED
@@ -1,34 +0,0 @@
- from fastapi import Depends, HTTPException
- import logging
- from app.services.models import ModelService
- from app.services.votes import VoteService
- from app.core.formatting import LogFormatter
-
- logger = logging.getLogger(__name__)
-
- model_service = ModelService()
- vote_service = VoteService()
-
- async def get_model_service() -> ModelService:
-     """Dependency to get ModelService instance"""
-     try:
-         logger.info(LogFormatter.info("Initializing model service dependency"))
-         await model_service.initialize()
-         logger.info(LogFormatter.success("Model service initialized"))
-         return model_service
-     except Exception as e:
-         error_msg = "Failed to initialize model service"
-         logger.error(LogFormatter.error(error_msg, e))
-         raise HTTPException(status_code=500, detail=str(e))
-
- async def get_vote_service() -> VoteService:
-     """Dependency to get VoteService instance"""
-     try:
-         logger.info(LogFormatter.info("Initializing vote service dependency"))
-         await vote_service.initialize()
-         logger.info(LogFormatter.success("Vote service initialized"))
-         return vote_service
-     except Exception as e:
-         error_msg = "Failed to initialize vote service"
-         logger.error(LogFormatter.error(error_msg, e))
-         raise HTTPException(status_code=500, detail=str(e))
backend/app/api/endpoints/leaderboard.py DELETED
@@ -1,49 +0,0 @@
- from fastapi import APIRouter
- from typing import List, Dict, Any
- from app.services.leaderboard import LeaderboardService
- from app.core.fastapi_cache import cached, build_cache_key
- import logging
- from app.core.formatting import LogFormatter
-
- logger = logging.getLogger(__name__)
- router = APIRouter()
- leaderboard_service = LeaderboardService()
-
- def leaderboard_key_builder(func, namespace: str = "leaderboard", **kwargs):
-     """Build cache key for leaderboard data"""
-     key_type = "raw" if func.__name__ == "get_leaderboard" else "formatted"
-     key = build_cache_key(namespace, key_type)
-     logger.debug(LogFormatter.info(f"Built leaderboard cache key: {key}"))
-     return key
-
- @router.get("")
- @cached(expire=300, key_builder=leaderboard_key_builder)
- async def get_leaderboard() -> List[Dict[str, Any]]:
-     """
-     Get raw leaderboard data
-     Response will be automatically GZIP compressed if size > 500 bytes
-     """
-     try:
-         logger.info(LogFormatter.info("Fetching raw leaderboard data"))
-         data = await leaderboard_service.fetch_raw_data()
-         logger.info(LogFormatter.success(f"Retrieved {len(data)} leaderboard entries"))
-         return data
-     except Exception as e:
-         logger.error(LogFormatter.error("Failed to fetch raw leaderboard data", e))
-         raise
-
- @router.get("/formatted")
- @cached(expire=300, key_builder=leaderboard_key_builder)
- async def get_formatted_leaderboard() -> List[Dict[str, Any]]:
-     """
-     Get formatted leaderboard data with restructured objects
-     Response will be automatically GZIP compressed if size > 500 bytes
-     """
-     try:
-         logger.info(LogFormatter.info("Fetching formatted leaderboard data"))
-         data = await leaderboard_service.get_formatted_data()
-         logger.info(LogFormatter.success(f"Retrieved {len(data)} formatted entries"))
-         return data
-     except Exception as e:
-         logger.error(LogFormatter.error("Failed to fetch formatted leaderboard data", e))
-         raise
backend/app/api/endpoints/models.py DELETED
@@ -1,103 +0,0 @@
- from fastapi import APIRouter, HTTPException, Depends
- from typing import Dict, Any, List
- import logging
- from app.services.models import ModelService
- from app.api.dependencies import get_model_service
- from app.core.fastapi_cache import cached
- from app.core.formatting import LogFormatter
-
- logger = logging.getLogger(__name__)
- router = APIRouter(tags=["models"])
-
- @router.get("/status")
- @cached(expire=300)
- async def get_models_status(
-     model_service: ModelService = Depends(get_model_service)
- ) -> Dict[str, List[Dict[str, Any]]]:
-     """Get all models grouped by status"""
-     try:
-         logger.info(LogFormatter.info("Fetching status for all models"))
-         result = await model_service.get_models()
-         stats = {
-             status: len(models) for status, models in result.items()
-         }
-         for line in LogFormatter.stats(stats, "Models by Status"):
-             logger.info(line)
-         return result
-     except Exception as e:
-         logger.error(LogFormatter.error("Failed to get models status", e))
-         raise HTTPException(status_code=500, detail=str(e))
-
- @router.get("/pending")
- @cached(expire=60)
- async def get_pending_models(
-     model_service: ModelService = Depends(get_model_service)
- ) -> List[Dict[str, Any]]:
-     """Get all models waiting for evaluation"""
-     try:
-         logger.info(LogFormatter.info("Fetching pending models"))
-         models = await model_service.get_models()
-         pending = models.get("pending", [])
-         logger.info(LogFormatter.success(f"Found {len(pending)} pending models"))
-         return pending
-     except Exception as e:
-         logger.error(LogFormatter.error("Failed to get pending models", e))
-         raise HTTPException(status_code=500, detail=str(e))
-
- @router.post("/submit")
- async def submit_model(
-     model_data: Dict[str, Any],
-     model_service: ModelService = Depends(get_model_service)
- ) -> Dict[str, Any]:
-     try:
-         logger.info(LogFormatter.section("MODEL SUBMISSION"))
-
-         user_id = model_data.pop('user_id', None)
-         if not user_id:
-             error_msg = "user_id is required"
-             logger.error(LogFormatter.error("Validation failed", error_msg))
-             raise ValueError(error_msg)
-
-         # Log submission details
-         submission_info = {
-             "Model_ID": model_data.get("model_id"),
-             "User": user_id,
-             "Base_Model": model_data.get("base_model"),
-             "Precision": model_data.get("precision"),
-             "Model_Type": model_data.get("model_type")
-         }
-         for line in LogFormatter.tree(submission_info, "Submission Details"):
-             logger.info(line)
-
-         result = await model_service.submit_model(model_data, user_id)
-         logger.info(LogFormatter.success("Model submitted successfully"))
-         return result
-
-     except ValueError as e:
-         logger.error(LogFormatter.error("Invalid submission data", e))
-         raise HTTPException(status_code=400, detail=str(e))
-     except Exception as e:
-         logger.error(LogFormatter.error("Submission failed", e))
-         raise HTTPException(status_code=500, detail=str(e))
-
- @router.get("/{model_id}/status")
- async def get_model_status(
-     model_id: str,
-     model_service: ModelService = Depends(get_model_service)
- ) -> Dict[str, Any]:
-     try:
-         logger.info(LogFormatter.info(f"Checking status for model: {model_id}"))
-         status = await model_service.get_model_status(model_id)
-
-         if status["status"] != "not_found":
-             logger.info(LogFormatter.success("Status found"))
-             for line in LogFormatter.tree(status, "Model Status"):
-                 logger.info(line)
-         else:
-             logger.warning(LogFormatter.warning(f"No status found for model: {model_id}"))
-
-         return status
-
-     except Exception as e:
-         logger.error(LogFormatter.error("Failed to get model status", e))
-         raise HTTPException(status_code=500, detail=str(e))
backend/app/api/endpoints/votes.py DELETED
@@ -1,105 +0,0 @@
- from fastapi import APIRouter, HTTPException, Query, Depends
- from typing import Dict, Any, List
- from app.services.votes import VoteService
- from app.core.fastapi_cache import cached, build_cache_key, invalidate_cache_key
- import logging
- from app.core.formatting import LogFormatter
-
- logger = logging.getLogger(__name__)
- router = APIRouter()
- vote_service = VoteService()
-
- def model_votes_key_builder(func, namespace: str = "model_votes", **kwargs):
-     """Build cache key for model votes"""
-     provider = kwargs.get('provider')
-     model = kwargs.get('model')
-     key = build_cache_key(namespace, provider, model)
-     logger.debug(LogFormatter.info(f"Built model votes cache key: {key}"))
-     return key
-
- def user_votes_key_builder(func, namespace: str = "user_votes", **kwargs):
-     """Build cache key for user votes"""
-     user_id = kwargs.get('user_id')
-     key = build_cache_key(namespace, user_id)
-     logger.debug(LogFormatter.info(f"Built user votes cache key: {key}"))
-     return key
-
- @router.post("/{model_id:path}")
- async def add_vote(
-     model_id: str,
-     vote_type: str = Query(..., description="Type of vote (up/down)"),
-     user_id: str = Query(..., description="HuggingFace username")
- ) -> Dict[str, Any]:
-     try:
-         logger.info(LogFormatter.section("ADDING VOTE"))
-         stats = {
-             "Model": model_id,
-             "User": user_id,
-             "Type": vote_type
-         }
-         for line in LogFormatter.tree(stats, "Vote Details"):
-             logger.info(line)
-
-         await vote_service.initialize()
-         result = await vote_service.add_vote(model_id, user_id, vote_type)
-
-         # Invalidate affected caches
-         try:
-             logger.info(LogFormatter.subsection("CACHE INVALIDATION"))
-             provider, model = model_id.split('/', 1)
-
-             # Build and invalidate cache keys
-             model_cache_key = build_cache_key("model_votes", provider, model)
-             user_cache_key = build_cache_key("user_votes", user_id)
-
-             invalidate_cache_key(model_cache_key)
-             invalidate_cache_key(user_cache_key)
-
-             cache_stats = {
-                 "Model_Cache": model_cache_key,
-                 "User_Cache": user_cache_key
-             }
-             for line in LogFormatter.tree(cache_stats, "Invalidated Caches"):
-                 logger.info(line)
-
-         except Exception as e:
-             logger.error(LogFormatter.error("Failed to invalidate cache", e))
-
-         return result
-     except Exception as e:
-         logger.error(LogFormatter.error("Failed to add vote", e))
-         raise HTTPException(status_code=400, detail=str(e))
-
- @router.get("/model/{provider}/{model}")
- @cached(expire=60, key_builder=model_votes_key_builder)
- async def get_model_votes(
-     provider: str,
-     model: str
- ) -> Dict[str, Any]:
-     """Get all votes for a specific model"""
-     try:
-         logger.info(LogFormatter.info(f"Fetching votes for model: {provider}/{model}"))
-         await vote_service.initialize()
-         model_id = f"{provider}/{model}"
-         result = await vote_service.get_model_votes(model_id)
-         logger.info(LogFormatter.success(f"Found {result.get('total_votes', 0)} votes"))
-         return result
-     except Exception as e:
-         logger.error(LogFormatter.error("Failed to get model votes", e))
-         raise HTTPException(status_code=400, detail=str(e))
-
- @router.get("/user/{user_id}")
- @cached(expire=60, key_builder=user_votes_key_builder)
- async def get_user_votes(
-     user_id: str
- ) -> List[Dict[str, Any]]:
-     """Get all votes from a specific user"""
-     try:
-         logger.info(LogFormatter.info(f"Fetching votes for user: {user_id}"))
-         await vote_service.initialize()
-         votes = await vote_service.get_user_votes(user_id)
-         logger.info(LogFormatter.success(f"Found {len(votes)} votes"))
-         return votes
-     except Exception as e:
-         logger.error(LogFormatter.error("Failed to get user votes", e))
-         raise HTTPException(status_code=400, detail=str(e))
backend/app/api/router.py DELETED
@@ -1,9 +0,0 @@
- from fastapi import APIRouter
-
- from app.api.endpoints import leaderboard, votes, models
-
- router = APIRouter()
-
- router.include_router(leaderboard.router, prefix="/leaderboard", tags=["leaderboard"])
- router.include_router(votes.router, prefix="/votes", tags=["votes"])
- router.include_router(models.router, prefix="/models", tags=["models"])
backend/app/asgi.py DELETED
@@ -1,106 +0,0 @@
- """
- ASGI entry point for the Open LLM Leaderboard API.
- """
- import os
- import uvicorn
- import logging
- import logging.config
- from fastapi import FastAPI
- from fastapi.middleware.cors import CORSMiddleware
- from fastapi.middleware.gzip import GZipMiddleware
- import sys
-
- from app.api.router import router
- from app.core.fastapi_cache import setup_cache
- from app.core.formatting import LogFormatter
- from app.config import hf_config
-
- # Configure logging before anything else
- LOGGING_CONFIG = {
-     "version": 1,
-     "disable_existing_loggers": True,
-     "formatters": {
-         "default": {
-             "format": "%(name)s - %(levelname)s - %(message)s",
-         }
-     },
-     "handlers": {
-         "default": {
-             "formatter": "default",
-             "class": "logging.StreamHandler",
-             "stream": "ext://sys.stdout",
-         }
-     },
-     "loggers": {
-         "uvicorn": {
-             "handlers": ["default"],
-             "level": "WARNING",
-             "propagate": False,
-         },
-         "uvicorn.error": {
-             "level": "WARNING",
-             "handlers": ["default"],
-             "propagate": False,
-         },
-         "uvicorn.access": {
-             "handlers": ["default"],
-             "level": "WARNING",
-             "propagate": False,
-         },
-         "app": {
-             "handlers": ["default"],
-             "level": "WARNING",
-             "propagate": False,
-         }
-     },
-     "root": {
-         "handlers": ["default"],
-         "level": "WARNING",
-     }
- }
-
- # Apply logging configuration
- logging.config.dictConfig(LOGGING_CONFIG)
- logger = logging.getLogger("app")
-
- # Create FastAPI application
- app = FastAPI(
-     title="Open LLM Leaderboard",
-     version="1.0.0",
-     docs_url="/docs",
- )
-
- # Add CORS middleware
- app.add_middleware(
-     CORSMiddleware,
-     allow_origins=["*"],
-     allow_credentials=True,
-     allow_methods=["*"],
-     allow_headers=["*"],
- )
-
- # Add GZIP compression
- app.add_middleware(GZipMiddleware, minimum_size=500)
-
- # Include API router
- app.include_router(router, prefix="/api")
-
- @app.on_event("startup")
- async def startup_event():
-     """Initialize services on startup"""
-     logger.info("\n")
-     logger.info(LogFormatter.section("APPLICATION STARTUP"))
-
-     # Log HF configuration
-     logger.info(LogFormatter.section("HUGGING FACE CONFIGURATION"))
-     logger.info(LogFormatter.info(f"Organization: {hf_config.HF_ORGANIZATION}"))
-     logger.info(LogFormatter.info(f"Token Status: {'Present' if hf_config.HF_TOKEN else 'Missing'}"))
-     logger.info(LogFormatter.info(f"Using repositories:"))
-     logger.info(LogFormatter.info(f"  - Queue: {hf_config.QUEUE_REPO}"))
-     logger.info(LogFormatter.info(f"  - Aggregated: {hf_config.AGGREGATED_REPO}"))
-     logger.info(LogFormatter.info(f"  - Votes: {hf_config.VOTES_REPO}"))
-     logger.info(LogFormatter.info(f"  - Official Providers: {hf_config.OFFICIAL_PROVIDERS_REPO}"))
-
-     # Setup cache
-     setup_cache()
-     logger.info(LogFormatter.success("FastAPI Cache initialized with in-memory backend"))
backend/app/config/__init__.py DELETED
@@ -1,6 +0,0 @@
- """
- Configuration module for the Open LLM Leaderboard backend.
- All configuration values are imported from base.py to avoid circular dependencies.
- """
-
- from .base import *
backend/app/config/base.py DELETED
@@ -1,38 +0,0 @@
- import os
- from pathlib import Path
-
- # Server configuration
- HOST = "0.0.0.0"
- PORT = 7860
- WORKERS = 4
- RELOAD = True if os.environ.get("ENVIRONMENT") == "development" else False
-
- # CORS configuration
- ORIGINS = ["http://localhost:3000"] if os.getenv("ENVIRONMENT") == "development" else ["*"]
-
- # Cache configuration
- CACHE_TTL = int(os.environ.get("CACHE_TTL", 300))  # 5 minutes default
-
- # Rate limiting
- RATE_LIMIT_PERIOD = 7  # days
- RATE_LIMIT_QUOTA = 5
- HAS_HIGHER_RATE_LIMIT = []
-
- # HuggingFace configuration
- HF_TOKEN = os.environ.get("HF_TOKEN")
- HF_ORGANIZATION = "open-llm-leaderboard"
- API = {
-     "INFERENCE": "https://api-inference.huggingface.co/models",
-     "HUB": "https://huggingface.co"
- }
-
- # Cache paths
- CACHE_ROOT = Path(os.environ.get("HF_HOME", ".cache"))
- DATASETS_CACHE = CACHE_ROOT / "datasets"
- MODELS_CACHE = CACHE_ROOT / "models"
- VOTES_CACHE = CACHE_ROOT / "votes"
- EVAL_CACHE = CACHE_ROOT / "eval-queue"
-
- # Repository configuration
- QUEUE_REPO = f"{HF_ORGANIZATION}/requests"
- EVAL_REQUESTS_PATH = EVAL_CACHE / "eval_requests.jsonl"
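
A quick sketch of how these values respond to the environment (the HF_HOME and CACHE_TTL overrides are hypothetical; import order matters because the module reads the environment at import time):

    import os
    os.environ["HF_HOME"] = "/data/.cache"   # hypothetical override
    os.environ["CACHE_TTL"] = "600"

    from app.config import base  # reads the environment at import time
    print(base.CACHE_ROOT)       # /data/.cache
    print(base.DATASETS_CACHE)   # /data/.cache/datasets
    print(base.CACHE_TTL)        # 600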
 
backend/app/config/hf_config.py DELETED
@@ -1,30 +0,0 @@
- import os
- import logging
- from typing import Optional
- from huggingface_hub import HfApi
- from pathlib import Path
- from app.core.cache import cache_config
-
- logger = logging.getLogger(__name__)
-
- # Organization or user who owns the datasets
- HF_ORGANIZATION = "open-llm-leaderboard"
-
- # Get HF token directly from environment
- HF_TOKEN = os.environ.get("HF_TOKEN")
- if not HF_TOKEN:
-     logger.warning("HF_TOKEN not found in environment variables. Some features may be limited.")
-
- # Initialize HF API
- API = HfApi(token=HF_TOKEN)
-
- # Repository configuration
- QUEUE_REPO = f"{HF_ORGANIZATION}/requests"
- AGGREGATED_REPO = f"{HF_ORGANIZATION}/contents"
- VOTES_REPO = f"{HF_ORGANIZATION}/votes"
- OFFICIAL_PROVIDERS_REPO = f"{HF_ORGANIZATION}/official-providers"
-
- # File paths from cache config
- VOTES_PATH = cache_config.votes_file
- EVAL_REQUESTS_PATH = cache_config.eval_requests_file
- MODEL_CACHE_DIR = cache_config.models_cache
 
backend/app/config/logging_config.py DELETED
@@ -1,38 +0,0 @@
- import logging
- import sys
- from tqdm import tqdm
-
- def get_tqdm_handler():
-     """
-     Creates a special handler for tqdm that doesn't interfere with other logs.
-     """
-     class TqdmLoggingHandler(logging.Handler):
-         def emit(self, record):
-             try:
-                 msg = self.format(record)
-                 tqdm.write(msg)
-                 self.flush()
-             except Exception:
-                 self.handleError(record)
-
-     return TqdmLoggingHandler()
-
- def setup_service_logger(service_name: str) -> logging.Logger:
-     """
-     Configure a specific logger for a given service.
-     """
-     logger = logging.getLogger(f"app.services.{service_name}")
-
-     # If the logger already has handlers, don't reconfigure it
-     if logger.handlers:
-         return logger
-
-     # Add tqdm handler for this service
-     tqdm_handler = get_tqdm_handler()
-     tqdm_handler.setFormatter(logging.Formatter('%(name)s - %(levelname)s - %(message)s'))
-     logger.addHandler(tqdm_handler)
-
-     # Don't propagate logs to parent loggers
-     logger.propagate = False
-
-     return logger
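
A usage sketch for the helper above (the service name is hypothetical): because records are written through tqdm.write, log lines print above an active progress bar instead of breaking it.

    from tqdm import tqdm
    from app.config.logging_config import setup_service_logger

    logger = setup_service_logger("models")  # hypothetical service name
    for i in tqdm(range(100), desc="processing"):
        if i % 25 == 0:
            logger.warning("checkpoint at item %d", i)  # emitted via tqdm.write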
 
backend/app/core/cache.py DELETED
@@ -1,109 +0,0 @@
- import os
- import shutil
- from pathlib import Path
- from datetime import timedelta
- import logging
- from app.core.formatting import LogFormatter
- from app.config.base import (
-     CACHE_ROOT,
-     DATASETS_CACHE,
-     MODELS_CACHE,
-     VOTES_CACHE,
-     EVAL_CACHE,
-     CACHE_TTL
- )
-
- logger = logging.getLogger(__name__)
-
- class CacheConfig:
-     def __init__(self):
-         # Get cache paths from config
-         self.cache_root = CACHE_ROOT
-         self.datasets_cache = DATASETS_CACHE
-         self.models_cache = MODELS_CACHE
-         self.votes_cache = VOTES_CACHE
-         self.eval_cache = EVAL_CACHE
-
-         # Specific files
-         self.votes_file = self.votes_cache / "votes_data.jsonl"
-         self.eval_requests_file = self.eval_cache / "eval_requests.jsonl"
-
-         # Cache TTL
-         self.cache_ttl = timedelta(seconds=CACHE_TTL)
-
-         self._initialize_cache_dirs()
-         self._setup_environment()
-
-     def _initialize_cache_dirs(self):
-         """Initialize all necessary cache directories"""
-         try:
-             logger.info(LogFormatter.section("CACHE INITIALIZATION"))
-
-             cache_dirs = {
-                 "Root": self.cache_root,
-                 "Datasets": self.datasets_cache,
-                 "Models": self.models_cache,
-                 "Votes": self.votes_cache,
-                 "Eval": self.eval_cache
-             }
-
-             for name, cache_dir in cache_dirs.items():
-                 cache_dir.mkdir(parents=True, exist_ok=True)
-                 logger.info(LogFormatter.success(f"{name} cache directory: {cache_dir}"))
-
-         except Exception as e:
-             logger.error(LogFormatter.error("Failed to create cache directories", e))
-             raise
-
-     def _setup_environment(self):
-         """Configure HuggingFace environment variables"""
-         logger.info(LogFormatter.subsection("ENVIRONMENT SETUP"))
-
-         env_vars = {
-             "HF_HOME": str(self.cache_root),
-             "HF_DATASETS_CACHE": str(self.datasets_cache)
-         }
-
-         for var, value in env_vars.items():
-             os.environ[var] = value
-             logger.info(LogFormatter.info(f"Set {var}={value}"))
-
-     def get_cache_path(self, cache_type: str) -> Path:
-         """Returns the path for a specific cache type"""
-         cache_paths = {
-             "datasets": self.datasets_cache,
-             "models": self.models_cache,
-             "votes": self.votes_cache,
-             "eval": self.eval_cache
-         }
-         return cache_paths.get(cache_type, self.cache_root)
-
-     def flush_cache(self, cache_type: str = None):
-         """Flush specified cache or all caches if no type is specified"""
-         try:
-             if cache_type:
-                 logger.info(LogFormatter.section(f"FLUSHING {cache_type.upper()} CACHE"))
-                 cache_dir = self.get_cache_path(cache_type)
-                 if cache_dir.exists():
-                     stats = {
-                         "Cache_Type": cache_type,
-                         "Directory": str(cache_dir)
-                     }
-                     for line in LogFormatter.tree(stats, "Cache Details"):
-                         logger.info(line)
-                     shutil.rmtree(cache_dir)
-                     cache_dir.mkdir(parents=True, exist_ok=True)
-                     logger.info(LogFormatter.success("Cache cleared successfully"))
-             else:
-                 logger.info(LogFormatter.section("FLUSHING ALL CACHES"))
-                 for cache_type in ["datasets", "models", "votes", "eval"]:
-                     self.flush_cache(cache_type)
-                 logger.info(LogFormatter.success("All caches cleared successfully"))
-
-         except Exception as e:
-             logger.error(LogFormatter.error("Failed to flush cache", e))
-             raise
-
- # Singleton instance of cache configuration
- cache_config = CacheConfig()
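
A minimal sketch of how this singleton is consumed elsewhere in the backend (paths shown assume the default HF_HOME of .cache):

    from app.core.cache import cache_config

    datasets_dir = cache_config.get_cache_path("datasets")  # .cache/datasets
    print(datasets_dir, cache_config.cache_ttl)             # TTL as a timedelta
    cache_config.flush_cache("datasets")  # wipes and recreates the datasets cache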
 
backend/app/core/fastapi_cache.py DELETED
@@ -1,48 +0,0 @@
- from fastapi_cache import FastAPICache
- from fastapi_cache.backends.inmemory import InMemoryBackend
- from fastapi_cache.decorator import cache
- from datetime import timedelta
- from app.config import CACHE_TTL
- import logging
- from app.core.formatting import LogFormatter
-
- logger = logging.getLogger(__name__)
-
- def setup_cache():
-     """Initialize FastAPI Cache with in-memory backend"""
-     FastAPICache.init(
-         backend=InMemoryBackend(),
-         prefix="fastapi-cache",
-         expire=CACHE_TTL
-     )
-     logger.info(LogFormatter.success("FastAPI Cache initialized with in-memory backend"))
-
- def invalidate_cache_key(key: str):
-     """Invalidate a specific cache key"""
-     try:
-         backend = FastAPICache.get_backend()
-         if hasattr(backend, 'delete'):
-             backend.delete(key)
-             logger.info(LogFormatter.success(f"Cache invalidated for key: {key}"))
-         else:
-             logger.warning(LogFormatter.warning("Cache backend does not support deletion"))
-     except Exception as e:
-         logger.error(LogFormatter.error(f"Failed to invalidate cache key: {key}", e))
-
- def build_cache_key(namespace: str, *args) -> str:
-     """Build a consistent cache key"""
-     key = f"fastapi-cache:{namespace}:{':'.join(str(arg) for arg in args)}"
-     logger.debug(LogFormatter.info(f"Built cache key: {key}"))
-     return key
-
- def cached(expire: int = CACHE_TTL, key_builder=None):
-     """Decorator for caching endpoint responses
-
-     Args:
-         expire (int): Cache TTL in seconds
-         key_builder (callable, optional): Custom key builder function
-     """
-     return cache(
-         expire=expire,
-         key_builder=key_builder
-     )
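
A sketch of the cached wrapper applied to an endpoint (the route and handler are hypothetical; setup_cache must run before the first cached call, as the startup hook in app.py above does):

    from fastapi import FastAPI
    from app.core.fastapi_cache import setup_cache, cached

    app = FastAPI()
    setup_cache()  # FastAPICache.init must precede any cached handler

    @app.get("/api/leaderboard")  # hypothetical route
    @cached(expire=300)
    async def leaderboard():
        return {"rows": []}  # stands in for the expensive computation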
 
backend/app/core/formatting.py DELETED
@@ -1,104 +0,0 @@
- import logging
- from typing import Dict, Any, List, Optional
-
- logger = logging.getLogger(__name__)
-
- class LogFormatter:
-     """Utility class for consistent log formatting across the application"""
-
-     @staticmethod
-     def section(title: str) -> str:
-         """Create a section header"""
-         return f"\n{'='*20} {title.upper()} {'='*20}"
-
-     @staticmethod
-     def subsection(title: str) -> str:
-         """Create a subsection header"""
-         return f"\n{'─'*20} {title} {'─'*20}"
-
-     @staticmethod
-     def tree(items: Dict[str, Any], title: str = None) -> List[str]:
-         """Create a tree view of dictionary data"""
-         lines = []
-         if title:
-             lines.append(f"📊 {title}:")
-
-         # Get the maximum length for alignment
-         max_key_length = max(len(str(k)) for k in items.keys())
-
-         # Format each item
-         for i, (key, value) in enumerate(items.items()):
-             prefix = "└──" if i == len(items) - 1 else "├──"
-             if isinstance(value, (int, float)):
-                 value = f"{value:,}"  # Add thousand separators
-             lines.append(f"{prefix} {str(key):<{max_key_length}}: {value}")
-
-         return lines
-
-     @staticmethod
-     def stats(stats: Dict[str, int], title: str = None) -> List[str]:
-         """Format statistics with icons"""
-         lines = []
-         if title:
-             lines.append(f"📊 {title}:")
-
-         # Get the maximum length for alignment
-         max_key_length = max(len(str(k)) for k in stats.keys())
-
-         # Format each stat with an appropriate icon
-         icons = {
-             "total": "📌",
-             "success": "✅",
-             "error": "❌",
-             "pending": "⏳",
-             "processing": "⚙️",
-             "finished": "✨",
-             "evaluating": "🔄",
-             "downloads": "⬇️",
-             "files": "📁",
-             "cached": "💾",
-             "size": "📏",
-             "time": "⏱️",
-             "rate": "🚀"
-         }
-
-         # Format each item
-         for i, (key, value) in enumerate(stats.items()):
-             prefix = "└──" if i == len(stats) - 1 else "├──"
-             icon = icons.get(key.lower().split('_')[0], "•")
-             if isinstance(value, (int, float)):
-                 value = f"{value:,}"  # Add thousand separators
-             lines.append(f"{prefix} {icon} {str(key):<{max_key_length}}: {value}")
-
-         return lines
-
-     @staticmethod
-     def progress_bar(current: int, total: int, width: int = 20) -> str:
-         """Create a progress bar"""
-         percentage = (current * 100) // total
-         filled = "█" * (percentage * width // 100)
-         empty = "░" * (width - len(filled))
-         return f"{filled}{empty} {percentage:3d}%"
-
-     @staticmethod
-     def error(message: str, error: Optional[Exception] = None) -> str:
-         """Format error message"""
-         error_msg = f"\n❌ Error: {message}"
-         if error:
-             error_msg += f"\n └── Details: {str(error)}"
-         return error_msg
-
-     @staticmethod
-     def success(message: str) -> str:
-         """Format success message"""
-         return f"✅ {message}"
-
-     @staticmethod
-     def warning(message: str) -> str:
-         """Format warning message"""
-         return f"⚠️ {message}"
-
-     @staticmethod
-     def info(message: str) -> str:
-         """Format info message"""
-         return f"ℹ️ {message}"
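
A short sketch of the formatter in use (values are illustrative; basicConfig is only there so INFO records actually print):

    import logging
    from app.core.formatting import LogFormatter

    logging.basicConfig(level=logging.INFO)
    logger = logging.getLogger("app")
    logger.info(LogFormatter.section("sync"))        # ==================== SYNC ====================
    for line in LogFormatter.stats({"total": 1234, "error": 2}, "Results"):
        logger.info(line)                            # tree lines with 📌/❌ icons and aligned keys
    logger.info(LogFormatter.progress_bar(30, 100))  # ██████░░░░░░░░░░░░░░  30%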
 
backend/app/main.py DELETED
@@ -1,18 +0,0 @@
- from fastapi import FastAPI
- from app.config.logging_config import setup_logging
- import logging
-
- # Initialize logging configuration
- setup_logging()
- logger = logging.getLogger(__name__)
-
- app = FastAPI(title="Open LLM Leaderboard API")
-
- @app.on_event("startup")
- async def startup_event():
-     logger.info("Starting up the application...")
-
- # Import and include routers after app initialization
- from app.api import models, votes
- app.include_router(models.router, prefix="/api", tags=["models"])
- app.include_router(votes.router, prefix="/api", tags=["votes"])
 
backend/app/services/__init__.py DELETED
@@ -1,3 +0,0 @@
- from . import hf_service, leaderboard, votes, models
-
- __all__ = ["hf_service", "leaderboard", "votes", "models"]
 
backend/app/services/hf_service.py DELETED
@@ -1,50 +0,0 @@
- from typing import Optional
- from huggingface_hub import HfApi
- from app.config import HF_TOKEN, API
- from app.core.cache import cache_config
- from app.core.formatting import LogFormatter
- import logging
-
- logger = logging.getLogger(__name__)
-
- class HuggingFaceService:
-     def __init__(self):
-         self.api = API
-         self.token = HF_TOKEN
-         self.cache_dir = cache_config.models_cache
-
-     async def check_authentication(self) -> bool:
-         """Check if the HF token is valid"""
-         if not self.token:
-             return False
-         try:
-             logger.info(LogFormatter.info("Checking HF token validity..."))
-             self.api.get_token_permission()
-             logger.info(LogFormatter.success("HF token is valid"))
-             return True
-         except Exception as e:
-             logger.error(LogFormatter.error("HF token validation failed", e))
-             return False
-
-     async def get_user_info(self) -> Optional[dict]:
-         """Get information about the authenticated user"""
-         try:
-             logger.info(LogFormatter.info("Fetching user information..."))
-             info = self.api.get_token_permission()
-             logger.info(LogFormatter.success(f"User info retrieved for: {info.get('user', 'Unknown')}"))
-             return info
-         except Exception as e:
-             logger.error(LogFormatter.error("Failed to get user info", e))
-             return None
-
-     def _log_repo_operation(self, operation: str, repo: str, details: str = None):
-         """Helper to log repository operations"""
-         logger.info(LogFormatter.section(f"HF REPOSITORY OPERATION - {operation.upper()}"))
-         stats = {
-             "Operation": operation,
-             "Repository": repo,
-         }
-         if details:
-             stats["Details"] = details
-         for line in LogFormatter.tree(stats):
-             logger.info(line)
 
backend/app/services/leaderboard.py DELETED
@@ -1,208 +0,0 @@
- from app.core.cache import cache_config
- from datetime import datetime
- from typing import List, Dict, Any
- import datasets
- from fastapi import HTTPException
- import logging
- from app.config.base import HF_ORGANIZATION
- from app.core.formatting import LogFormatter
-
- logger = logging.getLogger(__name__)
-
- class LeaderboardService:
-     def __init__(self):
-         pass
-
-     async def fetch_raw_data(self) -> List[Dict[str, Any]]:
-         """Fetch raw leaderboard data from HuggingFace dataset"""
-         try:
-             logger.info(LogFormatter.section("FETCHING LEADERBOARD DATA"))
-             logger.info(LogFormatter.info(f"Loading dataset from {HF_ORGANIZATION}/contents"))
-
-             dataset = datasets.load_dataset(
-                 f"{HF_ORGANIZATION}/contents",
-                 cache_dir=cache_config.get_cache_path("datasets")
-             )["train"]
-
-             df = dataset.to_pandas()
-             data = df.to_dict('records')
-
-             stats = {
-                 "Total_Entries": len(data),
-                 "Dataset_Size": f"{df.memory_usage(deep=True).sum() / 1024 / 1024:.1f}MB"
-             }
-             for line in LogFormatter.stats(stats, "Dataset Statistics"):
-                 logger.info(line)
-
-             return data
-
-         except Exception as e:
-             logger.error(LogFormatter.error("Failed to fetch leaderboard data", e))
-             raise HTTPException(status_code=500, detail=str(e))
-
-     async def get_formatted_data(self) -> List[Dict[str, Any]]:
-         """Get formatted leaderboard data"""
-         try:
-             logger.info(LogFormatter.section("FORMATTING LEADERBOARD DATA"))
-
-             raw_data = await self.fetch_raw_data()
-             formatted_data = []
-             type_counts = {}
-             error_count = 0
-
-             # Initialize progress tracking
-             total_items = len(raw_data)
-             logger.info(LogFormatter.info(f"Processing {total_items:,} entries..."))
-
-             for i, item in enumerate(raw_data, 1):
-                 try:
-                     formatted_item = await self.transform_data(item)
-                     formatted_data.append(formatted_item)
-
-                     # Count model types
-                     model_type = formatted_item["model"]["type"]
-                     type_counts[model_type] = type_counts.get(model_type, 0) + 1
-
-                 except Exception as e:
-                     error_count += 1
-                     logger.error(LogFormatter.error(f"Failed to format entry {i}/{total_items}", e))
-                     continue
-
-                 # Log progress every 10%
-                 if i % max(1, total_items // 10) == 0:
-                     progress = (i / total_items) * 100
-                     logger.info(LogFormatter.info(f"Progress: {LogFormatter.progress_bar(i, total_items)}"))
-
-             # Log final statistics
-             stats = {
-                 "Total_Processed": total_items,
-                 "Successful": len(formatted_data),
-                 "Failed": error_count
-             }
-             logger.info(LogFormatter.section("PROCESSING SUMMARY"))
-             for line in LogFormatter.stats(stats, "Processing Statistics"):
-                 logger.info(line)
-
-             # Log model type distribution
-             type_stats = {f"Type_{k}": v for k, v in type_counts.items()}
-             logger.info(LogFormatter.subsection("MODEL TYPE DISTRIBUTION"))
-             for line in LogFormatter.stats(type_stats):
-                 logger.info(line)
-
-             return formatted_data
-
-         except Exception as e:
-             logger.error(LogFormatter.error("Failed to format leaderboard data", e))
-             raise HTTPException(status_code=500, detail=str(e))
-
-     async def transform_data(self, data: Dict[str, Any]) -> Dict[str, Any]:
-         """Transform raw data into the format expected by the frontend"""
-         try:
-             # Extract model name for logging
-             model_name = data.get("fullname", "Unknown")
-             logger.debug(LogFormatter.info(f"Transforming data for model: {model_name}"))
-
-             # Create unique ID combining model name, precision, sha and chat template status
-             unique_id = f"{data.get('fullname', 'Unknown')}_{data.get('Precision', 'Unknown')}_{data.get('Model sha', 'Unknown')}_{str(data.get('Chat Template', False))}"
-
-             evaluations = {
-                 "ifeval": {
-                     "name": "IFEval",
-                     "value": data.get("IFEval Raw", 0),
-                     "normalized_score": data.get("IFEval", 0)
-                 },
-                 "bbh": {
-                     "name": "BBH",
-                     "value": data.get("BBH Raw", 0),
-                     "normalized_score": data.get("BBH", 0)
-                 },
-                 "math": {
-                     "name": "MATH Level 5",
-                     "value": data.get("MATH Lvl 5 Raw", 0),
-                     "normalized_score": data.get("MATH Lvl 5", 0)
-                 },
-                 "gpqa": {
-                     "name": "GPQA",
-                     "value": data.get("GPQA Raw", 0),
-                     "normalized_score": data.get("GPQA", 0)
-                 },
-                 "musr": {
-                     "name": "MUSR",
-                     "value": data.get("MUSR Raw", 0),
-                     "normalized_score": data.get("MUSR", 0)
-                 },
-                 "mmlu_pro": {
-                     "name": "MMLU-PRO",
-                     "value": data.get("MMLU-PRO Raw", 0),
-                     "normalized_score": data.get("MMLU-PRO", 0)
-                 }
-             }
-
-             features = {
-                 "is_not_available_on_hub": data.get("Available on the hub", False),
-                 "is_merged": data.get("Merged", False),
-                 "is_moe": data.get("MoE", False),
-                 "is_flagged": data.get("Flagged", False),
-                 "is_official_provider": data.get("Official Providers", False)
-             }
-
-             metadata = {
-                 "upload_date": data.get("Upload To Hub Date"),
-                 "submission_date": data.get("Submission Date"),
-                 "generation": data.get("Generation"),
-                 "base_model": data.get("Base Model"),
-                 "hub_license": data.get("Hub License"),
-                 "hub_hearts": data.get("Hub ❤️"),
-                 "params_billions": data.get("#Params (B)"),
-                 "co2_cost": data.get("CO₂ cost (kg)", 0)
-             }
-
-             # Clean model type by removing emojis if present
-             original_type = data.get("Type", "")
-             model_type = original_type.lower().strip()
-
-             # Remove emojis and parentheses
-             if "(" in model_type:
-                 model_type = model_type.split("(")[0].strip()
-             model_type = ''.join(c for c in model_type if not c in '🔶🟢🟩💬🤝🌸 ')
-
-             # Map old model types to new ones
-             model_type_mapping = {
-                 "fine-tuned": "fined-tuned-on-domain-specific-dataset",
-                 "fine tuned": "fined-tuned-on-domain-specific-dataset",
-                 "finetuned": "fined-tuned-on-domain-specific-dataset",
-                 "fine_tuned": "fined-tuned-on-domain-specific-dataset",
-                 "ft": "fined-tuned-on-domain-specific-dataset",
-                 "finetuning": "fined-tuned-on-domain-specific-dataset",
-                 "fine tuning": "fined-tuned-on-domain-specific-dataset",
-                 "fine-tuning": "fined-tuned-on-domain-specific-dataset"
-             }
-
-             mapped_type = model_type_mapping.get(model_type.lower().strip(), model_type)
-
-             if mapped_type != model_type:
-                 logger.debug(LogFormatter.info(f"Model type mapped: {original_type} -> {mapped_type}"))
-
-             transformed_data = {
-                 "id": unique_id,
-                 "model": {
-                     "name": data.get("fullname"),
-                     "sha": data.get("Model sha"),
-                     "precision": data.get("Precision"),
-                     "type": mapped_type,
-                     "weight_type": data.get("Weight type"),
-                     "architecture": data.get("Architecture"),
-                     "average_score": data.get("Average ⬆️"),
-                     "has_chat_template": data.get("Chat Template", False)
-                 },
-                 "evaluations": evaluations,
-                 "features": features,
-                 "metadata": metadata
-             }
-
-             logger.debug(LogFormatter.success(f"Successfully transformed data for {model_name}"))
-             return transformed_data
-
-         except Exception as e:
-             logger.error(LogFormatter.error(f"Failed to transform data for {data.get('fullname', 'Unknown')}", e))
-             raise
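
A driver sketch for this service (run inside an event loop; the dataset download requires network access and an HF token with read access):

    import asyncio
    from app.services.leaderboard import LeaderboardService

    async def main():
        service = LeaderboardService()
        rows = await service.get_formatted_data()
        print(len(rows), rows[0]["model"]["name"] if rows else None)

    asyncio.run(main())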
 
backend/app/services/models.py DELETED
@@ -1,587 +0,0 @@
- from datetime import datetime, timezone
- from typing import Dict, Any, Optional, List
- import json
- import os
- from pathlib import Path
- import logging
- import aiohttp
- import asyncio
- import time
- from huggingface_hub import HfApi, CommitOperationAdd
- from huggingface_hub.utils import build_hf_headers
- from datasets import disable_progress_bar
- import sys
- import contextlib
- from concurrent.futures import ThreadPoolExecutor
- import tempfile
-
- from app.config import (
-     QUEUE_REPO,
-     HF_TOKEN,
-     EVAL_REQUESTS_PATH
- )
- from app.config.hf_config import HF_ORGANIZATION
- from app.services.hf_service import HuggingFaceService
- from app.utils.model_validation import ModelValidator
- from app.services.votes import VoteService
- from app.core.cache import cache_config
- from app.core.formatting import LogFormatter
-
- # Disable datasets progress bars globally
- disable_progress_bar()
-
- logger = logging.getLogger(__name__)
-
- # Context manager to temporarily disable stdout and stderr
- @contextlib.contextmanager
- def suppress_output():
-     stdout = sys.stdout
-     stderr = sys.stderr
-     devnull = open(os.devnull, 'w')
-     try:
-         sys.stdout = devnull
-         sys.stderr = devnull
-         yield
-     finally:
-         sys.stdout = stdout
-         sys.stderr = stderr
-         devnull.close()
-
- class ProgressTracker:
-     def __init__(self, total: int, desc: str = "Progress", update_frequency: int = 10):
-         self.total = total
-         self.current = 0
-         self.desc = desc
-         self.start_time = time.time()
-         self.update_frequency = update_frequency  # Percentage steps
-         self.last_update = -1
-
-         # Initial log with fancy formatting
-         logger.info(LogFormatter.section(desc))
-         logger.info(LogFormatter.info(f"Starting processing of {total:,} items..."))
-         sys.stdout.flush()
-
-     def update(self, n: int = 1):
-         self.current += n
-         current_percentage = (self.current * 100) // self.total
-
-         # Only update on frequency steps (e.g., 0%, 10%, 20%, etc.)
-         if current_percentage >= self.last_update + self.update_frequency or current_percentage == 100:
-             elapsed = time.time() - self.start_time
-             rate = self.current / elapsed if elapsed > 0 else 0
-             remaining = (self.total - self.current) / rate if rate > 0 else 0
-
-             # Create progress stats
-             stats = {
-                 "Progress": LogFormatter.progress_bar(self.current, self.total),
-                 "Items": f"{self.current:,}/{self.total:,}",
-                 "Time": f"⏱️ {elapsed:.1f}s elapsed, {remaining:.1f}s remaining",
-                 "Rate": f"🚀 {rate:.1f} items/s"
-             }
-
-             # Log progress using tree format
-             for line in LogFormatter.tree(stats):
-                 logger.info(line)
-             sys.stdout.flush()
-
-             self.last_update = (current_percentage // self.update_frequency) * self.update_frequency
-
-     def close(self):
-         elapsed = time.time() - self.start_time
-         rate = self.total / elapsed if elapsed > 0 else 0
-
-         # Final summary with fancy formatting
-         logger.info(LogFormatter.section("COMPLETED"))
-         stats = {
-             "Total": f"{self.total:,} items",
-             "Time": f"{elapsed:.1f}s",
-             "Rate": f"{rate:.1f} items/s"
-         }
-         for line in LogFormatter.stats(stats):
-             logger.info(line)
-         logger.info("="*50)
-         sys.stdout.flush()
-
- class ModelService(HuggingFaceService):
-     _instance: Optional['ModelService'] = None
-     _initialized = False
-
-     def __new__(cls):
-         if cls._instance is None:
-             logger.info(LogFormatter.info("Creating new ModelService instance"))
-             cls._instance = super(ModelService, cls).__new__(cls)
-         return cls._instance
-
-     def __init__(self):
-         if not hasattr(self, '_init_done'):
-             logger.info(LogFormatter.section("MODEL SERVICE INITIALIZATION"))
-             super().__init__()
-             self.validator = ModelValidator()
-             self.vote_service = VoteService()
-             self.eval_requests_path = cache_config.eval_requests_file
-             logger.info(LogFormatter.info(f"Using eval requests path: {self.eval_requests_path}"))
-
-             self.eval_requests_path.parent.mkdir(parents=True, exist_ok=True)
-             self.hf_api = HfApi(token=HF_TOKEN)
-             self.cached_models = None
-             self.last_cache_update = 0
-             self.cache_ttl = cache_config.cache_ttl.total_seconds()
-             self._init_done = True
-             logger.info(LogFormatter.success("Initialization complete"))
-
-     async def _download_and_process_file(self, file: str, session: aiohttp.ClientSession, progress: ProgressTracker) -> Optional[Dict]:
-         """Download and process a file asynchronously"""
-         try:
-             # Build file URL
-             url = f"https://huggingface.co/datasets/{QUEUE_REPO}/resolve/main/{file}"
-             headers = build_hf_headers(token=self.token)
-
-             # Download file
-             async with session.get(url, headers=headers) as response:
-                 if response.status != 200:
-                     logger.error(LogFormatter.error(f"Failed to download {file}", f"HTTP {response.status}"))
-                     progress.update()
-                     return None
-
-                 try:
-                     # First read content as text
-                     text_content = await response.text()
-                     # Then parse JSON
-                     content = json.loads(text_content)
-                 except json.JSONDecodeError as e:
-                     logger.error(LogFormatter.error(f"Failed to decode JSON from {file}", e))
-                     progress.update()
-                     return None
-
-                 # Get status and determine target status
-                 status = content.get("status", "PENDING").upper()
-                 target_status = None
-                 status_map = {
-                     "PENDING": ["PENDING"],
-                     "EVALUATING": ["RUNNING"],
-                     "FINISHED": ["FINISHED"]
-                 }
-
-                 for target, source_statuses in status_map.items():
-                     if status in source_statuses:
-                         target_status = target
-                         break
-
-                 if not target_status:
-                     progress.update()
-                     return None
-
-                 # Calculate wait time
-                 try:
-                     submit_time = datetime.fromisoformat(content["submitted_time"].replace("Z", "+00:00"))
-                     if submit_time.tzinfo is None:
-                         submit_time = submit_time.replace(tzinfo=timezone.utc)
-                     current_time = datetime.now(timezone.utc)
-                     wait_time = current_time - submit_time
-
-                     model_info = {
-                         "name": content["model"],
-                         "submitter": content.get("sender", "Unknown"),
-                         "revision": content["revision"],
-                         "wait_time": f"{wait_time.total_seconds():.1f}s",
-                         "submission_time": content["submitted_time"],
-                         "status": target_status,
-                         "precision": content.get("precision", "Unknown")
-                     }
-
-                     progress.update()
-                     return model_info
-
-                 except (ValueError, TypeError) as e:
-                     logger.error(LogFormatter.error(f"Failed to process {file}", e))
-                     progress.update()
-                     return None
-
-         except Exception as e:
-             logger.error(LogFormatter.error(f"Failed to load {file}", e))
-             progress.update()
-             return None
-
-     async def _refresh_models_cache(self):
-         """Refresh the models cache"""
-         try:
-             logger.info(LogFormatter.section("CACHE REFRESH"))
-             self._log_repo_operation("read", f"{HF_ORGANIZATION}/requests", "Refreshing models cache")
-
-             # Initialize models dictionary
-             models = {
-                 "finished": [],
-                 "evaluating": [],
-                 "pending": []
-             }
-
-             try:
-                 logger.info(LogFormatter.subsection("DATASET LOADING"))
-                 logger.info(LogFormatter.info("Loading dataset files..."))
-
-                 # List files in repository
-                 with suppress_output():
-                     files = self.hf_api.list_repo_files(
-                         repo_id=QUEUE_REPO,
-                         repo_type="dataset",
-                         token=self.token
-                     )
-
-                 # Filter JSON files
-                 json_files = [f for f in files if f.endswith('.json')]
-                 total_files = len(json_files)
-
-                 # Log repository stats
-                 stats = {
-                     "Total_Files": len(files),
-                     "JSON_Files": total_files,
-                 }
-                 for line in LogFormatter.stats(stats, "Repository Statistics"):
-                     logger.info(line)
-
-                 if not json_files:
-                     raise Exception("No JSON files found in repository")
-
-                 # Initialize progress tracker
-                 progress = ProgressTracker(total_files, "PROCESSING FILES")
-
-                 try:
-                     # Create aiohttp session to reuse connections
-                     async with aiohttp.ClientSession() as session:
-                         # Process files in chunks
-                         chunk_size = 50
-
-                         for i in range(0, len(json_files), chunk_size):
-                             chunk = json_files[i:i + chunk_size]
-                             chunk_tasks = [
-                                 self._download_and_process_file(file, session, progress)
-                                 for file in chunk
-                             ]
-                             results = await asyncio.gather(*chunk_tasks)
-
-                             # Process results
-                             for result in results:
-                                 if result:
-                                     status = result.pop("status")
-                                     models[status.lower()].append(result)
-
-                 finally:
-                     progress.close()
-
-                 # Final summary with fancy formatting
-                 logger.info(LogFormatter.section("CACHE SUMMARY"))
-                 stats = {
-                     "Finished": len(models["finished"]),
-                     "Evaluating": len(models["evaluating"]),
-                     "Pending": len(models["pending"])
-                 }
-                 for line in LogFormatter.stats(stats, "Models by Status"):
-                     logger.info(line)
-                 logger.info("="*50)
-
-             except Exception as e:
-                 logger.error(LogFormatter.error("Error processing files", e))
-                 raise
-
-             # Update cache
-             self.cached_models = models
-             self.last_cache_update = time.time()
-             logger.info(LogFormatter.success("Cache updated successfully"))
-
-             return models
-
-         except Exception as e:
-             logger.error(LogFormatter.error("Cache refresh failed", e))
-             raise
-
-     async def initialize(self):
-         """Initialize the model service"""
-         if self._initialized:
-             logger.info(LogFormatter.info("Service already initialized, using cached data"))
-             return
-
-         try:
-             logger.info(LogFormatter.section("MODEL SERVICE INITIALIZATION"))
-
-             # Check if cache already exists
-             cache_path = cache_config.get_cache_path("datasets")
-             if not cache_path.exists() or not any(cache_path.iterdir()):
-                 logger.info(LogFormatter.info("No existing cache found, initializing datasets cache..."))
-                 cache_config.flush_cache("datasets")
-             else:
-                 logger.info(LogFormatter.info("Using existing datasets cache"))
-
-             # Ensure eval requests directory exists
-             self.eval_requests_path.parent.mkdir(parents=True, exist_ok=True)
-             logger.info(LogFormatter.info(f"Eval requests directory: {self.eval_requests_path}"))
-
-             # List existing files
-             if self.eval_requests_path.exists():
-                 files = list(self.eval_requests_path.glob("**/*.json"))
-                 stats = {
-                     "Total_Files": len(files),
-                     "Directory": str(self.eval_requests_path)
-                 }
-                 for line in LogFormatter.stats(stats, "Eval Requests"):
-                     logger.info(line)
-
-             # Load initial cache
-             await self._refresh_models_cache()
-
-             self._initialized = True
-             logger.info(LogFormatter.success("Model service initialization complete"))
-
-         except Exception as e:
-             logger.error(LogFormatter.error("Initialization failed", e))
-             raise
-
-     async def get_models(self) -> Dict[str, List[Dict[str, Any]]]:
-         """Get all models with their status"""
-         if not self._initialized:
-             logger.info(LogFormatter.info("Service not initialized, initializing now..."))
-             await self.initialize()
-
-         current_time = time.time()
-         cache_age = current_time - self.last_cache_update
-
-         # Check if cache needs refresh
-         if not self.cached_models:
-             logger.info(LogFormatter.info("No cached data available, refreshing cache..."))
-             return await self._refresh_models_cache()
-         elif cache_age > self.cache_ttl:
-             logger.info(LogFormatter.info(f"Cache expired ({cache_age:.1f}s old, TTL: {self.cache_ttl}s)"))
-             return await self._refresh_models_cache()
-         else:
-             logger.info(LogFormatter.info(f"Using cached data ({cache_age:.1f}s old)"))
-             return self.cached_models
-
-     async def submit_model(
-         self,
-         model_data: Dict[str, Any],
-         user_id: str
-     ) -> Dict[str, Any]:
-         logger.info(LogFormatter.section("MODEL SUBMISSION"))
-         self._log_repo_operation("write", f"{HF_ORGANIZATION}/requests", f"Submitting model {model_data['model_id']} by {user_id}")
-         stats = {
-             "Model": model_data["model_id"],
-             "User": user_id,
-             "Revision": model_data["revision"],
-             "Precision": model_data["precision"],
-             "Type": model_data["model_type"]
-         }
-         for line in LogFormatter.tree(stats, "Submission Details"):
-             logger.info(line)
-
-         # Validate required fields
-         required_fields = [
-             "model_id", "base_model", "revision", "precision",
-             "weight_type", "model_type", "use_chat_template"
-         ]
-         for field in required_fields:
-             if field not in model_data:
-                 raise ValueError(f"Missing required field: {field}")
-
-         # Get model info and validate it exists on HuggingFace
-         try:
-             logger.info(LogFormatter.subsection("MODEL VALIDATION"))
-
-             # Get the model info to check if it exists
-             model_info = self.hf_api.model_info(
-                 model_data["model_id"],
-                 revision=model_data["revision"],
-                 token=self.token
-             )
-
-             if not model_info:
-                 raise Exception(f"Model {model_data['model_id']} not found on HuggingFace Hub")
-
-             logger.info(LogFormatter.success("Model exists on HuggingFace Hub"))
-
-         except Exception as e:
-             logger.error(LogFormatter.error("Model validation failed", e))
-             raise
-
-         # Update model revision with commit sha
-         model_data["revision"] = model_info.sha
-
-         # Check if model already exists in the system
-         try:
-             logger.info(LogFormatter.subsection("CHECKING EXISTING SUBMISSIONS"))
-             existing_models = await self.get_models()
-
-             # Call the official provider status check
-             is_valid, error_message = await self.validator.check_official_provider_status(
-                 model_data["model_id"],
-                 existing_models
-             )
-             if not is_valid:
-                 raise ValueError(error_message)
-
-             # Check in all statuses (pending, evaluating, finished)
-             for status, models in existing_models.items():
-                 for model in models:
-                     if model["name"] == model_data["model_id"] and model["revision"] == model_data["revision"]:
-                         error_msg = f"Model {model_data['model_id']} revision {model_data['revision']} is already in the system with status: {status}"
-                         logger.error(LogFormatter.error("Submission rejected", error_msg))
-                         raise ValueError(error_msg)
-
-             logger.info(LogFormatter.success("No existing submission found"))
-         except ValueError:
-             raise
-         except Exception as e:
-             logger.error(LogFormatter.error("Failed to check existing submissions", e))
-             raise
-
-         # Check that model on hub and valid
-         valid, error, model_config = await self.validator.is_model_on_hub(
-             model_data["model_id"],
-             model_data["revision"],
-             test_tokenizer=True
-         )
-         if not valid:
-             logger.error(LogFormatter.error("Model on hub validation failed", error))
-             raise Exception(error)
-         logger.info(LogFormatter.success("Model on hub validation passed"))
-
-         # Validate model card
-         valid, error, model_card = await self.validator.check_model_card(
-             model_data["model_id"]
-         )
-         if not valid:
-             logger.error(LogFormatter.error("Model card validation failed", error))
-             raise Exception(error)
-         logger.info(LogFormatter.success("Model card validation passed"))
-
-         # Check size limits
-         model_size, error = await self.validator.get_model_size(
-             model_info,
-             model_data["precision"],
-             model_data["base_model"],
-             revision=model_data["revision"]
-         )
-         if model_size is None:
-             logger.error(LogFormatter.error("Model size validation failed", error))
-             raise Exception(error)
-         logger.info(LogFormatter.success(f"Model size validation passed: {model_size:.1f}B"))
-
-         # Size limits based on precision
-         if model_data["precision"] in ["float16", "bfloat16"] and model_size > 100:
-             error_msg = f"Model too large for {model_data['precision']} (limit: 100B)"
-             logger.error(LogFormatter.error("Size limit exceeded", error_msg))
-             raise Exception(error_msg)
-
-         # Chat template validation if requested
-         if model_data["use_chat_template"]:
-             valid, error = await self.validator.check_chat_template(
-                 model_data["model_id"],
-                 model_data["revision"]
-             )
-             if not valid:
-                 logger.error(LogFormatter.error("Chat template validation failed", error))
-                 raise Exception(error)
-             logger.info(LogFormatter.success("Chat template validation passed"))
-
-         architectures = model_info.config.get("architectures", "")
-         if architectures:
-             architectures = ";".join(architectures)
-
-         # Create eval entry
-         eval_entry = {
-             "model": model_data["model_id"],
-             "base_model": model_data["base_model"],
-             "revision": model_info.sha,
-             "precision": model_data["precision"],
-             "params": model_size,
-             "architectures": architectures,
-             "weight_type": model_data["weight_type"],
-             "status": "PENDING",
-             "submitted_time": datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ"),
-             "model_type": model_data["model_type"],
-             "job_id": -1,
-             "job_start_time": None,
-             "use_chat_template": model_data["use_chat_template"],
-             "sender": user_id
-         }
-
-         logger.info(LogFormatter.subsection("EVALUATION ENTRY"))
-         for line in LogFormatter.tree(eval_entry):
-             logger.info(line)
-
-         # Upload to HF dataset
-         try:
-             logger.info(LogFormatter.subsection("UPLOADING TO HUGGINGFACE"))
-             logger.info(LogFormatter.info(f"Uploading to {HF_ORGANIZATION}/requests..."))
-
-             # Construct the path in the dataset
-             org_or_user = model_data["model_id"].split("/")[0] if "/" in model_data["model_id"] else ""
-             model_path = model_data["model_id"].split("/")[-1]
-             relative_path = f"{org_or_user}/{model_path}_eval_request_False_{model_data['precision']}_{model_data['weight_type']}.json"
-
-             # Create a temporary file with the request
-             with tempfile.NamedTemporaryFile(mode='w', suffix='.json', delete=False) as temp_file:
-                 json.dump(eval_entry, temp_file, indent=2)
-                 temp_file.flush()
-                 temp_path = temp_file.name
-
-             # Upload file directly
-             self.hf_api.upload_file(
-                 path_or_fileobj=temp_path,
-                 path_in_repo=relative_path,
-                 repo_id=f"{HF_ORGANIZATION}/requests",
-                 repo_type="dataset",
-                 commit_message=f"Add {model_data['model_id']} to eval queue",
-                 token=self.token
-             )
-
-             # Clean up temp file
-             os.unlink(temp_path)
-
-             logger.info(LogFormatter.success("Upload successful"))
-
-         except Exception as e:
-             logger.error(LogFormatter.error("Upload failed", e))
-             raise
-
-         # Add automatic vote
-         try:
-             logger.info(LogFormatter.subsection("AUTOMATIC VOTE"))
-             logger.info(LogFormatter.info(f"Adding upvote for {model_data['model_id']} by {user_id}"))
-             await self.vote_service.add_vote(
-                 model_data["model_id"],
-                 user_id,
-                 "up"
-             )
-             logger.info(LogFormatter.success("Vote recorded successfully"))
-         except Exception as e:
-             logger.error(LogFormatter.error("Failed to record vote", e))
-             # Don't raise here as the main submission was successful
-
-         return {
-             "status": "success",
-             "message": "The model was submitted successfully, and the vote has been recorded"
-         }
-
-     async def get_model_status(self, model_id: str) -> Dict[str, Any]:
-         """Get evaluation status of a model"""
-         logger.info(LogFormatter.info(f"Checking status for model: {model_id}"))
-         eval_path = self.eval_requests_path
-
-         for user_folder in eval_path.iterdir():
-             if user_folder.is_dir():
-                 for file in user_folder.glob("*.json"):
-                     with open(file, "r") as f:
-                         data = json.load(f)
-                         if data["model"] == model_id:
-                             status = {
-                                 "status": data["status"],
-                                 "submitted_time": data["submitted_time"],
-                                 "job_id": data.get("job_id", -1)
-                             }
-                             logger.info(LogFormatter.success("Status found"))
-                             for line in LogFormatter.tree(status, "Model Status"):
-                                 logger.info(line)
-                             return status
-
-         logger.warning(LogFormatter.warning(f"No status found for model: {model_id}"))
-         return {"status": "not_found"}
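
The ProgressTracker defined near the top of this file is self-contained and reusable outside the cache-refresh path; a sketch of how it is driven (the work loop is illustrative):

    from app.services.models import ProgressTracker

    tracker = ProgressTracker(total=500, desc="PROCESSING FILES", update_frequency=10)
    for _ in range(500):
        # ... process one item ...
        tracker.update()  # logs a stats tree at each 10% step
    tracker.close()       # logs the final totals and items/s rate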
 
backend/app/services/rate_limiter.py DELETED
@@ -1,72 +0,0 @@
- """
- import logging
- from datetime import datetime, timedelta, timezone
- from typing import Tuple, Dict, List
-
- logger = logging.getLogger(__name__)
-
- class RateLimiter:
-     def __init__(self, period_days: int = 7, quota: int = 5):
-         self.period_days = period_days
-         self.quota = quota
-         self.submission_history: Dict[str, List[datetime]] = {}
-         self.higher_quota_users = set()  # Users with higher quotas
-         self.unlimited_users = set()  # Users with no quota limits
-
-     def add_unlimited_user(self, user_id: str):
-         """Add a user to the unlimited users list"""
-         self.unlimited_users.add(user_id)
-
-     def add_higher_quota_user(self, user_id: str):
-         """Add a user to the higher quota users list"""
-         self.higher_quota_users.add(user_id)
-
-     def record_submission(self, user_id: str):
-         """Record a new submission for a user"""
-         current_time = datetime.now(timezone.utc)
-         if user_id not in self.submission_history:
-             self.submission_history[user_id] = []
-         self.submission_history[user_id].append(current_time)
-
-     def clean_old_submissions(self, user_id: str):
-         """Remove submissions older than the period"""
-         if user_id not in self.submission_history:
-             return
-
-         current_time = datetime.now(timezone.utc)
-         cutoff_time = current_time - timedelta(days=self.period_days)
-
-         self.submission_history[user_id] = [
-             time for time in self.submission_history[user_id]
-             if time > cutoff_time
-         ]
-
-     async def check_rate_limit(self, user_id: str) -> Tuple[bool, str]:
-         """Check if a user has exceeded their rate limit
-
-         Returns:
-             Tuple[bool, str]: (is_allowed, error_message)
-         """
-         # Unlimited users bypass all checks
-         if user_id in self.unlimited_users:
-             return True, ""
-
-         # Clean old submissions
-         self.clean_old_submissions(user_id)
-
-         # Get current submission count
-         submission_count = len(self.submission_history.get(user_id, []))
-
-         # Calculate user's quota
-         user_quota = self.quota * 2 if user_id in self.higher_quota_users else self.quota
-
-         # Check if user has exceeded their quota
-         if submission_count >= user_quota:
-             error_msg = (
-                 f"User '{user_id}' has reached the limit of {user_quota} submissions "
-                 f"in the last {self.period_days} days. Please wait before submitting again."
-             )
-             return False, error_msg
-
-         return True, ""
- """
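
Note that this entire module body sits inside one triple-quoted string, so the class is effectively disabled code (and the nested docstrings mean it would not even import cleanly as written). If it were re-enabled, usage would look like this sketch (user ID hypothetical; assumes the outer string wrapper is removed):

    import asyncio
    from app.services.rate_limiter import RateLimiter  # assumes the wrapper is removed

    async def main():
        limiter = RateLimiter(period_days=7, quota=5)
        for _ in range(5):
            limiter.record_submission("alice")
        allowed, message = await limiter.check_rate_limit("alice")
        print(allowed, message)  # False: the 7-day quota of 5 is exhausted

    asyncio.run(main())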
 
backend/app/services/votes.py DELETED
@@ -1,390 +0,0 @@
- from datetime import datetime, timezone
- from typing import Dict, Any, List, Set, Tuple, Optional
- import json
- import logging
- import asyncio
- from pathlib import Path
- import aiohttp
- from huggingface_hub import HfApi
- import datasets
-
- from app.services.hf_service import HuggingFaceService
- from app.config import HF_TOKEN
- from app.config.hf_config import HF_ORGANIZATION
- from app.core.cache import cache_config
- from app.core.formatting import LogFormatter
-
- logger = logging.getLogger(__name__)
-
- class VoteService(HuggingFaceService):
-     _instance: Optional['VoteService'] = None
-     _initialized = False
-
-     def __new__(cls):
-         if cls._instance is None:
-             cls._instance = super(VoteService, cls).__new__(cls)
-         return cls._instance
-
-     def __init__(self):
-         if not hasattr(self, '_init_done'):
-             super().__init__()
-             self.votes_file = cache_config.votes_file
-             self.votes_to_upload: List[Dict[str, Any]] = []
-             self.vote_check_set: Set[Tuple[str, str, str]] = set()
-             self._votes_by_model: Dict[str, List[Dict[str, Any]]] = {}
-             self._votes_by_user: Dict[str, List[Dict[str, Any]]] = {}
-             self._upload_lock = asyncio.Lock()
-             self._last_sync = None
-             self._sync_interval = 300  # 5 minutes
-             self._total_votes = 0
-             self._last_vote_timestamp = None
-             self._max_retries = 3
-             self._retry_delay = 1  # seconds
-             self._upload_batch_size = 10
-             self.hf_api = HfApi(token=HF_TOKEN)
-             self._init_done = True
-
-     async def initialize(self):
-         """Initialize the vote service"""
-         if self._initialized:
-             await self._check_for_new_votes()
-             return
-
-         try:
-             logger.info(LogFormatter.section("VOTE SERVICE INITIALIZATION"))
-
-             # Ensure votes directory exists
-             self.votes_file.parent.mkdir(parents=True, exist_ok=True)
-
-             # Load existing votes if file exists
-             local_vote_count = 0
-             if self.votes_file.exists():
-                 logger.info(LogFormatter.info(f"Loading votes from {self.votes_file}"))
-                 local_vote_count = await self._count_local_votes()
-                 logger.info(LogFormatter.info(f"Found {local_vote_count:,} local votes"))
-
-             # Check remote votes count
-             remote_vote_count = await self._count_remote_votes()
-             logger.info(LogFormatter.info(f"Found {remote_vote_count:,} remote votes"))
-
-             if remote_vote_count > local_vote_count:
-                 logger.info(LogFormatter.info(f"Fetching {remote_vote_count - local_vote_count:,} new votes"))
-                 await self._sync_with_hub()
-             elif remote_vote_count < local_vote_count:
-                 logger.warning(LogFormatter.warning(f"Local votes ({local_vote_count:,}) > Remote votes ({remote_vote_count:,})"))
-                 await self._load_existing_votes()
-             else:
-                 logger.info(LogFormatter.success("Local and remote votes are in sync"))
-                 if local_vote_count > 0:
-                     await self._load_existing_votes()
-                 else:
-                     logger.info(LogFormatter.info("No votes found"))
-
-             self._initialized = True
-             self._last_sync = datetime.now(timezone.utc)
-
-             # Final summary
-             stats = {
-                 "Total_Votes": self._total_votes,
-                 "Last_Sync": self._last_sync.strftime("%Y-%m-%d %H:%M:%S UTC")
-             }
-             logger.info(LogFormatter.section("INITIALIZATION COMPLETE"))
-             for line in LogFormatter.stats(stats):
-                 logger.info(line)
-
-         except Exception as e:
-             logger.error(LogFormatter.error("Initialization failed", e))
-             raise
-
-     async def _count_local_votes(self) -> int:
-         """Count votes in local file"""
-         if not self.votes_file.exists():
-             return 0
-
-         count = 0
-         try:
-             with open(self.votes_file, 'r') as f:
-                 for _ in f:
-                     count += 1
-             return count
-         except Exception as e:
-             logger.error(f"Error counting local votes: {str(e)}")
-             return 0
-
-     async def _count_remote_votes(self) -> int:
-         """Count votes in remote file"""
-         url = f"https://huggingface.co/datasets/{HF_ORGANIZATION}/votes/raw/main/votes_data.jsonl"
-         headers = {"Authorization": f"Bearer {HF_TOKEN}"} if HF_TOKEN else {}
-
-         try:
-             async with aiohttp.ClientSession() as session:
-                 async with session.get(url, headers=headers) as response:
-                     if response.status == 200:
-                         count = 0
-                         async for line in response.content:
-                             if line.strip():  # Skip empty lines
-                                 count += 1
-                         return count
-                     else:
-                         logger.error(f"Failed to get remote votes: HTTP {response.status}")
-                         return 0
-         except Exception as e:
-             logger.error(f"Error counting remote votes: {str(e)}")
-             return 0
-
-     async def _sync_with_hub(self):
-         """Sync votes with HuggingFace hub using datasets"""
-         try:
-             logger.info(LogFormatter.section("VOTE SYNC"))
-             self._log_repo_operation("sync", f"{HF_ORGANIZATION}/votes", "Syncing local votes with HF hub")
-             logger.info(LogFormatter.info("Syncing with HuggingFace hub..."))
-
-             # Load votes from HF dataset
-             dataset = datasets.load_dataset(
-                 f"{HF_ORGANIZATION}/votes",
-                 split="train",
-                 cache_dir=cache_config.get_cache_path("datasets")
-             )
-
-             remote_votes = len(dataset)
-             logger.info(LogFormatter.info(f"Dataset loaded with {remote_votes:,} votes"))
-
-             # Convert to list of dictionaries
-             df = dataset.to_pandas()
-             if 'timestamp' in df.columns:
-                 df['timestamp'] = df['timestamp'].dt.strftime('%Y-%m-%dT%H:%M:%SZ')
-             remote_votes = df.to_dict('records')
-
-             # If we have more remote votes than local
-             if len(remote_votes) > self._total_votes:
-                 new_votes = len(remote_votes) - self._total_votes
-                 logger.info(LogFormatter.info(f"Processing {new_votes:,} new votes..."))
-
-                 # Save votes to local file
-                 with open(self.votes_file, 'w') as f:
-                     for vote in remote_votes:
-                         f.write(json.dumps(vote) + '\n')
-
-                 # Reload votes in memory
-                 await self._load_existing_votes()
-                 logger.info(LogFormatter.success("Sync completed successfully"))
-             else:
-                 logger.info(LogFormatter.success("Local votes are up to date"))
-
-             self._last_sync = datetime.now(timezone.utc)
-
-         except Exception as e:
-             logger.error(LogFormatter.error("Sync failed", e))
-             raise
-
-     async def _check_for_new_votes(self):
-         """Check for new votes on the hub"""
-         try:
-             self._log_repo_operation("check", f"{HF_ORGANIZATION}/votes", "Checking for new votes")
-             # Load only dataset metadata
-             dataset_info = datasets.load_dataset(f"{HF_ORGANIZATION}/votes", split="train")
-             remote_vote_count = len(dataset_info)
-
-             if remote_vote_count > self._total_votes:
-                 logger.info(f"Found {remote_vote_count - self._total_votes} new votes on hub")
-                 await self._sync_with_hub()
-             else:
-                 logger.info("No new votes found on hub")
-
-         except Exception as e:
-             logger.error(f"Error checking for new votes: {str(e)}")
-
-     async def _load_existing_votes(self):
-         """Load existing votes from file"""
-         if not self.votes_file.exists():
-             logger.warning(LogFormatter.warning("No votes file found"))
-             return
-
-         try:
-             logger.info(LogFormatter.section("LOADING VOTES"))
-
-             # Clear existing data structures
-             self.vote_check_set.clear()
-             self._votes_by_model.clear()
-             self._votes_by_user.clear()
-
-             vote_count = 0
-             latest_timestamp = None
-
-             with open(self.votes_file, "r") as f:
-                 for line in f:
-                     try:
-                         vote = json.loads(line.strip())
-                         vote_count += 1
-
-                         # Track latest timestamp
-                         try:
-                             vote_timestamp = datetime.fromisoformat(vote["timestamp"].replace("Z", "+00:00"))
-                             if not latest_timestamp or vote_timestamp > latest_timestamp:
-                                 latest_timestamp = vote_timestamp
-                             vote["timestamp"] = vote_timestamp.strftime("%Y-%m-%dT%H:%M:%SZ")
-                         except (KeyError, ValueError) as e:
-                             logger.warning(LogFormatter.warning(f"Invalid timestamp in vote: {str(e)}"))
-                             continue
-
-                         if vote_count % 1000 == 0:
-                             logger.info(LogFormatter.info(f"Processed {vote_count:,} votes..."))
-
-                         self._add_vote_to_memory(vote)
-
-                     except json.JSONDecodeError as e:
-                         logger.error(LogFormatter.error("Vote parsing failed", e))
-                         continue
-                     except Exception as e:
-                         logger.error(LogFormatter.error("Vote processing failed", e))
-                         continue
-
-             self._total_votes = vote_count
-             self._last_vote_timestamp = latest_timestamp
-
-             # Final summary
-             stats = {
-                 "Total_Votes": vote_count,
-                 "Latest_Vote": latest_timestamp.strftime("%Y-%m-%d %H:%M:%S UTC") if latest_timestamp else "None",
-                 "Unique_Models": len(self._votes_by_model),
-                 "Unique_Users": len(self._votes_by_user)
-             }
-
-             logger.info(LogFormatter.section("VOTE SUMMARY"))
-             for line in LogFormatter.stats(stats):
-                 logger.info(line)
-
-         except Exception as e:
-             logger.error(LogFormatter.error("Failed to load votes", e))
-             raise
-
-     def _add_vote_to_memory(self, vote: Dict[str, Any]):
-         """Add vote to memory structures"""
-         try:
-             check_tuple = (vote["model"], vote["revision"], vote["username"])
-
-             # Skip if we already have this vote
-             if check_tuple in self.vote_check_set:
-                 return
-
-             self.vote_check_set.add(check_tuple)
-
-             # Update model votes
-             if vote["model"] not in self._votes_by_model:
-                 self._votes_by_model[vote["model"]] = []
-             self._votes_by_model[vote["model"]].append(vote)
-
-             # Update user votes
-             if vote["username"] not in self._votes_by_user:
-                 self._votes_by_user[vote["username"]] = []
-             self._votes_by_user[vote["username"]].append(vote)
-
-         except KeyError as e:
-             logger.error(f"Malformed vote data, missing key: {str(e)}")
-         except Exception as e:
-             logger.error(f"Error adding vote to memory: {str(e)}")
-
-     async def get_user_votes(self, user_id: str) -> List[Dict[str, Any]]:
-         """Get all votes from a specific user"""
-         logger.info(LogFormatter.info(f"Fetching votes for user: {user_id}"))
-         votes = self._votes_by_user.get(user_id, [])
-         logger.info(LogFormatter.success(f"Found {len(votes):,} votes"))
-         return votes
-
-     async def get_model_votes(self, model_id: str) -> Dict[str, Any]:
-         """Get all votes for a specific model"""
-         logger.info(LogFormatter.info(f"Fetching votes for model: {model_id}"))
-         votes = self._votes_by_model.get(model_id, [])
-
-         # Group votes by revision
-         votes_by_revision = {}
-         for vote in votes:
-             revision = vote["revision"]
-             if revision not in votes_by_revision:
-                 votes_by_revision[revision] = 0
-             votes_by_revision[revision] += 1
-
-         stats = {
-             "Total_Votes": len(votes),
-             **{f"Revision_{k}": v for k, v in votes_by_revision.items()}
-         }
-
-         logger.info(LogFormatter.section("VOTE STATISTICS"))
-         for line in LogFormatter.stats(stats):
-             logger.info(line)
-
-         return {
-             "total_votes": len(votes),
-             "votes_by_revision": votes_by_revision,
-             "votes": votes
-         }
-
-     async def _get_model_revision(self, model_id: str) -> str:
-         """Get current revision of a model with retries"""
-         logger.info(f"Getting revision for model: {model_id}")
-         for attempt in range(self._max_retries):
-             try:
-                 model_info = await asyncio.to_thread(self.hf_api.model_info, model_id)
-                 logger.info(f"Successfully got revision {model_info.sha} for model {model_id}")
-                 return model_info.sha
-             except Exception as e:
-                 logger.error(f"Error getting model revision for {model_id} (attempt {attempt + 1}): {str(e)}")
-                 if attempt < self._max_retries - 1:
-                     retry_delay = self._retry_delay * (attempt + 1)
-                     logger.info(f"Retrying in {retry_delay} seconds...")
-                     await asyncio.sleep(retry_delay)
-                 else:
-                     logger.warning(f"Using 'main' as fallback revision for {model_id} after {self._max_retries} failed attempts")
-                     return "main"
-
-     async def add_vote(self, model_id: str, user_id: str, vote_type: str) -> Dict[str, Any]:
-         """Add a vote for a model"""
-         try:
-             self._log_repo_operation("add", f"{HF_ORGANIZATION}/votes", f"Adding {vote_type} vote for {model_id} by {user_id}")
-             logger.info(LogFormatter.section("NEW VOTE"))
-             stats = {
-                 "Model": model_id,
-                 "User": user_id,
-                 "Type": vote_type
-             }
-             for line in LogFormatter.tree(stats, "Vote Details"):
-                 logger.info(line)
-
-             revision = await self._get_model_revision(model_id)
-             check_tuple = (model_id, revision, user_id)
-
-             if check_tuple in self.vote_check_set:
-                 raise ValueError("Vote already recorded for this model")
-
-             vote = {
-                 "model": model_id,
-                 "revision": revision,
-                 "username": user_id,
-                 "timestamp": datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ"),
-                 "vote_type": vote_type
-             }
-
-             # Update local storage
-             with open(self.votes_file, "a") as f:
-                 f.write(json.dumps(vote) + "\n")
-
-             self._add_vote_to_memory(vote)
-             self.votes_to_upload.append(vote)
- stats = {
375
- "Status": "Success",
376
- "Queue_Size": len(self.votes_to_upload)
377
- }
378
- for line in LogFormatter.stats(stats):
379
- logger.info(line)
380
-
381
- # Try to upload if batch size reached
382
- if len(self.votes_to_upload) >= self._upload_batch_size:
383
- logger.info(LogFormatter.info(f"Upload batch size reached ({self._upload_batch_size}), triggering sync"))
384
- await self._sync_with_hub()
385
-
386
- return {"status": "success", "message": "Vote added successfully"}
387
-
388
- except Exception as e:
389
- logger.error(LogFormatter.error("Failed to add vote", e))
390
- raise
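
A note on the dedupe logic above: a vote is keyed by the `(model, revision, username)` triple, so a user may vote for the same model again only once it ships a new revision. A minimal, self-contained sketch of that keying (plain Python, no Hub access; `VoteBook` is a hypothetical name used only for illustration):

```python
from typing import Set, Tuple


class VoteBook:
    """Toy illustration of the (model, revision, username) dedupe key."""

    def __init__(self) -> None:
        self.seen: Set[Tuple[str, str, str]] = set()

    def add(self, model: str, revision: str, username: str) -> bool:
        key = (model, revision, username)
        if key in self.seen:
            return False  # duplicate vote for this exact revision
        self.seen.add(key)
        return True


book = VoteBook()
assert book.add("org/model", "abc123", "alice")      # first vote: accepted
assert not book.add("org/model", "abc123", "alice")  # same revision: rejected
assert book.add("org/model", "def456", "alice")      # new revision: accepted
```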
 
backend/app/utils/__init__.py DELETED
@@ -1,3 +0,0 @@
- from . import model_validation
- 
- __all__ = ["model_validation"]
 
backend/app/utils/logging.py DELETED
@@ -1,3 +0,0 @@
- from app.core.formatting import LogFormatter
- 
- __all__ = ['LogFormatter']
 
backend/app/utils/model_validation.py DELETED
@@ -1,266 +0,0 @@
- import json
- import logging
- import asyncio
- from typing import Tuple, Optional, Dict, Any
- from datasets import load_dataset
- from huggingface_hub import HfApi, ModelCard, hf_hub_download
- from huggingface_hub import hf_api
- from transformers import AutoConfig, AutoTokenizer
- from app.config.base import HF_TOKEN
- from app.config.hf_config import OFFICIAL_PROVIDERS_REPO
- from app.core.formatting import LogFormatter
- 
- logger = logging.getLogger(__name__)
- 
- class ModelValidator:
-     def __init__(self):
-         self.token = HF_TOKEN
-         self.api = HfApi(token=self.token)
-         self.headers = {"Authorization": f"Bearer {self.token}"} if self.token else {}
- 
-     async def check_model_card(self, model_id: str) -> Tuple[bool, str, Optional[ModelCard]]:
-         """Check if model has a valid model card"""
-         try:
-             logger.info(LogFormatter.info(f"Checking model card for {model_id}"))
- 
-             # Get model card content using ModelCard.load
-             try:
-                 model_card = await asyncio.to_thread(
-                     ModelCard.load,
-                     model_id
-                 )
-                 logger.info(LogFormatter.success("Model card found"))
-             except Exception as e:
-                 error_msg = "Please add a model card to your model to explain how you trained/fine-tuned it."
-                 logger.error(LogFormatter.error(error_msg, e))
-                 return False, error_msg, None
- 
-             # Check license in model card data
-             if model_card.data.license is None and not ("license_name" in model_card.data and "license_link" in model_card.data):
-                 error_msg = "License not found. Please add a license to your model card using the `license` metadata or a `license_name`/`license_link` pair."
-                 logger.warning(LogFormatter.warning(error_msg))
-                 return False, error_msg, None
- 
-             # Enforce card content length
-             if len(model_card.text) < 200:
-                 error_msg = "Please add a description to your model card, it is too short."
-                 logger.warning(LogFormatter.warning(error_msg))
-                 return False, error_msg, None
- 
-             logger.info(LogFormatter.success("Model card validation passed"))
-             return True, "", model_card
- 
-         except Exception as e:
-             error_msg = "Failed to validate model card"
-             logger.error(LogFormatter.error(error_msg, e))
-             return False, str(e), None
- 
-     async def get_safetensors_metadata(self, model_id: str, is_adapter: bool = False, revision: str = "main") -> Optional[Dict]:
-         """Get metadata from a safetensors file"""
-         try:
-             if is_adapter:
-                 metadata = await asyncio.to_thread(
-                     hf_api.parse_safetensors_file_metadata,
-                     model_id,
-                     "adapter_model.safetensors",
-                     token=self.token,
-                     revision=revision,
-                 )
-             else:
-                 metadata = await asyncio.to_thread(
-                     hf_api.get_safetensors_metadata,
-                     repo_id=model_id,
-                     token=self.token,
-                     revision=revision,
-                 )
-             return metadata
- 
-         except Exception as e:
-             logger.error(f"Failed to get safetensors metadata: {str(e)}")
-             return None
- 
-     async def get_model_size(
-         self,
-         model_info: Any,
-         precision: str,
-         base_model: str,
-         revision: str
-     ) -> Tuple[Optional[float], Optional[str]]:
-         """Get model size in billions of parameters"""
-         try:
-             logger.info(LogFormatter.info(f"Checking model size for {model_info.modelId}"))
- 
-             # Check if model is adapter
-             is_adapter = any(s.rfilename == "adapter_config.json" for s in model_info.siblings if hasattr(s, 'rfilename'))
- 
-             # Try to get size from safetensors first
-             model_size = None
- 
-             if is_adapter and base_model:
-                 # For adapters, we need both adapter and base model sizes
-                 adapter_meta = await self.get_safetensors_metadata(model_info.id, is_adapter=True, revision=revision)
-                 base_meta = await self.get_safetensors_metadata(base_model, revision="main")
- 
-                 if adapter_meta and base_meta:
-                     adapter_size = sum(adapter_meta.parameter_count.values())
-                     base_size = sum(base_meta.parameter_count.values())
-                     model_size = adapter_size + base_size
-             else:
-                 # For regular models, just get the model size
-                 meta = await self.get_safetensors_metadata(model_info.id, revision=revision)
-                 if meta:
-                     model_size = sum(meta.parameter_count.values())  # total params
- 
-             if model_size is None:
-                 # If model size could not be determined, return an error
-                 return None, "Model size could not be determined"
- 
-             # Adjust size for GPTQ models
-             size_factor = 8 if (precision == "GPTQ" or "gptq" in model_info.id.lower()) else 1
-             model_size = model_size / 1e9  # Convert to billions, assuming float16
-             model_size = round(size_factor * model_size, 3)
- 
-             logger.info(LogFormatter.success(f"Model size: {model_size}B parameters"))
-             return model_size, None
- 
-         except Exception as e:
-             logger.error(LogFormatter.error(f"Error while determining model size: {e}"))
-             return None, str(e)
- 
-     async def check_chat_template(
-         self,
-         model_id: str,
-         revision: str
-     ) -> Tuple[bool, Optional[str]]:
-         """Check if model has a valid chat template"""
-         try:
-             logger.info(LogFormatter.info(f"Checking chat template for {model_id}"))
- 
-             try:
-                 config_file = await asyncio.to_thread(
-                     hf_hub_download,
-                     repo_id=model_id,
-                     filename="tokenizer_config.json",
-                     revision=revision,
-                     repo_type="model"
-                 )
- 
-                 with open(config_file, 'r') as f:
-                     tokenizer_config = json.load(f)
- 
-                 if 'chat_template' not in tokenizer_config:
-                     error_msg = f"The model {model_id} doesn't have a chat_template in its tokenizer_config.json. Please add a chat_template before submitting or submit without it."
-                     logger.error(LogFormatter.error(error_msg))
-                     return False, error_msg
- 
-                 logger.info(LogFormatter.success("Valid chat template found"))
-                 return True, None
- 
-             except Exception as e:
-                 error_msg = f"Error checking chat_template: {str(e)}"
-                 logger.error(LogFormatter.error(error_msg))
-                 return False, error_msg
- 
-         except Exception as e:
-             error_msg = "Failed to check chat template"
-             logger.error(LogFormatter.error(error_msg, e))
-             return False, str(e)
- 
-     async def is_model_on_hub(
-         self,
-         model_name: str,
-         revision: str,
-         test_tokenizer: bool = False,
-         trust_remote_code: bool = False
-     ) -> Tuple[bool, Optional[str], Optional[Any]]:
-         """Check if model exists and is properly configured on the Hub"""
-         try:
-             config = await asyncio.to_thread(
-                 AutoConfig.from_pretrained,
-                 model_name,
-                 revision=revision,
-                 trust_remote_code=trust_remote_code,
-                 token=self.token,
-                 force_download=True
-             )
- 
-             if test_tokenizer:
-                 try:
-                     await asyncio.to_thread(
-                         AutoTokenizer.from_pretrained,
-                         model_name,
-                         revision=revision,
-                         trust_remote_code=trust_remote_code,
-                         token=self.token
-                     )
-                 except ValueError as e:
-                     return False, f"The tokenizer is not available in an official Transformers release: {e}", None
-                 except Exception:
-                     return False, "The tokenizer cannot be loaded. Ensure the tokenizer class is part of a stable Transformers release and correctly configured.", None
- 
-             return True, None, config
- 
-         except ValueError:
-             return False, "The model requires `trust_remote_code=True` to launch, and for safety reasons, we don't accept such models automatically.", None
-         except Exception as e:
-             if "You are trying to access a gated repo." in str(e):
-                 return True, "The model is gated and requires special access permissions.", None
-             return False, f"The model was not found or is misconfigured on the Hub. Error: {e.args[0]}", None
- 
-     async def check_official_provider_status(
-         self,
-         model_id: str,
-         existing_models: Dict[str, list]
-     ) -> Tuple[bool, Optional[str]]:
-         """
-         Check if model is from official provider and has finished submission.
- 
-         Args:
-             model_id: The model identifier (org/model-name)
-             existing_models: Dictionary of models by status from get_models()
- 
-         Returns:
-             Tuple[bool, Optional[str]]: (is_valid, error_message)
-         """
-         try:
-             logger.info(LogFormatter.info(f"Checking official provider status for {model_id}"))
- 
-             # Get model organization
-             model_org = model_id.split('/')[0] if '/' in model_id else None
- 
-             if not model_org:
-                 return True, None
- 
-             # Load official providers dataset
-             dataset = load_dataset(OFFICIAL_PROVIDERS_REPO)
-             official_providers = dataset["train"][0]["CURATED_SET"]
- 
-             # Check if model org is in official providers
-             is_official = model_org in official_providers
- 
-             if is_official:
-                 logger.info(LogFormatter.info(f"Model organization '{model_org}' is an official provider"))
- 
-                 # Check for finished submissions
-                 if "finished" in existing_models:
-                     for model in existing_models["finished"]:
-                         if model["name"] == model_id:
-                             error_msg = (
-                                 f"Model {model_id} is an official provider model "
-                                 f"with a completed evaluation. "
-                                 f"To re-evaluate, please open a discussion."
-                             )
-                             logger.error(LogFormatter.error("Validation failed", error_msg))
-                             return False, error_msg
- 
-                 logger.info(LogFormatter.success("No finished submission found for this official provider model"))
-             else:
-                 logger.info(LogFormatter.info(f"Model organization '{model_org}' is not an official provider"))
- 
-             return True, None
- 
-         except Exception as e:
-             error_msg = f"Failed to check official provider status: {str(e)}"
-             logger.error(LogFormatter.error(error_msg))
-             return False, error_msg
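
For orientation, these checks were designed to run together at submission time; a hedged sketch of chaining them (assumes the class above is importable as shown and that `HF_TOKEN` is configured; the model id is a placeholder, and the real service may call them in a different order):

```python
import asyncio

from app.utils.model_validation import ModelValidator


async def validate_submission(model_id: str, revision: str = "main") -> None:
    validator = ModelValidator()

    ok, err, _card = await validator.check_model_card(model_id)
    if not ok:
        raise ValueError(f"Model card check failed: {err}")

    on_hub, err, _config = await validator.is_model_on_hub(model_id, revision, test_tokenizer=True)
    if not on_hub:
        raise ValueError(f"Hub check failed: {err}")

    has_template, err = await validator.check_chat_template(model_id, revision)
    if not has_template:
        raise ValueError(f"Chat template check failed: {err}")


asyncio.run(validate_submission("org/some-model"))  # placeholder model id
```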
 
backend/pyproject.toml DELETED
@@ -1,31 +0,0 @@
- [tool.poetry]
- name = "llm-leaderboard-backend"
- version = "0.1.0"
- description = "Backend for the Open LLM Leaderboard"
- authors = ["Your Name <your.email@example.com>"]
- 
- [tool.poetry.dependencies]
- python = "^3.12"
- fastapi = "^0.115.6"
- uvicorn = {extras = ["standard"], version = "^0.34.0"}
- numpy = "^2.2.0"
- pandas = "^2.2.3"
- datasets = "^3.2.0"
- pyarrow = "^18.1.0"
- python-multipart = "^0.0.20"
- huggingface-hub = "^0.27.1"
- transformers = "4.48.0"
- safetensors = "^0.4.5"
- aiofiles = "^24.1.0"
- fastapi-cache2 = "^0.2.1"
- python-dotenv = "^1.0.1"
- 
- [tool.poetry.group.dev.dependencies]
- pytest = "^8.3.4"
- black = "^24.10.0"
- isort = "^5.13.2"
- flake8 = "^6.1.0"
- 
- [build-system]
- requires = ["poetry-core>=1.0.0"]
- build-backend = "poetry.core.masonry.api"
 
backend/utils/analyze_prod_datasets.py DELETED
@@ -1,170 +0,0 @@
- import os
- import json
- import logging
- from datetime import datetime
- from pathlib import Path
- from typing import Dict, Any, List
- from huggingface_hub import HfApi
- from dotenv import load_dotenv
- from app.config.hf_config import HF_ORGANIZATION
- 
- # Get the backend directory path
- BACKEND_DIR = Path(__file__).parent.parent
- ROOT_DIR = BACKEND_DIR.parent
- 
- # Load environment variables from .env file in root directory
- load_dotenv(ROOT_DIR / ".env")
- 
- # Configure logging
- logging.basicConfig(
-     level=logging.INFO,
-     format='%(message)s'
- )
- logger = logging.getLogger(__name__)
- 
- # Initialize Hugging Face API
- HF_TOKEN = os.getenv("HF_TOKEN")
- if not HF_TOKEN:
-     raise ValueError("HF_TOKEN not found in environment variables")
- api = HfApi(token=HF_TOKEN)
- 
- def analyze_dataset(repo_id: str) -> Dict[str, Any]:
-     """Analyze a dataset and return statistics"""
-     try:
-         # Get dataset info
-         dataset_info = api.dataset_info(repo_id=repo_id)
- 
-         # Get file list
-         files = api.list_repo_files(repo_id, repo_type="dataset")
- 
-         # Get last commit info (list_repo_commits returns a list, newest first)
-         commits = api.list_repo_commits(repo_id, repo_type="dataset")
-         last_commit = commits[0] if commits else None
- 
-         # Count lines in jsonl files
-         total_entries = 0
-         for file in files:
-             if file.endswith('.jsonl'):
-                 try:
-                     # Download the file (hf_hub_download returns a local path)
-                     content = api.hf_hub_download(
-                         repo_id=repo_id,
-                         filename=file,
-                         repo_type="dataset"
-                     )
- 
-                     # Count lines
-                     with open(content, 'r') as f:
-                         for _ in f:
-                             total_entries += 1
- 
-                 except Exception as e:
-                     logger.error(f"Error processing file {file}: {str(e)}")
-                     continue
- 
-         # Special handling for requests dataset
-         if repo_id == f"{HF_ORGANIZATION}/requests":
-             pending_count = 0
-             completed_count = 0
- 
-             try:
-                 content = api.hf_hub_download(
-                     repo_id=repo_id,
-                     filename="eval_requests.jsonl",
-                     repo_type="dataset"
-                 )
- 
-                 with open(content, 'r') as f:
-                     for line in f:
-                         try:
-                             entry = json.loads(line)
-                             if entry.get("status") == "pending":
-                                 pending_count += 1
-                             elif entry.get("status") == "completed":
-                                 completed_count += 1
-                         except json.JSONDecodeError:
-                             continue
- 
-             except Exception as e:
-                 logger.error(f"Error analyzing requests: {str(e)}")
- 
-         # Build response
-         response = {
-             "id": repo_id,
-             "last_modified": last_commit.created_at.isoformat() if last_commit else None,
-             "total_entries": total_entries,
-             "file_count": len(files),
-             "size_bytes": dataset_info.size_in_bytes,
-             "downloads": dataset_info.downloads,
-         }
- 
-         # Add request-specific info if applicable
-         if repo_id == f"{HF_ORGANIZATION}/requests":
-             response.update({
-                 "pending_requests": pending_count,
-                 "completed_requests": completed_count
-             })
- 
-         return response
- 
-     except Exception as e:
-         logger.error(f"Error analyzing dataset {repo_id}: {str(e)}")
-         return {
-             "id": repo_id,
-             "error": str(e)
-         }
- 
- def main():
-     """Main function to analyze all datasets"""
-     try:
-         # List of datasets to analyze
-         datasets = [
-             {
-                 "id": f"{HF_ORGANIZATION}/contents",
-                 "description": "Aggregated results"
-             },
-             {
-                 "id": f"{HF_ORGANIZATION}/requests",
-                 "description": "Evaluation requests"
-             },
-             {
-                 "id": f"{HF_ORGANIZATION}/votes",
-                 "description": "User votes"
-             },
-             {
-                 "id": f"{HF_ORGANIZATION}/official-providers",
-                 "description": "Highlighted models"
-             }
-         ]
- 
-         # Analyze each dataset
-         results = []
-         for dataset in datasets:
-             logger.info(f"\nAnalyzing {dataset['description']} ({dataset['id']})...")
-             result = analyze_dataset(dataset['id'])
-             results.append(result)
- 
-             if 'error' in result:
-                 logger.error(f"❌ Error: {result['error']}")
-             else:
-                 logger.info(f"✓ {result['total_entries']} entries")
-                 logger.info(f"✓ {result['file_count']} files")
-                 logger.info(f"✓ {result['size_bytes'] / 1024:.1f} KB")
-                 logger.info(f"✓ {result['downloads']} downloads")
- 
-                 if 'pending_requests' in result:
-                     logger.info(f"✓ {result['pending_requests']} pending requests")
-                     logger.info(f"✓ {result['completed_requests']} completed requests")
- 
-                 if result['last_modified']:
-                     last_modified = datetime.fromisoformat(result['last_modified'].replace('Z', '+00:00'))
-                     logger.info(f"✓ Last modified: {last_modified.strftime('%Y-%m-%d %H:%M:%S')}")
- 
-         return results
- 
-     except Exception as e:
-         logger.error(f"Global error: {str(e)}")
-         return []
- 
- if __name__ == "__main__":
-     main()
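
The counting step above downloads each JSONL into the local HF cache and counts its lines; a standalone version of just that step (pure Python once the file is on disk; the path below is a placeholder):

```python
from pathlib import Path


def count_jsonl_entries(path: Path) -> int:
    """Count non-empty lines in a JSONL file (one entry per line)."""
    with path.open("r", encoding="utf-8") as f:
        return sum(1 for line in f if line.strip())


print(count_jsonl_entries(Path("votes_data.jsonl")))  # placeholder path
```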
 
backend/utils/analyze_prod_models.py DELETED
@@ -1,106 +0,0 @@
- import os
- import json
- import logging
- from datetime import datetime
- from pathlib import Path
- from huggingface_hub import HfApi
- from dotenv import load_dotenv
- from app.config.hf_config import HF_ORGANIZATION
- 
- # Get the backend directory path
- BACKEND_DIR = Path(__file__).parent.parent
- ROOT_DIR = BACKEND_DIR.parent
- 
- # Load environment variables from .env file in root directory
- load_dotenv(ROOT_DIR / ".env")
- 
- # Configure logging
- logging.basicConfig(
-     level=logging.INFO,
-     format='%(message)s'
- )
- logger = logging.getLogger(__name__)
- 
- # Initialize Hugging Face API
- HF_TOKEN = os.getenv("HF_TOKEN")
- if not HF_TOKEN:
-     raise ValueError("HF_TOKEN not found in environment variables")
- api = HfApi(token=HF_TOKEN)
- 
- def count_evaluated_models():
-     """Count the number of evaluated models"""
-     try:
-         # Get dataset info
-         dataset_info = api.dataset_info(repo_id=f"{HF_ORGANIZATION}/contents", repo_type="dataset")
- 
-         # Get file list
-         files = api.list_repo_files(f"{HF_ORGANIZATION}/contents", repo_type="dataset")
- 
-         # Get last commit info (list_repo_commits returns a list, newest first)
-         commits = api.list_repo_commits(f"{HF_ORGANIZATION}/contents", repo_type="dataset")
-         last_commit = commits[0] if commits else None
- 
-         # Count lines in jsonl files
-         total_entries = 0
-         for file in files:
-             if file.endswith('.jsonl'):
-                 try:
-                     # Download the file (hf_hub_download returns a local path)
-                     content = api.hf_hub_download(
-                         repo_id=f"{HF_ORGANIZATION}/contents",
-                         filename=file,
-                         repo_type="dataset"
-                     )
- 
-                     # Count lines
-                     with open(content, 'r') as f:
-                         for _ in f:
-                             total_entries += 1
- 
-                 except Exception as e:
-                     logger.error(f"Error processing file {file}: {str(e)}")
-                     continue
- 
-         # Build response
-         response = {
-             "total_models": total_entries,
-             "last_modified": last_commit.created_at.isoformat() if last_commit else None,
-             "file_count": len(files),
-             "size_bytes": dataset_info.size_in_bytes,
-             "downloads": dataset_info.downloads
-         }
- 
-         return response
- 
-     except Exception as e:
-         logger.error(f"Error counting evaluated models: {str(e)}")
-         return {
-             "error": str(e)
-         }
- 
- def main():
-     """Main function to count evaluated models"""
-     try:
-         logger.info("\nAnalyzing evaluated models...")
-         result = count_evaluated_models()
- 
-         if 'error' in result:
-             logger.error(f"❌ Error: {result['error']}")
-         else:
-             logger.info(f"✓ {result['total_models']} models evaluated")
-             logger.info(f"✓ {result['file_count']} files")
-             logger.info(f"✓ {result['size_bytes'] / 1024:.1f} KB")
-             logger.info(f"✓ {result['downloads']} downloads")
- 
-             if result['last_modified']:
-                 last_modified = datetime.fromisoformat(result['last_modified'].replace('Z', '+00:00'))
-                 logger.info(f"✓ Last modified: {last_modified.strftime('%Y-%m-%d %H:%M:%S')}")
- 
-         return result
- 
-     except Exception as e:
-         logger.error(f"Global error: {str(e)}")
-         return {"error": str(e)}
- 
- if __name__ == "__main__":
-     main()
 
backend/utils/fix_wrong_model_size.py DELETED
@@ -1,110 +0,0 @@
- import json
- import logging
- import asyncio
- from datetime import datetime
- import huggingface_hub
- from huggingface_hub.errors import RepositoryNotFoundError, RevisionNotFoundError
- from dotenv import load_dotenv
- from git import Repo
- from tqdm.auto import tqdm
- from tqdm.contrib.logging import logging_redirect_tqdm
- 
- from app.config.hf_config import HF_TOKEN, API
- 
- from app.utils.model_validation import ModelValidator
- 
- huggingface_hub.logging.set_verbosity_error()
- huggingface_hub.utils.disable_progress_bars()
- 
- logging.basicConfig(
-     level=logging.ERROR,
-     format='%(message)s'
- )
- logger = logging.getLogger(__name__)
- load_dotenv()
- 
- validator = ModelValidator()
- 
- def get_changed_files(repo_path, start_date, end_date):
-     repo = Repo(repo_path)
-     start = datetime.strptime(start_date, '%Y-%m-%d')
-     end = datetime.strptime(end_date, '%Y-%m-%d')
- 
-     changed_files = set()
-     pbar = tqdm(repo.iter_commits(), desc=f"Reading commits from {end_date} to {start_date}")
-     for commit in pbar:
-         commit_date = datetime.fromtimestamp(commit.committed_date)
-         pbar.set_postfix_str(f"Commit date: {commit_date}")
-         if start <= commit_date <= end:
-             changed_files.update(item.a_path for item in commit.diff(commit.parents[0]))
- 
-         if commit_date < start:
-             break
- 
-     return changed_files
- 
- 
- def read_json(repo_path, file):
-     with open(f"{repo_path}/{file}") as f:
-         return json.load(f)
- 
- 
- def write_json(repo_path, file, content):
-     with open(f"{repo_path}/{file}", "w") as f:
-         json.dump(content, f, indent=2)
- 
- 
- def main():
-     requests_path = "/requests"
-     start_date = "2024-12-09"
-     end_date = "2025-01-07"
- 
-     changed_files = get_changed_files(requests_path, start_date, end_date)
- 
-     for file in tqdm(changed_files):
-         try:
-             request_data = read_json(requests_path, file)
-         except FileNotFoundError:
-             tqdm.write(f"File {file} not found")
-             continue
- 
-         try:
-             model_info = API.model_info(
-                 repo_id=request_data["model"],
-                 revision=request_data["revision"],
-                 token=HF_TOKEN
-             )
-         except (RepositoryNotFoundError, RevisionNotFoundError):
-             tqdm.write(f"Model info for {request_data['model']} not found")
-             continue
- 
-         with logging_redirect_tqdm():
-             new_model_size, error = asyncio.run(validator.get_model_size(
-                 model_info=model_info,
-                 precision=request_data["precision"],
-                 base_model=request_data["base_model"],
-                 revision=request_data["revision"]
-             ))
- 
-         if error:
-             tqdm.write(f"Error getting model size info for {request_data['model']}, {error}")
-             continue
- 
-         old_model_size = request_data["params"]
-         if old_model_size != new_model_size:
-             if new_model_size > 100:
-                 tqdm.write(f"Model: {request_data['model']}, size is more than 100B: {new_model_size}")
- 
-             tqdm.write(f"Model: {request_data['model']}, old size: {request_data['params']} new size: {new_model_size}")
-             tqdm.write(f"Updating request file {file}")
- 
-             request_data["params"] = new_model_size
-             write_json(requests_path, file, content=request_data)
- 
- 
- if __name__ == "__main__":
-     main()
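
One portability note on the f-strings edited above: reusing the same quote character inside a double-quoted f-string (e.g. `f"... {request_data["model"]} ..."`) only parses on Python 3.12+ (PEP 701); alternating the quote style keeps the script valid on older interpreters as well. A quick illustration:

```python
request_data = {"model": "org/name"}  # stand-in for a real request entry

# Portable: alternate quote styles inside the f-string.
print(f"Model info for {request_data['model']} not found")

# f"Model info for {request_data["model"]} not found"
# would be a SyntaxError before Python 3.12 (PEP 701 relaxed this).
```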
 
backend/utils/last_activity.py DELETED
@@ -1,164 +0,0 @@
- import os
- import json
- import logging
- from datetime import datetime
- from pathlib import Path
- from typing import Dict, Any, List, Tuple
- from huggingface_hub import HfApi
- from dotenv import load_dotenv
- 
- # Get the backend directory path
- BACKEND_DIR = Path(__file__).parent.parent
- ROOT_DIR = BACKEND_DIR.parent
- 
- # Load environment variables from .env file in root directory
- load_dotenv(ROOT_DIR / ".env")
- 
- # Configure logging
- logging.basicConfig(
-     level=logging.INFO,
-     format='%(message)s'
- )
- logger = logging.getLogger(__name__)
- 
- # Initialize Hugging Face API
- HF_TOKEN = os.getenv("HF_TOKEN")
- if not HF_TOKEN:
-     raise ValueError("HF_TOKEN not found in environment variables")
- api = HfApi(token=HF_TOKEN)
- 
- # Default organization
- HF_ORGANIZATION = os.getenv('HF_ORGANIZATION', 'open-llm-leaderboard')
- 
- def get_last_votes(limit: int = 5) -> List[Dict]:
-     """Get the last votes from the votes dataset"""
-     try:
-         logger.info("\nFetching last votes...")
- 
-         # Download and read votes file
-         logger.info("Downloading votes file...")
-         votes_file = api.hf_hub_download(
-             repo_id=f"{HF_ORGANIZATION}/votes",
-             filename="votes_data.jsonl",
-             repo_type="dataset"
-         )
- 
-         logger.info("Reading votes file...")
-         votes = []
-         with open(votes_file, 'r') as f:
-             for line in f:
-                 try:
-                     vote = json.loads(line)
-                     votes.append(vote)
-                 except json.JSONDecodeError:
-                     continue
- 
-         # Sort by timestamp and get last n votes
-         logger.info("Sorting votes...")
-         votes.sort(key=lambda x: x.get('timestamp', ''), reverse=True)
-         last_votes = votes[:limit]
- 
-         logger.info(f"✓ Found {len(last_votes)} recent votes")
-         return last_votes
- 
-     except Exception as e:
-         logger.error(f"Error reading votes: {str(e)}")
-         return []
- 
- def get_last_models(limit: int = 5) -> List[Dict]:
-     """Get the last models from the requests dataset using commit history"""
-     try:
-         logger.info("\nFetching last model submissions...")
- 
-         # Get commit history
-         logger.info("Getting commit history...")
-         commits = list(api.list_repo_commits(
-             repo_id=f"{HF_ORGANIZATION}/requests",
-             repo_type="dataset"
-         ))
-         logger.info(f"Found {len(commits)} commits")
- 
-         # Track processed files to avoid duplicates
-         processed_files = set()
-         models = []
- 
-         # Process commits until we have enough models
-         for i, commit in enumerate(commits):
-             logger.info(f"Processing commit {i+1}/{len(commits)} ({commit.created_at})")
- 
-             # Look at added/modified files in this commit
-             files_to_process = [f for f in (commit.added + commit.modified) if f.endswith('.json')]
-             if files_to_process:
-                 logger.info(f"Found {len(files_to_process)} JSON files in commit")
- 
-             for file in files_to_process:
-                 if file in processed_files:
-                     continue
- 
-                 processed_files.add(file)
-                 logger.info(f"Downloading {file}...")
- 
-                 try:
-                     # Download and read the file
-                     content = api.hf_hub_download(
-                         repo_id=f"{HF_ORGANIZATION}/requests",
-                         filename=file,
-                         repo_type="dataset"
-                     )
- 
-                     with open(content, 'r') as f:
-                         model_data = json.load(f)
-                         models.append(model_data)
-                         logger.info(f"✓ Added model {model_data.get('model', 'Unknown')}")
- 
-                         if len(models) >= limit:
-                             logger.info("Reached desired number of models")
-                             break
- 
-                 except Exception as e:
-                     logger.error(f"Error reading file {file}: {str(e)}")
-                     continue
- 
-             if len(models) >= limit:
-                 break
- 
-         logger.info(f"✓ Found {len(models)} recent model submissions")
-         return models
- 
-     except Exception as e:
-         logger.error(f"Error reading models: {str(e)}")
-         return []
- 
- def main():
-     """Display last activities from the leaderboard"""
-     try:
-         # Get last votes
-         logger.info("\n=== Last Votes ===")
-         last_votes = get_last_votes()
-         if last_votes:
-             for vote in last_votes:
-                 logger.info(f"\nModel: {vote.get('model')}")
-                 logger.info(f"User: {vote.get('username')}")
-                 logger.info(f"Timestamp: {vote.get('timestamp')}")
-         else:
-             logger.info("No votes found")
- 
-         # Get last model submissions
-         logger.info("\n=== Last Model Submissions ===")
-         last_models = get_last_models()
-         if last_models:
-             for model in last_models:
-                 logger.info(f"\nModel: {model.get('model')}")
-                 logger.info(f"Submitter: {model.get('sender', 'Unknown')}")
-                 logger.info(f"Status: {model.get('status', 'Unknown')}")
-                 logger.info(f"Submission Time: {model.get('submitted_time', 'Unknown')}")
-                 logger.info(f"Precision: {model.get('precision', 'Unknown')}")
-                 logger.info(f"Weight Type: {model.get('weight_type', 'Unknown')}")
-         else:
-             logger.info("No models found")
- 
-     except Exception as e:
-         logger.error(f"Global error: {str(e)}")
- 
- if __name__ == "__main__":
-     main()
 
backend/utils/sync_datasets_locally.py DELETED
@@ -1,130 +0,0 @@
- import os
- import shutil
- import tempfile
- import logging
- from pathlib import Path
- from huggingface_hub import HfApi, snapshot_download, upload_folder, create_repo
- from dotenv import load_dotenv
- 
- # Configure source and destination usernames
- SOURCE_USERNAME = "open-llm-leaderboard"
- DESTINATION_USERNAME = "tfrere"
- 
- # Get the backend directory path
- BACKEND_DIR = Path(__file__).parent.parent
- ROOT_DIR = BACKEND_DIR.parent
- 
- # Load environment variables from .env file in root directory
- load_dotenv(ROOT_DIR / ".env")
- 
- # Configure logging
- logging.basicConfig(
-     level=logging.INFO,
-     format='%(message)s'
- )
- logger = logging.getLogger(__name__)
- 
- # List of dataset names to sync
- DATASET_NAMES = [
-     "votes",
-     "results",
-     "requests",
-     "contents",
-     "official-providers",
- ]
- 
- # Build list of datasets with their source and destination paths
- DATASETS = [
-     (name, f"{SOURCE_USERNAME}/{name}", f"{DESTINATION_USERNAME}/{name}")
-     for name in DATASET_NAMES
- ]
- 
- # Initialize Hugging Face API
- api = HfApi()
- 
- def ensure_repo_exists(repo_id, token):
-     """Ensure the repository exists, create it if it doesn't"""
-     try:
-         api.repo_info(repo_id=repo_id, repo_type="dataset")
-         logger.info(f"✓ Repository {repo_id} already exists")
-     except Exception:
-         logger.info(f"Creating repository {repo_id}...")
-         create_repo(
-             repo_id=repo_id,
-             repo_type="dataset",
-             token=token,
-             private=True
-         )
-         logger.info(f"✓ Repository {repo_id} created")
- 
- def process_dataset(dataset_info, token):
-     """Process a single dataset"""
-     name, source_dataset, destination_dataset = dataset_info
-     try:
-         logger.info(f"\n📥 Processing dataset: {name}")
- 
-         # Ensure destination repository exists
-         ensure_repo_exists(destination_dataset, token)
- 
-         # Create a temporary directory for this dataset
-         with tempfile.TemporaryDirectory() as temp_dir:
-             try:
-                 # List files in source dataset
-                 logger.info(f"Listing files in {source_dataset}...")
-                 files = api.list_repo_files(source_dataset, repo_type="dataset")
-                 logger.info(f"Detected structure: {len(files)} files")
- 
-                 # Download dataset
-                 logger.info(f"Downloading from {source_dataset}...")
-                 local_dir = snapshot_download(
-                     repo_id=source_dataset,
-                     repo_type="dataset",
-                     local_dir=temp_dir,
-                     token=token
-                 )
-                 logger.info("✓ Download complete")
- 
-                 # Upload to destination while preserving structure
-                 logger.info(f"📤 Uploading to {destination_dataset}...")
-                 api.upload_folder(
-                     folder_path=local_dir,
-                     repo_id=destination_dataset,
-                     repo_type="dataset",
-                     token=token
-                 )
-                 logger.info(f"✅ {name} copied successfully!")
-                 return True
- 
-             except Exception as e:
-                 logger.error(f"❌ Error processing {name}: {str(e)}")
-                 return False
- 
-     except Exception as e:
-         logger.error(f"❌ Error for {name}: {str(e)}")
-         return False
- 
- def copy_datasets():
-     try:
-         logger.info("🔑 Checking authentication...")
-         # Get token from .env file
-         token = os.getenv("HF_TOKEN")
-         if not token:
-             raise ValueError("HF_TOKEN not found in .env file")
- 
-         # Process datasets sequentially
-         results = []
-         for dataset_info in DATASETS:
-             success = process_dataset(dataset_info, token)
-             results.append((dataset_info[0], success))
- 
-         # Print final summary
-         logger.info("\n📊 Final summary:")
-         for dataset, success in results:
-             status = "✅ Success" if success else "❌ Failure"
-             logger.info(f"{dataset}: {status}")
- 
-     except Exception as e:
-         logger.error(f"❌ Global error: {str(e)}")
- 
- if __name__ == "__main__":
-     copy_datasets()
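
As a usage aside, a read-only dry run can be handy before copying anything; a hedged sketch that only lists what would be synced (uses `list_repo_files`, which the script already relies on, and assumes the source datasets are readable with your current credentials):

```python
from huggingface_hub import HfApi

api = HfApi()  # pass token=... here if the source datasets are private
for name in ["votes", "results", "requests", "contents", "official-providers"]:
    files = api.list_repo_files(f"open-llm-leaderboard/{name}", repo_type="dataset")
    print(f"{name}: {len(files)} files would be copied")
```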
 
backend/uv.lock DELETED
The diff for this file is too large to render. See raw diff
 
docker-compose.yml DELETED
@@ -1,33 +0,0 @@
- services:
-   backend:
-     build:
-       context: ./backend
-       dockerfile: Dockerfile.dev
-       args:
-         - HF_TOKEN=${HF_TOKEN}
-     ports:
-       - "${BACKEND_PORT:-8000}:8000"
-     volumes:
-       - ./backend:/app
-     environment:
-       - ENVIRONMENT=${ENVIRONMENT:-development}
-       - HF_TOKEN=${HF_TOKEN}
-       - HF_HOME=${HF_HOME:-/.cache}
-     command: uvicorn app.asgi:app --host 0.0.0.0 --port 8000 --reload
- 
-   frontend:
-     build:
-       context: ./frontend
-       dockerfile: Dockerfile.dev
-     ports:
-       - "${FRONTEND_PORT:-7860}:7860"
-     volumes:
-       - ./frontend:/app
-       - /app/node_modules
-     environment:
-       - NODE_ENV=${ENVIRONMENT:-development}
-       - CHOKIDAR_USEPOLLING=true
-       - PORT=${FRONTEND_PORT:-7860}
-     command: npm start
-     stdin_open: true
-     tty: true
 
frontend/Dockerfile.dev DELETED
@@ -1,15 +0,0 @@
- FROM node:18
- 
- WORKDIR /app
- 
- # Install required global dependencies
- RUN npm install -g react-scripts
- 
- # Copy package.json and package-lock.json
- COPY package*.json ./
- 
- # Install project dependencies
- RUN npm install
- 
- # Volume will be mounted here, no need for COPY
- CMD ["npm", "start"]
 
frontend/README.md DELETED
@@ -1,80 +0,0 @@
- # Frontend - Open LLM Leaderboard 🏆
- 
- React interface for exploring and comparing open-source language models.
- 
- ## 🏗 Architecture
- 
- ```mermaid
- flowchart TD
-     Client(["User Browser"]) --> Components["React Components"]
- 
-     subgraph Frontend
-         Components --> Context["Context Layer<br>• LeaderboardContext<br>• Global State"]
- 
-         API["API Layer<br>• /api/leaderboard/formatted<br>• TanStack Query"] --> |Data Feed| Context
- 
-         Context --> Hooks["Hooks Layer<br>• Data Processing<br>• Filtering<br>• Caching"]
- 
-         Hooks --> Features["Features<br>• Table Management<br>• Search & Filters<br>• Display Options"]
-         Features --> Cache["Cache Layer<br>• LocalStorage<br>• URL State"]
-     end
- 
-     API --> Backend["Backend Server"]
- 
-     style Backend fill:#f96,stroke:#333,stroke-width:2px
- ```
- 
- ## ✨ Core Features
- 
- - 🔍 **Search & Filters**: Real-time filtering, regex search, advanced filters
- - 📊 **Data Visualization**: Interactive table, customizable columns, sorting
- - 🔄 **State Management**: URL sync, client-side caching (5min TTL)
- - 📱 **Responsive Design**: Mobile-friendly, dark/light themes
- 
- ## 🛠 Tech Stack
- 
- - React 18 + Material-UI
- - TanStack Query & Table
- - React Router v6
- 
- ## 📁 Project Structure
- 
- ```
- src/
- ├── pages/
- │   └── LeaderboardPage/
- │       ├── components/ # UI Components
- │       ├── context/    # Global State
- │       └── hooks/      # Data Processing
- ├── components/         # Shared Components
- └── utils/              # Helper Functions
- ```
- 
- ## 🚀 Development
- 
- ```bash
- # Install dependencies
- npm install
- 
- # Start development server
- npm start
- 
- # Production build
- npm run build
- ```
- 
- ## 🔧 Environment Variables
- 
- ```env
- # API Configuration
- REACT_APP_API_URL=http://localhost:8000
- REACT_APP_CACHE_DURATION=300000 # 5 minutes
- ```
- 
- ## 🔄 Data Flow
- 
- 1. API fetches leaderboard data from backend
- 2. Context stores and manages global state
- 3. Hooks handle data processing and filtering
- 4. Components render based on processed data
- 5. Cache maintains user preferences and URL state
 
frontend/package.json DELETED
@@ -1,55 +0,0 @@
- {
-   "name": "open-llm-leaderboard",
-   "version": "0.1.0",
-   "private": true,
-   "dependencies": {
-     "@emotion/react": "^11.13.3",
-     "@emotion/styled": "^11.13.0",
-     "@huggingface/hub": "^0.14.0",
-     "@mui/icons-material": "^6.1.7",
-     "@mui/lab": "^6.0.0-beta.16",
-     "@mui/material": "^6.1.6",
-     "@mui/x-data-grid": "^7.22.2",
-     "@tanstack/react-query": "^5.62.2",
-     "@tanstack/react-table": "^8.20.5",
-     "@tanstack/react-virtual": "^3.10.9",
-     "@testing-library/jest-dom": "^5.17.0",
-     "@testing-library/react": "^13.4.0",
-     "@testing-library/user-event": "^13.5.0",
-     "compression": "^1.7.4",
-     "cors": "^2.8.5",
-     "express": "^4.18.2",
-     "react": "^18.3.1",
-     "react-dom": "^18.3.1",
-     "react-router-dom": "^6.28.0",
-     "react-scripts": "5.0.1",
-     "serve-static": "^1.15.0",
-     "web-vitals": "^2.1.4"
-   },
-   "scripts": {
-     "start": "react-scripts start",
-     "build": "react-scripts build",
-     "test": "react-scripts test",
-     "eject": "react-scripts eject",
-     "serve": "node server.js"
-   },
-   "eslintConfig": {
-     "extends": [
-       "react-app",
-       "react-app/jest"
-     ]
-   },
-   "browserslist": {
-     "production": [
-       ">0.2%",
-       "not dead",
-       "not op_mini all"
-     ],
-     "development": [
-       "last 1 chrome version",
-       "last 1 firefox version",
-       "last 1 safari version"
-     ]
-   },
-   "proxy": "http://backend:8000"
- }
 
frontend/public/index.html DELETED
@@ -1,96 +0,0 @@
- <!DOCTYPE html>
- <html lang="en">
-   <head>
-     <meta charset="utf-8" />
-     <link rel="icon" href="%PUBLIC_URL%/logo32.png" />
-     <meta
-       name="viewport"
-       content="width=device-width, initial-scale=1, maximum-scale=1, user-scalable=no, viewport-fit=cover"
-     />
-     <meta
-       name="description"
-       content="Interactive leaderboard tracking and comparing open-source Large Language Models across multiple benchmarks: IFEval, BBH, MATH, GPQA, MUSR, and MMLU-PRO."
-     />
- 
-     <!-- Open Graph / Facebook -->
-     <meta property="og:type" content="website" />
-     <meta
-       property="og:url"
-       content="https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard"
-     />
-     <meta
-       property="og:title"
-       content="Open LLM Leaderboard - Compare Open Source Large Language Models"
-     />
-     <meta
-       property="og:description"
-       content="Interactive leaderboard for comparing LLM performance across multiple benchmarks. Features real-time filtering, community voting, and comprehensive model analysis with benchmarks like IFEval, BBH, MATH, GPQA, MUSR, and MMLU-PRO."
-     />
-     <meta property="og:image" content="%PUBLIC_URL%/og-image.png" />
- 
-     <!-- Twitter -->
-     <meta property="twitter:card" content="summary_large_image" />
-     <meta
-       property="twitter:url"
-       content="https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard"
-     />
-     <meta
-       property="twitter:title"
-       content="Open LLM Leaderboard - Compare Open Source Large Language Models"
-     />
-     <meta
-       property="twitter:description"
-       content="Interactive leaderboard for comparing LLM performance across multiple benchmarks. Features real-time filtering, community voting, and comprehensive model analysis with benchmarks like IFEval, BBH, MATH, GPQA, MUSR, and MMLU-PRO."
-     />
-     <meta property="twitter:image" content="%PUBLIC_URL%/og-image.png" />
-     <!--
-       Notice the use of %PUBLIC_URL% in the tags above.
-       It will be replaced with the URL of the `public` folder during the build.
-       Only files inside the `public` folder can be referenced from the HTML.
- 
-       Unlike "/favicon.ico" or "favicon.ico", "%PUBLIC_URL%/favicon.ico" will
-       work correctly both with client-side routing and a non-root public URL.
-       Learn how to configure a non-root public URL by running `npm run build`.
-     -->
-     <title>
-       Open LLM Leaderboard - Compare Open Source Large Language Models
-     </title>
-     <link
-       href="https://fonts.googleapis.com/css2?family=Source+Sans+Pro:wght@400;600;700&display=swap"
-       rel="stylesheet"
-     />
-     <style>
-       html,
-       body {
-         position: fixed;
-         width: 100%;
-         height: 100%;
-         overflow: hidden;
-         -webkit-overflow-scrolling: touch;
-       }
-       #root {
-         position: absolute;
-         top: 0;
-         left: 0;
-         right: 0;
-         bottom: 0;
-         overflow-y: auto;
-         -webkit-overflow-scrolling: touch;
-       }
-     </style>
-   </head>
-   <body>
-     <noscript>You need to enable JavaScript to run this app.</noscript>
-     <div id="root"></div>
-     <!--
-       This HTML file is a template.
-       If you open it directly in the browser, you will see an empty page.
- 
-       You can add webfonts, meta tags, or analytics to this file.
-       The build step will place the bundled scripts into the <body> tag.
- 
-       To begin the development, run `npm start` or `yarn start`.
-       To create a production bundle, use `npm run build` or `yarn build`.
-     -->
-   </body>
- </html>
 
frontend/public/logo256.png DELETED
Binary file (24.6 kB)
 
frontend/public/logo32.png DELETED
Binary file (1.96 kB)
 
frontend/public/og-image.jpg DELETED
Binary file (13.8 kB)