This view is limited to 50 files because it contains too many changes. See the raw diff here.
Files changed (50)
  1. .env.example +0 -3
  2. .gitattributes +35 -0
  3. .gitignore +10 -43
  4. .pre-commit-config.yaml +53 -0
  5. Dockerfile +0 -62
  6. Makefile +13 -0
  7. README.md +13 -75
  8. app.py +448 -0
  9. backend/Dockerfile.dev +0 -25
  10. backend/README.md +0 -352
  11. backend/__init__.py +0 -0
  12. backend/app/api/__init__.py +0 -5
  13. backend/app/api/dependencies.py +0 -34
  14. backend/app/api/endpoints/leaderboard.py +0 -49
  15. backend/app/api/endpoints/models.py +0 -103
  16. backend/app/api/endpoints/votes.py +0 -105
  17. backend/app/api/router.py +0 -9
  18. backend/app/asgi.py +0 -106
  19. backend/app/config/__init__.py +0 -6
  20. backend/app/config/base.py +0 -38
  21. backend/app/config/hf_config.py +0 -30
  22. backend/app/config/logging_config.py +0 -38
  23. backend/app/core/cache.py +0 -109
  24. backend/app/core/fastapi_cache.py +0 -48
  25. backend/app/core/formatting.py +0 -104
  26. backend/app/main.py +0 -18
  27. backend/app/services/__init__.py +0 -3
  28. backend/app/services/hf_service.py +0 -50
  29. backend/app/services/leaderboard.py +0 -208
  30. backend/app/services/models.py +0 -587
  31. backend/app/services/rate_limiter.py +0 -72
  32. backend/app/services/votes.py +0 -390
  33. backend/app/utils/__init__.py +0 -3
  34. backend/app/utils/logging.py +0 -3
  35. backend/app/utils/model_validation.py +0 -266
  36. backend/pyproject.toml +0 -31
  37. backend/utils/analyze_prod_datasets.py +0 -170
  38. backend/utils/analyze_prod_models.py +0 -106
  39. backend/utils/fix_wrong_model_size.py +0 -110
  40. backend/utils/last_activity.py +0 -164
  41. backend/utils/sync_datasets_locally.py +0 -130
  42. backend/uv.lock +0 -0
  43. docker-compose.yml +0 -33
  44. frontend/Dockerfile.dev +0 -15
  45. frontend/README.md +0 -80
  46. frontend/package.json +0 -55
  47. frontend/public/index.html +0 -96
  48. frontend/public/logo256.png +0 -0
  49. frontend/public/logo32.png +0 -0
  50. frontend/public/og-image.jpg +0 -0
.env.example DELETED
@@ -1,3 +0,0 @@
- ENVIRONMENT=development
- HF_TOKEN=xxx
- HF_HOME=.cache
.gitattributes ADDED
@@ -0,0 +1,35 @@
+ *.7z filter=lfs diff=lfs merge=lfs -text
+ *.arrow filter=lfs diff=lfs merge=lfs -text
+ *.bin filter=lfs diff=lfs merge=lfs -text
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
+ *.ftz filter=lfs diff=lfs merge=lfs -text
+ *.gz filter=lfs diff=lfs merge=lfs -text
+ *.h5 filter=lfs diff=lfs merge=lfs -text
+ *.joblib filter=lfs diff=lfs merge=lfs -text
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
+ *.model filter=lfs diff=lfs merge=lfs -text
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
+ *.npy filter=lfs diff=lfs merge=lfs -text
+ *.npz filter=lfs diff=lfs merge=lfs -text
+ *.onnx filter=lfs diff=lfs merge=lfs -text
+ *.ot filter=lfs diff=lfs merge=lfs -text
+ *.parquet filter=lfs diff=lfs merge=lfs -text
+ *.pb filter=lfs diff=lfs merge=lfs -text
+ *.pickle filter=lfs diff=lfs merge=lfs -text
+ *.pkl filter=lfs diff=lfs merge=lfs -text
+ *.pt filter=lfs diff=lfs merge=lfs -text
+ *.pth filter=lfs diff=lfs merge=lfs -text
+ *.rar filter=lfs diff=lfs merge=lfs -text
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
+ *.tflite filter=lfs diff=lfs merge=lfs -text
+ *.tgz filter=lfs diff=lfs merge=lfs -text
+ *.wasm filter=lfs diff=lfs merge=lfs -text
+ *.xz filter=lfs diff=lfs merge=lfs -text
+ *.zip filter=lfs diff=lfs merge=lfs -text
+ *.zst filter=lfs diff=lfs merge=lfs -text
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
+ scale-hf-logo.png filter=lfs diff=lfs merge=lfs -text
.gitignore CHANGED
@@ -1,45 +1,12 @@
- # See https://help.github.com/articles/ignoring-files/ for more about ignoring files.
-
- __pycache__
- .cache/
-
- # dependencies
-
- frontend/node_modules
- /.pnp
- .pnp.js
-
- # testing
-
- /coverage
-
- # production
-
- /build
-
- # misc
-
- .DS_Store
- .env.local
- .env.development.local
- .env.test.local
- .env.production.local
-
- npm-debug.log*
- yarn-debug.log*
- yarn-error.log*
-
- src/dataframe.json
-
- yarn.lock
- package-lock.json
-
- /public
-
- .claudesync/
-
- # Environment variables
+ venv/
+ __pycache__/
  .env
- .env.*
- !.env.example
+ .ipynb_checkpoints
+ *ipynb
+ .vscode/
+
+ eval-queue/
+ eval-results/
+ dynamic-info/

+ src/assets/model_counts.html
.pre-commit-config.yaml ADDED
@@ -0,0 +1,53 @@
+ # Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ #     http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
+ default_language_version:
+   python: python3
+
+ ci:
+   autofix_prs: true
+   autoupdate_commit_msg: '[pre-commit.ci] pre-commit suggestions'
+   autoupdate_schedule: quarterly
+
+ repos:
+   - repo: https://github.com/pre-commit/pre-commit-hooks
+     rev: v4.3.0
+     hooks:
+       - id: check-yaml
+       - id: check-case-conflict
+       - id: detect-private-key
+       - id: check-added-large-files
+         args: ['--maxkb=1000']
+       - id: requirements-txt-fixer
+       - id: end-of-file-fixer
+       - id: trailing-whitespace
+
+   - repo: https://github.com/PyCQA/isort
+     rev: 5.12.0
+     hooks:
+       - id: isort
+         name: Format imports
+
+   - repo: https://github.com/psf/black
+     rev: 22.12.0
+     hooks:
+       - id: black
+         name: Format code
+         additional_dependencies: ['click==8.0.2']
+
+   - repo: https://github.com/charliermarsh/ruff-pre-commit
+     # Ruff version.
+     rev: 'v0.0.267'
+     hooks:
+       - id: ruff
Dockerfile DELETED
@@ -1,62 +0,0 @@
- # Build frontend
- FROM node:18 as frontend-build
- WORKDIR /app
- COPY frontend/package*.json ./
- RUN npm install
- COPY frontend/ ./
-
- RUN npm run build
-
- # Build backend
- FROM python:3.12-slim
- WORKDIR /app
-
- # Create non-root user
- RUN useradd -m -u 1000 user
-
- # Install poetry
- RUN pip install poetry
-
- # Create and configure cache directory
- RUN mkdir -p /app/.cache && \
-     chown -R user:user /app
-
- # Copy and install backend dependencies
- COPY backend/pyproject.toml backend/poetry.lock* ./
- RUN poetry config virtualenvs.create false \
-     && poetry install --no-interaction --no-ansi --no-root --only main
-
- # Copy backend code
- COPY backend/ .
-
- # Install Node.js and npm
- RUN apt-get update && apt-get install -y \
-     curl \
-     netcat-openbsd \
-     && curl -fsSL https://deb.nodesource.com/setup_18.x | bash - \
-     && apt-get install -y nodejs \
-     && rm -rf /var/lib/apt/lists/*
-
- # Copy frontend server and build
- COPY --from=frontend-build /app/build ./frontend/build
- COPY --from=frontend-build /app/package*.json ./frontend/
- COPY --from=frontend-build /app/server.js ./frontend/
-
- # Install frontend production dependencies
- WORKDIR /app/frontend
- RUN npm install --production
- WORKDIR /app
-
- # Environment variables
- ENV HF_HOME=/app/.cache \
-     HF_DATASETS_CACHE=/app/.cache \
-     INTERNAL_API_PORT=7861 \
-     PORT=7860 \
-     NODE_ENV=production
-
- # Note: HF_TOKEN should be provided at runtime, not build time
- USER user
- EXPOSE 7860
-
- # Start both servers with wait-for
- CMD ["sh", "-c", "uvicorn app.asgi:app --host 0.0.0.0 --port 7861 & while ! nc -z localhost 7861; do sleep 1; done && cd frontend && npm run serve"]
Makefile ADDED
@@ -0,0 +1,13 @@
+ .PHONY: style format
+
+
+ style:
+ 	python -m black --line-length 119 .
+ 	python -m isort .
+ 	ruff check --fix .
+
+
+ quality:
+ 	python -m black --check --line-length 119 .
+ 	python -m isort --check-only .
+ 	ruff check .
README.md CHANGED
@@ -1,85 +1,23 @@
  ---
  title: Open LLM Leaderboard
  emoji: 🏆
- colorFrom: blue
- colorTo: red
- sdk: docker
- hf_oauth: true
+ colorFrom: green
+ colorTo: indigo
+ sdk: gradio
+ sdk_version: 4.9.0
+ app_file: app.py
  pinned: true
  license: apache-2.0
- duplicated_from: open-llm-leaderboard/open_llm_leaderboard
+ duplicated_from: HuggingFaceH4/open_llm_leaderboard
+ fullWidth: true
+ space_ci:
+   private: true
+   secrets:
+     - HF_TOKEN
+     - H4_TOKEN
  tags:
    - leaderboard
  short_description: Track, rank and evaluate open LLMs and chatbots
  ---

- # Open LLM Leaderboard
-
- Modern React interface for comparing Large Language Models (LLMs) in an open and reproducible way.
-
- ## Features
-
- - 📊 Interactive table with advanced sorting and filtering
- - 🔍 Semantic model search
- - 📌 Pin models for comparison
- - 📱 Responsive and modern interface
- - 🎨 Dark/Light mode
- - ⚡️ Optimized performance with virtualization
-
- ## Architecture
-
- The project is split into two main parts:
-
- ### Frontend (React)
-
- ```
- frontend/
- ├── src/
- │   ├── components/   # Reusable UI components
- │   ├── pages/        # Application pages
- │   ├── hooks/        # Custom React hooks
- │   ├── context/      # React contexts
- │   └── constants/    # Constants and configurations
- ├── public/           # Static assets
- └── server.js         # Express server for production
- ```
-
- ### Backend (FastAPI)
-
- ```
- backend/
- ├── app/
- │   ├── api/            # API router and endpoints
- │   │   └── endpoints/  # Specific API endpoints
- │   ├── core/           # Core functionality
- │   ├── config/         # Configuration
- │   └── services/       # Business logic services
- │       ├── leaderboard.py
- │       ├── models.py
- │       ├── votes.py
- │       └── hf_service.py
- └── utils/              # Utility functions
- ```
-
- ## Technologies
-
- ### Frontend
-
- - React
- - Material-UI
- - TanStack Table & Virtual
- - Express.js
-
- ### Backend
-
- - FastAPI
- - Hugging Face API
- - Docker
-
- ## Development
-
- The application is containerized using Docker and can be run using:
-
- ```bash
- docker-compose up
- ```
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,448 @@
+ import gradio as gr
+ import pandas as pd
+ from apscheduler.schedulers.background import BackgroundScheduler
+ from huggingface_hub import snapshot_download
+ from gradio_space_ci import enable_space_ci
+
+ from src.display.about import (
+     CITATION_BUTTON_LABEL,
+     CITATION_BUTTON_TEXT,
+     EVALUATION_QUEUE_TEXT,
+     INTRODUCTION_TEXT,
+     LLM_BENCHMARKS_TEXT,
+     FAQ_TEXT,
+     TITLE,
+ )
+ from src.display.css_html_js import custom_css
+ from src.display.utils import (
+     BENCHMARK_COLS,
+     COLS,
+     EVAL_COLS,
+     EVAL_TYPES,
+     NUMERIC_INTERVALS,
+     TYPES,
+     AutoEvalColumn,
+     ModelType,
+     fields,
+     WeightType,
+     Precision
+ )
+ from src.envs import API, EVAL_REQUESTS_PATH, DYNAMIC_INFO_REPO, DYNAMIC_INFO_FILE_PATH, DYNAMIC_INFO_PATH, EVAL_RESULTS_PATH, H4_TOKEN, IS_PUBLIC, QUEUE_REPO, REPO_ID, RESULTS_REPO
+ from src.populate import get_evaluation_queue_df, get_leaderboard_df
+ from src.submission.submit import add_new_eval
+ from src.scripts.update_all_request_files import update_dynamic_files
+ from src.tools.collections import update_collections
+ from src.tools.plots import (
+     create_metric_plot_obj,
+     create_plot_df,
+     create_scores_df,
+ )
+
+ # Start ephemeral Spaces on PRs (see config in README.md)
+ #enable_space_ci()
+
+ def restart_space():
+     API.restart_space(repo_id=REPO_ID, token=H4_TOKEN)
+
+
+ def init_space(full_init: bool = True):
+     if full_init:
+         try:
+             print(EVAL_REQUESTS_PATH)
+             snapshot_download(
+                 repo_id=QUEUE_REPO, local_dir=EVAL_REQUESTS_PATH, repo_type="dataset", tqdm_class=None, etag_timeout=30
+             )
+         except Exception:
+             restart_space()
+         try:
+             print(DYNAMIC_INFO_PATH)
+             snapshot_download(
+                 repo_id=DYNAMIC_INFO_REPO, local_dir=DYNAMIC_INFO_PATH, repo_type="dataset", tqdm_class=None, etag_timeout=30
+             )
+         except Exception:
+             restart_space()
+         try:
+             print(EVAL_RESULTS_PATH)
+             snapshot_download(
+                 repo_id=RESULTS_REPO, local_dir=EVAL_RESULTS_PATH, repo_type="dataset", tqdm_class=None, etag_timeout=30
+             )
+         except Exception:
+             restart_space()
+
+
+     raw_data, original_df = get_leaderboard_df(
+         results_path=EVAL_RESULTS_PATH,
+         requests_path=EVAL_REQUESTS_PATH,
+         dynamic_path=DYNAMIC_INFO_FILE_PATH,
+         cols=COLS,
+         benchmark_cols=BENCHMARK_COLS
+     )
+     update_collections(original_df.copy())
+     leaderboard_df = original_df.copy()
+
+     plot_df = create_plot_df(create_scores_df(raw_data))
+
+     (
+         finished_eval_queue_df,
+         running_eval_queue_df,
+         pending_eval_queue_df,
+     ) = get_evaluation_queue_df(EVAL_REQUESTS_PATH, EVAL_COLS)
+
+     return leaderboard_df, original_df, plot_df, finished_eval_queue_df, running_eval_queue_df, pending_eval_queue_df
+
+ leaderboard_df, original_df, plot_df, finished_eval_queue_df, running_eval_queue_df, pending_eval_queue_df = init_space()
+
+
+ # Searching and filtering
+ def update_table(
+     hidden_df: pd.DataFrame,
+     columns: list,
+     type_query: list,
+     precision_query: str,
+     size_query: list,
+     hide_models: list,
+     query: str,
+ ):
+     filtered_df = filter_models(df=hidden_df, type_query=type_query, size_query=size_query, precision_query=precision_query, hide_models=hide_models)
+     filtered_df = filter_queries(query, filtered_df)
+     df = select_columns(filtered_df, columns)
+     return df
+
+
+ def load_query(request: gr.Request):  # triggered only once at startup => read query parameter if it exists
+     query = request.query_params.get("query") or ""
+     return query, query  # return one for the "search_bar", one for a hidden component that triggers a reload only if value has changed
+
+
+ def search_table(df: pd.DataFrame, query: str) -> pd.DataFrame:
+     return df[(df[AutoEvalColumn.dummy.name].str.contains(query, case=False))]
+
+
+ def select_columns(df: pd.DataFrame, columns: list) -> pd.DataFrame:
+     always_here_cols = [c.name for c in fields(AutoEvalColumn) if c.never_hidden]
+     dummy_col = [AutoEvalColumn.dummy.name]
+     #AutoEvalColumn.model_type_symbol.name,
+     #AutoEvalColumn.model.name,
+     # We use COLS to maintain sorting
+     filtered_df = df[
+         always_here_cols + [c for c in COLS if c in df.columns and c in columns] + dummy_col
+     ]
+     return filtered_df
+
+
+ def filter_queries(query: str, filtered_df: pd.DataFrame):
+     """Added by Abishek"""
+     final_df = []
+     if query != "":
+         queries = [q.strip() for q in query.split(";")]
+         for _q in queries:
+             _q = _q.strip()
+             if _q != "":
+                 temp_filtered_df = search_table(filtered_df, _q)
+                 if len(temp_filtered_df) > 0:
+                     final_df.append(temp_filtered_df)
+         if len(final_df) > 0:
+             filtered_df = pd.concat(final_df)
+             filtered_df = filtered_df.drop_duplicates(
+                 subset=[AutoEvalColumn.model.name, AutoEvalColumn.precision.name, AutoEvalColumn.revision.name]
+             )
+
+     return filtered_df
+
+
+ def filter_models(
+     df: pd.DataFrame, type_query: list, size_query: list, precision_query: list, hide_models: list
+ ) -> pd.DataFrame:
+     # Show all models
+     if "Private or deleted" in hide_models:
+         filtered_df = df[df[AutoEvalColumn.still_on_hub.name] == True]
+     else:
+         filtered_df = df
+
+     if "Contains a merge/moerge" in hide_models:
+         filtered_df = filtered_df[filtered_df[AutoEvalColumn.merged.name] == False]
+
+     if "MoE" in hide_models:
+         filtered_df = filtered_df[filtered_df[AutoEvalColumn.moe.name] == False]
+
+     if "Flagged" in hide_models:
+         filtered_df = filtered_df[filtered_df[AutoEvalColumn.flagged.name] == False]
+
+     type_emoji = [t[0] for t in type_query]
+     filtered_df = filtered_df.loc[df[AutoEvalColumn.model_type_symbol.name].isin(type_emoji)]
+     filtered_df = filtered_df.loc[df[AutoEvalColumn.precision.name].isin(precision_query + ["None"])]
+
+     numeric_interval = pd.IntervalIndex(sorted([NUMERIC_INTERVALS[s] for s in size_query]))
+     params_column = pd.to_numeric(df[AutoEvalColumn.params.name], errors="coerce")
+     mask = params_column.apply(lambda x: any(numeric_interval.contains(x)))
+     filtered_df = filtered_df.loc[mask]
+
+     return filtered_df
+
+ leaderboard_df = filter_models(
+     df=leaderboard_df,
+     type_query=[t.to_str(" : ") for t in ModelType],
+     size_query=list(NUMERIC_INTERVALS.keys()),
+     precision_query=[i.value.name for i in Precision],
+     hide_models=["Private or deleted", "Contains a merge/moerge", "Flagged"], # Deleted, merges, flagged, MoEs
+ )
+
+ demo = gr.Blocks(css=custom_css)
+ with demo:
+     gr.HTML(TITLE)
+     gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
+
+     with gr.Tabs(elem_classes="tab-buttons") as tabs:
+         with gr.TabItem("🏅 LLM Benchmark", elem_id="llm-benchmark-tab-table", id=0):
+             with gr.Row():
+                 with gr.Column():
+                     with gr.Row():
+                         search_bar = gr.Textbox(
+                             placeholder=" 🔍 Search for your model (separate multiple queries with `;`) and press ENTER...",
+                             show_label=False,
+                             elem_id="search-bar",
+                         )
+                     with gr.Row():
+                         shown_columns = gr.CheckboxGroup(
+                             choices=[
+                                 c.name
+                                 for c in fields(AutoEvalColumn)
+                                 if not c.hidden and not c.never_hidden and not c.dummy
+                             ],
+                             value=[
+                                 c.name
+                                 for c in fields(AutoEvalColumn)
+                                 if c.displayed_by_default and not c.hidden and not c.never_hidden
+                             ],
+                             label="Select columns to show",
+                             elem_id="column-select",
+                             interactive=True,
+                         )
+                     with gr.Row():
+                         hide_models = gr.CheckboxGroup(
+                             label="Hide models",
+                             choices = ["Private or deleted", "Contains a merge/moerge", "Flagged", "MoE"],
+                             value=["Private or deleted", "Contains a merge/moerge", "Flagged"],
+                             interactive=True
+                         )
+                 with gr.Column(min_width=320):
+                     #with gr.Box(elem_id="box-filter"):
+                     filter_columns_type = gr.CheckboxGroup(
+                         label="Model types",
+                         choices=[t.to_str() for t in ModelType],
+                         value=[t.to_str() for t in ModelType],
+                         interactive=True,
+                         elem_id="filter-columns-type",
+                     )
+                     filter_columns_precision = gr.CheckboxGroup(
+                         label="Precision",
+                         choices=[i.value.name for i in Precision],
+                         value=[i.value.name for i in Precision],
+                         interactive=True,
+                         elem_id="filter-columns-precision",
+                     )
+                     filter_columns_size = gr.CheckboxGroup(
+                         label="Model sizes (in billions of parameters)",
+                         choices=list(NUMERIC_INTERVALS.keys()),
+                         value=list(NUMERIC_INTERVALS.keys()),
+                         interactive=True,
+                         elem_id="filter-columns-size",
+                     )
+
+             leaderboard_table = gr.components.Dataframe(
+                 value=leaderboard_df[
+                     [c.name for c in fields(AutoEvalColumn) if c.never_hidden]
+                     + shown_columns.value
+                     + [AutoEvalColumn.dummy.name]
+                 ],
+                 headers=[c.name for c in fields(AutoEvalColumn) if c.never_hidden] + shown_columns.value,
+                 datatype=TYPES,
+                 elem_id="leaderboard-table",
+                 interactive=False,
+                 visible=True,
+                 #column_widths=["2%", "33%"]
+             )
+
+             # Dummy leaderboard for handling the case when the user uses backspace key
+             hidden_leaderboard_table_for_search = gr.components.Dataframe(
+                 value=original_df[COLS],
+                 headers=COLS,
+                 datatype=TYPES,
+                 visible=False,
+             )
+             search_bar.submit(
+                 update_table,
+                 [
+                     hidden_leaderboard_table_for_search,
+                     shown_columns,
+                     filter_columns_type,
+                     filter_columns_precision,
+                     filter_columns_size,
+                     hide_models,
+                     search_bar,
+                 ],
+                 leaderboard_table,
+             )
+
+             # Define a hidden component that will trigger a reload only if a query parameter has been set
+             hidden_search_bar = gr.Textbox(value="", visible=False)
+             hidden_search_bar.change(
+                 update_table,
+                 [
+                     hidden_leaderboard_table_for_search,
+                     shown_columns,
+                     filter_columns_type,
+                     filter_columns_precision,
+                     filter_columns_size,
+                     hide_models,
+                     search_bar,
+                 ],
+                 leaderboard_table,
+             )
+             # Check query parameter once at startup and update search bar + hidden component
+             demo.load(load_query, inputs=[], outputs=[search_bar, hidden_search_bar])
+
+             for selector in [shown_columns, filter_columns_type, filter_columns_precision, filter_columns_size, hide_models]:
+                 selector.change(
+                     update_table,
+                     [
+                         hidden_leaderboard_table_for_search,
+                         shown_columns,
+                         filter_columns_type,
+                         filter_columns_precision,
+                         filter_columns_size,
+                         hide_models,
+                         search_bar,
+                     ],
+                     leaderboard_table,
+                     queue=True,
+                 )
+
+         with gr.TabItem("📈 Metrics through time", elem_id="llm-benchmark-tab-table", id=2):
+             with gr.Row():
+                 with gr.Column():
+                     chart = create_metric_plot_obj(
+                         plot_df,
+                         [AutoEvalColumn.average.name],
+                         title="Average of Top Scores and Human Baseline Over Time (from last update)",
+                     )
+                     gr.Plot(value=chart, min_width=500)
+                 with gr.Column():
+                     chart = create_metric_plot_obj(
+                         plot_df,
+                         BENCHMARK_COLS,
+                         title="Top Scores and Human Baseline Over Time (from last update)",
+                     )
+                     gr.Plot(value=chart, min_width=500)
+         with gr.TabItem("📝 About", elem_id="llm-benchmark-tab-table", id=3):
+             gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
+
+         with gr.TabItem("❗FAQ", elem_id="llm-benchmark-tab-table", id=4):
+             gr.Markdown(FAQ_TEXT, elem_classes="markdown-text")
+
+         with gr.TabItem("🚀 Submit ", elem_id="llm-benchmark-tab-table", id=5):
+             with gr.Column():
+                 with gr.Row():
+                     gr.Markdown(EVALUATION_QUEUE_TEXT, elem_classes="markdown-text")
+
+                 with gr.Row():
+                     gr.Markdown("# ✉️✨ Submit your model here!", elem_classes="markdown-text")
+
+                 with gr.Row():
+                     with gr.Column():
+                         model_name_textbox = gr.Textbox(label="Model name")
+                         revision_name_textbox = gr.Textbox(label="Revision commit", placeholder="main")
+                         private = gr.Checkbox(False, label="Private", visible=not IS_PUBLIC)
+                         model_type = gr.Dropdown(
+                             choices=[t.to_str(" : ") for t in ModelType if t != ModelType.Unknown],
+                             label="Model type",
+                             multiselect=False,
+                             value=ModelType.FT.to_str(" : "),
+                             interactive=True,
+                         )
+
+                     with gr.Column():
+                         precision = gr.Dropdown(
+                             choices=[i.value.name for i in Precision if i != Precision.Unknown],
+                             label="Precision",
+                             multiselect=False,
+                             value="float16",
+                             interactive=True,
+                         )
+                         weight_type = gr.Dropdown(
+                             choices=[i.value.name for i in WeightType],
+                             label="Weights type",
+                             multiselect=False,
+                             value="Original",
+                             interactive=True,
+                         )
+                         base_model_name_textbox = gr.Textbox(label="Base model (for delta or adapter weights)")
+
+             with gr.Column():
+                 with gr.Accordion(
+                     f"✅ Finished Evaluations ({len(finished_eval_queue_df)})",
+                     open=False,
+                 ):
+                     with gr.Row():
+                         finished_eval_table = gr.components.Dataframe(
+                             value=finished_eval_queue_df,
+                             headers=EVAL_COLS,
+                             datatype=EVAL_TYPES,
+                             row_count=5,
+                         )
+                 with gr.Accordion(
+                     f"🔄 Running Evaluation Queue ({len(running_eval_queue_df)})",
+                     open=False,
+                 ):
+                     with gr.Row():
+                         running_eval_table = gr.components.Dataframe(
+                             value=running_eval_queue_df,
+                             headers=EVAL_COLS,
+                             datatype=EVAL_TYPES,
+                             row_count=5,
+                         )
+
+                 with gr.Accordion(
+                     f"⏳ Pending Evaluation Queue ({len(pending_eval_queue_df)})",
+                     open=False,
+                 ):
+                     with gr.Row():
+                         pending_eval_table = gr.components.Dataframe(
+                             value=pending_eval_queue_df,
+                             headers=EVAL_COLS,
+                             datatype=EVAL_TYPES,
+                             row_count=5,
+                         )
+
+             submit_button = gr.Button("Submit Eval")
+             submission_result = gr.Markdown()
+             submit_button.click(
+                 add_new_eval,
+                 [
+                     model_name_textbox,
+                     base_model_name_textbox,
+                     revision_name_textbox,
+                     precision,
+                     private,
+                     weight_type,
+                     model_type,
+                 ],
+                 submission_result,
+             )
+
+     with gr.Row():
+         with gr.Accordion("📙 Citation", open=False):
+             citation_button = gr.Textbox(
+                 value=CITATION_BUTTON_TEXT,
+                 label=CITATION_BUTTON_LABEL,
+                 lines=20,
+                 elem_id="citation-button",
+                 show_copy_button=True,
+             )
+
+ scheduler = BackgroundScheduler()
+ scheduler.add_job(restart_space, "interval", hours=3) # restarted every 3h
+ scheduler.add_job(update_dynamic_files, "interval", hours=2) # launched every 2 hour
+ scheduler.start()
+
+ demo.queue(default_concurrency_limit=40).launch()
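
In `filter_queries` above, the search box accepts several sub-queries separated by `;`: each is matched case-insensitively against the hidden `dummy` column, the per-query hits are OR-ed together, and duplicates are dropped. A minimal sketch of those semantics, assuming a toy DataFrame with a plain `model` column rather than the leaderboard's real `AutoEvalColumn` fields:

```python
import pandas as pd

def search(df: pd.DataFrame, query: str, col: str = "model") -> pd.DataFrame:
    """Mimic filter_queries: split on ';', OR the sub-queries, drop duplicates."""
    parts = [q.strip() for q in query.split(";") if q.strip()]
    if not parts:
        return df
    hits = [df[df[col].str.contains(q, case=False)] for q in parts]
    hits = [h for h in hits if len(h) > 0]
    # Like the original, fall back to the unfiltered frame when nothing matched
    return pd.concat(hits).drop_duplicates() if hits else df

toy = pd.DataFrame({"model": ["org/llama-7b", "org/mistral-7b", "other/gpt2"]})
print(search(toy, "llama; mistral"))  # both 7b rows
print(search(toy, "nonexistent"))     # whole frame, not an empty table
```

Note the fallback: when no sub-query matches anything, the original frame is returned unchanged, which is why an unmatched search leaves the table full rather than empty.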
backend/Dockerfile.dev DELETED
@@ -1,25 +0,0 @@
- FROM python:3.12-slim
-
- WORKDIR /app
-
- # Install required system dependencies
- RUN apt-get update && apt-get install -y \
-     build-essential \
-     && rm -rf /var/lib/apt/lists/*
-
- # Install poetry
- RUN pip install poetry
-
- # Copy Poetry configuration files
- COPY pyproject.toml poetry.lock* ./
-
- # Install dependencies
- RUN poetry config virtualenvs.create false && \
-     poetry install --no-interaction --no-ansi --no-root
-
- # Environment variables configuration for logs
- ENV PYTHONUNBUFFERED=1
- ENV LOG_LEVEL=INFO
-
- # In dev, mount volume directly
- CMD ["uvicorn", "app.asgi:app", "--host", "0.0.0.0", "--port", "7860", "--reload", "--log-level", "warning", "--no-access-log"]
backend/README.md DELETED
@@ -1,352 +0,0 @@
- # Backend - Open LLM Leaderboard 🏆
-
- FastAPI backend for the Open LLM Leaderboard. This service is part of a larger architecture that includes a React frontend. For complete project installation, see the [main README](../README.md).
-
- ## ✨ Features
-
- - 📊 REST API for LLM models leaderboard management
- - 🗳️ Voting and ranking system
- - 🔄 HuggingFace Hub integration
- - 🚀 Caching and performance optimizations
-
- ## 🏗 Architecture
-
- ```mermaid
- flowchart TD
-     Client(["**Frontend**<br><br>React Application"]) --> API["**API Server**<br><br>FastAPI REST Endpoints"]
-
-     subgraph Backend
-         API --> Core["**Core Layer**<br><br>• Middleware<br>• Cache<br>• Rate Limiting"]
-         Core --> Services["**Services Layer**<br><br>• Business Logic<br>• Data Processing"]
-
-         subgraph Services Layer
-             Services --> Models["**Model Service**<br><br>• Model Submission<br>• Evaluation Pipeline"]
-             Services --> Votes["**Vote Service**<br><br>• Vote Management<br>• Data Synchronization"]
-             Services --> Board["**Leaderboard Service**<br><br>• Rankings<br>• Performance Metrics"]
-         end
-
-         Models --> Cache["**Cache Layer**<br><br>• In-Memory Store<br>• Auto Invalidation"]
-         Votes --> Cache
-         Board --> Cache
-
-         Models --> HF["**HuggingFace Hub**<br><br>• Models Repository<br>• Datasets Access"]
-         Votes --> HF
-         Board --> HF
-     end
-
-     style Client fill:#f9f,stroke:#333,stroke-width:2px
-     style Models fill:#bbf,stroke:#333,stroke-width:2px
-     style Votes fill:#bbf,stroke:#333,stroke-width:2px
-     style Board fill:#bbf,stroke:#333,stroke-width:2px
-     style HF fill:#bfb,stroke:#333,stroke-width:2px
- ```
-
- ## 🛠️ HuggingFace Datasets
-
- The application uses several datasets on the HuggingFace Hub:
-
- ### 1. Requests Dataset (`{HF_ORGANIZATION}/requests`)
-
- - **Operations**:
-   - 📤 `POST /api/models/submit`: Adds a JSON file for each new model submission
-   - 📥 `GET /api/models/status`: Reads files to get models status
- - **Format**: One JSON file per model with submission details
- - **Updates**: On each new model submission
-
- ### 2. Votes Dataset (`{HF_ORGANIZATION}/votes`)
-
- - **Operations**:
-   - 📤 `POST /api/votes/{model_id}`: Adds a new vote
-   - 📥 `GET /api/votes/model/{provider}/{model}`: Reads model votes
-   - 📥 `GET /api/votes/user/{user_id}`: Reads user votes
- - **Format**: JSONL with one vote per line
- - **Sync**: Bidirectional between local cache and Hub
-
- ### 3. Contents Dataset (`{HF_ORGANIZATION}/contents`)
-
- - **Operations**:
-   - 📥 `GET /api/leaderboard`: Reads raw data
-   - 📥 `GET /api/leaderboard/formatted`: Reads and formats data
- - **Format**: Main dataset containing all scores and metrics
- - **Updates**: Automatic after model evaluations
-
- ### 4. Official Providers Dataset (`{HF_ORGANIZATION}/official-providers`)
-
- - **Operations**:
-   - 📥 Read-only access for highlighted models
- - **Format**: List of models selected by maintainers
- - **Updates**: Manual by maintainers
-
- ## 🛠 Local Development
-
- ### Prerequisites
-
- - Python 3.9+
- - [Poetry](https://python-poetry.org/docs/#installation)
-
- ### Standalone Installation (without Docker)
-
- ```bash
- # Install dependencies
- poetry install
-
- # Setup configuration
- cp .env.example .env
-
- # Start development server
- poetry run uvicorn app.asgi:app --host 0.0.0.0 --port 7860 --reload
- ```
-
- Server will be available at http://localhost:7860
-
- ## ⚙️ Configuration
-
- | Variable     | Description                          | Default     |
- | ------------ | ------------------------------------ | ----------- |
- | ENVIRONMENT  | Environment (development/production) | development |
- | HF_TOKEN     | HuggingFace authentication token     | -           |
- | PORT         | Server port                          | 7860        |
- | LOG_LEVEL    | Logging level (INFO/DEBUG/WARNING)   | INFO        |
- | CORS_ORIGINS | Allowed CORS origins                 | ["*"]       |
- | CACHE_TTL    | Cache Time To Live in seconds        | 300         |
-
- ## 🔧 Middleware
-
- The backend uses several middleware layers for optimal performance and security:
-
- - **CORS Middleware**: Handles Cross-Origin Resource Sharing
- - **GZIP Middleware**: Compresses responses > 500 bytes
- - **Rate Limiting**: Prevents API abuse
- - **Caching**: In-memory caching with automatic invalidation
-
- ## 📝 Logging
-
- The application uses a structured logging system with:
-
- - Formatted console output
- - Different log levels per component
- - Request/Response logging
- - Performance metrics
- - Error tracking
-
- ## 📁 File Structure
-
- ```
- backend/
- ├── app/                  # Source code
- │   ├── api/              # Routes and endpoints
- │   │   └── endpoints/    # Endpoint handlers
- │   ├── core/             # Configurations
- │   ├── services/         # Business logic
- │   └── utils/            # Utilities
- └── tests/                # Tests
- ```
-
- ## 📚 API
-
- Swagger documentation available at http://localhost:7860/docs
-
- ### Main Endpoints & Data Structures
-
- #### Leaderboard
-
- - `GET /api/leaderboard/formatted` - Formatted data with computed fields and metadata
-
-   ```typescript
-   Response {
-     models: [{
-       id: string,  // eval_name
-       model: {
-         name: string,  // fullname
-         sha: string,  // Model sha
-         precision: string,  // e.g. "fp16", "int8"
-         type: string,  // e.g. "fined-tuned-on-domain-specific-dataset"
-         weight_type: string,
-         architecture: string,
-         average_score: number,
-         has_chat_template: boolean
-       },
-       evaluations: {
-         ifeval: {
-           name: "IFEval",
-           value: number,  // Raw score
-           normalized_score: number
-         },
-         bbh: {
-           name: "BBH",
-           value: number,
-           normalized_score: number
-         },
-         math: {
-           name: "MATH Level 5",
-           value: number,
-           normalized_score: number
-         },
-         gpqa: {
-           name: "GPQA",
-           value: number,
-           normalized_score: number
-         },
-         musr: {
-           name: "MUSR",
-           value: number,
-           normalized_score: number
-         },
-         mmlu_pro: {
-           name: "MMLU-PRO",
-           value: number,
-           normalized_score: number
-         }
-       },
-       features: {
-         is_not_available_on_hub: boolean,
-         is_merged: boolean,
-         is_moe: boolean,
-         is_flagged: boolean,
-         is_official_provider: boolean
-       },
-       metadata: {
-         upload_date: string,
-         submission_date: string,
-         generation: string,
-         base_model: string,
-         hub_license: string,
-         hub_hearts: number,
-         params_billions: number,
-         co2_cost: number  // CO₂ cost in kg
-       }
-     }]
-   }
-   ```
-
- - `GET /api/leaderboard` - Raw data from the HuggingFace dataset
-   ```typescript
-   Response {
-     models: [{
-       eval_name: string,
-       Precision: string,
-       Type: string,
-       "Weight type": string,
-       Architecture: string,
-       Model: string,
-       fullname: string,
-       "Model sha": string,
-       "Average ⬆️": number,
-       "Hub License": string,
-       "Hub ❤️": number,
-       "#Params (B)": number,
-       "Available on the hub": boolean,
-       Merged: boolean,
-       MoE: boolean,
-       Flagged: boolean,
-       "Chat Template": boolean,
-       "CO₂ cost (kg)": number,
-       "IFEval Raw": number,
-       IFEval: number,
-       "BBH Raw": number,
-       BBH: number,
-       "MATH Lvl 5 Raw": number,
-       "MATH Lvl 5": number,
-       "GPQA Raw": number,
-       GPQA: number,
-       "MUSR Raw": number,
-       MUSR: number,
-       "MMLU-PRO Raw": number,
-       "MMLU-PRO": number,
-       "Maintainer's Highlight": boolean,
-       "Upload To Hub Date": string,
-       "Submission Date": string,
-       Generation: string,
-       "Base Model": string
-     }]
-   }
-   ```
-
- #### Models
-
- - `GET /api/models/status` - Get all models grouped by status
-   ```typescript
-   Response {
-     pending: [{
-       name: string,
-       submitter: string,
-       revision: string,
-       wait_time: string,
-       submission_time: string,
-       status: "PENDING" | "EVALUATING" | "FINISHED",
-       precision: string
-     }],
-     evaluating: Array<Model>,
-     finished: Array<Model>
-   }
-   ```
- - `GET /api/models/pending` - Get pending models only
- - `POST /api/models/submit` - Submit model
-
-   ```typescript
-   Request {
-     user_id: string,
-     model_id: string,
-     base_model?: string,
-     precision?: string,
-     model_type: string
-   }
-
-   Response {
-     status: string,
-     message: string
-   }
-   ```
-
- - `GET /api/models/{model_id}/status` - Get model status
-
- #### Votes
-
- - `POST /api/votes/{model_id}` - Vote
-
-   ```typescript
-   Request {
-     vote_type: "up" | "down",
-     user_id: string  // HuggingFace username
-   }
-
-   Response {
-     success: boolean,
-     message: string
-   }
-   ```
-
- - `GET /api/votes/model/{provider}/{model}` - Get model votes
-   ```typescript
-   Response {
-     total_votes: number,
-     up_votes: number,
-     down_votes: number
-   }
-   ```
- - `GET /api/votes/user/{user_id}` - Get user votes
-   ```typescript
-   Response Array<{
-     model_id: string,
-     vote_type: string,
-     timestamp: string
-   }>
-   ```
-
- ## 🔒 Authentication
-
- The backend uses HuggingFace token-based authentication for secure API access. Make sure to:
-
- 1. Set your HF_TOKEN in the .env file
- 2. Include the token in API requests via Bearer authentication
- 3. Keep your token secure and never commit it to version control
-
- ## 🚀 Performance
-
- The backend implements several optimizations:
-
- - In-memory caching with configurable TTL (Time To Live)
- - Batch processing for model evaluations
- - Rate limiting for API endpoints
- - Efficient database queries with proper indexing
- - Automatic cache invalidation for votes
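The vote endpoint implemented in `backend/app/api/endpoints/votes.py` (further down in this diff) takes `vote_type` and `user_id` as query parameters rather than a JSON body. A hedged client-side sketch using `requests`; the base URL and the model and user identifiers are illustrative assumptions, not values from this PR:

```python
import requests  # third-party: pip install requests

BASE = "http://localhost:7860/api"  # assumed local dev server, per the README above

# Cast an upvote; vote_type and user_id go in the query string.
r = requests.post(
    f"{BASE}/votes/some-org/some-model",  # hypothetical model id
    params={"vote_type": "up", "user_id": "some-hf-username"},
)
r.raise_for_status()

# Read the aggregated votes back for the same model.
votes = requests.get(f"{BASE}/votes/model/some-org/some-model").json()
print(votes["total_votes"], votes["up_votes"], votes["down_votes"])
```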
backend/__init__.py DELETED
File without changes
backend/app/api/__init__.py DELETED
@@ -1,5 +0,0 @@
- """
- API package initialization
- """
-
- __all__ = ["endpoints"]
backend/app/api/dependencies.py DELETED
@@ -1,34 +0,0 @@
- from fastapi import Depends, HTTPException
- import logging
- from app.services.models import ModelService
- from app.services.votes import VoteService
- from app.core.formatting import LogFormatter
-
- logger = logging.getLogger(__name__)
-
- model_service = ModelService()
- vote_service = VoteService()
-
- async def get_model_service() -> ModelService:
-     """Dependency to get ModelService instance"""
-     try:
-         logger.info(LogFormatter.info("Initializing model service dependency"))
-         await model_service.initialize()
-         logger.info(LogFormatter.success("Model service initialized"))
-         return model_service
-     except Exception as e:
-         error_msg = "Failed to initialize model service"
-         logger.error(LogFormatter.error(error_msg, e))
-         raise HTTPException(status_code=500, detail=str(e))
-
- async def get_vote_service() -> VoteService:
-     """Dependency to get VoteService instance"""
-     try:
-         logger.info(LogFormatter.info("Initializing vote service dependency"))
-         await vote_service.initialize()
-         logger.info(LogFormatter.success("Vote service initialized"))
-         return vote_service
-     except Exception as e:
-         error_msg = "Failed to initialize vote service"
-         logger.error(LogFormatter.error(error_msg, e))
-         raise HTTPException(status_code=500, detail=str(e))
backend/app/api/endpoints/leaderboard.py DELETED
@@ -1,49 +0,0 @@
- from fastapi import APIRouter
- from typing import List, Dict, Any
- from app.services.leaderboard import LeaderboardService
- from app.core.fastapi_cache import cached, build_cache_key
- import logging
- from app.core.formatting import LogFormatter
-
- logger = logging.getLogger(__name__)
- router = APIRouter()
- leaderboard_service = LeaderboardService()
-
- def leaderboard_key_builder(func, namespace: str = "leaderboard", **kwargs):
-     """Build cache key for leaderboard data"""
-     key_type = "raw" if func.__name__ == "get_leaderboard" else "formatted"
-     key = build_cache_key(namespace, key_type)
-     logger.debug(LogFormatter.info(f"Built leaderboard cache key: {key}"))
-     return key
-
- @router.get("")
- @cached(expire=300, key_builder=leaderboard_key_builder)
- async def get_leaderboard() -> List[Dict[str, Any]]:
-     """
-     Get raw leaderboard data
-     Response will be automatically GZIP compressed if size > 500 bytes
-     """
-     try:
-         logger.info(LogFormatter.info("Fetching raw leaderboard data"))
-         data = await leaderboard_service.fetch_raw_data()
-         logger.info(LogFormatter.success(f"Retrieved {len(data)} leaderboard entries"))
-         return data
-     except Exception as e:
-         logger.error(LogFormatter.error("Failed to fetch raw leaderboard data", e))
-         raise
-
- @router.get("/formatted")
- @cached(expire=300, key_builder=leaderboard_key_builder)
- async def get_formatted_leaderboard() -> List[Dict[str, Any]]:
-     """
-     Get formatted leaderboard data with restructured objects
-     Response will be automatically GZIP compressed if size > 500 bytes
-     """
-     try:
-         logger.info(LogFormatter.info("Fetching formatted leaderboard data"))
-         data = await leaderboard_service.get_formatted_data()
-         logger.info(LogFormatter.success(f"Retrieved {len(data)} formatted entries"))
-         return data
-     except Exception as e:
-         logger.error(LogFormatter.error("Failed to fetch formatted leaderboard data", e))
-         raise
backend/app/api/endpoints/models.py DELETED
@@ -1,103 +0,0 @@
- from fastapi import APIRouter, HTTPException, Depends
- from typing import Dict, Any, List
- import logging
- from app.services.models import ModelService
- from app.api.dependencies import get_model_service
- from app.core.fastapi_cache import cached
- from app.core.formatting import LogFormatter
-
- logger = logging.getLogger(__name__)
- router = APIRouter(tags=["models"])
-
- @router.get("/status")
- @cached(expire=300)
- async def get_models_status(
-     model_service: ModelService = Depends(get_model_service)
- ) -> Dict[str, List[Dict[str, Any]]]:
-     """Get all models grouped by status"""
-     try:
-         logger.info(LogFormatter.info("Fetching status for all models"))
-         result = await model_service.get_models()
-         stats = {
-             status: len(models) for status, models in result.items()
-         }
-         for line in LogFormatter.stats(stats, "Models by Status"):
-             logger.info(line)
-         return result
-     except Exception as e:
-         logger.error(LogFormatter.error("Failed to get models status", e))
-         raise HTTPException(status_code=500, detail=str(e))
-
- @router.get("/pending")
- @cached(expire=60)
- async def get_pending_models(
-     model_service: ModelService = Depends(get_model_service)
- ) -> List[Dict[str, Any]]:
-     """Get all models waiting for evaluation"""
-     try:
-         logger.info(LogFormatter.info("Fetching pending models"))
-         models = await model_service.get_models()
-         pending = models.get("pending", [])
-         logger.info(LogFormatter.success(f"Found {len(pending)} pending models"))
-         return pending
-     except Exception as e:
-         logger.error(LogFormatter.error("Failed to get pending models", e))
-         raise HTTPException(status_code=500, detail=str(e))
-
- @router.post("/submit")
- async def submit_model(
-     model_data: Dict[str, Any],
-     model_service: ModelService = Depends(get_model_service)
- ) -> Dict[str, Any]:
-     try:
-         logger.info(LogFormatter.section("MODEL SUBMISSION"))
-
-         user_id = model_data.pop('user_id', None)
-         if not user_id:
-             error_msg = "user_id is required"
-             logger.error(LogFormatter.error("Validation failed", error_msg))
-             raise ValueError(error_msg)
-
-         # Log submission details
-         submission_info = {
-             "Model_ID": model_data.get("model_id"),
-             "User": user_id,
-             "Base_Model": model_data.get("base_model"),
-             "Precision": model_data.get("precision"),
-             "Model_Type": model_data.get("model_type")
-         }
-         for line in LogFormatter.tree(submission_info, "Submission Details"):
-             logger.info(line)
-
-         result = await model_service.submit_model(model_data, user_id)
-         logger.info(LogFormatter.success("Model submitted successfully"))
-         return result
-
-     except ValueError as e:
-         logger.error(LogFormatter.error("Invalid submission data", e))
-         raise HTTPException(status_code=400, detail=str(e))
-     except Exception as e:
-         logger.error(LogFormatter.error("Submission failed", e))
-         raise HTTPException(status_code=500, detail=str(e))
-
- @router.get("/{model_id}/status")
- async def get_model_status(
-     model_id: str,
-     model_service: ModelService = Depends(get_model_service)
- ) -> Dict[str, Any]:
-     try:
-         logger.info(LogFormatter.info(f"Checking status for model: {model_id}"))
-         status = await model_service.get_model_status(model_id)
-
-         if status["status"] != "not_found":
-             logger.info(LogFormatter.success("Status found"))
-             for line in LogFormatter.tree(status, "Model Status"):
-                 logger.info(line)
-         else:
-             logger.warning(LogFormatter.warning(f"No status found for model: {model_id}"))
-
-         return status
-
-     except Exception as e:
-         logger.error(LogFormatter.error("Failed to get model status", e))
-         raise HTTPException(status_code=500, detail=str(e))
backend/app/api/endpoints/votes.py DELETED
@@ -1,105 +0,0 @@
- from fastapi import APIRouter, HTTPException, Query, Depends
- from typing import Dict, Any, List
- from app.services.votes import VoteService
- from app.core.fastapi_cache import cached, build_cache_key, invalidate_cache_key
- import logging
- from app.core.formatting import LogFormatter
-
- logger = logging.getLogger(__name__)
- router = APIRouter()
- vote_service = VoteService()
-
- def model_votes_key_builder(func, namespace: str = "model_votes", **kwargs):
-     """Build cache key for model votes"""
-     provider = kwargs.get('provider')
-     model = kwargs.get('model')
-     key = build_cache_key(namespace, provider, model)
-     logger.debug(LogFormatter.info(f"Built model votes cache key: {key}"))
-     return key
-
- def user_votes_key_builder(func, namespace: str = "user_votes", **kwargs):
-     """Build cache key for user votes"""
-     user_id = kwargs.get('user_id')
-     key = build_cache_key(namespace, user_id)
-     logger.debug(LogFormatter.info(f"Built user votes cache key: {key}"))
-     return key
-
- @router.post("/{model_id:path}")
- async def add_vote(
-     model_id: str,
-     vote_type: str = Query(..., description="Type of vote (up/down)"),
-     user_id: str = Query(..., description="HuggingFace username")
- ) -> Dict[str, Any]:
-     try:
-         logger.info(LogFormatter.section("ADDING VOTE"))
-         stats = {
-             "Model": model_id,
-             "User": user_id,
-             "Type": vote_type
-         }
-         for line in LogFormatter.tree(stats, "Vote Details"):
-             logger.info(line)
-
-         await vote_service.initialize()
-         result = await vote_service.add_vote(model_id, user_id, vote_type)
-
-         # Invalidate affected caches
-         try:
-             logger.info(LogFormatter.subsection("CACHE INVALIDATION"))
-             provider, model = model_id.split('/', 1)
-
-             # Build and invalidate cache keys
-             model_cache_key = build_cache_key("model_votes", provider, model)
-             user_cache_key = build_cache_key("user_votes", user_id)
-
-             invalidate_cache_key(model_cache_key)
-             invalidate_cache_key(user_cache_key)
-
-             cache_stats = {
-                 "Model_Cache": model_cache_key,
-                 "User_Cache": user_cache_key
-             }
-             for line in LogFormatter.tree(cache_stats, "Invalidated Caches"):
-                 logger.info(line)
-
-         except Exception as e:
-             logger.error(LogFormatter.error("Failed to invalidate cache", e))
-
-         return result
-     except Exception as e:
-         logger.error(LogFormatter.error("Failed to add vote", e))
-         raise HTTPException(status_code=400, detail=str(e))
-
- @router.get("/model/{provider}/{model}")
- @cached(expire=60, key_builder=model_votes_key_builder)
- async def get_model_votes(
-     provider: str,
-     model: str
- ) -> Dict[str, Any]:
-     """Get all votes for a specific model"""
-     try:
-         logger.info(LogFormatter.info(f"Fetching votes for model: {provider}/{model}"))
-         await vote_service.initialize()
-         model_id = f"{provider}/{model}"
-         result = await vote_service.get_model_votes(model_id)
-         logger.info(LogFormatter.success(f"Found {result.get('total_votes', 0)} votes"))
-         return result
-     except Exception as e:
-         logger.error(LogFormatter.error("Failed to get model votes", e))
-         raise HTTPException(status_code=400, detail=str(e))
-
- @router.get("/user/{user_id}")
- @cached(expire=60, key_builder=user_votes_key_builder)
- async def get_user_votes(
-     user_id: str
- ) -> List[Dict[str, Any]]:
-     """Get all votes from a specific user"""
-     try:
-         logger.info(LogFormatter.info(f"Fetching votes for user: {user_id}"))
-         await vote_service.initialize()
-         votes = await vote_service.get_user_votes(user_id)
-         logger.info(LogFormatter.success(f"Found {len(votes)} votes"))
-         return votes
-     except Exception as e:
-         logger.error(LogFormatter.error("Failed to get user votes", e))
-         raise HTTPException(status_code=400, detail=str(e))
backend/app/api/router.py DELETED
@@ -1,9 +0,0 @@
- from fastapi import APIRouter
-
- from app.api.endpoints import leaderboard, votes, models
-
- router = APIRouter()
-
- router.include_router(leaderboard.router, prefix="/leaderboard", tags=["leaderboard"])
- router.include_router(votes.router, prefix="/votes", tags=["votes"])
- router.include_router(models.router, prefix="/models", tags=["models"])
backend/app/asgi.py DELETED
@@ -1,106 +0,0 @@
- """
- ASGI entry point for the Open LLM Leaderboard API.
- """
- import os
- import uvicorn
- import logging
- import logging.config
- from fastapi import FastAPI
- from fastapi.middleware.cors import CORSMiddleware
- from fastapi.middleware.gzip import GZipMiddleware
- import sys
-
- from app.api.router import router
- from app.core.fastapi_cache import setup_cache
- from app.core.formatting import LogFormatter
- from app.config import hf_config
-
- # Configure logging before anything else
- LOGGING_CONFIG = {
-     "version": 1,
-     "disable_existing_loggers": True,
-     "formatters": {
-         "default": {
-             "format": "%(name)s - %(levelname)s - %(message)s",
-         }
-     },
-     "handlers": {
-         "default": {
-             "formatter": "default",
-             "class": "logging.StreamHandler",
-             "stream": "ext://sys.stdout",
-         }
-     },
-     "loggers": {
-         "uvicorn": {
-             "handlers": ["default"],
-             "level": "WARNING",
-             "propagate": False,
-         },
-         "uvicorn.error": {
-             "level": "WARNING",
-             "handlers": ["default"],
-             "propagate": False,
-         },
-         "uvicorn.access": {
-             "handlers": ["default"],
-             "level": "WARNING",
-             "propagate": False,
-         },
-         "app": {
-             "handlers": ["default"],
-             "level": "WARNING",
-             "propagate": False,
-         }
-     },
-     "root": {
-         "handlers": ["default"],
-         "level": "WARNING",
-     }
- }
-
- # Apply logging configuration
- logging.config.dictConfig(LOGGING_CONFIG)
- logger = logging.getLogger("app")
-
- # Create FastAPI application
- app = FastAPI(
-     title="Open LLM Leaderboard",
-     version="1.0.0",
-     docs_url="/docs",
- )
-
- # Add CORS middleware
- app.add_middleware(
-     CORSMiddleware,
-     allow_origins=["*"],
-     allow_credentials=True,
-     allow_methods=["*"],
-     allow_headers=["*"],
- )
-
- # Add GZIP compression
- app.add_middleware(GZipMiddleware, minimum_size=500)
-
- # Include API router
- app.include_router(router, prefix="/api")
-
- @app.on_event("startup")
- async def startup_event():
-     """Initialize services on startup"""
-     logger.info("\n")
-     logger.info(LogFormatter.section("APPLICATION STARTUP"))
-
-     # Log HF configuration
-     logger.info(LogFormatter.section("HUGGING FACE CONFIGURATION"))
-     logger.info(LogFormatter.info(f"Organization: {hf_config.HF_ORGANIZATION}"))
-     logger.info(LogFormatter.info(f"Token Status: {'Present' if hf_config.HF_TOKEN else 'Missing'}"))
-     logger.info(LogFormatter.info(f"Using repositories:"))
-     logger.info(LogFormatter.info(f"  - Queue: {hf_config.QUEUE_REPO}"))
-     logger.info(LogFormatter.info(f"  - Aggregated: {hf_config.AGGREGATED_REPO}"))
-     logger.info(LogFormatter.info(f"  - Votes: {hf_config.VOTES_REPO}"))
-     logger.info(LogFormatter.info(f"  - Official Providers: {hf_config.OFFICIAL_PROVIDERS_REPO}"))
-
-     # Setup cache
-     setup_cache()
-     logger.info(LogFormatter.success("FastAPI Cache initialized with in-memory backend"))
backend/app/config/__init__.py DELETED
@@ -1,6 +0,0 @@
- """
- Configuration module for the Open LLM Leaderboard backend.
- All configuration values are imported from base.py to avoid circular dependencies.
- """
-
- from .base import *
backend/app/config/base.py DELETED
@@ -1,38 +0,0 @@
- import os
- from pathlib import Path
-
- # Server configuration
- HOST = "0.0.0.0"
- PORT = 7860
- WORKERS = 4
- RELOAD = True if os.environ.get("ENVIRONMENT") == "development" else False
-
- # CORS configuration
- ORIGINS = ["http://localhost:3000"] if os.getenv("ENVIRONMENT") == "development" else ["*"]
-
- # Cache configuration
- CACHE_TTL = int(os.environ.get("CACHE_TTL", 300))  # 5 minutes default
-
- # Rate limiting
- RATE_LIMIT_PERIOD = 7  # days
- RATE_LIMIT_QUOTA = 5
- HAS_HIGHER_RATE_LIMIT = []
-
- # HuggingFace configuration
- HF_TOKEN = os.environ.get("HF_TOKEN")
- HF_ORGANIZATION = "open-llm-leaderboard"
- API = {
-     "INFERENCE": "https://api-inference.huggingface.co/models",
-     "HUB": "https://huggingface.co"
- }
-
- # Cache paths
- CACHE_ROOT = Path(os.environ.get("HF_HOME", ".cache"))
- DATASETS_CACHE = CACHE_ROOT / "datasets"
- MODELS_CACHE = CACHE_ROOT / "models"
- VOTES_CACHE = CACHE_ROOT / "votes"
- EVAL_CACHE = CACHE_ROOT / "eval-queue"
-
- # Repository configuration
- QUEUE_REPO = f"{HF_ORGANIZATION}/requests"
- EVAL_REQUESTS_PATH = EVAL_CACHE / "eval_requests.jsonl"
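
A quick sketch of how these values respond to the environment (the HF_HOME and CACHE_TTL overrides are hypothetical; import order matters because the module reads the environment at import time):

    import os
    os.environ["HF_HOME"] = "/data/.cache"   # hypothetical override
    os.environ["CACHE_TTL"] = "600"

    from app.config import base  # reads the environment at import time
    print(base.CACHE_ROOT)       # /data/.cache
    print(base.DATASETS_CACHE)   # /data/.cache/datasets
    print(base.CACHE_TTL)        # 600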
 
backend/app/config/hf_config.py DELETED
@@ -1,30 +0,0 @@
- import os
- import logging
- from typing import Optional
- from huggingface_hub import HfApi
- from pathlib import Path
- from app.core.cache import cache_config
-
- logger = logging.getLogger(__name__)
-
- # Organization or user who owns the datasets
- HF_ORGANIZATION = "open-llm-leaderboard"
-
- # Get HF token directly from environment
- HF_TOKEN = os.environ.get("HF_TOKEN")
- if not HF_TOKEN:
-     logger.warning("HF_TOKEN not found in environment variables. Some features may be limited.")
-
- # Initialize HF API
- API = HfApi(token=HF_TOKEN)
-
- # Repository configuration
- QUEUE_REPO = f"{HF_ORGANIZATION}/requests"
- AGGREGATED_REPO = f"{HF_ORGANIZATION}/contents"
- VOTES_REPO = f"{HF_ORGANIZATION}/votes"
- OFFICIAL_PROVIDERS_REPO = f"{HF_ORGANIZATION}/official-providers"
-
- # File paths from cache config
- VOTES_PATH = cache_config.votes_file
- EVAL_REQUESTS_PATH = cache_config.eval_requests_file
- MODEL_CACHE_DIR = cache_config.models_cache
 
backend/app/config/logging_config.py DELETED
@@ -1,38 +0,0 @@
- import logging
- import sys
- from tqdm import tqdm
-
- def get_tqdm_handler():
-     """
-     Creates a special handler for tqdm that doesn't interfere with other logs.
-     """
-     class TqdmLoggingHandler(logging.Handler):
-         def emit(self, record):
-             try:
-                 msg = self.format(record)
-                 tqdm.write(msg)
-                 self.flush()
-             except Exception:
-                 self.handleError(record)
-
-     return TqdmLoggingHandler()
-
- def setup_service_logger(service_name: str) -> logging.Logger:
-     """
-     Configure a specific logger for a given service.
-     """
-     logger = logging.getLogger(f"app.services.{service_name}")
-
-     # If the logger already has handlers, don't reconfigure it
-     if logger.handlers:
-         return logger
-
-     # Add tqdm handler for this service
-     tqdm_handler = get_tqdm_handler()
-     tqdm_handler.setFormatter(logging.Formatter('%(name)s - %(levelname)s - %(message)s'))
-     logger.addHandler(tqdm_handler)
-
-     # Don't propagate logs to parent loggers
-     logger.propagate = False
-
-     return logger
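
A usage sketch for the helper above (the service name is hypothetical): because records are written through tqdm.write, log lines print above an active progress bar instead of breaking it.

    from tqdm import tqdm
    from app.config.logging_config import setup_service_logger

    logger = setup_service_logger("models")  # hypothetical service name
    for i in tqdm(range(100), desc="processing"):
        if i % 25 == 0:
            logger.warning("checkpoint at item %d", i)  # emitted via tqdm.write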
 
backend/app/core/cache.py DELETED
@@ -1,109 +0,0 @@
- import os
- import shutil
- from pathlib import Path
- from datetime import timedelta
- import logging
- from app.core.formatting import LogFormatter
- from app.config.base import (
-     CACHE_ROOT,
-     DATASETS_CACHE,
-     MODELS_CACHE,
-     VOTES_CACHE,
-     EVAL_CACHE,
-     CACHE_TTL
- )
-
- logger = logging.getLogger(__name__)
-
- class CacheConfig:
-     def __init__(self):
-         # Get cache paths from config
-         self.cache_root = CACHE_ROOT
-         self.datasets_cache = DATASETS_CACHE
-         self.models_cache = MODELS_CACHE
-         self.votes_cache = VOTES_CACHE
-         self.eval_cache = EVAL_CACHE
-
-         # Specific files
-         self.votes_file = self.votes_cache / "votes_data.jsonl"
-         self.eval_requests_file = self.eval_cache / "eval_requests.jsonl"
-
-         # Cache TTL
-         self.cache_ttl = timedelta(seconds=CACHE_TTL)
-
-         self._initialize_cache_dirs()
-         self._setup_environment()
-
-     def _initialize_cache_dirs(self):
-         """Initialize all necessary cache directories"""
-         try:
-             logger.info(LogFormatter.section("CACHE INITIALIZATION"))
-
-             cache_dirs = {
-                 "Root": self.cache_root,
-                 "Datasets": self.datasets_cache,
-                 "Models": self.models_cache,
-                 "Votes": self.votes_cache,
-                 "Eval": self.eval_cache
-             }
-
-             for name, cache_dir in cache_dirs.items():
-                 cache_dir.mkdir(parents=True, exist_ok=True)
-                 logger.info(LogFormatter.success(f"{name} cache directory: {cache_dir}"))
-
-         except Exception as e:
-             logger.error(LogFormatter.error("Failed to create cache directories", e))
-             raise
-
-     def _setup_environment(self):
-         """Configure HuggingFace environment variables"""
-         logger.info(LogFormatter.subsection("ENVIRONMENT SETUP"))
-
-         env_vars = {
-             "HF_HOME": str(self.cache_root),
-             "HF_DATASETS_CACHE": str(self.datasets_cache)
-         }
-
-         for var, value in env_vars.items():
-             os.environ[var] = value
-             logger.info(LogFormatter.info(f"Set {var}={value}"))
-
-     def get_cache_path(self, cache_type: str) -> Path:
-         """Returns the path for a specific cache type"""
-         cache_paths = {
-             "datasets": self.datasets_cache,
-             "models": self.models_cache,
-             "votes": self.votes_cache,
-             "eval": self.eval_cache
-         }
-         return cache_paths.get(cache_type, self.cache_root)
-
-     def flush_cache(self, cache_type: str = None):
-         """Flush specified cache or all caches if no type is specified"""
-         try:
-             if cache_type:
-                 logger.info(LogFormatter.section(f"FLUSHING {cache_type.upper()} CACHE"))
-                 cache_dir = self.get_cache_path(cache_type)
-                 if cache_dir.exists():
-                     stats = {
-                         "Cache_Type": cache_type,
-                         "Directory": str(cache_dir)
-                     }
-                     for line in LogFormatter.tree(stats, "Cache Details"):
-                         logger.info(line)
-                     shutil.rmtree(cache_dir)
-                     cache_dir.mkdir(parents=True, exist_ok=True)
-                     logger.info(LogFormatter.success("Cache cleared successfully"))
-             else:
-                 logger.info(LogFormatter.section("FLUSHING ALL CACHES"))
-                 for cache_type in ["datasets", "models", "votes", "eval"]:
-                     self.flush_cache(cache_type)
-                 logger.info(LogFormatter.success("All caches cleared successfully"))
-
-         except Exception as e:
-             logger.error(LogFormatter.error("Failed to flush cache", e))
-             raise
-
- # Singleton instance of cache configuration
- cache_config = CacheConfig()
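
A minimal sketch of how this singleton is consumed elsewhere in the backend (paths shown assume the default HF_HOME of .cache):

    from app.core.cache import cache_config

    datasets_dir = cache_config.get_cache_path("datasets")  # .cache/datasets
    print(datasets_dir, cache_config.cache_ttl)             # TTL as a timedelta
    cache_config.flush_cache("datasets")  # wipes and recreates the datasets cache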
 
backend/app/core/fastapi_cache.py DELETED
@@ -1,48 +0,0 @@
- from fastapi_cache import FastAPICache
- from fastapi_cache.backends.inmemory import InMemoryBackend
- from fastapi_cache.decorator import cache
- from datetime import timedelta
- from app.config import CACHE_TTL
- import logging
- from app.core.formatting import LogFormatter
-
- logger = logging.getLogger(__name__)
-
- def setup_cache():
-     """Initialize FastAPI Cache with in-memory backend"""
-     FastAPICache.init(
-         backend=InMemoryBackend(),
-         prefix="fastapi-cache",
-         expire=CACHE_TTL
-     )
-     logger.info(LogFormatter.success("FastAPI Cache initialized with in-memory backend"))
-
- def invalidate_cache_key(key: str):
-     """Invalidate a specific cache key"""
-     try:
-         backend = FastAPICache.get_backend()
-         if hasattr(backend, 'delete'):
-             backend.delete(key)
-             logger.info(LogFormatter.success(f"Cache invalidated for key: {key}"))
-         else:
-             logger.warning(LogFormatter.warning("Cache backend does not support deletion"))
-     except Exception as e:
-         logger.error(LogFormatter.error(f"Failed to invalidate cache key: {key}", e))
-
- def build_cache_key(namespace: str, *args) -> str:
-     """Build a consistent cache key"""
-     key = f"fastapi-cache:{namespace}:{':'.join(str(arg) for arg in args)}"
-     logger.debug(LogFormatter.info(f"Built cache key: {key}"))
-     return key
-
- def cached(expire: int = CACHE_TTL, key_builder=None):
-     """Decorator for caching endpoint responses
-
-     Args:
-         expire (int): Cache TTL in seconds
-         key_builder (callable, optional): Custom key builder function
-     """
-     return cache(
-         expire=expire,
-         key_builder=key_builder
-     )
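
A sketch of the cached wrapper applied to an endpoint (the route and handler are hypothetical; setup_cache must run before the first cached call, as the startup hook in app.py above does):

    from fastapi import FastAPI
    from app.core.fastapi_cache import setup_cache, cached

    app = FastAPI()
    setup_cache()  # FastAPICache.init must precede any cached handler

    @app.get("/api/leaderboard")  # hypothetical route
    @cached(expire=300)
    async def leaderboard():
        return {"rows": []}  # stands in for the expensive computation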
 
backend/app/core/formatting.py DELETED
@@ -1,104 +0,0 @@
- import logging
- from typing import Dict, Any, List, Optional
-
- logger = logging.getLogger(__name__)
-
- class LogFormatter:
-     """Utility class for consistent log formatting across the application"""
-
-     @staticmethod
-     def section(title: str) -> str:
-         """Create a section header"""
-         return f"\n{'='*20} {title.upper()} {'='*20}"
-
-     @staticmethod
-     def subsection(title: str) -> str:
-         """Create a subsection header"""
-         return f"\n{'─'*20} {title} {'─'*20}"
-
-     @staticmethod
-     def tree(items: Dict[str, Any], title: str = None) -> List[str]:
-         """Create a tree view of dictionary data"""
-         lines = []
-         if title:
-             lines.append(f"📊 {title}:")
-
-         # Get the maximum length for alignment
-         max_key_length = max(len(str(k)) for k in items.keys())
-
-         # Format each item
-         for i, (key, value) in enumerate(items.items()):
-             prefix = "└──" if i == len(items) - 1 else "├──"
-             if isinstance(value, (int, float)):
-                 value = f"{value:,}"  # Add thousand separators
-             lines.append(f"{prefix} {str(key):<{max_key_length}}: {value}")
-
-         return lines
-
-     @staticmethod
-     def stats(stats: Dict[str, int], title: str = None) -> List[str]:
-         """Format statistics with icons"""
-         lines = []
-         if title:
-             lines.append(f"📊 {title}:")
-
-         # Get the maximum length for alignment
-         max_key_length = max(len(str(k)) for k in stats.keys())
-
-         # Format each stat with an appropriate icon
-         icons = {
-             "total": "📌",
-             "success": "✅",
-             "error": "❌",
-             "pending": "⏳",
-             "processing": "⚙️",
-             "finished": "✨",
-             "evaluating": "🔄",
-             "downloads": "⬇️",
-             "files": "📁",
-             "cached": "💾",
-             "size": "📏",
-             "time": "⏱️",
-             "rate": "🚀"
-         }
-
-         # Format each item
-         for i, (key, value) in enumerate(stats.items()):
-             prefix = "└──" if i == len(stats) - 1 else "├──"
-             icon = icons.get(key.lower().split('_')[0], "•")
-             if isinstance(value, (int, float)):
-                 value = f"{value:,}"  # Add thousand separators
-             lines.append(f"{prefix} {icon} {str(key):<{max_key_length}}: {value}")
-
-         return lines
-
-     @staticmethod
-     def progress_bar(current: int, total: int, width: int = 20) -> str:
-         """Create a progress bar"""
-         percentage = (current * 100) // total
-         filled = "█" * (percentage * width // 100)
-         empty = "░" * (width - len(filled))
-         return f"{filled}{empty} {percentage:3d}%"
-
-     @staticmethod
-     def error(message: str, error: Optional[Exception] = None) -> str:
-         """Format error message"""
-         error_msg = f"\n❌ Error: {message}"
-         if error:
-             error_msg += f"\n └── Details: {str(error)}"
-         return error_msg
-
-     @staticmethod
-     def success(message: str) -> str:
-         """Format success message"""
-         return f"✅ {message}"
-
-     @staticmethod
-     def warning(message: str) -> str:
-         """Format warning message"""
-         return f"⚠️ {message}"
-
-     @staticmethod
-     def info(message: str) -> str:
-         """Format info message"""
-         return f"ℹ️ {message}"
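
A short sketch of the formatter in use (values are illustrative; basicConfig is only there so INFO records actually print):

    import logging
    from app.core.formatting import LogFormatter

    logging.basicConfig(level=logging.INFO)
    logger = logging.getLogger("app")
    logger.info(LogFormatter.section("sync"))        # ==================== SYNC ====================
    for line in LogFormatter.stats({"total": 1234, "error": 2}, "Results"):
        logger.info(line)                            # tree lines with 📌/❌ icons and aligned keys
    logger.info(LogFormatter.progress_bar(30, 100))  # ██████░░░░░░░░░░░░░░  30%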
 
backend/app/main.py DELETED
@@ -1,18 +0,0 @@
- from fastapi import FastAPI
- from app.config.logging_config import setup_logging
- import logging
-
- # Initialize logging configuration
- setup_logging()
- logger = logging.getLogger(__name__)
-
- app = FastAPI(title="Open LLM Leaderboard API")
-
- @app.on_event("startup")
- async def startup_event():
-     logger.info("Starting up the application...")
-
- # Import and include routers after app initialization
- from app.api import models, votes
- app.include_router(models.router, prefix="/api", tags=["models"])
- app.include_router(votes.router, prefix="/api", tags=["votes"])
 
backend/app/services/__init__.py DELETED
@@ -1,3 +0,0 @@
- from . import hf_service, leaderboard, votes, models
-
- __all__ = ["hf_service", "leaderboard", "votes", "models"]
 
backend/app/services/hf_service.py DELETED
@@ -1,50 +0,0 @@
- from typing import Optional
- from huggingface_hub import HfApi
- from app.config import HF_TOKEN, API
- from app.core.cache import cache_config
- from app.core.formatting import LogFormatter
- import logging
-
- logger = logging.getLogger(__name__)
-
- class HuggingFaceService:
-     def __init__(self):
-         self.api = API
-         self.token = HF_TOKEN
-         self.cache_dir = cache_config.models_cache
-
-     async def check_authentication(self) -> bool:
-         """Check if the HF token is valid"""
-         if not self.token:
-             return False
-         try:
-             logger.info(LogFormatter.info("Checking HF token validity..."))
-             self.api.get_token_permission()
-             logger.info(LogFormatter.success("HF token is valid"))
-             return True
-         except Exception as e:
-             logger.error(LogFormatter.error("HF token validation failed", e))
-             return False
-
-     async def get_user_info(self) -> Optional[dict]:
-         """Get information about the authenticated user"""
-         try:
-             logger.info(LogFormatter.info("Fetching user information..."))
-             info = self.api.get_token_permission()
-             logger.info(LogFormatter.success(f"User info retrieved for: {info.get('user', 'Unknown')}"))
-             return info
-         except Exception as e:
-             logger.error(LogFormatter.error("Failed to get user info", e))
-             return None
-
-     def _log_repo_operation(self, operation: str, repo: str, details: str = None):
-         """Helper to log repository operations"""
-         logger.info(LogFormatter.section(f"HF REPOSITORY OPERATION - {operation.upper()}"))
-         stats = {
-             "Operation": operation,
-             "Repository": repo,
-         }
-         if details:
-             stats["Details"] = details
-         for line in LogFormatter.tree(stats):
-             logger.info(line)
 
backend/app/services/leaderboard.py DELETED
@@ -1,208 +0,0 @@
- from app.core.cache import cache_config
- from datetime import datetime
- from typing import List, Dict, Any
- import datasets
- from fastapi import HTTPException
- import logging
- from app.config.base import HF_ORGANIZATION
- from app.core.formatting import LogFormatter
-
- logger = logging.getLogger(__name__)
-
- class LeaderboardService:
-     def __init__(self):
-         pass
-
-     async def fetch_raw_data(self) -> List[Dict[str, Any]]:
-         """Fetch raw leaderboard data from HuggingFace dataset"""
-         try:
-             logger.info(LogFormatter.section("FETCHING LEADERBOARD DATA"))
-             logger.info(LogFormatter.info(f"Loading dataset from {HF_ORGANIZATION}/contents"))
-
-             dataset = datasets.load_dataset(
-                 f"{HF_ORGANIZATION}/contents",
-                 cache_dir=cache_config.get_cache_path("datasets")
-             )["train"]
-
-             df = dataset.to_pandas()
-             data = df.to_dict('records')
-
-             stats = {
-                 "Total_Entries": len(data),
-                 "Dataset_Size": f"{df.memory_usage(deep=True).sum() / 1024 / 1024:.1f}MB"
-             }
-             for line in LogFormatter.stats(stats, "Dataset Statistics"):
-                 logger.info(line)
-
-             return data
-
-         except Exception as e:
-             logger.error(LogFormatter.error("Failed to fetch leaderboard data", e))
-             raise HTTPException(status_code=500, detail=str(e))
-
-     async def get_formatted_data(self) -> List[Dict[str, Any]]:
-         """Get formatted leaderboard data"""
-         try:
-             logger.info(LogFormatter.section("FORMATTING LEADERBOARD DATA"))
-
-             raw_data = await self.fetch_raw_data()
-             formatted_data = []
-             type_counts = {}
-             error_count = 0
-
-             # Initialize progress tracking
-             total_items = len(raw_data)
-             logger.info(LogFormatter.info(f"Processing {total_items:,} entries..."))
-
-             for i, item in enumerate(raw_data, 1):
-                 try:
-                     formatted_item = await self.transform_data(item)
-                     formatted_data.append(formatted_item)
-
-                     # Count model types
-                     model_type = formatted_item["model"]["type"]
-                     type_counts[model_type] = type_counts.get(model_type, 0) + 1
-
-                 except Exception as e:
-                     error_count += 1
-                     logger.error(LogFormatter.error(f"Failed to format entry {i}/{total_items}", e))
-                     continue
-
-                 # Log progress every 10%
-                 if i % max(1, total_items // 10) == 0:
-                     progress = (i / total_items) * 100
-                     logger.info(LogFormatter.info(f"Progress: {LogFormatter.progress_bar(i, total_items)}"))
-
-             # Log final statistics
-             stats = {
-                 "Total_Processed": total_items,
-                 "Successful": len(formatted_data),
-                 "Failed": error_count
-             }
-             logger.info(LogFormatter.section("PROCESSING SUMMARY"))
-             for line in LogFormatter.stats(stats, "Processing Statistics"):
-                 logger.info(line)
-
-             # Log model type distribution
-             type_stats = {f"Type_{k}": v for k, v in type_counts.items()}
-             logger.info(LogFormatter.subsection("MODEL TYPE DISTRIBUTION"))
-             for line in LogFormatter.stats(type_stats):
-                 logger.info(line)
-
-             return formatted_data
-
-         except Exception as e:
-             logger.error(LogFormatter.error("Failed to format leaderboard data", e))
-             raise HTTPException(status_code=500, detail=str(e))
-
-     async def transform_data(self, data: Dict[str, Any]) -> Dict[str, Any]:
-         """Transform raw data into the format expected by the frontend"""
-         try:
-             # Extract model name for logging
-             model_name = data.get("fullname", "Unknown")
-             logger.debug(LogFormatter.info(f"Transforming data for model: {model_name}"))
-
-             # Create unique ID combining model name, precision, sha and chat template status
-             unique_id = f"{data.get('fullname', 'Unknown')}_{data.get('Precision', 'Unknown')}_{data.get('Model sha', 'Unknown')}_{str(data.get('Chat Template', False))}"
-
-             evaluations = {
-                 "ifeval": {
-                     "name": "IFEval",
-                     "value": data.get("IFEval Raw", 0),
-                     "normalized_score": data.get("IFEval", 0)
-                 },
-                 "bbh": {
-                     "name": "BBH",
-                     "value": data.get("BBH Raw", 0),
-                     "normalized_score": data.get("BBH", 0)
-                 },
-                 "math": {
-                     "name": "MATH Level 5",
-                     "value": data.get("MATH Lvl 5 Raw", 0),
-                     "normalized_score": data.get("MATH Lvl 5", 0)
-                 },
-                 "gpqa": {
-                     "name": "GPQA",
-                     "value": data.get("GPQA Raw", 0),
-                     "normalized_score": data.get("GPQA", 0)
-                 },
-                 "musr": {
-                     "name": "MUSR",
-                     "value": data.get("MUSR Raw", 0),
-                     "normalized_score": data.get("MUSR", 0)
-                 },
-                 "mmlu_pro": {
-                     "name": "MMLU-PRO",
-                     "value": data.get("MMLU-PRO Raw", 0),
-                     "normalized_score": data.get("MMLU-PRO", 0)
-                 }
-             }
-
-             features = {
-                 "is_not_available_on_hub": data.get("Available on the hub", False),
-                 "is_merged": data.get("Merged", False),
-                 "is_moe": data.get("MoE", False),
-                 "is_flagged": data.get("Flagged", False),
-                 "is_official_provider": data.get("Official Providers", False)
-             }
-
-             metadata = {
-                 "upload_date": data.get("Upload To Hub Date"),
-                 "submission_date": data.get("Submission Date"),
-                 "generation": data.get("Generation"),
-                 "base_model": data.get("Base Model"),
-                 "hub_license": data.get("Hub License"),
-                 "hub_hearts": data.get("Hub ❤️"),
-                 "params_billions": data.get("#Params (B)"),
-                 "co2_cost": data.get("CO₂ cost (kg)", 0)
-             }
-
-             # Clean model type by removing emojis if present
-             original_type = data.get("Type", "")
-             model_type = original_type.lower().strip()
-
-             # Remove emojis and parentheses
-             if "(" in model_type:
-                 model_type = model_type.split("(")[0].strip()
-             model_type = ''.join(c for c in model_type if not c in '🔶🟢🟩💬🤝🌸 ')
-
-             # Map old model types to new ones
-             model_type_mapping = {
-                 "fine-tuned": "fined-tuned-on-domain-specific-dataset",
-                 "fine tuned": "fined-tuned-on-domain-specific-dataset",
-                 "finetuned": "fined-tuned-on-domain-specific-dataset",
-                 "fine_tuned": "fined-tuned-on-domain-specific-dataset",
-                 "ft": "fined-tuned-on-domain-specific-dataset",
-                 "finetuning": "fined-tuned-on-domain-specific-dataset",
-                 "fine tuning": "fined-tuned-on-domain-specific-dataset",
-                 "fine-tuning": "fined-tuned-on-domain-specific-dataset"
-             }
-
-             mapped_type = model_type_mapping.get(model_type.lower().strip(), model_type)
-
-             if mapped_type != model_type:
-                 logger.debug(LogFormatter.info(f"Model type mapped: {original_type} -> {mapped_type}"))
-
-             transformed_data = {
-                 "id": unique_id,
-                 "model": {
-                     "name": data.get("fullname"),
-                     "sha": data.get("Model sha"),
-                     "precision": data.get("Precision"),
-                     "type": mapped_type,
-                     "weight_type": data.get("Weight type"),
-                     "architecture": data.get("Architecture"),
-                     "average_score": data.get("Average ⬆️"),
-                     "has_chat_template": data.get("Chat Template", False)
-                 },
-                 "evaluations": evaluations,
-                 "features": features,
-                 "metadata": metadata
-             }
-
-             logger.debug(LogFormatter.success(f"Successfully transformed data for {model_name}"))
-             return transformed_data
-
-         except Exception as e:
-             logger.error(LogFormatter.error(f"Failed to transform data for {data.get('fullname', 'Unknown')}", e))
-             raise
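
A driver sketch for this service (run inside an event loop; the dataset download requires network access and an HF token with read access):

    import asyncio
    from app.services.leaderboard import LeaderboardService

    async def main():
        service = LeaderboardService()
        rows = await service.get_formatted_data()
        print(len(rows), rows[0]["model"]["name"] if rows else None)

    asyncio.run(main())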
 
backend/app/services/models.py DELETED
@@ -1,587 +0,0 @@
- from datetime import datetime, timezone
- from typing import Dict, Any, Optional, List
- import json
- import os
- from pathlib import Path
- import logging
- import aiohttp
- import asyncio
- import time
- from huggingface_hub import HfApi, CommitOperationAdd
- from huggingface_hub.utils import build_hf_headers
- from datasets import disable_progress_bar
- import sys
- import contextlib
- from concurrent.futures import ThreadPoolExecutor
- import tempfile
-
- from app.config import (
-     QUEUE_REPO,
-     HF_TOKEN,
-     EVAL_REQUESTS_PATH
- )
- from app.config.hf_config import HF_ORGANIZATION
- from app.services.hf_service import HuggingFaceService
- from app.utils.model_validation import ModelValidator
- from app.services.votes import VoteService
- from app.core.cache import cache_config
- from app.core.formatting import LogFormatter
-
- # Disable datasets progress bars globally
- disable_progress_bar()
-
- logger = logging.getLogger(__name__)
-
- # Context manager to temporarily disable stdout and stderr
- @contextlib.contextmanager
- def suppress_output():
-     stdout = sys.stdout
-     stderr = sys.stderr
-     devnull = open(os.devnull, 'w')
-     try:
-         sys.stdout = devnull
-         sys.stderr = devnull
-         yield
-     finally:
-         sys.stdout = stdout
-         sys.stderr = stderr
-         devnull.close()
-
- class ProgressTracker:
-     def __init__(self, total: int, desc: str = "Progress", update_frequency: int = 10):
-         self.total = total
-         self.current = 0
-         self.desc = desc
-         self.start_time = time.time()
-         self.update_frequency = update_frequency  # Percentage steps
-         self.last_update = -1
-
-         # Initial log with fancy formatting
-         logger.info(LogFormatter.section(desc))
-         logger.info(LogFormatter.info(f"Starting processing of {total:,} items..."))
-         sys.stdout.flush()
-
-     def update(self, n: int = 1):
-         self.current += n
-         current_percentage = (self.current * 100) // self.total
-
-         # Only update on frequency steps (e.g., 0%, 10%, 20%, etc.)
-         if current_percentage >= self.last_update + self.update_frequency or current_percentage == 100:
-             elapsed = time.time() - self.start_time
-             rate = self.current / elapsed if elapsed > 0 else 0
-             remaining = (self.total - self.current) / rate if rate > 0 else 0
-
-             # Create progress stats
-             stats = {
-                 "Progress": LogFormatter.progress_bar(self.current, self.total),
-                 "Items": f"{self.current:,}/{self.total:,}",
-                 "Time": f"⏱️ {elapsed:.1f}s elapsed, {remaining:.1f}s remaining",
-                 "Rate": f"🚀 {rate:.1f} items/s"
-             }
-
-             # Log progress using tree format
-             for line in LogFormatter.tree(stats):
-                 logger.info(line)
-             sys.stdout.flush()
-
-             self.last_update = (current_percentage // self.update_frequency) * self.update_frequency
-
-     def close(self):
-         elapsed = time.time() - self.start_time
-         rate = self.total / elapsed if elapsed > 0 else 0
-
-         # Final summary with fancy formatting
-         logger.info(LogFormatter.section("COMPLETED"))
-         stats = {
-             "Total": f"{self.total:,} items",
-             "Time": f"{elapsed:.1f}s",
-             "Rate": f"{rate:.1f} items/s"
-         }
-         for line in LogFormatter.stats(stats):
-             logger.info(line)
-         logger.info("="*50)
-         sys.stdout.flush()
-
- class ModelService(HuggingFaceService):
-     _instance: Optional['ModelService'] = None
-     _initialized = False
-
-     def __new__(cls):
-         if cls._instance is None:
-             logger.info(LogFormatter.info("Creating new ModelService instance"))
-             cls._instance = super(ModelService, cls).__new__(cls)
-         return cls._instance
-
-     def __init__(self):
-         if not hasattr(self, '_init_done'):
-             logger.info(LogFormatter.section("MODEL SERVICE INITIALIZATION"))
-             super().__init__()
-             self.validator = ModelValidator()
-             self.vote_service = VoteService()
-             self.eval_requests_path = cache_config.eval_requests_file
-             logger.info(LogFormatter.info(f"Using eval requests path: {self.eval_requests_path}"))
-
-             self.eval_requests_path.parent.mkdir(parents=True, exist_ok=True)
-             self.hf_api = HfApi(token=HF_TOKEN)
-             self.cached_models = None
-             self.last_cache_update = 0
-             self.cache_ttl = cache_config.cache_ttl.total_seconds()
-             self._init_done = True
-             logger.info(LogFormatter.success("Initialization complete"))
-
-     async def _download_and_process_file(self, file: str, session: aiohttp.ClientSession, progress: ProgressTracker) -> Optional[Dict]:
-         """Download and process a file asynchronously"""
-         try:
-             # Build file URL
-             url = f"https://huggingface.co/datasets/{QUEUE_REPO}/resolve/main/{file}"
-             headers = build_hf_headers(token=self.token)
-
-             # Download file
-             async with session.get(url, headers=headers) as response:
-                 if response.status != 200:
-                     logger.error(LogFormatter.error(f"Failed to download {file}", f"HTTP {response.status}"))
-                     progress.update()
-                     return None
-
-                 try:
-                     # First read content as text
-                     text_content = await response.text()
-                     # Then parse JSON
-                     content = json.loads(text_content)
-                 except json.JSONDecodeError as e:
-                     logger.error(LogFormatter.error(f"Failed to decode JSON from {file}", e))
-                     progress.update()
-                     return None
-
-                 # Get status and determine target status
-                 status = content.get("status", "PENDING").upper()
-                 target_status = None
-                 status_map = {
-                     "PENDING": ["PENDING"],
-                     "EVALUATING": ["RUNNING"],
-                     "FINISHED": ["FINISHED"]
-                 }
-
-                 for target, source_statuses in status_map.items():
-                     if status in source_statuses:
-                         target_status = target
-                         break
-
-                 if not target_status:
-                     progress.update()
-                     return None
-
-                 # Calculate wait time
-                 try:
-                     submit_time = datetime.fromisoformat(content["submitted_time"].replace("Z", "+00:00"))
-                     if submit_time.tzinfo is None:
-                         submit_time = submit_time.replace(tzinfo=timezone.utc)
-                     current_time = datetime.now(timezone.utc)
-                     wait_time = current_time - submit_time
-
-                     model_info = {
-                         "name": content["model"],
-                         "submitter": content.get("sender", "Unknown"),
-                         "revision": content["revision"],
-                         "wait_time": f"{wait_time.total_seconds():.1f}s",
-                         "submission_time": content["submitted_time"],
-                         "status": target_status,
-                         "precision": content.get("precision", "Unknown")
-                     }
-
-                     progress.update()
-                     return model_info
-
-                 except (ValueError, TypeError) as e:
-                     logger.error(LogFormatter.error(f"Failed to process {file}", e))
-                     progress.update()
-                     return None
-
-         except Exception as e:
-             logger.error(LogFormatter.error(f"Failed to load {file}", e))
-             progress.update()
-             return None
-
-     async def _refresh_models_cache(self):
-         """Refresh the models cache"""
-         try:
-             logger.info(LogFormatter.section("CACHE REFRESH"))
-             self._log_repo_operation("read", f"{HF_ORGANIZATION}/requests", "Refreshing models cache")
-
-             # Initialize models dictionary
-             models = {
-                 "finished": [],
-                 "evaluating": [],
-                 "pending": []
-             }
-
-             try:
-                 logger.info(LogFormatter.subsection("DATASET LOADING"))
-                 logger.info(LogFormatter.info("Loading dataset files..."))
-
-                 # List files in repository
-                 with suppress_output():
-                     files = self.hf_api.list_repo_files(
-                         repo_id=QUEUE_REPO,
-                         repo_type="dataset",
-                         token=self.token
-                     )
-
-                 # Filter JSON files
-                 json_files = [f for f in files if f.endswith('.json')]
-                 total_files = len(json_files)
-
-                 # Log repository stats
-                 stats = {
-                     "Total_Files": len(files),
-                     "JSON_Files": total_files,
-                 }
-                 for line in LogFormatter.stats(stats, "Repository Statistics"):
-                     logger.info(line)
-
-                 if not json_files:
-                     raise Exception("No JSON files found in repository")
-
-                 # Initialize progress tracker
-                 progress = ProgressTracker(total_files, "PROCESSING FILES")
-
-                 try:
-                     # Create aiohttp session to reuse connections
-                     async with aiohttp.ClientSession() as session:
-                         # Process files in chunks
-                         chunk_size = 50
-
-                         for i in range(0, len(json_files), chunk_size):
-                             chunk = json_files[i:i + chunk_size]
-                             chunk_tasks = [
-                                 self._download_and_process_file(file, session, progress)
-                                 for file in chunk
-                             ]
-                             results = await asyncio.gather(*chunk_tasks)
-
-                             # Process results
-                             for result in results:
-                                 if result:
-                                     status = result.pop("status")
-                                     models[status.lower()].append(result)
-
-                 finally:
-                     progress.close()
-
-                 # Final summary with fancy formatting
-                 logger.info(LogFormatter.section("CACHE SUMMARY"))
-                 stats = {
-                     "Finished": len(models["finished"]),
-                     "Evaluating": len(models["evaluating"]),
-                     "Pending": len(models["pending"])
-                 }
-                 for line in LogFormatter.stats(stats, "Models by Status"):
-                     logger.info(line)
-                 logger.info("="*50)
-
-             except Exception as e:
-                 logger.error(LogFormatter.error("Error processing files", e))
-                 raise
-
-             # Update cache
-             self.cached_models = models
-             self.last_cache_update = time.time()
-             logger.info(LogFormatter.success("Cache updated successfully"))
-
-             return models
-
-         except Exception as e:
-             logger.error(LogFormatter.error("Cache refresh failed", e))
-             raise
-
-     async def initialize(self):
-         """Initialize the model service"""
-         if self._initialized:
-             logger.info(LogFormatter.info("Service already initialized, using cached data"))
-             return
-
-         try:
-             logger.info(LogFormatter.section("MODEL SERVICE INITIALIZATION"))
-
-             # Check if cache already exists
-             cache_path = cache_config.get_cache_path("datasets")
-             if not cache_path.exists() or not any(cache_path.iterdir()):
-                 logger.info(LogFormatter.info("No existing cache found, initializing datasets cache..."))
-                 cache_config.flush_cache("datasets")
-             else:
-                 logger.info(LogFormatter.info("Using existing datasets cache"))
-
-             # Ensure eval requests directory exists
-             self.eval_requests_path.parent.mkdir(parents=True, exist_ok=True)
-             logger.info(LogFormatter.info(f"Eval requests directory: {self.eval_requests_path}"))
-
-             # List existing files
-             if self.eval_requests_path.exists():
-                 files = list(self.eval_requests_path.glob("**/*.json"))
-                 stats = {
-                     "Total_Files": len(files),
-                     "Directory": str(self.eval_requests_path)
-                 }
-                 for line in LogFormatter.stats(stats, "Eval Requests"):
-                     logger.info(line)
-
-             # Load initial cache
-             await self._refresh_models_cache()
-
-             self._initialized = True
-             logger.info(LogFormatter.success("Model service initialization complete"))
-
-         except Exception as e:
-             logger.error(LogFormatter.error("Initialization failed", e))
-             raise
-
-     async def get_models(self) -> Dict[str, List[Dict[str, Any]]]:
-         """Get all models with their status"""
-         if not self._initialized:
-             logger.info(LogFormatter.info("Service not initialized, initializing now..."))
-             await self.initialize()
-
-         current_time = time.time()
-         cache_age = current_time - self.last_cache_update
-
-         # Check if cache needs refresh
-         if not self.cached_models:
-             logger.info(LogFormatter.info("No cached data available, refreshing cache..."))
-             return await self._refresh_models_cache()
-         elif cache_age > self.cache_ttl:
-             logger.info(LogFormatter.info(f"Cache expired ({cache_age:.1f}s old, TTL: {self.cache_ttl}s)"))
-             return await self._refresh_models_cache()
-         else:
-             logger.info(LogFormatter.info(f"Using cached data ({cache_age:.1f}s old)"))
-             return self.cached_models
-
-     async def submit_model(
-         self,
-         model_data: Dict[str, Any],
-         user_id: str
-     ) -> Dict[str, Any]:
-         logger.info(LogFormatter.section("MODEL SUBMISSION"))
-         self._log_repo_operation("write", f"{HF_ORGANIZATION}/requests", f"Submitting model {model_data['model_id']} by {user_id}")
-         stats = {
-             "Model": model_data["model_id"],
-             "User": user_id,
-             "Revision": model_data["revision"],
-             "Precision": model_data["precision"],
-             "Type": model_data["model_type"]
-         }
-         for line in LogFormatter.tree(stats, "Submission Details"):
-             logger.info(line)
-
-         # Validate required fields
-         required_fields = [
-             "model_id", "base_model", "revision", "precision",
-             "weight_type", "model_type", "use_chat_template"
-         ]
-         for field in required_fields:
-             if field not in model_data:
-                 raise ValueError(f"Missing required field: {field}")
-
-         # Get model info and validate it exists on HuggingFace
-         try:
-             logger.info(LogFormatter.subsection("MODEL VALIDATION"))
-
-             # Get the model info to check if it exists
-             model_info = self.hf_api.model_info(
-                 model_data["model_id"],
-                 revision=model_data["revision"],
-                 token=self.token
-             )
-
-             if not model_info:
-                 raise Exception(f"Model {model_data['model_id']} not found on HuggingFace Hub")
-
-             logger.info(LogFormatter.success("Model exists on HuggingFace Hub"))
-
-         except Exception as e:
-             logger.error(LogFormatter.error("Model validation failed", e))
-             raise
-
-         # Update model revision with commit sha
-         model_data["revision"] = model_info.sha
-
-         # Check if model already exists in the system
-         try:
-             logger.info(LogFormatter.subsection("CHECKING EXISTING SUBMISSIONS"))
-             existing_models = await self.get_models()
-
-             # Call the official provider status check
-             is_valid, error_message = await self.validator.check_official_provider_status(
-                 model_data["model_id"],
-                 existing_models
-             )
-             if not is_valid:
-                 raise ValueError(error_message)
-
-             # Check in all statuses (pending, evaluating, finished)
-             for status, models in existing_models.items():
-                 for model in models:
-                     if model["name"] == model_data["model_id"] and model["revision"] == model_data["revision"]:
-                         error_msg = f"Model {model_data['model_id']} revision {model_data['revision']} is already in the system with status: {status}"
-                         logger.error(LogFormatter.error("Submission rejected", error_msg))
-                         raise ValueError(error_msg)
-
-             logger.info(LogFormatter.success("No existing submission found"))
-         except ValueError:
-             raise
-         except Exception as e:
-             logger.error(LogFormatter.error("Failed to check existing submissions", e))
-             raise
-
-         # Check that model on hub and valid
-         valid, error, model_config = await self.validator.is_model_on_hub(
-             model_data["model_id"],
-             model_data["revision"],
-             test_tokenizer=True
-         )
-         if not valid:
-             logger.error(LogFormatter.error("Model on hub validation failed", error))
-             raise Exception(error)
-         logger.info(LogFormatter.success("Model on hub validation passed"))
-
-         # Validate model card
-         valid, error, model_card = await self.validator.check_model_card(
-             model_data["model_id"]
-         )
-         if not valid:
-             logger.error(LogFormatter.error("Model card validation failed", error))
-             raise Exception(error)
-         logger.info(LogFormatter.success("Model card validation passed"))
-
-         # Check size limits
-         model_size, error = await self.validator.get_model_size(
-             model_info,
-             model_data["precision"],
-             model_data["base_model"],
-             revision=model_data["revision"]
-         )
-         if model_size is None:
-             logger.error(LogFormatter.error("Model size validation failed", error))
-             raise Exception(error)
-         logger.info(LogFormatter.success(f"Model size validation passed: {model_size:.1f}B"))
-
-         # Size limits based on precision
-         if model_data["precision"] in ["float16", "bfloat16"] and model_size > 100:
-             error_msg = f"Model too large for {model_data['precision']} (limit: 100B)"
-             logger.error(LogFormatter.error("Size limit exceeded", error_msg))
-             raise Exception(error_msg)
-
-         # Chat template validation if requested
-         if model_data["use_chat_template"]:
-             valid, error = await self.validator.check_chat_template(
-                 model_data["model_id"],
-                 model_data["revision"]
-             )
-             if not valid:
-                 logger.error(LogFormatter.error("Chat template validation failed", error))
-                 raise Exception(error)
-             logger.info(LogFormatter.success("Chat template validation passed"))
-
-         architectures = model_info.config.get("architectures", "")
-         if architectures:
-             architectures = ";".join(architectures)
-
-         # Create eval entry
-         eval_entry = {
-             "model": model_data["model_id"],
-             "base_model": model_data["base_model"],
-             "revision": model_info.sha,
-             "precision": model_data["precision"],
-             "params": model_size,
-             "architectures": architectures,
-             "weight_type": model_data["weight_type"],
-             "status": "PENDING",
-             "submitted_time": datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ"),
-             "model_type": model_data["model_type"],
-             "job_id": -1,
-             "job_start_time": None,
-             "use_chat_template": model_data["use_chat_template"],
-             "sender": user_id
-         }
-
-         logger.info(LogFormatter.subsection("EVALUATION ENTRY"))
-         for line in LogFormatter.tree(eval_entry):
-             logger.info(line)
-
-         # Upload to HF dataset
-         try:
-             logger.info(LogFormatter.subsection("UPLOADING TO HUGGINGFACE"))
-             logger.info(LogFormatter.info(f"Uploading to {HF_ORGANIZATION}/requests..."))
-
-             # Construct the path in the dataset
-             org_or_user = model_data["model_id"].split("/")[0] if "/" in model_data["model_id"] else ""
-             model_path = model_data["model_id"].split("/")[-1]
-             relative_path = f"{org_or_user}/{model_path}_eval_request_False_{model_data['precision']}_{model_data['weight_type']}.json"
-
-             # Create a temporary file with the request
-             with tempfile.NamedTemporaryFile(mode='w', suffix='.json', delete=False) as temp_file:
-                 json.dump(eval_entry, temp_file, indent=2)
-                 temp_file.flush()
-                 temp_path = temp_file.name
-
-             # Upload file directly
-             self.hf_api.upload_file(
-                 path_or_fileobj=temp_path,
-                 path_in_repo=relative_path,
-                 repo_id=f"{HF_ORGANIZATION}/requests",
-                 repo_type="dataset",
-                 commit_message=f"Add {model_data['model_id']} to eval queue",
-                 token=self.token
-             )
-
-             # Clean up temp file
-             os.unlink(temp_path)
-
-             logger.info(LogFormatter.success("Upload successful"))
-
-         except Exception as e:
-             logger.error(LogFormatter.error("Upload failed", e))
-             raise
-
-         # Add automatic vote
-         try:
-             logger.info(LogFormatter.subsection("AUTOMATIC VOTE"))
-             logger.info(LogFormatter.info(f"Adding upvote for {model_data['model_id']} by {user_id}"))
-             await self.vote_service.add_vote(
-                 model_data["model_id"],
-                 user_id,
-                 "up"
-             )
-             logger.info(LogFormatter.success("Vote recorded successfully"))
-         except Exception as e:
-             logger.error(LogFormatter.error("Failed to record vote", e))
-             # Don't raise here as the main submission was successful
-
-         return {
-             "status": "success",
-             "message": "The model was submitted successfully, and the vote has been recorded"
-         }
-
-     async def get_model_status(self, model_id: str) -> Dict[str, Any]:
-         """Get evaluation status of a model"""
-         logger.info(LogFormatter.info(f"Checking status for model: {model_id}"))
-         eval_path = self.eval_requests_path
-
-         for user_folder in eval_path.iterdir():
-             if user_folder.is_dir():
-                 for file in user_folder.glob("*.json"):
-                     with open(file, "r") as f:
-                         data = json.load(f)
-                         if data["model"] == model_id:
-                             status = {
-                                 "status": data["status"],
-                                 "submitted_time": data["submitted_time"],
-                                 "job_id": data.get("job_id", -1)
-                             }
-                             logger.info(LogFormatter.success("Status found"))
-                             for line in LogFormatter.tree(status, "Model Status"):
-                                 logger.info(line)
-                             return status
-
-         logger.warning(LogFormatter.warning(f"No status found for model: {model_id}"))
-         return {"status": "not_found"}
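
The ProgressTracker defined near the top of this file is self-contained and reusable outside the cache-refresh path; a sketch of how it is driven (the work loop is illustrative):

    from app.services.models import ProgressTracker

    tracker = ProgressTracker(total=500, desc="PROCESSING FILES", update_frequency=10)
    for _ in range(500):
        # ... process one item ...
        tracker.update()  # logs a stats tree at each 10% step
    tracker.close()       # logs the final totals and items/s rate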
 
backend/app/services/rate_limiter.py DELETED
@@ -1,72 +0,0 @@
- """
- import logging
- from datetime import datetime, timedelta, timezone
- from typing import Tuple, Dict, List
-
- logger = logging.getLogger(__name__)
-
- class RateLimiter:
-     def __init__(self, period_days: int = 7, quota: int = 5):
-         self.period_days = period_days
-         self.quota = quota
-         self.submission_history: Dict[str, List[datetime]] = {}
-         self.higher_quota_users = set()  # Users with higher quotas
-         self.unlimited_users = set()  # Users with no quota limits
-
-     def add_unlimited_user(self, user_id: str):
-         """Add a user to the unlimited users list"""
-         self.unlimited_users.add(user_id)
-
-     def add_higher_quota_user(self, user_id: str):
-         """Add a user to the higher quota users list"""
-         self.higher_quota_users.add(user_id)
-
-     def record_submission(self, user_id: str):
-         """Record a new submission for a user"""
-         current_time = datetime.now(timezone.utc)
-         if user_id not in self.submission_history:
-             self.submission_history[user_id] = []
-         self.submission_history[user_id].append(current_time)
-
-     def clean_old_submissions(self, user_id: str):
-         """Remove submissions older than the period"""
-         if user_id not in self.submission_history:
-             return
-
-         current_time = datetime.now(timezone.utc)
-         cutoff_time = current_time - timedelta(days=self.period_days)
-
-         self.submission_history[user_id] = [
-             time for time in self.submission_history[user_id]
-             if time > cutoff_time
-         ]
-
-     async def check_rate_limit(self, user_id: str) -> Tuple[bool, str]:
-         """Check if a user has exceeded their rate limit
-
-         Returns:
-             Tuple[bool, str]: (is_allowed, error_message)
-         """
-         # Unlimited users bypass all checks
-         if user_id in self.unlimited_users:
-             return True, ""
-
-         # Clean old submissions
-         self.clean_old_submissions(user_id)
-
-         # Get current submission count
-         submission_count = len(self.submission_history.get(user_id, []))
-
-         # Calculate user's quota
-         user_quota = self.quota * 2 if user_id in self.higher_quota_users else self.quota
-
-         # Check if user has exceeded their quota
-         if submission_count >= user_quota:
-             error_msg = (
-                 f"User '{user_id}' has reached the limit of {user_quota} submissions "
-                 f"in the last {self.period_days} days. Please wait before submitting again."
-             )
-             return False, error_msg
-
-         return True, ""
- """
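
Note that this entire module body sits inside one triple-quoted string, so the class is effectively disabled code (and the nested docstrings mean it would not even import cleanly as written). If it were re-enabled, usage would look like this sketch (user ID hypothetical; assumes the outer string wrapper is removed):

    import asyncio
    from app.services.rate_limiter import RateLimiter  # assumes the wrapper is removed

    async def main():
        limiter = RateLimiter(period_days=7, quota=5)
        for _ in range(5):
            limiter.record_submission("alice")
        allowed, message = await limiter.check_rate_limit("alice")
        print(allowed, message)  # False: the 7-day quota of 5 is exhausted

    asyncio.run(main())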
 
backend/app/services/votes.py DELETED
@@ -1,390 +0,0 @@
- from datetime import datetime, timezone
- from typing import Dict, Any, List, Set, Tuple, Optional
- import json
- import logging
- import asyncio
- from pathlib import Path
- import aiohttp
- from huggingface_hub import HfApi
- import datasets
-
- from app.services.hf_service import HuggingFaceService
- from app.config import HF_TOKEN
- from app.config.hf_config import HF_ORGANIZATION
- from app.core.cache import cache_config
- from app.core.formatting import LogFormatter
-
- logger = logging.getLogger(__name__)
-
- class VoteService(HuggingFaceService):
-     _instance: Optional['VoteService'] = None
-     _initialized = False
-
-     def __new__(cls):
-         if cls._instance is None:
-             cls._instance = super(VoteService, cls).__new__(cls)
-         return cls._instance
-
-     def __init__(self):
-         if not hasattr(self, '_init_done'):
-             super().__init__()
-             self.votes_file = cache_config.votes_file
-             self.votes_to_upload: List[Dict[str, Any]] = []
-             self.vote_check_set: Set[Tuple[str, str, str]] = set()
-             self._votes_by_model: Dict[str, List[Dict[str, Any]]] = {}
-             self._votes_by_user: Dict[str, List[Dict[str, Any]]] = {}
-             self._upload_lock = asyncio.Lock()
-             self._last_sync = None
-             self._sync_interval = 300  # 5 minutes
-             self._total_votes = 0
-             self._last_vote_timestamp = None
-             self._max_retries = 3
-             self._retry_delay = 1  # seconds
-             self._upload_batch_size = 10
-             self.hf_api = HfApi(token=HF_TOKEN)
-             self._init_done = True
-
-     async def initialize(self):
-         """Initialize the vote service"""
-         if self._initialized:
-             await self._check_for_new_votes()
-             return
-
-         try:
-             logger.info(LogFormatter.section("VOTE SERVICE INITIALIZATION"))
-
-             # Ensure votes directory exists
-             self.votes_file.parent.mkdir(parents=True, exist_ok=True)
-
-             # Load existing votes if file exists
-             local_vote_count = 0
-             if self.votes_file.exists():
-                 logger.info(LogFormatter.info(f"Loading votes from {self.votes_file}"))
-                 local_vote_count = await self._count_local_votes()
-                 logger.info(LogFormatter.info(f"Found {local_vote_count:,} local votes"))
-
-             # Check remote votes count
-             remote_vote_count = await self._count_remote_votes()
-             logger.info(LogFormatter.info(f"Found {remote_vote_count:,} remote votes"))
-
-             if remote_vote_count > local_vote_count:
-                 logger.info(LogFormatter.info(f"Fetching {remote_vote_count - local_vote_count:,} new votes"))
-                 await self._sync_with_hub()
-             elif remote_vote_count < local_vote_count:
-                 logger.warning(LogFormatter.warning(f"Local votes ({local_vote_count:,}) > Remote votes ({remote_vote_count:,})"))
-                 await self._load_existing_votes()
-             else:
-                 logger.info(LogFormatter.success("Local and remote votes are in sync"))
-                 if local_vote_count > 0:
-                     await self._load_existing_votes()
-                 else:
-                     logger.info(LogFormatter.info("No votes found"))
-
-             self._initialized = True
-             self._last_sync = datetime.now(timezone.utc)
-
-             # Final summary
-             stats = {
-                 "Total_Votes": self._total_votes,
-                 "Last_Sync": self._last_sync.strftime("%Y-%m-%d %H:%M:%S UTC")
-             }
-             logger.info(LogFormatter.section("INITIALIZATION COMPLETE"))
-             for line in LogFormatter.stats(stats):
-                 logger.info(line)
-
-         except Exception as e:
-             logger.error(LogFormatter.error("Initialization failed", e))
-             raise
-
-     async def _count_local_votes(self) -> int:
-         """Count votes in local file"""
-         if not self.votes_file.exists():
-             return 0
-
-         count = 0
-         try:
-             with open(self.votes_file, 'r') as f:
-                 for _ in f:
-                     count += 1
-             return count
-         except Exception as e:
-             logger.error(f"Error counting local votes: {str(e)}")
-             return 0
-
-     async def _count_remote_votes(self) -> int:
-         """Count votes in remote file"""
-         url = f"https://huggingface.co/datasets/{HF_ORGANIZATION}/votes/raw/main/votes_data.jsonl"
-         headers = {"Authorization": f"Bearer {HF_TOKEN}"} if HF_TOKEN else {}
-
-         try:
-             async with aiohttp.ClientSession() as session:
-                 async with session.get(url, headers=headers) as response:
-                     if response.status == 200:
-                         count = 0
-                         async for line in response.content:
-                             if line.strip():  # Skip empty lines
-                                 count += 1
-                         return count
-                     else:
-                         logger.error(f"Failed to get remote votes: HTTP {response.status}")
-                         return 0
-         except Exception as e:
-             logger.error(f"Error counting remote votes: {str(e)}")
-             return 0
-
-     async def _sync_with_hub(self):
-         """Sync votes with HuggingFace hub using datasets"""
-         try:
-             logger.info(LogFormatter.section("VOTE SYNC"))
-             self._log_repo_operation("sync", f"{HF_ORGANIZATION}/votes", "Syncing local votes with HF hub")
-             logger.info(LogFormatter.info("Syncing with HuggingFace hub..."))
-
-             # Load votes from HF dataset
-             dataset = datasets.load_dataset(
-                 f"{HF_ORGANIZATION}/votes",
-                 split="train",
-                 cache_dir=cache_config.get_cache_path("datasets")
-             )
-
-             remote_votes = len(dataset)
-             logger.info(LogFormatter.info(f"Dataset loaded with {remote_votes:,} votes"))
-
-             # Convert to list of dictionaries
-             df = dataset.to_pandas()
-             if 'timestamp' in df.columns:
-                 df['timestamp'] = df['timestamp'].dt.strftime('%Y-%m-%dT%H:%M:%SZ')
-             remote_votes = df.to_dict('records')
-
-             # If we have more remote votes than local
-             if len(remote_votes) > self._total_votes:
-                 new_votes = len(remote_votes) - self._total_votes
-                 logger.info(LogFormatter.info(f"Processing {new_votes:,} new votes..."))
-
-                 # Save votes to local file
-                 with open(self.votes_file, 'w') as f:
-                     for vote in remote_votes:
-                         f.write(json.dumps(vote) + '\n')
-
-                 # Reload votes in memory
-                 await self._load_existing_votes()
-                 logger.info(LogFormatter.success("Sync completed successfully"))
-             else:
-                 logger.info(LogFormatter.success("Local votes are up to date"))
-
-             self._last_sync = datetime.now(timezone.utc)
-
-         except Exception as e:
-             logger.error(LogFormatter.error("Sync failed", e))
-             raise
-
-     async def _check_for_new_votes(self):
-         """Check for new votes on the hub"""
-         try:
-             self._log_repo_operation("check", f"{HF_ORGANIZATION}/votes", "Checking for new votes")
-             # Load only dataset metadata
-             dataset_info = datasets.load_dataset(f"{HF_ORGANIZATION}/votes", split="train")
-             remote_vote_count = len(dataset_info)
-
-             if remote_vote_count > self._total_votes:
-                 logger.info(f"Found {remote_vote_count - self._total_votes} new votes on hub")
-                 await self._sync_with_hub()
-             else:
-                 logger.info("No new votes found on hub")
-
-         except Exception as e:
-             logger.error(f"Error checking for new votes: {str(e)}")
-
-     async def _load_existing_votes(self):
-         """Load existing votes from file"""
-         if not self.votes_file.exists():
-             logger.warning(LogFormatter.warning("No votes file found"))
-             return
-
-         try:
-             logger.info(LogFormatter.section("LOADING VOTES"))
-
-             # Clear existing data structures
-             self.vote_check_set.clear()
-             self._votes_by_model.clear()
-             self._votes_by_user.clear()
-
-             vote_count = 0
-             latest_timestamp = None
-
-             with open(self.votes_file, "r") as f:
-                 for line in f:
-                     try:
-                         vote = json.loads(line.strip())
-                         vote_count += 1
-
-                         # Track latest timestamp
-                         try:
-                             vote_timestamp = datetime.fromisoformat(vote["timestamp"].replace("Z", "+00:00"))
-                             if not latest_timestamp or vote_timestamp > latest_timestamp:
-                                 latest_timestamp = vote_timestamp
-                             vote["timestamp"] = vote_timestamp.strftime("%Y-%m-%dT%H:%M:%SZ")
-                         except (KeyError, ValueError) as e:
-                             logger.warning(LogFormatter.warning(f"Invalid timestamp in vote: {str(e)}"))
-                             continue
-
-                         if vote_count % 1000 == 0:
-                             logger.info(LogFormatter.info(f"Processed {vote_count:,} votes..."))
-
-                         self._add_vote_to_memory(vote)
-
-                     except json.JSONDecodeError as e:
-                         logger.error(LogFormatter.error("Vote parsing failed", e))
-                         continue
-                     except Exception as e:
-                         logger.error(LogFormatter.error("Vote processing failed", e))
-                         continue
-
-             self._total_votes = vote_count
-             self._last_vote_timestamp = latest_timestamp
-
-             # Final summary
-             stats = {
-                 "Total_Votes": vote_count,
-                 "Latest_Vote": latest_timestamp.strftime("%Y-%m-%d %H:%M:%S UTC") if latest_timestamp else "None",
-                 "Unique_Models": len(self._votes_by_model),
-                 "Unique_Users": len(self._votes_by_user)
-             }
-
-             logger.info(LogFormatter.section("VOTE SUMMARY"))
-             for line in LogFormatter.stats(stats):
-                 logger.info(line)
-
-         except Exception as e:
-             logger.error(LogFormatter.error("Failed to load votes", e))
-             raise
-
-     def _add_vote_to_memory(self, vote: Dict[str, Any]):
-         """Add vote to memory structures"""
-         try:
-             check_tuple = (vote["model"], vote["revision"], vote["username"])
-
-             # Skip if we already have this vote
-             if check_tuple in self.vote_check_set:
-                 return
-
-             self.vote_check_set.add(check_tuple)
-
-             # Update model votes
-             if vote["model"] not in self._votes_by_model:
-                 self._votes_by_model[vote["model"]] = []
-             self._votes_by_model[vote["model"]].append(vote)
-
-             # Update user votes
-             if vote["username"] not in self._votes_by_user:
-                 self._votes_by_user[vote["username"]] = []
-             self._votes_by_user[vote["username"]].append(vote)
-
-         except KeyError as e:
-             logger.error(f"Malformed vote data, missing key: {str(e)}")
-         except Exception as e:
-             logger.error(f"Error adding vote to memory: {str(e)}")
-
-     async def get_user_votes(self, user_id: str) -> List[Dict[str, Any]]:
-         """Get all votes from a specific user"""
-         logger.info(LogFormatter.info(f"Fetching votes for user: {user_id}"))
-         votes = self._votes_by_user.get(user_id, [])
-         logger.info(LogFormatter.success(f"Found {len(votes):,} votes"))
-         return votes
-
-     async def get_model_votes(self, model_id: str) -> Dict[str, Any]:
-         """Get all votes for a specific model"""
-         logger.info(LogFormatter.info(f"Fetching votes for model: {model_id}"))
-         votes = self._votes_by_model.get(model_id, [])
-
-         # Group votes by revision
-         votes_by_revision = {}
-         for vote in votes:
-             revision = vote["revision"]
-             if revision not in votes_by_revision:
-                 votes_by_revision[revision] = 0
-             votes_by_revision[revision] += 1
-
-         stats = {
-             "Total_Votes": len(votes),
-             **{f"Revision_{k}": v for k, v in votes_by_revision.items()}
-         }
-
-         logger.info(LogFormatter.section("VOTE STATISTICS"))
-         for line in LogFormatter.stats(stats):
-             logger.info(line)
-
-         return {
-             "total_votes": len(votes),
-             "votes_by_revision": votes_by_revision,
-             "votes": votes
-         }
-
-     async def _get_model_revision(self, model_id: str) -> str:
-         """Get current revision of a model with retries"""
-         logger.info(f"Getting revision for model: {model_id}")
-         for attempt in range(self._max_retries):
-             try:
-                 model_info = await asyncio.to_thread(self.hf_api.model_info, model_id)
-                 logger.info(f"Successfully got revision {model_info.sha} for model {model_id}")
-                 return model_info.sha
-             except Exception as e:
-                 logger.error(f"Error getting model revision for {model_id} (attempt {attempt + 1}): {str(e)}")
-                 if attempt < self._max_retries - 1:
-                     retry_delay = self._retry_delay * (attempt + 1)
-                     logger.info(f"Retrying in {retry_delay} seconds...")
-                     await asyncio.sleep(retry_delay)
-                 else:
-                     logger.warning(f"Using 'main' as fallback revision for {model_id} after {self._max_retries} failed attempts")
-                     return "main"
-
-     async def add_vote(self, model_id: str, user_id: str, vote_type: str) -> Dict[str, Any]:
-         """Add a vote for a model"""
-         try:
-             self._log_repo_operation("add", f"{HF_ORGANIZATION}/votes", f"Adding {vote_type} vote for {model_id} by {user_id}")
-             logger.info(LogFormatter.section("NEW VOTE"))
-             stats = {
-                 "Model": model_id,
-                 "User": user_id,
-                 "Type": vote_type
-             }
-             for line in LogFormatter.tree(stats, "Vote Details"):
-                 logger.info(line)
-
-             revision = await self._get_model_revision(model_id)
-             check_tuple = (model_id, revision, user_id)
-
-             if check_tuple in self.vote_check_set:
-                 raise ValueError("Vote already recorded for this model")
-
-             vote = {
-                 "model": model_id,
-                 "revision": revision,
-                 "username": user_id,
-                 "timestamp": datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ"),
-                 "vote_type": vote_type
-             }
-
-             # Update local storage
-             with open(self.votes_file, "a") as f:
-                 f.write(json.dumps(vote) + "\n")
-
-             self._add_vote_to_memory(vote)
-             self.votes_to_upload.append(vote)
- stats = {
375
- "Status": "Success",
376
- "Queue_Size": len(self.votes_to_upload)
377
- }
378
- for line in LogFormatter.stats(stats):
379
- logger.info(line)
380
-
381
- # Try to upload if batch size reached
382
- if len(self.votes_to_upload) >= self._upload_batch_size:
383
- logger.info(LogFormatter.info(f"Upload batch size reached ({self._upload_batch_size}), triggering sync"))
384
- await self._sync_with_hub()
385
-
386
- return {"status": "success", "message": "Vote added successfully"}
387
-
388
- except Exception as e:
389
- logger.error(LogFormatter.error("Failed to add vote", e))
390
- raise
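
A note on the dedupe logic above: a vote is keyed by the `(model, revision, username)` triple, so a user may vote for the same model again only once it ships a new revision. A minimal, self-contained sketch of that keying (plain Python, no Hub access; `VoteBook` is a hypothetical name used only for illustration):

```python
from typing import Set, Tuple


class VoteBook:
    """Toy illustration of the (model, revision, username) dedupe key."""

    def __init__(self) -> None:
        self.seen: Set[Tuple[str, str, str]] = set()

    def add(self, model: str, revision: str, username: str) -> bool:
        key = (model, revision, username)
        if key in self.seen:
            return False  # duplicate vote for this exact revision
        self.seen.add(key)
        return True


book = VoteBook()
assert book.add("org/model", "abc123", "alice")      # first vote: accepted
assert not book.add("org/model", "abc123", "alice")  # same revision: rejected
assert book.add("org/model", "def456", "alice")      # new revision: accepted
```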
 
backend/app/utils/__init__.py DELETED
@@ -1,3 +0,0 @@
- from . import model_validation
- 
- __all__ = ["model_validation"]
 
backend/app/utils/logging.py DELETED
@@ -1,3 +0,0 @@
- from app.core.formatting import LogFormatter
- 
- __all__ = ['LogFormatter']
 
backend/app/utils/model_validation.py DELETED
@@ -1,266 +0,0 @@
- import json
- import logging
- import asyncio
- from typing import Tuple, Optional, Dict, Any
- from datasets import load_dataset
- from huggingface_hub import HfApi, ModelCard, hf_hub_download
- from huggingface_hub import hf_api
- from transformers import AutoConfig, AutoTokenizer
- from app.config.base import HF_TOKEN
- from app.config.hf_config import OFFICIAL_PROVIDERS_REPO
- from app.core.formatting import LogFormatter
- 
- logger = logging.getLogger(__name__)
- 
- class ModelValidator:
-     def __init__(self):
-         self.token = HF_TOKEN
-         self.api = HfApi(token=self.token)
-         self.headers = {"Authorization": f"Bearer {self.token}"} if self.token else {}
- 
-     async def check_model_card(self, model_id: str) -> Tuple[bool, str, Optional[ModelCard]]:
-         """Check if model has a valid model card"""
-         try:
-             logger.info(LogFormatter.info(f"Checking model card for {model_id}"))
- 
-             # Get model card content using ModelCard.load
-             try:
-                 model_card = await asyncio.to_thread(
-                     ModelCard.load,
-                     model_id
-                 )
-                 logger.info(LogFormatter.success("Model card found"))
-             except Exception as e:
-                 error_msg = "Please add a model card to your model to explain how you trained/fine-tuned it."
-                 logger.error(LogFormatter.error(error_msg, e))
-                 return False, error_msg, None
- 
-             # Check license in model card data
-             if model_card.data.license is None and not ("license_name" in model_card.data and "license_link" in model_card.data):
-                 error_msg = "License not found. Please add a license to your model card using the `license` metadata or a `license_name`/`license_link` pair."
-                 logger.warning(LogFormatter.warning(error_msg))
-                 return False, error_msg, None
- 
-             # Enforce card content length
-             if len(model_card.text) < 200:
-                 error_msg = "Please add a description to your model card, it is too short."
-                 logger.warning(LogFormatter.warning(error_msg))
-                 return False, error_msg, None
- 
-             logger.info(LogFormatter.success("Model card validation passed"))
-             return True, "", model_card
- 
-         except Exception as e:
-             error_msg = "Failed to validate model card"
-             logger.error(LogFormatter.error(error_msg, e))
-             return False, str(e), None
- 
-     async def get_safetensors_metadata(self, model_id: str, is_adapter: bool = False, revision: str = "main") -> Optional[Dict]:
-         """Get metadata from a safetensors file"""
-         try:
-             if is_adapter:
-                 metadata = await asyncio.to_thread(
-                     hf_api.parse_safetensors_file_metadata,
-                     model_id,
-                     "adapter_model.safetensors",
-                     token=self.token,
-                     revision=revision,
-                 )
-             else:
-                 metadata = await asyncio.to_thread(
-                     hf_api.get_safetensors_metadata,
-                     repo_id=model_id,
-                     token=self.token,
-                     revision=revision,
-                 )
-             return metadata
- 
-         except Exception as e:
-             logger.error(f"Failed to get safetensors metadata: {str(e)}")
-             return None
- 
-     async def get_model_size(
-         self,
-         model_info: Any,
-         precision: str,
-         base_model: str,
-         revision: str
-     ) -> Tuple[Optional[float], Optional[str]]:
-         """Get model size in billions of parameters"""
-         try:
-             logger.info(LogFormatter.info(f"Checking model size for {model_info.modelId}"))
- 
-             # Check if model is adapter
-             is_adapter = any(s.rfilename == "adapter_config.json" for s in model_info.siblings if hasattr(s, 'rfilename'))
- 
-             # Try to get size from safetensors first
-             model_size = None
- 
-             if is_adapter and base_model:
-                 # For adapters, we need both adapter and base model sizes
-                 adapter_meta = await self.get_safetensors_metadata(model_info.id, is_adapter=True, revision=revision)
-                 base_meta = await self.get_safetensors_metadata(base_model, revision="main")
- 
-                 if adapter_meta and base_meta:
-                     adapter_size = sum(adapter_meta.parameter_count.values())
-                     base_size = sum(base_meta.parameter_count.values())
-                     model_size = adapter_size + base_size
-             else:
-                 # For regular models, just get the model size
-                 meta = await self.get_safetensors_metadata(model_info.id, revision=revision)
-                 if meta:
-                     model_size = sum(meta.parameter_count.values())  # total params
- 
-             if model_size is None:
-                 # If model size could not be determined, return an error
-                 return None, "Model size could not be determined"
- 
-             # Adjust size for GPTQ models
-             size_factor = 8 if (precision == "GPTQ" or "gptq" in model_info.id.lower()) else 1
-             model_size = model_size / 1e9  # Convert to billions, assuming float16
-             model_size = round(size_factor * model_size, 3)
- 
-             logger.info(LogFormatter.success(f"Model size: {model_size}B parameters"))
-             return model_size, None
- 
-         except Exception as e:
-             logger.error(LogFormatter.error(f"Error while determining model size: {e}"))
-             return None, str(e)
- 
-     async def check_chat_template(
-         self,
-         model_id: str,
-         revision: str
-     ) -> Tuple[bool, Optional[str]]:
-         """Check if model has a valid chat template"""
-         try:
-             logger.info(LogFormatter.info(f"Checking chat template for {model_id}"))
- 
-             try:
-                 config_file = await asyncio.to_thread(
-                     hf_hub_download,
-                     repo_id=model_id,
-                     filename="tokenizer_config.json",
-                     revision=revision,
-                     repo_type="model"
-                 )
- 
-                 with open(config_file, 'r') as f:
-                     tokenizer_config = json.load(f)
- 
-                 if 'chat_template' not in tokenizer_config:
-                     error_msg = f"The model {model_id} doesn't have a chat_template in its tokenizer_config.json. Please add a chat_template before submitting or submit without it."
-                     logger.error(LogFormatter.error(error_msg))
-                     return False, error_msg
- 
-                 logger.info(LogFormatter.success("Valid chat template found"))
-                 return True, None
- 
-             except Exception as e:
-                 error_msg = f"Error checking chat_template: {str(e)}"
-                 logger.error(LogFormatter.error(error_msg))
-                 return False, error_msg
- 
-         except Exception as e:
-             error_msg = "Failed to check chat template"
-             logger.error(LogFormatter.error(error_msg, e))
-             return False, str(e)
- 
-     async def is_model_on_hub(
-         self,
-         model_name: str,
-         revision: str,
-         test_tokenizer: bool = False,
-         trust_remote_code: bool = False
-     ) -> Tuple[bool, Optional[str], Optional[Any]]:
-         """Check if model exists and is properly configured on the Hub"""
-         try:
-             config = await asyncio.to_thread(
-                 AutoConfig.from_pretrained,
-                 model_name,
-                 revision=revision,
-                 trust_remote_code=trust_remote_code,
-                 token=self.token,
-                 force_download=True
-             )
- 
-             if test_tokenizer:
-                 try:
-                     await asyncio.to_thread(
-                         AutoTokenizer.from_pretrained,
-                         model_name,
-                         revision=revision,
-                         trust_remote_code=trust_remote_code,
-                         token=self.token
-                     )
-                 except ValueError as e:
-                     return False, f"The tokenizer is not available in an official Transformers release: {e}", None
-                 except Exception:
-                     return False, "The tokenizer cannot be loaded. Ensure the tokenizer class is part of a stable Transformers release and correctly configured.", None
- 
-             return True, None, config
- 
-         except ValueError:
-             return False, "The model requires `trust_remote_code=True` to launch, and for safety reasons, we don't accept such models automatically.", None
-         except Exception as e:
-             if "You are trying to access a gated repo." in str(e):
-                 return True, "The model is gated and requires special access permissions.", None
-             return False, f"The model was not found or is misconfigured on the Hub. Error: {e.args[0]}", None
- 
-     async def check_official_provider_status(
-         self,
-         model_id: str,
-         existing_models: Dict[str, list]
-     ) -> Tuple[bool, Optional[str]]:
-         """
-         Check if model is from official provider and has finished submission.
- 
-         Args:
-             model_id: The model identifier (org/model-name)
-             existing_models: Dictionary of models by status from get_models()
- 
-         Returns:
-             Tuple[bool, Optional[str]]: (is_valid, error_message)
-         """
-         try:
-             logger.info(LogFormatter.info(f"Checking official provider status for {model_id}"))
- 
-             # Get model organization
-             model_org = model_id.split('/')[0] if '/' in model_id else None
- 
-             if not model_org:
-                 return True, None
- 
-             # Load official providers dataset
-             dataset = load_dataset(OFFICIAL_PROVIDERS_REPO)
-             official_providers = dataset["train"][0]["CURATED_SET"]
- 
-             # Check if model org is in official providers
-             is_official = model_org in official_providers
- 
-             if is_official:
-                 logger.info(LogFormatter.info(f"Model organization '{model_org}' is an official provider"))
- 
-                 # Check for finished submissions
-                 if "finished" in existing_models:
-                     for model in existing_models["finished"]:
-                         if model["name"] == model_id:
-                             error_msg = (
-                                 f"Model {model_id} is an official provider model "
-                                 f"with a completed evaluation. "
-                                 f"To re-evaluate, please open a discussion."
-                             )
-                             logger.error(LogFormatter.error("Validation failed", error_msg))
-                             return False, error_msg
- 
-                 logger.info(LogFormatter.success("No finished submission found for this official provider model"))
-             else:
-                 logger.info(LogFormatter.info(f"Model organization '{model_org}' is not an official provider"))
- 
-             return True, None
- 
-         except Exception as e:
-             error_msg = f"Failed to check official provider status: {str(e)}"
-             logger.error(LogFormatter.error(error_msg))
-             return False, error_msg
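
For orientation, these checks were designed to run together at submission time; a hedged sketch of chaining them (assumes the class above is importable as shown and that `HF_TOKEN` is configured; the model id is a placeholder, and the real service may call them in a different order):

```python
import asyncio

from app.utils.model_validation import ModelValidator


async def validate_submission(model_id: str, revision: str = "main") -> None:
    validator = ModelValidator()

    ok, err, _card = await validator.check_model_card(model_id)
    if not ok:
        raise ValueError(f"Model card check failed: {err}")

    on_hub, err, _config = await validator.is_model_on_hub(model_id, revision, test_tokenizer=True)
    if not on_hub:
        raise ValueError(f"Hub check failed: {err}")

    has_template, err = await validator.check_chat_template(model_id, revision)
    if not has_template:
        raise ValueError(f"Chat template check failed: {err}")


asyncio.run(validate_submission("org/some-model"))  # placeholder model id
```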
 
backend/pyproject.toml DELETED
@@ -1,31 +0,0 @@
- [tool.poetry]
- name = "llm-leaderboard-backend"
- version = "0.1.0"
- description = "Backend for the Open LLM Leaderboard"
- authors = ["Your Name <your.email@example.com>"]
- 
- [tool.poetry.dependencies]
- python = "^3.12"
- fastapi = "^0.115.6"
- uvicorn = {extras = ["standard"], version = "^0.34.0"}
- numpy = "^2.2.0"
- pandas = "^2.2.3"
- datasets = "^3.2.0"
- pyarrow = "^18.1.0"
- python-multipart = "^0.0.20"
- huggingface-hub = "^0.27.1"
- transformers = "4.48.0"
- safetensors = "^0.4.5"
- aiofiles = "^24.1.0"
- fastapi-cache2 = "^0.2.1"
- python-dotenv = "^1.0.1"
- 
- [tool.poetry.group.dev.dependencies]
- pytest = "^8.3.4"
- black = "^24.10.0"
- isort = "^5.13.2"
- flake8 = "^6.1.0"
- 
- [build-system]
- requires = ["poetry-core>=1.0.0"]
- build-backend = "poetry.core.masonry.api"
 
backend/utils/analyze_prod_datasets.py DELETED
@@ -1,170 +0,0 @@
- import os
- import json
- import logging
- from datetime import datetime
- from pathlib import Path
- from typing import Dict, Any, List
- from huggingface_hub import HfApi
- from dotenv import load_dotenv
- from app.config.hf_config import HF_ORGANIZATION
- 
- # Get the backend directory path
- BACKEND_DIR = Path(__file__).parent.parent
- ROOT_DIR = BACKEND_DIR.parent
- 
- # Load environment variables from .env file in root directory
- load_dotenv(ROOT_DIR / ".env")
- 
- # Configure logging
- logging.basicConfig(
-     level=logging.INFO,
-     format='%(message)s'
- )
- logger = logging.getLogger(__name__)
- 
- # Initialize Hugging Face API
- HF_TOKEN = os.getenv("HF_TOKEN")
- if not HF_TOKEN:
-     raise ValueError("HF_TOKEN not found in environment variables")
- api = HfApi(token=HF_TOKEN)
- 
- def analyze_dataset(repo_id: str) -> Dict[str, Any]:
-     """Analyze a dataset and return statistics"""
-     try:
-         # Get dataset info
-         dataset_info = api.dataset_info(repo_id=repo_id)
- 
-         # Get file list
-         files = api.list_repo_files(repo_id, repo_type="dataset")
- 
-         # Get last commit info (list_repo_commits returns a list, newest first)
-         commits = api.list_repo_commits(repo_id, repo_type="dataset")
-         last_commit = commits[0] if commits else None
- 
-         # Count lines in jsonl files
-         total_entries = 0
-         for file in files:
-             if file.endswith('.jsonl'):
-                 try:
-                     # Download the file (hf_hub_download returns a local path)
-                     content = api.hf_hub_download(
-                         repo_id=repo_id,
-                         filename=file,
-                         repo_type="dataset"
-                     )
- 
-                     # Count lines
-                     with open(content, 'r') as f:
-                         for _ in f:
-                             total_entries += 1
- 
-                 except Exception as e:
-                     logger.error(f"Error processing file {file}: {str(e)}")
-                     continue
- 
-         # Special handling for requests dataset
-         if repo_id == f"{HF_ORGANIZATION}/requests":
-             pending_count = 0
-             completed_count = 0
- 
-             try:
-                 content = api.hf_hub_download(
-                     repo_id=repo_id,
-                     filename="eval_requests.jsonl",
-                     repo_type="dataset"
-                 )
- 
-                 with open(content, 'r') as f:
-                     for line in f:
-                         try:
-                             entry = json.loads(line)
-                             if entry.get("status") == "pending":
-                                 pending_count += 1
-                             elif entry.get("status") == "completed":
-                                 completed_count += 1
-                         except json.JSONDecodeError:
-                             continue
- 
-             except Exception as e:
-                 logger.error(f"Error analyzing requests: {str(e)}")
- 
-         # Build response
-         response = {
-             "id": repo_id,
-             "last_modified": last_commit.created_at.isoformat() if last_commit else None,
-             "total_entries": total_entries,
-             "file_count": len(files),
-             "size_bytes": dataset_info.size_in_bytes,
-             "downloads": dataset_info.downloads,
-         }
- 
-         # Add request-specific info if applicable
-         if repo_id == f"{HF_ORGANIZATION}/requests":
-             response.update({
-                 "pending_requests": pending_count,
-                 "completed_requests": completed_count
-             })
- 
-         return response
- 
-     except Exception as e:
-         logger.error(f"Error analyzing dataset {repo_id}: {str(e)}")
-         return {
-             "id": repo_id,
-             "error": str(e)
-         }
- 
- def main():
-     """Main function to analyze all datasets"""
-     try:
-         # List of datasets to analyze
-         datasets = [
-             {
-                 "id": f"{HF_ORGANIZATION}/contents",
-                 "description": "Aggregated results"
-             },
-             {
-                 "id": f"{HF_ORGANIZATION}/requests",
-                 "description": "Evaluation requests"
-             },
-             {
-                 "id": f"{HF_ORGANIZATION}/votes",
-                 "description": "User votes"
-             },
-             {
-                 "id": f"{HF_ORGANIZATION}/official-providers",
-                 "description": "Highlighted models"
-             }
-         ]
- 
-         # Analyze each dataset
-         results = []
-         for dataset in datasets:
-             logger.info(f"\nAnalyzing {dataset['description']} ({dataset['id']})...")
-             result = analyze_dataset(dataset['id'])
-             results.append(result)
- 
-             if 'error' in result:
-                 logger.error(f"❌ Error: {result['error']}")
-             else:
-                 logger.info(f"✓ {result['total_entries']} entries")
-                 logger.info(f"✓ {result['file_count']} files")
-                 logger.info(f"✓ {result['size_bytes'] / 1024:.1f} KB")
-                 logger.info(f"✓ {result['downloads']} downloads")
- 
-                 if 'pending_requests' in result:
-                     logger.info(f"✓ {result['pending_requests']} pending requests")
-                     logger.info(f"✓ {result['completed_requests']} completed requests")
- 
-                 if result['last_modified']:
-                     last_modified = datetime.fromisoformat(result['last_modified'].replace('Z', '+00:00'))
-                     logger.info(f"✓ Last modified: {last_modified.strftime('%Y-%m-%d %H:%M:%S')}")
- 
-         return results
- 
-     except Exception as e:
-         logger.error(f"Global error: {str(e)}")
-         return []
- 
- if __name__ == "__main__":
-     main()
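
The counting step above downloads each JSONL into the local HF cache and counts its lines; a standalone version of just that step (pure Python once the file is on disk; the path below is a placeholder):

```python
from pathlib import Path


def count_jsonl_entries(path: Path) -> int:
    """Count non-empty lines in a JSONL file (one entry per line)."""
    with path.open("r", encoding="utf-8") as f:
        return sum(1 for line in f if line.strip())


print(count_jsonl_entries(Path("votes_data.jsonl")))  # placeholder path
```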
 
backend/utils/analyze_prod_models.py DELETED
@@ -1,106 +0,0 @@
- import os
- import json
- import logging
- from datetime import datetime
- from pathlib import Path
- from huggingface_hub import HfApi
- from dotenv import load_dotenv
- from app.config.hf_config import HF_ORGANIZATION
- 
- # Get the backend directory path
- BACKEND_DIR = Path(__file__).parent.parent
- ROOT_DIR = BACKEND_DIR.parent
- 
- # Load environment variables from .env file in root directory
- load_dotenv(ROOT_DIR / ".env")
- 
- # Configure logging
- logging.basicConfig(
-     level=logging.INFO,
-     format='%(message)s'
- )
- logger = logging.getLogger(__name__)
- 
- # Initialize Hugging Face API
- HF_TOKEN = os.getenv("HF_TOKEN")
- if not HF_TOKEN:
-     raise ValueError("HF_TOKEN not found in environment variables")
- api = HfApi(token=HF_TOKEN)
- 
- def count_evaluated_models():
-     """Count the number of evaluated models"""
-     try:
-         # Get dataset info
-         dataset_info = api.dataset_info(repo_id=f"{HF_ORGANIZATION}/contents", repo_type="dataset")
- 
-         # Get file list
-         files = api.list_repo_files(f"{HF_ORGANIZATION}/contents", repo_type="dataset")
- 
-         # Get last commit info (list_repo_commits returns a list, newest first)
-         commits = api.list_repo_commits(f"{HF_ORGANIZATION}/contents", repo_type="dataset")
-         last_commit = commits[0] if commits else None
- 
-         # Count lines in jsonl files
-         total_entries = 0
-         for file in files:
-             if file.endswith('.jsonl'):
-                 try:
-                     # Download the file (hf_hub_download returns a local path)
-                     content = api.hf_hub_download(
-                         repo_id=f"{HF_ORGANIZATION}/contents",
-                         filename=file,
-                         repo_type="dataset"
-                     )
- 
-                     # Count lines
-                     with open(content, 'r') as f:
-                         for _ in f:
-                             total_entries += 1
- 
-                 except Exception as e:
-                     logger.error(f"Error processing file {file}: {str(e)}")
-                     continue
- 
-         # Build response
-         response = {
-             "total_models": total_entries,
-             "last_modified": last_commit.created_at.isoformat() if last_commit else None,
-             "file_count": len(files),
-             "size_bytes": dataset_info.size_in_bytes,
-             "downloads": dataset_info.downloads
-         }
- 
-         return response
- 
-     except Exception as e:
-         logger.error(f"Error counting evaluated models: {str(e)}")
-         return {
-             "error": str(e)
-         }
- 
- def main():
-     """Main function to count evaluated models"""
-     try:
-         logger.info("\nAnalyzing evaluated models...")
-         result = count_evaluated_models()
- 
-         if 'error' in result:
-             logger.error(f"❌ Error: {result['error']}")
-         else:
-             logger.info(f"✓ {result['total_models']} models evaluated")
-             logger.info(f"✓ {result['file_count']} files")
-             logger.info(f"✓ {result['size_bytes'] / 1024:.1f} KB")
-             logger.info(f"✓ {result['downloads']} downloads")
- 
-             if result['last_modified']:
-                 last_modified = datetime.fromisoformat(result['last_modified'].replace('Z', '+00:00'))
-                 logger.info(f"✓ Last modified: {last_modified.strftime('%Y-%m-%d %H:%M:%S')}")
- 
-         return result
- 
-     except Exception as e:
-         logger.error(f"Global error: {str(e)}")
-         return {"error": str(e)}
- 
- if __name__ == "__main__":
-     main()
 
backend/utils/fix_wrong_model_size.py DELETED
@@ -1,110 +0,0 @@
- import json
- import logging
- import asyncio
- from datetime import datetime
- import huggingface_hub
- from huggingface_hub.errors import RepositoryNotFoundError, RevisionNotFoundError
- from dotenv import load_dotenv
- from git import Repo
- from tqdm.auto import tqdm
- from tqdm.contrib.logging import logging_redirect_tqdm
- 
- from app.config.hf_config import HF_TOKEN, API
- 
- from app.utils.model_validation import ModelValidator
- 
- huggingface_hub.logging.set_verbosity_error()
- huggingface_hub.utils.disable_progress_bars()
- 
- logging.basicConfig(
-     level=logging.ERROR,
-     format='%(message)s'
- )
- logger = logging.getLogger(__name__)
- load_dotenv()
- 
- validator = ModelValidator()
- 
- def get_changed_files(repo_path, start_date, end_date):
-     repo = Repo(repo_path)
-     start = datetime.strptime(start_date, '%Y-%m-%d')
-     end = datetime.strptime(end_date, '%Y-%m-%d')
- 
-     changed_files = set()
-     pbar = tqdm(repo.iter_commits(), desc=f"Reading commits from {end_date} to {start_date}")
-     for commit in pbar:
-         commit_date = datetime.fromtimestamp(commit.committed_date)
-         pbar.set_postfix_str(f"Commit date: {commit_date}")
-         if start <= commit_date <= end:
-             changed_files.update(item.a_path for item in commit.diff(commit.parents[0]))
- 
-         if commit_date < start:
-             break
- 
-     return changed_files
- 
- 
- def read_json(repo_path, file):
-     with open(f"{repo_path}/{file}") as f:
-         return json.load(f)
- 
- 
- def write_json(repo_path, file, content):
-     with open(f"{repo_path}/{file}", "w") as f:
-         json.dump(content, f, indent=2)
- 
- 
- def main():
-     requests_path = "/requests"
-     start_date = "2024-12-09"
-     end_date = "2025-01-07"
- 
-     changed_files = get_changed_files(requests_path, start_date, end_date)
- 
-     for file in tqdm(changed_files):
-         try:
-             request_data = read_json(requests_path, file)
-         except FileNotFoundError:
-             tqdm.write(f"File {file} not found")
-             continue
- 
-         try:
-             model_info = API.model_info(
-                 repo_id=request_data["model"],
-                 revision=request_data["revision"],
-                 token=HF_TOKEN
-             )
-         except (RepositoryNotFoundError, RevisionNotFoundError):
-             tqdm.write(f"Model info for {request_data['model']} not found")
-             continue
- 
-         with logging_redirect_tqdm():
-             new_model_size, error = asyncio.run(validator.get_model_size(
-                 model_info=model_info,
-                 precision=request_data["precision"],
-                 base_model=request_data["base_model"],
-                 revision=request_data["revision"]
-             ))
- 
-         if error:
-             tqdm.write(f"Error getting model size info for {request_data['model']}, {error}")
-             continue
- 
-         old_model_size = request_data["params"]
-         if old_model_size != new_model_size:
-             if new_model_size > 100:
-                 tqdm.write(f"Model: {request_data['model']}, size is more than 100B: {new_model_size}")
- 
-             tqdm.write(f"Model: {request_data['model']}, old size: {request_data['params']} new size: {new_model_size}")
-             tqdm.write(f"Updating request file {file}")
- 
-             request_data["params"] = new_model_size
-             write_json(requests_path, file, content=request_data)
- 
- 
- if __name__ == "__main__":
-     main()
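
One portability note on the f-strings edited above: reusing the same quote character inside a double-quoted f-string (e.g. `f"... {request_data["model"]} ..."`) only parses on Python 3.12+ (PEP 701); alternating the quote style keeps the script valid on older interpreters as well. A quick illustration:

```python
request_data = {"model": "org/name"}  # stand-in for a real request entry

# Portable: alternate quote styles inside the f-string.
print(f"Model info for {request_data['model']} not found")

# f"Model info for {request_data["model"]} not found"
# would be a SyntaxError before Python 3.12 (PEP 701 relaxed this).
```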
 
backend/utils/last_activity.py DELETED
@@ -1,164 +0,0 @@
- import os
- import json
- import logging
- from datetime import datetime
- from pathlib import Path
- from typing import Dict, Any, List, Tuple
- from huggingface_hub import HfApi
- from dotenv import load_dotenv
- 
- # Get the backend directory path
- BACKEND_DIR = Path(__file__).parent.parent
- ROOT_DIR = BACKEND_DIR.parent
- 
- # Load environment variables from .env file in root directory
- load_dotenv(ROOT_DIR / ".env")
- 
- # Configure logging
- logging.basicConfig(
-     level=logging.INFO,
-     format='%(message)s'
- )
- logger = logging.getLogger(__name__)
- 
- # Initialize Hugging Face API
- HF_TOKEN = os.getenv("HF_TOKEN")
- if not HF_TOKEN:
-     raise ValueError("HF_TOKEN not found in environment variables")
- api = HfApi(token=HF_TOKEN)
- 
- # Default organization
- HF_ORGANIZATION = os.getenv('HF_ORGANIZATION', 'open-llm-leaderboard')
- 
- def get_last_votes(limit: int = 5) -> List[Dict]:
-     """Get the last votes from the votes dataset"""
-     try:
-         logger.info("\nFetching last votes...")
- 
-         # Download and read votes file
-         logger.info("Downloading votes file...")
-         votes_file = api.hf_hub_download(
-             repo_id=f"{HF_ORGANIZATION}/votes",
-             filename="votes_data.jsonl",
-             repo_type="dataset"
-         )
- 
-         logger.info("Reading votes file...")
-         votes = []
-         with open(votes_file, 'r') as f:
-             for line in f:
-                 try:
-                     vote = json.loads(line)
-                     votes.append(vote)
-                 except json.JSONDecodeError:
-                     continue
- 
-         # Sort by timestamp and get last n votes
-         logger.info("Sorting votes...")
-         votes.sort(key=lambda x: x.get('timestamp', ''), reverse=True)
-         last_votes = votes[:limit]
- 
-         logger.info(f"✓ Found {len(last_votes)} recent votes")
-         return last_votes
- 
-     except Exception as e:
-         logger.error(f"Error reading votes: {str(e)}")
-         return []
- 
- def get_last_models(limit: int = 5) -> List[Dict]:
-     """Get the last models from the requests dataset using commit history"""
-     try:
-         logger.info("\nFetching last model submissions...")
- 
-         # Get commit history
-         logger.info("Getting commit history...")
-         commits = list(api.list_repo_commits(
-             repo_id=f"{HF_ORGANIZATION}/requests",
-             repo_type="dataset"
-         ))
-         logger.info(f"Found {len(commits)} commits")
- 
-         # Track processed files to avoid duplicates
-         processed_files = set()
-         models = []
- 
-         # Process commits until we have enough models
-         for i, commit in enumerate(commits):
-             logger.info(f"Processing commit {i+1}/{len(commits)} ({commit.created_at})")
- 
-             # Look at added/modified files in this commit
-             files_to_process = [f for f in (commit.added + commit.modified) if f.endswith('.json')]
-             if files_to_process:
-                 logger.info(f"Found {len(files_to_process)} JSON files in commit")
- 
-             for file in files_to_process:
-                 if file in processed_files:
-                     continue
- 
-                 processed_files.add(file)
-                 logger.info(f"Downloading {file}...")
- 
-                 try:
-                     # Download and read the file
-                     content = api.hf_hub_download(
-                         repo_id=f"{HF_ORGANIZATION}/requests",
-                         filename=file,
-                         repo_type="dataset"
-                     )
- 
-                     with open(content, 'r') as f:
-                         model_data = json.load(f)
-                         models.append(model_data)
-                         logger.info(f"✓ Added model {model_data.get('model', 'Unknown')}")
- 
-                         if len(models) >= limit:
-                             logger.info("Reached desired number of models")
-                             break
- 
-                 except Exception as e:
-                     logger.error(f"Error reading file {file}: {str(e)}")
-                     continue
- 
-             if len(models) >= limit:
-                 break
- 
-         logger.info(f"✓ Found {len(models)} recent model submissions")
-         return models
- 
-     except Exception as e:
-         logger.error(f"Error reading models: {str(e)}")
-         return []
- 
- def main():
-     """Display last activities from the leaderboard"""
-     try:
-         # Get last votes
-         logger.info("\n=== Last Votes ===")
-         last_votes = get_last_votes()
-         if last_votes:
-             for vote in last_votes:
-                 logger.info(f"\nModel: {vote.get('model')}")
-                 logger.info(f"User: {vote.get('username')}")
-                 logger.info(f"Timestamp: {vote.get('timestamp')}")
-         else:
-             logger.info("No votes found")
- 
-         # Get last model submissions
-         logger.info("\n=== Last Model Submissions ===")
-         last_models = get_last_models()
-         if last_models:
-             for model in last_models:
-                 logger.info(f"\nModel: {model.get('model')}")
-                 logger.info(f"Submitter: {model.get('sender', 'Unknown')}")
-                 logger.info(f"Status: {model.get('status', 'Unknown')}")
-                 logger.info(f"Submission Time: {model.get('submitted_time', 'Unknown')}")
-                 logger.info(f"Precision: {model.get('precision', 'Unknown')}")
-                 logger.info(f"Weight Type: {model.get('weight_type', 'Unknown')}")
-         else:
-             logger.info("No models found")
- 
-     except Exception as e:
-         logger.error(f"Global error: {str(e)}")
- 
- if __name__ == "__main__":
-     main()
 
backend/utils/sync_datasets_locally.py DELETED
@@ -1,130 +0,0 @@
- import os
- import shutil
- import tempfile
- import logging
- from pathlib import Path
- from huggingface_hub import HfApi, snapshot_download, upload_folder, create_repo
- from dotenv import load_dotenv
- 
- # Configure source and destination usernames
- SOURCE_USERNAME = "open-llm-leaderboard"
- DESTINATION_USERNAME = "tfrere"
- 
- # Get the backend directory path
- BACKEND_DIR = Path(__file__).parent.parent
- ROOT_DIR = BACKEND_DIR.parent
- 
- # Load environment variables from .env file in root directory
- load_dotenv(ROOT_DIR / ".env")
- 
- # Configure logging
- logging.basicConfig(
-     level=logging.INFO,
-     format='%(message)s'
- )
- logger = logging.getLogger(__name__)
- 
- # List of dataset names to sync
- DATASET_NAMES = [
-     "votes",
-     "results",
-     "requests",
-     "contents",
-     "official-providers",
- ]
- 
- # Build list of datasets with their source and destination paths
- DATASETS = [
-     (name, f"{SOURCE_USERNAME}/{name}", f"{DESTINATION_USERNAME}/{name}")
-     for name in DATASET_NAMES
- ]
- 
- # Initialize Hugging Face API
- api = HfApi()
- 
- def ensure_repo_exists(repo_id, token):
-     """Ensure the repository exists, create it if it doesn't"""
-     try:
-         api.repo_info(repo_id=repo_id, repo_type="dataset")
-         logger.info(f"✓ Repository {repo_id} already exists")
-     except Exception:
-         logger.info(f"Creating repository {repo_id}...")
-         create_repo(
-             repo_id=repo_id,
-             repo_type="dataset",
-             token=token,
-             private=True
-         )
-         logger.info(f"✓ Repository {repo_id} created")
- 
- def process_dataset(dataset_info, token):
-     """Process a single dataset"""
-     name, source_dataset, destination_dataset = dataset_info
-     try:
-         logger.info(f"\n📥 Processing dataset: {name}")
- 
-         # Ensure destination repository exists
-         ensure_repo_exists(destination_dataset, token)
- 
-         # Create a temporary directory for this dataset
-         with tempfile.TemporaryDirectory() as temp_dir:
-             try:
-                 # List files in source dataset
-                 logger.info(f"Listing files in {source_dataset}...")
-                 files = api.list_repo_files(source_dataset, repo_type="dataset")
-                 logger.info(f"Detected structure: {len(files)} files")
- 
-                 # Download dataset
-                 logger.info(f"Downloading from {source_dataset}...")
-                 local_dir = snapshot_download(
-                     repo_id=source_dataset,
-                     repo_type="dataset",
-                     local_dir=temp_dir,
-                     token=token
-                 )
-                 logger.info("✓ Download complete")
- 
-                 # Upload to destination while preserving structure
-                 logger.info(f"📤 Uploading to {destination_dataset}...")
-                 api.upload_folder(
-                     folder_path=local_dir,
-                     repo_id=destination_dataset,
-                     repo_type="dataset",
-                     token=token
-                 )
-                 logger.info(f"✅ {name} copied successfully!")
-                 return True
- 
-             except Exception as e:
-                 logger.error(f"❌ Error processing {name}: {str(e)}")
-                 return False
- 
-     except Exception as e:
-         logger.error(f"❌ Error for {name}: {str(e)}")
-         return False
- 
- def copy_datasets():
-     try:
-         logger.info("🔑 Checking authentication...")
-         # Get token from .env file
-         token = os.getenv("HF_TOKEN")
-         if not token:
-             raise ValueError("HF_TOKEN not found in .env file")
- 
-         # Process datasets sequentially
-         results = []
-         for dataset_info in DATASETS:
-             success = process_dataset(dataset_info, token)
-             results.append((dataset_info[0], success))
- 
-         # Print final summary
-         logger.info("\n📊 Final summary:")
-         for dataset, success in results:
-             status = "✅ Success" if success else "❌ Failure"
-             logger.info(f"{dataset}: {status}")
- 
-     except Exception as e:
-         logger.error(f"❌ Global error: {str(e)}")
- 
- if __name__ == "__main__":
-     copy_datasets()
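
As a usage aside, a read-only dry run can be handy before copying anything; a hedged sketch that only lists what would be synced (uses `list_repo_files`, which the script already relies on, and assumes the source datasets are readable with your current credentials):

```python
from huggingface_hub import HfApi

api = HfApi()  # pass token=... here if the source datasets are private
for name in ["votes", "results", "requests", "contents", "official-providers"]:
    files = api.list_repo_files(f"open-llm-leaderboard/{name}", repo_type="dataset")
    print(f"{name}: {len(files)} files would be copied")
```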
 
backend/uv.lock DELETED
The diff for this file is too large to render. See raw diff
 
docker-compose.yml DELETED
@@ -1,33 +0,0 @@
- services:
-   backend:
-     build:
-       context: ./backend
-       dockerfile: Dockerfile.dev
-       args:
-         - HF_TOKEN=${HF_TOKEN}
-     ports:
-       - "${BACKEND_PORT:-8000}:8000"
-     volumes:
-       - ./backend:/app
-     environment:
-       - ENVIRONMENT=${ENVIRONMENT:-development}
-       - HF_TOKEN=${HF_TOKEN}
-       - HF_HOME=${HF_HOME:-/.cache}
-     command: uvicorn app.asgi:app --host 0.0.0.0 --port 8000 --reload
- 
-   frontend:
-     build:
-       context: ./frontend
-       dockerfile: Dockerfile.dev
-     ports:
-       - "${FRONTEND_PORT:-7860}:7860"
-     volumes:
-       - ./frontend:/app
-       - /app/node_modules
-     environment:
-       - NODE_ENV=${ENVIRONMENT:-development}
-       - CHOKIDAR_USEPOLLING=true
-       - PORT=${FRONTEND_PORT:-7860}
-     command: npm start
-     stdin_open: true
-     tty: true
 
frontend/Dockerfile.dev DELETED
@@ -1,15 +0,0 @@
- FROM node:18
- 
- WORKDIR /app
- 
- # Install required global dependencies
- RUN npm install -g react-scripts
- 
- # Copy package.json and package-lock.json
- COPY package*.json ./
- 
- # Install project dependencies
- RUN npm install
- 
- # Volume will be mounted here, no need for COPY
- CMD ["npm", "start"]
 
frontend/README.md DELETED
@@ -1,80 +0,0 @@
- # Frontend - Open LLM Leaderboard 🏆
- 
- React interface for exploring and comparing open-source language models.
- 
- ## 🏗 Architecture
- 
- ```mermaid
- flowchart TD
-     Client(["User Browser"]) --> Components["React Components"]
- 
-     subgraph Frontend
-         Components --> Context["Context Layer<br>• LeaderboardContext<br>• Global State"]
- 
-         API["API Layer<br>• /api/leaderboard/formatted<br>• TanStack Query"] --> |Data Feed| Context
- 
-         Context --> Hooks["Hooks Layer<br>• Data Processing<br>• Filtering<br>• Caching"]
- 
-         Hooks --> Features["Features<br>• Table Management<br>• Search & Filters<br>• Display Options"]
-         Features --> Cache["Cache Layer<br>• LocalStorage<br>• URL State"]
-     end
- 
-     API --> Backend["Backend Server"]
- 
-     style Backend fill:#f96,stroke:#333,stroke-width:2px
- ```
- 
- ## ✨ Core Features
- 
- - 🔍 **Search & Filters**: Real-time filtering, regex search, advanced filters
- - 📊 **Data Visualization**: Interactive table, customizable columns, sorting
- - 🔄 **State Management**: URL sync, client-side caching (5min TTL)
- - 📱 **Responsive Design**: Mobile-friendly, dark/light themes
- 
- ## 🛠 Tech Stack
- 
- - React 18 + Material-UI
- - TanStack Query & Table
- - React Router v6
- 
- ## 📁 Project Structure
- 
- ```
- src/
- ├── pages/
- │   └── LeaderboardPage/
- │       ├── components/ # UI Components
- │       ├── context/    # Global State
- │       └── hooks/      # Data Processing
- ├── components/         # Shared Components
- └── utils/              # Helper Functions
- ```
- 
- ## 🚀 Development
- 
- ```bash
- # Install dependencies
- npm install
- 
- # Start development server
- npm start
- 
- # Production build
- npm run build
- ```
- 
- ## 🔧 Environment Variables
- 
- ```env
- # API Configuration
- REACT_APP_API_URL=http://localhost:8000
- REACT_APP_CACHE_DURATION=300000 # 5 minutes
- ```
- 
- ## 🔄 Data Flow
- 
- 1. API fetches leaderboard data from backend
- 2. Context stores and manages global state
- 3. Hooks handle data processing and filtering
- 4. Components render based on processed data
- 5. Cache maintains user preferences and URL state
 
frontend/package.json DELETED
@@ -1,55 +0,0 @@
- {
-   "name": "open-llm-leaderboard",
-   "version": "0.1.0",
-   "private": true,
-   "dependencies": {
-     "@emotion/react": "^11.13.3",
-     "@emotion/styled": "^11.13.0",
-     "@huggingface/hub": "^0.14.0",
-     "@mui/icons-material": "^6.1.7",
-     "@mui/lab": "^6.0.0-beta.16",
-     "@mui/material": "^6.1.6",
-     "@mui/x-data-grid": "^7.22.2",
-     "@tanstack/react-query": "^5.62.2",
-     "@tanstack/react-table": "^8.20.5",
-     "@tanstack/react-virtual": "^3.10.9",
-     "@testing-library/jest-dom": "^5.17.0",
-     "@testing-library/react": "^13.4.0",
-     "@testing-library/user-event": "^13.5.0",
-     "compression": "^1.7.4",
-     "cors": "^2.8.5",
-     "express": "^4.18.2",
-     "react": "^18.3.1",
-     "react-dom": "^18.3.1",
-     "react-router-dom": "^6.28.0",
-     "react-scripts": "5.0.1",
-     "serve-static": "^1.15.0",
-     "web-vitals": "^2.1.4"
-   },
-   "scripts": {
-     "start": "react-scripts start",
-     "build": "react-scripts build",
-     "test": "react-scripts test",
-     "eject": "react-scripts eject",
-     "serve": "node server.js"
-   },
-   "eslintConfig": {
-     "extends": [
-       "react-app",
-       "react-app/jest"
-     ]
-   },
-   "browserslist": {
-     "production": [
-       ">0.2%",
-       "not dead",
-       "not op_mini all"
-     ],
-     "development": [
-       "last 1 chrome version",
-       "last 1 firefox version",
-       "last 1 safari version"
-     ]
-   },
-   "proxy": "http://backend:8000"
- }
 
frontend/public/index.html DELETED
@@ -1,96 +0,0 @@
- <!DOCTYPE html>
- <html lang="en">
-   <head>
-     <meta charset="utf-8" />
-     <link rel="icon" href="%PUBLIC_URL%/logo32.png" />
-     <meta
-       name="viewport"
-       content="width=device-width, initial-scale=1, maximum-scale=1, user-scalable=no, viewport-fit=cover"
-     />
-     <meta
-       name="description"
-       content="Interactive leaderboard tracking and comparing open-source Large Language Models across multiple benchmarks: IFEval, BBH, MATH, GPQA, MUSR, and MMLU-PRO."
-     />
- 
-     <!-- Open Graph / Facebook -->
-     <meta property="og:type" content="website" />
-     <meta
-       property="og:url"
-       content="https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard"
-     />
-     <meta
-       property="og:title"
-       content="Open LLM Leaderboard - Compare Open Source Large Language Models"
-     />
-     <meta
-       property="og:description"
-       content="Interactive leaderboard for comparing LLM performance across multiple benchmarks. Features real-time filtering, community voting, and comprehensive model analysis with benchmarks like IFEval, BBH, MATH, GPQA, MUSR, and MMLU-PRO."
-     />
-     <meta property="og:image" content="%PUBLIC_URL%/og-image.png" />
- 
-     <!-- Twitter -->
-     <meta property="twitter:card" content="summary_large_image" />
-     <meta
-       property="twitter:url"
-       content="https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard"
-     />
-     <meta
-       property="twitter:title"
-       content="Open LLM Leaderboard - Compare Open Source Large Language Models"
-     />
-     <meta
-       property="twitter:description"
-       content="Interactive leaderboard for comparing LLM performance across multiple benchmarks. Features real-time filtering, community voting, and comprehensive model analysis with benchmarks like IFEval, BBH, MATH, GPQA, MUSR, and MMLU-PRO."
-     />
-     <meta property="twitter:image" content="%PUBLIC_URL%/og-image.png" />
-     <!--
-       Notice the use of %PUBLIC_URL% in the tags above.
-       It will be replaced with the URL of the `public` folder during the build.
-       Only files inside the `public` folder can be referenced from the HTML.
- 
-       Unlike "/favicon.ico" or "favicon.ico", "%PUBLIC_URL%/favicon.ico" will
-       work correctly both with client-side routing and a non-root public URL.
-       Learn how to configure a non-root public URL by running `npm run build`.
-     -->
-     <title>
-       Open LLM Leaderboard - Compare Open Source Large Language Models
-     </title>
-     <link
-       href="https://fonts.googleapis.com/css2?family=Source+Sans+Pro:wght@400;600;700&display=swap"
-       rel="stylesheet"
-     />
-     <style>
-       html,
-       body {
-         position: fixed;
-         width: 100%;
-         height: 100%;
-         overflow: hidden;
-         -webkit-overflow-scrolling: touch;
-       }
-       #root {
-         position: absolute;
-         top: 0;
-         left: 0;
-         right: 0;
-         bottom: 0;
-         overflow-y: auto;
-         -webkit-overflow-scrolling: touch;
-       }
-     </style>
-   </head>
-   <body>
-     <noscript>You need to enable JavaScript to run this app.</noscript>
-     <div id="root"></div>
-     <!--
-       This HTML file is a template.
-       If you open it directly in the browser, you will see an empty page.
- 
-       You can add webfonts, meta tags, or analytics to this file.
-       The build step will place the bundled scripts into the <body> tag.
- 
-       To begin the development, run `npm start` or `yarn start`.
-       To create a production bundle, use `npm run build` or `yarn build`.
-     -->
-   </body>
- </html>
 
frontend/public/logo256.png DELETED
Binary file (24.6 kB)
 
frontend/public/logo32.png DELETED
Binary file (1.96 kB)
 
frontend/public/og-image.jpg DELETED
Binary file (13.8 kB)